diff options
734 files changed, 361362 insertions, 4 deletions
diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index 7d4b53a7cb..7bb65dced4 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -174,6 +174,33 @@ Copyright: 2015-2020 Google, Inc. 2002, NVIDIA Corporation. License: glslang +Files: ./thirdparty/graphite/ +Comment: Graphite engine +Copyright: 2010, SIL International +License: MPL-2.0 + +Files: ./thirdparty/harfbuzz/ +Comment: HarfBuzz text shaping library +Copyright: 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020 Google, Inc. + 2018,2019,2020 Ebrahim Byagowi + 2019,2020 Facebook, Inc. + 2012 Mozilla Foundation + 2011 Codethink Limited + 2008,2010 Nokia Corporation and/or its subsidiary(-ies) + 2009 Keith Stribley + 2009 Martin Hosken and SIL International + 2007 Chris Wilson + 2006 Behdad Esfahbod + 2005 David Turner + 2004,2007,2008,2009,2010 Red Hat, Inc. + 1998-2004 David Turner and Werner Lemberg +License: HarfBuzz + +Files: ./thirdparty/icu4c/ +Comment: International Components for Unicode +Copyright: 1991-2020, Unicode +License: Unicode + Files: ./thirdparty/jpeg_compressor/ Comment: jpeg-compressor Copyright: 2012, Rich Geldreich @@ -1180,6 +1207,46 @@ License: FTL Robert Wilhelm <robert.wilhelm@freetype.org> Werner Lemberg <werner.lemberg@freetype.org> +License: HarfBuzz + HarfBuzz is licensed under the so-called "Old MIT" license. Details follow. + For parts of HarfBuzz that are licensed under different licenses see individual + files names COPYING in subdirectories where applicable. + . + Copyright (C) 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020 Google, Inc. + Copyright (C) 2018,2019,2020 Ebrahim Byagowi + Copyright (C) 2019,2020 Facebook, Inc. + Copyright (C) 2012 Mozilla Foundation + Copyright (C) 2011 Codethink Limited + Copyright (C) 2008,2010 Nokia Corporation and/or its subsidiary(-ies) + Copyright (C) 2009 Keith Stribley + Copyright (C) 2009 Martin Hosken and SIL International + Copyright (C) 2007 Chris Wilson + Copyright (C) 2006 Behdad Esfahbod + Copyright (C) 2005 David Turner + Copyright (C) 2004,2007,2008,2009,2010 Red Hat, Inc. + Copyright (C) 1998-2004 David Turner and Werner Lemberg + . + For full copyright notices consult the individual files in the package. + . + . + Permission is hereby granted, without written agreement and without + license or royalty fees, to use, copy, modify, and distribute this + software and its documentation for any purpose, provided that the + above copyright notice and the following two paragraphs appear in + all copies of this software. + . + IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + DAMAGE. + . + THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + License: MPL-2.0 Mozilla Public License Version 2.0 ================================== @@ -1650,6 +1717,41 @@ License: Tamsyn In no event will the author be held liable for damages arising from the use of this font. +License: Unicode + COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later) + . + Copyright (C) 1991-2020 Unicode, Inc. All rights reserved. + Distributed under the Terms of Use in https://www.unicode.org/copyright.html. + . + Permission is hereby granted, free of charge, to any person obtaining + a copy of the Unicode data files and any associated documentation + (the "Data Files") or Unicode software and any associated documentation + (the "Software") to deal in the Data Files or Software + without restriction, including without limitation the rights to use, + copy, modify, merge, publish, distribute, and/or sell copies of + the Data Files or Software, and to permit persons to whom the Data Files + or Software are furnished to do so, provided that either + (a) this copyright and permission notice appear with all copies + of the Data Files or Software, or + (b) this copyright and permission notice appear in associated + Documentation. + . + THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF + ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS + NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL + DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + PERFORMANCE OF THE DATA FILES OR SOFTWARE. + . + Except as contained in this notice, the name of a copyright holder + shall not be used in advertising or otherwise to promote the sale, + use or other dealings in these Data Files or Software without prior + written authorization of the copyright holder. + License: Zlib This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages diff --git a/SConstruct b/SConstruct index d82ceb514b..10f961cff8 100644 --- a/SConstruct +++ b/SConstruct @@ -144,6 +144,9 @@ opts.Add(BoolVariable("builtin_certs", "Use the built-in SSL certificates bundle opts.Add(BoolVariable("builtin_enet", "Use the built-in ENet library", True)) opts.Add(BoolVariable("builtin_freetype", "Use the built-in FreeType library", True)) opts.Add(BoolVariable("builtin_glslang", "Use the built-in glslang library", True)) +opts.Add(BoolVariable("builtin_graphite", "Use the built-in Graphite library", True)) +opts.Add(BoolVariable("builtin_harfbuzz", "Use the built-in HarfBuzz library", True)) +opts.Add(BoolVariable("builtin_icu", "Use the built-in ICU library", True)) opts.Add(BoolVariable("builtin_libogg", "Use the built-in libogg library", True)) opts.Add(BoolVariable("builtin_libpng", "Use the built-in libpng library", True)) opts.Add(BoolVariable("builtin_libtheora", "Use the built-in libtheora library", True)) diff --git a/modules/text_server_adv/SCsub b/modules/text_server_adv/SCsub new file mode 100644 index 0000000000..819ae4f4da --- /dev/null +++ b/modules/text_server_adv/SCsub @@ -0,0 +1,419 @@ +#!/usr/bin/env python + +Import("env") +Import("env_modules") + +if env["builtin_harfbuzz"]: + env_harfbuzz = env_modules.Clone() + + # Thirdparty source files + thirdparty_dir = "#thirdparty/harfbuzz/" + thirdparty_sources = [ + "src/hb-aat-layout.cc", + "src/hb-aat-map.cc", + "src/hb-blob.cc", + "src/hb-buffer-serialize.cc", + "src/hb-buffer.cc", + "src/hb-common.cc", + #'src/hb-coretext.cc', + #'src/hb-directwrite.cc', + "src/hb-draw.cc", + "src/hb-face.cc", + "src/hb-fallback-shape.cc", + "src/hb-font.cc", + "src/hb-ft.cc", + #'src/hb-gdi.cc', + #'src/hb-glib.cc', + #'src/hb-gobject-structs.cc', + "src/hb-graphite2.cc", + "src/hb-icu.cc", + "src/hb-map.cc", + "src/hb-number.cc", + "src/hb-ot-cff1-table.cc", + "src/hb-ot-cff2-table.cc", + "src/hb-ot-color.cc", + "src/hb-ot-face.cc", + "src/hb-ot-font.cc", + "src/hb-ot-layout.cc", + "src/hb-ot-map.cc", + "src/hb-ot-math.cc", + "src/hb-ot-meta.cc", + "src/hb-ot-metrics.cc", + "src/hb-ot-name.cc", + "src/hb-ot-shape-complex-arabic.cc", + "src/hb-ot-shape-complex-default.cc", + "src/hb-ot-shape-complex-hangul.cc", + "src/hb-ot-shape-complex-hebrew.cc", + "src/hb-ot-shape-complex-indic-table.cc", + "src/hb-ot-shape-complex-indic.cc", + "src/hb-ot-shape-complex-khmer.cc", + "src/hb-ot-shape-complex-myanmar.cc", + "src/hb-ot-shape-complex-thai.cc", + "src/hb-ot-shape-complex-use-table.cc", + "src/hb-ot-shape-complex-use.cc", + "src/hb-ot-shape-complex-vowel-constraints.cc", + "src/hb-ot-shape-fallback.cc", + "src/hb-ot-shape-normalize.cc", + "src/hb-ot-shape.cc", + "src/hb-ot-tag.cc", + "src/hb-ot-var.cc", + "src/hb-set.cc", + "src/hb-shape-plan.cc", + "src/hb-shape.cc", + "src/hb-shaper.cc", + "src/hb-static.cc", + "src/hb-style.cc", + "src/hb-subset-cff-common.cc", + "src/hb-subset-cff1.cc", + "src/hb-subset-cff2.cc", + "src/hb-subset-input.cc", + "src/hb-subset-plan.cc", + "src/hb-subset.cc", + "src/hb-ucd.cc", + "src/hb-unicode.cc", + #'src/hb-uniscribe.cc' + ] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + + if env["platform"] == "android" or env["platform"] == "linuxbsd" or env["platform"] == "server": + env_harfbuzz.Append(CCFLAGS=["-DHAVE_PTHREAD"]) + + if env["platform"] == "javascript": + if env["threads_enabled"]: + env_harfbuzz.Append(CCFLAGS=["-DHAVE_PTHREAD"]) + else: + env_harfbuzz.Append(CCFLAGS=["-DHB_NO_MT"]) + + env_harfbuzz.Append( + CPPPATH=[ + "#thirdparty/harfbuzz/src", + "#thirdparty/freetype/include", + "#thirdparty/graphite/include", + "#thirdparty/icu4c/common/", + ] + ) + env_harfbuzz.Append( + CCFLAGS=["-DHAVE_ICU_BUILTIN", "-DHAVE_ICU", "-DHAVE_FREETYPE", "-DHAVE_GRAPHITE2", "-DGRAPHITE2_STATIC",] + ) + env_harfbuzz.disable_warnings() + env_thirdparty = env_harfbuzz.Clone() + env_thirdparty.disable_warnings() + lib = env_thirdparty.add_library("harfbuzz_builtin", thirdparty_sources) + + # Needs to be appended to arrive after libscene in the linker call, + # but we don't want it to arrive *after* system libs, so manual hack + # LIBS contains first SCons Library objects ("SCons.Node.FS.File object") + # and then plain strings for system library. We insert between the two. + inserted = False + for idx, linklib in enumerate(env["LIBS"]): + if isinstance(linklib, (str, bytes)): # first system lib such as "X11", otherwise SCons lib object + env["LIBS"].insert(idx, lib) + inserted = True + break + if not inserted: + env.Append(LIBS=[lib]) + +if env["builtin_graphite"]: + env_graphite = env_modules.Clone() + + # Thirdparty source files + thirdparty_dir = "#thirdparty/graphite/" + thirdparty_sources = [ + "src/gr_char_info.cpp", + "src/gr_face.cpp", + "src/gr_features.cpp", + "src/gr_font.cpp", + "src/gr_logging.cpp", + "src/gr_segment.cpp", + "src/gr_slot.cpp", + "src/CmapCache.cpp", + "src/Code.cpp", + "src/Collider.cpp", + "src/Decompressor.cpp", + "src/Face.cpp", + #'src/FileFace.cpp', + "src/FeatureMap.cpp", + "src/Font.cpp", + "src/GlyphCache.cpp", + "src/GlyphFace.cpp", + "src/Intervals.cpp", + "src/Justifier.cpp", + "src/NameTable.cpp", + "src/Pass.cpp", + "src/Position.cpp", + "src/Segment.cpp", + "src/Silf.cpp", + "src/Slot.cpp", + "src/Sparse.cpp", + "src/TtfUtil.cpp", + "src/UtfCodec.cpp", + "src/FileFace.cpp", + "src/json.cpp", + ] + if not env_graphite.msvc: + thirdparty_sources += ["src/direct_machine.cpp"] + else: + thirdparty_sources += ["src/call_machine.cpp"] + + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + + env_graphite.Append(CPPPATH=["#thirdparty/graphite/src", "#thirdparty/graphite/include"]) + env_graphite.Append(CCFLAGS=["-DGRAPHITE2_STATIC", "-DGRAPHITE2_NTRACING", "-DGRAPHITE2_NFILEFACE"]) + env_graphite.disable_warnings() + env_thirdparty = env_graphite.Clone() + env_thirdparty.disable_warnings() + lib = env_thirdparty.add_library("graphite_builtin", thirdparty_sources) + + # Needs to be appended to arrive after libscene in the linker call, + # but we don't want it to arrive *after* system libs, so manual hack + # LIBS contains first SCons Library objects ("SCons.Node.FS.File object") + # and then plain strings for system library. We insert between the two. + inserted = False + for idx, linklib in enumerate(env["LIBS"]): + if isinstance(linklib, (str, bytes)): # first system lib such as "X11", otherwise SCons lib object + env["LIBS"].insert(idx, lib) + inserted = True + break + if not inserted: + env.Append(LIBS=[lib]) + +if env["builtin_icu"]: + env_icu = env_modules.Clone() + + # Thirdparty source files + thirdparty_dir = "#thirdparty/icu4c/" + # Thirdparty source files + thirdparty_sources = [ + "common/appendable.cpp", + "common/bmpset.cpp", + "common/brkeng.cpp", + "common/brkiter.cpp", + "common/bytesinkutil.cpp", + "common/bytestream.cpp", + "common/bytestrie.cpp", + "common/bytestriebuilder.cpp", + "common/bytestrieiterator.cpp", + "common/caniter.cpp", + "common/characterproperties.cpp", + "common/chariter.cpp", + "common/charstr.cpp", + "common/cmemory.cpp", + "common/cstr.cpp", + "common/cstring.cpp", + "common/cwchar.cpp", + "common/dictbe.cpp", + "common/dictionarydata.cpp", + "common/dtintrv.cpp", + "common/edits.cpp", + "common/errorcode.cpp", + "common/filteredbrk.cpp", + "common/filterednormalizer2.cpp", + "common/icudataver.cpp", + "common/icuplug.cpp", + "common/loadednormalizer2impl.cpp", + "common/localebuilder.cpp", + "common/localematcher.cpp", + "common/localeprioritylist.cpp", + "common/locavailable.cpp", + "common/locbased.cpp", + "common/locdispnames.cpp", + "common/locdistance.cpp", + "common/locdspnm.cpp", + "common/locid.cpp", + "common/loclikely.cpp", + "common/loclikelysubtags.cpp", + "common/locmap.cpp", + "common/locresdata.cpp", + "common/locutil.cpp", + "common/lsr.cpp", + "common/messagepattern.cpp", + "common/normalizer2.cpp", + "common/normalizer2impl.cpp", + "common/normlzr.cpp", + "common/parsepos.cpp", + "common/patternprops.cpp", + "common/pluralmap.cpp", + "common/propname.cpp", + "common/propsvec.cpp", + "common/punycode.cpp", + "common/putil.cpp", + "common/rbbi.cpp", + "common/rbbi_cache.cpp", + "common/rbbidata.cpp", + "common/rbbinode.cpp", + "common/rbbirb.cpp", + "common/rbbiscan.cpp", + "common/rbbisetb.cpp", + "common/rbbistbl.cpp", + "common/rbbitblb.cpp", + "common/resbund.cpp", + "common/resbund_cnv.cpp", + "common/resource.cpp", + "common/restrace.cpp", + "common/ruleiter.cpp", + "common/schriter.cpp", + "common/serv.cpp", + "common/servlk.cpp", + "common/servlkf.cpp", + "common/servls.cpp", + "common/servnotf.cpp", + "common/servrbf.cpp", + "common/servslkf.cpp", + "common/sharedobject.cpp", + "common/simpleformatter.cpp", + "common/static_unicode_sets.cpp", + "common/stringpiece.cpp", + "common/stringtriebuilder.cpp", + "common/uarrsort.cpp", + "common/ubidi.cpp", + "common/ubidi_props.cpp", + "common/ubidiln.cpp", + "common/ubiditransform.cpp", + "common/ubidiwrt.cpp", + "common/ubrk.cpp", + "common/ucase.cpp", + "common/ucasemap.cpp", + "common/ucasemap_titlecase_brkiter.cpp", + "common/ucat.cpp", + "common/uchar.cpp", + "common/ucharstrie.cpp", + "common/ucharstriebuilder.cpp", + "common/ucharstrieiterator.cpp", + "common/uchriter.cpp", + "common/ucln_cmn.cpp", + "common/ucmndata.cpp", + "common/ucnv.cpp", + "common/ucnv2022.cpp", + "common/ucnv_bld.cpp", + "common/ucnv_cb.cpp", + "common/ucnv_cnv.cpp", + "common/ucnv_ct.cpp", + "common/ucnv_err.cpp", + "common/ucnv_ext.cpp", + "common/ucnv_io.cpp", + "common/ucnv_lmb.cpp", + "common/ucnv_set.cpp", + "common/ucnv_u16.cpp", + "common/ucnv_u32.cpp", + "common/ucnv_u7.cpp", + "common/ucnv_u8.cpp", + "common/ucnvbocu.cpp", + "common/ucnvdisp.cpp", + "common/ucnvhz.cpp", + "common/ucnvisci.cpp", + "common/ucnvlat1.cpp", + "common/ucnvmbcs.cpp", + "common/ucnvscsu.cpp", + "common/ucnvsel.cpp", + "common/ucol_swp.cpp", + "common/ucptrie.cpp", + "common/ucurr.cpp", + "common/udata.cpp", + "common/udatamem.cpp", + "common/udataswp.cpp", + "common/uenum.cpp", + "common/uhash.cpp", + "common/uhash_us.cpp", + "common/uidna.cpp", + "common/uinit.cpp", + "common/uinvchar.cpp", + "common/uiter.cpp", + "common/ulist.cpp", + "common/uloc.cpp", + "common/uloc_keytype.cpp", + "common/uloc_tag.cpp", + "common/umapfile.cpp", + "common/umath.cpp", + "common/umutablecptrie.cpp", + "common/umutex.cpp", + "common/unames.cpp", + "common/unifiedcache.cpp", + "common/unifilt.cpp", + "common/unifunct.cpp", + "common/uniset.cpp", + "common/uniset_closure.cpp", + "common/uniset_props.cpp", + "common/unisetspan.cpp", + "common/unistr.cpp", + "common/unistr_case.cpp", + "common/unistr_case_locale.cpp", + "common/unistr_cnv.cpp", + "common/unistr_props.cpp", + "common/unistr_titlecase_brkiter.cpp", + "common/unorm.cpp", + "common/unormcmp.cpp", + "common/uobject.cpp", + "common/uprops.cpp", + "common/ures_cnv.cpp", + "common/uresbund.cpp", + "common/uresdata.cpp", + "common/usc_impl.cpp", + "common/uscript.cpp", + "common/uscript_props.cpp", + "common/uset.cpp", + "common/uset_props.cpp", + "common/usetiter.cpp", + "common/ushape.cpp", + "common/usprep.cpp", + "common/ustack.cpp", + "common/ustr_cnv.cpp", + "common/ustr_titlecase_brkiter.cpp", + "common/ustr_wcs.cpp", + "common/ustrcase.cpp", + "common/ustrcase_locale.cpp", + "common/ustrenum.cpp", + "common/ustrfmt.cpp", + "common/ustring.cpp", + "common/ustrtrns.cpp", + "common/utext.cpp", + "common/utf_impl.cpp", + "common/util.cpp", + "common/util_props.cpp", + "common/utrace.cpp", + "common/utrie.cpp", + "common/utrie2.cpp", + "common/utrie2_builder.cpp", + "common/utrie_swap.cpp", + "common/uts46.cpp", + "common/utypes.cpp", + "common/uvector.cpp", + "common/uvectr32.cpp", + "common/uvectr64.cpp", + "common/wintz.cpp", + ] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] + thirdparty_sources += ["icu_data/icudata_stub.cpp"] + + env_icu.Append(CPPPATH=["#thirdparty/icu4c/common/"]) + env_icu.Append( + CXXFLAGS=["-DU_STATIC_IMPLEMENTATION", "-DU_COMMON_IMPLEMENTATION", "-DPKGDATA_MODE=static",] + ) + + env_icu.disable_warnings() + env_thirdparty = env_icu.Clone() + env_thirdparty.disable_warnings() + lib = env_thirdparty.add_library("icu_builtin", thirdparty_sources) + + # Needs to be appended to arrive after libscene in the linker call, + # but we don't want it to arrive *after* system libs, so manual hack + # LIBS contains first SCons Library objects ("SCons.Node.FS.File object") + # and then plain strings for system library. We insert between the two. + inserted = False + for idx, linklib in enumerate(env["LIBS"]): + if isinstance(linklib, (str, bytes)): # first system lib such as "X11", otherwise SCons lib object + env["LIBS"].insert(idx, lib) + inserted = True + break + if not inserted: + env.Append(LIBS=[lib]) + +env_text_server_adv = env_modules.Clone() +env_text_server_adv.Append( + CPPPATH=[ + "#thirdparty/harfbuzz/src", + "#thirdparty/freetype/include", + "#thirdparty/graphite/include", + "#thirdparty/icu4c/common/", + ] +) +env_text_server_adv.add_source_files(env.modules_sources, "*.cpp") diff --git a/modules/text_server_adv/config.py b/modules/text_server_adv/config.py new file mode 100644 index 0000000000..1c8cd12a2d --- /dev/null +++ b/modules/text_server_adv/config.py @@ -0,0 +1,5 @@ +def can_build(env, platform): + return True + +def configure(env): + pass diff --git a/modules/text_server_adv/icu_data/icudata_stub.cpp b/modules/text_server_adv/icu_data/icudata_stub.cpp new file mode 100644 index 0000000000..13f0ac0c50 --- /dev/null +++ b/modules/text_server_adv/icu_data/icudata_stub.cpp @@ -0,0 +1,63 @@ +/*************************************************************************/ +/* icudata_stub.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2020 Juan Linietsky, Ariel Manzur. */ +/* Copyright (c) 2014-2020 Godot Engine contributors (cf. AUTHORS.md). */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ + +#include "unicode/udata.h" +#include "unicode/utypes.h" +#include "unicode/uversion.h" + +typedef struct { + uint16_t header_size; + uint8_t magic_1, magic_2; + UDataInfo info; + char padding[8]; + uint32_t count, reserved; + int fake_name_and_data[4]; +} ICU_data_header; + +extern "C" U_EXPORT const ICU_data_header U_ICUDATA_ENTRY_POINT = { + 32, + 0xDA, 0x27, + { sizeof(UDataInfo), + 0, +#if U_IS_BIG_ENDIAN + 1, +#else + 0, +#endif + U_CHARSET_FAMILY, + sizeof(UChar), + 0, + { 0x54, 0x6F, 0x43, 0x50 }, + { 1, 0, 0, 0 }, + { 0, 0, 0, 0 } }, + { 0, 0, 0, 0, 0, 0, 0, 0 }, + 0, 0, + { 0, 0, 0, 0 } +}; diff --git a/platform/android/detect.py b/platform/android/detect.py index 0accacb679..60d4146712 100644 --- a/platform/android/detect.py +++ b/platform/android/detect.py @@ -215,7 +215,7 @@ def configure(env): env.Append(CPPFLAGS=["-isystem", env["ANDROID_NDK_ROOT"] + "/sources/cxx-stl/llvm-libc++abi/include"]) # Disable exceptions and rtti on non-tools (template) builds - if env["tools"]: + if env["tools"] or env["builtin_icu"]: env.Append(CXXFLAGS=["-frtti"]) else: env.Append(CXXFLAGS=["-fno-rtti", "-fno-exceptions"]) diff --git a/platform/javascript/detect.py b/platform/javascript/detect.py index 71189cf697..9f584d0899 100644 --- a/platform/javascript/detect.py +++ b/platform/javascript/detect.py @@ -71,6 +71,8 @@ def configure(env): ) # Tools need more memory. Initial stack memory in bytes. See `src/settings.js` in emscripten repository (will be renamed to INITIAL_MEMORY). env.Append(LINKFLAGS=["-s", "TOTAL_MEMORY=33554432"]) + elif env["builtin_icu"]: + env.Append(CCFLAGS=["-frtti"]) else: # Disable exceptions and rtti on non-tools (template) builds # These flags help keep the file size down. diff --git a/platform/linuxbsd/detect.py b/platform/linuxbsd/detect.py index a8bc3a09f6..277aafc107 100644 --- a/platform/linuxbsd/detect.py +++ b/platform/linuxbsd/detect.py @@ -205,14 +205,31 @@ def configure(env): # freetype depends on libpng and zlib, so bundling one of them while keeping others # as shared libraries leads to weird issues - if env["builtin_freetype"] or env["builtin_libpng"] or env["builtin_zlib"]: + if ( + env["builtin_freetype"] + or env["builtin_libpng"] + or env["builtin_zlib"] + or env["builtin_graphite"] + or env["builtin_harfbuzz"] + ): env["builtin_freetype"] = True env["builtin_libpng"] = True env["builtin_zlib"] = True + env["builtin_graphite"] = True + env["builtin_harfbuzz"] = True if not env["builtin_freetype"]: env.ParseConfig("pkg-config freetype2 --cflags --libs") + if not env["builtin_graphite"]: + env.ParseConfig("pkg-config graphite2 --cflags --libs") + + if not env["builtin_icu"]: + env.ParseConfig("pkg-config icu-uc --cflags --libs") + + if not env["builtin_harfbuzz"]: + env.ParseConfig("pkg-config harfbuzz harfbuzz-icu --cflags --libs") + if not env["builtin_libpng"]: env.ParseConfig("pkg-config libpng16 --cflags --libs") diff --git a/platform/server/detect.py b/platform/server/detect.py index d9ac357679..bf4744a234 100644 --- a/platform/server/detect.py +++ b/platform/server/detect.py @@ -138,14 +138,31 @@ def configure(env): # freetype depends on libpng and zlib, so bundling one of them while keeping others # as shared libraries leads to weird issues - if env["builtin_freetype"] or env["builtin_libpng"] or env["builtin_zlib"]: + if ( + env["builtin_freetype"] + or env["builtin_libpng"] + or env["builtin_zlib"] + or env["builtin_graphite"] + or env["builtin_harfbuzz"] + ): env["builtin_freetype"] = True env["builtin_libpng"] = True env["builtin_zlib"] = True + env["builtin_graphite"] = True + env["builtin_harfbuzz"] = True if not env["builtin_freetype"]: env.ParseConfig("pkg-config freetype2 --cflags --libs") + if not env["builtin_graphite"]: + env.ParseConfig("pkg-config graphite2 --cflags --libs") + + if not env["builtin_icu"]: + env.ParseConfig("pkg-config icu-uc --cflags --libs") + + if not env["builtin_harfbuzz"]: + env.ParseConfig("pkg-config harfbuzz harfbuzz-icu --cflags --libs") + if not env["builtin_libpng"]: env.ParseConfig("pkg-config libpng16 --cflags --libs") @@ -233,7 +250,17 @@ def configure(env): env.Append(CPPDEFINES=["SERVER_ENABLED", "UNIX_ENABLED"]) if platform.system() == "Darwin": - env.Append(LINKFLAGS=["-framework", "Cocoa", "-framework", "Carbon", "-lz", "-framework", "IOKit"]) + env.Append( + LINKFLAGS=[ + "-framework", + "Cocoa", + "-framework", + "Carbon", + "-lz", + "-framework", + "IOKit", + ] + ) env.Append(LIBS=["pthread"]) diff --git a/thirdparty/README.md b/thirdparty/README.md index 37518857c7..d011832210 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -184,6 +184,39 @@ Files extracted from upstream source: Patches in the `patches` directory should be re-applied after updates. +## Graphite engine + +- Upstream: https://github.com/silnrsi/graphite +- Version: 1.3.14 +- License: MPL-2.0 + +Files extracted from upstream source: +- the `include` folder +- the `src` folder +- `COPYING`, `ChangeLog` + +## HarfBuzz + +- Upstream: https://github.com/harfbuzz/harfbuzz +- Version: 2.7.2 +- License: HarfBuzz + +Files extracted from upstream source: +- the `src` folder +- `AUTHORS`, `COPYING`, `NEWS`, `THANKS` + +## International Components for Unicode + +- Upstream: https://github.com/unicode-org/icu +- Version: 68.1 +- License: Unicode + +Files extracted from upstream source: +- the `common` folder +- `APIChangeReport.md`, `LICENSE` + +Files generated from upstream source: +- the `icudt68l.dat` built with the provided `godot_data.json` config file (see https://github.com/unicode-org/icu/blob/master/docs/userguide/icu_data/buildtool.md for instructions) ## jpeg-compressor diff --git a/thirdparty/graphite/COPYING b/thirdparty/graphite/COPYING new file mode 100644 index 0000000000..f6630af533 --- /dev/null +++ b/thirdparty/graphite/COPYING @@ -0,0 +1,26 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + + Alternatively, you may use this library under the terms of the Mozilla + Public License (http://mozilla.org/MPL) or under the GNU General Public + License, as published by the Free Sofware Foundation; either version + 2 of the license or (at your option) any later version. +*/ diff --git a/thirdparty/graphite/ChangeLog b/thirdparty/graphite/ChangeLog new file mode 100644 index 0000000000..e36110e1c1 --- /dev/null +++ b/thirdparty/graphite/ChangeLog @@ -0,0 +1,238 @@ +1.3.14 + . Bug fixes + . Allow features to be hidden (for aliases) + . Move to python3 + . Rename doc files from .txt to .asc + +1.3.13 + . Resolve minor spacing issue in rtl non-overlap kerning + . python3 for graphite.py + . Better fuzzing + . Better building on windows + +1.3.12 + . Graphite no longer does dumb rendering for fonts with no smarts + . Segment caching code removed. Anything attempting to use the segment cache gets given a regular face instead + . Add libfuzzer support + . Builds now require C++11 + . Improvements to Windows 64 bit builds + . Support different versions of python including 32 bit and python 3 + . Various minor bug fixes + +1.3.11 + . Fixes due to security review + . Minor collision avoidance fixes + . Fix LZ4 decompressor against high compression + +1.3.10 + . Address floating point build parameters to give consistent positioning results across platforms + . Various bug fixes + +1.3.9 + . Add Collision COLL_ISSPACE to allow for visible spaces in collision avoidance + . Add segment and pass direction information to tracing output + . Bug fix rule length testing in 32-bit + . Increase slanted margin distances for collision avoidance + . Change kerning algorithm to simple outline expansion. Seems to make no visible difference. + . Add trace2svg to test tools + +1.3.8 + . Various bug fixes arising from fuzzing + . Fix regression that stopped piglatin from working + . Make collision avoidance kerning give more regular results + . Minor modification to clustering algorithm to handle variable width chars + +1.3.7 + . Bug fixes + . Start to deprecate SegCache. This will be going away in a later release. + +1.3.6 + . Bug fixes + +1.3.5 + . Bug fixes + . Security bug fix + . Fix ARM misalignment problem + . Track latest cmake + +1.3.4 + . Transition from Mercurial to Git + . Bug fixes + . Fix Collision Kerning ignoring some diacritics + . Handle pass bits 16-31 to speed up fonts with > 16 passes + . Various minor fuzz bug fixes + . Make Coverity happy + . Add GR_FALLTHROUGH macro for clang c++11 + +1.3.3 + . Slight speed up in Collision Avoidance + . Remove dead bidi code + . Bug fixes + . Between pass bidi reorderings and at the end + . Decompressor fuzz bugs + . Other fuzz bugs + +1.3.2 + . Remove full bidi. All segments are assumed to be single directioned. + . Bug fixes: + . Decompressor corner cases + . Various fuzz bugs + +1.3.1 + . Deprecation warning: Full bidi support is about to be deprecated. Make contact + if this impacts you. + . Change compression block format slightly to conform to LZ4 + . Bug fixes: + . Handle mono direction text with diacritics consistently. Fonts + now see the direction they expect consistently and bidi now + gives expected results. + . Fixed lots of fuzz bugs + . Coverity cleanups + . Build now works for clang and/or asan and/or afl etc. + +1.3.0 + . Add collision avoidance + . Shift Collider + . Kern Collider + . Octabox outlines and subboxes + . Add compressed Silf and Glat table support + . Bug fixes: + . Stop loops forming in the child, sibling tree + . Handle bidi mirroring correctly if no bidi occurring + +1.2.4 + . Face failure now has error code reporting via debug logging + . can now call gr_start_logging(NULL, fname) + . gr2fonttest --alltrace added + . Format 14 table support + . Not done. To be handled entirely in the compiler + . Bidi support for Unicode 6.3 Isolating direction controls + . Fonts no longer require a glyf/loca table. In such cases the bounding box is always 0. + . Clang ASAN build support added for testing. + . Handle out of memory sanely. + . Documentation improvements + . Bug fixes: + . Enforce fonts having to store glyph attributes by monotonically increasing attribute number + . zeropadding was not getting called on feature tags + . automatic associations for unassociated characters + . use direct engine on Mac + . various extreme case reading 1 past the end errors fixed + . remove tabs from sources so that it becomes readable again + +1.2.3 + . Bug fixes only: + . fix byte swapping when testing cmap subtable lengths + . work around armel compilation problems with conditional operators + . fix pseudoglyph support for advance and bbox + +1.2.2 + . Add support for passKeySlot (makes Charis 2x faster) up to 32 passes + . Add telemetry output to json if enabled in build GRAPHITE2_TELEMETRY + . Shrink font memory footprint particularly in the fsm + . Add -S to comparerenderer + . Bug fixes: + . Fix shift.x being reversed for rtl text + . Fix faulty fallback justification + . Fix bad cmap handling + . Support compiling on old Solaris where bidi attributes clash with register names + . Follow the crowd in using Windows.h + +1.2.1 + . Bug fixes: + . Allow glyph reattachment + . Allow signed glyph attributes + . Various build problems with MacOS, old gcc versions, etc. + . Various overrun read errors fixed + +1.2.0 + . API Changes: + . Added Windows friendly gr_start_logging and gr_stop_logging, now per face + . Added gr_make_face_with_ops, gr_make_face_with_seg_cache_and_ops + . Added gr_make_font_with_ops + . Added gr_face_is_char_supported + . Added gr_face_info to give info to apps about face capabilities + . Deprecated gr_make_face, gr_make_face_with_seg_cache, gr_make_font_with_advance_fn + . Deprecated graphite_start_logging and graphite_stop_logging + . These functions are stubbed now and do nothing, but do compile and link. + . Bump API version to 3 + . Add C# wrapper to contrib + . Handle justification information in a font and do something useful with it + . Builds clang clean (has done for a while) + . Bug fixes + . Windows build and bug fixes + . Add extra information to json debug output + . Added windows build documentation + . Added freetype sample code and test + +1.1.3 + . Default build has GRAPHITE2_COMPARE_RENDERER to OFF to reduce dependencies + . Builds on Mac with clang + . Debug output improvements + . Tidy up perl wrappers + . Fuzz tester improvements + . Various bug fixes for bad font handling + +1.1.2 + . Support feature ids < 4 chars when space padded for inclusion in FF 14. + . More fuzztesting and removal of causes of valgrind bad reads and sigabrts + . Remove contrib/android into its own repo (http://hg.palaso.org/grandroid) + . Update comparerenderer to latest harfbuzzng api + +1.1.1 + . Missing Log.h included + . perl wrappers updated + +1.1.0 + . Refactored debug output to use json + . Renamed VM_MACHINE_TYPE to GRAPHITE2_VM_TYPE + . Renamed DISABLE_SEGCACHE to GRAPHITE2_NSEGCACE + . Renamed DISBALE_FILE_FACE to GRAPHITE2_NFILEFACE + . Renamed ENABLE_COMPARE_RENDERER to GRAPHTIE2_COMPARE_RENDERER + . Renamed DOXYGEN_CONFIG to GRAPHITE2_DOXYGEN_CONFIG + . Renamed GR2_CUSTOM_HEADER to GRAPHITE2_CUSTOM_HEADER + . Renamed GR2_EXPORTING to GRAPHITE2_EXPORTING + . Added GRAPHITE2_STATIC for static only builds + . Added GRAPHITE2_NTRACING to compile out tracing code + . Documented GRAPHITE2_{EXPORTING,STATIC,NTRACING} in hacking.txt + . Bump libtool version to 2.1.0 + . dumb font rendering works + . slot user attributes are now signed rather than unsigned + . add support for long class maps + . Rename perl library to avoid nameclash on Windows + . Various robustness fixes + . Moved internal .h files into src/inc + . Parallelise fuzztest + . General build improvements, particularly on Windows + +1.0.3 + . Fix UTF16 surrogate support + . script and lang tags may be space padded or null padded + . Remove need for WORDS_BIGENDIAN, do it all automatically + . Remove all #include <new>. Use CLASS_NEW_DELETE instead. + . Fix comparerenderer to work with current hbng + . Add valgrind to fuzztest to ensure good memory use at all times + . Fix new fuzztest exposed bugs. + . Fix bugs exposed by Mozilla security review + . Add continuous integration build on Windows support + +1.0.2 + . Fix Windows build + . Comparerenderer uses hbng enforcing ot rendering + . Add Bidi .hasChar support and refactor mirroring code + . Make cmake default Release rather than debug + . Don't compile in a boat load of TtfUtil that isn't used, saving 15% of binary + . Chase the FSF around its latest office moves + . WORDS_BIGENDIAN is set at the top so tests now pass on ppc, etc. + . More words in the manual + +1.0.1 + . Release is the default build in cmake now. + . Refactor cmake build to not rebuild things so much. + . Include a missing file + . Remove -nostdlibs, making gcc happy everywhere + . Update comparerenderer to latest hbng interface + . Add changelog + +1.0.0 + . First major release of perfect code! + diff --git a/thirdparty/graphite/include/graphite2/Font.h b/thirdparty/graphite/include/graphite2/Font.h new file mode 100644 index 0000000000..fe569295a5 --- /dev/null +++ b/thirdparty/graphite/include/graphite2/Font.h @@ -0,0 +1,389 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + + Alternatively, the contents of this file may be used under the terms + of the Mozilla Public License (http://mozilla.org/MPL) or the GNU + General Public License, as published by the Free Software Foundation, + either version 2 of the License or (at your option) any later version. +*/ +#pragma once + +#include "graphite2/Types.h" + +#define GR2_VERSION_MAJOR 1 +#define GR2_VERSION_MINOR 3 +#define GR2_VERSION_BUGFIX 14 + +#ifdef __cplusplus +extern "C" +{ +#endif + +typedef struct gr_face gr_face; +typedef struct gr_font gr_font; +typedef struct gr_feature_ref gr_feature_ref; +typedef struct gr_feature_val gr_feature_val; + +/** +* Returns version information on this engine +*/ +GR2_API void gr_engine_version(int *nMajor, int *nMinor, int *nBugFix); + +/** +* The Face Options allow the application to require that certain tables are +* read during face construction. This may be of concern if the appFaceHandle +* used in the gr_get_table_fn may change. +* The values can be combined +*/ +enum gr_face_options { + /** No preload, no cmap caching, fail if the graphite tables are invalid */ + gr_face_default = 0, + /** Dumb rendering will be enabled if the graphite tables are invalid. @deprecated Since 1.311 */ + gr_face_dumbRendering = 1, + /** preload glyphs at construction time */ + gr_face_preloadGlyphs = 2, + /** Cache the lookup from code point to glyph ID at construction time */ + gr_face_cacheCmap = 4, + /** Preload everything */ + gr_face_preloadAll = gr_face_preloadGlyphs | gr_face_cacheCmap +}; + +/** Holds information about a particular Graphite silf table that has been loaded */ +struct gr_faceinfo { + gr_uint16 extra_ascent; /**< The extra_ascent in the GDL, in design units */ + gr_uint16 extra_descent; /**< The extra_descent in the GDL, in design units */ + gr_uint16 upem; /**< The design units for the font */ + enum gr_space_contextuals { + gr_space_unknown = 0, /**< no information is known. */ + gr_space_none = 1, /**< the space character never occurs in any rules. */ + gr_space_left_only = 2, /**< the space character only occurs as the first element in a rule. */ + gr_space_right_only = 3, /**< the space character only occurs as the last element in a rule. */ + gr_space_either_only = 4, /**< the space character only occurs as the only element in a rule. */ + gr_space_both = 5, /**< the space character may occur as the first or last element of a rule. */ + gr_space_cross = 6 /**< the space character occurs in a rule not as a first or last element. */ + } space_contextuals; + unsigned int has_bidi_pass : 1; /**< the table specifies that a bidirectional pass should run */ + unsigned int line_ends : 1; /**< there are line end contextuals somewhere */ + unsigned int justifies : 1; /**< there are .justify properties set somewhere on some glyphs */ +}; + +typedef struct gr_faceinfo gr_faceinfo; + +/** type describing function to retrieve font table information + * + * @return a pointer to the table in memory. The pointed to memory must exist as + * long as the gr_face which makes the call. + * @param appFaceHandle is the unique information passed to gr_make_face() + * @param name is a 32bit tag to the table name. + * @param len returned by this function to say how long the table is in memory. + */ +typedef const void *(*gr_get_table_fn)(const void* appFaceHandle, unsigned int name, size_t *len); + +/** type describing function to release any resources allocated by the above get_table table function + * + * @param appFaceHandle is the unique information passed to gr_make_face() + * @param pointer to table memory returned by get_table. + */ +typedef void (*gr_release_table_fn)(const void* appFaceHandle, const void *table_buffer); + +/** struct housing function pointers to manage font table buffers for the graphite engine. */ +struct gr_face_ops +{ + /** size in bytes of this structure */ + size_t size; + /** a pointer to a function to request a table from the client. */ + gr_get_table_fn get_table; + /** is a pointer to a function to notify the client the a table can be released. + * This can be NULL to signify that the client does not wish to do any release handling. */ + gr_release_table_fn release_table; +}; +typedef struct gr_face_ops gr_face_ops; + +/** Create a gr_face object given application information and a table functions. + * + * @return gr_face or NULL if the font fails to load for some reason. + * @param appFaceHandle This is application specific information that is passed + * to the getTable function. The appFaceHandle must stay + * alive as long as the gr_face is alive. + * @param face_ops Pointer to face specific callback structure for table + * management. Must stay alive for the duration of the + * call only. + * @param faceOptions Bitfield describing various options. See enum gr_face_options for details. + */ +GR2_API gr_face* gr_make_face_with_ops(const void* appFaceHandle/*non-NULL*/, const gr_face_ops *face_ops, unsigned int faceOptions); + +/** @deprecated Since v1.2.0 in favour of gr_make_face_with_ops. + * Create a gr_face object given application information and a getTable function. + * + * @return gr_face or NULL if the font fails to load for some reason. + * @param appFaceHandle This is application specific information that is passed + * to the getTable function. The appFaceHandle must stay + * alive as long as the gr_face is alive. + * @param getTable Callback function to get table data. + * @param faceOptions Bitfield describing various options. See enum gr_face_options for details. + */ +GR2_DEPRECATED_API gr_face* gr_make_face(const void* appFaceHandle/*non-NULL*/, gr_get_table_fn getTable, unsigned int faceOptions); + +/** @deprecated Since 1.3.7 this function is now an alias for gr_make_face_with_ops(). + * + * Create a gr_face object given application information, with subsegmental caching support + * + * @return gr_face or NULL if the font fails to load. + * @param appFaceHandle is a pointer to application specific information that is passed to getTable. + * This may not be NULL and must stay alive as long as the gr_face is alive. + * @param face_ops Pointer to face specific callback structure for table management. Must stay + * alive for the duration of the call only. + * @param segCacheMaxSize Unused. + * @param faceOptions Bitfield of values from enum gr_face_options + */ +GR2_DEPRECATED_API gr_face* gr_make_face_with_seg_cache_and_ops(const void* appFaceHandle, const gr_face_ops *face_ops, unsigned int segCacheMaxSize, unsigned int faceOptions); + +/** @deprecated Since 1.3.7 this function is now an alias for gr_make_face(). + * + * Create a gr_face object given application information, with subsegmental caching support. + * This function is deprecated as of v1.2.0 in favour of gr_make_face_with_seg_cache_and_ops. + * + * @return gr_face or NULL if the font fails to load. + * @param appFaceHandle is a pointer to application specific information that is passed to getTable. + * This may not be NULL and must stay alive as long as the gr_face is alive. + * @param getTable The function graphite calls to access font table data + * @param segCacheMaxSize How large the segment cache is. + * @param faceOptions Bitfield of values from enum gr_face_options + */ +GR2_DEPRECATED_API gr_face* gr_make_face_with_seg_cache(const void* appFaceHandle, gr_get_table_fn getTable, unsigned int segCacheMaxSize, unsigned int faceOptions); + +/** Convert a tag in a string into a gr_uint32 + * + * @return gr_uint32 tag, zero padded + * @param str a nul terminated string of which at most the first 4 characters are read + */ +GR2_API gr_uint32 gr_str_to_tag(const char *str); + +/** Convert a gr_uint32 tag into a string + * + * @param tag contains the tag to convert + * @param str is a pointer to a char array of at least size 4 bytes. The first 4 bytes of this array + * will be overwritten by this function. No nul is appended. + */ +GR2_API void gr_tag_to_str(gr_uint32 tag, char *str); + +/** Get feature values for a given language or default + * + * @return a copy of the default feature values for a given language. The application must call + * gr_featureval_destroy() to free this object when done. + * @param pFace The font face to get feature values from + * @param langname The language tag to get feature values for. If there is no such language or + * langname is 0, the default feature values for the font are returned. + * langname is right 0 padded and assumes lowercase. Thus the en langauge + * would be 0x656E0000. Langname may also be space padded, thus 0x656E2020. + */ +GR2_API gr_feature_val* gr_face_featureval_for_lang(const gr_face* pFace, gr_uint32 langname); + +/** Get feature reference for a given feature id from a face + * + * @return a feature reference corresponding to the given id. This data is part of the gr_face and + * will be freed when the face is destroyed. + * @param pFace Font face to get information on. + * @param featId Feature id tag to get reference to. + */ +GR2_API const gr_feature_ref* gr_face_find_fref(const gr_face* pFace, gr_uint32 featId); + +/** Returns number of feature references in a face **/ +GR2_API gr_uint16 gr_face_n_fref(const gr_face* pFace); + +/** Returns feature reference at given index in face **/ +GR2_API const gr_feature_ref* gr_face_fref(const gr_face* pFace, gr_uint16 i); + +/** Return number of languages the face knows about **/ +GR2_API unsigned short gr_face_n_languages(const gr_face* pFace); + +/** Returns a language id corresponding to a language of given index in the face **/ +GR2_API gr_uint32 gr_face_lang_by_index(const gr_face* pFace, gr_uint16 i); + +/** Destroy the given face and free its memory **/ +GR2_API void gr_face_destroy(gr_face *face); + +/** Returns the number of glyphs in the face **/ +GR2_API unsigned short gr_face_n_glyphs(const gr_face* pFace); + +/** Returns a faceinfo for the face and script **/ +GR2_API const gr_faceinfo *gr_face_info(const gr_face *pFace, gr_uint32 script); + +/** Returns whether the font supports a given Unicode character + * + * @return true if the character is supported. + * @param pFace face to test within + * @param usv Unicode Scalar Value of character to test + * @param script Tag of script for selecting which set of pseudo glyphs to test. May be NULL. + */ +GR2_API int gr_face_is_char_supported(const gr_face *pFace, gr_uint32 usv, gr_uint32 script); + +#ifndef GRAPHITE2_NFILEFACE +/** Create gr_face from a font file + * + * @return gr_face that accesses a font file directly. Returns NULL on failure. + * @param filename Full path and filename to font file + * @param faceOptions Bitfile from enum gr_face_options to control face options. + */ +GR2_API gr_face* gr_make_file_face(const char *filename, unsigned int faceOptions); + +/** @deprecated Since 1.3.7. This function is now an alias for gr_make_file_face(). + * + * Create gr_face from a font file, with subsegment caching support. + * + * @return gr_face that accesses a font file directly. Returns NULL on failure. + * @param filename Full path and filename to font file + * @param segCacheMaxSize Specifies how big to make the cache in segments. + * @param faceOptions Bitfield from enum gr_face_options to control face options. + */ +GR2_DEPRECATED_API gr_face* gr_make_file_face_with_seg_cache(const char *filename, unsigned int segCacheMaxSize, unsigned int faceOptions); +#endif // !GRAPHITE2_NFILEFACE + +/** Create a font from a face + * + * @return gr_font Call font_destroy to free this font + * @param ppm Resolution of the font in pixels per em + * @param face Face this font corresponds to. This must stay alive as long as the font is alive. + */ +GR2_API gr_font* gr_make_font(float ppm, const gr_face *face); + +/** query function to find the hinted advance of a glyph + * + * @param appFontHandle is the unique information passed to gr_make_font_with_advance() + * @param glyphid is the glyph to retireve the hinted advance for. + */ +typedef float (*gr_advance_fn)(const void* appFontHandle, gr_uint16 glyphid); + +/** struct housing function pointers to manage font hinted metrics for the + * graphite engine. */ +struct gr_font_ops +{ + /** size of the structure in bytes to allow for future extensibility */ + size_t size; + /** a pointer to a function to retrieve the hinted + * advance width of a glyph which the font cannot + * provide without client assistance. This can be + * NULL to signify no horizontal hinted metrics are necessary. */ + gr_advance_fn glyph_advance_x; + /** a pointer to a function to retrieve the hinted + * advance height of a glyph which the font cannot + * provide without client assistance. This can be + * NULL to signify no horizontal hinted metrics are necessary. */ + gr_advance_fn glyph_advance_y; +}; +typedef struct gr_font_ops gr_font_ops; + +/** Creates a font with hinted advance width query functions + * + * @return gr_font to be destroyed via font_destroy + * @param ppm size of font in pixels per em + * @param appFontHandle font specific information that must stay alive as long + * as the font does + * @param font_ops pointer font specific callback structure for hinted metrics. + * Need only stay alive for the duration of the call. + * @param face the face this font corresponds to. Must stay alive as long as + * the font does. + */ +GR2_API gr_font* gr_make_font_with_ops(float ppm, const void* appFontHandle, const gr_font_ops * font_ops, const gr_face *face); + +/** Creates a font with hinted advance width query function. + * This function is deprecated. Use gr_make_font_with_ops instead. + * + * @return gr_font to be destroyed via font_destroy + * @param ppm size of font in pixels per em + * @param appFontHandle font specific information that must stay alive as long + * as the font does + * @param getAdvance callback function reference that returns horizontal advance in pixels for a glyph. + * @param face the face this font corresponds to. Must stay alive as long as + * the font does. + */ +GR2_API gr_font* gr_make_font_with_advance_fn(float ppm, const void* appFontHandle, gr_advance_fn getAdvance, const gr_face *face); + +/** Free a font **/ +GR2_API void gr_font_destroy(gr_font *font); + +/** get a feature value + * + * @return value of specific feature or 0 if any problems. + * @param pfeatureref gr_feature_ref to the feature + * @param feats gr_feature_val containing all the values + */ +GR2_API gr_uint16 gr_fref_feature_value(const gr_feature_ref* pfeatureref, const gr_feature_val* feats); + +/** set a feature value + * + * @return false if there were any problems (value out of range, etc.) + * @param pfeatureref gr_feature_ref to the feature + * @param val value to set the feature to + * @param pDest the gr_feature_val containing all the values for all the features + */ +GR2_API int gr_fref_set_feature_value(const gr_feature_ref* pfeatureref, gr_uint16 val, gr_feature_val* pDest); + +/** Returns the id tag for a gr_feature_ref **/ +GR2_API gr_uint32 gr_fref_id(const gr_feature_ref* pfeatureref); + +/** Returns number of values a feature may take, given a gr_feature_ref **/ +GR2_API gr_uint16 gr_fref_n_values(const gr_feature_ref* pfeatureref); + +/** Returns the value associated with a particular value in a feature + * + * @return value + * @param pfeatureref gr_feature_ref of the feature of interest + * @param settingno Index up to the return value of gr_fref_n_values() of the value + */ +GR2_API gr_int16 gr_fref_value(const gr_feature_ref* pfeatureref, gr_uint16 settingno); + +/** Returns a string of the UI name of a feature + * + * @return string of the UI name, in the encoding form requested. Call gr_label_destroy() after use. + * @param pfeatureref gr_feature_ref of the feature + * @param langId This is a pointer since the face may not support a string in the requested + * language. The actual language of the string is returned in langId + * @param utf Encoding form for the string + * @param length Used to return the length of the string returned in bytes. + */ +GR2_API void* gr_fref_label(const gr_feature_ref* pfeatureref, gr_uint16 *langId, enum gr_encform utf, gr_uint32 *length); + +/** Return a UI string for a possible value of a feature + * + * @return string of the UI name, in the encoding form requested. nul terminated. Call gr_label_destroy() + * after use. + * @param pfeatureref gr_feature_ref of the feature + * @param settingno Value setting index + * @param langId This is a pointer to the requested language. The requested language id is + * replaced by the actual language id of the string returned. + * @param utf Encoding form for the string + * @param length Returns the length of the string returned in bytes. + */ +GR2_API void* gr_fref_value_label(const gr_feature_ref* pfeatureref, gr_uint16 settingno/*rather than a value*/, gr_uint16 *langId, enum gr_encform utf, gr_uint32 *length); + +/** Destroy a previously returned label string **/ +GR2_API void gr_label_destroy(void * label); + +/** Copies a gr_feature_val **/ +GR2_API gr_feature_val* gr_featureval_clone(const gr_feature_val* pfeatures); + +/** Destroys a gr_feature_val **/ +GR2_API void gr_featureval_destroy(gr_feature_val *pfeatures); + +#ifdef __cplusplus +} +#endif diff --git a/thirdparty/graphite/include/graphite2/Log.h b/thirdparty/graphite/include/graphite2/Log.h new file mode 100644 index 0000000000..a5a6947fab --- /dev/null +++ b/thirdparty/graphite/include/graphite2/Log.h @@ -0,0 +1,85 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + + Alternatively, the contents of this file may be used under the terms + of the Mozilla Public License (http://mozilla.org/MPL) or the GNU + General Public License, as published by the Free Software Foundation, + either version 2 of the License or (at your option) any later version. +*/ +#pragma once + +#include <graphite2/Types.h> +#include <graphite2/Font.h> +#include <stdio.h> + +#ifdef __cplusplus +extern "C" +{ +#endif + +/** deprecated mechanism that doesn't do anything now. */ +typedef enum { + GRLOG_NONE = 0x0, + GRLOG_FACE = 0x01, + GRLOG_SEGMENT = 0x02, + GRLOG_PASS = 0x04, + GRLOG_CACHE = 0x08, + + GRLOG_OPCODE = 0x80, + GRLOG_ALL = 0xFF +} GrLogMask; + +/** Start logging all segment creation and updates on the provided face. This + * is logged to a JSON file, see "Segment JSON Schema.txt" for a precise + * definition of the file + * + * @return true if the file was successfully created and logging is correctly + * initialised. + * @param face the gr_face whose segments you want to log to the given file + * @param log_path a utf8 encoded file name and path to log to. + */ +GR2_API bool gr_start_logging(gr_face * face, const char *log_path); + + +/** Stop logging on the given face. This will close the log file created by + * gr_start_logging. + * + * @param face the gr_face whose segments you want to stop logging + */ +GR2_API void gr_stop_logging(gr_face * face); + +/** Start logging to a FILE object. + * This function is deprecated as of 1.2.0, use the _face versions instead. + * + * @return True on success + * @param logfile FILE reference to output logging to + * @param mask What aspects of logging to report (ignored) + */ +GR2_API bool graphite_start_logging(FILE * logFile, GrLogMask mask); //may not do anthing if disabled in the implementation of the engine. + +/** Stop logging to a FILE object. + * This function is deprecated as of 1.2.0, use the _face versions instead. + */ +GR2_API void graphite_stop_logging(); + +#ifdef __cplusplus +} +#endif diff --git a/thirdparty/graphite/include/graphite2/Segment.h b/thirdparty/graphite/include/graphite2/Segment.h new file mode 100644 index 0000000000..0e24f5d795 --- /dev/null +++ b/thirdparty/graphite/include/graphite2/Segment.h @@ -0,0 +1,461 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + + Alternatively, the contents of this file may be used under the terms + of the Mozilla Public License (http://mozilla.org/MPL) or the GNU + General Public License, as published by the Free Software Foundation, + either version 2 of the License or (at your option) any later version. +*/ +#pragma once + +#include "graphite2/Types.h" +#include "graphite2/Font.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +enum gr_break_weight { + gr_breakNone = 0, + /* after break weights */ + gr_breakWhitespace = 10, + gr_breakWord = 15, + gr_breakIntra = 20, + gr_breakLetter = 30, + gr_breakClip = 40, + /* before break weights */ + gr_breakBeforeWhitespace = -10, + gr_breakBeforeWord = -15, + gr_breakBeforeIntra = -20, + gr_breakBeforeLetter = -30, + gr_breakBeforeClip = -40 +}; + +enum gr_justFlags { + /// Indicates that this segment is a complete line + gr_justCompleteLine = 0, + /// Indicates that the start of the slot list is not at the start of a line + gr_justStartInline = 1, + /// Indicates that the end of the slot list is not at the end of a line + gr_justEndInline = 2 +}; + +/** Used for looking up slot attributes. Most are already available in other functions **/ +enum gr_attrCode { + /// adjusted glyph advance in x direction in design units + gr_slatAdvX = 0, + /// adjusted glyph advance in y direction (usually 0) in design units + gr_slatAdvY, + /// returns 0. Deprecated. + gr_slatAttTo, + /// This slot attaches to its parent at the given design units in the x direction + gr_slatAttX, + /// This slot attaches to its parent at the given design units in the y direction + gr_slatAttY, + /// This slot attaches to its parent at the given glyph point (not implemented) + gr_slatAttGpt, + /// x-direction adjustment from the given glyph point (not implemented) + gr_slatAttXOff, + /// y-direction adjustment from the given glyph point (not implemented) + gr_slatAttYOff, + /// Where on this glyph should align with the attachment point on the parent glyph in the x-direction. + gr_slatAttWithX, + /// Where on this glyph should align with the attachment point on the parent glyph in the y-direction + gr_slatAttWithY, + /// Which glyph point on this glyph should align with the attachment point on the parent glyph (not implemented). + gr_slatWithGpt, + /// Adjustment to gr_slatWithGpt in x-direction (not implemented) + gr_slatAttWithXOff, + /// Adjustment to gr_slatWithGpt in y-direction (not implemented) + gr_slatAttWithYOff, + /// Attach at given nesting level (not implemented) + gr_slatAttLevel, + /// Line break breakweight for this glyph + gr_slatBreak, + /// Ligature component reference (not implemented) + gr_slatCompRef, + /// bidi directionality of this glyph (not implemented) + gr_slatDir, + /// Whether insertion is allowed before this glyph + gr_slatInsert, + /// Final positioned position of this glyph relative to its parent in x-direction in pixels + gr_slatPosX, + /// Final positioned position of this glyph relative to its parent in y-direction in pixels + gr_slatPosY, + /// Amount to shift glyph by in x-direction design units + gr_slatShiftX, + /// Amount to shift glyph by in y-direction design units + gr_slatShiftY, + /// attribute user1 + gr_slatUserDefnV1, + /// not implemented + gr_slatMeasureSol, + /// not implemented + gr_slatMeasureEol, + /// Amount this slot can stretch (not implemented) + gr_slatJStretch, + /// Amount this slot can shrink (not implemented) + gr_slatJShrink, + /// Granularity by which this slot can stretch or shrink (not implemented) + gr_slatJStep, + /// Justification weight for this glyph (not implemented) + gr_slatJWeight, + /// Amount this slot mush shrink or stretch in design units + gr_slatJWidth = 29, + /// SubSegment split point + gr_slatSegSplit = gr_slatJStretch + 29, + /// User defined attribute, see subattr for user attr number + gr_slatUserDefn, + /// Bidi level + gr_slatBidiLevel = 56, + /// Collision flags + gr_slatColFlags, + /// Collision constraint rectangle left (bl.x) + gr_slatColLimitblx, + /// Collision constraint rectangle lower (bl.y) + gr_slatColLimitbly, + /// Collision constraint rectangle right (tr.x) + gr_slatColLimittrx, + /// Collision constraint rectangle upper (tr.y) + gr_slatColLimittry, + /// Collision shift x + gr_slatColShiftx, + /// Collision shift y + gr_slatColShifty, + /// Collision margin + gr_slatColMargin, + /// Margin cost weight + gr_slatColMarginWt, + // Additional glyph that excludes movement near this one: + gr_slatColExclGlyph, + gr_slatColExclOffx, + gr_slatColExclOffy, + // Collision sequence enforcing attributes: + gr_slatSeqClass, + gr_slatSeqProxClass, + gr_slatSeqOrder, + gr_slatSeqAboveXoff, + gr_slatSeqAboveWt, + gr_slatSeqBelowXlim, + gr_slatSeqBelowWt, + gr_slatSeqValignHt, + gr_slatSeqValignWt, + + /// not implemented + gr_slatMax, + /// not implemented + gr_slatNoEffect = gr_slatMax + 1 +}; + +enum gr_bidirtl { + /// Underlying paragraph direction is RTL + gr_rtl = 1, + /// Set this to not run the bidi pass internally, even if the font asks for it. + /// This presumes that the segment is in a single direction. Most of the time + /// this bit should be set unless you know you are passing full paragraphs of text. + gr_nobidi = 2, + /// Disable auto mirroring for rtl text + gr_nomirror = 4 +}; + +typedef struct gr_char_info gr_char_info; +typedef struct gr_segment gr_segment; +typedef struct gr_slot gr_slot; + +/** Returns Unicode character for a charinfo. + * + * @param p Pointer to charinfo to return information on. + */ +GR2_API unsigned int gr_cinfo_unicode_char(const gr_char_info* p/*not NULL*/); + +/** Returns breakweight for a charinfo. + * + * @return Breakweight is a number between -50 and 50 indicating the cost of a + * break before or after this character. If the value < 0, the absolute value + * is this character's contribution to the overall breakweight before it. If the value + * > 0, then the value is this character's contribution to the overall breakweight after it. + * The overall breakweight between two characters is the maximum of the breakweight + * contributions from the characters either side of it. If a character makes no + * contribution to the breakweight on one side of it, the contribution is considered + * to be 0. + * @param p Pointer to charinfo to return information on. + */ +GR2_API int gr_cinfo_break_weight(const gr_char_info* p/*not NULL*/); + +/** Returns the slot index that after this character is after in the slot stream + * + * In effect each character is associated with a set of slots and this returns + * the index of the last slot in the segment this character is associated with. + * + * @return after slot index between 0 and gr_seg_n_slots() + * @param p Pointer to charinfo to return information on. + */ +GR2_API int gr_cinfo_after(const gr_char_info* p/*not NULL*/); + +/** Returns the slot index that before this character is before in the slot stream + * + * In effect each character is associated with a set of slots and this returns + * the index of the first slot in the segment this character is associated with. + * + * @return before slot index between 0 and gr_seg_n_slots() + * @param p Pointer to charinfo to return information on. + */ +GR2_API int gr_cinfo_before(const gr_char_info* p/*not NULL*/); + +/** Returns the code unit index of this character in the input string + * + * @return code unit index between 0 and the end of the string + * @param p Pointer to charinfo to return information on. + */ +GR2_API size_t gr_cinfo_base(const gr_char_info* p/*not NULL*/); + +/** Returns the number of unicode characters in a string. + * + * @return number of characters in the string + * @param enc Specifies the type of data in the string: utf8, utf16, utf32 + * @param buffer_begin The start of the string + * @param buffer_end Measure up to the first nul or when end is reached, whichever is earliest. + * This parameter may be NULL. + * @param pError If there is a structural fault in the string, the location is returned + * in this variable. If no error occurs, pError will contain NULL. NULL + * may be passed for pError if no such information is required. + */ +GR2_API size_t gr_count_unicode_characters(enum gr_encform enc, const void* buffer_begin, const void* buffer_end, const void** pError); + +/** Creates and returns a segment. + * + * @return a segment that needs seg_destroy called on it. May return NULL if bad problems + * in segment processing. + * @param font Gives the size of the font in pixels per em for final positioning. If + * NULL, positions are returned in design units, i.e. at a ppm of the upem + * of the face. + * @param face The face containing all the non-size dependent information. + * @param script This is a tag containing a script identifier that is used to choose + * which graphite table within the font to use. Maybe 0. Tag may be 4 chars + * NULL padded in LSBs or space padded in LSBs. + * @param pFeats Pointer to a feature values to be used for the segment. Only one + * feature values may be used for a segment. If NULL the default features + * for the font will be used. + * @param enc Specifies what encoding form the string is in (utf8, utf16, utf32) + * @param pStart Start of the string + * @param nChars Number of unicode characters to process in the string. The string will + * be processed either up to the first NULL or until nChars have been + * processed. nChars is also used to initialise the internal memory + * allocations of the segment. So it is wise not to make nChars too much + * greater than the actual number of characters being processed. + * @param dir Specifies whether the segment is processed right to left (1) or left to + * right (0) and whether to run the internal bidi pass, if a font requests it. + * See enum gr_bidirtl for details. + */ +GR2_API gr_segment* gr_make_seg(const gr_font* font, const gr_face* face, gr_uint32 script, const gr_feature_val* pFeats, enum gr_encform enc, const void* pStart, size_t nChars, int dir); + +/** Destroys a segment, freeing the memory. + * + * @param p The segment to destroy + */ +GR2_API void gr_seg_destroy(gr_segment* p); + +/** Returns the advance for the whole segment. + * + * Returns the width of the segment up to the next glyph origin after the segment + */ +GR2_API float gr_seg_advance_X(const gr_segment* pSeg/*not NULL*/); + +/** Returns the height advance for the segment. **/ +GR2_API float gr_seg_advance_Y(const gr_segment* pSeg/*not NULL*/); + +/** Returns the number of gr_char_infos in the segment. **/ +GR2_API unsigned int gr_seg_n_cinfo(const gr_segment* pSeg/*not NULL*/); + +/** Returns a gr_char_info at a given index in the segment. **/ +GR2_API const gr_char_info* gr_seg_cinfo(const gr_segment* pSeg/*not NULL*/, unsigned int index/*must be <number_of_CharInfo*/); + +/** Returns the number of glyph gr_slots in the segment. **/ +GR2_API unsigned int gr_seg_n_slots(const gr_segment* pSeg/*not NULL*/); //one slot per glyph + +/** Returns the first gr_slot in the segment. + * + * The first slot in a segment has a gr_slot_prev_in_segment() of NULL. Slots are owned + * by their segment and are destroyed along with the segment. + */ +GR2_API const gr_slot* gr_seg_first_slot(gr_segment* pSeg/*not NULL*/); //may give a base slot or a slot which is attached to another + +/** Returns the last gr_slot in the segment. + * + * The last slot in a segment has a gr_slot_next_in_segment() of NULL + */ +GR2_API const gr_slot* gr_seg_last_slot(gr_segment* pSeg/*not NULL*/); //may give a base slot or a slot which is attached to another + +/** Justifies a linked list of slots for a line to a given width + * + * Passed a pointer to the start of a linked list of slots corresponding to a line, as + * set up by gr_slot_linebreak_before, this function will position the glyphs in the line + * to take up the given width. It is possible to specify a subrange within the line to process. + * This allows skipping of line initial or final whitespace, for example. While this will ensure + * that the subrange fits width, the line will still be positioned with the first glyph of the + * line at 0. So the resulting positions may be beyond width. + * + * @return float The resulting width of the range of slots justified. + * @param pSeg Pointer to the segment + * @param pStart Pointer to the start of the line linked list (including skipped characters) + * @param pFont Font to use for positioning + * @param width Width in pixels in which to fit the line. If < 0. don't adjust natural width, just run justification passes + * to handle line end contextuals, if there are any. + * @param flags Indicates line ending types. Default is linked list is a full line + * @param pFirst If not NULL, the first slot in the list to be considered part of the line (so can skip) + * @param pLast If not NULL, the last slot to process in the line (allow say trailing whitespace to be skipped) + */ +GR2_API float gr_seg_justify(gr_segment* pSeg/*not NULL*/, const gr_slot* pStart/*not NULL*/, const gr_font *pFont, double width, enum gr_justFlags flags, const gr_slot* pFirst, const gr_slot* pLast); + +/** Returns the next slot along in the segment. + * + * Slots are held in a linked list. This returns the next in the linked list. The slot + * may or may not be attached to another slot. Returns NULL at the end of the segment. + */ +GR2_API const gr_slot* gr_slot_next_in_segment(const gr_slot* p); + +/** Returns the previous slot along in the segment. + * + * Slots are held in a doubly linked list. This returns the previos slot in the linked + * list. This slot may or may not be attached to it. Returns NULL at the start of the + * segment. + */ +GR2_API const gr_slot* gr_slot_prev_in_segment(const gr_slot* p); + +/** Returns the attachment parent slot of this slot. + * + * Attached slots form a tree. This returns the parent of this slot in that tree. A + * base glyph which is not attached to another glyph, always returns NULL. + */ +GR2_API const gr_slot* gr_slot_attached_to(const gr_slot* p); + +/** Returns the first slot attached to this slot. + * + * Attached slots form a singly linked list from the parent. This returns the first + * slot in that list. Note that this is a reference to another slot that is also in + * the main segment doubly linked list. + * + * if gr_slot_first_attachment(p) != NULL then gr_slot_attached_to(gr_slot_first_attachment(p)) == p. + */ +GR2_API const gr_slot* gr_slot_first_attachment(const gr_slot* p); + +/** Returns the next slot attached to our attachment parent. + * + * This returns the next slot in the singly linked list of slots attached to this + * slot's parent. If there are no more such slots, NULL is returned. If there is + * no parent, i.e. the passed slot is a cluster base, then the next cluster base + * in graphical order (ltr, even for rtl text) is returned. + * + * if gr_slot_next_sibling_attachment(p) != NULL then gr_slot_attached_to(gr_slot_next_sibling_attachment(p)) == gr_slot_attached_to(p). + */ +GR2_API const gr_slot* gr_slot_next_sibling_attachment(const gr_slot* p); + + +/** Returns glyph id of the slot + * + * Each slot has a glyphid which is rendered at the position given by the slot. This + * glyphid is the real glyph to be rendered and never a pseudo glyph. + */ +GR2_API unsigned short gr_slot_gid(const gr_slot* p); + +/** Returns X offset of glyph from start of segment **/ +GR2_API float gr_slot_origin_X(const gr_slot* p); + +/** Returns Y offset of glyph from start of segment **/ +GR2_API float gr_slot_origin_Y(const gr_slot* p); + +/** Returns the glyph advance for this glyph as adjusted for kerning + * + * @param p Slot to give results for + * @param face gr_face of the glyphs. May be NULL if unhinted advances used + * @param font gr_font to scale for pixel results. If NULL returns design + * units advance. If not NULL then returns pixel advance based + * on hinted or scaled glyph advances in the font. face must be + * passed for hinted advances to be used. + */ +GR2_API float gr_slot_advance_X(const gr_slot* p, const gr_face* face, const gr_font *font); + +/** Returns the vertical advance for the glyph in the slot adjusted for kerning + * + * Returns design units unless font is not NULL in which case the pixel value + * is returned scaled for the given font + */ +GR2_API float gr_slot_advance_Y(const gr_slot* p, const gr_face* face, const gr_font *font); + +/** Returns the gr_char_info index before us + * + * Returns the index of the gr_char_info that a cursor before this slot, would put + * an underlying cursor before. This may also be interpretted as each slot holding + * a set of char_infos that it is associated with and this function returning the + * index of the char_info with lowest index, from this set. + */ +GR2_API int gr_slot_before(const gr_slot* p/*not NULL*/); + +/** Returns the gr_char_info index after us + * + * Returns the index of the gr_char_info that a cursor after this slot would put an + * underlying cursor after. This may also be interpretted as each slot holding a set + * of char_infos that it is associated with and this function returning the index of + * the char_info with the highest index, from this set. + */ +GR2_API int gr_slot_after(const gr_slot* p/*not NULL*/); + +/** Returns the index of this slot in the segment + * + * Returns the index given to this slot during final positioning. This corresponds + * to the value returned br gr_cinfo_before() and gr_cinfo_after() + */ +GR2_API unsigned int gr_slot_index(const gr_slot* p/*not NULL*/); + +/** Return a slot attribute value + * + * Given a slot and an attribute along with a possible subattribute, return the + * corresponding value in the slot. See enum gr_attrCode for details of each attribute. + */ +GR2_API int gr_slot_attr(const gr_slot* p/*not NULL*/, const gr_segment* pSeg/*not NULL*/, enum gr_attrCode index, gr_uint8 subindex); //tbd - do we need to expose this? + +/** Returns whether text may be inserted before this glyph. + * + * This indicates whether a cursor can be put before this slot. It applies to + * base glyphs that have no parent as well as attached glyphs that have the + * .insert attribute explicitly set to true. This is the primary mechanism + * for identifying contiguous sequences of base plus diacritics. + */ +GR2_API int gr_slot_can_insert_before(const gr_slot* p); + +/** Returns the original gr_char_info index this slot refers to. + * + * Each Slot has a gr_char_info that it originates from. This is that gr_char_info. + * The index is passed to gr_seg_cinfo(). This information is useful for testing. + */ +GR2_API int gr_slot_original(const gr_slot* p/*not NULL*/); + +/** Breaks a segment into lines. + * + * Breaks the slot linked list at the given point in the linked list. It is up + * to the application to keep track of the first slot on each line. + */ +GR2_API void gr_slot_linebreak_before(gr_slot *p/*not NULL*/); + +#ifdef __cplusplus +} +#endif diff --git a/thirdparty/graphite/include/graphite2/Types.h b/thirdparty/graphite/include/graphite2/Types.h new file mode 100644 index 0000000000..916c91191b --- /dev/null +++ b/thirdparty/graphite/include/graphite2/Types.h @@ -0,0 +1,79 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + + Alternatively, the contents of this file may be used under the terms + of the Mozilla Public License (http://mozilla.org/MPL) or the GNU + General Public License, as published by the Free Software Foundation, + either version 2 of the License or (at your option) any later version. +*/ +#pragma once + +#include <stddef.h> + +typedef unsigned char gr_uint8; +typedef gr_uint8 gr_byte; +typedef signed char gr_int8; +typedef unsigned short gr_uint16; +typedef short gr_int16; +typedef unsigned int gr_uint32; +typedef int gr_int32; + +enum gr_encform { + gr_utf8 = 1/*sizeof(uint8)*/, gr_utf16 = 2/*sizeof(uint16)*/, gr_utf32 = 4/*sizeof(uint32)*/ +}; + + +// Define API function declspec/attributes and how each supported compiler or OS +// allows us to specify them. +#if defined __GNUC__ + #define _gr2_and , + #define _gr2_tag_fn(a) __attribute__((a)) + #define _gr2_deprecated_flag deprecated + #define _gr2_export_flag visibility("default") + #define _gr2_import_flag visibility("default") + #define _gr2_static_flag visibility("hidden") +#endif + +#if defined _WIN32 || defined __CYGWIN__ + #if defined __GNUC__ // These three will be redefined for Windows + #undef _gr2_export_flag + #undef _gr2_import_flag + #undef _gr2_static_flag + #else // How MSVC sepcifies function level attributes adn deprecation + #define _gr2_and + #define _gr2_tag_fn(a) __declspec(a) + #define _gr2_deprecated_flag deprecated + #endif + #define _gr2_export_flag dllexport + #define _gr2_import_flag dllimport + #define _gr2_static_flag +#endif + +#if defined GRAPHITE2_STATIC + #define GR2_API _gr2_tag_fn(_gr2_static_flag) + #define GR2_DEPRECATED_API _gr2_tag_fn(_gr2_deprecated_flag _gr2_and _gr2_static_flag) +#elif defined GRAPHITE2_EXPORTING + #define GR2_API _gr2_tag_fn(_gr2_export_flag) + #define GR2_DEPRECATED_API _gr2_tag_fn(_gr2_deprecated_flag _gr2_and _gr2_export_flag) +#else + #define GR2_API _gr2_tag_fn(_gr2_import_flag) + #define GR2_DEPRECATED_API _gr2_tag_fn(_gr2_deprecated_flag _gr2_and _gr2_import_flag) +#endif diff --git a/thirdparty/graphite/src/CmapCache.cpp b/thirdparty/graphite/src/CmapCache.cpp new file mode 100644 index 0000000000..d070019a34 --- /dev/null +++ b/thirdparty/graphite/src/CmapCache.cpp @@ -0,0 +1,155 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ + +#include "inc/Main.h" +#include "inc/CmapCache.h" +#include "inc/Face.h" +#include "inc/TtfTypes.h" +#include "inc/TtfUtil.h" + + +using namespace graphite2; + +const void * bmp_subtable(const Face::Table & cmap) +{ + const void * stbl; + if (!cmap.size()) return 0; + if (TtfUtil::CheckCmapSubtable4(stbl = TtfUtil::FindCmapSubtable(cmap, 3, 1, cmap.size()), cmap + cmap.size()) + || TtfUtil::CheckCmapSubtable4(stbl = TtfUtil::FindCmapSubtable(cmap, 0, 3, cmap.size()), cmap + cmap.size()) + || TtfUtil::CheckCmapSubtable4(stbl = TtfUtil::FindCmapSubtable(cmap, 0, 2, cmap.size()), cmap + cmap.size()) + || TtfUtil::CheckCmapSubtable4(stbl = TtfUtil::FindCmapSubtable(cmap, 0, 1, cmap.size()), cmap + cmap.size()) + || TtfUtil::CheckCmapSubtable4(stbl = TtfUtil::FindCmapSubtable(cmap, 0, 0, cmap.size()), cmap + cmap.size())) + return stbl; + return 0; +} + +const void * smp_subtable(const Face::Table & cmap) +{ + const void * stbl; + if (!cmap.size()) return 0; + if (TtfUtil::CheckCmapSubtable12(stbl = TtfUtil::FindCmapSubtable(cmap, 3, 10, cmap.size()), cmap + cmap.size()) + || TtfUtil::CheckCmapSubtable12(stbl = TtfUtil::FindCmapSubtable(cmap, 0, 4, cmap.size()), cmap + cmap.size())) + return stbl; + return 0; +} + +template <unsigned int (*NextCodePoint)(const void *, unsigned int, int *), + uint16 (*LookupCodePoint)(const void *, unsigned int, int)> +bool cache_subtable(uint16 * blocks[], const void * cst, const unsigned int limit) +{ + int rangeKey = 0; + uint32 codePoint = NextCodePoint(cst, 0, &rangeKey), + prevCodePoint = 0; + while (codePoint < limit) + { + unsigned int block = codePoint >> 8; + if (!blocks[block]) + { + blocks[block] = grzeroalloc<uint16>(0x100); + if (!blocks[block]) + return false; + } + blocks[block][codePoint & 0xFF] = LookupCodePoint(cst, codePoint, rangeKey); + // prevent infinite loop + if (codePoint <= prevCodePoint) + codePoint = prevCodePoint + 1; + prevCodePoint = codePoint; + codePoint = NextCodePoint(cst, codePoint, &rangeKey); + } + return true; +} + + +CachedCmap::CachedCmap(const Face & face) +: m_isBmpOnly(true), + m_blocks(0) +{ + const Face::Table cmap(face, Tag::cmap); + if (!cmap) return; + + const void * bmp_cmap = bmp_subtable(cmap); + const void * smp_cmap = smp_subtable(cmap); + m_isBmpOnly = !smp_cmap; + + m_blocks = grzeroalloc<uint16 *>(m_isBmpOnly ? 0x100 : 0x1100); + if (m_blocks && smp_cmap) + { + if (!cache_subtable<TtfUtil::CmapSubtable12NextCodepoint, TtfUtil::CmapSubtable12Lookup>(m_blocks, smp_cmap, 0x10FFFF)) + return; + } + + if (m_blocks && bmp_cmap) + { + if (!cache_subtable<TtfUtil::CmapSubtable4NextCodepoint, TtfUtil::CmapSubtable4Lookup>(m_blocks, bmp_cmap, 0xFFFF)) + return; + } +} + +CachedCmap::~CachedCmap() throw() +{ + if (!m_blocks) return; + unsigned int numBlocks = (m_isBmpOnly)? 0x100 : 0x1100; + for (unsigned int i = 0; i < numBlocks; i++) + free(m_blocks[i]); + free(m_blocks); +} + +uint16 CachedCmap::operator [] (const uint32 usv) const throw() +{ + if ((m_isBmpOnly && usv > 0xFFFF) || (usv > 0x10FFFF)) + return 0; + const uint32 block = 0xFFFF & (usv >> 8); + if (m_blocks[block]) + return m_blocks[block][usv & 0xFF]; + return 0; +}; + +CachedCmap::operator bool() const throw() +{ + return m_blocks != 0; +} + + +DirectCmap::DirectCmap(const Face & face) +: _cmap(face, Tag::cmap), + _smp(smp_subtable(_cmap)), + _bmp(bmp_subtable(_cmap)) +{ +} + +uint16 DirectCmap::operator [] (const uint32 usv) const throw() +{ + return usv > 0xFFFF + ? (_smp ? TtfUtil::CmapSubtable12Lookup(_smp, usv, 0) : 0) + : TtfUtil::CmapSubtable4Lookup(_bmp, usv, 0); +} + +DirectCmap::operator bool () const throw() +{ + return _cmap && _bmp; +} + diff --git a/thirdparty/graphite/src/Code.cpp b/thirdparty/graphite/src/Code.cpp new file mode 100644 index 0000000000..ec5ab298ca --- /dev/null +++ b/thirdparty/graphite/src/Code.cpp @@ -0,0 +1,782 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +// This class represents loaded graphite stack machine code. It performs +// basic sanity checks, on the incoming code to prevent more obvious problems +// from crashing graphite. +// Author: Tim Eves + +#include <cassert> +#include <cstddef> +#include <cstdlib> +#include <cstring> +#include "graphite2/Segment.h" +#include "inc/Code.h" +#include "inc/Face.h" +#include "inc/GlyphFace.h" +#include "inc/GlyphCache.h" +#include "inc/Machine.h" +#include "inc/Rule.h" +#include "inc/Silf.h" + +#include <cstdio> + +#ifdef NDEBUG +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif +#endif + + +using namespace graphite2; +using namespace vm; + +namespace { + +inline bool is_return(const instr i) { + const opcode_t * opmap = Machine::getOpcodeTable(); + const instr pop_ret = *opmap[POP_RET].impl, + ret_zero = *opmap[RET_ZERO].impl, + ret_true = *opmap[RET_TRUE].impl; + return i == pop_ret || i == ret_zero || i == ret_true; +} + +struct context +{ + context(uint8 ref=0) : codeRef(ref) {flags.changed=false; flags.referenced=false;} + struct { + uint8 changed:1, + referenced:1; + } flags; + uint8 codeRef; +}; + +} // end namespace + + +class Machine::Code::decoder +{ +public: + struct limits; + static const int NUMCONTEXTS = 256; + + decoder(limits & lims, Code &code, enum passtype pt) throw(); + + bool load(const byte * bc_begin, const byte * bc_end); + void apply_analysis(instr * const code, instr * code_end); + byte max_ref() { return _max_ref; } + int out_index() const { return _out_index; } + +private: + void set_ref(int index) throw(); + void set_noref(int index) throw(); + void set_changed(int index) throw(); + opcode fetch_opcode(const byte * bc); + void analyse_opcode(const opcode, const int8 * const dp) throw(); + bool emit_opcode(opcode opc, const byte * & bc); + bool validate_opcode(const byte opc, const byte * const bc); + bool valid_upto(const uint16 limit, const uint16 x) const throw(); + bool test_context() const throw(); + bool test_ref(int8 index) const throw(); + bool test_attr(attrCode attr) const throw(); + void failure(const status_t s) const throw() { _code.failure(s); } + + Code & _code; + int _out_index; + uint16 _out_length; + instr * _instr; + byte * _data; + limits & _max; + enum passtype _passtype; + int _stack_depth; + bool _in_ctxt_item; + int16 _slotref; + context _contexts[NUMCONTEXTS]; + byte _max_ref; +}; + + +struct Machine::Code::decoder::limits +{ + const byte * bytecode; + const uint8 pre_context; + const uint16 rule_length, + classes, + glyf_attrs, + features; + const byte attrid[gr_slatMax]; +}; + +inline Machine::Code::decoder::decoder(limits & lims, Code &code, enum passtype pt) throw() +: _code(code), + _out_index(code._constraint ? 0 : lims.pre_context), + _out_length(code._constraint ? 1 : lims.rule_length), + _instr(code._code), _data(code._data), _max(lims), _passtype(pt), + _stack_depth(0), + _in_ctxt_item(false), + _slotref(0), + _max_ref(0) +{ } + + + +Machine::Code::Code(bool is_constraint, const byte * bytecode_begin, const byte * const bytecode_end, + uint8 pre_context, uint16 rule_length, const Silf & silf, const Face & face, + enum passtype pt, byte * * const _out) + : _code(0), _data(0), _data_size(0), _instr_count(0), _max_ref(0), _status(loaded), + _constraint(is_constraint), _modify(false), _delete(false), _own(_out==0) +{ +#ifdef GRAPHITE2_TELEMETRY + telemetry::category _code_cat(face.tele.code); +#endif + assert(bytecode_begin != 0); + if (bytecode_begin == bytecode_end) + { + // ::new (this) Code(); + return; + } + assert(bytecode_end > bytecode_begin); + const opcode_t * op_to_fn = Machine::getOpcodeTable(); + + // Allocate code and data target buffers, these sizes are a worst case + // estimate. Once we know their real sizes the we'll shrink them. + if (_out) _code = reinterpret_cast<instr *>(*_out); + else _code = static_cast<instr *>(malloc(estimateCodeDataOut(bytecode_end-bytecode_begin, 1, is_constraint ? 0 : rule_length))); + _data = reinterpret_cast<byte *>(_code + (bytecode_end - bytecode_begin)); + + if (!_code || !_data) { + failure(alloc_failed); + return; + } + + decoder::limits lims = { + bytecode_end, + pre_context, + rule_length, + silf.numClasses(), + face.glyphs().numAttrs(), + face.numFeatures(), + {1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,255, + 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,0,0, + 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0, silf.numUser()} + }; + + decoder dec(lims, *this, pt); + if(!dec.load(bytecode_begin, bytecode_end)) + return; + + // Is this an empty program? + if (_instr_count == 0) + { + release_buffers(); + ::new (this) Code(); + return; + } + + // When we reach the end check we've terminated it correctly + if (!is_return(_code[_instr_count-1])) { + failure(missing_return); + return; + } + + assert((_constraint && immutable()) || !_constraint); + dec.apply_analysis(_code, _code + _instr_count); + _max_ref = dec.max_ref(); + + // Now we know exactly how much code and data the program really needs + // realloc the buffers to exactly the right size so we don't waste any + // memory. + assert((bytecode_end - bytecode_begin) >= ptrdiff_t(_instr_count)); + assert((bytecode_end - bytecode_begin) >= ptrdiff_t(_data_size)); + memmove(_code + (_instr_count+1), _data, _data_size*sizeof(byte)); + size_t const total_sz = ((_instr_count+1) + (_data_size + sizeof(instr)-1)/sizeof(instr))*sizeof(instr); + if (_out) + *_out += total_sz; + else + { + instr * const old_code = _code; + _code = static_cast<instr *>(realloc(_code, total_sz)); + if (!_code) free(old_code); + } + _data = reinterpret_cast<byte *>(_code + (_instr_count+1)); + + if (!_code) + { + failure(alloc_failed); + return; + } + + // Make this RET_ZERO, we should never reach this but just in case ... + _code[_instr_count] = op_to_fn[RET_ZERO].impl[_constraint]; + +#ifdef GRAPHITE2_TELEMETRY + telemetry::count_bytes(_data_size + (_instr_count+1)*sizeof(instr)); +#endif +} + +Machine::Code::~Code() throw () +{ + if (_own) + release_buffers(); +} + + +bool Machine::Code::decoder::load(const byte * bc, const byte * bc_end) +{ + _max.bytecode = bc_end; + while (bc < bc_end) + { + const opcode opc = fetch_opcode(bc++); + if (opc == vm::MAX_OPCODE) + return false; + + analyse_opcode(opc, reinterpret_cast<const int8 *>(bc)); + + if (!emit_opcode(opc, bc)) + return false; + } + + return bool(_code); +} + +// Validation check and fixups. +// + +opcode Machine::Code::decoder::fetch_opcode(const byte * bc) +{ + const byte opc = *bc++; + + // Do some basic sanity checks based on what we know about the opcode + if (!validate_opcode(opc, bc)) return MAX_OPCODE; + + // And check its arguments as far as possible + switch (opcode(opc)) + { + case NOP : + break; + case PUSH_BYTE : + case PUSH_BYTEU : + case PUSH_SHORT : + case PUSH_SHORTU : + case PUSH_LONG : + ++_stack_depth; + break; + case ADD : + case SUB : + case MUL : + case DIV : + case MIN_ : + case MAX_ : + case AND : + case OR : + case EQUAL : + case NOT_EQ : + case LESS : + case GTR : + case LESS_EQ : + case GTR_EQ : + case BITOR : + case BITAND : + if (--_stack_depth <= 0) + failure(underfull_stack); + break; + case NEG : + case TRUNC8 : + case TRUNC16 : + case NOT : + case BITNOT : + case BITSET : + if (_stack_depth <= 0) + failure(underfull_stack); + break; + case COND : + _stack_depth -= 2; + if (_stack_depth <= 0) + failure(underfull_stack); + break; + case NEXT_N : // runtime checked + break; + case NEXT : + case COPY_NEXT : + ++_out_index; + if (_out_index < -1 || _out_index > _out_length || _slotref > _max.rule_length) + failure(out_of_range_data); + break; + case PUT_GLYPH_8BIT_OBS : + valid_upto(_max.classes, bc[0]); + test_context(); + break; + case PUT_SUBS_8BIT_OBS : + test_ref(int8(bc[0])); + valid_upto(_max.classes, bc[1]); + valid_upto(_max.classes, bc[2]); + test_context(); + break; + case PUT_COPY : + test_ref(int8(bc[0])); + test_context(); + break; + case INSERT : + if (_passtype >= PASS_TYPE_POSITIONING) + failure(invalid_opcode); + ++_out_length; + if (_out_index < 0) ++_out_index; + if (_out_index < -1 || _out_index >= _out_length) + failure(out_of_range_data); + break; + case DELETE : + if (_passtype >= PASS_TYPE_POSITIONING) + failure(invalid_opcode); + if (_out_index < _max.pre_context) + failure(out_of_range_data); + --_out_index; + --_out_length; + if (_out_index < -1 || _out_index > _out_length) + failure(out_of_range_data); + break; + case ASSOC : + if (bc[0] == 0) + failure(out_of_range_data); + for (uint8 num = bc[0]; num; --num) + test_ref(int8(bc[num])); + test_context(); + break; + case CNTXT_ITEM : + valid_upto(_max.rule_length, _max.pre_context + int8(bc[0])); + if (bc + 2 + bc[1] >= _max.bytecode) failure(jump_past_end); + if (_in_ctxt_item) failure(nested_context_item); + break; + case ATTR_SET : + case ATTR_ADD : + case ATTR_SUB : + case ATTR_SET_SLOT : + if (--_stack_depth < 0) + failure(underfull_stack); + valid_upto(gr_slatMax, bc[0]); + if (attrCode(bc[0]) == gr_slatUserDefn) // use IATTR for user attributes + failure(out_of_range_data); + test_attr(attrCode(bc[0])); + test_context(); + break; + case IATTR_SET_SLOT : + if (--_stack_depth < 0) + failure(underfull_stack); + if (valid_upto(gr_slatMax, bc[0])) + valid_upto(_max.attrid[bc[0]], bc[1]); + test_attr(attrCode(bc[0])); + test_context(); + break; + case PUSH_SLOT_ATTR : + ++_stack_depth; + valid_upto(gr_slatMax, bc[0]); + test_ref(int8(bc[1])); + if (attrCode(bc[0]) == gr_slatUserDefn) // use IATTR for user attributes + failure(out_of_range_data); + test_attr(attrCode(bc[0])); + break; + case PUSH_GLYPH_ATTR_OBS : + case PUSH_ATT_TO_GATTR_OBS : + ++_stack_depth; + valid_upto(_max.glyf_attrs, bc[0]); + test_ref(int8(bc[1])); + break; + case PUSH_ATT_TO_GLYPH_METRIC : + case PUSH_GLYPH_METRIC : + ++_stack_depth; + valid_upto(kgmetDescent, bc[0]); + test_ref(int8(bc[1])); + // level: dp[2] no check necessary + break; + case PUSH_FEAT : + ++_stack_depth; + valid_upto(_max.features, bc[0]); + test_ref(int8(bc[1])); + break; + case PUSH_ISLOT_ATTR : + ++_stack_depth; + if (valid_upto(gr_slatMax, bc[0])) + { + test_ref(int8(bc[1])); + valid_upto(_max.attrid[bc[0]], bc[2]); + } + test_attr(attrCode(bc[0])); + break; + case PUSH_IGLYPH_ATTR :// not implemented + ++_stack_depth; + break; + case POP_RET : + if (--_stack_depth < 0) + failure(underfull_stack); + GR_FALLTHROUGH; + // no break + case RET_ZERO : + case RET_TRUE : + break; + case IATTR_SET : + case IATTR_ADD : + case IATTR_SUB : + if (--_stack_depth < 0) + failure(underfull_stack); + if (valid_upto(gr_slatMax, bc[0])) + valid_upto(_max.attrid[bc[0]], bc[1]); + test_attr(attrCode(bc[0])); + test_context(); + break; + case PUSH_PROC_STATE : // dummy: dp[0] no check necessary + case PUSH_VERSION : + ++_stack_depth; + break; + case PUT_SUBS : + test_ref(int8(bc[0])); + valid_upto(_max.classes, uint16(bc[1]<< 8) | bc[2]); + valid_upto(_max.classes, uint16(bc[3]<< 8) | bc[4]); + test_context(); + break; + case PUT_SUBS2 : // not implemented + case PUT_SUBS3 : // not implemented + break; + case PUT_GLYPH : + valid_upto(_max.classes, uint16(bc[0]<< 8) | bc[1]); + test_context(); + break; + case PUSH_GLYPH_ATTR : + case PUSH_ATT_TO_GLYPH_ATTR : + ++_stack_depth; + valid_upto(_max.glyf_attrs, uint16(bc[0]<< 8) | bc[1]); + test_ref(int8(bc[2])); + break; + case SET_FEAT : + valid_upto(_max.features, bc[0]); + test_ref(int8(bc[1])); + break; + default: + failure(invalid_opcode); + break; + } + + return bool(_code) ? opcode(opc) : MAX_OPCODE; +} + + +void Machine::Code::decoder::analyse_opcode(const opcode opc, const int8 * arg) throw() +{ + switch (opc) + { + case DELETE : + _code._delete = true; + break; + case ASSOC : + set_changed(0); +// for (uint8 num = arg[0]; num; --num) +// _analysis.set_noref(num); + break; + case PUT_GLYPH_8BIT_OBS : + case PUT_GLYPH : + _code._modify = true; + set_changed(0); + break; + case ATTR_SET : + case ATTR_ADD : + case ATTR_SUB : + case ATTR_SET_SLOT : + case IATTR_SET_SLOT : + case IATTR_SET : + case IATTR_ADD : + case IATTR_SUB : + set_noref(0); + break; + case NEXT : + case COPY_NEXT : + ++_slotref; + _contexts[_slotref] = context(uint8(_code._instr_count+1)); + // if (_analysis.slotref > _analysis.max_ref) _analysis.max_ref = _analysis.slotref; + break; + case INSERT : + if (_slotref >= 0) --_slotref; + _code._modify = true; + break; + case PUT_SUBS_8BIT_OBS : // slotref on 1st parameter + case PUT_SUBS : + _code._modify = true; + set_changed(0); + GR_FALLTHROUGH; + // no break + case PUT_COPY : + if (arg[0] != 0) { set_changed(0); _code._modify = true; } + set_ref(arg[0]); + break; + case PUSH_GLYPH_ATTR_OBS : + case PUSH_SLOT_ATTR : + case PUSH_GLYPH_METRIC : + case PUSH_ATT_TO_GATTR_OBS : + case PUSH_ATT_TO_GLYPH_METRIC : + case PUSH_ISLOT_ATTR : + case PUSH_FEAT : + case SET_FEAT : + set_ref(arg[1]); + break; + case PUSH_ATT_TO_GLYPH_ATTR : + case PUSH_GLYPH_ATTR : + set_ref(arg[2]); + break; + default: + break; + } +} + + +bool Machine::Code::decoder::emit_opcode(opcode opc, const byte * & bc) +{ + const opcode_t * op_to_fn = Machine::getOpcodeTable(); + const opcode_t & op = op_to_fn[opc]; + if (op.impl[_code._constraint] == 0) + { + failure(unimplemented_opcode_used); + return false; + } + + const size_t param_sz = op.param_sz == VARARGS ? bc[0] + 1 : op.param_sz; + + // Add this instruction + *_instr++ = op.impl[_code._constraint]; + ++_code._instr_count; + + // Grab the parameters + if (param_sz) { + memcpy(_data, bc, param_sz * sizeof(byte)); + bc += param_sz; + _data += param_sz; + _code._data_size += param_sz; + } + + // recursively decode a context item so we can split the skip into + // instruction and data portions. + if (opc == CNTXT_ITEM) + { + assert(_out_index == 0); + _in_ctxt_item = true; + _out_index = _max.pre_context + int8(_data[-2]); + _slotref = int8(_data[-2]); + _out_length = _max.rule_length; + + const size_t ctxt_start = _code._instr_count; + byte & instr_skip = _data[-1]; + byte & data_skip = *_data++; + ++_code._data_size; + const byte *curr_end = _max.bytecode; + + if (load(bc, bc + instr_skip)) + { + bc += instr_skip; + data_skip = instr_skip - byte(_code._instr_count - ctxt_start); + instr_skip = byte(_code._instr_count - ctxt_start); + _max.bytecode = curr_end; + + _out_length = 1; + _out_index = 0; + _slotref = 0; + _in_ctxt_item = false; + } + else + { + _out_index = 0; + _slotref = 0; + return false; + } + } + + return bool(_code); +} + + +void Machine::Code::decoder::apply_analysis(instr * const code, instr * code_end) +{ + // insert TEMP_COPY commands for slots that need them (that change and are referenced later) + int tempcount = 0; + if (_code._constraint) return; + + const instr temp_copy = Machine::getOpcodeTable()[TEMP_COPY].impl[0]; + for (const context * c = _contexts, * const ce = c + _slotref; c < ce; ++c) + { + if (!c->flags.referenced || !c->flags.changed) continue; + + instr * const tip = code + c->codeRef + tempcount; + memmove(tip+1, tip, (code_end - tip) * sizeof(instr)); + *tip = temp_copy; + ++code_end; + ++tempcount; + _code._delete = true; + } + + _code._instr_count = code_end - code; +} + + +inline +bool Machine::Code::decoder::validate_opcode(const byte opc, const byte * const bc) +{ + if (opc >= MAX_OPCODE) + { + failure(invalid_opcode); + return false; + } + const opcode_t & op = Machine::getOpcodeTable()[opc]; + if (op.impl[_code._constraint] == 0) + { + failure(unimplemented_opcode_used); + return false; + } + if (op.param_sz == VARARGS && bc >= _max.bytecode) + { + failure(arguments_exhausted); + return false; + } + const size_t param_sz = op.param_sz == VARARGS ? bc[0] + 1 : op.param_sz; + if (bc - 1 + param_sz >= _max.bytecode) + { + failure(arguments_exhausted); + return false; + } + return true; +} + + +bool Machine::Code::decoder::valid_upto(const uint16 limit, const uint16 x) const throw() +{ + const bool t = (limit != 0) && (x < limit); + if (!t) failure(out_of_range_data); + return t; +} + +inline +bool Machine::Code::decoder::test_ref(int8 index) const throw() +{ + if (_code._constraint && !_in_ctxt_item) + { + if (index > 0 || -index > _max.pre_context) + { + failure(out_of_range_data); + return false; + } + } + else + { + if (_max.rule_length == 0 + || (_slotref + _max.pre_context + index >= _max.rule_length) + || (_slotref + _max.pre_context + index < 0)) + { + failure(out_of_range_data); + return false; + } + } + return true; +} + +bool Machine::Code::decoder::test_context() const throw() +{ + if (_out_index >= _out_length || _out_index < 0 || _slotref >= NUMCONTEXTS - 1) + { + failure(out_of_range_data); + return false; + } + return true; +} + +bool Machine::Code::decoder::test_attr(attrCode) const throw() +{ +#if 0 // This code is coming but causes backward compatibility problems. + if (_passtype < PASS_TYPE_POSITIONING) + { + if (attr != gr_slatBreak && attr != gr_slatDir && attr != gr_slatUserDefn + && attr != gr_slatCompRef) + { + failure(out_of_range_data); + return false; + } + } +#endif + return true; +} + +inline +void Machine::Code::failure(const status_t s) throw() { + release_buffers(); + _status = s; +} + + +inline +void Machine::Code::decoder::set_ref(int index) throw() { + if (index + _slotref < 0 || index + _slotref >= NUMCONTEXTS) return; + _contexts[index + _slotref].flags.referenced = true; + if (index + _slotref > _max_ref) _max_ref = index + _slotref; +} + + +inline +void Machine::Code::decoder::set_noref(int index) throw() { + if (index + _slotref < 0 || index + _slotref >= NUMCONTEXTS) return; + if (index + _slotref > _max_ref) _max_ref = index + _slotref; +} + + +inline +void Machine::Code::decoder::set_changed(int index) throw() { + if (index + _slotref < 0 || index + _slotref >= NUMCONTEXTS) return; + _contexts[index + _slotref].flags.changed= true; + if (index + _slotref > _max_ref) _max_ref = index + _slotref; +} + + +void Machine::Code::release_buffers() throw() +{ + if (_own) + free(_code); + _code = 0; + _data = 0; + _own = false; +} + + +int32 Machine::Code::run(Machine & m, slotref * & map) const +{ +// assert(_own); + assert(*this); // Check we are actually runnable + + if (m.slotMap().size() <= size_t(_max_ref + m.slotMap().context()) + || m.slotMap()[_max_ref + m.slotMap().context()] == 0) + { + m._status = Machine::slot_offset_out_bounds; + return 1; +// return m.run(_code, _data, map); + } + + return m.run(_code, _data, map); +} diff --git a/thirdparty/graphite/src/Collider.cpp b/thirdparty/graphite/src/Collider.cpp new file mode 100644 index 0000000000..1929b39a58 --- /dev/null +++ b/thirdparty/graphite/src/Collider.cpp @@ -0,0 +1,1115 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include <algorithm> +#include <limits> +#include <cmath> +#include <string> +#include <functional> +#include "inc/Collider.h" +#include "inc/Segment.h" +#include "inc/Slot.h" +#include "inc/GlyphCache.h" +#include "inc/Sparse.h" + +#define ISQRT2 0.707106781f + +// Possible rounding error for subbox boundaries: 0.016 = 1/64 = 1/256 * 4 +// (values in font range from 0..256) +// #define SUBBOX_RND_ERR 0.016 + +using namespace graphite2; + +//// SHIFT-COLLIDER //// + +// Initialize the Collider to hold the basic movement limits for the +// target slot, the one we are focusing on fixing. +bool ShiftCollider::initSlot(Segment *seg, Slot *aSlot, const Rect &limit, float margin, float marginWeight, + const Position &currShift, const Position &currOffset, int dir, GR_MAYBE_UNUSED json * const dbgout) +{ + int i; + float mx, mn; + float a, shift; + const GlyphCache &gc = seg->getFace()->glyphs(); + unsigned short gid = aSlot->gid(); + if (!gc.check(gid)) + return false; + const BBox &bb = gc.getBoundingBBox(gid); + const SlantBox &sb = gc.getBoundingSlantBox(gid); + //float sx = aSlot->origin().x + currShift.x; + //float sy = aSlot->origin().y + currShift.y; + if (currOffset.x != 0.f || currOffset.y != 0.f) + _limit = Rect(limit.bl - currOffset, limit.tr - currOffset); + else + _limit = limit; + // For a ShiftCollider, these indices indicate which vector we are moving by: + // each _ranges represents absolute space with respect to the origin of the slot. Thus take into account true origins but subtract the vmin for the slot + for (i = 0; i < 4; ++i) + { + switch (i) { + case 0 : // x direction + mn = _limit.bl.x + currOffset.x; + mx = _limit.tr.x + currOffset.x; + _len[i] = bb.xa - bb.xi; + a = currOffset.y + currShift.y; + _ranges[i].initialise<XY>(mn, mx, margin, marginWeight, a); + break; + case 1 : // y direction + mn = _limit.bl.y + currOffset.y; + mx = _limit.tr.y + currOffset.y; + _len[i] = bb.ya - bb.yi; + a = currOffset.x + currShift.x; + _ranges[i].initialise<XY>(mn, mx, margin, marginWeight, a); + break; + case 2 : // sum (negatively sloped diagonal boundaries) + // pick closest x,y limit boundaries in s direction + shift = currOffset.x + currOffset.y + currShift.x + currShift.y; + mn = -2 * min(currShift.x - _limit.bl.x, currShift.y - _limit.bl.y) + shift; + mx = 2 * min(_limit.tr.x - currShift.x, _limit.tr.y - currShift.y) + shift; + _len[i] = sb.sa - sb.si; + a = currOffset.x - currOffset.y + currShift.x - currShift.y; + _ranges[i].initialise<SD>(mn, mx, margin / ISQRT2, marginWeight, a); + break; + case 3 : // diff (positively sloped diagonal boundaries) + // pick closest x,y limit boundaries in d direction + shift = currOffset.x - currOffset.y + currShift.x - currShift.y; + mn = -2 * min(currShift.x - _limit.bl.x, _limit.tr.y - currShift.y) + shift; + mx = 2 * min(_limit.tr.x - currShift.x, currShift.y - _limit.bl.y) + shift; + _len[i] = sb.da - sb.di; + a = currOffset.x + currOffset.y + currShift.x + currShift.y; + _ranges[i].initialise<SD>(mn, mx, margin / ISQRT2, marginWeight, a); + break; + } + } + + _target = aSlot; + if ((dir & 1) == 0) + { + // For LTR, switch and negate x limits. + _limit.bl.x = -1 * limit.tr.x; + //_limit.tr.x = -1 * limit.bl.x; + } + _currOffset = currOffset; + _currShift = currShift; + _origin = aSlot->origin() - currOffset; // the original anchor position of the glyph + + _margin = margin; + _marginWt = marginWeight; + + SlotCollision *c = seg->collisionInfo(aSlot); + _seqClass = c->seqClass(); + _seqProxClass = c->seqProxClass(); + _seqOrder = c->seqOrder(); + return true; +} + +template <class O> +float sdm(float vi, float va, float mx, float my, O op) +{ + float res = 2 * mx - vi; + if (op(res, vi + 2 * my)) + { + res = va + 2 * my; + if (op(res, 2 * mx - va)) + res = mx + my; + } + return res; +} + +// Mark an area with a cost that can vary along the x or y axis. The region is expressed in terms of the centre of the target glyph in each axis +void ShiftCollider::addBox_slope(bool isx, const Rect &box, const BBox &bb, const SlantBox &sb, const Position &org, float weight, float m, bool minright, int axis) +{ + float a, c; + switch (axis) { + case 0 : + if (box.bl.y < org.y + bb.ya && box.tr.y > org.y + bb.yi && box.width() > 0) + { + a = org.y + 0.5f * (bb.yi + bb.ya); + c = 0.5f * (bb.xi + bb.xa); + if (isx) + _ranges[axis].weighted<XY>(box.bl.x - c, box.tr.x - c, weight, a, m, + (minright ? box.tr.x : box.bl.x) - c, a, 0, false); + else + _ranges[axis].weighted<XY>(box.bl.x - c, box.tr.x - c, weight, a, 0, 0, org.y, + m * (a * a + sqr((minright ? box.tr.y : box.bl.y) - 0.5f * (bb.yi + bb.ya))), false); + } + break; + case 1 : + if (box.bl.x < org.x + bb.xa && box.tr.x > org.x + bb.xi && box.height() > 0) + { + a = org.x + 0.5f * (bb.xi + bb.xa); + c = 0.5f * (bb.yi + bb.ya); + if (isx) + _ranges[axis].weighted<XY>(box.bl.y - c, box.tr.y - c, weight, a, 0, 0, org.x, + m * (a * a + sqr((minright ? box.tr.x : box.bl.x) - 0.5f * (bb.xi + bb.xa))), false); + else + _ranges[axis].weighted<XY>(box.bl.y - c, box.tr.y - c, weight, a, m, + (minright ? box.tr.y : box.bl.y) - c, a, 0, false); + } + break; + case 2 : + if (box.bl.x - box.tr.y < org.x - org.y + sb.da && box.tr.x - box.bl.y > org.x - org.y + sb.di) + { + float d = org.x - org.y + 0.5f * (sb.di + sb.da); + c = 0.5f * (sb.si + sb.sa); + float smax = min(2 * box.tr.x - d, 2 * box.tr.y + d); + float smin = max(2 * box.bl.x - d, 2 * box.bl.y + d); + if (smin > smax) return; + float si; + a = d; + if (isx) + si = 2 * (minright ? box.tr.x : box.bl.x) - a; + else + si = 2 * (minright ? box.tr.y : box.bl.y) + a; + _ranges[axis].weighted<SD>(smin - c, smax - c, weight / 2, a, m / 2, si, 0, 0, isx); + } + break; + case 3 : + if (box.bl.x + box.bl.y < org.x + org.y + sb.sa && box.tr.x + box.tr.y > org.x + org.y + sb.si) + { + float s = org.x + org.y + 0.5f * (sb.si + sb.sa); + c = 0.5f * (sb.di + sb.da); + float dmax = min(2 * box.tr.x - s, s - 2 * box.bl.y); + float dmin = max(2 * box.bl.x - s, s - 2 * box.tr.y); + if (dmin > dmax) return; + float di; + a = s; + if (isx) + di = 2 * (minright ? box.tr.x : box.bl.x) - a; + else + di = 2 * (minright ? box.tr.y : box.bl.y) + a; + _ranges[axis].weighted<SD>(dmin - c, dmax - c, weight / 2, a, m / 2, di, 0, 0, !isx); + } + break; + default : + break; + } + return; +} + +// Mark an area with an absolute cost, making it completely inaccessible. +inline void ShiftCollider::removeBox(const Rect &box, const BBox &bb, const SlantBox &sb, const Position &org, int axis) +{ + float c; + switch (axis) { + case 0 : + if (box.bl.y < org.y + bb.ya && box.tr.y > org.y + bb.yi && box.width() > 0) + { + c = 0.5f * (bb.xi + bb.xa); + _ranges[axis].exclude(box.bl.x - c, box.tr.x - c); + } + break; + case 1 : + if (box.bl.x < org.x + bb.xa && box.tr.x > org.x + bb.xi && box.height() > 0) + { + c = 0.5f * (bb.yi + bb.ya); + _ranges[axis].exclude(box.bl.y - c, box.tr.y - c); + } + break; + case 2 : + if (box.bl.x - box.tr.y < org.x - org.y + sb.da && box.tr.x - box.bl.y > org.x - org.y + sb.di + && box.width() > 0 && box.height() > 0) + { + float di = org.x - org.y + sb.di; + float da = org.x - org.y + sb.da; + float smax = sdm(di, da, box.tr.x, box.tr.y, std::greater<float>()); + float smin = sdm(da, di, box.bl.x, box.bl.y, std::less<float>()); + c = 0.5f * (sb.si + sb.sa); + _ranges[axis].exclude(smin - c, smax - c); + } + break; + case 3 : + if (box.bl.x + box.bl.y < org.x + org.y + sb.sa && box.tr.x + box.tr.y > org.x + org.y + sb.si + && box.width() > 0 && box.height() > 0) + { + float si = org.x + org.y + sb.si; + float sa = org.x + org.y + sb.sa; + float dmax = sdm(si, sa, box.tr.x, -box.bl.y, std::greater<float>()); + float dmin = sdm(sa, si, box.bl.x, -box.tr.y, std::less<float>()); + c = 0.5f * (sb.di + sb.da); + _ranges[axis].exclude(dmin - c, dmax - c); + } + break; + default : + break; + } + return; +} + +// Adjust the movement limits for the target to avoid having it collide +// with the given neighbor slot. Also determine if there is in fact a collision +// between the target and the given slot. +bool ShiftCollider::mergeSlot(Segment *seg, Slot *slot, const SlotCollision *cslot, const Position &currShift, + bool isAfter, // slot is logically after _target + bool sameCluster, bool &hasCol, bool isExclusion, + GR_MAYBE_UNUSED json * const dbgout ) +{ + bool isCol = false; + const float sx = slot->origin().x - _origin.x + currShift.x; + const float sy = slot->origin().y - _origin.y + currShift.y; + const float sd = sx - sy; + const float ss = sx + sy; + float vmin, vmax; + float omin, omax, otmin, otmax; + float cmin, cmax; // target limits + float torg; + const GlyphCache &gc = seg->getFace()->glyphs(); + const unsigned short gid = slot->gid(); + if (!gc.check(gid)) + return false; + const BBox &bb = gc.getBoundingBBox(gid); + + // SlotCollision * cslot = seg->collisionInfo(slot); + int orderFlags = 0; + bool sameClass = _seqProxClass == 0 && cslot->seqClass() == _seqClass; + if (sameCluster && _seqClass + && (sameClass || (_seqProxClass != 0 && cslot->seqClass() == _seqProxClass))) + // Force the target glyph to be in the specified direction from the slot we're testing. + orderFlags = _seqOrder; + + // short circuit if only interested in direct collision and we are out of range + if (orderFlags || (sx + bb.xa + _margin >= _limit.bl.x && sx + bb.xi - _margin <= _limit.tr.x) + || (sy + bb.ya + _margin >= _limit.bl.y && sy + bb.yi - _margin <= _limit.tr.y)) + + { + const float tx = _currOffset.x + _currShift.x; + const float ty = _currOffset.y + _currShift.y; + const float td = tx - ty; + const float ts = tx + ty; + const SlantBox &sb = gc.getBoundingSlantBox(gid); + const unsigned short tgid = _target->gid(); + const BBox &tbb = gc.getBoundingBBox(tgid); + const SlantBox &tsb = gc.getBoundingSlantBox(tgid); + float seq_above_wt = cslot->seqAboveWt(); + float seq_below_wt = cslot->seqBelowWt(); + float seq_valign_wt = cslot->seqValignWt(); + float lmargin; + // if isAfter, invert orderFlags for diagonal orders. + if (isAfter) + { + // invert appropriate bits + orderFlags ^= (sameClass ? 0x3F : 0x3); + // consider 2 bits at a time, non overlapping. If both bits set, clear them + orderFlags = orderFlags ^ ((((orderFlags >> 1) & orderFlags) & 0x15) * 3); + } + +#if !defined GRAPHITE2_NTRACING + if (dbgout) + dbgout->setenv(0, slot); +#endif + + // Process main bounding octabox. + for (int i = 0; i < 4; ++i) + { + switch (i) { + case 0 : // x direction + vmin = max(max(bb.xi - tbb.xa + sx, sb.di - tsb.da + ty + sd), sb.si - tsb.sa - ty + ss); + vmax = min(min(bb.xa - tbb.xi + sx, sb.da - tsb.di + ty + sd), sb.sa - tsb.si - ty + ss); + otmin = tbb.yi + ty; + otmax = tbb.ya + ty; + omin = bb.yi + sy; + omax = bb.ya + sy; + torg = _currOffset.x; + cmin = _limit.bl.x + torg; + cmax = _limit.tr.x - tbb.xi + tbb.xa + torg; + lmargin = _margin; + break; + case 1 : // y direction + vmin = max(max(bb.yi - tbb.ya + sy, tsb.di - sb.da + tx - sd), sb.si - tsb.sa - tx + ss); + vmax = min(min(bb.ya - tbb.yi + sy, tsb.da - sb.di + tx - sd), sb.sa - tsb.si - tx + ss); + otmin = tbb.xi + tx; + otmax = tbb.xa + tx; + omin = bb.xi + sx; + omax = bb.xa + sx; + torg = _currOffset.y; + cmin = _limit.bl.y + torg; + cmax = _limit.tr.y - tbb.yi + tbb.ya + torg; + lmargin = _margin; + break; + case 2 : // sum - moving along the positively-sloped vector, so the boundaries are the + // negatively-sloped boundaries. + vmin = max(max(sb.si - tsb.sa + ss, 2 * (bb.yi - tbb.ya + sy) + td), 2 * (bb.xi - tbb.xa + sx) - td); + vmax = min(min(sb.sa - tsb.si + ss, 2 * (bb.ya - tbb.yi + sy) + td), 2 * (bb.xa - tbb.xi + sx) - td); + otmin = tsb.di + td; + otmax = tsb.da + td; + omin = sb.di + sd; + omax = sb.da + sd; + torg = _currOffset.x + _currOffset.y; + cmin = _limit.bl.x + _limit.bl.y + torg; + cmax = _limit.tr.x + _limit.tr.y - tsb.si + tsb.sa + torg; + lmargin = _margin / ISQRT2; + break; + case 3 : // diff - moving along the negatively-sloped vector, so the boundaries are the + // positively-sloped boundaries. + vmin = max(max(sb.di - tsb.da + sd, 2 * (bb.xi - tbb.xa + sx) - ts), -2 * (bb.ya - tbb.yi + sy) + ts); + vmax = min(min(sb.da - tsb.di + sd, 2 * (bb.xa - tbb.xi + sx) - ts), -2 * (bb.yi - tbb.ya + sy) + ts); + otmin = tsb.si + ts; + otmax = tsb.sa + ts; + omin = sb.si + ss; + omax = sb.sa + ss; + torg = _currOffset.x - _currOffset.y; + cmin = _limit.bl.x - _limit.tr.y + torg; + cmax = _limit.tr.x - _limit.bl.y - tsb.di + tsb.da + torg; + lmargin = _margin / ISQRT2; + break; + default : + continue; + } + +#if !defined GRAPHITE2_NTRACING + if (dbgout) + dbgout->setenv(1, reinterpret_cast<void *>(-1)); +#define DBGTAG(x) if (dbgout) dbgout->setenv(1, reinterpret_cast<void *>(-x)); +#else +#define DBGTAG(x) +#endif + + if (orderFlags) + { + Position org(tx, ty); + float xminf = _limit.bl.x + _currOffset.x + tbb.xi; + float xpinf = _limit.tr.x + _currOffset.x + tbb.xa; + float ypinf = _limit.tr.y + _currOffset.y + tbb.ya; + float yminf = _limit.bl.y + _currOffset.y + tbb.yi; + switch (orderFlags) { + case SlotCollision::SEQ_ORDER_RIGHTUP : + { + float r1Xedge = cslot->seqAboveXoff() + 0.5f * (bb.xi + bb.xa) + sx; + float r3Xedge = cslot->seqBelowXlim() + bb.xa + sx + 0.5f * (tbb.xa - tbb.xi); + float r2Yedge = 0.5f * (bb.yi + bb.ya) + sy; + + // DBGTAG(1x) means the regions are up and right + // region 1 + DBGTAG(11) + addBox_slope(true, Rect(Position(xminf, r2Yedge), Position(r1Xedge, ypinf)), + tbb, tsb, org, 0, seq_above_wt, true, i); + // region 2 + DBGTAG(12) + removeBox(Rect(Position(xminf, yminf), Position(r3Xedge, r2Yedge)), tbb, tsb, org, i); + // region 3, which end is zero is irrelevant since m weight is 0 + DBGTAG(13) + addBox_slope(true, Rect(Position(r3Xedge, yminf), Position(xpinf, r2Yedge - cslot->seqValignHt())), + tbb, tsb, org, seq_below_wt, 0, true, i); + // region 4 + DBGTAG(14) + addBox_slope(false, Rect(Position(sx + bb.xi, r2Yedge), Position(xpinf, r2Yedge + cslot->seqValignHt())), + tbb, tsb, org, 0, seq_valign_wt, true, i); + // region 5 + DBGTAG(15) + addBox_slope(false, Rect(Position(sx + bb.xi, r2Yedge - cslot->seqValignHt()), Position(xpinf, r2Yedge)), + tbb, tsb, org, seq_below_wt, seq_valign_wt, false, i); + break; + } + case SlotCollision::SEQ_ORDER_LEFTDOWN : + { + float r1Xedge = 0.5f * (bb.xi + bb.xa) + cslot->seqAboveXoff() + sx; + float r3Xedge = bb.xi - cslot->seqBelowXlim() + sx - 0.5f * (tbb.xa - tbb.xi); + float r2Yedge = 0.5f * (bb.yi + bb.ya) + sy; + // DBGTAG(2x) means the regions are up and right + // region 1 + DBGTAG(21) + addBox_slope(true, Rect(Position(r1Xedge, yminf), Position(xpinf, r2Yedge)), + tbb, tsb, org, 0, seq_above_wt, false, i); + // region 2 + DBGTAG(22) + removeBox(Rect(Position(r3Xedge, r2Yedge), Position(xpinf, ypinf)), tbb, tsb, org, i); + // region 3 + DBGTAG(23) + addBox_slope(true, Rect(Position(xminf, r2Yedge - cslot->seqValignHt()), Position(r3Xedge, ypinf)), + tbb, tsb, org, seq_below_wt, 0, false, i); + // region 4 + DBGTAG(24) + addBox_slope(false, Rect(Position(xminf, r2Yedge), Position(sx + bb.xa, r2Yedge + cslot->seqValignHt())), + tbb, tsb, org, 0, seq_valign_wt, true, i); + // region 5 + DBGTAG(25) + addBox_slope(false, Rect(Position(xminf, r2Yedge - cslot->seqValignHt()), + Position(sx + bb.xa, r2Yedge)), tbb, tsb, org, seq_below_wt, seq_valign_wt, false, i); + break; + } + case SlotCollision::SEQ_ORDER_NOABOVE : // enforce neighboring glyph being above + DBGTAG(31); + removeBox(Rect(Position(bb.xi - tbb.xa + sx, sy + bb.ya), + Position(bb.xa - tbb.xi + sx, ypinf)), tbb, tsb, org, i); + break; + case SlotCollision::SEQ_ORDER_NOBELOW : // enforce neighboring glyph being below + DBGTAG(32); + removeBox(Rect(Position(bb.xi - tbb.xa + sx, yminf), + Position(bb.xa - tbb.xi + sx, sy + bb.yi)), tbb, tsb, org, i); + break; + case SlotCollision::SEQ_ORDER_NOLEFT : // enforce neighboring glyph being to the left + DBGTAG(33) + removeBox(Rect(Position(xminf, bb.yi - tbb.ya + sy), + Position(bb.xi - tbb.xa + sx, bb.ya - tbb.yi + sy)), tbb, tsb, org, i); + break; + case SlotCollision::SEQ_ORDER_NORIGHT : // enforce neighboring glyph being to the right + DBGTAG(34) + removeBox(Rect(Position(bb.xa - tbb.xi + sx, bb.yi - tbb.ya + sy), + Position(xpinf, bb.ya - tbb.yi + sy)), tbb, tsb, org, i); + break; + default : + break; + } + } + + if (vmax < cmin - lmargin || vmin > cmax + lmargin || omax < otmin - lmargin || omin > otmax + lmargin) + continue; + + // Process sub-boxes that are defined for this glyph. + // We only need to do this if there was in fact a collision with the main octabox. + uint8 numsub = gc.numSubBounds(gid); + if (numsub > 0) + { + bool anyhits = false; + for (int j = 0; j < numsub; ++j) + { + const BBox &sbb = gc.getSubBoundingBBox(gid, j); + const SlantBox &ssb = gc.getSubBoundingSlantBox(gid, j); + switch (i) { + case 0 : // x + vmin = max(max(sbb.xi-tbb.xa+sx, ssb.di-tsb.da+sd+ty), ssb.si-tsb.sa+ss-ty); + vmax = min(min(sbb.xa-tbb.xi+sx, ssb.da-tsb.di+sd+ty), ssb.sa-tsb.si+ss-ty); + omin = sbb.yi + sy; + omax = sbb.ya + sy; + break; + case 1 : // y + vmin = max(max(sbb.yi-tbb.ya+sy, tsb.di-ssb.da-sd+tx), ssb.si-tsb.sa+ss-tx); + vmax = min(min(sbb.ya-tbb.yi+sy, tsb.da-ssb.di-sd+tx), ssb.sa-tsb.si+ss-tx); + omin = sbb.xi + sx; + omax = sbb.xa + sx; + break; + case 2 : // sum + vmin = max(max(ssb.si-tsb.sa+ss, 2*(sbb.yi-tbb.ya+sy)+td), 2*(sbb.xi-tbb.xa+sx)-td); + vmax = min(min(ssb.sa-tsb.si+ss, 2*(sbb.ya-tbb.yi+sy)+td), 2*(sbb.xa-tbb.xi+sx)-td); + omin = ssb.di + sd; + omax = ssb.da + sd; + break; + case 3 : // diff + vmin = max(max(ssb.di-tsb.da+sd, 2*(sbb.xi-tbb.xa+sx)-ts), -2*(sbb.ya-tbb.yi+sy)+ts); + vmax = min(min(ssb.da-tsb.di+sd, 2*(sbb.xa-tbb.xi+sx)-ts), -2*(sbb.yi-tbb.ya+sy)+ts); + omin = ssb.si + ss; + omax = ssb.sa + ss; + break; + } + if (vmax < cmin - lmargin || vmin > cmax + lmargin || omax < otmin - lmargin || omin > otmax + lmargin) + continue; + +#if !defined GRAPHITE2_NTRACING + if (dbgout) + dbgout->setenv(1, reinterpret_cast<void *>(j)); +#endif + if (omin > otmax) + _ranges[i].weightedAxis(i, vmin - lmargin, vmax + lmargin, 0, 0, 0, 0, 0, + sqr(lmargin - omin + otmax) * _marginWt, false); + else if (omax < otmin) + _ranges[i].weightedAxis(i, vmin - lmargin, vmax + lmargin, 0, 0, 0, 0, 0, + sqr(lmargin - otmin + omax) * _marginWt, false); + else + _ranges[i].exclude_with_margins(vmin, vmax, i); + anyhits = true; + } + if (anyhits) + isCol = true; + } + else // no sub-boxes + { +#if !defined GRAPHITE2_NTRACING + if (dbgout) + dbgout->setenv(1, reinterpret_cast<void *>(-1)); +#endif + isCol = true; + if (omin > otmax) + _ranges[i].weightedAxis(i, vmin - lmargin, vmax + lmargin, 0, 0, 0, 0, 0, + sqr(lmargin - omin + otmax) * _marginWt, false); + else if (omax < otmin) + _ranges[i].weightedAxis(i, vmin - lmargin, vmax + lmargin, 0, 0, 0, 0, 0, + sqr(lmargin - otmin + omax) * _marginWt, false); + else + _ranges[i].exclude_with_margins(vmin, vmax, i); + + } + } + } + bool res = true; + if (cslot->exclGlyph() > 0 && gc.check(cslot->exclGlyph()) && !isExclusion) + { + // Set up the bogus slot representing the exclusion glyph. + Slot *exclSlot = seg->newSlot(); + if (!exclSlot) + return res; + exclSlot->setGlyph(seg, cslot->exclGlyph()); + Position exclOrigin(slot->origin() + cslot->exclOffset()); + exclSlot->origin(exclOrigin); + SlotCollision exclInfo(seg, exclSlot); + res &= mergeSlot(seg, exclSlot, &exclInfo, currShift, isAfter, sameCluster, isCol, true, dbgout ); + seg->freeSlot(exclSlot); + } + hasCol |= isCol; + return res; + +} // end of ShiftCollider::mergeSlot + + +// Figure out where to move the target glyph to, and return the amount to shift by. +Position ShiftCollider::resolve(GR_MAYBE_UNUSED Segment *seg, bool &isCol, GR_MAYBE_UNUSED json * const dbgout) +{ + float tbase; + float totalCost = (float)(std::numeric_limits<float>::max() / 2); + Position resultPos = Position(0, 0); +#if !defined GRAPHITE2_NTRACING + int bestAxis = -1; + if (dbgout) + { + outputJsonDbgStartSlot(dbgout, seg); + *dbgout << "vectors" << json::array; + } +#endif + isCol = true; + for (int i = 0; i < 4; ++i) + { + float bestCost = -1; + float bestPos; + // Calculate the margin depending on whether we are moving diagonally or not: + switch (i) { + case 0 : // x direction + tbase = _currOffset.x; + break; + case 1 : // y direction + tbase = _currOffset.y; + break; + case 2 : // sum (negatively-sloped diagonals) + tbase = _currOffset.x + _currOffset.y; + break; + case 3 : // diff (positively-sloped diagonals) + tbase = _currOffset.x - _currOffset.y; + break; + } + Position testp; + bestPos = _ranges[i].closest(0, bestCost) - tbase; // Get the best relative position +#if !defined GRAPHITE2_NTRACING + if (dbgout) + outputJsonDbgOneVector(dbgout, seg, i, tbase, bestCost, bestPos) ; +#endif + if (bestCost >= 0.0f) + { + isCol = false; + switch (i) { + case 0 : testp = Position(bestPos, _currShift.y); break; + case 1 : testp = Position(_currShift.x, bestPos); break; + case 2 : testp = Position(0.5f * (_currShift.x - _currShift.y + bestPos), 0.5f * (_currShift.y - _currShift.x + bestPos)); break; + case 3 : testp = Position(0.5f * (_currShift.x + _currShift.y + bestPos), 0.5f * (_currShift.x + _currShift.y - bestPos)); break; + } + if (bestCost < totalCost - 0.01f) + { + totalCost = bestCost; + resultPos = testp; +#if !defined GRAPHITE2_NTRACING + bestAxis = i; +#endif + } + } + } // end of loop over 4 directions + +#if !defined GRAPHITE2_NTRACING + if (dbgout) + outputJsonDbgEndSlot(dbgout, resultPos, bestAxis, isCol); +#endif + + return resultPos; + +} // end of ShiftCollider::resolve + + +#if !defined GRAPHITE2_NTRACING + +void ShiftCollider::outputJsonDbg(json * const dbgout, Segment *seg, int axis) +{ + int axisMax = axis; + if (axis < 0) // output all axes + { + *dbgout << "gid" << _target->gid() + << "limit" << _limit + << "target" << json::object + << "origin" << _target->origin() + << "margin" << _margin + << "bbox" << seg->theGlyphBBoxTemporary(_target->gid()) + << "slantbox" << seg->getFace()->glyphs().slant(_target->gid()) + << json::close; // target object + *dbgout << "ranges" << json::array; + axis = 0; + axisMax = 3; + } + for (int iAxis = axis; iAxis <= axisMax; ++iAxis) + { + *dbgout << json::flat << json::array << _ranges[iAxis].position(); + for (Zones::const_iterator s = _ranges[iAxis].begin(), e = _ranges[iAxis].end(); s != e; ++s) + *dbgout << json::flat << json::array + << Position(s->x, s->xm) << s->sm << s->smx << s->c + << json::close; + *dbgout << json::close; + } + if (axis < axisMax) // looped through the _ranges array for all axes + *dbgout << json::close; // ranges array +} + +void ShiftCollider::outputJsonDbgStartSlot(json * const dbgout, Segment *seg) +{ + *dbgout << json::object // slot - not closed till the end of the caller method + << "slot" << objectid(dslot(seg, _target)) + << "gid" << _target->gid() + << "limit" << _limit + << "target" << json::object + << "origin" << _origin + << "currShift" << _currShift + << "currOffset" << seg->collisionInfo(_target)->offset() + << "bbox" << seg->theGlyphBBoxTemporary(_target->gid()) + << "slantBox" << seg->getFace()->glyphs().slant(_target->gid()) + << "fix" << "shift"; + *dbgout << json::close; // target object +} + +void ShiftCollider::outputJsonDbgEndSlot(GR_MAYBE_UNUSED json * const dbgout, + Position resultPos, int bestAxis, bool isCol) +{ + *dbgout << json::close // vectors array + << "result" << resultPos + //<< "scraping" << _scraping[bestAxis] + << "bestAxis" << bestAxis + << "stillBad" << isCol + << json::close; // slot object +} + +void ShiftCollider::outputJsonDbgOneVector(json * const dbgout, Segment *seg, int axis, + float tleft, float bestCost, float bestVal) +{ + const char * label; + switch (axis) + { + case 0: label = "x"; break; + case 1: label = "y"; break; + case 2: label = "sum (NE-SW)"; break; + case 3: label = "diff (NW-SE)"; break; + default: label = "???"; break; + } + + *dbgout << json::object // vector + << "direction" << label + << "targetMin" << tleft; + + outputJsonDbgRemovals(dbgout, axis, seg); + + *dbgout << "ranges"; + outputJsonDbg(dbgout, seg, axis); + + *dbgout << "bestCost" << bestCost + << "bestVal" << bestVal + tleft + << json::close; // vectors object +} + +void ShiftCollider::outputJsonDbgRemovals(json * const dbgout, int axis, Segment *seg) +{ + *dbgout << "removals" << json::array; + _ranges[axis].jsonDbgOut(seg); + *dbgout << json::close; // removals array +} + +#endif // !defined GRAPHITE2_NTRACING + + +//// KERN-COLLIDER //// + +inline +static float localmax (float al, float au, float bl, float bu, float x) +{ + if (al < bl) + { if (au < bu) return au < x ? au : x; } + else if (au > bu) return bl < x ? bl : x; + return x; +} + +inline +static float localmin(float al, float au, float bl, float bu, float x) +{ + if (bl > al) + { if (bu > au) return bl > x ? bl : x; } + else if (au > bu) return al > x ? al : x; + return x; +} + +// Return the given edge of the glyph at height y, taking any slant box into account. +static float get_edge(Segment *seg, const Slot *s, const Position &shift, float y, float width, float margin, bool isRight) +{ + const GlyphCache &gc = seg->getFace()->glyphs(); + unsigned short gid = s->gid(); + float sx = s->origin().x + shift.x; + float sy = s->origin().y + shift.y; + uint8 numsub = gc.numSubBounds(gid); + float res = isRight ? (float)-1e38 : (float)1e38; + + if (numsub > 0) + { + for (int i = 0; i < numsub; ++i) + { + const BBox &sbb = gc.getSubBoundingBBox(gid, i); + const SlantBox &ssb = gc.getSubBoundingSlantBox(gid, i); + if (sy + sbb.yi - margin > y + width / 2 || sy + sbb.ya + margin < y - width / 2) + continue; + if (isRight) + { + float x = sx + sbb.xa + margin; + if (x > res) + { + float td = sx - sy + ssb.da + margin + y; + float ts = sx + sy + ssb.sa + margin - y; + x = localmax(td - width / 2, td + width / 2, ts - width / 2, ts + width / 2, x); + if (x > res) + res = x; + } + } + else + { + float x = sx + sbb.xi - margin; + if (x < res) + { + float td = sx - sy + ssb.di - margin + y; + float ts = sx + sy + ssb.si - margin - y; + x = localmin(td - width / 2, td + width / 2, ts - width / 2, ts + width / 2, x); + if (x < res) + res = x; + } + } + } + } + else + { + const BBox &bb = gc.getBoundingBBox(gid); + const SlantBox &sb = gc.getBoundingSlantBox(gid); + if (sy + bb.yi - margin > y + width / 2 || sy + bb.ya + margin < y - width / 2) + return res; + float td = sx - sy + y; + float ts = sx + sy - y; + if (isRight) + res = localmax(td + sb.da - width / 2, td + sb.da + width / 2, ts + sb.sa - width / 2, ts + sb.sa + width / 2, sx + bb.xa) + margin; + else + res = localmin(td + sb.di - width / 2, td + sb.di + width / 2, ts + sb.si - width / 2, ts + sb.si + width / 2, sx + bb.xi) - margin; + } + return res; +} + + +bool KernCollider::initSlot(Segment *seg, Slot *aSlot, const Rect &limit, float margin, + const Position &currShift, const Position &offsetPrev, int dir, + float ymin, float ymax, GR_MAYBE_UNUSED json * const dbgout) +{ + const GlyphCache &gc = seg->getFace()->glyphs(); + const Slot *base = aSlot; + // const Slot *last = aSlot; + const Slot *s; + int numSlices; + while (base->attachedTo()) + base = base->attachedTo(); + if (margin < 10) margin = 10; + + _limit = limit; + _offsetPrev = offsetPrev; // kern from a previous pass + + // Calculate the height of the glyph and how many horizontal slices to use. + if (_maxy >= 1e37f) + { + _sliceWidth = margin / 1.5f; + _maxy = ymax + margin; + _miny = ymin - margin; + numSlices = int((_maxy - _miny + 2) / (_sliceWidth / 1.5f) + 1.f); // +2 helps with rounding errors + _edges.clear(); + _edges.insert(_edges.begin(), numSlices, (dir & 1) ? 1e38f : -1e38f); + _xbound = (dir & 1) ? (float)1e38f : (float)-1e38f; + } + else if (_maxy != ymax || _miny != ymin) + { + if (_miny != ymin) + { + numSlices = int((ymin - margin - _miny) / _sliceWidth - 1); + _miny += numSlices * _sliceWidth; + if (numSlices < 0) + _edges.insert(_edges.begin(), -numSlices, (dir & 1) ? 1e38f : -1e38f); + else if ((unsigned)numSlices < _edges.size()) // this shouldn't fire since we always grow the range + { + Vector<float>::iterator e = _edges.begin(); + while (numSlices--) + ++e; + _edges.erase(_edges.begin(), e); + } + } + if (_maxy != ymax) + { + numSlices = int((ymax + margin - _miny) / _sliceWidth + 1); + _maxy = numSlices * _sliceWidth + _miny; + if (numSlices > (int)_edges.size()) + _edges.insert(_edges.end(), numSlices - _edges.size(), (dir & 1) ? 1e38f : -1e38f); + else if (numSlices < (int)_edges.size()) // this shouldn't fire since we always grow the range + { + while ((int)_edges.size() > numSlices) + _edges.pop_back(); + } + } + goto done; + } + numSlices = int(_edges.size()); + +#if !defined GRAPHITE2_NTRACING + // Debugging + _seg = seg; + _slotNear.clear(); + _slotNear.insert(_slotNear.begin(), numSlices, NULL); + _nearEdges.clear(); + _nearEdges.insert(_nearEdges.begin(), numSlices, (dir & 1) ? -1e38f : +1e38f); +#endif + + // Determine the trailing edge of each slice (ie, left edge for a RTL glyph). + for (s = base; s; s = s->nextInCluster(s)) + { + SlotCollision *c = seg->collisionInfo(s); + if (!gc.check(s->gid())) + return false; + const BBox &bs = gc.getBoundingBBox(s->gid()); + float x = s->origin().x + c->shift().x + ((dir & 1) ? bs.xi : bs.xa); + // Loop over slices. + // Note smin might not be zero if glyph s is not at the bottom of the cluster; similarly for smax. + float toffset = c->shift().y - _miny + 1 + s->origin().y; + int smin = max(0, int((bs.yi + toffset) / _sliceWidth)); + int smax = min(numSlices - 1, int((bs.ya + toffset) / _sliceWidth + 1)); + for (int i = smin; i <= smax; ++i) + { + float t; + float y = _miny - 1 + (i + .5f) * _sliceWidth; // vertical center of slice + if ((dir & 1) && x < _edges[i]) + { + t = get_edge(seg, s, c->shift(), y, _sliceWidth, margin, false); + if (t < _edges[i]) + { + _edges[i] = t; + if (t < _xbound) + _xbound = t; + } + } + else if (!(dir & 1) && x > _edges[i]) + { + t = get_edge(seg, s, c->shift(), y, _sliceWidth, margin, true); + if (t > _edges[i]) + { + _edges[i] = t; + if (t > _xbound) + _xbound = t; + } + } + } + } + done: + _mingap = (float)1e37; // less than 1e38 s.t. 1e38-_mingap is really big + _target = aSlot; + _margin = margin; + _currShift = currShift; + return true; +} // end of KernCollider::initSlot + + +// Determine how much the target slot needs to kern away from the given slot. +// In other words, merge information from given slot's position with what the target slot knows +// about how it can kern. +// Return false if we know there is no collision, true if we think there might be one. +bool KernCollider::mergeSlot(Segment *seg, Slot *slot, const Position &currShift, float currSpace, int dir, GR_MAYBE_UNUSED json * const dbgout) +{ + int rtl = (dir & 1) * 2 - 1; + if (!seg->getFace()->glyphs().check(slot->gid())) + return false; + const Rect &bb = seg->theGlyphBBoxTemporary(slot->gid()); + const float sx = slot->origin().x + currShift.x; + float x = (sx + (rtl > 0 ? bb.tr.x : bb.bl.x)) * rtl; + // this isn't going to reduce _mingap so skip + if (_hit && x < rtl * (_xbound - _mingap - currSpace)) + return false; + + const float sy = slot->origin().y + currShift.y; + int smin = max(1, int((bb.bl.y + (1 - _miny + sy)) / _sliceWidth + 1)) - 1; + int smax = min((int)_edges.size() - 2, int((bb.tr.y + (1 - _miny + sy)) / _sliceWidth + 1)) + 1; + if (smin > smax) + return false; + bool collides = false; + bool nooverlap = true; + + for (int i = smin; i <= smax; ++i) + { + float here = _edges[i] * rtl; + if (here > (float)9e37) + continue; + if (!_hit || x > here - _mingap - currSpace) + { + float y = (float)(_miny - 1 + (i + .5f) * _sliceWidth); // vertical center of slice + // 2 * currSpace to account for the space that is already separating them and the space we want to add + float m = get_edge(seg, slot, currShift, y, _sliceWidth, 0., rtl > 0) * rtl + 2 * currSpace; + if (m < (float)-8e37) // only true if the glyph has a gap in it + continue; + nooverlap = false; + float t = here - m; + // _mingap is positive to shrink + if (t < _mingap || (!_hit && !collides)) + { + _mingap = t; + collides = true; + } +#if !defined GRAPHITE2_NTRACING + // Debugging - remember the closest neighboring edge for this slice. + if (m > rtl * _nearEdges[i]) + { + _slotNear[i] = slot; + _nearEdges[i] = m * rtl; + } +#endif + } + else + nooverlap = false; + } + if (nooverlap) + _mingap = max(_mingap, _xbound - rtl * (currSpace + _margin + x)); + if (collides && !nooverlap) + _hit = true; + return collides | nooverlap; // note that true is not a necessarily reliable value + +} // end of KernCollider::mergeSlot + + +// Return the amount to kern by. +Position KernCollider::resolve(GR_MAYBE_UNUSED Segment *seg, GR_MAYBE_UNUSED Slot *slot, + int dir, GR_MAYBE_UNUSED json * const dbgout) +{ + float resultNeeded = (1 - 2 * (dir & 1)) * _mingap; + // float resultNeeded = (1 - 2 * (dir & 1)) * (_mingap - margin); + float result = min(_limit.tr.x - _offsetPrev.x, max(resultNeeded, _limit.bl.x - _offsetPrev.x)); + +#if !defined GRAPHITE2_NTRACING + if (dbgout) + { + *dbgout << json::object // slot + << "slot" << objectid(dslot(seg, _target)) + << "gid" << _target->gid() + << "limit" << _limit + << "miny" << _miny + << "maxy" << _maxy + << "slicewidth" << _sliceWidth + << "target" << json::object + << "origin" << _target->origin() + //<< "currShift" << _currShift + << "offsetPrev" << _offsetPrev + << "bbox" << seg->theGlyphBBoxTemporary(_target->gid()) + << "slantBox" << seg->getFace()->glyphs().slant(_target->gid()) + << "fix" << "kern" + << json::close; // target object + + *dbgout << "slices" << json::array; + for (int is = 0; is < (int)_edges.size(); is++) + { + *dbgout << json::flat << json::object + << "i" << is + << "targetEdge" << _edges[is] + << "neighbor" << objectid(dslot(seg, _slotNear[is])) + << "nearEdge" << _nearEdges[is] + << json::close; + } + *dbgout << json::close; // slices array + + *dbgout + << "xbound" << _xbound + << "minGap" << _mingap + << "needed" << resultNeeded + << "result" << result + << "stillBad" << (result != resultNeeded) + << json::close; // slot object + } +#endif + + return Position(result, 0.); + +} // end of KernCollider::resolve + +void KernCollider::shift(const Position &mv, int dir) +{ + for (Vector<float>::iterator e = _edges.begin(); e != _edges.end(); ++e) + *e += mv.x; + _xbound += (1 - 2 * (dir & 1)) * mv.x; +} + +//// SLOT-COLLISION //// + +// Initialize the collision attributes for the given slot. +SlotCollision::SlotCollision(Segment *seg, Slot *slot) +{ + initFromSlot(seg, slot); +} + +void SlotCollision::initFromSlot(Segment *seg, Slot *slot) +{ + // Initialize slot attributes from glyph attributes. + // The order here must match the order in the grcompiler code, + // GrcSymbolTable::AssignInternalGlyphAttrIDs. + uint16 gid = slot->gid(); + uint16 aCol = seg->silf()->aCollision(); // flags attr ID + const GlyphFace * glyphFace = seg->getFace()->glyphs().glyphSafe(gid); + if (!glyphFace) + return; + const sparse &p = glyphFace->attrs(); + _flags = p[aCol]; + _limit = Rect(Position(int16(p[aCol+1]), int16(p[aCol+2])), + Position(int16(p[aCol+3]), int16(p[aCol+4]))); + _margin = p[aCol+5]; + _marginWt = p[aCol+6]; + + _seqClass = p[aCol+7]; + _seqProxClass = p[aCol+8]; + _seqOrder = p[aCol+9]; + _seqAboveXoff = p[aCol+10]; + _seqAboveWt = p[aCol+11]; + _seqBelowXlim = p[aCol+12]; + _seqBelowWt = p[aCol+13]; + _seqValignHt = p[aCol+14]; + _seqValignWt = p[aCol+15]; + + // These attributes do not have corresponding glyph attribute: + _exclGlyph = 0; + _exclOffset = Position(0, 0); +} + +float SlotCollision::getKern(int dir) const +{ + if ((_flags & SlotCollision::COLL_KERN) != 0) + return float(_shift.x * ((dir & 1) ? -1 : 1)); + else + return 0; +} + +bool SlotCollision::ignore() const +{ + return ((flags() & SlotCollision::COLL_IGNORE) || (flags() & SlotCollision::COLL_ISSPACE)); +} diff --git a/thirdparty/graphite/src/Decompressor.cpp b/thirdparty/graphite/src/Decompressor.cpp new file mode 100644 index 0000000000..42dc9113e5 --- /dev/null +++ b/thirdparty/graphite/src/Decompressor.cpp @@ -0,0 +1,125 @@ +/* GRAPHITE2 LICENSING + + Copyright 2015, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include <cassert> + +#include "inc/Decompressor.h" +#include "inc/Compression.h" + +using namespace lz4; + +namespace { + +inline +u32 read_literal(u8 const * &s, u8 const * const e, u32 l) { + if (l == 15 && s != e) + { + u8 b = 0; + do { l += b = *s++; } while(b==0xff && s != e); + } + return l; +} + +bool read_sequence(u8 const * &src, u8 const * const end, u8 const * &literal, + u32 & literal_len, u32 & match_len, u32 & match_dist) +{ + u8 const token = *src++; + + literal_len = read_literal(src, end, token >> 4); + literal = src; + src += literal_len; + + // Normal exit for end of stream, wrap arround check and parital match check. + if (src > end - sizeof(u16) || src < literal) + return false; + + match_dist = *src++; + match_dist |= *src++ << 8; + match_len = read_literal(src, end, token & 0xf) + MINMATCH; + + // Malformed stream check. + return src <= end-MINCODA; +} + +} + +int lz4::decompress(void const *in, size_t in_size, void *out, size_t out_size) +{ + if (out_size <= in_size || in_size < MINSRCSIZE) + return -1; + + u8 const * src = static_cast<u8 const *>(in), + * literal = 0, + * const src_end = src + in_size; + + u8 * dst = static_cast<u8*>(out), + * const dst_end = dst + out_size; + + // Check the in and out size hasn't wrapped around. + if (src >= src_end || dst >= dst_end) + return -1; + + u32 literal_len = 0, + match_len = 0, + match_dist = 0; + + while (read_sequence(src, src_end, literal, literal_len, match_len, + match_dist)) + { + if (literal_len != 0) + { + // Copy in literal. At this point the a minimal literal + minminal + // match plus the coda (1 + 2 + 5) must be 8 bytes or more allowing + // us to remain within the src buffer for an overrun_copy on + // machines upto 64 bits. + if (align(literal_len) > out_size) + return -1; + dst = overrun_copy(dst, literal, literal_len); + out_size -= literal_len; + } + + // Copy, possibly repeating, match from earlier in the + // decoded output. + u8 const * const pcpy = dst - match_dist; + if (pcpy < static_cast<u8*>(out) + || match_len > unsigned(out_size - LASTLITERALS) + // Wrap around checks: + || out_size < LASTLITERALS || pcpy >= dst) + return -1; + if (dst > pcpy+sizeof(unsigned long) + && align(match_len) <= out_size) + dst = overrun_copy(dst, pcpy, match_len); + else + dst = safe_copy(dst, pcpy, match_len); + out_size -= match_len; + } + + if (literal > src_end - literal_len || literal_len > out_size) + return -1; + dst = fast_copy(dst, literal, literal_len); + + return int(dst - (u8*)out); +} diff --git a/thirdparty/graphite/src/Face.cpp b/thirdparty/graphite/src/Face.cpp new file mode 100644 index 0000000000..3e106050d7 --- /dev/null +++ b/thirdparty/graphite/src/Face.cpp @@ -0,0 +1,366 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include <cstring> +#include "graphite2/Segment.h" +#include "inc/CmapCache.h" +#include "inc/debug.h" +#include "inc/Decompressor.h" +#include "inc/Endian.h" +#include "inc/Face.h" +#include "inc/FileFace.h" +#include "inc/GlyphFace.h" +#include "inc/json.h" +#include "inc/Segment.h" +#include "inc/NameTable.h" +#include "inc/Error.h" + +using namespace graphite2; + +namespace +{ +enum compression +{ + NONE, + LZ4 +}; + +} + +Face::Face(const void* appFaceHandle/*non-NULL*/, const gr_face_ops & ops) +: m_appFaceHandle(appFaceHandle), + m_pFileFace(NULL), + m_pGlyphFaceCache(NULL), + m_cmap(NULL), + m_pNames(NULL), + m_logger(NULL), + m_error(0), m_errcntxt(0), + m_silfs(NULL), + m_numSilf(0), + m_ascent(0), + m_descent(0) +{ + memset(&m_ops, 0, sizeof m_ops); + memcpy(&m_ops, &ops, min(sizeof m_ops, ops.size)); +} + + +Face::~Face() +{ + setLogger(0); + delete m_pGlyphFaceCache; + delete m_cmap; + delete[] m_silfs; +#ifndef GRAPHITE2_NFILEFACE + delete m_pFileFace; +#endif + delete m_pNames; +} + +float Face::default_glyph_advance(const void* font_ptr, gr_uint16 glyphid) +{ + const Font & font = *reinterpret_cast<const Font *>(font_ptr); + + return font.face().glyphs().glyph(glyphid)->theAdvance().x * font.scale(); +} + +bool Face::readGlyphs(uint32 faceOptions) +{ + Error e; +#ifdef GRAPHITE2_TELEMETRY + telemetry::category _glyph_cat(tele.glyph); +#endif + error_context(EC_READGLYPHS); + m_pGlyphFaceCache = new GlyphCache(*this, faceOptions); + + if (e.test(!m_pGlyphFaceCache, E_OUTOFMEM) + || e.test(m_pGlyphFaceCache->numGlyphs() == 0, E_NOGLYPHS) + || e.test(m_pGlyphFaceCache->unitsPerEm() == 0, E_BADUPEM)) + { + return error(e); + } + + if (faceOptions & gr_face_cacheCmap) + m_cmap = new CachedCmap(*this); + else + m_cmap = new DirectCmap(*this); + if (e.test(!m_cmap, E_OUTOFMEM) || e.test(!*m_cmap, E_BADCMAP)) + return error(e); + + if (faceOptions & gr_face_preloadGlyphs) + nameTable(); // preload the name table along with the glyphs. + + return true; +} + +bool Face::readGraphite(const Table & silf) +{ +#ifdef GRAPHITE2_TELEMETRY + telemetry::category _silf_cat(tele.silf); +#endif + Error e; + error_context(EC_READSILF); + const byte * p = silf; + if (e.test(!p, E_NOSILF) || e.test(silf.size() < 20, E_BADSIZE)) return error(e); + + const uint32 version = be::read<uint32>(p); + if (e.test(version < 0x00020000, E_TOOOLD)) return error(e); + if (version >= 0x00030000) + be::skip<uint32>(p); // compilerVersion + m_numSilf = be::read<uint16>(p); + + be::skip<uint16>(p); // reserved + + bool havePasses = false; + m_silfs = new Silf[m_numSilf]; + if (e.test(!m_silfs, E_OUTOFMEM)) return error(e); + for (int i = 0; i < m_numSilf; i++) + { + error_context(EC_ASILF + (i << 8)); + const uint32 offset = be::read<uint32>(p), + next = i == m_numSilf - 1 ? uint32(silf.size()) : be::peek<uint32>(p); + if (e.test(next > silf.size() || offset >= next, E_BADSIZE)) + return error(e); + + if (!m_silfs[i].readGraphite(silf + offset, next - offset, *this, version)) + return false; + + if (m_silfs[i].numPasses()) + havePasses = true; + } + + return havePasses; +} + +bool Face::readFeatures() +{ + return m_Sill.readFace(*this); +} + +bool Face::runGraphite(Segment *seg, const Silf *aSilf) const +{ +#if !defined GRAPHITE2_NTRACING + json * dbgout = logger(); + if (dbgout) + { + *dbgout << json::object + << "id" << objectid(seg) + << "passes" << json::array; + } +#endif + +// if ((seg->dir() & 1) != aSilf->dir()) +// seg->reverseSlots(); + if ((seg->dir() & 3) == 3 && aSilf->bidiPass() == 0xFF) + seg->doMirror(aSilf->aMirror()); + bool res = aSilf->runGraphite(seg, 0, aSilf->positionPass(), true); + if (res) + { + seg->associateChars(0, seg->charInfoCount()); + if (aSilf->flags() & 0x20) + res &= seg->initCollisions(); + if (res) + res &= aSilf->runGraphite(seg, aSilf->positionPass(), aSilf->numPasses(), false); + } + +#if !defined GRAPHITE2_NTRACING + if (dbgout) +{ + seg->positionSlots(0, 0, 0, seg->currdir()); + *dbgout << json::item + << json::close // Close up the passes array + << "outputdir" << (seg->currdir() ? "rtl" : "ltr") + << "output" << json::array; + for(Slot * s = seg->first(); s; s = s->next()) + *dbgout << dslot(seg, s); + *dbgout << json::close + << "advance" << seg->advance() + << "chars" << json::array; + for(size_t i = 0, n = seg->charInfoCount(); i != n; ++i) + *dbgout << json::flat << *seg->charinfo(int(i)); + *dbgout << json::close // Close up the chars array + << json::close; // Close up the segment object + } +#endif + + return res; +} + +void Face::setLogger(FILE * log_file GR_MAYBE_UNUSED) +{ +#if !defined GRAPHITE2_NTRACING + delete m_logger; + m_logger = log_file ? new json(log_file) : 0; +#endif +} + +const Silf *Face::chooseSilf(uint32 script) const +{ + if (m_numSilf == 0) + return NULL; + else if (m_numSilf == 1 || script == 0) + return m_silfs; + else // do more work here + return m_silfs; +} + +uint16 Face::findPseudo(uint32 uid) const +{ + return (m_numSilf) ? m_silfs[0].findPseudo(uid) : 0; +} + +int32 Face::getGlyphMetric(uint16 gid, uint8 metric) const +{ + switch (metrics(metric)) + { + case kgmetAscent : return m_ascent; + case kgmetDescent : return m_descent; + default: + if (gid >= glyphs().numGlyphs()) return 0; + return glyphs().glyph(gid)->getMetric(metric); + } +} + +void Face::takeFileFace(FileFace* pFileFace GR_MAYBE_UNUSED/*takes ownership*/) +{ +#ifndef GRAPHITE2_NFILEFACE + if (m_pFileFace==pFileFace) + return; + + delete m_pFileFace; + m_pFileFace = pFileFace; +#endif +} + +NameTable * Face::nameTable() const +{ + if (m_pNames) return m_pNames; + const Table name(*this, Tag::name); + if (name) + m_pNames = new NameTable(name, name.size()); + return m_pNames; +} + +uint16 Face::languageForLocale(const char * locale) const +{ + nameTable(); + if (m_pNames) + return m_pNames->getLanguageId(locale); + return 0; +} + + + +Face::Table::Table(const Face & face, const Tag n, uint32 version) throw() +: _f(&face), _sz(0), _compressed(false) +{ + _p = static_cast<const byte *>((*_f->m_ops.get_table)(_f->m_appFaceHandle, n, &_sz)); + + if (!TtfUtil::CheckTable(n, _p, _sz)) + { + release(); // Make sure we release the table buffer even if the table failed its checks + return; + } + + if (be::peek<uint32>(_p) >= version) + decompress(); +} + +void Face::Table::release() +{ + if (_compressed) + free(const_cast<byte *>(_p)); + else if (_p && _f->m_ops.release_table) + (*_f->m_ops.release_table)(_f->m_appFaceHandle, _p); + _p = 0; _sz = 0; +} + +Face::Table & Face::Table::operator = (const Table && rhs) throw() +{ + if (this == &rhs) return *this; + release(); + new (this) Table(std::move(rhs)); + return *this; +} + +Error Face::Table::decompress() +{ + Error e; + if (e.test(_sz < 5 * sizeof(uint32), E_BADSIZE)) + return e; + byte * uncompressed_table = 0; + size_t uncompressed_size = 0; + + const byte * p = _p; + const uint32 version = be::read<uint32>(p); // Table version number. + + // The scheme is in the top 5 bits of the 1st uint32. + const uint32 hdr = be::read<uint32>(p); + switch(compression(hdr >> 27)) + { + case NONE: return e; + + case LZ4: + { + uncompressed_size = hdr & 0x07ffffff; + uncompressed_table = gralloc<byte>(uncompressed_size); + if (!e.test(!uncompressed_table || uncompressed_size < 4, E_OUTOFMEM)) + { + memset(uncompressed_table, 0, 4); // make sure version number is initialised + // coverity[forward_null : FALSE] - uncompressed_table has been checked so can't be null + // coverity[checked_return : FALSE] - we test e later + e.test(lz4::decompress(p, _sz - 2*sizeof(uint32), uncompressed_table, uncompressed_size) != signed(uncompressed_size), E_SHRINKERFAILED); + } + break; + } + + default: + e.error(E_BADSCHEME); + }; + + // Check the uncompressed version number against the original. + if (!e) + // coverity[forward_null : FALSE] - uncompressed_table has already been tested so can't be null + // coverity[checked_return : FALSE] - we test e later + e.test(be::peek<uint32>(uncompressed_table) != version, E_SHRINKERFAILED); + + // Tell the provider to release the compressed form since were replacing + // it anyway. + release(); + + if (e) + { + free(uncompressed_table); + uncompressed_table = 0; + uncompressed_size = 0; + } + + _p = uncompressed_table; + _sz = uncompressed_size; + _compressed = true; + + return e; +} diff --git a/thirdparty/graphite/src/FeatureMap.cpp b/thirdparty/graphite/src/FeatureMap.cpp new file mode 100644 index 0000000000..014a88fd08 --- /dev/null +++ b/thirdparty/graphite/src/FeatureMap.cpp @@ -0,0 +1,293 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include <cstring> + +#include "inc/Main.h" +#include "inc/bits.h" +#include "inc/Endian.h" +#include "inc/FeatureMap.h" +#include "inc/FeatureVal.h" +#include "graphite2/Font.h" +#include "inc/TtfUtil.h" +#include <cstdlib> +#include "inc/Face.h" + + +using namespace graphite2; + +namespace +{ + static int cmpNameAndFeatures(const void *ap, const void *bp) + { + const NameAndFeatureRef & a = *static_cast<const NameAndFeatureRef *>(ap), + & b = *static_cast<const NameAndFeatureRef *>(bp); + return (a < b ? -1 : (b < a ? 1 : 0)); + } + + const size_t FEAT_HEADER = sizeof(uint32) + 2*sizeof(uint16) + sizeof(uint32), + FEATURE_SIZE = sizeof(uint32) + + 2*sizeof(uint16) + + sizeof(uint32) + + 2*sizeof(uint16), + FEATURE_SETTING_SIZE = sizeof(int16) + sizeof(uint16); + + uint16 readFeatureSettings(const byte * p, FeatureSetting * s, size_t num_settings) + { + uint16 max_val = 0; + for (FeatureSetting * const end = s + num_settings; s != end; ++s) + { + const int16 value = be::read<int16>(p); + ::new (s) FeatureSetting(value, be::read<uint16>(p)); + if (uint16(value) > max_val) max_val = value; + } + + return max_val; + } +} + +FeatureRef::FeatureRef(const Face & face, + unsigned short & bits_offset, uint32 max_val, + uint32 name, uint16 uiName, flags_t flags, + FeatureSetting *settings, uint16 num_set) throw() +: m_face(&face), + m_nameValues(settings), + m_mask(mask_over_val(max_val)), + m_max(max_val), + m_id(name), + m_nameid(uiName), + m_numSet(num_set), + m_flags(flags) +{ + const uint8 need_bits = bit_set_count(m_mask); + m_index = (bits_offset + need_bits) / SIZEOF_CHUNK; + if (m_index > bits_offset / SIZEOF_CHUNK) + bits_offset = m_index*SIZEOF_CHUNK; + m_bits = bits_offset % SIZEOF_CHUNK; + bits_offset += need_bits; + m_mask <<= m_bits; +} + +FeatureRef::~FeatureRef() throw() +{ + free(m_nameValues); +} + +bool FeatureMap::readFeats(const Face & face) +{ + const Face::Table feat(face, TtfUtil::Tag::Feat); + const byte * p = feat; + if (!p) return true; + if (feat.size() < FEAT_HEADER) return false; + + const byte *const feat_start = p, + *const feat_end = p + feat.size(); + + const uint32 version = be::read<uint32>(p); + m_numFeats = be::read<uint16>(p); + be::skip<uint16>(p); + be::skip<uint32>(p); + + // Sanity checks + if (m_numFeats == 0) return true; + if (version < 0x00010000 || + p + m_numFeats*FEATURE_SIZE > feat_end) + { //defensive + m_numFeats = 0; + return false; + } + + m_feats = new FeatureRef [m_numFeats]; + uint16 * const defVals = gralloc<uint16>(m_numFeats); + if (!defVals || !m_feats) return false; + unsigned short bits = 0; //to cause overflow on first Feature + + for (int i = 0, ie = m_numFeats; i != ie; i++) + { + const uint32 label = version < 0x00020000 ? be::read<uint16>(p) : be::read<uint32>(p); + const uint16 num_settings = be::read<uint16>(p); + if (version >= 0x00020000) + be::skip<uint16>(p); + const uint32 settings_offset = be::read<uint32>(p); + const uint16 flags = be::read<uint16>(p), + uiName = be::read<uint16>(p); + + if (settings_offset > size_t(feat_end - feat_start) + || settings_offset + num_settings * FEATURE_SETTING_SIZE > size_t(feat_end - feat_start)) + { + free(defVals); + return false; + } + + FeatureSetting *uiSet; + uint32 maxVal; + if (num_settings != 0) + { + uiSet = gralloc<FeatureSetting>(num_settings); + if (!uiSet) + { + free(defVals); + return false; + } + maxVal = readFeatureSettings(feat_start + settings_offset, uiSet, num_settings); + defVals[i] = uiSet[0].value(); + } + else + { + uiSet = 0; + maxVal = 0xffffffff; + defVals[i] = 0; + } + + ::new (m_feats + i) FeatureRef (face, bits, maxVal, + label, uiName, + FeatureRef::flags_t(flags), + uiSet, num_settings); + } + new (&m_defaultFeatures) Features(bits/(sizeof(uint32)*8) + 1, *this); + m_pNamedFeats = new NameAndFeatureRef[m_numFeats]; + if (!m_pNamedFeats) + { + free(defVals); + return false; + } + for (int i = 0; i < m_numFeats; ++i) + { + m_feats[i].applyValToFeature(defVals[i], m_defaultFeatures); + m_pNamedFeats[i] = m_feats[i]; + } + + free(defVals); + + qsort(m_pNamedFeats, m_numFeats, sizeof(NameAndFeatureRef), &cmpNameAndFeatures); + + return true; +} + +bool SillMap::readFace(const Face & face) +{ + if (!m_FeatureMap.readFeats(face)) return false; + if (!readSill(face)) return false; + return true; +} + + +bool SillMap::readSill(const Face & face) +{ + const Face::Table sill(face, TtfUtil::Tag::Sill); + const byte *p = sill; + + if (!p) return true; + if (sill.size() < 12) return false; + if (be::read<uint32>(p) != 0x00010000UL) return false; + m_numLanguages = be::read<uint16>(p); + m_langFeats = new LangFeaturePair[m_numLanguages]; + if (!m_langFeats || !m_FeatureMap.m_numFeats) { m_numLanguages = 0; return true; } //defensive + + p += 6; // skip the fast search + if (sill.size() < m_numLanguages * 8U + 12) return false; + + for (int i = 0; i < m_numLanguages; i++) + { + uint32 langid = be::read<uint32>(p); + uint16 numSettings = be::read<uint16>(p); + uint16 offset = be::read<uint16>(p); + if (offset + 8U * numSettings > sill.size() && numSettings > 0) return false; + Features* feats = new Features(m_FeatureMap.m_defaultFeatures); + if (!feats) return false; + const byte *pLSet = sill + offset; + + // Apply langauge specific settings + for (int j = 0; j < numSettings; j++) + { + uint32 name = be::read<uint32>(pLSet); + uint16 val = be::read<uint16>(pLSet); + pLSet += 2; + const FeatureRef* pRef = m_FeatureMap.findFeatureRef(name); + if (pRef) pRef->applyValToFeature(val, *feats); + } + // Add the language id feature which is always feature id 1 + const FeatureRef* pRef = m_FeatureMap.findFeatureRef(1); + if (pRef) pRef->applyValToFeature(langid, *feats); + + m_langFeats[i].m_lang = langid; + m_langFeats[i].m_pFeatures = feats; + } + return true; +} + + +Features* SillMap::cloneFeatures(uint32 langname/*0 means default*/) const +{ + if (langname) + { + // the number of languages in a font is usually small e.g. 8 in Doulos + // so this loop is not very expensive + for (uint16 i = 0; i < m_numLanguages; i++) + { + if (m_langFeats[i].m_lang == langname) + return new Features(*m_langFeats[i].m_pFeatures); + } + } + return new Features (m_FeatureMap.m_defaultFeatures); +} + + + +const FeatureRef *FeatureMap::findFeatureRef(uint32 name) const +{ + NameAndFeatureRef *it; + + for (it = m_pNamedFeats; it < m_pNamedFeats + m_numFeats; ++it) + if (it->m_name == name) + return it->m_pFRef; + return NULL; +} + +bool FeatureRef::applyValToFeature(uint32 val, Features & pDest) const +{ + if (val>maxVal() || !m_face) + return false; + if (pDest.m_pMap==NULL) + pDest.m_pMap = &m_face->theSill().theFeatureMap(); + else + if (pDest.m_pMap!=&m_face->theSill().theFeatureMap()) + return false; //incompatible + if (m_index >= pDest.size()) + pDest.resize(m_index+1); + pDest[m_index] &= ~m_mask; + pDest[m_index] |= (uint32(val) << m_bits); + return true; +} + +uint32 FeatureRef::getFeatureVal(const Features& feats) const +{ + if (m_index < feats.size() && m_face + && &m_face->theSill().theFeatureMap()==feats.m_pMap) + return (feats[m_index] & m_mask) >> m_bits; + else + return 0; +} diff --git a/thirdparty/graphite/src/FileFace.cpp b/thirdparty/graphite/src/FileFace.cpp new file mode 100644 index 0000000000..7e663876a7 --- /dev/null +++ b/thirdparty/graphite/src/FileFace.cpp @@ -0,0 +1,115 @@ +/* GRAPHITE2 LICENSING + + Copyright 2012, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include <cstring> +#include "inc/FileFace.h" + + +#ifndef GRAPHITE2_NFILEFACE + +using namespace graphite2; + +FileFace::FileFace(const char *filename) +: _file(fopen(filename, "rb")), + _file_len(0), + _header_tbl(NULL), + _table_dir(NULL) +{ + if (!_file) return; + + if (fseek(_file, 0, SEEK_END)) return; + _file_len = ftell(_file); + if (fseek(_file, 0, SEEK_SET)) return; + + size_t tbl_offset, tbl_len; + + // Get the header. + if (!TtfUtil::GetHeaderInfo(tbl_offset, tbl_len)) return; + if (fseek(_file, long(tbl_offset), SEEK_SET)) return; + _header_tbl = (TtfUtil::Sfnt::OffsetSubTable*)gralloc<char>(tbl_len); + if (_header_tbl) + { + if (fread(_header_tbl, 1, tbl_len, _file) != tbl_len) return; + if (!TtfUtil::CheckHeader(_header_tbl)) return; + } + + // Get the table directory + if (!TtfUtil::GetTableDirInfo(_header_tbl, tbl_offset, tbl_len)) return; + _table_dir = (TtfUtil::Sfnt::OffsetSubTable::Entry*)gralloc<char>(tbl_len); + if (fseek(_file, long(tbl_offset), SEEK_SET)) return; + if (_table_dir && fread(_table_dir, 1, tbl_len, _file) != tbl_len) + { + free(_table_dir); + _table_dir = NULL; + } + return; +} + +FileFace::~FileFace() +{ + free(_table_dir); + free(_header_tbl); + if (_file) + fclose(_file); +} + + +const void *FileFace::get_table_fn(const void* appFaceHandle, unsigned int name, size_t *len) +{ + if (appFaceHandle == 0) return 0; + const FileFace & file_face = *static_cast<const FileFace *>(appFaceHandle); + + void *tbl; + size_t tbl_offset, tbl_len; + if (!TtfUtil::GetTableInfo(name, file_face._header_tbl, file_face._table_dir, tbl_offset, tbl_len)) + return 0; + + if (tbl_offset > file_face._file_len || tbl_len > file_face._file_len - tbl_offset + || fseek(file_face._file, long(tbl_offset), SEEK_SET) != 0) + return 0; + + tbl = malloc(tbl_len); + if (!tbl || fread(tbl, 1, tbl_len, file_face._file) != tbl_len) + { + free(tbl); + return 0; + } + + if (len) *len = tbl_len; + return tbl; +} + +void FileFace::rel_table_fn(const void* appFaceHandle, const void *table_buffer) +{ + if (appFaceHandle == 0) return; + + free(const_cast<void *>(table_buffer)); +} + +const gr_face_ops FileFace::ops = { sizeof FileFace::ops, &FileFace::get_table_fn, &FileFace::rel_table_fn }; + + +#endif //!GRAPHITE2_NFILEFACE diff --git a/thirdparty/graphite/src/Font.cpp b/thirdparty/graphite/src/Font.cpp new file mode 100644 index 0000000000..faf3715f9d --- /dev/null +++ b/thirdparty/graphite/src/Font.cpp @@ -0,0 +1,58 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "inc/Face.h" +#include "inc/Font.h" +#include "inc/GlyphCache.h" + +using namespace graphite2; + +Font::Font(float ppm, const Face & f, const void * appFontHandle, const gr_font_ops * ops) +: m_appFontHandle(appFontHandle ? appFontHandle : this), + m_face(f), + m_scale(ppm / f.glyphs().unitsPerEm()), + m_hinted(appFontHandle && ops && (ops->glyph_advance_x || ops->glyph_advance_y)) +{ + memset(&m_ops, 0, sizeof m_ops); + if (m_hinted && ops) + memcpy(&m_ops, ops, min(sizeof m_ops, ops->size)); + else + m_ops.glyph_advance_x = &Face::default_glyph_advance; + + size_t nGlyphs = f.glyphs().numGlyphs(); + m_advances = gralloc<float>(nGlyphs); + if (m_advances) + { + for (float *advp = m_advances; nGlyphs; --nGlyphs, ++advp) + *advp = INVALID_ADVANCE; + } +} + + +/*virtual*/ Font::~Font() +{ + free(m_advances); +} diff --git a/thirdparty/graphite/src/GlyphCache.cpp b/thirdparty/graphite/src/GlyphCache.cpp new file mode 100644 index 0000000000..282bdc18fd --- /dev/null +++ b/thirdparty/graphite/src/GlyphCache.cpp @@ -0,0 +1,492 @@ +/* GRAPHITE2 LICENSING + + Copyright 2012, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "graphite2/Font.h" + +#include "inc/Main.h" +#include "inc/Face.h" //for the tags +#include "inc/GlyphCache.h" +#include "inc/GlyphFace.h" +#include "inc/Endian.h" +#include "inc/bits.h" + +using namespace graphite2; + +namespace +{ + // Iterator over version 1 or 2 glat entries which consist of a series of + // +-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+ + // v1 |k|n|v1 |v2 |...|vN | or v2 | k | n |v1 |v2 |...|vN | + // +-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+ + // variable length structures. + + template<typename W> + class _glat_iterator : public std::iterator<std::input_iterator_tag, std::pair<sparse::key_type, sparse::mapped_type> > + { + unsigned short key() const { return uint16(be::peek<W>(_e) + _n); } + unsigned int run() const { return be::peek<W>(_e+sizeof(W)); } + void advance_entry() { _n = 0; _e = _v; be::skip<W>(_v,2); } + public: + _glat_iterator(const void * glat=0) : _e(reinterpret_cast<const byte *>(glat)), _v(_e+2*sizeof(W)), _n(0) {} + + _glat_iterator<W> & operator ++ () { + ++_n; be::skip<uint16>(_v); + if (_n == run()) advance_entry(); + return *this; + } + _glat_iterator<W> operator ++ (int) { _glat_iterator<W> tmp(*this); operator++(); return tmp; } + + // This is strictly a >= operator. A true == operator could be + // implemented that test for overlap but it would be more expensive a + // test. + bool operator == (const _glat_iterator<W> & rhs) { return _v >= rhs._e - 1; } + bool operator != (const _glat_iterator<W> & rhs) { return !operator==(rhs); } + + value_type operator * () const { + return value_type(key(), be::peek<uint16>(_v)); + } + + protected: + const byte * _e, * _v; + size_t _n; + }; + + typedef _glat_iterator<uint8> glat_iterator; + typedef _glat_iterator<uint16> glat2_iterator; +} + +const SlantBox SlantBox::empty = {0,0,0,0}; + + +class GlyphCache::Loader +{ +public: + Loader(const Face & face); //return result indicates success. Do not use if failed. + + operator bool () const throw(); + unsigned short int units_per_em() const throw(); + unsigned short int num_glyphs() const throw(); + unsigned short int num_attrs() const throw(); + bool has_boxes() const throw(); + + const GlyphFace * read_glyph(unsigned short gid, GlyphFace &, int *numsubs) const throw(); + GlyphBox * read_box(uint16 gid, GlyphBox *curr, const GlyphFace & face) const throw(); + + CLASS_NEW_DELETE; +private: + Face::Table _head, + _hhea, + _hmtx, + _glyf, + _loca, + m_pGlat, + m_pGloc; + + bool _long_fmt; + bool _has_boxes; + unsigned short _num_glyphs_graphics, //i.e. boundary box and advance + _num_glyphs_attributes, + _num_attrs; // number of glyph attributes per glyph +}; + + + +GlyphCache::GlyphCache(const Face & face, const uint32 face_options) +: _glyph_loader(new Loader(face)), + _glyphs(_glyph_loader && *_glyph_loader && _glyph_loader->num_glyphs() + ? grzeroalloc<const GlyphFace *>(_glyph_loader->num_glyphs()) : 0), + _boxes(_glyph_loader && _glyph_loader->has_boxes() && _glyph_loader->num_glyphs() + ? grzeroalloc<GlyphBox *>(_glyph_loader->num_glyphs()) : 0), + _num_glyphs(_glyphs ? _glyph_loader->num_glyphs() : 0), + _num_attrs(_glyphs ? _glyph_loader->num_attrs() : 0), + _upem(_glyphs ? _glyph_loader->units_per_em() : 0) +{ + if ((face_options & gr_face_preloadGlyphs) && _glyph_loader && _glyphs) + { + int numsubs = 0; + GlyphFace * const glyphs = new GlyphFace [_num_glyphs]; + if (!glyphs) + return; + + // The 0 glyph is definately required. + _glyphs[0] = _glyph_loader->read_glyph(0, glyphs[0], &numsubs); + + // glyphs[0] has the same address as the glyphs array just allocated, + // thus assigning the &glyphs[0] to _glyphs[0] means _glyphs[0] points + // to the entire array. + const GlyphFace * loaded = _glyphs[0]; + for (uint16 gid = 1; loaded && gid != _num_glyphs; ++gid) + _glyphs[gid] = loaded = _glyph_loader->read_glyph(gid, glyphs[gid], &numsubs); + + if (!loaded) + { + _glyphs[0] = 0; + delete [] glyphs; + } + else if (numsubs > 0 && _boxes) + { + GlyphBox * boxes = (GlyphBox *)gralloc<char>(_num_glyphs * sizeof(GlyphBox) + numsubs * 8 * sizeof(float)); + GlyphBox * currbox = boxes; + + for (uint16 gid = 0; currbox && gid != _num_glyphs; ++gid) + { + _boxes[gid] = currbox; + currbox = _glyph_loader->read_box(gid, currbox, *_glyphs[gid]); + } + if (!currbox) + { + free(boxes); + _boxes[0] = 0; + } + } + delete _glyph_loader; + _glyph_loader = 0; + // coverity[leaked_storage : FALSE] - calling read_glyph on index 0 saved + // glyphs as _glyphs[0]. Setting _glyph_loader to nullptr here flags that + // the dtor needs to call delete[] on _glyphs[0] to release what was allocated + // as glyphs + } + + if (_glyphs && glyph(0) == 0) + { + free(_glyphs); + _glyphs = 0; + if (_boxes) + { + free(_boxes); + _boxes = 0; + } + _num_glyphs = _num_attrs = _upem = 0; + } +} + + +GlyphCache::~GlyphCache() +{ + if (_glyphs) + { + if (_glyph_loader) + { + const GlyphFace * * g = _glyphs; + for(unsigned short n = _num_glyphs; n; --n, ++g) + delete *g; + } + else + delete [] _glyphs[0]; + free(_glyphs); + } + if (_boxes) + { + if (_glyph_loader) + { + GlyphBox * * g = _boxes; + for (uint16 n = _num_glyphs; n; --n, ++g) + free(*g); + } + else + free(_boxes[0]); + free(_boxes); + } + delete _glyph_loader; +} + +const GlyphFace *GlyphCache::glyph(unsigned short glyphid) const //result may be changed by subsequent call with a different glyphid +{ + if (glyphid >= numGlyphs()) + return _glyphs[0]; + const GlyphFace * & p = _glyphs[glyphid]; + if (p == 0 && _glyph_loader) + { + int numsubs = 0; + GlyphFace * g = new GlyphFace(); + if (g) p = _glyph_loader->read_glyph(glyphid, *g, &numsubs); + if (!p) + { + delete g; + return *_glyphs; + } + if (_boxes) + { + _boxes[glyphid] = (GlyphBox *)gralloc<char>(sizeof(GlyphBox) + 8 * numsubs * sizeof(float)); + if (!_glyph_loader->read_box(glyphid, _boxes[glyphid], *_glyphs[glyphid])) + { + free(_boxes[glyphid]); + _boxes[glyphid] = 0; + } + } + } + return p; +} + + + +GlyphCache::Loader::Loader(const Face & face) +: _head(face, Tag::head), + _hhea(face, Tag::hhea), + _hmtx(face, Tag::hmtx), + _glyf(face, Tag::glyf), + _loca(face, Tag::loca), + _long_fmt(false), + _has_boxes(false), + _num_glyphs_graphics(0), + _num_glyphs_attributes(0), + _num_attrs(0) +{ + if (!operator bool()) + return; + + const Face::Table maxp = Face::Table(face, Tag::maxp); + if (!maxp) { _head = Face::Table(); return; } + + _num_glyphs_graphics = static_cast<unsigned short>(TtfUtil::GlyphCount(maxp)); + // This will fail if the number of glyphs is wildly out of range. + if (_glyf && TtfUtil::LocaLookup(_num_glyphs_graphics-1, _loca, _loca.size(), _head) == size_t(-2)) + { + _head = Face::Table(); + return; + } + + if ((m_pGlat = Face::Table(face, Tag::Glat, 0x00030000)) == NULL + || (m_pGloc = Face::Table(face, Tag::Gloc)) == NULL + || m_pGloc.size() < 8) + { + _head = Face::Table(); + return; + } + const byte * p = m_pGloc; + int version = be::read<uint32>(p); + const uint16 flags = be::read<uint16>(p); + _num_attrs = be::read<uint16>(p); + // We can accurately calculate the number of attributed glyphs by + // subtracting the length of the attribids array (numAttribs long if present) + // and dividing by either 2 or 4 depending on shor or lonf format + _long_fmt = flags & 1; + ptrdiff_t tmpnumgattrs = (m_pGloc.size() + - (p - m_pGloc) + - sizeof(uint16)*(flags & 0x2 ? _num_attrs : 0)) + / (_long_fmt ? sizeof(uint32) : sizeof(uint16)) - 1; + + if (version >= 0x00020000 || tmpnumgattrs < 0 || tmpnumgattrs > 65535 + || _num_attrs == 0 || _num_attrs > 0x3000 // is this hard limit appropriate? + || _num_glyphs_graphics > tmpnumgattrs + || m_pGlat.size() < 4) + { + _head = Face::Table(); + return; + } + + _num_glyphs_attributes = static_cast<unsigned short>(tmpnumgattrs); + p = m_pGlat; + version = be::read<uint32>(p); + if (version >= 0x00040000 || (version >= 0x00030000 && m_pGlat.size() < 8)) // reject Glat tables that are too new + { + _head = Face::Table(); + return; + } + else if (version >= 0x00030000) + { + unsigned int glatflags = be::read<uint32>(p); + _has_boxes = glatflags & 1; + // delete this once the compiler is fixed + _has_boxes = true; + } +} + +inline +GlyphCache::Loader::operator bool () const throw() +{ + return _head && _hhea && _hmtx && !(bool(_glyf) != bool(_loca)); +} + +inline +unsigned short int GlyphCache::Loader::units_per_em() const throw() +{ + return _head ? TtfUtil::DesignUnits(_head) : 0; +} + +inline +unsigned short int GlyphCache::Loader::num_glyphs() const throw() +{ + return max(_num_glyphs_graphics, _num_glyphs_attributes); +} + +inline +unsigned short int GlyphCache::Loader::num_attrs() const throw() +{ + return _num_attrs; +} + +inline +bool GlyphCache::Loader::has_boxes () const throw() +{ + return _has_boxes; +} + +const GlyphFace * GlyphCache::Loader::read_glyph(unsigned short glyphid, GlyphFace & glyph, int *numsubs) const throw() +{ + Rect bbox; + Position advance; + + if (glyphid < _num_glyphs_graphics) + { + int nLsb; + unsigned int nAdvWid; + if (_glyf) + { + int xMin, yMin, xMax, yMax; + size_t locidx = TtfUtil::LocaLookup(glyphid, _loca, _loca.size(), _head); + void *pGlyph = TtfUtil::GlyfLookup(_glyf, locidx, _glyf.size()); + + if (pGlyph && TtfUtil::GlyfBox(pGlyph, xMin, yMin, xMax, yMax)) + { + if ((xMin > xMax) || (yMin > yMax)) + return 0; + bbox = Rect(Position(static_cast<float>(xMin), static_cast<float>(yMin)), + Position(static_cast<float>(xMax), static_cast<float>(yMax))); + } + } + if (TtfUtil::HorMetrics(glyphid, _hmtx, _hmtx.size(), _hhea, nLsb, nAdvWid)) + advance = Position(static_cast<float>(nAdvWid), 0); + } + + if (glyphid < _num_glyphs_attributes) + { + const byte * gloc = m_pGloc; + size_t glocs = 0, gloce = 0; + + be::skip<uint32>(gloc); + be::skip<uint16>(gloc,2); + if (_long_fmt) + { + if (8 + glyphid * sizeof(uint32) > m_pGloc.size()) + return 0; + be::skip<uint32>(gloc, glyphid); + glocs = be::read<uint32>(gloc); + gloce = be::peek<uint32>(gloc); + } + else + { + if (8 + glyphid * sizeof(uint16) > m_pGloc.size()) + return 0; + be::skip<uint16>(gloc, glyphid); + glocs = be::read<uint16>(gloc); + gloce = be::peek<uint16>(gloc); + } + + if (glocs >= m_pGlat.size() - 1 || gloce > m_pGlat.size()) + return 0; + + const uint32 glat_version = be::peek<uint32>(m_pGlat); + if (glat_version >= 0x00030000) + { + if (glocs >= gloce) + return 0; + const byte * p = m_pGlat + glocs; + uint16 bmap = be::read<uint16>(p); + int num = bit_set_count((uint32)bmap); + if (numsubs) *numsubs += num; + glocs += 6 + 8 * num; + if (glocs > gloce) + return 0; + } + if (glat_version < 0x00020000) + { + if (gloce - glocs < 2*sizeof(byte)+sizeof(uint16) + || gloce - glocs > _num_attrs*(2*sizeof(byte)+sizeof(uint16))) + return 0; + new (&glyph) GlyphFace(bbox, advance, glat_iterator(m_pGlat + glocs), glat_iterator(m_pGlat + gloce)); + } + else + { + if (gloce - glocs < 3*sizeof(uint16) // can a glyph have no attributes? why not? + || gloce - glocs > _num_attrs*3*sizeof(uint16) + || glocs > m_pGlat.size() - 2*sizeof(uint16)) + return 0; + new (&glyph) GlyphFace(bbox, advance, glat2_iterator(m_pGlat + glocs), glat2_iterator(m_pGlat + gloce)); + } + if (!glyph.attrs() || glyph.attrs().capacity() > _num_attrs) + return 0; + } + return &glyph; +} + +inline float scale_to(uint8 t, float zmin, float zmax) +{ + return (zmin + t * (zmax - zmin) / 255); +} + +Rect readbox(Rect &b, uint8 zxmin, uint8 zymin, uint8 zxmax, uint8 zymax) +{ + return Rect(Position(scale_to(zxmin, b.bl.x, b.tr.x), scale_to(zymin, b.bl.y, b.tr.y)), + Position(scale_to(zxmax, b.bl.x, b.tr.x), scale_to(zymax, b.bl.y, b.tr.y))); +} + +GlyphBox * GlyphCache::Loader::read_box(uint16 gid, GlyphBox *curr, const GlyphFace & glyph) const throw() +{ + if (gid >= _num_glyphs_attributes) return 0; + + const byte * gloc = m_pGloc; + size_t glocs = 0, gloce = 0; + + be::skip<uint32>(gloc); + be::skip<uint16>(gloc,2); + if (_long_fmt) + { + be::skip<uint32>(gloc, gid); + glocs = be::read<uint32>(gloc); + gloce = be::peek<uint32>(gloc); + } + else + { + be::skip<uint16>(gloc, gid); + glocs = be::read<uint16>(gloc); + gloce = be::peek<uint16>(gloc); + } + + if (gloce > m_pGlat.size() || glocs + 6 >= gloce) + return 0; + + const byte * p = m_pGlat + glocs; + uint16 bmap = be::read<uint16>(p); + int num = bit_set_count((uint32)bmap); + + Rect bbox = glyph.theBBox(); + Rect diamax(Position(bbox.bl.x + bbox.bl.y, bbox.bl.x - bbox.tr.y), + Position(bbox.tr.x + bbox.tr.y, bbox.tr.x - bbox.bl.y)); + Rect diabound = readbox(diamax, p[0], p[2], p[1], p[3]); + ::new (curr) GlyphBox(num, bmap, &diabound); + be::skip<uint8>(p, 4); + if (glocs + 6 + num * 8 >= gloce) + return 0; + + for (int i = 0; i < num * 2; ++i) + { + Rect box = readbox((i & 1) ? diamax : bbox, p[0], p[2], p[1], p[3]); + curr->addSubBox(i >> 1, i & 1, &box); + be::skip<uint8>(p, 4); + } + return (GlyphBox *)((char *)(curr) + sizeof(GlyphBox) + 2 * num * sizeof(Rect)); +} diff --git a/thirdparty/graphite/src/GlyphFace.cpp b/thirdparty/graphite/src/GlyphFace.cpp new file mode 100644 index 0000000000..bc5e63a9f0 --- /dev/null +++ b/thirdparty/graphite/src/GlyphFace.cpp @@ -0,0 +1,48 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "inc/GlyphFace.h" + + +using namespace graphite2; + +int32 GlyphFace::getMetric(uint8 metric) const +{ + switch (metrics(metric)) + { + case kgmetLsb : return int32(m_bbox.bl.x); + case kgmetRsb : return int32(m_advance.x - m_bbox.tr.x); + case kgmetBbTop : return int32(m_bbox.tr.y); + case kgmetBbBottom : return int32(m_bbox.bl.y); + case kgmetBbLeft : return int32(m_bbox.bl.x); + case kgmetBbRight : return int32(m_bbox.tr.x); + case kgmetBbHeight : return int32(m_bbox.tr.y - m_bbox.bl.y); + case kgmetBbWidth : return int32(m_bbox.tr.x - m_bbox.bl.x); + case kgmetAdvWidth : return int32(m_advance.x); + case kgmetAdvHeight : return int32(m_advance.y); + default : return 0; + } +} diff --git a/thirdparty/graphite/src/Intervals.cpp b/thirdparty/graphite/src/Intervals.cpp new file mode 100644 index 0000000000..0fe99a127a --- /dev/null +++ b/thirdparty/graphite/src/Intervals.cpp @@ -0,0 +1,298 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include <algorithm> +#include <cmath> +#include <limits> + +#include "inc/Intervals.h" +#include "inc/Segment.h" +#include "inc/Slot.h" +#include "inc/debug.h" +#include "inc/bits.h" + +using namespace graphite2; + +#include <cmath> + +inline +Zones::Exclusion Zones::Exclusion::split_at(float p) { + Exclusion r(*this); + r.xm = x = p; + return r; +} + +inline +void Zones::Exclusion::left_trim(float p) { + x = p; +} + +inline +Zones::Exclusion & Zones::Exclusion::operator += (Exclusion const & rhs) { + c += rhs.c; sm += rhs.sm; smx += rhs.smx; open = false; + return *this; +} + +inline +uint8 Zones::Exclusion::outcode(float val) const { + float p = val; + //float d = std::numeric_limits<float>::epsilon(); + float d = 0.; + return ((p - xm >= d) << 1) | (x - p > d); +} + +void Zones::exclude_with_margins(float xmin, float xmax, int axis) { + remove(xmin, xmax); + weightedAxis(axis, xmin-_margin_len, xmin, 0, 0, _margin_weight, xmin-_margin_len, 0, 0, false); + weightedAxis(axis, xmax, xmax+_margin_len, 0, 0, _margin_weight, xmax+_margin_len, 0, 0, false); +} + +namespace +{ + +inline +bool separated(float a, float b) { + return a != b; + //int exp; + //float res = frexpf(fabs(a - b), &exp); + //return (*(unsigned int *)(&res) > 4); + //return std::fabs(a-b) > std::numeric_limits<float>::epsilon(); // std::epsilon may not work. but 0.5 fails exising 64 bit tests + //return std::fabs(a-b) > 0.5f; +} + +} + +void Zones::insert(Exclusion e) +{ +#if !defined GRAPHITE2_NTRACING + addDebug(&e); +#endif + e.x = max(e.x, _pos); + e.xm = min(e.xm, _posm); + if (e.x >= e.xm) return; + + for (iterator i = _exclusions.begin(), ie = _exclusions.end(); i != ie && e.x < e.xm; ++i) + { + const uint8 oca = e.outcode(i->x), + ocb = e.outcode(i->xm); + if ((oca & ocb) != 0) continue; + + switch (oca ^ ocb) // What kind of overlap? + { + case 0: // e completely covers i + // split e at i.x into e1,e2 + // split e2 at i.mx into e2,e3 + // drop e1 ,i+e2, e=e3 + *i += e; + e.left_trim(i->xm); + break; + case 1: // e overlaps on the rhs of i + // split i at e->x into i1,i2 + // split e at i.mx into e1,e2 + // trim i1, insert i2+e1, e=e2 + if (!separated(i->xm, e.x)) break; + if (separated(i->x,e.x)) { i = _exclusions.insert(i,i->split_at(e.x)); ++i; } + *i += e; + e.left_trim(i->xm); + break; + case 2: // e overlaps on the lhs of i + // split e at i->x into e1,e2 + // split i at e.mx into i1,i2 + // drop e1, insert e2+i1, trim i2 + if (!separated(e.xm, i->x)) return; + if (separated(e.xm, i->xm)) i = _exclusions.insert(i,i->split_at(e.xm)); + *i += e; + return; + case 3: // i completely covers e + // split i at e.x into i1,i2 + // split i2 at e.mx into i2,i3 + // insert i1, insert e+i2 + if (separated(e.xm, i->xm)) i = _exclusions.insert(i,i->split_at(e.xm)); + i = _exclusions.insert(i, i->split_at(e.x)); + *++i += e; + return; + } + + ie = _exclusions.end(); + } +} + + +void Zones::remove(float x, float xm) +{ +#if !defined GRAPHITE2_NTRACING + removeDebug(x, xm); +#endif + x = max(x, _pos); + xm = min(xm, _posm); + if (x >= xm) return; + + for (iterator i = _exclusions.begin(), ie = _exclusions.end(); i != ie; ++i) + { + const uint8 oca = i->outcode(x), + ocb = i->outcode(xm); + if ((oca & ocb) != 0) continue; + + switch (oca ^ ocb) // What kind of overlap? + { + case 0: // i completely covers e + if (separated(i->x, x)) { i = _exclusions.insert(i,i->split_at(x)); ++i; } + GR_FALLTHROUGH; + // no break + case 1: // i overlaps on the rhs of e + i->left_trim(xm); + return; + case 2: // i overlaps on the lhs of e + i->xm = x; + if (separated(i->x, i->xm)) break; + GR_FALLTHROUGH; + // no break + case 3: // e completely covers i + i = _exclusions.erase(i); + --i; + break; + } + + ie = _exclusions.end(); + } +} + + +Zones::const_iterator Zones::find_exclusion_under(float x) const +{ + size_t l = 0, h = _exclusions.size(); + + while (l < h) + { + size_t const p = (l+h) >> 1; + switch (_exclusions[p].outcode(x)) + { + case 0 : return _exclusions.begin()+p; + case 1 : h = p; break; + case 2 : + case 3 : l = p+1; break; + } + } + + return _exclusions.begin()+l; +} + + +float Zones::closest(float origin, float & cost) const +{ + float best_c = std::numeric_limits<float>::max(), + best_x = 0; + + const const_iterator start = find_exclusion_under(origin); + + // Forward scan looking for lowest cost + for (const_iterator i = start, ie = _exclusions.end(); i != ie; ++i) + if (i->track_cost(best_c, best_x, origin)) break; + + // Backward scan looking for lowest cost + // We start from the exclusion to the immediate left of start since we've + // already tested start with the right most scan above. + for (const_iterator i = start-1, ie = _exclusions.begin()-1; i != ie; --i) + if (i->track_cost(best_c, best_x, origin)) break; + + cost = (best_c == std::numeric_limits<float>::max() ? -1 : best_c); + return best_x; +} + + +// Cost and test position functions + +bool Zones::Exclusion::track_cost(float & best_cost, float & best_pos, float origin) const { + const float p = test_position(origin), + localc = cost(p - origin); + if (open && localc > best_cost) return true; + + if (localc < best_cost) + { + best_cost = localc; + best_pos = p; + } + return false; +} + +inline +float Zones::Exclusion::cost(float p) const { + return (sm * p - 2 * smx) * p + c; +} + + +float Zones::Exclusion::test_position(float origin) const { + if (sm < 0) + { + // sigh, test both ends and perhaps the middle too! + float res = x; + float cl = cost(x); + if (x < origin && xm > origin) + { + float co = cost(origin); + if (co < cl) + { + cl = co; + res = origin; + } + } + float cr = cost(xm); + return cl > cr ? xm : res; + } + else + { + float zerox = smx / sm + origin; + if (zerox < x) return x; + else if (zerox > xm) return xm; + else return zerox; + } +} + + +#if !defined GRAPHITE2_NTRACING + +void Zones::jsonDbgOut(Segment *seg) const { + + if (_dbg) + { + for (Zones::idebugs s = dbgs_begin(), e = dbgs_end(); s != e; ++s) + { + *_dbg << json::flat << json::array + << objectid(dslot(seg, (Slot *)(s->_env[0]))) + << reinterpret_cast<ptrdiff_t>(s->_env[1]); + if (s->_isdel) + *_dbg << "remove" << Position(s->_excl.x, s->_excl.xm); + else + *_dbg << "exclude" << json::flat << json::array + << s->_excl.x << s->_excl.xm + << s->_excl.sm << s->_excl.smx << s->_excl.c + << json::close; + *_dbg << json::close; + } + } +} + +#endif diff --git a/thirdparty/graphite/src/Justifier.cpp b/thirdparty/graphite/src/Justifier.cpp new file mode 100644 index 0000000000..78c11e6a51 --- /dev/null +++ b/thirdparty/graphite/src/Justifier.cpp @@ -0,0 +1,282 @@ +/* GRAPHITE2 LICENSING + + Copyright 2012, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ + +#include "inc/Segment.h" +#include "graphite2/Font.h" +#include "inc/debug.h" +#include "inc/CharInfo.h" +#include "inc/Slot.h" +#include "inc/Main.h" +#include <cmath> + +using namespace graphite2; + +class JustifyTotal { +public: + JustifyTotal() : m_numGlyphs(0), m_tStretch(0), m_tShrink(0), m_tStep(0), m_tWeight(0) {} + void accumulate(Slot *s, Segment *seg, int level); + int weight() const { return m_tWeight; } + + CLASS_NEW_DELETE + +private: + int m_numGlyphs; + int m_tStretch; + int m_tShrink; + int m_tStep; + int m_tWeight; +}; + +void JustifyTotal::accumulate(Slot *s, Segment *seg, int level) +{ + ++m_numGlyphs; + m_tStretch += s->getJustify(seg, level, 0); + m_tShrink += s->getJustify(seg, level, 1); + m_tStep += s->getJustify(seg, level, 2); + m_tWeight += s->getJustify(seg, level, 3); +} + +float Segment::justify(Slot *pSlot, const Font *font, float width, GR_MAYBE_UNUSED justFlags jflags, Slot *pFirst, Slot *pLast) +{ + Slot *end = last(); + float currWidth = 0.0; + const float scale = font ? font->scale() : 1.0f; + Position res; + + if (width < 0 && !(silf()->flags())) + return width; + + if ((m_dir & 1) != m_silf->dir() && m_silf->bidiPass() != m_silf->numPasses()) + { + reverseSlots(); + std::swap(pFirst, pLast); + } + if (!pFirst) pFirst = pSlot; + while (!pFirst->isBase()) pFirst = pFirst->attachedTo(); + if (!pLast) pLast = last(); + while (!pLast->isBase()) pLast = pLast->attachedTo(); + const float base = pFirst->origin().x / scale; + width = width / scale; + if ((jflags & gr_justEndInline) == 0) + { + while (pLast != pFirst && pLast) + { + Rect bbox = theGlyphBBoxTemporary(pLast->glyph()); + if (bbox.bl.x != 0.f || bbox.bl.y != 0.f || bbox.tr.x != 0.f || bbox.tr.y == 0.f) + break; + pLast = pLast->prev(); + } + } + + if (pLast) + end = pLast->nextSibling(); + if (pFirst) + pFirst = pFirst->nextSibling(); + + int icount = 0; + int numLevels = silf()->numJustLevels(); + if (!numLevels) + { + for (Slot *s = pSlot; s && s != end; s = s->nextSibling()) + { + CharInfo *c = charinfo(s->before()); + if (isWhitespace(c->unicodeChar())) + { + s->setJustify(this, 0, 3, 1); + s->setJustify(this, 0, 2, 1); + s->setJustify(this, 0, 0, -1); + ++icount; + } + } + if (!icount) + { + for (Slot *s = pSlot; s && s != end; s = s->nextSibling()) + { + s->setJustify(this, 0, 3, 1); + s->setJustify(this, 0, 2, 1); + s->setJustify(this, 0, 0, -1); + } + } + ++numLevels; + } + + Vector<JustifyTotal> stats(numLevels); + for (Slot *s = pFirst; s && s != end; s = s->nextSibling()) + { + float w = s->origin().x / scale + s->advance() - base; + if (w > currWidth) currWidth = w; + for (int j = 0; j < numLevels; ++j) + stats[j].accumulate(s, this, j); + s->just(0); + } + + for (int i = (width < 0.0f) ? -1 : numLevels - 1; i >= 0; --i) + { + float diff; + float error = 0.; + float diffpw; + int tWeight = stats[i].weight(); + if (tWeight == 0) continue; + + do { + error = 0.; + diff = width - currWidth; + diffpw = diff / tWeight; + tWeight = 0; + for (Slot *s = pFirst; s && s != end; s = s->nextSibling()) // don't include final glyph + { + int w = s->getJustify(this, i, 3); + float pref = diffpw * w + error; + int step = s->getJustify(this, i, 2); + if (!step) step = 1; // handle lazy font developers + if (pref > 0) + { + float max = uint16(s->getJustify(this, i, 0)); + if (i == 0) max -= s->just(); + if (pref > max) pref = max; + else tWeight += w; + } + else + { + float max = uint16(s->getJustify(this, i, 1)); + if (i == 0) max += s->just(); + if (-pref > max) pref = -max; + else tWeight += w; + } + int actual = int(pref / step) * step; + + if (actual) + { + error += diffpw * w - actual; + if (i == 0) + s->just(s->just() + actual); + else + s->setJustify(this, i, 4, actual); + } + } + currWidth += diff - error; + } while (i == 0 && int(std::abs(error)) > 0 && tWeight); + } + + Slot *oldFirst = m_first; + Slot *oldLast = m_last; + if (silf()->flags() & 1) + { + m_first = pSlot = addLineEnd(pSlot); + m_last = pLast = addLineEnd(end); + if (!m_first || !m_last) return -1.0; + } + else + { + m_first = pSlot; + m_last = pLast; + } + + // run justification passes here +#if !defined GRAPHITE2_NTRACING + json * const dbgout = m_face->logger(); + if (dbgout) + *dbgout << json::object + << "justifies" << objectid(this) + << "passes" << json::array; +#endif + + if (m_silf->justificationPass() != m_silf->positionPass() && (width >= 0.f || (silf()->flags() & 1))) + m_silf->runGraphite(this, m_silf->justificationPass(), m_silf->positionPass()); + +#if !defined GRAPHITE2_NTRACING + if (dbgout) + { + *dbgout << json::item << json::close; // Close up the passes array + positionSlots(NULL, pSlot, pLast, m_dir); + Slot *lEnd = pLast->nextSibling(); + *dbgout << "output" << json::array; + for(Slot * t = pSlot; t != lEnd; t = t->next()) + *dbgout << dslot(this, t); + *dbgout << json::close << json::close; + } +#endif + + res = positionSlots(font, pSlot, pLast, m_dir); + + if (silf()->flags() & 1) + { + if (m_first) + delLineEnd(m_first); + if (m_last) + delLineEnd(m_last); + } + m_first = oldFirst; + m_last = oldLast; + + if ((m_dir & 1) != m_silf->dir() && m_silf->bidiPass() != m_silf->numPasses()) + reverseSlots(); + return res.x; +} + +Slot *Segment::addLineEnd(Slot *nSlot) +{ + Slot *eSlot = newSlot(); + if (!eSlot) return NULL; + const uint16 gid = silf()->endLineGlyphid(); + const GlyphFace * theGlyph = m_face->glyphs().glyphSafe(gid); + eSlot->setGlyph(this, gid, theGlyph); + if (nSlot) + { + eSlot->next(nSlot); + eSlot->prev(nSlot->prev()); + nSlot->prev(eSlot); + eSlot->before(nSlot->before()); + if (eSlot->prev()) + eSlot->after(eSlot->prev()->after()); + else + eSlot->after(nSlot->before()); + } + else + { + nSlot = m_last; + eSlot->prev(nSlot); + nSlot->next(eSlot); + eSlot->after(eSlot->prev()->after()); + eSlot->before(nSlot->after()); + } + return eSlot; +} + +void Segment::delLineEnd(Slot *s) +{ + Slot *nSlot = s->next(); + if (nSlot) + { + nSlot->prev(s->prev()); + if (s->prev()) + s->prev()->next(nSlot); + } + else + s->prev()->next(NULL); + freeSlot(s); +} diff --git a/thirdparty/graphite/src/NameTable.cpp b/thirdparty/graphite/src/NameTable.cpp new file mode 100644 index 0000000000..d42b7f95bd --- /dev/null +++ b/thirdparty/graphite/src/NameTable.cpp @@ -0,0 +1,254 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "inc/Main.h" +#include "inc/Endian.h" + +#include "inc/NameTable.h" +#include "inc/UtfCodec.h" + +using namespace graphite2; + +NameTable::NameTable(const void* data, size_t length, uint16 platformId, uint16 encodingID) + : m_platformId(0), m_encodingId(0), m_languageCount(0), + m_platformOffset(0), m_platformLastRecord(0), m_nameDataLength(0), + m_table(0), m_nameData(NULL) +{ + void *pdata = gralloc<byte>(length); + if (!pdata) return; + memcpy(pdata, data, length); + m_table = reinterpret_cast<const TtfUtil::Sfnt::FontNames*>(pdata); + + if ((length > sizeof(TtfUtil::Sfnt::FontNames)) && + (length > sizeof(TtfUtil::Sfnt::FontNames) + + sizeof(TtfUtil::Sfnt::NameRecord) * ( be::swap<uint16>(m_table->count) - 1))) + { + uint16 offset = be::swap<uint16>(m_table->string_offset); + if (offset < length) + { + m_nameData = reinterpret_cast<const uint8*>(pdata) + offset; + setPlatformEncoding(platformId, encodingID); + m_nameDataLength = uint16(length - offset); + return; + } + } + free(const_cast<TtfUtil::Sfnt::FontNames*>(m_table)); + m_table = NULL; +} + +uint16 NameTable::setPlatformEncoding(uint16 platformId, uint16 encodingID) +{ + if (!m_nameData) return 0; + uint16 i = 0; + uint16 count = be::swap<uint16>(m_table->count); + for (; i < count; i++) + { + if (be::swap<uint16>(m_table->name_record[i].platform_id) == platformId && + be::swap<uint16>(m_table->name_record[i].platform_specific_id) == encodingID) + { + m_platformOffset = i; + break; + } + } + while ((++i < count) && + (be::swap<uint16>(m_table->name_record[i].platform_id) == platformId) && + (be::swap<uint16>(m_table->name_record[i].platform_specific_id) == encodingID)) + { + m_platformLastRecord = i; + } + m_encodingId = encodingID; + m_platformId = platformId; + return 0; +} + +void* NameTable::getName(uint16& languageId, uint16 nameId, gr_encform enc, uint32& length) +{ + uint16 anyLang = 0; + uint16 enUSLang = 0; + uint16 bestLang = 0; + if (!m_table) + { + languageId = 0; + length = 0; + return NULL; + } + for (uint16 i = m_platformOffset; i <= m_platformLastRecord; i++) + { + if (be::swap<uint16>(m_table->name_record[i].name_id) == nameId) + { + uint16 langId = be::swap<uint16>(m_table->name_record[i].language_id); + if (langId == languageId) + { + bestLang = i; + break; + } + // MS language tags have the language in the lower byte, region in the higher + else if ((langId & 0xFF) == (languageId & 0xFF)) + { + bestLang = i; + } + else if (langId == 0x409) + { + enUSLang = i; + } + else + { + anyLang = i; + } + } + } + if (!bestLang) + { + if (enUSLang) bestLang = enUSLang; + else + { + bestLang = anyLang; + if (!anyLang) + { + languageId = 0; + length = 0; + return NULL; + } + } + } + const TtfUtil::Sfnt::NameRecord & nameRecord = m_table->name_record[bestLang]; + languageId = be::swap<uint16>(nameRecord.language_id); + uint16 utf16Length = be::swap<uint16>(nameRecord.length); + uint16 offset = be::swap<uint16>(nameRecord.offset); + if(offset + utf16Length > m_nameDataLength) + { + languageId = 0; + length = 0; + return NULL; + } + utf16Length >>= 1; // in utf16 units + utf16::codeunit_t * utf16Name = gralloc<utf16::codeunit_t>(utf16Length + 1); + if (!utf16Name) + { + languageId = 0; + length = 0; + return NULL; + } + const uint8* pName = m_nameData + offset; + for (size_t i = 0; i < utf16Length; i++) + { + utf16Name[i] = be::read<uint16>(pName); + } + utf16Name[utf16Length] = 0; + if (!utf16::validate(utf16Name, utf16Name + utf16Length)) + { + free(utf16Name); + languageId = 0; + length = 0; + return NULL; + } + switch (enc) + { + case gr_utf8: + { + utf8::codeunit_t* uniBuffer = gralloc<utf8::codeunit_t>(3 * utf16Length + 1); + if (!uniBuffer) + { + free(utf16Name); + languageId = 0; + length = 0; + return NULL; + } + utf8::iterator d = uniBuffer; + for (utf16::const_iterator s = utf16Name, e = utf16Name + utf16Length; s != e; ++s, ++d) + *d = *s; + length = uint32(d - uniBuffer); + uniBuffer[length] = 0; + free(utf16Name); + return uniBuffer; + } + case gr_utf16: + length = utf16Length; + return utf16Name; + case gr_utf32: + { + utf32::codeunit_t * uniBuffer = gralloc<utf32::codeunit_t>(utf16Length + 1); + if (!uniBuffer) + { + free(utf16Name); + languageId = 0; + length = 0; + return NULL; + } + utf32::iterator d = uniBuffer; + for (utf16::const_iterator s = utf16Name, e = utf16Name + utf16Length; s != e; ++s, ++d) + *d = *s; + length = uint32(d - uniBuffer); + uniBuffer[length] = 0; + free(utf16Name); + return uniBuffer; + } + } + free(utf16Name); + languageId = 0; + length = 0; + return NULL; +} + +uint16 NameTable::getLanguageId(const char * bcp47Locale) +{ + size_t localeLength = strlen(bcp47Locale); + uint16 localeId = m_locale2Lang.getMsId(bcp47Locale); + if (m_table && (be::swap<uint16>(m_table->format) == 1)) + { + const uint8 * pLangEntries = reinterpret_cast<const uint8*>(m_table) + + sizeof(TtfUtil::Sfnt::FontNames) + + sizeof(TtfUtil::Sfnt::NameRecord) * ( be::swap<uint16>(m_table->count) - 1); + uint16 numLangEntries = be::read<uint16>(pLangEntries); + const TtfUtil::Sfnt::LangTagRecord * langTag = + reinterpret_cast<const TtfUtil::Sfnt::LangTagRecord*>(pLangEntries); + if (pLangEntries + numLangEntries * sizeof(TtfUtil::Sfnt::LangTagRecord) <= m_nameData) + { + for (uint16 i = 0; i < numLangEntries; i++) + { + uint16 offset = be::swap<uint16>(langTag[i].offset); + uint16 length = be::swap<uint16>(langTag[i].length); + if ((offset + length <= m_nameDataLength) && (length == 2 * localeLength)) + { + const uint8* pName = m_nameData + offset; + bool match = true; + for (size_t j = 0; j < localeLength; j++) + { + uint16 code = be::read<uint16>(pName); + if ((code > 0x7F) || (code != bcp47Locale[j])) + { + match = false; + break; + } + } + if (match) + return 0x8000 + i; + } + } + } + } + return localeId; +} diff --git a/thirdparty/graphite/src/Pass.cpp b/thirdparty/graphite/src/Pass.cpp new file mode 100644 index 0000000000..db31c22d46 --- /dev/null +++ b/thirdparty/graphite/src/Pass.cpp @@ -0,0 +1,1107 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "inc/Main.h" +#include "inc/debug.h" +#include "inc/Endian.h" +#include "inc/Pass.h" +#include <cstring> +#include <cstdlib> +#include <cassert> +#include <cmath> +#include "inc/Segment.h" +#include "inc/Code.h" +#include "inc/Rule.h" +#include "inc/Error.h" +#include "inc/Collider.h" + +using namespace graphite2; +using vm::Machine; +typedef Machine::Code Code; + +enum KernCollison +{ + None = 0, + CrossSpace = 1, + InWord = 2, + reserved = 3 +}; + +Pass::Pass() +: m_silf(0), + m_cols(0), + m_rules(0), + m_ruleMap(0), + m_startStates(0), + m_transitions(0), + m_states(0), + m_codes(0), + m_progs(0), + m_numCollRuns(0), + m_kernColls(0), + m_iMaxLoop(0), + m_numGlyphs(0), + m_numRules(0), + m_numStates(0), + m_numTransition(0), + m_numSuccess(0), + m_successStart(0), + m_numColumns(0), + m_minPreCtxt(0), + m_maxPreCtxt(0), + m_colThreshold(0), + m_isReverseDir(false) +{ +} + +Pass::~Pass() +{ + free(m_cols); + free(m_startStates); + free(m_transitions); + free(m_states); + free(m_ruleMap); + + if (m_rules) delete [] m_rules; + if (m_codes) delete [] m_codes; + free(m_progs); +} + +bool Pass::readPass(const byte * const pass_start, size_t pass_length, size_t subtable_base, + GR_MAYBE_UNUSED Face & face, passtype pt, GR_MAYBE_UNUSED uint32 version, Error &e) +{ + const byte * p = pass_start, + * const pass_end = p + pass_length; + size_t numRanges; + + if (e.test(pass_length < 40, E_BADPASSLENGTH)) return face.error(e); + // Read in basic values + const byte flags = be::read<byte>(p); + if (e.test((flags & 0x1f) && + (pt < PASS_TYPE_POSITIONING || !m_silf->aCollision() || !face.glyphs().hasBoxes() || !(m_silf->flags() & 0x20)), + E_BADCOLLISIONPASS)) + return face.error(e); + m_numCollRuns = flags & 0x7; + m_kernColls = (flags >> 3) & 0x3; + m_isReverseDir = (flags >> 5) & 0x1; + m_iMaxLoop = be::read<byte>(p); + if (m_iMaxLoop < 1) m_iMaxLoop = 1; + be::skip<byte>(p,2); // skip maxContext & maxBackup + m_numRules = be::read<uint16>(p); + if (e.test(!m_numRules && m_numCollRuns == 0, E_BADEMPTYPASS)) return face.error(e); + be::skip<uint16>(p); // fsmOffset - not sure why we would want this + const byte * const pcCode = pass_start + be::read<uint32>(p) - subtable_base, + * const rcCode = pass_start + be::read<uint32>(p) - subtable_base, + * const aCode = pass_start + be::read<uint32>(p) - subtable_base; + be::skip<uint32>(p); + m_numStates = be::read<uint16>(p); + m_numTransition = be::read<uint16>(p); + m_numSuccess = be::read<uint16>(p); + m_numColumns = be::read<uint16>(p); + numRanges = be::read<uint16>(p); + be::skip<uint16>(p, 3); // skip searchRange, entrySelector & rangeShift. + assert(p - pass_start == 40); + // Perform some sanity checks. + if ( e.test(m_numTransition > m_numStates, E_BADNUMTRANS) + || e.test(m_numSuccess > m_numStates, E_BADNUMSUCCESS) + || e.test(m_numSuccess + m_numTransition < m_numStates, E_BADNUMSTATES) + || e.test(m_numRules && numRanges == 0, E_NORANGES) + || e.test(m_numColumns > 0x7FFF, E_BADNUMCOLUMNS)) + return face.error(e); + + m_successStart = m_numStates - m_numSuccess; + // test for beyond end - 1 to account for reading uint16 + if (e.test(p + numRanges * 6 - 2 > pass_end, E_BADPASSLENGTH)) return face.error(e); + m_numGlyphs = be::peek<uint16>(p + numRanges * 6 - 4) + 1; + // Calculate the start of various arrays. + const byte * const ranges = p; + be::skip<uint16>(p, numRanges*3); + const byte * const o_rule_map = p; + be::skip<uint16>(p, m_numSuccess + 1); + + // More sanity checks + if (e.test(reinterpret_cast<const byte *>(o_rule_map + m_numSuccess*sizeof(uint16)) > pass_end + || p > pass_end, E_BADRULEMAPLEN)) + return face.error(e); + const size_t numEntries = be::peek<uint16>(o_rule_map + m_numSuccess*sizeof(uint16)); + const byte * const rule_map = p; + be::skip<uint16>(p, numEntries); + + if (e.test(p + 2*sizeof(uint8) > pass_end, E_BADPASSLENGTH)) return face.error(e); + m_minPreCtxt = be::read<uint8>(p); + m_maxPreCtxt = be::read<uint8>(p); + if (e.test(m_minPreCtxt > m_maxPreCtxt, E_BADCTXTLENBOUNDS)) return face.error(e); + const byte * const start_states = p; + be::skip<int16>(p, m_maxPreCtxt - m_minPreCtxt + 1); + const uint16 * const sort_keys = reinterpret_cast<const uint16 *>(p); + be::skip<uint16>(p, m_numRules); + const byte * const precontext = p; + be::skip<byte>(p, m_numRules); + + if (e.test(p + sizeof(uint16) + sizeof(uint8) > pass_end, E_BADCTXTLENS)) return face.error(e); + m_colThreshold = be::read<uint8>(p); + if (m_colThreshold == 0) m_colThreshold = 10; // A default + const size_t pass_constraint_len = be::read<uint16>(p); + + const uint16 * const o_constraint = reinterpret_cast<const uint16 *>(p); + be::skip<uint16>(p, m_numRules + 1); + const uint16 * const o_actions = reinterpret_cast<const uint16 *>(p); + be::skip<uint16>(p, m_numRules + 1); + const byte * const states = p; + if (e.test(2u*m_numTransition*m_numColumns >= (unsigned)(pass_end - p), E_BADPASSLENGTH) + || e.test(p >= pass_end, E_BADPASSLENGTH)) + return face.error(e); + be::skip<int16>(p, m_numTransition*m_numColumns); + be::skip<uint8>(p); + if (e.test(p != pcCode, E_BADPASSCCODEPTR)) return face.error(e); + be::skip<byte>(p, pass_constraint_len); + if (e.test(p != rcCode, E_BADRULECCODEPTR) + || e.test(size_t(rcCode - pcCode) != pass_constraint_len, E_BADCCODELEN)) return face.error(e); + be::skip<byte>(p, be::peek<uint16>(o_constraint + m_numRules)); + if (e.test(p != aCode, E_BADACTIONCODEPTR)) return face.error(e); + be::skip<byte>(p, be::peek<uint16>(o_actions + m_numRules)); + + // We should be at the end or within the pass + if (e.test(p > pass_end, E_BADPASSLENGTH)) return face.error(e); + + // Load the pass constraint if there is one. + if (pass_constraint_len) + { + face.error_context(face.error_context() + 1); + m_cPConstraint = vm::Machine::Code(true, pcCode, pcCode + pass_constraint_len, + precontext[0], be::peek<uint16>(sort_keys), *m_silf, face, PASS_TYPE_UNKNOWN); + if (e.test(!m_cPConstraint, E_OUTOFMEM) + || e.test(m_cPConstraint.status() != Code::loaded, m_cPConstraint.status() + E_CODEFAILURE)) + return face.error(e); + face.error_context(face.error_context() - 1); + } + if (m_numRules) + { + if (!readRanges(ranges, numRanges, e)) return face.error(e); + if (!readRules(rule_map, numEntries, precontext, sort_keys, + o_constraint, rcCode, o_actions, aCode, face, pt, e)) return false; + } +#ifdef GRAPHITE2_TELEMETRY + telemetry::category _states_cat(face.tele.states); +#endif + return m_numRules ? readStates(start_states, states, o_rule_map, face, e) : true; +} + + +bool Pass::readRules(const byte * rule_map, const size_t num_entries, + const byte *precontext, const uint16 * sort_key, + const uint16 * o_constraint, const byte *rc_data, + const uint16 * o_action, const byte * ac_data, + Face & face, passtype pt, Error &e) +{ + const byte * const ac_data_end = ac_data + be::peek<uint16>(o_action + m_numRules); + const byte * const rc_data_end = rc_data + be::peek<uint16>(o_constraint + m_numRules); + + precontext += m_numRules; + sort_key += m_numRules; + o_constraint += m_numRules; + o_action += m_numRules; + + // Load rules. + const byte * ac_begin = 0, * rc_begin = 0, + * ac_end = ac_data + be::peek<uint16>(o_action), + * rc_end = rc_data + be::peek<uint16>(o_constraint); + + // Allocate pools + m_rules = new Rule [m_numRules]; + m_codes = new Code [m_numRules*2]; + int totalSlots = 0; + const uint16 *tsort = sort_key; + for (int i = 0; i < m_numRules; ++i) + totalSlots += be::peek<uint16>(--tsort); + const size_t prog_pool_sz = vm::Machine::Code::estimateCodeDataOut(ac_end - ac_data + rc_end - rc_data, 2 * m_numRules, totalSlots); + m_progs = gralloc<byte>(prog_pool_sz); + byte * prog_pool_free = m_progs, + * prog_pool_end = m_progs + prog_pool_sz; + if (e.test(!(m_rules && m_codes && m_progs), E_OUTOFMEM)) return face.error(e); + + Rule * r = m_rules + m_numRules - 1; + for (size_t n = m_numRules; r >= m_rules; --n, --r, ac_end = ac_begin, rc_end = rc_begin) + { + face.error_context((face.error_context() & 0xFFFF00) + EC_ARULE + int((n - 1) << 24)); + r->preContext = *--precontext; + r->sort = be::peek<uint16>(--sort_key); +#ifndef NDEBUG + r->rule_idx = uint16(n - 1); +#endif + if (r->sort > 63 || r->preContext >= r->sort || r->preContext > m_maxPreCtxt || r->preContext < m_minPreCtxt) + return false; + ac_begin = ac_data + be::peek<uint16>(--o_action); + --o_constraint; + rc_begin = be::peek<uint16>(o_constraint) ? rc_data + be::peek<uint16>(o_constraint) : rc_end; + + if (ac_begin > ac_end || ac_begin > ac_data_end || ac_end > ac_data_end + || rc_begin > rc_end || rc_begin > rc_data_end || rc_end > rc_data_end + || vm::Machine::Code::estimateCodeDataOut(ac_end - ac_begin + rc_end - rc_begin, 2, r->sort) > size_t(prog_pool_end - prog_pool_free)) + return false; + r->action = new (m_codes+n*2-2) vm::Machine::Code(false, ac_begin, ac_end, r->preContext, r->sort, *m_silf, face, pt, &prog_pool_free); + r->constraint = new (m_codes+n*2-1) vm::Machine::Code(true, rc_begin, rc_end, r->preContext, r->sort, *m_silf, face, pt, &prog_pool_free); + + if (e.test(!r->action || !r->constraint, E_OUTOFMEM) + || e.test(r->action->status() != Code::loaded, r->action->status() + E_CODEFAILURE) + || e.test(r->constraint->status() != Code::loaded, r->constraint->status() + E_CODEFAILURE) + || e.test(!r->constraint->immutable(), E_MUTABLECCODE)) + return face.error(e); + } + + byte * const moved_progs = prog_pool_free > m_progs ? static_cast<byte *>(realloc(m_progs, prog_pool_free - m_progs)) : 0; + if (e.test(!moved_progs, E_OUTOFMEM)) + { + free(m_progs); + m_progs = 0; + return face.error(e); + } + + if (moved_progs != m_progs) + { + for (Code * c = m_codes, * const ce = c + m_numRules*2; c != ce; ++c) + { + c->externalProgramMoved(moved_progs - m_progs); + } + m_progs = moved_progs; + } + + // Load the rule entries map + face.error_context((face.error_context() & 0xFFFF00) + EC_APASS); + //TODO: Coverity: 1315804: FORWARD_NULL + RuleEntry * re = m_ruleMap = gralloc<RuleEntry>(num_entries); + if (e.test(!re, E_OUTOFMEM)) return face.error(e); + for (size_t n = num_entries; n; --n, ++re) + { + const ptrdiff_t rn = be::read<uint16>(rule_map); + if (e.test(rn >= m_numRules, E_BADRULENUM)) return face.error(e); + re->rule = m_rules + rn; + } + + return true; +} + +static int cmpRuleEntry(const void *a, const void *b) { return (*(RuleEntry *)a < *(RuleEntry *)b ? -1 : + (*(RuleEntry *)b < *(RuleEntry *)a ? 1 : 0)); } + +bool Pass::readStates(const byte * starts, const byte *states, const byte * o_rule_map, GR_MAYBE_UNUSED Face & face, Error &e) +{ +#ifdef GRAPHITE2_TELEMETRY + telemetry::category _states_cat(face.tele.starts); +#endif + m_startStates = gralloc<uint16>(m_maxPreCtxt - m_minPreCtxt + 1); +#ifdef GRAPHITE2_TELEMETRY + telemetry::set_category(face.tele.states); +#endif + m_states = gralloc<State>(m_numStates); +#ifdef GRAPHITE2_TELEMETRY + telemetry::set_category(face.tele.transitions); +#endif + m_transitions = gralloc<uint16>(m_numTransition * m_numColumns); + + if (e.test(!m_startStates || !m_states || !m_transitions, E_OUTOFMEM)) return face.error(e); + // load start states + for (uint16 * s = m_startStates, + * const s_end = s + m_maxPreCtxt - m_minPreCtxt + 1; s != s_end; ++s) + { + *s = be::read<uint16>(starts); + if (e.test(*s >= m_numStates, E_BADSTATE)) + { + face.error_context((face.error_context() & 0xFFFF00) + EC_ASTARTS + int((s - m_startStates) << 24)); + return face.error(e); // true; + } + } + + // load state transition table. + for (uint16 * t = m_transitions, + * const t_end = t + m_numTransition*m_numColumns; t != t_end; ++t) + { + *t = be::read<uint16>(states); + if (e.test(*t >= m_numStates, E_BADSTATE)) + { + face.error_context((face.error_context() & 0xFFFF00) + EC_ATRANS + int(((t - m_transitions) / m_numColumns) << 8)); + return face.error(e); + } + } + + State * s = m_states, + * const success_begin = m_states + m_numStates - m_numSuccess; + const RuleEntry * rule_map_end = m_ruleMap + be::peek<uint16>(o_rule_map + m_numSuccess*sizeof(uint16)); + for (size_t n = m_numStates; n; --n, ++s) + { + RuleEntry * const begin = s < success_begin ? 0 : m_ruleMap + be::read<uint16>(o_rule_map), + * const end = s < success_begin ? 0 : m_ruleMap + be::peek<uint16>(o_rule_map); + + if (e.test(begin >= rule_map_end || end > rule_map_end || begin > end, E_BADRULEMAPPING)) + { + face.error_context((face.error_context() & 0xFFFF00) + EC_ARULEMAP + int(n << 24)); + return face.error(e); + } + s->rules = begin; + s->rules_end = (end - begin <= FiniteStateMachine::MAX_RULES)? end : + begin + FiniteStateMachine::MAX_RULES; + if (begin) // keep UBSan happy can't call qsort with null begin + qsort(begin, end - begin, sizeof(RuleEntry), &cmpRuleEntry); + } + + return true; +} + +bool Pass::readRanges(const byte * ranges, size_t num_ranges, Error &e) +{ + m_cols = gralloc<uint16>(m_numGlyphs); + if (e.test(!m_cols, E_OUTOFMEM)) return false; + memset(m_cols, 0xFF, m_numGlyphs * sizeof(uint16)); + for (size_t n = num_ranges; n; --n) + { + uint16 * ci = m_cols + be::read<uint16>(ranges), + * ci_end = m_cols + be::read<uint16>(ranges) + 1, + col = be::read<uint16>(ranges); + + if (e.test(ci >= ci_end || ci_end > m_cols+m_numGlyphs || col >= m_numColumns, E_BADRANGE)) + return false; + + // A glyph must only belong to one column at a time + while (ci != ci_end && *ci == 0xffff) + *ci++ = col; + + if (e.test(ci != ci_end, E_BADRANGE)) + return false; + } + return true; +} + + +bool Pass::runGraphite(vm::Machine & m, FiniteStateMachine & fsm, bool reverse) const +{ + Slot *s = m.slotMap().segment.first(); + if (!s || !testPassConstraint(m)) return true; + if (reverse) + { + m.slotMap().segment.reverseSlots(); + s = m.slotMap().segment.first(); + } + if (m_numRules) + { + Slot *currHigh = s->next(); + +#if !defined GRAPHITE2_NTRACING + if (fsm.dbgout) *fsm.dbgout << "rules" << json::array; + json::closer rules_array_closer(fsm.dbgout); +#endif + + m.slotMap().highwater(currHigh); + int lc = m_iMaxLoop; + do + { + findNDoRule(s, m, fsm); + if (m.status() != Machine::finished) return false; + if (s && (s == m.slotMap().highwater() || m.slotMap().highpassed() || --lc == 0)) { + if (!lc) + s = m.slotMap().highwater(); + lc = m_iMaxLoop; + if (s) + m.slotMap().highwater(s->next()); + } + } while (s); + } + //TODO: Use enums for flags + const bool collisions = m_numCollRuns || m_kernColls; + + if (!collisions || !m.slotMap().segment.hasCollisionInfo()) + return true; + + if (m_numCollRuns) + { + if (!(m.slotMap().segment.flags() & Segment::SEG_INITCOLLISIONS)) + { + m.slotMap().segment.positionSlots(0, 0, 0, m.slotMap().dir(), true); +// m.slotMap().segment.flags(m.slotMap().segment.flags() | Segment::SEG_INITCOLLISIONS); + } + if (!collisionShift(&m.slotMap().segment, m.slotMap().dir(), fsm.dbgout)) + return false; + } + if ((m_kernColls) && !collisionKern(&m.slotMap().segment, m.slotMap().dir(), fsm.dbgout)) + return false; + if (collisions && !collisionFinish(&m.slotMap().segment, fsm.dbgout)) + return false; + return true; +} + +bool Pass::runFSM(FiniteStateMachine& fsm, Slot * slot) const +{ + fsm.reset(slot, m_maxPreCtxt); + if (fsm.slots.context() < m_minPreCtxt) + return false; + + uint16 state = m_startStates[m_maxPreCtxt - fsm.slots.context()]; + uint8 free_slots = SlotMap::MAX_SLOTS; + do + { + fsm.slots.pushSlot(slot); + if (slot->gid() >= m_numGlyphs + || m_cols[slot->gid()] == 0xffffU + || --free_slots == 0 + || state >= m_numTransition) + return free_slots != 0; + + const uint16 * transitions = m_transitions + state*m_numColumns; + state = transitions[m_cols[slot->gid()]]; + if (state >= m_successStart) + fsm.rules.accumulate_rules(m_states[state]); + + slot = slot->next(); + } while (state != 0 && slot); + + fsm.slots.pushSlot(slot); + return true; +} + +#if !defined GRAPHITE2_NTRACING + +inline +Slot * input_slot(const SlotMap & slots, const int n) +{ + Slot * s = slots[slots.context() + n]; + if (!s->isCopied()) return s; + + return s->prev() ? s->prev()->next() : (s->next() ? s->next()->prev() : slots.segment.last()); +} + +inline +Slot * output_slot(const SlotMap & slots, const int n) +{ + Slot * s = slots[slots.context() + n - 1]; + return s ? s->next() : slots.segment.first(); +} + +#endif //!defined GRAPHITE2_NTRACING + +void Pass::findNDoRule(Slot * & slot, Machine &m, FiniteStateMachine & fsm) const +{ + assert(slot); + + if (runFSM(fsm, slot)) + { + // Search for the first rule which passes the constraint + const RuleEntry * r = fsm.rules.begin(), + * const re = fsm.rules.end(); + while (r != re && !testConstraint(*r->rule, m)) + { + ++r; + if (m.status() != Machine::finished) + return; + } + +#if !defined GRAPHITE2_NTRACING + if (fsm.dbgout) + { + if (fsm.rules.size() != 0) + { + *fsm.dbgout << json::item << json::object; + dumpRuleEventConsidered(fsm, *r); + if (r != re) + { + const int adv = doAction(r->rule->action, slot, m); + dumpRuleEventOutput(fsm, *r->rule, slot); + if (r->rule->action->deletes()) fsm.slots.collectGarbage(slot); + adjustSlot(adv, slot, fsm.slots); + *fsm.dbgout << "cursor" << objectid(dslot(&fsm.slots.segment, slot)) + << json::close; // Close RuelEvent object + + return; + } + else + { + *fsm.dbgout << json::close // close "considered" array + << "output" << json::null + << "cursor" << objectid(dslot(&fsm.slots.segment, slot->next())) + << json::close; + } + } + } + else +#endif + { + if (r != re) + { + const int adv = doAction(r->rule->action, slot, m); + if (m.status() != Machine::finished) return; + if (r->rule->action->deletes()) fsm.slots.collectGarbage(slot); + adjustSlot(adv, slot, fsm.slots); + return; + } + } + } + + slot = slot->next(); + return; +} + +#if !defined GRAPHITE2_NTRACING + +void Pass::dumpRuleEventConsidered(const FiniteStateMachine & fsm, const RuleEntry & re) const +{ + *fsm.dbgout << "considered" << json::array; + for (const RuleEntry *r = fsm.rules.begin(); r != &re; ++r) + { + if (r->rule->preContext > fsm.slots.context()) + continue; + *fsm.dbgout << json::flat << json::object + << "id" << r->rule - m_rules + << "failed" << true + << "input" << json::flat << json::object + << "start" << objectid(dslot(&fsm.slots.segment, input_slot(fsm.slots, -r->rule->preContext))) + << "length" << r->rule->sort + << json::close // close "input" + << json::close; // close Rule object + } +} + + +void Pass::dumpRuleEventOutput(const FiniteStateMachine & fsm, const Rule & r, Slot * const last_slot) const +{ + *fsm.dbgout << json::item << json::flat << json::object + << "id" << &r - m_rules + << "failed" << false + << "input" << json::flat << json::object + << "start" << objectid(dslot(&fsm.slots.segment, input_slot(fsm.slots, 0))) + << "length" << r.sort - r.preContext + << json::close // close "input" + << json::close // close Rule object + << json::close // close considered array + << "output" << json::object + << "range" << json::flat << json::object + << "start" << objectid(dslot(&fsm.slots.segment, input_slot(fsm.slots, 0))) + << "end" << objectid(dslot(&fsm.slots.segment, last_slot)) + << json::close // close "input" + << "slots" << json::array; + const Position rsb_prepos = last_slot ? last_slot->origin() : fsm.slots.segment.advance(); + fsm.slots.segment.positionSlots(0, 0, 0, fsm.slots.segment.currdir()); + + for(Slot * slot = output_slot(fsm.slots, 0); slot != last_slot; slot = slot->next()) + *fsm.dbgout << dslot(&fsm.slots.segment, slot); + *fsm.dbgout << json::close // close "slots" + << "postshift" << (last_slot ? last_slot->origin() : fsm.slots.segment.advance()) - rsb_prepos + << json::close; // close "output" object + +} + +#endif + + +inline +bool Pass::testPassConstraint(Machine & m) const +{ + if (!m_cPConstraint) return true; + + assert(m_cPConstraint.constraint()); + + m.slotMap().reset(*m.slotMap().segment.first(), 0); + m.slotMap().pushSlot(m.slotMap().segment.first()); + vm::slotref * map = m.slotMap().begin(); + const uint32 ret = m_cPConstraint.run(m, map); + +#if !defined GRAPHITE2_NTRACING + json * const dbgout = m.slotMap().segment.getFace()->logger(); + if (dbgout) + *dbgout << "constraint" << (ret && m.status() == Machine::finished); +#endif + + return ret && m.status() == Machine::finished; +} + + +bool Pass::testConstraint(const Rule & r, Machine & m) const +{ + const uint16 curr_context = m.slotMap().context(); + if (unsigned(r.sort + curr_context - r.preContext) > m.slotMap().size() + || curr_context - r.preContext < 0) return false; + + vm::slotref * map = m.slotMap().begin() + curr_context - r.preContext; + if (map[r.sort - 1] == 0) + return false; + + if (!*r.constraint) return true; + assert(r.constraint->constraint()); + for (int n = r.sort; n && map; --n, ++map) + { + if (!*map) continue; + const int32 ret = r.constraint->run(m, map); + if (!ret || m.status() != Machine::finished) + return false; + } + + return true; +} + + +void SlotMap::collectGarbage(Slot * &aSlot) +{ + for(Slot **s = begin(), *const *const se = end() - 1; s != se; ++s) { + Slot *& slot = *s; + if(slot && (slot->isDeleted() || slot->isCopied())) + { + if (slot == aSlot) + aSlot = slot->prev() ? slot->prev() : slot->next(); + segment.freeSlot(slot); + } + } +} + + + +int Pass::doAction(const Code *codeptr, Slot * & slot_out, vm::Machine & m) const +{ + assert(codeptr); + if (!*codeptr) return 0; + SlotMap & smap = m.slotMap(); + vm::slotref * map = &smap[smap.context()]; + smap.highpassed(false); + + int32 ret = codeptr->run(m, map); + + if (m.status() != Machine::finished) + { + slot_out = NULL; + smap.highwater(0); + return 0; + } + + slot_out = *map; + return ret; +} + + +void Pass::adjustSlot(int delta, Slot * & slot_out, SlotMap & smap) const +{ + if (!slot_out) + { + if (smap.highpassed() || slot_out == smap.highwater()) + { + slot_out = smap.segment.last(); + ++delta; + if (!smap.highwater() || smap.highwater() == slot_out) + smap.highpassed(false); + } + else + { + slot_out = smap.segment.first(); + --delta; + } + } + if (delta < 0) + { + while (++delta <= 0 && slot_out) + { + slot_out = slot_out->prev(); + if (smap.highpassed() && smap.highwater() == slot_out) + smap.highpassed(false); + } + } + else if (delta > 0) + { + while (--delta >= 0 && slot_out) + { + if (slot_out == smap.highwater() && slot_out) + smap.highpassed(true); + slot_out = slot_out->next(); + } + } +} + +bool Pass::collisionShift(Segment *seg, int dir, json * const dbgout) const +{ + ShiftCollider shiftcoll(dbgout); + // bool isfirst = true; + bool hasCollisions = false; + Slot *start = seg->first(); // turn on collision fixing for the first slot + Slot *end = NULL; + bool moved = false; + +#if !defined GRAPHITE2_NTRACING + if (dbgout) + *dbgout << "collisions" << json::array + << json::flat << json::object << "num-loops" << m_numCollRuns << json::close; +#endif + + while (start) + { +#if !defined GRAPHITE2_NTRACING + if (dbgout) *dbgout << json::object << "phase" << "1" << "moves" << json::array; +#endif + hasCollisions = false; + end = NULL; + // phase 1 : position shiftable glyphs, ignoring kernable glyphs + for (Slot *s = start; s; s = s->next()) + { + const SlotCollision * c = seg->collisionInfo(s); + if (start && (c->flags() & (SlotCollision::COLL_FIX | SlotCollision::COLL_KERN)) == SlotCollision::COLL_FIX + && !resolveCollisions(seg, s, start, shiftcoll, false, dir, moved, hasCollisions, dbgout)) + return false; + if (s != start && (c->flags() & SlotCollision::COLL_END)) + { + end = s->next(); + break; + } + } + +#if !defined GRAPHITE2_NTRACING + if (dbgout) + *dbgout << json::close << json::close; // phase-1 +#endif + + // phase 2 : loop until happy. + for (int i = 0; i < m_numCollRuns - 1; ++i) + { + if (hasCollisions || moved) + { + +#if !defined GRAPHITE2_NTRACING + if (dbgout) + *dbgout << json::object << "phase" << "2a" << "loop" << i << "moves" << json::array; +#endif + // phase 2a : if any shiftable glyphs are in collision, iterate backwards, + // fixing them and ignoring other non-collided glyphs. Note that this handles ONLY + // glyphs that are actually in collision from phases 1 or 2b, and working backwards + // has the intended effect of breaking logjams. + if (hasCollisions) + { + hasCollisions = false; + #if 0 + moved = true; + for (Slot *s = start; s != end; s = s->next()) + { + SlotCollision * c = seg->collisionInfo(s); + c->setShift(Position(0, 0)); + } + #endif + Slot *lend = end ? end->prev() : seg->last(); + Slot *lstart = start->prev(); + for (Slot *s = lend; s != lstart; s = s->prev()) + { + SlotCollision * c = seg->collisionInfo(s); + if (start && (c->flags() & (SlotCollision::COLL_FIX | SlotCollision::COLL_KERN | SlotCollision::COLL_ISCOL)) + == (SlotCollision::COLL_FIX | SlotCollision::COLL_ISCOL)) // ONLY if this glyph is still colliding + { + if (!resolveCollisions(seg, s, lend, shiftcoll, true, dir, moved, hasCollisions, dbgout)) + return false; + c->setFlags(c->flags() | SlotCollision::COLL_TEMPLOCK); + } + } + } + +#if !defined GRAPHITE2_NTRACING + if (dbgout) + *dbgout << json::close << json::close // phase 2a + << json::object << "phase" << "2b" << "loop" << i << "moves" << json::array; +#endif + + // phase 2b : redo basic diacritic positioning pass for ALL glyphs. Each successive loop adjusts + // glyphs from their current adjusted position, which has the effect of gradually minimizing the + // resulting adjustment; ie, the final result will be gradually closer to the original location. + // Also it allows more flexibility in the final adjustment, since it is moving along the + // possible 8 vectors from successively different starting locations. + if (moved) + { + moved = false; + for (Slot *s = start; s != end; s = s->next()) + { + SlotCollision * c = seg->collisionInfo(s); + if (start && (c->flags() & (SlotCollision::COLL_FIX | SlotCollision::COLL_TEMPLOCK + | SlotCollision::COLL_KERN)) == SlotCollision::COLL_FIX + && !resolveCollisions(seg, s, start, shiftcoll, false, dir, moved, hasCollisions, dbgout)) + return false; + else if (c->flags() & SlotCollision::COLL_TEMPLOCK) + c->setFlags(c->flags() & ~SlotCollision::COLL_TEMPLOCK); + } + } + // if (!hasCollisions) // no, don't leave yet because phase 2b will continue to improve things + // break; +#if !defined GRAPHITE2_NTRACING + if (dbgout) + *dbgout << json::close << json::close; // phase 2 +#endif + } + } + if (!end) + break; + start = NULL; + for (Slot *s = end->prev(); s; s = s->next()) + { + if (seg->collisionInfo(s)->flags() & SlotCollision::COLL_START) + { + start = s; + break; + } + } + } + return true; +} + +bool Pass::collisionKern(Segment *seg, int dir, json * const dbgout) const +{ + Slot *start = seg->first(); + float ymin = 1e38f; + float ymax = -1e38f; + const GlyphCache &gc = seg->getFace()->glyphs(); + + // phase 3 : handle kerning of clusters +#if !defined GRAPHITE2_NTRACING + if (dbgout) + *dbgout << json::object << "phase" << "3" << "moves" << json::array; +#endif + + for (Slot *s = seg->first(); s; s = s->next()) + { + if (!gc.check(s->gid())) + return false; + const SlotCollision * c = seg->collisionInfo(s); + const Rect &bbox = seg->theGlyphBBoxTemporary(s->gid()); + float y = s->origin().y + c->shift().y; + if (!(c->flags() & SlotCollision::COLL_ISSPACE)) + { + ymax = max(y + bbox.tr.y, ymax); + ymin = min(y + bbox.bl.y, ymin); + } + if (start && (c->flags() & (SlotCollision::COLL_KERN | SlotCollision::COLL_FIX)) + == (SlotCollision::COLL_KERN | SlotCollision::COLL_FIX)) + resolveKern(seg, s, start, dir, ymin, ymax, dbgout); + if (c->flags() & SlotCollision::COLL_END) + start = NULL; + if (c->flags() & SlotCollision::COLL_START) + start = s; + } + +#if !defined GRAPHITE2_NTRACING + if (dbgout) + *dbgout << json::close << json::close; // phase 3 +#endif + return true; +} + +bool Pass::collisionFinish(Segment *seg, GR_MAYBE_UNUSED json * const dbgout) const +{ + for (Slot *s = seg->first(); s; s = s->next()) + { + SlotCollision *c = seg->collisionInfo(s); + if (c->shift().x != 0 || c->shift().y != 0) + { + const Position newOffset = c->shift(); + const Position nullPosition(0, 0); + c->setOffset(newOffset + c->offset()); + c->setShift(nullPosition); + } + } +// seg->positionSlots(); + +#if !defined GRAPHITE2_NTRACING + if (dbgout) + *dbgout << json::close; +#endif + return true; +} + +// Can slot s be kerned, or is it attached to something that can be kerned? +static bool inKernCluster(Segment *seg, Slot *s) +{ + SlotCollision *c = seg->collisionInfo(s); + if (c->flags() & SlotCollision::COLL_KERN /** && c->flags() & SlotCollision::COLL_FIX **/ ) + return true; + while (s->attachedTo()) + { + s = s->attachedTo(); + c = seg->collisionInfo(s); + if (c->flags() & SlotCollision::COLL_KERN /** && c->flags() & SlotCollision::COLL_FIX **/ ) + return true; + } + return false; +} + +// Fix collisions for the given slot. +// Return true if everything was fixed, false if there are still collisions remaining. +// isRev means be we are processing backwards. +bool Pass::resolveCollisions(Segment *seg, Slot *slotFix, Slot *start, + ShiftCollider &coll, GR_MAYBE_UNUSED bool isRev, int dir, bool &moved, bool &hasCol, + json * const dbgout) const +{ + Slot * nbor; // neighboring slot + SlotCollision *cFix = seg->collisionInfo(slotFix); + if (!coll.initSlot(seg, slotFix, cFix->limit(), cFix->margin(), cFix->marginWt(), + cFix->shift(), cFix->offset(), dir, dbgout)) + return false; + bool collides = false; + // When we're processing forward, ignore kernable glyphs that preceed the target glyph. + // When processing backward, don't ignore these until we pass slotFix. + bool ignoreForKern = !isRev; + bool rtl = dir & 1; + Slot *base = slotFix; + while (base->attachedTo()) + base = base->attachedTo(); + Position zero(0., 0.); + + // Look for collisions with the neighboring glyphs. + for (nbor = start; nbor; nbor = isRev ? nbor->prev() : nbor->next()) + { + SlotCollision *cNbor = seg->collisionInfo(nbor); + bool sameCluster = nbor->isChildOf(base); + if (nbor != slotFix // don't process if this is the slot of interest + && !(cNbor->ignore()) // don't process if ignoring + && (nbor == base || sameCluster // process if in the same cluster as slotFix + || !inKernCluster(seg, nbor)) // or this cluster is not to be kerned +// || (rtl ^ ignoreForKern)) // or it comes before(ltr) or after(rtl) + && (!isRev // if processing forwards then good to merge otherwise only: + || !(cNbor->flags() & SlotCollision::COLL_FIX) // merge in immovable stuff + || ((cNbor->flags() & SlotCollision::COLL_KERN) && !sameCluster) // ignore other kernable clusters + || (cNbor->flags() & SlotCollision::COLL_ISCOL)) // test against other collided glyphs + && !coll.mergeSlot(seg, nbor, cNbor, cNbor->shift(), !ignoreForKern, sameCluster, collides, false, dbgout)) + return false; + else if (nbor == slotFix) + // Switching sides of this glyph - if we were ignoring kernable stuff before, don't anymore. + ignoreForKern = !ignoreForKern; + + if (nbor != start && (cNbor->flags() & (isRev ? SlotCollision::COLL_START : SlotCollision::COLL_END))) + break; + } + bool isCol = false; + if (collides || cFix->shift().x != 0.f || cFix->shift().y != 0.f) + { + Position shift = coll.resolve(seg, isCol, dbgout); + // isCol has been set to true if a collision remains. + if (std::fabs(shift.x) < 1e38f && std::fabs(shift.y) < 1e38f) + { + if (sqr(shift.x-cFix->shift().x) + sqr(shift.y-cFix->shift().y) >= m_colThreshold * m_colThreshold) + moved = true; + cFix->setShift(shift); + if (slotFix->firstChild()) + { + Rect bbox; + Position here = slotFix->origin() + shift; + float clusterMin = here.x; + slotFix->firstChild()->finalise(seg, NULL, here, bbox, 0, clusterMin, rtl, false); + } + } + } + else + { + // This glyph is not colliding with anything. +#if !defined GRAPHITE2_NTRACING + if (dbgout) + { + *dbgout << json::object + << "missed" << objectid(dslot(seg, slotFix)); + coll.outputJsonDbg(dbgout, seg, -1); + *dbgout << json::close; + } +#endif + } + + // Set the is-collision flag bit. + if (isCol) + { cFix->setFlags(cFix->flags() | SlotCollision::COLL_ISCOL | SlotCollision::COLL_KNOWN); } + else + { cFix->setFlags((cFix->flags() & ~SlotCollision::COLL_ISCOL) | SlotCollision::COLL_KNOWN); } + hasCol |= isCol; + return true; +} + +float Pass::resolveKern(Segment *seg, Slot *slotFix, GR_MAYBE_UNUSED Slot *start, int dir, + float &ymin, float &ymax, json *const dbgout) const +{ + Slot *nbor; // neighboring slot + float currSpace = 0.; + bool collides = false; + unsigned int space_count = 0; + Slot *base = slotFix; + while (base->attachedTo()) + base = base->attachedTo(); + SlotCollision *cFix = seg->collisionInfo(base); + const GlyphCache &gc = seg->getFace()->glyphs(); + const Rect &bbb = seg->theGlyphBBoxTemporary(slotFix->gid()); + const float by = slotFix->origin().y + cFix->shift().y; + + if (base != slotFix) + { + cFix->setFlags(cFix->flags() | SlotCollision::COLL_KERN | SlotCollision::COLL_FIX); + return 0; + } + bool seenEnd = (cFix->flags() & SlotCollision::COLL_END) != 0; + bool isInit = false; + KernCollider coll(dbgout); + + ymax = max(by + bbb.tr.y, ymax); + ymin = min(by + bbb.bl.y, ymin); + for (nbor = slotFix->next(); nbor; nbor = nbor->next()) + { + if (nbor->isChildOf(base)) + continue; + if (!gc.check(nbor->gid())) + return 0.; + const Rect &bb = seg->theGlyphBBoxTemporary(nbor->gid()); + SlotCollision *cNbor = seg->collisionInfo(nbor); + if ((bb.bl.y == 0.f && bb.tr.y == 0.f) || (cNbor->flags() & SlotCollision::COLL_ISSPACE)) + { + if (m_kernColls == InWord) + break; + // Add space for a space glyph. + currSpace += nbor->advance(); + ++space_count; + } + else + { + space_count = 0; + if (nbor != slotFix && !cNbor->ignore()) + { + seenEnd = true; + if (!isInit) + { + if (!coll.initSlot(seg, slotFix, cFix->limit(), cFix->margin(), + cFix->shift(), cFix->offset(), dir, ymin, ymax, dbgout)) + return 0.; + isInit = true; + } + collides |= coll.mergeSlot(seg, nbor, cNbor->shift(), currSpace, dir, dbgout); + } + } + if (cNbor->flags() & SlotCollision::COLL_END) + { + if (seenEnd && space_count < 2) + break; + else + seenEnd = true; + } + } + if (collides) + { + Position mv = coll.resolve(seg, slotFix, dir, dbgout); + coll.shift(mv, dir); + Position delta = slotFix->advancePos() + mv - cFix->shift(); + slotFix->advance(delta); + cFix->setShift(mv); + return mv.x; + } + return 0.; +} diff --git a/thirdparty/graphite/src/Position.cpp b/thirdparty/graphite/src/Position.cpp new file mode 100644 index 0000000000..d2fdbd4e7c --- /dev/null +++ b/thirdparty/graphite/src/Position.cpp @@ -0,0 +1,97 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "inc/Position.h" +#include <cmath> + +using namespace graphite2; + +bool Rect::hitTest(Rect &other) +{ + if (bl.x > other.tr.x) return false; + if (tr.x < other.bl.x) return false; + if (bl.y > other.tr.y) return false; + if (tr.y < other.bl.y) return false; + return true; +} + +Position Rect::overlap(Position &offset, Rect &other, Position &othero) +{ + float ax = (bl.x + offset.x) - (other.tr.x + othero.x); + float ay = (bl.y + offset.y) - (other.tr.y + othero.y); + float bx = (other.bl.x + othero.x) - (tr.x + offset.x); + float by = (other.bl.y + othero.y) - (tr.y + offset.y); + return Position((ax > bx ? ax : bx), (ay > by ? ay : by)); +} + +float boundmin(float move, float lim1, float lim2, float &error) +{ + // error is always positive for easy comparison + if (move < lim1 && move < lim2) + { error = 0.; return move; } + else if (lim1 < lim2) + { error = std::fabs(move - lim1); return lim1; } + else + { error = std::fabs(move - lim2); return lim2; } +} + +#if 0 +Position Rect::constrainedAvoid(Position &offset, Rect &box, Rect &sdbox, Position &other, Rect &obox, Rect &osdbox) +{ + // a = max, i = min, s = sum, d = diff + float eax, eay, eix, eiy, eas, eis, ead, eid; + float beste = INF; + Position res; + // calculate the movements in each direction and the error (amount of remaining overlap) + // first param is movement, second and third are movement over the constraining box + float ax = boundmin(obox.tr.x + other.x - box.bl.x - offset.x + 1, tr.x - offset.x, INF, &eax); + float ay = boundmin(obox.tr.y + other.y - box.bl.y - offset.y + 1, tr.y - offset.y, INF, &eay); + float ix = boundmin(obox.bl.x + other.x - box.tr.x - offset.x + 1, bl.x - offset.x, INF, &eix); + float iy = boundmin(obox.bl.y + other.y - box.tr.y - offset.y + 1, bl.y - offset.y, INF, &eiy); + float as = boundmin(ISQRT2 * (osdbox.tr.x + other.x + other.y - sdbox.bl.x - offset.x - offset.y) + 1, tr.x - offset.x, tr.y - offset.y, &eas); + float is = boundmin(ISQRT2 * (osdbox.bl.x + other.x + other.y - sdbox.tr.x - offset.x - offset.y) + 1, bl.x - offset.x, bl.y - offset.y, &eis); + float ad = boundmin(ISQRT2 * (osdbox.tr.y + other.x - other.y - sdbox.bl.y - offset.x + offset.y) + 1, tr.y - offset.y, tr.x - offset.x, &ead); + float id = boundmin(ISQRT2 * (osdbox.bl.y + other.x - other.y - sdbox.tr.y - offset.x + offset.y) + 1, bl.y - offset.y, bl.x - offset.x, &eid); + + if (eax < beste) + { res = Position(ax, 0); beste = eax; } + if (eay < beste) + { res = Position(0, ay); beste = eay; } + if (eix < beste) + { res = Position(ix, 0); beste = eix; } + if (eiy < beste) + { res = Position(0, iy); beste = eiy; } + if (SQRT2 * (eas) < beste) + { res = Position(as, ad); beste = SQRT2 * (eas); } + if (SQRT2 * (eis) < beste) + { res = Position(is, is); beste = SQRT2 * (eis); } + if (SQRT2 * (ead) < beste) + { res = Position(ad, ad); beste = SQRT2 * (ead); } + if (SQRT2 * (eid) < beste) + { res = Position(id, id); beste = SQRT2 * (eid); } + return res; +} +#endif diff --git a/thirdparty/graphite/src/Segment.cpp b/thirdparty/graphite/src/Segment.cpp new file mode 100644 index 0000000000..62edd4250f --- /dev/null +++ b/thirdparty/graphite/src/Segment.cpp @@ -0,0 +1,423 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "inc/UtfCodec.h" +#include <cstring> +#include <cstdlib> + +#include "inc/bits.h" +#include "inc/Segment.h" +#include "graphite2/Font.h" +#include "inc/CharInfo.h" +#include "inc/debug.h" +#include "inc/Slot.h" +#include "inc/Main.h" +#include "inc/CmapCache.h" +#include "inc/Collider.h" +#include "graphite2/Segment.h" + + +using namespace graphite2; + +Segment::Segment(size_t numchars, const Face* face, uint32 script, int textDir) +: m_freeSlots(NULL), + m_freeJustifies(NULL), + m_charinfo(new CharInfo[numchars]), + m_collisions(NULL), + m_face(face), + m_silf(face->chooseSilf(script)), + m_first(NULL), + m_last(NULL), + m_bufSize(numchars + 10), + m_numGlyphs(numchars), + m_numCharinfo(numchars), + m_defaultOriginal(0), + m_dir(textDir), + m_flags(((m_silf->flags() & 0x20) != 0) << 1), + m_passBits(m_silf->aPassBits() ? -1 : 0) +{ + freeSlot(newSlot()); + m_bufSize = log_binary(numchars)+1; +} + +Segment::~Segment() +{ + for (SlotRope::iterator i = m_slots.begin(); i != m_slots.end(); ++i) + free(*i); + for (AttributeRope::iterator i = m_userAttrs.begin(); i != m_userAttrs.end(); ++i) + free(*i); + for (JustifyRope::iterator i = m_justifies.begin(); i != m_justifies.end(); ++i) + free(*i); + delete[] m_charinfo; + free(m_collisions); +} + +void Segment::appendSlot(int id, int cid, int gid, int iFeats, size_t coffset) +{ + Slot *aSlot = newSlot(); + + if (!aSlot) return; + m_charinfo[id].init(cid); + m_charinfo[id].feats(iFeats); + m_charinfo[id].base(coffset); + const GlyphFace * theGlyph = m_face->glyphs().glyphSafe(gid); + m_charinfo[id].breakWeight(theGlyph ? theGlyph->attrs()[m_silf->aBreak()] : 0); + + aSlot->child(NULL); + aSlot->setGlyph(this, gid, theGlyph); + aSlot->originate(id); + aSlot->before(id); + aSlot->after(id); + if (m_last) m_last->next(aSlot); + aSlot->prev(m_last); + m_last = aSlot; + if (!m_first) m_first = aSlot; + if (theGlyph && m_silf->aPassBits()) + m_passBits &= theGlyph->attrs()[m_silf->aPassBits()] + | (m_silf->numPasses() > 16 ? (theGlyph->attrs()[m_silf->aPassBits() + 1] << 16) : 0); +} + +Slot *Segment::newSlot() +{ + if (!m_freeSlots) + { + // check that the segment doesn't grow indefinintely + if (m_numGlyphs > m_numCharinfo * MAX_SEG_GROWTH_FACTOR) + return NULL; + int numUser = m_silf->numUser(); +#if !defined GRAPHITE2_NTRACING + if (m_face->logger()) ++numUser; +#endif + Slot *newSlots = grzeroalloc<Slot>(m_bufSize); + int16 *newAttrs = grzeroalloc<int16>(m_bufSize * numUser); + if (!newSlots || !newAttrs) + { + free(newSlots); + free(newAttrs); + return NULL; + } + for (size_t i = 0; i < m_bufSize; i++) + { + ::new (newSlots + i) Slot(newAttrs + i * numUser); + newSlots[i].next(newSlots + i + 1); + } + newSlots[m_bufSize - 1].next(NULL); + newSlots[0].next(NULL); + m_slots.push_back(newSlots); + m_userAttrs.push_back(newAttrs); + m_freeSlots = (m_bufSize > 1)? newSlots + 1 : NULL; + return newSlots; + } + Slot *res = m_freeSlots; + m_freeSlots = m_freeSlots->next(); + res->next(NULL); + return res; +} + +void Segment::freeSlot(Slot *aSlot) +{ + if (aSlot == nullptr) return; + if (m_last == aSlot) m_last = aSlot->prev(); + if (m_first == aSlot) m_first = aSlot->next(); + if (aSlot->attachedTo()) + aSlot->attachedTo()->removeChild(aSlot); + while (aSlot->firstChild()) + { + if (aSlot->firstChild()->attachedTo() == aSlot) + { + aSlot->firstChild()->attachTo(nullptr); + aSlot->removeChild(aSlot->firstChild()); + } + else + aSlot->firstChild(nullptr); + } + // reset the slot incase it is reused + ::new (aSlot) Slot(aSlot->userAttrs()); + memset(aSlot->userAttrs(), 0, m_silf->numUser() * sizeof(int16)); + // Update generation counter for debug +#if !defined GRAPHITE2_NTRACING + if (m_face->logger()) + ++aSlot->userAttrs()[m_silf->numUser()]; +#endif + // update next pointer + if (!m_freeSlots) + aSlot->next(nullptr); + else + aSlot->next(m_freeSlots); + m_freeSlots = aSlot; +} + +SlotJustify *Segment::newJustify() +{ + if (!m_freeJustifies) + { + const size_t justSize = SlotJustify::size_of(m_silf->numJustLevels()); + byte *justs = grzeroalloc<byte>(justSize * m_bufSize); + if (!justs) return NULL; + for (ptrdiff_t i = m_bufSize - 2; i >= 0; --i) + { + SlotJustify *p = reinterpret_cast<SlotJustify *>(justs + justSize * i); + SlotJustify *next = reinterpret_cast<SlotJustify *>(justs + justSize * (i + 1)); + p->next = next; + } + m_freeJustifies = (SlotJustify *)justs; + m_justifies.push_back(m_freeJustifies); + } + SlotJustify *res = m_freeJustifies; + m_freeJustifies = m_freeJustifies->next; + res->next = NULL; + return res; +} + +void Segment::freeJustify(SlotJustify *aJustify) +{ + int numJust = m_silf->numJustLevels(); + if (m_silf->numJustLevels() <= 0) numJust = 1; + aJustify->next = m_freeJustifies; + memset(aJustify->values, 0, numJust*SlotJustify::NUMJUSTPARAMS*sizeof(int16)); + m_freeJustifies = aJustify; +} + +// reverse the slots but keep diacritics in their same position after their bases +void Segment::reverseSlots() +{ + m_dir = m_dir ^ 64; // invert the reverse flag + if (m_first == m_last) return; // skip 0 or 1 glyph runs + + Slot *t = 0; + Slot *curr = m_first; + Slot *tlast; + Slot *tfirst; + Slot *out = 0; + + while (curr && getSlotBidiClass(curr) == 16) + curr = curr->next(); + if (!curr) return; + tfirst = curr->prev(); + tlast = curr; + + while (curr) + { + if (getSlotBidiClass(curr) == 16) + { + Slot *d = curr->next(); + while (d && getSlotBidiClass(d) == 16) + d = d->next(); + + d = d ? d->prev() : m_last; + Slot *p = out->next(); // one after the diacritics. out can't be null + if (p) + p->prev(d); + else + tlast = d; + t = d->next(); + d->next(p); + curr->prev(out); + out->next(curr); + } + else // will always fire first time round the loop + { + if (out) + out->prev(curr); + t = curr->next(); + curr->next(out); + out = curr; + } + curr = t; + } + out->prev(tfirst); + if (tfirst) + tfirst->next(out); + else + m_first = out; + m_last = tlast; +} + +void Segment::linkClusters(Slot *s, Slot * end) +{ + end = end->next(); + + for (; s != end && !s->isBase(); s = s->next()); + Slot * ls = s; + + if (m_dir & 1) + { + for (; s != end; s = s->next()) + { + if (!s->isBase()) continue; + + s->sibling(ls); + ls = s; + } + } + else + { + for (; s != end; s = s->next()) + { + if (!s->isBase()) continue; + + ls->sibling(s); + ls = s; + } + } +} + +Position Segment::positionSlots(const Font *font, Slot * iStart, Slot * iEnd, bool isRtl, bool isFinal) +{ + Position currpos(0., 0.); + float clusterMin = 0.; + Rect bbox; + bool reorder = (currdir() != isRtl); + + if (reorder) + { + Slot *temp; + reverseSlots(); + temp = iStart; + iStart = iEnd; + iEnd = temp; + } + if (!iStart) iStart = m_first; + if (!iEnd) iEnd = m_last; + + if (!iStart || !iEnd) // only true for empty segments + return currpos; + + if (isRtl) + { + for (Slot * s = iEnd, * const end = iStart->prev(); s && s != end; s = s->prev()) + { + if (s->isBase()) + currpos = s->finalise(this, font, currpos, bbox, 0, clusterMin = currpos.x, isRtl, isFinal); + } + } + else + { + for (Slot * s = iStart, * const end = iEnd->next(); s && s != end; s = s->next()) + { + if (s->isBase()) + currpos = s->finalise(this, font, currpos, bbox, 0, clusterMin = currpos.x, isRtl, isFinal); + } + } + if (reorder) + reverseSlots(); + return currpos; +} + + +void Segment::associateChars(int offset, size_t numChars) +{ + int i = 0, j = 0; + CharInfo *c, *cend; + for (c = m_charinfo + offset, cend = m_charinfo + offset + numChars; c != cend; ++c) + { + c->before(-1); + c->after(-1); + } + for (Slot * s = m_first; s; s->index(i++), s = s->next()) + { + j = s->before(); + if (j < 0) continue; + + for (const int after = s->after(); j <= after; ++j) + { + c = charinfo(j); + if (c->before() == -1 || i < c->before()) c->before(i); + if (c->after() < i) c->after(i); + } + } + for (Slot *s = m_first; s; s = s->next()) + { + int a; + for (a = s->after() + 1; a < offset + int(numChars) && charinfo(a)->after() < 0; ++a) + { charinfo(a)->after(s->index()); } + --a; + s->after(a); + + for (a = s->before() - 1; a >= offset && charinfo(a)->before() < 0; --a) + { charinfo(a)->before(s->index()); } + ++a; + s->before(a); + } +} + + +template <typename utf_iter> +inline void process_utf_data(Segment & seg, const Face & face, const int fid, utf_iter c, size_t n_chars) +{ + const Cmap & cmap = face.cmap(); + int slotid = 0; + + const typename utf_iter::codeunit_type * const base = c; + for (; n_chars; --n_chars, ++c, ++slotid) + { + const uint32 usv = *c; + uint16 gid = cmap[usv]; + if (!gid) gid = face.findPseudo(usv); + seg.appendSlot(slotid, usv, gid, fid, c - base); + } +} + + +bool Segment::read_text(const Face *face, const Features* pFeats/*must not be NULL*/, gr_encform enc, const void* pStart, size_t nChars) +{ + assert(face); + assert(pFeats); + if (!m_charinfo) return false; + + // utf iterator is self recovering so we don't care about the error state of the iterator. + switch (enc) + { + case gr_utf8: process_utf_data(*this, *face, addFeatures(*pFeats), utf8::const_iterator(pStart), nChars); break; + case gr_utf16: process_utf_data(*this, *face, addFeatures(*pFeats), utf16::const_iterator(pStart), nChars); break; + case gr_utf32: process_utf_data(*this, *face, addFeatures(*pFeats), utf32::const_iterator(pStart), nChars); break; + } + return true; +} + +void Segment::doMirror(uint16 aMirror) +{ + Slot * s; + for (s = m_first; s; s = s->next()) + { + unsigned short g = glyphAttr(s->gid(), aMirror); + if (g && (!(dir() & 4) || !glyphAttr(s->gid(), aMirror + 1))) + s->setGlyph(this, g); + } +} + +bool Segment::initCollisions() +{ + m_collisions = grzeroalloc<SlotCollision>(slotCount()); + if (!m_collisions) return false; + + for (Slot *p = m_first; p; p = p->next()) + if (p->index() < slotCount()) + ::new (collisionInfo(p)) SlotCollision(this, p); + else + return false; + return true; +} diff --git a/thirdparty/graphite/src/Silf.cpp b/thirdparty/graphite/src/Silf.cpp new file mode 100644 index 0000000000..44d3c96171 --- /dev/null +++ b/thirdparty/graphite/src/Silf.cpp @@ -0,0 +1,439 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include <cstdlib> +#include "graphite2/Segment.h" +#include "inc/debug.h" +#include "inc/Endian.h" +#include "inc/Silf.h" +#include "inc/Segment.h" +#include "inc/Rule.h" +#include "inc/Error.h" + + +using namespace graphite2; + +namespace { static const uint32 ERROROFFSET = 0xFFFFFFFF; } + +Silf::Silf() throw() +: m_passes(0), + m_pseudos(0), + m_classOffsets(0), + m_classData(0), + m_justs(0), + m_numPasses(0), + m_numJusts(0), + m_sPass(0), + m_pPass(0), + m_jPass(0), + m_bPass(0), + m_flags(0), + m_dir(0), + m_aPseudo(0), + m_aBreak(0), + m_aUser(0), + m_aBidi(0), + m_aMirror(0), + m_aPassBits(0), + m_iMaxComp(0), + m_aCollision(0), + m_aLig(0), + m_numPseudo(0), + m_nClass(0), + m_nLinear(0), + m_gEndLine(0) +{ + memset(&m_silfinfo, 0, sizeof m_silfinfo); +} + +Silf::~Silf() throw() +{ + releaseBuffers(); +} + +void Silf::releaseBuffers() throw() +{ + delete [] m_passes; + delete [] m_pseudos; + free(m_classOffsets); + free(m_classData); + free(m_justs); + m_passes= 0; + m_pseudos = 0; + m_classOffsets = 0; + m_classData = 0; + m_justs = 0; +} + + +bool Silf::readGraphite(const byte * const silf_start, size_t lSilf, Face& face, uint32 version) +{ + const byte * p = silf_start, + * const silf_end = p + lSilf; + Error e; + + if (e.test(version >= 0x00060000, E_BADSILFVERSION)) + { + releaseBuffers(); return face.error(e); + } + if (version >= 0x00030000) + { + if (e.test(lSilf < 28, E_BADSIZE)) { releaseBuffers(); return face.error(e); } + be::skip<int32>(p); // ruleVersion + be::skip<uint16>(p,2); // passOffset & pseudosOffset + } + else if (e.test(lSilf < 20, E_BADSIZE)) { releaseBuffers(); return face.error(e); } + const uint16 maxGlyph = be::read<uint16>(p); + m_silfinfo.extra_ascent = be::read<uint16>(p); + m_silfinfo.extra_descent = be::read<uint16>(p); + m_numPasses = be::read<uint8>(p); + m_sPass = be::read<uint8>(p); + m_pPass = be::read<uint8>(p); + m_jPass = be::read<uint8>(p); + m_bPass = be::read<uint8>(p); + m_flags = be::read<uint8>(p); + be::skip<uint8>(p,2); // max{Pre,Post}Context. + m_aPseudo = be::read<uint8>(p); + m_aBreak = be::read<uint8>(p); + m_aBidi = be::read<uint8>(p); + m_aMirror = be::read<uint8>(p); + m_aPassBits = be::read<uint8>(p); + + // Read Justification levels. + m_numJusts = be::read<uint8>(p); + if (e.test(maxGlyph >= face.glyphs().numGlyphs(), E_BADMAXGLYPH) + || e.test(p + m_numJusts * 8 >= silf_end, E_BADNUMJUSTS)) + { + releaseBuffers(); return face.error(e); + } + + if (m_numJusts) + { + m_justs = gralloc<Justinfo>(m_numJusts); + if (e.test(!m_justs, E_OUTOFMEM)) return face.error(e); + + for (uint8 i = 0; i < m_numJusts; i++) + { + ::new(m_justs + i) Justinfo(p[0], p[1], p[2], p[3]); + be::skip<byte>(p,8); + } + } + + if (e.test(p + sizeof(uint16) + sizeof(uint8)*8 >= silf_end, E_BADENDJUSTS)) { releaseBuffers(); return face.error(e); } + m_aLig = be::read<uint16>(p); + m_aUser = be::read<uint8>(p); + m_iMaxComp = be::read<uint8>(p); + m_dir = be::read<uint8>(p) - 1; + m_aCollision = be::read<uint8>(p); + be::skip<byte>(p,3); + be::skip<uint16>(p, be::read<uint8>(p)); // don't need critical features yet + be::skip<byte>(p); // reserved + if (e.test(p >= silf_end, E_BADCRITFEATURES)) { releaseBuffers(); return face.error(e); } + be::skip<uint32>(p, be::read<uint8>(p)); // don't use scriptTag array. + if (e.test(p + sizeof(uint16) + sizeof(uint32) >= silf_end, E_BADSCRIPTTAGS)) { releaseBuffers(); return face.error(e); } + m_gEndLine = be::read<uint16>(p); // lbGID + const byte * o_passes = p; + uint32 passes_start = be::read<uint32>(p); + + const size_t num_attrs = face.glyphs().numAttrs(); + if (e.test(m_aPseudo >= num_attrs, E_BADAPSEUDO) + || e.test(m_aBreak >= num_attrs, E_BADABREAK) + || e.test(m_aBidi >= num_attrs, E_BADABIDI) + || e.test(m_aMirror>= num_attrs, E_BADAMIRROR) + || e.test(m_aCollision && m_aCollision >= num_attrs - 5, E_BADACOLLISION) + || e.test(m_numPasses > 128, E_BADNUMPASSES) || e.test(passes_start >= lSilf, E_BADPASSESSTART) + || e.test(m_pPass < m_sPass, E_BADPASSBOUND) || e.test(m_pPass > m_numPasses, E_BADPPASS) || e.test(m_sPass > m_numPasses, E_BADSPASS) + || e.test(m_jPass < m_pPass, E_BADJPASSBOUND) || e.test(m_jPass > m_numPasses, E_BADJPASS) + || e.test((m_bPass != 0xFF && (m_bPass < m_jPass || m_bPass > m_numPasses)), E_BADBPASS) + || e.test(m_aLig > 127, E_BADALIG)) + { + releaseBuffers(); + return face.error(e); + } + be::skip<uint32>(p, m_numPasses); + if (e.test(unsigned(p - silf_start) + sizeof(uint16) >= passes_start, E_BADPASSESSTART)) { releaseBuffers(); return face.error(e); } + m_numPseudo = be::read<uint16>(p); + be::skip<uint16>(p, 3); // searchPseudo, pseudoSelector, pseudoShift + m_pseudos = new Pseudo[m_numPseudo]; + if (e.test(unsigned(p - silf_start) + m_numPseudo*(sizeof(uint32) + sizeof(uint16)) >= passes_start, E_BADNUMPSEUDO) + || e.test(!m_pseudos, E_OUTOFMEM)) + { + releaseBuffers(); return face.error(e); + } + for (int i = 0; i < m_numPseudo; i++) + { + m_pseudos[i].uid = be::read<uint32>(p); + m_pseudos[i].gid = be::read<uint16>(p); + } + + const size_t clen = readClassMap(p, passes_start + silf_start - p, version, e); + m_passes = new Pass[m_numPasses]; + if (e || e.test(clen > unsigned(passes_start + silf_start - p), E_BADPASSESSTART) + || e.test(!m_passes, E_OUTOFMEM)) + { releaseBuffers(); return face.error(e); } + + for (size_t i = 0; i < m_numPasses; ++i) + { + uint32 pass_start = be::read<uint32>(o_passes); + uint32 pass_end = be::peek<uint32>(o_passes); + face.error_context((face.error_context() & 0xFF00) + EC_ASILF + unsigned(i << 16)); + if (e.test(pass_start > pass_end, E_BADPASSSTART) + || e.test(pass_start < passes_start, E_BADPASSSTART) + || e.test(pass_end > lSilf, E_BADPASSEND)) { + releaseBuffers(); return face.error(e); + } + + enum passtype pt = PASS_TYPE_UNKNOWN; + if (i >= m_jPass) pt = PASS_TYPE_JUSTIFICATION; + else if (i >= m_pPass) pt = PASS_TYPE_POSITIONING; + else if (i >= m_sPass) pt = PASS_TYPE_SUBSTITUTE; + else pt = PASS_TYPE_LINEBREAK; + + m_passes[i].init(this); + if (!m_passes[i].readPass(silf_start + pass_start, pass_end - pass_start, pass_start, face, pt, + version, e)) + { + releaseBuffers(); + return false; + } + } + + // fill in gr_faceinfo + m_silfinfo.upem = face.glyphs().unitsPerEm(); + m_silfinfo.has_bidi_pass = (m_bPass != 0xFF); + m_silfinfo.justifies = (m_numJusts != 0) || (m_jPass < m_pPass); + m_silfinfo.line_ends = (m_flags & 1); + m_silfinfo.space_contextuals = gr_faceinfo::gr_space_contextuals((m_flags >> 2) & 0x7); + return true; +} + +template<typename T> inline uint32 Silf::readClassOffsets(const byte *&p, size_t data_len, Error &e) +{ + const T cls_off = 2*sizeof(uint16) + sizeof(T)*(m_nClass+1); + const uint32 max_off = (be::peek<T>(p + sizeof(T)*m_nClass) - cls_off)/sizeof(uint16); + // Check that the last+1 offset is less than or equal to the class map length. + if (e.test(be::peek<T>(p) != cls_off, E_MISALIGNEDCLASSES) + || e.test(max_off > (data_len - cls_off)/sizeof(uint16), E_HIGHCLASSOFFSET)) + return ERROROFFSET; + + // Read in all the offsets. + m_classOffsets = gralloc<uint32>(m_nClass+1); + if (e.test(!m_classOffsets, E_OUTOFMEM)) return ERROROFFSET; + for (uint32 * o = m_classOffsets, * const o_end = o + m_nClass + 1; o != o_end; ++o) + { + *o = (be::read<T>(p) - cls_off)/sizeof(uint16); + if (e.test(*o > max_off, E_HIGHCLASSOFFSET)) + return ERROROFFSET; + } + return max_off; +} + +size_t Silf::readClassMap(const byte *p, size_t data_len, uint32 version, Error &e) +{ + if (e.test(data_len < sizeof(uint16)*2, E_BADCLASSSIZE)) return ERROROFFSET; + + m_nClass = be::read<uint16>(p); + m_nLinear = be::read<uint16>(p); + + // Check that numLinear < numClass, + // that there is at least enough data for numClasses offsets. + if (e.test(m_nLinear > m_nClass, E_TOOMANYLINEAR) + || e.test((m_nClass + 1) * (version >= 0x00040000 ? sizeof(uint32) : sizeof(uint16)) > (data_len - 4), E_CLASSESTOOBIG)) + return ERROROFFSET; + + uint32 max_off; + if (version >= 0x00040000) + max_off = readClassOffsets<uint32>(p, data_len, e); + else + max_off = readClassOffsets<uint16>(p, data_len, e); + + if (max_off == ERROROFFSET) return ERROROFFSET; + + if (e.test((int)max_off < m_nLinear + (m_nClass - m_nLinear) * 6, E_CLASSESTOOBIG)) + return ERROROFFSET; + + // Check the linear offsets are sane, these must be monotonically increasing. + assert(m_nClass >= m_nLinear); + for (const uint32 *o = m_classOffsets, * const o_end = o + m_nLinear; o != o_end; ++o) + if (e.test(o[0] > o[1], E_BADCLASSOFFSET)) + return ERROROFFSET; + + // Fortunately the class data is all uint16s so we can decode these now + m_classData = gralloc<uint16>(max_off); + if (e.test(!m_classData, E_OUTOFMEM)) return ERROROFFSET; + for (uint16 *d = m_classData, * const d_end = d + max_off; d != d_end; ++d) + *d = be::read<uint16>(p); + + // Check the lookup class invariants for each non-linear class + for (const uint32 *o = m_classOffsets + m_nLinear, * const o_end = m_classOffsets + m_nClass; o != o_end; ++o) + { + const uint16 * lookup = m_classData + *o; + if (e.test(*o + 4 > max_off, E_HIGHCLASSOFFSET) // LookupClass doesn't stretch over max_off + || e.test(lookup[0] == 0 // A LookupClass with no looks is a suspicious thing ... + || lookup[0] * 2 + *o + 4 > max_off // numIDs lookup pairs fits within (start of LookupClass' lookups array, max_off] + || lookup[3] + lookup[1] != lookup[0], E_BADCLASSLOOKUPINFO) // rangeShift: numIDs - searchRange + || e.test(((o[1] - *o) & 1) != 0, ERROROFFSET)) // glyphs are in pairs so difference must be even. + return ERROROFFSET; + } + + return max_off; +} + +uint16 Silf::findPseudo(uint32 uid) const +{ + for (int i = 0; i < m_numPseudo; i++) + if (m_pseudos[i].uid == uid) return m_pseudos[i].gid; + return 0; +} + +uint16 Silf::findClassIndex(uint16 cid, uint16 gid) const +{ + if (cid > m_nClass) return -1; + + const uint16 * cls = m_classData + m_classOffsets[cid]; + if (cid < m_nLinear) // output class being used for input, shouldn't happen + { + for (unsigned int i = 0, n = m_classOffsets[cid + 1] - m_classOffsets[cid]; i < n; ++i, ++cls) + if (*cls == gid) return i; + return -1; + } + else + { + const uint16 * min = cls + 4, // lookups array + * max = min + cls[0]*2; // lookups aray is numIDs (cls[0]) uint16 pairs long + do + { + const uint16 * p = min + (-2 & ((max-min)/2)); + if (p[0] > gid) max = p; + else min = p; + } + while (max - min > 2); + return min[0] == gid ? min[1] : -1; + } +} + +uint16 Silf::getClassGlyph(uint16 cid, unsigned int index) const +{ + if (cid > m_nClass) return 0; + + uint32 loc = m_classOffsets[cid]; + if (cid < m_nLinear) + { + if (index < m_classOffsets[cid + 1] - loc) + return m_classData[index + loc]; + } + else // input class being used for output. Shouldn't happen + { + for (unsigned int i = loc + 4; i < m_classOffsets[cid + 1]; i += 2) + if (m_classData[i + 1] == index) return m_classData[i]; + } + return 0; +} + + +bool Silf::runGraphite(Segment *seg, uint8 firstPass, uint8 lastPass, int dobidi) const +{ + assert(seg != 0); + size_t maxSize = seg->slotCount() * MAX_SEG_GROWTH_FACTOR; + SlotMap map(*seg, m_dir, maxSize); + FiniteStateMachine fsm(map, seg->getFace()->logger()); + vm::Machine m(map); + uint8 lbidi = m_bPass; +#if !defined GRAPHITE2_NTRACING + json * const dbgout = seg->getFace()->logger(); +#endif + + if (lastPass == 0) + { + if (firstPass == lastPass && lbidi == 0xFF) + return true; + lastPass = m_numPasses; + } + if ((firstPass < lbidi || (dobidi && firstPass == lbidi)) && (lastPass >= lbidi || (dobidi && lastPass + 1 == lbidi))) + lastPass++; + else + lbidi = 0xFF; + + for (size_t i = firstPass; i < lastPass; ++i) + { + // bidi and mirroring + if (i == lbidi) + { +#if !defined GRAPHITE2_NTRACING + if (dbgout) + { + *dbgout << json::item << json::object +// << "pindex" << i // for debugging + << "id" << -1 + << "slotsdir" << (seg->currdir() ? "rtl" : "ltr") + << "passdir" << (m_dir & 1 ? "rtl" : "ltr") + << "slots" << json::array; + seg->positionSlots(0, 0, 0, seg->currdir()); + for(Slot * s = seg->first(); s; s = s->next()) + *dbgout << dslot(seg, s); + *dbgout << json::close + << "rules" << json::array << json::close + << json::close; + } +#endif + if (seg->currdir() != (m_dir & 1)) + seg->reverseSlots(); + if (m_aMirror && (seg->dir() & 3) == 3) + seg->doMirror(m_aMirror); + --i; + lbidi = lastPass; + --lastPass; + continue; + } + +#if !defined GRAPHITE2_NTRACING + if (dbgout) + { + *dbgout << json::item << json::object +// << "pindex" << i // for debugging + << "id" << i+1 + << "slotsdir" << (seg->currdir() ? "rtl" : "ltr") + << "passdir" << ((m_dir & 1) ^ m_passes[i].reverseDir() ? "rtl" : "ltr") + << "slots" << json::array; + seg->positionSlots(0, 0, 0, seg->currdir()); + for(Slot * s = seg->first(); s; s = s->next()) + *dbgout << dslot(seg, s); + *dbgout << json::close; + } +#endif + + // test whether to reorder, prepare for positioning + bool reverse = (lbidi == 0xFF) && (seg->currdir() != ((m_dir & 1) ^ m_passes[i].reverseDir())); + if ((i >= 32 || (seg->passBits() & (1 << i)) == 0 || m_passes[i].collisionLoops()) + && !m_passes[i].runGraphite(m, fsm, reverse)) + return false; + // only subsitution passes can change segment length, cached subsegments are short for their text + if (m.status() != vm::Machine::finished + || (seg->slotCount() && seg->slotCount() > maxSize)) + return false; + } + return true; +} diff --git a/thirdparty/graphite/src/Slot.cpp b/thirdparty/graphite/src/Slot.cpp new file mode 100644 index 0000000000..0fdb098952 --- /dev/null +++ b/thirdparty/graphite/src/Slot.cpp @@ -0,0 +1,529 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "inc/Segment.h" +#include "inc/Slot.h" +#include "inc/Silf.h" +#include "inc/CharInfo.h" +#include "inc/Rule.h" +#include "inc/Collider.h" + + +using namespace graphite2; + +Slot::Slot(int16 *user_attrs) : + m_next(NULL), m_prev(NULL), + m_glyphid(0), m_realglyphid(0), m_original(0), m_before(0), m_after(0), + m_index(0), m_parent(NULL), m_child(NULL), m_sibling(NULL), + m_position(0, 0), m_shift(0, 0), m_advance(0, 0), + m_attach(0, 0), m_with(0, 0), m_just(0.), + m_flags(0), m_attLevel(0), m_bidiCls(-1), m_bidiLevel(0), + m_userAttr(user_attrs), m_justs(NULL) +{ +} + +// take care, this does not copy any of the GrSlot pointer fields +void Slot::set(const Slot & orig, int charOffset, size_t sizeAttr, size_t justLevels, size_t numChars) +{ + // leave m_next and m_prev unchanged + m_glyphid = orig.m_glyphid; + m_realglyphid = orig.m_realglyphid; + m_original = orig.m_original + charOffset; + if (charOffset + int(orig.m_before) < 0) + m_before = 0; + else + m_before = orig.m_before + charOffset; + if (charOffset <= 0 && orig.m_after + charOffset >= numChars) + m_after = int(numChars) - 1; + else + m_after = orig.m_after + charOffset; + m_parent = NULL; + m_child = NULL; + m_sibling = NULL; + m_position = orig.m_position; + m_shift = orig.m_shift; + m_advance = orig.m_advance; + m_attach = orig.m_attach; + m_with = orig.m_with; + m_flags = orig.m_flags; + m_attLevel = orig.m_attLevel; + m_bidiCls = orig.m_bidiCls; + m_bidiLevel = orig.m_bidiLevel; + if (m_userAttr && orig.m_userAttr) + memcpy(m_userAttr, orig.m_userAttr, sizeAttr * sizeof(*m_userAttr)); + if (m_justs && orig.m_justs) + memcpy(m_justs, orig.m_justs, SlotJustify::size_of(justLevels)); +} + +void Slot::update(int /*numGrSlots*/, int numCharInfo, Position &relpos) +{ + m_before += numCharInfo; + m_after += numCharInfo; + m_position = m_position + relpos; +} + +Position Slot::finalise(const Segment *seg, const Font *font, Position & base, Rect & bbox, uint8 attrLevel, float & clusterMin, bool rtl, bool isFinal, int depth) +{ + SlotCollision *coll = NULL; + if (depth > 100 || (attrLevel && m_attLevel > attrLevel)) return Position(0, 0); + float scale = font ? font->scale() : 1.0f; + Position shift(m_shift.x * (rtl * -2 + 1) + m_just, m_shift.y); + float tAdvance = m_advance.x + m_just; + if (isFinal && (coll = seg->collisionInfo(this))) + { + const Position &collshift = coll->offset(); + if (!(coll->flags() & SlotCollision::COLL_KERN) || rtl) + shift = shift + collshift; + } + const GlyphFace * glyphFace = seg->getFace()->glyphs().glyphSafe(glyph()); + if (font) + { + scale = font->scale(); + shift *= scale; + if (font->isHinted() && glyphFace) + tAdvance = (m_advance.x - glyphFace->theAdvance().x + m_just) * scale + font->advance(glyph()); + else + tAdvance *= scale; + } + Position res; + + m_position = base + shift; + if (!m_parent) + { + res = base + Position(tAdvance, m_advance.y * scale); + clusterMin = m_position.x; + } + else + { + float tAdv; + m_position += (m_attach - m_with) * scale; + tAdv = m_advance.x >= 0.5f ? m_position.x + tAdvance - shift.x : 0.f; + res = Position(tAdv, 0); + if ((m_advance.x >= 0.5f || m_position.x < 0) && m_position.x < clusterMin) clusterMin = m_position.x; + } + + if (glyphFace) + { + Rect ourBbox = glyphFace->theBBox() * scale + m_position; + bbox = bbox.widen(ourBbox); + } + + if (m_child && m_child != this && m_child->attachedTo() == this) + { + Position tRes = m_child->finalise(seg, font, m_position, bbox, attrLevel, clusterMin, rtl, isFinal, depth + 1); + if ((!m_parent || m_advance.x >= 0.5f) && tRes.x > res.x) res = tRes; + } + + if (m_parent && m_sibling && m_sibling != this && m_sibling->attachedTo() == m_parent) + { + Position tRes = m_sibling->finalise(seg, font, base, bbox, attrLevel, clusterMin, rtl, isFinal, depth + 1); + if (tRes.x > res.x) res = tRes; + } + + if (!m_parent && clusterMin < base.x) + { + Position adj = Position(m_position.x - clusterMin, 0.); + res += adj; + m_position += adj; + if (m_child) m_child->floodShift(adj); + } + return res; +} + +int32 Slot::clusterMetric(const Segment *seg, uint8 metric, uint8 attrLevel, bool rtl) +{ + Position base; + if (glyph() >= seg->getFace()->glyphs().numGlyphs()) + return 0; + Rect bbox = seg->theGlyphBBoxTemporary(glyph()); + float clusterMin = 0.; + Position res = finalise(seg, NULL, base, bbox, attrLevel, clusterMin, rtl, false); + + switch (metrics(metric)) + { + case kgmetLsb : + return int32(bbox.bl.x); + case kgmetRsb : + return int32(res.x - bbox.tr.x); + case kgmetBbTop : + return int32(bbox.tr.y); + case kgmetBbBottom : + return int32(bbox.bl.y); + case kgmetBbLeft : + return int32(bbox.bl.x); + case kgmetBbRight : + return int32(bbox.tr.x); + case kgmetBbWidth : + return int32(bbox.tr.x - bbox.bl.x); + case kgmetBbHeight : + return int32(bbox.tr.y - bbox.bl.y); + case kgmetAdvWidth : + return int32(res.x); + case kgmetAdvHeight : + return int32(res.y); + default : + return 0; + } +} + +#define SLOTGETCOLATTR(x) { SlotCollision *c = seg->collisionInfo(this); return c ? int(c-> x) : 0; } + +int Slot::getAttr(const Segment *seg, attrCode ind, uint8 subindex) const +{ + if (ind >= gr_slatJStretch && ind < gr_slatJStretch + 20 && ind != gr_slatJWidth) + { + int indx = ind - gr_slatJStretch; + return getJustify(seg, indx / 5, indx % 5); + } + + switch (ind) + { + case gr_slatAdvX : return int(m_advance.x); + case gr_slatAdvY : return int(m_advance.y); + case gr_slatAttTo : return m_parent ? 1 : 0; + case gr_slatAttX : return int(m_attach.x); + case gr_slatAttY : return int(m_attach.y); + case gr_slatAttXOff : + case gr_slatAttYOff : return 0; + case gr_slatAttWithX : return int(m_with.x); + case gr_slatAttWithY : return int(m_with.y); + case gr_slatAttWithXOff: + case gr_slatAttWithYOff:return 0; + case gr_slatAttLevel : return m_attLevel; + case gr_slatBreak : return seg->charinfo(m_original)->breakWeight(); + case gr_slatCompRef : return 0; + case gr_slatDir : return seg->dir() & 1; + case gr_slatInsert : return isInsertBefore(); + case gr_slatPosX : return int(m_position.x); // but need to calculate it + case gr_slatPosY : return int(m_position.y); + case gr_slatShiftX : return int(m_shift.x); + case gr_slatShiftY : return int(m_shift.y); + case gr_slatMeasureSol: return -1; // err what's this? + case gr_slatMeasureEol: return -1; + case gr_slatJWidth: return int(m_just); + case gr_slatUserDefnV1: subindex = 0; GR_FALLTHROUGH; + // no break + case gr_slatUserDefn : return subindex < seg->numAttrs() ? m_userAttr[subindex] : 0; + case gr_slatSegSplit : return seg->charinfo(m_original)->flags() & 3; + case gr_slatBidiLevel: return m_bidiLevel; + case gr_slatColFlags : { SlotCollision *c = seg->collisionInfo(this); return c ? c->flags() : 0; } + case gr_slatColLimitblx:SLOTGETCOLATTR(limit().bl.x) + case gr_slatColLimitbly:SLOTGETCOLATTR(limit().bl.y) + case gr_slatColLimittrx:SLOTGETCOLATTR(limit().tr.x) + case gr_slatColLimittry:SLOTGETCOLATTR(limit().tr.y) + case gr_slatColShiftx : SLOTGETCOLATTR(offset().x) + case gr_slatColShifty : SLOTGETCOLATTR(offset().y) + case gr_slatColMargin : SLOTGETCOLATTR(margin()) + case gr_slatColMarginWt:SLOTGETCOLATTR(marginWt()) + case gr_slatColExclGlyph:SLOTGETCOLATTR(exclGlyph()) + case gr_slatColExclOffx:SLOTGETCOLATTR(exclOffset().x) + case gr_slatColExclOffy:SLOTGETCOLATTR(exclOffset().y) + case gr_slatSeqClass : SLOTGETCOLATTR(seqClass()) + case gr_slatSeqProxClass:SLOTGETCOLATTR(seqProxClass()) + case gr_slatSeqOrder : SLOTGETCOLATTR(seqOrder()) + case gr_slatSeqAboveXoff:SLOTGETCOLATTR(seqAboveXoff()) + case gr_slatSeqAboveWt: SLOTGETCOLATTR(seqAboveWt()) + case gr_slatSeqBelowXlim:SLOTGETCOLATTR(seqBelowXlim()) + case gr_slatSeqBelowWt: SLOTGETCOLATTR(seqBelowWt()) + case gr_slatSeqValignHt:SLOTGETCOLATTR(seqValignHt()) + case gr_slatSeqValignWt:SLOTGETCOLATTR(seqValignWt()) + default : return 0; + } +} + +#define SLOTCOLSETATTR(x) { \ + SlotCollision *c = seg->collisionInfo(this); \ + if (c) { c-> x ; c->setFlags(c->flags() & ~SlotCollision::COLL_KNOWN); } \ + break; } +#define SLOTCOLSETCOMPLEXATTR(t, y, x) { \ + SlotCollision *c = seg->collisionInfo(this); \ + if (c) { \ + const t &s = c-> y; \ + c-> x ; c->setFlags(c->flags() & ~SlotCollision::COLL_KNOWN); } \ + break; } + +void Slot::setAttr(Segment *seg, attrCode ind, uint8 subindex, int16 value, const SlotMap & map) +{ + if (ind == gr_slatUserDefnV1) + { + ind = gr_slatUserDefn; + subindex = 0; + if (seg->numAttrs() == 0) + return; + } + else if (ind >= gr_slatJStretch && ind < gr_slatJStretch + 20 && ind != gr_slatJWidth) + { + int indx = ind - gr_slatJStretch; + return setJustify(seg, indx / 5, indx % 5, value); + } + + switch (ind) + { + case gr_slatAdvX : m_advance.x = value; break; + case gr_slatAdvY : m_advance.y = value; break; + case gr_slatAttTo : + { + const uint16 idx = uint16(value); + if (idx < map.size() && map[idx]) + { + Slot *other = map[idx]; + if (other == this || other == m_parent || other->isCopied()) break; + if (m_parent) { m_parent->removeChild(this); attachTo(NULL); } + Slot *pOther = other; + int count = 0; + bool foundOther = false; + while (pOther) + { + ++count; + if (pOther == this) foundOther = true; + pOther = pOther->attachedTo(); + } + for (pOther = m_child; pOther; pOther = pOther->m_child) + ++count; + for (pOther = m_sibling; pOther; pOther = pOther->m_sibling) + ++count; + if (count < 100 && !foundOther && other->child(this)) + { + attachTo(other); + if ((map.dir() != 0) ^ (idx > subindex)) + m_with = Position(advance(), 0); + else // normal match to previous root + m_attach = Position(other->advance(), 0); + } + } + break; + } + case gr_slatAttX : m_attach.x = value; break; + case gr_slatAttY : m_attach.y = value; break; + case gr_slatAttXOff : + case gr_slatAttYOff : break; + case gr_slatAttWithX : m_with.x = value; break; + case gr_slatAttWithY : m_with.y = value; break; + case gr_slatAttWithXOff : + case gr_slatAttWithYOff : break; + case gr_slatAttLevel : + m_attLevel = byte(value); + break; + case gr_slatBreak : + seg->charinfo(m_original)->breakWeight(value); + break; + case gr_slatCompRef : break; // not sure what to do here + case gr_slatDir : break; + case gr_slatInsert : + markInsertBefore(value? true : false); + break; + case gr_slatPosX : break; // can't set these here + case gr_slatPosY : break; + case gr_slatShiftX : m_shift.x = value; break; + case gr_slatShiftY : m_shift.y = value; break; + case gr_slatMeasureSol : break; + case gr_slatMeasureEol : break; + case gr_slatJWidth : just(value); break; + case gr_slatSegSplit : seg->charinfo(m_original)->addflags(value & 3); break; + case gr_slatUserDefn : m_userAttr[subindex] = value; break; + case gr_slatColFlags : { + SlotCollision *c = seg->collisionInfo(this); + if (c) + c->setFlags(value); + break; } + case gr_slatColLimitblx : SLOTCOLSETCOMPLEXATTR(Rect, limit(), setLimit(Rect(Position(value, s.bl.y), s.tr))) + case gr_slatColLimitbly : SLOTCOLSETCOMPLEXATTR(Rect, limit(), setLimit(Rect(Position(s.bl.x, value), s.tr))) + case gr_slatColLimittrx : SLOTCOLSETCOMPLEXATTR(Rect, limit(), setLimit(Rect(s.bl, Position(value, s.tr.y)))) + case gr_slatColLimittry : SLOTCOLSETCOMPLEXATTR(Rect, limit(), setLimit(Rect(s.bl, Position(s.tr.x, value)))) + case gr_slatColMargin : SLOTCOLSETATTR(setMargin(value)) + case gr_slatColMarginWt : SLOTCOLSETATTR(setMarginWt(value)) + case gr_slatColExclGlyph : SLOTCOLSETATTR(setExclGlyph(value)) + case gr_slatColExclOffx : SLOTCOLSETCOMPLEXATTR(Position, exclOffset(), setExclOffset(Position(value, s.y))) + case gr_slatColExclOffy : SLOTCOLSETCOMPLEXATTR(Position, exclOffset(), setExclOffset(Position(s.x, value))) + case gr_slatSeqClass : SLOTCOLSETATTR(setSeqClass(value)) + case gr_slatSeqProxClass : SLOTCOLSETATTR(setSeqProxClass(value)) + case gr_slatSeqOrder : SLOTCOLSETATTR(setSeqOrder(value)) + case gr_slatSeqAboveXoff : SLOTCOLSETATTR(setSeqAboveXoff(value)) + case gr_slatSeqAboveWt : SLOTCOLSETATTR(setSeqAboveWt(value)) + case gr_slatSeqBelowXlim : SLOTCOLSETATTR(setSeqBelowXlim(value)) + case gr_slatSeqBelowWt : SLOTCOLSETATTR(setSeqBelowWt(value)) + case gr_slatSeqValignHt : SLOTCOLSETATTR(setSeqValignHt(value)) + case gr_slatSeqValignWt : SLOTCOLSETATTR(setSeqValignWt(value)) + default : + break; + } +} + +int Slot::getJustify(const Segment *seg, uint8 level, uint8 subindex) const +{ + if (level && level >= seg->silf()->numJustLevels()) return 0; + + if (m_justs) + return m_justs->values[level * SlotJustify::NUMJUSTPARAMS + subindex]; + + if (level >= seg->silf()->numJustLevels()) return 0; + Justinfo *jAttrs = seg->silf()->justAttrs() + level; + + switch (subindex) { + case 0 : return seg->glyphAttr(gid(), jAttrs->attrStretch()); + case 1 : return seg->glyphAttr(gid(), jAttrs->attrShrink()); + case 2 : return seg->glyphAttr(gid(), jAttrs->attrStep()); + case 3 : return seg->glyphAttr(gid(), jAttrs->attrWeight()); + case 4 : return 0; // not been set yet, so clearly 0 + default: return 0; + } +} + +void Slot::setJustify(Segment *seg, uint8 level, uint8 subindex, int16 value) +{ + if (level && level >= seg->silf()->numJustLevels()) return; + if (!m_justs) + { + SlotJustify *j = seg->newJustify(); + if (!j) return; + j->LoadSlot(this, seg); + m_justs = j; + } + m_justs->values[level * SlotJustify::NUMJUSTPARAMS + subindex] = value; +} + +bool Slot::child(Slot *ap) +{ + if (this == ap) return false; + else if (ap == m_child) return true; + else if (!m_child) + m_child = ap; + else + return m_child->sibling(ap); + return true; +} + +bool Slot::sibling(Slot *ap) +{ + if (this == ap) return false; + else if (ap == m_sibling) return true; + else if (!m_sibling || !ap) + m_sibling = ap; + else + return m_sibling->sibling(ap); + return true; +} + +bool Slot::removeChild(Slot *ap) +{ + if (this == ap || !m_child || !ap) return false; + else if (ap == m_child) + { + Slot *nSibling = m_child->nextSibling(); + m_child->nextSibling(NULL); + m_child = nSibling; + return true; + } + for (Slot *p = m_child; p; p = p->m_sibling) + { + if (p->m_sibling && p->m_sibling == ap) + { + p->m_sibling = p->m_sibling->m_sibling; + ap->nextSibling(NULL); + return true; + } + } + return false; +} + +void Slot::setGlyph(Segment *seg, uint16 glyphid, const GlyphFace * theGlyph) +{ + m_glyphid = glyphid; + m_bidiCls = -1; + if (!theGlyph) + { + theGlyph = seg->getFace()->glyphs().glyphSafe(glyphid); + if (!theGlyph) + { + m_realglyphid = 0; + m_advance = Position(0.,0.); + return; + } + } + m_realglyphid = theGlyph->attrs()[seg->silf()->aPseudo()]; + if (m_realglyphid > seg->getFace()->glyphs().numGlyphs()) + m_realglyphid = 0; + const GlyphFace *aGlyph = theGlyph; + if (m_realglyphid) + { + aGlyph = seg->getFace()->glyphs().glyphSafe(m_realglyphid); + if (!aGlyph) aGlyph = theGlyph; + } + m_advance = Position(aGlyph->theAdvance().x, 0.); + if (seg->silf()->aPassBits()) + { + seg->mergePassBits(uint8(theGlyph->attrs()[seg->silf()->aPassBits()])); + if (seg->silf()->numPasses() > 16) + seg->mergePassBits(theGlyph->attrs()[seg->silf()->aPassBits()+1] << 16); + } +} + +void Slot::floodShift(Position adj, int depth) +{ + if (depth > 100) + return; + m_position += adj; + if (m_child) m_child->floodShift(adj, depth + 1); + if (m_sibling) m_sibling->floodShift(adj, depth + 1); +} + +void SlotJustify::LoadSlot(const Slot *s, const Segment *seg) +{ + for (int i = seg->silf()->numJustLevels() - 1; i >= 0; --i) + { + Justinfo *justs = seg->silf()->justAttrs() + i; + int16 *v = values + i * NUMJUSTPARAMS; + v[0] = seg->glyphAttr(s->gid(), justs->attrStretch()); + v[1] = seg->glyphAttr(s->gid(), justs->attrShrink()); + v[2] = seg->glyphAttr(s->gid(), justs->attrStep()); + v[3] = seg->glyphAttr(s->gid(), justs->attrWeight()); + } +} + +Slot * Slot::nextInCluster(const Slot *s) const +{ + Slot *base; + if (s->firstChild()) + return s->firstChild(); + else if (s->nextSibling()) + return s->nextSibling(); + while ((base = s->attachedTo())) + { + // if (base->firstChild() == s && base->nextSibling()) + if (base->nextSibling()) + return base->nextSibling(); + s = base; + } + return NULL; +} + +bool Slot::isChildOf(const Slot *base) const +{ + for (Slot *p = m_parent; p; p = p->m_parent) + if (p == base) + return true; + return false; +} diff --git a/thirdparty/graphite/src/Sparse.cpp b/thirdparty/graphite/src/Sparse.cpp new file mode 100644 index 0000000000..aa43113669 --- /dev/null +++ b/thirdparty/graphite/src/Sparse.cpp @@ -0,0 +1,62 @@ +/* GRAPHITE2 LICENSING + + Copyright 2011, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include <cassert> +#include "inc/Sparse.h" +#include "inc/bits.h" + +using namespace graphite2; + +const sparse::chunk sparse::empty_chunk = {0,0}; + +sparse::~sparse() throw() +{ + if (m_array.map == &empty_chunk) return; + free(m_array.values); +} + + +sparse::mapped_type sparse::operator [] (const key_type k) const throw() +{ + mapped_type g = key_type(k/SIZEOF_CHUNK - m_nchunks) >> (sizeof k*8 - 1); + const chunk & c = m_array.map[g*k/SIZEOF_CHUNK]; + const mask_t m = c.mask >> (SIZEOF_CHUNK - 1 - (k%SIZEOF_CHUNK)); + g *= m & 1; + + return g*m_array.values[g*(c.offset + bit_set_count(m >> 1))]; +} + + +size_t sparse::capacity() const throw() +{ + size_t n = m_nchunks, + s = 0; + + for (const chunk *ci=m_array.map; n; --n, ++ci) + s += bit_set_count(ci->mask); + + return s; +} diff --git a/thirdparty/graphite/src/TtfUtil.cpp b/thirdparty/graphite/src/TtfUtil.cpp new file mode 100644 index 0000000000..2eb46a11fb --- /dev/null +++ b/thirdparty/graphite/src/TtfUtil.cpp @@ -0,0 +1,2053 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +/*--------------------------------------------------------------------*//*:Ignore this sentence. + +File: TtfUtil.cpp +Responsibility: Alan Ward +Last reviewed: Not yet. + +Description + Implements the methods for TtfUtil class. This file should remain portable to any C++ + environment by only using standard C++ and the TTF structurs defined in Tt.h. +-------------------------------------------------------------------------------*//*:End Ignore*/ + + +/*********************************************************************************************** + Include files +***********************************************************************************************/ +// Language headers +//#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstring> +#include <climits> +#include <cwchar> +//#include <stdexcept> +// Platform headers +// Module headers +#include "inc/TtfUtil.h" +#include "inc/TtfTypes.h" +#include "inc/Endian.h" + +/*********************************************************************************************** + Forward declarations +***********************************************************************************************/ + +/*********************************************************************************************** + Local Constants and static variables +***********************************************************************************************/ +namespace +{ +#ifdef ALL_TTFUTILS + // max number of components allowed in composite glyphs + const int kMaxGlyphComponents = 8; +#endif + + template <int R, typename T> + inline float fixed_to_float(const T f) { + return float(f)/float(2^R); + } + +/*---------------------------------------------------------------------------------------------- + Table of standard Postscript glyph names. From Martin Hosken. Disagress with ttfdump.exe +---------------------------------------------------------------------------------------------*/ +#ifdef ALL_TTFUTILS + const int kcPostNames = 258; + + const char * rgPostName[kcPostNames] = { + ".notdef", ".null", "nonmarkingreturn", "space", "exclam", "quotedbl", "numbersign", + "dollar", "percent", "ampersand", "quotesingle", "parenleft", + "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash", + "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", + "nine", "colon", "semicolon", "less", "equal", "greater", "question", + "at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", + "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", + "bracketleft", "backslash", "bracketright", "asciicircum", + "underscore", "grave", "a", "b", "c", "d", "e", "f", "g", "h", "i", + "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", + "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde", + "Adieresis", "Aring", "Ccedilla", "Eacute", "Ntilde", "Odieresis", + "Udieresis", "aacute", "agrave", "acircumflex", "adieresis", "atilde", + "aring", "ccedilla", "eacute", "egrave", "ecircumflex", "edieresis", + "iacute", "igrave", "icircumflex", "idieresis", "ntilde", "oacute", + "ograve", "ocircumflex", "odieresis", "otilde", "uacute", "ugrave", + "ucircumflex", "udieresis", "dagger", "degree", "cent", "sterling", + "section", "bullet", "paragraph", "germandbls", "registered", + "copyright", "trademark", "acute", "dieresis", "notequal", "AE", + "Oslash", "infinity", "plusminus", "lessequal", "greaterequal", "yen", + "mu", "partialdiff", "summation", "product", "pi", "integral", + "ordfeminine", "ordmasculine", "Omega", "ae", "oslash", "questiondown", + "exclamdown", "logicalnot", "radical", "florin", "approxequal", + "Delta", "guillemotleft", "guillemotright", "ellipsis", "nonbreakingspace", + "Agrave", "Atilde", "Otilde", "OE", "oe", "endash", "emdash", + "quotedblleft", "quotedblright", "quoteleft", "quoteright", "divide", + "lozenge", "ydieresis", "Ydieresis", "fraction", "currency", + "guilsinglleft", "guilsinglright", "fi", "fl", "daggerdbl", "periodcentered", + "quotesinglbase", "quotedblbase", "perthousand", "Acircumflex", + "Ecircumflex", "Aacute", "Edieresis", "Egrave", "Iacute", + "Icircumflex", "Idieresis", "Igrave", "Oacute", "Ocircumflex", + "apple", "Ograve", "Uacute", "Ucircumflex", "Ugrave", "dotlessi", + "circumflex", "tilde", "macron", "breve", "dotaccent", "ring", + "cedilla", "hungarumlaut", "ogonek", "caron", "Lslash", "lslash", + "Scaron", "scaron", "Zcaron", "zcaron", "brokenbar", "Eth", "eth", + "Yacute", "yacute", "Thorn", "thorn", "minus", "multiply", + "onesuperior", "twosuperior", "threesuperior", "onehalf", "onequarter", + "threequarters", "franc", "Gbreve", "gbreve", "Idotaccent", "Scedilla", + "scedilla", "Cacute", "cacute", "Ccaron", "ccaron", + "dcroat" }; +#endif + +} // end of namespace + +/*********************************************************************************************** + Methods +***********************************************************************************************/ + +/* Note on error processing: The code guards against bad glyph ids being used to look up data +in open ended tables (loca, hmtx). If the glyph id comes from a cmap this shouldn't happen +but it seems prudent to check for user errors here. The code does assume that data obtained +from the TTF file is valid otherwise (though the CheckTable method seeks to check for +obvious problems that might accompany a change in table versions). For example an invalid +offset in the loca table which could exceed the size of the glyf table is NOT trapped. +Likewise if numberOf_LongHorMetrics in the hhea table is wrong, this will NOT be trapped, +which could cause a lookup in the hmtx table to exceed the table length. Of course, TTF tables +that are completely corrupt will cause unpredictable results. */ + +/* Note on composite glyphs: Glyphs that have components that are themselves composites +are not supported. IsDeepComposite can be used to test for this. False is returned from many +of the methods in this cases. It is unclear how to build composite glyphs in some cases, +so this code represents my best guess until test cases can be found. See notes on the high- +level GlyfPoints method. */ +namespace graphite2 +{ +namespace TtfUtil +{ + + +/*---------------------------------------------------------------------------------------------- + Get offset and size of the offset table needed to find table directory. + Return true if success, false otherwise. + lSize excludes any table directory entries. +----------------------------------------------------------------------------------------------*/ +bool GetHeaderInfo(size_t & lOffset, size_t & lSize) +{ + lOffset = 0; + lSize = offsetof(Sfnt::OffsetSubTable, table_directory); + assert(sizeof(uint32) + 4*sizeof (uint16) == lSize); + return true; +} + +/*---------------------------------------------------------------------------------------------- + Check the offset table for expected data. + Return true if success, false otherwise. +----------------------------------------------------------------------------------------------*/ +bool CheckHeader(const void * pHdr) +{ + const Sfnt::OffsetSubTable * pOffsetTable + = reinterpret_cast<const Sfnt::OffsetSubTable *>(pHdr); + + return pHdr && be::swap(pOffsetTable->scaler_type) == Sfnt::OffsetSubTable::TrueTypeWin; +} + +/*---------------------------------------------------------------------------------------------- + Get offset and size of the table directory. + Return true if successful, false otherwise. +----------------------------------------------------------------------------------------------*/ +bool GetTableDirInfo(const void * pHdr, size_t & lOffset, size_t & lSize) +{ + const Sfnt::OffsetSubTable * pOffsetTable + = reinterpret_cast<const Sfnt::OffsetSubTable *>(pHdr); + + lOffset = offsetof(Sfnt::OffsetSubTable, table_directory); + lSize = be::swap(pOffsetTable->num_tables) + * sizeof(Sfnt::OffsetSubTable::Entry); + + return true; +} + + +/*---------------------------------------------------------------------------------------------- + Get offset and size of the specified table. + Return true if successful, false otherwise. On false, offset and size will be 0. +----------------------------------------------------------------------------------------------*/ +bool GetTableInfo(const Tag TableTag, const void * pHdr, const void * pTableDir, + size_t & lOffset, size_t & lSize) +{ + const Sfnt::OffsetSubTable * pOffsetTable + = reinterpret_cast<const Sfnt::OffsetSubTable *>(pHdr); + const size_t num_tables = be::swap(pOffsetTable->num_tables); + const Sfnt::OffsetSubTable::Entry + * entry_itr = reinterpret_cast<const Sfnt::OffsetSubTable::Entry *>( + pTableDir), + * const dir_end = entry_itr + num_tables; + + if (num_tables > 40) + return false; + + for (;entry_itr != dir_end; ++entry_itr) // 40 - safe guard + { + if (be::swap(entry_itr->tag) == TableTag) + { + lOffset = be::swap(entry_itr->offset); + lSize = be::swap(entry_itr->length); + return true; + } + } + + return false; +} + +/*---------------------------------------------------------------------------------------------- + Check the specified table. Tests depend on the table type. + Return true if successful, false otherwise. +----------------------------------------------------------------------------------------------*/ +bool CheckTable(const Tag TableId, const void * pTable, size_t lTableSize) +{ + using namespace Sfnt; + + if (pTable == 0 || lTableSize < 4) return false; + + switch(TableId) + { + case Tag::cmap: // cmap + { + const Sfnt::CharacterCodeMap * const pCmap + = reinterpret_cast<const Sfnt::CharacterCodeMap *>(pTable); + if (lTableSize < sizeof(Sfnt::CharacterCodeMap)) + return false; + return be::swap(pCmap->version) == 0; + } + + case Tag::head: // head + { + const Sfnt::FontHeader * const pHead + = reinterpret_cast<const Sfnt::FontHeader *>(pTable); + if (lTableSize < sizeof(Sfnt::FontHeader)) + return false; + bool r = be::swap(pHead->version) == OneFix + && be::swap(pHead->magic_number) == FontHeader::MagicNumber + && be::swap(pHead->glyph_data_format) + == FontHeader::GlypDataFormat + && (be::swap(pHead->index_to_loc_format) + == FontHeader::ShortIndexLocFormat + || be::swap(pHead->index_to_loc_format) + == FontHeader::LongIndexLocFormat) + && sizeof(FontHeader) <= lTableSize; + return r; + } + + case Tag::post: // post + { + const Sfnt::PostScriptGlyphName * const pPost + = reinterpret_cast<const Sfnt::PostScriptGlyphName *>(pTable); + if (lTableSize < sizeof(Sfnt::PostScriptGlyphName)) + return false; + const fixed format = be::swap(pPost->format); + bool r = format == PostScriptGlyphName::Format1 + || format == PostScriptGlyphName::Format2 + || format == PostScriptGlyphName::Format3 + || format == PostScriptGlyphName::Format25; + return r; + } + + case Tag::hhea: // hhea + { + const Sfnt::HorizontalHeader * pHhea = + reinterpret_cast<const Sfnt::HorizontalHeader *>(pTable); + if (lTableSize < sizeof(Sfnt::HorizontalHeader)) + return false; + bool r = be::swap(pHhea->version) == OneFix + && be::swap(pHhea->metric_data_format) == 0 + && sizeof (Sfnt::HorizontalHeader) <= lTableSize; + return r; + } + + case Tag::maxp: // maxp + { + const Sfnt::MaximumProfile * pMaxp = + reinterpret_cast<const Sfnt::MaximumProfile *>(pTable); + if (lTableSize < sizeof(Sfnt::MaximumProfile)) + return false; + bool r = be::swap(pMaxp->version) == OneFix + && sizeof(Sfnt::MaximumProfile) <= lTableSize; + return r; + } + + case Tag::OS_2: // OS/2 + { + const Sfnt::Compatibility * pOs2 + = reinterpret_cast<const Sfnt::Compatibility *>(pTable); + if (be::swap(pOs2->version) == 0) + { // OS/2 table version 1 size +// if (sizeof(Sfnt::Compatibility) +// - sizeof(uint32)*2 - sizeof(int16)*2 +// - sizeof(uint16)*3 <= lTableSize) + if (sizeof(Sfnt::Compatibility0) <= lTableSize) + return true; + } + else if (be::swap(pOs2->version) == 1) + { // OS/2 table version 2 size +// if (sizeof(Sfnt::Compatibility) +// - sizeof(int16) *2 +// - sizeof(uint16)*3 <= lTableSize) + if (sizeof(Sfnt::Compatibility1) <= lTableSize) + return true; + } + else if (be::swap(pOs2->version) == 2) + { // OS/2 table version 3 size + if (sizeof(Sfnt::Compatibility2) <= lTableSize) + return true; + } + else if (be::swap(pOs2->version) == 3 || be::swap(pOs2->version) == 4) + { // OS/2 table version 4 size - version 4 changed the meaning of some fields which we don't use + if (sizeof(Sfnt::Compatibility3) <= lTableSize) + return true; + } + else + return false; + break; + } + + case Tag::name: + { + const Sfnt::FontNames * pName + = reinterpret_cast<const Sfnt::FontNames *>(pTable); + if (lTableSize < sizeof(Sfnt::FontNames)) + return false; + return be::swap(pName->format) == 0; + } + + case Tag::glyf: + { + return (lTableSize >= sizeof(Sfnt::Glyph)); + } + + default: + break; + } + + return true; +} + +/*---------------------------------------------------------------------------------------------- + Return the number of glyphs in the font. Should never be less than zero. + + Note: this method is not currently used by the Graphite engine. +----------------------------------------------------------------------------------------------*/ +size_t GlyphCount(const void * pMaxp) +{ + const Sfnt::MaximumProfile * pTable = + reinterpret_cast<const Sfnt::MaximumProfile *>(pMaxp); + return be::swap(pTable->num_glyphs); +} + +#ifdef ALL_TTFUTILS +/*---------------------------------------------------------------------------------------------- + Return the maximum number of components for any composite glyph in the font. + + Note: this method is not currently used by the Graphite engine. +----------------------------------------------------------------------------------------------*/ +size_t MaxCompositeComponentCount(const void * pMaxp) +{ + const Sfnt::MaximumProfile * pTable = + reinterpret_cast<const Sfnt::MaximumProfile *>(pMaxp); + return be::swap(pTable->max_component_elements); +} + +/*---------------------------------------------------------------------------------------------- + Composite glyphs can be composed of glyphs that are themselves composites. + This method returns the maximum number of levels like this for any glyph in the font. + A non-composite glyph has a level of 1. + + Note: this method is not currently used by the Graphite engine. +----------------------------------------------------------------------------------------------*/ +size_t MaxCompositeLevelCount(const void * pMaxp) +{ + const Sfnt::MaximumProfile * pTable = + reinterpret_cast<const Sfnt::MaximumProfile *>(pMaxp); + return be::swap(pTable->max_component_depth); +} + +/*---------------------------------------------------------------------------------------------- + Return the number of glyphs in the font according to a differt source. + Should never be less than zero. Return -1 on failure. + + Note: this method is not currently used by the Graphite engine. +----------------------------------------------------------------------------------------------*/ +size_t LocaGlyphCount(size_t lLocaSize, const void * pHead) //throw(std::domain_error) +{ + + const Sfnt::FontHeader * pTable + = reinterpret_cast<const Sfnt::FontHeader *>(pHead); + + if (be::swap(pTable->index_to_loc_format) + == Sfnt::FontHeader::ShortIndexLocFormat) + // loca entries are two bytes and have been divided by two + return (lLocaSize >> 1) - 1; + + if (be::swap(pTable->index_to_loc_format) + == Sfnt::FontHeader::LongIndexLocFormat) + // loca entries are four bytes + return (lLocaSize >> 2) - 1; + + return -1; + //throw std::domain_error("head table in inconsistent state. The font may be corrupted"); +} +#endif + +/*---------------------------------------------------------------------------------------------- + Return the design units the font is designed with +----------------------------------------------------------------------------------------------*/ +int DesignUnits(const void * pHead) +{ + const Sfnt::FontHeader * pTable = + reinterpret_cast<const Sfnt::FontHeader *>(pHead); + + return be::swap(pTable->units_per_em); +} + +#ifdef ALL_TTFUTILS +/*---------------------------------------------------------------------------------------------- + Return the checksum from the head table, which serves as a unique identifer for the font. +----------------------------------------------------------------------------------------------*/ +int HeadTableCheckSum(const void * pHead) +{ + const Sfnt::FontHeader * pTable = + reinterpret_cast<const Sfnt::FontHeader *>(pHead); + + return be::swap(pTable->check_sum_adjustment); +} + +/*---------------------------------------------------------------------------------------------- + Return the create time from the head table. This consists of a 64-bit integer, which + we return here as two 32-bit integers. + + Note: this method is not currently used by the Graphite engine. +----------------------------------------------------------------------------------------------*/ +void HeadTableCreateTime(const void * pHead, + unsigned int * pnDateBC, unsigned int * pnDateAD) +{ + const Sfnt::FontHeader * pTable = + reinterpret_cast<const Sfnt::FontHeader *>(pHead); + + *pnDateBC = be::swap(pTable->created[0]); + *pnDateAD = be::swap(pTable->created[1]); +} + +/*---------------------------------------------------------------------------------------------- + Return the modify time from the head table.This consists of a 64-bit integer, which + we return here as two 32-bit integers. + + Note: this method is not currently used by the Graphite engine. +----------------------------------------------------------------------------------------------*/ +void HeadTableModifyTime(const void * pHead, + unsigned int * pnDateBC, unsigned int *pnDateAD) +{ + const Sfnt::FontHeader * pTable = + reinterpret_cast<const Sfnt::FontHeader *>(pHead); + ; + *pnDateBC = be::swap(pTable->modified[0]); + *pnDateAD = be::swap(pTable->modified[1]); +} + +/*---------------------------------------------------------------------------------------------- + Return true if the font is italic. +----------------------------------------------------------------------------------------------*/ +bool IsItalic(const void * pHead) +{ + const Sfnt::FontHeader * pTable = + reinterpret_cast<const Sfnt::FontHeader *>(pHead); + + return ((be::swap(pTable->mac_style) & 0x00000002) != 0); +} + +/*---------------------------------------------------------------------------------------------- + Return the ascent for the font +----------------------------------------------------------------------------------------------*/ +int FontAscent(const void * pOs2) +{ + const Sfnt::Compatibility * pTable = reinterpret_cast<const Sfnt::Compatibility *>(pOs2); + + return be::swap(pTable->win_ascent); +} + +/*---------------------------------------------------------------------------------------------- + Return the descent for the font +----------------------------------------------------------------------------------------------*/ +int FontDescent(const void * pOs2) +{ + const Sfnt::Compatibility * pTable = reinterpret_cast<const Sfnt::Compatibility *>(pOs2); + + return be::swap(pTable->win_descent); +} + +/*---------------------------------------------------------------------------------------------- + Get the bold and italic style bits. + Return true if successful. false otherwise. + In addition to checking the OS/2 table, one could also check + the head table's macStyle field (overridden by the OS/2 table on Win) + the sub-family name in the name table (though this can contain oblique, dark, etc too) +----------------------------------------------------------------------------------------------*/ +bool FontOs2Style(const void *pOs2, bool & fBold, bool & fItalic) +{ + const Sfnt::Compatibility * pTable = reinterpret_cast<const Sfnt::Compatibility *>(pOs2); + + fBold = (be::swap(pTable->fs_selection) & Sfnt::Compatibility::Bold) != 0; + fItalic = (be::swap(pTable->fs_selection) & Sfnt::Compatibility::Italic) != 0; + + return true; +} +#endif + +/*---------------------------------------------------------------------------------------------- + Method for searching name table. +----------------------------------------------------------------------------------------------*/ +bool GetNameInfo(const void * pName, int nPlatformId, int nEncodingId, + int nLangId, int nNameId, size_t & lOffset, size_t & lSize) +{ + lOffset = 0; + lSize = 0; + + const Sfnt::FontNames * pTable = reinterpret_cast<const Sfnt::FontNames *>(pName); + uint16 cRecord = be::swap(pTable->count); + uint16 nRecordOffset = be::swap(pTable->string_offset); + const Sfnt::NameRecord * pRecord = reinterpret_cast<const Sfnt::NameRecord *>(pTable + 1); + + for (int i = 0; i < cRecord; ++i) + { + if (be::swap(pRecord->platform_id) == nPlatformId && + be::swap(pRecord->platform_specific_id) == nEncodingId && + be::swap(pRecord->language_id) == nLangId && + be::swap(pRecord->name_id) == nNameId) + { + lOffset = be::swap(pRecord->offset) + nRecordOffset; + lSize = be::swap(pRecord->length); + return true; + } + pRecord++; + } + + return false; +} + +#ifdef ALL_TTFUTILS +/*---------------------------------------------------------------------------------------------- + Return all the lang-IDs that have data for the given name-IDs. Assume that there is room + in the return array (langIdList) for 128 items. The purpose of this method is to return + a list of all possible lang-IDs. +----------------------------------------------------------------------------------------------*/ +int GetLangsForNames(const void * pName, int nPlatformId, int nEncodingId, + int * nameIdList, int cNameIds, short * langIdList) +{ + const Sfnt::FontNames * pTable = reinterpret_cast<const Sfnt::FontNames *>(pName); + int cLangIds = 0; + uint16 cRecord = be::swap(pTable->count); + if (cRecord > 127) return cLangIds; + //uint16 nRecordOffset = swapw(pTable->stringOffset); + const Sfnt::NameRecord * pRecord = reinterpret_cast<const Sfnt::NameRecord *>(pTable + 1); + + for (int i = 0; i < cRecord; ++i) + { + if (be::swap(pRecord->platform_id) == nPlatformId && + be::swap(pRecord->platform_specific_id) == nEncodingId) + { + bool fNameFound = false; + int nLangId = be::swap(pRecord->language_id); + int nNameId = be::swap(pRecord->name_id); + for (int j = 0; j < cNameIds; j++) + { + if (nNameId == nameIdList[j]) + { + fNameFound = true; + break; + } + } + if (fNameFound) + { + // Add it if it's not there. + int ilang; + for (ilang = 0; ilang < cLangIds; ilang++) + if (langIdList[ilang] == nLangId) + break; + if (ilang >= cLangIds) + { + langIdList[cLangIds] = short(nLangId); + cLangIds++; + } + if (cLangIds == 128) + return cLangIds; + } + } + pRecord++; + } + + return cLangIds; +} + +/*---------------------------------------------------------------------------------------------- + Get the offset and size of the font family name in English for the MS Platform with Unicode + writing system. The offset is within the pName data. The string is double byte with MSB + first. +----------------------------------------------------------------------------------------------*/ +bool Get31EngFamilyInfo(const void * pName, size_t & lOffset, size_t & lSize) +{ + return GetNameInfo(pName, Sfnt::NameRecord::Microsoft, 1, 1033, + Sfnt::NameRecord::Family, lOffset, lSize); +} + +/*---------------------------------------------------------------------------------------------- + Get the offset and size of the full font name in English for the MS Platform with Unicode + writing system. The offset is within the pName data. The string is double byte with MSB + first. + + Note: this method is not currently used by the Graphite engine. +----------------------------------------------------------------------------------------------*/ +bool Get31EngFullFontInfo(const void * pName, size_t & lOffset, size_t & lSize) +{ + return GetNameInfo(pName, Sfnt::NameRecord::Microsoft, 1, 1033, + Sfnt::NameRecord::Fullname, lOffset, lSize); +} + +/*---------------------------------------------------------------------------------------------- + Get the offset and size of the font family name in English for the MS Platform with Symbol + writing system. The offset is within the pName data. The string is double byte with MSB + first. +----------------------------------------------------------------------------------------------*/ +bool Get30EngFamilyInfo(const void * pName, size_t & lOffset, size_t & lSize) +{ + return GetNameInfo(pName, Sfnt::NameRecord::Microsoft, 0, 1033, + Sfnt::NameRecord::Family, lOffset, lSize); +} + +/*---------------------------------------------------------------------------------------------- + Get the offset and size of the full font name in English for the MS Platform with Symbol + writing system. The offset is within the pName data. The string is double byte with MSB + first. + + Note: this method is not currently used by the Graphite engine. +----------------------------------------------------------------------------------------------*/ +bool Get30EngFullFontInfo(const void * pName, size_t & lOffset, size_t & lSize) +{ + return GetNameInfo(pName, Sfnt::NameRecord::Microsoft, 0, 1033, + Sfnt::NameRecord::Fullname, lOffset, lSize); +} + +/*---------------------------------------------------------------------------------------------- + Return the Glyph ID for a given Postscript name. This method finds the first glyph which + matches the requested Postscript name. Ideally every glyph should have a unique Postscript + name (except for special names such as .notdef), but this is not always true. + On failure return value less than zero. + -1 - table search failed + -2 - format 3 table (no Postscript glyph info) + -3 - other failures + + Note: this method is not currently used by the Graphite engine. +----------------------------------------------------------------------------------------------*/ +int PostLookup(const void * pPost, size_t lPostSize, const void * pMaxp, + const char * pPostName) +{ + using namespace Sfnt; + + const Sfnt::PostScriptGlyphName * pTable + = reinterpret_cast<const Sfnt::PostScriptGlyphName *>(pPost); + fixed format = be::swap(pTable->format); + + if (format == PostScriptGlyphName::Format3) + { // format 3 - no Postscript glyph info in font + return -2; + } + + // search for given Postscript name among the standard names + int iPostName = -1; // index in standard names + for (int i = 0; i < kcPostNames; i++) + { + if (!strcmp(pPostName, rgPostName[i])) + { + iPostName = i; + break; + } + } + + if (format == PostScriptGlyphName::Format1) + { // format 1 - use standard Postscript names + return iPostName; + } + + if (format == PostScriptGlyphName::Format25) + { + if (iPostName == -1) + return -1; + + const PostScriptGlyphName25 * pTable25 + = static_cast<const PostScriptGlyphName25 *>(pTable); + int cnGlyphs = GlyphCount(pMaxp); + for (gid16 nGlyphId = 0; nGlyphId < cnGlyphs && nGlyphId < kcPostNames; + nGlyphId++) + { // glyph_name_index25 contains bytes so no byte swapping needed + // search for first glyph id that uses the standard name + if (nGlyphId + pTable25->offset[nGlyphId] == iPostName) + return nGlyphId; + } + } + + if (format == PostScriptGlyphName::Format2) + { // format 2 + const PostScriptGlyphName2 * pTable2 + = static_cast<const PostScriptGlyphName2 *>(pTable); + + int cnGlyphs = be::swap(pTable2->number_of_glyphs); + + if (iPostName != -1) + { // did match a standard name, look for first glyph id mapped to that name + for (gid16 nGlyphId = 0; nGlyphId < cnGlyphs; nGlyphId++) + { + if (be::swap(pTable2->glyph_name_index[nGlyphId]) == iPostName) + return nGlyphId; + } + } + + { // did not match a standard name, search font specific names + size_t nStrSizeGoal = strlen(pPostName); + const char * pFirstGlyphName = reinterpret_cast<const char *>( + &pTable2->glyph_name_index[0] + cnGlyphs); + const char * pGlyphName = pFirstGlyphName; + int iInNames = 0; // index in font specific names + bool fFound = false; + const char * const endOfTable + = reinterpret_cast<const char *>(pTable2) + lPostSize; + while (pGlyphName < endOfTable && !fFound) + { // search Pascal strings for first matching name + size_t nStringSize = size_t(*pGlyphName); + if (nStrSizeGoal != nStringSize || + strncmp(pGlyphName + 1, pPostName, nStringSize)) + { // did not match + ++iInNames; + pGlyphName += nStringSize + 1; + } + else + { // did match + fFound = true; + } + } + if (!fFound) + return -1; // no font specific name matches request + + iInNames += kcPostNames; + for (gid16 nGlyphId = 0; nGlyphId < cnGlyphs; nGlyphId++) + { // search for first glyph id that maps to the found string index + if (be::swap(pTable2->glyph_name_index[nGlyphId]) == iInNames) + return nGlyphId; + } + return -1; // no glyph mapped to this index (very strange) + } + } + + return -3; +} + +/*---------------------------------------------------------------------------------------------- + Convert a Unicode character string from big endian (MSB first, Motorola) format to little + endian (LSB first, Intel) format. + nSize is the number of Unicode characters in the string. It should not include any + terminating null. If nSize is 0, it is assumed the string is null terminated. nSize + defaults to 0. + Return true if successful, false otherwise. +----------------------------------------------------------------------------------------------*/ +void SwapWString(void * pWStr, size_t nSize /* = 0 */) //throw (std::invalid_argument) +{ + if (pWStr == 0) + { +// throw std::invalid_argument("null pointer given"); + return; + } + + uint16 * pStr = reinterpret_cast<uint16 *>(pWStr); + uint16 * const pStrEnd = pStr + (nSize == 0 ? wcslen((const wchar_t*)pStr) : nSize); + + for (; pStr != pStrEnd; ++pStr) + *pStr = be::swap(*pStr); +// std::transform(pStr, pStrEnd, pStr, read<uint16>); + +// for (int i = 0; i < nSize; i++) +// { // swap the wide characters in the string +// pStr[i] = utf16(be::swap(uint16(pStr[i]))); +// } +} +#endif + +/*---------------------------------------------------------------------------------------------- + Get the left-side bearing and and advance width based on the given tables and Glyph ID + Return true if successful, false otherwise. On false, one or both value could be INT_MIN +----------------------------------------------------------------------------------------------*/ +bool HorMetrics(gid16 nGlyphId, const void * pHmtx, size_t lHmtxSize, const void * pHhea, + int & nLsb, unsigned int & nAdvWid) +{ + const Sfnt::HorizontalMetric * phmtx = + reinterpret_cast<const Sfnt::HorizontalMetric *>(pHmtx); + + const Sfnt::HorizontalHeader * phhea = + reinterpret_cast<const Sfnt::HorizontalHeader *>(pHhea); + + size_t cLongHorMetrics = be::swap(phhea->num_long_hor_metrics); + if (nGlyphId < cLongHorMetrics) + { // glyph id is acceptable + if ((nGlyphId + 1) * sizeof(Sfnt::HorizontalMetric) > lHmtxSize) return false; + nAdvWid = be::swap(phmtx[nGlyphId].advance_width); + nLsb = be::swap(phmtx[nGlyphId].left_side_bearing); + } + else + { + // guard against bad glyph id + size_t lLsbOffset = sizeof(Sfnt::HorizontalMetric) * cLongHorMetrics + + sizeof(int16) * (nGlyphId - cLongHorMetrics); // offset in bytes + // We test like this as LsbOffset is an offset not a length. + if (lLsbOffset >= lHmtxSize - sizeof(int16) || cLongHorMetrics == 0) + { + nLsb = 0; + return false; + } + nAdvWid = be::swap(phmtx[cLongHorMetrics - 1].advance_width); + nLsb = be::peek<int16>(reinterpret_cast<const byte *>(phmtx) + lLsbOffset); + } + + return true; +} + +/*---------------------------------------------------------------------------------------------- + Return a pointer to the requested cmap subtable. By default find the Microsoft Unicode + subtable. Pass nEncoding as -1 to find first table that matches only nPlatformId. + Return NULL if the subtable cannot be found. +----------------------------------------------------------------------------------------------*/ +const void * FindCmapSubtable(const void * pCmap, int nPlatformId, /* =3 */ int nEncodingId, /* = 1 */ size_t length) +{ + const Sfnt::CharacterCodeMap * pTable = reinterpret_cast<const Sfnt::CharacterCodeMap *>(pCmap); + uint16 csuPlatforms = be::swap(pTable->num_subtables); + if (length && (sizeof(Sfnt::CharacterCodeMap) + 8 * (csuPlatforms - 1) > length)) + return NULL; + for (int i = 0; i < csuPlatforms; i++) + { + if (be::swap(pTable->encoding[i].platform_id) == nPlatformId && + (nEncodingId == -1 || be::swap(pTable->encoding[i].platform_specific_id) == nEncodingId)) + { + uint32 offset = be::swap(pTable->encoding[i].offset); + const uint8 * pRtn = reinterpret_cast<const uint8 *>(pCmap) + offset; + if (length) + { + if (offset > length - 2) return NULL; + uint16 format = be::read<uint16>(pRtn); + if (format == 4) + { + if (offset > length - 4) return NULL; + uint16 subTableLength = be::peek<uint16>(pRtn); + if (i + 1 == csuPlatforms) + { + if (subTableLength > length - offset) + return NULL; + } + else if (subTableLength > be::swap(pTable->encoding[i+1].offset)) + return NULL; + } + if (format == 12) + { + if (offset > length - 6) return NULL; + uint32 subTableLength = be::peek<uint32>(pRtn); + if (i + 1 == csuPlatforms) + { + if (subTableLength > length - offset) + return NULL; + } + else if (subTableLength > be::swap(pTable->encoding[i+1].offset)) + return NULL; + } + } + return reinterpret_cast<const uint8 *>(pCmap) + offset; + } + } + + return 0; +} + +/*---------------------------------------------------------------------------------------------- + Check the Microsoft Unicode subtable for expected values +----------------------------------------------------------------------------------------------*/ +bool CheckCmapSubtable4(const void * pCmapSubtable4, const void * pCmapEnd /*, unsigned int maxgid*/) +{ + size_t table_len = (const byte *)pCmapEnd - (const byte *)pCmapSubtable4; + if (!pCmapSubtable4) return false; + const Sfnt::CmapSubTable * pTable = reinterpret_cast<const Sfnt::CmapSubTable *>(pCmapSubtable4); + // Bob H say some freeware TT fonts have version 1 (eg, CALIGULA.TTF) + // so don't check subtable version. 21 Mar 2002 spec changes version to language. + if (table_len < sizeof(*pTable) || be::swap(pTable->format) != 4) return false; + const Sfnt::CmapSubTableFormat4 * pTable4 = reinterpret_cast<const Sfnt::CmapSubTableFormat4 *>(pCmapSubtable4); + if (table_len < sizeof(*pTable4)) + return false; + uint16 length = be::swap(pTable4->length); + if (length > table_len) + return false; + if (length < sizeof(Sfnt::CmapSubTableFormat4)) + return false; + uint16 nRanges = be::swap(pTable4->seg_count_x2) >> 1; + if (!nRanges || length < sizeof(Sfnt::CmapSubTableFormat4) + 4 * nRanges * sizeof(uint16)) + return false; + // check last range is properly terminated + uint16 chEnd = be::peek<uint16>(pTable4->end_code + nRanges - 1); + if (chEnd != 0xFFFF) + return false; +#if 0 + int lastend = -1; + for (int i = 0; i < nRanges; ++i) + { + uint16 end = be::peek<uint16>(pTable4->end_code + i); + uint16 start = be::peek<uint16>(pTable4->end_code + nRanges + 1 + i); + int16 delta = be::peek<int16>(pTable4->end_code + 2*nRanges + 1 + i); + uint16 offset = be::peek<uint16>(pTable4->end_code + 3*nRanges + 1 + i); + if (lastend >= end || lastend >= start) + return false; + if (offset) + { + const uint16 *gstart = pTable4->end_code + 3*nRanges + 1 + i + (offset >> 1); + const uint16 *gend = gstart + end - start; + if ((char *)gend >= (char *)pCmapSubtable4 + length) + return false; + while (gstart <= gend) + { + uint16 g = be::peek<uint16>(gstart++); + if (g && ((g + delta) & 0xFFFF) > maxgid) + return false; + } + } + else if (((delta + end) & 0xFFFF) > maxgid) + return false; + lastend = end; + } +#endif + return true; +} + +/*---------------------------------------------------------------------------------------------- + Return the Glyph ID for the given Unicode ID in the Microsoft Unicode subtable. + (Actually this code only depends on subtable being format 4.) + Return 0 if the Unicode ID is not in the subtable. +----------------------------------------------------------------------------------------------*/ +gid16 CmapSubtable4Lookup(const void * pCmapSubtabel4, unsigned int nUnicodeId, int rangeKey) +{ + const Sfnt::CmapSubTableFormat4 * pTable = reinterpret_cast<const Sfnt::CmapSubTableFormat4 *>(pCmapSubtabel4); + + uint16 nSeg = be::swap(pTable->seg_count_x2) >> 1; + + uint16 n; + const uint16 * pLeft, * pMid; + uint16 cMid, chStart, chEnd; + + if (rangeKey) + { + pMid = &(pTable->end_code[rangeKey]); + chEnd = be::peek<uint16>(pMid); + } + else + { + // Binary search of the endCode[] array + pLeft = &(pTable->end_code[0]); + n = nSeg; + while (n > 0) + { + cMid = n >> 1; // Pick an element in the middle + pMid = pLeft + cMid; + chEnd = be::peek<uint16>(pMid); + if (nUnicodeId <= chEnd) + { + if (cMid == 0 || nUnicodeId > be::peek<uint16>(pMid -1)) + break; // Must be this seg or none! + n = cMid; // Continue on left side, omitting mid point + } + else + { + pLeft = pMid + 1; // Continue on right side, omitting mid point + n -= (cMid + 1); + } + } + + if (!n) + return 0; + } + + // Ok, we're down to one segment and pMid points to the endCode element + // Either this is it or none is. + + chStart = be::peek<uint16>(pMid += nSeg + 1); + if (chEnd >= nUnicodeId && nUnicodeId >= chStart) + { + // Found correct segment. Find Glyph Id + int16 idDelta = be::peek<uint16>(pMid += nSeg); + uint16 idRangeOffset = be::peek<uint16>(pMid += nSeg); + + if (idRangeOffset == 0) + return (uint16)(idDelta + nUnicodeId); // must use modulus 2^16 + + // Look up value in glyphIdArray + const ptrdiff_t offset = (nUnicodeId - chStart) + (idRangeOffset >> 1) + + (pMid - reinterpret_cast<const uint16 *>(pTable)); + if (offset * 2 + 1 >= be::swap<uint16>(pTable->length)) + return 0; + gid16 nGlyphId = be::peek<uint16>(reinterpret_cast<const uint16 *>(pTable)+offset); + // If this value is 0, return 0. Else add the idDelta + return nGlyphId ? nGlyphId + idDelta : 0; + } + + return 0; +} + +/*---------------------------------------------------------------------------------------------- + Return the next Unicode value in the cmap. Pass 0 to obtain the first item. + Returns 0xFFFF as the last item. + pRangeKey is an optional key that is used to optimize the search; its value is the range + in which the character is found. +----------------------------------------------------------------------------------------------*/ +unsigned int CmapSubtable4NextCodepoint(const void *pCmap31, unsigned int nUnicodeId, int * pRangeKey) +{ + const Sfnt::CmapSubTableFormat4 * pTable = reinterpret_cast<const Sfnt::CmapSubTableFormat4 *>(pCmap31); + + uint16 nRange = be::swap(pTable->seg_count_x2) >> 1; + + uint32 nUnicodePrev = (uint32)nUnicodeId; + + const uint16 * pStartCode = &(pTable->end_code[0]) + + nRange // length of end code array + + 1; // reserved word + + if (nUnicodePrev == 0) + { + // return the first codepoint. + if (pRangeKey) + *pRangeKey = 0; + return be::peek<uint16>(pStartCode); + } + else if (nUnicodePrev >= 0xFFFF) + { + if (pRangeKey) + *pRangeKey = nRange - 1; + return 0xFFFF; + } + + int iRange = (pRangeKey) ? *pRangeKey : 0; + // Just in case we have a bad key: + while (iRange > 0 && be::peek<uint16>(pStartCode + iRange) > nUnicodePrev) + iRange--; + while (iRange < nRange - 1 && be::peek<uint16>(pTable->end_code + iRange) < nUnicodePrev) + iRange++; + + // Now iRange is the range containing nUnicodePrev. + unsigned int nStartCode = be::peek<uint16>(pStartCode + iRange); + unsigned int nEndCode = be::peek<uint16>(pTable->end_code + iRange); + + if (nStartCode > nUnicodePrev) + // Oops, nUnicodePrev is not in the cmap! Adjust so we get a reasonable + // answer this time around. + nUnicodePrev = nStartCode - 1; + + if (nEndCode > nUnicodePrev) + { + // Next is in the same range; it is the next successive codepoint. + if (pRangeKey) + *pRangeKey = iRange; + return nUnicodePrev + 1; + } + + // Otherwise the next codepoint is the first one in the next range. + // There is guaranteed to be a next range because there must be one that + // ends with 0xFFFF. + if (pRangeKey) + *pRangeKey = iRange + 1; + return (iRange + 1 >= nRange) ? 0xFFFF : be::peek<uint16>(pStartCode + iRange + 1); +} + +/*---------------------------------------------------------------------------------------------- + Check the Microsoft UCS-4 subtable for expected values. +----------------------------------------------------------------------------------------------*/ +bool CheckCmapSubtable12(const void *pCmapSubtable12, const void *pCmapEnd /*, unsigned int maxgid*/) +{ + size_t table_len = (const byte *)pCmapEnd - (const byte *)pCmapSubtable12; + if (!pCmapSubtable12) return false; + const Sfnt::CmapSubTable * pTable = reinterpret_cast<const Sfnt::CmapSubTable *>(pCmapSubtable12); + if (table_len < sizeof(*pTable) || be::swap(pTable->format) != 12) + return false; + const Sfnt::CmapSubTableFormat12 * pTable12 = reinterpret_cast<const Sfnt::CmapSubTableFormat12 *>(pCmapSubtable12); + if (table_len < sizeof(*pTable12)) + return false; + uint32 length = be::swap(pTable12->length); + if (length > table_len) + return false; + if (length < sizeof(Sfnt::CmapSubTableFormat12)) + return false; + uint32 num_groups = be::swap(pTable12->num_groups); + if (num_groups > 0x10000000 || length != (sizeof(Sfnt::CmapSubTableFormat12) + (num_groups - 1) * sizeof(uint32) * 3)) + return false; +#if 0 + for (unsigned int i = 0; i < num_groups; ++i) + { + if (be::swap(pTable12->group[i].end_char_code) - be::swap(pTable12->group[i].start_char_code) + be::swap(pTable12->group[i].start_glyph_id) > maxgid) + return false; + if (i > 0 && be::swap(pTable12->group[i].start_char_code) <= be::swap(pTable12->group[i-1].end_char_code)) + return false; + } +#endif + return true; +} + +/*---------------------------------------------------------------------------------------------- + Return the Glyph ID for the given Unicode ID in the Microsoft UCS-4 subtable. + (Actually this code only depends on subtable being format 12.) + Return 0 if the Unicode ID is not in the subtable. +----------------------------------------------------------------------------------------------*/ +gid16 CmapSubtable12Lookup(const void * pCmap310, unsigned int uUnicodeId, int rangeKey) +{ + const Sfnt::CmapSubTableFormat12 * pTable = reinterpret_cast<const Sfnt::CmapSubTableFormat12 *>(pCmap310); + + //uint32 uLength = be::swap(pTable->length); //could use to test for premature end of table + uint32 ucGroups = be::swap(pTable->num_groups); + + for (unsigned int i = rangeKey; i < ucGroups; i++) + { + uint32 uStartCode = be::swap(pTable->group[i].start_char_code); + uint32 uEndCode = be::swap(pTable->group[i].end_char_code); + if (uUnicodeId >= uStartCode && uUnicodeId <= uEndCode) + { + uint32 uDiff = uUnicodeId - uStartCode; + uint32 uStartGid = be::swap(pTable->group[i].start_glyph_id); + return static_cast<gid16>(uStartGid + uDiff); + } + } + + return 0; +} + +/*---------------------------------------------------------------------------------------------- + Return the next Unicode value in the cmap. Pass 0 to obtain the first item. + Returns 0x10FFFF as the last item. + pRangeKey is an optional key that is used to optimize the search; its value is the range + in which the character is found. +----------------------------------------------------------------------------------------------*/ +unsigned int CmapSubtable12NextCodepoint(const void *pCmap310, unsigned int nUnicodeId, int * pRangeKey) +{ + const Sfnt::CmapSubTableFormat12 * pTable = reinterpret_cast<const Sfnt::CmapSubTableFormat12 *>(pCmap310); + + int nRange = be::swap(pTable->num_groups); + + uint32 nUnicodePrev = (uint32)nUnicodeId; + + if (nUnicodePrev == 0) + { + // return the first codepoint. + if (pRangeKey) + *pRangeKey = 0; + return be::swap(pTable->group[0].start_char_code); + } + else if (nUnicodePrev >= 0x10FFFF) + { + if (pRangeKey) + *pRangeKey = nRange; + return 0x10FFFF; + } + + int iRange = (pRangeKey) ? *pRangeKey : 0; + // Just in case we have a bad key: + while (iRange > 0 && be::swap(pTable->group[iRange].start_char_code) > nUnicodePrev) + iRange--; + while (iRange < nRange - 1 && be::swap(pTable->group[iRange].end_char_code) < nUnicodePrev) + iRange++; + + // Now iRange is the range containing nUnicodePrev. + + unsigned int nStartCode = be::swap(pTable->group[iRange].start_char_code); + unsigned int nEndCode = be::swap(pTable->group[iRange].end_char_code); + + if (nStartCode > nUnicodePrev) + // Oops, nUnicodePrev is not in the cmap! Adjust so we get a reasonable + // answer this time around. + nUnicodePrev = nStartCode - 1; + + if (nEndCode > nUnicodePrev) + { + // Next is in the same range; it is the next successive codepoint. + if (pRangeKey) + *pRangeKey = iRange; + return nUnicodePrev + 1; + } + + // Otherwise the next codepoint is the first one in the next range, or 10FFFF if we're done. + if (pRangeKey) + *pRangeKey = iRange + 1; + return (iRange + 1 >= nRange) ? 0x10FFFF : be::swap(pTable->group[iRange + 1].start_char_code); +} + +/*---------------------------------------------------------------------------------------------- + Return the offset stored in the loca table for the given Glyph ID. + (This offset is into the glyf table.) + Return -1 if the lookup failed. + Technically this method should return an unsigned long but it is unlikely the offset will + exceed 2^31. +----------------------------------------------------------------------------------------------*/ +size_t LocaLookup(gid16 nGlyphId, + const void * pLoca, size_t lLocaSize, + const void * pHead) // throw (std::out_of_range) +{ + const Sfnt::FontHeader * pTable = reinterpret_cast<const Sfnt::FontHeader *>(pHead); + size_t res = -2; + + // CheckTable verifies the index_to_loc_format is valid + if (be::swap(pTable->index_to_loc_format) == Sfnt::FontHeader::ShortIndexLocFormat) + { // loca entries are two bytes and have been divided by two + if (lLocaSize > 1 && nGlyphId + 1u < lLocaSize >> 1) // allow sentinel value to be accessed + { + const uint16 * pShortTable = reinterpret_cast<const uint16 *>(pLoca); + res = be::peek<uint16>(pShortTable + nGlyphId) << 1; + if (res == static_cast<size_t>(be::peek<uint16>(pShortTable + nGlyphId + 1) << 1)) + return -1; + } + } + else if (be::swap(pTable->index_to_loc_format) == Sfnt::FontHeader::LongIndexLocFormat) + { // loca entries are four bytes + if (lLocaSize > 3 && nGlyphId + 1u < lLocaSize >> 2) + { + const uint32 * pLongTable = reinterpret_cast<const uint32 *>(pLoca); + res = be::peek<uint32>(pLongTable + nGlyphId); + if (res == static_cast<size_t>(be::peek<uint32>(pLongTable + nGlyphId + 1))) + return -1; + } + } + + // only get here if glyph id was bad + return res; + //throw std::out_of_range("glyph id out of range for font"); +} + +/*---------------------------------------------------------------------------------------------- + Return a pointer into the glyf table based on the given offset (from LocaLookup). + Return NULL on error. +----------------------------------------------------------------------------------------------*/ +void * GlyfLookup(const void * pGlyf, size_t nGlyfOffset, size_t nTableLen) +{ + const uint8 * pByte = reinterpret_cast<const uint8 *>(pGlyf); + if (OVERFLOW_OFFSET_CHECK(pByte, nGlyfOffset) || nGlyfOffset >= nTableLen - sizeof(Sfnt::Glyph)) + return NULL; + return const_cast<uint8 *>(pByte + nGlyfOffset); +} + +/*---------------------------------------------------------------------------------------------- + Get the bounding box coordinates for a simple glyf entry (non-composite). + Return true if successful, false otherwise. +----------------------------------------------------------------------------------------------*/ +bool GlyfBox(const void * pSimpleGlyf, int & xMin, int & yMin, + int & xMax, int & yMax) +{ + const Sfnt::Glyph * pGlyph = reinterpret_cast<const Sfnt::Glyph *>(pSimpleGlyf); + + xMin = be::swap(pGlyph->x_min); + yMin = be::swap(pGlyph->y_min); + xMax = be::swap(pGlyph->x_max); + yMax = be::swap(pGlyph->y_max); + + return true; +} + +#ifdef ALL_TTFUTILS +/*---------------------------------------------------------------------------------------------- + Return the number of contours for a simple glyf entry (non-composite) + Returning -1 means this is a composite glyph +----------------------------------------------------------------------------------------------*/ +int GlyfContourCount(const void * pSimpleGlyf) +{ + const Sfnt::Glyph * pGlyph = reinterpret_cast<const Sfnt::Glyph *>(pSimpleGlyf); + return be::swap(pGlyph->number_of_contours); // -1 means composite glyph +} + +/*---------------------------------------------------------------------------------------------- + Get the point numbers for the end points of the glyph contours for a simple + glyf entry (non-composite). + cnPointsTotal - count of contours from GlyfContourCount(); (same as number of end points) + prgnContourEndPoints - should point to a buffer large enough to hold cnPoints integers + cnPoints - count of points placed in above range + Return true if successful, false otherwise. + False could indicate a multi-level composite glyphs. +----------------------------------------------------------------------------------------------*/ +bool GlyfContourEndPoints(const void * pSimpleGlyf, int * prgnContourEndPoint, + int cnPointsTotal, int & cnPoints) +{ + const Sfnt::SimpleGlyph * pGlyph = reinterpret_cast<const Sfnt::SimpleGlyph *>(pSimpleGlyf); + + int cContours = be::swap(pGlyph->number_of_contours); + if (cContours < 0) + return false; // this method isn't supposed handle composite glyphs + + for (int i = 0; i < cContours && i < cnPointsTotal; i++) + { + prgnContourEndPoint[i] = be::swap(pGlyph->end_pts_of_contours[i]); + } + + cnPoints = cContours; + return true; +} + +/*---------------------------------------------------------------------------------------------- + Get the points for a simple glyf entry (non-composite) + cnPointsTotal - count of points from largest end point obtained from GlyfContourEndPoints + prgnX & prgnY - should point to buffers large enough to hold cnPointsTotal integers + The ranges are parallel so that coordinates for point(n) are found at offset n in both + ranges. This is raw point data with relative coordinates. + prgbFlag - should point to a buffer a large enough to hold cnPointsTotal bytes + This range is parallel to the prgnX & prgnY + cnPoints - count of points placed in above ranges + Return true if successful, false otherwise. + False could indicate a composite glyph +----------------------------------------------------------------------------------------------*/ +bool GlyfPoints(const void * pSimpleGlyf, int * prgnX, int * prgnY, + char * prgbFlag, int cnPointsTotal, int & cnPoints) +{ + using namespace Sfnt; + + const Sfnt::SimpleGlyph * pGlyph = reinterpret_cast<const Sfnt::SimpleGlyph *>(pSimpleGlyf); + int cContours = be::swap(pGlyph->number_of_contours); + // return false for composite glyph + if (cContours <= 0) + return false; + int cPts = be::swap(pGlyph->end_pts_of_contours[cContours - 1]) + 1; + if (cPts > cnPointsTotal) + return false; + + // skip over bounding box data & point to byte count of instructions (hints) + const uint8 * pbGlyph = reinterpret_cast<const uint8 *> + (&pGlyph->end_pts_of_contours[cContours]); + + // skip over hints & point to first flag + int cbHints = be::swap(*(uint16 *)pbGlyph); + pbGlyph += sizeof(uint16); + pbGlyph += cbHints; + + // load flags & point to first x coordinate + int iFlag = 0; + while (iFlag < cPts) + { + if (!(*pbGlyph & SimpleGlyph::Repeat)) + { // flag isn't repeated + prgbFlag[iFlag] = (char)*pbGlyph; + pbGlyph++; + iFlag++; + } + else + { // flag is repeated; count specified by next byte + char chFlag = (char)*pbGlyph; + pbGlyph++; + int cFlags = (int)*pbGlyph; + pbGlyph++; + prgbFlag[iFlag] = chFlag; + iFlag++; + for (int i = 0; i < cFlags; i++) + { + prgbFlag[iFlag + i] = chFlag; + } + iFlag += cFlags; + } + } + if (iFlag != cPts) + return false; + + // load x coordinates + iFlag = 0; + while (iFlag < cPts) + { + if (prgbFlag[iFlag] & SimpleGlyph::XShort) + { + prgnX[iFlag] = *pbGlyph; + if (!(prgbFlag[iFlag] & SimpleGlyph::XIsPos)) + { + prgnX[iFlag] = -prgnX[iFlag]; + } + pbGlyph++; + } + else + { + if (prgbFlag[iFlag] & SimpleGlyph::XIsSame) + { + prgnX[iFlag] = 0; + // do NOT increment pbGlyph + } + else + { + prgnX[iFlag] = be::swap(*(int16 *)pbGlyph); + pbGlyph += sizeof(int16); + } + } + iFlag++; + } + + // load y coordinates + iFlag = 0; + while (iFlag < cPts) + { + if (prgbFlag[iFlag] & SimpleGlyph::YShort) + { + prgnY[iFlag] = *pbGlyph; + if (!(prgbFlag[iFlag] & SimpleGlyph::YIsPos)) + { + prgnY[iFlag] = -prgnY[iFlag]; + } + pbGlyph++; + } + else + { + if (prgbFlag[iFlag] & SimpleGlyph::YIsSame) + { + prgnY[iFlag] = 0; + // do NOT increment pbGlyph + } + else + { + prgnY[iFlag] = be::swap(*(int16 *)pbGlyph); + pbGlyph += sizeof(int16); + } + } + iFlag++; + } + + cnPoints = cPts; + return true; +} + +/*---------------------------------------------------------------------------------------------- + Fill prgnCompId with the component Glyph IDs from pSimpleGlyf. + Client must allocate space before calling. + pSimpleGlyf - assumed to point to a composite glyph + cCompIdTotal - the number of elements in prgnCompId + cCompId - the total number of Glyph IDs stored in prgnCompId + Return true if successful, false otherwise + False could indicate a non-composite glyph or the input array was not big enough +----------------------------------------------------------------------------------------------*/ +bool GetComponentGlyphIds(const void * pSimpleGlyf, int * prgnCompId, + size_t cnCompIdTotal, size_t & cnCompId) +{ + using namespace Sfnt; + + if (GlyfContourCount(pSimpleGlyf) >= 0) + return false; + + const Sfnt::SimpleGlyph * pGlyph = reinterpret_cast<const Sfnt::SimpleGlyph *>(pSimpleGlyf); + // for a composite glyph, the special data begins here + const uint8 * pbGlyph = reinterpret_cast<const uint8 *>(&pGlyph->end_pts_of_contours[0]); + + uint16 GlyphFlags; + size_t iCurrentComp = 0; + do + { + GlyphFlags = be::swap(*((uint16 *)pbGlyph)); + pbGlyph += sizeof(uint16); + prgnCompId[iCurrentComp++] = be::swap(*((uint16 *)pbGlyph)); + pbGlyph += sizeof(uint16); + if (iCurrentComp >= cnCompIdTotal) + return false; + int nOffset = 0; + nOffset += GlyphFlags & CompoundGlyph::Arg1Arg2Words ? 4 : 2; + nOffset += GlyphFlags & CompoundGlyph::HaveScale ? 2 : 0; + nOffset += GlyphFlags & CompoundGlyph::HaveXAndYScale ? 4 : 0; + nOffset += GlyphFlags & CompoundGlyph::HaveTwoByTwo ? 8 : 0; + pbGlyph += nOffset; + } while (GlyphFlags & CompoundGlyph::MoreComponents); + + cnCompId = iCurrentComp; + + return true; +} + +/*---------------------------------------------------------------------------------------------- + Return info on how a component glyph is to be placed + pSimpleGlyph - assumed to point to a composite glyph + nCompId - glyph id for component of interest + bOffset - if true, a & b are the x & y offsets for this component + if false, b is the point on this component that is attaching to point a on the + preceding glyph + Return true if successful, false otherwise + False could indicate a non-composite glyph or that component wasn't found +----------------------------------------------------------------------------------------------*/ +bool GetComponentPlacement(const void * pSimpleGlyf, int nCompId, + bool fOffset, int & a, int & b) +{ + using namespace Sfnt; + + if (GlyfContourCount(pSimpleGlyf) >= 0) + return false; + + const Sfnt::SimpleGlyph * pGlyph = reinterpret_cast<const Sfnt::SimpleGlyph *>(pSimpleGlyf); + // for a composite glyph, the special data begins here + const uint8 * pbGlyph = reinterpret_cast<const uint8 *>(&pGlyph->end_pts_of_contours[0]); + + uint16 GlyphFlags; + do + { + GlyphFlags = be::swap(*((uint16 *)pbGlyph)); + pbGlyph += sizeof(uint16); + if (be::swap(*((uint16 *)pbGlyph)) == nCompId) + { + pbGlyph += sizeof(uint16); // skip over glyph id of component + fOffset = (GlyphFlags & CompoundGlyph::ArgsAreXYValues) == CompoundGlyph::ArgsAreXYValues; + + if (GlyphFlags & CompoundGlyph::Arg1Arg2Words ) + { + a = be::swap(*(int16 *)pbGlyph); + pbGlyph += sizeof(int16); + b = be::swap(*(int16 *)pbGlyph); + pbGlyph += sizeof(int16); + } + else + { // args are signed bytes + a = *pbGlyph++; + b = *pbGlyph++; + } + return true; + } + pbGlyph += sizeof(uint16); // skip over glyph id of component + int nOffset = 0; + nOffset += GlyphFlags & CompoundGlyph::Arg1Arg2Words ? 4 : 2; + nOffset += GlyphFlags & CompoundGlyph::HaveScale ? 2 : 0; + nOffset += GlyphFlags & CompoundGlyph::HaveXAndYScale ? 4 : 0; + nOffset += GlyphFlags & CompoundGlyph::HaveTwoByTwo ? 8 : 0; + pbGlyph += nOffset; + } while (GlyphFlags & CompoundGlyph::MoreComponents); + + // didn't find requested component + fOffset = true; + a = 0; + b = 0; + return false; +} + +/*---------------------------------------------------------------------------------------------- + Return info on how a component glyph is to be transformed + pSimpleGlyph - assumed to point to a composite glyph + nCompId - glyph id for component of interest + flt11, flt11, flt11, flt11 - a 2x2 matrix giving the transform + bTransOffset - whether to transform the offset from above method + The spec is unclear about the meaning of this flag + Currently - initialize to true for MS rasterizer and false for Mac rasterizer, then + on return it will indicate whether transform should apply to offset (MSDN CD 10/99) + Return true if successful, false otherwise + False could indicate a non-composite glyph or that component wasn't found +----------------------------------------------------------------------------------------------*/ +bool GetComponentTransform(const void * pSimpleGlyf, int nCompId, + float & flt11, float & flt12, float & flt21, float & flt22, + bool & fTransOffset) +{ + using namespace Sfnt; + + if (GlyfContourCount(pSimpleGlyf) >= 0) + return false; + + const Sfnt::SimpleGlyph * pGlyph = reinterpret_cast<const Sfnt::SimpleGlyph *>(pSimpleGlyf); + // for a composite glyph, the special data begins here + const uint8 * pbGlyph = reinterpret_cast<const uint8 *>(&pGlyph->end_pts_of_contours[0]); + + uint16 GlyphFlags; + do + { + GlyphFlags = be::swap(*((uint16 *)pbGlyph)); + pbGlyph += sizeof(uint16); + if (be::swap(*((uint16 *)pbGlyph)) == nCompId) + { + pbGlyph += sizeof(uint16); // skip over glyph id of component + pbGlyph += GlyphFlags & CompoundGlyph::Arg1Arg2Words ? 4 : 2; // skip over placement data + + if (fTransOffset) // MS rasterizer + fTransOffset = !(GlyphFlags & CompoundGlyph::UnscaledOffset); + else // Apple rasterizer + fTransOffset = (GlyphFlags & CompoundGlyph::ScaledOffset) != 0; + + if (GlyphFlags & CompoundGlyph::HaveScale) + { + flt11 = fixed_to_float<14>(be::swap(*(uint16 *)pbGlyph)); + pbGlyph += sizeof(uint16); + flt12 = 0; + flt21 = 0; + flt22 = flt11; + } + else if (GlyphFlags & CompoundGlyph::HaveXAndYScale) + { + flt11 = fixed_to_float<14>(be::swap(*(uint16 *)pbGlyph)); + pbGlyph += sizeof(uint16); + flt12 = 0; + flt21 = 0; + flt22 = fixed_to_float<14>(be::swap(*(uint16 *)pbGlyph)); + pbGlyph += sizeof(uint16); + } + else if (GlyphFlags & CompoundGlyph::HaveTwoByTwo) + { + flt11 = fixed_to_float<14>(be::swap(*(uint16 *)pbGlyph)); + pbGlyph += sizeof(uint16); + flt12 = fixed_to_float<14>(be::swap(*(uint16 *)pbGlyph)); + pbGlyph += sizeof(uint16); + flt21 = fixed_to_float<14>(be::swap(*(uint16 *)pbGlyph)); + pbGlyph += sizeof(uint16); + flt22 = fixed_to_float<14>(be::swap(*(uint16 *)pbGlyph)); + pbGlyph += sizeof(uint16); + } + else + { // identity transform + flt11 = 1.0; + flt12 = 0.0; + flt21 = 0.0; + flt22 = 1.0; + } + return true; + } + pbGlyph += sizeof(uint16); // skip over glyph id of component + int nOffset = 0; + nOffset += GlyphFlags & CompoundGlyph::Arg1Arg2Words ? 4 : 2; + nOffset += GlyphFlags & CompoundGlyph::HaveScale ? 2 : 0; + nOffset += GlyphFlags & CompoundGlyph::HaveXAndYScale ? 4 : 0; + nOffset += GlyphFlags & CompoundGlyph::HaveTwoByTwo ? 8 : 0; + pbGlyph += nOffset; + } while (GlyphFlags & CompoundGlyph::MoreComponents); + + // didn't find requested component + fTransOffset = false; + flt11 = 1; + flt12 = 0; + flt21 = 0; + flt22 = 1; + return false; +} +#endif + +/*---------------------------------------------------------------------------------------------- + Return a pointer into the glyf table based on the given tables and Glyph ID + Since this method doesn't check for spaces, it is good to call IsSpace before using it. + Return NULL on error. +----------------------------------------------------------------------------------------------*/ +void * GlyfLookup(gid16 nGlyphId, const void * pGlyf, const void * pLoca, + size_t lGlyfSize, size_t lLocaSize, const void * pHead) +{ + // test for valid glyph id + // CheckTable verifies the index_to_loc_format is valid + + const Sfnt::FontHeader * pTable + = reinterpret_cast<const Sfnt::FontHeader *>(pHead); + + if (be::swap(pTable->index_to_loc_format) == Sfnt::FontHeader::ShortIndexLocFormat) + { // loca entries are two bytes (and have been divided by two) + if (nGlyphId >= (lLocaSize >> 1) - 1) // don't allow nGlyphId to access sentinel + { +// throw std::out_of_range("glyph id out of range for font"); + return NULL; + } + } + if (be::swap(pTable->index_to_loc_format) == Sfnt::FontHeader::LongIndexLocFormat) + { // loca entries are four bytes + if (nGlyphId >= (lLocaSize >> 2) - 1) + { +// throw std::out_of_range("glyph id out of range for font"); + return NULL; + } + } + + size_t lGlyfOffset = LocaLookup(nGlyphId, pLoca, lLocaSize, pHead); + void * pSimpleGlyf = GlyfLookup(pGlyf, lGlyfOffset, lGlyfSize); // invalid loca offset returns null + return pSimpleGlyf; +} + +#ifdef ALL_TTFUTILS +/*---------------------------------------------------------------------------------------------- + Determine if a particular Glyph ID has any data in the glyf table. If it is white space, + there will be no glyf data, though there will be metric data in hmtx, etc. +----------------------------------------------------------------------------------------------*/ +bool IsSpace(gid16 nGlyphId, const void * pLoca, size_t lLocaSize, const void * pHead) +{ + size_t lGlyfOffset = LocaLookup(nGlyphId, pLoca, lLocaSize, pHead); + + // the +1 should always work because there is a sentinel value at the end of the loca table + size_t lNextGlyfOffset = LocaLookup(nGlyphId + 1, pLoca, lLocaSize, pHead); + + return (lNextGlyfOffset - lGlyfOffset) == 0; +} + +/*---------------------------------------------------------------------------------------------- + Determine if a particular Glyph ID is a multi-level composite. +----------------------------------------------------------------------------------------------*/ +bool IsDeepComposite(gid16 nGlyphId, const void * pGlyf, const void * pLoca, + size_t lGlyfSize, long lLocaSize, const void * pHead) +{ + if (IsSpace(nGlyphId, pLoca, lLocaSize, pHead)) {return false;} + + void * pSimpleGlyf = GlyfLookup(nGlyphId, pGlyf, pLoca, lGlyfSize, lLocaSize, pHead); + if (pSimpleGlyf == NULL) + return false; // no way to really indicate an error occured here + + if (GlyfContourCount(pSimpleGlyf) >= 0) + return false; + + int rgnCompId[kMaxGlyphComponents]; // assumes only a limited number of glyph components + size_t cCompIdTotal = kMaxGlyphComponents; + size_t cCompId = 0; + + if (!GetComponentGlyphIds(pSimpleGlyf, rgnCompId, cCompIdTotal, cCompId)) + return false; + + for (size_t i = 0; i < cCompId; i++) + { + pSimpleGlyf = GlyfLookup(static_cast<gid16>(rgnCompId[i]), + pGlyf, pLoca, lGlyfSize, lLocaSize, pHead); + if (pSimpleGlyf == NULL) {return false;} + + if (GlyfContourCount(pSimpleGlyf) < 0) + return true; + } + + return false; +} + +/*---------------------------------------------------------------------------------------------- + Get the bounding box coordinates based on the given tables and Glyph ID + Handles both simple and composite glyphs. + Return true if successful, false otherwise. On false, all point values will be INT_MIN + False may indicate a white space glyph +----------------------------------------------------------------------------------------------*/ +bool GlyfBox(gid16 nGlyphId, const void * pGlyf, const void * pLoca, + size_t lGlyfSize, size_t lLocaSize, const void * pHead, int & xMin, int & yMin, int & xMax, int & yMax) +{ + xMin = yMin = xMax = yMax = INT_MIN; + + if (IsSpace(nGlyphId, pLoca, lLocaSize, pHead)) {return false;} + + void * pSimpleGlyf = GlyfLookup(nGlyphId, pGlyf, pLoca, lGlyfSize, lLocaSize, pHead); + if (pSimpleGlyf == NULL) {return false;} + + return GlyfBox(pSimpleGlyf, xMin, yMin, xMax, yMax); +} + +/*---------------------------------------------------------------------------------------------- + Get the number of contours based on the given tables and Glyph ID + Handles both simple and composite glyphs. + Return true if successful, false otherwise. On false, cnContours will be INT_MIN + False may indicate a white space glyph or a multi-level composite glyph. +----------------------------------------------------------------------------------------------*/ +bool GlyfContourCount(gid16 nGlyphId, const void * pGlyf, const void * pLoca, + size_t lGlyfSize, size_t lLocaSize, const void * pHead, size_t & cnContours) +{ + cnContours = static_cast<size_t>(INT_MIN); + + if (IsSpace(nGlyphId, pLoca, lLocaSize, pHead)) {return false;} + + void * pSimpleGlyf = GlyfLookup(nGlyphId, pGlyf, pLoca, lGlyfSize, lLocaSize, pHead); + if (pSimpleGlyf == NULL) {return false;} + + int cRtnContours = GlyfContourCount(pSimpleGlyf); + if (cRtnContours >= 0) + { + cnContours = size_t(cRtnContours); + return true; + } + + //handle composite glyphs + + int rgnCompId[kMaxGlyphComponents]; // assumes no glyph will be made of more than 8 components + size_t cCompIdTotal = kMaxGlyphComponents; + size_t cCompId = 0; + + if (!GetComponentGlyphIds(pSimpleGlyf, rgnCompId, cCompIdTotal, cCompId)) + return false; + + cRtnContours = 0; + int cTmp = 0; + for (size_t i = 0; i < cCompId; i++) + { + if (IsSpace(static_cast<gid16>(rgnCompId[i]), pLoca, lLocaSize, pHead)) {return false;} + pSimpleGlyf = GlyfLookup(static_cast<gid16>(rgnCompId[i]), + pGlyf, pLoca, lGlyfSize, lLocaSize, pHead); + if (pSimpleGlyf == 0) {return false;} + // return false on multi-level composite + if ((cTmp = GlyfContourCount(pSimpleGlyf)) < 0) + return false; + cRtnContours += cTmp; + } + + cnContours = size_t(cRtnContours); + return true; +} + +/*---------------------------------------------------------------------------------------------- + Get the point numbers for the end points of the glyph contours based on the given tables + and Glyph ID + Handles both simple and composite glyphs. + cnPoints - count of contours from GlyfContourCount (same as number of end points) + prgnContourEndPoints - should point to a buffer large enough to hold cnPoints integers + Return true if successful, false otherwise. On false, all end points are INT_MIN + False may indicate a white space glyph or a multi-level composite glyph. +----------------------------------------------------------------------------------------------*/ +bool GlyfContourEndPoints(gid16 nGlyphId, const void * pGlyf, const void * pLoca, + size_t lGlyfSize, size_t lLocaSize, const void * pHead, + int * prgnContourEndPoint, size_t cnPoints) +{ + memset(prgnContourEndPoint, 0xFF, cnPoints * sizeof(int)); + // std::fill_n(prgnContourEndPoint, cnPoints, INT_MIN); + + if (IsSpace(nGlyphId, pLoca, lLocaSize, pHead)) {return false;} + + void * pSimpleGlyf = GlyfLookup(nGlyphId, pGlyf, pLoca, lGlyfSize, lLocaSize, pHead); + if (pSimpleGlyf == NULL) {return false;} + + int cContours = GlyfContourCount(pSimpleGlyf); + int cActualPts = 0; + if (cContours > 0) + return GlyfContourEndPoints(pSimpleGlyf, prgnContourEndPoint, cnPoints, cActualPts); + + // handle composite glyphs + + int rgnCompId[kMaxGlyphComponents]; // assumes no glyph will be made of more than 8 components + size_t cCompIdTotal = kMaxGlyphComponents; + size_t cCompId = 0; + + if (!GetComponentGlyphIds(pSimpleGlyf, rgnCompId, cCompIdTotal, cCompId)) + return false; + + int * prgnCurrentEndPoint = prgnContourEndPoint; + int cCurrentPoints = cnPoints; + int nPrevPt = 0; + for (size_t i = 0; i < cCompId; i++) + { + if (IsSpace(static_cast<gid16>(rgnCompId[i]), pLoca, lLocaSize, pHead)) {return false;} + pSimpleGlyf = GlyfLookup(static_cast<gid16>(rgnCompId[i]), pGlyf, pLoca, lGlyfSize, lLocaSize, pHead); + if (pSimpleGlyf == NULL) {return false;} + // returns false on multi-level composite + if (!GlyfContourEndPoints(pSimpleGlyf, prgnCurrentEndPoint, cCurrentPoints, cActualPts)) + return false; + // points in composite are numbered sequentially as components are added + // must adjust end point numbers for new point numbers + for (int j = 0; j < cActualPts; j++) + prgnCurrentEndPoint[j] += nPrevPt; + nPrevPt = prgnCurrentEndPoint[cActualPts - 1] + 1; + + prgnCurrentEndPoint += cActualPts; + cCurrentPoints -= cActualPts; + } + + return true; +} + +/*---------------------------------------------------------------------------------------------- + Get the points for a glyph based on the given tables and Glyph ID + Handles both simple and composite glyphs. + cnPoints - count of points from largest end point obtained from GlyfContourEndPoints + prgnX & prgnY - should point to buffers large enough to hold cnPoints integers + The ranges are parallel so that coordinates for point(n) are found at offset n in + both ranges. These points are in absolute coordinates. + prgfOnCurve - should point to a buffer a large enough to hold cnPoints bytes (bool) + This range is parallel to the prgnX & prgnY + Return true if successful, false otherwise. On false, all points may be INT_MIN + False may indicate a white space glyph, a multi-level composite, or a corrupt font + It's not clear from the TTF spec when the transforms should be applied. Should the + transform be done before or after attachment point calcs? (current code - before) + Should the transform be applied to other offsets? (currently - no; however commented + out code is in place so that if CompoundGlyph::UnscaledOffset on the MS rasterizer is + clear (typical) then yes, and if CompoundGlyph::ScaledOffset on the Apple rasterizer is + clear (typical?) then no). See GetComponentTransform. + It's also unclear where point numbering with attachment poinst starts + (currently - first point number is relative to whole glyph, second point number is + relative to current glyph). +----------------------------------------------------------------------------------------------*/ +bool GlyfPoints(gid16 nGlyphId, const void * pGlyf, + const void * pLoca, size_t lGlyfSize, size_t lLocaSize, const void * pHead, + const int * /*prgnContourEndPoint*/, size_t /*cnEndPoints*/, + int * prgnX, int * prgnY, bool * prgfOnCurve, size_t cnPoints) +{ + memset(prgnX, 0x7F, cnPoints * sizeof(int)); + memset(prgnY, 0x7F, cnPoints * sizeof(int)); + + if (IsSpace(nGlyphId, pLoca, lLocaSize, pHead)) + return false; + + void * pSimpleGlyf = GlyfLookup(nGlyphId, pGlyf, pLoca, lGlyfSize, lLocaSize, pHead); + if (pSimpleGlyf == NULL) + return false; + + int cContours = GlyfContourCount(pSimpleGlyf); + int cActualPts; + if (cContours > 0) + { + if (!GlyfPoints(pSimpleGlyf, prgnX, prgnY, (char *)prgfOnCurve, cnPoints, cActualPts)) + return false; + CalcAbsolutePoints(prgnX, prgnY, cnPoints); + SimplifyFlags((char *)prgfOnCurve, cnPoints); + return true; + } + + // handle composite glyphs + int rgnCompId[kMaxGlyphComponents]; // assumes no glyph will be made of more than 8 components + size_t cCompIdTotal = kMaxGlyphComponents; + size_t cCompId = 0; + + // this will fail if there are more components than there is room for + if (!GetComponentGlyphIds(pSimpleGlyf, rgnCompId, cCompIdTotal, cCompId)) + return false; + + int * prgnCurrentX = prgnX; + int * prgnCurrentY = prgnY; + char * prgbCurrentFlag = (char *)prgfOnCurve; // converting bool to char should be safe + int cCurrentPoints = cnPoints; + bool fOffset = true, fTransOff = true; + int a, b; + float flt11, flt12, flt21, flt22; + // int * prgnPrevX = prgnX; // in case first att pt number relative to preceding glyph + // int * prgnPrevY = prgnY; + for (size_t i = 0; i < cCompId; i++) + { + if (IsSpace(static_cast<gid16>(rgnCompId[i]), pLoca, lLocaSize, pHead)) {return false;} + void * pCompGlyf = GlyfLookup(static_cast<gid16>(rgnCompId[i]), pGlyf, pLoca, lGlyfSize, lLocaSize, pHead); + if (pCompGlyf == NULL) {return false;} + // returns false on multi-level composite + if (!GlyfPoints(pCompGlyf, prgnCurrentX, prgnCurrentY, prgbCurrentFlag, + cCurrentPoints, cActualPts)) + return false; + if (!GetComponentPlacement(pSimpleGlyf, rgnCompId[i], fOffset, a, b)) + return false; + if (!GetComponentTransform(pSimpleGlyf, rgnCompId[i], + flt11, flt12, flt21, flt22, fTransOff)) + return false; + bool fIdTrans = flt11 == 1.0 && flt12 == 0.0 && flt21 == 0.0 && flt22 == 1.0; + + // convert points to absolute coordinates + // do before transform and attachment point placement are applied + CalcAbsolutePoints(prgnCurrentX, prgnCurrentY, cActualPts); + + // apply transform - see main method note above + // do before attachment point calcs + if (!fIdTrans) + for (int j = 0; j < cActualPts; j++) + { + int x = prgnCurrentX[j]; // store before transform applied + int y = prgnCurrentY[j]; + prgnCurrentX[j] = (int)(x * flt11 + y * flt12); + prgnCurrentY[j] = (int)(x * flt21 + y * flt22); + } + + // apply placement - see main method note above + int nXOff, nYOff; + if (fOffset) // explicit x & y offsets + { + /* ignore fTransOff for now + if (fTransOff && !fIdTrans) + { // transform x & y offsets + nXOff = (int)(a * flt11 + b * flt12); + nYOff = (int)(a * flt21 + b * flt22); + } + else */ + { // don't transform offset + nXOff = a; + nYOff = b; + } + } + else // attachment points + { // in case first point is relative to preceding glyph and second relative to current + // nXOff = prgnPrevX[a] - prgnCurrentX[b]; + // nYOff = prgnPrevY[a] - prgnCurrentY[b]; + // first point number relative to whole composite, second relative to current glyph + nXOff = prgnX[a] - prgnCurrentX[b]; + nYOff = prgnY[a] - prgnCurrentY[b]; + } + for (int j = 0; j < cActualPts; j++) + { + prgnCurrentX[j] += nXOff; + prgnCurrentY[j] += nYOff; + } + + // prgnPrevX = prgnCurrentX; + // prgnPrevY = prgnCurrentY; + prgnCurrentX += cActualPts; + prgnCurrentY += cActualPts; + prgbCurrentFlag += cActualPts; + cCurrentPoints -= cActualPts; + } + + SimplifyFlags((char *)prgfOnCurve, cnPoints); + + return true; +} + +/*---------------------------------------------------------------------------------------------- + Simplify the meaning of flags to just indicate whether point is on-curve or off-curve. +---------------------------------------------------------------------------------------------*/ +bool SimplifyFlags(char * prgbFlags, int cnPoints) +{ + for (int i = 0; i < cnPoints; i++) + prgbFlags[i] = static_cast<char>(prgbFlags[i] & Sfnt::SimpleGlyph::OnCurve); + return true; +} + +/*---------------------------------------------------------------------------------------------- + Convert relative point coordinates to absolute coordinates + Points are stored in the font such that they are offsets from one another except for the + first point of a glyph. +---------------------------------------------------------------------------------------------*/ +bool CalcAbsolutePoints(int * prgnX, int * prgnY, int cnPoints) +{ + int nX = prgnX[0]; + int nY = prgnY[0]; + for (int i = 1; i < cnPoints; i++) + { + prgnX[i] += nX; + nX = prgnX[i]; + prgnY[i] += nY; + nY = prgnY[i]; + } + + return true; +} +#endif + +/*---------------------------------------------------------------------------------------------- + Return the length of the 'name' table in bytes. + Currently used. +---------------------------------------------------------------------------------------------*/ +#if 0 +size_t NameTableLength(const byte * pTable) +{ + byte * pb = (const_cast<byte *>(pTable)) + 2; // skip format + size_t cRecords = *pb++ << 8; cRecords += *pb++; + int dbStringOffset0 = (*pb++) << 8; dbStringOffset0 += *pb++; + int dbMaxStringOffset = 0; + for (size_t irec = 0; irec < cRecords; irec++) + { + int nPlatform = (*pb++) << 8; nPlatform += *pb++; + int nEncoding = (*pb++) << 8; nEncoding += *pb++; + int nLanguage = (*pb++) << 8; nLanguage += *pb++; + int nName = (*pb++) << 8; nName += *pb++; + int cbStringLen = (*pb++) << 8; cbStringLen += *pb++; + int dbStringOffset = (*pb++) << 8; dbStringOffset += *pb++; + if (dbMaxStringOffset < dbStringOffset + cbStringLen) + dbMaxStringOffset = dbStringOffset + cbStringLen; + } + return dbStringOffset0 + dbMaxStringOffset; +} +#endif + +} // end of namespace TtfUtil +} // end of namespace graphite diff --git a/thirdparty/graphite/src/UtfCodec.cpp b/thirdparty/graphite/src/UtfCodec.cpp new file mode 100644 index 0000000000..a944bbf9d0 --- /dev/null +++ b/thirdparty/graphite/src/UtfCodec.cpp @@ -0,0 +1,45 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "inc/UtfCodec.h" +//using namespace graphite2; + +namespace graphite2 { + +} + +using namespace graphite2; + +const int8 _utf_codec<8>::sz_lut[16] = +{ + 1,1,1,1,1,1,1,1, // 1 byte + 0,0,0,0, // trailing byte + 2,2, // 2 bytes + 3, // 3 bytes + 4 // 4 bytes +}; + +const byte _utf_codec<8>::mask_lut[5] = {0x7f, 0xff, 0x3f, 0x1f, 0x0f}; diff --git a/thirdparty/graphite/src/call_machine.cpp b/thirdparty/graphite/src/call_machine.cpp new file mode 100644 index 0000000000..fcd8a0c2c1 --- /dev/null +++ b/thirdparty/graphite/src/call_machine.cpp @@ -0,0 +1,138 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +// This call threaded interpreter implmentation for machine.h +// Author: Tim Eves + +// Build either this interpreter or the direct_machine implementation. +// The call threaded interpreter is portable across compilers and +// architectures as well as being useful to debug (you can set breakpoints on +// opcodes) but is slower that the direct threaded interpreter by a factor of 2 + +#include <cassert> +#include <cstring> +#include <graphite2/Segment.h> +#include "inc/Machine.h" +#include "inc/Segment.h" +#include "inc/Slot.h" +#include "inc/Rule.h" + +// Disable the unused parameter warning as th compiler is mistaken since dp +// is always updated (even if by 0) on every opcode. +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif + +#define registers const byte * & dp, vm::Machine::stack_t * & sp, \ + vm::Machine::stack_t * const sb, regbank & reg + +// These are required by opcodes.h and should not be changed +#define STARTOP(name) bool name(registers) REGPARM(4);\ + bool name(registers) { +#define ENDOP return (sp - sb)/Machine::STACK_MAX==0; \ + } + +#define EXIT(status) { push(status); return false; } + +// This is required by opcode_table.h +#define do_(name) instr(name) + + +using namespace graphite2; +using namespace vm; + +struct regbank { + slotref is; + slotref * map; + SlotMap & smap; + slotref * const map_base; + const instr * & ip; + uint8 direction; + int8 flags; + Machine::status_t & status; +}; + +typedef bool (* ip_t)(registers); + +// Pull in the opcode definitions +// We pull these into a private namespace so these otherwise common names dont +// pollute the toplevel namespace. +namespace { +#define smap reg.smap +#define seg smap.segment +#define is reg.is +#define ip reg.ip +#define map reg.map +#define mapb reg.map_base +#define flags reg.flags +#define dir reg.direction +#define status reg.status + +#include "inc/opcodes.h" + +#undef smap +#undef seg +#undef is +#undef ip +#undef map +#undef mapb +#undef flags +#undef dir +} + +Machine::stack_t Machine::run(const instr * program, + const byte * data, + slotref * & map) + +{ + assert(program != 0); + + // Declare virtual machine registers + const instr * ip = program-1; + const byte * dp = data; + stack_t * sp = _stack + Machine::STACK_GUARD, + * const sb = sp; + regbank reg = {*map, map, _map, _map.begin()+_map.context(), ip, _map.dir(), 0, _status}; + + // Run the program + while ((reinterpret_cast<ip_t>(*++ip))(dp, sp, sb, reg)) {} + const stack_t ret = sp == _stack+STACK_GUARD+1 ? *sp-- : 0; + + check_final_stack(sp); + map = reg.map; + *map = reg.is; + return ret; +} + +// Pull in the opcode table +namespace { +#include "inc/opcode_table.h" +} + +const opcode_t * Machine::getOpcodeTable() throw() +{ + return opcode_table; +} diff --git a/thirdparty/graphite/src/direct_machine.cpp b/thirdparty/graphite/src/direct_machine.cpp new file mode 100644 index 0000000000..86206cfe37 --- /dev/null +++ b/thirdparty/graphite/src/direct_machine.cpp @@ -0,0 +1,140 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +// This direct threaded interpreter implmentation for machine.h +// Author: Tim Eves + +// Build either this interpreter or the call_machine implementation. +// The direct threaded interpreter is relies upon a gcc feature called +// labels-as-values so is only portable to compilers that support the +// extension (gcc only as far as I know) however it should build on any +// architecture gcc supports. +// This is twice as fast as the call threaded model and is likely faster on +// inorder processors with short pipelines and little branch prediction such +// as the ARM and possibly Atom chips. + + +#include <cassert> +#include <cstring> +#include "inc/Machine.h" +#include "inc/Segment.h" +#include "inc/Slot.h" +#include "inc/Rule.h" + +#define STARTOP(name) name: { +#define ENDOP }; goto *((sp - sb)/Machine::STACK_MAX ? &&end : *++ip); +#define EXIT(status) { push(status); goto end; } + +#define do_(name) &&name + + +using namespace graphite2; +using namespace vm; + +namespace { + +// The GCC manual has this to say about labels as values: +// The &&foo expressions for the same label might have different values +// if the containing function is inlined or cloned. If a program relies +// on them being always the same, __attribute__((__noinline__,__noclone__)) +// should be used to prevent inlining and cloning. +// +// is_return in Code.cpp relies on being able to do comparisons, so it needs +// them to be always the same. +// +// The GCC manual further adds: +// If &&foo is used in a static variable initializer, inlining and +// cloning is forbidden. +// +// In this file, &&foo *is* used in a static variable initializer, and it's not +// entirely clear whether this should prevent inlining of the function or not. +// In practice, though, clang 7 can end up inlining the function with ThinLTO, +// which breaks at least is_return. https://bugs.llvm.org/show_bug.cgi?id=39241 +// So all in all, we need at least the __noinline__ attribute. __noclone__ +// is not supported by clang. +__attribute__((__noinline__)) +const void * direct_run(const bool get_table_mode, + const instr * program, + const byte * data, + Machine::stack_t * stack, + slotref * & __map, + uint8 _dir, + Machine::status_t & status, + SlotMap * __smap=0) +{ + // We need to define and return to opcode table from within this function + // other inorder to take the addresses of the instruction bodies. + #include "inc/opcode_table.h" + if (get_table_mode) + return opcode_table; + + // Declare virtual machine registers + const instr * ip = program; + const byte * dp = data; + Machine::stack_t * sp = stack + Machine::STACK_GUARD, + * const sb = sp; + SlotMap & smap = *__smap; + Segment & seg = smap.segment; + slotref is = *__map, + * map = __map, + * const mapb = smap.begin()+smap.context(); + uint8 dir = _dir; + int8 flags = 0; + + // start the program + goto **ip; + + // Pull in the opcode definitions + #include "inc/opcodes.h" + + end: + __map = map; + *__map = is; + return sp; +} + +} + +const opcode_t * Machine::getOpcodeTable() throw() +{ + slotref * dummy; + Machine::status_t dumstat = Machine::finished; + return static_cast<const opcode_t *>(direct_run(true, 0, 0, 0, dummy, 0, dumstat)); +} + + +Machine::stack_t Machine::run(const instr * program, + const byte * data, + slotref * & is) +{ + assert(program != 0); + + const stack_t *sp = static_cast<const stack_t *>( + direct_run(false, program, data, _stack, is, _map.dir(), _status, &_map)); + const stack_t ret = sp == _stack+STACK_GUARD+1 ? *sp-- : 0; + check_final_stack(sp); + return ret; +} diff --git a/thirdparty/graphite/src/gr_char_info.cpp b/thirdparty/graphite/src/gr_char_info.cpp new file mode 100644 index 0000000000..612f9ba694 --- /dev/null +++ b/thirdparty/graphite/src/gr_char_info.cpp @@ -0,0 +1,65 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include <cassert> +#include "graphite2/Segment.h" +#include "inc/CharInfo.h" + +extern "C" +{ + +unsigned int gr_cinfo_unicode_char(const gr_char_info* p/*not NULL*/) +{ + assert(p); + return p->unicodeChar(); +} + + +int gr_cinfo_break_weight(const gr_char_info* p/*not NULL*/) +{ + assert(p); + return p->breakWeight(); +} + +int gr_cinfo_after(const gr_char_info *p/*not NULL*/) +{ + assert(p); + return p->after(); +} + +int gr_cinfo_before(const gr_char_info *p/*not NULL*/) +{ + assert(p); + return p->before(); +} + +size_t gr_cinfo_base(const gr_char_info *p/*not NULL*/) +{ + assert(p); + return p->base(); +} + +} // extern "C" diff --git a/thirdparty/graphite/src/gr_face.cpp b/thirdparty/graphite/src/gr_face.cpp new file mode 100644 index 0000000000..baa469727b --- /dev/null +++ b/thirdparty/graphite/src/gr_face.cpp @@ -0,0 +1,267 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "graphite2/Font.h" +#include "inc/Face.h" +#include "inc/FileFace.h" +#include "inc/GlyphCache.h" +#include "inc/CmapCache.h" +#include "inc/Silf.h" +#include "inc/json.h" + +using namespace graphite2; + +#if !defined GRAPHITE2_NTRACING +extern json *global_log; +#endif + +namespace +{ + bool load_face(Face & face, unsigned int options) + { +#ifdef GRAPHITE2_TELEMETRY + telemetry::category _misc_cat(face.tele.misc); +#endif + Face::Table silf(face, Tag::Silf, 0x00050000); + if (!silf) + return false; + + if (!face.readGlyphs(options)) + return false; + + if (silf) + { + if (!face.readFeatures() || !face.readGraphite(silf)) + { +#if !defined GRAPHITE2_NTRACING + if (global_log) + { + *global_log << json::object + << "type" << "fontload" + << "failure" << face.error() + << "context" << face.error_context() + << json::close; + } +#endif + return false; + } + else + return true; + } + else + return false; + } + + inline + uint32 zeropad(const uint32 x) + { + if (x == 0x20202020) return 0; + if ((x & 0x00FFFFFF) == 0x00202020) return x & 0xFF000000; + if ((x & 0x0000FFFF) == 0x00002020) return x & 0xFFFF0000; + if ((x & 0x000000FF) == 0x00000020) return x & 0xFFFFFF00; + return x; + } +} + +extern "C" { + +gr_face* gr_make_face_with_ops(const void* appFaceHandle/*non-NULL*/, const gr_face_ops *ops, unsigned int faceOptions) + //the appFaceHandle must stay alive all the time when the gr_face is alive. When finished with the gr_face, call destroy_face +{ + if (ops == 0) return 0; + + Face *res = new Face(appFaceHandle, *ops); + if (res && load_face(*res, faceOptions)) + return static_cast<gr_face *>(res); + + delete res; + return 0; +} + +gr_face* gr_make_face(const void* appFaceHandle/*non-NULL*/, gr_get_table_fn tablefn, unsigned int faceOptions) +{ + const gr_face_ops ops = {sizeof(gr_face_ops), tablefn, NULL}; + return gr_make_face_with_ops(appFaceHandle, &ops, faceOptions); +} + + +gr_face* gr_make_face_with_seg_cache_and_ops(const void* appFaceHandle/*non-NULL*/, const gr_face_ops *ops, unsigned int , unsigned int faceOptions) +{ + return gr_make_face_with_ops(appFaceHandle, ops, faceOptions); +} + +gr_face* gr_make_face_with_seg_cache(const void* appFaceHandle/*non-NULL*/, gr_get_table_fn tablefn, unsigned int, unsigned int faceOptions) +{ + const gr_face_ops ops = {sizeof(gr_face_ops), tablefn, NULL}; + return gr_make_face_with_ops(appFaceHandle, &ops, faceOptions); +} + +gr_uint32 gr_str_to_tag(const char *str) +{ + uint32 res = 0; + switch(max(strlen(str),size_t(4))) + { + case 4: res |= str[3]; GR_FALLTHROUGH; + case 3: res |= str[2] << 8; GR_FALLTHROUGH; + case 2: res |= str[1] << 16; GR_FALLTHROUGH; + case 1: res |= str[0] << 24; GR_FALLTHROUGH; + default: break; + } + return res; +} + +void gr_tag_to_str(gr_uint32 tag, char *str) +{ + if (!str) return; + + *str++ = char(tag >> 24); + *str++ = char(tag >> 16); + *str++ = char(tag >> 8); + *str++ = char(tag); + *str = '\0'; +} + +gr_feature_val* gr_face_featureval_for_lang(const gr_face* pFace, gr_uint32 langname/*0 means clone default*/) //clones the features. if none for language, clones the default +{ + assert(pFace); + langname = zeropad(langname); + return static_cast<gr_feature_val *>(pFace->theSill().cloneFeatures(langname)); +} + + +const gr_feature_ref* gr_face_find_fref(const gr_face* pFace, gr_uint32 featId) //When finished with the FeatureRef, call destroy_FeatureRef +{ + assert(pFace); + featId = zeropad(featId); + const FeatureRef* pRef = pFace->featureById(featId); + return static_cast<const gr_feature_ref*>(pRef); +} + +unsigned short gr_face_n_fref(const gr_face* pFace) +{ + assert(pFace); + int res = 0; + for (int i = 0; i < pFace->numFeatures(); ++i) + if (!(pFace->feature(i)->getFlags() & FeatureRef::HIDDEN)) + ++res; + return res; +} + +const gr_feature_ref* gr_face_fref(const gr_face* pFace, gr_uint16 i) //When finished with the FeatureRef, call destroy_FeatureRef +{ + assert(pFace); + int count = 0; + for (int j = 0; j < pFace->numFeatures(); ++j) + { + const FeatureRef* pRef = pFace->feature(j); + if (!(pRef->getFlags() & FeatureRef::HIDDEN)) + if (count++ == i) + return static_cast<const gr_feature_ref*>(pRef); + } + return 0; +} + +unsigned short gr_face_n_languages(const gr_face* pFace) +{ + assert(pFace); + return pFace->theSill().numLanguages(); +} + +gr_uint32 gr_face_lang_by_index(const gr_face* pFace, gr_uint16 i) +{ + assert(pFace); + return pFace->theSill().getLangName(i); +} + + +void gr_face_destroy(gr_face *face) +{ + delete static_cast<Face*>(face); +} + + +gr_uint16 gr_face_name_lang_for_locale(gr_face *face, const char * locale) +{ + if (face) + { + return face->languageForLocale(locale); + } + return 0; +} + +unsigned short gr_face_n_glyphs(const gr_face* pFace) +{ + return pFace->glyphs().numGlyphs(); +} + +const gr_faceinfo *gr_face_info(const gr_face *pFace, gr_uint32 script) +{ + if (!pFace) return 0; + const Silf *silf = pFace->chooseSilf(script); + if (silf) return silf->silfInfo(); + return 0; +} + +int gr_face_is_char_supported(const gr_face* pFace, gr_uint32 usv, gr_uint32 script) +{ + const Cmap & cmap = pFace->cmap(); + gr_uint16 gid = cmap[usv]; + if (!gid) + { + const Silf * silf = pFace->chooseSilf(script); + gid = silf->findPseudo(usv); + } + return (gid != 0); +} + +#ifndef GRAPHITE2_NFILEFACE +gr_face* gr_make_file_face(const char *filename, unsigned int faceOptions) +{ + FileFace* pFileFace = new FileFace(filename); + if (*pFileFace) + { + gr_face* pRes = gr_make_face_with_ops(pFileFace, &FileFace::ops, faceOptions); + if (pRes) + { + pRes->takeFileFace(pFileFace); //takes ownership + return pRes; + } + } + + //error when loading + + delete pFileFace; + return NULL; +} + +gr_face* gr_make_file_face_with_seg_cache(const char* filename, unsigned int, unsigned int faceOptions) //returns NULL on failure. //TBD better error handling + //when finished with, call destroy_face +{ + return gr_make_file_face(filename, faceOptions); +} +#endif //!GRAPHITE2_NFILEFACE + +} // extern "C" diff --git a/thirdparty/graphite/src/gr_features.cpp b/thirdparty/graphite/src/gr_features.cpp new file mode 100644 index 0000000000..a560e053f2 --- /dev/null +++ b/thirdparty/graphite/src/gr_features.cpp @@ -0,0 +1,138 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "graphite2/Font.h" +#include "inc/Face.h" +#include "inc/FeatureMap.h" +#include "inc/FeatureVal.h" +#include "inc/NameTable.h" + +using namespace graphite2; + +extern "C" { + + +gr_uint16 gr_fref_feature_value(const gr_feature_ref* pfeatureref, const gr_feature_val* feats) //returns 0 if either pointer is NULL +{ + if (!pfeatureref || !feats) return 0; + + return pfeatureref->getFeatureVal(*feats); +} + + +int gr_fref_set_feature_value(const gr_feature_ref* pfeatureref, gr_uint16 val, gr_feature_val* pDest) +{ + if (!pfeatureref || !pDest) return 0; + + return pfeatureref->applyValToFeature(val, *pDest); +} + + +gr_uint32 gr_fref_id(const gr_feature_ref* pfeatureref) //returns 0 if pointer is NULL +{ + if (!pfeatureref) + return 0; + + return pfeatureref->getId(); +} + + +gr_uint16 gr_fref_n_values(const gr_feature_ref* pfeatureref) +{ + if(!pfeatureref) + return 0; + return pfeatureref->getNumSettings(); +} + + +gr_int16 gr_fref_value(const gr_feature_ref* pfeatureref, gr_uint16 settingno) +{ + if(!pfeatureref || (settingno >= pfeatureref->getNumSettings())) + { + return 0; + } + return pfeatureref->getSettingValue(settingno); +} + + +void* gr_fref_label(const gr_feature_ref* pfeatureref, gr_uint16 *langId, gr_encform utf, gr_uint32 *length) +{ + if(!pfeatureref) + { + langId = 0; + length = 0; + return NULL; + } + uint16 label = pfeatureref->getNameId(); + NameTable * names = pfeatureref->getFace().nameTable(); + if (!names) + { + langId = 0; + length = 0; + return NULL; + } + return names->getName(*langId, label, utf, *length); +} + + +void* gr_fref_value_label(const gr_feature_ref*pfeatureref, gr_uint16 setting, + gr_uint16 *langId, gr_encform utf, gr_uint32 *length) +{ + if(!pfeatureref || (setting >= pfeatureref->getNumSettings())) + { + langId = 0; + length = 0; + return NULL; + } + uint16 label = pfeatureref->getSettingName(setting); + NameTable * names = pfeatureref->getFace().nameTable(); + if (!names) + { + langId = 0; + length = 0; + return NULL; + } + return names->getName(*langId, label, utf, *length); +} + + +void gr_label_destroy(void * label) +{ + free(label); +} + +gr_feature_val* gr_featureval_clone(const gr_feature_val* pfeatures/*may be NULL*/) +{ //When finished with the Features, call features_destroy + return static_cast<gr_feature_val*>(pfeatures ? new Features(*pfeatures) : new Features); +} + +void gr_featureval_destroy(gr_feature_val *p) +{ + delete static_cast<Features*>(p); +} + + +} // extern "C" diff --git a/thirdparty/graphite/src/gr_font.cpp b/thirdparty/graphite/src/gr_font.cpp new file mode 100644 index 0000000000..724cc83c13 --- /dev/null +++ b/thirdparty/graphite/src/gr_font.cpp @@ -0,0 +1,74 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "graphite2/Font.h" +#include "inc/Font.h" + + +using namespace graphite2; + +extern "C" { + +void gr_engine_version(int *nMajor, int *nMinor, int *nBugFix) +{ + if (nMajor) *nMajor = GR2_VERSION_MAJOR; + if (nMinor) *nMinor = GR2_VERSION_MINOR; + if (nBugFix) *nBugFix = GR2_VERSION_BUGFIX; +} + +gr_font* gr_make_font(float ppm/*pixels per em*/, const gr_face *face) +{ + return gr_make_font_with_advance_fn(ppm, 0, 0, face); +} + + +gr_font* gr_make_font_with_ops(float ppm/*pixels per em*/, const void* appFontHandle/*non-NULL*/, const gr_font_ops * font_ops, const gr_face * face/*needed for scaling*/) +{ //the appFontHandle must stay alive all the time when the gr_font is alive. When finished with the gr_font, call destroy_gr_font + if (face == 0 || ppm <= 0) return 0; + + Font * const res = new Font(ppm, *face, appFontHandle, font_ops); + if (*res) + return static_cast<gr_font*>(res); + else + { + delete res; + return 0; + } +} + +gr_font* gr_make_font_with_advance_fn(float ppm/*pixels per em*/, const void* appFontHandle/*non-NULL*/, gr_advance_fn getAdvance, const gr_face * face/*needed for scaling*/) +{ + const gr_font_ops ops = {sizeof(gr_font_ops), getAdvance, NULL}; + return gr_make_font_with_ops(ppm, appFontHandle, &ops, face); +} + +void gr_font_destroy(gr_font *font) +{ + delete static_cast<Font*>(font); +} + + +} // extern "C" diff --git a/thirdparty/graphite/src/gr_logging.cpp b/thirdparty/graphite/src/gr_logging.cpp new file mode 100644 index 0000000000..8f1e675e62 --- /dev/null +++ b/thirdparty/graphite/src/gr_logging.cpp @@ -0,0 +1,267 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include <cstdio> + +#include "graphite2/Log.h" +#include "inc/debug.h" +#include "inc/CharInfo.h" +#include "inc/Slot.h" +#include "inc/Segment.h" +#include "inc/json.h" +#include "inc/Collider.h" + +#if defined _WIN32 +#include "windows.h" +#endif + +using namespace graphite2; + +#if !defined GRAPHITE2_NTRACING +json *global_log = 0; +#endif + +extern "C" { + +bool gr_start_logging(GR_MAYBE_UNUSED gr_face * face, const char *log_path) +{ + if (!log_path) return false; + +#if !defined GRAPHITE2_NTRACING + gr_stop_logging(face); +#if defined _WIN32 + int n = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, log_path, -1, 0, 0); + if (n == 0 || n > MAX_PATH - 12) return false; + + LPWSTR wlog_path = gralloc<WCHAR>(n); + if (!wlog_path) return false; + FILE *log = 0; + if (wlog_path && MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, log_path, -1, wlog_path, n)) + log = _wfopen(wlog_path, L"wt"); + + free(wlog_path); +#else // _WIN32 + FILE *log = fopen(log_path, "wt"); +#endif // _WIN32 + if (!log) return false; + + if (face) + { + face->setLogger(log); + if (!face->logger()) return false; + + *face->logger() << json::array; +#ifdef GRAPHITE2_TELEMETRY + *face->logger() << face->tele; +#endif + } + else + { + global_log = new json(log); + *global_log << json::array; + } + + return true; +#else // GRAPHITE2_NTRACING + return false; +#endif // GRAPHITE2_NTRACING +} + +bool graphite_start_logging(FILE * /* log */, GrLogMask /* mask */) +{ +//#if !defined GRAPHITE2_NTRACING +// graphite_stop_logging(); +// +// if (!log) return false; +// +// dbgout = new json(log); +// if (!dbgout) return false; +// +// *dbgout << json::array; +// return true; +//#else + return false; +//#endif +} + +void gr_stop_logging(GR_MAYBE_UNUSED gr_face * face) +{ +#if !defined GRAPHITE2_NTRACING + if (face && face->logger()) + { + FILE * log = face->logger()->stream(); + face->setLogger(0); + fclose(log); + } + else if (!face && global_log) + { + FILE * log = global_log->stream(); + delete global_log; + global_log = 0; + fclose(log); + } +#endif +} + +void graphite_stop_logging() +{ +// if (dbgout) delete dbgout; +// dbgout = 0; +} + +} // extern "C" + +#ifdef GRAPHITE2_TELEMETRY +size_t * graphite2::telemetry::_category = 0UL; +#endif + +#if !defined GRAPHITE2_NTRACING + +#ifdef GRAPHITE2_TELEMETRY + +json & graphite2::operator << (json & j, const telemetry & t) throw() +{ + j << json::object + << "type" << "telemetry" + << "silf" << t.silf + << "states" << t.states + << "starts" << t.starts + << "transitions" << t.transitions + << "glyphs" << t.glyph + << "code" << t.code + << "misc" << t.misc + << "total" << (t.silf + t.states + t.starts + t.transitions + t.glyph + t.code + t.misc) + << json::close; + return j; +} +#else +json & graphite2::operator << (json & j, const telemetry &) throw() +{ + return j; +} +#endif + + +json & graphite2::operator << (json & j, const CharInfo & ci) throw() +{ + return j << json::object + << "offset" << ci.base() + << "unicode" << ci.unicodeChar() + << "break" << ci.breakWeight() + << "flags" << ci.flags() + << "slot" << json::flat << json::object + << "before" << ci.before() + << "after" << ci.after() + << json::close + << json::close; +} + + +json & graphite2::operator << (json & j, const dslot & ds) throw() +{ + assert(ds.first); + assert(ds.second); + const Segment & seg = *ds.first; + const Slot & s = *ds.second; + const SlotCollision *cslot = seg.collisionInfo(ds.second); + + j << json::object + << "id" << objectid(ds) + << "gid" << s.gid() + << "charinfo" << json::flat << json::object + << "original" << s.original() + << "before" << s.before() + << "after" << s.after() + << json::close + << "origin" << s.origin() + << "shift" << Position(float(s.getAttr(0, gr_slatShiftX, 0)), + float(s.getAttr(0, gr_slatShiftY, 0))) + << "advance" << s.advancePos() + << "insert" << s.isInsertBefore() + << "break" << s.getAttr(&seg, gr_slatBreak, 0); + if (s.just() > 0) + j << "justification" << s.just(); + if (s.getBidiLevel() > 0) + j << "bidi" << s.getBidiLevel(); + if (!s.isBase()) + j << "parent" << json::flat << json::object + << "id" << objectid(dslot(&seg, s.attachedTo())) + << "level" << s.getAttr(0, gr_slatAttLevel, 0) + << "offset" << s.attachOffset() + << json::close; + j << "user" << json::flat << json::array; + for (int n = 0; n!= seg.numAttrs(); ++n) + j << s.userAttrs()[n]; + j << json::close; + if (s.firstChild()) + { + j << "children" << json::flat << json::array; + for (const Slot *c = s.firstChild(); c; c = c->nextSibling()) + j << objectid(dslot(&seg, c)); + j << json::close; + } + if (cslot) + { + // Note: the reason for using Positions to lump together related attributes is to make the + // JSON output slightly more compact. + j << "collision" << json::flat << json::object +// << "shift" << cslot->shift() -- not used pass level, only within the collision routine itself + << "offset" << cslot->offset() + << "limit" << cslot->limit() + << "flags" << cslot->flags() + << "margin" << Position(cslot->margin(), cslot->marginWt()) + << "exclude" << cslot->exclGlyph() + << "excludeoffset" << cslot->exclOffset(); + if (cslot->seqOrder() != 0) + { + j << "seqclass" << Position(cslot->seqClass(), cslot->seqProxClass()) + << "seqorder" << cslot->seqOrder() + << "seqabove" << Position(cslot->seqAboveXoff(), cslot->seqAboveWt()) + << "seqbelow" << Position(cslot->seqBelowXlim(), cslot->seqBelowWt()) + << "seqvalign" << Position(cslot->seqValignHt(), cslot->seqValignWt()); + } + j << json::close; + } + return j << json::close; +} + + +graphite2::objectid::objectid(const dslot & ds) throw() +{ + const Slot * const p = ds.second; + uint32 s = uint32(reinterpret_cast<size_t>(p)); + sprintf(name, "%.4x-%.2x-%.4hx", uint16(s >> 16), uint16(p ? p->userAttrs()[ds.first->silf()->numUser()] : 0), uint16(s)); + name[sizeof name-1] = 0; +} + +graphite2::objectid::objectid(const Segment * const p) throw() +{ + uint32 s = uint32(reinterpret_cast<size_t>(p)); + sprintf(name, "%.4x-%.2x-%.4hx", uint16(s >> 16), 0, uint16(s)); + name[sizeof name-1] = 0; +} + +#endif diff --git a/thirdparty/graphite/src/gr_segment.cpp b/thirdparty/graphite/src/gr_segment.cpp new file mode 100644 index 0000000000..7a27e9c562 --- /dev/null +++ b/thirdparty/graphite/src/gr_segment.cpp @@ -0,0 +1,175 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "graphite2/Segment.h" +#include "inc/UtfCodec.h" +#include "inc/Segment.h" + +using namespace graphite2; + +namespace +{ + + gr_segment* makeAndInitialize(const Font *font, const Face *face, uint32 script, const Features* pFeats/*must not be NULL*/, gr_encform enc, const void* pStart, size_t nChars, int dir) + { + if (script == 0x20202020) script = 0; + else if ((script & 0x00FFFFFF) == 0x00202020) script = script & 0xFF000000; + else if ((script & 0x0000FFFF) == 0x00002020) script = script & 0xFFFF0000; + else if ((script & 0x000000FF) == 0x00000020) script = script & 0xFFFFFF00; + // if (!font) return NULL; + Segment* pRes=new Segment(nChars, face, script, dir); + + + if (!pRes->read_text(face, pFeats, enc, pStart, nChars) || !pRes->runGraphite()) + { + delete pRes; + return NULL; + } + pRes->finalise(font, true); + + return static_cast<gr_segment*>(pRes); + } + + template <typename utf_iter> + inline size_t count_unicode_chars(utf_iter first, const utf_iter last, const void **error) + { + size_t n_chars = 0; + uint32 usv = 0; + + if (last) + { + if (!first.validate(last)) + { + if (error) *error = last - 1; + return 0; + } + for (;first != last; ++first, ++n_chars) + if ((usv = *first) == 0 || first.error()) break; + } + else + { + while ((usv = *first) != 0 && !first.error()) + { + ++first; + ++n_chars; + } + } + + if (error) *error = first.error() ? first : 0; + return n_chars; + } +} + + +extern "C" { + +size_t gr_count_unicode_characters(gr_encform enc, const void* buffer_begin, const void* buffer_end/*don't go on or past end, If NULL then ignored*/, const void** pError) //Also stops on nul. Any nul is not in the count +{ + assert(buffer_begin); + + switch (enc) + { + case gr_utf8: return count_unicode_chars<utf8::const_iterator>(buffer_begin, buffer_end, pError); break; + case gr_utf16: return count_unicode_chars<utf16::const_iterator>(buffer_begin, buffer_end, pError); break; + case gr_utf32: return count_unicode_chars<utf32::const_iterator>(buffer_begin, buffer_end, pError); break; + default: return 0; + } +} + + +gr_segment* gr_make_seg(const gr_font *font, const gr_face *face, gr_uint32 script, const gr_feature_val* pFeats, gr_encform enc, const void* pStart, size_t nChars, int dir) +{ + if (!face) return nullptr; + + const gr_feature_val * tmp_feats = 0; + if (pFeats == 0) + pFeats = tmp_feats = static_cast<const gr_feature_val*>(face->theSill().cloneFeatures(0)); + gr_segment * seg = makeAndInitialize(font, face, script, pFeats, enc, pStart, nChars, dir); + delete static_cast<const FeatureVal*>(tmp_feats); + + return seg; +} + + +void gr_seg_destroy(gr_segment* p) +{ + delete static_cast<Segment*>(p); +} + + +float gr_seg_advance_X(const gr_segment* pSeg/*not NULL*/) +{ + assert(pSeg); + return pSeg->advance().x; +} + + +float gr_seg_advance_Y(const gr_segment* pSeg/*not NULL*/) +{ + assert(pSeg); + return pSeg->advance().y; +} + + +unsigned int gr_seg_n_cinfo(const gr_segment* pSeg/*not NULL*/) +{ + assert(pSeg); + return static_cast<unsigned int>(pSeg->charInfoCount()); +} + + +const gr_char_info* gr_seg_cinfo(const gr_segment* pSeg/*not NULL*/, unsigned int index/*must be <number_of_CharInfo*/) +{ + assert(pSeg); + return static_cast<const gr_char_info*>(pSeg->charinfo(index)); +} + +unsigned int gr_seg_n_slots(const gr_segment* pSeg/*not NULL*/) +{ + assert(pSeg); + return static_cast<unsigned int>(pSeg->slotCount()); +} + +const gr_slot* gr_seg_first_slot(gr_segment* pSeg/*not NULL*/) +{ + assert(pSeg); + return static_cast<const gr_slot*>(pSeg->first()); +} + +const gr_slot* gr_seg_last_slot(gr_segment* pSeg/*not NULL*/) +{ + assert(pSeg); + return static_cast<const gr_slot*>(pSeg->last()); +} + +float gr_seg_justify(gr_segment* pSeg/*not NULL*/, const gr_slot* pSlot/*not NULL*/, const gr_font *pFont, double width, enum gr_justFlags flags, const gr_slot *pFirst, const gr_slot *pLast) +{ + assert(pSeg); + assert(pSlot); + return pSeg->justify(const_cast<gr_slot *>(pSlot), pFont, float(width), justFlags(flags), const_cast<gr_slot *>(pFirst), const_cast<gr_slot *>(pLast)); +} + +} // extern "C" diff --git a/thirdparty/graphite/src/gr_slot.cpp b/thirdparty/graphite/src/gr_slot.cpp new file mode 100644 index 0000000000..a3c6b46a7f --- /dev/null +++ b/thirdparty/graphite/src/gr_slot.cpp @@ -0,0 +1,173 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#include "graphite2/Segment.h" +#include "inc/Segment.h" +#include "inc/Slot.h" +#include "inc/Font.h" + + +extern "C" { + + +const gr_slot* gr_slot_next_in_segment(const gr_slot* p/*not NULL*/) +{ + assert(p); + return static_cast<const gr_slot*>(p->next()); +} + +const gr_slot* gr_slot_prev_in_segment(const gr_slot* p/*not NULL*/) +{ + assert(p); + return static_cast<const gr_slot*>(p->prev()); +} + +const gr_slot* gr_slot_attached_to(const gr_slot* p/*not NULL*/) //returns NULL iff base. If called repeatedly on result, will get to a base +{ + assert(p); + return static_cast<const gr_slot*>(p->attachedTo()); +} + + +const gr_slot* gr_slot_first_attachment(const gr_slot* p/*not NULL*/) //returns NULL iff no attachments. +{ //if slot_first_attachment(p) is not NULL, then slot_attached_to(slot_first_attachment(p))==p. + assert(p); + return static_cast<const gr_slot*>(p->firstChild()); +} + + +const gr_slot* gr_slot_next_sibling_attachment(const gr_slot* p/*not NULL*/) //returns NULL iff no more attachments. +{ //if slot_next_sibling_attachment(p) is not NULL, then slot_attached_to(slot_next_sibling_attachment(p))==slot_attached_to(p). + assert(p); + return static_cast<const gr_slot*>(p->nextSibling()); +} + + +unsigned short gr_slot_gid(const gr_slot* p/*not NULL*/) +{ + assert(p); + return p->glyph(); +} + + +float gr_slot_origin_X(const gr_slot* p/*not NULL*/) +{ + assert(p); + return p->origin().x; +} + + +float gr_slot_origin_Y(const gr_slot* p/*not NULL*/) +{ + assert(p); + return p->origin().y; +} + + +float gr_slot_advance_X(const gr_slot* p/*not NULL*/, const gr_face *face, const gr_font *font) +{ + assert(p); + float scale = 1.0; + float res = p->advance(); + if (font) + { + scale = font->scale(); + int gid = p->glyph(); + if (face && font->isHinted() && gid < face->glyphs().numGlyphs()) + res = (res - face->glyphs().glyph(gid)->theAdvance().x) * scale + font->advance(gid); + else + res = res * scale; + } + return res; +} + +float gr_slot_advance_Y(const gr_slot *p/*not NULL*/, GR_MAYBE_UNUSED const gr_face *face, const gr_font *font) +{ + assert(p); + float res = p->advancePos().y; + if (font) + return res * font->scale(); + else + return res; +} + +int gr_slot_before(const gr_slot* p/*not NULL*/) +{ + assert(p); + return p->before(); +} + + +int gr_slot_after(const gr_slot* p/*not NULL*/) +{ + assert(p); + return p->after(); +} + +unsigned int gr_slot_index(const gr_slot *p/*not NULL*/) +{ + assert(p); + return p->index(); +} + +int gr_slot_attr(const gr_slot* p/*not NULL*/, const gr_segment* pSeg/*not NULL*/, gr_attrCode index, gr_uint8 subindex) +{ + assert(p); + return p->getAttr(pSeg, index, subindex); +} + + +int gr_slot_can_insert_before(const gr_slot* p/*not NULL*/) +{ + assert(p); + return (p->isInsertBefore())? 1 : 0; +} + + +int gr_slot_original(const gr_slot* p/*not NULL*/) +{ + assert(p); + return p->original(); +} + +void gr_slot_linebreak_before(gr_slot* p/*not NULL*/) +{ + assert(p); + gr_slot *prev = (gr_slot *)p->prev(); + prev->sibling(NULL); + prev->next(NULL); + p->prev(NULL); +} + +#if 0 //what should this be +size_t id(const gr_slot* p/*not NULL*/) +{ + return (size_t)p->id(); +} +#endif + + +} // extern "C" diff --git a/thirdparty/graphite/src/inc/CharInfo.h b/thirdparty/graphite/src/inc/CharInfo.h new file mode 100644 index 0000000000..01e7e31ac9 --- /dev/null +++ b/thirdparty/graphite/src/inc/CharInfo.h @@ -0,0 +1,66 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once +#include "inc/Main.h" + + +namespace graphite2 { + +class CharInfo +{ + +public: + CharInfo() : m_char(0), m_before(-1), m_after(-1), m_base(0), m_featureid(0), m_break(0), m_flags(0) {} + void init(int cid) { m_char = cid; } + unsigned int unicodeChar() const { return m_char; } + void feats(int offset) { m_featureid = offset; } + int fid() const { return m_featureid; } + int breakWeight() const { return m_break; } + void breakWeight(int val) { m_break = val; } + int after() const { return m_after; } + void after(int val) { m_after = val; } + int before() const { return m_before; } + void before(int val) { m_before = val; } + size_t base() const { return m_base; } + void base(size_t offset) { m_base = offset; } + void addflags(uint8 val) { m_flags |= val; } + uint8 flags() const { return m_flags; } + + CLASS_NEW_DELETE +private: + int m_char; // Unicode character from character stream + int m_before; // slot index before us, comes before + int m_after; // slot index after us, comes after + size_t m_base; // offset into input string corresponding to this charinfo + uint8 m_featureid; // index into features list in the segment + int8 m_break; // breakweight coming from lb table + uint8 m_flags; // 0,1 segment split. +}; + +} // namespace graphite2 + +struct gr_char_info : public graphite2::CharInfo {}; diff --git a/thirdparty/graphite/src/inc/CmapCache.h b/thirdparty/graphite/src/inc/CmapCache.h new file mode 100644 index 0000000000..7820c958b0 --- /dev/null +++ b/thirdparty/graphite/src/inc/CmapCache.h @@ -0,0 +1,82 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +#include "inc/Main.h" +#include "inc/Face.h" + +namespace graphite2 { + +class Face; + +class Cmap +{ +public: + + virtual ~Cmap() throw() {} + + virtual uint16 operator [] (const uint32) const throw() { return 0; } + + virtual operator bool () const throw() { return false; } + + CLASS_NEW_DELETE; +}; + +class DirectCmap : public Cmap +{ + DirectCmap(const DirectCmap &); + DirectCmap & operator = (const DirectCmap &); + +public: + DirectCmap(const Face &); + virtual uint16 operator [] (const uint32 usv) const throw(); + virtual operator bool () const throw(); + + CLASS_NEW_DELETE; +private: + const Face::Table _cmap; + const void * _smp, + * _bmp; +}; + +class CachedCmap : public Cmap +{ + CachedCmap(const CachedCmap &); + CachedCmap & operator = (const CachedCmap &); + +public: + CachedCmap(const Face &); + virtual ~CachedCmap() throw(); + virtual uint16 operator [] (const uint32 usv) const throw(); + virtual operator bool () const throw(); + CLASS_NEW_DELETE; +private: + bool m_isBmpOnly; + uint16 ** m_blocks; +}; + +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/Code.h b/thirdparty/graphite/src/inc/Code.h new file mode 100644 index 0000000000..3cee67c81d --- /dev/null +++ b/thirdparty/graphite/src/inc/Code.h @@ -0,0 +1,171 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +// This class represents loaded graphite stack machine code. It performs +// basic sanity checks, on the incoming code to prevent more obvious problems +// from crashing graphite. +// Author: Tim Eves + +#pragma once + +#include <cassert> +#include <graphite2/Types.h> +#include "inc/Main.h" +#include "inc/Machine.h" + +namespace graphite2 { + +class Silf; +class Face; + +enum passtype { + PASS_TYPE_UNKNOWN = 0, + PASS_TYPE_LINEBREAK, + PASS_TYPE_SUBSTITUTE, + PASS_TYPE_POSITIONING, + PASS_TYPE_JUSTIFICATION +}; + +namespace vm { + +class Machine::Code +{ +public: + enum status_t + { + loaded, + alloc_failed, + invalid_opcode, + unimplemented_opcode_used, + out_of_range_data, + jump_past_end, + arguments_exhausted, + missing_return, + nested_context_item, + underfull_stack + }; + +private: + class decoder; + + instr * _code; + byte * _data; + size_t _data_size, + _instr_count; + byte _max_ref; + mutable status_t _status; + bool _constraint, + _modify, + _delete; + mutable bool _own; + + void release_buffers() throw (); + void failure(const status_t) throw(); + +public: + static size_t estimateCodeDataOut(size_t num_bytecodes, int nRules, int nSlots); + + Code() throw(); + Code(bool is_constraint, const byte * bytecode_begin, const byte * const bytecode_end, + uint8 pre_context, uint16 rule_length, const Silf &, const Face &, + enum passtype pt, byte * * const _out = 0); + Code(const Machine::Code &) throw(); + ~Code() throw(); + + Code & operator=(const Code &rhs) throw(); + operator bool () const throw() { return _code && status() == loaded; } + status_t status() const throw() { return _status; } + bool constraint() const throw() { return _constraint; } + size_t dataSize() const throw() { return _data_size; } + size_t instructionCount() const throw() { return _instr_count; } + bool immutable() const throw() { return !(_delete || _modify); } + bool deletes() const throw() { return _delete; } + size_t maxRef() const throw() { return _max_ref; } + void externalProgramMoved(ptrdiff_t) throw(); + + int32 run(Machine &m, slotref * & map) const; + + CLASS_NEW_DELETE; +}; + +inline +size_t Machine::Code::estimateCodeDataOut(size_t n_bc, int nRules, int nSlots) +{ + // max is: all codes are instructions + 1 for each rule + max tempcopies + // allocate space for separate maximal code and data then merge them later + return (n_bc + nRules + nSlots) * sizeof(instr) + n_bc * sizeof(byte); +} + + +inline Machine::Code::Code() throw() +: _code(0), _data(0), _data_size(0), _instr_count(0), _max_ref(0), + _status(loaded), _constraint(false), _modify(false), _delete(false), + _own(false) +{ +} + +inline Machine::Code::Code(const Machine::Code &obj) throw () + : _code(obj._code), + _data(obj._data), + _data_size(obj._data_size), + _instr_count(obj._instr_count), + _max_ref(obj._max_ref), + _status(obj._status), + _constraint(obj._constraint), + _modify(obj._modify), + _delete(obj._delete), + _own(obj._own) +{ + obj._own = false; +} + +inline Machine::Code & Machine::Code::operator=(const Machine::Code &rhs) throw() { + if (_instr_count > 0) + release_buffers(); + _code = rhs._code; + _data = rhs._data; + _data_size = rhs._data_size; + _instr_count = rhs._instr_count; + _status = rhs._status; + _constraint = rhs._constraint; + _modify = rhs._modify; + _delete = rhs._delete; + _own = rhs._own; + rhs._own = false; + return *this; +} + +inline void Machine::Code::externalProgramMoved(ptrdiff_t dist) throw() +{ + if (_code && !_own) + { + _code += dist / signed(sizeof(instr)); + _data += dist; + } +} + +} // namespace vm +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/Collider.h b/thirdparty/graphite/src/inc/Collider.h new file mode 100644 index 0000000000..71e8400501 --- /dev/null +++ b/thirdparty/graphite/src/inc/Collider.h @@ -0,0 +1,245 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +#include "inc/List.h" +#include "inc/Position.h" +#include "inc/Intervals.h" +#include "inc/debug.h" + +namespace graphite2 { + +class json; +class Slot; +class Segment; + +#define SLOTCOLSETUINTPROP(x, y) uint16 x() const { return _ ##x; } void y (uint16 v) { _ ##x = v; } +#define SLOTCOLSETINTPROP(x, y) int16 x() const { return _ ##x; } void y (int16 v) { _ ##x = v; } +#define SLOTCOLSETPOSITIONPROP(x, y) const Position &x() const { return _ ##x; } void y (const Position &v) { _ ##x = v; } + +// Slot attributes related to collision-fixing +class SlotCollision +{ +public: + enum { + // COLL_TESTONLY = 0, // default - test other glyphs for collision with this one, but don't move this one + COLL_FIX = 1, // fix collisions involving this glyph + COLL_IGNORE = 2, // ignore this glyph altogether + COLL_START = 4, // start of range of possible collisions + COLL_END = 8, // end of range of possible collisions + COLL_KERN = 16, // collisions with this glyph are fixed by adding kerning space after it + COLL_ISCOL = 32, // this glyph has a collision + COLL_KNOWN = 64, // we've figured out what's happening with this glyph + COLL_ISSPACE = 128, // treat this glyph as a space with regard to kerning + COLL_TEMPLOCK = 256, // Lock glyphs that have been given priority positioning + ////COLL_JUMPABLE = 128, // moving glyphs may jump this stationary glyph in any direction - DELETE + ////COLL_OVERLAP = 256, // use maxoverlap to restrict - DELETE + }; + + // Behavior for the collision.order attribute. To GDL this is an enum, to us it's a bitfield, with only 1 bit set + // Allows for easier inversion. + enum { + SEQ_ORDER_LEFTDOWN = 1, + SEQ_ORDER_RIGHTUP = 2, + SEQ_ORDER_NOABOVE = 4, + SEQ_ORDER_NOBELOW = 8, + SEQ_ORDER_NOLEFT = 16, + SEQ_ORDER_NORIGHT = 32 + }; + + SlotCollision(Segment *seg, Slot *slot); + void initFromSlot(Segment *seg, Slot *slot); + + const Rect &limit() const { return _limit; } + void setLimit(const Rect &r) { _limit = r; } + SLOTCOLSETPOSITIONPROP(shift, setShift) + SLOTCOLSETPOSITIONPROP(offset, setOffset) + SLOTCOLSETPOSITIONPROP(exclOffset, setExclOffset) + SLOTCOLSETUINTPROP(margin, setMargin) + SLOTCOLSETUINTPROP(marginWt, setMarginWt) + SLOTCOLSETUINTPROP(flags, setFlags) + SLOTCOLSETUINTPROP(exclGlyph, setExclGlyph) + SLOTCOLSETUINTPROP(seqClass, setSeqClass) + SLOTCOLSETUINTPROP(seqProxClass, setSeqProxClass) + SLOTCOLSETUINTPROP(seqOrder, setSeqOrder) + SLOTCOLSETINTPROP(seqAboveXoff, setSeqAboveXoff) + SLOTCOLSETUINTPROP(seqAboveWt, setSeqAboveWt) + SLOTCOLSETINTPROP(seqBelowXlim, setSeqBelowXlim) + SLOTCOLSETUINTPROP(seqBelowWt, setSeqBelowWt) + SLOTCOLSETUINTPROP(seqValignHt, setSeqValignHt) + SLOTCOLSETUINTPROP(seqValignWt, setSeqValignWt) + + float getKern(int dir) const; + bool ignore() const; + +private: + Rect _limit; + Position _shift; // adjustment within the given pass + Position _offset; // total adjustment for collisions + Position _exclOffset; + uint16 _margin; + uint16 _marginWt; + uint16 _flags; + uint16 _exclGlyph; + uint16 _seqClass; + uint16 _seqProxClass; + uint16 _seqOrder; + int16 _seqAboveXoff; + uint16 _seqAboveWt; + int16 _seqBelowXlim; + uint16 _seqBelowWt; + uint16 _seqValignHt; + uint16 _seqValignWt; + +}; // end of class SlotColllision + +struct BBox; +struct SlantBox; + +class ShiftCollider +{ +public: + typedef std::pair<float, float> fpair; + typedef Vector<fpair> vfpairs; + typedef vfpairs::iterator ivfpairs; + + ShiftCollider(json *dbgout); + ~ShiftCollider() throw() { }; + + bool initSlot(Segment *seg, Slot *aSlot, const Rect &constraint, + float margin, float marginMin, const Position &currShift, + const Position &currOffset, int dir, GR_MAYBE_UNUSED json * const dbgout); + bool mergeSlot(Segment *seg, Slot *slot, const SlotCollision *cinfo, const Position &currShift, bool isAfter, + bool sameCluster, bool &hasCol, bool isExclusion, GR_MAYBE_UNUSED json * const dbgout); + Position resolve(Segment *seg, bool &isCol, GR_MAYBE_UNUSED json * const dbgout); + void addBox_slope(bool isx, const Rect &box, const BBox &bb, const SlantBox &sb, const Position &org, float weight, float m, bool minright, int mode); + void removeBox(const Rect &box, const BBox &bb, const SlantBox &sb, const Position &org, int mode); + const Position &origin() const { return _origin; } + +#if !defined GRAPHITE2_NTRACING + void outputJsonDbg(json * const dbgout, Segment *seg, int axis); + void outputJsonDbgStartSlot(json * const dbgout, Segment *seg); + void outputJsonDbgEndSlot(json * const dbgout, Position resultPos, int bestAxis, bool isCol); + void outputJsonDbgOneVector(json * const dbgout, Segment *seg, int axis, float tleft, float bestCost, float bestVal); + void outputJsonDbgRawRanges(json * const dbgout, int axis); + void outputJsonDbgRemovals(json * const dbgout, int axis, Segment *seg); +#endif + + CLASS_NEW_DELETE; + +protected: + Zones _ranges[4]; // possible movements in 4 directions (horizontally, vertically, diagonally); + Slot * _target; // the glyph to fix + Rect _limit; + Position _currShift; + Position _currOffset; + Position _origin; // Base for all relative calculations + float _margin; + float _marginWt; + float _len[4]; + uint16 _seqClass; + uint16 _seqProxClass; + uint16 _seqOrder; + + //bool _scraping[4]; + +}; // end of class ShiftCollider + +inline +ShiftCollider::ShiftCollider(GR_MAYBE_UNUSED json *dbgout) +: _target(0), + _margin(0.0), + _marginWt(0.0), + _seqClass(0), + _seqProxClass(0), + _seqOrder(0) +{ +#if !defined GRAPHITE2_NTRACING + for (int i = 0; i < 4; ++i) + _ranges[i].setdebug(dbgout); +#endif +} + +class KernCollider +{ +public: + KernCollider(json *dbg); + ~KernCollider() throw() { }; + bool initSlot(Segment *seg, Slot *aSlot, const Rect &constraint, float margin, + const Position &currShift, const Position &offsetPrev, int dir, + float ymin, float ymax, json * const dbgout); + bool mergeSlot(Segment *seg, Slot *slot, const Position &currShift, float currSpace, int dir, json * const dbgout); + Position resolve(Segment *seg, Slot *slot, int dir, json * const dbgout); + void shift(const Position &mv, int dir); + + CLASS_NEW_DELETE; + +private: + Slot * _target; // the glyph to fix + Rect _limit; + float _margin; + Position _offsetPrev; // kern from a previous pass + Position _currShift; // NOT USED?? + float _miny; // y-coordinates offset by global slot position + float _maxy; + Vector<float> _edges; // edges of horizontal slices + float _sliceWidth; // width of each slice + float _mingap; + float _xbound; // max or min edge + bool _hit; + +#if !defined GRAPHITE2_NTRACING + // Debugging + Segment * _seg; + Vector<float> _nearEdges; // closest potential collision in each slice + Vector<Slot*> _slotNear; +#endif +}; // end of class KernCollider + + +inline +float sqr(float x) { + return x * x; +} + +inline +KernCollider::KernCollider(GR_MAYBE_UNUSED json *dbg) +: _target(0), + _margin(0.0f), + _miny(-1e38f), + _maxy(1e38f), + _sliceWidth(0.0f), + _mingap(0.0f), + _xbound(0.0), + _hit(false) +{ +#if !defined GRAPHITE2_NTRACING + _seg = 0; +#endif +}; + +}; // end of namespace graphite2 diff --git a/thirdparty/graphite/src/inc/Compression.h b/thirdparty/graphite/src/inc/Compression.h new file mode 100644 index 0000000000..9fe10e025d --- /dev/null +++ b/thirdparty/graphite/src/inc/Compression.h @@ -0,0 +1,104 @@ +/* GRAPHITE2 LICENSING + + Copyright 2015, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ + +#pragma once + +#include <cassert> +#include <cstddef> +#include <cstring> + +namespace +{ + +#if defined(_MSC_VER) +typedef unsigned __int8 u8; +typedef unsigned __int16 u16; +typedef unsigned __int32 u32; +typedef unsigned __int64 u64; +#else +#include <stdint.h> +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; +#endif + +ptrdiff_t const MINMATCH = 4, + LASTLITERALS = 5, + MINCODA = LASTLITERALS+1, + MINSRCSIZE = 13; + +template<int S> +inline +void unaligned_copy(void * d, void const * s) { + ::memcpy(d, s, S); +} + +inline +size_t align(size_t p) { + return (p + sizeof(unsigned long)-1) & ~(sizeof(unsigned long)-1); +} + +inline +u8 * safe_copy(u8 * d, u8 const * s, size_t n) { + while (n--) *d++ = *s++; + return d; +} + +inline +u8 * overrun_copy(u8 * d, u8 const * s, size_t n) { + size_t const WS = sizeof(unsigned long); + u8 const * e = s + n; + do + { + unaligned_copy<WS>(d, s); + d += WS; + s += WS; + } + while (s < e); + d-=(s-e); + + return d; +} + + +inline +u8 * fast_copy(u8 * d, u8 const * s, size_t n) { + size_t const WS = sizeof(unsigned long); + size_t wn = n/WS; + while (wn--) + { + unaligned_copy<WS>(d, s); + d += WS; + s += WS; + } + n &= WS-1; + return safe_copy(d, s, n); +} + + +} // end of anonymous namespace diff --git a/thirdparty/graphite/src/inc/Decompressor.h b/thirdparty/graphite/src/inc/Decompressor.h new file mode 100644 index 0000000000..10f21b7af1 --- /dev/null +++ b/thirdparty/graphite/src/inc/Decompressor.h @@ -0,0 +1,54 @@ +/* GRAPHITE2 LICENSING + + Copyright 2015, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ + +#pragma once + +#include <cstddef> + +namespace lz4 +{ + +// decompress an LZ4 block +// Parameters: +// @in - Input buffer containing an LZ4 block. +// @in_size - Size of the input LZ4 block in bytes. +// @out - Output buffer to hold decompressed results. +// @out_size - The size of the buffer pointed to by @out. +// Invariants: +// @in - This buffer must be at least 1 machine word in length, +// regardless of the actual LZ4 block size. +// @in_size - This must be at least 4 and must also be <= to the +// allocated buffer @in. +// @out - This must be bigger than the input buffer and at least +// 13 bytes. +// @out_size - Must always be big enough to hold the expected size. +// Return: +// -1 - Decompression failed. +// size - Actual number of bytes decompressed. +int decompress(void const *in, size_t in_size, void *out, size_t out_size); + +} // end of namespace shrinker diff --git a/thirdparty/graphite/src/inc/Endian.h b/thirdparty/graphite/src/inc/Endian.h new file mode 100644 index 0000000000..56ecfd8667 --- /dev/null +++ b/thirdparty/graphite/src/inc/Endian.h @@ -0,0 +1,111 @@ +/* GRAPHITE2 LICENSING + + Copyright 2011, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ + +/* +Description: + A set of fast template based decoders for decoding values of any C integer + type up to long int size laid out with most significant byte first or least + significant byte first (aka big endian or little endian). These are CPU + byte order agnostic and will function the same regardless of the CPUs native + byte order. + + Being template based means if the either le or be class is not used then + template code of unused functions will not be instantiated by the compiler + and thus shouldn't cause any overhead. +*/ + +#include <cstddef> + +#pragma once + + +class be +{ + template<int S> + inline static unsigned long int _peek(const unsigned char * p) { + return _peek<S/2>(p) << (S/2)*8 | _peek<S/2>(p+S/2); + } +public: + template<typename T> + inline static T peek(const void * p) { + return T(_peek<sizeof(T)>(static_cast<const unsigned char *>(p))); + } + + template<typename T> + inline static T read(const unsigned char * &p) { + const T r = T(_peek<sizeof(T)>(p)); + p += sizeof r; + return r; + } + + template<typename T> + inline static T swap(const T x) { + return T(_peek<sizeof(T)>(reinterpret_cast<const unsigned char *>(&x))); + } + + template<typename T> + inline static void skip(const unsigned char * &p, size_t n=1) { + p += sizeof(T)*n; + } +}; + +template<> +inline unsigned long int be::_peek<1>(const unsigned char * p) { return *p; } + + +class le +{ + template<int S> + inline static unsigned long int _peek(const unsigned char * p) { + return _peek<S/2>(p) | _peek<S/2>(p+S/2) << (S/2)*8; + } +public: + template<typename T> + inline static T peek(const void * p) { + return T(_peek<sizeof(T)>(static_cast<const unsigned char *>(p))); + } + + template<typename T> + inline static T read(const unsigned char * &p) { + const T r = T(_peek<sizeof(T)>(p)); + p += sizeof r; + return r; + } + + template<typename T> + inline static T swap(const T x) { + return T(_peek<sizeof(T)>(reinterpret_cast<const unsigned char *>(&x))); + } + + template<typename T> + inline static void skip(const unsigned char * &p, size_t n=1) { + p += sizeof(T)*n; + } +}; + +template<> +inline unsigned long int le::_peek<1>(const unsigned char * p) { return *p; } diff --git a/thirdparty/graphite/src/inc/Error.h b/thirdparty/graphite/src/inc/Error.h new file mode 100644 index 0000000000..2b7ab763a2 --- /dev/null +++ b/thirdparty/graphite/src/inc/Error.h @@ -0,0 +1,134 @@ +/* GRAPHITE2 LICENSING + + Copyright 2013, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +// numbers are explicitly assigned for future proofing + +namespace graphite2 +{ + +class Error +{ +public: + Error() : _e(0) {}; + operator bool() { return (_e != 0); } + int error() { return _e; } + void error(int e) { _e = e; } + bool test(bool pr, int err) { return (_e = int(pr) * err); } + +private: + int _e; +}; + +enum errcontext { + EC_READGLYPHS = 1, // while reading glyphs + EC_READSILF = 2, // in Silf table + EC_ASILF = 3, // in Silf %d + EC_APASS = 4, // in Silf %d, pass %d + EC_PASSCCODE = 5, // in pass constraint code for Silf %d, pass %d + EC_ARULE = 6, // in Silf %d, pass %d, rule %d + EC_ASTARTS = 7, // in Silf %d, pass %d, start state %d + EC_ATRANS = 8, // in Silf %d, pass %d, fsm state %d + EC_ARULEMAP = 9 // in Silf %d, pass %d, state %d +}; + +enum errors { + E_OUTOFMEM = 1, // Out of memory + E_NOGLYPHS = 2, // There are no glyphs in the font + E_BADUPEM = 3, // The units per em for the font is bad (0) + E_BADCMAP = 4, // The font does not contain any useful cmaps + E_NOSILF = 5, // Missing Silf table + E_TOOOLD = 6, // Silf table version is too old + E_BADSIZE = 7, // context object has the wrong structural size +// Silf Subtable Errors take a Silf subtable number * 256 in the context + E_BADMAXGLYPH = 8, // Silf max glyph id is too high + E_BADNUMJUSTS = 9, // Number of Silf justification blocks is too high + E_BADENDJUSTS = 10, // Silf justification blocks take too much of the Silf table space + E_BADCRITFEATURES = 11, // Critical features section in a Silf table is too big + E_BADSCRIPTTAGS = 12, // Silf script tags area is too big + E_BADAPSEUDO = 13, // The pseudo glyph attribute number is too high + E_BADABREAK = 14, // The linebreak glyph attribute number is too high + E_BADABIDI = 15, // The bidi glyph attribute number is too high + E_BADAMIRROR = 16, // The mirrored glyph attribute number is too high + E_BADNUMPASSES = 17, // The number of passes is > 128 + E_BADPASSESSTART = 18, // The Silf table is too small to hold any passes + E_BADPASSBOUND = 19, // The positioning pass number is too low or the substitution pass number is too high + E_BADPPASS = 20, // The positioning pass number is too high + E_BADSPASS = 21, // the substitution pass number is too high + E_BADJPASSBOUND = 22, // the justification pass must be higher than the positioning pass + E_BADJPASS = 23, // the justification pass is too high + E_BADALIG = 24, // the number of initial ligature component glyph attributes is too high + E_BADBPASS = 25, // the bidi pass number is specified and is either too high or too low + E_BADNUMPSEUDO = 26, // The number of pseudo glyphs is too high + E_BADCLASSSIZE = 27, // The size of the classes block is bad + E_TOOMANYLINEAR = 28, // The number of linear classes in the silf table is too high + E_CLASSESTOOBIG = 29, // There are too many classes for the space allocated in the Silf subtable + E_MISALIGNEDCLASSES = 30, // The class offsets in the class table don't line up with the number of classes + E_HIGHCLASSOFFSET = 31, // The class offsets point out of the class table + E_BADCLASSOFFSET = 32, // A class offset is less than one following it + E_BADCLASSLOOKUPINFO = 33, // The search header info for a non-linear class has wrong values in it +// Pass subtable errors. Context has pass number * 65536 + E_BADPASSSTART = 34, // The start offset for a particular pass is bad + E_BADPASSEND = 35, // The end offset for a particular pass is bad + E_BADPASSLENGTH = 36, // The length of the pass is too small + E_BADNUMTRANS = 37, // The number of transition states in the fsm is bad + E_BADNUMSUCCESS = 38, // The number of success states in the fsm is bad + E_BADNUMSTATES = 39, // The number of states in the fsm is bad + E_NORANGES = 40, // There are no columns in the fsm + E_BADRULEMAPLEN = 41, // The size of the success state to rule mapping is bad + E_BADCTXTLENBOUNDS = 42, // The precontext maximum is greater than its minimum + E_BADCTXTLENS = 43, // The lists of rule lengths or pre context lengths is bad + E_BADPASSCCODEPTR = 44, // The pass constraint code position does not align with where the forward reference says it should be + E_BADRULECCODEPTR = 45, // The rule constraint code position does not align with where the forward reference says it should be + E_BADCCODELEN = 46, // Bad rule/pass constraint code length + E_BADACTIONCODEPTR = 47, // The action code position does not align with where the forward reference says it should be + E_MUTABLECCODE = 48, // Constraint code edits slots. It shouldn't. + E_BADSTATE = 49, // Bad state transition referencing an illegal state + E_BADRULEMAPPING = 50, // The structure of the rule mapping is bad + E_BADRANGE = 51, // Bad column range structure including a glyph in more than one column + E_BADRULENUM = 52, // A reference to a rule is out of range (too high) + E_BADACOLLISION = 53, // Bad Silf table collision attribute number (too high) + E_BADEMPTYPASS = 54, // Can't have empty passes (no rules) except for collision passes + E_BADSILFVERSION = 55, // The Silf table has a bad version (probably too high) + E_BADCOLLISIONPASS = 56, // Collision flags set on a non positioning pass + E_BADNUMCOLUMNS = 57, // Arbitrarily limit number of columns in fsm +// Code errors + E_CODEFAILURE = 60, // Base code error. The following subcodes must align with Machine::Code::status_t in Code.h + E_CODEALLOC = 61, // Out of memory + E_INVALIDOPCODE = 62, // Invalid op code + E_UNIMPOPCODE = 63, // Unimplemented op code encountered + E_OUTOFRANGECODE = 64, // Code argument out of range + E_BADJUMPCODE = 65, // Code jumps past end of op codes + E_CODEBADARGS = 66, // Code arguments exhausted + E_CODENORETURN = 67, // Missing return type op code at end of code + E_CODENESTEDCTXT = 68, // Nested context encountered in code +// Compression errors + E_BADSCHEME = 69, + E_SHRINKERFAILED = 70, +}; + +} diff --git a/thirdparty/graphite/src/inc/Face.h b/thirdparty/graphite/src/inc/Face.h new file mode 100644 index 0000000000..355c5aa0d3 --- /dev/null +++ b/thirdparty/graphite/src/inc/Face.h @@ -0,0 +1,225 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +#include <cstdio> + +#include "graphite2/Font.h" + +#include "inc/Main.h" +#include "inc/FeatureMap.h" +#include "inc/TtfUtil.h" +#include "inc/Silf.h" +#include "inc/Error.h" + +namespace graphite2 { + +class Cmap; +class FileFace; +class GlyphCache; +class NameTable; +class json; +class Font; + + +using TtfUtil::Tag; + +// These are the actual tags, as distinct from the consecutive IDs in TtfUtil.h + +class Face +{ + // Prevent any kind of copying + Face(const Face&); + Face& operator=(const Face&); + +public: + class Table; + static float default_glyph_advance(const void* face_ptr, gr_uint16 glyphid); + + Face(const void* appFaceHandle/*non-NULL*/, const gr_face_ops & ops); + virtual ~Face(); + + virtual bool runGraphite(Segment *seg, const Silf *silf) const; + +public: + bool readGlyphs(uint32 faceOptions); + bool readGraphite(const Table & silf); + bool readFeatures(); + void takeFileFace(FileFace* pFileFace/*takes ownership*/); + + const SillMap & theSill() const; + const GlyphCache & glyphs() const; + Cmap & cmap() const; + NameTable * nameTable() const; + void setLogger(FILE *log_file); + json * logger() const throw(); + + const Silf * chooseSilf(uint32 script) const; + uint16 languageForLocale(const char * locale) const; + + // Features + uint16 numFeatures() const; + const FeatureRef * featureById(uint32 id) const; + const FeatureRef * feature(uint16 index) const; + + // Glyph related + int32 getGlyphMetric(uint16 gid, uint8 metric) const; + uint16 findPseudo(uint32 uid) const; + + // Errors + unsigned int error() const { return m_error; } + bool error(Error e) { m_error = e.error(); return false; } + unsigned int error_context() const { return m_error; } + void error_context(unsigned int errcntxt) { m_errcntxt = errcntxt; } + + CLASS_NEW_DELETE; +private: + SillMap m_Sill; + gr_face_ops m_ops; + const void * m_appFaceHandle; // non-NULL + FileFace * m_pFileFace; //owned + mutable GlyphCache * m_pGlyphFaceCache; // owned - never NULL + mutable Cmap * m_cmap; // cmap cache if available + mutable NameTable * m_pNames; + mutable json * m_logger; + unsigned int m_error; + unsigned int m_errcntxt; +protected: + Silf * m_silfs; // silf subtables. + uint16 m_numSilf; // num silf subtables in the silf table +private: + uint16 m_ascent, + m_descent; +#ifdef GRAPHITE2_TELEMETRY +public: + mutable telemetry tele; +#endif +}; + + + +inline +const SillMap & Face::theSill() const +{ + return m_Sill; +} + +inline +uint16 Face::numFeatures() const +{ + return m_Sill.theFeatureMap().numFeats(); +} + +inline +const FeatureRef * Face::featureById(uint32 id) const +{ + return m_Sill.theFeatureMap().findFeatureRef(id); +} + +inline +const FeatureRef *Face::feature(uint16 index) const +{ + return m_Sill.theFeatureMap().feature(index); +} + +inline +const GlyphCache & Face::glyphs() const +{ + return *m_pGlyphFaceCache; +} + +inline +Cmap & Face::cmap() const +{ + return *m_cmap; +}; + +inline +json * Face::logger() const throw() +{ + return m_logger; +} + + + +class Face::Table +{ + const Face * _f; + mutable const byte * _p; + size_t _sz; + bool _compressed; + + Error decompress(); + + void release(); + +public: + Table() throw(); + Table(const Face & face, const Tag n, uint32 version=0xffffffff) throw(); + ~Table() throw(); + Table(const Table && rhs) throw(); + + operator const byte * () const throw(); + + size_t size() const throw(); + Table & operator = (const Table && rhs) throw(); +}; + +inline +Face::Table::Table() throw() +: _f(0), _p(0), _sz(0), _compressed(false) +{ +} + +inline +Face::Table::Table(const Table && rhs) throw() +: _f(rhs._f), _p(rhs._p), _sz(rhs._sz), _compressed(rhs._compressed) +{ + rhs._p = 0; +} + +inline +Face::Table::~Table() throw() +{ + release(); +} + +inline +Face::Table::operator const byte * () const throw() +{ + return _p; +} + +inline +size_t Face::Table::size() const throw() +{ + return _sz; +} + +} // namespace graphite2 + +struct gr_face : public graphite2::Face {}; diff --git a/thirdparty/graphite/src/inc/FeatureMap.h b/thirdparty/graphite/src/inc/FeatureMap.h new file mode 100644 index 0000000000..0f05e941a2 --- /dev/null +++ b/thirdparty/graphite/src/inc/FeatureMap.h @@ -0,0 +1,198 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once +#include "inc/Main.h" +#include "inc/FeatureVal.h" + +namespace graphite2 { + +// Forward declarations for implmentation types +class FeatureMap; +class Face; + + +class FeatureSetting +{ +public: + FeatureSetting(int16 theValue, uint16 labelId) : m_label(labelId), m_value(theValue) {}; + uint16 label() const { return m_label; } + int16 value() const { return m_value; } + + CLASS_NEW_DELETE; +private: + FeatureSetting(const FeatureSetting & fs) : m_label(fs.m_label), m_value(fs.m_value) {}; + + uint16 m_label; + int16 m_value; +}; + +class FeatureRef +{ + typedef uint32 chunk_t; + static const uint8 SIZEOF_CHUNK = sizeof(chunk_t)*8; + +public: + enum flags_t : uint16 { + HIDDEN = 0x0800 + }; + FeatureRef() throw(); + FeatureRef(const Face & face, unsigned short & bits_offset, uint32 max_val, + uint32 name, uint16 uiName, flags_t flags, + FeatureSetting *settings, uint16 num_set) throw(); + ~FeatureRef() throw(); + + bool applyValToFeature(uint32 val, Features& pDest) const; //defined in GrFaceImp.h + void maskFeature(Features & pDest) const { + if (m_index < pDest.size()) //defensive + pDest[m_index] |= m_mask; + } + + uint32 getFeatureVal(const Features& feats) const; //defined in GrFaceImp.h + + uint32 getId() const { return m_id; } + uint16 getNameId() const { return m_nameid; } + uint16 getNumSettings() const { return m_numSet; } + uint16 getSettingName(uint16 index) const { return m_nameValues[index].label(); } + int16 getSettingValue(uint16 index) const { return m_nameValues[index].value(); } + flags_t getFlags() const { return m_flags; } + uint32 maxVal() const { return m_max; } + const Face & getFace() const { assert(m_face); return *m_face;} + const FeatureMap* getFeatureMap() const;// { return m_pFace;} + + CLASS_NEW_DELETE; +private: + FeatureRef(const FeatureRef & rhs); + + const Face * m_face; + FeatureSetting * m_nameValues; // array of name table ids for feature values + chunk_t m_mask, // bit mask to get the value from the vector + m_max; // max value the value can take + uint32 m_id; // feature identifier/name + uint16 m_nameid, // Name table id for feature name + m_numSet; // number of values (number of entries in m_nameValues) + flags_t m_flags; // feature flags see FeatureRef::flags_t. + byte m_bits, // how many bits to shift the value into place + m_index; // index into the array to find the ulong to mask + +private: //unimplemented + FeatureRef& operator=(const FeatureRef&); +}; + +inline +FeatureRef::FeatureRef() throw() +: m_face(0), + m_nameValues(0), + m_mask(0), m_max(0), + m_id(0), m_nameid(0), m_numSet(0), + m_flags(flags_t(0)), + m_bits(0), m_index(0) +{ +} + + +class NameAndFeatureRef +{ + public: + NameAndFeatureRef(uint32 name = 0) : m_name(name) , m_pFRef(NULL){} + NameAndFeatureRef(FeatureRef const & p) : m_name(p.getId()), m_pFRef(&p) {} + + bool operator<(const NameAndFeatureRef& rhs) const //orders by m_name + { return m_name<rhs.m_name; } + + CLASS_NEW_DELETE + + uint32 m_name; + const FeatureRef* m_pFRef; +}; + +class FeatureMap +{ +public: + FeatureMap() : m_numFeats(0), m_feats(NULL), m_pNamedFeats(NULL) {} + ~FeatureMap() { delete[] m_feats; delete[] m_pNamedFeats; } + + bool readFeats(const Face & face); + const FeatureRef *findFeatureRef(uint32 name) const; + FeatureRef *feature(uint16 index) const { return m_feats + index; } + //GrFeatureRef *featureRef(byte index) { return index < m_numFeats ? m_feats + index : NULL; } + const FeatureRef *featureRef(byte index) const { return index < m_numFeats ? m_feats + index : NULL; } + FeatureVal* cloneFeatures(uint32 langname/*0 means default*/) const; //call destroy_Features when done. + uint16 numFeats() const { return m_numFeats; }; + CLASS_NEW_DELETE +private: +friend class SillMap; + uint16 m_numFeats; + + FeatureRef *m_feats; + NameAndFeatureRef* m_pNamedFeats; //owned + FeatureVal m_defaultFeatures; //owned + +private: //defensive on m_feats, m_pNamedFeats, and m_defaultFeatures + FeatureMap(const FeatureMap&); + FeatureMap& operator=(const FeatureMap&); +}; + + +class SillMap +{ +private: + class LangFeaturePair + { + LangFeaturePair(const LangFeaturePair &); + LangFeaturePair & operator = (const LangFeaturePair &); + + public: + LangFeaturePair() : m_lang(0), m_pFeatures(0) {} + ~LangFeaturePair() { delete m_pFeatures; } + + uint32 m_lang; + Features* m_pFeatures; //owns + CLASS_NEW_DELETE + }; +public: + SillMap() : m_langFeats(NULL), m_numLanguages(0) {} + ~SillMap() { delete[] m_langFeats; } + bool readFace(const Face & face); + bool readSill(const Face & face); + FeatureVal* cloneFeatures(uint32 langname/*0 means default*/) const; //call destroy_Features when done. + uint16 numLanguages() const { return m_numLanguages; }; + uint32 getLangName(uint16 index) const { return (index < m_numLanguages)? m_langFeats[index].m_lang : 0; }; + + const FeatureMap & theFeatureMap() const { return m_FeatureMap; }; +private: + FeatureMap m_FeatureMap; //of face + LangFeaturePair * m_langFeats; + uint16 m_numLanguages; + +private: //defensive on m_langFeats + SillMap(const SillMap&); + SillMap& operator=(const SillMap&); +}; + +} // namespace graphite2 + +struct gr_feature_ref : public graphite2::FeatureRef {}; diff --git a/thirdparty/graphite/src/inc/FeatureVal.h b/thirdparty/graphite/src/inc/FeatureVal.h new file mode 100644 index 0000000000..cd3f93b2b5 --- /dev/null +++ b/thirdparty/graphite/src/inc/FeatureVal.h @@ -0,0 +1,68 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once +#include <cstring> +#include <cassert> +#include "inc/Main.h" +#include "inc/List.h" + +namespace graphite2 { + +class FeatureRef; +class FeatureMap; + +class FeatureVal : public Vector<uint32> +{ +public: + FeatureVal() : m_pMap(0) { } + FeatureVal(int num, const FeatureMap & pMap) : Vector<uint32>(num), m_pMap(&pMap) {} + FeatureVal(const FeatureVal & rhs) : Vector<uint32>(rhs), m_pMap(rhs.m_pMap) {} + + FeatureVal & operator = (const FeatureVal & rhs) { Vector<uint32>::operator = (rhs); m_pMap = rhs.m_pMap; return *this; } + + bool operator ==(const FeatureVal & b) const + { + size_t n = size(); + if (n != b.size()) return false; + + for(const_iterator l = begin(), r = b.begin(); n && *l == *r; --n, ++l, ++r); + + return n == 0; + } + + CLASS_NEW_DELETE +private: + friend class FeatureRef; //so that FeatureRefs can manipulate m_vec directly + const FeatureMap* m_pMap; +}; + +typedef FeatureVal Features; + +} // namespace graphite2 + + +struct gr_feature_val : public graphite2::FeatureVal {}; diff --git a/thirdparty/graphite/src/inc/FileFace.h b/thirdparty/graphite/src/inc/FileFace.h new file mode 100644 index 0000000000..35927847f8 --- /dev/null +++ b/thirdparty/graphite/src/inc/FileFace.h @@ -0,0 +1,80 @@ +/* GRAPHITE2 LICENSING + + Copyright 2012, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +//#include "inc/FeatureMap.h" +//#include "inc/GlyphsCache.h" +//#include "inc/Silf.h" + +#ifndef GRAPHITE2_NFILEFACE + +#include <cstdio> +#include <cassert> + +#include "graphite2/Font.h" + +#include "inc/Main.h" +#include "inc/TtfTypes.h" +#include "inc/TtfUtil.h" + +namespace graphite2 { + + +class FileFace +{ + static const void * get_table_fn(const void* appFaceHandle, unsigned int name, size_t *len); + static void rel_table_fn(const void* appFaceHandle, const void *table_buffer); + +public: + static const gr_face_ops ops; + + FileFace(const char *filename); + ~FileFace(); + + operator bool () const throw(); + CLASS_NEW_DELETE; + +private: //defensive + FILE * _file; + size_t _file_len; + + TtfUtil::Sfnt::OffsetSubTable * _header_tbl; + TtfUtil::Sfnt::OffsetSubTable::Entry * _table_dir; + + FileFace(const FileFace&); + FileFace& operator=(const FileFace&); +}; + +inline +FileFace::operator bool() const throw() +{ + return _file && _header_tbl && _table_dir; +} + +} // namespace graphite2 + +#endif //!GRAPHITE2_NFILEFACE diff --git a/thirdparty/graphite/src/inc/Font.h b/thirdparty/graphite/src/inc/Font.h new file mode 100644 index 0000000000..9bc9ffb510 --- /dev/null +++ b/thirdparty/graphite/src/inc/Font.h @@ -0,0 +1,90 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once +#include <cassert> +#include "graphite2/Font.h" +#include "inc/Main.h" +#include "inc/Face.h" + +namespace graphite2 { + +#define INVALID_ADVANCE -1e38f // can't be a static const because non-integral + +class Font +{ +public: + Font(float ppm, const Face & face, const void * appFontHandle=0, const gr_font_ops * ops=0); + virtual ~Font(); + + float advance(unsigned short glyphid) const; + float scale() const; + bool isHinted() const; + const Face & face() const; + operator bool () const throw() { return m_advances; } + + CLASS_NEW_DELETE; +private: + gr_font_ops m_ops; + const void * const m_appFontHandle; + float * m_advances; // One advance per glyph in pixels. Nan if not defined + const Face & m_face; + float m_scale; // scales from design units to ppm + bool m_hinted; + + Font(const Font&); + Font& operator=(const Font&); +}; + +inline +float Font::advance(unsigned short glyphid) const +{ + if (m_advances[glyphid] == INVALID_ADVANCE) + m_advances[glyphid] = (*m_ops.glyph_advance_x)(m_appFontHandle, glyphid); + return m_advances[glyphid]; +} + +inline +float Font::scale() const +{ + return m_scale; +} + +inline +bool Font::isHinted() const +{ + return m_hinted; +} + +inline +const Face & Font::face() const +{ + return m_face; +} + +} // namespace graphite2 + +struct gr_font : public graphite2::Font {}; diff --git a/thirdparty/graphite/src/inc/GlyphCache.h b/thirdparty/graphite/src/inc/GlyphCache.h new file mode 100644 index 0000000000..7d5324e522 --- /dev/null +++ b/thirdparty/graphite/src/inc/GlyphCache.h @@ -0,0 +1,223 @@ +/* GRAPHITE2 LICENSING + + Copyright 2012, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + + +#include "graphite2/Font.h" +#include "inc/Main.h" +#include "inc/Position.h" +#include "inc/GlyphFace.h" + +namespace graphite2 { + +class Face; +class FeatureVal; +class Segment; + + +struct SlantBox +{ + static const SlantBox empty; + +// SlantBox(float psi = 0., float pdi = 0., float psa = 0., float pda = 0.) : si(psi), di(pdi), sa(psa), da(pda) {}; + float width() const { return sa - si; } + float height() const { return da - di; } + float si; // min + float di; // min + float sa; // max + float da; // max +}; + + +struct BBox +{ + BBox(float pxi = 0, float pyi = 0., float pxa = 0., float pya = 0.) : xi(pxi), yi(pyi), xa(pxa), ya(pya) {}; + float width() const { return xa - xi; } + float height() const { return ya - yi; } + float xi; // min + float yi; // min + float xa; // max + float ya; // max +}; + + +class GlyphBox +{ + GlyphBox(const GlyphBox &); + GlyphBox & operator = (const GlyphBox &); + +public: + GlyphBox(uint8 numsubs, unsigned short bitmap, Rect *slanted) : _num(numsubs), _bitmap(bitmap), _slant(*slanted) {}; + + void addSubBox(int subindex, int boundary, Rect *val) { _subs[subindex * 2 + boundary] = *val; } + Rect &subVal(int subindex, int boundary) { return _subs[subindex * 2 + boundary]; } + const Rect &slant() const { return _slant; } + uint8 num() const { return _num; } + const Rect *subs() const { return _subs; } + +private: + uint8 _num; + unsigned short _bitmap; + Rect _slant; + Rect _subs[1]; +}; + +class GlyphCache +{ + class Loader; + + GlyphCache(const GlyphCache&); + GlyphCache& operator=(const GlyphCache&); + +public: + GlyphCache(const Face & face, const uint32 face_options); + ~GlyphCache(); + + unsigned short numGlyphs() const throw(); + unsigned short numAttrs() const throw(); + unsigned short unitsPerEm() const throw(); + + const GlyphFace *glyph(unsigned short glyphid) const; //result may be changed by subsequent call with a different glyphid + const GlyphFace *glyphSafe(unsigned short glyphid) const; + float getBoundingMetric(unsigned short glyphid, uint8 metric) const; + uint8 numSubBounds(unsigned short glyphid) const; + float getSubBoundingMetric(unsigned short glyphid, uint8 subindex, uint8 metric) const; + const Rect & slant(unsigned short glyphid) const { return _boxes[glyphid] ? _boxes[glyphid]->slant() : _empty_slant_box; } + const SlantBox & getBoundingSlantBox(unsigned short glyphid) const; + const BBox & getBoundingBBox(unsigned short glyphid) const; + const SlantBox & getSubBoundingSlantBox(unsigned short glyphid, uint8 subindex) const; + const BBox & getSubBoundingBBox(unsigned short glyphid, uint8 subindex) const; + bool check(unsigned short glyphid) const; + bool hasBoxes() const { return _boxes != 0; } + + CLASS_NEW_DELETE; + +private: + const Rect _empty_slant_box; + const Loader * _glyph_loader; + const GlyphFace * * _glyphs; + GlyphBox * * _boxes; + unsigned short _num_glyphs, + _num_attrs, + _upem; +}; + +inline +unsigned short GlyphCache::numGlyphs() const throw() +{ + return _num_glyphs; +} + +inline +unsigned short GlyphCache::numAttrs() const throw() +{ + return _num_attrs; +} + +inline +unsigned short GlyphCache::unitsPerEm() const throw() +{ + return _upem; +} + +inline +bool GlyphCache::check(unsigned short glyphid) const +{ + return _boxes && glyphid < _num_glyphs; +} + +inline +const GlyphFace *GlyphCache::glyphSafe(unsigned short glyphid) const +{ + return glyphid < _num_glyphs ? glyph(glyphid) : NULL; +} + +inline +float GlyphCache::getBoundingMetric(unsigned short glyphid, uint8 metric) const +{ + if (glyphid >= _num_glyphs) return 0.; + switch (metric) { + case 0: return (float)(glyph(glyphid)->theBBox().bl.x); // x_min + case 1: return (float)(glyph(glyphid)->theBBox().bl.y); // y_min + case 2: return (float)(glyph(glyphid)->theBBox().tr.x); // x_max + case 3: return (float)(glyph(glyphid)->theBBox().tr.y); // y_max + case 4: return (float)(_boxes[glyphid] ? _boxes[glyphid]->slant().bl.x : 0.f); // sum_min + case 5: return (float)(_boxes[glyphid] ? _boxes[glyphid]->slant().bl.y : 0.f); // diff_min + case 6: return (float)(_boxes[glyphid] ? _boxes[glyphid]->slant().tr.x : 0.f); // sum_max + case 7: return (float)(_boxes[glyphid] ? _boxes[glyphid]->slant().tr.y : 0.f); // diff_max + default: return 0.; + } +} + +inline const SlantBox &GlyphCache::getBoundingSlantBox(unsigned short glyphid) const +{ + return _boxes[glyphid] ? *(SlantBox *)(&(_boxes[glyphid]->slant())) : SlantBox::empty; +} + +inline const BBox &GlyphCache::getBoundingBBox(unsigned short glyphid) const +{ + return *(BBox *)(&(glyph(glyphid)->theBBox())); +} + +inline +float GlyphCache::getSubBoundingMetric(unsigned short glyphid, uint8 subindex, uint8 metric) const +{ + GlyphBox *b = _boxes[glyphid]; + if (b == NULL || subindex >= b->num()) return 0; + + switch (metric) { + case 0: return b->subVal(subindex, 0).bl.x; + case 1: return b->subVal(subindex, 0).bl.y; + case 2: return b->subVal(subindex, 0).tr.x; + case 3: return b->subVal(subindex, 0).tr.y; + case 4: return b->subVal(subindex, 1).bl.x; + case 5: return b->subVal(subindex, 1).bl.y; + case 6: return b->subVal(subindex, 1).tr.x; + case 7: return b->subVal(subindex, 1).tr.y; + default: return 0.; + } +} + +inline const SlantBox &GlyphCache::getSubBoundingSlantBox(unsigned short glyphid, uint8 subindex) const +{ + GlyphBox *b = _boxes[glyphid]; + return *(SlantBox *)(b->subs() + 2 * subindex + 1); +} + +inline const BBox &GlyphCache::getSubBoundingBBox(unsigned short glyphid, uint8 subindex) const +{ + GlyphBox *b = _boxes[glyphid]; + return *(BBox *)(b->subs() + 2 * subindex); +} + +inline +uint8 GlyphCache::numSubBounds(unsigned short glyphid) const +{ + return _boxes[glyphid] ? _boxes[glyphid]->num() : 0; +} + +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/GlyphFace.h b/thirdparty/graphite/src/inc/GlyphFace.h new file mode 100644 index 0000000000..fc29056146 --- /dev/null +++ b/thirdparty/graphite/src/inc/GlyphFace.h @@ -0,0 +1,83 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +#include "inc/Main.h" +#include "inc/Position.h" +#include "inc/Sparse.h" + +namespace graphite2 { + +enum metrics { + kgmetLsb = 0, kgmetRsb, + kgmetBbTop, kgmetBbBottom, kgmetBbLeft, kgmetBbRight, + kgmetBbHeight, kgmetBbWidth, + kgmetAdvWidth, kgmetAdvHeight, + kgmetAscent, kgmetDescent +}; + + +class GlyphFace +{ +public: + GlyphFace(); + template<typename I> + GlyphFace(const Rect & bbox, const Position & adv, I first, const I last); + + const Position & theAdvance() const; + const Rect & theBBox() const { return m_bbox; } + const sparse & attrs() const { return m_attrs; } + int32 getMetric(uint8 metric) const; + + CLASS_NEW_DELETE; +private: + Rect m_bbox; // bounding box metrics in design units + Position m_advance; // Advance width and height in design units + sparse m_attrs; +}; + + +// Inlines: class GlyphFace +// +inline +GlyphFace::GlyphFace() +{} + +template<typename I> +GlyphFace::GlyphFace(const Rect & bbox, const Position & adv, I first, const I last) +: m_bbox(bbox), + m_advance(adv), + m_attrs(first, last) +{ +} + +inline +const Position & GlyphFace::theAdvance() const { + return m_advance; +} + +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/Intervals.h b/thirdparty/graphite/src/inc/Intervals.h new file mode 100644 index 0000000000..81d23187b6 --- /dev/null +++ b/thirdparty/graphite/src/inc/Intervals.h @@ -0,0 +1,234 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +#include <utility> + +#include "inc/Main.h" +#include "inc/List.h" +#include "inc/json.h" +#include "inc/Position.h" + +// An IntervalSet represents the possible movement of a given glyph in a given direction +// (horizontally, vertically, or diagonally). +// A vector is needed to represent disjoint ranges, eg, -300..-150, 20..200, 500..750. +// Each pair represents the min/max of a sub-range. + +namespace graphite2 { + +class Segment; + +enum zones_t {SD, XY}; + +class Zones +{ + struct Exclusion + { + template<zones_t O> + static Exclusion weighted(float xmin, float xmax, float f, float a0, + float m, float xi, float ai, float c, bool nega); + + float x, // x position + xm, // xmax position + c, // constant + sum(MiXi^2) + sm, // sum(Mi) + smx; // sum(MiXi) + bool open; + + Exclusion(float x, float w, float smi, float smxi, float c); + Exclusion & operator += (Exclusion const & rhs); + uint8 outcode(float p) const; + + Exclusion split_at(float p); + void left_trim(float p); + + bool track_cost(float & cost, float & x, float origin) const; + + private: + float test_position(float origin) const; + float cost(float x) const; + }; + + typedef Vector<Exclusion> exclusions; + + typedef exclusions::iterator iterator; + typedef Exclusion * pointer; + typedef Exclusion & reference; + typedef std::reverse_iterator<iterator> reverse_iterator; + +public: + typedef exclusions::const_iterator const_iterator; + typedef Exclusion const * const_pointer; + typedef Exclusion const & const_reference; + typedef std::reverse_iterator<const_iterator> const_reverse_iterator; + +#if !defined GRAPHITE2_NTRACING + struct Debug + { + Exclusion _excl; + bool _isdel; + Vector<void *> _env; + + Debug(Exclusion *e, bool isdel, json *dbg) : _excl(*e), _isdel(isdel), _env(dbg->getenvs()) { }; + }; + + typedef Vector<Debug> debugs; + typedef debugs::const_iterator idebugs; + void addDebug(Exclusion *e); + void removeDebug(float pos, float posm); + void setdebug(json *dbgout) { _dbg = dbgout; } + idebugs dbgs_begin() const { return _dbgs.begin(); } + idebugs dbgs_end() const { return _dbgs.end(); } + void jsonDbgOut(Segment *seg) const; + Position position() const { return Position(_pos, _posm); } +#endif + + Zones(); + template<zones_t O> + void initialise(float xmin, float xmax, float margin_len, float margin_weight, float ao); + + void exclude(float xmin, float xmax); + void exclude_with_margins(float xmin, float xmax, int axis); + + template<zones_t O> + void weighted(float xmin, float xmax, float f, float a0, float mi, float xi, float ai, float c, bool nega); + void weightedAxis(int axis, float xmin, float xmax, float f, float a0, float mi, float xi, float ai, float c, bool nega); + + float closest( float origin, float &cost) const; + + const_iterator begin() const { return _exclusions.begin(); } + const_iterator end() const { return _exclusions.end(); } + +private: + exclusions _exclusions; +#if !defined GRAPHITE2_NTRACING + json * _dbg; + debugs _dbgs; +#endif + float _margin_len, + _margin_weight, + _pos, + _posm; + + void insert(Exclusion e); + void remove(float x, float xm); + const_iterator find_exclusion_under(float x) const; +}; + + +inline +Zones::Zones() +: _margin_len(0), _margin_weight(0), _pos(0), _posm(0) +{ +#if !defined GRAPHITE2_NTRACING + _dbg = 0; +#endif + _exclusions.reserve(8); +} + +inline +Zones::Exclusion::Exclusion(float x_, float xm_, float smi, float smxi, float c_) +: x(x_), xm(xm_), c(c_), sm(smi), smx(smxi), open(false) +{ } + +template<zones_t O> +inline +void Zones::initialise(float xmin, float xmax, float margin_len, + float margin_weight, float a0) +{ + _margin_len = margin_len; + _margin_weight = margin_weight; + _pos = xmin; + _posm = xmax; + _exclusions.clear(); + _exclusions.push_back(Exclusion::weighted<O>(xmin, xmax, 1, a0, 0, 0, 0, 0, false)); + _exclusions.front().open = true; +#if !defined GRAPHITE2_NTRACING + _dbgs.clear(); +#endif +} + +inline +void Zones::exclude(float xmin, float xmax) { + remove(xmin, xmax); +} + +template<zones_t O> +inline +void Zones::weighted(float xmin, float xmax, float f, float a0, + float m, float xi, float ai, float c, bool nega) { + insert(Exclusion::weighted<O>(xmin, xmax, f, a0, m, xi, ai, c, nega)); +} + +inline +void Zones::weightedAxis(int axis, float xmin, float xmax, float f, float a0, + float m, float xi, float ai, float c, bool nega) { + if (axis < 2) + weighted<XY>(xmin, xmax, f, a0, m, xi, ai, c, nega); + else + weighted<SD>(xmin, xmax, f, a0, m, xi, ai, c, nega); +} + +#if !defined GRAPHITE2_NTRACING +inline +void Zones::addDebug(Exclusion *e) { + if (_dbg) + _dbgs.push_back(Debug(e, false, _dbg)); +} + +inline +void Zones::removeDebug(float pos, float posm) { + if (_dbg) + { + Exclusion e(pos, posm, 0, 0, 0); + _dbgs.push_back(Debug(&e, true, _dbg)); + } +} +#endif + +template<> +inline +Zones::Exclusion Zones::Exclusion::weighted<XY>(float xmin, float xmax, float f, float a0, + float m, float xi, GR_MAYBE_UNUSED float ai, float c, GR_MAYBE_UNUSED bool nega) { + return Exclusion(xmin, xmax, + m + f, + m * xi, + m * xi * xi + f * a0 * a0 + c); +} + +template<> +inline +Zones::Exclusion Zones::Exclusion::weighted<SD>(float xmin, float xmax, float f, float a0, + float m, float xi, float ai,float c, bool nega) { + float xia = nega ? xi - ai : xi + ai; + return Exclusion(xmin, xmax, + 0.25f * (m + 2.f * f), + 0.25f * m * xia, + 0.25f * (m * xia * xia + 2.f * f * a0 * a0) + c); +} + +} // end of namespace graphite2 diff --git a/thirdparty/graphite/src/inc/List.h b/thirdparty/graphite/src/inc/List.h new file mode 100644 index 0000000000..a3b7a77961 --- /dev/null +++ b/thirdparty/graphite/src/inc/List.h @@ -0,0 +1,168 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ + +// designed to have a limited subset of the std::vector api +#pragma once + +#include <cstddef> +#include <cassert> +#include <cstring> +#include <cstdlib> +#include <new> + +#include "Main.h" + +namespace graphite2 { + +template <typename T> +inline +ptrdiff_t distance(T* first, T* last) { return last-first; } + + +template <typename T> +class Vector +{ + T * m_first, *m_last, *m_end; +public: + typedef T & reference; + typedef const T & const_reference; + typedef T * iterator; + typedef const T * const_iterator; + + Vector() : m_first(0), m_last(0), m_end(0) {} + Vector(size_t n, const T& value = T()) : m_first(0), m_last(0), m_end(0) { insert(begin(), n, value); } + Vector(const Vector<T> &rhs) : m_first(0), m_last(0), m_end(0) { insert(begin(), rhs.begin(), rhs.end()); } + template <typename I> + Vector(I first, const I last) : m_first(0), m_last(0), m_end(0) { insert(begin(), first, last); } + ~Vector() { clear(); free(m_first); } + + iterator begin() { return m_first; } + const_iterator begin() const { return m_first; } + + iterator end() { return m_last; } + const_iterator end() const { return m_last; } + + bool empty() const { return m_first == m_last; } + size_t size() const { return m_last - m_first; } + size_t capacity() const{ return m_end - m_first; } + + void reserve(size_t n); + void resize(size_t n, const T & v = T()); + + reference front() { assert(size() > 0); return *begin(); } + const_reference front() const { assert(size() > 0); return *begin(); } + reference back() { assert(size() > 0); return *(end()-1); } + const_reference back() const { assert(size() > 0); return *(end()-1); } + + Vector<T> & operator = (const Vector<T> & rhs) { assign(rhs.begin(), rhs.end()); return *this; } + reference operator [] (size_t n) { assert(size() > n); return m_first[n]; } + const_reference operator [] (size_t n) const { assert(size() > n); return m_first[n]; } + + void assign(size_t n, const T& u) { clear(); insert(begin(), n, u); } + void assign(const_iterator first, const_iterator last) { clear(); insert(begin(), first, last); } + iterator insert(iterator p, const T & x) { p = _insert_default(p, 1); new (p) T(x); return p; } + void insert(iterator p, size_t n, const T & x); + void insert(iterator p, const_iterator first, const_iterator last); + void pop_back() { assert(size() > 0); --m_last; } + void push_back(const T &v) { if (m_last == m_end) reserve(size()+1); new (m_last++) T(v); } + + void clear() { erase(begin(), end()); } + iterator erase(iterator p) { return erase(p, p+1); } + iterator erase(iterator first, iterator last); + +private: + iterator _insert_default(iterator p, size_t n); +}; + +template <typename T> +inline +void Vector<T>::reserve(size_t n) +{ + if (n > capacity()) + { + const ptrdiff_t sz = size(); + size_t requested; + if (checked_mul(n,sizeof(T), requested)) std::abort(); + m_first = static_cast<T*>(realloc(m_first, requested)); + if (!m_first) std::abort(); + m_last = m_first + sz; + m_end = m_first + n; + } +} + +template <typename T> +inline +void Vector<T>::resize(size_t n, const T & v) { + const ptrdiff_t d = n-size(); + if (d < 0) erase(end()+d, end()); + else if (d > 0) insert(end(), d, v); +} + +template<typename T> +inline +typename Vector<T>::iterator Vector<T>::_insert_default(iterator p, size_t n) +{ + assert(begin() <= p && p <= end()); + const ptrdiff_t i = p - begin(); + reserve(((size() + n + 7) >> 3) << 3); + p = begin() + i; + // Move tail if there is one + if (p != end()) memmove(p + n, p, distance(p,end())*sizeof(T)); + m_last += n; + return p; +} + +template<typename T> +inline +void Vector<T>::insert(iterator p, size_t n, const T & x) +{ + p = _insert_default(p, n); + // Copy in elements + for (; n; --n, ++p) { new (p) T(x); } +} + +template<typename T> +inline +void Vector<T>::insert(iterator p, const_iterator first, const_iterator last) +{ + p = _insert_default(p, distance(first, last)); + // Copy in elements + for (;first != last; ++first, ++p) { new (p) T(*first); } +} + +template<typename T> +inline +typename Vector<T>::iterator Vector<T>::erase(iterator first, iterator last) +{ + for (iterator e = first; e != last; ++e) e->~T(); + const size_t sz = distance(first, last); + if (m_last != last) memmove(first, last, distance(last,end())*sizeof(T)); + m_last -= sz; + return first; +} + +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/Machine.h b/thirdparty/graphite/src/inc/Machine.h new file mode 100644 index 0000000000..b23819fb98 --- /dev/null +++ b/thirdparty/graphite/src/inc/Machine.h @@ -0,0 +1,207 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +// This general interpreter interface. +// Author: Tim Eves + +// Build one of direct_machine.cpp or call_machine.cpp to implement this +// interface. + +#pragma once +#include <cstring> +#include <limits> +#include <graphite2/Types.h> +#include "inc/Main.h" + +#if defined(__GNUC__) +#if defined(__clang__) || (__GNUC__ * 100 + __GNUC_MINOR__ * 10) < 430 +#define HOT +#if defined(__x86_64) +#define REGPARM(n) __attribute__((regparm(n))) +#else +#define REGPARM(n) +#endif +#else +#define HOT __attribute__((hot)) +#if defined(__x86_64) +#define REGPARM(n) __attribute__((hot, regparm(n))) +#else +#define REGPARM(n) +#endif +#endif +#else +#define HOT +#define REGPARM(n) +#endif + +#if defined(__MINGW32__) +// MinGW's <limits> at some point includes winnt.h which #define's a +// DELETE macro, which conflicts with enum opcode below, so we undefine +// it here. +#undef DELETE +#endif + +namespace graphite2 { + +// Forward declarations +class Segment; +class Slot; +class SlotMap; + + +namespace vm +{ + + +typedef void * instr; +typedef Slot * slotref; + +enum {VARARGS = 0xff, MAX_NAME_LEN=32}; + +enum opcode { + NOP = 0, + + PUSH_BYTE, PUSH_BYTEU, PUSH_SHORT, PUSH_SHORTU, PUSH_LONG, + + ADD, SUB, MUL, DIV, + MIN_, MAX_, + NEG, + TRUNC8, TRUNC16, + + COND, + + AND, OR, NOT, + EQUAL, NOT_EQ, + LESS, GTR, LESS_EQ, GTR_EQ, + + NEXT, NEXT_N, COPY_NEXT, + PUT_GLYPH_8BIT_OBS, PUT_SUBS_8BIT_OBS, PUT_COPY, + INSERT, DELETE, + ASSOC, + CNTXT_ITEM, + + ATTR_SET, ATTR_ADD, ATTR_SUB, + ATTR_SET_SLOT, + IATTR_SET_SLOT, + PUSH_SLOT_ATTR, PUSH_GLYPH_ATTR_OBS, + PUSH_GLYPH_METRIC, PUSH_FEAT, + PUSH_ATT_TO_GATTR_OBS, PUSH_ATT_TO_GLYPH_METRIC, + PUSH_ISLOT_ATTR, + + PUSH_IGLYPH_ATTR, // not implemented + + POP_RET, RET_ZERO, RET_TRUE, + IATTR_SET, IATTR_ADD, IATTR_SUB, + PUSH_PROC_STATE, PUSH_VERSION, + PUT_SUBS, PUT_SUBS2, PUT_SUBS3, + PUT_GLYPH, PUSH_GLYPH_ATTR, PUSH_ATT_TO_GLYPH_ATTR, + BITOR, BITAND, BITNOT, + BITSET, SET_FEAT, + MAX_OPCODE, + // private opcodes for internal use only, comes after all other on disk opcodes + TEMP_COPY = MAX_OPCODE +}; + +struct opcode_t +{ + instr impl[2]; + uint8 param_sz; + char name[MAX_NAME_LEN]; +}; + + +class Machine +{ +public: + typedef int32 stack_t; + static size_t const STACK_ORDER = 10, + STACK_MAX = 1 << STACK_ORDER, + STACK_GUARD = 2; + + class Code; + + enum status_t { + finished = 0, + stack_underflow, + stack_not_empty, + stack_overflow, + slot_offset_out_bounds, + died_early + }; + + Machine(SlotMap &) throw(); + static const opcode_t * getOpcodeTable() throw(); + + CLASS_NEW_DELETE; + + SlotMap & slotMap() const throw(); + status_t status() const throw(); +// operator bool () const throw(); + +private: + void check_final_stack(const stack_t * const sp); + stack_t run(const instr * program, const byte * data, + slotref * & map) HOT; + + SlotMap & _map; + stack_t _stack[STACK_MAX + 2*STACK_GUARD]; + status_t _status; +}; + +inline Machine::Machine(SlotMap & map) throw() +: _map(map), _status(finished) +{ + // Initialise stack guard +1 entries as the stack pointer points to the + // current top of stack, hence the first push will never write entry 0. + // Initialising the guard space like this is unnecessary and is only + // done to keep valgrind happy during fuzz testing. Hopefully loop + // unrolling will flatten this. + for (size_t n = STACK_GUARD + 1; n; --n) _stack[n-1] = 0; +} + +inline SlotMap& Machine::slotMap() const throw() +{ + return _map; +} + +inline Machine::status_t Machine::status() const throw() +{ + return _status; +} + +inline void Machine::check_final_stack(const stack_t * const sp) +{ + if (_status != finished) return; + + stack_t const * const base = _stack + STACK_GUARD, + * const limit = base + STACK_MAX; + if (sp < base) _status = stack_underflow; // This should be impossible now. + else if (sp >= limit) _status = stack_overflow; // So should this. + else if (sp != base) _status = stack_not_empty; +} + +} // namespace vm +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/Main.h b/thirdparty/graphite/src/inc/Main.h new file mode 100644 index 0000000000..ebf02dd553 --- /dev/null +++ b/thirdparty/graphite/src/inc/Main.h @@ -0,0 +1,199 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +#include <cstdlib> +#include "graphite2/Types.h" + +#ifdef GRAPHITE2_CUSTOM_HEADER +#include GRAPHITE2_CUSTOM_HEADER +#endif + +namespace graphite2 { + +typedef gr_uint8 uint8; +typedef gr_uint8 byte; +typedef gr_uint16 uint16; +typedef gr_uint32 uint32; +typedef gr_int8 int8; +typedef gr_int16 int16; +typedef gr_int32 int32; +typedef size_t uintptr; + +#ifdef GRAPHITE2_TELEMETRY +struct telemetry +{ + class category; + + static size_t * _category; + static void set_category(size_t & t) throw() { _category = &t; } + static void stop() throw() { _category = 0; } + static void count_bytes(size_t n) throw() { if (_category) *_category += n; } + + size_t misc, + silf, + glyph, + code, + states, + starts, + transitions; + + telemetry() : misc(0), silf(0), glyph(0), code(0), states(0), starts(0), transitions(0) {} +}; + +class telemetry::category +{ + size_t * _prev; +public: + category(size_t & t) : _prev(_category) { _category = &t; } + ~category() { _category = _prev; } +}; + +#else +struct telemetry {}; +#endif + +// Checked multiplaction to catch overflow or underflow when allocating memory +#if defined(__has_builtin) + #if __has_builtin(__builtin_mul_overflow) + #define HAVE_BUILTIN_OVERFLOW + #endif +#elif defined(__GNUC__) && (__GNUC__ >= 5) && !defined(__INTEL_COMPILER) + #define HAVE_BUILTIN_OVERFLOW +#endif +#if defined(__has_include) + #if __has_include(<intsafe.h>) && !defined(__CYGWIN__) + #define HAVE_INTSAFE_H + #endif +#elif defined(_WIN32) + #define HAVE_INTSAFE_H +#endif + +// Need to import intsafe into the top level namespace +#if defined(HAVE_INTSAFE_H) +} // namespace graphite2 + +#include <intsafe.h> + +namespace graphite2 { +#endif + +#if defined(HAVE_BUILTIN_OVERFLOW) +inline +bool checked_mul(const size_t a, const size_t b, size_t & t) { + return __builtin_mul_overflow(a, b, &t); +} +#elif defined(HAVE_INTSAFE_H) +inline +bool checked_mul(const size_t a, const size_t b, size_t & t) { + return SizeTMult(a, b, &t) == INTSAFE_E_ARITHMETIC_OVERFLOW; +} +#else +inline +bool checked_mul(const size_t a, const size_t b, size_t & t) { + t = a*b; + return (((a | b) & (~size_t(0) << (sizeof(size_t) << 2))) && (t / a != b)); +} +#endif + +// typesafe wrapper around malloc for simple types +// use free(pointer) to deallocate + +template <typename T> T * gralloc(size_t n) +{ + size_t total; + if (checked_mul(n, sizeof(T), total)) + return 0; +#ifdef GRAPHITE2_TELEMETRY + telemetry::count_bytes(total); +#endif + return static_cast<T*>(malloc(total)); +} + +template <typename T> T * grzeroalloc(size_t n) +{ +#ifdef GRAPHITE2_TELEMETRY + telemetry::count_bytes(sizeof(T) * n); +#endif + return static_cast<T*>(calloc(n, sizeof(T))); +} + +template <typename T> +inline T min(const T a, const T b) +{ + return a < b ? a : b; +} + +template <typename T> +inline T max(const T a, const T b) +{ + return a > b ? a : b; +} + +} // namespace graphite2 + +#define CLASS_NEW_DELETE \ + void * operator new (size_t size){ return gralloc<byte>(size);} \ + void * operator new (size_t, void * p) throw() { return p; } \ + void * operator new[] (size_t size) {return gralloc<byte>(size);} \ + void * operator new[] (size_t, void * p) throw() { return p; } \ + void operator delete (void * p) throw() { free(p);} \ + void operator delete (void *, void *) throw() {} \ + void operator delete[] (void * p)throw() { free(p); } \ + void operator delete[] (void *, void *) throw() {} + +#if defined(__GNUC__) || defined(__clang__) +#define GR_MAYBE_UNUSED __attribute__((unused)) +#else +#define GR_MAYBE_UNUSED +#endif + +#ifndef __has_cpp_attribute +# define __has_cpp_attribute(x) 0 +#endif + +#if __has_cpp_attribute(clang::fallthrough) +# define GR_FALLTHROUGH [[clang::fallthrough]] +#elif __has_cpp_attribute(gnu::fallthrough) +# define GR_FALLTHROUGH [[gnu::fallthrough]] +#elif defined(_MSC_VER) + /* + * MSVC's __fallthrough annotations are checked by /analyze (Code Analysis): + * https://msdn.microsoft.com/en-us/library/ms235402%28VS.80%29.aspx + */ + #include <sal.h> + #define GR_FALLTHROUGH __fallthrough +#elif __GNUC__ >= 7 + #define GR_FALLTHROUGH __attribute__ ((fallthrough)) +#else + #define GR_FALLTHROUGH /* fallthrough */ +#endif + +#ifdef _MSC_VER +#pragma warning(disable: 4800) +#pragma warning(disable: 4355) +#endif diff --git a/thirdparty/graphite/src/inc/NameTable.h b/thirdparty/graphite/src/inc/NameTable.h new file mode 100644 index 0000000000..0fdbeb4d85 --- /dev/null +++ b/thirdparty/graphite/src/inc/NameTable.h @@ -0,0 +1,65 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +#include <graphite2/Segment.h> +#include "inc/TtfTypes.h" +#include "inc/locale2lcid.h" + +namespace graphite2 { + +class NameTable +{ + NameTable(const NameTable &); + NameTable & operator = (const NameTable &); + +public: + NameTable(const void * data, size_t length, uint16 platfromId=3, uint16 encodingID = 1); + ~NameTable() { free(const_cast<TtfUtil::Sfnt::FontNames *>(m_table)); } + enum eNameFallback { + eNoFallback = 0, + eEnUSFallbackOnly = 1, + eEnOrAnyFallback = 2 + }; + uint16 setPlatformEncoding(uint16 platfromId=3, uint16 encodingID = 1); + void * getName(uint16 & languageId, uint16 nameId, gr_encform enc, uint32 & length); + uint16 getLanguageId(const char * bcp47Locale); + + CLASS_NEW_DELETE +private: + uint16 m_platformId; + uint16 m_encodingId; + uint16 m_languageCount; + uint16 m_platformOffset; // offset of first NameRecord with for platform 3, encoding 1 + uint16 m_platformLastRecord; + uint16 m_nameDataLength; + const TtfUtil::Sfnt::FontNames * m_table; + const uint8 * m_nameData; + Locale2Lang m_locale2Lang; +}; + +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/Pass.h b/thirdparty/graphite/src/inc/Pass.h new file mode 100644 index 0000000000..e687a87d8c --- /dev/null +++ b/thirdparty/graphite/src/inc/Pass.h @@ -0,0 +1,118 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +#include <cstdlib> +#include "inc/Code.h" + +namespace graphite2 { + +class Segment; +class Face; +class Silf; +struct Rule; +struct RuleEntry; +struct State; +class FiniteStateMachine; +class Error; +class ShiftCollider; +class KernCollider; +class json; + +enum passtype; + +class Pass +{ +public: + Pass(); + ~Pass(); + + bool readPass(const byte * pPass, size_t pass_length, size_t subtable_base, Face & face, + enum passtype pt, uint32 version, Error &e); + bool runGraphite(vm::Machine & m, FiniteStateMachine & fsm, bool reverse) const; + void init(Silf *silf) { m_silf = silf; } + byte collisionLoops() const { return m_numCollRuns; } + bool reverseDir() const { return m_isReverseDir; } + + CLASS_NEW_DELETE +private: + void findNDoRule(Slot* & iSlot, vm::Machine &, FiniteStateMachine& fsm) const; + int doAction(const vm::Machine::Code* codeptr, Slot * & slot_out, vm::Machine &) const; + bool testPassConstraint(vm::Machine & m) const; + bool testConstraint(const Rule & r, vm::Machine &) const; + bool readRules(const byte * rule_map, const size_t num_entries, + const byte *precontext, const uint16 * sort_key, + const uint16 * o_constraint, const byte *constraint_data, + const uint16 * o_action, const byte * action_data, + Face &, enum passtype pt, Error &e); + bool readStates(const byte * starts, const byte * states, const byte * o_rule_map, Face &, Error &e); + bool readRanges(const byte * ranges, size_t num_ranges, Error &e); + uint16 glyphToCol(const uint16 gid) const; + bool runFSM(FiniteStateMachine & fsm, Slot * slot) const; + void dumpRuleEventConsidered(const FiniteStateMachine & fsm, const RuleEntry & re) const; + void dumpRuleEventOutput(const FiniteStateMachine & fsm, const Rule & r, Slot * os) const; + void adjustSlot(int delta, Slot * & slot_out, SlotMap &) const; + bool collisionShift(Segment *seg, int dir, json * const dbgout) const; + bool collisionKern(Segment *seg, int dir, json * const dbgout) const; + bool collisionFinish(Segment *seg, GR_MAYBE_UNUSED json * const dbgout) const; + bool resolveCollisions(Segment *seg, Slot *slot, Slot *start, ShiftCollider &coll, bool isRev, + int dir, bool &moved, bool &hasCol, json * const dbgout) const; + float resolveKern(Segment *seg, Slot *slot, Slot *start, int dir, + float &ymin, float &ymax, json *const dbgout) const; + + const Silf * m_silf; + uint16 * m_cols; + Rule * m_rules; // rules + RuleEntry * m_ruleMap; + uint16 * m_startStates; // prectxt length + uint16 * m_transitions; + State * m_states; + vm::Machine::Code * m_codes; + byte * m_progs; + + byte m_numCollRuns; + byte m_kernColls; + byte m_iMaxLoop; + uint16 m_numGlyphs; + uint16 m_numRules; + uint16 m_numStates; + uint16 m_numTransition; + uint16 m_numSuccess; + uint16 m_successStart; + uint16 m_numColumns; + byte m_minPreCtxt; + byte m_maxPreCtxt; + byte m_colThreshold; + bool m_isReverseDir; + vm::Machine::Code m_cPConstraint; + +private: //defensive + Pass(const Pass&); + Pass& operator=(const Pass&); +}; + +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/Position.h b/thirdparty/graphite/src/inc/Position.h new file mode 100644 index 0000000000..510e4f4c41 --- /dev/null +++ b/thirdparty/graphite/src/inc/Position.h @@ -0,0 +1,68 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +namespace graphite2 { + +class Position +{ +public: + Position() : x(0), y(0) { } + Position(const float inx, const float iny) : x(inx), y(iny) {} + Position operator + (const Position& a) const { return Position(x + a.x, y + a.y); } + Position operator - (const Position& a) const { return Position(x - a.x, y - a.y); } + Position operator * (const float m) const { return Position(x * m, y * m); } + Position &operator += (const Position &a) { x += a.x; y += a.y; return *this; } + Position &operator *= (const float m) { x *= m; y *= m; return *this; } + + float x; + float y; +}; + +class Rect +{ +public : + Rect() {} + Rect(const Position& botLeft, const Position& topRight): bl(botLeft), tr(topRight) {} + Rect widen(const Rect& other) { return Rect(Position(bl.x > other.bl.x ? other.bl.x : bl.x, bl.y > other.bl.y ? other.bl.y : bl.y), Position(tr.x > other.tr.x ? tr.x : other.tr.x, tr.y > other.tr.y ? tr.y : other.tr.y)); } + Rect operator + (const Position &a) const { return Rect(Position(bl.x + a.x, bl.y + a.y), Position(tr.x + a.x, tr.y + a.y)); } + Rect operator - (const Position &a) const { return Rect(Position(bl.x - a.x, bl.y - a.y), Position(tr.x - a.x, tr.y - a.y)); } + Rect operator * (float m) const { return Rect(Position(bl.x, bl.y) * m, Position(tr.x, tr.y) * m); } + float width() const { return tr.x - bl.x; } + float height() const { return tr.y - bl.y; } + + bool hitTest(Rect &other); + + // returns Position(overlapx, overlapy) where overlap<0 if overlapping else positive) + Position overlap(Position &offset, Rect &other, Position &otherOffset); + //Position constrainedAvoid(Position &offset, Rect &box, Rect &sdbox, Position &other, Rect &obox, Rect &osdbox); + + Position bl; + Position tr; +}; + +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/Rule.h b/thirdparty/graphite/src/inc/Rule.h new file mode 100644 index 0000000000..5964e003a6 --- /dev/null +++ b/thirdparty/graphite/src/inc/Rule.h @@ -0,0 +1,305 @@ +/* GRAPHITE2 LICENSING + + Copyright 2011, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ + +#pragma once + +#include "inc/Code.h" +#include "inc/Slot.h" + +namespace graphite2 { + +struct Rule { + const vm::Machine::Code * constraint, + * action; + unsigned short sort; + byte preContext; +#ifndef NDEBUG + uint16 rule_idx; +#endif + + Rule(); + ~Rule() {} + + CLASS_NEW_DELETE; + +private: + Rule(const Rule &); + Rule & operator = (const Rule &); +}; + +inline +Rule::Rule() +: constraint(0), + action(0), + sort(0), + preContext(0) +{ +#ifndef NDEBUG + rule_idx = 0; +#endif +} + + +struct RuleEntry +{ + const Rule * rule; + + inline + bool operator < (const RuleEntry &r) const + { + const unsigned short lsort = rule->sort, rsort = r.rule->sort; + return lsort > rsort || (lsort == rsort && rule < r.rule); + } + + inline + bool operator == (const RuleEntry &r) const + { + return rule == r.rule; + } +}; + + +struct State +{ + const RuleEntry * rules, + * rules_end; + + bool empty() const; +}; + +inline +bool State::empty() const +{ + return rules_end == rules; +} + + +class SlotMap +{ +public: + enum {MAX_SLOTS=64}; + SlotMap(Segment & seg, uint8 direction, size_t maxSize); + + Slot * * begin(); + Slot * * end(); + size_t size() const; + unsigned short context() const; + void reset(Slot &, unsigned short); + + Slot * const & operator[](int n) const; + Slot * & operator [] (int); + void pushSlot(Slot * const slot); + void collectGarbage(Slot *& aSlot); + + Slot * highwater() { return m_highwater; } + void highwater(Slot *s) { m_highwater = s; m_highpassed = false; } + bool highpassed() const { return m_highpassed; } + void highpassed(bool v) { m_highpassed = v; } + + uint8 dir() const { return m_dir; } + int decMax() { return --m_maxSize; } + + Segment & segment; +private: + Slot * m_slot_map[MAX_SLOTS+1]; + unsigned short m_size; + unsigned short m_precontext; + Slot * m_highwater; + int m_maxSize; + uint8 m_dir; + bool m_highpassed; +}; + + +class FiniteStateMachine +{ +public: + enum {MAX_RULES=128}; + +private: + class Rules + { + public: + Rules(); + void clear(); + const RuleEntry * begin() const; + const RuleEntry * end() const; + size_t size() const; + + void accumulate_rules(const State &state); + + private: + RuleEntry * m_begin, + * m_end, + m_rules[MAX_RULES*2]; + }; + +public: + FiniteStateMachine(SlotMap & map, json * logger); + void reset(Slot * & slot, const short unsigned int max_pre_ctxt); + + Rules rules; + SlotMap & slots; + json * const dbgout; +}; + + +inline +FiniteStateMachine::FiniteStateMachine(SlotMap& map, json * logger) +: slots(map), + dbgout(logger) +{ +} + +inline +void FiniteStateMachine::reset(Slot * & slot, const short unsigned int max_pre_ctxt) +{ + rules.clear(); + int ctxt = 0; + for (; ctxt != max_pre_ctxt && slot->prev(); ++ctxt, slot = slot->prev()); + slots.reset(*slot, ctxt); +} + +inline +FiniteStateMachine::Rules::Rules() + : m_begin(m_rules), m_end(m_rules) +{ +} + +inline +void FiniteStateMachine::Rules::clear() +{ + m_end = m_begin; +} + +inline +const RuleEntry * FiniteStateMachine::Rules::begin() const +{ + return m_begin; +} + +inline +const RuleEntry * FiniteStateMachine::Rules::end() const +{ + return m_end; +} + +inline +size_t FiniteStateMachine::Rules::size() const +{ + return m_end - m_begin; +} + +inline +void FiniteStateMachine::Rules::accumulate_rules(const State &state) +{ + // Only bother if there are rules in the State object. + if (state.empty()) return; + + // Merge the new sorted rules list into the current sorted result set. + const RuleEntry * lre = begin(), * rre = state.rules; + RuleEntry * out = m_rules + (m_begin == m_rules)*MAX_RULES; + const RuleEntry * const lrend = out + MAX_RULES, + * const rrend = state.rules_end; + m_begin = out; + while (lre != end() && out != lrend) + { + if (*lre < *rre) *out++ = *lre++; + else if (*rre < *lre) { *out++ = *rre++; } + else { *out++ = *lre++; ++rre; } + + if (rre == rrend) + { + while (lre != end() && out != lrend) { *out++ = *lre++; } + m_end = out; + return; + } + } + while (rre != rrend && out != lrend) { *out++ = *rre++; } + m_end = out; +} + +inline +SlotMap::SlotMap(Segment & seg, uint8 direction, size_t maxSize) +: segment(seg), m_size(0), m_precontext(0), m_highwater(0), + m_maxSize(int(maxSize)), m_dir(direction), m_highpassed(false) +{ + m_slot_map[0] = 0; +} + +inline +Slot * * SlotMap::begin() +{ + return &m_slot_map[1]; // allow map to go 1 before slot_map when inserting + // at start of segment. +} + +inline +Slot * * SlotMap::end() +{ + return m_slot_map + m_size + 1; +} + +inline +size_t SlotMap::size() const +{ + return m_size; +} + +inline +short unsigned int SlotMap::context() const +{ + return m_precontext; +} + +inline +void SlotMap::reset(Slot & slot, short unsigned int ctxt) +{ + m_size = 0; + m_precontext = ctxt; + *m_slot_map = slot.prev(); +} + +inline +void SlotMap::pushSlot(Slot*const slot) +{ + m_slot_map[++m_size] = slot; +} + +inline +Slot * const & SlotMap::operator[](int n) const +{ + return m_slot_map[n + 1]; +} + +inline +Slot * & SlotMap::operator[](int n) +{ + return m_slot_map[n + 1]; +} + +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/Segment.h b/thirdparty/graphite/src/inc/Segment.h new file mode 100644 index 0000000000..6cf83408d4 --- /dev/null +++ b/thirdparty/graphite/src/inc/Segment.h @@ -0,0 +1,236 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +#include "inc/Main.h" + +#include <cassert> + +#include "inc/CharInfo.h" +#include "inc/Face.h" +#include "inc/FeatureVal.h" +#include "inc/GlyphCache.h" +#include "inc/GlyphFace.h" +#include "inc/Slot.h" +#include "inc/Position.h" +#include "inc/List.h" +#include "inc/Collider.h" + +#define MAX_SEG_GROWTH_FACTOR 64 + +namespace graphite2 { + +typedef Vector<Features> FeatureList; +typedef Vector<Slot *> SlotRope; +typedef Vector<int16 *> AttributeRope; +typedef Vector<SlotJustify *> JustifyRope; + +class Font; +class Segment; +class Silf; + +enum SpliceParam { +/** sub-Segments longer than this are not cached + * (in Unicode code points) */ + eMaxSpliceSize = 96 +}; + +enum justFlags { + gr_justStartInline = 1, + gr_justEndInline = 2 +}; + +class SegmentScopeState +{ +private: + friend class Segment; + Slot * realFirstSlot; + Slot * slotBeforeScope; + Slot * slotAfterScope; + Slot * realLastSlot; + size_t numGlyphsOutsideScope; +}; + +class Segment +{ + // Prevent copying of any kind. + Segment(const Segment&); + Segment& operator=(const Segment&); + +public: + + enum { + SEG_INITCOLLISIONS = 1, + SEG_HASCOLLISIONS = 2 + }; + + size_t slotCount() const { return m_numGlyphs; } //one slot per glyph + void extendLength(ptrdiff_t num) { m_numGlyphs += num; } + Position advance() const { return m_advance; } + bool runGraphite() { if (m_silf) return m_face->runGraphite(this, m_silf); else return true;}; + void chooseSilf(uint32 script) { m_silf = m_face->chooseSilf(script); } + const Silf *silf() const { return m_silf; } + size_t charInfoCount() const { return m_numCharinfo; } + const CharInfo *charinfo(unsigned int index) const { return index < m_numCharinfo ? m_charinfo + index : NULL; } + CharInfo *charinfo(unsigned int index) { return index < m_numCharinfo ? m_charinfo + index : NULL; } + + Segment(size_t numchars, const Face* face, uint32 script, int dir); + ~Segment(); + uint8 flags() const { return m_flags; } + void flags(uint8 f) { m_flags = f; } + Slot *first() { return m_first; } + void first(Slot *p) { m_first = p; } + Slot *last() { return m_last; } + void last(Slot *p) { m_last = p; } + void appendSlot(int i, int cid, int gid, int fid, size_t coffset); + Slot *newSlot(); + void freeSlot(Slot *); + SlotJustify *newJustify(); + void freeJustify(SlotJustify *aJustify); + Position positionSlots(const Font *font=0, Slot *first=0, Slot *last=0, bool isRtl = false, bool isFinal = true); + void associateChars(int offset, size_t num); + void linkClusters(Slot *first, Slot *last); + uint16 getClassGlyph(uint16 cid, uint16 offset) const { return m_silf->getClassGlyph(cid, offset); } + uint16 findClassIndex(uint16 cid, uint16 gid) const { return m_silf->findClassIndex(cid, gid); } + int addFeatures(const Features& feats) { m_feats.push_back(feats); return int(m_feats.size()) - 1; } + uint32 getFeature(int index, uint8 findex) const { const FeatureRef* pFR=m_face->theSill().theFeatureMap().featureRef(findex); if (!pFR) return 0; else return pFR->getFeatureVal(m_feats[index]); } + void setFeature(int index, uint8 findex, uint32 val) { + const FeatureRef* pFR=m_face->theSill().theFeatureMap().featureRef(findex); + if (pFR) + { + if (val > pFR->maxVal()) val = pFR->maxVal(); + pFR->applyValToFeature(val, m_feats[index]); + } } + int8 dir() const { return m_dir; } + void dir(int8 val) { m_dir = val; } + bool currdir() const { return ((m_dir >> 6) ^ m_dir) & 1; } + uint8 passBits() const { return m_passBits; } + void mergePassBits(const uint8 val) { m_passBits &= val; } + int16 glyphAttr(uint16 gid, uint16 gattr) const { const GlyphFace * p = m_face->glyphs().glyphSafe(gid); return p ? p->attrs()[gattr] : 0; } + int32 getGlyphMetric(Slot *iSlot, uint8 metric, uint8 attrLevel, bool rtl) const; + float glyphAdvance(uint16 gid) const { return m_face->glyphs().glyph(gid)->theAdvance().x; } + const Rect &theGlyphBBoxTemporary(uint16 gid) const { return m_face->glyphs().glyph(gid)->theBBox(); } //warning value may become invalid when another glyph is accessed + Slot *findRoot(Slot *is) const { return is->attachedTo() ? findRoot(is->attachedTo()) : is; } + int numAttrs() const { return m_silf->numUser(); } + int defaultOriginal() const { return m_defaultOriginal; } + const Face * getFace() const { return m_face; } + const Features & getFeatures(unsigned int /*charIndex*/) { assert(m_feats.size() == 1); return m_feats[0]; } + void bidiPass(int paradir, uint8 aMirror); + int8 getSlotBidiClass(Slot *s) const; + void doMirror(uint16 aMirror); + Slot *addLineEnd(Slot *nSlot); + void delLineEnd(Slot *s); + bool hasJustification() const { return m_justifies.size() != 0; } + void reverseSlots(); + + bool isWhitespace(const int cid) const; + bool hasCollisionInfo() const { return (m_flags & SEG_HASCOLLISIONS) && m_collisions; } + SlotCollision *collisionInfo(const Slot *s) const { return m_collisions ? m_collisions + s->index() : 0; } + CLASS_NEW_DELETE + +public: //only used by: GrSegment* makeAndInitialize(const GrFont *font, const GrFace *face, uint32 script, const FeaturesHandle& pFeats/*must not be IsNull*/, encform enc, const void* pStart, size_t nChars, int dir); + bool read_text(const Face *face, const Features* pFeats/*must not be NULL*/, gr_encform enc, const void*pStart, size_t nChars); + void finalise(const Font *font, bool reverse=false); + float justify(Slot *pSlot, const Font *font, float width, enum justFlags flags, Slot *pFirst, Slot *pLast); + bool initCollisions(); + +private: + Position m_advance; // whole segment advance + SlotRope m_slots; // Vector of slot buffers + AttributeRope m_userAttrs; // Vector of userAttrs buffers + JustifyRope m_justifies; // Slot justification info buffers + FeatureList m_feats; // feature settings referenced by charinfos in this segment + Slot * m_freeSlots; // linked list of free slots + SlotJustify * m_freeJustifies; // Slot justification blocks free list + CharInfo * m_charinfo; // character info, one per input character + SlotCollision * m_collisions; + const Face * m_face; // GrFace + const Silf * m_silf; + Slot * m_first; // first slot in segment + Slot * m_last; // last slot in segment + size_t m_bufSize, // how big a buffer to create when need more slots + m_numGlyphs, + m_numCharinfo; // size of the array and number of input characters + int m_defaultOriginal; // number of whitespace chars in the string + int8 m_dir; + uint8 m_flags, // General purpose flags + m_passBits; // if bit set then skip pass +}; + +inline +int8 Segment::getSlotBidiClass(Slot *s) const +{ + int8 res = s->getBidiClass(); + if (res != -1) return res; + res = int8(glyphAttr(s->gid(), m_silf->aBidi())); + s->setBidiClass(res); + return res; +} + +inline +void Segment::finalise(const Font *font, bool reverse) +{ + if (!m_first || !m_last) return; + + m_advance = positionSlots(font, m_first, m_last, m_silf->dir(), true); + //associateChars(0, m_numCharinfo); + if (reverse && currdir() != (m_dir & 1)) + reverseSlots(); + linkClusters(m_first, m_last); +} + +inline +int32 Segment::getGlyphMetric(Slot *iSlot, uint8 metric, uint8 attrLevel, bool rtl) const { + if (attrLevel > 0) + { + Slot *is = findRoot(iSlot); + return is->clusterMetric(this, metric, attrLevel, rtl); + } + else + return m_face->getGlyphMetric(iSlot->gid(), metric); +} + +inline +bool Segment::isWhitespace(const int cid) const +{ + return ((cid >= 0x0009) * (cid <= 0x000D) + + (cid == 0x0020) + + (cid == 0x0085) + + (cid == 0x00A0) + + (cid == 0x1680) + + (cid == 0x180E) + + (cid >= 0x2000) * (cid <= 0x200A) + + (cid == 0x2028) + + (cid == 0x2029) + + (cid == 0x202F) + + (cid == 0x205F) + + (cid == 0x3000)) != 0; +} + +} // namespace graphite2 + +struct gr_segment : public graphite2::Segment {}; diff --git a/thirdparty/graphite/src/inc/Silf.h b/thirdparty/graphite/src/inc/Silf.h new file mode 100644 index 0000000000..edc0c3a16d --- /dev/null +++ b/thirdparty/graphite/src/inc/Silf.h @@ -0,0 +1,128 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +#include "graphite2/Font.h" +#include "inc/Main.h" +#include "inc/Pass.h" + +namespace graphite2 { + +class Face; +class Segment; +class FeatureVal; +class VMScratch; +class Error; + +class Pseudo +{ +public: + uint32 uid; + uint32 gid; + CLASS_NEW_DELETE; +}; + +class Justinfo +{ +public: + Justinfo(uint8 stretch, uint8 shrink, uint8 step, uint8 weight) : + m_astretch(stretch), m_ashrink(shrink), m_astep(step), + m_aweight(weight) {}; + uint8 attrStretch() const { return m_astretch; } + uint8 attrShrink() const { return m_ashrink; } + uint8 attrStep() const { return m_astep; } + uint8 attrWeight() const { return m_aweight; } + +private: + uint8 m_astretch; + uint8 m_ashrink; + uint8 m_astep; + uint8 m_aweight; +}; + +class Silf +{ + // Prevent copying + Silf(const Silf&); + Silf& operator=(const Silf&); + +public: + Silf() throw(); + ~Silf() throw(); + + bool readGraphite(const byte * const pSilf, size_t lSilf, Face &face, uint32 version); + bool runGraphite(Segment *seg, uint8 firstPass=0, uint8 lastPass=0, int dobidi = 0) const; + uint16 findClassIndex(uint16 cid, uint16 gid) const; + uint16 getClassGlyph(uint16 cid, unsigned int index) const; + uint16 findPseudo(uint32 uid) const; + uint8 numUser() const { return m_aUser; } + uint8 aPseudo() const { return m_aPseudo; } + uint8 aBreak() const { return m_aBreak; } + uint8 aMirror() const {return m_aMirror; } + uint8 aPassBits() const { return m_aPassBits; } + uint8 aBidi() const { return m_aBidi; } + uint8 aCollision() const { return m_aCollision; } + uint8 substitutionPass() const { return m_sPass; } + uint8 positionPass() const { return m_pPass; } + uint8 justificationPass() const { return m_jPass; } + uint8 bidiPass() const { return m_bPass; } + uint8 numPasses() const { return m_numPasses; } + uint8 maxCompPerLig() const { return m_iMaxComp; } + uint16 numClasses() const { return m_nClass; } + byte flags() const { return m_flags; } + byte dir() const { return m_dir; } + uint8 numJustLevels() const { return m_numJusts; } + Justinfo *justAttrs() const { return m_justs; } + uint16 endLineGlyphid() const { return m_gEndLine; } + const gr_faceinfo *silfInfo() const { return &m_silfinfo; } + + CLASS_NEW_DELETE; + +private: + size_t readClassMap(const byte *p, size_t data_len, uint32 version, Error &e); + template<typename T> inline uint32 readClassOffsets(const byte *&p, size_t data_len, Error &e); + + Pass * m_passes; + Pseudo * m_pseudos; + uint32 * m_classOffsets; + uint16 * m_classData; + Justinfo * m_justs; + uint8 m_numPasses; + uint8 m_numJusts; + uint8 m_sPass, m_pPass, m_jPass, m_bPass, + m_flags, m_dir; + + uint8 m_aPseudo, m_aBreak, m_aUser, m_aBidi, m_aMirror, m_aPassBits, + m_iMaxComp, m_aCollision; + uint16 m_aLig, m_numPseudo, m_nClass, m_nLinear, + m_gEndLine; + gr_faceinfo m_silfinfo; + + void releaseBuffers() throw(); +}; + +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/Slot.h b/thirdparty/graphite/src/inc/Slot.h new file mode 100644 index 0000000000..df39d9a3bb --- /dev/null +++ b/thirdparty/graphite/src/inc/Slot.h @@ -0,0 +1,170 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +#include "graphite2/Types.h" +#include "graphite2/Segment.h" +#include "inc/Main.h" +#include "inc/Font.h" +#include "inc/Position.h" + +namespace graphite2 { + +typedef gr_attrCode attrCode; + +class GlyphFace; +class Segment; + +struct SlotJustify +{ + static const int NUMJUSTPARAMS = 5; + + SlotJustify(const SlotJustify &); + SlotJustify & operator = (const SlotJustify &); + +public: + static size_t size_of(size_t levels) { return sizeof(SlotJustify) + ((levels > 1 ? levels : 1)*NUMJUSTPARAMS - 1)*sizeof(int16); } + + void LoadSlot(const Slot *s, const Segment *seg); + + SlotJustify *next; + int16 values[1]; +}; + +class Slot +{ + enum Flag + { + DELETED = 1, + INSERTED = 2, + COPIED = 4, + POSITIONED = 8, + ATTACHED = 16 + }; + +public: + struct iterator; + + unsigned short gid() const { return m_glyphid; } + Position origin() const { return m_position; } + float advance() const { return m_advance.x; } + void advance(Position &val) { m_advance = val; } + Position advancePos() const { return m_advance; } + int before() const { return m_before; } + int after() const { return m_after; } + uint32 index() const { return m_index; } + void index(uint32 val) { m_index = val; } + + Slot(int16 *m_userAttr = NULL); + void set(const Slot & slot, int charOffset, size_t numUserAttr, size_t justLevels, size_t numChars); + Slot *next() const { return m_next; } + void next(Slot *s) { m_next = s; } + Slot *prev() const { return m_prev; } + void prev(Slot *s) { m_prev = s; } + uint16 glyph() const { return m_realglyphid ? m_realglyphid : m_glyphid; } + void setGlyph(Segment *seg, uint16 glyphid, const GlyphFace * theGlyph = NULL); + void setRealGid(uint16 realGid) { m_realglyphid = realGid; } + void adjKern(const Position &pos) { m_shift = m_shift + pos; m_advance = m_advance + pos; } + void origin(const Position &pos) { m_position = pos + m_shift; } + void originate(int ind) { m_original = ind; } + int original() const { return m_original; } + void before(int ind) { m_before = ind; } + void after(int ind) { m_after = ind; } + bool isBase() const { return (!m_parent); } + void update(int numSlots, int numCharInfo, Position &relpos); + Position finalise(const Segment* seg, const Font* font, Position & base, Rect & bbox, uint8 attrLevel, float & clusterMin, bool rtl, bool isFinal, int depth = 0); + bool isDeleted() const { return (m_flags & DELETED) ? true : false; } + void markDeleted(bool state) { if (state) m_flags |= DELETED; else m_flags &= ~DELETED; } + bool isCopied() const { return (m_flags & COPIED) ? true : false; } + void markCopied(bool state) { if (state) m_flags |= COPIED; else m_flags &= ~COPIED; } + bool isPositioned() const { return (m_flags & POSITIONED) ? true : false; } + void markPositioned(bool state) { if (state) m_flags |= POSITIONED; else m_flags &= ~POSITIONED; } + bool isInsertBefore() const { return !(m_flags & INSERTED); } + uint8 getBidiLevel() const { return m_bidiLevel; } + void setBidiLevel(uint8 level) { m_bidiLevel = level; } + int8 getBidiClass(const Segment *seg); + int8 getBidiClass() const { return m_bidiCls; } + void setBidiClass(int8 cls) { m_bidiCls = cls; } + int16 *userAttrs() const { return m_userAttr; } + void userAttrs(int16 *p) { m_userAttr = p; } + void markInsertBefore(bool state) { if (!state) m_flags |= INSERTED; else m_flags &= ~INSERTED; } + void setAttr(Segment* seg, attrCode ind, uint8 subindex, int16 val, const SlotMap & map); + int getAttr(const Segment *seg, attrCode ind, uint8 subindex) const; + int getJustify(const Segment *seg, uint8 level, uint8 subindex) const; + void setJustify(Segment *seg, uint8 level, uint8 subindex, int16 value); + bool isLocalJustify() const { return m_justs != NULL; }; + void attachTo(Slot *ap) { m_parent = ap; } + Slot *attachedTo() const { return m_parent; } + Position attachOffset() const { return m_attach - m_with; } + Slot* firstChild() const { return m_child; } + void firstChild(Slot *ap) { m_child = ap; } + bool child(Slot *ap); + Slot* nextSibling() const { return m_sibling; } + void nextSibling(Slot *ap) { m_sibling = ap; } + bool sibling(Slot *ap); + bool removeChild(Slot *ap); + int32 clusterMetric(const Segment* seg, uint8 metric, uint8 attrLevel, bool rtl); + void positionShift(Position a) { m_position += a; } + void floodShift(Position adj, int depth = 0); + float just() const { return m_just; } + void just(float j) { m_just = j; } + Slot *nextInCluster(const Slot *s) const; + bool isChildOf(const Slot *base) const; + + CLASS_NEW_DELETE + +private: + Slot *m_next; // linked list of slots + Slot *m_prev; + unsigned short m_glyphid; // glyph id + uint16 m_realglyphid; + uint32 m_original; // charinfo that originated this slot (e.g. for feature values) + uint32 m_before; // charinfo index of before association + uint32 m_after; // charinfo index of after association + uint32 m_index; // slot index given to this slot during finalising + Slot *m_parent; // index to parent we are attached to + Slot *m_child; // index to first child slot that attaches to us + Slot *m_sibling; // index to next child that attaches to our parent + Position m_position; // absolute position of glyph + Position m_shift; // .shift slot attribute + Position m_advance; // .advance slot attribute + Position m_attach; // attachment point on us + Position m_with; // attachment point position on parent + float m_just; // Justification inserted space + uint8 m_flags; // holds bit flags + byte m_attLevel; // attachment level + int8 m_bidiCls; // bidirectional class + byte m_bidiLevel; // bidirectional level + int16 *m_userAttr; // pointer to user attributes + SlotJustify *m_justs; // pointer to justification parameters + + friend class Segment; +}; + +} // namespace graphite2 + +struct gr_slot : public graphite2::Slot {}; diff --git a/thirdparty/graphite/src/inc/Sparse.h b/thirdparty/graphite/src/inc/Sparse.h new file mode 100644 index 0000000000..fcda890171 --- /dev/null +++ b/thirdparty/graphite/src/inc/Sparse.h @@ -0,0 +1,168 @@ +/* GRAPHITE2 LICENSING + + Copyright 2011, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once +#include <iterator> +#include <utility> + +#include "inc/Main.h" + +namespace graphite2 { + + +// A read-only packed fast sparse array of uint16 with uint16 keys. +// Like most container classes this has capacity and size properties and these +// refer to the number of stored entries and the number of addressable entries +// as normal. However due the sparse nature the capacity is always <= than the +// size. +class sparse +{ +public: + typedef uint16 key_type; + typedef uint16 mapped_type; + typedef std::pair<const key_type, mapped_type> value_type; + +private: + typedef unsigned long mask_t; + + static const unsigned char SIZEOF_CHUNK = (sizeof(mask_t) - sizeof(key_type))*8; + + struct chunk + { + mask_t mask:SIZEOF_CHUNK; + key_type offset; + }; + + static const chunk empty_chunk; + sparse(const sparse &); + sparse & operator = (const sparse &); + +public: + template<typename I> + sparse(I first, const I last); + sparse() throw(); + ~sparse() throw(); + + operator bool () const throw(); + mapped_type operator [] (const key_type k) const throw(); + + size_t capacity() const throw(); + size_t size() const throw(); + + size_t _sizeof() const throw(); + + CLASS_NEW_DELETE; + +private: + union { + chunk * map; + mapped_type * values; + } m_array; + key_type m_nchunks; +}; + + +inline +sparse::sparse() throw() : m_nchunks(0) +{ + m_array.map = const_cast<graphite2::sparse::chunk *>(&empty_chunk); +} + + +template <typename I> +sparse::sparse(I attr, const I last) +: m_nchunks(0) +{ + m_array.map = 0; + + // Find the maximum extent of the key space. + size_t n_values=0; + long lastkey = -1; + for (I i = attr; i != last; ++i, ++n_values) + { + const typename std::iterator_traits<I>::value_type v = *i; + if (v.second == 0) { --n_values; continue; } + if (v.first <= lastkey) { m_nchunks = 0; return; } + + lastkey = v.first; + const key_type k = v.first / SIZEOF_CHUNK; + if (k >= m_nchunks) m_nchunks = k+1; + } + if (m_nchunks == 0) + { + m_array.map=const_cast<graphite2::sparse::chunk *>(&empty_chunk); + return; + } + + m_array.values = grzeroalloc<mapped_type>((m_nchunks*sizeof(chunk) + sizeof(mapped_type)-1) + / sizeof(mapped_type) + + n_values); + + if (m_array.values == 0) + return; + + // coverity[forward_null : FALSE] Since m_array is union and m_array.values is not NULL + chunk * ci = m_array.map; + ci->offset = (m_nchunks*sizeof(chunk) + sizeof(mapped_type)-1)/sizeof(mapped_type); + mapped_type * vi = m_array.values + ci->offset; + for (; attr != last; ++attr, ++vi) + { + const typename std::iterator_traits<I>::value_type v = *attr; + if (v.second == 0) { --vi; continue; } + + chunk * const ci_ = m_array.map + v.first/SIZEOF_CHUNK; + + if (ci != ci_) + { + ci = ci_; + ci->offset = key_type(vi - m_array.values); + } + + ci->mask |= 1UL << (SIZEOF_CHUNK - 1 - (v.first % SIZEOF_CHUNK)); + *vi = v.second; + } +} + + +inline +sparse::operator bool () const throw() +{ + return m_array.map != 0; +} + +inline +size_t sparse::size() const throw() +{ + return m_nchunks*SIZEOF_CHUNK; +} + +inline +size_t sparse::_sizeof() const throw() +{ + return sizeof(sparse) + capacity()*sizeof(mapped_type) + m_nchunks*sizeof(chunk); +} + +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/TtfTypes.h b/thirdparty/graphite/src/inc/TtfTypes.h new file mode 100644 index 0000000000..ae67915304 --- /dev/null +++ b/thirdparty/graphite/src/inc/TtfTypes.h @@ -0,0 +1,419 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once +/*--------------------------------------------------------------------*//*:Ignore this sentence. + +File: TtfTypes.h +Responsibility: Tim Eves +Last reviewed: Not yet. + +Description: +Provides types required to represent the TTF basic types. +-------------------------------------------------------------------------------*//*:End Ignore*/ + + +//********************************************************************************************** +// Include files +//********************************************************************************************** +namespace graphite2 +{ +namespace TtfUtil +{ +//********************************************************************************************** +// Forward declarations +//********************************************************************************************** + + +//********************************************************************************************** +// Type declarations +//********************************************************************************************** +typedef unsigned char uint8; +typedef uint8 byte; +typedef signed char int8; +typedef unsigned short uint16; +typedef short int16; +typedef unsigned int uint32; +typedef int int32; + +typedef int16 short_frac; +typedef int32 fixed; +typedef int16 fword; +typedef uint16 ufword; +typedef int16 f2dot14; +typedef uint32 long_date_time[2]; + +//********************************************************************************************** +// Constants and enum types +//**********************************************************************************************/ +enum +{ + OneFix = 1<<16 +}; + +//********************************************************************************************** +// Table declarations +//********************************************************************************************** +namespace Sfnt +{ +#pragma pack(push,1) // We need this or the structure members aren't aligned + // correctly. Fortunately this form of pragma is supposed + // to be recognised by VS C++ too (at least according to + // MSDN). + + struct OffsetSubTable + { + uint32 scaler_type; + uint16 num_tables, + search_range, + entry_selector, + range_shift; + struct Entry + { + uint32 tag, + checksum, + offset, + length; + } table_directory[1]; + + enum ScalerType + { + TrueTypeMac = 0x74727565U, + TrueTypeWin = 0x00010000U, + Type1 = 0x74797031U + }; + }; + + + + + struct CharacterCodeMap + { + uint16 version, + num_subtables; + struct + { + uint16 platform_id, + platform_specific_id; + uint32 offset; + } encoding[1]; + }; + + struct CmapSubTable + { + uint16 format, + length, + language; + }; + + struct CmapSubTableFormat4 : CmapSubTable + { + uint16 seg_count_x2, + search_range, + entry_selector, + range_shift, + end_code[1]; + // There are arrarys after this which need their + // start positions calculated since end_code is + // seg_count uint16s long. + }; + + struct CmapSubTableFormat12 + { + fixed format; + uint32 length, + language, + num_groups; + struct + { + uint32 start_char_code, + end_char_code, + start_glyph_id; + } group[1]; + }; + + + + struct FontHeader + { + fixed version, + font_revision; + uint32 check_sum_adjustment, + magic_number; + uint16 flags, + units_per_em; + long_date_time created, + modified; + fword x_min, + y_min, + x_max, + y_max; + uint16 mac_style, + lowest_rec_ppem; + int16 font_direction_hint, + index_to_loc_format, + glyph_data_format; + enum + { + MagicNumber = 0x5F0F3CF5, + GlypDataFormat = 0 + }; + enum {ShortIndexLocFormat, LongIndexLocFormat}; + }; + + + + + struct PostScriptGlyphName + { + fixed format, + italic_angle; + fword underline_position, + underline_thickness; + uint32 is_fixed_pitch, + min_mem_type42, + max_mem_type42, + min_mem_type1, + max_mem_type1; + enum + { + Format1 = 0x10000, + Format2 = 0x20000, + Format25 = 0x28000, + Format3 = 0x30000, + Format4 = 0x40000 + }; + }; + + struct PostScriptGlyphName2 : PostScriptGlyphName + { + uint16 number_of_glyphs, + glyph_name_index[1]; + }; + + struct PostScriptGlyphName25 : PostScriptGlyphName + { + uint16 number_of_glyphs; + int8 offset[1]; + }; + + struct PostScriptGlyphName3 : PostScriptGlyphName {}; + + struct PostScriptGlyphName4 : PostScriptGlyphName + { + uint16 glyph_to_char_map[1]; + }; + + + struct HorizontalHeader + { + fixed version; + fword ascent, + descent, + line_gap; + ufword advance_width_max; + fword min_left_side_bearing, + max_left_side_bearing, + x_max_element; + int16 caret_slope_rise, + caret_slope_run; + fword caret_offset; + int16 reserved[4], + metric_data_format; + uint16 num_long_hor_metrics; + }; + + struct MaximumProfile + { + fixed version; + uint16 num_glyphs, + max_points, + max_contours, + max_component_points, + max_component_contours, + max_zones, + max_twilight_points, + max_storage, + max_function_defs, + max_instruction_defs, + max_stack_elements, + max_size_of_instructions, + max_component_elements, + max_component_depth; + }; + + + typedef byte Panose[10]; + + struct Compatibility0 + { + uint16 version; + int16 x_avg_char_width; + uint16 weight_class, + width_class; + int16 fs_type, + y_subscript_x_size, + y_subscript_y_size, + y_subscript_x_offset, + y_subscript_y_offset, + y_superscript_x_size, + y_superscript_y_size, + y_superscript_x_offset, + y_superscript_y_offset, + y_strikeout_size, + y_strikeout_position, + family_class; + Panose panose; + uint32 unicode_range[4]; + int8 ach_vend_id[4]; + uint16 fs_selection, + fs_first_char_index, + fs_last_char_index, // Acording to Apple's spec this is where v0 should end + typo_ascender, + typo_descender, + type_linegap, + win_ascent, + win_descent; + + enum + { + Italic =0x01, + Underscore=0x02, + Negative =0x04, + Outlined =0x08, + StrikeOut =0x10, + Bold =0x20 + }; + }; + + struct Compatibility1 : Compatibility0 + { + uint32 codepage_range[2]; + }; + + struct Compatibility2 : Compatibility1 + { + int16 x_height, + cap_height; + uint16 default_char, + break_char, + max_context; + }; + + struct Compatibility3 : Compatibility2 {}; + + typedef Compatibility3 Compatibility; + + + struct NameRecord + { + uint16 platform_id, + platform_specific_id, + language_id, + name_id, + length, + offset; + enum {Unicode, Mactintosh, Reserved, Microsoft}; + enum + { + Copyright, Family, Subfamily, UniqueSubfamily, + Fullname, Version, PostScript + }; + }; + + struct LangTagRecord + { + uint16 length, + offset; + }; + + struct FontNames + { + uint16 format, + count, + string_offset; + NameRecord name_record[1]; + }; + + + struct HorizontalMetric + { + uint16 advance_width; + int16 left_side_bearing; + }; + + + struct Glyph + { + int16 number_of_contours; + fword x_min, + y_min, + x_max, + y_max; + }; + + struct SimpleGlyph : Glyph + { + uint16 end_pts_of_contours[1]; + enum + { + OnCurve = 0x01, + XShort = 0x02, + YShort = 0x04, + Repeat = 0x08, + XIsSame = 0x10, + XIsPos = 0x10, + YIsSame = 0x20, + YIsPos = 0x20 + }; + }; + + struct CompoundGlyph : Glyph + { + uint16 flags, + glyph_index; + enum + { + Arg1Arg2Words = 0x01, + ArgsAreXYValues = 0x02, + RoundXYToGrid = 0x04, + HaveScale = 0x08, + MoreComponents = 0x20, + HaveXAndYScale = 0x40, + HaveTwoByTwo = 0x80, + HaveInstructions = 0x100, + UseMyMetrics = 0x200, + OverlapCompund = 0x400, + ScaledOffset = 0x800, + UnscaledOffset = 0x1000 + }; + }; + +#pragma pack(pop) +} // end of namespace Sfnt + +} // end of namespace TtfUtil +} // end of namespace graphite2 diff --git a/thirdparty/graphite/src/inc/TtfUtil.h b/thirdparty/graphite/src/inc/TtfUtil.h new file mode 100644 index 0000000000..3952bc06fb --- /dev/null +++ b/thirdparty/graphite/src/inc/TtfUtil.h @@ -0,0 +1,208 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once +/*--------------------------------------------------------------------*//*:Ignore this sentence. + +File: TtfUtil.h +Responsibility: Alan Ward +Last reviewed: Not yet. + +Description: + Utility class for handling TrueType font files. +----------------------------------------------------------------------------------------------*/ + + +#include <cstddef> + +namespace graphite2 +{ +namespace TtfUtil +{ + +#define OVERFLOW_OFFSET_CHECK(p, o) (o + reinterpret_cast<size_t>(p) < reinterpret_cast<size_t>(p)) + +typedef long fontTableId32; +typedef unsigned short gid16; + +#define TTF_TAG(a,b,c,d) ((a << 24UL) + (b << 16UL) + (c << 8UL) + (d)) + +// Enumeration used to specify a table in a TTF file +class Tag +{ + unsigned int _v; +public: + Tag(const char n[5]) throw() : _v(TTF_TAG(n[0],n[1],n[2],n[3])) {} + Tag(const unsigned int tag) throw() : _v(tag) {} + + operator unsigned int () const throw () { return _v; } + + enum + { + Feat = TTF_TAG('F','e','a','t'), + Glat = TTF_TAG('G','l','a','t'), + Gloc = TTF_TAG('G','l','o','c'), + Sile = TTF_TAG('S','i','l','e'), + Silf = TTF_TAG('S','i','l','f'), + Sill = TTF_TAG('S','i','l','l'), + cmap = TTF_TAG('c','m','a','p'), + cvt = TTF_TAG('c','v','t',' '), + cryp = TTF_TAG('c','r','y','p'), + head = TTF_TAG('h','e','a','d'), + fpgm = TTF_TAG('f','p','g','m'), + gdir = TTF_TAG('g','d','i','r'), + glyf = TTF_TAG('g','l','y','f'), + hdmx = TTF_TAG('h','d','m','x'), + hhea = TTF_TAG('h','h','e','a'), + hmtx = TTF_TAG('h','m','t','x'), + loca = TTF_TAG('l','o','c','a'), + kern = TTF_TAG('k','e','r','n'), + LTSH = TTF_TAG('L','T','S','H'), + maxp = TTF_TAG('m','a','x','p'), + name = TTF_TAG('n','a','m','e'), + OS_2 = TTF_TAG('O','S','/','2'), + post = TTF_TAG('p','o','s','t'), + prep = TTF_TAG('p','r','e','p') + }; +}; + +/*---------------------------------------------------------------------------------------------- + Class providing utility methods to parse a TrueType font file (TTF). + Callling application handles all file input and memory allocation. + Assumes minimal knowledge of TTF file format. +----------------------------------------------------------------------------------------------*/ + ////////////////////////////////// tools to find & check TTF tables + bool GetHeaderInfo(size_t & lOffset, size_t & lSize); + bool CheckHeader(const void * pHdr); + bool GetTableDirInfo(const void * pHdr, size_t & lOffset, size_t & lSize); + bool GetTableInfo(const Tag TableTag, const void * pHdr, const void * pTableDir, + size_t & lOffset, size_t & lSize); + bool CheckTable(const Tag TableId, const void * pTable, size_t lTableSize); + + ////////////////////////////////// simple font wide info + size_t GlyphCount(const void * pMaxp); +#ifdef ALL_TTFUTILS + size_t MaxCompositeComponentCount(const void * pMaxp); + size_t MaxCompositeLevelCount(const void * pMaxp); + size_t LocaGlyphCount(size_t lLocaSize, const void * pHead); // throw (std::domain_error); +#endif + int DesignUnits(const void * pHead); +#ifdef ALL_TTFUTILS + int HeadTableCheckSum(const void * pHead); + void HeadTableCreateTime(const void * pHead, unsigned int * pnDateBC, unsigned int * pnDateAD); + void HeadTableModifyTime(const void * pHead, unsigned int * pnDateBC, unsigned int * pnDateAD); + bool IsItalic(const void * pHead); + int FontAscent(const void * pOs2); + int FontDescent(const void * pOs2); + bool FontOs2Style(const void *pOs2, bool & fBold, bool & fItalic); + bool Get31EngFamilyInfo(const void * pName, size_t & lOffset, size_t & lSize); + bool Get31EngFullFontInfo(const void * pName, size_t & lOffset, size_t & lSize); + bool Get30EngFamilyInfo(const void * pName, size_t & lOffset, size_t & lSize); + bool Get30EngFullFontInfo(const void * pName, size_t & lOffset, size_t & lSize); + int PostLookup(const void * pPost, size_t lPostSize, const void * pMaxp, + const char * pPostName); +#endif + + ////////////////////////////////// utility methods helpful for name table + bool GetNameInfo(const void * pName, int nPlatformId, int nEncodingId, + int nLangId, int nNameId, size_t & lOffset, size_t & lSize); + //size_t NameTableLength(const byte * pTable); +#ifdef ALL_TTFUTILS + int GetLangsForNames(const void * pName, int nPlatformId, int nEncodingId, + int *nameIdList, int cNameIds, short *langIdList); + void SwapWString(void * pWStr, size_t nSize = 0); // throw (std::invalid_argument); +#endif + + ////////////////////////////////// cmap lookup tools + const void * FindCmapSubtable(const void * pCmap, int nPlatformId = 3, + int nEncodingId = 1, size_t length = 0); + bool CheckCmapSubtable4(const void * pCmap31, const void * pCmapEnd /*, unsigned int maxgid*/); + gid16 CmapSubtable4Lookup(const void * pCmapSubtabel4, unsigned int nUnicodeId, int rangeKey = 0); + unsigned int CmapSubtable4NextCodepoint(const void *pCmap31, unsigned int nUnicodeId, + int * pRangeKey = 0); + bool CheckCmapSubtable12(const void *pCmap310, const void * pCmapEnd /*, unsigned int maxgid*/); + gid16 CmapSubtable12Lookup(const void * pCmap310, unsigned int uUnicodeId, int rangeKey = 0); + unsigned int CmapSubtable12NextCodepoint(const void *pCmap310, unsigned int nUnicodeId, + int * pRangeKey = 0); + + ///////////////////////////////// horizontal metric data for a glyph + bool HorMetrics(gid16 nGlyphId, const void * pHmtx, size_t lHmtxSize, + const void * pHhea, int & nLsb, unsigned int & nAdvWid); + + ////////////////////////////////// primitives for loca and glyf lookup + size_t LocaLookup(gid16 nGlyphId, const void * pLoca, size_t lLocaSize, + const void * pHead); // throw (std::out_of_range); + void * GlyfLookup(const void * pGlyf, size_t lGlyfOffset, size_t lTableLen); + + ////////////////////////////////// primitves for simple glyph data + bool GlyfBox(const void * pSimpleGlyf, int & xMin, int & yMin, + int & xMax, int & yMax); + +#ifdef ALL_TTFUTILS + int GlyfContourCount(const void * pSimpleGlyf); + bool GlyfContourEndPoints(const void * pSimpleGlyf, int * prgnContourEndPoint, + int cnPointsTotal, size_t & cnPoints); + bool GlyfPoints(const void * pSimpleGlyf, int * prgnX, int * prgnY, + char * prgbFlag, int cnPointsTotal, int & cnPoints); + + // primitive to find the glyph ids in a composite glyph + bool GetComponentGlyphIds(const void * pSimpleGlyf, int * prgnCompId, + size_t cnCompIdTotal, size_t & cnCompId); + // primitive to find the placement data for a component in a composite glyph + bool GetComponentPlacement(const void * pSimpleGlyf, int nCompId, + bool fOffset, int & a, int & b); + // primitive to find the transform data for a component in a composite glyph + bool GetComponentTransform(const void * pSimpleGlyf, int nCompId, + float & flt11, float & flt12, float & flt21, float & flt22, bool & fTransOffset); +#endif + + ////////////////////////////////// operate on composite or simple glyph (auto glyf lookup) + void * GlyfLookup(gid16 nGlyphId, const void * pGlyf, const void * pLoca, + size_t lGlyfSize, size_t lLocaSize, const void * pHead); // primitive used by below methods + +#ifdef ALL_TTFUTILS + // below are primary user methods for handling glyf data + bool IsSpace(gid16 nGlyphId, const void * pLoca, size_t lLocaSize, const void * pHead); + bool IsDeepComposite(gid16 nGlyphId, const void * pGlyf, const void * pLoca, + size_t lGlyfSize, size_t lLocaSize, const void * pHead); + + bool GlyfBox(gid16 nGlyphId, const void * pGlyf, const void * pLoca, size_t lGlyfSize, size_t lLocaSize, + const void * pHead, int & xMin, int & yMin, int & xMax, int & yMax); + bool GlyfContourCount(gid16 nGlyphId, const void * pGlyf, const void * pLoca, + size_t lGlyfSize, size_t lLocaSize, const void *pHead, size_t & cnContours); + bool GlyfContourEndPoints(gid16 nGlyphId, const void * pGlyf, const void * pLoca, + size_t lGlyfSize, size_t lLocaSize, const void * pHead, int * prgnContourEndPoint, size_t cnPoints); + bool GlyfPoints(gid16 nGlyphId, const void * pGlyf, const void * pLoca, + size_t lGlyfSize, size_t lLocaSize, const void * pHead, const int * prgnContourEndPoint, size_t cnEndPoints, + int * prgnX, int * prgnY, bool * prgfOnCurve, size_t cnPoints); + + // utitily method used by high-level GlyfPoints + bool SimplifyFlags(char * prgbFlags, int cnPoints); + bool CalcAbsolutePoints(int * prgnX, int * prgnY, int cnPoints); +#endif + +} // end of namespace TtfUtil +} // end of namespace graphite2 diff --git a/thirdparty/graphite/src/inc/UtfCodec.h b/thirdparty/graphite/src/inc/UtfCodec.h new file mode 100644 index 0000000000..24a343d8d9 --- /dev/null +++ b/thirdparty/graphite/src/inc/UtfCodec.h @@ -0,0 +1,251 @@ +/* GRAPHITE2 LICENSING + + Copyright 2011, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +#include <cstdlib> +#include "inc/Main.h" + +namespace graphite2 { + +typedef uint32 uchar_t; + +template <int N> +struct _utf_codec +{ + typedef uchar_t codeunit_t; + + static void put(codeunit_t * cp, const uchar_t , int8 & len) throw(); + static uchar_t get(const codeunit_t * cp, int8 & len) throw(); + static bool validate(const codeunit_t * s, const codeunit_t * const e) throw(); +}; + + +template <> +struct _utf_codec<32> +{ +private: + static const uchar_t limit = 0x110000; +public: + typedef uint32 codeunit_t; + + inline + static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw() + { + *cp = usv; l = 1; + } + + inline + static uchar_t get(const codeunit_t * cp, int8 & l) throw() + { + if (cp[0] < limit) { l = 1; return cp[0]; } + else { l = -1; return 0xFFFD; } + } + + inline + static bool validate(const codeunit_t * s, const codeunit_t * const e) throw() + { + return s <= e; + } +}; + + +template <> +struct _utf_codec<16> +{ +private: + static const int32 lead_offset = 0xD800 - (0x10000 >> 10); + static const int32 surrogate_offset = 0x10000 - (0xD800 << 10) - 0xDC00; +public: + typedef uint16 codeunit_t; + + inline + static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw() + { + if (usv < 0x10000) { l = 1; cp[0] = codeunit_t(usv); } + else + { + cp[0] = codeunit_t(lead_offset + (usv >> 10)); + cp[1] = codeunit_t(0xDC00 + (usv & 0x3FF)); + l = 2; + } + } + + inline + static uchar_t get(const codeunit_t * cp, int8 & l) throw() + { + const uint32 uh = cp[0]; + l = 1; + + if (uh < 0xD800|| uh > 0xDFFF) { return uh; } + if (uh > 0xDBFF) { l = -1; return 0xFFFD; } + const uint32 ul = cp[1]; + if (ul < 0xDC00 || ul > 0xDFFF) { l = -1; return 0xFFFD; } + ++l; + return (uh<<10) + ul + surrogate_offset; + } + + inline + static bool validate(const codeunit_t * s, const codeunit_t * const e) throw() + { + const ptrdiff_t n = e-s; + if (n <= 0) return n == 0; + const uint32 u = *(e-1); // Get the last codepoint + return (u < 0xD800 || u > 0xDBFF); + } +}; + + +template <> +struct _utf_codec<8> +{ +private: + static const int8 sz_lut[16]; + static const byte mask_lut[5]; + static const uchar_t limit = 0x110000; + +public: + typedef uint8 codeunit_t; + + inline + static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw() + { + if (usv < 0x80) {l = 1; cp[0] = usv; return; } + if (usv < 0x0800) {l = 2; cp[0] = 0xC0 + (usv >> 6); cp[1] = 0x80 + (usv & 0x3F); return; } + if (usv < 0x10000) {l = 3; cp[0] = 0xE0 + (usv >> 12); cp[1] = 0x80 + ((usv >> 6) & 0x3F); cp[2] = 0x80 + (usv & 0x3F); return; } + else {l = 4; cp[0] = 0xF0 + (usv >> 18); cp[1] = 0x80 + ((usv >> 12) & 0x3F); cp[2] = 0x80 + ((usv >> 6) & 0x3F); cp[3] = 0x80 + (usv & 0x3F); return; } + } + + inline + static uchar_t get(const codeunit_t * cp, int8 & l) throw() + { + const int8 seq_sz = sz_lut[*cp >> 4]; + uchar_t u = *cp & mask_lut[seq_sz]; + l = 1; + bool toolong = false; + + switch(seq_sz) { + case 4: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong = (u < 0x10); GR_FALLTHROUGH; + // no break + case 3: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x20); GR_FALLTHROUGH; + // no break + case 2: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x80); GR_FALLTHROUGH; + // no break + case 1: break; + case 0: l = -1; return 0xFFFD; + } + + if (l != seq_sz || toolong || u >= limit) + { + l = -l; + return 0xFFFD; + } + return u; + } + + inline + static bool validate(const codeunit_t * s, const codeunit_t * const e) throw() + { + const ptrdiff_t n = e-s; + if (n <= 0) return n == 0; + s += (n-1); + if (*s < 0x80) return true; + if (*s >= 0xC0) return false; + if (n == 1) return true; + if (*--s < 0x80) return true; + if (*s >= 0xE0) return false; + if (n == 2 || *s >= 0xC0) return true; + if (*--s < 0x80) return true; + if (*s >= 0xF0) return false; + return true; + } + +}; + + +template <typename C> +class _utf_iterator +{ + typedef _utf_codec<sizeof(C)*8> codec; + + C * cp; + mutable int8 sl; + +public: + typedef C codeunit_type; + typedef uchar_t value_type; + typedef uchar_t * pointer; + + class reference + { + const _utf_iterator & _i; + + reference(const _utf_iterator & i): _i(i) {} + public: + operator value_type () const throw () { return codec::get(_i.cp, _i.sl); } + reference & operator = (const value_type usv) throw() { codec::put(_i.cp, usv, _i.sl); return *this; } + + friend class _utf_iterator; + }; + + + _utf_iterator(const void * us=0) : cp(reinterpret_cast<C *>(const_cast<void *>(us))), sl(1) { } + + _utf_iterator & operator ++ () { cp += abs(sl); return *this; } + _utf_iterator operator ++ (int) { _utf_iterator tmp(*this); operator++(); return tmp; } + + bool operator == (const _utf_iterator & rhs) const throw() { return cp >= rhs.cp; } + bool operator != (const _utf_iterator & rhs) const throw() { return !operator==(rhs); } + + reference operator * () const throw() { return *this; } + pointer operator ->() const throw() { return &operator *(); } + + operator codeunit_type * () const throw() { return cp; } + + bool error() const throw() { return sl < 1; } + bool validate(const _utf_iterator & e) { return codec::validate(cp, e.cp); } +}; + +template <typename C> +struct utf +{ + typedef typename _utf_codec<sizeof(C)*8>::codeunit_t codeunit_t; + + typedef _utf_iterator<C> iterator; + typedef _utf_iterator<const C> const_iterator; + + inline + static bool validate(codeunit_t * s, codeunit_t * e) throw() { + return _utf_codec<sizeof(C)*8>::validate(s,e); + } +}; + + +typedef utf<uint32> utf32; +typedef utf<uint16> utf16; +typedef utf<uint8> utf8; + +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/bits.h b/thirdparty/graphite/src/inc/bits.h new file mode 100644 index 0000000000..9365986a10 --- /dev/null +++ b/thirdparty/graphite/src/inc/bits.h @@ -0,0 +1,150 @@ +/* GRAPHITE2 LICENSING + + Copyright 2012, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once + +namespace graphite2 +{ + + +#if defined GRAPHITE2_BUILTINS && (defined __GNUC__ || defined __clang__) + +template<typename T> +inline unsigned int bit_set_count(T v) +{ + return __builtin_popcount(v); +} + +template<> +inline unsigned int bit_set_count(int16 v) +{ + return __builtin_popcount(static_cast<uint16>(v)); +} + +template<> +inline unsigned int bit_set_count(int8 v) +{ + return __builtin_popcount(static_cast<uint8>(v)); +} + +template<> +inline unsigned int bit_set_count(unsigned long v) +{ + return __builtin_popcountl(v); +} + +template<> +inline unsigned int bit_set_count(signed long v) +{ + return __builtin_popcountl(v); +} + +template<> +inline unsigned int bit_set_count(unsigned long long v) +{ + return __builtin_popcountll(v); +} + +template<> +inline unsigned int bit_set_count(signed long long v) +{ + return __builtin_popcountll(v); +} + +#else + +template<typename T> +inline unsigned int bit_set_count(T v) +{ + static size_t const ONES = ~0; + + v = v - ((v >> 1) & T(ONES/3)); // temp + v = (v & T(ONES/15*3)) + ((v >> 2) & T(ONES/15*3)); // temp + v = (v + (v >> 4)) & T(ONES/255*15); // temp + return (T)(v * T(ONES/255)) >> (sizeof(T)-1)*8; // count +} + +#endif + +//TODO: Changed these to uintmax_t when we go to C++11 +template<int S> +inline size_t _mask_over_val(size_t v) +{ + v = _mask_over_val<S/2>(v); + v |= v >> S*4; + return v; +} + +//TODO: Changed these to uintmax_t when we go to C++11 +template<> +inline size_t _mask_over_val<1>(size_t v) +{ + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + return v; +} + +template<typename T> +inline T mask_over_val(T v) +{ + return T(_mask_over_val<sizeof(T)>(v)); +} + +template<typename T> +inline unsigned long next_highest_power2(T v) +{ + return _mask_over_val<sizeof(T)>(v-1)+1; +} + +template<typename T> +inline unsigned int log_binary(T v) +{ + return bit_set_count(mask_over_val(v))-1; +} + +template<typename T> +inline T has_zero(const T x) +{ + return (x - T(~T(0)/255)) & ~x & T(~T(0)/255*128); +} + +template<typename T> +inline T zero_bytes(const T x, unsigned char n) +{ + const T t = T(~T(0)/255*n); + return T((has_zero(x^t) >> 7)*n); +} + +#if 0 +inline float float_round(float x, uint32 m) +{ + *reinterpret_cast<unsigned int *>(&x) &= m; + return *reinterpret_cast<float *>(&x); +} +#endif + +} diff --git a/thirdparty/graphite/src/inc/debug.h b/thirdparty/graphite/src/inc/debug.h new file mode 100644 index 0000000000..97175eb2cc --- /dev/null +++ b/thirdparty/graphite/src/inc/debug.h @@ -0,0 +1,89 @@ +/* GRAPHITE2 LICENSING + + Copyright 2011, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +// debug.h +// +// Created on: 22 Dec 2011 +// Author: tim + +#pragma once + +#if !defined GRAPHITE2_NTRACING + +#include <utility> +#include "inc/json.h" +#include "inc/Position.h" + +namespace graphite2 +{ + +class CharInfo; +class Segment; +class Slot; + +typedef std::pair<const Segment * const, const Slot * const> dslot; +struct objectid +{ + char name[16]; + objectid(const dslot &) throw(); + objectid(const Segment * const p) throw(); +}; + + +json & operator << (json & j, const Position &) throw(); +json & operator << (json & j, const Rect &) throw(); +json & operator << (json & j, const CharInfo &) throw(); +json & operator << (json & j, const dslot &) throw(); +json & operator << (json & j, const objectid &) throw(); +json & operator << (json & j, const telemetry &) throw(); + + + +inline +json & operator << (json & j, const Position & p) throw() +{ + return j << json::flat << json::array << p.x << p.y << json::close; +} + + +inline +json & operator << (json & j, const Rect & p) throw() +{ + return j << json::flat << json::array << p.bl.x << p.bl.y << p.tr.x << p.tr.y << json::close; +} + + +inline +json & operator << (json & j, const objectid & sid) throw() +{ + return j << sid.name; +} + + +} // namespace graphite2 + +#endif //!defined GRAPHITE2_NTRACING + diff --git a/thirdparty/graphite/src/inc/json.h b/thirdparty/graphite/src/inc/json.h new file mode 100644 index 0000000000..554cd9a3d1 --- /dev/null +++ b/thirdparty/graphite/src/inc/json.h @@ -0,0 +1,178 @@ +/* GRAPHITE2 LICENSING + + Copyright 2011, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +// JSON pretty printer for graphite font debug output logging. +// Created on: 15 Dec 2011 +// Author: Tim Eves + +#pragma once + +#include "inc/Main.h" +#include <cassert> +#include <cstdio> +#include <cstdint> +#include "inc/List.h" + +namespace graphite2 { + +class json +{ + // Prevent copying + json(const json &); + json & operator = (const json &); + + typedef void (*_context_t)(json &); + + FILE * const _stream; + char _contexts[128], // context stack + * _context, // current context (top of stack) + * _flatten; // if !0 points to context above which + // pretty printed output should occur. + Vector<void *> _env; + + void context(const char current) throw(); + void indent(const int d=0) throw(); + void push_context(const char, const char) throw(); + void pop_context() throw(); + +public: + class closer; + + using string = const char *; + using number = double; + enum class integer : std::intmax_t {}; + enum class integer_u : std::uintmax_t {}; + using boolean = bool; + static const std::nullptr_t null; + + void setenv(unsigned int index, void *val) { _env.reserve(index + 1); if (index >= _env.size()) _env.insert(_env.end(), _env.size() - index + 1, 0); _env[index] = val; } + void *getenv(unsigned int index) const { return _env[index]; } + const Vector<void *> &getenvs() const { return _env; } + + static void flat(json &) throw(); + static void close(json &) throw(); + static void object(json &) throw(); + static void array(json &) throw(); + static void item(json &) throw(); + + json(FILE * stream) throw(); + ~json() throw (); + + FILE * stream() const throw(); + + json & operator << (string) throw(); + json & operator << (number) throw(); + json & operator << (integer) throw(); + json & operator << (integer_u) throw(); + json & operator << (boolean) throw(); + json & operator << (std::nullptr_t) throw(); + json & operator << (_context_t) throw(); + + operator bool() const throw(); + bool good() const throw(); + bool eof() const throw(); + + CLASS_NEW_DELETE; +}; + +class json::closer +{ + // Prevent copying. + closer(const closer &); + closer & operator = (const closer &); + + json * const _j; +public: + closer(json * const j) : _j(j) {} + ~closer() throw() { if (_j) *_j << close; } +}; + +inline +json::json(FILE * s) throw() +: _stream(s), _context(_contexts), _flatten(0) +{ + if (good()) + fflush(s); +} + + +inline +json::~json() throw () +{ + while (_context > _contexts) pop_context(); +} + +inline +FILE * json::stream() const throw() { return _stream; } + + +inline +json & json::operator << (json::_context_t ctxt) throw() +{ + ctxt(*this); + return *this; +} + +inline +json & operator << (json & j, signed char d) throw() { return j << json::integer(d); } + +inline +json & operator << (json & j, unsigned char d) throw() { return j << json::integer_u(d); } + +inline +json & operator << (json & j, short int d) throw() { return j << json::integer(d); } + +inline +json & operator << (json & j, unsigned short int d) throw() { return j << json::integer_u(d); } + +inline +json & operator << (json & j, int d) throw() { return j << json::integer(d); } + +inline +json & operator << (json & j, unsigned int d) throw() { return j << json::integer_u(d); } + +inline +json & operator << (json & j, long int d) throw() { return j << json::integer(d); } + +inline +json & operator << (json & j, unsigned long int d) throw() { return j << json::integer_u(d); } + +inline +json & operator << (json & j, long long int d) throw() { return j << json::integer(d); } + +inline +json & operator << (json & j, unsigned long long int d) throw() { return j << json::integer_u(d); } + +inline +json::operator bool() const throw() { return good(); } + +inline +bool json::good() const throw() { return _stream && ferror(_stream) == 0; } + +inline +bool json::eof() const throw() { return feof(_stream) != 0; } + +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/locale2lcid.h b/thirdparty/graphite/src/inc/locale2lcid.h new file mode 100644 index 0000000000..25d5c0a3c8 --- /dev/null +++ b/thirdparty/graphite/src/inc/locale2lcid.h @@ -0,0 +1,450 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once +#include <cstring> +#include <cassert> + +#include "inc/Main.h" + + +namespace graphite2 { + +struct IsoLangEntry +{ + unsigned short mnLang; + char maLangStr[4]; + char maCountry[3]; +}; + +// Windows Language ID, Locale ISO-639 language, country code as used in +// naming table of OpenType fonts +const IsoLangEntry LANG_ENTRIES[] = { + { 0x0401, "ar","SA" }, // Arabic Saudi Arabia + { 0x0402, "bg","BG" }, // Bulgarian Bulgaria + { 0x0403, "ca","ES" }, // Catalan Catalan + { 0x0404, "zh","TW" }, // Chinese Taiwan + { 0x0405, "cs","CZ" }, // Czech Czech Republic + { 0x0406, "da","DK" }, // Danish Denmark + { 0x0407, "de","DE" }, // German Germany + { 0x0408, "el","GR" }, // Greek Greece + { 0x0409, "en","US" }, // English United States + { 0x040A, "es","ES" }, // Spanish (Traditional Sort) Spain + { 0x040B, "fi","FI" }, // Finnish Finland + { 0x040C, "fr","FR" }, // French France + { 0x040D, "he","IL" }, // Hebrew Israel + { 0x040E, "hu","HU" }, // Hungarian Hungary + { 0x040F, "is","IS" }, // Icelandic Iceland + { 0x0410, "it","IT" }, // Italian Italy + { 0x0411, "jp","JP" }, // Japanese Japan + { 0x0412, "ko","KR" }, // Korean Korea + { 0x0413, "nl","NL" }, // Dutch Netherlands + { 0x0414, "no","NO" }, // Norwegian (Bokmal) Norway + { 0x0415, "pl","PL" }, // Polish Poland + { 0x0416, "pt","BR" }, // Portuguese Brazil + { 0x0417, "rm","CH" }, // Romansh Switzerland + { 0x0418, "ro","RO" }, // Romanian Romania + { 0x0419, "ru","RU" }, // Russian Russia + { 0x041A, "hr","HR" }, // Croatian Croatia + { 0x041B, "sk","SK" }, // Slovak Slovakia + { 0x041C, "sq","AL" }, // Albanian Albania + { 0x041D, "sv","SE" }, // Swedish Sweden + { 0x041E, "th","TH" }, // Thai Thailand + { 0x041F, "tr","TR" }, // Turkish Turkey + { 0x0420, "ur","PK" }, // Urdu Islamic Republic of Pakistan + { 0x0421, "id","ID" }, // Indonesian Indonesia + { 0x0422, "uk","UA" }, // Ukrainian Ukraine + { 0x0423, "be","BY" }, // Belarusian Belarus + { 0x0424, "sl","SI" }, // Slovenian Slovenia + { 0x0425, "et","EE" }, // Estonian Estonia + { 0x0426, "lv","LV" }, // Latvian Latvia + { 0x0427, "lt","LT" }, // Lithuanian Lithuania + { 0x0428, "tg","TJ" }, // Tajik (Cyrillic) Tajikistan + { 0x042A, "vi","VN" }, // Vietnamese Vietnam + { 0x042B, "hy","AM" }, // Armenian Armenia + { 0x042C, "az","AZ" }, // Azeri (Latin) Azerbaijan + { 0x042D, "eu","" }, // Basque Basque + { 0x042E, "hsb","DE" }, // Upper Sorbian Germany + { 0x042F, "mk","MK" }, // Macedonian (FYROM) Former Yugoslav Republic of Macedonia + { 0x0432, "tn","ZA" }, // Setswana South Africa + { 0x0434, "xh","ZA" }, // isiXhosa South Africa + { 0x0435, "zu","ZA" }, // isiZulu South Africa + { 0x0436, "af","ZA" }, // Afrikaans South Africa + { 0x0437, "ka","GE" }, // Georgian Georgia + { 0x0438, "fo","FO" }, // Faroese Faroe Islands + { 0x0439, "hi","IN" }, // Hindi India + { 0x043A, "mt","MT" }, // Maltese Malta + { 0x043B, "se","NO" }, // Sami (Northern) Norway + { 0x043E, "ms","MY" }, // Malay Malaysia + { 0x043F, "kk","KZ" }, // Kazakh Kazakhstan + { 0x0440, "ky","KG" }, // Kyrgyz Kyrgyzstan + { 0x0441, "sw","KE" }, // Kiswahili Kenya + { 0x0442, "tk","TM" }, // Turkmen Turkmenistan + { 0x0443, "uz","UZ" }, // Uzbek (Latin) Uzbekistan + { 0x0444, "tt","RU" }, // Tatar Russia + { 0x0445, "bn","IN" }, // Bengali India + { 0x0446, "pa","IN" }, // Punjabi India + { 0x0447, "gu","IN" }, // Gujarati India + { 0x0448, "or","IN" }, // Oriya India + { 0x0448, "wo","SN" }, // Wolof Senegal + { 0x0449, "ta","IN" }, // Tamil India + { 0x044A, "te","IN" }, // Telugu India + { 0x044B, "kn","IN" }, // Kannada India + { 0x044C, "ml","IN" }, // Malayalam India + { 0x044D, "as","IN" }, // Assamese India + { 0x044E, "mr","IN" }, // Marathi India + { 0x044F, "sa","IN" }, // Sanskrit India + { 0x0450, "mn","MN" }, // Mongolian (Cyrillic) Mongolia + { 0x0451, "bo","CN" }, // Tibetan PRC + { 0x0452, "cy","GB" }, // Welsh United Kingdom + { 0x0453, "km","KH" }, // Khmer Cambodia + { 0x0454, "lo","LA" }, // Lao Lao P.D.R. + { 0x0455, "my","MM" }, // Burmese Myanmar - not listed in Microsoft docs anymore + { 0x0456, "gl","ES" }, // Galician Galician + { 0x0457, "kok","IN" }, // Konkani India + { 0x045A, "syr","TR" }, // Syriac Syria + { 0x045B, "si","LK" }, // Sinhala Sri Lanka + { 0x045D, "iu","CA" }, // Inuktitut Canada + { 0x045E, "am","ET" }, // Amharic Ethiopia + { 0x0461, "ne","NP" }, // Nepali Nepal + { 0x0462, "fy","NL" }, // Frisian Netherlands + { 0x0463, "ps","AF" }, // Pashto Afghanistan + { 0x0464, "fil","PH" }, // Filipino Philippines + { 0x0465, "dv","MV" }, // Divehi Maldives + { 0x0468, "ha","NG" }, // Hausa (Latin) Nigeria + { 0x046A, "yo","NG" }, // Yoruba Nigeria + { 0x046B, "qu","BO" }, // Quechua Bolivia + { 0x046C, "st","ZA" }, // Sesotho sa Leboa South Africa + { 0x046D, "ba","RU" }, // Bashkir Russia + { 0x046E, "lb","LU" }, // Luxembourgish Luxembourg + { 0x046F, "kl","GL" }, // Greenlandic Greenland + { 0x0470, "ig","NG" }, // Igbo Nigeria + { 0x0478, "ii","CN" }, // Yi PRC + { 0x047A, "arn","CL" }, // Mapudungun Chile + { 0x047C, "moh","CA" }, // Mohawk Mohawk + { 0x047E, "br","FR" }, // Breton France + { 0x0480, "ug","CN" }, // Uighur PRC + { 0x0481, "mi","NZ" }, // Maori New Zealand + { 0x0482, "oc","FR" }, // Occitan France + { 0x0483, "co","FR" }, // Corsican France + { 0x0484, "gsw","FR" }, // Alsatian France + { 0x0485, "sah","RU" }, // Yakut Russia + { 0x0486, "qut","GT" }, // K'iche Guatemala + { 0x0487, "rw","RW" }, // Kinyarwanda Rwanda + { 0x048C, "gbz","AF" }, // Dari Afghanistan + { 0x0801, "ar","IQ" }, // Arabic Iraq + { 0x0804, "zn","CH" }, // Chinese People's Republic of China + { 0x0807, "de","CH" }, // German Switzerland + { 0x0809, "en","GB" }, // English United Kingdom + { 0x080A, "es","MX" }, // Spanish Mexico + { 0x080C, "fr","BE" }, // French Belgium + { 0x0810, "it","CH" }, // Italian Switzerland + { 0x0813, "nl","BE" }, // Dutch Belgium + { 0x0814, "nn","NO" }, // Norwegian (Nynorsk) Norway + { 0x0816, "pt","PT" }, // Portuguese Portugal + { 0x081A, "sh","RS" }, // Serbian (Latin) Serbia + { 0x081D, "sv","FI" }, // Sweden Finland + { 0x082C, "az","AZ" }, // Azeri (Cyrillic) Azerbaijan + { 0x082E, "dsb","DE" }, // Lower Sorbian Germany + { 0x083B, "se","SE" }, // Sami (Northern) Sweden + { 0x083C, "ga","IE" }, // Irish Ireland + { 0x083E, "ms","BN" }, // Malay Brunei Darussalam + { 0x0843, "uz","UZ" }, // Uzbek (Cyrillic) Uzbekistan + { 0x0845, "bn","BD" }, // Bengali Bangladesh + { 0x0850, "mn","MN" }, // Mongolian (Traditional) People's Republic of China + { 0x085D, "iu","CA" }, // Inuktitut (Latin) Canada + { 0x085F, "ber","DZ" }, // Tamazight (Latin) Algeria + { 0x086B, "es","EC" }, // Quechua Ecuador + { 0x0C01, "ar","EG" }, // Arabic Egypt + { 0x0C04, "zh","HK" }, // Chinese Hong Kong S.A.R. + { 0x0C07, "de","AT" }, // German Austria + { 0x0C09, "en","AU" }, // English Australia + { 0x0C0A, "es","ES" }, // Spanish (Modern Sort) Spain + { 0x0C0C, "fr","CA" }, // French Canada + { 0x0C1A, "sr","CS" }, // Serbian (Cyrillic) Serbia + { 0x0C3B, "se","FI" }, // Sami (Northern) Finland + { 0x0C6B, "qu","PE" }, // Quechua Peru + { 0x1001, "ar","LY" }, // Arabic Libya + { 0x1004, "zh","SG" }, // Chinese Singapore + { 0x1007, "de","LU" }, // German Luxembourg + { 0x1009, "en","CA" }, // English Canada + { 0x100A, "es","GT" }, // Spanish Guatemala + { 0x100C, "fr","CH" }, // French Switzerland + { 0x101A, "hr","BA" }, // Croatian (Latin) Bosnia and Herzegovina + { 0x103B, "smj","NO" }, // Sami (Lule) Norway + { 0x1401, "ar","DZ" }, // Arabic Algeria + { 0x1404, "zh","MO" }, // Chinese Macao S.A.R. + { 0x1407, "de","LI" }, // German Liechtenstein + { 0x1409, "en","NZ" }, // English New Zealand + { 0x140A, "es","CR" }, // Spanish Costa Rica + { 0x140C, "fr","LU" }, // French Luxembourg + { 0x141A, "bs","BA" }, // Bosnian (Latin) Bosnia and Herzegovina + { 0x143B, "smj","SE" }, // Sami (Lule) Sweden + { 0x1801, "ar","MA" }, // Arabic Morocco + { 0x1809, "en","IE" }, // English Ireland + { 0x180A, "es","PA" }, // Spanish Panama + { 0x180C, "fr","MC" }, // French Principality of Monoco + { 0x181A, "sh","BA" }, // Serbian (Latin) Bosnia and Herzegovina + { 0x183B, "sma","NO" }, // Sami (Southern) Norway + { 0x1C01, "ar","TN" }, // Arabic Tunisia + { 0x1C09, "en","ZA" }, // English South Africa + { 0x1C0A, "es","DO" }, // Spanish Dominican Republic + { 0x1C1A, "sr","BA" }, // Serbian (Cyrillic) Bosnia and Herzegovina + { 0x1C3B, "sma","SE" }, // Sami (Southern) Sweden + { 0x2001, "ar","OM" }, // Arabic Oman + { 0x2009, "en","JM" }, // English Jamaica + { 0x200A, "es","VE" }, // Spanish Venezuela + { 0x201A, "bs","BA" }, // Bosnian (Cyrillic) Bosnia and Herzegovina + { 0x203B, "sms","FI" }, // Sami (Skolt) Finland + { 0x2401, "ar","YE" }, // Arabic Yemen + { 0x2409, "en","BS" }, // English Caribbean + { 0x240A, "es","CO" }, // Spanish Colombia + { 0x243B, "smn","FI" }, // Sami (Inari) Finland + { 0x2801, "ar","SY" }, // Arabic Syria + { 0x2809, "en","BZ" }, // English Belize + { 0x280A, "es","PE" }, // Spanish Peru + { 0x2C01, "ar","JO" }, // Arabic Jordan + { 0x2C09, "en","TT" }, // English Trinidad and Tobago + { 0x2C0A, "es","AR" }, // Spanish Argentina + { 0x3001, "ar","LB" }, // Arabic Lebanon + { 0x3009, "en","ZW" }, // English Zimbabwe + { 0x300A, "es","EC" }, // Spanish Ecuador + { 0x3401, "ar","KW" }, // Arabic Kuwait + { 0x3409, "en","PH" }, // English Republic of the Philippines + { 0x340A, "es","CL" }, // Spanish Chile + { 0x3801, "ar","AE" }, // Arabic U.A.E. + { 0x380A, "es","UY" }, // Spanish Uruguay + { 0x3C01, "ar","BH" }, // Arabic Bahrain + { 0x3C0A, "es","PY" }, // Spanish Paraguay + { 0x4001, "ar","QA" }, // Arabic Qatar + { 0x4009, "en","IN" }, // English India + { 0x400A, "es","BO" }, // Spanish Bolivia + { 0x4409, "en","MY" }, // English Malaysia + { 0x440A, "es","SV" }, // Spanish El Salvador + { 0x4809, "en","SG" }, // English Singapore + { 0x480A, "es","HN" }, // Spanish Honduras + { 0x4C0A, "es","NI" }, // Spanish Nicaragua + { 0x500A, "es","PR" }, // Spanish Puerto Rico + { 0x540A, "es","US" } // Spanish United States +}; + +class Locale2Lang +{ + Locale2Lang(const Locale2Lang &); + Locale2Lang & operator = (const Locale2Lang &); + +public: + Locale2Lang() : mSeedPosition(128) + { + memset((void*)mLangLookup, 0, sizeof(mLangLookup)); + // create a tri lookup on first 2 letters of language code + static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry); + for (int i = 0; i < maxIndex; i++) + { + size_t a = LANG_ENTRIES[i].maLangStr[0] - 'a'; + size_t b = LANG_ENTRIES[i].maLangStr[1] - 'a'; + if (mLangLookup[a][b]) + { + const IsoLangEntry ** old = mLangLookup[a][b]; + int len = 1; + while (old[len]) len++; + len += 2; + mLangLookup[a][b] = gralloc<const IsoLangEntry *>(len); + if (!mLangLookup[a][b]) + { + mLangLookup[a][b] = old; + continue; + } + mLangLookup[a][b][--len] = NULL; + mLangLookup[a][b][--len] = &LANG_ENTRIES[i]; + while (--len >= 0) + { + assert(len >= 0); + mLangLookup[a][b][len] = old[len]; + } + free(old); + } + else + { + mLangLookup[a][b] = gralloc<const IsoLangEntry *>(2); + if (!mLangLookup[a][b]) continue; + mLangLookup[a][b][1] = NULL; + mLangLookup[a][b][0] = &LANG_ENTRIES[i]; + } + } + while (2 * mSeedPosition < maxIndex) + mSeedPosition *= 2; + }; + ~Locale2Lang() + { + for (int i = 0; i != 26; ++i) + for (int j = 0; j != 26; ++j) + free(mLangLookup[i][j]); + } + unsigned short getMsId(const char * locale) const + { + size_t length = strlen(locale); + size_t langLength = length; + const char * language = locale; + const char * script = NULL; + const char * region = NULL; + size_t regionLength = 0; + const char * dash = strchr(locale, '-'); + if (dash && (dash != locale)) + { + langLength = (dash - locale); + size_t nextPartLength = length - langLength - 1; + if (nextPartLength >= 2) + { + script = ++dash; + dash = strchr(dash, '-'); + if (dash) + { + nextPartLength = (dash - script); + region = ++dash; + } + if (nextPartLength == 2 && + (locale[langLength+1] > 0x40) && (locale[langLength+1] < 0x5B) && + (locale[langLength+2] > 0x40) && (locale[langLength+2] < 0x5B)) + { + region = script; + regionLength = nextPartLength; + script = NULL; + } + else if (nextPartLength == 4) + { + if (dash) + { + dash = strchr(dash, '-'); + if (dash) + { + nextPartLength = (dash - region); + } + else + { + nextPartLength = langLength - (region - locale); + } + regionLength = nextPartLength; + } + } + } + } + size_t a = 'e' - 'a'; + size_t b = 'n' - 'a'; + unsigned short langId = 0; + int i = 0; + switch (langLength) + { + case 2: + { + a = language[0] - 'a'; + b = language[1] - 'a'; + if ((a < 26) && (b < 26) && mLangLookup[a][b]) + { + while (mLangLookup[a][b][i]) + { + if (mLangLookup[a][b][i]->maLangStr[2] != '\0') + { + ++i; + continue; + } + if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0)) + { + langId = mLangLookup[a][b][i]->mnLang; + break; + } + else if (langId == 0) + { + // possible fallback code + langId = mLangLookup[a][b][i]->mnLang; + } + ++i; + } + } + } + break; + case 3: + { + a = language[0] - 'a'; + b = language[1] - 'a'; + if (mLangLookup[a][b]) + { + while (mLangLookup[a][b][i]) + { + if (mLangLookup[a][b][i]->maLangStr[2] != language[2]) + { + ++i; + continue; + } + if (region && (strncmp(mLangLookup[a][b][i]->maCountry, region, regionLength) == 0)) + { + langId = mLangLookup[a][b][i]->mnLang; + break; + } + else if (langId == 0) + { + // possible fallback code + langId = mLangLookup[a][b][i]->mnLang; + } + ++i; + } + } + } + break; + default: + break; + } + if (langId == 0) langId = 0x409; + return langId; + } + const IsoLangEntry * findEntryById(unsigned short langId) const + { + static const int maxIndex = sizeof(LANG_ENTRIES)/sizeof(IsoLangEntry); + int window = mSeedPosition; + int guess = mSeedPosition - 1; + while (LANG_ENTRIES[guess].mnLang != langId) + { + window /= 2; + if (window == 0) return NULL; + guess += (LANG_ENTRIES[guess].mnLang > langId)? -window : window; + while (guess >= maxIndex) + { + window /= 2; + guess -= window; + assert(window); + } + } + return &LANG_ENTRIES[guess]; + } + + CLASS_NEW_DELETE; + +private: + const IsoLangEntry ** mLangLookup[26][26]; + int mSeedPosition; +}; + +} // namespace graphite2 diff --git a/thirdparty/graphite/src/inc/opcode_table.h b/thirdparty/graphite/src/inc/opcode_table.h new file mode 100644 index 0000000000..cb5acde9a4 --- /dev/null +++ b/thirdparty/graphite/src/inc/opcode_table.h @@ -0,0 +1,124 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +// This file will be pulled into and integrated into a machine implmentation +// DO NOT build directly +#pragma once + +#define do2(n) do_(n) ,do_(n) +#define NILOP 0U + +// types or parameters are: (.. is inclusive) +// number - any byte +// output_class - 0 .. silf.m_nClass +// input_class - 0 .. silf.m_nClass +// sattrnum - 0 .. 29 (gr_slatJWidth) , 55 (gr_slatUserDefn) +// attrid - 0 .. silf.numUser() where sattrnum == 55; 0..silf.m_iMaxComp where sattrnum == 15 otherwise 0 +// gattrnum - 0 .. face->getGlyphFaceCache->numAttrs() +// gmetric - 0 .. 11 (kgmetDescent) +// featidx - 0 .. face.numFeatures() +// level - any byte +static const opcode_t opcode_table[] = +{ + {{do2(nop)}, 0, "NOP"}, + + {{do2(push_byte)}, 1, "PUSH_BYTE"}, // number + {{do2(push_byte_u)}, 1, "PUSH_BYTE_U"}, // number + {{do2(push_short)}, 2, "PUSH_SHORT"}, // number number + {{do2(push_short_u)}, 2, "PUSH_SHORT_U"}, // number number + {{do2(push_long)}, 4, "PUSH_LONG"}, // number number number number + + {{do2(add)}, 0, "ADD"}, + {{do2(sub)}, 0, "SUB"}, + {{do2(mul)}, 0, "MUL"}, + {{do2(div_)}, 0, "DIV"}, + {{do2(min_)}, 0, "MIN"}, + {{do2(max_)}, 0, "MAX"}, + {{do2(neg)}, 0, "NEG"}, + {{do2(trunc8)}, 0, "TRUNC8"}, + {{do2(trunc16)}, 0, "TRUNC16"}, + + {{do2(cond)}, 0, "COND"}, + {{do2(and_)}, 0, "AND"}, // 0x10 + {{do2(or_)}, 0, "OR"}, + {{do2(not_)}, 0, "NOT"}, + {{do2(equal)}, 0, "EQUAL"}, + {{do2(not_eq_)}, 0, "NOT_EQ"}, + {{do2(less)}, 0, "LESS"}, + {{do2(gtr)}, 0, "GTR"}, + {{do2(less_eq)}, 0, "LESS_EQ"}, + {{do2(gtr_eq)}, 0, "GTR_EQ"}, // 0x18 + + {{do_(next), NILOP}, 0, "NEXT"}, + {{NILOP, NILOP}, 1, "NEXT_N"}, // number <= smap.end - map + {{do_(next), NILOP}, 0, "COPY_NEXT"}, + {{do_(put_glyph_8bit_obs), NILOP}, 1, "PUT_GLYPH_8BIT_OBS"}, // output_class + {{do_(put_subs_8bit_obs), NILOP}, 3, "PUT_SUBS_8BIT_OBS"}, // slot input_class output_class + {{do_(put_copy), NILOP}, 1, "PUT_COPY"}, // slot + {{do_(insert), NILOP}, 0, "INSERT"}, + {{do_(delete_), NILOP}, 0, "DELETE"}, // 0x20 + {{do_(assoc), NILOP}, VARARGS, "ASSOC"}, + {{NILOP ,do_(cntxt_item)}, 2, "CNTXT_ITEM"}, // slot offset + + {{do_(attr_set), NILOP}, 1, "ATTR_SET"}, // sattrnum + {{do_(attr_add), NILOP}, 1, "ATTR_ADD"}, // sattrnum + {{do_(attr_sub), NILOP}, 1, "ATTR_SUB"}, // sattrnum + {{do_(attr_set_slot), NILOP}, 1, "ATTR_SET_SLOT"}, // sattrnum + {{do_(iattr_set_slot), NILOP}, 2, "IATTR_SET_SLOT"}, // sattrnum attrid + {{do2(push_slot_attr)}, 2, "PUSH_SLOT_ATTR"}, // sattrnum slot + {{do2(push_glyph_attr_obs)}, 2, "PUSH_GLYPH_ATTR_OBS"}, // gattrnum slot + {{do2(push_glyph_metric)}, 3, "PUSH_GLYPH_METRIC"}, // gmetric slot level + {{do2(push_feat)}, 2, "PUSH_FEAT"}, // featidx slot + + {{do2(push_att_to_gattr_obs)}, 2, "PUSH_ATT_TO_GATTR_OBS"}, // gattrnum slot + {{do2(push_att_to_glyph_metric)}, 3, "PUSH_ATT_TO_GLYPH_METRIC"}, // gmetric slot level + {{do2(push_islot_attr)}, 3, "PUSH_ISLOT_ATTR"}, // sattrnum slot attrid + + {{NILOP,NILOP}, 3, "PUSH_IGLYPH_ATTR"}, + + {{do2(pop_ret)}, 0, "POP_RET"}, // 0x30 + {{do2(ret_zero)}, 0, "RET_ZERO"}, + {{do2(ret_true)}, 0, "RET_TRUE"}, + + {{do_(iattr_set), NILOP}, 2, "IATTR_SET"}, // sattrnum attrid + {{do_(iattr_add), NILOP}, 2, "IATTR_ADD"}, // sattrnum attrid + {{do_(iattr_sub), NILOP}, 2, "IATTR_SUB"}, // sattrnum attrid + {{do2(push_proc_state)}, 1, "PUSH_PROC_STATE"}, // dummy + {{do2(push_version)}, 0, "PUSH_VERSION"}, + {{do_(put_subs), NILOP}, 5, "PUT_SUBS"}, // slot input_class input_class output_class output_class + {{NILOP,NILOP}, 0, "PUT_SUBS2"}, + {{NILOP,NILOP}, 0, "PUT_SUBS3"}, + {{do_(put_glyph), NILOP}, 2, "PUT_GLYPH"}, // output_class output_class + {{do2(push_glyph_attr)}, 3, "PUSH_GLYPH_ATTR"}, // gattrnum gattrnum slot + {{do2(push_att_to_glyph_attr)}, 3, "PUSH_ATT_TO_GLYPH_ATTR"}, // gattrnum gattrnum slot + {{do2(bor)}, 0, "BITOR"}, + {{do2(band)}, 0, "BITAND"}, + {{do2(bnot)}, 0, "BITNOT"}, // 0x40 + {{do2(setbits)}, 4, "BITSET"}, + {{do_(set_feat), NILOP}, 2, "SET_FEAT"}, // featidx slot + // private opcodes for internal use only, comes after all other on disk opcodes. + {{do_(temp_copy), NILOP}, 0, "TEMP_COPY"} +}; diff --git a/thirdparty/graphite/src/inc/opcodes.h b/thirdparty/graphite/src/inc/opcodes.h new file mode 100644 index 0000000000..ff2f1741e2 --- /dev/null +++ b/thirdparty/graphite/src/inc/opcodes.h @@ -0,0 +1,691 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +#pragma once +// This file will be pulled into and integrated into a machine implmentation +// DO NOT build directly and under no circumstances ever #include headers in +// here or you will break the direct_machine. +// +// Implementers' notes +// ================== +// You have access to a few primitives and the full C++ code: +// declare_params(n) Tells the interpreter how many bytes of parameter +// space to claim for this instruction uses and +// initialises the param pointer. You *must* before the +// first use of param. +// use_params(n) Claim n extra bytes of param space beyond what was +// claimed using delcare_param. +// param A const byte pointer for the parameter space claimed by +// this instruction. +// binop(op) Implement a binary operation on the stack using the +// specified C++ operator. +// NOT_IMPLEMENTED Any instruction body containing this will exit the +// program with an assertion error. Instructions that are +// not implemented should also be marked NILOP in the +// opcodes tables this will cause the code class to spot +// them in a live code stream and throw a runtime_error +// instead. +// push(n) Push the value n onto the stack. +// pop() Pop the top most value and return it. +// +// You have access to the following named fast 'registers': +// sp = The pointer to the current top of stack, the last value +// pushed. +// seg = A reference to the Segment this code is running over. +// is = The current slot index +// isb = The original base slot index at the start of this rule +// isf = The first positioned slot +// isl = The last positioned slot +// ip = The current instruction pointer +// endPos = Position of advance of last cluster +// dir = writing system directionality of the font + + +// #define NOT_IMPLEMENTED assert(false) +// #define NOT_IMPLEMENTED + +#define binop(op) const uint32 a = pop(); *sp = uint32(*sp) op a +#define sbinop(op) const int32 a = pop(); *sp = int32(*sp) op a +#define use_params(n) dp += n + +#define declare_params(n) const byte * param = dp; \ + use_params(n); + +#define push(n) { *++sp = n; } +#define pop() (*sp--) +#define slotat(x) (map[(x)]) +#define DIE { is=seg.last(); status = Machine::died_early; EXIT(1); } +#define POSITIONED 1 + +STARTOP(nop) + do {} while (0); +ENDOP + +STARTOP(push_byte) + declare_params(1); + push(int8(*param)); +ENDOP + +STARTOP(push_byte_u) + declare_params(1); + push(uint8(*param)); +ENDOP + +STARTOP(push_short) + declare_params(2); + const int16 r = int16(param[0]) << 8 + | uint8(param[1]); + push(r); +ENDOP + +STARTOP(push_short_u) + declare_params(2); + const uint16 r = uint16(param[0]) << 8 + | uint8(param[1]); + push(r); +ENDOP + +STARTOP(push_long) + declare_params(4); + const int32 r = int32(param[0]) << 24 + | uint32(param[1]) << 16 + | uint32(param[2]) << 8 + | uint8(param[3]); + push(r); +ENDOP + +STARTOP(add) + binop(+); +ENDOP + +STARTOP(sub) + binop(-); +ENDOP + +STARTOP(mul) + binop(*); +ENDOP + +STARTOP(div_) + const int32 b = pop(); + const int32 a = int32(*sp); + if (b == 0 || (a == std::numeric_limits<int32>::min() && b == -1)) DIE; + *sp = int32(*sp) / b; +ENDOP + +STARTOP(min_) + const int32 a = pop(), b = *sp; + if (a < b) *sp = a; +ENDOP + +STARTOP(max_) + const int32 a = pop(), b = *sp; + if (a > b) *sp = a; +ENDOP + +STARTOP(neg) + *sp = uint32(-int32(*sp)); +ENDOP + +STARTOP(trunc8) + *sp = uint8(*sp); +ENDOP + +STARTOP(trunc16) + *sp = uint16(*sp); +ENDOP + +STARTOP(cond) + const uint32 f = pop(), t = pop(), c = pop(); + push(c ? t : f); +ENDOP + +STARTOP(and_) + binop(&&); +ENDOP + +STARTOP(or_) + binop(||); +ENDOP + +STARTOP(not_) + *sp = !*sp; +ENDOP + +STARTOP(equal) + binop(==); +ENDOP + +STARTOP(not_eq_) + binop(!=); +ENDOP + +STARTOP(less) + sbinop(<); +ENDOP + +STARTOP(gtr) + sbinop(>); +ENDOP + +STARTOP(less_eq) + sbinop(<=); +ENDOP + +STARTOP(gtr_eq) + sbinop(>=); +ENDOP + +STARTOP(next) + if (map - &smap[0] >= int(smap.size())) DIE + if (is) + { + if (is == smap.highwater()) + smap.highpassed(true); + is = is->next(); + } + ++map; +ENDOP + +//STARTOP(next_n) +// use_params(1); +// NOT_IMPLEMENTED; + //declare_params(1); + //const size_t num = uint8(*param); +//ENDOP + +//STARTOP(copy_next) +// if (is) is = is->next(); +// ++map; +// ENDOP + +STARTOP(put_glyph_8bit_obs) + declare_params(1); + const unsigned int output_class = uint8(*param); + is->setGlyph(&seg, seg.getClassGlyph(output_class, 0)); +ENDOP + +STARTOP(put_subs_8bit_obs) + declare_params(3); + const int slot_ref = int8(param[0]); + const unsigned int input_class = uint8(param[1]), + output_class = uint8(param[2]); + uint16 index; + slotref slot = slotat(slot_ref); + if (slot) + { + index = seg.findClassIndex(input_class, slot->gid()); + is->setGlyph(&seg, seg.getClassGlyph(output_class, index)); + } +ENDOP + +STARTOP(put_copy) + declare_params(1); + const int slot_ref = int8(*param); + if (is && !is->isDeleted()) + { + slotref ref = slotat(slot_ref); + if (ref && ref != is) + { + int16 *tempUserAttrs = is->userAttrs(); + if (is->attachedTo() || is->firstChild()) DIE + Slot *prev = is->prev(); + Slot *next = is->next(); + memcpy(tempUserAttrs, ref->userAttrs(), seg.numAttrs() * sizeof(uint16)); + memcpy(is, ref, sizeof(Slot)); + is->firstChild(NULL); + is->nextSibling(NULL); + is->userAttrs(tempUserAttrs); + is->next(next); + is->prev(prev); + if (is->attachedTo()) + is->attachedTo()->child(is); + } + is->markCopied(false); + is->markDeleted(false); + } +ENDOP + +STARTOP(insert) + if (smap.decMax() <= 0) DIE; + Slot *newSlot = seg.newSlot(); + if (!newSlot) DIE; + Slot *iss = is; + while (iss && iss->isDeleted()) iss = iss->next(); + if (!iss) + { + if (seg.last()) + { + seg.last()->next(newSlot); + newSlot->prev(seg.last()); + newSlot->before(seg.last()->before()); + seg.last(newSlot); + } + else + { + seg.first(newSlot); + seg.last(newSlot); + } + } + else if (iss->prev()) + { + iss->prev()->next(newSlot); + newSlot->prev(iss->prev()); + newSlot->before(iss->prev()->after()); + } + else + { + newSlot->prev(NULL); + newSlot->before(iss->before()); + seg.first(newSlot); + } + newSlot->next(iss); + if (iss) + { + iss->prev(newSlot); + newSlot->originate(iss->original()); + newSlot->after(iss->before()); + } + else if (newSlot->prev()) + { + newSlot->originate(newSlot->prev()->original()); + newSlot->after(newSlot->prev()->after()); + } + else + { + newSlot->originate(seg.defaultOriginal()); + } + if (is == smap.highwater()) + smap.highpassed(false); + is = newSlot; + seg.extendLength(1); + if (map != &smap[-1]) + --map; +ENDOP + +STARTOP(delete_) + if (!is || is->isDeleted()) DIE + is->markDeleted(true); + if (is->prev()) + is->prev()->next(is->next()); + else + seg.first(is->next()); + + if (is->next()) + is->next()->prev(is->prev()); + else + seg.last(is->prev()); + + + if (is == smap.highwater()) + smap.highwater(is->next()); + if (is->prev()) + is = is->prev(); + seg.extendLength(-1); +ENDOP + +STARTOP(assoc) + declare_params(1); + unsigned int num = uint8(*param); + const int8 * assocs = reinterpret_cast<const int8 *>(param+1); + use_params(num); + int max = -1; + int min = -1; + + while (num-- > 0) + { + int sr = *assocs++; + slotref ts = slotat(sr); + if (ts && (min == -1 || ts->before() < min)) min = ts->before(); + if (ts && ts->after() > max) max = ts->after(); + } + if (min > -1) // implies max > -1 + { + is->before(min); + is->after(max); + } +ENDOP + +STARTOP(cntxt_item) + // It turns out this is a cunningly disguised condition forward jump. + declare_params(3); + const int is_arg = int8(param[0]); + const size_t iskip = uint8(param[1]), + dskip = uint8(param[2]); + + if (mapb + is_arg != map) + { + ip += iskip; + dp += dskip; + push(true); + } +ENDOP + +STARTOP(attr_set) + declare_params(1); + const attrCode slat = attrCode(uint8(*param)); + const int val = pop(); + is->setAttr(&seg, slat, 0, val, smap); +ENDOP + +STARTOP(attr_add) + declare_params(1); + const attrCode slat = attrCode(uint8(*param)); + const uint32_t val = pop(); + if ((slat == gr_slatPosX || slat == gr_slatPosY) && (flags & POSITIONED) == 0) + { + seg.positionSlots(0, *smap.begin(), *(smap.end()-1), seg.currdir()); + flags |= POSITIONED; + } + uint32_t res = uint32_t(is->getAttr(&seg, slat, 0)); + is->setAttr(&seg, slat, 0, int32_t(val + res), smap); +ENDOP + +STARTOP(attr_sub) + declare_params(1); + const attrCode slat = attrCode(uint8(*param)); + const uint32_t val = pop(); + if ((slat == gr_slatPosX || slat == gr_slatPosY) && (flags & POSITIONED) == 0) + { + seg.positionSlots(0, *smap.begin(), *(smap.end()-1), seg.currdir()); + flags |= POSITIONED; + } + uint32_t res = uint32_t(is->getAttr(&seg, slat, 0)); + is->setAttr(&seg, slat, 0, int32_t(res - val), smap); +ENDOP + +STARTOP(attr_set_slot) + declare_params(1); + const attrCode slat = attrCode(uint8(*param)); + const int offset = int(map - smap.begin())*int(slat == gr_slatAttTo); + const int val = pop() + offset; + is->setAttr(&seg, slat, offset, val, smap); +ENDOP + +STARTOP(iattr_set_slot) + declare_params(2); + const attrCode slat = attrCode(uint8(param[0])); + const uint8 idx = uint8(param[1]); + const int val = int(pop() + (map - smap.begin())*int(slat == gr_slatAttTo)); + is->setAttr(&seg, slat, idx, val, smap); +ENDOP + +STARTOP(push_slot_attr) + declare_params(2); + const attrCode slat = attrCode(uint8(param[0])); + const int slot_ref = int8(param[1]); + if ((slat == gr_slatPosX || slat == gr_slatPosY) && (flags & POSITIONED) == 0) + { + seg.positionSlots(0, *smap.begin(), *(smap.end()-1), seg.currdir()); + flags |= POSITIONED; + } + slotref slot = slotat(slot_ref); + if (slot) + { + int res = slot->getAttr(&seg, slat, 0); + push(res); + } +ENDOP + +STARTOP(push_glyph_attr_obs) + declare_params(2); + const unsigned int glyph_attr = uint8(param[0]); + const int slot_ref = int8(param[1]); + slotref slot = slotat(slot_ref); + if (slot) + push(int32(seg.glyphAttr(slot->gid(), glyph_attr))); +ENDOP + +STARTOP(push_glyph_metric) + declare_params(3); + const unsigned int glyph_attr = uint8(param[0]); + const int slot_ref = int8(param[1]); + const signed int attr_level = uint8(param[2]); + slotref slot = slotat(slot_ref); + if (slot) + push(seg.getGlyphMetric(slot, glyph_attr, attr_level, dir)); +ENDOP + +STARTOP(push_feat) + declare_params(2); + const unsigned int feat = uint8(param[0]); + const int slot_ref = int8(param[1]); + slotref slot = slotat(slot_ref); + if (slot) + { + uint8 fid = seg.charinfo(slot->original())->fid(); + push(seg.getFeature(fid, feat)); + } +ENDOP + +STARTOP(push_att_to_gattr_obs) + declare_params(2); + const unsigned int glyph_attr = uint8(param[0]); + const int slot_ref = int8(param[1]); + slotref slot = slotat(slot_ref); + if (slot) + { + slotref att = slot->attachedTo(); + if (att) slot = att; + push(int32(seg.glyphAttr(slot->gid(), glyph_attr))); + } +ENDOP + +STARTOP(push_att_to_glyph_metric) + declare_params(3); + const unsigned int glyph_attr = uint8(param[0]); + const int slot_ref = int8(param[1]); + const signed int attr_level = uint8(param[2]); + slotref slot = slotat(slot_ref); + if (slot) + { + slotref att = slot->attachedTo(); + if (att) slot = att; + push(int32(seg.getGlyphMetric(slot, glyph_attr, attr_level, dir))); + } +ENDOP + +STARTOP(push_islot_attr) + declare_params(3); + const attrCode slat = attrCode(uint8(param[0])); + const int slot_ref = int8(param[1]), + idx = uint8(param[2]); + if ((slat == gr_slatPosX || slat == gr_slatPosY) && (flags & POSITIONED) == 0) + { + seg.positionSlots(0, *smap.begin(), *(smap.end()-1), seg.currdir()); + flags |= POSITIONED; + } + slotref slot = slotat(slot_ref); + if (slot) + { + int res = slot->getAttr(&seg, slat, idx); + push(res); + } +ENDOP + +#if 0 +STARTOP(push_iglyph_attr) // not implemented + NOT_IMPLEMENTED; +ENDOP +#endif + +STARTOP(pop_ret) + const uint32 ret = pop(); + EXIT(ret); +ENDOP + +STARTOP(ret_zero) + EXIT(0); +ENDOP + +STARTOP(ret_true) + EXIT(1); +ENDOP + +STARTOP(iattr_set) + declare_params(2); + const attrCode slat = attrCode(uint8(param[0])); + const uint8 idx = uint8(param[1]); + const int val = pop(); + is->setAttr(&seg, slat, idx, val, smap); +ENDOP + +STARTOP(iattr_add) + declare_params(2); + const attrCode slat = attrCode(uint8(param[0])); + const uint8 idx = uint8(param[1]); + const uint32_t val = pop(); + if ((slat == gr_slatPosX || slat == gr_slatPosY) && (flags & POSITIONED) == 0) + { + seg.positionSlots(0, *smap.begin(), *(smap.end()-1), seg.currdir()); + flags |= POSITIONED; + } + uint32_t res = uint32_t(is->getAttr(&seg, slat, idx)); + is->setAttr(&seg, slat, idx, int32_t(val + res), smap); +ENDOP + +STARTOP(iattr_sub) + declare_params(2); + const attrCode slat = attrCode(uint8(param[0])); + const uint8 idx = uint8(param[1]); + const uint32_t val = pop(); + if ((slat == gr_slatPosX || slat == gr_slatPosY) && (flags & POSITIONED) == 0) + { + seg.positionSlots(0, *smap.begin(), *(smap.end()-1), seg.currdir()); + flags |= POSITIONED; + } + uint32_t res = uint32_t(is->getAttr(&seg, slat, idx)); + is->setAttr(&seg, slat, idx, int32_t(res - val), smap); +ENDOP + +STARTOP(push_proc_state) + use_params(1); + push(1); +ENDOP + +STARTOP(push_version) + push(0x00030000); +ENDOP + +STARTOP(put_subs) + declare_params(5); + const int slot_ref = int8(param[0]); + const unsigned int input_class = uint8(param[1]) << 8 + | uint8(param[2]); + const unsigned int output_class = uint8(param[3]) << 8 + | uint8(param[4]); + slotref slot = slotat(slot_ref); + if (slot) + { + int index = seg.findClassIndex(input_class, slot->gid()); + is->setGlyph(&seg, seg.getClassGlyph(output_class, index)); + } +ENDOP + +#if 0 +STARTOP(put_subs2) // not implemented + NOT_IMPLEMENTED; +ENDOP + +STARTOP(put_subs3) // not implemented + NOT_IMPLEMENTED; +ENDOP +#endif + +STARTOP(put_glyph) + declare_params(2); + const unsigned int output_class = uint8(param[0]) << 8 + | uint8(param[1]); + is->setGlyph(&seg, seg.getClassGlyph(output_class, 0)); +ENDOP + +STARTOP(push_glyph_attr) + declare_params(3); + const unsigned int glyph_attr = uint8(param[0]) << 8 + | uint8(param[1]); + const int slot_ref = int8(param[2]); + slotref slot = slotat(slot_ref); + if (slot) + push(int32(seg.glyphAttr(slot->gid(), glyph_attr))); +ENDOP + +STARTOP(push_att_to_glyph_attr) + declare_params(3); + const unsigned int glyph_attr = uint8(param[0]) << 8 + | uint8(param[1]); + const int slot_ref = int8(param[2]); + slotref slot = slotat(slot_ref); + if (slot) + { + slotref att = slot->attachedTo(); + if (att) slot = att; + push(int32(seg.glyphAttr(slot->gid(), glyph_attr))); + } +ENDOP + +STARTOP(temp_copy) + slotref newSlot = seg.newSlot(); + if (!newSlot || !is) DIE; + int16 *tempUserAttrs = newSlot->userAttrs(); + memcpy(newSlot, is, sizeof(Slot)); + memcpy(tempUserAttrs, is->userAttrs(), seg.numAttrs() * sizeof(uint16)); + newSlot->userAttrs(tempUserAttrs); + newSlot->markCopied(true); + *map = newSlot; +ENDOP + +STARTOP(band) + binop(&); +ENDOP + +STARTOP(bor) + binop(|); +ENDOP + +STARTOP(bnot) + *sp = ~*sp; +ENDOP + +STARTOP(setbits) + declare_params(4); + const uint16 m = uint16(param[0]) << 8 + | uint8(param[1]); + const uint16 v = uint16(param[2]) << 8 + | uint8(param[3]); + *sp = ((*sp) & ~m) | v; +ENDOP + +STARTOP(set_feat) + declare_params(2); + const unsigned int feat = uint8(param[0]); + const int slot_ref = int8(param[1]); + slotref slot = slotat(slot_ref); + if (slot) + { + uint8 fid = seg.charinfo(slot->original())->fid(); + seg.setFeature(fid, feat, pop()); + } +ENDOP diff --git a/thirdparty/graphite/src/json.cpp b/thirdparty/graphite/src/json.cpp new file mode 100644 index 0000000000..25f2190f71 --- /dev/null +++ b/thirdparty/graphite/src/json.cpp @@ -0,0 +1,147 @@ +/* GRAPHITE2 LICENSING + + Copyright 2011, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +// JSON debug logging +// Author: Tim Eves + +#if !defined GRAPHITE2_NTRACING + +#include <cstdio> +#include <limits> +#include "inc/json.h" + +#if defined(_MSC_VER) +#define FORMAT_INTMAX "%lli" +#define FORMAT_UINTMAX "%llu" +#else +#define FORMAT_INTMAX "%ji" +#define FORMAT_UINTMAX "%ju" +#endif + +using namespace graphite2; + +namespace +{ + enum + { + seq = ',', + obj='}', member=':', empty_obj='{', + arr=']', empty_arr='[' + }; +} + +const std::nullptr_t json::null = nullptr; + +inline +void json::context(const char current) throw() +{ + fprintf(_stream, "%c", *_context); + indent(); + *_context = current; +} + + +void json::indent(const int d) throw() +{ + if (*_context == member || (_flatten && _flatten < _context)) + fputc(' ', _stream); + else + fprintf(_stream, "\n%*s", 4*int(_context - _contexts + d), ""); +} + + +inline +void json::push_context(const char prefix, const char suffix) throw() +{ + assert(_context - _contexts < ptrdiff_t(sizeof _contexts)); + + if (_context == _contexts) + *_context = suffix; + else + context(suffix); + *++_context = prefix; +} + + +void json::pop_context() throw() +{ + assert(_context > _contexts); + + if (*_context == seq) indent(-1); + else fputc(*_context, _stream); + + fputc(*--_context, _stream); + if (_context == _contexts) fputc('\n', _stream); + fflush(_stream); + + if (_flatten >= _context) _flatten = 0; + *_context = seq; +} + + +// These four functions cannot be inlined as pointers to these +// functions are needed for operator << (_context_t) to work. +void json::flat(json & j) throw() { if (!j._flatten) j._flatten = j._context; } +void json::close(json & j) throw() { j.pop_context(); } +void json::object(json & j) throw() { j.push_context('{', '}'); } +void json::array(json & j) throw() { j.push_context('[', ']'); } +void json::item(json & j) throw() +{ + while (j._context > j._contexts+1 && j._context[-1] != arr) + j.pop_context(); +} + + +json & json::operator << (json::string s) throw() +{ + const char ctxt = _context[-1] == obj ? *_context == member ? seq : member : seq; + context(ctxt); + fprintf(_stream, "\"%s\"", s); + if (ctxt == member) fputc(' ', _stream); + + return *this; +} + +json & json::operator << (json::number f) throw() +{ + context(seq); + if (std::numeric_limits<json::number>::infinity() == f) + fputs("Infinity", _stream); + else if (-std::numeric_limits<json::number>::infinity() == f) + fputs("-Infinity", _stream); + else if (std::numeric_limits<json::number>::quiet_NaN() == f || + std::numeric_limits<json::number>::signaling_NaN() == f) + fputs("NaN", _stream); + else + fprintf(_stream, "%g", f); + return *this; +} +json & json::operator << (json::integer d) throw() { context(seq); fprintf(_stream, FORMAT_INTMAX, intmax_t(d)); return *this; } +json & json::operator << (json::integer_u d) throw() { context(seq); fprintf(_stream, FORMAT_UINTMAX, uintmax_t(d)); return *this; } +json & json::operator << (json::boolean b) throw() { context(seq); fputs(b ? "true" : "false", _stream); return *this; } +json & json::operator << (std::nullptr_t) throw() { context(seq); fputs("null",_stream); return *this; } + +#endif diff --git a/thirdparty/harfbuzz/AUTHORS b/thirdparty/harfbuzz/AUTHORS new file mode 100644 index 0000000000..83c0c66f99 --- /dev/null +++ b/thirdparty/harfbuzz/AUTHORS @@ -0,0 +1,14 @@ +Behdad Esfahbod +David Corbett +David Turner +Ebrahim Byagowi +Garret Rieger +Jonathan Kew +Khaled Hosny +Lars Knoll +Martin Hosken +Owen Taylor +Roderick Sheeter +Roozbeh Pournader +Simon Hausmann +Werner Lemberg diff --git a/thirdparty/harfbuzz/COPYING b/thirdparty/harfbuzz/COPYING new file mode 100644 index 0000000000..57343164f2 --- /dev/null +++ b/thirdparty/harfbuzz/COPYING @@ -0,0 +1,38 @@ +HarfBuzz is licensed under the so-called "Old MIT" license. Details follow. +For parts of HarfBuzz that are licensed under different licenses see individual +files names COPYING in subdirectories where applicable. + +Copyright © 2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020 Google, Inc. +Copyright © 2018,2019,2020 Ebrahim Byagowi +Copyright © 2019,2020 Facebook, Inc. +Copyright © 2012 Mozilla Foundation +Copyright © 2011 Codethink Limited +Copyright © 2008,2010 Nokia Corporation and/or its subsidiary(-ies) +Copyright © 2009 Keith Stribley +Copyright © 2009 Martin Hosken and SIL International +Copyright © 2007 Chris Wilson +Copyright © 2006 Behdad Esfahbod +Copyright © 2005 David Turner +Copyright © 2004,2007,2008,2009,2010 Red Hat, Inc. +Copyright © 1998-2004 David Turner and Werner Lemberg + +For full copyright notices consult the individual files in the package. + + +Permission is hereby granted, without written agreement and without +license or royalty fees, to use, copy, modify, and distribute this +software and its documentation for any purpose, provided that the +above copyright notice and the following two paragraphs appear in +all copies of this software. + +IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR +DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN +IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. + +THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, +BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS +ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO +PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. diff --git a/thirdparty/harfbuzz/NEWS b/thirdparty/harfbuzz/NEWS new file mode 100644 index 0000000000..f211a3781c --- /dev/null +++ b/thirdparty/harfbuzz/NEWS @@ -0,0 +1,2412 @@ +Overview of changes leading to 2.7.2 +Saturday, August 29, 2020 +==================================== +- Fix a regression in the previous release that caused a crash with Kaithi. +- More OOM fixes. + + +Overview of changes leading to 2.7.1 +Thursday, August 13, 2020 +==================================== +- ot-funcs now handles variable empty glyphs better when hvar/vvar isn't present. +- Reverted a GDEF processing regression. +- A couple of fixes to handle OOM better. + + +Overview of changes leading to 2.7.0 +Saturday, July 25, 2020 +==================================== +- Use an implementation for round that always rounds up, some minor fluctuations + are expected on var font specially when hb-ot callback is used. +- Fix an AAT's `kerx` issue on broken rendering of Devanagari Sangam MN. +- Remove AAT's `lcar` table support from _get_ligature_carets API, not even much + use on macOS installed fonts (only two files). GDEF support is the recommended + one and expected to work properly after issues fixed two releases ago. +- Minor memory fixes to handle OOM better specially in hb-ft. +- Minor .so files versioning scheme change and remove stable/unstable scheme + differences, was never used in practice (always default to stable scheme). +- We are now suggesting careful packaging of the library using meson, + https://github.com/harfbuzz/harfbuzz/wiki/Notes-on-migration-to-meson + for more information. +- Distribution package URL is changed, either use GitHub generated tarballs, + `https://github.com/harfbuzz/harfbuzz/archive/$pkgver.tar.gz` + or, even more preferably use commit hash of the release and git checkouts like, + `git+https://github.com/harfbuzz/harfbuzz#commit=$commit` + + +Overview of changes leading to 2.6.8 +Monday, June 22, 2020 +==================================== +- New API to fetch glyph alternates from GSUB table. +- hb-coretext build fix for macOS < 10.10. +- Meson build fixes, cmake port removal is postponed but please prepare for + it and give us feedback. + Autotools is still our main build system however please consider + experimenting with meson also for packaging the library. +- New API: ++hb_ot_layout_lookup_get_glyph_alternates() + + +Overview of changes leading to 2.6.7 +Wednesday, June 3, 2020 +==================================== +- Update to Unicode 13.0.0. +- Fix hb_ot_layout_get_ligature_carets for fonts without lcar table, it was + completely broken for all the other fonts since 2.1.2. +- As a part of our migration to meson, this release will be the last one + to provide cmake port files but autotools still is our main build system. + There is a possibility that the next version or the after be released + using meson. + + +Overview of changes leading to 2.6.6 +Tuesday, May 12, 2020 +==================================== +- A fix in AAT kerning for Geeza Pro. +- Better support for resource fork fonts on macOS. + + +Overview of changes leading to 2.6.5 +Friday, April 17, 2020 +==================================== +- Add experimental meson build system. Autotools is still the primary + and supported build system. +- AAT is now always preferred for horizontal scripts when both AAT and OT + layout tables exist at the same time. +- Subsetter improvements. +- New API: ++hb_ft_font_lock_face() ++hb_ft_font_unlock_face() + + +Overview of changes leading to 2.6.4 +Monday, October 29, 2019 +==================================== +- Small bug fix. +- Build fixes. + + +Overview of changes leading to 2.6.3 +Monday, October 28, 2019 +==================================== +- Misc small fixes, mostly to build-related issues. +- New API: ++hb_font_get_nominal_glyphs() + + +Overview of changes leading to 2.6.2 +Monday, September 30, 2019 +==================================== +- Misc small fixes, mostly to build-related issues. + + +Overview of changes leading to 2.6.1 +Thursday, August 22, 2019 +==================================== +- Fix regression with hb_font_create_sub_font scaling introduced in 2.6.0. +- Change interpretation of font PTEM size / CoreText font size handling. + See https://github.com/harfbuzz/harfbuzz/pull/1484 +- hb-ot-font: Prefer symbol cmap subtable if present. +- Apply 'dist'/'abvm'/'blwm' features to all scripts. +- Drop experimental DirectWrite API. + + +Overview of changes leading to 2.6.0 +Tuesday, August 13, 2019 +==================================== +- New OpenType metrics, baseline, and metadata table access APIs. +- New API to set font variations to a named-instance. +- New hb-gdi.h header and API for creating hb_face_t from HFONT. +- Amalgam: Provide a single-file harfbuzz.cc file for easier alternate building. +- More size-reduction configurable options, enabled by HB_TINY. +- New API: ++hb_font_set_var_named_instance() ++hb_gdi_face_create() ++hb_ot_layout_baseline_tag_t ++hb_ot_layout_get_baseline() ++hb_ot_meta_tag_t ++hb_ot_meta_get_entry_tags() ++hb_ot_meta_reference_entry() ++hb_ot_metrics_tag_t ++hb_ot_metrics_get_position() ++hb_ot_metrics_get_variation() ++hb_ot_metrics_get_x_variation() ++hb_ot_metrics_get_y_variation() + + +Overview of changes leading to 2.5.3 +Wednesday, June 26, 2019 +==================================== +- Fix UCD script data for Unicode 10+ scripts. This was broken since 2.5.0. +- More optimizations for HB_TINY. + + +Overview of changes leading to 2.5.2 +Thursday, June 20, 2019 +==================================== +- More hb-config.hh facilities to shrink library size, namely when built as + HB_TINY. +- New documentation of custom configurations in CONFIG.md. +- Fix build on gcc 4.8. That's supported again. +- Universal Shaping Engine improvements thanks to David Corbett. +- API Changes: Undeprecate some horizontal-kerning API and re-enable in hb-ft, + such that Type1 fonts will continue kerning. + + +Overview of changes leading to 2.5.1 +Friday, May 31, 2019 +==================================== +- Fix build with various versions of Visual Studio. +- Improved documentation, thanks to Nathan Willis. +- Bugfix in subsetting glyf table. +- Improved scripts for cross-compiling for Windows using mingw. +- Rename HB_MATH_GLYPH_PART_FLAG_EXTENDER to HB_OT_MATH_GLYPH_PART_FLAG_EXTENDER. + A deprecated macro is added for backwards-compatibility. + + +Overview of changes leading to 2.5.0 +Friday, May 24, 2019 +==================================== +- This release does not include much functional changes, but includes major internal + code-base changes. We now require C++11. Support for gcc 4.8 and earlier has been + dropped. +- New hb-config.hh facility for compiling smaller library for embedded and web usecases. +- New Unicode Character Databse implementation that is half the size of previously-used + UCDN. +- Subsetter improvements. +- Improved documentation, thanks to Nathan Willis. +- Misc shaping fixes. + + +Overview of changes leading to 2.4.0 +Monday, March 25, 2019 +==================================== +- Unicode 12. +- Misc fixes. +- Subsetter improvements. +- New API: +HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE +hb_directwrite_face_create() + + +Overview of changes leading to 2.3.1 +Wednesday, January 30, 2019 +==================================== +- AAT bug fixes. +- Misc internal housekeeping cleanup. + + +Overview of changes leading to 2.3.0 +Thursday, December 20, 2018 +==================================== +- Fix regression on big-endian architectures. Ouch! +- Misc bug and build fixes. +- Fix subsetting of simple GSUB/GDEF. +- Merge CFF / CFF2 support contributed by Adobe. This mostly involves + the subsetter, but also get_glyph_extents on CFF fonts. + +New API in hb-aat.h: ++hb_aat_layout_has_substitution() ++hb_aat_layout_has_positioning() ++hb_aat_layout_has_tracking() + + +Overview of changes leading to 2.2.0 +Thursday, November 29, 2018 +==================================== +- Misc shaping bug fixes. +- Add font variations named-instance API. +- Deprecate font variations axis enumeration API and add replacement. +- AAT shaping improvements: + o Fixed 'kern' table Format 2 implementation. + o Implement 'feat' table API for feature detection. + o Blacklist 'GSUB' table of fonts from 'MUTF' foundry that also have 'morx'. + +New API: ++hb_aat_layout_feature_type_t ++hb_aat_layout_feature_selector_t ++hb_aat_layout_get_feature_types() ++hb_aat_layout_feature_type_get_name_id ++hb_aat_layout_feature_selector_info_t ++HB_AAT_LAYOUT_NO_SELECTOR_INDEX ++hb_aat_layout_feature_type_get_selector_infos() ++hb_ot_var_axis_flags_t ++hb_ot_var_axis_info_t ++hb_ot_var_get_axis_infos() ++hb_ot_var_find_axis_info() ++hb_ot_var_get_named_instance_count() ++hb_ot_var_named_instance_get_subfamily_name_id() ++hb_ot_var_named_instance_get_postscript_name_id() ++hb_ot_var_named_instance_get_design_coords() + +Deprecated API: ++HB_OT_VAR_NO_AXIS_INDEX ++hb_ot_var_axis_t ++hb_ot_var_get_axes() ++hb_ot_var_find_axis() + + +Overview of changes leading to 2.1.3 +Friday, November 16, 2018 +==================================== +- Fix AAT 'mort' shaping, which was broken in 2.1.2 + + +Overview of changes leading to 2.1.2 +Friday, November 16, 2018 +==================================== +- Various internal changes. +- AAT shaping improvements: + o Implement kern table Format 1 state-machine-based kerning. + o Implement cross-stream kerning (cursive positioning, etc). + o Ignore emptyish GSUB tables (zero scripts) if morx present. + o Don't apply GPOS if morx is being applied. Matches Apple. + + +-Overview of changes leading to 2.1.1 +Monday, November 5, 2018 +==================================== +- AAT improvements: + o Implement 'mort' table. + o Implement 'kern' subtables Format 1 and Format 3. + + +Overview of changes leading to 2.1.0 +Tuesday, October 30, 2018 +==================================== +- AAT shaping improvements: + o Allow user controlling AAT features, for whole buffer only currently. + o Several 'morx' fixes. + o Implement tuple-kerns in 'kerx'; Fixes kerning with Apple default + San Francisco fonts. +- Support for color fonts: + o COLR/CPAL API to fetch color layers. + o SVG table to fetch SVG documents. + o CBDT/sbix API to fetch PNG images. +- New 'name' table API. +- hb-ot-font now uses 'VORG' table to correctly position CFF glyphs + in vertical layout. +- Various fuzzer-found bug fixes. + +Changed API: + +A type and a macro added in 2.0.0 were renamed: + +hb_name_id_t -> hb_ot_name_id_t +HB_NAME_ID_INVALID -> HB_OT_NAME_ID_INVALID + +New API: + ++hb_color_t ++HB_COLOR ++hb_color_get_alpha() ++hb_color_get_red() ++hb_color_get_green() ++hb_color_get_blue() ++hb_ot_color_has_palettes() ++hb_ot_color_palette_get_count() ++hb_ot_color_palette_get_name_id() ++hb_ot_color_palette_color_get_name_id() ++hb_ot_color_palette_flags_t ++hb_ot_color_palette_get_flags() ++hb_ot_color_palette_get_colors() ++hb_ot_color_has_layers() ++hb_ot_color_layer_t ++hb_ot_color_glyph_get_layers() ++hb_ot_color_has_svg() ++hb_ot_color_glyph_reference_svg() ++hb_ot_color_has_png() ++hb_ot_color_glyph_reference_png() + ++hb_ot_name_id_t ++HB_OT_NAME_ID_INVALID ++HB_OT_NAME_ID_COPYRIGHT ++HB_OT_NAME_ID_FONT_FAMILY ++HB_OT_NAME_ID_FONT_SUBFAMILY ++HB_OT_NAME_ID_UNIQUE_ID ++HB_OT_NAME_ID_FULL_NAME ++HB_OT_NAME_ID_VERSION_STRING ++HB_OT_NAME_ID_POSTSCRIPT_NAME ++HB_OT_NAME_ID_TRADEMARK ++HB_OT_NAME_ID_MANUFACTURER ++HB_OT_NAME_ID_DESIGNER ++HB_OT_NAME_ID_DESCRIPTION ++HB_OT_NAME_ID_VENDOR_URL ++HB_OT_NAME_ID_DESIGNER_URL ++HB_OT_NAME_ID_LICENSE ++HB_OT_NAME_ID_LICENSE_URL ++HB_OT_NAME_ID_TYPOGRAPHIC_FAMILY ++HB_OT_NAME_ID_TYPOGRAPHIC_SUBFAMILY ++HB_OT_NAME_ID_MAC_FULL_NAME ++HB_OT_NAME_ID_SAMPLE_TEXT ++HB_OT_NAME_ID_CID_FINDFONT_NAME ++HB_OT_NAME_ID_WWS_FAMILY ++HB_OT_NAME_ID_WWS_SUBFAMILY ++HB_OT_NAME_ID_LIGHT_BACKGROUND ++HB_OT_NAME_ID_DARK_BACKGROUND ++HB_OT_NAME_ID_VARIATIONS_PS_PREFIX ++hb_ot_name_entry_t ++hb_ot_name_list_names() ++hb_ot_name_get_utf8() ++hb_ot_name_get_utf16() ++hb_ot_name_get_utf32() + + +Overview of changes leading to 2.0.2 +Saturday, October 20, 2018 +==================================== +- Fix two minor memory access issues in AAT tables. + + +Overview of changes leading to 2.0.1 +Friday, October 19, 2018 +==================================== +- Fix hb-version.h reported release version that went wrong (1.8.0) + with previous release. +- Fix extrapolation in 'trak' table. +- Fix hb-font infinite-recursion issue with some font funcs and + subclassed fonts. +- Implement variation-kerning format in kerx table, although without + variation. +- Fix return value of hb_map_is_empty(). + + +Overview of changes leading to 2.0.0 +Thursday, October 18, 2018 +==================================== +- Added AAT shaping support (morx/kerx/trak). + Automatically used if GSUB/GPOS are not available respectively. + Set HB_OPTIONS=aat env var to have morx/kerx preferred over + GSUB/GPOS. +- Apply TrueType kern table internally, instead of relying on + hb_font_t callbacks. +- Khmer shaper significantly rewritten to better match Uniscribe. +- Indic3 tags ('dev3', etc) are passed to USE shaper. +- .dfont Mac font containers implemented. +- Script- and language-mapping revamped to better use BCP 47. +- Misc USE and Indic fixes. +- Misc everything fixes. +- Too many things to list. Biggest release since 0.9.1, with + over 500 commits in just over 5 weeks! Didn't intend it to + be a big release. Just happened to become. +- hb-ft now locks underlying FT_Face during use. + +API changes: + +- Newly-created hb_font_t's now have our internal "hb-ot-font" + callbacks set on them, so they should work out of the box + without any callbacks set. If callbacks are set, everything + is back to what it was before, the fallback callbacks are + null. If you to get the internal implementation modified, + sub_font it. + +- New hb_font_funcs_set_nominal_glyphs_func() allows speeding + up character to glyph mapping. + +New API: ++HB_FEATURE_GLOBAL_START ++HB_FEATURE_GLOBAL_END ++hb_buffer_set_invisible_glyph() ++hb_buffer_get_invisible_glyph() ++hb_font_funcs_set_nominal_glyphs_func() ++hb_ot_layout_table_select_script() ++hb_ot_layout_script_select_language() ++hb_ot_layout_feature_get_name_ids() ++hb_ot_layout_feature_get_characters() ++hb_name_id_t ++HB_NAME_ID_INVALID ++HB_OT_MAX_TAGS_PER_SCRIPT ++hb_ot_tags_from_script_and_language() ++hb_ot_tags_to_script_and_language() + +Deprecated API: +-hb_font_funcs_set_glyph_func() +-hb_unicode_eastasian_width_func_t +-hb_unicode_funcs_set_eastasian_width_func() +-hb_unicode_eastasian_width() +-hb_unicode_decompose_compatibility_func_t +-HB_UNICODE_MAX_DECOMPOSITION_LEN +-hb_unicode_funcs_set_decompose_compatibility_func() +-hb_unicode_decompose_compatibility() +-hb_font_funcs_set_glyph_h_kerning_func() +-hb_font_funcs_set_glyph_v_kerning_func() +-hb_font_get_glyph_h_kerning() +-hb_font_get_glyph_v_kerning() +-hb_font_get_glyph_kerning_for_direction() +-hb_ot_layout_table_choose_script() +-hb_ot_layout_script_find_language() +-hb_ot_tags_from_script() +-hb_ot_tag_from_language() + + +Overview of changes leading to 1.9.0 +Monday, September 10, 2018 +==================================== +- Added 'cmap' API to hb_face_t. +- Face-builder API. +- hb-ot-font re-creation should be much leaner now, as the + font tables it uses are cached on hb_face_t now. +- Internal source header file name changes: + hb-*-private.hh is renamed to hb-*.hh. + +New API: ++HB_UNICODE_MAX ++hb_face_collect_unicodes() ++hb_face_collect_variation_selectors() ++hb_face_collect_variation_unicodes() ++hb_face_builder_create() ++hb_face_builder_add_table() + + +Overview of changes leading to 1.8.8 +Tuesday, August 14, 2018 +==================================== +- Fix hb-icu crash on architectures where compare_exchange_weak() can + fail falsely. This bug was introduced in 1.8.4. + https://bugs.chromium.org/p/chromium/issues/detail?id=873568 +- More internal refactoring of atomic operations and singletons. +- API changes: + The following functions do NOT reference their return value before + returning: + * hb_unicode_funcs_get_default() + * hb_glib_get_unicode_funcs() + * hb_icu_get_unicode_funcs() + This is consistent with their naming ("get", instead of "reference") + as well as how they are used in the wild (ie. no one calls destroy() + on their return value.) + + +Overview of changes leading to 1.8.7 +Wednesday, August 8, 2018 +==================================== +- Fix assertion failure with GDEF-blacklisted fonts. + + +Overview of changes leading to 1.8.6 +Tuesday, August 7, 2018 +==================================== +- Internal code shuffling. +- New API to speed up getting advance widths for implementations + that have heavy overhead in get_h_advance callback: ++hb_font_funcs_set_glyph_h_advances_func ++hb_font_funcs_set_glyph_v_advances_func ++hb_font_get_glyph_advances_for_direction ++hb_font_get_glyph_h_advances ++hb_font_get_glyph_h_advances_func_t ++hb_font_get_glyph_v_advances ++hb_font_get_glyph_v_advances_func_t + + +Overview of changes leading to 1.8.5 +Wednesday, August 1, 2018 +==================================== +- Major Khmer shaper improvements to better match Microsoft. +- Indic bug fixes. +- Internal improvements to atomic operations. + + +Overview of changes leading to 1.8.4 +Tuesday, July 17, 2018 +==================================== +- Fix build on non-C++11. +- Use C++-style GCC atomics and C++11 atomics. + + +Overview of changes leading to 1.8.3 +Wednesday, July 11, 2018 +==================================== +- A couple of Indic / USE bug fixes. +- Disable vectorization, as it was causing unaligned access bus error on + certain 32bit architectures. + + +Overview of changes leading to 1.8.2 +Tuesday, July 3, 2018 +==================================== +- Fix infinite loop in Khmer shaper. +- Improve hb_blob_create_from_file() for streams. + + +Overview of changes leading to 1.8.1 +Tuesday, June 12, 2018 +==================================== +- Fix hb-version.h file generation; last two releases went out with wrong ones. +- Add correctness bug in hb_set_t operations, introduced in 1.7.7. +- Remove HB_SUBSET_BUILTIN build option. Not necessary. + + +Overview of changes leading to 1.8.0 +Tuesday, June 5, 2018 +==================================== +- Update to Unicode 11.0.0. + + +Overview of changes leading to 1.7.7 +Tuesday, June 5, 2018 +==================================== +- Lots of internal changes, but not yet exposed externally. +- All HarfBuzz objects are significantly smaller in size now. +- Sinhala: Position repha on top of post-consonant, not base. + This better matches Windows 10 behavior, which was changed + from previous Windows versions. +- New build options: + o New cpp macro HB_NO_ATEXIT + o New cpp macro HB_SUBSET_BUILTIN +- Significant libharfbuzz-subset changes. API subject to change. +- New API in libharfbuzz: + ++hb_blob_create_from_file() ++hb_face_count() + +A hashmap implementation: ++hb-map.h ++HB_MAP_VALUE_INVALID ++hb_map_t ++hb_map_create() ++hb_map_get_empty() ++hb_map_reference() ++hb_map_destroy() ++hb_map_set_user_data() ++hb_map_get_user_data() ++hb_map_allocation_successful() ++hb_map_clear() ++hb_map_is_empty() ++hb_map_get_population() ++hb_map_set() ++hb_map_get() ++hb_map_del() ++hb_map_has() + + +Overview of changes leading to 1.7.6 +Wednesday, March 7, 2018 +==================================== + +- Fix to hb_set_t binary operations. Ouch. +- New experimental harfbuzz-subset library. All of hb-subset.h + is experimental right now and API WILL change. + +- New API: +hb_blob_copy_writable_or_fail() +HB_OT_TAG_BASE +hb_set_previous() +hb_set_previous_range() + + +Overview of changes leading to 1.7.5 +Tuesday, January 30, 2018 +==================================== + +- Separate Khmer shaper from Indic. +- First stab at AAT morx. Not hooked up. +- Misc bug fixes. + + +Overview of changes leading to 1.7.4 +Wednesday, December 20, 2017 +==================================== + +- Fix collect_glyphs() regression caused by hb_set_t changes. + + +Overview of changes leading to 1.7.3 +Monday, December 18, 2017 +==================================== + +- hb_set_t performance tuning and optimizations. +- Speed up collect_glyphs() and reject garbage data. +- In hb_coretext_font_create() set font point-size (ptem). +- Misc fixes. + + +Overview of changes leading to 1.7.2 +Monday, December 4, 2017 +==================================== + +- Optimize hb_set_add_range(). +- Misc fixes. +- New API: +hb_coretext_font_create() + + +Overview of changes leading to 1.7.1 +Tuesday, November 14, 2017 +==================================== + +- Fix atexit object destruction regression. +- Fix minor integer-overflow. + + +Overview of changes leading to 1.7.0 +Monday, November 13, 2017 +==================================== + +- Minor Indic fixes. +- Implement kerning and glyph names in hb-ot-font. +- Various DSO optimization re .data and .bss sizes. +- Make C++11 optional; build fixes. +- Mark all other backends "unsafe-to-break". +- Graphite fix. + + +Overview of changes leading to 1.6.3 +Thursday, October 26th, 2017 +==================================== + +- Fix hb_set_t some more. Should be solid now. +- Implement get_glyph_name() for hb-ot-font. +- Misc fixes. + + +Overview of changes leading to 1.6.2 +Monday, October 23nd, 2017 +==================================== + +- Yesterday's release had a bad crasher; don't use it. That's what + happens when one works on Sunday... + https://github.com/harfbuzz/harfbuzz/issues/578 +- Build fixes for FreeBSD and Chrome Android. + + +Overview of changes leading to 1.6.1 +Sunday, October 22nd, 2017 +==================================== + +- Don't skip over COMBINING GRAPHEME JOINER when ligating, etc. + To be refined: https://github.com/harfbuzz/harfbuzz/issues/554 +- Faster hb_set_t implementation. +- Don't use deprecated ICU API. +- Fix undefined-behavior in Myanmar shaper, introduced in 1.6.0 +- Deprecated API: + hb_set_invert() + + +Overview of changes leading to 1.6.0 +Friday, October the 13th, 2017 +==================================== + +- Update to Unicode 10. + +- Various Indic and Universal Shaping Engine fixes as a result of + HarfBuzz Hackfest with Jonathan Kew at Web Engines Hackfest at + the Igalia offices in A Coruña, Spain. Thanks Igalia for having + us! + +- Implement Unicode Arabic Mark Ordering Algorithm UTR#53. + +- Implement optical sizing / tracking in CoreText backend, using + new API hb_font_set_ptem(). + +- Allow notifying hb_font_t that underlying FT_Face changed sizing, + using new API hb_ft_font_changed(). + +- More Graphite backend RTL fixes. + +- Fix caching of variable font shaping plans. + +- hb-view / hb-shape now accept following new arguments: + + o --unicodes: takes a list of hex numbers that represent Unicode + codepoints. + +New API: ++hb_face_get_table_tags() ++hb_font_set_ptem() ++hb_font_get_ptem() ++hb_ft_font_changed() + + +Overview of changes leading to 1.5.1 +Tuesday, September 5, 2017 +==================================== + +- Fix "unsafe-to-break" in fallback shaping and other corner cases. + All our tests pass with --verify now, meaning unsafe-to-break API + works as expected. +- Add --unicodes to hb-view / hb-shape. +- [indic] Treat Consonant_With_Stacker as consonant. This will need + further tweaking. +- hb_buffer_diff() tweaks. + + +Overview of changes leading to 1.5.0 +Wednesday, August 23, 2017 +==================================== + +- Misc new API, for appending a buffer to another, and for comparing + contents of two buffers for types of differences. + +- New "unsafe-to-break" API. Can be used to speed up reshaping + in line-breaking situations. Essentially, after shaping, it returns + positions in the input string (some of the cluster boundaries) that + are "safe to break" in that if the text is segmented at that position + and two sides reshaped and concatenated, the shaping result is + exactly the same as shaping the text in one piece. + + hb-view and hb-shape and hb-shape now take --verify, which verifies + the above property. + + Some corner cases of the implementation are still not quite working. + Those will be fixed in subsequent releases. + +- New API: + +hb_buffer_append() + +hb_glyph_flags_t +HB_GLYPH_FLAG_UNSAFE_TO_BREAK +HB_GLYPH_FLAG_DEFINED +hb_glyph_info_get_glyph_flags() + +HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS + +hb_buffer_diff_flags_t +HB_BUFFER_DIFF_FLAG_EQUAL +HB_BUFFER_DIFF_FLAG_CONTENT_TYPE_MISMATCH +HB_BUFFER_DIFF_FLAG_LENGTH_MISMATCH +HB_BUFFER_DIFF_FLAG_NOTDEF_PRESENT +HB_BUFFER_DIFF_FLAG_DOTTED_CIRCLE_PRESENT +HB_BUFFER_DIFF_FLAG_CODEPOINT_MISMATCH +HB_BUFFER_DIFF_FLAG_CLUSTER_MISMATCH +HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH +HB_BUFFER_DIFF_FLAG_POSITION_MISMATCH +hb_buffer_diff + + +Overview of changes leading to 1.4.8 +Tuesday, August 8, 2017 +==================================== + +- Major fix to avar table handling. +- Rename hb-shape --show-message to --trace. +- Build fixes. + + +Overview of changes leading to 1.4.7 +Tuesday, July 18, 2017 +==================================== + +- Multiple Indic, Tibetan, and Cham fixes. +- CoreText: Allow disabling kerning. +- Adjust Arabic feature order again. +- Misc build fixes. + + +Overview of changes leading to 1.4.6 +Sunday, April 23, 2017 +==================================== + +- Graphite2: Fix RTL positioning issue. +- Backlist GDEF of more versions of Padauk and Tahoma. +- New, experimental, cmake alternative build system. + + +Overview of changes leading to 1.4.5 +Friday, March 10, 2017 +==================================== + +- Revert "Fix Context lookup application when moving back after a glyph..." + This introduced memory access problems. To be fixed properly soon. + + +Overview of changes leading to 1.4.4 +Sunday, March 5, 2017 +==================================== + +- Fix Context lookup application when moving back after a glyph deletion. +- Fix buffer-overrun in Bengali. + + +Overview of changes leading to 1.4.3 +Saturday, February 25, 2017 +==================================== + +- Route Adlam script to Arabic shaper. +- Misc fixes. +- New API: + hb_font_set_face() +- Deprecate API: + hb_graphite2_font_get_gr_font() + + +Overview of changes leading to 1.4.2 +Monday, January 23, 2017 +==================================== + +- Implement OpenType Font Variation tables avar/fvar/HVAR/VVAR. +- hb-shape and hb-view now accept --variations. +- New API: + +hb_variation_t +hb_variation_from_string() +hb_variation_to_string() + +hb_font_set_variations() +hb_font_set_var_coords_design() +hb_font_get_var_coords_normalized() + +hb-ot-var.h: +hb_ot_var_axis_t +hb_ot_var_has_data() +hb_ot_var_get_axis_count() +hb_ot_var_get_axes() +hb_ot_var_find_axis() +hb_ot_var_normalize_variations() +hb_ot_var_normalize_coords() + +- MVAR to be implemented later. Access to named instances to be + implemented later as well. + +- Misc fixes. + + +Overview of changes leading to 1.4.1 +Thursday, January 5, 2017 +==================================== + +- Always build and use UCDN for Unicode data by default. + Reduces dependence on version of Unicode data in glib, + specially in the Windows bundles we are shipping, which + have very old glib. + + +Overview of changes leading to 1.4.0 +Thursday, January 5, 2017 +==================================== + +- Merged "OpenType GX" branch which adds core of support for + OpenType 1.8 Font Variations. To that extent, the relevant + new API is: + +New API: +hb_font_set_var_coords_normalized() + + with supporting API: + +New API: +HB_OT_LAYOUT_NO_VARIATIONS_INDEX +hb_ot_layout_table_find_feature_variations() +hb_ot_layout_feature_with_variations_get_lookups() +hb_shape_plan_create2() +hb_shape_plan_create_cached2() + + Currently variations in GSUB/GPOS/GDEF are fully supported, + and no other tables are supported. In particular, fvar/avar + are NOT supported, hence the hb_font_set_var_coords_normalized() + taking normalized coordinates. API to take design coordinates + will be added in the future. + + HVAR/VVAR/MVAR support will also be added to hb-ot-font in the + future. + +- Fix regression in GDEF glyph class processing. +- Add decompositions for Chakma, Limbu, and Balinese in USE shaper. +- Misc fixes. + + +Overview of changes leading to 1.3.4 +Monday, December 5, 2016 +==================================== + +- Fix vertical glyph origin in hb-ot-font. +- Implement CBDT/CBLC color font glyph extents in hb-ot-font. + + +Overview of changes leading to 1.3.3 +Wednesday, September 28, 2016 +==================================== + +- Implement parsing of OpenType MATH table. +New API: +HB_OT_TAG_MATH +HB_OT_MATH_SCRIPT +hb_ot_math_constant_t +hb_ot_math_kern_t +hb_ot_math_glyph_variant_t +hb_ot_math_glyph_part_flags_t +hb_ot_math_glyph_part_t +hb_ot_math_has_data +hb_ot_math_get_constant +hb_ot_math_get_glyph_italics_correction +hb_ot_math_get_glyph_top_accent_attachment +hb_ot_math_get_glyph_kerning +hb_ot_math_is_glyph_extended_shape +hb_ot_math_get_glyph_variants +hb_ot_math_get_min_connector_overlap +hb_ot_math_get_glyph_assembly + + +Overview of changes leading to 1.3.2 +Wednesday, September 27, 2016 +==================================== + +- Fix build of hb-coretext on older OS X versions. + + +Overview of changes leading to 1.3.1 +Wednesday, September 7, 2016 +==================================== + +- Blacklist bad GDEF of more fonts (Padauk). +- More CoreText backend crash fixes with OS X 10.9.5. +- Misc fixes. + + +Overview of changes leading to 1.3.0 +Thursday, July 21, 2016 +==================================== + +- Update to Unicode 9.0.0 +- Move Javanese from Indic shaper to Universal Shaping Engine. +- Allow MultipleSubst to delete a glyph (matching Windows engine). +- Update Universal Shaping Engine to latest draft from Microsoft. +- DirectWrite backend improvements. Note: this backend is for testing ONLY. +- CoreText backend improvements with unreachable fonts. +- Implement symbol fonts (cmap 3.0.0) in hb-ft and hb-ot-font. +- Blacklist bad GDEF of more fonts (Tahoma & others). +- Misc fixes. + + +Overview of changes leading to 1.2.7 +Monday, May 2, 2016 +==================================== + +- Blacklist another version of Times New Roman (Bold) Italic from Windows 7. +- Fix Mongolian Free Variation Selectors shaping with certain fonts. +- Fix Tibetan shorthand contractions shaping. +- Improved list of language tag mappings. +- Unbreak build on Windows CE. +- Make 'glyf' table loading lazy in hb-ot-font. + + +Overview of changes leading to 1.2.6 +Friday, April 8, 2016 +==================================== + +- Blacklist GDEF table of another set of Times New Roman (Bold) Italic. +- DirectWrite backend improvements. Note: DirectWrite backend is + exclusively for our internal testing and should NOT be used in any + production system whatsoever. + + +Overview of changes leading to 1.2.5 +Monday, April 4, 2016 +==================================== + +- Fix GDEF mark-filtering-set, which was broken in 1.2.3. + + +Overview of changes leading to 1.2.4 +Thursday, March 17, 2016 +==================================== + +- Synthesize GDEF glyph class for any glyph that does not have one in GDEF. + I really hope we don't discover broken fonts that shape badly with this + change. +- Misc build and other minor fixes. +- API changes: + - Added HB_NDEBUG. It's fine for production systems to define this to + disable high-overhead debugging checks. However, I also reduced the + overhead of those checks, so it's a non-issue right now. You can + forget it. Just not defining anything at all is fine. + + +Overview of changes leading to 1.2.3 +Thursday, February 25, 2016 +==================================== + +- Blacklist GDEF table of certain versions of Times New Roman (Bold) Italic, + due to bug in glyph class of ASCII double-quote character. This should + address "regression" introduced in 1.2.0 when we switched mark zeroing + in most shapers from BY_UNICODE_LATE to BY_GDEF_LATE. + This fourth release in a week should finally stablize things... + +- hb-ot-font's get_glyph() implementation saw some optimizations. Though, + might be really hard to measure in real-world situations. + +- Also, two rather small API changes: + +We now disable some time-consuming internal bookkeeping if built with NDEBUG +defined. This is a first time that we use NDEBUG to disable debug code. If +there exist production systems that do NOT want to enable NDEBUG, please let +me know and I'll add HB_NDEBUG. + +Added get_nominal_glyph() and get_variation_glyph() instead of get_glyph() + +New API: +- hb_font_get_nominal_glyph_func_t +- hb_font_get_variation_glyph_func_t +- hb_font_funcs_set_nominal_glyph_func() +- hb_font_funcs_set_variation_glyph_func() +- hb_font_get_nominal_glyph() +- hb_font_get_variation_glyph() + +Deprecated API: +- hb_font_get_glyph_func_t +- hb_font_funcs_set_glyph_func() + +Clients that implement their own font-funcs are encouraged to replace +their get_glyph() implementation with a get_nominal_glyph() and +get_variation_glyph() pair. The variation version can assume that +variation_selector argument is not zero. Old (deprecated) functions +will continue working indefinitely using internal gymnastics; it is +just more efficient to use the new functions. + + +Overview of changes leading to 1.2.2 +Wednesday, February 24, 2016 +==================================== + +- Fix regression with mark positioning with fonts that have + non-zero mark advances. This was introduced in 1.2.0 while + trying to make mark and cursive attachments to work together. + I have partially reverted that, so this version is much more + like what we had before. All clients who updated to 1.2.0 + should update to this version. + + +Overview of changes leading to 1.2.1 +Tuesday, February 23, 2016 +==================================== + +- CoreText: Fix bug with wrong scale if font scale was changed later. + https://github.com/libass/libass/issues/212 +- CoreText: Drastically speed up font initialization. +- CoreText: Fix tiny leak. +- Group ZWJ/ZWNJ with previous syllable under cluster-level=0. + https://github.com/harfbuzz/harfbuzz/issues/217 +- Add test/shaping/README.md about how to add tests to the suite. + + +Overview of changes leading to 1.2.0 +Friday, February 19, 2016 +==================================== + +- Fix various issues (hangs mostly) in case of memory allocation failure. +- Change mark zeroing types of most shapers from BY_UNICODE_LATE to + BY_GDEF_LATE. This seems to be what Uniscribe does. +- Change mark zeroing of USE shaper from NONE to BY_GDEF_EARLY. That's + what Windows does. +- Allow GPOS cursive connection on marks, and fix the interaction with + mark attachment. This work resulted in some changes to how mark + attachments work. See: + https://github.com/harfbuzz/harfbuzz/issues/211 + https://github.com/harfbuzz/harfbuzz/commit/86c68c7a2c971efe8e35b1f1bd99401dc8b688d2 +- Graphite2 shaper: improved negative advance handling (eg. Nastaliq). +- Add nmake-based build system for Windows. +- Minor speedup. +- Misc. improvements. + + +Overview of changes leading to 1.1.3 +Monday, January 11, 2016 +==================================== + +- Ported Indic shaper to Unicode 8.0 data. +- Universal Shaping Engine fixes. +- Speed up CoreText shaper when font fallback happens in CoreText. +- Documentation improvements, thanks to Khaled Hosny. +- Very rough directwrite shaper for testing, thanks to Ebrahim Byagowi. +- Misc bug fixes. +- New API: + + * Font extents: + hb_font_extents_t + hb_font_get_font_extents_func_t + hb_font_get_font_h_extents_func_t + hb_font_get_font_v_extents_func_t + hb_font_funcs_set_font_h_extents_func + hb_font_funcs_set_font_v_extents_func + hb_font_get_h_extents + hb_font_get_v_extents + hb_font_get_extents_for_direction + + * Buffer message (aka debug): + hb_buffer_message_func_t + hb_buffer_set_message_func() + Actual message protocol to be fleshed out later. + + +Overview of changes leading to 1.1.2 +Wednesday, November 26, 2015 +==================================== + +- Fix badly-broken fallback shaper that affected terminology. + https://github.com/harfbuzz/harfbuzz/issues/187 +- Fix y_scaling in Graphite shaper. +- API changes: + * An unset glyph_h_origin() function in font-funcs now (sensibly) + implies horizontal origin at 0,0. Ie, the nil callback returns + true instead of false. As such, implementations that have a + glyph_h_origin() that simply returns true, can remove that function + with HarfBuzz >= 1.1.2. This results in a tiny speedup. + + +Overview of changes leading to 1.1.1 +Wednesday, November 24, 2015 +==================================== + +- Build fixes, specially for hb-coretext. + + +Overview of changes leading to 1.1.0 +Wednesday, November 18, 2015 +==================================== + +- Implement 'stch' stretch feature for Syriac Abbreviation Mark. + https://github.com/harfbuzz/harfbuzz/issues/141 +- Disable use of decompose_compatibility() callback. +- Implement "shaping" of various Unicode space characters, even + if the font does not support them. + https://github.com/harfbuzz/harfbuzz/issues/153 +- If font does not support U+2011 NO-BREAK HYPHEN, fallback to + U+2010 HYPHEN. +- Changes resulting from libFuzzer continuous fuzzing: + * Reject font tables that need more than 8 edits, + * Bound buffer growth during shaping to 32x, + * Fix assertions and other issues at OOM / buffer max-growth. +- Misc fixes and optimizations. +- API changes: + * All fonts created with hb_font_create() now inherit from + (ie. have parent) hb_font_get_empty(). + + +Overview of changes leading to 1.0.6 +Thursday, October 15, 2015 +==================================== + +- Reduce max nesting level in OT lookups from 8 to 6. + Should not affect any real font as far as I know. +- Fix memory access issue in ot-font. +- Revert default load-flags of fonts created using hb_ft_font_create() + back to FT_LOAD_DEFAULT|FT_LOAD_NO_HINTING. This was changed in + last release (1.0.5), but caused major issues, so revert. + https://github.com/harfbuzz/harfbuzz/issues/143 + + +Overview of changes leading to 1.0.5 +Tuesday, October 13, 2015 +==================================== + +- Fix multiple memory access bugs discovered using libFuzzer. + https://github.com/harfbuzz/harfbuzz/issues/139 + Everyone should upgrade to this version as soon as possible. + We now have continuous fuzzing set up, to avoid issues like + these creeping in again. +- Misc fixes. + +- New API: + * hb_font_set_parent(). + * hb_ft_font_[sg]et_load_flags() + The default flags for fonts created using hb_ft_font_create() + has changed to default to FT_LOAD_DEFAULT now. Previously it + was defaulting to FT_LOAD_DFEAULT|FT_LOAD_NO_HINTING. + +- API changes: + * Fonts now default to units-per-EM as their scale, instead of 0. + * hb_font_create_sub_font() does NOT make parent font immutable + anymore. hb_font_make_immutable() does. + + +Overview of changes leading to 1.0.4 +Wednesday, September 30, 2015 +==================================== + +- Fix minor out-of-bounds read error. + + +Overview of changes leading to 1.0.3 +Tuesday, September 1, 2015 +==================================== + +- Start of user documentation, from Simon Cozens! +- Implement glyph_extents() for TrueType fonts in hb-ot-font. +- Improve GPOS cursive attachments with conflicting lookups. +- More fixes for cluster-level = 1. +- Uniscribe positioning fix. + + +Overview of changes leading to 1.0.2 +Wednesday, August 19, 2015 +==================================== + +- Fix shaping with cluster-level > 0. +- Fix Uniscribe backend font-size scaling. +- Declare dependencies in harfbuzz.pc. + FreeType is not declared though, to avoid bugs in pkg-config + 0.26 with recursive dependencies. +- Slightly improved debug infrastructure. More to come later. +- Misc build fixes. + + +Overview of changes leading to 1.0.1 +Monday, July 27, 2015 +==================================== + +- Fix out-of-bounds access in USE shaper. + + +Overview of changes leading to 1.0.0 +Sunday, July 26, 2015 +==================================== + +- Implement Universal Shaping Engine: + https://www.microsoft.com/typography/OpenTypeDev/USE/intro.htm + http://blogs.windows.com/bloggingwindows/2015/02/23/windows-shapes-the-worlds-languages/ +- Bump version to 1.0.0. The soname was NOT bumped. + + +Overview of changes leading to 0.9.42 +Thursday, July 26, 2015 +===================================== + +- New API to allow for retrieving finer-grained cluster + mappings if the client desires to handle them. Default + behavior is unchanged. +- Fix cluster merging when removing default-ignorables. +- Update to Unicode 8.0 +- hb-graphite2 fixes. +- Misc fixes. +- Removed HB_NO_MERGE_CLUSTERS hack. +- New API: + hb_buffer_cluster_level_t enum + hb_buffer_get_cluster_level() + hb_buffer_set_cluster_level() + hb-shape / hb-view --cluster-level + + +Overview of changes leading to 0.9.41 +Thursday, June 18, 2015 +===================================== + +- Fix hb-coretext with trailing whitespace in right-to-left. +- New API: hb_buffer_reverse_range(). +- Allow implementing atomic ops in config.h. +- Fix hb_language_t in language bindings. +- Misc fixes. + + +Overview of changes leading to 0.9.40 +Friday, March 20, 2015 +===================================== + +- Another hb-coretext crasher fix. Ouch! +- Happy Norouz! + + +Overview of changes leading to 0.9.39 +Wednesday, March 4, 2015 +===================================== + +- Critical hb-coretext fixes. +- Optimizations and refactoring; no functional change + expected. +- Misc build fixes. + + +Overview of changes leading to 0.9.38 +Friday, January 23, 2015 +===================================== + +- Fix minor out-of-bounds access in Indic shaper. +- Change New Tai Lue shaping engine from South-East Asian to default, + reflecting change in Unicode encoding model. +- Add hb-shape --font-size. Can take up to two numbers for separate + x / y size. +- Fix CoreText and FreeType scale issues with negative scales. +- Reject blobs larger than 2GB. This might break some icu-le-hb clients + that need security fixes. See: + http://www.icu-project.org/trac/ticket/11450 +- Avoid accessing font tables during face destruction, in casce rogue + clients released face data already. +- Fix up gobject-introspection a bit. Python bindings kinda working. + See README.python. +- Misc fixes. +- API additions: + hb_ft_face_create_referenced() + hb_ft_font_create_referenced() + + +Overview of changes leading to 0.9.37 +Wednesday, December 17, 2014 +===================================== + +- Fix out-of-bounds access in Context lookup format 3. +- Indic: Allow ZWJ/ZWNJ before syllable modifiers. + + +Overview of changes leading to 0.9.36 +Thursday, November 20, 2014 +===================================== + +- First time that three months went by without a release since + 0.9.2 was released on August 10, 2012! +- Fix performance bug in hb_ot_collect_glyphs(): + https://bugzilla.mozilla.org/show_bug.cgi?id=1090869 +- Add basic vertical-text support to hb-ot-font. +- Misc build fixes. + + +Overview of changes leading to 0.9.35 +Saturday, August 13, 2014 +===================================== + +- Fix major shape-plan caching bug when more than one shaper were + provided to hb_shape_full() (as exercised by XeTeX). + http://www.mail-archive.com/debian-bugs-dist@lists.debian.org/msg1246370.html +- Fix Arabic fallback shaping regression. This was broken in 0.9.32. +- Major hb-coretext fixes. That backend is complete now, including + respecing buffer direction and language, down to vertical writing. +- Build fixes for Windows CE. Should build fine now. +- Misc fixes: + Use atexit() only if it's safe to call from shared library + https://bugs.freedesktop.org/show_bug.cgi?id=82246 + Mandaic had errors in its Unicode Joining_Type + https://bugs.freedesktop.org/show_bug.cgi?id=82306 +- API changes: + + * hb_buffer_clear_contents() does not reset buffer flags now. + + After 763e5466c0a03a7c27020e1e2598e488612529a7, one doesn't + need to set flags for different pieces of text. The flags now + are something the client sets up once, depending on how it + actually uses the buffer. As such, don't clear it in + clear_contents(). + + I don't expect any changes to be needed to any existing client. + + +Overview of changes leading to 0.9.34 +Saturday, August 2, 2014 +===================================== + +- hb_feature_from_string() now accepts CSS font-feature-settings format. +- As a result, hb-shape / hb-view --features also accept CSS-style strings. + Eg, "'liga' off" is accepted now. +- Add old-spec Myanmar shaper: + https://bugs.freedesktop.org/show_bug.cgi?id=81775 +- Don't apply 'calt' in Hangul shaper. +- Fix mark advance zeroing for Hebrew shaper: + https://bugs.freedesktop.org/show_bug.cgi?id=76767 +- Implement Windows-1256 custom Arabic shaping. Only built on Windows, + and requires help from get_glyph(). Used by Firefox. + https://bugzilla.mozilla.org/show_bug.cgi?id=1045139 +- Disable 'liga' in vertical text. +- Build fixes. +- API changes: + + * Make HB_BUFFER_FLAG_BOT/EOT easier to use. + + Previously, we expected users to provide BOT/EOT flags when the + text *segment* was at paragraph boundaries. This meant that for + clients that provide full paragraph to HarfBuzz (eg. Pango), they + had code like this: + + hb_buffer_set_flags (hb_buffer, + (item_offset == 0 ? HB_BUFFER_FLAG_BOT : 0) | + (item_offset + item_length == paragraph_length ? + HB_BUFFER_FLAG_EOT : 0)); + + hb_buffer_add_utf8 (hb_buffer, + paragraph_text, paragraph_length, + item_offset, item_length); + + After this change such clients can simply say: + + hb_buffer_set_flags (hb_buffer, + HB_BUFFER_FLAG_BOT | HB_BUFFER_FLAG_EOT); + + hb_buffer_add_utf8 (hb_buffer, + paragraph_text, paragraph_length, + item_offset, item_length); + + Ie, HarfBuzz itself checks whether the segment is at the beginning/end + of the paragraph. Clients that only pass item-at-a-time to HarfBuzz + continue not setting any flags whatsoever. + + Another way to put it is: if there's pre-context text in the buffer, + HarfBuzz ignores the BOT flag. If there's post-context, it ignores + EOT flag. + + +Overview of changes leading to 0.9.33 +Tuesday, July 22, 2014 +===================================== + +- Turn off ARabic 'cswh' feature that was accidentally turned on. +- Add HB_TAG_MAX_SIGNED. +- Make hb_face_make_immutable() really make face immutable! +- Windows build fixes. + + +Overview of changes leading to 0.9.32 +Thursday, July 17, 2014 +===================================== + +- Apply Arabic shaping features in spec order exactly. +- Another fix for Mongolian free variation selectors. +- For non-Arabic scripts in Arabic shaper apply 'rlig' and 'calt' + together. +- Minor adjustment to U+FFFD logic. +- Fix hb-coretext build. + + +Overview of changes leading to 0.9.31 +Wednesday, July 16, 2014 +===================================== + +- Only accept valid UTF-8/16/32; we missed many cases before. +- Better shaping of invalid UTF-8/16/32. Falls back to + U+FFFD REPLACEMENT CHARACTER now. +- With all changes in this release, the buffer will contain fully + valid Unicode after hb_buffer_add_utf8/16/32 no matter how + broken the input is. This can be overridden though. See below. +- Fix Mongolian Variation Selectors for fonts without GDEF. +- Fix minor invalid buffer access. +- Accept zh-Hant and zh-Hans language tags. hb_ot_tag_to_language() + now uses these instead of private tags. +- Build fixes. +- New API: + * hb_buffer_add_codepoints(). This does what hb_buffer_add_utf32() + used to do, ie. no validity check on the input at all. add_utf32 + now replaces invalid Unicode codepoints with the replacement + character (see below). + * hb_buffer_set_replacement_codepoint() + * hb_buffer_get_replacement_codepoint() + Previously, in hb_buffer_add_utf8 and hb_buffer_add_utf16, when + we detected broken input, we replaced that with (hb_codepoint_t)-1. + This has changed to use U+FFFD now, but can be changed using these + new API. + + +Overview of changes leading to 0.9.30 +Wednesday, July 9, 2014 +===================================== + +- Update to Unicode 7.0.0: + * New scripts Manichaean and Psalter Pahlavi are shaped using + Arabic shaper. + * All the other new scripts to through the generic shaper for + now. +- Minor Indic improvements. +- Fix graphite2 backend cluster mapping [crasher!] +- API changes: + * New HB_SCRIPT_* values for Unicode 7.0 scripts. + * New function hb_ot_layout_language_get_required_feature(). +- Build fixes. + + +Overview of changes leading to 0.9.29 +Thursday, May 29, 2014 +===================================== + +- Implement cmap in hb-ot-font.h. No variation-selectors yet. +- Myanmar: Allow MedialYa+Asat. +- Various Indic fixes: + * Support most characters in Extended Devanagary and Vedic + Unicode blocks. + * Allow digits and a some punctuation as consonant placeholders. +- Build fixes. + + +Overview of changes leading to 0.9.28 +Monday, April 28, 2014 +===================================== + +- Unbreak old-spec Indic shaping. (bug 76705) +- Fix shaping of U+17DD and U+0FC6. +- Add HB_NO_MERGE_CLUSTERS build option. NOT to be enabled by default + for shipping libraries. It's an option for further experimentation + right now. When we are sure how to do it properly, we will add + public run-time API for the functionality. +- Build fixes. + + +Overview of changes leading to 0.9.27 +Tuesday, March 18, 2014 +===================================== + +- Don't use "register" storage class specifier +- Wrap definition of free_langs() with HAVE_ATEXIT +- Add coretext_aat shaper and hb_coretext_face_create() constructor +- If HAVE_ICU_BUILTIN is defined, use hb-icu Unicode callbacks +- Add Myanmar test case from OpenType Myanmar spec +- Only do fallback Hebrew composition if no GPOS 'mark' available +- Allow bootstrapping without gtk-doc +- Use AM_MISSING_PROG for ragel and git +- Typo in ucdn's Makefile.am +- Improve MemoryBarrier() implementation + + +Overview of changes leading to 0.9.26 +Thursday, January 30, 2014 +===================================== + +- Misc fixes. +- Fix application of 'rtlm' feature. +- Automatically apply frac/numr/dnom around U+2044 FRACTION SLASH. +- New header: hb-ot-shape.h +- Uniscribe: fix scratch-buffer accounting. +- Reorder Tai Tham SAKOT to after tone-marks. +- Add Hangul shaper. +- New files: + hb-ot-shape-complex-hangul.cc + hb-ot-shape-complex-hebrew.cc + hb-ot-shape-complex-tibetan.cc +- Disable 'cswh' feature in Arabic shaper. +- Coretext: better handle surrogate pairs. +- Add HB_TAG_MAX and _HB_SCRIPT_MAX_VALUE. + + +Overview of changes leading to 0.9.25 +Wednesday, December 4, 2013 +===================================== + +- Myanmar shaper improvements. +- Avoid font fallback in CoreText backend. +- Additional OpenType language tag mappiongs. +- More aggressive shape-plan caching. +- Build with / require automake 1.13. +- Build with libtool 2.4.2.418 alpha to support ppc64le. + + +Overview of changes leading to 0.9.24 +Tuesday, November 13, 2013 +===================================== + +- Misc compiler warning fixes with clang. +- No functional changes. + + +Overview of changes leading to 0.9.23 +Monday, October 28, 2013 +===================================== + +- "Udupi HarfBuzz Hackfest", Paris, October 14..18 2013. +- Fix (Chain)Context recursion with non-monotone lookup positions. +- Misc Indic bug fixes. +- New Javanese / Buginese shaping, similar to Windows 8.1. + + +Overview of changes leading to 0.9.22 +Thursday, October 3, 2013 +===================================== + +- Fix use-after-end-of-scope in hb_language_from_string(). +- Fix hiding of default_ignorables if font doesn't have space glyph. +- Protect against out-of-range lookup indices. + +- API Changes: + + * Added hb_ot_layout_table_get_lookup_count() + + +Overview of changes leading to 0.9.21 +Monday, September 16, 2013 +===================================== + +- Rename gobject-introspection library name from harfbuzz to HarfBuzz. +- Remove (long disabled) hb-old and hb-icu-le test shapers. +- Misc gtk-doc and gobject-introspection annotations. +- Misc fixes. +- API changes: + + * Add HB_SET_VALUE_INVALID + +Overview of changes leading to 0.9.20 +Thursday, August 29, 2013 +===================================== + +General: +- Misc substitute_closure() fixes. +- Build fixes. + +Documentation: +- gtk-doc boilerplate integrated. Docs are built now, but + contain no contents. By next release hopefully we have + some content in. Enable using --enable-gtk-doc. + +GObject and Introspection: +- Added harfbuzz-gobject library (hb-gobject.h) that has type + bindings for all HarfBuzz objects and enums. Enable using + --with-gobject. +- Added gobject-introspection boilerplate. Nothing useful + right now. Work in progress. Gets enabled automatically if + --with-gobject is used. Override with --disable-introspection. + +OpenType shaper: +- Apply 'mark' in Myanmar shaper. +- Don't apply 'dlig' by default. + +Uniscribe shaper: +- Support user features. +- Fix loading of fonts that are also installed on the system. +- Fix shaping of Arabic Presentation Forms. +- Fix build with wide chars. + +CoreText shaper: +- Support user features. + +Source changes: +- hb_face_t code moved to hb-face.h / hb-face.cc. +- Added hb-deprecated.h. + +API changes: +- Added HB_DISABLE_DEPRECATED. +- Deprecated HB_SCRIPT_CANADIAN_ABORIGINAL; replaced by + HB_SCRIPT_CANADIAN_SYLLABICS. +- Deprecated HB_BUFFER_FLAGS_DEFAULT; replaced by + HB_BUFFER_FLAG_DEFAULT. +- Deprecated HB_BUFFER_SERIALIZE_FLAGS_DEFAULT; replaced by + HB_BUFFER_SERIALIZE_FLAG_DEFAULT. + + +Overview of changes leading to 0.9.19 +Tuesday, July 16, 2013 +===================================== + +- Build fixes. +- Better handling of multiple variation selectors in a row. +- Pass on variation selector to GSUB if not consumed by cmap. +- Fix undefined memory access. +- Add Javanese config to Indic shaper. +- Misc bug fixes. + +Overview of changes leading to 0.9.18 +Tuesday, May 28, 2013 +===================================== + +New build system: + +- All unneeded code is all disabled by default, + +- Uniscribe and CoreText shapers can be enabled with their --with options, + +- icu_le and old shapers cannot be enabled for now, + +- glib, freetype, and cairo will be detected automatically. + They can be force on/off'ed with their --with options, + +- icu and graphite2 are default off, can be enabled with their --with + options, + +Moreover, ICU support is now build into a separate library: +libharfbuzz-icu.so, and a new harfbuzz-icu.pc is shipped for it. +Distros can enable ICU now without every application on earth +getting linked to via libharfbuzz.so. + +For distros I recommend that they make sure they are building --with-glib +--with-freetype --with-cairo, --with-icu, and optionally --with-graphite2; +And package harfbuzz and harfbuzz-icu separately. + + +Overview of changes leading to 0.9.17 +Monday, May 20, 2013 +===================================== + +- Build fixes. +- Fix bug in hb_set_get_min(). +- Fix regression with Arabic mark positioning / width-zeroing. + +Overview of changes leading to 0.9.16 +Friday, April 19, 2013 +===================================== + +- Major speedup in OpenType lookup processing. With the Amiri + Arabic font, this release is over 3x faster than previous + release. All scripts / languages should see this speedup. + +- New --num-iterations option for hb-shape / hb-view; useful for + profiling. + +Overview of changes leading to 0.9.15 +Friday, April 05, 2013 +===================================== + +- Build fixes. +- Fix crasher in graphite2 shaper. +- Fix Arabic mark width zeroing regression. +- Don't compose Hangul jamo into Unicode syllables. + + +Overview of changes leading to 0.9.14 +Thursday, March 21, 2013 +===================================== + +- Build fixes. +- Fix time-consuming sanitize with malicious fonts. +- Implement hb_buffer_deserialize_glyphs() for both json and text. +- Do not ignore Hangul filler characters. +- Indic fixes: + * Fix Malayalam pre-base reordering interaction with post-forms. + * Further adjust ZWJ handling. Should fix known regressions from + 0.9.13. + + +Overview of changes leading to 0.9.13 +Thursday, February 25, 2013 +===================================== + +- Build fixes. +- Ngapi HarfBuzz Hackfest in London (February 2013): + * Fixed all known Indic bugs, + * New Win8-style Myanmar shaper, + * New South-East Asian shaper for Tai Tham, Cham, and New Tai Lue, + * Smartly ignore Default_Ignorable characters (joiners, etc) wheb + matching GSUB/GPOS lookups, + * Fix 'Phags-Pa U+A872 shaping, + * Fix partial disabling of default-on features, + * Allow disabling of TrueType kerning. +- Fix possible crasher with broken fonts with overlapping tables. +- Removed generated files from git again. So, one needs ragel to + bootstrap from the git tree. + +API changes: +- hb_shape() and related APIs now abort if buffer direction is + HB_DIRECTION_INVALID. Previously, hb_shape() was calling + hb_buffer_guess_segment_properties() on the buffer before + shaping. The heuristics in that function are fragile. If the + user really wants the old behvaior, they can call that function + right before calling hb_shape() to get the old behavior. +- hb_blob_create_sub_blob() always creates sub-blob with + HB_MEMORY_MODE_READONLY. See comments for the reason. + + +Overview of changes leading to 0.9.12 +Thursday, January 18, 2013 +===================================== + +- Build fixes for Sun compiler. +- Minor bug fix. + +Overview of changes leading to 0.9.11 +Thursday, January 10, 2013 +===================================== + +- Build fixes. +- Fix GPOS mark attachment with null Anchor offsets. +- [Indic] Fix old-spec reordering of viramas if sequence ends in one. +- Fix multi-threaded shaper data creation crash. +- Add atomic ops for Solaris. + +API changes: +- Rename hb_buffer_clear() to hb_buffer_clear_contents(). + + +Overview of changes leading to 0.9.10 +Thursday, January 3, 2013 +===================================== + +- [Indic] Fixed rendering of Malayalam dot-reph +- Updated OT language tags. +- Updated graphite2 backend. +- Improved hb_ot_layout_get_size_params() logic. +- Improve hb-shape/hb-view help output. +- Fixed hb-set.h implementation to not crash. +- Fixed various issues with hb_ot_layout_collect_lookups(). +- Various build fixes. + +New API: + +hb_graphite2_face_get_gr_face() +hb_graphite2_font_get_gr_font() +hb_coretext_face_get_cg_font() + +Modified API: + +hb_ot_layout_get_size_params() + + +Overview of changes leading to 0.9.9 +Wednesday, December 5, 2012 +==================================== + +- Fix build on Windows. +- Minor improvements. + + +Overview of changes leading to 0.9.8 +Tuesday, December 4, 2012 +==================================== + + +- Actually implement hb_shape_plan_get_shaper (). +- Make UCDB data tables const. +- Lots of internal refactoring in OTLayout tables. +- Flesh out hb_ot_layout_lookup_collect_glyphs(). + +New API: + +hb_ot_layout_collect_lookups() +hb_ot_layout_get_size_params() + + +Overview of changes leading to 0.9.7 +Sunday, November 21, 2012 +==================================== + + +HarfBuzz "All-You-Can-Eat-Sushi" (aka Vancouver) Hackfest and follow-on fixes. + +- Fix Arabic contextual joining using pre-context text. +- Fix Sinhala "split matra" mess. +- Fix Khmer shaping with broken fonts. +- Implement Thai "PUA" shaping for old fonts. +- Do NOT route Kharoshthi script through the Indic shaper. +- Disable fallback positioning for Indic and Thai shapers. +- Misc fixes. + + +hb-shape / hb-view changes: + +- Add --text-before and --text-after +- Add --bot / --eot / --preserve-default-ignorables +- hb-shape --output-format=json + + +New API: + +hb_buffer_clear() + +hb_buffer_flags_t + +HB_BUFFER_FLAGS_DEFAULT +HB_BUFFER_FLAG_BOT +HB_BUFFER_FLAG_EOT +HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES + +hb_buffer_set_flags() +hb_buffer_get_flags() + +HB_BUFFER_SERIALIZE_FLAGS +hb_buffer_serialize_glyphs() +hb_buffer_deserialize_glyphs() +hb_buffer_serialize_list_formats() + +hb_set_add_range() +hb_set_del_range() +hb_set_get_population() +hb_set_next_range() + +hb_face_[sg]et_glyph_count() + +hb_segment_properties_t +HB_SEGMENT_PROPERTIES_DEFAULT +hb_segment_properties_equal() +hb_segment_properties_hash() + +hb_buffer_set_segment_properties() +hb_buffer_get_segment_properties() + +hb_ot_layout_glyph_class_t +hb_ot_layout_get_glyph_class() +hb_ot_layout_get_glyphs_in_class() + +hb_shape_plan_t +hb_shape_plan_create() +hb_shape_plan_create_cached() +hb_shape_plan_get_empty() +hb_shape_plan_reference() +hb_shape_plan_destroy() +hb_shape_plan_set_user_data() +hb_shape_plan_get_user_data() +hb_shape_plan_execute() +hb_shape_plan_get_shaper() + +hb_ot_shape_plan_collect_lookups() + + +API changes: + +- Remove "mask" parameter from hb_buffer_add(). +- Rename hb_ot_layout_would_substitute_lookup() and hb_ot_layout_substitute_closure_lookup(). +- hb-set.h API const correction. +- Renamed hb_set_min/max() to hb_set_get_min/max(). +- Rename hb_ot_layout_feature_get_lookup_indexes() to hb_ot_layout_feature_get_lookups(). +- Rename hb_buffer_guess_properties() to hb_buffer_guess_segment_properties(). + + + +Overview of changes leading to 0.9.6 +Sunday, November 13, 2012 +==================================== + +- Don't clear pre-context text if no new context is provided. +- Fix ReverseChainingSubstLookup, which was totally borked. +- Adjust output format of hb-shape a bit. +- Include config.h.in in-tree. Makes it easier for alternate build systems. +- Fix hb_buffer_set_length(buffer, 0) invalid memory allocation. +- Use ICU LayoutEngine's C API instead of C++. Avoids much headache. +- Drop glyphs for all of Unicode Default_Ignorable characters. +- Misc build fixes. + +Arabic shaper: +- Enable 'dlig' and 'mset' features in Arabic shaper. +- Implement 'Phags-pa shaping, improve Mongolian. + +Indic shaper: +- Decompose Sinhala split matras the way old HarfBuzz / Pango did. +- Initial support for Consonant Medials. +- Start adding new-style Myanmar shaping. +- Make reph and 'pref' logic introspect the font. +- Route Meetei-Mayek through the Indic shaper. +- Don't apply 'liga' in Indic shaper. +- Improve Malayalam pre-base reordering Ra interaction with Chillus. + + + +Overview of changes leading to 0.9.5 +Sunday, October 14, 2012 +==================================== + +- Synthetic-GSUB Arabic fallback shaping. + +- Misc Indic improvements. + +- Add build system support for pthread. + +- Imported UCDN for in-tree Unicode callbacks implementation. + +- Context-aware Arabic joining. + +- Misc other fixes. + +- New API: + + hb_feature_to/from-string() + hb_buffer_[sg]et_content_type() + + + +Overview of changes leading to 0.9.4 +Tuesday, Sep 03, 2012 +==================================== + +- Indic improvements with old-spec Malayalam. + +- Better fallback glyph positioning, specially with Thai / Lao marks. + +- Implement dotted-circle insertion. + +- Better Arabic fallback shaping / ligation. + +- Added ICU LayoutEngine backend for testing. Call it by the 'icu_le' name. + +- Misc fixes. + + + +Overview of changes leading to 0.9.3 +Friday, Aug 18, 2012 +==================================== + +- Fixed fallback mark positioning for left-to-right text. + +- Improve mark positioning for the remaining combining classes. + +- Unbreak Thai and fallback Arabic shaping. + +- Port Arabic shaper to shape-plan caching. + +- Use new ICU normalizer functions. + + + +Overview of changes leading to 0.9.2 +Friday, Aug 10, 2012 +==================================== + +- Over a thousand commits! This is the first major release of HarfBuzz. + +- HarfBuzz is feature-complete now! It should be in par, or better, than + both Pango's shapers and old HarfBuzz / Qt shapers. + +- New Indic shaper, supporting main Indic scripts, Sinhala, and Khmer. + +- Improved Arabic shaper, with fallback Arabic shaping, supporting Arabic, + Sinhala, N'ko, Mongolian, and Mandaic. + +- New Thai / Lao shaper. + +- Tibetan / Hangul support in the generic shaper. + +- Synthetic GDEF support for fonts without a GDEF table. + +- Fallback mark positioning for fonts without a GPOS table. + +- Unicode normalization shaping heuristic during glyph mapping. + +- New experimental Graphite2 backend. + +- New Uniscribe backend (primarily for testing). + +- New CoreText backend (primarily for testing). + +- Major optimization and speedup. + +- Test suites and testing infrastructure (work in progress). + +- Greatly improved hb-view cmdline tool. + +- hb-shape cmdline tool. + +- Unicode 6.1 support. + +Summary of API changes: + +o Changed API: + + - Users are expected to only include main header files now (ie. hb.h, + hb-glib.h, hb-ft.h, ...) + + - All struct tag names had their initial underscore removed. + Ie. "struct _hb_buffer_t" is "struct hb_buffer_t" now. + + - All set_user_data() functions now take a "replace" boolean parameter. + + - hb_buffer_create() takes zero arguments now. + Use hb_buffer_pre_allocate() to pre-allocate. + + - hb_buffer_add_utf*() now accept -1 for length parameteres, + meaning "nul-terminated". + + - hb_direction_t enum values changed. + + - All *_from_string() APIs now take a length parameter to allow for + non-nul-terminated strings. A -1 length means "nul-terminated". + + - Typedef for hb_language_t changed. + + - hb_get_table_func_t renamed to hb_reference_table_func_t. + + - hb_ot_layout_table_choose_script() + + - Various renames in hb-unicode.h. + +o New API: + + - hb_buffer_guess_properties() + Automatically called by hb_shape(). + + - hb_buffer_normalize_glyphs() + + - hb_tag_from_string() + + - hb-coretext.h + + - hb-uniscribe.h + + - hb_face_reference_blob() + - hb_face_[sg]et_index() + - hb_face_set_upem() + + - hb_font_get_glyph_name_func_t + hb_font_get_glyph_from_name_func_t + hb_font_funcs_set_glyph_name_func() + hb_font_funcs_set_glyph_from_name_func() + hb_font_get_glyph_name() + hb_font_get_glyph_from_name() + hb_font_glyph_to_string() + hb_font_glyph_from_string() + + - hb_font_set_funcs_data() + + - hb_ft_font_set_funcs() + - hb_ft_font_get_face() + + - hb-gobject.h (work in progress) + + - hb_ot_shape_glyphs_closure() + hb_ot_layout_substitute_closure_lookup() + + - hb-set.h + + - hb_shape_full() + + - hb_unicode_combining_class_t + + - hb_unicode_compose_func_t + hb_unicode_decompose_func_t + hb_unicode_decompose_compatibility_func_t + hb_unicode_funcs_set_compose_func() + hb_unicode_funcs_set_decompose_func() + hb_unicode_funcs_set_decompose_compatibility_func() + hb_unicode_compose() + hb_unicode_decompose() + hb_unicode_decompose_compatibility() + +o Removed API: + + - hb_ft_get_font_funcs() + + - hb_ot_layout_substitute_start() + hb_ot_layout_substitute_lookup() + hb_ot_layout_substitute_finish() + hb_ot_layout_position_start() + hb_ot_layout_position_lookup() + hb_ot_layout_position_finish() + + + +Overview of changes leading to 0.6.0 +Friday, May 27, 2011 +==================================== + +- Vertical text support in GPOS +- Almost all API entries have unit tests now, under test/ +- All thread-safety issues are fixed + +Summary of API changes follows. + + +* Simple Types API: + + o New API: + HB_LANGUAGE_INVALID + hb_language_get_default() + hb_direction_to_string() + hb_direction_from_string() + hb_script_get_horizontal_direction() + HB_UNTAG() + + o Renamed API: + hb_category_t renamed to hb_unicode_general_category_t + + o Changed API: + hb_language_t is a typed pointers now + + o Removed API: + HB_TAG_STR() + + +* Use ISO 15924 tags for hb_script_t: + + o New API: + hb_script_from_iso15924_tag() + hb_script_to_iso15924_tag() + hb_script_from_string() + + o Changed API: + HB_SCRIPT_* enum members changed value. + + +* Buffer API streamlined: + + o New API: + hb_buffer_reset() + hb_buffer_set_length() + hb_buffer_allocation_successful() + + o Renamed API: + hb_buffer_ensure() renamed to hb_buffer_pre_allocate() + hb_buffer_add_glyph() renamed to hb_buffer_add() + + o Removed API: + hb_buffer_clear() + hb_buffer_clear_positions() + + o Changed API: + hb_buffer_get_glyph_infos() takes an out length parameter now + hb_buffer_get_glyph_positions() takes an out length parameter now + + +* Blob API streamlined: + + o New API: + hb_blob_get_data() + hb_blob_get_data_writable() + + o Renamed API: + hb_blob_create_empty() renamed to hb_blob_get_empty() + + o Removed API: + hb_blob_lock() + hb_blob_unlock() + hb_blob_is_writable() + hb_blob_try_writable() + + o Changed API: + hb_blob_create() takes user_data before destroy now + + +* Unicode functions API: + + o Unicode function vectors can subclass other unicode function vectors now. + Unimplemented callbacks in the subclass automatically chainup to the parent. + + o All hb_unicode_funcs_t callbacks take a user_data now. Their setters + take a user_data and its respective destroy callback. + + o New API: + hb_unicode_funcs_get_empty() + hb_unicode_funcs_get_default() + hb_unicode_funcs_get_parent() + + o Changed API: + hb_unicode_funcs_create() now takes a parent_funcs. + + o Removed func getter functions: + hb_unicode_funcs_get_mirroring_func() + hb_unicode_funcs_get_general_category_func() + hb_unicode_funcs_get_script_func() + hb_unicode_funcs_get_combining_class_func() + hb_unicode_funcs_get_eastasian_width_func() + + +* Face API: + + o Renamed API: + hb_face_get_table() renamed to hb_face_reference_table() + hb_face_create_for_data() renamed to hb_face_create() + + o Changed API: + hb_face_create_for_tables() takes user_data before destroy now + hb_face_reference_table() returns empty blob instead of NULL + hb_get_table_func_t accepts the face as first parameter now + +* Font API: + + o Fonts can subclass other fonts now. Unimplemented callbacks in the + subclass automatically chainup to the parent. When chaining up, + scale is adjusted if the parent font has a different scale. + + o All hb_font_funcs_t callbacks take a user_data now. Their setters + take a user_data and its respective destroy callback. + + o New API: + hb_font_get_parent() + hb_font_funcs_get_empty() + hb_font_create_sub_font() + + o Removed API: + hb_font_funcs_copy() + hb_font_unset_funcs() + + o Removed func getter functions: + hb_font_funcs_get_glyph_func() + hb_font_funcs_get_glyph_advance_func() + hb_font_funcs_get_glyph_extents_func() + hb_font_funcs_get_contour_point_func() + hb_font_funcs_get_kerning_func() + + o Changed API: + hb_font_create() takes a face and references it now + hb_font_set_funcs() takes user_data before destroy now + hb_font_set_scale() accepts signed integers now + hb_font_get_contour_point_func_t now takes glyph first, then point_index + hb_font_get_glyph_func_t returns a success boolean now + + +* Changed object model: + + o All object types have a _get_empty() now: + hb_blob_get_empty() + hb_buffer_get_empty() + hb_face_get_empty() + hb_font_get_empty() + hb_font_funcs_get_empty() + hb_unicode_funcs_get_empty() + + o Added _set_user_data() and _get_user_data() for all object types: + hb_blob_get_user_data() + hb_blob_set_user_data() + hb_buffer_get_user_data() + hb_buffer_set_user_data() + hb_face_get_user_data() + hb_face_set_user_data() + hb_font_funcs_get_user_data() + hb_font_funcs_set_user_data() + hb_font_get_user_data() + hb_font_set_user_data() + hb_unicode_funcs_get_user_data() + hb_unicode_funcs_set_user_data() + + o Removed the _get_reference_count() from all object types: + hb_blob_get_reference_count() + hb_buffer_get_reference_count() + hb_face_get_reference_count() + hb_font_funcs_get_reference_count() + hb_font_get_reference_count() + hb_unicode_funcs_get_reference_count() + + o Added _make_immutable() and _is_immutable() for all object types except for buffer: + hb_blob_make_immutable() + hb_blob_is_immutable() + hb_face_make_immutable() + hb_face_is_immutable() + + +* Changed API for vertical text support + + o The following callbacks where removed: + hb_font_get_glyph_advance_func_t + hb_font_get_kerning_func_t + + o The following new callbacks added instead: + hb_font_get_glyph_h_advance_func_t + hb_font_get_glyph_v_advance_func_t + hb_font_get_glyph_h_origin_func_t + hb_font_get_glyph_v_origin_func_t + hb_font_get_glyph_h_kerning_func_t + hb_font_get_glyph_v_kerning_func_t + + o The following API removed as such: + hb_font_funcs_set_glyph_advance_func() + hb_font_funcs_set_kerning_func() + hb_font_get_glyph_advance() + hb_font_get_kerning() + + o New API added instead: + hb_font_funcs_set_glyph_h_advance_func() + hb_font_funcs_set_glyph_v_advance_func() + hb_font_funcs_set_glyph_h_origin_func() + hb_font_funcs_set_glyph_v_origin_func() + hb_font_funcs_set_glyph_h_kerning_func() + hb_font_funcs_set_glyph_v_kerning_func() + hb_font_get_glyph_h_advance() + hb_font_get_glyph_v_advance() + hb_font_get_glyph_h_origin() + hb_font_get_glyph_v_origin() + hb_font_get_glyph_h_kerning() + hb_font_get_glyph_v_kerning() + + o The following higher-leve API added for convenience: + hb_font_get_glyph_advance_for_direction() + hb_font_get_glyph_origin_for_direction() + hb_font_add_glyph_origin_for_direction() + hb_font_subtract_glyph_origin_for_direction() + hb_font_get_glyph_kerning_for_direction() + hb_font_get_glyph_extents_for_origin() + hb_font_get_glyph_contour_point_for_origin() + + +* OpenType Layout API: + + o New API: + hb_ot_layout_position_start() + hb_ot_layout_substitute_start() + hb_ot_layout_substitute_finish() + + +* Glue code: + + o New API: + hb_glib_script_to_script() + hb_glib_script_from_script() + hb_icu_script_to_script() + hb_icu_script_from_script() + + +* Version API added: + + o New API: + HB_VERSION_MAJOR + HB_VERSION_MINOR + HB_VERSION_MICRO + HB_VERSION_STRING + HB_VERSION_CHECK() + hb_version() + hb_version_string() + hb_version_check() + + diff --git a/thirdparty/harfbuzz/THANKS b/thirdparty/harfbuzz/THANKS new file mode 100644 index 0000000000..88cb7e9ea1 --- /dev/null +++ b/thirdparty/harfbuzz/THANKS @@ -0,0 +1,7 @@ +Bradley Grainger +Kenichi Ishibashi +Ivan Kuckir <https://photopea.com/> +Ryan Lortie +Jeff Muizelaar +suzuki toshiya +Philip Withnall diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-ankr-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-ankr-table.hh new file mode 100644 index 0000000000..f2785a6f58 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-layout-ankr-table.hh @@ -0,0 +1,98 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_AAT_LAYOUT_ANKR_TABLE_HH +#define HB_AAT_LAYOUT_ANKR_TABLE_HH + +#include "hb-aat-layout-common.hh" + +/* + * ankr -- Anchor Point + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6ankr.html + */ +#define HB_AAT_TAG_ankr HB_TAG('a','n','k','r') + + +namespace AAT { + +using namespace OT; + + +struct Anchor +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + public: + FWORD xCoordinate; + FWORD yCoordinate; + public: + DEFINE_SIZE_STATIC (4); +}; + +typedef LArrayOf<Anchor> GlyphAnchors; + +struct ankr +{ + static constexpr hb_tag_t tableTag = HB_AAT_TAG_ankr; + + const Anchor &get_anchor (hb_codepoint_t glyph_id, + unsigned int i, + unsigned int num_glyphs) const + { + const NNOffsetTo<GlyphAnchors> *offset = (this+lookupTable).get_value (glyph_id, num_glyphs); + if (!offset) + return Null (Anchor); + const GlyphAnchors &anchors = &(this+anchorData) + *offset; + return anchors[i]; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + version == 0 && + c->check_range (this, anchorData) && + lookupTable.sanitize (c, this, &(this+anchorData)))); + } + + protected: + HBUINT16 version; /* Version number (set to zero) */ + HBUINT16 flags; /* Flags (currently unused; set to zero) */ + LOffsetTo<Lookup<NNOffsetTo<GlyphAnchors>>> + lookupTable; /* Offset to the table's lookup table */ + LNNOffsetTo<HBUINT8> + anchorData; /* Offset to the glyph data table */ + + public: + DEFINE_SIZE_STATIC (12); +}; + +} /* namespace AAT */ + + +#endif /* HB_AAT_LAYOUT_ANKR_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-bsln-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-bsln-table.hh new file mode 100644 index 0000000000..cd36fc8953 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-layout-bsln-table.hh @@ -0,0 +1,158 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_AAT_LAYOUT_BSLN_TABLE_HH +#define HB_AAT_LAYOUT_BSLN_TABLE_HH + +#include "hb-aat-layout-common.hh" + +/* + * bsln -- Baseline + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6bsln.html + */ +#define HB_AAT_TAG_bsln HB_TAG('b','s','l','n') + + +namespace AAT { + + +struct BaselineTableFormat0Part +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + // Roman, Ideographic centered, Ideographic low, Hanging and Math + // are the default defined ones, but any other maybe accessed also. + HBINT16 deltas[32]; /* These are the FUnit distance deltas from + * the font's natural baseline to the other + * baselines used in the font. */ + public: + DEFINE_SIZE_STATIC (64); +}; + +struct BaselineTableFormat1Part +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + lookupTable.sanitize (c))); + } + + protected: + HBINT16 deltas[32]; /* ditto */ + Lookup<HBUINT16> + lookupTable; /* Lookup table that maps glyphs to their + * baseline values. */ + public: + DEFINE_SIZE_MIN (66); +}; + +struct BaselineTableFormat2Part +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + HBGlyphID stdGlyph; /* The specific glyph index number in this + * font that is used to set the baseline values. + * This is the standard glyph. + * This glyph must contain a set of control points + * (whose numbers are contained in the ctlPoints field) + * that are used to determine baseline distances. */ + HBUINT16 ctlPoints[32]; /* Set of control point numbers, + * associated with the standard glyph. + * A value of 0xFFFF means there is no corresponding + * control point in the standard glyph. */ + public: + DEFINE_SIZE_STATIC (66); +}; + +struct BaselineTableFormat3Part +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && lookupTable.sanitize (c))); + } + + protected: + HBGlyphID stdGlyph; /* ditto */ + HBUINT16 ctlPoints[32]; /* ditto */ + Lookup<HBUINT16> + lookupTable; /* Lookup table that maps glyphs to their + * baseline values. */ + public: + DEFINE_SIZE_MIN (68); +}; + +struct bsln +{ + static constexpr hb_tag_t tableTag = HB_AAT_TAG_bsln; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!(c->check_struct (this) && defaultBaseline < 32))) + return_trace (false); + + switch (format) + { + case 0: return_trace (parts.format0.sanitize (c)); + case 1: return_trace (parts.format1.sanitize (c)); + case 2: return_trace (parts.format2.sanitize (c)); + case 3: return_trace (parts.format3.sanitize (c)); + default:return_trace (true); + } + } + + protected: + FixedVersion<>version; /* Version number of the Baseline table. */ + HBUINT16 format; /* Format of the baseline table. Only one baseline + * format may be selected for the font. */ + HBUINT16 defaultBaseline;/* Default baseline value for all glyphs. + * This value can be from 0 through 31. */ + union { + // Distance-Based Formats + BaselineTableFormat0Part format0; + BaselineTableFormat1Part format1; + // Control Point-based Formats + BaselineTableFormat2Part format2; + BaselineTableFormat3Part format3; + } parts; + public: + DEFINE_SIZE_MIN (8); +}; + +} /* namespace AAT */ + + +#endif /* HB_AAT_LAYOUT_BSLN_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-common.hh b/thirdparty/harfbuzz/src/hb-aat-layout-common.hh new file mode 100644 index 0000000000..75d523f5fc --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-layout-common.hh @@ -0,0 +1,840 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_AAT_LAYOUT_COMMON_HH +#define HB_AAT_LAYOUT_COMMON_HH + +#include "hb-aat-layout.hh" +#include "hb-open-type.hh" + + +namespace AAT { + +using namespace OT; + + +/* + * Lookup Table + */ + +template <typename T> struct Lookup; + +template <typename T> +struct LookupFormat0 +{ + friend struct Lookup<T>; + + private: + const T* get_value (hb_codepoint_t glyph_id, unsigned int num_glyphs) const + { + if (unlikely (glyph_id >= num_glyphs)) return nullptr; + return &arrayZ[glyph_id]; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (arrayZ.sanitize (c, c->get_num_glyphs ())); + } + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (arrayZ.sanitize (c, c->get_num_glyphs (), base)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 0 */ + UnsizedArrayOf<T> + arrayZ; /* Array of lookup values, indexed by glyph index. */ + public: + DEFINE_SIZE_UNBOUNDED (2); +}; + + +template <typename T> +struct LookupSegmentSingle +{ + static constexpr unsigned TerminationWordCount = 2u; + + int cmp (hb_codepoint_t g) const + { return g < first ? -1 : g <= last ? 0 : +1 ; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && value.sanitize (c)); + } + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && value.sanitize (c, base)); + } + + HBGlyphID last; /* Last GlyphID in this segment */ + HBGlyphID first; /* First GlyphID in this segment */ + T value; /* The lookup value (only one) */ + public: + DEFINE_SIZE_STATIC (4 + T::static_size); +}; + +template <typename T> +struct LookupFormat2 +{ + friend struct Lookup<T>; + + private: + const T* get_value (hb_codepoint_t glyph_id) const + { + const LookupSegmentSingle<T> *v = segments.bsearch (glyph_id); + return v ? &v->value : nullptr; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (segments.sanitize (c)); + } + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (segments.sanitize (c, base)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 2 */ + VarSizedBinSearchArrayOf<LookupSegmentSingle<T>> + segments; /* The actual segments. These must already be sorted, + * according to the first word in each one (the last + * glyph in each segment). */ + public: + DEFINE_SIZE_ARRAY (8, segments); +}; + +template <typename T> +struct LookupSegmentArray +{ + static constexpr unsigned TerminationWordCount = 2u; + + const T* get_value (hb_codepoint_t glyph_id, const void *base) const + { + return first <= glyph_id && glyph_id <= last ? &(base+valuesZ)[glyph_id - first] : nullptr; + } + + int cmp (hb_codepoint_t g) const + { return g < first ? -1 : g <= last ? 0 : +1; } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + first <= last && + valuesZ.sanitize (c, base, last - first + 1)); + } + template <typename ...Ts> + bool sanitize (hb_sanitize_context_t *c, const void *base, Ts&&... ds) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + first <= last && + valuesZ.sanitize (c, base, last - first + 1, hb_forward<Ts> (ds)...)); + } + + HBGlyphID last; /* Last GlyphID in this segment */ + HBGlyphID first; /* First GlyphID in this segment */ + NNOffsetTo<UnsizedArrayOf<T>> + valuesZ; /* A 16-bit offset from the start of + * the table to the data. */ + public: + DEFINE_SIZE_STATIC (6); +}; + +template <typename T> +struct LookupFormat4 +{ + friend struct Lookup<T>; + + private: + const T* get_value (hb_codepoint_t glyph_id) const + { + const LookupSegmentArray<T> *v = segments.bsearch (glyph_id); + return v ? v->get_value (glyph_id, this) : nullptr; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (segments.sanitize (c, this)); + } + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (segments.sanitize (c, this, base)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 4 */ + VarSizedBinSearchArrayOf<LookupSegmentArray<T>> + segments; /* The actual segments. These must already be sorted, + * according to the first word in each one (the last + * glyph in each segment). */ + public: + DEFINE_SIZE_ARRAY (8, segments); +}; + +template <typename T> +struct LookupSingle +{ + static constexpr unsigned TerminationWordCount = 1u; + + int cmp (hb_codepoint_t g) const { return glyph.cmp (g); } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && value.sanitize (c)); + } + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && value.sanitize (c, base)); + } + + HBGlyphID glyph; /* Last GlyphID */ + T value; /* The lookup value (only one) */ + public: + DEFINE_SIZE_STATIC (2 + T::static_size); +}; + +template <typename T> +struct LookupFormat6 +{ + friend struct Lookup<T>; + + private: + const T* get_value (hb_codepoint_t glyph_id) const + { + const LookupSingle<T> *v = entries.bsearch (glyph_id); + return v ? &v->value : nullptr; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (entries.sanitize (c)); + } + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (entries.sanitize (c, base)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 6 */ + VarSizedBinSearchArrayOf<LookupSingle<T>> + entries; /* The actual entries, sorted by glyph index. */ + public: + DEFINE_SIZE_ARRAY (8, entries); +}; + +template <typename T> +struct LookupFormat8 +{ + friend struct Lookup<T>; + + private: + const T* get_value (hb_codepoint_t glyph_id) const + { + return firstGlyph <= glyph_id && glyph_id - firstGlyph < glyphCount ? + &valueArrayZ[glyph_id - firstGlyph] : nullptr; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && valueArrayZ.sanitize (c, glyphCount)); + } + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && valueArrayZ.sanitize (c, glyphCount, base)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 8 */ + HBGlyphID firstGlyph; /* First glyph index included in the trimmed array. */ + HBUINT16 glyphCount; /* Total number of glyphs (equivalent to the last + * glyph minus the value of firstGlyph plus 1). */ + UnsizedArrayOf<T> + valueArrayZ; /* The lookup values (indexed by the glyph index + * minus the value of firstGlyph). */ + public: + DEFINE_SIZE_ARRAY (6, valueArrayZ); +}; + +template <typename T> +struct LookupFormat10 +{ + friend struct Lookup<T>; + + private: + const typename T::type get_value_or_null (hb_codepoint_t glyph_id) const + { + if (!(firstGlyph <= glyph_id && glyph_id - firstGlyph < glyphCount)) + return Null (T); + + const HBUINT8 *p = &valueArrayZ[(glyph_id - firstGlyph) * valueSize]; + + unsigned int v = 0; + unsigned int count = valueSize; + for (unsigned int i = 0; i < count; i++) + v = (v << 8) | *p++; + + return v; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + valueSize <= 4 && + valueArrayZ.sanitize (c, glyphCount * valueSize)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 8 */ + HBUINT16 valueSize; /* Byte size of each value. */ + HBGlyphID firstGlyph; /* First glyph index included in the trimmed array. */ + HBUINT16 glyphCount; /* Total number of glyphs (equivalent to the last + * glyph minus the value of firstGlyph plus 1). */ + UnsizedArrayOf<HBUINT8> + valueArrayZ; /* The lookup values (indexed by the glyph index + * minus the value of firstGlyph). */ + public: + DEFINE_SIZE_ARRAY (8, valueArrayZ); +}; + +template <typename T> +struct Lookup +{ + const T* get_value (hb_codepoint_t glyph_id, unsigned int num_glyphs) const + { + switch (u.format) { + case 0: return u.format0.get_value (glyph_id, num_glyphs); + case 2: return u.format2.get_value (glyph_id); + case 4: return u.format4.get_value (glyph_id); + case 6: return u.format6.get_value (glyph_id); + case 8: return u.format8.get_value (glyph_id); + default:return nullptr; + } + } + + const typename T::type get_value_or_null (hb_codepoint_t glyph_id, unsigned int num_glyphs) const + { + switch (u.format) { + /* Format 10 cannot return a pointer. */ + case 10: return u.format10.get_value_or_null (glyph_id); + default: + const T *v = get_value (glyph_id, num_glyphs); + return v ? *v : Null (T); + } + } + + typename T::type get_class (hb_codepoint_t glyph_id, + unsigned int num_glyphs, + unsigned int outOfRange) const + { + const T *v = get_value (glyph_id, num_glyphs); + return v ? *v : outOfRange; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!u.format.sanitize (c)) return_trace (false); + switch (u.format) { + case 0: return_trace (u.format0.sanitize (c)); + case 2: return_trace (u.format2.sanitize (c)); + case 4: return_trace (u.format4.sanitize (c)); + case 6: return_trace (u.format6.sanitize (c)); + case 8: return_trace (u.format8.sanitize (c)); + case 10: return_trace (u.format10.sanitize (c)); + default:return_trace (true); + } + } + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + if (!u.format.sanitize (c)) return_trace (false); + switch (u.format) { + case 0: return_trace (u.format0.sanitize (c, base)); + case 2: return_trace (u.format2.sanitize (c, base)); + case 4: return_trace (u.format4.sanitize (c, base)); + case 6: return_trace (u.format6.sanitize (c, base)); + case 8: return_trace (u.format8.sanitize (c, base)); + case 10: return_trace (false); /* We don't support format10 here currently. */ + default:return_trace (true); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + LookupFormat0<T> format0; + LookupFormat2<T> format2; + LookupFormat4<T> format4; + LookupFormat6<T> format6; + LookupFormat8<T> format8; + LookupFormat10<T> format10; + } u; + public: + DEFINE_SIZE_UNION (2, format); +}; +/* Lookup 0 has unbounded size (dependant on num_glyphs). So we need to defined + * special NULL objects for Lookup<> objects, but since it's template our macros + * don't work. So we have to hand-code them here. UGLY. */ +} /* Close namespace. */ +/* Ugly hand-coded null objects for template Lookup<> :(. */ +extern HB_INTERNAL const unsigned char _hb_Null_AAT_Lookup[2]; +template <typename T> +struct Null<AAT::Lookup<T>> { + static AAT::Lookup<T> const & get_null () + { return *reinterpret_cast<const AAT::Lookup<T> *> (_hb_Null_AAT_Lookup); } +}; +namespace AAT { + +enum { DELETED_GLYPH = 0xFFFF }; + +/* + * (Extended) State Table + */ + +template <typename T> +struct Entry +{ + bool sanitize (hb_sanitize_context_t *c, unsigned int count) const + { + TRACE_SANITIZE (this); + /* Note, we don't recurse-sanitize data because we don't access it. + * That said, in our DEFINE_SIZE_STATIC we access T::static_size, + * which ensures that data has a simple sanitize(). To be determined + * if I need to remove that as well. + * + * HOWEVER! Because we are a template, our DEFINE_SIZE_STATIC + * assertion wouldn't be checked, hence the line below. */ + static_assert (T::static_size, ""); + + return_trace (c->check_struct (this)); + } + + public: + HBUINT16 newState; /* Byte offset from beginning of state table + * to the new state. Really?!?! Or just state + * number? The latter in morx for sure. */ + HBUINT16 flags; /* Table specific. */ + T data; /* Optional offsets to per-glyph tables. */ + public: + DEFINE_SIZE_STATIC (4 + T::static_size); +}; + +template <> +struct Entry<void> +{ + bool sanitize (hb_sanitize_context_t *c, unsigned int count /*XXX Unused?*/) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + public: + HBUINT16 newState; /* Byte offset from beginning of state table to the new state. */ + HBUINT16 flags; /* Table specific. */ + public: + DEFINE_SIZE_STATIC (4); +}; + +template <typename Types, typename Extra> +struct StateTable +{ + typedef typename Types::HBUINT HBUINT; + typedef typename Types::HBUSHORT HBUSHORT; + typedef typename Types::ClassTypeNarrow ClassType; + + enum State + { + STATE_START_OF_TEXT = 0, + STATE_START_OF_LINE = 1, + }; + enum Class + { + CLASS_END_OF_TEXT = 0, + CLASS_OUT_OF_BOUNDS = 1, + CLASS_DELETED_GLYPH = 2, + CLASS_END_OF_LINE = 3, + }; + + int new_state (unsigned int newState) const + { return Types::extended ? newState : ((int) newState - (int) stateArrayTable) / (int) nClasses; } + + unsigned int get_class (hb_codepoint_t glyph_id, unsigned int num_glyphs) const + { + if (unlikely (glyph_id == DELETED_GLYPH)) return CLASS_DELETED_GLYPH; + return (this+classTable).get_class (glyph_id, num_glyphs, 1); + } + + const Entry<Extra> *get_entries () const + { return (this+entryTable).arrayZ; } + + const Entry<Extra> &get_entry (int state, unsigned int klass) const + { + if (unlikely (klass >= nClasses)) + klass = StateTable<Types, Entry<Extra>>::CLASS_OUT_OF_BOUNDS; + + const HBUSHORT *states = (this+stateArrayTable).arrayZ; + const Entry<Extra> *entries = (this+entryTable).arrayZ; + + unsigned int entry = states[state * nClasses + klass]; + DEBUG_MSG (APPLY, nullptr, "e%u", entry); + + return entries[entry]; + } + + bool sanitize (hb_sanitize_context_t *c, + unsigned int *num_entries_out = nullptr) const + { + TRACE_SANITIZE (this); + if (unlikely (!(c->check_struct (this) && + nClasses >= 4 /* Ensure pre-defined classes fit. */ && + classTable.sanitize (c, this)))) return_trace (false); + + const HBUSHORT *states = (this+stateArrayTable).arrayZ; + const Entry<Extra> *entries = (this+entryTable).arrayZ; + + unsigned int num_classes = nClasses; + if (unlikely (hb_unsigned_mul_overflows (num_classes, states[0].static_size))) + return_trace (false); + unsigned int row_stride = num_classes * states[0].static_size; + + /* Apple 'kern' table has this peculiarity: + * + * "Because the stateTableOffset in the state table header is (strictly + * speaking) redundant, some 'kern' tables use it to record an initial + * state where that should not be StartOfText. To determine if this is + * done, calculate what the stateTableOffset should be. If it's different + * from the actual stateTableOffset, use it as the initial state." + * + * We implement this by calling the initial state zero, but allow *negative* + * states if the start state indeed was not the first state. Since the code + * is shared, this will also apply to 'mort' table. The 'kerx' / 'morx' + * tables are not affected since those address states by index, not offset. + */ + + int min_state = 0; + int max_state = 0; + unsigned int num_entries = 0; + + int state_pos = 0; + int state_neg = 0; + unsigned int entry = 0; + while (min_state < state_neg || state_pos <= max_state) + { + if (min_state < state_neg) + { + /* Negative states. */ + if (unlikely (hb_unsigned_mul_overflows (min_state, num_classes))) + return_trace (false); + if (unlikely (!c->check_range (&states[min_state * num_classes], + -min_state, + row_stride))) + return_trace (false); + if ((c->max_ops -= state_neg - min_state) <= 0) + return_trace (false); + { /* Sweep new states. */ + const HBUSHORT *stop = &states[min_state * num_classes]; + if (unlikely (stop > states)) + return_trace (false); + for (const HBUSHORT *p = states; stop < p; p--) + num_entries = hb_max (num_entries, *(p - 1) + 1); + state_neg = min_state; + } + } + + if (state_pos <= max_state) + { + /* Positive states. */ + if (unlikely (!c->check_range (states, + max_state + 1, + row_stride))) + return_trace (false); + if ((c->max_ops -= max_state - state_pos + 1) <= 0) + return_trace (false); + { /* Sweep new states. */ + if (unlikely (hb_unsigned_mul_overflows ((max_state + 1), num_classes))) + return_trace (false); + const HBUSHORT *stop = &states[(max_state + 1) * num_classes]; + if (unlikely (stop < states)) + return_trace (false); + for (const HBUSHORT *p = &states[state_pos * num_classes]; p < stop; p++) + num_entries = hb_max (num_entries, *p + 1); + state_pos = max_state + 1; + } + } + + if (unlikely (!c->check_array (entries, num_entries))) + return_trace (false); + if ((c->max_ops -= num_entries - entry) <= 0) + return_trace (false); + { /* Sweep new entries. */ + const Entry<Extra> *stop = &entries[num_entries]; + for (const Entry<Extra> *p = &entries[entry]; p < stop; p++) + { + int newState = new_state (p->newState); + min_state = hb_min (min_state, newState); + max_state = hb_max (max_state, newState); + } + entry = num_entries; + } + } + + if (num_entries_out) + *num_entries_out = num_entries; + + return_trace (true); + } + + protected: + HBUINT nClasses; /* Number of classes, which is the number of indices + * in a single line in the state array. */ + NNOffsetTo<ClassType, HBUINT> + classTable; /* Offset to the class table. */ + NNOffsetTo<UnsizedArrayOf<HBUSHORT>, HBUINT> + stateArrayTable;/* Offset to the state array. */ + NNOffsetTo<UnsizedArrayOf<Entry<Extra>>, HBUINT> + entryTable; /* Offset to the entry array. */ + + public: + DEFINE_SIZE_STATIC (4 * sizeof (HBUINT)); +}; + +template <typename HBUCHAR> +struct ClassTable +{ + unsigned int get_class (hb_codepoint_t glyph_id, unsigned int outOfRange) const + { + unsigned int i = glyph_id - firstGlyph; + return i >= classArray.len ? outOfRange : classArray.arrayZ[i]; + } + unsigned int get_class (hb_codepoint_t glyph_id, + unsigned int num_glyphs HB_UNUSED, + unsigned int outOfRange) const + { + return get_class (glyph_id, outOfRange); + } + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && classArray.sanitize (c)); + } + protected: + HBGlyphID firstGlyph; /* First glyph index included in the trimmed array. */ + ArrayOf<HBUCHAR> classArray; /* The class codes (indexed by glyph index minus + * firstGlyph). */ + public: + DEFINE_SIZE_ARRAY (4, classArray); +}; + +struct ObsoleteTypes +{ + static constexpr bool extended = false; + typedef HBUINT16 HBUINT; + typedef HBUINT8 HBUSHORT; + typedef ClassTable<HBUINT8> ClassTypeNarrow; + typedef ClassTable<HBUINT16> ClassTypeWide; + + template <typename T> + static unsigned int offsetToIndex (unsigned int offset, + const void *base, + const T *array) + { + return (offset - ((const char *) array - (const char *) base)) / T::static_size; + } + template <typename T> + static unsigned int byteOffsetToIndex (unsigned int offset, + const void *base, + const T *array) + { + return offsetToIndex (offset, base, array); + } + template <typename T> + static unsigned int wordOffsetToIndex (unsigned int offset, + const void *base, + const T *array) + { + return offsetToIndex (2 * offset, base, array); + } +}; +struct ExtendedTypes +{ + static constexpr bool extended = true; + typedef HBUINT32 HBUINT; + typedef HBUINT16 HBUSHORT; + typedef Lookup<HBUINT16> ClassTypeNarrow; + typedef Lookup<HBUINT16> ClassTypeWide; + + template <typename T> + static unsigned int offsetToIndex (unsigned int offset, + const void *base HB_UNUSED, + const T *array HB_UNUSED) + { + return offset; + } + template <typename T> + static unsigned int byteOffsetToIndex (unsigned int offset, + const void *base HB_UNUSED, + const T *array HB_UNUSED) + { + return offset / 2; + } + template <typename T> + static unsigned int wordOffsetToIndex (unsigned int offset, + const void *base HB_UNUSED, + const T *array HB_UNUSED) + { + return offset; + } +}; + +template <typename Types, typename EntryData> +struct StateTableDriver +{ + StateTableDriver (const StateTable<Types, EntryData> &machine_, + hb_buffer_t *buffer_, + hb_face_t *face_) : + machine (machine_), + buffer (buffer_), + num_glyphs (face_->get_num_glyphs ()) {} + + template <typename context_t> + void drive (context_t *c) + { + if (!c->in_place) + buffer->clear_output (); + + int state = StateTable<Types, EntryData>::STATE_START_OF_TEXT; + for (buffer->idx = 0; buffer->successful;) + { + unsigned int klass = buffer->idx < buffer->len ? + machine.get_class (buffer->info[buffer->idx].codepoint, num_glyphs) : + (unsigned) StateTable<Types, EntryData>::CLASS_END_OF_TEXT; + DEBUG_MSG (APPLY, nullptr, "c%u at %u", klass, buffer->idx); + const Entry<EntryData> &entry = machine.get_entry (state, klass); + + /* Unsafe-to-break before this if not in state 0, as things might + * go differently if we start from state 0 here. + * + * Ugh. The indexing here is ugly... */ + if (state && buffer->backtrack_len () && buffer->idx < buffer->len) + { + /* If there's no action and we're just epsilon-transitioning to state 0, + * safe to break. */ + if (c->is_actionable (this, entry) || + !(entry.newState == StateTable<Types, EntryData>::STATE_START_OF_TEXT && + entry.flags == context_t::DontAdvance)) + buffer->unsafe_to_break_from_outbuffer (buffer->backtrack_len () - 1, buffer->idx + 1); + } + + /* Unsafe-to-break if end-of-text would kick in here. */ + if (buffer->idx + 2 <= buffer->len) + { + const Entry<EntryData> &end_entry = machine.get_entry (state, StateTable<Types, EntryData>::CLASS_END_OF_TEXT); + if (c->is_actionable (this, end_entry)) + buffer->unsafe_to_break (buffer->idx, buffer->idx + 2); + } + + c->transition (this, entry); + + state = machine.new_state (entry.newState); + DEBUG_MSG (APPLY, nullptr, "s%d", state); + + if (buffer->idx == buffer->len) + break; + + if (!(entry.flags & context_t::DontAdvance) || buffer->max_ops-- <= 0) + buffer->next_glyph (); + } + + if (!c->in_place) + { + for (; buffer->successful && buffer->idx < buffer->len;) + buffer->next_glyph (); + buffer->swap_buffers (); + } + } + + public: + const StateTable<Types, EntryData> &machine; + hb_buffer_t *buffer; + unsigned int num_glyphs; +}; + + +struct ankr; + +struct hb_aat_apply_context_t : + hb_dispatch_context_t<hb_aat_apply_context_t, bool, HB_DEBUG_APPLY> +{ + const char *get_name () { return "APPLY"; } + template <typename T> + return_t dispatch (const T &obj) { return obj.apply (this); } + static return_t default_return_value () { return false; } + bool stop_sublookup_iteration (return_t r) const { return r; } + + const hb_ot_shape_plan_t *plan; + hb_font_t *font; + hb_face_t *face; + hb_buffer_t *buffer; + hb_sanitize_context_t sanitizer; + const ankr *ankr_table; + + /* Unused. For debug tracing only. */ + unsigned int lookup_index; + + HB_INTERNAL hb_aat_apply_context_t (const hb_ot_shape_plan_t *plan_, + hb_font_t *font_, + hb_buffer_t *buffer_, + hb_blob_t *blob = const_cast<hb_blob_t *> (&Null (hb_blob_t))); + + HB_INTERNAL ~hb_aat_apply_context_t (); + + HB_INTERNAL void set_ankr_table (const AAT::ankr *ankr_table_); + + void set_lookup_index (unsigned int i) { lookup_index = i; } +}; + + +} /* namespace AAT */ + + +#endif /* HB_AAT_LAYOUT_COMMON_HH */ diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-feat-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-feat-table.hh new file mode 100644 index 0000000000..359e859cfc --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-layout-feat-table.hh @@ -0,0 +1,222 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_AAT_LAYOUT_FEAT_TABLE_HH +#define HB_AAT_LAYOUT_FEAT_TABLE_HH + +#include "hb-aat-layout-common.hh" + +/* + * feat -- Feature Name + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6feat.html + */ +#define HB_AAT_TAG_feat HB_TAG('f','e','a','t') + + +namespace AAT { + + +struct SettingName +{ + friend struct FeatureName; + + int cmp (hb_aat_layout_feature_selector_t key) const + { return (int) key - (int) setting; } + + hb_aat_layout_feature_selector_t get_selector () const + { return (hb_aat_layout_feature_selector_t) (unsigned) setting; } + + hb_aat_layout_feature_selector_info_t get_info (hb_aat_layout_feature_selector_t default_selector) const + { + return { + nameIndex, + (hb_aat_layout_feature_selector_t) (unsigned int) setting, + default_selector == HB_AAT_LAYOUT_FEATURE_SELECTOR_INVALID + ? (hb_aat_layout_feature_selector_t) (setting + 1) + : default_selector, + 0 + }; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + HBUINT16 setting; /* The setting. */ + NameID nameIndex; /* The name table index for the setting's name. */ + public: + DEFINE_SIZE_STATIC (4); +}; +DECLARE_NULL_NAMESPACE_BYTES (AAT, SettingName); + +struct feat; + +struct FeatureName +{ + int cmp (hb_aat_layout_feature_type_t key) const + { return (int) key - (int) feature; } + + enum { + Exclusive = 0x8000, /* If set, the feature settings are mutually exclusive. */ + NotDefault = 0x4000, /* If clear, then the setting with an index of 0 in + * the setting name array for this feature should + * be taken as the default for the feature + * (if one is required). If set, then bits 0-15 of this + * featureFlags field contain the index of the setting + * which is to be taken as the default. */ + IndexMask = 0x00FF /* If bits 30 and 31 are set, then these sixteen bits + * indicate the index of the setting in the setting name + * array for this feature which should be taken + * as the default. */ + }; + + unsigned int get_selector_infos (unsigned int start_offset, + unsigned int *selectors_count, /* IN/OUT. May be NULL. */ + hb_aat_layout_feature_selector_info_t *selectors, /* OUT. May be NULL. */ + unsigned int *pdefault_index, /* OUT. May be NULL. */ + const void *base) const + { + hb_array_t< const SettingName> settings_table = (base+settingTableZ).as_array (nSettings); + + static_assert (Index::NOT_FOUND_INDEX == HB_AAT_LAYOUT_NO_SELECTOR_INDEX, ""); + + hb_aat_layout_feature_selector_t default_selector = HB_AAT_LAYOUT_FEATURE_SELECTOR_INVALID; + unsigned int default_index = Index::NOT_FOUND_INDEX; + if (featureFlags & Exclusive) + { + default_index = (featureFlags & NotDefault) ? featureFlags & IndexMask : 0; + default_selector = settings_table[default_index].get_selector (); + } + if (pdefault_index) + *pdefault_index = default_index; + + if (selectors_count) + { + + settings_table.sub_array (start_offset, selectors_count) + | hb_map ([=] (const SettingName& setting) { return setting.get_info (default_selector); }) + | hb_sink (hb_array (selectors, *selectors_count)) + ; + } + return settings_table.length; + } + + hb_aat_layout_feature_type_t get_feature_type () const + { return (hb_aat_layout_feature_type_t) (unsigned int) feature; } + + hb_ot_name_id_t get_feature_name_id () const { return nameIndex; } + + bool is_exclusive () const { return featureFlags & Exclusive; } + + /* A FeatureName with no settings is meaningless */ + bool has_data () const { return nSettings; } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + (base+settingTableZ).sanitize (c, nSettings))); + } + + protected: + HBUINT16 feature; /* Feature type. */ + HBUINT16 nSettings; /* The number of records in the setting name array. */ + LNNOffsetTo<UnsizedArrayOf<SettingName>> + settingTableZ; /* Offset in bytes from the beginning of this table to + * this feature's setting name array. The actual type of + * record this offset refers to will depend on the + * exclusivity value, as described below. */ + HBUINT16 featureFlags; /* Single-bit flags associated with the feature type. */ + HBINT16 nameIndex; /* The name table index for the feature's name. + * This index has values greater than 255 and + * less than 32768. */ + public: + DEFINE_SIZE_STATIC (12); +}; + +struct feat +{ + static constexpr hb_tag_t tableTag = HB_AAT_TAG_feat; + + bool has_data () const { return version.to_int (); } + + unsigned int get_feature_types (unsigned int start_offset, + unsigned int *count, + hb_aat_layout_feature_type_t *features) const + { + if (count) + { + + namesZ.as_array (featureNameCount).sub_array (start_offset, count) + | hb_map (&FeatureName::get_feature_type) + | hb_sink (hb_array (features, *count)) + ; + } + return featureNameCount; + } + + bool exposes_feature (hb_aat_layout_feature_type_t feature_type) const + { return get_feature (feature_type).has_data (); } + + const FeatureName& get_feature (hb_aat_layout_feature_type_t feature_type) const + { return namesZ.bsearch (featureNameCount, feature_type); } + + hb_ot_name_id_t get_feature_name_id (hb_aat_layout_feature_type_t feature) const + { return get_feature (feature).get_feature_name_id (); } + + unsigned int get_selector_infos (hb_aat_layout_feature_type_t feature_type, + unsigned int start_offset, + unsigned int *selectors_count, /* IN/OUT. May be NULL. */ + hb_aat_layout_feature_selector_info_t *selectors, /* OUT. May be NULL. */ + unsigned int *default_index /* OUT. May be NULL. */) const + { + return get_feature (feature_type).get_selector_infos (start_offset, selectors_count, selectors, + default_index, this); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + version.major == 1 && + namesZ.sanitize (c, featureNameCount, this))); + } + + protected: + FixedVersion<>version; /* Version number of the feature name table + * (0x00010000 for the current version). */ + HBUINT16 featureNameCount; + /* The number of entries in the feature name array. */ + HBUINT16 reserved1; /* Reserved (set to zero). */ + HBUINT32 reserved2; /* Reserved (set to zero). */ + SortedUnsizedArrayOf<FeatureName> + namesZ; /* The feature name array. */ + public: + DEFINE_SIZE_ARRAY (12, namesZ); +}; + +} /* namespace AAT */ + +#endif /* HB_AAT_LAYOUT_FEAT_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-just-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-just-table.hh new file mode 100644 index 0000000000..49506e9f5a --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-layout-just-table.hh @@ -0,0 +1,417 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_AAT_LAYOUT_JUST_TABLE_HH +#define HB_AAT_LAYOUT_JUST_TABLE_HH + +#include "hb-aat-layout-common.hh" +#include "hb-ot-layout.hh" +#include "hb-open-type.hh" + +#include "hb-aat-layout-morx-table.hh" + +/* + * just -- Justification + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6just.html + */ +#define HB_AAT_TAG_just HB_TAG('j','u','s','t') + + +namespace AAT { + +using namespace OT; + + +struct ActionSubrecordHeader +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + HBUINT16 actionClass; /* The JustClass value associated with this + * ActionSubrecord. */ + HBUINT16 actionType; /* The type of postcompensation action. */ + HBUINT16 actionLength; /* Length of this ActionSubrecord record, which + * must be a multiple of 4. */ + public: + DEFINE_SIZE_STATIC (6); +}; + +struct DecompositionAction +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + ActionSubrecordHeader + header; + HBFixed lowerLimit; /* If the distance factor is less than this value, + * then the ligature is decomposed. */ + HBFixed upperLimit; /* If the distance factor is greater than this value, + * then the ligature is decomposed. */ + HBUINT16 order; /* Numerical order in which this ligature will + * be decomposed; you may want infrequent ligatures + * to decompose before more frequent ones. The ligatures + * on the line of text will decompose in increasing + * value of this field. */ + ArrayOf<HBUINT16> + decomposedglyphs; + /* Number of 16-bit glyph indexes that follow; + * the ligature will be decomposed into these glyphs. + * + * Array of decomposed glyphs. */ + public: + DEFINE_SIZE_ARRAY (18, decomposedglyphs); +}; + +struct UnconditionalAddGlyphAction +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + protected: + ActionSubrecordHeader + header; + HBGlyphID addGlyph; /* Glyph that should be added if the distance factor + * is growing. */ + + public: + DEFINE_SIZE_STATIC (8); +}; + +struct ConditionalAddGlyphAction +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + ActionSubrecordHeader + header; + HBFixed substThreshold; /* Distance growth factor (in ems) at which + * this glyph is replaced and the growth factor + * recalculated. */ + HBGlyphID addGlyph; /* Glyph to be added as kashida. If this value is + * 0xFFFF, no extra glyph will be added. Note that + * generally when a glyph is added, justification + * will need to be redone. */ + HBGlyphID substGlyph; /* Glyph to be substituted for this glyph if the + * growth factor equals or exceeds the value of + * substThreshold. */ + public: + DEFINE_SIZE_STATIC (14); +}; + +struct DuctileGlyphAction +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + ActionSubrecordHeader + header; + HBUINT32 variationAxis; /* The 4-byte tag identifying the ductile axis. + * This would normally be 0x64756374 ('duct'), + * but you may use any axis the font contains. */ + HBFixed minimumLimit; /* The lowest value for the ductility axis tha + * still yields an acceptable appearance. Normally + * this will be 1.0. */ + HBFixed noStretchValue; /* This is the default value that corresponds to + * no change in appearance. Normally, this will + * be 1.0. */ + HBFixed maximumLimit; /* The highest value for the ductility axis that + * still yields an acceptable appearance. */ + public: + DEFINE_SIZE_STATIC (22); +}; + +struct RepeatedAddGlyphAction +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + ActionSubrecordHeader + header; + HBUINT16 flags; /* Currently unused; set to 0. */ + HBGlyphID glyph; /* Glyph that should be added if the distance factor + * is growing. */ + public: + DEFINE_SIZE_STATIC (10); +}; + +struct ActionSubrecord +{ + unsigned int get_length () const { return u.header.actionLength; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this))) + return_trace (false); + + switch (u.header.actionType) + { + case 0: return_trace (u.decompositionAction.sanitize (c)); + case 1: return_trace (u.unconditionalAddGlyphAction.sanitize (c)); + case 2: return_trace (u.conditionalAddGlyphAction.sanitize (c)); + // case 3: return_trace (u.stretchGlyphAction.sanitize (c)); + case 4: return_trace (u.decompositionAction.sanitize (c)); + case 5: return_trace (u.decompositionAction.sanitize (c)); + default: return_trace (true); + } + } + + protected: + union { + ActionSubrecordHeader header; + DecompositionAction decompositionAction; + UnconditionalAddGlyphAction unconditionalAddGlyphAction; + ConditionalAddGlyphAction conditionalAddGlyphAction; + /* StretchGlyphAction stretchGlyphAction; -- Not supported by CoreText */ + DuctileGlyphAction ductileGlyphAction; + RepeatedAddGlyphAction repeatedAddGlyphAction; + } u; /* Data. The format of this data depends on + * the value of the actionType field. */ + public: + DEFINE_SIZE_UNION (6, header); +}; + +struct PostcompensationActionChain +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this))) + return_trace (false); + + unsigned int offset = min_size; + for (unsigned int i = 0; i < count; i++) + { + const ActionSubrecord& subrecord = StructAtOffset<ActionSubrecord> (this, offset); + if (unlikely (!subrecord.sanitize (c))) return_trace (false); + offset += subrecord.get_length (); + } + + return_trace (true); + } + + protected: + HBUINT32 count; + + public: + DEFINE_SIZE_STATIC (4); +}; + +struct JustWidthDeltaEntry +{ + enum Flags + { + Reserved1 =0xE000,/* Reserved. You should set these bits to zero. */ + UnlimiteGap =0x1000,/* The glyph can take unlimited gap. When this + * glyph participates in the justification process, + * it and any other glyphs on the line having this + * bit set absorb all the remaining gap. */ + Reserved2 =0x0FF0,/* Reserved. You should set these bits to zero. */ + Priority =0x000F /* The justification priority of the glyph. */ + }; + + enum Priority + { + Kashida = 0, /* Kashida priority. This is the highest priority + * during justification. */ + Whitespace = 1, /* Whitespace priority. Any whitespace glyphs (as + * identified in the glyph properties table) will + * get this priority. */ + InterCharacter = 2, /* Inter-character priority. Give this to any + * remaining glyphs. */ + NullPriority = 3 /* Null priority. You should set this priority for + * glyphs that only participate in justification + * after the above priorities. Normally all glyphs + * have one of the previous three values. If you + * don't want a glyph to participate in justification, + * and you don't want to set its factors to zero, + * you may instead assign it to the null priority. */ + }; + + protected: + HBFixed beforeGrowLimit;/* The ratio by which the advance width of the + * glyph is permitted to grow on the left or top side. */ + HBFixed beforeShrinkLimit; + /* The ratio by which the advance width of the + * glyph is permitted to shrink on the left or top side. */ + HBFixed afterGrowLimit; /* The ratio by which the advance width of the glyph + * is permitted to shrink on the left or top side. */ + HBFixed afterShrinkLimit; + /* The ratio by which the advance width of the glyph + * is at most permitted to shrink on the right or + * bottom side. */ + HBUINT16 growFlags; /* Flags controlling the grow case. */ + HBUINT16 shrinkFlags; /* Flags controlling the shrink case. */ + + public: + DEFINE_SIZE_STATIC (20); +}; + +struct WidthDeltaPair +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + HBUINT32 justClass; /* The justification category associated + * with the wdRecord field. Only 7 bits of + * this field are used. (The other bits are + * used as padding to guarantee longword + * alignment of the following record). */ + JustWidthDeltaEntry + wdRecord; /* The actual width delta record. */ + + public: + DEFINE_SIZE_STATIC (24); +}; + +typedef OT::LArrayOf<WidthDeltaPair> WidthDeltaCluster; + +struct JustificationCategory +{ + typedef void EntryData; + + enum Flags + { + SetMark =0x8000,/* If set, make the current glyph the marked + * glyph. */ + DontAdvance =0x4000,/* If set, don't advance to the next glyph before + * going to the new state. */ + MarkCategory =0x3F80,/* The justification category for the marked + * glyph if nonzero. */ + CurrentCategory =0x007F /* The justification category for the current + * glyph if nonzero. */ + }; + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + morphHeader.sanitize (c) && + stHeader.sanitize (c))); + } + + protected: + ChainSubtable<ObsoleteTypes> + morphHeader; /* Metamorphosis-style subtable header. */ + StateTable<ObsoleteTypes, EntryData> + stHeader; /* The justification insertion state table header */ + public: + DEFINE_SIZE_STATIC (30); +}; + +struct JustificationHeader +{ + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + justClassTable.sanitize (c, base, base) && + wdcTable.sanitize (c, base) && + pcTable.sanitize (c, base) && + lookupTable.sanitize (c, base))); + } + + protected: + OffsetTo<JustificationCategory> + justClassTable; /* Offset to the justification category state table. */ + OffsetTo<WidthDeltaCluster> + wdcTable; /* Offset from start of justification table to start + * of the subtable containing the width delta factors + * for the glyphs in your font. + * + * The width delta clusters table. */ + OffsetTo<PostcompensationActionChain> + pcTable; /* Offset from start of justification table to start + * of postcompensation subtable (set to zero if none). + * + * The postcompensation subtable, if present in the font. */ + Lookup<OffsetTo<WidthDeltaCluster>> + lookupTable; /* Lookup table associating glyphs with width delta + * clusters. See the description of Width Delta Clusters + * table for details on how to interpret the lookup values. */ + + public: + DEFINE_SIZE_MIN (8); +}; + +struct just +{ + static constexpr hb_tag_t tableTag = HB_AAT_TAG_just; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + + return_trace (likely (c->check_struct (this) && + version.major == 1 && + horizData.sanitize (c, this, this) && + vertData.sanitize (c, this, this))); + } + + protected: + FixedVersion<>version; /* Version of the justification table + * (0x00010000u for version 1.0). */ + HBUINT16 format; /* Format of the justification table (set to 0). */ + OffsetTo<JustificationHeader> + horizData; /* Byte offset from the start of the justification table + * to the header for tables that contain justification + * information for horizontal text. + * If you are not including this information, + * store 0. */ + OffsetTo<JustificationHeader> + vertData; /* ditto, vertical */ + + public: + DEFINE_SIZE_STATIC (10); +}; + +} /* namespace AAT */ + + +#endif /* HB_AAT_LAYOUT_JUST_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-kerx-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-kerx-table.hh new file mode 100644 index 0000000000..1cd412164e --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-layout-kerx-table.hh @@ -0,0 +1,999 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_AAT_LAYOUT_KERX_TABLE_HH +#define HB_AAT_LAYOUT_KERX_TABLE_HH + +#include "hb-kern.hh" +#include "hb-aat-layout-ankr-table.hh" + +/* + * kerx -- Extended Kerning + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6kerx.html + */ +#define HB_AAT_TAG_kerx HB_TAG('k','e','r','x') + + +namespace AAT { + +using namespace OT; + + +static inline int +kerxTupleKern (int value, + unsigned int tupleCount, + const void *base, + hb_aat_apply_context_t *c) +{ + if (likely (!tupleCount || !c)) return value; + + unsigned int offset = value; + const FWORD *pv = &StructAtOffset<FWORD> (base, offset); + if (unlikely (!c->sanitizer.check_array (pv, tupleCount))) return 0; + return *pv; +} + + +struct hb_glyph_pair_t +{ + hb_codepoint_t left; + hb_codepoint_t right; +}; + +struct KernPair +{ + int get_kerning () const { return value; } + + int cmp (const hb_glyph_pair_t &o) const + { + int ret = left.cmp (o.left); + if (ret) return ret; + return right.cmp (o.right); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + protected: + HBGlyphID left; + HBGlyphID right; + FWORD value; + public: + DEFINE_SIZE_STATIC (6); +}; + +template <typename KernSubTableHeader> +struct KerxSubTableFormat0 +{ + int get_kerning (hb_codepoint_t left, hb_codepoint_t right, + hb_aat_apply_context_t *c = nullptr) const + { + hb_glyph_pair_t pair = {left, right}; + int v = pairs.bsearch (pair).get_kerning (); + return kerxTupleKern (v, header.tuple_count (), this, c); + } + + bool apply (hb_aat_apply_context_t *c) const + { + TRACE_APPLY (this); + + if (!c->plan->requested_kerning) + return false; + + if (header.coverage & header.Backwards) + return false; + + accelerator_t accel (*this, c); + hb_kern_machine_t<accelerator_t> machine (accel, header.coverage & header.CrossStream); + machine.kern (c->font, c->buffer, c->plan->kern_mask); + + return_trace (true); + } + + struct accelerator_t + { + const KerxSubTableFormat0 &table; + hb_aat_apply_context_t *c; + + accelerator_t (const KerxSubTableFormat0 &table_, + hb_aat_apply_context_t *c_) : + table (table_), c (c_) {} + + int get_kerning (hb_codepoint_t left, hb_codepoint_t right) const + { return table.get_kerning (left, right, c); } + }; + + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (pairs.sanitize (c))); + } + + protected: + KernSubTableHeader header; + BinSearchArrayOf<KernPair, typename KernSubTableHeader::Types::HBUINT> + pairs; /* Sorted kern records. */ + public: + DEFINE_SIZE_ARRAY (KernSubTableHeader::static_size + 16, pairs); +}; + + +template <bool extended> +struct Format1Entry; + +template <> +struct Format1Entry<true> +{ + enum Flags + { + Push = 0x8000, /* If set, push this glyph on the kerning stack. */ + DontAdvance = 0x4000, /* If set, don't advance to the next glyph + * before going to the new state. */ + Reset = 0x2000, /* If set, reset the kerning data (clear the stack) */ + Reserved = 0x1FFF, /* Not used; set to 0. */ + }; + + struct EntryData + { + HBUINT16 kernActionIndex;/* Index into the kerning value array. If + * this index is 0xFFFF, then no kerning + * is to be performed. */ + public: + DEFINE_SIZE_STATIC (2); + }; + + static bool performAction (const Entry<EntryData> &entry) + { return entry.data.kernActionIndex != 0xFFFF; } + + static unsigned int kernActionIndex (const Entry<EntryData> &entry) + { return entry.data.kernActionIndex; } +}; +template <> +struct Format1Entry<false> +{ + enum Flags + { + Push = 0x8000, /* If set, push this glyph on the kerning stack. */ + DontAdvance = 0x4000, /* If set, don't advance to the next glyph + * before going to the new state. */ + Offset = 0x3FFF, /* Byte offset from beginning of subtable to the + * value table for the glyphs on the kerning stack. */ + + Reset = 0x0000, /* Not supported? */ + }; + + typedef void EntryData; + + static bool performAction (const Entry<EntryData> &entry) + { return entry.flags & Offset; } + + static unsigned int kernActionIndex (const Entry<EntryData> &entry) + { return entry.flags & Offset; } +}; + +template <typename KernSubTableHeader> +struct KerxSubTableFormat1 +{ + typedef typename KernSubTableHeader::Types Types; + typedef typename Types::HBUINT HBUINT; + + typedef Format1Entry<Types::extended> Format1EntryT; + typedef typename Format1EntryT::EntryData EntryData; + + struct driver_context_t + { + static constexpr bool in_place = true; + enum + { + DontAdvance = Format1EntryT::DontAdvance, + }; + + driver_context_t (const KerxSubTableFormat1 *table_, + hb_aat_apply_context_t *c_) : + c (c_), + table (table_), + /* Apparently the offset kernAction is from the beginning of the state-machine, + * similar to offsets in morx table, NOT from beginning of this table, like + * other subtables in kerx. Discovered via testing. */ + kernAction (&table->machine + table->kernAction), + depth (0), + crossStream (table->header.coverage & table->header.CrossStream) {} + + bool is_actionable (StateTableDriver<Types, EntryData> *driver HB_UNUSED, + const Entry<EntryData> &entry) + { return Format1EntryT::performAction (entry); } + void transition (StateTableDriver<Types, EntryData> *driver, + const Entry<EntryData> &entry) + { + hb_buffer_t *buffer = driver->buffer; + unsigned int flags = entry.flags; + + if (flags & Format1EntryT::Reset) + depth = 0; + + if (flags & Format1EntryT::Push) + { + if (likely (depth < ARRAY_LENGTH (stack))) + stack[depth++] = buffer->idx; + else + depth = 0; /* Probably not what CoreText does, but better? */ + } + + if (Format1EntryT::performAction (entry) && depth) + { + unsigned int tuple_count = hb_max (1u, table->header.tuple_count ()); + + unsigned int kern_idx = Format1EntryT::kernActionIndex (entry); + kern_idx = Types::byteOffsetToIndex (kern_idx, &table->machine, kernAction.arrayZ); + const FWORD *actions = &kernAction[kern_idx]; + if (!c->sanitizer.check_array (actions, depth, tuple_count)) + { + depth = 0; + return; + } + + hb_mask_t kern_mask = c->plan->kern_mask; + + /* From Apple 'kern' spec: + * "Each pops one glyph from the kerning stack and applies the kerning value to it. + * The end of the list is marked by an odd value... */ + bool last = false; + while (!last && depth) + { + unsigned int idx = stack[--depth]; + int v = *actions; + actions += tuple_count; + if (idx >= buffer->len) continue; + + /* "The end of the list is marked by an odd value..." */ + last = v & 1; + v &= ~1; + + hb_glyph_position_t &o = buffer->pos[idx]; + + if (HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction)) + { + if (crossStream) + { + /* The following flag is undocumented in the spec, but described + * in the 'kern' table example. */ + if (v == -0x8000) + { + o.attach_type() = ATTACH_TYPE_NONE; + o.attach_chain() = 0; + o.y_offset = 0; + } + else if (o.attach_type()) + { + o.y_offset += c->font->em_scale_y (v); + buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT; + } + } + else if (buffer->info[idx].mask & kern_mask) + { + o.x_advance += c->font->em_scale_x (v); + o.x_offset += c->font->em_scale_x (v); + } + } + else + { + if (crossStream) + { + /* CoreText doesn't do crossStream kerning in vertical. We do. */ + if (v == -0x8000) + { + o.attach_type() = ATTACH_TYPE_NONE; + o.attach_chain() = 0; + o.x_offset = 0; + } + else if (o.attach_type()) + { + o.x_offset += c->font->em_scale_x (v); + buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT; + } + } + else if (buffer->info[idx].mask & kern_mask) + { + o.y_advance += c->font->em_scale_y (v); + o.y_offset += c->font->em_scale_y (v); + } + } + } + } + } + + private: + hb_aat_apply_context_t *c; + const KerxSubTableFormat1 *table; + const UnsizedArrayOf<FWORD> &kernAction; + unsigned int stack[8]; + unsigned int depth; + bool crossStream; + }; + + bool apply (hb_aat_apply_context_t *c) const + { + TRACE_APPLY (this); + + if (!c->plan->requested_kerning && + !(header.coverage & header.CrossStream)) + return false; + + driver_context_t dc (this, c); + + StateTableDriver<Types, EntryData> driver (machine, c->buffer, c->font->face); + driver.drive (&dc); + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + /* The rest of array sanitizations are done at run-time. */ + return_trace (likely (c->check_struct (this) && + machine.sanitize (c))); + } + + protected: + KernSubTableHeader header; + StateTable<Types, EntryData> machine; + NNOffsetTo<UnsizedArrayOf<FWORD>, HBUINT> kernAction; + public: + DEFINE_SIZE_STATIC (KernSubTableHeader::static_size + 5 * sizeof (HBUINT)); +}; + +template <typename KernSubTableHeader> +struct KerxSubTableFormat2 +{ + typedef typename KernSubTableHeader::Types Types; + typedef typename Types::HBUINT HBUINT; + + int get_kerning (hb_codepoint_t left, hb_codepoint_t right, + hb_aat_apply_context_t *c) const + { + unsigned int num_glyphs = c->sanitizer.get_num_glyphs (); + unsigned int l = (this+leftClassTable).get_class (left, num_glyphs, 0); + unsigned int r = (this+rightClassTable).get_class (right, num_glyphs, 0); + + const UnsizedArrayOf<FWORD> &arrayZ = this+array; + unsigned int kern_idx = l + r; + kern_idx = Types::offsetToIndex (kern_idx, this, arrayZ.arrayZ); + const FWORD *v = &arrayZ[kern_idx]; + if (unlikely (!v->sanitize (&c->sanitizer))) return 0; + + return kerxTupleKern (*v, header.tuple_count (), this, c); + } + + bool apply (hb_aat_apply_context_t *c) const + { + TRACE_APPLY (this); + + if (!c->plan->requested_kerning) + return false; + + if (header.coverage & header.Backwards) + return false; + + accelerator_t accel (*this, c); + hb_kern_machine_t<accelerator_t> machine (accel, header.coverage & header.CrossStream); + machine.kern (c->font, c->buffer, c->plan->kern_mask); + + return_trace (true); + } + + struct accelerator_t + { + const KerxSubTableFormat2 &table; + hb_aat_apply_context_t *c; + + accelerator_t (const KerxSubTableFormat2 &table_, + hb_aat_apply_context_t *c_) : + table (table_), c (c_) {} + + int get_kerning (hb_codepoint_t left, hb_codepoint_t right) const + { return table.get_kerning (left, right, c); } + }; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + leftClassTable.sanitize (c, this) && + rightClassTable.sanitize (c, this) && + c->check_range (this, array))); + } + + protected: + KernSubTableHeader header; + HBUINT rowWidth; /* The width, in bytes, of a row in the table. */ + NNOffsetTo<typename Types::ClassTypeWide, HBUINT> + leftClassTable; /* Offset from beginning of this subtable to + * left-hand class table. */ + NNOffsetTo<typename Types::ClassTypeWide, HBUINT> + rightClassTable;/* Offset from beginning of this subtable to + * right-hand class table. */ + NNOffsetTo<UnsizedArrayOf<FWORD>, HBUINT> + array; /* Offset from beginning of this subtable to + * the start of the kerning array. */ + public: + DEFINE_SIZE_STATIC (KernSubTableHeader::static_size + 4 * sizeof (HBUINT)); +}; + +template <typename KernSubTableHeader> +struct KerxSubTableFormat4 +{ + typedef ExtendedTypes Types; + + struct EntryData + { + HBUINT16 ankrActionIndex;/* Either 0xFFFF (for no action) or the index of + * the action to perform. */ + public: + DEFINE_SIZE_STATIC (2); + }; + + struct driver_context_t + { + static constexpr bool in_place = true; + enum Flags + { + Mark = 0x8000, /* If set, remember this glyph as the marked glyph. */ + DontAdvance = 0x4000, /* If set, don't advance to the next glyph before + * going to the new state. */ + Reserved = 0x3FFF, /* Not used; set to 0. */ + }; + + enum SubTableFlags + { + ActionType = 0xC0000000, /* A two-bit field containing the action type. */ + Unused = 0x3F000000, /* Unused - must be zero. */ + Offset = 0x00FFFFFF, /* Masks the offset in bytes from the beginning + * of the subtable to the beginning of the control + * point table. */ + }; + + driver_context_t (const KerxSubTableFormat4 *table, + hb_aat_apply_context_t *c_) : + c (c_), + action_type ((table->flags & ActionType) >> 30), + ankrData ((HBUINT16 *) ((const char *) &table->machine + (table->flags & Offset))), + mark_set (false), + mark (0) {} + + bool is_actionable (StateTableDriver<Types, EntryData> *driver HB_UNUSED, + const Entry<EntryData> &entry) + { return entry.data.ankrActionIndex != 0xFFFF; } + void transition (StateTableDriver<Types, EntryData> *driver, + const Entry<EntryData> &entry) + { + hb_buffer_t *buffer = driver->buffer; + + if (mark_set && entry.data.ankrActionIndex != 0xFFFF && buffer->idx < buffer->len) + { + hb_glyph_position_t &o = buffer->cur_pos(); + switch (action_type) + { + case 0: /* Control Point Actions.*/ + { + /* Indexed into glyph outline. */ + /* Each action (record in ankrData) contains two 16-bit fields, so we must + double the ankrActionIndex to get the correct offset here. */ + const HBUINT16 *data = &ankrData[entry.data.ankrActionIndex * 2]; + if (!c->sanitizer.check_array (data, 2)) return; + unsigned int markControlPoint = *data++; + unsigned int currControlPoint = *data++; + hb_position_t markX = 0; + hb_position_t markY = 0; + hb_position_t currX = 0; + hb_position_t currY = 0; + if (!c->font->get_glyph_contour_point_for_origin (c->buffer->info[mark].codepoint, + markControlPoint, + HB_DIRECTION_LTR /*XXX*/, + &markX, &markY) || + !c->font->get_glyph_contour_point_for_origin (c->buffer->cur ().codepoint, + currControlPoint, + HB_DIRECTION_LTR /*XXX*/, + &currX, &currY)) + return; + + o.x_offset = markX - currX; + o.y_offset = markY - currY; + } + break; + + case 1: /* Anchor Point Actions. */ + { + /* Indexed into 'ankr' table. */ + /* Each action (record in ankrData) contains two 16-bit fields, so we must + double the ankrActionIndex to get the correct offset here. */ + const HBUINT16 *data = &ankrData[entry.data.ankrActionIndex * 2]; + if (!c->sanitizer.check_array (data, 2)) return; + unsigned int markAnchorPoint = *data++; + unsigned int currAnchorPoint = *data++; + const Anchor &markAnchor = c->ankr_table->get_anchor (c->buffer->info[mark].codepoint, + markAnchorPoint, + c->sanitizer.get_num_glyphs ()); + const Anchor &currAnchor = c->ankr_table->get_anchor (c->buffer->cur ().codepoint, + currAnchorPoint, + c->sanitizer.get_num_glyphs ()); + + o.x_offset = c->font->em_scale_x (markAnchor.xCoordinate) - c->font->em_scale_x (currAnchor.xCoordinate); + o.y_offset = c->font->em_scale_y (markAnchor.yCoordinate) - c->font->em_scale_y (currAnchor.yCoordinate); + } + break; + + case 2: /* Control Point Coordinate Actions. */ + { + /* Each action contains four 16-bit fields, so we multiply the ankrActionIndex + by 4 to get the correct offset for the given action. */ + const FWORD *data = (const FWORD *) &ankrData[entry.data.ankrActionIndex * 4]; + if (!c->sanitizer.check_array (data, 4)) return; + int markX = *data++; + int markY = *data++; + int currX = *data++; + int currY = *data++; + + o.x_offset = c->font->em_scale_x (markX) - c->font->em_scale_x (currX); + o.y_offset = c->font->em_scale_y (markY) - c->font->em_scale_y (currY); + } + break; + } + o.attach_type() = ATTACH_TYPE_MARK; + o.attach_chain() = (int) mark - (int) buffer->idx; + buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT; + } + + if (entry.flags & Mark) + { + mark_set = true; + mark = buffer->idx; + } + } + + private: + hb_aat_apply_context_t *c; + unsigned int action_type; + const HBUINT16 *ankrData; + bool mark_set; + unsigned int mark; + }; + + bool apply (hb_aat_apply_context_t *c) const + { + TRACE_APPLY (this); + + driver_context_t dc (this, c); + + StateTableDriver<Types, EntryData> driver (machine, c->buffer, c->font->face); + driver.drive (&dc); + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + /* The rest of array sanitizations are done at run-time. */ + return_trace (likely (c->check_struct (this) && + machine.sanitize (c))); + } + + protected: + KernSubTableHeader header; + StateTable<Types, EntryData> machine; + HBUINT32 flags; + public: + DEFINE_SIZE_STATIC (KernSubTableHeader::static_size + 20); +}; + +template <typename KernSubTableHeader> +struct KerxSubTableFormat6 +{ + enum Flags + { + ValuesAreLong = 0x00000001, + }; + + bool is_long () const { return flags & ValuesAreLong; } + + int get_kerning (hb_codepoint_t left, hb_codepoint_t right, + hb_aat_apply_context_t *c) const + { + unsigned int num_glyphs = c->sanitizer.get_num_glyphs (); + if (is_long ()) + { + const typename U::Long &t = u.l; + unsigned int l = (this+t.rowIndexTable).get_value_or_null (left, num_glyphs); + unsigned int r = (this+t.columnIndexTable).get_value_or_null (right, num_glyphs); + unsigned int offset = l + r; + if (unlikely (offset < l)) return 0; /* Addition overflow. */ + if (unlikely (hb_unsigned_mul_overflows (offset, sizeof (FWORD32)))) return 0; + const FWORD32 *v = &StructAtOffset<FWORD32> (&(this+t.array), offset * sizeof (FWORD32)); + if (unlikely (!v->sanitize (&c->sanitizer))) return 0; + return kerxTupleKern (*v, header.tuple_count (), &(this+vector), c); + } + else + { + const typename U::Short &t = u.s; + unsigned int l = (this+t.rowIndexTable).get_value_or_null (left, num_glyphs); + unsigned int r = (this+t.columnIndexTable).get_value_or_null (right, num_glyphs); + unsigned int offset = l + r; + const FWORD *v = &StructAtOffset<FWORD> (&(this+t.array), offset * sizeof (FWORD)); + if (unlikely (!v->sanitize (&c->sanitizer))) return 0; + return kerxTupleKern (*v, header.tuple_count (), &(this+vector), c); + } + } + + bool apply (hb_aat_apply_context_t *c) const + { + TRACE_APPLY (this); + + if (!c->plan->requested_kerning) + return false; + + if (header.coverage & header.Backwards) + return false; + + accelerator_t accel (*this, c); + hb_kern_machine_t<accelerator_t> machine (accel, header.coverage & header.CrossStream); + machine.kern (c->font, c->buffer, c->plan->kern_mask); + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + (is_long () ? + ( + u.l.rowIndexTable.sanitize (c, this) && + u.l.columnIndexTable.sanitize (c, this) && + c->check_range (this, u.l.array) + ) : ( + u.s.rowIndexTable.sanitize (c, this) && + u.s.columnIndexTable.sanitize (c, this) && + c->check_range (this, u.s.array) + )) && + (header.tuple_count () == 0 || + c->check_range (this, vector)))); + } + + struct accelerator_t + { + const KerxSubTableFormat6 &table; + hb_aat_apply_context_t *c; + + accelerator_t (const KerxSubTableFormat6 &table_, + hb_aat_apply_context_t *c_) : + table (table_), c (c_) {} + + int get_kerning (hb_codepoint_t left, hb_codepoint_t right) const + { return table.get_kerning (left, right, c); } + }; + + protected: + KernSubTableHeader header; + HBUINT32 flags; + HBUINT16 rowCount; + HBUINT16 columnCount; + union U + { + struct Long + { + LNNOffsetTo<Lookup<HBUINT32>> rowIndexTable; + LNNOffsetTo<Lookup<HBUINT32>> columnIndexTable; + LNNOffsetTo<UnsizedArrayOf<FWORD32>> array; + } l; + struct Short + { + LNNOffsetTo<Lookup<HBUINT16>> rowIndexTable; + LNNOffsetTo<Lookup<HBUINT16>> columnIndexTable; + LNNOffsetTo<UnsizedArrayOf<FWORD>> array; + } s; + } u; + LNNOffsetTo<UnsizedArrayOf<FWORD>> vector; + public: + DEFINE_SIZE_STATIC (KernSubTableHeader::static_size + 24); +}; + + +struct KerxSubTableHeader +{ + typedef ExtendedTypes Types; + + unsigned tuple_count () const { return tupleCount; } + bool is_horizontal () const { return !(coverage & Vertical); } + + enum Coverage + { + Vertical = 0x80000000u, /* Set if table has vertical kerning values. */ + CrossStream = 0x40000000u, /* Set if table has cross-stream kerning values. */ + Variation = 0x20000000u, /* Set if table has variation kerning values. */ + Backwards = 0x10000000u, /* If clear, process the glyphs forwards, that + * is, from first to last in the glyph stream. + * If we, process them from last to first. + * This flag only applies to state-table based + * 'kerx' subtables (types 1 and 4). */ + Reserved = 0x0FFFFF00u, /* Reserved, set to zero. */ + SubtableType= 0x000000FFu, /* Subtable type. */ + }; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + public: + HBUINT32 length; + HBUINT32 coverage; + HBUINT32 tupleCount; + public: + DEFINE_SIZE_STATIC (12); +}; + +struct KerxSubTable +{ + friend struct kerx; + + unsigned int get_size () const { return u.header.length; } + unsigned int get_type () const { return u.header.coverage & u.header.SubtableType; } + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + unsigned int subtable_type = get_type (); + TRACE_DISPATCH (this, subtable_type); + switch (subtable_type) { + case 0: return_trace (c->dispatch (u.format0, hb_forward<Ts> (ds)...)); + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + case 2: return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...)); + case 4: return_trace (c->dispatch (u.format4, hb_forward<Ts> (ds)...)); + case 6: return_trace (c->dispatch (u.format6, hb_forward<Ts> (ds)...)); + default: return_trace (c->default_return_value ()); + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!u.header.sanitize (c) || + u.header.length <= u.header.static_size || + !c->check_range (this, u.header.length)) + return_trace (false); + + return_trace (dispatch (c)); + } + + public: + union { + KerxSubTableHeader header; + KerxSubTableFormat0<KerxSubTableHeader> format0; + KerxSubTableFormat1<KerxSubTableHeader> format1; + KerxSubTableFormat2<KerxSubTableHeader> format2; + KerxSubTableFormat4<KerxSubTableHeader> format4; + KerxSubTableFormat6<KerxSubTableHeader> format6; + } u; + public: + DEFINE_SIZE_MIN (12); +}; + + +/* + * The 'kerx' Table + */ + +template <typename T> +struct KerxTable +{ + /* https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern */ + const T* thiz () const { return static_cast<const T *> (this); } + + bool has_state_machine () const + { + typedef typename T::SubTable SubTable; + + const SubTable *st = &thiz()->firstSubTable; + unsigned int count = thiz()->tableCount; + for (unsigned int i = 0; i < count; i++) + { + if (st->get_type () == 1) + return true; + st = &StructAfter<SubTable> (*st); + } + return false; + } + + bool has_cross_stream () const + { + typedef typename T::SubTable SubTable; + + const SubTable *st = &thiz()->firstSubTable; + unsigned int count = thiz()->tableCount; + for (unsigned int i = 0; i < count; i++) + { + if (st->u.header.coverage & st->u.header.CrossStream) + return true; + st = &StructAfter<SubTable> (*st); + } + return false; + } + + int get_h_kerning (hb_codepoint_t left, hb_codepoint_t right) const + { + typedef typename T::SubTable SubTable; + + int v = 0; + const SubTable *st = &thiz()->firstSubTable; + unsigned int count = thiz()->tableCount; + for (unsigned int i = 0; i < count; i++) + { + if ((st->u.header.coverage & (st->u.header.Variation | st->u.header.CrossStream)) || + !st->u.header.is_horizontal ()) + continue; + v += st->get_kerning (left, right); + st = &StructAfter<SubTable> (*st); + } + return v; + } + + bool apply (AAT::hb_aat_apply_context_t *c) const + { + typedef typename T::SubTable SubTable; + + bool ret = false; + bool seenCrossStream = false; + c->set_lookup_index (0); + const SubTable *st = &thiz()->firstSubTable; + unsigned int count = thiz()->tableCount; + for (unsigned int i = 0; i < count; i++) + { + bool reverse; + + if (!T::Types::extended && (st->u.header.coverage & st->u.header.Variation)) + goto skip; + + if (HB_DIRECTION_IS_HORIZONTAL (c->buffer->props.direction) != st->u.header.is_horizontal ()) + goto skip; + + reverse = bool (st->u.header.coverage & st->u.header.Backwards) != + HB_DIRECTION_IS_BACKWARD (c->buffer->props.direction); + + if (!c->buffer->message (c->font, "start subtable %d", c->lookup_index)) + goto skip; + + if (!seenCrossStream && + (st->u.header.coverage & st->u.header.CrossStream)) + { + /* Attach all glyphs into a chain. */ + seenCrossStream = true; + hb_glyph_position_t *pos = c->buffer->pos; + unsigned int count = c->buffer->len; + for (unsigned int i = 0; i < count; i++) + { + pos[i].attach_type() = ATTACH_TYPE_CURSIVE; + pos[i].attach_chain() = HB_DIRECTION_IS_FORWARD (c->buffer->props.direction) ? -1 : +1; + /* We intentionally don't set HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT, + * since there needs to be a non-zero attachment for post-positioning to + * be needed. */ + } + } + + if (reverse) + c->buffer->reverse (); + + { + /* See comment in sanitize() for conditional here. */ + hb_sanitize_with_object_t with (&c->sanitizer, i < count - 1 ? st : (const SubTable *) nullptr); + ret |= st->dispatch (c); + } + + if (reverse) + c->buffer->reverse (); + + (void) c->buffer->message (c->font, "end subtable %d", c->lookup_index); + + skip: + st = &StructAfter<SubTable> (*st); + c->set_lookup_index (c->lookup_index + 1); + } + + return ret; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!thiz()->version.sanitize (c) || + (unsigned) thiz()->version < (unsigned) T::minVersion || + !thiz()->tableCount.sanitize (c))) + return_trace (false); + + typedef typename T::SubTable SubTable; + + const SubTable *st = &thiz()->firstSubTable; + unsigned int count = thiz()->tableCount; + for (unsigned int i = 0; i < count; i++) + { + if (unlikely (!st->u.header.sanitize (c))) + return_trace (false); + /* OpenType kern table has 2-byte subtable lengths. That's limiting. + * MS implementation also only supports one subtable, of format 0, + * anyway. Certain versions of some fonts, like Calibry, contain + * kern subtable that exceeds 64kb. Looks like, the subtable length + * is simply ignored. Which makes sense. It's only needed if you + * have multiple subtables. To handle such fonts, we just ignore + * the length for the last subtable. */ + hb_sanitize_with_object_t with (c, i < count - 1 ? st : (const SubTable *) nullptr); + + if (unlikely (!st->sanitize (c))) + return_trace (false); + + st = &StructAfter<SubTable> (*st); + } + + return_trace (true); + } +}; + +struct kerx : KerxTable<kerx> +{ + friend struct KerxTable<kerx>; + + static constexpr hb_tag_t tableTag = HB_AAT_TAG_kerx; + static constexpr unsigned minVersion = 2u; + + typedef KerxSubTableHeader SubTableHeader; + typedef SubTableHeader::Types Types; + typedef KerxSubTable SubTable; + + bool has_data () const { return version; } + + protected: + HBUINT16 version; /* The version number of the extended kerning table + * (currently 2, 3, or 4). */ + HBUINT16 unused; /* Set to 0. */ + HBUINT32 tableCount; /* The number of subtables included in the extended kerning + * table. */ + SubTable firstSubTable; /* Subtables. */ +/*subtableGlyphCoverageArray*/ /* Only if version >= 3. We don't use. */ + + public: + DEFINE_SIZE_MIN (8); +}; + + +} /* namespace AAT */ + + +#endif /* HB_AAT_LAYOUT_KERX_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-morx-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-morx-table.hh new file mode 100644 index 0000000000..04027a61be --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-layout-morx-table.hh @@ -0,0 +1,1157 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_AAT_LAYOUT_MORX_TABLE_HH +#define HB_AAT_LAYOUT_MORX_TABLE_HH + +#include "hb-open-type.hh" +#include "hb-aat-layout-common.hh" +#include "hb-ot-layout-common.hh" +#include "hb-aat-map.hh" + +/* + * morx -- Extended Glyph Metamorphosis + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6morx.html + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6mort.html + */ +#define HB_AAT_TAG_morx HB_TAG('m','o','r','x') +#define HB_AAT_TAG_mort HB_TAG('m','o','r','t') + + +namespace AAT { + +using namespace OT; + +template <typename Types> +struct RearrangementSubtable +{ + typedef typename Types::HBUINT HBUINT; + + typedef void EntryData; + + struct driver_context_t + { + static constexpr bool in_place = true; + enum Flags + { + MarkFirst = 0x8000, /* If set, make the current glyph the first + * glyph to be rearranged. */ + DontAdvance = 0x4000, /* If set, don't advance to the next glyph + * before going to the new state. This means + * that the glyph index doesn't change, even + * if the glyph at that index has changed. */ + MarkLast = 0x2000, /* If set, make the current glyph the last + * glyph to be rearranged. */ + Reserved = 0x1FF0, /* These bits are reserved and should be set to 0. */ + Verb = 0x000F, /* The type of rearrangement specified. */ + }; + + driver_context_t (const RearrangementSubtable *table HB_UNUSED) : + ret (false), + start (0), end (0) {} + + bool is_actionable (StateTableDriver<Types, EntryData> *driver HB_UNUSED, + const Entry<EntryData> &entry) + { + return (entry.flags & Verb) && start < end; + } + void transition (StateTableDriver<Types, EntryData> *driver, + const Entry<EntryData> &entry) + { + hb_buffer_t *buffer = driver->buffer; + unsigned int flags = entry.flags; + + if (flags & MarkFirst) + start = buffer->idx; + + if (flags & MarkLast) + end = hb_min (buffer->idx + 1, buffer->len); + + if ((flags & Verb) && start < end) + { + /* The following map has two nibbles, for start-side + * and end-side. Values of 0,1,2 mean move that many + * to the other side. Value of 3 means move 2 and + * flip them. */ + const unsigned char map[16] = + { + 0x00, /* 0 no change */ + 0x10, /* 1 Ax => xA */ + 0x01, /* 2 xD => Dx */ + 0x11, /* 3 AxD => DxA */ + 0x20, /* 4 ABx => xAB */ + 0x30, /* 5 ABx => xBA */ + 0x02, /* 6 xCD => CDx */ + 0x03, /* 7 xCD => DCx */ + 0x12, /* 8 AxCD => CDxA */ + 0x13, /* 9 AxCD => DCxA */ + 0x21, /* 10 ABxD => DxAB */ + 0x31, /* 11 ABxD => DxBA */ + 0x22, /* 12 ABxCD => CDxAB */ + 0x32, /* 13 ABxCD => CDxBA */ + 0x23, /* 14 ABxCD => DCxAB */ + 0x33, /* 15 ABxCD => DCxBA */ + }; + + unsigned int m = map[flags & Verb]; + unsigned int l = hb_min (2u, m >> 4); + unsigned int r = hb_min (2u, m & 0x0F); + bool reverse_l = 3 == (m >> 4); + bool reverse_r = 3 == (m & 0x0F); + + if (end - start >= l + r) + { + buffer->merge_clusters (start, hb_min (buffer->idx + 1, buffer->len)); + buffer->merge_clusters (start, end); + + hb_glyph_info_t *info = buffer->info; + hb_glyph_info_t buf[4]; + + memcpy (buf, info + start, l * sizeof (buf[0])); + memcpy (buf + 2, info + end - r, r * sizeof (buf[0])); + + if (l != r) + memmove (info + start + r, info + start + l, (end - start - l - r) * sizeof (buf[0])); + + memcpy (info + start, buf + 2, r * sizeof (buf[0])); + memcpy (info + end - l, buf, l * sizeof (buf[0])); + if (reverse_l) + { + buf[0] = info[end - 1]; + info[end - 1] = info[end - 2]; + info[end - 2] = buf[0]; + } + if (reverse_r) + { + buf[0] = info[start]; + info[start] = info[start + 1]; + info[start + 1] = buf[0]; + } + } + } + } + + public: + bool ret; + private: + unsigned int start; + unsigned int end; + }; + + bool apply (hb_aat_apply_context_t *c) const + { + TRACE_APPLY (this); + + driver_context_t dc (this); + + StateTableDriver<Types, EntryData> driver (machine, c->buffer, c->face); + driver.drive (&dc); + + return_trace (dc.ret); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (machine.sanitize (c)); + } + + protected: + StateTable<Types, EntryData> machine; + public: + DEFINE_SIZE_STATIC (16); +}; + +template <typename Types> +struct ContextualSubtable +{ + typedef typename Types::HBUINT HBUINT; + + struct EntryData + { + HBUINT16 markIndex; /* Index of the substitution table for the + * marked glyph (use 0xFFFF for none). */ + HBUINT16 currentIndex; /* Index of the substitution table for the + * current glyph (use 0xFFFF for none). */ + public: + DEFINE_SIZE_STATIC (4); + }; + + struct driver_context_t + { + static constexpr bool in_place = true; + enum Flags + { + SetMark = 0x8000, /* If set, make the current glyph the marked glyph. */ + DontAdvance = 0x4000, /* If set, don't advance to the next glyph before + * going to the new state. */ + Reserved = 0x3FFF, /* These bits are reserved and should be set to 0. */ + }; + + driver_context_t (const ContextualSubtable *table_, + hb_aat_apply_context_t *c_) : + ret (false), + c (c_), + mark_set (false), + mark (0), + table (table_), + subs (table+table->substitutionTables) {} + + bool is_actionable (StateTableDriver<Types, EntryData> *driver, + const Entry<EntryData> &entry) + { + hb_buffer_t *buffer = driver->buffer; + + if (buffer->idx == buffer->len && !mark_set) + return false; + + return entry.data.markIndex != 0xFFFF || entry.data.currentIndex != 0xFFFF; + } + void transition (StateTableDriver<Types, EntryData> *driver, + const Entry<EntryData> &entry) + { + hb_buffer_t *buffer = driver->buffer; + + /* Looks like CoreText applies neither mark nor current substitution for + * end-of-text if mark was not explicitly set. */ + if (buffer->idx == buffer->len && !mark_set) + return; + + const HBGlyphID *replacement; + + replacement = nullptr; + if (Types::extended) + { + if (entry.data.markIndex != 0xFFFF) + { + const Lookup<HBGlyphID> &lookup = subs[entry.data.markIndex]; + replacement = lookup.get_value (buffer->info[mark].codepoint, driver->num_glyphs); + } + } + else + { + unsigned int offset = entry.data.markIndex + buffer->info[mark].codepoint; + const UnsizedArrayOf<HBGlyphID> &subs_old = (const UnsizedArrayOf<HBGlyphID> &) subs; + replacement = &subs_old[Types::wordOffsetToIndex (offset, table, subs_old.arrayZ)]; + if (!replacement->sanitize (&c->sanitizer) || !*replacement) + replacement = nullptr; + } + if (replacement) + { + buffer->unsafe_to_break (mark, hb_min (buffer->idx + 1, buffer->len)); + buffer->info[mark].codepoint = *replacement; + ret = true; + } + + replacement = nullptr; + unsigned int idx = hb_min (buffer->idx, buffer->len - 1); + if (Types::extended) + { + if (entry.data.currentIndex != 0xFFFF) + { + const Lookup<HBGlyphID> &lookup = subs[entry.data.currentIndex]; + replacement = lookup.get_value (buffer->info[idx].codepoint, driver->num_glyphs); + } + } + else + { + unsigned int offset = entry.data.currentIndex + buffer->info[idx].codepoint; + const UnsizedArrayOf<HBGlyphID> &subs_old = (const UnsizedArrayOf<HBGlyphID> &) subs; + replacement = &subs_old[Types::wordOffsetToIndex (offset, table, subs_old.arrayZ)]; + if (!replacement->sanitize (&c->sanitizer) || !*replacement) + replacement = nullptr; + } + if (replacement) + { + buffer->info[idx].codepoint = *replacement; + ret = true; + } + + if (entry.flags & SetMark) + { + mark_set = true; + mark = buffer->idx; + } + } + + public: + bool ret; + private: + hb_aat_apply_context_t *c; + bool mark_set; + unsigned int mark; + const ContextualSubtable *table; + const UnsizedOffsetListOf<Lookup<HBGlyphID>, HBUINT, false> &subs; + }; + + bool apply (hb_aat_apply_context_t *c) const + { + TRACE_APPLY (this); + + driver_context_t dc (this, c); + + StateTableDriver<Types, EntryData> driver (machine, c->buffer, c->face); + driver.drive (&dc); + + return_trace (dc.ret); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + + unsigned int num_entries = 0; + if (unlikely (!machine.sanitize (c, &num_entries))) return_trace (false); + + if (!Types::extended) + return_trace (substitutionTables.sanitize (c, this, 0)); + + unsigned int num_lookups = 0; + + const Entry<EntryData> *entries = machine.get_entries (); + for (unsigned int i = 0; i < num_entries; i++) + { + const EntryData &data = entries[i].data; + + if (data.markIndex != 0xFFFF) + num_lookups = hb_max (num_lookups, 1 + data.markIndex); + if (data.currentIndex != 0xFFFF) + num_lookups = hb_max (num_lookups, 1 + data.currentIndex); + } + + return_trace (substitutionTables.sanitize (c, this, num_lookups)); + } + + protected: + StateTable<Types, EntryData> + machine; + NNOffsetTo<UnsizedOffsetListOf<Lookup<HBGlyphID>, HBUINT, false>, HBUINT> + substitutionTables; + public: + DEFINE_SIZE_STATIC (20); +}; + + +template <bool extended> +struct LigatureEntry; + +template <> +struct LigatureEntry<true> +{ + enum Flags + { + SetComponent = 0x8000, /* Push this glyph onto the component stack for + * eventual processing. */ + DontAdvance = 0x4000, /* Leave the glyph pointer at this glyph for the + next iteration. */ + PerformAction = 0x2000, /* Use the ligActionIndex to process a ligature + * group. */ + Reserved = 0x1FFF, /* These bits are reserved and should be set to 0. */ + }; + + struct EntryData + { + HBUINT16 ligActionIndex; /* Index to the first ligActionTable entry + * for processing this group, if indicated + * by the flags. */ + public: + DEFINE_SIZE_STATIC (2); + }; + + static bool performAction (const Entry<EntryData> &entry) + { return entry.flags & PerformAction; } + + static unsigned int ligActionIndex (const Entry<EntryData> &entry) + { return entry.data.ligActionIndex; } +}; +template <> +struct LigatureEntry<false> +{ + enum Flags + { + SetComponent = 0x8000, /* Push this glyph onto the component stack for + * eventual processing. */ + DontAdvance = 0x4000, /* Leave the glyph pointer at this glyph for the + next iteration. */ + Offset = 0x3FFF, /* Byte offset from beginning of subtable to the + * ligature action list. This value must be a + * multiple of 4. */ + }; + + typedef void EntryData; + + static bool performAction (const Entry<EntryData> &entry) + { return entry.flags & Offset; } + + static unsigned int ligActionIndex (const Entry<EntryData> &entry) + { return entry.flags & Offset; } +}; + + +template <typename Types> +struct LigatureSubtable +{ + typedef typename Types::HBUINT HBUINT; + + typedef LigatureEntry<Types::extended> LigatureEntryT; + typedef typename LigatureEntryT::EntryData EntryData; + + struct driver_context_t + { + static constexpr bool in_place = false; + enum + { + DontAdvance = LigatureEntryT::DontAdvance, + }; + enum LigActionFlags + { + LigActionLast = 0x80000000, /* This is the last action in the list. This also + * implies storage. */ + LigActionStore = 0x40000000, /* Store the ligature at the current cumulated index + * in the ligature table in place of the marked + * (i.e. currently-popped) glyph. */ + LigActionOffset = 0x3FFFFFFF, /* A 30-bit value which is sign-extended to 32-bits + * and added to the glyph ID, resulting in an index + * into the component table. */ + }; + + driver_context_t (const LigatureSubtable *table_, + hb_aat_apply_context_t *c_) : + ret (false), + c (c_), + table (table_), + ligAction (table+table->ligAction), + component (table+table->component), + ligature (table+table->ligature), + match_length (0) {} + + bool is_actionable (StateTableDriver<Types, EntryData> *driver HB_UNUSED, + const Entry<EntryData> &entry) + { + return LigatureEntryT::performAction (entry); + } + void transition (StateTableDriver<Types, EntryData> *driver, + const Entry<EntryData> &entry) + { + hb_buffer_t *buffer = driver->buffer; + + DEBUG_MSG (APPLY, nullptr, "Ligature transition at %u", buffer->idx); + if (entry.flags & LigatureEntryT::SetComponent) + { + /* Never mark same index twice, in case DontAdvance was used... */ + if (match_length && match_positions[(match_length - 1u) % ARRAY_LENGTH (match_positions)] == buffer->out_len) + match_length--; + + match_positions[match_length++ % ARRAY_LENGTH (match_positions)] = buffer->out_len; + DEBUG_MSG (APPLY, nullptr, "Set component at %u", buffer->out_len); + } + + if (LigatureEntryT::performAction (entry)) + { + DEBUG_MSG (APPLY, nullptr, "Perform action with %u", match_length); + unsigned int end = buffer->out_len; + + if (unlikely (!match_length)) + return; + + if (buffer->idx >= buffer->len) + return; /* TODO Work on previous instead? */ + + unsigned int cursor = match_length; + + unsigned int action_idx = LigatureEntryT::ligActionIndex (entry); + action_idx = Types::offsetToIndex (action_idx, table, ligAction.arrayZ); + const HBUINT32 *actionData = &ligAction[action_idx]; + + unsigned int ligature_idx = 0; + unsigned int action; + do + { + if (unlikely (!cursor)) + { + /* Stack underflow. Clear the stack. */ + DEBUG_MSG (APPLY, nullptr, "Stack underflow"); + match_length = 0; + break; + } + + DEBUG_MSG (APPLY, nullptr, "Moving to stack position %u", cursor - 1); + buffer->move_to (match_positions[--cursor % ARRAY_LENGTH (match_positions)]); + + if (unlikely (!actionData->sanitize (&c->sanitizer))) break; + action = *actionData; + + uint32_t uoffset = action & LigActionOffset; + if (uoffset & 0x20000000) + uoffset |= 0xC0000000; /* Sign-extend. */ + int32_t offset = (int32_t) uoffset; + unsigned int component_idx = buffer->cur().codepoint + offset; + component_idx = Types::wordOffsetToIndex (component_idx, table, component.arrayZ); + const HBUINT16 &componentData = component[component_idx]; + if (unlikely (!componentData.sanitize (&c->sanitizer))) break; + ligature_idx += componentData; + + DEBUG_MSG (APPLY, nullptr, "Action store %u last %u", + bool (action & LigActionStore), + bool (action & LigActionLast)); + if (action & (LigActionStore | LigActionLast)) + { + ligature_idx = Types::offsetToIndex (ligature_idx, table, ligature.arrayZ); + const HBGlyphID &ligatureData = ligature[ligature_idx]; + if (unlikely (!ligatureData.sanitize (&c->sanitizer))) break; + hb_codepoint_t lig = ligatureData; + + DEBUG_MSG (APPLY, nullptr, "Produced ligature %u", lig); + buffer->replace_glyph (lig); + + unsigned int lig_end = match_positions[(match_length - 1u) % ARRAY_LENGTH (match_positions)] + 1u; + /* Now go and delete all subsequent components. */ + while (match_length - 1u > cursor) + { + DEBUG_MSG (APPLY, nullptr, "Skipping ligature component"); + buffer->move_to (match_positions[--match_length % ARRAY_LENGTH (match_positions)]); + buffer->replace_glyph (DELETED_GLYPH); + } + + buffer->move_to (lig_end); + buffer->merge_out_clusters (match_positions[cursor % ARRAY_LENGTH (match_positions)], buffer->out_len); + } + + actionData++; + } + while (!(action & LigActionLast)); + buffer->move_to (end); + } + } + + public: + bool ret; + private: + hb_aat_apply_context_t *c; + const LigatureSubtable *table; + const UnsizedArrayOf<HBUINT32> &ligAction; + const UnsizedArrayOf<HBUINT16> &component; + const UnsizedArrayOf<HBGlyphID> &ligature; + unsigned int match_length; + unsigned int match_positions[HB_MAX_CONTEXT_LENGTH]; + }; + + bool apply (hb_aat_apply_context_t *c) const + { + TRACE_APPLY (this); + + driver_context_t dc (this, c); + + StateTableDriver<Types, EntryData> driver (machine, c->buffer, c->face); + driver.drive (&dc); + + return_trace (dc.ret); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + /* The rest of array sanitizations are done at run-time. */ + return_trace (c->check_struct (this) && machine.sanitize (c) && + ligAction && component && ligature); + } + + protected: + StateTable<Types, EntryData> + machine; + NNOffsetTo<UnsizedArrayOf<HBUINT32>, HBUINT> + ligAction; /* Offset to the ligature action table. */ + NNOffsetTo<UnsizedArrayOf<HBUINT16>, HBUINT> + component; /* Offset to the component table. */ + NNOffsetTo<UnsizedArrayOf<HBGlyphID>, HBUINT> + ligature; /* Offset to the actual ligature lists. */ + public: + DEFINE_SIZE_STATIC (28); +}; + +template <typename Types> +struct NoncontextualSubtable +{ + bool apply (hb_aat_apply_context_t *c) const + { + TRACE_APPLY (this); + + bool ret = false; + unsigned int num_glyphs = c->face->get_num_glyphs (); + + hb_glyph_info_t *info = c->buffer->info; + unsigned int count = c->buffer->len; + for (unsigned int i = 0; i < count; i++) + { + const HBGlyphID *replacement = substitute.get_value (info[i].codepoint, num_glyphs); + if (replacement) + { + info[i].codepoint = *replacement; + ret = true; + } + } + + return_trace (ret); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (substitute.sanitize (c)); + } + + protected: + Lookup<HBGlyphID> substitute; + public: + DEFINE_SIZE_MIN (2); +}; + +template <typename Types> +struct InsertionSubtable +{ + typedef typename Types::HBUINT HBUINT; + + struct EntryData + { + HBUINT16 currentInsertIndex; /* Zero-based index into the insertion glyph table. + * The number of glyphs to be inserted is contained + * in the currentInsertCount field in the flags. + * A value of 0xFFFF indicates no insertion is to + * be done. */ + HBUINT16 markedInsertIndex; /* Zero-based index into the insertion glyph table. + * The number of glyphs to be inserted is contained + * in the markedInsertCount field in the flags. + * A value of 0xFFFF indicates no insertion is to + * be done. */ + public: + DEFINE_SIZE_STATIC (4); + }; + + struct driver_context_t + { + static constexpr bool in_place = false; + enum Flags + { + SetMark = 0x8000, /* If set, mark the current glyph. */ + DontAdvance = 0x4000, /* If set, don't advance to the next glyph before + * going to the new state. This does not mean + * that the glyph pointed to is the same one as + * before. If you've made insertions immediately + * downstream of the current glyph, the next glyph + * processed would in fact be the first one + * inserted. */ + CurrentIsKashidaLike= 0x2000, /* If set, and the currentInsertList is nonzero, + * then the specified glyph list will be inserted + * as a kashida-like insertion, either before or + * after the current glyph (depending on the state + * of the currentInsertBefore flag). If clear, and + * the currentInsertList is nonzero, then the + * specified glyph list will be inserted as a + * split-vowel-like insertion, either before or + * after the current glyph (depending on the state + * of the currentInsertBefore flag). */ + MarkedIsKashidaLike= 0x1000, /* If set, and the markedInsertList is nonzero, + * then the specified glyph list will be inserted + * as a kashida-like insertion, either before or + * after the marked glyph (depending on the state + * of the markedInsertBefore flag). If clear, and + * the markedInsertList is nonzero, then the + * specified glyph list will be inserted as a + * split-vowel-like insertion, either before or + * after the marked glyph (depending on the state + * of the markedInsertBefore flag). */ + CurrentInsertBefore= 0x0800, /* If set, specifies that insertions are to be made + * to the left of the current glyph. If clear, + * they're made to the right of the current glyph. */ + MarkedInsertBefore= 0x0400, /* If set, specifies that insertions are to be + * made to the left of the marked glyph. If clear, + * they're made to the right of the marked glyph. */ + CurrentInsertCount= 0x3E0, /* This 5-bit field is treated as a count of the + * number of glyphs to insert at the current + * position. Since zero means no insertions, the + * largest number of insertions at any given + * current location is 31 glyphs. */ + MarkedInsertCount= 0x001F, /* This 5-bit field is treated as a count of the + * number of glyphs to insert at the marked + * position. Since zero means no insertions, the + * largest number of insertions at any given + * marked location is 31 glyphs. */ + }; + + driver_context_t (const InsertionSubtable *table, + hb_aat_apply_context_t *c_) : + ret (false), + c (c_), + mark (0), + insertionAction (table+table->insertionAction) {} + + bool is_actionable (StateTableDriver<Types, EntryData> *driver HB_UNUSED, + const Entry<EntryData> &entry) + { + return (entry.flags & (CurrentInsertCount | MarkedInsertCount)) && + (entry.data.currentInsertIndex != 0xFFFF ||entry.data.markedInsertIndex != 0xFFFF); + } + void transition (StateTableDriver<Types, EntryData> *driver, + const Entry<EntryData> &entry) + { + hb_buffer_t *buffer = driver->buffer; + unsigned int flags = entry.flags; + + unsigned mark_loc = buffer->out_len; + + if (entry.data.markedInsertIndex != 0xFFFF) + { + unsigned int count = (flags & MarkedInsertCount); + if (unlikely ((buffer->max_ops -= count) <= 0)) return; + unsigned int start = entry.data.markedInsertIndex; + const HBGlyphID *glyphs = &insertionAction[start]; + if (unlikely (!c->sanitizer.check_array (glyphs, count))) count = 0; + + bool before = flags & MarkedInsertBefore; + + unsigned int end = buffer->out_len; + buffer->move_to (mark); + + if (buffer->idx < buffer->len && !before) + buffer->copy_glyph (); + /* TODO We ignore KashidaLike setting. */ + for (unsigned int i = 0; i < count; i++) + buffer->output_glyph (glyphs[i]); + if (buffer->idx < buffer->len && !before) + buffer->skip_glyph (); + + buffer->move_to (end + count); + + buffer->unsafe_to_break_from_outbuffer (mark, hb_min (buffer->idx + 1, buffer->len)); + } + + if (flags & SetMark) + mark = mark_loc; + + if (entry.data.currentInsertIndex != 0xFFFF) + { + unsigned int count = (flags & CurrentInsertCount) >> 5; + if (unlikely ((buffer->max_ops -= count) <= 0)) return; + unsigned int start = entry.data.currentInsertIndex; + const HBGlyphID *glyphs = &insertionAction[start]; + if (unlikely (!c->sanitizer.check_array (glyphs, count))) count = 0; + + bool before = flags & CurrentInsertBefore; + + unsigned int end = buffer->out_len; + + if (buffer->idx < buffer->len && !before) + buffer->copy_glyph (); + /* TODO We ignore KashidaLike setting. */ + for (unsigned int i = 0; i < count; i++) + buffer->output_glyph (glyphs[i]); + if (buffer->idx < buffer->len && !before) + buffer->skip_glyph (); + + /* Humm. Not sure where to move to. There's this wording under + * DontAdvance flag: + * + * "If set, don't update the glyph index before going to the new state. + * This does not mean that the glyph pointed to is the same one as + * before. If you've made insertions immediately downstream of the + * current glyph, the next glyph processed would in fact be the first + * one inserted." + * + * This suggests that if DontAdvance is NOT set, we should move to + * end+count. If it *was*, then move to end, such that newly inserted + * glyphs are now visible. + * + * https://github.com/harfbuzz/harfbuzz/issues/1224#issuecomment-427691417 + */ + buffer->move_to ((flags & DontAdvance) ? end : end + count); + } + } + + public: + bool ret; + private: + hb_aat_apply_context_t *c; + unsigned int mark; + const UnsizedArrayOf<HBGlyphID> &insertionAction; + }; + + bool apply (hb_aat_apply_context_t *c) const + { + TRACE_APPLY (this); + + driver_context_t dc (this, c); + + StateTableDriver<Types, EntryData> driver (machine, c->buffer, c->face); + driver.drive (&dc); + + return_trace (dc.ret); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + /* The rest of array sanitizations are done at run-time. */ + return_trace (c->check_struct (this) && machine.sanitize (c) && + insertionAction); + } + + protected: + StateTable<Types, EntryData> + machine; + NNOffsetTo<UnsizedArrayOf<HBGlyphID>, HBUINT> + insertionAction; /* Byte offset from stateHeader to the start of + * the insertion glyph table. */ + public: + DEFINE_SIZE_STATIC (20); +}; + + +struct Feature +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + public: + HBUINT16 featureType; /* The type of feature. */ + HBUINT16 featureSetting; /* The feature's setting (aka selector). */ + HBUINT32 enableFlags; /* Flags for the settings that this feature + * and setting enables. */ + HBUINT32 disableFlags; /* Complement of flags for the settings that this + * feature and setting disable. */ + + public: + DEFINE_SIZE_STATIC (12); +}; + +template <typename Types> +struct ChainSubtable +{ + typedef typename Types::HBUINT HBUINT; + + template <typename T> + friend struct Chain; + + unsigned int get_size () const { return length; } + unsigned int get_type () const { return coverage & 0xFF; } + unsigned int get_coverage () const { return coverage >> (sizeof (HBUINT) * 8 - 8); } + + enum Coverage + { + Vertical = 0x80, /* If set, this subtable will only be applied + * to vertical text. If clear, this subtable + * will only be applied to horizontal text. */ + Backwards = 0x40, /* If set, this subtable will process glyphs + * in descending order. If clear, it will + * process the glyphs in ascending order. */ + AllDirections = 0x20, /* If set, this subtable will be applied to + * both horizontal and vertical text (i.e. + * the state of bit 0x80000000 is ignored). */ + Logical = 0x10, /* If set, this subtable will process glyphs + * in logical order (or reverse logical order, + * depending on the value of bit 0x80000000). */ + }; + enum Type + { + Rearrangement = 0, + Contextual = 1, + Ligature = 2, + Noncontextual = 4, + Insertion = 5 + }; + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + unsigned int subtable_type = get_type (); + TRACE_DISPATCH (this, subtable_type); + switch (subtable_type) { + case Rearrangement: return_trace (c->dispatch (u.rearrangement, hb_forward<Ts> (ds)...)); + case Contextual: return_trace (c->dispatch (u.contextual, hb_forward<Ts> (ds)...)); + case Ligature: return_trace (c->dispatch (u.ligature, hb_forward<Ts> (ds)...)); + case Noncontextual: return_trace (c->dispatch (u.noncontextual, hb_forward<Ts> (ds)...)); + case Insertion: return_trace (c->dispatch (u.insertion, hb_forward<Ts> (ds)...)); + default: return_trace (c->default_return_value ()); + } + } + + bool apply (hb_aat_apply_context_t *c) const + { + TRACE_APPLY (this); + hb_sanitize_with_object_t with (&c->sanitizer, this); + return_trace (dispatch (c)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!length.sanitize (c) || + length <= min_size || + !c->check_range (this, length)) + return_trace (false); + + hb_sanitize_with_object_t with (c, this); + return_trace (dispatch (c)); + } + + protected: + HBUINT length; /* Total subtable length, including this header. */ + HBUINT coverage; /* Coverage flags and subtable type. */ + HBUINT32 subFeatureFlags;/* The 32-bit mask identifying which subtable this is. */ + union { + RearrangementSubtable<Types> rearrangement; + ContextualSubtable<Types> contextual; + LigatureSubtable<Types> ligature; + NoncontextualSubtable<Types> noncontextual; + InsertionSubtable<Types> insertion; + } u; + public: + DEFINE_SIZE_MIN (2 * sizeof (HBUINT) + 4); +}; + +template <typename Types> +struct Chain +{ + typedef typename Types::HBUINT HBUINT; + + hb_mask_t compile_flags (const hb_aat_map_builder_t *map) const + { + hb_mask_t flags = defaultFlags; + { + unsigned int count = featureCount; + for (unsigned i = 0; i < count; i++) + { + const Feature &feature = featureZ[i]; + hb_aat_layout_feature_type_t type = (hb_aat_layout_feature_type_t) (unsigned int) feature.featureType; + hb_aat_layout_feature_selector_t setting = (hb_aat_layout_feature_selector_t) (unsigned int) feature.featureSetting; + retry: + // Check whether this type/setting pair was requested in the map, and if so, apply its flags. + // (The search here only looks at the type and setting fields of feature_info_t.) + hb_aat_map_builder_t::feature_info_t info = { type, setting, false, 0 }; + if (map->features.bsearch (info)) + { + flags &= feature.disableFlags; + flags |= feature.enableFlags; + } + else if (type == HB_AAT_LAYOUT_FEATURE_TYPE_LETTER_CASE && setting == HB_AAT_LAYOUT_FEATURE_SELECTOR_SMALL_CAPS) + { + /* Deprecated. https://github.com/harfbuzz/harfbuzz/issues/1342 */ + type = HB_AAT_LAYOUT_FEATURE_TYPE_LOWER_CASE; + setting = HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_SMALL_CAPS; + goto retry; + } + } + } + return flags; + } + + void apply (hb_aat_apply_context_t *c, + hb_mask_t flags) const + { + const ChainSubtable<Types> *subtable = &StructAfter<ChainSubtable<Types>> (featureZ.as_array (featureCount)); + unsigned int count = subtableCount; + for (unsigned int i = 0; i < count; i++) + { + bool reverse; + + if (!(subtable->subFeatureFlags & flags)) + goto skip; + + if (!(subtable->get_coverage() & ChainSubtable<Types>::AllDirections) && + HB_DIRECTION_IS_VERTICAL (c->buffer->props.direction) != + bool (subtable->get_coverage() & ChainSubtable<Types>::Vertical)) + goto skip; + + /* Buffer contents is always in logical direction. Determine if + * we need to reverse before applying this subtable. We reverse + * back after if we did reverse indeed. + * + * Quoting the spac: + * """ + * Bits 28 and 30 of the coverage field control the order in which + * glyphs are processed when the subtable is run by the layout engine. + * Bit 28 is used to indicate if the glyph processing direction is + * the same as logical order or layout order. Bit 30 is used to + * indicate whether glyphs are processed forwards or backwards within + * that order. + + Bit 30 Bit 28 Interpretation for Horizontal Text + 0 0 The subtable is processed in layout order + (the same order as the glyphs, which is + always left-to-right). + 1 0 The subtable is processed in reverse layout order + (the order opposite that of the glyphs, which is + always right-to-left). + 0 1 The subtable is processed in logical order + (the same order as the characters, which may be + left-to-right or right-to-left). + 1 1 The subtable is processed in reverse logical order + (the order opposite that of the characters, which + may be right-to-left or left-to-right). + */ + reverse = subtable->get_coverage () & ChainSubtable<Types>::Logical ? + bool (subtable->get_coverage () & ChainSubtable<Types>::Backwards) : + bool (subtable->get_coverage () & ChainSubtable<Types>::Backwards) != + HB_DIRECTION_IS_BACKWARD (c->buffer->props.direction); + + if (!c->buffer->message (c->font, "start chainsubtable %d", c->lookup_index)) + goto skip; + + if (reverse) + c->buffer->reverse (); + + subtable->apply (c); + + if (reverse) + c->buffer->reverse (); + + (void) c->buffer->message (c->font, "end chainsubtable %d", c->lookup_index); + + if (unlikely (!c->buffer->successful)) return; + + skip: + subtable = &StructAfter<ChainSubtable<Types>> (*subtable); + c->set_lookup_index (c->lookup_index + 1); + } + } + + unsigned int get_size () const { return length; } + + bool sanitize (hb_sanitize_context_t *c, unsigned int version HB_UNUSED) const + { + TRACE_SANITIZE (this); + if (!length.sanitize (c) || + length < min_size || + !c->check_range (this, length)) + return_trace (false); + + if (!c->check_array (featureZ.arrayZ, featureCount)) + return_trace (false); + + const ChainSubtable<Types> *subtable = &StructAfter<ChainSubtable<Types>> (featureZ.as_array (featureCount)); + unsigned int count = subtableCount; + for (unsigned int i = 0; i < count; i++) + { + if (!subtable->sanitize (c)) + return_trace (false); + subtable = &StructAfter<ChainSubtable<Types>> (*subtable); + } + + return_trace (true); + } + + protected: + HBUINT32 defaultFlags; /* The default specification for subtables. */ + HBUINT32 length; /* Total byte count, including this header. */ + HBUINT featureCount; /* Number of feature subtable entries. */ + HBUINT subtableCount; /* The number of subtables in the chain. */ + + UnsizedArrayOf<Feature> featureZ; /* Features. */ +/*ChainSubtable firstSubtable;*//* Subtables. */ +/*subtableGlyphCoverageArray*/ /* Only if version >= 3. We don't use. */ + + public: + DEFINE_SIZE_MIN (8 + 2 * sizeof (HBUINT)); +}; + + +/* + * The 'mort'/'morx' Table + */ + +template <typename Types, hb_tag_t TAG> +struct mortmorx +{ + static constexpr hb_tag_t tableTag = TAG; + + bool has_data () const { return version != 0; } + + void compile_flags (const hb_aat_map_builder_t *mapper, + hb_aat_map_t *map) const + { + const Chain<Types> *chain = &firstChain; + unsigned int count = chainCount; + for (unsigned int i = 0; i < count; i++) + { + map->chain_flags.push (chain->compile_flags (mapper)); + chain = &StructAfter<Chain<Types>> (*chain); + } + } + + void apply (hb_aat_apply_context_t *c) const + { + if (unlikely (!c->buffer->successful)) return; + c->set_lookup_index (0); + const Chain<Types> *chain = &firstChain; + unsigned int count = chainCount; + for (unsigned int i = 0; i < count; i++) + { + chain->apply (c, c->plan->aat_map.chain_flags[i]); + if (unlikely (!c->buffer->successful)) return; + chain = &StructAfter<Chain<Types>> (*chain); + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!version.sanitize (c) || !version || !chainCount.sanitize (c)) + return_trace (false); + + const Chain<Types> *chain = &firstChain; + unsigned int count = chainCount; + for (unsigned int i = 0; i < count; i++) + { + if (!chain->sanitize (c, version)) + return_trace (false); + chain = &StructAfter<Chain<Types>> (*chain); + } + + return_trace (true); + } + + protected: + HBUINT16 version; /* Version number of the glyph metamorphosis table. + * 1, 2, or 3. */ + HBUINT16 unused; /* Set to 0. */ + HBUINT32 chainCount; /* Number of metamorphosis chains contained in this + * table. */ + Chain<Types> firstChain; /* Chains. */ + + public: + DEFINE_SIZE_MIN (8); +}; + +struct morx : mortmorx<ExtendedTypes, HB_AAT_TAG_morx> {}; +struct mort : mortmorx<ObsoleteTypes, HB_AAT_TAG_mort> {}; + + +} /* namespace AAT */ + + +#endif /* HB_AAT_LAYOUT_MORX_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-opbd-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-opbd-table.hh new file mode 100644 index 0000000000..8c04a6482f --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-layout-opbd-table.hh @@ -0,0 +1,173 @@ +/* + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_AAT_LAYOUT_OPBD_TABLE_HH +#define HB_AAT_LAYOUT_OPBD_TABLE_HH + +#include "hb-aat-layout-common.hh" +#include "hb-open-type.hh" + +/* + * opbd -- Optical Bounds + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6opbd.html + */ +#define HB_AAT_TAG_opbd HB_TAG('o','p','b','d') + + +namespace AAT { + +struct OpticalBounds +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + FWORD leftSide; + FWORD topSide; + FWORD rightSide; + FWORD bottomSide; + public: + DEFINE_SIZE_STATIC (8); +}; + +struct opbdFormat0 +{ + bool get_bounds (hb_font_t *font, hb_codepoint_t glyph_id, + hb_glyph_extents_t *extents, const void *base) const + { + const OffsetTo<OpticalBounds> *bounds_offset = lookupTable.get_value (glyph_id, font->face->get_num_glyphs ()); + if (!bounds_offset) return false; + const OpticalBounds &bounds = base+*bounds_offset; + + if (extents) + *extents = { + font->em_scale_x (bounds.leftSide), + font->em_scale_y (bounds.topSide), + font->em_scale_x (bounds.rightSide), + font->em_scale_y (bounds.bottomSide) + }; + return true; + } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && lookupTable.sanitize (c, base))); + } + + protected: + Lookup<OffsetTo<OpticalBounds>> + lookupTable; /* Lookup table associating glyphs with the four + * int16 values for the left-side, top-side, + * right-side, and bottom-side optical bounds. */ + public: + DEFINE_SIZE_MIN (2); +}; + +struct opbdFormat1 +{ + bool get_bounds (hb_font_t *font, hb_codepoint_t glyph_id, + hb_glyph_extents_t *extents, const void *base) const + { + const OffsetTo<OpticalBounds> *bounds_offset = lookupTable.get_value (glyph_id, font->face->get_num_glyphs ()); + if (!bounds_offset) return false; + const OpticalBounds &bounds = base+*bounds_offset; + + hb_position_t left = 0, top = 0, right = 0, bottom = 0, ignore; + if (font->get_glyph_contour_point (glyph_id, bounds.leftSide, &left, &ignore) || + font->get_glyph_contour_point (glyph_id, bounds.topSide, &ignore, &top) || + font->get_glyph_contour_point (glyph_id, bounds.rightSide, &right, &ignore) || + font->get_glyph_contour_point (glyph_id, bounds.bottomSide, &ignore, &bottom)) + { + if (extents) + *extents = {left, top, right, bottom}; + return true; + } + return false; + } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && lookupTable.sanitize (c, base))); + } + + protected: + Lookup<OffsetTo<OpticalBounds>> + lookupTable; /* Lookup table associating glyphs with the four + * int16 values for the left-side, top-side, + * right-side, and bottom-side optical bounds. */ + public: + DEFINE_SIZE_MIN (2); +}; + +struct opbd +{ + static constexpr hb_tag_t tableTag = HB_AAT_TAG_opbd; + + bool get_bounds (hb_font_t *font, hb_codepoint_t glyph_id, + hb_glyph_extents_t *extents) const + { + switch (format) + { + case 0: return u.format0.get_bounds (font, glyph_id, extents, this); + case 1: return u.format1.get_bounds (font, glyph_id, extents, this); + default:return false; + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this) || version.major != 1)) + return_trace (false); + + switch (format) + { + case 0: return_trace (u.format0.sanitize (c, this)); + case 1: return_trace (u.format1.sanitize (c, this)); + default:return_trace (true); + } + } + + protected: + FixedVersion<>version; /* Version number of the optical bounds + * table (0x00010000 for the current version). */ + HBUINT16 format; /* Format of the optical bounds table. + * Format 0 indicates distance and Format 1 indicates + * control point. */ + union { + opbdFormat0 format0; + opbdFormat1 format1; + } u; + public: + DEFINE_SIZE_MIN (8); +}; + +} /* namespace AAT */ + + +#endif /* HB_AAT_LAYOUT_OPBD_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-trak-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-trak-table.hh new file mode 100644 index 0000000000..baa1c72020 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-layout-trak-table.hh @@ -0,0 +1,230 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_AAT_LAYOUT_TRAK_TABLE_HH +#define HB_AAT_LAYOUT_TRAK_TABLE_HH + +#include "hb-aat-layout-common.hh" +#include "hb-ot-layout.hh" +#include "hb-open-type.hh" + +/* + * trak -- Tracking + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6trak.html + */ +#define HB_AAT_TAG_trak HB_TAG('t','r','a','k') + + +namespace AAT { + + +struct TrackTableEntry +{ + friend struct TrackData; + + float get_track_value () const { return track.to_float (); } + + int get_value (const void *base, unsigned int index, + unsigned int table_size) const + { return (base+valuesZ).as_array (table_size)[index]; } + + public: + bool sanitize (hb_sanitize_context_t *c, const void *base, + unsigned int table_size) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + (valuesZ.sanitize (c, base, table_size)))); + } + + protected: + HBFixed track; /* Track value for this record. */ + NameID trackNameID; /* The 'name' table index for this track. + * (a short word or phrase like "loose" + * or "very tight") */ + NNOffsetTo<UnsizedArrayOf<FWORD>> + valuesZ; /* Offset from start of tracking table to + * per-size tracking values for this track. */ + + public: + DEFINE_SIZE_STATIC (8); +}; + +struct TrackData +{ + float interpolate_at (unsigned int idx, + float target_size, + const TrackTableEntry &trackTableEntry, + const void *base) const + { + unsigned int sizes = nSizes; + hb_array_t<const HBFixed> size_table ((base+sizeTable).arrayZ, sizes); + + float s0 = size_table[idx].to_float (); + float s1 = size_table[idx + 1].to_float (); + float t = unlikely (s0 == s1) ? 0.f : (target_size - s0) / (s1 - s0); + return t * trackTableEntry.get_value (base, idx + 1, sizes) + + (1.f - t) * trackTableEntry.get_value (base, idx, sizes); + } + + int get_tracking (const void *base, float ptem) const + { + /* + * Choose track. + */ + const TrackTableEntry *trackTableEntry = nullptr; + unsigned int count = nTracks; + for (unsigned int i = 0; i < count; i++) + { + /* Note: Seems like the track entries are sorted by values. But the + * spec doesn't explicitly say that. It just mentions it in the example. */ + + /* For now we only seek for track entries with zero tracking value */ + + if (trackTable[i].get_track_value () == 0.f) + { + trackTableEntry = &trackTable[i]; + break; + } + } + if (!trackTableEntry) return 0.; + + /* + * Choose size. + */ + unsigned int sizes = nSizes; + if (!sizes) return 0.; + if (sizes == 1) return trackTableEntry->get_value (base, 0, sizes); + + hb_array_t<const HBFixed> size_table ((base+sizeTable).arrayZ, sizes); + unsigned int size_index; + for (size_index = 0; size_index < sizes - 1; size_index++) + if (size_table[size_index].to_float () >= ptem) + break; + + return roundf (interpolate_at (size_index ? size_index - 1 : 0, ptem, + *trackTableEntry, base)); + } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + sizeTable.sanitize (c, base, nSizes) && + trackTable.sanitize (c, nTracks, base, nSizes))); + } + + protected: + HBUINT16 nTracks; /* Number of separate tracks included in this table. */ + HBUINT16 nSizes; /* Number of point sizes included in this table. */ + LNNOffsetTo<UnsizedArrayOf<HBFixed>> + sizeTable; /* Offset from start of the tracking table to + * Array[nSizes] of size values.. */ + UnsizedArrayOf<TrackTableEntry> + trackTable; /* Array[nTracks] of TrackTableEntry records. */ + + public: + DEFINE_SIZE_ARRAY (8, trackTable); +}; + +struct trak +{ + static constexpr hb_tag_t tableTag = HB_AAT_TAG_trak; + + bool has_data () const { return version.to_int (); } + + bool apply (hb_aat_apply_context_t *c) const + { + TRACE_APPLY (this); + + hb_mask_t trak_mask = c->plan->trak_mask; + + const float ptem = c->font->ptem; + if (unlikely (ptem <= 0.f)) + return_trace (false); + + hb_buffer_t *buffer = c->buffer; + if (HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction)) + { + const TrackData &trackData = this+horizData; + int tracking = trackData.get_tracking (this, ptem); + hb_position_t offset_to_add = c->font->em_scalef_x (tracking / 2); + hb_position_t advance_to_add = c->font->em_scalef_x (tracking); + foreach_grapheme (buffer, start, end) + { + if (!(buffer->info[start].mask & trak_mask)) continue; + buffer->pos[start].x_advance += advance_to_add; + buffer->pos[start].x_offset += offset_to_add; + } + } + else + { + const TrackData &trackData = this+vertData; + int tracking = trackData.get_tracking (this, ptem); + hb_position_t offset_to_add = c->font->em_scalef_y (tracking / 2); + hb_position_t advance_to_add = c->font->em_scalef_y (tracking); + foreach_grapheme (buffer, start, end) + { + if (!(buffer->info[start].mask & trak_mask)) continue; + buffer->pos[start].y_advance += advance_to_add; + buffer->pos[start].y_offset += offset_to_add; + } + } + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + + return_trace (likely (c->check_struct (this) && + version.major == 1 && + horizData.sanitize (c, this, this) && + vertData.sanitize (c, this, this))); + } + + protected: + FixedVersion<>version; /* Version of the tracking table + * (0x00010000u for version 1.0). */ + HBUINT16 format; /* Format of the tracking table (set to 0). */ + OffsetTo<TrackData> + horizData; /* Offset from start of tracking table to TrackData + * for horizontal text (or 0 if none). */ + OffsetTo<TrackData> + vertData; /* Offset from start of tracking table to TrackData + * for vertical text (or 0 if none). */ + HBUINT16 reserved; /* Reserved. Set to 0. */ + + public: + DEFINE_SIZE_STATIC (12); +}; + +} /* namespace AAT */ + + +#endif /* HB_AAT_LAYOUT_TRAK_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-aat-layout.cc b/thirdparty/harfbuzz/src/hb-aat-layout.cc new file mode 100644 index 0000000000..fac510e9e6 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-layout.cc @@ -0,0 +1,382 @@ +/* + * Copyright © 2017 Google, Inc. + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#include "hb-aat-layout.hh" +#include "hb-aat-layout-ankr-table.hh" +#include "hb-aat-layout-bsln-table.hh" // Just so we compile it; unused otherwise. +#include "hb-aat-layout-feat-table.hh" +#include "hb-aat-layout-just-table.hh" // Just so we compile it; unused otherwise. +#include "hb-aat-layout-kerx-table.hh" +#include "hb-aat-layout-morx-table.hh" +#include "hb-aat-layout-trak-table.hh" +#include "hb-aat-ltag-table.hh" + + +/* + * hb_aat_apply_context_t + */ + +/* Note: This context is used for kerning, even without AAT, hence the condition. */ +#if !defined(HB_NO_AAT) || !defined(HB_NO_OT_KERN) + +AAT::hb_aat_apply_context_t::hb_aat_apply_context_t (const hb_ot_shape_plan_t *plan_, + hb_font_t *font_, + hb_buffer_t *buffer_, + hb_blob_t *blob) : + plan (plan_), + font (font_), + face (font->face), + buffer (buffer_), + sanitizer (), + ankr_table (&Null (AAT::ankr)), + lookup_index (0) +{ + sanitizer.init (blob); + sanitizer.set_num_glyphs (face->get_num_glyphs ()); + sanitizer.start_processing (); + sanitizer.set_max_ops (HB_SANITIZE_MAX_OPS_MAX); +} + +AAT::hb_aat_apply_context_t::~hb_aat_apply_context_t () +{ sanitizer.end_processing (); } + +void +AAT::hb_aat_apply_context_t::set_ankr_table (const AAT::ankr *ankr_table_) +{ ankr_table = ankr_table_; } + +#endif + + +/** + * SECTION:hb-aat-layout + * @title: hb-aat-layout + * @short_description: Apple Advanced Typography Layout + * @include: hb-aat.h + * + * Functions for querying OpenType Layout features in the font face. + **/ + + +#if !defined(HB_NO_AAT) || defined(HAVE_CORETEXT) + +/* Table data courtesy of Apple. Converted from mnemonics to integers + * when moving to this file. */ +static const hb_aat_feature_mapping_t feature_mappings[] = +{ + {HB_TAG ('a','f','r','c'), HB_AAT_LAYOUT_FEATURE_TYPE_FRACTIONS, HB_AAT_LAYOUT_FEATURE_SELECTOR_VERTICAL_FRACTIONS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_FRACTIONS}, + {HB_TAG ('c','2','p','c'), HB_AAT_LAYOUT_FEATURE_TYPE_UPPER_CASE, HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_CASE_PETITE_CAPS, HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_UPPER_CASE}, + {HB_TAG ('c','2','s','c'), HB_AAT_LAYOUT_FEATURE_TYPE_UPPER_CASE, HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_CASE_SMALL_CAPS, HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_UPPER_CASE}, + {HB_TAG ('c','a','l','t'), HB_AAT_LAYOUT_FEATURE_TYPE_CONTEXTUAL_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_ALTERNATES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_ALTERNATES_OFF}, + {HB_TAG ('c','a','s','e'), HB_AAT_LAYOUT_FEATURE_TYPE_CASE_SENSITIVE_LAYOUT, HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_LAYOUT_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_LAYOUT_OFF}, + {HB_TAG ('c','l','i','g'), HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES, HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_LIGATURES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_LIGATURES_OFF}, + {HB_TAG ('c','p','s','p'), HB_AAT_LAYOUT_FEATURE_TYPE_CASE_SENSITIVE_LAYOUT, HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_SPACING_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_SPACING_OFF}, + {HB_TAG ('c','s','w','h'), HB_AAT_LAYOUT_FEATURE_TYPE_CONTEXTUAL_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_SWASH_ALTERNATES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_SWASH_ALTERNATES_OFF}, + {HB_TAG ('d','l','i','g'), HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES, HB_AAT_LAYOUT_FEATURE_SELECTOR_RARE_LIGATURES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_RARE_LIGATURES_OFF}, + {HB_TAG ('e','x','p','t'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_EXPERT_CHARACTERS, (hb_aat_layout_feature_selector_t) 16}, + {HB_TAG ('f','r','a','c'), HB_AAT_LAYOUT_FEATURE_TYPE_FRACTIONS, HB_AAT_LAYOUT_FEATURE_SELECTOR_DIAGONAL_FRACTIONS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_FRACTIONS}, + {HB_TAG ('f','w','i','d'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_MONOSPACED_TEXT, (hb_aat_layout_feature_selector_t) 7}, + {HB_TAG ('h','a','l','t'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_HALF_WIDTH_TEXT, (hb_aat_layout_feature_selector_t) 7}, + {HB_TAG ('h','i','s','t'), HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES, HB_AAT_LAYOUT_FEATURE_SELECTOR_HISTORICAL_LIGATURES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_HISTORICAL_LIGATURES_OFF}, + {HB_TAG ('h','k','n','a'), HB_AAT_LAYOUT_FEATURE_TYPE_ALTERNATE_KANA, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_HORIZ_KANA_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_HORIZ_KANA_OFF}, + {HB_TAG ('h','l','i','g'), HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES, HB_AAT_LAYOUT_FEATURE_SELECTOR_HISTORICAL_LIGATURES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_HISTORICAL_LIGATURES_OFF}, + {HB_TAG ('h','n','g','l'), HB_AAT_LAYOUT_FEATURE_TYPE_TRANSLITERATION, HB_AAT_LAYOUT_FEATURE_SELECTOR_HANJA_TO_HANGUL, HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_TRANSLITERATION}, + {HB_TAG ('h','o','j','o'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_HOJO_CHARACTERS, (hb_aat_layout_feature_selector_t) 16}, + {HB_TAG ('h','w','i','d'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_HALF_WIDTH_TEXT, (hb_aat_layout_feature_selector_t) 7}, + {HB_TAG ('i','t','a','l'), HB_AAT_LAYOUT_FEATURE_TYPE_ITALIC_CJK_ROMAN, HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN_OFF}, + {HB_TAG ('j','p','0','4'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS2004_CHARACTERS, (hb_aat_layout_feature_selector_t) 16}, + {HB_TAG ('j','p','7','8'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS1978_CHARACTERS, (hb_aat_layout_feature_selector_t) 16}, + {HB_TAG ('j','p','8','3'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS1983_CHARACTERS, (hb_aat_layout_feature_selector_t) 16}, + {HB_TAG ('j','p','9','0'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS1990_CHARACTERS, (hb_aat_layout_feature_selector_t) 16}, + {HB_TAG ('l','i','g','a'), HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES, HB_AAT_LAYOUT_FEATURE_SELECTOR_COMMON_LIGATURES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_COMMON_LIGATURES_OFF}, + {HB_TAG ('l','n','u','m'), HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_CASE, HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_CASE_NUMBERS, (hb_aat_layout_feature_selector_t) 2}, + {HB_TAG ('m','g','r','k'), HB_AAT_LAYOUT_FEATURE_TYPE_MATHEMATICAL_EXTRAS, HB_AAT_LAYOUT_FEATURE_SELECTOR_MATHEMATICAL_GREEK_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_MATHEMATICAL_GREEK_OFF}, + {HB_TAG ('n','l','c','k'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_NLCCHARACTERS, (hb_aat_layout_feature_selector_t) 16}, + {HB_TAG ('o','n','u','m'), HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_CASE, HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_NUMBERS, (hb_aat_layout_feature_selector_t) 2}, + {HB_TAG ('o','r','d','n'), HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_POSITION, HB_AAT_LAYOUT_FEATURE_SELECTOR_ORDINALS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NORMAL_POSITION}, + {HB_TAG ('p','a','l','t'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_PROPORTIONAL_TEXT, (hb_aat_layout_feature_selector_t) 7}, + {HB_TAG ('p','c','a','p'), HB_AAT_LAYOUT_FEATURE_TYPE_LOWER_CASE, HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_PETITE_CAPS, HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_LOWER_CASE}, + {HB_TAG ('p','k','n','a'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_TEXT, (hb_aat_layout_feature_selector_t) 7}, + {HB_TAG ('p','n','u','m'), HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_NUMBERS, (hb_aat_layout_feature_selector_t) 4}, + {HB_TAG ('p','w','i','d'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_TEXT, (hb_aat_layout_feature_selector_t) 7}, + {HB_TAG ('q','w','i','d'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_QUARTER_WIDTH_TEXT, (hb_aat_layout_feature_selector_t) 7}, + {HB_TAG ('r','u','b','y'), HB_AAT_LAYOUT_FEATURE_TYPE_RUBY_KANA, HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA_OFF}, + {HB_TAG ('s','i','n','f'), HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_POSITION, HB_AAT_LAYOUT_FEATURE_SELECTOR_SCIENTIFIC_INFERIORS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NORMAL_POSITION}, + {HB_TAG ('s','m','c','p'), HB_AAT_LAYOUT_FEATURE_TYPE_LOWER_CASE, HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_SMALL_CAPS, HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_LOWER_CASE}, + {HB_TAG ('s','m','p','l'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_SIMPLIFIED_CHARACTERS, (hb_aat_layout_feature_selector_t) 16}, + {HB_TAG ('s','s','0','1'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ONE_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ONE_OFF}, + {HB_TAG ('s','s','0','2'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWO_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWO_OFF}, + {HB_TAG ('s','s','0','3'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THREE_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THREE_OFF}, + {HB_TAG ('s','s','0','4'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOUR_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOUR_OFF}, + {HB_TAG ('s','s','0','5'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIVE_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIVE_OFF}, + {HB_TAG ('s','s','0','6'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIX_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIX_OFF}, + {HB_TAG ('s','s','0','7'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVEN_OFF}, + {HB_TAG ('s','s','0','8'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHT_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHT_OFF}, + {HB_TAG ('s','s','0','9'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINE_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINE_OFF}, + {HB_TAG ('s','s','1','0'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TEN_OFF}, + {HB_TAG ('s','s','1','1'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ELEVEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ELEVEN_OFF}, + {HB_TAG ('s','s','1','2'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWELVE_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWELVE_OFF}, + {HB_TAG ('s','s','1','3'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THIRTEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THIRTEEN_OFF}, + {HB_TAG ('s','s','1','4'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOURTEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOURTEEN_OFF}, + {HB_TAG ('s','s','1','5'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIFTEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIFTEEN_OFF}, + {HB_TAG ('s','s','1','6'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIXTEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIXTEEN_OFF}, + {HB_TAG ('s','s','1','7'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVENTEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVENTEEN_OFF}, + {HB_TAG ('s','s','1','8'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHTEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHTEEN_OFF}, + {HB_TAG ('s','s','1','9'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINETEEN_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINETEEN_OFF}, + {HB_TAG ('s','s','2','0'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWENTY_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWENTY_OFF}, + {HB_TAG ('s','u','b','s'), HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_POSITION, HB_AAT_LAYOUT_FEATURE_SELECTOR_INFERIORS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NORMAL_POSITION}, + {HB_TAG ('s','u','p','s'), HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_POSITION, HB_AAT_LAYOUT_FEATURE_SELECTOR_SUPERIORS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NORMAL_POSITION}, + {HB_TAG ('s','w','s','h'), HB_AAT_LAYOUT_FEATURE_TYPE_CONTEXTUAL_ALTERNATIVES, HB_AAT_LAYOUT_FEATURE_SELECTOR_SWASH_ALTERNATES_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_SWASH_ALTERNATES_OFF}, + {HB_TAG ('t','i','t','l'), HB_AAT_LAYOUT_FEATURE_TYPE_STYLE_OPTIONS, HB_AAT_LAYOUT_FEATURE_SELECTOR_TITLING_CAPS, HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_STYLE_OPTIONS}, + {HB_TAG ('t','n','a','m'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_NAMES_CHARACTERS, (hb_aat_layout_feature_selector_t) 16}, + {HB_TAG ('t','n','u','m'), HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_MONOSPACED_NUMBERS, (hb_aat_layout_feature_selector_t) 4}, + {HB_TAG ('t','r','a','d'), HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE, HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_CHARACTERS, (hb_aat_layout_feature_selector_t) 16}, + {HB_TAG ('t','w','i','d'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_THIRD_WIDTH_TEXT, (hb_aat_layout_feature_selector_t) 7}, + {HB_TAG ('u','n','i','c'), HB_AAT_LAYOUT_FEATURE_TYPE_LETTER_CASE, (hb_aat_layout_feature_selector_t) 14, (hb_aat_layout_feature_selector_t) 15}, + {HB_TAG ('v','a','l','t'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_PROPORTIONAL_TEXT, (hb_aat_layout_feature_selector_t) 7}, + {HB_TAG ('v','e','r','t'), HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_SUBSTITUTION, HB_AAT_LAYOUT_FEATURE_SELECTOR_SUBSTITUTE_VERTICAL_FORMS_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_SUBSTITUTE_VERTICAL_FORMS_OFF}, + {HB_TAG ('v','h','a','l'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_HALF_WIDTH_TEXT, (hb_aat_layout_feature_selector_t) 7}, + {HB_TAG ('v','k','n','a'), HB_AAT_LAYOUT_FEATURE_TYPE_ALTERNATE_KANA, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_VERT_KANA_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_VERT_KANA_OFF}, + {HB_TAG ('v','p','a','l'), HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING, HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_PROPORTIONAL_TEXT, (hb_aat_layout_feature_selector_t) 7}, + {HB_TAG ('v','r','t','2'), HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_SUBSTITUTION, HB_AAT_LAYOUT_FEATURE_SELECTOR_SUBSTITUTE_VERTICAL_FORMS_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_SUBSTITUTE_VERTICAL_FORMS_OFF}, + {HB_TAG ('z','e','r','o'), HB_AAT_LAYOUT_FEATURE_TYPE_TYPOGRAPHIC_EXTRAS, HB_AAT_LAYOUT_FEATURE_SELECTOR_SLASHED_ZERO_ON, HB_AAT_LAYOUT_FEATURE_SELECTOR_SLASHED_ZERO_OFF}, +}; + +const hb_aat_feature_mapping_t * +hb_aat_layout_find_feature_mapping (hb_tag_t tag) +{ + return hb_sorted_array (feature_mappings).bsearch (tag); +} +#endif + + +#ifndef HB_NO_AAT + +/* + * mort/morx/kerx/trak + */ + + +void +hb_aat_layout_compile_map (const hb_aat_map_builder_t *mapper, + hb_aat_map_t *map) +{ + const AAT::morx& morx = *mapper->face->table.morx; + if (morx.has_data ()) + { + morx.compile_flags (mapper, map); + return; + } + + const AAT::mort& mort = *mapper->face->table.mort; + if (mort.has_data ()) + { + mort.compile_flags (mapper, map); + return; + } +} + + +/* + * hb_aat_layout_has_substitution: + * @face: + * + * Returns: + * Since: 2.3.0 + */ +hb_bool_t +hb_aat_layout_has_substitution (hb_face_t *face) +{ + return face->table.morx->has_data () || + face->table.mort->has_data (); +} + +void +hb_aat_layout_substitute (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ + hb_blob_t *morx_blob = font->face->table.morx.get_blob (); + const AAT::morx& morx = *morx_blob->as<AAT::morx> (); + if (morx.has_data ()) + { + AAT::hb_aat_apply_context_t c (plan, font, buffer, morx_blob); + morx.apply (&c); + return; + } + + hb_blob_t *mort_blob = font->face->table.mort.get_blob (); + const AAT::mort& mort = *mort_blob->as<AAT::mort> (); + if (mort.has_data ()) + { + AAT::hb_aat_apply_context_t c (plan, font, buffer, mort_blob); + mort.apply (&c); + return; + } +} + +void +hb_aat_layout_zero_width_deleted_glyphs (hb_buffer_t *buffer) +{ + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + hb_glyph_position_t *pos = buffer->pos; + for (unsigned int i = 0; i < count; i++) + if (unlikely (info[i].codepoint == AAT::DELETED_GLYPH)) + pos[i].x_advance = pos[i].y_advance = pos[i].x_offset = pos[i].y_offset = 0; +} + +static bool +is_deleted_glyph (const hb_glyph_info_t *info) +{ + return info->codepoint == AAT::DELETED_GLYPH; +} + +void +hb_aat_layout_remove_deleted_glyphs (hb_buffer_t *buffer) +{ + hb_ot_layout_delete_glyphs_inplace (buffer, is_deleted_glyph); +} + +/* + * hb_aat_layout_has_positioning: + * @face: + * + * Returns: + * Since: 2.3.0 + */ +hb_bool_t +hb_aat_layout_has_positioning (hb_face_t *face) +{ + return face->table.kerx->has_data (); +} + +void +hb_aat_layout_position (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ + hb_blob_t *kerx_blob = font->face->table.kerx.get_blob (); + const AAT::kerx& kerx = *kerx_blob->as<AAT::kerx> (); + + AAT::hb_aat_apply_context_t c (plan, font, buffer, kerx_blob); + c.set_ankr_table (font->face->table.ankr.get ()); + kerx.apply (&c); +} + + +/* + * hb_aat_layout_has_tracking: + * @face: + * + * Returns: + * Since: 2.3.0 + */ +hb_bool_t +hb_aat_layout_has_tracking (hb_face_t *face) +{ + return face->table.trak->has_data (); +} + +void +hb_aat_layout_track (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ + const AAT::trak& trak = *font->face->table.trak; + + AAT::hb_aat_apply_context_t c (plan, font, buffer); + trak.apply (&c); +} + +/** + * hb_aat_layout_get_feature_types: + * @face: a face object + * @start_offset: iteration's start offset + * @feature_count:(inout) (allow-none): buffer size as input, filled size as output + * @features: (out caller-allocates) (array length=feature_count): features buffer + * + * Return value: Number of all available feature types. + * + * Since: 2.2.0 + */ +unsigned int +hb_aat_layout_get_feature_types (hb_face_t *face, + unsigned int start_offset, + unsigned int *feature_count, /* IN/OUT. May be NULL. */ + hb_aat_layout_feature_type_t *features /* OUT. May be NULL. */) +{ + return face->table.feat->get_feature_types (start_offset, feature_count, features); +} + +/** + * hb_aat_layout_feature_type_get_name_id: + * @face: a face object + * @feature_type: feature id + * + * Return value: Name ID index + * + * Since: 2.2.0 + */ +hb_ot_name_id_t +hb_aat_layout_feature_type_get_name_id (hb_face_t *face, + hb_aat_layout_feature_type_t feature_type) +{ + return face->table.feat->get_feature_name_id (feature_type); +} + +/** + * hb_aat_layout_feature_type_get_selectors: + * @face: a face object + * @feature_type: feature id + * @start_offset: iteration's start offset + * @selector_count: (inout) (allow-none): buffer size as input, filled size as output + * @selectors: (out caller-allocates) (array length=selector_count): settings buffer + * @default_index: (out) (allow-none): index of default selector if any + * + * If upon return, @default_index is set to #HB_AAT_LAYOUT_NO_SELECTOR_INDEX, then + * the feature type is non-exclusive. Otherwise, @default_index is the index of + * the selector that is selected by default. + * + * Return value: Number of all available feature selectors. + * + * Since: 2.2.0 + */ +unsigned int +hb_aat_layout_feature_type_get_selector_infos (hb_face_t *face, + hb_aat_layout_feature_type_t feature_type, + unsigned int start_offset, + unsigned int *selector_count, /* IN/OUT. May be NULL. */ + hb_aat_layout_feature_selector_info_t *selectors, /* OUT. May be NULL. */ + unsigned int *default_index /* OUT. May be NULL. */) +{ + return face->table.feat->get_selector_infos (feature_type, start_offset, selector_count, selectors, default_index); +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-aat-layout.h b/thirdparty/harfbuzz/src/hb-aat-layout.h new file mode 100644 index 0000000000..b617e8b703 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-layout.h @@ -0,0 +1,486 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_AAT_H_IN +#error "Include <hb-aat.h> instead." +#endif + +#ifndef HB_AAT_LAYOUT_H +#define HB_AAT_LAYOUT_H + +#include "hb.h" + +#include "hb-ot.h" + +HB_BEGIN_DECLS + +/** + * hb_aat_layout_feature_type_t: + * + * + * Since: 2.2.0 + */ +typedef enum +{ + HB_AAT_LAYOUT_FEATURE_TYPE_INVALID = 0xFFFF, + + HB_AAT_LAYOUT_FEATURE_TYPE_ALL_TYPOGRAPHIC = 0, + HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES = 1, + HB_AAT_LAYOUT_FEATURE_TYPE_CURISVE_CONNECTION = 2, + HB_AAT_LAYOUT_FEATURE_TYPE_LETTER_CASE = 3, + HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_SUBSTITUTION = 4, + HB_AAT_LAYOUT_FEATURE_TYPE_LINGUISTIC_REARRANGEMENT = 5, + HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_SPACING = 6, + HB_AAT_LAYOUT_FEATURE_TYPE_SMART_SWASH_TYPE = 8, + HB_AAT_LAYOUT_FEATURE_TYPE_DIACRITICS_TYPE = 9, + HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_POSITION = 10, + HB_AAT_LAYOUT_FEATURE_TYPE_FRACTIONS = 11, + HB_AAT_LAYOUT_FEATURE_TYPE_OVERLAPPING_CHARACTERS_TYPE = 13, + HB_AAT_LAYOUT_FEATURE_TYPE_TYPOGRAPHIC_EXTRAS = 14, + HB_AAT_LAYOUT_FEATURE_TYPE_MATHEMATICAL_EXTRAS = 15, + HB_AAT_LAYOUT_FEATURE_TYPE_ORNAMENT_SETS_TYPE = 16, + HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_ALTERNATIVES = 17, + HB_AAT_LAYOUT_FEATURE_TYPE_DESIGN_COMPLEXITY_TYPE = 18, + HB_AAT_LAYOUT_FEATURE_TYPE_STYLE_OPTIONS = 19, + HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE = 20, + HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_CASE = 21, + HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING = 22, + HB_AAT_LAYOUT_FEATURE_TYPE_TRANSLITERATION = 23, + HB_AAT_LAYOUT_FEATURE_TYPE_ANNOTATION_TYPE = 24, + HB_AAT_LAYOUT_FEATURE_TYPE_KANA_SPACING_TYPE = 25, + HB_AAT_LAYOUT_FEATURE_TYPE_IDEOGRAPHIC_SPACING_TYPE = 26, + HB_AAT_LAYOUT_FEATURE_TYPE_UNICODE_DECOMPOSITION_TYPE = 27, + HB_AAT_LAYOUT_FEATURE_TYPE_RUBY_KANA = 28, + HB_AAT_LAYOUT_FEATURE_TYPE_CJK_SYMBOL_ALTERNATIVES_TYPE = 29, + HB_AAT_LAYOUT_FEATURE_TYPE_IDEOGRAPHIC_ALTERNATIVES_TYPE = 30, + HB_AAT_LAYOUT_FEATURE_TYPE_CJK_VERTICAL_ROMAN_PLACEMENT_TYPE = 31, + HB_AAT_LAYOUT_FEATURE_TYPE_ITALIC_CJK_ROMAN = 32, + HB_AAT_LAYOUT_FEATURE_TYPE_CASE_SENSITIVE_LAYOUT = 33, + HB_AAT_LAYOUT_FEATURE_TYPE_ALTERNATE_KANA = 34, + HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES = 35, + HB_AAT_LAYOUT_FEATURE_TYPE_CONTEXTUAL_ALTERNATIVES = 36, + HB_AAT_LAYOUT_FEATURE_TYPE_LOWER_CASE = 37, + HB_AAT_LAYOUT_FEATURE_TYPE_UPPER_CASE = 38, + HB_AAT_LAYOUT_FEATURE_TYPE_LANGUAGE_TAG_TYPE = 39, + HB_AAT_LAYOUT_FEATURE_TYPE_CJK_ROMAN_SPACING_TYPE = 103, + + _HB_AAT_LAYOUT_FEATURE_TYPE_MAX_VALUE = HB_TAG_MAX_SIGNED /*< skip >*/ +} hb_aat_layout_feature_type_t; + +/** + * hb_aat_layout_feature_selector_t: + * + * + * Since: 2.2.0 + */ +typedef enum +{ + HB_AAT_LAYOUT_FEATURE_SELECTOR_INVALID = 0xFFFF, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_ALL_TYPOGRAPHIC */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_ALL_TYPE_FEATURES_ON = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ALL_TYPE_FEATURES_OFF = 1, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_REQUIRED_LIGATURES_ON = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_REQUIRED_LIGATURES_OFF = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_COMMON_LIGATURES_ON = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_COMMON_LIGATURES_OFF = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_RARE_LIGATURES_ON = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_RARE_LIGATURES_OFF = 5, + HB_AAT_LAYOUT_FEATURE_SELECTOR_LOGOS_ON = 6, + HB_AAT_LAYOUT_FEATURE_SELECTOR_LOGOS_OFF = 7, + HB_AAT_LAYOUT_FEATURE_SELECTOR_REBUS_PICTURES_ON = 8, + HB_AAT_LAYOUT_FEATURE_SELECTOR_REBUS_PICTURES_OFF = 9, + HB_AAT_LAYOUT_FEATURE_SELECTOR_DIPHTHONG_LIGATURES_ON = 10, + HB_AAT_LAYOUT_FEATURE_SELECTOR_DIPHTHONG_LIGATURES_OFF = 11, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SQUARED_LIGATURES_ON = 12, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SQUARED_LIGATURES_OFF = 13, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ABBREV_SQUARED_LIGATURES_ON = 14, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ABBREV_SQUARED_LIGATURES_OFF = 15, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SYMBOL_LIGATURES_ON = 16, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SYMBOL_LIGATURES_OFF = 17, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_LIGATURES_ON = 18, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_LIGATURES_OFF = 19, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HISTORICAL_LIGATURES_ON = 20, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HISTORICAL_LIGATURES_OFF = 21, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_LIGATURES */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_UNCONNECTED = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_PARTIALLY_CONNECTED = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CURSIVE = 2, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_LETTER_CASE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_AND_LOWER_CASE = 0, /* deprecated */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_ALL_CAPS = 1, /* deprecated */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_ALL_LOWER_CASE = 2, /* deprecated */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_SMALL_CAPS = 3, /* deprecated */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_INITIAL_CAPS = 4, /* deprecated */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_INITIAL_CAPS_AND_SMALL_CAPS = 5, /* deprecated */ + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_SUBSTITUTION */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_SUBSTITUTE_VERTICAL_FORMS_ON = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SUBSTITUTE_VERTICAL_FORMS_OFF = 1, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_LINGUISTIC_REARRANGEMENT */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_LINGUISTIC_REARRANGEMENT_ON = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_LINGUISTIC_REARRANGEMENT_OFF = 1, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_SPACING */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_MONOSPACED_NUMBERS = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_NUMBERS = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_THIRD_WIDTH_NUMBERS = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_QUARTER_WIDTH_NUMBERS = 3, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_SMART_SWASH_TYPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_WORD_INITIAL_SWASHES_ON = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_WORD_INITIAL_SWASHES_OFF = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_WORD_FINAL_SWASHES_ON = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_WORD_FINAL_SWASHES_OFF = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_LINE_INITIAL_SWASHES_ON = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_LINE_INITIAL_SWASHES_OFF = 5, + HB_AAT_LAYOUT_FEATURE_SELECTOR_LINE_FINAL_SWASHES_ON = 6, + HB_AAT_LAYOUT_FEATURE_SELECTOR_LINE_FINAL_SWASHES_OFF = 7, + HB_AAT_LAYOUT_FEATURE_SELECTOR_NON_FINAL_SWASHES_ON = 8, + HB_AAT_LAYOUT_FEATURE_SELECTOR_NON_FINAL_SWASHES_OFF = 9, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_DIACRITICS_TYPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_SHOW_DIACRITICS = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HIDE_DIACRITICS = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_DECOMPOSE_DIACRITICS = 2, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_VERTICAL_POSITION */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_NORMAL_POSITION = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SUPERIORS = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_INFERIORS = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ORDINALS = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SCIENTIFIC_INFERIORS = 4, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_FRACTIONS */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_FRACTIONS = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_VERTICAL_FRACTIONS = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_DIAGONAL_FRACTIONS = 2, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_OVERLAPPING_CHARACTERS_TYPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_PREVENT_OVERLAP_ON = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_PREVENT_OVERLAP_OFF = 1, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_TYPOGRAPHIC_EXTRAS */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_HYPHENS_TO_EM_DASH_ON = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HYPHENS_TO_EM_DASH_OFF = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HYPHEN_TO_EN_DASH_ON = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HYPHEN_TO_EN_DASH_OFF = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SLASHED_ZERO_ON = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SLASHED_ZERO_OFF = 5, + HB_AAT_LAYOUT_FEATURE_SELECTOR_FORM_INTERROBANG_ON = 6, + HB_AAT_LAYOUT_FEATURE_SELECTOR_FORM_INTERROBANG_OFF = 7, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SMART_QUOTES_ON = 8, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SMART_QUOTES_OFF = 9, + HB_AAT_LAYOUT_FEATURE_SELECTOR_PERIODS_TO_ELLIPSIS_ON = 10, + HB_AAT_LAYOUT_FEATURE_SELECTOR_PERIODS_TO_ELLIPSIS_OFF = 11, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_MATHEMATICAL_EXTRAS */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_HYPHEN_TO_MINUS_ON = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HYPHEN_TO_MINUS_OFF = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ASTERISK_TO_MULTIPLY_ON = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ASTERISK_TO_MULTIPLY_OFF = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SLASH_TO_DIVIDE_ON = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SLASH_TO_DIVIDE_OFF = 5, + HB_AAT_LAYOUT_FEATURE_SELECTOR_INEQUALITY_LIGATURES_ON = 6, + HB_AAT_LAYOUT_FEATURE_SELECTOR_INEQUALITY_LIGATURES_OFF = 7, + HB_AAT_LAYOUT_FEATURE_SELECTOR_EXPONENTS_ON = 8, + HB_AAT_LAYOUT_FEATURE_SELECTOR_EXPONENTS_OFF = 9, + HB_AAT_LAYOUT_FEATURE_SELECTOR_MATHEMATICAL_GREEK_ON = 10, + HB_AAT_LAYOUT_FEATURE_SELECTOR_MATHEMATICAL_GREEK_OFF = 11, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_ORNAMENT_SETS_TYPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_ORNAMENTS = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_DINGBATS = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_PI_CHARACTERS = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_FLEURONS = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_DECORATIVE_BORDERS = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_INTERNATIONAL_SYMBOLS = 5, + HB_AAT_LAYOUT_FEATURE_SELECTOR_MATH_SYMBOLS = 6, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_ALTERNATIVES */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_ALTERNATES = 0, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_DESIGN_COMPLEXITY_TYPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_DESIGN_LEVEL1 = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_DESIGN_LEVEL2 = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_DESIGN_LEVEL3 = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_DESIGN_LEVEL4 = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_DESIGN_LEVEL5 = 4, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_STYLE_OPTIONS */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_STYLE_OPTIONS = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_DISPLAY_TEXT = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ENGRAVED_TEXT = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ILLUMINATED_CAPS = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_TITLING_CAPS = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_TALL_CAPS = 5, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_SHAPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_CHARACTERS = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SIMPLIFIED_CHARACTERS = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS1978_CHARACTERS = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS1983_CHARACTERS = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS1990_CHARACTERS = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_ALT_ONE = 5, + HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_ALT_TWO = 6, + HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_ALT_THREE = 7, + HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_ALT_FOUR = 8, + HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_ALT_FIVE = 9, + HB_AAT_LAYOUT_FEATURE_SELECTOR_EXPERT_CHARACTERS = 10, + HB_AAT_LAYOUT_FEATURE_SELECTOR_JIS2004_CHARACTERS = 11, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HOJO_CHARACTERS = 12, + HB_AAT_LAYOUT_FEATURE_SELECTOR_NLCCHARACTERS = 13, + HB_AAT_LAYOUT_FEATURE_SELECTOR_TRADITIONAL_NAMES_CHARACTERS = 14, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_NUMBER_CASE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_NUMBERS = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_CASE_NUMBERS = 1, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_TEXT_SPACING */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_TEXT = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_MONOSPACED_TEXT = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HALF_WIDTH_TEXT = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_THIRD_WIDTH_TEXT = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_QUARTER_WIDTH_TEXT = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_PROPORTIONAL_TEXT = 5, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ALT_HALF_WIDTH_TEXT = 6, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_TRANSLITERATION */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_TRANSLITERATION = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HANJA_TO_HANGUL = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HIRAGANA_TO_KATAKANA = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_KATAKANA_TO_HIRAGANA = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_KANA_TO_ROMANIZATION = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ROMANIZATION_TO_HIRAGANA = 5, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ROMANIZATION_TO_KATAKANA = 6, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HANJA_TO_HANGUL_ALT_ONE = 7, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HANJA_TO_HANGUL_ALT_TWO = 8, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HANJA_TO_HANGUL_ALT_THREE = 9, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_ANNOTATION_TYPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_ANNOTATION = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_BOX_ANNOTATION = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ROUNDED_BOX_ANNOTATION = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CIRCLE_ANNOTATION = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_INVERTED_CIRCLE_ANNOTATION = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_PARENTHESIS_ANNOTATION = 5, + HB_AAT_LAYOUT_FEATURE_SELECTOR_PERIOD_ANNOTATION = 6, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ROMAN_NUMERAL_ANNOTATION = 7, + HB_AAT_LAYOUT_FEATURE_SELECTOR_DIAMOND_ANNOTATION = 8, + HB_AAT_LAYOUT_FEATURE_SELECTOR_INVERTED_BOX_ANNOTATION = 9, + HB_AAT_LAYOUT_FEATURE_SELECTOR_INVERTED_ROUNDED_BOX_ANNOTATION= 10, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_KANA_SPACING_TYPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_FULL_WIDTH_KANA = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_KANA = 1, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_IDEOGRAPHIC_SPACING_TYPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_FULL_WIDTH_IDEOGRAPHS = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_IDEOGRAPHS = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_HALF_WIDTH_IDEOGRAPHS = 2, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_UNICODE_DECOMPOSITION_TYPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_CANONICAL_COMPOSITION_ON = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CANONICAL_COMPOSITION_OFF = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_COMPATIBILITY_COMPOSITION_ON = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_COMPATIBILITY_COMPOSITION_OFF = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_TRANSCODING_COMPOSITION_ON = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_TRANSCODING_COMPOSITION_OFF = 5, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_RUBY_KANA */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_RUBY_KANA = 0, /* deprecated - use HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA_OFF instead */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA = 1, /* deprecated - use HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA_ON instead */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA_ON = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_RUBY_KANA_OFF = 3, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CJK_SYMBOL_ALTERNATIVES_TYPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_CJK_SYMBOL_ALTERNATIVES = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_SYMBOL_ALT_ONE = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_SYMBOL_ALT_TWO = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_SYMBOL_ALT_THREE = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_SYMBOL_ALT_FOUR = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_SYMBOL_ALT_FIVE = 5, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_IDEOGRAPHIC_ALTERNATIVES_TYPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_IDEOGRAPHIC_ALTERNATIVES = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_IDEOGRAPHIC_ALT_ONE = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_IDEOGRAPHIC_ALT_TWO = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_IDEOGRAPHIC_ALT_THREE = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_IDEOGRAPHIC_ALT_FOUR = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_IDEOGRAPHIC_ALT_FIVE = 5, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CJK_VERTICAL_ROMAN_PLACEMENT_TYPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_VERTICAL_ROMAN_CENTERED = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_VERTICAL_ROMAN_HBASELINE = 1, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_ITALIC_CJK_ROMAN */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_CJK_ITALIC_ROMAN = 0, /* deprecated - use HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN_OFF instead */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN = 1, /* deprecated - use HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN_ON instead */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN_ON = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CJK_ITALIC_ROMAN_OFF = 3, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CASE_SENSITIVE_LAYOUT */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_LAYOUT_ON = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_LAYOUT_OFF = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_SPACING_ON = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CASE_SENSITIVE_SPACING_OFF = 3, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_ALTERNATE_KANA */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_HORIZ_KANA_ON = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_HORIZ_KANA_OFF = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_VERT_KANA_ON = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_ALTERNATE_VERT_KANA_OFF = 3, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_STYLISTIC_ALTERNATIVES */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_NO_STYLISTIC_ALTERNATES = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ONE_ON = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ONE_OFF = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWO_ON = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWO_OFF = 5, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THREE_ON = 6, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THREE_OFF = 7, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOUR_ON = 8, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOUR_OFF = 9, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIVE_ON = 10, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIVE_OFF = 11, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIX_ON = 12, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIX_OFF = 13, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVEN_ON = 14, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVEN_OFF = 15, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHT_ON = 16, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHT_OFF = 17, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINE_ON = 18, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINE_OFF = 19, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TEN_ON = 20, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TEN_OFF = 21, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ELEVEN_ON = 22, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_ELEVEN_OFF = 23, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWELVE_ON = 24, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWELVE_OFF = 25, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THIRTEEN_ON = 26, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_THIRTEEN_OFF = 27, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOURTEEN_ON = 28, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FOURTEEN_OFF = 29, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIFTEEN_ON = 30, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_FIFTEEN_OFF = 31, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIXTEEN_ON = 32, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SIXTEEN_OFF = 33, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVENTEEN_ON = 34, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_SEVENTEEN_OFF = 35, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHTEEN_ON = 36, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_EIGHTEEN_OFF = 37, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINETEEN_ON = 38, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_NINETEEN_OFF = 39, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWENTY_ON = 40, + HB_AAT_LAYOUT_FEATURE_SELECTOR_STYLISTIC_ALT_TWENTY_OFF = 41, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CONTEXTUAL_ALTERNATIVES */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_ALTERNATES_ON = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_ALTERNATES_OFF = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SWASH_ALTERNATES_ON = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_SWASH_ALTERNATES_OFF = 3, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_SWASH_ALTERNATES_ON = 4, + HB_AAT_LAYOUT_FEATURE_SELECTOR_CONTEXTUAL_SWASH_ALTERNATES_OFF= 5, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_LOWER_CASE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_LOWER_CASE = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_SMALL_CAPS = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_PETITE_CAPS = 2, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_UPPER_CASE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_UPPER_CASE = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_CASE_SMALL_CAPS = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_UPPER_CASE_PETITE_CAPS = 2, + + /* Selectors for #HB_AAT_LAYOUT_FEATURE_TYPE_CJK_ROMAN_SPACING_TYPE */ + HB_AAT_LAYOUT_FEATURE_SELECTOR_HALF_WIDTH_CJK_ROMAN = 0, + HB_AAT_LAYOUT_FEATURE_SELECTOR_PROPORTIONAL_CJK_ROMAN = 1, + HB_AAT_LAYOUT_FEATURE_SELECTOR_DEFAULT_CJK_ROMAN = 2, + HB_AAT_LAYOUT_FEATURE_SELECTOR_FULL_WIDTH_CJK_ROMAN = 3, + + _HB_AAT_LAYOUT_FEATURE_SELECTOR_MAX_VALUE = HB_TAG_MAX_SIGNED /*< skip >*/ +} hb_aat_layout_feature_selector_t; + +HB_EXTERN unsigned int +hb_aat_layout_get_feature_types (hb_face_t *face, + unsigned int start_offset, + unsigned int *feature_count, /* IN/OUT. May be NULL. */ + hb_aat_layout_feature_type_t *features /* OUT. May be NULL. */); + +HB_EXTERN hb_ot_name_id_t +hb_aat_layout_feature_type_get_name_id (hb_face_t *face, + hb_aat_layout_feature_type_t feature_type); + +typedef struct hb_aat_layout_feature_selector_info_t +{ + hb_ot_name_id_t name_id; + hb_aat_layout_feature_selector_t enable; + hb_aat_layout_feature_selector_t disable; + /*< private >*/ + unsigned int reserved; +} hb_aat_layout_feature_selector_info_t; + +#define HB_AAT_LAYOUT_NO_SELECTOR_INDEX 0xFFFFu + +HB_EXTERN unsigned int +hb_aat_layout_feature_type_get_selector_infos (hb_face_t *face, + hb_aat_layout_feature_type_t feature_type, + unsigned int start_offset, + unsigned int *selector_count, /* IN/OUT. May be NULL. */ + hb_aat_layout_feature_selector_info_t *selectors, /* OUT. May be NULL. */ + unsigned int *default_index /* OUT. May be NULL. */); + + +/* + * morx/mort + */ + +HB_EXTERN hb_bool_t +hb_aat_layout_has_substitution (hb_face_t *face); + + +/* + * kerx + */ + +HB_EXTERN hb_bool_t +hb_aat_layout_has_positioning (hb_face_t *face); + + +/* + * trak + */ + +HB_EXTERN hb_bool_t +hb_aat_layout_has_tracking (hb_face_t *face); + + +HB_END_DECLS + +#endif /* HB_AAT_LAYOUT_H */ diff --git a/thirdparty/harfbuzz/src/hb-aat-layout.hh b/thirdparty/harfbuzz/src/hb-aat-layout.hh new file mode 100644 index 0000000000..5e4e3bda15 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-layout.hh @@ -0,0 +1,75 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_AAT_LAYOUT_HH +#define HB_AAT_LAYOUT_HH + +#include "hb.hh" + +#include "hb-ot-shape.hh" +#include "hb-aat-ltag-table.hh" + +struct hb_aat_feature_mapping_t +{ + hb_tag_t otFeatureTag; + hb_aat_layout_feature_type_t aatFeatureType; + hb_aat_layout_feature_selector_t selectorToEnable; + hb_aat_layout_feature_selector_t selectorToDisable; + + int cmp (hb_tag_t key) const + { return key < otFeatureTag ? -1 : key > otFeatureTag ? 1 : 0; } +}; + +HB_INTERNAL const hb_aat_feature_mapping_t * +hb_aat_layout_find_feature_mapping (hb_tag_t tag); + +HB_INTERNAL void +hb_aat_layout_compile_map (const hb_aat_map_builder_t *mapper, + hb_aat_map_t *map); + +HB_INTERNAL void +hb_aat_layout_substitute (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + +HB_INTERNAL void +hb_aat_layout_zero_width_deleted_glyphs (hb_buffer_t *buffer); + +HB_INTERNAL void +hb_aat_layout_remove_deleted_glyphs (hb_buffer_t *buffer); + +HB_INTERNAL void +hb_aat_layout_position (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + +HB_INTERNAL void +hb_aat_layout_track (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + + +#endif /* HB_AAT_LAYOUT_HH */ diff --git a/thirdparty/harfbuzz/src/hb-aat-ltag-table.hh b/thirdparty/harfbuzz/src/hb-aat-ltag-table.hh new file mode 100644 index 0000000000..711f9aa6c1 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-ltag-table.hh @@ -0,0 +1,92 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_AAT_LTAG_TABLE_HH +#define HB_AAT_LTAG_TABLE_HH + +#include "hb-open-type.hh" + +/* + * ltag -- Language Tag + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6ltag.html + */ +#define HB_AAT_TAG_ltag HB_TAG('l','t','a','g') + + +namespace AAT { + +using namespace OT; + + +struct FTStringRange +{ + friend struct ltag; + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && (base+tag).sanitize (c, length)); + } + + protected: + NNOffsetTo<UnsizedArrayOf<HBUINT8>> + tag; /* Offset from the start of the table to + * the beginning of the string */ + HBUINT16 length; /* String length (in bytes) */ + public: + DEFINE_SIZE_STATIC (4); +}; + +struct ltag +{ + static constexpr hb_tag_t tableTag = HB_AAT_TAG_ltag; + + hb_language_t get_language (unsigned int i) const + { + const FTStringRange &range = tagRanges[i]; + return hb_language_from_string ((const char *) (this+range.tag).arrayZ, + range.length); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + version >= 1 && + tagRanges.sanitize (c, this))); + } + + protected: + HBUINT32 version; /* Table version; currently 1 */ + HBUINT32 flags; /* Table flags; currently none defined */ + LArrayOf<FTStringRange> + tagRanges; /* Range for each tag's string */ + public: + DEFINE_SIZE_ARRAY (12, tagRanges); +}; + +} /* namespace AAT */ + + +#endif /* HB_AAT_LTAG_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-aat-map.cc b/thirdparty/harfbuzz/src/hb-aat-map.cc new file mode 100644 index 0000000000..2c38c35029 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-map.cc @@ -0,0 +1,102 @@ +/* + * Copyright © 2009,2010 Red Hat, Inc. + * Copyright © 2010,2011,2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_AAT_SHAPE + +#include "hb-aat-map.hh" + +#include "hb-aat-layout.hh" +#include "hb-aat-layout-feat-table.hh" + + +void hb_aat_map_builder_t::add_feature (hb_tag_t tag, unsigned value) +{ + if (!face->table.feat->has_data ()) return; + + if (tag == HB_TAG ('a','a','l','t')) + { + if (!face->table.feat->exposes_feature (HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_ALTERNATIVES)) + return; + feature_info_t *info = features.push(); + info->type = HB_AAT_LAYOUT_FEATURE_TYPE_CHARACTER_ALTERNATIVES; + info->setting = (hb_aat_layout_feature_selector_t) value; + info->seq = features.length; + info->is_exclusive = true; + return; + } + + const hb_aat_feature_mapping_t *mapping = hb_aat_layout_find_feature_mapping (tag); + if (!mapping) return; + + const AAT::FeatureName* feature = &face->table.feat->get_feature (mapping->aatFeatureType); + if (!feature->has_data ()) + { + /* Special case: Chain::compile_flags will fall back to the deprecated version of + * small-caps if necessary, so we need to check for that possibility. + * https://github.com/harfbuzz/harfbuzz/issues/2307 */ + if (mapping->aatFeatureType == HB_AAT_LAYOUT_FEATURE_TYPE_LOWER_CASE && + mapping->selectorToEnable == HB_AAT_LAYOUT_FEATURE_SELECTOR_LOWER_CASE_SMALL_CAPS) + { + feature = &face->table.feat->get_feature (HB_AAT_LAYOUT_FEATURE_TYPE_LETTER_CASE); + if (!feature->has_data ()) return; + } + else return; + } + + feature_info_t *info = features.push(); + info->type = mapping->aatFeatureType; + info->setting = value ? mapping->selectorToEnable : mapping->selectorToDisable; + info->seq = features.length; + info->is_exclusive = feature->is_exclusive (); +} + +void +hb_aat_map_builder_t::compile (hb_aat_map_t &m) +{ + /* Sort features and merge duplicates */ + if (features.length) + { + features.qsort (); + unsigned int j = 0; + for (unsigned int i = 1; i < features.length; i++) + if (features[i].type != features[j].type || + /* Nonexclusive feature selectors come in even/odd pairs to turn a setting on/off + * respectively, so we mask out the low-order bit when checking for "duplicates" + * (selectors referring to the same feature setting) here. */ + (!features[i].is_exclusive && ((features[i].setting & ~1) != (features[j].setting & ~1)))) + features[++j] = features[i]; + features.shrink (j + 1); + } + + hb_aat_layout_compile_map (this, &m); +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-aat-map.hh b/thirdparty/harfbuzz/src/hb-aat-map.hh new file mode 100644 index 0000000000..5a0fa70544 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat-map.hh @@ -0,0 +1,96 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_AAT_MAP_HH +#define HB_AAT_MAP_HH + +#include "hb.hh" + + +struct hb_aat_map_t +{ + friend struct hb_aat_map_builder_t; + + public: + + void init () + { + memset (this, 0, sizeof (*this)); + chain_flags.init (); + } + void fini () { chain_flags.fini (); } + + public: + hb_vector_t<hb_mask_t> chain_flags; +}; + +struct hb_aat_map_builder_t +{ + public: + + HB_INTERNAL hb_aat_map_builder_t (hb_face_t *face_, + const hb_segment_properties_t *props_ HB_UNUSED) : + face (face_) {} + + HB_INTERNAL void add_feature (hb_tag_t tag, unsigned int value=1); + + HB_INTERNAL void compile (hb_aat_map_t &m); + + public: + struct feature_info_t + { + hb_aat_layout_feature_type_t type; + hb_aat_layout_feature_selector_t setting; + bool is_exclusive; + unsigned seq; /* For stable sorting only. */ + + HB_INTERNAL static int cmp (const void *pa, const void *pb) + { + const feature_info_t *a = (const feature_info_t *) pa; + const feature_info_t *b = (const feature_info_t *) pb; + if (a->type != b->type) return (a->type < b->type ? -1 : 1); + if (!a->is_exclusive && + (a->setting & ~1) != (b->setting & ~1)) return (a->setting < b->setting ? -1 : 1); + return (a->seq < b->seq ? -1 : a->seq > b->seq ? 1 : 0); + } + + /* compares type & setting only, not is_exclusive flag or seq number */ + int cmp (const feature_info_t& f) const + { + return (f.type != type) ? (f.type < type ? -1 : 1) : + (f.setting != setting) ? (f.setting < setting ? -1 : 1) : 0; + } + }; + + public: + hb_face_t *face; + + public: + hb_sorted_vector_t<feature_info_t> features; +}; + + +#endif /* HB_AAT_MAP_HH */ diff --git a/thirdparty/harfbuzz/src/hb-aat.h b/thirdparty/harfbuzz/src/hb-aat.h new file mode 100644 index 0000000000..c14313d1e2 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-aat.h @@ -0,0 +1,38 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_AAT_H +#define HB_AAT_H +#define HB_AAT_H_IN + +#include "hb.h" + +#include "hb-aat-layout.h" + +HB_BEGIN_DECLS + +HB_END_DECLS + +#undef HB_AAT_H_IN +#endif /* HB_AAT_H */ diff --git a/thirdparty/harfbuzz/src/hb-algs.hh b/thirdparty/harfbuzz/src/hb-algs.hh new file mode 100644 index 0000000000..30b5812e12 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-algs.hh @@ -0,0 +1,1127 @@ +/* + * Copyright © 2017 Google, Inc. + * Copyright © 2019 Facebook, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + * Facebook Author(s): Behdad Esfahbod + */ + +#ifndef HB_ALGS_HH +#define HB_ALGS_HH + +#include "hb.hh" +#include "hb-meta.hh" +#include "hb-null.hh" +#include "hb-number.hh" + + +/* Encodes three unsigned integers in one 64-bit number. If the inputs have more than 21 bits, + * values will be truncated / overlap, and might not decode exactly. */ +#define HB_CODEPOINT_ENCODE3(x,y,z) (((uint64_t) (x) << 42) | ((uint64_t) (y) << 21) | (uint64_t) (z)) +#define HB_CODEPOINT_DECODE3_1(v) ((hb_codepoint_t) ((v) >> 42)) +#define HB_CODEPOINT_DECODE3_2(v) ((hb_codepoint_t) ((v) >> 21) & 0x1FFFFFu) +#define HB_CODEPOINT_DECODE3_3(v) ((hb_codepoint_t) (v) & 0x1FFFFFu) + +/* Custom encoding used by hb-ucd. */ +#define HB_CODEPOINT_ENCODE3_11_7_14(x,y,z) (((uint32_t) ((x) & 0x07FFu) << 21) | (((uint32_t) (y) & 0x007Fu) << 14) | (uint32_t) ((z) & 0x3FFFu)) +#define HB_CODEPOINT_DECODE3_11_7_14_1(v) ((hb_codepoint_t) ((v) >> 21)) +#define HB_CODEPOINT_DECODE3_11_7_14_2(v) ((hb_codepoint_t) (((v) >> 14) & 0x007Fu) | 0x0300) +#define HB_CODEPOINT_DECODE3_11_7_14_3(v) ((hb_codepoint_t) (v) & 0x3FFFu) + +struct +{ + /* Note. This is dangerous in that if it's passed an rvalue, it returns rvalue-reference. */ + template <typename T> constexpr auto + operator () (T&& v) const HB_AUTO_RETURN ( hb_forward<T> (v) ) +} +HB_FUNCOBJ (hb_identity); +struct +{ + /* Like identity(), but only retains lvalue-references. Rvalues are returned as rvalues. */ + template <typename T> constexpr T& + operator () (T& v) const { return v; } + + template <typename T> constexpr hb_remove_reference<T> + operator () (T&& v) const { return v; } +} +HB_FUNCOBJ (hb_lidentity); +struct +{ + /* Like identity(), but always returns rvalue. */ + template <typename T> constexpr hb_remove_reference<T> + operator () (T&& v) const { return v; } +} +HB_FUNCOBJ (hb_ridentity); + +struct +{ + template <typename T> constexpr bool + operator () (T&& v) const { return bool (hb_forward<T> (v)); } +} +HB_FUNCOBJ (hb_bool); + +struct +{ + private: + + template <typename T> constexpr auto + impl (const T& v, hb_priority<1>) const HB_RETURN (uint32_t, hb_deref (v).hash ()) + + template <typename T, + hb_enable_if (hb_is_integral (T))> constexpr auto + impl (const T& v, hb_priority<0>) const HB_AUTO_RETURN + ( + /* Knuth's multiplicative method: */ + (uint32_t) v * 2654435761u + ) + + public: + + template <typename T> constexpr auto + operator () (const T& v) const HB_RETURN (uint32_t, impl (v, hb_prioritize)) +} +HB_FUNCOBJ (hb_hash); + + +struct +{ + private: + + /* Pointer-to-member-function. */ + template <typename Appl, typename T, typename ...Ts> auto + impl (Appl&& a, hb_priority<2>, T &&v, Ts&&... ds) const HB_AUTO_RETURN + ((hb_deref (hb_forward<T> (v)).*hb_forward<Appl> (a)) (hb_forward<Ts> (ds)...)) + + /* Pointer-to-member. */ + template <typename Appl, typename T> auto + impl (Appl&& a, hb_priority<1>, T &&v) const HB_AUTO_RETURN + ((hb_deref (hb_forward<T> (v))).*hb_forward<Appl> (a)) + + /* Operator(). */ + template <typename Appl, typename ...Ts> auto + impl (Appl&& a, hb_priority<0>, Ts&&... ds) const HB_AUTO_RETURN + (hb_deref (hb_forward<Appl> (a)) (hb_forward<Ts> (ds)...)) + + public: + + template <typename Appl, typename ...Ts> auto + operator () (Appl&& a, Ts&&... ds) const HB_AUTO_RETURN + ( + impl (hb_forward<Appl> (a), + hb_prioritize, + hb_forward<Ts> (ds)...) + ) +} +HB_FUNCOBJ (hb_invoke); + +template <unsigned Pos, typename Appl, typename V> +struct hb_partial_t +{ + hb_partial_t (Appl a, V v) : a (a), v (v) {} + + static_assert (Pos > 0, ""); + + template <typename ...Ts, + unsigned P = Pos, + hb_enable_if (P == 1)> auto + operator () (Ts&& ...ds) -> decltype (hb_invoke (hb_declval (Appl), + hb_declval (V), + hb_declval (Ts)...)) + { + return hb_invoke (hb_forward<Appl> (a), + hb_forward<V> (v), + hb_forward<Ts> (ds)...); + } + template <typename T0, typename ...Ts, + unsigned P = Pos, + hb_enable_if (P == 2)> auto + operator () (T0&& d0, Ts&& ...ds) -> decltype (hb_invoke (hb_declval (Appl), + hb_declval (T0), + hb_declval (V), + hb_declval (Ts)...)) + { + return hb_invoke (hb_forward<Appl> (a), + hb_forward<T0> (d0), + hb_forward<V> (v), + hb_forward<Ts> (ds)...); + } + + private: + hb_reference_wrapper<Appl> a; + V v; +}; +template <unsigned Pos=1, typename Appl, typename V> +auto hb_partial (Appl&& a, V&& v) HB_AUTO_RETURN +(( hb_partial_t<Pos, Appl, V> (a, v) )) + +/* The following, HB_PARTIALIZE, macro uses a particular corner-case + * of C++11 that is not particularly well-supported by all compilers. + * What's happening is that it's using "this" in a trailing return-type + * via decltype(). Broken compilers deduce the type of "this" pointer + * in that context differently from what it resolves to in the body + * of the function. + * + * One probable cause of this is that at the time of trailing return + * type declaration, "this" points to an incomplete type, whereas in + * the function body the type is complete. That doesn't justify the + * error in any way, but is probably what's happening. + * + * In the case of MSVC, we get around this by using C++14 "decltype(auto)" + * which deduces the type from the actual return statement. For gcc 4.8 + * we use "+this" instead of "this" which produces an rvalue that seems + * to be deduced as the same type with this particular compiler, and seem + * to be fine as default code path as well. + */ +#ifdef _MSC_VER +/* https://github.com/harfbuzz/harfbuzz/issues/1730 */ \ +#define HB_PARTIALIZE(Pos) \ + template <typename _T> \ + decltype(auto) operator () (_T&& _v) const \ + { return hb_partial<Pos> (this, hb_forward<_T> (_v)); } \ + static_assert (true, "") +#else +/* https://github.com/harfbuzz/harfbuzz/issues/1724 */ +#define HB_PARTIALIZE(Pos) \ + template <typename _T> \ + auto operator () (_T&& _v) const HB_AUTO_RETURN \ + (hb_partial<Pos> (+this, hb_forward<_T> (_v))) \ + static_assert (true, "") +#endif + + +struct +{ + private: + + template <typename Pred, typename Val> auto + impl (Pred&& p, Val &&v, hb_priority<1>) const HB_AUTO_RETURN + (hb_deref (hb_forward<Pred> (p)).has (hb_forward<Val> (v))) + + template <typename Pred, typename Val> auto + impl (Pred&& p, Val &&v, hb_priority<0>) const HB_AUTO_RETURN + ( + hb_invoke (hb_forward<Pred> (p), + hb_forward<Val> (v)) + ) + + public: + + template <typename Pred, typename Val> auto + operator () (Pred&& p, Val &&v) const HB_RETURN (bool, + impl (hb_forward<Pred> (p), + hb_forward<Val> (v), + hb_prioritize) + ) +} +HB_FUNCOBJ (hb_has); + +struct +{ + private: + + template <typename Pred, typename Val> auto + impl (Pred&& p, Val &&v, hb_priority<1>) const HB_AUTO_RETURN + ( + hb_has (hb_forward<Pred> (p), + hb_forward<Val> (v)) + ) + + template <typename Pred, typename Val> auto + impl (Pred&& p, Val &&v, hb_priority<0>) const HB_AUTO_RETURN + ( + hb_forward<Pred> (p) == hb_forward<Val> (v) + ) + + public: + + template <typename Pred, typename Val> auto + operator () (Pred&& p, Val &&v) const HB_RETURN (bool, + impl (hb_forward<Pred> (p), + hb_forward<Val> (v), + hb_prioritize) + ) +} +HB_FUNCOBJ (hb_match); + +struct +{ + private: + + template <typename Proj, typename Val> auto + impl (Proj&& f, Val &&v, hb_priority<2>) const HB_AUTO_RETURN + (hb_deref (hb_forward<Proj> (f)).get (hb_forward<Val> (v))) + + template <typename Proj, typename Val> auto + impl (Proj&& f, Val &&v, hb_priority<1>) const HB_AUTO_RETURN + ( + hb_invoke (hb_forward<Proj> (f), + hb_forward<Val> (v)) + ) + + template <typename Proj, typename Val> auto + impl (Proj&& f, Val &&v, hb_priority<0>) const HB_AUTO_RETURN + ( + hb_forward<Proj> (f)[hb_forward<Val> (v)] + ) + + public: + + template <typename Proj, typename Val> auto + operator () (Proj&& f, Val &&v) const HB_AUTO_RETURN + ( + impl (hb_forward<Proj> (f), + hb_forward<Val> (v), + hb_prioritize) + ) +} +HB_FUNCOBJ (hb_get); + + +template <typename T1, typename T2> +struct hb_pair_t +{ + typedef T1 first_t; + typedef T2 second_t; + typedef hb_pair_t<T1, T2> pair_t; + + hb_pair_t (T1 a, T2 b) : first (a), second (b) {} + + template <typename Q1, typename Q2, + hb_enable_if (hb_is_convertible (T1, Q1) && + hb_is_convertible (T2, T2))> + operator hb_pair_t<Q1, Q2> () { return hb_pair_t<Q1, Q2> (first, second); } + + hb_pair_t<T1, T2> reverse () const + { return hb_pair_t<T1, T2> (second, first); } + + bool operator == (const pair_t& o) const { return first == o.first && second == o.second; } + bool operator != (const pair_t& o) const { return !(*this == o); } + bool operator < (const pair_t& o) const { return first < o.first || (first == o.first && second < o.second); } + bool operator >= (const pair_t& o) const { return !(*this < o); } + bool operator > (const pair_t& o) const { return first > o.first || (first == o.first && second > o.second); } + bool operator <= (const pair_t& o) const { return !(*this > o); } + + T1 first; + T2 second; +}; +#define hb_pair_t(T1,T2) hb_pair_t<T1, T2> +template <typename T1, typename T2> static inline hb_pair_t<T1, T2> +hb_pair (T1&& a, T2&& b) { return hb_pair_t<T1, T2> (a, b); } + +struct +{ + template <typename Pair> constexpr typename Pair::first_t + operator () (const Pair& pair) const { return pair.first; } +} +HB_FUNCOBJ (hb_first); + +struct +{ + template <typename Pair> constexpr typename Pair::second_t + operator () (const Pair& pair) const { return pair.second; } +} +HB_FUNCOBJ (hb_second); + +/* Note. In min/max impl, we can use hb_type_identity<T> for second argument. + * However, that would silently convert between different-signedness integers. + * Instead we accept two different types, such that compiler can err if + * comparing integers of different signedness. */ +struct +{ + template <typename T, typename T2> constexpr auto + operator () (T&& a, T2&& b) const HB_AUTO_RETURN + (hb_forward<T> (a) <= hb_forward<T2> (b) ? hb_forward<T> (a) : hb_forward<T2> (b)) +} +HB_FUNCOBJ (hb_min); +struct +{ + template <typename T, typename T2> constexpr auto + operator () (T&& a, T2&& b) const HB_AUTO_RETURN + (hb_forward<T> (a) >= hb_forward<T2> (b) ? hb_forward<T> (a) : hb_forward<T2> (b)) +} +HB_FUNCOBJ (hb_max); +struct +{ + template <typename T, typename T2, typename T3> constexpr auto + operator () (T&& x, T2&& min, T3&& max) const HB_AUTO_RETURN + (hb_min (hb_max (hb_forward<T> (x), hb_forward<T2> (min)), hb_forward<T3> (max))) +} +HB_FUNCOBJ (hb_clamp); + + +/* + * Bithacks. + */ + +/* Return the number of 1 bits in v. */ +template <typename T> +static inline HB_CONST_FUNC unsigned int +hb_popcount (T v) +{ +#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) + if (sizeof (T) <= sizeof (unsigned int)) + return __builtin_popcount (v); + + if (sizeof (T) <= sizeof (unsigned long)) + return __builtin_popcountl (v); + + if (sizeof (T) <= sizeof (unsigned long long)) + return __builtin_popcountll (v); +#endif + + if (sizeof (T) <= 4) + { + /* "HACKMEM 169" */ + uint32_t y; + y = (v >> 1) &033333333333; + y = v - y - ((y >>1) & 033333333333); + return (((y + (y >> 3)) & 030707070707) % 077); + } + + if (sizeof (T) == 8) + { + unsigned int shift = 32; + return hb_popcount<uint32_t> ((uint32_t) v) + hb_popcount ((uint32_t) (v >> shift)); + } + + if (sizeof (T) == 16) + { + unsigned int shift = 64; + return hb_popcount<uint64_t> ((uint64_t) v) + hb_popcount ((uint64_t) (v >> shift)); + } + + assert (0); + return 0; /* Shut up stupid compiler. */ +} + +/* Returns the number of bits needed to store number */ +template <typename T> +static inline HB_CONST_FUNC unsigned int +hb_bit_storage (T v) +{ + if (unlikely (!v)) return 0; + +#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) + if (sizeof (T) <= sizeof (unsigned int)) + return sizeof (unsigned int) * 8 - __builtin_clz (v); + + if (sizeof (T) <= sizeof (unsigned long)) + return sizeof (unsigned long) * 8 - __builtin_clzl (v); + + if (sizeof (T) <= sizeof (unsigned long long)) + return sizeof (unsigned long long) * 8 - __builtin_clzll (v); +#endif + +#if (defined(_MSC_VER) && _MSC_VER >= 1500) || (defined(__MINGW32__) && (__GNUC__ < 4)) + if (sizeof (T) <= sizeof (unsigned int)) + { + unsigned long where; + _BitScanReverse (&where, v); + return 1 + where; + } +# if defined(_WIN64) + if (sizeof (T) <= 8) + { + unsigned long where; + _BitScanReverse64 (&where, v); + return 1 + where; + } +# endif +#endif + + if (sizeof (T) <= 4) + { + /* "bithacks" */ + const unsigned int b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000}; + const unsigned int S[] = {1, 2, 4, 8, 16}; + unsigned int r = 0; + for (int i = 4; i >= 0; i--) + if (v & b[i]) + { + v >>= S[i]; + r |= S[i]; + } + return r + 1; + } + if (sizeof (T) <= 8) + { + /* "bithacks" */ + const uint64_t b[] = {0x2ULL, 0xCULL, 0xF0ULL, 0xFF00ULL, 0xFFFF0000ULL, 0xFFFFFFFF00000000ULL}; + const unsigned int S[] = {1, 2, 4, 8, 16, 32}; + unsigned int r = 0; + for (int i = 5; i >= 0; i--) + if (v & b[i]) + { + v >>= S[i]; + r |= S[i]; + } + return r + 1; + } + if (sizeof (T) == 16) + { + unsigned int shift = 64; + return (v >> shift) ? hb_bit_storage<uint64_t> ((uint64_t) (v >> shift)) + shift : + hb_bit_storage<uint64_t> ((uint64_t) v); + } + + assert (0); + return 0; /* Shut up stupid compiler. */ +} + +/* Returns the number of zero bits in the least significant side of v */ +template <typename T> +static inline HB_CONST_FUNC unsigned int +hb_ctz (T v) +{ + if (unlikely (!v)) return 8 * sizeof (T); + +#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) + if (sizeof (T) <= sizeof (unsigned int)) + return __builtin_ctz (v); + + if (sizeof (T) <= sizeof (unsigned long)) + return __builtin_ctzl (v); + + if (sizeof (T) <= sizeof (unsigned long long)) + return __builtin_ctzll (v); +#endif + +#if (defined(_MSC_VER) && _MSC_VER >= 1500) || (defined(__MINGW32__) && (__GNUC__ < 4)) + if (sizeof (T) <= sizeof (unsigned int)) + { + unsigned long where; + _BitScanForward (&where, v); + return where; + } +# if defined(_WIN64) + if (sizeof (T) <= 8) + { + unsigned long where; + _BitScanForward64 (&where, v); + return where; + } +# endif +#endif + + if (sizeof (T) <= 4) + { + /* "bithacks" */ + unsigned int c = 32; + v &= - (int32_t) v; + if (v) c--; + if (v & 0x0000FFFF) c -= 16; + if (v & 0x00FF00FF) c -= 8; + if (v & 0x0F0F0F0F) c -= 4; + if (v & 0x33333333) c -= 2; + if (v & 0x55555555) c -= 1; + return c; + } + if (sizeof (T) <= 8) + { + /* "bithacks" */ + unsigned int c = 64; + v &= - (int64_t) (v); + if (v) c--; + if (v & 0x00000000FFFFFFFFULL) c -= 32; + if (v & 0x0000FFFF0000FFFFULL) c -= 16; + if (v & 0x00FF00FF00FF00FFULL) c -= 8; + if (v & 0x0F0F0F0F0F0F0F0FULL) c -= 4; + if (v & 0x3333333333333333ULL) c -= 2; + if (v & 0x5555555555555555ULL) c -= 1; + return c; + } + if (sizeof (T) == 16) + { + unsigned int shift = 64; + return (uint64_t) v ? hb_bit_storage<uint64_t> ((uint64_t) v) : + hb_bit_storage<uint64_t> ((uint64_t) (v >> shift)) + shift; + } + + assert (0); + return 0; /* Shut up stupid compiler. */ +} + + +/* + * Tiny stuff. + */ + +/* ASCII tag/character handling */ +static inline bool ISALPHA (unsigned char c) +{ return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } +static inline bool ISALNUM (unsigned char c) +{ return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9'); } +static inline bool ISSPACE (unsigned char c) +{ return c == ' ' || c =='\f'|| c =='\n'|| c =='\r'|| c =='\t'|| c =='\v'; } +static inline unsigned char TOUPPER (unsigned char c) +{ return (c >= 'a' && c <= 'z') ? c - 'a' + 'A' : c; } +static inline unsigned char TOLOWER (unsigned char c) +{ return (c >= 'A' && c <= 'Z') ? c - 'A' + 'a' : c; } +static inline bool ISHEX (unsigned char c) +{ return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } +static inline unsigned char TOHEX (uint8_t c) +{ return (c & 0xF) <= 9 ? (c & 0xF) + '0' : (c & 0xF) + 'a' - 10; } +static inline uint8_t FROMHEX (unsigned char c) +{ return (c >= '0' && c <= '9') ? c - '0' : TOLOWER (c) - 'a' + 10; } + +static inline unsigned int DIV_CEIL (const unsigned int a, unsigned int b) +{ return (a + (b - 1)) / b; } + + +#undef ARRAY_LENGTH +template <typename Type, unsigned int n> +static inline unsigned int ARRAY_LENGTH (const Type (&)[n]) { return n; } +/* A const version, but does not detect erratically being called on pointers. */ +#define ARRAY_LENGTH_CONST(__array) ((signed int) (sizeof (__array) / sizeof (__array[0]))) + + +static inline int +hb_memcmp (const void *a, const void *b, unsigned int len) +{ + /* It's illegal to pass NULL to memcmp(), even if len is zero. + * So, wrap it. + * https://sourceware.org/bugzilla/show_bug.cgi?id=23878 */ + if (unlikely (!len)) return 0; + return memcmp (a, b, len); +} + +static inline void * +hb_memset (void *s, int c, unsigned int n) +{ + /* It's illegal to pass NULL to memset(), even if n is zero. */ + if (unlikely (!n)) return 0; + return memset (s, c, n); +} + +static inline unsigned int +hb_ceil_to_4 (unsigned int v) +{ + return ((v - 1) | 3) + 1; +} + +template <typename T> static inline bool +hb_in_range (T u, T lo, T hi) +{ + static_assert (!hb_is_signed<T>::value, ""); + + /* The casts below are important as if T is smaller than int, + * the subtract results will become a signed int! */ + return (T)(u - lo) <= (T)(hi - lo); +} +template <typename T> static inline bool +hb_in_ranges (T u, T lo1, T hi1, T lo2, T hi2) +{ + return hb_in_range (u, lo1, hi1) || hb_in_range (u, lo2, hi2); +} +template <typename T> static inline bool +hb_in_ranges (T u, T lo1, T hi1, T lo2, T hi2, T lo3, T hi3) +{ + return hb_in_range (u, lo1, hi1) || hb_in_range (u, lo2, hi2) || hb_in_range (u, lo3, hi3); +} + + +/* + * Overflow checking. + */ + +/* Consider __builtin_mul_overflow use here also */ +static inline bool +hb_unsigned_mul_overflows (unsigned int count, unsigned int size) +{ + return (size > 0) && (count >= ((unsigned int) -1) / size); +} + + +/* + * Sort and search. + */ + +template <typename K, typename V, typename ...Ts> +static int +_hb_cmp_method (const void *pkey, const void *pval, Ts... ds) +{ + const K& key = * (const K*) pkey; + const V& val = * (const V*) pval; + + return val.cmp (key, ds...); +} + +template <typename V, typename K, typename ...Ts> +static inline bool +hb_bsearch_impl (unsigned *pos, /* Out */ + const K& key, + V* base, size_t nmemb, size_t stride, + int (*compar)(const void *_key, const void *_item, Ts... _ds), + Ts... ds) +{ + /* This is our *only* bsearch implementation. */ + + int min = 0, max = (int) nmemb - 1; + while (min <= max) + { + int mid = ((unsigned int) min + (unsigned int) max) / 2; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-align" + V* p = (V*) (((const char *) base) + (mid * stride)); +#pragma GCC diagnostic pop + int c = compar ((const void *) hb_addressof (key), (const void *) p, ds...); + if (c < 0) + max = mid - 1; + else if (c > 0) + min = mid + 1; + else + { + *pos = mid; + return true; + } + } + *pos = min; + return false; +} + +template <typename V, typename K> +static inline V* +hb_bsearch (const K& key, V* base, + size_t nmemb, size_t stride = sizeof (V), + int (*compar)(const void *_key, const void *_item) = _hb_cmp_method<K, V>) +{ + unsigned pos; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-align" + return hb_bsearch_impl (&pos, key, base, nmemb, stride, compar) ? + (V*) (((const char *) base) + (pos * stride)) : nullptr; +#pragma GCC diagnostic pop +} +template <typename V, typename K, typename ...Ts> +static inline V* +hb_bsearch (const K& key, V* base, + size_t nmemb, size_t stride, + int (*compar)(const void *_key, const void *_item, Ts... _ds), + Ts... ds) +{ + unsigned pos; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-align" + return hb_bsearch_impl (&pos, key, base, nmemb, stride, compar, ds...) ? + (V*) (((const char *) base) + (pos * stride)) : nullptr; +#pragma GCC diagnostic pop +} + + +/* From https://github.com/noporpoise/sort_r + Feb 5, 2019 (c8c65c1e) + Modified to support optional argument using templates */ + +/* Isaac Turner 29 April 2014 Public Domain */ + +/* +hb_qsort function to be exported. +Parameters: + base is the array to be sorted + nel is the number of elements in the array + width is the size in bytes of each element of the array + compar is the comparison function + arg (optional) is a pointer to be passed to the comparison function + +void hb_qsort(void *base, size_t nel, size_t width, + int (*compar)(const void *_a, const void *_b, [void *_arg]), + [void *arg]); +*/ + +#define SORT_R_SWAP(a,b,tmp) ((tmp) = (a), (a) = (b), (b) = (tmp)) + +/* swap a and b */ +/* a and b must not be equal! */ +static inline void sort_r_swap(char *__restrict a, char *__restrict b, + size_t w) +{ + char tmp, *end = a+w; + for(; a < end; a++, b++) { SORT_R_SWAP(*a, *b, tmp); } +} + +/* swap a, b iff a>b */ +/* a and b must not be equal! */ +/* __restrict is same as restrict but better support on old machines */ +template <typename ...Ts> +static inline int sort_r_cmpswap(char *__restrict a, + char *__restrict b, size_t w, + int (*compar)(const void *_a, + const void *_b, + Ts... _ds), + Ts... ds) +{ + if(compar(a, b, ds...) > 0) { + sort_r_swap(a, b, w); + return 1; + } + return 0; +} + +/* +Swap consecutive blocks of bytes of size na and nb starting at memory addr ptr, +with the smallest swap so that the blocks are in the opposite order. Blocks may +be internally re-ordered e.g. + 12345ab -> ab34512 + 123abc -> abc123 + 12abcde -> deabc12 +*/ +static inline void sort_r_swap_blocks(char *ptr, size_t na, size_t nb) +{ + if(na > 0 && nb > 0) { + if(na > nb) { sort_r_swap(ptr, ptr+na, nb); } + else { sort_r_swap(ptr, ptr+nb, na); } + } +} + +/* Implement recursive quicksort ourselves */ +/* Note: quicksort is not stable, equivalent values may be swapped */ +template <typename ...Ts> +static inline void sort_r_simple(void *base, size_t nel, size_t w, + int (*compar)(const void *_a, + const void *_b, + Ts... _ds), + Ts... ds) +{ + char *b = (char *)base, *end = b + nel*w; + + /* for(size_t i=0; i<nel; i++) {printf("%4i", *(int*)(b + i*sizeof(int)));} + printf("\n"); */ + + if(nel < 10) { + /* Insertion sort for arbitrarily small inputs */ + char *pi, *pj; + for(pi = b+w; pi < end; pi += w) { + for(pj = pi; pj > b && sort_r_cmpswap(pj-w,pj,w,compar,ds...); pj -= w) {} + } + } + else + { + /* nel > 9; Quicksort */ + + int cmp; + char *pl, *ple, *pr, *pre, *pivot; + char *last = b+w*(nel-1), *tmp; + + /* + Use median of second, middle and second-last items as pivot. + First and last may have been swapped with pivot and therefore be extreme + */ + char *l[3]; + l[0] = b + w; + l[1] = b+w*(nel/2); + l[2] = last - w; + + /* printf("pivots: %i, %i, %i\n", *(int*)l[0], *(int*)l[1], *(int*)l[2]); */ + + if(compar(l[0],l[1],ds...) > 0) { SORT_R_SWAP(l[0], l[1], tmp); } + if(compar(l[1],l[2],ds...) > 0) { + SORT_R_SWAP(l[1], l[2], tmp); + if(compar(l[0],l[1],ds...) > 0) { SORT_R_SWAP(l[0], l[1], tmp); } + } + + /* swap mid value (l[1]), and last element to put pivot as last element */ + if(l[1] != last) { sort_r_swap(l[1], last, w); } + + /* + pl is the next item on the left to be compared to the pivot + pr is the last item on the right that was compared to the pivot + ple is the left position to put the next item that equals the pivot + ple is the last right position where we put an item that equals the pivot + v- end (beyond the array) + EEEEEELLLLLLLLuuuuuuuuGGGGGGGEEEEEEEE. + ^- b ^- ple ^- pl ^- pr ^- pre ^- last (where the pivot is) + Pivot comparison key: + E = equal, L = less than, u = unknown, G = greater than, E = equal + */ + pivot = last; + ple = pl = b; + pre = pr = last; + + /* + Strategy: + Loop into the list from the left and right at the same time to find: + - an item on the left that is greater than the pivot + - an item on the right that is less than the pivot + Once found, they are swapped and the loop continues. + Meanwhile items that are equal to the pivot are moved to the edges of the + array. + */ + while(pl < pr) { + /* Move left hand items which are equal to the pivot to the far left. + break when we find an item that is greater than the pivot */ + for(; pl < pr; pl += w) { + cmp = compar(pl, pivot, ds...); + if(cmp > 0) { break; } + else if(cmp == 0) { + if(ple < pl) { sort_r_swap(ple, pl, w); } + ple += w; + } + } + /* break if last batch of left hand items were equal to pivot */ + if(pl >= pr) { break; } + /* Move right hand items which are equal to the pivot to the far right. + break when we find an item that is less than the pivot */ + for(; pl < pr; ) { + pr -= w; /* Move right pointer onto an unprocessed item */ + cmp = compar(pr, pivot, ds...); + if(cmp == 0) { + pre -= w; + if(pr < pre) { sort_r_swap(pr, pre, w); } + } + else if(cmp < 0) { + if(pl < pr) { sort_r_swap(pl, pr, w); } + pl += w; + break; + } + } + } + + pl = pr; /* pr may have gone below pl */ + + /* + Now we need to go from: EEELLLGGGGEEEE + to: LLLEEEEEEEGGGG + Pivot comparison key: + E = equal, L = less than, u = unknown, G = greater than, E = equal + */ + sort_r_swap_blocks(b, ple-b, pl-ple); + sort_r_swap_blocks(pr, pre-pr, end-pre); + + /*for(size_t i=0; i<nel; i++) {printf("%4i", *(int*)(b + i*sizeof(int)));} + printf("\n");*/ + + sort_r_simple(b, (pl-ple)/w, w, compar, ds...); + sort_r_simple(end-(pre-pr), (pre-pr)/w, w, compar, ds...); + } +} + +static inline void +hb_qsort (void *base, size_t nel, size_t width, + int (*compar)(const void *_a, const void *_b)) +{ +#if defined(__OPTIMIZE_SIZE__) && !defined(HB_USE_INTERNAL_QSORT) + qsort (base, nel, width, compar); +#else + sort_r_simple (base, nel, width, compar); +#endif +} + +static inline void +hb_qsort (void *base, size_t nel, size_t width, + int (*compar)(const void *_a, const void *_b, void *_arg), + void *arg) +{ +#ifdef HAVE_GNU_QSORT_R + qsort_r (base, nel, width, compar, arg); +#else + sort_r_simple (base, nel, width, compar, arg); +#endif +} + + +template <typename T, typename T2, typename T3> static inline void +hb_stable_sort (T *array, unsigned int len, int(*compar)(const T2 *, const T2 *), T3 *array2) +{ + for (unsigned int i = 1; i < len; i++) + { + unsigned int j = i; + while (j && compar (&array[j - 1], &array[i]) > 0) + j--; + if (i == j) + continue; + /* Move item i to occupy place for item j, shift what's in between. */ + { + T t = array[i]; + memmove (&array[j + 1], &array[j], (i - j) * sizeof (T)); + array[j] = t; + } + if (array2) + { + T3 t = array2[i]; + memmove (&array2[j + 1], &array2[j], (i - j) * sizeof (T3)); + array2[j] = t; + } + } +} + +template <typename T> static inline void +hb_stable_sort (T *array, unsigned int len, int(*compar)(const T *, const T *)) +{ + hb_stable_sort (array, len, compar, (int *) nullptr); +} + +static inline hb_bool_t +hb_codepoint_parse (const char *s, unsigned int len, int base, hb_codepoint_t *out) +{ + unsigned int v; + const char *p = s; + const char *end = p + len; + if (unlikely (!hb_parse_uint (&p, end, &v, true/* whole buffer */, base))) + return false; + + *out = v; + return true; +} + + +/* Operators. */ + +struct hb_bitwise_and +{ HB_PARTIALIZE(2); + static constexpr bool passthru_left = false; + static constexpr bool passthru_right = false; + template <typename T> constexpr auto + operator () (const T &a, const T &b) const HB_AUTO_RETURN (a & b) +} +HB_FUNCOBJ (hb_bitwise_and); +struct hb_bitwise_or +{ HB_PARTIALIZE(2); + static constexpr bool passthru_left = true; + static constexpr bool passthru_right = true; + template <typename T> constexpr auto + operator () (const T &a, const T &b) const HB_AUTO_RETURN (a | b) +} +HB_FUNCOBJ (hb_bitwise_or); +struct hb_bitwise_xor +{ HB_PARTIALIZE(2); + static constexpr bool passthru_left = true; + static constexpr bool passthru_right = true; + template <typename T> constexpr auto + operator () (const T &a, const T &b) const HB_AUTO_RETURN (a ^ b) +} +HB_FUNCOBJ (hb_bitwise_xor); +struct hb_bitwise_sub +{ HB_PARTIALIZE(2); + static constexpr bool passthru_left = true; + static constexpr bool passthru_right = false; + template <typename T> constexpr auto + operator () (const T &a, const T &b) const HB_AUTO_RETURN (a & ~b) +} +HB_FUNCOBJ (hb_bitwise_sub); +struct +{ + template <typename T> constexpr auto + operator () (const T &a) const HB_AUTO_RETURN (~a) +} +HB_FUNCOBJ (hb_bitwise_neg); + +struct +{ HB_PARTIALIZE(2); + template <typename T, typename T2> constexpr auto + operator () (const T &a, const T2 &b) const HB_AUTO_RETURN (a + b) +} +HB_FUNCOBJ (hb_add); +struct +{ HB_PARTIALIZE(2); + template <typename T, typename T2> constexpr auto + operator () (const T &a, const T2 &b) const HB_AUTO_RETURN (a - b) +} +HB_FUNCOBJ (hb_sub); +struct +{ HB_PARTIALIZE(2); + template <typename T, typename T2> constexpr auto + operator () (const T &a, const T2 &b) const HB_AUTO_RETURN (a * b) +} +HB_FUNCOBJ (hb_mul); +struct +{ HB_PARTIALIZE(2); + template <typename T, typename T2> constexpr auto + operator () (const T &a, const T2 &b) const HB_AUTO_RETURN (a / b) +} +HB_FUNCOBJ (hb_div); +struct +{ HB_PARTIALIZE(2); + template <typename T, typename T2> constexpr auto + operator () (const T &a, const T2 &b) const HB_AUTO_RETURN (a % b) +} +HB_FUNCOBJ (hb_mod); +struct +{ + template <typename T> constexpr auto + operator () (const T &a) const HB_AUTO_RETURN (+a) +} +HB_FUNCOBJ (hb_pos); +struct +{ + template <typename T> constexpr auto + operator () (const T &a) const HB_AUTO_RETURN (-a) +} +HB_FUNCOBJ (hb_neg); +struct +{ + template <typename T> constexpr auto + operator () (T &a) const HB_AUTO_RETURN (++a) +} +HB_FUNCOBJ (hb_inc); +struct +{ + template <typename T> constexpr auto + operator () (T &a) const HB_AUTO_RETURN (--a) +} +HB_FUNCOBJ (hb_dec); + + +/* Compiler-assisted vectorization. */ + +/* Type behaving similar to vectorized vars defined using __attribute__((vector_size(...))), + * basically a fixed-size bitset. */ +template <typename elt_t, unsigned int byte_size> +struct hb_vector_size_t +{ + elt_t& operator [] (unsigned int i) { return v[i]; } + const elt_t& operator [] (unsigned int i) const { return v[i]; } + + void clear (unsigned char v = 0) { memset (this, v, sizeof (*this)); } + + template <typename Op> + hb_vector_size_t process (const Op& op) const + { + hb_vector_size_t r; + for (unsigned int i = 0; i < ARRAY_LENGTH (v); i++) + r.v[i] = op (v[i]); + return r; + } + template <typename Op> + hb_vector_size_t process (const Op& op, const hb_vector_size_t &o) const + { + hb_vector_size_t r; + for (unsigned int i = 0; i < ARRAY_LENGTH (v); i++) + r.v[i] = op (v[i], o.v[i]); + return r; + } + hb_vector_size_t operator | (const hb_vector_size_t &o) const + { return process (hb_bitwise_or, o); } + hb_vector_size_t operator & (const hb_vector_size_t &o) const + { return process (hb_bitwise_and, o); } + hb_vector_size_t operator ^ (const hb_vector_size_t &o) const + { return process (hb_bitwise_xor, o); } + hb_vector_size_t operator ~ () const + { return process (hb_bitwise_neg); } + + private: + static_assert (0 == byte_size % sizeof (elt_t), ""); + elt_t v[byte_size / sizeof (elt_t)]; +}; + + +#endif /* HB_ALGS_HH */ diff --git a/thirdparty/harfbuzz/src/hb-array.hh b/thirdparty/harfbuzz/src/hb-array.hh new file mode 100644 index 0000000000..568cd02c79 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-array.hh @@ -0,0 +1,408 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_ARRAY_HH +#define HB_ARRAY_HH + +#include "hb.hh" +#include "hb-algs.hh" +#include "hb-iter.hh" +#include "hb-null.hh" + + +template <typename Type> +struct hb_sorted_array_t; + +template <typename Type> +struct hb_array_t : hb_iter_with_fallback_t<hb_array_t<Type>, Type&> +{ + /* + * Constructors. + */ + hb_array_t () : arrayZ (nullptr), length (0), backwards_length (0) {} + hb_array_t (Type *array_, unsigned int length_) : arrayZ (array_), length (length_), backwards_length (0) {} + template <unsigned int length_> + hb_array_t (Type (&array_)[length_]) : arrayZ (array_), length (length_), backwards_length (0) {} + + template <typename U, + hb_enable_if (hb_is_cr_convertible(U, Type))> + hb_array_t (const hb_array_t<U> &o) : + hb_iter_with_fallback_t<hb_array_t, Type&> (), + arrayZ (o.arrayZ), length (o.length), backwards_length (o.backwards_length) {} + template <typename U, + hb_enable_if (hb_is_cr_convertible(U, Type))> + hb_array_t& operator = (const hb_array_t<U> &o) + { arrayZ = o.arrayZ; length = o.length; backwards_length = o.backwards_length; return *this; } + + /* + * Iterator implementation. + */ + typedef Type& __item_t__; + static constexpr bool is_random_access_iterator = true; + Type& __item_at__ (unsigned i) const + { + if (unlikely (i >= length)) return CrapOrNull (Type); + return arrayZ[i]; + } + void __forward__ (unsigned n) + { + if (unlikely (n > length)) + n = length; + length -= n; + backwards_length += n; + arrayZ += n; + } + void __rewind__ (unsigned n) + { + if (unlikely (n > backwards_length)) + n = backwards_length; + length += n; + backwards_length -= n; + arrayZ -= n; + } + unsigned __len__ () const { return length; } + /* Ouch. The operator== compares the contents of the array. For range-based for loops, + * it's best if we can just compare arrayZ, though comparing contents is still fast, + * but also would require that Type has operator==. As such, we optimize this operator + * for range-based for loop and just compare arrayZ. No need to compare length, as we + * assume we're only compared to .end(). */ + bool operator != (const hb_array_t& o) const + { return arrayZ != o.arrayZ; } + + /* Extra operators. + */ + Type * operator & () const { return arrayZ; } + operator hb_array_t<const Type> () { return hb_array_t<const Type> (arrayZ, length); } + template <typename T> operator T * () const { return arrayZ; } + + HB_INTERNAL bool operator == (const hb_array_t &o) const; + + uint32_t hash () const { + uint32_t current = 0; + for (unsigned int i = 0; i < this->length; i++) { + current = current * 31 + hb_hash (this->arrayZ[i]); + } + return current; + } + + /* + * Compare, Sort, and Search. + */ + + /* Note: our compare is NOT lexicographic; it also does NOT call Type::cmp. */ + int cmp (const hb_array_t &a) const + { + if (length != a.length) + return (int) a.length - (int) length; + return hb_memcmp (a.arrayZ, arrayZ, get_size ()); + } + HB_INTERNAL static int cmp (const void *pa, const void *pb) + { + hb_array_t *a = (hb_array_t *) pa; + hb_array_t *b = (hb_array_t *) pb; + return b->cmp (*a); + } + + template <typename T> + Type *lsearch (const T &x, Type *not_found = nullptr) + { + unsigned i; + return lfind (x, &i) ? &this->arrayZ[i] : not_found; + } + template <typename T> + const Type *lsearch (const T &x, const Type *not_found = nullptr) const + { + unsigned i; + return lfind (x, &i) ? &this->arrayZ[i] : not_found; + } + template <typename T> + bool lfind (const T &x, unsigned *pos = nullptr) const + { + for (unsigned i = 0; i < length; ++i) + if (!this->arrayZ[i].cmp (x)) + { + if (pos) + *pos = i; + return true; + } + + return false; + } + + hb_sorted_array_t<Type> qsort (int (*cmp_)(const void*, const void*)) + { + if (likely (length)) + hb_qsort (arrayZ, length, this->get_item_size (), cmp_); + return hb_sorted_array_t<Type> (*this); + } + hb_sorted_array_t<Type> qsort () + { + if (likely (length)) + hb_qsort (arrayZ, length, this->get_item_size (), Type::cmp); + return hb_sorted_array_t<Type> (*this); + } + void qsort (unsigned int start, unsigned int end) + { + end = hb_min (end, length); + assert (start <= end); + if (likely (start < end)) + hb_qsort (arrayZ + start, end - start, this->get_item_size (), Type::cmp); + } + + /* + * Other methods. + */ + + unsigned int get_size () const { return length * this->get_item_size (); } + + /* + * Reverse the order of items in this array in the range [start, end). + */ + void reverse (unsigned start = 0, unsigned end = -1) + { + start = hb_min (start, length); + end = hb_min (end, length); + + if (end < start + 2) + return; + + for (unsigned lhs = start, rhs = end - 1; lhs < rhs; lhs++, rhs--) { + Type temp = arrayZ[rhs]; + arrayZ[rhs] = arrayZ[lhs]; + arrayZ[lhs] = temp; + } + } + + hb_array_t sub_array (unsigned int start_offset = 0, unsigned int *seg_count = nullptr /* IN/OUT */) const + { + if (!start_offset && !seg_count) + return *this; + + unsigned int count = length; + if (unlikely (start_offset > count)) + count = 0; + else + count -= start_offset; + if (seg_count) + count = *seg_count = hb_min (count, *seg_count); + return hb_array_t (arrayZ + start_offset, count); + } + hb_array_t sub_array (unsigned int start_offset, unsigned int seg_count) const + { return sub_array (start_offset, &seg_count); } + + hb_array_t truncate (unsigned length) const { return sub_array (0, length); } + + template <typename T, + unsigned P = sizeof (Type), + hb_enable_if (P == 1)> + const T *as () const + { return length < hb_null_size (T) ? &Null (T) : reinterpret_cast<const T *> (arrayZ); } + + template <typename T, + unsigned P = sizeof (Type), + hb_enable_if (P == 1)> + bool check_range (const T *p, unsigned int size = T::static_size) const + { + return arrayZ <= ((const char *) p) + && ((const char *) p) <= arrayZ + length + && (unsigned int) (arrayZ + length - (const char *) p) >= size; + } + + /* Only call if you allocated the underlying array using malloc() or similar. */ + void free () + { ::free ((void *) arrayZ); arrayZ = nullptr; length = 0; } + + template <typename hb_serialize_context_t> + hb_array_t copy (hb_serialize_context_t *c) const + { + TRACE_SERIALIZE (this); + auto* out = c->start_embed (arrayZ); + if (unlikely (!c->extend_size (out, get_size ()))) return_trace (hb_array_t ()); + for (unsigned i = 0; i < length; i++) + out[i] = arrayZ[i]; /* TODO: add version that calls c->copy() */ + return_trace (hb_array_t (out, length)); + } + + template <typename hb_sanitize_context_t> + bool sanitize (hb_sanitize_context_t *c) const + { return c->check_array (arrayZ, length); } + + /* + * Members + */ + + public: + Type *arrayZ; + unsigned int length; + unsigned int backwards_length; +}; +template <typename T> inline hb_array_t<T> +hb_array (T *array, unsigned int length) +{ return hb_array_t<T> (array, length); } +template <typename T, unsigned int length_> inline hb_array_t<T> +hb_array (T (&array_)[length_]) +{ return hb_array_t<T> (array_); } + +enum hb_bfind_not_found_t +{ + HB_BFIND_NOT_FOUND_DONT_STORE, + HB_BFIND_NOT_FOUND_STORE, + HB_BFIND_NOT_FOUND_STORE_CLOSEST, +}; + +template <typename Type> +struct hb_sorted_array_t : + hb_iter_t<hb_sorted_array_t<Type>, Type&>, + hb_array_t<Type> +{ + typedef hb_iter_t<hb_sorted_array_t, Type&> iter_base_t; + HB_ITER_USING (iter_base_t); + static constexpr bool is_random_access_iterator = true; + static constexpr bool is_sorted_iterator = true; + + hb_sorted_array_t () : hb_array_t<Type> () {} + hb_sorted_array_t (Type *array_, unsigned int length_) : hb_array_t<Type> (array_, length_) {} + template <unsigned int length_> + hb_sorted_array_t (Type (&array_)[length_]) : hb_array_t<Type> (array_) {} + + template <typename U, + hb_enable_if (hb_is_cr_convertible(U, Type))> + hb_sorted_array_t (const hb_array_t<U> &o) : + hb_iter_t<hb_sorted_array_t, Type&> (), + hb_array_t<Type> (o) {} + template <typename U, + hb_enable_if (hb_is_cr_convertible(U, Type))> + hb_sorted_array_t& operator = (const hb_array_t<U> &o) + { hb_array_t<Type> (*this) = o; return *this; } + + /* Iterator implementation. */ + bool operator != (const hb_sorted_array_t& o) const + { return this->arrayZ != o.arrayZ || this->length != o.length; } + + hb_sorted_array_t sub_array (unsigned int start_offset, unsigned int *seg_count /* IN/OUT */) const + { return hb_sorted_array_t (((const hb_array_t<Type> *) (this))->sub_array (start_offset, seg_count)); } + hb_sorted_array_t sub_array (unsigned int start_offset, unsigned int seg_count) const + { return sub_array (start_offset, &seg_count); } + + hb_sorted_array_t truncate (unsigned length) const { return sub_array (0, length); } + + template <typename T> + Type *bsearch (const T &x, Type *not_found = nullptr) + { + unsigned int i; + return bfind (x, &i) ? &this->arrayZ[i] : not_found; + } + template <typename T> + const Type *bsearch (const T &x, const Type *not_found = nullptr) const + { + unsigned int i; + return bfind (x, &i) ? &this->arrayZ[i] : not_found; + } + template <typename T> + bool bfind (const T &x, unsigned int *i = nullptr, + hb_bfind_not_found_t not_found = HB_BFIND_NOT_FOUND_DONT_STORE, + unsigned int to_store = (unsigned int) -1) const + { + unsigned pos; + + if (bsearch_impl (x, &pos)) + { + if (i) + *i = pos; + return true; + } + + if (i) + { + switch (not_found) + { + case HB_BFIND_NOT_FOUND_DONT_STORE: + break; + + case HB_BFIND_NOT_FOUND_STORE: + *i = to_store; + break; + + case HB_BFIND_NOT_FOUND_STORE_CLOSEST: + *i = pos; + break; + } + } + return false; + } + template <typename T> + bool bsearch_impl (const T &x, unsigned *pos) const + { + return hb_bsearch_impl (pos, + x, + this->arrayZ, + this->length, + sizeof (Type), + _hb_cmp_method<T, Type>); + } +}; +template <typename T> inline hb_sorted_array_t<T> +hb_sorted_array (T *array, unsigned int length) +{ return hb_sorted_array_t<T> (array, length); } +template <typename T, unsigned int length_> inline hb_sorted_array_t<T> +hb_sorted_array (T (&array_)[length_]) +{ return hb_sorted_array_t<T> (array_); } + +template <typename T> +bool hb_array_t<T>::operator == (const hb_array_t<T> &o) const +{ + if (o.length != this->length) return false; + for (unsigned int i = 0; i < this->length; i++) { + if (this->arrayZ[i] != o.arrayZ[i]) return false; + } + return true; +} + +/* TODO Specialize opeator== for hb_bytes_t and hb_ubytes_t. */ + +template <> +inline uint32_t hb_array_t<const char>::hash () const { + uint32_t current = 0; + for (unsigned int i = 0; i < this->length; i++) + current = current * 31 + (uint32_t) (this->arrayZ[i] * 2654435761u); + return current; +} + +template <> +inline uint32_t hb_array_t<const unsigned char>::hash () const { + uint32_t current = 0; + for (unsigned int i = 0; i < this->length; i++) + current = current * 31 + (uint32_t) (this->arrayZ[i] * 2654435761u); + return current; +} + + +typedef hb_array_t<const char> hb_bytes_t; +typedef hb_array_t<const unsigned char> hb_ubytes_t; + + + +#endif /* HB_ARRAY_HH */ diff --git a/thirdparty/harfbuzz/src/hb-atomic.hh b/thirdparty/harfbuzz/src/hb-atomic.hh new file mode 100644 index 0000000000..b3fb296b4e --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-atomic.hh @@ -0,0 +1,295 @@ +/* + * Copyright © 2007 Chris Wilson + * Copyright © 2009,2010 Red Hat, Inc. + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_ATOMIC_HH +#define HB_ATOMIC_HH + +#include "hb.hh" +#include "hb-meta.hh" + + +/* + * Atomic integers and pointers. + */ + + +/* We need external help for these */ + +#if defined(hb_atomic_int_impl_add) \ + && defined(hb_atomic_ptr_impl_get) \ + && defined(hb_atomic_ptr_impl_cmpexch) + +/* Defined externally, i.e. in config.h. */ + + +#elif !defined(HB_NO_MT) && defined(__ATOMIC_ACQUIRE) + +/* C++11-style GCC primitives. */ + +#define _hb_memory_barrier() __sync_synchronize () + +#define hb_atomic_int_impl_add(AI, V) __atomic_fetch_add ((AI), (V), __ATOMIC_ACQ_REL) +#define hb_atomic_int_impl_set_relaxed(AI, V) __atomic_store_n ((AI), (V), __ATOMIC_RELAXED) +#define hb_atomic_int_impl_set(AI, V) __atomic_store_n ((AI), (V), __ATOMIC_RELEASE) +#define hb_atomic_int_impl_get_relaxed(AI) __atomic_load_n ((AI), __ATOMIC_RELAXED) +#define hb_atomic_int_impl_get(AI) __atomic_load_n ((AI), __ATOMIC_ACQUIRE) + +#define hb_atomic_ptr_impl_set_relaxed(P, V) __atomic_store_n ((P), (V), __ATOMIC_RELAXED) +#define hb_atomic_ptr_impl_get_relaxed(P) __atomic_load_n ((P), __ATOMIC_RELAXED) +#define hb_atomic_ptr_impl_get(P) __atomic_load_n ((P), __ATOMIC_ACQUIRE) +static inline bool +_hb_atomic_ptr_impl_cmplexch (const void **P, const void *O_, const void *N) +{ + const void *O = O_; // Need lvalue + return __atomic_compare_exchange_n ((void **) P, (void **) &O, (void *) N, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED); +} +#define hb_atomic_ptr_impl_cmpexch(P,O,N) _hb_atomic_ptr_impl_cmplexch ((const void **) (P), (O), (N)) + +#elif !defined(HB_NO_MT) && __cplusplus >= 201103L + +/* C++11 atomics. */ + +#include <atomic> + +#define _hb_memory_barrier() std::atomic_thread_fence(std::memory_order_ack_rel) +#define _hb_memory_r_barrier() std::atomic_thread_fence(std::memory_order_acquire) +#define _hb_memory_w_barrier() std::atomic_thread_fence(std::memory_order_release) + +#define hb_atomic_int_impl_add(AI, V) (reinterpret_cast<std::atomic<int> *> (AI)->fetch_add ((V), std::memory_order_acq_rel)) +#define hb_atomic_int_impl_set_relaxed(AI, V) (reinterpret_cast<std::atomic<int> *> (AI)->store ((V), std::memory_order_relaxed)) +#define hb_atomic_int_impl_set(AI, V) (reinterpret_cast<std::atomic<int> *> (AI)->store ((V), std::memory_order_release)) +#define hb_atomic_int_impl_get_relaxed(AI) (reinterpret_cast<std::atomic<int> const *> (AI)->load (std::memory_order_relaxed)) +#define hb_atomic_int_impl_get(AI) (reinterpret_cast<std::atomic<int> const *> (AI)->load (std::memory_order_acquire)) + +#define hb_atomic_ptr_impl_set_relaxed(P, V) (reinterpret_cast<std::atomic<void*> *> (P)->store ((V), std::memory_order_relaxed)) +#define hb_atomic_ptr_impl_get_relaxed(P) (reinterpret_cast<std::atomic<void*> const *> (P)->load (std::memory_order_relaxed)) +#define hb_atomic_ptr_impl_get(P) (reinterpret_cast<std::atomic<void*> *> (P)->load (std::memory_order_acquire)) +static inline bool +_hb_atomic_ptr_impl_cmplexch (const void **P, const void *O_, const void *N) +{ + const void *O = O_; // Need lvalue + return reinterpret_cast<std::atomic<const void*> *> (P)->compare_exchange_weak (O, N, std::memory_order_acq_rel, std::memory_order_relaxed); +} +#define hb_atomic_ptr_impl_cmpexch(P,O,N) _hb_atomic_ptr_impl_cmplexch ((const void **) (P), (O), (N)) + + +#elif !defined(HB_NO_MT) && defined(_WIN32) + +#include <windows.h> + +static inline void _hb_memory_barrier () +{ +#if !defined(MemoryBarrier) && !defined(__MINGW32_VERSION) + /* MinGW has a convoluted history of supporting MemoryBarrier. */ + LONG dummy = 0; + InterlockedExchange (&dummy, 1); +#else + MemoryBarrier (); +#endif +} +#define _hb_memory_barrier() _hb_memory_barrier () + +#define hb_atomic_int_impl_add(AI, V) InterlockedExchangeAdd ((LONG *) (AI), (V)) +static_assert ((sizeof (LONG) == sizeof (int)), ""); + +#define hb_atomic_ptr_impl_cmpexch(P,O,N) (InterlockedCompareExchangePointer ((P), (N), (O)) == (O)) + + +#elif !defined(HB_NO_MT) && defined(HAVE_INTEL_ATOMIC_PRIMITIVES) + +#define _hb_memory_barrier() __sync_synchronize () + +#define hb_atomic_int_impl_add(AI, V) __sync_fetch_and_add ((AI), (V)) + +#define hb_atomic_ptr_impl_cmpexch(P,O,N) __sync_bool_compare_and_swap ((P), (O), (N)) + + +#elif !defined(HB_NO_MT) && defined(HAVE_SOLARIS_ATOMIC_OPS) + +#include <atomic.h> +#include <mbarrier.h> + +#define _hb_memory_r_barrier() __machine_r_barrier () +#define _hb_memory_w_barrier() __machine_w_barrier () +#define _hb_memory_barrier() __machine_rw_barrier () + +static inline int _hb_fetch_and_add (int *AI, int V) +{ + _hb_memory_w_barrier (); + int result = atomic_add_int_nv ((uint_t *) AI, V) - V; + _hb_memory_r_barrier (); + return result; +} +static inline bool _hb_compare_and_swap_ptr (void **P, void *O, void *N) +{ + _hb_memory_w_barrier (); + bool result = atomic_cas_ptr (P, O, N) == O; + _hb_memory_r_barrier (); + return result; +} + +#define hb_atomic_int_impl_add(AI, V) _hb_fetch_and_add ((AI), (V)) + +#define hb_atomic_ptr_impl_cmpexch(P,O,N) _hb_compare_and_swap_ptr ((P), (O), (N)) + + +#elif !defined(HB_NO_MT) && defined(__APPLE__) + +#include <libkern/OSAtomic.h> +#ifdef __MAC_OS_X_MIN_REQUIRED +#include <AvailabilityMacros.h> +#elif defined(__IPHONE_OS_MIN_REQUIRED) +#include <Availability.h> +#endif + +#define _hb_memory_barrier() OSMemoryBarrier () + +#define hb_atomic_int_impl_add(AI, V) (OSAtomicAdd32Barrier ((V), (AI)) - (V)) + +#if (MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_4 || __IPHONE_VERSION_MIN_REQUIRED >= 20100) +#define hb_atomic_ptr_impl_cmpexch(P,O,N) OSAtomicCompareAndSwapPtrBarrier ((O), (N), (P)) +#else +#if __ppc64__ || __x86_64__ || __aarch64__ +#define hb_atomic_ptr_impl_cmpexch(P,O,N) OSAtomicCompareAndSwap64Barrier ((int64_t) (O), (int64_t) (N), (int64_t*) (P)) +#else +#define hb_atomic_ptr_impl_cmpexch(P,O,N) OSAtomicCompareAndSwap32Barrier ((int32_t) (O), (int32_t) (N), (int32_t*) (P)) +#endif +#endif + + +#elif !defined(HB_NO_MT) && defined(_AIX) && (defined(__IBMCPP__) || defined(__ibmxl__)) + +#include <builtins.h> + +#define _hb_memory_barrier() __lwsync () + +static inline int _hb_fetch_and_add (int *AI, int V) +{ + _hb_memory_barrier (); + int result = __fetch_and_add (AI, V); + _hb_memory_barrier (); + return result; +} +static inline bool _hb_compare_and_swaplp (long *P, long O, long N) +{ + _hb_memory_barrier (); + bool result = __compare_and_swaplp (P, &O, N); + _hb_memory_barrier (); + return result; +} + +#define hb_atomic_int_impl_add(AI, V) _hb_fetch_and_add ((AI), (V)) + +#define hb_atomic_ptr_impl_cmpexch(P,O,N) _hb_compare_and_swaplp ((long *) (P), (long) (O), (long) (N)) +static_assert ((sizeof (long) == sizeof (void *)), ""); + + +#elif defined(HB_NO_MT) + +#define hb_atomic_int_impl_add(AI, V) ((*(AI) += (V)) - (V)) + +#define _hb_memory_barrier() do {} while (0) + +#define hb_atomic_ptr_impl_cmpexch(P,O,N) (* (void **) (P) == (void *) (O) ? (* (void **) (P) = (void *) (N), true) : false) + + +#else + +#error "Could not find any system to define atomic_int macros." +#error "Check hb-atomic.hh for possible resolutions." + +#endif + + +#ifndef _hb_memory_r_barrier +#define _hb_memory_r_barrier() _hb_memory_barrier () +#endif +#ifndef _hb_memory_w_barrier +#define _hb_memory_w_barrier() _hb_memory_barrier () +#endif +#ifndef hb_atomic_int_impl_set_relaxed +#define hb_atomic_int_impl_set_relaxed(AI, V) (*(AI) = (V)) +#endif +#ifndef hb_atomic_int_impl_get_relaxed +#define hb_atomic_int_impl_get_relaxed(AI) (*(AI)) +#endif + +#ifndef hb_atomic_ptr_impl_set_relaxed +#define hb_atomic_ptr_impl_set_relaxed(P, V) (*(P) = (V)) +#endif +#ifndef hb_atomic_ptr_impl_get_relaxed +#define hb_atomic_ptr_impl_get_relaxed(P) (*(P)) +#endif +#ifndef hb_atomic_int_impl_set +inline void hb_atomic_int_impl_set (int *AI, int v) { _hb_memory_w_barrier (); *AI = v; } +#endif +#ifndef hb_atomic_int_impl_get +inline int hb_atomic_int_impl_get (const int *AI) { int v = *AI; _hb_memory_r_barrier (); return v; } +#endif +#ifndef hb_atomic_ptr_impl_get +inline void *hb_atomic_ptr_impl_get (void ** const P) { void *v = *P; _hb_memory_r_barrier (); return v; } +#endif + + +#define HB_ATOMIC_INT_INIT(V) {V} +struct hb_atomic_int_t +{ + void set_relaxed (int v_) { hb_atomic_int_impl_set_relaxed (&v, v_); } + void set (int v_) { hb_atomic_int_impl_set (&v, v_); } + int get_relaxed () const { return hb_atomic_int_impl_get_relaxed (&v); } + int get () const { return hb_atomic_int_impl_get (&v); } + int inc () { return hb_atomic_int_impl_add (&v, 1); } + int dec () { return hb_atomic_int_impl_add (&v, -1); } + + int v; +}; + + +#define HB_ATOMIC_PTR_INIT(V) {V} +template <typename P> +struct hb_atomic_ptr_t +{ + typedef hb_remove_pointer<P> T; + + void init (T* v_ = nullptr) { set_relaxed (v_); } + void set_relaxed (T* v_) { hb_atomic_ptr_impl_set_relaxed (&v, v_); } + T *get_relaxed () const { return (T *) hb_atomic_ptr_impl_get_relaxed (&v); } + T *get () const { return (T *) hb_atomic_ptr_impl_get ((void **) &v); } + bool cmpexch (const T *old, T *new_) const { return hb_atomic_ptr_impl_cmpexch ((void **) &v, (void *) old, (void *) new_); } + + T * operator -> () const { return get (); } + template <typename C> operator C * () const { return get (); } + + T *v; +}; + + +#endif /* HB_ATOMIC_HH */ diff --git a/thirdparty/harfbuzz/src/hb-bimap.hh b/thirdparty/harfbuzz/src/hb-bimap.hh new file mode 100644 index 0000000000..e9f3a6a52d --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-bimap.hh @@ -0,0 +1,166 @@ +/* + * Copyright © 2019 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#ifndef HB_BIMAP_HH +#define HB_BIMAP_HH + +#include "hb.hh" +#include "hb-map.hh" + +/* Bi-directional map */ +struct hb_bimap_t +{ + hb_bimap_t () { init (); } + ~hb_bimap_t () { fini (); } + + void init () + { + forw_map.init (); + back_map.init (); + } + + void fini () + { + forw_map.fini (); + back_map.fini (); + } + + void reset () + { + forw_map.reset (); + back_map.reset (); + } + + bool in_error () const { return forw_map.in_error () || back_map.in_error (); } + + void set (hb_codepoint_t lhs, hb_codepoint_t rhs) + { + if (unlikely (lhs == HB_MAP_VALUE_INVALID)) return; + if (unlikely (rhs == HB_MAP_VALUE_INVALID)) { del (lhs); return; } + forw_map.set (lhs, rhs); + back_map.set (rhs, lhs); + } + + hb_codepoint_t get (hb_codepoint_t lhs) const { return forw_map.get (lhs); } + hb_codepoint_t backward (hb_codepoint_t rhs) const { return back_map.get (rhs); } + + hb_codepoint_t operator [] (hb_codepoint_t lhs) const { return get (lhs); } + bool has (hb_codepoint_t lhs, hb_codepoint_t *vp = nullptr) const { return forw_map.has (lhs, vp); } + + void del (hb_codepoint_t lhs) + { + back_map.del (get (lhs)); + forw_map.del (lhs); + } + + void clear () + { + forw_map.clear (); + back_map.clear (); + } + + bool is_empty () const { return get_population () == 0; } + + unsigned int get_population () const { return forw_map.get_population (); } + + protected: + hb_map_t forw_map; + hb_map_t back_map; +}; + +/* Inremental bimap: only lhs is given, rhs is incrementally assigned */ +struct hb_inc_bimap_t : hb_bimap_t +{ + hb_inc_bimap_t () { init (); } + + void init () + { + hb_bimap_t::init (); + next_value = 0; + } + + /* Add a mapping from lhs to rhs with a unique value if lhs is unknown. + * Return the rhs value as the result. + */ + hb_codepoint_t add (hb_codepoint_t lhs) + { + hb_codepoint_t rhs = forw_map[lhs]; + if (rhs == HB_MAP_VALUE_INVALID) + { + rhs = next_value++; + set (lhs, rhs); + } + return rhs; + } + + hb_codepoint_t skip () + { return next_value++; } + + hb_codepoint_t get_next_value () const + { return next_value; } + + void add_set (const hb_set_t *set) + { + hb_codepoint_t i = HB_SET_VALUE_INVALID; + while (hb_set_next (set, &i)) add (i); + } + + /* Create an identity map. */ + bool identity (unsigned int size) + { + clear (); + for (hb_codepoint_t i = 0; i < size; i++) set (i, i); + return !in_error (); + } + + protected: + static int cmp_id (const void* a, const void* b) + { return (int)*(const hb_codepoint_t *)a - (int)*(const hb_codepoint_t *)b; } + + public: + /* Optional: after finished adding all mappings in a random order, + * reassign rhs to lhs so that they are in the same order. */ + void sort () + { + hb_codepoint_t count = get_population (); + hb_vector_t <hb_codepoint_t> work; + work.resize (count); + + for (hb_codepoint_t rhs = 0; rhs < count; rhs++) + work[rhs] = back_map[rhs]; + + work.qsort (cmp_id); + + clear (); + for (hb_codepoint_t rhs = 0; rhs < count; rhs++) + set (work[rhs], rhs); + } + + protected: + unsigned int next_value; +}; + +#endif /* HB_BIMAP_HH */ diff --git a/thirdparty/harfbuzz/src/hb-blob.cc b/thirdparty/harfbuzz/src/hb-blob.cc new file mode 100644 index 0000000000..94ed50fd3c --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-blob.cc @@ -0,0 +1,717 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + */ + +#include "hb.hh" +#include "hb-blob.hh" + +#ifdef HAVE_SYS_MMAN_H +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif /* HAVE_UNISTD_H */ +#include <sys/mman.h> +#endif /* HAVE_SYS_MMAN_H */ + +#include <stdio.h> +#include <stdlib.h> + + +/** + * SECTION: hb-blob + * @title: hb-blob + * @short_description: Binary data containers + * @include: hb.h + * + * Blobs wrap a chunk of binary data to handle lifecycle management of data + * while it is passed between client and HarfBuzz. Blobs are primarily used + * to create font faces, but also to access font face tables, as well as + * pass around other binary data. + **/ + + +/** + * hb_blob_create: (skip) + * @data: Pointer to blob data. + * @length: Length of @data in bytes. + * @mode: Memory mode for @data. + * @user_data: Data parameter to pass to @destroy. + * @destroy: Callback to call when @data is not needed anymore. + * + * Creates a new "blob" object wrapping @data. The @mode parameter is used + * to negotiate ownership and lifecycle of @data. + * + * Return value: New blob, or the empty blob if something failed or if @length is + * zero. Destroy with hb_blob_destroy(). + * + * Since: 0.9.2 + **/ +hb_blob_t * +hb_blob_create (const char *data, + unsigned int length, + hb_memory_mode_t mode, + void *user_data, + hb_destroy_func_t destroy) +{ + hb_blob_t *blob; + + if (!length || + length >= 1u << 31 || + !(blob = hb_object_create<hb_blob_t> ())) { + if (destroy) + destroy (user_data); + return hb_blob_get_empty (); + } + + blob->data = data; + blob->length = length; + blob->mode = mode; + + blob->user_data = user_data; + blob->destroy = destroy; + + if (blob->mode == HB_MEMORY_MODE_DUPLICATE) { + blob->mode = HB_MEMORY_MODE_READONLY; + if (!blob->try_make_writable ()) { + hb_blob_destroy (blob); + return hb_blob_get_empty (); + } + } + + return blob; +} + +static void +_hb_blob_destroy (void *data) +{ + hb_blob_destroy ((hb_blob_t *) data); +} + +/** + * hb_blob_create_sub_blob: + * @parent: Parent blob. + * @offset: Start offset of sub-blob within @parent, in bytes. + * @length: Length of sub-blob. + * + * Returns a blob that represents a range of bytes in @parent. The new + * blob is always created with %HB_MEMORY_MODE_READONLY, meaning that it + * will never modify data in the parent blob. The parent data is not + * expected to be modified, and will result in undefined behavior if it + * is. + * + * Makes @parent immutable. + * + * Return value: New blob, or the empty blob if something failed or if + * @length is zero or @offset is beyond the end of @parent's data. Destroy + * with hb_blob_destroy(). + * + * Since: 0.9.2 + **/ +hb_blob_t * +hb_blob_create_sub_blob (hb_blob_t *parent, + unsigned int offset, + unsigned int length) +{ + hb_blob_t *blob; + + if (!length || !parent || offset >= parent->length) + return hb_blob_get_empty (); + + hb_blob_make_immutable (parent); + + blob = hb_blob_create (parent->data + offset, + hb_min (length, parent->length - offset), + HB_MEMORY_MODE_READONLY, + hb_blob_reference (parent), + _hb_blob_destroy); + + return blob; +} + +/** + * hb_blob_copy_writable_or_fail: + * @blob: A blob. + * + * Makes a writable copy of @blob. + * + * Return value: New blob, or nullptr if allocation failed. + * + * Since: 1.8.0 + **/ +hb_blob_t * +hb_blob_copy_writable_or_fail (hb_blob_t *blob) +{ + blob = hb_blob_create (blob->data, + blob->length, + HB_MEMORY_MODE_DUPLICATE, + nullptr, + nullptr); + + if (unlikely (blob == hb_blob_get_empty ())) + blob = nullptr; + + return blob; +} + +/** + * hb_blob_get_empty: + * + * Returns the singleton empty blob. + * + * See TODO:link object types for more information. + * + * Return value: (transfer full): the empty blob. + * + * Since: 0.9.2 + **/ +hb_blob_t * +hb_blob_get_empty () +{ + return const_cast<hb_blob_t *> (&Null (hb_blob_t)); +} + +/** + * hb_blob_reference: (skip) + * @blob: a blob. + * + * Increases the reference count on @blob. + * + * See TODO:link object types for more information. + * + * Return value: @blob. + * + * Since: 0.9.2 + **/ +hb_blob_t * +hb_blob_reference (hb_blob_t *blob) +{ + return hb_object_reference (blob); +} + +/** + * hb_blob_destroy: (skip) + * @blob: a blob. + * + * Decreases the reference count on @blob, and if it reaches zero, destroys + * @blob, freeing all memory, possibly calling the destroy-callback the blob + * was created for if it has not been called already. + * + * See TODO:link object types for more information. + * + * Since: 0.9.2 + **/ +void +hb_blob_destroy (hb_blob_t *blob) +{ + if (!hb_object_destroy (blob)) return; + + blob->fini_shallow (); + + free (blob); +} + +/** + * hb_blob_set_user_data: (skip) + * @blob: a blob. + * @key: key for data to set. + * @data: data to set. + * @destroy: callback to call when @data is not needed anymore. + * @replace: whether to replace an existing data with the same key. + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_blob_set_user_data (hb_blob_t *blob, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace) +{ + return hb_object_set_user_data (blob, key, data, destroy, replace); +} + +/** + * hb_blob_get_user_data: (skip) + * @blob: a blob. + * @key: key for data to get. + * + * + * + * Return value: (transfer none): + * + * Since: 0.9.2 + **/ +void * +hb_blob_get_user_data (hb_blob_t *blob, + hb_user_data_key_t *key) +{ + return hb_object_get_user_data (blob, key); +} + + +/** + * hb_blob_make_immutable: + * @blob: a blob. + * + * + * + * Since: 0.9.2 + **/ +void +hb_blob_make_immutable (hb_blob_t *blob) +{ + if (hb_object_is_immutable (blob)) + return; + + hb_object_make_immutable (blob); +} + +/** + * hb_blob_is_immutable: + * @blob: a blob. + * + * + * + * Return value: TODO + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_blob_is_immutable (hb_blob_t *blob) +{ + return hb_object_is_immutable (blob); +} + + +/** + * hb_blob_get_length: + * @blob: a blob. + * + * + * + * Return value: the length of blob data in bytes. + * + * Since: 0.9.2 + **/ +unsigned int +hb_blob_get_length (hb_blob_t *blob) +{ + return blob->length; +} + +/** + * hb_blob_get_data: + * @blob: a blob. + * @length: (out): + * + * + * + * Returns: (transfer none) (array length=length): + * + * Since: 0.9.2 + **/ +const char * +hb_blob_get_data (hb_blob_t *blob, unsigned int *length) +{ + if (length) + *length = blob->length; + + return blob->data; +} + +/** + * hb_blob_get_data_writable: + * @blob: a blob. + * @length: (out): output length of the writable data. + * + * Tries to make blob data writable (possibly copying it) and + * return pointer to data. + * + * Fails if blob has been made immutable, or if memory allocation + * fails. + * + * Returns: (transfer none) (array length=length): Writable blob data, + * or %NULL if failed. + * + * Since: 0.9.2 + **/ +char * +hb_blob_get_data_writable (hb_blob_t *blob, unsigned int *length) +{ + if (!blob->try_make_writable ()) { + if (length) + *length = 0; + + return nullptr; + } + + if (length) + *length = blob->length; + + return const_cast<char *> (blob->data); +} + + +bool +hb_blob_t::try_make_writable_inplace_unix () +{ +#if defined(HAVE_SYS_MMAN_H) && defined(HAVE_MPROTECT) + uintptr_t pagesize = -1, mask, length; + const char *addr; + +#if defined(HAVE_SYSCONF) && defined(_SC_PAGE_SIZE) + pagesize = (uintptr_t) sysconf (_SC_PAGE_SIZE); +#elif defined(HAVE_SYSCONF) && defined(_SC_PAGESIZE) + pagesize = (uintptr_t) sysconf (_SC_PAGESIZE); +#elif defined(HAVE_GETPAGESIZE) + pagesize = (uintptr_t) getpagesize (); +#endif + + if ((uintptr_t) -1L == pagesize) { + DEBUG_MSG_FUNC (BLOB, this, "failed to get pagesize: %s", strerror (errno)); + return false; + } + DEBUG_MSG_FUNC (BLOB, this, "pagesize is %lu", (unsigned long) pagesize); + + mask = ~(pagesize-1); + addr = (const char *) (((uintptr_t) this->data) & mask); + length = (const char *) (((uintptr_t) this->data + this->length + pagesize-1) & mask) - addr; + DEBUG_MSG_FUNC (BLOB, this, + "calling mprotect on [%p..%p] (%lu bytes)", + addr, addr+length, (unsigned long) length); + if (-1 == mprotect ((void *) addr, length, PROT_READ | PROT_WRITE)) { + DEBUG_MSG_FUNC (BLOB, this, "mprotect failed: %s", strerror (errno)); + return false; + } + + this->mode = HB_MEMORY_MODE_WRITABLE; + + DEBUG_MSG_FUNC (BLOB, this, + "successfully made [%p..%p] (%lu bytes) writable\n", + addr, addr+length, (unsigned long) length); + return true; +#else + return false; +#endif +} + +bool +hb_blob_t::try_make_writable_inplace () +{ + DEBUG_MSG_FUNC (BLOB, this, "making writable inplace\n"); + + if (this->try_make_writable_inplace_unix ()) + return true; + + DEBUG_MSG_FUNC (BLOB, this, "making writable -> FAILED\n"); + + /* Failed to make writable inplace, mark that */ + this->mode = HB_MEMORY_MODE_READONLY; + return false; +} + +bool +hb_blob_t::try_make_writable () +{ + if (hb_object_is_immutable (this)) + return false; + + if (this->mode == HB_MEMORY_MODE_WRITABLE) + return true; + + if (this->mode == HB_MEMORY_MODE_READONLY_MAY_MAKE_WRITABLE && this->try_make_writable_inplace ()) + return true; + + if (this->mode == HB_MEMORY_MODE_WRITABLE) + return true; + + + DEBUG_MSG_FUNC (BLOB, this, "current data is -> %p\n", this->data); + + char *new_data; + + new_data = (char *) malloc (this->length); + if (unlikely (!new_data)) + return false; + + DEBUG_MSG_FUNC (BLOB, this, "dupped successfully -> %p\n", this->data); + + memcpy (new_data, this->data, this->length); + this->destroy_user_data (); + this->mode = HB_MEMORY_MODE_WRITABLE; + this->data = new_data; + this->user_data = new_data; + this->destroy = free; + + return true; +} + +/* + * Mmap + */ + +#ifndef HB_NO_OPEN +#ifdef HAVE_MMAP +# if !defined(HB_NO_RESOURCE_FORK) && defined(__APPLE__) +# include <sys/paths.h> +# endif +# include <sys/types.h> +# include <sys/stat.h> +# include <fcntl.h> +#endif + +#ifdef _WIN32 +# include <windows.h> +#else +# ifndef O_BINARY +# define O_BINARY 0 +# endif +#endif + +#ifndef MAP_NORESERVE +# define MAP_NORESERVE 0 +#endif + +struct hb_mapped_file_t +{ + char *contents; + unsigned long length; +#ifdef _WIN32 + HANDLE mapping; +#endif +}; + +#if (defined(HAVE_MMAP) || defined(_WIN32)) && !defined(HB_NO_MMAP) +static void +_hb_mapped_file_destroy (void *file_) +{ + hb_mapped_file_t *file = (hb_mapped_file_t *) file_; +#ifdef HAVE_MMAP + munmap (file->contents, file->length); +#elif defined(_WIN32) + UnmapViewOfFile (file->contents); + CloseHandle (file->mapping); +#else + assert (0); // If we don't have mmap we shouldn't reach here +#endif + + free (file); +} +#endif + +#ifdef _PATH_RSRCFORKSPEC +static int +_open_resource_fork (const char *file_name, hb_mapped_file_t *file) +{ + size_t name_len = strlen (file_name); + size_t len = name_len + sizeof (_PATH_RSRCFORKSPEC); + + char *rsrc_name = (char *) malloc (len); + if (unlikely (!rsrc_name)) return -1; + + strncpy (rsrc_name, file_name, name_len); + strncpy (rsrc_name + name_len, _PATH_RSRCFORKSPEC, + sizeof (_PATH_RSRCFORKSPEC) - 1); + + int fd = open (rsrc_name, O_RDONLY | O_BINARY, 0); + free (rsrc_name); + + if (fd != -1) + { + struct stat st; + if (fstat (fd, &st) != -1) + file->length = (unsigned long) st.st_size; + else + { + close (fd); + fd = -1; + } + } + + return fd; +} +#endif + +/** + * hb_blob_create_from_file: + * @file_name: font filename. + * + * Returns: A hb_blob_t pointer with the content of the file + * + * Since: 1.7.7 + **/ +hb_blob_t * +hb_blob_create_from_file (const char *file_name) +{ + /* Adopted from glib's gmappedfile.c with Matthias Clasen and + Allison Lortie permission but changed a lot to suit our need. */ +#if defined(HAVE_MMAP) && !defined(HB_NO_MMAP) + hb_mapped_file_t *file = (hb_mapped_file_t *) calloc (1, sizeof (hb_mapped_file_t)); + if (unlikely (!file)) return hb_blob_get_empty (); + + int fd = open (file_name, O_RDONLY | O_BINARY, 0); + if (unlikely (fd == -1)) goto fail_without_close; + + struct stat st; + if (unlikely (fstat (fd, &st) == -1)) goto fail; + + file->length = (unsigned long) st.st_size; + +#ifdef _PATH_RSRCFORKSPEC + if (unlikely (file->length == 0)) + { + int rfd = _open_resource_fork (file_name, file); + if (rfd != -1) + { + close (fd); + fd = rfd; + } + } +#endif + + file->contents = (char *) mmap (nullptr, file->length, PROT_READ, + MAP_PRIVATE | MAP_NORESERVE, fd, 0); + + if (unlikely (file->contents == MAP_FAILED)) goto fail; + + close (fd); + + return hb_blob_create (file->contents, file->length, + HB_MEMORY_MODE_READONLY_MAY_MAKE_WRITABLE, (void *) file, + (hb_destroy_func_t) _hb_mapped_file_destroy); + +fail: + close (fd); +fail_without_close: + free (file); + +#elif defined(_WIN32) && !defined(HB_NO_MMAP) + hb_mapped_file_t *file = (hb_mapped_file_t *) calloc (1, sizeof (hb_mapped_file_t)); + if (unlikely (!file)) return hb_blob_get_empty (); + + HANDLE fd; + unsigned int size = strlen (file_name) + 1; + wchar_t * wchar_file_name = (wchar_t *) malloc (sizeof (wchar_t) * size); + if (unlikely (!wchar_file_name)) goto fail_without_close; + mbstowcs (wchar_file_name, file_name, size); +#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + { + CREATEFILE2_EXTENDED_PARAMETERS ceparams = { 0 }; + ceparams.dwSize = sizeof(CREATEFILE2_EXTENDED_PARAMETERS); + ceparams.dwFileAttributes = FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED & 0xFFFF; + ceparams.dwFileFlags = FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED & 0xFFF00000; + ceparams.dwSecurityQosFlags = FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED & 0x000F0000; + ceparams.lpSecurityAttributes = nullptr; + ceparams.hTemplateFile = nullptr; + fd = CreateFile2 (wchar_file_name, GENERIC_READ, FILE_SHARE_READ, + OPEN_EXISTING, &ceparams); + } +#else + fd = CreateFileW (wchar_file_name, GENERIC_READ, FILE_SHARE_READ, nullptr, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL|FILE_FLAG_OVERLAPPED, + nullptr); +#endif + free (wchar_file_name); + + if (unlikely (fd == INVALID_HANDLE_VALUE)) goto fail_without_close; + +#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + { + LARGE_INTEGER length; + GetFileSizeEx (fd, &length); + file->length = length.LowPart; + file->mapping = CreateFileMappingFromApp (fd, nullptr, PAGE_READONLY, length.QuadPart, nullptr); + } +#else + file->length = (unsigned long) GetFileSize (fd, nullptr); + file->mapping = CreateFileMapping (fd, nullptr, PAGE_READONLY, 0, 0, nullptr); +#endif + if (unlikely (!file->mapping)) goto fail; + +#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) + file->contents = (char *) MapViewOfFileFromApp (file->mapping, FILE_MAP_READ, 0, 0); +#else + file->contents = (char *) MapViewOfFile (file->mapping, FILE_MAP_READ, 0, 0, 0); +#endif + if (unlikely (!file->contents)) goto fail; + + CloseHandle (fd); + return hb_blob_create (file->contents, file->length, + HB_MEMORY_MODE_READONLY_MAY_MAKE_WRITABLE, (void *) file, + (hb_destroy_func_t) _hb_mapped_file_destroy); + +fail: + CloseHandle (fd); +fail_without_close: + free (file); + +#endif + + /* The following tries to read a file without knowing its size beforehand + It's used as a fallback for systems without mmap or to read from pipes */ + unsigned long len = 0, allocated = BUFSIZ * 16; + char *data = (char *) malloc (allocated); + if (unlikely (!data)) return hb_blob_get_empty (); + + FILE *fp = fopen (file_name, "rb"); + if (unlikely (!fp)) goto fread_fail_without_close; + + while (!feof (fp)) + { + if (allocated - len < BUFSIZ) + { + allocated *= 2; + /* Don't allocate and go more than ~536MB, our mmap reader still + can cover files like that but lets limit our fallback reader */ + if (unlikely (allocated > (2 << 28))) goto fread_fail; + char *new_data = (char *) realloc (data, allocated); + if (unlikely (!new_data)) goto fread_fail; + data = new_data; + } + + unsigned long addition = fread (data + len, 1, allocated - len, fp); + + int err = ferror (fp); +#ifdef EINTR // armcc doesn't have it + if (unlikely (err == EINTR)) continue; +#endif + if (unlikely (err)) goto fread_fail; + + len += addition; + } + fclose (fp); + + return hb_blob_create (data, len, HB_MEMORY_MODE_WRITABLE, data, + (hb_destroy_func_t) free); + +fread_fail: + fclose (fp); +fread_fail_without_close: + free (data); + return hb_blob_get_empty (); +} +#endif /* !HB_NO_OPEN */ diff --git a/thirdparty/harfbuzz/src/hb-blob.h b/thirdparty/harfbuzz/src/hb-blob.h new file mode 100644 index 0000000000..f80e9af2d9 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-blob.h @@ -0,0 +1,131 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_BLOB_H +#define HB_BLOB_H + +#include "hb-common.h" + +HB_BEGIN_DECLS + + +/* + * Note re various memory-modes: + * + * - In no case shall the HarfBuzz client modify memory + * that is passed to HarfBuzz in a blob. If there is + * any such possibility, MODE_DUPLICATE should be used + * such that HarfBuzz makes a copy immediately, + * + * - Use MODE_READONLY otherwise, unless you really really + * really know what you are doing, + * + * - MODE_WRITABLE is appropriate if you really made a + * copy of data solely for the purpose of passing to + * HarfBuzz and doing that just once (no reuse!), + * + * - If the font is mmap()ed, it's ok to use + * READONLY_MAY_MAKE_WRITABLE, however, using that mode + * correctly is very tricky. Use MODE_READONLY instead. + */ +typedef enum { + HB_MEMORY_MODE_DUPLICATE, + HB_MEMORY_MODE_READONLY, + HB_MEMORY_MODE_WRITABLE, + HB_MEMORY_MODE_READONLY_MAY_MAKE_WRITABLE +} hb_memory_mode_t; + +typedef struct hb_blob_t hb_blob_t; + +HB_EXTERN hb_blob_t * +hb_blob_create (const char *data, + unsigned int length, + hb_memory_mode_t mode, + void *user_data, + hb_destroy_func_t destroy); + +HB_EXTERN hb_blob_t * +hb_blob_create_from_file (const char *file_name); + +/* Always creates with MEMORY_MODE_READONLY. + * Even if the parent blob is writable, we don't + * want the user of the sub-blob to be able to + * modify the parent data as that data may be + * shared among multiple sub-blobs. + */ +HB_EXTERN hb_blob_t * +hb_blob_create_sub_blob (hb_blob_t *parent, + unsigned int offset, + unsigned int length); + +HB_EXTERN hb_blob_t * +hb_blob_copy_writable_or_fail (hb_blob_t *blob); + +HB_EXTERN hb_blob_t * +hb_blob_get_empty (void); + +HB_EXTERN hb_blob_t * +hb_blob_reference (hb_blob_t *blob); + +HB_EXTERN void +hb_blob_destroy (hb_blob_t *blob); + +HB_EXTERN hb_bool_t +hb_blob_set_user_data (hb_blob_t *blob, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace); + + +HB_EXTERN void * +hb_blob_get_user_data (hb_blob_t *blob, + hb_user_data_key_t *key); + + +HB_EXTERN void +hb_blob_make_immutable (hb_blob_t *blob); + +HB_EXTERN hb_bool_t +hb_blob_is_immutable (hb_blob_t *blob); + + +HB_EXTERN unsigned int +hb_blob_get_length (hb_blob_t *blob); + +HB_EXTERN const char * +hb_blob_get_data (hb_blob_t *blob, unsigned int *length); + +HB_EXTERN char * +hb_blob_get_data_writable (hb_blob_t *blob, unsigned int *length); + +HB_END_DECLS + +#endif /* HB_BLOB_H */ diff --git a/thirdparty/harfbuzz/src/hb-blob.hh b/thirdparty/harfbuzz/src/hb-blob.hh new file mode 100644 index 0000000000..d85bd823b0 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-blob.hh @@ -0,0 +1,97 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_BLOB_HH +#define HB_BLOB_HH + +#include "hb.hh" + + +/* + * hb_blob_t + */ + +struct hb_blob_t +{ + void fini_shallow () { destroy_user_data (); } + + void destroy_user_data () + { + if (destroy) + { + destroy (user_data); + user_data = nullptr; + destroy = nullptr; + } + } + + HB_INTERNAL bool try_make_writable (); + HB_INTERNAL bool try_make_writable_inplace (); + HB_INTERNAL bool try_make_writable_inplace_unix (); + + hb_bytes_t as_bytes () const { return hb_bytes_t (data, length); } + template <typename Type> + const Type* as () const { return as_bytes ().as<Type> (); } + + public: + hb_object_header_t header; + + const char *data; + unsigned int length; + hb_memory_mode_t mode; + + void *user_data; + hb_destroy_func_t destroy; +}; + + +/* + * hb_blob_ptr_t + */ + +template <typename P> +struct hb_blob_ptr_t +{ + typedef hb_remove_pointer<P> T; + + hb_blob_ptr_t (hb_blob_t *b_ = nullptr) : b (b_) {} + hb_blob_t * operator = (hb_blob_t *b_) { return b = b_; } + const T * operator -> () const { return get (); } + const T & operator * () const { return *get (); } + template <typename C> operator const C * () const { return get (); } + operator const char * () const { return (const char *) get (); } + const T * get () const { return b->as<T> (); } + hb_blob_t * get_blob () const { return b.get_raw (); } + unsigned int get_length () const { return b.get ()->length; } + void destroy () { hb_blob_destroy (b.get ()); b = nullptr; } + + hb_nonnull_ptr_t<hb_blob_t> b; +}; + + +#endif /* HB_BLOB_HH */ diff --git a/thirdparty/harfbuzz/src/hb-buffer-deserialize-json.hh b/thirdparty/harfbuzz/src/hb-buffer-deserialize-json.hh new file mode 100644 index 0000000000..1f9e2e91db --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-buffer-deserialize-json.hh @@ -0,0 +1,643 @@ + +#line 1 "hb-buffer-deserialize-json.rl" +/* + * Copyright © 2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_BUFFER_DESERIALIZE_JSON_HH +#define HB_BUFFER_DESERIALIZE_JSON_HH + +#include "hb.hh" + + +#line 36 "hb-buffer-deserialize-json.hh" +static const unsigned char _deserialize_json_trans_keys[] = { + 0u, 0u, 9u, 123u, 9u, 34u, 97u, 103u, 120u, 121u, 34u, 34u, 9u, 58u, 9u, 57u, + 48u, 57u, 9u, 125u, 9u, 125u, 9u, 125u, 34u, 34u, 9u, 58u, 9u, 57u, 48u, 57u, + 9u, 125u, 9u, 125u, 108u, 108u, 34u, 34u, 9u, 58u, 9u, 57u, 9u, 125u, 9u, 125u, + 120u, 121u, 34u, 34u, 9u, 58u, 9u, 57u, 48u, 57u, 9u, 125u, 9u, 125u, 34u, 34u, + 9u, 58u, 9u, 57u, 48u, 57u, 9u, 125u, 9u, 125u, 34u, 34u, 9u, 58u, 9u, 57u, + 65u, 122u, 34u, 122u, 9u, 125u, 9u, 125u, 9u, 93u, 9u, 123u, 0u, 0u, 0 +}; + +static const char _deserialize_json_key_spans[] = { + 0, 115, 26, 7, 2, 1, 50, 49, + 10, 117, 117, 117, 1, 50, 49, 10, + 117, 117, 1, 1, 50, 49, 117, 117, + 2, 1, 50, 49, 10, 117, 117, 1, + 50, 49, 10, 117, 117, 1, 50, 49, + 58, 89, 117, 117, 85, 115, 0 +}; + +static const short _deserialize_json_index_offsets[] = { + 0, 0, 116, 143, 151, 154, 156, 207, + 257, 268, 386, 504, 622, 624, 675, 725, + 736, 854, 972, 974, 976, 1027, 1077, 1195, + 1313, 1316, 1318, 1369, 1419, 1430, 1548, 1666, + 1668, 1719, 1769, 1780, 1898, 2016, 2018, 2069, + 2119, 2178, 2268, 2386, 2504, 2590, 2706 +}; + +static const char _deserialize_json_indicies[] = { + 0, 0, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 1, 3, 3, 3, + 3, 3, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 3, 1, 4, 1, + 5, 1, 6, 7, 1, 1, 8, 1, + 9, 10, 1, 11, 1, 11, 11, 11, + 11, 11, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 11, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 12, 1, + 12, 12, 12, 12, 12, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 12, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 13, 1, 1, 14, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 1, 16, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 1, 18, 18, 18, + 18, 18, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 18, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 19, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 20, 1, 21, 21, 21, 21, 21, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 21, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 3, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 22, + 1, 18, 18, 18, 18, 18, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 18, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 19, 1, 1, 1, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 20, 1, 23, + 1, 23, 23, 23, 23, 23, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 23, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 24, 1, 24, 24, 24, 24, + 24, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 24, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 25, 1, 1, 26, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 1, 28, 29, + 29, 29, 29, 29, 29, 29, 29, 29, + 1, 30, 30, 30, 30, 30, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 30, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 31, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 32, 1, 30, + 30, 30, 30, 30, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 30, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 31, 1, 1, 1, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 32, 1, 33, 1, 34, + 1, 34, 34, 34, 34, 34, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 34, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 35, 1, 35, 35, 35, 35, + 35, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 35, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 36, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 1, 38, 38, + 38, 38, 38, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 38, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 39, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 40, 1, 38, 38, 38, 38, + 38, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 38, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 39, + 1, 1, 1, 41, 41, 41, 41, 41, + 41, 41, 41, 41, 41, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 40, 1, 42, 43, 1, 44, 1, 44, + 44, 44, 44, 44, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 44, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 45, 1, 45, 45, 45, 45, 45, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 45, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 46, 1, + 1, 47, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 1, 49, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 1, 51, + 51, 51, 51, 51, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 51, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 52, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 53, 1, 51, 51, 51, + 51, 51, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 51, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 52, 1, 1, 1, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 53, 1, 54, 1, 54, 54, 54, + 54, 54, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 54, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 55, 1, + 55, 55, 55, 55, 55, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 55, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 56, 1, 1, 57, + 58, 58, 58, 58, 58, 58, 58, 58, + 58, 1, 59, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 1, 61, 61, 61, + 61, 61, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 61, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 62, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 63, 1, 61, 61, 61, 61, 61, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 61, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 62, 1, + 1, 1, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 63, + 1, 64, 1, 64, 64, 64, 64, 64, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 64, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 65, 1, 65, 65, + 65, 65, 65, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 65, 1, 66, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 67, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 1, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 1, 1, 1, 1, 1, 1, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 1, 70, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 71, 71, + 1, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 1, 1, 1, 1, 1, + 1, 1, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 1, 1, 1, 1, + 71, 1, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 1, 72, 72, 72, + 72, 72, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 72, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 73, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 74, 1, 72, 72, 72, 72, 72, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 72, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 73, 1, + 1, 1, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 74, + 1, 76, 76, 76, 76, 76, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 76, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 77, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 78, 1, 0, + 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 1, 0 +}; + +static const char _deserialize_json_trans_targs[] = { + 1, 0, 2, 2, 3, 4, 18, 24, + 37, 5, 12, 6, 7, 8, 9, 11, + 9, 11, 10, 2, 44, 10, 44, 13, + 14, 15, 16, 17, 16, 17, 10, 2, + 44, 19, 20, 21, 22, 23, 10, 2, + 44, 23, 25, 31, 26, 27, 28, 29, + 30, 29, 30, 10, 2, 44, 32, 33, + 34, 35, 36, 35, 36, 10, 2, 44, + 38, 39, 40, 42, 43, 41, 10, 41, + 10, 2, 44, 43, 44, 45, 46 +}; + +static const char _deserialize_json_trans_actions[] = { + 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 2, 2, + 0, 0, 3, 3, 4, 0, 5, 0, + 0, 2, 2, 2, 0, 0, 6, 6, + 7, 0, 0, 0, 2, 2, 8, 8, + 9, 0, 0, 0, 0, 0, 2, 2, + 2, 0, 0, 10, 10, 11, 0, 0, + 2, 2, 2, 0, 0, 12, 12, 13, + 0, 0, 0, 2, 2, 2, 14, 0, + 15, 15, 16, 0, 0, 0, 0 +}; + +static const int deserialize_json_start = 1; +static const int deserialize_json_first_final = 44; +static const int deserialize_json_error = 0; + +static const int deserialize_json_en_main = 1; + + +#line 97 "hb-buffer-deserialize-json.rl" + + +static hb_bool_t +_hb_buffer_deserialize_glyphs_json (hb_buffer_t *buffer, + const char *buf, + unsigned int buf_len, + const char **end_ptr, + hb_font_t *font) +{ + const char *p = buf, *pe = buf + buf_len; + + /* Ensure we have positions. */ + (void) hb_buffer_get_glyph_positions (buffer, nullptr); + + while (p < pe && ISSPACE (*p)) + p++; + if (p < pe && *p == (buffer->len ? ',' : '[')) + { + *end_ptr = ++p; + } + + const char *tok = nullptr; + int cs; + hb_glyph_info_t info = {0}; + hb_glyph_position_t pos = {0}; + +#line 466 "hb-buffer-deserialize-json.hh" + { + cs = deserialize_json_start; + } + +#line 471 "hb-buffer-deserialize-json.hh" + { + int _slen; + int _trans; + const unsigned char *_keys; + const char *_inds; + if ( p == pe ) + goto _test_eof; + if ( cs == 0 ) + goto _out; +_resume: + _keys = _deserialize_json_trans_keys + (cs<<1); + _inds = _deserialize_json_indicies + _deserialize_json_index_offsets[cs]; + + _slen = _deserialize_json_key_spans[cs]; + _trans = _inds[ _slen > 0 && _keys[0] <=(*p) && + (*p) <= _keys[1] ? + (*p) - _keys[0] : _slen ]; + + cs = _deserialize_json_trans_targs[_trans]; + + if ( _deserialize_json_trans_actions[_trans] == 0 ) + goto _again; + + switch ( _deserialize_json_trans_actions[_trans] ) { + case 1: +#line 38 "hb-buffer-deserialize-json.rl" + { + memset (&info, 0, sizeof (info)); + memset (&pos , 0, sizeof (pos )); +} + break; + case 5: +#line 43 "hb-buffer-deserialize-json.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 2: +#line 51 "hb-buffer-deserialize-json.rl" + { + tok = p; +} + break; + case 14: +#line 55 "hb-buffer-deserialize-json.rl" + { + if (!hb_font_glyph_from_string (font, + tok, p - tok, + &info.codepoint)) + return false; +} + break; + case 15: +#line 62 "hb-buffer-deserialize-json.rl" + { if (!parse_uint (tok, p, &info.codepoint)) return false; } + break; + case 8: +#line 63 "hb-buffer-deserialize-json.rl" + { if (!parse_uint (tok, p, &info.cluster )) return false; } + break; + case 10: +#line 64 "hb-buffer-deserialize-json.rl" + { if (!parse_int (tok, p, &pos.x_offset )) return false; } + break; + case 12: +#line 65 "hb-buffer-deserialize-json.rl" + { if (!parse_int (tok, p, &pos.y_offset )) return false; } + break; + case 3: +#line 66 "hb-buffer-deserialize-json.rl" + { if (!parse_int (tok, p, &pos.x_advance)) return false; } + break; + case 6: +#line 67 "hb-buffer-deserialize-json.rl" + { if (!parse_int (tok, p, &pos.y_advance)) return false; } + break; + case 16: +#line 62 "hb-buffer-deserialize-json.rl" + { if (!parse_uint (tok, p, &info.codepoint)) return false; } +#line 43 "hb-buffer-deserialize-json.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 9: +#line 63 "hb-buffer-deserialize-json.rl" + { if (!parse_uint (tok, p, &info.cluster )) return false; } +#line 43 "hb-buffer-deserialize-json.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 11: +#line 64 "hb-buffer-deserialize-json.rl" + { if (!parse_int (tok, p, &pos.x_offset )) return false; } +#line 43 "hb-buffer-deserialize-json.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 13: +#line 65 "hb-buffer-deserialize-json.rl" + { if (!parse_int (tok, p, &pos.y_offset )) return false; } +#line 43 "hb-buffer-deserialize-json.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 4: +#line 66 "hb-buffer-deserialize-json.rl" + { if (!parse_int (tok, p, &pos.x_advance)) return false; } +#line 43 "hb-buffer-deserialize-json.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 7: +#line 67 "hb-buffer-deserialize-json.rl" + { if (!parse_int (tok, p, &pos.y_advance)) return false; } +#line 43 "hb-buffer-deserialize-json.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; +#line 624 "hb-buffer-deserialize-json.hh" + } + +_again: + if ( cs == 0 ) + goto _out; + if ( ++p != pe ) + goto _resume; + _test_eof: {} + _out: {} + } + +#line 125 "hb-buffer-deserialize-json.rl" + + + *end_ptr = p; + + return p == pe && *(p-1) != ']'; +} + +#endif /* HB_BUFFER_DESERIALIZE_JSON_HH */ diff --git a/thirdparty/harfbuzz/src/hb-buffer-deserialize-text.hh b/thirdparty/harfbuzz/src/hb-buffer-deserialize-text.hh new file mode 100644 index 0000000000..67f0a1252f --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-buffer-deserialize-text.hh @@ -0,0 +1,571 @@ + +#line 1 "hb-buffer-deserialize-text.rl" +/* + * Copyright © 2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_BUFFER_DESERIALIZE_TEXT_HH +#define HB_BUFFER_DESERIALIZE_TEXT_HH + +#include "hb.hh" + + +#line 36 "hb-buffer-deserialize-text.hh" +static const unsigned char _deserialize_text_trans_keys[] = { + 0u, 0u, 9u, 122u, 45u, 57u, 48u, 57u, 45u, 57u, 48u, 57u, 48u, 57u, 45u, 57u, + 48u, 57u, 44u, 44u, 45u, 57u, 48u, 57u, 44u, 57u, 9u, 124u, 9u, 124u, 0u, 0u, + 9u, 122u, 9u, 124u, 9u, 124u, 9u, 124u, 9u, 124u, 9u, 124u, 9u, 124u, 9u, 124u, + 9u, 124u, 9u, 124u, 9u, 124u, 0 +}; + +static const char _deserialize_text_key_spans[] = { + 0, 114, 13, 10, 13, 10, 10, 13, + 10, 1, 13, 10, 14, 116, 116, 0, + 114, 116, 116, 116, 116, 116, 116, 116, + 116, 116, 116 +}; + +static const short _deserialize_text_index_offsets[] = { + 0, 0, 115, 129, 140, 154, 165, 176, + 190, 201, 203, 217, 228, 243, 360, 477, + 478, 593, 710, 827, 944, 1061, 1178, 1295, + 1412, 1529, 1646 +}; + +static const char _deserialize_text_indicies[] = { + 0, 0, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 1, 1, 1, 1, 1, 1, + 1, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 1, 1, 1, 1, 1, + 1, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 1, 5, 1, 1, 6, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 1, 8, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 1, 10, 1, 1, + 11, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 1, 13, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 1, 15, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 1, 17, 1, 1, 18, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 1, 20, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 1, 22, 1, 23, 1, 1, 24, + 25, 25, 25, 25, 25, 25, 25, 25, + 25, 1, 26, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 1, 22, 1, 1, + 1, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 1, 28, 28, 28, 28, + 28, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 28, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 29, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 30, 1, 1, 31, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 32, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 33, + 1, 34, 34, 34, 34, 34, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 34, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 35, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 36, 1, 1, 0, + 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 1, 1, 1, 1, 1, 1, 1, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 1, 1, 1, 1, 1, 1, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 1, 28, 28, 28, 28, 28, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 28, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 29, 1, 1, 1, + 1, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 1, 1, 1, 30, 1, + 1, 31, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 32, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 33, 1, 38, + 38, 38, 38, 38, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 38, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 39, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 40, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 41, 1, 42, 42, 42, 42, + 42, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 42, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 43, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 44, + 1, 42, 42, 42, 42, 42, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 42, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 43, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 44, 1, 38, 38, + 38, 38, 38, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 38, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 39, 1, 1, 1, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 40, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 41, 1, 45, 45, 45, 45, 45, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 45, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 46, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 47, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 48, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 49, 1, + 50, 50, 50, 50, 50, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 50, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 51, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 52, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 53, 1, 50, 50, 50, + 50, 50, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 50, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 51, + 1, 1, 1, 1, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 52, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 53, 1, 45, 45, 45, 45, 45, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 45, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 46, 1, 1, 1, + 1, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 1, 1, 1, 1, 1, + 1, 47, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 48, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 49, 1, 28, + 28, 28, 28, 28, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 28, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 29, 1, 55, 55, 1, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 1, 1, 1, 30, 1, 1, 31, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 1, 1, 32, 1, 55, 1, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, + 55, 1, 33, 1, 0 +}; + +static const char _deserialize_text_trans_targs[] = { + 1, 0, 13, 17, 26, 3, 18, 21, + 18, 21, 5, 19, 20, 19, 20, 22, + 25, 8, 9, 12, 9, 12, 10, 11, + 23, 24, 23, 24, 14, 2, 6, 7, + 15, 16, 14, 15, 16, 17, 14, 4, + 15, 16, 14, 15, 16, 14, 2, 7, + 15, 16, 14, 2, 15, 16, 25, 26 +}; + +static const char _deserialize_text_trans_actions[] = { + 0, 0, 1, 1, 1, 2, 2, 2, + 0, 0, 2, 2, 2, 0, 0, 2, + 2, 2, 2, 2, 0, 0, 3, 2, + 2, 2, 0, 0, 4, 5, 5, 5, + 4, 4, 0, 0, 0, 0, 6, 7, + 6, 6, 8, 8, 8, 9, 10, 10, + 9, 9, 11, 12, 11, 11, 0, 0 +}; + +static const char _deserialize_text_eof_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 4, 0, 0, + 0, 4, 6, 8, 8, 6, 9, 11, + 11, 9, 4 +}; + +static const int deserialize_text_start = 1; +static const int deserialize_text_first_final = 13; +static const int deserialize_text_error = 0; + +static const int deserialize_text_en_main = 1; + + +#line 91 "hb-buffer-deserialize-text.rl" + + +static hb_bool_t +_hb_buffer_deserialize_glyphs_text (hb_buffer_t *buffer, + const char *buf, + unsigned int buf_len, + const char **end_ptr, + hb_font_t *font) +{ + const char *p = buf, *pe = buf + buf_len; + + /* Ensure we have positions. */ + (void) hb_buffer_get_glyph_positions (buffer, nullptr); + + while (p < pe && ISSPACE (*p)) + p++; + if (p < pe && *p == (buffer->len ? '|' : '[')) + { + *end_ptr = ++p; + } + + const char *eof = pe, *tok = nullptr; + int cs; + hb_glyph_info_t info = {0}; + hb_glyph_position_t pos = {0}; + +#line 343 "hb-buffer-deserialize-text.hh" + { + cs = deserialize_text_start; + } + +#line 348 "hb-buffer-deserialize-text.hh" + { + int _slen; + int _trans; + const unsigned char *_keys; + const char *_inds; + if ( p == pe ) + goto _test_eof; + if ( cs == 0 ) + goto _out; +_resume: + _keys = _deserialize_text_trans_keys + (cs<<1); + _inds = _deserialize_text_indicies + _deserialize_text_index_offsets[cs]; + + _slen = _deserialize_text_key_spans[cs]; + _trans = _inds[ _slen > 0 && _keys[0] <=(*p) && + (*p) <= _keys[1] ? + (*p) - _keys[0] : _slen ]; + + cs = _deserialize_text_trans_targs[_trans]; + + if ( _deserialize_text_trans_actions[_trans] == 0 ) + goto _again; + + switch ( _deserialize_text_trans_actions[_trans] ) { + case 2: +#line 51 "hb-buffer-deserialize-text.rl" + { + tok = p; +} + break; + case 5: +#line 55 "hb-buffer-deserialize-text.rl" + { + if (!hb_font_glyph_from_string (font, + tok, p - tok, + &info.codepoint)) + return false; +} + break; + case 10: +#line 62 "hb-buffer-deserialize-text.rl" + { if (!parse_uint (tok, p, &info.cluster )) return false; } + break; + case 3: +#line 63 "hb-buffer-deserialize-text.rl" + { if (!parse_int (tok, p, &pos.x_offset )) return false; } + break; + case 12: +#line 64 "hb-buffer-deserialize-text.rl" + { if (!parse_int (tok, p, &pos.y_offset )) return false; } + break; + case 7: +#line 65 "hb-buffer-deserialize-text.rl" + { if (!parse_int (tok, p, &pos.x_advance)) return false; } + break; + case 1: +#line 38 "hb-buffer-deserialize-text.rl" + { + memset (&info, 0, sizeof (info)); + memset (&pos , 0, sizeof (pos )); +} +#line 51 "hb-buffer-deserialize-text.rl" + { + tok = p; +} + break; + case 4: +#line 55 "hb-buffer-deserialize-text.rl" + { + if (!hb_font_glyph_from_string (font, + tok, p - tok, + &info.codepoint)) + return false; +} +#line 43 "hb-buffer-deserialize-text.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 9: +#line 62 "hb-buffer-deserialize-text.rl" + { if (!parse_uint (tok, p, &info.cluster )) return false; } +#line 43 "hb-buffer-deserialize-text.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 11: +#line 64 "hb-buffer-deserialize-text.rl" + { if (!parse_int (tok, p, &pos.y_offset )) return false; } +#line 43 "hb-buffer-deserialize-text.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 6: +#line 65 "hb-buffer-deserialize-text.rl" + { if (!parse_int (tok, p, &pos.x_advance)) return false; } +#line 43 "hb-buffer-deserialize-text.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 8: +#line 66 "hb-buffer-deserialize-text.rl" + { if (!parse_int (tok, p, &pos.y_advance)) return false; } +#line 43 "hb-buffer-deserialize-text.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; +#line 480 "hb-buffer-deserialize-text.hh" + } + +_again: + if ( cs == 0 ) + goto _out; + if ( ++p != pe ) + goto _resume; + _test_eof: {} + if ( p == eof ) + { + switch ( _deserialize_text_eof_actions[cs] ) { + case 4: +#line 55 "hb-buffer-deserialize-text.rl" + { + if (!hb_font_glyph_from_string (font, + tok, p - tok, + &info.codepoint)) + return false; +} +#line 43 "hb-buffer-deserialize-text.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 9: +#line 62 "hb-buffer-deserialize-text.rl" + { if (!parse_uint (tok, p, &info.cluster )) return false; } +#line 43 "hb-buffer-deserialize-text.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 11: +#line 64 "hb-buffer-deserialize-text.rl" + { if (!parse_int (tok, p, &pos.y_offset )) return false; } +#line 43 "hb-buffer-deserialize-text.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 6: +#line 65 "hb-buffer-deserialize-text.rl" + { if (!parse_int (tok, p, &pos.x_advance)) return false; } +#line 43 "hb-buffer-deserialize-text.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; + case 8: +#line 66 "hb-buffer-deserialize-text.rl" + { if (!parse_int (tok, p, &pos.y_advance)) return false; } +#line 43 "hb-buffer-deserialize-text.rl" + { + buffer->add_info (info); + if (unlikely (!buffer->successful)) + return false; + buffer->pos[buffer->len - 1] = pos; + *end_ptr = p; +} + break; +#line 557 "hb-buffer-deserialize-text.hh" + } + } + + _out: {} + } + +#line 119 "hb-buffer-deserialize-text.rl" + + + *end_ptr = p; + + return p == pe && *(p-1) != ']'; +} + +#endif /* HB_BUFFER_DESERIALIZE_TEXT_HH */ diff --git a/thirdparty/harfbuzz/src/hb-buffer-serialize.cc b/thirdparty/harfbuzz/src/hb-buffer-serialize.cc new file mode 100644 index 0000000000..bc6c978b38 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-buffer-serialize.cc @@ -0,0 +1,474 @@ +/* + * Copyright © 2012,2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_BUFFER_SERIALIZE + +#include "hb-buffer.hh" + + +static const char *serialize_formats[] = { + "text", + "json", + nullptr +}; + +/** + * hb_buffer_serialize_list_formats: + * + * Returns a list of supported buffer serialization formats. + * + * Return value: (transfer none): + * A string array of buffer serialization formats. Should not be freed. + * + * Since: 0.9.7 + **/ +const char ** +hb_buffer_serialize_list_formats () +{ + return serialize_formats; +} + +/** + * hb_buffer_serialize_format_from_string: + * @str: (array length=len) (element-type uint8_t): a string to parse + * @len: length of @str, or -1 if string is %NULL terminated + * + * Parses a string into an #hb_buffer_serialize_format_t. Does not check if + * @str is a valid buffer serialization format, use + * hb_buffer_serialize_list_formats() to get the list of supported formats. + * + * Return value: + * The parsed #hb_buffer_serialize_format_t. + * + * Since: 0.9.7 + **/ +hb_buffer_serialize_format_t +hb_buffer_serialize_format_from_string (const char *str, int len) +{ + /* Upper-case it. */ + return (hb_buffer_serialize_format_t) (hb_tag_from_string (str, len) & ~0x20202020u); +} + +/** + * hb_buffer_serialize_format_to_string: + * @format: an #hb_buffer_serialize_format_t to convert. + * + * Converts @format to the string corresponding it, or %NULL if it is not a valid + * #hb_buffer_serialize_format_t. + * + * Return value: (transfer none): + * A %NULL terminated string corresponding to @format. Should not be freed. + * + * Since: 0.9.7 + **/ +const char * +hb_buffer_serialize_format_to_string (hb_buffer_serialize_format_t format) +{ + switch ((unsigned) format) + { + case HB_BUFFER_SERIALIZE_FORMAT_TEXT: return serialize_formats[0]; + case HB_BUFFER_SERIALIZE_FORMAT_JSON: return serialize_formats[1]; + default: + case HB_BUFFER_SERIALIZE_FORMAT_INVALID: return nullptr; + } +} + +static unsigned int +_hb_buffer_serialize_glyphs_json (hb_buffer_t *buffer, + unsigned int start, + unsigned int end, + char *buf, + unsigned int buf_size, + unsigned int *buf_consumed, + hb_font_t *font, + hb_buffer_serialize_flags_t flags) +{ + hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, nullptr); + hb_glyph_position_t *pos = (flags & HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS) ? + nullptr : hb_buffer_get_glyph_positions (buffer, nullptr); + + *buf_consumed = 0; + hb_position_t x = 0, y = 0; + for (unsigned int i = start; i < end; i++) + { + char b[1024]; + char *p = b; + + /* In the following code, we know b is large enough that no overflow can happen. */ + +#define APPEND(s) HB_STMT_START { strcpy (p, s); p += strlen (s); } HB_STMT_END + + if (i) + *p++ = ','; + + *p++ = '{'; + + APPEND ("\"g\":"); + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES)) + { + char g[128]; + hb_font_glyph_to_string (font, info[i].codepoint, g, sizeof (g)); + *p++ = '"'; + for (char *q = g; *q; q++) { + if (*q == '"') + *p++ = '\\'; + *p++ = *q; + } + *p++ = '"'; + } + else + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), "%u", info[i].codepoint)); + + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS)) { + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"cl\":%u", info[i].cluster)); + } + + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS)) + { + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"dx\":%d,\"dy\":%d", + x+pos[i].x_offset, y+pos[i].y_offset)); + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_ADVANCES)) + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"ax\":%d,\"ay\":%d", + pos[i].x_advance, pos[i].y_advance)); + } + + if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS) + { + if (info[i].mask & HB_GLYPH_FLAG_DEFINED) + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"fl\":%u", info[i].mask & HB_GLYPH_FLAG_DEFINED)); + } + + if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS) + { + hb_glyph_extents_t extents; + hb_font_get_glyph_extents(font, info[i].codepoint, &extents); + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"xb\":%d,\"yb\":%d", + extents.x_bearing, extents.y_bearing)); + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",\"w\":%d,\"h\":%d", + extents.width, extents.height)); + } + + *p++ = '}'; + + unsigned int l = p - b; + if (buf_size > l) + { + memcpy (buf, b, l); + buf += l; + buf_size -= l; + *buf_consumed += l; + *buf = '\0'; + } else + return i - start; + + if (pos && (flags & HB_BUFFER_SERIALIZE_FLAG_NO_ADVANCES)) + { + x += pos[i].x_advance; + y += pos[i].y_advance; + } + } + + return end - start; +} + +static unsigned int +_hb_buffer_serialize_glyphs_text (hb_buffer_t *buffer, + unsigned int start, + unsigned int end, + char *buf, + unsigned int buf_size, + unsigned int *buf_consumed, + hb_font_t *font, + hb_buffer_serialize_flags_t flags) +{ + hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, nullptr); + hb_glyph_position_t *pos = (flags & HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS) ? + nullptr : hb_buffer_get_glyph_positions (buffer, nullptr); + + *buf_consumed = 0; + hb_position_t x = 0, y = 0; + for (unsigned int i = start; i < end; i++) + { + char b[1024]; + char *p = b; + + /* In the following code, we know b is large enough that no overflow can happen. */ + + if (i) + *p++ = '|'; + + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES)) + { + hb_font_glyph_to_string (font, info[i].codepoint, p, 128); + p += strlen (p); + } + else + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), "%u", info[i].codepoint)); + + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS)) { + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), "=%u", info[i].cluster)); + } + + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS)) + { + if (x+pos[i].x_offset || y+pos[i].y_offset) + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), "@%d,%d", x+pos[i].x_offset, y+pos[i].y_offset)); + + if (!(flags & HB_BUFFER_SERIALIZE_FLAG_NO_ADVANCES)) + { + *p++ = '+'; + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), "%d", pos[i].x_advance)); + if (pos[i].y_advance) + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), ",%d", pos[i].y_advance)); + } + } + + if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS) + { + if (info[i].mask & HB_GLYPH_FLAG_DEFINED) + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), "#%X", info[i].mask &HB_GLYPH_FLAG_DEFINED)); + } + + if (flags & HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS) + { + hb_glyph_extents_t extents; + hb_font_get_glyph_extents(font, info[i].codepoint, &extents); + p += hb_max (0, snprintf (p, ARRAY_LENGTH (b) - (p - b), "<%d,%d,%d,%d>", extents.x_bearing, extents.y_bearing, extents.width, extents.height)); + } + + unsigned int l = p - b; + if (buf_size > l) + { + memcpy (buf, b, l); + buf += l; + buf_size -= l; + *buf_consumed += l; + *buf = '\0'; + } else + return i - start; + + if (pos && (flags & HB_BUFFER_SERIALIZE_FLAG_NO_ADVANCES)) + { + x += pos[i].x_advance; + y += pos[i].y_advance; + } + } + + return end - start; +} + +/** + * hb_buffer_serialize_glyphs: + * @buffer: an #hb_buffer_t buffer. + * @start: the first item in @buffer to serialize. + * @end: the last item in @buffer to serialize. + * @buf: (out) (array length=buf_size) (element-type uint8_t): output string to + * write serialized buffer into. + * @buf_size: the size of @buf. + * @buf_consumed: (out) (allow-none): if not %NULL, will be set to the number of byes written into @buf. + * @font: (allow-none): the #hb_font_t used to shape this buffer, needed to + * read glyph names and extents. If %NULL, and empty font will be used. + * @format: the #hb_buffer_serialize_format_t to use for formatting the output. + * @flags: the #hb_buffer_serialize_flags_t that control what glyph properties + * to serialize. + * + * Serializes @buffer into a textual representation of its glyph content, + * useful for showing the contents of the buffer, for example during debugging. + * There are currently two supported serialization formats: + * + * ## text + * A human-readable, plain text format. + * The serialized glyphs will look something like: + * + * ``` + * [uni0651=0@518,0+0|uni0628=0+1897] + * ``` + * - The serialized glyphs are delimited with `[` and `]`. + * - Glyphs are separated with `|` + * - Each glyph starts with glyph name, or glyph index if + * #HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES flag is set. Then, + * - If #HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS is not set, `=` then #hb_glyph_info_t.cluster. + * - If #HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS is not set, the #hb_glyph_position_t in the format: + * - If both #hb_glyph_position_t.x_offset and #hb_glyph_position_t.y_offset are not 0, `@x_offset,y_offset`. Then, + * - `+x_advance`, then `,y_advance` if #hb_glyph_position_t.y_advance is not 0. Then, + * - If #HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS is set, the + * #hb_glyph_extents_t in the format + * `<x_bearing,y_bearing,width,height>` + * + * ## json + * TODO. + * + * Return value: + * The number of serialized items. + * + * Since: 0.9.7 + **/ +unsigned int +hb_buffer_serialize_glyphs (hb_buffer_t *buffer, + unsigned int start, + unsigned int end, + char *buf, + unsigned int buf_size, + unsigned int *buf_consumed, + hb_font_t *font, + hb_buffer_serialize_format_t format, + hb_buffer_serialize_flags_t flags) +{ + assert (start <= end && end <= buffer->len); + + unsigned int sconsumed; + if (!buf_consumed) + buf_consumed = &sconsumed; + *buf_consumed = 0; + if (buf_size) + *buf = '\0'; + + assert ((!buffer->len && (buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)) || + (buffer->content_type == HB_BUFFER_CONTENT_TYPE_GLYPHS)); + + if (!buffer->have_positions) + flags |= HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS; + + if (unlikely (start == end)) + return 0; + + if (!font) + font = hb_font_get_empty (); + + switch (format) + { + case HB_BUFFER_SERIALIZE_FORMAT_TEXT: + return _hb_buffer_serialize_glyphs_text (buffer, start, end, + buf, buf_size, buf_consumed, + font, flags); + + case HB_BUFFER_SERIALIZE_FORMAT_JSON: + return _hb_buffer_serialize_glyphs_json (buffer, start, end, + buf, buf_size, buf_consumed, + font, flags); + + default: + case HB_BUFFER_SERIALIZE_FORMAT_INVALID: + return 0; + + } +} + +static bool +parse_int (const char *pp, const char *end, int32_t *pv) +{ + int v; + const char *p = pp; + if (unlikely (!hb_parse_int (&p, end, &v, true/* whole buffer */))) + return false; + + *pv = v; + return true; +} + +static bool +parse_uint (const char *pp, const char *end, uint32_t *pv) +{ + unsigned int v; + const char *p = pp; + if (unlikely (!hb_parse_uint (&p, end, &v, true/* whole buffer */))) + return false; + + *pv = v; + return true; +} + +#include "hb-buffer-deserialize-json.hh" +#include "hb-buffer-deserialize-text.hh" + +/** + * hb_buffer_deserialize_glyphs: + * @buffer: an #hb_buffer_t buffer. + * @buf: (array length=buf_len): + * @buf_len: + * @end_ptr: (out): + * @font: + * @format: + * + * + * + * Return value: + * + * Since: 0.9.7 + **/ +hb_bool_t +hb_buffer_deserialize_glyphs (hb_buffer_t *buffer, + const char *buf, + int buf_len, /* -1 means nul-terminated */ + const char **end_ptr, /* May be NULL */ + hb_font_t *font, /* May be NULL */ + hb_buffer_serialize_format_t format) +{ + const char *end; + if (!end_ptr) + end_ptr = &end; + *end_ptr = buf; + + assert ((!buffer->len && (buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)) || + (buffer->content_type == HB_BUFFER_CONTENT_TYPE_GLYPHS)); + + if (buf_len == -1) + buf_len = strlen (buf); + + if (!buf_len) + { + *end_ptr = buf; + return false; + } + + hb_buffer_set_content_type (buffer, HB_BUFFER_CONTENT_TYPE_GLYPHS); + + if (!font) + font = hb_font_get_empty (); + + switch (format) + { + case HB_BUFFER_SERIALIZE_FORMAT_TEXT: + return _hb_buffer_deserialize_glyphs_text (buffer, + buf, buf_len, end_ptr, + font); + + case HB_BUFFER_SERIALIZE_FORMAT_JSON: + return _hb_buffer_deserialize_glyphs_json (buffer, + buf, buf_len, end_ptr, + font); + + default: + case HB_BUFFER_SERIALIZE_FORMAT_INVALID: + return false; + + } +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-buffer.cc b/thirdparty/harfbuzz/src/hb-buffer.cc new file mode 100644 index 0000000000..4fadbb78d2 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-buffer.cc @@ -0,0 +1,2004 @@ +/* + * Copyright © 1998-2004 David Turner and Werner Lemberg + * Copyright © 2004,2007,2009,2010 Red Hat, Inc. + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Owen Taylor, Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-buffer.hh" +#include "hb-utf.hh" + + +/** + * SECTION: hb-buffer + * @title: hb-buffer + * @short_description: Input and output buffers + * @include: hb.h + * + * Buffers serve dual role in HarfBuzz; they hold the input characters that are + * passed to hb_shape(), and after shaping they hold the output glyphs. + **/ + + +/** + * hb_segment_properties_equal: + * @a: first #hb_segment_properties_t to compare. + * @b: second #hb_segment_properties_t to compare. + * + * Checks the equality of two #hb_segment_properties_t's. + * + * Return value: + * %true if all properties of @a equal those of @b, false otherwise. + * + * Since: 0.9.7 + **/ +hb_bool_t +hb_segment_properties_equal (const hb_segment_properties_t *a, + const hb_segment_properties_t *b) +{ + return a->direction == b->direction && + a->script == b->script && + a->language == b->language && + a->reserved1 == b->reserved1 && + a->reserved2 == b->reserved2; + +} + +/** + * hb_segment_properties_hash: + * @p: #hb_segment_properties_t to hash. + * + * Creates a hash representing @p. + * + * Return value: + * A hash of @p. + * + * Since: 0.9.7 + **/ +unsigned int +hb_segment_properties_hash (const hb_segment_properties_t *p) +{ + return (unsigned int) p->direction ^ + (unsigned int) p->script ^ + (intptr_t) (p->language); +} + + + +/* Here is how the buffer works internally: + * + * There are two info pointers: info and out_info. They always have + * the same allocated size, but different lengths. + * + * As an optimization, both info and out_info may point to the + * same piece of memory, which is owned by info. This remains the + * case as long as out_len doesn't exceed i at any time. + * In that case, swap_buffers() is no-op and the glyph operations operate + * mostly in-place. + * + * As soon as out_info gets longer than info, out_info is moved over + * to an alternate buffer (which we reuse the pos buffer for!), and its + * current contents (out_len entries) are copied to the new place. + * This should all remain transparent to the user. swap_buffers() then + * switches info and out_info. + */ + + + +/* Internal API */ + +bool +hb_buffer_t::enlarge (unsigned int size) +{ + if (unlikely (!successful)) + return false; + if (unlikely (size > max_len)) + { + successful = false; + return false; + } + + unsigned int new_allocated = allocated; + hb_glyph_position_t *new_pos = nullptr; + hb_glyph_info_t *new_info = nullptr; + bool separate_out = out_info != info; + + if (unlikely (hb_unsigned_mul_overflows (size, sizeof (info[0])))) + goto done; + + while (size >= new_allocated) + new_allocated += (new_allocated >> 1) + 32; + + static_assert ((sizeof (info[0]) == sizeof (pos[0])), ""); + if (unlikely (hb_unsigned_mul_overflows (new_allocated, sizeof (info[0])))) + goto done; + + new_pos = (hb_glyph_position_t *) realloc (pos, new_allocated * sizeof (pos[0])); + new_info = (hb_glyph_info_t *) realloc (info, new_allocated * sizeof (info[0])); + +done: + if (unlikely (!new_pos || !new_info)) + successful = false; + + if (likely (new_pos)) + pos = new_pos; + + if (likely (new_info)) + info = new_info; + + out_info = separate_out ? (hb_glyph_info_t *) pos : info; + if (likely (successful)) + allocated = new_allocated; + + return likely (successful); +} + +bool +hb_buffer_t::make_room_for (unsigned int num_in, + unsigned int num_out) +{ + if (unlikely (!ensure (out_len + num_out))) return false; + + if (out_info == info && + out_len + num_out > idx + num_in) + { + assert (have_output); + + out_info = (hb_glyph_info_t *) pos; + memcpy (out_info, info, out_len * sizeof (out_info[0])); + } + + return true; +} + +bool +hb_buffer_t::shift_forward (unsigned int count) +{ + assert (have_output); + if (unlikely (!ensure (len + count))) return false; + + memmove (info + idx + count, info + idx, (len - idx) * sizeof (info[0])); + if (idx + count > len) + { + /* Under memory failure we might expose this area. At least + * clean it up. Oh well... + * + * Ideally, we should at least set Default_Ignorable bits on + * these, as well as consistent cluster values. But the former + * is layering violation... */ + memset (info + len, 0, (idx + count - len) * sizeof (info[0])); + } + len += count; + idx += count; + + return true; +} + +hb_buffer_t::scratch_buffer_t * +hb_buffer_t::get_scratch_buffer (unsigned int *size) +{ + have_output = false; + have_positions = false; + + out_len = 0; + out_info = info; + + assert ((uintptr_t) pos % sizeof (scratch_buffer_t) == 0); + *size = allocated * sizeof (pos[0]) / sizeof (scratch_buffer_t); + return (scratch_buffer_t *) (void *) pos; +} + + + +/* HarfBuzz-Internal API */ + +void +hb_buffer_t::reset () +{ + if (unlikely (hb_object_is_immutable (this))) + return; + + hb_unicode_funcs_destroy (unicode); + unicode = hb_unicode_funcs_reference (hb_unicode_funcs_get_default ()); + flags = HB_BUFFER_FLAG_DEFAULT; + replacement = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT; + invisible = 0; + + clear (); +} + +void +hb_buffer_t::clear () +{ + if (unlikely (hb_object_is_immutable (this))) + return; + + hb_segment_properties_t default_props = HB_SEGMENT_PROPERTIES_DEFAULT; + props = default_props; + scratch_flags = HB_BUFFER_SCRATCH_FLAG_DEFAULT; + + content_type = HB_BUFFER_CONTENT_TYPE_INVALID; + successful = true; + have_output = false; + have_positions = false; + + idx = 0; + len = 0; + out_len = 0; + out_info = info; + + serial = 0; + + memset (context, 0, sizeof context); + memset (context_len, 0, sizeof context_len); + + deallocate_var_all (); +} + +void +hb_buffer_t::add (hb_codepoint_t codepoint, + unsigned int cluster) +{ + hb_glyph_info_t *glyph; + + if (unlikely (!ensure (len + 1))) return; + + glyph = &info[len]; + + memset (glyph, 0, sizeof (*glyph)); + glyph->codepoint = codepoint; + glyph->mask = 0; + glyph->cluster = cluster; + + len++; +} + +void +hb_buffer_t::add_info (const hb_glyph_info_t &glyph_info) +{ + if (unlikely (!ensure (len + 1))) return; + + info[len] = glyph_info; + + len++; +} + + +void +hb_buffer_t::remove_output () +{ + if (unlikely (hb_object_is_immutable (this))) + return; + + have_output = false; + have_positions = false; + + out_len = 0; + out_info = info; +} + +void +hb_buffer_t::clear_output () +{ + if (unlikely (hb_object_is_immutable (this))) + return; + + have_output = true; + have_positions = false; + + out_len = 0; + out_info = info; +} + +void +hb_buffer_t::clear_positions () +{ + if (unlikely (hb_object_is_immutable (this))) + return; + + have_output = false; + have_positions = true; + + out_len = 0; + out_info = info; + + hb_memset (pos, 0, sizeof (pos[0]) * len); +} + +void +hb_buffer_t::swap_buffers () +{ + if (unlikely (!successful)) return; + + assert (have_output); + have_output = false; + + if (out_info != info) + { + hb_glyph_info_t *tmp_string; + tmp_string = info; + info = out_info; + out_info = tmp_string; + pos = (hb_glyph_position_t *) out_info; + } + + unsigned int tmp; + tmp = len; + len = out_len; + out_len = tmp; + + idx = 0; +} + + +void +hb_buffer_t::replace_glyphs (unsigned int num_in, + unsigned int num_out, + const uint32_t *glyph_data) +{ + if (unlikely (!make_room_for (num_in, num_out))) return; + + assert (idx + num_in <= len); + + merge_clusters (idx, idx + num_in); + + hb_glyph_info_t orig_info = info[idx]; + hb_glyph_info_t *pinfo = &out_info[out_len]; + for (unsigned int i = 0; i < num_out; i++) + { + *pinfo = orig_info; + pinfo->codepoint = glyph_data[i]; + pinfo++; + } + + idx += num_in; + out_len += num_out; +} + +bool +hb_buffer_t::move_to (unsigned int i) +{ + if (!have_output) + { + assert (i <= len); + idx = i; + return true; + } + if (unlikely (!successful)) + return false; + + assert (i <= out_len + (len - idx)); + + if (out_len < i) + { + unsigned int count = i - out_len; + if (unlikely (!make_room_for (count, count))) return false; + + memmove (out_info + out_len, info + idx, count * sizeof (out_info[0])); + idx += count; + out_len += count; + } + else if (out_len > i) + { + /* Tricky part: rewinding... */ + unsigned int count = out_len - i; + + /* This will blow in our face if memory allocation fails later + * in this same lookup... + * + * We used to shift with extra 32 items, instead of the 0 below. + * But that would leave empty slots in the buffer in case of allocation + * failures. Setting to zero for now to avoid other problems (see + * comments in shift_forward(). This can cause O(N^2) behavior more + * severely than adding 32 empty slots can... */ + if (unlikely (idx < count && !shift_forward (count + 0))) return false; + + assert (idx >= count); + + idx -= count; + out_len -= count; + memmove (info + idx, out_info + out_len, count * sizeof (out_info[0])); + } + + return true; +} + + +void +hb_buffer_t::set_masks (hb_mask_t value, + hb_mask_t mask, + unsigned int cluster_start, + unsigned int cluster_end) +{ + hb_mask_t not_mask = ~mask; + value &= mask; + + if (!mask) + return; + + unsigned int count = len; + for (unsigned int i = 0; i < count; i++) + if (cluster_start <= info[i].cluster && info[i].cluster < cluster_end) + info[i].mask = (info[i].mask & not_mask) | value; +} + +void +hb_buffer_t::reverse_range (unsigned int start, + unsigned int end) +{ + if (end - start < 2) + return; + + hb_array_t<hb_glyph_info_t> (info, len).reverse (start, end); + + if (have_positions) { + hb_array_t<hb_glyph_position_t> (pos, len).reverse (start, end); + } +} + +void +hb_buffer_t::reverse () +{ + if (unlikely (!len)) + return; + + reverse_range (0, len); +} + +void +hb_buffer_t::reverse_clusters () +{ + unsigned int i, start, count, last_cluster; + + if (unlikely (!len)) + return; + + reverse (); + + count = len; + start = 0; + last_cluster = info[0].cluster; + for (i = 1; i < count; i++) { + if (last_cluster != info[i].cluster) { + reverse_range (start, i); + start = i; + last_cluster = info[i].cluster; + } + } + reverse_range (start, i); +} + +void +hb_buffer_t::merge_clusters_impl (unsigned int start, + unsigned int end) +{ + if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS) + { + unsafe_to_break (start, end); + return; + } + + unsigned int cluster = info[start].cluster; + + for (unsigned int i = start + 1; i < end; i++) + cluster = hb_min (cluster, info[i].cluster); + + /* Extend end */ + while (end < len && info[end - 1].cluster == info[end].cluster) + end++; + + /* Extend start */ + while (idx < start && info[start - 1].cluster == info[start].cluster) + start--; + + /* If we hit the start of buffer, continue in out-buffer. */ + if (idx == start) + for (unsigned int i = out_len; i && out_info[i - 1].cluster == info[start].cluster; i--) + set_cluster (out_info[i - 1], cluster); + + for (unsigned int i = start; i < end; i++) + set_cluster (info[i], cluster); +} +void +hb_buffer_t::merge_out_clusters (unsigned int start, + unsigned int end) +{ + if (cluster_level == HB_BUFFER_CLUSTER_LEVEL_CHARACTERS) + return; + + if (unlikely (end - start < 2)) + return; + + unsigned int cluster = out_info[start].cluster; + + for (unsigned int i = start + 1; i < end; i++) + cluster = hb_min (cluster, out_info[i].cluster); + + /* Extend start */ + while (start && out_info[start - 1].cluster == out_info[start].cluster) + start--; + + /* Extend end */ + while (end < out_len && out_info[end - 1].cluster == out_info[end].cluster) + end++; + + /* If we hit the end of out-buffer, continue in buffer. */ + if (end == out_len) + for (unsigned int i = idx; i < len && info[i].cluster == out_info[end - 1].cluster; i++) + set_cluster (info[i], cluster); + + for (unsigned int i = start; i < end; i++) + set_cluster (out_info[i], cluster); +} +void +hb_buffer_t::delete_glyph () +{ + /* The logic here is duplicated in hb_ot_hide_default_ignorables(). */ + + unsigned int cluster = info[idx].cluster; + if (idx + 1 < len && cluster == info[idx + 1].cluster) + { + /* Cluster survives; do nothing. */ + goto done; + } + + if (out_len) + { + /* Merge cluster backward. */ + if (cluster < out_info[out_len - 1].cluster) + { + unsigned int mask = info[idx].mask; + unsigned int old_cluster = out_info[out_len - 1].cluster; + for (unsigned i = out_len; i && out_info[i - 1].cluster == old_cluster; i--) + set_cluster (out_info[i - 1], cluster, mask); + } + goto done; + } + + if (idx + 1 < len) + { + /* Merge cluster forward. */ + merge_clusters (idx, idx + 2); + goto done; + } + +done: + skip_glyph (); +} + +void +hb_buffer_t::unsafe_to_break_impl (unsigned int start, unsigned int end) +{ + unsigned int cluster = UINT_MAX; + cluster = _unsafe_to_break_find_min_cluster (info, start, end, cluster); + _unsafe_to_break_set_mask (info, start, end, cluster); +} +void +hb_buffer_t::unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end) +{ + if (!have_output) + { + unsafe_to_break_impl (start, end); + return; + } + + assert (start <= out_len); + assert (idx <= end); + + unsigned int cluster = UINT_MAX; + cluster = _unsafe_to_break_find_min_cluster (out_info, start, out_len, cluster); + cluster = _unsafe_to_break_find_min_cluster (info, idx, end, cluster); + _unsafe_to_break_set_mask (out_info, start, out_len, cluster); + _unsafe_to_break_set_mask (info, idx, end, cluster); +} + +void +hb_buffer_t::guess_segment_properties () +{ + assert ((content_type == HB_BUFFER_CONTENT_TYPE_UNICODE) || + (!len && (content_type == HB_BUFFER_CONTENT_TYPE_INVALID))); + + /* If script is set to INVALID, guess from buffer contents */ + if (props.script == HB_SCRIPT_INVALID) { + for (unsigned int i = 0; i < len; i++) { + hb_script_t script = unicode->script (info[i].codepoint); + if (likely (script != HB_SCRIPT_COMMON && + script != HB_SCRIPT_INHERITED && + script != HB_SCRIPT_UNKNOWN)) { + props.script = script; + break; + } + } + } + + /* If direction is set to INVALID, guess from script */ + if (props.direction == HB_DIRECTION_INVALID) { + props.direction = hb_script_get_horizontal_direction (props.script); + if (props.direction == HB_DIRECTION_INVALID) + props.direction = HB_DIRECTION_LTR; + } + + /* If language is not set, use default language from locale */ + if (props.language == HB_LANGUAGE_INVALID) { + /* TODO get_default_for_script? using $LANGUAGE */ + props.language = hb_language_get_default (); + } +} + + +/* Public API */ + +DEFINE_NULL_INSTANCE (hb_buffer_t) = +{ + HB_OBJECT_HEADER_STATIC, + + const_cast<hb_unicode_funcs_t *> (&_hb_Null_hb_unicode_funcs_t), + HB_BUFFER_FLAG_DEFAULT, + HB_BUFFER_CLUSTER_LEVEL_DEFAULT, + HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT, + 0, /* invisible */ + HB_BUFFER_SCRATCH_FLAG_DEFAULT, + HB_BUFFER_MAX_LEN_DEFAULT, + HB_BUFFER_MAX_OPS_DEFAULT, + + HB_BUFFER_CONTENT_TYPE_INVALID, + HB_SEGMENT_PROPERTIES_DEFAULT, + false, /* successful */ + true, /* have_output */ + true /* have_positions */ + + /* Zero is good enough for everything else. */ +}; + + +/** + * hb_buffer_create: (Xconstructor) + * + * Creates a new #hb_buffer_t with all properties to defaults. + * + * Return value: (transfer full): + * A newly allocated #hb_buffer_t with a reference count of 1. The initial + * reference count should be released with hb_buffer_destroy() when you are done + * using the #hb_buffer_t. This function never returns %NULL. If memory cannot + * be allocated, a special #hb_buffer_t object will be returned on which + * hb_buffer_allocation_successful() returns %false. + * + * Since: 0.9.2 + **/ +hb_buffer_t * +hb_buffer_create () +{ + hb_buffer_t *buffer; + + if (!(buffer = hb_object_create<hb_buffer_t> ())) + return hb_buffer_get_empty (); + + buffer->max_len = HB_BUFFER_MAX_LEN_DEFAULT; + buffer->max_ops = HB_BUFFER_MAX_OPS_DEFAULT; + + buffer->reset (); + + return buffer; +} + +/** + * hb_buffer_get_empty: + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_buffer_t * +hb_buffer_get_empty () +{ + return const_cast<hb_buffer_t *> (&Null (hb_buffer_t)); +} + +/** + * hb_buffer_reference: (skip) + * @buffer: an #hb_buffer_t. + * + * Increases the reference count on @buffer by one. This prevents @buffer from + * being destroyed until a matching call to hb_buffer_destroy() is made. + * + * Return value: (transfer full): + * The referenced #hb_buffer_t. + * + * Since: 0.9.2 + **/ +hb_buffer_t * +hb_buffer_reference (hb_buffer_t *buffer) +{ + return hb_object_reference (buffer); +} + +/** + * hb_buffer_destroy: (skip) + * @buffer: an #hb_buffer_t. + * + * Deallocate the @buffer. + * Decreases the reference count on @buffer by one. If the result is zero, then + * @buffer and all associated resources are freed. See hb_buffer_reference(). + * + * Since: 0.9.2 + **/ +void +hb_buffer_destroy (hb_buffer_t *buffer) +{ + if (!hb_object_destroy (buffer)) return; + + hb_unicode_funcs_destroy (buffer->unicode); + + free (buffer->info); + free (buffer->pos); +#ifndef HB_NO_BUFFER_MESSAGE + if (buffer->message_destroy) + buffer->message_destroy (buffer->message_data); +#endif + + free (buffer); +} + +/** + * hb_buffer_set_user_data: (skip) + * @buffer: an #hb_buffer_t. + * @key: + * @data: + * @destroy: + * @replace: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_buffer_set_user_data (hb_buffer_t *buffer, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace) +{ + return hb_object_set_user_data (buffer, key, data, destroy, replace); +} + +/** + * hb_buffer_get_user_data: (skip) + * @buffer: an #hb_buffer_t. + * @key: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +void * +hb_buffer_get_user_data (hb_buffer_t *buffer, + hb_user_data_key_t *key) +{ + return hb_object_get_user_data (buffer, key); +} + + +/** + * hb_buffer_set_content_type: + * @buffer: an #hb_buffer_t. + * @content_type: the type of buffer contents to set + * + * Sets the type of @buffer contents, buffers are either empty, contain + * characters (before shaping) or glyphs (the result of shaping). + * + * Since: 0.9.5 + **/ +void +hb_buffer_set_content_type (hb_buffer_t *buffer, + hb_buffer_content_type_t content_type) +{ + buffer->content_type = content_type; +} + +/** + * hb_buffer_get_content_type: + * @buffer: an #hb_buffer_t. + * + * see hb_buffer_set_content_type(). + * + * Return value: + * The type of @buffer contents. + * + * Since: 0.9.5 + **/ +hb_buffer_content_type_t +hb_buffer_get_content_type (hb_buffer_t *buffer) +{ + return buffer->content_type; +} + + +/** + * hb_buffer_set_unicode_funcs: + * @buffer: an #hb_buffer_t. + * @unicode_funcs: + * + * + * + * Since: 0.9.2 + **/ +void +hb_buffer_set_unicode_funcs (hb_buffer_t *buffer, + hb_unicode_funcs_t *unicode_funcs) +{ + if (unlikely (hb_object_is_immutable (buffer))) + return; + + if (!unicode_funcs) + unicode_funcs = hb_unicode_funcs_get_default (); + + hb_unicode_funcs_reference (unicode_funcs); + hb_unicode_funcs_destroy (buffer->unicode); + buffer->unicode = unicode_funcs; +} + +/** + * hb_buffer_get_unicode_funcs: + * @buffer: an #hb_buffer_t. + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_unicode_funcs_t * +hb_buffer_get_unicode_funcs (hb_buffer_t *buffer) +{ + return buffer->unicode; +} + +/** + * hb_buffer_set_direction: + * @buffer: an #hb_buffer_t. + * @direction: the #hb_direction_t of the @buffer + * + * Set the text flow direction of the buffer. No shaping can happen without + * setting @buffer direction, and it controls the visual direction for the + * output glyphs; for RTL direction the glyphs will be reversed. Many layout + * features depend on the proper setting of the direction, for example, + * reversing RTL text before shaping, then shaping with LTR direction is not + * the same as keeping the text in logical order and shaping with RTL + * direction. + * + * Since: 0.9.2 + **/ +void +hb_buffer_set_direction (hb_buffer_t *buffer, + hb_direction_t direction) + +{ + if (unlikely (hb_object_is_immutable (buffer))) + return; + + buffer->props.direction = direction; +} + +/** + * hb_buffer_get_direction: + * @buffer: an #hb_buffer_t. + * + * See hb_buffer_set_direction() + * + * Return value: + * The direction of the @buffer. + * + * Since: 0.9.2 + **/ +hb_direction_t +hb_buffer_get_direction (hb_buffer_t *buffer) +{ + return buffer->props.direction; +} + +/** + * hb_buffer_set_script: + * @buffer: an #hb_buffer_t. + * @script: an #hb_script_t to set. + * + * Sets the script of @buffer to @script. + * + * Script is crucial for choosing the proper shaping behaviour for scripts that + * require it (e.g. Arabic) and the which OpenType features defined in the font + * to be applied. + * + * You can pass one of the predefined #hb_script_t values, or use + * hb_script_from_string() or hb_script_from_iso15924_tag() to get the + * corresponding script from an ISO 15924 script tag. + * + * Since: 0.9.2 + **/ +void +hb_buffer_set_script (hb_buffer_t *buffer, + hb_script_t script) +{ + if (unlikely (hb_object_is_immutable (buffer))) + return; + + buffer->props.script = script; +} + +/** + * hb_buffer_get_script: + * @buffer: an #hb_buffer_t. + * + * See hb_buffer_set_script(). + * + * Return value: + * The #hb_script_t of the @buffer. + * + * Since: 0.9.2 + **/ +hb_script_t +hb_buffer_get_script (hb_buffer_t *buffer) +{ + return buffer->props.script; +} + +/** + * hb_buffer_set_language: + * @buffer: an #hb_buffer_t. + * @language: an hb_language_t to set. + * + * Sets the language of @buffer to @language. + * + * Languages are crucial for selecting which OpenType feature to apply to the + * buffer which can result in applying language-specific behaviour. Languages + * are orthogonal to the scripts, and though they are related, they are + * different concepts and should not be confused with each other. + * + * Use hb_language_from_string() to convert from BCP 47 language tags to + * #hb_language_t. + * + * Since: 0.9.2 + **/ +void +hb_buffer_set_language (hb_buffer_t *buffer, + hb_language_t language) +{ + if (unlikely (hb_object_is_immutable (buffer))) + return; + + buffer->props.language = language; +} + +/** + * hb_buffer_get_language: + * @buffer: an #hb_buffer_t. + * + * See hb_buffer_set_language(). + * + * Return value: (transfer none): + * The #hb_language_t of the buffer. Must not be freed by the caller. + * + * Since: 0.9.2 + **/ +hb_language_t +hb_buffer_get_language (hb_buffer_t *buffer) +{ + return buffer->props.language; +} + +/** + * hb_buffer_set_segment_properties: + * @buffer: an #hb_buffer_t. + * @props: an #hb_segment_properties_t to use. + * + * Sets the segment properties of the buffer, a shortcut for calling + * hb_buffer_set_direction(), hb_buffer_set_script() and + * hb_buffer_set_language() individually. + * + * Since: 0.9.7 + **/ +void +hb_buffer_set_segment_properties (hb_buffer_t *buffer, + const hb_segment_properties_t *props) +{ + if (unlikely (hb_object_is_immutable (buffer))) + return; + + buffer->props = *props; +} + +/** + * hb_buffer_get_segment_properties: + * @buffer: an #hb_buffer_t. + * @props: (out): the output #hb_segment_properties_t. + * + * Sets @props to the #hb_segment_properties_t of @buffer. + * + * Since: 0.9.7 + **/ +void +hb_buffer_get_segment_properties (hb_buffer_t *buffer, + hb_segment_properties_t *props) +{ + *props = buffer->props; +} + + +/** + * hb_buffer_set_flags: + * @buffer: an #hb_buffer_t. + * @flags: the buffer flags to set. + * + * Sets @buffer flags to @flags. See #hb_buffer_flags_t. + * + * Since: 0.9.7 + **/ +void +hb_buffer_set_flags (hb_buffer_t *buffer, + hb_buffer_flags_t flags) +{ + if (unlikely (hb_object_is_immutable (buffer))) + return; + + buffer->flags = flags; +} + +/** + * hb_buffer_get_flags: + * @buffer: an #hb_buffer_t. + * + * See hb_buffer_set_flags(). + * + * Return value: + * The @buffer flags. + * + * Since: 0.9.7 + **/ +hb_buffer_flags_t +hb_buffer_get_flags (hb_buffer_t *buffer) +{ + return buffer->flags; +} + +/** + * hb_buffer_set_cluster_level: + * @buffer: an #hb_buffer_t. + * @cluster_level: + * + * + * + * Since: 0.9.42 + **/ +void +hb_buffer_set_cluster_level (hb_buffer_t *buffer, + hb_buffer_cluster_level_t cluster_level) +{ + if (unlikely (hb_object_is_immutable (buffer))) + return; + + buffer->cluster_level = cluster_level; +} + +/** + * hb_buffer_get_cluster_level: + * @buffer: an #hb_buffer_t. + * + * + * + * Return value: + * + * Since: 0.9.42 + **/ +hb_buffer_cluster_level_t +hb_buffer_get_cluster_level (hb_buffer_t *buffer) +{ + return buffer->cluster_level; +} + + +/** + * hb_buffer_set_replacement_codepoint: + * @buffer: an #hb_buffer_t. + * @replacement: the replacement #hb_codepoint_t + * + * Sets the #hb_codepoint_t that replaces invalid entries for a given encoding + * when adding text to @buffer. + * + * Default is %HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT. + * + * Since: 0.9.31 + **/ +void +hb_buffer_set_replacement_codepoint (hb_buffer_t *buffer, + hb_codepoint_t replacement) +{ + if (unlikely (hb_object_is_immutable (buffer))) + return; + + buffer->replacement = replacement; +} + +/** + * hb_buffer_get_replacement_codepoint: + * @buffer: an #hb_buffer_t. + * + * See hb_buffer_set_replacement_codepoint(). + * + * Return value: + * The @buffer replacement #hb_codepoint_t. + * + * Since: 0.9.31 + **/ +hb_codepoint_t +hb_buffer_get_replacement_codepoint (hb_buffer_t *buffer) +{ + return buffer->replacement; +} + + +/** + * hb_buffer_set_invisible_glyph: + * @buffer: an #hb_buffer_t. + * @invisible: the invisible #hb_codepoint_t + * + * Sets the #hb_codepoint_t that replaces invisible characters in + * the shaping result. If set to zero (default), the glyph for the + * U+0020 SPACE character is used. Otherwise, this value is used + * verbatim. + * + * Since: 2.0.0 + **/ +void +hb_buffer_set_invisible_glyph (hb_buffer_t *buffer, + hb_codepoint_t invisible) +{ + if (unlikely (hb_object_is_immutable (buffer))) + return; + + buffer->invisible = invisible; +} + +/** + * hb_buffer_get_invisible_glyph: + * @buffer: an #hb_buffer_t. + * + * See hb_buffer_set_invisible_glyph(). + * + * Return value: + * The @buffer invisible #hb_codepoint_t. + * + * Since: 2.0.0 + **/ +hb_codepoint_t +hb_buffer_get_invisible_glyph (hb_buffer_t *buffer) +{ + return buffer->invisible; +} + + +/** + * hb_buffer_reset: + * @buffer: an #hb_buffer_t. + * + * Resets the buffer to its initial status, as if it was just newly created + * with hb_buffer_create(). + * + * Since: 0.9.2 + **/ +void +hb_buffer_reset (hb_buffer_t *buffer) +{ + buffer->reset (); +} + +/** + * hb_buffer_clear_contents: + * @buffer: an #hb_buffer_t. + * + * Similar to hb_buffer_reset(), but does not clear the Unicode functions and + * the replacement code point. + * + * Since: 0.9.11 + **/ +void +hb_buffer_clear_contents (hb_buffer_t *buffer) +{ + buffer->clear (); +} + +/** + * hb_buffer_pre_allocate: + * @buffer: an #hb_buffer_t. + * @size: number of items to pre allocate. + * + * Pre allocates memory for @buffer to fit at least @size number of items. + * + * Return value: + * %true if @buffer memory allocation succeeded, %false otherwise. + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_buffer_pre_allocate (hb_buffer_t *buffer, unsigned int size) +{ + return buffer->ensure (size); +} + +/** + * hb_buffer_allocation_successful: + * @buffer: an #hb_buffer_t. + * + * Check if allocating memory for the buffer succeeded. + * + * Return value: + * %true if @buffer memory allocation succeeded, %false otherwise. + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_buffer_allocation_successful (hb_buffer_t *buffer) +{ + return buffer->successful; +} + +/** + * hb_buffer_add: + * @buffer: an #hb_buffer_t. + * @codepoint: a Unicode code point. + * @cluster: the cluster value of @codepoint. + * + * Appends a character with the Unicode value of @codepoint to @buffer, and + * gives it the initial cluster value of @cluster. Clusters can be any thing + * the client wants, they are usually used to refer to the index of the + * character in the input text stream and are output in + * #hb_glyph_info_t.cluster field. + * + * This function does not check the validity of @codepoint, it is up to the + * caller to ensure it is a valid Unicode code point. + * + * Since: 0.9.7 + **/ +void +hb_buffer_add (hb_buffer_t *buffer, + hb_codepoint_t codepoint, + unsigned int cluster) +{ + buffer->add (codepoint, cluster); + buffer->clear_context (1); +} + +/** + * hb_buffer_set_length: + * @buffer: an #hb_buffer_t. + * @length: the new length of @buffer. + * + * Similar to hb_buffer_pre_allocate(), but clears any new items added at the + * end. + * + * Return value: + * %true if @buffer memory allocation succeeded, %false otherwise. + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_buffer_set_length (hb_buffer_t *buffer, + unsigned int length) +{ + if (unlikely (hb_object_is_immutable (buffer))) + return length == 0; + + if (!buffer->ensure (length)) + return false; + + /* Wipe the new space */ + if (length > buffer->len) { + memset (buffer->info + buffer->len, 0, sizeof (buffer->info[0]) * (length - buffer->len)); + if (buffer->have_positions) + memset (buffer->pos + buffer->len, 0, sizeof (buffer->pos[0]) * (length - buffer->len)); + } + + buffer->len = length; + + if (!length) + { + buffer->content_type = HB_BUFFER_CONTENT_TYPE_INVALID; + buffer->clear_context (0); + } + buffer->clear_context (1); + + return true; +} + +/** + * hb_buffer_get_length: + * @buffer: an #hb_buffer_t. + * + * Returns the number of items in the buffer. + * + * Return value: + * The @buffer length. + * The value valid as long as buffer has not been modified. + * + * Since: 0.9.2 + **/ +unsigned int +hb_buffer_get_length (hb_buffer_t *buffer) +{ + return buffer->len; +} + +/** + * hb_buffer_get_glyph_infos: + * @buffer: an #hb_buffer_t. + * @length: (out): output array length. + * + * Returns @buffer glyph information array. Returned pointer + * is valid as long as @buffer contents are not modified. + * + * Return value: (transfer none) (array length=length): + * The @buffer glyph information array. + * The value valid as long as buffer has not been modified. + * + * Since: 0.9.2 + **/ +hb_glyph_info_t * +hb_buffer_get_glyph_infos (hb_buffer_t *buffer, + unsigned int *length) +{ + if (length) + *length = buffer->len; + + return (hb_glyph_info_t *) buffer->info; +} + +/** + * hb_buffer_get_glyph_positions: + * @buffer: an #hb_buffer_t. + * @length: (out): output length. + * + * Returns @buffer glyph position array. Returned pointer + * is valid as long as @buffer contents are not modified. + * + * Return value: (transfer none) (array length=length): + * The @buffer glyph position array. + * The value valid as long as buffer has not been modified. + * + * Since: 0.9.2 + **/ +hb_glyph_position_t * +hb_buffer_get_glyph_positions (hb_buffer_t *buffer, + unsigned int *length) +{ + if (!buffer->have_positions) + buffer->clear_positions (); + + if (length) + *length = buffer->len; + + return (hb_glyph_position_t *) buffer->pos; +} + +/** + * hb_glyph_info_get_glyph_flags: + * @info: a #hb_glyph_info_t. + * + * Returns glyph flags encoded within a #hb_glyph_info_t. + * + * Return value: + * The #hb_glyph_flags_t encoded within @info. + * + * Since: 1.5.0 + **/ +hb_glyph_flags_t +(hb_glyph_info_get_glyph_flags) (const hb_glyph_info_t *info) +{ + return hb_glyph_info_get_glyph_flags (info); +} + +/** + * hb_buffer_reverse: + * @buffer: an #hb_buffer_t. + * + * Reverses buffer contents. + * + * Since: 0.9.2 + **/ +void +hb_buffer_reverse (hb_buffer_t *buffer) +{ + buffer->reverse (); +} + +/** + * hb_buffer_reverse_range: + * @buffer: an #hb_buffer_t. + * @start: start index. + * @end: end index. + * + * Reverses buffer contents between start to end. + * + * Since: 0.9.41 + **/ +void +hb_buffer_reverse_range (hb_buffer_t *buffer, + unsigned int start, unsigned int end) +{ + buffer->reverse_range (start, end); +} + +/** + * hb_buffer_reverse_clusters: + * @buffer: an #hb_buffer_t. + * + * Reverses buffer clusters. That is, the buffer contents are + * reversed, then each cluster (consecutive items having the + * same cluster number) are reversed again. + * + * Since: 0.9.2 + **/ +void +hb_buffer_reverse_clusters (hb_buffer_t *buffer) +{ + buffer->reverse_clusters (); +} + +/** + * hb_buffer_guess_segment_properties: + * @buffer: an #hb_buffer_t. + * + * Sets unset buffer segment properties based on buffer Unicode + * contents. If buffer is not empty, it must have content type + * %HB_BUFFER_CONTENT_TYPE_UNICODE. + * + * If buffer script is not set (ie. is %HB_SCRIPT_INVALID), it + * will be set to the Unicode script of the first character in + * the buffer that has a script other than %HB_SCRIPT_COMMON, + * %HB_SCRIPT_INHERITED, and %HB_SCRIPT_UNKNOWN. + * + * Next, if buffer direction is not set (ie. is %HB_DIRECTION_INVALID), + * it will be set to the natural horizontal direction of the + * buffer script as returned by hb_script_get_horizontal_direction(). + * If hb_script_get_horizontal_direction() returns %HB_DIRECTION_INVALID, + * then %HB_DIRECTION_LTR is used. + * + * Finally, if buffer language is not set (ie. is %HB_LANGUAGE_INVALID), + * it will be set to the process's default language as returned by + * hb_language_get_default(). This may change in the future by + * taking buffer script into consideration when choosing a language. + * Note that hb_language_get_default() is NOT threadsafe the first time + * it is called. See documentation for that function for details. + * + * Since: 0.9.7 + **/ +void +hb_buffer_guess_segment_properties (hb_buffer_t *buffer) +{ + buffer->guess_segment_properties (); +} + +template <typename utf_t> +static inline void +hb_buffer_add_utf (hb_buffer_t *buffer, + const typename utf_t::codepoint_t *text, + int text_length, + unsigned int item_offset, + int item_length) +{ + typedef typename utf_t::codepoint_t T; + const hb_codepoint_t replacement = buffer->replacement; + + assert ((buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE) || + (!buffer->len && (buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID))); + + if (unlikely (hb_object_is_immutable (buffer))) + return; + + if (text_length == -1) + text_length = utf_t::strlen (text); + + if (item_length == -1) + item_length = text_length - item_offset; + + buffer->ensure (buffer->len + item_length * sizeof (T) / 4); + + /* If buffer is empty and pre-context provided, install it. + * This check is written this way, to make sure people can + * provide pre-context in one add_utf() call, then provide + * text in a follow-up call. See: + * + * https://bugzilla.mozilla.org/show_bug.cgi?id=801410#c13 + */ + if (!buffer->len && item_offset > 0) + { + /* Add pre-context */ + buffer->clear_context (0); + const T *prev = text + item_offset; + const T *start = text; + while (start < prev && buffer->context_len[0] < buffer->CONTEXT_LENGTH) + { + hb_codepoint_t u; + prev = utf_t::prev (prev, start, &u, replacement); + buffer->context[0][buffer->context_len[0]++] = u; + } + } + + const T *next = text + item_offset; + const T *end = next + item_length; + while (next < end) + { + hb_codepoint_t u; + const T *old_next = next; + next = utf_t::next (next, end, &u, replacement); + buffer->add (u, old_next - (const T *) text); + } + + /* Add post-context */ + buffer->clear_context (1); + end = text + text_length; + while (next < end && buffer->context_len[1] < buffer->CONTEXT_LENGTH) + { + hb_codepoint_t u; + next = utf_t::next (next, end, &u, replacement); + buffer->context[1][buffer->context_len[1]++] = u; + } + + buffer->content_type = HB_BUFFER_CONTENT_TYPE_UNICODE; +} + +/** + * hb_buffer_add_utf8: + * @buffer: an #hb_buffer_t. + * @text: (array length=text_length) (element-type uint8_t): an array of UTF-8 + * characters to append. + * @text_length: the length of the @text, or -1 if it is %NULL terminated. + * @item_offset: the offset of the first character to add to the @buffer. + * @item_length: the number of characters to add to the @buffer, or -1 for the + * end of @text (assuming it is %NULL terminated). + * + * See hb_buffer_add_codepoints(). + * + * Replaces invalid UTF-8 characters with the @buffer replacement code point, + * see hb_buffer_set_replacement_codepoint(). + * + * Since: 0.9.2 + **/ +void +hb_buffer_add_utf8 (hb_buffer_t *buffer, + const char *text, + int text_length, + unsigned int item_offset, + int item_length) +{ + hb_buffer_add_utf<hb_utf8_t> (buffer, (const uint8_t *) text, text_length, item_offset, item_length); +} + +/** + * hb_buffer_add_utf16: + * @buffer: an #hb_buffer_t. + * @text: (array length=text_length): an array of UTF-16 characters to append. + * @text_length: the length of the @text, or -1 if it is %NULL terminated. + * @item_offset: the offset of the first character to add to the @buffer. + * @item_length: the number of characters to add to the @buffer, or -1 for the + * end of @text (assuming it is %NULL terminated). + * + * See hb_buffer_add_codepoints(). + * + * Replaces invalid UTF-16 characters with the @buffer replacement code point, + * see hb_buffer_set_replacement_codepoint(). + * + * Since: 0.9.2 + **/ +void +hb_buffer_add_utf16 (hb_buffer_t *buffer, + const uint16_t *text, + int text_length, + unsigned int item_offset, + int item_length) +{ + hb_buffer_add_utf<hb_utf16_t> (buffer, text, text_length, item_offset, item_length); +} + +/** + * hb_buffer_add_utf32: + * @buffer: an #hb_buffer_t. + * @text: (array length=text_length): an array of UTF-32 characters to append. + * @text_length: the length of the @text, or -1 if it is %NULL terminated. + * @item_offset: the offset of the first character to add to the @buffer. + * @item_length: the number of characters to add to the @buffer, or -1 for the + * end of @text (assuming it is %NULL terminated). + * + * See hb_buffer_add_codepoints(). + * + * Replaces invalid UTF-32 characters with the @buffer replacement code point, + * see hb_buffer_set_replacement_codepoint(). + * + * Since: 0.9.2 + **/ +void +hb_buffer_add_utf32 (hb_buffer_t *buffer, + const uint32_t *text, + int text_length, + unsigned int item_offset, + int item_length) +{ + hb_buffer_add_utf<hb_utf32_t> (buffer, text, text_length, item_offset, item_length); +} + +/** + * hb_buffer_add_latin1: + * @buffer: an #hb_buffer_t. + * @text: (array length=text_length) (element-type uint8_t): an array of UTF-8 + * characters to append. + * @text_length: the length of the @text, or -1 if it is %NULL terminated. + * @item_offset: the offset of the first character to add to the @buffer. + * @item_length: the number of characters to add to the @buffer, or -1 for the + * end of @text (assuming it is %NULL terminated). + * + * Similar to hb_buffer_add_codepoints(), but allows only access to first 256 + * Unicode code points that can fit in 8-bit strings. + * + * <note>Has nothing to do with non-Unicode Latin-1 encoding.</note> + * + * Since: 0.9.39 + **/ +void +hb_buffer_add_latin1 (hb_buffer_t *buffer, + const uint8_t *text, + int text_length, + unsigned int item_offset, + int item_length) +{ + hb_buffer_add_utf<hb_latin1_t> (buffer, text, text_length, item_offset, item_length); +} + +/** + * hb_buffer_add_codepoints: + * @buffer: a #hb_buffer_t to append characters to. + * @text: (array length=text_length): an array of Unicode code points to append. + * @text_length: the length of the @text, or -1 if it is %NULL terminated. + * @item_offset: the offset of the first code point to add to the @buffer. + * @item_length: the number of code points to add to the @buffer, or -1 for the + * end of @text (assuming it is %NULL terminated). + * + * Appends characters from @text array to @buffer. The @item_offset is the + * position of the first character from @text that will be appended, and + * @item_length is the number of character. When shaping part of a larger text + * (e.g. a run of text from a paragraph), instead of passing just the substring + * corresponding to the run, it is preferable to pass the whole + * paragraph and specify the run start and length as @item_offset and + * @item_length, respectively, to give HarfBuzz the full context to be able, + * for example, to do cross-run Arabic shaping or properly handle combining + * marks at stat of run. + * + * This function does not check the validity of @text, it is up to the caller + * to ensure it contains a valid Unicode code points. + * + * Since: 0.9.31 + **/ +void +hb_buffer_add_codepoints (hb_buffer_t *buffer, + const hb_codepoint_t *text, + int text_length, + unsigned int item_offset, + int item_length) +{ + hb_buffer_add_utf<hb_utf32_novalidate_t> (buffer, text, text_length, item_offset, item_length); +} + + +/** + * hb_buffer_append: + * @buffer: an #hb_buffer_t. + * @source: source #hb_buffer_t. + * @start: start index into source buffer to copy. Use 0 to copy from start of buffer. + * @end: end index into source buffer to copy. Use @HB_FEATURE_GLOBAL_END to copy to end of buffer. + * + * Append (part of) contents of another buffer to this buffer. + * + * Since: 1.5.0 + **/ +HB_EXTERN void +hb_buffer_append (hb_buffer_t *buffer, + hb_buffer_t *source, + unsigned int start, + unsigned int end) +{ + assert (!buffer->have_output && !source->have_output); + assert (buffer->have_positions == source->have_positions || + !buffer->len || !source->len); + assert (buffer->content_type == source->content_type || + !buffer->len || !source->len); + + if (end > source->len) + end = source->len; + if (start > end) + start = end; + if (start == end) + return; + + if (!buffer->len) + buffer->content_type = source->content_type; + if (!buffer->have_positions && source->have_positions) + buffer->clear_positions (); + + if (buffer->len + (end - start) < buffer->len) /* Overflows. */ + { + buffer->successful = false; + return; + } + + unsigned int orig_len = buffer->len; + hb_buffer_set_length (buffer, buffer->len + (end - start)); + if (unlikely (!buffer->successful)) + return; + + memcpy (buffer->info + orig_len, source->info + start, (end - start) * sizeof (buffer->info[0])); + if (buffer->have_positions) + memcpy (buffer->pos + orig_len, source->pos + start, (end - start) * sizeof (buffer->pos[0])); +} + + +static int +compare_info_codepoint (const hb_glyph_info_t *pa, + const hb_glyph_info_t *pb) +{ + return (int) pb->codepoint - (int) pa->codepoint; +} + +static inline void +normalize_glyphs_cluster (hb_buffer_t *buffer, + unsigned int start, + unsigned int end, + bool backward) +{ + hb_glyph_position_t *pos = buffer->pos; + + /* Total cluster advance */ + hb_position_t total_x_advance = 0, total_y_advance = 0; + for (unsigned int i = start; i < end; i++) + { + total_x_advance += pos[i].x_advance; + total_y_advance += pos[i].y_advance; + } + + hb_position_t x_advance = 0, y_advance = 0; + for (unsigned int i = start; i < end; i++) + { + pos[i].x_offset += x_advance; + pos[i].y_offset += y_advance; + + x_advance += pos[i].x_advance; + y_advance += pos[i].y_advance; + + pos[i].x_advance = 0; + pos[i].y_advance = 0; + } + + if (backward) + { + /* Transfer all cluster advance to the last glyph. */ + pos[end - 1].x_advance = total_x_advance; + pos[end - 1].y_advance = total_y_advance; + + hb_stable_sort (buffer->info + start, end - start - 1, compare_info_codepoint, buffer->pos + start); + } else { + /* Transfer all cluster advance to the first glyph. */ + pos[start].x_advance += total_x_advance; + pos[start].y_advance += total_y_advance; + for (unsigned int i = start + 1; i < end; i++) { + pos[i].x_offset -= total_x_advance; + pos[i].y_offset -= total_y_advance; + } + hb_stable_sort (buffer->info + start + 1, end - start - 1, compare_info_codepoint, buffer->pos + start + 1); + } +} + +/** + * hb_buffer_normalize_glyphs: + * @buffer: an #hb_buffer_t. + * + * Reorders a glyph buffer to have canonical in-cluster glyph order / position. + * The resulting clusters should behave identical to pre-reordering clusters. + * + * <note>This has nothing to do with Unicode normalization.</note> + * + * Since: 0.9.2 + **/ +void +hb_buffer_normalize_glyphs (hb_buffer_t *buffer) +{ + assert (buffer->have_positions); + assert ((buffer->content_type == HB_BUFFER_CONTENT_TYPE_GLYPHS) || + (!buffer->len && (buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID))); + + bool backward = HB_DIRECTION_IS_BACKWARD (buffer->props.direction); + + foreach_cluster (buffer, start, end) + normalize_glyphs_cluster (buffer, start, end, backward); +} + +void +hb_buffer_t::sort (unsigned int start, unsigned int end, int(*compar)(const hb_glyph_info_t *, const hb_glyph_info_t *)) +{ + assert (!have_positions); + for (unsigned int i = start + 1; i < end; i++) + { + unsigned int j = i; + while (j > start && compar (&info[j - 1], &info[i]) > 0) + j--; + if (i == j) + continue; + /* Move item i to occupy place for item j, shift what's in between. */ + merge_clusters (j, i + 1); + { + hb_glyph_info_t t = info[i]; + memmove (&info[j + 1], &info[j], (i - j) * sizeof (hb_glyph_info_t)); + info[j] = t; + } + } +} + + +/* + * Comparing buffers. + */ + +/** + * hb_buffer_diff: + * @buffer: a buffer. + * @reference: other buffer to compare to. + * @dottedcircle_glyph: glyph id of U+25CC DOTTED CIRCLE, or (hb_codepont_t) -1. + * @position_fuzz: allowed absolute difference in position values. + * + * If dottedcircle_glyph is (hb_codepoint_t) -1 then %HB_BUFFER_DIFF_FLAG_DOTTED_CIRCLE_PRESENT + * and %HB_BUFFER_DIFF_FLAG_NOTDEF_PRESENT are never returned. This should be used by most + * callers if just comparing two buffers is needed. + * + * Since: 1.5.0 + **/ +hb_buffer_diff_flags_t +hb_buffer_diff (hb_buffer_t *buffer, + hb_buffer_t *reference, + hb_codepoint_t dottedcircle_glyph, + unsigned int position_fuzz) +{ + if (buffer->content_type != reference->content_type && buffer->len && reference->len) + return HB_BUFFER_DIFF_FLAG_CONTENT_TYPE_MISMATCH; + + hb_buffer_diff_flags_t result = HB_BUFFER_DIFF_FLAG_EQUAL; + bool contains = dottedcircle_glyph != (hb_codepoint_t) -1; + + unsigned int count = reference->len; + + if (buffer->len != count) + { + /* + * we can't compare glyph-by-glyph, but we do want to know if there + * are .notdef or dottedcircle glyphs present in the reference buffer + */ + const hb_glyph_info_t *info = reference->info; + unsigned int i; + for (i = 0; i < count; i++) + { + if (contains && info[i].codepoint == dottedcircle_glyph) + result |= HB_BUFFER_DIFF_FLAG_DOTTED_CIRCLE_PRESENT; + if (contains && info[i].codepoint == 0) + result |= HB_BUFFER_DIFF_FLAG_NOTDEF_PRESENT; + } + result |= HB_BUFFER_DIFF_FLAG_LENGTH_MISMATCH; + return hb_buffer_diff_flags_t (result); + } + + if (!count) + return hb_buffer_diff_flags_t (result); + + const hb_glyph_info_t *buf_info = buffer->info; + const hb_glyph_info_t *ref_info = reference->info; + for (unsigned int i = 0; i < count; i++) + { + if (buf_info->codepoint != ref_info->codepoint) + result |= HB_BUFFER_DIFF_FLAG_CODEPOINT_MISMATCH; + if (buf_info->cluster != ref_info->cluster) + result |= HB_BUFFER_DIFF_FLAG_CLUSTER_MISMATCH; + if ((buf_info->mask & ~ref_info->mask & HB_GLYPH_FLAG_DEFINED)) + result |= HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH; + if (contains && ref_info->codepoint == dottedcircle_glyph) + result |= HB_BUFFER_DIFF_FLAG_DOTTED_CIRCLE_PRESENT; + if (contains && ref_info->codepoint == 0) + result |= HB_BUFFER_DIFF_FLAG_NOTDEF_PRESENT; + buf_info++; + ref_info++; + } + + if (buffer->content_type == HB_BUFFER_CONTENT_TYPE_GLYPHS) + { + assert (buffer->have_positions); + const hb_glyph_position_t *buf_pos = buffer->pos; + const hb_glyph_position_t *ref_pos = reference->pos; + for (unsigned int i = 0; i < count; i++) + { + if ((unsigned int) abs (buf_pos->x_advance - ref_pos->x_advance) > position_fuzz || + (unsigned int) abs (buf_pos->y_advance - ref_pos->y_advance) > position_fuzz || + (unsigned int) abs (buf_pos->x_offset - ref_pos->x_offset) > position_fuzz || + (unsigned int) abs (buf_pos->y_offset - ref_pos->y_offset) > position_fuzz) + { + result |= HB_BUFFER_DIFF_FLAG_POSITION_MISMATCH; + break; + } + buf_pos++; + ref_pos++; + } + } + + return result; +} + + +/* + * Debugging. + */ + +#ifndef HB_NO_BUFFER_MESSAGE +/** + * hb_buffer_set_message_func: + * @buffer: an #hb_buffer_t. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 1.1.3 + **/ +void +hb_buffer_set_message_func (hb_buffer_t *buffer, + hb_buffer_message_func_t func, + void *user_data, hb_destroy_func_t destroy) +{ + if (buffer->message_destroy) + buffer->message_destroy (buffer->message_data); + + if (func) { + buffer->message_func = func; + buffer->message_data = user_data; + buffer->message_destroy = destroy; + } else { + buffer->message_func = nullptr; + buffer->message_data = nullptr; + buffer->message_destroy = nullptr; + } +} +bool +hb_buffer_t::message_impl (hb_font_t *font, const char *fmt, va_list ap) +{ + char buf[100]; + vsnprintf (buf, sizeof (buf), fmt, ap); + return (bool) this->message_func (this, font, buf, this->message_data); +} +#endif diff --git a/thirdparty/harfbuzz/src/hb-buffer.h b/thirdparty/harfbuzz/src/hb-buffer.h new file mode 100644 index 0000000000..2f581f3c73 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-buffer.h @@ -0,0 +1,586 @@ +/* + * Copyright © 1998-2004 David Turner and Werner Lemberg + * Copyright © 2004,2007,2009 Red Hat, Inc. + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Owen Taylor, Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_BUFFER_H +#define HB_BUFFER_H + +#include "hb-common.h" +#include "hb-unicode.h" +#include "hb-font.h" + +HB_BEGIN_DECLS + +/** + * hb_glyph_info_t: + * @codepoint: either a Unicode code point (before shaping) or a glyph index + * (after shaping). + * @cluster: the index of the character in the original text that corresponds + * to this #hb_glyph_info_t, or whatever the client passes to + * hb_buffer_add(). More than one #hb_glyph_info_t can have the same + * @cluster value, if they resulted from the same character (e.g. one + * to many glyph substitution), and when more than one character gets + * merged in the same glyph (e.g. many to one glyph substitution) the + * #hb_glyph_info_t will have the smallest cluster value of them. + * By default some characters are merged into the same cluster + * (e.g. combining marks have the same cluster as their bases) + * even if they are separate glyphs, hb_buffer_set_cluster_level() + * allow selecting more fine-grained cluster handling. + * + * The #hb_glyph_info_t is the structure that holds information about the + * glyphs and their relation to input text. + */ +typedef struct hb_glyph_info_t +{ + hb_codepoint_t codepoint; + /*< private >*/ + hb_mask_t mask; + /*< public >*/ + uint32_t cluster; + + /*< private >*/ + hb_var_int_t var1; + hb_var_int_t var2; +} hb_glyph_info_t; + +/** + * hb_glyph_flags_t: + * @HB_GLYPH_FLAG_UNSAFE_TO_BREAK: Indicates that if input text is broken at the + * beginning of the cluster this glyph is part of, + * then both sides need to be re-shaped, as the + * result might be different. On the flip side, + * it means that when this flag is not present, + * then it's safe to break the glyph-run at the + * beginning of this cluster, and the two sides + * represent the exact same result one would get + * if breaking input text at the beginning of + * this cluster and shaping the two sides + * separately. This can be used to optimize + * paragraph layout, by avoiding re-shaping + * of each line after line-breaking, or limiting + * the reshaping to a small piece around the + * breaking point only. + * @HB_GLYPH_FLAG_DEFINED: All the currently defined flags. + * + * Since: 1.5.0 + */ +typedef enum { /*< flags >*/ + HB_GLYPH_FLAG_UNSAFE_TO_BREAK = 0x00000001, + + HB_GLYPH_FLAG_DEFINED = 0x00000001 /* OR of all defined flags */ +} hb_glyph_flags_t; + +HB_EXTERN hb_glyph_flags_t +hb_glyph_info_get_glyph_flags (const hb_glyph_info_t *info); + +#define hb_glyph_info_get_glyph_flags(info) \ + ((hb_glyph_flags_t) ((unsigned int) (info)->mask & HB_GLYPH_FLAG_DEFINED)) + + +/** + * hb_glyph_position_t: + * @x_advance: how much the line advances after drawing this glyph when setting + * text in horizontal direction. + * @y_advance: how much the line advances after drawing this glyph when setting + * text in vertical direction. + * @x_offset: how much the glyph moves on the X-axis before drawing it, this + * should not affect how much the line advances. + * @y_offset: how much the glyph moves on the Y-axis before drawing it, this + * should not affect how much the line advances. + * + * The #hb_glyph_position_t is the structure that holds the positions of the + * glyph in both horizontal and vertical directions. All positions in + * #hb_glyph_position_t are relative to the current point. + * + */ +typedef struct hb_glyph_position_t { + hb_position_t x_advance; + hb_position_t y_advance; + hb_position_t x_offset; + hb_position_t y_offset; + + /*< private >*/ + hb_var_int_t var; +} hb_glyph_position_t; + +/** + * hb_segment_properties_t: + * @direction: the #hb_direction_t of the buffer, see hb_buffer_set_direction(). + * @script: the #hb_script_t of the buffer, see hb_buffer_set_script(). + * @language: the #hb_language_t of the buffer, see hb_buffer_set_language(). + * + * The structure that holds various text properties of an #hb_buffer_t. Can be + * set and retrieved using hb_buffer_set_segment_properties() and + * hb_buffer_get_segment_properties(), respectively. + */ +typedef struct hb_segment_properties_t { + hb_direction_t direction; + hb_script_t script; + hb_language_t language; + /*< private >*/ + void *reserved1; + void *reserved2; +} hb_segment_properties_t; + +#define HB_SEGMENT_PROPERTIES_DEFAULT {HB_DIRECTION_INVALID, \ + HB_SCRIPT_INVALID, \ + HB_LANGUAGE_INVALID, \ + (void *) 0, \ + (void *) 0} + +HB_EXTERN hb_bool_t +hb_segment_properties_equal (const hb_segment_properties_t *a, + const hb_segment_properties_t *b); + +HB_EXTERN unsigned int +hb_segment_properties_hash (const hb_segment_properties_t *p); + + + +/** + * hb_buffer_t: + * + * The main structure holding the input text and its properties before shaping, + * and output glyphs and their information after shaping. + */ + +typedef struct hb_buffer_t hb_buffer_t; + +HB_EXTERN hb_buffer_t * +hb_buffer_create (void); + +HB_EXTERN hb_buffer_t * +hb_buffer_get_empty (void); + +HB_EXTERN hb_buffer_t * +hb_buffer_reference (hb_buffer_t *buffer); + +HB_EXTERN void +hb_buffer_destroy (hb_buffer_t *buffer); + +HB_EXTERN hb_bool_t +hb_buffer_set_user_data (hb_buffer_t *buffer, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace); + +HB_EXTERN void * +hb_buffer_get_user_data (hb_buffer_t *buffer, + hb_user_data_key_t *key); + + +/** + * hb_buffer_content_type_t: + * @HB_BUFFER_CONTENT_TYPE_INVALID: Initial value for new buffer. + * @HB_BUFFER_CONTENT_TYPE_UNICODE: The buffer contains input characters (before shaping). + * @HB_BUFFER_CONTENT_TYPE_GLYPHS: The buffer contains output glyphs (after shaping). + */ +typedef enum { + HB_BUFFER_CONTENT_TYPE_INVALID = 0, + HB_BUFFER_CONTENT_TYPE_UNICODE, + HB_BUFFER_CONTENT_TYPE_GLYPHS +} hb_buffer_content_type_t; + +HB_EXTERN void +hb_buffer_set_content_type (hb_buffer_t *buffer, + hb_buffer_content_type_t content_type); + +HB_EXTERN hb_buffer_content_type_t +hb_buffer_get_content_type (hb_buffer_t *buffer); + + +HB_EXTERN void +hb_buffer_set_unicode_funcs (hb_buffer_t *buffer, + hb_unicode_funcs_t *unicode_funcs); + +HB_EXTERN hb_unicode_funcs_t * +hb_buffer_get_unicode_funcs (hb_buffer_t *buffer); + +HB_EXTERN void +hb_buffer_set_direction (hb_buffer_t *buffer, + hb_direction_t direction); + +HB_EXTERN hb_direction_t +hb_buffer_get_direction (hb_buffer_t *buffer); + +HB_EXTERN void +hb_buffer_set_script (hb_buffer_t *buffer, + hb_script_t script); + +HB_EXTERN hb_script_t +hb_buffer_get_script (hb_buffer_t *buffer); + +HB_EXTERN void +hb_buffer_set_language (hb_buffer_t *buffer, + hb_language_t language); + + +HB_EXTERN hb_language_t +hb_buffer_get_language (hb_buffer_t *buffer); + +HB_EXTERN void +hb_buffer_set_segment_properties (hb_buffer_t *buffer, + const hb_segment_properties_t *props); + +HB_EXTERN void +hb_buffer_get_segment_properties (hb_buffer_t *buffer, + hb_segment_properties_t *props); + +HB_EXTERN void +hb_buffer_guess_segment_properties (hb_buffer_t *buffer); + + +/** + * hb_buffer_flags_t: + * @HB_BUFFER_FLAG_DEFAULT: the default buffer flag. + * @HB_BUFFER_FLAG_BOT: flag indicating that special handling of the beginning + * of text paragraph can be applied to this buffer. Should usually + * be set, unless you are passing to the buffer only part + * of the text without the full context. + * @HB_BUFFER_FLAG_EOT: flag indicating that special handling of the end of text + * paragraph can be applied to this buffer, similar to + * @HB_BUFFER_FLAG_BOT. + * @HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES: + * flag indication that character with Default_Ignorable + * Unicode property should use the corresponding glyph + * from the font, instead of hiding them (done by + * replacing them with the space glyph and zeroing the + * advance width.) This flag takes precedence over + * @HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES. + * @HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES: + * flag indication that character with Default_Ignorable + * Unicode property should be removed from glyph string + * instead of hiding them (done by replacing them with the + * space glyph and zeroing the advance width.) + * @HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES takes + * precedence over this flag. Since: 1.8.0 + * @HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE: + * flag indicating that a dotted circle should + * not be inserted in the rendering of incorrect + * character sequences (such at <0905 093E>). Since: 2.4 + * + * Since: 0.9.20 + */ +typedef enum { /*< flags >*/ + HB_BUFFER_FLAG_DEFAULT = 0x00000000u, + HB_BUFFER_FLAG_BOT = 0x00000001u, /* Beginning-of-text */ + HB_BUFFER_FLAG_EOT = 0x00000002u, /* End-of-text */ + HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES = 0x00000004u, + HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES = 0x00000008u, + HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE = 0x00000010u +} hb_buffer_flags_t; + +HB_EXTERN void +hb_buffer_set_flags (hb_buffer_t *buffer, + hb_buffer_flags_t flags); + +HB_EXTERN hb_buffer_flags_t +hb_buffer_get_flags (hb_buffer_t *buffer); + +/** + * hb_buffer_cluster_level_t: + * @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES: Return cluster values grouped by graphemes into + * monotone order. + * @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS: Return cluster values grouped into monotone order. + * @HB_BUFFER_CLUSTER_LEVEL_CHARACTERS: Don't group cluster values. + * @HB_BUFFER_CLUSTER_LEVEL_DEFAULT: Default cluster level, + * equal to @HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES. + * + * Since: 0.9.42 + */ +typedef enum { + HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES = 0, + HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS = 1, + HB_BUFFER_CLUSTER_LEVEL_CHARACTERS = 2, + HB_BUFFER_CLUSTER_LEVEL_DEFAULT = HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES +} hb_buffer_cluster_level_t; + +HB_EXTERN void +hb_buffer_set_cluster_level (hb_buffer_t *buffer, + hb_buffer_cluster_level_t cluster_level); + +HB_EXTERN hb_buffer_cluster_level_t +hb_buffer_get_cluster_level (hb_buffer_t *buffer); + +/** + * HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT: + * + * The default code point for replacing invalid characters in a given encoding. + * Set to U+FFFD REPLACEMENT CHARACTER. + * + * Since: 0.9.31 + */ +#define HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT 0xFFFDu + +HB_EXTERN void +hb_buffer_set_replacement_codepoint (hb_buffer_t *buffer, + hb_codepoint_t replacement); + +HB_EXTERN hb_codepoint_t +hb_buffer_get_replacement_codepoint (hb_buffer_t *buffer); + +HB_EXTERN void +hb_buffer_set_invisible_glyph (hb_buffer_t *buffer, + hb_codepoint_t invisible); + +HB_EXTERN hb_codepoint_t +hb_buffer_get_invisible_glyph (hb_buffer_t *buffer); + + +HB_EXTERN void +hb_buffer_reset (hb_buffer_t *buffer); + +HB_EXTERN void +hb_buffer_clear_contents (hb_buffer_t *buffer); + +HB_EXTERN hb_bool_t +hb_buffer_pre_allocate (hb_buffer_t *buffer, + unsigned int size); + + +HB_EXTERN hb_bool_t +hb_buffer_allocation_successful (hb_buffer_t *buffer); + +HB_EXTERN void +hb_buffer_reverse (hb_buffer_t *buffer); + +HB_EXTERN void +hb_buffer_reverse_range (hb_buffer_t *buffer, + unsigned int start, unsigned int end); + +HB_EXTERN void +hb_buffer_reverse_clusters (hb_buffer_t *buffer); + + +/* Filling the buffer in */ + +HB_EXTERN void +hb_buffer_add (hb_buffer_t *buffer, + hb_codepoint_t codepoint, + unsigned int cluster); + +HB_EXTERN void +hb_buffer_add_utf8 (hb_buffer_t *buffer, + const char *text, + int text_length, + unsigned int item_offset, + int item_length); + +HB_EXTERN void +hb_buffer_add_utf16 (hb_buffer_t *buffer, + const uint16_t *text, + int text_length, + unsigned int item_offset, + int item_length); + +HB_EXTERN void +hb_buffer_add_utf32 (hb_buffer_t *buffer, + const uint32_t *text, + int text_length, + unsigned int item_offset, + int item_length); + +HB_EXTERN void +hb_buffer_add_latin1 (hb_buffer_t *buffer, + const uint8_t *text, + int text_length, + unsigned int item_offset, + int item_length); + +HB_EXTERN void +hb_buffer_add_codepoints (hb_buffer_t *buffer, + const hb_codepoint_t *text, + int text_length, + unsigned int item_offset, + int item_length); + +HB_EXTERN void +hb_buffer_append (hb_buffer_t *buffer, + hb_buffer_t *source, + unsigned int start, + unsigned int end); + +HB_EXTERN hb_bool_t +hb_buffer_set_length (hb_buffer_t *buffer, + unsigned int length); + +HB_EXTERN unsigned int +hb_buffer_get_length (hb_buffer_t *buffer); + +/* Getting glyphs out of the buffer */ + +HB_EXTERN hb_glyph_info_t * +hb_buffer_get_glyph_infos (hb_buffer_t *buffer, + unsigned int *length); + +HB_EXTERN hb_glyph_position_t * +hb_buffer_get_glyph_positions (hb_buffer_t *buffer, + unsigned int *length); + + +HB_EXTERN void +hb_buffer_normalize_glyphs (hb_buffer_t *buffer); + + +/* + * Serialize + */ + +/** + * hb_buffer_serialize_flags_t: + * @HB_BUFFER_SERIALIZE_FLAG_DEFAULT: serialize glyph names, clusters and positions. + * @HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS: do not serialize glyph cluster. + * @HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS: do not serialize glyph position information. + * @HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES: do no serialize glyph name. + * @HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS: serialize glyph extents. + * @HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS: serialize glyph flags. Since: 1.5.0 + * @HB_BUFFER_SERIALIZE_FLAG_NO_ADVANCES: do not serialize glyph advances, + * glyph offsets will reflect absolute glyph positions. Since: 1.8.0 + * + * Flags that control what glyph information are serialized in hb_buffer_serialize_glyphs(). + * + * Since: 0.9.20 + */ +typedef enum { /*< flags >*/ + HB_BUFFER_SERIALIZE_FLAG_DEFAULT = 0x00000000u, + HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS = 0x00000001u, + HB_BUFFER_SERIALIZE_FLAG_NO_POSITIONS = 0x00000002u, + HB_BUFFER_SERIALIZE_FLAG_NO_GLYPH_NAMES = 0x00000004u, + HB_BUFFER_SERIALIZE_FLAG_GLYPH_EXTENTS = 0x00000008u, + HB_BUFFER_SERIALIZE_FLAG_GLYPH_FLAGS = 0x00000010u, + HB_BUFFER_SERIALIZE_FLAG_NO_ADVANCES = 0x00000020u +} hb_buffer_serialize_flags_t; + +/** + * hb_buffer_serialize_format_t: + * @HB_BUFFER_SERIALIZE_FORMAT_TEXT: a human-readable, plain text format. + * @HB_BUFFER_SERIALIZE_FORMAT_JSON: a machine-readable JSON format. + * @HB_BUFFER_SERIALIZE_FORMAT_INVALID: invalid format. + * + * The buffer serialization and de-serialization format used in + * hb_buffer_serialize_glyphs() and hb_buffer_deserialize_glyphs(). + * + * Since: 0.9.2 + */ +typedef enum { + HB_BUFFER_SERIALIZE_FORMAT_TEXT = HB_TAG('T','E','X','T'), + HB_BUFFER_SERIALIZE_FORMAT_JSON = HB_TAG('J','S','O','N'), + HB_BUFFER_SERIALIZE_FORMAT_INVALID = HB_TAG_NONE +} hb_buffer_serialize_format_t; + +HB_EXTERN hb_buffer_serialize_format_t +hb_buffer_serialize_format_from_string (const char *str, int len); + +HB_EXTERN const char * +hb_buffer_serialize_format_to_string (hb_buffer_serialize_format_t format); + +HB_EXTERN const char ** +hb_buffer_serialize_list_formats (void); + +HB_EXTERN unsigned int +hb_buffer_serialize_glyphs (hb_buffer_t *buffer, + unsigned int start, + unsigned int end, + char *buf, + unsigned int buf_size, + unsigned int *buf_consumed, + hb_font_t *font, + hb_buffer_serialize_format_t format, + hb_buffer_serialize_flags_t flags); + +HB_EXTERN hb_bool_t +hb_buffer_deserialize_glyphs (hb_buffer_t *buffer, + const char *buf, + int buf_len, + const char **end_ptr, + hb_font_t *font, + hb_buffer_serialize_format_t format); + + +/* + * Compare buffers + */ + +typedef enum { /*< flags >*/ + HB_BUFFER_DIFF_FLAG_EQUAL = 0x0000, + + /* Buffers with different content_type cannot be meaningfully compared + * in any further detail. */ + HB_BUFFER_DIFF_FLAG_CONTENT_TYPE_MISMATCH = 0x0001, + + /* For buffers with differing length, the per-glyph comparison is not + * attempted, though we do still scan reference for dottedcircle / .notdef + * glyphs. */ + HB_BUFFER_DIFF_FLAG_LENGTH_MISMATCH = 0x0002, + + /* We want to know if dottedcircle / .notdef glyphs are present in the + * reference, as we may not care so much about other differences in this + * case. */ + HB_BUFFER_DIFF_FLAG_NOTDEF_PRESENT = 0x0004, + HB_BUFFER_DIFF_FLAG_DOTTED_CIRCLE_PRESENT = 0x0008, + + /* If the buffers have the same length, we compare them glyph-by-glyph + * and report which aspect(s) of the glyph info/position are different. */ + HB_BUFFER_DIFF_FLAG_CODEPOINT_MISMATCH = 0x0010, + HB_BUFFER_DIFF_FLAG_CLUSTER_MISMATCH = 0x0020, + HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH = 0x0040, + HB_BUFFER_DIFF_FLAG_POSITION_MISMATCH = 0x0080 + +} hb_buffer_diff_flags_t; + +/* Compare the contents of two buffers, report types of differences. */ +HB_EXTERN hb_buffer_diff_flags_t +hb_buffer_diff (hb_buffer_t *buffer, + hb_buffer_t *reference, + hb_codepoint_t dottedcircle_glyph, + unsigned int position_fuzz); + + +/* + * Debugging. + */ + +typedef hb_bool_t (*hb_buffer_message_func_t) (hb_buffer_t *buffer, + hb_font_t *font, + const char *message, + void *user_data); + +HB_EXTERN void +hb_buffer_set_message_func (hb_buffer_t *buffer, + hb_buffer_message_func_t func, + void *user_data, hb_destroy_func_t destroy); + + +HB_END_DECLS + +#endif /* HB_BUFFER_H */ diff --git a/thirdparty/harfbuzz/src/hb-buffer.hh b/thirdparty/harfbuzz/src/hb-buffer.hh new file mode 100644 index 0000000000..3420ba434a --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-buffer.hh @@ -0,0 +1,451 @@ +/* + * Copyright © 1998-2004 David Turner and Werner Lemberg + * Copyright © 2004,2007,2009,2010 Red Hat, Inc. + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Owen Taylor, Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_BUFFER_HH +#define HB_BUFFER_HH + +#include "hb.hh" +#include "hb-unicode.hh" + + +#ifndef HB_BUFFER_MAX_LEN_FACTOR +#define HB_BUFFER_MAX_LEN_FACTOR 32 +#endif +#ifndef HB_BUFFER_MAX_LEN_MIN +#define HB_BUFFER_MAX_LEN_MIN 8192 +#endif +#ifndef HB_BUFFER_MAX_LEN_DEFAULT +#define HB_BUFFER_MAX_LEN_DEFAULT 0x3FFFFFFF /* Shaping more than a billion chars? Let us know! */ +#endif + +#ifndef HB_BUFFER_MAX_OPS_FACTOR +#define HB_BUFFER_MAX_OPS_FACTOR 64 +#endif +#ifndef HB_BUFFER_MAX_OPS_MIN +#define HB_BUFFER_MAX_OPS_MIN 1024 +#endif +#ifndef HB_BUFFER_MAX_OPS_DEFAULT +#define HB_BUFFER_MAX_OPS_DEFAULT 0x1FFFFFFF /* Shaping more than a billion operations? Let us know! */ +#endif + +static_assert ((sizeof (hb_glyph_info_t) == 20), ""); +static_assert ((sizeof (hb_glyph_info_t) == sizeof (hb_glyph_position_t)), ""); + +HB_MARK_AS_FLAG_T (hb_buffer_flags_t); +HB_MARK_AS_FLAG_T (hb_buffer_serialize_flags_t); +HB_MARK_AS_FLAG_T (hb_buffer_diff_flags_t); + +enum hb_buffer_scratch_flags_t { + HB_BUFFER_SCRATCH_FLAG_DEFAULT = 0x00000000u, + HB_BUFFER_SCRATCH_FLAG_HAS_NON_ASCII = 0x00000001u, + HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES = 0x00000002u, + HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK = 0x00000004u, + HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT = 0x00000008u, + HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK = 0x00000010u, + HB_BUFFER_SCRATCH_FLAG_HAS_CGJ = 0x00000020u, + + /* Reserved for complex shapers' internal use. */ + HB_BUFFER_SCRATCH_FLAG_COMPLEX0 = 0x01000000u, + HB_BUFFER_SCRATCH_FLAG_COMPLEX1 = 0x02000000u, + HB_BUFFER_SCRATCH_FLAG_COMPLEX2 = 0x04000000u, + HB_BUFFER_SCRATCH_FLAG_COMPLEX3 = 0x08000000u, +}; +HB_MARK_AS_FLAG_T (hb_buffer_scratch_flags_t); + + +/* + * hb_buffer_t + */ + +struct hb_buffer_t +{ + hb_object_header_t header; + + /* Information about how the text in the buffer should be treated */ + hb_unicode_funcs_t *unicode; /* Unicode functions */ + hb_buffer_flags_t flags; /* BOT / EOT / etc. */ + hb_buffer_cluster_level_t cluster_level; + hb_codepoint_t replacement; /* U+FFFD or something else. */ + hb_codepoint_t invisible; /* 0 or something else. */ + hb_buffer_scratch_flags_t scratch_flags; /* Have space-fallback, etc. */ + unsigned int max_len; /* Maximum allowed len. */ + int max_ops; /* Maximum allowed operations. */ + + /* Buffer contents */ + hb_buffer_content_type_t content_type; + hb_segment_properties_t props; /* Script, language, direction */ + + bool successful; /* Allocations successful */ + bool have_output; /* Whether we have an output buffer going on */ + bool have_positions; /* Whether we have positions */ + + unsigned int idx; /* Cursor into ->info and ->pos arrays */ + unsigned int len; /* Length of ->info and ->pos arrays */ + unsigned int out_len; /* Length of ->out array if have_output */ + + unsigned int allocated; /* Length of allocated arrays */ + hb_glyph_info_t *info; + hb_glyph_info_t *out_info; + hb_glyph_position_t *pos; + + unsigned int serial; + + /* Text before / after the main buffer contents. + * Always in Unicode, and ordered outward. + * Index 0 is for "pre-context", 1 for "post-context". */ + static constexpr unsigned CONTEXT_LENGTH = 5u; + hb_codepoint_t context[2][CONTEXT_LENGTH]; + unsigned int context_len[2]; + + /* Debugging API */ +#ifndef HB_NO_BUFFER_MESSAGE + hb_buffer_message_func_t message_func; + void *message_data; + hb_destroy_func_t message_destroy; +#endif + + /* Internal debugging. */ + /* The bits here reflect current allocations of the bytes in glyph_info_t's var1 and var2. */ +#ifndef HB_NDEBUG + uint8_t allocated_var_bits; +#endif + + + /* Methods */ + + bool in_error () const { return !successful; } + + void allocate_var (unsigned int start, unsigned int count) + { +#ifndef HB_NDEBUG + unsigned int end = start + count; + assert (end <= 8); + unsigned int bits = (1u<<end) - (1u<<start); + assert (0 == (allocated_var_bits & bits)); + allocated_var_bits |= bits; +#endif + } + void deallocate_var (unsigned int start, unsigned int count) + { +#ifndef HB_NDEBUG + unsigned int end = start + count; + assert (end <= 8); + unsigned int bits = (1u<<end) - (1u<<start); + assert (bits == (allocated_var_bits & bits)); + allocated_var_bits &= ~bits; +#endif + } + void assert_var (unsigned int start, unsigned int count) + { +#ifndef HB_NDEBUG + unsigned int end = start + count; + assert (end <= 8); + unsigned int bits = (1u<<end) - (1u<<start); + assert (bits == (allocated_var_bits & bits)); +#endif + } + void deallocate_var_all () + { +#ifndef HB_NDEBUG + allocated_var_bits = 0; +#endif + } + + hb_glyph_info_t &cur (unsigned int i = 0) { return info[idx + i]; } + hb_glyph_info_t cur (unsigned int i = 0) const { return info[idx + i]; } + + hb_glyph_position_t &cur_pos (unsigned int i = 0) { return pos[idx + i]; } + hb_glyph_position_t cur_pos (unsigned int i = 0) const { return pos[idx + i]; } + + hb_glyph_info_t &prev () { return out_info[out_len ? out_len - 1 : 0]; } + hb_glyph_info_t prev () const { return out_info[out_len ? out_len - 1 : 0]; } + + bool has_separate_output () const { return info != out_info; } + + + HB_INTERNAL void reset (); + HB_INTERNAL void clear (); + + unsigned int backtrack_len () const { return have_output? out_len : idx; } + unsigned int lookahead_len () const { return len - idx; } + unsigned int next_serial () { return serial++; } + + HB_INTERNAL void add (hb_codepoint_t codepoint, + unsigned int cluster); + HB_INTERNAL void add_info (const hb_glyph_info_t &glyph_info); + + HB_INTERNAL void reverse_range (unsigned int start, unsigned int end); + HB_INTERNAL void reverse (); + HB_INTERNAL void reverse_clusters (); + HB_INTERNAL void guess_segment_properties (); + + HB_INTERNAL void swap_buffers (); + HB_INTERNAL void remove_output (); + HB_INTERNAL void clear_output (); + HB_INTERNAL void clear_positions (); + + HB_INTERNAL void replace_glyphs (unsigned int num_in, + unsigned int num_out, + const hb_codepoint_t *glyph_data); + + void replace_glyph (hb_codepoint_t glyph_index) + { + if (unlikely (out_info != info || out_len != idx)) { + if (unlikely (!make_room_for (1, 1))) return; + out_info[out_len] = info[idx]; + } + out_info[out_len].codepoint = glyph_index; + + idx++; + out_len++; + } + /* Makes a copy of the glyph at idx to output and replace glyph_index */ + hb_glyph_info_t & output_glyph (hb_codepoint_t glyph_index) + { + if (unlikely (!make_room_for (0, 1))) return Crap (hb_glyph_info_t); + + if (unlikely (idx == len && !out_len)) + return Crap (hb_glyph_info_t); + + out_info[out_len] = idx < len ? info[idx] : out_info[out_len - 1]; + out_info[out_len].codepoint = glyph_index; + + out_len++; + + return out_info[out_len - 1]; + } + void output_info (const hb_glyph_info_t &glyph_info) + { + if (unlikely (!make_room_for (0, 1))) return; + + out_info[out_len] = glyph_info; + + out_len++; + } + /* Copies glyph at idx to output but doesn't advance idx */ + void copy_glyph () + { + if (unlikely (!make_room_for (0, 1))) return; + + out_info[out_len] = info[idx]; + + out_len++; + } + /* Copies glyph at idx to output and advance idx. + * If there's no output, just advance idx. */ + void + next_glyph () + { + if (have_output) + { + if (out_info != info || out_len != idx) + { + if (unlikely (!make_room_for (1, 1))) return; + out_info[out_len] = info[idx]; + } + out_len++; + } + + idx++; + } + /* Copies n glyphs at idx to output and advance idx. + * If there's no output, just advance idx. */ + void + next_glyphs (unsigned int n) + { + if (have_output) + { + if (out_info != info || out_len != idx) + { + if (unlikely (!make_room_for (n, n))) return; + memmove (out_info + out_len, info + idx, n * sizeof (out_info[0])); + } + out_len += n; + } + + idx += n; + } + /* Advance idx without copying to output. */ + void skip_glyph () { idx++; } + void reset_masks (hb_mask_t mask) + { + for (unsigned int j = 0; j < len; j++) + info[j].mask = mask; + } + void add_masks (hb_mask_t mask) + { + for (unsigned int j = 0; j < len; j++) + info[j].mask |= mask; + } + HB_INTERNAL void set_masks (hb_mask_t value, hb_mask_t mask, + unsigned int cluster_start, unsigned int cluster_end); + + void merge_clusters (unsigned int start, unsigned int end) + { + if (end - start < 2) + return; + merge_clusters_impl (start, end); + } + HB_INTERNAL void merge_clusters_impl (unsigned int start, unsigned int end); + HB_INTERNAL void merge_out_clusters (unsigned int start, unsigned int end); + /* Merge clusters for deleting current glyph, and skip it. */ + HB_INTERNAL void delete_glyph (); + + void unsafe_to_break (unsigned int start, + unsigned int end) + { + if (end - start < 2) + return; + unsafe_to_break_impl (start, end); + } + HB_INTERNAL void unsafe_to_break_impl (unsigned int start, unsigned int end); + HB_INTERNAL void unsafe_to_break_from_outbuffer (unsigned int start, unsigned int end); + + + /* Internal methods */ + HB_INTERNAL bool move_to (unsigned int i); /* i is output-buffer index. */ + + HB_INTERNAL bool enlarge (unsigned int size); + + bool ensure (unsigned int size) + { return likely (!size || size < allocated) ? true : enlarge (size); } + + bool ensure_inplace (unsigned int size) + { return likely (!size || size < allocated); } + + HB_INTERNAL bool make_room_for (unsigned int num_in, unsigned int num_out); + HB_INTERNAL bool shift_forward (unsigned int count); + + typedef long scratch_buffer_t; + HB_INTERNAL scratch_buffer_t *get_scratch_buffer (unsigned int *size); + + void clear_context (unsigned int side) { context_len[side] = 0; } + + HB_INTERNAL void sort (unsigned int start, unsigned int end, int(*compar)(const hb_glyph_info_t *, const hb_glyph_info_t *)); + + bool messaging () + { +#ifdef HB_NO_BUFFER_MESSAGE + return false; +#else + return unlikely (message_func); +#endif + } + bool message (hb_font_t *font, const char *fmt, ...) HB_PRINTF_FUNC(3, 4) + { +#ifdef HB_NO_BUFFER_MESSAGE + return true; +#else + if (!messaging ()) + return true; + va_list ap; + va_start (ap, fmt); + bool ret = message_impl (font, fmt, ap); + va_end (ap); + return ret; +#endif + } + HB_INTERNAL bool message_impl (hb_font_t *font, const char *fmt, va_list ap) HB_PRINTF_FUNC(3, 0); + + static void + set_cluster (hb_glyph_info_t &inf, unsigned int cluster, unsigned int mask = 0) + { + if (inf.cluster != cluster) + { + if (mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) + inf.mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK; + else + inf.mask &= ~HB_GLYPH_FLAG_UNSAFE_TO_BREAK; + } + inf.cluster = cluster; + } + + unsigned int + _unsafe_to_break_find_min_cluster (const hb_glyph_info_t *infos, + unsigned int start, unsigned int end, + unsigned int cluster) const + { + for (unsigned int i = start; i < end; i++) + cluster = hb_min (cluster, infos[i].cluster); + return cluster; + } + void + _unsafe_to_break_set_mask (hb_glyph_info_t *infos, + unsigned int start, unsigned int end, + unsigned int cluster) + { + for (unsigned int i = start; i < end; i++) + if (cluster != infos[i].cluster) + { + scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK; + infos[i].mask |= HB_GLYPH_FLAG_UNSAFE_TO_BREAK; + } + } + + void unsafe_to_break_all () { unsafe_to_break_impl (0, len); } + void safe_to_break_all () + { + for (unsigned int i = 0; i < len; i++) + info[i].mask &= ~HB_GLYPH_FLAG_UNSAFE_TO_BREAK; + } +}; +DECLARE_NULL_INSTANCE (hb_buffer_t); + + +/* Loop over clusters. Duplicated in foreach_syllable(). */ +#define foreach_cluster(buffer, start, end) \ + for (unsigned int \ + _count = buffer->len, \ + start = 0, end = _count ? _next_cluster (buffer, 0) : 0; \ + start < _count; \ + start = end, end = _next_cluster (buffer, start)) + +static inline unsigned int +_next_cluster (hb_buffer_t *buffer, unsigned int start) +{ + hb_glyph_info_t *info = buffer->info; + unsigned int count = buffer->len; + + unsigned int cluster = info[start].cluster; + while (++start < count && cluster == info[start].cluster) + ; + + return start; +} + + +#define HB_BUFFER_XALLOCATE_VAR(b, func, var) \ + b->func (offsetof (hb_glyph_info_t, var) - offsetof(hb_glyph_info_t, var1), \ + sizeof (b->info[0].var)) +#define HB_BUFFER_ALLOCATE_VAR(b, var) HB_BUFFER_XALLOCATE_VAR (b, allocate_var, var ()) +#define HB_BUFFER_DEALLOCATE_VAR(b, var) HB_BUFFER_XALLOCATE_VAR (b, deallocate_var, var ()) +#define HB_BUFFER_ASSERT_VAR(b, var) HB_BUFFER_XALLOCATE_VAR (b, assert_var, var ()) + + +#endif /* HB_BUFFER_HH */ diff --git a/thirdparty/harfbuzz/src/hb-cache.hh b/thirdparty/harfbuzz/src/hb-cache.hh new file mode 100644 index 0000000000..bf26d96be4 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-cache.hh @@ -0,0 +1,80 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_CACHE_HH +#define HB_CACHE_HH + +#include "hb.hh" + + +/* Implements a lock-free cache for int->int functions. */ + +template <unsigned int key_bits, unsigned int value_bits, unsigned int cache_bits> +struct hb_cache_t +{ + static_assert ((key_bits >= cache_bits), ""); + static_assert ((key_bits + value_bits - cache_bits <= 8 * sizeof (hb_atomic_int_t)), ""); + static_assert (sizeof (hb_atomic_int_t) == sizeof (unsigned int), ""); + + void init () { clear (); } + void fini () {} + + void clear () + { + for (unsigned i = 0; i < ARRAY_LENGTH (values); i++) + values[i].set_relaxed (-1); + } + + bool get (unsigned int key, unsigned int *value) const + { + unsigned int k = key & ((1u<<cache_bits)-1); + unsigned int v = values[k].get_relaxed (); + if ((key_bits + value_bits - cache_bits == 8 * sizeof (hb_atomic_int_t) && v == (unsigned int) -1) || + (v >> value_bits) != (key >> cache_bits)) + return false; + *value = v & ((1u<<value_bits)-1); + return true; + } + + bool set (unsigned int key, unsigned int value) + { + if (unlikely ((key >> key_bits) || (value >> value_bits))) + return false; /* Overflows */ + unsigned int k = key & ((1u<<cache_bits)-1); + unsigned int v = ((key>>cache_bits)<<value_bits) | value; + values[k].set_relaxed (v); + return true; + } + + private: + hb_atomic_int_t values[1u<<cache_bits]; +}; + +typedef hb_cache_t<21, 16, 8> hb_cmap_cache_t; +typedef hb_cache_t<16, 24, 8> hb_advance_cache_t; + + +#endif /* HB_CACHE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-cff-interp-common.hh b/thirdparty/harfbuzz/src/hb-cff-interp-common.hh new file mode 100644 index 0000000000..91a9b7d0d1 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-cff-interp-common.hh @@ -0,0 +1,688 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ +#ifndef HB_CFF_INTERP_COMMON_HH +#define HB_CFF_INTERP_COMMON_HH + +namespace CFF { + +using namespace OT; + +typedef unsigned int op_code_t; + + +/* === Dict operators === */ + +/* One byte operators (0-31) */ +#define OpCode_version 0 /* CFF Top */ +#define OpCode_Notice 1 /* CFF Top */ +#define OpCode_FullName 2 /* CFF Top */ +#define OpCode_FamilyName 3 /* CFF Top */ +#define OpCode_Weight 4 /* CFF Top */ +#define OpCode_FontBBox 5 /* CFF Top */ +#define OpCode_BlueValues 6 /* CFF Private, CFF2 Private */ +#define OpCode_OtherBlues 7 /* CFF Private, CFF2 Private */ +#define OpCode_FamilyBlues 8 /* CFF Private, CFF2 Private */ +#define OpCode_FamilyOtherBlues 9 /* CFF Private, CFF2 Private */ +#define OpCode_StdHW 10 /* CFF Private, CFF2 Private */ +#define OpCode_StdVW 11 /* CFF Private, CFF2 Private */ +#define OpCode_escape 12 /* All. Shared with CS */ +#define OpCode_UniqueID 13 /* CFF Top */ +#define OpCode_XUID 14 /* CFF Top */ +#define OpCode_charset 15 /* CFF Top (0) */ +#define OpCode_Encoding 16 /* CFF Top (0) */ +#define OpCode_CharStrings 17 /* CFF Top, CFF2 Top */ +#define OpCode_Private 18 /* CFF Top, CFF2 FD */ +#define OpCode_Subrs 19 /* CFF Private, CFF2 Private */ +#define OpCode_defaultWidthX 20 /* CFF Private (0) */ +#define OpCode_nominalWidthX 21 /* CFF Private (0) */ +#define OpCode_vsindexdict 22 /* CFF2 Private/CS */ +#define OpCode_blenddict 23 /* CFF2 Private/CS */ +#define OpCode_vstore 24 /* CFF2 Top */ +#define OpCode_reserved25 25 +#define OpCode_reserved26 26 +#define OpCode_reserved27 27 + +/* Numbers */ +#define OpCode_shortint 28 /* 16-bit integer, All */ +#define OpCode_longintdict 29 /* 32-bit integer, All */ +#define OpCode_BCD 30 /* Real number, CFF2 Top/FD */ +#define OpCode_reserved31 31 + +/* 1-byte integers */ +#define OpCode_OneByteIntFirst 32 /* All. beginning of the range of first byte ints */ +#define OpCode_OneByteIntLast 246 /* All. ending of the range of first byte int */ + +/* 2-byte integers */ +#define OpCode_TwoBytePosInt0 247 /* All. first byte of two byte positive int (+108 to +1131) */ +#define OpCode_TwoBytePosInt1 248 +#define OpCode_TwoBytePosInt2 249 +#define OpCode_TwoBytePosInt3 250 + +#define OpCode_TwoByteNegInt0 251 /* All. first byte of two byte negative int (-1131 to -108) */ +#define OpCode_TwoByteNegInt1 252 +#define OpCode_TwoByteNegInt2 253 +#define OpCode_TwoByteNegInt3 254 + +/* Two byte escape operators 12, (0-41) */ +#define OpCode_ESC_Base 256 +#define Make_OpCode_ESC(byte2) ((op_code_t)(OpCode_ESC_Base + (byte2))) + +inline op_code_t Unmake_OpCode_ESC (op_code_t op) { return (op_code_t)(op - OpCode_ESC_Base); } +inline bool Is_OpCode_ESC (op_code_t op) { return op >= OpCode_ESC_Base; } +inline unsigned int OpCode_Size (op_code_t op) { return Is_OpCode_ESC (op) ? 2: 1; } + +#define OpCode_Copyright Make_OpCode_ESC(0) /* CFF Top */ +#define OpCode_isFixedPitch Make_OpCode_ESC(1) /* CFF Top (false) */ +#define OpCode_ItalicAngle Make_OpCode_ESC(2) /* CFF Top (0) */ +#define OpCode_UnderlinePosition Make_OpCode_ESC(3) /* CFF Top (-100) */ +#define OpCode_UnderlineThickness Make_OpCode_ESC(4) /* CFF Top (50) */ +#define OpCode_PaintType Make_OpCode_ESC(5) /* CFF Top (0) */ +#define OpCode_CharstringType Make_OpCode_ESC(6) /* CFF Top (2) */ +#define OpCode_FontMatrix Make_OpCode_ESC(7) /* CFF Top, CFF2 Top (.001 0 0 .001 0 0)*/ +#define OpCode_StrokeWidth Make_OpCode_ESC(8) /* CFF Top (0) */ +#define OpCode_BlueScale Make_OpCode_ESC(9) /* CFF Private, CFF2 Private (0.039625) */ +#define OpCode_BlueShift Make_OpCode_ESC(10) /* CFF Private, CFF2 Private (7) */ +#define OpCode_BlueFuzz Make_OpCode_ESC(11) /* CFF Private, CFF2 Private (1) */ +#define OpCode_StemSnapH Make_OpCode_ESC(12) /* CFF Private, CFF2 Private */ +#define OpCode_StemSnapV Make_OpCode_ESC(13) /* CFF Private, CFF2 Private */ +#define OpCode_ForceBold Make_OpCode_ESC(14) /* CFF Private (false) */ +#define OpCode_reservedESC15 Make_OpCode_ESC(15) +#define OpCode_reservedESC16 Make_OpCode_ESC(16) +#define OpCode_LanguageGroup Make_OpCode_ESC(17) /* CFF Private, CFF2 Private (0) */ +#define OpCode_ExpansionFactor Make_OpCode_ESC(18) /* CFF Private, CFF2 Private (0.06) */ +#define OpCode_initialRandomSeed Make_OpCode_ESC(19) /* CFF Private (0) */ +#define OpCode_SyntheticBase Make_OpCode_ESC(20) /* CFF Top */ +#define OpCode_PostScript Make_OpCode_ESC(21) /* CFF Top */ +#define OpCode_BaseFontName Make_OpCode_ESC(22) /* CFF Top */ +#define OpCode_BaseFontBlend Make_OpCode_ESC(23) /* CFF Top */ +#define OpCode_reservedESC24 Make_OpCode_ESC(24) +#define OpCode_reservedESC25 Make_OpCode_ESC(25) +#define OpCode_reservedESC26 Make_OpCode_ESC(26) +#define OpCode_reservedESC27 Make_OpCode_ESC(27) +#define OpCode_reservedESC28 Make_OpCode_ESC(28) +#define OpCode_reservedESC29 Make_OpCode_ESC(29) +#define OpCode_ROS Make_OpCode_ESC(30) /* CFF Top_CID */ +#define OpCode_CIDFontVersion Make_OpCode_ESC(31) /* CFF Top_CID (0) */ +#define OpCode_CIDFontRevision Make_OpCode_ESC(32) /* CFF Top_CID (0) */ +#define OpCode_CIDFontType Make_OpCode_ESC(33) /* CFF Top_CID (0) */ +#define OpCode_CIDCount Make_OpCode_ESC(34) /* CFF Top_CID (8720) */ +#define OpCode_UIDBase Make_OpCode_ESC(35) /* CFF Top_CID */ +#define OpCode_FDArray Make_OpCode_ESC(36) /* CFF Top_CID, CFF2 Top */ +#define OpCode_FDSelect Make_OpCode_ESC(37) /* CFF Top_CID, CFF2 Top */ +#define OpCode_FontName Make_OpCode_ESC(38) /* CFF Top_CID */ + + +/* === CharString operators === */ + +#define OpCode_hstem 1 /* CFF, CFF2 */ +#define OpCode_Reserved2 2 +#define OpCode_vstem 3 /* CFF, CFF2 */ +#define OpCode_vmoveto 4 /* CFF, CFF2 */ +#define OpCode_rlineto 5 /* CFF, CFF2 */ +#define OpCode_hlineto 6 /* CFF, CFF2 */ +#define OpCode_vlineto 7 /* CFF, CFF2 */ +#define OpCode_rrcurveto 8 /* CFF, CFF2 */ +#define OpCode_Reserved9 9 +#define OpCode_callsubr 10 /* CFF, CFF2 */ +#define OpCode_return 11 /* CFF */ +//#define OpCode_escape 12 /* CFF, CFF2 */ +#define OpCode_Reserved13 13 +#define OpCode_endchar 14 /* CFF */ +#define OpCode_vsindexcs 15 /* CFF2 */ +#define OpCode_blendcs 16 /* CFF2 */ +#define OpCode_Reserved17 17 +#define OpCode_hstemhm 18 /* CFF, CFF2 */ +#define OpCode_hintmask 19 /* CFF, CFF2 */ +#define OpCode_cntrmask 20 /* CFF, CFF2 */ +#define OpCode_rmoveto 21 /* CFF, CFF2 */ +#define OpCode_hmoveto 22 /* CFF, CFF2 */ +#define OpCode_vstemhm 23 /* CFF, CFF2 */ +#define OpCode_rcurveline 24 /* CFF, CFF2 */ +#define OpCode_rlinecurve 25 /* CFF, CFF2 */ +#define OpCode_vvcurveto 26 /* CFF, CFF2 */ +#define OpCode_hhcurveto 27 /* CFF, CFF2 */ +//#define OpCode_shortint 28 /* CFF, CFF2 */ +#define OpCode_callgsubr 29 /* CFF, CFF2 */ +#define OpCode_vhcurveto 30 /* CFF, CFF2 */ +#define OpCode_hvcurveto 31 /* CFF, CFF2 */ + +#define OpCode_fixedcs 255 /* 32-bit fixed */ + +/* Two byte escape operators 12, (0-41) */ +#define OpCode_dotsection Make_OpCode_ESC(0) /* CFF (obsoleted) */ +#define OpCode_ReservedESC1 Make_OpCode_ESC(1) +#define OpCode_ReservedESC2 Make_OpCode_ESC(2) +#define OpCode_and Make_OpCode_ESC(3) /* CFF */ +#define OpCode_or Make_OpCode_ESC(4) /* CFF */ +#define OpCode_not Make_OpCode_ESC(5) /* CFF */ +#define OpCode_ReservedESC6 Make_OpCode_ESC(6) +#define OpCode_ReservedESC7 Make_OpCode_ESC(7) +#define OpCode_ReservedESC8 Make_OpCode_ESC(8) +#define OpCode_abs Make_OpCode_ESC(9) /* CFF */ +#define OpCode_add Make_OpCode_ESC(10) /* CFF */ +#define OpCode_sub Make_OpCode_ESC(11) /* CFF */ +#define OpCode_div Make_OpCode_ESC(12) /* CFF */ +#define OpCode_ReservedESC13 Make_OpCode_ESC(13) +#define OpCode_neg Make_OpCode_ESC(14) /* CFF */ +#define OpCode_eq Make_OpCode_ESC(15) /* CFF */ +#define OpCode_ReservedESC16 Make_OpCode_ESC(16) +#define OpCode_ReservedESC17 Make_OpCode_ESC(17) +#define OpCode_drop Make_OpCode_ESC(18) /* CFF */ +#define OpCode_ReservedESC19 Make_OpCode_ESC(19) +#define OpCode_put Make_OpCode_ESC(20) /* CFF */ +#define OpCode_get Make_OpCode_ESC(21) /* CFF */ +#define OpCode_ifelse Make_OpCode_ESC(22) /* CFF */ +#define OpCode_random Make_OpCode_ESC(23) /* CFF */ +#define OpCode_mul Make_OpCode_ESC(24) /* CFF */ +//#define OpCode_reservedESC25 Make_OpCode_ESC(25) +#define OpCode_sqrt Make_OpCode_ESC(26) /* CFF */ +#define OpCode_dup Make_OpCode_ESC(27) /* CFF */ +#define OpCode_exch Make_OpCode_ESC(28) /* CFF */ +#define OpCode_index Make_OpCode_ESC(29) /* CFF */ +#define OpCode_roll Make_OpCode_ESC(30) /* CFF */ +#define OpCode_reservedESC31 Make_OpCode_ESC(31) +#define OpCode_reservedESC32 Make_OpCode_ESC(32) +#define OpCode_reservedESC33 Make_OpCode_ESC(33) +#define OpCode_hflex Make_OpCode_ESC(34) /* CFF, CFF2 */ +#define OpCode_flex Make_OpCode_ESC(35) /* CFF, CFF2 */ +#define OpCode_hflex1 Make_OpCode_ESC(36) /* CFF, CFF2 */ +#define OpCode_flex1 Make_OpCode_ESC(37) /* CFF, CFF2 */ + + +#define OpCode_Invalid 0xFFFFu + + +struct number_t +{ + void init () { set_real (0.0); } + void fini () {} + + void set_int (int v) { value = v; } + int to_int () const { return value; } + + void set_fixed (int32_t v) { value = v / 65536.0; } + int32_t to_fixed () const { return value * 65536.0; } + + void set_real (double v) { value = v; } + double to_real () const { return value; } + + bool in_int_range () const + { return ((double) (int16_t) to_int () == value); } + + bool operator > (const number_t &n) const { return value > n.to_real (); } + bool operator < (const number_t &n) const { return n > *this; } + bool operator >= (const number_t &n) const { return !(*this < n); } + bool operator <= (const number_t &n) const { return !(*this > n); } + + const number_t &operator += (const number_t &n) + { + set_real (to_real () + n.to_real ()); + + return *this; + } + + protected: + double value; +}; + +/* byte string */ +struct UnsizedByteStr : UnsizedArrayOf <HBUINT8> +{ + // encode 2-byte int (Dict/CharString) or 4-byte int (Dict) + template <typename T, typename V> + static bool serialize_int (hb_serialize_context_t *c, op_code_t intOp, V value) + { + TRACE_SERIALIZE (this); + + HBUINT8 *p = c->allocate_size<HBUINT8> (1); + if (unlikely (!p)) return_trace (false); + *p = intOp; + + T *ip = c->allocate_size<T> (T::static_size); + if (unlikely (!ip)) return_trace (false); + return_trace (c->check_assign (*ip, value)); + } + + template <typename V> + static bool serialize_int4 (hb_serialize_context_t *c, V value) + { return serialize_int<HBINT32> (c, OpCode_longintdict, value); } + + template <typename V> + static bool serialize_int2 (hb_serialize_context_t *c, V value) + { return serialize_int<HBINT16> (c, OpCode_shortint, value); } + + /* Defining null_size allows a Null object may be created. Should be safe because: + * A descendent struct Dict uses a Null pointer to indicate a missing table, + * checked before access. + * byte_str_t, a wrapper struct pairing a byte pointer along with its length, always + * checks the length before access. A Null pointer is used as the initial pointer + * along with zero length by the default ctor. + */ + DEFINE_SIZE_MIN(0); +}; + +/* Holder of a section of byte string within a CFFIndex entry */ +struct byte_str_t : hb_ubytes_t +{ + byte_str_t () + : hb_ubytes_t () {} + byte_str_t (const UnsizedByteStr& s, unsigned int l) + : hb_ubytes_t ((const unsigned char*)&s, l) {} + byte_str_t (const unsigned char *s, unsigned int l) + : hb_ubytes_t (s, l) {} + byte_str_t (const hb_ubytes_t &ub) /* conversion from hb_ubytes_t */ + : hb_ubytes_t (ub) {} + + /* sub-string */ + byte_str_t sub_str (unsigned int offset, unsigned int len_) const + { return byte_str_t (hb_ubytes_t::sub_array (offset, len_)); } + + bool check_limit (unsigned int offset, unsigned int count) const + { return (offset + count <= length); } +}; + +/* A byte string associated with the current offset and an error condition */ +struct byte_str_ref_t +{ + byte_str_ref_t () { init (); } + + void init () + { + str = byte_str_t (); + offset = 0; + error = false; + } + + void fini () {} + + byte_str_ref_t (const byte_str_t &str_, unsigned int offset_ = 0) + : str (str_), offset (offset_), error (false) {} + + void reset (const byte_str_t &str_, unsigned int offset_ = 0) + { + str = str_; + offset = offset_; + error = false; + } + + const unsigned char& operator [] (int i) { + if (unlikely ((unsigned int) (offset + i) >= str.length)) + { + set_error (); + return Null (unsigned char); + } + return str[offset + i]; + } + + /* Conversion to byte_str_t */ + operator byte_str_t () const { return str.sub_str (offset, str.length - offset); } + + byte_str_t sub_str (unsigned int offset_, unsigned int len_) const + { return str.sub_str (offset_, len_); } + + bool avail (unsigned int count=1) const + { return (!in_error () && str.check_limit (offset, count)); } + void inc (unsigned int count=1) + { + if (likely (!in_error () && (offset <= str.length) && (offset + count <= str.length))) + { + offset += count; + } + else + { + offset = str.length; + set_error (); + } + } + + void set_error () { error = true; } + bool in_error () const { return error; } + + byte_str_t str; + unsigned int offset; /* beginning of the sub-string within str */ + + protected: + bool error; +}; + +typedef hb_vector_t<byte_str_t> byte_str_array_t; + +/* stack */ +template <typename ELEM, int LIMIT> +struct cff_stack_t +{ + void init () + { + error = false; + count = 0; + elements.init (); + elements.resize (kSizeLimit); + for (unsigned int i = 0; i < elements.length; i++) + elements[i].init (); + } + void fini () { elements.fini_deep (); } + + ELEM& operator [] (unsigned int i) + { + if (unlikely (i >= count)) set_error (); + return elements[i]; + } + + void push (const ELEM &v) + { + if (likely (count < elements.length)) + elements[count++] = v; + else + set_error (); + } + ELEM &push () + { + if (likely (count < elements.length)) + return elements[count++]; + else + { + set_error (); + return Crap (ELEM); + } + } + + ELEM& pop () + { + if (likely (count > 0)) + return elements[--count]; + else + { + set_error (); + return Crap (ELEM); + } + } + void pop (unsigned int n) + { + if (likely (count >= n)) + count -= n; + else + set_error (); + } + + const ELEM& peek () + { + if (unlikely (count < 0)) + { + set_error (); + return Null (ELEM); + } + return elements[count - 1]; + } + + void unpop () + { + if (likely (count < elements.length)) + count++; + else + set_error (); + } + + void clear () { count = 0; } + + bool in_error () const { return (error || elements.in_error ()); } + void set_error () { error = true; } + + unsigned int get_count () const { return count; } + bool is_empty () const { return !count; } + + static constexpr unsigned kSizeLimit = LIMIT; + + protected: + bool error; + unsigned int count; + hb_vector_t<ELEM> elements; +}; + +/* argument stack */ +template <typename ARG=number_t> +struct arg_stack_t : cff_stack_t<ARG, 513> +{ + void push_int (int v) + { + ARG &n = S::push (); + n.set_int (v); + } + + void push_fixed (int32_t v) + { + ARG &n = S::push (); + n.set_fixed (v); + } + + void push_real (double v) + { + ARG &n = S::push (); + n.set_real (v); + } + + ARG& pop_num () { return this->pop (); } + + int pop_int () { return this->pop ().to_int (); } + + unsigned int pop_uint () + { + int i = pop_int (); + if (unlikely (i < 0)) + { + i = 0; + S::set_error (); + } + return (unsigned) i; + } + + void push_longint_from_substr (byte_str_ref_t& str_ref) + { + push_int ((str_ref[0] << 24) | (str_ref[1] << 16) | (str_ref[2] << 8) | (str_ref[3])); + str_ref.inc (4); + } + + bool push_fixed_from_substr (byte_str_ref_t& str_ref) + { + if (unlikely (!str_ref.avail (4))) + return false; + push_fixed ((int32_t)*(const HBUINT32*)&str_ref[0]); + str_ref.inc (4); + return true; + } + + hb_array_t<const ARG> get_subarray (unsigned int start) const + { return S::elements.sub_array (start); } + + private: + typedef cff_stack_t<ARG, 513> S; +}; + +/* an operator prefixed by its operands in a byte string */ +struct op_str_t +{ + void init () {} + void fini () {} + + op_code_t op; + byte_str_t str; +}; + +/* base of OP_SERIALIZER */ +struct op_serializer_t +{ + protected: + bool copy_opstr (hb_serialize_context_t *c, const op_str_t& opstr) const + { + TRACE_SERIALIZE (this); + + HBUINT8 *d = c->allocate_size<HBUINT8> (opstr.str.length); + if (unlikely (!d)) return_trace (false); + memcpy (d, &opstr.str[0], opstr.str.length); + return_trace (true); + } +}; + +template <typename VAL> +struct parsed_values_t +{ + void init () + { + opStart = 0; + values.init (); + } + void fini () { values.fini_deep (); } + + void add_op (op_code_t op, const byte_str_ref_t& str_ref = byte_str_ref_t ()) + { + VAL *val = values.push (); + val->op = op; + val->str = str_ref.str.sub_str (opStart, str_ref.offset - opStart); + opStart = str_ref.offset; + } + + void add_op (op_code_t op, const byte_str_ref_t& str_ref, const VAL &v) + { + VAL *val = values.push (v); + val->op = op; + val->str = str_ref.sub_str ( opStart, str_ref.offset - opStart); + opStart = str_ref.offset; + } + + bool has_op (op_code_t op) const + { + for (unsigned int i = 0; i < get_count (); i++) + if (get_value (i).op == op) return true; + return false; + } + + unsigned get_count () const { return values.length; } + const VAL &get_value (unsigned int i) const { return values[i]; } + const VAL &operator [] (unsigned int i) const { return get_value (i); } + + unsigned int opStart; + hb_vector_t<VAL> values; +}; + +template <typename ARG=number_t> +struct interp_env_t +{ + void init (const byte_str_t &str_) + { + str_ref.reset (str_); + argStack.init (); + error = false; + } + void fini () { argStack.fini (); } + + bool in_error () const + { return error || str_ref.in_error () || argStack.in_error (); } + + void set_error () { error = true; } + + op_code_t fetch_op () + { + op_code_t op = OpCode_Invalid; + if (unlikely (!str_ref.avail ())) + return OpCode_Invalid; + op = (op_code_t)(unsigned char)str_ref[0]; + if (op == OpCode_escape) { + if (unlikely (!str_ref.avail ())) + return OpCode_Invalid; + op = Make_OpCode_ESC(str_ref[1]); + str_ref.inc (); + } + str_ref.inc (); + return op; + } + + const ARG& eval_arg (unsigned int i) { return argStack[i]; } + + ARG& pop_arg () { return argStack.pop (); } + void pop_n_args (unsigned int n) { argStack.pop (n); } + + void clear_args () { pop_n_args (argStack.get_count ()); } + + byte_str_ref_t + str_ref; + arg_stack_t<ARG> + argStack; + protected: + bool error; +}; + +typedef interp_env_t<> num_interp_env_t; + +template <typename ARG=number_t> +struct opset_t +{ + static void process_op (op_code_t op, interp_env_t<ARG>& env) + { + switch (op) { + case OpCode_shortint: + env.argStack.push_int ((int16_t)((env.str_ref[0] << 8) | env.str_ref[1])); + env.str_ref.inc (2); + break; + + case OpCode_TwoBytePosInt0: case OpCode_TwoBytePosInt1: + case OpCode_TwoBytePosInt2: case OpCode_TwoBytePosInt3: + env.argStack.push_int ((int16_t)((op - OpCode_TwoBytePosInt0) * 256 + env.str_ref[0] + 108)); + env.str_ref.inc (); + break; + + case OpCode_TwoByteNegInt0: case OpCode_TwoByteNegInt1: + case OpCode_TwoByteNegInt2: case OpCode_TwoByteNegInt3: + env.argStack.push_int ((-(int16_t)(op - OpCode_TwoByteNegInt0) * 256 - env.str_ref[0] - 108)); + env.str_ref.inc (); + break; + + default: + /* 1-byte integer */ + if (likely ((OpCode_OneByteIntFirst <= op) && (op <= OpCode_OneByteIntLast))) + { + env.argStack.push_int ((int)op - 139); + } else { + /* invalid unknown operator */ + env.clear_args (); + env.set_error (); + } + break; + } + } +}; + +template <typename ENV> +struct interpreter_t +{ + ~interpreter_t() { fini (); } + + void fini () { env.fini (); } + + ENV env; +}; + +} /* namespace CFF */ + +#endif /* HB_CFF_INTERP_COMMON_HH */ diff --git a/thirdparty/harfbuzz/src/hb-cff-interp-cs-common.hh b/thirdparty/harfbuzz/src/hb-cff-interp-cs-common.hh new file mode 100644 index 0000000000..52d778ffe2 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-cff-interp-cs-common.hh @@ -0,0 +1,911 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ +#ifndef HB_CFF_INTERP_CS_COMMON_HH +#define HB_CFF_INTERP_CS_COMMON_HH + +#include "hb.hh" +#include "hb-cff-interp-common.hh" + +namespace CFF { + +using namespace OT; + +enum cs_type_t { + CSType_CharString, + CSType_GlobalSubr, + CSType_LocalSubr +}; + +struct call_context_t +{ + void init (const byte_str_ref_t substr_=byte_str_ref_t (), cs_type_t type_=CSType_CharString, unsigned int subr_num_=0) + { + str_ref = substr_; + type = type_; + subr_num = subr_num_; + } + + void fini () {} + + byte_str_ref_t str_ref; + cs_type_t type; + unsigned int subr_num; +}; + +/* call stack */ +const unsigned int kMaxCallLimit = 10; +struct call_stack_t : cff_stack_t<call_context_t, kMaxCallLimit> {}; + +template <typename SUBRS> +struct biased_subrs_t +{ + void init (const SUBRS *subrs_) + { + subrs = subrs_; + unsigned int nSubrs = get_count (); + if (nSubrs < 1240) + bias = 107; + else if (nSubrs < 33900) + bias = 1131; + else + bias = 32768; + } + + void fini () {} + + unsigned int get_count () const { return subrs ? subrs->count : 0; } + unsigned int get_bias () const { return bias; } + + byte_str_t operator [] (unsigned int index) const + { + if (unlikely (!subrs || index >= subrs->count)) + return Null (byte_str_t); + else + return (*subrs)[index]; + } + + protected: + unsigned int bias; + const SUBRS *subrs; +}; + +struct point_t +{ + void init () + { + x.init (); + y.init (); + } + + void set_int (int _x, int _y) + { + x.set_int (_x); + y.set_int (_y); + } + + void move_x (const number_t &dx) { x += dx; } + void move_y (const number_t &dy) { y += dy; } + void move (const number_t &dx, const number_t &dy) { move_x (dx); move_y (dy); } + void move (const point_t &d) { move_x (d.x); move_y (d.y); } + + number_t x; + number_t y; +}; + +template <typename ARG, typename SUBRS> +struct cs_interp_env_t : interp_env_t<ARG> +{ + void init (const byte_str_t &str, const SUBRS *globalSubrs_, const SUBRS *localSubrs_) + { + interp_env_t<ARG>::init (str); + + context.init (str, CSType_CharString); + seen_moveto = true; + seen_hintmask = false; + hstem_count = 0; + vstem_count = 0; + hintmask_size = 0; + pt.init (); + callStack.init (); + globalSubrs.init (globalSubrs_); + localSubrs.init (localSubrs_); + } + void fini () + { + interp_env_t<ARG>::fini (); + + callStack.fini (); + globalSubrs.fini (); + localSubrs.fini (); + } + + bool in_error () const + { + return callStack.in_error () || SUPER::in_error (); + } + + bool pop_subr_num (const biased_subrs_t<SUBRS>& biasedSubrs, unsigned int &subr_num) + { + subr_num = 0; + int n = SUPER::argStack.pop_int (); + n += biasedSubrs.get_bias (); + if (unlikely ((n < 0) || ((unsigned int)n >= biasedSubrs.get_count ()))) + return false; + + subr_num = (unsigned int)n; + return true; + } + + void call_subr (const biased_subrs_t<SUBRS>& biasedSubrs, cs_type_t type) + { + unsigned int subr_num = 0; + + if (unlikely (!pop_subr_num (biasedSubrs, subr_num) + || callStack.get_count () >= kMaxCallLimit)) + { + SUPER::set_error (); + return; + } + context.str_ref = SUPER::str_ref; + callStack.push (context); + + context.init ( biasedSubrs[subr_num], type, subr_num); + SUPER::str_ref = context.str_ref; + } + + void return_from_subr () + { + if (unlikely (SUPER::str_ref.in_error ())) + SUPER::set_error (); + context = callStack.pop (); + SUPER::str_ref = context.str_ref; + } + + void determine_hintmask_size () + { + if (!seen_hintmask) + { + vstem_count += SUPER::argStack.get_count() / 2; + hintmask_size = (hstem_count + vstem_count + 7) >> 3; + seen_hintmask = true; + } + } + + void set_endchar (bool endchar_flag_) { endchar_flag = endchar_flag_; } + bool is_endchar () const { return endchar_flag; } + + const number_t &get_x () const { return pt.x; } + const number_t &get_y () const { return pt.y; } + const point_t &get_pt () const { return pt; } + + void moveto (const point_t &pt_ ) { pt = pt_; } + + public: + call_context_t context; + bool endchar_flag; + bool seen_moveto; + bool seen_hintmask; + + unsigned int hstem_count; + unsigned int vstem_count; + unsigned int hintmask_size; + call_stack_t callStack; + biased_subrs_t<SUBRS> globalSubrs; + biased_subrs_t<SUBRS> localSubrs; + + private: + point_t pt; + + typedef interp_env_t<ARG> SUPER; +}; + +template <typename ENV, typename PARAM> +struct path_procs_null_t +{ + static void rmoveto (ENV &env, PARAM& param) {} + static void hmoveto (ENV &env, PARAM& param) {} + static void vmoveto (ENV &env, PARAM& param) {} + static void rlineto (ENV &env, PARAM& param) {} + static void hlineto (ENV &env, PARAM& param) {} + static void vlineto (ENV &env, PARAM& param) {} + static void rrcurveto (ENV &env, PARAM& param) {} + static void rcurveline (ENV &env, PARAM& param) {} + static void rlinecurve (ENV &env, PARAM& param) {} + static void vvcurveto (ENV &env, PARAM& param) {} + static void hhcurveto (ENV &env, PARAM& param) {} + static void vhcurveto (ENV &env, PARAM& param) {} + static void hvcurveto (ENV &env, PARAM& param) {} + static void moveto (ENV &env, PARAM& param, const point_t &pt) {} + static void line (ENV &env, PARAM& param, const point_t &pt1) {} + static void curve (ENV &env, PARAM& param, const point_t &pt1, const point_t &pt2, const point_t &pt3) {} + static void hflex (ENV &env, PARAM& param) {} + static void flex (ENV &env, PARAM& param) {} + static void hflex1 (ENV &env, PARAM& param) {} + static void flex1 (ENV &env, PARAM& param) {} +}; + +template <typename ARG, typename OPSET, typename ENV, typename PARAM, typename PATH=path_procs_null_t<ENV, PARAM>> +struct cs_opset_t : opset_t<ARG> +{ + static void process_op (op_code_t op, ENV &env, PARAM& param) + { + switch (op) { + + case OpCode_return: + env.return_from_subr (); + break; + case OpCode_endchar: + OPSET::check_width (op, env, param); + env.set_endchar (true); + OPSET::flush_args_and_op (op, env, param); + break; + + case OpCode_fixedcs: + env.argStack.push_fixed_from_substr (env.str_ref); + break; + + case OpCode_callsubr: + env.call_subr (env.localSubrs, CSType_LocalSubr); + break; + + case OpCode_callgsubr: + env.call_subr (env.globalSubrs, CSType_GlobalSubr); + break; + + case OpCode_hstem: + case OpCode_hstemhm: + OPSET::check_width (op, env, param); + OPSET::process_hstem (op, env, param); + break; + case OpCode_vstem: + case OpCode_vstemhm: + OPSET::check_width (op, env, param); + OPSET::process_vstem (op, env, param); + break; + case OpCode_hintmask: + case OpCode_cntrmask: + OPSET::check_width (op, env, param); + OPSET::process_hintmask (op, env, param); + break; + case OpCode_rmoveto: + OPSET::check_width (op, env, param); + PATH::rmoveto (env, param); + OPSET::process_post_move (op, env, param); + break; + case OpCode_hmoveto: + OPSET::check_width (op, env, param); + PATH::hmoveto (env, param); + OPSET::process_post_move (op, env, param); + break; + case OpCode_vmoveto: + OPSET::check_width (op, env, param); + PATH::vmoveto (env, param); + OPSET::process_post_move (op, env, param); + break; + case OpCode_rlineto: + PATH::rlineto (env, param); + process_post_path (op, env, param); + break; + case OpCode_hlineto: + PATH::hlineto (env, param); + process_post_path (op, env, param); + break; + case OpCode_vlineto: + PATH::vlineto (env, param); + process_post_path (op, env, param); + break; + case OpCode_rrcurveto: + PATH::rrcurveto (env, param); + process_post_path (op, env, param); + break; + case OpCode_rcurveline: + PATH::rcurveline (env, param); + process_post_path (op, env, param); + break; + case OpCode_rlinecurve: + PATH::rlinecurve (env, param); + process_post_path (op, env, param); + break; + case OpCode_vvcurveto: + PATH::vvcurveto (env, param); + process_post_path (op, env, param); + break; + case OpCode_hhcurveto: + PATH::hhcurveto (env, param); + process_post_path (op, env, param); + break; + case OpCode_vhcurveto: + PATH::vhcurveto (env, param); + process_post_path (op, env, param); + break; + case OpCode_hvcurveto: + PATH::hvcurveto (env, param); + process_post_path (op, env, param); + break; + + case OpCode_hflex: + PATH::hflex (env, param); + OPSET::process_post_flex (op, env, param); + break; + + case OpCode_flex: + PATH::flex (env, param); + OPSET::process_post_flex (op, env, param); + break; + + case OpCode_hflex1: + PATH::hflex1 (env, param); + OPSET::process_post_flex (op, env, param); + break; + + case OpCode_flex1: + PATH::flex1 (env, param); + OPSET::process_post_flex (op, env, param); + break; + + default: + SUPER::process_op (op, env); + break; + } + } + + static void process_hstem (op_code_t op, ENV &env, PARAM& param) + { + env.hstem_count += env.argStack.get_count () / 2; + OPSET::flush_args_and_op (op, env, param); + } + + static void process_vstem (op_code_t op, ENV &env, PARAM& param) + { + env.vstem_count += env.argStack.get_count () / 2; + OPSET::flush_args_and_op (op, env, param); + } + + static void process_hintmask (op_code_t op, ENV &env, PARAM& param) + { + env.determine_hintmask_size (); + if (likely (env.str_ref.avail (env.hintmask_size))) + { + OPSET::flush_hintmask (op, env, param); + env.str_ref.inc (env.hintmask_size); + } + } + + static void process_post_flex (op_code_t op, ENV &env, PARAM& param) + { + OPSET::flush_args_and_op (op, env, param); + } + + static void check_width (op_code_t op, ENV &env, PARAM& param) + {} + + static void process_post_move (op_code_t op, ENV &env, PARAM& param) + { + if (!env.seen_moveto) + { + env.determine_hintmask_size (); + env.seen_moveto = true; + } + OPSET::flush_args_and_op (op, env, param); + } + + static void process_post_path (op_code_t op, ENV &env, PARAM& param) + { + OPSET::flush_args_and_op (op, env, param); + } + + static void flush_args_and_op (op_code_t op, ENV &env, PARAM& param) + { + OPSET::flush_args (env, param); + OPSET::flush_op (op, env, param); + } + + static void flush_args (ENV &env, PARAM& param) + { + env.pop_n_args (env.argStack.get_count ()); + } + + static void flush_op (op_code_t op, ENV &env, PARAM& param) + { + } + + static void flush_hintmask (op_code_t op, ENV &env, PARAM& param) + { + OPSET::flush_args_and_op (op, env, param); + } + + static bool is_number_op (op_code_t op) + { + switch (op) + { + case OpCode_shortint: + case OpCode_fixedcs: + case OpCode_TwoBytePosInt0: case OpCode_TwoBytePosInt1: + case OpCode_TwoBytePosInt2: case OpCode_TwoBytePosInt3: + case OpCode_TwoByteNegInt0: case OpCode_TwoByteNegInt1: + case OpCode_TwoByteNegInt2: case OpCode_TwoByteNegInt3: + return true; + + default: + /* 1-byte integer */ + return (OpCode_OneByteIntFirst <= op) && (op <= OpCode_OneByteIntLast); + } + } + + protected: + typedef opset_t<ARG> SUPER; +}; + +template <typename PATH, typename ENV, typename PARAM> +struct path_procs_t +{ + static void rmoveto (ENV &env, PARAM& param) + { + point_t pt1 = env.get_pt (); + const number_t &dy = env.pop_arg (); + const number_t &dx = env.pop_arg (); + pt1.move (dx, dy); + PATH::moveto (env, param, pt1); + } + + static void hmoveto (ENV &env, PARAM& param) + { + point_t pt1 = env.get_pt (); + pt1.move_x (env.pop_arg ()); + PATH::moveto (env, param, pt1); + } + + static void vmoveto (ENV &env, PARAM& param) + { + point_t pt1 = env.get_pt (); + pt1.move_y (env.pop_arg ()); + PATH::moveto (env, param, pt1); + } + + static void rlineto (ENV &env, PARAM& param) + { + for (unsigned int i = 0; i + 2 <= env.argStack.get_count (); i += 2) + { + point_t pt1 = env.get_pt (); + pt1.move (env.eval_arg (i), env.eval_arg (i+1)); + PATH::line (env, param, pt1); + } + } + + static void hlineto (ENV &env, PARAM& param) + { + point_t pt1; + unsigned int i = 0; + for (; i + 2 <= env.argStack.get_count (); i += 2) + { + pt1 = env.get_pt (); + pt1.move_x (env.eval_arg (i)); + PATH::line (env, param, pt1); + pt1.move_y (env.eval_arg (i+1)); + PATH::line (env, param, pt1); + } + if (i < env.argStack.get_count ()) + { + pt1 = env.get_pt (); + pt1.move_x (env.eval_arg (i)); + PATH::line (env, param, pt1); + } + } + + static void vlineto (ENV &env, PARAM& param) + { + point_t pt1; + unsigned int i = 0; + for (; i + 2 <= env.argStack.get_count (); i += 2) + { + pt1 = env.get_pt (); + pt1.move_y (env.eval_arg (i)); + PATH::line (env, param, pt1); + pt1.move_x (env.eval_arg (i+1)); + PATH::line (env, param, pt1); + } + if (i < env.argStack.get_count ()) + { + pt1 = env.get_pt (); + pt1.move_y (env.eval_arg (i)); + PATH::line (env, param, pt1); + } + } + + static void rrcurveto (ENV &env, PARAM& param) + { + for (unsigned int i = 0; i + 6 <= env.argStack.get_count (); i += 6) + { + point_t pt1 = env.get_pt (); + pt1.move (env.eval_arg (i), env.eval_arg (i+1)); + point_t pt2 = pt1; + pt2.move (env.eval_arg (i+2), env.eval_arg (i+3)); + point_t pt3 = pt2; + pt3.move (env.eval_arg (i+4), env.eval_arg (i+5)); + PATH::curve (env, param, pt1, pt2, pt3); + } + } + + static void rcurveline (ENV &env, PARAM& param) + { + unsigned int arg_count = env.argStack.get_count (); + if (unlikely (arg_count < 8)) + return; + + unsigned int i = 0; + unsigned int curve_limit = arg_count - 2; + for (; i + 6 <= curve_limit; i += 6) + { + point_t pt1 = env.get_pt (); + pt1.move (env.eval_arg (i), env.eval_arg (i+1)); + point_t pt2 = pt1; + pt2.move (env.eval_arg (i+2), env.eval_arg (i+3)); + point_t pt3 = pt2; + pt3.move (env.eval_arg (i+4), env.eval_arg (i+5)); + PATH::curve (env, param, pt1, pt2, pt3); + } + + point_t pt1 = env.get_pt (); + pt1.move (env.eval_arg (i), env.eval_arg (i+1)); + PATH::line (env, param, pt1); + } + + static void rlinecurve (ENV &env, PARAM& param) + { + unsigned int arg_count = env.argStack.get_count (); + if (unlikely (arg_count < 8)) + return; + + unsigned int i = 0; + unsigned int line_limit = arg_count - 6; + for (; i + 2 <= line_limit; i += 2) + { + point_t pt1 = env.get_pt (); + pt1.move (env.eval_arg (i), env.eval_arg (i+1)); + PATH::line (env, param, pt1); + } + + point_t pt1 = env.get_pt (); + pt1.move (env.eval_arg (i), env.eval_arg (i+1)); + point_t pt2 = pt1; + pt2.move (env.eval_arg (i+2), env.eval_arg (i+3)); + point_t pt3 = pt2; + pt3.move (env.eval_arg (i+4), env.eval_arg (i+5)); + PATH::curve (env, param, pt1, pt2, pt3); + } + + static void vvcurveto (ENV &env, PARAM& param) + { + unsigned int i = 0; + point_t pt1 = env.get_pt (); + if ((env.argStack.get_count () & 1) != 0) + pt1.move_x (env.eval_arg (i++)); + for (; i + 4 <= env.argStack.get_count (); i += 4) + { + pt1.move_y (env.eval_arg (i)); + point_t pt2 = pt1; + pt2.move (env.eval_arg (i+1), env.eval_arg (i+2)); + point_t pt3 = pt2; + pt3.move_y (env.eval_arg (i+3)); + PATH::curve (env, param, pt1, pt2, pt3); + pt1 = env.get_pt (); + } + } + + static void hhcurveto (ENV &env, PARAM& param) + { + unsigned int i = 0; + point_t pt1 = env.get_pt (); + if ((env.argStack.get_count () & 1) != 0) + pt1.move_y (env.eval_arg (i++)); + for (; i + 4 <= env.argStack.get_count (); i += 4) + { + pt1.move_x (env.eval_arg (i)); + point_t pt2 = pt1; + pt2.move (env.eval_arg (i+1), env.eval_arg (i+2)); + point_t pt3 = pt2; + pt3.move_x (env.eval_arg (i+3)); + PATH::curve (env, param, pt1, pt2, pt3); + pt1 = env.get_pt (); + } + } + + static void vhcurveto (ENV &env, PARAM& param) + { + point_t pt1, pt2, pt3; + unsigned int i = 0; + if ((env.argStack.get_count () % 8) >= 4) + { + point_t pt1 = env.get_pt (); + pt1.move_y (env.eval_arg (i)); + point_t pt2 = pt1; + pt2.move (env.eval_arg (i+1), env.eval_arg (i+2)); + point_t pt3 = pt2; + pt3.move_x (env.eval_arg (i+3)); + i += 4; + + for (; i + 8 <= env.argStack.get_count (); i += 8) + { + PATH::curve (env, param, pt1, pt2, pt3); + pt1 = env.get_pt (); + pt1.move_x (env.eval_arg (i)); + pt2 = pt1; + pt2.move (env.eval_arg (i+1), env.eval_arg (i+2)); + pt3 = pt2; + pt3.move_y (env.eval_arg (i+3)); + PATH::curve (env, param, pt1, pt2, pt3); + + pt1 = pt3; + pt1.move_y (env.eval_arg (i+4)); + pt2 = pt1; + pt2.move (env.eval_arg (i+5), env.eval_arg (i+6)); + pt3 = pt2; + pt3.move_x (env.eval_arg (i+7)); + } + if (i < env.argStack.get_count ()) + pt3.move_y (env.eval_arg (i)); + PATH::curve (env, param, pt1, pt2, pt3); + } + else + { + for (; i + 8 <= env.argStack.get_count (); i += 8) + { + pt1 = env.get_pt (); + pt1.move_y (env.eval_arg (i)); + pt2 = pt1; + pt2.move (env.eval_arg (i+1), env.eval_arg (i+2)); + pt3 = pt2; + pt3.move_x (env.eval_arg (i+3)); + PATH::curve (env, param, pt1, pt2, pt3); + + pt1 = pt3; + pt1.move_x (env.eval_arg (i+4)); + pt2 = pt1; + pt2.move (env.eval_arg (i+5), env.eval_arg (i+6)); + pt3 = pt2; + pt3.move_y (env.eval_arg (i+7)); + if ((env.argStack.get_count () - i < 16) && ((env.argStack.get_count () & 1) != 0)) + pt3.move_x (env.eval_arg (i+8)); + PATH::curve (env, param, pt1, pt2, pt3); + } + } + } + + static void hvcurveto (ENV &env, PARAM& param) + { + point_t pt1, pt2, pt3; + unsigned int i = 0; + if ((env.argStack.get_count () % 8) >= 4) + { + point_t pt1 = env.get_pt (); + pt1.move_x (env.eval_arg (i)); + point_t pt2 = pt1; + pt2.move (env.eval_arg (i+1), env.eval_arg (i+2)); + point_t pt3 = pt2; + pt3.move_y (env.eval_arg (i+3)); + i += 4; + + for (; i + 8 <= env.argStack.get_count (); i += 8) + { + PATH::curve (env, param, pt1, pt2, pt3); + pt1 = env.get_pt (); + pt1.move_y (env.eval_arg (i)); + pt2 = pt1; + pt2.move (env.eval_arg (i+1), env.eval_arg (i+2)); + pt3 = pt2; + pt3.move_x (env.eval_arg (i+3)); + PATH::curve (env, param, pt1, pt2, pt3); + + pt1 = pt3; + pt1.move_x (env.eval_arg (i+4)); + pt2 = pt1; + pt2.move (env.eval_arg (i+5), env.eval_arg (i+6)); + pt3 = pt2; + pt3.move_y (env.eval_arg (i+7)); + } + if (i < env.argStack.get_count ()) + pt3.move_x (env.eval_arg (i)); + PATH::curve (env, param, pt1, pt2, pt3); + } + else + { + for (; i + 8 <= env.argStack.get_count (); i += 8) + { + pt1 = env.get_pt (); + pt1.move_x (env.eval_arg (i)); + pt2 = pt1; + pt2.move (env.eval_arg (i+1), env.eval_arg (i+2)); + pt3 = pt2; + pt3.move_y (env.eval_arg (i+3)); + PATH::curve (env, param, pt1, pt2, pt3); + + pt1 = pt3; + pt1.move_y (env.eval_arg (i+4)); + pt2 = pt1; + pt2.move (env.eval_arg (i+5), env.eval_arg (i+6)); + pt3 = pt2; + pt3.move_x (env.eval_arg (i+7)); + if ((env.argStack.get_count () - i < 16) && ((env.argStack.get_count () & 1) != 0)) + pt3.move_y (env.eval_arg (i+8)); + PATH::curve (env, param, pt1, pt2, pt3); + } + } + } + + /* default actions to be overridden */ + static void moveto (ENV &env, PARAM& param, const point_t &pt) + { env.moveto (pt); } + + static void line (ENV &env, PARAM& param, const point_t &pt1) + { PATH::moveto (env, param, pt1); } + + static void curve (ENV &env, PARAM& param, const point_t &pt1, const point_t &pt2, const point_t &pt3) + { PATH::moveto (env, param, pt3); } + + static void hflex (ENV &env, PARAM& param) + { + if (likely (env.argStack.get_count () == 7)) + { + point_t pt1 = env.get_pt (); + pt1.move_x (env.eval_arg (0)); + point_t pt2 = pt1; + pt2.move (env.eval_arg (1), env.eval_arg (2)); + point_t pt3 = pt2; + pt3.move_x (env.eval_arg (3)); + point_t pt4 = pt3; + pt4.move_x (env.eval_arg (4)); + point_t pt5 = pt4; + pt5.move_x (env.eval_arg (5)); + pt5.y = pt1.y; + point_t pt6 = pt5; + pt6.move_x (env.eval_arg (6)); + + curve2 (env, param, pt1, pt2, pt3, pt4, pt5, pt6); + } + else + env.set_error (); + } + + static void flex (ENV &env, PARAM& param) + { + if (likely (env.argStack.get_count () == 13)) + { + point_t pt1 = env.get_pt (); + pt1.move (env.eval_arg (0), env.eval_arg (1)); + point_t pt2 = pt1; + pt2.move (env.eval_arg (2), env.eval_arg (3)); + point_t pt3 = pt2; + pt3.move (env.eval_arg (4), env.eval_arg (5)); + point_t pt4 = pt3; + pt4.move (env.eval_arg (6), env.eval_arg (7)); + point_t pt5 = pt4; + pt5.move (env.eval_arg (8), env.eval_arg (9)); + point_t pt6 = pt5; + pt6.move (env.eval_arg (10), env.eval_arg (11)); + + curve2 (env, param, pt1, pt2, pt3, pt4, pt5, pt6); + } + else + env.set_error (); + } + + static void hflex1 (ENV &env, PARAM& param) + { + if (likely (env.argStack.get_count () == 9)) + { + point_t pt1 = env.get_pt (); + pt1.move (env.eval_arg (0), env.eval_arg (1)); + point_t pt2 = pt1; + pt2.move (env.eval_arg (2), env.eval_arg (3)); + point_t pt3 = pt2; + pt3.move_x (env.eval_arg (4)); + point_t pt4 = pt3; + pt4.move_x (env.eval_arg (5)); + point_t pt5 = pt4; + pt5.move (env.eval_arg (6), env.eval_arg (7)); + point_t pt6 = pt5; + pt6.move_x (env.eval_arg (8)); + pt6.y = env.get_pt ().y; + + curve2 (env, param, pt1, pt2, pt3, pt4, pt5, pt6); + } + else + env.set_error (); + } + + static void flex1 (ENV &env, PARAM& param) + { + if (likely (env.argStack.get_count () == 11)) + { + point_t d; + d.init (); + for (unsigned int i = 0; i < 10; i += 2) + d.move (env.eval_arg (i), env.eval_arg (i+1)); + + point_t pt1 = env.get_pt (); + pt1.move (env.eval_arg (0), env.eval_arg (1)); + point_t pt2 = pt1; + pt2.move (env.eval_arg (2), env.eval_arg (3)); + point_t pt3 = pt2; + pt3.move (env.eval_arg (4), env.eval_arg (5)); + point_t pt4 = pt3; + pt4.move (env.eval_arg (6), env.eval_arg (7)); + point_t pt5 = pt4; + pt5.move (env.eval_arg (8), env.eval_arg (9)); + point_t pt6 = pt5; + + if (fabs (d.x.to_real ()) > fabs (d.y.to_real ())) + { + pt6.move_x (env.eval_arg (10)); + pt6.y = env.get_pt ().y; + } + else + { + pt6.x = env.get_pt ().x; + pt6.move_y (env.eval_arg (10)); + } + + curve2 (env, param, pt1, pt2, pt3, pt4, pt5, pt6); + } + else + env.set_error (); + } + + protected: + static void curve2 (ENV &env, PARAM& param, + const point_t &pt1, const point_t &pt2, const point_t &pt3, + const point_t &pt4, const point_t &pt5, const point_t &pt6) + { + PATH::curve (env, param, pt1, pt2, pt3); + PATH::curve (env, param, pt4, pt5, pt6); + } +}; + +template <typename ENV, typename OPSET, typename PARAM> +struct cs_interpreter_t : interpreter_t<ENV> +{ + bool interpret (PARAM& param) + { + SUPER::env.set_endchar (false); + + for (;;) { + OPSET::process_op (SUPER::env.fetch_op (), SUPER::env, param); + if (unlikely (SUPER::env.in_error ())) + return false; + if (SUPER::env.is_endchar ()) + break; + } + + return true; + } + + private: + typedef interpreter_t<ENV> SUPER; +}; + +} /* namespace CFF */ + +#endif /* HB_CFF_INTERP_CS_COMMON_HH */ diff --git a/thirdparty/harfbuzz/src/hb-cff-interp-dict-common.hh b/thirdparty/harfbuzz/src/hb-cff-interp-dict-common.hh new file mode 100644 index 0000000000..a520ca3bce --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-cff-interp-dict-common.hh @@ -0,0 +1,201 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ +#ifndef HB_CFF_INTERP_DICT_COMMON_HH +#define HB_CFF_INTERP_DICT_COMMON_HH + +#include "hb-cff-interp-common.hh" + +namespace CFF { + +using namespace OT; + +/* an opstr and the parsed out dict value(s) */ +struct dict_val_t : op_str_t +{ + void init () { single_val.set_int (0); } + void fini () {} + + number_t single_val; +}; + +typedef dict_val_t num_dict_val_t; + +template <typename VAL> struct dict_values_t : parsed_values_t<VAL> {}; + +template <typename OPSTR=op_str_t> +struct top_dict_values_t : dict_values_t<OPSTR> +{ + void init () + { + dict_values_t<OPSTR>::init (); + charStringsOffset = 0; + FDArrayOffset = 0; + } + void fini () { dict_values_t<OPSTR>::fini (); } + + unsigned int charStringsOffset; + unsigned int FDArrayOffset; +}; + +struct dict_opset_t : opset_t<number_t> +{ + static void process_op (op_code_t op, interp_env_t<number_t>& env) + { + switch (op) { + case OpCode_longintdict: /* 5-byte integer */ + env.argStack.push_longint_from_substr (env.str_ref); + break; + + case OpCode_BCD: /* real number */ + env.argStack.push_real (parse_bcd (env.str_ref)); + break; + + default: + opset_t<number_t>::process_op (op, env); + break; + } + } + + /* Turns CFF's BCD format into strtod understandable string */ + static double parse_bcd (byte_str_ref_t& str_ref) + { + if (unlikely (str_ref.in_error ())) return .0; + + enum Nibble { DECIMAL=10, EXP_POS, EXP_NEG, RESERVED, NEG, END }; + + char buf[32]; + unsigned char byte = 0; + for (unsigned i = 0, count = 0; count < ARRAY_LENGTH (buf); ++i, ++count) + { + unsigned nibble; + if (!(i & 1)) + { + if (unlikely (!str_ref.avail ())) break; + + byte = str_ref[0]; + str_ref.inc (); + nibble = byte >> 4; + } + else + nibble = byte & 0x0F; + + if (unlikely (nibble == RESERVED)) break; + else if (nibble == END) + { + const char *p = buf; + double pv; + if (unlikely (!hb_parse_double (&p, p + count, &pv, true/* whole buffer */))) + break; + return pv; + } + else + { + buf[count] = "0123456789.EE?-?"[nibble]; + if (nibble == EXP_NEG) + { + ++count; + if (unlikely (count == ARRAY_LENGTH (buf))) break; + buf[count] = '-'; + } + } + } + + str_ref.set_error (); + return .0; + } + + static bool is_hint_op (op_code_t op) + { + switch (op) + { + case OpCode_BlueValues: + case OpCode_OtherBlues: + case OpCode_FamilyBlues: + case OpCode_FamilyOtherBlues: + case OpCode_StemSnapH: + case OpCode_StemSnapV: + case OpCode_StdHW: + case OpCode_StdVW: + case OpCode_BlueScale: + case OpCode_BlueShift: + case OpCode_BlueFuzz: + case OpCode_ForceBold: + case OpCode_LanguageGroup: + case OpCode_ExpansionFactor: + return true; + default: + return false; + } + } +}; + +template <typename VAL=op_str_t> +struct top_dict_opset_t : dict_opset_t +{ + static void process_op (op_code_t op, interp_env_t<number_t>& env, top_dict_values_t<VAL> & dictval) + { + switch (op) { + case OpCode_CharStrings: + dictval.charStringsOffset = env.argStack.pop_uint (); + env.clear_args (); + break; + case OpCode_FDArray: + dictval.FDArrayOffset = env.argStack.pop_uint (); + env.clear_args (); + break; + case OpCode_FontMatrix: + env.clear_args (); + break; + default: + dict_opset_t::process_op (op, env); + break; + } + } +}; + +template <typename OPSET, typename PARAM, typename ENV=num_interp_env_t> +struct dict_interpreter_t : interpreter_t<ENV> +{ + bool interpret (PARAM& param) + { + param.init (); + while (SUPER::env.str_ref.avail ()) + { + OPSET::process_op (SUPER::env.fetch_op (), SUPER::env, param); + if (unlikely (SUPER::env.in_error ())) + return false; + } + + return true; + } + + private: + typedef interpreter_t<ENV> SUPER; +}; + +} /* namespace CFF */ + +#endif /* HB_CFF_INTERP_DICT_COMMON_HH */ diff --git a/thirdparty/harfbuzz/src/hb-cff1-interp-cs.hh b/thirdparty/harfbuzz/src/hb-cff1-interp-cs.hh new file mode 100644 index 0000000000..1c8762c172 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-cff1-interp-cs.hh @@ -0,0 +1,161 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ +#ifndef HB_CFF1_INTERP_CS_HH +#define HB_CFF1_INTERP_CS_HH + +#include "hb.hh" +#include "hb-cff-interp-cs-common.hh" + +namespace CFF { + +using namespace OT; + +typedef biased_subrs_t<CFF1Subrs> cff1_biased_subrs_t; + +struct cff1_cs_interp_env_t : cs_interp_env_t<number_t, CFF1Subrs> +{ + template <typename ACC> + void init (const byte_str_t &str, ACC &acc, unsigned int fd) + { + SUPER::init (str, acc.globalSubrs, acc.privateDicts[fd].localSubrs); + processed_width = false; + has_width = false; + arg_start = 0; + in_seac = false; + } + + void fini () { SUPER::fini (); } + + void set_width (bool has_width_) + { + if (likely (!processed_width && (SUPER::argStack.get_count () > 0))) + { + if (has_width_) + { + width = SUPER::argStack[0]; + has_width = true; + arg_start = 1; + } + } + processed_width = true; + } + + void clear_args () + { + arg_start = 0; + SUPER::clear_args (); + } + + void set_in_seac (bool _in_seac) { in_seac = _in_seac; } + + bool processed_width; + bool has_width; + unsigned int arg_start; + number_t width; + bool in_seac; + + private: + typedef cs_interp_env_t<number_t, CFF1Subrs> SUPER; +}; + +template <typename OPSET, typename PARAM, typename PATH=path_procs_null_t<cff1_cs_interp_env_t, PARAM>> +struct cff1_cs_opset_t : cs_opset_t<number_t, OPSET, cff1_cs_interp_env_t, PARAM, PATH> +{ + /* PostScript-originated legacy opcodes (OpCode_add etc) are unsupported */ + /* Type 1-originated deprecated opcodes, seac behavior of endchar and dotsection are supported */ + + static void process_op (op_code_t op, cff1_cs_interp_env_t &env, PARAM& param) + { + switch (op) { + case OpCode_dotsection: + SUPER::flush_args_and_op (op, env, param); + break; + + case OpCode_endchar: + OPSET::check_width (op, env, param); + if (env.argStack.get_count () >= 4) + { + OPSET::process_seac (env, param); + } + OPSET::flush_args_and_op (op, env, param); + env.set_endchar (true); + break; + + default: + SUPER::process_op (op, env, param); + } + } + + static void check_width (op_code_t op, cff1_cs_interp_env_t &env, PARAM& param) + { + if (!env.processed_width) + { + bool has_width = false; + switch (op) + { + case OpCode_endchar: + case OpCode_hstem: + case OpCode_hstemhm: + case OpCode_vstem: + case OpCode_vstemhm: + case OpCode_hintmask: + case OpCode_cntrmask: + has_width = ((env.argStack.get_count () & 1) != 0); + break; + case OpCode_hmoveto: + case OpCode_vmoveto: + has_width = (env.argStack.get_count () > 1); + break; + case OpCode_rmoveto: + has_width = (env.argStack.get_count () > 2); + break; + default: + return; + } + env.set_width (has_width); + } + } + + static void process_seac (cff1_cs_interp_env_t &env, PARAM& param) + { + } + + static void flush_args (cff1_cs_interp_env_t &env, PARAM& param) + { + SUPER::flush_args (env, param); + env.clear_args (); /* pop off width */ + } + + private: + typedef cs_opset_t<number_t, OPSET, cff1_cs_interp_env_t, PARAM, PATH> SUPER; +}; + +template <typename OPSET, typename PARAM> +struct cff1_cs_interpreter_t : cs_interpreter_t<cff1_cs_interp_env_t, OPSET, PARAM> {}; + +} /* namespace CFF */ + +#endif /* HB_CFF1_INTERP_CS_HH */ diff --git a/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh b/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh new file mode 100644 index 0000000000..332ece31cd --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh @@ -0,0 +1,272 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ +#ifndef HB_CFF2_INTERP_CS_HH +#define HB_CFF2_INTERP_CS_HH + +#include "hb.hh" +#include "hb-cff-interp-cs-common.hh" + +namespace CFF { + +using namespace OT; + +struct blend_arg_t : number_t +{ + void init () + { + number_t::init (); + deltas.init (); + } + + void fini () + { + number_t::fini (); + deltas.fini_deep (); + } + + void set_int (int v) { reset_blends (); number_t::set_int (v); } + void set_fixed (int32_t v) { reset_blends (); number_t::set_fixed (v); } + void set_real (double v) { reset_blends (); number_t::set_real (v); } + + void set_blends (unsigned int numValues_, unsigned int valueIndex_, + unsigned int numBlends, hb_array_t<const blend_arg_t> blends_) + { + numValues = numValues_; + valueIndex = valueIndex_; + deltas.resize (numBlends); + for (unsigned int i = 0; i < numBlends; i++) + deltas[i] = blends_[i]; + } + + bool blending () const { return deltas.length > 0; } + void reset_blends () + { + numValues = valueIndex = 0; + deltas.resize (0); + } + + unsigned int numValues; + unsigned int valueIndex; + hb_vector_t<number_t> deltas; +}; + +typedef interp_env_t<blend_arg_t> BlendInterpEnv; +typedef biased_subrs_t<CFF2Subrs> cff2_biased_subrs_t; + +struct cff2_cs_interp_env_t : cs_interp_env_t<blend_arg_t, CFF2Subrs> +{ + template <typename ACC> + void init (const byte_str_t &str, ACC &acc, unsigned int fd, + const int *coords_=nullptr, unsigned int num_coords_=0) + { + SUPER::init (str, acc.globalSubrs, acc.privateDicts[fd].localSubrs); + + coords = coords_; + num_coords = num_coords_; + varStore = acc.varStore; + seen_blend = false; + seen_vsindex_ = false; + scalars.init (); + do_blend = num_coords && coords && varStore->size; + set_ivs (acc.privateDicts[fd].ivs); + } + + void fini () + { + scalars.fini (); + SUPER::fini (); + } + + op_code_t fetch_op () + { + if (this->str_ref.avail ()) + return SUPER::fetch_op (); + + /* make up return or endchar op */ + if (this->callStack.is_empty ()) + return OpCode_endchar; + else + return OpCode_return; + } + + const blend_arg_t& eval_arg (unsigned int i) + { + blend_arg_t &arg = argStack[i]; + blend_arg (arg); + return arg; + } + + const blend_arg_t& pop_arg () + { + blend_arg_t &arg = argStack.pop (); + blend_arg (arg); + return arg; + } + + void process_blend () + { + if (!seen_blend) + { + region_count = varStore->varStore.get_region_index_count (get_ivs ()); + if (do_blend) + { + if (unlikely (!scalars.resize (region_count))) + set_error (); + else + varStore->varStore.get_scalars (get_ivs (), coords, num_coords, + &scalars[0], region_count); + } + seen_blend = true; + } + } + + void process_vsindex () + { + unsigned int index = argStack.pop_uint (); + if (unlikely (seen_vsindex () || seen_blend)) + { + set_error (); + } + else + { + set_ivs (index); + } + seen_vsindex_ = true; + } + + unsigned int get_region_count () const { return region_count; } + void set_region_count (unsigned int region_count_) { region_count = region_count_; } + unsigned int get_ivs () const { return ivs; } + void set_ivs (unsigned int ivs_) { ivs = ivs_; } + bool seen_vsindex () const { return seen_vsindex_; } + + protected: + void blend_arg (blend_arg_t &arg) + { + if (do_blend && arg.blending ()) + { + if (likely (scalars.length == arg.deltas.length)) + { + double v = arg.to_real (); + for (unsigned int i = 0; i < scalars.length; i++) + { + v += (double)scalars[i] * arg.deltas[i].to_real (); + } + arg.set_real (v); + arg.deltas.resize (0); + } + } + } + + protected: + const int *coords; + unsigned int num_coords; + const CFF2VariationStore *varStore; + unsigned int region_count; + unsigned int ivs; + hb_vector_t<float> scalars; + bool do_blend; + bool seen_vsindex_; + bool seen_blend; + + typedef cs_interp_env_t<blend_arg_t, CFF2Subrs> SUPER; +}; +template <typename OPSET, typename PARAM, typename PATH=path_procs_null_t<cff2_cs_interp_env_t, PARAM>> +struct cff2_cs_opset_t : cs_opset_t<blend_arg_t, OPSET, cff2_cs_interp_env_t, PARAM, PATH> +{ + static void process_op (op_code_t op, cff2_cs_interp_env_t &env, PARAM& param) + { + switch (op) { + case OpCode_callsubr: + case OpCode_callgsubr: + /* a subroutine number shoudln't be a blended value */ + if (unlikely (env.argStack.peek ().blending ())) + { + env.set_error (); + break; + } + SUPER::process_op (op, env, param); + break; + + case OpCode_blendcs: + OPSET::process_blend (env, param); + break; + + case OpCode_vsindexcs: + if (unlikely (env.argStack.peek ().blending ())) + { + env.set_error (); + break; + } + OPSET::process_vsindex (env, param); + break; + + default: + SUPER::process_op (op, env, param); + } + } + + static void process_blend (cff2_cs_interp_env_t &env, PARAM& param) + { + unsigned int n, k; + + env.process_blend (); + k = env.get_region_count (); + n = env.argStack.pop_uint (); + /* copy the blend values into blend array of the default values */ + unsigned int start = env.argStack.get_count () - ((k+1) * n); + /* let an obvious error case fail, but note CFF2 spec doesn't forbid n==0 */ + if (unlikely (start > env.argStack.get_count ())) + { + env.set_error (); + return; + } + for (unsigned int i = 0; i < n; i++) + { + const hb_array_t<const blend_arg_t> blends = env.argStack.get_subarray (start + n + (i * k)); + env.argStack[start + i].set_blends (n, i, k, blends); + } + + /* pop off blend values leaving default values now adorned with blend values */ + env.argStack.pop (k * n); + } + + static void process_vsindex (cff2_cs_interp_env_t &env, PARAM& param) + { + env.process_vsindex (); + env.clear_args (); + } + + private: + typedef cs_opset_t<blend_arg_t, OPSET, cff2_cs_interp_env_t, PARAM, PATH> SUPER; +}; + +template <typename OPSET, typename PARAM> +struct cff2_cs_interpreter_t : cs_interpreter_t<cff2_cs_interp_env_t, OPSET, PARAM> {}; + +} /* namespace CFF */ + +#endif /* HB_CFF2_INTERP_CS_HH */ diff --git a/thirdparty/harfbuzz/src/hb-common.cc b/thirdparty/harfbuzz/src/hb-common.cc new file mode 100644 index 0000000000..5acfa78431 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-common.cc @@ -0,0 +1,1098 @@ +/* + * Copyright © 2009,2010 Red Hat, Inc. + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" +#include "hb-machinery.hh" + +#include <locale.h> + +#ifdef HB_NO_SETLOCALE +#define setlocale(Category, Locale) "C" +#endif + +/** + * SECTION:hb-common + * @title: hb-common + * @short_description: Common data types + * @include: hb.h + * + * Common data types used across HarfBuzz are defined here. + **/ + + +/* hb_options_t */ + +hb_atomic_int_t _hb_options; + +void +_hb_options_init () +{ + hb_options_union_t u; + u.i = 0; + u.opts.initialized = true; + + const char *c = getenv ("HB_OPTIONS"); + if (c) + { + while (*c) + { + const char *p = strchr (c, ':'); + if (!p) + p = c + strlen (c); + +#define OPTION(name, symbol) \ + if (0 == strncmp (c, name, p - c) && strlen (name) == static_cast<size_t>(p - c)) do { u.opts.symbol = true; } while (0) + + OPTION ("uniscribe-bug-compatible", uniscribe_bug_compatible); + +#undef OPTION + + c = *p ? p + 1 : p; + } + + } + + /* This is idempotent and threadsafe. */ + _hb_options.set_relaxed (u.i); +} + + +/* hb_tag_t */ + +/** + * hb_tag_from_string: + * @str: (array length=len) (element-type uint8_t): + * @len: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_tag_t +hb_tag_from_string (const char *str, int len) +{ + char tag[4]; + unsigned int i; + + if (!str || !len || !*str) + return HB_TAG_NONE; + + if (len < 0 || len > 4) + len = 4; + for (i = 0; i < (unsigned) len && str[i]; i++) + tag[i] = str[i]; + for (; i < 4; i++) + tag[i] = ' '; + + return HB_TAG (tag[0], tag[1], tag[2], tag[3]); +} + +/** + * hb_tag_to_string: + * @tag: + * @buf: (out caller-allocates) (array fixed-size=4) (element-type uint8_t): + * + * + * + * Since: 0.9.5 + **/ +void +hb_tag_to_string (hb_tag_t tag, char *buf) +{ + buf[0] = (char) (uint8_t) (tag >> 24); + buf[1] = (char) (uint8_t) (tag >> 16); + buf[2] = (char) (uint8_t) (tag >> 8); + buf[3] = (char) (uint8_t) (tag >> 0); +} + + +/* hb_direction_t */ + +const char direction_strings[][4] = { + "ltr", + "rtl", + "ttb", + "btt" +}; + +/** + * hb_direction_from_string: + * @str: (array length=len) (element-type uint8_t): + * @len: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_direction_t +hb_direction_from_string (const char *str, int len) +{ + if (unlikely (!str || !len || !*str)) + return HB_DIRECTION_INVALID; + + /* Lets match loosely: just match the first letter, such that + * all of "ltr", "left-to-right", etc work! + */ + char c = TOLOWER (str[0]); + for (unsigned int i = 0; i < ARRAY_LENGTH (direction_strings); i++) + if (c == direction_strings[i][0]) + return (hb_direction_t) (HB_DIRECTION_LTR + i); + + return HB_DIRECTION_INVALID; +} + +/** + * hb_direction_to_string: + * @direction: + * + * + * + * Return value: (transfer none): + * + * Since: 0.9.2 + **/ +const char * +hb_direction_to_string (hb_direction_t direction) +{ + if (likely ((unsigned int) (direction - HB_DIRECTION_LTR) + < ARRAY_LENGTH (direction_strings))) + return direction_strings[direction - HB_DIRECTION_LTR]; + + return "invalid"; +} + + +/* hb_language_t */ + +struct hb_language_impl_t { + const char s[1]; +}; + +static const char canon_map[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '-', 0, 0, + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0, 0, 0, 0, 0, 0, + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, '-', + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0, 0, 0, 0, 0 +}; + +static bool +lang_equal (hb_language_t v1, + const void *v2) +{ + const unsigned char *p1 = (const unsigned char *) v1; + const unsigned char *p2 = (const unsigned char *) v2; + + while (*p1 && *p1 == canon_map[*p2]) { + p1++; + p2++; + } + + return *p1 == canon_map[*p2]; +} + +#if 0 +static unsigned int +lang_hash (const void *key) +{ + const unsigned char *p = key; + unsigned int h = 0; + while (canon_map[*p]) + { + h = (h << 5) - h + canon_map[*p]; + p++; + } + + return h; +} +#endif + + +struct hb_language_item_t { + + struct hb_language_item_t *next; + hb_language_t lang; + + bool operator == (const char *s) const + { return lang_equal (lang, s); } + + hb_language_item_t & operator = (const char *s) { + /* If a custom allocated is used calling strdup() pairs + badly with a call to the custom free() in fini() below. + Therefore don't call strdup(), implement its behavior. + */ + size_t len = strlen(s) + 1; + lang = (hb_language_t) malloc(len); + if (likely (lang)) + { + memcpy((unsigned char *) lang, s, len); + for (unsigned char *p = (unsigned char *) lang; *p; p++) + *p = canon_map[*p]; + } + + return *this; + } + + void fini () { free ((void *) lang); } +}; + + +/* Thread-safe lock-free language list */ + +static hb_atomic_ptr_t <hb_language_item_t> langs; + +#if HB_USE_ATEXIT +static void +free_langs () +{ +retry: + hb_language_item_t *first_lang = langs; + if (unlikely (!langs.cmpexch (first_lang, nullptr))) + goto retry; + + while (first_lang) { + hb_language_item_t *next = first_lang->next; + first_lang->fini (); + free (first_lang); + first_lang = next; + } +} +#endif + +static hb_language_item_t * +lang_find_or_insert (const char *key) +{ +retry: + hb_language_item_t *first_lang = langs; + + for (hb_language_item_t *lang = first_lang; lang; lang = lang->next) + if (*lang == key) + return lang; + + /* Not found; allocate one. */ + hb_language_item_t *lang = (hb_language_item_t *) calloc (1, sizeof (hb_language_item_t)); + if (unlikely (!lang)) + return nullptr; + lang->next = first_lang; + *lang = key; + if (unlikely (!lang->lang)) + { + free (lang); + return nullptr; + } + + if (unlikely (!langs.cmpexch (first_lang, lang))) + { + lang->fini (); + free (lang); + goto retry; + } + +#if HB_USE_ATEXIT + if (!first_lang) + atexit (free_langs); /* First person registers atexit() callback. */ +#endif + + return lang; +} + + +/** + * hb_language_from_string: + * @str: (array length=len) (element-type uint8_t): a string representing + * a BCP 47 language tag + * @len: length of the @str, or -1 if it is %NULL-terminated. + * + * Converts @str representing a BCP 47 language tag to the corresponding + * #hb_language_t. + * + * Return value: (transfer none): + * The #hb_language_t corresponding to the BCP 47 language tag. + * + * Since: 0.9.2 + **/ +hb_language_t +hb_language_from_string (const char *str, int len) +{ + if (!str || !len || !*str) + return HB_LANGUAGE_INVALID; + + hb_language_item_t *item = nullptr; + if (len >= 0) + { + /* NUL-terminate it. */ + char strbuf[64]; + len = hb_min (len, (int) sizeof (strbuf) - 1); + memcpy (strbuf, str, len); + strbuf[len] = '\0'; + item = lang_find_or_insert (strbuf); + } + else + item = lang_find_or_insert (str); + + return likely (item) ? item->lang : HB_LANGUAGE_INVALID; +} + +/** + * hb_language_to_string: + * @language: an #hb_language_t to convert. + * + * See hb_language_from_string(). + * + * Return value: (transfer none): + * A %NULL-terminated string representing the @language. Must not be freed by + * the caller. + * + * Since: 0.9.2 + **/ +const char * +hb_language_to_string (hb_language_t language) +{ + if (unlikely (!language)) return nullptr; + + return language->s; +} + +/** + * hb_language_get_default: + * + * Get default language from current locale. + * + * Note that the first time this function is called, it calls + * "setlocale (LC_CTYPE, nullptr)" to fetch current locale. The underlying + * setlocale function is, in many implementations, NOT threadsafe. To avoid + * problems, call this function once before multiple threads can call it. + * This function is only used from hb_buffer_guess_segment_properties() by + * HarfBuzz itself. + * + * Return value: (transfer none): + * + * Since: 0.9.2 + **/ +hb_language_t +hb_language_get_default () +{ + static hb_atomic_ptr_t <hb_language_t> default_language; + + hb_language_t language = default_language; + if (unlikely (language == HB_LANGUAGE_INVALID)) + { + language = hb_language_from_string (setlocale (LC_CTYPE, nullptr), -1); + (void) default_language.cmpexch (HB_LANGUAGE_INVALID, language); + } + + return language; +} + + +/* hb_script_t */ + +/** + * hb_script_from_iso15924_tag: + * @tag: an #hb_tag_t representing an ISO 15924 tag. + * + * Converts an ISO 15924 script tag to a corresponding #hb_script_t. + * + * Return value: + * An #hb_script_t corresponding to the ISO 15924 tag. + * + * Since: 0.9.2 + **/ +hb_script_t +hb_script_from_iso15924_tag (hb_tag_t tag) +{ + if (unlikely (tag == HB_TAG_NONE)) + return HB_SCRIPT_INVALID; + + /* Be lenient, adjust case (one capital letter followed by three small letters) */ + tag = (tag & 0xDFDFDFDFu) | 0x00202020u; + + switch (tag) { + + /* These graduated from the 'Q' private-area codes, but + * the old code is still aliased by Unicode, and the Qaai + * one in use by ICU. */ + case HB_TAG('Q','a','a','i'): return HB_SCRIPT_INHERITED; + case HB_TAG('Q','a','a','c'): return HB_SCRIPT_COPTIC; + + /* Script variants from https://unicode.org/iso15924/ */ + case HB_TAG('C','y','r','s'): return HB_SCRIPT_CYRILLIC; + case HB_TAG('L','a','t','f'): return HB_SCRIPT_LATIN; + case HB_TAG('L','a','t','g'): return HB_SCRIPT_LATIN; + case HB_TAG('S','y','r','e'): return HB_SCRIPT_SYRIAC; + case HB_TAG('S','y','r','j'): return HB_SCRIPT_SYRIAC; + case HB_TAG('S','y','r','n'): return HB_SCRIPT_SYRIAC; + } + + /* If it looks right, just use the tag as a script */ + if (((uint32_t) tag & 0xE0E0E0E0u) == 0x40606060u) + return (hb_script_t) tag; + + /* Otherwise, return unknown */ + return HB_SCRIPT_UNKNOWN; +} + +/** + * hb_script_from_string: + * @str: (array length=len) (element-type uint8_t): a string representing an + * ISO 15924 tag. + * @len: length of the @str, or -1 if it is %NULL-terminated. + * + * Converts a string @str representing an ISO 15924 script tag to a + * corresponding #hb_script_t. Shorthand for hb_tag_from_string() then + * hb_script_from_iso15924_tag(). + * + * Return value: + * An #hb_script_t corresponding to the ISO 15924 tag. + * + * Since: 0.9.2 + **/ +hb_script_t +hb_script_from_string (const char *str, int len) +{ + return hb_script_from_iso15924_tag (hb_tag_from_string (str, len)); +} + +/** + * hb_script_to_iso15924_tag: + * @script: an #hb_script_t to convert. + * + * See hb_script_from_iso15924_tag(). + * + * Return value: + * An #hb_tag_t representing an ISO 15924 script tag. + * + * Since: 0.9.2 + **/ +hb_tag_t +hb_script_to_iso15924_tag (hb_script_t script) +{ + return (hb_tag_t) script; +} + +/** + * hb_script_get_horizontal_direction: + * @script: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_direction_t +hb_script_get_horizontal_direction (hb_script_t script) +{ + /* https://docs.google.com/spreadsheets/d/1Y90M0Ie3MUJ6UVCRDOypOtijlMDLNNyyLk36T6iMu0o */ + switch ((hb_tag_t) script) + { + /* Unicode-1.1 additions */ + case HB_SCRIPT_ARABIC: + case HB_SCRIPT_HEBREW: + + /* Unicode-3.0 additions */ + case HB_SCRIPT_SYRIAC: + case HB_SCRIPT_THAANA: + + /* Unicode-4.0 additions */ + case HB_SCRIPT_CYPRIOT: + + /* Unicode-4.1 additions */ + case HB_SCRIPT_KHAROSHTHI: + + /* Unicode-5.0 additions */ + case HB_SCRIPT_PHOENICIAN: + case HB_SCRIPT_NKO: + + /* Unicode-5.1 additions */ + case HB_SCRIPT_LYDIAN: + + /* Unicode-5.2 additions */ + case HB_SCRIPT_AVESTAN: + case HB_SCRIPT_IMPERIAL_ARAMAIC: + case HB_SCRIPT_INSCRIPTIONAL_PAHLAVI: + case HB_SCRIPT_INSCRIPTIONAL_PARTHIAN: + case HB_SCRIPT_OLD_SOUTH_ARABIAN: + case HB_SCRIPT_OLD_TURKIC: + case HB_SCRIPT_SAMARITAN: + + /* Unicode-6.0 additions */ + case HB_SCRIPT_MANDAIC: + + /* Unicode-6.1 additions */ + case HB_SCRIPT_MEROITIC_CURSIVE: + case HB_SCRIPT_MEROITIC_HIEROGLYPHS: + + /* Unicode-7.0 additions */ + case HB_SCRIPT_MANICHAEAN: + case HB_SCRIPT_MENDE_KIKAKUI: + case HB_SCRIPT_NABATAEAN: + case HB_SCRIPT_OLD_NORTH_ARABIAN: + case HB_SCRIPT_PALMYRENE: + case HB_SCRIPT_PSALTER_PAHLAVI: + + /* Unicode-8.0 additions */ + case HB_SCRIPT_HATRAN: + + /* Unicode-9.0 additions */ + case HB_SCRIPT_ADLAM: + + /* Unicode-11.0 additions */ + case HB_SCRIPT_HANIFI_ROHINGYA: + case HB_SCRIPT_OLD_SOGDIAN: + case HB_SCRIPT_SOGDIAN: + + /* Unicode-12.0 additions */ + case HB_SCRIPT_ELYMAIC: + + /* Unicode-13.0 additions */ + case HB_SCRIPT_CHORASMIAN: + case HB_SCRIPT_YEZIDI: + + return HB_DIRECTION_RTL; + + + /* https://github.com/harfbuzz/harfbuzz/issues/1000 */ + case HB_SCRIPT_OLD_HUNGARIAN: + case HB_SCRIPT_OLD_ITALIC: + case HB_SCRIPT_RUNIC: + + return HB_DIRECTION_INVALID; + } + + return HB_DIRECTION_LTR; +} + + +/* hb_version */ + + +/** + * SECTION:hb-version + * @title: hb-version + * @short_description: Information about the version of HarfBuzz in use + * @include: hb.h + * + * These functions and macros allow accessing version of the HarfBuzz + * library used at compile- as well as run-time, and to direct code + * conditionally based on those versions, again, at compile- or run-time. + **/ + + +/** + * hb_version: + * @major: (out): Library major version component. + * @minor: (out): Library minor version component. + * @micro: (out): Library micro version component. + * + * Returns library version as three integer components. + * + * Since: 0.9.2 + **/ +void +hb_version (unsigned int *major, + unsigned int *minor, + unsigned int *micro) +{ + *major = HB_VERSION_MAJOR; + *minor = HB_VERSION_MINOR; + *micro = HB_VERSION_MICRO; +} + +/** + * hb_version_string: + * + * Returns library version as a string with three components. + * + * Return value: library version string. + * + * Since: 0.9.2 + **/ +const char * +hb_version_string () +{ + return HB_VERSION_STRING; +} + +/** + * hb_version_atleast: + * @major: + * @minor: + * @micro: + * + * + * + * Return value: + * + * Since: 0.9.30 + **/ +hb_bool_t +hb_version_atleast (unsigned int major, + unsigned int minor, + unsigned int micro) +{ + return HB_VERSION_ATLEAST (major, minor, micro); +} + + + +/* hb_feature_t and hb_variation_t */ + +static bool +parse_space (const char **pp, const char *end) +{ + while (*pp < end && ISSPACE (**pp)) + (*pp)++; + return true; +} + +static bool +parse_char (const char **pp, const char *end, char c) +{ + parse_space (pp, end); + + if (*pp == end || **pp != c) + return false; + + (*pp)++; + return true; +} + +static bool +parse_uint (const char **pp, const char *end, unsigned int *pv) +{ + /* Intentionally use hb_parse_int inside instead of hb_parse_uint, + * such that -1 turns into "big number"... */ + int v; + if (unlikely (!hb_parse_int (pp, end, &v))) return false; + + *pv = v; + return true; +} + +static bool +parse_uint32 (const char **pp, const char *end, uint32_t *pv) +{ + /* Intentionally use hb_parse_int inside instead of hb_parse_uint, + * such that -1 turns into "big number"... */ + int v; + if (unlikely (!hb_parse_int (pp, end, &v))) return false; + + *pv = v; + return true; +} + +static bool +parse_bool (const char **pp, const char *end, uint32_t *pv) +{ + parse_space (pp, end); + + const char *p = *pp; + while (*pp < end && ISALPHA(**pp)) + (*pp)++; + + /* CSS allows on/off as aliases 1/0. */ + if (*pp - p == 2 + && TOLOWER (p[0]) == 'o' + && TOLOWER (p[1]) == 'n') + *pv = 1; + else if (*pp - p == 3 + && TOLOWER (p[0]) == 'o' + && TOLOWER (p[1]) == 'f' + && TOLOWER (p[2]) == 'f') + *pv = 0; + else + return false; + + return true; +} + +/* hb_feature_t */ + +static bool +parse_feature_value_prefix (const char **pp, const char *end, hb_feature_t *feature) +{ + if (parse_char (pp, end, '-')) + feature->value = 0; + else { + parse_char (pp, end, '+'); + feature->value = 1; + } + + return true; +} + +static bool +parse_tag (const char **pp, const char *end, hb_tag_t *tag) +{ + parse_space (pp, end); + + char quote = 0; + + if (*pp < end && (**pp == '\'' || **pp == '"')) + { + quote = **pp; + (*pp)++; + } + + const char *p = *pp; + while (*pp < end && (ISALNUM(**pp) || **pp == '_')) + (*pp)++; + + if (p == *pp || *pp - p > 4) + return false; + + *tag = hb_tag_from_string (p, *pp - p); + + if (quote) + { + /* CSS expects exactly four bytes. And we only allow quotations for + * CSS compatibility. So, enforce the length. */ + if (*pp - p != 4) + return false; + if (*pp == end || **pp != quote) + return false; + (*pp)++; + } + + return true; +} + +static bool +parse_feature_indices (const char **pp, const char *end, hb_feature_t *feature) +{ + parse_space (pp, end); + + bool has_start; + + feature->start = HB_FEATURE_GLOBAL_START; + feature->end = HB_FEATURE_GLOBAL_END; + + if (!parse_char (pp, end, '[')) + return true; + + has_start = parse_uint (pp, end, &feature->start); + + if (parse_char (pp, end, ':') || parse_char (pp, end, ';')) { + parse_uint (pp, end, &feature->end); + } else { + if (has_start) + feature->end = feature->start + 1; + } + + return parse_char (pp, end, ']'); +} + +static bool +parse_feature_value_postfix (const char **pp, const char *end, hb_feature_t *feature) +{ + bool had_equal = parse_char (pp, end, '='); + bool had_value = parse_uint32 (pp, end, &feature->value) || + parse_bool (pp, end, &feature->value); + /* CSS doesn't use equal-sign between tag and value. + * If there was an equal-sign, then there *must* be a value. + * A value without an equal-sign is ok, but not required. */ + return !had_equal || had_value; +} + +static bool +parse_one_feature (const char **pp, const char *end, hb_feature_t *feature) +{ + return parse_feature_value_prefix (pp, end, feature) && + parse_tag (pp, end, &feature->tag) && + parse_feature_indices (pp, end, feature) && + parse_feature_value_postfix (pp, end, feature) && + parse_space (pp, end) && + *pp == end; +} + +/** + * hb_feature_from_string: + * @str: (array length=len) (element-type uint8_t): a string to parse + * @len: length of @str, or -1 if string is %NULL terminated + * @feature: (out): the #hb_feature_t to initialize with the parsed values + * + * Parses a string into a #hb_feature_t. + * + * The format for specifying feature strings follows. All valid CSS + * font-feature-settings values other than 'normal' and the global values are + * also accepted, though not documented below. CSS string escapes are not + * supported. + * + * The range indices refer to the positions between Unicode characters. The + * position before the first character is always 0. + * + * The format is Python-esque. Here is how it all works: + * + * <informaltable pgwide='1' align='left' frame='none'> + * <tgroup cols='5'> + * <thead> + * <row><entry>Syntax</entry> <entry>Value</entry> <entry>Start</entry> <entry>End</entry></row> + * </thead> + * <tbody> + * <row><entry>Setting value:</entry></row> + * <row><entry>kern</entry> <entry>1</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature on</entry></row> + * <row><entry>+kern</entry> <entry>1</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature on</entry></row> + * <row><entry>-kern</entry> <entry>0</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature off</entry></row> + * <row><entry>kern=0</entry> <entry>0</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature off</entry></row> + * <row><entry>kern=1</entry> <entry>1</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature on</entry></row> + * <row><entry>aalt=2</entry> <entry>2</entry> <entry>0</entry> <entry>∞</entry> <entry>Choose 2nd alternate</entry></row> + * <row><entry>Setting index:</entry></row> + * <row><entry>kern[]</entry> <entry>1</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature on</entry></row> + * <row><entry>kern[:]</entry> <entry>1</entry> <entry>0</entry> <entry>∞</entry> <entry>Turn feature on</entry></row> + * <row><entry>kern[5:]</entry> <entry>1</entry> <entry>5</entry> <entry>∞</entry> <entry>Turn feature on, partial</entry></row> + * <row><entry>kern[:5]</entry> <entry>1</entry> <entry>0</entry> <entry>5</entry> <entry>Turn feature on, partial</entry></row> + * <row><entry>kern[3:5]</entry> <entry>1</entry> <entry>3</entry> <entry>5</entry> <entry>Turn feature on, range</entry></row> + * <row><entry>kern[3]</entry> <entry>1</entry> <entry>3</entry> <entry>3+1</entry> <entry>Turn feature on, single char</entry></row> + * <row><entry>Mixing it all:</entry></row> + * <row><entry>aalt[3:5]=2</entry> <entry>2</entry> <entry>3</entry> <entry>5</entry> <entry>Turn 2nd alternate on for range</entry></row> + * </tbody> + * </tgroup> + * </informaltable> + * + * Return value: + * %true if @str is successfully parsed, %false otherwise. + * + * Since: 0.9.5 + **/ +hb_bool_t +hb_feature_from_string (const char *str, int len, + hb_feature_t *feature) +{ + hb_feature_t feat; + + if (len < 0) + len = strlen (str); + + if (likely (parse_one_feature (&str, str + len, &feat))) + { + if (feature) + *feature = feat; + return true; + } + + if (feature) + memset (feature, 0, sizeof (*feature)); + return false; +} + +/** + * hb_feature_to_string: + * @feature: an #hb_feature_t to convert + * @buf: (array length=size) (out): output string + * @size: the allocated size of @buf + * + * Converts a #hb_feature_t into a %NULL-terminated string in the format + * understood by hb_feature_from_string(). The client in responsible for + * allocating big enough size for @buf, 128 bytes is more than enough. + * + * Since: 0.9.5 + **/ +void +hb_feature_to_string (hb_feature_t *feature, + char *buf, unsigned int size) +{ + if (unlikely (!size)) return; + + char s[128]; + unsigned int len = 0; + if (feature->value == 0) + s[len++] = '-'; + hb_tag_to_string (feature->tag, s + len); + len += 4; + while (len && s[len - 1] == ' ') + len--; + if (feature->start != HB_FEATURE_GLOBAL_START || feature->end != HB_FEATURE_GLOBAL_END) + { + s[len++] = '['; + if (feature->start) + len += hb_max (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->start)); + if (feature->end != feature->start + 1) { + s[len++] = ':'; + if (feature->end != HB_FEATURE_GLOBAL_END) + len += hb_max (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->end)); + } + s[len++] = ']'; + } + if (feature->value > 1) + { + s[len++] = '='; + len += hb_max (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->value)); + } + assert (len < ARRAY_LENGTH (s)); + len = hb_min (len, size - 1); + memcpy (buf, s, len); + buf[len] = '\0'; +} + +/* hb_variation_t */ + +static bool +parse_variation_value (const char **pp, const char *end, hb_variation_t *variation) +{ + parse_char (pp, end, '='); /* Optional. */ + double v; + if (unlikely (!hb_parse_double (pp, end, &v))) return false; + + variation->value = v; + return true; +} + +static bool +parse_one_variation (const char **pp, const char *end, hb_variation_t *variation) +{ + return parse_tag (pp, end, &variation->tag) && + parse_variation_value (pp, end, variation) && + parse_space (pp, end) && + *pp == end; +} + +/** + * hb_variation_from_string: + * + * Since: 1.4.2 + */ +hb_bool_t +hb_variation_from_string (const char *str, int len, + hb_variation_t *variation) +{ + hb_variation_t var; + + if (len < 0) + len = strlen (str); + + if (likely (parse_one_variation (&str, str + len, &var))) + { + if (variation) + *variation = var; + return true; + } + + if (variation) + memset (variation, 0, sizeof (*variation)); + return false; +} + +/** + * hb_variation_to_string: + * + * Since: 1.4.2 + */ +void +hb_variation_to_string (hb_variation_t *variation, + char *buf, unsigned int size) +{ + if (unlikely (!size)) return; + + char s[128]; + unsigned int len = 0; + hb_tag_to_string (variation->tag, s + len); + len += 4; + while (len && s[len - 1] == ' ') + len--; + s[len++] = '='; + len += hb_max (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%g", (double) variation->value)); + + assert (len < ARRAY_LENGTH (s)); + len = hb_min (len, size - 1); + memcpy (buf, s, len); + buf[len] = '\0'; +} + +/** + * hb_color_get_alpha: + * color: a #hb_color_t we are interested in its channels. + * + * Return value: Alpha channel value of the given color + * + * Since: 2.1.0 + */ +uint8_t +(hb_color_get_alpha) (hb_color_t color) +{ + return hb_color_get_alpha (color); +} + +/** + * hb_color_get_red: + * color: a #hb_color_t we are interested in its channels. + * + * Return value: Red channel value of the given color + * + * Since: 2.1.0 + */ +uint8_t +(hb_color_get_red) (hb_color_t color) +{ + return hb_color_get_red (color); +} + +/** + * hb_color_get_green: + * color: a #hb_color_t we are interested in its channels. + * + * Return value: Green channel value of the given color + * + * Since: 2.1.0 + */ +uint8_t +(hb_color_get_green) (hb_color_t color) +{ + return hb_color_get_green (color); +} + +/** + * hb_color_get_blue: + * color: a #hb_color_t we are interested in its channels. + * + * Return value: Blue channel value of the given color + * + * Since: 2.1.0 + */ +uint8_t +(hb_color_get_blue) (hb_color_t color) +{ + return hb_color_get_blue (color); +} + + +/* If there is no visibility control, then hb-static.cc will NOT + * define anything. Instead, we get it to define one set in here + * only, so only libharfbuzz.so defines them, not other libs. */ +#ifdef HB_NO_VISIBILITY +#undef HB_NO_VISIBILITY +#include "hb-static.cc" +#define HB_NO_VISIBILITY 1 +#endif diff --git a/thirdparty/harfbuzz/src/hb-common.h b/thirdparty/harfbuzz/src/hb-common.h new file mode 100644 index 0000000000..a97a5f5a04 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-common.h @@ -0,0 +1,513 @@ +/* + * Copyright © 2007,2008,2009 Red Hat, Inc. + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_COMMON_H +#define HB_COMMON_H + +#ifndef HB_EXTERN +#define HB_EXTERN extern +#endif + +#ifndef HB_BEGIN_DECLS +# ifdef __cplusplus +# define HB_BEGIN_DECLS extern "C" { +# define HB_END_DECLS } +# else /* !__cplusplus */ +# define HB_BEGIN_DECLS +# define HB_END_DECLS +# endif /* !__cplusplus */ +#endif + +#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || \ + defined (_sgi) || defined (__sun) || defined (sun) || \ + defined (__digital__) || defined (__HP_cc) +# include <inttypes.h> +#elif defined (_AIX) +# include <sys/inttypes.h> +#elif defined (_MSC_VER) && _MSC_VER < 1600 +/* VS 2010 (_MSC_VER 1600) has stdint.h */ +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#elif defined (__KERNEL__) +# include <linux/types.h> +#else +# include <stdint.h> +#endif + +#if defined(__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +#define HB_DEPRECATED __attribute__((__deprecated__)) +#elif defined(_MSC_VER) && (_MSC_VER >= 1300) +#define HB_DEPRECATED __declspec(deprecated) +#else +#define HB_DEPRECATED +#endif + +#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) +#define HB_DEPRECATED_FOR(f) __attribute__((__deprecated__("Use '" #f "' instead"))) +#elif defined(_MSC_FULL_VER) && (_MSC_FULL_VER > 140050320) +#define HB_DEPRECATED_FOR(f) __declspec(deprecated("is deprecated. Use '" #f "' instead")) +#else +#define HB_DEPRECATED_FOR(f) HB_DEPRECATED +#endif + + +HB_BEGIN_DECLS + + +typedef int hb_bool_t; + +typedef uint32_t hb_codepoint_t; +typedef int32_t hb_position_t; +typedef uint32_t hb_mask_t; + +typedef union _hb_var_int_t { + uint32_t u32; + int32_t i32; + uint16_t u16[2]; + int16_t i16[2]; + uint8_t u8[4]; + int8_t i8[4]; +} hb_var_int_t; + + +/* hb_tag_t */ + +typedef uint32_t hb_tag_t; + +#define HB_TAG(c1,c2,c3,c4) ((hb_tag_t)((((uint32_t)(c1)&0xFF)<<24)|(((uint32_t)(c2)&0xFF)<<16)|(((uint32_t)(c3)&0xFF)<<8)|((uint32_t)(c4)&0xFF))) +#define HB_UNTAG(tag) (uint8_t)(((tag)>>24)&0xFF), (uint8_t)(((tag)>>16)&0xFF), (uint8_t)(((tag)>>8)&0xFF), (uint8_t)((tag)&0xFF) + +#define HB_TAG_NONE HB_TAG(0,0,0,0) +#define HB_TAG_MAX HB_TAG(0xff,0xff,0xff,0xff) +#define HB_TAG_MAX_SIGNED HB_TAG(0x7f,0xff,0xff,0xff) + +/* len=-1 means str is NUL-terminated. */ +HB_EXTERN hb_tag_t +hb_tag_from_string (const char *str, int len); + +/* buf should have 4 bytes. */ +HB_EXTERN void +hb_tag_to_string (hb_tag_t tag, char *buf); + + +/** + * hb_direction_t: + * @HB_DIRECTION_INVALID: Initial, unset direction. + * @HB_DIRECTION_LTR: Text is set horizontally from left to right. + * @HB_DIRECTION_RTL: Text is set horizontally from right to left. + * @HB_DIRECTION_TTB: Text is set vertically from top to bottom. + * @HB_DIRECTION_BTT: Text is set vertically from bottom to top. + */ +typedef enum { + HB_DIRECTION_INVALID = 0, + HB_DIRECTION_LTR = 4, + HB_DIRECTION_RTL, + HB_DIRECTION_TTB, + HB_DIRECTION_BTT +} hb_direction_t; + +/* len=-1 means str is NUL-terminated */ +HB_EXTERN hb_direction_t +hb_direction_from_string (const char *str, int len); + +HB_EXTERN const char * +hb_direction_to_string (hb_direction_t direction); + +#define HB_DIRECTION_IS_VALID(dir) ((((unsigned int) (dir)) & ~3U) == 4) +/* Direction must be valid for the following */ +#define HB_DIRECTION_IS_HORIZONTAL(dir) ((((unsigned int) (dir)) & ~1U) == 4) +#define HB_DIRECTION_IS_VERTICAL(dir) ((((unsigned int) (dir)) & ~1U) == 6) +#define HB_DIRECTION_IS_FORWARD(dir) ((((unsigned int) (dir)) & ~2U) == 4) +#define HB_DIRECTION_IS_BACKWARD(dir) ((((unsigned int) (dir)) & ~2U) == 5) +#define HB_DIRECTION_REVERSE(dir) ((hb_direction_t) (((unsigned int) (dir)) ^ 1)) + + +/* hb_language_t */ + +typedef const struct hb_language_impl_t *hb_language_t; + +HB_EXTERN hb_language_t +hb_language_from_string (const char *str, int len); + +HB_EXTERN const char * +hb_language_to_string (hb_language_t language); + +#define HB_LANGUAGE_INVALID ((hb_language_t) 0) + +HB_EXTERN hb_language_t +hb_language_get_default (void); + + +/* hb_script_t */ + +/* https://unicode.org/iso15924/ */ +/* https://docs.google.com/spreadsheets/d/1Y90M0Ie3MUJ6UVCRDOypOtijlMDLNNyyLk36T6iMu0o */ +/* Unicode Character Database property: Script (sc) */ +typedef enum +{ + /*1.1*/ HB_SCRIPT_COMMON = HB_TAG ('Z','y','y','y'), + /*1.1*/ HB_SCRIPT_INHERITED = HB_TAG ('Z','i','n','h'), + /*5.0*/ HB_SCRIPT_UNKNOWN = HB_TAG ('Z','z','z','z'), + + /*1.1*/ HB_SCRIPT_ARABIC = HB_TAG ('A','r','a','b'), + /*1.1*/ HB_SCRIPT_ARMENIAN = HB_TAG ('A','r','m','n'), + /*1.1*/ HB_SCRIPT_BENGALI = HB_TAG ('B','e','n','g'), + /*1.1*/ HB_SCRIPT_CYRILLIC = HB_TAG ('C','y','r','l'), + /*1.1*/ HB_SCRIPT_DEVANAGARI = HB_TAG ('D','e','v','a'), + /*1.1*/ HB_SCRIPT_GEORGIAN = HB_TAG ('G','e','o','r'), + /*1.1*/ HB_SCRIPT_GREEK = HB_TAG ('G','r','e','k'), + /*1.1*/ HB_SCRIPT_GUJARATI = HB_TAG ('G','u','j','r'), + /*1.1*/ HB_SCRIPT_GURMUKHI = HB_TAG ('G','u','r','u'), + /*1.1*/ HB_SCRIPT_HANGUL = HB_TAG ('H','a','n','g'), + /*1.1*/ HB_SCRIPT_HAN = HB_TAG ('H','a','n','i'), + /*1.1*/ HB_SCRIPT_HEBREW = HB_TAG ('H','e','b','r'), + /*1.1*/ HB_SCRIPT_HIRAGANA = HB_TAG ('H','i','r','a'), + /*1.1*/ HB_SCRIPT_KANNADA = HB_TAG ('K','n','d','a'), + /*1.1*/ HB_SCRIPT_KATAKANA = HB_TAG ('K','a','n','a'), + /*1.1*/ HB_SCRIPT_LAO = HB_TAG ('L','a','o','o'), + /*1.1*/ HB_SCRIPT_LATIN = HB_TAG ('L','a','t','n'), + /*1.1*/ HB_SCRIPT_MALAYALAM = HB_TAG ('M','l','y','m'), + /*1.1*/ HB_SCRIPT_ORIYA = HB_TAG ('O','r','y','a'), + /*1.1*/ HB_SCRIPT_TAMIL = HB_TAG ('T','a','m','l'), + /*1.1*/ HB_SCRIPT_TELUGU = HB_TAG ('T','e','l','u'), + /*1.1*/ HB_SCRIPT_THAI = HB_TAG ('T','h','a','i'), + + /*2.0*/ HB_SCRIPT_TIBETAN = HB_TAG ('T','i','b','t'), + + /*3.0*/ HB_SCRIPT_BOPOMOFO = HB_TAG ('B','o','p','o'), + /*3.0*/ HB_SCRIPT_BRAILLE = HB_TAG ('B','r','a','i'), + /*3.0*/ HB_SCRIPT_CANADIAN_SYLLABICS = HB_TAG ('C','a','n','s'), + /*3.0*/ HB_SCRIPT_CHEROKEE = HB_TAG ('C','h','e','r'), + /*3.0*/ HB_SCRIPT_ETHIOPIC = HB_TAG ('E','t','h','i'), + /*3.0*/ HB_SCRIPT_KHMER = HB_TAG ('K','h','m','r'), + /*3.0*/ HB_SCRIPT_MONGOLIAN = HB_TAG ('M','o','n','g'), + /*3.0*/ HB_SCRIPT_MYANMAR = HB_TAG ('M','y','m','r'), + /*3.0*/ HB_SCRIPT_OGHAM = HB_TAG ('O','g','a','m'), + /*3.0*/ HB_SCRIPT_RUNIC = HB_TAG ('R','u','n','r'), + /*3.0*/ HB_SCRIPT_SINHALA = HB_TAG ('S','i','n','h'), + /*3.0*/ HB_SCRIPT_SYRIAC = HB_TAG ('S','y','r','c'), + /*3.0*/ HB_SCRIPT_THAANA = HB_TAG ('T','h','a','a'), + /*3.0*/ HB_SCRIPT_YI = HB_TAG ('Y','i','i','i'), + + /*3.1*/ HB_SCRIPT_DESERET = HB_TAG ('D','s','r','t'), + /*3.1*/ HB_SCRIPT_GOTHIC = HB_TAG ('G','o','t','h'), + /*3.1*/ HB_SCRIPT_OLD_ITALIC = HB_TAG ('I','t','a','l'), + + /*3.2*/ HB_SCRIPT_BUHID = HB_TAG ('B','u','h','d'), + /*3.2*/ HB_SCRIPT_HANUNOO = HB_TAG ('H','a','n','o'), + /*3.2*/ HB_SCRIPT_TAGALOG = HB_TAG ('T','g','l','g'), + /*3.2*/ HB_SCRIPT_TAGBANWA = HB_TAG ('T','a','g','b'), + + /*4.0*/ HB_SCRIPT_CYPRIOT = HB_TAG ('C','p','r','t'), + /*4.0*/ HB_SCRIPT_LIMBU = HB_TAG ('L','i','m','b'), + /*4.0*/ HB_SCRIPT_LINEAR_B = HB_TAG ('L','i','n','b'), + /*4.0*/ HB_SCRIPT_OSMANYA = HB_TAG ('O','s','m','a'), + /*4.0*/ HB_SCRIPT_SHAVIAN = HB_TAG ('S','h','a','w'), + /*4.0*/ HB_SCRIPT_TAI_LE = HB_TAG ('T','a','l','e'), + /*4.0*/ HB_SCRIPT_UGARITIC = HB_TAG ('U','g','a','r'), + + /*4.1*/ HB_SCRIPT_BUGINESE = HB_TAG ('B','u','g','i'), + /*4.1*/ HB_SCRIPT_COPTIC = HB_TAG ('C','o','p','t'), + /*4.1*/ HB_SCRIPT_GLAGOLITIC = HB_TAG ('G','l','a','g'), + /*4.1*/ HB_SCRIPT_KHAROSHTHI = HB_TAG ('K','h','a','r'), + /*4.1*/ HB_SCRIPT_NEW_TAI_LUE = HB_TAG ('T','a','l','u'), + /*4.1*/ HB_SCRIPT_OLD_PERSIAN = HB_TAG ('X','p','e','o'), + /*4.1*/ HB_SCRIPT_SYLOTI_NAGRI = HB_TAG ('S','y','l','o'), + /*4.1*/ HB_SCRIPT_TIFINAGH = HB_TAG ('T','f','n','g'), + + /*5.0*/ HB_SCRIPT_BALINESE = HB_TAG ('B','a','l','i'), + /*5.0*/ HB_SCRIPT_CUNEIFORM = HB_TAG ('X','s','u','x'), + /*5.0*/ HB_SCRIPT_NKO = HB_TAG ('N','k','o','o'), + /*5.0*/ HB_SCRIPT_PHAGS_PA = HB_TAG ('P','h','a','g'), + /*5.0*/ HB_SCRIPT_PHOENICIAN = HB_TAG ('P','h','n','x'), + + /*5.1*/ HB_SCRIPT_CARIAN = HB_TAG ('C','a','r','i'), + /*5.1*/ HB_SCRIPT_CHAM = HB_TAG ('C','h','a','m'), + /*5.1*/ HB_SCRIPT_KAYAH_LI = HB_TAG ('K','a','l','i'), + /*5.1*/ HB_SCRIPT_LEPCHA = HB_TAG ('L','e','p','c'), + /*5.1*/ HB_SCRIPT_LYCIAN = HB_TAG ('L','y','c','i'), + /*5.1*/ HB_SCRIPT_LYDIAN = HB_TAG ('L','y','d','i'), + /*5.1*/ HB_SCRIPT_OL_CHIKI = HB_TAG ('O','l','c','k'), + /*5.1*/ HB_SCRIPT_REJANG = HB_TAG ('R','j','n','g'), + /*5.1*/ HB_SCRIPT_SAURASHTRA = HB_TAG ('S','a','u','r'), + /*5.1*/ HB_SCRIPT_SUNDANESE = HB_TAG ('S','u','n','d'), + /*5.1*/ HB_SCRIPT_VAI = HB_TAG ('V','a','i','i'), + + /*5.2*/ HB_SCRIPT_AVESTAN = HB_TAG ('A','v','s','t'), + /*5.2*/ HB_SCRIPT_BAMUM = HB_TAG ('B','a','m','u'), + /*5.2*/ HB_SCRIPT_EGYPTIAN_HIEROGLYPHS = HB_TAG ('E','g','y','p'), + /*5.2*/ HB_SCRIPT_IMPERIAL_ARAMAIC = HB_TAG ('A','r','m','i'), + /*5.2*/ HB_SCRIPT_INSCRIPTIONAL_PAHLAVI = HB_TAG ('P','h','l','i'), + /*5.2*/ HB_SCRIPT_INSCRIPTIONAL_PARTHIAN = HB_TAG ('P','r','t','i'), + /*5.2*/ HB_SCRIPT_JAVANESE = HB_TAG ('J','a','v','a'), + /*5.2*/ HB_SCRIPT_KAITHI = HB_TAG ('K','t','h','i'), + /*5.2*/ HB_SCRIPT_LISU = HB_TAG ('L','i','s','u'), + /*5.2*/ HB_SCRIPT_MEETEI_MAYEK = HB_TAG ('M','t','e','i'), + /*5.2*/ HB_SCRIPT_OLD_SOUTH_ARABIAN = HB_TAG ('S','a','r','b'), + /*5.2*/ HB_SCRIPT_OLD_TURKIC = HB_TAG ('O','r','k','h'), + /*5.2*/ HB_SCRIPT_SAMARITAN = HB_TAG ('S','a','m','r'), + /*5.2*/ HB_SCRIPT_TAI_THAM = HB_TAG ('L','a','n','a'), + /*5.2*/ HB_SCRIPT_TAI_VIET = HB_TAG ('T','a','v','t'), + + /*6.0*/ HB_SCRIPT_BATAK = HB_TAG ('B','a','t','k'), + /*6.0*/ HB_SCRIPT_BRAHMI = HB_TAG ('B','r','a','h'), + /*6.0*/ HB_SCRIPT_MANDAIC = HB_TAG ('M','a','n','d'), + + /*6.1*/ HB_SCRIPT_CHAKMA = HB_TAG ('C','a','k','m'), + /*6.1*/ HB_SCRIPT_MEROITIC_CURSIVE = HB_TAG ('M','e','r','c'), + /*6.1*/ HB_SCRIPT_MEROITIC_HIEROGLYPHS = HB_TAG ('M','e','r','o'), + /*6.1*/ HB_SCRIPT_MIAO = HB_TAG ('P','l','r','d'), + /*6.1*/ HB_SCRIPT_SHARADA = HB_TAG ('S','h','r','d'), + /*6.1*/ HB_SCRIPT_SORA_SOMPENG = HB_TAG ('S','o','r','a'), + /*6.1*/ HB_SCRIPT_TAKRI = HB_TAG ('T','a','k','r'), + + /* + * Since: 0.9.30 + */ + /*7.0*/ HB_SCRIPT_BASSA_VAH = HB_TAG ('B','a','s','s'), + /*7.0*/ HB_SCRIPT_CAUCASIAN_ALBANIAN = HB_TAG ('A','g','h','b'), + /*7.0*/ HB_SCRIPT_DUPLOYAN = HB_TAG ('D','u','p','l'), + /*7.0*/ HB_SCRIPT_ELBASAN = HB_TAG ('E','l','b','a'), + /*7.0*/ HB_SCRIPT_GRANTHA = HB_TAG ('G','r','a','n'), + /*7.0*/ HB_SCRIPT_KHOJKI = HB_TAG ('K','h','o','j'), + /*7.0*/ HB_SCRIPT_KHUDAWADI = HB_TAG ('S','i','n','d'), + /*7.0*/ HB_SCRIPT_LINEAR_A = HB_TAG ('L','i','n','a'), + /*7.0*/ HB_SCRIPT_MAHAJANI = HB_TAG ('M','a','h','j'), + /*7.0*/ HB_SCRIPT_MANICHAEAN = HB_TAG ('M','a','n','i'), + /*7.0*/ HB_SCRIPT_MENDE_KIKAKUI = HB_TAG ('M','e','n','d'), + /*7.0*/ HB_SCRIPT_MODI = HB_TAG ('M','o','d','i'), + /*7.0*/ HB_SCRIPT_MRO = HB_TAG ('M','r','o','o'), + /*7.0*/ HB_SCRIPT_NABATAEAN = HB_TAG ('N','b','a','t'), + /*7.0*/ HB_SCRIPT_OLD_NORTH_ARABIAN = HB_TAG ('N','a','r','b'), + /*7.0*/ HB_SCRIPT_OLD_PERMIC = HB_TAG ('P','e','r','m'), + /*7.0*/ HB_SCRIPT_PAHAWH_HMONG = HB_TAG ('H','m','n','g'), + /*7.0*/ HB_SCRIPT_PALMYRENE = HB_TAG ('P','a','l','m'), + /*7.0*/ HB_SCRIPT_PAU_CIN_HAU = HB_TAG ('P','a','u','c'), + /*7.0*/ HB_SCRIPT_PSALTER_PAHLAVI = HB_TAG ('P','h','l','p'), + /*7.0*/ HB_SCRIPT_SIDDHAM = HB_TAG ('S','i','d','d'), + /*7.0*/ HB_SCRIPT_TIRHUTA = HB_TAG ('T','i','r','h'), + /*7.0*/ HB_SCRIPT_WARANG_CITI = HB_TAG ('W','a','r','a'), + + /*8.0*/ HB_SCRIPT_AHOM = HB_TAG ('A','h','o','m'), + /*8.0*/ HB_SCRIPT_ANATOLIAN_HIEROGLYPHS = HB_TAG ('H','l','u','w'), + /*8.0*/ HB_SCRIPT_HATRAN = HB_TAG ('H','a','t','r'), + /*8.0*/ HB_SCRIPT_MULTANI = HB_TAG ('M','u','l','t'), + /*8.0*/ HB_SCRIPT_OLD_HUNGARIAN = HB_TAG ('H','u','n','g'), + /*8.0*/ HB_SCRIPT_SIGNWRITING = HB_TAG ('S','g','n','w'), + + /* + * Since 1.3.0 + */ + /*9.0*/ HB_SCRIPT_ADLAM = HB_TAG ('A','d','l','m'), + /*9.0*/ HB_SCRIPT_BHAIKSUKI = HB_TAG ('B','h','k','s'), + /*9.0*/ HB_SCRIPT_MARCHEN = HB_TAG ('M','a','r','c'), + /*9.0*/ HB_SCRIPT_OSAGE = HB_TAG ('O','s','g','e'), + /*9.0*/ HB_SCRIPT_TANGUT = HB_TAG ('T','a','n','g'), + /*9.0*/ HB_SCRIPT_NEWA = HB_TAG ('N','e','w','a'), + + /* + * Since 1.6.0 + */ + /*10.0*/HB_SCRIPT_MASARAM_GONDI = HB_TAG ('G','o','n','m'), + /*10.0*/HB_SCRIPT_NUSHU = HB_TAG ('N','s','h','u'), + /*10.0*/HB_SCRIPT_SOYOMBO = HB_TAG ('S','o','y','o'), + /*10.0*/HB_SCRIPT_ZANABAZAR_SQUARE = HB_TAG ('Z','a','n','b'), + + /* + * Since 1.8.0 + */ + /*11.0*/HB_SCRIPT_DOGRA = HB_TAG ('D','o','g','r'), + /*11.0*/HB_SCRIPT_GUNJALA_GONDI = HB_TAG ('G','o','n','g'), + /*11.0*/HB_SCRIPT_HANIFI_ROHINGYA = HB_TAG ('R','o','h','g'), + /*11.0*/HB_SCRIPT_MAKASAR = HB_TAG ('M','a','k','a'), + /*11.0*/HB_SCRIPT_MEDEFAIDRIN = HB_TAG ('M','e','d','f'), + /*11.0*/HB_SCRIPT_OLD_SOGDIAN = HB_TAG ('S','o','g','o'), + /*11.0*/HB_SCRIPT_SOGDIAN = HB_TAG ('S','o','g','d'), + + /* + * Since 2.4.0 + */ + /*12.0*/HB_SCRIPT_ELYMAIC = HB_TAG ('E','l','y','m'), + /*12.0*/HB_SCRIPT_NANDINAGARI = HB_TAG ('N','a','n','d'), + /*12.0*/HB_SCRIPT_NYIAKENG_PUACHUE_HMONG = HB_TAG ('H','m','n','p'), + /*12.0*/HB_SCRIPT_WANCHO = HB_TAG ('W','c','h','o'), + + /* + * Since 2.6.7 + */ + /*13.0*/HB_SCRIPT_CHORASMIAN = HB_TAG ('C','h','r','s'), + /*13.0*/HB_SCRIPT_DIVES_AKURU = HB_TAG ('D','i','a','k'), + /*13.0*/HB_SCRIPT_KHITAN_SMALL_SCRIPT = HB_TAG ('K','i','t','s'), + /*13.0*/HB_SCRIPT_YEZIDI = HB_TAG ('Y','e','z','i'), + + /* No script set. */ + HB_SCRIPT_INVALID = HB_TAG_NONE, + + /* Dummy values to ensure any hb_tag_t value can be passed/stored as hb_script_t + * without risking undefined behavior. We have two, for historical reasons. + * HB_TAG_MAX used to be unsigned, but that was invalid Ansi C, so was changed + * to _HB_SCRIPT_MAX_VALUE to be equal to HB_TAG_MAX_SIGNED as well. + * + * See this thread for technicalities: + * + * https://lists.freedesktop.org/archives/harfbuzz/2014-March/004150.html + */ + _HB_SCRIPT_MAX_VALUE = HB_TAG_MAX_SIGNED, /*< skip >*/ + _HB_SCRIPT_MAX_VALUE_SIGNED = HB_TAG_MAX_SIGNED /*< skip >*/ + +} hb_script_t; + + +/* Script functions */ + +HB_EXTERN hb_script_t +hb_script_from_iso15924_tag (hb_tag_t tag); + +HB_EXTERN hb_script_t +hb_script_from_string (const char *str, int len); + +HB_EXTERN hb_tag_t +hb_script_to_iso15924_tag (hb_script_t script); + +HB_EXTERN hb_direction_t +hb_script_get_horizontal_direction (hb_script_t script); + + +/* User data */ + +typedef struct hb_user_data_key_t { + /*< private >*/ + char unused; +} hb_user_data_key_t; + +typedef void (*hb_destroy_func_t) (void *user_data); + + +/* Font features and variations. */ + +/** + * HB_FEATURE_GLOBAL_START + * + * Since: 2.0.0 + */ +#define HB_FEATURE_GLOBAL_START 0 +/** + * HB_FEATURE_GLOBAL_END + * + * Since: 2.0.0 + */ +#define HB_FEATURE_GLOBAL_END ((unsigned int) -1) + +/** + * hb_feature_t: + * @tag: a feature tag + * @value: 0 disables the feature, non-zero (usually 1) enables the feature. + * For features implemented as lookup type 3 (like 'salt') the @value is a one + * based index into the alternates. + * @start: the cluster to start applying this feature setting (inclusive). + * @end: the cluster to end applying this feature setting (exclusive). + * + * The #hb_feature_t is the structure that holds information about requested + * feature application. The feature will be applied with the given value to all + * glyphs which are in clusters between @start (inclusive) and @end (exclusive). + * Setting start to @HB_FEATURE_GLOBAL_START and end to @HB_FEATURE_GLOBAL_END + * specifies that the feature always applies to the entire buffer. + */ +typedef struct hb_feature_t { + hb_tag_t tag; + uint32_t value; + unsigned int start; + unsigned int end; +} hb_feature_t; + +HB_EXTERN hb_bool_t +hb_feature_from_string (const char *str, int len, + hb_feature_t *feature); + +HB_EXTERN void +hb_feature_to_string (hb_feature_t *feature, + char *buf, unsigned int size); + +/** + * hb_variation_t: + * + * Since: 1.4.2 + */ +typedef struct hb_variation_t { + hb_tag_t tag; + float value; +} hb_variation_t; + +HB_EXTERN hb_bool_t +hb_variation_from_string (const char *str, int len, + hb_variation_t *variation); + +HB_EXTERN void +hb_variation_to_string (hb_variation_t *variation, + char *buf, unsigned int size); + +/** + * hb_color_t: + * + * Data type for holding color values. + * + * Since: 2.1.0 + */ +typedef uint32_t hb_color_t; + +#define HB_COLOR(b,g,r,a) ((hb_color_t) HB_TAG ((b),(g),(r),(a))) + +HB_EXTERN uint8_t +hb_color_get_alpha (hb_color_t color); +#define hb_color_get_alpha(color) ((color) & 0xFF) + +HB_EXTERN uint8_t +hb_color_get_red (hb_color_t color); +#define hb_color_get_red(color) (((color) >> 8) & 0xFF) + +HB_EXTERN uint8_t +hb_color_get_green (hb_color_t color); +#define hb_color_get_green(color) (((color) >> 16) & 0xFF) + +HB_EXTERN uint8_t +hb_color_get_blue (hb_color_t color); +#define hb_color_get_blue(color) (((color) >> 24) & 0xFF) + +HB_END_DECLS + +#endif /* HB_COMMON_H */ diff --git a/thirdparty/harfbuzz/src/hb-config.hh b/thirdparty/harfbuzz/src/hb-config.hh new file mode 100644 index 0000000000..fc8d424bfb --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-config.hh @@ -0,0 +1,163 @@ +/* + * Copyright © 2019 Facebook, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Facebook Author(s): Behdad Esfahbod + */ + +#ifndef HB_CONFIG_HH +#define HB_CONFIG_HH + +#if 0 /* Make test happy. */ +#include "hb.hh" +#endif + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#ifdef HB_TINY +#define HB_LEAN +#define HB_MINI +#define HB_NO_MT +#define HB_NO_UCD_UNASSIGNED +#ifndef NDEBUG +#define NDEBUG +#endif +#ifndef __OPTIMIZE_SIZE__ +#define __OPTIMIZE_SIZE__ +#endif +#endif + +#ifdef HB_LEAN +#define HB_DISABLE_DEPRECATED +#define HB_NDEBUG +#define HB_NO_ATEXIT +#define HB_NO_BUFFER_MESSAGE +#define HB_NO_BUFFER_SERIALIZE +#define HB_NO_BITMAP +#define HB_NO_CFF +#define HB_NO_COLOR +#define HB_NO_DRAW +#define HB_NO_ERRNO +#define HB_NO_FACE_COLLECT_UNICODES +#define HB_NO_GETENV +#define HB_NO_HINTING +#define HB_NO_LANGUAGE_PRIVATE_SUBTAG +#define HB_NO_LAYOUT_FEATURE_PARAMS +#define HB_NO_LAYOUT_COLLECT_GLYPHS +#define HB_NO_LAYOUT_UNUSED +#define HB_NO_MATH +#define HB_NO_META +#define HB_NO_METRICS +#define HB_NO_MMAP +#define HB_NO_NAME +#define HB_NO_OPEN +#define HB_NO_SETLOCALE +#define HB_NO_OT_FONT_GLYPH_NAMES +#define HB_NO_OT_SHAPE_FRACTIONS +#define HB_NO_STYLE +#define HB_NO_SUBSET_LAYOUT +#define HB_NO_VAR +#endif + +#ifdef HB_MINI +#define HB_NO_AAT +#define HB_NO_LEGACY +#endif + + +/* Closure of options. */ + +#ifdef HB_DISABLE_DEPRECATED +#define HB_IF_NOT_DEPRECATED(x) +#else +#define HB_IF_NOT_DEPRECATED(x) x +#endif + +#ifdef HB_NO_AAT +#define HB_NO_OT_NAME_LANGUAGE_AAT +#define HB_NO_AAT_SHAPE +#endif + +#ifdef HB_NO_BITMAP +#define HB_NO_OT_FONT_BITMAP +#endif + +#ifdef HB_NO_CFF +#define HB_NO_OT_FONT_CFF +#define HB_NO_SUBSET_CFF +#endif + +#ifdef HB_NO_GETENV +#define HB_NO_UNISCRIBE_BUG_COMPATIBLE +#endif + +#ifdef HB_NO_LEGACY +#define HB_NO_CMAP_LEGACY_SUBTABLES +#define HB_NO_FALLBACK_SHAPE +#define HB_NO_OT_KERN +#define HB_NO_OT_LAYOUT_BLACKLIST +#define HB_NO_OT_SHAPE_FALLBACK +#endif + +#ifdef HB_NO_NAME +#define HB_NO_OT_NAME_LANGUAGE +#endif + +#ifdef HB_NO_OT +#define HB_NO_OT_FONT +#define HB_NO_OT_LAYOUT +#define HB_NO_OT_TAG +#define HB_NO_OT_SHAPE +#endif + +#ifdef HB_NO_OT_SHAPE +#define HB_NO_AAT_SHAPE +#endif + +#ifdef HB_NO_OT_SHAPE_FALLBACK +#define HB_NO_OT_SHAPE_COMPLEX_ARABIC_FALLBACK +#define HB_NO_OT_SHAPE_COMPLEX_HEBREW_FALLBACK +#define HB_NO_OT_SHAPE_COMPLEX_THAI_FALLBACK +#define HB_NO_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS +#endif + +#ifdef NDEBUG +#ifndef HB_NDEBUG +#define HB_NDEBUG +#endif +#endif + +#ifdef __OPTIMIZE_SIZE__ +#ifndef HB_OPTIMIZE_SIZE +#define HB_OPTIMIZE_SIZE +#endif +#endif + +#ifdef HAVE_CONFIG_OVERRIDE_H +#include "config-override.h" +#endif + + +#endif /* HB_CONFIG_HH */ diff --git a/thirdparty/harfbuzz/src/hb-coretext.cc b/thirdparty/harfbuzz/src/hb-coretext.cc new file mode 100644 index 0000000000..7b6b2bd5ef --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-coretext.cc @@ -0,0 +1,1194 @@ +/* + * Copyright © 2012,2013 Mozilla Foundation. + * Copyright © 2012,2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Mozilla Author(s): Jonathan Kew + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifdef HAVE_CORETEXT + +#include "hb-shaper-impl.hh" + +#include "hb-coretext.h" +#include "hb-aat-layout.hh" +#include <math.h> + + +/** + * SECTION:hb-coretext + * @title: hb-coretext + * @short_description: CoreText integration + * @include: hb-coretext.h + * + * Functions for using HarfBuzz with the CoreText fonts. + **/ + +/* https://developer.apple.com/documentation/coretext/1508745-ctfontcreatewithgraphicsfont */ +#define HB_CORETEXT_DEFAULT_FONT_SIZE 12.f + +static void +release_table_data (void *user_data) +{ + CFDataRef cf_data = reinterpret_cast<CFDataRef> (user_data); + CFRelease(cf_data); +} + +static hb_blob_t * +_hb_cg_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void *user_data) +{ + CGFontRef cg_font = reinterpret_cast<CGFontRef> (user_data); + CFDataRef cf_data = CGFontCopyTableForTag (cg_font, tag); + if (unlikely (!cf_data)) + return nullptr; + + const char *data = reinterpret_cast<const char*> (CFDataGetBytePtr (cf_data)); + const size_t length = CFDataGetLength (cf_data); + if (!data || !length) + { + CFRelease (cf_data); + return nullptr; + } + + return hb_blob_create (data, length, HB_MEMORY_MODE_READONLY, + reinterpret_cast<void *> (const_cast<__CFData *> (cf_data)), + release_table_data); +} + +static void +_hb_cg_font_release (void *data) +{ + CGFontRelease ((CGFontRef) data); +} + + +static CTFontDescriptorRef +get_last_resort_font_desc () +{ + // TODO Handle allocation failures? + CTFontDescriptorRef last_resort = CTFontDescriptorCreateWithNameAndSize (CFSTR("LastResort"), 0); + CFArrayRef cascade_list = CFArrayCreate (kCFAllocatorDefault, + (const void **) &last_resort, + 1, + &kCFTypeArrayCallBacks); + CFRelease (last_resort); + CFDictionaryRef attributes = CFDictionaryCreate (kCFAllocatorDefault, + (const void **) &kCTFontCascadeListAttribute, + (const void **) &cascade_list, + 1, + &kCFTypeDictionaryKeyCallBacks, + &kCFTypeDictionaryValueCallBacks); + CFRelease (cascade_list); + + CTFontDescriptorRef font_desc = CTFontDescriptorCreateWithAttributes (attributes); + CFRelease (attributes); + return font_desc; +} + +static void +release_data (void *info, const void *data, size_t size) +{ + assert (hb_blob_get_length ((hb_blob_t *) info) == size && + hb_blob_get_data ((hb_blob_t *) info, nullptr) == data); + + hb_blob_destroy ((hb_blob_t *) info); +} + +static CGFontRef +create_cg_font (hb_face_t *face) +{ + CGFontRef cg_font = nullptr; + if (face->destroy == _hb_cg_font_release) + { + cg_font = CGFontRetain ((CGFontRef) face->user_data); + } + else + { + hb_blob_t *blob = hb_face_reference_blob (face); + unsigned int blob_length; + const char *blob_data = hb_blob_get_data (blob, &blob_length); + if (unlikely (!blob_length)) + DEBUG_MSG (CORETEXT, face, "Face has empty blob"); + + CGDataProviderRef provider = CGDataProviderCreateWithData (blob, blob_data, blob_length, &release_data); + if (likely (provider)) + { + cg_font = CGFontCreateWithDataProvider (provider); + if (unlikely (!cg_font)) + DEBUG_MSG (CORETEXT, face, "Face CGFontCreateWithDataProvider() failed"); + CGDataProviderRelease (provider); + } + } + return cg_font; +} + +static CTFontRef +create_ct_font (CGFontRef cg_font, CGFloat font_size) +{ + CTFontRef ct_font = nullptr; + + /* CoreText does not enable trak table usage / tracking when creating a CTFont + * using CTFontCreateWithGraphicsFont. The only way of enabling tracking seems + * to be through the CTFontCreateUIFontForLanguage call. */ + CFStringRef cg_postscript_name = CGFontCopyPostScriptName (cg_font); + if (CFStringHasPrefix (cg_postscript_name, CFSTR (".SFNSText")) || + CFStringHasPrefix (cg_postscript_name, CFSTR (".SFNSDisplay"))) + { +#if !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) && MAC_OS_X_VERSION_MIN_REQUIRED < 1080 +# define kCTFontUIFontSystem kCTFontSystemFontType +# define kCTFontUIFontEmphasizedSystem kCTFontEmphasizedSystemFontType +#endif + CTFontUIFontType font_type = kCTFontUIFontSystem; + if (CFStringHasSuffix (cg_postscript_name, CFSTR ("-Bold"))) + font_type = kCTFontUIFontEmphasizedSystem; + + ct_font = CTFontCreateUIFontForLanguage (font_type, font_size, nullptr); + CFStringRef ct_result_name = CTFontCopyPostScriptName(ct_font); + if (CFStringCompare (ct_result_name, cg_postscript_name, 0) != kCFCompareEqualTo) + { + CFRelease(ct_font); + ct_font = nullptr; + } + CFRelease (ct_result_name); + } + CFRelease (cg_postscript_name); + + if (!ct_font) + ct_font = CTFontCreateWithGraphicsFont (cg_font, font_size, nullptr, nullptr); + + if (unlikely (!ct_font)) { + DEBUG_MSG (CORETEXT, cg_font, "Font CTFontCreateWithGraphicsFont() failed"); + return nullptr; + } + + /* crbug.com/576941 and crbug.com/625902 and the investigation in the latter + * bug indicate that the cascade list reconfiguration occasionally causes + * crashes in CoreText on OS X 10.9, thus let's skip this step on older + * operating system versions. Except for the emoji font, where _not_ + * reconfiguring the cascade list causes CoreText crashes. For details, see + * crbug.com/549610 */ + // 0x00070000 stands for "kCTVersionNumber10_10", see CoreText.h + if (&CTGetCoreTextVersion != nullptr && CTGetCoreTextVersion() < 0x00070000) { + CFStringRef fontName = CTFontCopyPostScriptName (ct_font); + bool isEmojiFont = CFStringCompare (fontName, CFSTR("AppleColorEmoji"), 0) == kCFCompareEqualTo; + CFRelease (fontName); + if (!isEmojiFont) + return ct_font; + } + + CFURLRef original_url = nullptr; +#if !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) && MAC_OS_X_VERSION_MIN_REQUIRED < 1060 + ATSFontRef atsFont; + FSRef fsref; + OSStatus status; + atsFont = CTFontGetPlatformFont (ct_font, NULL); + status = ATSFontGetFileReference (atsFont, &fsref); + if (status == noErr) + original_url = CFURLCreateFromFSRef (NULL, &fsref); +#else + original_url = (CFURLRef) CTFontCopyAttribute (ct_font, kCTFontURLAttribute); +#endif + + /* Create font copy with cascade list that has LastResort first; this speeds up CoreText + * font fallback which we don't need anyway. */ + { + CTFontDescriptorRef last_resort_font_desc = get_last_resort_font_desc (); + CTFontRef new_ct_font = CTFontCreateCopyWithAttributes (ct_font, 0.0, nullptr, last_resort_font_desc); + CFRelease (last_resort_font_desc); + if (new_ct_font) + { + /* The CTFontCreateCopyWithAttributes call fails to stay on the same font + * when reconfiguring the cascade list and may switch to a different font + * when there are fonts that go by the same name, since the descriptor is + * just name and size. + * + * Avoid reconfiguring the cascade lists if the new font is outside the + * system locations that we cannot access from the sandboxed renderer + * process in Blink. This can be detected by the new file URL location + * that the newly found font points to. */ + CFURLRef new_url = nullptr; +#if !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) && MAC_OS_X_VERSION_MIN_REQUIRED < 1060 + atsFont = CTFontGetPlatformFont (new_ct_font, NULL); + status = ATSFontGetFileReference (atsFont, &fsref); + if (status == noErr) + new_url = CFURLCreateFromFSRef (NULL, &fsref); +#else + new_url = (CFURLRef) CTFontCopyAttribute (new_ct_font, kCTFontURLAttribute); +#endif + // Keep reconfigured font if URL cannot be retrieved (seems to be the case + // on Mac OS 10.12 Sierra), speculative fix for crbug.com/625606 + if (!original_url || !new_url || CFEqual (original_url, new_url)) { + CFRelease (ct_font); + ct_font = new_ct_font; + } else { + CFRelease (new_ct_font); + DEBUG_MSG (CORETEXT, ct_font, "Discarding reconfigured CTFont, location changed."); + } + if (new_url) + CFRelease (new_url); + } + else + DEBUG_MSG (CORETEXT, ct_font, "Font copy with empty cascade list failed"); + } + + if (original_url) + CFRelease (original_url); + return ct_font; +} + +hb_coretext_face_data_t * +_hb_coretext_shaper_face_data_create (hb_face_t *face) +{ + CGFontRef cg_font = create_cg_font (face); + + if (unlikely (!cg_font)) + { + DEBUG_MSG (CORETEXT, face, "CGFont creation failed.."); + return nullptr; + } + + return (hb_coretext_face_data_t *) cg_font; +} + +void +_hb_coretext_shaper_face_data_destroy (hb_coretext_face_data_t *data) +{ + CFRelease ((CGFontRef) data); +} + +/** + * hb_coretext_face_create: + * @cg_font: The CGFontRef to work upon + * + * Creates an #hb_face_t face object from the specified + * CGFontRef. + * + * Return value: the new #hb_face_t face object + * + * Since: 0.9.10 + */ +hb_face_t * +hb_coretext_face_create (CGFontRef cg_font) +{ + return hb_face_create_for_tables (_hb_cg_reference_table, CGFontRetain (cg_font), _hb_cg_font_release); +} + +/** + * hb_coretext_face_get_cg_font: + * @face: The #hb_face_t to work upon + * + * Fetches the CGFontRef associated with an #hb_face_t + * face object + * + * Return value: the CGFontRef found + * + * Since: 0.9.10 + */ +CGFontRef +hb_coretext_face_get_cg_font (hb_face_t *face) +{ + return (CGFontRef) (const void *) face->data.coretext; +} + + +hb_coretext_font_data_t * +_hb_coretext_shaper_font_data_create (hb_font_t *font) +{ + hb_face_t *face = font->face; + const hb_coretext_face_data_t *face_data = face->data.coretext; + if (unlikely (!face_data)) return nullptr; + CGFontRef cg_font = (CGFontRef) (const void *) face->data.coretext; + + CGFloat font_size = (CGFloat) (font->ptem <= 0.f ? HB_CORETEXT_DEFAULT_FONT_SIZE : font->ptem); + CTFontRef ct_font = create_ct_font (cg_font, font_size); + + if (unlikely (!ct_font)) + { + DEBUG_MSG (CORETEXT, font, "CGFont creation failed.."); + return nullptr; + } + + return (hb_coretext_font_data_t *) ct_font; +} + +void +_hb_coretext_shaper_font_data_destroy (hb_coretext_font_data_t *data) +{ + CFRelease ((CTFontRef) data); +} + +static const hb_coretext_font_data_t * +hb_coretext_font_data_sync (hb_font_t *font) +{ +retry: + const hb_coretext_font_data_t *data = font->data.coretext; + if (unlikely (!data)) return nullptr; + + if (fabs (CTFontGetSize ((CTFontRef) data) - (CGFloat) font->ptem) > .5) + { + /* XXX-MT-bug + * Note that evaluating condition above can be dangerous if another thread + * got here first and destructed data. That's, as always, bad use pattern. + * If you modify the font (change font size), other threads must not be + * using it at the same time. However, since this check is delayed to + * when one actually tries to shape something, this is a XXX race condition + * (and the only one we have that I know of) right now. Ie. you modify the + * font size in one thread, then (supposedly safely) try to use it from two + * or more threads and BOOM! I'm not sure how to fix this. We want RCU. + */ + + /* Drop and recreate. */ + /* If someone dropped it in the mean time, throw it away and don't touch it. + * Otherwise, destruct it. */ + if (likely (font->data.coretext.cmpexch (const_cast<hb_coretext_font_data_t *> (data), nullptr))) + _hb_coretext_shaper_font_data_destroy (const_cast<hb_coretext_font_data_t *> (data)); + else + goto retry; + } + return font->data.coretext; +} + +/** + * hb_coretext_font_create: + * @ct_font: The CTFontRef to work upon + * + * Creates an #hb_font_t font object from the specified + * CTFontRef. + * + * Return value: the new #hb_font_t font object + * + * Since: 1.7.2 + **/ +hb_font_t * +hb_coretext_font_create (CTFontRef ct_font) +{ + CGFontRef cg_font = CTFontCopyGraphicsFont (ct_font, nullptr); + hb_face_t *face = hb_coretext_face_create (cg_font); + CFRelease (cg_font); + hb_font_t *font = hb_font_create (face); + hb_face_destroy (face); + + if (unlikely (hb_object_is_immutable (font))) + return font; + + hb_font_set_ptem (font, CTFontGetSize (ct_font)); + + /* Let there be dragons here... */ + font->data.coretext.cmpexch (nullptr, (hb_coretext_font_data_t *) CFRetain (ct_font)); + + return font; +} + +/** + * hb_coretext_face_get_ct_font: + * @font: #hb_font_t to work upon + * + * Fetches the CTFontRef associated with the specified + * #hb_font_t font object. + * + * Return value: the CTFontRef found + * + * Since: 0.9.10 + */ +CTFontRef +hb_coretext_font_get_ct_font (hb_font_t *font) +{ + const hb_coretext_font_data_t *data = hb_coretext_font_data_sync (font); + return data ? (CTFontRef) data : nullptr; +} + + +/* + * shaper + */ + +struct feature_record_t { + unsigned int feature; + unsigned int setting; +}; + +struct active_feature_t { + feature_record_t rec; + unsigned int order; + + HB_INTERNAL static int cmp (const void *pa, const void *pb) { + const active_feature_t *a = (const active_feature_t *) pa; + const active_feature_t *b = (const active_feature_t *) pb; + return a->rec.feature < b->rec.feature ? -1 : a->rec.feature > b->rec.feature ? 1 : + a->order < b->order ? -1 : a->order > b->order ? 1 : + a->rec.setting < b->rec.setting ? -1 : a->rec.setting > b->rec.setting ? 1 : + 0; + } + bool operator== (const active_feature_t *f) { + return cmp (this, f) == 0; + } +}; + +struct feature_event_t { + unsigned int index; + bool start; + active_feature_t feature; + + HB_INTERNAL static int cmp (const void *pa, const void *pb) { + const feature_event_t *a = (const feature_event_t *) pa; + const feature_event_t *b = (const feature_event_t *) pb; + return a->index < b->index ? -1 : a->index > b->index ? 1 : + a->start < b->start ? -1 : a->start > b->start ? 1 : + active_feature_t::cmp (&a->feature, &b->feature); + } +}; + +struct range_record_t { + CTFontRef font; + unsigned int index_first; /* == start */ + unsigned int index_last; /* == end - 1 */ +}; + + +hb_bool_t +_hb_coretext_shape (hb_shape_plan_t *shape_plan, + hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features) +{ + hb_face_t *face = font->face; + CGFontRef cg_font = (CGFontRef) (const void *) face->data.coretext; + CTFontRef ct_font = (CTFontRef) hb_coretext_font_data_sync (font); + + CGFloat ct_font_size = CTFontGetSize (ct_font); + CGFloat x_mult = (CGFloat) font->x_scale / ct_font_size; + CGFloat y_mult = (CGFloat) font->y_scale / ct_font_size; + + /* Attach marks to their bases, to match the 'ot' shaper. + * Adapted from a very old version of hb-ot-shape:hb_form_clusters(). + * Note that this only makes us be closer to the 'ot' shaper, + * but by no means the same. For example, if there's + * B1 M1 B2 M2, and B1-B2 form a ligature, M2's cluster will + * continue pointing to B2 even though B2 was merged into B1's + * cluster... */ + if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) + { + hb_unicode_funcs_t *unicode = buffer->unicode; + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 1; i < count; i++) + if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (unicode->general_category (info[i].codepoint))) + buffer->merge_clusters (i - 1, i + 1); + } + + hb_vector_t<feature_record_t> feature_records; + hb_vector_t<range_record_t> range_records; + + /* + * Set up features. + * (copied + modified from code from hb-uniscribe.cc) + */ + if (num_features) + { + /* Sort features by start/end events. */ + hb_vector_t<feature_event_t> feature_events; + for (unsigned int i = 0; i < num_features; i++) + { + active_feature_t feature; + +#if MAC_OS_X_VERSION_MIN_REQUIRED < 101000 + const hb_aat_feature_mapping_t * mapping = hb_aat_layout_find_feature_mapping (features[i].tag); + if (!mapping) + continue; + + feature.rec.feature = mapping->aatFeatureType; + feature.rec.setting = features[i].value ? mapping->selectorToEnable : mapping->selectorToDisable; +#else + feature.rec.feature = features[i].tag; + feature.rec.setting = features[i].value; +#endif + feature.order = i; + + feature_event_t *event; + + event = feature_events.push (); + event->index = features[i].start; + event->start = true; + event->feature = feature; + + event = feature_events.push (); + event->index = features[i].end; + event->start = false; + event->feature = feature; + } + feature_events.qsort (); + /* Add a strategic final event. */ + { + active_feature_t feature; + feature.rec.feature = HB_TAG_NONE; + feature.rec.setting = 0; + feature.order = num_features + 1; + + feature_event_t *event = feature_events.push (); + event->index = 0; /* This value does magic. */ + event->start = false; + event->feature = feature; + } + + /* Scan events and save features for each range. */ + hb_vector_t<active_feature_t> active_features; + unsigned int last_index = 0; + for (unsigned int i = 0; i < feature_events.length; i++) + { + feature_event_t *event = &feature_events[i]; + + if (event->index != last_index) + { + /* Save a snapshot of active features and the range. */ + range_record_t *range = range_records.push (); + + if (active_features.length) + { + CFMutableArrayRef features_array = CFArrayCreateMutable(kCFAllocatorDefault, 0, &kCFTypeArrayCallBacks); + + /* TODO sort and resolve conflicting features? */ + /* active_features.qsort (); */ + for (unsigned int j = 0; j < active_features.length; j++) + { +#if MAC_OS_X_VERSION_MIN_REQUIRED < 101000 + CFStringRef keys[] = { + kCTFontFeatureTypeIdentifierKey, + kCTFontFeatureSelectorIdentifierKey + }; + CFNumberRef values[] = { + CFNumberCreate (kCFAllocatorDefault, kCFNumberIntType, &active_features[j].rec.feature), + CFNumberCreate (kCFAllocatorDefault, kCFNumberIntType, &active_features[j].rec.setting) + }; +#else + char tag[5] = {HB_UNTAG (active_features[j].rec.feature)}; + CFTypeRef keys[] = { + kCTFontOpenTypeFeatureTag, + kCTFontOpenTypeFeatureValue + }; + CFTypeRef values[] = { + CFStringCreateWithCString (kCFAllocatorDefault, tag, kCFStringEncodingASCII), + CFNumberCreate (kCFAllocatorDefault, kCFNumberIntType, &active_features[j].rec.setting) + }; +#endif + static_assert ((ARRAY_LENGTH_CONST (keys) == ARRAY_LENGTH_CONST (values)), ""); + CFDictionaryRef dict = CFDictionaryCreate (kCFAllocatorDefault, + (const void **) keys, + (const void **) values, + ARRAY_LENGTH (keys), + &kCFTypeDictionaryKeyCallBacks, + &kCFTypeDictionaryValueCallBacks); + for (unsigned int i = 0; i < ARRAY_LENGTH (values); i++) + CFRelease (values[i]); + + CFArrayAppendValue (features_array, dict); + CFRelease (dict); + + } + + CFDictionaryRef attributes = CFDictionaryCreate (kCFAllocatorDefault, + (const void **) &kCTFontFeatureSettingsAttribute, + (const void **) &features_array, + 1, + &kCFTypeDictionaryKeyCallBacks, + &kCFTypeDictionaryValueCallBacks); + CFRelease (features_array); + + CTFontDescriptorRef font_desc = CTFontDescriptorCreateWithAttributes (attributes); + CFRelease (attributes); + + range->font = CTFontCreateCopyWithAttributes (ct_font, 0.0, nullptr, font_desc); + CFRelease (font_desc); + } + else + { + range->font = nullptr; + } + + range->index_first = last_index; + range->index_last = event->index - 1; + + last_index = event->index; + } + + if (event->start) + { + active_features.push (event->feature); + } else { + active_feature_t *feature = active_features.find (&event->feature); + if (feature) + active_features.remove (feature - active_features.arrayZ); + } + } + } + + unsigned int scratch_size; + hb_buffer_t::scratch_buffer_t *scratch = buffer->get_scratch_buffer (&scratch_size); + +#define ALLOCATE_ARRAY(Type, name, len, on_no_room) \ + Type *name = (Type *) scratch; \ + do { \ + unsigned int _consumed = DIV_CEIL ((len) * sizeof (Type), sizeof (*scratch)); \ + if (unlikely (_consumed > scratch_size)) \ + { \ + on_no_room; \ + assert (0); \ + } \ + scratch += _consumed; \ + scratch_size -= _consumed; \ + } while (0) + + ALLOCATE_ARRAY (UniChar, pchars, buffer->len * 2, ((void)nullptr) /*nothing*/); + unsigned int chars_len = 0; + for (unsigned int i = 0; i < buffer->len; i++) { + hb_codepoint_t c = buffer->info[i].codepoint; + if (likely (c <= 0xFFFFu)) + pchars[chars_len++] = c; + else if (unlikely (c > 0x10FFFFu)) + pchars[chars_len++] = 0xFFFDu; + else { + pchars[chars_len++] = 0xD800u + ((c - 0x10000u) >> 10); + pchars[chars_len++] = 0xDC00u + ((c - 0x10000u) & ((1u << 10) - 1)); + } + } + + ALLOCATE_ARRAY (unsigned int, log_clusters, chars_len, ((void)nullptr) /*nothing*/); + chars_len = 0; + for (unsigned int i = 0; i < buffer->len; i++) + { + hb_codepoint_t c = buffer->info[i].codepoint; + unsigned int cluster = buffer->info[i].cluster; + log_clusters[chars_len++] = cluster; + if (hb_in_range (c, 0x10000u, 0x10FFFFu)) + log_clusters[chars_len++] = cluster; /* Surrogates. */ + } + +#define FAIL(...) \ + HB_STMT_START { \ + DEBUG_MSG (CORETEXT, nullptr, __VA_ARGS__); \ + ret = false; \ + goto fail; \ + } HB_STMT_END + + bool ret = true; + CFStringRef string_ref = nullptr; + CTLineRef line = nullptr; + + if (false) + { +resize_and_retry: + DEBUG_MSG (CORETEXT, buffer, "Buffer resize"); + /* string_ref uses the scratch-buffer for backing store, and line references + * string_ref (via attr_string). We must release those before resizing buffer. */ + assert (string_ref); + assert (line); + CFRelease (string_ref); + CFRelease (line); + string_ref = nullptr; + line = nullptr; + + /* Get previous start-of-scratch-area, that we use later for readjusting + * our existing scratch arrays. */ + unsigned int old_scratch_used; + hb_buffer_t::scratch_buffer_t *old_scratch; + old_scratch = buffer->get_scratch_buffer (&old_scratch_used); + old_scratch_used = scratch - old_scratch; + + if (unlikely (!buffer->ensure (buffer->allocated * 2))) + FAIL ("Buffer resize failed"); + + /* Adjust scratch, pchars, and log_cluster arrays. This is ugly, but really the + * cleanest way to do without completely restructuring the rest of this shaper. */ + scratch = buffer->get_scratch_buffer (&scratch_size); + pchars = reinterpret_cast<UniChar *> (((char *) scratch + ((char *) pchars - (char *) old_scratch))); + log_clusters = reinterpret_cast<unsigned int *> (((char *) scratch + ((char *) log_clusters - (char *) old_scratch))); + scratch += old_scratch_used; + scratch_size -= old_scratch_used; + } + { + string_ref = CFStringCreateWithCharactersNoCopy (nullptr, + pchars, chars_len, + kCFAllocatorNull); + if (unlikely (!string_ref)) + FAIL ("CFStringCreateWithCharactersNoCopy failed"); + + /* Create an attributed string, populate it, and create a line from it, then release attributed string. */ + { + CFMutableAttributedStringRef attr_string = CFAttributedStringCreateMutable (kCFAllocatorDefault, + chars_len); + if (unlikely (!attr_string)) + FAIL ("CFAttributedStringCreateMutable failed"); + CFAttributedStringReplaceString (attr_string, CFRangeMake (0, 0), string_ref); + if (HB_DIRECTION_IS_VERTICAL (buffer->props.direction)) + { + CFAttributedStringSetAttribute (attr_string, CFRangeMake (0, chars_len), + kCTVerticalFormsAttributeName, kCFBooleanTrue); + } + + if (buffer->props.language) + { +/* What's the iOS equivalent of this check? + * The symbols was introduced in iOS 7.0. + * At any rate, our fallback is safe and works fine. */ +#if !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) && MAC_OS_X_VERSION_MIN_REQUIRED < 1090 +# define kCTLanguageAttributeName CFSTR ("NSLanguage") +#endif + CFStringRef lang = CFStringCreateWithCStringNoCopy (kCFAllocatorDefault, + hb_language_to_string (buffer->props.language), + kCFStringEncodingUTF8, + kCFAllocatorNull); + if (unlikely (!lang)) + { + CFRelease (attr_string); + FAIL ("CFStringCreateWithCStringNoCopy failed"); + } + CFAttributedStringSetAttribute (attr_string, CFRangeMake (0, chars_len), + kCTLanguageAttributeName, lang); + CFRelease (lang); + } + CFAttributedStringSetAttribute (attr_string, CFRangeMake (0, chars_len), + kCTFontAttributeName, ct_font); + + if (num_features && range_records.length) + { + unsigned int start = 0; + range_record_t *last_range = &range_records[0]; + for (unsigned int k = 0; k < chars_len; k++) + { + range_record_t *range = last_range; + while (log_clusters[k] < range->index_first) + range--; + while (log_clusters[k] > range->index_last) + range++; + if (range != last_range) + { + if (last_range->font) + CFAttributedStringSetAttribute (attr_string, CFRangeMake (start, k - start), + kCTFontAttributeName, last_range->font); + + start = k; + } + + last_range = range; + } + if (start != chars_len && last_range->font) + CFAttributedStringSetAttribute (attr_string, CFRangeMake (start, chars_len - start), + kCTFontAttributeName, last_range->font); + } + /* Enable/disable kern if requested. + * + * Note: once kern is disabled, reenabling it doesn't currently seem to work in CoreText. + */ + if (num_features) + { + unsigned int zeroint = 0; + CFNumberRef zero = CFNumberCreate (kCFAllocatorDefault, kCFNumberIntType, &zeroint); + for (unsigned int i = 0; i < num_features; i++) + { + const hb_feature_t &feature = features[i]; + if (feature.tag == HB_TAG('k','e','r','n') && + feature.start < chars_len && feature.start < feature.end) + { + CFRange feature_range = CFRangeMake (feature.start, + hb_min (feature.end, chars_len) - feature.start); + if (feature.value) + CFAttributedStringRemoveAttribute (attr_string, feature_range, kCTKernAttributeName); + else + CFAttributedStringSetAttribute (attr_string, feature_range, kCTKernAttributeName, zero); + } + } + CFRelease (zero); + } + + int level = HB_DIRECTION_IS_FORWARD (buffer->props.direction) ? 0 : 1; + CFNumberRef level_number = CFNumberCreate (kCFAllocatorDefault, kCFNumberIntType, &level); +#if !(defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) && MAC_OS_X_VERSION_MIN_REQUIRED < 1060 + extern const CFStringRef kCTTypesetterOptionForcedEmbeddingLevel; +#endif + CFDictionaryRef options = CFDictionaryCreate (kCFAllocatorDefault, + (const void **) &kCTTypesetterOptionForcedEmbeddingLevel, + (const void **) &level_number, + 1, + &kCFTypeDictionaryKeyCallBacks, + &kCFTypeDictionaryValueCallBacks); + CFRelease (level_number); + if (unlikely (!options)) + { + CFRelease (attr_string); + FAIL ("CFDictionaryCreate failed"); + } + + CTTypesetterRef typesetter = CTTypesetterCreateWithAttributedStringAndOptions (attr_string, options); + CFRelease (options); + CFRelease (attr_string); + if (unlikely (!typesetter)) + FAIL ("CTTypesetterCreateWithAttributedStringAndOptions failed"); + + line = CTTypesetterCreateLine (typesetter, CFRangeMake(0, 0)); + CFRelease (typesetter); + if (unlikely (!line)) + FAIL ("CTTypesetterCreateLine failed"); + } + + CFArrayRef glyph_runs = CTLineGetGlyphRuns (line); + unsigned int num_runs = CFArrayGetCount (glyph_runs); + DEBUG_MSG (CORETEXT, nullptr, "Num runs: %d", num_runs); + + buffer->len = 0; + uint32_t status_and = ~0, status_or = 0; + double advances_so_far = 0; + /* For right-to-left runs, CoreText returns the glyphs positioned such that + * any trailing whitespace is to the left of (0,0). Adjust coordinate system + * to fix for that. Test with any RTL string with trailing spaces. + * https://crbug.com/469028 + */ + if (HB_DIRECTION_IS_BACKWARD (buffer->props.direction)) + { + advances_so_far -= CTLineGetTrailingWhitespaceWidth (line); + if (HB_DIRECTION_IS_VERTICAL (buffer->props.direction)) + advances_so_far = -advances_so_far; + } + + const CFRange range_all = CFRangeMake (0, 0); + + for (unsigned int i = 0; i < num_runs; i++) + { + CTRunRef run = static_cast<CTRunRef>(CFArrayGetValueAtIndex (glyph_runs, i)); + CTRunStatus run_status = CTRunGetStatus (run); + status_or |= run_status; + status_and &= run_status; + DEBUG_MSG (CORETEXT, run, "CTRunStatus: %x", run_status); + double run_advance = CTRunGetTypographicBounds (run, range_all, nullptr, nullptr, nullptr); + if (HB_DIRECTION_IS_VERTICAL (buffer->props.direction)) + run_advance = -run_advance; + DEBUG_MSG (CORETEXT, run, "Run advance: %g", run_advance); + + /* CoreText does automatic font fallback (AKA "cascading") for characters + * not supported by the requested font, and provides no way to turn it off, + * so we must detect if the returned run uses a font other than the requested + * one and fill in the buffer with .notdef glyphs instead of random glyph + * indices from a different font. + */ + CFDictionaryRef attributes = CTRunGetAttributes (run); + CTFontRef run_ct_font = static_cast<CTFontRef>(CFDictionaryGetValue (attributes, kCTFontAttributeName)); + if (!CFEqual (run_ct_font, ct_font)) + { + /* The run doesn't use our main font instance. We have to figure out + * whether font fallback happened, or this is just CoreText giving us + * another CTFont using the same underlying CGFont. CoreText seems + * to do that in a variety of situations, one of which being vertical + * text, but also perhaps for caching reasons. + * + * First, see if it uses any of our subfonts created to set font features... + * + * Next, compare the CGFont to the one we used to create our fonts. + * Even this doesn't work all the time. + * + * Finally, we compare PS names, which I don't think are unique... + * + * Looks like if we really want to be sure here we have to modify the + * font to change the name table, similar to what we do in the uniscribe + * backend. + * + * However, even that wouldn't work if we were passed in the CGFont to + * construct a hb_face to begin with. + * + * See: https://github.com/harfbuzz/harfbuzz/pull/36 + * + * Also see: https://bugs.chromium.org/p/chromium/issues/detail?id=597098 + */ + bool matched = false; + for (unsigned int i = 0; i < range_records.length; i++) + if (range_records[i].font && CFEqual (run_ct_font, range_records[i].font)) + { + matched = true; + break; + } + if (!matched) + { + CGFontRef run_cg_font = CTFontCopyGraphicsFont (run_ct_font, nullptr); + if (run_cg_font) + { + matched = CFEqual (run_cg_font, cg_font); + CFRelease (run_cg_font); + } + } + if (!matched) + { + CFStringRef font_ps_name = CTFontCopyName (ct_font, kCTFontPostScriptNameKey); + CFStringRef run_ps_name = CTFontCopyName (run_ct_font, kCTFontPostScriptNameKey); + CFComparisonResult result = CFStringCompare (run_ps_name, font_ps_name, 0); + CFRelease (run_ps_name); + CFRelease (font_ps_name); + if (result == kCFCompareEqualTo) + matched = true; + } + if (!matched) + { + CFRange range = CTRunGetStringRange (run); + DEBUG_MSG (CORETEXT, run, "Run used fallback font: %ld..%ld", + range.location, range.location + range.length); + if (!buffer->ensure_inplace (buffer->len + range.length)) + goto resize_and_retry; + hb_glyph_info_t *info = buffer->info + buffer->len; + + hb_codepoint_t notdef = 0; + hb_direction_t dir = buffer->props.direction; + hb_position_t x_advance, y_advance, x_offset, y_offset; + hb_font_get_glyph_advance_for_direction (font, notdef, dir, &x_advance, &y_advance); + hb_font_get_glyph_origin_for_direction (font, notdef, dir, &x_offset, &y_offset); + hb_position_t advance = x_advance + y_advance; + x_offset = -x_offset; + y_offset = -y_offset; + + unsigned int old_len = buffer->len; + for (CFIndex j = range.location; j < range.location + range.length; j++) + { + UniChar ch = CFStringGetCharacterAtIndex (string_ref, j); + if (hb_in_range<UniChar> (ch, 0xDC00u, 0xDFFFu) && range.location < j) + { + ch = CFStringGetCharacterAtIndex (string_ref, j - 1); + if (hb_in_range<UniChar> (ch, 0xD800u, 0xDBFFu)) + /* This is the second of a surrogate pair. Don't need .notdef + * for this one. */ + continue; + } + if (buffer->unicode->is_default_ignorable (ch)) + continue; + + info->codepoint = notdef; + info->cluster = log_clusters[j]; + + info->mask = advance; + info->var1.i32 = x_offset; + info->var2.i32 = y_offset; + + info++; + buffer->len++; + } + if (HB_DIRECTION_IS_BACKWARD (buffer->props.direction)) + buffer->reverse_range (old_len, buffer->len); + advances_so_far += run_advance; + continue; + } + } + + unsigned int num_glyphs = CTRunGetGlyphCount (run); + if (num_glyphs == 0) + continue; + + if (!buffer->ensure_inplace (buffer->len + num_glyphs)) + goto resize_and_retry; + + hb_glyph_info_t *run_info = buffer->info + buffer->len; + + /* Testing used to indicate that CTRunGetGlyphsPtr, etc (almost?) always + * succeed, and so copying data to our own buffer will be rare. Reports + * have it that this changed in OS X 10.10 Yosemite, and nullptr is returned + * frequently. At any rate, we can test that codepath by setting USE_PTR + * to false. */ + +#define USE_PTR true + +#define SCRATCH_SAVE() \ + unsigned int scratch_size_saved = scratch_size; \ + hb_buffer_t::scratch_buffer_t *scratch_saved = scratch + +#define SCRATCH_RESTORE() \ + scratch_size = scratch_size_saved; \ + scratch = scratch_saved + + { /* Setup glyphs */ + SCRATCH_SAVE(); + const CGGlyph* glyphs = USE_PTR ? CTRunGetGlyphsPtr (run) : nullptr; + if (!glyphs) { + ALLOCATE_ARRAY (CGGlyph, glyph_buf, num_glyphs, goto resize_and_retry); + CTRunGetGlyphs (run, range_all, glyph_buf); + glyphs = glyph_buf; + } + const CFIndex* string_indices = USE_PTR ? CTRunGetStringIndicesPtr (run) : nullptr; + if (!string_indices) { + ALLOCATE_ARRAY (CFIndex, index_buf, num_glyphs, goto resize_and_retry); + CTRunGetStringIndices (run, range_all, index_buf); + string_indices = index_buf; + } + hb_glyph_info_t *info = run_info; + for (unsigned int j = 0; j < num_glyphs; j++) + { + info->codepoint = glyphs[j]; + info->cluster = log_clusters[string_indices[j]]; + info++; + } + SCRATCH_RESTORE(); + } + { + /* Setup positions. + * Note that CoreText does not return advances for glyphs. As such, + * for all but last glyph, we use the delta position to next glyph as + * advance (in the advance direction only), and for last glyph we set + * whatever is needed to make the whole run's advance add up. */ + SCRATCH_SAVE(); + const CGPoint* positions = USE_PTR ? CTRunGetPositionsPtr (run) : nullptr; + if (!positions) { + ALLOCATE_ARRAY (CGPoint, position_buf, num_glyphs, goto resize_and_retry); + CTRunGetPositions (run, range_all, position_buf); + positions = position_buf; + } + hb_glyph_info_t *info = run_info; + if (HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction)) + { + hb_position_t x_offset = (positions[0].x - advances_so_far) * x_mult; + for (unsigned int j = 0; j < num_glyphs; j++) + { + double advance; + if (likely (j + 1 < num_glyphs)) + advance = positions[j + 1].x - positions[j].x; + else /* last glyph */ + advance = run_advance - (positions[j].x - positions[0].x); + info->mask = advance * x_mult; + info->var1.i32 = x_offset; + info->var2.i32 = positions[j].y * y_mult; + info++; + } + } + else + { + hb_position_t y_offset = (positions[0].y - advances_so_far) * y_mult; + for (unsigned int j = 0; j < num_glyphs; j++) + { + double advance; + if (likely (j + 1 < num_glyphs)) + advance = positions[j + 1].y - positions[j].y; + else /* last glyph */ + advance = run_advance - (positions[j].y - positions[0].y); + info->mask = advance * y_mult; + info->var1.i32 = positions[j].x * x_mult; + info->var2.i32 = y_offset; + info++; + } + } + SCRATCH_RESTORE(); + advances_so_far += run_advance; + } +#undef SCRATCH_RESTORE +#undef SCRATCH_SAVE +#undef USE_PTR +#undef ALLOCATE_ARRAY + + buffer->len += num_glyphs; + } + + /* Mac OS 10.6 doesn't have kCTTypesetterOptionForcedEmbeddingLevel, + * or if it does, it doesn't respect it. So we get runs with wrong + * directions. As such, disable the assert... It wouldn't crash, but + * cursoring will be off... + * + * https://crbug.com/419769 + */ + if (false) + { + /* Make sure all runs had the expected direction. */ + HB_UNUSED bool backward = HB_DIRECTION_IS_BACKWARD (buffer->props.direction); + assert (bool (status_and & kCTRunStatusRightToLeft) == backward); + assert (bool (status_or & kCTRunStatusRightToLeft) == backward); + } + + buffer->clear_positions (); + + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + hb_glyph_position_t *pos = buffer->pos; + if (HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction)) + for (unsigned int i = 0; i < count; i++) + { + pos->x_advance = info->mask; + pos->x_offset = info->var1.i32; + pos->y_offset = info->var2.i32; + + info++, pos++; + } + else + for (unsigned int i = 0; i < count; i++) + { + pos->y_advance = info->mask; + pos->x_offset = info->var1.i32; + pos->y_offset = info->var2.i32; + + info++, pos++; + } + + /* Fix up clusters so that we never return out-of-order indices; + * if core text has reordered glyphs, we'll merge them to the + * beginning of the reordered cluster. CoreText is nice enough + * to tell us whenever it has produced nonmonotonic results... + * Note that we assume the input clusters were nonmonotonic to + * begin with. + * + * This does *not* mean we'll form the same clusters as Uniscribe + * or the native OT backend, only that the cluster indices will be + * monotonic in the output buffer. */ + if (count > 1 && (status_or & kCTRunStatusNonMonotonic)) + { + hb_glyph_info_t *info = buffer->info; + if (HB_DIRECTION_IS_FORWARD (buffer->props.direction)) + { + unsigned int cluster = info[count - 1].cluster; + for (unsigned int i = count - 1; i > 0; i--) + { + cluster = hb_min (cluster, info[i - 1].cluster); + info[i - 1].cluster = cluster; + } + } + else + { + unsigned int cluster = info[0].cluster; + for (unsigned int i = 1; i < count; i++) + { + cluster = hb_min (cluster, info[i].cluster); + info[i].cluster = cluster; + } + } + } + } + + buffer->unsafe_to_break_all (); + +#undef FAIL + +fail: + if (string_ref) + CFRelease (string_ref); + if (line) + CFRelease (line); + + for (unsigned int i = 0; i < range_records.length; i++) + if (range_records[i].font) + CFRelease (range_records[i].font); + + return ret; +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-coretext.h b/thirdparty/harfbuzz/src/hb-coretext.h new file mode 100644 index 0000000000..e53dbaf2c7 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-coretext.h @@ -0,0 +1,96 @@ +/* + * Copyright © 2012 Mozilla Foundation. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Mozilla Author(s): Jonathan Kew + */ + +#ifndef HB_CORETEXT_H +#define HB_CORETEXT_H + +#include "hb.h" + +#include <TargetConditionals.h> +#if TARGET_OS_IPHONE +# include <CoreText/CoreText.h> +# include <CoreGraphics/CoreGraphics.h> +#else +# include <ApplicationServices/ApplicationServices.h> +#endif + +HB_BEGIN_DECLS + + +/** + * HB_CORETEXT_TAG_MORT: + * + * The #hb_tag_t tag for the `mort` (glyph metamorphosis) table, + * which holds AAT features. + * + * For more information, see + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6mort.html + * + **/ +#define HB_CORETEXT_TAG_MORT HB_TAG('m','o','r','t') + +/** + * HB_CORETEXT_TAG_MORX: + * + * The #hb_tag_t tag for the `morx` (extended glyph metamorphosis) + * table, which holds AAT features. + * + * For more information, see + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6morx.html + * + **/ +#define HB_CORETEXT_TAG_MORX HB_TAG('m','o','r','x') + +/** + * HB_CORETEXT_TAG_KERX: + * + * The #hb_tag_t tag for the `kerx` (extended kerning) table, which + * holds AAT kerning information. + * + * For more information, see + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6kerx.html + * + **/ +#define HB_CORETEXT_TAG_KERX HB_TAG('k','e','r','x') + + +HB_EXTERN hb_face_t * +hb_coretext_face_create (CGFontRef cg_font); + +HB_EXTERN hb_font_t * +hb_coretext_font_create (CTFontRef ct_font); + + +HB_EXTERN CGFontRef +hb_coretext_face_get_cg_font (hb_face_t *face); + +HB_EXTERN CTFontRef +hb_coretext_font_get_ct_font (hb_font_t *font); + + +HB_END_DECLS + +#endif /* HB_CORETEXT_H */ diff --git a/thirdparty/harfbuzz/src/hb-debug.hh b/thirdparty/harfbuzz/src/hb-debug.hh new file mode 100644 index 0000000000..ec3a1ff211 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-debug.hh @@ -0,0 +1,459 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_DEBUG_HH +#define HB_DEBUG_HH + +#include "hb.hh" +#include "hb-atomic.hh" +#include "hb-algs.hh" + + +#ifndef HB_DEBUG +#define HB_DEBUG 0 +#endif + + +/* + * Global runtime options. + */ + +struct hb_options_t +{ + bool unused : 1; /* In-case sign bit is here. */ + bool initialized : 1; + bool uniscribe_bug_compatible : 1; +}; + +union hb_options_union_t { + int i; + hb_options_t opts; +}; +static_assert ((sizeof (hb_atomic_int_t) >= sizeof (hb_options_union_t)), ""); + +HB_INTERNAL void +_hb_options_init (); + +extern HB_INTERNAL hb_atomic_int_t _hb_options; + +static inline hb_options_t +hb_options () +{ +#ifdef HB_NO_GETENV + return hb_options_t (); +#endif + /* Make a local copy, so we can access bitfield threadsafely. */ + hb_options_union_t u; + u.i = _hb_options.get_relaxed (); + + if (unlikely (!u.i)) + { + _hb_options_init (); + u.i = _hb_options.get_relaxed (); + } + + return u.opts; +} + + +/* + * Debug output (needs enabling at compile time.) + */ + +static inline bool +_hb_debug (unsigned int level, + unsigned int max_level) +{ + return level < max_level; +} + +#define DEBUG_LEVEL_ENABLED(WHAT, LEVEL) (_hb_debug ((LEVEL), HB_DEBUG_##WHAT)) +#define DEBUG_ENABLED(WHAT) (DEBUG_LEVEL_ENABLED (WHAT, 0)) + +static inline void +_hb_print_func (const char *func) +{ + if (func) + { + unsigned int func_len = strlen (func); + /* Skip "static" */ + if (0 == strncmp (func, "static ", 7)) + func += 7; + /* Skip "typename" */ + if (0 == strncmp (func, "typename ", 9)) + func += 9; + /* Skip return type */ + const char *space = strchr (func, ' '); + if (space) + func = space + 1; + /* Skip parameter list */ + const char *paren = strchr (func, '('); + if (paren) + func_len = paren - func; + fprintf (stderr, "%.*s", func_len, func); + } +} + +template <int max_level> static inline void +_hb_debug_msg_va (const char *what, + const void *obj, + const char *func, + bool indented, + unsigned int level, + int level_dir, + const char *message, + va_list ap) HB_PRINTF_FUNC(7, 0); +template <int max_level> static inline void +_hb_debug_msg_va (const char *what, + const void *obj, + const char *func, + bool indented, + unsigned int level, + int level_dir, + const char *message, + va_list ap) +{ + if (!_hb_debug (level, max_level)) + return; + + fprintf (stderr, "%-10s", what ? what : ""); + + if (obj) + fprintf (stderr, "(%*p) ", (unsigned int) (2 * sizeof (void *)), obj); + else + fprintf (stderr, " %*s ", (unsigned int) (2 * sizeof (void *)), ""); + + if (indented) { +#define VBAR "\342\224\202" /* U+2502 BOX DRAWINGS LIGHT VERTICAL */ +#define VRBAR "\342\224\234" /* U+251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT */ +#define DLBAR "\342\225\256" /* U+256E BOX DRAWINGS LIGHT ARC DOWN AND LEFT */ +#define ULBAR "\342\225\257" /* U+256F BOX DRAWINGS LIGHT ARC UP AND LEFT */ +#define LBAR "\342\225\264" /* U+2574 BOX DRAWINGS LIGHT LEFT */ + static const char bars[] = + VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR + VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR + VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR + VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR + VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR VBAR; + fprintf (stderr, "%2u %s" VRBAR "%s", + level, + bars + sizeof (bars) - 1 - hb_min ((unsigned int) sizeof (bars) - 1, (unsigned int) (sizeof (VBAR) - 1) * level), + level_dir ? (level_dir > 0 ? DLBAR : ULBAR) : LBAR); + } else + fprintf (stderr, " " VRBAR LBAR); + + _hb_print_func (func); + + if (message) + { + fprintf (stderr, ": "); + vfprintf (stderr, message, ap); + } + + fprintf (stderr, "\n"); +} +template <> inline void HB_PRINTF_FUNC(7, 0) +_hb_debug_msg_va<0> (const char *what HB_UNUSED, + const void *obj HB_UNUSED, + const char *func HB_UNUSED, + bool indented HB_UNUSED, + unsigned int level HB_UNUSED, + int level_dir HB_UNUSED, + const char *message HB_UNUSED, + va_list ap HB_UNUSED) {} + +template <int max_level> static inline void +_hb_debug_msg (const char *what, + const void *obj, + const char *func, + bool indented, + unsigned int level, + int level_dir, + const char *message, + ...) HB_PRINTF_FUNC(7, 8); +template <int max_level> static inline void HB_PRINTF_FUNC(7, 8) +_hb_debug_msg (const char *what, + const void *obj, + const char *func, + bool indented, + unsigned int level, + int level_dir, + const char *message, + ...) +{ + va_list ap; + va_start (ap, message); + _hb_debug_msg_va<max_level> (what, obj, func, indented, level, level_dir, message, ap); + va_end (ap); +} +template <> inline void +_hb_debug_msg<0> (const char *what HB_UNUSED, + const void *obj HB_UNUSED, + const char *func HB_UNUSED, + bool indented HB_UNUSED, + unsigned int level HB_UNUSED, + int level_dir HB_UNUSED, + const char *message HB_UNUSED, + ...) HB_PRINTF_FUNC(7, 8); +template <> inline void HB_PRINTF_FUNC(7, 8) +_hb_debug_msg<0> (const char *what HB_UNUSED, + const void *obj HB_UNUSED, + const char *func HB_UNUSED, + bool indented HB_UNUSED, + unsigned int level HB_UNUSED, + int level_dir HB_UNUSED, + const char *message HB_UNUSED, + ...) {} + +#define DEBUG_MSG_LEVEL(WHAT, OBJ, LEVEL, LEVEL_DIR, ...) _hb_debug_msg<HB_DEBUG_##WHAT> (#WHAT, (OBJ), nullptr, true, (LEVEL), (LEVEL_DIR), __VA_ARGS__) +#define DEBUG_MSG(WHAT, OBJ, ...) _hb_debug_msg<HB_DEBUG_##WHAT> (#WHAT, (OBJ), nullptr, false, 0, 0, __VA_ARGS__) +#define DEBUG_MSG_FUNC(WHAT, OBJ, ...) _hb_debug_msg<HB_DEBUG_##WHAT> (#WHAT, (OBJ), HB_FUNC, false, 0, 0, __VA_ARGS__) + + +/* + * Printer + */ + +template <typename T> +struct hb_printer_t { + const char *print (const T&) { return "something"; } +}; + +template <> +struct hb_printer_t<bool> { + const char *print (bool v) { return v ? "true" : "false"; } +}; + +template <> +struct hb_printer_t<hb_empty_t> { + const char *print (hb_empty_t) { return ""; } +}; + + +/* + * Trace + */ + +template <typename T> +static inline void _hb_warn_no_return (bool returned) +{ + if (unlikely (!returned)) { + fprintf (stderr, "OUCH, returned with no call to return_trace(). This is a bug, please report.\n"); + } +} +template <> +/*static*/ inline void _hb_warn_no_return<hb_empty_t> (bool returned HB_UNUSED) +{} + +template <int max_level, typename ret_t> +struct hb_auto_trace_t +{ + explicit inline hb_auto_trace_t (unsigned int *plevel_, + const char *what_, + const void *obj_, + const char *func, + const char *message, + ...) HB_PRINTF_FUNC(6, 7) + : plevel (plevel_), what (what_), obj (obj_), returned (false) + { + if (plevel) ++*plevel; + + va_list ap; + va_start (ap, message); + _hb_debug_msg_va<max_level> (what, obj, func, true, plevel ? *plevel : 0, +1, message, ap); + va_end (ap); + } + ~hb_auto_trace_t () + { + _hb_warn_no_return<ret_t> (returned); + if (!returned) { + _hb_debug_msg<max_level> (what, obj, nullptr, true, plevel ? *plevel : 1, -1, " "); + } + if (plevel) --*plevel; + } + + template <typename T> + T ret (T&& v, + const char *func = "", + unsigned int line = 0) + { + if (unlikely (returned)) { + fprintf (stderr, "OUCH, double calls to return_trace(). This is a bug, please report.\n"); + return hb_forward<T> (v); + } + + _hb_debug_msg<max_level> (what, obj, func, true, plevel ? *plevel : 1, -1, + "return %s (line %d)", + hb_printer_t<decltype (v)>().print (v), line); + if (plevel) --*plevel; + plevel = nullptr; + returned = true; + return hb_forward<T> (v); + } + + private: + unsigned int *plevel; + const char *what; + const void *obj; + bool returned; +}; +template <typename ret_t> /* Make sure we don't use hb_auto_trace_t when not tracing. */ +struct hb_auto_trace_t<0, ret_t> +{ + explicit inline hb_auto_trace_t (unsigned int *plevel_, + const char *what_, + const void *obj_, + const char *func, + const char *message, + ...) HB_PRINTF_FUNC(6, 7) {} + + template <typename T> + T ret (T&& v, + const char *func HB_UNUSED = nullptr, + unsigned int line HB_UNUSED = 0) { return hb_forward<T> (v); } +}; + +/* For disabled tracing; optimize out everything. + * https://github.com/harfbuzz/harfbuzz/pull/605 */ +template <typename ret_t> +struct hb_no_trace_t { + template <typename T> + T ret (T&& v, + const char *func HB_UNUSED = nullptr, + unsigned int line HB_UNUSED = 0) { return hb_forward<T> (v); } +}; + +#define return_trace(RET) return trace.ret (RET, HB_FUNC, __LINE__) + + +/* + * Instances. + */ + +#ifndef HB_DEBUG_ARABIC +#define HB_DEBUG_ARABIC (HB_DEBUG+0) +#endif + +#ifndef HB_DEBUG_BLOB +#define HB_DEBUG_BLOB (HB_DEBUG+0) +#endif + +#ifndef HB_DEBUG_CORETEXT +#define HB_DEBUG_CORETEXT (HB_DEBUG+0) +#endif + +#ifndef HB_DEBUG_DIRECTWRITE +#define HB_DEBUG_DIRECTWRITE (HB_DEBUG+0) +#endif + +#ifndef HB_DEBUG_FT +#define HB_DEBUG_FT (HB_DEBUG+0) +#endif + +#ifndef HB_DEBUG_OBJECT +#define HB_DEBUG_OBJECT (HB_DEBUG+0) +#endif + +#ifndef HB_DEBUG_SHAPE_PLAN +#define HB_DEBUG_SHAPE_PLAN (HB_DEBUG+0) +#endif + +#ifndef HB_DEBUG_UNISCRIBE +#define HB_DEBUG_UNISCRIBE (HB_DEBUG+0) +#endif + +/* + * With tracing. + */ + +#ifndef HB_DEBUG_APPLY +#define HB_DEBUG_APPLY (HB_DEBUG+0) +#endif +#if HB_DEBUG_APPLY +#define TRACE_APPLY(this) \ + hb_auto_trace_t<HB_DEBUG_APPLY, bool> trace \ + (&c->debug_depth, c->get_name (), this, HB_FUNC, \ + "idx %d gid %u lookup %d", \ + c->buffer->idx, c->buffer->cur().codepoint, (int) c->lookup_index) +#else +#define TRACE_APPLY(this) hb_no_trace_t<bool> trace +#endif + +#ifndef HB_DEBUG_SANITIZE +#define HB_DEBUG_SANITIZE (HB_DEBUG+0) +#endif +#if HB_DEBUG_SANITIZE +#define TRACE_SANITIZE(this) \ + hb_auto_trace_t<HB_DEBUG_SANITIZE, bool> trace \ + (&c->debug_depth, c->get_name (), this, HB_FUNC, \ + " ") +#else +#define TRACE_SANITIZE(this) hb_no_trace_t<bool> trace +#endif + +#ifndef HB_DEBUG_SERIALIZE +#define HB_DEBUG_SERIALIZE (HB_DEBUG+0) +#endif +#if HB_DEBUG_SERIALIZE +#define TRACE_SERIALIZE(this) \ + hb_auto_trace_t<HB_DEBUG_SERIALIZE, bool> trace \ + (&c->debug_depth, "SERIALIZE", c, HB_FUNC, \ + " ") +#else +#define TRACE_SERIALIZE(this) hb_no_trace_t<bool> trace +#endif + +#ifndef HB_DEBUG_SUBSET +#define HB_DEBUG_SUBSET (HB_DEBUG+0) +#endif +#if HB_DEBUG_SUBSET +#define TRACE_SUBSET(this) \ + hb_auto_trace_t<HB_DEBUG_SUBSET, bool> trace \ + (&c->debug_depth, c->get_name (), this, HB_FUNC, \ + " ") +#else +#define TRACE_SUBSET(this) hb_no_trace_t<bool> trace +#endif + +#ifndef HB_DEBUG_DISPATCH +#define HB_DEBUG_DISPATCH ( \ + HB_DEBUG_APPLY + \ + HB_DEBUG_SANITIZE + \ + HB_DEBUG_SERIALIZE + \ + HB_DEBUG_SUBSET + \ + 0) +#endif +#if HB_DEBUG_DISPATCH +#define TRACE_DISPATCH(this, format) \ + hb_auto_trace_t<context_t::max_debug_depth, typename context_t::return_t> trace \ + (&c->debug_depth, c->get_name (), this, HB_FUNC, \ + "format %d", (int) format) +#else +#define TRACE_DISPATCH(this, format) hb_no_trace_t<typename context_t::return_t> trace +#endif + + +#endif /* HB_DEBUG_HH */ diff --git a/thirdparty/harfbuzz/src/hb-deprecated.h b/thirdparty/harfbuzz/src/hb-deprecated.h new file mode 100644 index 0000000000..43f89a4c4e --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-deprecated.h @@ -0,0 +1,195 @@ +/* + * Copyright © 2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_DEPRECATED_H +#define HB_DEPRECATED_H + +#include "hb-common.h" +#include "hb-unicode.h" +#include "hb-font.h" +#include "hb-set.h" + + +/** + * SECTION:hb-deprecated + * @title: hb-deprecated + * @short_description: Deprecated API + * @include: hb.h + * + * These API have been deprecated in favor of newer API, or because they + * were deemed unnecessary. + **/ + + +HB_BEGIN_DECLS + +#ifndef HB_DISABLE_DEPRECATED + + +#define HB_SCRIPT_CANADIAN_ABORIGINAL HB_SCRIPT_CANADIAN_SYLLABICS + +#define HB_BUFFER_FLAGS_DEFAULT HB_BUFFER_FLAG_DEFAULT +#define HB_BUFFER_SERIALIZE_FLAGS_DEFAULT HB_BUFFER_SERIALIZE_FLAG_DEFAULT + +typedef hb_bool_t (*hb_font_get_glyph_func_t) (hb_font_t *font, void *font_data, + hb_codepoint_t unicode, hb_codepoint_t variation_selector, + hb_codepoint_t *glyph, + void *user_data); + +HB_EXTERN HB_DEPRECATED_FOR(hb_font_funcs_set_nominal_glyph_func and hb_font_funcs_set_variation_glyph_func) void +hb_font_funcs_set_glyph_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_func_t func, + void *user_data, hb_destroy_func_t destroy); + +HB_EXTERN HB_DEPRECATED void +hb_set_invert (hb_set_t *set); + +/** + * hb_unicode_eastasian_width_func_t: + * + * Deprecated: 2.0.0 + */ +typedef unsigned int (*hb_unicode_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode, + void *user_data); + +/** + * hb_unicode_funcs_set_eastasian_width_func: + * @ufuncs: a Unicode function structure + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + * Deprecated: 2.0.0 + **/ +HB_EXTERN HB_DEPRECATED void +hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs, + hb_unicode_eastasian_width_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_unicode_eastasian_width: + * + * Since: 0.9.2 + * Deprecated: 2.0.0 + **/ +HB_EXTERN HB_DEPRECATED unsigned int +hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode); + + +/** + * hb_unicode_decompose_compatibility_func_t: + * @ufuncs: a Unicode function structure + * @u: codepoint to decompose + * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into + * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func() + * + * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed. + * The complete length of the decomposition will be returned. + * + * If @u has no compatibility decomposition, zero should be returned. + * + * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any + * compatibility decomposition plus an terminating value of 0. Consequently, @decompose must be allocated by the caller to be at least this length. Implementations + * of this function type must ensure that they do not write past the provided array. + * + * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available. + * + * Deprecated: 2.0.0 + */ +typedef unsigned int (*hb_unicode_decompose_compatibility_func_t) (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t u, + hb_codepoint_t *decomposed, + void *user_data); + +/** + * HB_UNICODE_MAX_DECOMPOSITION_LEN: + * + * See Unicode 6.1 for details on the maximum decomposition length. + * + * Deprecated: 2.0.0 + */ +#define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */ + +/** + * hb_unicode_funcs_set_decompose_compatibility_func: + * @ufuncs: a Unicode function structure + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + * Deprecated: 2.0.0 + **/ +HB_EXTERN HB_DEPRECATED void +hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs, + hb_unicode_decompose_compatibility_func_t func, + void *user_data, hb_destroy_func_t destroy); + +HB_EXTERN HB_DEPRECATED unsigned int +hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t u, + hb_codepoint_t *decomposed); + + +typedef hb_font_get_glyph_kerning_func_t hb_font_get_glyph_v_kerning_func_t; + +/** + * hb_font_funcs_set_glyph_v_kerning_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + * Deprecated: 2.0.0 + **/ +HB_EXTERN void +hb_font_funcs_set_glyph_v_kerning_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_v_kerning_func_t func, + void *user_data, hb_destroy_func_t destroy); + +HB_EXTERN hb_position_t +hb_font_get_glyph_v_kerning (hb_font_t *font, + hb_codepoint_t top_glyph, hb_codepoint_t bottom_glyph); + +#endif + +HB_END_DECLS + +#endif /* HB_DEPRECATED_H */ diff --git a/thirdparty/harfbuzz/src/hb-directwrite.cc b/thirdparty/harfbuzz/src/hb-directwrite.cc new file mode 100644 index 0000000000..f2fce073e0 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-directwrite.cc @@ -0,0 +1,979 @@ +/* + * Copyright © 2015-2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "hb.hh" + +#ifdef HAVE_DIRECTWRITE + +#include "hb-shaper-impl.hh" + +#include <dwrite_1.h> + +#include "hb-directwrite.h" + + +/* Declare object creator for dynamic support of DWRITE */ +typedef HRESULT (* WINAPI t_DWriteCreateFactory)( + DWRITE_FACTORY_TYPE factoryType, + REFIID iid, + IUnknown **factory +); + +/* + * hb-directwrite uses new/delete syntatically but as we let users + * to override malloc/free, we will redefine new/delete so users + * won't need to do that by their own. + */ +void* operator new (size_t size) { return malloc (size); } +void* operator new [] (size_t size) { return malloc (size); } +void operator delete (void* pointer) { free (pointer); } +void operator delete [] (void* pointer) { free (pointer); } + + +/* + * DirectWrite font stream helpers + */ + +// This is a font loader which provides only one font (unlike its original design). +// For a better implementation which was also source of this +// and DWriteFontFileStream, have a look at to NativeFontResourceDWrite.cpp in Mozilla +class DWriteFontFileLoader : public IDWriteFontFileLoader +{ +private: + IDWriteFontFileStream *mFontFileStream; +public: + DWriteFontFileLoader (IDWriteFontFileStream *fontFileStream) + { mFontFileStream = fontFileStream; } + + // IUnknown interface + IFACEMETHOD (QueryInterface) (IID const& iid, OUT void** ppObject) + { return S_OK; } + IFACEMETHOD_ (ULONG, AddRef) () { return 1; } + IFACEMETHOD_ (ULONG, Release) () { return 1; } + + // IDWriteFontFileLoader methods + virtual HRESULT STDMETHODCALLTYPE + CreateStreamFromKey (void const* fontFileReferenceKey, + uint32_t fontFileReferenceKeySize, + OUT IDWriteFontFileStream** fontFileStream) + { + *fontFileStream = mFontFileStream; + return S_OK; + } + + virtual ~DWriteFontFileLoader() {} +}; + +class DWriteFontFileStream : public IDWriteFontFileStream +{ +private: + uint8_t *mData; + uint32_t mSize; +public: + DWriteFontFileStream (uint8_t *aData, uint32_t aSize) + { + mData = aData; + mSize = aSize; + } + + // IUnknown interface + IFACEMETHOD (QueryInterface) (IID const& iid, OUT void** ppObject) + { return S_OK; } + IFACEMETHOD_ (ULONG, AddRef) () { return 1; } + IFACEMETHOD_ (ULONG, Release) () { return 1; } + + // IDWriteFontFileStream methods + virtual HRESULT STDMETHODCALLTYPE + ReadFileFragment (void const** fragmentStart, + UINT64 fileOffset, + UINT64 fragmentSize, + OUT void** fragmentContext) + { + // We are required to do bounds checking. + if (fileOffset + fragmentSize > mSize) return E_FAIL; + + // truncate the 64 bit fileOffset to size_t sized index into mData + size_t index = static_cast<size_t> (fileOffset); + + // We should be alive for the duration of this. + *fragmentStart = &mData[index]; + *fragmentContext = nullptr; + return S_OK; + } + + virtual void STDMETHODCALLTYPE + ReleaseFileFragment (void* fragmentContext) {} + + virtual HRESULT STDMETHODCALLTYPE + GetFileSize (OUT UINT64* fileSize) + { + *fileSize = mSize; + return S_OK; + } + + virtual HRESULT STDMETHODCALLTYPE + GetLastWriteTime (OUT UINT64* lastWriteTime) { return E_NOTIMPL; } + + virtual ~DWriteFontFileStream() {} +}; + + +/* +* shaper face data +*/ + +struct hb_directwrite_face_data_t +{ + HMODULE dwrite_dll; + IDWriteFactory *dwriteFactory; + IDWriteFontFile *fontFile; + DWriteFontFileStream *fontFileStream; + DWriteFontFileLoader *fontFileLoader; + IDWriteFontFace *fontFace; + hb_blob_t *faceBlob; +}; + +hb_directwrite_face_data_t * +_hb_directwrite_shaper_face_data_create (hb_face_t *face) +{ + hb_directwrite_face_data_t *data = new hb_directwrite_face_data_t; + if (unlikely (!data)) + return nullptr; + +#define FAIL(...) \ + HB_STMT_START { \ + DEBUG_MSG (DIRECTWRITE, nullptr, __VA_ARGS__); \ + return nullptr; \ + } HB_STMT_END + + data->dwrite_dll = LoadLibrary (TEXT ("DWRITE")); + if (unlikely (!data->dwrite_dll)) + FAIL ("Cannot find DWrite.DLL"); + + t_DWriteCreateFactory p_DWriteCreateFactory; + +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-function-type" +#endif + + p_DWriteCreateFactory = (t_DWriteCreateFactory) + GetProcAddress (data->dwrite_dll, "DWriteCreateFactory"); + +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif + + if (unlikely (!p_DWriteCreateFactory)) + FAIL ("Cannot find DWriteCreateFactory()."); + + HRESULT hr; + + // TODO: factory and fontFileLoader should be cached separately + IDWriteFactory* dwriteFactory; + hr = p_DWriteCreateFactory (DWRITE_FACTORY_TYPE_SHARED, __uuidof (IDWriteFactory), + (IUnknown**) &dwriteFactory); + + if (unlikely (hr != S_OK)) + FAIL ("Failed to run DWriteCreateFactory()."); + + hb_blob_t *blob = hb_face_reference_blob (face); + DWriteFontFileStream *fontFileStream; + fontFileStream = new DWriteFontFileStream ((uint8_t *) hb_blob_get_data (blob, nullptr), + hb_blob_get_length (blob)); + + DWriteFontFileLoader *fontFileLoader = new DWriteFontFileLoader (fontFileStream); + dwriteFactory->RegisterFontFileLoader (fontFileLoader); + + IDWriteFontFile *fontFile; + uint64_t fontFileKey = 0; + hr = dwriteFactory->CreateCustomFontFileReference (&fontFileKey, sizeof (fontFileKey), + fontFileLoader, &fontFile); + + if (FAILED (hr)) + FAIL ("Failed to load font file from data!"); + + BOOL isSupported; + DWRITE_FONT_FILE_TYPE fileType; + DWRITE_FONT_FACE_TYPE faceType; + uint32_t numberOfFaces; + hr = fontFile->Analyze (&isSupported, &fileType, &faceType, &numberOfFaces); + if (FAILED (hr) || !isSupported) + FAIL ("Font file is not supported."); + +#undef FAIL + + IDWriteFontFace *fontFace; + dwriteFactory->CreateFontFace (faceType, 1, &fontFile, 0, + DWRITE_FONT_SIMULATIONS_NONE, &fontFace); + + data->dwriteFactory = dwriteFactory; + data->fontFile = fontFile; + data->fontFileStream = fontFileStream; + data->fontFileLoader = fontFileLoader; + data->fontFace = fontFace; + data->faceBlob = blob; + + return data; +} + +void +_hb_directwrite_shaper_face_data_destroy (hb_directwrite_face_data_t *data) +{ + if (data->fontFace) + data->fontFace->Release (); + if (data->fontFile) + data->fontFile->Release (); + if (data->dwriteFactory) + { + if (data->fontFileLoader) + data->dwriteFactory->UnregisterFontFileLoader (data->fontFileLoader); + data->dwriteFactory->Release (); + } + if (data->fontFileLoader) + delete data->fontFileLoader; + if (data->fontFileStream) + delete data->fontFileStream; + if (data->faceBlob) + hb_blob_destroy (data->faceBlob); + if (data->dwrite_dll) + FreeLibrary (data->dwrite_dll); + if (data) + delete data; +} + + +/* + * shaper font data + */ + +struct hb_directwrite_font_data_t {}; + +hb_directwrite_font_data_t * +_hb_directwrite_shaper_font_data_create (hb_font_t *font) +{ + hb_directwrite_font_data_t *data = new hb_directwrite_font_data_t; + if (unlikely (!data)) + return nullptr; + + return data; +} + +void +_hb_directwrite_shaper_font_data_destroy (hb_directwrite_font_data_t *data) +{ + delete data; +} + + +// Most of TextAnalysis is originally written by Bas Schouten for Mozilla project +// but now is relicensed to MIT for HarfBuzz use +class TextAnalysis : public IDWriteTextAnalysisSource, public IDWriteTextAnalysisSink +{ +public: + + IFACEMETHOD (QueryInterface) (IID const& iid, OUT void** ppObject) + { return S_OK; } + IFACEMETHOD_ (ULONG, AddRef) () { return 1; } + IFACEMETHOD_ (ULONG, Release) () { return 1; } + + // A single contiguous run of characters containing the same analysis + // results. + struct Run + { + uint32_t mTextStart; // starting text position of this run + uint32_t mTextLength; // number of contiguous code units covered + uint32_t mGlyphStart; // starting glyph in the glyphs array + uint32_t mGlyphCount; // number of glyphs associated with this run + // text + DWRITE_SCRIPT_ANALYSIS mScript; + uint8_t mBidiLevel; + bool mIsSideways; + + bool ContainsTextPosition (uint32_t aTextPosition) const + { + return aTextPosition >= mTextStart && + aTextPosition < mTextStart + mTextLength; + } + + Run *nextRun; + }; + +public: + TextAnalysis (const wchar_t* text, uint32_t textLength, + const wchar_t* localeName, DWRITE_READING_DIRECTION readingDirection) + : mTextLength (textLength), mText (text), mLocaleName (localeName), + mReadingDirection (readingDirection), mCurrentRun (nullptr) {} + ~TextAnalysis () + { + // delete runs, except mRunHead which is part of the TextAnalysis object + for (Run *run = mRunHead.nextRun; run;) + { + Run *origRun = run; + run = run->nextRun; + delete origRun; + } + } + + STDMETHODIMP + GenerateResults (IDWriteTextAnalyzer* textAnalyzer, Run **runHead) + { + // Analyzes the text using the script analyzer and returns + // the result as a series of runs. + + HRESULT hr = S_OK; + + // Initially start out with one result that covers the entire range. + // This result will be subdivided by the analysis processes. + mRunHead.mTextStart = 0; + mRunHead.mTextLength = mTextLength; + mRunHead.mBidiLevel = + (mReadingDirection == DWRITE_READING_DIRECTION_RIGHT_TO_LEFT); + mRunHead.nextRun = nullptr; + mCurrentRun = &mRunHead; + + // Call each of the analyzers in sequence, recording their results. + if (SUCCEEDED (hr = textAnalyzer->AnalyzeScript (this, 0, mTextLength, this))) + *runHead = &mRunHead; + + return hr; + } + + // IDWriteTextAnalysisSource implementation + + IFACEMETHODIMP + GetTextAtPosition (uint32_t textPosition, + OUT wchar_t const** textString, + OUT uint32_t* textLength) + { + if (textPosition >= mTextLength) + { + // No text at this position, valid query though. + *textString = nullptr; + *textLength = 0; + } + else + { + *textString = mText + textPosition; + *textLength = mTextLength - textPosition; + } + return S_OK; + } + + IFACEMETHODIMP + GetTextBeforePosition (uint32_t textPosition, + OUT wchar_t const** textString, + OUT uint32_t* textLength) + { + if (textPosition == 0 || textPosition > mTextLength) + { + // Either there is no text before here (== 0), or this + // is an invalid position. The query is considered valid though. + *textString = nullptr; + *textLength = 0; + } + else + { + *textString = mText; + *textLength = textPosition; + } + return S_OK; + } + + IFACEMETHODIMP_ (DWRITE_READING_DIRECTION) + GetParagraphReadingDirection () { return mReadingDirection; } + + IFACEMETHODIMP GetLocaleName (uint32_t textPosition, uint32_t* textLength, + wchar_t const** localeName) + { return S_OK; } + + IFACEMETHODIMP + GetNumberSubstitution (uint32_t textPosition, + OUT uint32_t* textLength, + OUT IDWriteNumberSubstitution** numberSubstitution) + { + // We do not support number substitution. + *numberSubstitution = nullptr; + *textLength = mTextLength - textPosition; + + return S_OK; + } + + // IDWriteTextAnalysisSink implementation + + IFACEMETHODIMP + SetScriptAnalysis (uint32_t textPosition, uint32_t textLength, + DWRITE_SCRIPT_ANALYSIS const* scriptAnalysis) + { + SetCurrentRun (textPosition); + SplitCurrentRun (textPosition); + while (textLength > 0) + { + Run *run = FetchNextRun (&textLength); + run->mScript = *scriptAnalysis; + } + + return S_OK; + } + + IFACEMETHODIMP + SetLineBreakpoints (uint32_t textPosition, + uint32_t textLength, + const DWRITE_LINE_BREAKPOINT* lineBreakpoints) + { return S_OK; } + + IFACEMETHODIMP SetBidiLevel (uint32_t textPosition, uint32_t textLength, + uint8_t explicitLevel, uint8_t resolvedLevel) + { return S_OK; } + + IFACEMETHODIMP + SetNumberSubstitution (uint32_t textPosition, uint32_t textLength, + IDWriteNumberSubstitution* numberSubstitution) + { return S_OK; } + +protected: + Run *FetchNextRun (IN OUT uint32_t* textLength) + { + // Used by the sink setters, this returns a reference to the next run. + // Position and length are adjusted to now point after the current run + // being returned. + + Run *origRun = mCurrentRun; + // Split the tail if needed (the length remaining is less than the + // current run's size). + if (*textLength < mCurrentRun->mTextLength) + SplitCurrentRun (mCurrentRun->mTextStart + *textLength); + else + // Just advance the current run. + mCurrentRun = mCurrentRun->nextRun; + *textLength -= origRun->mTextLength; + + // Return a reference to the run that was just current. + return origRun; + } + + void SetCurrentRun (uint32_t textPosition) + { + // Move the current run to the given position. + // Since the analyzers generally return results in a forward manner, + // this will usually just return early. If not, find the + // corresponding run for the text position. + + if (mCurrentRun && mCurrentRun->ContainsTextPosition (textPosition)) + return; + + for (Run *run = &mRunHead; run; run = run->nextRun) + if (run->ContainsTextPosition (textPosition)) + { + mCurrentRun = run; + return; + } + assert (0); // We should always be able to find the text position in one of our runs + } + + void SplitCurrentRun (uint32_t splitPosition) + { + if (!mCurrentRun) + { + assert (0); // SplitCurrentRun called without current run + // Shouldn't be calling this when no current run is set! + return; + } + // Split the current run. + if (splitPosition <= mCurrentRun->mTextStart) + { + // No need to split, already the start of a run + // or before it. Usually the first. + return; + } + Run *newRun = new Run; + + *newRun = *mCurrentRun; + + // Insert the new run in our linked list. + newRun->nextRun = mCurrentRun->nextRun; + mCurrentRun->nextRun = newRun; + + // Adjust runs' text positions and lengths. + uint32_t splitPoint = splitPosition - mCurrentRun->mTextStart; + newRun->mTextStart += splitPoint; + newRun->mTextLength -= splitPoint; + mCurrentRun->mTextLength = splitPoint; + mCurrentRun = newRun; + } + +protected: + // Input + // (weak references are fine here, since this class is a transient + // stack-based helper that doesn't need to copy data) + uint32_t mTextLength; + const wchar_t* mText; + const wchar_t* mLocaleName; + DWRITE_READING_DIRECTION mReadingDirection; + + // Current processing state. + Run *mCurrentRun; + + // Output is a list of runs starting here + Run mRunHead; +}; + +/* + * shaper + */ + +static hb_bool_t +_hb_directwrite_shape_full (hb_shape_plan_t *shape_plan, + hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features, + float lineWidth) +{ + hb_face_t *face = font->face; + const hb_directwrite_face_data_t *face_data = face->data.directwrite; + IDWriteFactory *dwriteFactory = face_data->dwriteFactory; + IDWriteFontFace *fontFace = face_data->fontFace; + + IDWriteTextAnalyzer* analyzer; + dwriteFactory->CreateTextAnalyzer (&analyzer); + + unsigned int scratch_size; + hb_buffer_t::scratch_buffer_t *scratch = buffer->get_scratch_buffer (&scratch_size); +#define ALLOCATE_ARRAY(Type, name, len) \ + Type *name = (Type *) scratch; \ + do { \ + unsigned int _consumed = DIV_CEIL ((len) * sizeof (Type), sizeof (*scratch)); \ + assert (_consumed <= scratch_size); \ + scratch += _consumed; \ + scratch_size -= _consumed; \ + } while (0) + +#define utf16_index() var1.u32 + + ALLOCATE_ARRAY (wchar_t, textString, buffer->len * 2); + + unsigned int chars_len = 0; + for (unsigned int i = 0; i < buffer->len; i++) + { + hb_codepoint_t c = buffer->info[i].codepoint; + buffer->info[i].utf16_index () = chars_len; + if (likely (c <= 0xFFFFu)) + textString[chars_len++] = c; + else if (unlikely (c > 0x10FFFFu)) + textString[chars_len++] = 0xFFFDu; + else + { + textString[chars_len++] = 0xD800u + ((c - 0x10000u) >> 10); + textString[chars_len++] = 0xDC00u + ((c - 0x10000u) & ((1u << 10) - 1)); + } + } + + ALLOCATE_ARRAY (WORD, log_clusters, chars_len); + /* Need log_clusters to assign features. */ + chars_len = 0; + for (unsigned int i = 0; i < buffer->len; i++) + { + hb_codepoint_t c = buffer->info[i].codepoint; + unsigned int cluster = buffer->info[i].cluster; + log_clusters[chars_len++] = cluster; + if (hb_in_range (c, 0x10000u, 0x10FFFFu)) + log_clusters[chars_len++] = cluster; /* Surrogates. */ + } + + // TODO: Handle TEST_DISABLE_OPTIONAL_LIGATURES + + DWRITE_READING_DIRECTION readingDirection; + readingDirection = buffer->props.direction ? + DWRITE_READING_DIRECTION_RIGHT_TO_LEFT : + DWRITE_READING_DIRECTION_LEFT_TO_RIGHT; + + /* + * There's an internal 16-bit limit on some things inside the analyzer, + * but we never attempt to shape a word longer than 64K characters + * in a single gfxShapedWord, so we cannot exceed that limit. + */ + uint32_t textLength = buffer->len; + + TextAnalysis analysis (textString, textLength, nullptr, readingDirection); + TextAnalysis::Run *runHead; + HRESULT hr; + hr = analysis.GenerateResults (analyzer, &runHead); + +#define FAIL(...) \ + HB_STMT_START { \ + DEBUG_MSG (DIRECTWRITE, nullptr, __VA_ARGS__); \ + return false; \ + } HB_STMT_END + + if (FAILED (hr)) + FAIL ("Analyzer failed to generate results."); + + uint32_t maxGlyphCount = 3 * textLength / 2 + 16; + uint32_t glyphCount; + bool isRightToLeft = HB_DIRECTION_IS_BACKWARD (buffer->props.direction); + + const wchar_t localeName[20] = {0}; + if (buffer->props.language) + mbstowcs ((wchar_t*) localeName, + hb_language_to_string (buffer->props.language), 20); + + // TODO: it does work but doesn't care about ranges + DWRITE_TYPOGRAPHIC_FEATURES typographic_features; + typographic_features.featureCount = num_features; + if (num_features) + { + typographic_features.features = new DWRITE_FONT_FEATURE[num_features]; + for (unsigned int i = 0; i < num_features; ++i) + { + typographic_features.features[i].nameTag = (DWRITE_FONT_FEATURE_TAG) + hb_uint32_swap (features[i].tag); + typographic_features.features[i].parameter = features[i].value; + } + } + const DWRITE_TYPOGRAPHIC_FEATURES* dwFeatures; + dwFeatures = (const DWRITE_TYPOGRAPHIC_FEATURES*) &typographic_features; + const uint32_t featureRangeLengths[] = { textLength }; + // + + uint16_t* clusterMap; + clusterMap = new uint16_t[textLength]; + DWRITE_SHAPING_TEXT_PROPERTIES* textProperties; + textProperties = new DWRITE_SHAPING_TEXT_PROPERTIES[textLength]; +retry_getglyphs: + uint16_t* glyphIndices = new uint16_t[maxGlyphCount]; + DWRITE_SHAPING_GLYPH_PROPERTIES* glyphProperties; + glyphProperties = new DWRITE_SHAPING_GLYPH_PROPERTIES[maxGlyphCount]; + + hr = analyzer->GetGlyphs (textString, textLength, fontFace, false, + isRightToLeft, &runHead->mScript, localeName, + nullptr, &dwFeatures, featureRangeLengths, 1, + maxGlyphCount, clusterMap, textProperties, + glyphIndices, glyphProperties, &glyphCount); + + if (unlikely (hr == HRESULT_FROM_WIN32 (ERROR_INSUFFICIENT_BUFFER))) + { + delete [] glyphIndices; + delete [] glyphProperties; + + maxGlyphCount *= 2; + + goto retry_getglyphs; + } + if (FAILED (hr)) + FAIL ("Analyzer failed to get glyphs."); + + float* glyphAdvances = new float[maxGlyphCount]; + DWRITE_GLYPH_OFFSET* glyphOffsets = new DWRITE_GLYPH_OFFSET[maxGlyphCount]; + + /* The -2 in the following is to compensate for possible + * alignment needed after the WORD array. sizeof (WORD) == 2. */ + unsigned int glyphs_size = (scratch_size * sizeof (int) - 2) + / (sizeof (WORD) + + sizeof (DWRITE_SHAPING_GLYPH_PROPERTIES) + + sizeof (int) + + sizeof (DWRITE_GLYPH_OFFSET) + + sizeof (uint32_t)); + ALLOCATE_ARRAY (uint32_t, vis_clusters, glyphs_size); + +#undef ALLOCATE_ARRAY + + int fontEmSize = font->face->get_upem (); + if (fontEmSize < 0) fontEmSize = -fontEmSize; + + if (fontEmSize < 0) fontEmSize = -fontEmSize; + double x_mult = (double) font->x_scale / fontEmSize; + double y_mult = (double) font->y_scale / fontEmSize; + + hr = analyzer->GetGlyphPlacements (textString, clusterMap, textProperties, + textLength, glyphIndices, glyphProperties, + glyphCount, fontFace, fontEmSize, + false, isRightToLeft, &runHead->mScript, localeName, + &dwFeatures, featureRangeLengths, 1, + glyphAdvances, glyphOffsets); + + if (FAILED (hr)) + FAIL ("Analyzer failed to get glyph placements."); + + IDWriteTextAnalyzer1* analyzer1; + analyzer->QueryInterface (&analyzer1); + + if (analyzer1 && lineWidth) + { + DWRITE_JUSTIFICATION_OPPORTUNITY* justificationOpportunities = + new DWRITE_JUSTIFICATION_OPPORTUNITY[maxGlyphCount]; + hr = analyzer1->GetJustificationOpportunities (fontFace, fontEmSize, runHead->mScript, + textLength, glyphCount, textString, + clusterMap, glyphProperties, + justificationOpportunities); + + if (FAILED (hr)) + FAIL ("Analyzer failed to get justification opportunities."); + + float* justifiedGlyphAdvances = new float[maxGlyphCount]; + DWRITE_GLYPH_OFFSET* justifiedGlyphOffsets = new DWRITE_GLYPH_OFFSET[glyphCount]; + hr = analyzer1->JustifyGlyphAdvances (lineWidth, glyphCount, justificationOpportunities, + glyphAdvances, glyphOffsets, justifiedGlyphAdvances, + justifiedGlyphOffsets); + + if (FAILED (hr)) FAIL ("Analyzer failed to get justify glyph advances."); + + DWRITE_SCRIPT_PROPERTIES scriptProperties; + hr = analyzer1->GetScriptProperties (runHead->mScript, &scriptProperties); + if (FAILED (hr)) FAIL ("Analyzer failed to get script properties."); + uint32_t justificationCharacter = scriptProperties.justificationCharacter; + + // if a script justificationCharacter is not space, it can have GetJustifiedGlyphs + if (justificationCharacter != 32) + { + uint16_t* modifiedClusterMap = new uint16_t[textLength]; + retry_getjustifiedglyphs: + uint16_t* modifiedGlyphIndices = new uint16_t[maxGlyphCount]; + float* modifiedGlyphAdvances = new float[maxGlyphCount]; + DWRITE_GLYPH_OFFSET* modifiedGlyphOffsets = new DWRITE_GLYPH_OFFSET[maxGlyphCount]; + uint32_t actualGlyphsCount; + hr = analyzer1->GetJustifiedGlyphs (fontFace, fontEmSize, runHead->mScript, + textLength, glyphCount, maxGlyphCount, + clusterMap, glyphIndices, glyphAdvances, + justifiedGlyphAdvances, justifiedGlyphOffsets, + glyphProperties, &actualGlyphsCount, + modifiedClusterMap, modifiedGlyphIndices, + modifiedGlyphAdvances, modifiedGlyphOffsets); + + if (hr == HRESULT_FROM_WIN32 (ERROR_INSUFFICIENT_BUFFER)) + { + maxGlyphCount = actualGlyphsCount; + delete [] modifiedGlyphIndices; + delete [] modifiedGlyphAdvances; + delete [] modifiedGlyphOffsets; + + maxGlyphCount = actualGlyphsCount; + + goto retry_getjustifiedglyphs; + } + if (FAILED (hr)) + FAIL ("Analyzer failed to get justified glyphs."); + + delete [] clusterMap; + delete [] glyphIndices; + delete [] glyphAdvances; + delete [] glyphOffsets; + + glyphCount = actualGlyphsCount; + clusterMap = modifiedClusterMap; + glyphIndices = modifiedGlyphIndices; + glyphAdvances = modifiedGlyphAdvances; + glyphOffsets = modifiedGlyphOffsets; + + delete [] justifiedGlyphAdvances; + delete [] justifiedGlyphOffsets; + } + else + { + delete [] glyphAdvances; + delete [] glyphOffsets; + + glyphAdvances = justifiedGlyphAdvances; + glyphOffsets = justifiedGlyphOffsets; + } + + delete [] justificationOpportunities; + } + + /* Ok, we've got everything we need, now compose output buffer, + * very, *very*, carefully! */ + + /* Calculate visual-clusters. That's what we ship. */ + for (unsigned int i = 0; i < glyphCount; i++) + vis_clusters[i] = (uint32_t) -1; + for (unsigned int i = 0; i < buffer->len; i++) + { + uint32_t *p = + &vis_clusters[log_clusters[buffer->info[i].utf16_index ()]]; + *p = hb_min (*p, buffer->info[i].cluster); + } + for (unsigned int i = 1; i < glyphCount; i++) + if (vis_clusters[i] == (uint32_t) -1) + vis_clusters[i] = vis_clusters[i - 1]; + +#undef utf16_index + + if (unlikely (!buffer->ensure (glyphCount))) + FAIL ("Buffer in error"); + +#undef FAIL + + /* Set glyph infos */ + buffer->len = 0; + for (unsigned int i = 0; i < glyphCount; i++) + { + hb_glyph_info_t *info = &buffer->info[buffer->len++]; + + info->codepoint = glyphIndices[i]; + info->cluster = vis_clusters[i]; + + /* The rest is crap. Let's store position info there for now. */ + info->mask = glyphAdvances[i]; + info->var1.i32 = glyphOffsets[i].advanceOffset; + info->var2.i32 = glyphOffsets[i].ascenderOffset; + } + + /* Set glyph positions */ + buffer->clear_positions (); + for (unsigned int i = 0; i < glyphCount; i++) + { + hb_glyph_info_t *info = &buffer->info[i]; + hb_glyph_position_t *pos = &buffer->pos[i]; + + /* TODO vertical */ + pos->x_advance = x_mult * (int32_t) info->mask; + pos->x_offset = x_mult * (isRightToLeft ? -info->var1.i32 : info->var1.i32); + pos->y_offset = y_mult * info->var2.i32; + } + + if (isRightToLeft) hb_buffer_reverse (buffer); + + delete [] clusterMap; + delete [] glyphIndices; + delete [] textProperties; + delete [] glyphProperties; + delete [] glyphAdvances; + delete [] glyphOffsets; + + if (num_features) + delete [] typographic_features.features; + + /* Wow, done! */ + return true; +} + +hb_bool_t +_hb_directwrite_shape (hb_shape_plan_t *shape_plan, + hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features) +{ + return _hb_directwrite_shape_full (shape_plan, font, buffer, + features, num_features, 0); +} + +HB_UNUSED static bool +_hb_directwrite_shape_experimental_width (hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features, + float width) +{ + static const char *shapers = "directwrite"; + hb_shape_plan_t *shape_plan; + shape_plan = hb_shape_plan_create_cached (font->face, &buffer->props, + features, num_features, &shapers); + hb_bool_t res = _hb_directwrite_shape_full (shape_plan, font, buffer, + features, num_features, width); + + buffer->unsafe_to_break_all (); + + return res; +} + +struct _hb_directwrite_font_table_context { + IDWriteFontFace *face; + void *table_context; +}; + +static void +_hb_directwrite_table_data_release (void *data) +{ + _hb_directwrite_font_table_context *context = (_hb_directwrite_font_table_context *) data; + context->face->ReleaseFontTable (context->table_context); + delete context; +} + +static hb_blob_t * +_hb_directwrite_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void *user_data) +{ + IDWriteFontFace *dw_face = ((IDWriteFontFace *) user_data); + const void *data; + uint32_t length; + void *table_context; + BOOL exists; + if (!dw_face || FAILED (dw_face->TryGetFontTable (hb_uint32_swap (tag), &data, + &length, &table_context, &exists))) + return nullptr; + + if (!data || !exists || !length) + { + dw_face->ReleaseFontTable (table_context); + return nullptr; + } + + _hb_directwrite_font_table_context *context = new _hb_directwrite_font_table_context; + context->face = dw_face; + context->table_context = table_context; + + return hb_blob_create ((const char *) data, length, HB_MEMORY_MODE_READONLY, + context, _hb_directwrite_table_data_release); +} + +static void +_hb_directwrite_font_release (void *data) +{ + if (data) + ((IDWriteFontFace *) data)->Release (); +} + +/** + * hb_directwrite_face_create: + * @font_face: a DirectWrite IDWriteFontFace object. + * + * Return value: #hb_face_t object corresponding to the given input + * + * Since: 2.4.0 + **/ +hb_face_t * +hb_directwrite_face_create (IDWriteFontFace *font_face) +{ + if (font_face) + font_face->AddRef (); + return hb_face_create_for_tables (_hb_directwrite_reference_table, font_face, + _hb_directwrite_font_release); +} + +/** +* hb_directwrite_face_get_font_face: +* @face: a #hb_face_t object +* +* Return value: DirectWrite IDWriteFontFace object corresponding to the given input +* +* Since: 2.5.0 +**/ +IDWriteFontFace * +hb_directwrite_face_get_font_face (hb_face_t *face) +{ + return face->data.directwrite->fontFace; +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-directwrite.h b/thirdparty/harfbuzz/src/hb-directwrite.h new file mode 100644 index 0000000000..f837627a28 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-directwrite.h @@ -0,0 +1,40 @@ +/* + * Copyright © 2015-2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_DIRECTWRITE_H +#define HB_DIRECTWRITE_H + +#include "hb.h" + +HB_BEGIN_DECLS + +HB_EXTERN hb_face_t * +hb_directwrite_face_create (IDWriteFontFace *font_face); + +HB_EXTERN IDWriteFontFace * +hb_directwrite_face_get_font_face (hb_face_t *face); + +HB_END_DECLS + +#endif /* HB_DIRECTWRITE_H */ diff --git a/thirdparty/harfbuzz/src/hb-dispatch.hh b/thirdparty/harfbuzz/src/hb-dispatch.hh new file mode 100644 index 0000000000..7eace86e54 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-dispatch.hh @@ -0,0 +1,61 @@ +/* + * Copyright © 2007,2008,2009,2010 Red Hat, Inc. + * Copyright © 2012,2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_DISPATCH_HH +#define HB_DISPATCH_HH + +#include "hb.hh" + +/* + * Dispatch + */ + +template <typename Context, typename Return=hb_empty_t, unsigned int MaxDebugDepth=0> +struct hb_dispatch_context_t +{ + hb_dispatch_context_t () : debug_depth (0) {} + private: + /* https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern */ + const Context* thiz () const { return static_cast<const Context *> (this); } + Context* thiz () { return static_cast< Context *> (this); } + public: + const char *get_name () { return "UNKNOWN"; } + static constexpr unsigned max_debug_depth = MaxDebugDepth; + typedef Return return_t; + template <typename T, typename F> + bool may_dispatch (const T *obj HB_UNUSED, const F *format HB_UNUSED) { return true; } + template <typename T, typename ...Ts> + return_t dispatch (const T &obj, Ts&&... ds) + { return obj.dispatch (thiz (), hb_forward<Ts> (ds)...); } + static return_t no_dispatch_return_value () { return Context::default_return_value (); } + static bool stop_sublookup_iteration (const return_t r HB_UNUSED) { return false; } + unsigned debug_depth; +}; + + +#endif /* HB_DISPATCH_HH */ diff --git a/thirdparty/harfbuzz/src/hb-draw.cc b/thirdparty/harfbuzz/src/hb-draw.cc new file mode 100644 index 0000000000..1a5f9c8c6b --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-draw.cc @@ -0,0 +1,261 @@ +/* + * Copyright © 2019-2020 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "hb.hh" + +#ifndef HB_NO_DRAW +#ifdef HB_EXPERIMENTAL_API + +#include "hb-draw.hh" +#include "hb-ot.h" +#include "hb-ot-glyf-table.hh" +#include "hb-ot-cff1-table.hh" +#include "hb-ot-cff2-table.hh" + +/** + * hb_draw_funcs_set_move_to_func: + * @funcs: draw functions object + * @move_to: move-to callback + * + * Sets move-to callback to the draw functions object. + * + * Since: EXPERIMENTAL + **/ +void +hb_draw_funcs_set_move_to_func (hb_draw_funcs_t *funcs, + hb_draw_move_to_func_t move_to) +{ + if (unlikely (hb_object_is_immutable (funcs))) return; + funcs->move_to = move_to; +} + +/** + * hb_draw_funcs_set_line_to_func: + * @funcs: draw functions object + * @line_to: line-to callback + * + * Sets line-to callback to the draw functions object. + * + * Since: EXPERIMENTAL + **/ +void +hb_draw_funcs_set_line_to_func (hb_draw_funcs_t *funcs, + hb_draw_line_to_func_t line_to) +{ + if (unlikely (hb_object_is_immutable (funcs))) return; + funcs->line_to = line_to; +} + +/** + * hb_draw_funcs_set_quadratic_to_func: + * @funcs: draw functions object + * @move_to: quadratic-to callback + * + * Sets quadratic-to callback to the draw functions object. + * + * Since: EXPERIMENTAL + **/ +void +hb_draw_funcs_set_quadratic_to_func (hb_draw_funcs_t *funcs, + hb_draw_quadratic_to_func_t quadratic_to) +{ + if (unlikely (hb_object_is_immutable (funcs))) return; + funcs->quadratic_to = quadratic_to; + funcs->is_quadratic_to_set = true; +} + +/** + * hb_draw_funcs_set_cubic_to_func: + * @funcs: draw functions + * @cubic_to: cubic-to callback + * + * Sets cubic-to callback to the draw functions object. + * + * Since: EXPERIMENTAL + **/ +void +hb_draw_funcs_set_cubic_to_func (hb_draw_funcs_t *funcs, + hb_draw_cubic_to_func_t cubic_to) +{ + if (unlikely (hb_object_is_immutable (funcs))) return; + funcs->cubic_to = cubic_to; +} + +/** + * hb_draw_funcs_set_close_path_func: + * @funcs: draw functions object + * @close_path: close-path callback + * + * Sets close-path callback to the draw functions object. + * + * Since: EXPERIMENTAL + **/ +void +hb_draw_funcs_set_close_path_func (hb_draw_funcs_t *funcs, + hb_draw_close_path_func_t close_path) +{ + if (unlikely (hb_object_is_immutable (funcs))) return; + funcs->close_path = close_path; +} + +static void +_move_to_nil (hb_position_t to_x HB_UNUSED, hb_position_t to_y HB_UNUSED, void *user_data HB_UNUSED) {} + +static void +_line_to_nil (hb_position_t to_x HB_UNUSED, hb_position_t to_y HB_UNUSED, void *user_data HB_UNUSED) {} + +static void +_quadratic_to_nil (hb_position_t control_x HB_UNUSED, hb_position_t control_y HB_UNUSED, + hb_position_t to_x HB_UNUSED, hb_position_t to_y HB_UNUSED, + void *user_data HB_UNUSED) {} + +static void +_cubic_to_nil (hb_position_t control1_x HB_UNUSED, hb_position_t control1_y HB_UNUSED, + hb_position_t control2_x HB_UNUSED, hb_position_t control2_y HB_UNUSED, + hb_position_t to_x HB_UNUSED, hb_position_t to_y HB_UNUSED, + void *user_data HB_UNUSED) {} + +static void +_close_path_nil (void *user_data HB_UNUSED) {} + +/** + * hb_draw_funcs_create: + * + * Creates a new draw callbacks object. + * + * Since: EXPERIMENTAL + **/ +hb_draw_funcs_t * +hb_draw_funcs_create () +{ + hb_draw_funcs_t *funcs; + if (unlikely (!(funcs = hb_object_create<hb_draw_funcs_t> ()))) + return const_cast<hb_draw_funcs_t *> (&Null (hb_draw_funcs_t)); + + funcs->move_to = (hb_draw_move_to_func_t) _move_to_nil; + funcs->line_to = (hb_draw_line_to_func_t) _line_to_nil; + funcs->quadratic_to = (hb_draw_quadratic_to_func_t) _quadratic_to_nil; + funcs->is_quadratic_to_set = false; + funcs->cubic_to = (hb_draw_cubic_to_func_t) _cubic_to_nil; + funcs->close_path = (hb_draw_close_path_func_t) _close_path_nil; + return funcs; +} + +/** + * hb_draw_funcs_reference: + * @funcs: draw functions + * + * Add to callbacks object refcount. + * + * Returns: The same object. + * Since: EXPERIMENTAL + **/ +hb_draw_funcs_t * +hb_draw_funcs_reference (hb_draw_funcs_t *funcs) +{ + return hb_object_reference (funcs); +} + +/** + * hb_draw_funcs_destroy: + * @funcs: draw functions + * + * Decreases refcount of callbacks object and deletes the object if it reaches + * to zero. + * + * Since: EXPERIMENTAL + **/ +void +hb_draw_funcs_destroy (hb_draw_funcs_t *funcs) +{ + if (!hb_object_destroy (funcs)) return; + + free (funcs); +} + +/** + * hb_draw_funcs_make_immutable: + * @funcs: draw functions + * + * Makes funcs object immutable. + * + * Since: EXPERIMENTAL + **/ +void +hb_draw_funcs_make_immutable (hb_draw_funcs_t *funcs) +{ + if (hb_object_is_immutable (funcs)) + return; + + hb_object_make_immutable (funcs); +} + +/** + * hb_draw_funcs_is_immutable: + * @funcs: draw functions + * + * Checks whether funcs is immutable. + * + * Returns: If is immutable. + * Since: EXPERIMENTAL + **/ +hb_bool_t +hb_draw_funcs_is_immutable (hb_draw_funcs_t *funcs) +{ + return hb_object_is_immutable (funcs); +} + +/** + * hb_font_draw_glyph: + * @font: a font object + * @glyph: a glyph id + * @funcs: draw callbacks object + * @user_data: parameter you like be passed to the callbacks when are called + * + * Draw a glyph. + * + * Returns: Whether the font had the glyph and the operation completed successfully. + * Since: EXPERIMENTAL + **/ +hb_bool_t +hb_font_draw_glyph (hb_font_t *font, hb_codepoint_t glyph, + const hb_draw_funcs_t *funcs, + void *user_data) +{ + if (unlikely (funcs == &Null (hb_draw_funcs_t) || + glyph >= font->face->get_num_glyphs ())) + return false; + + draw_helper_t draw_helper (funcs, user_data); + if (font->face->table.glyf->get_path (font, glyph, draw_helper)) return true; +#ifndef HB_NO_CFF + if (font->face->table.cff1->get_path (font, glyph, draw_helper)) return true; + if (font->face->table.cff2->get_path (font, glyph, draw_helper)) return true; +#endif + + return false; +} + +#endif +#endif diff --git a/thirdparty/harfbuzz/src/hb-draw.h b/thirdparty/harfbuzz/src/hb-draw.h new file mode 100644 index 0000000000..98eccf4c0c --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-draw.h @@ -0,0 +1,98 @@ +/* + * Copyright © 2019-2020 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_DRAW_H +#define HB_DRAW_H + +#include "hb.h" + +HB_BEGIN_DECLS + +#ifdef HB_EXPERIMENTAL_API +typedef void (*hb_draw_move_to_func_t) (hb_position_t to_x, hb_position_t to_y, void *user_data); +typedef void (*hb_draw_line_to_func_t) (hb_position_t to_x, hb_position_t to_y, void *user_data); +typedef void (*hb_draw_quadratic_to_func_t) (hb_position_t control_x, hb_position_t control_y, + hb_position_t to_x, hb_position_t to_y, + void *user_data); +typedef void (*hb_draw_cubic_to_func_t) (hb_position_t control1_x, hb_position_t control1_y, + hb_position_t control2_x, hb_position_t control2_y, + hb_position_t to_x, hb_position_t to_y, + void *user_data); +typedef void (*hb_draw_close_path_func_t) (void *user_data); + +/** + * hb_draw_funcs_t: + * + * Glyph draw callbacks. + * + * _move_to, _line_to and _cubic_to calls are nessecary to be defined but we + * translate _quadratic_to calls to _cubic_to if the callback isn't defined. + * + * Since: EXPERIMENTAL + **/ +typedef struct hb_draw_funcs_t hb_draw_funcs_t; + +HB_EXTERN void +hb_draw_funcs_set_move_to_func (hb_draw_funcs_t *funcs, + hb_draw_move_to_func_t move_to); + +HB_EXTERN void +hb_draw_funcs_set_line_to_func (hb_draw_funcs_t *funcs, + hb_draw_line_to_func_t line_to); + +HB_EXTERN void +hb_draw_funcs_set_quadratic_to_func (hb_draw_funcs_t *funcs, + hb_draw_quadratic_to_func_t quadratic_to); + +HB_EXTERN void +hb_draw_funcs_set_cubic_to_func (hb_draw_funcs_t *funcs, + hb_draw_cubic_to_func_t cubic_to); + +HB_EXTERN void +hb_draw_funcs_set_close_path_func (hb_draw_funcs_t *funcs, + hb_draw_close_path_func_t close_path); + +HB_EXTERN hb_draw_funcs_t * +hb_draw_funcs_create (void); + +HB_EXTERN hb_draw_funcs_t * +hb_draw_funcs_reference (hb_draw_funcs_t *funcs); + +HB_EXTERN void +hb_draw_funcs_destroy (hb_draw_funcs_t *funcs); + +HB_EXTERN void +hb_draw_funcs_make_immutable (hb_draw_funcs_t *funcs); + +HB_EXTERN hb_bool_t +hb_draw_funcs_is_immutable (hb_draw_funcs_t *funcs); +#endif + +HB_END_DECLS + +#endif /* HB_DRAW_H */ diff --git a/thirdparty/harfbuzz/src/hb-draw.hh b/thirdparty/harfbuzz/src/hb-draw.hh new file mode 100644 index 0000000000..2aa0a5b4db --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-draw.hh @@ -0,0 +1,139 @@ +/* + * Copyright © 2020 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_DRAW_HH +#define HB_DRAW_HH + +#include "hb.hh" + +#ifdef HB_EXPERIMENTAL_API +struct hb_draw_funcs_t +{ + hb_object_header_t header; + + hb_draw_move_to_func_t move_to; + hb_draw_line_to_func_t line_to; + hb_draw_quadratic_to_func_t quadratic_to; + bool is_quadratic_to_set; + hb_draw_cubic_to_func_t cubic_to; + hb_draw_close_path_func_t close_path; +}; + +struct draw_helper_t +{ + draw_helper_t (const hb_draw_funcs_t *funcs_, void *user_data_) + { + funcs = funcs_; + user_data = user_data_; + path_open = false; + path_start_x = current_x = path_start_y = current_y = 0; + } + ~draw_helper_t () { end_path (); } + + void move_to (hb_position_t x, hb_position_t y) + { + if (path_open) end_path (); + current_x = path_start_x = x; + current_y = path_start_y = y; + } + + void line_to (hb_position_t x, hb_position_t y) + { + if (equal_to_current (x, y)) return; + if (!path_open) start_path (); + funcs->line_to (x, y, user_data); + current_x = x; + current_y = y; + } + + void + quadratic_to (hb_position_t control_x, hb_position_t control_y, + hb_position_t to_x, hb_position_t to_y) + { + if (equal_to_current (control_x, control_y) && equal_to_current (to_x, to_y)) + return; + if (!path_open) start_path (); + if (funcs->is_quadratic_to_set) + funcs->quadratic_to (control_x, control_y, to_x, to_y, user_data); + else + funcs->cubic_to (roundf ((current_x + 2.f * control_x) / 3.f), + roundf ((current_y + 2.f * control_y) / 3.f), + roundf ((to_x + 2.f * control_x) / 3.f), + roundf ((to_y + 2.f * control_y) / 3.f), + to_x, to_y, user_data); + current_x = to_x; + current_y = to_y; + } + + void + cubic_to (hb_position_t control1_x, hb_position_t control1_y, + hb_position_t control2_x, hb_position_t control2_y, + hb_position_t to_x, hb_position_t to_y) + { + if (equal_to_current (control1_x, control1_y) && + equal_to_current (control2_x, control2_y) && + equal_to_current (to_x, to_y)) + return; + if (!path_open) start_path (); + funcs->cubic_to (control1_x, control1_y, control2_x, control2_y, to_x, to_y, user_data); + current_x = to_x; + current_y = to_y; + } + + void end_path () + { + if (path_open) + { + if ((path_start_x != current_x) || (path_start_y != current_y)) + funcs->line_to (path_start_x, path_start_y, user_data); + funcs->close_path (user_data); + } + path_open = false; + path_start_x = current_x = path_start_y = current_y = 0; + } + + protected: + bool equal_to_current (hb_position_t x, hb_position_t y) + { return current_x == x && current_y == y; } + + void start_path () + { + if (path_open) end_path (); + path_open = true; + funcs->move_to (path_start_x, path_start_y, user_data); + } + + hb_position_t path_start_x; + hb_position_t path_start_y; + + hb_position_t current_x; + hb_position_t current_y; + + bool path_open; + const hb_draw_funcs_t *funcs; + void *user_data; +}; +#endif + +#endif /* HB_DRAW_HH */ diff --git a/thirdparty/harfbuzz/src/hb-face.cc b/thirdparty/harfbuzz/src/hb-face.cc new file mode 100644 index 0000000000..7bde50df5b --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-face.cc @@ -0,0 +1,733 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#include "hb-face.hh" +#include "hb-blob.hh" +#include "hb-open-file.hh" +#include "hb-ot-face.hh" +#include "hb-ot-cmap-table.hh" + + +/** + * SECTION:hb-face + * @title: hb-face + * @short_description: Font face objects + * @include: hb.h + * + * Font face is objects represent a single face in a font family. + * More exactly, a font face represents a single face in a binary font file. + * Font faces are typically built from a binary blob and a face index. + * Font faces are used to create fonts. + **/ + + +/** + * hb_face_count: + * @blob: a blob. + * + * Get number of faces in a blob. + * + * Return value: Number of faces in @blob + * + * Since: 1.7.7 + **/ +unsigned int +hb_face_count (hb_blob_t *blob) +{ + if (unlikely (!blob)) + return 0; + + /* TODO We shouldn't be sanitizing blob. Port to run sanitizer and return if not sane. */ + /* Make API signature const after. */ + hb_blob_t *sanitized = hb_sanitize_context_t ().sanitize_blob<OT::OpenTypeFontFile> (hb_blob_reference (blob)); + const OT::OpenTypeFontFile& ot = *sanitized->as<OT::OpenTypeFontFile> (); + unsigned int ret = ot.get_face_count (); + hb_blob_destroy (sanitized); + + return ret; +} + +/* + * hb_face_t + */ + +DEFINE_NULL_INSTANCE (hb_face_t) = +{ + HB_OBJECT_HEADER_STATIC, + + nullptr, /* reference_table_func */ + nullptr, /* user_data */ + nullptr, /* destroy */ + + 0, /* index */ + HB_ATOMIC_INT_INIT (1000), /* upem */ + HB_ATOMIC_INT_INIT (0), /* num_glyphs */ + + /* Zero for the rest is fine. */ +}; + + +/** + * hb_face_create_for_tables: + * @reference_table_func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Return value: (transfer full) + * + * Since: 0.9.2 + **/ +hb_face_t * +hb_face_create_for_tables (hb_reference_table_func_t reference_table_func, + void *user_data, + hb_destroy_func_t destroy) +{ + hb_face_t *face; + + if (!reference_table_func || !(face = hb_object_create<hb_face_t> ())) { + if (destroy) + destroy (user_data); + return hb_face_get_empty (); + } + + face->reference_table_func = reference_table_func; + face->user_data = user_data; + face->destroy = destroy; + + face->num_glyphs.set_relaxed (-1); + + face->data.init0 (face); + face->table.init0 (face); + + return face; +} + + +typedef struct hb_face_for_data_closure_t { + hb_blob_t *blob; + unsigned int index; +} hb_face_for_data_closure_t; + +static hb_face_for_data_closure_t * +_hb_face_for_data_closure_create (hb_blob_t *blob, unsigned int index) +{ + hb_face_for_data_closure_t *closure; + + closure = (hb_face_for_data_closure_t *) calloc (1, sizeof (hb_face_for_data_closure_t)); + if (unlikely (!closure)) + return nullptr; + + closure->blob = blob; + closure->index = index; + + return closure; +} + +static void +_hb_face_for_data_closure_destroy (void *data) +{ + hb_face_for_data_closure_t *closure = (hb_face_for_data_closure_t *) data; + + hb_blob_destroy (closure->blob); + free (closure); +} + +static hb_blob_t * +_hb_face_for_data_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void *user_data) +{ + hb_face_for_data_closure_t *data = (hb_face_for_data_closure_t *) user_data; + + if (tag == HB_TAG_NONE) + return hb_blob_reference (data->blob); + + const OT::OpenTypeFontFile &ot_file = *data->blob->as<OT::OpenTypeFontFile> (); + unsigned int base_offset; + const OT::OpenTypeFontFace &ot_face = ot_file.get_face (data->index, &base_offset); + + const OT::OpenTypeTable &table = ot_face.get_table_by_tag (tag); + + hb_blob_t *blob = hb_blob_create_sub_blob (data->blob, base_offset + table.offset, table.length); + + return blob; +} + +/** + * hb_face_create: (Xconstructor) + * @blob: + * @index: + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_face_t * +hb_face_create (hb_blob_t *blob, + unsigned int index) +{ + hb_face_t *face; + + if (unlikely (!blob)) + blob = hb_blob_get_empty (); + + blob = hb_sanitize_context_t ().sanitize_blob<OT::OpenTypeFontFile> (hb_blob_reference (blob)); + + hb_face_for_data_closure_t *closure = _hb_face_for_data_closure_create (blob, index); + + if (unlikely (!closure)) + { + hb_blob_destroy (blob); + return hb_face_get_empty (); + } + + face = hb_face_create_for_tables (_hb_face_for_data_reference_table, + closure, + _hb_face_for_data_closure_destroy); + + face->index = index; + + return face; +} + +/** + * hb_face_get_empty: + * + * + * + * Return value: (transfer full) + * + * Since: 0.9.2 + **/ +hb_face_t * +hb_face_get_empty () +{ + return const_cast<hb_face_t *> (&Null (hb_face_t)); +} + + +/** + * hb_face_reference: (skip) + * @face: a face. + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_face_t * +hb_face_reference (hb_face_t *face) +{ + return hb_object_reference (face); +} + +/** + * hb_face_destroy: (skip) + * @face: a face. + * + * + * + * Since: 0.9.2 + **/ +void +hb_face_destroy (hb_face_t *face) +{ + if (!hb_object_destroy (face)) return; + + for (hb_face_t::plan_node_t *node = face->shape_plans; node; ) + { + hb_face_t::plan_node_t *next = node->next; + hb_shape_plan_destroy (node->shape_plan); + free (node); + node = next; + } + + face->data.fini (); + face->table.fini (); + + if (face->destroy) + face->destroy (face->user_data); + + free (face); +} + +/** + * hb_face_set_user_data: (skip) + * @face: a face. + * @key: + * @data: + * @destroy: + * @replace: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_face_set_user_data (hb_face_t *face, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace) +{ + return hb_object_set_user_data (face, key, data, destroy, replace); +} + +/** + * hb_face_get_user_data: (skip) + * @face: a face. + * @key: + * + * + * + * Return value: (transfer none): + * + * Since: 0.9.2 + **/ +void * +hb_face_get_user_data (const hb_face_t *face, + hb_user_data_key_t *key) +{ + return hb_object_get_user_data (face, key); +} + +/** + * hb_face_make_immutable: + * @face: a face. + * + * + * + * Since: 0.9.2 + **/ +void +hb_face_make_immutable (hb_face_t *face) +{ + if (hb_object_is_immutable (face)) + return; + + hb_object_make_immutable (face); +} + +/** + * hb_face_is_immutable: + * @face: a face. + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_face_is_immutable (const hb_face_t *face) +{ + return hb_object_is_immutable (face); +} + + +/** + * hb_face_reference_table: + * @face: a face. + * @tag: + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_blob_t * +hb_face_reference_table (const hb_face_t *face, + hb_tag_t tag) +{ + if (unlikely (tag == HB_TAG_NONE)) + return hb_blob_get_empty (); + + return face->reference_table (tag); +} + +/** + * hb_face_reference_blob: + * @face: a face. + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_blob_t * +hb_face_reference_blob (hb_face_t *face) +{ + return face->reference_table (HB_TAG_NONE); +} + +/** + * hb_face_set_index: + * @face: a face. + * @index: + * + * + * + * Since: 0.9.2 + **/ +void +hb_face_set_index (hb_face_t *face, + unsigned int index) +{ + if (hb_object_is_immutable (face)) + return; + + face->index = index; +} + +/** + * hb_face_get_index: + * @face: a face. + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +unsigned int +hb_face_get_index (const hb_face_t *face) +{ + return face->index; +} + +/** + * hb_face_set_upem: + * @face: a face. + * @upem: + * + * + * + * Since: 0.9.2 + **/ +void +hb_face_set_upem (hb_face_t *face, + unsigned int upem) +{ + if (hb_object_is_immutable (face)) + return; + + face->upem.set_relaxed (upem); +} + +/** + * hb_face_get_upem: + * @face: a face. + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +unsigned int +hb_face_get_upem (const hb_face_t *face) +{ + return face->get_upem (); +} + +/** + * hb_face_set_glyph_count: + * @face: a face. + * @glyph_count: + * + * + * + * Since: 0.9.7 + **/ +void +hb_face_set_glyph_count (hb_face_t *face, + unsigned int glyph_count) +{ + if (hb_object_is_immutable (face)) + return; + + face->num_glyphs.set_relaxed (glyph_count); +} + +/** + * hb_face_get_glyph_count: + * @face: a face. + * + * + * + * Return value: + * + * Since: 0.9.7 + **/ +unsigned int +hb_face_get_glyph_count (const hb_face_t *face) +{ + return face->get_num_glyphs (); +} + +/** + * hb_face_get_table_tags: + * @face: a face. + * @start_offset: index of first tag to return. + * @table_count: input length of @table_tags array, output number of items written. + * @table_tags: array to write tags into. + * + * Retrieves table tags for a face, if possible. + * + * Return value: total number of tables, or 0 if not possible to list. + * + * Since: 1.6.0 + **/ +unsigned int +hb_face_get_table_tags (const hb_face_t *face, + unsigned int start_offset, + unsigned int *table_count, /* IN/OUT */ + hb_tag_t *table_tags /* OUT */) +{ + if (face->destroy != (hb_destroy_func_t) _hb_face_for_data_closure_destroy) + { + if (table_count) + *table_count = 0; + return 0; + } + + hb_face_for_data_closure_t *data = (hb_face_for_data_closure_t *) face->user_data; + + const OT::OpenTypeFontFile &ot_file = *data->blob->as<OT::OpenTypeFontFile> (); + const OT::OpenTypeFontFace &ot_face = ot_file.get_face (data->index); + + return ot_face.get_table_tags (start_offset, table_count, table_tags); +} + + +/* + * Character set. + */ + + +#ifndef HB_NO_FACE_COLLECT_UNICODES +/** + * hb_face_collect_unicodes: + * @face: font face. + * @out: set to add Unicode characters covered by @face to. + * + * Since: 1.9.0 + */ +void +hb_face_collect_unicodes (hb_face_t *face, + hb_set_t *out) +{ + face->table.cmap->collect_unicodes (out, face->get_num_glyphs ()); +} +/** + * hb_face_collect_variation_selectors: + * @face: font face. + * @out: set to add Variation Selector characters covered by @face to. + * + * + * + * Since: 1.9.0 + */ +void +hb_face_collect_variation_selectors (hb_face_t *face, + hb_set_t *out) +{ + face->table.cmap->collect_variation_selectors (out); +} +/** + * hb_face_collect_variation_unicodes: + * @face: font face. + * @out: set to add Unicode characters for @variation_selector covered by @face to. + * + * + * + * Since: 1.9.0 + */ +void +hb_face_collect_variation_unicodes (hb_face_t *face, + hb_codepoint_t variation_selector, + hb_set_t *out) +{ + face->table.cmap->collect_variation_unicodes (variation_selector, out); +} +#endif + + +/* + * face-builder: A face that has add_table(). + */ + +struct hb_face_builder_data_t +{ + struct table_entry_t + { + int cmp (hb_tag_t t) const + { + if (t < tag) return -1; + if (t > tag) return -1; + return 0; + } + + hb_tag_t tag; + hb_blob_t *blob; + }; + + hb_vector_t<table_entry_t> tables; +}; + +static hb_face_builder_data_t * +_hb_face_builder_data_create () +{ + hb_face_builder_data_t *data = (hb_face_builder_data_t *) calloc (1, sizeof (hb_face_builder_data_t)); + if (unlikely (!data)) + return nullptr; + + data->tables.init (); + + return data; +} + +static void +_hb_face_builder_data_destroy (void *user_data) +{ + hb_face_builder_data_t *data = (hb_face_builder_data_t *) user_data; + + for (unsigned int i = 0; i < data->tables.length; i++) + hb_blob_destroy (data->tables[i].blob); + + data->tables.fini (); + + free (data); +} + +static hb_blob_t * +_hb_face_builder_data_reference_blob (hb_face_builder_data_t *data) +{ + + unsigned int table_count = data->tables.length; + unsigned int face_length = table_count * 16 + 12; + + for (unsigned int i = 0; i < table_count; i++) + face_length += hb_ceil_to_4 (hb_blob_get_length (data->tables[i].blob)); + + char *buf = (char *) malloc (face_length); + if (unlikely (!buf)) + return nullptr; + + hb_serialize_context_t c (buf, face_length); + c.propagate_error (data->tables); + OT::OpenTypeFontFile *f = c.start_serialize<OT::OpenTypeFontFile> (); + + bool is_cff = data->tables.lsearch (HB_TAG ('C','F','F',' ')) || data->tables.lsearch (HB_TAG ('C','F','F','2')); + hb_tag_t sfnt_tag = is_cff ? OT::OpenTypeFontFile::CFFTag : OT::OpenTypeFontFile::TrueTypeTag; + + bool ret = f->serialize_single (&c, sfnt_tag, data->tables.as_array ()); + + c.end_serialize (); + + if (unlikely (!ret)) + { + free (buf); + return nullptr; + } + + return hb_blob_create (buf, face_length, HB_MEMORY_MODE_WRITABLE, buf, free); +} + +static hb_blob_t * +_hb_face_builder_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void *user_data) +{ + hb_face_builder_data_t *data = (hb_face_builder_data_t *) user_data; + + if (!tag) + return _hb_face_builder_data_reference_blob (data); + + hb_face_builder_data_t::table_entry_t *entry = data->tables.lsearch (tag); + if (entry) + return hb_blob_reference (entry->blob); + + return nullptr; +} + + +/** + * hb_face_builder_create: + * + * Creates a #hb_face_t that can be used with hb_face_builder_add_table(). + * After tables are added to the face, it can be compiled to a binary + * font file by calling hb_face_reference_blob(). + * + * Return value: (transfer full): New face. + * + * Since: 1.9.0 + **/ +hb_face_t * +hb_face_builder_create () +{ + hb_face_builder_data_t *data = _hb_face_builder_data_create (); + if (unlikely (!data)) return hb_face_get_empty (); + + return hb_face_create_for_tables (_hb_face_builder_reference_table, + data, + _hb_face_builder_data_destroy); +} + +/** + * hb_face_builder_add_table: + * + * Add table for @tag with data provided by @blob to the face. @face must + * be created using hb_face_builder_create(). + * + * Since: 1.9.0 + **/ +hb_bool_t +hb_face_builder_add_table (hb_face_t *face, hb_tag_t tag, hb_blob_t *blob) +{ + if (unlikely (face->destroy != (hb_destroy_func_t) _hb_face_builder_data_destroy)) + return false; + + hb_face_builder_data_t *data = (hb_face_builder_data_t *) face->user_data; + + hb_face_builder_data_t::table_entry_t *entry = data->tables.push (); + if (data->tables.in_error()) + return false; + + entry->tag = tag; + entry->blob = hb_blob_reference (blob); + + return true; +} diff --git a/thirdparty/harfbuzz/src/hb-face.h b/thirdparty/harfbuzz/src/hb-face.h new file mode 100644 index 0000000000..e8ff090d55 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-face.h @@ -0,0 +1,158 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_FACE_H +#define HB_FACE_H + +#include "hb-common.h" +#include "hb-blob.h" +#include "hb-set.h" + +HB_BEGIN_DECLS + + +HB_EXTERN unsigned int +hb_face_count (hb_blob_t *blob); + + +/* + * hb_face_t + */ + +typedef struct hb_face_t hb_face_t; + +HB_EXTERN hb_face_t * +hb_face_create (hb_blob_t *blob, + unsigned int index); + +typedef hb_blob_t * (*hb_reference_table_func_t) (hb_face_t *face, hb_tag_t tag, void *user_data); + +/* calls destroy() when not needing user_data anymore */ +HB_EXTERN hb_face_t * +hb_face_create_for_tables (hb_reference_table_func_t reference_table_func, + void *user_data, + hb_destroy_func_t destroy); + +HB_EXTERN hb_face_t * +hb_face_get_empty (void); + +HB_EXTERN hb_face_t * +hb_face_reference (hb_face_t *face); + +HB_EXTERN void +hb_face_destroy (hb_face_t *face); + +HB_EXTERN hb_bool_t +hb_face_set_user_data (hb_face_t *face, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace); + +HB_EXTERN void * +hb_face_get_user_data (const hb_face_t *face, + hb_user_data_key_t *key); + +HB_EXTERN void +hb_face_make_immutable (hb_face_t *face); + +HB_EXTERN hb_bool_t +hb_face_is_immutable (const hb_face_t *face); + + +HB_EXTERN hb_blob_t * +hb_face_reference_table (const hb_face_t *face, + hb_tag_t tag); + +HB_EXTERN hb_blob_t * +hb_face_reference_blob (hb_face_t *face); + +HB_EXTERN void +hb_face_set_index (hb_face_t *face, + unsigned int index); + +HB_EXTERN unsigned int +hb_face_get_index (const hb_face_t *face); + +HB_EXTERN void +hb_face_set_upem (hb_face_t *face, + unsigned int upem); + +HB_EXTERN unsigned int +hb_face_get_upem (const hb_face_t *face); + +HB_EXTERN void +hb_face_set_glyph_count (hb_face_t *face, + unsigned int glyph_count); + +HB_EXTERN unsigned int +hb_face_get_glyph_count (const hb_face_t *face); + +HB_EXTERN unsigned int +hb_face_get_table_tags (const hb_face_t *face, + unsigned int start_offset, + unsigned int *table_count, /* IN/OUT */ + hb_tag_t *table_tags /* OUT */); + + +/* + * Character set. + */ + +HB_EXTERN void +hb_face_collect_unicodes (hb_face_t *face, + hb_set_t *out); + +HB_EXTERN void +hb_face_collect_variation_selectors (hb_face_t *face, + hb_set_t *out); + +HB_EXTERN void +hb_face_collect_variation_unicodes (hb_face_t *face, + hb_codepoint_t variation_selector, + hb_set_t *out); + + +/* + * Builder face. + */ + +HB_EXTERN hb_face_t * +hb_face_builder_create (void); + +HB_EXTERN hb_bool_t +hb_face_builder_add_table (hb_face_t *face, + hb_tag_t tag, + hb_blob_t *blob); + + +HB_END_DECLS + +#endif /* HB_FACE_H */ diff --git a/thirdparty/harfbuzz/src/hb-face.hh b/thirdparty/harfbuzz/src/hb-face.hh new file mode 100644 index 0000000000..f1b472ccf3 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-face.hh @@ -0,0 +1,109 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_FACE_HH +#define HB_FACE_HH + +#include "hb.hh" + +#include "hb-shaper.hh" +#include "hb-shape-plan.hh" +#include "hb-ot-face.hh" + + +/* + * hb_face_t + */ + +#define HB_SHAPER_IMPLEMENT(shaper) HB_SHAPER_DATA_INSTANTIATE_SHAPERS(shaper, face); +#include "hb-shaper-list.hh" +#undef HB_SHAPER_IMPLEMENT + +struct hb_face_t +{ + hb_object_header_t header; + + hb_reference_table_func_t reference_table_func; + void *user_data; + hb_destroy_func_t destroy; + + unsigned int index; /* Face index in a collection, zero-based. */ + mutable hb_atomic_int_t upem; /* Units-per-EM. */ + mutable hb_atomic_int_t num_glyphs; /* Number of glyphs. */ + + hb_shaper_object_dataset_t<hb_face_t> data;/* Various shaper data. */ + hb_ot_face_t table; /* All the face's tables. */ + + /* Cache */ + struct plan_node_t + { + hb_shape_plan_t *shape_plan; + plan_node_t *next; + }; + hb_atomic_ptr_t<plan_node_t> shape_plans; + + hb_blob_t *reference_table (hb_tag_t tag) const + { + hb_blob_t *blob; + + if (unlikely (!reference_table_func)) + return hb_blob_get_empty (); + + blob = reference_table_func (/*XXX*/const_cast<hb_face_t *> (this), tag, user_data); + if (unlikely (!blob)) + return hb_blob_get_empty (); + + return blob; + } + + HB_PURE_FUNC unsigned int get_upem () const + { + unsigned int ret = upem.get_relaxed (); + if (unlikely (!ret)) + { + return load_upem (); + } + return ret; + } + + unsigned int get_num_glyphs () const + { + unsigned int ret = num_glyphs.get_relaxed (); + if (unlikely (ret == UINT_MAX)) + return load_num_glyphs (); + return ret; + } + + private: + HB_INTERNAL unsigned int load_upem () const; + HB_INTERNAL unsigned int load_num_glyphs () const; +}; +DECLARE_NULL_INSTANCE (hb_face_t); + + +#endif /* HB_FACE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-fallback-shape.cc b/thirdparty/harfbuzz/src/hb-fallback-shape.cc new file mode 100644 index 0000000000..c5b7c2c230 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-fallback-shape.cc @@ -0,0 +1,125 @@ +/* + * Copyright © 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-shaper-impl.hh" + +#ifndef HB_NO_FALLBACK_SHAPE + +/* + * shaper face data + */ + +struct hb_fallback_face_data_t {}; + +hb_fallback_face_data_t * +_hb_fallback_shaper_face_data_create (hb_face_t *face HB_UNUSED) +{ + return (hb_fallback_face_data_t *) HB_SHAPER_DATA_SUCCEEDED; +} + +void +_hb_fallback_shaper_face_data_destroy (hb_fallback_face_data_t *data HB_UNUSED) +{ +} + + +/* + * shaper font data + */ + +struct hb_fallback_font_data_t {}; + +hb_fallback_font_data_t * +_hb_fallback_shaper_font_data_create (hb_font_t *font HB_UNUSED) +{ + return (hb_fallback_font_data_t *) HB_SHAPER_DATA_SUCCEEDED; +} + +void +_hb_fallback_shaper_font_data_destroy (hb_fallback_font_data_t *data HB_UNUSED) +{ +} + + +/* + * shaper + */ + +hb_bool_t +_hb_fallback_shape (hb_shape_plan_t *shape_plan HB_UNUSED, + hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features HB_UNUSED, + unsigned int num_features HB_UNUSED) +{ + /* TODO + * + * - Apply fallback kern. + * - Handle Variation Selectors? + * - Apply normalization? + * + * This will make the fallback shaper into a dumb "TrueType" + * shaper which many people unfortunately still request. + */ + + hb_codepoint_t space; + bool has_space = (bool) font->get_nominal_glyph (' ', &space); + + buffer->clear_positions (); + + hb_direction_t direction = buffer->props.direction; + hb_unicode_funcs_t *unicode = buffer->unicode; + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + hb_glyph_position_t *pos = buffer->pos; + for (unsigned int i = 0; i < count; i++) + { + if (has_space && unicode->is_default_ignorable (info[i].codepoint)) { + info[i].codepoint = space; + pos[i].x_advance = 0; + pos[i].y_advance = 0; + continue; + } + (void) font->get_nominal_glyph (info[i].codepoint, &info[i].codepoint); + font->get_glyph_advance_for_direction (info[i].codepoint, + direction, + &pos[i].x_advance, + &pos[i].y_advance); + font->subtract_glyph_origin_for_direction (info[i].codepoint, + direction, + &pos[i].x_offset, + &pos[i].y_offset); + } + + if (HB_DIRECTION_IS_BACKWARD (direction)) + hb_buffer_reverse (buffer); + + buffer->safe_to_break_all (); + + return true; +} + +#endif diff --git a/thirdparty/harfbuzz/src/hb-font.cc b/thirdparty/harfbuzz/src/hb-font.cc new file mode 100644 index 0000000000..27959487dc --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-font.cc @@ -0,0 +1,2186 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#include "hb-font.hh" +#include "hb-machinery.hh" + +#include "hb-ot.h" + +#include "hb-ot-var-avar-table.hh" +#include "hb-ot-var-fvar-table.hh" + + +/** + * SECTION:hb-font + * @title: hb-font + * @short_description: Font objects + * @include: hb.h + * + * Font objects represent a font face at a certain size and other + * parameters (pixels per EM, points per EM, variation settings.) + * Fonts are created from font faces, and are used as input to + * hb_shape() among other things. + **/ + + +/* + * hb_font_funcs_t + */ + +static hb_bool_t +hb_font_get_font_h_extents_nil (hb_font_t *font HB_UNUSED, + void *font_data HB_UNUSED, + hb_font_extents_t *extents, + void *user_data HB_UNUSED) +{ + memset (extents, 0, sizeof (*extents)); + return false; +} +static hb_bool_t +hb_font_get_font_h_extents_default (hb_font_t *font, + void *font_data HB_UNUSED, + hb_font_extents_t *extents, + void *user_data HB_UNUSED) +{ + hb_bool_t ret = font->parent->get_font_h_extents (extents); + if (ret) { + extents->ascender = font->parent_scale_y_distance (extents->ascender); + extents->descender = font->parent_scale_y_distance (extents->descender); + extents->line_gap = font->parent_scale_y_distance (extents->line_gap); + } + return ret; +} + +static hb_bool_t +hb_font_get_font_v_extents_nil (hb_font_t *font HB_UNUSED, + void *font_data HB_UNUSED, + hb_font_extents_t *extents, + void *user_data HB_UNUSED) +{ + memset (extents, 0, sizeof (*extents)); + return false; +} +static hb_bool_t +hb_font_get_font_v_extents_default (hb_font_t *font, + void *font_data HB_UNUSED, + hb_font_extents_t *extents, + void *user_data HB_UNUSED) +{ + hb_bool_t ret = font->parent->get_font_v_extents (extents); + if (ret) { + extents->ascender = font->parent_scale_x_distance (extents->ascender); + extents->descender = font->parent_scale_x_distance (extents->descender); + extents->line_gap = font->parent_scale_x_distance (extents->line_gap); + } + return ret; +} + +static hb_bool_t +hb_font_get_nominal_glyph_nil (hb_font_t *font HB_UNUSED, + void *font_data HB_UNUSED, + hb_codepoint_t unicode HB_UNUSED, + hb_codepoint_t *glyph, + void *user_data HB_UNUSED) +{ + *glyph = 0; + return false; +} +static hb_bool_t +hb_font_get_nominal_glyph_default (hb_font_t *font, + void *font_data HB_UNUSED, + hb_codepoint_t unicode, + hb_codepoint_t *glyph, + void *user_data HB_UNUSED) +{ + if (font->has_nominal_glyphs_func_set ()) + { + return font->get_nominal_glyphs (1, &unicode, 0, glyph, 0); + } + return font->parent->get_nominal_glyph (unicode, glyph); +} + +#define hb_font_get_nominal_glyphs_nil hb_font_get_nominal_glyphs_default +static unsigned int +hb_font_get_nominal_glyphs_default (hb_font_t *font, + void *font_data HB_UNUSED, + unsigned int count, + const hb_codepoint_t *first_unicode, + unsigned int unicode_stride, + hb_codepoint_t *first_glyph, + unsigned int glyph_stride, + void *user_data HB_UNUSED) +{ + if (font->has_nominal_glyph_func_set ()) + { + for (unsigned int i = 0; i < count; i++) + { + if (!font->get_nominal_glyph (*first_unicode, first_glyph)) + return i; + + first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride); + first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride); + } + return count; + } + + return font->parent->get_nominal_glyphs (count, + first_unicode, unicode_stride, + first_glyph, glyph_stride); +} + +static hb_bool_t +hb_font_get_variation_glyph_nil (hb_font_t *font HB_UNUSED, + void *font_data HB_UNUSED, + hb_codepoint_t unicode HB_UNUSED, + hb_codepoint_t variation_selector HB_UNUSED, + hb_codepoint_t *glyph, + void *user_data HB_UNUSED) +{ + *glyph = 0; + return false; +} +static hb_bool_t +hb_font_get_variation_glyph_default (hb_font_t *font, + void *font_data HB_UNUSED, + hb_codepoint_t unicode, + hb_codepoint_t variation_selector, + hb_codepoint_t *glyph, + void *user_data HB_UNUSED) +{ + return font->parent->get_variation_glyph (unicode, variation_selector, glyph); +} + + +static hb_position_t +hb_font_get_glyph_h_advance_nil (hb_font_t *font, + void *font_data HB_UNUSED, + hb_codepoint_t glyph HB_UNUSED, + void *user_data HB_UNUSED) +{ + return font->x_scale; +} +static hb_position_t +hb_font_get_glyph_h_advance_default (hb_font_t *font, + void *font_data HB_UNUSED, + hb_codepoint_t glyph, + void *user_data HB_UNUSED) +{ + if (font->has_glyph_h_advances_func_set ()) + { + hb_position_t ret; + font->get_glyph_h_advances (1, &glyph, 0, &ret, 0); + return ret; + } + return font->parent_scale_x_distance (font->parent->get_glyph_h_advance (glyph)); +} + +static hb_position_t +hb_font_get_glyph_v_advance_nil (hb_font_t *font, + void *font_data HB_UNUSED, + hb_codepoint_t glyph HB_UNUSED, + void *user_data HB_UNUSED) +{ + /* TODO use font_extents.ascender+descender */ + return font->y_scale; +} +static hb_position_t +hb_font_get_glyph_v_advance_default (hb_font_t *font, + void *font_data HB_UNUSED, + hb_codepoint_t glyph, + void *user_data HB_UNUSED) +{ + if (font->has_glyph_v_advances_func_set ()) + { + hb_position_t ret; + font->get_glyph_v_advances (1, &glyph, 0, &ret, 0); + return ret; + } + return font->parent_scale_y_distance (font->parent->get_glyph_v_advance (glyph)); +} + +#define hb_font_get_glyph_h_advances_nil hb_font_get_glyph_h_advances_default +static void +hb_font_get_glyph_h_advances_default (hb_font_t* font, + void* font_data HB_UNUSED, + unsigned int count, + const hb_codepoint_t *first_glyph, + unsigned int glyph_stride, + hb_position_t *first_advance, + unsigned int advance_stride, + void *user_data HB_UNUSED) +{ + if (font->has_glyph_h_advance_func_set ()) + { + for (unsigned int i = 0; i < count; i++) + { + *first_advance = font->get_glyph_h_advance (*first_glyph); + first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride); + first_advance = &StructAtOffsetUnaligned<hb_position_t> (first_advance, advance_stride); + } + return; + } + + font->parent->get_glyph_h_advances (count, + first_glyph, glyph_stride, + first_advance, advance_stride); + for (unsigned int i = 0; i < count; i++) + { + *first_advance = font->parent_scale_x_distance (*first_advance); + first_advance = &StructAtOffsetUnaligned<hb_position_t> (first_advance, advance_stride); + } +} + +#define hb_font_get_glyph_v_advances_nil hb_font_get_glyph_v_advances_default +static void +hb_font_get_glyph_v_advances_default (hb_font_t* font, + void* font_data HB_UNUSED, + unsigned int count, + const hb_codepoint_t *first_glyph, + unsigned int glyph_stride, + hb_position_t *first_advance, + unsigned int advance_stride, + void *user_data HB_UNUSED) +{ + if (font->has_glyph_v_advance_func_set ()) + { + for (unsigned int i = 0; i < count; i++) + { + *first_advance = font->get_glyph_v_advance (*first_glyph); + first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride); + first_advance = &StructAtOffsetUnaligned<hb_position_t> (first_advance, advance_stride); + } + return; + } + + font->parent->get_glyph_v_advances (count, + first_glyph, glyph_stride, + first_advance, advance_stride); + for (unsigned int i = 0; i < count; i++) + { + *first_advance = font->parent_scale_y_distance (*first_advance); + first_advance = &StructAtOffsetUnaligned<hb_position_t> (first_advance, advance_stride); + } +} + +static hb_bool_t +hb_font_get_glyph_h_origin_nil (hb_font_t *font HB_UNUSED, + void *font_data HB_UNUSED, + hb_codepoint_t glyph HB_UNUSED, + hb_position_t *x, + hb_position_t *y, + void *user_data HB_UNUSED) +{ + *x = *y = 0; + return true; +} +static hb_bool_t +hb_font_get_glyph_h_origin_default (hb_font_t *font, + void *font_data HB_UNUSED, + hb_codepoint_t glyph, + hb_position_t *x, + hb_position_t *y, + void *user_data HB_UNUSED) +{ + hb_bool_t ret = font->parent->get_glyph_h_origin (glyph, x, y); + if (ret) + font->parent_scale_position (x, y); + return ret; +} + +static hb_bool_t +hb_font_get_glyph_v_origin_nil (hb_font_t *font HB_UNUSED, + void *font_data HB_UNUSED, + hb_codepoint_t glyph HB_UNUSED, + hb_position_t *x, + hb_position_t *y, + void *user_data HB_UNUSED) +{ + *x = *y = 0; + return false; +} +static hb_bool_t +hb_font_get_glyph_v_origin_default (hb_font_t *font, + void *font_data HB_UNUSED, + hb_codepoint_t glyph, + hb_position_t *x, + hb_position_t *y, + void *user_data HB_UNUSED) +{ + hb_bool_t ret = font->parent->get_glyph_v_origin (glyph, x, y); + if (ret) + font->parent_scale_position (x, y); + return ret; +} + +static hb_position_t +hb_font_get_glyph_h_kerning_nil (hb_font_t *font HB_UNUSED, + void *font_data HB_UNUSED, + hb_codepoint_t left_glyph HB_UNUSED, + hb_codepoint_t right_glyph HB_UNUSED, + void *user_data HB_UNUSED) +{ + return 0; +} +static hb_position_t +hb_font_get_glyph_h_kerning_default (hb_font_t *font, + void *font_data HB_UNUSED, + hb_codepoint_t left_glyph, + hb_codepoint_t right_glyph, + void *user_data HB_UNUSED) +{ + return font->parent_scale_x_distance (font->parent->get_glyph_h_kerning (left_glyph, right_glyph)); +} + +#ifndef HB_DISABLE_DEPRECATED +static hb_position_t +hb_font_get_glyph_v_kerning_nil (hb_font_t *font HB_UNUSED, + void *font_data HB_UNUSED, + hb_codepoint_t top_glyph HB_UNUSED, + hb_codepoint_t bottom_glyph HB_UNUSED, + void *user_data HB_UNUSED) +{ + return 0; +} +static hb_position_t +hb_font_get_glyph_v_kerning_default (hb_font_t *font, + void *font_data HB_UNUSED, + hb_codepoint_t top_glyph, + hb_codepoint_t bottom_glyph, + void *user_data HB_UNUSED) +{ + return font->parent_scale_y_distance (font->parent->get_glyph_v_kerning (top_glyph, bottom_glyph)); +} +#endif + +static hb_bool_t +hb_font_get_glyph_extents_nil (hb_font_t *font HB_UNUSED, + void *font_data HB_UNUSED, + hb_codepoint_t glyph HB_UNUSED, + hb_glyph_extents_t *extents, + void *user_data HB_UNUSED) +{ + memset (extents, 0, sizeof (*extents)); + return false; +} +static hb_bool_t +hb_font_get_glyph_extents_default (hb_font_t *font, + void *font_data HB_UNUSED, + hb_codepoint_t glyph, + hb_glyph_extents_t *extents, + void *user_data HB_UNUSED) +{ + hb_bool_t ret = font->parent->get_glyph_extents (glyph, extents); + if (ret) { + font->parent_scale_position (&extents->x_bearing, &extents->y_bearing); + font->parent_scale_distance (&extents->width, &extents->height); + } + return ret; +} + +static hb_bool_t +hb_font_get_glyph_contour_point_nil (hb_font_t *font HB_UNUSED, + void *font_data HB_UNUSED, + hb_codepoint_t glyph HB_UNUSED, + unsigned int point_index HB_UNUSED, + hb_position_t *x, + hb_position_t *y, + void *user_data HB_UNUSED) +{ + *x = *y = 0; + return false; +} +static hb_bool_t +hb_font_get_glyph_contour_point_default (hb_font_t *font, + void *font_data HB_UNUSED, + hb_codepoint_t glyph, + unsigned int point_index, + hb_position_t *x, + hb_position_t *y, + void *user_data HB_UNUSED) +{ + hb_bool_t ret = font->parent->get_glyph_contour_point (glyph, point_index, x, y); + if (ret) + font->parent_scale_position (x, y); + return ret; +} + +static hb_bool_t +hb_font_get_glyph_name_nil (hb_font_t *font HB_UNUSED, + void *font_data HB_UNUSED, + hb_codepoint_t glyph HB_UNUSED, + char *name, unsigned int size, + void *user_data HB_UNUSED) +{ + if (size) *name = '\0'; + return false; +} +static hb_bool_t +hb_font_get_glyph_name_default (hb_font_t *font, + void *font_data HB_UNUSED, + hb_codepoint_t glyph, + char *name, unsigned int size, + void *user_data HB_UNUSED) +{ + return font->parent->get_glyph_name (glyph, name, size); +} + +static hb_bool_t +hb_font_get_glyph_from_name_nil (hb_font_t *font HB_UNUSED, + void *font_data HB_UNUSED, + const char *name HB_UNUSED, + int len HB_UNUSED, /* -1 means nul-terminated */ + hb_codepoint_t *glyph, + void *user_data HB_UNUSED) +{ + *glyph = 0; + return false; +} +static hb_bool_t +hb_font_get_glyph_from_name_default (hb_font_t *font, + void *font_data HB_UNUSED, + const char *name, int len, /* -1 means nul-terminated */ + hb_codepoint_t *glyph, + void *user_data HB_UNUSED) +{ + return font->parent->get_glyph_from_name (name, len, glyph); +} + +DEFINE_NULL_INSTANCE (hb_font_funcs_t) = +{ + HB_OBJECT_HEADER_STATIC, + + { +#define HB_FONT_FUNC_IMPLEMENT(name) nullptr, + HB_FONT_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_FONT_FUNC_IMPLEMENT + }, + { +#define HB_FONT_FUNC_IMPLEMENT(name) nullptr, + HB_FONT_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_FONT_FUNC_IMPLEMENT + }, + { + { +#define HB_FONT_FUNC_IMPLEMENT(name) hb_font_get_##name##_nil, + HB_FONT_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_FONT_FUNC_IMPLEMENT + } + } +}; + +static const hb_font_funcs_t _hb_font_funcs_default = { + HB_OBJECT_HEADER_STATIC, + + { +#define HB_FONT_FUNC_IMPLEMENT(name) nullptr, + HB_FONT_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_FONT_FUNC_IMPLEMENT + }, + { +#define HB_FONT_FUNC_IMPLEMENT(name) nullptr, + HB_FONT_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_FONT_FUNC_IMPLEMENT + }, + { + { +#define HB_FONT_FUNC_IMPLEMENT(name) hb_font_get_##name##_default, + HB_FONT_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_FONT_FUNC_IMPLEMENT + } + } +}; + + +/** + * hb_font_funcs_create: (Xconstructor) + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_font_funcs_t * +hb_font_funcs_create () +{ + hb_font_funcs_t *ffuncs; + + if (!(ffuncs = hb_object_create<hb_font_funcs_t> ())) + return hb_font_funcs_get_empty (); + + ffuncs->get = _hb_font_funcs_default.get; + + return ffuncs; +} + +/** + * hb_font_funcs_get_empty: + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_font_funcs_t * +hb_font_funcs_get_empty () +{ + return const_cast<hb_font_funcs_t *> (&_hb_font_funcs_default); +} + +/** + * hb_font_funcs_reference: (skip) + * @ffuncs: font functions. + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_font_funcs_t * +hb_font_funcs_reference (hb_font_funcs_t *ffuncs) +{ + return hb_object_reference (ffuncs); +} + +/** + * hb_font_funcs_destroy: (skip) + * @ffuncs: font functions. + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_funcs_destroy (hb_font_funcs_t *ffuncs) +{ + if (!hb_object_destroy (ffuncs)) return; + +#define HB_FONT_FUNC_IMPLEMENT(name) if (ffuncs->destroy.name) \ + ffuncs->destroy.name (ffuncs->user_data.name); + HB_FONT_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_FONT_FUNC_IMPLEMENT + + free (ffuncs); +} + +/** + * hb_font_funcs_set_user_data: (skip) + * @ffuncs: font functions. + * @key: + * @data: + * @destroy: + * @replace: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_funcs_set_user_data (hb_font_funcs_t *ffuncs, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace) +{ + return hb_object_set_user_data (ffuncs, key, data, destroy, replace); +} + +/** + * hb_font_funcs_get_user_data: (skip) + * @ffuncs: font functions. + * @key: + * + * + * + * Return value: (transfer none): + * + * Since: 0.9.2 + **/ +void * +hb_font_funcs_get_user_data (hb_font_funcs_t *ffuncs, + hb_user_data_key_t *key) +{ + return hb_object_get_user_data (ffuncs, key); +} + + +/** + * hb_font_funcs_make_immutable: + * @ffuncs: font functions. + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_funcs_make_immutable (hb_font_funcs_t *ffuncs) +{ + if (hb_object_is_immutable (ffuncs)) + return; + + hb_object_make_immutable (ffuncs); +} + +/** + * hb_font_funcs_is_immutable: + * @ffuncs: font functions. + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_funcs_is_immutable (hb_font_funcs_t *ffuncs) +{ + return hb_object_is_immutable (ffuncs); +} + + +#define HB_FONT_FUNC_IMPLEMENT(name) \ + \ +void \ +hb_font_funcs_set_##name##_func (hb_font_funcs_t *ffuncs, \ + hb_font_get_##name##_func_t func, \ + void *user_data, \ + hb_destroy_func_t destroy) \ +{ \ + if (hb_object_is_immutable (ffuncs)) \ + { \ + if (destroy) \ + destroy (user_data); \ + return; \ + } \ + \ + if (ffuncs->destroy.name) \ + ffuncs->destroy.name (ffuncs->user_data.name); \ + \ + if (func) { \ + ffuncs->get.f.name = func; \ + ffuncs->user_data.name = user_data; \ + ffuncs->destroy.name = destroy; \ + } else { \ + ffuncs->get.f.name = hb_font_get_##name##_default; \ + ffuncs->user_data.name = nullptr; \ + ffuncs->destroy.name = nullptr; \ + } \ +} + +HB_FONT_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_FONT_FUNC_IMPLEMENT + +bool +hb_font_t::has_func_set (unsigned int i) +{ + return this->klass->get.array[i] != _hb_font_funcs_default.get.array[i]; +} + +bool +hb_font_t::has_func (unsigned int i) +{ + return has_func_set (i) || + (parent && parent != &_hb_Null_hb_font_t && parent->has_func (i)); +} + +/* Public getters */ + +/** + * hb_font_get_h_extents: + * @font: a font. + * @extents: (out): + * + * + * + * Return value: + * + * Since: 1.1.3 + **/ +hb_bool_t +hb_font_get_h_extents (hb_font_t *font, + hb_font_extents_t *extents) +{ + return font->get_font_h_extents (extents); +} + +/** + * hb_font_get_v_extents: + * @font: a font. + * @extents: (out): + * + * + * + * Return value: + * + * Since: 1.1.3 + **/ +hb_bool_t +hb_font_get_v_extents (hb_font_t *font, + hb_font_extents_t *extents) +{ + return font->get_font_v_extents (extents); +} + +/** + * hb_font_get_glyph: + * @font: a font. + * @unicode: + * @variation_selector: + * @glyph: (out): + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_get_glyph (hb_font_t *font, + hb_codepoint_t unicode, hb_codepoint_t variation_selector, + hb_codepoint_t *glyph) +{ + if (unlikely (variation_selector)) + return font->get_variation_glyph (unicode, variation_selector, glyph); + return font->get_nominal_glyph (unicode, glyph); +} + +/** + * hb_font_get_nominal_glyph: + * @font: a font. + * @unicode: + * @glyph: (out): + * + * + * + * Return value: + * + * Since: 1.2.3 + **/ +hb_bool_t +hb_font_get_nominal_glyph (hb_font_t *font, + hb_codepoint_t unicode, + hb_codepoint_t *glyph) +{ + return font->get_nominal_glyph (unicode, glyph); +} + +/** + * hb_font_get_nominal_glyphs: + * @font: a font. + * + * + * + * Return value: + * + * Since: 2.6.3 + **/ +unsigned int +hb_font_get_nominal_glyphs (hb_font_t *font, + unsigned int count, + const hb_codepoint_t *first_unicode, + unsigned int unicode_stride, + hb_codepoint_t *first_glyph, + unsigned int glyph_stride) +{ + return font->get_nominal_glyphs (count, + first_unicode, unicode_stride, + first_glyph, glyph_stride); +} + +/** + * hb_font_get_variation_glyph: + * @font: a font. + * @unicode: + * @variation_selector: + * @glyph: (out): + * + * + * + * Return value: + * + * Since: 1.2.3 + **/ +hb_bool_t +hb_font_get_variation_glyph (hb_font_t *font, + hb_codepoint_t unicode, hb_codepoint_t variation_selector, + hb_codepoint_t *glyph) +{ + return font->get_variation_glyph (unicode, variation_selector, glyph); +} + +/** + * hb_font_get_glyph_h_advance: + * @font: a font. + * @glyph: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_position_t +hb_font_get_glyph_h_advance (hb_font_t *font, + hb_codepoint_t glyph) +{ + return font->get_glyph_h_advance (glyph); +} + +/** + * hb_font_get_glyph_v_advance: + * @font: a font. + * @glyph: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_position_t +hb_font_get_glyph_v_advance (hb_font_t *font, + hb_codepoint_t glyph) +{ + return font->get_glyph_v_advance (glyph); +} + +/** + * hb_font_get_glyph_h_advances: + * @font: a font. + * + * + * + * Since: 1.8.6 + **/ +void +hb_font_get_glyph_h_advances (hb_font_t* font, + unsigned int count, + const hb_codepoint_t *first_glyph, + unsigned glyph_stride, + hb_position_t *first_advance, + unsigned advance_stride) +{ + font->get_glyph_h_advances (count, first_glyph, glyph_stride, first_advance, advance_stride); +} +/** + * hb_font_get_glyph_v_advances: + * @font: a font. + * + * + * + * Since: 1.8.6 + **/ +void +hb_font_get_glyph_v_advances (hb_font_t* font, + unsigned int count, + const hb_codepoint_t *first_glyph, + unsigned glyph_stride, + hb_position_t *first_advance, + unsigned advance_stride) +{ + font->get_glyph_v_advances (count, first_glyph, glyph_stride, first_advance, advance_stride); +} + +/** + * hb_font_get_glyph_h_origin: + * @font: a font. + * @glyph: + * @x: (out): + * @y: (out): + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_get_glyph_h_origin (hb_font_t *font, + hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y) +{ + return font->get_glyph_h_origin (glyph, x, y); +} + +/** + * hb_font_get_glyph_v_origin: + * @font: a font. + * @glyph: + * @x: (out): + * @y: (out): + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_get_glyph_v_origin (hb_font_t *font, + hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y) +{ + return font->get_glyph_v_origin (glyph, x, y); +} + +/** + * hb_font_get_glyph_h_kerning: + * @font: a font. + * @left_glyph: + * @right_glyph: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_position_t +hb_font_get_glyph_h_kerning (hb_font_t *font, + hb_codepoint_t left_glyph, hb_codepoint_t right_glyph) +{ + return font->get_glyph_h_kerning (left_glyph, right_glyph); +} + +#ifndef HB_DISABLE_DEPRECATED +/** + * hb_font_get_glyph_v_kerning: + * @font: a font. + * @top_glyph: + * @bottom_glyph: + * + * + * + * Return value: + * + * Since: 0.9.2 + * Deprecated: 2.0.0 + **/ +hb_position_t +hb_font_get_glyph_v_kerning (hb_font_t *font, + hb_codepoint_t top_glyph, hb_codepoint_t bottom_glyph) +{ + return font->get_glyph_v_kerning (top_glyph, bottom_glyph); +} +#endif + +/** + * hb_font_get_glyph_extents: + * @font: a font. + * @glyph: + * @extents: (out): + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_get_glyph_extents (hb_font_t *font, + hb_codepoint_t glyph, + hb_glyph_extents_t *extents) +{ + return font->get_glyph_extents (glyph, extents); +} + +/** + * hb_font_get_glyph_contour_point: + * @font: a font. + * @glyph: + * @point_index: + * @x: (out): + * @y: (out): + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_get_glyph_contour_point (hb_font_t *font, + hb_codepoint_t glyph, unsigned int point_index, + hb_position_t *x, hb_position_t *y) +{ + return font->get_glyph_contour_point (glyph, point_index, x, y); +} + +/** + * hb_font_get_glyph_name: + * @font: a font. + * @glyph: + * @name: (array length=size): + * @size: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_get_glyph_name (hb_font_t *font, + hb_codepoint_t glyph, + char *name, unsigned int size) +{ + return font->get_glyph_name (glyph, name, size); +} + +/** + * hb_font_get_glyph_from_name: + * @font: a font. + * @name: (array length=len): + * @len: + * @glyph: (out): + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_get_glyph_from_name (hb_font_t *font, + const char *name, int len, /* -1 means nul-terminated */ + hb_codepoint_t *glyph) +{ + return font->get_glyph_from_name (name, len, glyph); +} + + +/* A bit higher-level, and with fallback */ + +/** + * hb_font_get_extents_for_direction: + * @font: a font. + * @direction: + * @extents: (out): + * + * + * + * Since: 1.1.3 + **/ +void +hb_font_get_extents_for_direction (hb_font_t *font, + hb_direction_t direction, + hb_font_extents_t *extents) +{ + return font->get_extents_for_direction (direction, extents); +} +/** + * hb_font_get_glyph_advance_for_direction: + * @font: a font. + * @glyph: + * @direction: + * @x: (out): + * @y: (out): + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_get_glyph_advance_for_direction (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y) +{ + return font->get_glyph_advance_for_direction (glyph, direction, x, y); +} +/** + * hb_font_get_glyph_advances_for_direction: + * @font: a font. + * @direction: + * + * + * + * Since: 1.8.6 + **/ +HB_EXTERN void +hb_font_get_glyph_advances_for_direction (hb_font_t* font, + hb_direction_t direction, + unsigned int count, + const hb_codepoint_t *first_glyph, + unsigned glyph_stride, + hb_position_t *first_advance, + unsigned advance_stride) +{ + font->get_glyph_advances_for_direction (direction, count, first_glyph, glyph_stride, first_advance, advance_stride); +} + +/** + * hb_font_get_glyph_origin_for_direction: + * @font: a font. + * @glyph: + * @direction: + * @x: (out): + * @y: (out): + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_get_glyph_origin_for_direction (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y) +{ + return font->get_glyph_origin_for_direction (glyph, direction, x, y); +} + +/** + * hb_font_add_glyph_origin_for_direction: + * @font: a font. + * @glyph: + * @direction: + * @x: (out): + * @y: (out): + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_add_glyph_origin_for_direction (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y) +{ + return font->add_glyph_origin_for_direction (glyph, direction, x, y); +} + +/** + * hb_font_subtract_glyph_origin_for_direction: + * @font: a font. + * @glyph: + * @direction: + * @x: (out): + * @y: (out): + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_subtract_glyph_origin_for_direction (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y) +{ + return font->subtract_glyph_origin_for_direction (glyph, direction, x, y); +} + +/** + * hb_font_get_glyph_kerning_for_direction: + * @font: a font. + * @first_glyph: + * @second_glyph: + * @direction: + * @x: (out): + * @y: (out): + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_get_glyph_kerning_for_direction (hb_font_t *font, + hb_codepoint_t first_glyph, hb_codepoint_t second_glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y) +{ + return font->get_glyph_kerning_for_direction (first_glyph, second_glyph, direction, x, y); +} + +/** + * hb_font_get_glyph_extents_for_origin: + * @font: a font. + * @glyph: + * @direction: + * @extents: (out): + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_get_glyph_extents_for_origin (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + hb_glyph_extents_t *extents) +{ + return font->get_glyph_extents_for_origin (glyph, direction, extents); +} + +/** + * hb_font_get_glyph_contour_point_for_origin: + * @font: a font. + * @glyph: + * @point_index: + * @direction: + * @x: (out): + * @y: (out): + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_get_glyph_contour_point_for_origin (hb_font_t *font, + hb_codepoint_t glyph, unsigned int point_index, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y) +{ + return font->get_glyph_contour_point_for_origin (glyph, point_index, direction, x, y); +} + +/* Generates gidDDD if glyph has no name. */ +/** + * hb_font_glyph_to_string: + * @font: a font. + * @glyph: + * @s: (array length=size): + * @size: + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_glyph_to_string (hb_font_t *font, + hb_codepoint_t glyph, + char *s, unsigned int size) +{ + font->glyph_to_string (glyph, s, size); +} + +/* Parses gidDDD and uniUUUU strings automatically. */ +/** + * hb_font_glyph_from_string: + * @font: a font. + * @s: (array length=len) (element-type uint8_t): + * @len: + * @glyph: (out): + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_glyph_from_string (hb_font_t *font, + const char *s, int len, /* -1 means nul-terminated */ + hb_codepoint_t *glyph) +{ + return font->glyph_from_string (s, len, glyph); +} + + +/* + * hb_font_t + */ + +DEFINE_NULL_INSTANCE (hb_font_t) = +{ + HB_OBJECT_HEADER_STATIC, + + nullptr, /* parent */ + const_cast<hb_face_t *> (&_hb_Null_hb_face_t), + + 1000, /* x_scale */ + 1000, /* y_scale */ + 1<<16, /* x_mult */ + 1<<16, /* y_mult */ + + 0, /* x_ppem */ + 0, /* y_ppem */ + 0, /* ptem */ + + 0, /* num_coords */ + nullptr, /* coords */ + nullptr, /* design_coords */ + + const_cast<hb_font_funcs_t *> (&_hb_Null_hb_font_funcs_t), + + /* Zero for the rest is fine. */ +}; + + +static hb_font_t * +_hb_font_create (hb_face_t *face) +{ + hb_font_t *font; + + if (unlikely (!face)) + face = hb_face_get_empty (); + if (!(font = hb_object_create<hb_font_t> ())) + return hb_font_get_empty (); + + hb_face_make_immutable (face); + font->parent = hb_font_get_empty (); + font->face = hb_face_reference (face); + font->klass = hb_font_funcs_get_empty (); + font->data.init0 (font); + font->x_scale = font->y_scale = hb_face_get_upem (face); + font->x_mult = font->y_mult = 1 << 16; + + return font; +} + +/** + * hb_font_create: (Xconstructor) + * @face: a face. + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_font_t * +hb_font_create (hb_face_t *face) +{ + hb_font_t *font = _hb_font_create (face); + +#ifndef HB_NO_OT_FONT + /* Install our in-house, very lightweight, funcs. */ + hb_ot_font_set_funcs (font); +#endif + + return font; +} + +static void +_hb_font_adopt_var_coords (hb_font_t *font, + int *coords, /* 2.14 normalized */ + float *design_coords, + unsigned int coords_length) +{ + free (font->coords); + free (font->design_coords); + + font->coords = coords; + font->design_coords = design_coords; + font->num_coords = coords_length; +} + +/** + * hb_font_create_sub_font: + * @parent: parent font. + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_font_t * +hb_font_create_sub_font (hb_font_t *parent) +{ + if (unlikely (!parent)) + parent = hb_font_get_empty (); + + hb_font_t *font = _hb_font_create (parent->face); + + if (unlikely (hb_object_is_immutable (font))) + return font; + + font->parent = hb_font_reference (parent); + + font->x_scale = parent->x_scale; + font->y_scale = parent->y_scale; + font->mults_changed (); + font->x_ppem = parent->x_ppem; + font->y_ppem = parent->y_ppem; + font->ptem = parent->ptem; + + unsigned int num_coords = parent->num_coords; + if (num_coords) + { + int *coords = (int *) calloc (num_coords, sizeof (parent->coords[0])); + float *design_coords = (float *) calloc (num_coords, sizeof (parent->design_coords[0])); + if (likely (coords && design_coords)) + { + memcpy (coords, parent->coords, num_coords * sizeof (parent->coords[0])); + memcpy (design_coords, parent->design_coords, num_coords * sizeof (parent->design_coords[0])); + _hb_font_adopt_var_coords (font, coords, design_coords, num_coords); + } + else + { + free (coords); + free (design_coords); + } + } + + return font; +} + +/** + * hb_font_get_empty: + * + * + * + * Return value: (transfer full) + * + * Since: 0.9.2 + **/ +hb_font_t * +hb_font_get_empty () +{ + return const_cast<hb_font_t *> (&Null (hb_font_t)); +} + +/** + * hb_font_reference: (skip) + * @font: a font. + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_font_t * +hb_font_reference (hb_font_t *font) +{ + return hb_object_reference (font); +} + +/** + * hb_font_destroy: (skip) + * @font: a font. + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_destroy (hb_font_t *font) +{ + if (!hb_object_destroy (font)) return; + + font->data.fini (); + + if (font->destroy) + font->destroy (font->user_data); + + hb_font_destroy (font->parent); + hb_face_destroy (font->face); + hb_font_funcs_destroy (font->klass); + + free (font->coords); + free (font->design_coords); + + free (font); +} + +/** + * hb_font_set_user_data: (skip) + * @font: a font. + * @key: + * @data: + * @destroy: + * @replace: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_set_user_data (hb_font_t *font, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace) +{ + return hb_object_set_user_data (font, key, data, destroy, replace); +} + +/** + * hb_font_get_user_data: (skip) + * @font: a font. + * @key: + * + * + * + * Return value: (transfer none): + * + * Since: 0.9.2 + **/ +void * +hb_font_get_user_data (hb_font_t *font, + hb_user_data_key_t *key) +{ + return hb_object_get_user_data (font, key); +} + +/** + * hb_font_make_immutable: + * @font: a font. + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_make_immutable (hb_font_t *font) +{ + if (hb_object_is_immutable (font)) + return; + + if (font->parent) + hb_font_make_immutable (font->parent); + + hb_object_make_immutable (font); +} + +/** + * hb_font_is_immutable: + * @font: a font. + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_font_is_immutable (hb_font_t *font) +{ + return hb_object_is_immutable (font); +} + +/** + * hb_font_set_parent: + * @font: a font. + * @parent: new parent. + * + * Sets parent font of @font. + * + * Since: 1.0.5 + **/ +void +hb_font_set_parent (hb_font_t *font, + hb_font_t *parent) +{ + if (hb_object_is_immutable (font)) + return; + + if (!parent) + parent = hb_font_get_empty (); + + hb_font_t *old = font->parent; + + font->parent = hb_font_reference (parent); + + hb_font_destroy (old); +} + +/** + * hb_font_get_parent: + * @font: a font. + * + * + * + * Return value: (transfer none): + * + * Since: 0.9.2 + **/ +hb_font_t * +hb_font_get_parent (hb_font_t *font) +{ + return font->parent; +} + +/** + * hb_font_set_face: + * @font: a font. + * @face: new face. + * + * Sets font-face of @font. + * + * Since: 1.4.3 + **/ +void +hb_font_set_face (hb_font_t *font, + hb_face_t *face) +{ + if (hb_object_is_immutable (font)) + return; + + if (unlikely (!face)) + face = hb_face_get_empty (); + + hb_face_t *old = font->face; + + hb_face_make_immutable (face); + font->face = hb_face_reference (face); + font->mults_changed (); + + hb_face_destroy (old); +} + +/** + * hb_font_get_face: + * @font: a font. + * + * + * + * Return value: (transfer none): + * + * Since: 0.9.2 + **/ +hb_face_t * +hb_font_get_face (hb_font_t *font) +{ + return font->face; +} + + +/** + * hb_font_set_funcs: + * @font: a font. + * @klass: (closure font_data) (destroy destroy) (scope notified): + * @font_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_set_funcs (hb_font_t *font, + hb_font_funcs_t *klass, + void *font_data, + hb_destroy_func_t destroy) +{ + if (hb_object_is_immutable (font)) + { + if (destroy) + destroy (font_data); + return; + } + + if (font->destroy) + font->destroy (font->user_data); + + if (!klass) + klass = hb_font_funcs_get_empty (); + + hb_font_funcs_reference (klass); + hb_font_funcs_destroy (font->klass); + font->klass = klass; + font->user_data = font_data; + font->destroy = destroy; +} + +/** + * hb_font_set_funcs_data: + * @font: a font. + * @font_data: (destroy destroy) (scope notified): + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_set_funcs_data (hb_font_t *font, + void *font_data, + hb_destroy_func_t destroy) +{ + /* Destroy user_data? */ + if (hb_object_is_immutable (font)) + { + if (destroy) + destroy (font_data); + return; + } + + if (font->destroy) + font->destroy (font->user_data); + + font->user_data = font_data; + font->destroy = destroy; +} + + +/** + * hb_font_set_scale: + * @font: a font. + * @x_scale: + * @y_scale: + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_set_scale (hb_font_t *font, + int x_scale, + int y_scale) +{ + if (hb_object_is_immutable (font)) + return; + + font->x_scale = x_scale; + font->y_scale = y_scale; + font->mults_changed (); +} + +/** + * hb_font_get_scale: + * @font: a font. + * @x_scale: (out): + * @y_scale: (out): + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_get_scale (hb_font_t *font, + int *x_scale, + int *y_scale) +{ + if (x_scale) *x_scale = font->x_scale; + if (y_scale) *y_scale = font->y_scale; +} + +/** + * hb_font_set_ppem: + * @font: a font. + * @x_ppem: + * @y_ppem: + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_set_ppem (hb_font_t *font, + unsigned int x_ppem, + unsigned int y_ppem) +{ + if (hb_object_is_immutable (font)) + return; + + font->x_ppem = x_ppem; + font->y_ppem = y_ppem; +} + +/** + * hb_font_get_ppem: + * @font: a font. + * @x_ppem: (out): + * @y_ppem: (out): + * + * + * + * Since: 0.9.2 + **/ +void +hb_font_get_ppem (hb_font_t *font, + unsigned int *x_ppem, + unsigned int *y_ppem) +{ + if (x_ppem) *x_ppem = font->x_ppem; + if (y_ppem) *y_ppem = font->y_ppem; +} + +/** + * hb_font_set_ptem: + * @font: a font. + * @ptem: font size in points. + * + * Sets "point size" of the font. Set to 0 to unset. + * + * There are 72 points in an inch. + * + * Since: 1.6.0 + **/ +void +hb_font_set_ptem (hb_font_t *font, float ptem) +{ + if (hb_object_is_immutable (font)) + return; + + font->ptem = ptem; +} + +/** + * hb_font_get_ptem: + * @font: a font. + * + * Gets the "point size" of the font. A value of 0 means unset. + * + * Return value: Point size. + * + * Since: 0.9.2 + **/ +float +hb_font_get_ptem (hb_font_t *font) +{ + return font->ptem; +} + +#ifndef HB_NO_VAR +/* + * Variations + */ + +/** + * hb_font_set_variations: + * + * Since: 1.4.2 + */ +void +hb_font_set_variations (hb_font_t *font, + const hb_variation_t *variations, + unsigned int variations_length) +{ + if (hb_object_is_immutable (font)) + return; + + if (!variations_length) + { + hb_font_set_var_coords_normalized (font, nullptr, 0); + return; + } + + unsigned int coords_length = hb_ot_var_get_axis_count (font->face); + + int *normalized = coords_length ? (int *) calloc (coords_length, sizeof (int)) : nullptr; + float *design_coords = coords_length ? (float *) calloc (coords_length, sizeof (float)) : nullptr; + + if (unlikely (coords_length && !(normalized && design_coords))) + { + free (normalized); + free (design_coords); + return; + } + + const OT::fvar &fvar = *font->face->table.fvar; + for (unsigned int i = 0; i < variations_length; i++) + { + hb_ot_var_axis_info_t info; + if (hb_ot_var_find_axis_info (font->face, variations[i].tag, &info) && + info.axis_index < coords_length) + { + float v = variations[i].value; + design_coords[info.axis_index] = v; + normalized[info.axis_index] = fvar.normalize_axis_value (info.axis_index, v); + } + } + font->face->table.avar->map_coords (normalized, coords_length); + + _hb_font_adopt_var_coords (font, normalized, design_coords, coords_length); +} + +/** + * hb_font_set_var_coords_design: + * + * Since: 1.4.2 + */ +void +hb_font_set_var_coords_design (hb_font_t *font, + const float *coords, + unsigned int coords_length) +{ + if (hb_object_is_immutable (font)) + return; + + int *normalized = coords_length ? (int *) calloc (coords_length, sizeof (int)) : nullptr; + float *design_coords = coords_length ? (float *) calloc (coords_length, sizeof (float)) : nullptr; + + if (unlikely (coords_length && !(normalized && design_coords))) + { + free (normalized); + free (design_coords); + return; + } + + if (coords_length) + memcpy (design_coords, coords, coords_length * sizeof (font->design_coords[0])); + + hb_ot_var_normalize_coords (font->face, coords_length, coords, normalized); + _hb_font_adopt_var_coords (font, normalized, design_coords, coords_length); +} + +/** + * hb_font_set_var_named_instance: + * @font: a font. + * @instance_index: named instance index. + * + * Sets design coords of a font from a named instance index. + * + * Since: 2.6.0 + */ +void +hb_font_set_var_named_instance (hb_font_t *font, + unsigned instance_index) +{ + if (hb_object_is_immutable (font)) + return; + + unsigned int coords_length = hb_ot_var_named_instance_get_design_coords (font->face, instance_index, nullptr, nullptr); + + float *coords = coords_length ? (float *) calloc (coords_length, sizeof (float)) : nullptr; + if (unlikely (coords_length && !coords)) + return; + + hb_ot_var_named_instance_get_design_coords (font->face, instance_index, &coords_length, coords); + hb_font_set_var_coords_design (font, coords, coords_length); + free (coords); +} + +/** + * hb_font_set_var_coords_normalized: + * + * Since: 1.4.2 + */ +void +hb_font_set_var_coords_normalized (hb_font_t *font, + const int *coords, /* 2.14 normalized */ + unsigned int coords_length) +{ + if (hb_object_is_immutable (font)) + return; + + int *copy = coords_length ? (int *) calloc (coords_length, sizeof (coords[0])) : nullptr; + int *unmapped = coords_length ? (int *) calloc (coords_length, sizeof (coords[0])) : nullptr; + float *design_coords = coords_length ? (float *) calloc (coords_length, sizeof (design_coords[0])) : nullptr; + + if (unlikely (coords_length && !(copy && unmapped && design_coords))) + { + free (copy); + free (unmapped); + free (design_coords); + return; + } + + if (coords_length) + { + memcpy (copy, coords, coords_length * sizeof (coords[0])); + memcpy (unmapped, coords, coords_length * sizeof (coords[0])); + } + + /* Best effort design coords simulation */ + font->face->table.avar->unmap_coords (unmapped, coords_length); + for (unsigned int i = 0; i < coords_length; ++i) + design_coords[i] = font->face->table.fvar->unnormalize_axis_value (i, unmapped[i]); + free (unmapped); + + _hb_font_adopt_var_coords (font, copy, design_coords, coords_length); +} + +/** + * hb_font_get_var_coords_normalized: + * + * Return value is valid as long as variation coordinates of the font + * are not modified. + * + * Since: 1.4.2 + */ +const int * +hb_font_get_var_coords_normalized (hb_font_t *font, + unsigned int *length) +{ + if (length) + *length = font->num_coords; + + return font->coords; +} + +#ifdef HB_EXPERIMENTAL_API +/** + * hb_font_get_var_coords_design: + * + * Return value is valid as long as variation coordinates of the font + * are not modified. + * + * Since: EXPERIMENTAL + */ +const float * +hb_font_get_var_coords_design (hb_font_t *font, + unsigned int *length) +{ + if (length) + *length = font->num_coords; + + return font->design_coords; +} +#endif +#endif + +#ifndef HB_DISABLE_DEPRECATED +/* + * Deprecated get_glyph_func(): + */ + +struct hb_trampoline_closure_t +{ + void *user_data; + hb_destroy_func_t destroy; + unsigned int ref_count; +}; + +template <typename FuncType> +struct hb_trampoline_t +{ + hb_trampoline_closure_t closure; /* Must be first. */ + FuncType func; +}; + +template <typename FuncType> +static hb_trampoline_t<FuncType> * +trampoline_create (FuncType func, + void *user_data, + hb_destroy_func_t destroy) +{ + typedef hb_trampoline_t<FuncType> trampoline_t; + + trampoline_t *trampoline = (trampoline_t *) calloc (1, sizeof (trampoline_t)); + + if (unlikely (!trampoline)) + return nullptr; + + trampoline->closure.user_data = user_data; + trampoline->closure.destroy = destroy; + trampoline->closure.ref_count = 1; + trampoline->func = func; + + return trampoline; +} + +static void +trampoline_reference (hb_trampoline_closure_t *closure) +{ + closure->ref_count++; +} + +static void +trampoline_destroy (void *user_data) +{ + hb_trampoline_closure_t *closure = (hb_trampoline_closure_t *) user_data; + + if (--closure->ref_count) + return; + + if (closure->destroy) + closure->destroy (closure->user_data); + free (closure); +} + +typedef hb_trampoline_t<hb_font_get_glyph_func_t> hb_font_get_glyph_trampoline_t; + +static hb_bool_t +hb_font_get_nominal_glyph_trampoline (hb_font_t *font, + void *font_data, + hb_codepoint_t unicode, + hb_codepoint_t *glyph, + void *user_data) +{ + hb_font_get_glyph_trampoline_t *trampoline = (hb_font_get_glyph_trampoline_t *) user_data; + return trampoline->func (font, font_data, unicode, 0, glyph, trampoline->closure.user_data); +} + +static hb_bool_t +hb_font_get_variation_glyph_trampoline (hb_font_t *font, + void *font_data, + hb_codepoint_t unicode, + hb_codepoint_t variation_selector, + hb_codepoint_t *glyph, + void *user_data) +{ + hb_font_get_glyph_trampoline_t *trampoline = (hb_font_get_glyph_trampoline_t *) user_data; + return trampoline->func (font, font_data, unicode, variation_selector, glyph, trampoline->closure.user_data); +} + +/** + * hb_font_funcs_set_glyph_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): callback function. + * @user_data: data to pass to @func. + * @destroy: function to call when @user_data is not needed anymore. + * + * Deprecated. Use hb_font_funcs_set_nominal_glyph_func() and + * hb_font_funcs_set_variation_glyph_func() instead. + * + * Since: 0.9.2 + * Deprecated: 1.2.3 + **/ +void +hb_font_funcs_set_glyph_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_func_t func, + void *user_data, hb_destroy_func_t destroy) +{ + if (hb_object_is_immutable (ffuncs)) + { + if (destroy) + destroy (user_data); + return; + } + + hb_font_get_glyph_trampoline_t *trampoline; + + trampoline = trampoline_create (func, user_data, destroy); + if (unlikely (!trampoline)) + { + if (destroy) + destroy (user_data); + return; + } + + hb_font_funcs_set_nominal_glyph_func (ffuncs, + hb_font_get_nominal_glyph_trampoline, + trampoline, + trampoline_destroy); + + trampoline_reference (&trampoline->closure); + hb_font_funcs_set_variation_glyph_func (ffuncs, + hb_font_get_variation_glyph_trampoline, + trampoline, + trampoline_destroy); +} +#endif diff --git a/thirdparty/harfbuzz/src/hb-font.h b/thirdparty/harfbuzz/src/hb-font.h new file mode 100644 index 0000000000..e1a5719f1d --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-font.h @@ -0,0 +1,735 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_FONT_H +#define HB_FONT_H + +#include "hb-common.h" +#include "hb-face.h" +#include "hb-draw.h" + +HB_BEGIN_DECLS + + +typedef struct hb_font_t hb_font_t; + + +/* + * hb_font_funcs_t + */ + +typedef struct hb_font_funcs_t hb_font_funcs_t; + +HB_EXTERN hb_font_funcs_t * +hb_font_funcs_create (void); + +HB_EXTERN hb_font_funcs_t * +hb_font_funcs_get_empty (void); + +HB_EXTERN hb_font_funcs_t * +hb_font_funcs_reference (hb_font_funcs_t *ffuncs); + +HB_EXTERN void +hb_font_funcs_destroy (hb_font_funcs_t *ffuncs); + +HB_EXTERN hb_bool_t +hb_font_funcs_set_user_data (hb_font_funcs_t *ffuncs, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace); + + +HB_EXTERN void * +hb_font_funcs_get_user_data (hb_font_funcs_t *ffuncs, + hb_user_data_key_t *key); + + +HB_EXTERN void +hb_font_funcs_make_immutable (hb_font_funcs_t *ffuncs); + +HB_EXTERN hb_bool_t +hb_font_funcs_is_immutable (hb_font_funcs_t *ffuncs); + + +/* font and glyph extents */ + +/* Note that typically ascender is positive and descender negative in coordinate systems that grow up. */ +typedef struct hb_font_extents_t +{ + hb_position_t ascender; /* typographic ascender. */ + hb_position_t descender; /* typographic descender. */ + hb_position_t line_gap; /* suggested line spacing gap. */ + /*< private >*/ + hb_position_t reserved9; + hb_position_t reserved8; + hb_position_t reserved7; + hb_position_t reserved6; + hb_position_t reserved5; + hb_position_t reserved4; + hb_position_t reserved3; + hb_position_t reserved2; + hb_position_t reserved1; +} hb_font_extents_t; + +/* Note that height is negative in coordinate systems that grow up. */ +typedef struct hb_glyph_extents_t +{ + hb_position_t x_bearing; /* left side of glyph from origin. */ + hb_position_t y_bearing; /* top side of glyph from origin. */ + hb_position_t width; /* distance from left to right side. */ + hb_position_t height; /* distance from top to bottom side. */ +} hb_glyph_extents_t; + +/* func types */ + +typedef hb_bool_t (*hb_font_get_font_extents_func_t) (hb_font_t *font, void *font_data, + hb_font_extents_t *extents, + void *user_data); +typedef hb_font_get_font_extents_func_t hb_font_get_font_h_extents_func_t; +typedef hb_font_get_font_extents_func_t hb_font_get_font_v_extents_func_t; + + +typedef hb_bool_t (*hb_font_get_nominal_glyph_func_t) (hb_font_t *font, void *font_data, + hb_codepoint_t unicode, + hb_codepoint_t *glyph, + void *user_data); +typedef hb_bool_t (*hb_font_get_variation_glyph_func_t) (hb_font_t *font, void *font_data, + hb_codepoint_t unicode, hb_codepoint_t variation_selector, + hb_codepoint_t *glyph, + void *user_data); + +typedef unsigned int (*hb_font_get_nominal_glyphs_func_t) (hb_font_t *font, void *font_data, + unsigned int count, + const hb_codepoint_t *first_unicode, + unsigned int unicode_stride, + hb_codepoint_t *first_glyph, + unsigned int glyph_stride, + void *user_data); + + +typedef hb_position_t (*hb_font_get_glyph_advance_func_t) (hb_font_t *font, void *font_data, + hb_codepoint_t glyph, + void *user_data); +typedef hb_font_get_glyph_advance_func_t hb_font_get_glyph_h_advance_func_t; +typedef hb_font_get_glyph_advance_func_t hb_font_get_glyph_v_advance_func_t; + +typedef void (*hb_font_get_glyph_advances_func_t) (hb_font_t* font, void* font_data, + unsigned int count, + const hb_codepoint_t *first_glyph, + unsigned glyph_stride, + hb_position_t *first_advance, + unsigned advance_stride, + void *user_data); +typedef hb_font_get_glyph_advances_func_t hb_font_get_glyph_h_advances_func_t; +typedef hb_font_get_glyph_advances_func_t hb_font_get_glyph_v_advances_func_t; + +typedef hb_bool_t (*hb_font_get_glyph_origin_func_t) (hb_font_t *font, void *font_data, + hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y, + void *user_data); +typedef hb_font_get_glyph_origin_func_t hb_font_get_glyph_h_origin_func_t; +typedef hb_font_get_glyph_origin_func_t hb_font_get_glyph_v_origin_func_t; + +typedef hb_position_t (*hb_font_get_glyph_kerning_func_t) (hb_font_t *font, void *font_data, + hb_codepoint_t first_glyph, hb_codepoint_t second_glyph, + void *user_data); +typedef hb_font_get_glyph_kerning_func_t hb_font_get_glyph_h_kerning_func_t; + + +typedef hb_bool_t (*hb_font_get_glyph_extents_func_t) (hb_font_t *font, void *font_data, + hb_codepoint_t glyph, + hb_glyph_extents_t *extents, + void *user_data); +typedef hb_bool_t (*hb_font_get_glyph_contour_point_func_t) (hb_font_t *font, void *font_data, + hb_codepoint_t glyph, unsigned int point_index, + hb_position_t *x, hb_position_t *y, + void *user_data); + + +typedef hb_bool_t (*hb_font_get_glyph_name_func_t) (hb_font_t *font, void *font_data, + hb_codepoint_t glyph, + char *name, unsigned int size, + void *user_data); +typedef hb_bool_t (*hb_font_get_glyph_from_name_func_t) (hb_font_t *font, void *font_data, + const char *name, int len, /* -1 means nul-terminated */ + hb_codepoint_t *glyph, + void *user_data); + + +/* func setters */ + +/** + * hb_font_funcs_set_font_h_extents_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 1.1.2 + **/ +HB_EXTERN void +hb_font_funcs_set_font_h_extents_func (hb_font_funcs_t *ffuncs, + hb_font_get_font_h_extents_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_font_v_extents_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 1.1.2 + **/ +HB_EXTERN void +hb_font_funcs_set_font_v_extents_func (hb_font_funcs_t *ffuncs, + hb_font_get_font_v_extents_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_nominal_glyph_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 1.2.3 + **/ +HB_EXTERN void +hb_font_funcs_set_nominal_glyph_func (hb_font_funcs_t *ffuncs, + hb_font_get_nominal_glyph_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_nominal_glyphs_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 2.0.0 + **/ +HB_EXTERN void +hb_font_funcs_set_nominal_glyphs_func (hb_font_funcs_t *ffuncs, + hb_font_get_nominal_glyphs_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_variation_glyph_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 1.2.3 + **/ +HB_EXTERN void +hb_font_funcs_set_variation_glyph_func (hb_font_funcs_t *ffuncs, + hb_font_get_variation_glyph_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_glyph_h_advance_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_font_funcs_set_glyph_h_advance_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_h_advance_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_glyph_v_advance_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_font_funcs_set_glyph_v_advance_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_v_advance_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_glyph_h_advances_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 1.8.6 + **/ +HB_EXTERN void +hb_font_funcs_set_glyph_h_advances_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_h_advances_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_glyph_v_advances_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 1.8.6 + **/ +HB_EXTERN void +hb_font_funcs_set_glyph_v_advances_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_v_advances_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_glyph_h_origin_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_font_funcs_set_glyph_h_origin_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_h_origin_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_glyph_v_origin_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_font_funcs_set_glyph_v_origin_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_v_origin_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_glyph_h_kerning_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_font_funcs_set_glyph_h_kerning_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_h_kerning_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_glyph_extents_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_font_funcs_set_glyph_extents_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_extents_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_glyph_contour_point_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_font_funcs_set_glyph_contour_point_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_contour_point_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_glyph_name_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_font_funcs_set_glyph_name_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_name_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_font_funcs_set_glyph_from_name_func: + * @ffuncs: font functions. + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_font_funcs_set_glyph_from_name_func (hb_font_funcs_t *ffuncs, + hb_font_get_glyph_from_name_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/* func dispatch */ + +HB_EXTERN hb_bool_t +hb_font_get_h_extents (hb_font_t *font, + hb_font_extents_t *extents); +HB_EXTERN hb_bool_t +hb_font_get_v_extents (hb_font_t *font, + hb_font_extents_t *extents); + +HB_EXTERN hb_bool_t +hb_font_get_nominal_glyph (hb_font_t *font, + hb_codepoint_t unicode, + hb_codepoint_t *glyph); +HB_EXTERN hb_bool_t +hb_font_get_variation_glyph (hb_font_t *font, + hb_codepoint_t unicode, hb_codepoint_t variation_selector, + hb_codepoint_t *glyph); + +HB_EXTERN unsigned int +hb_font_get_nominal_glyphs (hb_font_t *font, + unsigned int count, + const hb_codepoint_t *first_unicode, + unsigned int unicode_stride, + hb_codepoint_t *first_glyph, + unsigned int glyph_stride); + +HB_EXTERN hb_position_t +hb_font_get_glyph_h_advance (hb_font_t *font, + hb_codepoint_t glyph); +HB_EXTERN hb_position_t +hb_font_get_glyph_v_advance (hb_font_t *font, + hb_codepoint_t glyph); + +HB_EXTERN void +hb_font_get_glyph_h_advances (hb_font_t* font, + unsigned int count, + const hb_codepoint_t *first_glyph, + unsigned glyph_stride, + hb_position_t *first_advance, + unsigned advance_stride); +HB_EXTERN void +hb_font_get_glyph_v_advances (hb_font_t* font, + unsigned int count, + const hb_codepoint_t *first_glyph, + unsigned glyph_stride, + hb_position_t *first_advance, + unsigned advance_stride); + +HB_EXTERN hb_bool_t +hb_font_get_glyph_h_origin (hb_font_t *font, + hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y); +HB_EXTERN hb_bool_t +hb_font_get_glyph_v_origin (hb_font_t *font, + hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y); + +HB_EXTERN hb_position_t +hb_font_get_glyph_h_kerning (hb_font_t *font, + hb_codepoint_t left_glyph, hb_codepoint_t right_glyph); + +HB_EXTERN hb_bool_t +hb_font_get_glyph_extents (hb_font_t *font, + hb_codepoint_t glyph, + hb_glyph_extents_t *extents); + +HB_EXTERN hb_bool_t +hb_font_get_glyph_contour_point (hb_font_t *font, + hb_codepoint_t glyph, unsigned int point_index, + hb_position_t *x, hb_position_t *y); + +HB_EXTERN hb_bool_t +hb_font_get_glyph_name (hb_font_t *font, + hb_codepoint_t glyph, + char *name, unsigned int size); +HB_EXTERN hb_bool_t +hb_font_get_glyph_from_name (hb_font_t *font, + const char *name, int len, /* -1 means nul-terminated */ + hb_codepoint_t *glyph); + + +/* high-level funcs, with fallback */ + +/* Calls either hb_font_get_nominal_glyph() if variation_selector is 0, + * otherwise calls hb_font_get_variation_glyph(). */ +HB_EXTERN hb_bool_t +hb_font_get_glyph (hb_font_t *font, + hb_codepoint_t unicode, hb_codepoint_t variation_selector, + hb_codepoint_t *glyph); + +HB_EXTERN void +hb_font_get_extents_for_direction (hb_font_t *font, + hb_direction_t direction, + hb_font_extents_t *extents); +HB_EXTERN void +hb_font_get_glyph_advance_for_direction (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y); +HB_EXTERN void +hb_font_get_glyph_advances_for_direction (hb_font_t* font, + hb_direction_t direction, + unsigned int count, + const hb_codepoint_t *first_glyph, + unsigned glyph_stride, + hb_position_t *first_advance, + unsigned advance_stride); +HB_EXTERN void +hb_font_get_glyph_origin_for_direction (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y); +HB_EXTERN void +hb_font_add_glyph_origin_for_direction (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y); +HB_EXTERN void +hb_font_subtract_glyph_origin_for_direction (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y); + +HB_EXTERN void +hb_font_get_glyph_kerning_for_direction (hb_font_t *font, + hb_codepoint_t first_glyph, hb_codepoint_t second_glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y); + +HB_EXTERN hb_bool_t +hb_font_get_glyph_extents_for_origin (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + hb_glyph_extents_t *extents); + +HB_EXTERN hb_bool_t +hb_font_get_glyph_contour_point_for_origin (hb_font_t *font, + hb_codepoint_t glyph, unsigned int point_index, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y); + +/* Generates gidDDD if glyph has no name. */ +HB_EXTERN void +hb_font_glyph_to_string (hb_font_t *font, + hb_codepoint_t glyph, + char *s, unsigned int size); +/* Parses gidDDD and uniUUUU strings automatically. */ +HB_EXTERN hb_bool_t +hb_font_glyph_from_string (hb_font_t *font, + const char *s, int len, /* -1 means nul-terminated */ + hb_codepoint_t *glyph); + + +/* + * hb_font_t + */ + +/* Fonts are very light-weight objects */ + +HB_EXTERN hb_font_t * +hb_font_create (hb_face_t *face); + +HB_EXTERN hb_font_t * +hb_font_create_sub_font (hb_font_t *parent); + +HB_EXTERN hb_font_t * +hb_font_get_empty (void); + +HB_EXTERN hb_font_t * +hb_font_reference (hb_font_t *font); + +HB_EXTERN void +hb_font_destroy (hb_font_t *font); + +HB_EXTERN hb_bool_t +hb_font_set_user_data (hb_font_t *font, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace); + + +HB_EXTERN void * +hb_font_get_user_data (hb_font_t *font, + hb_user_data_key_t *key); + +HB_EXTERN void +hb_font_make_immutable (hb_font_t *font); + +HB_EXTERN hb_bool_t +hb_font_is_immutable (hb_font_t *font); + +HB_EXTERN void +hb_font_set_parent (hb_font_t *font, + hb_font_t *parent); + +HB_EXTERN hb_font_t * +hb_font_get_parent (hb_font_t *font); + +HB_EXTERN void +hb_font_set_face (hb_font_t *font, + hb_face_t *face); + +HB_EXTERN hb_face_t * +hb_font_get_face (hb_font_t *font); + + +HB_EXTERN void +hb_font_set_funcs (hb_font_t *font, + hb_font_funcs_t *klass, + void *font_data, + hb_destroy_func_t destroy); + +/* Be *very* careful with this function! */ +HB_EXTERN void +hb_font_set_funcs_data (hb_font_t *font, + void *font_data, + hb_destroy_func_t destroy); + + +HB_EXTERN void +hb_font_set_scale (hb_font_t *font, + int x_scale, + int y_scale); + +HB_EXTERN void +hb_font_get_scale (hb_font_t *font, + int *x_scale, + int *y_scale); + +/* + * A zero value means "no hinting in that direction" + */ +HB_EXTERN void +hb_font_set_ppem (hb_font_t *font, + unsigned int x_ppem, + unsigned int y_ppem); + +HB_EXTERN void +hb_font_get_ppem (hb_font_t *font, + unsigned int *x_ppem, + unsigned int *y_ppem); + +/* + * Point size per EM. Used for optical-sizing in CoreText. + * A value of zero means "not set". + */ +HB_EXTERN void +hb_font_set_ptem (hb_font_t *font, float ptem); + +HB_EXTERN float +hb_font_get_ptem (hb_font_t *font); + +HB_EXTERN void +hb_font_set_variations (hb_font_t *font, + const hb_variation_t *variations, + unsigned int variations_length); + +HB_EXTERN void +hb_font_set_var_coords_design (hb_font_t *font, + const float *coords, + unsigned int coords_length); + +#ifdef HB_EXPERIMENTAL_API +HB_EXTERN const float * +hb_font_get_var_coords_design (hb_font_t *font, + unsigned int *length); +#endif + +HB_EXTERN void +hb_font_set_var_coords_normalized (hb_font_t *font, + const int *coords, /* 2.14 normalized */ + unsigned int coords_length); + +HB_EXTERN const int * +hb_font_get_var_coords_normalized (hb_font_t *font, + unsigned int *length); + +HB_EXTERN void +hb_font_set_var_named_instance (hb_font_t *font, + unsigned instance_index); + +#ifdef HB_EXPERIMENTAL_API +HB_EXTERN hb_bool_t +hb_font_draw_glyph (hb_font_t *font, hb_codepoint_t glyph, + const hb_draw_funcs_t *funcs, void *user_data); +#endif + +HB_END_DECLS + +#endif /* HB_FONT_H */ diff --git a/thirdparty/harfbuzz/src/hb-font.hh b/thirdparty/harfbuzz/src/hb-font.hh new file mode 100644 index 0000000000..8fc7f44d44 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-font.hh @@ -0,0 +1,632 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_FONT_HH +#define HB_FONT_HH + +#include "hb.hh" + +#include "hb-face.hh" +#include "hb-shaper.hh" + + +/* + * hb_font_funcs_t + */ + +#define HB_FONT_FUNCS_IMPLEMENT_CALLBACKS \ + HB_FONT_FUNC_IMPLEMENT (font_h_extents) \ + HB_FONT_FUNC_IMPLEMENT (font_v_extents) \ + HB_FONT_FUNC_IMPLEMENT (nominal_glyph) \ + HB_FONT_FUNC_IMPLEMENT (nominal_glyphs) \ + HB_FONT_FUNC_IMPLEMENT (variation_glyph) \ + HB_FONT_FUNC_IMPLEMENT (glyph_h_advance) \ + HB_FONT_FUNC_IMPLEMENT (glyph_v_advance) \ + HB_FONT_FUNC_IMPLEMENT (glyph_h_advances) \ + HB_FONT_FUNC_IMPLEMENT (glyph_v_advances) \ + HB_FONT_FUNC_IMPLEMENT (glyph_h_origin) \ + HB_FONT_FUNC_IMPLEMENT (glyph_v_origin) \ + HB_FONT_FUNC_IMPLEMENT (glyph_h_kerning) \ + HB_IF_NOT_DEPRECATED (HB_FONT_FUNC_IMPLEMENT (glyph_v_kerning)) \ + HB_FONT_FUNC_IMPLEMENT (glyph_extents) \ + HB_FONT_FUNC_IMPLEMENT (glyph_contour_point) \ + HB_FONT_FUNC_IMPLEMENT (glyph_name) \ + HB_FONT_FUNC_IMPLEMENT (glyph_from_name) \ + /* ^--- Add new callbacks here */ + +struct hb_font_funcs_t +{ + hb_object_header_t header; + + struct { +#define HB_FONT_FUNC_IMPLEMENT(name) void *name; + HB_FONT_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_FONT_FUNC_IMPLEMENT + } user_data; + + struct { +#define HB_FONT_FUNC_IMPLEMENT(name) hb_destroy_func_t name; + HB_FONT_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_FONT_FUNC_IMPLEMENT + } destroy; + + /* Don't access these directly. Call font->get_*() instead. */ + union get_t { + struct get_funcs_t { +#define HB_FONT_FUNC_IMPLEMENT(name) hb_font_get_##name##_func_t name; + HB_FONT_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_FONT_FUNC_IMPLEMENT + } f; + void (*array[0 +#define HB_FONT_FUNC_IMPLEMENT(name) +1 + HB_FONT_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_FONT_FUNC_IMPLEMENT + ]) (); + } get; +}; +DECLARE_NULL_INSTANCE (hb_font_funcs_t); + + +/* + * hb_font_t + */ + +#define HB_SHAPER_IMPLEMENT(shaper) HB_SHAPER_DATA_INSTANTIATE_SHAPERS(shaper, font); +#include "hb-shaper-list.hh" +#undef HB_SHAPER_IMPLEMENT + +struct hb_font_t +{ + hb_object_header_t header; + + hb_font_t *parent; + hb_face_t *face; + + int32_t x_scale; + int32_t y_scale; + int64_t x_mult; + int64_t y_mult; + + unsigned int x_ppem; + unsigned int y_ppem; + + float ptem; + + /* Font variation coordinates. */ + unsigned int num_coords; + int *coords; + float *design_coords; + + hb_font_funcs_t *klass; + void *user_data; + hb_destroy_func_t destroy; + + hb_shaper_object_dataset_t<hb_font_t> data; /* Various shaper data. */ + + + /* Convert from font-space to user-space */ + int64_t dir_mult (hb_direction_t direction) + { return HB_DIRECTION_IS_VERTICAL(direction) ? y_mult : x_mult; } + hb_position_t em_scale_x (int16_t v) { return em_mult (v, x_mult); } + hb_position_t em_scale_y (int16_t v) { return em_mult (v, y_mult); } + hb_position_t em_scalef_x (float v) { return em_scalef (v, x_scale); } + hb_position_t em_scalef_y (float v) { return em_scalef (v, y_scale); } + float em_fscale_x (int16_t v) { return em_fscale (v, x_scale); } + float em_fscale_y (int16_t v) { return em_fscale (v, y_scale); } + hb_position_t em_scale_dir (int16_t v, hb_direction_t direction) + { return em_mult (v, dir_mult (direction)); } + + /* Convert from parent-font user-space to our user-space */ + hb_position_t parent_scale_x_distance (hb_position_t v) + { + if (unlikely (parent && parent->x_scale != x_scale)) + return (hb_position_t) (v * (int64_t) this->x_scale / this->parent->x_scale); + return v; + } + hb_position_t parent_scale_y_distance (hb_position_t v) + { + if (unlikely (parent && parent->y_scale != y_scale)) + return (hb_position_t) (v * (int64_t) this->y_scale / this->parent->y_scale); + return v; + } + hb_position_t parent_scale_x_position (hb_position_t v) + { return parent_scale_x_distance (v); } + hb_position_t parent_scale_y_position (hb_position_t v) + { return parent_scale_y_distance (v); } + + void parent_scale_distance (hb_position_t *x, hb_position_t *y) + { + *x = parent_scale_x_distance (*x); + *y = parent_scale_y_distance (*y); + } + void parent_scale_position (hb_position_t *x, hb_position_t *y) + { + *x = parent_scale_x_position (*x); + *y = parent_scale_y_position (*y); + } + + + /* Public getters */ + + HB_INTERNAL bool has_func (unsigned int i); + HB_INTERNAL bool has_func_set (unsigned int i); + + /* has_* ... */ +#define HB_FONT_FUNC_IMPLEMENT(name) \ + bool \ + has_##name##_func () \ + { \ + hb_font_funcs_t *funcs = this->klass; \ + unsigned int i = offsetof (hb_font_funcs_t::get_t::get_funcs_t, name) / sizeof (funcs->get.array[0]); \ + return has_func (i); \ + } \ + bool \ + has_##name##_func_set () \ + { \ + hb_font_funcs_t *funcs = this->klass; \ + unsigned int i = offsetof (hb_font_funcs_t::get_t::get_funcs_t, name) / sizeof (funcs->get.array[0]); \ + return has_func_set (i); \ + } + HB_FONT_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_FONT_FUNC_IMPLEMENT + + hb_bool_t get_font_h_extents (hb_font_extents_t *extents) + { + memset (extents, 0, sizeof (*extents)); + return klass->get.f.font_h_extents (this, user_data, + extents, + klass->user_data.font_h_extents); + } + hb_bool_t get_font_v_extents (hb_font_extents_t *extents) + { + memset (extents, 0, sizeof (*extents)); + return klass->get.f.font_v_extents (this, user_data, + extents, + klass->user_data.font_v_extents); + } + + bool has_glyph (hb_codepoint_t unicode) + { + hb_codepoint_t glyph; + return get_nominal_glyph (unicode, &glyph); + } + + hb_bool_t get_nominal_glyph (hb_codepoint_t unicode, + hb_codepoint_t *glyph) + { + *glyph = 0; + return klass->get.f.nominal_glyph (this, user_data, + unicode, glyph, + klass->user_data.nominal_glyph); + } + unsigned int get_nominal_glyphs (unsigned int count, + const hb_codepoint_t *first_unicode, + unsigned int unicode_stride, + hb_codepoint_t *first_glyph, + unsigned int glyph_stride) + { + return klass->get.f.nominal_glyphs (this, user_data, + count, + first_unicode, unicode_stride, + first_glyph, glyph_stride, + klass->user_data.nominal_glyphs); + } + + hb_bool_t get_variation_glyph (hb_codepoint_t unicode, hb_codepoint_t variation_selector, + hb_codepoint_t *glyph) + { + *glyph = 0; + return klass->get.f.variation_glyph (this, user_data, + unicode, variation_selector, glyph, + klass->user_data.variation_glyph); + } + + hb_position_t get_glyph_h_advance (hb_codepoint_t glyph) + { + return klass->get.f.glyph_h_advance (this, user_data, + glyph, + klass->user_data.glyph_h_advance); + } + + hb_position_t get_glyph_v_advance (hb_codepoint_t glyph) + { + return klass->get.f.glyph_v_advance (this, user_data, + glyph, + klass->user_data.glyph_v_advance); + } + + void get_glyph_h_advances (unsigned int count, + const hb_codepoint_t *first_glyph, + unsigned int glyph_stride, + hb_position_t *first_advance, + unsigned int advance_stride) + { + return klass->get.f.glyph_h_advances (this, user_data, + count, + first_glyph, glyph_stride, + first_advance, advance_stride, + klass->user_data.glyph_h_advances); + } + + void get_glyph_v_advances (unsigned int count, + const hb_codepoint_t *first_glyph, + unsigned int glyph_stride, + hb_position_t *first_advance, + unsigned int advance_stride) + { + return klass->get.f.glyph_v_advances (this, user_data, + count, + first_glyph, glyph_stride, + first_advance, advance_stride, + klass->user_data.glyph_v_advances); + } + + hb_bool_t get_glyph_h_origin (hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y) + { + *x = *y = 0; + return klass->get.f.glyph_h_origin (this, user_data, + glyph, x, y, + klass->user_data.glyph_h_origin); + } + + hb_bool_t get_glyph_v_origin (hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y) + { + *x = *y = 0; + return klass->get.f.glyph_v_origin (this, user_data, + glyph, x, y, + klass->user_data.glyph_v_origin); + } + + hb_position_t get_glyph_h_kerning (hb_codepoint_t left_glyph, + hb_codepoint_t right_glyph) + { +#ifdef HB_DISABLE_DEPRECATED + return 0; +#else + return klass->get.f.glyph_h_kerning (this, user_data, + left_glyph, right_glyph, + klass->user_data.glyph_h_kerning); +#endif + } + + hb_position_t get_glyph_v_kerning (hb_codepoint_t top_glyph, + hb_codepoint_t bottom_glyph) + { +#ifdef HB_DISABLE_DEPRECATED + return 0; +#else + return klass->get.f.glyph_v_kerning (this, user_data, + top_glyph, bottom_glyph, + klass->user_data.glyph_v_kerning); +#endif + } + + hb_bool_t get_glyph_extents (hb_codepoint_t glyph, + hb_glyph_extents_t *extents) + { + memset (extents, 0, sizeof (*extents)); + return klass->get.f.glyph_extents (this, user_data, + glyph, + extents, + klass->user_data.glyph_extents); + } + + hb_bool_t get_glyph_contour_point (hb_codepoint_t glyph, unsigned int point_index, + hb_position_t *x, hb_position_t *y) + { + *x = *y = 0; + return klass->get.f.glyph_contour_point (this, user_data, + glyph, point_index, + x, y, + klass->user_data.glyph_contour_point); + } + + hb_bool_t get_glyph_name (hb_codepoint_t glyph, + char *name, unsigned int size) + { + if (size) *name = '\0'; + return klass->get.f.glyph_name (this, user_data, + glyph, + name, size, + klass->user_data.glyph_name); + } + + hb_bool_t get_glyph_from_name (const char *name, int len, /* -1 means nul-terminated */ + hb_codepoint_t *glyph) + { + *glyph = 0; + if (len == -1) len = strlen (name); + return klass->get.f.glyph_from_name (this, user_data, + name, len, + glyph, + klass->user_data.glyph_from_name); + } + + + /* A bit higher-level, and with fallback */ + + void get_h_extents_with_fallback (hb_font_extents_t *extents) + { + if (!get_font_h_extents (extents)) + { + extents->ascender = y_scale * .8; + extents->descender = extents->ascender - y_scale; + extents->line_gap = 0; + } + } + void get_v_extents_with_fallback (hb_font_extents_t *extents) + { + if (!get_font_v_extents (extents)) + { + extents->ascender = x_scale / 2; + extents->descender = extents->ascender - x_scale; + extents->line_gap = 0; + } + } + + void get_extents_for_direction (hb_direction_t direction, + hb_font_extents_t *extents) + { + if (likely (HB_DIRECTION_IS_HORIZONTAL (direction))) + get_h_extents_with_fallback (extents); + else + get_v_extents_with_fallback (extents); + } + + void get_glyph_advance_for_direction (hb_codepoint_t glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y) + { + *x = *y = 0; + if (likely (HB_DIRECTION_IS_HORIZONTAL (direction))) + *x = get_glyph_h_advance (glyph); + else + *y = get_glyph_v_advance (glyph); + } + void get_glyph_advances_for_direction (hb_direction_t direction, + unsigned int count, + const hb_codepoint_t *first_glyph, + unsigned glyph_stride, + hb_position_t *first_advance, + unsigned advance_stride) + { + if (likely (HB_DIRECTION_IS_HORIZONTAL (direction))) + get_glyph_h_advances (count, first_glyph, glyph_stride, first_advance, advance_stride); + else + get_glyph_v_advances (count, first_glyph, glyph_stride, first_advance, advance_stride); + } + + void guess_v_origin_minus_h_origin (hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y) + { + *x = get_glyph_h_advance (glyph) / 2; + + /* TODO cache this somehow?! */ + hb_font_extents_t extents; + get_h_extents_with_fallback (&extents); + *y = extents.ascender; + } + + void get_glyph_h_origin_with_fallback (hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y) + { + if (!get_glyph_h_origin (glyph, x, y) && + get_glyph_v_origin (glyph, x, y)) + { + hb_position_t dx, dy; + guess_v_origin_minus_h_origin (glyph, &dx, &dy); + *x -= dx; *y -= dy; + } + } + void get_glyph_v_origin_with_fallback (hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y) + { + if (!get_glyph_v_origin (glyph, x, y) && + get_glyph_h_origin (glyph, x, y)) + { + hb_position_t dx, dy; + guess_v_origin_minus_h_origin (glyph, &dx, &dy); + *x += dx; *y += dy; + } + } + + void get_glyph_origin_for_direction (hb_codepoint_t glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y) + { + if (likely (HB_DIRECTION_IS_HORIZONTAL (direction))) + get_glyph_h_origin_with_fallback (glyph, x, y); + else + get_glyph_v_origin_with_fallback (glyph, x, y); + } + + void add_glyph_h_origin (hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y) + { + hb_position_t origin_x, origin_y; + + get_glyph_h_origin_with_fallback (glyph, &origin_x, &origin_y); + + *x += origin_x; + *y += origin_y; + } + void add_glyph_v_origin (hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y) + { + hb_position_t origin_x, origin_y; + + get_glyph_v_origin_with_fallback (glyph, &origin_x, &origin_y); + + *x += origin_x; + *y += origin_y; + } + void add_glyph_origin_for_direction (hb_codepoint_t glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y) + { + hb_position_t origin_x, origin_y; + + get_glyph_origin_for_direction (glyph, direction, &origin_x, &origin_y); + + *x += origin_x; + *y += origin_y; + } + + void subtract_glyph_h_origin (hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y) + { + hb_position_t origin_x, origin_y; + + get_glyph_h_origin_with_fallback (glyph, &origin_x, &origin_y); + + *x -= origin_x; + *y -= origin_y; + } + void subtract_glyph_v_origin (hb_codepoint_t glyph, + hb_position_t *x, hb_position_t *y) + { + hb_position_t origin_x, origin_y; + + get_glyph_v_origin_with_fallback (glyph, &origin_x, &origin_y); + + *x -= origin_x; + *y -= origin_y; + } + void subtract_glyph_origin_for_direction (hb_codepoint_t glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y) + { + hb_position_t origin_x, origin_y; + + get_glyph_origin_for_direction (glyph, direction, &origin_x, &origin_y); + + *x -= origin_x; + *y -= origin_y; + } + + void get_glyph_kerning_for_direction (hb_codepoint_t first_glyph, hb_codepoint_t second_glyph, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y) + { + if (likely (HB_DIRECTION_IS_HORIZONTAL (direction))) { + *y = 0; + *x = get_glyph_h_kerning (first_glyph, second_glyph); + } else { + *x = 0; + *y = get_glyph_v_kerning (first_glyph, second_glyph); + } + } + + hb_bool_t get_glyph_extents_for_origin (hb_codepoint_t glyph, + hb_direction_t direction, + hb_glyph_extents_t *extents) + { + hb_bool_t ret = get_glyph_extents (glyph, extents); + + if (ret) + subtract_glyph_origin_for_direction (glyph, direction, &extents->x_bearing, &extents->y_bearing); + + return ret; + } + + hb_bool_t get_glyph_contour_point_for_origin (hb_codepoint_t glyph, unsigned int point_index, + hb_direction_t direction, + hb_position_t *x, hb_position_t *y) + { + hb_bool_t ret = get_glyph_contour_point (glyph, point_index, x, y); + + if (ret) + subtract_glyph_origin_for_direction (glyph, direction, x, y); + + return ret; + } + + /* Generates gidDDD if glyph has no name. */ + void + glyph_to_string (hb_codepoint_t glyph, + char *s, unsigned int size) + { + if (get_glyph_name (glyph, s, size)) return; + + if (size && snprintf (s, size, "gid%u", glyph) < 0) + *s = '\0'; + } + + /* Parses gidDDD and uniUUUU strings automatically. */ + hb_bool_t + glyph_from_string (const char *s, int len, /* -1 means nul-terminated */ + hb_codepoint_t *glyph) + { + if (get_glyph_from_name (s, len, glyph)) return true; + + if (len == -1) len = strlen (s); + + /* Straight glyph index. */ + if (hb_codepoint_parse (s, len, 10, glyph)) + return true; + + if (len > 3) + { + /* gidDDD syntax for glyph indices. */ + if (0 == strncmp (s, "gid", 3) && + hb_codepoint_parse (s + 3, len - 3, 10, glyph)) + return true; + + /* uniUUUU and other Unicode character indices. */ + hb_codepoint_t unichar; + if (0 == strncmp (s, "uni", 3) && + hb_codepoint_parse (s + 3, len - 3, 16, &unichar) && + get_nominal_glyph (unichar, glyph)) + return true; + } + + return false; + } + + void mults_changed () + { + signed upem = face->get_upem (); + x_mult = ((int64_t) x_scale << 16) / upem; + y_mult = ((int64_t) y_scale << 16) / upem; + } + + hb_position_t em_mult (int16_t v, int64_t mult) + { + return (hb_position_t) ((v * mult) >> 16); + } + hb_position_t em_scalef (float v, int scale) + { return (hb_position_t) roundf (v * scale / face->get_upem ()); } + float em_fscale (int16_t v, int scale) + { return (float) v * scale / face->get_upem (); } +}; +DECLARE_NULL_INSTANCE (hb_font_t); + + +#endif /* HB_FONT_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ft.cc b/thirdparty/harfbuzz/src/hb-ft.cc new file mode 100644 index 0000000000..2680873c27 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ft.cc @@ -0,0 +1,1042 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2009 Keith Stribley + * Copyright © 2015 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifdef HAVE_FREETYPE + +#include "hb-ft.h" + +#include "hb-font.hh" +#include "hb-machinery.hh" +#include "hb-cache.hh" + +#include FT_ADVANCES_H +#include FT_MULTIPLE_MASTERS_H +#include FT_TRUETYPE_TABLES_H + + +/** + * SECTION:hb-ft + * @title: hb-ft + * @short_description: FreeType integration + * @include: hb-ft.h + * + * Functions for using HarfBuzz with the FreeType library. + * + * HarfBuzz supports using FreeType to provide face and + * font data. + * + * <note>Note that FreeType is not thread-safe, therefore these + * functions are not thread-safe either.</note> + **/ + + +/* TODO: + * + * In general, this file does a fine job of what it's supposed to do. + * There are, however, things that need more work: + * + * - FreeType works in 26.6 mode. Clients can decide to use that mode, and everything + * would work fine. However, we also abuse this API for performing in font-space, + * but don't pass the correct flags to FreeType. We just abuse the no-hinting mode + * for that, such that no rounding etc happens. As such, we don't set ppem, and + * pass NO_HINTING as load_flags. Would be much better to use NO_SCALE, and scale + * ourselves. + * + * - We don't handle / allow for emboldening / obliqueing. + * + * - In the future, we should add constructors to create fonts in font space? + */ + + +struct hb_ft_font_t +{ + mutable hb_mutex_t lock; + FT_Face ft_face; + int load_flags; + bool symbol; /* Whether selected cmap is symbol cmap. */ + bool unref; /* Whether to destroy ft_face when done. */ + + mutable hb_atomic_int_t cached_x_scale; + mutable hb_advance_cache_t advance_cache; +}; + +static hb_ft_font_t * +_hb_ft_font_create (FT_Face ft_face, bool symbol, bool unref) +{ + hb_ft_font_t *ft_font = (hb_ft_font_t *) calloc (1, sizeof (hb_ft_font_t)); + if (unlikely (!ft_font)) return nullptr; + + ft_font->lock.init (); + ft_font->ft_face = ft_face; + ft_font->symbol = symbol; + ft_font->unref = unref; + + ft_font->load_flags = FT_LOAD_DEFAULT | FT_LOAD_NO_HINTING; + + ft_font->cached_x_scale.set_relaxed (0); + ft_font->advance_cache.init (); + + return ft_font; +} + +static void +_hb_ft_face_destroy (void *data) +{ + FT_Done_Face ((FT_Face) data); +} + +static void +_hb_ft_font_destroy (void *data) +{ + hb_ft_font_t *ft_font = (hb_ft_font_t *) data; + + ft_font->advance_cache.fini (); + + if (ft_font->unref) + _hb_ft_face_destroy (ft_font->ft_face); + + ft_font->lock.fini (); + + free (ft_font); +} + +/** + * hb_ft_font_set_load_flags: + * @font: #hb_font_t to work upon + * @load_flags: The FreeType load flags to set + * + * Sets the FT_Load_Glyph load flags for the specified #hb_font_t. + * + * For more information, see + * https://www.freetype.org/freetype2/docs/reference/ft2-base_interface.html#ft_load_xxx + * + * Since: 1.0.5 + **/ +void +hb_ft_font_set_load_flags (hb_font_t *font, int load_flags) +{ + if (hb_object_is_immutable (font)) + return; + + if (unlikely (font->destroy != (hb_destroy_func_t) _hb_ft_font_destroy)) + return; + + hb_ft_font_t *ft_font = (hb_ft_font_t *) font->user_data; + + ft_font->load_flags = load_flags; +} + +/** + * hb_ft_font_get_load_flags: + * @font: #hb_font_t to work upon + * + * Fetches the FT_Load_Glyph load flags of the specified #hb_font_t. + * + * For more information, see + * https://www.freetype.org/freetype2/docs/reference/ft2-base_interface.html#ft_load_xxx + * + * Return value: FT_Load_Glyph flags found + * + * Since: 1.0.5 + **/ +int +hb_ft_font_get_load_flags (hb_font_t *font) +{ + if (unlikely (font->destroy != (hb_destroy_func_t) _hb_ft_font_destroy)) + return 0; + + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font->user_data; + + return ft_font->load_flags; +} + +/** + * hb_ft_get_face: + * @font: #hb_font_t to work upon + * + * Fetches the FT_Face associated with the specified #hb_font_t + * font object. + * + * Return value: the FT_Face found + * + * Since: 0.9.2 + **/ +FT_Face +hb_ft_font_get_face (hb_font_t *font) +{ + if (unlikely (font->destroy != (hb_destroy_func_t) _hb_ft_font_destroy)) + return nullptr; + + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font->user_data; + + return ft_font->ft_face; +} + +/** + * hb_ft_font_lock_face: + * @font: + * + * + * + * Return value: + * Since: 2.6.5 + **/ +FT_Face +hb_ft_font_lock_face (hb_font_t *font) +{ + if (unlikely (font->destroy != (hb_destroy_func_t) _hb_ft_font_destroy)) + return nullptr; + + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font->user_data; + + ft_font->lock.lock (); + + return ft_font->ft_face; +} + +/** + * hb_ft_font_unlock_face: + * @font: + * + * + * + * Return value: + * Since: 2.6.5 + **/ +void +hb_ft_font_unlock_face (hb_font_t *font) +{ + if (unlikely (font->destroy != (hb_destroy_func_t) _hb_ft_font_destroy)) + return; + + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font->user_data; + + ft_font->lock.unlock (); +} + + +static hb_bool_t +hb_ft_get_nominal_glyph (hb_font_t *font HB_UNUSED, + void *font_data, + hb_codepoint_t unicode, + hb_codepoint_t *glyph, + void *user_data HB_UNUSED) +{ + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data; + hb_lock_t lock (ft_font->lock); + unsigned int g = FT_Get_Char_Index (ft_font->ft_face, unicode); + + if (unlikely (!g)) + { + if (unlikely (ft_font->symbol) && unicode <= 0x00FFu) + { + /* For symbol-encoded OpenType fonts, we duplicate the + * U+F000..F0FF range at U+0000..U+00FF. That's what + * Windows seems to do, and that's hinted about at: + * https://docs.microsoft.com/en-us/typography/opentype/spec/recom + * under "Non-Standard (Symbol) Fonts". */ + g = FT_Get_Char_Index (ft_font->ft_face, 0xF000u + unicode); + if (!g) + return false; + } + else + return false; + } + + *glyph = g; + return true; +} + +static unsigned int +hb_ft_get_nominal_glyphs (hb_font_t *font HB_UNUSED, + void *font_data, + unsigned int count, + const hb_codepoint_t *first_unicode, + unsigned int unicode_stride, + hb_codepoint_t *first_glyph, + unsigned int glyph_stride, + void *user_data HB_UNUSED) +{ + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data; + hb_lock_t lock (ft_font->lock); + unsigned int done; + for (done = 0; + done < count && (*first_glyph = FT_Get_Char_Index (ft_font->ft_face, *first_unicode)); + done++) + { + first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride); + first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride); + } + /* We don't need to do ft_font->symbol dance here, since HB calls the singular + * nominal_glyph() for what we don't handle here. */ + return done; +} + + +static hb_bool_t +hb_ft_get_variation_glyph (hb_font_t *font HB_UNUSED, + void *font_data, + hb_codepoint_t unicode, + hb_codepoint_t variation_selector, + hb_codepoint_t *glyph, + void *user_data HB_UNUSED) +{ + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data; + hb_lock_t lock (ft_font->lock); + unsigned int g = FT_Face_GetCharVariantIndex (ft_font->ft_face, unicode, variation_selector); + + if (unlikely (!g)) + return false; + + *glyph = g; + return true; +} + +static void +hb_ft_get_glyph_h_advances (hb_font_t* font, void* font_data, + unsigned count, + const hb_codepoint_t *first_glyph, + unsigned glyph_stride, + hb_position_t *first_advance, + unsigned advance_stride, + void *user_data HB_UNUSED) +{ + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data; + hb_lock_t lock (ft_font->lock); + FT_Face ft_face = ft_font->ft_face; + int load_flags = ft_font->load_flags; + int mult = font->x_scale < 0 ? -1 : +1; + + if (font->x_scale != ft_font->cached_x_scale.get ()) + { + ft_font->advance_cache.clear (); + ft_font->cached_x_scale.set (font->x_scale); + } + + for (unsigned int i = 0; i < count; i++) + { + FT_Fixed v = 0; + hb_codepoint_t glyph = *first_glyph; + + unsigned int cv; + if (ft_font->advance_cache.get (glyph, &cv)) + v = cv; + else + { + FT_Get_Advance (ft_face, glyph, load_flags, &v); + ft_font->advance_cache.set (glyph, v); + } + + *first_advance = (v * mult + (1<<9)) >> 10; + first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride); + first_advance = &StructAtOffsetUnaligned<hb_position_t> (first_advance, advance_stride); + } +} + +static hb_position_t +hb_ft_get_glyph_v_advance (hb_font_t *font, + void *font_data, + hb_codepoint_t glyph, + void *user_data HB_UNUSED) +{ + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data; + hb_lock_t lock (ft_font->lock); + FT_Fixed v; + + if (unlikely (FT_Get_Advance (ft_font->ft_face, glyph, ft_font->load_flags | FT_LOAD_VERTICAL_LAYOUT, &v))) + return 0; + + if (font->y_scale < 0) + v = -v; + + /* Note: FreeType's vertical metrics grows downward while other FreeType coordinates + * have a Y growing upward. Hence the extra negation. */ + return (-v + (1<<9)) >> 10; +} + +static hb_bool_t +hb_ft_get_glyph_v_origin (hb_font_t *font, + void *font_data, + hb_codepoint_t glyph, + hb_position_t *x, + hb_position_t *y, + void *user_data HB_UNUSED) +{ + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data; + hb_lock_t lock (ft_font->lock); + FT_Face ft_face = ft_font->ft_face; + + if (unlikely (FT_Load_Glyph (ft_face, glyph, ft_font->load_flags))) + return false; + + /* Note: FreeType's vertical metrics grows downward while other FreeType coordinates + * have a Y growing upward. Hence the extra negation. */ + *x = ft_face->glyph->metrics.horiBearingX - ft_face->glyph->metrics.vertBearingX; + *y = ft_face->glyph->metrics.horiBearingY - (-ft_face->glyph->metrics.vertBearingY); + + if (font->x_scale < 0) + *x = -*x; + if (font->y_scale < 0) + *y = -*y; + + return true; +} + +#ifndef HB_NO_OT_SHAPE_FALLBACK +static hb_position_t +hb_ft_get_glyph_h_kerning (hb_font_t *font, + void *font_data, + hb_codepoint_t left_glyph, + hb_codepoint_t right_glyph, + void *user_data HB_UNUSED) +{ + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data; + FT_Vector kerningv; + + FT_Kerning_Mode mode = font->x_ppem ? FT_KERNING_DEFAULT : FT_KERNING_UNFITTED; + if (FT_Get_Kerning (ft_font->ft_face, left_glyph, right_glyph, mode, &kerningv)) + return 0; + + return kerningv.x; +} +#endif + +static hb_bool_t +hb_ft_get_glyph_extents (hb_font_t *font, + void *font_data, + hb_codepoint_t glyph, + hb_glyph_extents_t *extents, + void *user_data HB_UNUSED) +{ + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data; + hb_lock_t lock (ft_font->lock); + FT_Face ft_face = ft_font->ft_face; + + if (unlikely (FT_Load_Glyph (ft_face, glyph, ft_font->load_flags))) + return false; + + extents->x_bearing = ft_face->glyph->metrics.horiBearingX; + extents->y_bearing = ft_face->glyph->metrics.horiBearingY; + extents->width = ft_face->glyph->metrics.width; + extents->height = -ft_face->glyph->metrics.height; + if (font->x_scale < 0) + { + extents->x_bearing = -extents->x_bearing; + extents->width = -extents->width; + } + if (font->y_scale < 0) + { + extents->y_bearing = -extents->y_bearing; + extents->height = -extents->height; + } + return true; +} + +static hb_bool_t +hb_ft_get_glyph_contour_point (hb_font_t *font HB_UNUSED, + void *font_data, + hb_codepoint_t glyph, + unsigned int point_index, + hb_position_t *x, + hb_position_t *y, + void *user_data HB_UNUSED) +{ + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data; + hb_lock_t lock (ft_font->lock); + FT_Face ft_face = ft_font->ft_face; + + if (unlikely (FT_Load_Glyph (ft_face, glyph, ft_font->load_flags))) + return false; + + if (unlikely (ft_face->glyph->format != FT_GLYPH_FORMAT_OUTLINE)) + return false; + + if (unlikely (point_index >= (unsigned int) ft_face->glyph->outline.n_points)) + return false; + + *x = ft_face->glyph->outline.points[point_index].x; + *y = ft_face->glyph->outline.points[point_index].y; + + return true; +} + +static hb_bool_t +hb_ft_get_glyph_name (hb_font_t *font HB_UNUSED, + void *font_data, + hb_codepoint_t glyph, + char *name, unsigned int size, + void *user_data HB_UNUSED) +{ + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data; + hb_lock_t lock (ft_font->lock); + FT_Face ft_face = ft_font->ft_face; + + hb_bool_t ret = !FT_Get_Glyph_Name (ft_face, glyph, name, size); + if (ret && (size && !*name)) + ret = false; + + return ret; +} + +static hb_bool_t +hb_ft_get_glyph_from_name (hb_font_t *font HB_UNUSED, + void *font_data, + const char *name, int len, /* -1 means nul-terminated */ + hb_codepoint_t *glyph, + void *user_data HB_UNUSED) +{ + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data; + hb_lock_t lock (ft_font->lock); + FT_Face ft_face = ft_font->ft_face; + + if (len < 0) + *glyph = FT_Get_Name_Index (ft_face, (FT_String *) name); + else { + /* Make a nul-terminated version. */ + char buf[128]; + len = hb_min (len, (int) sizeof (buf) - 1); + strncpy (buf, name, len); + buf[len] = '\0'; + *glyph = FT_Get_Name_Index (ft_face, buf); + } + + if (*glyph == 0) + { + /* Check whether the given name was actually the name of glyph 0. */ + char buf[128]; + if (!FT_Get_Glyph_Name(ft_face, 0, buf, sizeof (buf)) && + len < 0 ? !strcmp (buf, name) : !strncmp (buf, name, len)) + return true; + } + + return *glyph != 0; +} + +static hb_bool_t +hb_ft_get_font_h_extents (hb_font_t *font HB_UNUSED, + void *font_data, + hb_font_extents_t *metrics, + void *user_data HB_UNUSED) +{ + const hb_ft_font_t *ft_font = (const hb_ft_font_t *) font_data; + hb_lock_t lock (ft_font->lock); + FT_Face ft_face = ft_font->ft_face; + metrics->ascender = FT_MulFix(ft_face->ascender, ft_face->size->metrics.y_scale); + metrics->descender = FT_MulFix(ft_face->descender, ft_face->size->metrics.y_scale); + metrics->line_gap = FT_MulFix( ft_face->height, ft_face->size->metrics.y_scale ) - (metrics->ascender - metrics->descender); + if (font->y_scale < 0) + { + metrics->ascender = -metrics->ascender; + metrics->descender = -metrics->descender; + metrics->line_gap = -metrics->line_gap; + } + return true; +} + +#if HB_USE_ATEXIT +static void free_static_ft_funcs (); +#endif + +static struct hb_ft_font_funcs_lazy_loader_t : hb_font_funcs_lazy_loader_t<hb_ft_font_funcs_lazy_loader_t> +{ + static hb_font_funcs_t *create () + { + hb_font_funcs_t *funcs = hb_font_funcs_create (); + + hb_font_funcs_set_font_h_extents_func (funcs, hb_ft_get_font_h_extents, nullptr, nullptr); + //hb_font_funcs_set_font_v_extents_func (funcs, hb_ft_get_font_v_extents, nullptr, nullptr); + hb_font_funcs_set_nominal_glyph_func (funcs, hb_ft_get_nominal_glyph, nullptr, nullptr); + hb_font_funcs_set_nominal_glyphs_func (funcs, hb_ft_get_nominal_glyphs, nullptr, nullptr); + hb_font_funcs_set_variation_glyph_func (funcs, hb_ft_get_variation_glyph, nullptr, nullptr); + hb_font_funcs_set_glyph_h_advances_func (funcs, hb_ft_get_glyph_h_advances, nullptr, nullptr); + hb_font_funcs_set_glyph_v_advance_func (funcs, hb_ft_get_glyph_v_advance, nullptr, nullptr); + //hb_font_funcs_set_glyph_h_origin_func (funcs, hb_ft_get_glyph_h_origin, nullptr, nullptr); + hb_font_funcs_set_glyph_v_origin_func (funcs, hb_ft_get_glyph_v_origin, nullptr, nullptr); +#ifndef HB_NO_OT_SHAPE_FALLBACK + hb_font_funcs_set_glyph_h_kerning_func (funcs, hb_ft_get_glyph_h_kerning, nullptr, nullptr); +#endif + //hb_font_funcs_set_glyph_v_kerning_func (funcs, hb_ft_get_glyph_v_kerning, nullptr, nullptr); + hb_font_funcs_set_glyph_extents_func (funcs, hb_ft_get_glyph_extents, nullptr, nullptr); + hb_font_funcs_set_glyph_contour_point_func (funcs, hb_ft_get_glyph_contour_point, nullptr, nullptr); + hb_font_funcs_set_glyph_name_func (funcs, hb_ft_get_glyph_name, nullptr, nullptr); + hb_font_funcs_set_glyph_from_name_func (funcs, hb_ft_get_glyph_from_name, nullptr, nullptr); + + hb_font_funcs_make_immutable (funcs); + +#if HB_USE_ATEXIT + atexit (free_static_ft_funcs); +#endif + + return funcs; + } +} static_ft_funcs; + +#if HB_USE_ATEXIT +static +void free_static_ft_funcs () +{ + static_ft_funcs.free_instance (); +} +#endif + +static hb_font_funcs_t * +_hb_ft_get_font_funcs () +{ + return static_ft_funcs.get_unconst (); +} + +static void +_hb_ft_font_set_funcs (hb_font_t *font, FT_Face ft_face, bool unref) +{ + bool symbol = ft_face->charmap && ft_face->charmap->encoding == FT_ENCODING_MS_SYMBOL; + + hb_ft_font_t *ft_font = _hb_ft_font_create (ft_face, symbol, unref); + if (unlikely (!ft_font)) return; + + hb_font_set_funcs (font, + _hb_ft_get_font_funcs (), + ft_font, + _hb_ft_font_destroy); +} + + +static hb_blob_t * +_hb_ft_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void *user_data) +{ + FT_Face ft_face = (FT_Face) user_data; + FT_Byte *buffer; + FT_ULong length = 0; + FT_Error error; + + /* Note: FreeType like HarfBuzz uses the NONE tag for fetching the entire blob */ + + error = FT_Load_Sfnt_Table (ft_face, tag, 0, nullptr, &length); + if (error) + return nullptr; + + buffer = (FT_Byte *) malloc (length); + if (!buffer) + return nullptr; + + error = FT_Load_Sfnt_Table (ft_face, tag, 0, buffer, &length); + if (error) + { + free (buffer); + return nullptr; + } + + return hb_blob_create ((const char *) buffer, length, + HB_MEMORY_MODE_WRITABLE, + buffer, free); +} + +/** + * hb_ft_face_create: + * @ft_face: (destroy destroy) (scope notified): FT_Face to work upon + * @destroy: A callback to call when the face object is not needed anymore + * + * Creates an #hb_face_t face object from the specified FT_Face. + * + * This variant of the function does not provide any life-cycle management. + * + * Most client programs should use hb_ft_face_create_referenced() + * (or, perhaps, hb_ft_face_create_cached()) instead. + * + * If you know you have valid reasons not to use hb_ft_face_create_referenced(), + * then it is the client program's responsibility to destroy @ft_face + * after the #hb_face_t face object has been destroyed. + * + * Return value: (transfer full): the new #hb_face_t face object + * + * Since: 0.9.2 + **/ +hb_face_t * +hb_ft_face_create (FT_Face ft_face, + hb_destroy_func_t destroy) +{ + hb_face_t *face; + + if (!ft_face->stream->read) { + hb_blob_t *blob; + + blob = hb_blob_create ((const char *) ft_face->stream->base, + (unsigned int) ft_face->stream->size, + HB_MEMORY_MODE_READONLY, + ft_face, destroy); + face = hb_face_create (blob, ft_face->face_index); + hb_blob_destroy (blob); + } else { + face = hb_face_create_for_tables (_hb_ft_reference_table, ft_face, destroy); + } + + hb_face_set_index (face, ft_face->face_index); + hb_face_set_upem (face, ft_face->units_per_EM); + + return face; +} + +/** + * hb_ft_face_create_referenced: + * @ft_face: FT_Face to work upon + * + * Creates an #hb_face_t face object from the specified FT_Face. + * + * This is the preferred variant of the hb_ft_face_create* + * function family, because it calls FT_Reference_Face() on @ft_face, + * ensuring that @ft_face remains alive as long as the resulting + * #hb_face_t face object remains alive. Also calls FT_Done_Face() + * when the #hb_face_t face object is destroyed. + * + * Use this version unless you know you have good reasons not to. + * + * Return value: (transfer full): the new #hb_face_t face object + * + * Since: 0.9.38 + **/ +hb_face_t * +hb_ft_face_create_referenced (FT_Face ft_face) +{ + FT_Reference_Face (ft_face); + return hb_ft_face_create (ft_face, _hb_ft_face_destroy); +} + +static void +hb_ft_face_finalize (FT_Face ft_face) +{ + hb_face_destroy ((hb_face_t *) ft_face->generic.data); +} + +/** + * hb_ft_face_create_cached: + * @ft_face: FT_Face to work upon + * + * Creates an #hb_face_t face object from the specified FT_Face. + * + * This variant of the function caches the newly created #hb_face_t + * face object, using the @generic pointer of @ft_face. Subsequent function + * calls that are passed the same @ft_face parameter will have the same + * #hb_face_t returned to them, and that #hb_face_t will be correctly + * reference counted. + * + * However, client programs are still responsible for destroying + * @ft_face after the last #hb_face_t face object has been destroyed. + * + * Return value: (transfer full): the new #hb_face_t face object + * + * Since: 0.9.2 + **/ +hb_face_t * +hb_ft_face_create_cached (FT_Face ft_face) +{ + if (unlikely (!ft_face->generic.data || ft_face->generic.finalizer != (FT_Generic_Finalizer) hb_ft_face_finalize)) + { + if (ft_face->generic.finalizer) + ft_face->generic.finalizer (ft_face); + + ft_face->generic.data = hb_ft_face_create (ft_face, nullptr); + ft_face->generic.finalizer = (FT_Generic_Finalizer) hb_ft_face_finalize; + } + + return hb_face_reference ((hb_face_t *) ft_face->generic.data); +} + +/** + * hb_ft_font_create: + * @ft_face: (destroy destroy) (scope notified): FT_Face to work upon + * @destroy: (optional): A callback to call when the font object is not needed anymore + * + * Creates an #hb_font_t font object from the specified FT_Face. + * + * <note>Note: You must set the face size on @ft_face before calling + * hb_ft_font_create() on it. Otherwise, HarfBuzz will not pick up + * the face size.</note> + * + * This variant of the function does not provide any life-cycle management. + * + * Most client programs should use hb_ft_font_create_referenced() + * instead. + * + * If you know you have valid reasons not to use hb_ft_font_create_referenced(), + * then it is the client program's responsibility to destroy @ft_face + * after the #hb_font_t font object has been destroyed. + * + * HarfBuzz will use the @destroy callback on the #hb_font_t font object + * if it is supplied when you use this function. However, even if @destroy + * is provided, it is the client program's responsibility to destroy @ft_face, + * and it is the client program's responsibility to ensure that @ft_face is + * destroyed only after the #hb_font_t font object has been destroyed. + * + * Return value: (transfer full): the new #hb_font_t font object + * + * Since: 0.9.2 + **/ +hb_font_t * +hb_ft_font_create (FT_Face ft_face, + hb_destroy_func_t destroy) +{ + hb_font_t *font; + hb_face_t *face; + + face = hb_ft_face_create (ft_face, destroy); + font = hb_font_create (face); + hb_face_destroy (face); + _hb_ft_font_set_funcs (font, ft_face, false); + hb_ft_font_changed (font); + return font; +} + +/** + * hb_ft_font_has_changed: + * @font: #hb_font_t to work upon + * + * Refreshes the state of @font when the underlying FT_Face has changed. + * This function should be called after changing the size or + * variation-axis settings on the FT_Face. + * + * Since: 1.0.5 + **/ +void +hb_ft_font_changed (hb_font_t *font) +{ + if (font->destroy != (hb_destroy_func_t) _hb_ft_font_destroy) + return; + + hb_ft_font_t *ft_font = (hb_ft_font_t *) font->user_data; + + FT_Face ft_face = ft_font->ft_face; + + hb_font_set_scale (font, + (int) (((uint64_t) ft_face->size->metrics.x_scale * (uint64_t) ft_face->units_per_EM + (1u<<15)) >> 16), + (int) (((uint64_t) ft_face->size->metrics.y_scale * (uint64_t) ft_face->units_per_EM + (1u<<15)) >> 16)); +#if 0 /* hb-ft works in no-hinting model */ + hb_font_set_ppem (font, + ft_face->size->metrics.x_ppem, + ft_face->size->metrics.y_ppem); +#endif + +#if defined(HAVE_FT_GET_VAR_BLEND_COORDINATES) && !defined(HB_NO_VAR) + FT_MM_Var *mm_var = nullptr; + if (!FT_Get_MM_Var (ft_face, &mm_var)) + { + FT_Fixed *ft_coords = (FT_Fixed *) calloc (mm_var->num_axis, sizeof (FT_Fixed)); + int *coords = (int *) calloc (mm_var->num_axis, sizeof (int)); + if (coords && ft_coords) + { + if (!FT_Get_Var_Blend_Coordinates (ft_face, mm_var->num_axis, ft_coords)) + { + bool nonzero = false; + + for (unsigned int i = 0; i < mm_var->num_axis; ++i) + { + coords[i] = ft_coords[i] >>= 2; + nonzero = nonzero || coords[i]; + } + + if (nonzero) + hb_font_set_var_coords_normalized (font, coords, mm_var->num_axis); + else + hb_font_set_var_coords_normalized (font, nullptr, 0); + } + } + free (coords); + free (ft_coords); +#ifdef HAVE_FT_DONE_MM_VAR + FT_Done_MM_Var (ft_face->glyph->library, mm_var); +#else + free (mm_var); +#endif + } +#endif +} + +/** + * hb_ft_font_create_referenced: + * @ft_face: FT_Face to work upon + * + * Creates an #hb_font_t font object from the specified FT_Face. + * + * <note>Note: You must set the face size on @ft_face before calling + * hb_ft_font_create_references() on it. Otherwise, HarfBuzz will not pick up + * the face size.</note> + * + * This is the preferred variant of the hb_ft_font_create* + * function family, because it calls FT_Reference_Face() on @ft_face, + * ensuring that @ft_face remains alive as long as the resulting + * #hb_font_t font object remains alive. + * + * Use this version unless you know you have good reasons not to. + * + * Return value: (transfer full): the new #hb_font_t font object + * + * Since: 0.9.38 + **/ +hb_font_t * +hb_ft_font_create_referenced (FT_Face ft_face) +{ + FT_Reference_Face (ft_face); + return hb_ft_font_create (ft_face, _hb_ft_face_destroy); +} + +#if HB_USE_ATEXIT +static void free_static_ft_library (); +#endif + +static struct hb_ft_library_lazy_loader_t : hb_lazy_loader_t<hb_remove_pointer<FT_Library>, + hb_ft_library_lazy_loader_t> +{ + static FT_Library create () + { + FT_Library l; + if (FT_Init_FreeType (&l)) + return nullptr; + +#if HB_USE_ATEXIT + atexit (free_static_ft_library); +#endif + + return l; + } + static void destroy (FT_Library l) + { + FT_Done_FreeType (l); + } + static FT_Library get_null () + { + return nullptr; + } +} static_ft_library; + +#if HB_USE_ATEXIT +static +void free_static_ft_library () +{ + static_ft_library.free_instance (); +} +#endif + +static FT_Library +get_ft_library () +{ + return static_ft_library.get_unconst (); +} + +static void +_release_blob (FT_Face ft_face) +{ + hb_blob_destroy ((hb_blob_t *) ft_face->generic.data); +} + +/** + * hb_ft_font_set_funcs: + * @font: #hb_font_t to work upon + * + * Configures the font-functions structure of the specified + * #hb_font_t font object to use FreeType font functions. + * + * In particular, you can use this function to configure an + * existing #hb_face_t face object for use with FreeType font + * functions even if that #hb_face_t face object was initially + * created with hb_face_create(), and therefore was not + * initially configured to use FreeType font functions. + * + * An #hb_face_t face object created with hb_ft_face_create() + * is preconfigured for FreeType font functions and does not + * require this function to be used. + * + * <note>Note: Internally, this function creates an FT_Face. +* </note> + * + * Since: 1.0.5 + **/ +void +hb_ft_font_set_funcs (hb_font_t *font) +{ + hb_blob_t *blob = hb_face_reference_blob (font->face); + unsigned int blob_length; + const char *blob_data = hb_blob_get_data (blob, &blob_length); + if (unlikely (!blob_length)) + DEBUG_MSG (FT, font, "Font face has empty blob"); + + FT_Face ft_face = nullptr; + FT_Error err = FT_New_Memory_Face (get_ft_library (), + (const FT_Byte *) blob_data, + blob_length, + hb_face_get_index (font->face), + &ft_face); + + if (unlikely (err)) { + hb_blob_destroy (blob); + DEBUG_MSG (FT, font, "Font face FT_New_Memory_Face() failed"); + return; + } + + if (FT_Select_Charmap (ft_face, FT_ENCODING_MS_SYMBOL)) + FT_Select_Charmap (ft_face, FT_ENCODING_UNICODE); + + FT_Set_Char_Size (ft_face, + abs (font->x_scale), abs (font->y_scale), + 0, 0); +#if 0 + font->x_ppem * 72 * 64 / font->x_scale, + font->y_ppem * 72 * 64 / font->y_scale); +#endif + if (font->x_scale < 0 || font->y_scale < 0) + { + FT_Matrix matrix = { font->x_scale < 0 ? -1 : +1, 0, + 0, font->y_scale < 0 ? -1 : +1}; + FT_Set_Transform (ft_face, &matrix, nullptr); + } + +#if defined(HAVE_FT_GET_VAR_BLEND_COORDINATES) && !defined(HB_NO_VAR) + unsigned int num_coords; + const int *coords = hb_font_get_var_coords_normalized (font, &num_coords); + if (num_coords) + { + FT_Fixed *ft_coords = (FT_Fixed *) calloc (num_coords, sizeof (FT_Fixed)); + if (ft_coords) + { + for (unsigned int i = 0; i < num_coords; i++) + ft_coords[i] = coords[i] * 4; + FT_Set_Var_Blend_Coordinates (ft_face, num_coords, ft_coords); + free (ft_coords); + } + } +#endif + + ft_face->generic.data = blob; + ft_face->generic.finalizer = (FT_Generic_Finalizer) _release_blob; + + _hb_ft_font_set_funcs (font, ft_face, true); + hb_ft_font_set_load_flags (font, FT_LOAD_DEFAULT | FT_LOAD_NO_HINTING); +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ft.h b/thirdparty/harfbuzz/src/hb-ft.h new file mode 100644 index 0000000000..bf07115ab9 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ft.h @@ -0,0 +1,138 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2015 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_FT_H +#define HB_FT_H + +#include "hb.h" + +#include <ft2build.h> +#include FT_FREETYPE_H + +HB_BEGIN_DECLS + +/* + * Note: FreeType is not thread-safe. + * Hence, these functions are not either. + */ + +/* + * hb-face from ft-face. + */ + +/* This one creates a new hb-face for given ft-face. + * When the returned hb-face is destroyed, the destroy + * callback is called (if not NULL), with the ft-face passed + * to it. + * + * The client is responsible to make sure that ft-face is + * destroyed after hb-face is destroyed. + * + * Most often you don't want this function. You should use either + * hb_ft_face_create_cached(), or hb_ft_face_create_referenced(). + * In particular, if you are going to pass NULL as destroy, you + * probably should use (the more recent) hb_ft_face_create_referenced() + * instead. + */ +HB_EXTERN hb_face_t * +hb_ft_face_create (FT_Face ft_face, + hb_destroy_func_t destroy); + +/* This version is like hb_ft_face_create(), except that it caches + * the hb-face using the generic pointer of the ft-face. This means + * that subsequent calls to this function with the same ft-face will + * return the same hb-face (correctly referenced). + * + * Client is still responsible for making sure that ft-face is destroyed + * after hb-face is. + */ +HB_EXTERN hb_face_t * +hb_ft_face_create_cached (FT_Face ft_face); + +/* This version is like hb_ft_face_create(), except that it calls + * FT_Reference_Face() on ft-face, as such keeping ft-face alive + * as long as the hb-face is. + * + * This is the most convenient version to use. Use it unless you have + * very good reasons not to. + */ +HB_EXTERN hb_face_t * +hb_ft_face_create_referenced (FT_Face ft_face); + + +/* + * hb-font from ft-face. + */ + +/* + * Note: + * + * Set face size on ft-face before creating hb-font from it. + * Otherwise hb-ft would NOT pick up the font size correctly. + */ + +/* See notes on hb_ft_face_create(). Same issues re lifecycle-management + * apply here. Use hb_ft_font_create_referenced() if you can. */ +HB_EXTERN hb_font_t * +hb_ft_font_create (FT_Face ft_face, + hb_destroy_func_t destroy); + +/* See notes on hb_ft_face_create_referenced() re lifecycle-management + * issues. */ +HB_EXTERN hb_font_t * +hb_ft_font_create_referenced (FT_Face ft_face); + +HB_EXTERN FT_Face +hb_ft_font_get_face (hb_font_t *font); + +HB_EXTERN FT_Face +hb_ft_font_lock_face (hb_font_t *font); + +HB_EXTERN void +hb_ft_font_unlock_face (hb_font_t *font); + +HB_EXTERN void +hb_ft_font_set_load_flags (hb_font_t *font, int load_flags); + +HB_EXTERN int +hb_ft_font_get_load_flags (hb_font_t *font); + +/* Call when size or variations settings on underlying FT_Face change. */ +HB_EXTERN void +hb_ft_font_changed (hb_font_t *font); + +/* Makes an hb_font_t use FreeType internally to implement font functions. + * Note: this internally creates an FT_Face. Use it when you create your + * hb_face_t using hb_face_create(). */ +HB_EXTERN void +hb_ft_font_set_funcs (hb_font_t *font); + + +HB_END_DECLS + +#endif /* HB_FT_H */ diff --git a/thirdparty/harfbuzz/src/hb-gdi.cc b/thirdparty/harfbuzz/src/hb-gdi.cc new file mode 100644 index 0000000000..f6306ef89f --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-gdi.cc @@ -0,0 +1,73 @@ +/* + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "hb.hh" + +#ifdef HAVE_GDI + +#include "hb-gdi.h" + +static hb_blob_t * +_hb_gdi_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void *user_data) +{ + char *buffer = nullptr; + DWORD length = 0; + + HDC hdc = GetDC (nullptr); + if (unlikely (!SelectObject (hdc, (HFONT) user_data))) goto fail; + + length = GetFontData (hdc, hb_uint32_swap (tag), 0, buffer, length); + if (unlikely (length == GDI_ERROR)) goto fail_with_releasedc; + + buffer = (char *) malloc (length); + if (unlikely (!buffer)) goto fail_with_releasedc; + length = GetFontData (hdc, hb_uint32_swap (tag), 0, buffer, length); + if (unlikely (length == GDI_ERROR)) goto fail_with_releasedc_and_free; + ReleaseDC (nullptr, hdc); + + return hb_blob_create ((const char *) buffer, length, HB_MEMORY_MODE_WRITABLE, buffer, free); + +fail_with_releasedc_and_free: + free (buffer); +fail_with_releasedc: + ReleaseDC (nullptr, hdc); +fail: + return hb_blob_get_empty (); +} + +/** + * hb_gdi_face_create: + * @hfont: a HFONT object. + * + * Return value: #hb_face_t object corresponding to the given input + * + * Since: 2.6.0 + **/ +hb_face_t * +hb_gdi_face_create (HFONT hfont) +{ + return hb_face_create_for_tables (_hb_gdi_reference_table, (void *) hfont, nullptr); +} + +#endif diff --git a/thirdparty/harfbuzz/src/hb-gdi.h b/thirdparty/harfbuzz/src/hb-gdi.h new file mode 100644 index 0000000000..68cc43917e --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-gdi.h @@ -0,0 +1,39 @@ +/* + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_GDI_H +#define HB_GDI_H + +#include "hb.h" + +#include <windows.h> + +HB_BEGIN_DECLS + +HB_EXTERN hb_face_t * +hb_gdi_face_create (HFONT hfont); + +HB_END_DECLS + +#endif /* HB_GDI_H */ diff --git a/thirdparty/harfbuzz/src/hb-glib.cc b/thirdparty/harfbuzz/src/hb-glib.cc new file mode 100644 index 0000000000..f93bb8853c --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-glib.cc @@ -0,0 +1,307 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifdef HAVE_GLIB + +#include "hb-glib.h" + +#include "hb-machinery.hh" + + +/** + * SECTION:hb-glib + * @title: hb-glib + * @short_description: GLib integration + * @include: hb-glib.h + * + * Functions for using HarfBuzz with the GLib library. + * + * HarfBuzz supports using GLib to provide Unicode data, by attaching + * GLib functions to the virtual methods in a #hb_unicode_funcs_t function + * structure. + **/ + + +/** + * hb_glib_script_to_script: + * @script: The GUnicodeScript identifier to query + * + * Fetches the #hb_script_t script that corresponds to the + * specified GUnicodeScript identifier. + * + * Return value: the #hb_script_t script found + * + * Since: 0.9.38 + **/ +hb_script_t +hb_glib_script_to_script (GUnicodeScript script) +{ + return (hb_script_t) g_unicode_script_to_iso15924 (script); +} + +/** + * hb_glib_script_from_script: + * @script: The #hb_script_t to query + * + * Fetches the GUnicodeScript identifier that corresponds to the + * specified #hb_script_t script. + * + * Return value: the GUnicodeScript identifier found + * + * Since: 0.9.38 + **/ +GUnicodeScript +hb_glib_script_from_script (hb_script_t script) +{ + return g_unicode_script_from_iso15924 (script); +} + + +static hb_unicode_combining_class_t +hb_glib_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode, + void *user_data HB_UNUSED) + +{ + return (hb_unicode_combining_class_t) g_unichar_combining_class (unicode); +} + +static hb_unicode_general_category_t +hb_glib_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode, + void *user_data HB_UNUSED) + +{ + /* hb_unicode_general_category_t and GUnicodeType are identical */ + return (hb_unicode_general_category_t) g_unichar_type (unicode); +} + +static hb_codepoint_t +hb_glib_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode, + void *user_data HB_UNUSED) +{ + g_unichar_get_mirror_char (unicode, &unicode); + return unicode; +} + +static hb_script_t +hb_glib_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode, + void *user_data HB_UNUSED) +{ + return hb_glib_script_to_script (g_unichar_get_script (unicode)); +} + +static hb_bool_t +hb_glib_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab, + void *user_data HB_UNUSED) +{ +#if GLIB_CHECK_VERSION(2,29,12) + return g_unichar_compose (a, b, ab); +#endif + + /* We don't ifdef-out the fallback code such that compiler always + * sees it and makes sure it's compilable. */ + + gchar utf8[12]; + gchar *normalized; + int len; + hb_bool_t ret; + + len = g_unichar_to_utf8 (a, utf8); + len += g_unichar_to_utf8 (b, utf8 + len); + normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFC); + len = g_utf8_strlen (normalized, -1); + if (unlikely (!len)) + return false; + + if (len == 1) { + *ab = g_utf8_get_char (normalized); + ret = true; + } else { + ret = false; + } + + g_free (normalized); + return ret; +} + +static hb_bool_t +hb_glib_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t ab, + hb_codepoint_t *a, + hb_codepoint_t *b, + void *user_data HB_UNUSED) +{ +#if GLIB_CHECK_VERSION(2,29,12) + return g_unichar_decompose (ab, a, b); +#endif + + /* We don't ifdef-out the fallback code such that compiler always + * sees it and makes sure it's compilable. */ + + gchar utf8[6]; + gchar *normalized; + int len; + hb_bool_t ret; + + len = g_unichar_to_utf8 (ab, utf8); + normalized = g_utf8_normalize (utf8, len, G_NORMALIZE_NFD); + len = g_utf8_strlen (normalized, -1); + if (unlikely (!len)) + return false; + + if (len == 1) { + *a = g_utf8_get_char (normalized); + *b = 0; + ret = *a != ab; + } else if (len == 2) { + *a = g_utf8_get_char (normalized); + *b = g_utf8_get_char (g_utf8_next_char (normalized)); + /* Here's the ugly part: if ab decomposes to a single character and + * that character decomposes again, we have to detect that and undo + * the second part :-(. */ + gchar *recomposed = g_utf8_normalize (normalized, -1, G_NORMALIZE_NFC); + hb_codepoint_t c = g_utf8_get_char (recomposed); + if (c != ab && c != *a) { + *a = c; + *b = 0; + } + g_free (recomposed); + ret = true; + } else { + /* If decomposed to more than two characters, take the last one, + * and recompose the rest to get the first component. */ + gchar *end = g_utf8_offset_to_pointer (normalized, len - 1); + gchar *recomposed; + *b = g_utf8_get_char (end); + recomposed = g_utf8_normalize (normalized, end - normalized, G_NORMALIZE_NFC); + /* We expect that recomposed has exactly one character now. */ + *a = g_utf8_get_char (recomposed); + g_free (recomposed); + ret = true; + } + + g_free (normalized); + return ret; +} + + +#if HB_USE_ATEXIT +static void free_static_glib_funcs (); +#endif + +static struct hb_glib_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_glib_unicode_funcs_lazy_loader_t> +{ + static hb_unicode_funcs_t *create () + { + hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr); + + hb_unicode_funcs_set_combining_class_func (funcs, hb_glib_unicode_combining_class, nullptr, nullptr); + hb_unicode_funcs_set_general_category_func (funcs, hb_glib_unicode_general_category, nullptr, nullptr); + hb_unicode_funcs_set_mirroring_func (funcs, hb_glib_unicode_mirroring, nullptr, nullptr); + hb_unicode_funcs_set_script_func (funcs, hb_glib_unicode_script, nullptr, nullptr); + hb_unicode_funcs_set_compose_func (funcs, hb_glib_unicode_compose, nullptr, nullptr); + hb_unicode_funcs_set_decompose_func (funcs, hb_glib_unicode_decompose, nullptr, nullptr); + + hb_unicode_funcs_make_immutable (funcs); + +#if HB_USE_ATEXIT + atexit (free_static_glib_funcs); +#endif + + return funcs; + } +} static_glib_funcs; + +#if HB_USE_ATEXIT +static +void free_static_glib_funcs () +{ + static_glib_funcs.free_instance (); +} +#endif + +/** + * hb_glib_get_unicode_funcs: + * + * Fetches a Unicode-functions structure that is populated + * with the appropriate GLib function for each method. + * + * Return value: (transfer none): a pointer to the #hb_unicode_funcs_t Unicode-functions structure + * + * Since: 0.9.38 + **/ +hb_unicode_funcs_t * +hb_glib_get_unicode_funcs () +{ + return static_glib_funcs.get_unconst (); +} + + + +#if GLIB_CHECK_VERSION(2,31,10) + +static void +_hb_g_bytes_unref (void *data) +{ + g_bytes_unref ((GBytes *) data); +} + +/** + * hb_glib_blob_create: + * @gbytes: the GBytes structure to work upon + * + * Creates an #hb_blob_t blob from the specified + * GBytes data structure. + * + * Return value: (transfer full): the new #hb_blob_t blob object + * + * Since: 0.9.38 + **/ +hb_blob_t * +hb_glib_blob_create (GBytes *gbytes) +{ + gsize size = 0; + gconstpointer data = g_bytes_get_data (gbytes, &size); + return hb_blob_create ((const char *) data, + size, + HB_MEMORY_MODE_READONLY, + g_bytes_ref (gbytes), + _hb_g_bytes_unref); +} +#endif + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-glib.h b/thirdparty/harfbuzz/src/hb-glib.h new file mode 100644 index 0000000000..5f04183ba1 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-glib.h @@ -0,0 +1,56 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_GLIB_H +#define HB_GLIB_H + +#include "hb.h" + +#include <glib.h> + +HB_BEGIN_DECLS + + +HB_EXTERN hb_script_t +hb_glib_script_to_script (GUnicodeScript script); + +HB_EXTERN GUnicodeScript +hb_glib_script_from_script (hb_script_t script); + + +HB_EXTERN hb_unicode_funcs_t * +hb_glib_get_unicode_funcs (void); + +#if GLIB_CHECK_VERSION(2,31,10) +HB_EXTERN hb_blob_t * +hb_glib_blob_create (GBytes *gbytes); +#endif + +HB_END_DECLS + +#endif /* HB_GLIB_H */ diff --git a/thirdparty/harfbuzz/src/hb-gobject-structs.cc b/thirdparty/harfbuzz/src/hb-gobject-structs.cc new file mode 100644 index 0000000000..7c46e26400 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-gobject-structs.cc @@ -0,0 +1,110 @@ +/* + * Copyright © 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifdef HAVE_GOBJECT + + +/** + * SECTION:hb-gobject + * @title: hb-gobject + * @short_description: GObject integration support + * @include: hb-gobject.h + * + * Support for using HarfBuzz with the GObject library to provide + * type data. + * + * The types and functions listed here are solely a linkage between + * HarfBuzz's public data types and the GTypes used by the GObject framework. + * HarfBuzz uses GObject introspection to generate its Python bindings + * (and potentially other language bindings); client programs should never need + * to access the GObject-integration mechanics. + * + * For client programs using the GNOME and GTK software stack, please see the + * GLib and FreeType integration pages. + **/ + + +/* g++ didn't like older gtype.h gcc-only code path. */ +#include <glib.h> +#if !GLIB_CHECK_VERSION(2,29,16) +#undef __GNUC__ +#undef __GNUC_MINOR__ +#define __GNUC__ 2 +#define __GNUC_MINOR__ 6 +#endif + +#include "hb-gobject.h" + +#define HB_DEFINE_BOXED_TYPE(name,copy_func,free_func) \ +GType \ +hb_gobject_##name##_get_type () \ +{ \ + static gsize type_id = 0; \ + if (g_once_init_enter (&type_id)) { \ + GType id = g_boxed_type_register_static (g_intern_static_string ("hb_" #name "_t"), \ + (GBoxedCopyFunc) copy_func, \ + (GBoxedFreeFunc) free_func); \ + g_once_init_leave (&type_id, id); \ + } \ + return type_id; \ +} + +#define HB_DEFINE_OBJECT_TYPE(name) \ + HB_DEFINE_BOXED_TYPE (name, hb_##name##_reference, hb_##name##_destroy) + +#define HB_DEFINE_VALUE_TYPE(name) \ + static hb_##name##_t *_hb_##name##_reference (const hb_##name##_t *l) \ + { \ + hb_##name##_t *c = (hb_##name##_t *) calloc (1, sizeof (hb_##name##_t)); \ + if (unlikely (!c)) return nullptr; \ + *c = *l; \ + return c; \ + } \ + static void _hb_##name##_destroy (hb_##name##_t *l) { free (l); } \ + HB_DEFINE_BOXED_TYPE (name, _hb_##name##_reference, _hb_##name##_destroy) + +HB_DEFINE_OBJECT_TYPE (buffer) +HB_DEFINE_OBJECT_TYPE (blob) +HB_DEFINE_OBJECT_TYPE (face) +HB_DEFINE_OBJECT_TYPE (font) +HB_DEFINE_OBJECT_TYPE (font_funcs) +HB_DEFINE_OBJECT_TYPE (set) +HB_DEFINE_OBJECT_TYPE (map) +HB_DEFINE_OBJECT_TYPE (shape_plan) +HB_DEFINE_OBJECT_TYPE (unicode_funcs) +HB_DEFINE_VALUE_TYPE (feature) +HB_DEFINE_VALUE_TYPE (glyph_info) +HB_DEFINE_VALUE_TYPE (glyph_position) +HB_DEFINE_VALUE_TYPE (segment_properties) +HB_DEFINE_VALUE_TYPE (user_data_key) + +HB_DEFINE_VALUE_TYPE (ot_math_glyph_variant) +HB_DEFINE_VALUE_TYPE (ot_math_glyph_part) + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-gobject-structs.h b/thirdparty/harfbuzz/src/hb-gobject-structs.h new file mode 100644 index 0000000000..6fad8d7019 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-gobject-structs.h @@ -0,0 +1,142 @@ +/* + * Copyright (C) 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_GOBJECT_H_IN +#error "Include <hb-gobject.h> instead." +#endif + +#ifndef HB_GOBJECT_STRUCTS_H +#define HB_GOBJECT_STRUCTS_H + +#include "hb.h" + +#include <glib-object.h> + +HB_BEGIN_DECLS + + +/* Object types */ + +/** + * hb_gobject_blob_get_type: + * + * Since: 0.9.2 + **/ +HB_EXTERN GType +hb_gobject_blob_get_type (void); +#define HB_GOBJECT_TYPE_BLOB (hb_gobject_blob_get_type ()) + +/** + * hb_gobject_buffer_get_type: + * + * Since: 0.9.2 + **/ +HB_EXTERN GType +hb_gobject_buffer_get_type (void); +#define HB_GOBJECT_TYPE_BUFFER (hb_gobject_buffer_get_type ()) + +/** + * hb_gobject_face_get_type: + * + * Since: 0.9.2 + **/ +HB_EXTERN GType +hb_gobject_face_get_type (void); +#define HB_GOBJECT_TYPE_FACE (hb_gobject_face_get_type ()) + +/** + * hb_gobject_font_get_type: + * + * Since: 0.9.2 + **/ +HB_EXTERN GType +hb_gobject_font_get_type (void); +#define HB_GOBJECT_TYPE_FONT (hb_gobject_font_get_type ()) + +/** + * hb_gobject_font_funcs_get_type: + * + * Since: 0.9.2 + **/ +HB_EXTERN GType +hb_gobject_font_funcs_get_type (void); +#define HB_GOBJECT_TYPE_FONT_FUNCS (hb_gobject_font_funcs_get_type ()) + +HB_EXTERN GType +hb_gobject_set_get_type (void); +#define HB_GOBJECT_TYPE_SET (hb_gobject_set_get_type ()) + +HB_EXTERN GType +hb_gobject_map_get_type (void); +#define HB_GOBJECT_TYPE_MAP (hb_gobject_map_get_type ()) + +HB_EXTERN GType +hb_gobject_shape_plan_get_type (void); +#define HB_GOBJECT_TYPE_SHAPE_PLAN (hb_gobject_shape_plan_get_type ()) + +/** + * hb_gobject_unicode_funcs_get_type: + * + * Since: 0.9.2 + **/ +HB_EXTERN GType +hb_gobject_unicode_funcs_get_type (void); +#define HB_GOBJECT_TYPE_UNICODE_FUNCS (hb_gobject_unicode_funcs_get_type ()) + +/* Value types */ + +HB_EXTERN GType +hb_gobject_feature_get_type (void); +#define HB_GOBJECT_TYPE_FEATURE (hb_gobject_feature_get_type ()) + +HB_EXTERN GType +hb_gobject_glyph_info_get_type (void); +#define HB_GOBJECT_TYPE_GLYPH_INFO (hb_gobject_glyph_info_get_type ()) + +HB_EXTERN GType +hb_gobject_glyph_position_get_type (void); +#define HB_GOBJECT_TYPE_GLYPH_POSITION (hb_gobject_glyph_position_get_type ()) + +HB_EXTERN GType +hb_gobject_segment_properties_get_type (void); +#define HB_GOBJECT_TYPE_SEGMENT_PROPERTIES (hb_gobject_segment_properties_get_type ()) + +HB_EXTERN GType +hb_gobject_user_data_key_get_type (void); +#define HB_GOBJECT_TYPE_USER_DATA_KEY (hb_gobject_user_data_key_get_type ()) + +HB_EXTERN GType +hb_gobject_ot_math_glyph_variant_get_type (void); +#define HB_GOBJECT_TYPE_OT_MATH_GLYPH_VARIANT (hb_gobject_ot_math_glyph_variant_get_type ()) + +HB_EXTERN GType +hb_gobject_ot_math_glyph_part_get_type (void); +#define HB_GOBJECT_TYPE_OT_MATH_GLYPH_PART (hb_gobject_ot_math_glyph_part_get_type ()) + + +HB_END_DECLS + +#endif /* HB_GOBJECT_H */ diff --git a/thirdparty/harfbuzz/src/hb-gobject.h b/thirdparty/harfbuzz/src/hb-gobject.h new file mode 100644 index 0000000000..8891aa0ee7 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-gobject.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_GOBJECT_H +#define HB_GOBJECT_H +#define HB_GOBJECT_H_IN + +#include "hb.h" + +#include "hb-gobject-enums.h" +#include "hb-gobject-structs.h" + +HB_BEGIN_DECLS +HB_END_DECLS + +#undef HB_GOBJECT_H_IN +#endif /* HB_GOBJECT_H */ diff --git a/thirdparty/harfbuzz/src/hb-graphite2.cc b/thirdparty/harfbuzz/src/hb-graphite2.cc new file mode 100644 index 0000000000..d8a72dc2f1 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-graphite2.cc @@ -0,0 +1,442 @@ +/* + * Copyright © 2011 Martin Hosken + * Copyright © 2011 SIL International + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifdef HAVE_GRAPHITE2 + +#include "hb-shaper-impl.hh" + +#include "hb-graphite2.h" + +#include <graphite2/Segment.h> + +#include "hb-ot-layout.h" + + +/** + * SECTION:hb-graphite2 + * @title: hb-graphite2 + * @short_description: Graphite2 integration + * @include: hb-graphite2.h + * + * Functions for using HarfBuzz with fonts that include Graphite features. + * + * For Graphite features to work, you must be sure that HarfBuzz was compiled + * with the `graphite2` shaping engine enabled. Currently, the default is to + * not enable `graphite2` shaping. + **/ + + +/* + * shaper face data + */ + +typedef struct hb_graphite2_tablelist_t +{ + struct hb_graphite2_tablelist_t *next; + hb_blob_t *blob; + unsigned int tag; +} hb_graphite2_tablelist_t; + +struct hb_graphite2_face_data_t +{ + hb_face_t *face; + gr_face *grface; + hb_atomic_ptr_t<hb_graphite2_tablelist_t> tlist; +}; + +static const void *hb_graphite2_get_table (const void *data, unsigned int tag, size_t *len) +{ + hb_graphite2_face_data_t *face_data = (hb_graphite2_face_data_t *) data; + hb_graphite2_tablelist_t *tlist = face_data->tlist; + + hb_blob_t *blob = nullptr; + + for (hb_graphite2_tablelist_t *p = tlist; p; p = p->next) + if (p->tag == tag) { + blob = p->blob; + break; + } + + if (unlikely (!blob)) + { + blob = face_data->face->reference_table (tag); + + hb_graphite2_tablelist_t *p = (hb_graphite2_tablelist_t *) calloc (1, sizeof (hb_graphite2_tablelist_t)); + if (unlikely (!p)) { + hb_blob_destroy (blob); + return nullptr; + } + p->blob = blob; + p->tag = tag; + +retry: + hb_graphite2_tablelist_t *tlist = face_data->tlist; + p->next = tlist; + + if (unlikely (!face_data->tlist.cmpexch (tlist, p))) + goto retry; + } + + unsigned int tlen; + const char *d = hb_blob_get_data (blob, &tlen); + *len = tlen; + return d; +} + +hb_graphite2_face_data_t * +_hb_graphite2_shaper_face_data_create (hb_face_t *face) +{ + hb_blob_t *silf_blob = face->reference_table (HB_GRAPHITE2_TAG_SILF); + /* Umm, we just reference the table to check whether it exists. + * Maybe add better API for this? */ + if (!hb_blob_get_length (silf_blob)) + { + hb_blob_destroy (silf_blob); + return nullptr; + } + hb_blob_destroy (silf_blob); + + hb_graphite2_face_data_t *data = (hb_graphite2_face_data_t *) calloc (1, sizeof (hb_graphite2_face_data_t)); + if (unlikely (!data)) + return nullptr; + + data->face = face; + data->grface = gr_make_face (data, &hb_graphite2_get_table, gr_face_preloadAll); + + if (unlikely (!data->grface)) { + free (data); + return nullptr; + } + + return data; +} + +void +_hb_graphite2_shaper_face_data_destroy (hb_graphite2_face_data_t *data) +{ + hb_graphite2_tablelist_t *tlist = data->tlist; + + while (tlist) + { + hb_graphite2_tablelist_t *old = tlist; + hb_blob_destroy (tlist->blob); + tlist = tlist->next; + free (old); + } + + gr_face_destroy (data->grface); + + free (data); +} + +/** + * hb_graphite2_face_get_gr_face: + * @face: @hb_face_t to query + * + * Fetches the Graphite2 gr_face corresponding to the specified + * #hb_face_t face object. + * + * Return value: the gr_face found + * + * Since: 0.9.10 + */ +gr_face * +hb_graphite2_face_get_gr_face (hb_face_t *face) +{ + const hb_graphite2_face_data_t *data = face->data.graphite2; + return data ? data->grface : nullptr; +} + + +/* + * shaper font data + */ + +struct hb_graphite2_font_data_t {}; + +hb_graphite2_font_data_t * +_hb_graphite2_shaper_font_data_create (hb_font_t *font HB_UNUSED) +{ + return (hb_graphite2_font_data_t *) HB_SHAPER_DATA_SUCCEEDED; +} + +void +_hb_graphite2_shaper_font_data_destroy (hb_graphite2_font_data_t *data HB_UNUSED) +{ +} + +#ifndef HB_DISABLE_DEPRECATED +/** + * hb_graphite2_font_get_gr_font: + * + * Since: 0.9.10 + * Deprecated: 1.4.2 + */ +gr_font * +hb_graphite2_font_get_gr_font (hb_font_t *font HB_UNUSED) +{ + return nullptr; +} +#endif + + +/* + * shaper + */ + +struct hb_graphite2_cluster_t { + unsigned int base_char; + unsigned int num_chars; + unsigned int base_glyph; + unsigned int num_glyphs; + unsigned int cluster; + unsigned int advance; +}; + +hb_bool_t +_hb_graphite2_shape (hb_shape_plan_t *shape_plan HB_UNUSED, + hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features) +{ + hb_face_t *face = font->face; + gr_face *grface = face->data.graphite2->grface; + + const char *lang = hb_language_to_string (hb_buffer_get_language (buffer)); + const char *lang_end = lang ? strchr (lang, '-') : nullptr; + int lang_len = lang_end ? lang_end - lang : -1; + gr_feature_val *feats = gr_face_featureval_for_lang (grface, lang ? hb_tag_from_string (lang, lang_len) : 0); + + for (unsigned int i = 0; i < num_features; i++) + { + const gr_feature_ref *fref = gr_face_find_fref (grface, features[i].tag); + if (fref) + gr_fref_set_feature_value (fref, features[i].value, feats); + } + + gr_segment *seg = nullptr; + const gr_slot *is; + unsigned int ci = 0, ic = 0; + unsigned int curradvx = 0, curradvy = 0; + + unsigned int scratch_size; + hb_buffer_t::scratch_buffer_t *scratch = buffer->get_scratch_buffer (&scratch_size); + + uint32_t *chars = (uint32_t *) scratch; + + for (unsigned int i = 0; i < buffer->len; ++i) + chars[i] = buffer->info[i].codepoint; + + /* TODO ensure_native_direction. */ + + hb_tag_t script_tag[HB_OT_MAX_TAGS_PER_SCRIPT]; + unsigned int count = HB_OT_MAX_TAGS_PER_SCRIPT; + hb_ot_tags_from_script_and_language (hb_buffer_get_script (buffer), + HB_LANGUAGE_INVALID, + &count, + script_tag, + nullptr, nullptr); + + seg = gr_make_seg (nullptr, grface, + count ? script_tag[count - 1] : HB_OT_TAG_DEFAULT_SCRIPT, + feats, + gr_utf32, chars, buffer->len, + 2 | (hb_buffer_get_direction (buffer) == HB_DIRECTION_RTL ? 1 : 0)); + + if (unlikely (!seg)) { + if (feats) gr_featureval_destroy (feats); + return false; + } + + unsigned int glyph_count = gr_seg_n_slots (seg); + if (unlikely (!glyph_count)) { + if (feats) gr_featureval_destroy (feats); + gr_seg_destroy (seg); + buffer->len = 0; + return true; + } + + buffer->ensure (glyph_count); + scratch = buffer->get_scratch_buffer (&scratch_size); + while ((DIV_CEIL (sizeof (hb_graphite2_cluster_t) * buffer->len, sizeof (*scratch)) + + DIV_CEIL (sizeof (hb_codepoint_t) * glyph_count, sizeof (*scratch))) > scratch_size) + { + if (unlikely (!buffer->ensure (buffer->allocated * 2))) + { + if (feats) gr_featureval_destroy (feats); + gr_seg_destroy (seg); + return false; + } + scratch = buffer->get_scratch_buffer (&scratch_size); + } + +#define ALLOCATE_ARRAY(Type, name, len) \ + Type *name = (Type *) scratch; \ + do { \ + unsigned int _consumed = DIV_CEIL ((len) * sizeof (Type), sizeof (*scratch)); \ + assert (_consumed <= scratch_size); \ + scratch += _consumed; \ + scratch_size -= _consumed; \ + } while (0) + + ALLOCATE_ARRAY (hb_graphite2_cluster_t, clusters, buffer->len); + ALLOCATE_ARRAY (hb_codepoint_t, gids, glyph_count); + +#undef ALLOCATE_ARRAY + + memset (clusters, 0, sizeof (clusters[0]) * buffer->len); + + hb_codepoint_t *pg = gids; + clusters[0].cluster = buffer->info[0].cluster; + unsigned int upem = hb_face_get_upem (face); + float xscale = (float) font->x_scale / upem; + float yscale = (float) font->y_scale / upem; + yscale *= yscale / xscale; + unsigned int curradv = 0; + if (HB_DIRECTION_IS_BACKWARD(buffer->props.direction)) + { + curradv = gr_slot_origin_X(gr_seg_first_slot(seg)) * xscale; + clusters[0].advance = gr_seg_advance_X(seg) * xscale - curradv; + } + else + clusters[0].advance = 0; + for (is = gr_seg_first_slot (seg), ic = 0; is; is = gr_slot_next_in_segment (is), ic++) + { + unsigned int before = gr_slot_before (is); + unsigned int after = gr_slot_after (is); + *pg = gr_slot_gid (is); + pg++; + while (clusters[ci].base_char > before && ci) + { + clusters[ci-1].num_chars += clusters[ci].num_chars; + clusters[ci-1].num_glyphs += clusters[ci].num_glyphs; + clusters[ci-1].advance += clusters[ci].advance; + ci--; + } + + if (gr_slot_can_insert_before (is) && clusters[ci].num_chars && before >= clusters[ci].base_char + clusters[ci].num_chars) + { + hb_graphite2_cluster_t *c = clusters + ci + 1; + c->base_char = clusters[ci].base_char + clusters[ci].num_chars; + c->cluster = buffer->info[c->base_char].cluster; + c->num_chars = before - c->base_char; + c->base_glyph = ic; + c->num_glyphs = 0; + if (HB_DIRECTION_IS_BACKWARD(buffer->props.direction)) + { + c->advance = curradv - gr_slot_origin_X(is) * xscale; + curradv -= c->advance; + } + else + { + c->advance = 0; + clusters[ci].advance += gr_slot_origin_X(is) * xscale - curradv; + curradv += clusters[ci].advance; + } + ci++; + } + clusters[ci].num_glyphs++; + + if (clusters[ci].base_char + clusters[ci].num_chars < after + 1) + clusters[ci].num_chars = after + 1 - clusters[ci].base_char; + } + + if (HB_DIRECTION_IS_BACKWARD(buffer->props.direction)) + clusters[ci].advance += curradv; + else + clusters[ci].advance += gr_seg_advance_X(seg) * xscale - curradv; + ci++; + + for (unsigned int i = 0; i < ci; ++i) + { + for (unsigned int j = 0; j < clusters[i].num_glyphs; ++j) + { + hb_glyph_info_t *info = &buffer->info[clusters[i].base_glyph + j]; + info->codepoint = gids[clusters[i].base_glyph + j]; + info->cluster = clusters[i].cluster; + info->var1.i32 = clusters[i].advance; // all glyphs in the cluster get the same advance + } + } + buffer->len = glyph_count; + + /* Positioning. */ + unsigned int currclus = UINT_MAX; + const hb_glyph_info_t *info = buffer->info; + hb_glyph_position_t *pPos = hb_buffer_get_glyph_positions (buffer, nullptr); + if (!HB_DIRECTION_IS_BACKWARD(buffer->props.direction)) + { + curradvx = 0; + for (is = gr_seg_first_slot (seg); is; pPos++, ++info, is = gr_slot_next_in_segment (is)) + { + pPos->x_offset = gr_slot_origin_X (is) * xscale - curradvx; + pPos->y_offset = gr_slot_origin_Y (is) * yscale - curradvy; + if (info->cluster != currclus) { + pPos->x_advance = info->var1.i32; + curradvx += pPos->x_advance; + currclus = info->cluster; + } else + pPos->x_advance = 0.; + + pPos->y_advance = gr_slot_advance_Y (is, grface, nullptr) * yscale; + curradvy += pPos->y_advance; + } + } + else + { + curradvx = gr_seg_advance_X(seg) * xscale; + for (is = gr_seg_first_slot (seg); is; pPos++, info++, is = gr_slot_next_in_segment (is)) + { + if (info->cluster != currclus) + { + pPos->x_advance = info->var1.i32; + curradvx -= pPos->x_advance; + currclus = info->cluster; + } else + pPos->x_advance = 0.; + + pPos->y_advance = gr_slot_advance_Y (is, grface, nullptr) * yscale; + curradvy -= pPos->y_advance; + pPos->x_offset = gr_slot_origin_X (is) * xscale - info->var1.i32 - curradvx + pPos->x_advance; + pPos->y_offset = gr_slot_origin_Y (is) * yscale - curradvy; + } + hb_buffer_reverse_clusters (buffer); + } + + if (feats) gr_featureval_destroy (feats); + gr_seg_destroy (seg); + + buffer->unsafe_to_break_all (); + + return true; +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-graphite2.h b/thirdparty/harfbuzz/src/hb-graphite2.h new file mode 100644 index 0000000000..f299da9f71 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-graphite2.h @@ -0,0 +1,60 @@ +/* + * Copyright © 2011 Martin Hosken + * Copyright © 2011 SIL International + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_GRAPHITE2_H +#define HB_GRAPHITE2_H + +#include "hb.h" + +#include <graphite2/Font.h> + +HB_BEGIN_DECLS + +/** + * HB_GRAPHITE2_TAG_SILF: + * + * The #hb_tag_t tag for the `Silf` table, which holds Graphite + * features. + * + * For more information, see http://graphite.sil.org/ + * + **/ +#define HB_GRAPHITE2_TAG_SILF HB_TAG('S','i','l','f') + + +HB_EXTERN gr_face * +hb_graphite2_face_get_gr_face (hb_face_t *face); + +#ifndef HB_DISABLE_DEPRECATED + +HB_EXTERN HB_DEPRECATED_FOR (hb_graphite2_face_get_gr_face) gr_font * +hb_graphite2_font_get_gr_font (hb_font_t *font); + +#endif + + +HB_END_DECLS + +#endif /* HB_GRAPHITE2_H */ diff --git a/thirdparty/harfbuzz/src/hb-icu.cc b/thirdparty/harfbuzz/src/hb-icu.cc new file mode 100644 index 0000000000..008a39e414 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-icu.cc @@ -0,0 +1,296 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2009 Keith Stribley + * Copyright © 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifdef HAVE_ICU + +#include "hb-icu.h" + +#include "hb-machinery.hh" + +#include <unicode/uchar.h> +#include <unicode/unorm2.h> +#include <unicode/ustring.h> +#include <unicode/utf16.h> +#include <unicode/uversion.h> + +/* ICU extra semicolon, fixed since 65, https://github.com/unicode-org/icu/commit/480bec3 */ +#if U_ICU_VERSION_MAJOR_NUM < 65 && (defined(__GNUC__) || defined(__clang__)) +#define HB_ICU_EXTRA_SEMI_IGNORED +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wextra-semi-stmt" +#endif + +/** + * SECTION:hb-icu + * @title: hb-icu + * @short_description: ICU integration + * @include: hb-icu.h + * + * Functions for using HarfBuzz with the International Components for Unicode + * (ICU) library. HarfBuzz supports using ICU to provide Unicode data, by attaching + * ICU functions to the virtual methods in a #hb_unicode_funcs_t function + * structure. + **/ + +/** + * hb_icu_script_to_script: + * @script: The UScriptCode identifier to query + * + * Fetches the #hb_script_t script that corresponds to the + * specified UScriptCode identifier. + * + * Return value: the #hb_script_t script found + * + **/ + +hb_script_t +hb_icu_script_to_script (UScriptCode script) +{ + if (unlikely (script == USCRIPT_INVALID_CODE)) + return HB_SCRIPT_INVALID; + + return hb_script_from_string (uscript_getShortName (script), -1); +} + +/** + * hb_icu_script_from_script: + * @script: The #hb_script_t script to query + * + * Fetches the UScriptCode identifier that corresponds to the + * specified #hb_script_t script. + * + * Return value: the UScriptCode identifier found + * + **/ +UScriptCode +hb_icu_script_from_script (hb_script_t script) +{ + if (unlikely (script == HB_SCRIPT_INVALID)) + return USCRIPT_INVALID_CODE; + + unsigned int numScriptCode = 1 + u_getIntPropertyMaxValue (UCHAR_SCRIPT); + for (unsigned int i = 0; i < numScriptCode; i++) + if (unlikely (hb_icu_script_to_script ((UScriptCode) i) == script)) + return (UScriptCode) i; + + return USCRIPT_UNKNOWN; +} + + +static hb_unicode_combining_class_t +hb_icu_unicode_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode, + void *user_data HB_UNUSED) + +{ + return (hb_unicode_combining_class_t) u_getCombiningClass (unicode); +} + +static hb_unicode_general_category_t +hb_icu_unicode_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode, + void *user_data HB_UNUSED) +{ + switch (u_getIntPropertyValue(unicode, UCHAR_GENERAL_CATEGORY)) + { + case U_UNASSIGNED: return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED; + + case U_UPPERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER; + case U_LOWERCASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER; + case U_TITLECASE_LETTER: return HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER; + case U_MODIFIER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER; + case U_OTHER_LETTER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER; + + case U_NON_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK; + case U_ENCLOSING_MARK: return HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK; + case U_COMBINING_SPACING_MARK: return HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK; + + case U_DECIMAL_DIGIT_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER; + case U_LETTER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER; + case U_OTHER_NUMBER: return HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER; + + case U_SPACE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR; + case U_LINE_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR; + case U_PARAGRAPH_SEPARATOR: return HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR; + + case U_CONTROL_CHAR: return HB_UNICODE_GENERAL_CATEGORY_CONTROL; + case U_FORMAT_CHAR: return HB_UNICODE_GENERAL_CATEGORY_FORMAT; + case U_PRIVATE_USE_CHAR: return HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE; + case U_SURROGATE: return HB_UNICODE_GENERAL_CATEGORY_SURROGATE; + + + case U_DASH_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION; + case U_START_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION; + case U_END_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION; + case U_CONNECTOR_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION; + case U_OTHER_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION; + + case U_MATH_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL; + case U_CURRENCY_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL; + case U_MODIFIER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL; + case U_OTHER_SYMBOL: return HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL; + + case U_INITIAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION; + case U_FINAL_PUNCTUATION: return HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION; + } + + return HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED; +} + +static hb_codepoint_t +hb_icu_unicode_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode, + void *user_data HB_UNUSED) +{ + return u_charMirror(unicode); +} + +static hb_script_t +hb_icu_unicode_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode, + void *user_data HB_UNUSED) +{ + UErrorCode status = U_ZERO_ERROR; + UScriptCode scriptCode = uscript_getScript(unicode, &status); + + if (unlikely (U_FAILURE (status))) + return HB_SCRIPT_UNKNOWN; + + return hb_icu_script_to_script (scriptCode); +} + +static hb_bool_t +hb_icu_unicode_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab, + void *user_data) +{ + const UNormalizer2 *normalizer = (const UNormalizer2 *) user_data; + UChar32 ret = unorm2_composePair (normalizer, a, b); + if (ret < 0) return false; + *ab = ret; + return true; +} + +static hb_bool_t +hb_icu_unicode_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t ab, + hb_codepoint_t *a, + hb_codepoint_t *b, + void *user_data) +{ + const UNormalizer2 *normalizer = (const UNormalizer2 *) user_data; + UChar decomposed[4]; + int len; + UErrorCode icu_err = U_ZERO_ERROR; + len = unorm2_getRawDecomposition (normalizer, ab, decomposed, + ARRAY_LENGTH (decomposed), &icu_err); + if (U_FAILURE (icu_err) || len < 0) return false; + + len = u_countChar32 (decomposed, len); + if (len == 1) + { + U16_GET_UNSAFE (decomposed, 0, *a); + *b = 0; + return *a != ab; + } + else if (len == 2) + { + len = 0; + U16_NEXT_UNSAFE (decomposed, len, *a); + U16_NEXT_UNSAFE (decomposed, len, *b); + } + return true; +} + + +#if HB_USE_ATEXIT +static void free_static_icu_funcs (); +#endif + +static struct hb_icu_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_icu_unicode_funcs_lazy_loader_t> +{ + static hb_unicode_funcs_t *create () + { + void *user_data = nullptr; + UErrorCode icu_err = U_ZERO_ERROR; + user_data = (void *) unorm2_getNFCInstance (&icu_err); + assert (user_data); + + hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr); + + hb_unicode_funcs_set_combining_class_func (funcs, hb_icu_unicode_combining_class, nullptr, nullptr); + hb_unicode_funcs_set_general_category_func (funcs, hb_icu_unicode_general_category, nullptr, nullptr); + hb_unicode_funcs_set_mirroring_func (funcs, hb_icu_unicode_mirroring, nullptr, nullptr); + hb_unicode_funcs_set_script_func (funcs, hb_icu_unicode_script, nullptr, nullptr); + hb_unicode_funcs_set_compose_func (funcs, hb_icu_unicode_compose, user_data, nullptr); + hb_unicode_funcs_set_decompose_func (funcs, hb_icu_unicode_decompose, user_data, nullptr); + + hb_unicode_funcs_make_immutable (funcs); + +#if HB_USE_ATEXIT + atexit (free_static_icu_funcs); +#endif + + return funcs; + } +} static_icu_funcs; + +#if HB_USE_ATEXIT +static +void free_static_icu_funcs () +{ + static_icu_funcs.free_instance (); +} +#endif + +/** + * hb_icu_get_unicode_funcs: + * + * Fetches a Unicode-functions structure that is populated + * with the appropriate ICU function for each method. + * + * Return value: (transfer none): a pointer to the #hb_unicode_funcs_t Unicode-functions structure + * + * Since: 0.9.38 + **/ +hb_unicode_funcs_t * +hb_icu_get_unicode_funcs () +{ + return static_icu_funcs.get_unconst (); +} + +#ifdef HB_ICU_EXTRA_SEMI_IGNORED +#pragma GCC diagnostic pop +#endif + +#endif diff --git a/thirdparty/harfbuzz/src/hb-icu.h b/thirdparty/harfbuzz/src/hb-icu.h new file mode 100644 index 0000000000..2db6a7b679 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-icu.h @@ -0,0 +1,52 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_ICU_H +#define HB_ICU_H + +#include "hb.h" + +#include <unicode/uscript.h> + +HB_BEGIN_DECLS + + +HB_EXTERN hb_script_t +hb_icu_script_to_script (UScriptCode script); + +HB_EXTERN UScriptCode +hb_icu_script_from_script (hb_script_t script); + + +HB_EXTERN hb_unicode_funcs_t * +hb_icu_get_unicode_funcs (void); + + +HB_END_DECLS + +#endif /* HB_ICU_H */ diff --git a/thirdparty/harfbuzz/src/hb-iter.hh b/thirdparty/harfbuzz/src/hb-iter.hh new file mode 100644 index 0000000000..981c5c218c --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-iter.hh @@ -0,0 +1,939 @@ +/* + * Copyright © 2018 Google, Inc. + * Copyright © 2019 Facebook, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + * Facebook Author(s): Behdad Esfahbod + */ + +#ifndef HB_ITER_HH +#define HB_ITER_HH + +#include "hb.hh" +#include "hb-algs.hh" +#include "hb-meta.hh" + + +/* Unified iterator object. + * + * The goal of this template is to make the same iterator interface + * available to all types, and make it very easy and compact to use. + * hb_iter_tator objects are small, light-weight, objects that can be + * copied by value. If the collection / object being iterated on + * is writable, then the iterator returns lvalues, otherwise it + * returns rvalues. + * + * TODO Document more. + * + * If iterator implementation implements operator!=, then can be + * used in range-based for loop. That comes free if the iterator + * is random-access. Otherwise, the range-based for loop incurs + * one traversal to find end(), which can be avoided if written + * as a while-style for loop, or if iterator implements a faster + * __end__() method. + * TODO When opting in for C++17, address this by changing return + * type of .end()? + */ + +/* + * Base classes for iterators. + */ + +/* Base class for all iterators. */ +template <typename iter_t, typename Item = typename iter_t::__item_t__> +struct hb_iter_t +{ + typedef Item item_t; + constexpr unsigned get_item_size () const { return hb_static_size (Item); } + static constexpr bool is_iterator = true; + static constexpr bool is_random_access_iterator = false; + static constexpr bool is_sorted_iterator = false; + + private: + /* https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern */ + const iter_t* thiz () const { return static_cast<const iter_t *> (this); } + iter_t* thiz () { return static_cast< iter_t *> (this); } + public: + + /* TODO: + * Port operators below to use hb_enable_if to sniff which method implements + * an operator and use it, and remove hb_iter_fallback_mixin_t completely. */ + + /* Operators. */ + iter_t iter () const { return *thiz(); } + iter_t operator + () const { return *thiz(); } + iter_t begin () const { return *thiz(); } + iter_t end () const { return thiz()->__end__ (); } + explicit operator bool () const { return thiz()->__more__ (); } + unsigned len () const { return thiz()->__len__ (); } + /* The following can only be enabled if item_t is reference type. Otherwise + * it will be returning pointer to temporary rvalue. + * TODO Use a wrapper return type to fix for non-reference type. */ + template <typename T = item_t, + hb_enable_if (hb_is_reference (T))> + hb_remove_reference<item_t>* operator -> () const { return hb_addressof (**thiz()); } + item_t operator * () const { return thiz()->__item__ (); } + item_t operator * () { return thiz()->__item__ (); } + item_t operator [] (unsigned i) const { return thiz()->__item_at__ (i); } + item_t operator [] (unsigned i) { return thiz()->__item_at__ (i); } + iter_t& operator += (unsigned count) & { thiz()->__forward__ (count); return *thiz(); } + iter_t operator += (unsigned count) && { thiz()->__forward__ (count); return *thiz(); } + iter_t& operator ++ () & { thiz()->__next__ (); return *thiz(); } + iter_t operator ++ () && { thiz()->__next__ (); return *thiz(); } + iter_t& operator -= (unsigned count) & { thiz()->__rewind__ (count); return *thiz(); } + iter_t operator -= (unsigned count) && { thiz()->__rewind__ (count); return *thiz(); } + iter_t& operator -- () & { thiz()->__prev__ (); return *thiz(); } + iter_t operator -- () && { thiz()->__prev__ (); return *thiz(); } + iter_t operator + (unsigned count) const { auto c = thiz()->iter (); c += count; return c; } + friend iter_t operator + (unsigned count, const iter_t &it) { return it + count; } + iter_t operator ++ (int) { iter_t c (*thiz()); ++*thiz(); return c; } + iter_t operator - (unsigned count) const { auto c = thiz()->iter (); c -= count; return c; } + iter_t operator -- (int) { iter_t c (*thiz()); --*thiz(); return c; } + template <typename T> + iter_t& operator >> (T &v) & { v = **thiz(); ++*thiz(); return *thiz(); } + template <typename T> + iter_t operator >> (T &v) && { v = **thiz(); ++*thiz(); return *thiz(); } + template <typename T> + iter_t& operator << (const T v) & { **thiz() = v; ++*thiz(); return *thiz(); } + template <typename T> + iter_t operator << (const T v) && { **thiz() = v; ++*thiz(); return *thiz(); } + + protected: + hb_iter_t () = default; + hb_iter_t (const hb_iter_t &o HB_UNUSED) = default; + hb_iter_t (hb_iter_t &&o HB_UNUSED) = default; + hb_iter_t& operator = (const hb_iter_t &o HB_UNUSED) = default; + hb_iter_t& operator = (hb_iter_t &&o HB_UNUSED) = default; +}; + +#define HB_ITER_USING(Name) \ + using item_t = typename Name::item_t; \ + using Name::begin; \ + using Name::end; \ + using Name::get_item_size; \ + using Name::is_iterator; \ + using Name::iter; \ + using Name::operator bool; \ + using Name::len; \ + using Name::operator ->; \ + using Name::operator *; \ + using Name::operator []; \ + using Name::operator +=; \ + using Name::operator ++; \ + using Name::operator -=; \ + using Name::operator --; \ + using Name::operator +; \ + using Name::operator -; \ + using Name::operator >>; \ + using Name::operator <<; \ + static_assert (true, "") + +/* Returns iterator / item type of a type. */ +template <typename Iterable> +using hb_iter_type = decltype (hb_deref (hb_declval (Iterable)).iter ()); +template <typename Iterable> +using hb_item_type = decltype (*hb_deref (hb_declval (Iterable)).iter ()); + + +template <typename> struct hb_array_t; +template <typename> struct hb_sorted_array_t; + +struct +{ + template <typename T> hb_iter_type<T> + operator () (T&& c) const + { return hb_deref (hb_forward<T> (c)).iter (); } + + /* Specialization for C arrays. */ + + template <typename Type> inline hb_array_t<Type> + operator () (Type *array, unsigned int length) const + { return hb_array_t<Type> (array, length); } + + template <typename Type, unsigned int length> hb_array_t<Type> + operator () (Type (&array)[length]) const + { return hb_array_t<Type> (array, length); } + +} +HB_FUNCOBJ (hb_iter); +struct +{ + template <typename T> unsigned + operator () (T&& c) const + { return c.len (); } + +} +HB_FUNCOBJ (hb_len); + +/* Mixin to fill in what the subclass doesn't provide. */ +template <typename iter_t, typename item_t = typename iter_t::__item_t__> +struct hb_iter_fallback_mixin_t +{ + private: + /* https://en.wikipedia.org/wiki/Curiously_recurring_template_pattern */ + const iter_t* thiz () const { return static_cast<const iter_t *> (this); } + iter_t* thiz () { return static_cast< iter_t *> (this); } + public: + + /* Access: Implement __item__(), or __item_at__() if random-access. */ + item_t __item__ () const { return (*thiz())[0]; } + item_t __item_at__ (unsigned i) const { return *(*thiz() + i); } + + /* Termination: Implement __more__(), or __len__() if random-access. */ + bool __more__ () const { return bool (thiz()->len ()); } + unsigned __len__ () const + { iter_t c (*thiz()); unsigned l = 0; while (c) { c++; l++; } return l; } + + /* Advancing: Implement __next__(), or __forward__() if random-access. */ + void __next__ () { *thiz() += 1; } + void __forward__ (unsigned n) { while (*thiz() && n--) ++*thiz(); } + + /* Rewinding: Implement __prev__() or __rewind__() if bidirectional. */ + void __prev__ () { *thiz() -= 1; } + void __rewind__ (unsigned n) { while (*thiz() && n--) --*thiz(); } + + /* Range-based for: Implement __end__() if can be done faster, + * and operator!=. */ + iter_t __end__ () const + { + if (thiz()->is_random_access_iterator) + return *thiz() + thiz()->len (); + /* Above expression loops twice. Following loops once. */ + auto it = *thiz(); + while (it) ++it; + return it; + } + + protected: + hb_iter_fallback_mixin_t () = default; + hb_iter_fallback_mixin_t (const hb_iter_fallback_mixin_t &o HB_UNUSED) = default; + hb_iter_fallback_mixin_t (hb_iter_fallback_mixin_t &&o HB_UNUSED) = default; + hb_iter_fallback_mixin_t& operator = (const hb_iter_fallback_mixin_t &o HB_UNUSED) = default; + hb_iter_fallback_mixin_t& operator = (hb_iter_fallback_mixin_t &&o HB_UNUSED) = default; +}; + +template <typename iter_t, typename item_t = typename iter_t::__item_t__> +struct hb_iter_with_fallback_t : + hb_iter_t<iter_t, item_t>, + hb_iter_fallback_mixin_t<iter_t, item_t> +{ + protected: + hb_iter_with_fallback_t () = default; + hb_iter_with_fallback_t (const hb_iter_with_fallback_t &o HB_UNUSED) = default; + hb_iter_with_fallback_t (hb_iter_with_fallback_t &&o HB_UNUSED) = default; + hb_iter_with_fallback_t& operator = (const hb_iter_with_fallback_t &o HB_UNUSED) = default; + hb_iter_with_fallback_t& operator = (hb_iter_with_fallback_t &&o HB_UNUSED) = default; +}; + +/* + * Meta-programming predicates. + */ + +/* hb_is_iterator() / hb_is_iterator_of() */ + +template<typename Iter, typename Item> +struct hb_is_iterator_of +{ + template <typename Item2 = Item> + static hb_true_type impl (hb_priority<2>, hb_iter_t<Iter, hb_type_identity<Item2>> *); + static hb_false_type impl (hb_priority<0>, const void *); + + public: + static constexpr bool value = decltype (impl (hb_prioritize, hb_declval (Iter*)))::value; +}; +#define hb_is_iterator_of(Iter, Item) hb_is_iterator_of<Iter, Item>::value +#define hb_is_iterator(Iter) hb_is_iterator_of (Iter, typename Iter::item_t) + +/* hb_is_iterable() */ + +template <typename T> +struct hb_is_iterable +{ + private: + + template <typename U> + static auto impl (hb_priority<1>) -> decltype (hb_declval (U).iter (), hb_true_type ()); + + template <typename> + static hb_false_type impl (hb_priority<0>); + + public: + static constexpr bool value = decltype (impl<T> (hb_prioritize))::value; +}; +#define hb_is_iterable(Iterable) hb_is_iterable<Iterable>::value + +/* hb_is_source_of() / hb_is_sink_of() */ + +template<typename Iter, typename Item> +struct hb_is_source_of +{ + private: + template <typename Iter2 = Iter, + hb_enable_if (hb_is_convertible (typename Iter2::item_t, hb_add_lvalue_reference<hb_add_const<Item>>))> + static hb_true_type impl (hb_priority<2>); + template <typename Iter2 = Iter> + static auto impl (hb_priority<1>) -> decltype (hb_declval (Iter2) >> hb_declval (Item &), hb_true_type ()); + static hb_false_type impl (hb_priority<0>); + + public: + static constexpr bool value = decltype (impl (hb_prioritize))::value; +}; +#define hb_is_source_of(Iter, Item) hb_is_source_of<Iter, Item>::value + +template<typename Iter, typename Item> +struct hb_is_sink_of +{ + private: + template <typename Iter2 = Iter, + hb_enable_if (hb_is_convertible (typename Iter2::item_t, hb_add_lvalue_reference<Item>))> + static hb_true_type impl (hb_priority<2>); + template <typename Iter2 = Iter> + static auto impl (hb_priority<1>) -> decltype (hb_declval (Iter2) << hb_declval (Item), hb_true_type ()); + static hb_false_type impl (hb_priority<0>); + + public: + static constexpr bool value = decltype (impl (hb_prioritize))::value; +}; +#define hb_is_sink_of(Iter, Item) hb_is_sink_of<Iter, Item>::value + +/* This is commonly used, so define: */ +#define hb_is_sorted_source_of(Iter, Item) \ + (hb_is_source_of(Iter, Item) && Iter::is_sorted_iterator) + + +/* Range-based 'for' for iterables. */ + +template <typename Iterable, + hb_requires (hb_is_iterable (Iterable))> +static inline auto begin (Iterable&& iterable) HB_AUTO_RETURN (hb_iter (iterable).begin ()) + +template <typename Iterable, + hb_requires (hb_is_iterable (Iterable))> +static inline auto end (Iterable&& iterable) HB_AUTO_RETURN (hb_iter (iterable).end ()) + +/* begin()/end() are NOT looked up non-ADL. So each namespace must declare them. + * Do it for namespace OT. */ +namespace OT { + +template <typename Iterable, + hb_requires (hb_is_iterable (Iterable))> +static inline auto begin (Iterable&& iterable) HB_AUTO_RETURN (hb_iter (iterable).begin ()) + +template <typename Iterable, + hb_requires (hb_is_iterable (Iterable))> +static inline auto end (Iterable&& iterable) HB_AUTO_RETURN (hb_iter (iterable).end ()) + +} + + +/* + * Adaptors, combiners, etc. + */ + +template <typename Lhs, typename Rhs, + hb_requires (hb_is_iterator (Lhs))> +static inline auto +operator | (Lhs&& lhs, Rhs&& rhs) HB_AUTO_RETURN (hb_forward<Rhs> (rhs) (hb_forward<Lhs> (lhs))) + +/* hb_map(), hb_filter(), hb_reduce() */ + +enum class hb_function_sortedness_t { + NOT_SORTED, + RETAINS_SORTING, + SORTED, +}; + +template <typename Iter, typename Proj, hb_function_sortedness_t Sorted, + hb_requires (hb_is_iterator (Iter))> +struct hb_map_iter_t : + hb_iter_t<hb_map_iter_t<Iter, Proj, Sorted>, + decltype (hb_get (hb_declval (Proj), *hb_declval (Iter)))> +{ + hb_map_iter_t (const Iter& it, Proj f_) : it (it), f (f_) {} + + typedef decltype (hb_get (hb_declval (Proj), *hb_declval (Iter))) __item_t__; + static constexpr bool is_random_access_iterator = Iter::is_random_access_iterator; + static constexpr bool is_sorted_iterator = + Sorted == hb_function_sortedness_t::SORTED ? true : + Sorted == hb_function_sortedness_t::RETAINS_SORTING ? Iter::is_sorted_iterator : + false; + __item_t__ __item__ () const { return hb_get (f.get (), *it); } + __item_t__ __item_at__ (unsigned i) const { return hb_get (f.get (), it[i]); } + bool __more__ () const { return bool (it); } + unsigned __len__ () const { return it.len (); } + void __next__ () { ++it; } + void __forward__ (unsigned n) { it += n; } + void __prev__ () { --it; } + void __rewind__ (unsigned n) { it -= n; } + hb_map_iter_t __end__ () const { return hb_map_iter_t (it.end (), f); } + bool operator != (const hb_map_iter_t& o) const + { return it != o.it; } + + private: + Iter it; + hb_reference_wrapper<Proj> f; +}; + +template <typename Proj, hb_function_sortedness_t Sorted> +struct hb_map_iter_factory_t +{ + hb_map_iter_factory_t (Proj f) : f (f) {} + + template <typename Iter, + hb_requires (hb_is_iterator (Iter))> + hb_map_iter_t<Iter, Proj, Sorted> + operator () (Iter it) + { return hb_map_iter_t<Iter, Proj, Sorted> (it, f); } + + private: + Proj f; +}; +struct +{ + template <typename Proj> + hb_map_iter_factory_t<Proj, hb_function_sortedness_t::NOT_SORTED> + operator () (Proj&& f) const + { return hb_map_iter_factory_t<Proj, hb_function_sortedness_t::NOT_SORTED> (f); } +} +HB_FUNCOBJ (hb_map); +struct +{ + template <typename Proj> + hb_map_iter_factory_t<Proj, hb_function_sortedness_t::RETAINS_SORTING> + operator () (Proj&& f) const + { return hb_map_iter_factory_t<Proj, hb_function_sortedness_t::RETAINS_SORTING> (f); } +} +HB_FUNCOBJ (hb_map_retains_sorting); +struct +{ + template <typename Proj> + hb_map_iter_factory_t<Proj, hb_function_sortedness_t::SORTED> + operator () (Proj&& f) const + { return hb_map_iter_factory_t<Proj, hb_function_sortedness_t::SORTED> (f); } +} +HB_FUNCOBJ (hb_map_sorted); + +template <typename Iter, typename Pred, typename Proj, + hb_requires (hb_is_iterator (Iter))> +struct hb_filter_iter_t : + hb_iter_with_fallback_t<hb_filter_iter_t<Iter, Pred, Proj>, + typename Iter::item_t> +{ + hb_filter_iter_t (const Iter& it_, Pred p_, Proj f_) : it (it_), p (p_), f (f_) + { while (it && !hb_has (p.get (), hb_get (f.get (), *it))) ++it; } + + typedef typename Iter::item_t __item_t__; + static constexpr bool is_sorted_iterator = Iter::is_sorted_iterator; + __item_t__ __item__ () const { return *it; } + bool __more__ () const { return bool (it); } + void __next__ () { do ++it; while (it && !hb_has (p.get (), hb_get (f.get (), *it))); } + void __prev__ () { do --it; while (it && !hb_has (p.get (), hb_get (f.get (), *it))); } + hb_filter_iter_t __end__ () const { return hb_filter_iter_t (it.end (), p, f); } + bool operator != (const hb_filter_iter_t& o) const + { return it != o.it; } + + private: + Iter it; + hb_reference_wrapper<Pred> p; + hb_reference_wrapper<Proj> f; +}; +template <typename Pred, typename Proj> +struct hb_filter_iter_factory_t +{ + hb_filter_iter_factory_t (Pred p, Proj f) : p (p), f (f) {} + + template <typename Iter, + hb_requires (hb_is_iterator (Iter))> + hb_filter_iter_t<Iter, Pred, Proj> + operator () (Iter it) + { return hb_filter_iter_t<Iter, Pred, Proj> (it, p, f); } + + private: + Pred p; + Proj f; +}; +struct +{ + template <typename Pred = decltype ((hb_identity)), + typename Proj = decltype ((hb_identity))> + hb_filter_iter_factory_t<Pred, Proj> + operator () (Pred&& p = hb_identity, Proj&& f = hb_identity) const + { return hb_filter_iter_factory_t<Pred, Proj> (p, f); } +} +HB_FUNCOBJ (hb_filter); + +template <typename Redu, typename InitT> +struct hb_reduce_t +{ + hb_reduce_t (Redu r, InitT init_value) : r (r), init_value (init_value) {} + + template <typename Iter, + hb_requires (hb_is_iterator (Iter)), + typename AccuT = hb_decay<decltype (hb_declval (Redu) (hb_declval (InitT), hb_declval (typename Iter::item_t)))>> + AccuT + operator () (Iter it) + { + AccuT value = init_value; + for (; it; ++it) + value = r (value, *it); + return value; + } + + private: + Redu r; + InitT init_value; +}; +struct +{ + template <typename Redu, typename InitT> + hb_reduce_t<Redu, InitT> + operator () (Redu&& r, InitT init_value) const + { return hb_reduce_t<Redu, InitT> (r, init_value); } +} +HB_FUNCOBJ (hb_reduce); + + +/* hb_zip() */ + +template <typename A, typename B> +struct hb_zip_iter_t : + hb_iter_t<hb_zip_iter_t<A, B>, + hb_pair_t<typename A::item_t, typename B::item_t>> +{ + hb_zip_iter_t () {} + hb_zip_iter_t (const A& a, const B& b) : a (a), b (b) {} + + typedef hb_pair_t<typename A::item_t, typename B::item_t> __item_t__; + static constexpr bool is_random_access_iterator = + A::is_random_access_iterator && + B::is_random_access_iterator; + /* Note. The following categorization is only valid if A is strictly sorted, + * ie. does NOT have duplicates. Previously I tried to categorize sortedness + * more granularly, see commits: + * + * 513762849a683914fc266a17ddf38f133cccf072 + * 4d3cf2adb669c345cc43832d11689271995e160a + * + * However, that was not enough, since hb_sorted_array_t, hb_sorted_vector_t, + * SortedArrayOf, etc all needed to be updated to add more variants. At that + * point I saw it not worth the effort, and instead we now deem all sorted + * collections as essentially strictly-sorted for the purposes of zip. + * + * The above assumption is not as bad as it sounds. Our "sorted" comes with + * no guarantees. It's just a contract, put in place to help you remember, + * and think about, whether an iterator you receive is expected to be + * sorted or not. As such, it's not perfect by definition, and should not + * be treated so. The inaccuracy here just errs in the direction of being + * more permissive, so your code compiles instead of erring on the side of + * marking your zipped iterator unsorted in which case your code won't + * compile. + * + * This semantical limitation does NOT affect logic in any other place I + * know of as of this writing. + */ + static constexpr bool is_sorted_iterator = A::is_sorted_iterator; + + __item_t__ __item__ () const { return __item_t__ (*a, *b); } + __item_t__ __item_at__ (unsigned i) const { return __item_t__ (a[i], b[i]); } + bool __more__ () const { return bool (a) && bool (b); } + unsigned __len__ () const { return hb_min (a.len (), b.len ()); } + void __next__ () { ++a; ++b; } + void __forward__ (unsigned n) { a += n; b += n; } + void __prev__ () { --a; --b; } + void __rewind__ (unsigned n) { a -= n; b -= n; } + hb_zip_iter_t __end__ () const { return hb_zip_iter_t (a.end (), b.end ()); } + /* Note, we should stop if ANY of the iters reaches end. As such two compare + * unequal if both items are unequal, NOT if either is unequal. */ + bool operator != (const hb_zip_iter_t& o) const + { return a != o.a && b != o.b; } + + private: + A a; + B b; +}; +struct +{ HB_PARTIALIZE(2); + template <typename A, typename B, + hb_requires (hb_is_iterable (A) && hb_is_iterable (B))> + hb_zip_iter_t<hb_iter_type<A>, hb_iter_type<B>> + operator () (A&& a, B&& b) const + { return hb_zip_iter_t<hb_iter_type<A>, hb_iter_type<B>> (hb_iter (a), hb_iter (b)); } +} +HB_FUNCOBJ (hb_zip); + +/* hb_apply() */ + +template <typename Appl> +struct hb_apply_t +{ + hb_apply_t (Appl a) : a (a) {} + + template <typename Iter, + hb_requires (hb_is_iterator (Iter))> + void operator () (Iter it) + { + for (; it; ++it) + (void) hb_invoke (a, *it); + } + + private: + Appl a; +}; +struct +{ + template <typename Appl> hb_apply_t<Appl> + operator () (Appl&& a) const + { return hb_apply_t<Appl> (a); } + + template <typename Appl> hb_apply_t<Appl&> + operator () (Appl *a) const + { return hb_apply_t<Appl&> (*a); } +} +HB_FUNCOBJ (hb_apply); + +/* hb_range()/hb_iota()/hb_repeat() */ + +template <typename T, typename S> +struct hb_range_iter_t : + hb_iter_t<hb_range_iter_t<T, S>, T> +{ + hb_range_iter_t (T start, T end_, S step) : v (start), end_ (end_for (start, end_, step)), step (step) {} + + typedef T __item_t__; + static constexpr bool is_random_access_iterator = true; + static constexpr bool is_sorted_iterator = true; + __item_t__ __item__ () const { return hb_ridentity (v); } + __item_t__ __item_at__ (unsigned j) const { return v + j * step; } + bool __more__ () const { return v != end_; } + unsigned __len__ () const { return !step ? UINT_MAX : (end_ - v) / step; } + void __next__ () { v += step; } + void __forward__ (unsigned n) { v += n * step; } + void __prev__ () { v -= step; } + void __rewind__ (unsigned n) { v -= n * step; } + hb_range_iter_t __end__ () const { return hb_range_iter_t (end_, end_, step); } + bool operator != (const hb_range_iter_t& o) const + { return v != o.v; } + + private: + static inline T end_for (T start, T end_, S step) + { + if (!step) + return end_; + auto res = (end_ - start) % step; + if (!res) + return end_; + end_ += step - res; + return end_; + } + + private: + T v; + T end_; + S step; +}; +struct +{ + template <typename T = unsigned> hb_range_iter_t<T, unsigned> + operator () (T end = (unsigned) -1) const + { return hb_range_iter_t<T, unsigned> (0, end, 1u); } + + template <typename T, typename S = unsigned> hb_range_iter_t<T, S> + operator () (T start, T end, S step = 1u) const + { return hb_range_iter_t<T, S> (start, end, step); } +} +HB_FUNCOBJ (hb_range); + +template <typename T, typename S> +struct hb_iota_iter_t : + hb_iter_with_fallback_t<hb_iota_iter_t<T, S>, T> +{ + hb_iota_iter_t (T start, S step) : v (start), step (step) {} + + private: + + template <typename S2 = S> + auto + inc (hb_type_identity<S2> s, hb_priority<1>) + -> hb_void_t<decltype (hb_invoke (hb_forward<S2> (s), hb_declval<T&> ()))> + { v = hb_invoke (hb_forward<S2> (s), v); } + + void + inc (S s, hb_priority<0>) + { v += s; } + + public: + + typedef T __item_t__; + static constexpr bool is_random_access_iterator = true; + static constexpr bool is_sorted_iterator = true; + __item_t__ __item__ () const { return hb_ridentity (v); } + bool __more__ () const { return true; } + unsigned __len__ () const { return UINT_MAX; } + void __next__ () { inc (step, hb_prioritize); } + void __prev__ () { v -= step; } + hb_iota_iter_t __end__ () const { return *this; } + bool operator != (const hb_iota_iter_t& o) const { return true; } + + private: + T v; + S step; +}; +struct +{ + template <typename T = unsigned, typename S = unsigned> hb_iota_iter_t<T, S> + operator () (T start = 0u, S step = 1u) const + { return hb_iota_iter_t<T, S> (start, step); } +} +HB_FUNCOBJ (hb_iota); + +template <typename T> +struct hb_repeat_iter_t : + hb_iter_t<hb_repeat_iter_t<T>, T> +{ + hb_repeat_iter_t (T value) : v (value) {} + + typedef T __item_t__; + static constexpr bool is_random_access_iterator = true; + static constexpr bool is_sorted_iterator = true; + __item_t__ __item__ () const { return v; } + __item_t__ __item_at__ (unsigned j) const { return v; } + bool __more__ () const { return true; } + unsigned __len__ () const { return UINT_MAX; } + void __next__ () {} + void __forward__ (unsigned) {} + void __prev__ () {} + void __rewind__ (unsigned) {} + hb_repeat_iter_t __end__ () const { return *this; } + bool operator != (const hb_repeat_iter_t& o) const { return true; } + + private: + T v; +}; +struct +{ + template <typename T> hb_repeat_iter_t<T> + operator () (T value) const + { return hb_repeat_iter_t<T> (value); } +} +HB_FUNCOBJ (hb_repeat); + +/* hb_enumerate()/hb_take() */ + +struct +{ + template <typename Iterable, + typename Index = unsigned, + hb_requires (hb_is_iterable (Iterable))> + auto operator () (Iterable&& it, Index start = 0u) const HB_AUTO_RETURN + ( hb_zip (hb_iota (start), it) ) +} +HB_FUNCOBJ (hb_enumerate); + +struct +{ HB_PARTIALIZE(2); + template <typename Iterable, + hb_requires (hb_is_iterable (Iterable))> + auto operator () (Iterable&& it, unsigned count) const HB_AUTO_RETURN + ( hb_zip (hb_range (count), it) | hb_map (hb_second) ) + + /* Specialization arrays. */ + + template <typename Type> inline hb_array_t<Type> + operator () (hb_array_t<Type> array, unsigned count) const + { return array.sub_array (0, count); } + + template <typename Type> inline hb_sorted_array_t<Type> + operator () (hb_sorted_array_t<Type> array, unsigned count) const + { return array.sub_array (0, count); } +} +HB_FUNCOBJ (hb_take); + +struct +{ HB_PARTIALIZE(2); + template <typename Iter, + hb_requires (hb_is_iterator (Iter))> + auto operator () (Iter it, unsigned count) const HB_AUTO_RETURN + ( + + hb_iota (it, hb_add (count)) + | hb_map (hb_take (count)) + | hb_take ((hb_len (it) + count - 1) / count) + ) +} +HB_FUNCOBJ (hb_chop); + +/* hb_sink() */ + +template <typename Sink> +struct hb_sink_t +{ + hb_sink_t (Sink s) : s (s) {} + + template <typename Iter, + hb_requires (hb_is_iterator (Iter))> + void operator () (Iter it) + { + for (; it; ++it) + s << *it; + } + + private: + Sink s; +}; +struct +{ + template <typename Sink> hb_sink_t<Sink> + operator () (Sink&& s) const + { return hb_sink_t<Sink> (s); } + + template <typename Sink> hb_sink_t<Sink&> + operator () (Sink *s) const + { return hb_sink_t<Sink&> (*s); } +} +HB_FUNCOBJ (hb_sink); + +/* hb-drain: hb_sink to void / blackhole / /dev/null. */ + +struct +{ + template <typename Iter, + hb_requires (hb_is_iterator (Iter))> + void operator () (Iter it) const + { + for (; it; ++it) + (void) *it; + } +} +HB_FUNCOBJ (hb_drain); + +/* hb_unzip(): unzip and sink to two sinks. */ + +template <typename Sink1, typename Sink2> +struct hb_unzip_t +{ + hb_unzip_t (Sink1 s1, Sink2 s2) : s1 (s1), s2 (s2) {} + + template <typename Iter, + hb_requires (hb_is_iterator (Iter))> + void operator () (Iter it) + { + for (; it; ++it) + { + const auto &v = *it; + s1 << v.first; + s2 << v.second; + } + } + + private: + Sink1 s1; + Sink2 s2; +}; +struct +{ + template <typename Sink1, typename Sink2> hb_unzip_t<Sink1, Sink2> + operator () (Sink1&& s1, Sink2&& s2) const + { return hb_unzip_t<Sink1, Sink2> (s1, s2); } + + template <typename Sink1, typename Sink2> hb_unzip_t<Sink1&, Sink2&> + operator () (Sink1 *s1, Sink2 *s2) const + { return hb_unzip_t<Sink1&, Sink2&> (*s1, *s2); } +} +HB_FUNCOBJ (hb_unzip); + + +/* hb-all, hb-any, hb-none. */ + +struct +{ + template <typename Iterable, + typename Pred = decltype ((hb_identity)), + typename Proj = decltype ((hb_identity)), + hb_requires (hb_is_iterable (Iterable))> + bool operator () (Iterable&& c, + Pred&& p = hb_identity, + Proj&& f = hb_identity) const + { + for (auto it = hb_iter (c); it; ++it) + if (!hb_match (hb_forward<Pred> (p), hb_get (hb_forward<Proj> (f), *it))) + return false; + return true; + } +} +HB_FUNCOBJ (hb_all); +struct +{ + template <typename Iterable, + typename Pred = decltype ((hb_identity)), + typename Proj = decltype ((hb_identity)), + hb_requires (hb_is_iterable (Iterable))> + bool operator () (Iterable&& c, + Pred&& p = hb_identity, + Proj&& f = hb_identity) const + { + for (auto it = hb_iter (c); it; ++it) + if (hb_match (hb_forward<Pred> (p), hb_get (hb_forward<Proj> (f), *it))) + return true; + return false; + } +} +HB_FUNCOBJ (hb_any); +struct +{ + template <typename Iterable, + typename Pred = decltype ((hb_identity)), + typename Proj = decltype ((hb_identity)), + hb_requires (hb_is_iterable (Iterable))> + bool operator () (Iterable&& c, + Pred&& p = hb_identity, + Proj&& f = hb_identity) const + { + for (auto it = hb_iter (c); it; ++it) + if (hb_match (hb_forward<Pred> (p), hb_get (hb_forward<Proj> (f), *it))) + return false; + return true; + } +} +HB_FUNCOBJ (hb_none); + +/* + * Algorithms operating on iterators. + */ + +template <typename C, typename V, + hb_requires (hb_is_iterable (C))> +inline void +hb_fill (C& c, const V &v) +{ + for (auto i = hb_iter (c); i; i++) + *i = v; +} + +template <typename S, typename D> +inline void +hb_copy (S&& is, D&& id) +{ + hb_iter (is) | hb_sink (id); +} + + +#endif /* HB_ITER_HH */ diff --git a/thirdparty/harfbuzz/src/hb-kern.hh b/thirdparty/harfbuzz/src/hb-kern.hh new file mode 100644 index 0000000000..3f952fe7fc --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-kern.hh @@ -0,0 +1,138 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_KERN_HH +#define HB_KERN_HH + +#include "hb-open-type.hh" +#include "hb-aat-layout-common.hh" +#include "hb-ot-layout-gpos-table.hh" + + +namespace OT { + + +template <typename Driver> +struct hb_kern_machine_t +{ + hb_kern_machine_t (const Driver &driver_, + bool crossStream_ = false) : + driver (driver_), + crossStream (crossStream_) {} + + HB_NO_SANITIZE_SIGNED_INTEGER_OVERFLOW + void kern (hb_font_t *font, + hb_buffer_t *buffer, + hb_mask_t kern_mask, + bool scale = true) const + { + OT::hb_ot_apply_context_t c (1, font, buffer); + c.set_lookup_mask (kern_mask); + c.set_lookup_props (OT::LookupFlag::IgnoreMarks); + auto &skippy_iter = c.iter_input; + + bool horizontal = HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction); + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + hb_glyph_position_t *pos = buffer->pos; + for (unsigned int idx = 0; idx < count;) + { + if (!(info[idx].mask & kern_mask)) + { + idx++; + continue; + } + + skippy_iter.reset (idx, 1); + if (!skippy_iter.next ()) + { + idx++; + continue; + } + + unsigned int i = idx; + unsigned int j = skippy_iter.idx; + + hb_position_t kern = driver.get_kerning (info[i].codepoint, + info[j].codepoint); + + + if (likely (!kern)) + goto skip; + + if (horizontal) + { + if (scale) + kern = font->em_scale_x (kern); + if (crossStream) + { + pos[j].y_offset = kern; + buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT; + } + else + { + hb_position_t kern1 = kern >> 1; + hb_position_t kern2 = kern - kern1; + pos[i].x_advance += kern1; + pos[j].x_advance += kern2; + pos[j].x_offset += kern2; + } + } + else + { + if (scale) + kern = font->em_scale_y (kern); + if (crossStream) + { + pos[j].x_offset = kern; + buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT; + } + else + { + hb_position_t kern1 = kern >> 1; + hb_position_t kern2 = kern - kern1; + pos[i].y_advance += kern1; + pos[j].y_advance += kern2; + pos[j].y_offset += kern2; + } + } + + buffer->unsafe_to_break (i, j + 1); + + skip: + idx = skippy_iter.idx; + } + } + + const Driver &driver; + bool crossStream; +}; + + +} /* namespace OT */ + + +#endif /* HB_KERN_HH */ diff --git a/thirdparty/harfbuzz/src/hb-machinery.hh b/thirdparty/harfbuzz/src/hb-machinery.hh new file mode 100644 index 0000000000..54bc60d4c8 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-machinery.hh @@ -0,0 +1,307 @@ +/* + * Copyright © 2007,2008,2009,2010 Red Hat, Inc. + * Copyright © 2012,2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_MACHINERY_HH +#define HB_MACHINERY_HH + +#include "hb.hh" +#include "hb-blob.hh" + +#include "hb-dispatch.hh" +#include "hb-sanitize.hh" +#include "hb-serialize.hh" + + +/* + * Casts + */ + +/* StructAtOffset<T>(P,Ofs) returns the struct T& that is placed at memory + * location pointed to by P plus Ofs bytes. */ +template<typename Type> +static inline const Type& StructAtOffset(const void *P, unsigned int offset) +{ return * reinterpret_cast<const Type*> ((const char *) P + offset); } +template<typename Type> +static inline Type& StructAtOffset(void *P, unsigned int offset) +{ return * reinterpret_cast<Type*> ((char *) P + offset); } +template<typename Type> +static inline const Type& StructAtOffsetUnaligned(const void *P, unsigned int offset) +{ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-align" + return * reinterpret_cast<const Type*> ((const char *) P + offset); +#pragma GCC diagnostic pop +} +template<typename Type> +static inline Type& StructAtOffsetUnaligned(void *P, unsigned int offset) +{ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-align" + return * reinterpret_cast<Type*> ((char *) P + offset); +#pragma GCC diagnostic pop +} + +/* StructAfter<T>(X) returns the struct T& that is placed after X. + * Works with X of variable size also. X must implement get_size() */ +template<typename Type, typename TObject> +static inline const Type& StructAfter(const TObject &X) +{ return StructAtOffset<Type>(&X, X.get_size()); } +template<typename Type, typename TObject> +static inline Type& StructAfter(TObject &X) +{ return StructAtOffset<Type>(&X, X.get_size()); } + + +/* + * Size checking + */ + +/* Check _assertion in a method environment */ +#define _DEFINE_INSTANCE_ASSERTION1(_line, _assertion) \ + void _instance_assertion_on_line_##_line () const \ + { static_assert ((_assertion), ""); } +# define _DEFINE_INSTANCE_ASSERTION0(_line, _assertion) _DEFINE_INSTANCE_ASSERTION1 (_line, _assertion) +# define DEFINE_INSTANCE_ASSERTION(_assertion) _DEFINE_INSTANCE_ASSERTION0 (__LINE__, _assertion) + +/* Check that _code compiles in a method environment */ +#define _DEFINE_COMPILES_ASSERTION1(_line, _code) \ + void _compiles_assertion_on_line_##_line () const \ + { _code; } +# define _DEFINE_COMPILES_ASSERTION0(_line, _code) _DEFINE_COMPILES_ASSERTION1 (_line, _code) +# define DEFINE_COMPILES_ASSERTION(_code) _DEFINE_COMPILES_ASSERTION0 (__LINE__, _code) + + +#define DEFINE_SIZE_STATIC(size) \ + DEFINE_INSTANCE_ASSERTION (sizeof (*this) == (size)) \ + unsigned int get_size () const { return (size); } \ + static constexpr unsigned null_size = (size); \ + static constexpr unsigned min_size = (size); \ + static constexpr unsigned static_size = (size) + +#define DEFINE_SIZE_UNION(size, _member) \ + DEFINE_COMPILES_ASSERTION ((void) this->u._member.static_size) \ + DEFINE_INSTANCE_ASSERTION (sizeof(this->u._member) == (size)) \ + static constexpr unsigned null_size = (size); \ + static constexpr unsigned min_size = (size) + +#define DEFINE_SIZE_MIN(size) \ + DEFINE_INSTANCE_ASSERTION (sizeof (*this) >= (size)) \ + static constexpr unsigned null_size = (size); \ + static constexpr unsigned min_size = (size) + +#define DEFINE_SIZE_UNBOUNDED(size) \ + DEFINE_INSTANCE_ASSERTION (sizeof (*this) >= (size)) \ + static constexpr unsigned min_size = (size) + +#define DEFINE_SIZE_ARRAY(size, array) \ + DEFINE_COMPILES_ASSERTION ((void) (array)[0].static_size) \ + DEFINE_INSTANCE_ASSERTION (sizeof (*this) == (size) + (HB_VAR_ARRAY+0) * sizeof ((array)[0])) \ + static constexpr unsigned null_size = (size); \ + static constexpr unsigned min_size = (size) + +#define DEFINE_SIZE_ARRAY_SIZED(size, array) \ + unsigned int get_size () const { return (size - (array).min_size + (array).get_size ()); } \ + DEFINE_SIZE_ARRAY(size, array) + + + +/* + * Lazy loaders. + */ + +template <typename Data, unsigned int WheresData> +struct hb_data_wrapper_t +{ + static_assert (WheresData > 0, ""); + + Data * get_data () const + { return *(((Data **) (void *) this) - WheresData); } + + bool is_inert () const { return !get_data (); } + + template <typename Stored, typename Subclass> + Stored * call_create () const { return Subclass::create (get_data ()); } +}; +template <> +struct hb_data_wrapper_t<void, 0> +{ + bool is_inert () const { return false; } + + template <typename Stored, typename Funcs> + Stored * call_create () const { return Funcs::create (); } +}; + +template <typename T1, typename T2> struct hb_non_void_t { typedef T1 value; }; +template <typename T2> struct hb_non_void_t<void, T2> { typedef T2 value; }; + +template <typename Returned, + typename Subclass = void, + typename Data = void, + unsigned int WheresData = 0, + typename Stored = Returned> +struct hb_lazy_loader_t : hb_data_wrapper_t<Data, WheresData> +{ + typedef typename hb_non_void_t<Subclass, + hb_lazy_loader_t<Returned,Subclass,Data,WheresData,Stored> + >::value Funcs; + + void init0 () {} /* Init, when memory is already set to 0. No-op for us. */ + void init () { instance.set_relaxed (nullptr); } + void fini () { do_destroy (instance.get ()); } + + void free_instance () + { + retry: + Stored *p = instance.get (); + if (unlikely (p && !cmpexch (p, nullptr))) + goto retry; + do_destroy (p); + } + + static void do_destroy (Stored *p) + { + if (p && p != const_cast<Stored *> (Funcs::get_null ())) + Funcs::destroy (p); + } + + const Returned * operator -> () const { return get (); } + const Returned & operator * () const { return *get (); } + explicit operator bool () const + { return get_stored () != Funcs::get_null (); } + template <typename C> operator const C * () const { return get (); } + + Stored * get_stored () const + { + retry: + Stored *p = this->instance.get (); + if (unlikely (!p)) + { + if (unlikely (this->is_inert ())) + return const_cast<Stored *> (Funcs::get_null ()); + + p = this->template call_create<Stored, Funcs> (); + if (unlikely (!p)) + p = const_cast<Stored *> (Funcs::get_null ()); + + if (unlikely (!cmpexch (nullptr, p))) + { + do_destroy (p); + goto retry; + } + } + return p; + } + Stored * get_stored_relaxed () const + { + return this->instance.get_relaxed (); + } + + bool cmpexch (Stored *current, Stored *value) const + { + /* This *must* be called when there are no other threads accessing. */ + return this->instance.cmpexch (current, value); + } + + const Returned * get () const { return Funcs::convert (get_stored ()); } + const Returned * get_relaxed () const { return Funcs::convert (get_stored_relaxed ()); } + Returned * get_unconst () const { return const_cast<Returned *> (Funcs::convert (get_stored ())); } + + /* To be possibly overloaded by subclasses. */ + static Returned* convert (Stored *p) { return p; } + + /* By default null/init/fini the object. */ + static const Stored* get_null () { return &Null (Stored); } + static Stored *create (Data *data) + { + Stored *p = (Stored *) calloc (1, sizeof (Stored)); + if (likely (p)) + p->init (data); + return p; + } + static Stored *create () + { + Stored *p = (Stored *) calloc (1, sizeof (Stored)); + if (likely (p)) + p->init (); + return p; + } + static void destroy (Stored *p) + { + p->fini (); + free (p); + } + +// private: + /* Must only have one pointer. */ + hb_atomic_ptr_t<Stored *> instance; +}; + +/* Specializations. */ + +template <typename T, unsigned int WheresFace> +struct hb_face_lazy_loader_t : hb_lazy_loader_t<T, + hb_face_lazy_loader_t<T, WheresFace>, + hb_face_t, WheresFace> {}; + +template <typename T, unsigned int WheresFace> +struct hb_table_lazy_loader_t : hb_lazy_loader_t<T, + hb_table_lazy_loader_t<T, WheresFace>, + hb_face_t, WheresFace, + hb_blob_t> +{ + static hb_blob_t *create (hb_face_t *face) + { return hb_sanitize_context_t ().reference_table<T> (face); } + static void destroy (hb_blob_t *p) { hb_blob_destroy (p); } + + static const hb_blob_t *get_null () + { return hb_blob_get_empty (); } + + static const T* convert (const hb_blob_t *blob) + { return blob->as<T> (); } + + hb_blob_t* get_blob () const { return this->get_stored (); } +}; + +template <typename Subclass> +struct hb_font_funcs_lazy_loader_t : hb_lazy_loader_t<hb_font_funcs_t, Subclass> +{ + static void destroy (hb_font_funcs_t *p) + { hb_font_funcs_destroy (p); } + static const hb_font_funcs_t *get_null () + { return hb_font_funcs_get_empty (); } +}; +template <typename Subclass> +struct hb_unicode_funcs_lazy_loader_t : hb_lazy_loader_t<hb_unicode_funcs_t, Subclass> +{ + static void destroy (hb_unicode_funcs_t *p) + { hb_unicode_funcs_destroy (p); } + static const hb_unicode_funcs_t *get_null () + { return hb_unicode_funcs_get_empty (); } +}; + + +#endif /* HB_MACHINERY_HH */ diff --git a/thirdparty/harfbuzz/src/hb-map.cc b/thirdparty/harfbuzz/src/hb-map.cc new file mode 100644 index 0000000000..191be14372 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-map.cc @@ -0,0 +1,268 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-map.hh" + + +/** + * SECTION:hb-map + * @title: hb-map + * @short_description: Object representing integer to integer mapping + * @include: hb.h + * + * Map objects are integer-to-integer hash-maps. Currently they are + * not used in the HarfBuzz public API, but are provided for client's + * use if desired. + **/ + + +/** + * hb_map_create: (Xconstructor) + * + * Return value: (transfer full): + * + * Since: 1.7.7 + **/ +hb_map_t * +hb_map_create () +{ + hb_map_t *map; + + if (!(map = hb_object_create<hb_map_t> ())) + return hb_map_get_empty (); + + map->init_shallow (); + + return map; +} + +/** + * hb_map_get_empty: + * + * Return value: (transfer full): + * + * Since: 1.7.7 + **/ +hb_map_t * +hb_map_get_empty () +{ + return const_cast<hb_map_t *> (&Null (hb_map_t)); +} + +/** + * hb_map_reference: (skip) + * @map: a map. + * + * Return value: (transfer full): + * + * Since: 1.7.7 + **/ +hb_map_t * +hb_map_reference (hb_map_t *map) +{ + return hb_object_reference (map); +} + +/** + * hb_map_destroy: (skip) + * @map: a map. + * + * Since: 1.7.7 + **/ +void +hb_map_destroy (hb_map_t *map) +{ + if (!hb_object_destroy (map)) return; + + map->fini_shallow (); + + free (map); +} + +/** + * hb_map_set_user_data: (skip) + * @map: a map. + * @key: + * @data: + * @destroy: + * @replace: + * + * Return value: + * + * Since: 1.7.7 + **/ +hb_bool_t +hb_map_set_user_data (hb_map_t *map, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace) +{ + return hb_object_set_user_data (map, key, data, destroy, replace); +} + +/** + * hb_map_get_user_data: (skip) + * @map: a map. + * @key: + * + * Return value: (transfer none): + * + * Since: 1.7.7 + **/ +void * +hb_map_get_user_data (hb_map_t *map, + hb_user_data_key_t *key) +{ + return hb_object_get_user_data (map, key); +} + + +/** + * hb_map_allocation_successful: + * @map: a map. + * + * + * + * Return value: + * + * Since: 1.7.7 + **/ +hb_bool_t +hb_map_allocation_successful (const hb_map_t *map) +{ + return map->successful; +} + + +/** + * hb_map_set: + * @map: a map. + * @key: + * @value: + * + * + * + * Since: 1.7.7 + **/ +void +hb_map_set (hb_map_t *map, + hb_codepoint_t key, + hb_codepoint_t value) +{ + map->set (key, value); +} + +/** + * hb_map_get: + * @map: a map. + * @key: + * + * + * + * Since: 1.7.7 + **/ +hb_codepoint_t +hb_map_get (const hb_map_t *map, + hb_codepoint_t key) +{ + return map->get (key); +} + +/** + * hb_map_del: + * @map: a map. + * @key: + * + * + * + * Since: 1.7.7 + **/ +void +hb_map_del (hb_map_t *map, + hb_codepoint_t key) +{ + map->del (key); +} + +/** + * hb_map_has: + * @map: a map. + * @key: + * + * + * + * Since: 1.7.7 + **/ +hb_bool_t +hb_map_has (const hb_map_t *map, + hb_codepoint_t key) +{ + return map->has (key); +} + + +/** + * hb_map_clear: + * @map: a map. + * + * + * + * Since: 1.7.7 + **/ +void +hb_map_clear (hb_map_t *map) +{ + return map->clear (); +} + +/** + * hb_map_is_empty: + * @map: a map. + * + * + * + * Since: 1.7.7 + **/ +hb_bool_t +hb_map_is_empty (const hb_map_t *map) +{ + return map->is_empty (); +} + +/** + * hb_map_get_population: + * @map: a map. + * + * + * + * Since: 1.7.7 + **/ +unsigned int +hb_map_get_population (const hb_map_t *map) +{ + return map->get_population (); +} diff --git a/thirdparty/harfbuzz/src/hb-map.h b/thirdparty/harfbuzz/src/hb-map.h new file mode 100644 index 0000000000..b77843c2ba --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-map.h @@ -0,0 +1,104 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_MAP_H +#define HB_MAP_H + +#include "hb-common.h" + +HB_BEGIN_DECLS + + +/* + * Since: 1.7.7 + */ +#define HB_MAP_VALUE_INVALID ((hb_codepoint_t) -1) + +typedef struct hb_map_t hb_map_t; + + +HB_EXTERN hb_map_t * +hb_map_create (void); + +HB_EXTERN hb_map_t * +hb_map_get_empty (void); + +HB_EXTERN hb_map_t * +hb_map_reference (hb_map_t *map); + +HB_EXTERN void +hb_map_destroy (hb_map_t *map); + +HB_EXTERN hb_bool_t +hb_map_set_user_data (hb_map_t *map, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace); + +HB_EXTERN void * +hb_map_get_user_data (hb_map_t *map, + hb_user_data_key_t *key); + + +/* Returns false if allocation has failed before */ +HB_EXTERN hb_bool_t +hb_map_allocation_successful (const hb_map_t *map); + +HB_EXTERN void +hb_map_clear (hb_map_t *map); + +HB_EXTERN hb_bool_t +hb_map_is_empty (const hb_map_t *map); + +HB_EXTERN unsigned int +hb_map_get_population (const hb_map_t *map); + +HB_EXTERN void +hb_map_set (hb_map_t *map, + hb_codepoint_t key, + hb_codepoint_t value); + +HB_EXTERN hb_codepoint_t +hb_map_get (const hb_map_t *map, + hb_codepoint_t key); + +HB_EXTERN void +hb_map_del (hb_map_t *map, + hb_codepoint_t key); + +HB_EXTERN hb_bool_t +hb_map_has (const hb_map_t *map, + hb_codepoint_t key); + + +HB_END_DECLS + +#endif /* HB_MAP_H */ diff --git a/thirdparty/harfbuzz/src/hb-map.hh b/thirdparty/harfbuzz/src/hb-map.hh new file mode 100644 index 0000000000..92c1bd67e5 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-map.hh @@ -0,0 +1,326 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_MAP_HH +#define HB_MAP_HH + +#include "hb.hh" + + +/* + * hb_hashmap_t + */ + +template <typename K, typename V, + K kINVALID = hb_is_pointer (K) ? 0 : hb_is_signed (K) ? hb_int_min (K) : (K) -1, + V vINVALID = hb_is_pointer (V) ? 0 : hb_is_signed (V) ? hb_int_min (V) : (V) -1> +struct hb_hashmap_t +{ + HB_DELETE_COPY_ASSIGN (hb_hashmap_t); + hb_hashmap_t () { init (); } + ~hb_hashmap_t () { fini (); } + + static_assert (hb_is_integral (K) || hb_is_pointer (K), ""); + static_assert (hb_is_integral (V) || hb_is_pointer (V), ""); + + struct item_t + { + K key; + V value; + uint32_t hash; + + void clear () { key = kINVALID; value = vINVALID; hash = 0; } + + bool operator == (const K &o) { return hb_deref (key) == hb_deref (o); } + bool operator == (const item_t &o) { return *this == o.key; } + bool is_unused () const { return key == kINVALID; } + bool is_tombstone () const { return key != kINVALID && value == vINVALID; } + bool is_real () const { return key != kINVALID && value != vINVALID; } + hb_pair_t<K, V> get_pair() const { return hb_pair_t<K, V> (key, value); } + }; + + hb_object_header_t header; + bool successful; /* Allocations successful */ + unsigned int population; /* Not including tombstones. */ + unsigned int occupancy; /* Including tombstones. */ + unsigned int mask; + unsigned int prime; + item_t *items; + + void init_shallow () + { + successful = true; + population = occupancy = 0; + mask = 0; + prime = 0; + items = nullptr; + } + void init () + { + hb_object_init (this); + init_shallow (); + } + void fini_shallow () + { + free (items); + items = nullptr; + population = occupancy = 0; + } + void fini () + { + hb_object_fini (this); + fini_shallow (); + } + + void reset () + { + if (unlikely (hb_object_is_immutable (this))) + return; + successful = true; + clear (); + } + + bool in_error () const { return !successful; } + + bool resize () + { + if (unlikely (!successful)) return false; + + unsigned int power = hb_bit_storage (population * 2 + 8); + unsigned int new_size = 1u << power; + item_t *new_items = (item_t *) malloc ((size_t) new_size * sizeof (item_t)); + if (unlikely (!new_items)) + { + successful = false; + return false; + } + for (auto &_ : hb_iter (new_items, new_size)) + _.clear (); + + unsigned int old_size = mask + 1; + item_t *old_items = items; + + /* Switch to new, empty, array. */ + population = occupancy = 0; + mask = new_size - 1; + prime = prime_for (power); + items = new_items; + + /* Insert back old items. */ + if (old_items) + for (unsigned int i = 0; i < old_size; i++) + if (old_items[i].is_real ()) + set_with_hash (old_items[i].key, + old_items[i].hash, + old_items[i].value); + + free (old_items); + + return true; + } + + void set (K key, V value) + { + set_with_hash (key, hb_hash (key), value); + } + + V get (K key) const + { + if (unlikely (!items)) return vINVALID; + unsigned int i = bucket_for (key); + return items[i].is_real () && items[i] == key ? items[i].value : vINVALID; + } + + void del (K key) { set (key, vINVALID); } + + /* Has interface. */ + static constexpr V SENTINEL = vINVALID; + typedef V value_t; + value_t operator [] (K k) const { return get (k); } + bool has (K k, V *vp = nullptr) const + { + V v = (*this)[k]; + if (vp) *vp = v; + return v != SENTINEL; + } + /* Projection. */ + V operator () (K k) const { return get (k); } + + void clear () + { + if (unlikely (hb_object_is_immutable (this))) + return; + if (items) + for (auto &_ : hb_iter (items, mask + 1)) + _.clear (); + + population = occupancy = 0; + } + + bool is_empty () const { return population == 0; } + + unsigned int get_population () const { return population; } + + /* + * Iterator + */ + auto iter () const HB_AUTO_RETURN + ( + + hb_array (items, mask ? mask + 1 : 0) + | hb_filter (&item_t::is_real) + | hb_map (&item_t::get_pair) + ) + auto keys () const HB_AUTO_RETURN + ( + + hb_array (items, mask ? mask + 1 : 0) + | hb_filter (&item_t::is_real) + | hb_map (&item_t::key) + | hb_map (hb_ridentity) + ) + auto values () const HB_AUTO_RETURN + ( + + hb_array (items, mask ? mask + 1 : 0) + | hb_filter (&item_t::is_real) + | hb_map (&item_t::value) + | hb_map (hb_ridentity) + ) + + /* Sink interface. */ + hb_hashmap_t& operator << (const hb_pair_t<K, V>& v) + { set (v.first, v.second); return *this; } + + protected: + + void set_with_hash (K key, uint32_t hash, V value) + { + if (unlikely (!successful)) return; + if (unlikely (key == kINVALID)) return; + if ((occupancy + occupancy / 2) >= mask && !resize ()) return; + unsigned int i = bucket_for_hash (key, hash); + + if (value == vINVALID && items[i].key != key) + return; /* Trying to delete non-existent key. */ + + if (!items[i].is_unused ()) + { + occupancy--; + if (items[i].is_tombstone ()) + population--; + } + + items[i].key = key; + items[i].value = value; + items[i].hash = hash; + + occupancy++; + if (!items[i].is_tombstone ()) + population++; + } + + unsigned int bucket_for (K key) const + { + return bucket_for_hash (key, hb_hash (key)); + } + + unsigned int bucket_for_hash (K key, uint32_t hash) const + { + unsigned int i = hash % prime; + unsigned int step = 0; + unsigned int tombstone = (unsigned) -1; + while (!items[i].is_unused ()) + { + if (items[i].hash == hash && items[i] == key) + return i; + if (tombstone == (unsigned) -1 && items[i].is_tombstone ()) + tombstone = i; + i = (i + ++step) & mask; + } + return tombstone == (unsigned) -1 ? i : tombstone; + } + + static unsigned int prime_for (unsigned int shift) + { + /* Following comment and table copied from glib. */ + /* Each table size has an associated prime modulo (the first prime + * lower than the table size) used to find the initial bucket. Probing + * then works modulo 2^n. The prime modulo is necessary to get a + * good distribution with poor hash functions. + */ + /* Not declaring static to make all kinds of compilers happy... */ + /*static*/ const unsigned int prime_mod [32] = + { + 1, /* For 1 << 0 */ + 2, + 3, + 7, + 13, + 31, + 61, + 127, + 251, + 509, + 1021, + 2039, + 4093, + 8191, + 16381, + 32749, + 65521, /* For 1 << 16 */ + 131071, + 262139, + 524287, + 1048573, + 2097143, + 4194301, + 8388593, + 16777213, + 33554393, + 67108859, + 134217689, + 268435399, + 536870909, + 1073741789, + 2147483647 /* For 1 << 31 */ + }; + + if (unlikely (shift >= ARRAY_LENGTH (prime_mod))) + return prime_mod[ARRAY_LENGTH (prime_mod) - 1]; + + return prime_mod[shift]; + } +}; + +/* + * hb_map_t + */ + +struct hb_map_t : hb_hashmap_t<hb_codepoint_t, + hb_codepoint_t, + HB_MAP_VALUE_INVALID, + HB_MAP_VALUE_INVALID> {}; + + +#endif /* HB_MAP_HH */ diff --git a/thirdparty/harfbuzz/src/hb-meta.hh b/thirdparty/harfbuzz/src/hb-meta.hh new file mode 100644 index 0000000000..4c0898b1b7 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-meta.hh @@ -0,0 +1,410 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_META_HH +#define HB_META_HH + +#include "hb.hh" + + +/* + * C++ template meta-programming & fundamentals used with them. + */ + +/* Void! For when we need a expression-type of void. */ +struct hb_empty_t {}; + +/* https://en.cppreference.com/w/cpp/types/void_t */ +template<typename... Ts> struct _hb_void_t { typedef void type; }; +template<typename... Ts> using hb_void_t = typename _hb_void_t<Ts...>::type; + +template<typename Head, typename... Ts> struct _hb_head_t { typedef Head type; }; +template<typename... Ts> using hb_head_t = typename _hb_head_t<Ts...>::type; + +template <typename T, T v> struct hb_integral_constant { static constexpr T value = v; }; +template <bool b> using hb_bool_constant = hb_integral_constant<bool, b>; +using hb_true_type = hb_bool_constant<true>; +using hb_false_type = hb_bool_constant<false>; + + +/* Basic type SFINAE. */ + +template <bool B, typename T = void> struct hb_enable_if {}; +template <typename T> struct hb_enable_if<true, T> { typedef T type; }; +#define hb_enable_if(Cond) typename hb_enable_if<(Cond)>::type* = nullptr +/* Concepts/Requires alias: */ +#define hb_requires(Cond) hb_enable_if((Cond)) + +template <typename T, typename T2> struct hb_is_same : hb_false_type {}; +template <typename T> struct hb_is_same<T, T> : hb_true_type {}; +#define hb_is_same(T, T2) hb_is_same<T, T2>::value + +/* Function overloading SFINAE and priority. */ + +#define HB_RETURN(Ret, E) -> hb_head_t<Ret, decltype ((E))> { return (E); } +#define HB_AUTO_RETURN(E) -> decltype ((E)) { return (E); } +#define HB_VOID_RETURN(E) -> hb_void_t<decltype ((E))> { (E); } + +template <unsigned Pri> struct hb_priority : hb_priority<Pri - 1> {}; +template <> struct hb_priority<0> {}; +#define hb_prioritize hb_priority<16> () + +#define HB_FUNCOBJ(x) static_const x HB_UNUSED + + +template <typename T> struct hb_type_identity_t { typedef T type; }; +template <typename T> using hb_type_identity = typename hb_type_identity_t<T>::type; + +struct +{ + template <typename T> constexpr T* + operator () (T& arg) const + { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-align" + /* https://en.cppreference.com/w/cpp/memory/addressof */ + return reinterpret_cast<T*> ( + &const_cast<char&> ( + reinterpret_cast<const volatile char&> (arg))); +#pragma GCC diagnostic pop + } +} +HB_FUNCOBJ (hb_addressof); + +template <typename T> static inline T hb_declval (); +#define hb_declval(T) (hb_declval<T> ()) + +template <typename T> struct hb_match_const : hb_type_identity_t<T>, hb_bool_constant<false>{}; +template <typename T> struct hb_match_const<const T> : hb_type_identity_t<T>, hb_bool_constant<true> {}; +template <typename T> using hb_remove_const = typename hb_match_const<T>::type; +template <typename T> using hb_add_const = const T; +#define hb_is_const(T) hb_match_const<T>::value +template <typename T> struct hb_match_reference : hb_type_identity_t<T>, hb_bool_constant<false>{}; +template <typename T> struct hb_match_reference<T &> : hb_type_identity_t<T>, hb_bool_constant<true> {}; +template <typename T> struct hb_match_reference<T &&> : hb_type_identity_t<T>, hb_bool_constant<true> {}; +template <typename T> using hb_remove_reference = typename hb_match_reference<T>::type; +template <typename T> auto _hb_try_add_lvalue_reference (hb_priority<1>) -> hb_type_identity<T&>; +template <typename T> auto _hb_try_add_lvalue_reference (hb_priority<0>) -> hb_type_identity<T>; +template <typename T> using hb_add_lvalue_reference = decltype (_hb_try_add_lvalue_reference<T> (hb_prioritize)); +template <typename T> auto _hb_try_add_rvalue_reference (hb_priority<1>) -> hb_type_identity<T&&>; +template <typename T> auto _hb_try_add_rvalue_reference (hb_priority<0>) -> hb_type_identity<T>; +template <typename T> using hb_add_rvalue_reference = decltype (_hb_try_add_rvalue_reference<T> (hb_prioritize)); +#define hb_is_reference(T) hb_match_reference<T>::value +template <typename T> struct hb_match_pointer : hb_type_identity_t<T>, hb_bool_constant<false>{}; +template <typename T> struct hb_match_pointer<T *> : hb_type_identity_t<T>, hb_bool_constant<true> {}; +template <typename T> using hb_remove_pointer = typename hb_match_pointer<T>::type; +template <typename T> auto _hb_try_add_pointer (hb_priority<1>) -> hb_type_identity<hb_remove_reference<T>*>; +template <typename T> auto _hb_try_add_pointer (hb_priority<1>) -> hb_type_identity<T>; +template <typename T> using hb_add_pointer = decltype (_hb_try_add_pointer<T> (hb_prioritize)); +#define hb_is_pointer(T) hb_match_pointer<T>::value + + +/* TODO Add feature-parity to std::decay. */ +template <typename T> using hb_decay = hb_remove_const<hb_remove_reference<T>>; + + +template<bool B, class T, class F> +struct _hb_conditional { typedef T type; }; +template<class T, class F> +struct _hb_conditional<false, T, F> { typedef F type; }; +template<bool B, class T, class F> +using hb_conditional = typename _hb_conditional<B, T, F>::type; + + +template <typename From, typename To> +struct hb_is_convertible +{ + private: + static constexpr bool from_void = hb_is_same (void, hb_decay<From>); + static constexpr bool to_void = hb_is_same (void, hb_decay<To> ); + static constexpr bool either_void = from_void || to_void; + static constexpr bool both_void = from_void && to_void; + + static hb_true_type impl2 (hb_conditional<to_void, int, To>); + + template <typename T> + static auto impl (hb_priority<1>) -> decltype (impl2 (hb_declval (T))); + template <typename T> + static hb_false_type impl (hb_priority<0>); + public: + static constexpr bool value = both_void || + (!either_void && + decltype (impl<hb_conditional<from_void, int, From>> (hb_prioritize))::value); +}; +#define hb_is_convertible(From,To) hb_is_convertible<From, To>::value + +template <typename Base, typename Derived> +using hb_is_base_of = hb_is_convertible<hb_decay<Derived> *, hb_decay<Base> *>; +#define hb_is_base_of(Base,Derived) hb_is_base_of<Base, Derived>::value + +template <typename From, typename To> +using hb_is_cr_convertible = hb_bool_constant< + hb_is_same (hb_decay<From>, hb_decay<To>) && + (!hb_is_const (From) || hb_is_const (To)) && + (!hb_is_reference (To) || hb_is_const (To) || hb_is_reference (To)) +>; +#define hb_is_cr_convertible(From,To) hb_is_cr_convertible<From, To>::value + +/* std::move and std::forward */ + +template <typename T> +static constexpr hb_remove_reference<T>&& hb_move (T&& t) { return (hb_remove_reference<T>&&) (t); } + +template <typename T> +static constexpr T&& hb_forward (hb_remove_reference<T>& t) { return (T&&) t; } +template <typename T> +static constexpr T&& hb_forward (hb_remove_reference<T>&& t) { return (T&&) t; } + +struct +{ + template <typename T> constexpr auto + operator () (T&& v) const HB_AUTO_RETURN (hb_forward<T> (v)) + + template <typename T> constexpr auto + operator () (T *v) const HB_AUTO_RETURN (*v) +} +HB_FUNCOBJ (hb_deref); + +struct +{ + template <typename T> constexpr auto + operator () (T&& v) const HB_AUTO_RETURN (hb_forward<T> (v)) + + template <typename T> constexpr auto + operator () (T& v) const HB_AUTO_RETURN (hb_addressof (v)) +} +HB_FUNCOBJ (hb_ref); + +template <typename T> +struct hb_reference_wrapper +{ + hb_reference_wrapper (T v) : v (v) {} + bool operator == (const hb_reference_wrapper& o) const { return v == o.v; } + bool operator != (const hb_reference_wrapper& o) const { return v != o.v; } + operator T () const { return v; } + T get () const { return v; } + T v; +}; +template <typename T> +struct hb_reference_wrapper<T&> +{ + hb_reference_wrapper (T& v) : v (hb_addressof (v)) {} + bool operator == (const hb_reference_wrapper& o) const { return v == o.v; } + bool operator != (const hb_reference_wrapper& o) const { return v != o.v; } + operator T& () const { return *v; } + T& get () const { return *v; } + T* v; +}; + + +template <typename T> +using hb_is_integral = hb_bool_constant< + hb_is_same (hb_decay<T>, char) || + hb_is_same (hb_decay<T>, signed char) || + hb_is_same (hb_decay<T>, unsigned char) || + hb_is_same (hb_decay<T>, signed int) || + hb_is_same (hb_decay<T>, unsigned int) || + hb_is_same (hb_decay<T>, signed short) || + hb_is_same (hb_decay<T>, unsigned short) || + hb_is_same (hb_decay<T>, signed long) || + hb_is_same (hb_decay<T>, unsigned long) || + hb_is_same (hb_decay<T>, signed long long) || + hb_is_same (hb_decay<T>, unsigned long long) || + false +>; +#define hb_is_integral(T) hb_is_integral<T>::value +template <typename T> +using hb_is_floating_point = hb_bool_constant< + hb_is_same (hb_decay<T>, float) || + hb_is_same (hb_decay<T>, double) || + hb_is_same (hb_decay<T>, long double) || + false +>; +#define hb_is_floating_point(T) hb_is_floating_point<T>::value +template <typename T> +using hb_is_arithmetic = hb_bool_constant< + hb_is_integral (T) || + hb_is_floating_point (T) || + false +>; +#define hb_is_arithmetic(T) hb_is_arithmetic<T>::value + + +template <typename T> +using hb_is_signed = hb_conditional<hb_is_arithmetic (T), + hb_bool_constant<(T) -1 < (T) 0>, + hb_false_type>; +#define hb_is_signed(T) hb_is_signed<T>::value +template <typename T> +using hb_is_unsigned = hb_conditional<hb_is_arithmetic (T), + hb_bool_constant<(T) 0 < (T) -1>, + hb_false_type>; +#define hb_is_unsigned(T) hb_is_unsigned<T>::value + +template <typename T> struct hb_int_min; +template <> struct hb_int_min<char> : hb_integral_constant<char, CHAR_MIN> {}; +template <> struct hb_int_min<signed char> : hb_integral_constant<signed char, SCHAR_MIN> {}; +template <> struct hb_int_min<unsigned char> : hb_integral_constant<unsigned char, 0> {}; +template <> struct hb_int_min<signed short> : hb_integral_constant<signed short, SHRT_MIN> {}; +template <> struct hb_int_min<unsigned short> : hb_integral_constant<unsigned short, 0> {}; +template <> struct hb_int_min<signed int> : hb_integral_constant<signed int, INT_MIN> {}; +template <> struct hb_int_min<unsigned int> : hb_integral_constant<unsigned int, 0> {}; +template <> struct hb_int_min<signed long> : hb_integral_constant<signed long, LONG_MIN> {}; +template <> struct hb_int_min<unsigned long> : hb_integral_constant<unsigned long, 0> {}; +template <> struct hb_int_min<signed long long> : hb_integral_constant<signed long long, LLONG_MIN> {}; +template <> struct hb_int_min<unsigned long long> : hb_integral_constant<unsigned long long, 0> {}; +#define hb_int_min(T) hb_int_min<T>::value +template <typename T> struct hb_int_max; +template <> struct hb_int_max<char> : hb_integral_constant<char, CHAR_MAX> {}; +template <> struct hb_int_max<signed char> : hb_integral_constant<signed char, SCHAR_MAX> {}; +template <> struct hb_int_max<unsigned char> : hb_integral_constant<unsigned char, UCHAR_MAX> {}; +template <> struct hb_int_max<signed short> : hb_integral_constant<signed short, SHRT_MAX> {}; +template <> struct hb_int_max<unsigned short> : hb_integral_constant<unsigned short, USHRT_MAX> {}; +template <> struct hb_int_max<signed int> : hb_integral_constant<signed int, INT_MAX> {}; +template <> struct hb_int_max<unsigned int> : hb_integral_constant<unsigned int, UINT_MAX> {}; +template <> struct hb_int_max<signed long> : hb_integral_constant<signed long, LONG_MAX> {}; +template <> struct hb_int_max<unsigned long> : hb_integral_constant<unsigned long, ULONG_MAX> {}; +template <> struct hb_int_max<signed long long> : hb_integral_constant<signed long long, LLONG_MAX> {}; +template <> struct hb_int_max<unsigned long long> : hb_integral_constant<unsigned long long, ULLONG_MAX> {}; +#define hb_int_max(T) hb_int_max<T>::value + + + +template <typename T, typename> +struct _hb_is_destructible : hb_false_type {}; +template <typename T> +struct _hb_is_destructible<T, hb_void_t<decltype (hb_declval (T).~T ())>> : hb_true_type {}; +template <typename T> +using hb_is_destructible = _hb_is_destructible<T, void>; +#define hb_is_destructible(T) hb_is_destructible<T>::value + +template <typename T, typename, typename ...Ts> +struct _hb_is_constructible : hb_false_type {}; +template <typename T, typename ...Ts> +struct _hb_is_constructible<T, hb_void_t<decltype (T (hb_declval (Ts)...))>, Ts...> : hb_true_type {}; +template <typename T, typename ...Ts> +using hb_is_constructible = _hb_is_constructible<T, void, Ts...>; +#define hb_is_constructible(...) hb_is_constructible<__VA_ARGS__>::value + +template <typename T> +using hb_is_default_constructible = hb_is_constructible<T>; +#define hb_is_default_constructible(T) hb_is_default_constructible<T>::value + +template <typename T> +using hb_is_copy_constructible = hb_is_constructible<T, hb_add_lvalue_reference<hb_add_const<T>>>; +#define hb_is_copy_constructible(T) hb_is_copy_constructible<T>::value + +template <typename T> +using hb_is_move_constructible = hb_is_constructible<T, hb_add_rvalue_reference<hb_add_const<T>>>; +#define hb_is_move_constructible(T) hb_is_move_constructible<T>::value + +template <typename T, typename U, typename> +struct _hb_is_assignable : hb_false_type {}; +template <typename T, typename U> +struct _hb_is_assignable<T, U, hb_void_t<decltype (hb_declval (T) = hb_declval (U))>> : hb_true_type {}; +template <typename T, typename U> +using hb_is_assignable = _hb_is_assignable<T, U, void>; +#define hb_is_assignable(T,U) hb_is_assignable<T, U>::value + +template <typename T> +using hb_is_copy_assignable = hb_is_assignable<hb_add_lvalue_reference<T>, + hb_add_lvalue_reference<hb_add_const<T>>>; +#define hb_is_copy_assignable(T) hb_is_copy_assignable<T>::value + +template <typename T> +using hb_is_move_assignable = hb_is_assignable<hb_add_lvalue_reference<T>, + hb_add_rvalue_reference<T>>; +#define hb_is_move_assignable(T) hb_is_move_assignable<T>::value + +/* Trivial versions. */ + +template <typename T> union hb_trivial { T value; }; + +template <typename T> +using hb_is_trivially_destructible= hb_is_destructible<hb_trivial<T>>; +#define hb_is_trivially_destructible(T) hb_is_trivially_destructible<T>::value + +/* Don't know how to do the following. */ +//template <typename T, typename ...Ts> +//using hb_is_trivially_constructible= hb_is_constructible<hb_trivial<T>, hb_trivial<Ts>...>; +//#define hb_is_trivially_constructible(...) hb_is_trivially_constructible<__VA_ARGS__>::value + +template <typename T> +using hb_is_trivially_default_constructible= hb_is_default_constructible<hb_trivial<T>>; +#define hb_is_trivially_default_constructible(T) hb_is_trivially_default_constructible<T>::value + +template <typename T> +using hb_is_trivially_copy_constructible= hb_is_copy_constructible<hb_trivial<T>>; +#define hb_is_trivially_copy_constructible(T) hb_is_trivially_copy_constructible<T>::value + +template <typename T> +using hb_is_trivially_move_constructible= hb_is_move_constructible<hb_trivial<T>>; +#define hb_is_trivially_move_constructible(T) hb_is_trivially_move_constructible<T>::value + +/* Don't know how to do the following. */ +//template <typename T, typename U> +//using hb_is_trivially_assignable= hb_is_assignable<hb_trivial<T>, hb_trivial<U>>; +//#define hb_is_trivially_assignable(T,U) hb_is_trivially_assignable<T, U>::value + +template <typename T> +using hb_is_trivially_copy_assignable= hb_is_copy_assignable<hb_trivial<T>>; +#define hb_is_trivially_copy_assignable(T) hb_is_trivially_copy_assignable<T>::value + +template <typename T> +using hb_is_trivially_move_assignable= hb_is_move_assignable<hb_trivial<T>>; +#define hb_is_trivially_move_assignable(T) hb_is_trivially_move_assignable<T>::value + +template <typename T> +using hb_is_trivially_copyable= hb_bool_constant< + hb_is_trivially_destructible (T) && + (!hb_is_move_assignable (T) || hb_is_trivially_move_assignable (T)) && + (!hb_is_move_constructible (T) || hb_is_trivially_move_constructible (T)) && + (!hb_is_copy_assignable (T) || hb_is_trivially_copy_assignable (T)) && + (!hb_is_copy_constructible (T) || hb_is_trivially_copy_constructible (T)) && + true +>; +#define hb_is_trivially_copyable(T) hb_is_trivially_copyable<T>::value + +template <typename T> +using hb_is_trivial= hb_bool_constant< + hb_is_trivially_copyable (T) && + hb_is_trivially_default_constructible (T) +>; +#define hb_is_trivial(T) hb_is_trivial<T>::value + +/* hb_unwrap_type (T) + * If T has no T::type, returns T. Otherwise calls itself on T::type recursively. + */ + +template <typename T, typename> +struct _hb_unwrap_type : hb_type_identity_t<T> {}; +template <typename T> +struct _hb_unwrap_type<T, hb_void_t<typename T::type>> : _hb_unwrap_type<typename T::type, void> {}; +template <typename T> +using hb_unwrap_type = _hb_unwrap_type<T, void>; +#define hb_unwrap_type(T) typename hb_unwrap_type<T>::type + +#endif /* HB_META_HH */ diff --git a/thirdparty/harfbuzz/src/hb-mutex.hh b/thirdparty/harfbuzz/src/hb-mutex.hh new file mode 100644 index 0000000000..56392d049b --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-mutex.hh @@ -0,0 +1,133 @@ +/* + * Copyright © 2007 Chris Wilson + * Copyright © 2009,2010 Red Hat, Inc. + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_MUTEX_HH +#define HB_MUTEX_HH + +#include "hb.hh" + + +/* mutex */ + +/* We need external help for these */ + +#if defined(HB_MUTEX_IMPL_INIT) \ + && defined(hb_mutex_impl_init) \ + && defined(hb_mutex_impl_lock) \ + && defined(hb_mutex_impl_unlock) \ + && defined(hb_mutex_impl_finish) + +/* Defined externally, i.e. in config.h; must have typedef'ed hb_mutex_impl_t as well. */ + + +#elif !defined(HB_NO_MT) && (defined(HAVE_PTHREAD) || defined(__APPLE__)) + +#include <pthread.h> +typedef pthread_mutex_t hb_mutex_impl_t; +#define HB_MUTEX_IMPL_INIT PTHREAD_MUTEX_INITIALIZER +#define hb_mutex_impl_init(M) pthread_mutex_init (M, nullptr) +#define hb_mutex_impl_lock(M) pthread_mutex_lock (M) +#define hb_mutex_impl_unlock(M) pthread_mutex_unlock (M) +#define hb_mutex_impl_finish(M) pthread_mutex_destroy (M) + + +#elif !defined(HB_NO_MT) && defined(_WIN32) + +typedef CRITICAL_SECTION hb_mutex_impl_t; +#define HB_MUTEX_IMPL_INIT {0} +#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) +#define hb_mutex_impl_init(M) InitializeCriticalSectionEx (M, 0, 0) +#else +#define hb_mutex_impl_init(M) InitializeCriticalSection (M) +#endif +#define hb_mutex_impl_lock(M) EnterCriticalSection (M) +#define hb_mutex_impl_unlock(M) LeaveCriticalSection (M) +#define hb_mutex_impl_finish(M) DeleteCriticalSection (M) + + +#elif !defined(HB_NO_MT) && defined(HAVE_INTEL_ATOMIC_PRIMITIVES) + +#if defined(HAVE_SCHED_H) && defined(HAVE_SCHED_YIELD) +# include <sched.h> +# define HB_SCHED_YIELD() sched_yield () +#else +# define HB_SCHED_YIELD() HB_STMT_START {} HB_STMT_END +#endif + +/* This actually is not a totally awful implementation. */ +typedef volatile int hb_mutex_impl_t; +#define HB_MUTEX_IMPL_INIT 0 +#define hb_mutex_impl_init(M) *(M) = 0 +#define hb_mutex_impl_lock(M) HB_STMT_START { while (__sync_lock_test_and_set((M), 1)) HB_SCHED_YIELD (); } HB_STMT_END +#define hb_mutex_impl_unlock(M) __sync_lock_release (M) +#define hb_mutex_impl_finish(M) HB_STMT_START {} HB_STMT_END + + +#elif defined(HB_NO_MT) + +typedef int hb_mutex_impl_t; +#define HB_MUTEX_IMPL_INIT 0 +#define hb_mutex_impl_init(M) HB_STMT_START {} HB_STMT_END +#define hb_mutex_impl_lock(M) HB_STMT_START {} HB_STMT_END +#define hb_mutex_impl_unlock(M) HB_STMT_START {} HB_STMT_END +#define hb_mutex_impl_finish(M) HB_STMT_START {} HB_STMT_END + + +#else + +#error "Could not find any system to define mutex macros." +#error "Check hb-mutex.hh for possible resolutions." + +#endif + + +#define HB_MUTEX_INIT {HB_MUTEX_IMPL_INIT} + +struct hb_mutex_t +{ + hb_mutex_impl_t m; + + void init () { hb_mutex_impl_init (&m); } + void lock () { hb_mutex_impl_lock (&m); } + void unlock () { hb_mutex_impl_unlock (&m); } + void fini () { hb_mutex_impl_finish (&m); } +}; + +struct hb_lock_t +{ + hb_lock_t (hb_mutex_t &mutex_) : mutex (mutex_) { mutex.lock (); } + ~hb_lock_t () { mutex.unlock (); } + private: + hb_mutex_t &mutex; +}; + + +#endif /* HB_MUTEX_HH */ diff --git a/thirdparty/harfbuzz/src/hb-null.hh b/thirdparty/harfbuzz/src/hb-null.hh new file mode 100644 index 0000000000..9853939b07 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-null.hh @@ -0,0 +1,184 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_NULL_HH +#define HB_NULL_HH + +#include "hb.hh" +#include "hb-meta.hh" + + +/* + * Static pools + */ + +/* Global nul-content Null pool. Enlarge as necessary. */ + +#define HB_NULL_POOL_SIZE 384 + +/* Use SFINAE to sniff whether T has min_size; in which case return T::null_size, + * otherwise return sizeof(T). */ + +/* The hard way... + * https://stackoverflow.com/questions/7776448/sfinae-tried-with-bool-gives-compiler-error-template-argument-tvalue-invol + */ + +template <typename T, typename> +struct _hb_null_size : hb_integral_constant<unsigned, sizeof (T)> {}; +template <typename T> +struct _hb_null_size<T, hb_void_t<decltype (T::min_size)>> : hb_integral_constant<unsigned, T::null_size> {}; + +template <typename T> +using hb_null_size = _hb_null_size<T, void>; +#define hb_null_size(T) hb_null_size<T>::value + +/* These doesn't belong here, but since is copy/paste from above, put it here. */ + +/* hb_static_size (T) + * Returns T::static_size if T::min_size is defined, or sizeof (T) otherwise. */ + +template <typename T, typename> +struct _hb_static_size : hb_integral_constant<unsigned, sizeof (T)> {}; +template <typename T> +struct _hb_static_size<T, hb_void_t<decltype (T::min_size)>> : hb_integral_constant<unsigned, T::static_size> {}; +template <typename T> +using hb_static_size = _hb_static_size<T, void>; +#define hb_static_size(T) hb_static_size<T>::value + + +/* + * Null() + */ + +extern HB_INTERNAL +uint64_t const _hb_NullPool[(HB_NULL_POOL_SIZE + sizeof (uint64_t) - 1) / sizeof (uint64_t)]; + +/* Generic nul-content Null objects. */ +template <typename Type> +struct Null { + static Type const & get_null () + { + static_assert (hb_null_size (Type) <= HB_NULL_POOL_SIZE, "Increase HB_NULL_POOL_SIZE."); + return *reinterpret_cast<Type const *> (_hb_NullPool); + } +}; +template <typename QType> +struct NullHelper +{ + typedef hb_remove_const<hb_remove_reference<QType>> Type; + static const Type & get_null () { return Null<Type>::get_null (); } +}; +#define Null(Type) NullHelper<Type>::get_null () + +/* Specializations for arbitrary-content Null objects expressed in bytes. */ +#define DECLARE_NULL_NAMESPACE_BYTES(Namespace, Type) \ + } /* Close namespace. */ \ + extern HB_INTERNAL const unsigned char _hb_Null_##Namespace##_##Type[Namespace::Type::null_size]; \ + template <> \ + struct Null<Namespace::Type> { \ + static Namespace::Type const & get_null () { \ + return *reinterpret_cast<const Namespace::Type *> (_hb_Null_##Namespace##_##Type); \ + } \ + }; \ + namespace Namespace { \ + static_assert (true, "") /* Require semicolon after. */ +#define DEFINE_NULL_NAMESPACE_BYTES(Namespace, Type) \ + const unsigned char _hb_Null_##Namespace##_##Type[Namespace::Type::null_size] + +/* Specializations for arbitrary-content Null objects expressed as struct initializer. */ +#define DECLARE_NULL_INSTANCE(Type) \ + extern HB_INTERNAL const Type _hb_Null_##Type; \ + template <> \ + struct Null<Type> { \ + static Type const & get_null () { \ + return _hb_Null_##Type; \ + } \ + }; \ + static_assert (true, "") /* Require semicolon after. */ +#define DEFINE_NULL_INSTANCE(Type) \ + const Type _hb_Null_##Type + +/* Global writable pool. Enlarge as necessary. */ + +/* To be fully correct, CrapPool must be thread_local. However, we do not rely on CrapPool + * for correct operation. It only exist to catch and divert program logic bugs instead of + * causing bad memory access. So, races there are not actually introducing incorrectness + * in the code. Has ~12kb binary size overhead to have it, also clang build fails with it. */ +extern HB_INTERNAL +/*thread_local*/ uint64_t _hb_CrapPool[(HB_NULL_POOL_SIZE + sizeof (uint64_t) - 1) / sizeof (uint64_t)]; + +/* CRAP pool: Common Region for Access Protection. */ +template <typename Type> +static inline Type& Crap () { + static_assert (hb_null_size (Type) <= HB_NULL_POOL_SIZE, "Increase HB_NULL_POOL_SIZE."); + Type *obj = reinterpret_cast<Type *> (_hb_CrapPool); + memcpy (obj, &Null (Type), sizeof (*obj)); + return *obj; +} +template <typename QType> +struct CrapHelper +{ + typedef hb_remove_const<hb_remove_reference<QType>> Type; + static Type & get_crap () { return Crap<Type> (); } +}; +#define Crap(Type) CrapHelper<Type>::get_crap () + +template <typename Type> +struct CrapOrNullHelper { + static Type & get () { return Crap (Type); } +}; +template <typename Type> +struct CrapOrNullHelper<const Type> { + static const Type & get () { return Null (Type); } +}; +#define CrapOrNull(Type) CrapOrNullHelper<Type>::get () + + +/* + * hb_nonnull_ptr_t + */ + +template <typename P> +struct hb_nonnull_ptr_t +{ + typedef hb_remove_pointer<P> T; + + hb_nonnull_ptr_t (T *v_ = nullptr) : v (v_) {} + T * operator = (T *v_) { return v = v_; } + T * operator -> () const { return get (); } + T & operator * () const { return *get (); } + T ** operator & () const { return &v; } + /* Only auto-cast to const types. */ + template <typename C> operator const C * () const { return get (); } + operator const char * () const { return (const char *) get (); } + T * get () const { return v ? v : const_cast<T *> (&Null (T)); } + T * get_raw () const { return v; } + + T *v; +}; + + +#endif /* HB_NULL_HH */ diff --git a/thirdparty/harfbuzz/src/hb-number-parser.hh b/thirdparty/harfbuzz/src/hb-number-parser.hh new file mode 100644 index 0000000000..1a9dbba6dd --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-number-parser.hh @@ -0,0 +1,237 @@ + +#line 1 "hb-number-parser.rl" +/* + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + */ + +#ifndef HB_NUMBER_PARSER_HH +#define HB_NUMBER_PARSER_HH + +#include "hb.hh" + + +#line 35 "hb-number-parser.hh" +static const unsigned char _double_parser_trans_keys[] = { + 0u, 0u, 43u, 57u, 46u, 57u, 48u, 57u, 43u, 57u, 48u, 57u, 48u, 101u, 48u, 57u, + 46u, 101u, 0 +}; + +static const char _double_parser_key_spans[] = { + 0, 15, 12, 10, 15, 10, 54, 10, + 56 +}; + +static const unsigned char _double_parser_index_offsets[] = { + 0, 0, 16, 29, 40, 56, 67, 122, + 133 +}; + +static const char _double_parser_indicies[] = { + 0, 1, 2, 3, 1, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, + 1, 3, 1, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 1, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 1, 6, 1, 7, 1, 1, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 1, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 1, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 9, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 9, 1, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 1, 3, 1, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 9, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 9, 1, 0 +}; + +static const char _double_parser_trans_targs[] = { + 2, 0, 2, 3, 8, 6, 5, 5, + 7, 4 +}; + +static const char _double_parser_trans_actions[] = { + 0, 0, 1, 0, 2, 3, 0, 4, + 5, 0 +}; + +static const int double_parser_start = 1; +static const int double_parser_first_final = 6; +static const int double_parser_error = 0; + +static const int double_parser_en_main = 1; + + +#line 68 "hb-number-parser.rl" + + +/* Works only for n < 512 */ +static inline double +_pow10 (unsigned exponent) +{ + static const double _powers_of_10[] = + { + 1.0e+256, + 1.0e+128, + 1.0e+64, + 1.0e+32, + 1.0e+16, + 1.0e+8, + 10000., + 100., + 10. + }; + unsigned mask = 1 << (ARRAY_LENGTH (_powers_of_10) - 1); + double result = 1; + for (const double *power = _powers_of_10; mask; ++power, mask >>= 1) + if (exponent & mask) result *= *power; + return result; +} + +/* a variant of strtod that also gets end of buffer in its second argument */ +static inline double +strtod_rl (const char *p, const char **end_ptr /* IN/OUT */) +{ + double value = 0; + double frac = 0; + double frac_count = 0; + unsigned exp = 0; + bool neg = false, exp_neg = false, exp_overflow = false; + const unsigned long long MAX_FRACT = 0xFFFFFFFFFFFFFull; /* 2^52-1 */ + const unsigned MAX_EXP = 0x7FFu; /* 2^11-1 */ + + const char *pe = *end_ptr; + while (p < pe && ISSPACE (*p)) + p++; + + int cs; + +#line 139 "hb-number-parser.hh" + { + cs = double_parser_start; + } + +#line 144 "hb-number-parser.hh" + { + int _slen; + int _trans; + const unsigned char *_keys; + const char *_inds; + if ( p == pe ) + goto _test_eof; + if ( cs == 0 ) + goto _out; +_resume: + _keys = _double_parser_trans_keys + (cs<<1); + _inds = _double_parser_indicies + _double_parser_index_offsets[cs]; + + _slen = _double_parser_key_spans[cs]; + _trans = _inds[ _slen > 0 && _keys[0] <=(*p) && + (*p) <= _keys[1] ? + (*p) - _keys[0] : _slen ]; + + cs = _double_parser_trans_targs[_trans]; + + if ( _double_parser_trans_actions[_trans] == 0 ) + goto _again; + + switch ( _double_parser_trans_actions[_trans] ) { + case 1: +#line 37 "hb-number-parser.rl" + { neg = true; } + break; + case 4: +#line 38 "hb-number-parser.rl" + { exp_neg = true; } + break; + case 2: +#line 40 "hb-number-parser.rl" + { + value = value * 10. + ((*p) - '0'); +} + break; + case 3: +#line 43 "hb-number-parser.rl" + { + if (likely (frac <= MAX_FRACT / 10)) + { + frac = frac * 10. + ((*p) - '0'); + ++frac_count; + } +} + break; + case 5: +#line 50 "hb-number-parser.rl" + { + if (likely (exp * 10 + ((*p) - '0') <= MAX_EXP)) + exp = exp * 10 + ((*p) - '0'); + else + exp_overflow = true; +} + break; +#line 202 "hb-number-parser.hh" + } + +_again: + if ( cs == 0 ) + goto _out; + if ( ++p != pe ) + goto _resume; + _test_eof: {} + _out: {} + } + +#line 113 "hb-number-parser.rl" + + + *end_ptr = p; + + if (frac_count) value += frac / _pow10 (frac_count); + if (neg) value *= -1.; + + if (unlikely (exp_overflow)) + { + if (value == 0) return value; + if (exp_neg) return neg ? -DBL_MIN : DBL_MIN; + else return neg ? -DBL_MAX : DBL_MAX; + } + + if (exp) + { + if (exp_neg) value /= _pow10 (exp); + else value *= _pow10 (exp); + } + + return value; +} + +#endif /* HB_NUMBER_PARSER_HH */ diff --git a/thirdparty/harfbuzz/src/hb-number.cc b/thirdparty/harfbuzz/src/hb-number.cc new file mode 100644 index 0000000000..6e4f3f7ebd --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-number.cc @@ -0,0 +1,80 @@ +/* + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + */ + +#include "hb.hh" +#include "hb-machinery.hh" +#include "hb-number.hh" +#include "hb-number-parser.hh" + +template<typename T, typename Func> +static bool +_parse_number (const char **pp, const char *end, T *pv, + bool whole_buffer, Func f) +{ + char buf[32]; + unsigned len = hb_min (ARRAY_LENGTH (buf) - 1, (unsigned) (end - *pp)); + strncpy (buf, *pp, len); + buf[len] = '\0'; + + char *p = buf; + char *pend = p; + + errno = 0; + *pv = f (p, &pend); + if (unlikely (errno || p == pend || + /* Check if consumed whole buffer if is requested */ + (whole_buffer && pend - p != end - *pp))) + return false; + + *pp += pend - p; + return true; +} + +bool +hb_parse_int (const char **pp, const char *end, int *pv, bool whole_buffer) +{ + return _parse_number<int> (pp, end, pv, whole_buffer, + [] (const char *p, char **end) + { return strtol (p, end, 10); }); +} + +bool +hb_parse_uint (const char **pp, const char *end, unsigned *pv, + bool whole_buffer, int base) +{ + return _parse_number<unsigned> (pp, end, pv, whole_buffer, + [base] (const char *p, char **end) + { return strtoul (p, end, base); }); +} + +bool +hb_parse_double (const char **pp, const char *end, double *pv, bool whole_buffer) +{ + const char *pend = end; + *pv = strtod_rl (*pp, &pend); + if (unlikely (*pp == pend)) return false; + *pp = pend; + return !whole_buffer || end == pend; +} diff --git a/thirdparty/harfbuzz/src/hb-number.hh b/thirdparty/harfbuzz/src/hb-number.hh new file mode 100644 index 0000000000..14d1260aa3 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-number.hh @@ -0,0 +1,41 @@ +/* + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + */ + +#ifndef HB_NUMBER_HH +#define HB_NUMBER_HH + +HB_INTERNAL bool +hb_parse_int (const char **pp, const char *end, int *pv, + bool whole_buffer = false); + +HB_INTERNAL bool +hb_parse_uint (const char **pp, const char *end, unsigned int *pv, + bool whole_buffer = false, int base = 10); + +HB_INTERNAL bool +hb_parse_double (const char **pp, const char *end, double *pv, + bool whole_buffer = false); + +#endif /* HB_NUMBER_HH */ diff --git a/thirdparty/harfbuzz/src/hb-object.hh b/thirdparty/harfbuzz/src/hb-object.hh new file mode 100644 index 0000000000..39845a70e7 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-object.hh @@ -0,0 +1,342 @@ +/* + * Copyright © 2007 Chris Wilson + * Copyright © 2009,2010 Red Hat, Inc. + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Contributor(s): + * Chris Wilson <chris@chris-wilson.co.uk> + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OBJECT_HH +#define HB_OBJECT_HH + +#include "hb.hh" +#include "hb-atomic.hh" +#include "hb-mutex.hh" +#include "hb-vector.hh" + + +/* + * Lockable set + */ + +template <typename item_t, typename lock_t> +struct hb_lockable_set_t +{ + hb_vector_t<item_t> items; + + void init () { items.init (); } + + template <typename T> + item_t *replace_or_insert (T v, lock_t &l, bool replace) + { + l.lock (); + item_t *item = items.find (v); + if (item) { + if (replace) { + item_t old = *item; + *item = v; + l.unlock (); + old.fini (); + } + else { + item = nullptr; + l.unlock (); + } + } else { + item = items.push (v); + l.unlock (); + } + return item; + } + + template <typename T> + void remove (T v, lock_t &l) + { + l.lock (); + item_t *item = items.find (v); + if (item) + { + item_t old = *item; + *item = items[items.length - 1]; + items.pop (); + l.unlock (); + old.fini (); + } else { + l.unlock (); + } + } + + template <typename T> + bool find (T v, item_t *i, lock_t &l) + { + l.lock (); + item_t *item = items.find (v); + if (item) + *i = *item; + l.unlock (); + return !!item; + } + + template <typename T> + item_t *find_or_insert (T v, lock_t &l) + { + l.lock (); + item_t *item = items.find (v); + if (!item) { + item = items.push (v); + } + l.unlock (); + return item; + } + + void fini (lock_t &l) + { + if (!items.length) + { + /* No need to lock. */ + items.fini (); + return; + } + l.lock (); + while (items.length) + { + item_t old = items[items.length - 1]; + items.pop (); + l.unlock (); + old.fini (); + l.lock (); + } + items.fini (); + l.unlock (); + } + +}; + + +/* + * Reference-count. + */ + +#define HB_REFERENCE_COUNT_INERT_VALUE 0 +#define HB_REFERENCE_COUNT_POISON_VALUE -0x0000DEAD +#define HB_REFERENCE_COUNT_INIT {HB_ATOMIC_INT_INIT (HB_REFERENCE_COUNT_INERT_VALUE)} + +struct hb_reference_count_t +{ + mutable hb_atomic_int_t ref_count; + + void init (int v = 1) { ref_count.set_relaxed (v); } + int get_relaxed () const { return ref_count.get_relaxed (); } + int inc () const { return ref_count.inc (); } + int dec () const { return ref_count.dec (); } + void fini () { ref_count.set_relaxed (HB_REFERENCE_COUNT_POISON_VALUE); } + + bool is_inert () const { return ref_count.get_relaxed () == HB_REFERENCE_COUNT_INERT_VALUE; } + bool is_valid () const { return ref_count.get_relaxed () > 0; } +}; + + +/* user_data */ + +struct hb_user_data_array_t +{ + struct hb_user_data_item_t { + hb_user_data_key_t *key; + void *data; + hb_destroy_func_t destroy; + + bool operator == (const hb_user_data_key_t *other_key) const { return key == other_key; } + bool operator == (const hb_user_data_item_t &other) const { return key == other.key; } + + void fini () { if (destroy) destroy (data); } + }; + + hb_mutex_t lock; + hb_lockable_set_t<hb_user_data_item_t, hb_mutex_t> items; + + void init () { lock.init (); items.init (); } + + HB_INTERNAL bool set (hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace); + + HB_INTERNAL void *get (hb_user_data_key_t *key); + + void fini () { items.fini (lock); lock.fini (); } +}; + + +/* + * Object header + */ + +struct hb_object_header_t +{ + hb_reference_count_t ref_count; + mutable hb_atomic_int_t writable; + hb_atomic_ptr_t<hb_user_data_array_t> user_data; +}; +#define HB_OBJECT_HEADER_STATIC \ + { \ + HB_REFERENCE_COUNT_INIT, \ + HB_ATOMIC_INT_INIT (false), \ + HB_ATOMIC_PTR_INIT (nullptr) \ + } + + +/* + * Object + */ + +template <typename Type> +static inline void hb_object_trace (const Type *obj, const char *function) +{ + DEBUG_MSG (OBJECT, (void *) obj, + "%s refcount=%d", + function, + obj ? obj->header.ref_count.get_relaxed () : 0); +} + +template <typename Type> +static inline Type *hb_object_create () +{ + Type *obj = (Type *) calloc (1, sizeof (Type)); + + if (unlikely (!obj)) + return obj; + + hb_object_init (obj); + hb_object_trace (obj, HB_FUNC); + return obj; +} +template <typename Type> +static inline void hb_object_init (Type *obj) +{ + obj->header.ref_count.init (); + obj->header.writable.set_relaxed (true); + obj->header.user_data.init (); +} +template <typename Type> +static inline bool hb_object_is_inert (const Type *obj) +{ + return unlikely (obj->header.ref_count.is_inert ()); +} +template <typename Type> +static inline bool hb_object_is_valid (const Type *obj) +{ + return likely (obj->header.ref_count.is_valid ()); +} +template <typename Type> +static inline bool hb_object_is_immutable (const Type *obj) +{ + return !obj->header.writable.get_relaxed (); +} +template <typename Type> +static inline void hb_object_make_immutable (const Type *obj) +{ + obj->header.writable.set_relaxed (false); +} +template <typename Type> +static inline Type *hb_object_reference (Type *obj) +{ + hb_object_trace (obj, HB_FUNC); + if (unlikely (!obj || hb_object_is_inert (obj))) + return obj; + assert (hb_object_is_valid (obj)); + obj->header.ref_count.inc (); + return obj; +} +template <typename Type> +static inline bool hb_object_destroy (Type *obj) +{ + hb_object_trace (obj, HB_FUNC); + if (unlikely (!obj || hb_object_is_inert (obj))) + return false; + assert (hb_object_is_valid (obj)); + if (obj->header.ref_count.dec () != 1) + return false; + + hb_object_fini (obj); + return true; +} +template <typename Type> +static inline void hb_object_fini (Type *obj) +{ + obj->header.ref_count.fini (); /* Do this before user_data */ + hb_user_data_array_t *user_data = obj->header.user_data.get (); + if (user_data) + { + user_data->fini (); + free (user_data); + user_data = nullptr; + } +} +template <typename Type> +static inline bool hb_object_set_user_data (Type *obj, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace) +{ + if (unlikely (!obj || hb_object_is_inert (obj))) + return false; + assert (hb_object_is_valid (obj)); + +retry: + hb_user_data_array_t *user_data = obj->header.user_data.get (); + if (unlikely (!user_data)) + { + user_data = (hb_user_data_array_t *) calloc (sizeof (hb_user_data_array_t), 1); + if (unlikely (!user_data)) + return false; + user_data->init (); + if (unlikely (!obj->header.user_data.cmpexch (nullptr, user_data))) + { + user_data->fini (); + free (user_data); + goto retry; + } + } + + return user_data->set (key, data, destroy, replace); +} + +template <typename Type> +static inline void *hb_object_get_user_data (Type *obj, + hb_user_data_key_t *key) +{ + if (unlikely (!obj || hb_object_is_inert (obj))) + return nullptr; + assert (hb_object_is_valid (obj)); + hb_user_data_array_t *user_data = obj->header.user_data.get (); + if (!user_data) + return nullptr; + return user_data->get (key); +} + + +#endif /* HB_OBJECT_HH */ diff --git a/thirdparty/harfbuzz/src/hb-open-file.hh b/thirdparty/harfbuzz/src/hb-open-file.hh new file mode 100644 index 0000000000..ac13dd23c3 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-open-file.hh @@ -0,0 +1,521 @@ +/* + * Copyright © 2007,2008,2009 Red Hat, Inc. + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OPEN_FILE_HH +#define HB_OPEN_FILE_HH + +#include "hb-open-type.hh" +#include "hb-ot-head-table.hh" + + +namespace OT { + + +/* + * + * The OpenType Font File + * + */ + + +/* + * Organization of an OpenType Font + */ + +struct OpenTypeFontFile; +struct OffsetTable; +struct TTCHeader; + + +typedef struct TableRecord +{ + int cmp (Tag t) const { return -t.cmp (tag); } + + HB_INTERNAL static int cmp (const void *pa, const void *pb) + { + const TableRecord *a = (const TableRecord *) pa; + const TableRecord *b = (const TableRecord *) pb; + return b->cmp (a->tag); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + Tag tag; /* 4-byte identifier. */ + CheckSum checkSum; /* CheckSum for this table. */ + Offset32 offset; /* Offset from beginning of TrueType font + * file. */ + HBUINT32 length; /* Length of this table. */ + public: + DEFINE_SIZE_STATIC (16); +} OpenTypeTable; + +typedef struct OffsetTable +{ + friend struct OpenTypeFontFile; + + unsigned int get_table_count () const { return tables.len; } + const TableRecord& get_table (unsigned int i) const + { return tables[i]; } + unsigned int get_table_tags (unsigned int start_offset, + unsigned int *table_count, /* IN/OUT */ + hb_tag_t *table_tags /* OUT */) const + { + if (table_count) + { + + tables.sub_array (start_offset, table_count) + | hb_map (&TableRecord::tag) + | hb_sink (hb_array (table_tags, *table_count)) + ; + } + return tables.len; + } + bool find_table_index (hb_tag_t tag, unsigned int *table_index) const + { + Tag t; + t = tag; + return tables.bfind (t, table_index, HB_BFIND_NOT_FOUND_STORE, Index::NOT_FOUND_INDEX); + } + const TableRecord& get_table_by_tag (hb_tag_t tag) const + { + unsigned int table_index; + find_table_index (tag, &table_index); + return get_table (table_index); + } + + public: + + template <typename item_t> + bool serialize (hb_serialize_context_t *c, + hb_tag_t sfnt_tag, + hb_array_t<item_t> items) + { + TRACE_SERIALIZE (this); + /* Alloc 12 for the OTHeader. */ + if (unlikely (!c->extend_min (*this))) return_trace (false); + /* Write sfntVersion (bytes 0..3). */ + sfnt_version = sfnt_tag; + /* Take space for numTables, searchRange, entrySelector, RangeShift + * and the TableRecords themselves. */ + if (unlikely (!tables.serialize (c, items.length))) return_trace (false); + + const char *dir_end = (const char *) c->head; + HBUINT32 *checksum_adjustment = nullptr; + + /* Write OffsetTables, alloc for and write actual table blobs. */ + for (unsigned int i = 0; i < tables.len; i++) + { + TableRecord &rec = tables.arrayZ[i]; + hb_blob_t *blob = items[i].blob; + rec.tag = items[i].tag; + rec.length = blob->length; + rec.offset.serialize (c, this); + + /* Allocate room for the table and copy it. */ + char *start = (char *) c->allocate_size<void> (rec.length); + if (unlikely (!start)) return false; + + if (likely (rec.length)) + memcpy (start, blob->data, rec.length); + + /* 4-byte alignment. */ + c->align (4); + const char *end = (const char *) c->head; + + if (items[i].tag == HB_OT_TAG_head && + (unsigned) (end - start) >= head::static_size) + { + head *h = (head *) start; + checksum_adjustment = &h->checkSumAdjustment; + *checksum_adjustment = 0; + } + + rec.checkSum.set_for_data (start, end - start); + } + + tables.qsort (); + + if (checksum_adjustment) + { + CheckSum checksum; + + /* The following line is a slower version of the following block. */ + //checksum.set_for_data (this, (const char *) c->head - (const char *) this); + checksum.set_for_data (this, dir_end - (const char *) this); + for (unsigned int i = 0; i < items.length; i++) + { + TableRecord &rec = tables.arrayZ[i]; + checksum = checksum + rec.checkSum; + } + + *checksum_adjustment = 0xB1B0AFBAu - checksum; + } + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && tables.sanitize (c)); + } + + protected: + Tag sfnt_version; /* '\0\001\0\00' if TrueType / 'OTTO' if CFF */ + BinSearchArrayOf<TableRecord> + tables; + public: + DEFINE_SIZE_ARRAY (12, tables); +} OpenTypeFontFace; + + +/* + * TrueType Collections + */ + +struct TTCHeaderVersion1 +{ + friend struct TTCHeader; + + unsigned int get_face_count () const { return table.len; } + const OpenTypeFontFace& get_face (unsigned int i) const { return this+table[i]; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (table.sanitize (c, this)); + } + + protected: + Tag ttcTag; /* TrueType Collection ID string: 'ttcf' */ + FixedVersion<>version; /* Version of the TTC Header (1.0), + * 0x00010000u */ + LArrayOf<LOffsetTo<OffsetTable>> + table; /* Array of offsets to the OffsetTable for each font + * from the beginning of the file */ + public: + DEFINE_SIZE_ARRAY (12, table); +}; + +struct TTCHeader +{ + friend struct OpenTypeFontFile; + + private: + + unsigned int get_face_count () const + { + switch (u.header.version.major) { + case 2: /* version 2 is compatible with version 1 */ + case 1: return u.version1.get_face_count (); + default:return 0; + } + } + const OpenTypeFontFace& get_face (unsigned int i) const + { + switch (u.header.version.major) { + case 2: /* version 2 is compatible with version 1 */ + case 1: return u.version1.get_face (i); + default:return Null (OpenTypeFontFace); + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!u.header.version.sanitize (c))) return_trace (false); + switch (u.header.version.major) { + case 2: /* version 2 is compatible with version 1 */ + case 1: return_trace (u.version1.sanitize (c)); + default:return_trace (true); + } + } + + protected: + union { + struct { + Tag ttcTag; /* TrueType Collection ID string: 'ttcf' */ + FixedVersion<>version; /* Version of the TTC Header (1.0 or 2.0), + * 0x00010000u or 0x00020000u */ + } header; + TTCHeaderVersion1 version1; + } u; +}; + +/* + * Mac Resource Fork + * + * http://mirror.informatimago.com/next/developer.apple.com/documentation/mac/MoreToolbox/MoreToolbox-99.html + */ + +struct ResourceRecord +{ + const OpenTypeFontFace & get_face (const void *data_base) const + { return * reinterpret_cast<const OpenTypeFontFace *> ((data_base+offset).arrayZ); } + + bool sanitize (hb_sanitize_context_t *c, + const void *data_base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + offset.sanitize (c, data_base) && + get_face (data_base).sanitize (c)); + } + + protected: + HBUINT16 id; /* Resource ID. */ + HBINT16 nameOffset; /* Offset from beginning of resource name list + * to resource name, -1 means there is none. */ + HBUINT8 attrs; /* Resource attributes */ + NNOffsetTo<LArrayOf<HBUINT8>, HBUINT24> + offset; /* Offset from beginning of data block to + * data for this resource */ + HBUINT32 reserved; /* Reserved for handle to resource */ + public: + DEFINE_SIZE_STATIC (12); +}; + +#define HB_TAG_sfnt HB_TAG ('s','f','n','t') + +struct ResourceTypeRecord +{ + unsigned int get_resource_count () const + { return tag == HB_TAG_sfnt ? resCountM1 + 1 : 0; } + + bool is_sfnt () const { return tag == HB_TAG_sfnt; } + + const ResourceRecord& get_resource_record (unsigned int i, + const void *type_base) const + { return (type_base+resourcesZ).as_array (get_resource_count ())[i]; } + + bool sanitize (hb_sanitize_context_t *c, + const void *type_base, + const void *data_base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + resourcesZ.sanitize (c, type_base, + get_resource_count (), + data_base)); + } + + protected: + Tag tag; /* Resource type. */ + HBUINT16 resCountM1; /* Number of resources minus 1. */ + NNOffsetTo<UnsizedArrayOf<ResourceRecord>> + resourcesZ; /* Offset from beginning of resource type list + * to reference item list for this type. */ + public: + DEFINE_SIZE_STATIC (8); +}; + +struct ResourceMap +{ + unsigned int get_face_count () const + { + unsigned int count = get_type_count (); + for (unsigned int i = 0; i < count; i++) + { + const ResourceTypeRecord& type = get_type_record (i); + if (type.is_sfnt ()) + return type.get_resource_count (); + } + return 0; + } + + const OpenTypeFontFace& get_face (unsigned int idx, + const void *data_base) const + { + unsigned int count = get_type_count (); + for (unsigned int i = 0; i < count; i++) + { + const ResourceTypeRecord& type = get_type_record (i); + /* The check for idx < count is here because ResourceRecord is NOT null-safe. + * Because an offset of 0 there does NOT mean null. */ + if (type.is_sfnt () && idx < type.get_resource_count ()) + return type.get_resource_record (idx, &(this+typeList)).get_face (data_base); + } + return Null (OpenTypeFontFace); + } + + bool sanitize (hb_sanitize_context_t *c, const void *data_base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + typeList.sanitize (c, this, + &(this+typeList), + data_base)); + } + + private: + unsigned int get_type_count () const { return (this+typeList).lenM1 + 1; } + + const ResourceTypeRecord& get_type_record (unsigned int i) const + { return (this+typeList)[i]; } + + protected: + HBUINT8 reserved0[16]; /* Reserved for copy of resource header */ + HBUINT32 reserved1; /* Reserved for handle to next resource map */ + HBUINT16 resreved2; /* Reserved for file reference number */ + HBUINT16 attrs; /* Resource fork attribute */ + NNOffsetTo<ArrayOfM1<ResourceTypeRecord>> + typeList; /* Offset from beginning of map to + * resource type list */ + Offset16 nameList; /* Offset from beginning of map to + * resource name list */ + public: + DEFINE_SIZE_STATIC (28); +}; + +struct ResourceForkHeader +{ + unsigned int get_face_count () const + { return (this+map).get_face_count (); } + + const OpenTypeFontFace& get_face (unsigned int idx, + unsigned int *base_offset = nullptr) const + { + const OpenTypeFontFace &face = (this+map).get_face (idx, &(this+data)); + if (base_offset) + *base_offset = (const char *) &face - (const char *) this; + return face; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + data.sanitize (c, this, dataLen) && + map.sanitize (c, this, &(this+data))); + } + + protected: + LNNOffsetTo<UnsizedArrayOf<HBUINT8>> + data; /* Offset from beginning of resource fork + * to resource data */ + LNNOffsetTo<ResourceMap > + map; /* Offset from beginning of resource fork + * to resource map */ + HBUINT32 dataLen; /* Length of resource data */ + HBUINT32 mapLen; /* Length of resource map */ + public: + DEFINE_SIZE_STATIC (16); +}; + +/* + * OpenType Font File + */ + +struct OpenTypeFontFile +{ + enum { + CFFTag = HB_TAG ('O','T','T','O'), /* OpenType with Postscript outlines */ + TrueTypeTag = HB_TAG ( 0 , 1 , 0 , 0 ), /* OpenType with TrueType outlines */ + TTCTag = HB_TAG ('t','t','c','f'), /* TrueType Collection */ + DFontTag = HB_TAG ( 0 , 0 , 1 , 0 ), /* DFont Mac Resource Fork */ + TrueTag = HB_TAG ('t','r','u','e'), /* Obsolete Apple TrueType */ + Typ1Tag = HB_TAG ('t','y','p','1') /* Obsolete Apple Type1 font in SFNT container */ + }; + + hb_tag_t get_tag () const { return u.tag; } + + unsigned int get_face_count () const + { + switch (u.tag) { + case CFFTag: /* All the non-collection tags */ + case TrueTag: + case Typ1Tag: + case TrueTypeTag: return 1; + case TTCTag: return u.ttcHeader.get_face_count (); + case DFontTag: return u.rfHeader.get_face_count (); + default: return 0; + } + } + const OpenTypeFontFace& get_face (unsigned int i, unsigned int *base_offset = nullptr) const + { + if (base_offset) + *base_offset = 0; + switch (u.tag) { + /* Note: for non-collection SFNT data we ignore index. This is because + * Apple dfont container is a container of SFNT's. So each SFNT is a + * non-TTC, but the index is more than zero. */ + case CFFTag: /* All the non-collection tags */ + case TrueTag: + case Typ1Tag: + case TrueTypeTag: return u.fontFace; + case TTCTag: return u.ttcHeader.get_face (i); + case DFontTag: return u.rfHeader.get_face (i, base_offset); + default: return Null (OpenTypeFontFace); + } + } + + template <typename item_t> + bool serialize_single (hb_serialize_context_t *c, + hb_tag_t sfnt_tag, + hb_array_t<item_t> items) + { + TRACE_SERIALIZE (this); + assert (sfnt_tag != TTCTag); + if (unlikely (!c->extend_min (*this))) return_trace (false); + return_trace (u.fontFace.serialize (c, sfnt_tag, items)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!u.tag.sanitize (c))) return_trace (false); + switch (u.tag) { + case CFFTag: /* All the non-collection tags */ + case TrueTag: + case Typ1Tag: + case TrueTypeTag: return_trace (u.fontFace.sanitize (c)); + case TTCTag: return_trace (u.ttcHeader.sanitize (c)); + case DFontTag: return_trace (u.rfHeader.sanitize (c)); + default: return_trace (true); + } + } + + protected: + union { + Tag tag; /* 4-byte identifier. */ + OpenTypeFontFace fontFace; + TTCHeader ttcHeader; + ResourceForkHeader rfHeader; + } u; + public: + DEFINE_SIZE_UNION (4, tag); +}; + + +} /* namespace OT */ + + +#endif /* HB_OPEN_FILE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-open-type.hh b/thirdparty/harfbuzz/src/hb-open-type.hh new file mode 100644 index 0000000000..50558cf8d3 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-open-type.hh @@ -0,0 +1,1078 @@ +/* + * Copyright © 2007,2008,2009,2010 Red Hat, Inc. + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OPEN_TYPE_HH +#define HB_OPEN_TYPE_HH + +#include "hb.hh" +#include "hb-blob.hh" +#include "hb-face.hh" +#include "hb-machinery.hh" +#include "hb-subset.hh" + + +namespace OT { + + +/* + * + * The OpenType Font File: Data Types + */ + + +/* "The following data types are used in the OpenType font file. + * All OpenType fonts use Motorola-style byte ordering (Big Endian):" */ + +/* + * Int types + */ + +/* Integer types in big-endian order and no alignment requirement */ +template <typename Type, unsigned int Size> +struct IntType +{ + typedef Type type; + typedef hb_conditional<hb_is_signed (Type), signed, unsigned> wide_type; + + IntType& operator = (wide_type i) { v = i; return *this; } + operator wide_type () const { return v; } + bool operator == (const IntType &o) const { return (Type) v == (Type) o.v; } + bool operator != (const IntType &o) const { return !(*this == o); } + + IntType& operator += (unsigned count) { *this = *this + count; return *this; } + IntType& operator -= (unsigned count) { *this = *this - count; return *this; } + IntType& operator ++ () { *this += 1; return *this; } + IntType& operator -- () { *this -= 1; return *this; } + IntType operator ++ (int) { IntType c (*this); ++*this; return c; } + IntType operator -- (int) { IntType c (*this); --*this; return c; } + + HB_INTERNAL static int cmp (const IntType *a, const IntType *b) + { return b->cmp (*a); } + HB_INTERNAL static int cmp (const void *a, const void *b) + { + IntType *pa = (IntType *) a; + IntType *pb = (IntType *) b; + + return pb->cmp (*pa); + } + template <typename Type2> + int cmp (Type2 a) const + { + Type b = v; + if (sizeof (Type) < sizeof (int) && sizeof (Type2) < sizeof (int)) + return (int) a - (int) b; + else + return a < b ? -1 : a == b ? 0 : +1; + } + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + protected: + BEInt<Type, Size> v; + public: + DEFINE_SIZE_STATIC (Size); +}; + +typedef IntType<uint8_t, 1> HBUINT8; /* 8-bit unsigned integer. */ +typedef IntType<int8_t, 1> HBINT8; /* 8-bit signed integer. */ +typedef IntType<uint16_t, 2> HBUINT16; /* 16-bit unsigned integer. */ +typedef IntType<int16_t, 2> HBINT16; /* 16-bit signed integer. */ +typedef IntType<uint32_t, 4> HBUINT32; /* 32-bit unsigned integer. */ +typedef IntType<int32_t, 4> HBINT32; /* 32-bit signed integer. */ +/* Note: we cannot defined a signed HBINT24 because there's no corresponding C type. + * Works for unsigned, but not signed, since we rely on compiler for sign-extension. */ +typedef IntType<uint32_t, 3> HBUINT24; /* 24-bit unsigned integer. */ + +/* 16-bit signed integer (HBINT16) that describes a quantity in FUnits. */ +typedef HBINT16 FWORD; + +/* 32-bit signed integer (HBINT32) that describes a quantity in FUnits. */ +typedef HBINT32 FWORD32; + +/* 16-bit unsigned integer (HBUINT16) that describes a quantity in FUnits. */ +typedef HBUINT16 UFWORD; + +/* 16-bit signed fixed number with the low 14 bits of fraction (2.14). */ +struct F2DOT14 : HBINT16 +{ + F2DOT14& operator = (uint16_t i ) { HBINT16::operator= (i); return *this; } + // 16384 means 1<<14 + float to_float () const { return ((int32_t) v) / 16384.f; } + void set_float (float f) { v = roundf (f * 16384.f); } + public: + DEFINE_SIZE_STATIC (2); +}; + +/* 32-bit signed fixed-point number (16.16). */ +struct HBFixed : HBINT32 +{ + HBFixed& operator = (uint32_t i) { HBINT32::operator= (i); return *this; } + // 65536 means 1<<16 + float to_float () const { return ((int32_t) v) / 65536.f; } + void set_float (float f) { v = roundf (f * 65536.f); } + public: + DEFINE_SIZE_STATIC (4); +}; + +/* Date represented in number of seconds since 12:00 midnight, January 1, + * 1904. The value is represented as a signed 64-bit integer. */ +struct LONGDATETIME +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + protected: + HBINT32 major; + HBUINT32 minor; + public: + DEFINE_SIZE_STATIC (8); +}; + +/* Array of four uint8s (length = 32 bits) used to identify a script, language + * system, feature, or baseline */ +struct Tag : HBUINT32 +{ + Tag& operator = (hb_tag_t i) { HBUINT32::operator= (i); return *this; } + /* What the char* converters return is NOT nul-terminated. Print using "%.4s" */ + operator const char* () const { return reinterpret_cast<const char *> (&this->v); } + operator char* () { return reinterpret_cast<char *> (&this->v); } + public: + DEFINE_SIZE_STATIC (4); +}; + +/* Glyph index number, same as uint16 (length = 16 bits) */ +struct HBGlyphID : HBUINT16 +{ + HBGlyphID& operator = (uint16_t i) { HBUINT16::operator= (i); return *this; } +}; + +/* Script/language-system/feature index */ +struct Index : HBUINT16 { + static constexpr unsigned NOT_FOUND_INDEX = 0xFFFFu; + Index& operator = (uint16_t i) { HBUINT16::operator= (i); return *this; } +}; +DECLARE_NULL_NAMESPACE_BYTES (OT, Index); + +typedef Index NameID; + +/* Offset, Null offset = 0 */ +template <typename Type, bool has_null=true> +struct Offset : Type +{ + Offset& operator = (typename Type::type i) { Type::operator= (i); return *this; } + + typedef Type type; + + bool is_null () const { return has_null && 0 == *this; } + + void *serialize (hb_serialize_context_t *c, const void *base) + { + void *t = c->start_embed<void> (); + c->check_assign (*this, (unsigned) ((char *) t - (char *) base)); + return t; + } + + public: + DEFINE_SIZE_STATIC (sizeof (Type)); +}; + +typedef Offset<HBUINT16> Offset16; +typedef Offset<HBUINT32> Offset32; + + +/* CheckSum */ +struct CheckSum : HBUINT32 +{ + CheckSum& operator = (uint32_t i) { HBUINT32::operator= (i); return *this; } + + /* This is reference implementation from the spec. */ + static uint32_t CalcTableChecksum (const HBUINT32 *Table, uint32_t Length) + { + uint32_t Sum = 0L; + assert (0 == (Length & 3)); + const HBUINT32 *EndPtr = Table + Length / HBUINT32::static_size; + + while (Table < EndPtr) + Sum += *Table++; + return Sum; + } + + /* Note: data should be 4byte aligned and have 4byte padding at the end. */ + void set_for_data (const void *data, unsigned int length) + { *this = CalcTableChecksum ((const HBUINT32 *) data, length); } + + public: + DEFINE_SIZE_STATIC (4); +}; + + +/* + * Version Numbers + */ + +template <typename FixedType=HBUINT16> +struct FixedVersion +{ + uint32_t to_int () const { return (major << (sizeof (FixedType) * 8)) + minor; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + FixedType major; + FixedType minor; + public: + DEFINE_SIZE_STATIC (2 * sizeof (FixedType)); +}; + + +/* + * Template subclasses of Offset that do the dereferencing. + * Use: (base+offset) + */ + +template <typename Type, bool has_null> +struct _hb_has_null +{ + static const Type *get_null () { return nullptr; } + static Type *get_crap () { return nullptr; } +}; +template <typename Type> +struct _hb_has_null<Type, true> +{ + static const Type *get_null () { return &Null (Type); } + static Type *get_crap () { return &Crap (Type); } +}; + +template <typename Type, typename OffsetType=HBUINT16, bool has_null=true> +struct OffsetTo : Offset<OffsetType, has_null> +{ + HB_DELETE_COPY_ASSIGN (OffsetTo); + OffsetTo () = default; + + OffsetTo& operator = (typename OffsetType::type i) { OffsetType::operator= (i); return *this; } + + const Type& operator () (const void *base) const + { + if (unlikely (this->is_null ())) return *_hb_has_null<Type, has_null>::get_null (); + return StructAtOffset<const Type> (base, *this); + } + Type& operator () (void *base) const + { + if (unlikely (this->is_null ())) return *_hb_has_null<Type, has_null>::get_crap (); + return StructAtOffset<Type> (base, *this); + } + + template <typename Base, + hb_enable_if (hb_is_convertible (const Base, const void *))> + friend const Type& operator + (const Base &base, const OffsetTo &offset) { return offset ((const void *) base); } + template <typename Base, + hb_enable_if (hb_is_convertible (const Base, const void *))> + friend const Type& operator + (const OffsetTo &offset, const Base &base) { return offset ((const void *) base); } + template <typename Base, + hb_enable_if (hb_is_convertible (Base, void *))> + friend Type& operator + (Base &&base, OffsetTo &offset) { return offset ((void *) base); } + template <typename Base, + hb_enable_if (hb_is_convertible (Base, void *))> + friend Type& operator + (OffsetTo &offset, Base &&base) { return offset ((void *) base); } + + Type& serialize (hb_serialize_context_t *c, const void *base) + { + return * (Type *) Offset<OffsetType>::serialize (c, base); + } + + template <typename ...Ts> + bool serialize_subset (hb_subset_context_t *c, const OffsetTo& src, + const void *src_base, Ts&&... ds) + { + *this = 0; + if (src.is_null ()) + return false; + + auto *s = c->serializer; + + s->push (); + + bool ret = c->dispatch (src_base+src, hb_forward<Ts> (ds)...); + + if (ret || !has_null) + s->add_link (*this, s->pop_pack ()); + else + s->pop_discard (); + + return ret; + } + + /* TODO: Somehow merge this with previous function into a serialize_dispatch(). */ + /* Workaround clang bug: https://bugs.llvm.org/show_bug.cgi?id=23029 + * Can't compile: whence = hb_serialize_context_t::Head followed by Ts&&... + */ + template <typename ...Ts> + bool serialize_copy (hb_serialize_context_t *c, const OffsetTo& src, + const void *src_base, unsigned dst_bias, + hb_serialize_context_t::whence_t whence, + Ts&&... ds) + { + *this = 0; + if (src.is_null ()) + return false; + + c->push (); + + bool ret = c->copy (src_base+src, hb_forward<Ts> (ds)...); + + c->add_link (*this, c->pop_pack (), whence, dst_bias); + + return ret; + } + + bool serialize_copy (hb_serialize_context_t *c, const OffsetTo& src, + const void *src_base, unsigned dst_bias = 0) + { return serialize_copy (c, src, src_base, dst_bias, hb_serialize_context_t::Head); } + + bool sanitize_shallow (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this))) return_trace (false); + if (unlikely (this->is_null ())) return_trace (true); + if (unlikely (!c->check_range (base, *this))) return_trace (false); + return_trace (true); + } + + template <typename ...Ts> + bool sanitize (hb_sanitize_context_t *c, const void *base, Ts&&... ds) const + { + TRACE_SANITIZE (this); + return_trace (sanitize_shallow (c, base) && + (this->is_null () || + c->dispatch (StructAtOffset<Type> (base, *this), hb_forward<Ts> (ds)...) || + neuter (c))); + } + + /* Set the offset to Null */ + bool neuter (hb_sanitize_context_t *c) const + { + if (!has_null) return false; + return c->try_set (this, 0); + } + DEFINE_SIZE_STATIC (sizeof (OffsetType)); +}; +/* Partial specializations. */ +template <typename Type, bool has_null=true> +using LOffsetTo = OffsetTo<Type, HBUINT32, has_null>; +template <typename Type, typename OffsetType=HBUINT16> +using NNOffsetTo = OffsetTo<Type, OffsetType, false>; +template <typename Type> +using LNNOffsetTo = LOffsetTo<Type, false>; + + +/* + * Array Types + */ + +template <typename Type> +struct UnsizedArrayOf +{ + typedef Type item_t; + static constexpr unsigned item_size = hb_static_size (Type); + + HB_DELETE_CREATE_COPY_ASSIGN (UnsizedArrayOf); + + const Type& operator [] (int i_) const + { + unsigned int i = (unsigned int) i_; + const Type *p = &arrayZ[i]; + if (unlikely (p < arrayZ)) return Null (Type); /* Overflowed. */ + return *p; + } + Type& operator [] (int i_) + { + unsigned int i = (unsigned int) i_; + Type *p = &arrayZ[i]; + if (unlikely (p < arrayZ)) return Crap (Type); /* Overflowed. */ + return *p; + } + + unsigned int get_size (unsigned int len) const + { return len * Type::static_size; } + + template <typename T> operator T * () { return arrayZ; } + template <typename T> operator const T * () const { return arrayZ; } + hb_array_t<Type> as_array (unsigned int len) + { return hb_array (arrayZ, len); } + hb_array_t<const Type> as_array (unsigned int len) const + { return hb_array (arrayZ, len); } + operator hb_array_t< Type> () { return as_array (); } + operator hb_array_t<const Type> () const { return as_array (); } + + template <typename T> + Type &lsearch (unsigned int len, const T &x, Type ¬_found = Crap (Type)) + { return *as_array (len).lsearch (x, ¬_found); } + template <typename T> + const Type &lsearch (unsigned int len, const T &x, const Type ¬_found = Null (Type)) const + { return *as_array (len).lsearch (x, ¬_found); } + template <typename T> + bool lfind (unsigned int len, const T &x, unsigned *pos = nullptr) const + { return as_array (len).lfind (x, pos); } + + void qsort (unsigned int len, unsigned int start = 0, unsigned int end = (unsigned int) -1) + { as_array (len).qsort (start, end); } + + bool serialize (hb_serialize_context_t *c, unsigned int items_len) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend (*this, items_len))) return_trace (false); + return_trace (true); + } + template <typename Iterator, + hb_requires (hb_is_source_of (Iterator, Type))> + bool serialize (hb_serialize_context_t *c, Iterator items) + { + TRACE_SERIALIZE (this); + unsigned count = items.len (); + if (unlikely (!serialize (c, count))) return_trace (false); + /* TODO Umm. Just exhaust the iterator instead? Being extra + * cautious right now.. */ + for (unsigned i = 0; i < count; i++, ++items) + arrayZ[i] = *items; + return_trace (true); + } + + UnsizedArrayOf* copy (hb_serialize_context_t *c, unsigned count) const + { + TRACE_SERIALIZE (this); + auto *out = c->start_embed (this); + if (unlikely (!as_array (count).copy (c))) return_trace (nullptr); + return_trace (out); + } + + template <typename ...Ts> + bool sanitize (hb_sanitize_context_t *c, unsigned int count, Ts&&... ds) const + { + TRACE_SANITIZE (this); + if (unlikely (!sanitize_shallow (c, count))) return_trace (false); + if (!sizeof... (Ts) && hb_is_trivially_copyable (Type)) return_trace (true); + for (unsigned int i = 0; i < count; i++) + if (unlikely (!c->dispatch (arrayZ[i], hb_forward<Ts> (ds)...))) + return_trace (false); + return_trace (true); + } + + bool sanitize_shallow (hb_sanitize_context_t *c, unsigned int count) const + { + TRACE_SANITIZE (this); + return_trace (c->check_array (arrayZ, count)); + } + + public: + Type arrayZ[HB_VAR_ARRAY]; + public: + DEFINE_SIZE_UNBOUNDED (0); +}; + +/* Unsized array of offset's */ +template <typename Type, typename OffsetType, bool has_null=true> +using UnsizedOffsetArrayOf = UnsizedArrayOf<OffsetTo<Type, OffsetType, has_null>>; + +/* Unsized array of offsets relative to the beginning of the array itself. */ +template <typename Type, typename OffsetType, bool has_null=true> +struct UnsizedOffsetListOf : UnsizedOffsetArrayOf<Type, OffsetType, has_null> +{ + const Type& operator [] (int i_) const + { + unsigned int i = (unsigned int) i_; + const OffsetTo<Type, OffsetType, has_null> *p = &this->arrayZ[i]; + if (unlikely (p < this->arrayZ)) return Null (Type); /* Overflowed. */ + return this+*p; + } + Type& operator [] (int i_) + { + unsigned int i = (unsigned int) i_; + const OffsetTo<Type, OffsetType, has_null> *p = &this->arrayZ[i]; + if (unlikely (p < this->arrayZ)) return Crap (Type); /* Overflowed. */ + return this+*p; + } + + template <typename ...Ts> + bool sanitize (hb_sanitize_context_t *c, unsigned int count, Ts&&... ds) const + { + TRACE_SANITIZE (this); + return_trace ((UnsizedOffsetArrayOf<Type, OffsetType, has_null> + ::sanitize (c, count, this, hb_forward<Ts> (ds)...))); + } +}; + +/* An array with sorted elements. Supports binary searching. */ +template <typename Type> +struct SortedUnsizedArrayOf : UnsizedArrayOf<Type> +{ + hb_sorted_array_t<Type> as_array (unsigned int len) + { return hb_sorted_array (this->arrayZ, len); } + hb_sorted_array_t<const Type> as_array (unsigned int len) const + { return hb_sorted_array (this->arrayZ, len); } + operator hb_sorted_array_t<Type> () { return as_array (); } + operator hb_sorted_array_t<const Type> () const { return as_array (); } + + template <typename T> + Type &bsearch (unsigned int len, const T &x, Type ¬_found = Crap (Type)) + { return *as_array (len).bsearch (x, ¬_found); } + template <typename T> + const Type &bsearch (unsigned int len, const T &x, const Type ¬_found = Null (Type)) const + { return *as_array (len).bsearch (x, ¬_found); } + template <typename T> + bool bfind (unsigned int len, const T &x, unsigned int *i = nullptr, + hb_bfind_not_found_t not_found = HB_BFIND_NOT_FOUND_DONT_STORE, + unsigned int to_store = (unsigned int) -1) const + { return as_array (len).bfind (x, i, not_found, to_store); } +}; + + +/* An array with a number of elements. */ +template <typename Type, typename LenType=HBUINT16> +struct ArrayOf +{ + typedef Type item_t; + static constexpr unsigned item_size = hb_static_size (Type); + + HB_DELETE_CREATE_COPY_ASSIGN (ArrayOf); + + const Type& operator [] (int i_) const + { + unsigned int i = (unsigned int) i_; + if (unlikely (i >= len)) return Null (Type); + return arrayZ[i]; + } + Type& operator [] (int i_) + { + unsigned int i = (unsigned int) i_; + if (unlikely (i >= len)) return Crap (Type); + return arrayZ[i]; + } + + unsigned int get_size () const + { return len.static_size + len * Type::static_size; } + + explicit operator bool () const { return len; } + + void pop () { len--; } + + hb_array_t< Type> as_array () { return hb_array (arrayZ, len); } + hb_array_t<const Type> as_array () const { return hb_array (arrayZ, len); } + + /* Iterator. */ + typedef hb_array_t<const Type> iter_t; + typedef hb_array_t< Type> writer_t; + iter_t iter () const { return as_array (); } + writer_t writer () { return as_array (); } + operator iter_t () const { return iter (); } + operator writer_t () { return writer (); } + + hb_array_t<const Type> sub_array (unsigned int start_offset, unsigned int count) const + { return as_array ().sub_array (start_offset, count); } + hb_array_t<const Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */) const + { return as_array ().sub_array (start_offset, count); } + hb_array_t<Type> sub_array (unsigned int start_offset, unsigned int count) + { return as_array ().sub_array (start_offset, count); } + hb_array_t<Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */) + { return as_array ().sub_array (start_offset, count); } + + hb_success_t serialize (hb_serialize_context_t *c, unsigned items_len) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + c->check_assign (len, items_len); + if (unlikely (!c->extend (*this))) return_trace (false); + return_trace (true); + } + template <typename Iterator, + hb_requires (hb_is_source_of (Iterator, Type))> + hb_success_t serialize (hb_serialize_context_t *c, Iterator items) + { + TRACE_SERIALIZE (this); + unsigned count = items.len (); + if (unlikely (!serialize (c, count))) return_trace (false); + /* TODO Umm. Just exhaust the iterator instead? Being extra + * cautious right now.. */ + for (unsigned i = 0; i < count; i++, ++items) + arrayZ[i] = *items; + return_trace (true); + } + + Type* serialize_append (hb_serialize_context_t *c) + { + TRACE_SERIALIZE (this); + len++; + if (unlikely (!len || !c->extend (*this))) + { + len--; + return_trace (nullptr); + } + return_trace (&arrayZ[len - 1]); + } + + ArrayOf* copy (hb_serialize_context_t *c) const + { + TRACE_SERIALIZE (this); + auto *out = c->start_embed (this); + if (unlikely (!c->extend_min (out))) return_trace (nullptr); + c->check_assign (out->len, len); + if (unlikely (!as_array ().copy (c))) return_trace (nullptr); + return_trace (out); + } + + template <typename ...Ts> + bool sanitize (hb_sanitize_context_t *c, Ts&&... ds) const + { + TRACE_SANITIZE (this); + if (unlikely (!sanitize_shallow (c))) return_trace (false); + if (!sizeof... (Ts) && hb_is_trivially_copyable (Type)) return_trace (true); + unsigned int count = len; + for (unsigned int i = 0; i < count; i++) + if (unlikely (!c->dispatch (arrayZ[i], hb_forward<Ts> (ds)...))) + return_trace (false); + return_trace (true); + } + + template <typename T> + Type &lsearch (const T &x, Type ¬_found = Crap (Type)) + { return *as_array ().lsearch (x, ¬_found); } + template <typename T> + const Type &lsearch (const T &x, const Type ¬_found = Null (Type)) const + { return *as_array ().lsearch (x, ¬_found); } + template <typename T> + bool lfind (const T &x, unsigned *pos = nullptr) const + { return as_array ().lfind (x, pos); } + + void qsort (unsigned int start = 0, unsigned int end = (unsigned int) -1) + { as_array ().qsort (start, end); } + + bool sanitize_shallow (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (len.sanitize (c) && c->check_array (arrayZ, len)); + } + + public: + LenType len; + Type arrayZ[HB_VAR_ARRAY]; + public: + DEFINE_SIZE_ARRAY (sizeof (LenType), arrayZ); +}; +template <typename Type> +using LArrayOf = ArrayOf<Type, HBUINT32>; +using PString = ArrayOf<HBUINT8, HBUINT8>; + +/* Array of Offset's */ +template <typename Type> +using OffsetArrayOf = ArrayOf<OffsetTo<Type, HBUINT16>>; +template <typename Type> +using LOffsetArrayOf = ArrayOf<OffsetTo<Type, HBUINT32>>; +template <typename Type> +using LOffsetLArrayOf = ArrayOf<OffsetTo<Type, HBUINT32>, HBUINT32>; + +/* Array of offsets relative to the beginning of the array itself. */ +template <typename Type> +struct OffsetListOf : OffsetArrayOf<Type> +{ + const Type& operator [] (int i_) const + { + unsigned int i = (unsigned int) i_; + if (unlikely (i >= this->len)) return Null (Type); + return this+this->arrayZ[i]; + } + const Type& operator [] (int i_) + { + unsigned int i = (unsigned int) i_; + if (unlikely (i >= this->len)) return Crap (Type); + return this+this->arrayZ[i]; + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + struct OffsetListOf<Type> *out = c->serializer->embed (*this); + if (unlikely (!out)) return_trace (false); + unsigned int count = this->len; + for (unsigned int i = 0; i < count; i++) + out->arrayZ[i].serialize_subset (c, this->arrayZ[i], this, out); + return_trace (true); + } + + template <typename ...Ts> + bool sanitize (hb_sanitize_context_t *c, Ts&&... ds) const + { + TRACE_SANITIZE (this); + return_trace (OffsetArrayOf<Type>::sanitize (c, this, hb_forward<Ts> (ds)...)); + } +}; + +/* An array starting at second element. */ +template <typename Type, typename LenType=HBUINT16> +struct HeadlessArrayOf +{ + static constexpr unsigned item_size = Type::static_size; + + HB_DELETE_CREATE_COPY_ASSIGN (HeadlessArrayOf); + + const Type& operator [] (int i_) const + { + unsigned int i = (unsigned int) i_; + if (unlikely (i >= lenP1 || !i)) return Null (Type); + return arrayZ[i-1]; + } + Type& operator [] (int i_) + { + unsigned int i = (unsigned int) i_; + if (unlikely (i >= lenP1 || !i)) return Crap (Type); + return arrayZ[i-1]; + } + unsigned int get_size () const + { return lenP1.static_size + get_length () * Type::static_size; } + + unsigned get_length () const { return lenP1 ? lenP1 - 1 : 0; } + + hb_array_t< Type> as_array () { return hb_array (arrayZ, get_length ()); } + hb_array_t<const Type> as_array () const { return hb_array (arrayZ, get_length ()); } + + /* Iterator. */ + typedef hb_array_t<const Type> iter_t; + typedef hb_array_t< Type> writer_t; + iter_t iter () const { return as_array (); } + writer_t writer () { return as_array (); } + operator iter_t () const { return iter (); } + operator writer_t () { return writer (); } + + bool serialize (hb_serialize_context_t *c, unsigned int items_len) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + c->check_assign (lenP1, items_len + 1); + if (unlikely (!c->extend (*this))) return_trace (false); + return_trace (true); + } + template <typename Iterator, + hb_requires (hb_is_source_of (Iterator, Type))> + bool serialize (hb_serialize_context_t *c, Iterator items) + { + TRACE_SERIALIZE (this); + unsigned count = items.len (); + if (unlikely (!serialize (c, count))) return_trace (false); + /* TODO Umm. Just exhaust the iterator instead? Being extra + * cautious right now.. */ + for (unsigned i = 0; i < count; i++, ++items) + arrayZ[i] = *items; + return_trace (true); + } + + template <typename ...Ts> + bool sanitize (hb_sanitize_context_t *c, Ts&&... ds) const + { + TRACE_SANITIZE (this); + if (unlikely (!sanitize_shallow (c))) return_trace (false); + if (!sizeof... (Ts) && hb_is_trivially_copyable (Type)) return_trace (true); + unsigned int count = get_length (); + for (unsigned int i = 0; i < count; i++) + if (unlikely (!c->dispatch (arrayZ[i], hb_forward<Ts> (ds)...))) + return_trace (false); + return_trace (true); + } + + private: + bool sanitize_shallow (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (lenP1.sanitize (c) && + (!lenP1 || c->check_array (arrayZ, lenP1 - 1))); + } + + public: + LenType lenP1; + Type arrayZ[HB_VAR_ARRAY]; + public: + DEFINE_SIZE_ARRAY (sizeof (LenType), arrayZ); +}; + +/* An array storing length-1. */ +template <typename Type, typename LenType=HBUINT16> +struct ArrayOfM1 +{ + HB_DELETE_CREATE_COPY_ASSIGN (ArrayOfM1); + + const Type& operator [] (int i_) const + { + unsigned int i = (unsigned int) i_; + if (unlikely (i > lenM1)) return Null (Type); + return arrayZ[i]; + } + Type& operator [] (int i_) + { + unsigned int i = (unsigned int) i_; + if (unlikely (i > lenM1)) return Crap (Type); + return arrayZ[i]; + } + unsigned int get_size () const + { return lenM1.static_size + (lenM1 + 1) * Type::static_size; } + + template <typename ...Ts> + bool sanitize (hb_sanitize_context_t *c, Ts&&... ds) const + { + TRACE_SANITIZE (this); + if (unlikely (!sanitize_shallow (c))) return_trace (false); + unsigned int count = lenM1 + 1; + for (unsigned int i = 0; i < count; i++) + if (unlikely (!c->dispatch (arrayZ[i], hb_forward<Ts> (ds)...))) + return_trace (false); + return_trace (true); + } + + private: + bool sanitize_shallow (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (lenM1.sanitize (c) && + (c->check_array (arrayZ, lenM1 + 1))); + } + + public: + LenType lenM1; + Type arrayZ[HB_VAR_ARRAY]; + public: + DEFINE_SIZE_ARRAY (sizeof (LenType), arrayZ); +}; + +/* An array with sorted elements. Supports binary searching. */ +template <typename Type, typename LenType=HBUINT16> +struct SortedArrayOf : ArrayOf<Type, LenType> +{ + hb_sorted_array_t< Type> as_array () { return hb_sorted_array (this->arrayZ, this->len); } + hb_sorted_array_t<const Type> as_array () const { return hb_sorted_array (this->arrayZ, this->len); } + + /* Iterator. */ + typedef hb_sorted_array_t<const Type> iter_t; + typedef hb_sorted_array_t< Type> writer_t; + iter_t iter () const { return as_array (); } + writer_t writer () { return as_array (); } + operator iter_t () const { return iter (); } + operator writer_t () { return writer (); } + + hb_sorted_array_t<const Type> sub_array (unsigned int start_offset, unsigned int count) const + { return as_array ().sub_array (start_offset, count); } + hb_sorted_array_t<const Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */) const + { return as_array ().sub_array (start_offset, count); } + hb_sorted_array_t<Type> sub_array (unsigned int start_offset, unsigned int count) + { return as_array ().sub_array (start_offset, count); } + hb_sorted_array_t<Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */) + { return as_array ().sub_array (start_offset, count); } + + bool serialize (hb_serialize_context_t *c, unsigned int items_len) + { + TRACE_SERIALIZE (this); + bool ret = ArrayOf<Type, LenType>::serialize (c, items_len); + return_trace (ret); + } + template <typename Iterator, + hb_requires (hb_is_sorted_source_of (Iterator, Type))> + bool serialize (hb_serialize_context_t *c, Iterator items) + { + TRACE_SERIALIZE (this); + bool ret = ArrayOf<Type, LenType>::serialize (c, items); + return_trace (ret); + } + + template <typename T> + Type &bsearch (const T &x, Type ¬_found = Crap (Type)) + { return *as_array ().bsearch (x, ¬_found); } + template <typename T> + const Type &bsearch (const T &x, const Type ¬_found = Null (Type)) const + { return *as_array ().bsearch (x, ¬_found); } + template <typename T> + bool bfind (const T &x, unsigned int *i = nullptr, + hb_bfind_not_found_t not_found = HB_BFIND_NOT_FOUND_DONT_STORE, + unsigned int to_store = (unsigned int) -1) const + { return as_array ().bfind (x, i, not_found, to_store); } +}; + +/* + * Binary-search arrays + */ + +template <typename LenType=HBUINT16> +struct BinSearchHeader +{ + operator uint32_t () const { return len; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + BinSearchHeader& operator = (unsigned int v) + { + len = v; + assert (len == v); + entrySelector = hb_max (1u, hb_bit_storage (v)) - 1; + searchRange = 16 * (1u << entrySelector); + rangeShift = v * 16 > searchRange + ? 16 * v - searchRange + : 0; + return *this; + } + + protected: + LenType len; + LenType searchRange; + LenType entrySelector; + LenType rangeShift; + + public: + DEFINE_SIZE_STATIC (8); +}; + +template <typename Type, typename LenType=HBUINT16> +using BinSearchArrayOf = SortedArrayOf<Type, BinSearchHeader<LenType>>; + + +struct VarSizedBinSearchHeader +{ + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + HBUINT16 unitSize; /* Size of a lookup unit for this search in bytes. */ + HBUINT16 nUnits; /* Number of units of the preceding size to be searched. */ + HBUINT16 searchRange; /* The value of unitSize times the largest power of 2 + * that is less than or equal to the value of nUnits. */ + HBUINT16 entrySelector; /* The log base 2 of the largest power of 2 less than + * or equal to the value of nUnits. */ + HBUINT16 rangeShift; /* The value of unitSize times the difference of the + * value of nUnits minus the largest power of 2 less + * than or equal to the value of nUnits. */ + public: + DEFINE_SIZE_STATIC (10); +}; + +template <typename Type> +struct VarSizedBinSearchArrayOf +{ + static constexpr unsigned item_size = Type::static_size; + + HB_DELETE_CREATE_COPY_ASSIGN (VarSizedBinSearchArrayOf); + + bool last_is_terminator () const + { + if (unlikely (!header.nUnits)) return false; + + /* Gah. + * + * "The number of termination values that need to be included is table-specific. + * The value that indicates binary search termination is 0xFFFF." */ + const HBUINT16 *words = &StructAtOffset<HBUINT16> (&bytesZ, (header.nUnits - 1) * header.unitSize); + unsigned int count = Type::TerminationWordCount; + for (unsigned int i = 0; i < count; i++) + if (words[i] != 0xFFFFu) + return false; + return true; + } + + const Type& operator [] (int i_) const + { + unsigned int i = (unsigned int) i_; + if (unlikely (i >= get_length ())) return Null (Type); + return StructAtOffset<Type> (&bytesZ, i * header.unitSize); + } + Type& operator [] (int i_) + { + unsigned int i = (unsigned int) i_; + if (unlikely (i >= get_length ())) return Crap (Type); + return StructAtOffset<Type> (&bytesZ, i * header.unitSize); + } + unsigned int get_length () const + { return header.nUnits - last_is_terminator (); } + unsigned int get_size () const + { return header.static_size + header.nUnits * header.unitSize; } + + template <typename ...Ts> + bool sanitize (hb_sanitize_context_t *c, Ts&&... ds) const + { + TRACE_SANITIZE (this); + if (unlikely (!sanitize_shallow (c))) return_trace (false); + if (!sizeof... (Ts) && hb_is_trivially_copyable (Type)) return_trace (true); + unsigned int count = get_length (); + for (unsigned int i = 0; i < count; i++) + if (unlikely (!(*this)[i].sanitize (c, hb_forward<Ts> (ds)...))) + return_trace (false); + return_trace (true); + } + + template <typename T> + const Type *bsearch (const T &key) const + { + unsigned pos; + return hb_bsearch_impl (&pos, + key, + (const void *) bytesZ, + get_length (), + header.unitSize, + _hb_cmp_method<T, Type>) + ? (const Type *) (((const char *) &bytesZ) + (pos * header.unitSize)) + : nullptr; + } + + private: + bool sanitize_shallow (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (header.sanitize (c) && + Type::static_size <= header.unitSize && + c->check_range (bytesZ.arrayZ, + header.nUnits, + header.unitSize)); + } + + protected: + VarSizedBinSearchHeader header; + UnsizedArrayOf<HBUINT8> bytesZ; + public: + DEFINE_SIZE_ARRAY (10, bytesZ); +}; + + +} /* namespace OT */ + + +#endif /* HB_OPEN_TYPE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-cff-common.hh b/thirdparty/harfbuzz/src/hb-ot-cff-common.hh new file mode 100644 index 0000000000..e5286cd792 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-cff-common.hh @@ -0,0 +1,622 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ +#ifndef HB_OT_CFF_COMMON_HH +#define HB_OT_CFF_COMMON_HH + +#include "hb-open-type.hh" +#include "hb-bimap.hh" +#include "hb-ot-layout-common.hh" +#include "hb-cff-interp-dict-common.hh" +#include "hb-subset-plan.hh" + +namespace CFF { + +using namespace OT; + +#define CFF_UNDEF_CODE 0xFFFFFFFF + +using objidx_t = hb_serialize_context_t::objidx_t; +using whence_t = hb_serialize_context_t::whence_t; + +/* utility macro */ +template<typename Type> +static inline const Type& StructAtOffsetOrNull (const void *P, unsigned int offset) +{ return offset ? StructAtOffset<Type> (P, offset) : Null (Type); } + +inline unsigned int calcOffSize (unsigned int dataSize) +{ + unsigned int size = 1; + unsigned int offset = dataSize + 1; + while (offset & ~0xFF) + { + size++; + offset >>= 8; + } + /* format does not support size > 4; caller should handle it as an error */ + return size; +} + +struct code_pair_t +{ + hb_codepoint_t code; + hb_codepoint_t glyph; +}; + +typedef hb_vector_t<unsigned char> str_buff_t; +struct str_buff_vec_t : hb_vector_t<str_buff_t> +{ + void fini () { SUPER::fini_deep (); } + + unsigned int total_size () const + { + unsigned int size = 0; + for (unsigned int i = 0; i < length; i++) + size += (*this)[i].length; + return size; + } + + private: + typedef hb_vector_t<str_buff_t> SUPER; +}; + +/* CFF INDEX */ +template <typename COUNT> +struct CFFIndex +{ + static unsigned int calculate_offset_array_size (unsigned int offSize, unsigned int count) + { return offSize * (count + 1); } + + unsigned int offset_array_size () const + { return calculate_offset_array_size (offSize, count); } + + CFFIndex *copy (hb_serialize_context_t *c) const + { + TRACE_SERIALIZE (this); + unsigned int size = get_size (); + CFFIndex *out = c->allocate_size<CFFIndex> (size); + if (likely (out)) + memcpy (out, this, size); + return_trace (out); + } + + bool serialize (hb_serialize_context_t *c, const CFFIndex &src) + { + TRACE_SERIALIZE (this); + unsigned int size = src.get_size (); + CFFIndex *dest = c->allocate_size<CFFIndex> (size); + if (unlikely (!dest)) return_trace (false); + memcpy (dest, &src, size); + return_trace (true); + } + + bool serialize (hb_serialize_context_t *c, + unsigned int offSize_, + const byte_str_array_t &byteArray) + { + TRACE_SERIALIZE (this); + if (byteArray.length == 0) + { + COUNT *dest = c->allocate_min<COUNT> (); + if (unlikely (!dest)) return_trace (false); + *dest = 0; + } + else + { + /* serialize CFFIndex header */ + if (unlikely (!c->extend_min (*this))) return_trace (false); + this->count = byteArray.length; + this->offSize = offSize_; + if (unlikely (!c->allocate_size<HBUINT8> (offSize_ * (byteArray.length + 1)))) + return_trace (false); + + /* serialize indices */ + unsigned int offset = 1; + unsigned int i = 0; + for (; i < byteArray.length; i++) + { + set_offset_at (i, offset); + offset += byteArray[i].get_size (); + } + set_offset_at (i, offset); + + /* serialize data */ + for (unsigned int i = 0; i < byteArray.length; i++) + { + const byte_str_t &bs = byteArray[i]; + unsigned char *dest = c->allocate_size<unsigned char> (bs.length); + if (unlikely (!dest)) return_trace (false); + memcpy (dest, &bs[0], bs.length); + } + } + return_trace (true); + } + + bool serialize (hb_serialize_context_t *c, + unsigned int offSize_, + const str_buff_vec_t &buffArray) + { + byte_str_array_t byteArray; + byteArray.init (); + byteArray.resize (buffArray.length); + for (unsigned int i = 0; i < byteArray.length; i++) + byteArray[i] = byte_str_t (buffArray[i].arrayZ, buffArray[i].length); + bool result = this->serialize (c, offSize_, byteArray); + byteArray.fini (); + return result; + } + + template <typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + bool serialize (hb_serialize_context_t *c, + Iterator it) + { + TRACE_SERIALIZE (this); + if (it.len () == 0) + { + COUNT *dest = c->allocate_min<COUNT> (); + if (unlikely (!dest)) return_trace (false); + *dest = 0; + } + else + { + serialize_header(c, + it | hb_map ([] (const byte_str_t &_) { return _.length; })); + for (const byte_str_t &_ : +it) + _.copy (c); + } + return_trace (true); + } + + bool serialize (hb_serialize_context_t *c, + const byte_str_array_t &byteArray) + { return serialize (c, + hb_iter (byteArray)); } + + bool serialize (hb_serialize_context_t *c, + const str_buff_vec_t &buffArray) + { + auto it = + + hb_iter (buffArray) + | hb_map ([] (const str_buff_t &_) { return byte_str_t (_.arrayZ, _.length); }) + ; + return serialize (c, it); + } + + template <typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + bool serialize_header (hb_serialize_context_t *c, + Iterator it) + { + TRACE_SERIALIZE (this); + + unsigned total = + it | hb_reduce (hb_add, 0); + unsigned off_size = calcOffSize (total); + + /* serialize CFFIndex header */ + if (unlikely (!c->extend_min (*this))) return_trace (false); + this->count = it.len (); + this->offSize = off_size; + if (unlikely (!c->allocate_size<HBUINT8> (off_size * (it.len () + 1)))) + return_trace (false); + + /* serialize indices */ + unsigned int offset = 1; + unsigned int i = 0; + for (unsigned _ : +it) + { + CFFIndex<COUNT>::set_offset_at (i++, offset); + offset += _; + } + CFFIndex<COUNT>::set_offset_at (i, offset); + + return_trace (true); + } + + void set_offset_at (unsigned int index, unsigned int offset) + { + HBUINT8 *p = offsets + offSize * index + offSize; + unsigned int size = offSize; + for (; size; size--) + { + --p; + *p = offset & 0xFF; + offset >>= 8; + } + } + + unsigned int offset_at (unsigned int index) const + { + assert (index <= count); + const HBUINT8 *p = offsets + offSize * index; + unsigned int size = offSize; + unsigned int offset = 0; + for (; size; size--) + offset = (offset << 8) + *p++; + return offset; + } + + unsigned int length_at (unsigned int index) const + { + if (unlikely ((offset_at (index + 1) < offset_at (index)) || + (offset_at (index + 1) > offset_at (count)))) + return 0; + return offset_at (index + 1) - offset_at (index); + } + + const unsigned char *data_base () const + { return (const unsigned char *) this + min_size + offset_array_size (); } + + unsigned int data_size () const { return HBINT8::static_size; } + + byte_str_t operator [] (unsigned int index) const + { + if (unlikely (index >= count)) return Null (byte_str_t); + return byte_str_t (data_base () + offset_at (index) - 1, length_at (index)); + } + + unsigned int get_size () const + { + if (this == &Null (CFFIndex)) return 0; + if (count > 0) + return min_size + offset_array_size () + (offset_at (count) - 1); + return count.static_size; /* empty CFFIndex contains count only */ + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely ((c->check_struct (this) && count == 0) || /* empty INDEX */ + (c->check_struct (this) && offSize >= 1 && offSize <= 4 && + c->check_array (offsets, offSize, count + 1) && + c->check_array ((const HBUINT8*) data_base (), 1, max_offset () - 1)))); + } + + protected: + unsigned int max_offset () const + { + unsigned int max = 0; + for (unsigned int i = 0; i < count + 1u; i++) + { + unsigned int off = offset_at (i); + if (off > max) max = off; + } + return max; + } + + public: + COUNT count; /* Number of object data. Note there are (count+1) offsets */ + HBUINT8 offSize; /* The byte size of each offset in the offsets array. */ + HBUINT8 offsets[HB_VAR_ARRAY]; + /* The array of (count + 1) offsets into objects array (1-base). */ + /* HBUINT8 data[HB_VAR_ARRAY]; Object data */ + public: + DEFINE_SIZE_ARRAY (COUNT::static_size + HBUINT8::static_size, offsets); +}; + +template <typename COUNT, typename TYPE> +struct CFFIndexOf : CFFIndex<COUNT> +{ + const byte_str_t operator [] (unsigned int index) const + { + if (likely (index < CFFIndex<COUNT>::count)) + return byte_str_t (CFFIndex<COUNT>::data_base () + CFFIndex<COUNT>::offset_at (index) - 1, CFFIndex<COUNT>::length_at (index)); + return Null (byte_str_t); + } + + template <typename DATA, typename PARAM1, typename PARAM2> + bool serialize (hb_serialize_context_t *c, + unsigned int offSize_, + const DATA *dataArray, + unsigned int dataArrayLen, + const hb_vector_t<unsigned int> &dataSizeArray, + const PARAM1 ¶m1, + const PARAM2 ¶m2) + { + TRACE_SERIALIZE (this); + /* serialize CFFIndex header */ + if (unlikely (!c->extend_min (*this))) return_trace (false); + this->count = dataArrayLen; + this->offSize = offSize_; + if (unlikely (!c->allocate_size<HBUINT8> (offSize_ * (dataArrayLen + 1)))) + return_trace (false); + + /* serialize indices */ + unsigned int offset = 1; + unsigned int i = 0; + for (; i < dataArrayLen; i++) + { + CFFIndex<COUNT>::set_offset_at (i, offset); + offset += dataSizeArray[i]; + } + CFFIndex<COUNT>::set_offset_at (i, offset); + + /* serialize data */ + for (unsigned int i = 0; i < dataArrayLen; i++) + { + TYPE *dest = c->start_embed<TYPE> (); + if (unlikely (!dest || !dest->serialize (c, dataArray[i], param1, param2))) + return_trace (false); + } + return_trace (true); + } +}; + +/* Top Dict, Font Dict, Private Dict */ +struct Dict : UnsizedByteStr +{ + template <typename DICTVAL, typename OP_SERIALIZER, typename ...Ts> + bool serialize (hb_serialize_context_t *c, + const DICTVAL &dictval, + OP_SERIALIZER& opszr, + Ts&&... ds) + { + TRACE_SERIALIZE (this); + for (unsigned int i = 0; i < dictval.get_count (); i++) + if (unlikely (!opszr.serialize (c, dictval[i], hb_forward<Ts> (ds)...))) + return_trace (false); + + return_trace (true); + } + + template <typename T, typename V> + static bool serialize_int_op (hb_serialize_context_t *c, op_code_t op, V value, op_code_t intOp) + { + // XXX: not sure why but LLVM fails to compile the following 'unlikely' macro invocation + if (/*unlikely*/ (!serialize_int<T, V> (c, intOp, value))) + return false; + + TRACE_SERIALIZE (this); + /* serialize the opcode */ + HBUINT8 *p = c->allocate_size<HBUINT8> (OpCode_Size (op)); + if (unlikely (!p)) return_trace (false); + if (Is_OpCode_ESC (op)) + { + *p = OpCode_escape; + op = Unmake_OpCode_ESC (op); + p++; + } + *p = op; + return_trace (true); + } + + template <typename V> + static bool serialize_int4_op (hb_serialize_context_t *c, op_code_t op, V value) + { return serialize_int_op<HBINT32> (c, op, value, OpCode_longintdict); } + + template <typename V> + static bool serialize_int2_op (hb_serialize_context_t *c, op_code_t op, V value) + { return serialize_int_op<HBINT16> (c, op, value, OpCode_shortint); } + + template <typename T, int int_op> + static bool serialize_link_op (hb_serialize_context_t *c, op_code_t op, objidx_t link, whence_t whence) + { + T &ofs = *(T *) (c->head + OpCode_Size (int_op)); + if (unlikely (!serialize_int_op<T> (c, op, 0, int_op))) return false; + c->add_link (ofs, link, whence); + return true; + } + + static bool serialize_link4_op (hb_serialize_context_t *c, op_code_t op, objidx_t link, whence_t whence = whence_t::Head) + { return serialize_link_op<HBINT32, OpCode_longintdict> (c, op, link, whence); } + + static bool serialize_link2_op (hb_serialize_context_t *c, op_code_t op, objidx_t link, whence_t whence = whence_t::Head) + { return serialize_link_op<HBINT16, OpCode_shortint> (c, op, link, whence); } +}; + +struct TopDict : Dict {}; +struct FontDict : Dict {}; +struct PrivateDict : Dict {}; + +struct table_info_t +{ + void init () { offset = size = 0; link = 0; } + + unsigned int offset; + unsigned int size; + objidx_t link; +}; + +template <typename COUNT> +struct FDArray : CFFIndexOf<COUNT, FontDict> +{ + template <typename DICTVAL, typename INFO, typename Iterator, typename OP_SERIALIZER> + bool serialize (hb_serialize_context_t *c, + Iterator it, + OP_SERIALIZER& opszr) + { + TRACE_SERIALIZE (this); + + /* serialize INDEX data */ + hb_vector_t<unsigned> sizes; + c->push (); + + it + | hb_map ([&] (const hb_pair_t<const DICTVAL&, const INFO&> &_) + { + FontDict *dict = c->start_embed<FontDict> (); + dict->serialize (c, _.first, opszr, _.second); + return c->head - (const char*)dict; + }) + | hb_sink (sizes) + ; + c->pop_pack (false); + + /* serialize INDEX header */ + return_trace (CFFIndex<COUNT>::serialize_header (c, hb_iter (sizes))); + } +}; + +/* FDSelect */ +struct FDSelect0 { + bool sanitize (hb_sanitize_context_t *c, unsigned int fdcount) const + { + TRACE_SANITIZE (this); + if (unlikely (!(c->check_struct (this)))) + return_trace (false); + for (unsigned int i = 0; i < c->get_num_glyphs (); i++) + if (unlikely (!fds[i].sanitize (c))) + return_trace (false); + + return_trace (true); + } + + hb_codepoint_t get_fd (hb_codepoint_t glyph) const + { return (hb_codepoint_t) fds[glyph]; } + + unsigned int get_size (unsigned int num_glyphs) const + { return HBUINT8::static_size * num_glyphs; } + + HBUINT8 fds[HB_VAR_ARRAY]; + + DEFINE_SIZE_MIN (0); +}; + +template <typename GID_TYPE, typename FD_TYPE> +struct FDSelect3_4_Range +{ + bool sanitize (hb_sanitize_context_t *c, const void * /*nullptr*/, unsigned int fdcount) const + { + TRACE_SANITIZE (this); + return_trace (first < c->get_num_glyphs () && (fd < fdcount)); + } + + GID_TYPE first; + FD_TYPE fd; + public: + DEFINE_SIZE_STATIC (GID_TYPE::static_size + FD_TYPE::static_size); +}; + +template <typename GID_TYPE, typename FD_TYPE> +struct FDSelect3_4 +{ + unsigned int get_size () const + { return GID_TYPE::static_size * 2 + ranges.get_size (); } + + bool sanitize (hb_sanitize_context_t *c, unsigned int fdcount) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this) || !ranges.sanitize (c, nullptr, fdcount) || + (nRanges () == 0) || ranges[0].first != 0)) + return_trace (false); + + for (unsigned int i = 1; i < nRanges (); i++) + if (unlikely (ranges[i - 1].first >= ranges[i].first)) + return_trace (false); + + if (unlikely (!sentinel().sanitize (c) || (sentinel() != c->get_num_glyphs ()))) + return_trace (false); + + return_trace (true); + } + + hb_codepoint_t get_fd (hb_codepoint_t glyph) const + { + unsigned int i; + for (i = 1; i < nRanges (); i++) + if (glyph < ranges[i].first) + break; + + return (hb_codepoint_t) ranges[i - 1].fd; + } + + GID_TYPE &nRanges () { return ranges.len; } + GID_TYPE nRanges () const { return ranges.len; } + GID_TYPE &sentinel () { return StructAfter<GID_TYPE> (ranges[nRanges () - 1]); } + const GID_TYPE &sentinel () const { return StructAfter<GID_TYPE> (ranges[nRanges () - 1]); } + + ArrayOf<FDSelect3_4_Range<GID_TYPE, FD_TYPE>, GID_TYPE> ranges; + /* GID_TYPE sentinel */ + + DEFINE_SIZE_ARRAY (GID_TYPE::static_size, ranges); +}; + +typedef FDSelect3_4<HBUINT16, HBUINT8> FDSelect3; +typedef FDSelect3_4_Range<HBUINT16, HBUINT8> FDSelect3_Range; + +struct FDSelect +{ + bool serialize (hb_serialize_context_t *c, const FDSelect &src, unsigned int num_glyphs) + { + TRACE_SERIALIZE (this); + unsigned int size = src.get_size (num_glyphs); + FDSelect *dest = c->allocate_size<FDSelect> (size); + if (unlikely (!dest)) return_trace (false); + memcpy (dest, &src, size); + return_trace (true); + } + + unsigned int get_size (unsigned int num_glyphs) const + { + switch (format) + { + case 0: return format.static_size + u.format0.get_size (num_glyphs); + case 3: return format.static_size + u.format3.get_size (); + default:return 0; + } + } + + hb_codepoint_t get_fd (hb_codepoint_t glyph) const + { + if (this == &Null (FDSelect)) return 0; + + switch (format) + { + case 0: return u.format0.get_fd (glyph); + case 3: return u.format3.get_fd (glyph); + default:return 0; + } + } + + bool sanitize (hb_sanitize_context_t *c, unsigned int fdcount) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this))) + return_trace (false); + + switch (format) + { + case 0: return_trace (u.format0.sanitize (c, fdcount)); + case 3: return_trace (u.format3.sanitize (c, fdcount)); + default:return_trace (false); + } + } + + HBUINT8 format; + union { + FDSelect0 format0; + FDSelect3 format3; + } u; + public: + DEFINE_SIZE_MIN (1); +}; + +template <typename COUNT> +struct Subrs : CFFIndex<COUNT> +{ + typedef COUNT count_type; + typedef CFFIndex<COUNT> SUPER; +}; + +} /* namespace CFF */ + +#endif /* HB_OT_CFF_COMMON_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-cff1-std-str.hh b/thirdparty/harfbuzz/src/hb-ot-cff1-std-str.hh new file mode 100644 index 0000000000..65d56ae18b --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-cff1-std-str.hh @@ -0,0 +1,425 @@ +/* + * Copyright © 2019 Adobe, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#ifndef HB_OT_CFF1_STD_STR_HH +#if 0 /* Make checks happy. */ +#define HB_OT_CFF1_STD_STR_HH +#include "hb.hh" +#endif + +_S(".notdef") +_S("space") +_S("exclam") +_S("quotedbl") +_S("numbersign") +_S("dollar") +_S("percent") +_S("ampersand") +_S("quoteright") +_S("parenleft") +_S("parenright") +_S("asterisk") +_S("plus") +_S("comma") +_S("hyphen") +_S("period") +_S("slash") +_S("zero") +_S("one") +_S("two") +_S("three") +_S("four") +_S("five") +_S("six") +_S("seven") +_S("eight") +_S("nine") +_S("colon") +_S("semicolon") +_S("less") +_S("equal") +_S("greater") +_S("question") +_S("at") +_S("A") +_S("B") +_S("C") +_S("D") +_S("E") +_S("F") +_S("G") +_S("H") +_S("I") +_S("J") +_S("K") +_S("L") +_S("M") +_S("N") +_S("O") +_S("P") +_S("Q") +_S("R") +_S("S") +_S("T") +_S("U") +_S("V") +_S("W") +_S("X") +_S("Y") +_S("Z") +_S("bracketleft") +_S("backslash") +_S("bracketright") +_S("asciicircum") +_S("underscore") +_S("quoteleft") +_S("a") +_S("b") +_S("c") +_S("d") +_S("e") +_S("f") +_S("g") +_S("h") +_S("i") +_S("j") +_S("k") +_S("l") +_S("m") +_S("n") +_S("o") +_S("p") +_S("q") +_S("r") +_S("s") +_S("t") +_S("u") +_S("v") +_S("w") +_S("x") +_S("y") +_S("z") +_S("braceleft") +_S("bar") +_S("braceright") +_S("asciitilde") +_S("exclamdown") +_S("cent") +_S("sterling") +_S("fraction") +_S("yen") +_S("florin") +_S("section") +_S("currency") +_S("quotesingle") +_S("quotedblleft") +_S("guillemotleft") +_S("guilsinglleft") +_S("guilsinglright") +_S("fi") +_S("fl") +_S("endash") +_S("dagger") +_S("daggerdbl") +_S("periodcentered") +_S("paragraph") +_S("bullet") +_S("quotesinglbase") +_S("quotedblbase") +_S("quotedblright") +_S("guillemotright") +_S("ellipsis") +_S("perthousand") +_S("questiondown") +_S("grave") +_S("acute") +_S("circumflex") +_S("tilde") +_S("macron") +_S("breve") +_S("dotaccent") +_S("dieresis") +_S("ring") +_S("cedilla") +_S("hungarumlaut") +_S("ogonek") +_S("caron") +_S("emdash") +_S("AE") +_S("ordfeminine") +_S("Lslash") +_S("Oslash") +_S("OE") +_S("ordmasculine") +_S("ae") +_S("dotlessi") +_S("lslash") +_S("oslash") +_S("oe") +_S("germandbls") +_S("onesuperior") +_S("logicalnot") +_S("mu") +_S("trademark") +_S("Eth") +_S("onehalf") +_S("plusminus") +_S("Thorn") +_S("onequarter") +_S("divide") +_S("brokenbar") +_S("degree") +_S("thorn") +_S("threequarters") +_S("twosuperior") +_S("registered") +_S("minus") +_S("eth") +_S("multiply") +_S("threesuperior") +_S("copyright") +_S("Aacute") +_S("Acircumflex") +_S("Adieresis") +_S("Agrave") +_S("Aring") +_S("Atilde") +_S("Ccedilla") +_S("Eacute") +_S("Ecircumflex") +_S("Edieresis") +_S("Egrave") +_S("Iacute") +_S("Icircumflex") +_S("Idieresis") +_S("Igrave") +_S("Ntilde") +_S("Oacute") +_S("Ocircumflex") +_S("Odieresis") +_S("Ograve") +_S("Otilde") +_S("Scaron") +_S("Uacute") +_S("Ucircumflex") +_S("Udieresis") +_S("Ugrave") +_S("Yacute") +_S("Ydieresis") +_S("Zcaron") +_S("aacute") +_S("acircumflex") +_S("adieresis") +_S("agrave") +_S("aring") +_S("atilde") +_S("ccedilla") +_S("eacute") +_S("ecircumflex") +_S("edieresis") +_S("egrave") +_S("iacute") +_S("icircumflex") +_S("idieresis") +_S("igrave") +_S("ntilde") +_S("oacute") +_S("ocircumflex") +_S("odieresis") +_S("ograve") +_S("otilde") +_S("scaron") +_S("uacute") +_S("ucircumflex") +_S("udieresis") +_S("ugrave") +_S("yacute") +_S("ydieresis") +_S("zcaron") +_S("exclamsmall") +_S("Hungarumlautsmall") +_S("dollaroldstyle") +_S("dollarsuperior") +_S("ampersandsmall") +_S("Acutesmall") +_S("parenleftsuperior") +_S("parenrightsuperior") +_S("twodotenleader") +_S("onedotenleader") +_S("zerooldstyle") +_S("oneoldstyle") +_S("twooldstyle") +_S("threeoldstyle") +_S("fouroldstyle") +_S("fiveoldstyle") +_S("sixoldstyle") +_S("sevenoldstyle") +_S("eightoldstyle") +_S("nineoldstyle") +_S("commasuperior") +_S("threequartersemdash") +_S("periodsuperior") +_S("questionsmall") +_S("asuperior") +_S("bsuperior") +_S("centsuperior") +_S("dsuperior") +_S("esuperior") +_S("isuperior") +_S("lsuperior") +_S("msuperior") +_S("nsuperior") +_S("osuperior") +_S("rsuperior") +_S("ssuperior") +_S("tsuperior") +_S("ff") +_S("ffi") +_S("ffl") +_S("parenleftinferior") +_S("parenrightinferior") +_S("Circumflexsmall") +_S("hyphensuperior") +_S("Gravesmall") +_S("Asmall") +_S("Bsmall") +_S("Csmall") +_S("Dsmall") +_S("Esmall") +_S("Fsmall") +_S("Gsmall") +_S("Hsmall") +_S("Ismall") +_S("Jsmall") +_S("Ksmall") +_S("Lsmall") +_S("Msmall") +_S("Nsmall") +_S("Osmall") +_S("Psmall") +_S("Qsmall") +_S("Rsmall") +_S("Ssmall") +_S("Tsmall") +_S("Usmall") +_S("Vsmall") +_S("Wsmall") +_S("Xsmall") +_S("Ysmall") +_S("Zsmall") +_S("colonmonetary") +_S("onefitted") +_S("rupiah") +_S("Tildesmall") +_S("exclamdownsmall") +_S("centoldstyle") +_S("Lslashsmall") +_S("Scaronsmall") +_S("Zcaronsmall") +_S("Dieresissmall") +_S("Brevesmall") +_S("Caronsmall") +_S("Dotaccentsmall") +_S("Macronsmall") +_S("figuredash") +_S("hypheninferior") +_S("Ogoneksmall") +_S("Ringsmall") +_S("Cedillasmall") +_S("questiondownsmall") +_S("oneeighth") +_S("threeeighths") +_S("fiveeighths") +_S("seveneighths") +_S("onethird") +_S("twothirds") +_S("zerosuperior") +_S("foursuperior") +_S("fivesuperior") +_S("sixsuperior") +_S("sevensuperior") +_S("eightsuperior") +_S("ninesuperior") +_S("zeroinferior") +_S("oneinferior") +_S("twoinferior") +_S("threeinferior") +_S("fourinferior") +_S("fiveinferior") +_S("sixinferior") +_S("seveninferior") +_S("eightinferior") +_S("nineinferior") +_S("centinferior") +_S("dollarinferior") +_S("periodinferior") +_S("commainferior") +_S("Agravesmall") +_S("Aacutesmall") +_S("Acircumflexsmall") +_S("Atildesmall") +_S("Adieresissmall") +_S("Aringsmall") +_S("AEsmall") +_S("Ccedillasmall") +_S("Egravesmall") +_S("Eacutesmall") +_S("Ecircumflexsmall") +_S("Edieresissmall") +_S("Igravesmall") +_S("Iacutesmall") +_S("Icircumflexsmall") +_S("Idieresissmall") +_S("Ethsmall") +_S("Ntildesmall") +_S("Ogravesmall") +_S("Oacutesmall") +_S("Ocircumflexsmall") +_S("Otildesmall") +_S("Odieresissmall") +_S("OEsmall") +_S("Oslashsmall") +_S("Ugravesmall") +_S("Uacutesmall") +_S("Ucircumflexsmall") +_S("Udieresissmall") +_S("Yacutesmall") +_S("Thornsmall") +_S("Ydieresissmall") +_S("001.000") +_S("001.001") +_S("001.002") +_S("001.003") +_S("Black") +_S("Bold") +_S("Book") +_S("Light") +_S("Medium") +_S("Regular") +_S("Roman") +_S("Semibold") + +#endif /* HB_OT_CFF1_STD_STR_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-cff1-table.cc b/thirdparty/harfbuzz/src/hb-ot-cff1-table.cc new file mode 100644 index 0000000000..66b9c8c907 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-cff1-table.cc @@ -0,0 +1,620 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#include "hb.hh" + +#ifndef HB_NO_CFF + +#include "hb-draw.hh" +#include "hb-algs.hh" +#include "hb-ot-cff1-table.hh" +#include "hb-cff1-interp-cs.hh" + +using namespace CFF; + +struct sid_to_gid_t +{ + uint16_t sid; + uint8_t gid; + + int cmp (uint16_t a) const + { + if (a == sid) return 0; + return (a < sid) ? -1 : 1; + } +}; + +/* SID to code */ +static const uint8_t standard_encoding_to_code [] = +{ + 0, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, + 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, + 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, + 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 177, + 178, 179, 180, 182, 183, 184, 185, 186, 187, 188, 189, 191, 193, 194, 195, 196, + 197, 198, 199, 200, 202, 203, 205, 206, 207, 208, 225, 227, 232, 233, 234, 235, + 241, 245, 248, 249, 250, 251 +}; + +/* SID to code */ +static const uint8_t expert_encoding_to_code [] = +{ + 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 45, 46, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 59, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 87, 88, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 201, 0, 0, 0, 0, 189, 0, 0, 188, 0, + 0, 0, 0, 190, 202, 0, 0, 0, 0, 203, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 33, 34, 36, 37, 38, 39, 40, 41, 42, 43, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 63, 65, 66, 67, + 68, 69, 73, 76, 77, 78, 79, 82, 83, 84, 86, 89, 90, 91, 93, 94, + 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, + 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, + 161, 162, 163, 166, 167, 168, 169, 170, 172, 175, 178, 179, 182, 183, 184, 191, + 192, 193, 194, 195, 196, 197, 200, 204, 205, 206, 207, 208, 209, 210, 211, 212, + 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, + 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, + 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 +}; + +/* glyph ID to SID */ +static const uint16_t expert_charset_to_sid [] = +{ + 0, 1, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 13, 14, 15, 99, + 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 27, 28, 249, 250, 251, 252, + 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 109, 110, + 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, + 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, + 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, + 315, 316, 317, 318, 158, 155, 163, 319, 320, 321, 322, 323, 324, 325, 326, 150, + 164, 169, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, + 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, + 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, + 373, 374, 375, 376, 377, 378 +}; + +/* glyph ID to SID */ +static const uint16_t expert_subset_charset_to_sid [] = +{ + 0, 1, 231, 232, 235, 236, 237, 238, 13, 14, 15, 99, 239, 240, 241, 242, + 243, 244, 245, 246, 247, 248, 27, 28, 249, 250, 251, 253, 254, 255, 256, 257, + 258, 259, 260, 261, 262, 263, 264, 265, 266, 109, 110, 267, 268, 269, 270, 272, + 300, 301, 302, 305, 314, 315, 158, 155, 163, 320, 321, 322, 323, 324, 325, 326, + 150, 164, 169, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, + 340, 341, 342, 343, 344, 345, 346 +}; + +/* SID to glyph ID */ +static const sid_to_gid_t expert_charset_sid_to_gid [] = +{ + { 1, 1 }, { 13, 12 }, { 14, 13 }, { 15, 14 }, + { 27, 26 }, { 28, 27 }, { 99, 15 }, { 109, 46 }, + { 110, 47 }, { 150, 111 }, { 155, 101 }, { 158, 100 }, + { 163, 102 }, { 164, 112 }, { 169, 113 }, { 229, 2 }, + { 230, 3 }, { 231, 4 }, { 232, 5 }, { 233, 6 }, + { 234, 7 }, { 235, 8 }, { 236, 9 }, { 237, 10 }, + { 238, 11 }, { 239, 16 }, { 240, 17 }, { 241, 18 }, + { 242, 19 }, { 243, 20 }, { 244, 21 }, { 245, 22 }, + { 246, 23 }, { 247, 24 }, { 248, 25 }, { 249, 28 }, + { 250, 29 }, { 251, 30 }, { 252, 31 }, { 253, 32 }, + { 254, 33 }, { 255, 34 }, { 256, 35 }, { 257, 36 }, + { 258, 37 }, { 259, 38 }, { 260, 39 }, { 261, 40 }, + { 262, 41 }, { 263, 42 }, { 264, 43 }, { 265, 44 }, + { 266, 45 }, { 267, 48 }, { 268, 49 }, { 269, 50 }, + { 270, 51 }, { 271, 52 }, { 272, 53 }, { 273, 54 }, + { 274, 55 }, { 275, 56 }, { 276, 57 }, { 277, 58 }, + { 278, 59 }, { 279, 60 }, { 280, 61 }, { 281, 62 }, + { 282, 63 }, { 283, 64 }, { 284, 65 }, { 285, 66 }, + { 286, 67 }, { 287, 68 }, { 288, 69 }, { 289, 70 }, + { 290, 71 }, { 291, 72 }, { 292, 73 }, { 293, 74 }, + { 294, 75 }, { 295, 76 }, { 296, 77 }, { 297, 78 }, + { 298, 79 }, { 299, 80 }, { 300, 81 }, { 301, 82 }, + { 302, 83 }, { 303, 84 }, { 304, 85 }, { 305, 86 }, + { 306, 87 }, { 307, 88 }, { 308, 89 }, { 309, 90 }, + { 310, 91 }, { 311, 92 }, { 312, 93 }, { 313, 94 }, + { 314, 95 }, { 315, 96 }, { 316, 97 }, { 317, 98 }, + { 318, 99 }, { 319, 103 }, { 320, 104 }, { 321, 105 }, + { 322, 106 }, { 323, 107 }, { 324, 108 }, { 325, 109 }, + { 326, 110 }, { 327, 114 }, { 328, 115 }, { 329, 116 }, + { 330, 117 }, { 331, 118 }, { 332, 119 }, { 333, 120 }, + { 334, 121 }, { 335, 122 }, { 336, 123 }, { 337, 124 }, + { 338, 125 }, { 339, 126 }, { 340, 127 }, { 341, 128 }, + { 342, 129 }, { 343, 130 }, { 344, 131 }, { 345, 132 }, + { 346, 133 }, { 347, 134 }, { 348, 135 }, { 349, 136 }, + { 350, 137 }, { 351, 138 }, { 352, 139 }, { 353, 140 }, + { 354, 141 }, { 355, 142 }, { 356, 143 }, { 357, 144 }, + { 358, 145 }, { 359, 146 }, { 360, 147 }, { 361, 148 }, + { 362, 149 }, { 363, 150 }, { 364, 151 }, { 365, 152 }, + { 366, 153 }, { 367, 154 }, { 368, 155 }, { 369, 156 }, + { 370, 157 }, { 371, 158 }, { 372, 159 }, { 373, 160 }, + { 374, 161 }, { 375, 162 }, { 376, 163 }, { 377, 164 }, + { 378, 165 } +}; + +/* SID to glyph ID */ +static const sid_to_gid_t expert_subset_charset_sid_to_gid [] = +{ + { 1, 1 }, { 13, 8 }, { 14, 9 }, { 15, 10 }, + { 27, 22 }, { 28, 23 }, { 99, 11 }, { 109, 41 }, + { 110, 42 }, { 150, 64 }, { 155, 55 }, { 158, 54 }, + { 163, 56 }, { 164, 65 }, { 169, 66 }, { 231, 2 }, + { 232, 3 }, { 235, 4 }, { 236, 5 }, { 237, 6 }, + { 238, 7 }, { 239, 12 }, { 240, 13 }, { 241, 14 }, + { 242, 15 }, { 243, 16 }, { 244, 17 }, { 245, 18 }, + { 246, 19 }, { 247, 20 }, { 248, 21 }, { 249, 24 }, + { 250, 25 }, { 251, 26 }, { 253, 27 }, { 254, 28 }, + { 255, 29 }, { 256, 30 }, { 257, 31 }, { 258, 32 }, + { 259, 33 }, { 260, 34 }, { 261, 35 }, { 262, 36 }, + { 263, 37 }, { 264, 38 }, { 265, 39 }, { 266, 40 }, + { 267, 43 }, { 268, 44 }, { 269, 45 }, { 270, 46 }, + { 272, 47 }, { 300, 48 }, { 301, 49 }, { 302, 50 }, + { 305, 51 }, { 314, 52 }, { 315, 53 }, { 320, 57 }, + { 321, 58 }, { 322, 59 }, { 323, 60 }, { 324, 61 }, + { 325, 62 }, { 326, 63 }, { 327, 67 }, { 328, 68 }, + { 329, 69 }, { 330, 70 }, { 331, 71 }, { 332, 72 }, + { 333, 73 }, { 334, 74 }, { 335, 75 }, { 336, 76 }, + { 337, 77 }, { 338, 78 }, { 339, 79 }, { 340, 80 }, + { 341, 81 }, { 342, 82 }, { 343, 83 }, { 344, 84 }, + { 345, 85 }, { 346, 86 } +}; + +/* code to SID */ +static const uint8_t standard_encoding_to_sid [] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, + 0, 111, 112, 113, 114, 0, 115, 116, 117, 118, 119, 120, 121, 122, 0, 123, + 0, 124, 125, 126, 127, 128, 129, 130, 131, 0, 132, 133, 0, 134, 135, 136, + 137, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 138, 0, 139, 0, 0, 0, 0, 140, 141, 142, 143, 0, 0, 0, 0, + 0, 144, 0, 0, 0, 145, 0, 0, 146, 147, 148, 149, 0, 0, 0, 0 +}; + +hb_codepoint_t OT::cff1::lookup_standard_encoding_for_code (hb_codepoint_t sid) +{ + if (sid < ARRAY_LENGTH (standard_encoding_to_code)) + return (hb_codepoint_t)standard_encoding_to_code[sid]; + else + return 0; +} + +hb_codepoint_t OT::cff1::lookup_expert_encoding_for_code (hb_codepoint_t sid) +{ + if (sid < ARRAY_LENGTH (expert_encoding_to_code)) + return (hb_codepoint_t)expert_encoding_to_code[sid]; + else + return 0; +} + +hb_codepoint_t OT::cff1::lookup_expert_charset_for_sid (hb_codepoint_t glyph) +{ + if (glyph < ARRAY_LENGTH (expert_charset_to_sid)) + return (hb_codepoint_t)expert_charset_to_sid[glyph]; + else + return 0; +} + +hb_codepoint_t OT::cff1::lookup_expert_subset_charset_for_sid (hb_codepoint_t glyph) +{ + if (glyph < ARRAY_LENGTH (expert_subset_charset_to_sid)) + return (hb_codepoint_t)expert_subset_charset_to_sid[glyph]; + else + return 0; +} + +hb_codepoint_t OT::cff1::lookup_expert_charset_for_glyph (hb_codepoint_t sid) +{ + const auto *pair = hb_sorted_array (expert_charset_sid_to_gid).bsearch (sid); + return pair ? pair->gid : 0; +} + +hb_codepoint_t OT::cff1::lookup_expert_subset_charset_for_glyph (hb_codepoint_t sid) +{ + const auto *pair = hb_sorted_array (expert_subset_charset_sid_to_gid).bsearch (sid); + return pair ? pair->gid : 0; +} + +hb_codepoint_t OT::cff1::lookup_standard_encoding_for_sid (hb_codepoint_t code) +{ + if (code < ARRAY_LENGTH (standard_encoding_to_sid)) + return (hb_codepoint_t)standard_encoding_to_sid[code]; + else + return CFF_UNDEF_SID; +} + +struct bounds_t +{ + void init () + { + min.set_int (INT_MAX, INT_MAX); + max.set_int (INT_MIN, INT_MIN); + } + + void update (const point_t &pt) + { + if (pt.x < min.x) min.x = pt.x; + if (pt.x > max.x) max.x = pt.x; + if (pt.y < min.y) min.y = pt.y; + if (pt.y > max.y) max.y = pt.y; + } + + void merge (const bounds_t &b) + { + if (empty ()) + *this = b; + else if (!b.empty ()) + { + if (b.min.x < min.x) min.x = b.min.x; + if (b.max.x > max.x) max.x = b.max.x; + if (b.min.y < min.y) min.y = b.min.y; + if (b.max.y > max.y) max.y = b.max.y; + } + } + + void offset (const point_t &delta) + { + if (!empty ()) + { + min.move (delta); + max.move (delta); + } + } + + bool empty () const { return (min.x >= max.x) || (min.y >= max.y); } + + point_t min; + point_t max; +}; + +struct cff1_extents_param_t +{ + void init (const OT::cff1::accelerator_t *_cff) + { + path_open = false; + cff = _cff; + bounds.init (); + } + + void start_path () { path_open = true; } + void end_path () { path_open = false; } + bool is_path_open () const { return path_open; } + + bool path_open; + bounds_t bounds; + + const OT::cff1::accelerator_t *cff; +}; + +struct cff1_path_procs_extents_t : path_procs_t<cff1_path_procs_extents_t, cff1_cs_interp_env_t, cff1_extents_param_t> +{ + static void moveto (cff1_cs_interp_env_t &env, cff1_extents_param_t& param, const point_t &pt) + { + param.end_path (); + env.moveto (pt); + } + + static void line (cff1_cs_interp_env_t &env, cff1_extents_param_t& param, const point_t &pt1) + { + if (!param.is_path_open ()) + { + param.start_path (); + param.bounds.update (env.get_pt ()); + } + env.moveto (pt1); + param.bounds.update (env.get_pt ()); + } + + static void curve (cff1_cs_interp_env_t &env, cff1_extents_param_t& param, const point_t &pt1, const point_t &pt2, const point_t &pt3) + { + if (!param.is_path_open ()) + { + param.start_path (); + param.bounds.update (env.get_pt ()); + } + /* include control points */ + param.bounds.update (pt1); + param.bounds.update (pt2); + env.moveto (pt3); + param.bounds.update (env.get_pt ()); + } +}; + +static bool _get_bounds (const OT::cff1::accelerator_t *cff, hb_codepoint_t glyph, bounds_t &bounds, bool in_seac=false); + +struct cff1_cs_opset_extents_t : cff1_cs_opset_t<cff1_cs_opset_extents_t, cff1_extents_param_t, cff1_path_procs_extents_t> +{ + static void process_seac (cff1_cs_interp_env_t &env, cff1_extents_param_t& param) + { + unsigned int n = env.argStack.get_count (); + point_t delta; + delta.x = env.argStack[n-4]; + delta.y = env.argStack[n-3]; + hb_codepoint_t base = param.cff->std_code_to_glyph (env.argStack[n-2].to_int ()); + hb_codepoint_t accent = param.cff->std_code_to_glyph (env.argStack[n-1].to_int ()); + + bounds_t base_bounds, accent_bounds; + if (likely (!env.in_seac && base && accent + && _get_bounds (param.cff, base, base_bounds, true) + && _get_bounds (param.cff, accent, accent_bounds, true))) + { + param.bounds.merge (base_bounds); + accent_bounds.offset (delta); + param.bounds.merge (accent_bounds); + } + else + env.set_error (); + } +}; + +bool _get_bounds (const OT::cff1::accelerator_t *cff, hb_codepoint_t glyph, bounds_t &bounds, bool in_seac) +{ + bounds.init (); + if (unlikely (!cff->is_valid () || (glyph >= cff->num_glyphs))) return false; + + unsigned int fd = cff->fdSelect->get_fd (glyph); + cff1_cs_interpreter_t<cff1_cs_opset_extents_t, cff1_extents_param_t> interp; + const byte_str_t str = (*cff->charStrings)[glyph]; + interp.env.init (str, *cff, fd); + interp.env.set_in_seac (in_seac); + cff1_extents_param_t param; + param.init (cff); + if (unlikely (!interp.interpret (param))) return false; + bounds = param.bounds; + return true; +} + +bool OT::cff1::accelerator_t::get_extents (hb_font_t *font, hb_codepoint_t glyph, hb_glyph_extents_t *extents) const +{ +#ifdef HB_NO_OT_FONT_CFF + /* XXX Remove check when this code moves to .hh file. */ + return true; +#endif + + bounds_t bounds; + + if (!_get_bounds (this, glyph, bounds)) + return false; + + if (bounds.min.x >= bounds.max.x) + { + extents->width = 0; + extents->x_bearing = 0; + } + else + { + extents->x_bearing = font->em_scalef_x (bounds.min.x.to_real ()); + extents->width = font->em_scalef_x (bounds.max.x.to_real () - bounds.min.x.to_real ()); + } + if (bounds.min.y >= bounds.max.y) + { + extents->height = 0; + extents->y_bearing = 0; + } + else + { + extents->y_bearing = font->em_scalef_y (bounds.max.y.to_real ()); + extents->height = font->em_scalef_y (bounds.min.y.to_real () - bounds.max.y.to_real ()); + } + + return true; +} + +#ifdef HB_EXPERIMENTAL_API +struct cff1_path_param_t +{ + cff1_path_param_t (const OT::cff1::accelerator_t *cff_, hb_font_t *font_, + draw_helper_t &draw_helper_, point_t *delta_) + { + draw_helper = &draw_helper_; + cff = cff_; + font = font_; + delta = delta_; + } + + void move_to (const point_t &p) + { + point_t point = p; + if (delta) point.move (*delta); + draw_helper->move_to (font->em_scalef_x (point.x.to_real ()), font->em_scalef_y (point.y.to_real ())); + } + + void line_to (const point_t &p) + { + point_t point = p; + if (delta) point.move (*delta); + draw_helper->line_to (font->em_scalef_x (point.x.to_real ()), font->em_scalef_y (point.y.to_real ())); + } + + void cubic_to (const point_t &p1, const point_t &p2, const point_t &p3) + { + point_t point1 = p1, point2 = p2, point3 = p3; + if (delta) + { + point1.move (*delta); + point2.move (*delta); + point3.move (*delta); + } + draw_helper->cubic_to (font->em_scalef_x (point1.x.to_real ()), font->em_scalef_y (point1.y.to_real ()), + font->em_scalef_x (point2.x.to_real ()), font->em_scalef_y (point2.y.to_real ()), + font->em_scalef_x (point3.x.to_real ()), font->em_scalef_y (point3.y.to_real ())); + } + + void end_path () { draw_helper->end_path (); } + + hb_font_t *font; + draw_helper_t *draw_helper; + point_t *delta; + + const OT::cff1::accelerator_t *cff; +}; + +struct cff1_path_procs_path_t : path_procs_t<cff1_path_procs_path_t, cff1_cs_interp_env_t, cff1_path_param_t> +{ + static void moveto (cff1_cs_interp_env_t &env, cff1_path_param_t& param, const point_t &pt) + { + param.move_to (pt); + env.moveto (pt); + } + + static void line (cff1_cs_interp_env_t &env, cff1_path_param_t ¶m, const point_t &pt1) + { + param.line_to (pt1); + env.moveto (pt1); + } + + static void curve (cff1_cs_interp_env_t &env, cff1_path_param_t ¶m, const point_t &pt1, const point_t &pt2, const point_t &pt3) + { + param.cubic_to (pt1, pt2, pt3); + env.moveto (pt3); + } +}; + +static bool _get_path (const OT::cff1::accelerator_t *cff, hb_font_t *font, hb_codepoint_t glyph, + draw_helper_t &draw_helper, bool in_seac = false, point_t *delta = nullptr); + +struct cff1_cs_opset_path_t : cff1_cs_opset_t<cff1_cs_opset_path_t, cff1_path_param_t, cff1_path_procs_path_t> +{ + static void process_seac (cff1_cs_interp_env_t &env, cff1_path_param_t& param) + { + /* End previous path */ + param.end_path (); + + unsigned int n = env.argStack.get_count (); + point_t delta; + delta.x = env.argStack[n-4]; + delta.y = env.argStack[n-3]; + hb_codepoint_t base = param.cff->std_code_to_glyph (env.argStack[n-2].to_int ()); + hb_codepoint_t accent = param.cff->std_code_to_glyph (env.argStack[n-1].to_int ()); + + if (unlikely (!(!env.in_seac && base && accent + && _get_path (param.cff, param.font, base, *param.draw_helper, true) + && _get_path (param.cff, param.font, accent, *param.draw_helper, true, &delta)))) + env.set_error (); + } +}; + +bool _get_path (const OT::cff1::accelerator_t *cff, hb_font_t *font, hb_codepoint_t glyph, + draw_helper_t &draw_helper, bool in_seac, point_t *delta) +{ + if (unlikely (!cff->is_valid () || (glyph >= cff->num_glyphs))) return false; + + unsigned int fd = cff->fdSelect->get_fd (glyph); + cff1_cs_interpreter_t<cff1_cs_opset_path_t, cff1_path_param_t> interp; + const byte_str_t str = (*cff->charStrings)[glyph]; + interp.env.init (str, *cff, fd); + interp.env.set_in_seac (in_seac); + cff1_path_param_t param (cff, font, draw_helper, delta); + if (unlikely (!interp.interpret (param))) return false; + + /* Let's end the path specially since it is called inside seac also */ + param.end_path (); + + return true; +} + +bool OT::cff1::accelerator_t::get_path (hb_font_t *font, hb_codepoint_t glyph, draw_helper_t &draw_helper) const +{ +#ifdef HB_NO_OT_FONT_CFF + /* XXX Remove check when this code moves to .hh file. */ + return true; +#endif + + return _get_path (this, font, glyph, draw_helper); +} +#endif + +struct get_seac_param_t +{ + void init (const OT::cff1::accelerator_t *_cff) + { + cff = _cff; + base = 0; + accent = 0; + } + + bool has_seac () const { return base && accent; } + + const OT::cff1::accelerator_t *cff; + hb_codepoint_t base; + hb_codepoint_t accent; +}; + +struct cff1_cs_opset_seac_t : cff1_cs_opset_t<cff1_cs_opset_seac_t, get_seac_param_t> +{ + static void process_seac (cff1_cs_interp_env_t &env, get_seac_param_t& param) + { + unsigned int n = env.argStack.get_count (); + hb_codepoint_t base_char = (hb_codepoint_t)env.argStack[n-2].to_int (); + hb_codepoint_t accent_char = (hb_codepoint_t)env.argStack[n-1].to_int (); + + param.base = param.cff->std_code_to_glyph (base_char); + param.accent = param.cff->std_code_to_glyph (accent_char); + } +}; + +bool OT::cff1::accelerator_t::get_seac_components (hb_codepoint_t glyph, hb_codepoint_t *base, hb_codepoint_t *accent) const +{ + if (unlikely (!is_valid () || (glyph >= num_glyphs))) return false; + + unsigned int fd = fdSelect->get_fd (glyph); + cff1_cs_interpreter_t<cff1_cs_opset_seac_t, get_seac_param_t> interp; + const byte_str_t str = (*charStrings)[glyph]; + interp.env.init (str, *this, fd); + get_seac_param_t param; + param.init (this); + if (unlikely (!interp.interpret (param))) return false; + + if (param.has_seac ()) + { + *base = param.base; + *accent = param.accent; + return true; + } + return false; +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh b/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh new file mode 100644 index 0000000000..7228f77727 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh @@ -0,0 +1,1403 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#ifndef HB_OT_CFF1_TABLE_HH +#define HB_OT_CFF1_TABLE_HH + +#include "hb-ot-cff-common.hh" +#include "hb-subset-cff1.hh" +#include "hb-draw.hh" + +#define HB_STRING_ARRAY_NAME cff1_std_strings +#define HB_STRING_ARRAY_LIST "hb-ot-cff1-std-str.hh" +#include "hb-string-array.hh" +#undef HB_STRING_ARRAY_LIST +#undef HB_STRING_ARRAY_NAME + +namespace CFF { + +/* + * CFF -- Compact Font Format (CFF) + * https://www.adobe.com/content/dam/acom/en/devnet/font/pdfs/5176.CFF.pdf + */ +#define HB_OT_TAG_cff1 HB_TAG('C','F','F',' ') + +#define CFF_UNDEF_SID CFF_UNDEF_CODE + +enum EncodingID { StandardEncoding = 0, ExpertEncoding = 1 }; +enum CharsetID { ISOAdobeCharset = 0, ExpertCharset = 1, ExpertSubsetCharset = 2 }; + +typedef CFFIndex<HBUINT16> CFF1Index; +template <typename Type> struct CFF1IndexOf : CFFIndexOf<HBUINT16, Type> {}; + +typedef CFFIndex<HBUINT16> CFF1Index; +typedef CFF1Index CFF1CharStrings; +typedef Subrs<HBUINT16> CFF1Subrs; + +struct CFF1FDSelect : FDSelect {}; + +/* Encoding */ +struct Encoding0 { + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (codes.sanitize (c)); + } + + hb_codepoint_t get_code (hb_codepoint_t glyph) const + { + assert (glyph > 0); + glyph--; + if (glyph < nCodes ()) + { + return (hb_codepoint_t)codes[glyph]; + } + else + return CFF_UNDEF_CODE; + } + + HBUINT8 &nCodes () { return codes.len; } + HBUINT8 nCodes () const { return codes.len; } + + ArrayOf<HBUINT8, HBUINT8> codes; + + DEFINE_SIZE_ARRAY_SIZED (1, codes); +}; + +struct Encoding1_Range { + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + HBUINT8 first; + HBUINT8 nLeft; + + DEFINE_SIZE_STATIC (2); +}; + +struct Encoding1 { + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (ranges.sanitize (c)); + } + + hb_codepoint_t get_code (hb_codepoint_t glyph) const + { + assert (glyph > 0); + glyph--; + for (unsigned int i = 0; i < nRanges (); i++) + { + if (glyph <= ranges[i].nLeft) + { + hb_codepoint_t code = (hb_codepoint_t) ranges[i].first + glyph; + return (likely (code < 0x100) ? code: CFF_UNDEF_CODE); + } + glyph -= (ranges[i].nLeft + 1); + } + return CFF_UNDEF_CODE; + } + + HBUINT8 &nRanges () { return ranges.len; } + HBUINT8 nRanges () const { return ranges.len; } + + ArrayOf<Encoding1_Range, HBUINT8> ranges; + + DEFINE_SIZE_ARRAY_SIZED (1, ranges); +}; + +struct SuppEncoding { + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + HBUINT8 code; + HBUINT16 glyph; + + DEFINE_SIZE_STATIC (3); +}; + +struct CFF1SuppEncData { + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (supps.sanitize (c)); + } + + void get_codes (hb_codepoint_t sid, hb_vector_t<hb_codepoint_t> &codes) const + { + for (unsigned int i = 0; i < nSups (); i++) + if (sid == supps[i].glyph) + codes.push (supps[i].code); + } + + HBUINT8 &nSups () { return supps.len; } + HBUINT8 nSups () const { return supps.len; } + + ArrayOf<SuppEncoding, HBUINT8> supps; + + DEFINE_SIZE_ARRAY_SIZED (1, supps); +}; + +struct Encoding +{ + /* serialize a fullset Encoding */ + bool serialize (hb_serialize_context_t *c, const Encoding &src) + { + TRACE_SERIALIZE (this); + unsigned int size = src.get_size (); + Encoding *dest = c->allocate_size<Encoding> (size); + if (unlikely (!dest)) return_trace (false); + memcpy (dest, &src, size); + return_trace (true); + } + + /* serialize a subset Encoding */ + bool serialize (hb_serialize_context_t *c, + uint8_t format, + unsigned int enc_count, + const hb_vector_t<code_pair_t>& code_ranges, + const hb_vector_t<code_pair_t>& supp_codes) + { + TRACE_SERIALIZE (this); + Encoding *dest = c->extend_min (*this); + if (unlikely (!dest)) return_trace (false); + dest->format = format | ((supp_codes.length > 0) ? 0x80 : 0); + switch (format) { + case 0: + { + Encoding0 *fmt0 = c->allocate_size<Encoding0> (Encoding0::min_size + HBUINT8::static_size * enc_count); + if (unlikely (!fmt0)) return_trace (false); + fmt0->nCodes () = enc_count; + unsigned int glyph = 0; + for (unsigned int i = 0; i < code_ranges.length; i++) + { + hb_codepoint_t code = code_ranges[i].code; + for (int left = (int)code_ranges[i].glyph; left >= 0; left--) + fmt0->codes[glyph++] = code++; + if (unlikely (!((glyph <= 0x100) && (code <= 0x100)))) + return_trace (false); + } + } + break; + + case 1: + { + Encoding1 *fmt1 = c->allocate_size<Encoding1> (Encoding1::min_size + Encoding1_Range::static_size * code_ranges.length); + if (unlikely (!fmt1)) return_trace (false); + fmt1->nRanges () = code_ranges.length; + for (unsigned int i = 0; i < code_ranges.length; i++) + { + if (unlikely (!((code_ranges[i].code <= 0xFF) && (code_ranges[i].glyph <= 0xFF)))) + return_trace (false); + fmt1->ranges[i].first = code_ranges[i].code; + fmt1->ranges[i].nLeft = code_ranges[i].glyph; + } + } + break; + + } + + if (supp_codes.length) + { + CFF1SuppEncData *suppData = c->allocate_size<CFF1SuppEncData> (CFF1SuppEncData::min_size + SuppEncoding::static_size * supp_codes.length); + if (unlikely (!suppData)) return_trace (false); + suppData->nSups () = supp_codes.length; + for (unsigned int i = 0; i < supp_codes.length; i++) + { + suppData->supps[i].code = supp_codes[i].code; + suppData->supps[i].glyph = supp_codes[i].glyph; /* actually SID */ + } + } + + return_trace (true); + } + + unsigned int get_size () const + { + unsigned int size = min_size; + switch (table_format ()) + { + case 0: size += u.format0.get_size (); break; + case 1: size += u.format1.get_size (); break; + } + if (has_supplement ()) + size += suppEncData ().get_size (); + return size; + } + + hb_codepoint_t get_code (hb_codepoint_t glyph) const + { + switch (table_format ()) + { + case 0: return u.format0.get_code (glyph); + case 1: return u.format1.get_code (glyph); + default:return 0; + } + } + + uint8_t table_format () const { return format & 0x7F; } + bool has_supplement () const { return format & 0x80; } + + void get_supplement_codes (hb_codepoint_t sid, hb_vector_t<hb_codepoint_t> &codes) const + { + codes.resize (0); + if (has_supplement ()) + suppEncData().get_codes (sid, codes); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this))) + return_trace (false); + + switch (table_format ()) + { + case 0: if (unlikely (!u.format0.sanitize (c))) { return_trace (false); } break; + case 1: if (unlikely (!u.format1.sanitize (c))) { return_trace (false); } break; + default:return_trace (false); + } + return_trace (likely (!has_supplement () || suppEncData ().sanitize (c))); + } + + protected: + const CFF1SuppEncData &suppEncData () const + { + switch (table_format ()) + { + case 0: return StructAfter<CFF1SuppEncData> (u.format0.codes[u.format0.nCodes ()-1]); + case 1: return StructAfter<CFF1SuppEncData> (u.format1.ranges[u.format1.nRanges ()-1]); + default:return Null (CFF1SuppEncData); + } + } + + public: + HBUINT8 format; + union { + Encoding0 format0; + Encoding1 format1; + } u; + /* CFF1SuppEncData suppEncData; */ + + DEFINE_SIZE_MIN (1); +}; + +/* Charset */ +struct Charset0 { + bool sanitize (hb_sanitize_context_t *c, unsigned int num_glyphs) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && sids[num_glyphs - 1].sanitize (c)); + } + + hb_codepoint_t get_sid (hb_codepoint_t glyph) const + { + if (glyph == 0) + return 0; + else + return sids[glyph - 1]; + } + + hb_codepoint_t get_glyph (hb_codepoint_t sid, unsigned int num_glyphs) const + { + if (sid == 0) + return 0; + + for (unsigned int glyph = 1; glyph < num_glyphs; glyph++) + { + if (sids[glyph-1] == sid) + return glyph; + } + return 0; + } + + unsigned int get_size (unsigned int num_glyphs) const + { + assert (num_glyphs > 0); + return HBUINT16::static_size * (num_glyphs - 1); + } + + HBUINT16 sids[HB_VAR_ARRAY]; + + DEFINE_SIZE_ARRAY(0, sids); +}; + +template <typename TYPE> +struct Charset_Range { + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + HBUINT16 first; + TYPE nLeft; + + DEFINE_SIZE_STATIC (HBUINT16::static_size + TYPE::static_size); +}; + +template <typename TYPE> +struct Charset1_2 { + bool sanitize (hb_sanitize_context_t *c, unsigned int num_glyphs) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this))) + return_trace (false); + num_glyphs--; + for (unsigned int i = 0; num_glyphs > 0; i++) + { + if (unlikely (!ranges[i].sanitize (c) || (num_glyphs < ranges[i].nLeft + 1))) + return_trace (false); + num_glyphs -= (ranges[i].nLeft + 1); + } + return_trace (true); + } + + hb_codepoint_t get_sid (hb_codepoint_t glyph) const + { + if (glyph == 0) return 0; + glyph--; + for (unsigned int i = 0;; i++) + { + if (glyph <= ranges[i].nLeft) + return (hb_codepoint_t)ranges[i].first + glyph; + glyph -= (ranges[i].nLeft + 1); + } + + return 0; + } + + hb_codepoint_t get_glyph (hb_codepoint_t sid, unsigned int num_glyphs) const + { + if (sid == 0) return 0; + hb_codepoint_t glyph = 1; + for (unsigned int i = 0;; i++) + { + if (glyph >= num_glyphs) + return 0; + if ((ranges[i].first <= sid) && (sid <= ranges[i].first + ranges[i].nLeft)) + return glyph + (sid - ranges[i].first); + glyph += (ranges[i].nLeft + 1); + } + + return 0; + } + + unsigned int get_size (unsigned int num_glyphs) const + { + unsigned int size = HBUINT8::static_size; + int glyph = (int)num_glyphs; + + assert (glyph > 0); + glyph--; + for (unsigned int i = 0; glyph > 0; i++) + { + glyph -= (ranges[i].nLeft + 1); + size += Charset_Range<TYPE>::static_size; + } + + return size; + } + + Charset_Range<TYPE> ranges[HB_VAR_ARRAY]; + + DEFINE_SIZE_ARRAY (0, ranges); +}; + +typedef Charset1_2<HBUINT8> Charset1; +typedef Charset1_2<HBUINT16> Charset2; +typedef Charset_Range<HBUINT8> Charset1_Range; +typedef Charset_Range<HBUINT16> Charset2_Range; + +struct Charset +{ + /* serialize a fullset Charset */ + bool serialize (hb_serialize_context_t *c, const Charset &src, unsigned int num_glyphs) + { + TRACE_SERIALIZE (this); + unsigned int size = src.get_size (num_glyphs); + Charset *dest = c->allocate_size<Charset> (size); + if (unlikely (!dest)) return_trace (false); + memcpy (dest, &src, size); + return_trace (true); + } + + /* serialize a subset Charset */ + bool serialize (hb_serialize_context_t *c, + uint8_t format, + unsigned int num_glyphs, + const hb_vector_t<code_pair_t>& sid_ranges) + { + TRACE_SERIALIZE (this); + Charset *dest = c->extend_min (*this); + if (unlikely (!dest)) return_trace (false); + dest->format = format; + switch (format) + { + case 0: + { + Charset0 *fmt0 = c->allocate_size<Charset0> (Charset0::min_size + HBUINT16::static_size * (num_glyphs - 1)); + if (unlikely (!fmt0)) return_trace (false); + unsigned int glyph = 0; + for (unsigned int i = 0; i < sid_ranges.length; i++) + { + hb_codepoint_t sid = sid_ranges[i].code; + for (int left = (int)sid_ranges[i].glyph; left >= 0; left--) + fmt0->sids[glyph++] = sid++; + } + } + break; + + case 1: + { + Charset1 *fmt1 = c->allocate_size<Charset1> (Charset1::min_size + Charset1_Range::static_size * sid_ranges.length); + if (unlikely (!fmt1)) return_trace (false); + for (unsigned int i = 0; i < sid_ranges.length; i++) + { + if (unlikely (!(sid_ranges[i].glyph <= 0xFF))) + return_trace (false); + fmt1->ranges[i].first = sid_ranges[i].code; + fmt1->ranges[i].nLeft = sid_ranges[i].glyph; + } + } + break; + + case 2: + { + Charset2 *fmt2 = c->allocate_size<Charset2> (Charset2::min_size + Charset2_Range::static_size * sid_ranges.length); + if (unlikely (!fmt2)) return_trace (false); + for (unsigned int i = 0; i < sid_ranges.length; i++) + { + if (unlikely (!(sid_ranges[i].glyph <= 0xFFFF))) + return_trace (false); + fmt2->ranges[i].first = sid_ranges[i].code; + fmt2->ranges[i].nLeft = sid_ranges[i].glyph; + } + } + break; + + } + return_trace (true); + } + + unsigned int get_size (unsigned int num_glyphs) const + { + switch (format) + { + case 0: return min_size + u.format0.get_size (num_glyphs); + case 1: return min_size + u.format1.get_size (num_glyphs); + case 2: return min_size + u.format2.get_size (num_glyphs); + default:return 0; + } + } + + hb_codepoint_t get_sid (hb_codepoint_t glyph, unsigned int num_glyphs) const + { + if (unlikely (glyph >= num_glyphs)) return 0; + switch (format) + { + case 0: return u.format0.get_sid (glyph); + case 1: return u.format1.get_sid (glyph); + case 2: return u.format2.get_sid (glyph); + default:return 0; + } + } + + hb_codepoint_t get_glyph (hb_codepoint_t sid, unsigned int num_glyphs) const + { + switch (format) + { + case 0: return u.format0.get_glyph (sid, num_glyphs); + case 1: return u.format1.get_glyph (sid, num_glyphs); + case 2: return u.format2.get_glyph (sid, num_glyphs); + default:return 0; + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this))) + return_trace (false); + + switch (format) + { + case 0: return_trace (u.format0.sanitize (c, c->get_num_glyphs ())); + case 1: return_trace (u.format1.sanitize (c, c->get_num_glyphs ())); + case 2: return_trace (u.format2.sanitize (c, c->get_num_glyphs ())); + default:return_trace (false); + } + } + + HBUINT8 format; + union { + Charset0 format0; + Charset1 format1; + Charset2 format2; + } u; + + DEFINE_SIZE_MIN (1); +}; + +struct CFF1StringIndex : CFF1Index +{ + bool serialize (hb_serialize_context_t *c, const CFF1StringIndex &strings, + const hb_inc_bimap_t &sidmap) + { + TRACE_SERIALIZE (this); + if (unlikely ((strings.count == 0) || (sidmap.get_population () == 0))) + { + if (unlikely (!c->extend_min (this->count))) + return_trace (false); + count = 0; + return_trace (true); + } + + byte_str_array_t bytesArray; + bytesArray.init (); + if (!bytesArray.resize (sidmap.get_population ())) + return_trace (false); + for (unsigned int i = 0; i < strings.count; i++) + { + hb_codepoint_t j = sidmap[i]; + if (j != HB_MAP_VALUE_INVALID) + bytesArray[j] = strings[i]; + } + + bool result = CFF1Index::serialize (c, bytesArray); + bytesArray.fini (); + return_trace (result); + } +}; + +struct cff1_top_dict_interp_env_t : num_interp_env_t +{ + cff1_top_dict_interp_env_t () + : num_interp_env_t(), prev_offset(0), last_offset(0) {} + + unsigned int prev_offset; + unsigned int last_offset; +}; + +struct name_dict_values_t +{ + enum name_dict_val_index_t + { + version, + notice, + copyright, + fullName, + familyName, + weight, + postscript, + fontName, + baseFontName, + registry, + ordering, + + ValCount + }; + + void init () + { + for (unsigned int i = 0; i < ValCount; i++) + values[i] = CFF_UNDEF_SID; + } + + unsigned int& operator[] (unsigned int i) + { assert (i < ValCount); return values[i]; } + + unsigned int operator[] (unsigned int i) const + { assert (i < ValCount); return values[i]; } + + static enum name_dict_val_index_t name_op_to_index (op_code_t op) + { + switch (op) { + default: // can't happen - just make some compiler happy + case OpCode_version: + return version; + case OpCode_Notice: + return notice; + case OpCode_Copyright: + return copyright; + case OpCode_FullName: + return fullName; + case OpCode_FamilyName: + return familyName; + case OpCode_Weight: + return weight; + case OpCode_PostScript: + return postscript; + case OpCode_FontName: + return fontName; + case OpCode_BaseFontName: + return baseFontName; + } + } + + unsigned int values[ValCount]; +}; + +struct cff1_top_dict_val_t : op_str_t +{ + unsigned int last_arg_offset; +}; + +struct cff1_top_dict_values_t : top_dict_values_t<cff1_top_dict_val_t> +{ + void init () + { + top_dict_values_t<cff1_top_dict_val_t>::init (); + + nameSIDs.init (); + ros_supplement = 0; + cidCount = 8720; + EncodingOffset = 0; + CharsetOffset = 0; + FDSelectOffset = 0; + privateDictInfo.init (); + } + void fini () { top_dict_values_t<cff1_top_dict_val_t>::fini (); } + + bool is_CID () const + { return nameSIDs[name_dict_values_t::registry] != CFF_UNDEF_SID; } + + name_dict_values_t nameSIDs; + unsigned int ros_supplement_offset; + unsigned int ros_supplement; + unsigned int cidCount; + + unsigned int EncodingOffset; + unsigned int CharsetOffset; + unsigned int FDSelectOffset; + table_info_t privateDictInfo; +}; + +struct cff1_top_dict_opset_t : top_dict_opset_t<cff1_top_dict_val_t> +{ + static void process_op (op_code_t op, cff1_top_dict_interp_env_t& env, cff1_top_dict_values_t& dictval) + { + cff1_top_dict_val_t val; + val.last_arg_offset = (env.last_offset-1) - dictval.opStart; /* offset to the last argument */ + + switch (op) { + case OpCode_version: + case OpCode_Notice: + case OpCode_Copyright: + case OpCode_FullName: + case OpCode_FamilyName: + case OpCode_Weight: + case OpCode_PostScript: + case OpCode_BaseFontName: + dictval.nameSIDs[name_dict_values_t::name_op_to_index (op)] = env.argStack.pop_uint (); + env.clear_args (); + break; + case OpCode_isFixedPitch: + case OpCode_ItalicAngle: + case OpCode_UnderlinePosition: + case OpCode_UnderlineThickness: + case OpCode_PaintType: + case OpCode_CharstringType: + case OpCode_UniqueID: + case OpCode_StrokeWidth: + case OpCode_SyntheticBase: + case OpCode_CIDFontVersion: + case OpCode_CIDFontRevision: + case OpCode_CIDFontType: + case OpCode_UIDBase: + case OpCode_FontBBox: + case OpCode_XUID: + case OpCode_BaseFontBlend: + env.clear_args (); + break; + + case OpCode_CIDCount: + dictval.cidCount = env.argStack.pop_uint (); + env.clear_args (); + break; + + case OpCode_ROS: + dictval.ros_supplement = env.argStack.pop_uint (); + dictval.nameSIDs[name_dict_values_t::ordering] = env.argStack.pop_uint (); + dictval.nameSIDs[name_dict_values_t::registry] = env.argStack.pop_uint (); + env.clear_args (); + break; + + case OpCode_Encoding: + dictval.EncodingOffset = env.argStack.pop_uint (); + env.clear_args (); + if (unlikely (dictval.EncodingOffset == 0)) return; + break; + + case OpCode_charset: + dictval.CharsetOffset = env.argStack.pop_uint (); + env.clear_args (); + if (unlikely (dictval.CharsetOffset == 0)) return; + break; + + case OpCode_FDSelect: + dictval.FDSelectOffset = env.argStack.pop_uint (); + env.clear_args (); + break; + + case OpCode_Private: + dictval.privateDictInfo.offset = env.argStack.pop_uint (); + dictval.privateDictInfo.size = env.argStack.pop_uint (); + env.clear_args (); + break; + + default: + env.last_offset = env.str_ref.offset; + top_dict_opset_t<cff1_top_dict_val_t>::process_op (op, env, dictval); + /* Record this operand below if stack is empty, otherwise done */ + if (!env.argStack.is_empty ()) return; + break; + } + + if (unlikely (env.in_error ())) return; + + dictval.add_op (op, env.str_ref, val); + } +}; + +struct cff1_font_dict_values_t : dict_values_t<op_str_t> +{ + void init () + { + dict_values_t<op_str_t>::init (); + privateDictInfo.init (); + fontName = CFF_UNDEF_SID; + } + void fini () { dict_values_t<op_str_t>::fini (); } + + table_info_t privateDictInfo; + unsigned int fontName; +}; + +struct cff1_font_dict_opset_t : dict_opset_t +{ + static void process_op (op_code_t op, num_interp_env_t& env, cff1_font_dict_values_t& dictval) + { + switch (op) { + case OpCode_FontName: + dictval.fontName = env.argStack.pop_uint (); + env.clear_args (); + break; + case OpCode_FontMatrix: + case OpCode_PaintType: + env.clear_args (); + break; + case OpCode_Private: + dictval.privateDictInfo.offset = env.argStack.pop_uint (); + dictval.privateDictInfo.size = env.argStack.pop_uint (); + env.clear_args (); + break; + + default: + dict_opset_t::process_op (op, env); + if (!env.argStack.is_empty ()) return; + break; + } + + if (unlikely (env.in_error ())) return; + + dictval.add_op (op, env.str_ref); + } +}; + +template <typename VAL> +struct cff1_private_dict_values_base_t : dict_values_t<VAL> +{ + void init () + { + dict_values_t<VAL>::init (); + subrsOffset = 0; + localSubrs = &Null (CFF1Subrs); + } + void fini () { dict_values_t<VAL>::fini (); } + + unsigned int subrsOffset; + const CFF1Subrs *localSubrs; +}; + +typedef cff1_private_dict_values_base_t<op_str_t> cff1_private_dict_values_subset_t; +typedef cff1_private_dict_values_base_t<num_dict_val_t> cff1_private_dict_values_t; + +struct cff1_private_dict_opset_t : dict_opset_t +{ + static void process_op (op_code_t op, num_interp_env_t& env, cff1_private_dict_values_t& dictval) + { + num_dict_val_t val; + val.init (); + + switch (op) { + case OpCode_BlueValues: + case OpCode_OtherBlues: + case OpCode_FamilyBlues: + case OpCode_FamilyOtherBlues: + case OpCode_StemSnapH: + case OpCode_StemSnapV: + env.clear_args (); + break; + case OpCode_StdHW: + case OpCode_StdVW: + case OpCode_BlueScale: + case OpCode_BlueShift: + case OpCode_BlueFuzz: + case OpCode_ForceBold: + case OpCode_LanguageGroup: + case OpCode_ExpansionFactor: + case OpCode_initialRandomSeed: + case OpCode_defaultWidthX: + case OpCode_nominalWidthX: + val.single_val = env.argStack.pop_num (); + env.clear_args (); + break; + case OpCode_Subrs: + dictval.subrsOffset = env.argStack.pop_uint (); + env.clear_args (); + break; + + default: + dict_opset_t::process_op (op, env); + if (!env.argStack.is_empty ()) return; + break; + } + + if (unlikely (env.in_error ())) return; + + dictval.add_op (op, env.str_ref, val); + } +}; + +struct cff1_private_dict_opset_subset : dict_opset_t +{ + static void process_op (op_code_t op, num_interp_env_t& env, cff1_private_dict_values_subset_t& dictval) + { + switch (op) { + case OpCode_BlueValues: + case OpCode_OtherBlues: + case OpCode_FamilyBlues: + case OpCode_FamilyOtherBlues: + case OpCode_StemSnapH: + case OpCode_StemSnapV: + case OpCode_StdHW: + case OpCode_StdVW: + case OpCode_BlueScale: + case OpCode_BlueShift: + case OpCode_BlueFuzz: + case OpCode_ForceBold: + case OpCode_LanguageGroup: + case OpCode_ExpansionFactor: + case OpCode_initialRandomSeed: + case OpCode_defaultWidthX: + case OpCode_nominalWidthX: + env.clear_args (); + break; + + case OpCode_Subrs: + dictval.subrsOffset = env.argStack.pop_uint (); + env.clear_args (); + break; + + default: + dict_opset_t::process_op (op, env); + if (!env.argStack.is_empty ()) return; + break; + } + + if (unlikely (env.in_error ())) return; + + dictval.add_op (op, env.str_ref); + } +}; + +typedef dict_interpreter_t<cff1_top_dict_opset_t, cff1_top_dict_values_t, cff1_top_dict_interp_env_t> cff1_top_dict_interpreter_t; +typedef dict_interpreter_t<cff1_font_dict_opset_t, cff1_font_dict_values_t> cff1_font_dict_interpreter_t; + +typedef CFF1Index CFF1NameIndex; +typedef CFF1IndexOf<TopDict> CFF1TopDictIndex; + +struct cff1_font_dict_values_mod_t +{ + cff1_font_dict_values_mod_t() { init (); } + + void init () { init ( &Null (cff1_font_dict_values_t), CFF_UNDEF_SID ); } + + void init (const cff1_font_dict_values_t *base_, + unsigned int fontName_) + { + base = base_; + fontName = fontName_; + privateDictInfo.init (); + } + + unsigned get_count () const { return base->get_count (); } + + const op_str_t &operator [] (unsigned int i) const { return (*base)[i]; } + + const cff1_font_dict_values_t *base; + table_info_t privateDictInfo; + unsigned int fontName; +}; + +struct CFF1FDArray : FDArray<HBUINT16> +{ + /* FDArray::serialize() requires this partial specialization to compile */ + template <typename ITER, typename OP_SERIALIZER> + bool serialize (hb_serialize_context_t *c, ITER it, OP_SERIALIZER& opszr) + { return FDArray<HBUINT16>::serialize<cff1_font_dict_values_mod_t, cff1_font_dict_values_mod_t> (c, it, opszr); } +}; + +} /* namespace CFF */ + +namespace OT { + +using namespace CFF; + +struct cff1 +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_cff1; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + likely (version.major == 1)); + } + + template <typename PRIVOPSET, typename PRIVDICTVAL> + struct accelerator_templ_t + { + void init (hb_face_t *face) + { + topDict.init (); + fontDicts.init (); + privateDicts.init (); + + this->blob = sc.reference_table<cff1> (face); + + /* setup for run-time santization */ + sc.init (this->blob); + sc.start_processing (); + + const OT::cff1 *cff = this->blob->template as<OT::cff1> (); + + if (cff == &Null (OT::cff1)) + { fini (); return; } + + nameIndex = &cff->nameIndex (cff); + if ((nameIndex == &Null (CFF1NameIndex)) || !nameIndex->sanitize (&sc)) + { fini (); return; } + + topDictIndex = &StructAtOffset<CFF1TopDictIndex> (nameIndex, nameIndex->get_size ()); + if ((topDictIndex == &Null (CFF1TopDictIndex)) || !topDictIndex->sanitize (&sc) || (topDictIndex->count == 0)) + { fini (); return; } + + { /* parse top dict */ + const byte_str_t topDictStr = (*topDictIndex)[0]; + if (unlikely (!topDictStr.sanitize (&sc))) { fini (); return; } + cff1_top_dict_interpreter_t top_interp; + top_interp.env.init (topDictStr); + topDict.init (); + if (unlikely (!top_interp.interpret (topDict))) { fini (); return; } + } + + if (is_predef_charset ()) + charset = &Null (Charset); + else + { + charset = &StructAtOffsetOrNull<Charset> (cff, topDict.CharsetOffset); + if (unlikely ((charset == &Null (Charset)) || !charset->sanitize (&sc))) { fini (); return; } + } + + fdCount = 1; + if (is_CID ()) + { + fdArray = &StructAtOffsetOrNull<CFF1FDArray> (cff, topDict.FDArrayOffset); + fdSelect = &StructAtOffsetOrNull<CFF1FDSelect> (cff, topDict.FDSelectOffset); + if (unlikely ((fdArray == &Null (CFF1FDArray)) || !fdArray->sanitize (&sc) || + (fdSelect == &Null (CFF1FDSelect)) || !fdSelect->sanitize (&sc, fdArray->count))) + { fini (); return; } + + fdCount = fdArray->count; + } + else + { + fdArray = &Null (CFF1FDArray); + fdSelect = &Null (CFF1FDSelect); + } + + encoding = &Null (Encoding); + if (is_CID ()) + { + if (unlikely (charset == &Null (Charset))) { fini (); return; } + } + else + { + if (!is_predef_encoding ()) + { + encoding = &StructAtOffsetOrNull<Encoding> (cff, topDict.EncodingOffset); + if (unlikely ((encoding == &Null (Encoding)) || !encoding->sanitize (&sc))) { fini (); return; } + } + } + + stringIndex = &StructAtOffset<CFF1StringIndex> (topDictIndex, topDictIndex->get_size ()); + if ((stringIndex == &Null (CFF1StringIndex)) || !stringIndex->sanitize (&sc)) + { fini (); return; } + + globalSubrs = &StructAtOffset<CFF1Subrs> (stringIndex, stringIndex->get_size ()); + if ((globalSubrs != &Null (CFF1Subrs)) && !globalSubrs->sanitize (&sc)) + { fini (); return; } + + charStrings = &StructAtOffsetOrNull<CFF1CharStrings> (cff, topDict.charStringsOffset); + + if ((charStrings == &Null (CFF1CharStrings)) || unlikely (!charStrings->sanitize (&sc))) + { fini (); return; } + + num_glyphs = charStrings->count; + if (num_glyphs != sc.get_num_glyphs ()) + { fini (); return; } + + if (unlikely (!privateDicts.resize (fdCount))) + { fini (); return; } + for (unsigned int i = 0; i < fdCount; i++) + privateDicts[i].init (); + + // parse CID font dicts and gather private dicts + if (is_CID ()) + { + for (unsigned int i = 0; i < fdCount; i++) + { + byte_str_t fontDictStr = (*fdArray)[i]; + if (unlikely (!fontDictStr.sanitize (&sc))) { fini (); return; } + cff1_font_dict_values_t *font; + cff1_font_dict_interpreter_t font_interp; + font_interp.env.init (fontDictStr); + font = fontDicts.push (); + if (unlikely (font == &Crap (cff1_font_dict_values_t))) { fini (); return; } + font->init (); + if (unlikely (!font_interp.interpret (*font))) { fini (); return; } + PRIVDICTVAL *priv = &privateDicts[i]; + const byte_str_t privDictStr (StructAtOffset<UnsizedByteStr> (cff, font->privateDictInfo.offset), font->privateDictInfo.size); + if (unlikely (!privDictStr.sanitize (&sc))) { fini (); return; } + dict_interpreter_t<PRIVOPSET, PRIVDICTVAL> priv_interp; + priv_interp.env.init (privDictStr); + priv->init (); + if (unlikely (!priv_interp.interpret (*priv))) { fini (); return; } + + priv->localSubrs = &StructAtOffsetOrNull<CFF1Subrs> (&privDictStr, priv->subrsOffset); + if (priv->localSubrs != &Null (CFF1Subrs) && + unlikely (!priv->localSubrs->sanitize (&sc))) + { fini (); return; } + } + } + else /* non-CID */ + { + cff1_top_dict_values_t *font = &topDict; + PRIVDICTVAL *priv = &privateDicts[0]; + + const byte_str_t privDictStr (StructAtOffset<UnsizedByteStr> (cff, font->privateDictInfo.offset), font->privateDictInfo.size); + if (unlikely (!privDictStr.sanitize (&sc))) { fini (); return; } + dict_interpreter_t<PRIVOPSET, PRIVDICTVAL> priv_interp; + priv_interp.env.init (privDictStr); + priv->init (); + if (unlikely (!priv_interp.interpret (*priv))) { fini (); return; } + + priv->localSubrs = &StructAtOffsetOrNull<CFF1Subrs> (&privDictStr, priv->subrsOffset); + if (priv->localSubrs != &Null (CFF1Subrs) && + unlikely (!priv->localSubrs->sanitize (&sc))) + { fini (); return; } + } + } + + void fini () + { + sc.end_processing (); + topDict.fini (); + fontDicts.fini_deep (); + privateDicts.fini_deep (); + hb_blob_destroy (blob); + blob = nullptr; + } + + bool is_valid () const { return blob; } + bool is_CID () const { return topDict.is_CID (); } + + bool is_predef_charset () const { return topDict.CharsetOffset <= ExpertSubsetCharset; } + + unsigned int std_code_to_glyph (hb_codepoint_t code) const + { + hb_codepoint_t sid = lookup_standard_encoding_for_sid (code); + if (unlikely (sid == CFF_UNDEF_SID)) + return 0; + + if (charset != &Null (Charset)) + return charset->get_glyph (sid, num_glyphs); + else if ((topDict.CharsetOffset == ISOAdobeCharset) + && (code <= 228 /*zcaron*/)) return sid; + return 0; + } + + bool is_predef_encoding () const { return topDict.EncodingOffset <= ExpertEncoding; } + + hb_codepoint_t glyph_to_code (hb_codepoint_t glyph) const + { + if (encoding != &Null (Encoding)) + return encoding->get_code (glyph); + else + { + hb_codepoint_t sid = glyph_to_sid (glyph); + if (sid == 0) return 0; + hb_codepoint_t code = 0; + switch (topDict.EncodingOffset) + { + case StandardEncoding: + code = lookup_standard_encoding_for_code (sid); + break; + case ExpertEncoding: + code = lookup_expert_encoding_for_code (sid); + break; + default: + break; + } + return code; + } + } + + hb_codepoint_t glyph_to_sid (hb_codepoint_t glyph) const + { + if (charset != &Null (Charset)) + return charset->get_sid (glyph, num_glyphs); + else + { + hb_codepoint_t sid = 0; + switch (topDict.CharsetOffset) + { + case ISOAdobeCharset: + if (glyph <= 228 /*zcaron*/) sid = glyph; + break; + case ExpertCharset: + sid = lookup_expert_charset_for_sid (glyph); + break; + case ExpertSubsetCharset: + sid = lookup_expert_subset_charset_for_sid (glyph); + break; + default: + break; + } + return sid; + } + } + + hb_codepoint_t sid_to_glyph (hb_codepoint_t sid) const + { + if (charset != &Null (Charset)) + return charset->get_glyph (sid, num_glyphs); + else + { + hb_codepoint_t glyph = 0; + switch (topDict.CharsetOffset) + { + case ISOAdobeCharset: + if (sid <= 228 /*zcaron*/) glyph = sid; + break; + case ExpertCharset: + glyph = lookup_expert_charset_for_glyph (sid); + break; + case ExpertSubsetCharset: + glyph = lookup_expert_subset_charset_for_glyph (sid); + break; + default: + break; + } + return glyph; + } + } + + protected: + hb_blob_t *blob; + hb_sanitize_context_t sc; + + public: + const Encoding *encoding; + const Charset *charset; + const CFF1NameIndex *nameIndex; + const CFF1TopDictIndex *topDictIndex; + const CFF1StringIndex *stringIndex; + const CFF1Subrs *globalSubrs; + const CFF1CharStrings *charStrings; + const CFF1FDArray *fdArray; + const CFF1FDSelect *fdSelect; + unsigned int fdCount; + + cff1_top_dict_values_t topDict; + hb_vector_t<cff1_font_dict_values_t> + fontDicts; + hb_vector_t<PRIVDICTVAL> privateDicts; + + unsigned int num_glyphs; + }; + + struct accelerator_t : accelerator_templ_t<cff1_private_dict_opset_t, cff1_private_dict_values_t> + { + void init (hb_face_t *face) + { + SUPER::init (face); + + if (!is_valid ()) return; + if (is_CID ()) return; + + /* fill glyph_names */ + for (hb_codepoint_t gid = 0; gid < num_glyphs; gid++) + { + hb_codepoint_t sid = glyph_to_sid (gid); + gname_t gname; + gname.sid = sid; + if (sid < cff1_std_strings_length) + gname.name = cff1_std_strings (sid); + else + { + byte_str_t ustr = (*stringIndex)[sid - cff1_std_strings_length]; + gname.name = hb_bytes_t ((const char*)ustr.arrayZ, ustr.length); + } + if (unlikely (!gname.name.arrayZ)) { fini (); return; } + glyph_names.push (gname); + } + glyph_names.qsort (); + } + + void fini () + { + glyph_names.fini (); + + SUPER::fini (); + } + + bool get_glyph_name (hb_codepoint_t glyph, + char *buf, unsigned int buf_len) const + { + if (!buf) return true; + if (unlikely (!is_valid ())) return false; + if (is_CID()) return false; + hb_codepoint_t sid = glyph_to_sid (glyph); + const char *str; + size_t str_len; + if (sid < cff1_std_strings_length) + { + hb_bytes_t byte_str = cff1_std_strings (sid); + str = byte_str.arrayZ; + str_len = byte_str.length; + } + else + { + byte_str_t ubyte_str = (*stringIndex)[sid - cff1_std_strings_length]; + str = (const char *)ubyte_str.arrayZ; + str_len = ubyte_str.length; + } + if (!str_len) return false; + unsigned int len = hb_min (buf_len - 1, str_len); + strncpy (buf, (const char*)str, len); + buf[len] = '\0'; + return true; + } + + bool get_glyph_from_name (const char *name, int len, + hb_codepoint_t *glyph) const + { + if (len < 0) len = strlen (name); + if (unlikely (!len)) return false; + + gname_t key = { hb_bytes_t (name, len), 0 }; + const gname_t *gname = glyph_names.bsearch (key); + if (!gname) return false; + hb_codepoint_t gid = sid_to_glyph (gname->sid); + if (!gid && gname->sid) return false; + *glyph = gid; + return true; + } + + HB_INTERNAL bool get_extents (hb_font_t *font, hb_codepoint_t glyph, hb_glyph_extents_t *extents) const; + HB_INTERNAL bool get_seac_components (hb_codepoint_t glyph, hb_codepoint_t *base, hb_codepoint_t *accent) const; +#ifdef HB_EXPERIMENTAL_API + HB_INTERNAL bool get_path (hb_font_t *font, hb_codepoint_t glyph, draw_helper_t &draw_helper) const; +#endif + + private: + struct gname_t + { + hb_bytes_t name; + uint16_t sid; + + static int cmp (const void *a_, const void *b_) + { + const gname_t *a = (const gname_t *)a_; + const gname_t *b = (const gname_t *)b_; + int minlen = hb_min (a->name.length, b->name.length); + int ret = strncmp (a->name.arrayZ, b->name.arrayZ, minlen); + if (ret) return ret; + return a->name.length - b->name.length; + } + + int cmp (const gname_t &a) const { return cmp (&a, this); } + }; + + hb_sorted_vector_t<gname_t> glyph_names; + + typedef accelerator_templ_t<cff1_private_dict_opset_t, cff1_private_dict_values_t> SUPER; + }; + + struct accelerator_subset_t : accelerator_templ_t<cff1_private_dict_opset_subset, cff1_private_dict_values_subset_t> {}; + + bool subset (hb_subset_context_t *c) const { return hb_subset_cff1 (c); } + + protected: + HB_INTERNAL static hb_codepoint_t lookup_standard_encoding_for_code (hb_codepoint_t sid); + HB_INTERNAL static hb_codepoint_t lookup_expert_encoding_for_code (hb_codepoint_t sid); + HB_INTERNAL static hb_codepoint_t lookup_expert_charset_for_sid (hb_codepoint_t glyph); + HB_INTERNAL static hb_codepoint_t lookup_expert_subset_charset_for_sid (hb_codepoint_t glyph); + HB_INTERNAL static hb_codepoint_t lookup_expert_charset_for_glyph (hb_codepoint_t sid); + HB_INTERNAL static hb_codepoint_t lookup_expert_subset_charset_for_glyph (hb_codepoint_t sid); + HB_INTERNAL static hb_codepoint_t lookup_standard_encoding_for_sid (hb_codepoint_t code); + + public: + FixedVersion<HBUINT8> version; /* Version of CFF table. set to 0x0100u */ + OffsetTo<CFF1NameIndex, HBUINT8> nameIndex; /* headerSize = Offset to Name INDEX. */ + HBUINT8 offSize; /* offset size (unused?) */ + + public: + DEFINE_SIZE_STATIC (4); +}; + +struct cff1_accelerator_t : cff1::accelerator_t {}; +} /* namespace OT */ + +#endif /* HB_OT_CFF1_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-cff2-table.cc b/thirdparty/harfbuzz/src/hb-ot-cff2-table.cc new file mode 100644 index 0000000000..ac0feeee21 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-cff2-table.cc @@ -0,0 +1,215 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_FONT_CFF + +#include "hb-ot-cff2-table.hh" +#include "hb-cff2-interp-cs.hh" +#include "hb-draw.hh" + +using namespace CFF; + +struct cff2_extents_param_t +{ + void init () + { + path_open = false; + min_x.set_int (INT_MAX); + min_y.set_int (INT_MAX); + max_x.set_int (INT_MIN); + max_y.set_int (INT_MIN); + } + + void start_path () { path_open = true; } + void end_path () { path_open = false; } + bool is_path_open () const { return path_open; } + + void update_bounds (const point_t &pt) + { + if (pt.x < min_x) min_x = pt.x; + if (pt.x > max_x) max_x = pt.x; + if (pt.y < min_y) min_y = pt.y; + if (pt.y > max_y) max_y = pt.y; + } + + bool path_open; + number_t min_x; + number_t min_y; + number_t max_x; + number_t max_y; +}; + +struct cff2_path_procs_extents_t : path_procs_t<cff2_path_procs_extents_t, cff2_cs_interp_env_t, cff2_extents_param_t> +{ + static void moveto (cff2_cs_interp_env_t &env, cff2_extents_param_t& param, const point_t &pt) + { + param.end_path (); + env.moveto (pt); + } + + static void line (cff2_cs_interp_env_t &env, cff2_extents_param_t& param, const point_t &pt1) + { + if (!param.is_path_open ()) + { + param.start_path (); + param.update_bounds (env.get_pt ()); + } + env.moveto (pt1); + param.update_bounds (env.get_pt ()); + } + + static void curve (cff2_cs_interp_env_t &env, cff2_extents_param_t& param, const point_t &pt1, const point_t &pt2, const point_t &pt3) + { + if (!param.is_path_open ()) + { + param.start_path (); + param.update_bounds (env.get_pt ()); + } + /* include control points */ + param.update_bounds (pt1); + param.update_bounds (pt2); + env.moveto (pt3); + param.update_bounds (env.get_pt ()); + } +}; + +struct cff2_cs_opset_extents_t : cff2_cs_opset_t<cff2_cs_opset_extents_t, cff2_extents_param_t, cff2_path_procs_extents_t> {}; + +bool OT::cff2::accelerator_t::get_extents (hb_font_t *font, + hb_codepoint_t glyph, + hb_glyph_extents_t *extents) const +{ +#ifdef HB_NO_OT_FONT_CFF + /* XXX Remove check when this code moves to .hh file. */ + return true; +#endif + + if (unlikely (!is_valid () || (glyph >= num_glyphs))) return false; + + unsigned int fd = fdSelect->get_fd (glyph); + cff2_cs_interpreter_t<cff2_cs_opset_extents_t, cff2_extents_param_t> interp; + const byte_str_t str = (*charStrings)[glyph]; + interp.env.init (str, *this, fd, font->coords, font->num_coords); + cff2_extents_param_t param; + param.init (); + if (unlikely (!interp.interpret (param))) return false; + + if (param.min_x >= param.max_x) + { + extents->width = 0; + extents->x_bearing = 0; + } + else + { + extents->x_bearing = font->em_scalef_x (param.min_x.to_real ()); + extents->width = font->em_scalef_x (param.max_x.to_real () - param.min_x.to_real ()); + } + if (param.min_y >= param.max_y) + { + extents->height = 0; + extents->y_bearing = 0; + } + else + { + extents->y_bearing = font->em_scalef_y (param.max_y.to_real ()); + extents->height = font->em_scalef_y (param.min_y.to_real () - param.max_y.to_real ()); + } + + return true; +} + +#ifdef HB_EXPERIMENTAL_API +struct cff2_path_param_t +{ + cff2_path_param_t (hb_font_t *font_, draw_helper_t &draw_helper_) + { + draw_helper = &draw_helper_; + font = font_; + } + + void move_to (const point_t &p) + { draw_helper->move_to (font->em_scalef_x (p.x.to_real ()), font->em_scalef_y (p.y.to_real ())); } + + void line_to (const point_t &p) + { draw_helper->line_to (font->em_scalef_x (p.x.to_real ()), font->em_scalef_y (p.y.to_real ())); } + + void cubic_to (const point_t &p1, const point_t &p2, const point_t &p3) + { + draw_helper->cubic_to (font->em_scalef_x (p1.x.to_real ()), font->em_scalef_y (p1.y.to_real ()), + font->em_scalef_x (p2.x.to_real ()), font->em_scalef_y (p2.y.to_real ()), + font->em_scalef_x (p3.x.to_real ()), font->em_scalef_y (p3.y.to_real ())); + } + + protected: + draw_helper_t *draw_helper; + hb_font_t *font; +}; + +struct cff2_path_procs_path_t : path_procs_t<cff2_path_procs_path_t, cff2_cs_interp_env_t, cff2_path_param_t> +{ + static void moveto (cff2_cs_interp_env_t &env, cff2_path_param_t& param, const point_t &pt) + { + param.move_to (pt); + env.moveto (pt); + } + + static void line (cff2_cs_interp_env_t &env, cff2_path_param_t& param, const point_t &pt1) + { + param.line_to (pt1); + env.moveto (pt1); + } + + static void curve (cff2_cs_interp_env_t &env, cff2_path_param_t& param, const point_t &pt1, const point_t &pt2, const point_t &pt3) + { + param.cubic_to (pt1, pt2, pt3); + env.moveto (pt3); + } +}; + +struct cff2_cs_opset_path_t : cff2_cs_opset_t<cff2_cs_opset_path_t, cff2_path_param_t, cff2_path_procs_path_t> {}; + +bool OT::cff2::accelerator_t::get_path (hb_font_t *font, hb_codepoint_t glyph, draw_helper_t &draw_helper) const +{ +#ifdef HB_NO_OT_FONT_CFF + /* XXX Remove check when this code moves to .hh file. */ + return true; +#endif + + if (unlikely (!is_valid () || (glyph >= num_glyphs))) return false; + + unsigned int fd = fdSelect->get_fd (glyph); + cff2_cs_interpreter_t<cff2_cs_opset_path_t, cff2_path_param_t> interp; + const byte_str_t str = (*charStrings)[glyph]; + interp.env.init (str, *this, fd, font->coords, font->num_coords); + cff2_path_param_t param (font, draw_helper); + if (unlikely (!interp.interpret (param))) return false; + return true; +} +#endif + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh b/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh new file mode 100644 index 0000000000..829217feaa --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh @@ -0,0 +1,531 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#ifndef HB_OT_CFF2_TABLE_HH +#define HB_OT_CFF2_TABLE_HH + +#include "hb-ot-cff-common.hh" +#include "hb-subset-cff2.hh" +#include "hb-draw.hh" + +namespace CFF { + +/* + * CFF2 -- Compact Font Format (CFF) Version 2 + * https://docs.microsoft.com/en-us/typography/opentype/spec/cff2 + */ +#define HB_OT_TAG_cff2 HB_TAG('C','F','F','2') + +typedef CFFIndex<HBUINT32> CFF2Index; +template <typename Type> struct CFF2IndexOf : CFFIndexOf<HBUINT32, Type> {}; + +typedef CFF2Index CFF2CharStrings; +typedef Subrs<HBUINT32> CFF2Subrs; + +typedef FDSelect3_4<HBUINT32, HBUINT16> FDSelect4; +typedef FDSelect3_4_Range<HBUINT32, HBUINT16> FDSelect4_Range; + +struct CFF2FDSelect +{ + bool serialize (hb_serialize_context_t *c, const CFF2FDSelect &src, unsigned int num_glyphs) + { + TRACE_SERIALIZE (this); + unsigned int size = src.get_size (num_glyphs); + CFF2FDSelect *dest = c->allocate_size<CFF2FDSelect> (size); + if (unlikely (!dest)) return_trace (false); + memcpy (dest, &src, size); + return_trace (true); + } + + unsigned int get_size (unsigned int num_glyphs) const + { + switch (format) + { + case 0: return format.static_size + u.format0.get_size (num_glyphs); + case 3: return format.static_size + u.format3.get_size (); + case 4: return format.static_size + u.format4.get_size (); + default:return 0; + } + } + + hb_codepoint_t get_fd (hb_codepoint_t glyph) const + { + if (this == &Null (CFF2FDSelect)) + return 0; + + switch (format) + { + case 0: return u.format0.get_fd (glyph); + case 3: return u.format3.get_fd (glyph); + case 4: return u.format4.get_fd (glyph); + default:return 0; + } + } + + bool sanitize (hb_sanitize_context_t *c, unsigned int fdcount) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this))) + return_trace (false); + + switch (format) + { + case 0: return_trace (u.format0.sanitize (c, fdcount)); + case 3: return_trace (u.format3.sanitize (c, fdcount)); + case 4: return_trace (u.format4.sanitize (c, fdcount)); + default:return_trace (false); + } + } + + HBUINT8 format; + union { + FDSelect0 format0; + FDSelect3 format3; + FDSelect4 format4; + } u; + public: + DEFINE_SIZE_MIN (2); +}; + +struct CFF2VariationStore +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this)) && c->check_range (&varStore, size) && varStore.sanitize (c)); + } + + bool serialize (hb_serialize_context_t *c, const CFF2VariationStore *varStore) + { + TRACE_SERIALIZE (this); + unsigned int size_ = varStore->get_size (); + CFF2VariationStore *dest = c->allocate_size<CFF2VariationStore> (size_); + if (unlikely (!dest)) return_trace (false); + memcpy (dest, varStore, size_); + return_trace (true); + } + + unsigned int get_size () const { return HBUINT16::static_size + size; } + + HBUINT16 size; + VariationStore varStore; + + DEFINE_SIZE_MIN (2 + VariationStore::min_size); +}; + +struct cff2_top_dict_values_t : top_dict_values_t<> +{ + void init () + { + top_dict_values_t<>::init (); + vstoreOffset = 0; + FDSelectOffset = 0; + } + void fini () { top_dict_values_t<>::fini (); } + + unsigned int vstoreOffset; + unsigned int FDSelectOffset; +}; + +struct cff2_top_dict_opset_t : top_dict_opset_t<> +{ + static void process_op (op_code_t op, num_interp_env_t& env, cff2_top_dict_values_t& dictval) + { + switch (op) { + case OpCode_FontMatrix: + { + dict_val_t val; + val.init (); + dictval.add_op (op, env.str_ref); + env.clear_args (); + } + break; + + case OpCode_vstore: + dictval.vstoreOffset = env.argStack.pop_uint (); + env.clear_args (); + break; + case OpCode_FDSelect: + dictval.FDSelectOffset = env.argStack.pop_uint (); + env.clear_args (); + break; + + default: + SUPER::process_op (op, env, dictval); + /* Record this operand below if stack is empty, otherwise done */ + if (!env.argStack.is_empty ()) return; + } + + if (unlikely (env.in_error ())) return; + + dictval.add_op (op, env.str_ref); + } + + typedef top_dict_opset_t<> SUPER; +}; + +struct cff2_font_dict_values_t : dict_values_t<op_str_t> +{ + void init () + { + dict_values_t<op_str_t>::init (); + privateDictInfo.init (); + } + void fini () { dict_values_t<op_str_t>::fini (); } + + table_info_t privateDictInfo; +}; + +struct cff2_font_dict_opset_t : dict_opset_t +{ + static void process_op (op_code_t op, num_interp_env_t& env, cff2_font_dict_values_t& dictval) + { + switch (op) { + case OpCode_Private: + dictval.privateDictInfo.offset = env.argStack.pop_uint (); + dictval.privateDictInfo.size = env.argStack.pop_uint (); + env.clear_args (); + break; + + default: + SUPER::process_op (op, env); + if (!env.argStack.is_empty ()) + return; + } + + if (unlikely (env.in_error ())) return; + + dictval.add_op (op, env.str_ref); + } + + private: + typedef dict_opset_t SUPER; +}; + +template <typename VAL> +struct cff2_private_dict_values_base_t : dict_values_t<VAL> +{ + void init () + { + dict_values_t<VAL>::init (); + subrsOffset = 0; + localSubrs = &Null (CFF2Subrs); + ivs = 0; + } + void fini () { dict_values_t<VAL>::fini (); } + + unsigned int subrsOffset; + const CFF2Subrs *localSubrs; + unsigned int ivs; +}; + +typedef cff2_private_dict_values_base_t<op_str_t> cff2_private_dict_values_subset_t; +typedef cff2_private_dict_values_base_t<num_dict_val_t> cff2_private_dict_values_t; + +struct cff2_priv_dict_interp_env_t : num_interp_env_t +{ + void init (const byte_str_t &str) + { + num_interp_env_t::init (str); + ivs = 0; + seen_vsindex = false; + } + + void process_vsindex () + { + if (likely (!seen_vsindex)) + { + set_ivs (argStack.pop_uint ()); + } + seen_vsindex = true; + } + + unsigned int get_ivs () const { return ivs; } + void set_ivs (unsigned int ivs_) { ivs = ivs_; } + + protected: + unsigned int ivs; + bool seen_vsindex; +}; + +struct cff2_private_dict_opset_t : dict_opset_t +{ + static void process_op (op_code_t op, cff2_priv_dict_interp_env_t& env, cff2_private_dict_values_t& dictval) + { + num_dict_val_t val; + val.init (); + + switch (op) { + case OpCode_StdHW: + case OpCode_StdVW: + case OpCode_BlueScale: + case OpCode_BlueShift: + case OpCode_BlueFuzz: + case OpCode_ExpansionFactor: + case OpCode_LanguageGroup: + val.single_val = env.argStack.pop_num (); + env.clear_args (); + break; + case OpCode_BlueValues: + case OpCode_OtherBlues: + case OpCode_FamilyBlues: + case OpCode_FamilyOtherBlues: + case OpCode_StemSnapH: + case OpCode_StemSnapV: + env.clear_args (); + break; + case OpCode_Subrs: + dictval.subrsOffset = env.argStack.pop_uint (); + env.clear_args (); + break; + case OpCode_vsindexdict: + env.process_vsindex (); + dictval.ivs = env.get_ivs (); + env.clear_args (); + break; + case OpCode_blenddict: + break; + + default: + dict_opset_t::process_op (op, env); + if (!env.argStack.is_empty ()) return; + break; + } + + if (unlikely (env.in_error ())) return; + + dictval.add_op (op, env.str_ref, val); + } +}; + +struct cff2_private_dict_opset_subset_t : dict_opset_t +{ + static void process_op (op_code_t op, cff2_priv_dict_interp_env_t& env, cff2_private_dict_values_subset_t& dictval) + { + switch (op) { + case OpCode_BlueValues: + case OpCode_OtherBlues: + case OpCode_FamilyBlues: + case OpCode_FamilyOtherBlues: + case OpCode_StdHW: + case OpCode_StdVW: + case OpCode_BlueScale: + case OpCode_BlueShift: + case OpCode_BlueFuzz: + case OpCode_StemSnapH: + case OpCode_StemSnapV: + case OpCode_LanguageGroup: + case OpCode_ExpansionFactor: + env.clear_args (); + break; + + case OpCode_blenddict: + env.clear_args (); + return; + + case OpCode_Subrs: + dictval.subrsOffset = env.argStack.pop_uint (); + env.clear_args (); + break; + + default: + SUPER::process_op (op, env); + if (!env.argStack.is_empty ()) return; + break; + } + + if (unlikely (env.in_error ())) return; + + dictval.add_op (op, env.str_ref); + } + + private: + typedef dict_opset_t SUPER; +}; + +typedef dict_interpreter_t<cff2_top_dict_opset_t, cff2_top_dict_values_t> cff2_top_dict_interpreter_t; +typedef dict_interpreter_t<cff2_font_dict_opset_t, cff2_font_dict_values_t> cff2_font_dict_interpreter_t; + +struct CFF2FDArray : FDArray<HBUINT32> +{ + /* FDArray::serialize does not compile without this partial specialization */ + template <typename ITER, typename OP_SERIALIZER> + bool serialize (hb_serialize_context_t *c, ITER it, OP_SERIALIZER& opszr) + { return FDArray<HBUINT32>::serialize<cff2_font_dict_values_t, table_info_t> (c, it, opszr); } +}; + +} /* namespace CFF */ + +namespace OT { + +using namespace CFF; + +struct cff2 +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_cff2; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + likely (version.major == 2)); + } + + template <typename PRIVOPSET, typename PRIVDICTVAL> + struct accelerator_templ_t + { + void init (hb_face_t *face) + { + topDict.init (); + fontDicts.init (); + privateDicts.init (); + + this->blob = sc.reference_table<cff2> (face); + + /* setup for run-time santization */ + sc.init (this->blob); + sc.start_processing (); + + const OT::cff2 *cff2 = this->blob->template as<OT::cff2> (); + + if (cff2 == &Null (OT::cff2)) + { fini (); return; } + + { /* parse top dict */ + byte_str_t topDictStr (cff2 + cff2->topDict, cff2->topDictSize); + if (unlikely (!topDictStr.sanitize (&sc))) { fini (); return; } + cff2_top_dict_interpreter_t top_interp; + top_interp.env.init (topDictStr); + topDict.init (); + if (unlikely (!top_interp.interpret (topDict))) { fini (); return; } + } + + globalSubrs = &StructAtOffset<CFF2Subrs> (cff2, cff2->topDict + cff2->topDictSize); + varStore = &StructAtOffsetOrNull<CFF2VariationStore> (cff2, topDict.vstoreOffset); + charStrings = &StructAtOffsetOrNull<CFF2CharStrings> (cff2, topDict.charStringsOffset); + fdArray = &StructAtOffsetOrNull<CFF2FDArray> (cff2, topDict.FDArrayOffset); + fdSelect = &StructAtOffsetOrNull<CFF2FDSelect> (cff2, topDict.FDSelectOffset); + + if (((varStore != &Null (CFF2VariationStore)) && unlikely (!varStore->sanitize (&sc))) || + (charStrings == &Null (CFF2CharStrings)) || unlikely (!charStrings->sanitize (&sc)) || + (globalSubrs == &Null (CFF2Subrs)) || unlikely (!globalSubrs->sanitize (&sc)) || + (fdArray == &Null (CFF2FDArray)) || unlikely (!fdArray->sanitize (&sc)) || + (((fdSelect != &Null (CFF2FDSelect)) && unlikely (!fdSelect->sanitize (&sc, fdArray->count))))) + { fini (); return; } + + num_glyphs = charStrings->count; + if (num_glyphs != sc.get_num_glyphs ()) + { fini (); return; } + + fdCount = fdArray->count; + if (!privateDicts.resize (fdCount)) + { fini (); return; } + + /* parse font dicts and gather private dicts */ + for (unsigned int i = 0; i < fdCount; i++) + { + const byte_str_t fontDictStr = (*fdArray)[i]; + if (unlikely (!fontDictStr.sanitize (&sc))) { fini (); return; } + cff2_font_dict_values_t *font; + cff2_font_dict_interpreter_t font_interp; + font_interp.env.init (fontDictStr); + font = fontDicts.push (); + if (unlikely (font == &Crap (cff2_font_dict_values_t))) { fini (); return; } + font->init (); + if (unlikely (!font_interp.interpret (*font))) { fini (); return; } + + const byte_str_t privDictStr (StructAtOffsetOrNull<UnsizedByteStr> (cff2, font->privateDictInfo.offset), font->privateDictInfo.size); + if (unlikely (!privDictStr.sanitize (&sc))) { fini (); return; } + dict_interpreter_t<PRIVOPSET, PRIVDICTVAL, cff2_priv_dict_interp_env_t> priv_interp; + priv_interp.env.init(privDictStr); + privateDicts[i].init (); + if (unlikely (!priv_interp.interpret (privateDicts[i]))) { fini (); return; } + + privateDicts[i].localSubrs = &StructAtOffsetOrNull<CFF2Subrs> (&privDictStr[0], privateDicts[i].subrsOffset); + if (privateDicts[i].localSubrs != &Null (CFF2Subrs) && + unlikely (!privateDicts[i].localSubrs->sanitize (&sc))) + { fini (); return; } + } + } + + void fini () + { + sc.end_processing (); + topDict.fini (); + fontDicts.fini_deep (); + privateDicts.fini_deep (); + hb_blob_destroy (blob); + blob = nullptr; + } + + bool is_valid () const { return blob; } + + protected: + hb_blob_t *blob; + hb_sanitize_context_t sc; + + public: + cff2_top_dict_values_t topDict; + const CFF2Subrs *globalSubrs; + const CFF2VariationStore *varStore; + const CFF2CharStrings *charStrings; + const CFF2FDArray *fdArray; + const CFF2FDSelect *fdSelect; + unsigned int fdCount; + + hb_vector_t<cff2_font_dict_values_t> fontDicts; + hb_vector_t<PRIVDICTVAL> privateDicts; + + unsigned int num_glyphs; + }; + + struct accelerator_t : accelerator_templ_t<cff2_private_dict_opset_t, cff2_private_dict_values_t> + { + HB_INTERNAL bool get_extents (hb_font_t *font, + hb_codepoint_t glyph, + hb_glyph_extents_t *extents) const; +#ifdef HB_EXPERIMENTAL_API + HB_INTERNAL bool get_path (hb_font_t *font, hb_codepoint_t glyph, draw_helper_t &draw_helper) const; +#endif + }; + + typedef accelerator_templ_t<cff2_private_dict_opset_subset_t, cff2_private_dict_values_subset_t> accelerator_subset_t; + + bool subset (hb_subset_context_t *c) const { return hb_subset_cff2 (c); } + + public: + FixedVersion<HBUINT8> version; /* Version of CFF2 table. set to 0x0200u */ + NNOffsetTo<TopDict, HBUINT8> topDict; /* headerSize = Offset to Top DICT. */ + HBUINT16 topDictSize; /* Top DICT size */ + + public: + DEFINE_SIZE_STATIC (5); +}; + +struct cff2_accelerator_t : cff2::accelerator_t {}; +} /* namespace OT */ + +#endif /* HB_OT_CFF2_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh b/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh new file mode 100644 index 0000000000..cc48379bb8 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh @@ -0,0 +1,1711 @@ +/* + * Copyright © 2014 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_CMAP_TABLE_HH +#define HB_OT_CMAP_TABLE_HH + +#include "hb-open-type.hh" +#include "hb-set.hh" + +/* + * cmap -- Character to Glyph Index Mapping + * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap + */ +#define HB_OT_TAG_cmap HB_TAG('c','m','a','p') + +namespace OT { + + +struct CmapSubtableFormat0 +{ + bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const + { + hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0; + if (!gid) + return false; + *glyph = gid; + return true; + } + void collect_unicodes (hb_set_t *out) const + { + for (unsigned int i = 0; i < 256; i++) + if (glyphIdArray[i]) + out->add (i); + } + + void collect_mapping (hb_set_t *unicodes, /* OUT */ + hb_map_t *mapping /* OUT */) const + { + for (unsigned i = 0; i < 256; i++) + if (glyphIdArray[i]) + { + hb_codepoint_t glyph = glyphIdArray[i]; + unicodes->add (i); + mapping->set (i, glyph); + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + protected: + HBUINT16 format; /* Format number is set to 0. */ + HBUINT16 length; /* Byte length of this subtable. */ + HBUINT16 language; /* Ignore. */ + HBUINT8 glyphIdArray[256];/* An array that maps character + * code to glyph index values. */ + public: + DEFINE_SIZE_STATIC (6 + 256); +}; + +struct CmapSubtableFormat4 +{ + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + HBUINT16* serialize_endcode_array (hb_serialize_context_t *c, + Iterator it) + { + HBUINT16 *endCode = c->start_embed<HBUINT16> (); + hb_codepoint_t prev_endcp = 0xFFFF; + + for (const hb_item_type<Iterator> _ : +it) + { + if (prev_endcp != 0xFFFF && prev_endcp + 1u != _.first) + { + HBUINT16 end_code; + end_code = prev_endcp; + c->copy<HBUINT16> (end_code); + } + prev_endcp = _.first; + } + + { + // last endCode + HBUINT16 endcode; + endcode = prev_endcp; + if (unlikely (!c->copy<HBUINT16> (endcode))) return nullptr; + // There must be a final entry with end_code == 0xFFFF. + if (prev_endcp != 0xFFFF) + { + HBUINT16 finalcode; + finalcode = 0xFFFF; + if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr; + } + } + + return endCode; + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + HBUINT16* serialize_startcode_array (hb_serialize_context_t *c, + Iterator it) + { + HBUINT16 *startCode = c->start_embed<HBUINT16> (); + hb_codepoint_t prev_cp = 0xFFFF; + + for (const hb_item_type<Iterator> _ : +it) + { + if (prev_cp == 0xFFFF || prev_cp + 1u != _.first) + { + HBUINT16 start_code; + start_code = _.first; + c->copy<HBUINT16> (start_code); + } + + prev_cp = _.first; + } + + // There must be a final entry with end_code == 0xFFFF. + if (it.len () == 0 || prev_cp != 0xFFFF) + { + HBUINT16 finalcode; + finalcode = 0xFFFF; + if (unlikely (!c->copy<HBUINT16> (finalcode))) return nullptr; + } + + return startCode; + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + HBINT16* serialize_idDelta_array (hb_serialize_context_t *c, + Iterator it, + HBUINT16 *endCode, + HBUINT16 *startCode, + unsigned segcount) + { + unsigned i = 0; + hb_codepoint_t last_gid = 0, start_gid = 0, last_cp = 0xFFFF; + bool use_delta = true; + + HBINT16 *idDelta = c->start_embed<HBINT16> (); + if ((char *)idDelta - (char *)startCode != (int) segcount * (int) HBINT16::static_size) + return nullptr; + + for (const hb_item_type<Iterator> _ : +it) + { + if (_.first == startCode[i]) + { + use_delta = true; + start_gid = _.second; + } + else if (_.second != last_gid + 1) use_delta = false; + + if (_.first == endCode[i]) + { + HBINT16 delta; + if (use_delta) delta = (int)start_gid - (int)startCode[i]; + else delta = 0; + c->copy<HBINT16> (delta); + + i++; + } + + last_gid = _.second; + last_cp = _.first; + } + + if (it.len () == 0 || last_cp != 0xFFFF) + { + HBINT16 delta; + delta = 1; + if (unlikely (!c->copy<HBINT16> (delta))) return nullptr; + } + + return idDelta; + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + HBUINT16* serialize_rangeoffset_glyid (hb_serialize_context_t *c, + Iterator it, + HBUINT16 *endCode, + HBUINT16 *startCode, + HBINT16 *idDelta, + unsigned segcount) + { + HBUINT16 *idRangeOffset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segcount); + if (unlikely (!c->check_success (idRangeOffset))) return nullptr; + if (unlikely ((char *)idRangeOffset - (char *)idDelta != (int) segcount * (int) HBINT16::static_size)) return nullptr; + + + hb_range (segcount) + | hb_filter ([&] (const unsigned _) { return idDelta[_] == 0; }) + | hb_apply ([&] (const unsigned i) + { + idRangeOffset[i] = 2 * (c->start_embed<HBUINT16> () - idRangeOffset - i); + + + it + | hb_filter ([&] (const hb_item_type<Iterator> _) { return _.first >= startCode[i] && _.first <= endCode[i]; }) + | hb_apply ([&] (const hb_item_type<Iterator> _) + { + HBUINT16 glyID; + glyID = _.second; + c->copy<HBUINT16> (glyID); + }) + ; + + + }) + ; + + return idRangeOffset; + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + void serialize (hb_serialize_context_t *c, + Iterator it) + { + auto format4_iter = + + it + | hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _) + { return _.first <= 0xFFFF; }) + ; + + if (format4_iter.len () == 0) return; + + unsigned table_initpos = c->length (); + if (unlikely (!c->extend_min (*this))) return; + this->format = 4; + + //serialize endCode[] + HBUINT16 *endCode = serialize_endcode_array (c, format4_iter); + if (unlikely (!endCode)) return; + + unsigned segcount = (c->length () - min_size) / HBUINT16::static_size; + + // 2 bytes of padding. + if (unlikely (!c->allocate_size<HBUINT16> (HBUINT16::static_size))) return; // 2 bytes of padding. + + // serialize startCode[] + HBUINT16 *startCode = serialize_startcode_array (c, format4_iter); + if (unlikely (!startCode)) return; + + //serialize idDelta[] + HBINT16 *idDelta = serialize_idDelta_array (c, format4_iter, endCode, startCode, segcount); + if (unlikely (!idDelta)) return; + + HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c, format4_iter, endCode, startCode, idDelta, segcount); + if (unlikely (!c->check_success (idRangeOffset))) return; + + if (unlikely (!c->check_assign(this->length, c->length () - table_initpos))) return; + this->segCountX2 = segcount * 2; + this->entrySelector = hb_max (1u, hb_bit_storage (segcount)) - 1; + this->searchRange = 2 * (1u << this->entrySelector); + this->rangeShift = segcount * 2 > this->searchRange + ? 2 * segcount - this->searchRange + : 0; + } + + struct accelerator_t + { + accelerator_t () {} + accelerator_t (const CmapSubtableFormat4 *subtable) { init (subtable); } + ~accelerator_t () { fini (); } + + void init (const CmapSubtableFormat4 *subtable) + { + segCount = subtable->segCountX2 / 2; + endCount = subtable->values.arrayZ; + startCount = endCount + segCount + 1; + idDelta = startCount + segCount; + idRangeOffset = idDelta + segCount; + glyphIdArray = idRangeOffset + segCount; + glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2; + } + void fini () {} + + bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const + { + struct CustomRange + { + int cmp (hb_codepoint_t k, + unsigned distance) const + { + if (k > last) return +1; + if (k < (&last)[distance]) return -1; + return 0; + } + HBUINT16 last; + }; + + const HBUINT16 *found = hb_bsearch (codepoint, + this->endCount, + this->segCount, + 2, + _hb_cmp_method<hb_codepoint_t, CustomRange, unsigned>, + this->segCount + 1); + if (!found) + return false; + unsigned int i = found - endCount; + + hb_codepoint_t gid; + unsigned int rangeOffset = this->idRangeOffset[i]; + if (rangeOffset == 0) + gid = codepoint + this->idDelta[i]; + else + { + /* Somebody has been smoking... */ + unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; + if (unlikely (index >= this->glyphIdArrayLength)) + return false; + gid = this->glyphIdArray[index]; + if (unlikely (!gid)) + return false; + gid += this->idDelta[i]; + } + gid &= 0xFFFFu; + if (!gid) + return false; + *glyph = gid; + return true; + } + + HB_INTERNAL static bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph) + { return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph); } + + void collect_unicodes (hb_set_t *out) const + { + unsigned int count = this->segCount; + if (count && this->startCount[count - 1] == 0xFFFFu) + count--; /* Skip sentinel segment. */ + for (unsigned int i = 0; i < count; i++) + { + hb_codepoint_t start = this->startCount[i]; + hb_codepoint_t end = this->endCount[i]; + unsigned int rangeOffset = this->idRangeOffset[i]; + if (rangeOffset == 0) + { + for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++) + { + hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu; + if (unlikely (!gid)) + continue; + out->add (codepoint); + } + } + else + { + for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++) + { + unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; + if (unlikely (index >= this->glyphIdArrayLength)) + break; + hb_codepoint_t gid = this->glyphIdArray[index]; + if (unlikely (!gid)) + continue; + out->add (codepoint); + } + } + } + } + + void collect_mapping (hb_set_t *unicodes, /* OUT */ + hb_map_t *mapping /* OUT */) const + { + unsigned count = this->segCount; + if (count && this->startCount[count - 1] == 0xFFFFu) + count--; /* Skip sentinel segment. */ + for (unsigned i = 0; i < count; i++) + { + hb_codepoint_t start = this->startCount[i]; + hb_codepoint_t end = this->endCount[i]; + unsigned rangeOffset = this->idRangeOffset[i]; + if (rangeOffset == 0) + { + for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++) + { + hb_codepoint_t gid = (codepoint + this->idDelta[i]) & 0xFFFFu; + if (unlikely (!gid)) + continue; + unicodes->add (codepoint); + mapping->set (codepoint, gid); + } + } + else + { + for (hb_codepoint_t codepoint = start; codepoint <= end; codepoint++) + { + unsigned index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; + if (unlikely (index >= this->glyphIdArrayLength)) + break; + hb_codepoint_t gid = this->glyphIdArray[index]; + if (unlikely (!gid)) + continue; + unicodes->add (codepoint); + mapping->set (codepoint, gid); + } + } + } + } + + const HBUINT16 *endCount; + const HBUINT16 *startCount; + const HBUINT16 *idDelta; + const HBUINT16 *idRangeOffset; + const HBUINT16 *glyphIdArray; + unsigned int segCount; + unsigned int glyphIdArrayLength; + }; + + bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const + { + accelerator_t accel (this); + return accel.get_glyph_func (&accel, codepoint, glyph); + } + void collect_unicodes (hb_set_t *out) const + { + accelerator_t accel (this); + accel.collect_unicodes (out); + } + + void collect_mapping (hb_set_t *unicodes, /* OUT */ + hb_map_t *mapping /* OUT */) const + { + accelerator_t accel (this); + accel.collect_mapping (unicodes, mapping); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this))) + return_trace (false); + + if (unlikely (!c->check_range (this, length))) + { + /* Some broken fonts have too long of a "length" value. + * If that is the case, just change the value to truncate + * the subtable at the end of the blob. */ + uint16_t new_length = (uint16_t) hb_min ((uintptr_t) 65535, + (uintptr_t) (c->end - + (char *) this)); + if (!c->try_set (&length, new_length)) + return_trace (false); + } + + return_trace (16 + 4 * (unsigned int) segCountX2 <= length); + } + + + + protected: + HBUINT16 format; /* Format number is set to 4. */ + HBUINT16 length; /* This is the length in bytes of the + * subtable. */ + HBUINT16 language; /* Ignore. */ + HBUINT16 segCountX2; /* 2 x segCount. */ + HBUINT16 searchRange; /* 2 * (2**floor(log2(segCount))) */ + HBUINT16 entrySelector; /* log2(searchRange/2) */ + HBUINT16 rangeShift; /* 2 x segCount - searchRange */ + + UnsizedArrayOf<HBUINT16> + values; +#if 0 + HBUINT16 endCount[segCount]; /* End characterCode for each segment, + * last=0xFFFFu. */ + HBUINT16 reservedPad; /* Set to 0. */ + HBUINT16 startCount[segCount]; /* Start character code for each segment. */ + HBINT16 idDelta[segCount]; /* Delta for all character codes in segment. */ + HBUINT16 idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */ + UnsizedArrayOf<HBUINT16> + glyphIdArray; /* Glyph index array (arbitrary length) */ +#endif + + public: + DEFINE_SIZE_ARRAY (14, values); +}; + +struct CmapSubtableLongGroup +{ + friend struct CmapSubtableFormat12; + friend struct CmapSubtableFormat13; + template<typename U> + friend struct CmapSubtableLongSegmented; + friend struct cmap; + + int cmp (hb_codepoint_t codepoint) const + { + if (codepoint < startCharCode) return -1; + if (codepoint > endCharCode) return +1; + return 0; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + private: + HBUINT32 startCharCode; /* First character code in this group. */ + HBUINT32 endCharCode; /* Last character code in this group. */ + HBUINT32 glyphID; /* Glyph index; interpretation depends on + * subtable format. */ + public: + DEFINE_SIZE_STATIC (12); +}; +DECLARE_NULL_NAMESPACE_BYTES (OT, CmapSubtableLongGroup); + +template <typename UINT> +struct CmapSubtableTrimmed +{ + bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const + { + /* Rely on our implicit array bound-checking. */ + hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode]; + if (!gid) + return false; + *glyph = gid; + return true; + } + void collect_unicodes (hb_set_t *out) const + { + hb_codepoint_t start = startCharCode; + unsigned int count = glyphIdArray.len; + for (unsigned int i = 0; i < count; i++) + if (glyphIdArray[i]) + out->add (start + i); + } + + void collect_mapping (hb_set_t *unicodes, /* OUT */ + hb_map_t *mapping /* OUT */) const + { + hb_codepoint_t start_cp = startCharCode; + unsigned count = glyphIdArray.len; + for (unsigned i = 0; i < count; i++) + if (glyphIdArray[i]) + { + hb_codepoint_t unicode = start_cp + i; + hb_codepoint_t glyphid = glyphIdArray[i]; + unicodes->add (unicode); + mapping->set (unicode, glyphid); + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && glyphIdArray.sanitize (c)); + } + + protected: + UINT formatReserved; /* Subtable format and (maybe) padding. */ + UINT length; /* Byte length of this subtable. */ + UINT language; /* Ignore. */ + UINT startCharCode; /* First character code covered. */ + ArrayOf<HBGlyphID, UINT> + glyphIdArray; /* Array of glyph index values for character + * codes in the range. */ + public: + DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray); +}; + +struct CmapSubtableFormat6 : CmapSubtableTrimmed<HBUINT16> {}; +struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32 > {}; + +template <typename T> +struct CmapSubtableLongSegmented +{ + friend struct cmap; + + bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const + { + hb_codepoint_t gid = T::group_get_glyph (groups.bsearch (codepoint), codepoint); + if (!gid) + return false; + *glyph = gid; + return true; + } + + void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const + { + for (unsigned int i = 0; i < this->groups.len; i++) + { + hb_codepoint_t start = this->groups[i].startCharCode; + hb_codepoint_t end = hb_min ((hb_codepoint_t) this->groups[i].endCharCode, + (hb_codepoint_t) HB_UNICODE_MAX); + hb_codepoint_t gid = this->groups[i].glyphID; + if (!gid) + { + /* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */ + if (! T::group_get_glyph (this->groups[i], end)) continue; + start++; + gid++; + } + if (unlikely ((unsigned int) gid >= num_glyphs)) continue; + if (unlikely ((unsigned int) (gid + end - start) >= num_glyphs)) + end = start + (hb_codepoint_t) num_glyphs - gid; + + out->add_range (start, end); + } + } + + void collect_mapping (hb_set_t *unicodes, /* OUT */ + hb_map_t *mapping, /* OUT */ + unsigned num_glyphs) const + { + for (unsigned i = 0; i < this->groups.len; i++) + { + hb_codepoint_t start = this->groups[i].startCharCode; + hb_codepoint_t end = hb_min ((hb_codepoint_t) this->groups[i].endCharCode, + (hb_codepoint_t) HB_UNICODE_MAX); + hb_codepoint_t gid = this->groups[i].glyphID; + if (!gid) + { + /* Intention is: if (hb_is_same (T, CmapSubtableFormat13)) continue; */ + if (! T::group_get_glyph (this->groups[i], end)) continue; + start++; + gid++; + } + if (unlikely ((unsigned int) gid >= num_glyphs)) continue; + if (unlikely ((unsigned int) (gid + end - start) >= num_glyphs)) + end = start + (hb_codepoint_t) num_glyphs - gid; + + for (unsigned cp = start; cp <= end; cp++) + { + unicodes->add (cp); + mapping->set (cp, gid); + gid++; + } + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && groups.sanitize (c)); + } + + protected: + HBUINT16 format; /* Subtable format; set to 12. */ + HBUINT16 reserved; /* Reserved; set to 0. */ + HBUINT32 length; /* Byte length of this subtable. */ + HBUINT32 language; /* Ignore. */ + SortedArrayOf<CmapSubtableLongGroup, HBUINT32> + groups; /* Groupings. */ + public: + DEFINE_SIZE_ARRAY (16, groups); +}; + +struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12> +{ + static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group, + hb_codepoint_t u) + { return likely (group.startCharCode <= group.endCharCode) ? + group.glyphID + (u - group.startCharCode) : 0; } + + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + void serialize (hb_serialize_context_t *c, + Iterator it) + { + if (it.len () == 0) return; + unsigned table_initpos = c->length (); + if (unlikely (!c->extend_min (*this))) return; + + hb_codepoint_t startCharCode = 0xFFFF, endCharCode = 0xFFFF; + hb_codepoint_t glyphID = 0; + + for (const hb_item_type<Iterator> _ : +it) + { + if (startCharCode == 0xFFFF) + { + startCharCode = _.first; + endCharCode = _.first; + glyphID = _.second; + } + else if (!_is_gid_consecutive (endCharCode, startCharCode, glyphID, _.first, _.second)) + { + CmapSubtableLongGroup grouprecord; + grouprecord.startCharCode = startCharCode; + grouprecord.endCharCode = endCharCode; + grouprecord.glyphID = glyphID; + c->copy<CmapSubtableLongGroup> (grouprecord); + + startCharCode = _.first; + endCharCode = _.first; + glyphID = _.second; + } + else + endCharCode = _.first; + } + + CmapSubtableLongGroup record; + record.startCharCode = startCharCode; + record.endCharCode = endCharCode; + record.glyphID = glyphID; + c->copy<CmapSubtableLongGroup> (record); + + this->format = 12; + this->reserved = 0; + this->length = c->length () - table_initpos; + this->groups.len = (this->length - min_size)/CmapSubtableLongGroup::static_size; + } + + static size_t get_sub_table_size (const hb_sorted_vector_t<CmapSubtableLongGroup> &groups_data) + { return 16 + 12 * groups_data.length; } + + private: + static bool _is_gid_consecutive (hb_codepoint_t endCharCode, + hb_codepoint_t startCharCode, + hb_codepoint_t glyphID, + hb_codepoint_t cp, + hb_codepoint_t new_gid) + { + return (cp - 1 == endCharCode) && + new_gid == glyphID + (cp - startCharCode); + } + +}; + +struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13> +{ + static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group, + hb_codepoint_t u HB_UNUSED) + { return group.glyphID; } +}; + +typedef enum +{ + GLYPH_VARIANT_NOT_FOUND = 0, + GLYPH_VARIANT_FOUND = 1, + GLYPH_VARIANT_USE_DEFAULT = 2 +} glyph_variant_t; + +struct UnicodeValueRange +{ + int cmp (const hb_codepoint_t &codepoint) const + { + if (codepoint < startUnicodeValue) return -1; + if (codepoint > startUnicodeValue + additionalCount) return +1; + return 0; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + HBUINT24 startUnicodeValue; /* First value in this range. */ + HBUINT8 additionalCount; /* Number of additional values in this + * range. */ + public: + DEFINE_SIZE_STATIC (4); +}; + +struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32> +{ + void collect_unicodes (hb_set_t *out) const + { + unsigned int count = len; + for (unsigned int i = 0; i < count; i++) + { + hb_codepoint_t first = arrayZ[i].startUnicodeValue; + hb_codepoint_t last = hb_min ((hb_codepoint_t) (first + arrayZ[i].additionalCount), + (hb_codepoint_t) HB_UNICODE_MAX); + out->add_range (first, last); + } + } + + DefaultUVS* copy (hb_serialize_context_t *c, + const hb_set_t *unicodes) const + { + DefaultUVS *out = c->start_embed<DefaultUVS> (); + if (unlikely (!out)) return nullptr; + auto snap = c->snapshot (); + + HBUINT32 len; + len = 0; + if (unlikely (!c->copy<HBUINT32> (len))) return nullptr; + unsigned init_len = c->length (); + + hb_codepoint_t lastCode = HB_MAP_VALUE_INVALID; + int count = -1; + + for (const UnicodeValueRange& _ : as_array ()) + { + for (const unsigned addcnt : hb_range ((unsigned) _.additionalCount + 1)) + { + unsigned curEntry = (unsigned) _.startUnicodeValue + addcnt; + if (!unicodes->has (curEntry)) continue; + count += 1; + if (lastCode == HB_MAP_VALUE_INVALID) + lastCode = curEntry; + else if (lastCode + count != curEntry) + { + UnicodeValueRange rec; + rec.startUnicodeValue = lastCode; + rec.additionalCount = count - 1; + c->copy<UnicodeValueRange> (rec); + + lastCode = curEntry; + count = 0; + } + } + } + + if (lastCode != HB_MAP_VALUE_INVALID) + { + UnicodeValueRange rec; + rec.startUnicodeValue = lastCode; + rec.additionalCount = count; + c->copy<UnicodeValueRange> (rec); + } + + if (c->length () - init_len == 0) + { + c->revert (snap); + return nullptr; + } + else + { + if (unlikely (!c->check_assign (out->len, (c->length () - init_len) / UnicodeValueRange::static_size))) return nullptr; + return out; + } + } + + public: + DEFINE_SIZE_ARRAY (4, *this); +}; + +struct UVSMapping +{ + int cmp (const hb_codepoint_t &codepoint) const + { return unicodeValue.cmp (codepoint); } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + HBUINT24 unicodeValue; /* Base Unicode value of the UVS */ + HBGlyphID glyphID; /* Glyph ID of the UVS */ + public: + DEFINE_SIZE_STATIC (5); +}; + +struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32> +{ + void collect_unicodes (hb_set_t *out) const + { + unsigned int count = len; + for (unsigned int i = 0; i < count; i++) + out->add (arrayZ[i].unicodeValue); + } + + void collect_mapping (hb_set_t *unicodes, /* OUT */ + hb_map_t *mapping /* OUT */) const + { + unsigned count = len; + for (unsigned i = 0; i < count; i++) + { + hb_codepoint_t unicode = arrayZ[i].unicodeValue; + hb_codepoint_t glyphid = arrayZ[i].glyphID; + unicodes->add (unicode); + mapping->set (unicode, glyphid); + } + } + + void closure_glyphs (const hb_set_t *unicodes, + hb_set_t *glyphset) const + { + + as_array () + | hb_filter (unicodes, &UVSMapping::unicodeValue) + | hb_map (&UVSMapping::glyphID) + | hb_sink (glyphset) + ; + } + + NonDefaultUVS* copy (hb_serialize_context_t *c, + const hb_set_t *unicodes, + const hb_set_t *glyphs_requested, + const hb_map_t *glyph_map) const + { + NonDefaultUVS *out = c->start_embed<NonDefaultUVS> (); + if (unlikely (!out)) return nullptr; + + auto it = + + as_array () + | hb_filter ([&] (const UVSMapping& _) + { + return unicodes->has (_.unicodeValue) || glyphs_requested->has (_.glyphID); + }) + ; + + if (!it) return nullptr; + + HBUINT32 len; + len = it.len (); + if (unlikely (!c->copy<HBUINT32> (len))) return nullptr; + + for (const UVSMapping& _ : it) + { + UVSMapping mapping; + mapping.unicodeValue = _.unicodeValue; + mapping.glyphID = glyph_map->get (_.glyphID); + c->copy<UVSMapping> (mapping); + } + + return out; + } + + public: + DEFINE_SIZE_ARRAY (4, *this); +}; + +struct VariationSelectorRecord +{ + glyph_variant_t get_glyph (hb_codepoint_t codepoint, + hb_codepoint_t *glyph, + const void *base) const + { + if ((base+defaultUVS).bfind (codepoint)) + return GLYPH_VARIANT_USE_DEFAULT; + const UVSMapping &nonDefault = (base+nonDefaultUVS).bsearch (codepoint); + if (nonDefault.glyphID) + { + *glyph = nonDefault.glyphID; + return GLYPH_VARIANT_FOUND; + } + return GLYPH_VARIANT_NOT_FOUND; + } + + VariationSelectorRecord(const VariationSelectorRecord& other) + { + *this = other; + } + + void operator= (const VariationSelectorRecord& other) + { + varSelector = other.varSelector; + HBUINT32 offset = other.defaultUVS; + defaultUVS = offset; + offset = other.nonDefaultUVS; + nonDefaultUVS = offset; + } + + void collect_unicodes (hb_set_t *out, const void *base) const + { + (base+defaultUVS).collect_unicodes (out); + (base+nonDefaultUVS).collect_unicodes (out); + } + + void collect_mapping (const void *base, + hb_set_t *unicodes, /* OUT */ + hb_map_t *mapping /* OUT */) const + { + (base+defaultUVS).collect_unicodes (unicodes); + (base+nonDefaultUVS).collect_mapping (unicodes, mapping); + } + + int cmp (const hb_codepoint_t &variation_selector) const + { return varSelector.cmp (variation_selector); } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + defaultUVS.sanitize (c, base) && + nonDefaultUVS.sanitize (c, base)); + } + + hb_pair_t<unsigned, unsigned> + copy (hb_serialize_context_t *c, + const hb_set_t *unicodes, + const hb_set_t *glyphs_requested, + const hb_map_t *glyph_map, + const void *base) const + { + auto snap = c->snapshot (); + auto *out = c->embed<VariationSelectorRecord> (*this); + if (unlikely (!out)) return hb_pair (0, 0); + + out->defaultUVS = 0; + out->nonDefaultUVS = 0; + + unsigned non_default_uvs_objidx = 0; + if (nonDefaultUVS != 0) + { + c->push (); + if (c->copy (base+nonDefaultUVS, unicodes, glyphs_requested, glyph_map)) + non_default_uvs_objidx = c->pop_pack (); + else c->pop_discard (); + } + + unsigned default_uvs_objidx = 0; + if (defaultUVS != 0) + { + c->push (); + if (c->copy (base+defaultUVS, unicodes)) + default_uvs_objidx = c->pop_pack (); + else c->pop_discard (); + } + + + if (!default_uvs_objidx && !non_default_uvs_objidx) + c->revert (snap); + + return hb_pair (default_uvs_objidx, non_default_uvs_objidx); + } + + HBUINT24 varSelector; /* Variation selector. */ + LOffsetTo<DefaultUVS> + defaultUVS; /* Offset to Default UVS Table. May be 0. */ + LOffsetTo<NonDefaultUVS> + nonDefaultUVS; /* Offset to Non-Default UVS Table. May be 0. */ + public: + DEFINE_SIZE_STATIC (11); +}; + +struct CmapSubtableFormat14 +{ + glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint, + hb_codepoint_t variation_selector, + hb_codepoint_t *glyph) const + { return record.bsearch (variation_selector).get_glyph (codepoint, glyph, this); } + + void collect_variation_selectors (hb_set_t *out) const + { + unsigned int count = record.len; + for (unsigned int i = 0; i < count; i++) + out->add (record.arrayZ[i].varSelector); + } + void collect_variation_unicodes (hb_codepoint_t variation_selector, + hb_set_t *out) const + { record.bsearch (variation_selector).collect_unicodes (out, this); } + + void serialize (hb_serialize_context_t *c, + const hb_set_t *unicodes, + const hb_set_t *glyphs_requested, + const hb_map_t *glyph_map, + const void *base) + { + auto snap = c->snapshot (); + unsigned table_initpos = c->length (); + const char* init_tail = c->tail; + + if (unlikely (!c->extend_min (*this))) return; + this->format = 14; + + auto src_tbl = reinterpret_cast<const CmapSubtableFormat14*> (base); + + /* + * Some versions of OTS require that offsets are in order. Due to the use + * of push()/pop_pack() serializing the variation records in order results + * in the offsets being in reverse order (first record has the largest + * offset). While this is perfectly valid, it will cause some versions of + * OTS to consider this table bad. + * + * So to prevent this issue we serialize the variation records in reverse + * order, so that the offsets are ordered from small to large. Since + * variation records are supposed to be in increasing order of varSelector + * we then have to reverse the order of the written variation selector + * records after everything is finalized. + */ + hb_vector_t<hb_pair_t<unsigned, unsigned>> obj_indices; + for (int i = src_tbl->record.len - 1; i >= 0; i--) + { + hb_pair_t<unsigned, unsigned> result = src_tbl->record[i].copy (c, unicodes, glyphs_requested, glyph_map, base); + if (result.first || result.second) + obj_indices.push (result); + } + + if (c->length () - table_initpos == CmapSubtableFormat14::min_size) + { + c->revert (snap); + return; + } + + if (unlikely (!c->check_success (!obj_indices.in_error ()))) + return; + + int tail_len = init_tail - c->tail; + c->check_assign (this->length, c->length () - table_initpos + tail_len); + c->check_assign (this->record.len, + (c->length () - table_initpos - CmapSubtableFormat14::min_size) / + VariationSelectorRecord::static_size); + + /* Correct the incorrect write order by reversing the order of the variation + records array. */ + _reverse_variation_records (); + + /* Now that records are in the right order, we can set up the offsets. */ + _add_links_to_variation_records (c, obj_indices); + } + + void _reverse_variation_records () + { + record.as_array ().reverse (); + } + + void _add_links_to_variation_records (hb_serialize_context_t *c, + const hb_vector_t<hb_pair_t<unsigned, unsigned>>& obj_indices) + { + for (unsigned i = 0; i < obj_indices.length; i++) + { + /* + * Since the record array has been reversed (see comments in copy()) + * but obj_indices has not been, the indices at obj_indices[i] + * are for the variation record at record[j]. + */ + int j = obj_indices.length - 1 - i; + c->add_link (record[j].defaultUVS, obj_indices[i].first); + c->add_link (record[j].nonDefaultUVS, obj_indices[i].second); + } + } + + void closure_glyphs (const hb_set_t *unicodes, + hb_set_t *glyphset) const + { + + hb_iter (record) + | hb_filter (hb_bool, &VariationSelectorRecord::nonDefaultUVS) + | hb_map (&VariationSelectorRecord::nonDefaultUVS) + | hb_map (hb_add (this)) + | hb_apply ([=] (const NonDefaultUVS& _) { _.closure_glyphs (unicodes, glyphset); }) + ; + } + + void collect_unicodes (hb_set_t *out) const + { + for (const VariationSelectorRecord& _ : record) + _.collect_unicodes (out, this); + } + + void collect_mapping (hb_set_t *unicodes, /* OUT */ + hb_map_t *mapping /* OUT */) const + { + for (const VariationSelectorRecord& _ : record) + _.collect_mapping (this, unicodes, mapping); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + record.sanitize (c, this)); + } + + protected: + HBUINT16 format; /* Format number is set to 14. */ + HBUINT32 length; /* Byte length of this subtable. */ + SortedArrayOf<VariationSelectorRecord, HBUINT32> + record; /* Variation selector records; sorted + * in increasing order of `varSelector'. */ + public: + DEFINE_SIZE_ARRAY (10, record); +}; + +struct CmapSubtable +{ + /* Note: We intentionally do NOT implement subtable formats 2 and 8. */ + + bool get_glyph (hb_codepoint_t codepoint, + hb_codepoint_t *glyph) const + { + switch (u.format) { + case 0: return u.format0 .get_glyph (codepoint, glyph); + case 4: return u.format4 .get_glyph (codepoint, glyph); + case 6: return u.format6 .get_glyph (codepoint, glyph); + case 10: return u.format10.get_glyph (codepoint, glyph); + case 12: return u.format12.get_glyph (codepoint, glyph); + case 13: return u.format13.get_glyph (codepoint, glyph); + case 14: + default: return false; + } + } + void collect_unicodes (hb_set_t *out, unsigned int num_glyphs = UINT_MAX) const + { + switch (u.format) { + case 0: u.format0 .collect_unicodes (out); return; + case 4: u.format4 .collect_unicodes (out); return; + case 6: u.format6 .collect_unicodes (out); return; + case 10: u.format10.collect_unicodes (out); return; + case 12: u.format12.collect_unicodes (out, num_glyphs); return; + case 13: u.format13.collect_unicodes (out, num_glyphs); return; + case 14: + default: return; + } + } + + void collect_mapping (hb_set_t *unicodes, /* OUT */ + hb_map_t *mapping, /* OUT */ + unsigned num_glyphs = UINT_MAX) const + { + switch (u.format) { + case 0: u.format0 .collect_mapping (unicodes, mapping); return; + case 4: u.format4 .collect_mapping (unicodes, mapping); return; + case 6: u.format6 .collect_mapping (unicodes, mapping); return; + case 10: u.format10.collect_mapping (unicodes, mapping); return; + case 12: u.format12.collect_mapping (unicodes, mapping, num_glyphs); return; + case 13: u.format13.collect_mapping (unicodes, mapping, num_glyphs); return; + case 14: + default: return; + } + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + void serialize (hb_serialize_context_t *c, + Iterator it, + unsigned format, + const hb_subset_plan_t *plan, + const void *base) + { + switch (format) { + case 4: return u.format4.serialize (c, it); + case 12: return u.format12.serialize (c, it); + case 14: return u.format14.serialize (c, plan->unicodes, plan->glyphs_requested, plan->glyph_map, base); + default: return; + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!u.format.sanitize (c)) return_trace (false); + switch (u.format) { + case 0: return_trace (u.format0 .sanitize (c)); + case 4: return_trace (u.format4 .sanitize (c)); + case 6: return_trace (u.format6 .sanitize (c)); + case 10: return_trace (u.format10.sanitize (c)); + case 12: return_trace (u.format12.sanitize (c)); + case 13: return_trace (u.format13.sanitize (c)); + case 14: return_trace (u.format14.sanitize (c)); + default:return_trace (true); + } + } + + public: + union { + HBUINT16 format; /* Format identifier */ + CmapSubtableFormat0 format0; + CmapSubtableFormat4 format4; + CmapSubtableFormat6 format6; + CmapSubtableFormat10 format10; + CmapSubtableFormat12 format12; + CmapSubtableFormat13 format13; + CmapSubtableFormat14 format14; + } u; + public: + DEFINE_SIZE_UNION (2, format); +}; + + +struct EncodingRecord +{ + int cmp (const EncodingRecord &other) const + { + int ret; + ret = platformID.cmp (other.platformID); + if (ret) return ret; + ret = encodingID.cmp (other.encodingID); + if (ret) return ret; + return 0; + } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + subtable.sanitize (c, base)); + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + EncodingRecord* copy (hb_serialize_context_t *c, + Iterator it, + unsigned format, + const void *base, + const hb_subset_plan_t *plan, + /* INOUT */ unsigned *objidx) const + { + TRACE_SERIALIZE (this); + auto snap = c->snapshot (); + auto *out = c->embed (this); + if (unlikely (!out)) return_trace (nullptr); + out->subtable = 0; + + if (*objidx == 0) + { + CmapSubtable *cmapsubtable = c->push<CmapSubtable> (); + unsigned origin_length = c->length (); + cmapsubtable->serialize (c, it, format, plan, &(base+subtable)); + if (c->length () - origin_length > 0) *objidx = c->pop_pack (); + else c->pop_discard (); + } + + if (*objidx == 0) + { + c->revert (snap); + return_trace (nullptr); + } + + c->add_link (out->subtable, *objidx); + return_trace (out); + } + + HBUINT16 platformID; /* Platform ID. */ + HBUINT16 encodingID; /* Platform-specific encoding ID. */ + LOffsetTo<CmapSubtable> + subtable; /* Byte offset from beginning of table to the subtable for this encoding. */ + public: + DEFINE_SIZE_STATIC (8); +}; + +struct cmap +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_cmap; + + template<typename Iterator, typename EncodingRecIter, + hb_requires (hb_is_iterator (EncodingRecIter))> + void serialize (hb_serialize_context_t *c, + Iterator it, + EncodingRecIter encodingrec_iter, + const void *base, + const hb_subset_plan_t *plan) + { + if (unlikely (!c->extend_min ((*this)))) return; + this->version = 0; + + unsigned format4objidx = 0, format12objidx = 0, format14objidx = 0; + + for (const EncodingRecord& _ : encodingrec_iter) + { + unsigned format = (base+_.subtable).u.format; + if (!plan->glyphs_requested->is_empty ()) + { + hb_set_t unicodes_set; + hb_map_t cp_glyphid_map; + (base+_.subtable).collect_mapping (&unicodes_set, &cp_glyphid_map); + + auto table_iter = + + hb_zip (unicodes_set.iter(), unicodes_set.iter() | hb_map(cp_glyphid_map)) + | hb_filter (plan->_glyphset, hb_second) + | hb_filter ([plan] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& p) + { + return plan->unicodes->has (p.first) || + plan->glyphs_requested->has (p.second); + }) + | hb_map ([plan] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& p_org) + { + return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (p_org.first, plan->glyph_map->get(p_org.second)); + }) + ; + + if (format == 4) c->copy (_, table_iter, 4u, base, plan, &format4objidx); + else if (format == 12) c->copy (_, table_iter, 12u, base, plan, &format12objidx); + else if (format == 14) c->copy (_, table_iter, 14u, base, plan, &format14objidx); + } + /* when --gids option is not used, we iterate input unicodes instead of + * all codepoints in each subtable, which is more efficient */ + else + { + hb_set_t unicodes_set; + (base+_.subtable).collect_unicodes (&unicodes_set); + + if (format == 4) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 4u, base, plan, &format4objidx); + else if (format == 12) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 12u, base, plan, &format12objidx); + else if (format == 14) c->copy (_, it, 14u, base, plan, &format14objidx); + } + } + + c->check_assign(this->encodingRecord.len, (c->length () - cmap::min_size)/EncodingRecord::static_size); + } + + void closure_glyphs (const hb_set_t *unicodes, + hb_set_t *glyphset) const + { + + hb_iter (encodingRecord) + | hb_map (&EncodingRecord::subtable) + | hb_map (hb_add (this)) + | hb_filter ([&] (const CmapSubtable& _) { return _.u.format == 14; }) + | hb_apply ([=] (const CmapSubtable& _) { _.u.format14.closure_glyphs (unicodes, glyphset); }) + ; + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + + cmap *cmap_prime = c->serializer->start_embed<cmap> (); + if (unlikely (!c->serializer->check_success (cmap_prime))) return_trace (false); + + auto encodingrec_iter = + + hb_iter (encodingRecord) + | hb_filter ([&] (const EncodingRecord& _) + { + if ((_.platformID == 0 && _.encodingID == 3) || + (_.platformID == 0 && _.encodingID == 4) || + (_.platformID == 3 && _.encodingID == 1) || + (_.platformID == 3 && _.encodingID == 10) || + (this + _.subtable).u.format == 14) + return true; + + return false; + }) + ; + + if (unlikely (!encodingrec_iter.len ())) return_trace (false); + + const EncodingRecord *unicode_bmp= nullptr, *unicode_ucs4 = nullptr, *ms_bmp = nullptr, *ms_ucs4 = nullptr; + bool has_format12 = false; + + for (const EncodingRecord& _ : encodingrec_iter) + { + unsigned format = (this + _.subtable).u.format; + if (format == 12) has_format12 = true; + + const EncodingRecord *table = hb_addressof (_); + if (_.platformID == 0 && _.encodingID == 3) unicode_bmp = table; + else if (_.platformID == 0 && _.encodingID == 4) unicode_ucs4 = table; + else if (_.platformID == 3 && _.encodingID == 1) ms_bmp = table; + else if (_.platformID == 3 && _.encodingID == 10) ms_ucs4 = table; + } + + if (unlikely (!has_format12 && !unicode_bmp && !ms_bmp)) return_trace (false); + if (unlikely (has_format12 && (!unicode_ucs4 && !ms_ucs4))) return_trace (false); + + auto it = + + hb_iter (c->plan->unicodes) + | hb_map ([&] (hb_codepoint_t _) + { + hb_codepoint_t new_gid = HB_MAP_VALUE_INVALID; + c->plan->new_gid_for_codepoint (_, &new_gid); + return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (_, new_gid); + }) + | hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _) + { return (_.second != HB_MAP_VALUE_INVALID); }) + ; + cmap_prime->serialize (c->serializer, it, encodingrec_iter, this, c->plan); + return_trace (true); + } + + const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const + { + if (symbol) *symbol = false; + + const CmapSubtable *subtable; + + /* Symbol subtable. + * Prefer symbol if available. + * https://github.com/harfbuzz/harfbuzz/issues/1918 */ + if ((subtable = this->find_subtable (3, 0))) + { + if (symbol) *symbol = true; + return subtable; + } + + /* 32-bit subtables. */ + if ((subtable = this->find_subtable (3, 10))) return subtable; + if ((subtable = this->find_subtable (0, 6))) return subtable; + if ((subtable = this->find_subtable (0, 4))) return subtable; + + /* 16-bit subtables. */ + if ((subtable = this->find_subtable (3, 1))) return subtable; + if ((subtable = this->find_subtable (0, 3))) return subtable; + if ((subtable = this->find_subtable (0, 2))) return subtable; + if ((subtable = this->find_subtable (0, 1))) return subtable; + if ((subtable = this->find_subtable (0, 0))) return subtable; + + /* Meh. */ + return &Null (CmapSubtable); + } + + struct accelerator_t + { + void init (hb_face_t *face) + { + this->table = hb_sanitize_context_t ().reference_table<cmap> (face); + bool symbol; + this->subtable = table->find_best_subtable (&symbol); + this->subtable_uvs = &Null (CmapSubtableFormat14); + { + const CmapSubtable *st = table->find_subtable (0, 5); + if (st && st->u.format == 14) + subtable_uvs = &st->u.format14; + } + + this->get_glyph_data = subtable; + if (unlikely (symbol)) + this->get_glyph_funcZ = get_glyph_from_symbol<CmapSubtable>; + else + { + switch (subtable->u.format) { + /* Accelerate format 4 and format 12. */ + default: + this->get_glyph_funcZ = get_glyph_from<CmapSubtable>; + break; + case 12: + this->get_glyph_funcZ = get_glyph_from<CmapSubtableFormat12>; + break; + case 4: + { + this->format4_accel.init (&subtable->u.format4); + this->get_glyph_data = &this->format4_accel; + this->get_glyph_funcZ = this->format4_accel.get_glyph_func; + break; + } + } + } + } + + void fini () { this->table.destroy (); } + + bool get_nominal_glyph (hb_codepoint_t unicode, + hb_codepoint_t *glyph) const + { + if (unlikely (!this->get_glyph_funcZ)) return false; + return this->get_glyph_funcZ (this->get_glyph_data, unicode, glyph); + } + unsigned int get_nominal_glyphs (unsigned int count, + const hb_codepoint_t *first_unicode, + unsigned int unicode_stride, + hb_codepoint_t *first_glyph, + unsigned int glyph_stride) const + { + if (unlikely (!this->get_glyph_funcZ)) return 0; + + hb_cmap_get_glyph_func_t get_glyph_funcZ = this->get_glyph_funcZ; + const void *get_glyph_data = this->get_glyph_data; + + unsigned int done; + for (done = 0; + done < count && get_glyph_funcZ (get_glyph_data, *first_unicode, first_glyph); + done++) + { + first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride); + first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride); + } + return done; + } + + bool get_variation_glyph (hb_codepoint_t unicode, + hb_codepoint_t variation_selector, + hb_codepoint_t *glyph) const + { + switch (this->subtable_uvs->get_glyph_variant (unicode, + variation_selector, + glyph)) + { + case GLYPH_VARIANT_NOT_FOUND: return false; + case GLYPH_VARIANT_FOUND: return true; + case GLYPH_VARIANT_USE_DEFAULT: break; + } + + return get_nominal_glyph (unicode, glyph); + } + + void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const + { subtable->collect_unicodes (out, num_glyphs); } + void collect_mapping (hb_set_t *unicodes, hb_map_t *mapping, + unsigned num_glyphs = UINT_MAX) const + { subtable->collect_mapping (unicodes, mapping, num_glyphs); } + void collect_variation_selectors (hb_set_t *out) const + { subtable_uvs->collect_variation_selectors (out); } + void collect_variation_unicodes (hb_codepoint_t variation_selector, + hb_set_t *out) const + { subtable_uvs->collect_variation_unicodes (variation_selector, out); } + + protected: + typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj, + hb_codepoint_t codepoint, + hb_codepoint_t *glyph); + + template <typename Type> + HB_INTERNAL static bool get_glyph_from (const void *obj, + hb_codepoint_t codepoint, + hb_codepoint_t *glyph) + { + const Type *typed_obj = (const Type *) obj; + return typed_obj->get_glyph (codepoint, glyph); + } + + template <typename Type> + HB_INTERNAL static bool get_glyph_from_symbol (const void *obj, + hb_codepoint_t codepoint, + hb_codepoint_t *glyph) + { + const Type *typed_obj = (const Type *) obj; + if (likely (typed_obj->get_glyph (codepoint, glyph))) + return true; + + if (codepoint <= 0x00FFu) + { + /* For symbol-encoded OpenType fonts, we duplicate the + * U+F000..F0FF range at U+0000..U+00FF. That's what + * Windows seems to do, and that's hinted about at: + * https://docs.microsoft.com/en-us/typography/opentype/spec/recom + * under "Non-Standard (Symbol) Fonts". */ + return typed_obj->get_glyph (0xF000u + codepoint, glyph); + } + + return false; + } + + private: + hb_nonnull_ptr_t<const CmapSubtable> subtable; + hb_nonnull_ptr_t<const CmapSubtableFormat14> subtable_uvs; + + hb_cmap_get_glyph_func_t get_glyph_funcZ; + const void *get_glyph_data; + + CmapSubtableFormat4::accelerator_t format4_accel; + + public: + hb_blob_ptr_t<cmap> table; + }; + + protected: + + const CmapSubtable *find_subtable (unsigned int platform_id, + unsigned int encoding_id) const + { + EncodingRecord key; + key.platformID = platform_id; + key.encodingID = encoding_id; + + const EncodingRecord &result = encodingRecord.bsearch (key); + if (!result.subtable) + return nullptr; + + return &(this+result.subtable); + } + + const EncodingRecord *find_encodingrec (unsigned int platform_id, + unsigned int encoding_id) const + { + EncodingRecord key; + key.platformID = platform_id; + key.encodingID = encoding_id; + + return encodingRecord.as_array ().bsearch (key); + } + + bool find_subtable (unsigned format) const + { + auto it = + + hb_iter (encodingRecord) + | hb_map (&EncodingRecord::subtable) + | hb_map (hb_add (this)) + | hb_filter ([&] (const CmapSubtable& _) { return _.u.format == format; }) + ; + + return it.len (); + } + + public: + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + likely (version == 0) && + encodingRecord.sanitize (c, this)); + } + + protected: + HBUINT16 version; /* Table version number (0). */ + SortedArrayOf<EncodingRecord> + encodingRecord; /* Encoding tables. */ + public: + DEFINE_SIZE_ARRAY (4, encodingRecord); +}; + +struct cmap_accelerator_t : cmap::accelerator_t {}; + +} /* namespace OT */ + + +#endif /* HB_OT_CMAP_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-color-cbdt-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-cbdt-table.hh new file mode 100644 index 0000000000..aaa1c37c64 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-color-cbdt-table.hh @@ -0,0 +1,985 @@ +/* + * Copyright © 2016 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Seigo Nonaka, Calder Kitagawa + */ + +#ifndef HB_OT_COLOR_CBDT_TABLE_HH +#define HB_OT_COLOR_CBDT_TABLE_HH + +#include "hb-open-type.hh" + +/* + * CBLC -- Color Bitmap Location + * https://docs.microsoft.com/en-us/typography/opentype/spec/cblc + * https://docs.microsoft.com/en-us/typography/opentype/spec/eblc + * CBDT -- Color Bitmap Data + * https://docs.microsoft.com/en-us/typography/opentype/spec/cbdt + * https://docs.microsoft.com/en-us/typography/opentype/spec/ebdt + */ +#define HB_OT_TAG_CBLC HB_TAG('C','B','L','C') +#define HB_OT_TAG_CBDT HB_TAG('C','B','D','T') + + +namespace OT { + +struct cblc_bitmap_size_subset_context_t +{ + const char *cbdt; + unsigned int cbdt_length; + hb_vector_t<char> *cbdt_prime; + unsigned int size; /* INOUT + * Input: old size of IndexSubtable + * Output: new size of IndexSubtable + */ + unsigned int num_tables; /* INOUT + * Input: old number of subtables. + * Output: new number of subtables. + */ + hb_codepoint_t start_glyph; /* OUT */ + hb_codepoint_t end_glyph; /* OUT */ +}; + +static inline bool +_copy_data_to_cbdt (hb_vector_t<char> *cbdt_prime, + const void *data, + unsigned length) +{ + unsigned int new_len = cbdt_prime->length + length; + if (unlikely (!cbdt_prime->alloc (new_len))) return false; + memcpy (cbdt_prime->arrayZ + cbdt_prime->length, data, length); + cbdt_prime->length = new_len; + return true; +} + +struct SmallGlyphMetrics +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + void get_extents (hb_font_t *font, hb_glyph_extents_t *extents) const + { + extents->x_bearing = font->em_scale_x (bearingX); + extents->y_bearing = font->em_scale_y (bearingY); + extents->width = font->em_scale_x (width); + extents->height = font->em_scale_y (-static_cast<int>(height)); + } + + HBUINT8 height; + HBUINT8 width; + HBINT8 bearingX; + HBINT8 bearingY; + HBUINT8 advance; + public: + DEFINE_SIZE_STATIC (5); +}; + +struct BigGlyphMetrics : SmallGlyphMetrics +{ + HBINT8 vertBearingX; + HBINT8 vertBearingY; + HBUINT8 vertAdvance; + public: + DEFINE_SIZE_STATIC (8); +}; + +struct SBitLineMetrics +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + HBINT8 ascender; + HBINT8 decender; + HBUINT8 widthMax; + HBINT8 caretSlopeNumerator; + HBINT8 caretSlopeDenominator; + HBINT8 caretOffset; + HBINT8 minOriginSB; + HBINT8 minAdvanceSB; + HBINT8 maxBeforeBL; + HBINT8 minAfterBL; + HBINT8 padding1; + HBINT8 padding2; + public: + DEFINE_SIZE_STATIC (12); +}; + + +/* + * Index Subtables. + */ + +struct IndexSubtableHeader +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + HBUINT16 indexFormat; + HBUINT16 imageFormat; + HBUINT32 imageDataOffset; + public: + DEFINE_SIZE_STATIC (8); +}; + +template <typename OffsetType> +struct IndexSubtableFormat1Or3 +{ + bool sanitize (hb_sanitize_context_t *c, unsigned int glyph_count) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + offsetArrayZ.sanitize (c, glyph_count + 1)); + } + + bool get_image_data (unsigned int idx, + unsigned int *offset, + unsigned int *length) const + { + if (unlikely (offsetArrayZ[idx + 1] <= offsetArrayZ[idx])) + return false; + + *offset = header.imageDataOffset + offsetArrayZ[idx]; + *length = offsetArrayZ[idx + 1] - offsetArrayZ[idx]; + return true; + } + + bool add_offset (hb_serialize_context_t *c, + unsigned int offset, + unsigned int *size /* OUT (accumulated) */) + { + TRACE_SERIALIZE (this); + Offset<OffsetType> embedded_offset; + embedded_offset = offset; + *size += sizeof (OffsetType); + auto *o = c->embed (embedded_offset); + return_trace ((bool) o); + } + + IndexSubtableHeader header; + UnsizedArrayOf<Offset<OffsetType>> + offsetArrayZ; + public: + DEFINE_SIZE_ARRAY (8, offsetArrayZ); +}; + +struct IndexSubtableFormat1 : IndexSubtableFormat1Or3<HBUINT32> {}; +struct IndexSubtableFormat3 : IndexSubtableFormat1Or3<HBUINT16> {}; + +struct IndexSubtable +{ + bool sanitize (hb_sanitize_context_t *c, unsigned int glyph_count) const + { + TRACE_SANITIZE (this); + if (!u.header.sanitize (c)) return_trace (false); + switch (u.header.indexFormat) + { + case 1: return_trace (u.format1.sanitize (c, glyph_count)); + case 3: return_trace (u.format3.sanitize (c, glyph_count)); + default:return_trace (true); + } + } + + bool + finish_subtable (hb_serialize_context_t *c, + unsigned int cbdt_prime_len, + unsigned int num_glyphs, + unsigned int *size /* OUT (accumulated) */) + { + TRACE_SERIALIZE (this); + + unsigned int local_offset = cbdt_prime_len - u.header.imageDataOffset; + switch (u.header.indexFormat) + { + case 1: return_trace (u.format1.add_offset (c, local_offset, size)); + case 3: { + if (!u.format3.add_offset (c, local_offset, size)) + return_trace (false); + if (!(num_glyphs & 0x01)) // Pad to 32-bit alignment if needed. + return_trace (u.format3.add_offset (c, 0, size)); + return_trace (true); + } + // TODO: implement 2, 4, 5. + case 2: case 4: // No-op. + case 5: // Pad to 32-bit aligned. + default: return_trace (false); + } + } + + bool + fill_missing_glyphs (hb_serialize_context_t *c, + unsigned int cbdt_prime_len, + unsigned int num_missing, + unsigned int *size /* OUT (accumulated) */, + unsigned int *num_glyphs /* OUT (accumulated) */) + { + TRACE_SERIALIZE (this); + + unsigned int local_offset = cbdt_prime_len - u.header.imageDataOffset; + switch (u.header.indexFormat) + { + case 1: { + for (unsigned int i = 0; i < num_missing; i++) + { + if (unlikely (!u.format1.add_offset (c, local_offset, size))) + return_trace (false); + *num_glyphs += 1; + } + return_trace (true); + } + case 3: { + for (unsigned int i = 0; i < num_missing; i++) + { + if (unlikely (!u.format3.add_offset (c, local_offset, size))) + return_trace (false); + *num_glyphs += 1; + } + return_trace (true); + } + // TODO: implement 2, 4, 5. + case 2: // Add empty space in cbdt_prime?. + case 4: case 5: // No-op as sparse is supported. + default: return_trace (false); + } + } + + bool + copy_glyph_at_idx (hb_serialize_context_t *c, unsigned int idx, + const char *cbdt, unsigned int cbdt_length, + hb_vector_t<char> *cbdt_prime /* INOUT */, + IndexSubtable *subtable_prime /* INOUT */, + unsigned int *size /* OUT (accumulated) */) const + { + TRACE_SERIALIZE (this); + + unsigned int offset, length, format; + if (unlikely (!get_image_data (idx, &offset, &length, &format))) return_trace (false); + if (unlikely (offset > cbdt_length || cbdt_length - offset < length)) return_trace (false); + + auto *header_prime = subtable_prime->get_header (); + unsigned int new_local_offset = cbdt_prime->length - (unsigned int) header_prime->imageDataOffset; + if (unlikely (!_copy_data_to_cbdt (cbdt_prime, cbdt + offset, length))) return_trace (false); + + return_trace (subtable_prime->add_offset (c, new_local_offset, size)); + } + + bool + add_offset (hb_serialize_context_t *c, unsigned int local_offset, + unsigned int *size /* OUT (accumulated) */) + { + TRACE_SERIALIZE (this); + switch (u.header.indexFormat) + { + case 1: return_trace (u.format1.add_offset (c, local_offset, size)); + case 3: return_trace (u.format3.add_offset (c, local_offset, size)); + // TODO: Implement tables 2, 4, 5 + case 2: // Should be a no-op. + case 4: case 5: // Handle sparse cases. + default: return_trace (false); + } + } + + bool get_extents (hb_glyph_extents_t *extents HB_UNUSED) const + { + switch (u.header.indexFormat) + { + case 2: case 5: /* TODO */ + case 1: case 3: case 4: /* Variable-metrics formats do not have metrics here. */ + default:return (false); + } + } + + bool + get_image_data (unsigned int idx, unsigned int *offset, + unsigned int *length, unsigned int *format) const + { + *format = u.header.imageFormat; + switch (u.header.indexFormat) + { + case 1: return u.format1.get_image_data (idx, offset, length); + case 3: return u.format3.get_image_data (idx, offset, length); + default: return false; + } + } + + const IndexSubtableHeader* get_header () const { return &u.header; } + + void populate_header (unsigned index_format, + unsigned image_format, + unsigned int image_data_offset, + unsigned int *size) + { + u.header.indexFormat = index_format; + u.header.imageFormat = image_format; + u.header.imageDataOffset = image_data_offset; + switch (u.header.indexFormat) + { + case 1: *size += IndexSubtableFormat1::min_size; break; + case 3: *size += IndexSubtableFormat3::min_size; break; + } + } + + protected: + union { + IndexSubtableHeader header; + IndexSubtableFormat1 format1; + IndexSubtableFormat3 format3; + /* TODO: Format 2, 4, 5. */ + } u; + public: + DEFINE_SIZE_UNION (8, header); +}; + +struct IndexSubtableRecord +{ + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + firstGlyphIndex <= lastGlyphIndex && + offsetToSubtable.sanitize (c, base, lastGlyphIndex - firstGlyphIndex + 1)); + } + + const IndexSubtable* get_subtable (const void *base) const + { + return &(base+offsetToSubtable); + } + + bool add_new_subtable (hb_subset_context_t* c, + cblc_bitmap_size_subset_context_t *bitmap_size_context, + IndexSubtableRecord *record, + const hb_vector_t<hb_pair_t<hb_codepoint_t, const IndexSubtableRecord*>> *lookup, /* IN */ + const void *base, + unsigned int *start /* INOUT */) const + { + TRACE_SERIALIZE (this); + + auto *subtable = c->serializer->start_embed<IndexSubtable> (); + if (unlikely (!subtable)) return_trace (false); + if (unlikely (!c->serializer->extend_min (subtable))) return_trace (false); + + auto *old_subtable = get_subtable (base); + auto *old_header = old_subtable->get_header (); + + subtable->populate_header (old_header->indexFormat, + old_header->imageFormat, + bitmap_size_context->cbdt_prime->length, + &bitmap_size_context->size); + + unsigned int num_glyphs = 0; + bool early_exit = false; + for (unsigned int i = *start; i < lookup->length; i++) + { + hb_codepoint_t new_gid = (*lookup)[i].first; + const IndexSubtableRecord *next_record = (*lookup)[i].second; + const IndexSubtable *next_subtable = next_record->get_subtable (base); + auto *next_header = next_subtable->get_header (); + if (next_header != old_header) + { + *start = i; + early_exit = true; + break; + } + unsigned int num_missing = record->add_glyph_for_subset (new_gid); + if (unlikely (!subtable->fill_missing_glyphs (c->serializer, + bitmap_size_context->cbdt_prime->length, + num_missing, + &bitmap_size_context->size, + &num_glyphs))) + return_trace (false); + + hb_codepoint_t old_gid = 0; + c->plan->old_gid_for_new_gid (new_gid, &old_gid); + if (old_gid < next_record->firstGlyphIndex) + return_trace (false); + + unsigned int old_idx = (unsigned int) old_gid - next_record->firstGlyphIndex; + if (unlikely (!next_subtable->copy_glyph_at_idx (c->serializer, + old_idx, + bitmap_size_context->cbdt, + bitmap_size_context->cbdt_length, + bitmap_size_context->cbdt_prime, + subtable, + &bitmap_size_context->size))) + return_trace (false); + num_glyphs += 1; + } + if (!early_exit) + *start = lookup->length; + if (unlikely (!subtable->finish_subtable (c->serializer, + bitmap_size_context->cbdt_prime->length, + num_glyphs, + &bitmap_size_context->size))) + return_trace (false); + return_trace (true); + } + + bool add_new_record (hb_subset_context_t *c, + cblc_bitmap_size_subset_context_t *bitmap_size_context, + const hb_vector_t<hb_pair_t<hb_codepoint_t, const IndexSubtableRecord*>> *lookup, /* IN */ + const void *base, + unsigned int *start, /* INOUT */ + hb_vector_t<IndexSubtableRecord>* records /* INOUT */) const + { + TRACE_SERIALIZE (this); + auto snap = c->serializer->snapshot (); + unsigned int old_size = bitmap_size_context->size; + unsigned int old_cbdt_prime_length = bitmap_size_context->cbdt_prime->length; + + // Set to invalid state to indicate filling glyphs is not yet started. + if (unlikely (!records->resize (records->length + 1))) + return_trace (c->serializer->check_success (false)); + + (*records)[records->length - 1].firstGlyphIndex = 1; + (*records)[records->length - 1].lastGlyphIndex = 0; + bitmap_size_context->size += IndexSubtableRecord::min_size; + + c->serializer->push (); + + if (unlikely (!add_new_subtable (c, bitmap_size_context, &((*records)[records->length - 1]), lookup, base, start))) + { + c->serializer->pop_discard (); + c->serializer->revert (snap); + bitmap_size_context->cbdt_prime->shrink (old_cbdt_prime_length); + bitmap_size_context->size = old_size; + records->resize (records->length - 1); + return_trace (false); + } + + bitmap_size_context->num_tables += 1; + return_trace (true); + } + + unsigned int add_glyph_for_subset (hb_codepoint_t gid) + { + if (firstGlyphIndex > lastGlyphIndex) + { + firstGlyphIndex = gid; + lastGlyphIndex = gid; + return 0; + } + // TODO maybe assert? this shouldn't occur. + if (lastGlyphIndex > gid) + return 0; + unsigned int num_missing = (unsigned int) (gid - lastGlyphIndex - 1); + lastGlyphIndex = gid; + return num_missing; + } + + bool get_extents (hb_glyph_extents_t *extents, const void *base) const + { return (base+offsetToSubtable).get_extents (extents); } + + bool get_image_data (unsigned int gid, + const void *base, + unsigned int *offset, + unsigned int *length, + unsigned int *format) const + { + if (gid < firstGlyphIndex || gid > lastGlyphIndex) return false; + return (base+offsetToSubtable).get_image_data (gid - firstGlyphIndex, + offset, length, format); + } + + HBGlyphID firstGlyphIndex; + HBGlyphID lastGlyphIndex; + LOffsetTo<IndexSubtable> offsetToSubtable; + public: + DEFINE_SIZE_STATIC (8); +}; + +struct IndexSubtableArray +{ + friend struct CBDT; + + bool sanitize (hb_sanitize_context_t *c, unsigned int count) const + { + TRACE_SANITIZE (this); + return_trace (indexSubtablesZ.sanitize (c, count, this)); + } + + void + build_lookup (hb_subset_context_t *c, cblc_bitmap_size_subset_context_t *bitmap_size_context, + hb_vector_t<hb_pair_t<hb_codepoint_t, + const IndexSubtableRecord*>> *lookup /* OUT */) const + { + bool start_glyph_is_set = false; + for (hb_codepoint_t new_gid = 0; new_gid < c->plan->num_output_glyphs (); new_gid++) + { + hb_codepoint_t old_gid; + if (unlikely (!c->plan->old_gid_for_new_gid (new_gid, &old_gid))) continue; + + const IndexSubtableRecord* record = find_table (old_gid, bitmap_size_context->num_tables); + if (unlikely (!record)) continue; + + // Don't add gaps to the lookup. The best way to determine if a glyph is a + // gap is that it has no image data. + unsigned int offset, length, format; + if (unlikely (!record->get_image_data (old_gid, this, &offset, &length, &format))) continue; + + lookup->push (hb_pair_t<hb_codepoint_t, const IndexSubtableRecord*> (new_gid, record)); + + if (!start_glyph_is_set) + { + bitmap_size_context->start_glyph = new_gid; + start_glyph_is_set = true; + } + + bitmap_size_context->end_glyph = new_gid; + } + } + + bool + subset (hb_subset_context_t *c, + cblc_bitmap_size_subset_context_t *bitmap_size_context) const + { + TRACE_SUBSET (this); + + auto *dst = c->serializer->start_embed<IndexSubtableArray> (); + if (unlikely (!dst)) return_trace (false); + + hb_vector_t<hb_pair_t<hb_codepoint_t, const IndexSubtableRecord*>> lookup; + build_lookup (c, bitmap_size_context, &lookup); + if (unlikely (lookup.in_error ())) + return c->serializer->check_success (false); + + bitmap_size_context->size = 0; + bitmap_size_context->num_tables = 0; + hb_vector_t<IndexSubtableRecord> records; + for (unsigned int start = 0; start < lookup.length;) + { + if (unlikely (!lookup[start].second->add_new_record (c, bitmap_size_context, &lookup, this, &start, &records))) + { + // Discard any leftover pushes to the serializer from successful records. + for (unsigned int i = 0; i < records.length; i++) + c->serializer->pop_discard (); + return_trace (false); + } + } + + /* Workaround to ensure offset ordering is from least to greatest when + * resolving links. */ + hb_vector_t<hb_serialize_context_t::objidx_t> objidxs; + for (unsigned int i = 0; i < records.length; i++) + objidxs.push (c->serializer->pop_pack ()); + for (unsigned int i = 0; i < records.length; i++) + { + IndexSubtableRecord* record = c->serializer->embed (records[i]); + if (unlikely (!record)) return_trace (false); + c->serializer->add_link (record->offsetToSubtable, objidxs[records.length - 1 - i]); + } + return_trace (true); + } + + public: + const IndexSubtableRecord* find_table (hb_codepoint_t glyph, unsigned int numTables) const + { + for (unsigned int i = 0; i < numTables; ++i) + { + unsigned int firstGlyphIndex = indexSubtablesZ[i].firstGlyphIndex; + unsigned int lastGlyphIndex = indexSubtablesZ[i].lastGlyphIndex; + if (firstGlyphIndex <= glyph && glyph <= lastGlyphIndex) + return &indexSubtablesZ[i]; + } + return nullptr; + } + + protected: + UnsizedArrayOf<IndexSubtableRecord> indexSubtablesZ; +}; + +struct BitmapSizeTable +{ + friend struct CBLC; + friend struct CBDT; + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + indexSubtableArrayOffset.sanitize (c, base, numberOfIndexSubtables) && + horizontal.sanitize (c) && + vertical.sanitize (c)); + } + + const IndexSubtableRecord * + find_table (hb_codepoint_t glyph, const void *base, const void **out_base) const + { + *out_base = &(base+indexSubtableArrayOffset); + return (base+indexSubtableArrayOffset).find_table (glyph, numberOfIndexSubtables); + } + + bool + subset (hb_subset_context_t *c, const void *base, + const char *cbdt, unsigned int cbdt_length, + hb_vector_t<char> *cbdt_prime /* INOUT */) const + { + TRACE_SUBSET (this); + auto *out_table = c->serializer->embed (this); + if (unlikely (!out_table)) return_trace (false); + + cblc_bitmap_size_subset_context_t bitmap_size_context; + bitmap_size_context.cbdt = cbdt; + bitmap_size_context.cbdt_length = cbdt_length; + bitmap_size_context.cbdt_prime = cbdt_prime; + bitmap_size_context.size = indexTablesSize; + bitmap_size_context.num_tables = numberOfIndexSubtables; + bitmap_size_context.start_glyph = 1; + bitmap_size_context.end_glyph = 0; + + if (!out_table->indexSubtableArrayOffset.serialize_subset (c, + indexSubtableArrayOffset, + base, + &bitmap_size_context)) + return_trace (false); + if (!bitmap_size_context.size || + !bitmap_size_context.num_tables || + bitmap_size_context.start_glyph > bitmap_size_context.end_glyph) + return_trace (false); + + out_table->indexTablesSize = bitmap_size_context.size; + out_table->numberOfIndexSubtables = bitmap_size_context.num_tables; + out_table->startGlyphIndex = bitmap_size_context.start_glyph; + out_table->endGlyphIndex = bitmap_size_context.end_glyph; + return_trace (true); + } + + protected: + LNNOffsetTo<IndexSubtableArray> + indexSubtableArrayOffset; + HBUINT32 indexTablesSize; + HBUINT32 numberOfIndexSubtables; + HBUINT32 colorRef; + SBitLineMetrics horizontal; + SBitLineMetrics vertical; + HBGlyphID startGlyphIndex; + HBGlyphID endGlyphIndex; + HBUINT8 ppemX; + HBUINT8 ppemY; + HBUINT8 bitDepth; + HBINT8 flags; + public: + DEFINE_SIZE_STATIC (48); +}; + + +/* + * Glyph Bitmap Data Formats. + */ + +struct GlyphBitmapDataFormat17 +{ + SmallGlyphMetrics glyphMetrics; + LArrayOf<HBUINT8> data; + public: + DEFINE_SIZE_ARRAY (9, data); +}; + +struct GlyphBitmapDataFormat18 +{ + BigGlyphMetrics glyphMetrics; + LArrayOf<HBUINT8> data; + public: + DEFINE_SIZE_ARRAY (12, data); +}; + +struct GlyphBitmapDataFormat19 +{ + LArrayOf<HBUINT8> data; + public: + DEFINE_SIZE_ARRAY (4, data); +}; + +struct CBLC +{ + friend struct CBDT; + + static constexpr hb_tag_t tableTag = HB_OT_TAG_CBLC; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + likely (version.major == 2 || version.major == 3) && + sizeTables.sanitize (c, this)); + } + + static bool + sink_cbdt (hb_subset_context_t *c, hb_vector_t<char>* cbdt_prime) + { + hb_blob_t *cbdt_prime_blob = hb_blob_create (cbdt_prime->arrayZ, + cbdt_prime->length, + HB_MEMORY_MODE_WRITABLE, + cbdt_prime->arrayZ, + free); + cbdt_prime->init (); // Leak arrayZ to the blob. + bool ret = c->plan->add_table (HB_OT_TAG_CBDT, cbdt_prime_blob); + hb_blob_destroy (cbdt_prime_blob); + return ret; + } + + bool + subset_size_table (hb_subset_context_t *c, const BitmapSizeTable& table, + const char *cbdt /* IN */, unsigned int cbdt_length, + CBLC *cblc_prime /* INOUT */, hb_vector_t<char> *cbdt_prime /* INOUT */) const + { + TRACE_SUBSET (this); + cblc_prime->sizeTables.len++; + + auto snap = c->serializer->snapshot (); + auto cbdt_prime_len = cbdt_prime->length; + + if (!table.subset (c, this, cbdt, cbdt_length, cbdt_prime)) + { + cblc_prime->sizeTables.len--; + c->serializer->revert (snap); + cbdt_prime->shrink (cbdt_prime_len); + return_trace (false); + } + return_trace (true); + } + + // Implemented in cc file as it depends on definition of CBDT. + HB_INTERNAL bool subset (hb_subset_context_t *c) const; + + protected: + const BitmapSizeTable &choose_strike (hb_font_t *font) const + { + unsigned count = sizeTables.len; + if (unlikely (!count)) + return Null (BitmapSizeTable); + + unsigned int requested_ppem = hb_max (font->x_ppem, font->y_ppem); + if (!requested_ppem) + requested_ppem = 1<<30; /* Choose largest strike. */ + unsigned int best_i = 0; + unsigned int best_ppem = hb_max (sizeTables[0].ppemX, sizeTables[0].ppemY); + + for (unsigned int i = 1; i < count; i++) + { + unsigned int ppem = hb_max (sizeTables[i].ppemX, sizeTables[i].ppemY); + if ((requested_ppem <= ppem && ppem < best_ppem) || + (requested_ppem > best_ppem && ppem > best_ppem)) + { + best_i = i; + best_ppem = ppem; + } + } + + return sizeTables[best_i]; + } + + protected: + FixedVersion<> version; + LArrayOf<BitmapSizeTable> sizeTables; + public: + DEFINE_SIZE_ARRAY (8, sizeTables); +}; + +struct CBDT +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_CBDT; + + struct accelerator_t + { + void init (hb_face_t *face) + { + cblc = hb_sanitize_context_t ().reference_table<CBLC> (face); + cbdt = hb_sanitize_context_t ().reference_table<CBDT> (face); + + upem = hb_face_get_upem (face); + } + + void fini () + { + this->cblc.destroy (); + this->cbdt.destroy (); + } + + bool + get_extents (hb_font_t *font, hb_codepoint_t glyph, hb_glyph_extents_t *extents) const + { + const void *base; + const BitmapSizeTable &strike = this->cblc->choose_strike (font); + const IndexSubtableRecord *subtable_record = strike.find_table (glyph, cblc, &base); + if (!subtable_record || !strike.ppemX || !strike.ppemY) + return false; + + if (subtable_record->get_extents (extents, base)) + return true; + + unsigned int image_offset = 0, image_length = 0, image_format = 0; + if (!subtable_record->get_image_data (glyph, base, &image_offset, &image_length, &image_format)) + return false; + + unsigned int cbdt_len = cbdt.get_length (); + if (unlikely (image_offset > cbdt_len || cbdt_len - image_offset < image_length)) + return false; + + switch (image_format) + { + case 17: { + if (unlikely (image_length < GlyphBitmapDataFormat17::min_size)) + return false; + auto &glyphFormat17 = StructAtOffset<GlyphBitmapDataFormat17> (this->cbdt, image_offset); + glyphFormat17.glyphMetrics.get_extents (font, extents); + break; + } + case 18: { + if (unlikely (image_length < GlyphBitmapDataFormat18::min_size)) + return false; + auto &glyphFormat18 = StructAtOffset<GlyphBitmapDataFormat18> (this->cbdt, image_offset); + glyphFormat18.glyphMetrics.get_extents (font, extents); + break; + } + default: return false; /* TODO: Support other image formats. */ + } + + /* Convert to font units. */ + float x_scale = upem / (float) strike.ppemX; + float y_scale = upem / (float) strike.ppemY; + extents->x_bearing = roundf (extents->x_bearing * x_scale); + extents->y_bearing = roundf (extents->y_bearing * y_scale); + extents->width = roundf (extents->width * x_scale); + extents->height = roundf (extents->height * y_scale); + + return true; + } + + hb_blob_t* + reference_png (hb_font_t *font, hb_codepoint_t glyph) const + { + const void *base; + const BitmapSizeTable &strike = this->cblc->choose_strike (font); + const IndexSubtableRecord *subtable_record = strike.find_table (glyph, cblc, &base); + if (!subtable_record || !strike.ppemX || !strike.ppemY) + return hb_blob_get_empty (); + + unsigned int image_offset = 0, image_length = 0, image_format = 0; + if (!subtable_record->get_image_data (glyph, base, &image_offset, &image_length, &image_format)) + return hb_blob_get_empty (); + + unsigned int cbdt_len = cbdt.get_length (); + if (unlikely (image_offset > cbdt_len || cbdt_len - image_offset < image_length)) + return hb_blob_get_empty (); + + switch (image_format) + { + case 17: + { + if (unlikely (image_length < GlyphBitmapDataFormat17::min_size)) + return hb_blob_get_empty (); + auto &glyphFormat17 = StructAtOffset<GlyphBitmapDataFormat17> (this->cbdt, image_offset); + return hb_blob_create_sub_blob (cbdt.get_blob (), + image_offset + GlyphBitmapDataFormat17::min_size, + glyphFormat17.data.len); + } + case 18: + { + if (unlikely (image_length < GlyphBitmapDataFormat18::min_size)) + return hb_blob_get_empty (); + auto &glyphFormat18 = StructAtOffset<GlyphBitmapDataFormat18> (this->cbdt, image_offset); + return hb_blob_create_sub_blob (cbdt.get_blob (), + image_offset + GlyphBitmapDataFormat18::min_size, + glyphFormat18.data.len); + } + case 19: + { + if (unlikely (image_length < GlyphBitmapDataFormat19::min_size)) + return hb_blob_get_empty (); + auto &glyphFormat19 = StructAtOffset<GlyphBitmapDataFormat19> (this->cbdt, image_offset); + return hb_blob_create_sub_blob (cbdt.get_blob (), + image_offset + GlyphBitmapDataFormat19::min_size, + glyphFormat19.data.len); + } + default: return hb_blob_get_empty (); /* TODO: Support other image formats. */ + } + } + + bool has_data () const { return cbdt.get_length (); } + + private: + hb_blob_ptr_t<CBLC> cblc; + hb_blob_ptr_t<CBDT> cbdt; + + unsigned int upem; + }; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + likely (version.major == 2 || version.major == 3)); + } + + protected: + FixedVersion<> version; + UnsizedArrayOf<HBUINT8> dataZ; + public: + DEFINE_SIZE_ARRAY (4, dataZ); +}; + +inline bool +CBLC::subset (hb_subset_context_t *c) const +{ + TRACE_SUBSET (this); + + auto *cblc_prime = c->serializer->start_embed<CBLC> (); + + // Use a vector as a secondary buffer as the tables need to be built in parallel. + hb_vector_t<char> cbdt_prime; + + if (unlikely (!cblc_prime)) return_trace (false); + if (unlikely (!c->serializer->extend_min (cblc_prime))) return_trace (false); + cblc_prime->version = version; + + hb_blob_t* cbdt_blob = hb_sanitize_context_t ().reference_table<CBDT> (c->plan->source); + unsigned int cbdt_length; + CBDT* cbdt = (CBDT *) hb_blob_get_data (cbdt_blob, &cbdt_length); + if (unlikely (cbdt_length < CBDT::min_size)) + { + hb_blob_destroy (cbdt_blob); + return_trace (false); + } + _copy_data_to_cbdt (&cbdt_prime, cbdt, CBDT::min_size); + + for (const BitmapSizeTable& table : + sizeTables.iter ()) + subset_size_table (c, table, (const char *) cbdt, cbdt_length, cblc_prime, &cbdt_prime); + + hb_blob_destroy (cbdt_blob); + + return_trace (CBLC::sink_cbdt (c, &cbdt_prime)); +} + +struct CBDT_accelerator_t : CBDT::accelerator_t {}; + +} /* namespace OT */ + +#endif /* HB_OT_COLOR_CBDT_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh new file mode 100644 index 0000000000..92a49bb4f4 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-color-colr-table.hh @@ -0,0 +1,278 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * Copyright © 2020 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Calder Kitagawa + */ + +#ifndef HB_OT_COLOR_COLR_TABLE_HH +#define HB_OT_COLOR_COLR_TABLE_HH + +#include "hb-open-type.hh" + +/* + * COLR -- Color + * https://docs.microsoft.com/en-us/typography/opentype/spec/colr + */ +#define HB_OT_TAG_COLR HB_TAG('C','O','L','R') + + +namespace OT { + + +struct LayerRecord +{ + operator hb_ot_color_layer_t () const { return {glyphId, colorIdx}; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + public: + HBGlyphID glyphId; /* Glyph ID of layer glyph */ + Index colorIdx; /* Index value to use with a + * selected color palette. + * An index value of 0xFFFF + * is a special case indicating + * that the text foreground + * color (defined by a + * higher-level client) should + * be used and shall not be + * treated as actual index + * into CPAL ColorRecord array. */ + public: + DEFINE_SIZE_STATIC (4); +}; + +struct BaseGlyphRecord +{ + int cmp (hb_codepoint_t g) const + { return g < glyphId ? -1 : g > glyphId ? 1 : 0; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + public: + HBGlyphID glyphId; /* Glyph ID of reference glyph */ + HBUINT16 firstLayerIdx; /* Index (from beginning of + * the Layer Records) to the + * layer record. There will be + * numLayers consecutive entries + * for this base glyph. */ + HBUINT16 numLayers; /* Number of color layers + * associated with this glyph */ + public: + DEFINE_SIZE_STATIC (6); +}; + +struct COLR +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_COLR; + + bool has_data () const { return numBaseGlyphs; } + + unsigned int get_glyph_layers (hb_codepoint_t glyph, + unsigned int start_offset, + unsigned int *count, /* IN/OUT. May be NULL. */ + hb_ot_color_layer_t *layers /* OUT. May be NULL. */) const + { + const BaseGlyphRecord &record = (this+baseGlyphsZ).bsearch (numBaseGlyphs, glyph); + + hb_array_t<const LayerRecord> all_layers = (this+layersZ).as_array (numLayers); + hb_array_t<const LayerRecord> glyph_layers = all_layers.sub_array (record.firstLayerIdx, + record.numLayers); + if (count) + { + + glyph_layers.sub_array (start_offset, count) + | hb_sink (hb_array (layers, *count)) + ; + } + return glyph_layers.length; + } + + struct accelerator_t + { + accelerator_t () {} + ~accelerator_t () { fini (); } + + void init (hb_face_t *face) + { colr = hb_sanitize_context_t ().reference_table<COLR> (face); } + + void fini () { this->colr.destroy (); } + + bool is_valid () { return colr.get_blob ()->length; } + + void closure_glyphs (hb_codepoint_t glyph, + hb_set_t *related_ids /* OUT */) const + { colr->closure_glyphs (glyph, related_ids); } + + private: + hb_blob_ptr_t<COLR> colr; + }; + + void closure_glyphs (hb_codepoint_t glyph, + hb_set_t *related_ids /* OUT */) const + { + const BaseGlyphRecord *record = get_base_glyph_record (glyph); + if (!record) return; + + auto glyph_layers = (this+layersZ).as_array (numLayers).sub_array (record->firstLayerIdx, + record->numLayers); + if (!glyph_layers.length) return; + related_ids->add_array (&glyph_layers[0].glyphId, glyph_layers.length, LayerRecord::min_size); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + (this+baseGlyphsZ).sanitize (c, numBaseGlyphs) && + (this+layersZ).sanitize (c, numLayers))); + } + + template<typename BaseIterator, typename LayerIterator, + hb_requires (hb_is_iterator (BaseIterator)), + hb_requires (hb_is_iterator (LayerIterator))> + bool serialize (hb_serialize_context_t *c, + unsigned version, + BaseIterator base_it, + LayerIterator layer_it) + { + TRACE_SERIALIZE (this); + if (unlikely (base_it.len () != layer_it.len ())) + return_trace (false); + + if (unlikely (!c->extend_min (this))) return_trace (false); + this->version = version; + numLayers = 0; + numBaseGlyphs = base_it.len (); + baseGlyphsZ = COLR::min_size; + layersZ = COLR::min_size + numBaseGlyphs * BaseGlyphRecord::min_size; + + for (const hb_item_type<BaseIterator> _ : + base_it.iter ()) + { + auto* record = c->embed (_); + if (unlikely (!record)) return_trace (false); + record->firstLayerIdx = numLayers; + numLayers += record->numLayers; + } + + for (const hb_item_type<LayerIterator>& _ : + layer_it.iter ()) + _.as_array ().copy (c); + + return_trace (true); + } + + const BaseGlyphRecord* get_base_glyph_record (hb_codepoint_t gid) const + { + if ((unsigned int) gid == 0) // Ignore notdef. + return nullptr; + const BaseGlyphRecord* record = &(this+baseGlyphsZ).bsearch (numBaseGlyphs, (unsigned int) gid); + if ((record && (hb_codepoint_t) record->glyphId != gid)) + record = nullptr; + return record; + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + + const hb_map_t &reverse_glyph_map = *c->plan->reverse_glyph_map; + + auto base_it = + + hb_range (c->plan->num_output_glyphs ()) + | hb_map_retains_sorting ([&](hb_codepoint_t new_gid) + { + hb_codepoint_t old_gid = reverse_glyph_map.get (new_gid); + + const BaseGlyphRecord* old_record = get_base_glyph_record (old_gid); + if (unlikely (!old_record)) + return hb_pair_t<bool, BaseGlyphRecord> (false, Null (BaseGlyphRecord)); + + BaseGlyphRecord new_record; + new_record.glyphId = new_gid; + new_record.numLayers = old_record->numLayers; + return hb_pair_t<bool, BaseGlyphRecord> (true, new_record); + }) + | hb_filter (hb_first) + | hb_map_retains_sorting (hb_second) + ; + + auto layer_it = + + hb_range (c->plan->num_output_glyphs ()) + | hb_map (reverse_glyph_map) + | hb_map_retains_sorting ([&](hb_codepoint_t old_gid) + { + const BaseGlyphRecord* old_record = get_base_glyph_record (old_gid); + hb_vector_t<LayerRecord> out_layers; + + if (unlikely (!old_record || + old_record->firstLayerIdx >= numLayers || + old_record->firstLayerIdx + old_record->numLayers > numLayers)) + return hb_pair_t<bool, hb_vector_t<LayerRecord>> (false, out_layers); + + auto layers = (this+layersZ).as_array (numLayers).sub_array (old_record->firstLayerIdx, + old_record->numLayers); + out_layers.resize (layers.length); + for (unsigned int i = 0; i < layers.length; i++) { + out_layers[i] = layers[i]; + hb_codepoint_t new_gid = 0; + if (unlikely (!c->plan->new_gid_for_old_gid (out_layers[i].glyphId, &new_gid))) + return hb_pair_t<bool, hb_vector_t<LayerRecord>> (false, out_layers); + out_layers[i].glyphId = new_gid; + } + + return hb_pair_t<bool, hb_vector_t<LayerRecord>> (true, out_layers); + }) + | hb_filter (hb_first) + | hb_map_retains_sorting (hb_second) + ; + + if (unlikely (!base_it || !layer_it || base_it.len () != layer_it.len ())) + return_trace (false); + + COLR *colr_prime = c->serializer->start_embed<COLR> (); + return_trace (colr_prime->serialize (c->serializer, version, base_it, layer_it)); + } + + protected: + HBUINT16 version; /* Table version number (starts at 0). */ + HBUINT16 numBaseGlyphs; /* Number of Base Glyph Records. */ + LNNOffsetTo<SortedUnsizedArrayOf<BaseGlyphRecord>> + baseGlyphsZ; /* Offset to Base Glyph records. */ + LNNOffsetTo<UnsizedArrayOf<LayerRecord>> + layersZ; /* Offset to Layer Records. */ + HBUINT16 numLayers; /* Number of Layer Records. */ + public: + DEFINE_SIZE_STATIC (14); +}; + +} /* namespace OT */ + + +#endif /* HB_OT_COLOR_COLR_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-color-cpal-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-cpal-table.hh new file mode 100644 index 0000000000..fa7d3207be --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-color-cpal-table.hh @@ -0,0 +1,190 @@ +/* + * Copyright © 2016 Google, Inc. + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Sascha Brawer + */ + +#ifndef HB_OT_COLOR_CPAL_TABLE_HH +#define HB_OT_COLOR_CPAL_TABLE_HH + +#include "hb-open-type.hh" +#include "hb-ot-color.h" +#include "hb-ot-name.h" + + +/* + * CPAL -- Color Palette + * https://docs.microsoft.com/en-us/typography/opentype/spec/cpal + */ +#define HB_OT_TAG_CPAL HB_TAG('C','P','A','L') + + +namespace OT { + + +struct CPALV1Tail +{ + friend struct CPAL; + + private: + hb_ot_color_palette_flags_t get_palette_flags (const void *base, + unsigned int palette_index, + unsigned int palette_count) const + { + if (!paletteFlagsZ) return HB_OT_COLOR_PALETTE_FLAG_DEFAULT; + return (hb_ot_color_palette_flags_t) (uint32_t) + (base+paletteFlagsZ).as_array (palette_count)[palette_index]; + } + + hb_ot_name_id_t get_palette_name_id (const void *base, + unsigned int palette_index, + unsigned int palette_count) const + { + if (!paletteLabelsZ) return HB_OT_NAME_ID_INVALID; + return (base+paletteLabelsZ).as_array (palette_count)[palette_index]; + } + + hb_ot_name_id_t get_color_name_id (const void *base, + unsigned int color_index, + unsigned int color_count) const + { + if (!colorLabelsZ) return HB_OT_NAME_ID_INVALID; + return (base+colorLabelsZ).as_array (color_count)[color_index]; + } + + public: + bool sanitize (hb_sanitize_context_t *c, + const void *base, + unsigned int palette_count, + unsigned int color_count) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + (!paletteFlagsZ || (base+paletteFlagsZ).sanitize (c, palette_count)) && + (!paletteLabelsZ || (base+paletteLabelsZ).sanitize (c, palette_count)) && + (!colorLabelsZ || (base+colorLabelsZ).sanitize (c, color_count))); + } + + protected: + LNNOffsetTo<UnsizedArrayOf<HBUINT32>> + paletteFlagsZ; /* Offset from the beginning of CPAL table to + * the Palette Type Array. Set to 0 if no array + * is provided. */ + LNNOffsetTo<UnsizedArrayOf<NameID>> + paletteLabelsZ; /* Offset from the beginning of CPAL table to + * the palette labels array. Set to 0 if no + * array is provided. */ + LNNOffsetTo<UnsizedArrayOf<NameID>> + colorLabelsZ; /* Offset from the beginning of CPAL table to + * the color labels array. Set to 0 + * if no array is provided. */ + public: + DEFINE_SIZE_STATIC (12); +}; + +typedef HBUINT32 BGRAColor; + +struct CPAL +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_CPAL; + + bool has_data () const { return numPalettes; } + + unsigned int get_size () const + { return min_size + numPalettes * sizeof (colorRecordIndicesZ[0]); } + + unsigned int get_palette_count () const { return numPalettes; } + unsigned int get_color_count () const { return numColors; } + + hb_ot_color_palette_flags_t get_palette_flags (unsigned int palette_index) const + { return v1 ().get_palette_flags (this, palette_index, numPalettes); } + + hb_ot_name_id_t get_palette_name_id (unsigned int palette_index) const + { return v1 ().get_palette_name_id (this, palette_index, numPalettes); } + + hb_ot_name_id_t get_color_name_id (unsigned int color_index) const + { return v1 ().get_color_name_id (this, color_index, numColors); } + + unsigned int get_palette_colors (unsigned int palette_index, + unsigned int start_offset, + unsigned int *color_count, /* IN/OUT. May be NULL. */ + hb_color_t *colors /* OUT. May be NULL. */) const + { + if (unlikely (palette_index >= numPalettes)) + { + if (color_count) *color_count = 0; + return 0; + } + unsigned int start_index = colorRecordIndicesZ[palette_index]; + hb_array_t<const BGRAColor> all_colors ((this+colorRecordsZ).arrayZ, numColorRecords); + hb_array_t<const BGRAColor> palette_colors = all_colors.sub_array (start_index, + numColors); + if (color_count) + { + + palette_colors.sub_array (start_offset, color_count) + | hb_sink (hb_array (colors, *color_count)) + ; + } + return numColors; + } + + private: + const CPALV1Tail& v1 () const + { + if (version == 0) return Null (CPALV1Tail); + return StructAfter<CPALV1Tail> (*this); + } + + public: + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + (this+colorRecordsZ).sanitize (c, numColorRecords) && + colorRecordIndicesZ.sanitize (c, numPalettes) && + (version == 0 || v1 ().sanitize (c, this, numPalettes, numColors))); + } + + protected: + HBUINT16 version; /* Table version number */ + /* Version 0 */ + HBUINT16 numColors; /* Number of colors in each palette. */ + HBUINT16 numPalettes; /* Number of palettes in the table. */ + HBUINT16 numColorRecords; /* Total number of color records, combined for + * all palettes. */ + LNNOffsetTo<UnsizedArrayOf<BGRAColor>> + colorRecordsZ; /* Offset from the beginning of CPAL table to + * the first ColorRecord. */ + UnsizedArrayOf<HBUINT16> + colorRecordIndicesZ; /* Index of each palette’s first color record in + * the combined color record array. */ +/*CPALV1Tail v1;*/ + public: + DEFINE_SIZE_ARRAY (12, colorRecordIndicesZ); +}; + +} /* namespace OT */ + + +#endif /* HB_OT_COLOR_CPAL_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-color-sbix-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-sbix-table.hh new file mode 100644 index 0000000000..09da11597d --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-color-sbix-table.hh @@ -0,0 +1,414 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * Copyright © 2020 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Calder Kitagawa + */ + +#ifndef HB_OT_COLOR_SBIX_TABLE_HH +#define HB_OT_COLOR_SBIX_TABLE_HH + +#include "hb-open-type.hh" +#include "hb-ot-layout-common.hh" + +/* + * sbix -- Standard Bitmap Graphics + * https://docs.microsoft.com/en-us/typography/opentype/spec/sbix + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6sbix.html + */ +#define HB_OT_TAG_sbix HB_TAG('s','b','i','x') + + +namespace OT { + + +struct SBIXGlyph +{ + SBIXGlyph* copy (hb_serialize_context_t *c, unsigned int data_length) const + { + TRACE_SERIALIZE (this); + SBIXGlyph* new_glyph = c->start_embed<SBIXGlyph> (); + if (unlikely (!new_glyph)) return_trace (nullptr); + if (unlikely (!c->extend_min (new_glyph))) return_trace (nullptr); + + new_glyph->xOffset = xOffset; + new_glyph->yOffset = yOffset; + new_glyph->graphicType = graphicType; + data.copy (c, data_length); + return_trace (new_glyph); + } + + HBINT16 xOffset; /* The horizontal (x-axis) offset from the left + * edge of the graphic to the glyph’s origin. + * That is, the x-coordinate of the point on the + * baseline at the left edge of the glyph. */ + HBINT16 yOffset; /* The vertical (y-axis) offset from the bottom + * edge of the graphic to the glyph’s origin. + * That is, the y-coordinate of the point on the + * baseline at the left edge of the glyph. */ + Tag graphicType; /* Indicates the format of the embedded graphic + * data: one of 'jpg ', 'png ' or 'tiff', or the + * special format 'dupe'. */ + UnsizedArrayOf<HBUINT8> + data; /* The actual embedded graphic data. The total + * length is inferred from sequential entries in + * the glyphDataOffsets array and the fixed size + * (8 bytes) of the preceding fields. */ + public: + DEFINE_SIZE_ARRAY (8, data); +}; + +struct SBIXStrike +{ + static unsigned int get_size (unsigned num_glyphs) + { return min_size + num_glyphs * HBUINT32::static_size; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + imageOffsetsZ.sanitize_shallow (c, c->get_num_glyphs () + 1)); + } + + hb_blob_t *get_glyph_blob (unsigned int glyph_id, + hb_blob_t *sbix_blob, + hb_tag_t file_type, + int *x_offset, + int *y_offset, + unsigned int num_glyphs, + unsigned int *strike_ppem) const + { + if (unlikely (!ppem)) return hb_blob_get_empty (); /* To get Null() object out of the way. */ + + unsigned int retry_count = 8; + unsigned int sbix_len = sbix_blob->length; + unsigned int strike_offset = (const char *) this - (const char *) sbix_blob->data; + assert (strike_offset < sbix_len); + + retry: + if (unlikely (glyph_id >= num_glyphs || + imageOffsetsZ[glyph_id + 1] <= imageOffsetsZ[glyph_id] || + imageOffsetsZ[glyph_id + 1] - imageOffsetsZ[glyph_id] <= SBIXGlyph::min_size || + (unsigned int) imageOffsetsZ[glyph_id + 1] > sbix_len - strike_offset)) + return hb_blob_get_empty (); + + unsigned int glyph_offset = strike_offset + (unsigned int) imageOffsetsZ[glyph_id] + SBIXGlyph::min_size; + unsigned int glyph_length = imageOffsetsZ[glyph_id + 1] - imageOffsetsZ[glyph_id] - SBIXGlyph::min_size; + + const SBIXGlyph *glyph = &(this+imageOffsetsZ[glyph_id]); + + if (glyph->graphicType == HB_TAG ('d','u','p','e')) + { + if (glyph_length >= 2) + { + glyph_id = *((HBUINT16 *) &glyph->data); + if (retry_count--) + goto retry; + } + return hb_blob_get_empty (); + } + + if (unlikely (file_type != glyph->graphicType)) + return hb_blob_get_empty (); + + if (strike_ppem) *strike_ppem = ppem; + if (x_offset) *x_offset = glyph->xOffset; + if (y_offset) *y_offset = glyph->yOffset; + return hb_blob_create_sub_blob (sbix_blob, glyph_offset, glyph_length); + } + + bool subset (hb_subset_context_t *c, unsigned int available_len) const + { + TRACE_SUBSET (this); + unsigned int num_output_glyphs = c->plan->num_output_glyphs (); + + auto* out = c->serializer->start_embed<SBIXStrike> (); + if (unlikely (!out)) return_trace (false); + auto snap = c->serializer->snapshot (); + if (unlikely (!c->serializer->extend (*out, num_output_glyphs + 1))) return_trace (false); + out->ppem = ppem; + out->resolution = resolution; + HBUINT32 head; + head = get_size (num_output_glyphs + 1); + + bool has_glyphs = false; + for (unsigned new_gid = 0; new_gid < num_output_glyphs; new_gid++) + { + hb_codepoint_t old_gid; + if (!c->plan->old_gid_for_new_gid (new_gid, &old_gid) || + unlikely (imageOffsetsZ[old_gid].is_null () || + imageOffsetsZ[old_gid + 1].is_null () || + imageOffsetsZ[old_gid + 1] <= imageOffsetsZ[old_gid] || + imageOffsetsZ[old_gid + 1] - imageOffsetsZ[old_gid] <= SBIXGlyph::min_size) || + (unsigned int) imageOffsetsZ[old_gid + 1] > available_len) + { + out->imageOffsetsZ[new_gid] = head; + continue; + } + has_glyphs = true; + unsigned int delta = imageOffsetsZ[old_gid + 1] - imageOffsetsZ[old_gid]; + unsigned int glyph_data_length = delta - SBIXGlyph::min_size; + if (!(this+imageOffsetsZ[old_gid]).copy (c->serializer, glyph_data_length)) + return_trace (false); + out->imageOffsetsZ[new_gid] = head; + head += delta; + } + if (has_glyphs) + out->imageOffsetsZ[num_output_glyphs] = head; + else + c->serializer->revert (snap); + return_trace (has_glyphs); + } + + public: + HBUINT16 ppem; /* The PPEM size for which this strike was designed. */ + HBUINT16 resolution; /* The device pixel density (in PPI) for which this + * strike was designed. (E.g., 96 PPI, 192 PPI.) */ + protected: + UnsizedArrayOf<LOffsetTo<SBIXGlyph>> + imageOffsetsZ; /* Offset from the beginning of the strike data header + * to bitmap data for an individual glyph ID. */ + public: + DEFINE_SIZE_ARRAY (4, imageOffsetsZ); +}; + +struct sbix +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_sbix; + + bool has_data () const { return version; } + + const SBIXStrike &get_strike (unsigned int i) const { return this+strikes[i]; } + + struct accelerator_t + { + void init (hb_face_t *face) + { + table = hb_sanitize_context_t ().reference_table<sbix> (face); + num_glyphs = face->get_num_glyphs (); + } + void fini () { table.destroy (); } + + bool has_data () const { return table->has_data (); } + + bool get_extents (hb_font_t *font, + hb_codepoint_t glyph, + hb_glyph_extents_t *extents) const + { + /* We only support PNG right now, and following function checks type. */ + return get_png_extents (font, glyph, extents); + } + + hb_blob_t *reference_png (hb_font_t *font, + hb_codepoint_t glyph_id, + int *x_offset, + int *y_offset, + unsigned int *available_ppem) const + { + return choose_strike (font).get_glyph_blob (glyph_id, table.get_blob (), + HB_TAG ('p','n','g',' '), + x_offset, y_offset, + num_glyphs, available_ppem); + } + + private: + + const SBIXStrike &choose_strike (hb_font_t *font) const + { + unsigned count = table->strikes.len; + if (unlikely (!count)) + return Null (SBIXStrike); + + unsigned int requested_ppem = hb_max (font->x_ppem, font->y_ppem); + if (!requested_ppem) + requested_ppem = 1<<30; /* Choose largest strike. */ + /* TODO Add DPI sensitivity as well? */ + unsigned int best_i = 0; + unsigned int best_ppem = table->get_strike (0).ppem; + + for (unsigned int i = 1; i < count; i++) + { + unsigned int ppem = (table->get_strike (i)).ppem; + if ((requested_ppem <= ppem && ppem < best_ppem) || + (requested_ppem > best_ppem && ppem > best_ppem)) + { + best_i = i; + best_ppem = ppem; + } + } + + return table->get_strike (best_i); + } + + struct PNGHeader + { + HBUINT8 signature[8]; + struct + { + struct + { + HBUINT32 length; + Tag type; + } header; + HBUINT32 width; + HBUINT32 height; + HBUINT8 bitDepth; + HBUINT8 colorType; + HBUINT8 compressionMethod; + HBUINT8 filterMethod; + HBUINT8 interlaceMethod; + } IHDR; + + public: + DEFINE_SIZE_STATIC (29); + }; + + bool get_png_extents (hb_font_t *font, + hb_codepoint_t glyph, + hb_glyph_extents_t *extents) const + { + /* Following code is safe to call even without data. + * But faster to short-circuit. */ + if (!has_data ()) + return false; + + int x_offset = 0, y_offset = 0; + unsigned int strike_ppem = 0; + hb_blob_t *blob = reference_png (font, glyph, &x_offset, &y_offset, &strike_ppem); + + const PNGHeader &png = *blob->as<PNGHeader>(); + + extents->x_bearing = x_offset; + extents->y_bearing = png.IHDR.height + y_offset; + extents->width = png.IHDR.width; + extents->height = -1 * png.IHDR.height; + + /* Convert to font units. */ + if (strike_ppem) + { + float scale = font->face->get_upem () / (float) strike_ppem; + extents->x_bearing = font->em_scalef_x (extents->x_bearing * scale); + extents->y_bearing = font->em_scalef_y (extents->y_bearing * scale); + extents->width = font->em_scalef_x (extents->width * scale); + extents->height = font->em_scalef_y (extents->height * scale); + } + else + { + extents->x_bearing = font->em_scale_x (extents->x_bearing); + extents->y_bearing = font->em_scale_y (extents->y_bearing); + extents->width = font->em_scale_x (extents->width); + extents->height = font->em_scale_y (extents->height); + } + + hb_blob_destroy (blob); + + return strike_ppem; + } + + private: + hb_blob_ptr_t<sbix> table; + + unsigned int num_glyphs; + }; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + version >= 1 && + strikes.sanitize (c, this))); + } + + bool + add_strike (hb_subset_context_t *c, unsigned i) const + { + if (strikes[i].is_null () || c->source_blob->length < (unsigned) strikes[i]) + return false; + + return (this+strikes[i]).subset (c, c->source_blob->length - (unsigned) strikes[i]); + } + + bool serialize_strike_offsets (hb_subset_context_t *c) const + { + TRACE_SERIALIZE (this); + + auto *out = c->serializer->start_embed<LOffsetLArrayOf<SBIXStrike>> (); + if (unlikely (!out)) return_trace (false); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + + hb_vector_t<LOffsetTo<SBIXStrike>*> new_strikes; + hb_vector_t<hb_serialize_context_t::objidx_t> objidxs; + for (int i = strikes.len - 1; i >= 0; --i) + { + auto* o = out->serialize_append (c->serializer); + if (unlikely (!o)) return_trace (false); + *o = 0; + auto snap = c->serializer->snapshot (); + c->serializer->push (); + bool ret = add_strike (c, i); + if (!ret) + { + c->serializer->pop_discard (); + out->pop (); + c->serializer->revert (snap); + } + else + { + objidxs.push (c->serializer->pop_pack ()); + new_strikes.push (o); + } + } + for (unsigned int i = 0; i < new_strikes.length; ++i) + c->serializer->add_link (*new_strikes[i], objidxs[new_strikes.length - 1 - i]); + + return_trace (true); + } + + bool subset (hb_subset_context_t* c) const + { + TRACE_SUBSET (this); + + sbix *sbix_prime = c->serializer->start_embed<sbix> (); + if (unlikely (!sbix_prime)) return_trace (false); + if (unlikely (!c->serializer->embed (this->version))) return_trace (false); + if (unlikely (!c->serializer->embed (this->flags))) return_trace (false); + + return_trace (serialize_strike_offsets (c)); + } + + protected: + HBUINT16 version; /* Table version number — set to 1 */ + HBUINT16 flags; /* Bit 0: Set to 1. Bit 1: Draw outlines. + * Bits 2 to 15: reserved (set to 0). */ + LOffsetLArrayOf<SBIXStrike> + strikes; /* Offsets from the beginning of the 'sbix' + * table to data for each individual bitmap strike. */ + public: + DEFINE_SIZE_ARRAY (8, strikes); +}; + +struct sbix_accelerator_t : sbix::accelerator_t {}; + +} /* namespace OT */ + +#endif /* HB_OT_COLOR_SBIX_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-color-svg-table.hh b/thirdparty/harfbuzz/src/hb-ot-color-svg-table.hh new file mode 100644 index 0000000000..1cc40ae53f --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-color-svg-table.hh @@ -0,0 +1,124 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_OT_COLOR_SVG_TABLE_HH +#define HB_OT_COLOR_SVG_TABLE_HH + +#include "hb-open-type.hh" + +/* + * SVG -- SVG (Scalable Vector Graphics) + * https://docs.microsoft.com/en-us/typography/opentype/spec/svg + */ + +#define HB_OT_TAG_SVG HB_TAG('S','V','G',' ') + + +namespace OT { + + +struct SVGDocumentIndexEntry +{ + int cmp (hb_codepoint_t g) const + { return g < startGlyphID ? -1 : g > endGlyphID ? 1 : 0; } + + hb_blob_t *reference_blob (hb_blob_t *svg_blob, unsigned int index_offset) const + { + return hb_blob_create_sub_blob (svg_blob, + index_offset + (unsigned int) svgDoc, + svgDocLength); + } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + svgDoc.sanitize (c, base, svgDocLength)); + } + + protected: + HBUINT16 startGlyphID; /* The first glyph ID in the range described by + * this index entry. */ + HBUINT16 endGlyphID; /* The last glyph ID in the range described by + * this index entry. Must be >= startGlyphID. */ + LNNOffsetTo<UnsizedArrayOf<HBUINT8>> + svgDoc; /* Offset from the beginning of the SVG Document Index + * to an SVG document. Must be non-zero. */ + HBUINT32 svgDocLength; /* Length of the SVG document. + * Must be non-zero. */ + public: + DEFINE_SIZE_STATIC (12); +}; + +struct SVG +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_SVG; + + bool has_data () const { return svgDocEntries; } + + struct accelerator_t + { + void init (hb_face_t *face) + { table = hb_sanitize_context_t ().reference_table<SVG> (face); } + void fini () { table.destroy (); } + + hb_blob_t *reference_blob_for_glyph (hb_codepoint_t glyph_id) const + { + return table->get_glyph_entry (glyph_id).reference_blob (table.get_blob (), + table->svgDocEntries); + } + + bool has_data () const { return table->has_data (); } + + private: + hb_blob_ptr_t<SVG> table; + }; + + const SVGDocumentIndexEntry &get_glyph_entry (hb_codepoint_t glyph_id) const + { return (this+svgDocEntries).bsearch (glyph_id); } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + (this+svgDocEntries).sanitize_shallow (c))); + } + + protected: + HBUINT16 version; /* Table version (starting at 0). */ + LOffsetTo<SortedArrayOf<SVGDocumentIndexEntry>> + svgDocEntries; /* Offset (relative to the start of the SVG table) to the + * SVG Documents Index. Must be non-zero. */ + /* Array of SVG Document Index Entries. */ + HBUINT32 reserved; /* Set to 0. */ + public: + DEFINE_SIZE_STATIC (10); +}; + +struct SVG_accelerator_t : SVG::accelerator_t {}; + +} /* namespace OT */ + + +#endif /* HB_OT_COLOR_SVG_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-color.cc b/thirdparty/harfbuzz/src/hb-ot-color.cc new file mode 100644 index 0000000000..0e7203a88b --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-color.cc @@ -0,0 +1,321 @@ +/* + * Copyright © 2016 Google, Inc. + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Sascha Brawer, Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_COLOR + +#include "hb-ot.h" + +#include "hb-ot-color-cbdt-table.hh" +#include "hb-ot-color-colr-table.hh" +#include "hb-ot-color-cpal-table.hh" +#include "hb-ot-color-sbix-table.hh" +#include "hb-ot-color-svg-table.hh" + +#include <stdlib.h> +#include <string.h> + + +/** + * SECTION:hb-ot-color + * @title: hb-ot-color + * @short_description: OpenType Color Fonts + * @include: hb-ot.h + * + * Functions for fetching color-font information from OpenType font faces. + * + * HarfBuzz supports `COLR`/`CPAL`, `sbix`, `CBDT`, and `SVG` color fonts. + **/ + + +/* + * CPAL + */ + + +/** + * hb_ot_color_has_palettes: + * @face: #hb_face_t to work upon + * + * Tests whether a face includes a `CPAL` color-palette table. + * + * Return value: true if data found, false otherwise + * + * Since: 2.1.0 + */ +hb_bool_t +hb_ot_color_has_palettes (hb_face_t *face) +{ + return face->table.CPAL->has_data (); +} + +/** + * hb_ot_color_palette_get_count: + * @face: #hb_face_t to work upon + * + * Fetches the number of color palettes in a face. + * + * Return value: the number of palettes found + * + * Since: 2.1.0 + */ +unsigned int +hb_ot_color_palette_get_count (hb_face_t *face) +{ + return face->table.CPAL->get_palette_count (); +} + +/** + * hb_ot_color_palette_get_name_id: + * @face: #hb_face_t to work upon + * @palette_index: The index of the color palette + * + * Fetches the `name` table Name ID that provides display names for + * a `CPAL` color palette. + * + * Palette display names can be generic (e.g., "Default") or provide + * specific, themed names (e.g., "Spring", "Summer", "Fall", and "Winter"). + * + * Return value: the Named ID found for the palette. + * If the requested palette has no name the result is #HB_OT_NAME_ID_INVALID. + * + * Since: 2.1.0 + */ +hb_ot_name_id_t +hb_ot_color_palette_get_name_id (hb_face_t *face, + unsigned int palette_index) +{ + return face->table.CPAL->get_palette_name_id (palette_index); +} + +/** + * hb_ot_color_palette_color_get_name_id: + * @face: #hb_face_t to work upon + * @color_index: The index of the color + * + * Fetches the `name` table Name ID that provides display names for + * the specificed color in a face's `CPAL` color palette. + * + * Display names can be generic (e.g., "Background") or specific + * (e.g., "Eye color"). + * + * Return value: the Name ID found for the color. + * + * Since: 2.1.0 + */ +hb_ot_name_id_t +hb_ot_color_palette_color_get_name_id (hb_face_t *face, + unsigned int color_index) +{ + return face->table.CPAL->get_color_name_id (color_index); +} + +/** + * hb_ot_color_palette_get_flags: + * @face: #hb_face_t to work upon + * @palette_index: The index of the color palette + * + * Fetches the flags defined for a color palette. + * + * Return value: the #hb_ot_color_palette_flags_t of the requested color palette + * + * Since: 2.1.0 + */ +hb_ot_color_palette_flags_t +hb_ot_color_palette_get_flags (hb_face_t *face, + unsigned int palette_index) +{ + return face->table.CPAL->get_palette_flags (palette_index); +} + +/** + * hb_ot_color_palette_get_colors: + * @face: #hb_face_t to work upon + * @palette_index: the index of the color palette to query + * @start_offset: offset of the first color to retrieve + * @color_count: (inout) (optional): Input = the maximum number of colors to return; + * Output = the actual number of colors returned (may be zero) + * @colors: (out) (array length=color_count) (nullable): The array of #hb_color_t records found + * + * Fetches a list of the colors in a color palette. + * + * After calling this function, @colors will be filled with the palette + * colors. If @colors is NULL, the function will just return the number + * of total colors without storing any actual colors; this can be used + * for allocating a buffer of suitable size before calling + * hb_ot_color_palette_get_colors() a second time. + * + * Return value: the total number of colors in the palette + * + * Since: 2.1.0 + */ +unsigned int +hb_ot_color_palette_get_colors (hb_face_t *face, + unsigned int palette_index, + unsigned int start_offset, + unsigned int *colors_count /* IN/OUT. May be NULL. */, + hb_color_t *colors /* OUT. May be NULL. */) +{ + return face->table.CPAL->get_palette_colors (palette_index, start_offset, colors_count, colors); +} + + +/* + * COLR + */ + +/** + * hb_ot_color_has_layers: + * @face: #hb_face_t to work upon + * + * Tests whether a face includes any `COLR` color layers. + * + * Return value: true if data found, false otherwise + * + * Since: 2.1.0 + */ +hb_bool_t +hb_ot_color_has_layers (hb_face_t *face) +{ + return face->table.COLR->has_data (); +} + +/** + * hb_ot_color_glyph_get_layers: + * @face: #hb_face_t to work upon + * @glyph: The glyph index to query + * @start_offset: offset of the first layer to retrieve + * @layer_count: (inout) (optional): Input = the maximum number of layers to return; + * Output = the actual number of layers returned (may be zero) + * @layers: (out) (array length=layer_count) (nullable): The array of layers found + * + * Fetches a list of all color layers for the specified glyph index in the specified + * face. The list returned will begin at the offset provided. + * + * Return value: Total number of layers available for the glyph index queried + * + * Since: 2.1.0 + */ +unsigned int +hb_ot_color_glyph_get_layers (hb_face_t *face, + hb_codepoint_t glyph, + unsigned int start_offset, + unsigned int *layer_count, /* IN/OUT. May be NULL. */ + hb_ot_color_layer_t *layers /* OUT. May be NULL. */) +{ + return face->table.COLR->get_glyph_layers (glyph, start_offset, layer_count, layers); +} + + +/* + * SVG + */ + +/** + * hb_ot_color_has_svg: + * @face: #hb_face_t to work upon. + * + * Tests whether a face includes any `SVG` glyph images. + * + * Return value: true if data found, false otherwise. + * + * Since: 2.1.0 + */ +hb_bool_t +hb_ot_color_has_svg (hb_face_t *face) +{ + return face->table.SVG->has_data (); +} + +/** + * hb_ot_color_glyph_reference_svg: + * @face: #hb_face_t to work upon + * @glyph: a svg glyph index + * + * Fetches the SVG document for a glyph. The blob may be either plain text or gzip-encoded. + * + * Return value: (transfer full): An #hb_blob_t containing the SVG document of the glyph, if available + * + * Since: 2.1.0 + */ +hb_blob_t * +hb_ot_color_glyph_reference_svg (hb_face_t *face, hb_codepoint_t glyph) +{ + return face->table.SVG->reference_blob_for_glyph (glyph); +} + + +/* + * PNG: CBDT or sbix + */ + +/** + * hb_ot_color_has_png: + * @face: #hb_face_t to work upon + * + * Tests whether a face has PNG glyph images (either in `CBDT` or `sbix` tables). + * + * Return value: true if data found, false otherwise + * + * Since: 2.1.0 + */ +hb_bool_t +hb_ot_color_has_png (hb_face_t *face) +{ + return face->table.CBDT->has_data () || face->table.sbix->has_data (); +} + +/** + * hb_ot_color_glyph_reference_png: + * @font: #hb_font_t to work upon + * @glyph: a glyph index + * + * Fetches the PNG image for a glyph. This function takes a font object, not a face object, + * as input. To get an optimally sized PNG blob, the UPEM value must be set on the @font + * object. If UPEM is unset, the blob returned will be the largest PNG available. + * + * Return value: (transfer full): An #hb_blob_t containing the PNG image for the glyph, if available + * + * Since: 2.1.0 + */ +hb_blob_t * +hb_ot_color_glyph_reference_png (hb_font_t *font, hb_codepoint_t glyph) +{ + hb_blob_t *blob = hb_blob_get_empty (); + + if (font->face->table.sbix->has_data ()) + blob = font->face->table.sbix->reference_png (font, glyph, nullptr, nullptr, nullptr); + + if (!blob->length && font->face->table.CBDT->has_data ()) + blob = font->face->table.CBDT->reference_png (font, glyph); + + return blob; +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-color.h b/thirdparty/harfbuzz/src/hb-ot-color.h new file mode 100644 index 0000000000..63ef20a1a0 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-color.h @@ -0,0 +1,139 @@ +/* + * Copyright © 2016 Google, Inc. + * Copyright © 2018 Khaled Hosny + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Sascha Brawer, Behdad Esfahbod + */ + +#ifndef HB_OT_H_IN +#error "Include <hb-ot.h> instead." +#endif + +#ifndef HB_OT_COLOR_H +#define HB_OT_COLOR_H + +#include "hb.h" +#include "hb-ot-name.h" + +HB_BEGIN_DECLS + + +/* + * Color palettes. + */ + +HB_EXTERN hb_bool_t +hb_ot_color_has_palettes (hb_face_t *face); + +HB_EXTERN unsigned int +hb_ot_color_palette_get_count (hb_face_t *face); + +HB_EXTERN hb_ot_name_id_t +hb_ot_color_palette_get_name_id (hb_face_t *face, + unsigned int palette_index); + +HB_EXTERN hb_ot_name_id_t +hb_ot_color_palette_color_get_name_id (hb_face_t *face, + unsigned int color_index); + +/** + * hb_ot_color_palette_flags_t: + * @HB_OT_COLOR_PALETTE_FLAG_DEFAULT: Default indicating that there is nothing special + * to note about a color palette. + * @HB_OT_COLOR_PALETTE_FLAG_USABLE_WITH_LIGHT_BACKGROUND: Flag indicating that the color + * palette is appropriate to use when displaying the font on a light background such as white. + * @HB_OT_COLOR_PALETTE_FLAG_USABLE_WITH_DARK_BACKGROUND: Flag indicating that the color + * palette is appropriate to use when displaying the font on a dark background such as black. + * + * Since: 2.1.0 + */ +typedef enum { /*< flags >*/ + HB_OT_COLOR_PALETTE_FLAG_DEFAULT = 0x00000000u, + HB_OT_COLOR_PALETTE_FLAG_USABLE_WITH_LIGHT_BACKGROUND = 0x00000001u, + HB_OT_COLOR_PALETTE_FLAG_USABLE_WITH_DARK_BACKGROUND = 0x00000002u +} hb_ot_color_palette_flags_t; + +HB_EXTERN hb_ot_color_palette_flags_t +hb_ot_color_palette_get_flags (hb_face_t *face, + unsigned int palette_index); + +HB_EXTERN unsigned int +hb_ot_color_palette_get_colors (hb_face_t *face, + unsigned int palette_index, + unsigned int start_offset, + unsigned int *color_count, /* IN/OUT. May be NULL. */ + hb_color_t *colors /* OUT. May be NULL. */); + + +/* + * Color layers. + */ + +HB_EXTERN hb_bool_t +hb_ot_color_has_layers (hb_face_t *face); + +/** + * hb_ot_color_layer_t: + * + * Pairs of glyph and color index. + * + * Since: 2.1.0 + **/ +typedef struct hb_ot_color_layer_t +{ + hb_codepoint_t glyph; + unsigned int color_index; +} hb_ot_color_layer_t; + +HB_EXTERN unsigned int +hb_ot_color_glyph_get_layers (hb_face_t *face, + hb_codepoint_t glyph, + unsigned int start_offset, + unsigned int *layer_count, /* IN/OUT. May be NULL. */ + hb_ot_color_layer_t *layers /* OUT. May be NULL. */); + +/* + * SVG + */ + +HB_EXTERN hb_bool_t +hb_ot_color_has_svg (hb_face_t *face); + +HB_EXTERN hb_blob_t * +hb_ot_color_glyph_reference_svg (hb_face_t *face, hb_codepoint_t glyph); + +/* + * PNG: CBDT or sbix + */ + +HB_EXTERN hb_bool_t +hb_ot_color_has_png (hb_face_t *face); + +HB_EXTERN hb_blob_t * +hb_ot_color_glyph_reference_png (hb_font_t *font, hb_codepoint_t glyph); + + +HB_END_DECLS + +#endif /* HB_OT_COLOR_H */ diff --git a/thirdparty/harfbuzz/src/hb-ot-deprecated.h b/thirdparty/harfbuzz/src/hb-ot-deprecated.h new file mode 100644 index 0000000000..bc72f8a701 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-deprecated.h @@ -0,0 +1,111 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_H_IN +#error "Include <hb-ot.h> instead." +#endif + +#ifndef HB_OT_DEPRECATED_H +#define HB_OT_DEPRECATED_H + +#include "hb.h" +#include "hb-ot-name.h" + + +HB_BEGIN_DECLS + +#ifndef HB_DISABLE_DEPRECATED + + +/* https://github.com/harfbuzz/harfbuzz/issues/1734 */ +#define HB_MATH_GLYPH_PART_FLAG_EXTENDER HB_OT_MATH_GLYPH_PART_FLAG_EXTENDER + + +/* Like hb_ot_layout_table_find_script, but takes zero-terminated array of scripts to test */ +HB_EXTERN HB_DEPRECATED_FOR (hb_ot_layout_table_select_script) hb_bool_t +hb_ot_layout_table_choose_script (hb_face_t *face, + hb_tag_t table_tag, + const hb_tag_t *script_tags, + unsigned int *script_index, + hb_tag_t *chosen_script); + +HB_EXTERN HB_DEPRECATED_FOR (hb_ot_layout_script_select_language) hb_bool_t +hb_ot_layout_script_find_language (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + hb_tag_t language_tag, + unsigned int *language_index); + +HB_EXTERN HB_DEPRECATED_FOR (hb_ot_tags_from_script_and_language) void +hb_ot_tags_from_script (hb_script_t script, + hb_tag_t *script_tag_1, + hb_tag_t *script_tag_2); + +HB_EXTERN HB_DEPRECATED_FOR (hb_ot_tags_from_script_and_language) hb_tag_t +hb_ot_tag_from_language (hb_language_t language); + + +/** + * HB_OT_VAR_NO_AXIS_INDEX: + * + * Since: 1.4.2 + * Deprecated: 2.2.0 + */ +#define HB_OT_VAR_NO_AXIS_INDEX 0xFFFFFFFFu + +/** + * hb_ot_var_axis_t: + * + * Since: 1.4.2 + * Deprecated: 2.2.0 + */ +typedef struct hb_ot_var_axis_t +{ + hb_tag_t tag; + hb_ot_name_id_t name_id; + float min_value; + float default_value; + float max_value; +} hb_ot_var_axis_t; + +HB_EXTERN HB_DEPRECATED_FOR (hb_ot_var_get_axis_infos) unsigned int +hb_ot_var_get_axes (hb_face_t *face, + unsigned int start_offset, + unsigned int *axes_count /* IN/OUT */, + hb_ot_var_axis_t *axes_array /* OUT */); + +HB_EXTERN HB_DEPRECATED_FOR (hb_ot_var_find_axis_info) hb_bool_t +hb_ot_var_find_axis (hb_face_t *face, + hb_tag_t axis_tag, + unsigned int *axis_index, + hb_ot_var_axis_t *axis_info); + + +#endif + +HB_END_DECLS + +#endif /* HB_OT_DEPRECATED_H */ diff --git a/thirdparty/harfbuzz/src/hb-ot-face-table-list.hh b/thirdparty/harfbuzz/src/hb-ot-face-table-list.hh new file mode 100644 index 0000000000..367e143fdf --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-face-table-list.hh @@ -0,0 +1,138 @@ +/* + * Copyright © 2007,2008,2009 Red Hat, Inc. + * Copyright © 2012,2013 Google, Inc. + * Copyright © 2019, Facebook Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + * Facebook Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_FACE_TABLE_LIST_HH +#define HB_OT_FACE_TABLE_LIST_HH +#endif /* HB_OT_FACE_TABLE_LIST_HH */ /* Dummy header guards */ + +#ifndef HB_OT_ACCELERATOR +#define HB_OT_ACCELERATOR(Namespace, Type) HB_OT_TABLE (Namespace, Type) +#define _HB_OT_ACCELERATOR_UNDEF +#endif + + +/* This lists font tables that the hb_face_t will contain and lazily + * load. Don't add a table unless it's used though. This is not + * exactly free. */ + +/* v--- Add new tables in the right place here. */ + + +/* OpenType fundamentals. */ +HB_OT_TABLE (OT, head) +#if !defined(HB_NO_FACE_COLLECT_UNICODES) || !defined(HB_NO_OT_FONT) +HB_OT_ACCELERATOR (OT, cmap) +#endif +HB_OT_TABLE (OT, hhea) +HB_OT_ACCELERATOR (OT, hmtx) +HB_OT_TABLE (OT, OS2) +#if !defined(HB_NO_OT_FONT_GLYPH_NAMES) || !defined(HB_NO_METRICS) || !defined(HB_NO_STYLE) +HB_OT_ACCELERATOR (OT, post) +#endif +#ifndef HB_NO_NAME +HB_OT_ACCELERATOR (OT, name) +#endif +#ifndef HB_NO_STYLE +HB_OT_TABLE (OT, STAT) +#endif +#ifndef HB_NO_META +HB_OT_ACCELERATOR (OT, meta) +#endif + +/* Vertical layout. */ +HB_OT_TABLE (OT, vhea) +HB_OT_ACCELERATOR (OT, vmtx) + +/* TrueType outlines. */ +HB_OT_ACCELERATOR (OT, glyf) + +/* CFF outlines. */ +#ifndef HB_NO_CFF +HB_OT_ACCELERATOR (OT, cff1) +HB_OT_ACCELERATOR (OT, cff2) +HB_OT_TABLE (OT, VORG) +#endif + +/* OpenType variations. */ +#ifndef HB_NO_VAR +HB_OT_TABLE (OT, fvar) +HB_OT_TABLE (OT, avar) +HB_OT_ACCELERATOR (OT, gvar) +HB_OT_TABLE (OT, MVAR) +#endif + +/* Legacy kern. */ +#ifndef HB_NO_OT_KERN +HB_OT_TABLE (OT, kern) +#endif + +/* OpenType shaping. */ +#ifndef HB_NO_OT_LAYOUT +HB_OT_ACCELERATOR (OT, GDEF) +HB_OT_ACCELERATOR (OT, GSUB) +HB_OT_ACCELERATOR (OT, GPOS) +//HB_OT_TABLE (OT, JSTF) +#endif + +/* OpenType baseline. */ +#ifndef HB_NO_BASE +HB_OT_TABLE (OT, BASE) +#endif + +/* AAT shaping. */ +#ifndef HB_NO_AAT +HB_OT_TABLE (AAT, morx) +HB_OT_TABLE (AAT, mort) +HB_OT_TABLE (AAT, kerx) +HB_OT_TABLE (AAT, ankr) +HB_OT_TABLE (AAT, trak) +HB_OT_TABLE (AAT, ltag) +HB_OT_TABLE (AAT, feat) +// HB_OT_TABLE (AAT, opbd) +#endif + +/* OpenType color fonts. */ +#ifndef HB_NO_COLOR +HB_OT_TABLE (OT, COLR) +HB_OT_TABLE (OT, CPAL) +HB_OT_ACCELERATOR (OT, CBDT) +HB_OT_ACCELERATOR (OT, sbix) +HB_OT_ACCELERATOR (OT, SVG) +#endif + +/* OpenType math. */ +#ifndef HB_NO_MATH +HB_OT_TABLE (OT, MATH) +#endif + + +#ifdef _HB_OT_ACCELERATOR_UNDEF +#undef HB_OT_ACCELERATOR +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-face.cc b/thirdparty/harfbuzz/src/hb-ot-face.cc new file mode 100644 index 0000000000..5ef8df43ce --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-face.cc @@ -0,0 +1,58 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-ot-face.hh" + +#include "hb-ot-cmap-table.hh" +#include "hb-ot-glyf-table.hh" +#include "hb-ot-cff1-table.hh" +#include "hb-ot-cff2-table.hh" +#include "hb-ot-hmtx-table.hh" +#include "hb-ot-kern-table.hh" +#include "hb-ot-meta-table.hh" +#include "hb-ot-name-table.hh" +#include "hb-ot-post-table.hh" +#include "hb-ot-color-cbdt-table.hh" +#include "hb-ot-color-sbix-table.hh" +#include "hb-ot-color-svg-table.hh" +#include "hb-ot-layout-gdef-table.hh" +#include "hb-ot-layout-gsub-table.hh" +#include "hb-ot-layout-gpos-table.hh" + + +void hb_ot_face_t::init0 (hb_face_t *face) +{ + this->face = face; +#define HB_OT_TABLE(Namespace, Type) Type.init0 (); +#include "hb-ot-face-table-list.hh" +#undef HB_OT_TABLE +} +void hb_ot_face_t::fini () +{ +#define HB_OT_TABLE(Namespace, Type) Type.fini (); +#include "hb-ot-face-table-list.hh" +#undef HB_OT_TABLE +} diff --git a/thirdparty/harfbuzz/src/hb-ot-face.hh b/thirdparty/harfbuzz/src/hb-ot-face.hh new file mode 100644 index 0000000000..e24d380bca --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-face.hh @@ -0,0 +1,74 @@ +/* + * Copyright © 2007,2008,2009 Red Hat, Inc. + * Copyright © 2012,2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_FACE_HH +#define HB_OT_FACE_HH + +#include "hb.hh" + +#include "hb-machinery.hh" + + +/* + * hb_ot_face_t + */ + +/* Declare tables. */ +#define HB_OT_TABLE(Namespace, Type) namespace Namespace { struct Type; } +#define HB_OT_ACCELERATOR(Namespace, Type) HB_OT_TABLE (Namespace, Type##_accelerator_t) +#include "hb-ot-face-table-list.hh" +#undef HB_OT_ACCELERATOR +#undef HB_OT_TABLE + +struct hb_ot_face_t +{ + HB_INTERNAL void init0 (hb_face_t *face); + HB_INTERNAL void fini (); + +#define HB_OT_TABLE_ORDER(Namespace, Type) \ + HB_PASTE (ORDER_, HB_PASTE (Namespace, HB_PASTE (_, Type))) + enum order_t + { + ORDER_ZERO, +#define HB_OT_TABLE(Namespace, Type) HB_OT_TABLE_ORDER (Namespace, Type), +#include "hb-ot-face-table-list.hh" +#undef HB_OT_TABLE + }; + + hb_face_t *face; /* MUST be JUST before the lazy loaders. */ +#define HB_OT_TABLE(Namespace, Type) \ + hb_table_lazy_loader_t<Namespace::Type, HB_OT_TABLE_ORDER (Namespace, Type)> Type; +#define HB_OT_ACCELERATOR(Namespace, Type) \ + hb_face_lazy_loader_t<Namespace::Type##_accelerator_t, HB_OT_TABLE_ORDER (Namespace, Type)> Type; +#include "hb-ot-face-table-list.hh" +#undef HB_OT_ACCELERATOR +#undef HB_OT_TABLE +}; + + +#endif /* HB_OT_FACE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-font.cc b/thirdparty/harfbuzz/src/hb-ot-font.cc new file mode 100644 index 0000000000..a1dc88603a --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-font.cc @@ -0,0 +1,336 @@ +/* + * Copyright © 2011,2014 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod, Roozbeh Pournader + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_FONT + +#include "hb-ot.h" + +#include "hb-font.hh" +#include "hb-machinery.hh" +#include "hb-ot-face.hh" + +#include "hb-ot-cmap-table.hh" +#include "hb-ot-glyf-table.hh" +#include "hb-ot-cff1-table.hh" +#include "hb-ot-cff2-table.hh" +#include "hb-ot-hmtx-table.hh" +#include "hb-ot-os2-table.hh" +#include "hb-ot-post-table.hh" +#include "hb-ot-stat-table.hh" // Just so we compile it; unused otherwise. +#include "hb-ot-vorg-table.hh" +#include "hb-ot-color-cbdt-table.hh" +#include "hb-ot-color-sbix-table.hh" + + +/** + * SECTION:hb-ot-font + * @title: hb-ot-font + * @short_description: OpenType font implementation + * @include: hb-ot.h + * + * Functions for using OpenType fonts with hb_shape(). Note that fonts returned + * by hb_font_create() default to using these functions, so most clients would + * never need to call these functions directly. + **/ + + +static hb_bool_t +hb_ot_get_nominal_glyph (hb_font_t *font HB_UNUSED, + void *font_data, + hb_codepoint_t unicode, + hb_codepoint_t *glyph, + void *user_data HB_UNUSED) +{ + const hb_ot_face_t *ot_face = (const hb_ot_face_t *) font_data; + return ot_face->cmap->get_nominal_glyph (unicode, glyph); +} + +static unsigned int +hb_ot_get_nominal_glyphs (hb_font_t *font HB_UNUSED, + void *font_data, + unsigned int count, + const hb_codepoint_t *first_unicode, + unsigned int unicode_stride, + hb_codepoint_t *first_glyph, + unsigned int glyph_stride, + void *user_data HB_UNUSED) +{ + const hb_ot_face_t *ot_face = (const hb_ot_face_t *) font_data; + return ot_face->cmap->get_nominal_glyphs (count, + first_unicode, unicode_stride, + first_glyph, glyph_stride); +} + +static hb_bool_t +hb_ot_get_variation_glyph (hb_font_t *font HB_UNUSED, + void *font_data, + hb_codepoint_t unicode, + hb_codepoint_t variation_selector, + hb_codepoint_t *glyph, + void *user_data HB_UNUSED) +{ + const hb_ot_face_t *ot_face = (const hb_ot_face_t *) font_data; + return ot_face->cmap->get_variation_glyph (unicode, variation_selector, glyph); +} + +static void +hb_ot_get_glyph_h_advances (hb_font_t* font, void* font_data, + unsigned count, + const hb_codepoint_t *first_glyph, + unsigned glyph_stride, + hb_position_t *first_advance, + unsigned advance_stride, + void *user_data HB_UNUSED) +{ + const hb_ot_face_t *ot_face = (const hb_ot_face_t *) font_data; + const OT::hmtx_accelerator_t &hmtx = *ot_face->hmtx; + + for (unsigned int i = 0; i < count; i++) + { + *first_advance = font->em_scale_x (hmtx.get_advance (*first_glyph, font)); + first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride); + first_advance = &StructAtOffsetUnaligned<hb_position_t> (first_advance, advance_stride); + } +} + +static void +hb_ot_get_glyph_v_advances (hb_font_t* font, void* font_data, + unsigned count, + const hb_codepoint_t *first_glyph, + unsigned glyph_stride, + hb_position_t *first_advance, + unsigned advance_stride, + void *user_data HB_UNUSED) +{ + const hb_ot_face_t *ot_face = (const hb_ot_face_t *) font_data; + const OT::vmtx_accelerator_t &vmtx = *ot_face->vmtx; + + for (unsigned int i = 0; i < count; i++) + { + *first_advance = font->em_scale_y (-(int) vmtx.get_advance (*first_glyph, font)); + first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride); + first_advance = &StructAtOffsetUnaligned<hb_position_t> (first_advance, advance_stride); + } +} + +static hb_bool_t +hb_ot_get_glyph_v_origin (hb_font_t *font, + void *font_data, + hb_codepoint_t glyph, + hb_position_t *x, + hb_position_t *y, + void *user_data HB_UNUSED) +{ + const hb_ot_face_t *ot_face = (const hb_ot_face_t *) font_data; + + *x = font->get_glyph_h_advance (glyph) / 2; + +#ifndef HB_NO_OT_FONT_CFF + const OT::VORG &VORG = *ot_face->VORG; + if (VORG.has_data ()) + { + *y = font->em_scale_y (VORG.get_y_origin (glyph)); + return true; + } +#endif + + hb_glyph_extents_t extents = {0}; + if (ot_face->glyf->get_extents (font, glyph, &extents)) + { + const OT::vmtx_accelerator_t &vmtx = *ot_face->vmtx; + hb_position_t tsb = vmtx.get_side_bearing (font, glyph); + *y = extents.y_bearing + font->em_scale_y (tsb); + return true; + } + + hb_font_extents_t font_extents; + font->get_h_extents_with_fallback (&font_extents); + *y = font_extents.ascender; + + return true; +} + +static hb_bool_t +hb_ot_get_glyph_extents (hb_font_t *font, + void *font_data, + hb_codepoint_t glyph, + hb_glyph_extents_t *extents, + void *user_data HB_UNUSED) +{ + const hb_ot_face_t *ot_face = (const hb_ot_face_t *) font_data; + +#if !defined(HB_NO_OT_FONT_BITMAP) && !defined(HB_NO_COLOR) + if (ot_face->sbix->get_extents (font, glyph, extents)) return true; +#endif + if (ot_face->glyf->get_extents (font, glyph, extents)) return true; +#ifndef HB_NO_OT_FONT_CFF + if (ot_face->cff1->get_extents (font, glyph, extents)) return true; + if (ot_face->cff2->get_extents (font, glyph, extents)) return true; +#endif +#if !defined(HB_NO_OT_FONT_BITMAP) && !defined(HB_NO_COLOR) + if (ot_face->CBDT->get_extents (font, glyph, extents)) return true; +#endif + + // TODO Hook up side-bearings variations. + return false; +} + +#ifndef HB_NO_OT_FONT_GLYPH_NAMES +static hb_bool_t +hb_ot_get_glyph_name (hb_font_t *font HB_UNUSED, + void *font_data, + hb_codepoint_t glyph, + char *name, unsigned int size, + void *user_data HB_UNUSED) +{ + const hb_ot_face_t *ot_face = (const hb_ot_face_t *) font_data; + if (ot_face->post->get_glyph_name (glyph, name, size)) return true; +#ifndef HB_NO_OT_FONT_CFF + if (ot_face->cff1->get_glyph_name (glyph, name, size)) return true; +#endif + return false; +} +static hb_bool_t +hb_ot_get_glyph_from_name (hb_font_t *font HB_UNUSED, + void *font_data, + const char *name, int len, + hb_codepoint_t *glyph, + void *user_data HB_UNUSED) +{ + const hb_ot_face_t *ot_face = (const hb_ot_face_t *) font_data; + if (ot_face->post->get_glyph_from_name (name, len, glyph)) return true; +#ifndef HB_NO_OT_FONT_CFF + if (ot_face->cff1->get_glyph_from_name (name, len, glyph)) return true; +#endif + return false; +} +#endif + +static hb_bool_t +hb_ot_get_font_h_extents (hb_font_t *font, + void *font_data HB_UNUSED, + hb_font_extents_t *metrics, + void *user_data HB_UNUSED) +{ + return _hb_ot_metrics_get_position_common (font, HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER, &metrics->ascender) && + _hb_ot_metrics_get_position_common (font, HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER, &metrics->descender) && + _hb_ot_metrics_get_position_common (font, HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP, &metrics->line_gap); +} + +static hb_bool_t +hb_ot_get_font_v_extents (hb_font_t *font, + void *font_data HB_UNUSED, + hb_font_extents_t *metrics, + void *user_data HB_UNUSED) +{ + return _hb_ot_metrics_get_position_common (font, HB_OT_METRICS_TAG_VERTICAL_ASCENDER, &metrics->ascender) && + _hb_ot_metrics_get_position_common (font, HB_OT_METRICS_TAG_VERTICAL_DESCENDER, &metrics->descender) && + _hb_ot_metrics_get_position_common (font, HB_OT_METRICS_TAG_VERTICAL_LINE_GAP, &metrics->line_gap); +} + +#if HB_USE_ATEXIT +static void free_static_ot_funcs (); +#endif + +static struct hb_ot_font_funcs_lazy_loader_t : hb_font_funcs_lazy_loader_t<hb_ot_font_funcs_lazy_loader_t> +{ + static hb_font_funcs_t *create () + { + hb_font_funcs_t *funcs = hb_font_funcs_create (); + + hb_font_funcs_set_font_h_extents_func (funcs, hb_ot_get_font_h_extents, nullptr, nullptr); + hb_font_funcs_set_font_v_extents_func (funcs, hb_ot_get_font_v_extents, nullptr, nullptr); + hb_font_funcs_set_nominal_glyph_func (funcs, hb_ot_get_nominal_glyph, nullptr, nullptr); + hb_font_funcs_set_nominal_glyphs_func (funcs, hb_ot_get_nominal_glyphs, nullptr, nullptr); + hb_font_funcs_set_variation_glyph_func (funcs, hb_ot_get_variation_glyph, nullptr, nullptr); + hb_font_funcs_set_glyph_h_advances_func (funcs, hb_ot_get_glyph_h_advances, nullptr, nullptr); + hb_font_funcs_set_glyph_v_advances_func (funcs, hb_ot_get_glyph_v_advances, nullptr, nullptr); + //hb_font_funcs_set_glyph_h_origin_func (funcs, hb_ot_get_glyph_h_origin, nullptr, nullptr); + hb_font_funcs_set_glyph_v_origin_func (funcs, hb_ot_get_glyph_v_origin, nullptr, nullptr); + hb_font_funcs_set_glyph_extents_func (funcs, hb_ot_get_glyph_extents, nullptr, nullptr); + //hb_font_funcs_set_glyph_contour_point_func (funcs, hb_ot_get_glyph_contour_point, nullptr, nullptr); +#ifndef HB_NO_OT_FONT_GLYPH_NAMES + hb_font_funcs_set_glyph_name_func (funcs, hb_ot_get_glyph_name, nullptr, nullptr); + hb_font_funcs_set_glyph_from_name_func (funcs, hb_ot_get_glyph_from_name, nullptr, nullptr); +#endif + + hb_font_funcs_make_immutable (funcs); + +#if HB_USE_ATEXIT + atexit (free_static_ot_funcs); +#endif + + return funcs; + } +} static_ot_funcs; + +#if HB_USE_ATEXIT +static +void free_static_ot_funcs () +{ + static_ot_funcs.free_instance (); +} +#endif + +static hb_font_funcs_t * +_hb_ot_get_font_funcs () +{ + return static_ot_funcs.get_unconst (); +} + + +/** + * hb_ot_font_set_funcs: + * + * Since: 0.9.28 + **/ +void +hb_ot_font_set_funcs (hb_font_t *font) +{ + hb_font_set_funcs (font, + _hb_ot_get_font_funcs (), + &font->face->table, + nullptr); +} + +#ifndef HB_NO_VAR +int +_glyf_get_side_bearing_var (hb_font_t *font, hb_codepoint_t glyph, bool is_vertical) +{ + return font->face->table.glyf->get_side_bearing_var (font, glyph, is_vertical); +} + +unsigned +_glyf_get_advance_var (hb_font_t *font, hb_codepoint_t glyph, bool is_vertical) +{ + return font->face->table.glyf->get_advance_var (font, glyph, is_vertical); +} +#endif + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-font.h b/thirdparty/harfbuzz/src/hb-ot-font.h new file mode 100644 index 0000000000..80eaa54b1a --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-font.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2014 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod, Roozbeh Pournader + */ + +#ifndef HB_OT_H_IN +#error "Include <hb-ot.h> instead." +#endif + +#ifndef HB_OT_FONT_H +#define HB_OT_FONT_H + +#include "hb.h" + +HB_BEGIN_DECLS + + +HB_EXTERN void +hb_ot_font_set_funcs (hb_font_t *font); + + +HB_END_DECLS + +#endif /* HB_OT_FONT_H */ diff --git a/thirdparty/harfbuzz/src/hb-ot-gasp-table.hh b/thirdparty/harfbuzz/src/hb-ot-gasp-table.hh new file mode 100644 index 0000000000..4f291924af --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-gasp-table.hh @@ -0,0 +1,84 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_OT_GASP_TABLE_HH +#define HB_OT_GASP_TABLE_HH + +#include "hb-open-type.hh" +#include "hb-ot-hhea-table.hh" +#include "hb-ot-os2-table.hh" +#include "hb-ot-var-hvar-table.hh" + +/* + * gasp -- Grid-fitting and Scan-conversion Procedure + * https://docs.microsoft.com/en-us/typography/opentype/spec/gasp + */ +#define HB_OT_TAG_gasp HB_TAG('g','a','s','p') + + +namespace OT { + +struct GaspRange +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + public: + HBUINT16 rangeMaxPPEM; /* Upper limit of range, in PPEM */ + HBUINT16 rangeGaspBehavior; + /* Flags describing desired rasterizer behavior. */ + public: + DEFINE_SIZE_STATIC (4); +}; + +struct gasp +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_gasp; + + const GaspRange &get_gasp_range (unsigned int i) const + { return gaspRanges[i]; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + gaspRanges.sanitize (c)); + } + + protected: + HBUINT16 version; /* Version number (set to 1) */ + ArrayOf<GaspRange> + gaspRanges; /* Number of records to follow + * Sorted by ppem */ + public: + DEFINE_SIZE_ARRAY (4, gaspRanges); +}; + +} /* namespace OT */ + + +#endif /* HB_OT_GASP_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-glyf-table.hh b/thirdparty/harfbuzz/src/hb-ot-glyf-table.hh new file mode 100644 index 0000000000..5470bd96da --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-glyf-table.hh @@ -0,0 +1,1261 @@ +/* + * Copyright © 2015 Google, Inc. + * Copyright © 2019 Adobe Inc. + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod, Garret Rieger, Roderick Sheeter + * Adobe Author(s): Michiharu Ariza + */ + +#ifndef HB_OT_GLYF_TABLE_HH +#define HB_OT_GLYF_TABLE_HH + +#include "hb-open-type.hh" +#include "hb-ot-head-table.hh" +#include "hb-ot-hmtx-table.hh" +#include "hb-ot-var-gvar-table.hh" +#include "hb-draw.hh" + +namespace OT { + + +/* + * loca -- Index to Location + * https://docs.microsoft.com/en-us/typography/opentype/spec/loca + */ +#define HB_OT_TAG_loca HB_TAG('l','o','c','a') + + +struct loca +{ + friend struct glyf; + + static constexpr hb_tag_t tableTag = HB_OT_TAG_loca; + + bool sanitize (hb_sanitize_context_t *c HB_UNUSED) const + { + TRACE_SANITIZE (this); + return_trace (true); + } + + protected: + UnsizedArrayOf<HBUINT8> + dataZ; /* Location data. */ + public: + DEFINE_SIZE_MIN (0); /* In reality, this is UNBOUNDED() type; but since we always + * check the size externally, allow Null() object of it by + * defining it _MIN instead. */ +}; + + +/* + * glyf -- TrueType Glyph Data + * https://docs.microsoft.com/en-us/typography/opentype/spec/glyf + */ +#define HB_OT_TAG_glyf HB_TAG('g','l','y','f') + + +struct glyf +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_glyf; + + bool sanitize (hb_sanitize_context_t *c HB_UNUSED) const + { + TRACE_SANITIZE (this); + /* Runtime checks as eager sanitizing each glyph is costy */ + return_trace (true); + } + + template<typename Iterator, + hb_requires (hb_is_source_of (Iterator, unsigned int))> + static bool + _add_loca_and_head (hb_subset_plan_t * plan, Iterator padded_offsets) + { + unsigned max_offset = + + padded_offsets + | hb_reduce (hb_add, 0) + ; + unsigned num_offsets = padded_offsets.len () + 1; + bool use_short_loca = max_offset < 0x1FFFF; + unsigned entry_size = use_short_loca ? 2 : 4; + char *loca_prime_data = (char *) calloc (entry_size, num_offsets); + + if (unlikely (!loca_prime_data)) return false; + + DEBUG_MSG (SUBSET, nullptr, "loca entry_size %d num_offsets %d " + "max_offset %d size %d", + entry_size, num_offsets, max_offset, entry_size * num_offsets); + + if (use_short_loca) + _write_loca (padded_offsets, 1, hb_array ((HBUINT16 *) loca_prime_data, num_offsets)); + else + _write_loca (padded_offsets, 0, hb_array ((HBUINT32 *) loca_prime_data, num_offsets)); + + hb_blob_t *loca_blob = hb_blob_create (loca_prime_data, + entry_size * num_offsets, + HB_MEMORY_MODE_WRITABLE, + loca_prime_data, + free); + + bool result = plan->add_table (HB_OT_TAG_loca, loca_blob) + && _add_head_and_set_loca_version (plan, use_short_loca); + + hb_blob_destroy (loca_blob); + return result; + } + + template<typename IteratorIn, typename IteratorOut, + hb_requires (hb_is_source_of (IteratorIn, unsigned int)), + hb_requires (hb_is_sink_of (IteratorOut, unsigned))> + static void + _write_loca (IteratorIn it, unsigned right_shift, IteratorOut dest) + { + unsigned int offset = 0; + dest << 0; + + it + | hb_map ([=, &offset] (unsigned int padded_size) + { + offset += padded_size; + DEBUG_MSG (SUBSET, nullptr, "loca entry offset %d", offset); + return offset >> right_shift; + }) + | hb_sink (dest) + ; + } + + /* requires source of SubsetGlyph complains the identifier isn't declared */ + template <typename Iterator> + bool serialize (hb_serialize_context_t *c, + Iterator it, + const hb_subset_plan_t *plan) + { + TRACE_SERIALIZE (this); + unsigned init_len = c->length (); + for (const auto &_ : it) _.serialize (c, plan); + + /* As a special case when all glyph in the font are empty, add a zero byte + * to the table, so that OTS doesn’t reject it, and to make the table work + * on Windows as well. + * See https://github.com/khaledhosny/ots/issues/52 */ + if (init_len == c->length ()) + { + HBUINT8 empty_byte; + empty_byte = 0; + c->copy (empty_byte); + } + return_trace (true); + } + + /* Byte region(s) per glyph to output + unpadded, hints removed if so requested + If we fail to process a glyph we produce an empty (0-length) glyph */ + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + + glyf *glyf_prime = c->serializer->start_embed <glyf> (); + if (unlikely (!c->serializer->check_success (glyf_prime))) return_trace (false); + + hb_vector_t<SubsetGlyph> glyphs; + _populate_subset_glyphs (c->plan, &glyphs); + + glyf_prime->serialize (c->serializer, hb_iter (glyphs), c->plan); + + auto padded_offsets = + + hb_iter (glyphs) + | hb_map (&SubsetGlyph::padded_size) + ; + + if (c->serializer->in_error ()) return_trace (false); + return_trace (c->serializer->check_success (_add_loca_and_head (c->plan, + padded_offsets))); + } + + template <typename SubsetGlyph> + void + _populate_subset_glyphs (const hb_subset_plan_t *plan, + hb_vector_t<SubsetGlyph> *glyphs /* OUT */) const + { + OT::glyf::accelerator_t glyf; + glyf.init (plan->source); + + + hb_range (plan->num_output_glyphs ()) + | hb_map ([&] (hb_codepoint_t new_gid) + { + SubsetGlyph subset_glyph = {0}; + subset_glyph.new_gid = new_gid; + + /* should never fail: all old gids should be mapped */ + if (!plan->old_gid_for_new_gid (new_gid, &subset_glyph.old_gid)) + return subset_glyph; + + subset_glyph.source_glyph = glyf.glyph_for_gid (subset_glyph.old_gid, true); + if (plan->drop_hints) subset_glyph.drop_hints_bytes (); + else subset_glyph.dest_start = subset_glyph.source_glyph.get_bytes (); + + return subset_glyph; + }) + | hb_sink (glyphs) + ; + + glyf.fini (); + } + + static bool + _add_head_and_set_loca_version (hb_subset_plan_t *plan, bool use_short_loca) + { + hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table<head> (plan->source); + hb_blob_t *head_prime_blob = hb_blob_copy_writable_or_fail (head_blob); + hb_blob_destroy (head_blob); + + if (unlikely (!head_prime_blob)) + return false; + + head *head_prime = (head *) hb_blob_get_data_writable (head_prime_blob, nullptr); + head_prime->indexToLocFormat = use_short_loca ? 0 : 1; + bool success = plan->add_table (HB_OT_TAG_head, head_prime_blob); + + hb_blob_destroy (head_prime_blob); + return success; + } + + struct CompositeGlyphChain + { + protected: + enum composite_glyph_flag_t + { + ARG_1_AND_2_ARE_WORDS = 0x0001, + ARGS_ARE_XY_VALUES = 0x0002, + ROUND_XY_TO_GRID = 0x0004, + WE_HAVE_A_SCALE = 0x0008, + MORE_COMPONENTS = 0x0020, + WE_HAVE_AN_X_AND_Y_SCALE = 0x0040, + WE_HAVE_A_TWO_BY_TWO = 0x0080, + WE_HAVE_INSTRUCTIONS = 0x0100, + USE_MY_METRICS = 0x0200, + OVERLAP_COMPOUND = 0x0400, + SCALED_COMPONENT_OFFSET = 0x0800, + UNSCALED_COMPONENT_OFFSET = 0x1000 + }; + + public: + unsigned int get_size () const + { + unsigned int size = min_size; + /* arg1 and 2 are int16 */ + if (flags & ARG_1_AND_2_ARE_WORDS) size += 4; + /* arg1 and 2 are int8 */ + else size += 2; + + /* One x 16 bit (scale) */ + if (flags & WE_HAVE_A_SCALE) size += 2; + /* Two x 16 bit (xscale, yscale) */ + else if (flags & WE_HAVE_AN_X_AND_Y_SCALE) size += 4; + /* Four x 16 bit (xscale, scale01, scale10, yscale) */ + else if (flags & WE_HAVE_A_TWO_BY_TWO) size += 8; + + return size; + } + + void set_glyph_index (hb_codepoint_t new_gid) { glyphIndex = new_gid; } + hb_codepoint_t get_glyph_index () const { return glyphIndex; } + + void drop_instructions_flag () { flags = (uint16_t) flags & ~WE_HAVE_INSTRUCTIONS; } + bool has_instructions () const { return flags & WE_HAVE_INSTRUCTIONS; } + + bool has_more () const { return flags & MORE_COMPONENTS; } + bool is_use_my_metrics () const { return flags & USE_MY_METRICS; } + bool is_anchored () const { return !(flags & ARGS_ARE_XY_VALUES); } + void get_anchor_points (unsigned int &point1, unsigned int &point2) const + { + const HBUINT8 *p = &StructAfter<const HBUINT8> (glyphIndex); + if (flags & ARG_1_AND_2_ARE_WORDS) + { + point1 = ((const HBUINT16 *) p)[0]; + point2 = ((const HBUINT16 *) p)[1]; + } + else + { + point1 = p[0]; + point2 = p[1]; + } + } + + void transform_points (contour_point_vector_t &points) const + { + float matrix[4]; + contour_point_t trans; + if (get_transformation (matrix, trans)) + { + if (scaled_offsets ()) + { + points.translate (trans); + points.transform (matrix); + } + else + { + points.transform (matrix); + points.translate (trans); + } + } + } + + protected: + bool scaled_offsets () const + { return (flags & (SCALED_COMPONENT_OFFSET | UNSCALED_COMPONENT_OFFSET)) == SCALED_COMPONENT_OFFSET; } + + bool get_transformation (float (&matrix)[4], contour_point_t &trans) const + { + matrix[0] = matrix[3] = 1.f; + matrix[1] = matrix[2] = 0.f; + + int tx, ty; + const HBINT8 *p = &StructAfter<const HBINT8> (glyphIndex); + if (flags & ARG_1_AND_2_ARE_WORDS) + { + tx = *(const HBINT16 *) p; + p += HBINT16::static_size; + ty = *(const HBINT16 *) p; + p += HBINT16::static_size; + } + else + { + tx = *p++; + ty = *p++; + } + if (is_anchored ()) tx = ty = 0; + + trans.init ((float) tx, (float) ty); + + { + const F2DOT14 *points = (const F2DOT14 *) p; + if (flags & WE_HAVE_A_SCALE) + { + matrix[0] = matrix[3] = points[0].to_float (); + return true; + } + else if (flags & WE_HAVE_AN_X_AND_Y_SCALE) + { + matrix[0] = points[0].to_float (); + matrix[3] = points[1].to_float (); + return true; + } + else if (flags & WE_HAVE_A_TWO_BY_TWO) + { + matrix[0] = points[0].to_float (); + matrix[1] = points[1].to_float (); + matrix[2] = points[2].to_float (); + matrix[3] = points[3].to_float (); + return true; + } + } + return tx || ty; + } + + protected: + HBUINT16 flags; + HBGlyphID glyphIndex; + public: + DEFINE_SIZE_MIN (4); + }; + + struct composite_iter_t : hb_iter_with_fallback_t<composite_iter_t, const CompositeGlyphChain &> + { + typedef const CompositeGlyphChain *__item_t__; + composite_iter_t (hb_bytes_t glyph_, __item_t__ current_) : + glyph (glyph_), current (current_) + { if (!check_range (current)) current = nullptr; } + composite_iter_t () : glyph (hb_bytes_t ()), current (nullptr) {} + + const CompositeGlyphChain &__item__ () const { return *current; } + bool __more__ () const { return current; } + void __next__ () + { + if (!current->has_more ()) { current = nullptr; return; } + + const CompositeGlyphChain *possible = &StructAfter<CompositeGlyphChain, + CompositeGlyphChain> (*current); + if (!check_range (possible)) { current = nullptr; return; } + current = possible; + } + bool operator != (const composite_iter_t& o) const + { return glyph != o.glyph || current != o.current; } + + bool check_range (const CompositeGlyphChain *composite) const + { + return glyph.check_range (composite, CompositeGlyphChain::min_size) + && glyph.check_range (composite, composite->get_size ()); + } + + private: + hb_bytes_t glyph; + __item_t__ current; + }; + + enum phantom_point_index_t + { + PHANTOM_LEFT = 0, + PHANTOM_RIGHT = 1, + PHANTOM_TOP = 2, + PHANTOM_BOTTOM = 3, + PHANTOM_COUNT = 4 + }; + + struct accelerator_t; + + struct Glyph + { + enum simple_glyph_flag_t + { + FLAG_ON_CURVE = 0x01, + FLAG_X_SHORT = 0x02, + FLAG_Y_SHORT = 0x04, + FLAG_REPEAT = 0x08, + FLAG_X_SAME = 0x10, + FLAG_Y_SAME = 0x20, + FLAG_RESERVED1 = 0x40, + FLAG_RESERVED2 = 0x80 + }; + + private: + struct GlyphHeader + { + bool has_data () const { return numberOfContours; } + + bool get_extents (hb_font_t *font, const accelerator_t &glyf_accelerator, + hb_codepoint_t gid, hb_glyph_extents_t *extents) const + { + /* Undocumented rasterizer behavior: shift glyph to the left by (lsb - xMin), i.e., xMin = lsb */ + /* extents->x_bearing = hb_min (glyph_header.xMin, glyph_header.xMax); */ + extents->x_bearing = font->em_scale_x (glyf_accelerator.hmtx->get_side_bearing (gid)); + extents->y_bearing = font->em_scale_y (hb_max (yMin, yMax)); + extents->width = font->em_scale_x (hb_max (xMin, xMax) - hb_min (xMin, xMax)); + extents->height = font->em_scale_y (hb_min (yMin, yMax) - hb_max (yMin, yMax)); + + return true; + } + + HBINT16 numberOfContours; + /* If the number of contours is + * greater than or equal to zero, + * this is a simple glyph; if negative, + * this is a composite glyph. */ + FWORD xMin; /* Minimum x for coordinate data. */ + FWORD yMin; /* Minimum y for coordinate data. */ + FWORD xMax; /* Maximum x for coordinate data. */ + FWORD yMax; /* Maximum y for coordinate data. */ + public: + DEFINE_SIZE_STATIC (10); + }; + + struct SimpleGlyph + { + const GlyphHeader &header; + hb_bytes_t bytes; + SimpleGlyph (const GlyphHeader &header_, hb_bytes_t bytes_) : + header (header_), bytes (bytes_) {} + + unsigned int instruction_len_offset () const + { return GlyphHeader::static_size + 2 * header.numberOfContours; } + + unsigned int length (unsigned int instruction_len) const + { return instruction_len_offset () + 2 + instruction_len; } + + unsigned int instructions_length () const + { + unsigned int instruction_length_offset = instruction_len_offset (); + if (unlikely (instruction_length_offset + 2 > bytes.length)) return 0; + + const HBUINT16 &instructionLength = StructAtOffset<HBUINT16> (&bytes, instruction_length_offset); + /* Out of bounds of the current glyph */ + if (unlikely (length (instructionLength) > bytes.length)) return 0; + return instructionLength; + } + + const Glyph trim_padding () const + { + /* based on FontTools _g_l_y_f.py::trim */ + const char *glyph = bytes.arrayZ; + const char *glyph_end = glyph + bytes.length; + /* simple glyph w/contours, possibly trimmable */ + glyph += instruction_len_offset (); + + if (unlikely (glyph + 2 >= glyph_end)) return Glyph (); + unsigned int num_coordinates = StructAtOffset<HBUINT16> (glyph - 2, 0) + 1; + unsigned int num_instructions = StructAtOffset<HBUINT16> (glyph, 0); + + glyph += 2 + num_instructions; + + unsigned int coord_bytes = 0; + unsigned int coords_with_flags = 0; + while (glyph < glyph_end) + { + uint8_t flag = *glyph; + glyph++; + + unsigned int repeat = 1; + if (flag & FLAG_REPEAT) + { + if (unlikely (glyph >= glyph_end)) return Glyph (); + repeat = *glyph + 1; + glyph++; + } + + unsigned int xBytes, yBytes; + xBytes = yBytes = 0; + if (flag & FLAG_X_SHORT) xBytes = 1; + else if ((flag & FLAG_X_SAME) == 0) xBytes = 2; + + if (flag & FLAG_Y_SHORT) yBytes = 1; + else if ((flag & FLAG_Y_SAME) == 0) yBytes = 2; + + coord_bytes += (xBytes + yBytes) * repeat; + coords_with_flags += repeat; + if (coords_with_flags >= num_coordinates) break; + } + + if (unlikely (coords_with_flags != num_coordinates)) return Glyph (); + return Glyph (bytes.sub_array (0, bytes.length + coord_bytes - (glyph_end - glyph))); + } + + /* zero instruction length */ + void drop_hints () + { + GlyphHeader &glyph_header = const_cast<GlyphHeader &> (header); + (HBUINT16 &) StructAtOffset<HBUINT16> (&glyph_header, instruction_len_offset ()) = 0; + } + + void drop_hints_bytes (hb_bytes_t &dest_start, hb_bytes_t &dest_end) const + { + unsigned int instructions_len = instructions_length (); + unsigned int glyph_length = length (instructions_len); + dest_start = bytes.sub_array (0, glyph_length - instructions_len); + dest_end = bytes.sub_array (glyph_length, bytes.length - glyph_length); + } + + static bool read_points (const HBUINT8 *&p /* IN/OUT */, + contour_point_vector_t &points_ /* IN/OUT */, + const hb_bytes_t &bytes, + void (* setter) (contour_point_t &_, float v), + const simple_glyph_flag_t short_flag, + const simple_glyph_flag_t same_flag) + { + float v = 0; + for (unsigned i = 0; i < points_.length; i++) + { + uint8_t flag = points_[i].flag; + if (flag & short_flag) + { + if (unlikely (!bytes.check_range (p))) return false; + if (flag & same_flag) + v += *p++; + else + v -= *p++; + } + else + { + if (!(flag & same_flag)) + { + if (unlikely (!bytes.check_range ((const HBUINT16 *) p))) return false; + v += *(const HBINT16 *) p; + p += HBINT16::static_size; + } + } + setter (points_[i], v); + } + return true; + } + + bool get_contour_points (contour_point_vector_t &points_ /* OUT */, + bool phantom_only = false) const + { + const HBUINT16 *endPtsOfContours = &StructAfter<HBUINT16> (header); + int num_contours = header.numberOfContours; + if (unlikely (!bytes.check_range (&endPtsOfContours[num_contours + 1]))) return false; + unsigned int num_points = endPtsOfContours[num_contours - 1] + 1; + + points_.resize (num_points); + for (unsigned int i = 0; i < points_.length; i++) points_[i].init (); + if (phantom_only) return true; + + for (int i = 0; i < num_contours; i++) + points_[endPtsOfContours[i]].is_end_point = true; + + /* Skip instructions */ + const HBUINT8 *p = &StructAtOffset<HBUINT8> (&endPtsOfContours[num_contours + 1], + endPtsOfContours[num_contours]); + + /* Read flags */ + for (unsigned int i = 0; i < num_points; i++) + { + if (unlikely (!bytes.check_range (p))) return false; + uint8_t flag = *p++; + points_[i].flag = flag; + if (flag & FLAG_REPEAT) + { + if (unlikely (!bytes.check_range (p))) return false; + unsigned int repeat_count = *p++; + while ((repeat_count-- > 0) && (++i < num_points)) + points_[i].flag = flag; + } + } + + /* Read x & y coordinates */ + return read_points (p, points_, bytes, [] (contour_point_t &p, float v) { p.x = v; }, + FLAG_X_SHORT, FLAG_X_SAME) + && read_points (p, points_, bytes, [] (contour_point_t &p, float v) { p.y = v; }, + FLAG_Y_SHORT, FLAG_Y_SAME); + } + }; + + struct CompositeGlyph + { + const GlyphHeader &header; + hb_bytes_t bytes; + CompositeGlyph (const GlyphHeader &header_, hb_bytes_t bytes_) : + header (header_), bytes (bytes_) {} + + composite_iter_t get_iterator () const + { return composite_iter_t (bytes, &StructAfter<CompositeGlyphChain, GlyphHeader> (header)); } + + unsigned int instructions_length (hb_bytes_t bytes) const + { + unsigned int start = bytes.length; + unsigned int end = bytes.length; + const CompositeGlyphChain *last = nullptr; + for (auto &item : get_iterator ()) + last = &item; + if (unlikely (!last)) return 0; + + if (last->has_instructions ()) + start = (char *) last - &bytes + last->get_size (); + if (unlikely (start > end)) return 0; + return end - start; + } + + /* Trimming for composites not implemented. + * If removing hints it falls out of that. */ + const Glyph trim_padding () const { return Glyph (bytes); } + + void drop_hints () + { + for (const auto &_ : get_iterator ()) + const_cast<CompositeGlyphChain &> (_).drop_instructions_flag (); + } + + /* Chop instructions off the end */ + void drop_hints_bytes (hb_bytes_t &dest_start) const + { dest_start = bytes.sub_array (0, bytes.length - instructions_length (bytes)); } + }; + + enum glyph_type_t { EMPTY, SIMPLE, COMPOSITE }; + + public: + composite_iter_t get_composite_iterator () const + { + if (type != COMPOSITE) return composite_iter_t (); + return CompositeGlyph (*header, bytes).get_iterator (); + } + + const Glyph trim_padding () const + { + switch (type) { + case COMPOSITE: return CompositeGlyph (*header, bytes).trim_padding (); + case SIMPLE: return SimpleGlyph (*header, bytes).trim_padding (); + default: return bytes; + } + } + + void drop_hints () + { + switch (type) { + case COMPOSITE: CompositeGlyph (*header, bytes).drop_hints (); return; + case SIMPLE: SimpleGlyph (*header, bytes).drop_hints (); return; + default: return; + } + } + + void drop_hints_bytes (hb_bytes_t &dest_start, hb_bytes_t &dest_end) const + { + switch (type) { + case COMPOSITE: CompositeGlyph (*header, bytes).drop_hints_bytes (dest_start); return; + case SIMPLE: SimpleGlyph (*header, bytes).drop_hints_bytes (dest_start, dest_end); return; + default: return; + } + } + + /* Note: Recursively calls itself. + * all_points includes phantom points + */ + bool get_points (hb_font_t *font, const accelerator_t &glyf_accelerator, + contour_point_vector_t &all_points /* OUT */, + bool phantom_only = false, + unsigned int depth = 0) const + { + if (unlikely (depth > HB_MAX_NESTING_LEVEL)) return false; + contour_point_vector_t points; + + switch (type) { + case COMPOSITE: + { + /* pseudo component points for each component in composite glyph */ + unsigned num_points = hb_len (CompositeGlyph (*header, bytes).get_iterator ()); + if (unlikely (!points.resize (num_points))) return false; + for (unsigned i = 0; i < points.length; i++) + points[i].init (); + break; + } + case SIMPLE: + if (unlikely (!SimpleGlyph (*header, bytes).get_contour_points (points, phantom_only))) + return false; + break; + } + + /* Init phantom points */ + if (unlikely (!points.resize (points.length + PHANTOM_COUNT))) return false; + hb_array_t<contour_point_t> phantoms = points.sub_array (points.length - PHANTOM_COUNT, PHANTOM_COUNT); + { + for (unsigned i = 0; i < PHANTOM_COUNT; ++i) phantoms[i].init (); + int h_delta = (int) header->xMin - glyf_accelerator.hmtx->get_side_bearing (gid); + int v_orig = (int) header->yMax + glyf_accelerator.vmtx->get_side_bearing (gid); + unsigned h_adv = glyf_accelerator.hmtx->get_advance (gid); + unsigned v_adv = glyf_accelerator.vmtx->get_advance (gid); + phantoms[PHANTOM_LEFT].x = h_delta; + phantoms[PHANTOM_RIGHT].x = h_adv + h_delta; + phantoms[PHANTOM_TOP].y = v_orig; + phantoms[PHANTOM_BOTTOM].y = v_orig - (int) v_adv; + } + +#ifndef HB_NO_VAR + if (unlikely (!glyf_accelerator.gvar->apply_deltas_to_points (gid, font, points.as_array ()))) + return false; +#endif + + switch (type) { + case SIMPLE: + all_points.extend (points.as_array ()); + break; + case COMPOSITE: + { + unsigned int comp_index = 0; + for (auto &item : get_composite_iterator ()) + { + contour_point_vector_t comp_points; + if (unlikely (!glyf_accelerator.glyph_for_gid (item.get_glyph_index ()) + .get_points (font, glyf_accelerator, comp_points, + phantom_only, depth + 1) + || comp_points.length < PHANTOM_COUNT)) + return false; + + /* Copy phantom points from component if USE_MY_METRICS flag set */ + if (item.is_use_my_metrics ()) + for (unsigned int i = 0; i < PHANTOM_COUNT; i++) + phantoms[i] = comp_points[comp_points.length - PHANTOM_COUNT + i]; + + /* Apply component transformation & translation */ + item.transform_points (comp_points); + + /* Apply translation from gvar */ + comp_points.translate (points[comp_index]); + + if (item.is_anchored ()) + { + unsigned int p1, p2; + item.get_anchor_points (p1, p2); + if (likely (p1 < all_points.length && p2 < comp_points.length)) + { + contour_point_t delta; + delta.init (all_points[p1].x - comp_points[p2].x, + all_points[p1].y - comp_points[p2].y); + + comp_points.translate (delta); + } + } + + all_points.extend (comp_points.sub_array (0, comp_points.length - PHANTOM_COUNT)); + + comp_index++; + } + + all_points.extend (phantoms); + } break; + default: + all_points.extend (phantoms); + } + + if (depth == 0) /* Apply at top level */ + { + /* Undocumented rasterizer behavior: + * Shift points horizontally by the updated left side bearing + */ + contour_point_t delta; + delta.init (-phantoms[PHANTOM_LEFT].x, 0.f); + if (delta.x) all_points.translate (delta); + } + + return true; + } + + bool get_extents (hb_font_t *font, const accelerator_t &glyf_accelerator, + hb_glyph_extents_t *extents) const + { + if (type == EMPTY) return true; /* Empty glyph; zero extents. */ + return header->get_extents (font, glyf_accelerator, gid, extents); + } + + hb_bytes_t get_bytes () const { return bytes; } + + Glyph (hb_bytes_t bytes_ = hb_bytes_t (), + hb_codepoint_t gid_ = (hb_codepoint_t) -1) : bytes (bytes_), gid (gid_), + header (bytes.as<GlyphHeader> ()) + { + int num_contours = header->numberOfContours; + if (unlikely (num_contours == 0)) type = EMPTY; + else if (num_contours > 0) type = SIMPLE; + else type = COMPOSITE; /* negative numbers */ + } + + protected: + hb_bytes_t bytes; + hb_codepoint_t gid; + const GlyphHeader *header; + unsigned type; + }; + + struct accelerator_t + { + void init (hb_face_t *face_) + { + short_offset = false; + num_glyphs = 0; + loca_table = nullptr; + glyf_table = nullptr; +#ifndef HB_NO_VAR + gvar = nullptr; +#endif + hmtx = nullptr; + vmtx = nullptr; + face = face_; + const OT::head &head = *face->table.head; + if (head.indexToLocFormat > 1 || head.glyphDataFormat > 0) + /* Unknown format. Leave num_glyphs=0, that takes care of disabling us. */ + return; + short_offset = 0 == head.indexToLocFormat; + + loca_table = hb_sanitize_context_t ().reference_table<loca> (face); + glyf_table = hb_sanitize_context_t ().reference_table<glyf> (face); +#ifndef HB_NO_VAR + gvar = face->table.gvar; +#endif + hmtx = face->table.hmtx; + vmtx = face->table.vmtx; + + num_glyphs = hb_max (1u, loca_table.get_length () / (short_offset ? 2 : 4)) - 1; + num_glyphs = hb_min (num_glyphs, face->get_num_glyphs ()); + } + + void fini () + { + loca_table.destroy (); + glyf_table.destroy (); + } + + protected: + template<typename T> + bool get_points (hb_font_t *font, hb_codepoint_t gid, T consumer) const + { + if (gid >= num_glyphs) return false; + + /* Making this alloc free is not that easy + https://github.com/harfbuzz/harfbuzz/issues/2095 + mostly because of gvar handling in VF fonts, + perhaps a separate path for non-VF fonts can be considered */ + contour_point_vector_t all_points; + + bool phantom_only = !consumer.is_consuming_contour_points (); + if (unlikely (!glyph_for_gid (gid).get_points (font, *this, all_points, phantom_only))) + return false; + + if (consumer.is_consuming_contour_points ()) + { + for (unsigned point_index = 0; point_index + 4 < all_points.length; ++point_index) + consumer.consume_point (all_points[point_index]); + consumer.points_end (); + } + + /* Where to write phantoms, nullptr if not requested */ + contour_point_t *phantoms = consumer.get_phantoms_sink (); + if (phantoms) + for (unsigned i = 0; i < PHANTOM_COUNT; ++i) + phantoms[i] = all_points[all_points.length - PHANTOM_COUNT + i]; + + return true; + } + +#ifndef HB_NO_VAR + struct points_aggregator_t + { + hb_font_t *font; + hb_glyph_extents_t *extents; + contour_point_t *phantoms; + + struct contour_bounds_t + { + contour_bounds_t () { min_x = min_y = FLT_MAX; max_x = max_y = -FLT_MAX; } + + void add (const contour_point_t &p) + { + min_x = hb_min (min_x, p.x); + min_y = hb_min (min_y, p.y); + max_x = hb_max (max_x, p.x); + max_y = hb_max (max_y, p.y); + } + + bool empty () const { return (min_x >= max_x) || (min_y >= max_y); } + + void get_extents (hb_font_t *font, hb_glyph_extents_t *extents) + { + if (unlikely (empty ())) + { + extents->width = 0; + extents->x_bearing = 0; + extents->height = 0; + extents->y_bearing = 0; + return; + } + extents->x_bearing = font->em_scalef_x (min_x); + extents->width = font->em_scalef_x (max_x - min_x); + extents->y_bearing = font->em_scalef_y (max_y); + extents->height = font->em_scalef_y (min_y - max_y); + } + + protected: + float min_x, min_y, max_x, max_y; + } bounds; + + points_aggregator_t (hb_font_t *font_, hb_glyph_extents_t *extents_, contour_point_t *phantoms_) + { + font = font_; + extents = extents_; + phantoms = phantoms_; + if (extents) bounds = contour_bounds_t (); + } + + void consume_point (const contour_point_t &point) { bounds.add (point); } + void points_end () { bounds.get_extents (font, extents); } + + bool is_consuming_contour_points () { return extents; } + contour_point_t *get_phantoms_sink () { return phantoms; } + }; + + public: + unsigned + get_advance_var (hb_font_t *font, hb_codepoint_t gid, bool is_vertical) const + { + if (unlikely (gid >= num_glyphs)) return 0; + + bool success = false; + + contour_point_t phantoms[PHANTOM_COUNT]; + if (likely (font->num_coords == gvar->get_axis_count ())) + success = get_points (font, gid, points_aggregator_t (font, nullptr, phantoms)); + + if (unlikely (!success)) + return is_vertical ? vmtx->get_advance (gid) : hmtx->get_advance (gid); + + float result = is_vertical + ? phantoms[PHANTOM_TOP].y - phantoms[PHANTOM_BOTTOM].y + : phantoms[PHANTOM_RIGHT].x - phantoms[PHANTOM_LEFT].x; + return hb_clamp (roundf (result), 0.f, (float) UINT_MAX / 2); + } + + int get_side_bearing_var (hb_font_t *font, hb_codepoint_t gid, bool is_vertical) const + { + if (unlikely (gid >= num_glyphs)) return 0; + + hb_glyph_extents_t extents; + + contour_point_t phantoms[PHANTOM_COUNT]; + if (unlikely (!get_points (font, gid, points_aggregator_t (font, &extents, phantoms)))) + return is_vertical ? vmtx->get_side_bearing (gid) : hmtx->get_side_bearing (gid); + + return is_vertical + ? ceilf (phantoms[PHANTOM_TOP].y) - extents.y_bearing + : floorf (phantoms[PHANTOM_LEFT].x); + } +#endif + + public: + bool get_extents (hb_font_t *font, hb_codepoint_t gid, hb_glyph_extents_t *extents) const + { + if (unlikely (gid >= num_glyphs)) return false; + +#ifndef HB_NO_VAR + if (font->num_coords && font->num_coords == gvar->get_axis_count ()) + return get_points (font, gid, points_aggregator_t (font, extents, nullptr)); +#endif + return glyph_for_gid (gid).get_extents (font, *this, extents); + } + + const Glyph + glyph_for_gid (hb_codepoint_t gid, bool needs_padding_removal = false) const + { + if (unlikely (gid >= num_glyphs)) return Glyph (); + + unsigned int start_offset, end_offset; + + if (short_offset) + { + const HBUINT16 *offsets = (const HBUINT16 *) loca_table->dataZ.arrayZ; + start_offset = 2 * offsets[gid]; + end_offset = 2 * offsets[gid + 1]; + } + else + { + const HBUINT32 *offsets = (const HBUINT32 *) loca_table->dataZ.arrayZ; + start_offset = offsets[gid]; + end_offset = offsets[gid + 1]; + } + + if (unlikely (start_offset > end_offset || end_offset > glyf_table.get_length ())) + return Glyph (); + + Glyph glyph (hb_bytes_t ((const char *) this->glyf_table + start_offset, + end_offset - start_offset), gid); + return needs_padding_removal ? glyph.trim_padding () : glyph; + } + + void + add_gid_and_children (hb_codepoint_t gid, hb_set_t *gids_to_retain, + unsigned int depth = 0) const + { + if (unlikely (depth++ > HB_MAX_NESTING_LEVEL)) return; + /* Check if is already visited */ + if (gids_to_retain->has (gid)) return; + + gids_to_retain->add (gid); + + for (auto &item : glyph_for_gid (gid).get_composite_iterator ()) + add_gid_and_children (item.get_glyph_index (), gids_to_retain, depth); + } + +#ifdef HB_EXPERIMENTAL_API + struct path_builder_t + { + hb_font_t *font; + draw_helper_t *draw_helper; + + struct optional_point_t + { + optional_point_t () { has_data = false; } + optional_point_t (float x_, float y_) { x = x_; y = y_; has_data = true; } + + bool has_data; + float x; + float y; + + optional_point_t lerp (optional_point_t p, float t) + { return optional_point_t (x + t * (p.x - x), y + t * (p.y - y)); } + } first_oncurve, first_offcurve, last_offcurve; + + path_builder_t (hb_font_t *font_, draw_helper_t &draw_helper_) + { + font = font_; + draw_helper = &draw_helper_; + first_oncurve = first_offcurve = last_offcurve = optional_point_t (); + } + + /* based on https://github.com/RazrFalcon/ttf-parser/blob/4f32821/src/glyf.rs#L287 + See also: + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM01/Chap1.html + * https://stackoverflow.com/a/20772557 */ + void consume_point (const contour_point_t &point) + { + /* Skip empty contours */ + if (unlikely (point.is_end_point && !first_oncurve.has_data && !first_offcurve.has_data)) + return; + + bool is_on_curve = point.flag & Glyph::FLAG_ON_CURVE; + optional_point_t p (point.x, point.y); + if (!first_oncurve.has_data) + { + if (is_on_curve) + { + first_oncurve = p; + draw_helper->move_to (font->em_scalef_x (p.x), font->em_scalef_y (p.y)); + } + else + { + if (first_offcurve.has_data) + { + optional_point_t mid = first_offcurve.lerp (p, .5f); + first_oncurve = mid; + last_offcurve = p; + draw_helper->move_to (font->em_scalef_x (mid.x), font->em_scalef_y (mid.y)); + } + else + first_offcurve = p; + } + } + else + { + if (last_offcurve.has_data) + { + if (is_on_curve) + { + draw_helper->quadratic_to (font->em_scalef_x (last_offcurve.x), font->em_scalef_y (last_offcurve.y), + font->em_scalef_x (p.x), font->em_scalef_y (p.y)); + last_offcurve = optional_point_t (); + } + else + { + optional_point_t mid = last_offcurve.lerp (p, .5f); + draw_helper->quadratic_to (font->em_scalef_x (last_offcurve.x), font->em_scalef_y (last_offcurve.y), + font->em_scalef_x (mid.x), font->em_scalef_y (mid.y)); + last_offcurve = p; + } + } + else + { + if (is_on_curve) + draw_helper->line_to (font->em_scalef_x (p.x), font->em_scalef_y (p.y)); + else + last_offcurve = p; + } + } + + if (point.is_end_point) + { + if (first_offcurve.has_data && last_offcurve.has_data) + { + optional_point_t mid = last_offcurve.lerp (first_offcurve, .5f); + draw_helper->quadratic_to (font->em_scalef_x (last_offcurve.x), font->em_scalef_y (last_offcurve.y), + font->em_scalef_x (mid.x), font->em_scalef_y (mid.y)); + last_offcurve = optional_point_t (); + /* now check the rest */ + } + + if (first_offcurve.has_data && first_oncurve.has_data) + draw_helper->quadratic_to (font->em_scalef_x (first_offcurve.x), font->em_scalef_y (first_offcurve.y), + font->em_scalef_x (first_oncurve.x), font->em_scalef_y (first_oncurve.y)); + else if (last_offcurve.has_data && first_oncurve.has_data) + draw_helper->quadratic_to (font->em_scalef_x (last_offcurve.x), font->em_scalef_y (last_offcurve.y), + font->em_scalef_x (first_oncurve.x), font->em_scalef_y (first_oncurve.y)); + else if (first_oncurve.has_data) + draw_helper->line_to (font->em_scalef_x (first_oncurve.x), font->em_scalef_y (first_oncurve.y)); + + /* Getting ready for the next contour */ + first_oncurve = first_offcurve = last_offcurve = optional_point_t (); + draw_helper->end_path (); + } + } + void points_end () {} + + bool is_consuming_contour_points () { return true; } + contour_point_t *get_phantoms_sink () { return nullptr; } + }; + + bool + get_path (hb_font_t *font, hb_codepoint_t gid, draw_helper_t &draw_helper) const + { return get_points (font, gid, path_builder_t (font, draw_helper)); } +#endif + +#ifndef HB_NO_VAR + const gvar_accelerator_t *gvar; +#endif + const hmtx_accelerator_t *hmtx; + const vmtx_accelerator_t *vmtx; + + private: + bool short_offset; + unsigned int num_glyphs; + hb_blob_ptr_t<loca> loca_table; + hb_blob_ptr_t<glyf> glyf_table; + hb_face_t *face; + }; + + struct SubsetGlyph + { + hb_codepoint_t new_gid; + hb_codepoint_t old_gid; + Glyph source_glyph; + hb_bytes_t dest_start; /* region of source_glyph to copy first */ + hb_bytes_t dest_end; /* region of source_glyph to copy second */ + + bool serialize (hb_serialize_context_t *c, + const hb_subset_plan_t *plan) const + { + TRACE_SERIALIZE (this); + + hb_bytes_t dest_glyph = dest_start.copy (c); + dest_glyph = hb_bytes_t (&dest_glyph, dest_glyph.length + dest_end.copy (c).length); + unsigned int pad_length = padding (); + DEBUG_MSG (SUBSET, nullptr, "serialize %d byte glyph, width %d pad %d", dest_glyph.length, dest_glyph.length + pad_length, pad_length); + + HBUINT8 pad; + pad = 0; + while (pad_length > 0) + { + c->embed (pad); + pad_length--; + } + + if (unlikely (!dest_glyph.length)) return_trace (true); + + /* update components gids */ + for (auto &_ : Glyph (dest_glyph).get_composite_iterator ()) + { + hb_codepoint_t new_gid; + if (plan->new_gid_for_old_gid (_.get_glyph_index (), &new_gid)) + const_cast<CompositeGlyphChain &> (_).set_glyph_index (new_gid); + } + + if (plan->drop_hints) Glyph (dest_glyph).drop_hints (); + + return_trace (true); + } + + void drop_hints_bytes () + { source_glyph.drop_hints_bytes (dest_start, dest_end); } + + unsigned int length () const { return dest_start.length + dest_end.length; } + /* pad to 2 to ensure 2-byte loca will be ok */ + unsigned int padding () const { return length () % 2; } + unsigned int padded_size () const { return length () + padding (); } + }; + + protected: + UnsizedArrayOf<HBUINT8> + dataZ; /* Glyphs data. */ + public: + DEFINE_SIZE_MIN (0); /* In reality, this is UNBOUNDED() type; but since we always + * check the size externally, allow Null() object of it by + * defining it _MIN instead. */ +}; + +struct glyf_accelerator_t : glyf::accelerator_t {}; + +} /* namespace OT */ + + +#endif /* HB_OT_GLYF_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-hdmx-table.hh b/thirdparty/harfbuzz/src/hb-ot-hdmx-table.hh new file mode 100644 index 0000000000..c9c391bad5 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-hdmx-table.hh @@ -0,0 +1,177 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Garret Rieger + */ + +#ifndef HB_OT_HDMX_TABLE_HH +#define HB_OT_HDMX_TABLE_HH + +#include "hb-open-type.hh" + +/* + * hdmx -- Horizontal Device Metrics + * https://docs.microsoft.com/en-us/typography/opentype/spec/hdmx + */ +#define HB_OT_TAG_hdmx HB_TAG('h','d','m','x') + + +namespace OT { + + +struct DeviceRecord +{ + static unsigned int get_size (unsigned count) + { return hb_ceil_to_4 (min_size + count * HBUINT8::static_size); } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + bool serialize (hb_serialize_context_t *c, unsigned pixelSize, Iterator it) + { + TRACE_SERIALIZE (this); + + unsigned length = it.len (); + + if (unlikely (!c->extend (*this, length))) return_trace (false); + + this->pixelSize = pixelSize; + this->maxWidth = + + it + | hb_reduce (hb_max, 0u); + + + it + | hb_sink (widthsZ.as_array (length)); + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c, unsigned sizeDeviceRecord) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + c->check_range (this, sizeDeviceRecord))); + } + + HBUINT8 pixelSize; /* Pixel size for following widths (as ppem). */ + HBUINT8 maxWidth; /* Maximum width. */ + UnsizedArrayOf<HBUINT8> widthsZ; /* Array of widths (numGlyphs is from the 'maxp' table). */ + public: + DEFINE_SIZE_ARRAY (2, widthsZ); +}; + + +struct hdmx +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_hdmx; + + unsigned int get_size () const + { return min_size + numRecords * sizeDeviceRecord; } + + const DeviceRecord& operator [] (unsigned int i) const + { + /* XXX Null(DeviceRecord) is NOT safe as it's num-glyphs lengthed. + * https://github.com/harfbuzz/harfbuzz/issues/1300 */ + if (unlikely (i >= numRecords)) return Null (DeviceRecord); + return StructAtOffset<DeviceRecord> (&this->firstDeviceRecord, i * sizeDeviceRecord); + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + bool serialize (hb_serialize_context_t *c, unsigned version, Iterator it) + { + TRACE_SERIALIZE (this); + + if (unlikely (!c->extend_min ((*this)))) return_trace (false); + + this->version = version; + this->numRecords = it.len (); + this->sizeDeviceRecord = DeviceRecord::get_size (it ? (*it).second.len () : 0); + + for (const hb_item_type<Iterator>& _ : +it) + c->start_embed<DeviceRecord> ()->serialize (c, _.first, _.second); + + return_trace (c->successful); + } + + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + + hdmx *hdmx_prime = c->serializer->start_embed <hdmx> (); + if (unlikely (!hdmx_prime)) return_trace (false); + + auto it = + + hb_range ((unsigned) numRecords) + | hb_map ([c, this] (unsigned _) + { + const DeviceRecord *device_record = + &StructAtOffset<DeviceRecord> (&firstDeviceRecord, + _ * sizeDeviceRecord); + auto row = + + hb_range (c->plan->num_output_glyphs ()) + | hb_map (c->plan->reverse_glyph_map) + | hb_map ([this, c, device_record] (hb_codepoint_t _) + { + if (c->plan->is_empty_glyph (_)) + return Null (HBUINT8); + return device_record->widthsZ.as_array (get_num_glyphs ()) [_]; + }) + ; + return hb_pair ((unsigned) device_record->pixelSize, +row); + }) + ; + + hdmx_prime->serialize (c->serializer, version, it); + return_trace (true); + } + + unsigned get_num_glyphs () const + { + return sizeDeviceRecord - DeviceRecord::min_size; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + !hb_unsigned_mul_overflows (numRecords, sizeDeviceRecord) && + sizeDeviceRecord >= DeviceRecord::min_size && + c->check_range (this, get_size ())); + } + + protected: + HBUINT16 version; /* Table version number (0) */ + HBUINT16 numRecords; /* Number of device records. */ + HBUINT32 sizeDeviceRecord; + /* Size of a device record, 32-bit aligned. */ + DeviceRecord firstDeviceRecord; + /* Array of device records. */ + public: + DEFINE_SIZE_MIN (8); +}; + +} /* namespace OT */ + + +#endif /* HB_OT_HDMX_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-head-table.hh b/thirdparty/harfbuzz/src/hb-ot-head-table.hh new file mode 100644 index 0000000000..5613a96dbf --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-head-table.hh @@ -0,0 +1,179 @@ +/* + * Copyright © 2010 Red Hat, Inc. + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_HEAD_TABLE_HH +#define HB_OT_HEAD_TABLE_HH + +#include "hb-open-type.hh" + +/* + * head -- Font Header + * https://docs.microsoft.com/en-us/typography/opentype/spec/head + */ +#define HB_OT_TAG_head HB_TAG('h','e','a','d') + + +namespace OT { + + +struct head +{ + friend struct OffsetTable; + + static constexpr hb_tag_t tableTag = HB_OT_TAG_head; + + unsigned int get_upem () const + { + unsigned int upem = unitsPerEm; + /* If no valid head table found, assume 1000, which matches typical Type1 usage. */ + return 16 <= upem && upem <= 16384 ? upem : 1000; + } + + bool serialize (hb_serialize_context_t *c) const + { + TRACE_SERIALIZE (this); + return_trace ((bool) c->embed (this)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + return_trace (serialize (c->serializer)); + } + + enum mac_style_flag_t { + BOLD = 1u<<0, + ITALIC = 1u<<1, + UNDERLINE = 1u<<2, + OUTLINE = 1u<<3, + SHADOW = 1u<<4, + CONDENSED = 1u<<5 + }; + + bool is_bold () const { return macStyle & BOLD; } + bool is_italic () const { return macStyle & ITALIC; } + bool is_condensed () const { return macStyle & CONDENSED; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + version.major == 1 && + magicNumber == 0x5F0F3CF5u); + } + + protected: + FixedVersion<>version; /* Version of the head table--currently + * 0x00010000u for version 1.0. */ + FixedVersion<>fontRevision; /* Set by font manufacturer. */ + HBUINT32 checkSumAdjustment; /* To compute: set it to 0, sum the + * entire font as HBUINT32, then store + * 0xB1B0AFBAu - sum. */ + HBUINT32 magicNumber; /* Set to 0x5F0F3CF5u. */ + HBUINT16 flags; /* Bit 0: Baseline for font at y=0; + * Bit 1: Left sidebearing point at x=0; + * Bit 2: Instructions may depend on point size; + * Bit 3: Force ppem to integer values for all + * internal scaler math; may use fractional + * ppem sizes if this bit is clear; + * Bit 4: Instructions may alter advance width + * (the advance widths might not scale linearly); + * Bits 5-10: These should be set according to + * Apple's specification. However, they are not + * implemented in OpenType. + * Bit 5: This bit should be set in fonts that are + * intended to e laid out vertically, and in + * which the glyphs have been drawn such that an + * x-coordinate of 0 corresponds to the desired + * vertical baseline. + * Bit 6: This bit must be set to zero. + * Bit 7: This bit should be set if the font + * requires layout for correct linguistic + * rendering (e.g. Arabic fonts). + * Bit 8: This bit should be set for a GX font + * which has one or more metamorphosis effects + * designated as happening by default. + * Bit 9: This bit should be set if the font + * contains any strong right-to-left glyphs. + * Bit 10: This bit should be set if the font + * contains Indic-style rearrangement effects. + * Bit 11: Font data is 'lossless,' as a result + * of having been compressed and decompressed + * with the Agfa MicroType Express engine. + * Bit 12: Font converted (produce compatible metrics) + * Bit 13: Font optimized for ClearType™. + * Note, fonts that rely on embedded bitmaps (EBDT) + * for rendering should not be considered optimized + * for ClearType, and therefore should keep this bit + * cleared. + * Bit 14: Last Resort font. If set, indicates that + * the glyphs encoded in the cmap subtables are simply + * generic symbolic representations of code point + * ranges and don’t truly represent support for those + * code points. If unset, indicates that the glyphs + * encoded in the cmap subtables represent proper + * support for those code points. + * Bit 15: Reserved, set to 0. */ + HBUINT16 unitsPerEm; /* Valid range is from 16 to 16384. This value + * should be a power of 2 for fonts that have + * TrueType outlines. */ + LONGDATETIME created; /* Number of seconds since 12:00 midnight, + January 1, 1904. 64-bit integer */ + LONGDATETIME modified; /* Number of seconds since 12:00 midnight, + January 1, 1904. 64-bit integer */ + HBINT16 xMin; /* For all glyph bounding boxes. */ + HBINT16 yMin; /* For all glyph bounding boxes. */ + HBINT16 xMax; /* For all glyph bounding boxes. */ + HBINT16 yMax; /* For all glyph bounding boxes. */ + HBUINT16 macStyle; /* Bit 0: Bold (if set to 1); + * Bit 1: Italic (if set to 1) + * Bit 2: Underline (if set to 1) + * Bit 3: Outline (if set to 1) + * Bit 4: Shadow (if set to 1) + * Bit 5: Condensed (if set to 1) + * Bit 6: Extended (if set to 1) + * Bits 7-15: Reserved (set to 0). */ + HBUINT16 lowestRecPPEM; /* Smallest readable size in pixels. */ + HBINT16 fontDirectionHint; /* Deprecated (Set to 2). + * 0: Fully mixed directional glyphs; + * 1: Only strongly left to right; + * 2: Like 1 but also contains neutrals; + * -1: Only strongly right to left; + * -2: Like -1 but also contains neutrals. */ + public: + HBUINT16 indexToLocFormat; /* 0 for short offsets, 1 for long. */ + HBUINT16 glyphDataFormat; /* 0 for current format. */ + + DEFINE_SIZE_STATIC (54); +}; + + +} /* namespace OT */ + + +#endif /* HB_OT_HEAD_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-hhea-table.hh b/thirdparty/harfbuzz/src/hb-ot-hhea-table.hh new file mode 100644 index 0000000000..d9c9bd3537 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-hhea-table.hh @@ -0,0 +1,104 @@ +/* + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_HHEA_TABLE_HH +#define HB_OT_HHEA_TABLE_HH + +#include "hb-open-type.hh" + +/* + * hhea -- Horizontal Header + * https://docs.microsoft.com/en-us/typography/opentype/spec/hhea + * vhea -- Vertical Header + * https://docs.microsoft.com/en-us/typography/opentype/spec/vhea + */ +#define HB_OT_TAG_hhea HB_TAG('h','h','e','a') +#define HB_OT_TAG_vhea HB_TAG('v','h','e','a') + + +namespace OT { + + +template <typename T> +struct _hea +{ + bool has_data () const { return version.major; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && likely (version.major == 1)); + } + + public: + FixedVersion<>version; /* 0x00010000u for version 1.0. */ + FWORD ascender; /* Typographic ascent. */ + FWORD descender; /* Typographic descent. */ + FWORD lineGap; /* Typographic line gap. */ + UFWORD advanceMax; /* Maximum advance width/height value in + * metrics table. */ + FWORD minLeadingBearing; + /* Minimum left/top sidebearing value in + * metrics table. */ + FWORD minTrailingBearing; + /* Minimum right/bottom sidebearing value; + * calculated as Min(aw - lsb - + * (xMax - xMin)) for horizontal. */ + FWORD maxExtent; /* horizontal: Max(lsb + (xMax - xMin)), + * vertical: minLeadingBearing+(yMax-yMin). */ + HBINT16 caretSlopeRise; /* Used to calculate the slope of the + * cursor (rise/run); 1 for vertical caret, + * 0 for horizontal.*/ + HBINT16 caretSlopeRun; /* 0 for vertical caret, 1 for horizontal. */ + HBINT16 caretOffset; /* The amount by which a slanted + * highlight on a glyph needs + * to be shifted to produce the + * best appearance. Set to 0 for + * non-slanted fonts. */ + HBINT16 reserved1; /* Set to 0. */ + HBINT16 reserved2; /* Set to 0. */ + HBINT16 reserved3; /* Set to 0. */ + HBINT16 reserved4; /* Set to 0. */ + HBINT16 metricDataFormat;/* 0 for current format. */ + HBUINT16 numberOfLongMetrics; + /* Number of LongMetric entries in metric + * table. */ + public: + DEFINE_SIZE_STATIC (36); +}; + +struct hhea : _hea<hhea> { + static constexpr hb_tag_t tableTag = HB_OT_TAG_hhea; +}; +struct vhea : _hea<vhea> { + static constexpr hb_tag_t tableTag = HB_OT_TAG_vhea; +}; + + +} /* namespace OT */ + + +#endif /* HB_OT_HHEA_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh b/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh new file mode 100644 index 0000000000..d06c0fa4a4 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh @@ -0,0 +1,340 @@ +/* + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod, Roderick Sheeter + */ + +#ifndef HB_OT_HMTX_TABLE_HH +#define HB_OT_HMTX_TABLE_HH + +#include "hb-open-type.hh" +#include "hb-ot-hhea-table.hh" +#include "hb-ot-var-hvar-table.hh" +#include "hb-ot-metrics.hh" + +/* + * hmtx -- Horizontal Metrics + * https://docs.microsoft.com/en-us/typography/opentype/spec/hmtx + * vmtx -- Vertical Metrics + * https://docs.microsoft.com/en-us/typography/opentype/spec/vmtx + */ +#define HB_OT_TAG_hmtx HB_TAG('h','m','t','x') +#define HB_OT_TAG_vmtx HB_TAG('v','m','t','x') + + +HB_INTERNAL int +_glyf_get_side_bearing_var (hb_font_t *font, hb_codepoint_t glyph, bool is_vertical); + +HB_INTERNAL unsigned +_glyf_get_advance_var (hb_font_t *font, hb_codepoint_t glyph, bool is_vertical); + + +namespace OT { + + +struct LongMetric +{ + UFWORD advance; /* Advance width/height. */ + FWORD sb; /* Leading (left/top) side bearing. */ + public: + DEFINE_SIZE_STATIC (4); +}; + + +template <typename T, typename H> +struct hmtxvmtx +{ + bool sanitize (hb_sanitize_context_t *c HB_UNUSED) const + { + TRACE_SANITIZE (this); + /* We don't check for anything specific here. The users of the + * struct do all the hard work... */ + return_trace (true); + } + + + bool subset_update_header (hb_subset_plan_t *plan, + unsigned int num_hmetrics) const + { + hb_blob_t *src_blob = hb_sanitize_context_t ().reference_table<H> (plan->source, H::tableTag); + hb_blob_t *dest_blob = hb_blob_copy_writable_or_fail (src_blob); + hb_blob_destroy (src_blob); + + if (unlikely (!dest_blob)) { + return false; + } + + unsigned int length; + H *table = (H *) hb_blob_get_data (dest_blob, &length); + table->numberOfLongMetrics = num_hmetrics; + + bool result = plan->add_table (H::tableTag, dest_blob); + hb_blob_destroy (dest_blob); + + return result; + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + void serialize (hb_serialize_context_t *c, + Iterator it, + unsigned num_advances) + { + unsigned idx = 0; + for (auto _ : it) + { + if (idx < num_advances) + { + LongMetric lm; + lm.advance = _.first; + lm.sb = _.second; + if (unlikely (!c->embed<LongMetric> (&lm))) return; + } + else + { + FWORD *sb = c->allocate_size<FWORD> (FWORD::static_size); + if (unlikely (!sb)) return; + *sb = _.second; + } + idx++; + } + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + + T *table_prime = c->serializer->start_embed <T> (); + if (unlikely (!table_prime)) return_trace (false); + + accelerator_t _mtx; + _mtx.init (c->plan->source); + unsigned num_advances = _mtx.num_advances_for_subset (c->plan); + + auto it = + + hb_range (c->plan->num_output_glyphs ()) + | hb_map ([c, &_mtx] (unsigned _) + { + hb_codepoint_t old_gid; + if (!c->plan->old_gid_for_new_gid (_, &old_gid)) + return hb_pair (0u, 0); + return hb_pair (_mtx.get_advance (old_gid), _mtx.get_side_bearing (old_gid)); + }) + ; + + table_prime->serialize (c->serializer, it, num_advances); + + _mtx.fini (); + + if (unlikely (c->serializer->ran_out_of_room || c->serializer->in_error ())) + return_trace (false); + + // Amend header num hmetrics + if (unlikely (!subset_update_header (c->plan, num_advances))) + return_trace (false); + + return_trace (true); + } + + struct accelerator_t + { + friend struct hmtxvmtx; + + void init (hb_face_t *face, + unsigned int default_advance_ = 0) + { + default_advance = default_advance_ ? default_advance_ : hb_face_get_upem (face); + + num_advances = T::is_horizontal ? face->table.hhea->numberOfLongMetrics : face->table.vhea->numberOfLongMetrics; + + table = hb_sanitize_context_t ().reference_table<hmtxvmtx> (face, T::tableTag); + + /* Cap num_metrics() and num_advances() based on table length. */ + unsigned int len = table.get_length (); + if (unlikely (num_advances * 4 > len)) + num_advances = len / 4; + num_metrics = num_advances + (len - 4 * num_advances) / 2; + + /* We MUST set num_metrics to zero if num_advances is zero. + * Our get_advance() depends on that. */ + if (unlikely (!num_advances)) + { + num_metrics = num_advances = 0; + table.destroy (); + table = hb_blob_get_empty (); + } + + var_table = hb_sanitize_context_t ().reference_table<HVARVVAR> (face, T::variationsTag); + } + + void fini () + { + table.destroy (); + var_table.destroy (); + } + + int get_side_bearing (hb_codepoint_t glyph) const + { + if (glyph < num_advances) + return table->longMetricZ[glyph].sb; + + if (unlikely (glyph >= num_metrics)) + return 0; + + const FWORD *bearings = (const FWORD *) &table->longMetricZ[num_advances]; + return bearings[glyph - num_advances]; + } + + int get_side_bearing (hb_font_t *font, hb_codepoint_t glyph) const + { + int side_bearing = get_side_bearing (glyph); + +#ifndef HB_NO_VAR + if (unlikely (glyph >= num_metrics) || !font->num_coords) + return side_bearing; + + if (var_table.get_length ()) + return side_bearing + var_table->get_side_bearing_var (glyph, font->coords, font->num_coords); // TODO Optimize?! + + return _glyf_get_side_bearing_var (font, glyph, T::tableTag == HB_OT_TAG_vmtx); +#else + return side_bearing; +#endif + } + + unsigned int get_advance (hb_codepoint_t glyph) const + { + if (unlikely (glyph >= num_metrics)) + { + /* If num_metrics is zero, it means we don't have the metrics table + * for this direction: return default advance. Otherwise, it means that the + * glyph index is out of bound: return zero. */ + if (num_metrics) + return 0; + else + return default_advance; + } + + return table->longMetricZ[hb_min (glyph, (uint32_t) num_advances - 1)].advance; + } + + unsigned int get_advance (hb_codepoint_t glyph, + hb_font_t *font) const + { + unsigned int advance = get_advance (glyph); + +#ifndef HB_NO_VAR + if (unlikely (glyph >= num_metrics) || !font->num_coords) + return advance; + + if (var_table.get_length ()) + return advance + roundf (var_table->get_advance_var (glyph, font)); // TODO Optimize?! + + return _glyf_get_advance_var (font, glyph, T::tableTag == HB_OT_TAG_vmtx); +#else + return advance; +#endif + } + + unsigned int num_advances_for_subset (const hb_subset_plan_t *plan) const + { + unsigned int num_advances = plan->num_output_glyphs (); + unsigned int last_advance = _advance_for_new_gid (plan, + num_advances - 1); + while (num_advances > 1 && + last_advance == _advance_for_new_gid (plan, + num_advances - 2)) + { + num_advances--; + } + + return num_advances; + } + + private: + unsigned int _advance_for_new_gid (const hb_subset_plan_t *plan, + hb_codepoint_t new_gid) const + { + hb_codepoint_t old_gid; + if (!plan->old_gid_for_new_gid (new_gid, &old_gid)) + return 0; + + return get_advance (old_gid); + } + + protected: + unsigned int num_metrics; + unsigned int num_advances; + unsigned int default_advance; + + private: + hb_blob_ptr_t<hmtxvmtx> table; + hb_blob_ptr_t<HVARVVAR> var_table; + }; + + protected: + UnsizedArrayOf<LongMetric> + longMetricZ; /* Paired advance width and leading + * bearing values for each glyph. The + * value numOfHMetrics comes from + * the 'hhea' table. If the font is + * monospaced, only one entry need + * be in the array, but that entry is + * required. The last entry applies to + * all subsequent glyphs. */ +/*UnsizedArrayOf<FWORD> leadingBearingX;*/ + /* Here the advance is assumed + * to be the same as the advance + * for the last entry above. The + * number of entries in this array is + * derived from numGlyphs (from 'maxp' + * table) minus numberOfLongMetrics. + * This generally is used with a run + * of monospaced glyphs (e.g., Kanji + * fonts or Courier fonts). Only one + * run is allowed and it must be at + * the end. This allows a monospaced + * font to vary the side bearing + * values for each glyph. */ + public: + DEFINE_SIZE_ARRAY (0, longMetricZ); +}; + +struct hmtx : hmtxvmtx<hmtx, hhea> { + static constexpr hb_tag_t tableTag = HB_OT_TAG_hmtx; + static constexpr hb_tag_t variationsTag = HB_OT_TAG_HVAR; + static constexpr bool is_horizontal = true; +}; +struct vmtx : hmtxvmtx<vmtx, vhea> { + static constexpr hb_tag_t tableTag = HB_OT_TAG_vmtx; + static constexpr hb_tag_t variationsTag = HB_OT_TAG_VVAR; + static constexpr bool is_horizontal = false; +}; + +struct hmtx_accelerator_t : hmtx::accelerator_t {}; +struct vmtx_accelerator_t : vmtx::accelerator_t {}; + +} /* namespace OT */ + + +#endif /* HB_OT_HMTX_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-kern-table.hh b/thirdparty/harfbuzz/src/hb-ot-kern-table.hh new file mode 100644 index 0000000000..3563cab8bd --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-kern-table.hh @@ -0,0 +1,359 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_KERN_TABLE_HH +#define HB_OT_KERN_TABLE_HH + +#include "hb-aat-layout-kerx-table.hh" + + +/* + * kern -- Kerning + * https://docs.microsoft.com/en-us/typography/opentype/spec/kern + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6kern.html + */ +#define HB_OT_TAG_kern HB_TAG('k','e','r','n') + + +namespace OT { + + +template <typename KernSubTableHeader> +struct KernSubTableFormat3 +{ + int get_kerning (hb_codepoint_t left, hb_codepoint_t right) const + { + hb_array_t<const FWORD> kernValue = kernValueZ.as_array (kernValueCount); + hb_array_t<const HBUINT8> leftClass = StructAfter<const UnsizedArrayOf<HBUINT8>> (kernValue).as_array (glyphCount); + hb_array_t<const HBUINT8> rightClass = StructAfter<const UnsizedArrayOf<HBUINT8>> (leftClass).as_array (glyphCount); + hb_array_t<const HBUINT8> kernIndex = StructAfter<const UnsizedArrayOf<HBUINT8>> (rightClass).as_array (leftClassCount * rightClassCount); + + unsigned int leftC = leftClass[left]; + unsigned int rightC = rightClass[right]; + if (unlikely (leftC >= leftClassCount || rightC >= rightClassCount)) + return 0; + unsigned int i = leftC * rightClassCount + rightC; + return kernValue[kernIndex[i]]; + } + + bool apply (AAT::hb_aat_apply_context_t *c) const + { + TRACE_APPLY (this); + + if (!c->plan->requested_kerning) + return false; + + if (header.coverage & header.Backwards) + return false; + + hb_kern_machine_t<KernSubTableFormat3> machine (*this, header.coverage & header.CrossStream); + machine.kern (c->font, c->buffer, c->plan->kern_mask); + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + c->check_range (kernValueZ, + kernValueCount * sizeof (FWORD) + + glyphCount * 2 + + leftClassCount * rightClassCount)); + } + + protected: + KernSubTableHeader + header; + HBUINT16 glyphCount; /* The number of glyphs in this font. */ + HBUINT8 kernValueCount; /* The number of kerning values. */ + HBUINT8 leftClassCount; /* The number of left-hand classes. */ + HBUINT8 rightClassCount;/* The number of right-hand classes. */ + HBUINT8 flags; /* Set to zero (reserved for future use). */ + UnsizedArrayOf<FWORD> + kernValueZ; /* The kerning values. + * Length kernValueCount. */ +#if 0 + UnsizedArrayOf<HBUINT8> + leftClass; /* The left-hand classes. + * Length glyphCount. */ + UnsizedArrayOf<HBUINT8> + rightClass; /* The right-hand classes. + * Length glyphCount. */ + UnsizedArrayOf<HBUINT8>kernIndex; + /* The indices into the kernValue array. + * Length leftClassCount * rightClassCount */ +#endif + public: + DEFINE_SIZE_ARRAY (KernSubTableHeader::static_size + 6, kernValueZ); +}; + +template <typename KernSubTableHeader> +struct KernSubTable +{ + unsigned int get_size () const { return u.header.length; } + unsigned int get_type () const { return u.header.format; } + + int get_kerning (hb_codepoint_t left, hb_codepoint_t right) const + { + switch (get_type ()) { + /* This method hooks up to hb_font_t's get_h_kerning. Only support Format0. */ + case 0: return u.format0.get_kerning (left, right); + default:return 0; + } + } + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + unsigned int subtable_type = get_type (); + TRACE_DISPATCH (this, subtable_type); + switch (subtable_type) { + case 0: return_trace (c->dispatch (u.format0)); +#ifndef HB_NO_AAT_SHAPE + case 1: return_trace (u.header.apple ? c->dispatch (u.format1, hb_forward<Ts> (ds)...) : c->default_return_value ()); +#endif + case 2: return_trace (c->dispatch (u.format2)); +#ifndef HB_NO_AAT_SHAPE + case 3: return_trace (u.header.apple ? c->dispatch (u.format3, hb_forward<Ts> (ds)...) : c->default_return_value ()); +#endif + default: return_trace (c->default_return_value ()); + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!u.header.sanitize (c) || + u.header.length < u.header.min_size || + !c->check_range (this, u.header.length))) return_trace (false); + + return_trace (dispatch (c)); + } + + public: + union { + KernSubTableHeader header; + AAT::KerxSubTableFormat0<KernSubTableHeader> format0; + AAT::KerxSubTableFormat1<KernSubTableHeader> format1; + AAT::KerxSubTableFormat2<KernSubTableHeader> format2; + KernSubTableFormat3<KernSubTableHeader> format3; + } u; + public: + DEFINE_SIZE_MIN (KernSubTableHeader::static_size); +}; + + +struct KernOTSubTableHeader +{ + static constexpr bool apple = false; + typedef AAT::ObsoleteTypes Types; + + unsigned tuple_count () const { return 0; } + bool is_horizontal () const { return (coverage & Horizontal); } + + enum Coverage + { + Horizontal = 0x01u, + Minimum = 0x02u, + CrossStream = 0x04u, + Override = 0x08u, + + /* Not supported: */ + Backwards = 0x00u, + Variation = 0x00u, + }; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + public: + HBUINT16 versionZ; /* Unused. */ + HBUINT16 length; /* Length of the subtable (including this header). */ + HBUINT8 format; /* Subtable format. */ + HBUINT8 coverage; /* Coverage bits. */ + public: + DEFINE_SIZE_STATIC (6); +}; + +struct KernOT : AAT::KerxTable<KernOT> +{ + friend struct AAT::KerxTable<KernOT>; + + static constexpr hb_tag_t tableTag = HB_OT_TAG_kern; + static constexpr unsigned minVersion = 0u; + + typedef KernOTSubTableHeader SubTableHeader; + typedef SubTableHeader::Types Types; + typedef KernSubTable<SubTableHeader> SubTable; + + protected: + HBUINT16 version; /* Version--0x0000u */ + HBUINT16 tableCount; /* Number of subtables in the kerning table. */ + SubTable firstSubTable; /* Subtables. */ + public: + DEFINE_SIZE_MIN (4); +}; + + +struct KernAATSubTableHeader +{ + static constexpr bool apple = true; + typedef AAT::ObsoleteTypes Types; + + unsigned tuple_count () const { return 0; } + bool is_horizontal () const { return !(coverage & Vertical); } + + enum Coverage + { + Vertical = 0x80u, + CrossStream = 0x40u, + Variation = 0x20u, + + /* Not supported: */ + Backwards = 0x00u, + }; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + public: + HBUINT32 length; /* Length of the subtable (including this header). */ + HBUINT8 coverage; /* Coverage bits. */ + HBUINT8 format; /* Subtable format. */ + HBUINT16 tupleIndex; /* The tuple index (used for variations fonts). + * This value specifies which tuple this subtable covers. + * Note: We don't implement. */ + public: + DEFINE_SIZE_STATIC (8); +}; + +struct KernAAT : AAT::KerxTable<KernAAT> +{ + friend struct AAT::KerxTable<KernAAT>; + + static constexpr hb_tag_t tableTag = HB_OT_TAG_kern; + static constexpr unsigned minVersion = 0x00010000u; + + typedef KernAATSubTableHeader SubTableHeader; + typedef SubTableHeader::Types Types; + typedef KernSubTable<SubTableHeader> SubTable; + + protected: + HBUINT32 version; /* Version--0x00010000u */ + HBUINT32 tableCount; /* Number of subtables in the kerning table. */ + SubTable firstSubTable; /* Subtables. */ + public: + DEFINE_SIZE_MIN (8); +}; + +struct kern +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_kern; + + bool has_data () const { return u.version32; } + unsigned get_type () const { return u.major; } + + bool has_state_machine () const + { + switch (get_type ()) { + case 0: return u.ot.has_state_machine (); +#ifndef HB_NO_AAT_SHAPE + case 1: return u.aat.has_state_machine (); +#endif + default:return false; + } + } + + bool has_cross_stream () const + { + switch (get_type ()) { + case 0: return u.ot.has_cross_stream (); +#ifndef HB_NO_AAT_SHAPE + case 1: return u.aat.has_cross_stream (); +#endif + default:return false; + } + } + + int get_h_kerning (hb_codepoint_t left, hb_codepoint_t right) const + { + switch (get_type ()) { + case 0: return u.ot.get_h_kerning (left, right); +#ifndef HB_NO_AAT_SHAPE + case 1: return u.aat.get_h_kerning (left, right); +#endif + default:return 0; + } + } + + bool apply (AAT::hb_aat_apply_context_t *c) const + { return dispatch (c); } + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + unsigned int subtable_type = get_type (); + TRACE_DISPATCH (this, subtable_type); + switch (subtable_type) { + case 0: return_trace (c->dispatch (u.ot, hb_forward<Ts> (ds)...)); +#ifndef HB_NO_AAT_SHAPE + case 1: return_trace (c->dispatch (u.aat, hb_forward<Ts> (ds)...)); +#endif + default: return_trace (c->default_return_value ()); + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!u.version32.sanitize (c)) return_trace (false); + return_trace (dispatch (c)); + } + + protected: + union { + HBUINT32 version32; + HBUINT16 major; + KernOT ot; +#ifndef HB_NO_AAT_SHAPE + KernAAT aat; +#endif + } u; + public: + DEFINE_SIZE_UNION (4, version32); +}; + +} /* namespace OT */ + + +#endif /* HB_OT_KERN_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-base-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-base-table.hh new file mode 100644 index 0000000000..02fe14fa06 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-layout-base-table.hh @@ -0,0 +1,509 @@ +/* + * Copyright © 2016 Elie Roux <elie.roux@telecom-bretagne.eu> + * Copyright © 2018 Google, Inc. + * Copyright © 2018-2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_LAYOUT_BASE_TABLE_HH +#define HB_OT_LAYOUT_BASE_TABLE_HH + +#include "hb-open-type.hh" +#include "hb-ot-layout-common.hh" + +namespace OT { + +/* + * BASE -- Baseline + * https://docs.microsoft.com/en-us/typography/opentype/spec/base + */ + +struct BaseCoordFormat1 +{ + hb_position_t get_coord () const { return coordinate; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + FWORD coordinate; /* X or Y value, in design units */ + public: + DEFINE_SIZE_STATIC (4); +}; + +struct BaseCoordFormat2 +{ + hb_position_t get_coord () const + { + /* TODO */ + return coordinate; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 2 */ + FWORD coordinate; /* X or Y value, in design units */ + HBGlyphID referenceGlyph; /* Glyph ID of control glyph */ + HBUINT16 coordPoint; /* Index of contour point on the + * reference glyph */ + public: + DEFINE_SIZE_STATIC (8); +}; + +struct BaseCoordFormat3 +{ + hb_position_t get_coord (hb_font_t *font, + const VariationStore &var_store, + hb_direction_t direction) const + { + const Device &device = this+deviceTable; + return coordinate + (HB_DIRECTION_IS_VERTICAL (direction) ? + device.get_y_delta (font, var_store) : + device.get_x_delta (font, var_store)); + } + + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + deviceTable.sanitize (c, this))); + } + + protected: + HBUINT16 format; /* Format identifier--format = 3 */ + FWORD coordinate; /* X or Y value, in design units */ + OffsetTo<Device> + deviceTable; /* Offset to Device table for X or + * Y value, from beginning of + * BaseCoord table (may be NULL). */ + public: + DEFINE_SIZE_STATIC (6); +}; + +struct BaseCoord +{ + bool has_data () const { return u.format; } + + hb_position_t get_coord (hb_font_t *font, + const VariationStore &var_store, + hb_direction_t direction) const + { + switch (u.format) { + case 1: return u.format1.get_coord (); + case 2: return u.format2.get_coord (); + case 3: return u.format3.get_coord (font, var_store, direction); + default:return 0; + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!u.format.sanitize (c))) return_trace (false); + switch (u.format) { + case 1: return_trace (u.format1.sanitize (c)); + case 2: return_trace (u.format2.sanitize (c)); + case 3: return_trace (u.format3.sanitize (c)); + default:return_trace (false); + } + } + + protected: + union { + HBUINT16 format; + BaseCoordFormat1 format1; + BaseCoordFormat2 format2; + BaseCoordFormat3 format3; + } u; + public: + DEFINE_SIZE_UNION (2, format); +}; + +struct FeatMinMaxRecord +{ + int cmp (hb_tag_t key) const { return tag.cmp (key); } + + bool has_data () const { return tag; } + + void get_min_max (const BaseCoord **min, const BaseCoord **max) const + { + if (likely (min)) *min = &(this+minCoord); + if (likely (max)) *max = &(this+maxCoord); + } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + minCoord.sanitize (c, this) && + maxCoord.sanitize (c, this))); + } + + protected: + Tag tag; /* 4-byte feature identification tag--must + * match feature tag in FeatureList */ + OffsetTo<BaseCoord> + minCoord; /* Offset to BaseCoord table that defines + * the minimum extent value, from beginning + * of MinMax table (may be NULL) */ + OffsetTo<BaseCoord> + maxCoord; /* Offset to BaseCoord table that defines + * the maximum extent value, from beginning + * of MinMax table (may be NULL) */ + public: + DEFINE_SIZE_STATIC (8); + +}; + +struct MinMax +{ + void get_min_max (hb_tag_t feature_tag, + const BaseCoord **min, + const BaseCoord **max) const + { + const FeatMinMaxRecord &minMaxCoord = featMinMaxRecords.bsearch (feature_tag); + if (minMaxCoord.has_data ()) + minMaxCoord.get_min_max (min, max); + else + { + if (likely (min)) *min = &(this+minCoord); + if (likely (max)) *max = &(this+maxCoord); + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + minCoord.sanitize (c, this) && + maxCoord.sanitize (c, this) && + featMinMaxRecords.sanitize (c, this))); + } + + protected: + OffsetTo<BaseCoord> + minCoord; /* Offset to BaseCoord table that defines + * minimum extent value, from the beginning + * of MinMax table (may be NULL) */ + OffsetTo<BaseCoord> + maxCoord; /* Offset to BaseCoord table that defines + * maximum extent value, from the beginning + * of MinMax table (may be NULL) */ + SortedArrayOf<FeatMinMaxRecord> + featMinMaxRecords; + /* Array of FeatMinMaxRecords, in alphabetical + * order by featureTableTag */ + public: + DEFINE_SIZE_ARRAY (6, featMinMaxRecords); +}; + +struct BaseValues +{ + const BaseCoord &get_base_coord (int baseline_tag_index) const + { + if (baseline_tag_index == -1) baseline_tag_index = defaultIndex; + return this+baseCoords[baseline_tag_index]; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + baseCoords.sanitize (c, this))); + } + + protected: + Index defaultIndex; /* Index number of default baseline for this + * script — equals index position of baseline tag + * in baselineTags array of the BaseTagList */ + OffsetArrayOf<BaseCoord> + baseCoords; /* Number of BaseCoord tables defined — should equal + * baseTagCount in the BaseTagList + * + * Array of offsets to BaseCoord tables, from beginning of + * BaseValues table — order matches baselineTags array in + * the BaseTagList */ + public: + DEFINE_SIZE_ARRAY (4, baseCoords); +}; + +struct BaseLangSysRecord +{ + int cmp (hb_tag_t key) const { return baseLangSysTag.cmp (key); } + + bool has_data () const { return baseLangSysTag; } + + const MinMax &get_min_max () const { return this+minMax; } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + minMax.sanitize (c, this))); + } + + protected: + Tag baseLangSysTag; /* 4-byte language system identification tag */ + OffsetTo<MinMax> + minMax; /* Offset to MinMax table, from beginning + * of BaseScript table */ + public: + DEFINE_SIZE_STATIC (6); +}; + +struct BaseScript +{ + const MinMax &get_min_max (hb_tag_t language_tag) const + { + const BaseLangSysRecord& record = baseLangSysRecords.bsearch (language_tag); + return record.has_data () ? record.get_min_max () : this+defaultMinMax; + } + + const BaseCoord &get_base_coord (int baseline_tag_index) const + { return (this+baseValues).get_base_coord (baseline_tag_index); } + + bool has_data () const { return baseValues; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + baseValues.sanitize (c, this) && + defaultMinMax.sanitize (c, this) && + baseLangSysRecords.sanitize (c, this))); + } + + protected: + OffsetTo<BaseValues> + baseValues; /* Offset to BaseValues table, from beginning + * of BaseScript table (may be NULL) */ + OffsetTo<MinMax> + defaultMinMax; /* Offset to MinMax table, from beginning of + * BaseScript table (may be NULL) */ + SortedArrayOf<BaseLangSysRecord> + baseLangSysRecords; + /* Number of BaseLangSysRecords + * defined — may be zero (0) */ + + public: + DEFINE_SIZE_ARRAY (6, baseLangSysRecords); +}; + +struct BaseScriptList; +struct BaseScriptRecord +{ + int cmp (hb_tag_t key) const { return baseScriptTag.cmp (key); } + + bool has_data () const { return baseScriptTag; } + + const BaseScript &get_base_script (const BaseScriptList *list) const + { return list+baseScript; } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + baseScript.sanitize (c, base))); + } + + protected: + Tag baseScriptTag; /* 4-byte script identification tag */ + OffsetTo<BaseScript> + baseScript; /* Offset to BaseScript table, from beginning + * of BaseScriptList */ + + public: + DEFINE_SIZE_STATIC (6); +}; + +struct BaseScriptList +{ + const BaseScript &get_base_script (hb_tag_t script) const + { + const BaseScriptRecord *record = &baseScriptRecords.bsearch (script); + if (!record->has_data ()) record = &baseScriptRecords.bsearch (HB_TAG ('D','F','L','T')); + return record->has_data () ? record->get_base_script (this) : Null (BaseScript); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + baseScriptRecords.sanitize (c, this)); + } + + protected: + SortedArrayOf<BaseScriptRecord> + baseScriptRecords; + + public: + DEFINE_SIZE_ARRAY (2, baseScriptRecords); +}; + +struct Axis +{ + bool get_baseline (hb_tag_t baseline_tag, + hb_tag_t script_tag, + hb_tag_t language_tag, + const BaseCoord **coord) const + { + const BaseScript &base_script = (this+baseScriptList).get_base_script (script_tag); + if (!base_script.has_data ()) return false; + + if (likely (coord)) + { + unsigned int tag_index = 0; + (this+baseTagList).bfind (baseline_tag, &tag_index); + *coord = &base_script.get_base_coord (tag_index); + } + + return true; + } + + bool get_min_max (hb_tag_t script_tag, + hb_tag_t language_tag, + hb_tag_t feature_tag, + const BaseCoord **min_coord, + const BaseCoord **max_coord) const + { + const BaseScript &base_script = (this+baseScriptList).get_base_script (script_tag); + if (!base_script.has_data ()) return false; + + base_script.get_min_max (language_tag).get_min_max (feature_tag, min_coord, max_coord); + + return true; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + (this+baseTagList).sanitize (c) && + (this+baseScriptList).sanitize (c))); + } + + protected: + OffsetTo<SortedArrayOf<Tag>> + baseTagList; /* Offset to BaseTagList table, from beginning + * of Axis table (may be NULL) + * Array of 4-byte baseline identification tags — must + * be in alphabetical order */ + OffsetTo<BaseScriptList> + baseScriptList; /* Offset to BaseScriptList table, from beginning + * of Axis table + * Array of BaseScriptRecords, in alphabetical order + * by baseScriptTag */ + + public: + DEFINE_SIZE_STATIC (4); +}; + +struct BASE +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_BASE; + + const Axis &get_axis (hb_direction_t direction) const + { return HB_DIRECTION_IS_VERTICAL (direction) ? this+vAxis : this+hAxis; } + + const VariationStore &get_var_store () const + { return version.to_int () < 0x00010001u ? Null (VariationStore) : this+varStore; } + + bool get_baseline (hb_font_t *font, + hb_tag_t baseline_tag, + hb_direction_t direction, + hb_tag_t script_tag, + hb_tag_t language_tag, + hb_position_t *base) const + { + const BaseCoord *base_coord = nullptr; + if (unlikely (!get_axis (direction).get_baseline (baseline_tag, script_tag, language_tag, &base_coord) || + !base_coord || !base_coord->has_data ())) + return false; + + if (likely (base)) + *base = base_coord->get_coord (font, get_var_store (), direction); + + return true; + } + + /* TODO: Expose this separately sometime? */ + bool get_min_max (hb_font_t *font, + hb_direction_t direction, + hb_tag_t script_tag, + hb_tag_t language_tag, + hb_tag_t feature_tag, + hb_position_t *min, + hb_position_t *max) + { + const BaseCoord *min_coord, *max_coord; + if (!get_axis (direction).get_min_max (script_tag, language_tag, feature_tag, + &min_coord, &max_coord)) + return false; + + const VariationStore &var_store = get_var_store (); + if (likely (min && min_coord)) *min = min_coord->get_coord (font, var_store, direction); + if (likely (max && max_coord)) *max = max_coord->get_coord (font, var_store, direction); + return true; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + likely (version.major == 1) && + hAxis.sanitize (c, this) && + vAxis.sanitize (c, this) && + (version.to_int () < 0x00010001u || varStore.sanitize (c, this)))); + } + + protected: + FixedVersion<>version; /* Version of the BASE table */ + OffsetTo<Axis>hAxis; /* Offset to horizontal Axis table, from beginning + * of BASE table (may be NULL) */ + OffsetTo<Axis>vAxis; /* Offset to vertical Axis table, from beginning + * of BASE table (may be NULL) */ + LOffsetTo<VariationStore> + varStore; /* Offset to the table of Item Variation + * Store--from beginning of BASE + * header (may be NULL). Introduced + * in version 0x00010001. */ + public: + DEFINE_SIZE_MIN (8); +}; + + +} /* namespace OT */ + + +#endif /* HB_OT_LAYOUT_BASE_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-common.hh b/thirdparty/harfbuzz/src/hb-ot-layout-common.hh new file mode 100644 index 0000000000..3140dd6328 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-layout-common.hh @@ -0,0 +1,3178 @@ +/* + * Copyright © 2007,2008,2009 Red Hat, Inc. + * Copyright © 2010,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_LAYOUT_COMMON_HH +#define HB_OT_LAYOUT_COMMON_HH + +#include "hb.hh" +#include "hb-ot-layout.hh" +#include "hb-open-type.hh" +#include "hb-set.hh" +#include "hb-bimap.hh" + + +#ifndef HB_MAX_NESTING_LEVEL +#define HB_MAX_NESTING_LEVEL 6 +#endif +#ifndef HB_MAX_CONTEXT_LENGTH +#define HB_MAX_CONTEXT_LENGTH 64 +#endif +#ifndef HB_CLOSURE_MAX_STAGES +/* + * The maximum number of times a lookup can be applied during shaping. + * Used to limit the number of iterations of the closure algorithm. + * This must be larger than the number of times add_pause() is + * called in a collect_features call of any shaper. + */ +#define HB_CLOSURE_MAX_STAGES 32 +#endif + +#ifndef HB_MAX_SCRIPTS +#define HB_MAX_SCRIPTS 500 +#endif + +#ifndef HB_MAX_LANGSYS +#define HB_MAX_LANGSYS 2000 +#endif + +#ifndef HB_MAX_FEATURES +#define HB_MAX_FEATURES 750 +#endif + +#ifndef HB_MAX_FEATURE_INDICES +#define HB_MAX_FEATURE_INDICES 1500 +#endif + +#ifndef HB_MAX_LOOKUP_INDICES +#define HB_MAX_LOOKUP_INDICES 20000 +#endif + + +namespace OT { + + +#define NOT_COVERED ((unsigned int) -1) + + +template<typename Iterator> +static inline void Coverage_serialize (hb_serialize_context_t *c, + Iterator it); + +template<typename Iterator> +static inline void ClassDef_serialize (hb_serialize_context_t *c, + Iterator it); + +static void ClassDef_remap_and_serialize (hb_serialize_context_t *c, + const hb_set_t &glyphset, + const hb_map_t &gid_klass_map, + hb_sorted_vector_t<HBGlyphID> &glyphs, + const hb_set_t &klasses, + hb_map_t *klass_map /*INOUT*/); + +struct hb_subset_layout_context_t : + hb_dispatch_context_t<hb_subset_layout_context_t, hb_empty_t, HB_DEBUG_SUBSET> +{ + const char *get_name () { return "SUBSET_LAYOUT"; } + static return_t default_return_value () { return hb_empty_t (); } + + bool visitScript () + { + return script_count++ < HB_MAX_SCRIPTS; + } + + bool visitLangSys () + { + return langsys_count++ < HB_MAX_LANGSYS; + } + + bool visitFeatureIndex (int count) + { + feature_index_count += count; + return feature_index_count < HB_MAX_FEATURE_INDICES; + } + + bool visitLookupIndex() + { + lookup_index_count++; + return lookup_index_count < HB_MAX_LOOKUP_INDICES; + } + + hb_subset_context_t *subset_context; + const hb_tag_t table_tag; + const hb_map_t *lookup_index_map; + const hb_map_t *feature_index_map; + + hb_subset_layout_context_t (hb_subset_context_t *c_, + hb_tag_t tag_, + hb_map_t *lookup_map_, + hb_map_t *feature_map_) : + subset_context (c_), + table_tag (tag_), + lookup_index_map (lookup_map_), + feature_index_map (feature_map_), + script_count (0), + langsys_count (0), + feature_index_count (0), + lookup_index_count (0) + {} + + private: + unsigned script_count; + unsigned langsys_count; + unsigned feature_index_count; + unsigned lookup_index_count; +}; + +struct hb_collect_variation_indices_context_t : + hb_dispatch_context_t<hb_collect_variation_indices_context_t> +{ + template <typename T> + return_t dispatch (const T &obj) { obj.collect_variation_indices (this); return hb_empty_t (); } + static return_t default_return_value () { return hb_empty_t (); } + + hb_set_t *layout_variation_indices; + const hb_set_t *glyph_set; + const hb_map_t *gpos_lookups; + + hb_collect_variation_indices_context_t (hb_set_t *layout_variation_indices_, + const hb_set_t *glyph_set_, + const hb_map_t *gpos_lookups_) : + layout_variation_indices (layout_variation_indices_), + glyph_set (glyph_set_), + gpos_lookups (gpos_lookups_) {} +}; + +template<typename OutputArray> +struct subset_offset_array_t +{ + subset_offset_array_t (hb_subset_context_t *subset_context_, + OutputArray& out_, + const void *base_) : subset_context (subset_context_), + out (out_), base (base_) {} + + template <typename T> + bool operator () (T&& offset) + { + auto *o = out.serialize_append (subset_context->serializer); + if (unlikely (!o)) return false; + auto snap = subset_context->serializer->snapshot (); + bool ret = o->serialize_subset (subset_context, offset, base); + if (!ret) + { + out.pop (); + subset_context->serializer->revert (snap); + } + return ret; + } + + private: + hb_subset_context_t *subset_context; + OutputArray &out; + const void *base; +}; + + +template<typename OutputArray, typename Arg> +struct subset_offset_array_arg_t +{ + subset_offset_array_arg_t (hb_subset_context_t *subset_context_, + OutputArray& out_, + const void *base_, + Arg &&arg_) : subset_context (subset_context_), out (out_), + base (base_), arg (arg_) {} + + template <typename T> + bool operator () (T&& offset) + { + auto *o = out.serialize_append (subset_context->serializer); + if (unlikely (!o)) return false; + auto snap = subset_context->serializer->snapshot (); + bool ret = o->serialize_subset (subset_context, offset, base, arg); + if (!ret) + { + out.pop (); + subset_context->serializer->revert (snap); + } + return ret; + } + + private: + hb_subset_context_t *subset_context; + OutputArray &out; + const void *base; + Arg &&arg; +}; + +/* + * Helper to subset an array of offsets. Subsets the thing pointed to by each offset + * and discards the offset in the array if the subset operation results in an empty + * thing. + */ +struct +{ + template<typename OutputArray> + subset_offset_array_t<OutputArray> + operator () (hb_subset_context_t *subset_context, OutputArray& out, + const void *base) const + { return subset_offset_array_t<OutputArray> (subset_context, out, base); } + + /* Variant with one extra argument passed to serialize_subset */ + template<typename OutputArray, typename Arg> + subset_offset_array_arg_t<OutputArray, Arg> + operator () (hb_subset_context_t *subset_context, OutputArray& out, + const void *base, Arg &&arg) const + { return subset_offset_array_arg_t<OutputArray, Arg> (subset_context, out, base, arg); } +} +HB_FUNCOBJ (subset_offset_array); + +template<typename OutputArray> +struct subset_record_array_t +{ + subset_record_array_t (hb_subset_layout_context_t *c_, OutputArray* out_, + const void *base_) : subset_layout_context (c_), + out (out_), base (base_) {} + + template <typename T> + void + operator () (T&& record) + { + auto snap = subset_layout_context->subset_context->serializer->snapshot (); + bool ret = record.subset (subset_layout_context, base); + if (!ret) subset_layout_context->subset_context->serializer->revert (snap); + else out->len++; + } + + private: + hb_subset_layout_context_t *subset_layout_context; + OutputArray *out; + const void *base; +}; + +/* + * Helper to subset a RecordList/record array. Subsets each Record in the array and + * discards the record if the subset operation returns false. + */ +struct +{ + template<typename OutputArray> + subset_record_array_t<OutputArray> + operator () (hb_subset_layout_context_t *c, OutputArray* out, + const void *base) const + { return subset_record_array_t<OutputArray> (c, out, base); } +} +HB_FUNCOBJ (subset_record_array); + +/* + * + * OpenType Layout Common Table Formats + * + */ + + +/* + * Script, ScriptList, LangSys, Feature, FeatureList, Lookup, LookupList + */ + +struct Record_sanitize_closure_t { + hb_tag_t tag; + const void *list_base; +}; + +template <typename Type> +struct Record +{ + int cmp (hb_tag_t a) const { return tag.cmp (a); } + + bool subset (hb_subset_layout_context_t *c, const void *base) const + { + TRACE_SUBSET (this); + auto *out = c->subset_context->serializer->embed (this); + if (unlikely (!out)) return_trace (false); + bool ret = out->offset.serialize_subset (c->subset_context, offset, base, c, &tag); + return_trace (ret); + } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + const Record_sanitize_closure_t closure = {tag, base}; + return_trace (c->check_struct (this) && offset.sanitize (c, base, &closure)); + } + + Tag tag; /* 4-byte Tag identifier */ + OffsetTo<Type> + offset; /* Offset from beginning of object holding + * the Record */ + public: + DEFINE_SIZE_STATIC (6); +}; + +template <typename Type> +struct RecordArrayOf : SortedArrayOf<Record<Type>> +{ + const OffsetTo<Type>& get_offset (unsigned int i) const + { return (*this)[i].offset; } + OffsetTo<Type>& get_offset (unsigned int i) + { return (*this)[i].offset; } + const Tag& get_tag (unsigned int i) const + { return (*this)[i].tag; } + unsigned int get_tags (unsigned int start_offset, + unsigned int *record_count /* IN/OUT */, + hb_tag_t *record_tags /* OUT */) const + { + if (record_count) + { + + this->sub_array (start_offset, record_count) + | hb_map (&Record<Type>::tag) + | hb_sink (hb_array (record_tags, *record_count)) + ; + } + return this->len; + } + bool find_index (hb_tag_t tag, unsigned int *index) const + { + return this->bfind (tag, index, HB_BFIND_NOT_FOUND_STORE, Index::NOT_FOUND_INDEX); + } +}; + +template <typename Type> +struct RecordListOf : RecordArrayOf<Type> +{ + const Type& operator [] (unsigned int i) const + { return this+this->get_offset (i); } + + bool subset (hb_subset_context_t *c, + hb_subset_layout_context_t *l) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + + + this->iter () + | hb_apply (subset_record_array (l, out, this)) + ; + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (RecordArrayOf<Type>::sanitize (c, this)); + } +}; + +struct Feature; + +struct RecordListOfFeature : RecordListOf<Feature> +{ + bool subset (hb_subset_context_t *c, + hb_subset_layout_context_t *l) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false); + + unsigned count = this->len; + + hb_zip (*this, hb_range (count)) + | hb_filter (l->feature_index_map, hb_second) + | hb_map (hb_first) + | hb_apply (subset_record_array (l, out, this)) + ; + return_trace (true); + } +}; + +struct RangeRecord +{ + int cmp (hb_codepoint_t g) const + { return g < first ? -1 : g <= last ? 0 : +1; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + bool intersects (const hb_set_t *glyphs) const + { return glyphs->intersects (first, last); } + + template <typename set_t> + bool collect_coverage (set_t *glyphs) const + { return glyphs->add_range (first, last); } + + HBGlyphID first; /* First GlyphID in the range */ + HBGlyphID last; /* Last GlyphID in the range */ + HBUINT16 value; /* Value */ + public: + DEFINE_SIZE_STATIC (6); +}; +DECLARE_NULL_NAMESPACE_BYTES (OT, RangeRecord); + + +struct IndexArray : ArrayOf<Index> +{ + bool intersects (const hb_map_t *indexes) const + { return hb_any (*this, indexes); } + + template <typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + void serialize (hb_serialize_context_t *c, + hb_subset_layout_context_t *l, + Iterator it) + { + if (!it) return; + if (unlikely (!c->extend_min ((*this)))) return; + + for (const auto _ : it) + { + if (!l->visitLookupIndex()) break; + + Index i; + i = _; + c->copy (i); + this->len++; + } + } + + unsigned int get_indexes (unsigned int start_offset, + unsigned int *_count /* IN/OUT */, + unsigned int *_indexes /* OUT */) const + { + if (_count) + { + + this->sub_array (start_offset, _count) + | hb_sink (hb_array (_indexes, *_count)) + ; + } + return this->len; + } + + void add_indexes_to (hb_set_t* output /* OUT */) const + { + output->add_array (arrayZ, len); + } +}; + + +struct LangSys +{ + unsigned int get_feature_count () const + { return featureIndex.len; } + hb_tag_t get_feature_index (unsigned int i) const + { return featureIndex[i]; } + unsigned int get_feature_indexes (unsigned int start_offset, + unsigned int *feature_count /* IN/OUT */, + unsigned int *feature_indexes /* OUT */) const + { return featureIndex.get_indexes (start_offset, feature_count, feature_indexes); } + void add_feature_indexes_to (hb_set_t *feature_indexes) const + { featureIndex.add_indexes_to (feature_indexes); } + + bool has_required_feature () const { return reqFeatureIndex != 0xFFFFu; } + unsigned int get_required_feature_index () const + { + if (reqFeatureIndex == 0xFFFFu) + return Index::NOT_FOUND_INDEX; + return reqFeatureIndex; + } + + LangSys* copy (hb_serialize_context_t *c) const + { + TRACE_SERIALIZE (this); + return_trace (c->embed (*this)); + } + + bool operator == (const LangSys& o) const + { + if (featureIndex.len != o.featureIndex.len || + reqFeatureIndex != o.reqFeatureIndex) + return false; + + for (const auto _ : + hb_zip (featureIndex, o.featureIndex)) + if (_.first != _.second) return false; + + return true; + } + + bool subset (hb_subset_context_t *c, + hb_subset_layout_context_t *l, + const Tag *tag = nullptr) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false); + + out->reqFeatureIndex = l->feature_index_map->has (reqFeatureIndex) ? l->feature_index_map->get (reqFeatureIndex) : 0xFFFFu; + + if (!l->visitFeatureIndex (featureIndex.len)) + return_trace (false); + + auto it = + + hb_iter (featureIndex) + | hb_filter (l->feature_index_map) + | hb_map (l->feature_index_map) + ; + + bool ret = bool (it); + out->featureIndex.serialize (c->serializer, l, it); + return_trace (ret); + } + + bool sanitize (hb_sanitize_context_t *c, + const Record_sanitize_closure_t * = nullptr) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && featureIndex.sanitize (c)); + } + + Offset16 lookupOrderZ; /* = Null (reserved for an offset to a + * reordering table) */ + HBUINT16 reqFeatureIndex;/* Index of a feature required for this + * language system--if no required features + * = 0xFFFFu */ + IndexArray featureIndex; /* Array of indices into the FeatureList */ + public: + DEFINE_SIZE_ARRAY_SIZED (6, featureIndex); +}; +DECLARE_NULL_NAMESPACE_BYTES (OT, LangSys); + +struct Script +{ + unsigned int get_lang_sys_count () const + { return langSys.len; } + const Tag& get_lang_sys_tag (unsigned int i) const + { return langSys.get_tag (i); } + unsigned int get_lang_sys_tags (unsigned int start_offset, + unsigned int *lang_sys_count /* IN/OUT */, + hb_tag_t *lang_sys_tags /* OUT */) const + { return langSys.get_tags (start_offset, lang_sys_count, lang_sys_tags); } + const LangSys& get_lang_sys (unsigned int i) const + { + if (i == Index::NOT_FOUND_INDEX) return get_default_lang_sys (); + return this+langSys[i].offset; + } + bool find_lang_sys_index (hb_tag_t tag, unsigned int *index) const + { return langSys.find_index (tag, index); } + + bool has_default_lang_sys () const { return defaultLangSys != 0; } + const LangSys& get_default_lang_sys () const { return this+defaultLangSys; } + + bool subset (hb_subset_context_t *c, + hb_subset_layout_context_t *l, + const Tag *tag) const + { + TRACE_SUBSET (this); + if (!l->visitScript ()) return_trace (false); + + auto *out = c->serializer->start_embed (*this); + if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false); + + bool defaultLang = false; + if (has_default_lang_sys ()) + { + c->serializer->push (); + const LangSys& ls = this+defaultLangSys; + bool ret = ls.subset (c, l); + if (!ret && tag && *tag != HB_TAG ('D', 'F', 'L', 'T')) + { + c->serializer->pop_discard (); + out->defaultLangSys = 0; + } + else + { + c->serializer->add_link (out->defaultLangSys, c->serializer->pop_pack ()); + defaultLang = true; + } + } + + + langSys.iter () + | hb_filter ([=] (const Record<LangSys>& record) {return l->visitLangSys (); }) + | hb_filter ([&] (const Record<LangSys>& record) + { + const LangSys& d = this+defaultLangSys; + const LangSys& l = this+record.offset; + return !(l == d); + }) + | hb_apply (subset_record_array (l, &(out->langSys), this)) + ; + + return_trace (bool (out->langSys.len) || defaultLang || l->table_tag == HB_OT_TAG_GSUB); + } + + bool sanitize (hb_sanitize_context_t *c, + const Record_sanitize_closure_t * = nullptr) const + { + TRACE_SANITIZE (this); + return_trace (defaultLangSys.sanitize (c, this) && langSys.sanitize (c, this)); + } + + protected: + OffsetTo<LangSys> + defaultLangSys; /* Offset to DefaultLangSys table--from + * beginning of Script table--may be Null */ + RecordArrayOf<LangSys> + langSys; /* Array of LangSysRecords--listed + * alphabetically by LangSysTag */ + public: + DEFINE_SIZE_ARRAY_SIZED (4, langSys); +}; + +typedef RecordListOf<Script> ScriptList; + + +/* https://docs.microsoft.com/en-us/typography/opentype/spec/features_pt#size */ +struct FeatureParamsSize +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this))) return_trace (false); + + /* This subtable has some "history", if you will. Some earlier versions of + * Adobe tools calculated the offset of the FeatureParams sutable from the + * beginning of the FeatureList table! Now, that is dealt with in the + * Feature implementation. But we still need to be able to tell junk from + * real data. Note: We don't check that the nameID actually exists. + * + * Read Roberts wrote on 9/15/06 on opentype-list@indx.co.uk : + * + * Yes, it is correct that a new version of the AFDKO (version 2.0) will be + * coming out soon, and that the makeotf program will build a font with a + * 'size' feature that is correct by the specification. + * + * The specification for this feature tag is in the "OpenType Layout Tag + * Registry". You can see a copy of this at: + * https://docs.microsoft.com/en-us/typography/opentype/spec/features_pt#tag-size + * + * Here is one set of rules to determine if the 'size' feature is built + * correctly, or as by the older versions of MakeOTF. You may be able to do + * better. + * + * Assume that the offset to the size feature is according to specification, + * and make the following value checks. If it fails, assume the size + * feature is calculated as versions of MakeOTF before the AFDKO 2.0 built it. + * If this fails, reject the 'size' feature. The older makeOTF's calculated the + * offset from the beginning of the FeatureList table, rather than from the + * beginning of the 'size' Feature table. + * + * If "design size" == 0: + * fails check + * + * Else if ("subfamily identifier" == 0 and + * "range start" == 0 and + * "range end" == 0 and + * "range start" == 0 and + * "menu name ID" == 0) + * passes check: this is the format used when there is a design size + * specified, but there is no recommended size range. + * + * Else if ("design size" < "range start" or + * "design size" > "range end" or + * "range end" <= "range start" or + * "menu name ID" < 256 or + * "menu name ID" > 32767 or + * menu name ID is not a name ID which is actually in the name table) + * fails test + * Else + * passes test. + */ + + if (!designSize) + return_trace (false); + else if (subfamilyID == 0 && + subfamilyNameID == 0 && + rangeStart == 0 && + rangeEnd == 0) + return_trace (true); + else if (designSize < rangeStart || + designSize > rangeEnd || + subfamilyNameID < 256 || + subfamilyNameID > 32767) + return_trace (false); + else + return_trace (true); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + return_trace ((bool) c->serializer->embed (*this)); + } + + HBUINT16 designSize; /* Represents the design size in 720/inch + * units (decipoints). The design size entry + * must be non-zero. When there is a design + * size but no recommended size range, the + * rest of the array will consist of zeros. */ + HBUINT16 subfamilyID; /* Has no independent meaning, but serves + * as an identifier that associates fonts + * in a subfamily. All fonts which share a + * Preferred or Font Family name and which + * differ only by size range shall have the + * same subfamily value, and no fonts which + * differ in weight or style shall have the + * same subfamily value. If this value is + * zero, the remaining fields in the array + * will be ignored. */ + NameID subfamilyNameID;/* If the preceding value is non-zero, this + * value must be set in the range 256 - 32767 + * (inclusive). It records the value of a + * field in the name table, which must + * contain English-language strings encoded + * in Windows Unicode and Macintosh Roman, + * and may contain additional strings + * localized to other scripts and languages. + * Each of these strings is the name an + * application should use, in combination + * with the family name, to represent the + * subfamily in a menu. Applications will + * choose the appropriate version based on + * their selection criteria. */ + HBUINT16 rangeStart; /* Large end of the recommended usage range + * (inclusive), stored in 720/inch units + * (decipoints). */ + HBUINT16 rangeEnd; /* Small end of the recommended usage range + (exclusive), stored in 720/inch units + * (decipoints). */ + public: + DEFINE_SIZE_STATIC (10); +}; + +/* https://docs.microsoft.com/en-us/typography/opentype/spec/features_pt#ssxx */ +struct FeatureParamsStylisticSet +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + /* Right now minorVersion is at zero. Which means, any table supports + * the uiNameID field. */ + return_trace (c->check_struct (this)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + return_trace ((bool) c->serializer->embed (*this)); + } + + HBUINT16 version; /* (set to 0): This corresponds to a “minor” + * version number. Additional data may be + * added to the end of this Feature Parameters + * table in the future. */ + + NameID uiNameID; /* The 'name' table name ID that specifies a + * string (or strings, for multiple languages) + * for a user-interface label for this + * feature. The values of uiLabelNameId and + * sampleTextNameId are expected to be in the + * font-specific name ID range (256-32767), + * though that is not a requirement in this + * Feature Parameters specification. The + * user-interface label for the feature can + * be provided in multiple languages. An + * English string should be included as a + * fallback. The string should be kept to a + * minimal length to fit comfortably with + * different application interfaces. */ + public: + DEFINE_SIZE_STATIC (4); +}; + +/* https://docs.microsoft.com/en-us/typography/opentype/spec/features_ae#cv01-cv99 */ +struct FeatureParamsCharacterVariants +{ + unsigned + get_characters (unsigned start_offset, unsigned *char_count, hb_codepoint_t *chars) const + { + if (char_count) + { + + characters.sub_array (start_offset, char_count) + | hb_sink (hb_array (chars, *char_count)) + ; + } + return characters.len; + } + + unsigned get_size () const + { return min_size + characters.len * HBUINT24::static_size; } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + return_trace ((bool) c->serializer->embed (*this)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + characters.sanitize (c)); + } + + HBUINT16 format; /* Format number is set to 0. */ + NameID featUILableNameID; /* The ‘name’ table name ID that + * specifies a string (or strings, + * for multiple languages) for a + * user-interface label for this + * feature. (May be NULL.) */ + NameID featUITooltipTextNameID;/* The ‘name’ table name ID that + * specifies a string (or strings, + * for multiple languages) that an + * application can use for tooltip + * text for this feature. (May be + * nullptr.) */ + NameID sampleTextNameID; /* The ‘name’ table name ID that + * specifies sample text that + * illustrates the effect of this + * feature. (May be NULL.) */ + HBUINT16 numNamedParameters; /* Number of named parameters. (May + * be zero.) */ + NameID firstParamUILabelNameID;/* The first ‘name’ table name ID + * used to specify strings for + * user-interface labels for the + * feature parameters. (Must be zero + * if numParameters is zero.) */ + ArrayOf<HBUINT24> + characters; /* Array of the Unicode Scalar Value + * of the characters for which this + * feature provides glyph variants. + * (May be zero.) */ + public: + DEFINE_SIZE_ARRAY (14, characters); +}; + +struct FeatureParams +{ + bool sanitize (hb_sanitize_context_t *c, hb_tag_t tag) const + { +#ifdef HB_NO_LAYOUT_FEATURE_PARAMS + return true; +#endif + TRACE_SANITIZE (this); + if (tag == HB_TAG ('s','i','z','e')) + return_trace (u.size.sanitize (c)); + if ((tag & 0xFFFF0000u) == HB_TAG ('s','s','\0','\0')) /* ssXX */ + return_trace (u.stylisticSet.sanitize (c)); + if ((tag & 0xFFFF0000u) == HB_TAG ('c','v','\0','\0')) /* cvXX */ + return_trace (u.characterVariants.sanitize (c)); + return_trace (true); + } + + bool subset (hb_subset_context_t *c, const Tag* tag) const + { + TRACE_SUBSET (this); + if (!tag) return_trace (false); + if (*tag == HB_TAG ('s','i','z','e')) + return_trace (u.size.subset (c)); + if ((*tag & 0xFFFF0000u) == HB_TAG ('s','s','\0','\0')) /* ssXX */ + return_trace (u.stylisticSet.subset (c)); + if ((*tag & 0xFFFF0000u) == HB_TAG ('c','v','\0','\0')) /* cvXX */ + return_trace (u.characterVariants.subset (c)); + return_trace (false); + } + +#ifndef HB_NO_LAYOUT_FEATURE_PARAMS + const FeatureParamsSize& get_size_params (hb_tag_t tag) const + { + if (tag == HB_TAG ('s','i','z','e')) + return u.size; + return Null (FeatureParamsSize); + } + const FeatureParamsStylisticSet& get_stylistic_set_params (hb_tag_t tag) const + { + if ((tag & 0xFFFF0000u) == HB_TAG ('s','s','\0','\0')) /* ssXX */ + return u.stylisticSet; + return Null (FeatureParamsStylisticSet); + } + const FeatureParamsCharacterVariants& get_character_variants_params (hb_tag_t tag) const + { + if ((tag & 0xFFFF0000u) == HB_TAG ('c','v','\0','\0')) /* cvXX */ + return u.characterVariants; + return Null (FeatureParamsCharacterVariants); + } +#endif + + private: + union { + FeatureParamsSize size; + FeatureParamsStylisticSet stylisticSet; + FeatureParamsCharacterVariants characterVariants; + } u; + public: + DEFINE_SIZE_MIN (0); +}; + +struct Feature +{ + unsigned int get_lookup_count () const + { return lookupIndex.len; } + hb_tag_t get_lookup_index (unsigned int i) const + { return lookupIndex[i]; } + unsigned int get_lookup_indexes (unsigned int start_index, + unsigned int *lookup_count /* IN/OUT */, + unsigned int *lookup_tags /* OUT */) const + { return lookupIndex.get_indexes (start_index, lookup_count, lookup_tags); } + void add_lookup_indexes_to (hb_set_t *lookup_indexes) const + { lookupIndex.add_indexes_to (lookup_indexes); } + + const FeatureParams &get_feature_params () const + { return this+featureParams; } + + bool intersects_lookup_indexes (const hb_map_t *lookup_indexes) const + { return lookupIndex.intersects (lookup_indexes); } + + bool subset (hb_subset_context_t *c, + hb_subset_layout_context_t *l, + const Tag *tag = nullptr) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false); + + bool subset_featureParams = out->featureParams.serialize_subset (c, featureParams, this, tag); + + auto it = + + hb_iter (lookupIndex) + | hb_filter (l->lookup_index_map) + | hb_map (l->lookup_index_map) + ; + + out->lookupIndex.serialize (c->serializer, l, it); + return_trace (bool (it) || subset_featureParams + || (tag && *tag == HB_TAG ('p', 'r', 'e', 'f'))); + } + + bool sanitize (hb_sanitize_context_t *c, + const Record_sanitize_closure_t *closure = nullptr) const + { + TRACE_SANITIZE (this); + if (unlikely (!(c->check_struct (this) && lookupIndex.sanitize (c)))) + return_trace (false); + + /* Some earlier versions of Adobe tools calculated the offset of the + * FeatureParams subtable from the beginning of the FeatureList table! + * + * If sanitizing "failed" for the FeatureParams subtable, try it with the + * alternative location. We would know sanitize "failed" if old value + * of the offset was non-zero, but it's zeroed now. + * + * Only do this for the 'size' feature, since at the time of the faulty + * Adobe tools, only the 'size' feature had FeatureParams defined. + */ + + if (likely (featureParams.is_null ())) + return_trace (true); + + unsigned int orig_offset = featureParams; + if (unlikely (!featureParams.sanitize (c, this, closure ? closure->tag : HB_TAG_NONE))) + return_trace (false); + + if (featureParams == 0 && closure && + closure->tag == HB_TAG ('s','i','z','e') && + closure->list_base && closure->list_base < this) + { + unsigned int new_offset_int = orig_offset - + (((char *) this) - ((char *) closure->list_base)); + + OffsetTo<FeatureParams> new_offset; + /* Check that it would not overflow. */ + new_offset = new_offset_int; + if (new_offset == new_offset_int && + c->try_set (&featureParams, new_offset_int) && + !featureParams.sanitize (c, this, closure ? closure->tag : HB_TAG_NONE)) + return_trace (false); + } + + return_trace (true); + } + + OffsetTo<FeatureParams> + featureParams; /* Offset to Feature Parameters table (if one + * has been defined for the feature), relative + * to the beginning of the Feature Table; = Null + * if not required */ + IndexArray lookupIndex; /* Array of LookupList indices */ + public: + DEFINE_SIZE_ARRAY_SIZED (4, lookupIndex); +}; + +typedef RecordListOf<Feature> FeatureList; + + +struct LookupFlag : HBUINT16 +{ + enum Flags { + RightToLeft = 0x0001u, + IgnoreBaseGlyphs = 0x0002u, + IgnoreLigatures = 0x0004u, + IgnoreMarks = 0x0008u, + IgnoreFlags = 0x000Eu, + UseMarkFilteringSet = 0x0010u, + Reserved = 0x00E0u, + MarkAttachmentType = 0xFF00u + }; + public: + DEFINE_SIZE_STATIC (2); +}; + +} /* namespace OT */ +/* This has to be outside the namespace. */ +HB_MARK_AS_FLAG_T (OT::LookupFlag::Flags); +namespace OT { + +struct Lookup +{ + unsigned int get_subtable_count () const { return subTable.len; } + + template <typename TSubTable> + const OffsetArrayOf<TSubTable>& get_subtables () const + { return reinterpret_cast<const OffsetArrayOf<TSubTable> &> (subTable); } + template <typename TSubTable> + OffsetArrayOf<TSubTable>& get_subtables () + { return reinterpret_cast<OffsetArrayOf<TSubTable> &> (subTable); } + + template <typename TSubTable> + const TSubTable& get_subtable (unsigned int i) const + { return this+get_subtables<TSubTable> ()[i]; } + template <typename TSubTable> + TSubTable& get_subtable (unsigned int i) + { return this+get_subtables<TSubTable> ()[i]; } + + unsigned int get_size () const + { + const HBUINT16 &markFilteringSet = StructAfter<const HBUINT16> (subTable); + if (lookupFlag & LookupFlag::UseMarkFilteringSet) + return (const char *) &StructAfter<const char> (markFilteringSet) - (const char *) this; + return (const char *) &markFilteringSet - (const char *) this; + } + + unsigned int get_type () const { return lookupType; } + + /* lookup_props is a 32-bit integer where the lower 16-bit is LookupFlag and + * higher 16-bit is mark-filtering-set if the lookup uses one. + * Not to be confused with glyph_props which is very similar. */ + uint32_t get_props () const + { + unsigned int flag = lookupFlag; + if (unlikely (flag & LookupFlag::UseMarkFilteringSet)) + { + const HBUINT16 &markFilteringSet = StructAfter<HBUINT16> (subTable); + flag += (markFilteringSet << 16); + } + return flag; + } + + template <typename TSubTable, typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + unsigned int lookup_type = get_type (); + TRACE_DISPATCH (this, lookup_type); + unsigned int count = get_subtable_count (); + for (unsigned int i = 0; i < count; i++) { + typename context_t::return_t r = get_subtable<TSubTable> (i).dispatch (c, lookup_type, hb_forward<Ts> (ds)...); + if (c->stop_sublookup_iteration (r)) + return_trace (r); + } + return_trace (c->default_return_value ()); + } + + bool serialize (hb_serialize_context_t *c, + unsigned int lookup_type, + uint32_t lookup_props, + unsigned int num_subtables) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + lookupType = lookup_type; + lookupFlag = lookup_props & 0xFFFFu; + if (unlikely (!subTable.serialize (c, num_subtables))) return_trace (false); + if (lookupFlag & LookupFlag::UseMarkFilteringSet) + { + if (unlikely (!c->extend (*this))) return_trace (false); + HBUINT16 &markFilteringSet = StructAfter<HBUINT16> (subTable); + markFilteringSet = lookup_props >> 16; + } + return_trace (true); + } + + template <typename TSubTable> + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false); + out->lookupType = lookupType; + out->lookupFlag = lookupFlag; + + const hb_set_t *glyphset = c->plan->glyphset (); + unsigned int lookup_type = get_type (); + + hb_iter (get_subtables <TSubTable> ()) + | hb_filter ([this, glyphset, lookup_type] (const OffsetTo<TSubTable> &_) { return (this+_).intersects (glyphset, lookup_type); }) + | hb_apply (subset_offset_array (c, out->get_subtables<TSubTable> (), this, lookup_type)) + ; + + return_trace (true); + } + + template <typename TSubTable> + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!(c->check_struct (this) && subTable.sanitize (c))) return_trace (false); + + unsigned subtables = get_subtable_count (); + if (unlikely (!c->visit_subtables (subtables))) return_trace (false); + + if (lookupFlag & LookupFlag::UseMarkFilteringSet) + { + const HBUINT16 &markFilteringSet = StructAfter<HBUINT16> (subTable); + if (!markFilteringSet.sanitize (c)) return_trace (false); + } + + if (unlikely (!get_subtables<TSubTable> ().sanitize (c, this, get_type ()))) + return_trace (false); + + if (unlikely (get_type () == TSubTable::Extension && !c->get_edit_count ())) + { + /* The spec says all subtables of an Extension lookup should + * have the same type, which shall not be the Extension type + * itself (but we already checked for that). + * This is specially important if one has a reverse type! + * + * We only do this if sanitizer edit_count is zero. Otherwise, + * some of the subtables might have become insane after they + * were sanity-checked by the edits of subsequent subtables. + * https://bugs.chromium.org/p/chromium/issues/detail?id=960331 + */ + unsigned int type = get_subtable<TSubTable> (0).u.extension.get_type (); + for (unsigned int i = 1; i < subtables; i++) + if (get_subtable<TSubTable> (i).u.extension.get_type () != type) + return_trace (false); + } + return_trace (true); + } + + private: + HBUINT16 lookupType; /* Different enumerations for GSUB and GPOS */ + HBUINT16 lookupFlag; /* Lookup qualifiers */ + ArrayOf<Offset16> + subTable; /* Array of SubTables */ +/*HBUINT16 markFilteringSetX[HB_VAR_ARRAY];*//* Index (base 0) into GDEF mark glyph sets + * structure. This field is only present if bit + * UseMarkFilteringSet of lookup flags is set. */ + public: + DEFINE_SIZE_ARRAY (6, subTable); +}; + +typedef OffsetListOf<Lookup> LookupList; + +template <typename TLookup> +struct LookupOffsetList : OffsetListOf<TLookup> +{ + bool subset (hb_subset_context_t *c, + hb_subset_layout_context_t *l) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (this); + if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false); + + unsigned count = this->len; + + hb_zip (*this, hb_range (count)) + | hb_filter (l->lookup_index_map, hb_second) + | hb_map (hb_first) + | hb_apply (subset_offset_array (c, *out, this)) + ; + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (OffsetListOf<TLookup>::sanitize (c, this)); + } +}; + + +/* + * Coverage Table + */ + +struct CoverageFormat1 +{ + friend struct Coverage; + + private: + unsigned int get_coverage (hb_codepoint_t glyph_id) const + { + unsigned int i; + glyphArray.bfind (glyph_id, &i, HB_BFIND_NOT_FOUND_STORE, NOT_COVERED); + return i; + } + + template <typename Iterator, + hb_requires (hb_is_sorted_source_of (Iterator, hb_codepoint_t))> + bool serialize (hb_serialize_context_t *c, Iterator glyphs) + { + TRACE_SERIALIZE (this); + return_trace (glyphArray.serialize (c, glyphs)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (glyphArray.sanitize (c)); + } + + bool intersects (const hb_set_t *glyphs) const + { + /* TODO Speed up, using hb_set_next() and bsearch()? */ + unsigned int count = glyphArray.len; + for (unsigned int i = 0; i < count; i++) + if (glyphs->has (glyphArray[i])) + return true; + return false; + } + bool intersects_coverage (const hb_set_t *glyphs, unsigned int index) const + { return glyphs->has (glyphArray[index]); } + + template <typename set_t> + bool collect_coverage (set_t *glyphs) const + { return glyphs->add_sorted_array (glyphArray.arrayZ, glyphArray.len); } + + public: + /* Older compilers need this to be public. */ + struct iter_t + { + void init (const struct CoverageFormat1 &c_) { c = &c_; i = 0; } + void fini () {} + bool more () const { return i < c->glyphArray.len; } + void next () { i++; } + hb_codepoint_t get_glyph () const { return c->glyphArray[i]; } + bool operator != (const iter_t& o) const + { return i != o.i || c != o.c; } + + private: + const struct CoverageFormat1 *c; + unsigned int i; + }; + private: + + protected: + HBUINT16 coverageFormat; /* Format identifier--format = 1 */ + SortedArrayOf<HBGlyphID> + glyphArray; /* Array of GlyphIDs--in numerical order */ + public: + DEFINE_SIZE_ARRAY (4, glyphArray); +}; + +struct CoverageFormat2 +{ + friend struct Coverage; + + private: + unsigned int get_coverage (hb_codepoint_t glyph_id) const + { + const RangeRecord &range = rangeRecord.bsearch (glyph_id); + return likely (range.first <= range.last) + ? (unsigned int) range.value + (glyph_id - range.first) + : NOT_COVERED; + } + + template <typename Iterator, + hb_requires (hb_is_sorted_source_of (Iterator, hb_codepoint_t))> + bool serialize (hb_serialize_context_t *c, Iterator glyphs) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + + if (unlikely (!glyphs)) + { + rangeRecord.len = 0; + return_trace (true); + } + + /* TODO(iter) Write more efficiently? */ + + unsigned num_ranges = 0; + hb_codepoint_t last = (hb_codepoint_t) -2; + for (auto g: glyphs) + { + if (last + 1 != g) + num_ranges++; + last = g; + } + + if (unlikely (!rangeRecord.serialize (c, num_ranges))) return_trace (false); + + unsigned count = 0; + unsigned range = (unsigned) -1; + last = (hb_codepoint_t) -2; + for (auto g: glyphs) + { + if (last + 1 != g) + { + range++; + rangeRecord[range].first = g; + rangeRecord[range].value = count; + } + rangeRecord[range].last = g; + last = g; + count++; + } + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (rangeRecord.sanitize (c)); + } + + bool intersects (const hb_set_t *glyphs) const + { + /* TODO Speed up, using hb_set_next() and bsearch()? */ + unsigned int count = rangeRecord.len; + for (unsigned int i = 0; i < count; i++) + if (rangeRecord[i].intersects (glyphs)) + return true; + return false; + } + bool intersects_coverage (const hb_set_t *glyphs, unsigned int index) const + { + unsigned int i; + unsigned int count = rangeRecord.len; + for (i = 0; i < count; i++) { + const RangeRecord &range = rangeRecord[i]; + if (range.value <= index && + index < (unsigned int) range.value + (range.last - range.first) && + range.intersects (glyphs)) + return true; + else if (index < range.value) + return false; + } + return false; + } + + template <typename set_t> + bool collect_coverage (set_t *glyphs) const + { + unsigned int count = rangeRecord.len; + for (unsigned int i = 0; i < count; i++) + if (unlikely (!rangeRecord[i].collect_coverage (glyphs))) + return false; + return true; + } + + public: + /* Older compilers need this to be public. */ + struct iter_t + { + void init (const CoverageFormat2 &c_) + { + c = &c_; + coverage = 0; + i = 0; + j = c->rangeRecord.len ? c->rangeRecord[0].first : 0; + if (unlikely (c->rangeRecord[0].first > c->rangeRecord[0].last)) + { + /* Broken table. Skip. */ + i = c->rangeRecord.len; + } + } + void fini () {} + bool more () const { return i < c->rangeRecord.len; } + void next () + { + if (j >= c->rangeRecord[i].last) + { + i++; + if (more ()) + { + unsigned int old = coverage; + j = c->rangeRecord[i].first; + coverage = c->rangeRecord[i].value; + if (unlikely (coverage != old + 1)) + { + /* Broken table. Skip. Important to avoid DoS. + * Also, our callers depend on coverage being + * consecutive and monotonically increasing, + * ie. iota(). */ + i = c->rangeRecord.len; + return; + } + } + return; + } + coverage++; + j++; + } + hb_codepoint_t get_glyph () const { return j; } + bool operator != (const iter_t& o) const + { return i != o.i || j != o.j || c != o.c; } + + private: + const struct CoverageFormat2 *c; + unsigned int i, coverage; + hb_codepoint_t j; + }; + private: + + protected: + HBUINT16 coverageFormat; /* Format identifier--format = 2 */ + SortedArrayOf<RangeRecord> + rangeRecord; /* Array of glyph ranges--ordered by + * Start GlyphID. rangeCount entries + * long */ + public: + DEFINE_SIZE_ARRAY (4, rangeRecord); +}; + +struct Coverage +{ + /* Has interface. */ + static constexpr unsigned SENTINEL = NOT_COVERED; + typedef unsigned int value_t; + value_t operator [] (hb_codepoint_t k) const { return get (k); } + bool has (hb_codepoint_t k) const { return (*this)[k] != SENTINEL; } + /* Predicate. */ + bool operator () (hb_codepoint_t k) const { return has (k); } + + unsigned int get (hb_codepoint_t k) const { return get_coverage (k); } + unsigned int get_coverage (hb_codepoint_t glyph_id) const + { + switch (u.format) { + case 1: return u.format1.get_coverage (glyph_id); + case 2: return u.format2.get_coverage (glyph_id); + default:return NOT_COVERED; + } + } + + template <typename Iterator, + hb_requires (hb_is_sorted_source_of (Iterator, hb_codepoint_t))> + bool serialize (hb_serialize_context_t *c, Iterator glyphs) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + + unsigned count = 0; + unsigned num_ranges = 0; + hb_codepoint_t last = (hb_codepoint_t) -2; + for (auto g: glyphs) + { + if (last + 1 != g) + num_ranges++; + last = g; + count++; + } + u.format = count <= num_ranges * 3 ? 1 : 2; + + switch (u.format) + { + case 1: return_trace (u.format1.serialize (c, glyphs)); + case 2: return_trace (u.format2.serialize (c, glyphs)); + default:return_trace (false); + } + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto it = + + iter () + | hb_filter (glyphset) + | hb_map_retains_sorting (glyph_map) + ; + + bool ret = bool (it); + Coverage_serialize (c->serializer, it); + return_trace (ret); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!u.format.sanitize (c)) return_trace (false); + switch (u.format) + { + case 1: return_trace (u.format1.sanitize (c)); + case 2: return_trace (u.format2.sanitize (c)); + default:return_trace (true); + } + } + + bool intersects (const hb_set_t *glyphs) const + { + switch (u.format) + { + case 1: return u.format1.intersects (glyphs); + case 2: return u.format2.intersects (glyphs); + default:return false; + } + } + bool intersects_coverage (const hb_set_t *glyphs, unsigned int index) const + { + switch (u.format) + { + case 1: return u.format1.intersects_coverage (glyphs, index); + case 2: return u.format2.intersects_coverage (glyphs, index); + default:return false; + } + } + + /* Might return false if array looks unsorted. + * Used for faster rejection of corrupt data. */ + template <typename set_t> + bool collect_coverage (set_t *glyphs) const + { + switch (u.format) + { + case 1: return u.format1.collect_coverage (glyphs); + case 2: return u.format2.collect_coverage (glyphs); + default:return false; + } + } + + struct iter_t : hb_iter_with_fallback_t<iter_t, hb_codepoint_t> + { + static constexpr bool is_sorted_iterator = true; + iter_t (const Coverage &c_ = Null (Coverage)) + { + memset (this, 0, sizeof (*this)); + format = c_.u.format; + switch (format) + { + case 1: u.format1.init (c_.u.format1); return; + case 2: u.format2.init (c_.u.format2); return; + default: return; + } + } + bool __more__ () const + { + switch (format) + { + case 1: return u.format1.more (); + case 2: return u.format2.more (); + default:return false; + } + } + void __next__ () + { + switch (format) + { + case 1: u.format1.next (); break; + case 2: u.format2.next (); break; + default: break; + } + } + typedef hb_codepoint_t __item_t__; + __item_t__ __item__ () const { return get_glyph (); } + + hb_codepoint_t get_glyph () const + { + switch (format) + { + case 1: return u.format1.get_glyph (); + case 2: return u.format2.get_glyph (); + default:return 0; + } + } + bool operator != (const iter_t& o) const + { + if (format != o.format) return true; + switch (format) + { + case 1: return u.format1 != o.u.format1; + case 2: return u.format2 != o.u.format2; + default:return false; + } + } + + private: + unsigned int format; + union { + CoverageFormat2::iter_t format2; /* Put this one first since it's larger; helps shut up compiler. */ + CoverageFormat1::iter_t format1; + } u; + }; + iter_t iter () const { return iter_t (*this); } + + protected: + union { + HBUINT16 format; /* Format identifier */ + CoverageFormat1 format1; + CoverageFormat2 format2; + } u; + public: + DEFINE_SIZE_UNION (2, format); +}; + +template<typename Iterator> +static inline void +Coverage_serialize (hb_serialize_context_t *c, + Iterator it) +{ c->start_embed<Coverage> ()->serialize (c, it); } + +static void ClassDef_remap_and_serialize (hb_serialize_context_t *c, + const hb_set_t &glyphset, + const hb_map_t &gid_klass_map, + hb_sorted_vector_t<HBGlyphID> &glyphs, + const hb_set_t &klasses, + hb_map_t *klass_map /*INOUT*/) +{ + if (!klass_map) + { + ClassDef_serialize (c, hb_zip (glyphs.iter (), + glyphs.iter () + | hb_map (gid_klass_map))); + return; + } + + /* any glyph not assigned a class value falls into Class zero (0), + * if any glyph assigned to class 0, remapping must start with 0->0*/ + if (glyphset.get_population () > gid_klass_map.get_population ()) + klass_map->set (0, 0); + + unsigned idx = klass_map->has (0) ? 1 : 0; + for (const unsigned k: klasses.iter ()) + { + if (klass_map->has (k)) continue; + klass_map->set (k, idx); + idx++; + } + + auto it = + + glyphs.iter () + | hb_map_retains_sorting ([&] (const HBGlyphID& gid) -> hb_pair_t<hb_codepoint_t, unsigned> + { + unsigned new_klass = klass_map->get (gid_klass_map[gid]); + return hb_pair ((hb_codepoint_t)gid, new_klass); + }) + ; + + c->propagate_error (glyphs, klasses); + ClassDef_serialize (c, it); +} + +/* + * Class Definition Table + */ + +struct ClassDefFormat1 +{ + friend struct ClassDef; + + private: + unsigned int get_class (hb_codepoint_t glyph_id) const + { + return classValue[(unsigned int) (glyph_id - startGlyph)]; + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + bool serialize (hb_serialize_context_t *c, + Iterator it) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + + if (unlikely (!it)) + { + startGlyph = 0; + classValue.len = 0; + return_trace (true); + } + + hb_codepoint_t glyph_min = (*it).first; + hb_codepoint_t glyph_max = + it + | hb_map (hb_first) + | hb_reduce (hb_max, 0u); + unsigned glyph_count = glyph_max - glyph_min + 1; + + startGlyph = glyph_min; + if (unlikely (!classValue.serialize (c, glyph_count))) return_trace (false); + for (const hb_pair_t<hb_codepoint_t, unsigned> gid_klass_pair : + it) + { + unsigned idx = gid_klass_pair.first - glyph_min; + classValue[idx] = gid_klass_pair.second; + } + return_trace (true); + } + + bool subset (hb_subset_context_t *c, + hb_map_t *klass_map = nullptr /*OUT*/) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->_glyphset_gsub; + const hb_map_t &glyph_map = *c->plan->glyph_map; + + hb_sorted_vector_t<HBGlyphID> glyphs; + hb_set_t orig_klasses; + hb_map_t gid_org_klass_map; + + hb_codepoint_t start = startGlyph; + hb_codepoint_t end = start + classValue.len; + for (const hb_codepoint_t gid : + hb_range (start, end) + | hb_filter (glyphset)) + { + unsigned klass = classValue[gid - start]; + if (!klass) continue; + + glyphs.push (glyph_map[gid]); + gid_org_klass_map.set (glyph_map[gid], klass); + orig_klasses.add (klass); + } + + ClassDef_remap_and_serialize (c->serializer, glyphset, gid_org_klass_map, + glyphs, orig_klasses, klass_map); + return_trace ((bool) glyphs); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && classValue.sanitize (c)); + } + + template <typename set_t> + bool collect_coverage (set_t *glyphs) const + { + unsigned int start = 0; + unsigned int count = classValue.len; + for (unsigned int i = 0; i < count; i++) + { + if (classValue[i]) + continue; + + if (start != i) + if (unlikely (!glyphs->add_range (startGlyph + start, startGlyph + i))) + return false; + + start = i + 1; + } + if (start != count) + if (unlikely (!glyphs->add_range (startGlyph + start, startGlyph + count))) + return false; + + return true; + } + + template <typename set_t> + bool collect_class (set_t *glyphs, unsigned int klass) const + { + unsigned int count = classValue.len; + for (unsigned int i = 0; i < count; i++) + if (classValue[i] == klass) glyphs->add (startGlyph + i); + return true; + } + + bool intersects (const hb_set_t *glyphs) const + { + /* TODO Speed up, using hb_set_next()? */ + hb_codepoint_t start = startGlyph; + hb_codepoint_t end = startGlyph + classValue.len; + for (hb_codepoint_t iter = startGlyph - 1; + hb_set_next (glyphs, &iter) && iter < end;) + if (classValue[iter - start]) return true; + return false; + } + bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const + { + unsigned int count = classValue.len; + if (klass == 0) + { + /* Match if there's any glyph that is not listed! */ + hb_codepoint_t g = HB_SET_VALUE_INVALID; + if (!hb_set_next (glyphs, &g)) return false; + if (g < startGlyph) return true; + g = startGlyph + count - 1; + if (hb_set_next (glyphs, &g)) return true; + /* Fall through. */ + } + for (unsigned int i = 0; i < count; i++) + if (classValue[i] == klass && glyphs->has (startGlyph + i)) + return true; + return false; + } + + protected: + HBUINT16 classFormat; /* Format identifier--format = 1 */ + HBGlyphID startGlyph; /* First GlyphID of the classValueArray */ + ArrayOf<HBUINT16> + classValue; /* Array of Class Values--one per GlyphID */ + public: + DEFINE_SIZE_ARRAY (6, classValue); +}; + +struct ClassDefFormat2 +{ + friend struct ClassDef; + + private: + unsigned int get_class (hb_codepoint_t glyph_id) const + { + return rangeRecord.bsearch (glyph_id).value; + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + bool serialize (hb_serialize_context_t *c, + Iterator it) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + + if (unlikely (!it)) + { + rangeRecord.len = 0; + return_trace (true); + } + + unsigned num_ranges = 1; + hb_codepoint_t prev_gid = (*it).first; + unsigned prev_klass = (*it).second; + + RangeRecord range_rec; + range_rec.first = prev_gid; + range_rec.last = prev_gid; + range_rec.value = prev_klass; + + RangeRecord *record = c->copy (range_rec); + if (unlikely (!record)) return_trace (false); + + for (const auto gid_klass_pair : + (++it)) + { + hb_codepoint_t cur_gid = gid_klass_pair.first; + unsigned cur_klass = gid_klass_pair.second; + + if (cur_gid != prev_gid + 1 || + cur_klass != prev_klass) + { + if (unlikely (!record)) break; + record->last = prev_gid; + num_ranges++; + + range_rec.first = cur_gid; + range_rec.last = cur_gid; + range_rec.value = cur_klass; + + record = c->copy (range_rec); + } + + prev_klass = cur_klass; + prev_gid = cur_gid; + } + + if (likely (record)) record->last = prev_gid; + rangeRecord.len = num_ranges; + return_trace (true); + } + + bool subset (hb_subset_context_t *c, + hb_map_t *klass_map = nullptr /*OUT*/) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->_glyphset_gsub; + const hb_map_t &glyph_map = *c->plan->glyph_map; + + hb_sorted_vector_t<HBGlyphID> glyphs; + hb_set_t orig_klasses; + hb_map_t gid_org_klass_map; + + unsigned count = rangeRecord.len; + for (unsigned i = 0; i < count; i++) + { + unsigned klass = rangeRecord[i].value; + if (!klass) continue; + hb_codepoint_t start = rangeRecord[i].first; + hb_codepoint_t end = rangeRecord[i].last + 1; + for (hb_codepoint_t g = start; g < end; g++) + { + if (!glyphset.has (g)) continue; + glyphs.push (glyph_map[g]); + gid_org_klass_map.set (glyph_map[g], klass); + orig_klasses.add (klass); + } + } + + ClassDef_remap_and_serialize (c->serializer, glyphset, gid_org_klass_map, + glyphs, orig_klasses, klass_map); + return_trace ((bool) glyphs); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (rangeRecord.sanitize (c)); + } + + template <typename set_t> + bool collect_coverage (set_t *glyphs) const + { + unsigned int count = rangeRecord.len; + for (unsigned int i = 0; i < count; i++) + if (rangeRecord[i].value) + if (unlikely (!rangeRecord[i].collect_coverage (glyphs))) + return false; + return true; + } + + template <typename set_t> + bool collect_class (set_t *glyphs, unsigned int klass) const + { + unsigned int count = rangeRecord.len; + for (unsigned int i = 0; i < count; i++) + { + if (rangeRecord[i].value == klass) + if (unlikely (!rangeRecord[i].collect_coverage (glyphs))) + return false; + } + return true; + } + + bool intersects (const hb_set_t *glyphs) const + { + /* TODO Speed up, using hb_set_next() and bsearch()? */ + unsigned int count = rangeRecord.len; + for (unsigned int i = 0; i < count; i++) + if (rangeRecord[i].intersects (glyphs)) + return true; + return false; + } + bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const + { + unsigned int count = rangeRecord.len; + if (klass == 0) + { + /* Match if there's any glyph that is not listed! */ + hb_codepoint_t g = HB_SET_VALUE_INVALID; + for (unsigned int i = 0; i < count; i++) + { + if (!hb_set_next (glyphs, &g)) + break; + if (g < rangeRecord[i].first) + return true; + g = rangeRecord[i].last; + } + if (g != HB_SET_VALUE_INVALID && hb_set_next (glyphs, &g)) + return true; + /* Fall through. */ + } + for (unsigned int i = 0; i < count; i++) + if (rangeRecord[i].value == klass && rangeRecord[i].intersects (glyphs)) + return true; + return false; + } + + protected: + HBUINT16 classFormat; /* Format identifier--format = 2 */ + SortedArrayOf<RangeRecord> + rangeRecord; /* Array of glyph ranges--ordered by + * Start GlyphID */ + public: + DEFINE_SIZE_ARRAY (4, rangeRecord); +}; + +struct ClassDef +{ + /* Has interface. */ + static constexpr unsigned SENTINEL = 0; + typedef unsigned int value_t; + value_t operator [] (hb_codepoint_t k) const { return get (k); } + bool has (hb_codepoint_t k) const { return (*this)[k] != SENTINEL; } + /* Projection. */ + hb_codepoint_t operator () (hb_codepoint_t k) const { return get (k); } + + unsigned int get (hb_codepoint_t k) const { return get_class (k); } + unsigned int get_class (hb_codepoint_t glyph_id) const + { + switch (u.format) { + case 1: return u.format1.get_class (glyph_id); + case 2: return u.format2.get_class (glyph_id); + default:return 0; + } + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + bool serialize (hb_serialize_context_t *c, Iterator it) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + + unsigned format = 2; + if (likely (it)) + { + hb_codepoint_t glyph_min = (*it).first; + hb_codepoint_t glyph_max = + it + | hb_map (hb_first) + | hb_reduce (hb_max, 0u); + + unsigned num_ranges = 1; + hb_codepoint_t prev_gid = glyph_min; + unsigned prev_klass = (*it).second; + + for (const auto gid_klass_pair : it) + { + hb_codepoint_t cur_gid = gid_klass_pair.first; + unsigned cur_klass = gid_klass_pair.second; + if (cur_gid == glyph_min || !cur_klass) continue; + if (cur_gid != prev_gid + 1 || + cur_klass != prev_klass) + num_ranges++; + + prev_gid = cur_gid; + prev_klass = cur_klass; + } + + if (1 + (glyph_max - glyph_min + 1) <= num_ranges * 3) + format = 1; + } + u.format = format; + + switch (u.format) + { + case 1: return_trace (u.format1.serialize (c, it)); + case 2: return_trace (u.format2.serialize (c, it)); + default:return_trace (false); + } + } + + bool subset (hb_subset_context_t *c, + hb_map_t *klass_map = nullptr /*OUT*/) const + { + TRACE_SUBSET (this); + switch (u.format) { + case 1: return_trace (u.format1.subset (c, klass_map)); + case 2: return_trace (u.format2.subset (c, klass_map)); + default:return_trace (false); + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!u.format.sanitize (c)) return_trace (false); + switch (u.format) { + case 1: return_trace (u.format1.sanitize (c)); + case 2: return_trace (u.format2.sanitize (c)); + default:return_trace (true); + } + } + + /* Might return false if array looks unsorted. + * Used for faster rejection of corrupt data. */ + template <typename set_t> + bool collect_coverage (set_t *glyphs) const + { + switch (u.format) { + case 1: return u.format1.collect_coverage (glyphs); + case 2: return u.format2.collect_coverage (glyphs); + default:return false; + } + } + + /* Might return false if array looks unsorted. + * Used for faster rejection of corrupt data. */ + template <typename set_t> + bool collect_class (set_t *glyphs, unsigned int klass) const + { + switch (u.format) { + case 1: return u.format1.collect_class (glyphs, klass); + case 2: return u.format2.collect_class (glyphs, klass); + default:return false; + } + } + + bool intersects (const hb_set_t *glyphs) const + { + switch (u.format) { + case 1: return u.format1.intersects (glyphs); + case 2: return u.format2.intersects (glyphs); + default:return false; + } + } + bool intersects_class (const hb_set_t *glyphs, unsigned int klass) const + { + switch (u.format) { + case 1: return u.format1.intersects_class (glyphs, klass); + case 2: return u.format2.intersects_class (glyphs, klass); + default:return false; + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + ClassDefFormat1 format1; + ClassDefFormat2 format2; + } u; + public: + DEFINE_SIZE_UNION (2, format); +}; + +template<typename Iterator> +static inline void ClassDef_serialize (hb_serialize_context_t *c, + Iterator it) +{ c->start_embed<ClassDef> ()->serialize (c, it); } + + +/* + * Item Variation Store + */ + +struct VarRegionAxis +{ + float evaluate (int coord) const + { + int start = startCoord, peak = peakCoord, end = endCoord; + + /* TODO Move these to sanitize(). */ + if (unlikely (start > peak || peak > end)) + return 1.; + if (unlikely (start < 0 && end > 0 && peak != 0)) + return 1.; + + if (peak == 0 || coord == peak) + return 1.; + + if (coord <= start || end <= coord) + return 0.; + + /* Interpolate */ + if (coord < peak) + return float (coord - start) / (peak - start); + else + return float (end - coord) / (end - peak); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + /* TODO Handle invalid start/peak/end configs, so we don't + * have to do that at runtime. */ + } + + public: + F2DOT14 startCoord; + F2DOT14 peakCoord; + F2DOT14 endCoord; + public: + DEFINE_SIZE_STATIC (6); +}; + +struct VarRegionList +{ + float evaluate (unsigned int region_index, + const int *coords, unsigned int coord_len) const + { + if (unlikely (region_index >= regionCount)) + return 0.; + + const VarRegionAxis *axes = axesZ.arrayZ + (region_index * axisCount); + + float v = 1.; + unsigned int count = axisCount; + for (unsigned int i = 0; i < count; i++) + { + int coord = i < coord_len ? coords[i] : 0; + float factor = axes[i].evaluate (coord); + if (factor == 0.f) + return 0.; + v *= factor; + } + return v; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + axesZ.sanitize (c, (unsigned int) axisCount * (unsigned int) regionCount)); + } + + bool serialize (hb_serialize_context_t *c, const VarRegionList *src, const hb_bimap_t ®ion_map) + { + TRACE_SERIALIZE (this); + VarRegionList *out = c->allocate_min<VarRegionList> (); + if (unlikely (!out)) return_trace (false); + axisCount = src->axisCount; + regionCount = region_map.get_population (); + if (unlikely (!c->allocate_size<VarRegionList> (get_size () - min_size))) return_trace (false); + unsigned int region_count = src->get_region_count (); + for (unsigned int r = 0; r < regionCount; r++) + { + unsigned int backward = region_map.backward (r); + if (backward >= region_count) return_trace (false); + memcpy (&axesZ[axisCount * r], &src->axesZ[axisCount * backward], VarRegionAxis::static_size * axisCount); + } + + return_trace (true); + } + + unsigned int get_size () const { return min_size + VarRegionAxis::static_size * axisCount * regionCount; } + unsigned int get_region_count () const { return regionCount; } + + protected: + HBUINT16 axisCount; + HBUINT16 regionCount; + UnsizedArrayOf<VarRegionAxis> + axesZ; + public: + DEFINE_SIZE_ARRAY (4, axesZ); +}; + +struct VarData +{ + unsigned int get_region_index_count () const + { return regionIndices.len; } + + unsigned int get_row_size () const + { return shortCount + regionIndices.len; } + + unsigned int get_size () const + { return itemCount * get_row_size (); } + + float get_delta (unsigned int inner, + const int *coords, unsigned int coord_count, + const VarRegionList ®ions) const + { + if (unlikely (inner >= itemCount)) + return 0.; + + unsigned int count = regionIndices.len; + unsigned int scount = shortCount; + + const HBUINT8 *bytes = get_delta_bytes (); + const HBUINT8 *row = bytes + inner * (scount + count); + + float delta = 0.; + unsigned int i = 0; + + const HBINT16 *scursor = reinterpret_cast<const HBINT16 *> (row); + for (; i < scount; i++) + { + float scalar = regions.evaluate (regionIndices.arrayZ[i], coords, coord_count); + delta += scalar * *scursor++; + } + const HBINT8 *bcursor = reinterpret_cast<const HBINT8 *> (scursor); + for (; i < count; i++) + { + float scalar = regions.evaluate (regionIndices.arrayZ[i], coords, coord_count); + delta += scalar * *bcursor++; + } + + return delta; + } + + void get_scalars (const int *coords, unsigned int coord_count, + const VarRegionList ®ions, + float *scalars /*OUT */, + unsigned int num_scalars) const + { + unsigned count = hb_min (num_scalars, regionIndices.len); + for (unsigned int i = 0; i < count; i++) + scalars[i] = regions.evaluate (regionIndices.arrayZ[i], coords, coord_count); + for (unsigned int i = count; i < num_scalars; i++) + scalars[i] = 0.f; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + regionIndices.sanitize (c) && + shortCount <= regionIndices.len && + c->check_range (get_delta_bytes (), + itemCount, + get_row_size ())); + } + + bool serialize (hb_serialize_context_t *c, + const VarData *src, + const hb_inc_bimap_t &inner_map, + const hb_bimap_t ®ion_map) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + itemCount = inner_map.get_next_value (); + + /* Optimize short count */ + unsigned short ri_count = src->regionIndices.len; + enum delta_size_t { kZero=0, kByte, kShort }; + hb_vector_t<delta_size_t> delta_sz; + hb_vector_t<unsigned int> ri_map; /* maps old index to new index */ + delta_sz.resize (ri_count); + ri_map.resize (ri_count); + unsigned int new_short_count = 0; + unsigned int r; + for (r = 0; r < ri_count; r++) + { + delta_sz[r] = kZero; + for (unsigned int i = 0; i < inner_map.get_next_value (); i++) + { + unsigned int old = inner_map.backward (i); + int16_t delta = src->get_item_delta (old, r); + if (delta < -128 || 127 < delta) + { + delta_sz[r] = kShort; + new_short_count++; + break; + } + else if (delta != 0) + delta_sz[r] = kByte; + } + } + unsigned int short_index = 0; + unsigned int byte_index = new_short_count; + unsigned int new_ri_count = 0; + for (r = 0; r < ri_count; r++) + if (delta_sz[r]) + { + ri_map[r] = (delta_sz[r] == kShort)? short_index++ : byte_index++; + new_ri_count++; + } + + shortCount = new_short_count; + regionIndices.len = new_ri_count; + + unsigned int size = regionIndices.get_size () - HBUINT16::static_size/*regionIndices.len*/ + (get_row_size () * itemCount); + if (unlikely (!c->allocate_size<HBUINT8> (size))) + return_trace (false); + + for (r = 0; r < ri_count; r++) + if (delta_sz[r]) regionIndices[ri_map[r]] = region_map[src->regionIndices[r]]; + + for (unsigned int i = 0; i < itemCount; i++) + { + unsigned int old = inner_map.backward (i); + for (unsigned int r = 0; r < ri_count; r++) + if (delta_sz[r]) set_item_delta (i, ri_map[r], src->get_item_delta (old, r)); + } + + return_trace (true); + } + + void collect_region_refs (hb_inc_bimap_t ®ion_map, const hb_inc_bimap_t &inner_map) const + { + for (unsigned int r = 0; r < regionIndices.len; r++) + { + unsigned int region = regionIndices[r]; + if (region_map.has (region)) continue; + for (unsigned int i = 0; i < inner_map.get_next_value (); i++) + if (get_item_delta (inner_map.backward (i), r) != 0) + { + region_map.add (region); + break; + } + } + } + + protected: + const HBUINT8 *get_delta_bytes () const + { return &StructAfter<HBUINT8> (regionIndices); } + + HBUINT8 *get_delta_bytes () + { return &StructAfter<HBUINT8> (regionIndices); } + + int16_t get_item_delta (unsigned int item, unsigned int region) const + { + if ( item >= itemCount || unlikely (region >= regionIndices.len)) return 0; + const HBINT8 *p = (const HBINT8 *)get_delta_bytes () + item * get_row_size (); + if (region < shortCount) + return ((const HBINT16 *)p)[region]; + else + return (p + HBINT16::static_size * shortCount)[region - shortCount]; + } + + void set_item_delta (unsigned int item, unsigned int region, int16_t delta) + { + HBINT8 *p = (HBINT8 *)get_delta_bytes () + item * get_row_size (); + if (region < shortCount) + ((HBINT16 *)p)[region] = delta; + else + (p + HBINT16::static_size * shortCount)[region - shortCount] = delta; + } + + protected: + HBUINT16 itemCount; + HBUINT16 shortCount; + ArrayOf<HBUINT16> regionIndices; +/*UnsizedArrayOf<HBUINT8>bytesX;*/ + public: + DEFINE_SIZE_ARRAY (6, regionIndices); +}; + +struct VariationStore +{ + float get_delta (unsigned int outer, unsigned int inner, + const int *coords, unsigned int coord_count) const + { +#ifdef HB_NO_VAR + return 0.f; +#endif + + if (unlikely (outer >= dataSets.len)) + return 0.f; + + return (this+dataSets[outer]).get_delta (inner, + coords, coord_count, + this+regions); + } + + float get_delta (unsigned int index, + const int *coords, unsigned int coord_count) const + { + unsigned int outer = index >> 16; + unsigned int inner = index & 0xFFFF; + return get_delta (outer, inner, coords, coord_count); + } + + bool sanitize (hb_sanitize_context_t *c) const + { +#ifdef HB_NO_VAR + return true; +#endif + + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + format == 1 && + regions.sanitize (c, this) && + dataSets.sanitize (c, this)); + } + + bool serialize (hb_serialize_context_t *c, + const VariationStore *src, + const hb_array_t <hb_inc_bimap_t> &inner_maps) + { + TRACE_SERIALIZE (this); + unsigned int set_count = 0; + for (unsigned int i = 0; i < inner_maps.length; i++) + if (inner_maps[i].get_population () > 0) set_count++; + + unsigned int size = min_size + HBUINT32::static_size * set_count; + if (unlikely (!c->allocate_size<HBUINT32> (size))) return_trace (false); + format = 1; + + hb_inc_bimap_t region_map; + for (unsigned int i = 0; i < inner_maps.length; i++) + (src+src->dataSets[i]).collect_region_refs (region_map, inner_maps[i]); + region_map.sort (); + + if (unlikely (!regions.serialize (c, this) + .serialize (c, &(src+src->regions), region_map))) return_trace (false); + + /* TODO: The following code could be simplified when + * OffsetListOf::subset () can take a custom param to be passed to VarData::serialize () + */ + dataSets.len = set_count; + unsigned int set_index = 0; + for (unsigned int i = 0; i < inner_maps.length; i++) + { + if (inner_maps[i].get_population () == 0) continue; + if (unlikely (!dataSets[set_index++].serialize (c, this) + .serialize (c, &(src+src->dataSets[i]), inner_maps[i], region_map))) + return_trace (false); + } + + return_trace (true); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + + VariationStore *varstore_prime = c->serializer->start_embed<VariationStore> (); + if (unlikely (!varstore_prime)) return_trace (false); + + const hb_set_t *variation_indices = c->plan->layout_variation_indices; + if (variation_indices->is_empty ()) return_trace (false); + + hb_vector_t<hb_inc_bimap_t> inner_maps; + inner_maps.resize ((unsigned) dataSets.len); + for (unsigned i = 0; i < inner_maps.length; i++) + inner_maps[i].init (); + + for (unsigned idx : c->plan->layout_variation_indices->iter ()) + { + uint16_t major = idx >> 16; + uint16_t minor = idx & 0xFFFF; + + if (major >= inner_maps.length) + { + for (unsigned i = 0; i < inner_maps.length; i++) + inner_maps[i].fini (); + return_trace (false); + } + inner_maps[major].add (minor); + } + varstore_prime->serialize (c->serializer, this, inner_maps.as_array ()); + + for (unsigned i = 0; i < inner_maps.length; i++) + inner_maps[i].fini (); + return_trace (bool (varstore_prime->dataSets)); + } + + unsigned int get_region_index_count (unsigned int ivs) const + { return (this+dataSets[ivs]).get_region_index_count (); } + + void get_scalars (unsigned int ivs, + const int *coords, unsigned int coord_count, + float *scalars /*OUT*/, + unsigned int num_scalars) const + { +#ifdef HB_NO_VAR + for (unsigned i = 0; i < num_scalars; i++) + scalars[i] = 0.f; + return; +#endif + + (this+dataSets[ivs]).get_scalars (coords, coord_count, this+regions, + &scalars[0], num_scalars); + } + + unsigned int get_sub_table_count () const { return dataSets.len; } + + protected: + HBUINT16 format; + LOffsetTo<VarRegionList> regions; + LOffsetArrayOf<VarData> dataSets; + public: + DEFINE_SIZE_ARRAY (8, dataSets); +}; + +/* + * Feature Variations + */ + +struct ConditionFormat1 +{ + friend struct Condition; + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->embed (this); + if (unlikely (!out)) return_trace (false); + return_trace (true); + } + + private: + bool evaluate (const int *coords, unsigned int coord_len) const + { + int coord = axisIndex < coord_len ? coords[axisIndex] : 0; + return filterRangeMinValue <= coord && coord <= filterRangeMaxValue; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + HBUINT16 axisIndex; + F2DOT14 filterRangeMinValue; + F2DOT14 filterRangeMaxValue; + public: + DEFINE_SIZE_STATIC (8); +}; + +struct Condition +{ + bool evaluate (const int *coords, unsigned int coord_len) const + { + switch (u.format) { + case 1: return u.format1.evaluate (coords, coord_len); + default:return false; + } + } + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!u.format.sanitize (c)) return_trace (false); + switch (u.format) { + case 1: return_trace (u.format1.sanitize (c)); + default:return_trace (true); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + ConditionFormat1 format1; + } u; + public: + DEFINE_SIZE_UNION (2, format); +}; + +struct ConditionSet +{ + bool evaluate (const int *coords, unsigned int coord_len) const + { + unsigned int count = conditions.len; + for (unsigned int i = 0; i < count; i++) + if (!(this+conditions.arrayZ[i]).evaluate (coords, coord_len)) + return false; + return true; + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (this); + if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false); + + + conditions.iter () + | hb_apply (subset_offset_array (c, out->conditions, this)) + ; + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (conditions.sanitize (c, this)); + } + + protected: + LOffsetArrayOf<Condition> conditions; + public: + DEFINE_SIZE_ARRAY (2, conditions); +}; + +struct FeatureTableSubstitutionRecord +{ + friend struct FeatureTableSubstitution; + + void collect_lookups (const void *base, hb_set_t *lookup_indexes /* OUT */) const + { + return (base+feature).add_lookup_indexes_to (lookup_indexes); + } + + void closure_features (const void *base, + const hb_map_t *lookup_indexes, + hb_set_t *feature_indexes /* OUT */) const + { + if ((base+feature).intersects_lookup_indexes (lookup_indexes)) + feature_indexes->add (featureIndex); + } + + bool subset (hb_subset_layout_context_t *c, const void *base) const + { + TRACE_SUBSET (this); + auto *out = c->subset_context->serializer->embed (this); + if (unlikely (!out)) return_trace (false); + + out->featureIndex = c->feature_index_map->get (featureIndex); + bool ret = out->feature.serialize_subset (c->subset_context, feature, base, c); + return_trace (ret); + } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && feature.sanitize (c, base)); + } + + protected: + HBUINT16 featureIndex; + LOffsetTo<Feature> feature; + public: + DEFINE_SIZE_STATIC (6); +}; + +struct FeatureTableSubstitution +{ + const Feature *find_substitute (unsigned int feature_index) const + { + unsigned int count = substitutions.len; + for (unsigned int i = 0; i < count; i++) + { + const FeatureTableSubstitutionRecord &record = substitutions.arrayZ[i]; + if (record.featureIndex == feature_index) + return &(this+record.feature); + } + return nullptr; + } + + void collect_lookups (const hb_set_t *feature_indexes, + hb_set_t *lookup_indexes /* OUT */) const + { + + hb_iter (substitutions) + | hb_filter (feature_indexes, &FeatureTableSubstitutionRecord::featureIndex) + | hb_apply ([this, lookup_indexes] (const FeatureTableSubstitutionRecord& r) + { r.collect_lookups (this, lookup_indexes); }) + ; + } + + void closure_features (const hb_map_t *lookup_indexes, + hb_set_t *feature_indexes /* OUT */) const + { + for (const FeatureTableSubstitutionRecord& record : substitutions) + record.closure_features (this, lookup_indexes, feature_indexes); + } + + bool subset (hb_subset_context_t *c, + hb_subset_layout_context_t *l) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false); + + out->version.major = version.major; + out->version.minor = version.minor; + + + substitutions.iter () + | hb_apply (subset_record_array (l, &(out->substitutions), this)) + ; + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (version.sanitize (c) && + likely (version.major == 1) && + substitutions.sanitize (c, this)); + } + + protected: + FixedVersion<> version; /* Version--0x00010000u */ + ArrayOf<FeatureTableSubstitutionRecord> + substitutions; + public: + DEFINE_SIZE_ARRAY (6, substitutions); +}; + +struct FeatureVariationRecord +{ + friend struct FeatureVariations; + + void collect_lookups (const void *base, + const hb_set_t *feature_indexes, + hb_set_t *lookup_indexes /* OUT */) const + { + return (base+substitutions).collect_lookups (feature_indexes, lookup_indexes); + } + + void closure_features (const void *base, + const hb_map_t *lookup_indexes, + hb_set_t *feature_indexes /* OUT */) const + { + (base+substitutions).closure_features (lookup_indexes, feature_indexes); + } + + bool subset (hb_subset_layout_context_t *c, const void *base) const + { + TRACE_SUBSET (this); + auto *out = c->subset_context->serializer->embed (this); + if (unlikely (!out)) return_trace (false); + + out->conditions.serialize_subset (c->subset_context, conditions, base); + out->substitutions.serialize_subset (c->subset_context, substitutions, base, c); + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (conditions.sanitize (c, base) && + substitutions.sanitize (c, base)); + } + + protected: + LOffsetTo<ConditionSet> + conditions; + LOffsetTo<FeatureTableSubstitution> + substitutions; + public: + DEFINE_SIZE_STATIC (8); +}; + +struct FeatureVariations +{ + static constexpr unsigned NOT_FOUND_INDEX = 0xFFFFFFFFu; + + bool find_index (const int *coords, unsigned int coord_len, + unsigned int *index) const + { + unsigned int count = varRecords.len; + for (unsigned int i = 0; i < count; i++) + { + const FeatureVariationRecord &record = varRecords.arrayZ[i]; + if ((this+record.conditions).evaluate (coords, coord_len)) + { + *index = i; + return true; + } + } + *index = NOT_FOUND_INDEX; + return false; + } + + const Feature *find_substitute (unsigned int variations_index, + unsigned int feature_index) const + { + const FeatureVariationRecord &record = varRecords[variations_index]; + return (this+record.substitutions).find_substitute (feature_index); + } + + FeatureVariations* copy (hb_serialize_context_t *c) const + { + TRACE_SERIALIZE (this); + return_trace (c->embed (*this)); + } + + void collect_lookups (const hb_set_t *feature_indexes, + hb_set_t *lookup_indexes /* OUT */) const + { + for (const FeatureVariationRecord& r : varRecords) + r.collect_lookups (this, feature_indexes, lookup_indexes); + } + + void closure_features (const hb_map_t *lookup_indexes, + hb_set_t *feature_indexes /* OUT */) const + { + for (const FeatureVariationRecord& record : varRecords) + record.closure_features (this, lookup_indexes, feature_indexes); + } + + bool subset (hb_subset_context_t *c, + hb_subset_layout_context_t *l) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false); + + out->version.major = version.major; + out->version.minor = version.minor; + + + varRecords.iter () + | hb_apply (subset_record_array (l, &(out->varRecords), this)) + ; + return_trace (bool (out->varRecords)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (version.sanitize (c) && + likely (version.major == 1) && + varRecords.sanitize (c, this)); + } + + protected: + FixedVersion<> version; /* Version--0x00010000u */ + LArrayOf<FeatureVariationRecord> + varRecords; + public: + DEFINE_SIZE_ARRAY_SIZED (8, varRecords); +}; + + +/* + * Device Tables + */ + +struct HintingDevice +{ + friend struct Device; + + private: + + hb_position_t get_x_delta (hb_font_t *font) const + { return get_delta (font->x_ppem, font->x_scale); } + + hb_position_t get_y_delta (hb_font_t *font) const + { return get_delta (font->y_ppem, font->y_scale); } + + public: + + unsigned int get_size () const + { + unsigned int f = deltaFormat; + if (unlikely (f < 1 || f > 3 || startSize > endSize)) return 3 * HBUINT16::static_size; + return HBUINT16::static_size * (4 + ((endSize - startSize) >> (4 - f))); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && c->check_range (this, this->get_size ())); + } + + HintingDevice* copy (hb_serialize_context_t *c) const + { + TRACE_SERIALIZE (this); + return_trace (c->embed<HintingDevice> (this)); + } + + private: + + int get_delta (unsigned int ppem, int scale) const + { + if (!ppem) return 0; + + int pixels = get_delta_pixels (ppem); + + if (!pixels) return 0; + + return (int) (pixels * (int64_t) scale / ppem); + } + int get_delta_pixels (unsigned int ppem_size) const + { + unsigned int f = deltaFormat; + if (unlikely (f < 1 || f > 3)) + return 0; + + if (ppem_size < startSize || ppem_size > endSize) + return 0; + + unsigned int s = ppem_size - startSize; + + unsigned int byte = deltaValueZ[s >> (4 - f)]; + unsigned int bits = (byte >> (16 - (((s & ((1 << (4 - f)) - 1)) + 1) << f))); + unsigned int mask = (0xFFFFu >> (16 - (1 << f))); + + int delta = bits & mask; + + if ((unsigned int) delta >= ((mask + 1) >> 1)) + delta -= mask + 1; + + return delta; + } + + protected: + HBUINT16 startSize; /* Smallest size to correct--in ppem */ + HBUINT16 endSize; /* Largest size to correct--in ppem */ + HBUINT16 deltaFormat; /* Format of DeltaValue array data: 1, 2, or 3 + * 1 Signed 2-bit value, 8 values per uint16 + * 2 Signed 4-bit value, 4 values per uint16 + * 3 Signed 8-bit value, 2 values per uint16 + */ + UnsizedArrayOf<HBUINT16> + deltaValueZ; /* Array of compressed data */ + public: + DEFINE_SIZE_ARRAY (6, deltaValueZ); +}; + +struct VariationDevice +{ + friend struct Device; + + private: + + hb_position_t get_x_delta (hb_font_t *font, const VariationStore &store) const + { return font->em_scalef_x (get_delta (font, store)); } + + hb_position_t get_y_delta (hb_font_t *font, const VariationStore &store) const + { return font->em_scalef_y (get_delta (font, store)); } + + VariationDevice* copy (hb_serialize_context_t *c, const hb_map_t *layout_variation_idx_map) const + { + TRACE_SERIALIZE (this); + auto snap = c->snapshot (); + auto *out = c->embed (this); + if (unlikely (!out)) return_trace (nullptr); + if (!layout_variation_idx_map || layout_variation_idx_map->is_empty ()) return_trace (out); + + unsigned org_idx = (outerIndex << 16) + innerIndex; + if (!layout_variation_idx_map->has (org_idx)) + { + c->revert (snap); + return_trace (nullptr); + } + unsigned new_idx = layout_variation_idx_map->get (org_idx); + out->outerIndex = new_idx >> 16; + out->innerIndex = new_idx & 0xFFFF; + return_trace (out); + } + + void record_variation_index (hb_set_t *layout_variation_indices) const + { + unsigned var_idx = (outerIndex << 16) + innerIndex; + layout_variation_indices->add (var_idx); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + private: + + float get_delta (hb_font_t *font, const VariationStore &store) const + { + return store.get_delta (outerIndex, innerIndex, font->coords, font->num_coords); + } + + protected: + HBUINT16 outerIndex; + HBUINT16 innerIndex; + HBUINT16 deltaFormat; /* Format identifier for this table: 0x0x8000 */ + public: + DEFINE_SIZE_STATIC (6); +}; + +struct DeviceHeader +{ + protected: + HBUINT16 reserved1; + HBUINT16 reserved2; + public: + HBUINT16 format; /* Format identifier */ + public: + DEFINE_SIZE_STATIC (6); +}; + +struct Device +{ + hb_position_t get_x_delta (hb_font_t *font, const VariationStore &store=Null (VariationStore)) const + { + switch (u.b.format) + { +#ifndef HB_NO_HINTING + case 1: case 2: case 3: + return u.hinting.get_x_delta (font); +#endif +#ifndef HB_NO_VAR + case 0x8000: + return u.variation.get_x_delta (font, store); +#endif + default: + return 0; + } + } + hb_position_t get_y_delta (hb_font_t *font, const VariationStore &store=Null (VariationStore)) const + { + switch (u.b.format) + { + case 1: case 2: case 3: +#ifndef HB_NO_HINTING + return u.hinting.get_y_delta (font); +#endif +#ifndef HB_NO_VAR + case 0x8000: + return u.variation.get_y_delta (font, store); +#endif + default: + return 0; + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!u.b.format.sanitize (c)) return_trace (false); + switch (u.b.format) { +#ifndef HB_NO_HINTING + case 1: case 2: case 3: + return_trace (u.hinting.sanitize (c)); +#endif +#ifndef HB_NO_VAR + case 0x8000: + return_trace (u.variation.sanitize (c)); +#endif + default: + return_trace (true); + } + } + + Device* copy (hb_serialize_context_t *c, const hb_map_t *layout_variation_idx_map=nullptr) const + { + TRACE_SERIALIZE (this); + switch (u.b.format) { +#ifndef HB_NO_HINTING + case 1: + case 2: + case 3: + return_trace (reinterpret_cast<Device *> (u.hinting.copy (c))); +#endif +#ifndef HB_NO_VAR + case 0x8000: + return_trace (reinterpret_cast<Device *> (u.variation.copy (c, layout_variation_idx_map))); +#endif + default: + return_trace (nullptr); + } + } + + void collect_variation_indices (hb_set_t *layout_variation_indices) const + { + switch (u.b.format) { +#ifndef HB_NO_HINTING + case 1: + case 2: + case 3: + return; +#endif +#ifndef HB_NO_VAR + case 0x8000: + u.variation.record_variation_index (layout_variation_indices); + return; +#endif + default: + return; + } + } + + protected: + union { + DeviceHeader b; + HintingDevice hinting; +#ifndef HB_NO_VAR + VariationDevice variation; +#endif + } u; + public: + DEFINE_SIZE_UNION (6, b); +}; + + +} /* namespace OT */ + + +#endif /* HB_OT_LAYOUT_COMMON_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gdef-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gdef-table.hh new file mode 100644 index 0000000000..437e760f64 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-layout-gdef-table.hh @@ -0,0 +1,725 @@ +/* + * Copyright © 2007,2008,2009 Red Hat, Inc. + * Copyright © 2010,2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_LAYOUT_GDEF_TABLE_HH +#define HB_OT_LAYOUT_GDEF_TABLE_HH + +#include "hb-ot-layout-common.hh" + +#include "hb-font.hh" + + +namespace OT { + + +/* + * Attachment List Table + */ + +/* Array of contour point indices--in increasing numerical order */ +struct AttachPoint : ArrayOf<HBUINT16> +{ + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!out)) return_trace (false); + + return_trace (out->serialize (c->serializer, + iter ())); + } +}; + +struct AttachList +{ + unsigned int get_attach_points (hb_codepoint_t glyph_id, + unsigned int start_offset, + unsigned int *point_count /* IN/OUT */, + unsigned int *point_array /* OUT */) const + { + unsigned int index = (this+coverage).get_coverage (glyph_id); + if (index == NOT_COVERED) + { + if (point_count) + *point_count = 0; + return 0; + } + + const AttachPoint &points = this+attachPoint[index]; + + if (point_count) + { + + points.sub_array (start_offset, point_count) + | hb_sink (hb_array (point_array, *point_count)) + ; + } + + return points.len; + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + + hb_sorted_vector_t<hb_codepoint_t> new_coverage; + + hb_zip (this+coverage, attachPoint) + | hb_filter (glyphset, hb_first) + | hb_filter (subset_offset_array (c, out->attachPoint, this), hb_second) + | hb_map (hb_first) + | hb_map (glyph_map) + | hb_sink (new_coverage) + ; + out->coverage.serialize (c->serializer, out) + .serialize (c->serializer, new_coverage.iter ()); + return_trace (bool (new_coverage)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (coverage.sanitize (c, this) && attachPoint.sanitize (c, this)); + } + + protected: + OffsetTo<Coverage> + coverage; /* Offset to Coverage table -- from + * beginning of AttachList table */ + OffsetArrayOf<AttachPoint> + attachPoint; /* Array of AttachPoint tables + * in Coverage Index order */ + public: + DEFINE_SIZE_ARRAY (4, attachPoint); +}; + +/* + * Ligature Caret Table + */ + +struct CaretValueFormat1 +{ + friend struct CaretValue; + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->embed (this); + if (unlikely (!out)) return_trace (false); + return_trace (true); + } + + private: + hb_position_t get_caret_value (hb_font_t *font, hb_direction_t direction) const + { + return HB_DIRECTION_IS_HORIZONTAL (direction) ? font->em_scale_x (coordinate) : font->em_scale_y (coordinate); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + protected: + HBUINT16 caretValueFormat; /* Format identifier--format = 1 */ + FWORD coordinate; /* X or Y value, in design units */ + public: + DEFINE_SIZE_STATIC (4); +}; + +struct CaretValueFormat2 +{ + friend struct CaretValue; + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->embed (this); + if (unlikely (!out)) return_trace (false); + return_trace (true); + } + + private: + hb_position_t get_caret_value (hb_font_t *font, hb_direction_t direction, hb_codepoint_t glyph_id) const + { + hb_position_t x, y; + font->get_glyph_contour_point_for_origin (glyph_id, caretValuePoint, direction, &x, &y); + return HB_DIRECTION_IS_HORIZONTAL (direction) ? x : y; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + protected: + HBUINT16 caretValueFormat; /* Format identifier--format = 2 */ + HBUINT16 caretValuePoint; /* Contour point index on glyph */ + public: + DEFINE_SIZE_STATIC (4); +}; + +struct CaretValueFormat3 +{ + friend struct CaretValue; + + hb_position_t get_caret_value (hb_font_t *font, hb_direction_t direction, + const VariationStore &var_store) const + { + return HB_DIRECTION_IS_HORIZONTAL (direction) ? + font->em_scale_x (coordinate) + (this+deviceTable).get_x_delta (font, var_store) : + font->em_scale_y (coordinate) + (this+deviceTable).get_y_delta (font, var_store); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->embed (this); + if (unlikely (!out)) return_trace (false); + + return_trace (out->deviceTable.serialize_copy (c->serializer, deviceTable, this, c->serializer->to_bias (out), + hb_serialize_context_t::Head, c->plan->layout_variation_idx_map)); + } + + void collect_variation_indices (hb_set_t *layout_variation_indices) const + { (this+deviceTable).collect_variation_indices (layout_variation_indices); } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && deviceTable.sanitize (c, this)); + } + + protected: + HBUINT16 caretValueFormat; /* Format identifier--format = 3 */ + FWORD coordinate; /* X or Y value, in design units */ + OffsetTo<Device> + deviceTable; /* Offset to Device table for X or Y + * value--from beginning of CaretValue + * table */ + public: + DEFINE_SIZE_STATIC (6); +}; + +struct CaretValue +{ + hb_position_t get_caret_value (hb_font_t *font, + hb_direction_t direction, + hb_codepoint_t glyph_id, + const VariationStore &var_store) const + { + switch (u.format) { + case 1: return u.format1.get_caret_value (font, direction); + case 2: return u.format2.get_caret_value (font, direction, glyph_id); + case 3: return u.format3.get_caret_value (font, direction, var_store); + default:return 0; + } + } + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + case 2: return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...)); + case 3: return_trace (c->dispatch (u.format3, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + void collect_variation_indices (hb_set_t *layout_variation_indices) const + { + switch (u.format) { + case 1: + case 2: + return; + case 3: + u.format3.collect_variation_indices (layout_variation_indices); + return; + default: return; + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!u.format.sanitize (c)) return_trace (false); + switch (u.format) { + case 1: return_trace (u.format1.sanitize (c)); + case 2: return_trace (u.format2.sanitize (c)); + case 3: return_trace (u.format3.sanitize (c)); + default:return_trace (true); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + CaretValueFormat1 format1; + CaretValueFormat2 format2; + CaretValueFormat3 format3; + } u; + public: + DEFINE_SIZE_UNION (2, format); +}; + +struct LigGlyph +{ + unsigned get_lig_carets (hb_font_t *font, + hb_direction_t direction, + hb_codepoint_t glyph_id, + const VariationStore &var_store, + unsigned start_offset, + unsigned *caret_count /* IN/OUT */, + hb_position_t *caret_array /* OUT */) const + { + if (caret_count) + { + + carets.sub_array (start_offset, caret_count) + | hb_map (hb_add (this)) + | hb_map ([&] (const CaretValue &value) { return value.get_caret_value (font, direction, glyph_id, var_store); }) + | hb_sink (hb_array (caret_array, *caret_count)) + ; + } + + return carets.len; + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + + + hb_iter (carets) + | hb_apply (subset_offset_array (c, out->carets, this)) + ; + + return_trace (bool (out->carets)); + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { + for (const OffsetTo<CaretValue>& offset : carets.iter ()) + (this+offset).collect_variation_indices (c->layout_variation_indices); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (carets.sanitize (c, this)); + } + + protected: + OffsetArrayOf<CaretValue> + carets; /* Offset array of CaretValue tables + * --from beginning of LigGlyph table + * --in increasing coordinate order */ + public: + DEFINE_SIZE_ARRAY (2, carets); +}; + +struct LigCaretList +{ + unsigned int get_lig_carets (hb_font_t *font, + hb_direction_t direction, + hb_codepoint_t glyph_id, + const VariationStore &var_store, + unsigned int start_offset, + unsigned int *caret_count /* IN/OUT */, + hb_position_t *caret_array /* OUT */) const + { + unsigned int index = (this+coverage).get_coverage (glyph_id); + if (index == NOT_COVERED) + { + if (caret_count) + *caret_count = 0; + return 0; + } + const LigGlyph &lig_glyph = this+ligGlyph[index]; + return lig_glyph.get_lig_carets (font, direction, glyph_id, var_store, start_offset, caret_count, caret_array); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + + hb_sorted_vector_t<hb_codepoint_t> new_coverage; + + hb_zip (this+coverage, ligGlyph) + | hb_filter (glyphset, hb_first) + | hb_filter (subset_offset_array (c, out->ligGlyph, this), hb_second) + | hb_map (hb_first) + | hb_map (glyph_map) + | hb_sink (new_coverage) + ; + out->coverage.serialize (c->serializer, out) + .serialize (c->serializer, new_coverage.iter ()); + return_trace (bool (new_coverage)); + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { + + hb_zip (this+coverage, ligGlyph) + | hb_filter (c->glyph_set, hb_first) + | hb_map (hb_second) + | hb_map (hb_add (this)) + | hb_apply ([c] (const LigGlyph& _) { _.collect_variation_indices (c); }) + ; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (coverage.sanitize (c, this) && ligGlyph.sanitize (c, this)); + } + + protected: + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of LigCaretList table */ + OffsetArrayOf<LigGlyph> + ligGlyph; /* Array of LigGlyph tables + * in Coverage Index order */ + public: + DEFINE_SIZE_ARRAY (4, ligGlyph); +}; + + +struct MarkGlyphSetsFormat1 +{ + bool covers (unsigned int set_index, hb_codepoint_t glyph_id) const + { return (this+coverage[set_index]).get_coverage (glyph_id) != NOT_COVERED; } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + out->format = format; + + bool ret = true; + for (const LOffsetTo<Coverage>& offset : coverage.iter ()) + { + auto *o = out->coverage.serialize_append (c->serializer); + if (unlikely (!o)) + { + ret = false; + break; + } + + //not using o->serialize_subset (c, offset, this, out) here because + //OTS doesn't allow null offset. + //See issue: https://github.com/khaledhosny/ots/issues/172 + c->serializer->push (); + c->dispatch (this+offset); + c->serializer->add_link (*o, c->serializer->pop_pack ()); + } + + return_trace (ret && out->coverage.len); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (coverage.sanitize (c, this)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + ArrayOf<LOffsetTo<Coverage>> + coverage; /* Array of long offsets to mark set + * coverage tables */ + public: + DEFINE_SIZE_ARRAY (4, coverage); +}; + +struct MarkGlyphSets +{ + bool covers (unsigned int set_index, hb_codepoint_t glyph_id) const + { + switch (u.format) { + case 1: return u.format1.covers (set_index, glyph_id); + default:return false; + } + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + switch (u.format) { + case 1: return_trace (u.format1.subset (c)); + default:return_trace (false); + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!u.format.sanitize (c)) return_trace (false); + switch (u.format) { + case 1: return_trace (u.format1.sanitize (c)); + default:return_trace (true); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + MarkGlyphSetsFormat1 format1; + } u; + public: + DEFINE_SIZE_UNION (2, format); +}; + + +/* + * GDEF -- Glyph Definition + * https://docs.microsoft.com/en-us/typography/opentype/spec/gdef + */ + + +struct GDEF +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_GDEF; + + enum GlyphClasses { + UnclassifiedGlyph = 0, + BaseGlyph = 1, + LigatureGlyph = 2, + MarkGlyph = 3, + ComponentGlyph = 4 + }; + + bool has_data () const { return version.to_int (); } + bool has_glyph_classes () const { return glyphClassDef != 0; } + unsigned int get_glyph_class (hb_codepoint_t glyph) const + { return (this+glyphClassDef).get_class (glyph); } + void get_glyphs_in_class (unsigned int klass, hb_set_t *glyphs) const + { (this+glyphClassDef).collect_class (glyphs, klass); } + + bool has_mark_attachment_types () const { return markAttachClassDef != 0; } + unsigned int get_mark_attachment_type (hb_codepoint_t glyph) const + { return (this+markAttachClassDef).get_class (glyph); } + + bool has_attach_points () const { return attachList != 0; } + unsigned int get_attach_points (hb_codepoint_t glyph_id, + unsigned int start_offset, + unsigned int *point_count /* IN/OUT */, + unsigned int *point_array /* OUT */) const + { return (this+attachList).get_attach_points (glyph_id, start_offset, point_count, point_array); } + + bool has_lig_carets () const { return ligCaretList != 0; } + unsigned int get_lig_carets (hb_font_t *font, + hb_direction_t direction, + hb_codepoint_t glyph_id, + unsigned int start_offset, + unsigned int *caret_count /* IN/OUT */, + hb_position_t *caret_array /* OUT */) const + { return (this+ligCaretList).get_lig_carets (font, + direction, glyph_id, get_var_store(), + start_offset, caret_count, caret_array); } + + bool has_mark_sets () const { return version.to_int () >= 0x00010002u && markGlyphSetsDef != 0; } + bool mark_set_covers (unsigned int set_index, hb_codepoint_t glyph_id) const + { return version.to_int () >= 0x00010002u && (this+markGlyphSetsDef).covers (set_index, glyph_id); } + + bool has_var_store () const { return version.to_int () >= 0x00010003u && varStore != 0; } + const VariationStore &get_var_store () const + { return version.to_int () >= 0x00010003u ? this+varStore : Null (VariationStore); } + + /* glyph_props is a 16-bit integer where the lower 8-bit have bits representing + * glyph class and other bits, and high 8-bit the mark attachment type (if any). + * Not to be confused with lookup_props which is very similar. */ + unsigned int get_glyph_props (hb_codepoint_t glyph) const + { + unsigned int klass = get_glyph_class (glyph); + + static_assert (((unsigned int) HB_OT_LAYOUT_GLYPH_PROPS_BASE_GLYPH == (unsigned int) LookupFlag::IgnoreBaseGlyphs), ""); + static_assert (((unsigned int) HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE == (unsigned int) LookupFlag::IgnoreLigatures), ""); + static_assert (((unsigned int) HB_OT_LAYOUT_GLYPH_PROPS_MARK == (unsigned int) LookupFlag::IgnoreMarks), ""); + + switch (klass) { + default: return 0; + case BaseGlyph: return HB_OT_LAYOUT_GLYPH_PROPS_BASE_GLYPH; + case LigatureGlyph: return HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE; + case MarkGlyph: + klass = get_mark_attachment_type (glyph); + return HB_OT_LAYOUT_GLYPH_PROPS_MARK | (klass << 8); + } + } + + HB_INTERNAL bool is_blocklisted (hb_blob_t *blob, + hb_face_t *face) const; + + struct accelerator_t + { + void init (hb_face_t *face) + { + this->table = hb_sanitize_context_t ().reference_table<GDEF> (face); + if (unlikely (this->table->is_blocklisted (this->table.get_blob (), face))) + { + hb_blob_destroy (this->table.get_blob ()); + this->table = hb_blob_get_empty (); + } + } + + void fini () { this->table.destroy (); } + + hb_blob_ptr_t<GDEF> table; + }; + + unsigned int get_size () const + { + return min_size + + (version.to_int () >= 0x00010002u ? markGlyphSetsDef.static_size : 0) + + (version.to_int () >= 0x00010003u ? varStore.static_size : 0); + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { (this+ligCaretList).collect_variation_indices (c); } + + void remap_layout_variation_indices (const hb_set_t *layout_variation_indices, + hb_map_t *layout_variation_idx_map /* OUT */) const + { + if (version.to_int () < 0x00010003u || !varStore) return; + if (layout_variation_indices->is_empty ()) return; + + unsigned new_major = 0, new_minor = 0; + unsigned last_major = (layout_variation_indices->get_min ()) >> 16; + for (unsigned idx : layout_variation_indices->iter ()) + { + uint16_t major = idx >> 16; + if (major >= (this+varStore).get_sub_table_count ()) break; + if (major != last_major) + { + new_minor = 0; + ++new_major; + } + + unsigned new_idx = (new_major << 16) + new_minor; + layout_variation_idx_map->set (idx, new_idx); + ++new_minor; + last_major = major; + } + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->embed (*this); + if (unlikely (!out)) return_trace (false); + + bool subset_glyphclassdef = out->glyphClassDef.serialize_subset (c, glyphClassDef, this); + bool subset_attachlist = out->attachList.serialize_subset (c, attachList, this); + bool subset_ligcaretlist = out->ligCaretList.serialize_subset (c, ligCaretList, this); + bool subset_markattachclassdef = out->markAttachClassDef.serialize_subset (c, markAttachClassDef, this); + + bool subset_markglyphsetsdef = true; + if (version.to_int () >= 0x00010002u) + { + subset_markglyphsetsdef = out->markGlyphSetsDef.serialize_subset (c, markGlyphSetsDef, this); + if (!subset_markglyphsetsdef && + version.to_int () == 0x00010002u) + out->version.minor = 0; + } + + bool subset_varstore = true; + if (version.to_int () >= 0x00010003u) + { + subset_varstore = out->varStore.serialize_subset (c, varStore, this); + if (!subset_varstore && version.to_int () == 0x00010003u) + out->version.minor = 2; + } + + return_trace (subset_glyphclassdef || subset_attachlist || + subset_ligcaretlist || subset_markattachclassdef || + (out->version.to_int () >= 0x00010002u && subset_markglyphsetsdef) || + (out->version.to_int () >= 0x00010003u && subset_varstore)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (version.sanitize (c) && + likely (version.major == 1) && + glyphClassDef.sanitize (c, this) && + attachList.sanitize (c, this) && + ligCaretList.sanitize (c, this) && + markAttachClassDef.sanitize (c, this) && + (version.to_int () < 0x00010002u || markGlyphSetsDef.sanitize (c, this)) && + (version.to_int () < 0x00010003u || varStore.sanitize (c, this))); + } + + protected: + FixedVersion<>version; /* Version of the GDEF table--currently + * 0x00010003u */ + OffsetTo<ClassDef> + glyphClassDef; /* Offset to class definition table + * for glyph type--from beginning of + * GDEF header (may be Null) */ + OffsetTo<AttachList> + attachList; /* Offset to list of glyphs with + * attachment points--from beginning + * of GDEF header (may be Null) */ + OffsetTo<LigCaretList> + ligCaretList; /* Offset to list of positioning points + * for ligature carets--from beginning + * of GDEF header (may be Null) */ + OffsetTo<ClassDef> + markAttachClassDef; /* Offset to class definition table for + * mark attachment type--from beginning + * of GDEF header (may be Null) */ + OffsetTo<MarkGlyphSets> + markGlyphSetsDef; /* Offset to the table of mark set + * definitions--from beginning of GDEF + * header (may be NULL). Introduced + * in version 0x00010002. */ + LOffsetTo<VariationStore> + varStore; /* Offset to the table of Item Variation + * Store--from beginning of GDEF + * header (may be NULL). Introduced + * in version 0x00010003. */ + public: + DEFINE_SIZE_MIN (12); +}; + +struct GDEF_accelerator_t : GDEF::accelerator_t {}; + +} /* namespace OT */ + + +#endif /* HB_OT_LAYOUT_GDEF_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gpos-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gpos-table.hh new file mode 100644 index 0000000000..2217d298fb --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-layout-gpos-table.hh @@ -0,0 +1,2740 @@ +/* + * Copyright © 2007,2008,2009,2010 Red Hat, Inc. + * Copyright © 2010,2012,2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_LAYOUT_GPOS_TABLE_HH +#define HB_OT_LAYOUT_GPOS_TABLE_HH + +#include "hb-ot-layout-gsubgpos.hh" + + +namespace OT { + +struct MarkArray; +static void Markclass_closure_and_remap_indexes (const Coverage &mark_coverage, + const MarkArray &mark_array, + const hb_set_t &glyphset, + hb_map_t* klass_mapping /* INOUT */); + +/* buffer **position** var allocations */ +#define attach_chain() var.i16[0] /* glyph to which this attaches to, relative to current glyphs; negative for going back, positive for forward. */ +#define attach_type() var.u8[2] /* attachment type */ +/* Note! if attach_chain() is zero, the value of attach_type() is irrelevant. */ + +enum attach_type_t { + ATTACH_TYPE_NONE = 0X00, + + /* Each attachment should be either a mark or a cursive; can't be both. */ + ATTACH_TYPE_MARK = 0X01, + ATTACH_TYPE_CURSIVE = 0X02, +}; + + +/* Shared Tables: ValueRecord, Anchor Table, and MarkArray */ + +typedef HBUINT16 Value; + +typedef UnsizedArrayOf<Value> ValueRecord; + +struct ValueFormat : HBUINT16 +{ + enum Flags { + xPlacement = 0x0001u, /* Includes horizontal adjustment for placement */ + yPlacement = 0x0002u, /* Includes vertical adjustment for placement */ + xAdvance = 0x0004u, /* Includes horizontal adjustment for advance */ + yAdvance = 0x0008u, /* Includes vertical adjustment for advance */ + xPlaDevice = 0x0010u, /* Includes horizontal Device table for placement */ + yPlaDevice = 0x0020u, /* Includes vertical Device table for placement */ + xAdvDevice = 0x0040u, /* Includes horizontal Device table for advance */ + yAdvDevice = 0x0080u, /* Includes vertical Device table for advance */ + ignored = 0x0F00u, /* Was used in TrueType Open for MM fonts */ + reserved = 0xF000u, /* For future use */ + + devices = 0x00F0u /* Mask for having any Device table */ + }; + +/* All fields are options. Only those available advance the value pointer. */ +#if 0 + HBINT16 xPlacement; /* Horizontal adjustment for + * placement--in design units */ + HBINT16 yPlacement; /* Vertical adjustment for + * placement--in design units */ + HBINT16 xAdvance; /* Horizontal adjustment for + * advance--in design units (only used + * for horizontal writing) */ + HBINT16 yAdvance; /* Vertical adjustment for advance--in + * design units (only used for vertical + * writing) */ + OffsetTo<Device> xPlaDevice; /* Offset to Device table for + * horizontal placement--measured from + * beginning of PosTable (may be NULL) */ + OffsetTo<Device> yPlaDevice; /* Offset to Device table for vertical + * placement--measured from beginning + * of PosTable (may be NULL) */ + OffsetTo<Device> xAdvDevice; /* Offset to Device table for + * horizontal advance--measured from + * beginning of PosTable (may be NULL) */ + OffsetTo<Device> yAdvDevice; /* Offset to Device table for vertical + * advance--measured from beginning of + * PosTable (may be NULL) */ +#endif + + unsigned int get_len () const { return hb_popcount ((unsigned int) *this); } + unsigned int get_size () const { return get_len () * Value::static_size; } + + bool apply_value (hb_ot_apply_context_t *c, + const void *base, + const Value *values, + hb_glyph_position_t &glyph_pos) const + { + bool ret = false; + unsigned int format = *this; + if (!format) return ret; + + hb_font_t *font = c->font; + bool horizontal = HB_DIRECTION_IS_HORIZONTAL (c->direction); + + if (format & xPlacement) glyph_pos.x_offset += font->em_scale_x (get_short (values++, &ret)); + if (format & yPlacement) glyph_pos.y_offset += font->em_scale_y (get_short (values++, &ret)); + if (format & xAdvance) { + if (likely (horizontal)) glyph_pos.x_advance += font->em_scale_x (get_short (values, &ret)); + values++; + } + /* y_advance values grow downward but font-space grows upward, hence negation */ + if (format & yAdvance) { + if (unlikely (!horizontal)) glyph_pos.y_advance -= font->em_scale_y (get_short (values, &ret)); + values++; + } + + if (!has_device ()) return ret; + + bool use_x_device = font->x_ppem || font->num_coords; + bool use_y_device = font->y_ppem || font->num_coords; + + if (!use_x_device && !use_y_device) return ret; + + const VariationStore &store = c->var_store; + + /* pixel -> fractional pixel */ + if (format & xPlaDevice) { + if (use_x_device) glyph_pos.x_offset += (base + get_device (values, &ret)).get_x_delta (font, store); + values++; + } + if (format & yPlaDevice) { + if (use_y_device) glyph_pos.y_offset += (base + get_device (values, &ret)).get_y_delta (font, store); + values++; + } + if (format & xAdvDevice) { + if (horizontal && use_x_device) glyph_pos.x_advance += (base + get_device (values, &ret)).get_x_delta (font, store); + values++; + } + if (format & yAdvDevice) { + /* y_advance values grow downward but font-space grows upward, hence negation */ + if (!horizontal && use_y_device) glyph_pos.y_advance -= (base + get_device (values, &ret)).get_y_delta (font, store); + values++; + } + return ret; + } + + void serialize_copy (hb_serialize_context_t *c, const void *base, + const Value *values, const hb_map_t *layout_variation_idx_map) const + { + unsigned int format = *this; + if (!format) return; + + if (format & xPlacement) c->copy (*values++); + if (format & yPlacement) c->copy (*values++); + if (format & xAdvance) c->copy (*values++); + if (format & yAdvance) c->copy (*values++); + + if (format & xPlaDevice) copy_device (c, base, values++, layout_variation_idx_map); + if (format & yPlaDevice) copy_device (c, base, values++, layout_variation_idx_map); + if (format & xAdvDevice) copy_device (c, base, values++, layout_variation_idx_map); + if (format & yAdvDevice) copy_device (c, base, values++, layout_variation_idx_map); + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c, + const void *base, + const hb_array_t<const Value>& values) const + { + unsigned format = *this; + unsigned i = 0; + if (format & xPlacement) i++; + if (format & yPlacement) i++; + if (format & xAdvance) i++; + if (format & yAdvance) i++; + if (format & xPlaDevice) + { + (base + get_device (&(values[i]))).collect_variation_indices (c->layout_variation_indices); + i++; + } + + if (format & ValueFormat::yPlaDevice) + { + (base + get_device (&(values[i]))).collect_variation_indices (c->layout_variation_indices); + i++; + } + + if (format & ValueFormat::xAdvDevice) + { + + (base + get_device (&(values[i]))).collect_variation_indices (c->layout_variation_indices); + i++; + } + + if (format & ValueFormat::yAdvDevice) + { + + (base + get_device (&(values[i]))).collect_variation_indices (c->layout_variation_indices); + i++; + } + } + + private: + bool sanitize_value_devices (hb_sanitize_context_t *c, const void *base, const Value *values) const + { + unsigned int format = *this; + + if (format & xPlacement) values++; + if (format & yPlacement) values++; + if (format & xAdvance) values++; + if (format & yAdvance) values++; + + if ((format & xPlaDevice) && !get_device (values++).sanitize (c, base)) return false; + if ((format & yPlaDevice) && !get_device (values++).sanitize (c, base)) return false; + if ((format & xAdvDevice) && !get_device (values++).sanitize (c, base)) return false; + if ((format & yAdvDevice) && !get_device (values++).sanitize (c, base)) return false; + + return true; + } + + static inline OffsetTo<Device>& get_device (Value* value) + { + return *static_cast<OffsetTo<Device> *> (value); + } + static inline const OffsetTo<Device>& get_device (const Value* value, bool *worked=nullptr) + { + if (worked) *worked |= bool (*value); + return *static_cast<const OffsetTo<Device> *> (value); + } + + bool copy_device (hb_serialize_context_t *c, const void *base, + const Value *src_value, const hb_map_t *layout_variation_idx_map) const + { + Value *dst_value = c->copy (*src_value); + + if (!dst_value) return false; + if (*dst_value == 0) return true; + + *dst_value = 0; + c->push (); + if ((base + get_device (src_value)).copy (c, layout_variation_idx_map)) + { + c->add_link (*dst_value, c->pop_pack ()); + return true; + } + else + { + c->pop_discard (); + return false; + } + } + + static inline const HBINT16& get_short (const Value* value, bool *worked=nullptr) + { + if (worked) *worked |= bool (*value); + return *reinterpret_cast<const HBINT16 *> (value); + } + + public: + + bool has_device () const + { + unsigned int format = *this; + return (format & devices) != 0; + } + + bool sanitize_value (hb_sanitize_context_t *c, const void *base, const Value *values) const + { + TRACE_SANITIZE (this); + return_trace (c->check_range (values, get_size ()) && (!has_device () || sanitize_value_devices (c, base, values))); + } + + bool sanitize_values (hb_sanitize_context_t *c, const void *base, const Value *values, unsigned int count) const + { + TRACE_SANITIZE (this); + unsigned int len = get_len (); + + if (!c->check_range (values, count, get_size ())) return_trace (false); + + if (!has_device ()) return_trace (true); + + for (unsigned int i = 0; i < count; i++) { + if (!sanitize_value_devices (c, base, values)) + return_trace (false); + values += len; + } + + return_trace (true); + } + + /* Just sanitize referenced Device tables. Doesn't check the values themselves. */ + bool sanitize_values_stride_unsafe (hb_sanitize_context_t *c, const void *base, const Value *values, unsigned int count, unsigned int stride) const + { + TRACE_SANITIZE (this); + + if (!has_device ()) return_trace (true); + + for (unsigned int i = 0; i < count; i++) { + if (!sanitize_value_devices (c, base, values)) + return_trace (false); + values += stride; + } + + return_trace (true); + } +}; + +template<typename Iterator> +static void SinglePos_serialize (hb_serialize_context_t *c, + const void *src, + Iterator it, + ValueFormat valFormat, + const hb_map_t *layout_variation_idx_map); + + +struct AnchorFormat1 +{ + void get_anchor (hb_ot_apply_context_t *c, hb_codepoint_t glyph_id HB_UNUSED, + float *x, float *y) const + { + hb_font_t *font = c->font; + *x = font->em_fscale_x (xCoordinate); + *y = font->em_fscale_y (yCoordinate); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + AnchorFormat1* copy (hb_serialize_context_t *c) const + { + TRACE_SERIALIZE (this); + return_trace (c->embed<AnchorFormat1> (this)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + FWORD xCoordinate; /* Horizontal value--in design units */ + FWORD yCoordinate; /* Vertical value--in design units */ + public: + DEFINE_SIZE_STATIC (6); +}; + +struct AnchorFormat2 +{ + void get_anchor (hb_ot_apply_context_t *c, hb_codepoint_t glyph_id, + float *x, float *y) const + { + hb_font_t *font = c->font; + +#ifdef HB_NO_HINTING + *x = font->em_fscale_x (xCoordinate); + *y = font->em_fscale_y (yCoordinate); + return; +#endif + + unsigned int x_ppem = font->x_ppem; + unsigned int y_ppem = font->y_ppem; + hb_position_t cx = 0, cy = 0; + bool ret; + + ret = (x_ppem || y_ppem) && + font->get_glyph_contour_point_for_origin (glyph_id, anchorPoint, HB_DIRECTION_LTR, &cx, &cy); + *x = ret && x_ppem ? cx : font->em_fscale_x (xCoordinate); + *y = ret && y_ppem ? cy : font->em_fscale_y (yCoordinate); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + AnchorFormat2* copy (hb_serialize_context_t *c) const + { + TRACE_SERIALIZE (this); + return_trace (c->embed<AnchorFormat2> (this)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 2 */ + FWORD xCoordinate; /* Horizontal value--in design units */ + FWORD yCoordinate; /* Vertical value--in design units */ + HBUINT16 anchorPoint; /* Index to glyph contour point */ + public: + DEFINE_SIZE_STATIC (8); +}; + +struct AnchorFormat3 +{ + void get_anchor (hb_ot_apply_context_t *c, hb_codepoint_t glyph_id HB_UNUSED, + float *x, float *y) const + { + hb_font_t *font = c->font; + *x = font->em_fscale_x (xCoordinate); + *y = font->em_fscale_y (yCoordinate); + + if (font->x_ppem || font->num_coords) + *x += (this+xDeviceTable).get_x_delta (font, c->var_store); + if (font->y_ppem || font->num_coords) + *y += (this+yDeviceTable).get_y_delta (font, c->var_store); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && xDeviceTable.sanitize (c, this) && yDeviceTable.sanitize (c, this)); + } + + AnchorFormat3* copy (hb_serialize_context_t *c, + const hb_map_t *layout_variation_idx_map) const + { + TRACE_SERIALIZE (this); + if (!layout_variation_idx_map) return_trace (nullptr); + + auto *out = c->embed<AnchorFormat3> (this); + if (unlikely (!out)) return_trace (nullptr); + + out->xDeviceTable.serialize_copy (c, xDeviceTable, this, 0, hb_serialize_context_t::Head, layout_variation_idx_map); + out->yDeviceTable.serialize_copy (c, yDeviceTable, this, 0, hb_serialize_context_t::Head, layout_variation_idx_map); + return_trace (out); + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { + (this+xDeviceTable).collect_variation_indices (c->layout_variation_indices); + (this+yDeviceTable).collect_variation_indices (c->layout_variation_indices); + } + + protected: + HBUINT16 format; /* Format identifier--format = 3 */ + FWORD xCoordinate; /* Horizontal value--in design units */ + FWORD yCoordinate; /* Vertical value--in design units */ + OffsetTo<Device> + xDeviceTable; /* Offset to Device table for X + * coordinate-- from beginning of + * Anchor table (may be NULL) */ + OffsetTo<Device> + yDeviceTable; /* Offset to Device table for Y + * coordinate-- from beginning of + * Anchor table (may be NULL) */ + public: + DEFINE_SIZE_STATIC (10); +}; + +struct Anchor +{ + void get_anchor (hb_ot_apply_context_t *c, hb_codepoint_t glyph_id, + float *x, float *y) const + { + *x = *y = 0; + switch (u.format) { + case 1: u.format1.get_anchor (c, glyph_id, x, y); return; + case 2: u.format2.get_anchor (c, glyph_id, x, y); return; + case 3: u.format3.get_anchor (c, glyph_id, x, y); return; + default: return; + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!u.format.sanitize (c)) return_trace (false); + switch (u.format) { + case 1: return_trace (u.format1.sanitize (c)); + case 2: return_trace (u.format2.sanitize (c)); + case 3: return_trace (u.format3.sanitize (c)); + default:return_trace (true); + } + } + + Anchor* copy (hb_serialize_context_t *c, const hb_map_t *layout_variation_idx_map) const + { + TRACE_SERIALIZE (this); + switch (u.format) { + case 1: return_trace (reinterpret_cast<Anchor *> (u.format1.copy (c))); + case 2: return_trace (reinterpret_cast<Anchor *> (u.format2.copy (c))); + case 3: return_trace (reinterpret_cast<Anchor *> (u.format3.copy (c, layout_variation_idx_map))); + default:return_trace (nullptr); + } + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { + switch (u.format) { + case 1: case 2: + return; + case 3: + u.format3.collect_variation_indices (c); + return; + default: return; + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + AnchorFormat1 format1; + AnchorFormat2 format2; + AnchorFormat3 format3; + } u; + public: + DEFINE_SIZE_UNION (2, format); +}; + + +struct AnchorMatrix +{ + const Anchor& get_anchor (unsigned int row, unsigned int col, + unsigned int cols, bool *found) const + { + *found = false; + if (unlikely (row >= rows || col >= cols)) return Null (Anchor); + *found = !matrixZ[row * cols + col].is_null (); + return this+matrixZ[row * cols + col]; + } + + template <typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + void collect_variation_indices (hb_collect_variation_indices_context_t *c, + Iterator index_iter) const + { + for (unsigned i : index_iter) + (this+matrixZ[i]).collect_variation_indices (c); + } + + template <typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + bool serialize (hb_serialize_context_t *c, + unsigned num_rows, + AnchorMatrix const *offset_matrix, + const hb_map_t *layout_variation_idx_map, + Iterator index_iter) + { + TRACE_SERIALIZE (this); + if (!index_iter) return_trace (false); + if (unlikely (!c->extend_min ((*this)))) return_trace (false); + + this->rows = num_rows; + for (const unsigned i : index_iter) + { + auto *offset = c->embed (offset_matrix->matrixZ[i]); + if (!offset) return_trace (false); + offset->serialize_copy (c, offset_matrix->matrixZ[i], + offset_matrix, c->to_bias (this), + hb_serialize_context_t::Head, + layout_variation_idx_map); + } + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c, unsigned int cols) const + { + TRACE_SANITIZE (this); + if (!c->check_struct (this)) return_trace (false); + if (unlikely (hb_unsigned_mul_overflows (rows, cols))) return_trace (false); + unsigned int count = rows * cols; + if (!c->check_array (matrixZ.arrayZ, count)) return_trace (false); + for (unsigned int i = 0; i < count; i++) + if (!matrixZ[i].sanitize (c, this)) return_trace (false); + return_trace (true); + } + + HBUINT16 rows; /* Number of rows */ + UnsizedArrayOf<OffsetTo<Anchor>> + matrixZ; /* Matrix of offsets to Anchor tables-- + * from beginning of AnchorMatrix table */ + public: + DEFINE_SIZE_ARRAY (2, matrixZ); +}; + + +struct MarkRecord +{ + friend struct MarkArray; + + unsigned get_class () const { return (unsigned) klass; } + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && markAnchor.sanitize (c, base)); + } + + MarkRecord *copy (hb_serialize_context_t *c, + const void *src_base, + unsigned dst_bias, + const hb_map_t *klass_mapping, + const hb_map_t *layout_variation_idx_map) const + { + TRACE_SERIALIZE (this); + auto *out = c->embed (this); + if (unlikely (!out)) return_trace (nullptr); + + out->klass = klass_mapping->get (klass); + out->markAnchor.serialize_copy (c, markAnchor, src_base, dst_bias, hb_serialize_context_t::Head, layout_variation_idx_map); + return_trace (out); + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c, + const void *src_base) const + { + (src_base+markAnchor).collect_variation_indices (c); + } + + protected: + HBUINT16 klass; /* Class defined for this mark */ + OffsetTo<Anchor> + markAnchor; /* Offset to Anchor table--from + * beginning of MarkArray table */ + public: + DEFINE_SIZE_STATIC (4); +}; + +struct MarkArray : ArrayOf<MarkRecord> /* Array of MarkRecords--in Coverage order */ +{ + bool apply (hb_ot_apply_context_t *c, + unsigned int mark_index, unsigned int glyph_index, + const AnchorMatrix &anchors, unsigned int class_count, + unsigned int glyph_pos) const + { + TRACE_APPLY (this); + hb_buffer_t *buffer = c->buffer; + const MarkRecord &record = ArrayOf<MarkRecord>::operator[](mark_index); + unsigned int mark_class = record.klass; + + const Anchor& mark_anchor = this + record.markAnchor; + bool found; + const Anchor& glyph_anchor = anchors.get_anchor (glyph_index, mark_class, class_count, &found); + /* If this subtable doesn't have an anchor for this base and this class, + * return false such that the subsequent subtables have a chance at it. */ + if (unlikely (!found)) return_trace (false); + + float mark_x, mark_y, base_x, base_y; + + buffer->unsafe_to_break (glyph_pos, buffer->idx); + mark_anchor.get_anchor (c, buffer->cur().codepoint, &mark_x, &mark_y); + glyph_anchor.get_anchor (c, buffer->info[glyph_pos].codepoint, &base_x, &base_y); + + hb_glyph_position_t &o = buffer->cur_pos(); + o.x_offset = roundf (base_x - mark_x); + o.y_offset = roundf (base_y - mark_y); + o.attach_type() = ATTACH_TYPE_MARK; + o.attach_chain() = (int) glyph_pos - (int) buffer->idx; + buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT; + + buffer->idx++; + return_trace (true); + } + + template<typename Iterator, + hb_requires (hb_is_source_of (Iterator, MarkRecord))> + bool serialize (hb_serialize_context_t *c, + const hb_map_t *klass_mapping, + const hb_map_t *layout_variation_idx_map, + const void *base, + Iterator it) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + if (unlikely (!c->check_assign (len, it.len ()))) return_trace (false); + c->copy_all (it, base, c->to_bias (this), klass_mapping, layout_variation_idx_map); + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (ArrayOf<MarkRecord>::sanitize (c, this)); + } +}; + + +/* Lookups */ + +struct SinglePosFormat1 +{ + bool intersects (const hb_set_t *glyphs) const + { return (this+coverage).intersects (glyphs); } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { + if (!valueFormat.has_device ()) return; + + auto it = + + hb_iter (this+coverage) + | hb_filter (c->glyph_set) + ; + + if (!it) return; + valueFormat.collect_variation_indices (c, this, values.as_array (valueFormat.get_len ())); + } + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { if (unlikely (!(this+coverage).collect_coverage (c->input))) return; } + + const Coverage &get_coverage () const { return this+coverage; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + hb_buffer_t *buffer = c->buffer; + unsigned int index = (this+coverage).get_coverage (buffer->cur().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + valueFormat.apply_value (c, this, values, buffer->cur_pos()); + + buffer->idx++; + return_trace (true); + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + void serialize (hb_serialize_context_t *c, + const void *src, + Iterator it, + ValueFormat valFormat, + const hb_map_t *layout_variation_idx_map) + { + auto out = c->extend_min (*this); + if (unlikely (!out)) return; + if (unlikely (!c->check_assign (valueFormat, valFormat))) return; + + + it + | hb_map (hb_second) + | hb_apply ([&] (hb_array_t<const Value> _) + { valFormat.serialize_copy (c, src, &_, layout_variation_idx_map); }) + ; + + auto glyphs = + + it + | hb_map_retains_sorting (hb_first) + ; + + coverage.serialize (c, this).serialize (c, glyphs); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto it = + + hb_iter (this+coverage) + | hb_filter (glyphset) + | hb_map_retains_sorting (glyph_map) + | hb_zip (hb_repeat (values.as_array (valueFormat.get_len ()))) + ; + + bool ret = bool (it); + SinglePos_serialize (c->serializer, this, it, valueFormat, c->plan->layout_variation_idx_map); + return_trace (ret); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + coverage.sanitize (c, this) && + valueFormat.sanitize_value (c, this, values)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of subtable */ + ValueFormat valueFormat; /* Defines the types of data in the + * ValueRecord */ + ValueRecord values; /* Defines positioning + * value(s)--applied to all glyphs in + * the Coverage table */ + public: + DEFINE_SIZE_ARRAY (6, values); +}; + +struct SinglePosFormat2 +{ + bool intersects (const hb_set_t *glyphs) const + { return (this+coverage).intersects (glyphs); } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { + if (!valueFormat.has_device ()) return; + + auto it = + + hb_zip (this+coverage, hb_range ((unsigned) valueCount)) + | hb_filter (c->glyph_set, hb_first) + ; + + if (!it) return; + + unsigned sub_length = valueFormat.get_len (); + const hb_array_t<const Value> values_array = values.as_array (valueCount * sub_length); + + for (unsigned i : + it + | hb_map (hb_second)) + valueFormat.collect_variation_indices (c, this, values_array.sub_array (i * sub_length, sub_length)); + + } + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { if (unlikely (!(this+coverage).collect_coverage (c->input))) return; } + + const Coverage &get_coverage () const { return this+coverage; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + hb_buffer_t *buffer = c->buffer; + unsigned int index = (this+coverage).get_coverage (buffer->cur().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + if (likely (index >= valueCount)) return_trace (false); + + valueFormat.apply_value (c, this, + &values[index * valueFormat.get_len ()], + buffer->cur_pos()); + + buffer->idx++; + return_trace (true); + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + void serialize (hb_serialize_context_t *c, + const void *src, + Iterator it, + ValueFormat valFormat, + const hb_map_t *layout_variation_idx_map) + { + auto out = c->extend_min (*this); + if (unlikely (!out)) return; + if (unlikely (!c->check_assign (valueFormat, valFormat))) return; + if (unlikely (!c->check_assign (valueCount, it.len ()))) return; + + + it + | hb_map (hb_second) + | hb_apply ([&] (hb_array_t<const Value> _) + { valFormat.serialize_copy (c, src, &_, layout_variation_idx_map); }) + ; + + auto glyphs = + + it + | hb_map_retains_sorting (hb_first) + ; + + coverage.serialize (c, this).serialize (c, glyphs); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + unsigned sub_length = valueFormat.get_len (); + auto values_array = values.as_array (valueCount * sub_length); + + auto it = + + hb_zip (this+coverage, hb_range ((unsigned) valueCount)) + | hb_filter (glyphset, hb_first) + | hb_map_retains_sorting ([&] (const hb_pair_t<hb_codepoint_t, unsigned>& _) + { + return hb_pair (glyph_map[_.first], + values_array.sub_array (_.second * sub_length, + sub_length)); + }) + ; + + bool ret = bool (it); + SinglePos_serialize (c->serializer, this, it, valueFormat, c->plan->layout_variation_idx_map); + return_trace (ret); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + coverage.sanitize (c, this) && + valueFormat.sanitize_values (c, this, values, valueCount)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 2 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of subtable */ + ValueFormat valueFormat; /* Defines the types of data in the + * ValueRecord */ + HBUINT16 valueCount; /* Number of ValueRecords */ + ValueRecord values; /* Array of ValueRecords--positioning + * values applied to glyphs */ + public: + DEFINE_SIZE_ARRAY (8, values); +}; + +struct SinglePos +{ + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + unsigned get_format (Iterator glyph_val_iter_pairs) + { + hb_array_t<const Value> first_val_iter = hb_second (*glyph_val_iter_pairs); + + for (const auto iter : glyph_val_iter_pairs) + for (const auto _ : hb_zip (iter.second, first_val_iter)) + if (_.first != _.second) + return 2; + + return 1; + } + + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + void serialize (hb_serialize_context_t *c, + const void *src, + Iterator glyph_val_iter_pairs, + ValueFormat valFormat, + const hb_map_t *layout_variation_idx_map) + { + if (unlikely (!c->extend_min (u.format))) return; + unsigned format = 2; + + if (glyph_val_iter_pairs) format = get_format (glyph_val_iter_pairs); + + u.format = format; + switch (u.format) { + case 1: u.format1.serialize (c, src, glyph_val_iter_pairs, valFormat, layout_variation_idx_map); + return; + case 2: u.format2.serialize (c, src, glyph_val_iter_pairs, valFormat, layout_variation_idx_map); + return; + default:return; + } + } + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + case 2: return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + SinglePosFormat1 format1; + SinglePosFormat2 format2; + } u; +}; + +template<typename Iterator> +static void +SinglePos_serialize (hb_serialize_context_t *c, + const void *src, + Iterator it, + ValueFormat valFormat, + const hb_map_t *layout_variation_idx_map) +{ c->start_embed<SinglePos> ()->serialize (c, src, it, valFormat, layout_variation_idx_map); } + + +struct PairValueRecord +{ + friend struct PairSet; + + int cmp (hb_codepoint_t k) const + { return secondGlyph.cmp (k); } + + struct serialize_closure_t + { + const void *base; + const ValueFormat *valueFormats; + unsigned len1; /* valueFormats[0].get_len() */ + const hb_map_t *glyph_map; + const hb_map_t *layout_variation_idx_map; + }; + + bool serialize (hb_serialize_context_t *c, + serialize_closure_t *closure) const + { + TRACE_SERIALIZE (this); + auto *out = c->start_embed (*this); + if (unlikely (!c->extend_min (out))) return_trace (false); + + out->secondGlyph = (*closure->glyph_map)[secondGlyph]; + + closure->valueFormats[0].serialize_copy (c, closure->base, &values[0], closure->layout_variation_idx_map); + closure->valueFormats[1].serialize_copy (c, closure->base, &values[closure->len1], closure->layout_variation_idx_map); + + return_trace (true); + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c, + const ValueFormat *valueFormats, + const void *base) const + { + unsigned record1_len = valueFormats[0].get_len (); + unsigned record2_len = valueFormats[1].get_len (); + const hb_array_t<const Value> values_array = values.as_array (record1_len + record2_len); + + if (valueFormats[0].has_device ()) + valueFormats[0].collect_variation_indices (c, base, values_array.sub_array (0, record1_len)); + + if (valueFormats[1].has_device ()) + valueFormats[1].collect_variation_indices (c, base, values_array.sub_array (record1_len, record2_len)); + } + + protected: + HBGlyphID secondGlyph; /* GlyphID of second glyph in the + * pair--first glyph is listed in the + * Coverage table */ + ValueRecord values; /* Positioning data for the first glyph + * followed by for second glyph */ + public: + DEFINE_SIZE_ARRAY (2, values); +}; + +struct PairSet +{ + friend struct PairPosFormat1; + + bool intersects (const hb_set_t *glyphs, + const ValueFormat *valueFormats) const + { + unsigned int len1 = valueFormats[0].get_len (); + unsigned int len2 = valueFormats[1].get_len (); + unsigned int record_size = HBUINT16::static_size * (1 + len1 + len2); + + const PairValueRecord *record = &firstPairValueRecord; + unsigned int count = len; + for (unsigned int i = 0; i < count; i++) + { + if (glyphs->has (record->secondGlyph)) + return true; + record = &StructAtOffset<const PairValueRecord> (record, record_size); + } + return false; + } + + void collect_glyphs (hb_collect_glyphs_context_t *c, + const ValueFormat *valueFormats) const + { + unsigned int len1 = valueFormats[0].get_len (); + unsigned int len2 = valueFormats[1].get_len (); + unsigned int record_size = HBUINT16::static_size * (1 + len1 + len2); + + const PairValueRecord *record = &firstPairValueRecord; + c->input->add_array (&record->secondGlyph, len, record_size); + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c, + const ValueFormat *valueFormats) const + { + unsigned len1 = valueFormats[0].get_len (); + unsigned len2 = valueFormats[1].get_len (); + unsigned record_size = HBUINT16::static_size * (1 + len1 + len2); + + const PairValueRecord *record = &firstPairValueRecord; + unsigned count = len; + for (unsigned i = 0; i < count; i++) + { + if (c->glyph_set->has (record->secondGlyph)) + { record->collect_variation_indices (c, valueFormats, this); } + + record = &StructAtOffset<const PairValueRecord> (record, record_size); + } + } + + bool apply (hb_ot_apply_context_t *c, + const ValueFormat *valueFormats, + unsigned int pos) const + { + TRACE_APPLY (this); + hb_buffer_t *buffer = c->buffer; + unsigned int len1 = valueFormats[0].get_len (); + unsigned int len2 = valueFormats[1].get_len (); + unsigned int record_size = HBUINT16::static_size * (1 + len1 + len2); + + const PairValueRecord *record = hb_bsearch (buffer->info[pos].codepoint, + &firstPairValueRecord, + len, + record_size); + if (record) + { + /* Note the intentional use of "|" instead of short-circuit "||". */ + if (valueFormats[0].apply_value (c, this, &record->values[0], buffer->cur_pos()) | + valueFormats[1].apply_value (c, this, &record->values[len1], buffer->pos[pos])) + buffer->unsafe_to_break (buffer->idx, pos + 1); + if (len2) + pos++; + buffer->idx = pos; + return_trace (true); + } + return_trace (false); + } + + bool subset (hb_subset_context_t *c, + const ValueFormat valueFormats[2]) const + { + TRACE_SUBSET (this); + auto snap = c->serializer->snapshot (); + + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + out->len = 0; + + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + unsigned len1 = valueFormats[0].get_len (); + unsigned len2 = valueFormats[1].get_len (); + unsigned record_size = HBUINT16::static_size + Value::static_size * (len1 + len2); + + PairValueRecord::serialize_closure_t closure = + { + this, + valueFormats, + len1, + &glyph_map, + c->plan->layout_variation_idx_map + }; + + const PairValueRecord *record = &firstPairValueRecord; + unsigned count = len, num = 0; + for (unsigned i = 0; i < count; i++) + { + if (glyphset.has (record->secondGlyph) + && record->serialize (c->serializer, &closure)) num++; + record = &StructAtOffset<const PairValueRecord> (record, record_size); + } + + out->len = num; + if (!num) c->serializer->revert (snap); + return_trace (num); + } + + struct sanitize_closure_t + { + const ValueFormat *valueFormats; + unsigned int len1; /* valueFormats[0].get_len() */ + unsigned int stride; /* 1 + len1 + len2 */ + }; + + bool sanitize (hb_sanitize_context_t *c, const sanitize_closure_t *closure) const + { + TRACE_SANITIZE (this); + if (!(c->check_struct (this) + && c->check_range (&firstPairValueRecord, + len, + HBUINT16::static_size, + closure->stride))) return_trace (false); + + unsigned int count = len; + const PairValueRecord *record = &firstPairValueRecord; + return_trace (closure->valueFormats[0].sanitize_values_stride_unsafe (c, this, &record->values[0], count, closure->stride) && + closure->valueFormats[1].sanitize_values_stride_unsafe (c, this, &record->values[closure->len1], count, closure->stride)); + } + + protected: + HBUINT16 len; /* Number of PairValueRecords */ + PairValueRecord firstPairValueRecord; + /* Array of PairValueRecords--ordered + * by GlyphID of the second glyph */ + public: + DEFINE_SIZE_MIN (2); +}; + +struct PairPosFormat1 +{ + bool intersects (const hb_set_t *glyphs) const + { + return + + hb_zip (this+coverage, pairSet) + | hb_filter (*glyphs, hb_first) + | hb_map (hb_second) + | hb_map ([glyphs, this] (const OffsetTo<PairSet> &_) + { return (this+_).intersects (glyphs, valueFormat); }) + | hb_any + ; + } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { + if ((!valueFormat[0].has_device ()) && (!valueFormat[1].has_device ())) return; + + auto it = + + hb_zip (this+coverage, pairSet) + | hb_filter (c->glyph_set, hb_first) + | hb_map (hb_second) + ; + + if (!it) return; + + it + | hb_map (hb_add (this)) + | hb_apply ([&] (const PairSet& _) { _.collect_variation_indices (c, valueFormat); }) + ; + } + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + if (unlikely (!(this+coverage).collect_coverage (c->input))) return; + unsigned int count = pairSet.len; + for (unsigned int i = 0; i < count; i++) + (this+pairSet[i]).collect_glyphs (c, valueFormat); + } + + const Coverage &get_coverage () const { return this+coverage; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + hb_buffer_t *buffer = c->buffer; + unsigned int index = (this+coverage).get_coverage (buffer->cur().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; + skippy_iter.reset (buffer->idx, 1); + if (!skippy_iter.next ()) return_trace (false); + + return_trace ((this+pairSet[index]).apply (c, valueFormat, skippy_iter.idx)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + out->format = format; + out->valueFormat[0] = valueFormat[0]; + out->valueFormat[1] = valueFormat[1]; + + hb_sorted_vector_t<hb_codepoint_t> new_coverage; + + + hb_zip (this+coverage, pairSet) + | hb_filter (glyphset, hb_first) + | hb_filter ([this, c, out] (const OffsetTo<PairSet>& _) + { + auto *o = out->pairSet.serialize_append (c->serializer); + if (unlikely (!o)) return false; + auto snap = c->serializer->snapshot (); + bool ret = o->serialize_subset (c, _, this, valueFormat); + if (!ret) + { + out->pairSet.pop (); + c->serializer->revert (snap); + } + return ret; + }, + hb_second) + | hb_map (hb_first) + | hb_map (glyph_map) + | hb_sink (new_coverage) + ; + + out->coverage.serialize (c->serializer, out) + .serialize (c->serializer, new_coverage.iter ()); + + return_trace (bool (new_coverage)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + + if (!c->check_struct (this)) return_trace (false); + + unsigned int len1 = valueFormat[0].get_len (); + unsigned int len2 = valueFormat[1].get_len (); + PairSet::sanitize_closure_t closure = + { + valueFormat, + len1, + 1 + len1 + len2 + }; + + return_trace (coverage.sanitize (c, this) && pairSet.sanitize (c, this, &closure)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of subtable */ + ValueFormat valueFormat[2]; /* [0] Defines the types of data in + * ValueRecord1--for the first glyph + * in the pair--may be zero (0) */ + /* [1] Defines the types of data in + * ValueRecord2--for the second glyph + * in the pair--may be zero (0) */ + OffsetArrayOf<PairSet> + pairSet; /* Array of PairSet tables + * ordered by Coverage Index */ + public: + DEFINE_SIZE_ARRAY (10, pairSet); +}; + +struct PairPosFormat2 +{ + bool intersects (const hb_set_t *glyphs) const + { + return (this+coverage).intersects (glyphs) && + (this+classDef2).intersects (glyphs); + } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { + if ((!valueFormat1.has_device ()) && (!valueFormat2.has_device ())) return; + + hb_set_t class1_set, class2_set; + for (const unsigned cp : c->glyph_set->iter ()) + { + unsigned klass1 = (this+classDef1).get (cp); + unsigned klass2 = (this+classDef2).get (cp); + class1_set.add (klass1); + class2_set.add (klass2); + } + + if (class1_set.is_empty () || class2_set.is_empty ()) return; + + unsigned len1 = valueFormat1.get_len (); + unsigned len2 = valueFormat2.get_len (); + const hb_array_t<const Value> values_array = values.as_array ((unsigned)class1Count * (unsigned) class2Count * (len1 + len2)); + for (const unsigned class1_idx : class1_set.iter ()) + { + for (const unsigned class2_idx : class2_set.iter ()) + { + unsigned start_offset = (class1_idx * (unsigned) class2Count + class2_idx) * (len1 + len2); + if (valueFormat1.has_device ()) + valueFormat1.collect_variation_indices (c, this, values_array.sub_array (start_offset, len1)); + + if (valueFormat2.has_device ()) + valueFormat2.collect_variation_indices (c, this, values_array.sub_array (start_offset+len1, len2)); + } + } + } + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + if (unlikely (!(this+coverage).collect_coverage (c->input))) return; + if (unlikely (!(this+classDef2).collect_coverage (c->input))) return; + } + + const Coverage &get_coverage () const { return this+coverage; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + hb_buffer_t *buffer = c->buffer; + unsigned int index = (this+coverage).get_coverage (buffer->cur().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; + skippy_iter.reset (buffer->idx, 1); + if (!skippy_iter.next ()) return_trace (false); + + unsigned int len1 = valueFormat1.get_len (); + unsigned int len2 = valueFormat2.get_len (); + unsigned int record_len = len1 + len2; + + unsigned int klass1 = (this+classDef1).get_class (buffer->cur().codepoint); + unsigned int klass2 = (this+classDef2).get_class (buffer->info[skippy_iter.idx].codepoint); + if (unlikely (klass1 >= class1Count || klass2 >= class2Count)) return_trace (false); + + const Value *v = &values[record_len * (klass1 * class2Count + klass2)]; + /* Note the intentional use of "|" instead of short-circuit "||". */ + if (valueFormat1.apply_value (c, this, v, buffer->cur_pos()) | + valueFormat2.apply_value (c, this, v + len1, buffer->pos[skippy_iter.idx])) + buffer->unsafe_to_break (buffer->idx, skippy_iter.idx + 1); + + buffer->idx = skippy_iter.idx; + if (len2) + buffer->idx++; + + return_trace (true); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + out->format = format; + out->valueFormat1 = valueFormat1; + out->valueFormat2 = valueFormat2; + + hb_map_t klass1_map; + out->classDef1.serialize_subset (c, classDef1, this, &klass1_map); + out->class1Count = klass1_map.get_population (); + + hb_map_t klass2_map; + out->classDef2.serialize_subset (c, classDef2, this, &klass2_map); + out->class2Count = klass2_map.get_population (); + + unsigned len1 = valueFormat1.get_len (); + unsigned len2 = valueFormat2.get_len (); + + + hb_range ((unsigned) class1Count) + | hb_filter (klass1_map) + | hb_apply ([&] (const unsigned class1_idx) + { + + hb_range ((unsigned) class2Count) + | hb_filter (klass2_map) + | hb_apply ([&] (const unsigned class2_idx) + { + unsigned idx = (class1_idx * (unsigned) class2Count + class2_idx) * (len1 + len2); + valueFormat1.serialize_copy (c->serializer, this, &values[idx], c->plan->layout_variation_idx_map); + valueFormat2.serialize_copy (c->serializer, this, &values[idx + len1], c->plan->layout_variation_idx_map); + }) + ; + }) + ; + + const hb_set_t &glyphset = *c->plan->_glyphset_gsub; + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto it = + + hb_iter (this+coverage) + | hb_filter (glyphset) + | hb_map_retains_sorting (glyph_map) + ; + + out->coverage.serialize (c->serializer, out).serialize (c->serializer, it); + return_trace (out->class1Count && out->class2Count && bool (it)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!(c->check_struct (this) + && coverage.sanitize (c, this) + && classDef1.sanitize (c, this) + && classDef2.sanitize (c, this))) return_trace (false); + + unsigned int len1 = valueFormat1.get_len (); + unsigned int len2 = valueFormat2.get_len (); + unsigned int stride = len1 + len2; + unsigned int record_size = valueFormat1.get_size () + valueFormat2.get_size (); + unsigned int count = (unsigned int) class1Count * (unsigned int) class2Count; + return_trace (c->check_range ((const void *) values, + count, + record_size) && + valueFormat1.sanitize_values_stride_unsafe (c, this, &values[0], count, stride) && + valueFormat2.sanitize_values_stride_unsafe (c, this, &values[len1], count, stride)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 2 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of subtable */ + ValueFormat valueFormat1; /* ValueRecord definition--for the + * first glyph of the pair--may be zero + * (0) */ + ValueFormat valueFormat2; /* ValueRecord definition--for the + * second glyph of the pair--may be + * zero (0) */ + OffsetTo<ClassDef> + classDef1; /* Offset to ClassDef table--from + * beginning of PairPos subtable--for + * the first glyph of the pair */ + OffsetTo<ClassDef> + classDef2; /* Offset to ClassDef table--from + * beginning of PairPos subtable--for + * the second glyph of the pair */ + HBUINT16 class1Count; /* Number of classes in ClassDef1 + * table--includes Class0 */ + HBUINT16 class2Count; /* Number of classes in ClassDef2 + * table--includes Class0 */ + ValueRecord values; /* Matrix of value pairs: + * class1-major, class2-minor, + * Each entry has value1 and value2 */ + public: + DEFINE_SIZE_ARRAY (16, values); +}; + +struct PairPos +{ + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + case 2: return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + PairPosFormat1 format1; + PairPosFormat2 format2; + } u; +}; + + +struct EntryExitRecord +{ + friend struct CursivePosFormat1; + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (entryAnchor.sanitize (c, base) && exitAnchor.sanitize (c, base)); + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c, + const void *src_base) const + { + (src_base+entryAnchor).collect_variation_indices (c); + (src_base+exitAnchor).collect_variation_indices (c); + } + + EntryExitRecord* copy (hb_serialize_context_t *c, + const void *src_base, + const void *dst_base, + const hb_map_t *layout_variation_idx_map) const + { + TRACE_SERIALIZE (this); + auto *out = c->embed (this); + if (unlikely (!out)) return_trace (nullptr); + + out->entryAnchor.serialize_copy (c, entryAnchor, src_base, c->to_bias (dst_base), hb_serialize_context_t::Head, layout_variation_idx_map); + out->exitAnchor.serialize_copy (c, exitAnchor, src_base, c->to_bias (dst_base), hb_serialize_context_t::Head, layout_variation_idx_map); + return_trace (out); + } + + protected: + OffsetTo<Anchor> + entryAnchor; /* Offset to EntryAnchor table--from + * beginning of CursivePos + * subtable--may be NULL */ + OffsetTo<Anchor> + exitAnchor; /* Offset to ExitAnchor table--from + * beginning of CursivePos + * subtable--may be NULL */ + public: + DEFINE_SIZE_STATIC (4); +}; + +static void +reverse_cursive_minor_offset (hb_glyph_position_t *pos, unsigned int i, hb_direction_t direction, unsigned int new_parent); + +struct CursivePosFormat1 +{ + bool intersects (const hb_set_t *glyphs) const + { return (this+coverage).intersects (glyphs); } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { + + hb_zip (this+coverage, entryExitRecord) + | hb_filter (c->glyph_set, hb_first) + | hb_map (hb_second) + | hb_apply ([&] (const EntryExitRecord& record) { record.collect_variation_indices (c, this); }) + ; + } + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { if (unlikely (!(this+coverage).collect_coverage (c->input))) return; } + + const Coverage &get_coverage () const { return this+coverage; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + hb_buffer_t *buffer = c->buffer; + + const EntryExitRecord &this_record = entryExitRecord[(this+coverage).get_coverage (buffer->cur().codepoint)]; + if (!this_record.entryAnchor) return_trace (false); + + hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; + skippy_iter.reset (buffer->idx, 1); + if (!skippy_iter.prev ()) return_trace (false); + + const EntryExitRecord &prev_record = entryExitRecord[(this+coverage).get_coverage (buffer->info[skippy_iter.idx].codepoint)]; + if (!prev_record.exitAnchor) return_trace (false); + + unsigned int i = skippy_iter.idx; + unsigned int j = buffer->idx; + + buffer->unsafe_to_break (i, j); + float entry_x, entry_y, exit_x, exit_y; + (this+prev_record.exitAnchor).get_anchor (c, buffer->info[i].codepoint, &exit_x, &exit_y); + (this+this_record.entryAnchor).get_anchor (c, buffer->info[j].codepoint, &entry_x, &entry_y); + + hb_glyph_position_t *pos = buffer->pos; + + hb_position_t d; + /* Main-direction adjustment */ + switch (c->direction) { + case HB_DIRECTION_LTR: + pos[i].x_advance = roundf (exit_x) + pos[i].x_offset; + + d = roundf (entry_x) + pos[j].x_offset; + pos[j].x_advance -= d; + pos[j].x_offset -= d; + break; + case HB_DIRECTION_RTL: + d = roundf (exit_x) + pos[i].x_offset; + pos[i].x_advance -= d; + pos[i].x_offset -= d; + + pos[j].x_advance = roundf (entry_x) + pos[j].x_offset; + break; + case HB_DIRECTION_TTB: + pos[i].y_advance = roundf (exit_y) + pos[i].y_offset; + + d = roundf (entry_y) + pos[j].y_offset; + pos[j].y_advance -= d; + pos[j].y_offset -= d; + break; + case HB_DIRECTION_BTT: + d = roundf (exit_y) + pos[i].y_offset; + pos[i].y_advance -= d; + pos[i].y_offset -= d; + + pos[j].y_advance = roundf (entry_y); + break; + case HB_DIRECTION_INVALID: + default: + break; + } + + /* Cross-direction adjustment */ + + /* We attach child to parent (think graph theory and rooted trees whereas + * the root stays on baseline and each node aligns itself against its + * parent. + * + * Optimize things for the case of RightToLeft, as that's most common in + * Arabic. */ + unsigned int child = i; + unsigned int parent = j; + hb_position_t x_offset = entry_x - exit_x; + hb_position_t y_offset = entry_y - exit_y; + if (!(c->lookup_props & LookupFlag::RightToLeft)) + { + unsigned int k = child; + child = parent; + parent = k; + x_offset = -x_offset; + y_offset = -y_offset; + } + + /* If child was already connected to someone else, walk through its old + * chain and reverse the link direction, such that the whole tree of its + * previous connection now attaches to new parent. Watch out for case + * where new parent is on the path from old chain... + */ + reverse_cursive_minor_offset (pos, child, c->direction, parent); + + pos[child].attach_type() = ATTACH_TYPE_CURSIVE; + pos[child].attach_chain() = (int) parent - (int) child; + buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT; + if (likely (HB_DIRECTION_IS_HORIZONTAL (c->direction))) + pos[child].y_offset = y_offset; + else + pos[child].x_offset = x_offset; + + /* If parent was attached to child, break them free. + * https://github.com/harfbuzz/harfbuzz/issues/2469 + */ + if (unlikely (pos[parent].attach_chain() == -pos[child].attach_chain())) + pos[parent].attach_chain() = 0; + + buffer->idx++; + return_trace (true); + } + + template <typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + void serialize (hb_serialize_context_t *c, + Iterator it, + const void *src_base, + const hb_map_t *layout_variation_idx_map) + { + if (unlikely (!c->extend_min ((*this)))) return; + this->format = 1; + this->entryExitRecord.len = it.len (); + + for (const EntryExitRecord& entry_record : + it + | hb_map (hb_second)) + c->copy (entry_record, src_base, this, layout_variation_idx_map); + + auto glyphs = + + it + | hb_map_retains_sorting (hb_first) + ; + + coverage.serialize (c, this).serialize (c, glyphs); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto *out = c->serializer->start_embed (*this); + if (unlikely (!out)) return_trace (false); + + auto it = + + hb_zip (this+coverage, entryExitRecord) + | hb_filter (glyphset, hb_first) + | hb_map_retains_sorting ([&] (hb_pair_t<hb_codepoint_t, const EntryExitRecord&> p) -> hb_pair_t<hb_codepoint_t, const EntryExitRecord&> + { return hb_pair (glyph_map[p.first], p.second);}) + ; + + bool ret = bool (it); + out->serialize (c->serializer, it, this, c->plan->layout_variation_idx_map); + return_trace (ret); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (coverage.sanitize (c, this) && entryExitRecord.sanitize (c, this)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of subtable */ + ArrayOf<EntryExitRecord> + entryExitRecord; /* Array of EntryExit records--in + * Coverage Index order */ + public: + DEFINE_SIZE_ARRAY (6, entryExitRecord); +}; + +struct CursivePos +{ + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + CursivePosFormat1 format1; + } u; +}; + + +typedef AnchorMatrix BaseArray; /* base-major-- + * in order of BaseCoverage Index--, + * mark-minor-- + * ordered by class--zero-based. */ + +static void Markclass_closure_and_remap_indexes (const Coverage &mark_coverage, + const MarkArray &mark_array, + const hb_set_t &glyphset, + hb_map_t* klass_mapping /* INOUT */) +{ + hb_set_t orig_classes; + + + hb_zip (mark_coverage, mark_array) + | hb_filter (glyphset, hb_first) + | hb_map (hb_second) + | hb_map (&MarkRecord::get_class) + | hb_sink (orig_classes) + ; + + unsigned idx = 0; + for (auto klass : orig_classes.iter ()) + { + if (klass_mapping->has (klass)) continue; + klass_mapping->set (klass, idx); + idx++; + } +} + +struct MarkBasePosFormat1 +{ + bool intersects (const hb_set_t *glyphs) const + { + return (this+markCoverage).intersects (glyphs) && + (this+baseCoverage).intersects (glyphs); + } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { + + hb_zip (this+markCoverage, this+markArray) + | hb_filter (c->glyph_set, hb_first) + | hb_map (hb_second) + | hb_apply ([&] (const MarkRecord& record) { record.collect_variation_indices (c, &(this+markArray)); }) + ; + + hb_map_t klass_mapping; + Markclass_closure_and_remap_indexes (this+markCoverage, this+markArray, *c->glyph_set, &klass_mapping); + + unsigned basecount = (this+baseArray).rows; + auto base_iter = + + hb_zip (this+baseCoverage, hb_range (basecount)) + | hb_filter (c->glyph_set, hb_first) + | hb_map (hb_second) + ; + + hb_sorted_vector_t<unsigned> base_indexes; + for (const unsigned row : base_iter) + { + + hb_range ((unsigned) classCount) + | hb_filter (klass_mapping) + | hb_map ([&] (const unsigned col) { return row * (unsigned) classCount + col; }) + | hb_sink (base_indexes) + ; + } + (this+baseArray).collect_variation_indices (c, base_indexes.iter ()); + } + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + if (unlikely (!(this+markCoverage).collect_coverage (c->input))) return; + if (unlikely (!(this+baseCoverage).collect_coverage (c->input))) return; + } + + const Coverage &get_coverage () const { return this+markCoverage; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + hb_buffer_t *buffer = c->buffer; + unsigned int mark_index = (this+markCoverage).get_coverage (buffer->cur().codepoint); + if (likely (mark_index == NOT_COVERED)) return_trace (false); + + /* Now we search backwards for a non-mark glyph */ + hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; + skippy_iter.reset (buffer->idx, 1); + skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks); + do { + if (!skippy_iter.prev ()) return_trace (false); + /* We only want to attach to the first of a MultipleSubst sequence. + * https://github.com/harfbuzz/harfbuzz/issues/740 + * Reject others... + * ...but stop if we find a mark in the MultipleSubst sequence: + * https://github.com/harfbuzz/harfbuzz/issues/1020 */ + if (!_hb_glyph_info_multiplied (&buffer->info[skippy_iter.idx]) || + 0 == _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]) || + (skippy_iter.idx == 0 || + _hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx - 1]) || + _hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx]) != + _hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx - 1]) || + _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]) != + _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx - 1]) + 1 + )) + break; + skippy_iter.reject (); + } while (true); + + /* Checking that matched glyph is actually a base glyph by GDEF is too strong; disabled */ + //if (!_hb_glyph_info_is_base_glyph (&buffer->info[skippy_iter.idx])) { return_trace (false); } + + unsigned int base_index = (this+baseCoverage).get_coverage (buffer->info[skippy_iter.idx].codepoint); + if (base_index == NOT_COVERED) return_trace (false); + + return_trace ((this+markArray).apply (c, mark_index, base_index, this+baseArray, classCount, skippy_iter.idx)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + out->format = format; + + hb_map_t klass_mapping; + Markclass_closure_and_remap_indexes (this+markCoverage, this+markArray, glyphset, &klass_mapping); + + if (!klass_mapping.get_population ()) return_trace (false); + out->classCount = klass_mapping.get_population (); + + auto mark_iter = + + hb_zip (this+markCoverage, this+markArray) + | hb_filter (glyphset, hb_first) + ; + + hb_sorted_vector_t<hb_codepoint_t> new_coverage; + + mark_iter + | hb_map (hb_first) + | hb_map (glyph_map) + | hb_sink (new_coverage) + ; + + if (!out->markCoverage.serialize (c->serializer, out) + .serialize (c->serializer, new_coverage.iter ())) + return_trace (false); + + out->markArray.serialize (c->serializer, out) + .serialize (c->serializer, &klass_mapping, c->plan->layout_variation_idx_map, &(this+markArray), + mark_iter + | hb_map (hb_second)); + + unsigned basecount = (this+baseArray).rows; + auto base_iter = + + hb_zip (this+baseCoverage, hb_range (basecount)) + | hb_filter (glyphset, hb_first) + ; + + new_coverage.reset (); + + base_iter + | hb_map (hb_first) + | hb_map (glyph_map) + | hb_sink (new_coverage) + ; + + if (!out->baseCoverage.serialize (c->serializer, out) + .serialize (c->serializer, new_coverage.iter ())) + return_trace (false); + + hb_sorted_vector_t<unsigned> base_indexes; + for (const unsigned row : + base_iter + | hb_map (hb_second)) + { + + hb_range ((unsigned) classCount) + | hb_filter (klass_mapping) + | hb_map ([&] (const unsigned col) { return row * (unsigned) classCount + col; }) + | hb_sink (base_indexes) + ; + } + out->baseArray.serialize (c->serializer, out) + .serialize (c->serializer, base_iter.len (), &(this+baseArray), c->plan->layout_variation_idx_map, base_indexes.iter ()); + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + markCoverage.sanitize (c, this) && + baseCoverage.sanitize (c, this) && + markArray.sanitize (c, this) && + baseArray.sanitize (c, this, (unsigned int) classCount)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + OffsetTo<Coverage> + markCoverage; /* Offset to MarkCoverage table--from + * beginning of MarkBasePos subtable */ + OffsetTo<Coverage> + baseCoverage; /* Offset to BaseCoverage table--from + * beginning of MarkBasePos subtable */ + HBUINT16 classCount; /* Number of classes defined for marks */ + OffsetTo<MarkArray> + markArray; /* Offset to MarkArray table--from + * beginning of MarkBasePos subtable */ + OffsetTo<BaseArray> + baseArray; /* Offset to BaseArray table--from + * beginning of MarkBasePos subtable */ + public: + DEFINE_SIZE_STATIC (12); +}; + +struct MarkBasePos +{ + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + MarkBasePosFormat1 format1; + } u; +}; + + +typedef AnchorMatrix LigatureAttach; /* component-major-- + * in order of writing direction--, + * mark-minor-- + * ordered by class--zero-based. */ + +typedef OffsetListOf<LigatureAttach> LigatureArray; + /* Array of LigatureAttach + * tables ordered by + * LigatureCoverage Index */ + +struct MarkLigPosFormat1 +{ + bool intersects (const hb_set_t *glyphs) const + { + return (this+markCoverage).intersects (glyphs) && + (this+ligatureCoverage).intersects (glyphs); + } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { + + hb_zip (this+markCoverage, this+markArray) + | hb_filter (c->glyph_set, hb_first) + | hb_map (hb_second) + | hb_apply ([&] (const MarkRecord& record) { record.collect_variation_indices (c, &(this+markArray)); }) + ; + + hb_map_t klass_mapping; + Markclass_closure_and_remap_indexes (this+markCoverage, this+markArray, *c->glyph_set, &klass_mapping); + + unsigned ligcount = (this+ligatureArray).len; + auto lig_iter = + + hb_zip (this+ligatureCoverage, hb_range (ligcount)) + | hb_filter (c->glyph_set, hb_first) + | hb_map (hb_second) + ; + + const LigatureArray& lig_array = this+ligatureArray; + for (const unsigned i : lig_iter) + { + hb_sorted_vector_t<unsigned> lig_indexes; + unsigned row_count = lig_array[i].rows; + for (unsigned row : + hb_range (row_count)) + { + + hb_range ((unsigned) classCount) + | hb_filter (klass_mapping) + | hb_map ([&] (const unsigned col) { return row * (unsigned) classCount + col; }) + | hb_sink (lig_indexes) + ; + } + + lig_array[i].collect_variation_indices (c, lig_indexes.iter ()); + } + } + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + if (unlikely (!(this+markCoverage).collect_coverage (c->input))) return; + if (unlikely (!(this+ligatureCoverage).collect_coverage (c->input))) return; + } + + const Coverage &get_coverage () const { return this+markCoverage; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + hb_buffer_t *buffer = c->buffer; + unsigned int mark_index = (this+markCoverage).get_coverage (buffer->cur().codepoint); + if (likely (mark_index == NOT_COVERED)) return_trace (false); + + /* Now we search backwards for a non-mark glyph */ + hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; + skippy_iter.reset (buffer->idx, 1); + skippy_iter.set_lookup_props (LookupFlag::IgnoreMarks); + if (!skippy_iter.prev ()) return_trace (false); + + /* Checking that matched glyph is actually a ligature by GDEF is too strong; disabled */ + //if (!_hb_glyph_info_is_ligature (&buffer->info[skippy_iter.idx])) { return_trace (false); } + + unsigned int j = skippy_iter.idx; + unsigned int lig_index = (this+ligatureCoverage).get_coverage (buffer->info[j].codepoint); + if (lig_index == NOT_COVERED) return_trace (false); + + const LigatureArray& lig_array = this+ligatureArray; + const LigatureAttach& lig_attach = lig_array[lig_index]; + + /* Find component to attach to */ + unsigned int comp_count = lig_attach.rows; + if (unlikely (!comp_count)) return_trace (false); + + /* We must now check whether the ligature ID of the current mark glyph + * is identical to the ligature ID of the found ligature. If yes, we + * can directly use the component index. If not, we attach the mark + * glyph to the last component of the ligature. */ + unsigned int comp_index; + unsigned int lig_id = _hb_glyph_info_get_lig_id (&buffer->info[j]); + unsigned int mark_id = _hb_glyph_info_get_lig_id (&buffer->cur()); + unsigned int mark_comp = _hb_glyph_info_get_lig_comp (&buffer->cur()); + if (lig_id && lig_id == mark_id && mark_comp > 0) + comp_index = hb_min (comp_count, _hb_glyph_info_get_lig_comp (&buffer->cur())) - 1; + else + comp_index = comp_count - 1; + + return_trace ((this+markArray).apply (c, mark_index, comp_index, lig_attach, classCount, j)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + // TODO(subset) + return_trace (false); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + markCoverage.sanitize (c, this) && + ligatureCoverage.sanitize (c, this) && + markArray.sanitize (c, this) && + ligatureArray.sanitize (c, this, (unsigned int) classCount)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + OffsetTo<Coverage> + markCoverage; /* Offset to Mark Coverage table--from + * beginning of MarkLigPos subtable */ + OffsetTo<Coverage> + ligatureCoverage; /* Offset to Ligature Coverage + * table--from beginning of MarkLigPos + * subtable */ + HBUINT16 classCount; /* Number of defined mark classes */ + OffsetTo<MarkArray> + markArray; /* Offset to MarkArray table--from + * beginning of MarkLigPos subtable */ + OffsetTo<LigatureArray> + ligatureArray; /* Offset to LigatureArray table--from + * beginning of MarkLigPos subtable */ + public: + DEFINE_SIZE_STATIC (12); +}; + +struct MarkLigPos +{ + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + MarkLigPosFormat1 format1; + } u; +}; + + +typedef AnchorMatrix Mark2Array; /* mark2-major-- + * in order of Mark2Coverage Index--, + * mark1-minor-- + * ordered by class--zero-based. */ + +struct MarkMarkPosFormat1 +{ + bool intersects (const hb_set_t *glyphs) const + { + return (this+mark1Coverage).intersects (glyphs) && + (this+mark2Coverage).intersects (glyphs); + } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { + + hb_zip (this+mark1Coverage, this+mark1Array) + | hb_filter (c->glyph_set, hb_first) + | hb_map (hb_second) + | hb_apply ([&] (const MarkRecord& record) { record.collect_variation_indices (c, &(this+mark1Array)); }) + ; + + hb_map_t klass_mapping; + Markclass_closure_and_remap_indexes (this+mark1Coverage, this+mark1Array, *c->glyph_set, &klass_mapping); + + unsigned mark2_count = (this+mark2Array).rows; + auto mark2_iter = + + hb_zip (this+mark2Coverage, hb_range (mark2_count)) + | hb_filter (c->glyph_set, hb_first) + | hb_map (hb_second) + ; + + hb_sorted_vector_t<unsigned> mark2_indexes; + for (const unsigned row : mark2_iter) + { + + hb_range ((unsigned) classCount) + | hb_filter (klass_mapping) + | hb_map ([&] (const unsigned col) { return row * (unsigned) classCount + col; }) + | hb_sink (mark2_indexes) + ; + } + (this+mark2Array).collect_variation_indices (c, mark2_indexes.iter ()); + } + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + if (unlikely (!(this+mark1Coverage).collect_coverage (c->input))) return; + if (unlikely (!(this+mark2Coverage).collect_coverage (c->input))) return; + } + + const Coverage &get_coverage () const { return this+mark1Coverage; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + hb_buffer_t *buffer = c->buffer; + unsigned int mark1_index = (this+mark1Coverage).get_coverage (buffer->cur().codepoint); + if (likely (mark1_index == NOT_COVERED)) return_trace (false); + + /* now we search backwards for a suitable mark glyph until a non-mark glyph */ + hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; + skippy_iter.reset (buffer->idx, 1); + skippy_iter.set_lookup_props (c->lookup_props & ~LookupFlag::IgnoreFlags); + if (!skippy_iter.prev ()) return_trace (false); + + if (!_hb_glyph_info_is_mark (&buffer->info[skippy_iter.idx])) { return_trace (false); } + + unsigned int j = skippy_iter.idx; + + unsigned int id1 = _hb_glyph_info_get_lig_id (&buffer->cur()); + unsigned int id2 = _hb_glyph_info_get_lig_id (&buffer->info[j]); + unsigned int comp1 = _hb_glyph_info_get_lig_comp (&buffer->cur()); + unsigned int comp2 = _hb_glyph_info_get_lig_comp (&buffer->info[j]); + + if (likely (id1 == id2)) + { + if (id1 == 0) /* Marks belonging to the same base. */ + goto good; + else if (comp1 == comp2) /* Marks belonging to the same ligature component. */ + goto good; + } + else + { + /* If ligature ids don't match, it may be the case that one of the marks + * itself is a ligature. In which case match. */ + if ((id1 > 0 && !comp1) || (id2 > 0 && !comp2)) + goto good; + } + + /* Didn't match. */ + return_trace (false); + + good: + unsigned int mark2_index = (this+mark2Coverage).get_coverage (buffer->info[j].codepoint); + if (mark2_index == NOT_COVERED) return_trace (false); + + return_trace ((this+mark1Array).apply (c, mark1_index, mark2_index, this+mark2Array, classCount, j)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + out->format = format; + + hb_map_t klass_mapping; + Markclass_closure_and_remap_indexes (this+mark1Coverage, this+mark1Array, glyphset, &klass_mapping); + + if (!klass_mapping.get_population ()) return_trace (false); + out->classCount = klass_mapping.get_population (); + + auto mark1_iter = + + hb_zip (this+mark1Coverage, this+mark1Array) + | hb_filter (glyphset, hb_first) + ; + + hb_sorted_vector_t<hb_codepoint_t> new_coverage; + + mark1_iter + | hb_map (hb_first) + | hb_map (glyph_map) + | hb_sink (new_coverage) + ; + + if (!out->mark1Coverage.serialize (c->serializer, out) + .serialize (c->serializer, new_coverage.iter ())) + return_trace (false); + + out->mark1Array.serialize (c->serializer, out) + .serialize (c->serializer, &klass_mapping, c->plan->layout_variation_idx_map, &(this+mark1Array), + mark1_iter + | hb_map (hb_second)); + + unsigned mark2count = (this+mark2Array).rows; + auto mark2_iter = + + hb_zip (this+mark2Coverage, hb_range (mark2count)) + | hb_filter (glyphset, hb_first) + ; + + new_coverage.reset (); + + mark2_iter + | hb_map (hb_first) + | hb_map (glyph_map) + | hb_sink (new_coverage) + ; + + if (!out->mark2Coverage.serialize (c->serializer, out) + .serialize (c->serializer, new_coverage.iter ())) + return_trace (false); + + hb_sorted_vector_t<unsigned> mark2_indexes; + for (const unsigned row : + mark2_iter + | hb_map (hb_second)) + { + + hb_range ((unsigned) classCount) + | hb_filter (klass_mapping) + | hb_map ([&] (const unsigned col) { return row * (unsigned) classCount + col; }) + | hb_sink (mark2_indexes) + ; + } + out->mark2Array.serialize (c->serializer, out) + .serialize (c->serializer, mark2_iter.len (), &(this+mark2Array), c->plan->layout_variation_idx_map, mark2_indexes.iter ()); + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + mark1Coverage.sanitize (c, this) && + mark2Coverage.sanitize (c, this) && + mark1Array.sanitize (c, this) && + mark2Array.sanitize (c, this, (unsigned int) classCount)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + OffsetTo<Coverage> + mark1Coverage; /* Offset to Combining Mark1 Coverage + * table--from beginning of MarkMarkPos + * subtable */ + OffsetTo<Coverage> + mark2Coverage; /* Offset to Combining Mark2 Coverage + * table--from beginning of MarkMarkPos + * subtable */ + HBUINT16 classCount; /* Number of defined mark classes */ + OffsetTo<MarkArray> + mark1Array; /* Offset to Mark1Array table--from + * beginning of MarkMarkPos subtable */ + OffsetTo<Mark2Array> + mark2Array; /* Offset to Mark2Array table--from + * beginning of MarkMarkPos subtable */ + public: + DEFINE_SIZE_STATIC (12); +}; + +struct MarkMarkPos +{ + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + MarkMarkPosFormat1 format1; + } u; +}; + + +struct ContextPos : Context {}; + +struct ChainContextPos : ChainContext {}; + +struct ExtensionPos : Extension<ExtensionPos> +{ + typedef struct PosLookupSubTable SubTable; +}; + + + +/* + * PosLookup + */ + + +struct PosLookupSubTable +{ + friend struct Lookup; + friend struct PosLookup; + + enum Type { + Single = 1, + Pair = 2, + Cursive = 3, + MarkBase = 4, + MarkLig = 5, + MarkMark = 6, + Context = 7, + ChainContext = 8, + Extension = 9 + }; + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, unsigned int lookup_type, Ts&&... ds) const + { + TRACE_DISPATCH (this, lookup_type); + switch (lookup_type) { + case Single: return_trace (u.single.dispatch (c, hb_forward<Ts> (ds)...)); + case Pair: return_trace (u.pair.dispatch (c, hb_forward<Ts> (ds)...)); + case Cursive: return_trace (u.cursive.dispatch (c, hb_forward<Ts> (ds)...)); + case MarkBase: return_trace (u.markBase.dispatch (c, hb_forward<Ts> (ds)...)); + case MarkLig: return_trace (u.markLig.dispatch (c, hb_forward<Ts> (ds)...)); + case MarkMark: return_trace (u.markMark.dispatch (c, hb_forward<Ts> (ds)...)); + case Context: return_trace (u.context.dispatch (c, hb_forward<Ts> (ds)...)); + case ChainContext: return_trace (u.chainContext.dispatch (c, hb_forward<Ts> (ds)...)); + case Extension: return_trace (u.extension.dispatch (c, hb_forward<Ts> (ds)...)); + default: return_trace (c->default_return_value ()); + } + } + + bool intersects (const hb_set_t *glyphs, unsigned int lookup_type) const + { + hb_intersects_context_t c (glyphs); + return dispatch (&c, lookup_type); + } + + protected: + union { + SinglePos single; + PairPos pair; + CursivePos cursive; + MarkBasePos markBase; + MarkLigPos markLig; + MarkMarkPos markMark; + ContextPos context; + ChainContextPos chainContext; + ExtensionPos extension; + } u; + public: + DEFINE_SIZE_MIN (0); +}; + + +struct PosLookup : Lookup +{ + typedef struct PosLookupSubTable SubTable; + + const SubTable& get_subtable (unsigned int i) const + { return Lookup::get_subtable<SubTable> (i); } + + bool is_reverse () const + { + return false; + } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + return_trace (dispatch (c)); + } + + bool intersects (const hb_set_t *glyphs) const + { + hb_intersects_context_t c (glyphs); + return dispatch (&c); + } + + hb_collect_glyphs_context_t::return_t collect_glyphs (hb_collect_glyphs_context_t *c) const + { return dispatch (c); } + + hb_closure_lookups_context_t::return_t closure_lookups (hb_closure_lookups_context_t *c, unsigned this_index) const + { + if (c->is_lookup_visited (this_index)) + return hb_closure_lookups_context_t::default_return_value (); + + c->set_lookup_visited (this_index); + if (!intersects (c->glyphs)) + { + c->set_lookup_inactive (this_index); + return hb_closure_lookups_context_t::default_return_value (); + } + c->set_recurse_func (dispatch_closure_lookups_recurse_func); + + hb_closure_lookups_context_t::return_t ret = dispatch (c); + return ret; + } + + template <typename set_t> + void collect_coverage (set_t *glyphs) const + { + hb_collect_coverage_context_t<set_t> c (glyphs); + dispatch (&c); + } + + static inline bool apply_recurse_func (hb_ot_apply_context_t *c, unsigned int lookup_index); + + template <typename context_t> + static typename context_t::return_t dispatch_recurse_func (context_t *c, unsigned int lookup_index); + + HB_INTERNAL static hb_closure_lookups_context_t::return_t dispatch_closure_lookups_recurse_func (hb_closure_lookups_context_t *c, unsigned this_index); + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { return Lookup::dispatch<SubTable> (c, hb_forward<Ts> (ds)...); } + + bool subset (hb_subset_context_t *c) const + { return Lookup::subset<SubTable> (c); } + + bool sanitize (hb_sanitize_context_t *c) const + { return Lookup::sanitize<SubTable> (c); } +}; + +/* + * GPOS -- Glyph Positioning + * https://docs.microsoft.com/en-us/typography/opentype/spec/gpos + */ + +struct GPOS : GSUBGPOS +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_GPOS; + + const PosLookup& get_lookup (unsigned int i) const + { return static_cast<const PosLookup &> (GSUBGPOS::get_lookup (i)); } + + static inline void position_start (hb_font_t *font, hb_buffer_t *buffer); + static inline void position_finish_advances (hb_font_t *font, hb_buffer_t *buffer); + static inline void position_finish_offsets (hb_font_t *font, hb_buffer_t *buffer); + + bool subset (hb_subset_context_t *c) const + { + hb_subset_layout_context_t l (c, tableTag, c->plan->gpos_lookups, c->plan->gpos_features); + return GSUBGPOS::subset<PosLookup> (&l); + } + + bool sanitize (hb_sanitize_context_t *c) const + { return GSUBGPOS::sanitize<PosLookup> (c); } + + HB_INTERNAL bool is_blocklisted (hb_blob_t *blob, + hb_face_t *face) const; + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { + for (unsigned i = 0; i < GSUBGPOS::get_lookup_count (); i++) + { + if (!c->gpos_lookups->has (i)) continue; + const PosLookup &l = get_lookup (i); + l.dispatch (c); + } + } + + void closure_lookups (hb_face_t *face, + const hb_set_t *glyphs, + hb_set_t *lookup_indexes /* IN/OUT */) const + { GSUBGPOS::closure_lookups<PosLookup> (face, glyphs, lookup_indexes); } + + typedef GSUBGPOS::accelerator_t<GPOS> accelerator_t; +}; + + +static void +reverse_cursive_minor_offset (hb_glyph_position_t *pos, unsigned int i, hb_direction_t direction, unsigned int new_parent) +{ + int chain = pos[i].attach_chain(), type = pos[i].attach_type(); + if (likely (!chain || 0 == (type & ATTACH_TYPE_CURSIVE))) + return; + + pos[i].attach_chain() = 0; + + unsigned int j = (int) i + chain; + + /* Stop if we see new parent in the chain. */ + if (j == new_parent) + return; + + reverse_cursive_minor_offset (pos, j, direction, new_parent); + + if (HB_DIRECTION_IS_HORIZONTAL (direction)) + pos[j].y_offset = -pos[i].y_offset; + else + pos[j].x_offset = -pos[i].x_offset; + + pos[j].attach_chain() = -chain; + pos[j].attach_type() = type; +} +static void +propagate_attachment_offsets (hb_glyph_position_t *pos, + unsigned int len, + unsigned int i, + hb_direction_t direction) +{ + /* Adjusts offsets of attached glyphs (both cursive and mark) to accumulate + * offset of glyph they are attached to. */ + int chain = pos[i].attach_chain(), type = pos[i].attach_type(); + if (likely (!chain)) + return; + + pos[i].attach_chain() = 0; + + unsigned int j = (int) i + chain; + + if (unlikely (j >= len)) + return; + + propagate_attachment_offsets (pos, len, j, direction); + + assert (!!(type & ATTACH_TYPE_MARK) ^ !!(type & ATTACH_TYPE_CURSIVE)); + + if (type & ATTACH_TYPE_CURSIVE) + { + if (HB_DIRECTION_IS_HORIZONTAL (direction)) + pos[i].y_offset += pos[j].y_offset; + else + pos[i].x_offset += pos[j].x_offset; + } + else /*if (type & ATTACH_TYPE_MARK)*/ + { + pos[i].x_offset += pos[j].x_offset; + pos[i].y_offset += pos[j].y_offset; + + assert (j < i); + if (HB_DIRECTION_IS_FORWARD (direction)) + for (unsigned int k = j; k < i; k++) { + pos[i].x_offset -= pos[k].x_advance; + pos[i].y_offset -= pos[k].y_advance; + } + else + for (unsigned int k = j + 1; k < i + 1; k++) { + pos[i].x_offset += pos[k].x_advance; + pos[i].y_offset += pos[k].y_advance; + } + } +} + +void +GPOS::position_start (hb_font_t *font HB_UNUSED, hb_buffer_t *buffer) +{ + unsigned int count = buffer->len; + for (unsigned int i = 0; i < count; i++) + buffer->pos[i].attach_chain() = buffer->pos[i].attach_type() = 0; +} + +void +GPOS::position_finish_advances (hb_font_t *font HB_UNUSED, hb_buffer_t *buffer HB_UNUSED) +{ + //_hb_buffer_assert_gsubgpos_vars (buffer); +} + +void +GPOS::position_finish_offsets (hb_font_t *font HB_UNUSED, hb_buffer_t *buffer) +{ + _hb_buffer_assert_gsubgpos_vars (buffer); + + unsigned int len; + hb_glyph_position_t *pos = hb_buffer_get_glyph_positions (buffer, &len); + hb_direction_t direction = buffer->props.direction; + + /* Handle attachments */ + if (buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_GPOS_ATTACHMENT) + for (unsigned int i = 0; i < len; i++) + propagate_attachment_offsets (pos, len, i, direction); +} + + +struct GPOS_accelerator_t : GPOS::accelerator_t {}; + + +/* Out-of-class implementation for methods recursing */ + +#ifndef HB_NO_OT_LAYOUT +template <typename context_t> +/*static*/ typename context_t::return_t PosLookup::dispatch_recurse_func (context_t *c, unsigned int lookup_index) +{ + const PosLookup &l = c->face->table.GPOS.get_relaxed ()->table->get_lookup (lookup_index); + return l.dispatch (c); +} + +/*static*/ inline hb_closure_lookups_context_t::return_t PosLookup::dispatch_closure_lookups_recurse_func (hb_closure_lookups_context_t *c, unsigned this_index) +{ + const PosLookup &l = c->face->table.GPOS.get_relaxed ()->table->get_lookup (this_index); + return l.closure_lookups (c, this_index); +} + +/*static*/ bool PosLookup::apply_recurse_func (hb_ot_apply_context_t *c, unsigned int lookup_index) +{ + const PosLookup &l = c->face->table.GPOS.get_relaxed ()->table->get_lookup (lookup_index); + unsigned int saved_lookup_props = c->lookup_props; + unsigned int saved_lookup_index = c->lookup_index; + c->set_lookup_index (lookup_index); + c->set_lookup_props (l.get_props ()); + bool ret = l.dispatch (c); + c->set_lookup_index (saved_lookup_index); + c->set_lookup_props (saved_lookup_props); + return ret; +} +#endif + + +} /* namespace OT */ + + +#endif /* HB_OT_LAYOUT_GPOS_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gsub-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gsub-table.hh new file mode 100644 index 0000000000..2f41d67819 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-layout-gsub-table.hh @@ -0,0 +1,1627 @@ +/* + * Copyright © 2007,2008,2009,2010 Red Hat, Inc. + * Copyright © 2010,2012,2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_LAYOUT_GSUB_TABLE_HH +#define HB_OT_LAYOUT_GSUB_TABLE_HH + +#include "hb-ot-layout-gsubgpos.hh" + + +namespace OT { + +typedef hb_pair_t<hb_codepoint_t, hb_codepoint_t> hb_codepoint_pair_t; + +template<typename Iterator> +static void SingleSubst_serialize (hb_serialize_context_t *c, + Iterator it); + + +struct SingleSubstFormat1 +{ + bool intersects (const hb_set_t *glyphs) const + { return (this+coverage).intersects (glyphs); } + + void closure (hb_closure_context_t *c) const + { + unsigned d = deltaGlyphID; + + hb_iter (this+coverage) + | hb_filter (*c->glyphs) + | hb_map ([d] (hb_codepoint_t g) { return (g + d) & 0xFFFFu; }) + | hb_sink (c->output) + ; + } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + if (unlikely (!(this+coverage).collect_coverage (c->input))) return; + unsigned d = deltaGlyphID; + + hb_iter (this+coverage) + | hb_map ([d] (hb_codepoint_t g) { return (g + d) & 0xFFFFu; }) + | hb_sink (c->output) + ; + } + + const Coverage &get_coverage () const { return this+coverage; } + + bool would_apply (hb_would_apply_context_t *c) const + { return c->len == 1 && (this+coverage).get_coverage (c->glyphs[0]) != NOT_COVERED; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + hb_codepoint_t glyph_id = c->buffer->cur().codepoint; + unsigned int index = (this+coverage).get_coverage (glyph_id); + if (likely (index == NOT_COVERED)) return_trace (false); + + /* According to the Adobe Annotated OpenType Suite, result is always + * limited to 16bit. */ + glyph_id = (glyph_id + deltaGlyphID) & 0xFFFFu; + c->replace_glyph (glyph_id); + + return_trace (true); + } + + template<typename Iterator, + hb_requires (hb_is_sorted_source_of (Iterator, hb_codepoint_t))> + bool serialize (hb_serialize_context_t *c, + Iterator glyphs, + unsigned delta) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + if (unlikely (!coverage.serialize (c, this).serialize (c, glyphs))) return_trace (false); + c->check_assign (deltaGlyphID, delta); + return_trace (true); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset_gsub (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + hb_codepoint_t delta = deltaGlyphID; + + auto it = + + hb_iter (this+coverage) + | hb_filter (glyphset) + | hb_map_retains_sorting ([&] (hb_codepoint_t g) { + return hb_codepoint_pair_t (g, + (g + delta) & 0xFFFF); }) + | hb_filter (glyphset, hb_second) + | hb_map_retains_sorting ([&] (hb_codepoint_pair_t p) -> hb_codepoint_pair_t + { return hb_pair (glyph_map[p.first], glyph_map[p.second]); }) + ; + + bool ret = bool (it); + SingleSubst_serialize (c->serializer, it); + return_trace (ret); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (coverage.sanitize (c, this) && deltaGlyphID.sanitize (c)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of Substitution table */ + HBUINT16 deltaGlyphID; /* Add to original GlyphID to get + * substitute GlyphID, modulo 0x10000 */ + public: + DEFINE_SIZE_STATIC (6); +}; + +struct SingleSubstFormat2 +{ + bool intersects (const hb_set_t *glyphs) const + { return (this+coverage).intersects (glyphs); } + + void closure (hb_closure_context_t *c) const + { + + hb_zip (this+coverage, substitute) + | hb_filter (*c->glyphs, hb_first) + | hb_map (hb_second) + | hb_sink (c->output) + ; + } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + if (unlikely (!(this+coverage).collect_coverage (c->input))) return; + + hb_zip (this+coverage, substitute) + | hb_map (hb_second) + | hb_sink (c->output) + ; + } + + const Coverage &get_coverage () const { return this+coverage; } + + bool would_apply (hb_would_apply_context_t *c) const + { return c->len == 1 && (this+coverage).get_coverage (c->glyphs[0]) != NOT_COVERED; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + if (unlikely (index >= substitute.len)) return_trace (false); + + c->replace_glyph (substitute[index]); + + return_trace (true); + } + + template<typename Iterator, + hb_requires (hb_is_sorted_source_of (Iterator, + hb_codepoint_pair_t))> + bool serialize (hb_serialize_context_t *c, + Iterator it) + { + TRACE_SERIALIZE (this); + auto substitutes = + + it + | hb_map (hb_second) + ; + auto glyphs = + + it + | hb_map_retains_sorting (hb_first) + ; + if (unlikely (!c->extend_min (*this))) return_trace (false); + if (unlikely (!substitute.serialize (c, substitutes))) return_trace (false); + if (unlikely (!coverage.serialize (c, this).serialize (c, glyphs))) return_trace (false); + return_trace (true); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset_gsub (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto it = + + hb_zip (this+coverage, substitute) + | hb_filter (glyphset, hb_first) + | hb_filter (glyphset, hb_second) + | hb_map_retains_sorting ([&] (hb_pair_t<hb_codepoint_t, const HBGlyphID &> p) -> hb_codepoint_pair_t + { return hb_pair (glyph_map[p.first], glyph_map[p.second]); }) + ; + + bool ret = bool (it); + SingleSubst_serialize (c->serializer, it); + return_trace (ret); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (coverage.sanitize (c, this) && substitute.sanitize (c)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 2 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of Substitution table */ + ArrayOf<HBGlyphID> + substitute; /* Array of substitute + * GlyphIDs--ordered by Coverage Index */ + public: + DEFINE_SIZE_ARRAY (6, substitute); +}; + +struct SingleSubst +{ + + template<typename Iterator, + hb_requires (hb_is_sorted_source_of (Iterator, + const hb_codepoint_pair_t))> + bool serialize (hb_serialize_context_t *c, + Iterator glyphs) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (u.format))) return_trace (false); + unsigned format = 2; + unsigned delta = 0; + if (glyphs) + { + format = 1; + auto get_delta = [=] (hb_codepoint_pair_t _) + { return (unsigned) (_.second - _.first) & 0xFFFF; }; + delta = get_delta (*glyphs); + if (!hb_all (++(+glyphs), delta, get_delta)) format = 2; + } + u.format = format; + switch (u.format) { + case 1: return_trace (u.format1.serialize (c, + + glyphs + | hb_map_retains_sorting (hb_first), + delta)); + case 2: return_trace (u.format2.serialize (c, glyphs)); + default:return_trace (false); + } + } + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + case 2: return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + SingleSubstFormat1 format1; + SingleSubstFormat2 format2; + } u; +}; + +template<typename Iterator> +static void +SingleSubst_serialize (hb_serialize_context_t *c, + Iterator it) +{ c->start_embed<SingleSubst> ()->serialize (c, it); } + +struct Sequence +{ + bool intersects (const hb_set_t *glyphs) const + { return hb_all (substitute, glyphs); } + + void closure (hb_closure_context_t *c) const + { c->output->add_array (substitute.arrayZ, substitute.len); } + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { c->output->add_array (substitute.arrayZ, substitute.len); } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + unsigned int count = substitute.len; + + /* Special-case to make it in-place and not consider this + * as a "multiplied" substitution. */ + if (unlikely (count == 1)) + { + c->replace_glyph (substitute.arrayZ[0]); + return_trace (true); + } + /* Spec disallows this, but Uniscribe allows it. + * https://github.com/harfbuzz/harfbuzz/issues/253 */ + else if (unlikely (count == 0)) + { + c->buffer->delete_glyph (); + return_trace (true); + } + + unsigned int klass = _hb_glyph_info_is_ligature (&c->buffer->cur()) ? + HB_OT_LAYOUT_GLYPH_PROPS_BASE_GLYPH : 0; + + for (unsigned int i = 0; i < count; i++) { + _hb_glyph_info_set_lig_props_for_component (&c->buffer->cur(), i); + c->output_glyph_for_component (substitute.arrayZ[i], klass); + } + c->buffer->skip_glyph (); + + return_trace (true); + } + + template <typename Iterator, + hb_requires (hb_is_source_of (Iterator, hb_codepoint_t))> + bool serialize (hb_serialize_context_t *c, + Iterator subst) + { + TRACE_SERIALIZE (this); + return_trace (substitute.serialize (c, subst)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + if (!intersects (&glyphset)) return_trace (false); + + auto it = + + hb_iter (substitute) + | hb_map (glyph_map) + ; + + auto *out = c->serializer->start_embed (*this); + return_trace (out->serialize (c->serializer, it)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (substitute.sanitize (c)); + } + + protected: + ArrayOf<HBGlyphID> + substitute; /* String of GlyphIDs to substitute */ + public: + DEFINE_SIZE_ARRAY (2, substitute); +}; + +struct MultipleSubstFormat1 +{ + bool intersects (const hb_set_t *glyphs) const + { return (this+coverage).intersects (glyphs); } + + void closure (hb_closure_context_t *c) const + { + + hb_zip (this+coverage, sequence) + | hb_filter (*c->glyphs, hb_first) + | hb_map (hb_second) + | hb_map (hb_add (this)) + | hb_apply ([c] (const Sequence &_) { _.closure (c); }) + ; + } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + if (unlikely (!(this+coverage).collect_coverage (c->input))) return; + + hb_zip (this+coverage, sequence) + | hb_map (hb_second) + | hb_map (hb_add (this)) + | hb_apply ([c] (const Sequence &_) { _.collect_glyphs (c); }) + ; + } + + const Coverage &get_coverage () const { return this+coverage; } + + bool would_apply (hb_would_apply_context_t *c) const + { return c->len == 1 && (this+coverage).get_coverage (c->glyphs[0]) != NOT_COVERED; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + + unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + return_trace ((this+sequence[index]).apply (c)); + } + + bool serialize (hb_serialize_context_t *c, + hb_sorted_array_t<const HBGlyphID> glyphs, + hb_array_t<const unsigned int> substitute_len_list, + hb_array_t<const HBGlyphID> substitute_glyphs_list) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + if (unlikely (!sequence.serialize (c, glyphs.length))) return_trace (false); + for (unsigned int i = 0; i < glyphs.length; i++) + { + unsigned int substitute_len = substitute_len_list[i]; + if (unlikely (!sequence[i].serialize (c, this) + .serialize (c, substitute_glyphs_list.sub_array (0, substitute_len)))) + return_trace (false); + substitute_glyphs_list += substitute_len; + } + return_trace (coverage.serialize (c, this).serialize (c, glyphs)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + out->format = format; + + hb_sorted_vector_t<hb_codepoint_t> new_coverage; + + hb_zip (this+coverage, sequence) + | hb_filter (glyphset, hb_first) + | hb_filter (subset_offset_array (c, out->sequence, this), hb_second) + | hb_map (hb_first) + | hb_map (glyph_map) + | hb_sink (new_coverage) + ; + out->coverage.serialize (c->serializer, out) + .serialize (c->serializer, new_coverage.iter ()); + return_trace (bool (new_coverage)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (coverage.sanitize (c, this) && sequence.sanitize (c, this)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of Substitution table */ + OffsetArrayOf<Sequence> + sequence; /* Array of Sequence tables + * ordered by Coverage Index */ + public: + DEFINE_SIZE_ARRAY (6, sequence); +}; + +struct MultipleSubst +{ + bool serialize (hb_serialize_context_t *c, + hb_sorted_array_t<const HBGlyphID> glyphs, + hb_array_t<const unsigned int> substitute_len_list, + hb_array_t<const HBGlyphID> substitute_glyphs_list) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (u.format))) return_trace (false); + unsigned int format = 1; + u.format = format; + switch (u.format) { + case 1: return_trace (u.format1.serialize (c, glyphs, substitute_len_list, substitute_glyphs_list)); + default:return_trace (false); + } + } + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + MultipleSubstFormat1 format1; + } u; +}; + +struct AlternateSet +{ + bool intersects (const hb_set_t *glyphs) const + { return hb_any (alternates, glyphs); } + + void closure (hb_closure_context_t *c) const + { c->output->add_array (alternates.arrayZ, alternates.len); } + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { c->output->add_array (alternates.arrayZ, alternates.len); } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + unsigned int count = alternates.len; + + if (unlikely (!count)) return_trace (false); + + hb_mask_t glyph_mask = c->buffer->cur().mask; + hb_mask_t lookup_mask = c->lookup_mask; + + /* Note: This breaks badly if two features enabled this lookup together. */ + unsigned int shift = hb_ctz (lookup_mask); + unsigned int alt_index = ((lookup_mask & glyph_mask) >> shift); + + /* If alt_index is MAX_VALUE, randomize feature if it is the rand feature. */ + if (alt_index == HB_OT_MAP_MAX_VALUE && c->random) + alt_index = c->random_number () % count + 1; + + if (unlikely (alt_index > count || alt_index == 0)) return_trace (false); + + c->replace_glyph (alternates[alt_index - 1]); + + return_trace (true); + } + + unsigned + get_alternates (unsigned start_offset, + unsigned *alternate_count /* IN/OUT. May be NULL. */, + hb_codepoint_t *alternate_glyphs /* OUT. May be NULL. */) const + { + if (alternates.len && alternate_count) + { + + alternates.sub_array (start_offset, alternate_count) + | hb_sink (hb_array (alternate_glyphs, *alternate_count)) + ; + } + return alternates.len; + } + + template <typename Iterator, + hb_requires (hb_is_source_of (Iterator, hb_codepoint_t))> + bool serialize (hb_serialize_context_t *c, + Iterator alts) + { + TRACE_SERIALIZE (this); + return_trace (alternates.serialize (c, alts)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto it = + + hb_iter (alternates) + | hb_filter (glyphset) + | hb_map (glyph_map) + ; + + auto *out = c->serializer->start_embed (*this); + return_trace (out->serialize (c->serializer, it) && + out->alternates); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (alternates.sanitize (c)); + } + + protected: + ArrayOf<HBGlyphID> + alternates; /* Array of alternate GlyphIDs--in + * arbitrary order */ + public: + DEFINE_SIZE_ARRAY (2, alternates); +}; + +struct AlternateSubstFormat1 +{ + bool intersects (const hb_set_t *glyphs) const + { return (this+coverage).intersects (glyphs); } + + void closure (hb_closure_context_t *c) const + { + + hb_zip (this+coverage, alternateSet) + | hb_filter (c->glyphs, hb_first) + | hb_map (hb_second) + | hb_map (hb_add (this)) + | hb_apply ([c] (const AlternateSet &_) { _.closure (c); }) + ; + } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + if (unlikely (!(this+coverage).collect_coverage (c->input))) return; + + hb_zip (this+coverage, alternateSet) + | hb_map (hb_second) + | hb_map (hb_add (this)) + | hb_apply ([c] (const AlternateSet &_) { _.collect_glyphs (c); }) + ; + } + + const Coverage &get_coverage () const { return this+coverage; } + + bool would_apply (hb_would_apply_context_t *c) const + { return c->len == 1 && (this+coverage).get_coverage (c->glyphs[0]) != NOT_COVERED; } + + unsigned + get_glyph_alternates (hb_codepoint_t gid, + unsigned start_offset, + unsigned *alternate_count /* IN/OUT. May be NULL. */, + hb_codepoint_t *alternate_glyphs /* OUT. May be NULL. */) const + { return (this+alternateSet[(this+coverage).get_coverage (gid)]) + .get_alternates (start_offset, alternate_count, alternate_glyphs); } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + + unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + return_trace ((this+alternateSet[index]).apply (c)); + } + + bool serialize (hb_serialize_context_t *c, + hb_sorted_array_t<const HBGlyphID> glyphs, + hb_array_t<const unsigned int> alternate_len_list, + hb_array_t<const HBGlyphID> alternate_glyphs_list) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + if (unlikely (!alternateSet.serialize (c, glyphs.length))) return_trace (false); + for (unsigned int i = 0; i < glyphs.length; i++) + { + unsigned int alternate_len = alternate_len_list[i]; + if (unlikely (!alternateSet[i].serialize (c, this) + .serialize (c, alternate_glyphs_list.sub_array (0, alternate_len)))) + return_trace (false); + alternate_glyphs_list += alternate_len; + } + return_trace (coverage.serialize (c, this).serialize (c, glyphs)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + out->format = format; + + hb_sorted_vector_t<hb_codepoint_t> new_coverage; + + hb_zip (this+coverage, alternateSet) + | hb_filter (glyphset, hb_first) + | hb_filter (subset_offset_array (c, out->alternateSet, this), hb_second) + | hb_map (hb_first) + | hb_map (glyph_map) + | hb_sink (new_coverage) + ; + out->coverage.serialize (c->serializer, out) + .serialize (c->serializer, new_coverage.iter ()); + return_trace (bool (new_coverage)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (coverage.sanitize (c, this) && alternateSet.sanitize (c, this)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of Substitution table */ + OffsetArrayOf<AlternateSet> + alternateSet; /* Array of AlternateSet tables + * ordered by Coverage Index */ + public: + DEFINE_SIZE_ARRAY (6, alternateSet); +}; + +struct AlternateSubst +{ + bool serialize (hb_serialize_context_t *c, + hb_sorted_array_t<const HBGlyphID> glyphs, + hb_array_t<const unsigned int> alternate_len_list, + hb_array_t<const HBGlyphID> alternate_glyphs_list) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (u.format))) return_trace (false); + unsigned int format = 1; + u.format = format; + switch (u.format) { + case 1: return_trace (u.format1.serialize (c, glyphs, alternate_len_list, alternate_glyphs_list)); + default:return_trace (false); + } + } + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + AlternateSubstFormat1 format1; + } u; +}; + + +struct Ligature +{ + bool intersects (const hb_set_t *glyphs) const + { return hb_all (component, glyphs); } + + void closure (hb_closure_context_t *c) const + { + if (!intersects (c->glyphs)) return; + c->output->add (ligGlyph); + } + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + c->input->add_array (component.arrayZ, component.get_length ()); + c->output->add (ligGlyph); + } + + bool would_apply (hb_would_apply_context_t *c) const + { + if (c->len != component.lenP1) + return false; + + for (unsigned int i = 1; i < c->len; i++) + if (likely (c->glyphs[i] != component[i])) + return false; + + return true; + } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + unsigned int count = component.lenP1; + + if (unlikely (!count)) return_trace (false); + + /* Special-case to make it in-place and not consider this + * as a "ligated" substitution. */ + if (unlikely (count == 1)) + { + c->replace_glyph (ligGlyph); + return_trace (true); + } + + unsigned int total_component_count = 0; + + unsigned int match_length = 0; + unsigned int match_positions[HB_MAX_CONTEXT_LENGTH]; + + if (likely (!match_input (c, count, + &component[1], + match_glyph, + nullptr, + &match_length, + match_positions, + &total_component_count))) + return_trace (false); + + ligate_input (c, + count, + match_positions, + match_length, + ligGlyph, + total_component_count); + + return_trace (true); + } + + template <typename Iterator, + hb_requires (hb_is_source_of (Iterator, hb_codepoint_t))> + bool serialize (hb_serialize_context_t *c, + hb_codepoint_t ligature, + Iterator components /* Starting from second */) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + ligGlyph = ligature; + if (unlikely (!component.serialize (c, components))) return_trace (false); + return_trace (true); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + if (!intersects (&glyphset) || !glyphset.has (ligGlyph)) return_trace (false); + + auto it = + + hb_iter (component) + | hb_map (glyph_map) + ; + + auto *out = c->serializer->start_embed (*this); + return_trace (out->serialize (c->serializer, + glyph_map[ligGlyph], + it)); + } + + public: + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (ligGlyph.sanitize (c) && component.sanitize (c)); + } + + protected: + HBGlyphID ligGlyph; /* GlyphID of ligature to substitute */ + HeadlessArrayOf<HBGlyphID> + component; /* Array of component GlyphIDs--start + * with the second component--ordered + * in writing direction */ + public: + DEFINE_SIZE_ARRAY (4, component); +}; + +struct LigatureSet +{ + bool intersects (const hb_set_t *glyphs) const + { + return + + hb_iter (ligature) + | hb_map (hb_add (this)) + | hb_map ([glyphs] (const Ligature &_) { return _.intersects (glyphs); }) + | hb_any + ; + } + + void closure (hb_closure_context_t *c) const + { + + hb_iter (ligature) + | hb_map (hb_add (this)) + | hb_apply ([c] (const Ligature &_) { _.closure (c); }) + ; + } + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + + hb_iter (ligature) + | hb_map (hb_add (this)) + | hb_apply ([c] (const Ligature &_) { _.collect_glyphs (c); }) + ; + } + + bool would_apply (hb_would_apply_context_t *c) const + { + return + + hb_iter (ligature) + | hb_map (hb_add (this)) + | hb_map ([c] (const Ligature &_) { return _.would_apply (c); }) + | hb_any + ; + } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + unsigned int num_ligs = ligature.len; + for (unsigned int i = 0; i < num_ligs; i++) + { + const Ligature &lig = this+ligature[i]; + if (lig.apply (c)) return_trace (true); + } + + return_trace (false); + } + + bool serialize (hb_serialize_context_t *c, + hb_array_t<const HBGlyphID> ligatures, + hb_array_t<const unsigned int> component_count_list, + hb_array_t<const HBGlyphID> &component_list /* Starting from second for each ligature */) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + if (unlikely (!ligature.serialize (c, ligatures.length))) return_trace (false); + for (unsigned int i = 0; i < ligatures.length; i++) + { + unsigned int component_count = (unsigned) hb_max ((int) component_count_list[i] - 1, 0); + if (unlikely (!ligature[i].serialize (c, this) + .serialize (c, + ligatures[i], + component_list.sub_array (0, component_count)))) + return_trace (false); + component_list += component_count; + } + return_trace (true); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + + + hb_iter (ligature) + | hb_filter (subset_offset_array (c, out->ligature, this)) + | hb_drain + ; + return_trace (bool (out->ligature)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (ligature.sanitize (c, this)); + } + + protected: + OffsetArrayOf<Ligature> + ligature; /* Array LigatureSet tables + * ordered by preference */ + public: + DEFINE_SIZE_ARRAY (2, ligature); +}; + +struct LigatureSubstFormat1 +{ + bool intersects (const hb_set_t *glyphs) const + { + return + + hb_zip (this+coverage, ligatureSet) + | hb_filter (*glyphs, hb_first) + | hb_map (hb_second) + | hb_map ([this, glyphs] (const OffsetTo<LigatureSet> &_) + { return (this+_).intersects (glyphs); }) + | hb_any + ; + } + + void closure (hb_closure_context_t *c) const + { + + hb_zip (this+coverage, ligatureSet) + | hb_filter (*c->glyphs, hb_first) + | hb_map (hb_second) + | hb_map (hb_add (this)) + | hb_apply ([c] (const LigatureSet &_) { _.closure (c); }) + ; + } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + if (unlikely (!(this+coverage).collect_coverage (c->input))) return; + + + hb_zip (this+coverage, ligatureSet) + | hb_map (hb_second) + | hb_map (hb_add (this)) + | hb_apply ([c] (const LigatureSet &_) { _.collect_glyphs (c); }) + ; + } + + const Coverage &get_coverage () const { return this+coverage; } + + bool would_apply (hb_would_apply_context_t *c) const + { + unsigned int index = (this+coverage).get_coverage (c->glyphs[0]); + if (likely (index == NOT_COVERED)) return false; + + const LigatureSet &lig_set = this+ligatureSet[index]; + return lig_set.would_apply (c); + } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + + unsigned int index = (this+coverage).get_coverage (c->buffer->cur ().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + const LigatureSet &lig_set = this+ligatureSet[index]; + return_trace (lig_set.apply (c)); + } + + bool serialize (hb_serialize_context_t *c, + hb_sorted_array_t<const HBGlyphID> first_glyphs, + hb_array_t<const unsigned int> ligature_per_first_glyph_count_list, + hb_array_t<const HBGlyphID> ligatures_list, + hb_array_t<const unsigned int> component_count_list, + hb_array_t<const HBGlyphID> component_list /* Starting from second for each ligature */) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (*this))) return_trace (false); + if (unlikely (!ligatureSet.serialize (c, first_glyphs.length))) return_trace (false); + for (unsigned int i = 0; i < first_glyphs.length; i++) + { + unsigned int ligature_count = ligature_per_first_glyph_count_list[i]; + if (unlikely (!ligatureSet[i].serialize (c, this) + .serialize (c, + ligatures_list.sub_array (0, ligature_count), + component_count_list.sub_array (0, ligature_count), + component_list))) return_trace (false); + ligatures_list += ligature_count; + component_count_list += ligature_count; + } + return_trace (coverage.serialize (c, this).serialize (c, first_glyphs)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + out->format = format; + + hb_sorted_vector_t<hb_codepoint_t> new_coverage; + + hb_zip (this+coverage, ligatureSet) + | hb_filter (glyphset, hb_first) + | hb_filter (subset_offset_array (c, out->ligatureSet, this), hb_second) + | hb_map (hb_first) + | hb_map (glyph_map) + | hb_sink (new_coverage) + ; + out->coverage.serialize (c->serializer, out) + .serialize (c->serializer, new_coverage.iter ()); + return_trace (bool (new_coverage)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (coverage.sanitize (c, this) && ligatureSet.sanitize (c, this)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of Substitution table */ + OffsetArrayOf<LigatureSet> + ligatureSet; /* Array LigatureSet tables + * ordered by Coverage Index */ + public: + DEFINE_SIZE_ARRAY (6, ligatureSet); +}; + +struct LigatureSubst +{ + bool serialize (hb_serialize_context_t *c, + hb_sorted_array_t<const HBGlyphID> first_glyphs, + hb_array_t<const unsigned int> ligature_per_first_glyph_count_list, + hb_array_t<const HBGlyphID> ligatures_list, + hb_array_t<const unsigned int> component_count_list, + hb_array_t<const HBGlyphID> component_list /* Starting from second for each ligature */) + { + TRACE_SERIALIZE (this); + if (unlikely (!c->extend_min (u.format))) return_trace (false); + unsigned int format = 1; + u.format = format; + switch (u.format) { + case 1: return_trace (u.format1.serialize (c, + first_glyphs, + ligature_per_first_glyph_count_list, + ligatures_list, + component_count_list, + component_list)); + default:return_trace (false); + } + } + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + LigatureSubstFormat1 format1; + } u; +}; + + +struct ContextSubst : Context {}; + +struct ChainContextSubst : ChainContext {}; + +struct ExtensionSubst : Extension<ExtensionSubst> +{ + typedef struct SubstLookupSubTable SubTable; + bool is_reverse () const; +}; + + +struct ReverseChainSingleSubstFormat1 +{ + bool intersects (const hb_set_t *glyphs) const + { + if (!(this+coverage).intersects (glyphs)) + return false; + + const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + + unsigned int count; + + count = backtrack.len; + for (unsigned int i = 0; i < count; i++) + if (!(this+backtrack[i]).intersects (glyphs)) + return false; + + count = lookahead.len; + for (unsigned int i = 0; i < count; i++) + if (!(this+lookahead[i]).intersects (glyphs)) + return false; + + return true; + } + + void closure (hb_closure_context_t *c) const + { + if (!intersects (c->glyphs)) return; + + const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + const ArrayOf<HBGlyphID> &substitute = StructAfter<ArrayOf<HBGlyphID>> (lookahead); + + + hb_zip (this+coverage, substitute) + | hb_filter (*c->glyphs, hb_first) + | hb_map (hb_second) + | hb_sink (c->output) + ; + } + + void closure_lookups (hb_closure_lookups_context_t *c) const {} + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + if (unlikely (!(this+coverage).collect_coverage (c->input))) return; + + unsigned int count; + + count = backtrack.len; + for (unsigned int i = 0; i < count; i++) + if (unlikely (!(this+backtrack[i]).collect_coverage (c->before))) return; + + const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + count = lookahead.len; + for (unsigned int i = 0; i < count; i++) + if (unlikely (!(this+lookahead[i]).collect_coverage (c->after))) return; + + const ArrayOf<HBGlyphID> &substitute = StructAfter<ArrayOf<HBGlyphID>> (lookahead); + count = substitute.len; + c->output->add_array (substitute.arrayZ, substitute.len); + } + + const Coverage &get_coverage () const { return this+coverage; } + + bool would_apply (hb_would_apply_context_t *c) const + { return c->len == 1 && (this+coverage).get_coverage (c->glyphs[0]) != NOT_COVERED; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + if (unlikely (c->nesting_level_left != HB_MAX_NESTING_LEVEL)) + return_trace (false); /* No chaining to this type */ + + unsigned int index = (this+coverage).get_coverage (c->buffer->cur ().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + const ArrayOf<HBGlyphID> &substitute = StructAfter<ArrayOf<HBGlyphID>> (lookahead); + + if (unlikely (index >= substitute.len)) return_trace (false); + + unsigned int start_index = 0, end_index = 0; + if (match_backtrack (c, + backtrack.len, (HBUINT16 *) backtrack.arrayZ, + match_coverage, this, + &start_index) && + match_lookahead (c, + lookahead.len, (HBUINT16 *) lookahead.arrayZ, + match_coverage, this, + 1, &end_index)) + { + c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index); + c->replace_glyph_inplace (substitute[index]); + /* Note: We DON'T decrease buffer->idx. The main loop does it + * for us. This is useful for preventing surprises if someone + * calls us through a Context lookup. */ + return_trace (true); + } + + return_trace (false); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + // TODO(subset) + return_trace (false); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!(coverage.sanitize (c, this) && backtrack.sanitize (c, this))) + return_trace (false); + const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + if (!lookahead.sanitize (c, this)) + return_trace (false); + const ArrayOf<HBGlyphID> &substitute = StructAfter<ArrayOf<HBGlyphID>> (lookahead); + return_trace (substitute.sanitize (c)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of table */ + OffsetArrayOf<Coverage> + backtrack; /* Array of coverage tables + * in backtracking sequence, in glyph + * sequence order */ + OffsetArrayOf<Coverage> + lookaheadX; /* Array of coverage tables + * in lookahead sequence, in glyph + * sequence order */ + ArrayOf<HBGlyphID> + substituteX; /* Array of substitute + * GlyphIDs--ordered by Coverage Index */ + public: + DEFINE_SIZE_MIN (10); +}; + +struct ReverseChainSingleSubst +{ + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + ReverseChainSingleSubstFormat1 format1; + } u; +}; + + + +/* + * SubstLookup + */ + +struct SubstLookupSubTable +{ + friend struct Lookup; + friend struct SubstLookup; + + enum Type { + Single = 1, + Multiple = 2, + Alternate = 3, + Ligature = 4, + Context = 5, + ChainContext = 6, + Extension = 7, + ReverseChainSingle = 8 + }; + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, unsigned int lookup_type, Ts&&... ds) const + { + TRACE_DISPATCH (this, lookup_type); + switch (lookup_type) { + case Single: return_trace (u.single.dispatch (c, hb_forward<Ts> (ds)...)); + case Multiple: return_trace (u.multiple.dispatch (c, hb_forward<Ts> (ds)...)); + case Alternate: return_trace (u.alternate.dispatch (c, hb_forward<Ts> (ds)...)); + case Ligature: return_trace (u.ligature.dispatch (c, hb_forward<Ts> (ds)...)); + case Context: return_trace (u.context.dispatch (c, hb_forward<Ts> (ds)...)); + case ChainContext: return_trace (u.chainContext.dispatch (c, hb_forward<Ts> (ds)...)); + case Extension: return_trace (u.extension.dispatch (c, hb_forward<Ts> (ds)...)); + case ReverseChainSingle: return_trace (u.reverseChainContextSingle.dispatch (c, hb_forward<Ts> (ds)...)); + default: return_trace (c->default_return_value ()); + } + } + + bool intersects (const hb_set_t *glyphs, unsigned int lookup_type) const + { + hb_intersects_context_t c (glyphs); + return dispatch (&c, lookup_type); + } + + protected: + union { + SingleSubst single; + MultipleSubst multiple; + AlternateSubst alternate; + LigatureSubst ligature; + ContextSubst context; + ChainContextSubst chainContext; + ExtensionSubst extension; + ReverseChainSingleSubst reverseChainContextSingle; + } u; + public: + DEFINE_SIZE_MIN (0); +}; + + +struct SubstLookup : Lookup +{ + typedef SubstLookupSubTable SubTable; + + const SubTable& get_subtable (unsigned int i) const + { return Lookup::get_subtable<SubTable> (i); } + + static inline bool lookup_type_is_reverse (unsigned int lookup_type) + { return lookup_type == SubTable::ReverseChainSingle; } + + bool is_reverse () const + { + unsigned int type = get_type (); + if (unlikely (type == SubTable::Extension)) + return reinterpret_cast<const ExtensionSubst &> (get_subtable (0)).is_reverse (); + return lookup_type_is_reverse (type); + } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + return_trace (dispatch (c)); + } + + bool intersects (const hb_set_t *glyphs) const + { + hb_intersects_context_t c (glyphs); + return dispatch (&c); + } + + hb_closure_context_t::return_t closure (hb_closure_context_t *c, unsigned int this_index) const + { + if (!c->should_visit_lookup (this_index)) + return hb_closure_context_t::default_return_value (); + + c->set_recurse_func (dispatch_closure_recurse_func); + + hb_closure_context_t::return_t ret = dispatch (c); + + c->flush (); + + return ret; + } + + hb_closure_lookups_context_t::return_t closure_lookups (hb_closure_lookups_context_t *c, unsigned this_index) const + { + if (c->is_lookup_visited (this_index)) + return hb_closure_lookups_context_t::default_return_value (); + + c->set_lookup_visited (this_index); + if (!intersects (c->glyphs)) + { + c->set_lookup_inactive (this_index); + return hb_closure_lookups_context_t::default_return_value (); + } + + c->set_recurse_func (dispatch_closure_lookups_recurse_func); + + hb_closure_lookups_context_t::return_t ret = dispatch (c); + return ret; + } + + hb_collect_glyphs_context_t::return_t collect_glyphs (hb_collect_glyphs_context_t *c) const + { + c->set_recurse_func (dispatch_recurse_func<hb_collect_glyphs_context_t>); + return dispatch (c); + } + + template <typename set_t> + void collect_coverage (set_t *glyphs) const + { + hb_collect_coverage_context_t<set_t> c (glyphs); + dispatch (&c); + } + + bool would_apply (hb_would_apply_context_t *c, + const hb_ot_layout_lookup_accelerator_t *accel) const + { + if (unlikely (!c->len)) return false; + if (!accel->may_have (c->glyphs[0])) return false; + return dispatch (c); + } + + static inline bool apply_recurse_func (hb_ot_apply_context_t *c, unsigned int lookup_index); + + SubTable& serialize_subtable (hb_serialize_context_t *c, + unsigned int i) + { return get_subtables<SubTable> ()[i].serialize (c, this); } + + bool serialize_single (hb_serialize_context_t *c, + uint32_t lookup_props, + hb_sorted_array_t<const HBGlyphID> glyphs, + hb_array_t<const HBGlyphID> substitutes) + { + TRACE_SERIALIZE (this); + if (unlikely (!Lookup::serialize (c, SubTable::Single, lookup_props, 1))) return_trace (false); + return_trace (serialize_subtable (c, 0).u.single. + serialize (c, hb_zip (glyphs, substitutes))); + } + + bool serialize_multiple (hb_serialize_context_t *c, + uint32_t lookup_props, + hb_sorted_array_t<const HBGlyphID> glyphs, + hb_array_t<const unsigned int> substitute_len_list, + hb_array_t<const HBGlyphID> substitute_glyphs_list) + { + TRACE_SERIALIZE (this); + if (unlikely (!Lookup::serialize (c, SubTable::Multiple, lookup_props, 1))) return_trace (false); + return_trace (serialize_subtable (c, 0).u.multiple. + serialize (c, + glyphs, + substitute_len_list, + substitute_glyphs_list)); + } + + bool serialize_alternate (hb_serialize_context_t *c, + uint32_t lookup_props, + hb_sorted_array_t<const HBGlyphID> glyphs, + hb_array_t<const unsigned int> alternate_len_list, + hb_array_t<const HBGlyphID> alternate_glyphs_list) + { + TRACE_SERIALIZE (this); + if (unlikely (!Lookup::serialize (c, SubTable::Alternate, lookup_props, 1))) return_trace (false); + return_trace (serialize_subtable (c, 0).u.alternate. + serialize (c, + glyphs, + alternate_len_list, + alternate_glyphs_list)); + } + + bool serialize_ligature (hb_serialize_context_t *c, + uint32_t lookup_props, + hb_sorted_array_t<const HBGlyphID> first_glyphs, + hb_array_t<const unsigned int> ligature_per_first_glyph_count_list, + hb_array_t<const HBGlyphID> ligatures_list, + hb_array_t<const unsigned int> component_count_list, + hb_array_t<const HBGlyphID> component_list /* Starting from second for each ligature */) + { + TRACE_SERIALIZE (this); + if (unlikely (!Lookup::serialize (c, SubTable::Ligature, lookup_props, 1))) return_trace (false); + return_trace (serialize_subtable (c, 0).u.ligature. + serialize (c, + first_glyphs, + ligature_per_first_glyph_count_list, + ligatures_list, + component_count_list, + component_list)); + } + + template <typename context_t> + static inline typename context_t::return_t dispatch_recurse_func (context_t *c, unsigned int lookup_index); + + static inline hb_closure_context_t::return_t dispatch_closure_recurse_func (hb_closure_context_t *c, unsigned int lookup_index) + { + if (!c->should_visit_lookup (lookup_index)) + return hb_empty_t (); + + hb_closure_context_t::return_t ret = dispatch_recurse_func (c, lookup_index); + + /* While in theory we should flush here, it will cause timeouts because a recursive + * lookup can keep growing the glyph set. Skip, and outer loop will retry up to + * HB_CLOSURE_MAX_STAGES time, which should be enough for every realistic font. */ + //c->flush (); + + return ret; + } + + HB_INTERNAL static hb_closure_lookups_context_t::return_t dispatch_closure_lookups_recurse_func (hb_closure_lookups_context_t *c, unsigned lookup_index); + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { return Lookup::dispatch<SubTable> (c, hb_forward<Ts> (ds)...); } + + bool subset (hb_subset_context_t *c) const + { return Lookup::subset<SubTable> (c); } + + bool sanitize (hb_sanitize_context_t *c) const + { return Lookup::sanitize<SubTable> (c); } +}; + +/* + * GSUB -- Glyph Substitution + * https://docs.microsoft.com/en-us/typography/opentype/spec/gsub + */ + +struct GSUB : GSUBGPOS +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_GSUB; + + const SubstLookup& get_lookup (unsigned int i) const + { return static_cast<const SubstLookup &> (GSUBGPOS::get_lookup (i)); } + + bool subset (hb_subset_context_t *c) const + { + hb_subset_layout_context_t l (c, tableTag, c->plan->gsub_lookups, c->plan->gsub_features); + return GSUBGPOS::subset<SubstLookup> (&l); + } + + bool sanitize (hb_sanitize_context_t *c) const + { return GSUBGPOS::sanitize<SubstLookup> (c); } + + HB_INTERNAL bool is_blocklisted (hb_blob_t *blob, + hb_face_t *face) const; + + void closure_lookups (hb_face_t *face, + const hb_set_t *glyphs, + hb_set_t *lookup_indexes /* IN/OUT */) const + { GSUBGPOS::closure_lookups<SubstLookup> (face, glyphs, lookup_indexes); } + + typedef GSUBGPOS::accelerator_t<GSUB> accelerator_t; +}; + + +struct GSUB_accelerator_t : GSUB::accelerator_t {}; + + +/* Out-of-class implementation for methods recursing */ + +#ifndef HB_NO_OT_LAYOUT +/*static*/ inline bool ExtensionSubst::is_reverse () const +{ + return SubstLookup::lookup_type_is_reverse (get_type ()); +} +template <typename context_t> +/*static*/ typename context_t::return_t SubstLookup::dispatch_recurse_func (context_t *c, unsigned int lookup_index) +{ + const SubstLookup &l = c->face->table.GSUB.get_relaxed ()->table->get_lookup (lookup_index); + return l.dispatch (c); +} + +/*static*/ inline hb_closure_lookups_context_t::return_t SubstLookup::dispatch_closure_lookups_recurse_func (hb_closure_lookups_context_t *c, unsigned this_index) +{ + const SubstLookup &l = c->face->table.GSUB.get_relaxed ()->table->get_lookup (this_index); + return l.closure_lookups (c, this_index); +} + +/*static*/ bool SubstLookup::apply_recurse_func (hb_ot_apply_context_t *c, unsigned int lookup_index) +{ + const SubstLookup &l = c->face->table.GSUB.get_relaxed ()->table->get_lookup (lookup_index); + unsigned int saved_lookup_props = c->lookup_props; + unsigned int saved_lookup_index = c->lookup_index; + c->set_lookup_index (lookup_index); + c->set_lookup_props (l.get_props ()); + bool ret = l.dispatch (c); + c->set_lookup_index (saved_lookup_index); + c->set_lookup_props (saved_lookup_props); + return ret; +} +#endif + + +} /* namespace OT */ + + +#endif /* HB_OT_LAYOUT_GSUB_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh new file mode 100644 index 0000000000..03a578cec0 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh @@ -0,0 +1,3422 @@ +/* + * Copyright © 2007,2008,2009,2010 Red Hat, Inc. + * Copyright © 2010,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_LAYOUT_GSUBGPOS_HH +#define HB_OT_LAYOUT_GSUBGPOS_HH + +#include "hb.hh" +#include "hb-buffer.hh" +#include "hb-map.hh" +#include "hb-set.hh" +#include "hb-ot-map.hh" +#include "hb-ot-layout-common.hh" +#include "hb-ot-layout-gdef-table.hh" + + +namespace OT { + + +struct hb_intersects_context_t : + hb_dispatch_context_t<hb_intersects_context_t, bool> +{ + template <typename T> + return_t dispatch (const T &obj) { return obj.intersects (this->glyphs); } + static return_t default_return_value () { return false; } + bool stop_sublookup_iteration (return_t r) const { return r; } + + const hb_set_t *glyphs; + + hb_intersects_context_t (const hb_set_t *glyphs_) : + glyphs (glyphs_) {} +}; + +struct hb_closure_context_t : + hb_dispatch_context_t<hb_closure_context_t> +{ + typedef return_t (*recurse_func_t) (hb_closure_context_t *c, unsigned int lookup_index); + template <typename T> + return_t dispatch (const T &obj) { obj.closure (this); return hb_empty_t (); } + static return_t default_return_value () { return hb_empty_t (); } + void recurse (unsigned int lookup_index) + { + if (unlikely (nesting_level_left == 0 || !recurse_func)) + return; + + nesting_level_left--; + recurse_func (this, lookup_index); + nesting_level_left++; + } + + bool lookup_limit_exceeded () + { return lookup_count > HB_MAX_LOOKUP_INDICES; } + + bool should_visit_lookup (unsigned int lookup_index) + { + if (lookup_count++ > HB_MAX_LOOKUP_INDICES) + return false; + + if (is_lookup_done (lookup_index)) + return false; + + done_lookups->set (lookup_index, glyphs->get_population ()); + return true; + } + + bool is_lookup_done (unsigned int lookup_index) + { + if (done_lookups->in_error ()) + return true; + + /* Have we visited this lookup with the current set of glyphs? */ + return done_lookups->get (lookup_index) == glyphs->get_population (); + } + + hb_face_t *face; + hb_set_t *glyphs; + hb_set_t output[1]; + recurse_func_t recurse_func; + unsigned int nesting_level_left; + + hb_closure_context_t (hb_face_t *face_, + hb_set_t *glyphs_, + hb_map_t *done_lookups_, + unsigned int nesting_level_left_ = HB_MAX_NESTING_LEVEL) : + face (face_), + glyphs (glyphs_), + recurse_func (nullptr), + nesting_level_left (nesting_level_left_), + done_lookups (done_lookups_), + lookup_count (0) + {} + + ~hb_closure_context_t () { flush (); } + + void set_recurse_func (recurse_func_t func) { recurse_func = func; } + + void flush () + { + hb_set_del_range (output, face->get_num_glyphs (), hb_set_get_max (output)); /* Remove invalid glyphs. */ + hb_set_union (glyphs, output); + hb_set_clear (output); + } + + private: + hb_map_t *done_lookups; + unsigned int lookup_count; +}; + +struct hb_closure_lookups_context_t : + hb_dispatch_context_t<hb_closure_lookups_context_t> +{ + typedef return_t (*recurse_func_t) (hb_closure_lookups_context_t *c, unsigned lookup_index); + template <typename T> + return_t dispatch (const T &obj) { obj.closure_lookups (this); return hb_empty_t (); } + static return_t default_return_value () { return hb_empty_t (); } + void recurse (unsigned lookup_index) + { + if (unlikely (nesting_level_left == 0 || !recurse_func)) + return; + + /* Return if new lookup was recursed to before. */ + if (is_lookup_visited (lookup_index)) + return; + + set_lookup_visited (lookup_index); + nesting_level_left--; + recurse_func (this, lookup_index); + nesting_level_left++; + } + + void set_lookup_visited (unsigned lookup_index) + { visited_lookups->add (lookup_index); } + + void set_lookup_inactive (unsigned lookup_index) + { inactive_lookups->add (lookup_index); } + + bool lookup_limit_exceeded () + { return lookup_count > HB_MAX_LOOKUP_INDICES; } + + bool is_lookup_visited (unsigned lookup_index) + { + if (lookup_count++ > HB_MAX_LOOKUP_INDICES) + return true; + + if (visited_lookups->in_error ()) + return true; + + return visited_lookups->has (lookup_index); + } + + hb_face_t *face; + const hb_set_t *glyphs; + recurse_func_t recurse_func; + unsigned int nesting_level_left; + + hb_closure_lookups_context_t (hb_face_t *face_, + const hb_set_t *glyphs_, + hb_set_t *visited_lookups_, + hb_set_t *inactive_lookups_, + unsigned nesting_level_left_ = HB_MAX_NESTING_LEVEL) : + face (face_), + glyphs (glyphs_), + recurse_func (nullptr), + nesting_level_left (nesting_level_left_), + visited_lookups (visited_lookups_), + inactive_lookups (inactive_lookups_), + lookup_count (0) {} + + void set_recurse_func (recurse_func_t func) { recurse_func = func; } + + private: + hb_set_t *visited_lookups; + hb_set_t *inactive_lookups; + unsigned int lookup_count; +}; + +struct hb_would_apply_context_t : + hb_dispatch_context_t<hb_would_apply_context_t, bool> +{ + template <typename T> + return_t dispatch (const T &obj) { return obj.would_apply (this); } + static return_t default_return_value () { return false; } + bool stop_sublookup_iteration (return_t r) const { return r; } + + hb_face_t *face; + const hb_codepoint_t *glyphs; + unsigned int len; + bool zero_context; + + hb_would_apply_context_t (hb_face_t *face_, + const hb_codepoint_t *glyphs_, + unsigned int len_, + bool zero_context_) : + face (face_), + glyphs (glyphs_), + len (len_), + zero_context (zero_context_) {} +}; + +struct hb_collect_glyphs_context_t : + hb_dispatch_context_t<hb_collect_glyphs_context_t> +{ + typedef return_t (*recurse_func_t) (hb_collect_glyphs_context_t *c, unsigned int lookup_index); + template <typename T> + return_t dispatch (const T &obj) { obj.collect_glyphs (this); return hb_empty_t (); } + static return_t default_return_value () { return hb_empty_t (); } + void recurse (unsigned int lookup_index) + { + if (unlikely (nesting_level_left == 0 || !recurse_func)) + return; + + /* Note that GPOS sets recurse_func to nullptr already, so it doesn't get + * past the previous check. For GSUB, we only want to collect the output + * glyphs in the recursion. If output is not requested, we can go home now. + * + * Note further, that the above is not exactly correct. A recursed lookup + * is allowed to match input that is not matched in the context, but that's + * not how most fonts are built. It's possible to relax that and recurse + * with all sets here if it proves to be an issue. + */ + + if (output == hb_set_get_empty ()) + return; + + /* Return if new lookup was recursed to before. */ + if (recursed_lookups->has (lookup_index)) + return; + + hb_set_t *old_before = before; + hb_set_t *old_input = input; + hb_set_t *old_after = after; + before = input = after = hb_set_get_empty (); + + nesting_level_left--; + recurse_func (this, lookup_index); + nesting_level_left++; + + before = old_before; + input = old_input; + after = old_after; + + recursed_lookups->add (lookup_index); + } + + hb_face_t *face; + hb_set_t *before; + hb_set_t *input; + hb_set_t *after; + hb_set_t *output; + recurse_func_t recurse_func; + hb_set_t *recursed_lookups; + unsigned int nesting_level_left; + + hb_collect_glyphs_context_t (hb_face_t *face_, + hb_set_t *glyphs_before, /* OUT. May be NULL */ + hb_set_t *glyphs_input, /* OUT. May be NULL */ + hb_set_t *glyphs_after, /* OUT. May be NULL */ + hb_set_t *glyphs_output, /* OUT. May be NULL */ + unsigned int nesting_level_left_ = HB_MAX_NESTING_LEVEL) : + face (face_), + before (glyphs_before ? glyphs_before : hb_set_get_empty ()), + input (glyphs_input ? glyphs_input : hb_set_get_empty ()), + after (glyphs_after ? glyphs_after : hb_set_get_empty ()), + output (glyphs_output ? glyphs_output : hb_set_get_empty ()), + recurse_func (nullptr), + recursed_lookups (hb_set_create ()), + nesting_level_left (nesting_level_left_) {} + ~hb_collect_glyphs_context_t () { hb_set_destroy (recursed_lookups); } + + void set_recurse_func (recurse_func_t func) { recurse_func = func; } +}; + + + +template <typename set_t> +struct hb_collect_coverage_context_t : + hb_dispatch_context_t<hb_collect_coverage_context_t<set_t>, const Coverage &> +{ + typedef const Coverage &return_t; // Stoopid that we have to dupe this here. + template <typename T> + return_t dispatch (const T &obj) { return obj.get_coverage (); } + static return_t default_return_value () { return Null (Coverage); } + bool stop_sublookup_iteration (return_t r) const + { + r.collect_coverage (set); + return false; + } + + hb_collect_coverage_context_t (set_t *set_) : + set (set_) {} + + set_t *set; +}; + + +struct hb_ot_apply_context_t : + hb_dispatch_context_t<hb_ot_apply_context_t, bool, HB_DEBUG_APPLY> +{ + struct matcher_t + { + matcher_t () : + lookup_props (0), + ignore_zwnj (false), + ignore_zwj (false), + mask (-1), +#define arg1(arg) (arg) /* Remove the macro to see why it's needed! */ + syllable arg1(0), +#undef arg1 + match_func (nullptr), + match_data (nullptr) {} + + typedef bool (*match_func_t) (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data); + + void set_ignore_zwnj (bool ignore_zwnj_) { ignore_zwnj = ignore_zwnj_; } + void set_ignore_zwj (bool ignore_zwj_) { ignore_zwj = ignore_zwj_; } + void set_lookup_props (unsigned int lookup_props_) { lookup_props = lookup_props_; } + void set_mask (hb_mask_t mask_) { mask = mask_; } + void set_syllable (uint8_t syllable_) { syllable = syllable_; } + void set_match_func (match_func_t match_func_, + const void *match_data_) + { match_func = match_func_; match_data = match_data_; } + + enum may_match_t { + MATCH_NO, + MATCH_YES, + MATCH_MAYBE + }; + + may_match_t may_match (const hb_glyph_info_t &info, + const HBUINT16 *glyph_data) const + { + if (!(info.mask & mask) || + (syllable && syllable != info.syllable ())) + return MATCH_NO; + + if (match_func) + return match_func (info.codepoint, *glyph_data, match_data) ? MATCH_YES : MATCH_NO; + + return MATCH_MAYBE; + } + + enum may_skip_t { + SKIP_NO, + SKIP_YES, + SKIP_MAYBE + }; + + may_skip_t may_skip (const hb_ot_apply_context_t *c, + const hb_glyph_info_t &info) const + { + if (!c->check_glyph_property (&info, lookup_props)) + return SKIP_YES; + + if (unlikely (_hb_glyph_info_is_default_ignorable_and_not_hidden (&info) && + (ignore_zwnj || !_hb_glyph_info_is_zwnj (&info)) && + (ignore_zwj || !_hb_glyph_info_is_zwj (&info)))) + return SKIP_MAYBE; + + return SKIP_NO; + } + + protected: + unsigned int lookup_props; + bool ignore_zwnj; + bool ignore_zwj; + hb_mask_t mask; + uint8_t syllable; + match_func_t match_func; + const void *match_data; + }; + + struct skipping_iterator_t + { + void init (hb_ot_apply_context_t *c_, bool context_match = false) + { + c = c_; + match_glyph_data = nullptr; + matcher.set_match_func (nullptr, nullptr); + matcher.set_lookup_props (c->lookup_props); + /* Ignore ZWNJ if we are matching GPOS, or matching GSUB context and asked to. */ + matcher.set_ignore_zwnj (c->table_index == 1 || (context_match && c->auto_zwnj)); + /* Ignore ZWJ if we are matching context, or asked to. */ + matcher.set_ignore_zwj (context_match || c->auto_zwj); + matcher.set_mask (context_match ? -1 : c->lookup_mask); + } + void set_lookup_props (unsigned int lookup_props) + { + matcher.set_lookup_props (lookup_props); + } + void set_match_func (matcher_t::match_func_t match_func_, + const void *match_data_, + const HBUINT16 glyph_data[]) + { + matcher.set_match_func (match_func_, match_data_); + match_glyph_data = glyph_data; + } + + void reset (unsigned int start_index_, + unsigned int num_items_) + { + idx = start_index_; + num_items = num_items_; + end = c->buffer->len; + matcher.set_syllable (start_index_ == c->buffer->idx ? c->buffer->cur().syllable () : 0); + } + + void reject () + { + num_items++; + if (match_glyph_data) match_glyph_data--; + } + + matcher_t::may_skip_t + may_skip (const hb_glyph_info_t &info) const + { return matcher.may_skip (c, info); } + + bool next () + { + assert (num_items > 0); + while (idx + num_items < end) + { + idx++; + const hb_glyph_info_t &info = c->buffer->info[idx]; + + matcher_t::may_skip_t skip = matcher.may_skip (c, info); + if (unlikely (skip == matcher_t::SKIP_YES)) + continue; + + matcher_t::may_match_t match = matcher.may_match (info, match_glyph_data); + if (match == matcher_t::MATCH_YES || + (match == matcher_t::MATCH_MAYBE && + skip == matcher_t::SKIP_NO)) + { + num_items--; + if (match_glyph_data) match_glyph_data++; + return true; + } + + if (skip == matcher_t::SKIP_NO) + return false; + } + return false; + } + bool prev () + { + assert (num_items > 0); + while (idx > num_items - 1) + { + idx--; + const hb_glyph_info_t &info = c->buffer->out_info[idx]; + + matcher_t::may_skip_t skip = matcher.may_skip (c, info); + if (unlikely (skip == matcher_t::SKIP_YES)) + continue; + + matcher_t::may_match_t match = matcher.may_match (info, match_glyph_data); + if (match == matcher_t::MATCH_YES || + (match == matcher_t::MATCH_MAYBE && + skip == matcher_t::SKIP_NO)) + { + num_items--; + if (match_glyph_data) match_glyph_data++; + return true; + } + + if (skip == matcher_t::SKIP_NO) + return false; + } + return false; + } + + unsigned int idx; + protected: + hb_ot_apply_context_t *c; + matcher_t matcher; + const HBUINT16 *match_glyph_data; + + unsigned int num_items; + unsigned int end; + }; + + + const char *get_name () { return "APPLY"; } + typedef return_t (*recurse_func_t) (hb_ot_apply_context_t *c, unsigned int lookup_index); + template <typename T> + return_t dispatch (const T &obj) { return obj.apply (this); } + static return_t default_return_value () { return false; } + bool stop_sublookup_iteration (return_t r) const { return r; } + return_t recurse (unsigned int sub_lookup_index) + { + if (unlikely (nesting_level_left == 0 || !recurse_func || buffer->max_ops-- <= 0)) + return default_return_value (); + + nesting_level_left--; + bool ret = recurse_func (this, sub_lookup_index); + nesting_level_left++; + return ret; + } + + skipping_iterator_t iter_input, iter_context; + + hb_font_t *font; + hb_face_t *face; + hb_buffer_t *buffer; + recurse_func_t recurse_func; + const GDEF &gdef; + const VariationStore &var_store; + + hb_direction_t direction; + hb_mask_t lookup_mask; + unsigned int table_index; /* GSUB/GPOS */ + unsigned int lookup_index; + unsigned int lookup_props; + unsigned int nesting_level_left; + + bool has_glyph_classes; + bool auto_zwnj; + bool auto_zwj; + bool random; + + uint32_t random_state; + + + hb_ot_apply_context_t (unsigned int table_index_, + hb_font_t *font_, + hb_buffer_t *buffer_) : + iter_input (), iter_context (), + font (font_), face (font->face), buffer (buffer_), + recurse_func (nullptr), + gdef ( +#ifndef HB_NO_OT_LAYOUT + *face->table.GDEF->table +#else + Null (GDEF) +#endif + ), + var_store (gdef.get_var_store ()), + direction (buffer_->props.direction), + lookup_mask (1), + table_index (table_index_), + lookup_index ((unsigned int) -1), + lookup_props (0), + nesting_level_left (HB_MAX_NESTING_LEVEL), + has_glyph_classes (gdef.has_glyph_classes ()), + auto_zwnj (true), + auto_zwj (true), + random (false), + random_state (1) { init_iters (); } + + void init_iters () + { + iter_input.init (this, false); + iter_context.init (this, true); + } + + void set_lookup_mask (hb_mask_t mask) { lookup_mask = mask; init_iters (); } + void set_auto_zwj (bool auto_zwj_) { auto_zwj = auto_zwj_; init_iters (); } + void set_auto_zwnj (bool auto_zwnj_) { auto_zwnj = auto_zwnj_; init_iters (); } + void set_random (bool random_) { random = random_; } + void set_recurse_func (recurse_func_t func) { recurse_func = func; } + void set_lookup_index (unsigned int lookup_index_) { lookup_index = lookup_index_; } + void set_lookup_props (unsigned int lookup_props_) { lookup_props = lookup_props_; init_iters (); } + + uint32_t random_number () + { + /* http://www.cplusplus.com/reference/random/minstd_rand/ */ + random_state = random_state * 48271 % 2147483647; + return random_state; + } + + bool match_properties_mark (hb_codepoint_t glyph, + unsigned int glyph_props, + unsigned int match_props) const + { + /* If using mark filtering sets, the high short of + * match_props has the set index. + */ + if (match_props & LookupFlag::UseMarkFilteringSet) + return gdef.mark_set_covers (match_props >> 16, glyph); + + /* The second byte of match_props has the meaning + * "ignore marks of attachment type different than + * the attachment type specified." + */ + if (match_props & LookupFlag::MarkAttachmentType) + return (match_props & LookupFlag::MarkAttachmentType) == (glyph_props & LookupFlag::MarkAttachmentType); + + return true; + } + + bool check_glyph_property (const hb_glyph_info_t *info, + unsigned int match_props) const + { + hb_codepoint_t glyph = info->codepoint; + unsigned int glyph_props = _hb_glyph_info_get_glyph_props (info); + + /* Not covered, if, for example, glyph class is ligature and + * match_props includes LookupFlags::IgnoreLigatures + */ + if (glyph_props & match_props & LookupFlag::IgnoreFlags) + return false; + + if (unlikely (glyph_props & HB_OT_LAYOUT_GLYPH_PROPS_MARK)) + return match_properties_mark (glyph, glyph_props, match_props); + + return true; + } + + void _set_glyph_props (hb_codepoint_t glyph_index, + unsigned int class_guess = 0, + bool ligature = false, + bool component = false) const + { + unsigned int add_in = _hb_glyph_info_get_glyph_props (&buffer->cur()) & + HB_OT_LAYOUT_GLYPH_PROPS_PRESERVE; + add_in |= HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED; + if (ligature) + { + add_in |= HB_OT_LAYOUT_GLYPH_PROPS_LIGATED; + /* In the only place that the MULTIPLIED bit is used, Uniscribe + * seems to only care about the "last" transformation between + * Ligature and Multiple substitutions. Ie. if you ligate, expand, + * and ligate again, it forgives the multiplication and acts as + * if only ligation happened. As such, clear MULTIPLIED bit. + */ + add_in &= ~HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED; + } + if (component) + add_in |= HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED; + if (likely (has_glyph_classes)) + _hb_glyph_info_set_glyph_props (&buffer->cur(), add_in | gdef.get_glyph_props (glyph_index)); + else if (class_guess) + _hb_glyph_info_set_glyph_props (&buffer->cur(), add_in | class_guess); + } + + void replace_glyph (hb_codepoint_t glyph_index) const + { + _set_glyph_props (glyph_index); + buffer->replace_glyph (glyph_index); + } + void replace_glyph_inplace (hb_codepoint_t glyph_index) const + { + _set_glyph_props (glyph_index); + buffer->cur().codepoint = glyph_index; + } + void replace_glyph_with_ligature (hb_codepoint_t glyph_index, + unsigned int class_guess) const + { + _set_glyph_props (glyph_index, class_guess, true); + buffer->replace_glyph (glyph_index); + } + void output_glyph_for_component (hb_codepoint_t glyph_index, + unsigned int class_guess) const + { + _set_glyph_props (glyph_index, class_guess, false, true); + buffer->output_glyph (glyph_index); + } +}; + + +struct hb_get_subtables_context_t : + hb_dispatch_context_t<hb_get_subtables_context_t> +{ + template <typename Type> + static inline bool apply_to (const void *obj, OT::hb_ot_apply_context_t *c) + { + const Type *typed_obj = (const Type *) obj; + return typed_obj->apply (c); + } + + typedef bool (*hb_apply_func_t) (const void *obj, OT::hb_ot_apply_context_t *c); + + struct hb_applicable_t + { + template <typename T> + void init (const T &obj_, hb_apply_func_t apply_func_) + { + obj = &obj_; + apply_func = apply_func_; + digest.init (); + obj_.get_coverage ().collect_coverage (&digest); + } + + bool apply (OT::hb_ot_apply_context_t *c) const + { + return digest.may_have (c->buffer->cur().codepoint) && apply_func (obj, c); + } + + private: + const void *obj; + hb_apply_func_t apply_func; + hb_set_digest_t digest; + }; + + typedef hb_vector_t<hb_applicable_t> array_t; + + /* Dispatch interface. */ + template <typename T> + return_t dispatch (const T &obj) + { + hb_applicable_t *entry = array.push(); + entry->init (obj, apply_to<T>); + return hb_empty_t (); + } + static return_t default_return_value () { return hb_empty_t (); } + + hb_get_subtables_context_t (array_t &array_) : + array (array_) {} + + array_t &array; +}; + + + + +typedef bool (*intersects_func_t) (const hb_set_t *glyphs, const HBUINT16 &value, const void *data); +typedef void (*collect_glyphs_func_t) (hb_set_t *glyphs, const HBUINT16 &value, const void *data); +typedef bool (*match_func_t) (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data); + +struct ContextClosureFuncs +{ + intersects_func_t intersects; +}; +struct ContextCollectGlyphsFuncs +{ + collect_glyphs_func_t collect; +}; +struct ContextApplyFuncs +{ + match_func_t match; +}; + + +static inline bool intersects_glyph (const hb_set_t *glyphs, const HBUINT16 &value, const void *data HB_UNUSED) +{ + return glyphs->has (value); +} +static inline bool intersects_class (const hb_set_t *glyphs, const HBUINT16 &value, const void *data) +{ + const ClassDef &class_def = *reinterpret_cast<const ClassDef *>(data); + return class_def.intersects_class (glyphs, value); +} +static inline bool intersects_coverage (const hb_set_t *glyphs, const HBUINT16 &value, const void *data) +{ + const OffsetTo<Coverage> &coverage = (const OffsetTo<Coverage>&)value; + return (data+coverage).intersects (glyphs); +} + +static inline bool array_is_subset_of (const hb_set_t *glyphs, + unsigned int count, + const HBUINT16 values[], + intersects_func_t intersects_func, + const void *intersects_data) +{ + for (const HBUINT16 &_ : + hb_iter (values, count)) + if (!intersects_func (glyphs, _, intersects_data)) return false; + return true; +} + + +static inline void collect_glyph (hb_set_t *glyphs, const HBUINT16 &value, const void *data HB_UNUSED) +{ + glyphs->add (value); +} +static inline void collect_class (hb_set_t *glyphs, const HBUINT16 &value, const void *data) +{ + const ClassDef &class_def = *reinterpret_cast<const ClassDef *>(data); + class_def.collect_class (glyphs, value); +} +static inline void collect_coverage (hb_set_t *glyphs, const HBUINT16 &value, const void *data) +{ + const OffsetTo<Coverage> &coverage = (const OffsetTo<Coverage>&)value; + (data+coverage).collect_coverage (glyphs); +} +static inline void collect_array (hb_collect_glyphs_context_t *c HB_UNUSED, + hb_set_t *glyphs, + unsigned int count, + const HBUINT16 values[], + collect_glyphs_func_t collect_func, + const void *collect_data) +{ + return + + hb_iter (values, count) + | hb_apply ([&] (const HBUINT16 &_) { collect_func (glyphs, _, collect_data); }) + ; +} + + +static inline bool match_glyph (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data HB_UNUSED) +{ + return glyph_id == value; +} +static inline bool match_class (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data) +{ + const ClassDef &class_def = *reinterpret_cast<const ClassDef *>(data); + return class_def.get_class (glyph_id) == value; +} +static inline bool match_coverage (hb_codepoint_t glyph_id, const HBUINT16 &value, const void *data) +{ + const OffsetTo<Coverage> &coverage = (const OffsetTo<Coverage>&)value; + return (data+coverage).get_coverage (glyph_id) != NOT_COVERED; +} + +static inline bool would_match_input (hb_would_apply_context_t *c, + unsigned int count, /* Including the first glyph (not matched) */ + const HBUINT16 input[], /* Array of input values--start with second glyph */ + match_func_t match_func, + const void *match_data) +{ + if (count != c->len) + return false; + + for (unsigned int i = 1; i < count; i++) + if (likely (!match_func (c->glyphs[i], input[i - 1], match_data))) + return false; + + return true; +} +static inline bool match_input (hb_ot_apply_context_t *c, + unsigned int count, /* Including the first glyph (not matched) */ + const HBUINT16 input[], /* Array of input values--start with second glyph */ + match_func_t match_func, + const void *match_data, + unsigned int *end_offset, + unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], + unsigned int *p_total_component_count = nullptr) +{ + TRACE_APPLY (nullptr); + + if (unlikely (count > HB_MAX_CONTEXT_LENGTH)) return_trace (false); + + hb_buffer_t *buffer = c->buffer; + + hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_input; + skippy_iter.reset (buffer->idx, count - 1); + skippy_iter.set_match_func (match_func, match_data, input); + + /* + * This is perhaps the trickiest part of OpenType... Remarks: + * + * - If all components of the ligature were marks, we call this a mark ligature. + * + * - If there is no GDEF, and the ligature is NOT a mark ligature, we categorize + * it as a ligature glyph. + * + * - Ligatures cannot be formed across glyphs attached to different components + * of previous ligatures. Eg. the sequence is LAM,SHADDA,LAM,FATHA,HEH, and + * LAM,LAM,HEH form a ligature, leaving SHADDA,FATHA next to eachother. + * However, it would be wrong to ligate that SHADDA,FATHA sequence. + * There are a couple of exceptions to this: + * + * o If a ligature tries ligating with marks that belong to it itself, go ahead, + * assuming that the font designer knows what they are doing (otherwise it can + * break Indic stuff when a matra wants to ligate with a conjunct, + * + * o If two marks want to ligate and they belong to different components of the + * same ligature glyph, and said ligature glyph is to be ignored according to + * mark-filtering rules, then allow. + * https://github.com/harfbuzz/harfbuzz/issues/545 + */ + + unsigned int total_component_count = 0; + total_component_count += _hb_glyph_info_get_lig_num_comps (&buffer->cur()); + + unsigned int first_lig_id = _hb_glyph_info_get_lig_id (&buffer->cur()); + unsigned int first_lig_comp = _hb_glyph_info_get_lig_comp (&buffer->cur()); + + enum { + LIGBASE_NOT_CHECKED, + LIGBASE_MAY_NOT_SKIP, + LIGBASE_MAY_SKIP + } ligbase = LIGBASE_NOT_CHECKED; + + match_positions[0] = buffer->idx; + for (unsigned int i = 1; i < count; i++) + { + if (!skippy_iter.next ()) return_trace (false); + + match_positions[i] = skippy_iter.idx; + + unsigned int this_lig_id = _hb_glyph_info_get_lig_id (&buffer->info[skippy_iter.idx]); + unsigned int this_lig_comp = _hb_glyph_info_get_lig_comp (&buffer->info[skippy_iter.idx]); + + if (first_lig_id && first_lig_comp) + { + /* If first component was attached to a previous ligature component, + * all subsequent components should be attached to the same ligature + * component, otherwise we shouldn't ligate them... */ + if (first_lig_id != this_lig_id || first_lig_comp != this_lig_comp) + { + /* ...unless, we are attached to a base ligature and that base + * ligature is ignorable. */ + if (ligbase == LIGBASE_NOT_CHECKED) + { + bool found = false; + const auto *out = buffer->out_info; + unsigned int j = buffer->out_len; + while (j && _hb_glyph_info_get_lig_id (&out[j - 1]) == first_lig_id) + { + if (_hb_glyph_info_get_lig_comp (&out[j - 1]) == 0) + { + j--; + found = true; + break; + } + j--; + } + + if (found && skippy_iter.may_skip (out[j]) == hb_ot_apply_context_t::matcher_t::SKIP_YES) + ligbase = LIGBASE_MAY_SKIP; + else + ligbase = LIGBASE_MAY_NOT_SKIP; + } + + if (ligbase == LIGBASE_MAY_NOT_SKIP) + return_trace (false); + } + } + else + { + /* If first component was NOT attached to a previous ligature component, + * all subsequent components should also NOT be attached to any ligature + * component, unless they are attached to the first component itself! */ + if (this_lig_id && this_lig_comp && (this_lig_id != first_lig_id)) + return_trace (false); + } + + total_component_count += _hb_glyph_info_get_lig_num_comps (&buffer->info[skippy_iter.idx]); + } + + *end_offset = skippy_iter.idx - buffer->idx + 1; + + if (p_total_component_count) + *p_total_component_count = total_component_count; + + return_trace (true); +} +static inline bool ligate_input (hb_ot_apply_context_t *c, + unsigned int count, /* Including the first glyph */ + const unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], /* Including the first glyph */ + unsigned int match_length, + hb_codepoint_t lig_glyph, + unsigned int total_component_count) +{ + TRACE_APPLY (nullptr); + + hb_buffer_t *buffer = c->buffer; + + buffer->merge_clusters (buffer->idx, buffer->idx + match_length); + + /* - If a base and one or more marks ligate, consider that as a base, NOT + * ligature, such that all following marks can still attach to it. + * https://github.com/harfbuzz/harfbuzz/issues/1109 + * + * - If all components of the ligature were marks, we call this a mark ligature. + * If it *is* a mark ligature, we don't allocate a new ligature id, and leave + * the ligature to keep its old ligature id. This will allow it to attach to + * a base ligature in GPOS. Eg. if the sequence is: LAM,LAM,SHADDA,FATHA,HEH, + * and LAM,LAM,HEH for a ligature, they will leave SHADDA and FATHA with a + * ligature id and component value of 2. Then if SHADDA,FATHA form a ligature + * later, we don't want them to lose their ligature id/component, otherwise + * GPOS will fail to correctly position the mark ligature on top of the + * LAM,LAM,HEH ligature. See: + * https://bugzilla.gnome.org/show_bug.cgi?id=676343 + * + * - If a ligature is formed of components that some of which are also ligatures + * themselves, and those ligature components had marks attached to *their* + * components, we have to attach the marks to the new ligature component + * positions! Now *that*'s tricky! And these marks may be following the + * last component of the whole sequence, so we should loop forward looking + * for them and update them. + * + * Eg. the sequence is LAM,LAM,SHADDA,FATHA,HEH, and the font first forms a + * 'calt' ligature of LAM,HEH, leaving the SHADDA and FATHA with a ligature + * id and component == 1. Now, during 'liga', the LAM and the LAM-HEH ligature + * form a LAM-LAM-HEH ligature. We need to reassign the SHADDA and FATHA to + * the new ligature with a component value of 2. + * + * This in fact happened to a font... See: + * https://bugzilla.gnome.org/show_bug.cgi?id=437633 + */ + + bool is_base_ligature = _hb_glyph_info_is_base_glyph (&buffer->info[match_positions[0]]); + bool is_mark_ligature = _hb_glyph_info_is_mark (&buffer->info[match_positions[0]]); + for (unsigned int i = 1; i < count; i++) + if (!_hb_glyph_info_is_mark (&buffer->info[match_positions[i]])) + { + is_base_ligature = false; + is_mark_ligature = false; + break; + } + bool is_ligature = !is_base_ligature && !is_mark_ligature; + + unsigned int klass = is_ligature ? HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE : 0; + unsigned int lig_id = is_ligature ? _hb_allocate_lig_id (buffer) : 0; + unsigned int last_lig_id = _hb_glyph_info_get_lig_id (&buffer->cur()); + unsigned int last_num_components = _hb_glyph_info_get_lig_num_comps (&buffer->cur()); + unsigned int components_so_far = last_num_components; + + if (is_ligature) + { + _hb_glyph_info_set_lig_props_for_ligature (&buffer->cur(), lig_id, total_component_count); + if (_hb_glyph_info_get_general_category (&buffer->cur()) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) + { + _hb_glyph_info_set_general_category (&buffer->cur(), HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER); + } + } + c->replace_glyph_with_ligature (lig_glyph, klass); + + for (unsigned int i = 1; i < count; i++) + { + while (buffer->idx < match_positions[i] && buffer->successful) + { + if (is_ligature) + { + unsigned int this_comp = _hb_glyph_info_get_lig_comp (&buffer->cur()); + if (this_comp == 0) + this_comp = last_num_components; + unsigned int new_lig_comp = components_so_far - last_num_components + + hb_min (this_comp, last_num_components); + _hb_glyph_info_set_lig_props_for_mark (&buffer->cur(), lig_id, new_lig_comp); + } + buffer->next_glyph (); + } + + last_lig_id = _hb_glyph_info_get_lig_id (&buffer->cur()); + last_num_components = _hb_glyph_info_get_lig_num_comps (&buffer->cur()); + components_so_far += last_num_components; + + /* Skip the base glyph */ + buffer->idx++; + } + + if (!is_mark_ligature && last_lig_id) + { + /* Re-adjust components for any marks following. */ + for (unsigned i = buffer->idx; i < buffer->len; ++i) + { + if (last_lig_id != _hb_glyph_info_get_lig_id (&buffer->info[i])) break; + + unsigned this_comp = _hb_glyph_info_get_lig_comp (&buffer->info[i]); + if (!this_comp) break; + + unsigned new_lig_comp = components_so_far - last_num_components + + hb_min (this_comp, last_num_components); + _hb_glyph_info_set_lig_props_for_mark (&buffer->info[i], lig_id, new_lig_comp); + } + } + return_trace (true); +} + +static inline bool match_backtrack (hb_ot_apply_context_t *c, + unsigned int count, + const HBUINT16 backtrack[], + match_func_t match_func, + const void *match_data, + unsigned int *match_start) +{ + TRACE_APPLY (nullptr); + + hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_context; + skippy_iter.reset (c->buffer->backtrack_len (), count); + skippy_iter.set_match_func (match_func, match_data, backtrack); + + for (unsigned int i = 0; i < count; i++) + if (!skippy_iter.prev ()) + return_trace (false); + + *match_start = skippy_iter.idx; + + return_trace (true); +} + +static inline bool match_lookahead (hb_ot_apply_context_t *c, + unsigned int count, + const HBUINT16 lookahead[], + match_func_t match_func, + const void *match_data, + unsigned int offset, + unsigned int *end_index) +{ + TRACE_APPLY (nullptr); + + hb_ot_apply_context_t::skipping_iterator_t &skippy_iter = c->iter_context; + skippy_iter.reset (c->buffer->idx + offset - 1, count); + skippy_iter.set_match_func (match_func, match_data, lookahead); + + for (unsigned int i = 0; i < count; i++) + if (!skippy_iter.next ()) + return_trace (false); + + *end_index = skippy_iter.idx + 1; + + return_trace (true); +} + + + +struct LookupRecord +{ + LookupRecord* copy (hb_serialize_context_t *c, + const hb_map_t *lookup_map) const + { + TRACE_SERIALIZE (this); + auto *out = c->embed (*this); + if (unlikely (!out)) return_trace (nullptr); + + out->lookupListIndex = hb_map_get (lookup_map, lookupListIndex); + return_trace (out); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + HBUINT16 sequenceIndex; /* Index into current glyph + * sequence--first glyph = 0 */ + HBUINT16 lookupListIndex; /* Lookup to apply to that + * position--zero--based */ + public: + DEFINE_SIZE_STATIC (4); +}; + +template <typename context_t> +static inline void recurse_lookups (context_t *c, + unsigned int lookupCount, + const LookupRecord lookupRecord[] /* Array of LookupRecords--in design order */) +{ + for (unsigned int i = 0; i < lookupCount; i++) + c->recurse (lookupRecord[i].lookupListIndex); +} + +static inline bool apply_lookup (hb_ot_apply_context_t *c, + unsigned int count, /* Including the first glyph */ + unsigned int match_positions[HB_MAX_CONTEXT_LENGTH], /* Including the first glyph */ + unsigned int lookupCount, + const LookupRecord lookupRecord[], /* Array of LookupRecords--in design order */ + unsigned int match_length) +{ + TRACE_APPLY (nullptr); + + hb_buffer_t *buffer = c->buffer; + int end; + + /* All positions are distance from beginning of *output* buffer. + * Adjust. */ + { + unsigned int bl = buffer->backtrack_len (); + end = bl + match_length; + + int delta = bl - buffer->idx; + /* Convert positions to new indexing. */ + for (unsigned int j = 0; j < count; j++) + match_positions[j] += delta; + } + + for (unsigned int i = 0; i < lookupCount && buffer->successful; i++) + { + unsigned int idx = lookupRecord[i].sequenceIndex; + if (idx >= count) + continue; + + /* Don't recurse to ourself at same position. + * Note that this test is too naive, it doesn't catch longer loops. */ + if (idx == 0 && lookupRecord[i].lookupListIndex == c->lookup_index) + continue; + + if (unlikely (!buffer->move_to (match_positions[idx]))) + break; + + if (unlikely (buffer->max_ops <= 0)) + break; + + unsigned int orig_len = buffer->backtrack_len () + buffer->lookahead_len (); + if (!c->recurse (lookupRecord[i].lookupListIndex)) + continue; + + unsigned int new_len = buffer->backtrack_len () + buffer->lookahead_len (); + int delta = new_len - orig_len; + + if (!delta) + continue; + + /* Recursed lookup changed buffer len. Adjust. + * + * TODO: + * + * Right now, if buffer length increased by n, we assume n new glyphs + * were added right after the current position, and if buffer length + * was decreased by n, we assume n match positions after the current + * one where removed. The former (buffer length increased) case is + * fine, but the decrease case can be improved in at least two ways, + * both of which are significant: + * + * - If recursed-to lookup is MultipleSubst and buffer length + * decreased, then it's current match position that was deleted, + * NOT the one after it. + * + * - If buffer length was decreased by n, it does not necessarily + * mean that n match positions where removed, as there might + * have been marks and default-ignorables in the sequence. We + * should instead drop match positions between current-position + * and current-position + n instead. + * + * It should be possible to construct tests for both of these cases. + */ + + end += delta; + if (end <= int (match_positions[idx])) + { + /* End might end up being smaller than match_positions[idx] if the recursed + * lookup ended up removing many items, more than we have had matched. + * Just never rewind end back and get out of here. + * https://bugs.chromium.org/p/chromium/issues/detail?id=659496 */ + end = match_positions[idx]; + /* There can't be any further changes. */ + break; + } + + unsigned int next = idx + 1; /* next now is the position after the recursed lookup. */ + + if (delta > 0) + { + if (unlikely (delta + count > HB_MAX_CONTEXT_LENGTH)) + break; + } + else + { + /* NOTE: delta is negative. */ + delta = hb_max (delta, (int) next - (int) count); + next -= delta; + } + + /* Shift! */ + memmove (match_positions + next + delta, match_positions + next, + (count - next) * sizeof (match_positions[0])); + next += delta; + count += delta; + + /* Fill in new entries. */ + for (unsigned int j = idx + 1; j < next; j++) + match_positions[j] = match_positions[j - 1] + 1; + + /* And fixup the rest. */ + for (; next < count; next++) + match_positions[next] += delta; + } + + buffer->move_to (end); + + return_trace (true); +} + + + +/* Contextual lookups */ + +struct ContextClosureLookupContext +{ + ContextClosureFuncs funcs; + const void *intersects_data; +}; + +struct ContextCollectGlyphsLookupContext +{ + ContextCollectGlyphsFuncs funcs; + const void *collect_data; +}; + +struct ContextApplyLookupContext +{ + ContextApplyFuncs funcs; + const void *match_data; +}; + +static inline bool context_intersects (const hb_set_t *glyphs, + unsigned int inputCount, /* Including the first glyph (not matched) */ + const HBUINT16 input[], /* Array of input values--start with second glyph */ + ContextClosureLookupContext &lookup_context) +{ + return array_is_subset_of (glyphs, + inputCount ? inputCount - 1 : 0, input, + lookup_context.funcs.intersects, lookup_context.intersects_data); +} + +static inline void context_closure_lookup (hb_closure_context_t *c, + unsigned int inputCount, /* Including the first glyph (not matched) */ + const HBUINT16 input[], /* Array of input values--start with second glyph */ + unsigned int lookupCount, + const LookupRecord lookupRecord[], + ContextClosureLookupContext &lookup_context) +{ + if (context_intersects (c->glyphs, + inputCount, input, + lookup_context)) + recurse_lookups (c, + lookupCount, lookupRecord); +} + +static inline void context_collect_glyphs_lookup (hb_collect_glyphs_context_t *c, + unsigned int inputCount, /* Including the first glyph (not matched) */ + const HBUINT16 input[], /* Array of input values--start with second glyph */ + unsigned int lookupCount, + const LookupRecord lookupRecord[], + ContextCollectGlyphsLookupContext &lookup_context) +{ + collect_array (c, c->input, + inputCount ? inputCount - 1 : 0, input, + lookup_context.funcs.collect, lookup_context.collect_data); + recurse_lookups (c, + lookupCount, lookupRecord); +} + +static inline bool context_would_apply_lookup (hb_would_apply_context_t *c, + unsigned int inputCount, /* Including the first glyph (not matched) */ + const HBUINT16 input[], /* Array of input values--start with second glyph */ + unsigned int lookupCount HB_UNUSED, + const LookupRecord lookupRecord[] HB_UNUSED, + ContextApplyLookupContext &lookup_context) +{ + return would_match_input (c, + inputCount, input, + lookup_context.funcs.match, lookup_context.match_data); +} +static inline bool context_apply_lookup (hb_ot_apply_context_t *c, + unsigned int inputCount, /* Including the first glyph (not matched) */ + const HBUINT16 input[], /* Array of input values--start with second glyph */ + unsigned int lookupCount, + const LookupRecord lookupRecord[], + ContextApplyLookupContext &lookup_context) +{ + unsigned int match_length = 0; + unsigned int match_positions[HB_MAX_CONTEXT_LENGTH]; + return match_input (c, + inputCount, input, + lookup_context.funcs.match, lookup_context.match_data, + &match_length, match_positions) + && (c->buffer->unsafe_to_break (c->buffer->idx, c->buffer->idx + match_length), + apply_lookup (c, + inputCount, match_positions, + lookupCount, lookupRecord, + match_length)); +} + +struct Rule +{ + bool intersects (const hb_set_t *glyphs, ContextClosureLookupContext &lookup_context) const + { + return context_intersects (glyphs, + inputCount, inputZ.arrayZ, + lookup_context); + } + + void closure (hb_closure_context_t *c, ContextClosureLookupContext &lookup_context) const + { + if (unlikely (c->lookup_limit_exceeded ())) return; + + const UnsizedArrayOf<LookupRecord> &lookupRecord = StructAfter<UnsizedArrayOf<LookupRecord>> + (inputZ.as_array ((inputCount ? inputCount - 1 : 0))); + context_closure_lookup (c, + inputCount, inputZ.arrayZ, + lookupCount, lookupRecord.arrayZ, + lookup_context); + } + + void closure_lookups (hb_closure_lookups_context_t *c) const + { + if (unlikely (c->lookup_limit_exceeded ())) return; + + const UnsizedArrayOf<LookupRecord> &lookupRecord = StructAfter<UnsizedArrayOf<LookupRecord>> + (inputZ.as_array (inputCount ? inputCount - 1 : 0)); + recurse_lookups (c, lookupCount, lookupRecord.arrayZ); + } + + void collect_glyphs (hb_collect_glyphs_context_t *c, + ContextCollectGlyphsLookupContext &lookup_context) const + { + const UnsizedArrayOf<LookupRecord> &lookupRecord = StructAfter<UnsizedArrayOf<LookupRecord>> + (inputZ.as_array (inputCount ? inputCount - 1 : 0)); + context_collect_glyphs_lookup (c, + inputCount, inputZ.arrayZ, + lookupCount, lookupRecord.arrayZ, + lookup_context); + } + + bool would_apply (hb_would_apply_context_t *c, + ContextApplyLookupContext &lookup_context) const + { + const UnsizedArrayOf<LookupRecord> &lookupRecord = StructAfter<UnsizedArrayOf<LookupRecord>> + (inputZ.as_array (inputCount ? inputCount - 1 : 0)); + return context_would_apply_lookup (c, + inputCount, inputZ.arrayZ, + lookupCount, lookupRecord.arrayZ, + lookup_context); + } + + bool apply (hb_ot_apply_context_t *c, + ContextApplyLookupContext &lookup_context) const + { + TRACE_APPLY (this); + const UnsizedArrayOf<LookupRecord> &lookupRecord = StructAfter<UnsizedArrayOf<LookupRecord>> + (inputZ.as_array (inputCount ? inputCount - 1 : 0)); + return_trace (context_apply_lookup (c, inputCount, inputZ.arrayZ, lookupCount, lookupRecord.arrayZ, lookup_context)); + } + + bool serialize (hb_serialize_context_t *c, + const hb_map_t *input_mapping, /* old->new glyphid or class mapping */ + const hb_map_t *lookup_map) const + { + TRACE_SERIALIZE (this); + auto *out = c->start_embed (this); + if (unlikely (!c->extend_min (out))) return_trace (false); + + out->inputCount = inputCount; + out->lookupCount = lookupCount; + + const hb_array_t<const HBUINT16> input = inputZ.as_array (inputCount - 1); + for (const auto org : input) + { + HBUINT16 d; + d = input_mapping->get (org); + c->copy (d); + } + + const UnsizedArrayOf<LookupRecord> &lookupRecord = StructAfter<UnsizedArrayOf<LookupRecord>> + (inputZ.as_array ((inputCount ? inputCount - 1 : 0))); + for (unsigned i = 0; i < (unsigned) lookupCount; i++) + c->copy (lookupRecord[i], lookup_map); + + return_trace (true); + } + + bool subset (hb_subset_context_t *c, + const hb_map_t *lookup_map, + const hb_map_t *klass_map = nullptr) const + { + TRACE_SUBSET (this); + + const hb_array_t<const HBUINT16> input = inputZ.as_array ((inputCount ? inputCount - 1 : 0)); + if (!input.length) return_trace (false); + + const hb_map_t *mapping = klass_map == nullptr ? c->plan->glyph_map : klass_map; + if (!hb_all (input, mapping)) return_trace (false); + return_trace (serialize (c->serializer, mapping, lookup_map)); + } + + public: + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (inputCount.sanitize (c) && + lookupCount.sanitize (c) && + c->check_range (inputZ.arrayZ, + inputZ.item_size * (inputCount ? inputCount - 1 : 0) + + LookupRecord::static_size * lookupCount)); + } + + protected: + HBUINT16 inputCount; /* Total number of glyphs in input + * glyph sequence--includes the first + * glyph */ + HBUINT16 lookupCount; /* Number of LookupRecords */ + UnsizedArrayOf<HBUINT16> + inputZ; /* Array of match inputs--start with + * second glyph */ +/*UnsizedArrayOf<LookupRecord> + lookupRecordX;*/ /* Array of LookupRecords--in + * design order */ + public: + DEFINE_SIZE_ARRAY (4, inputZ); +}; + +struct RuleSet +{ + bool intersects (const hb_set_t *glyphs, + ContextClosureLookupContext &lookup_context) const + { + return + + hb_iter (rule) + | hb_map (hb_add (this)) + | hb_map ([&] (const Rule &_) { return _.intersects (glyphs, lookup_context); }) + | hb_any + ; + } + + void closure (hb_closure_context_t *c, + ContextClosureLookupContext &lookup_context) const + { + if (unlikely (c->lookup_limit_exceeded ())) return; + + return + + hb_iter (rule) + | hb_map (hb_add (this)) + | hb_apply ([&] (const Rule &_) { _.closure (c, lookup_context); }) + ; + } + + void closure_lookups (hb_closure_lookups_context_t *c) const + { + if (unlikely (c->lookup_limit_exceeded ())) return; + + return + + hb_iter (rule) + | hb_map (hb_add (this)) + | hb_apply ([&] (const Rule &_) { _.closure_lookups (c); }) + ; + } + + void collect_glyphs (hb_collect_glyphs_context_t *c, + ContextCollectGlyphsLookupContext &lookup_context) const + { + return + + hb_iter (rule) + | hb_map (hb_add (this)) + | hb_apply ([&] (const Rule &_) { _.collect_glyphs (c, lookup_context); }) + ; + } + + bool would_apply (hb_would_apply_context_t *c, + ContextApplyLookupContext &lookup_context) const + { + return + + hb_iter (rule) + | hb_map (hb_add (this)) + | hb_map ([&] (const Rule &_) { return _.would_apply (c, lookup_context); }) + | hb_any + ; + } + + bool apply (hb_ot_apply_context_t *c, + ContextApplyLookupContext &lookup_context) const + { + TRACE_APPLY (this); + return_trace ( + + hb_iter (rule) + | hb_map (hb_add (this)) + | hb_map ([&] (const Rule &_) { return _.apply (c, lookup_context); }) + | hb_any + ) + ; + } + + bool subset (hb_subset_context_t *c, + const hb_map_t *lookup_map, + const hb_map_t *klass_map = nullptr) const + { + TRACE_SUBSET (this); + + auto snap = c->serializer->snapshot (); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + + for (const OffsetTo<Rule>& _ : rule) + { + if (!_) continue; + auto *o = out->rule.serialize_append (c->serializer); + if (unlikely (!o)) continue; + + auto o_snap = c->serializer->snapshot (); + if (!o->serialize_subset (c, _, this, lookup_map, klass_map)) + { + out->rule.pop (); + c->serializer->revert (o_snap); + } + } + + bool ret = bool (out->rule); + if (!ret) c->serializer->revert (snap); + + return_trace (ret); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (rule.sanitize (c, this)); + } + + protected: + OffsetArrayOf<Rule> + rule; /* Array of Rule tables + * ordered by preference */ + public: + DEFINE_SIZE_ARRAY (2, rule); +}; + + +struct ContextFormat1 +{ + bool intersects (const hb_set_t *glyphs) const + { + struct ContextClosureLookupContext lookup_context = { + {intersects_glyph}, + nullptr + }; + + return + + hb_zip (this+coverage, ruleSet) + | hb_filter (*glyphs, hb_first) + | hb_map (hb_second) + | hb_map (hb_add (this)) + | hb_map ([&] (const RuleSet &_) { return _.intersects (glyphs, lookup_context); }) + | hb_any + ; + } + + void closure (hb_closure_context_t *c) const + { + struct ContextClosureLookupContext lookup_context = { + {intersects_glyph}, + nullptr + }; + + + hb_zip (this+coverage, ruleSet) + | hb_filter (*c->glyphs, hb_first) + | hb_map (hb_second) + | hb_map (hb_add (this)) + | hb_apply ([&] (const RuleSet &_) { _.closure (c, lookup_context); }) + ; + } + + void closure_lookups (hb_closure_lookups_context_t *c) const + { + + hb_iter (ruleSet) + | hb_map (hb_add (this)) + | hb_apply ([&] (const RuleSet &_) { _.closure_lookups (c); }) + ; + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const {} + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + (this+coverage).collect_coverage (c->input); + + struct ContextCollectGlyphsLookupContext lookup_context = { + {collect_glyph}, + nullptr + }; + + + hb_iter (ruleSet) + | hb_map (hb_add (this)) + | hb_apply ([&] (const RuleSet &_) { _.collect_glyphs (c, lookup_context); }) + ; + } + + bool would_apply (hb_would_apply_context_t *c) const + { + const RuleSet &rule_set = this+ruleSet[(this+coverage).get_coverage (c->glyphs[0])]; + struct ContextApplyLookupContext lookup_context = { + {match_glyph}, + nullptr + }; + return rule_set.would_apply (c, lookup_context); + } + + const Coverage &get_coverage () const { return this+coverage; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint); + if (likely (index == NOT_COVERED)) + return_trace (false); + + const RuleSet &rule_set = this+ruleSet[index]; + struct ContextApplyLookupContext lookup_context = { + {match_glyph}, + nullptr + }; + return_trace (rule_set.apply (c, lookup_context)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + out->format = format; + + const hb_map_t *lookup_map = c->table_tag == HB_OT_TAG_GSUB ? c->plan->gsub_lookups : c->plan->gpos_lookups; + hb_sorted_vector_t<hb_codepoint_t> new_coverage; + + hb_zip (this+coverage, ruleSet) + | hb_filter (glyphset, hb_first) + | hb_filter (subset_offset_array (c, out->ruleSet, this, lookup_map), hb_second) + | hb_map (hb_first) + | hb_map (glyph_map) + | hb_sink (new_coverage) + ; + + out->coverage.serialize (c->serializer, out) + .serialize (c->serializer, new_coverage.iter ()); + return_trace (bool (new_coverage)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (coverage.sanitize (c, this) && ruleSet.sanitize (c, this)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of table */ + OffsetArrayOf<RuleSet> + ruleSet; /* Array of RuleSet tables + * ordered by Coverage Index */ + public: + DEFINE_SIZE_ARRAY (6, ruleSet); +}; + + +struct ContextFormat2 +{ + bool intersects (const hb_set_t *glyphs) const + { + if (!(this+coverage).intersects (glyphs)) + return false; + + const ClassDef &class_def = this+classDef; + + struct ContextClosureLookupContext lookup_context = { + {intersects_class}, + &class_def + }; + + return + + hb_iter (ruleSet) + | hb_map (hb_add (this)) + | hb_enumerate + | hb_map ([&] (const hb_pair_t<unsigned, const RuleSet &> p) + { return class_def.intersects_class (glyphs, p.first) && + p.second.intersects (glyphs, lookup_context); }) + | hb_any + ; + } + + void closure (hb_closure_context_t *c) const + { + if (!(this+coverage).intersects (c->glyphs)) + return; + + const ClassDef &class_def = this+classDef; + + struct ContextClosureLookupContext lookup_context = { + {intersects_class}, + &class_def + }; + + return + + hb_enumerate (ruleSet) + | hb_filter ([&] (unsigned _) + { return class_def.intersects_class (c->glyphs, _); }, + hb_first) + | hb_map (hb_second) + | hb_map (hb_add (this)) + | hb_apply ([&] (const RuleSet &_) { _.closure (c, lookup_context); }) + ; + } + + void closure_lookups (hb_closure_lookups_context_t *c) const + { + + hb_iter (ruleSet) + | hb_map (hb_add (this)) + | hb_apply ([&] (const RuleSet &_) { _.closure_lookups (c); }) + ; + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const {} + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + (this+coverage).collect_coverage (c->input); + + const ClassDef &class_def = this+classDef; + struct ContextCollectGlyphsLookupContext lookup_context = { + {collect_class}, + &class_def + }; + + + hb_iter (ruleSet) + | hb_map (hb_add (this)) + | hb_apply ([&] (const RuleSet &_) { _.collect_glyphs (c, lookup_context); }) + ; + } + + bool would_apply (hb_would_apply_context_t *c) const + { + const ClassDef &class_def = this+classDef; + unsigned int index = class_def.get_class (c->glyphs[0]); + const RuleSet &rule_set = this+ruleSet[index]; + struct ContextApplyLookupContext lookup_context = { + {match_class}, + &class_def + }; + return rule_set.would_apply (c, lookup_context); + } + + const Coverage &get_coverage () const { return this+coverage; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + const ClassDef &class_def = this+classDef; + index = class_def.get_class (c->buffer->cur().codepoint); + const RuleSet &rule_set = this+ruleSet[index]; + struct ContextApplyLookupContext lookup_context = { + {match_class}, + &class_def + }; + return_trace (rule_set.apply (c, lookup_context)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + out->format = format; + if (unlikely (!out->coverage.serialize_subset (c, coverage, this))) + return_trace (false); + + hb_map_t klass_map; + out->classDef.serialize_subset (c, classDef, this, &klass_map); + + const hb_map_t *lookup_map = c->table_tag == HB_OT_TAG_GSUB ? c->plan->gsub_lookups : c->plan->gpos_lookups; + bool ret = true; + unsigned non_zero_index = 0, index = 0; + for (const hb_pair_t<unsigned, const OffsetTo<RuleSet>&> _ : + hb_enumerate (ruleSet) + | hb_filter (klass_map, hb_first)) + { + auto *o = out->ruleSet.serialize_append (c->serializer); + if (unlikely (!o)) + { + ret = false; + break; + } + + if (o->serialize_subset (c, _.second, this, lookup_map, &klass_map)) + non_zero_index = index; + + index++; + } + + if (!ret) return_trace (ret); + + //prune empty trailing ruleSets + --index; + while (index > non_zero_index) + { + out->ruleSet.pop (); + index--; + } + + return_trace (bool (out->ruleSet)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (coverage.sanitize (c, this) && classDef.sanitize (c, this) && ruleSet.sanitize (c, this)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 2 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of table */ + OffsetTo<ClassDef> + classDef; /* Offset to glyph ClassDef table--from + * beginning of table */ + OffsetArrayOf<RuleSet> + ruleSet; /* Array of RuleSet tables + * ordered by class */ + public: + DEFINE_SIZE_ARRAY (8, ruleSet); +}; + + +struct ContextFormat3 +{ + bool intersects (const hb_set_t *glyphs) const + { + if (!(this+coverageZ[0]).intersects (glyphs)) + return false; + + struct ContextClosureLookupContext lookup_context = { + {intersects_coverage}, + this + }; + return context_intersects (glyphs, + glyphCount, (const HBUINT16 *) (coverageZ.arrayZ + 1), + lookup_context); + } + + void closure (hb_closure_context_t *c) const + { + if (!(this+coverageZ[0]).intersects (c->glyphs)) + return; + + const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount)); + struct ContextClosureLookupContext lookup_context = { + {intersects_coverage}, + this + }; + context_closure_lookup (c, + glyphCount, (const HBUINT16 *) (coverageZ.arrayZ + 1), + lookupCount, lookupRecord, + lookup_context); + } + + void closure_lookups (hb_closure_lookups_context_t *c) const + { + const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount)); + recurse_lookups (c, lookupCount, lookupRecord); + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const {} + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + (this+coverageZ[0]).collect_coverage (c->input); + + const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount)); + struct ContextCollectGlyphsLookupContext lookup_context = { + {collect_coverage}, + this + }; + + context_collect_glyphs_lookup (c, + glyphCount, (const HBUINT16 *) (coverageZ.arrayZ + 1), + lookupCount, lookupRecord, + lookup_context); + } + + bool would_apply (hb_would_apply_context_t *c) const + { + const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount)); + struct ContextApplyLookupContext lookup_context = { + {match_coverage}, + this + }; + return context_would_apply_lookup (c, + glyphCount, (const HBUINT16 *) (coverageZ.arrayZ + 1), + lookupCount, lookupRecord, + lookup_context); + } + + const Coverage &get_coverage () const { return this+coverageZ[0]; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + unsigned int index = (this+coverageZ[0]).get_coverage (c->buffer->cur().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount)); + struct ContextApplyLookupContext lookup_context = { + {match_coverage}, + this + }; + return_trace (context_apply_lookup (c, glyphCount, (const HBUINT16 *) (coverageZ.arrayZ + 1), lookupCount, lookupRecord, lookup_context)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + + out->format = format; + out->glyphCount = glyphCount; + out->lookupCount = lookupCount; + + auto coverages = coverageZ.as_array (glyphCount); + + for (const OffsetTo<Coverage>& offset : coverages) + { + auto *o = c->serializer->allocate_size<OffsetTo<Coverage>> (OffsetTo<Coverage>::static_size); + if (unlikely (!o)) return_trace (false); + if (!o->serialize_subset (c, offset, this)) return_trace (false); + } + + const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount)); + const hb_map_t *lookup_map = c->table_tag == HB_OT_TAG_GSUB ? c->plan->gsub_lookups : c->plan->gpos_lookups; + for (unsigned i = 0; i < (unsigned) lookupCount; i++) + c->serializer->copy (lookupRecord[i], lookup_map); + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!c->check_struct (this)) return_trace (false); + unsigned int count = glyphCount; + if (!count) return_trace (false); /* We want to access coverageZ[0] freely. */ + if (!c->check_array (coverageZ.arrayZ, count)) return_trace (false); + for (unsigned int i = 0; i < count; i++) + if (!coverageZ[i].sanitize (c, this)) return_trace (false); + const LookupRecord *lookupRecord = &StructAfter<LookupRecord> (coverageZ.as_array (glyphCount)); + return_trace (c->check_array (lookupRecord, lookupCount)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 3 */ + HBUINT16 glyphCount; /* Number of glyphs in the input glyph + * sequence */ + HBUINT16 lookupCount; /* Number of LookupRecords */ + UnsizedArrayOf<OffsetTo<Coverage>> + coverageZ; /* Array of offsets to Coverage + * table in glyph sequence order */ +/*UnsizedArrayOf<LookupRecord> + lookupRecordX;*/ /* Array of LookupRecords--in + * design order */ + public: + DEFINE_SIZE_ARRAY (6, coverageZ); +}; + +struct Context +{ + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + case 2: return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...)); + case 3: return_trace (c->dispatch (u.format3, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + ContextFormat1 format1; + ContextFormat2 format2; + ContextFormat3 format3; + } u; +}; + + +/* Chaining Contextual lookups */ + +struct ChainContextClosureLookupContext +{ + ContextClosureFuncs funcs; + const void *intersects_data[3]; +}; + +struct ChainContextCollectGlyphsLookupContext +{ + ContextCollectGlyphsFuncs funcs; + const void *collect_data[3]; +}; + +struct ChainContextApplyLookupContext +{ + ContextApplyFuncs funcs; + const void *match_data[3]; +}; + +static inline bool chain_context_intersects (const hb_set_t *glyphs, + unsigned int backtrackCount, + const HBUINT16 backtrack[], + unsigned int inputCount, /* Including the first glyph (not matched) */ + const HBUINT16 input[], /* Array of input values--start with second glyph */ + unsigned int lookaheadCount, + const HBUINT16 lookahead[], + ChainContextClosureLookupContext &lookup_context) +{ + return array_is_subset_of (glyphs, + backtrackCount, backtrack, + lookup_context.funcs.intersects, lookup_context.intersects_data[0]) + && array_is_subset_of (glyphs, + inputCount ? inputCount - 1 : 0, input, + lookup_context.funcs.intersects, lookup_context.intersects_data[1]) + && array_is_subset_of (glyphs, + lookaheadCount, lookahead, + lookup_context.funcs.intersects, lookup_context.intersects_data[2]); +} + +static inline void chain_context_closure_lookup (hb_closure_context_t *c, + unsigned int backtrackCount, + const HBUINT16 backtrack[], + unsigned int inputCount, /* Including the first glyph (not matched) */ + const HBUINT16 input[], /* Array of input values--start with second glyph */ + unsigned int lookaheadCount, + const HBUINT16 lookahead[], + unsigned int lookupCount, + const LookupRecord lookupRecord[], + ChainContextClosureLookupContext &lookup_context) +{ + if (chain_context_intersects (c->glyphs, + backtrackCount, backtrack, + inputCount, input, + lookaheadCount, lookahead, + lookup_context)) + recurse_lookups (c, + lookupCount, lookupRecord); +} + +static inline void chain_context_collect_glyphs_lookup (hb_collect_glyphs_context_t *c, + unsigned int backtrackCount, + const HBUINT16 backtrack[], + unsigned int inputCount, /* Including the first glyph (not matched) */ + const HBUINT16 input[], /* Array of input values--start with second glyph */ + unsigned int lookaheadCount, + const HBUINT16 lookahead[], + unsigned int lookupCount, + const LookupRecord lookupRecord[], + ChainContextCollectGlyphsLookupContext &lookup_context) +{ + collect_array (c, c->before, + backtrackCount, backtrack, + lookup_context.funcs.collect, lookup_context.collect_data[0]); + collect_array (c, c->input, + inputCount ? inputCount - 1 : 0, input, + lookup_context.funcs.collect, lookup_context.collect_data[1]); + collect_array (c, c->after, + lookaheadCount, lookahead, + lookup_context.funcs.collect, lookup_context.collect_data[2]); + recurse_lookups (c, + lookupCount, lookupRecord); +} + +static inline bool chain_context_would_apply_lookup (hb_would_apply_context_t *c, + unsigned int backtrackCount, + const HBUINT16 backtrack[] HB_UNUSED, + unsigned int inputCount, /* Including the first glyph (not matched) */ + const HBUINT16 input[], /* Array of input values--start with second glyph */ + unsigned int lookaheadCount, + const HBUINT16 lookahead[] HB_UNUSED, + unsigned int lookupCount HB_UNUSED, + const LookupRecord lookupRecord[] HB_UNUSED, + ChainContextApplyLookupContext &lookup_context) +{ + return (c->zero_context ? !backtrackCount && !lookaheadCount : true) + && would_match_input (c, + inputCount, input, + lookup_context.funcs.match, lookup_context.match_data[1]); +} + +static inline bool chain_context_apply_lookup (hb_ot_apply_context_t *c, + unsigned int backtrackCount, + const HBUINT16 backtrack[], + unsigned int inputCount, /* Including the first glyph (not matched) */ + const HBUINT16 input[], /* Array of input values--start with second glyph */ + unsigned int lookaheadCount, + const HBUINT16 lookahead[], + unsigned int lookupCount, + const LookupRecord lookupRecord[], + ChainContextApplyLookupContext &lookup_context) +{ + unsigned int start_index = 0, match_length = 0, end_index = 0; + unsigned int match_positions[HB_MAX_CONTEXT_LENGTH]; + return match_input (c, + inputCount, input, + lookup_context.funcs.match, lookup_context.match_data[1], + &match_length, match_positions) + && match_backtrack (c, + backtrackCount, backtrack, + lookup_context.funcs.match, lookup_context.match_data[0], + &start_index) + && match_lookahead (c, + lookaheadCount, lookahead, + lookup_context.funcs.match, lookup_context.match_data[2], + match_length, &end_index) + && (c->buffer->unsafe_to_break_from_outbuffer (start_index, end_index), + apply_lookup (c, + inputCount, match_positions, + lookupCount, lookupRecord, + match_length)); +} + +struct ChainRule +{ + bool intersects (const hb_set_t *glyphs, ChainContextClosureLookupContext &lookup_context) const + { + const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack); + const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input); + return chain_context_intersects (glyphs, + backtrack.len, backtrack.arrayZ, + input.lenP1, input.arrayZ, + lookahead.len, lookahead.arrayZ, + lookup_context); + } + + void closure (hb_closure_context_t *c, + ChainContextClosureLookupContext &lookup_context) const + { + if (unlikely (c->lookup_limit_exceeded ())) return; + + const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack); + const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input); + const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead); + chain_context_closure_lookup (c, + backtrack.len, backtrack.arrayZ, + input.lenP1, input.arrayZ, + lookahead.len, lookahead.arrayZ, + lookup.len, lookup.arrayZ, + lookup_context); + } + + void closure_lookups (hb_closure_lookups_context_t *c) const + { + if (unlikely (c->lookup_limit_exceeded ())) return; + + const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack); + const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input); + const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead); + recurse_lookups (c, lookup.len, lookup.arrayZ); + } + + void collect_glyphs (hb_collect_glyphs_context_t *c, + ChainContextCollectGlyphsLookupContext &lookup_context) const + { + const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack); + const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input); + const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead); + chain_context_collect_glyphs_lookup (c, + backtrack.len, backtrack.arrayZ, + input.lenP1, input.arrayZ, + lookahead.len, lookahead.arrayZ, + lookup.len, lookup.arrayZ, + lookup_context); + } + + bool would_apply (hb_would_apply_context_t *c, + ChainContextApplyLookupContext &lookup_context) const + { + const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack); + const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input); + const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead); + return chain_context_would_apply_lookup (c, + backtrack.len, backtrack.arrayZ, + input.lenP1, input.arrayZ, + lookahead.len, lookahead.arrayZ, lookup.len, + lookup.arrayZ, lookup_context); + } + + bool apply (hb_ot_apply_context_t *c, ChainContextApplyLookupContext &lookup_context) const + { + TRACE_APPLY (this); + const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack); + const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input); + const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead); + return_trace (chain_context_apply_lookup (c, + backtrack.len, backtrack.arrayZ, + input.lenP1, input.arrayZ, + lookahead.len, lookahead.arrayZ, lookup.len, + lookup.arrayZ, lookup_context)); + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + void serialize_array (hb_serialize_context_t *c, + HBUINT16 len, + Iterator it) const + { + c->copy (len); + for (const auto g : it) + { + HBUINT16 gid; + gid = g; + c->copy (gid); + } + } + + ChainRule* copy (hb_serialize_context_t *c, + const hb_map_t *lookup_map, + const hb_map_t *backtrack_map, + const hb_map_t *input_map = nullptr, + const hb_map_t *lookahead_map = nullptr) const + { + TRACE_SERIALIZE (this); + auto *out = c->start_embed (this); + if (unlikely (!out)) return_trace (nullptr); + + const hb_map_t *mapping = backtrack_map; + serialize_array (c, backtrack.len, + backtrack.iter () + | hb_map (mapping)); + + const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack); + if (input_map) mapping = input_map; + serialize_array (c, input.lenP1, + input.iter () + | hb_map (mapping)); + + const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input); + if (lookahead_map) mapping = lookahead_map; + serialize_array (c, lookahead.len, + lookahead.iter () + | hb_map (mapping)); + + const ArrayOf<LookupRecord> &lookupRecord = StructAfter<ArrayOf<LookupRecord>> (lookahead); + HBUINT16 lookupCount; + lookupCount = lookupRecord.len; + if (!c->copy (lookupCount)) return_trace (nullptr); + + for (unsigned i = 0; i < (unsigned) lookupCount; i++) + if (!c->copy (lookupRecord[i], lookup_map)) return_trace (nullptr); + + return_trace (out); + } + + bool subset (hb_subset_context_t *c, + const hb_map_t *lookup_map, + const hb_map_t *backtrack_map = nullptr, + const hb_map_t *input_map = nullptr, + const hb_map_t *lookahead_map = nullptr) const + { + TRACE_SUBSET (this); + + const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack); + const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input); + + if (!backtrack_map) + { + const hb_set_t &glyphset = *c->plan->glyphset (); + if (!hb_all (backtrack, glyphset) || + !hb_all (input, glyphset) || + !hb_all (lookahead, glyphset)) + return_trace (false); + + copy (c->serializer, lookup_map, c->plan->glyph_map); + } + else + { + if (!hb_all (backtrack, backtrack_map) || + !hb_all (input, input_map) || + !hb_all (lookahead, lookahead_map)) + return_trace (false); + + copy (c->serializer, lookup_map, backtrack_map, input_map, lookahead_map); + } + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!backtrack.sanitize (c)) return_trace (false); + const HeadlessArrayOf<HBUINT16> &input = StructAfter<HeadlessArrayOf<HBUINT16>> (backtrack); + if (!input.sanitize (c)) return_trace (false); + const ArrayOf<HBUINT16> &lookahead = StructAfter<ArrayOf<HBUINT16>> (input); + if (!lookahead.sanitize (c)) return_trace (false); + const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead); + return_trace (lookup.sanitize (c)); + } + + protected: + ArrayOf<HBUINT16> + backtrack; /* Array of backtracking values + * (to be matched before the input + * sequence) */ + HeadlessArrayOf<HBUINT16> + inputX; /* Array of input values (start with + * second glyph) */ + ArrayOf<HBUINT16> + lookaheadX; /* Array of lookahead values's (to be + * matched after the input sequence) */ + ArrayOf<LookupRecord> + lookupX; /* Array of LookupRecords--in + * design order) */ + public: + DEFINE_SIZE_MIN (8); +}; + +struct ChainRuleSet +{ + bool intersects (const hb_set_t *glyphs, ChainContextClosureLookupContext &lookup_context) const + { + return + + hb_iter (rule) + | hb_map (hb_add (this)) + | hb_map ([&] (const ChainRule &_) { return _.intersects (glyphs, lookup_context); }) + | hb_any + ; + } + void closure (hb_closure_context_t *c, ChainContextClosureLookupContext &lookup_context) const + { + if (unlikely (c->lookup_limit_exceeded ())) return; + + return + + hb_iter (rule) + | hb_map (hb_add (this)) + | hb_apply ([&] (const ChainRule &_) { _.closure (c, lookup_context); }) + ; + } + + void closure_lookups (hb_closure_lookups_context_t *c) const + { + if (unlikely (c->lookup_limit_exceeded ())) return; + + return + + hb_iter (rule) + | hb_map (hb_add (this)) + | hb_apply ([&] (const ChainRule &_) { _.closure_lookups (c); }) + ; + } + + void collect_glyphs (hb_collect_glyphs_context_t *c, ChainContextCollectGlyphsLookupContext &lookup_context) const + { + return + + hb_iter (rule) + | hb_map (hb_add (this)) + | hb_apply ([&] (const ChainRule &_) { _.collect_glyphs (c, lookup_context); }) + ; + } + + bool would_apply (hb_would_apply_context_t *c, ChainContextApplyLookupContext &lookup_context) const + { + return + + hb_iter (rule) + | hb_map (hb_add (this)) + | hb_map ([&] (const ChainRule &_) { return _.would_apply (c, lookup_context); }) + | hb_any + ; + } + + bool apply (hb_ot_apply_context_t *c, ChainContextApplyLookupContext &lookup_context) const + { + TRACE_APPLY (this); + return_trace ( + + hb_iter (rule) + | hb_map (hb_add (this)) + | hb_map ([&] (const ChainRule &_) { return _.apply (c, lookup_context); }) + | hb_any + ) + ; + } + + bool subset (hb_subset_context_t *c, + const hb_map_t *lookup_map, + const hb_map_t *backtrack_klass_map = nullptr, + const hb_map_t *input_klass_map = nullptr, + const hb_map_t *lookahead_klass_map = nullptr) const + { + TRACE_SUBSET (this); + + auto snap = c->serializer->snapshot (); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + + for (const OffsetTo<ChainRule>& _ : rule) + { + if (!_) continue; + auto *o = out->rule.serialize_append (c->serializer); + if (unlikely (!o)) continue; + + auto o_snap = c->serializer->snapshot (); + if (!o->serialize_subset (c, _, this, + lookup_map, + backtrack_klass_map, + input_klass_map, + lookahead_klass_map)) + { + out->rule.pop (); + c->serializer->revert (o_snap); + } + } + + bool ret = bool (out->rule); + if (!ret) c->serializer->revert (snap); + + return_trace (ret); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (rule.sanitize (c, this)); + } + + protected: + OffsetArrayOf<ChainRule> + rule; /* Array of ChainRule tables + * ordered by preference */ + public: + DEFINE_SIZE_ARRAY (2, rule); +}; + +struct ChainContextFormat1 +{ + bool intersects (const hb_set_t *glyphs) const + { + struct ChainContextClosureLookupContext lookup_context = { + {intersects_glyph}, + {nullptr, nullptr, nullptr} + }; + + return + + hb_zip (this+coverage, ruleSet) + | hb_filter (*glyphs, hb_first) + | hb_map (hb_second) + | hb_map (hb_add (this)) + | hb_map ([&] (const ChainRuleSet &_) { return _.intersects (glyphs, lookup_context); }) + | hb_any + ; + } + + void closure (hb_closure_context_t *c) const + { + struct ChainContextClosureLookupContext lookup_context = { + {intersects_glyph}, + {nullptr, nullptr, nullptr} + }; + + + hb_zip (this+coverage, ruleSet) + | hb_filter (*c->glyphs, hb_first) + | hb_map (hb_second) + | hb_map (hb_add (this)) + | hb_apply ([&] (const ChainRuleSet &_) { _.closure (c, lookup_context); }) + ; + } + + void closure_lookups (hb_closure_lookups_context_t *c) const + { + + hb_iter (ruleSet) + | hb_map (hb_add (this)) + | hb_apply ([&] (const ChainRuleSet &_) { _.closure_lookups (c); }) + ; + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const {} + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + (this+coverage).collect_coverage (c->input); + + struct ChainContextCollectGlyphsLookupContext lookup_context = { + {collect_glyph}, + {nullptr, nullptr, nullptr} + }; + + + hb_iter (ruleSet) + | hb_map (hb_add (this)) + | hb_apply ([&] (const ChainRuleSet &_) { _.collect_glyphs (c, lookup_context); }) + ; + } + + bool would_apply (hb_would_apply_context_t *c) const + { + const ChainRuleSet &rule_set = this+ruleSet[(this+coverage).get_coverage (c->glyphs[0])]; + struct ChainContextApplyLookupContext lookup_context = { + {match_glyph}, + {nullptr, nullptr, nullptr} + }; + return rule_set.would_apply (c, lookup_context); + } + + const Coverage &get_coverage () const { return this+coverage; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + const ChainRuleSet &rule_set = this+ruleSet[index]; + struct ChainContextApplyLookupContext lookup_context = { + {match_glyph}, + {nullptr, nullptr, nullptr} + }; + return_trace (rule_set.apply (c, lookup_context)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + const hb_set_t &glyphset = *c->plan->glyphset (); + const hb_map_t &glyph_map = *c->plan->glyph_map; + + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + out->format = format; + + const hb_map_t *lookup_map = c->table_tag == HB_OT_TAG_GSUB ? c->plan->gsub_lookups : c->plan->gpos_lookups; + hb_sorted_vector_t<hb_codepoint_t> new_coverage; + + hb_zip (this+coverage, ruleSet) + | hb_filter (glyphset, hb_first) + | hb_filter (subset_offset_array (c, out->ruleSet, this, lookup_map), hb_second) + | hb_map (hb_first) + | hb_map (glyph_map) + | hb_sink (new_coverage) + ; + + out->coverage.serialize (c->serializer, out) + .serialize (c->serializer, new_coverage.iter ()); + return_trace (bool (new_coverage)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (coverage.sanitize (c, this) && ruleSet.sanitize (c, this)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 1 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of table */ + OffsetArrayOf<ChainRuleSet> + ruleSet; /* Array of ChainRuleSet tables + * ordered by Coverage Index */ + public: + DEFINE_SIZE_ARRAY (6, ruleSet); +}; + +struct ChainContextFormat2 +{ + bool intersects (const hb_set_t *glyphs) const + { + if (!(this+coverage).intersects (glyphs)) + return false; + + const ClassDef &backtrack_class_def = this+backtrackClassDef; + const ClassDef &input_class_def = this+inputClassDef; + const ClassDef &lookahead_class_def = this+lookaheadClassDef; + + struct ChainContextClosureLookupContext lookup_context = { + {intersects_class}, + {&backtrack_class_def, + &input_class_def, + &lookahead_class_def} + }; + + return + + hb_iter (ruleSet) + | hb_map (hb_add (this)) + | hb_enumerate + | hb_map ([&] (const hb_pair_t<unsigned, const ChainRuleSet &> p) + { return input_class_def.intersects_class (glyphs, p.first) && + p.second.intersects (glyphs, lookup_context); }) + | hb_any + ; + } + void closure (hb_closure_context_t *c) const + { + if (!(this+coverage).intersects (c->glyphs)) + return; + + const ClassDef &backtrack_class_def = this+backtrackClassDef; + const ClassDef &input_class_def = this+inputClassDef; + const ClassDef &lookahead_class_def = this+lookaheadClassDef; + + struct ChainContextClosureLookupContext lookup_context = { + {intersects_class}, + {&backtrack_class_def, + &input_class_def, + &lookahead_class_def} + }; + + return + + hb_enumerate (ruleSet) + | hb_filter ([&] (unsigned _) + { return input_class_def.intersects_class (c->glyphs, _); }, + hb_first) + | hb_map (hb_second) + | hb_map (hb_add (this)) + | hb_apply ([&] (const ChainRuleSet &_) { _.closure (c, lookup_context); }) + ; + } + + void closure_lookups (hb_closure_lookups_context_t *c) const + { + + hb_iter (ruleSet) + | hb_map (hb_add (this)) + | hb_apply ([&] (const ChainRuleSet &_) { _.closure_lookups (c); }) + ; + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const {} + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + (this+coverage).collect_coverage (c->input); + + const ClassDef &backtrack_class_def = this+backtrackClassDef; + const ClassDef &input_class_def = this+inputClassDef; + const ClassDef &lookahead_class_def = this+lookaheadClassDef; + + struct ChainContextCollectGlyphsLookupContext lookup_context = { + {collect_class}, + {&backtrack_class_def, + &input_class_def, + &lookahead_class_def} + }; + + + hb_iter (ruleSet) + | hb_map (hb_add (this)) + | hb_apply ([&] (const ChainRuleSet &_) { _.collect_glyphs (c, lookup_context); }) + ; + } + + bool would_apply (hb_would_apply_context_t *c) const + { + const ClassDef &backtrack_class_def = this+backtrackClassDef; + const ClassDef &input_class_def = this+inputClassDef; + const ClassDef &lookahead_class_def = this+lookaheadClassDef; + + unsigned int index = input_class_def.get_class (c->glyphs[0]); + const ChainRuleSet &rule_set = this+ruleSet[index]; + struct ChainContextApplyLookupContext lookup_context = { + {match_class}, + {&backtrack_class_def, + &input_class_def, + &lookahead_class_def} + }; + return rule_set.would_apply (c, lookup_context); + } + + const Coverage &get_coverage () const { return this+coverage; } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + unsigned int index = (this+coverage).get_coverage (c->buffer->cur().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + const ClassDef &backtrack_class_def = this+backtrackClassDef; + const ClassDef &input_class_def = this+inputClassDef; + const ClassDef &lookahead_class_def = this+lookaheadClassDef; + + index = input_class_def.get_class (c->buffer->cur().codepoint); + const ChainRuleSet &rule_set = this+ruleSet[index]; + struct ChainContextApplyLookupContext lookup_context = { + {match_class}, + {&backtrack_class_def, + &input_class_def, + &lookahead_class_def} + }; + return_trace (rule_set.apply (c, lookup_context)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->serializer->start_embed (*this); + if (unlikely (!c->serializer->extend_min (out))) return_trace (false); + out->format = format; + out->coverage.serialize_subset (c, coverage, this); + + hb_map_t backtrack_klass_map; + out->backtrackClassDef.serialize_subset (c, backtrackClassDef, this, &backtrack_klass_map); + if (unlikely (!c->serializer->check_success (!backtrack_klass_map.in_error ()))) + return_trace (false); + + // subset inputClassDef based on glyphs survived in Coverage subsetting + hb_map_t input_klass_map; + out->inputClassDef.serialize_subset (c, inputClassDef, this, &input_klass_map); + if (unlikely (!c->serializer->check_success (!input_klass_map.in_error ()))) + return_trace (false); + + hb_map_t lookahead_klass_map; + out->lookaheadClassDef.serialize_subset (c, lookaheadClassDef, this, &lookahead_klass_map); + if (unlikely (!c->serializer->check_success (!lookahead_klass_map.in_error ()))) + return_trace (false); + + unsigned non_zero_index = 0, index = 0; + bool ret = true; + const hb_map_t *lookup_map = c->table_tag == HB_OT_TAG_GSUB ? c->plan->gsub_lookups : c->plan->gpos_lookups; + for (const OffsetTo<ChainRuleSet>& _ : + hb_enumerate (ruleSet) + | hb_filter (input_klass_map, hb_first) + | hb_map (hb_second)) + { + auto *o = out->ruleSet.serialize_append (c->serializer); + if (unlikely (!o)) + { + ret = false; + break; + } + if (o->serialize_subset (c, _, this, + lookup_map, + &backtrack_klass_map, + &input_klass_map, + &lookahead_klass_map)) + non_zero_index = index; + + index++; + } + + if (!ret) return_trace (ret); + + //prune empty trailing ruleSets + --index; + while (index > non_zero_index) + { + out->ruleSet.pop (); + index--; + } + + return_trace (bool (out->ruleSet)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (coverage.sanitize (c, this) && + backtrackClassDef.sanitize (c, this) && + inputClassDef.sanitize (c, this) && + lookaheadClassDef.sanitize (c, this) && + ruleSet.sanitize (c, this)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 2 */ + OffsetTo<Coverage> + coverage; /* Offset to Coverage table--from + * beginning of table */ + OffsetTo<ClassDef> + backtrackClassDef; /* Offset to glyph ClassDef table + * containing backtrack sequence + * data--from beginning of table */ + OffsetTo<ClassDef> + inputClassDef; /* Offset to glyph ClassDef + * table containing input sequence + * data--from beginning of table */ + OffsetTo<ClassDef> + lookaheadClassDef; /* Offset to glyph ClassDef table + * containing lookahead sequence + * data--from beginning of table */ + OffsetArrayOf<ChainRuleSet> + ruleSet; /* Array of ChainRuleSet tables + * ordered by class */ + public: + DEFINE_SIZE_ARRAY (12, ruleSet); +}; + +struct ChainContextFormat3 +{ + bool intersects (const hb_set_t *glyphs) const + { + const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + + if (!(this+input[0]).intersects (glyphs)) + return false; + + const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input); + struct ChainContextClosureLookupContext lookup_context = { + {intersects_coverage}, + {this, this, this} + }; + return chain_context_intersects (glyphs, + backtrack.len, (const HBUINT16 *) backtrack.arrayZ, + input.len, (const HBUINT16 *) input.arrayZ + 1, + lookahead.len, (const HBUINT16 *) lookahead.arrayZ, + lookup_context); + } + + void closure (hb_closure_context_t *c) const + { + const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + + if (!(this+input[0]).intersects (c->glyphs)) + return; + + const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input); + const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead); + struct ChainContextClosureLookupContext lookup_context = { + {intersects_coverage}, + {this, this, this} + }; + chain_context_closure_lookup (c, + backtrack.len, (const HBUINT16 *) backtrack.arrayZ, + input.len, (const HBUINT16 *) input.arrayZ + 1, + lookahead.len, (const HBUINT16 *) lookahead.arrayZ, + lookup.len, lookup.arrayZ, + lookup_context); + } + + void closure_lookups (hb_closure_lookups_context_t *c) const + { + const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input); + const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead); + recurse_lookups (c, lookup.len, lookup.arrayZ); + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const {} + + void collect_glyphs (hb_collect_glyphs_context_t *c) const + { + const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + + (this+input[0]).collect_coverage (c->input); + + const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input); + const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead); + struct ChainContextCollectGlyphsLookupContext lookup_context = { + {collect_coverage}, + {this, this, this} + }; + chain_context_collect_glyphs_lookup (c, + backtrack.len, (const HBUINT16 *) backtrack.arrayZ, + input.len, (const HBUINT16 *) input.arrayZ + 1, + lookahead.len, (const HBUINT16 *) lookahead.arrayZ, + lookup.len, lookup.arrayZ, + lookup_context); + } + + bool would_apply (hb_would_apply_context_t *c) const + { + const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input); + const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead); + struct ChainContextApplyLookupContext lookup_context = { + {match_coverage}, + {this, this, this} + }; + return chain_context_would_apply_lookup (c, + backtrack.len, (const HBUINT16 *) backtrack.arrayZ, + input.len, (const HBUINT16 *) input.arrayZ + 1, + lookahead.len, (const HBUINT16 *) lookahead.arrayZ, + lookup.len, lookup.arrayZ, lookup_context); + } + + const Coverage &get_coverage () const + { + const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + return this+input[0]; + } + + bool apply (hb_ot_apply_context_t *c) const + { + TRACE_APPLY (this); + const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + + unsigned int index = (this+input[0]).get_coverage (c->buffer->cur().codepoint); + if (likely (index == NOT_COVERED)) return_trace (false); + + const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input); + const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead); + struct ChainContextApplyLookupContext lookup_context = { + {match_coverage}, + {this, this, this} + }; + return_trace (chain_context_apply_lookup (c, + backtrack.len, (const HBUINT16 *) backtrack.arrayZ, + input.len, (const HBUINT16 *) input.arrayZ + 1, + lookahead.len, (const HBUINT16 *) lookahead.arrayZ, + lookup.len, lookup.arrayZ, lookup_context)); + } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + bool serialize_coverage_offsets (hb_subset_context_t *c, Iterator it, const void* base) const + { + TRACE_SERIALIZE (this); + auto *out = c->serializer->start_embed<OffsetArrayOf<Coverage>> (); + + if (unlikely (!c->serializer->allocate_size<HBUINT16> (HBUINT16::static_size))) return_trace (false); + + + it + | hb_apply (subset_offset_array (c, *out, base)) + ; + + return_trace (out->len); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + + auto *out = c->serializer->start_embed (this); + if (unlikely (!out)) return_trace (false); + if (unlikely (!c->serializer->embed (this->format))) return_trace (false); + + if (!serialize_coverage_offsets (c, backtrack.iter (), this)) + return_trace (false); + + const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + if (!serialize_coverage_offsets (c, input.iter (), this)) + return_trace (false); + + const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input); + if (!serialize_coverage_offsets (c, lookahead.iter (), this)) + return_trace (false); + + const ArrayOf<LookupRecord> &lookupRecord = StructAfter<ArrayOf<LookupRecord>> (lookahead); + HBUINT16 lookupCount; + lookupCount = lookupRecord.len; + if (!c->serializer->copy (lookupCount)) return_trace (false); + + const hb_map_t *lookup_map = c->table_tag == HB_OT_TAG_GSUB ? c->plan->gsub_lookups : c->plan->gpos_lookups; + for (unsigned i = 0; i < (unsigned) lookupCount; i++) + if (!c->serializer->copy (lookupRecord[i], lookup_map)) return_trace (false); + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (!backtrack.sanitize (c, this)) return_trace (false); + const OffsetArrayOf<Coverage> &input = StructAfter<OffsetArrayOf<Coverage>> (backtrack); + if (!input.sanitize (c, this)) return_trace (false); + if (!input.len) return_trace (false); /* To be consistent with Context. */ + const OffsetArrayOf<Coverage> &lookahead = StructAfter<OffsetArrayOf<Coverage>> (input); + if (!lookahead.sanitize (c, this)) return_trace (false); + const ArrayOf<LookupRecord> &lookup = StructAfter<ArrayOf<LookupRecord>> (lookahead); + return_trace (lookup.sanitize (c)); + } + + protected: + HBUINT16 format; /* Format identifier--format = 3 */ + OffsetArrayOf<Coverage> + backtrack; /* Array of coverage tables + * in backtracking sequence, in glyph + * sequence order */ + OffsetArrayOf<Coverage> + inputX ; /* Array of coverage + * tables in input sequence, in glyph + * sequence order */ + OffsetArrayOf<Coverage> + lookaheadX; /* Array of coverage tables + * in lookahead sequence, in glyph + * sequence order */ + ArrayOf<LookupRecord> + lookupX; /* Array of LookupRecords--in + * design order) */ + public: + DEFINE_SIZE_MIN (10); +}; + +struct ChainContext +{ + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (c->dispatch (u.format1, hb_forward<Ts> (ds)...)); + case 2: return_trace (c->dispatch (u.format2, hb_forward<Ts> (ds)...)); + case 3: return_trace (c->dispatch (u.format3, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + ChainContextFormat1 format1; + ChainContextFormat2 format2; + ChainContextFormat3 format3; + } u; +}; + + +template <typename T> +struct ExtensionFormat1 +{ + unsigned int get_type () const { return extensionLookupType; } + + template <typename X> + const X& get_subtable () const + { return this + reinterpret_cast<const LOffsetTo<typename T::SubTable> &> (extensionOffset); } + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, format); + if (unlikely (!c->may_dispatch (this, this))) return_trace (c->no_dispatch_return_value ()); + return_trace (get_subtable<typename T::SubTable> ().dispatch (c, get_type (), hb_forward<Ts> (ds)...)); + } + + void collect_variation_indices (hb_collect_variation_indices_context_t *c) const + { dispatch (c); } + + /* This is called from may_dispatch() above with hb_sanitize_context_t. */ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + extensionLookupType != T::SubTable::Extension); + } + + protected: + HBUINT16 format; /* Format identifier. Set to 1. */ + HBUINT16 extensionLookupType; /* Lookup type of subtable referenced + * by ExtensionOffset (i.e. the + * extension subtable). */ + Offset32 extensionOffset; /* Offset to the extension subtable, + * of lookup type subtable. */ + public: + DEFINE_SIZE_STATIC (8); +}; + +template <typename T> +struct Extension +{ + unsigned int get_type () const + { + switch (u.format) { + case 1: return u.format1.get_type (); + default:return 0; + } + } + template <typename X> + const X& get_subtable () const + { + switch (u.format) { + case 1: return u.format1.template get_subtable<typename T::SubTable> (); + default:return Null (typename T::SubTable); + } + } + + template <typename context_t, typename ...Ts> + typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const + { + TRACE_DISPATCH (this, u.format); + if (unlikely (!c->may_dispatch (this, &u.format))) return_trace (c->no_dispatch_return_value ()); + switch (u.format) { + case 1: return_trace (u.format1.dispatch (c, hb_forward<Ts> (ds)...)); + default:return_trace (c->default_return_value ()); + } + } + + protected: + union { + HBUINT16 format; /* Format identifier */ + ExtensionFormat1<T> format1; + } u; +}; + + +/* + * GSUB/GPOS Common + */ + +struct hb_ot_layout_lookup_accelerator_t +{ + template <typename TLookup> + void init (const TLookup &lookup) + { + digest.init (); + lookup.collect_coverage (&digest); + + subtables.init (); + OT::hb_get_subtables_context_t c_get_subtables (subtables); + lookup.dispatch (&c_get_subtables); + } + void fini () { subtables.fini (); } + + bool may_have (hb_codepoint_t g) const + { return digest.may_have (g); } + + bool apply (hb_ot_apply_context_t *c) const + { + for (unsigned int i = 0; i < subtables.length; i++) + if (subtables[i].apply (c)) + return true; + return false; + } + + private: + hb_set_digest_t digest; + hb_get_subtables_context_t::array_t subtables; +}; + +struct GSUBGPOS +{ + bool has_data () const { return version.to_int (); } + unsigned int get_script_count () const + { return (this+scriptList).len; } + const Tag& get_script_tag (unsigned int i) const + { return (this+scriptList).get_tag (i); } + unsigned int get_script_tags (unsigned int start_offset, + unsigned int *script_count /* IN/OUT */, + hb_tag_t *script_tags /* OUT */) const + { return (this+scriptList).get_tags (start_offset, script_count, script_tags); } + const Script& get_script (unsigned int i) const + { return (this+scriptList)[i]; } + bool find_script_index (hb_tag_t tag, unsigned int *index) const + { return (this+scriptList).find_index (tag, index); } + + unsigned int get_feature_count () const + { return (this+featureList).len; } + hb_tag_t get_feature_tag (unsigned int i) const + { return i == Index::NOT_FOUND_INDEX ? HB_TAG_NONE : (this+featureList).get_tag (i); } + unsigned int get_feature_tags (unsigned int start_offset, + unsigned int *feature_count /* IN/OUT */, + hb_tag_t *feature_tags /* OUT */) const + { return (this+featureList).get_tags (start_offset, feature_count, feature_tags); } + const Feature& get_feature (unsigned int i) const + { return (this+featureList)[i]; } + bool find_feature_index (hb_tag_t tag, unsigned int *index) const + { return (this+featureList).find_index (tag, index); } + + unsigned int get_lookup_count () const + { return (this+lookupList).len; } + const Lookup& get_lookup (unsigned int i) const + { return (this+lookupList)[i]; } + + bool find_variations_index (const int *coords, unsigned int num_coords, + unsigned int *index) const + { +#ifdef HB_NO_VAR + *index = FeatureVariations::NOT_FOUND_INDEX; + return false; +#endif + return (version.to_int () >= 0x00010001u ? this+featureVars : Null (FeatureVariations)) + .find_index (coords, num_coords, index); + } + const Feature& get_feature_variation (unsigned int feature_index, + unsigned int variations_index) const + { +#ifndef HB_NO_VAR + if (FeatureVariations::NOT_FOUND_INDEX != variations_index && + version.to_int () >= 0x00010001u) + { + const Feature *feature = (this+featureVars).find_substitute (variations_index, + feature_index); + if (feature) + return *feature; + } +#endif + return get_feature (feature_index); + } + + void feature_variation_collect_lookups (const hb_set_t *feature_indexes, + hb_set_t *lookup_indexes /* OUT */) const + { +#ifndef HB_NO_VAR + if (version.to_int () >= 0x00010001u) + (this+featureVars).collect_lookups (feature_indexes, lookup_indexes); +#endif + } + + template <typename TLookup> + void closure_lookups (hb_face_t *face, + const hb_set_t *glyphs, + hb_set_t *lookup_indexes /* IN/OUT */) const + { + hb_set_t visited_lookups, inactive_lookups; + OT::hb_closure_lookups_context_t c (face, glyphs, &visited_lookups, &inactive_lookups); + + for (unsigned lookup_index : + hb_iter (lookup_indexes)) + reinterpret_cast<const TLookup &> (get_lookup (lookup_index)).closure_lookups (&c, lookup_index); + + hb_set_union (lookup_indexes, &visited_lookups); + hb_set_subtract (lookup_indexes, &inactive_lookups); + } + + template <typename TLookup> + bool subset (hb_subset_layout_context_t *c) const + { + TRACE_SUBSET (this); + auto *out = c->subset_context->serializer->embed (*this); + if (unlikely (!out)) return_trace (false); + + typedef LookupOffsetList<TLookup> TLookupList; + reinterpret_cast<OffsetTo<TLookupList> &> (out->lookupList) + .serialize_subset (c->subset_context, + reinterpret_cast<const OffsetTo<TLookupList> &> (lookupList), + this, + c); + + reinterpret_cast<OffsetTo<RecordListOfFeature> &> (out->featureList) + .serialize_subset (c->subset_context, + reinterpret_cast<const OffsetTo<RecordListOfFeature> &> (featureList), + this, + c); + + out->scriptList.serialize_subset (c->subset_context, + scriptList, + this, + c); + +#ifndef HB_NO_VAR + if (version.to_int () >= 0x00010001u) + { + bool ret = out->featureVars.serialize_subset (c->subset_context, featureVars, this, c); + if (!ret) + { + out->version.major = 1; + out->version.minor = 0; + } + } +#endif + + return_trace (true); + } + + void closure_features (const hb_map_t *lookup_indexes, /* IN */ + hb_set_t *feature_indexes /* OUT */) const + { + unsigned int feature_count = hb_min (get_feature_count (), (unsigned) HB_MAX_FEATURES); + for (unsigned i = 0; i < feature_count; i++) + { + const Feature& f = get_feature (i); + if ((!f.featureParams.is_null ()) || f.intersects_lookup_indexes (lookup_indexes)) + feature_indexes->add (i); + } +#ifndef HB_NO_VAR + if (version.to_int () >= 0x00010001u) + (this+featureVars).closure_features (lookup_indexes, feature_indexes); +#endif + } + + unsigned int get_size () const + { + return min_size + + (version.to_int () >= 0x00010001u ? featureVars.static_size : 0); + } + + template <typename TLookup> + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + typedef OffsetListOf<TLookup> TLookupList; + if (unlikely (!(version.sanitize (c) && + likely (version.major == 1) && + scriptList.sanitize (c, this) && + featureList.sanitize (c, this) && + reinterpret_cast<const OffsetTo<TLookupList> &> (lookupList).sanitize (c, this)))) + return_trace (false); + +#ifndef HB_NO_VAR + if (unlikely (!(version.to_int () < 0x00010001u || featureVars.sanitize (c, this)))) + return_trace (false); +#endif + + return_trace (true); + } + + template <typename T> + struct accelerator_t + { + void init (hb_face_t *face) + { + this->table = hb_sanitize_context_t ().reference_table<T> (face); + if (unlikely (this->table->is_blocklisted (this->table.get_blob (), face))) + { + hb_blob_destroy (this->table.get_blob ()); + this->table = hb_blob_get_empty (); + } + + this->lookup_count = table->get_lookup_count (); + + this->accels = (hb_ot_layout_lookup_accelerator_t *) calloc (this->lookup_count, sizeof (hb_ot_layout_lookup_accelerator_t)); + if (unlikely (!this->accels)) + this->lookup_count = 0; + + for (unsigned int i = 0; i < this->lookup_count; i++) + this->accels[i].init (table->get_lookup (i)); + } + + void fini () + { + for (unsigned int i = 0; i < this->lookup_count; i++) + this->accels[i].fini (); + free (this->accels); + this->table.destroy (); + } + + hb_blob_ptr_t<T> table; + unsigned int lookup_count; + hb_ot_layout_lookup_accelerator_t *accels; + }; + + protected: + FixedVersion<>version; /* Version of the GSUB/GPOS table--initially set + * to 0x00010000u */ + OffsetTo<ScriptList> + scriptList; /* ScriptList table */ + OffsetTo<FeatureList> + featureList; /* FeatureList table */ + OffsetTo<LookupList> + lookupList; /* LookupList table */ + LOffsetTo<FeatureVariations> + featureVars; /* Offset to Feature Variations + table--from beginning of table + * (may be NULL). Introduced + * in version 0x00010001. */ + public: + DEFINE_SIZE_MIN (10); +}; + + +} /* namespace OT */ + + +#endif /* HB_OT_LAYOUT_GSUBGPOS_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-jstf-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-jstf-table.hh new file mode 100644 index 0000000000..ffd2bf4574 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-layout-jstf-table.hh @@ -0,0 +1,235 @@ +/* + * Copyright © 2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_LAYOUT_JSTF_TABLE_HH +#define HB_OT_LAYOUT_JSTF_TABLE_HH + +#include "hb-open-type.hh" +#include "hb-ot-layout-gpos-table.hh" + + +namespace OT { + + +/* + * JstfModList -- Justification Modification List Tables + */ + +typedef IndexArray JstfModList; + + +/* + * JstfMax -- Justification Maximum Table + */ + +typedef OffsetListOf<PosLookup> JstfMax; + + +/* + * JstfPriority -- Justification Priority Table + */ + +struct JstfPriority +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + shrinkageEnableGSUB.sanitize (c, this) && + shrinkageDisableGSUB.sanitize (c, this) && + shrinkageEnableGPOS.sanitize (c, this) && + shrinkageDisableGPOS.sanitize (c, this) && + shrinkageJstfMax.sanitize (c, this) && + extensionEnableGSUB.sanitize (c, this) && + extensionDisableGSUB.sanitize (c, this) && + extensionEnableGPOS.sanitize (c, this) && + extensionDisableGPOS.sanitize (c, this) && + extensionJstfMax.sanitize (c, this)); + } + + protected: + OffsetTo<JstfModList> + shrinkageEnableGSUB; /* Offset to Shrinkage Enable GSUB + * JstfModList table--from beginning of + * JstfPriority table--may be NULL */ + OffsetTo<JstfModList> + shrinkageDisableGSUB; /* Offset to Shrinkage Disable GSUB + * JstfModList table--from beginning of + * JstfPriority table--may be NULL */ + OffsetTo<JstfModList> + shrinkageEnableGPOS; /* Offset to Shrinkage Enable GPOS + * JstfModList table--from beginning of + * JstfPriority table--may be NULL */ + OffsetTo<JstfModList> + shrinkageDisableGPOS; /* Offset to Shrinkage Disable GPOS + * JstfModList table--from beginning of + * JstfPriority table--may be NULL */ + OffsetTo<JstfMax> + shrinkageJstfMax; /* Offset to Shrinkage JstfMax table-- + * from beginning of JstfPriority table + * --may be NULL */ + OffsetTo<JstfModList> + extensionEnableGSUB; /* Offset to Extension Enable GSUB + * JstfModList table--from beginning of + * JstfPriority table--may be NULL */ + OffsetTo<JstfModList> + extensionDisableGSUB; /* Offset to Extension Disable GSUB + * JstfModList table--from beginning of + * JstfPriority table--may be NULL */ + OffsetTo<JstfModList> + extensionEnableGPOS; /* Offset to Extension Enable GPOS + * JstfModList table--from beginning of + * JstfPriority table--may be NULL */ + OffsetTo<JstfModList> + extensionDisableGPOS; /* Offset to Extension Disable GPOS + * JstfModList table--from beginning of + * JstfPriority table--may be NULL */ + OffsetTo<JstfMax> + extensionJstfMax; /* Offset to Extension JstfMax table-- + * from beginning of JstfPriority table + * --may be NULL */ + + public: + DEFINE_SIZE_STATIC (20); +}; + + +/* + * JstfLangSys -- Justification Language System Table + */ + +struct JstfLangSys : OffsetListOf<JstfPriority> +{ + bool sanitize (hb_sanitize_context_t *c, + const Record_sanitize_closure_t * = nullptr) const + { + TRACE_SANITIZE (this); + return_trace (OffsetListOf<JstfPriority>::sanitize (c)); + } +}; + + +/* + * ExtenderGlyphs -- Extender Glyph Table + */ + +typedef SortedArrayOf<HBGlyphID> ExtenderGlyphs; + + +/* + * JstfScript -- The Justification Table + */ + +struct JstfScript +{ + unsigned int get_lang_sys_count () const + { return langSys.len; } + const Tag& get_lang_sys_tag (unsigned int i) const + { return langSys.get_tag (i); } + unsigned int get_lang_sys_tags (unsigned int start_offset, + unsigned int *lang_sys_count /* IN/OUT */, + hb_tag_t *lang_sys_tags /* OUT */) const + { return langSys.get_tags (start_offset, lang_sys_count, lang_sys_tags); } + const JstfLangSys& get_lang_sys (unsigned int i) const + { + if (i == Index::NOT_FOUND_INDEX) return get_default_lang_sys (); + return this+langSys[i].offset; + } + bool find_lang_sys_index (hb_tag_t tag, unsigned int *index) const + { return langSys.find_index (tag, index); } + + bool has_default_lang_sys () const { return defaultLangSys != 0; } + const JstfLangSys& get_default_lang_sys () const { return this+defaultLangSys; } + + bool sanitize (hb_sanitize_context_t *c, + const Record_sanitize_closure_t * = nullptr) const + { + TRACE_SANITIZE (this); + return_trace (extenderGlyphs.sanitize (c, this) && + defaultLangSys.sanitize (c, this) && + langSys.sanitize (c, this)); + } + + protected: + OffsetTo<ExtenderGlyphs> + extenderGlyphs; /* Offset to ExtenderGlyph table--from beginning + * of JstfScript table-may be NULL */ + OffsetTo<JstfLangSys> + defaultLangSys; /* Offset to DefaultJstfLangSys table--from + * beginning of JstfScript table--may be Null */ + RecordArrayOf<JstfLangSys> + langSys; /* Array of JstfLangSysRecords--listed + * alphabetically by LangSysTag */ + public: + DEFINE_SIZE_ARRAY (6, langSys); +}; + + +/* + * JSTF -- Justification + * https://docs.microsoft.com/en-us/typography/opentype/spec/jstf + */ + +struct JSTF +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_JSTF; + + unsigned int get_script_count () const + { return scriptList.len; } + const Tag& get_script_tag (unsigned int i) const + { return scriptList.get_tag (i); } + unsigned int get_script_tags (unsigned int start_offset, + unsigned int *script_count /* IN/OUT */, + hb_tag_t *script_tags /* OUT */) const + { return scriptList.get_tags (start_offset, script_count, script_tags); } + const JstfScript& get_script (unsigned int i) const + { return this+scriptList[i].offset; } + bool find_script_index (hb_tag_t tag, unsigned int *index) const + { return scriptList.find_index (tag, index); } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (version.sanitize (c) && + likely (version.major == 1) && + scriptList.sanitize (c, this)); + } + + protected: + FixedVersion<>version; /* Version of the JSTF table--initially set + * to 0x00010000u */ + RecordArrayOf<JstfScript> + scriptList; /* Array of JstfScripts--listed + * alphabetically by ScriptTag */ + public: + DEFINE_SIZE_ARRAY (6, scriptList); +}; + + +} /* namespace OT */ + + +#endif /* HB_OT_LAYOUT_JSTF_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-layout.cc b/thirdparty/harfbuzz/src/hb-ot-layout.cc new file mode 100644 index 0000000000..46408bb9d3 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-layout.cc @@ -0,0 +1,1993 @@ +/* + * Copyright © 1998-2004 David Turner and Werner Lemberg + * Copyright © 2006 Behdad Esfahbod + * Copyright © 2007,2008,2009 Red Hat, Inc. + * Copyright © 2012,2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_LAYOUT + +#ifdef HB_NO_OT_TAG +#error "Cannot compile hb-ot-layout.cc with HB_NO_OT_TAG." +#endif + +#include "hb-open-type.hh" +#include "hb-ot-layout.hh" +#include "hb-ot-face.hh" +#include "hb-ot-map.hh" +#include "hb-map.hh" + +#include "hb-ot-kern-table.hh" +#include "hb-ot-layout-gdef-table.hh" +#include "hb-ot-layout-gsub-table.hh" +#include "hb-ot-layout-gpos-table.hh" +#include "hb-ot-layout-base-table.hh" // Just so we compile it; unused otherwise. +#include "hb-ot-layout-jstf-table.hh" // Just so we compile it; unused otherwise. +#include "hb-ot-name-table.hh" +#include "hb-ot-os2-table.hh" + +#include "hb-aat-layout-morx-table.hh" +#include "hb-aat-layout-opbd-table.hh" // Just so we compile it; unused otherwise. + +/** + * SECTION:hb-ot-layout + * @title: hb-ot-layout + * @short_description: OpenType Layout + * @include: hb-ot.h + * + * Functions for querying OpenType Layout features in the font face. + **/ + + +/* + * kern + */ + +#ifndef HB_NO_OT_KERN +/** + * hb_ot_layout_has_kerning: + * @face: The #hb_face_t to work on + * + * Tests whether a face includes any kerning data in the 'kern' table. + * Does NOT test for kerning lookups in the GPOS table. + * + * Return value: true if data found, false otherwise + * + **/ +bool +hb_ot_layout_has_kerning (hb_face_t *face) +{ + return face->table.kern->has_data (); +} + +/** + * hb_ot_layout_has_machine_kerning: + * @face: The #hb_face_t to work on + * + * Tests whether a face includes any state-machine kerning in the 'kern' table. + * Does NOT examine the GPOS table. + * + * Return value: true if data found, false otherwise + * + **/ +bool +hb_ot_layout_has_machine_kerning (hb_face_t *face) +{ + return face->table.kern->has_state_machine (); +} + +/** + * hb_ot_layout_has_cross_kerning: + * @face: The #hb_face_t to work on + * + * Tests whether a face has any cross-stream kerning (i.e., kerns + * that make adjustments perpendicular to the direction of the text + * flow: Y adjustments in horizontal text or X adjustments in + * vertical text) in the 'kern' table. + * + * Does NOT examine the GPOS table. + * + * Return value: true is data found, false otherwise + * + **/ +bool +hb_ot_layout_has_cross_kerning (hb_face_t *face) +{ + return face->table.kern->has_cross_stream (); +} + +void +hb_ot_layout_kern (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ + hb_blob_t *blob = font->face->table.kern.get_blob (); + const AAT::kern& kern = *blob->as<AAT::kern> (); + + AAT::hb_aat_apply_context_t c (plan, font, buffer, blob); + + kern.apply (&c); +} +#endif + + +/* + * GDEF + */ + +bool +OT::GDEF::is_blocklisted (hb_blob_t *blob, + hb_face_t *face) const +{ +#ifdef HB_NO_OT_LAYOUT_BLACKLIST + return false; +#endif + /* The ugly business of blocklisting individual fonts' tables happen here! + * See this thread for why we finally had to bend in and do this: + * https://lists.freedesktop.org/archives/harfbuzz/2016-February/005489.html + * + * In certain versions of Times New Roman Italic and Bold Italic, + * ASCII double quotation mark U+0022 has wrong glyph class 3 (mark) + * in GDEF. Many versions of Tahoma have bad GDEF tables that + * incorrectly classify some spacing marks such as certain IPA + * symbols as glyph class 3. So do older versions of Microsoft + * Himalaya, and the version of Cantarell shipped by Ubuntu 16.04. + * + * Nuke the GDEF tables of to avoid unwanted width-zeroing. + * + * See https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 + * https://bugzilla.mozilla.org/show_bug.cgi?id=1279693 + * https://bugzilla.mozilla.org/show_bug.cgi?id=1279875 + */ + switch HB_CODEPOINT_ENCODE3(blob->length, + face->table.GSUB->table.get_length (), + face->table.GPOS->table.get_length ()) + { + /* sha1sum:c5ee92f0bca4bfb7d06c4d03e8cf9f9cf75d2e8a Windows 7? timesi.ttf */ + case HB_CODEPOINT_ENCODE3 (442, 2874, 42038): + /* sha1sum:37fc8c16a0894ab7b749e35579856c73c840867b Windows 7? timesbi.ttf */ + case HB_CODEPOINT_ENCODE3 (430, 2874, 40662): + /* sha1sum:19fc45110ea6cd3cdd0a5faca256a3797a069a80 Windows 7 timesi.ttf */ + case HB_CODEPOINT_ENCODE3 (442, 2874, 39116): + /* sha1sum:6d2d3c9ed5b7de87bc84eae0df95ee5232ecde26 Windows 7 timesbi.ttf */ + case HB_CODEPOINT_ENCODE3 (430, 2874, 39374): + /* sha1sum:8583225a8b49667c077b3525333f84af08c6bcd8 OS X 10.11.3 Times New Roman Italic.ttf */ + case HB_CODEPOINT_ENCODE3 (490, 3046, 41638): + /* sha1sum:ec0f5a8751845355b7c3271d11f9918a966cb8c9 OS X 10.11.3 Times New Roman Bold Italic.ttf */ + case HB_CODEPOINT_ENCODE3 (478, 3046, 41902): + /* sha1sum:96eda93f7d33e79962451c6c39a6b51ee893ce8c tahoma.ttf from Windows 8 */ + case HB_CODEPOINT_ENCODE3 (898, 12554, 46470): + /* sha1sum:20928dc06014e0cd120b6fc942d0c3b1a46ac2bc tahomabd.ttf from Windows 8 */ + case HB_CODEPOINT_ENCODE3 (910, 12566, 47732): + /* sha1sum:4f95b7e4878f60fa3a39ca269618dfde9721a79e tahoma.ttf from Windows 8.1 */ + case HB_CODEPOINT_ENCODE3 (928, 23298, 59332): + /* sha1sum:6d400781948517c3c0441ba42acb309584b73033 tahomabd.ttf from Windows 8.1 */ + case HB_CODEPOINT_ENCODE3 (940, 23310, 60732): + /* tahoma.ttf v6.04 from Windows 8.1 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */ + case HB_CODEPOINT_ENCODE3 (964, 23836, 60072): + /* tahomabd.ttf v6.04 from Windows 8.1 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */ + case HB_CODEPOINT_ENCODE3 (976, 23832, 61456): + /* sha1sum:e55fa2dfe957a9f7ec26be516a0e30b0c925f846 tahoma.ttf from Windows 10 */ + case HB_CODEPOINT_ENCODE3 (994, 24474, 60336): + /* sha1sum:7199385abb4c2cc81c83a151a7599b6368e92343 tahomabd.ttf from Windows 10 */ + case HB_CODEPOINT_ENCODE3 (1006, 24470, 61740): + /* tahoma.ttf v6.91 from Windows 10 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */ + case HB_CODEPOINT_ENCODE3 (1006, 24576, 61346): + /* tahomabd.ttf v6.91 from Windows 10 x64, see https://bugzilla.mozilla.org/show_bug.cgi?id=1279925 */ + case HB_CODEPOINT_ENCODE3 (1018, 24572, 62828): + /* sha1sum:b9c84d820c49850d3d27ec498be93955b82772b5 tahoma.ttf from Windows 10 AU */ + case HB_CODEPOINT_ENCODE3 (1006, 24576, 61352): + /* sha1sum:2bdfaab28174bdadd2f3d4200a30a7ae31db79d2 tahomabd.ttf from Windows 10 AU */ + case HB_CODEPOINT_ENCODE3 (1018, 24572, 62834): + /* sha1sum:b0d36cf5a2fbe746a3dd277bffc6756a820807a7 Tahoma.ttf from Mac OS X 10.9 */ + case HB_CODEPOINT_ENCODE3 (832, 7324, 47162): + /* sha1sum:12fc4538e84d461771b30c18b5eb6bd434e30fba Tahoma Bold.ttf from Mac OS X 10.9 */ + case HB_CODEPOINT_ENCODE3 (844, 7302, 45474): + /* sha1sum:eb8afadd28e9cf963e886b23a30b44ab4fd83acc himalaya.ttf from Windows 7 */ + case HB_CODEPOINT_ENCODE3 (180, 13054, 7254): + /* sha1sum:73da7f025b238a3f737aa1fde22577a6370f77b0 himalaya.ttf from Windows 8 */ + case HB_CODEPOINT_ENCODE3 (192, 12638, 7254): + /* sha1sum:6e80fd1c0b059bbee49272401583160dc1e6a427 himalaya.ttf from Windows 8.1 */ + case HB_CODEPOINT_ENCODE3 (192, 12690, 7254): + /* 8d9267aea9cd2c852ecfb9f12a6e834bfaeafe44 cantarell-fonts-0.0.21/otf/Cantarell-Regular.otf */ + /* 983988ff7b47439ab79aeaf9a45bd4a2c5b9d371 cantarell-fonts-0.0.21/otf/Cantarell-Oblique.otf */ + case HB_CODEPOINT_ENCODE3 (188, 248, 3852): + /* 2c0c90c6f6087ffbfea76589c93113a9cbb0e75f cantarell-fonts-0.0.21/otf/Cantarell-Bold.otf */ + /* 55461f5b853c6da88069ffcdf7f4dd3f8d7e3e6b cantarell-fonts-0.0.21/otf/Cantarell-Bold-Oblique.otf */ + case HB_CODEPOINT_ENCODE3 (188, 264, 3426): + /* d125afa82a77a6475ac0e74e7c207914af84b37a padauk-2.80/Padauk.ttf RHEL 7.2 */ + case HB_CODEPOINT_ENCODE3 (1058, 47032, 11818): + /* 0f7b80437227b90a577cc078c0216160ae61b031 padauk-2.80/Padauk-Bold.ttf RHEL 7.2*/ + case HB_CODEPOINT_ENCODE3 (1046, 47030, 12600): + /* d3dde9aa0a6b7f8f6a89ef1002e9aaa11b882290 padauk-2.80/Padauk.ttf Ubuntu 16.04 */ + case HB_CODEPOINT_ENCODE3 (1058, 71796, 16770): + /* 5f3c98ccccae8a953be2d122c1b3a77fd805093f padauk-2.80/Padauk-Bold.ttf Ubuntu 16.04 */ + case HB_CODEPOINT_ENCODE3 (1046, 71790, 17862): + /* 6c93b63b64e8b2c93f5e824e78caca555dc887c7 padauk-2.80/Padauk-book.ttf */ + case HB_CODEPOINT_ENCODE3 (1046, 71788, 17112): + /* d89b1664058359b8ec82e35d3531931125991fb9 padauk-2.80/Padauk-bookbold.ttf */ + case HB_CODEPOINT_ENCODE3 (1058, 71794, 17514): + /* 824cfd193aaf6234b2b4dc0cf3c6ef576c0d00ef padauk-3.0/Padauk-book.ttf */ + case HB_CODEPOINT_ENCODE3 (1330, 109904, 57938): + /* 91fcc10cf15e012d27571e075b3b4dfe31754a8a padauk-3.0/Padauk-bookbold.ttf */ + case HB_CODEPOINT_ENCODE3 (1330, 109904, 58972): + /* sha1sum: c26e41d567ed821bed997e937bc0c41435689e85 Padauk.ttf + * "Padauk Regular" "Version 2.5", see https://crbug.com/681813 */ + case HB_CODEPOINT_ENCODE3 (1004, 59092, 14836): + return true; + } + return false; +} + +static void +_hb_ot_layout_set_glyph_props (hb_font_t *font, + hb_buffer_t *buffer) +{ + _hb_buffer_assert_gsubgpos_vars (buffer); + + const OT::GDEF &gdef = *font->face->table.GDEF->table; + unsigned int count = buffer->len; + for (unsigned int i = 0; i < count; i++) + { + _hb_glyph_info_set_glyph_props (&buffer->info[i], gdef.get_glyph_props (buffer->info[i].codepoint)); + _hb_glyph_info_clear_lig_props (&buffer->info[i]); + buffer->info[i].syllable() = 0; + } +} + +/* Public API */ + +/** + * hb_ot_layout_has_glyph_classes: + * @face: #hb_face_t to work upon + * + * Tests whether a face has any glyph classes defined in its GDEF table. + * + * Return value: true if data found, false otherwise + * + **/ +hb_bool_t +hb_ot_layout_has_glyph_classes (hb_face_t *face) +{ + return face->table.GDEF->table->has_glyph_classes (); +} + +/** + * hb_ot_layout_get_glyph_class: + * @face: The #hb_face_t to work on + * @glyph: The #hb_codepoint_t code point to query + * + * Fetches the GDEF class of the requested glyph in the specified face. + * + * Return value: The #hb_ot_layout_glyph_class_t glyph class of the given code + * point in the GDEF table of the face. + * + * Since: 0.9.7 + **/ +hb_ot_layout_glyph_class_t +hb_ot_layout_get_glyph_class (hb_face_t *face, + hb_codepoint_t glyph) +{ + return (hb_ot_layout_glyph_class_t) face->table.GDEF->table->get_glyph_class (glyph); +} + +/** + * hb_ot_layout_get_glyphs_in_class: + * @face: The #hb_face_t to work on + * @klass: The #hb_ot_layout_glyph_class_t GDEF class to retrieve + * @glyphs: (out): The #hb_set_t set of all glyphs belonging to the requested + * class. + * + * Retrieves the set of all glyphs from the face that belong to the requested + * glyph class in the face's GDEF table. + * + * Since: 0.9.7 + **/ +void +hb_ot_layout_get_glyphs_in_class (hb_face_t *face, + hb_ot_layout_glyph_class_t klass, + hb_set_t *glyphs /* OUT */) +{ + return face->table.GDEF->table->get_glyphs_in_class (klass, glyphs); +} + +#ifndef HB_NO_LAYOUT_UNUSED +/** + * hb_ot_layout_get_attach_points: + * @face: The #hb_face_t to work on + * @glyph: The #hb_codepoint_t code point to query + * @start_offset: offset of the first attachment point to retrieve + * @point_count: (inout) (allow-none): Input = the maximum number of attachment points to return; + * Output = the actual number of attachment points returned (may be zero) + * @point_array: (out) (array length=point_count): The array of attachment points found for the query + * + * Fetches a list of all attachment points for the specified glyph in the GDEF + * table of the face. The list returned will begin at the offset provided. + * + * Useful if the client program wishes to cache the list. + * + **/ +unsigned int +hb_ot_layout_get_attach_points (hb_face_t *face, + hb_codepoint_t glyph, + unsigned int start_offset, + unsigned int *point_count /* IN/OUT */, + unsigned int *point_array /* OUT */) +{ + return face->table.GDEF->table->get_attach_points (glyph, + start_offset, + point_count, + point_array); +} +/** + * hb_ot_layout_get_ligature_carets: + * @font: The #hb_font_t to work on + * @direction: The #hb_direction_t text direction to use + * @glyph: The #hb_codepoint_t code point to query + * @start_offset: offset of the first caret position to retrieve + * @caret_count: (inout) (allow-none): Input = the maximum number of caret positions to return; + * Output = the actual number of caret positions returned (may be zero) + * @caret_array: (out) (array length=caret_count): The array of caret positions found for the query + * + * Fetches a list of the caret positions defined for a ligature glyph in the GDEF + * table of the font. The list returned will begin at the offset provided. + * + **/ +unsigned int +hb_ot_layout_get_ligature_carets (hb_font_t *font, + hb_direction_t direction, + hb_codepoint_t glyph, + unsigned int start_offset, + unsigned int *caret_count /* IN/OUT */, + hb_position_t *caret_array /* OUT */) +{ + return font->face->table.GDEF->table->get_lig_carets (font, direction, glyph, start_offset, caret_count, caret_array); +} +#endif + + +/* + * GSUB/GPOS + */ + +bool +OT::GSUB::is_blocklisted (hb_blob_t *blob HB_UNUSED, + hb_face_t *face) const +{ +#ifdef HB_NO_OT_LAYOUT_BLACKLIST + return false; +#endif + return false; +} + +bool +OT::GPOS::is_blocklisted (hb_blob_t *blob HB_UNUSED, + hb_face_t *face HB_UNUSED) const +{ +#ifdef HB_NO_OT_LAYOUT_BLACKLIST + return false; +#endif + return false; +} + +static const OT::GSUBGPOS& +get_gsubgpos_table (hb_face_t *face, + hb_tag_t table_tag) +{ + switch (table_tag) { + case HB_OT_TAG_GSUB: return *face->table.GSUB->table; + case HB_OT_TAG_GPOS: return *face->table.GPOS->table; + default: return Null (OT::GSUBGPOS); + } +} + + +/** + * hb_ot_layout_table_get_script_tags: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @start_offset: offset of the first script tag to retrieve + * @script_count: (inout) (allow-none): Input = the maximum number of script tags to return; + * Output = the actual number of script tags returned (may be zero) + * @script_tags: (out) (array length=script_count): The array of #hb_tag_t script tags found for the query + * + * Fetches a list of all scripts enumerated in the specified face's GSUB table + * or GPOS table. The list returned will begin at the offset provided. + * + **/ +unsigned int +hb_ot_layout_table_get_script_tags (hb_face_t *face, + hb_tag_t table_tag, + unsigned int start_offset, + unsigned int *script_count /* IN/OUT */, + hb_tag_t *script_tags /* OUT */) +{ + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + + return g.get_script_tags (start_offset, script_count, script_tags); +} + +#define HB_OT_TAG_LATIN_SCRIPT HB_TAG ('l', 'a', 't', 'n') + +/** + * hb_ot_layout_table_find_script: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @script_tag: #hb_tag_t of the script tag requested + * @script_index: (out): The index of the requested script tag + * + * Fetches the index if a given script tag in the specified face's GSUB table + * or GPOS table. + * + * Return value: true if the script is found, false otherwise + * + **/ +hb_bool_t +hb_ot_layout_table_find_script (hb_face_t *face, + hb_tag_t table_tag, + hb_tag_t script_tag, + unsigned int *script_index /* OUT */) +{ + static_assert ((OT::Index::NOT_FOUND_INDEX == HB_OT_LAYOUT_NO_SCRIPT_INDEX), ""); + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + + if (g.find_script_index (script_tag, script_index)) + return true; + + /* try finding 'DFLT' */ + if (g.find_script_index (HB_OT_TAG_DEFAULT_SCRIPT, script_index)) + return false; + + /* try with 'dflt'; MS site has had typos and many fonts use it now :(. + * including many versions of DejaVu Sans Mono! */ + if (g.find_script_index (HB_OT_TAG_DEFAULT_LANGUAGE, script_index)) + return false; + + /* try with 'latn'; some old fonts put their features there even though + they're really trying to support Thai, for example :( */ + if (g.find_script_index (HB_OT_TAG_LATIN_SCRIPT, script_index)) + return false; + + if (script_index) *script_index = HB_OT_LAYOUT_NO_SCRIPT_INDEX; + return false; +} + +#ifndef HB_DISABLE_DEPRECATED +/** + * hb_ot_layout_table_choose_script: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @script_tags: Array of #hb_tag_t script tags + * @script_index: (out): The index of the requested script tag + * @chosen_script: (out): #hb_tag_t of the script tag requested + * + * Deprecated since 2.0.0 + **/ +hb_bool_t +hb_ot_layout_table_choose_script (hb_face_t *face, + hb_tag_t table_tag, + const hb_tag_t *script_tags, + unsigned int *script_index /* OUT */, + hb_tag_t *chosen_script /* OUT */) +{ + const hb_tag_t *t; + for (t = script_tags; *t; t++); + return hb_ot_layout_table_select_script (face, table_tag, t - script_tags, script_tags, script_index, chosen_script); +} +#endif + +/** + * hb_ot_layout_table_select_script: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @script_count: Number of script tags in the array + * @script_tags: Array of #hb_tag_t script tags + * @script_index: (out): The index of the requested script + * @chosen_script: (out): #hb_tag_t of the requested script + * + * Since: 2.0.0 + **/ +hb_bool_t +hb_ot_layout_table_select_script (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_count, + const hb_tag_t *script_tags, + unsigned int *script_index /* OUT */, + hb_tag_t *chosen_script /* OUT */) +{ + static_assert ((OT::Index::NOT_FOUND_INDEX == HB_OT_LAYOUT_NO_SCRIPT_INDEX), ""); + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + unsigned int i; + + for (i = 0; i < script_count; i++) + { + if (g.find_script_index (script_tags[i], script_index)) + { + if (chosen_script) + *chosen_script = script_tags[i]; + return true; + } + } + + /* try finding 'DFLT' */ + if (g.find_script_index (HB_OT_TAG_DEFAULT_SCRIPT, script_index)) { + if (chosen_script) + *chosen_script = HB_OT_TAG_DEFAULT_SCRIPT; + return false; + } + + /* try with 'dflt'; MS site has had typos and many fonts use it now :( */ + if (g.find_script_index (HB_OT_TAG_DEFAULT_LANGUAGE, script_index)) { + if (chosen_script) + *chosen_script = HB_OT_TAG_DEFAULT_LANGUAGE; + return false; + } + + /* try with 'latn'; some old fonts put their features there even though + they're really trying to support Thai, for example :( */ + if (g.find_script_index (HB_OT_TAG_LATIN_SCRIPT, script_index)) { + if (chosen_script) + *chosen_script = HB_OT_TAG_LATIN_SCRIPT; + return false; + } + + if (script_index) *script_index = HB_OT_LAYOUT_NO_SCRIPT_INDEX; + if (chosen_script) + *chosen_script = HB_OT_LAYOUT_NO_SCRIPT_INDEX; + return false; +} + + +/** + * hb_ot_layout_table_get_feature_tags: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @start_offset: offset of the first feature tag to retrieve + * @feature_count: (inout) (allow-none): Input = the maximum number of feature tags to return; + * Output = the actual number of feature tags returned (may be zero) + * @feature_tags: (out) (array length=feature_count): Array of feature tags found in the table + * + * Fetches a list of all feature tags in the given face's GSUB or GPOS table. + * + **/ +unsigned int +hb_ot_layout_table_get_feature_tags (hb_face_t *face, + hb_tag_t table_tag, + unsigned int start_offset, + unsigned int *feature_count /* IN/OUT */, + hb_tag_t *feature_tags /* OUT */) +{ + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + + return g.get_feature_tags (start_offset, feature_count, feature_tags); +} + + +/** + * hb_ot_layout_table_find_feature: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @feature_tag: The #hb_tag_t og the requested feature tag + * @feature_index: (out): The index of the requested feature + * + * Fetches the index for a given feature tag in the specified face's GSUB table + * or GPOS table. + * + * Return value: true if the feature is found, false otherwise + **/ +bool +hb_ot_layout_table_find_feature (hb_face_t *face, + hb_tag_t table_tag, + hb_tag_t feature_tag, + unsigned int *feature_index /* OUT */) +{ + static_assert ((OT::Index::NOT_FOUND_INDEX == HB_OT_LAYOUT_NO_FEATURE_INDEX), ""); + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + + unsigned int num_features = g.get_feature_count (); + for (unsigned int i = 0; i < num_features; i++) + { + if (feature_tag == g.get_feature_tag (i)) { + if (feature_index) *feature_index = i; + return true; + } + } + + if (feature_index) *feature_index = HB_OT_LAYOUT_NO_FEATURE_INDEX; + return false; +} + + +/** + * hb_ot_layout_script_get_language_tags: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @script_index: The index of the requested script tag + * @start_offset: offset of the first language tag to retrieve + * @language_count: (inout) (allow-none): Input = the maximum number of language tags to return; + * Output = the actual number of language tags returned (may be zero) + * @language_tags: (out) (array length=language_count): Array of language tags found in the table + * + * Fetches a list of language tags in the given face's GSUB or GPOS table, underneath + * the specified script index. The list returned will begin at the offset provided. + * + **/ +unsigned int +hb_ot_layout_script_get_language_tags (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int start_offset, + unsigned int *language_count /* IN/OUT */, + hb_tag_t *language_tags /* OUT */) +{ + const OT::Script &s = get_gsubgpos_table (face, table_tag).get_script (script_index); + + return s.get_lang_sys_tags (start_offset, language_count, language_tags); +} + + +#ifndef HB_DISABLE_DEPRECATED +/** + * hb_ot_layout_script_find_language: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @script_index: The index of the requested script tag + * @language_tag: The #hb_tag_t of the requested language + * @language_index: The index of the requested language + * + * Fetches the index of a given language tag in the specified face's GSUB table + * or GPOS table, underneath the specified script tag. + * + * Return value: true if the language tag is found, false otherwise + * + * Since: ?? + * Deprecated: ?? + **/ +hb_bool_t +hb_ot_layout_script_find_language (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + hb_tag_t language_tag, + unsigned int *language_index) +{ + return hb_ot_layout_script_select_language (face, + table_tag, + script_index, + 1, + &language_tag, + language_index); +} +#endif + + +/** + * hb_ot_layout_script_select_language: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @script_index: The index of the requested script tag + * @language_count: The number of languages in the specified script + * @language_tags: The array of language tags + * @language_index: (out): The index of the requested language + * + * Fetches the index of a given language tag in the specified face's GSUB table + * or GPOS table, underneath the specified script index. + * + * Return value: true if the language tag is found, false otherwise + * + * Since: 2.0.0 + **/ +hb_bool_t +hb_ot_layout_script_select_language (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int language_count, + const hb_tag_t *language_tags, + unsigned int *language_index /* OUT */) +{ + static_assert ((OT::Index::NOT_FOUND_INDEX == HB_OT_LAYOUT_DEFAULT_LANGUAGE_INDEX), ""); + const OT::Script &s = get_gsubgpos_table (face, table_tag).get_script (script_index); + unsigned int i; + + for (i = 0; i < language_count; i++) + { + if (s.find_lang_sys_index (language_tags[i], language_index)) + return true; + } + + /* try finding 'dflt' */ + if (s.find_lang_sys_index (HB_OT_TAG_DEFAULT_LANGUAGE, language_index)) + return false; + + if (language_index) *language_index = HB_OT_LAYOUT_DEFAULT_LANGUAGE_INDEX; + return false; +} + + +/** + * hb_ot_layout_language_get_required_feature_index: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @script_index: The index of the requested script tag + * @language_index: The index of the requested language tag + * @feature_index: (out): The index of the requested feature + * + * Fetches the index of a requested feature in the given face's GSUB or GPOS table, + * underneath the specified script and language. + * + * Return value: true if the feature is found, false otherwise + * + **/ +hb_bool_t +hb_ot_layout_language_get_required_feature_index (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int language_index, + unsigned int *feature_index /* OUT */) +{ + return hb_ot_layout_language_get_required_feature (face, + table_tag, + script_index, + language_index, + feature_index, + nullptr); +} + + +/** + * hb_ot_layout_language_get_required_feature: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @script_index: The index of the requested script tag + * @language_index: The index of the requested language tag + * @feature_index: (out): The index of the requested feature + * @feature_tag: (out): The #hb_tag_t of the requested feature + * + * Fetches the tag of a requested feature index in the given face's GSUB or GPOS table, + * underneath the specified script and language. + * + * Return value: true if the feature is found, false otherwise + * + * Since: 0.9.30 + **/ +hb_bool_t +hb_ot_layout_language_get_required_feature (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int language_index, + unsigned int *feature_index /* OUT */, + hb_tag_t *feature_tag /* OUT */) +{ + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + const OT::LangSys &l = g.get_script (script_index).get_lang_sys (language_index); + + unsigned int index = l.get_required_feature_index (); + if (feature_index) *feature_index = index; + if (feature_tag) *feature_tag = g.get_feature_tag (index); + + return l.has_required_feature (); +} + + +/** + * hb_ot_layout_language_get_feature_indexes: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @script_index: The index of the requested script tag + * @language_index: The index of the requested language tag + * @start_offset: offset of the first feature tag to retrieve + * @feature_count: (inout) (allow-none): Input = the maximum number of feature tags to return; + * Output: the actual number of feature tags returned (may be zero) + * @feature_indexes: (out) (array length=feature_count): The array of feature indexes found for the query + * + * Fetches a list of all features in the specified face's GSUB table + * or GPOS table, underneath the specified script and language. The list + * returned will begin at the offset provided. + **/ +unsigned int +hb_ot_layout_language_get_feature_indexes (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int language_index, + unsigned int start_offset, + unsigned int *feature_count /* IN/OUT */, + unsigned int *feature_indexes /* OUT */) +{ + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + const OT::LangSys &l = g.get_script (script_index).get_lang_sys (language_index); + + return l.get_feature_indexes (start_offset, feature_count, feature_indexes); +} + + +/** + * hb_ot_layout_language_get_feature_tags: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @script_index: The index of the requested script tag + * @language_index: The index of the requested language tag + * @start_offset: offset of the first feature tag to retrieve + * @feature_count: (inout) (allow-none): Input = the maximum number of feature tags to return; + * Output = the actual number of feature tags returned (may be zero) + * @feature_tags: (out) (array length=feature_count): The array of #hb_tag_t feature tags found for the query + * + * Fetches a list of all features in the specified face's GSUB table + * or GPOS table, underneath the specified script and language. The list + * returned will begin at the offset provided. + * + **/ +unsigned int +hb_ot_layout_language_get_feature_tags (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int language_index, + unsigned int start_offset, + unsigned int *feature_count /* IN/OUT */, + hb_tag_t *feature_tags /* OUT */) +{ + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + const OT::LangSys &l = g.get_script (script_index).get_lang_sys (language_index); + + static_assert ((sizeof (unsigned int) == sizeof (hb_tag_t)), ""); + unsigned int ret = l.get_feature_indexes (start_offset, feature_count, (unsigned int *) feature_tags); + + if (feature_tags) { + unsigned int count = *feature_count; + for (unsigned int i = 0; i < count; i++) + feature_tags[i] = g.get_feature_tag ((unsigned int) feature_tags[i]); + } + + return ret; +} + + +/** + * hb_ot_layout_language_find_feature: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @script_index: The index of the requested script tag + * @language_index: The index of the requested language tag + * @feature_tag: #hb_tag_t of the feature tag requested + * @feature_index: (out): The index of the requested feature + * + * Fetches the index of a given feature tag in the specified face's GSUB table + * or GPOS table, underneath the specified script and language. + * + * Return value: true if the feature is found, false otherwise + * + **/ +hb_bool_t +hb_ot_layout_language_find_feature (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int language_index, + hb_tag_t feature_tag, + unsigned int *feature_index /* OUT */) +{ + static_assert ((OT::Index::NOT_FOUND_INDEX == HB_OT_LAYOUT_NO_FEATURE_INDEX), ""); + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + const OT::LangSys &l = g.get_script (script_index).get_lang_sys (language_index); + + unsigned int num_features = l.get_feature_count (); + for (unsigned int i = 0; i < num_features; i++) { + unsigned int f_index = l.get_feature_index (i); + + if (feature_tag == g.get_feature_tag (f_index)) { + if (feature_index) *feature_index = f_index; + return true; + } + } + + if (feature_index) *feature_index = HB_OT_LAYOUT_NO_FEATURE_INDEX; + return false; +} + + +/** + * hb_ot_layout_feature_get_lookups: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @feature_index: The index of the requested feature + * @start_offset: offset of the first lookup to retrieve + * @lookup_count: (inout) (allow-none): Input = the maximum number of lookups to return; + * Output = the actual number of lookups returned (may be zero) + * @lookup_indexes: (out) (array length=lookup_count): The array of lookup indexes found for the query + * + * Fetches a list of all lookups enumerated for the specified feature, in + * the specified face's GSUB table or GPOS table. The list returned will + * begin at the offset provided. + * + * Since: 0.9.7 + **/ +unsigned int +hb_ot_layout_feature_get_lookups (hb_face_t *face, + hb_tag_t table_tag, + unsigned int feature_index, + unsigned int start_offset, + unsigned int *lookup_count /* IN/OUT */, + unsigned int *lookup_indexes /* OUT */) +{ + return hb_ot_layout_feature_with_variations_get_lookups (face, + table_tag, + feature_index, + HB_OT_LAYOUT_NO_VARIATIONS_INDEX, + start_offset, + lookup_count, + lookup_indexes); +} + + +/** + * hb_ot_layout_table_get_lookup_count: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * + * Fetches the total number of lookups enumerated in the specified + * face's GSUB table or GPOS table. + * + * Since: 0.9.22 + **/ +unsigned int +hb_ot_layout_table_get_lookup_count (hb_face_t *face, + hb_tag_t table_tag) +{ + return get_gsubgpos_table (face, table_tag).get_lookup_count (); +} + + +struct hb_collect_features_context_t +{ + hb_collect_features_context_t (hb_face_t *face, + hb_tag_t table_tag, + hb_set_t *feature_indexes_) + : g (get_gsubgpos_table (face, table_tag)), + feature_indexes (feature_indexes_), + script_count (0),langsys_count (0), feature_index_count (0) {} + + bool visited (const OT::Script &s) + { + /* We might have Null() object here. Don't want to involve + * that in the memoize. So, detect empty objects and return. */ + if (unlikely (!s.has_default_lang_sys () && + !s.get_lang_sys_count ())) + return true; + + if (script_count++ > HB_MAX_SCRIPTS) + return true; + + return visited (s, visited_script); + } + bool visited (const OT::LangSys &l) + { + /* We might have Null() object here. Don't want to involve + * that in the memoize. So, detect empty objects and return. */ + if (unlikely (!l.has_required_feature () && + !l.get_feature_count ())) + return true; + + if (langsys_count++ > HB_MAX_LANGSYS) + return true; + + return visited (l, visited_langsys); + } + + bool visited_feature_indices (unsigned count) + { + feature_index_count += count; + return feature_index_count > HB_MAX_FEATURE_INDICES; + } + + private: + template <typename T> + bool visited (const T &p, hb_set_t &visited_set) + { + hb_codepoint_t delta = (hb_codepoint_t) ((uintptr_t) &p - (uintptr_t) &g); + if (visited_set.has (delta)) + return true; + + visited_set.add (delta); + return false; + } + + public: + const OT::GSUBGPOS &g; + hb_set_t *feature_indexes; + + private: + hb_set_t visited_script; + hb_set_t visited_langsys; + unsigned int script_count; + unsigned int langsys_count; + unsigned int feature_index_count; +}; + +static void +langsys_collect_features (hb_collect_features_context_t *c, + const OT::LangSys &l, + const hb_tag_t *features) +{ + if (c->visited (l)) return; + + if (!features) + { + /* All features. */ + if (l.has_required_feature () && !c->visited_feature_indices (1)) + c->feature_indexes->add (l.get_required_feature_index ()); + + if (!c->visited_feature_indices (l.featureIndex.len)) + l.add_feature_indexes_to (c->feature_indexes); + } + else + { + /* Ugh. Any faster way? */ + for (; *features; features++) + { + hb_tag_t feature_tag = *features; + unsigned int num_features = l.get_feature_count (); + for (unsigned int i = 0; i < num_features; i++) + { + unsigned int feature_index = l.get_feature_index (i); + + if (feature_tag == c->g.get_feature_tag (feature_index)) + { + c->feature_indexes->add (feature_index); + break; + } + } + } + } +} + +static void +script_collect_features (hb_collect_features_context_t *c, + const OT::Script &s, + const hb_tag_t *languages, + const hb_tag_t *features) +{ + if (c->visited (s)) return; + + if (!languages) + { + /* All languages. */ + if (s.has_default_lang_sys ()) + langsys_collect_features (c, + s.get_default_lang_sys (), + features); + + unsigned int count = s.get_lang_sys_count (); + for (unsigned int language_index = 0; language_index < count; language_index++) + langsys_collect_features (c, + s.get_lang_sys (language_index), + features); + } + else + { + for (; *languages; languages++) + { + unsigned int language_index; + if (s.find_lang_sys_index (*languages, &language_index)) + langsys_collect_features (c, + s.get_lang_sys (language_index), + features); + } + } +} + + +/** + * hb_ot_layout_collect_features: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @scripts: The array of scripts to collect features for + * @languages: The array of languages to collect features for + * @features: The array of features to collect + * @feature_indexes: (out): The array of feature indexes found for the query + * + * Fetches a list of all feature indexes in the specified face's GSUB table + * or GPOS table, underneath the specified scripts, languages, and features. + * If no list of scripts is provided, all scripts will be queried. If no list + * of languages is provided, all languages will be queried. If no list of + * features is provided, all features will be queried. + * + * Since: 1.8.5 + **/ +void +hb_ot_layout_collect_features (hb_face_t *face, + hb_tag_t table_tag, + const hb_tag_t *scripts, + const hb_tag_t *languages, + const hb_tag_t *features, + hb_set_t *feature_indexes /* OUT */) +{ + hb_collect_features_context_t c (face, table_tag, feature_indexes); + if (!scripts) + { + /* All scripts. */ + unsigned int count = c.g.get_script_count (); + for (unsigned int script_index = 0; script_index < count; script_index++) + script_collect_features (&c, + c.g.get_script (script_index), + languages, + features); + } + else + { + for (; *scripts; scripts++) + { + unsigned int script_index; + if (c.g.find_script_index (*scripts, &script_index)) + script_collect_features (&c, + c.g.get_script (script_index), + languages, + features); + } + } +} + + +/** + * hb_ot_layout_collect_lookups: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @scripts: The array of scripts to collect lookups for + * @languages: The array of languages to collect lookups for + * @features: The array of features to collect lookups for + * @lookup_indexes: (out): The array of lookup indexes found for the query + * + * Fetches a list of all feature-lookup indexes in the specified face's GSUB + * table or GPOS table, underneath the specified scripts, languages, and + * features. If no list of scripts is provided, all scripts will be queried. + * If no list of languages is provided, all languages will be queried. If no + * list of features is provided, all features will be queried. + * + * Since: 0.9.8 + **/ +void +hb_ot_layout_collect_lookups (hb_face_t *face, + hb_tag_t table_tag, + const hb_tag_t *scripts, + const hb_tag_t *languages, + const hb_tag_t *features, + hb_set_t *lookup_indexes /* OUT */) +{ + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + + hb_set_t feature_indexes; + hb_ot_layout_collect_features (face, table_tag, scripts, languages, features, &feature_indexes); + + for (hb_codepoint_t feature_index = HB_SET_VALUE_INVALID; + hb_set_next (&feature_indexes, &feature_index);) + g.get_feature (feature_index).add_lookup_indexes_to (lookup_indexes); + + g.feature_variation_collect_lookups (&feature_indexes, lookup_indexes); +} + + +#ifndef HB_NO_LAYOUT_COLLECT_GLYPHS +/** + * hb_ot_layout_lookup_collect_glyphs: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @lookup_index: The index of the feature lookup to query + * @glyphs_before: (out): Array of glyphs preceding the substitution range + * @glyphs_input: (out): Array of input glyphs that would be substituted by the lookup + * @glyphs_after: (out): Array of glyphs following the substitution range + * @glyphs_output: (out): Array of glyphs that would be the substitued output of the lookup + * + * Fetches a list of all glyphs affected by the specified lookup in the + * specified face's GSUB table or GPOS table. + * + * Since: 0.9.7 + **/ +void +hb_ot_layout_lookup_collect_glyphs (hb_face_t *face, + hb_tag_t table_tag, + unsigned int lookup_index, + hb_set_t *glyphs_before, /* OUT. May be NULL */ + hb_set_t *glyphs_input, /* OUT. May be NULL */ + hb_set_t *glyphs_after, /* OUT. May be NULL */ + hb_set_t *glyphs_output /* OUT. May be NULL */) +{ + OT::hb_collect_glyphs_context_t c (face, + glyphs_before, + glyphs_input, + glyphs_after, + glyphs_output); + + switch (table_tag) + { + case HB_OT_TAG_GSUB: + { + const OT::SubstLookup& l = face->table.GSUB->table->get_lookup (lookup_index); + l.collect_glyphs (&c); + return; + } + case HB_OT_TAG_GPOS: + { + const OT::PosLookup& l = face->table.GPOS->table->get_lookup (lookup_index); + l.collect_glyphs (&c); + return; + } + } +} +#endif + + +/* Variations support */ + + +/** + * hb_ot_layout_table_find_feature_variations: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @coords: The variation coordinates to query + * @num_coords: The number of variation coorinates + * @variations_index: (out): The array of feature variations found for the query + * + * Fetches a list of feature variations in the specified face's GSUB table + * or GPOS table, at the specified variation coordinates. + * + **/ +hb_bool_t +hb_ot_layout_table_find_feature_variations (hb_face_t *face, + hb_tag_t table_tag, + const int *coords, + unsigned int num_coords, + unsigned int *variations_index /* out */) +{ + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + + return g.find_variations_index (coords, num_coords, variations_index); +} + + +/** + * hb_ot_layout_feature_with_variations_get_lookups: + * @face: #hb_face_t to work upon + * @table_tag: HB_OT_TAG_GSUB or HB_OT_TAG_GPOS + * @feature_index: The index of the feature to query + * @variations_index: The index of the feature variation to query + * @start_offset: offset of the first lookup to retrieve + * @lookup_count: (inout) (allow-none): Input = the maximum number of lookups to return; + * Output = the actual number of lookups returned (may be zero) + * @lookup_indexes: (out) (array length=lookup_count): The array of lookups found for the query + * + * Fetches a list of all lookups enumerated for the specified feature, in + * the specified face's GSUB table or GPOS table, enabled at the specified + * variations index. The list returned will begin at the offset provided. + * + **/ +unsigned int +hb_ot_layout_feature_with_variations_get_lookups (hb_face_t *face, + hb_tag_t table_tag, + unsigned int feature_index, + unsigned int variations_index, + unsigned int start_offset, + unsigned int *lookup_count /* IN/OUT */, + unsigned int *lookup_indexes /* OUT */) +{ + static_assert ((OT::FeatureVariations::NOT_FOUND_INDEX == HB_OT_LAYOUT_NO_VARIATIONS_INDEX), ""); + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + + const OT::Feature &f = g.get_feature_variation (feature_index, variations_index); + + return f.get_lookup_indexes (start_offset, lookup_count, lookup_indexes); +} + + +/* + * OT::GSUB + */ + + +/** + * hb_ot_layout_has_substitution: + * @face: #hb_face_t to work upon + * + * Tests whether the specified face includes any GSUB substitutions. + * + * Return value: true if data found, false otherwise + * + **/ +hb_bool_t +hb_ot_layout_has_substitution (hb_face_t *face) +{ + return face->table.GSUB->table->has_data (); +} + + +/** + * hb_ot_layout_lookup_would_substitute: + * @face: #hb_face_t to work upon + * @lookup_index: The index of the lookup to query + * @glyphs: The sequence of glyphs to query for substitution + * @glyphs_length: The length of the glyph sequence + * @zero_context: #hb_bool_t indicating whether substitutions should be context-free + * + * Tests whether a specified lookup in the specified face would + * trigger a substitution on the given glyph sequence. + * + * Return value: true if a substitution would be triggered, false otherwise + * + * Since: 0.9.7 + **/ +hb_bool_t +hb_ot_layout_lookup_would_substitute (hb_face_t *face, + unsigned int lookup_index, + const hb_codepoint_t *glyphs, + unsigned int glyphs_length, + hb_bool_t zero_context) +{ + if (unlikely (lookup_index >= face->table.GSUB->lookup_count)) return false; + OT::hb_would_apply_context_t c (face, glyphs, glyphs_length, (bool) zero_context); + + const OT::SubstLookup& l = face->table.GSUB->table->get_lookup (lookup_index); + return l.would_apply (&c, &face->table.GSUB->accels[lookup_index]); +} + + +/** + * hb_ot_layout_substitute_start: + * @font: #hb_font_t to use + * @buffer: #hb_buffer_t buffer to work upon + * + * Called before substitution lookups are performed, to ensure that glyph + * class and other properties are set on the glyphs in the buffer. + * + **/ +void +hb_ot_layout_substitute_start (hb_font_t *font, + hb_buffer_t *buffer) +{ + _hb_ot_layout_set_glyph_props (font, buffer); +} + +void +hb_ot_layout_delete_glyphs_inplace (hb_buffer_t *buffer, + bool (*filter) (const hb_glyph_info_t *info)) +{ + /* Merge clusters and delete filtered glyphs. + * NOTE! We can't use out-buffer as we have positioning data. */ + unsigned int j = 0; + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + hb_glyph_position_t *pos = buffer->pos; + for (unsigned int i = 0; i < count; i++) + { + if (filter (&info[i])) + { + /* Merge clusters. + * Same logic as buffer->delete_glyph(), but for in-place removal. */ + + unsigned int cluster = info[i].cluster; + if (i + 1 < count && cluster == info[i + 1].cluster) + continue; /* Cluster survives; do nothing. */ + + if (j) + { + /* Merge cluster backward. */ + if (cluster < info[j - 1].cluster) + { + unsigned int mask = info[i].mask; + unsigned int old_cluster = info[j - 1].cluster; + for (unsigned k = j; k && info[k - 1].cluster == old_cluster; k--) + buffer->set_cluster (info[k - 1], cluster, mask); + } + continue; + } + + if (i + 1 < count) + buffer->merge_clusters (i, i + 2); /* Merge cluster forward. */ + + continue; + } + + if (j != i) + { + info[j] = info[i]; + pos[j] = pos[i]; + } + j++; + } + buffer->len = j; +} + +/** + * hb_ot_layout_lookup_substitute_closure: + * @face: #hb_face_t to work upon + * @lookup_index: index of the feature lookup to query + * @glyphs: (out): Array of glyphs comprising the transitive closure of the lookup + * + * Compute the transitive closure of glyphs needed for a + * specified lookup. + * + * Since: 0.9.7 + **/ +void +hb_ot_layout_lookup_substitute_closure (hb_face_t *face, + unsigned int lookup_index, + hb_set_t *glyphs /* OUT */) +{ + hb_map_t done_lookups; + OT::hb_closure_context_t c (face, glyphs, &done_lookups); + + const OT::SubstLookup& l = face->table.GSUB->table->get_lookup (lookup_index); + + l.closure (&c, lookup_index); +} + +/** + * hb_ot_layout_lookups_substitute_closure: + * @face: #hb_face_t to work upon + * @lookups: The set of lookups to query + * @glyphs: (out): Array of glyphs comprising the transitive closure of the lookups + * + * Compute the transitive closure of glyphs needed for all of the + * provided lookups. + * + * Since: 1.8.1 + **/ +void +hb_ot_layout_lookups_substitute_closure (hb_face_t *face, + const hb_set_t *lookups, + hb_set_t *glyphs /* OUT */) +{ + hb_map_t done_lookups; + OT::hb_closure_context_t c (face, glyphs, &done_lookups); + const OT::GSUB& gsub = *face->table.GSUB->table; + + unsigned int iteration_count = 0; + unsigned int glyphs_length; + do + { + glyphs_length = glyphs->get_population (); + if (lookups) + { + for (hb_codepoint_t lookup_index = HB_SET_VALUE_INVALID; hb_set_next (lookups, &lookup_index);) + gsub.get_lookup (lookup_index).closure (&c, lookup_index); + } + else + { + for (unsigned int i = 0; i < gsub.get_lookup_count (); i++) + gsub.get_lookup (i).closure (&c, i); + } + } while (iteration_count++ <= HB_CLOSURE_MAX_STAGES && + glyphs_length != glyphs->get_population ()); +} + +/* + * OT::GPOS + */ + + +/** + * hb_ot_layout_has_positioning: + * @face: #hb_face_t to work upon + * + * Return value: true if the face has GPOS data, false otherwise + * + **/ +hb_bool_t +hb_ot_layout_has_positioning (hb_face_t *face) +{ + return face->table.GPOS->table->has_data (); +} + +/** + * hb_ot_layout_position_start: + * @font: #hb_font_t to use + * @buffer: #hb_buffer_t buffer to work upon + * + * Called before positioning lookups are performed, to ensure that glyph + * attachment types and glyph-attachment chains are set for the glyphs in the buffer. + * + **/ +void +hb_ot_layout_position_start (hb_font_t *font, hb_buffer_t *buffer) +{ + OT::GPOS::position_start (font, buffer); +} + + +/** + * hb_ot_layout_position_finish_advances: + * @font: #hb_font_t to use + * @buffer: #hb_buffer_t buffer to work upon + * + * Called after positioning lookups are performed, to finish glyph advances. + * + **/ +void +hb_ot_layout_position_finish_advances (hb_font_t *font, hb_buffer_t *buffer) +{ + OT::GPOS::position_finish_advances (font, buffer); +} + +/** + * hb_ot_layout_position_finish_offsets: + * @font: #hb_font_t to use + * @buffer: #hb_buffer_t buffer to work upon + * + * Called after positioning lookups are performed, to finish glyph offsets. + * + **/ +void +hb_ot_layout_position_finish_offsets (hb_font_t *font, hb_buffer_t *buffer) +{ + OT::GPOS::position_finish_offsets (font, buffer); +} + + +#ifndef HB_NO_LAYOUT_FEATURE_PARAMS +/** + * hb_ot_layout_get_size_params: + * @face: #hb_face_t to work upon + * @design_size: (out): The design size of the face + * @subfamily_id: (out): The identifier of the face within the font subfamily + * @subfamily_name_id: (out): The ‘name’ table name ID of the face within the font subfamily + * @range_start: (out): The minimum size of the recommended size range for the face + * @range_end: (out): The maximum size of the recommended size range for the face + * + * Fetches optical-size feature data (i.e., the `size` feature from GPOS). Note that + * the subfamily_id and the subfamily name string (accessible via the subfamily_name_id) + * as used here are defined as pertaining only to fonts within a font family that differ + * specifically in their respective size ranges; other ways to differentiate fonts within + * a subfamily are not covered by the `size` feature. + * + * For more information on this distinction, see the [`size` feature documentation]( + * https://docs.microsoft.com/en-us/typography/opentype/spec/features_pt#tag-size). + * + * Return value: true if data found, false otherwise + * + * Since: 0.9.10 + **/ +hb_bool_t +hb_ot_layout_get_size_params (hb_face_t *face, + unsigned int *design_size, /* OUT. May be NULL */ + unsigned int *subfamily_id, /* OUT. May be NULL */ + hb_ot_name_id_t *subfamily_name_id, /* OUT. May be NULL */ + unsigned int *range_start, /* OUT. May be NULL */ + unsigned int *range_end /* OUT. May be NULL */) +{ + const OT::GPOS &gpos = *face->table.GPOS->table; + const hb_tag_t tag = HB_TAG ('s','i','z','e'); + + unsigned int num_features = gpos.get_feature_count (); + for (unsigned int i = 0; i < num_features; i++) + { + if (tag == gpos.get_feature_tag (i)) + { + const OT::Feature &f = gpos.get_feature (i); + const OT::FeatureParamsSize ¶ms = f.get_feature_params ().get_size_params (tag); + + if (params.designSize) + { + if (design_size) *design_size = params.designSize; + if (subfamily_id) *subfamily_id = params.subfamilyID; + if (subfamily_name_id) *subfamily_name_id = params.subfamilyNameID; + if (range_start) *range_start = params.rangeStart; + if (range_end) *range_end = params.rangeEnd; + + return true; + } + } + } + + if (design_size) *design_size = 0; + if (subfamily_id) *subfamily_id = 0; + if (subfamily_name_id) *subfamily_name_id = HB_OT_NAME_ID_INVALID; + if (range_start) *range_start = 0; + if (range_end) *range_end = 0; + + return false; +} +/** + * hb_ot_layout_feature_get_name_ids: + * @face: #hb_face_t to work upon + * @table_tag: table tag to query, "GSUB" or "GPOS". + * @feature_index: index of feature to query. + * @label_id: (out) (allow-none): The ‘name’ table name ID that specifies a string + * for a user-interface label for this feature. (May be NULL.) + * @tooltip_id: (out) (allow-none): The ‘name’ table name ID that specifies a string + * that an application can use for tooltip text for this + * feature. (May be NULL.) + * @sample_id: (out) (allow-none): The ‘name’ table name ID that specifies sample text + * that illustrates the effect of this feature. (May be NULL.) + * @num_named_parameters: (out) (allow-none): Number of named parameters. (May be zero.) + * @first_param_id: (out) (allow-none): The first ‘name’ table name ID used to specify + * strings for user-interface labels for the feature + * parameters. (Must be zero if numParameters is zero.) + * + * Fetches name indices from feature parameters for "Stylistic Set" ('ssXX') or + * "Character Variant" ('cvXX') features. + * + * Return value: true if data found, false otherwise + * + * Since: 2.0.0 + **/ +hb_bool_t +hb_ot_layout_feature_get_name_ids (hb_face_t *face, + hb_tag_t table_tag, + unsigned int feature_index, + hb_ot_name_id_t *label_id, /* OUT. May be NULL */ + hb_ot_name_id_t *tooltip_id, /* OUT. May be NULL */ + hb_ot_name_id_t *sample_id, /* OUT. May be NULL */ + unsigned int *num_named_parameters, /* OUT. May be NULL */ + hb_ot_name_id_t *first_param_id /* OUT. May be NULL */) +{ + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + + hb_tag_t feature_tag = g.get_feature_tag (feature_index); + const OT::Feature &f = g.get_feature (feature_index); + + const OT::FeatureParams &feature_params = f.get_feature_params (); + if (&feature_params != &Null (OT::FeatureParams)) + { + const OT::FeatureParamsStylisticSet& ss_params = + feature_params.get_stylistic_set_params (feature_tag); + if (&ss_params != &Null (OT::FeatureParamsStylisticSet)) /* ssXX */ + { + if (label_id) *label_id = ss_params.uiNameID; + // ssXX features don't have the rest + if (tooltip_id) *tooltip_id = HB_OT_NAME_ID_INVALID; + if (sample_id) *sample_id = HB_OT_NAME_ID_INVALID; + if (num_named_parameters) *num_named_parameters = 0; + if (first_param_id) *first_param_id = HB_OT_NAME_ID_INVALID; + return true; + } + const OT::FeatureParamsCharacterVariants& cv_params = + feature_params.get_character_variants_params (feature_tag); + if (&cv_params != &Null (OT::FeatureParamsCharacterVariants)) /* cvXX */ + { + if (label_id) *label_id = cv_params.featUILableNameID; + if (tooltip_id) *tooltip_id = cv_params.featUITooltipTextNameID; + if (sample_id) *sample_id = cv_params.sampleTextNameID; + if (num_named_parameters) *num_named_parameters = cv_params.numNamedParameters; + if (first_param_id) *first_param_id = cv_params.firstParamUILabelNameID; + return true; + } + } + + if (label_id) *label_id = HB_OT_NAME_ID_INVALID; + if (tooltip_id) *tooltip_id = HB_OT_NAME_ID_INVALID; + if (sample_id) *sample_id = HB_OT_NAME_ID_INVALID; + if (num_named_parameters) *num_named_parameters = 0; + if (first_param_id) *first_param_id = HB_OT_NAME_ID_INVALID; + return false; +} +/** + * hb_ot_layout_feature_get_characters: + * @face: #hb_face_t to work upon + * @table_tag: table tag to query, "GSUB" or "GPOS". + * @feature_index: index of feature to query. + * @start_offset: offset of the first character to retrieve + * @char_count: (inout) (allow-none): Input = the maximum number of characters to return; + * Output = the actual number of characters returned (may be zero) + * @characters: (out caller-allocates) (array length=char_count): A buffer pointer. + * The Unicode codepoints of the characters for which this feature provides + * glyph variants. + * + * Fetches a list of the characters defined as having a variant under the specified + * "Character Variant" ("cvXX") feature tag. + * + * Return value: Number of total sample characters in the cvXX feature. + * + * Since: 2.0.0 + **/ +unsigned int +hb_ot_layout_feature_get_characters (hb_face_t *face, + hb_tag_t table_tag, + unsigned int feature_index, + unsigned int start_offset, + unsigned int *char_count, /* IN/OUT. May be NULL */ + hb_codepoint_t *characters /* OUT. May be NULL */) +{ + const OT::GSUBGPOS &g = get_gsubgpos_table (face, table_tag); + return g.get_feature (feature_index) + .get_feature_params () + .get_character_variants_params(g.get_feature_tag (feature_index)) + .get_characters (start_offset, char_count, characters); +} +#endif + + +/* + * Parts of different types are implemented here such that they have direct + * access to GSUB/GPOS lookups. + */ + + +struct GSUBProxy +{ + static constexpr unsigned table_index = 0u; + static constexpr bool inplace = false; + typedef OT::SubstLookup Lookup; + + GSUBProxy (hb_face_t *face) : + table (*face->table.GSUB->table), + accels (face->table.GSUB->accels) {} + + const OT::GSUB &table; + const OT::hb_ot_layout_lookup_accelerator_t *accels; +}; + +struct GPOSProxy +{ + static constexpr unsigned table_index = 1u; + static constexpr bool inplace = true; + typedef OT::PosLookup Lookup; + + GPOSProxy (hb_face_t *face) : + table (*face->table.GPOS->table), + accels (face->table.GPOS->accels) {} + + const OT::GPOS &table; + const OT::hb_ot_layout_lookup_accelerator_t *accels; +}; + + +static inline bool +apply_forward (OT::hb_ot_apply_context_t *c, + const OT::hb_ot_layout_lookup_accelerator_t &accel) +{ + bool ret = false; + hb_buffer_t *buffer = c->buffer; + while (buffer->idx < buffer->len && buffer->successful) + { + bool applied = false; + if (accel.may_have (buffer->cur().codepoint) && + (buffer->cur().mask & c->lookup_mask) && + c->check_glyph_property (&buffer->cur(), c->lookup_props)) + { + applied = accel.apply (c); + } + + if (applied) + ret = true; + else + buffer->next_glyph (); + } + return ret; +} + +static inline bool +apply_backward (OT::hb_ot_apply_context_t *c, + const OT::hb_ot_layout_lookup_accelerator_t &accel) +{ + bool ret = false; + hb_buffer_t *buffer = c->buffer; + do + { + if (accel.may_have (buffer->cur().codepoint) && + (buffer->cur().mask & c->lookup_mask) && + c->check_glyph_property (&buffer->cur(), c->lookup_props)) + ret |= accel.apply (c); + + /* The reverse lookup doesn't "advance" cursor (for good reason). */ + buffer->idx--; + + } + while ((int) buffer->idx >= 0); + return ret; +} + +template <typename Proxy> +static inline void +apply_string (OT::hb_ot_apply_context_t *c, + const typename Proxy::Lookup &lookup, + const OT::hb_ot_layout_lookup_accelerator_t &accel) +{ + hb_buffer_t *buffer = c->buffer; + + if (unlikely (!buffer->len || !c->lookup_mask)) + return; + + c->set_lookup_props (lookup.get_props ()); + + if (likely (!lookup.is_reverse ())) + { + /* in/out forward substitution/positioning */ + if (Proxy::table_index == 0u) + buffer->clear_output (); + buffer->idx = 0; + + bool ret; + ret = apply_forward (c, accel); + if (ret) + { + if (!Proxy::inplace) + buffer->swap_buffers (); + else + assert (!buffer->has_separate_output ()); + } + } + else + { + /* in-place backward substitution/positioning */ + if (Proxy::table_index == 0u) + buffer->remove_output (); + buffer->idx = buffer->len - 1; + + apply_backward (c, accel); + } +} + +template <typename Proxy> +inline void hb_ot_map_t::apply (const Proxy &proxy, + const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) const +{ + const unsigned int table_index = proxy.table_index; + unsigned int i = 0; + OT::hb_ot_apply_context_t c (table_index, font, buffer); + c.set_recurse_func (Proxy::Lookup::apply_recurse_func); + + for (unsigned int stage_index = 0; stage_index < stages[table_index].length; stage_index++) { + const stage_map_t *stage = &stages[table_index][stage_index]; + for (; i < stage->last_lookup; i++) + { + unsigned int lookup_index = lookups[table_index][i].index; + if (!buffer->message (font, "start lookup %d", lookup_index)) continue; + c.set_lookup_index (lookup_index); + c.set_lookup_mask (lookups[table_index][i].mask); + c.set_auto_zwj (lookups[table_index][i].auto_zwj); + c.set_auto_zwnj (lookups[table_index][i].auto_zwnj); + if (lookups[table_index][i].random) + { + c.set_random (true); + buffer->unsafe_to_break_all (); + } + apply_string<Proxy> (&c, + proxy.table.get_lookup (lookup_index), + proxy.accels[lookup_index]); + (void) buffer->message (font, "end lookup %d", lookup_index); + } + + if (stage->pause_func) + { + buffer->clear_output (); + stage->pause_func (plan, font, buffer); + } + } +} + +void hb_ot_map_t::substitute (const hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const +{ + GSUBProxy proxy (font->face); + if (!buffer->message (font, "start table GSUB")) return; + apply (proxy, plan, font, buffer); + (void)buffer->message (font, "end table GSUB"); +} + +void hb_ot_map_t::position (const hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const +{ + GPOSProxy proxy (font->face); + if (!buffer->message (font, "start table GPOS")) return; + apply (proxy, plan, font, buffer); + (void)buffer->message (font, "end table GPOS"); +} + +void +hb_ot_layout_substitute_lookup (OT::hb_ot_apply_context_t *c, + const OT::SubstLookup &lookup, + const OT::hb_ot_layout_lookup_accelerator_t &accel) +{ + apply_string<GSUBProxy> (c, lookup, accel); +} + +#ifndef HB_NO_BASE +/** + * hb_ot_layout_get_baseline: + * @font: a font + * @baseline_tag: a baseline tag + * @direction: text direction. + * @script_tag: script tag. + * @language_tag: language tag. + * @coord: (out): baseline value if found. + * + * Fetches a baseline value from the face. + * + * Return value: if found baseline value in the font. + * + * Since: 2.6.0 + **/ +hb_bool_t +hb_ot_layout_get_baseline (hb_font_t *font, + hb_ot_layout_baseline_tag_t baseline_tag, + hb_direction_t direction, + hb_tag_t script_tag, + hb_tag_t language_tag, + hb_position_t *coord /* OUT. May be NULL. */) +{ + bool result = font->face->table.BASE->get_baseline (font, baseline_tag, direction, script_tag, language_tag, coord); + + if (result && coord) + *coord = HB_DIRECTION_IS_HORIZONTAL (direction) ? font->em_scale_y (*coord) : font->em_scale_x (*coord); + + return result; +} +#endif + + +struct hb_get_glyph_alternates_dispatch_t : + hb_dispatch_context_t<hb_get_glyph_alternates_dispatch_t, unsigned> +{ + static return_t default_return_value () { return 0; } + bool stop_sublookup_iteration (return_t r) const { return r; } + + hb_face_t *face; + + hb_get_glyph_alternates_dispatch_t (hb_face_t *face) : + face (face) {} + + private: + template <typename T, typename ...Ts> auto + _dispatch (const T &obj, hb_priority<1>, Ts&&... ds) HB_AUTO_RETURN + ( obj.get_glyph_alternates (hb_forward<Ts> (ds)...) ) + template <typename T, typename ...Ts> auto + _dispatch (const T &obj, hb_priority<0>, Ts&&... ds) HB_AUTO_RETURN + ( default_return_value () ) + public: + template <typename T, typename ...Ts> auto + dispatch (const T &obj, Ts&&... ds) HB_AUTO_RETURN + ( _dispatch (obj, hb_prioritize, hb_forward<Ts> (ds)...) ) +}; + +/** + * hb_ot_layout_lookup_get_glyph_alternates: + * @face: a face. + * @lookup_index: index of the feature lookup to query. + * @glyph: a glyph id. + * @start_offset: starting offset. + * @alternate_count: (inout) (allow-none): Input = the maximum number of alternate glyphs to return; + * Output = the actual number of alternate glyphs returned (may be zero). + * @alternate_glyphs: (out caller-allocates) (array length=alternate_count): A glyphs buffer. + * Alternate glyphs associated with the glyph id. + * + * Fetches alternates of a glyph from a given GSUB lookup index. + * + * Return value: total number of alternates found in the specific lookup index for the given glyph id. + * + * Since: 2.6.8 + **/ +HB_EXTERN unsigned +hb_ot_layout_lookup_get_glyph_alternates (hb_face_t *face, + unsigned lookup_index, + hb_codepoint_t glyph, + unsigned start_offset, + unsigned *alternate_count /* IN/OUT. May be NULL. */, + hb_codepoint_t *alternate_glyphs /* OUT. May be NULL. */) +{ + hb_get_glyph_alternates_dispatch_t c (face); + const OT::SubstLookup &lookup = face->table.GSUB->table->get_lookup (lookup_index); + auto ret = lookup.dispatch (&c, glyph, start_offset, alternate_count, alternate_glyphs); + if (!ret && alternate_count) *alternate_count = 0; + return ret; +} + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-layout.h b/thirdparty/harfbuzz/src/hb-ot-layout.h new file mode 100644 index 0000000000..545d5f7fc4 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-layout.h @@ -0,0 +1,462 @@ +/* + * Copyright © 2007,2008,2009 Red Hat, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_H_IN +#error "Include <hb-ot.h> instead." +#endif + +#ifndef HB_OT_LAYOUT_H +#define HB_OT_LAYOUT_H + +#include "hb.h" + +#include "hb-ot-name.h" + +HB_BEGIN_DECLS + + +#define HB_OT_TAG_BASE HB_TAG('B','A','S','E') +#define HB_OT_TAG_GDEF HB_TAG('G','D','E','F') +#define HB_OT_TAG_GSUB HB_TAG('G','S','U','B') +#define HB_OT_TAG_GPOS HB_TAG('G','P','O','S') +#define HB_OT_TAG_JSTF HB_TAG('J','S','T','F') + + +/* + * Script & Language tags. + */ + +#define HB_OT_TAG_DEFAULT_SCRIPT HB_TAG ('D', 'F', 'L', 'T') +#define HB_OT_TAG_DEFAULT_LANGUAGE HB_TAG ('d', 'f', 'l', 't') + +/** + * HB_OT_MAX_TAGS_PER_SCRIPT: + * + * Since: 2.0.0 + **/ +#define HB_OT_MAX_TAGS_PER_SCRIPT 3u +/** + * HB_OT_MAX_TAGS_PER_LANGUAGE: + * + * Since: 2.0.0 + **/ +#define HB_OT_MAX_TAGS_PER_LANGUAGE 3u + +HB_EXTERN void +hb_ot_tags_from_script_and_language (hb_script_t script, + hb_language_t language, + unsigned int *script_count /* IN/OUT */, + hb_tag_t *script_tags /* OUT */, + unsigned int *language_count /* IN/OUT */, + hb_tag_t *language_tags /* OUT */); + +HB_EXTERN hb_script_t +hb_ot_tag_to_script (hb_tag_t tag); + +HB_EXTERN hb_language_t +hb_ot_tag_to_language (hb_tag_t tag); + +HB_EXTERN void +hb_ot_tags_to_script_and_language (hb_tag_t script_tag, + hb_tag_t language_tag, + hb_script_t *script /* OUT */, + hb_language_t *language /* OUT */); + + +/* + * GDEF + */ + +HB_EXTERN hb_bool_t +hb_ot_layout_has_glyph_classes (hb_face_t *face); + +/** + * hb_ot_layout_glyph_class_t: + * @HB_OT_LAYOUT_GLYPH_CLASS_UNCLASSIFIED: Glyphs not matching the other classifications + * @HB_OT_LAYOUT_GLYPH_CLASS_BASE_GLYPH: Spacing, single characters, capable of accepting marks + * @HB_OT_LAYOUT_GLYPH_CLASS_LIGATURE: Glyphs that represent ligation of multiple characters + * @HB_OT_LAYOUT_GLYPH_CLASS_MARK: Non-spacing, combining glyphs that represent marks + * @HB_OT_LAYOUT_GLYPH_CLASS_COMPONENT: Spacing glyphs that represent part of a single character + * + * The GDEF classes defined for glyphs. + * + **/ +typedef enum { + HB_OT_LAYOUT_GLYPH_CLASS_UNCLASSIFIED = 0, + HB_OT_LAYOUT_GLYPH_CLASS_BASE_GLYPH = 1, + HB_OT_LAYOUT_GLYPH_CLASS_LIGATURE = 2, + HB_OT_LAYOUT_GLYPH_CLASS_MARK = 3, + HB_OT_LAYOUT_GLYPH_CLASS_COMPONENT = 4 +} hb_ot_layout_glyph_class_t; + +HB_EXTERN hb_ot_layout_glyph_class_t +hb_ot_layout_get_glyph_class (hb_face_t *face, + hb_codepoint_t glyph); + +HB_EXTERN void +hb_ot_layout_get_glyphs_in_class (hb_face_t *face, + hb_ot_layout_glyph_class_t klass, + hb_set_t *glyphs /* OUT */); + +/* Not that useful. Provides list of attach points for a glyph that a + * client may want to cache */ +HB_EXTERN unsigned int +hb_ot_layout_get_attach_points (hb_face_t *face, + hb_codepoint_t glyph, + unsigned int start_offset, + unsigned int *point_count /* IN/OUT */, + unsigned int *point_array /* OUT */); + +/* Ligature caret positions */ +HB_EXTERN unsigned int +hb_ot_layout_get_ligature_carets (hb_font_t *font, + hb_direction_t direction, + hb_codepoint_t glyph, + unsigned int start_offset, + unsigned int *caret_count /* IN/OUT */, + hb_position_t *caret_array /* OUT */); + + +/* + * GSUB/GPOS feature query and enumeration interface + */ + +#define HB_OT_LAYOUT_NO_SCRIPT_INDEX 0xFFFFu +#define HB_OT_LAYOUT_NO_FEATURE_INDEX 0xFFFFu +#define HB_OT_LAYOUT_DEFAULT_LANGUAGE_INDEX 0xFFFFu +#define HB_OT_LAYOUT_NO_VARIATIONS_INDEX 0xFFFFFFFFu + +HB_EXTERN unsigned int +hb_ot_layout_table_get_script_tags (hb_face_t *face, + hb_tag_t table_tag, + unsigned int start_offset, + unsigned int *script_count /* IN/OUT */, + hb_tag_t *script_tags /* OUT */); + +HB_EXTERN hb_bool_t +hb_ot_layout_table_find_script (hb_face_t *face, + hb_tag_t table_tag, + hb_tag_t script_tag, + unsigned int *script_index /* OUT */); + +HB_EXTERN hb_bool_t +hb_ot_layout_table_select_script (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_count, + const hb_tag_t *script_tags, + unsigned int *script_index /* OUT */, + hb_tag_t *chosen_script /* OUT */); + +HB_EXTERN unsigned int +hb_ot_layout_table_get_feature_tags (hb_face_t *face, + hb_tag_t table_tag, + unsigned int start_offset, + unsigned int *feature_count /* IN/OUT */, + hb_tag_t *feature_tags /* OUT */); + +HB_EXTERN unsigned int +hb_ot_layout_script_get_language_tags (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int start_offset, + unsigned int *language_count /* IN/OUT */, + hb_tag_t *language_tags /* OUT */); + +HB_EXTERN hb_bool_t +hb_ot_layout_script_select_language (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int language_count, + const hb_tag_t *language_tags, + unsigned int *language_index /* OUT */); + +HB_EXTERN hb_bool_t +hb_ot_layout_language_get_required_feature_index (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int language_index, + unsigned int *feature_index /* OUT */); + +HB_EXTERN hb_bool_t +hb_ot_layout_language_get_required_feature (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int language_index, + unsigned int *feature_index /* OUT */, + hb_tag_t *feature_tag /* OUT */); + +HB_EXTERN unsigned int +hb_ot_layout_language_get_feature_indexes (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int language_index, + unsigned int start_offset, + unsigned int *feature_count /* IN/OUT */, + unsigned int *feature_indexes /* OUT */); + +HB_EXTERN unsigned int +hb_ot_layout_language_get_feature_tags (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int language_index, + unsigned int start_offset, + unsigned int *feature_count /* IN/OUT */, + hb_tag_t *feature_tags /* OUT */); + +HB_EXTERN hb_bool_t +hb_ot_layout_language_find_feature (hb_face_t *face, + hb_tag_t table_tag, + unsigned int script_index, + unsigned int language_index, + hb_tag_t feature_tag, + unsigned int *feature_index /* OUT */); + +HB_EXTERN unsigned int +hb_ot_layout_feature_get_lookups (hb_face_t *face, + hb_tag_t table_tag, + unsigned int feature_index, + unsigned int start_offset, + unsigned int *lookup_count /* IN/OUT */, + unsigned int *lookup_indexes /* OUT */); + +HB_EXTERN unsigned int +hb_ot_layout_table_get_lookup_count (hb_face_t *face, + hb_tag_t table_tag); + +HB_EXTERN void +hb_ot_layout_collect_features (hb_face_t *face, + hb_tag_t table_tag, + const hb_tag_t *scripts, + const hb_tag_t *languages, + const hb_tag_t *features, + hb_set_t *feature_indexes /* OUT */); + +HB_EXTERN void +hb_ot_layout_collect_lookups (hb_face_t *face, + hb_tag_t table_tag, + const hb_tag_t *scripts, + const hb_tag_t *languages, + const hb_tag_t *features, + hb_set_t *lookup_indexes /* OUT */); + +HB_EXTERN void +hb_ot_layout_lookup_collect_glyphs (hb_face_t *face, + hb_tag_t table_tag, + unsigned int lookup_index, + hb_set_t *glyphs_before, /* OUT. May be NULL */ + hb_set_t *glyphs_input, /* OUT. May be NULL */ + hb_set_t *glyphs_after, /* OUT. May be NULL */ + hb_set_t *glyphs_output /* OUT. May be NULL */); + +#ifdef HB_NOT_IMPLEMENTED +typedef struct +{ + const hb_codepoint_t *before, + unsigned int before_length, + const hb_codepoint_t *input, + unsigned int input_length, + const hb_codepoint_t *after, + unsigned int after_length, +} hb_ot_layout_glyph_sequence_t; + +typedef hb_bool_t +(*hb_ot_layout_glyph_sequence_func_t) (hb_font_t *font, + hb_tag_t table_tag, + unsigned int lookup_index, + const hb_ot_layout_glyph_sequence_t *sequence, + void *user_data); + +HB_EXTERN void +Xhb_ot_layout_lookup_enumerate_sequences (hb_face_t *face, + hb_tag_t table_tag, + unsigned int lookup_index, + hb_ot_layout_glyph_sequence_func_t callback, + void *user_data); +#endif + +/* Variations support */ + +HB_EXTERN hb_bool_t +hb_ot_layout_table_find_feature_variations (hb_face_t *face, + hb_tag_t table_tag, + const int *coords, + unsigned int num_coords, + unsigned int *variations_index /* out */); + +HB_EXTERN unsigned int +hb_ot_layout_feature_with_variations_get_lookups (hb_face_t *face, + hb_tag_t table_tag, + unsigned int feature_index, + unsigned int variations_index, + unsigned int start_offset, + unsigned int *lookup_count /* IN/OUT */, + unsigned int *lookup_indexes /* OUT */); + + +/* + * GSUB + */ + +HB_EXTERN hb_bool_t +hb_ot_layout_has_substitution (hb_face_t *face); + +HB_EXTERN unsigned +hb_ot_layout_lookup_get_glyph_alternates (hb_face_t *face, + unsigned lookup_index, + hb_codepoint_t glyph, + unsigned start_offset, + unsigned *alternate_count /* IN/OUT */, + hb_codepoint_t *alternate_glyphs /* OUT */); + +HB_EXTERN hb_bool_t +hb_ot_layout_lookup_would_substitute (hb_face_t *face, + unsigned int lookup_index, + const hb_codepoint_t *glyphs, + unsigned int glyphs_length, + hb_bool_t zero_context); + +HB_EXTERN void +hb_ot_layout_lookup_substitute_closure (hb_face_t *face, + unsigned int lookup_index, + hb_set_t *glyphs + /*TODO , hb_bool_t inclusive */); + +HB_EXTERN void +hb_ot_layout_lookups_substitute_closure (hb_face_t *face, + const hb_set_t *lookups, + hb_set_t *glyphs); + + +#ifdef HB_NOT_IMPLEMENTED +/* Note: You better have GDEF when using this API, or marks won't do much. */ +HB_EXTERN hb_bool_t +Xhb_ot_layout_lookup_substitute (hb_font_t *font, + unsigned int lookup_index, + const hb_ot_layout_glyph_sequence_t *sequence, + unsigned int out_size, + hb_codepoint_t *glyphs_out, /* OUT */ + unsigned int *clusters_out, /* OUT */ + unsigned int *out_length /* OUT */); +#endif + + +/* + * GPOS + */ + +HB_EXTERN hb_bool_t +hb_ot_layout_has_positioning (hb_face_t *face); + +#ifdef HB_NOT_IMPLEMENTED +/* Note: You better have GDEF when using this API, or marks won't do much. */ +HB_EXTERN hb_bool_t +Xhb_ot_layout_lookup_position (hb_font_t *font, + unsigned int lookup_index, + const hb_ot_layout_glyph_sequence_t *sequence, + hb_glyph_position_t *positions /* IN / OUT */); +#endif + +/* Optical 'size' feature info. Returns true if found. + * https://docs.microsoft.com/en-us/typography/opentype/spec/features_pt#size */ +HB_EXTERN hb_bool_t +hb_ot_layout_get_size_params (hb_face_t *face, + unsigned int *design_size, /* OUT. May be NULL */ + unsigned int *subfamily_id, /* OUT. May be NULL */ + hb_ot_name_id_t *subfamily_name_id, /* OUT. May be NULL */ + unsigned int *range_start, /* OUT. May be NULL */ + unsigned int *range_end /* OUT. May be NULL */); + + +HB_EXTERN hb_bool_t +hb_ot_layout_feature_get_name_ids (hb_face_t *face, + hb_tag_t table_tag, + unsigned int feature_index, + hb_ot_name_id_t *label_id /* OUT. May be NULL */, + hb_ot_name_id_t *tooltip_id /* OUT. May be NULL */, + hb_ot_name_id_t *sample_id /* OUT. May be NULL */, + unsigned int *num_named_parameters /* OUT. May be NULL */, + hb_ot_name_id_t *first_param_id /* OUT. May be NULL */); + + +HB_EXTERN unsigned int +hb_ot_layout_feature_get_characters (hb_face_t *face, + hb_tag_t table_tag, + unsigned int feature_index, + unsigned int start_offset, + unsigned int *char_count /* IN/OUT. May be NULL */, + hb_codepoint_t *characters /* OUT. May be NULL */); + +/* + * BASE + */ + +/** + * hb_ot_layout_baseline_tag_t: + * @HB_OT_LAYOUT_BASELINE_TAG_ROMAN: The baseline used by alphabetic scripts such as Latin, Cyrillic and Greek. + * In vertical writing mode, the alphabetic baseline for characters rotated 90 degrees clockwise. + * (This would not apply to alphabetic characters that remain upright in vertical writing mode, since these + * characters are not rotated.) + * @HB_OT_LAYOUT_BASELINE_TAG_HANGING: The hanging baseline. In horizontal direction, this is the horizontal + * line from which syllables seem, to hang in Tibetan and other similar scripts. In vertical writing mode, + * for Tibetan (or some other similar script) characters rotated 90 degrees clockwise. + * @HB_OT_LAYOUT_BASELINE_TAG_IDEO_FACE_BOTTOM_OR_LEFT: Ideographic character face bottom or left edge, + * if the direction is horizontal or vertical, respectively. + * @HB_OT_LAYOUT_BASELINE_TAG_IDEO_FACE_TOP_OR_RIGHT: Ideographic character face top or right edge, + * if the direction is horizontal or vertical, respectively. + * @HB_OT_LAYOUT_BASELINE_TAG_IDEO_EMBOX_BOTTOM_OR_LEFT: Ideographic em-box bottom or left edge, + * if the direction is horizontal or vertical, respectively. + * @HB_OT_LAYOUT_BASELINE_TAG_IDEO_EMBOX_TOP_OR_RIGHT: Ideographic em-box top or right edge baseline, + * if the direction is horizontal or vertical, respectively. + * @HB_OT_LAYOUT_BASELINE_TAG_MATH: The baseline about which mathematical characters are centered. + * In vertical writing mode when mathematical characters rotated 90 degrees clockwise, are centered. + * + * Baseline tags from https://docs.microsoft.com/en-us/typography/opentype/spec/baselinetags + * + * Since: 2.6.0 + */ +typedef enum { + HB_OT_LAYOUT_BASELINE_TAG_ROMAN = HB_TAG ('r','o','m','n'), + HB_OT_LAYOUT_BASELINE_TAG_HANGING = HB_TAG ('h','a','n','g'), + HB_OT_LAYOUT_BASELINE_TAG_IDEO_FACE_BOTTOM_OR_LEFT = HB_TAG ('i','c','f','b'), + HB_OT_LAYOUT_BASELINE_TAG_IDEO_FACE_TOP_OR_RIGHT = HB_TAG ('i','c','f','t'), + HB_OT_LAYOUT_BASELINE_TAG_IDEO_EMBOX_BOTTOM_OR_LEFT = HB_TAG ('i','d','e','o'), + HB_OT_LAYOUT_BASELINE_TAG_IDEO_EMBOX_TOP_OR_RIGHT = HB_TAG ('i','d','t','p'), + HB_OT_LAYOUT_BASELINE_TAG_MATH = HB_TAG ('m','a','t','h'), + + _HB_OT_LAYOUT_BASELINE_TAG_MAX_VALUE = HB_TAG_MAX_SIGNED /*< skip >*/ +} hb_ot_layout_baseline_tag_t; + +HB_EXTERN hb_bool_t +hb_ot_layout_get_baseline (hb_font_t *font, + hb_ot_layout_baseline_tag_t baseline_tag, + hb_direction_t direction, + hb_tag_t script_tag, + hb_tag_t language_tag, + hb_position_t *coord /* OUT. May be NULL. */); + +HB_END_DECLS + +#endif /* HB_OT_LAYOUT_H */ diff --git a/thirdparty/harfbuzz/src/hb-ot-layout.hh b/thirdparty/harfbuzz/src/hb-ot-layout.hh new file mode 100644 index 0000000000..f3bb15581a --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-layout.hh @@ -0,0 +1,627 @@ +/* + * Copyright © 2007,2008,2009 Red Hat, Inc. + * Copyright © 2012,2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_LAYOUT_HH +#define HB_OT_LAYOUT_HH + +#include "hb.hh" + +#include "hb-font.hh" +#include "hb-buffer.hh" +#include "hb-open-type.hh" +#include "hb-ot-shape.hh" +#include "hb-set-digest.hh" + + +struct hb_ot_shape_plan_t; + + +/* + * kern + */ + +HB_INTERNAL bool +hb_ot_layout_has_kerning (hb_face_t *face); + +HB_INTERNAL bool +hb_ot_layout_has_machine_kerning (hb_face_t *face); + +HB_INTERNAL bool +hb_ot_layout_has_cross_kerning (hb_face_t *face); + +HB_INTERNAL void +hb_ot_layout_kern (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + + +/* Private API corresponding to hb-ot-layout.h: */ + +HB_INTERNAL bool +hb_ot_layout_table_find_feature (hb_face_t *face, + hb_tag_t table_tag, + hb_tag_t feature_tag, + unsigned int *feature_index); + + +/* + * GDEF + */ + +enum hb_ot_layout_glyph_props_flags_t +{ + /* The following three match LookupFlags::Ignore* numbers. */ + HB_OT_LAYOUT_GLYPH_PROPS_BASE_GLYPH = 0x02u, + HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE = 0x04u, + HB_OT_LAYOUT_GLYPH_PROPS_MARK = 0x08u, + + /* The following are used internally; not derived from GDEF. */ + HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED = 0x10u, + HB_OT_LAYOUT_GLYPH_PROPS_LIGATED = 0x20u, + HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED = 0x40u, + + HB_OT_LAYOUT_GLYPH_PROPS_PRESERVE = HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED | + HB_OT_LAYOUT_GLYPH_PROPS_LIGATED | + HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED +}; +HB_MARK_AS_FLAG_T (hb_ot_layout_glyph_props_flags_t); + + +/* + * GSUB/GPOS + */ + + +/* Should be called before all the substitute_lookup's are done. */ +HB_INTERNAL void +hb_ot_layout_substitute_start (hb_font_t *font, + hb_buffer_t *buffer); + +HB_INTERNAL void +hb_ot_layout_delete_glyphs_inplace (hb_buffer_t *buffer, + bool (*filter) (const hb_glyph_info_t *info)); + +namespace OT { + struct hb_ot_apply_context_t; + struct SubstLookup; + struct hb_ot_layout_lookup_accelerator_t; +} + +HB_INTERNAL void +hb_ot_layout_substitute_lookup (OT::hb_ot_apply_context_t *c, + const OT::SubstLookup &lookup, + const OT::hb_ot_layout_lookup_accelerator_t &accel); + + +/* Should be called before all the position_lookup's are done. */ +HB_INTERNAL void +hb_ot_layout_position_start (hb_font_t *font, + hb_buffer_t *buffer); + +/* Should be called after all the position_lookup's are done, to fini advances. */ +HB_INTERNAL void +hb_ot_layout_position_finish_advances (hb_font_t *font, + hb_buffer_t *buffer); + +/* Should be called after hb_ot_layout_position_finish_advances, to fini offsets. */ +HB_INTERNAL void +hb_ot_layout_position_finish_offsets (hb_font_t *font, + hb_buffer_t *buffer); + + +/* + * Buffer var routines. + */ + +/* buffer var allocations, used during the entire shaping process */ +#define unicode_props() var2.u16[0] + +/* buffer var allocations, used during the GSUB/GPOS processing */ +#define glyph_props() var1.u16[0] /* GDEF glyph properties */ +#define lig_props() var1.u8[2] /* GSUB/GPOS ligature tracking */ +#define syllable() var1.u8[3] /* GSUB/GPOS shaping boundaries */ + + +/* Loop over syllables. Based on foreach_cluster(). */ +#define foreach_syllable(buffer, start, end) \ + for (unsigned int \ + _count = buffer->len, \ + start = 0, end = _count ? _hb_next_syllable (buffer, 0) : 0; \ + start < _count; \ + start = end, end = _hb_next_syllable (buffer, start)) + +static inline unsigned int +_hb_next_syllable (hb_buffer_t *buffer, unsigned int start) +{ + hb_glyph_info_t *info = buffer->info; + unsigned int count = buffer->len; + + unsigned int syllable = info[start].syllable(); + while (++start < count && syllable == info[start].syllable()) + ; + + return start; +} + +static inline void +_hb_clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font HB_UNUSED, + hb_buffer_t *buffer) +{ + hb_glyph_info_t *info = buffer->info; + unsigned int count = buffer->len; + for (unsigned int i = 0; i < count; i++) + info[i].syllable() = 0; +} + + +/* unicode_props */ + +/* Design: + * unicode_props() is a two-byte number. The low byte includes: + * - General_Category: 5 bits. + * - A bit each for: + * * Is it Default_Ignorable(); we have a modified Default_Ignorable(). + * * Whether it's one of the three Mongolian Free Variation Selectors, + * CGJ, or other characters that are hidden but should not be ignored + * like most other Default_Ignorable()s do during matching. + * * Whether it's a grapheme continuation. + * + * The high-byte has different meanings, switched by the Gen-Cat: + * - For Mn,Mc,Me: the modified Combining_Class. + * - For Cf: whether it's ZWJ, ZWNJ, or something else. + * - For Ws: index of which space character this is, if space fallback + * is needed, ie. we don't set this by default, only if asked to. + */ + +enum hb_unicode_props_flags_t { + UPROPS_MASK_GEN_CAT = 0x001Fu, + UPROPS_MASK_IGNORABLE = 0x0020u, + UPROPS_MASK_HIDDEN = 0x0040u, /* MONGOLIAN FREE VARIATION SELECTOR 1..3, or TAG characters */ + UPROPS_MASK_CONTINUATION=0x0080u, + + /* If GEN_CAT=FORMAT, top byte masks: */ + UPROPS_MASK_Cf_ZWJ = 0x0100u, + UPROPS_MASK_Cf_ZWNJ = 0x0200u +}; +HB_MARK_AS_FLAG_T (hb_unicode_props_flags_t); + +static inline void +_hb_glyph_info_set_unicode_props (hb_glyph_info_t *info, hb_buffer_t *buffer) +{ + hb_unicode_funcs_t *unicode = buffer->unicode; + unsigned int u = info->codepoint; + unsigned int gen_cat = (unsigned int) unicode->general_category (u); + unsigned int props = gen_cat; + + if (u >= 0x80u) + { + buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_NON_ASCII; + + if (unlikely (unicode->is_default_ignorable (u))) + { + buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES; + props |= UPROPS_MASK_IGNORABLE; + if (u == 0x200Cu) props |= UPROPS_MASK_Cf_ZWNJ; + else if (u == 0x200Du) props |= UPROPS_MASK_Cf_ZWJ; + /* Mongolian Free Variation Selectors need to be remembered + * because although we need to hide them like default-ignorables, + * they need to non-ignorable during shaping. This is similar to + * what we do for joiners in Indic-like shapers, but since the + * FVSes are GC=Mn, we have use a separate bit to remember them. + * Fixes: + * https://github.com/harfbuzz/harfbuzz/issues/234 */ + else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x180Bu, 0x180Du))) props |= UPROPS_MASK_HIDDEN; + /* TAG characters need similar treatment. Fixes: + * https://github.com/harfbuzz/harfbuzz/issues/463 */ + else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0xE0020u, 0xE007Fu))) props |= UPROPS_MASK_HIDDEN; + /* COMBINING GRAPHEME JOINER should not be skipped; at least some times. + * https://github.com/harfbuzz/harfbuzz/issues/554 */ + else if (unlikely (u == 0x034Fu)) + { + buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_CGJ; + props |= UPROPS_MASK_HIDDEN; + } + } + + if (unlikely (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (gen_cat))) + { + props |= UPROPS_MASK_CONTINUATION; + props |= unicode->modified_combining_class (u)<<8; + } + } + + info->unicode_props() = props; +} + +static inline void +_hb_glyph_info_set_general_category (hb_glyph_info_t *info, + hb_unicode_general_category_t gen_cat) +{ + /* Clears top-byte. */ + info->unicode_props() = (unsigned int) gen_cat | (info->unicode_props() & (0xFF & ~UPROPS_MASK_GEN_CAT)); +} + +static inline hb_unicode_general_category_t +_hb_glyph_info_get_general_category (const hb_glyph_info_t *info) +{ + return (hb_unicode_general_category_t) (info->unicode_props() & UPROPS_MASK_GEN_CAT); +} + +static inline bool +_hb_glyph_info_is_unicode_mark (const hb_glyph_info_t *info) +{ + return HB_UNICODE_GENERAL_CATEGORY_IS_MARK (info->unicode_props() & UPROPS_MASK_GEN_CAT); +} +static inline void +_hb_glyph_info_set_modified_combining_class (hb_glyph_info_t *info, + unsigned int modified_class) +{ + if (unlikely (!_hb_glyph_info_is_unicode_mark (info))) + return; + info->unicode_props() = (modified_class<<8) | (info->unicode_props() & 0xFF); +} +static inline unsigned int +_hb_glyph_info_get_modified_combining_class (const hb_glyph_info_t *info) +{ + return _hb_glyph_info_is_unicode_mark (info) ? info->unicode_props()>>8 : 0; +} +#define info_cc(info) (_hb_glyph_info_get_modified_combining_class (&(info))) + +static inline bool +_hb_glyph_info_is_unicode_space (const hb_glyph_info_t *info) +{ + return _hb_glyph_info_get_general_category (info) == + HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR; +} +static inline void +_hb_glyph_info_set_unicode_space_fallback_type (hb_glyph_info_t *info, hb_unicode_funcs_t::space_t s) +{ + if (unlikely (!_hb_glyph_info_is_unicode_space (info))) + return; + info->unicode_props() = (((unsigned int) s)<<8) | (info->unicode_props() & 0xFF); +} +static inline hb_unicode_funcs_t::space_t +_hb_glyph_info_get_unicode_space_fallback_type (const hb_glyph_info_t *info) +{ + return _hb_glyph_info_is_unicode_space (info) ? + (hb_unicode_funcs_t::space_t) (info->unicode_props()>>8) : + hb_unicode_funcs_t::NOT_SPACE; +} + +static inline bool _hb_glyph_info_ligated (const hb_glyph_info_t *info); + +static inline bool +_hb_glyph_info_is_default_ignorable (const hb_glyph_info_t *info) +{ + return (info->unicode_props() & UPROPS_MASK_IGNORABLE) && + !_hb_glyph_info_ligated (info); +} +static inline bool +_hb_glyph_info_is_default_ignorable_and_not_hidden (const hb_glyph_info_t *info) +{ + return ((info->unicode_props() & (UPROPS_MASK_IGNORABLE|UPROPS_MASK_HIDDEN)) + == UPROPS_MASK_IGNORABLE) && + !_hb_glyph_info_ligated (info); +} +static inline void +_hb_glyph_info_unhide (hb_glyph_info_t *info) +{ + info->unicode_props() &= ~ UPROPS_MASK_HIDDEN; +} + +static inline void +_hb_glyph_info_set_continuation (hb_glyph_info_t *info) +{ + info->unicode_props() |= UPROPS_MASK_CONTINUATION; +} +static inline void +_hb_glyph_info_reset_continuation (hb_glyph_info_t *info) +{ + info->unicode_props() &= ~ UPROPS_MASK_CONTINUATION; +} +static inline bool +_hb_glyph_info_is_continuation (const hb_glyph_info_t *info) +{ + return info->unicode_props() & UPROPS_MASK_CONTINUATION; +} +/* Loop over grapheme. Based on foreach_cluster(). */ +#define foreach_grapheme(buffer, start, end) \ + for (unsigned int \ + _count = buffer->len, \ + start = 0, end = _count ? _hb_next_grapheme (buffer, 0) : 0; \ + start < _count; \ + start = end, end = _hb_next_grapheme (buffer, start)) + +static inline unsigned int +_hb_next_grapheme (hb_buffer_t *buffer, unsigned int start) +{ + hb_glyph_info_t *info = buffer->info; + unsigned int count = buffer->len; + + while (++start < count && _hb_glyph_info_is_continuation (&info[start])) + ; + + return start; +} + +static inline bool +_hb_glyph_info_is_unicode_format (const hb_glyph_info_t *info) +{ + return _hb_glyph_info_get_general_category (info) == + HB_UNICODE_GENERAL_CATEGORY_FORMAT; +} +static inline bool +_hb_glyph_info_is_zwnj (const hb_glyph_info_t *info) +{ + return _hb_glyph_info_is_unicode_format (info) && (info->unicode_props() & UPROPS_MASK_Cf_ZWNJ); +} +static inline bool +_hb_glyph_info_is_zwj (const hb_glyph_info_t *info) +{ + return _hb_glyph_info_is_unicode_format (info) && (info->unicode_props() & UPROPS_MASK_Cf_ZWJ); +} +static inline bool +_hb_glyph_info_is_joiner (const hb_glyph_info_t *info) +{ + return _hb_glyph_info_is_unicode_format (info) && (info->unicode_props() & (UPROPS_MASK_Cf_ZWNJ|UPROPS_MASK_Cf_ZWJ)); +} +static inline void +_hb_glyph_info_flip_joiners (hb_glyph_info_t *info) +{ + if (!_hb_glyph_info_is_unicode_format (info)) + return; + info->unicode_props() ^= UPROPS_MASK_Cf_ZWNJ | UPROPS_MASK_Cf_ZWJ; +} + +/* lig_props: aka lig_id / lig_comp + * + * When a ligature is formed: + * + * - The ligature glyph and any marks in between all the same newly allocated + * lig_id, + * - The ligature glyph will get lig_num_comps set to the number of components + * - The marks get lig_comp > 0, reflecting which component of the ligature + * they were applied to. + * - This is used in GPOS to attach marks to the right component of a ligature + * in MarkLigPos, + * - Note that when marks are ligated together, much of the above is skipped + * and the current lig_id reused. + * + * When a multiple-substitution is done: + * + * - All resulting glyphs will have lig_id = 0, + * - The resulting glyphs will have lig_comp = 0, 1, 2, ... respectively. + * - This is used in GPOS to attach marks to the first component of a + * multiple substitution in MarkBasePos. + * + * The numbers are also used in GPOS to do mark-to-mark positioning only + * to marks that belong to the same component of the same ligature. + */ + +static inline void +_hb_glyph_info_clear_lig_props (hb_glyph_info_t *info) +{ + info->lig_props() = 0; +} + +#define IS_LIG_BASE 0x10 + +static inline void +_hb_glyph_info_set_lig_props_for_ligature (hb_glyph_info_t *info, + unsigned int lig_id, + unsigned int lig_num_comps) +{ + info->lig_props() = (lig_id << 5) | IS_LIG_BASE | (lig_num_comps & 0x0F); +} + +static inline void +_hb_glyph_info_set_lig_props_for_mark (hb_glyph_info_t *info, + unsigned int lig_id, + unsigned int lig_comp) +{ + info->lig_props() = (lig_id << 5) | (lig_comp & 0x0F); +} + +static inline void +_hb_glyph_info_set_lig_props_for_component (hb_glyph_info_t *info, unsigned int comp) +{ + _hb_glyph_info_set_lig_props_for_mark (info, 0, comp); +} + +static inline unsigned int +_hb_glyph_info_get_lig_id (const hb_glyph_info_t *info) +{ + return info->lig_props() >> 5; +} + +static inline bool +_hb_glyph_info_ligated_internal (const hb_glyph_info_t *info) +{ + return !!(info->lig_props() & IS_LIG_BASE); +} + +static inline unsigned int +_hb_glyph_info_get_lig_comp (const hb_glyph_info_t *info) +{ + if (_hb_glyph_info_ligated_internal (info)) + return 0; + else + return info->lig_props() & 0x0F; +} + +static inline unsigned int +_hb_glyph_info_get_lig_num_comps (const hb_glyph_info_t *info) +{ + if ((info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE) && + _hb_glyph_info_ligated_internal (info)) + return info->lig_props() & 0x0F; + else + return 1; +} + +static inline uint8_t +_hb_allocate_lig_id (hb_buffer_t *buffer) { + uint8_t lig_id = buffer->next_serial () & 0x07; + if (unlikely (!lig_id)) + lig_id = _hb_allocate_lig_id (buffer); /* in case of overflow */ + return lig_id; +} + +/* glyph_props: */ + +static inline void +_hb_glyph_info_set_glyph_props (hb_glyph_info_t *info, unsigned int props) +{ + info->glyph_props() = props; +} + +static inline unsigned int +_hb_glyph_info_get_glyph_props (const hb_glyph_info_t *info) +{ + return info->glyph_props(); +} + +static inline bool +_hb_glyph_info_is_base_glyph (const hb_glyph_info_t *info) +{ + return !!(info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_BASE_GLYPH); +} + +static inline bool +_hb_glyph_info_is_ligature (const hb_glyph_info_t *info) +{ + return !!(info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_LIGATURE); +} + +static inline bool +_hb_glyph_info_is_mark (const hb_glyph_info_t *info) +{ + return !!(info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_MARK); +} + +static inline bool +_hb_glyph_info_substituted (const hb_glyph_info_t *info) +{ + return !!(info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED); +} + +static inline bool +_hb_glyph_info_ligated (const hb_glyph_info_t *info) +{ + return !!(info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_LIGATED); +} + +static inline bool +_hb_glyph_info_multiplied (const hb_glyph_info_t *info) +{ + return !!(info->glyph_props() & HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED); +} + +static inline bool +_hb_glyph_info_ligated_and_didnt_multiply (const hb_glyph_info_t *info) +{ + return _hb_glyph_info_ligated (info) && !_hb_glyph_info_multiplied (info); +} + +static inline void +_hb_glyph_info_clear_ligated_and_multiplied (hb_glyph_info_t *info) +{ + info->glyph_props() &= ~(HB_OT_LAYOUT_GLYPH_PROPS_LIGATED | + HB_OT_LAYOUT_GLYPH_PROPS_MULTIPLIED); +} + +static inline void +_hb_glyph_info_clear_substituted (hb_glyph_info_t *info) +{ + info->glyph_props() &= ~(HB_OT_LAYOUT_GLYPH_PROPS_SUBSTITUTED); +} + +static inline void +_hb_clear_substitution_flags (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font HB_UNUSED, + hb_buffer_t *buffer) +{ + hb_glyph_info_t *info = buffer->info; + unsigned int count = buffer->len; + for (unsigned int i = 0; i < count; i++) + _hb_glyph_info_clear_substituted (&info[i]); +} + + +/* Allocation / deallocation. */ + +static inline void +_hb_buffer_allocate_unicode_vars (hb_buffer_t *buffer) +{ + HB_BUFFER_ALLOCATE_VAR (buffer, unicode_props); +} + +static inline void +_hb_buffer_deallocate_unicode_vars (hb_buffer_t *buffer) +{ + HB_BUFFER_DEALLOCATE_VAR (buffer, unicode_props); +} + +static inline void +_hb_buffer_assert_unicode_vars (hb_buffer_t *buffer) +{ + HB_BUFFER_ASSERT_VAR (buffer, unicode_props); +} + +static inline void +_hb_buffer_allocate_gsubgpos_vars (hb_buffer_t *buffer) +{ + HB_BUFFER_ALLOCATE_VAR (buffer, glyph_props); + HB_BUFFER_ALLOCATE_VAR (buffer, lig_props); + HB_BUFFER_ALLOCATE_VAR (buffer, syllable); +} + +static inline void +_hb_buffer_deallocate_gsubgpos_vars (hb_buffer_t *buffer) +{ + HB_BUFFER_DEALLOCATE_VAR (buffer, syllable); + HB_BUFFER_DEALLOCATE_VAR (buffer, lig_props); + HB_BUFFER_DEALLOCATE_VAR (buffer, glyph_props); +} + +static inline void +_hb_buffer_assert_gsubgpos_vars (hb_buffer_t *buffer) +{ + HB_BUFFER_ASSERT_VAR (buffer, glyph_props); + HB_BUFFER_ASSERT_VAR (buffer, lig_props); + HB_BUFFER_ASSERT_VAR (buffer, syllable); +} + +/* Make sure no one directly touches our props... */ +#undef unicode_props0 +#undef unicode_props1 +#undef lig_props +#undef glyph_props + +#endif /* HB_OT_LAYOUT_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-map.cc b/thirdparty/harfbuzz/src/hb-ot-map.cc new file mode 100644 index 0000000000..e4bb4b6366 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-map.cc @@ -0,0 +1,342 @@ +/* + * Copyright © 2009,2010 Red Hat, Inc. + * Copyright © 2010,2011,2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-map.hh" +#include "hb-ot-shape.hh" +#include "hb-ot-layout.hh" + + +void hb_ot_map_t::collect_lookups (unsigned int table_index, hb_set_t *lookups_out) const +{ + for (unsigned int i = 0; i < lookups[table_index].length; i++) + lookups_out->add (lookups[table_index][i].index); +} + + +hb_ot_map_builder_t::hb_ot_map_builder_t (hb_face_t *face_, + const hb_segment_properties_t *props_) +{ + memset (this, 0, sizeof (*this)); + + feature_infos.init (); + for (unsigned int table_index = 0; table_index < 2; table_index++) + stages[table_index].init (); + + face = face_; + props = *props_; + + + /* Fetch script/language indices for GSUB/GPOS. We need these later to skip + * features not available in either table and not waste precious bits for them. */ + + unsigned int script_count = HB_OT_MAX_TAGS_PER_SCRIPT; + unsigned int language_count = HB_OT_MAX_TAGS_PER_LANGUAGE; + hb_tag_t script_tags[HB_OT_MAX_TAGS_PER_SCRIPT]; + hb_tag_t language_tags[HB_OT_MAX_TAGS_PER_LANGUAGE]; + + hb_ot_tags_from_script_and_language (props.script, props.language, &script_count, script_tags, &language_count, language_tags); + + for (unsigned int table_index = 0; table_index < 2; table_index++) { + hb_tag_t table_tag = table_tags[table_index]; + found_script[table_index] = (bool) hb_ot_layout_table_select_script (face, table_tag, script_count, script_tags, &script_index[table_index], &chosen_script[table_index]); + hb_ot_layout_script_select_language (face, table_tag, script_index[table_index], language_count, language_tags, &language_index[table_index]); + } +} + +hb_ot_map_builder_t::~hb_ot_map_builder_t () +{ + feature_infos.fini (); + for (unsigned int table_index = 0; table_index < 2; table_index++) + stages[table_index].fini (); +} + +void hb_ot_map_builder_t::add_feature (hb_tag_t tag, + hb_ot_map_feature_flags_t flags, + unsigned int value) +{ + if (unlikely (!tag)) return; + feature_info_t *info = feature_infos.push(); + info->tag = tag; + info->seq = feature_infos.length; + info->max_value = value; + info->flags = flags; + info->default_value = (flags & F_GLOBAL) ? value : 0; + info->stage[0] = current_stage[0]; + info->stage[1] = current_stage[1]; +} + +void +hb_ot_map_builder_t::add_lookups (hb_ot_map_t &m, + unsigned int table_index, + unsigned int feature_index, + unsigned int variations_index, + hb_mask_t mask, + bool auto_zwnj, + bool auto_zwj, + bool random) +{ + unsigned int lookup_indices[32]; + unsigned int offset, len; + unsigned int table_lookup_count; + + table_lookup_count = hb_ot_layout_table_get_lookup_count (face, table_tags[table_index]); + + offset = 0; + do { + len = ARRAY_LENGTH (lookup_indices); + hb_ot_layout_feature_with_variations_get_lookups (face, + table_tags[table_index], + feature_index, + variations_index, + offset, &len, + lookup_indices); + + for (unsigned int i = 0; i < len; i++) + { + if (lookup_indices[i] >= table_lookup_count) + continue; + hb_ot_map_t::lookup_map_t *lookup = m.lookups[table_index].push (); + lookup->mask = mask; + lookup->index = lookup_indices[i]; + lookup->auto_zwnj = auto_zwnj; + lookup->auto_zwj = auto_zwj; + lookup->random = random; + } + + offset += len; + } while (len == ARRAY_LENGTH (lookup_indices)); +} + + +void hb_ot_map_builder_t::add_pause (unsigned int table_index, hb_ot_map_t::pause_func_t pause_func) +{ + stage_info_t *s = stages[table_index].push (); + s->index = current_stage[table_index]; + s->pause_func = pause_func; + + current_stage[table_index]++; +} + +void +hb_ot_map_builder_t::compile (hb_ot_map_t &m, + const hb_ot_shape_plan_key_t &key) +{ + static_assert ((!(HB_GLYPH_FLAG_DEFINED & (HB_GLYPH_FLAG_DEFINED + 1))), ""); + unsigned int global_bit_mask = HB_GLYPH_FLAG_DEFINED + 1; + unsigned int global_bit_shift = hb_popcount (HB_GLYPH_FLAG_DEFINED); + + m.global_mask = global_bit_mask; + + unsigned int required_feature_index[2]; + hb_tag_t required_feature_tag[2]; + /* We default to applying required feature in stage 0. If the required + * feature has a tag that is known to the shaper, we apply required feature + * in the stage for that tag. + */ + unsigned int required_feature_stage[2] = {0, 0}; + + for (unsigned int table_index = 0; table_index < 2; table_index++) + { + m.chosen_script[table_index] = chosen_script[table_index]; + m.found_script[table_index] = found_script[table_index]; + + hb_ot_layout_language_get_required_feature (face, + table_tags[table_index], + script_index[table_index], + language_index[table_index], + &required_feature_index[table_index], + &required_feature_tag[table_index]); + } + + /* Sort features and merge duplicates */ + if (feature_infos.length) + { + feature_infos.qsort (); + unsigned int j = 0; + for (unsigned int i = 1; i < feature_infos.length; i++) + if (feature_infos[i].tag != feature_infos[j].tag) + feature_infos[++j] = feature_infos[i]; + else { + if (feature_infos[i].flags & F_GLOBAL) { + feature_infos[j].flags |= F_GLOBAL; + feature_infos[j].max_value = feature_infos[i].max_value; + feature_infos[j].default_value = feature_infos[i].default_value; + } else { + if (feature_infos[j].flags & F_GLOBAL) + feature_infos[j].flags ^= F_GLOBAL; + feature_infos[j].max_value = hb_max (feature_infos[j].max_value, feature_infos[i].max_value); + /* Inherit default_value from j */ + } + feature_infos[j].flags |= (feature_infos[i].flags & F_HAS_FALLBACK); + feature_infos[j].stage[0] = hb_min (feature_infos[j].stage[0], feature_infos[i].stage[0]); + feature_infos[j].stage[1] = hb_min (feature_infos[j].stage[1], feature_infos[i].stage[1]); + } + feature_infos.shrink (j + 1); + } + + + /* Allocate bits now */ + unsigned int next_bit = global_bit_shift + 1; + + for (unsigned int i = 0; i < feature_infos.length; i++) + { + const feature_info_t *info = &feature_infos[i]; + + unsigned int bits_needed; + + if ((info->flags & F_GLOBAL) && info->max_value == 1) + /* Uses the global bit */ + bits_needed = 0; + else + /* Limit bits per feature. */ + bits_needed = hb_min (HB_OT_MAP_MAX_BITS, hb_bit_storage (info->max_value)); + + if (!info->max_value || next_bit + bits_needed > 8 * sizeof (hb_mask_t)) + continue; /* Feature disabled, or not enough bits. */ + + + bool found = false; + unsigned int feature_index[2]; + for (unsigned int table_index = 0; table_index < 2; table_index++) + { + if (required_feature_tag[table_index] == info->tag) + required_feature_stage[table_index] = info->stage[table_index]; + + found |= (bool) hb_ot_layout_language_find_feature (face, + table_tags[table_index], + script_index[table_index], + language_index[table_index], + info->tag, + &feature_index[table_index]); + } + if (!found && (info->flags & F_GLOBAL_SEARCH)) + { + for (unsigned int table_index = 0; table_index < 2; table_index++) + { + found |= (bool) hb_ot_layout_table_find_feature (face, + table_tags[table_index], + info->tag, + &feature_index[table_index]); + } + } + if (!found && !(info->flags & F_HAS_FALLBACK)) + continue; + + + hb_ot_map_t::feature_map_t *map = m.features.push (); + + map->tag = info->tag; + map->index[0] = feature_index[0]; + map->index[1] = feature_index[1]; + map->stage[0] = info->stage[0]; + map->stage[1] = info->stage[1]; + map->auto_zwnj = !(info->flags & F_MANUAL_ZWNJ); + map->auto_zwj = !(info->flags & F_MANUAL_ZWJ); + map->random = !!(info->flags & F_RANDOM); + if ((info->flags & F_GLOBAL) && info->max_value == 1) { + /* Uses the global bit */ + map->shift = global_bit_shift; + map->mask = global_bit_mask; + } else { + map->shift = next_bit; + map->mask = (1u << (next_bit + bits_needed)) - (1u << next_bit); + next_bit += bits_needed; + m.global_mask |= (info->default_value << map->shift) & map->mask; + } + map->_1_mask = (1u << map->shift) & map->mask; + map->needs_fallback = !found; + + } + feature_infos.shrink (0); /* Done with these */ + + + add_gsub_pause (nullptr); + add_gpos_pause (nullptr); + + for (unsigned int table_index = 0; table_index < 2; table_index++) + { + /* Collect lookup indices for features */ + + unsigned int stage_index = 0; + unsigned int last_num_lookups = 0; + for (unsigned stage = 0; stage < current_stage[table_index]; stage++) + { + if (required_feature_index[table_index] != HB_OT_LAYOUT_NO_FEATURE_INDEX && + required_feature_stage[table_index] == stage) + add_lookups (m, table_index, + required_feature_index[table_index], + key.variations_index[table_index], + global_bit_mask); + + for (unsigned i = 0; i < m.features.length; i++) + if (m.features[i].stage[table_index] == stage) + add_lookups (m, table_index, + m.features[i].index[table_index], + key.variations_index[table_index], + m.features[i].mask, + m.features[i].auto_zwnj, + m.features[i].auto_zwj, + m.features[i].random); + + /* Sort lookups and merge duplicates */ + if (last_num_lookups < m.lookups[table_index].length) + { + m.lookups[table_index].qsort (last_num_lookups, m.lookups[table_index].length); + + unsigned int j = last_num_lookups; + for (unsigned int i = j + 1; i < m.lookups[table_index].length; i++) + if (m.lookups[table_index][i].index != m.lookups[table_index][j].index) + m.lookups[table_index][++j] = m.lookups[table_index][i]; + else + { + m.lookups[table_index][j].mask |= m.lookups[table_index][i].mask; + m.lookups[table_index][j].auto_zwnj &= m.lookups[table_index][i].auto_zwnj; + m.lookups[table_index][j].auto_zwj &= m.lookups[table_index][i].auto_zwj; + } + m.lookups[table_index].shrink (j + 1); + } + + last_num_lookups = m.lookups[table_index].length; + + if (stage_index < stages[table_index].length && stages[table_index][stage_index].index == stage) { + hb_ot_map_t::stage_map_t *stage_map = m.stages[table_index].push (); + stage_map->last_lookup = last_num_lookups; + stage_map->pause_func = stages[table_index][stage_index].pause_func; + + stage_index++; + } + } + } +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-map.hh b/thirdparty/harfbuzz/src/hb-ot-map.hh new file mode 100644 index 0000000000..7629a869bd --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-map.hh @@ -0,0 +1,284 @@ +/* + * Copyright © 2009,2010 Red Hat, Inc. + * Copyright © 2010,2011,2012,2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_MAP_HH +#define HB_OT_MAP_HH + +#include "hb-buffer.hh" + + +#define HB_OT_MAP_MAX_BITS 8u +#define HB_OT_MAP_MAX_VALUE ((1u << HB_OT_MAP_MAX_BITS) - 1u) + +struct hb_ot_shape_plan_t; + +static const hb_tag_t table_tags[2] = {HB_OT_TAG_GSUB, HB_OT_TAG_GPOS}; + +struct hb_ot_map_t +{ + friend struct hb_ot_map_builder_t; + + public: + + struct feature_map_t { + hb_tag_t tag; /* should be first for our bsearch to work */ + unsigned int index[2]; /* GSUB/GPOS */ + unsigned int stage[2]; /* GSUB/GPOS */ + unsigned int shift; + hb_mask_t mask; + hb_mask_t _1_mask; /* mask for value=1, for quick access */ + unsigned int needs_fallback : 1; + unsigned int auto_zwnj : 1; + unsigned int auto_zwj : 1; + unsigned int random : 1; + + int cmp (const hb_tag_t tag_) const + { return tag_ < tag ? -1 : tag_ > tag ? 1 : 0; } + }; + + struct lookup_map_t { + unsigned short index; + unsigned short auto_zwnj : 1; + unsigned short auto_zwj : 1; + unsigned short random : 1; + hb_mask_t mask; + + HB_INTERNAL static int cmp (const void *pa, const void *pb) + { + const lookup_map_t *a = (const lookup_map_t *) pa; + const lookup_map_t *b = (const lookup_map_t *) pb; + return a->index < b->index ? -1 : a->index > b->index ? 1 : 0; + } + }; + + typedef void (*pause_func_t) (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer); + + struct stage_map_t { + unsigned int last_lookup; /* Cumulative */ + pause_func_t pause_func; + }; + + void init () + { + memset (this, 0, sizeof (*this)); + + features.init (); + for (unsigned int table_index = 0; table_index < 2; table_index++) + { + lookups[table_index].init (); + stages[table_index].init (); + } + } + void fini () + { + features.fini (); + for (unsigned int table_index = 0; table_index < 2; table_index++) + { + lookups[table_index].fini (); + stages[table_index].fini (); + } + } + + hb_mask_t get_global_mask () const { return global_mask; } + + hb_mask_t get_mask (hb_tag_t feature_tag, unsigned int *shift = nullptr) const + { + const feature_map_t *map = features.bsearch (feature_tag); + if (shift) *shift = map ? map->shift : 0; + return map ? map->mask : 0; + } + + bool needs_fallback (hb_tag_t feature_tag) const + { + const feature_map_t *map = features.bsearch (feature_tag); + return map ? map->needs_fallback : false; + } + + hb_mask_t get_1_mask (hb_tag_t feature_tag) const + { + const feature_map_t *map = features.bsearch (feature_tag); + return map ? map->_1_mask : 0; + } + + unsigned int get_feature_index (unsigned int table_index, hb_tag_t feature_tag) const + { + const feature_map_t *map = features.bsearch (feature_tag); + return map ? map->index[table_index] : HB_OT_LAYOUT_NO_FEATURE_INDEX; + } + + unsigned int get_feature_stage (unsigned int table_index, hb_tag_t feature_tag) const + { + const feature_map_t *map = features.bsearch (feature_tag); + return map ? map->stage[table_index] : UINT_MAX; + } + + void get_stage_lookups (unsigned int table_index, unsigned int stage, + const struct lookup_map_t **plookups, unsigned int *lookup_count) const + { + if (unlikely (stage == UINT_MAX)) { + *plookups = nullptr; + *lookup_count = 0; + return; + } + assert (stage <= stages[table_index].length); + unsigned int start = stage ? stages[table_index][stage - 1].last_lookup : 0; + unsigned int end = stage < stages[table_index].length ? stages[table_index][stage].last_lookup : lookups[table_index].length; + *plookups = end == start ? nullptr : &lookups[table_index][start]; + *lookup_count = end - start; + } + + HB_INTERNAL void collect_lookups (unsigned int table_index, hb_set_t *lookups) const; + template <typename Proxy> + HB_INTERNAL void apply (const Proxy &proxy, + const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const; + HB_INTERNAL void substitute (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const; + HB_INTERNAL void position (const struct hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) const; + + public: + hb_tag_t chosen_script[2]; + bool found_script[2]; + + private: + + hb_mask_t global_mask; + + hb_sorted_vector_t<feature_map_t> features; + hb_vector_t<lookup_map_t> lookups[2]; /* GSUB/GPOS */ + hb_vector_t<stage_map_t> stages[2]; /* GSUB/GPOS */ +}; + +enum hb_ot_map_feature_flags_t +{ + F_NONE = 0x0000u, + F_GLOBAL = 0x0001u, /* Feature applies to all characters; results in no mask allocated for it. */ + F_HAS_FALLBACK = 0x0002u, /* Has fallback implementation, so include mask bit even if feature not found. */ + F_MANUAL_ZWNJ = 0x0004u, /* Don't skip over ZWNJ when matching **context**. */ + F_MANUAL_ZWJ = 0x0008u, /* Don't skip over ZWJ when matching **input**. */ + F_MANUAL_JOINERS = F_MANUAL_ZWNJ | F_MANUAL_ZWJ, + F_GLOBAL_MANUAL_JOINERS= F_GLOBAL | F_MANUAL_JOINERS, + F_GLOBAL_HAS_FALLBACK = F_GLOBAL | F_HAS_FALLBACK, + F_GLOBAL_SEARCH = 0x0010u, /* If feature not found in LangSys, look for it in global feature list and pick one. */ + F_RANDOM = 0x0020u /* Randomly select a glyph from an AlternateSubstFormat1 subtable. */ +}; +HB_MARK_AS_FLAG_T (hb_ot_map_feature_flags_t); + + +struct hb_ot_map_feature_t +{ + hb_tag_t tag; + hb_ot_map_feature_flags_t flags; +}; + +struct hb_ot_shape_plan_key_t; + +struct hb_ot_map_builder_t +{ + public: + + HB_INTERNAL hb_ot_map_builder_t (hb_face_t *face_, + const hb_segment_properties_t *props_); + + HB_INTERNAL ~hb_ot_map_builder_t (); + + HB_INTERNAL void add_feature (hb_tag_t tag, + hb_ot_map_feature_flags_t flags=F_NONE, + unsigned int value=1); + + void add_feature (const hb_ot_map_feature_t &feat) + { add_feature (feat.tag, feat.flags); } + + void enable_feature (hb_tag_t tag, + hb_ot_map_feature_flags_t flags=F_NONE, + unsigned int value=1) + { add_feature (tag, F_GLOBAL | flags, value); } + + void disable_feature (hb_tag_t tag) + { add_feature (tag, F_GLOBAL, 0); } + + void add_gsub_pause (hb_ot_map_t::pause_func_t pause_func) + { add_pause (0, pause_func); } + void add_gpos_pause (hb_ot_map_t::pause_func_t pause_func) + { add_pause (1, pause_func); } + + HB_INTERNAL void compile (hb_ot_map_t &m, + const hb_ot_shape_plan_key_t &key); + + private: + + HB_INTERNAL void add_lookups (hb_ot_map_t &m, + unsigned int table_index, + unsigned int feature_index, + unsigned int variations_index, + hb_mask_t mask, + bool auto_zwnj = true, + bool auto_zwj = true, + bool random = false); + + struct feature_info_t { + hb_tag_t tag; + unsigned int seq; /* sequence#, used for stable sorting only */ + unsigned int max_value; + hb_ot_map_feature_flags_t flags; + unsigned int default_value; /* for non-global features, what should the unset glyphs take */ + unsigned int stage[2]; /* GSUB/GPOS */ + + HB_INTERNAL static int cmp (const void *pa, const void *pb) + { + const feature_info_t *a = (const feature_info_t *) pa; + const feature_info_t *b = (const feature_info_t *) pb; + return (a->tag != b->tag) ? (a->tag < b->tag ? -1 : 1) : + (a->seq < b->seq ? -1 : a->seq > b->seq ? 1 : 0); + } + }; + + struct stage_info_t { + unsigned int index; + hb_ot_map_t::pause_func_t pause_func; + }; + + HB_INTERNAL void add_pause (unsigned int table_index, hb_ot_map_t::pause_func_t pause_func); + + public: + + hb_face_t *face; + hb_segment_properties_t props; + + hb_tag_t chosen_script[2]; + bool found_script[2]; + unsigned int script_index[2], language_index[2]; + + private: + + unsigned int current_stage[2]; /* GSUB/GPOS */ + hb_vector_t<feature_info_t> feature_infos; + hb_vector_t<stage_info_t> stages[2]; /* GSUB/GPOS */ +}; + + + +#endif /* HB_OT_MAP_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-math-table.hh b/thirdparty/harfbuzz/src/hb-ot-math-table.hh new file mode 100644 index 0000000000..26aa080603 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-math-table.hh @@ -0,0 +1,728 @@ +/* + * Copyright © 2016 Igalia S.L. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Igalia Author(s): Frédéric Wang + */ + +#ifndef HB_OT_MATH_TABLE_HH +#define HB_OT_MATH_TABLE_HH + +#include "hb-open-type.hh" +#include "hb-ot-layout-common.hh" +#include "hb-ot-math.h" + +namespace OT { + + +struct MathValueRecord +{ + hb_position_t get_x_value (hb_font_t *font, const void *base) const + { return font->em_scale_x (value) + (base+deviceTable).get_x_delta (font); } + hb_position_t get_y_value (hb_font_t *font, const void *base) const + { return font->em_scale_y (value) + (base+deviceTable).get_y_delta (font); } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && deviceTable.sanitize (c, base)); + } + + protected: + HBINT16 value; /* The X or Y value in design units */ + OffsetTo<Device> deviceTable; /* Offset to the device table - from the + * beginning of parent table. May be NULL. + * Suggested format for device table is 1. */ + + public: + DEFINE_SIZE_STATIC (4); +}; + +struct MathConstants +{ + bool sanitize_math_value_records (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + + unsigned int count = ARRAY_LENGTH (mathValueRecords); + for (unsigned int i = 0; i < count; i++) + if (!mathValueRecords[i].sanitize (c, this)) + return_trace (false); + + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && sanitize_math_value_records (c)); + } + + hb_position_t get_value (hb_ot_math_constant_t constant, + hb_font_t *font) const + { + switch (constant) { + + case HB_OT_MATH_CONSTANT_SCRIPT_PERCENT_SCALE_DOWN: + case HB_OT_MATH_CONSTANT_SCRIPT_SCRIPT_PERCENT_SCALE_DOWN: + return percentScaleDown[constant - HB_OT_MATH_CONSTANT_SCRIPT_PERCENT_SCALE_DOWN]; + + case HB_OT_MATH_CONSTANT_DELIMITED_SUB_FORMULA_MIN_HEIGHT: + case HB_OT_MATH_CONSTANT_DISPLAY_OPERATOR_MIN_HEIGHT: + return font->em_scale_y (minHeight[constant - HB_OT_MATH_CONSTANT_DELIMITED_SUB_FORMULA_MIN_HEIGHT]); + + case HB_OT_MATH_CONSTANT_RADICAL_KERN_AFTER_DEGREE: + case HB_OT_MATH_CONSTANT_RADICAL_KERN_BEFORE_DEGREE: + case HB_OT_MATH_CONSTANT_SKEWED_FRACTION_HORIZONTAL_GAP: + case HB_OT_MATH_CONSTANT_SPACE_AFTER_SCRIPT: + return mathValueRecords[constant - HB_OT_MATH_CONSTANT_MATH_LEADING].get_x_value (font, this); + + case HB_OT_MATH_CONSTANT_ACCENT_BASE_HEIGHT: + case HB_OT_MATH_CONSTANT_AXIS_HEIGHT: + case HB_OT_MATH_CONSTANT_FLATTENED_ACCENT_BASE_HEIGHT: + case HB_OT_MATH_CONSTANT_FRACTION_DENOMINATOR_DISPLAY_STYLE_SHIFT_DOWN: + case HB_OT_MATH_CONSTANT_FRACTION_DENOMINATOR_GAP_MIN: + case HB_OT_MATH_CONSTANT_FRACTION_DENOMINATOR_SHIFT_DOWN: + case HB_OT_MATH_CONSTANT_FRACTION_DENOM_DISPLAY_STYLE_GAP_MIN: + case HB_OT_MATH_CONSTANT_FRACTION_NUMERATOR_DISPLAY_STYLE_SHIFT_UP: + case HB_OT_MATH_CONSTANT_FRACTION_NUMERATOR_GAP_MIN: + case HB_OT_MATH_CONSTANT_FRACTION_NUMERATOR_SHIFT_UP: + case HB_OT_MATH_CONSTANT_FRACTION_NUM_DISPLAY_STYLE_GAP_MIN: + case HB_OT_MATH_CONSTANT_FRACTION_RULE_THICKNESS: + case HB_OT_MATH_CONSTANT_LOWER_LIMIT_BASELINE_DROP_MIN: + case HB_OT_MATH_CONSTANT_LOWER_LIMIT_GAP_MIN: + case HB_OT_MATH_CONSTANT_MATH_LEADING: + case HB_OT_MATH_CONSTANT_OVERBAR_EXTRA_ASCENDER: + case HB_OT_MATH_CONSTANT_OVERBAR_RULE_THICKNESS: + case HB_OT_MATH_CONSTANT_OVERBAR_VERTICAL_GAP: + case HB_OT_MATH_CONSTANT_RADICAL_DISPLAY_STYLE_VERTICAL_GAP: + case HB_OT_MATH_CONSTANT_RADICAL_EXTRA_ASCENDER: + case HB_OT_MATH_CONSTANT_RADICAL_RULE_THICKNESS: + case HB_OT_MATH_CONSTANT_RADICAL_VERTICAL_GAP: + case HB_OT_MATH_CONSTANT_SKEWED_FRACTION_VERTICAL_GAP: + case HB_OT_MATH_CONSTANT_STACK_BOTTOM_DISPLAY_STYLE_SHIFT_DOWN: + case HB_OT_MATH_CONSTANT_STACK_BOTTOM_SHIFT_DOWN: + case HB_OT_MATH_CONSTANT_STACK_DISPLAY_STYLE_GAP_MIN: + case HB_OT_MATH_CONSTANT_STACK_GAP_MIN: + case HB_OT_MATH_CONSTANT_STACK_TOP_DISPLAY_STYLE_SHIFT_UP: + case HB_OT_MATH_CONSTANT_STACK_TOP_SHIFT_UP: + case HB_OT_MATH_CONSTANT_STRETCH_STACK_BOTTOM_SHIFT_DOWN: + case HB_OT_MATH_CONSTANT_STRETCH_STACK_GAP_ABOVE_MIN: + case HB_OT_MATH_CONSTANT_STRETCH_STACK_GAP_BELOW_MIN: + case HB_OT_MATH_CONSTANT_STRETCH_STACK_TOP_SHIFT_UP: + case HB_OT_MATH_CONSTANT_SUBSCRIPT_BASELINE_DROP_MIN: + case HB_OT_MATH_CONSTANT_SUBSCRIPT_SHIFT_DOWN: + case HB_OT_MATH_CONSTANT_SUBSCRIPT_TOP_MAX: + case HB_OT_MATH_CONSTANT_SUB_SUPERSCRIPT_GAP_MIN: + case HB_OT_MATH_CONSTANT_SUPERSCRIPT_BASELINE_DROP_MAX: + case HB_OT_MATH_CONSTANT_SUPERSCRIPT_BOTTOM_MAX_WITH_SUBSCRIPT: + case HB_OT_MATH_CONSTANT_SUPERSCRIPT_BOTTOM_MIN: + case HB_OT_MATH_CONSTANT_SUPERSCRIPT_SHIFT_UP: + case HB_OT_MATH_CONSTANT_SUPERSCRIPT_SHIFT_UP_CRAMPED: + case HB_OT_MATH_CONSTANT_UNDERBAR_EXTRA_DESCENDER: + case HB_OT_MATH_CONSTANT_UNDERBAR_RULE_THICKNESS: + case HB_OT_MATH_CONSTANT_UNDERBAR_VERTICAL_GAP: + case HB_OT_MATH_CONSTANT_UPPER_LIMIT_BASELINE_RISE_MIN: + case HB_OT_MATH_CONSTANT_UPPER_LIMIT_GAP_MIN: + return mathValueRecords[constant - HB_OT_MATH_CONSTANT_MATH_LEADING].get_y_value (font, this); + + case HB_OT_MATH_CONSTANT_RADICAL_DEGREE_BOTTOM_RAISE_PERCENT: + return radicalDegreeBottomRaisePercent; + + default: + return 0; + } + } + + protected: + HBINT16 percentScaleDown[2]; + HBUINT16 minHeight[2]; + MathValueRecord mathValueRecords[51]; + HBINT16 radicalDegreeBottomRaisePercent; + + public: + DEFINE_SIZE_STATIC (214); +}; + +struct MathItalicsCorrectionInfo +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + coverage.sanitize (c, this) && + italicsCorrection.sanitize (c, this)); + } + + hb_position_t get_value (hb_codepoint_t glyph, + hb_font_t *font) const + { + unsigned int index = (this+coverage).get_coverage (glyph); + return italicsCorrection[index].get_x_value (font, this); + } + + protected: + OffsetTo<Coverage> coverage; /* Offset to Coverage table - + * from the beginning of + * MathItalicsCorrectionInfo + * table. */ + ArrayOf<MathValueRecord> italicsCorrection; /* Array of MathValueRecords + * defining italics correction + * values for each + * covered glyph. */ + + public: + DEFINE_SIZE_ARRAY (4, italicsCorrection); +}; + +struct MathTopAccentAttachment +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + topAccentCoverage.sanitize (c, this) && + topAccentAttachment.sanitize (c, this)); + } + + hb_position_t get_value (hb_codepoint_t glyph, + hb_font_t *font) const + { + unsigned int index = (this+topAccentCoverage).get_coverage (glyph); + if (index == NOT_COVERED) + return font->get_glyph_h_advance (glyph) / 2; + return topAccentAttachment[index].get_x_value (font, this); + } + + protected: + OffsetTo<Coverage> topAccentCoverage; /* Offset to Coverage table - + * from the beginning of + * MathTopAccentAttachment + * table. */ + ArrayOf<MathValueRecord> topAccentAttachment; /* Array of MathValueRecords + * defining top accent + * attachment points for each + * covered glyph. */ + + public: + DEFINE_SIZE_ARRAY (2 + 2, topAccentAttachment); +}; + +struct MathKern +{ + bool sanitize_math_value_records (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + unsigned int count = 2 * heightCount + 1; + for (unsigned int i = 0; i < count; i++) + if (!mathValueRecordsZ.arrayZ[i].sanitize (c, this)) return_trace (false); + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + c->check_array (mathValueRecordsZ.arrayZ, 2 * heightCount + 1) && + sanitize_math_value_records (c)); + } + + hb_position_t get_value (hb_position_t correction_height, hb_font_t *font) const + { + const MathValueRecord* correctionHeight = mathValueRecordsZ.arrayZ; + const MathValueRecord* kernValue = mathValueRecordsZ.arrayZ + heightCount; + int sign = font->y_scale < 0 ? -1 : +1; + + /* The description of the MathKern table is a ambiguous, but interpreting + * "between the two heights found at those indexes" for 0 < i < len as + * + * correctionHeight[i-1] < correction_height <= correctionHeight[i] + * + * makes the result consistent with the limit cases and we can just use the + * binary search algorithm of std::upper_bound: + */ + unsigned int i = 0; + unsigned int count = heightCount; + while (count > 0) + { + unsigned int half = count / 2; + hb_position_t height = correctionHeight[i + half].get_y_value (font, this); + if (sign * height < sign * correction_height) + { + i += half + 1; + count -= half + 1; + } else + count = half; + } + return kernValue[i].get_x_value (font, this); + } + + protected: + HBUINT16 heightCount; + UnsizedArrayOf<MathValueRecord> + mathValueRecordsZ; + /* Array of correction heights at + * which the kern value changes. + * Sorted by the height value in + * design units (heightCount entries), + * Followed by: + * Array of kern values corresponding + * to heights. (heightCount+1 entries). + */ + + public: + DEFINE_SIZE_ARRAY (2, mathValueRecordsZ); +}; + +struct MathKernInfoRecord +{ + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + + unsigned int count = ARRAY_LENGTH (mathKern); + for (unsigned int i = 0; i < count; i++) + if (unlikely (!mathKern[i].sanitize (c, base))) + return_trace (false); + + return_trace (true); + } + + hb_position_t get_kerning (hb_ot_math_kern_t kern, + hb_position_t correction_height, + hb_font_t *font, + const void *base) const + { + unsigned int idx = kern; + if (unlikely (idx >= ARRAY_LENGTH (mathKern))) return 0; + return (base+mathKern[idx]).get_value (correction_height, font); + } + + protected: + /* Offset to MathKern table for each corner - + * from the beginning of MathKernInfo table. May be NULL. */ + OffsetTo<MathKern> mathKern[4]; + + public: + DEFINE_SIZE_STATIC (8); +}; + +struct MathKernInfo +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + mathKernCoverage.sanitize (c, this) && + mathKernInfoRecords.sanitize (c, this)); + } + + hb_position_t get_kerning (hb_codepoint_t glyph, + hb_ot_math_kern_t kern, + hb_position_t correction_height, + hb_font_t *font) const + { + unsigned int index = (this+mathKernCoverage).get_coverage (glyph); + return mathKernInfoRecords[index].get_kerning (kern, correction_height, font, this); + } + + protected: + OffsetTo<Coverage> + mathKernCoverage; + /* Offset to Coverage table - + * from the beginning of the + * MathKernInfo table. */ + ArrayOf<MathKernInfoRecord> + mathKernInfoRecords; + /* Array of MathKernInfoRecords, + * per-glyph information for + * mathematical positioning + * of subscripts and + * superscripts. */ + + public: + DEFINE_SIZE_ARRAY (4, mathKernInfoRecords); +}; + +struct MathGlyphInfo +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + mathItalicsCorrectionInfo.sanitize (c, this) && + mathTopAccentAttachment.sanitize (c, this) && + extendedShapeCoverage.sanitize (c, this) && + mathKernInfo.sanitize (c, this)); + } + + hb_position_t + get_italics_correction (hb_codepoint_t glyph, hb_font_t *font) const + { return (this+mathItalicsCorrectionInfo).get_value (glyph, font); } + + hb_position_t + get_top_accent_attachment (hb_codepoint_t glyph, hb_font_t *font) const + { return (this+mathTopAccentAttachment).get_value (glyph, font); } + + bool is_extended_shape (hb_codepoint_t glyph) const + { return (this+extendedShapeCoverage).get_coverage (glyph) != NOT_COVERED; } + + hb_position_t get_kerning (hb_codepoint_t glyph, + hb_ot_math_kern_t kern, + hb_position_t correction_height, + hb_font_t *font) const + { return (this+mathKernInfo).get_kerning (glyph, kern, correction_height, font); } + + protected: + /* Offset to MathItalicsCorrectionInfo table - + * from the beginning of MathGlyphInfo table. */ + OffsetTo<MathItalicsCorrectionInfo> mathItalicsCorrectionInfo; + + /* Offset to MathTopAccentAttachment table - + * from the beginning of MathGlyphInfo table. */ + OffsetTo<MathTopAccentAttachment> mathTopAccentAttachment; + + /* Offset to coverage table for Extended Shape glyphs - + * from the beginning of MathGlyphInfo table. When the left or right glyph of + * a box is an extended shape variant, the (ink) box (and not the default + * position defined by values in MathConstants table) should be used for + * vertical positioning purposes. May be NULL.. */ + OffsetTo<Coverage> extendedShapeCoverage; + + /* Offset to MathKernInfo table - + * from the beginning of MathGlyphInfo table. */ + OffsetTo<MathKernInfo> mathKernInfo; + + public: + DEFINE_SIZE_STATIC (8); +}; + +struct MathGlyphVariantRecord +{ + friend struct MathGlyphConstruction; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + protected: + HBGlyphID variantGlyph; /* Glyph ID for the variant. */ + HBUINT16 advanceMeasurement; /* Advance width/height, in design units, of the + * variant, in the direction of requested + * glyph extension. */ + + public: + DEFINE_SIZE_STATIC (4); +}; + +struct PartFlags : HBUINT16 +{ + enum Flags { + Extender = 0x0001u, /* If set, the part can be skipped or repeated. */ + + Defined = 0x0001u, /* All defined flags. */ + }; + + public: + DEFINE_SIZE_STATIC (2); +}; + +struct MathGlyphPartRecord +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + void extract (hb_ot_math_glyph_part_t &out, + int64_t mult, + hb_font_t *font) const + { + out.glyph = glyph; + + out.start_connector_length = font->em_mult (startConnectorLength, mult); + out.end_connector_length = font->em_mult (endConnectorLength, mult); + out.full_advance = font->em_mult (fullAdvance, mult); + + static_assert ((unsigned int) HB_OT_MATH_GLYPH_PART_FLAG_EXTENDER == + (unsigned int) PartFlags::Extender, ""); + + out.flags = (hb_ot_math_glyph_part_flags_t) + (unsigned int) + (partFlags & PartFlags::Defined); + } + + protected: + HBGlyphID glyph; /* Glyph ID for the part. */ + HBUINT16 startConnectorLength; + /* Advance width/ height of the straight bar + * connector material, in design units, is at + * the beginning of the glyph, in the + * direction of the extension. */ + HBUINT16 endConnectorLength; + /* Advance width/ height of the straight bar + * connector material, in design units, is at + * the end of the glyph, in the direction of + * the extension. */ + HBUINT16 fullAdvance; /* Full advance width/height for this part, + * in the direction of the extension. + * In design units. */ + PartFlags partFlags; /* Part qualifiers. */ + + public: + DEFINE_SIZE_STATIC (10); +}; + +struct MathGlyphAssembly +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + italicsCorrection.sanitize (c, this) && + partRecords.sanitize (c)); + } + + unsigned int get_parts (hb_direction_t direction, + hb_font_t *font, + unsigned int start_offset, + unsigned int *parts_count, /* IN/OUT */ + hb_ot_math_glyph_part_t *parts /* OUT */, + hb_position_t *italics_correction /* OUT */) const + { + if (parts_count) + { + int64_t mult = font->dir_mult (direction); + for (auto _ : hb_zip (partRecords.sub_array (start_offset, parts_count), + hb_array (parts, *parts_count))) + _.first.extract (_.second, mult, font); + } + + if (italics_correction) + *italics_correction = italicsCorrection.get_x_value (font, this); + + return partRecords.len; + } + + protected: + MathValueRecord + italicsCorrection; + /* Italics correction of this + * MathGlyphAssembly. Should not + * depend on the assembly size. */ + ArrayOf<MathGlyphPartRecord> + partRecords; /* Array of part records, from + * left to right and bottom to + * top. */ + + public: + DEFINE_SIZE_ARRAY (6, partRecords); +}; + +struct MathGlyphConstruction +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + glyphAssembly.sanitize (c, this) && + mathGlyphVariantRecord.sanitize (c)); + } + + const MathGlyphAssembly &get_assembly () const { return this+glyphAssembly; } + + unsigned int get_variants (hb_direction_t direction, + hb_font_t *font, + unsigned int start_offset, + unsigned int *variants_count, /* IN/OUT */ + hb_ot_math_glyph_variant_t *variants /* OUT */) const + { + if (variants_count) + { + int64_t mult = font->dir_mult (direction); + for (auto _ : hb_zip (mathGlyphVariantRecord.sub_array (start_offset, variants_count), + hb_array (variants, *variants_count))) + _.second = {_.first.variantGlyph, font->em_mult (_.first.advanceMeasurement, mult)}; + } + return mathGlyphVariantRecord.len; + } + + protected: + /* Offset to MathGlyphAssembly table for this shape - from the beginning of + MathGlyphConstruction table. May be NULL. */ + OffsetTo<MathGlyphAssembly> glyphAssembly; + + /* MathGlyphVariantRecords for alternative variants of the glyphs. */ + ArrayOf<MathGlyphVariantRecord> mathGlyphVariantRecord; + + public: + DEFINE_SIZE_ARRAY (4, mathGlyphVariantRecord); +}; + +struct MathVariants +{ + bool sanitize_offsets (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + unsigned int count = vertGlyphCount + horizGlyphCount; + for (unsigned int i = 0; i < count; i++) + if (!glyphConstruction.arrayZ[i].sanitize (c, this)) return_trace (false); + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + vertGlyphCoverage.sanitize (c, this) && + horizGlyphCoverage.sanitize (c, this) && + c->check_array (glyphConstruction.arrayZ, vertGlyphCount + horizGlyphCount) && + sanitize_offsets (c)); + } + + hb_position_t get_min_connector_overlap (hb_direction_t direction, + hb_font_t *font) const + { return font->em_scale_dir (minConnectorOverlap, direction); } + + unsigned int get_glyph_variants (hb_codepoint_t glyph, + hb_direction_t direction, + hb_font_t *font, + unsigned int start_offset, + unsigned int *variants_count, /* IN/OUT */ + hb_ot_math_glyph_variant_t *variants /* OUT */) const + { return get_glyph_construction (glyph, direction, font) + .get_variants (direction, font, start_offset, variants_count, variants); } + + unsigned int get_glyph_parts (hb_codepoint_t glyph, + hb_direction_t direction, + hb_font_t *font, + unsigned int start_offset, + unsigned int *parts_count, /* IN/OUT */ + hb_ot_math_glyph_part_t *parts /* OUT */, + hb_position_t *italics_correction /* OUT */) const + { return get_glyph_construction (glyph, direction, font) + .get_assembly () + .get_parts (direction, font, + start_offset, parts_count, parts, + italics_correction); } + + private: + const MathGlyphConstruction & + get_glyph_construction (hb_codepoint_t glyph, + hb_direction_t direction, + hb_font_t *font HB_UNUSED) const + { + bool vertical = HB_DIRECTION_IS_VERTICAL (direction); + unsigned int count = vertical ? vertGlyphCount : horizGlyphCount; + const OffsetTo<Coverage> &coverage = vertical ? vertGlyphCoverage + : horizGlyphCoverage; + + unsigned int index = (this+coverage).get_coverage (glyph); + if (unlikely (index >= count)) return Null (MathGlyphConstruction); + + if (!vertical) + index += vertGlyphCount; + + return this+glyphConstruction[index]; + } + + protected: + HBUINT16 minConnectorOverlap; + /* Minimum overlap of connecting + * glyphs during glyph construction, + * in design units. */ + OffsetTo<Coverage> vertGlyphCoverage; + /* Offset to Coverage table - + * from the beginning of MathVariants + * table. */ + OffsetTo<Coverage> horizGlyphCoverage; + /* Offset to Coverage table - + * from the beginning of MathVariants + * table. */ + HBUINT16 vertGlyphCount; /* Number of glyphs for which + * information is provided for + * vertically growing variants. */ + HBUINT16 horizGlyphCount;/* Number of glyphs for which + * information is provided for + * horizontally growing variants. */ + + /* Array of offsets to MathGlyphConstruction tables - from the beginning of + the MathVariants table, for shapes growing in vertical/horizontal + direction. */ + UnsizedArrayOf<OffsetTo<MathGlyphConstruction>> + glyphConstruction; + + public: + DEFINE_SIZE_ARRAY (10, glyphConstruction); +}; + + +/* + * MATH -- Mathematical typesetting + * https://docs.microsoft.com/en-us/typography/opentype/spec/math + */ + +struct MATH +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_MATH; + + bool has_data () const { return version.to_int (); } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (version.sanitize (c) && + likely (version.major == 1) && + mathConstants.sanitize (c, this) && + mathGlyphInfo.sanitize (c, this) && + mathVariants.sanitize (c, this)); + } + + hb_position_t get_constant (hb_ot_math_constant_t constant, + hb_font_t *font) const + { return (this+mathConstants).get_value (constant, font); } + + const MathGlyphInfo &get_glyph_info () const { return this+mathGlyphInfo; } + + const MathVariants &get_variants () const { return this+mathVariants; } + + protected: + FixedVersion<>version; /* Version of the MATH table + * initially set to 0x00010000u */ + OffsetTo<MathConstants> + mathConstants; /* MathConstants table */ + OffsetTo<MathGlyphInfo> + mathGlyphInfo; /* MathGlyphInfo table */ + OffsetTo<MathVariants> + mathVariants; /* MathVariants table */ + + public: + DEFINE_SIZE_STATIC (10); +}; + +} /* namespace OT */ + + +#endif /* HB_OT_MATH_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-math.cc b/thirdparty/harfbuzz/src/hb-ot-math.cc new file mode 100644 index 0000000000..9d8c6e735a --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-math.cc @@ -0,0 +1,293 @@ +/* + * Copyright © 2016 Igalia S.L. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Igalia Author(s): Frédéric Wang + */ + +#include "hb.hh" + +#ifndef HB_NO_MATH + +#include "hb-ot-math-table.hh" + + +/** + * SECTION:hb-ot-math + * @title: hb-ot-math + * @short_description: OpenType Math information + * @include: hb-ot.h + * + * Functions for fetching mathematics layout data from OpenType fonts. + * + * HarfBuzz itself does not implement a math layout solution. The + * functions and types provided can be used by client programs to access + * the font data necessary for typesetting OpenType Math layout. + * + **/ + + +/* + * OT::MATH + */ + +/** + * hb_ot_math_has_data: + * @face: #hb_face_t to test + * + * Tests whether a face has a `MATH` table. + * + * Return value: true if the table is found, false otherwise + * + * Since: 1.3.3 + **/ +hb_bool_t +hb_ot_math_has_data (hb_face_t *face) +{ + return face->table.MATH->has_data (); +} + +/** + * hb_ot_math_get_constant: + * @font: #hb_font_t to work upon + * @constant: #hb_ot_math_constant_t the constant to retrieve + * + * Fetches the specified math constant. For most constants, the value returned + * is an #hb_position_t. + * + * However, if the requested constant is #HB_OT_MATH_CONSTANT_SCRIPT_PERCENT_SCALE_DOWN, + * #HB_OT_MATH_CONSTANT_SCRIPT_SCRIPT_PERCENT_SCALE_DOWN or + * #HB_OT_MATH_CONSTANT_SCRIPT_PERCENT_SCALE_DOWN, then the return value is + * an integer between 0 and 100 representing that percentage. + * + * Return value: the requested constant or zero + * + * Since: 1.3.3 + **/ +hb_position_t +hb_ot_math_get_constant (hb_font_t *font, + hb_ot_math_constant_t constant) +{ + return font->face->table.MATH->get_constant(constant, font); +} + +/** + * hb_ot_math_get_glyph_italics_correction: + * @font: #hb_font_t to work upon + * @glyph: The glyph index from which to retrieve the value + * + * Fetches an italics-correction value (if one exists) for the specified + * glyph index. + * + * Return value: the italics correction of the glyph or zero + * + * Since: 1.3.3 + **/ +hb_position_t +hb_ot_math_get_glyph_italics_correction (hb_font_t *font, + hb_codepoint_t glyph) +{ + return font->face->table.MATH->get_glyph_info().get_italics_correction (glyph, font); +} + +/** + * hb_ot_math_get_glyph_top_accent_attachment: + * @font: #hb_font_t to work upon + * @glyph: The glyph index from which to retrieve the value + * + * Fetches a top-accent-attachment value (if one exists) for the specified + * glyph index. + * + * For any glyph that does not have a top-accent-attachment value - that is, + * a glyph not covered by the `MathTopAccentAttachment` table (or, when + * @font has no `MathTopAccentAttachment` table or no `MATH` table, any + * glyph) - the function synthesizes a value, returning the position at + * one-half the glyph's advance width. + * + * Return value: the top accent attachment of the glyph or 0.5 * the advance + * width of @glyph + * + * Since: 1.3.3 + **/ +hb_position_t +hb_ot_math_get_glyph_top_accent_attachment (hb_font_t *font, + hb_codepoint_t glyph) +{ + return font->face->table.MATH->get_glyph_info().get_top_accent_attachment (glyph, font); +} + +/** + * hb_ot_math_is_glyph_extended_shape: + * @face: #hb_face_t to work upon + * @glyph: The glyph index to test + * + * Tests whether the given glyph index is an extended shape in the face. + * + * Return value: true if the glyph is an extended shape, false otherwise + * + * Since: 1.3.3 + **/ +hb_bool_t +hb_ot_math_is_glyph_extended_shape (hb_face_t *face, + hb_codepoint_t glyph) +{ + return face->table.MATH->get_glyph_info().is_extended_shape (glyph); +} + +/** + * hb_ot_math_get_glyph_kerning: + * @font: #hb_font_t to work upon + * @glyph: The glyph index from which to retrieve the value + * @kern: The #hb_ot_math_kern_t from which to retrieve the value + * @correction_height: the correction height to use to determine the kerning. + * + * Fetches the math kerning (cut-ins) value for the specified font, glyph index, and + * @kern. + * + * If the MathKern table is found, the function examines it to find a height + * value that is greater or equal to @correction_height. If such a height + * value is found, corresponding kerning value from the table is returned. If + * no such height value is found, the last kerning value is returned. + * + * Return value: requested kerning value or zero + * + * Since: 1.3.3 + **/ +hb_position_t +hb_ot_math_get_glyph_kerning (hb_font_t *font, + hb_codepoint_t glyph, + hb_ot_math_kern_t kern, + hb_position_t correction_height) +{ + return font->face->table.MATH->get_glyph_info().get_kerning (glyph, + kern, + correction_height, + font); +} + +/** + * hb_ot_math_get_glyph_variants: + * @font: #hb_font_t to work upon + * @glyph: The index of the glyph to stretch + * @direction: The direction of the stretching (horizontal or vertical) + * @start_offset: offset of the first variant to retrieve + * @variants_count: (inout): Input = the maximum number of variants to return; + * Output = the actual number of variants returned + * @variants: (out) (array length=variants_count): array of variants returned + * + * Fetches the MathGlyphConstruction for the specified font, glyph index, and + * direction. The corresponding list of size variants is returned as a list of + * #hb_ot_math_glyph_variant_t structs. + * + * <note>The @direction parameter is only used to select between horizontal + * or vertical directions for the construction. Even though all #hb_direction_t + * values are accepted, only the result of #HB_DIRECTION_IS_HORIZONTAL is + * considered.</note> + * + * Return value: the total number of size variants available or zero + * + * Since: 1.3.3 + **/ +unsigned int +hb_ot_math_get_glyph_variants (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + unsigned int start_offset, + unsigned int *variants_count, /* IN/OUT */ + hb_ot_math_glyph_variant_t *variants /* OUT */) +{ + return font->face->table.MATH->get_variants().get_glyph_variants (glyph, direction, font, + start_offset, + variants_count, + variants); +} + +/** + * hb_ot_math_get_min_connector_overlap: + * @font: #hb_font_t to work upon + * @direction: direction of the stretching (horizontal or vertical) + * + * Fetches the MathVariants table for the specified font and returns the + * minimum overlap of connecting glyphs that are required to draw a glyph + * assembly in the specified direction. + * + * <note>The @direction parameter is only used to select between horizontal + * or vertical directions for the construction. Even though all #hb_direction_t + * values are accepted, only the result of #HB_DIRECTION_IS_HORIZONTAL is + * considered.</note> + * + * Return value: requested minimum connector overlap or zero + * + * Since: 1.3.3 + **/ +hb_position_t +hb_ot_math_get_min_connector_overlap (hb_font_t *font, + hb_direction_t direction) +{ + return font->face->table.MATH->get_variants().get_min_connector_overlap (direction, font); +} + +/** + * hb_ot_math_get_glyph_assembly: + * @font: #hb_font_t to work upon + * @glyph: The index of the glyph to stretch + * @direction: direction of the stretching (horizontal or vertical) + * @start_offset: offset of the first glyph part to retrieve + * @parts_count: (inout): Input = maximum number of glyph parts to return; + * Output = actual number of parts returned + * @parts: (out) (array length=parts_count): the glyph parts returned + * @italics_correction: (out): italics correction of the glyph assembly + * + * Fetches the GlyphAssembly for the specified font, glyph index, and direction. + * Returned are a list of #hb_ot_math_glyph_part_t glyph parts that can be + * used to draw the glyph and an italics-correction value (if one is defined + * in the font). + * + * <note>The @direction parameter is only used to select between horizontal + * or vertical directions for the construction. Even though all #hb_direction_t + * values are accepted, only the result of #HB_DIRECTION_IS_HORIZONTAL is + * considered.</note> + * + * Return value: the total number of parts in the glyph assembly + * + * Since: 1.3.3 + **/ +unsigned int +hb_ot_math_get_glyph_assembly (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + unsigned int start_offset, + unsigned int *parts_count, /* IN/OUT */ + hb_ot_math_glyph_part_t *parts, /* OUT */ + hb_position_t *italics_correction /* OUT */) +{ + return font->face->table.MATH->get_variants().get_glyph_parts (glyph, + direction, + font, + start_offset, + parts_count, + parts, + italics_correction); +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-math.h b/thirdparty/harfbuzz/src/hb-ot-math.h new file mode 100644 index 0000000000..ad864a762d --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-math.h @@ -0,0 +1,230 @@ +/* + * Copyright © 2016 Igalia S.L. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Igalia Author(s): Frédéric Wang + */ + +#ifndef HB_OT_H_IN +#error "Include <hb-ot.h> instead." +#endif + +#ifndef HB_OT_MATH_H +#define HB_OT_MATH_H + +#include "hb.h" + +HB_BEGIN_DECLS + + +/* + * MATH + */ + +#define HB_OT_TAG_MATH HB_TAG('M','A','T','H') + +/* Use with hb_buffer_set_script() for math shaping. */ +#define HB_OT_MATH_SCRIPT HB_TAG('m','a','t','h') + +/* Types */ + +/** + * hb_ot_math_constant_t: + * + * The 'MATH' table constants specified at + * https://docs.microsoft.com/en-us/typography/opentype/spec/math + * + * Since: 1.3.3 + */ +typedef enum { + HB_OT_MATH_CONSTANT_SCRIPT_PERCENT_SCALE_DOWN = 0, + HB_OT_MATH_CONSTANT_SCRIPT_SCRIPT_PERCENT_SCALE_DOWN = 1, + HB_OT_MATH_CONSTANT_DELIMITED_SUB_FORMULA_MIN_HEIGHT = 2, + HB_OT_MATH_CONSTANT_DISPLAY_OPERATOR_MIN_HEIGHT = 3, + HB_OT_MATH_CONSTANT_MATH_LEADING = 4, + HB_OT_MATH_CONSTANT_AXIS_HEIGHT = 5, + HB_OT_MATH_CONSTANT_ACCENT_BASE_HEIGHT = 6, + HB_OT_MATH_CONSTANT_FLATTENED_ACCENT_BASE_HEIGHT = 7, + HB_OT_MATH_CONSTANT_SUBSCRIPT_SHIFT_DOWN = 8, + HB_OT_MATH_CONSTANT_SUBSCRIPT_TOP_MAX = 9, + HB_OT_MATH_CONSTANT_SUBSCRIPT_BASELINE_DROP_MIN = 10, + HB_OT_MATH_CONSTANT_SUPERSCRIPT_SHIFT_UP = 11, + HB_OT_MATH_CONSTANT_SUPERSCRIPT_SHIFT_UP_CRAMPED = 12, + HB_OT_MATH_CONSTANT_SUPERSCRIPT_BOTTOM_MIN = 13, + HB_OT_MATH_CONSTANT_SUPERSCRIPT_BASELINE_DROP_MAX = 14, + HB_OT_MATH_CONSTANT_SUB_SUPERSCRIPT_GAP_MIN = 15, + HB_OT_MATH_CONSTANT_SUPERSCRIPT_BOTTOM_MAX_WITH_SUBSCRIPT = 16, + HB_OT_MATH_CONSTANT_SPACE_AFTER_SCRIPT = 17, + HB_OT_MATH_CONSTANT_UPPER_LIMIT_GAP_MIN = 18, + HB_OT_MATH_CONSTANT_UPPER_LIMIT_BASELINE_RISE_MIN = 19, + HB_OT_MATH_CONSTANT_LOWER_LIMIT_GAP_MIN = 20, + HB_OT_MATH_CONSTANT_LOWER_LIMIT_BASELINE_DROP_MIN = 21, + HB_OT_MATH_CONSTANT_STACK_TOP_SHIFT_UP = 22, + HB_OT_MATH_CONSTANT_STACK_TOP_DISPLAY_STYLE_SHIFT_UP = 23, + HB_OT_MATH_CONSTANT_STACK_BOTTOM_SHIFT_DOWN = 24, + HB_OT_MATH_CONSTANT_STACK_BOTTOM_DISPLAY_STYLE_SHIFT_DOWN = 25, + HB_OT_MATH_CONSTANT_STACK_GAP_MIN = 26, + HB_OT_MATH_CONSTANT_STACK_DISPLAY_STYLE_GAP_MIN = 27, + HB_OT_MATH_CONSTANT_STRETCH_STACK_TOP_SHIFT_UP = 28, + HB_OT_MATH_CONSTANT_STRETCH_STACK_BOTTOM_SHIFT_DOWN = 29, + HB_OT_MATH_CONSTANT_STRETCH_STACK_GAP_ABOVE_MIN = 30, + HB_OT_MATH_CONSTANT_STRETCH_STACK_GAP_BELOW_MIN = 31, + HB_OT_MATH_CONSTANT_FRACTION_NUMERATOR_SHIFT_UP = 32, + HB_OT_MATH_CONSTANT_FRACTION_NUMERATOR_DISPLAY_STYLE_SHIFT_UP = 33, + HB_OT_MATH_CONSTANT_FRACTION_DENOMINATOR_SHIFT_DOWN = 34, + HB_OT_MATH_CONSTANT_FRACTION_DENOMINATOR_DISPLAY_STYLE_SHIFT_DOWN = 35, + HB_OT_MATH_CONSTANT_FRACTION_NUMERATOR_GAP_MIN = 36, + HB_OT_MATH_CONSTANT_FRACTION_NUM_DISPLAY_STYLE_GAP_MIN = 37, + HB_OT_MATH_CONSTANT_FRACTION_RULE_THICKNESS = 38, + HB_OT_MATH_CONSTANT_FRACTION_DENOMINATOR_GAP_MIN = 39, + HB_OT_MATH_CONSTANT_FRACTION_DENOM_DISPLAY_STYLE_GAP_MIN = 40, + HB_OT_MATH_CONSTANT_SKEWED_FRACTION_HORIZONTAL_GAP = 41, + HB_OT_MATH_CONSTANT_SKEWED_FRACTION_VERTICAL_GAP = 42, + HB_OT_MATH_CONSTANT_OVERBAR_VERTICAL_GAP = 43, + HB_OT_MATH_CONSTANT_OVERBAR_RULE_THICKNESS = 44, + HB_OT_MATH_CONSTANT_OVERBAR_EXTRA_ASCENDER = 45, + HB_OT_MATH_CONSTANT_UNDERBAR_VERTICAL_GAP = 46, + HB_OT_MATH_CONSTANT_UNDERBAR_RULE_THICKNESS = 47, + HB_OT_MATH_CONSTANT_UNDERBAR_EXTRA_DESCENDER = 48, + HB_OT_MATH_CONSTANT_RADICAL_VERTICAL_GAP = 49, + HB_OT_MATH_CONSTANT_RADICAL_DISPLAY_STYLE_VERTICAL_GAP = 50, + HB_OT_MATH_CONSTANT_RADICAL_RULE_THICKNESS = 51, + HB_OT_MATH_CONSTANT_RADICAL_EXTRA_ASCENDER = 52, + HB_OT_MATH_CONSTANT_RADICAL_KERN_BEFORE_DEGREE = 53, + HB_OT_MATH_CONSTANT_RADICAL_KERN_AFTER_DEGREE = 54, + HB_OT_MATH_CONSTANT_RADICAL_DEGREE_BOTTOM_RAISE_PERCENT = 55 +} hb_ot_math_constant_t; + +/** + * hb_ot_math_kern_t: + * + * The math kerning-table types defined for the four corners + * of a glyph. + * + * Since: 1.3.3 + */ +typedef enum { + HB_OT_MATH_KERN_TOP_RIGHT = 0, + HB_OT_MATH_KERN_TOP_LEFT = 1, + HB_OT_MATH_KERN_BOTTOM_RIGHT = 2, + HB_OT_MATH_KERN_BOTTOM_LEFT = 3 +} hb_ot_math_kern_t; + +/** + * hb_ot_math_glyph_variant_t: + * @glyph: The glyph index of the variant + * @advance: The advance width of the variant + * + * Data type to hold math-variant information for a glyph. + * + * Since: 1.3.3 + */ +typedef struct hb_ot_math_glyph_variant_t { + hb_codepoint_t glyph; + hb_position_t advance; +} hb_ot_math_glyph_variant_t; + +/** + * hb_ot_math_glyph_part_flags_t: + * + * Flags for math glyph parts. + * + * Since: 1.3.3 + */ +typedef enum { /*< flags >*/ + HB_OT_MATH_GLYPH_PART_FLAG_EXTENDER = 0x00000001u /* Extender glyph */ +} hb_ot_math_glyph_part_flags_t; + +/** + * hb_ot_math_glyph_part_t: + * @glyph: The glyph index of the variant part + * @start_connector_length: The length of the connector on the starting side of the variant part + * @end_connector_length: The length of the connector on the ending side of the variant part + * @full_advance: The total advance of the part + * @flags: #hb_ot_math_glyph_part_flags_t flags for the part + * + * Data type to hold information for a "part" component of a math-variant glyph. + * Large variants for stretchable math glyphs (such as parentheses) can be constructed + * on the fly from parts. + * + * Since: 1.3.3 + */ +typedef struct hb_ot_math_glyph_part_t { + hb_codepoint_t glyph; + hb_position_t start_connector_length; + hb_position_t end_connector_length; + hb_position_t full_advance; + hb_ot_math_glyph_part_flags_t flags; +} hb_ot_math_glyph_part_t; + +/* Methods */ + +HB_EXTERN hb_bool_t +hb_ot_math_has_data (hb_face_t *face); + +HB_EXTERN hb_position_t +hb_ot_math_get_constant (hb_font_t *font, + hb_ot_math_constant_t constant); + +HB_EXTERN hb_position_t +hb_ot_math_get_glyph_italics_correction (hb_font_t *font, + hb_codepoint_t glyph); + +HB_EXTERN hb_position_t +hb_ot_math_get_glyph_top_accent_attachment (hb_font_t *font, + hb_codepoint_t glyph); + +HB_EXTERN hb_bool_t +hb_ot_math_is_glyph_extended_shape (hb_face_t *face, + hb_codepoint_t glyph); + +HB_EXTERN hb_position_t +hb_ot_math_get_glyph_kerning (hb_font_t *font, + hb_codepoint_t glyph, + hb_ot_math_kern_t kern, + hb_position_t correction_height); + +HB_EXTERN unsigned int +hb_ot_math_get_glyph_variants (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + unsigned int start_offset, + unsigned int *variants_count, /* IN/OUT */ + hb_ot_math_glyph_variant_t *variants /* OUT */); + +HB_EXTERN hb_position_t +hb_ot_math_get_min_connector_overlap (hb_font_t *font, + hb_direction_t direction); + +HB_EXTERN unsigned int +hb_ot_math_get_glyph_assembly (hb_font_t *font, + hb_codepoint_t glyph, + hb_direction_t direction, + unsigned int start_offset, + unsigned int *parts_count, /* IN/OUT */ + hb_ot_math_glyph_part_t *parts, /* OUT */ + hb_position_t *italics_correction /* OUT */); + + +HB_END_DECLS + +#endif /* HB_OT_MATH_H */ diff --git a/thirdparty/harfbuzz/src/hb-ot-maxp-table.hh b/thirdparty/harfbuzz/src/hb-ot-maxp-table.hh new file mode 100644 index 0000000000..929956d12b --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-maxp-table.hh @@ -0,0 +1,142 @@ +/* + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_MAXP_TABLE_HH +#define HB_OT_MAXP_TABLE_HH + +#include "hb-open-type.hh" + +namespace OT { + + +/* + * maxp -- Maximum Profile + * https://docs.microsoft.com/en-us/typography/opentype/spec/maxp + */ + +#define HB_OT_TAG_maxp HB_TAG('m','a','x','p') + +struct maxpV1Tail +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + HBUINT16 maxPoints; /* Maximum points in a non-composite glyph. */ + HBUINT16 maxContours; /* Maximum contours in a non-composite glyph. */ + HBUINT16 maxCompositePoints; /* Maximum points in a composite glyph. */ + HBUINT16 maxCompositeContours; /* Maximum contours in a composite glyph. */ + HBUINT16 maxZones; /* 1 if instructions do not use the twilight zone (Z0), + * or 2 if instructions do use Z0; should be set to 2 in + * most cases. */ + HBUINT16 maxTwilightPoints; /* Maximum points used in Z0. */ + HBUINT16 maxStorage; /* Number of Storage Area locations. */ + HBUINT16 maxFunctionDefs; /* Number of FDEFs, equal to the highest function number + 1. */ + HBUINT16 maxInstructionDefs; /* Number of IDEFs. */ + HBUINT16 maxStackElements; /* Maximum stack depth. (This includes Font and CVT + * Programs, as well as the instructions for each glyph.) */ + HBUINT16 maxSizeOfInstructions; /* Maximum byte count for glyph instructions. */ + HBUINT16 maxComponentElements; /* Maximum number of components referenced at + * "top level" for any composite glyph. */ + HBUINT16 maxComponentDepth; /* Maximum levels of recursion; 1 for simple components. */ + public: + DEFINE_SIZE_STATIC (26); +}; + + +struct maxp +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_maxp; + + unsigned int get_num_glyphs () const { return numGlyphs; } + + void set_num_glyphs (unsigned int count) + { + numGlyphs = count; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this))) + return_trace (false); + + if (version.major == 1) + { + const maxpV1Tail &v1 = StructAfter<maxpV1Tail> (*this); + return_trace (v1.sanitize (c)); + } + return_trace (likely (version.major == 0 && version.minor == 0x5000u)); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + maxp *maxp_prime = c->serializer->embed (this); + if (unlikely (!maxp_prime)) return_trace (false); + + maxp_prime->numGlyphs = c->plan->num_output_glyphs (); + if (maxp_prime->version.major == 1) + { + const maxpV1Tail *src_v1 = &StructAfter<maxpV1Tail> (*this); + maxpV1Tail *dest_v1 = c->serializer->embed<maxpV1Tail> (src_v1); + if (unlikely (!dest_v1)) return_trace (false); + + if (c->plan->drop_hints) + drop_hint_fields (dest_v1); + } + + return_trace (true); + } + + static void drop_hint_fields (maxpV1Tail* dest_v1) + { + dest_v1->maxZones = 1; + dest_v1->maxTwilightPoints = 0; + dest_v1->maxStorage = 0; + dest_v1->maxFunctionDefs = 0; + dest_v1->maxInstructionDefs = 0; + dest_v1->maxStackElements = 0; + dest_v1->maxSizeOfInstructions = 0; + } + + protected: + FixedVersion<>version;/* Version of the maxp table (0.5 or 1.0), + * 0x00005000u or 0x00010000u. */ + HBUINT16 numGlyphs; + /* The number of glyphs in the font. */ +/*maxpV1Tail v1Tail[HB_VAR_ARRAY]; */ + public: + DEFINE_SIZE_STATIC (6); +}; + + +} /* namespace OT */ + + +#endif /* HB_OT_MAXP_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-meta-table.hh b/thirdparty/harfbuzz/src/hb-ot-meta-table.hh new file mode 100644 index 0000000000..1225e26ce1 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-meta-table.hh @@ -0,0 +1,127 @@ +/* + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_OT_META_TABLE_HH +#define HB_OT_META_TABLE_HH + +#include "hb-open-type.hh" + +/* + * meta -- Metadata Table + * https://docs.microsoft.com/en-us/typography/opentype/spec/meta + * https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6meta.html + */ +#define HB_OT_TAG_meta HB_TAG ('m','e','t','a') + + +namespace OT { + + +struct DataMap +{ + int cmp (hb_tag_t a) const { return tag.cmp (a); } + + hb_tag_t get_tag () const { return tag; } + + hb_blob_t *reference_entry (hb_blob_t *meta_blob) const + { return hb_blob_create_sub_blob (meta_blob, dataZ, dataLength); } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + dataZ.sanitize (c, base, dataLength))); + } + + protected: + Tag tag; /* A tag indicating the type of metadata. */ + LNNOffsetTo<UnsizedArrayOf<HBUINT8>> + dataZ; /* Offset in bytes from the beginning of the + * metadata table to the data for this tag. */ + HBUINT32 dataLength; /* Length of the data. The data is not required to + * be padded to any byte boundary. */ + public: + DEFINE_SIZE_STATIC (12); +}; + +struct meta +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_meta; + + struct accelerator_t + { + void init (hb_face_t *face) + { table = hb_sanitize_context_t ().reference_table<meta> (face); } + void fini () { table.destroy (); } + + hb_blob_t *reference_entry (hb_tag_t tag) const + { return table->dataMaps.lsearch (tag).reference_entry (table.get_blob ()); } + + unsigned int get_entries (unsigned int start_offset, + unsigned int *count, + hb_ot_meta_tag_t *entries) const + { + if (count) + { + + table->dataMaps.sub_array (start_offset, count) + | hb_map (&DataMap::get_tag) + | hb_map ([](hb_tag_t tag) { return (hb_ot_meta_tag_t) tag; }) + | hb_sink (hb_array (entries, *count)) + ; + } + return table->dataMaps.len; + } + + private: + hb_blob_ptr_t<meta> table; + }; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + version == 1 && + dataMaps.sanitize (c, this))); + } + + protected: + HBUINT32 version; /* Version number of the metadata table — set to 1. */ + HBUINT32 flags; /* Flags — currently unused; set to 0. */ + HBUINT32 dataOffset; + /* Per Apple specification: + * Offset from the beginning of the table to the data. + * Per OT specification: + * Reserved. Not used; should be set to 0. */ + LArrayOf<DataMap> + dataMaps;/* Array of data map records. */ + public: + DEFINE_SIZE_ARRAY (16, dataMaps); +}; + +struct meta_accelerator_t : meta::accelerator_t {}; + +} /* namespace OT */ + + +#endif /* HB_OT_META_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-meta.cc b/thirdparty/harfbuzz/src/hb-ot-meta.cc new file mode 100644 index 0000000000..54a0e10f9b --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-meta.cc @@ -0,0 +1,77 @@ +/* + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "hb.hh" + +#ifndef HB_NO_META + +#include "hb-ot-meta-table.hh" + +/** + * SECTION:hb-ot-meta + * @title: hb-ot-meta + * @short_description: OpenType Metadata + * @include: hb-ot.h + * + * Functions for fetching metadata from fonts. + **/ + +/** + * hb_ot_meta_get_entry_tags: + * @face: a face object + * @start_offset: iteration's start offset + * @entries_count:(inout) (allow-none): buffer size as input, filled size as output + * @entries: (out caller-allocates) (array length=entries_count): entries tags buffer + * + * Return value: Number of all available feature types. + * + * Since: 2.6.0 + **/ +unsigned int +hb_ot_meta_get_entry_tags (hb_face_t *face, + unsigned int start_offset, + unsigned int *entries_count, /* IN/OUT. May be NULL. */ + hb_ot_meta_tag_t *entries /* OUT. May be NULL. */) +{ + return face->table.meta->get_entries (start_offset, entries_count, entries); +} + +/** + * hb_ot_meta_reference_entry: + * @face: a #hb_face_t object. + * @meta_tag: tag of metadata you like to have. + * + * It fetches metadata entry of a given tag from a font. + * + * Returns: (transfer full): A blob containing the blob. + * + * Since: 2.6.0 + **/ +hb_blob_t * +hb_ot_meta_reference_entry (hb_face_t *face, hb_ot_meta_tag_t meta_tag) +{ + return face->table.meta->reference_entry (meta_tag); +} + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-meta.h b/thirdparty/harfbuzz/src/hb-ot-meta.h new file mode 100644 index 0000000000..0278d84148 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-meta.h @@ -0,0 +1,71 @@ +/* + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_OT_H_IN +#error "Include <hb-ot.h> instead." +#endif + +#ifndef HB_OT_META_H +#define HB_OT_META_H + +#include "hb.h" + +HB_BEGIN_DECLS + +/** + * hb_ot_meta_tag_t: + * @HB_OT_META_TAG_DESIGN_LANGUAGES: Design languages. Text, using only + * Basic Latin (ASCII) characters. Indicates languages and/or scripts + * for the user audiences that the font was primarily designed for. + * @HB_OT_META_TAG_SUPPORTED_LANGUAGES: Supported languages. Text, using + * only Basic Latin (ASCII) characters. Indicates languages and/or scripts + * that the font is declared to be capable of supporting. + * + * Known metadata tags from https://docs.microsoft.com/en-us/typography/opentype/spec/meta + * + * Since: 2.6.0 + **/ +typedef enum { +/* + HB_OT_META_TAG_APPL = HB_TAG ('a','p','p','l'), + HB_OT_META_TAG_BILD = HB_TAG ('b','i','l','d'), +*/ + HB_OT_META_TAG_DESIGN_LANGUAGES = HB_TAG ('d','l','n','g'), + HB_OT_META_TAG_SUPPORTED_LANGUAGES = HB_TAG ('s','l','n','g'), + + _HB_OT_META_TAG_MAX_VALUE = HB_TAG_MAX_SIGNED /*< skip >*/ +} hb_ot_meta_tag_t; + +HB_EXTERN unsigned int +hb_ot_meta_get_entry_tags (hb_face_t *face, + unsigned int start_offset, + unsigned int *entries_count, /* IN/OUT. May be NULL. */ + hb_ot_meta_tag_t *entries /* OUT. May be NULL. */); + +HB_EXTERN hb_blob_t * +hb_ot_meta_reference_entry (hb_face_t *face, hb_ot_meta_tag_t meta_tag); + +HB_END_DECLS + +#endif /* HB_OT_META_H */ diff --git a/thirdparty/harfbuzz/src/hb-ot-metrics.cc b/thirdparty/harfbuzz/src/hb-ot-metrics.cc new file mode 100644 index 0000000000..181ac4d57e --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-metrics.cc @@ -0,0 +1,231 @@ +/* + * Copyright © 2018-2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "hb.hh" + +#include "hb-ot-var-mvar-table.hh" +#include "hb-ot-gasp-table.hh" // Just so we compile it; unused otherwise. +#include "hb-ot-os2-table.hh" +#include "hb-ot-post-table.hh" +#include "hb-ot-hhea-table.hh" +#include "hb-ot-metrics.hh" +#include "hb-ot-face.hh" + + +static float +_fix_ascender_descender (float value, hb_ot_metrics_tag_t metrics_tag) +{ + if (metrics_tag == HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER || + metrics_tag == HB_OT_METRICS_TAG_VERTICAL_ASCENDER) + return fabs ((double) value); + if (metrics_tag == HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER || + metrics_tag == HB_OT_METRICS_TAG_VERTICAL_DESCENDER) + return -fabs ((double) value); + return value; +} + +/* The common part of _get_position logic needed on hb-ot-font and here + to be able to have slim builds without the not always needed parts */ +bool +_hb_ot_metrics_get_position_common (hb_font_t *font, + hb_ot_metrics_tag_t metrics_tag, + hb_position_t *position /* OUT. May be NULL. */) +{ + hb_face_t *face = font->face; + switch ((unsigned) metrics_tag) + { +#ifndef HB_NO_VAR +#define GET_VAR face->table.MVAR->get_var (metrics_tag, font->coords, font->num_coords) +#else +#define GET_VAR .0f +#endif +#define GET_METRIC_X(TABLE, ATTR) \ + (face->table.TABLE->has_data () && \ + (position && (*position = font->em_scalef_x (_fix_ascender_descender ( \ + face->table.TABLE->ATTR + GET_VAR, metrics_tag))), true)) +#define GET_METRIC_Y(TABLE, ATTR) \ + (face->table.TABLE->has_data () && \ + (position && (*position = font->em_scalef_y (_fix_ascender_descender ( \ + face->table.TABLE->ATTR + GET_VAR, metrics_tag))), true)) + case HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER: + return (face->table.OS2->use_typo_metrics () && GET_METRIC_Y (OS2, sTypoAscender)) || + GET_METRIC_Y (hhea, ascender); + case HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER: + return (face->table.OS2->use_typo_metrics () && GET_METRIC_Y (OS2, sTypoDescender)) || + GET_METRIC_Y (hhea, descender); + case HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP: + return (face->table.OS2->use_typo_metrics () && GET_METRIC_Y (OS2, sTypoLineGap)) || + GET_METRIC_Y (hhea, lineGap); + case HB_OT_METRICS_TAG_VERTICAL_ASCENDER: return GET_METRIC_X (vhea, ascender); + case HB_OT_METRICS_TAG_VERTICAL_DESCENDER: return GET_METRIC_X (vhea, descender); + case HB_OT_METRICS_TAG_VERTICAL_LINE_GAP: return GET_METRIC_X (vhea, lineGap); +#undef GET_METRIC_Y +#undef GET_METRIC_X +#undef GET_VAR + default: assert (0); return false; + } +} + +#ifndef HB_NO_METRICS + +#if 0 +static bool +_get_gasp (hb_face_t *face, float *result, hb_ot_metrics_tag_t metrics_tag) +{ + const OT::GaspRange& range = face->table.gasp->get_gasp_range (metrics_tag - HB_TAG ('g','s','p','0')); + if (&range == &Null (OT::GaspRange)) return false; + if (result) *result = range.rangeMaxPPEM + font->face->table.MVAR->get_var (metrics_tag, font->coords, font->num_coords); + return true; +} +#endif + +/* Private tags for https://github.com/harfbuzz/harfbuzz/issues/1866 */ +#define _HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER_OS2 HB_TAG ('O','a','s','c') +#define _HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER_HHEA HB_TAG ('H','a','s','c') +#define _HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER_OS2 HB_TAG ('O','d','s','c') +#define _HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER_HHEA HB_TAG ('H','d','s','c') +#define _HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP_OS2 HB_TAG ('O','l','g','p') +#define _HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP_HHEA HB_TAG ('H','l','g','p') + +/** + * hb_ot_metrics_get_position: + * @font: a #hb_font_t object. + * @metrics_tag: tag of metrics value you like to fetch. + * @position: (out) (optional): result of metrics value from the font. + * + * It fetches metrics value corresponding to a given tag from a font. + * + * Returns: Whether found the requested metrics in the font. + * Since: 2.6.0 + **/ +hb_bool_t +hb_ot_metrics_get_position (hb_font_t *font, + hb_ot_metrics_tag_t metrics_tag, + hb_position_t *position /* OUT. May be NULL. */) +{ + hb_face_t *face = font->face; + switch ((unsigned) metrics_tag) + { + case HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER: + case HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER: + case HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP: + case HB_OT_METRICS_TAG_VERTICAL_ASCENDER: + case HB_OT_METRICS_TAG_VERTICAL_DESCENDER: + case HB_OT_METRICS_TAG_VERTICAL_LINE_GAP: return _hb_ot_metrics_get_position_common (font, metrics_tag, position); +#ifndef HB_NO_VAR +#define GET_VAR hb_ot_metrics_get_variation (font, metrics_tag) +#else +#define GET_VAR 0 +#endif +#define GET_METRIC_X(TABLE, ATTR) \ + (face->table.TABLE->has_data () && \ + (position && (*position = font->em_scalef_x (face->table.TABLE->ATTR + GET_VAR)), true)) +#define GET_METRIC_Y(TABLE, ATTR) \ + (face->table.TABLE->has_data () && \ + (position && (*position = font->em_scalef_y (face->table.TABLE->ATTR + GET_VAR)), true)) + case HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_ASCENT: return GET_METRIC_Y (OS2, usWinAscent); + case HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_DESCENT: return GET_METRIC_Y (OS2, usWinDescent); + case HB_OT_METRICS_TAG_HORIZONTAL_CARET_RISE: return GET_METRIC_Y (hhea, caretSlopeRise); + case HB_OT_METRICS_TAG_HORIZONTAL_CARET_RUN: return GET_METRIC_X (hhea, caretSlopeRun); + case HB_OT_METRICS_TAG_HORIZONTAL_CARET_OFFSET: return GET_METRIC_X (hhea, caretOffset); + case HB_OT_METRICS_TAG_VERTICAL_CARET_RISE: return GET_METRIC_X (vhea, caretSlopeRise); + case HB_OT_METRICS_TAG_VERTICAL_CARET_RUN: return GET_METRIC_Y (vhea, caretSlopeRun); + case HB_OT_METRICS_TAG_VERTICAL_CARET_OFFSET: return GET_METRIC_Y (vhea, caretOffset); + case HB_OT_METRICS_TAG_X_HEIGHT: return GET_METRIC_Y (OS2->v2 (), sxHeight); + case HB_OT_METRICS_TAG_CAP_HEIGHT: return GET_METRIC_Y (OS2->v2 (), sCapHeight); + case HB_OT_METRICS_TAG_SUBSCRIPT_EM_X_SIZE: return GET_METRIC_X (OS2, ySubscriptXSize); + case HB_OT_METRICS_TAG_SUBSCRIPT_EM_Y_SIZE: return GET_METRIC_Y (OS2, ySubscriptYSize); + case HB_OT_METRICS_TAG_SUBSCRIPT_EM_X_OFFSET: return GET_METRIC_X (OS2, ySubscriptXOffset); + case HB_OT_METRICS_TAG_SUBSCRIPT_EM_Y_OFFSET: return GET_METRIC_Y (OS2, ySubscriptYOffset); + case HB_OT_METRICS_TAG_SUPERSCRIPT_EM_X_SIZE: return GET_METRIC_X (OS2, ySuperscriptXSize); + case HB_OT_METRICS_TAG_SUPERSCRIPT_EM_Y_SIZE: return GET_METRIC_Y (OS2, ySuperscriptYSize); + case HB_OT_METRICS_TAG_SUPERSCRIPT_EM_X_OFFSET: return GET_METRIC_X (OS2, ySuperscriptXOffset); + case HB_OT_METRICS_TAG_SUPERSCRIPT_EM_Y_OFFSET: return GET_METRIC_Y (OS2, ySuperscriptYOffset); + case HB_OT_METRICS_TAG_STRIKEOUT_SIZE: return GET_METRIC_Y (OS2, yStrikeoutSize); + case HB_OT_METRICS_TAG_STRIKEOUT_OFFSET: return GET_METRIC_Y (OS2, yStrikeoutPosition); + case HB_OT_METRICS_TAG_UNDERLINE_SIZE: return GET_METRIC_Y (post->table, underlineThickness); + case HB_OT_METRICS_TAG_UNDERLINE_OFFSET: return GET_METRIC_Y (post->table, underlinePosition); + + /* Private tags */ + case _HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER_OS2: return GET_METRIC_Y (OS2, sTypoAscender); + case _HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER_HHEA: return GET_METRIC_Y (hhea, ascender); + case _HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER_OS2: return GET_METRIC_Y (OS2, sTypoDescender); + case _HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER_HHEA: return GET_METRIC_Y (hhea, descender); + case _HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP_OS2: return GET_METRIC_Y (OS2, sTypoLineGap); + case _HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP_HHEA: return GET_METRIC_Y (hhea, lineGap); +#undef GET_METRIC_Y +#undef GET_METRIC_X +#undef GET_VAR + default: return false; + } +} + +#ifndef HB_NO_VAR +/** + * hb_ot_metrics_get_variation: + * @font: + * @metrics_tag: + * + * Returns: + * + * Since: 2.6.0 + **/ +float +hb_ot_metrics_get_variation (hb_font_t *font, hb_ot_metrics_tag_t metrics_tag) +{ + return font->face->table.MVAR->get_var (metrics_tag, font->coords, font->num_coords); +} + +/** + * hb_ot_metrics_get_x_variation: + * @font: + * @metrics_tag: + * + * Returns: + * + * Since: 2.6.0 + **/ +hb_position_t +hb_ot_metrics_get_x_variation (hb_font_t *font, hb_ot_metrics_tag_t metrics_tag) +{ + return font->em_scalef_x (hb_ot_metrics_get_variation (font, metrics_tag)); +} + +/** + * hb_ot_metrics_get_y_variation: + * @font: + * @metrics_tag: + * + * Returns: + * + * Since: 2.6.0 + **/ +hb_position_t +hb_ot_metrics_get_y_variation (hb_font_t *font, hb_ot_metrics_tag_t metrics_tag) +{ + return font->em_scalef_y (hb_ot_metrics_get_variation (font, metrics_tag)); +} +#endif + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-metrics.h b/thirdparty/harfbuzz/src/hb-ot-metrics.h new file mode 100644 index 0000000000..42c7363c03 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-metrics.h @@ -0,0 +1,122 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_OT_H_IN +#error "Include <hb-ot.h> instead." +#endif + +#ifndef HB_OT_METRICS_H +#define HB_OT_METRICS_H + +#include "hb.h" +#include "hb-ot-name.h" + +HB_BEGIN_DECLS + + +/** + * hb_ot_metrics_tag_t: + * @HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER: horizontal ascender. + * @HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER: horizontal descender. + * @HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP: horizontal line gap. + * @HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_ASCENT: horizontal clipping ascent. + * @HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_DESCENT: horizontal clipping descent. + * @HB_OT_METRICS_TAG_VERTICAL_ASCENDER: vertical ascender. + * @HB_OT_METRICS_TAG_VERTICAL_DESCENDER: vertical descender. + * @HB_OT_METRICS_TAG_VERTICAL_LINE_GAP: vertical line gap. + * @HB_OT_METRICS_TAG_HORIZONTAL_CARET_RISE: horizontal caret rise. + * @HB_OT_METRICS_TAG_HORIZONTAL_CARET_RUN: horizontal caret run. + * @HB_OT_METRICS_TAG_HORIZONTAL_CARET_OFFSET: horizontal caret offset. + * @HB_OT_METRICS_TAG_VERTICAL_CARET_RISE: vertical caret rise. + * @HB_OT_METRICS_TAG_VERTICAL_CARET_RUN: vertical caret run. + * @HB_OT_METRICS_TAG_VERTICAL_CARET_OFFSET: vertical caret offset. + * @HB_OT_METRICS_TAG_X_HEIGHT: x height. + * @HB_OT_METRICS_TAG_CAP_HEIGHT: cap height. + * @HB_OT_METRICS_TAG_SUBSCRIPT_EM_X_SIZE: subscript em x size. + * @HB_OT_METRICS_TAG_SUBSCRIPT_EM_Y_SIZE: subscript em y size. + * @HB_OT_METRICS_TAG_SUBSCRIPT_EM_X_OFFSET: subscript em x offset. + * @HB_OT_METRICS_TAG_SUBSCRIPT_EM_Y_OFFSET: subscript em y offset. + * @HB_OT_METRICS_TAG_SUPERSCRIPT_EM_X_SIZE: superscript em x size. + * @HB_OT_METRICS_TAG_SUPERSCRIPT_EM_Y_SIZE: superscript em y size. + * @HB_OT_METRICS_TAG_SUPERSCRIPT_EM_X_OFFSET: superscript em x offset. + * @HB_OT_METRICS_TAG_SUPERSCRIPT_EM_Y_OFFSET: superscript em y offset. + * @HB_OT_METRICS_TAG_STRIKEOUT_SIZE: strikeout size. + * @HB_OT_METRICS_TAG_STRIKEOUT_OFFSET: strikeout offset. + * @HB_OT_METRICS_TAG_UNDERLINE_SIZE: underline size. + * @HB_OT_METRICS_TAG_UNDERLINE_OFFSET: underline offset. + * + * From https://docs.microsoft.com/en-us/typography/opentype/spec/mvar#value-tags + * + * Since: 2.6.0 + **/ +typedef enum { + HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER = HB_TAG ('h','a','s','c'), + HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER = HB_TAG ('h','d','s','c'), + HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP = HB_TAG ('h','l','g','p'), + HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_ASCENT = HB_TAG ('h','c','l','a'), + HB_OT_METRICS_TAG_HORIZONTAL_CLIPPING_DESCENT = HB_TAG ('h','c','l','d'), + HB_OT_METRICS_TAG_VERTICAL_ASCENDER = HB_TAG ('v','a','s','c'), + HB_OT_METRICS_TAG_VERTICAL_DESCENDER = HB_TAG ('v','d','s','c'), + HB_OT_METRICS_TAG_VERTICAL_LINE_GAP = HB_TAG ('v','l','g','p'), + HB_OT_METRICS_TAG_HORIZONTAL_CARET_RISE = HB_TAG ('h','c','r','s'), + HB_OT_METRICS_TAG_HORIZONTAL_CARET_RUN = HB_TAG ('h','c','r','n'), + HB_OT_METRICS_TAG_HORIZONTAL_CARET_OFFSET = HB_TAG ('h','c','o','f'), + HB_OT_METRICS_TAG_VERTICAL_CARET_RISE = HB_TAG ('v','c','r','s'), + HB_OT_METRICS_TAG_VERTICAL_CARET_RUN = HB_TAG ('v','c','r','n'), + HB_OT_METRICS_TAG_VERTICAL_CARET_OFFSET = HB_TAG ('v','c','o','f'), + HB_OT_METRICS_TAG_X_HEIGHT = HB_TAG ('x','h','g','t'), + HB_OT_METRICS_TAG_CAP_HEIGHT = HB_TAG ('c','p','h','t'), + HB_OT_METRICS_TAG_SUBSCRIPT_EM_X_SIZE = HB_TAG ('s','b','x','s'), + HB_OT_METRICS_TAG_SUBSCRIPT_EM_Y_SIZE = HB_TAG ('s','b','y','s'), + HB_OT_METRICS_TAG_SUBSCRIPT_EM_X_OFFSET = HB_TAG ('s','b','x','o'), + HB_OT_METRICS_TAG_SUBSCRIPT_EM_Y_OFFSET = HB_TAG ('s','b','y','o'), + HB_OT_METRICS_TAG_SUPERSCRIPT_EM_X_SIZE = HB_TAG ('s','p','x','s'), + HB_OT_METRICS_TAG_SUPERSCRIPT_EM_Y_SIZE = HB_TAG ('s','p','y','s'), + HB_OT_METRICS_TAG_SUPERSCRIPT_EM_X_OFFSET = HB_TAG ('s','p','x','o'), + HB_OT_METRICS_TAG_SUPERSCRIPT_EM_Y_OFFSET = HB_TAG ('s','p','y','o'), + HB_OT_METRICS_TAG_STRIKEOUT_SIZE = HB_TAG ('s','t','r','s'), + HB_OT_METRICS_TAG_STRIKEOUT_OFFSET = HB_TAG ('s','t','r','o'), + HB_OT_METRICS_TAG_UNDERLINE_SIZE = HB_TAG ('u','n','d','s'), + HB_OT_METRICS_TAG_UNDERLINE_OFFSET = HB_TAG ('u','n','d','o'), + + _HB_OT_METRICS_TAG_MAX_VALUE = HB_TAG_MAX_SIGNED /*< skip >*/ +} hb_ot_metrics_tag_t; + +HB_EXTERN hb_bool_t +hb_ot_metrics_get_position (hb_font_t *font, + hb_ot_metrics_tag_t metrics_tag, + hb_position_t *position /* OUT. May be NULL. */); + +HB_EXTERN float +hb_ot_metrics_get_variation (hb_font_t *font, hb_ot_metrics_tag_t metrics_tag); + +HB_EXTERN hb_position_t +hb_ot_metrics_get_x_variation (hb_font_t *font, hb_ot_metrics_tag_t metrics_tag); + +HB_EXTERN hb_position_t +hb_ot_metrics_get_y_variation (hb_font_t *font, hb_ot_metrics_tag_t metrics_tag); + +HB_END_DECLS + +#endif /* HB_OT_METRICS_H */ diff --git a/thirdparty/harfbuzz/src/hb-ot-metrics.hh b/thirdparty/harfbuzz/src/hb-ot-metrics.hh new file mode 100644 index 0000000000..19a5e9ed41 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-metrics.hh @@ -0,0 +1,35 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_OT_METRICS_HH +#define HB_OT_METRICS_HH + +#include "hb.hh" + +HB_INTERNAL bool +_hb_ot_metrics_get_position_common (hb_font_t *font, + hb_ot_metrics_tag_t metrics_tag, + hb_position_t *position /* OUT. May be NULL. */); + +#endif /* HB_OT_METRICS_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-name-language-static.hh b/thirdparty/harfbuzz/src/hb-ot-name-language-static.hh new file mode 100644 index 0000000000..c496dc2981 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-name-language-static.hh @@ -0,0 +1,456 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_NAME_LANGUAGE_STATIC_HH +#define HB_OT_NAME_LANGUAGE_STATIC_HH + +#include "hb-ot-name-language.hh" + +/* Following two tables were generated by joining FreeType, FontConfig, + * and OpenType specification language lists, then filled in missing + * entries using: + * https://docs.microsoft.com/en-us/windows/desktop/intl/language-identifier-constants-and-strings + */ + +struct hb_ot_language_map_t +{ + int cmp (unsigned int key) const + { return key < code ? -1 : key > code ? +1 : 0; } + + uint16_t code; + char lang[6]; +}; + +static const hb_ot_language_map_t +hb_ms_language_map[] = +{ + {0x0001, "ar"}, /* ??? */ + {0x0004, "zh"}, /* ??? */ + {0x0009, "en"}, /* ??? */ + {0x0401, "ar"}, /* Arabic (Saudi Arabia) */ + {0x0402, "bg"}, /* Bulgarian (Bulgaria) */ + {0x0403, "ca"}, /* Catalan (Catalan) */ + {0x0404, "zh-tw"}, /* Chinese (Taiwan) */ + {0x0405, "cs"}, /* Czech (Czech Republic) */ + {0x0406, "da"}, /* Danish (Denmark) */ + {0x0407, "de"}, /* German (Germany) */ + {0x0408, "el"}, /* Greek (Greece) */ + {0x0409, "en"}, /* English (United States) */ + {0x040A, "es"}, /* Spanish (Traditional Sort) (Spain) */ + {0x040B, "fi"}, /* Finnish (Finland) */ + {0x040C, "fr"}, /* French (France) */ + {0x040D, "he"}, /* Hebrew (Israel) */ + {0x040E, "hu"}, /* Hungarian (Hungary) */ + {0x040F, "is"}, /* Icelandic (Iceland) */ + {0x0410, "it"}, /* Italian (Italy) */ + {0x0411, "ja"}, /* Japanese (Japan) */ + {0x0412, "ko"}, /* Korean (Korea) */ + {0x0413, "nl"}, /* Dutch (Netherlands) */ + {0x0414, "no"}, /* Norwegian (Bokmal) (Norway) */ + {0x0415, "pl"}, /* Polish (Poland) */ + {0x0416, "pt"}, /* Portuguese (Brazil) */ + {0x0417, "rm"}, /* Romansh (Switzerland) */ + {0x0418, "ro"}, /* Romanian (Romania) */ + {0x0419, "ru"}, /* Russian (Russia) */ + {0x041A, "hr"}, /* Croatian (Croatia) */ + {0x041B, "sk"}, /* Slovak (Slovakia) */ + {0x041C, "sq"}, /* Albanian (Albania) */ + {0x041D, "sv"}, /* Swedish (Sweden) */ + {0x041E, "th"}, /* Thai (Thailand) */ + {0x041F, "tr"}, /* Turkish (Turkey) */ + {0x0420, "ur"}, /* Urdu (Islamic Republic of Pakistan) */ + {0x0421, "id"}, /* Indonesian (Indonesia) */ + {0x0422, "uk"}, /* Ukrainian (Ukraine) */ + {0x0423, "be"}, /* Belarusian (Belarus) */ + {0x0424, "sl"}, /* Slovenian (Slovenia) */ + {0x0425, "et"}, /* Estonian (Estonia) */ + {0x0426, "lv"}, /* Latvian (Latvia) */ + {0x0427, "lt"}, /* Lithuanian (Lithuania) */ + {0x0428, "tg"}, /* Tajik (Cyrillic) (Tajikistan) */ + {0x0429, "fa"}, /* Persian (Iran) */ + {0x042A, "vi"}, /* Vietnamese (Vietnam) */ + {0x042B, "hy"}, /* Armenian (Armenia) */ + {0x042C, "az"}, /* Azeri (Latin) (Azerbaijan) */ + {0x042D, "eu"}, /* Basque (Basque) */ + {0x042E, "hsb"}, /* Upper Sorbian (Germany) */ + {0x042F, "mk"}, /* Macedonian (FYROM) (Former Yugoslav Republic of Macedonia) */ + {0x0430, "st"}, /* ??? */ + {0x0431, "ts"}, /* ??? */ + {0x0432, "tn"}, /* Setswana (South Africa) */ + {0x0433, "ven"}, /* ??? */ + {0x0434, "xh"}, /* isiXhosa (South Africa) */ + {0x0435, "zu"}, /* isiZulu (South Africa) */ + {0x0436, "af"}, /* Afrikaans (South Africa) */ + {0x0437, "ka"}, /* Georgian (Georgia) */ + {0x0438, "fo"}, /* Faroese (Faroe Islands) */ + {0x0439, "hi"}, /* Hindi (India) */ + {0x043A, "mt"}, /* Maltese (Malta) */ + {0x043B, "se"}, /* Sami (Northern) (Norway) */ + {0x043C, "ga"}, /* ??? */ + {0x043D, "yi"}, /* ??? */ + {0x043E, "ms"}, /* Malay (Malaysia) */ + {0x043F, "kk"}, /* Kazakh (Kazakhstan) */ + {0x0440, "ky"}, /* Kyrgyz (Kyrgyzstan) */ + {0x0441, "sw"}, /* Kiswahili (Kenya) */ + {0x0442, "tk"}, /* Turkmen (Turkmenistan) */ + {0x0443, "uz"}, /* Uzbek (Latin) (Uzbekistan) */ + {0x0444, "tt"}, /* Tatar (Russia) */ + {0x0445, "bn"}, /* Bengali (India) */ + {0x0446, "pa"}, /* Punjabi (India) */ + {0x0447, "gu"}, /* Gujarati (India) */ + {0x0448, "or"}, /* Odia (formerly Oriya) (India) */ + {0x0449, "ta"}, /* Tamil (India) */ + {0x044A, "te"}, /* Telugu (India) */ + {0x044B, "kn"}, /* Kannada (India) */ + {0x044C, "ml"}, /* Malayalam (India) */ + {0x044D, "as"}, /* Assamese (India) */ + {0x044E, "mr"}, /* Marathi (India) */ + {0x044F, "sa"}, /* Sanskrit (India) */ + {0x0450, "mn"}, /* Mongolian (Cyrillic) (Mongolia) */ + {0x0451, "bo"}, /* Tibetan (PRC) */ + {0x0452, "cy"}, /* Welsh (United Kingdom) */ + {0x0453, "km"}, /* Khmer (Cambodia) */ + {0x0454, "lo"}, /* Lao (Lao P.D.R.) */ + {0x0455, "my"}, /* ??? */ + {0x0456, "gl"}, /* Galician (Galician) */ + {0x0457, "kok"}, /* Konkani (India) */ + {0x0458, "mni"}, /* ??? */ + {0x0459, "sd"}, /* ??? */ + {0x045A, "syr"}, /* Syriac (Syria) */ + {0x045B, "si"}, /* Sinhala (Sri Lanka) */ + {0x045C, "chr"}, /* ??? */ + {0x045D, "iu"}, /* Inuktitut (Canada) */ + {0x045E, "am"}, /* Amharic (Ethiopia) */ + {0x0460, "ks"}, /* ??? */ + {0x0461, "ne"}, /* Nepali (Nepal) */ + {0x0462, "fy"}, /* Frisian (Netherlands) */ + {0x0463, "ps"}, /* Pashto (Afghanistan) */ + {0x0464, "phi"}, /* Filipino (Philippines) */ + {0x0465, "div"}, /* Divehi (Maldives) */ + {0x0468, "ha"}, /* Hausa (Latin) (Nigeria) */ + {0x046A, "yo"}, /* Yoruba (Nigeria) */ + {0x046B, "quz"}, /* Quechua (Bolivia) */ + {0x046C, "nso"}, /* Sesotho sa Leboa (South Africa) */ + {0x046D, "ba"}, /* Bashkir (Russia) */ + {0x046E, "lb"}, /* Luxembourgish (Luxembourg) */ + {0x046F, "kl"}, /* Greenlandic (Greenland) */ + {0x0470, "ibo"}, /* Igbo (Nigeria) */ + {0x0471, "kau"}, /* ??? */ + {0x0472, "om"}, /* ??? */ + {0x0473, "ti"}, /* ??? */ + {0x0474, "gn"}, /* ??? */ + {0x0475, "haw"}, /* ??? */ + {0x0476, "la"}, /* ??? */ + {0x0477, "so"}, /* ??? */ + {0x0478, "ii"}, /* Yi (PRC) */ + {0x0479, "pap"}, /* ??? */ + {0x047A, "arn"}, /* Mapudungun (Chile) */ + {0x047C, "moh"}, /* Mohawk (Mohawk) */ + {0x047E, "br"}, /* Breton (France) */ + {0x0480, "ug"}, /* Uighur (PRC) */ + {0x0481, "mi"}, /* Maori (New Zealand) */ + {0x0482, "oc"}, /* Occitan (France) */ + {0x0483, "co"}, /* Corsican (France) */ + {0x0484, "gsw"}, /* Alsatian (France) */ + {0x0485, "sah"}, /* Yakut (Russia) */ + {0x0486, "qut"}, /* K'iche (Guatemala) */ + {0x0487, "rw"}, /* Kinyarwanda (Rwanda) */ + {0x0488, "wo"}, /* Wolof (Senegal) */ + {0x048C, "fa"}, /* Dari (Afghanistan) */ + {0x0801, "ar"}, /* Arabic (Iraq) */ + {0x0804, "zh-cn"}, /* Chinese (People’s Republic of China) */ + {0x0807, "de"}, /* German (Switzerland) */ + {0x0809, "en"}, /* English (United Kingdom) */ + {0x080A, "es"}, /* Spanish (Mexico) */ + {0x080C, "fr"}, /* French (Belgium) */ + {0x0810, "it"}, /* Italian (Switzerland) */ + {0x0812, "ko"}, /* ??? */ + {0x0813, "nl"}, /* Dutch (Belgium) */ + {0x0814, "nn"}, /* Norwegian (Nynorsk) (Norway) */ + {0x0816, "pt"}, /* Portuguese (Portugal) */ + {0x0818, "mo"}, /* ??? */ + {0x0819, "ru"}, /* ??? */ + {0x081A, "sr"}, /* Serbian (Latin) (Serbia) */ + {0x081D, "sv"}, /* Sweden (Finland) */ + {0x0820, "ur"}, /* ??? */ + {0x0827, "lt"}, /* ??? */ + {0x082C, "az"}, /* Azeri (Cyrillic) (Azerbaijan) */ + {0x082E, "dsb"}, /* Lower Sorbian (Germany) */ +//{0x083B, ""}, /* Sami (Northern) (Sweden) */ + {0x083C, "gd"}, /* Irish (Ireland) */ + {0x083E, "ms"}, /* Malay (Brunei Darussalam) */ + {0x0843, "uz"}, /* Uzbek (Cyrillic) (Uzbekistan) */ + {0x0845, "bn"}, /* Bengali (Bangladesh) */ + {0x0846, "ar"}, /* ??? */ + {0x0850, "mn"}, /* Mongolian (Traditional) (People’s Republic of China) */ + {0x0851, "dz"}, /* ??? */ + {0x085D, "iu"}, /* Inuktitut (Latin) (Canada) */ + {0x085F, "tzm"}, /* Tamazight (Latin) (Algeria) */ + {0x0861, "ne"}, /* ??? */ +//{0x086B, ""}, /* Quechua (Ecuador) */ + {0x0873, "ti"}, /* ??? */ + {0x0C01, "ar"}, /* Arabic (Egypt) */ + {0x0C04, "zh-hk"}, /* Chinese (Hong Kong S.A.R.) */ + {0x0C07, "de"}, /* German (Austria) */ + {0x0C09, "en"}, /* English (Australia) */ + {0x0C0A, "es"}, /* Spanish (Modern Sort) (Spain) */ + {0x0C0C, "fr"}, /* French (Canada) */ + {0x0C1A, "sr"}, /* Serbian (Cyrillic) (Serbia) */ + {0x0C3B, "se"}, /* Sami (Northern) (Finland) */ +//{0x0C6B, ""}, /* Quechua (Peru) */ + {0x1001, "ar"}, /* Arabic (Libya) */ + {0x1004, "zh-sg"}, /* Chinese (Singapore) */ + {0x1007, "de"}, /* German (Luxembourg) */ + {0x1009, "en"}, /* English (Canada) */ + {0x100A, "es"}, /* Spanish (Guatemala) */ + {0x100C, "fr"}, /* French (Switzerland) */ + {0x101A, "hr"}, /* Croatian (Latin) (Bosnia and Herzegovina) */ + {0x103B, "smj"}, /* Sami (Lule) (Norway) */ + {0x1401, "ar"}, /* Arabic (Algeria) */ +//{0x1404, ""}, /* Chinese (Macao S.A.R.) */ + {0x1407, "de"}, /* German (Liechtenstein) */ + {0x1409, "en"}, /* English (New Zealand) */ + {0x140A, "es"}, /* Spanish (Costa Rica) */ + {0x140C, "fr"}, /* French (Luxembourg) */ + {0x141A, "bs"}, /* Bosnian (Latin) (Bosnia and Herzegovina) */ +//{0x143B, ""}, /* Sami (Lule) (Sweden) */ + {0x1801, "ar"}, /* Arabic (Morocco) */ + {0x1809, "en"}, /* English (Ireland) */ + {0x180A, "es"}, /* Spanish (Panama) */ + {0x180C, "fr"}, /* French (Principality of Monaco) */ +//{0x181A, ""}, /* Serbian (Latin) (Bosnia and Herzegovina) */ + {0x183B, "sma"}, /* Sami (Southern) (Norway) */ + {0x1C01, "ar"}, /* Arabic (Tunisia) */ + {0x1C09, "en"}, /* English (South Africa) */ + {0x1C0A, "es"}, /* Spanish (Dominican Republic) */ + {0x1C0C, "fr"}, /* ??? */ +//{0x1C1A, ""}, /* Serbian (Cyrillic) (Bosnia and Herzegovina) */ +//{0x1C3B, ""}, /* Sami (Southern) (Sweden) */ + {0x2001, "ar"}, /* Arabic (Oman) */ + {0x2009, "en"}, /* English (Jamaica) */ + {0x200A, "es"}, /* Spanish (Venezuela) */ + {0x200C, "fr"}, /* ??? */ + {0x201A, "bs"}, /* Bosnian (Cyrillic) (Bosnia and Herzegovina) */ + {0x203B, "sms"}, /* Sami (Skolt) (Finland) */ + {0x2401, "ar"}, /* Arabic (Yemen) */ + {0x2409, "en"}, /* English (Caribbean) */ + {0x240A, "es"}, /* Spanish (Colombia) */ + {0x240C, "fr"}, /* ??? */ + {0x243B, "smn"}, /* Sami (Inari) (Finland) */ + {0x2801, "ar"}, /* Arabic (Syria) */ + {0x2809, "en"}, /* English (Belize) */ + {0x280A, "es"}, /* Spanish (Peru) */ + {0x280C, "fr"}, /* ??? */ + {0x2C01, "ar"}, /* Arabic (Jordan) */ + {0x2C09, "en"}, /* English (Trinidad and Tobago) */ + {0x2C0A, "es"}, /* Spanish (Argentina) */ + {0x2C0C, "fr"}, /* ??? */ + {0x3001, "ar"}, /* Arabic (Lebanon) */ + {0x3009, "en"}, /* English (Zimbabwe) */ + {0x300A, "es"}, /* Spanish (Ecuador) */ + {0x300C, "fr"}, /* ??? */ + {0x3401, "ar"}, /* Arabic (Kuwait) */ + {0x3409, "en"}, /* English (Republic of the Philippines) */ + {0x340A, "es"}, /* Spanish (Chile) */ + {0x340C, "fr"}, /* ??? */ + {0x3801, "ar"}, /* Arabic (U.A.E.) */ + {0x380A, "es"}, /* Spanish (Uruguay) */ + {0x380C, "fr"}, /* ??? */ + {0x3C01, "ar"}, /* Arabic (Bahrain) */ + {0x3C09, "en"}, /* ??? */ + {0x3C0A, "es"}, /* Spanish (Paraguay) */ + {0x3C0C, "fr"}, /* ??? */ + {0x4001, "ar"}, /* Arabic (Qatar) */ + {0x4009, "en"}, /* English (India) */ + {0x400A, "es"}, /* Spanish (Bolivia) */ + {0x4409, "en"}, /* English (Malaysia) */ + {0x440A, "es"}, /* Spanish (El Salvador) */ + {0x4809, "en"}, /* English (Singapore) */ + {0x480A, "es"}, /* Spanish (Honduras) */ + {0x4C0A, "es"}, /* Spanish (Nicaragua) */ + {0x500A, "es"}, /* Spanish (Puerto Rico) */ + {0x540A, "es"}, /* Spanish (United States) */ + {0xE40A, "es"}, /* ??? */ + {0xE40C, "fr"}, /* ??? */ +}; + +static const hb_ot_language_map_t +hb_mac_language_map[] = +{ + { 0, "en"}, /* English */ + { 1, "fr"}, /* French */ + { 2, "de"}, /* German */ + { 3, "it"}, /* Italian */ + { 4, "nl"}, /* Dutch */ + { 5, "sv"}, /* Swedish */ + { 6, "es"}, /* Spanish */ + { 7, "da"}, /* Danish */ + { 8, "pt"}, /* Portuguese */ + { 9, "no"}, /* Norwegian */ + { 10, "he"}, /* Hebrew */ + { 11, "ja"}, /* Japanese */ + { 12, "ar"}, /* Arabic */ + { 13, "fi"}, /* Finnish */ + { 14, "el"}, /* Greek */ + { 15, "is"}, /* Icelandic */ + { 16, "mt"}, /* Maltese */ + { 17, "tr"}, /* Turkish */ + { 18, "hr"}, /* Croatian */ + { 19, "zh-tw"}, /* Chinese (Traditional) */ + { 20, "ur"}, /* Urdu */ + { 21, "hi"}, /* Hindi */ + { 22, "th"}, /* Thai */ + { 23, "ko"}, /* Korean */ + { 24, "lt"}, /* Lithuanian */ + { 25, "pl"}, /* Polish */ + { 26, "hu"}, /* Hungarian */ + { 27, "et"}, /* Estonian */ + { 28, "lv"}, /* Latvian */ +//{ 29, ""}, /* Sami */ + { 30, "fo"}, /* Faroese */ + { 31, "fa"}, /* Farsi/Persian */ + { 32, "ru"}, /* Russian */ + { 33, "zh-cn"}, /* Chinese (Simplified) */ + { 34, "nl"}, /* Flemish */ + { 35, "ga"}, /* Irish Gaelic */ + { 36, "sq"}, /* Albanian */ + { 37, "ro"}, /* Romanian */ + { 38, "cs"}, /* Czech */ + { 39, "sk"}, /* Slovak */ + { 40, "sl"}, /* Slovenian */ + { 41, "yi"}, /* Yiddish */ + { 42, "sr"}, /* Serbian */ + { 43, "mk"}, /* Macedonian */ + { 44, "bg"}, /* Bulgarian */ + { 45, "uk"}, /* Ukrainian */ + { 46, "be"}, /* Byelorussian */ + { 47, "uz"}, /* Uzbek */ + { 48, "kk"}, /* Kazakh */ + { 49, "az"}, /* Azerbaijani (Cyrillic script) */ + { 50, "az"}, /* Azerbaijani (Arabic script) */ + { 51, "hy"}, /* Armenian */ + { 52, "ka"}, /* Georgian */ + { 53, "mo"}, /* Moldavian */ + { 54, "ky"}, /* Kirghiz */ + { 55, "tg"}, /* Tajiki */ + { 56, "tk"}, /* Turkmen */ + { 57, "mn"}, /* Mongolian (Mongolian script) */ + { 58, "mn"}, /* Mongolian (Cyrillic script) */ + { 59, "ps"}, /* Pashto */ + { 60, "ku"}, /* Kurdish */ + { 61, "ks"}, /* Kashmiri */ + { 62, "sd"}, /* Sindhi */ + { 63, "bo"}, /* Tibetan */ + { 64, "ne"}, /* Nepali */ + { 65, "sa"}, /* Sanskrit */ + { 66, "mr"}, /* Marathi */ + { 67, "bn"}, /* Bengali */ + { 68, "as"}, /* Assamese */ + { 69, "gu"}, /* Gujarati */ + { 70, "pa"}, /* Punjabi */ + { 71, "or"}, /* Oriya */ + { 72, "ml"}, /* Malayalam */ + { 73, "kn"}, /* Kannada */ + { 74, "ta"}, /* Tamil */ + { 75, "te"}, /* Telugu */ + { 76, "si"}, /* Sinhalese */ + { 77, "my"}, /* Burmese */ + { 78, "km"}, /* Khmer */ + { 79, "lo"}, /* Lao */ + { 80, "vi"}, /* Vietnamese */ + { 81, "id"}, /* Indonesian */ + { 82, "tl"}, /* Tagalog */ + { 83, "ms"}, /* Malay (Roman script) */ + { 84, "ms"}, /* Malay (Arabic script) */ + { 85, "am"}, /* Amharic */ + { 86, "ti"}, /* Tigrinya */ + { 87, "om"}, /* Galla */ + { 88, "so"}, /* Somali */ + { 89, "sw"}, /* Swahili */ + { 90, "rw"}, /* Kinyarwanda/Ruanda */ + { 91, "rn"}, /* Rundi */ + { 92, "ny"}, /* Nyanja/Chewa */ + { 93, "mg"}, /* Malagasy */ + { 94, "eo"}, /* Esperanto */ + {128, "cy"}, /* Welsh */ + {129, "eu"}, /* Basque */ + {130, "ca"}, /* Catalan */ + {131, "la"}, /* Latin */ + {132, "qu"}, /* Quechua */ + {133, "gn"}, /* Guarani */ + {134, "ay"}, /* Aymara */ + {135, "tt"}, /* Tatar */ + {136, "ug"}, /* Uighur */ + {137, "dz"}, /* Dzongkha */ + {138, "jw"}, /* Javanese (Roman script) */ + {139, "su"}, /* Sundanese (Roman script) */ + {140, "gl"}, /* Galician */ + {141, "af"}, /* Afrikaans */ + {142, "br"}, /* Breton */ + {143, "iu"}, /* Inuktitut */ + {144, "gd"}, /* Scottish Gaelic */ + {145, "gv"}, /* Manx Gaelic */ + {146, "ga"}, /* Irish Gaelic (with dot above) */ + {147, "to"}, /* Tongan */ + {148, "el"}, /* Greek (polytonic) */ + {149, "ik"}, /* Greenlandic */ + {150, "az"}, /* Azerbaijani (Roman script) */ +}; + + +static hb_language_t +_hb_ot_name_language_for (unsigned int code, + const hb_ot_language_map_t *array, + unsigned int len) +{ +#ifdef HB_NO_OT_NAME_LANGUAGE + return HB_LANGUAGE_INVALID; +#endif + auto *entry = hb_bsearch (code, array, len); + + if (entry) + return hb_language_from_string (entry->lang, -1); + + return HB_LANGUAGE_INVALID; +} + +hb_language_t +_hb_ot_name_language_for_ms_code (unsigned int code) +{ + return _hb_ot_name_language_for (code, + hb_ms_language_map, + ARRAY_LENGTH (hb_ms_language_map)); +} + +hb_language_t +_hb_ot_name_language_for_mac_code (unsigned int code) +{ + return _hb_ot_name_language_for (code, + hb_mac_language_map, + ARRAY_LENGTH (hb_mac_language_map)); +} + +#endif /* HB_OT_NAME_LANGUAGE_STATIC_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-name-language.hh b/thirdparty/harfbuzz/src/hb-ot-name-language.hh new file mode 100644 index 0000000000..903076c0d5 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-name-language.hh @@ -0,0 +1,40 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_NAME_LANGUAGE_HH +#define HB_OT_NAME_LANGUAGE_HH + +#include "hb.hh" + + +HB_INTERNAL hb_language_t +_hb_ot_name_language_for_ms_code (unsigned int code); + +HB_INTERNAL hb_language_t +_hb_ot_name_language_for_mac_code (unsigned int code); + + +#endif /* HB_OT_NAME_LANGUAGE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-name-table.hh b/thirdparty/harfbuzz/src/hb-ot-name-table.hh new file mode 100644 index 0000000000..ece3c28466 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-name-table.hh @@ -0,0 +1,376 @@ +/* + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_NAME_TABLE_HH +#define HB_OT_NAME_TABLE_HH + +#include "hb-open-type.hh" +#include "hb-ot-name-language.hh" +#include "hb-aat-layout.hh" + + +namespace OT { + + +#define entry_score var.u16[0] +#define entry_index var.u16[1] + + +/* + * name -- Naming + * https://docs.microsoft.com/en-us/typography/opentype/spec/name + */ +#define HB_OT_TAG_name HB_TAG('n','a','m','e') + +#define UNSUPPORTED 42 + +struct NameRecord +{ + hb_language_t language (hb_face_t *face) const + { +#ifndef HB_NO_OT_NAME_LANGUAGE + unsigned int p = platformID; + unsigned int l = languageID; + + if (p == 3) + return _hb_ot_name_language_for_ms_code (l); + + if (p == 1) + return _hb_ot_name_language_for_mac_code (l); + +#ifndef HB_NO_OT_NAME_LANGUAGE_AAT + if (p == 0) + return face->table.ltag->get_language (l); +#endif + +#endif + return HB_LANGUAGE_INVALID; + } + + uint16_t score () const + { + /* Same order as in cmap::find_best_subtable(). */ + unsigned int p = platformID; + unsigned int e = encodingID; + + /* 32-bit. */ + if (p == 3 && e == 10) return 0; + if (p == 0 && e == 6) return 1; + if (p == 0 && e == 4) return 2; + + /* 16-bit. */ + if (p == 3 && e == 1) return 3; + if (p == 0 && e == 3) return 4; + if (p == 0 && e == 2) return 5; + if (p == 0 && e == 1) return 6; + if (p == 0 && e == 0) return 7; + + /* Symbol. */ + if (p == 3 && e == 0) return 8; + + /* We treat all Mac Latin names as ASCII only. */ + if (p == 1 && e == 0) return 10; /* 10 is magic number :| */ + + return UNSUPPORTED; + } + + NameRecord* copy (hb_serialize_context_t *c, const void *base) const + { + TRACE_SERIALIZE (this); + auto *out = c->embed (this); + if (unlikely (!out)) return_trace (nullptr); + out->offset.serialize_copy (c, offset, base, 0, hb_serialize_context_t::Tail, length); + return_trace (out); + } + + bool isUnicode () const + { + unsigned int p = platformID; + unsigned int e = encodingID; + + return (p == 0 || + (p == 3 && (e == 0 || e == 1 || e == 10))); + } + + static int cmp (const void *pa, const void *pb) + { + const NameRecord *a = (const NameRecord *)pa; + const NameRecord *b = (const NameRecord *)pb; + + if (a->platformID != b->platformID) + return a->platformID - b->platformID; + + if (a->encodingID != b->encodingID) + return a->encodingID - b->encodingID; + + if (a->languageID != b->languageID) + return a->languageID - b->languageID; + + if (a->nameID != b->nameID) + return a->nameID - b->nameID; + + if (a->length != b->length) + return a->length - b->length; + + return 0; + } + + bool sanitize (hb_sanitize_context_t *c, const void *base) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && offset.sanitize (c, base, length)); + } + + HBUINT16 platformID; /* Platform ID. */ + HBUINT16 encodingID; /* Platform-specific encoding ID. */ + HBUINT16 languageID; /* Language ID. */ + HBUINT16 nameID; /* Name ID. */ + HBUINT16 length; /* String length (in bytes). */ + NNOffsetTo<UnsizedArrayOf<HBUINT8>> + offset; /* String offset from start of storage area (in bytes). */ + public: + DEFINE_SIZE_STATIC (12); +}; + +static int +_hb_ot_name_entry_cmp_key (const void *pa, const void *pb) +{ + const hb_ot_name_entry_t *a = (const hb_ot_name_entry_t *) pa; + const hb_ot_name_entry_t *b = (const hb_ot_name_entry_t *) pb; + + /* Compare by name_id, then language. */ + + if (a->name_id != b->name_id) + return a->name_id - b->name_id; + + if (a->language == b->language) return 0; + if (!a->language) return -1; + if (!b->language) return +1; + return strcmp (hb_language_to_string (a->language), + hb_language_to_string (b->language)); +} + +static int +_hb_ot_name_entry_cmp (const void *pa, const void *pb) +{ + /* Compare by name_id, then language, then score, then index. */ + + int v = _hb_ot_name_entry_cmp_key (pa, pb); + if (v) + return v; + + const hb_ot_name_entry_t *a = (const hb_ot_name_entry_t *) pa; + const hb_ot_name_entry_t *b = (const hb_ot_name_entry_t *) pb; + + if (a->entry_score != b->entry_score) + return a->entry_score - b->entry_score; + + if (a->entry_index != b->entry_index) + return a->entry_index - b->entry_index; + + return 0; +} + +struct name +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_name; + + unsigned int get_size () const + { return min_size + count * nameRecordZ.item_size; } + + template <typename Iterator, + hb_requires (hb_is_source_of (Iterator, const NameRecord &))> + bool serialize (hb_serialize_context_t *c, + Iterator it, + const void *src_string_pool) + { + TRACE_SERIALIZE (this); + + if (unlikely (!c->extend_min ((*this)))) return_trace (false); + + this->format = 0; + this->count = it.len (); + + NameRecord *name_records = (NameRecord *) calloc (it.len (), NameRecord::static_size); + if (unlikely (!name_records)) return_trace (false); + + hb_array_t<NameRecord> records (name_records, it.len ()); + + for (const NameRecord& record : it) + { + memcpy (name_records, &record, NameRecord::static_size); + name_records++; + } + + records.qsort (); + + c->copy_all (records, src_string_pool); + free (records.arrayZ); + + if (unlikely (c->ran_out_of_room)) return_trace (false); + + this->stringOffset = c->length (); + + return_trace (true); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + + name *name_prime = c->serializer->start_embed<name> (); + if (unlikely (!name_prime)) return_trace (false); + + auto it = + + nameRecordZ.as_array (count) + | hb_filter (c->plan->name_ids, &NameRecord::nameID) + | hb_filter (c->plan->name_languages, &NameRecord::languageID) + | hb_filter ([&] (const NameRecord& namerecord) { return c->plan->name_legacy || namerecord.isUnicode (); }) + ; + + name_prime->serialize (c->serializer, it, hb_addressof (this + stringOffset)); + return_trace (name_prime->count); + } + + bool sanitize_records (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + const void *string_pool = (this+stringOffset).arrayZ; + return_trace (nameRecordZ.sanitize (c, count, string_pool)); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + likely (format == 0 || format == 1) && + c->check_array (nameRecordZ.arrayZ, count) && + c->check_range (this, stringOffset) && + sanitize_records (c)); + } + + struct accelerator_t + { + void init (hb_face_t *face) + { + this->table = hb_sanitize_context_t ().reference_table<name> (face); + assert (this->table.get_length () >= this->table->stringOffset); + this->pool = (const char *) (const void *) (this->table+this->table->stringOffset); + this->pool_len = this->table.get_length () - this->table->stringOffset; + const hb_array_t<const NameRecord> all_names (this->table->nameRecordZ.arrayZ, + this->table->count); + + this->names.init (); + this->names.alloc (all_names.length); + + for (unsigned int i = 0; i < all_names.length; i++) + { + hb_ot_name_entry_t *entry = this->names.push (); + + entry->name_id = all_names[i].nameID; + entry->language = all_names[i].language (face); + entry->entry_score = all_names[i].score (); + entry->entry_index = i; + } + + this->names.qsort (_hb_ot_name_entry_cmp); + /* Walk and pick best only for each name_id,language pair, + * while dropping unsupported encodings. */ + unsigned int j = 0; + for (unsigned int i = 0; i < this->names.length; i++) + { + if (this->names[i].entry_score == UNSUPPORTED || + this->names[i].language == HB_LANGUAGE_INVALID) + continue; + if (i && + this->names[i - 1].name_id == this->names[i].name_id && + this->names[i - 1].language == this->names[i].language) + continue; + this->names[j++] = this->names[i]; + } + this->names.resize (j); + } + + void fini () + { + this->names.fini (); + this->table.destroy (); + } + + int get_index (hb_ot_name_id_t name_id, + hb_language_t language, + unsigned int *width=nullptr) const + { + const hb_ot_name_entry_t key = {name_id, {0}, language}; + const hb_ot_name_entry_t *entry = hb_bsearch (key, (const hb_ot_name_entry_t *) this->names, + this->names.length, + sizeof (hb_ot_name_entry_t), + _hb_ot_name_entry_cmp_key); + if (!entry) + return -1; + + if (width) + *width = entry->entry_score < 10 ? 2 : 1; + + return entry->entry_index; + } + + hb_bytes_t get_name (unsigned int idx) const + { + const hb_array_t<const NameRecord> all_names (table->nameRecordZ.arrayZ, table->count); + const NameRecord &record = all_names[idx]; + const hb_bytes_t string_pool (pool, pool_len); + return string_pool.sub_array (record.offset, record.length); + } + + private: + const char *pool; + unsigned int pool_len; + public: + hb_blob_ptr_t<name> table; + hb_vector_t<hb_ot_name_entry_t> names; + }; + + /* We only implement format 0 for now. */ + HBUINT16 format; /* Format selector (=0/1). */ + HBUINT16 count; /* Number of name records. */ + NNOffsetTo<UnsizedArrayOf<HBUINT8>> + stringOffset; /* Offset to start of string storage (from start of table). */ + UnsizedArrayOf<NameRecord> + nameRecordZ; /* The name records where count is the number of records. */ + public: + DEFINE_SIZE_ARRAY (6, nameRecordZ); +}; + +#undef entry_index +#undef entry_score + +struct name_accelerator_t : name::accelerator_t {}; + +} /* namespace OT */ + + +#endif /* HB_OT_NAME_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-name.cc b/thirdparty/harfbuzz/src/hb-ot-name.cc new file mode 100644 index 0000000000..10122b8c2e --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-name.cc @@ -0,0 +1,228 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_NAME + +#include "hb-ot-name-table.hh" + +#include "hb-utf.hh" + + +/** + * SECTION:hb-ot-name + * @title: hb-ot-name + * @short_description: OpenType font name information + * @include: hb-ot.h + * + * Functions for fetching name strings from OpenType fonts. + **/ + + +/** + * hb_ot_name_list_names: + * @face: font face. + * @num_entries: (out) (allow-none): number of returned entries. + * + * Enumerates all available name IDs and language combinations. Returned + * array is owned by the @face and should not be modified. It can be + * used as long as @face is alive. + * + * Returns: (out) (transfer none) (array length=num_entries): Array of available name entries. + * Since: 2.1.0 + **/ +const hb_ot_name_entry_t * +hb_ot_name_list_names (hb_face_t *face, + unsigned int *num_entries /* OUT */) +{ + const OT::name_accelerator_t &name = *face->table.name; + if (num_entries) *num_entries = name.names.length; + return (const hb_ot_name_entry_t *) name.names; +} + + +template <typename in_utf_t, typename out_utf_t> +static inline unsigned int +hb_ot_name_convert_utf (hb_bytes_t bytes, + unsigned int *text_size /* IN/OUT */, + typename out_utf_t::codepoint_t *text /* OUT */) +{ + unsigned int src_len = bytes.length / sizeof (typename in_utf_t::codepoint_t); + const typename in_utf_t::codepoint_t *src = (const typename in_utf_t::codepoint_t *) bytes.arrayZ; + const typename in_utf_t::codepoint_t *src_end = src + src_len; + + typename out_utf_t::codepoint_t *dst = text; + + hb_codepoint_t unicode; + const hb_codepoint_t replacement = HB_BUFFER_REPLACEMENT_CODEPOINT_DEFAULT; + + if (text_size && *text_size) + { + (*text_size)--; /* Same room for NUL-termination. */ + const typename out_utf_t::codepoint_t *dst_end = text + *text_size; + + while (src < src_end && dst < dst_end) + { + const typename in_utf_t::codepoint_t *src_next = in_utf_t::next (src, src_end, &unicode, replacement); + typename out_utf_t::codepoint_t *dst_next = out_utf_t::encode (dst, dst_end, unicode); + if (dst_next == dst) + break; /* Out-of-room. */ + + dst = dst_next; + src = src_next; + } + + *text_size = dst - text; + *dst = 0; /* NUL-terminate. */ + } + + /* Accumulate length of rest. */ + unsigned int dst_len = dst - text; + while (src < src_end) + { + src = in_utf_t::next (src, src_end, &unicode, replacement); + dst_len += out_utf_t::encode_len (unicode); + } + return dst_len; +} + +template <typename utf_t> +static inline unsigned int +hb_ot_name_get_utf (hb_face_t *face, + hb_ot_name_id_t name_id, + hb_language_t language, + unsigned int *text_size /* IN/OUT */, + typename utf_t::codepoint_t *text /* OUT */) +{ + const OT::name_accelerator_t &name = *face->table.name; + + if (!language) + language = hb_language_from_string ("en", 2); + + unsigned int width; + int idx = name.get_index (name_id, language, &width); + if (idx != -1) + { + hb_bytes_t bytes = name.get_name (idx); + + if (width == 2) /* UTF16-BE */ + return hb_ot_name_convert_utf<hb_utf16_be_t, utf_t> (bytes, text_size, text); + + if (width == 1) /* ASCII */ + return hb_ot_name_convert_utf<hb_ascii_t, utf_t> (bytes, text_size, text); + } + + if (text_size) + { + if (*text_size) + *text = 0; + *text_size = 0; + } + return 0; +} + +/** + * hb_ot_name_get_utf8: + * @face: font face. + * @name_id: OpenType name identifier to fetch. + * @language: language to fetch the name for. + * @text_size: (inout) (allow-none): input size of @text buffer, and output size of + * text written to buffer. + * @text: (out caller-allocates) (array length=text_size): buffer to write fetched name into. + * + * Fetches a font name from the OpenType 'name' table. + * If @language is #HB_LANGUAGE_INVALID, English ("en") is assumed. + * Returns string in UTF-8 encoding. + * + * Returns: full length of the requested string, or 0 if not found. + * Since: 2.1.0 + **/ +unsigned int +hb_ot_name_get_utf8 (hb_face_t *face, + hb_ot_name_id_t name_id, + hb_language_t language, + unsigned int *text_size /* IN/OUT */, + char *text /* OUT */) +{ + return hb_ot_name_get_utf<hb_utf8_t> (face, name_id, language, text_size, + (hb_utf8_t::codepoint_t *) text); +} + +/** + * hb_ot_name_get_utf16: + * @face: font face. + * @name_id: OpenType name identifier to fetch. + * @language: language to fetch the name for. + * @text_size: (inout) (allow-none): input size of @text buffer, and output size of + * text written to buffer. + * @text: (out caller-allocates) (array length=text_size): buffer to write fetched name into. + * + * Fetches a font name from the OpenType 'name' table. + * If @language is #HB_LANGUAGE_INVALID, English ("en") is assumed. + * Returns string in UTF-16 encoding. + * + * Returns: full length of the requested string, or 0 if not found. + * Since: 2.1.0 + **/ +unsigned int +hb_ot_name_get_utf16 (hb_face_t *face, + hb_ot_name_id_t name_id, + hb_language_t language, + unsigned int *text_size /* IN/OUT */, + uint16_t *text /* OUT */) +{ + return hb_ot_name_get_utf<hb_utf16_t> (face, name_id, language, text_size, text); +} + +/** + * hb_ot_name_get_utf32: + * @face: font face. + * @name_id: OpenType name identifier to fetch. + * @language: language to fetch the name for. + * @text_size: (inout) (allow-none): input size of @text buffer, and output size of + * text written to buffer. + * @text: (out caller-allocates) (array length=text_size): buffer to write fetched name into. + * + * Fetches a font name from the OpenType 'name' table. + * If @language is #HB_LANGUAGE_INVALID, English ("en") is assumed. + * Returns string in UTF-32 encoding. + * + * Returns: full length of the requested string, or 0 if not found. + * Since: 2.1.0 + **/ +unsigned int +hb_ot_name_get_utf32 (hb_face_t *face, + hb_ot_name_id_t name_id, + hb_language_t language, + unsigned int *text_size /* IN/OUT */, + uint32_t *text /* OUT */) +{ + return hb_ot_name_get_utf<hb_utf32_t> (face, name_id, language, text_size, text); +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-name.h b/thirdparty/harfbuzz/src/hb-ot-name.h new file mode 100644 index 0000000000..3b4ad581c7 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-name.h @@ -0,0 +1,129 @@ +/* + * Copyright © 2018 Ebrahim Byagowi. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_OT_H_IN +#error "Include <hb-ot.h> instead." +#endif + +#ifndef HB_OT_NAME_H +#define HB_OT_NAME_H + +#include "hb.h" + +HB_BEGIN_DECLS + + +/** + * hb_ot_name_id_t: + * @HB_OT_NAME_ID_INVALID: Value to represent a nonexistent name ID. + * + * An integral type representing an OpenType 'name' table name identifier. + * There are predefined name IDs, as well as name IDs return from other + * API. These can be used to fetch name strings from a font face. + * + * Since: 2.0.0 + **/ +enum +{ + HB_OT_NAME_ID_COPYRIGHT = 0, + HB_OT_NAME_ID_FONT_FAMILY = 1, + HB_OT_NAME_ID_FONT_SUBFAMILY = 2, + HB_OT_NAME_ID_UNIQUE_ID = 3, + HB_OT_NAME_ID_FULL_NAME = 4, + HB_OT_NAME_ID_VERSION_STRING = 5, + HB_OT_NAME_ID_POSTSCRIPT_NAME = 6, + HB_OT_NAME_ID_TRADEMARK = 7, + HB_OT_NAME_ID_MANUFACTURER = 8, + HB_OT_NAME_ID_DESIGNER = 9, + HB_OT_NAME_ID_DESCRIPTION = 10, + HB_OT_NAME_ID_VENDOR_URL = 11, + HB_OT_NAME_ID_DESIGNER_URL = 12, + HB_OT_NAME_ID_LICENSE = 13, + HB_OT_NAME_ID_LICENSE_URL = 14, +/*HB_OT_NAME_ID_RESERVED = 15,*/ + HB_OT_NAME_ID_TYPOGRAPHIC_FAMILY = 16, + HB_OT_NAME_ID_TYPOGRAPHIC_SUBFAMILY = 17, + HB_OT_NAME_ID_MAC_FULL_NAME = 18, + HB_OT_NAME_ID_SAMPLE_TEXT = 19, + HB_OT_NAME_ID_CID_FINDFONT_NAME = 20, + HB_OT_NAME_ID_WWS_FAMILY = 21, + HB_OT_NAME_ID_WWS_SUBFAMILY = 22, + HB_OT_NAME_ID_LIGHT_BACKGROUND = 23, + HB_OT_NAME_ID_DARK_BACKGROUND = 24, + HB_OT_NAME_ID_VARIATIONS_PS_PREFIX = 25, + + HB_OT_NAME_ID_INVALID = 0xFFFF +}; + +typedef unsigned int hb_ot_name_id_t; + + +/** + * hb_ot_name_entry_t: + * @name_id: name ID + * @language: language + * + * Structure representing a name ID in a particular language. + * + * Since: 2.1.0 + **/ +typedef struct hb_ot_name_entry_t +{ + hb_ot_name_id_t name_id; + /*< private >*/ + hb_var_int_t var; + /*< public >*/ + hb_language_t language; +} hb_ot_name_entry_t; + +HB_EXTERN const hb_ot_name_entry_t * +hb_ot_name_list_names (hb_face_t *face, + unsigned int *num_entries /* OUT */); + + +HB_EXTERN unsigned int +hb_ot_name_get_utf8 (hb_face_t *face, + hb_ot_name_id_t name_id, + hb_language_t language, + unsigned int *text_size /* IN/OUT */, + char *text /* OUT */); + +HB_EXTERN unsigned int +hb_ot_name_get_utf16 (hb_face_t *face, + hb_ot_name_id_t name_id, + hb_language_t language, + unsigned int *text_size /* IN/OUT */, + uint16_t *text /* OUT */); + +HB_EXTERN unsigned int +hb_ot_name_get_utf32 (hb_face_t *face, + hb_ot_name_id_t name_id, + hb_language_t language, + unsigned int *text_size /* IN/OUT */, + uint32_t *text /* OUT */); + + +HB_END_DECLS + +#endif /* HB_OT_NAME_H */ diff --git a/thirdparty/harfbuzz/src/hb-ot-os2-table.hh b/thirdparty/harfbuzz/src/hb-ot-os2-table.hh new file mode 100644 index 0000000000..7d31b712c4 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-os2-table.hh @@ -0,0 +1,316 @@ +/* + * Copyright © 2011,2012 Google, Inc. + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_OS2_TABLE_HH +#define HB_OT_OS2_TABLE_HH + +#include "hb-open-type.hh" +#include "hb-ot-os2-unicode-ranges.hh" +#include "hb-ot-cmap-table.hh" + +#include "hb-set.hh" + +/* + * OS/2 and Windows Metrics + * https://docs.microsoft.com/en-us/typography/opentype/spec/os2 + */ +#define HB_OT_TAG_OS2 HB_TAG('O','S','/','2') + + +namespace OT { + +struct OS2V1Tail +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + public: + HBUINT32 ulCodePageRange1; + HBUINT32 ulCodePageRange2; + public: + DEFINE_SIZE_STATIC (8); +}; + +struct OS2V2Tail +{ + bool has_data () const { return sxHeight || sCapHeight; } + + const OS2V2Tail * operator -> () const { return this; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + public: + HBINT16 sxHeight; + HBINT16 sCapHeight; + HBUINT16 usDefaultChar; + HBUINT16 usBreakChar; + HBUINT16 usMaxContext; + public: + DEFINE_SIZE_STATIC (10); +}; + +struct OS2V5Tail +{ + inline bool get_optical_size (unsigned int *lower, unsigned int *upper) const + { + unsigned int lower_optical_size = usLowerOpticalPointSize; + unsigned int upper_optical_size = usUpperOpticalPointSize; + + /* Per https://docs.microsoft.com/en-us/typography/opentype/spec/os2#lps */ + if (lower_optical_size < upper_optical_size && + lower_optical_size >= 1 && lower_optical_size <= 0xFFFE && + upper_optical_size >= 2 && upper_optical_size <= 0xFFFF) + { + *lower = lower_optical_size; + *upper = upper_optical_size; + return true; + } + return false; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + public: + HBUINT16 usLowerOpticalPointSize; + HBUINT16 usUpperOpticalPointSize; + public: + DEFINE_SIZE_STATIC (4); +}; + +struct OS2 +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_OS2; + + bool has_data () const { return usWeightClass || usWidthClass || usFirstCharIndex || usLastCharIndex; } + + const OS2V1Tail &v1 () const { return version >= 1 ? v1X : Null (OS2V1Tail); } + const OS2V2Tail &v2 () const { return version >= 2 ? v2X : Null (OS2V2Tail); } + const OS2V5Tail &v5 () const { return version >= 5 ? v5X : Null (OS2V5Tail); } + + enum selection_flag_t { + ITALIC = 1u<<0, + UNDERSCORE = 1u<<1, + NEGATIVE = 1u<<2, + OUTLINED = 1u<<3, + STRIKEOUT = 1u<<4, + BOLD = 1u<<5, + REGULAR = 1u<<6, + USE_TYPO_METRICS = 1u<<7, + WWS = 1u<<8, + OBLIQUE = 1u<<9 + }; + + bool is_italic () const { return fsSelection & ITALIC; } + bool is_oblique () const { return fsSelection & OBLIQUE; } + bool use_typo_metrics () const { return fsSelection & USE_TYPO_METRICS; } + + enum width_class_t { + FWIDTH_ULTRA_CONDENSED = 1, /* 50% */ + FWIDTH_EXTRA_CONDENSED = 2, /* 62.5% */ + FWIDTH_CONDENSED = 3, /* 75% */ + FWIDTH_SEMI_CONDENSED = 4, /* 87.5% */ + FWIDTH_NORMAL = 5, /* 100% */ + FWIDTH_SEMI_EXPANDED = 6, /* 112.5% */ + FWIDTH_EXPANDED = 7, /* 125% */ + FWIDTH_EXTRA_EXPANDED = 8, /* 150% */ + FWIDTH_ULTRA_EXPANDED = 9 /* 200% */ + }; + + float get_width () const + { + switch (usWidthClass) { + case FWIDTH_ULTRA_CONDENSED:return 50.f; + case FWIDTH_EXTRA_CONDENSED:return 62.5f; + case FWIDTH_CONDENSED: return 75.f; + case FWIDTH_SEMI_CONDENSED: return 87.5f; + default: + case FWIDTH_NORMAL: return 100.f; + case FWIDTH_SEMI_EXPANDED: return 112.5f; + case FWIDTH_EXPANDED: return 125.f; + case FWIDTH_EXTRA_EXPANDED: return 150.f; + case FWIDTH_ULTRA_EXPANDED: return 200.f; + } + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + OS2 *os2_prime = c->serializer->embed (this); + if (unlikely (!os2_prime)) return_trace (false); + + hb_set_t unicodes; + if (!c->plan->glyphs_requested->is_empty ()) + { + hb_map_t unicode_glyphid_map; + + OT::cmap::accelerator_t cmap; + cmap.init (c->plan->source); + cmap.collect_mapping (&unicodes, &unicode_glyphid_map); + cmap.fini (); + + if (c->plan->unicodes->is_empty ()) unicodes.clear (); + else hb_set_set (&unicodes, c->plan->unicodes); + + + unicode_glyphid_map.iter () + | hb_filter (c->plan->glyphs_requested, hb_second) + | hb_map (hb_first) + | hb_sink (unicodes) + ; + } + /* when --gids option is not used, no need to do collect_mapping that is + * iterating all codepoints in each subtable, which is not efficient */ + uint16_t min_cp, max_cp; + find_min_and_max_codepoint (unicodes.is_empty () ? c->plan->unicodes : &unicodes, &min_cp, &max_cp); + os2_prime->usFirstCharIndex = min_cp; + os2_prime->usLastCharIndex = max_cp; + + _update_unicode_ranges (unicodes.is_empty () ? c->plan->unicodes : &unicodes, os2_prime->ulUnicodeRange); + + return_trace (true); + } + + void _update_unicode_ranges (const hb_set_t *codepoints, + HBUINT32 ulUnicodeRange[4]) const + { + HBUINT32 newBits[4]; + for (unsigned int i = 0; i < 4; i++) + newBits[i] = 0; + + hb_codepoint_t cp = HB_SET_VALUE_INVALID; + while (codepoints->next (&cp)) { + unsigned int bit = _hb_ot_os2_get_unicode_range_bit (cp); + if (bit < 128) + { + unsigned int block = bit / 32; + unsigned int bit_in_block = bit % 32; + unsigned int mask = 1 << bit_in_block; + newBits[block] = newBits[block] | mask; + } + if (cp >= 0x10000 && cp <= 0x110000) + { + /* the spec says that bit 57 ("Non Plane 0") implies that there's + at least one codepoint beyond the BMP; so I also include all + the non-BMP codepoints here */ + newBits[1] = newBits[1] | (1 << 25); + } + } + + for (unsigned int i = 0; i < 4; i++) + ulUnicodeRange[i] = ulUnicodeRange[i] & newBits[i]; // set bits only if set in the original + } + + static void find_min_and_max_codepoint (const hb_set_t *codepoints, + uint16_t *min_cp, /* OUT */ + uint16_t *max_cp /* OUT */) + { + *min_cp = hb_min (0xFFFFu, codepoints->get_min ()); + *max_cp = hb_min (0xFFFFu, codepoints->get_max ()); + } + + /* https://github.com/Microsoft/Font-Validator/blob/520aaae/OTFontFileVal/val_OS2.cs#L644-L681 */ + enum font_page_t + { + FONT_PAGE_HEBREW = 0xB100, /* Hebrew Windows 3.1 font page */ + FONT_PAGE_SIMP_ARABIC = 0xB200, /* Simplified Arabic Windows 3.1 font page */ + FONT_PAGE_TRAD_ARABIC = 0xB300, /* Traditional Arabic Windows 3.1 font page */ + FONT_PAGE_OEM_ARABIC = 0xB400, /* OEM Arabic Windows 3.1 font page */ + FONT_PAGE_SIMP_FARSI = 0xBA00, /* Simplified Farsi Windows 3.1 font page */ + FONT_PAGE_TRAD_FARSI = 0xBB00, /* Traditional Farsi Windows 3.1 font page */ + FONT_PAGE_THAI = 0xDE00 /* Thai Windows 3.1 font page */ + }; + font_page_t get_font_page () const + { return (font_page_t) (version == 0 ? fsSelection & 0xFF00 : 0); } + + unsigned get_size () const + { + unsigned result = min_size; + if (version >= 1) result += v1X.get_size (); + if (version >= 2) result += v2X.get_size (); + if (version >= 5) result += v5X.get_size (); + return result; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this))) return_trace (false); + if (unlikely (version >= 1 && !v1X.sanitize (c))) return_trace (false); + if (unlikely (version >= 2 && !v2X.sanitize (c))) return_trace (false); + if (unlikely (version >= 5 && !v5X.sanitize (c))) return_trace (false); + return_trace (true); + } + + public: + HBUINT16 version; + HBINT16 xAvgCharWidth; + HBUINT16 usWeightClass; + HBUINT16 usWidthClass; + HBUINT16 fsType; + HBINT16 ySubscriptXSize; + HBINT16 ySubscriptYSize; + HBINT16 ySubscriptXOffset; + HBINT16 ySubscriptYOffset; + HBINT16 ySuperscriptXSize; + HBINT16 ySuperscriptYSize; + HBINT16 ySuperscriptXOffset; + HBINT16 ySuperscriptYOffset; + HBINT16 yStrikeoutSize; + HBINT16 yStrikeoutPosition; + HBINT16 sFamilyClass; + HBUINT8 panose[10]; + HBUINT32 ulUnicodeRange[4]; + Tag achVendID; + HBUINT16 fsSelection; + HBUINT16 usFirstCharIndex; + HBUINT16 usLastCharIndex; + HBINT16 sTypoAscender; + HBINT16 sTypoDescender; + HBINT16 sTypoLineGap; + HBUINT16 usWinAscent; + HBUINT16 usWinDescent; + OS2V1Tail v1X; + OS2V2Tail v2X; + OS2V5Tail v5X; + public: + DEFINE_SIZE_MIN (78); +}; + +} /* namespace OT */ + + +#endif /* HB_OT_OS2_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-os2-unicode-ranges.hh b/thirdparty/harfbuzz/src/hb-ot-os2-unicode-ranges.hh new file mode 100644 index 0000000000..9613d2d186 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-os2-unicode-ranges.hh @@ -0,0 +1,231 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Garret Rieger + */ + +#ifndef HB_OT_OS2_UNICODE_RANGES_HH +#define HB_OT_OS2_UNICODE_RANGES_HH + +#include "hb.hh" + +namespace OT { + +struct OS2Range +{ + int cmp (hb_codepoint_t key) const + { return (key < start) ? -1 : key <= end ? 0 : +1; } + + hb_codepoint_t start; + hb_codepoint_t end; + unsigned int bit; +}; + +/* Note: The contents of this array was generated using gen-os2-unicode-ranges.py. */ +static const OS2Range _hb_os2_unicode_ranges[] = +{ + { 0x0, 0x7F, 0}, // Basic Latin + { 0x80, 0xFF, 1}, // Latin-1 Supplement + { 0x100, 0x17F, 2}, // Latin Extended-A + { 0x180, 0x24F, 3}, // Latin Extended-B + { 0x250, 0x2AF, 4}, // IPA Extensions + { 0x2B0, 0x2FF, 5}, // Spacing Modifier Letters + { 0x300, 0x36F, 6}, // Combining Diacritical Marks + { 0x370, 0x3FF, 7}, // Greek and Coptic + { 0x400, 0x4FF, 9}, // Cyrillic + { 0x500, 0x52F, 9}, // Cyrillic Supplement + { 0x530, 0x58F, 10}, // Armenian + { 0x590, 0x5FF, 11}, // Hebrew + { 0x600, 0x6FF, 13}, // Arabic + { 0x700, 0x74F, 71}, // Syriac + { 0x750, 0x77F, 13}, // Arabic Supplement + { 0x780, 0x7BF, 72}, // Thaana + { 0x7C0, 0x7FF, 14}, // NKo + { 0x900, 0x97F, 15}, // Devanagari + { 0x980, 0x9FF, 16}, // Bengali + { 0xA00, 0xA7F, 17}, // Gurmukhi + { 0xA80, 0xAFF, 18}, // Gujarati + { 0xB00, 0xB7F, 19}, // Oriya + { 0xB80, 0xBFF, 20}, // Tamil + { 0xC00, 0xC7F, 21}, // Telugu + { 0xC80, 0xCFF, 22}, // Kannada + { 0xD00, 0xD7F, 23}, // Malayalam + { 0xD80, 0xDFF, 73}, // Sinhala + { 0xE00, 0xE7F, 24}, // Thai + { 0xE80, 0xEFF, 25}, // Lao + { 0xF00, 0xFFF, 70}, // Tibetan + { 0x1000, 0x109F, 74}, // Myanmar + { 0x10A0, 0x10FF, 26}, // Georgian + { 0x1100, 0x11FF, 28}, // Hangul Jamo + { 0x1200, 0x137F, 75}, // Ethiopic + { 0x1380, 0x139F, 75}, // Ethiopic Supplement + { 0x13A0, 0x13FF, 76}, // Cherokee + { 0x1400, 0x167F, 77}, // Unified Canadian Aboriginal Syllabics + { 0x1680, 0x169F, 78}, // Ogham + { 0x16A0, 0x16FF, 79}, // Runic + { 0x1700, 0x171F, 84}, // Tagalog + { 0x1720, 0x173F, 84}, // Hanunoo + { 0x1740, 0x175F, 84}, // Buhid + { 0x1760, 0x177F, 84}, // Tagbanwa + { 0x1780, 0x17FF, 80}, // Khmer + { 0x1800, 0x18AF, 81}, // Mongolian + { 0x1900, 0x194F, 93}, // Limbu + { 0x1950, 0x197F, 94}, // Tai Le + { 0x1980, 0x19DF, 95}, // New Tai Lue + { 0x19E0, 0x19FF, 80}, // Khmer Symbols + { 0x1A00, 0x1A1F, 96}, // Buginese + { 0x1B00, 0x1B7F, 27}, // Balinese + { 0x1B80, 0x1BBF, 112}, // Sundanese + { 0x1C00, 0x1C4F, 113}, // Lepcha + { 0x1C50, 0x1C7F, 114}, // Ol Chiki + { 0x1D00, 0x1D7F, 4}, // Phonetic Extensions + { 0x1D80, 0x1DBF, 4}, // Phonetic Extensions Supplement + { 0x1DC0, 0x1DFF, 6}, // Combining Diacritical Marks Supplement + { 0x1E00, 0x1EFF, 29}, // Latin Extended Additional + { 0x1F00, 0x1FFF, 30}, // Greek Extended + { 0x2000, 0x206F, 31}, // General Punctuation + { 0x2070, 0x209F, 32}, // Superscripts And Subscripts + { 0x20A0, 0x20CF, 33}, // Currency Symbols + { 0x20D0, 0x20FF, 34}, // Combining Diacritical Marks For Symbols + { 0x2100, 0x214F, 35}, // Letterlike Symbols + { 0x2150, 0x218F, 36}, // Number Forms + { 0x2190, 0x21FF, 37}, // Arrows + { 0x2200, 0x22FF, 38}, // Mathematical Operators + { 0x2300, 0x23FF, 39}, // Miscellaneous Technical + { 0x2400, 0x243F, 40}, // Control Pictures + { 0x2440, 0x245F, 41}, // Optical Character Recognition + { 0x2460, 0x24FF, 42}, // Enclosed Alphanumerics + { 0x2500, 0x257F, 43}, // Box Drawing + { 0x2580, 0x259F, 44}, // Block Elements + { 0x25A0, 0x25FF, 45}, // Geometric Shapes + { 0x2600, 0x26FF, 46}, // Miscellaneous Symbols + { 0x2700, 0x27BF, 47}, // Dingbats + { 0x27C0, 0x27EF, 38}, // Miscellaneous Mathematical Symbols-A + { 0x27F0, 0x27FF, 37}, // Supplemental Arrows-A + { 0x2800, 0x28FF, 82}, // Braille Patterns + { 0x2900, 0x297F, 37}, // Supplemental Arrows-B + { 0x2980, 0x29FF, 38}, // Miscellaneous Mathematical Symbols-B + { 0x2A00, 0x2AFF, 38}, // Supplemental Mathematical Operators + { 0x2B00, 0x2BFF, 37}, // Miscellaneous Symbols and Arrows + { 0x2C00, 0x2C5F, 97}, // Glagolitic + { 0x2C60, 0x2C7F, 29}, // Latin Extended-C + { 0x2C80, 0x2CFF, 8}, // Coptic + { 0x2D00, 0x2D2F, 26}, // Georgian Supplement + { 0x2D30, 0x2D7F, 98}, // Tifinagh + { 0x2D80, 0x2DDF, 75}, // Ethiopic Extended + { 0x2DE0, 0x2DFF, 9}, // Cyrillic Extended-A + { 0x2E00, 0x2E7F, 31}, // Supplemental Punctuation + { 0x2E80, 0x2EFF, 59}, // CJK Radicals Supplement + { 0x2F00, 0x2FDF, 59}, // Kangxi Radicals + { 0x2FF0, 0x2FFF, 59}, // Ideographic Description Characters + { 0x3000, 0x303F, 48}, // CJK Symbols And Punctuation + { 0x3040, 0x309F, 49}, // Hiragana + { 0x30A0, 0x30FF, 50}, // Katakana + { 0x3100, 0x312F, 51}, // Bopomofo + { 0x3130, 0x318F, 52}, // Hangul Compatibility Jamo + { 0x3190, 0x319F, 59}, // Kanbun + { 0x31A0, 0x31BF, 51}, // Bopomofo Extended + { 0x31C0, 0x31EF, 61}, // CJK Strokes + { 0x31F0, 0x31FF, 50}, // Katakana Phonetic Extensions + { 0x3200, 0x32FF, 54}, // Enclosed CJK Letters And Months + { 0x3300, 0x33FF, 55}, // CJK Compatibility + { 0x3400, 0x4DBF, 59}, // CJK Unified Ideographs Extension A + { 0x4DC0, 0x4DFF, 99}, // Yijing Hexagram Symbols + { 0x4E00, 0x9FFF, 59}, // CJK Unified Ideographs + { 0xA000, 0xA48F, 83}, // Yi Syllables + { 0xA490, 0xA4CF, 83}, // Yi Radicals + { 0xA500, 0xA63F, 12}, // Vai + { 0xA640, 0xA69F, 9}, // Cyrillic Extended-B + { 0xA700, 0xA71F, 5}, // Modifier Tone Letters + { 0xA720, 0xA7FF, 29}, // Latin Extended-D + { 0xA800, 0xA82F, 100}, // Syloti Nagri + { 0xA840, 0xA87F, 53}, // Phags-pa + { 0xA880, 0xA8DF, 115}, // Saurashtra + { 0xA900, 0xA92F, 116}, // Kayah Li + { 0xA930, 0xA95F, 117}, // Rejang + { 0xAA00, 0xAA5F, 118}, // Cham + { 0xAC00, 0xD7AF, 56}, // Hangul Syllables + { 0xD800, 0xDFFF, 57}, // Non-Plane 0 * + { 0xE000, 0xF8FF, 60}, // Private Use Area (plane 0) + { 0xF900, 0xFAFF, 61}, // CJK Compatibility Ideographs + { 0xFB00, 0xFB4F, 62}, // Alphabetic Presentation Forms + { 0xFB50, 0xFDFF, 63}, // Arabic Presentation Forms-A + { 0xFE00, 0xFE0F, 91}, // Variation Selectors + { 0xFE10, 0xFE1F, 65}, // Vertical Forms + { 0xFE20, 0xFE2F, 64}, // Combining Half Marks + { 0xFE30, 0xFE4F, 65}, // CJK Compatibility Forms + { 0xFE50, 0xFE6F, 66}, // Small Form Variants + { 0xFE70, 0xFEFF, 67}, // Arabic Presentation Forms-B + { 0xFF00, 0xFFEF, 68}, // Halfwidth And Fullwidth Forms + { 0xFFF0, 0xFFFF, 69}, // Specials + { 0x10000, 0x1007F, 101}, // Linear B Syllabary + { 0x10080, 0x100FF, 101}, // Linear B Ideograms + { 0x10100, 0x1013F, 101}, // Aegean Numbers + { 0x10140, 0x1018F, 102}, // Ancient Greek Numbers + { 0x10190, 0x101CF, 119}, // Ancient Symbols + { 0x101D0, 0x101FF, 120}, // Phaistos Disc + { 0x10280, 0x1029F, 121}, // Lycian + { 0x102A0, 0x102DF, 121}, // Carian + { 0x10300, 0x1032F, 85}, // Old Italic + { 0x10330, 0x1034F, 86}, // Gothic + { 0x10380, 0x1039F, 103}, // Ugaritic + { 0x103A0, 0x103DF, 104}, // Old Persian + { 0x10400, 0x1044F, 87}, // Deseret + { 0x10450, 0x1047F, 105}, // Shavian + { 0x10480, 0x104AF, 106}, // Osmanya + { 0x10800, 0x1083F, 107}, // Cypriot Syllabary + { 0x10900, 0x1091F, 58}, // Phoenician + { 0x10920, 0x1093F, 121}, // Lydian + { 0x10A00, 0x10A5F, 108}, // Kharoshthi + { 0x12000, 0x123FF, 110}, // Cuneiform + { 0x12400, 0x1247F, 110}, // Cuneiform Numbers and Punctuation + { 0x1D000, 0x1D0FF, 88}, // Byzantine Musical Symbols + { 0x1D100, 0x1D1FF, 88}, // Musical Symbols + { 0x1D200, 0x1D24F, 88}, // Ancient Greek Musical Notation + { 0x1D300, 0x1D35F, 109}, // Tai Xuan Jing Symbols + { 0x1D360, 0x1D37F, 111}, // Counting Rod Numerals + { 0x1D400, 0x1D7FF, 89}, // Mathematical Alphanumeric Symbols + { 0x1F000, 0x1F02F, 122}, // Mahjong Tiles + { 0x1F030, 0x1F09F, 122}, // Domino Tiles + { 0x20000, 0x2A6DF, 59}, // CJK Unified Ideographs Extension B + { 0x2F800, 0x2FA1F, 61}, // CJK Compatibility Ideographs Supplement + { 0xE0000, 0xE007F, 92}, // Tags + { 0xE0100, 0xE01EF, 91}, // Variation Selectors Supplement + { 0xF0000, 0xFFFFD, 90}, // Private Use (plane 15) + {0x100000, 0x10FFFD, 90}, // Private Use (plane 16) +}; + +/** + * _hb_ot_os2_get_unicode_range_bit: + * Returns the bit to be set in os/2 ulUnicodeOS2Range for a given codepoint. + **/ +static unsigned int +_hb_ot_os2_get_unicode_range_bit (hb_codepoint_t cp) +{ + auto *range = hb_sorted_array (_hb_os2_unicode_ranges).bsearch (cp); + return range ? range->bit : -1; +} + +} /* namespace OT */ + +#endif /* HB_OT_OS2_UNICODE_RANGES_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-post-macroman.hh b/thirdparty/harfbuzz/src/hb-ot-post-macroman.hh new file mode 100644 index 0000000000..b4df8aaeea --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-post-macroman.hh @@ -0,0 +1,294 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_POST_MACROMAN_HH +#if 0 /* Make checks happy. */ +#define HB_OT_POST_MACROMAN_HH +#include "hb.hh" +#endif + + +_S(".notdef") +_S(".null") +_S("nonmarkingreturn") +_S("space") +_S("exclam") +_S("quotedbl") +_S("numbersign") +_S("dollar") +_S("percent") +_S("ampersand") +_S("quotesingle") +_S("parenleft") +_S("parenright") +_S("asterisk") +_S("plus") +_S("comma") +_S("hyphen") +_S("period") +_S("slash") +_S("zero") +_S("one") +_S("two") +_S("three") +_S("four") +_S("five") +_S("six") +_S("seven") +_S("eight") +_S("nine") +_S("colon") +_S("semicolon") +_S("less") +_S("equal") +_S("greater") +_S("question") +_S("at") +_S("A") +_S("B") +_S("C") +_S("D") +_S("E") +_S("F") +_S("G") +_S("H") +_S("I") +_S("J") +_S("K") +_S("L") +_S("M") +_S("N") +_S("O") +_S("P") +_S("Q") +_S("R") +_S("S") +_S("T") +_S("U") +_S("V") +_S("W") +_S("X") +_S("Y") +_S("Z") +_S("bracketleft") +_S("backslash") +_S("bracketright") +_S("asciicircum") +_S("underscore") +_S("grave") +_S("a") +_S("b") +_S("c") +_S("d") +_S("e") +_S("f") +_S("g") +_S("h") +_S("i") +_S("j") +_S("k") +_S("l") +_S("m") +_S("n") +_S("o") +_S("p") +_S("q") +_S("r") +_S("s") +_S("t") +_S("u") +_S("v") +_S("w") +_S("x") +_S("y") +_S("z") +_S("braceleft") +_S("bar") +_S("braceright") +_S("asciitilde") +_S("Adieresis") +_S("Aring") +_S("Ccedilla") +_S("Eacute") +_S("Ntilde") +_S("Odieresis") +_S("Udieresis") +_S("aacute") +_S("agrave") +_S("acircumflex") +_S("adieresis") +_S("atilde") +_S("aring") +_S("ccedilla") +_S("eacute") +_S("egrave") +_S("ecircumflex") +_S("edieresis") +_S("iacute") +_S("igrave") +_S("icircumflex") +_S("idieresis") +_S("ntilde") +_S("oacute") +_S("ograve") +_S("ocircumflex") +_S("odieresis") +_S("otilde") +_S("uacute") +_S("ugrave") +_S("ucircumflex") +_S("udieresis") +_S("dagger") +_S("degree") +_S("cent") +_S("sterling") +_S("section") +_S("bullet") +_S("paragraph") +_S("germandbls") +_S("registered") +_S("copyright") +_S("trademark") +_S("acute") +_S("dieresis") +_S("notequal") +_S("AE") +_S("Oslash") +_S("infinity") +_S("plusminus") +_S("lessequal") +_S("greaterequal") +_S("yen") +_S("mu") +_S("partialdiff") +_S("summation") +_S("product") +_S("pi") +_S("integral") +_S("ordfeminine") +_S("ordmasculine") +_S("Omega") +_S("ae") +_S("oslash") +_S("questiondown") +_S("exclamdown") +_S("logicalnot") +_S("radical") +_S("florin") +_S("approxequal") +_S("Delta") +_S("guillemotleft") +_S("guillemotright") +_S("ellipsis") +_S("nonbreakingspace") +_S("Agrave") +_S("Atilde") +_S("Otilde") +_S("OE") +_S("oe") +_S("endash") +_S("emdash") +_S("quotedblleft") +_S("quotedblright") +_S("quoteleft") +_S("quoteright") +_S("divide") +_S("lozenge") +_S("ydieresis") +_S("Ydieresis") +_S("fraction") +_S("currency") +_S("guilsinglleft") +_S("guilsinglright") +_S("fi") +_S("fl") +_S("daggerdbl") +_S("periodcentered") +_S("quotesinglbase") +_S("quotedblbase") +_S("perthousand") +_S("Acircumflex") +_S("Ecircumflex") +_S("Aacute") +_S("Edieresis") +_S("Egrave") +_S("Iacute") +_S("Icircumflex") +_S("Idieresis") +_S("Igrave") +_S("Oacute") +_S("Ocircumflex") +_S("apple") +_S("Ograve") +_S("Uacute") +_S("Ucircumflex") +_S("Ugrave") +_S("dotlessi") +_S("circumflex") +_S("tilde") +_S("macron") +_S("breve") +_S("dotaccent") +_S("ring") +_S("cedilla") +_S("hungarumlaut") +_S("ogonek") +_S("caron") +_S("Lslash") +_S("lslash") +_S("Scaron") +_S("scaron") +_S("Zcaron") +_S("zcaron") +_S("brokenbar") +_S("Eth") +_S("eth") +_S("Yacute") +_S("yacute") +_S("Thorn") +_S("thorn") +_S("minus") +_S("multiply") +_S("onesuperior") +_S("twosuperior") +_S("threesuperior") +_S("onehalf") +_S("onequarter") +_S("threequarters") +_S("franc") +_S("Gbreve") +_S("gbreve") +_S("Idotaccent") +_S("Scedilla") +_S("scedilla") +_S("Cacute") +_S("cacute") +_S("Ccaron") +_S("ccaron") +_S("dcroat") + + +#endif /* HB_OT_POST_MACROMAN_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-post-table.hh b/thirdparty/harfbuzz/src/hb-ot-post-table.hh new file mode 100644 index 0000000000..8586331cd4 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-post-table.hh @@ -0,0 +1,298 @@ +/* + * Copyright © 2016 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_POST_TABLE_HH +#define HB_OT_POST_TABLE_HH + +#include "hb-open-type.hh" + +#define HB_STRING_ARRAY_NAME format1_names +#define HB_STRING_ARRAY_LIST "hb-ot-post-macroman.hh" +#include "hb-string-array.hh" +#undef HB_STRING_ARRAY_LIST +#undef HB_STRING_ARRAY_NAME + +/* + * post -- PostScript + * https://docs.microsoft.com/en-us/typography/opentype/spec/post + */ +#define HB_OT_TAG_post HB_TAG('p','o','s','t') + + +namespace OT { + + +struct postV2Tail +{ + friend struct post; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (glyphNameIndex.sanitize (c)); + } + + protected: + ArrayOf<HBUINT16> glyphNameIndex; /* This is not an offset, but is the + * ordinal number of the glyph in 'post' + * string tables. */ +/*UnsizedArrayOf<HBUINT8> + namesX;*/ /* Glyph names with length bytes [variable] + * (a Pascal string). */ + + public: + DEFINE_SIZE_ARRAY (2, glyphNameIndex); +}; + +struct post +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_post; + + void serialize (hb_serialize_context_t *c) const + { + post *post_prime = c->allocate_min<post> (); + if (unlikely (!post_prime)) return; + + memcpy (post_prime, this, post::min_size); + post_prime->version.major = 3; // Version 3 does not have any glyph names. + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + post *post_prime = c->serializer->start_embed<post> (); + if (unlikely (!post_prime)) return_trace (false); + + serialize (c->serializer); + if (c->serializer->in_error () || c->serializer->ran_out_of_room) return_trace (false); + + return_trace (true); + } + + struct accelerator_t + { + void init (hb_face_t *face) + { + index_to_offset.init (); + + table = hb_sanitize_context_t ().reference_table<post> (face); + unsigned int table_length = table.get_length (); + + version = table->version.to_int (); + if (version != 0x00020000) return; + + const postV2Tail &v2 = table->v2X; + + glyphNameIndex = &v2.glyphNameIndex; + pool = &StructAfter<uint8_t> (v2.glyphNameIndex); + + const uint8_t *end = (const uint8_t *) (const void *) table + table_length; + for (const uint8_t *data = pool; + index_to_offset.length < 65535 && data < end && data + *data < end; + data += 1 + *data) + index_to_offset.push (data - pool); + } + void fini () + { + index_to_offset.fini (); + free (gids_sorted_by_name.get ()); + table.destroy (); + } + + bool get_glyph_name (hb_codepoint_t glyph, + char *buf, unsigned int buf_len) const + { + hb_bytes_t s = find_glyph_name (glyph); + if (!s.length) return false; + if (!buf_len) return true; + unsigned int len = hb_min (buf_len - 1, s.length); + strncpy (buf, s.arrayZ, len); + buf[len] = '\0'; + return true; + } + + bool get_glyph_from_name (const char *name, int len, + hb_codepoint_t *glyph) const + { + unsigned int count = get_glyph_count (); + if (unlikely (!count)) return false; + + if (len < 0) len = strlen (name); + + if (unlikely (!len)) return false; + + retry: + uint16_t *gids = gids_sorted_by_name.get (); + + if (unlikely (!gids)) + { + gids = (uint16_t *) malloc (count * sizeof (gids[0])); + if (unlikely (!gids)) + return false; /* Anything better?! */ + + for (unsigned int i = 0; i < count; i++) + gids[i] = i; + hb_qsort (gids, count, sizeof (gids[0]), cmp_gids, (void *) this); + + if (unlikely (!gids_sorted_by_name.cmpexch (nullptr, gids))) + { + free (gids); + goto retry; + } + } + + hb_bytes_t st (name, len); + auto* gid = hb_bsearch (st, gids, count, sizeof (gids[0]), cmp_key, (void *) this); + if (gid) + { + *glyph = *gid; + return true; + } + + return false; + } + + hb_blob_ptr_t<post> table; + + protected: + + unsigned int get_glyph_count () const + { + if (version == 0x00010000) + return format1_names_length; + + if (version == 0x00020000) + return glyphNameIndex->len; + + return 0; + } + + static int cmp_gids (const void *pa, const void *pb, void *arg) + { + const accelerator_t *thiz = (const accelerator_t *) arg; + uint16_t a = * (const uint16_t *) pa; + uint16_t b = * (const uint16_t *) pb; + return thiz->find_glyph_name (b).cmp (thiz->find_glyph_name (a)); + } + + static int cmp_key (const void *pk, const void *po, void *arg) + { + const accelerator_t *thiz = (const accelerator_t *) arg; + const hb_bytes_t *key = (const hb_bytes_t *) pk; + uint16_t o = * (const uint16_t *) po; + return thiz->find_glyph_name (o).cmp (*key); + } + + hb_bytes_t find_glyph_name (hb_codepoint_t glyph) const + { + if (version == 0x00010000) + { + if (glyph >= format1_names_length) + return hb_bytes_t (); + + return format1_names (glyph); + } + + if (version != 0x00020000 || glyph >= glyphNameIndex->len) + return hb_bytes_t (); + + unsigned int index = glyphNameIndex->arrayZ[glyph]; + if (index < format1_names_length) + return format1_names (index); + index -= format1_names_length; + + if (index >= index_to_offset.length) + return hb_bytes_t (); + unsigned int offset = index_to_offset[index]; + + const uint8_t *data = pool + offset; + unsigned int name_length = *data; + data++; + + return hb_bytes_t ((const char *) data, name_length); + } + + private: + uint32_t version; + const ArrayOf<HBUINT16> *glyphNameIndex; + hb_vector_t<uint32_t> index_to_offset; + const uint8_t *pool; + hb_atomic_ptr_t<uint16_t *> gids_sorted_by_name; + }; + + bool has_data () const { return version.to_int (); } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + (version.to_int () == 0x00010000 || + (version.to_int () == 0x00020000 && v2X.sanitize (c)) || + version.to_int () == 0x00030000))); + } + + public: + FixedVersion<>version; /* 0x00010000 for version 1.0 + * 0x00020000 for version 2.0 + * 0x00025000 for version 2.5 (deprecated) + * 0x00030000 for version 3.0 */ + HBFixed italicAngle; /* Italic angle in counter-clockwise degrees + * from the vertical. Zero for upright text, + * negative for text that leans to the right + * (forward). */ + FWORD underlinePosition; /* This is the suggested distance of the top + * of the underline from the baseline + * (negative values indicate below baseline). + * The PostScript definition of this FontInfo + * dictionary key (the y coordinate of the + * center of the stroke) is not used for + * historical reasons. The value of the + * PostScript key may be calculated by + * subtracting half the underlineThickness + * from the value of this field. */ + FWORD underlineThickness; /* Suggested values for the underline + thickness. */ + HBUINT32 isFixedPitch; /* Set to 0 if the font is proportionally + * spaced, non-zero if the font is not + * proportionally spaced (i.e. monospaced). */ + HBUINT32 minMemType42; /* Minimum memory usage when an OpenType font + * is downloaded. */ + HBUINT32 maxMemType42; /* Maximum memory usage when an OpenType font + * is downloaded. */ + HBUINT32 minMemType1; /* Minimum memory usage when an OpenType font + * is downloaded as a Type 1 font. */ + HBUINT32 maxMemType1; /* Maximum memory usage when an OpenType font + * is downloaded as a Type 1 font. */ + postV2Tail v2X; + DEFINE_SIZE_MIN (32); +}; + +struct post_accelerator_t : post::accelerator_t {}; + +} /* namespace OT */ + + +#endif /* HB_OT_POST_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-fallback.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-fallback.hh new file mode 100644 index 0000000000..244e967b12 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-fallback.hh @@ -0,0 +1,348 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_FALLBACK_HH +#define HB_OT_SHAPE_COMPLEX_ARABIC_FALLBACK_HH + +#include "hb.hh" + +#include "hb-ot-shape.hh" +#include "hb-ot-layout-gsub-table.hh" + + +/* Features ordered the same as the entries in shaping_table rows, + * followed by rlig. Don't change. */ +static const hb_tag_t arabic_fallback_features[] = +{ + HB_TAG('i','n','i','t'), + HB_TAG('m','e','d','i'), + HB_TAG('f','i','n','a'), + HB_TAG('i','s','o','l'), + HB_TAG('r','l','i','g'), +}; + +static OT::SubstLookup * +arabic_fallback_synthesize_lookup_single (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font, + unsigned int feature_index) +{ + OT::HBGlyphID glyphs[SHAPING_TABLE_LAST - SHAPING_TABLE_FIRST + 1]; + OT::HBGlyphID substitutes[SHAPING_TABLE_LAST - SHAPING_TABLE_FIRST + 1]; + unsigned int num_glyphs = 0; + + /* Populate arrays */ + for (hb_codepoint_t u = SHAPING_TABLE_FIRST; u < SHAPING_TABLE_LAST + 1; u++) + { + hb_codepoint_t s = shaping_table[u - SHAPING_TABLE_FIRST][feature_index]; + hb_codepoint_t u_glyph, s_glyph; + + if (!s || + !hb_font_get_glyph (font, u, 0, &u_glyph) || + !hb_font_get_glyph (font, s, 0, &s_glyph) || + u_glyph == s_glyph || + u_glyph > 0xFFFFu || s_glyph > 0xFFFFu) + continue; + + glyphs[num_glyphs] = u_glyph; + substitutes[num_glyphs] = s_glyph; + + num_glyphs++; + } + + if (!num_glyphs) + return nullptr; + + /* Bubble-sort or something equally good! + * May not be good-enough for presidential candidate interviews, but good-enough for us... */ + hb_stable_sort (&glyphs[0], num_glyphs, + (int(*)(const OT::HBUINT16*, const OT::HBUINT16 *)) OT::HBGlyphID::cmp, + &substitutes[0]); + + + /* Each glyph takes four bytes max, and there's some overhead. */ + char buf[(SHAPING_TABLE_LAST - SHAPING_TABLE_FIRST + 1) * 4 + 128]; + hb_serialize_context_t c (buf, sizeof (buf)); + OT::SubstLookup *lookup = c.start_serialize<OT::SubstLookup> (); + bool ret = lookup->serialize_single (&c, + OT::LookupFlag::IgnoreMarks, + hb_sorted_array (glyphs, num_glyphs), + hb_array (substitutes, num_glyphs)); + c.end_serialize (); + + return ret && !c.in_error () ? c.copy<OT::SubstLookup> () : nullptr; +} + +static OT::SubstLookup * +arabic_fallback_synthesize_lookup_ligature (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font) +{ + OT::HBGlyphID first_glyphs[ARRAY_LENGTH_CONST (ligature_table)]; + unsigned int first_glyphs_indirection[ARRAY_LENGTH_CONST (ligature_table)]; + unsigned int ligature_per_first_glyph_count_list[ARRAY_LENGTH_CONST (first_glyphs)]; + unsigned int num_first_glyphs = 0; + + /* We know that all our ligatures are 2-component */ + OT::HBGlyphID ligature_list[ARRAY_LENGTH_CONST (first_glyphs) * ARRAY_LENGTH_CONST(ligature_table[0].ligatures)]; + unsigned int component_count_list[ARRAY_LENGTH_CONST (ligature_list)]; + OT::HBGlyphID component_list[ARRAY_LENGTH_CONST (ligature_list) * 1/* One extra component per ligature */]; + unsigned int num_ligatures = 0; + + /* Populate arrays */ + + /* Sort out the first-glyphs */ + for (unsigned int first_glyph_idx = 0; first_glyph_idx < ARRAY_LENGTH (first_glyphs); first_glyph_idx++) + { + hb_codepoint_t first_u = ligature_table[first_glyph_idx].first; + hb_codepoint_t first_glyph; + if (!hb_font_get_glyph (font, first_u, 0, &first_glyph)) + continue; + first_glyphs[num_first_glyphs] = first_glyph; + ligature_per_first_glyph_count_list[num_first_glyphs] = 0; + first_glyphs_indirection[num_first_glyphs] = first_glyph_idx; + num_first_glyphs++; + } + hb_stable_sort (&first_glyphs[0], num_first_glyphs, + (int(*)(const OT::HBUINT16*, const OT::HBUINT16 *)) OT::HBGlyphID::cmp, + &first_glyphs_indirection[0]); + + /* Now that the first-glyphs are sorted, walk again, populate ligatures. */ + for (unsigned int i = 0; i < num_first_glyphs; i++) + { + unsigned int first_glyph_idx = first_glyphs_indirection[i]; + + for (unsigned int second_glyph_idx = 0; second_glyph_idx < ARRAY_LENGTH (ligature_table[0].ligatures); second_glyph_idx++) + { + hb_codepoint_t second_u = ligature_table[first_glyph_idx].ligatures[second_glyph_idx].second; + hb_codepoint_t ligature_u = ligature_table[first_glyph_idx].ligatures[second_glyph_idx].ligature; + hb_codepoint_t second_glyph, ligature_glyph; + if (!second_u || + !hb_font_get_glyph (font, second_u, 0, &second_glyph) || + !hb_font_get_glyph (font, ligature_u, 0, &ligature_glyph)) + continue; + + ligature_per_first_glyph_count_list[i]++; + + ligature_list[num_ligatures] = ligature_glyph; + component_count_list[num_ligatures] = 2; + component_list[num_ligatures] = second_glyph; + num_ligatures++; + } + } + + if (!num_ligatures) + return nullptr; + + + /* 16 bytes per ligature ought to be enough... */ + char buf[ARRAY_LENGTH_CONST (ligature_list) * 16 + 128]; + hb_serialize_context_t c (buf, sizeof (buf)); + OT::SubstLookup *lookup = c.start_serialize<OT::SubstLookup> (); + bool ret = lookup->serialize_ligature (&c, + OT::LookupFlag::IgnoreMarks, + hb_sorted_array (first_glyphs, num_first_glyphs), + hb_array (ligature_per_first_glyph_count_list, num_first_glyphs), + hb_array (ligature_list, num_ligatures), + hb_array (component_count_list, num_ligatures), + hb_array (component_list, num_ligatures)); + c.end_serialize (); + /* TODO sanitize the results? */ + + return ret && !c.in_error () ? c.copy<OT::SubstLookup> () : nullptr; +} + +static OT::SubstLookup * +arabic_fallback_synthesize_lookup (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + unsigned int feature_index) +{ + if (feature_index < 4) + return arabic_fallback_synthesize_lookup_single (plan, font, feature_index); + else + return arabic_fallback_synthesize_lookup_ligature (plan, font); +} + +#define ARABIC_FALLBACK_MAX_LOOKUPS 5 + +struct arabic_fallback_plan_t +{ + unsigned int num_lookups; + bool free_lookups; + + hb_mask_t mask_array[ARABIC_FALLBACK_MAX_LOOKUPS]; + OT::SubstLookup *lookup_array[ARABIC_FALLBACK_MAX_LOOKUPS]; + OT::hb_ot_layout_lookup_accelerator_t accel_array[ARABIC_FALLBACK_MAX_LOOKUPS]; +}; + +#if defined(_WIN32) && !defined(HB_NO_WIN1256) +#define HB_WITH_WIN1256 +#endif + +#ifdef HB_WITH_WIN1256 +#include "hb-ot-shape-complex-arabic-win1256.hh" +#endif + +struct ManifestLookup +{ + public: + OT::Tag tag; + OT::OffsetTo<OT::SubstLookup> lookupOffset; + public: + DEFINE_SIZE_STATIC (6); +}; +typedef OT::ArrayOf<ManifestLookup> Manifest; + +static bool +arabic_fallback_plan_init_win1256 (arabic_fallback_plan_t *fallback_plan HB_UNUSED, + const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font HB_UNUSED) +{ +#ifdef HB_WITH_WIN1256 + /* Does this font look like it's Windows-1256-encoded? */ + hb_codepoint_t g; + if (!(hb_font_get_glyph (font, 0x0627u, 0, &g) && g == 199 /* ALEF */ && + hb_font_get_glyph (font, 0x0644u, 0, &g) && g == 225 /* LAM */ && + hb_font_get_glyph (font, 0x0649u, 0, &g) && g == 236 /* ALEF MAKSURA */ && + hb_font_get_glyph (font, 0x064Au, 0, &g) && g == 237 /* YEH */ && + hb_font_get_glyph (font, 0x0652u, 0, &g) && g == 250 /* SUKUN */)) + return false; + + const Manifest &manifest = reinterpret_cast<const Manifest&> (arabic_win1256_gsub_lookups.manifest); + static_assert (sizeof (arabic_win1256_gsub_lookups.manifestData) == + ARABIC_FALLBACK_MAX_LOOKUPS * sizeof (ManifestLookup), ""); + /* TODO sanitize the table? */ + + unsigned j = 0; + unsigned int count = manifest.len; + for (unsigned int i = 0; i < count; i++) + { + fallback_plan->mask_array[j] = plan->map.get_1_mask (manifest[i].tag); + if (fallback_plan->mask_array[j]) + { + fallback_plan->lookup_array[j] = const_cast<OT::SubstLookup*> (&(&manifest+manifest[i].lookupOffset)); + if (fallback_plan->lookup_array[j]) + { + fallback_plan->accel_array[j].init (*fallback_plan->lookup_array[j]); + j++; + } + } + } + + fallback_plan->num_lookups = j; + fallback_plan->free_lookups = false; + + return j > 0; +#else + return false; +#endif +} + +static bool +arabic_fallback_plan_init_unicode (arabic_fallback_plan_t *fallback_plan, + const hb_ot_shape_plan_t *plan, + hb_font_t *font) +{ + static_assert ((ARRAY_LENGTH_CONST(arabic_fallback_features) <= ARABIC_FALLBACK_MAX_LOOKUPS), ""); + unsigned int j = 0; + for (unsigned int i = 0; i < ARRAY_LENGTH(arabic_fallback_features) ; i++) + { + fallback_plan->mask_array[j] = plan->map.get_1_mask (arabic_fallback_features[i]); + if (fallback_plan->mask_array[j]) + { + fallback_plan->lookup_array[j] = arabic_fallback_synthesize_lookup (plan, font, i); + if (fallback_plan->lookup_array[j]) + { + fallback_plan->accel_array[j].init (*fallback_plan->lookup_array[j]); + j++; + } + } + } + + fallback_plan->num_lookups = j; + fallback_plan->free_lookups = true; + + return j > 0; +} + +static arabic_fallback_plan_t * +arabic_fallback_plan_create (const hb_ot_shape_plan_t *plan, + hb_font_t *font) +{ + arabic_fallback_plan_t *fallback_plan = (arabic_fallback_plan_t *) calloc (1, sizeof (arabic_fallback_plan_t)); + if (unlikely (!fallback_plan)) + return const_cast<arabic_fallback_plan_t *> (&Null (arabic_fallback_plan_t)); + + fallback_plan->num_lookups = 0; + fallback_plan->free_lookups = false; + + /* Try synthesizing GSUB table using Unicode Arabic Presentation Forms, + * in case the font has cmap entries for the presentation-forms characters. */ + if (arabic_fallback_plan_init_unicode (fallback_plan, plan, font)) + return fallback_plan; + + /* See if this looks like a Windows-1256-encoded font. If it does, use a + * hand-coded GSUB table. */ + if (arabic_fallback_plan_init_win1256 (fallback_plan, plan, font)) + return fallback_plan; + + assert (fallback_plan->num_lookups == 0); + free (fallback_plan); + return const_cast<arabic_fallback_plan_t *> (&Null (arabic_fallback_plan_t)); +} + +static void +arabic_fallback_plan_destroy (arabic_fallback_plan_t *fallback_plan) +{ + if (!fallback_plan || fallback_plan->num_lookups == 0) + return; + + for (unsigned int i = 0; i < fallback_plan->num_lookups; i++) + if (fallback_plan->lookup_array[i]) + { + fallback_plan->accel_array[i].fini (); + if (fallback_plan->free_lookups) + free (fallback_plan->lookup_array[i]); + } + + free (fallback_plan); +} + +static void +arabic_fallback_plan_shape (arabic_fallback_plan_t *fallback_plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ + OT::hb_ot_apply_context_t c (0, font, buffer); + for (unsigned int i = 0; i < fallback_plan->num_lookups; i++) + if (fallback_plan->lookup_array[i]) { + c.set_lookup_mask (fallback_plan->mask_array[i]); + hb_ot_layout_substitute_lookup (&c, + *fallback_plan->lookup_array[i], + fallback_plan->accel_array[i]); + } +} + + +#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_FALLBACK_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-joining-list.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-joining-list.hh new file mode 100644 index 0000000000..c022d4bb06 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-joining-list.hh @@ -0,0 +1,46 @@ +/* == Start of generated function == */ +/* + * The following function is generated by running: + * + * ./gen-arabic-joining-list.py ArabicShaping.txt Scripts.txt + * + * on files with these headers: + * + * # ArabicShaping-13.0.0.txt + * # Date: 2020-01-31, 23:55:00 GMT [KW, RP] + * # Scripts-13.0.0.txt + * # Date: 2020-01-22, 00:07:43 GMT + */ + +#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH +#define HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH + +static bool +has_arabic_joining (hb_script_t script) +{ + /* List of scripts that have data in arabic-table. */ + switch ((int) script) + { + case HB_SCRIPT_ADLAM: + case HB_SCRIPT_ARABIC: + case HB_SCRIPT_CHORASMIAN: + case HB_SCRIPT_HANIFI_ROHINGYA: + case HB_SCRIPT_MANDAIC: + case HB_SCRIPT_MANICHAEAN: + case HB_SCRIPT_MONGOLIAN: + case HB_SCRIPT_NKO: + case HB_SCRIPT_PHAGS_PA: + case HB_SCRIPT_PSALTER_PAHLAVI: + case HB_SCRIPT_SOGDIAN: + case HB_SCRIPT_SYRIAC: + return true; + + default: + return false; + } +} + + +#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_JOINING_LIST_HH */ + +/* == End of generated function == */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-table.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-table.hh new file mode 100644 index 0000000000..70ffe623c0 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-table.hh @@ -0,0 +1,433 @@ +/* == Start of generated table == */ +/* + * The following table is generated by running: + * + * ./gen-arabic-table.py ArabicShaping.txt UnicodeData.txt Blocks.txt + * + * on files with these headers: + * + * # ArabicShaping-13.0.0.txt + * # Date: 2020-01-31, 23:55:00 GMT [KW, RP] + * # Blocks-13.0.0.txt + * # Date: 2019-07-10, 19:06:00 GMT [KW] + * UnicodeData.txt does not have a header. + */ + +#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH +#define HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH + + +#define A JOINING_GROUP_ALAPH +#define DR JOINING_GROUP_DALATH_RISH +#define C JOINING_TYPE_C +#define D JOINING_TYPE_D +#define L JOINING_TYPE_L +#define R JOINING_TYPE_R +#define T JOINING_TYPE_T +#define U JOINING_TYPE_U +#define X JOINING_TYPE_X + +static const uint8_t joining_table[] = +{ + +#define joining_offset_0x0600u 0 + + /* Arabic */ + + /* 0600 */ U,U,U,U,U,U,X,X,U,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0620 */ D,U,R,R,R,R,D,R,D,R,D,D,D,D,D,R,R,R,R,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 0640 */ C,D,D,D,D,D,D,D,R,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0660 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,D,D,X,R,R,R,U,R,R,R,D,D,D,D,D,D,D,D, + /* 0680 */ D,D,D,D,D,D,D,D,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,D,D,D,D,D,D, + /* 06A0 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 06C0 */ R,D,D,R,R,R,R,R,R,R,R,R,D,R,D,R,D,D,R,R,X,R,X,X,X,X,X,X,X,U,X,X, + /* 06E0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,R,R,X,X,X,X,X,X,X,X,X,X,D,D,D,X,X,D, + + /* Syriac */ + + /* 0700 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,T,A,X,D,D,D,DR,DR,R,R,R,D,D,D,D,R,D, + /* 0720 */ D,D,D,D,D,D,D,D,R,D,DR,D,R,D,D,DR,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0740 */ X,X,X,X,X,X,X,X,X,X,X,X,X,R,D,D, + + /* Arabic Supplement */ + + /* 0740 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D, + /* 0760 */ D,D,D,D,D,D,D,D,D,D,D,R,R,D,D,D,D,R,D,R,R,D,D,D,R,R,D,D,D,D,D,D, + + /* FILLER */ + + /* 0780 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 07A0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + + /* NKo */ + + /* 07C0 */ X,X,X,X,X,X,X,X,X,X,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,C,X,X,X,X,X, + + /* FILLER */ + + /* 0800 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0820 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + + /* Mandaic */ + + /* 0840 */ R,D,D,D,D,D,R,R,D,R,D,D,D,D,D,D,D,D,D,D,R,D,R,R,R,X,X,X,X,X,X,X, + + /* Syriac Supplement */ + + /* 0860 */ D,U,D,D,D,D,U,R,D,R,R,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0880 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + + /* Arabic Extended-A */ + + /* 08A0 */ D,D,D,D,D,D,D,D,D,D,R,R,R,U,R,D,D,R,R,D,D,X,D,D,D,R,D,D,D,D,D,D, + /* 08C0 */ D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 08E0 */ X,X,U, + +#define joining_offset_0x1806u 739 + + /* Mongolian */ + + /* 1800 */ U,D,X,X,C,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 1820 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 1840 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 1860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X, + /* 1880 */ U,U,U,U,U,T,T,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 18A0 */ D,D,D,D,D,D,D,D,D,X,D, + +#define joining_offset_0x200cu 904 + + /* General Punctuation */ + + /* 2000 */ U,C,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 2020 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 2040 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 2060 */ X,X,X,X,X,X,U,U,U,U, + +#define joining_offset_0xa840u 998 + + /* Phags-pa */ + + /* A840 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* A860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,L,U, + +#define joining_offset_0x10ac0u 1050 + + /* Manichaean */ + + /* 10AC0 */ D,D,D,D,D,R,U,R,U,R,R,U,U,L,R,R,R,R,R,D,D,D,D,L,D,D,D,D,D,R,D,D, + /* 10AE0 */ D,R,U,U,R,X,X,X,X,X,X,D,D,D,D,R, + +#define joining_offset_0x10b80u 1098 + + /* Psalter Pahlavi */ + + /* 10B80 */ D,R,D,R,R,R,D,D,D,R,D,D,R,D,R,R,D,R,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 10BA0 */ X,X,X,X,X,X,X,X,X,R,R,R,R,D,D,U, + +#define joining_offset_0x10d00u 1146 + + /* Hanifi Rohingya */ + + /* 10D00 */ L,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 10D20 */ D,D,R,D, + +#define joining_offset_0x10f30u 1182 + + /* Sogdian */ + + /* 10F20 */ D,D,D,R,D,D,D,D,D,D,D,D,D,D,D,D, + /* 10F40 */ D,D,D,D,D,U,X,X,X,X,X,X,X,X,X,X,X,D,D,D,R, + +#define joining_offset_0x10fb0u 1219 + + /* Chorasmian */ + + /* 10FA0 */ D,U,D,D,R,R,R,U,D,R,R,D,D,R,D,D, + /* 10FC0 */ U,D,R,R,D,U,U,U,U,R,D,L, + +#define joining_offset_0x110bdu 1247 + + /* Kaithi */ + + /* 110A0 */ U,X,X, + /* 110C0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,U, + +#define joining_offset_0x1e900u 1264 + + /* Adlam */ + + /* 1E900 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 1E920 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 1E940 */ D,D,D,D,X,X,X,X,X,X,X,T, + +}; /* Table items: 1340; occupancy: 57% */ + + +static unsigned int +joining_type (hb_codepoint_t u) +{ + switch (u >> 12) + { + case 0x0u: + if (hb_in_range<hb_codepoint_t> (u, 0x0600u, 0x08E2u)) return joining_table[u - 0x0600u + joining_offset_0x0600u]; + break; + + case 0x1u: + if (hb_in_range<hb_codepoint_t> (u, 0x1806u, 0x18AAu)) return joining_table[u - 0x1806u + joining_offset_0x1806u]; + break; + + case 0x2u: + if (hb_in_range<hb_codepoint_t> (u, 0x200Cu, 0x2069u)) return joining_table[u - 0x200Cu + joining_offset_0x200cu]; + break; + + case 0xAu: + if (hb_in_range<hb_codepoint_t> (u, 0xA840u, 0xA873u)) return joining_table[u - 0xA840u + joining_offset_0xa840u]; + break; + + case 0x10u: + if (hb_in_range<hb_codepoint_t> (u, 0x10AC0u, 0x10AEFu)) return joining_table[u - 0x10AC0u + joining_offset_0x10ac0u]; + if (hb_in_range<hb_codepoint_t> (u, 0x10B80u, 0x10BAFu)) return joining_table[u - 0x10B80u + joining_offset_0x10b80u]; + if (hb_in_range<hb_codepoint_t> (u, 0x10D00u, 0x10D23u)) return joining_table[u - 0x10D00u + joining_offset_0x10d00u]; + if (hb_in_range<hb_codepoint_t> (u, 0x10F30u, 0x10F54u)) return joining_table[u - 0x10F30u + joining_offset_0x10f30u]; + if (hb_in_range<hb_codepoint_t> (u, 0x10FB0u, 0x10FCBu)) return joining_table[u - 0x10FB0u + joining_offset_0x10fb0u]; + break; + + case 0x11u: + if (hb_in_range<hb_codepoint_t> (u, 0x110BDu, 0x110CDu)) return joining_table[u - 0x110BDu + joining_offset_0x110bdu]; + break; + + case 0x1Eu: + if (hb_in_range<hb_codepoint_t> (u, 0x1E900u, 0x1E94Bu)) return joining_table[u - 0x1E900u + joining_offset_0x1e900u]; + break; + + default: + break; + } + return X; +} + +#undef A +#undef DR +#undef C +#undef D +#undef L +#undef R +#undef T +#undef U +#undef X + + +static const uint16_t shaping_table[][4] = +{ + {0x0000u, 0x0000u, 0x0000u, 0xFE80u}, /* U+0621 ARABIC LETTER HAMZA ISOLATED FORM */ + {0x0000u, 0x0000u, 0xFE82u, 0xFE81u}, /* U+0622 ARABIC LETTER ALEF WITH MADDA ABOVE */ + {0x0000u, 0x0000u, 0xFE84u, 0xFE83u}, /* U+0623 ARABIC LETTER ALEF WITH HAMZA ABOVE */ + {0x0000u, 0x0000u, 0xFE86u, 0xFE85u}, /* U+0624 ARABIC LETTER WAW WITH HAMZA ABOVE */ + {0x0000u, 0x0000u, 0xFE88u, 0xFE87u}, /* U+0625 ARABIC LETTER ALEF WITH HAMZA BELOW */ + {0xFE8Bu, 0xFE8Cu, 0xFE8Au, 0xFE89u}, /* U+0626 ARABIC LETTER YEH WITH HAMZA ABOVE */ + {0x0000u, 0x0000u, 0xFE8Eu, 0xFE8Du}, /* U+0627 ARABIC LETTER ALEF */ + {0xFE91u, 0xFE92u, 0xFE90u, 0xFE8Fu}, /* U+0628 ARABIC LETTER BEH */ + {0x0000u, 0x0000u, 0xFE94u, 0xFE93u}, /* U+0629 ARABIC LETTER TEH MARBUTA */ + {0xFE97u, 0xFE98u, 0xFE96u, 0xFE95u}, /* U+062A ARABIC LETTER TEH */ + {0xFE9Bu, 0xFE9Cu, 0xFE9Au, 0xFE99u}, /* U+062B ARABIC LETTER THEH */ + {0xFE9Fu, 0xFEA0u, 0xFE9Eu, 0xFE9Du}, /* U+062C ARABIC LETTER JEEM */ + {0xFEA3u, 0xFEA4u, 0xFEA2u, 0xFEA1u}, /* U+062D ARABIC LETTER HAH */ + {0xFEA7u, 0xFEA8u, 0xFEA6u, 0xFEA5u}, /* U+062E ARABIC LETTER KHAH */ + {0x0000u, 0x0000u, 0xFEAAu, 0xFEA9u}, /* U+062F ARABIC LETTER DAL */ + {0x0000u, 0x0000u, 0xFEACu, 0xFEABu}, /* U+0630 ARABIC LETTER THAL */ + {0x0000u, 0x0000u, 0xFEAEu, 0xFEADu}, /* U+0631 ARABIC LETTER REH */ + {0x0000u, 0x0000u, 0xFEB0u, 0xFEAFu}, /* U+0632 ARABIC LETTER ZAIN */ + {0xFEB3u, 0xFEB4u, 0xFEB2u, 0xFEB1u}, /* U+0633 ARABIC LETTER SEEN */ + {0xFEB7u, 0xFEB8u, 0xFEB6u, 0xFEB5u}, /* U+0634 ARABIC LETTER SHEEN */ + {0xFEBBu, 0xFEBCu, 0xFEBAu, 0xFEB9u}, /* U+0635 ARABIC LETTER SAD */ + {0xFEBFu, 0xFEC0u, 0xFEBEu, 0xFEBDu}, /* U+0636 ARABIC LETTER DAD */ + {0xFEC3u, 0xFEC4u, 0xFEC2u, 0xFEC1u}, /* U+0637 ARABIC LETTER TAH */ + {0xFEC7u, 0xFEC8u, 0xFEC6u, 0xFEC5u}, /* U+0638 ARABIC LETTER ZAH */ + {0xFECBu, 0xFECCu, 0xFECAu, 0xFEC9u}, /* U+0639 ARABIC LETTER AIN */ + {0xFECFu, 0xFED0u, 0xFECEu, 0xFECDu}, /* U+063A ARABIC LETTER GHAIN */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063B */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063C */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063D */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063E */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+063F */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0640 */ + {0xFED3u, 0xFED4u, 0xFED2u, 0xFED1u}, /* U+0641 ARABIC LETTER FEH */ + {0xFED7u, 0xFED8u, 0xFED6u, 0xFED5u}, /* U+0642 ARABIC LETTER QAF */ + {0xFEDBu, 0xFEDCu, 0xFEDAu, 0xFED9u}, /* U+0643 ARABIC LETTER KAF */ + {0xFEDFu, 0xFEE0u, 0xFEDEu, 0xFEDDu}, /* U+0644 ARABIC LETTER LAM */ + {0xFEE3u, 0xFEE4u, 0xFEE2u, 0xFEE1u}, /* U+0645 ARABIC LETTER MEEM */ + {0xFEE7u, 0xFEE8u, 0xFEE6u, 0xFEE5u}, /* U+0646 ARABIC LETTER NOON */ + {0xFEEBu, 0xFEECu, 0xFEEAu, 0xFEE9u}, /* U+0647 ARABIC LETTER HEH */ + {0x0000u, 0x0000u, 0xFEEEu, 0xFEEDu}, /* U+0648 ARABIC LETTER WAW */ + {0xFBE8u, 0xFBE9u, 0xFEF0u, 0xFEEFu}, /* U+0649 ARABIC LETTER */ + {0xFEF3u, 0xFEF4u, 0xFEF2u, 0xFEF1u}, /* U+064A ARABIC LETTER YEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064B */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064C */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064D */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064E */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+064F */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0650 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0651 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0652 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0653 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0654 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0655 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0656 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0657 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0658 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0659 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065A */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065B */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065C */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065D */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065E */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+065F */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0660 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0661 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0662 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0663 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0664 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0665 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0666 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0667 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0668 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0669 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066A */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066B */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066C */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066D */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066E */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+066F */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0670 */ + {0x0000u, 0x0000u, 0xFB51u, 0xFB50u}, /* U+0671 ARABIC LETTER ALEF WASLA */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0672 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0673 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0674 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0675 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0676 */ + {0x0000u, 0x0000u, 0x0000u, 0xFBDDu}, /* U+0677 ARABIC LETTER U WITH HAMZA ABOVE ISOLATED FORM */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0678 */ + {0xFB68u, 0xFB69u, 0xFB67u, 0xFB66u}, /* U+0679 ARABIC LETTER TTEH */ + {0xFB60u, 0xFB61u, 0xFB5Fu, 0xFB5Eu}, /* U+067A ARABIC LETTER TTEHEH */ + {0xFB54u, 0xFB55u, 0xFB53u, 0xFB52u}, /* U+067B ARABIC LETTER BEEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+067C */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+067D */ + {0xFB58u, 0xFB59u, 0xFB57u, 0xFB56u}, /* U+067E ARABIC LETTER PEH */ + {0xFB64u, 0xFB65u, 0xFB63u, 0xFB62u}, /* U+067F ARABIC LETTER TEHEH */ + {0xFB5Cu, 0xFB5Du, 0xFB5Bu, 0xFB5Au}, /* U+0680 ARABIC LETTER BEHEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0681 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0682 */ + {0xFB78u, 0xFB79u, 0xFB77u, 0xFB76u}, /* U+0683 ARABIC LETTER NYEH */ + {0xFB74u, 0xFB75u, 0xFB73u, 0xFB72u}, /* U+0684 ARABIC LETTER DYEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0685 */ + {0xFB7Cu, 0xFB7Du, 0xFB7Bu, 0xFB7Au}, /* U+0686 ARABIC LETTER TCHEH */ + {0xFB80u, 0xFB81u, 0xFB7Fu, 0xFB7Eu}, /* U+0687 ARABIC LETTER TCHEHEH */ + {0x0000u, 0x0000u, 0xFB89u, 0xFB88u}, /* U+0688 ARABIC LETTER DDAL */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0689 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+068A */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+068B */ + {0x0000u, 0x0000u, 0xFB85u, 0xFB84u}, /* U+068C ARABIC LETTER DAHAL */ + {0x0000u, 0x0000u, 0xFB83u, 0xFB82u}, /* U+068D ARABIC LETTER DDAHAL */ + {0x0000u, 0x0000u, 0xFB87u, 0xFB86u}, /* U+068E ARABIC LETTER DUL */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+068F */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0690 */ + {0x0000u, 0x0000u, 0xFB8Du, 0xFB8Cu}, /* U+0691 ARABIC LETTER RREH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0692 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0693 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0694 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0695 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0696 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0697 */ + {0x0000u, 0x0000u, 0xFB8Bu, 0xFB8Au}, /* U+0698 ARABIC LETTER JEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+0699 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069A */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069B */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069C */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069D */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069E */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+069F */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A0 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A1 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A2 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A3 */ + {0xFB6Cu, 0xFB6Du, 0xFB6Bu, 0xFB6Au}, /* U+06A4 ARABIC LETTER VEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A5 */ + {0xFB70u, 0xFB71u, 0xFB6Fu, 0xFB6Eu}, /* U+06A6 ARABIC LETTER PEHEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A7 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06A8 */ + {0xFB90u, 0xFB91u, 0xFB8Fu, 0xFB8Eu}, /* U+06A9 ARABIC LETTER KEHEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AA */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AB */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AC */ + {0xFBD5u, 0xFBD6u, 0xFBD4u, 0xFBD3u}, /* U+06AD ARABIC LETTER NG */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06AE */ + {0xFB94u, 0xFB95u, 0xFB93u, 0xFB92u}, /* U+06AF ARABIC LETTER GAF */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B0 */ + {0xFB9Cu, 0xFB9Du, 0xFB9Bu, 0xFB9Au}, /* U+06B1 ARABIC LETTER NGOEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B2 */ + {0xFB98u, 0xFB99u, 0xFB97u, 0xFB96u}, /* U+06B3 ARABIC LETTER GUEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B4 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B5 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B6 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B7 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B8 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06B9 */ + {0x0000u, 0x0000u, 0xFB9Fu, 0xFB9Eu}, /* U+06BA ARABIC LETTER NOON GHUNNA */ + {0xFBA2u, 0xFBA3u, 0xFBA1u, 0xFBA0u}, /* U+06BB ARABIC LETTER RNOON */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06BC */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06BD */ + {0xFBACu, 0xFBADu, 0xFBABu, 0xFBAAu}, /* U+06BE ARABIC LETTER HEH DOACHASHMEE */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06BF */ + {0x0000u, 0x0000u, 0xFBA5u, 0xFBA4u}, /* U+06C0 ARABIC LETTER HEH WITH YEH ABOVE */ + {0xFBA8u, 0xFBA9u, 0xFBA7u, 0xFBA6u}, /* U+06C1 ARABIC LETTER HEH GOAL */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06C2 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06C3 */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06C4 */ + {0x0000u, 0x0000u, 0xFBE1u, 0xFBE0u}, /* U+06C5 ARABIC LETTER KIRGHIZ OE */ + {0x0000u, 0x0000u, 0xFBDAu, 0xFBD9u}, /* U+06C6 ARABIC LETTER OE */ + {0x0000u, 0x0000u, 0xFBD8u, 0xFBD7u}, /* U+06C7 ARABIC LETTER U */ + {0x0000u, 0x0000u, 0xFBDCu, 0xFBDBu}, /* U+06C8 ARABIC LETTER YU */ + {0x0000u, 0x0000u, 0xFBE3u, 0xFBE2u}, /* U+06C9 ARABIC LETTER KIRGHIZ YU */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CA */ + {0x0000u, 0x0000u, 0xFBDFu, 0xFBDEu}, /* U+06CB ARABIC LETTER VE */ + {0xFBFEu, 0xFBFFu, 0xFBFDu, 0xFBFCu}, /* U+06CC ARABIC LETTER FARSI YEH */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CD */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CE */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06CF */ + {0xFBE6u, 0xFBE7u, 0xFBE5u, 0xFBE4u}, /* U+06D0 ARABIC LETTER E */ + {0x0000u, 0x0000u, 0x0000u, 0x0000u}, /* U+06D1 */ + {0x0000u, 0x0000u, 0xFBAFu, 0xFBAEu}, /* U+06D2 ARABIC LETTER YEH BARREE */ + {0x0000u, 0x0000u, 0xFBB1u, 0xFBB0u}, /* U+06D3 ARABIC LETTER YEH BARREE WITH HAMZA ABOVE */ +}; + +#define SHAPING_TABLE_FIRST 0x0621u +#define SHAPING_TABLE_LAST 0x06D3u + + +static const struct ligature_set_t { + uint16_t first; + struct ligature_pairs_t { + uint16_t second; + uint16_t ligature; + } ligatures[4]; +} ligature_table[] = +{ + { 0xFEDFu, { + { 0xFE82u, 0xFEF5u }, /* ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM */ + { 0xFE84u, 0xFEF7u }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM */ + { 0xFE88u, 0xFEF9u }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW ISOLATED FORM */ + { 0xFE8Eu, 0xFEFBu }, /* ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM */ + }}, + { 0xFEE0u, { + { 0xFE82u, 0xFEF6u }, /* ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM */ + { 0xFE84u, 0xFEF8u }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM */ + { 0xFE88u, 0xFEFAu }, /* ARABIC LIGATURE LAM WITH ALEF WITH HAMZA BELOW FINAL FORM */ + { 0xFE8Eu, 0xFEFCu }, /* ARABIC LIGATURE LAM WITH ALEF FINAL FORM */ + }}, +}; + + +#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_TABLE_HH */ + +/* == End of generated table == */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-win1256.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-win1256.hh new file mode 100644 index 0000000000..b15e145f2f --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic-win1256.hh @@ -0,0 +1,323 @@ +/* + * Copyright © 2014 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_WIN1256_HH + + +/* + * The macros in the first part of this file are generic macros that can + * be used to define the bytes for OpenType table data in code in a + * readable manner. We can move the macros to reside with their respective + * struct types, but since we only use these to define one data table, the + * Windows-1256 Arabic shaping table in this file, we keep them here. + */ + + +/* First we measure, then we cut. */ +#ifndef OT_MEASURE +#define OT_MEASURE +#define OT_TABLE_START static const struct TABLE_NAME { +#define OT_TABLE_END } +#define OT_LABEL_START(Name) unsigned char Name[ +#define OT_LABEL_END ]; +#define OT_UINT8(u8) +1/*byte*/ +#define OT_UINT16(u16) +2/*bytes*/ +#else +#undef OT_MEASURE +#define OT_TABLE_START TABLE_NAME = { +#define OT_TABLE_END }; +#define OT_LABEL_START(Name) { +#define OT_LABEL_END }, +#define OT_UINT8(u8) (u8), +#define OT_UINT16(u16) (unsigned char)((u16)>>8), (unsigned char)((u16)&0xFFu), +#define OT_COUNT(Name, ItemSize) ((unsigned int) sizeof(((struct TABLE_NAME*)0)->Name) \ + / (unsigned int)(ItemSize) \ + /* OT_ASSERT it's divisible (and positive). */) +#define OT_DISTANCE(From,To) ((unsigned int) \ + ((char*)(&((struct TABLE_NAME*)0)->To) - \ + (char*)(&((struct TABLE_NAME*)0)->From)) \ + /* OT_ASSERT it's positive. */) +#endif + + +#define OT_LABEL(Name) \ + OT_LABEL_END \ + OT_LABEL_START(Name) + +/* Whenever we receive an argument that is a list, it will expand to + * contain commas. That cannot be passed to another macro because the + * commas will throw off the preprocessor. The solution is to wrap + * the passed-in argument in OT_LIST() before passing to the next macro. + * Unfortunately this trick requires vararg macros. */ +#define OT_LIST(...) __VA_ARGS__ + + +/* + * Basic Types + */ + +#define OT_TAG(a,b,c,d) \ + OT_UINT8(a) OT_UINT8(b) OT_UINT8(c) OT_UINT8(d) + +#define OT_OFFSET(From, To) /* Offset from From to To in bytes */ \ + OT_UINT16(OT_DISTANCE(From, To)) + +#define OT_GLYPHID /* GlyphID */ \ + OT_UINT16 + +#define OT_UARRAY(Name, Items) \ + OT_LABEL_START(Name) \ + OT_UINT16(OT_COUNT(Name##Data, 2)) \ + OT_LABEL(Name##Data) \ + Items \ + OT_LABEL_END + +#define OT_UHEADLESSARRAY(Name, Items) \ + OT_LABEL_START(Name) \ + OT_UINT16(OT_COUNT(Name##Data, 2) + 1) \ + OT_LABEL(Name##Data) \ + Items \ + OT_LABEL_END + + +/* + * Common Types + */ + +#define OT_LOOKUP_FLAG_IGNORE_MARKS 0x08u + +#define OT_LOOKUP(Name, LookupType, LookupFlag, SubLookupOffsets) \ + OT_LABEL_START(Name) \ + OT_UINT16(LookupType) \ + OT_UINT16(LookupFlag) \ + OT_LABEL_END \ + OT_UARRAY(Name##SubLookupOffsetsArray, OT_LIST(SubLookupOffsets)) + +#define OT_SUBLOOKUP(Name, SubFormat, Items) \ + OT_LABEL_START(Name) \ + OT_UINT16(SubFormat) \ + Items + +#define OT_COVERAGE1(Name, Items) \ + OT_LABEL_START(Name) \ + OT_UINT16(1) \ + OT_LABEL_END \ + OT_UARRAY(Name##Glyphs, OT_LIST(Items)) + + +/* + * GSUB + */ + +#define OT_LOOKUP_TYPE_SUBST_SINGLE 1u +#define OT_LOOKUP_TYPE_SUBST_LIGATURE 4u + +#define OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(Name, FromGlyphs, ToGlyphs) \ + OT_SUBLOOKUP(Name, 2, \ + OT_OFFSET(Name, Name##Coverage) \ + OT_LABEL_END \ + OT_UARRAY(Name##Substitute, OT_LIST(ToGlyphs)) \ + ) \ + OT_COVERAGE1(Name##Coverage, OT_LIST(FromGlyphs)) \ + /* ASSERT_STATIC_EXPR_ZERO (len(FromGlyphs) == len(ToGlyphs)) */ + +#define OT_SUBLOOKUP_LIGATURE_SUBST_FORMAT1(Name, FirstGlyphs, LigatureSetOffsets) \ + OT_SUBLOOKUP(Name, 1, \ + OT_OFFSET(Name, Name##Coverage) \ + OT_LABEL_END \ + OT_UARRAY(Name##LigatureSetOffsetsArray, OT_LIST(LigatureSetOffsets)) \ + ) \ + OT_COVERAGE1(Name##Coverage, OT_LIST(FirstGlyphs)) \ + /* ASSERT_STATIC_EXPR_ZERO (len(FirstGlyphs) == len(LigatureSetOffsets)) */ + +#define OT_LIGATURE_SET(Name, LigatureSetOffsets) \ + OT_UARRAY(Name, OT_LIST(LigatureSetOffsets)) + +#define OT_LIGATURE(Name, Components, LigGlyph) \ + OT_LABEL_START(Name) \ + LigGlyph \ + OT_LABEL_END \ + OT_UHEADLESSARRAY(Name##ComponentsArray, OT_LIST(Components)) + +/* + * + * Start of Windows-1256 shaping table. + * + */ + +/* Table name. */ +#define TABLE_NAME arabic_win1256_gsub_lookups + +/* Table manifest. */ +#define MANIFEST(Items) \ + OT_LABEL_START(manifest) \ + OT_UINT16(OT_COUNT(manifestData, 6)) \ + OT_LABEL(manifestData) \ + Items \ + OT_LABEL_END + +#define MANIFEST_LOOKUP(Tag, Name) \ + Tag \ + OT_OFFSET(manifest, Name) + +/* Shorthand. */ +#define G OT_GLYPHID + +/* + * Table Start + */ +OT_TABLE_START + + +/* + * Manifest + */ +MANIFEST( + MANIFEST_LOOKUP(OT_TAG('r','l','i','g'), rligLookup) + MANIFEST_LOOKUP(OT_TAG('i','n','i','t'), initLookup) + MANIFEST_LOOKUP(OT_TAG('m','e','d','i'), mediLookup) + MANIFEST_LOOKUP(OT_TAG('f','i','n','a'), finaLookup) + MANIFEST_LOOKUP(OT_TAG('r','l','i','g'), rligMarksLookup) +) + +/* + * Lookups + */ +OT_LOOKUP(initLookup, OT_LOOKUP_TYPE_SUBST_SINGLE, OT_LOOKUP_FLAG_IGNORE_MARKS, + OT_OFFSET(initLookup, initmediSubLookup) + OT_OFFSET(initLookup, initSubLookup) +) +OT_LOOKUP(mediLookup, OT_LOOKUP_TYPE_SUBST_SINGLE, OT_LOOKUP_FLAG_IGNORE_MARKS, + OT_OFFSET(mediLookup, initmediSubLookup) + OT_OFFSET(mediLookup, mediSubLookup) + OT_OFFSET(mediLookup, medifinaLamAlefSubLookup) +) +OT_LOOKUP(finaLookup, OT_LOOKUP_TYPE_SUBST_SINGLE, OT_LOOKUP_FLAG_IGNORE_MARKS, + OT_OFFSET(finaLookup, finaSubLookup) + /* We don't need this one currently as the sequence inherits masks + * from the first item. Just in case we change that in the future + * to be smart about Arabic masks when ligating... */ + OT_OFFSET(finaLookup, medifinaLamAlefSubLookup) +) +OT_LOOKUP(rligLookup, OT_LOOKUP_TYPE_SUBST_LIGATURE, OT_LOOKUP_FLAG_IGNORE_MARKS, + OT_OFFSET(rligLookup, lamAlefLigaturesSubLookup) +) +OT_LOOKUP(rligMarksLookup, OT_LOOKUP_TYPE_SUBST_LIGATURE, 0, + OT_OFFSET(rligMarksLookup, shaddaLigaturesSubLookup) +) + +/* + * init/medi/fina forms + */ +OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(initmediSubLookup, + G(198) G(200) G(201) G(202) G(203) G(204) G(205) G(206) G(211) + G(212) G(213) G(214) G(223) G(225) G(227) G(228) G(236) G(237), + G(162) G(4) G(5) G(5) G(6) G(7) G(9) G(11) G(13) + G(14) G(15) G(26) G(140) G(141) G(142) G(143) G(154) G(154) +) +OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(initSubLookup, + G(218) G(219) G(221) G(222) G(229), + G(27) G(30) G(128) G(131) G(144) +) +OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(mediSubLookup, + G(218) G(219) G(221) G(222) G(229), + G(28) G(31) G(129) G(138) G(149) +) +OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(finaSubLookup, + G(194) G(195) G(197) G(198) G(199) G(201) G(204) G(205) G(206) + G(218) G(219) G(229) G(236) G(237), + G(2) G(1) G(3) G(181) G(0) G(159) G(8) G(10) G(12) + G(29) G(127) G(152) G(160) G(156) +) +OT_SUBLOOKUP_SINGLE_SUBST_FORMAT2(medifinaLamAlefSubLookup, + G(165) G(178) G(180) G(252), + G(170) G(179) G(185) G(255) +) + +/* + * Lam+Alef ligatures + */ +OT_SUBLOOKUP_LIGATURE_SUBST_FORMAT1(lamAlefLigaturesSubLookup, + G(225), + OT_OFFSET(lamAlefLigaturesSubLookup, lamLigatureSet) +) +OT_LIGATURE_SET(lamLigatureSet, + OT_OFFSET(lamLigatureSet, lamInitLigature1) + OT_OFFSET(lamLigatureSet, lamInitLigature2) + OT_OFFSET(lamLigatureSet, lamInitLigature3) + OT_OFFSET(lamLigatureSet, lamInitLigature4) +) +OT_LIGATURE(lamInitLigature1, G(199), G(165)) +OT_LIGATURE(lamInitLigature2, G(195), G(178)) +OT_LIGATURE(lamInitLigature3, G(194), G(180)) +OT_LIGATURE(lamInitLigature4, G(197), G(252)) + +/* + * Shadda ligatures + */ +OT_SUBLOOKUP_LIGATURE_SUBST_FORMAT1(shaddaLigaturesSubLookup, + G(248), + OT_OFFSET(shaddaLigaturesSubLookup, shaddaLigatureSet) +) +OT_LIGATURE_SET(shaddaLigatureSet, + OT_OFFSET(shaddaLigatureSet, shaddaLigature1) + OT_OFFSET(shaddaLigatureSet, shaddaLigature2) + OT_OFFSET(shaddaLigatureSet, shaddaLigature3) +) +OT_LIGATURE(shaddaLigature1, G(243), G(172)) +OT_LIGATURE(shaddaLigature2, G(245), G(173)) +OT_LIGATURE(shaddaLigature3, G(246), G(175)) + +/* + * Table end + */ +OT_TABLE_END + + +/* + * Clean up + */ +#undef OT_TABLE_START +#undef OT_TABLE_END +#undef OT_LABEL_START +#undef OT_LABEL_END +#undef OT_UINT8 +#undef OT_UINT16 +#undef OT_DISTANCE +#undef OT_COUNT + +/* + * Include a second time to get the table data... + */ +#if 0 +#include "hb.hh" /* Make check-includes.sh happy. */ +#endif +#ifdef OT_MEASURE +#include "hb-ot-shape-complex-arabic-win1256.hh" +#endif + +#define HB_OT_SHAPE_COMPLEX_ARABIC_WIN1256_HH +#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_WIN1256_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.cc new file mode 100644 index 0000000000..1e93f0efd5 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.cc @@ -0,0 +1,716 @@ +/* + * Copyright © 2010,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-complex-arabic.hh" +#include "hb-ot-shape.hh" + + +/* buffer var allocations */ +#define arabic_shaping_action() complex_var_u8_0() /* arabic shaping action */ + +#define HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH HB_BUFFER_SCRATCH_FLAG_COMPLEX0 + +/* See: + * https://github.com/harfbuzz/harfbuzz/commit/6e6f82b6f3dde0fc6c3c7d991d9ec6cfff57823d#commitcomment-14248516 */ +#define HB_ARABIC_GENERAL_CATEGORY_IS_WORD(gen_cat) \ + (FLAG_UNSAFE (gen_cat) & \ + (FLAG (HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED) | \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE) | \ + /*FLAG (HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER) |*/ \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) | \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) | \ + /*FLAG (HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER) |*/ \ + /*FLAG (HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER) |*/ \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) | \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) | \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER) | \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER) | \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL) | \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL) | \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL) | \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL))) + + +/* + * Joining types: + */ + +/* + * Bits used in the joining tables + */ +enum hb_arabic_joining_type_t { + JOINING_TYPE_U = 0, + JOINING_TYPE_L = 1, + JOINING_TYPE_R = 2, + JOINING_TYPE_D = 3, + JOINING_TYPE_C = JOINING_TYPE_D, + JOINING_GROUP_ALAPH = 4, + JOINING_GROUP_DALATH_RISH = 5, + NUM_STATE_MACHINE_COLS = 6, + + JOINING_TYPE_T = 7, + JOINING_TYPE_X = 8 /* means: use general-category to choose between U or T. */ +}; + +#include "hb-ot-shape-complex-arabic-table.hh" + +static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_category_t gen_cat) +{ + unsigned int j_type = joining_type(u); + if (likely (j_type != JOINING_TYPE_X)) + return j_type; + + return (FLAG_UNSAFE(gen_cat) & + (FLAG(HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) | + FLAG(HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | + FLAG(HB_UNICODE_GENERAL_CATEGORY_FORMAT)) + ) ? JOINING_TYPE_T : JOINING_TYPE_U; +} + +#define FEATURE_IS_SYRIAC(tag) hb_in_range<unsigned char> ((unsigned char) (tag), '2', '3') + +static const hb_tag_t arabic_features[] = +{ + HB_TAG('i','s','o','l'), + HB_TAG('f','i','n','a'), + HB_TAG('f','i','n','2'), + HB_TAG('f','i','n','3'), + HB_TAG('m','e','d','i'), + HB_TAG('m','e','d','2'), + HB_TAG('i','n','i','t'), + HB_TAG_NONE +}; + + +/* Same order as the feature array */ +enum arabic_action_t { + ISOL, + FINA, + FIN2, + FIN3, + MEDI, + MED2, + INIT, + + NONE, + + ARABIC_NUM_FEATURES = NONE, + + /* We abuse the same byte for other things... */ + STCH_FIXED, + STCH_REPEATING, +}; + +static const struct arabic_state_table_entry { + uint8_t prev_action; + uint8_t curr_action; + uint16_t next_state; +} arabic_state_table[][NUM_STATE_MACHINE_COLS] = +{ + /* jt_U, jt_L, jt_R, jt_D, jg_ALAPH, jg_DALATH_RISH */ + + /* State 0: prev was U, not willing to join. */ + { {NONE,NONE,0}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,6}, }, + + /* State 1: prev was R or ISOL/ALAPH, not willing to join. */ + { {NONE,NONE,0}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,FIN2,5}, {NONE,ISOL,6}, }, + + /* State 2: prev was D/L in ISOL form, willing to join. */ + { {NONE,NONE,0}, {NONE,ISOL,2}, {INIT,FINA,1}, {INIT,FINA,3}, {INIT,FINA,4}, {INIT,FINA,6}, }, + + /* State 3: prev was D in FINA form, willing to join. */ + { {NONE,NONE,0}, {NONE,ISOL,2}, {MEDI,FINA,1}, {MEDI,FINA,3}, {MEDI,FINA,4}, {MEDI,FINA,6}, }, + + /* State 4: prev was FINA ALAPH, not willing to join. */ + { {NONE,NONE,0}, {NONE,ISOL,2}, {MED2,ISOL,1}, {MED2,ISOL,2}, {MED2,FIN2,5}, {MED2,ISOL,6}, }, + + /* State 5: prev was FIN2/FIN3 ALAPH, not willing to join. */ + { {NONE,NONE,0}, {NONE,ISOL,2}, {ISOL,ISOL,1}, {ISOL,ISOL,2}, {ISOL,FIN2,5}, {ISOL,ISOL,6}, }, + + /* State 6: prev was DALATH/RISH, not willing to join. */ + { {NONE,NONE,0}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,FIN3,5}, {NONE,ISOL,6}, } +}; + + +static void +arabic_fallback_shape (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + +static void +record_stch (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + +static void +collect_features_arabic (hb_ot_shape_planner_t *plan) +{ + hb_ot_map_builder_t *map = &plan->map; + + /* We apply features according to the Arabic spec, with pauses + * in between most. + * + * The pause between init/medi/... and rlig is required. See eg: + * https://bugzilla.mozilla.org/show_bug.cgi?id=644184 + * + * The pauses between init/medi/... themselves are not necessarily + * needed as only one of those features is applied to any character. + * The only difference it makes is when fonts have contextual + * substitutions. We now follow the order of the spec, which makes + * for better experience if that's what Uniscribe is doing. + * + * At least for Arabic, looks like Uniscribe has a pause between + * rlig and calt. Otherwise the IranNastaliq's ALLAH ligature won't + * work. However, testing shows that rlig and calt are applied + * together for Mongolian in Uniscribe. As such, we only add a + * pause for Arabic, not other scripts. + * + * A pause after calt is required to make KFGQPC Uthmanic Script HAFS + * work correctly. See https://github.com/harfbuzz/harfbuzz/issues/505 + */ + + + map->enable_feature (HB_TAG('s','t','c','h')); + map->add_gsub_pause (record_stch); + + map->enable_feature (HB_TAG('c','c','m','p')); + map->enable_feature (HB_TAG('l','o','c','l')); + + map->add_gsub_pause (nullptr); + + for (unsigned int i = 0; i < ARABIC_NUM_FEATURES; i++) + { + bool has_fallback = plan->props.script == HB_SCRIPT_ARABIC && !FEATURE_IS_SYRIAC (arabic_features[i]); + map->add_feature (arabic_features[i], has_fallback ? F_HAS_FALLBACK : F_NONE); + map->add_gsub_pause (nullptr); + } + + /* Normally, Unicode says a ZWNJ means "don't ligate". In Arabic script + * however, it says a ZWJ should also mean "don't ligate". So we run + * the main ligating features as MANUAL_ZWJ. */ + + map->enable_feature (HB_TAG('r','l','i','g'), F_MANUAL_ZWJ | F_HAS_FALLBACK); + + if (plan->props.script == HB_SCRIPT_ARABIC) + map->add_gsub_pause (arabic_fallback_shape); + + /* No pause after rclt. See 98460779bae19e4d64d29461ff154b3527bf8420. */ + map->enable_feature (HB_TAG('r','c','l','t'), F_MANUAL_ZWJ); + map->enable_feature (HB_TAG('c','a','l','t'), F_MANUAL_ZWJ); + map->add_gsub_pause (nullptr); + + /* The spec includes 'cswh'. Earlier versions of Windows + * used to enable this by default, but testing suggests + * that Windows 8 and later do not enable it by default, + * and spec now says 'Off by default'. + * We disabled this in ae23c24c32. + * Note that IranNastaliq uses this feature extensively + * to fixup broken glyph sequences. Oh well... + * Test case: U+0643,U+0640,U+0631. */ + //map->enable_feature (HB_TAG('c','s','w','h')); + map->enable_feature (HB_TAG('m','s','e','t')); +} + +#include "hb-ot-shape-complex-arabic-fallback.hh" + +struct arabic_shape_plan_t +{ + /* The "+ 1" in the next array is to accommodate for the "NONE" command, + * which is not an OpenType feature, but this simplifies the code by not + * having to do a "if (... < NONE) ..." and just rely on the fact that + * mask_array[NONE] == 0. */ + hb_mask_t mask_array[ARABIC_NUM_FEATURES + 1]; + + hb_atomic_ptr_t<arabic_fallback_plan_t> fallback_plan; + + unsigned int do_fallback : 1; + unsigned int has_stch : 1; +}; + +void * +data_create_arabic (const hb_ot_shape_plan_t *plan) +{ + arabic_shape_plan_t *arabic_plan = (arabic_shape_plan_t *) calloc (1, sizeof (arabic_shape_plan_t)); + if (unlikely (!arabic_plan)) + return nullptr; + + arabic_plan->do_fallback = plan->props.script == HB_SCRIPT_ARABIC; + arabic_plan->has_stch = !!plan->map.get_1_mask (HB_TAG ('s','t','c','h')); + for (unsigned int i = 0; i < ARABIC_NUM_FEATURES; i++) { + arabic_plan->mask_array[i] = plan->map.get_1_mask (arabic_features[i]); + arabic_plan->do_fallback = arabic_plan->do_fallback && + (FEATURE_IS_SYRIAC (arabic_features[i]) || + plan->map.needs_fallback (arabic_features[i])); + } + + return arabic_plan; +} + +void +data_destroy_arabic (void *data) +{ + arabic_shape_plan_t *arabic_plan = (arabic_shape_plan_t *) data; + + arabic_fallback_plan_destroy (arabic_plan->fallback_plan); + + free (data); +} + +static void +arabic_joining (hb_buffer_t *buffer) +{ + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + unsigned int prev = UINT_MAX, state = 0; + + /* Check pre-context */ + for (unsigned int i = 0; i < buffer->context_len[0]; i++) + { + unsigned int this_type = get_joining_type (buffer->context[0][i], buffer->unicode->general_category (buffer->context[0][i])); + + if (unlikely (this_type == JOINING_TYPE_T)) + continue; + + const arabic_state_table_entry *entry = &arabic_state_table[state][this_type]; + state = entry->next_state; + break; + } + + for (unsigned int i = 0; i < count; i++) + { + unsigned int this_type = get_joining_type (info[i].codepoint, _hb_glyph_info_get_general_category (&info[i])); + + if (unlikely (this_type == JOINING_TYPE_T)) { + info[i].arabic_shaping_action() = NONE; + continue; + } + + const arabic_state_table_entry *entry = &arabic_state_table[state][this_type]; + + if (entry->prev_action != NONE && prev != UINT_MAX) + { + info[prev].arabic_shaping_action() = entry->prev_action; + buffer->unsafe_to_break (prev, i + 1); + } + + info[i].arabic_shaping_action() = entry->curr_action; + + prev = i; + state = entry->next_state; + } + + for (unsigned int i = 0; i < buffer->context_len[1]; i++) + { + unsigned int this_type = get_joining_type (buffer->context[1][i], buffer->unicode->general_category (buffer->context[1][i])); + + if (unlikely (this_type == JOINING_TYPE_T)) + continue; + + const arabic_state_table_entry *entry = &arabic_state_table[state][this_type]; + if (entry->prev_action != NONE && prev != UINT_MAX) + info[prev].arabic_shaping_action() = entry->prev_action; + break; + } +} + +static void +mongolian_variation_selectors (hb_buffer_t *buffer) +{ + /* Copy arabic_shaping_action() from base to Mongolian variation selectors. */ + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 1; i < count; i++) + if (unlikely (hb_in_range<hb_codepoint_t> (info[i].codepoint, 0x180Bu, 0x180Du))) + info[i].arabic_shaping_action() = info[i - 1].arabic_shaping_action(); +} + +void +setup_masks_arabic_plan (const arabic_shape_plan_t *arabic_plan, + hb_buffer_t *buffer, + hb_script_t script) +{ + HB_BUFFER_ALLOCATE_VAR (buffer, arabic_shaping_action); + + arabic_joining (buffer); + if (script == HB_SCRIPT_MONGOLIAN) + mongolian_variation_selectors (buffer); + + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + info[i].mask |= arabic_plan->mask_array[info[i].arabic_shaping_action()]; +} + +static void +setup_masks_arabic (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font HB_UNUSED) +{ + const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data; + setup_masks_arabic_plan (arabic_plan, buffer, plan->props.script); +} + +static void +arabic_fallback_shape (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ +#ifdef HB_NO_OT_SHAPE_COMPLEX_ARABIC_FALLBACK + return; +#endif + + const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data; + + if (!arabic_plan->do_fallback) + return; + +retry: + arabic_fallback_plan_t *fallback_plan = arabic_plan->fallback_plan; + if (unlikely (!fallback_plan)) + { + /* This sucks. We need a font to build the fallback plan... */ + fallback_plan = arabic_fallback_plan_create (plan, font); + if (unlikely (!arabic_plan->fallback_plan.cmpexch (nullptr, fallback_plan))) + { + arabic_fallback_plan_destroy (fallback_plan); + goto retry; + } + } + + arabic_fallback_plan_shape (fallback_plan, font, buffer); +} + +/* + * Stretch feature: "stch". + * See example here: + * https://docs.microsoft.com/en-us/typography/script-development/syriac + * We implement this in a generic way, such that the Arabic subtending + * marks can use it as well. + */ + +static void +record_stch (const hb_ot_shape_plan_t *plan, + hb_font_t *font HB_UNUSED, + hb_buffer_t *buffer) +{ + const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data; + if (!arabic_plan->has_stch) + return; + + /* 'stch' feature was just applied. Look for anything that multiplied, + * and record it for stch treatment later. Note that rtlm, frac, etc + * are applied before stch, but we assume that they didn't result in + * anything multiplying into 5 pieces, so it's safe-ish... */ + + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + if (unlikely (_hb_glyph_info_multiplied (&info[i]))) + { + unsigned int comp = _hb_glyph_info_get_lig_comp (&info[i]); + info[i].arabic_shaping_action() = comp % 2 ? STCH_REPEATING : STCH_FIXED; + buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH; + } +} + +static void +apply_stch (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_buffer_t *buffer, + hb_font_t *font) +{ + if (likely (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH))) + return; + + /* The Arabic shaper currently always processes in RTL mode, so we should + * stretch / position the stretched pieces to the left / preceding glyphs. */ + + /* We do a two pass implementation: + * First pass calculates the exact number of extra glyphs we need, + * We then enlarge buffer to have that much room, + * Second pass applies the stretch, copying things to the end of buffer. + */ + + int sign = font->x_scale < 0 ? -1 : +1; + unsigned int extra_glyphs_needed = 0; // Set during MEASURE, used during CUT + enum { MEASURE, CUT } /* step_t */; + + for (unsigned int step = MEASURE; step <= CUT; step = step + 1) + { + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + hb_glyph_position_t *pos = buffer->pos; + unsigned int new_len = count + extra_glyphs_needed; // write head during CUT + unsigned int j = new_len; + for (unsigned int i = count; i; i--) + { + if (!hb_in_range<uint8_t> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING)) + { + if (step == CUT) + { + --j; + info[j] = info[i - 1]; + pos[j] = pos[i - 1]; + } + continue; + } + + /* Yay, justification! */ + + hb_position_t w_total = 0; // Total to be filled + hb_position_t w_fixed = 0; // Sum of fixed tiles + hb_position_t w_repeating = 0; // Sum of repeating tiles + int n_fixed = 0; + int n_repeating = 0; + + unsigned int end = i; + while (i && + hb_in_range<uint8_t> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING)) + { + i--; + hb_position_t width = font->get_glyph_h_advance (info[i].codepoint); + if (info[i].arabic_shaping_action() == STCH_FIXED) + { + w_fixed += width; + n_fixed++; + } + else + { + w_repeating += width; + n_repeating++; + } + } + unsigned int start = i; + unsigned int context = i; + while (context && + !hb_in_range<uint8_t> (info[context - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING) && + (_hb_glyph_info_is_default_ignorable (&info[context - 1]) || + HB_ARABIC_GENERAL_CATEGORY_IS_WORD (_hb_glyph_info_get_general_category (&info[context - 1])))) + { + context--; + w_total += pos[context].x_advance; + } + i++; // Don't touch i again. + + DEBUG_MSG (ARABIC, nullptr, "%s stretch at (%d,%d,%d)", + step == MEASURE ? "measuring" : "cutting", context, start, end); + DEBUG_MSG (ARABIC, nullptr, "rest of word: count=%d width %d", start - context, w_total); + DEBUG_MSG (ARABIC, nullptr, "fixed tiles: count=%d width=%d", n_fixed, w_fixed); + DEBUG_MSG (ARABIC, nullptr, "repeating tiles: count=%d width=%d", n_repeating, w_repeating); + + /* Number of additional times to repeat each repeating tile. */ + int n_copies = 0; + + hb_position_t w_remaining = w_total - w_fixed; + if (sign * w_remaining > sign * w_repeating && sign * w_repeating > 0) + n_copies = (sign * w_remaining) / (sign * w_repeating) - 1; + + /* See if we can improve the fit by adding an extra repeat and squeezing them together a bit. */ + hb_position_t extra_repeat_overlap = 0; + hb_position_t shortfall = sign * w_remaining - sign * w_repeating * (n_copies + 1); + if (shortfall > 0 && n_repeating > 0) + { + ++n_copies; + hb_position_t excess = (n_copies + 1) * sign * w_repeating - sign * w_remaining; + if (excess > 0) + extra_repeat_overlap = excess / (n_copies * n_repeating); + } + + if (step == MEASURE) + { + extra_glyphs_needed += n_copies * n_repeating; + DEBUG_MSG (ARABIC, nullptr, "will add extra %d copies of repeating tiles", n_copies); + } + else + { + buffer->unsafe_to_break (context, end); + hb_position_t x_offset = 0; + for (unsigned int k = end; k > start; k--) + { + hb_position_t width = font->get_glyph_h_advance (info[k - 1].codepoint); + + unsigned int repeat = 1; + if (info[k - 1].arabic_shaping_action() == STCH_REPEATING) + repeat += n_copies; + + DEBUG_MSG (ARABIC, nullptr, "appending %d copies of glyph %d; j=%d", + repeat, info[k - 1].codepoint, j); + for (unsigned int n = 0; n < repeat; n++) + { + x_offset -= width; + if (n > 0) + x_offset += extra_repeat_overlap; + pos[k - 1].x_offset = x_offset; + /* Append copy. */ + --j; + info[j] = info[k - 1]; + pos[j] = pos[k - 1]; + } + } + } + } + + if (step == MEASURE) + { + if (unlikely (!buffer->ensure (count + extra_glyphs_needed))) + break; + } + else + { + assert (j == 0); + buffer->len = new_len; + } + } +} + + +static void +postprocess_glyphs_arabic (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font) +{ + apply_stch (plan, buffer, font); + + HB_BUFFER_DEALLOCATE_VAR (buffer, arabic_shaping_action); +} + +/* https://www.unicode.org/reports/tr53/ */ + +static hb_codepoint_t +modifier_combining_marks[] = +{ + 0x0654u, /* ARABIC HAMZA ABOVE */ + 0x0655u, /* ARABIC HAMZA BELOW */ + 0x0658u, /* ARABIC MARK NOON GHUNNA */ + 0x06DCu, /* ARABIC SMALL HIGH SEEN */ + 0x06E3u, /* ARABIC SMALL LOW SEEN */ + 0x06E7u, /* ARABIC SMALL HIGH YEH */ + 0x06E8u, /* ARABIC SMALL HIGH NOON */ + 0x08D3u, /* ARABIC SMALL LOW WAW */ + 0x08F3u, /* ARABIC SMALL HIGH WAW */ +}; + +static inline bool +info_is_mcm (const hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + for (unsigned int i = 0; i < ARRAY_LENGTH (modifier_combining_marks); i++) + if (u == modifier_combining_marks[i]) + return true; + return false; +} + +static void +reorder_marks_arabic (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_buffer_t *buffer, + unsigned int start, + unsigned int end) +{ + hb_glyph_info_t *info = buffer->info; + + DEBUG_MSG (ARABIC, buffer, "Reordering marks from %d to %d", start, end); + + unsigned int i = start; + for (unsigned int cc = 220; cc <= 230; cc += 10) + { + DEBUG_MSG (ARABIC, buffer, "Looking for %d's starting at %d", cc, i); + while (i < end && info_cc(info[i]) < cc) + i++; + DEBUG_MSG (ARABIC, buffer, "Looking for %d's stopped at %d", cc, i); + + if (i == end) + break; + + if (info_cc(info[i]) > cc) + continue; + + unsigned int j = i; + while (j < end && info_cc(info[j]) == cc && info_is_mcm (info[j])) + j++; + + if (i == j) + continue; + + DEBUG_MSG (ARABIC, buffer, "Found %d's from %d to %d", cc, i, j); + + /* Shift it! */ + DEBUG_MSG (ARABIC, buffer, "Shifting %d's: %d %d", cc, i, j); + hb_glyph_info_t temp[HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS]; + assert (j - i <= ARRAY_LENGTH (temp)); + buffer->merge_clusters (start, j); + memmove (temp, &info[i], (j - i) * sizeof (hb_glyph_info_t)); + memmove (&info[start + j - i], &info[start], (i - start) * sizeof (hb_glyph_info_t)); + memmove (&info[start], temp, (j - i) * sizeof (hb_glyph_info_t)); + + /* Renumber CC such that the reordered sequence is still sorted. + * 22 and 26 are chosen because they are smaller than all Arabic categories, + * and are folded back to 220/230 respectively during fallback mark positioning. + * + * We do this because the CGJ-handling logic in the normalizer relies on + * mark sequences having an increasing order even after this reordering. + * https://github.com/harfbuzz/harfbuzz/issues/554 + * This, however, does break some obscure sequences, where the normalizer + * might compose a sequence that it should not. For example, in the seequence + * ALEF, HAMZAH, MADDAH, we should NOT try to compose ALEF+MADDAH, but with this + * renumbering, we will. + */ + unsigned int new_start = start + j - i; + unsigned int new_cc = cc == 220 ? HB_MODIFIED_COMBINING_CLASS_CCC22 : HB_MODIFIED_COMBINING_CLASS_CCC26; + while (start < new_start) + { + _hb_glyph_info_set_modified_combining_class (&info[start], new_cc); + start++; + } + + i = j; + } +} + +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic = +{ + collect_features_arabic, + nullptr, /* override_features */ + data_create_arabic, + data_destroy_arabic, + nullptr, /* preprocess_text */ + postprocess_glyphs_arabic, + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, + nullptr, /* decompose */ + nullptr, /* compose */ + setup_masks_arabic, + HB_TAG_NONE, /* gpos_tag */ + reorder_marks_arabic, + HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, + true, /* fallback_position */ +}; + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.hh new file mode 100644 index 0000000000..5bf6ff6338 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-arabic.hh @@ -0,0 +1,50 @@ +/* + * Copyright © 2015 Mozilla Foundation. + * Copyright © 2015 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Mozilla Author(s): Jonathan Kew + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_ARABIC_HH +#define HB_OT_SHAPE_COMPLEX_ARABIC_HH + +#include "hb.hh" + +#include "hb-ot-shape-complex.hh" + + +struct arabic_shape_plan_t; + +HB_INTERNAL void * +data_create_arabic (const hb_ot_shape_plan_t *plan); + +HB_INTERNAL void +data_destroy_arabic (void *data); + +HB_INTERNAL void +setup_masks_arabic_plan (const arabic_shape_plan_t *arabic_plan, + hb_buffer_t *buffer, + hb_script_t script); + +#endif /* HB_OT_SHAPE_COMPLEX_ARABIC_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-default.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-default.cc new file mode 100644 index 0000000000..a755aea098 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-default.cc @@ -0,0 +1,73 @@ +/* + * Copyright © 2010,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-complex.hh" + + +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_default = +{ + nullptr, /* collect_features */ + nullptr, /* override_features */ + nullptr, /* data_create */ + nullptr, /* data_destroy */ + nullptr, /* preprocess_text */ + nullptr, /* postprocess_glyphs */ + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, + nullptr, /* decompose */ + nullptr, /* compose */ + nullptr, /* setup_masks */ + HB_TAG_NONE, /* gpos_tag */ + nullptr, /* reorder_marks */ + HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, + true, /* fallback_position */ +}; + +/* Same as default but no mark advance zeroing / fallback positioning. + * Dumbest shaper ever, basically. */ +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_dumber = +{ + nullptr, /* collect_features */ + nullptr, /* override_features */ + nullptr, /* data_create */ + nullptr, /* data_destroy */ + nullptr, /* preprocess_text */ + nullptr, /* postprocess_glyphs */ + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, + nullptr, /* decompose */ + nullptr, /* compose */ + nullptr, /* setup_masks */ + HB_TAG_NONE, /* gpos_tag */ + nullptr, /* reorder_marks */ + HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, + false, /* fallback_position */ +}; + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-hangul.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-hangul.cc new file mode 100644 index 0000000000..f5915f43ae --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-hangul.cc @@ -0,0 +1,439 @@ +/* + * Copyright © 2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-complex.hh" + + +/* Hangul shaper */ + + +/* Same order as the feature array below */ +enum { + _JMO, + + LJMO, + VJMO, + TJMO, + + FIRST_HANGUL_FEATURE = LJMO, + HANGUL_FEATURE_COUNT = TJMO + 1 +}; + +static const hb_tag_t hangul_features[HANGUL_FEATURE_COUNT] = +{ + HB_TAG_NONE, + HB_TAG('l','j','m','o'), + HB_TAG('v','j','m','o'), + HB_TAG('t','j','m','o') +}; + +static void +collect_features_hangul (hb_ot_shape_planner_t *plan) +{ + hb_ot_map_builder_t *map = &plan->map; + + for (unsigned int i = FIRST_HANGUL_FEATURE; i < HANGUL_FEATURE_COUNT; i++) + map->add_feature (hangul_features[i]); +} + +static void +override_features_hangul (hb_ot_shape_planner_t *plan) +{ + /* Uniscribe does not apply 'calt' for Hangul, and certain fonts + * (Noto Sans CJK, Source Sans Han, etc) apply all of jamo lookups + * in calt, which is not desirable. */ + plan->map.disable_feature (HB_TAG('c','a','l','t')); +} + +struct hangul_shape_plan_t +{ + hb_mask_t mask_array[HANGUL_FEATURE_COUNT]; +}; + +static void * +data_create_hangul (const hb_ot_shape_plan_t *plan) +{ + hangul_shape_plan_t *hangul_plan = (hangul_shape_plan_t *) calloc (1, sizeof (hangul_shape_plan_t)); + if (unlikely (!hangul_plan)) + return nullptr; + + for (unsigned int i = 0; i < HANGUL_FEATURE_COUNT; i++) + hangul_plan->mask_array[i] = plan->map.get_1_mask (hangul_features[i]); + + return hangul_plan; +} + +static void +data_destroy_hangul (void *data) +{ + free (data); +} + +/* Constants for algorithmic hangul syllable [de]composition. */ +#define LBase 0x1100u +#define VBase 0x1161u +#define TBase 0x11A7u +#define LCount 19u +#define VCount 21u +#define TCount 28u +#define SBase 0xAC00u +#define NCount (VCount * TCount) +#define SCount (LCount * NCount) + +#define isCombiningL(u) (hb_in_range<hb_codepoint_t> ((u), LBase, LBase+LCount-1)) +#define isCombiningV(u) (hb_in_range<hb_codepoint_t> ((u), VBase, VBase+VCount-1)) +#define isCombiningT(u) (hb_in_range<hb_codepoint_t> ((u), TBase+1, TBase+TCount-1)) +#define isCombinedS(u) (hb_in_range<hb_codepoint_t> ((u), SBase, SBase+SCount-1)) + +#define isL(u) (hb_in_ranges<hb_codepoint_t> ((u), 0x1100u, 0x115Fu, 0xA960u, 0xA97Cu)) +#define isV(u) (hb_in_ranges<hb_codepoint_t> ((u), 0x1160u, 0x11A7u, 0xD7B0u, 0xD7C6u)) +#define isT(u) (hb_in_ranges<hb_codepoint_t> ((u), 0x11A8u, 0x11FFu, 0xD7CBu, 0xD7FBu)) + +#define isHangulTone(u) (hb_in_range<hb_codepoint_t> ((u), 0x302Eu, 0x302Fu)) + +/* buffer var allocations */ +#define hangul_shaping_feature() complex_var_u8_0() /* hangul jamo shaping feature */ + +static bool +is_zero_width_char (hb_font_t *font, + hb_codepoint_t unicode) +{ + hb_codepoint_t glyph; + return hb_font_get_glyph (font, unicode, 0, &glyph) && hb_font_get_glyph_h_advance (font, glyph) == 0; +} + +static void +preprocess_text_hangul (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_buffer_t *buffer, + hb_font_t *font) +{ + HB_BUFFER_ALLOCATE_VAR (buffer, hangul_shaping_feature); + + /* Hangul syllables come in two shapes: LV, and LVT. Of those: + * + * - LV can be precomposed, or decomposed. Lets call those + * <LV> and <L,V>, + * - LVT can be fully precomposed, partically precomposed, or + * fully decomposed. Ie. <LVT>, <LV,T>, or <L,V,T>. + * + * The composition / decomposition is mechanical. However, not + * all <L,V> sequences compose, and not all <LV,T> sequences + * compose. + * + * Here are the specifics: + * + * - <L>: U+1100..115F, U+A960..A97F + * - <V>: U+1160..11A7, U+D7B0..D7C7 + * - <T>: U+11A8..11FF, U+D7CB..D7FB + * + * - Only the <L,V> sequences for some of the U+11xx ranges combine. + * - Only <LV,T> sequences for some of the Ts in U+11xx range combine. + * + * Here is what we want to accomplish in this shaper: + * + * - If the whole syllable can be precomposed, do that, + * - Otherwise, fully decompose and apply ljmo/vjmo/tjmo features. + * - If a valid syllable is followed by a Hangul tone mark, reorder the tone + * mark to precede the whole syllable - unless it is a zero-width glyph, in + * which case we leave it untouched, assuming it's designed to overstrike. + * + * That is, of the different possible syllables: + * + * <L> + * <L,V> + * <L,V,T> + * <LV> + * <LVT> + * <LV, T> + * + * - <L> needs no work. + * + * - <LV> and <LVT> can stay the way they are if the font supports them, otherwise we + * should fully decompose them if font supports. + * + * - <L,V> and <L,V,T> we should compose if the whole thing can be composed. + * + * - <LV,T> we should compose if the whole thing can be composed, otherwise we should + * decompose. + */ + + buffer->clear_output (); + unsigned int start = 0, end = 0; /* Extent of most recently seen syllable; + * valid only if start < end + */ + unsigned int count = buffer->len; + + for (buffer->idx = 0; buffer->idx < count && buffer->successful;) + { + hb_codepoint_t u = buffer->cur().codepoint; + + if (isHangulTone (u)) + { + /* + * We could cache the width of the tone marks and the existence of dotted-circle, + * but the use of the Hangul tone mark characters seems to be rare enough that + * I didn't bother for now. + */ + if (start < end && end == buffer->out_len) + { + /* Tone mark follows a valid syllable; move it in front, unless it's zero width. */ + buffer->unsafe_to_break_from_outbuffer (start, buffer->idx); + buffer->next_glyph (); + if (!is_zero_width_char (font, u)) + { + buffer->merge_out_clusters (start, end + 1); + hb_glyph_info_t *info = buffer->out_info; + hb_glyph_info_t tone = info[end]; + memmove (&info[start + 1], &info[start], (end - start) * sizeof (hb_glyph_info_t)); + info[start] = tone; + } + } + else + { + /* No valid syllable as base for tone mark; try to insert dotted circle. */ + if (!(buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE) && + font->has_glyph (0x25CCu)) + { + hb_codepoint_t chars[2]; + if (!is_zero_width_char (font, u)) { + chars[0] = u; + chars[1] = 0x25CCu; + } else { + chars[0] = 0x25CCu; + chars[1] = u; + } + buffer->replace_glyphs (1, 2, chars); + } + else + { + /* No dotted circle available in the font; just leave tone mark untouched. */ + buffer->next_glyph (); + } + } + start = end = buffer->out_len; + continue; + } + + start = buffer->out_len; /* Remember current position as a potential syllable start; + * will only be used if we set end to a later position. + */ + + if (isL (u) && buffer->idx + 1 < count) + { + hb_codepoint_t l = u; + hb_codepoint_t v = buffer->cur(+1).codepoint; + if (isV (v)) + { + /* Have <L,V> or <L,V,T>. */ + hb_codepoint_t t = 0; + unsigned int tindex = 0; + if (buffer->idx + 2 < count) + { + t = buffer->cur(+2).codepoint; + if (isT (t)) + tindex = t - TBase; /* Only used if isCombiningT (t); otherwise invalid. */ + else + t = 0; /* The next character was not a trailing jamo. */ + } + buffer->unsafe_to_break (buffer->idx, buffer->idx + (t ? 3 : 2)); + + /* We've got a syllable <L,V,T?>; see if it can potentially be composed. */ + if (isCombiningL (l) && isCombiningV (v) && (t == 0 || isCombiningT (t))) + { + /* Try to compose; if this succeeds, end is set to start+1. */ + hb_codepoint_t s = SBase + (l - LBase) * NCount + (v - VBase) * TCount + tindex; + if (font->has_glyph (s)) + { + buffer->replace_glyphs (t ? 3 : 2, 1, &s); + if (unlikely (!buffer->successful)) + return; + end = start + 1; + continue; + } + } + + /* We didn't compose, either because it's an Old Hangul syllable without a + * precomposed character in Unicode, or because the font didn't support the + * necessary precomposed glyph. + * Set jamo features on the individual glyphs, and advance past them. + */ + buffer->cur().hangul_shaping_feature() = LJMO; + buffer->next_glyph (); + buffer->cur().hangul_shaping_feature() = VJMO; + buffer->next_glyph (); + if (t) + { + buffer->cur().hangul_shaping_feature() = TJMO; + buffer->next_glyph (); + end = start + 3; + } + else + end = start + 2; + if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) + buffer->merge_out_clusters (start, end); + continue; + } + } + + else if (isCombinedS (u)) + { + /* Have <LV>, <LVT>, or <LV,T> */ + hb_codepoint_t s = u; + bool has_glyph = font->has_glyph (s); + unsigned int lindex = (s - SBase) / NCount; + unsigned int nindex = (s - SBase) % NCount; + unsigned int vindex = nindex / TCount; + unsigned int tindex = nindex % TCount; + + if (!tindex && + buffer->idx + 1 < count && + isCombiningT (buffer->cur(+1).codepoint)) + { + /* <LV,T>, try to combine. */ + unsigned int new_tindex = buffer->cur(+1).codepoint - TBase; + hb_codepoint_t new_s = s + new_tindex; + if (font->has_glyph (new_s)) + { + buffer->replace_glyphs (2, 1, &new_s); + if (unlikely (!buffer->successful)) + return; + end = start + 1; + continue; + } + else + buffer->unsafe_to_break (buffer->idx, buffer->idx + 2); /* Mark unsafe between LV and T. */ + } + + /* Otherwise, decompose if font doesn't support <LV> or <LVT>, + * or if having non-combining <LV,T>. Note that we already handled + * combining <LV,T> above. */ + if (!has_glyph || + (!tindex && + buffer->idx + 1 < count && + isT (buffer->cur(+1).codepoint))) + { + hb_codepoint_t decomposed[3] = {LBase + lindex, + VBase + vindex, + TBase + tindex}; + if (font->has_glyph (decomposed[0]) && + font->has_glyph (decomposed[1]) && + (!tindex || font->has_glyph (decomposed[2]))) + { + unsigned int s_len = tindex ? 3 : 2; + buffer->replace_glyphs (1, s_len, decomposed); + + /* If we decomposed an LV because of a non-combining T following, + * we want to include this T in the syllable. + */ + if (has_glyph && !tindex) + { + buffer->next_glyph (); + s_len++; + } + + if (unlikely (!buffer->successful)) + return; + + /* We decomposed S: apply jamo features to the individual glyphs + * that are now in buffer->out_info. + */ + hb_glyph_info_t *info = buffer->out_info; + end = start + s_len; + + unsigned int i = start; + info[i++].hangul_shaping_feature() = LJMO; + info[i++].hangul_shaping_feature() = VJMO; + if (i < end) + info[i++].hangul_shaping_feature() = TJMO; + + if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) + buffer->merge_out_clusters (start, end); + continue; + } + else if ((!tindex && buffer->idx + 1 < count && isT (buffer->cur(+1).codepoint))) + buffer->unsafe_to_break (buffer->idx, buffer->idx + 2); /* Mark unsafe between LV and T. */ + } + + if (has_glyph) + { + /* We didn't decompose the S, so just advance past it. */ + end = start + 1; + buffer->next_glyph (); + continue; + } + } + + /* Didn't find a recognizable syllable, so we leave end <= start; + * this will prevent tone-mark reordering happening. + */ + buffer->next_glyph (); + } + buffer->swap_buffers (); +} + +static void +setup_masks_hangul (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font HB_UNUSED) +{ + const hangul_shape_plan_t *hangul_plan = (const hangul_shape_plan_t *) plan->data; + + if (likely (hangul_plan)) + { + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++, info++) + info->mask |= hangul_plan->mask_array[info->hangul_shaping_feature()]; + } + + HB_BUFFER_DEALLOCATE_VAR (buffer, hangul_shaping_feature); +} + + +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul = +{ + collect_features_hangul, + override_features_hangul, + data_create_hangul, + data_destroy_hangul, + preprocess_text_hangul, + nullptr, /* postprocess_glyphs */ + HB_OT_SHAPE_NORMALIZATION_MODE_NONE, + nullptr, /* decompose */ + nullptr, /* compose */ + setup_masks_hangul, + HB_TAG_NONE, /* gpos_tag */ + nullptr, /* reorder_marks */ + HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, + false, /* fallback_position */ +}; + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-hebrew.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-hebrew.cc new file mode 100644 index 0000000000..334d3ded82 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-hebrew.cc @@ -0,0 +1,185 @@ +/* + * Copyright © 2010,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-complex.hh" + + +static bool +compose_hebrew (const hb_ot_shape_normalize_context_t *c, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab) +{ + /* Hebrew presentation-form shaping. + * https://bugzilla.mozilla.org/show_bug.cgi?id=728866 + * Hebrew presentation forms with dagesh, for characters U+05D0..05EA; + * Note that some letters do not have a dagesh presForm encoded. + */ + static const hb_codepoint_t sDageshForms[0x05EAu - 0x05D0u + 1] = { + 0xFB30u, /* ALEF */ + 0xFB31u, /* BET */ + 0xFB32u, /* GIMEL */ + 0xFB33u, /* DALET */ + 0xFB34u, /* HE */ + 0xFB35u, /* VAV */ + 0xFB36u, /* ZAYIN */ + 0x0000u, /* HET */ + 0xFB38u, /* TET */ + 0xFB39u, /* YOD */ + 0xFB3Au, /* FINAL KAF */ + 0xFB3Bu, /* KAF */ + 0xFB3Cu, /* LAMED */ + 0x0000u, /* FINAL MEM */ + 0xFB3Eu, /* MEM */ + 0x0000u, /* FINAL NUN */ + 0xFB40u, /* NUN */ + 0xFB41u, /* SAMEKH */ + 0x0000u, /* AYIN */ + 0xFB43u, /* FINAL PE */ + 0xFB44u, /* PE */ + 0x0000u, /* FINAL TSADI */ + 0xFB46u, /* TSADI */ + 0xFB47u, /* QOF */ + 0xFB48u, /* RESH */ + 0xFB49u, /* SHIN */ + 0xFB4Au /* TAV */ + }; + + bool found = (bool) c->unicode->compose (a, b, ab); + +#ifdef HB_NO_OT_SHAPE_COMPLEX_HEBREW_FALLBACK + return found; +#endif + + if (!found && !c->plan->has_gpos_mark) + { + /* Special-case Hebrew presentation forms that are excluded from + * standard normalization, but wanted for old fonts. */ + switch (b) { + case 0x05B4u: /* HIRIQ */ + if (a == 0x05D9u) { /* YOD */ + *ab = 0xFB1Du; + found = true; + } + break; + case 0x05B7u: /* patah */ + if (a == 0x05F2u) { /* YIDDISH YOD YOD */ + *ab = 0xFB1Fu; + found = true; + } else if (a == 0x05D0u) { /* ALEF */ + *ab = 0xFB2Eu; + found = true; + } + break; + case 0x05B8u: /* QAMATS */ + if (a == 0x05D0u) { /* ALEF */ + *ab = 0xFB2Fu; + found = true; + } + break; + case 0x05B9u: /* HOLAM */ + if (a == 0x05D5u) { /* VAV */ + *ab = 0xFB4Bu; + found = true; + } + break; + case 0x05BCu: /* DAGESH */ + if (a >= 0x05D0u && a <= 0x05EAu) { + *ab = sDageshForms[a - 0x05D0u]; + found = (*ab != 0); + } else if (a == 0xFB2Au) { /* SHIN WITH SHIN DOT */ + *ab = 0xFB2Cu; + found = true; + } else if (a == 0xFB2Bu) { /* SHIN WITH SIN DOT */ + *ab = 0xFB2Du; + found = true; + } + break; + case 0x05BFu: /* RAFE */ + switch (a) { + case 0x05D1u: /* BET */ + *ab = 0xFB4Cu; + found = true; + break; + case 0x05DBu: /* KAF */ + *ab = 0xFB4Du; + found = true; + break; + case 0x05E4u: /* PE */ + *ab = 0xFB4Eu; + found = true; + break; + } + break; + case 0x05C1u: /* SHIN DOT */ + if (a == 0x05E9u) { /* SHIN */ + *ab = 0xFB2Au; + found = true; + } else if (a == 0xFB49u) { /* SHIN WITH DAGESH */ + *ab = 0xFB2Cu; + found = true; + } + break; + case 0x05C2u: /* SIN DOT */ + if (a == 0x05E9u) { /* SHIN */ + *ab = 0xFB2Bu; + found = true; + } else if (a == 0xFB49u) { /* SHIN WITH DAGESH */ + *ab = 0xFB2Du; + found = true; + } + break; + } + } + + return found; +} + + +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hebrew = +{ + nullptr, /* collect_features */ + nullptr, /* override_features */ + nullptr, /* data_create */ + nullptr, /* data_destroy */ + nullptr, /* preprocess_text */ + nullptr, /* postprocess_glyphs */ + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, + nullptr, /* decompose */ + compose_hebrew, + nullptr, /* setup_masks */ + HB_TAG ('h','e','b','r'), /* gpos_tag. https://github.com/harfbuzz/harfbuzz/issues/347#issuecomment-267838368 */ + nullptr, /* reorder_marks */ + HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, + true, /* fallback_position */ +}; + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-machine.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-machine.hh new file mode 100644 index 0000000000..670b6bf486 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-machine.hh @@ -0,0 +1,574 @@ + +#line 1 "hb-ot-shape-complex-indic-machine.rl" +/* + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH +#define HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH + +#include "hb.hh" + + +#line 36 "hb-ot-shape-complex-indic-machine.hh" +static const unsigned char _indic_syllable_machine_trans_keys[] = { + 8u, 8u, 4u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 6u, 6u, 16u, 16u, 4u, 8u, + 4u, 13u, 4u, 8u, 8u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, 6u, 6u, 16u, 16u, + 4u, 8u, 4u, 13u, 4u, 13u, 4u, 13u, 8u, 8u, 5u, 7u, 5u, 8u, 4u, 8u, + 6u, 6u, 16u, 16u, 4u, 8u, 4u, 8u, 4u, 13u, 8u, 8u, 5u, 7u, 5u, 8u, + 4u, 8u, 6u, 6u, 16u, 16u, 4u, 8u, 4u, 8u, 5u, 8u, 8u, 8u, 1u, 19u, + 3u, 17u, 3u, 17u, 4u, 17u, 1u, 16u, 5u, 10u, 5u, 10u, 10u, 10u, 5u, 10u, + 1u, 16u, 1u, 16u, 1u, 16u, 3u, 10u, 4u, 10u, 5u, 10u, 4u, 10u, 5u, 10u, + 3u, 10u, 5u, 10u, 3u, 17u, 3u, 17u, 3u, 17u, 3u, 17u, 4u, 17u, 1u, 16u, + 3u, 17u, 3u, 17u, 4u, 17u, 1u, 16u, 5u, 10u, 10u, 10u, 5u, 10u, 1u, 16u, + 1u, 16u, 3u, 10u, 4u, 10u, 5u, 10u, 4u, 10u, 5u, 10u, 5u, 10u, 3u, 10u, + 5u, 10u, 3u, 17u, 3u, 17u, 4u, 8u, 3u, 17u, 3u, 17u, 4u, 17u, 1u, 16u, + 3u, 17u, 1u, 16u, 5u, 10u, 10u, 10u, 5u, 10u, 1u, 16u, 1u, 16u, 3u, 10u, + 4u, 10u, 5u, 10u, 3u, 17u, 4u, 10u, 5u, 10u, 5u, 10u, 3u, 10u, 5u, 10u, + 3u, 17u, 4u, 13u, 4u, 8u, 3u, 17u, 3u, 17u, 4u, 17u, 1u, 16u, 3u, 17u, + 1u, 16u, 5u, 10u, 10u, 10u, 5u, 10u, 1u, 16u, 1u, 16u, 3u, 10u, 4u, 10u, + 5u, 10u, 3u, 17u, 4u, 10u, 5u, 10u, 5u, 10u, 3u, 10u, 5u, 10u, 1u, 17u, + 3u, 17u, 1u, 17u, 4u, 13u, 5u, 10u, 10u, 10u, 5u, 10u, 1u, 16u, 3u, 10u, + 5u, 10u, 5u, 10u, 10u, 10u, 5u, 10u, 1u, 16u, 0 +}; + +static const char _indic_syllable_machine_key_spans[] = { + 1, 5, 3, 4, 5, 1, 1, 5, + 10, 5, 1, 3, 4, 5, 1, 1, + 5, 10, 10, 10, 1, 3, 4, 5, + 1, 1, 5, 5, 10, 1, 3, 4, + 5, 1, 1, 5, 5, 4, 1, 19, + 15, 15, 14, 16, 6, 6, 1, 6, + 16, 16, 16, 8, 7, 6, 7, 6, + 8, 6, 15, 15, 15, 15, 14, 16, + 15, 15, 14, 16, 6, 1, 6, 16, + 16, 8, 7, 6, 7, 6, 6, 8, + 6, 15, 15, 5, 15, 15, 14, 16, + 15, 16, 6, 1, 6, 16, 16, 8, + 7, 6, 15, 7, 6, 6, 8, 6, + 15, 10, 5, 15, 15, 14, 16, 15, + 16, 6, 1, 6, 16, 16, 8, 7, + 6, 15, 7, 6, 6, 8, 6, 17, + 15, 17, 10, 6, 1, 6, 16, 8, + 6, 6, 1, 6, 16 +}; + +static const short _indic_syllable_machine_index_offsets[] = { + 0, 2, 8, 12, 17, 23, 25, 27, + 33, 44, 50, 52, 56, 61, 67, 69, + 71, 77, 88, 99, 110, 112, 116, 121, + 127, 129, 131, 137, 143, 154, 156, 160, + 165, 171, 173, 175, 181, 187, 192, 194, + 214, 230, 246, 261, 278, 285, 292, 294, + 301, 318, 335, 352, 361, 369, 376, 384, + 391, 400, 407, 423, 439, 455, 471, 486, + 503, 519, 535, 550, 567, 574, 576, 583, + 600, 617, 626, 634, 641, 649, 656, 663, + 672, 679, 695, 711, 717, 733, 749, 764, + 781, 797, 814, 821, 823, 830, 847, 864, + 873, 881, 888, 904, 912, 919, 926, 935, + 942, 958, 969, 975, 991, 1007, 1022, 1039, + 1055, 1072, 1079, 1081, 1088, 1105, 1122, 1131, + 1139, 1146, 1162, 1170, 1177, 1184, 1193, 1200, + 1218, 1234, 1252, 1263, 1270, 1272, 1279, 1296, + 1305, 1312, 1319, 1321, 1328 +}; + +static const unsigned char _indic_syllable_machine_indicies[] = { + 1, 0, 2, 3, 3, 4, 1, 0, + 3, 3, 4, 0, 3, 3, 4, 1, + 0, 5, 3, 3, 4, 1, 0, 6, + 0, 7, 0, 8, 3, 3, 4, 1, + 0, 2, 3, 3, 4, 1, 0, 0, + 0, 0, 9, 0, 11, 12, 12, 13, + 14, 10, 14, 10, 12, 12, 13, 10, + 12, 12, 13, 14, 10, 15, 12, 12, + 13, 14, 10, 16, 10, 17, 10, 18, + 12, 12, 13, 14, 10, 11, 12, 12, + 13, 14, 10, 10, 10, 10, 19, 10, + 11, 12, 12, 13, 14, 10, 10, 10, + 10, 20, 10, 22, 23, 23, 24, 25, + 21, 21, 21, 21, 26, 21, 25, 21, + 23, 23, 24, 27, 23, 23, 24, 25, + 21, 28, 23, 23, 24, 25, 21, 29, + 21, 30, 21, 22, 23, 23, 24, 25, + 21, 31, 23, 23, 24, 25, 21, 33, + 34, 34, 35, 36, 32, 32, 32, 32, + 37, 32, 36, 32, 34, 34, 35, 32, + 34, 34, 35, 36, 32, 38, 34, 34, + 35, 36, 32, 39, 32, 40, 32, 33, + 34, 34, 35, 36, 32, 41, 34, 34, + 35, 36, 32, 23, 23, 24, 1, 0, + 43, 42, 45, 46, 47, 48, 49, 50, + 24, 25, 44, 51, 52, 52, 26, 44, + 53, 54, 55, 56, 57, 44, 59, 60, + 61, 62, 4, 1, 58, 63, 58, 58, + 9, 58, 58, 58, 64, 58, 65, 60, + 66, 66, 4, 1, 58, 63, 58, 58, + 58, 58, 58, 58, 64, 58, 60, 66, + 66, 4, 1, 58, 63, 58, 58, 58, + 58, 58, 58, 64, 58, 45, 58, 58, + 58, 67, 68, 58, 1, 58, 63, 58, + 58, 58, 58, 58, 45, 58, 69, 69, + 58, 1, 58, 63, 58, 63, 58, 58, + 70, 58, 63, 58, 63, 58, 63, 58, + 58, 58, 58, 63, 58, 45, 58, 71, + 58, 69, 69, 58, 1, 58, 63, 58, + 58, 58, 58, 58, 45, 58, 45, 58, + 58, 58, 69, 69, 58, 1, 58, 63, + 58, 58, 58, 58, 58, 45, 58, 45, + 58, 58, 58, 69, 68, 58, 1, 58, + 63, 58, 58, 58, 58, 58, 45, 58, + 72, 7, 73, 74, 4, 1, 58, 63, + 58, 7, 73, 74, 4, 1, 58, 63, + 58, 73, 73, 4, 1, 58, 63, 58, + 75, 76, 76, 4, 1, 58, 63, 58, + 67, 77, 58, 1, 58, 63, 58, 67, + 58, 69, 69, 58, 1, 58, 63, 58, + 69, 77, 58, 1, 58, 63, 58, 59, + 60, 66, 66, 4, 1, 58, 63, 58, + 58, 58, 58, 58, 58, 64, 58, 59, + 60, 61, 66, 4, 1, 58, 63, 58, + 58, 9, 58, 58, 58, 64, 58, 79, + 80, 81, 82, 13, 14, 78, 83, 78, + 78, 20, 78, 78, 78, 84, 78, 85, + 80, 86, 82, 13, 14, 78, 83, 78, + 78, 78, 78, 78, 78, 84, 78, 80, + 86, 82, 13, 14, 78, 83, 78, 78, + 78, 78, 78, 78, 84, 78, 87, 78, + 78, 78, 88, 89, 78, 14, 78, 83, + 78, 78, 78, 78, 78, 87, 78, 90, + 80, 91, 92, 13, 14, 78, 83, 78, + 78, 19, 78, 78, 78, 84, 78, 93, + 80, 86, 86, 13, 14, 78, 83, 78, + 78, 78, 78, 78, 78, 84, 78, 80, + 86, 86, 13, 14, 78, 83, 78, 78, + 78, 78, 78, 78, 84, 78, 87, 78, + 78, 78, 94, 89, 78, 14, 78, 83, + 78, 78, 78, 78, 78, 87, 78, 83, + 78, 78, 95, 78, 83, 78, 83, 78, + 83, 78, 78, 78, 78, 83, 78, 87, + 78, 96, 78, 94, 94, 78, 14, 78, + 83, 78, 78, 78, 78, 78, 87, 78, + 87, 78, 78, 78, 94, 94, 78, 14, + 78, 83, 78, 78, 78, 78, 78, 87, + 78, 97, 17, 98, 99, 13, 14, 78, + 83, 78, 17, 98, 99, 13, 14, 78, + 83, 78, 98, 98, 13, 14, 78, 83, + 78, 100, 101, 101, 13, 14, 78, 83, + 78, 88, 102, 78, 14, 78, 83, 78, + 94, 94, 78, 14, 78, 83, 78, 88, + 78, 94, 94, 78, 14, 78, 83, 78, + 94, 102, 78, 14, 78, 83, 78, 90, + 80, 86, 86, 13, 14, 78, 83, 78, + 78, 78, 78, 78, 78, 84, 78, 90, + 80, 91, 86, 13, 14, 78, 83, 78, + 78, 19, 78, 78, 78, 84, 78, 11, + 12, 12, 13, 14, 78, 79, 80, 86, + 82, 13, 14, 78, 83, 78, 78, 78, + 78, 78, 78, 84, 78, 104, 48, 105, + 105, 24, 25, 103, 51, 103, 103, 103, + 103, 103, 103, 55, 103, 48, 105, 105, + 24, 25, 103, 51, 103, 103, 103, 103, + 103, 103, 55, 103, 106, 103, 103, 103, + 107, 108, 103, 25, 103, 51, 103, 103, + 103, 103, 103, 106, 103, 47, 48, 109, + 110, 24, 25, 103, 51, 103, 103, 26, + 103, 103, 103, 55, 103, 106, 103, 103, + 103, 111, 108, 103, 25, 103, 51, 103, + 103, 103, 103, 103, 106, 103, 51, 103, + 103, 112, 103, 51, 103, 51, 103, 51, + 103, 103, 103, 103, 51, 103, 106, 103, + 113, 103, 111, 111, 103, 25, 103, 51, + 103, 103, 103, 103, 103, 106, 103, 106, + 103, 103, 103, 111, 111, 103, 25, 103, + 51, 103, 103, 103, 103, 103, 106, 103, + 114, 30, 115, 116, 24, 25, 103, 51, + 103, 30, 115, 116, 24, 25, 103, 51, + 103, 115, 115, 24, 25, 103, 51, 103, + 47, 48, 105, 105, 24, 25, 103, 51, + 103, 103, 103, 103, 103, 103, 55, 103, + 117, 118, 118, 24, 25, 103, 51, 103, + 107, 119, 103, 25, 103, 51, 103, 111, + 111, 103, 25, 103, 51, 103, 107, 103, + 111, 111, 103, 25, 103, 51, 103, 111, + 119, 103, 25, 103, 51, 103, 47, 48, + 109, 105, 24, 25, 103, 51, 103, 103, + 26, 103, 103, 103, 55, 103, 22, 23, + 23, 24, 25, 120, 120, 120, 120, 26, + 120, 22, 23, 23, 24, 25, 120, 122, + 123, 124, 125, 35, 36, 121, 126, 121, + 121, 37, 121, 121, 121, 127, 121, 128, + 123, 125, 125, 35, 36, 121, 126, 121, + 121, 121, 121, 121, 121, 127, 121, 123, + 125, 125, 35, 36, 121, 126, 121, 121, + 121, 121, 121, 121, 127, 121, 129, 121, + 121, 121, 130, 131, 121, 36, 121, 126, + 121, 121, 121, 121, 121, 129, 121, 122, + 123, 124, 52, 35, 36, 121, 126, 121, + 121, 37, 121, 121, 121, 127, 121, 129, + 121, 121, 121, 132, 131, 121, 36, 121, + 126, 121, 121, 121, 121, 121, 129, 121, + 126, 121, 121, 133, 121, 126, 121, 126, + 121, 126, 121, 121, 121, 121, 126, 121, + 129, 121, 134, 121, 132, 132, 121, 36, + 121, 126, 121, 121, 121, 121, 121, 129, + 121, 129, 121, 121, 121, 132, 132, 121, + 36, 121, 126, 121, 121, 121, 121, 121, + 129, 121, 135, 40, 136, 137, 35, 36, + 121, 126, 121, 40, 136, 137, 35, 36, + 121, 126, 121, 136, 136, 35, 36, 121, + 126, 121, 122, 123, 125, 125, 35, 36, + 121, 126, 121, 121, 121, 121, 121, 121, + 127, 121, 138, 139, 139, 35, 36, 121, + 126, 121, 130, 140, 121, 36, 121, 126, + 121, 132, 132, 121, 36, 121, 126, 121, + 130, 121, 132, 132, 121, 36, 121, 126, + 121, 132, 140, 121, 36, 121, 126, 121, + 45, 46, 47, 48, 109, 105, 24, 25, + 103, 51, 52, 52, 26, 103, 103, 45, + 55, 103, 59, 141, 61, 62, 4, 1, + 58, 63, 58, 58, 9, 58, 58, 58, + 64, 58, 45, 46, 47, 48, 142, 143, + 24, 144, 58, 145, 58, 52, 26, 58, + 58, 45, 55, 58, 22, 146, 146, 24, + 144, 58, 63, 58, 58, 26, 58, 145, + 58, 58, 147, 58, 145, 58, 145, 58, + 145, 58, 58, 58, 58, 145, 58, 45, + 58, 71, 22, 146, 146, 24, 144, 58, + 63, 58, 58, 58, 58, 58, 45, 58, + 149, 148, 150, 150, 148, 43, 148, 151, + 148, 150, 150, 148, 43, 148, 151, 148, + 151, 148, 148, 152, 148, 151, 148, 151, + 148, 151, 148, 148, 148, 148, 151, 148, + 45, 120, 120, 120, 120, 120, 120, 120, + 120, 120, 52, 120, 120, 120, 120, 45, + 120, 0 +}; + +static const unsigned char _indic_syllable_machine_trans_targs[] = { + 39, 45, 50, 2, 51, 5, 6, 53, + 57, 58, 39, 67, 11, 73, 68, 14, + 15, 75, 80, 81, 84, 39, 89, 21, + 95, 90, 98, 39, 24, 25, 97, 103, + 39, 112, 30, 118, 113, 121, 33, 34, + 120, 126, 39, 137, 39, 40, 60, 85, + 87, 105, 106, 91, 107, 127, 128, 99, + 135, 140, 39, 41, 43, 8, 59, 46, + 54, 42, 1, 44, 48, 0, 47, 49, + 52, 3, 4, 55, 7, 56, 39, 61, + 63, 18, 83, 69, 76, 62, 9, 64, + 78, 71, 65, 17, 82, 66, 10, 70, + 72, 74, 12, 13, 77, 16, 79, 39, + 86, 26, 88, 101, 93, 19, 104, 20, + 92, 94, 96, 22, 23, 100, 27, 102, + 39, 39, 108, 110, 28, 35, 114, 122, + 109, 111, 124, 116, 29, 115, 117, 119, + 31, 32, 123, 36, 125, 129, 130, 134, + 131, 132, 37, 133, 39, 136, 38, 138, + 139 +}; + +static const char _indic_syllable_machine_trans_actions[] = { + 1, 0, 2, 0, 2, 0, 0, 2, + 2, 2, 3, 2, 0, 2, 0, 0, + 0, 2, 2, 2, 2, 4, 2, 0, + 5, 0, 5, 6, 0, 0, 5, 2, + 7, 2, 0, 2, 0, 2, 0, 0, + 2, 2, 8, 0, 11, 2, 2, 5, + 0, 12, 12, 0, 2, 5, 2, 5, + 2, 0, 13, 2, 0, 0, 2, 0, + 2, 2, 0, 2, 2, 0, 0, 2, + 2, 0, 0, 0, 0, 2, 14, 2, + 0, 0, 2, 0, 2, 2, 0, 2, + 2, 2, 2, 0, 2, 2, 0, 0, + 2, 2, 0, 0, 0, 0, 2, 15, + 5, 0, 5, 2, 2, 0, 5, 0, + 0, 2, 5, 0, 0, 0, 0, 2, + 16, 17, 2, 0, 0, 0, 0, 2, + 2, 2, 2, 2, 0, 0, 2, 2, + 0, 0, 0, 0, 2, 0, 18, 18, + 0, 0, 0, 0, 19, 2, 0, 0, + 0 +}; + +static const char _indic_syllable_machine_to_state_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 9, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0 +}; + +static const char _indic_syllable_machine_from_state_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 10, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0 +}; + +static const short _indic_syllable_machine_eof_trans[] = { + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 22, 22, 28, 22, 22, + 22, 22, 22, 22, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 1, 43, 0, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 104, 104, 104, + 104, 104, 104, 104, 104, 104, 104, 104, + 104, 104, 104, 104, 104, 104, 104, 104, + 104, 121, 121, 122, 122, 122, 122, 122, + 122, 122, 122, 122, 122, 122, 122, 122, + 122, 122, 122, 122, 122, 122, 122, 104, + 59, 59, 59, 59, 59, 59, 59, 149, + 149, 149, 149, 149, 121 +}; + +static const int indic_syllable_machine_start = 39; +static const int indic_syllable_machine_first_final = 39; +static const int indic_syllable_machine_error = -1; + +static const int indic_syllable_machine_en_main = 39; + + +#line 36 "hb-ot-shape-complex-indic-machine.rl" + + + +#line 93 "hb-ot-shape-complex-indic-machine.rl" + + +#define found_syllable(syllable_type) \ + HB_STMT_START { \ + if (0) fprintf (stderr, "syllable %d..%d %s\n", ts, te, #syllable_type); \ + for (unsigned int i = ts; i < te; i++) \ + info[i].syllable() = (syllable_serial << 4) | indic_##syllable_type; \ + syllable_serial++; \ + if (unlikely (syllable_serial == 16)) syllable_serial = 1; \ + } HB_STMT_END + +static void +find_syllables_indic (hb_buffer_t *buffer) +{ + unsigned int p, pe, eof, ts, te, act; + int cs; + hb_glyph_info_t *info = buffer->info; + +#line 411 "hb-ot-shape-complex-indic-machine.hh" + { + cs = indic_syllable_machine_start; + ts = 0; + te = 0; + act = 0; + } + +#line 113 "hb-ot-shape-complex-indic-machine.rl" + + + p = 0; + pe = eof = buffer->len; + + unsigned int syllable_serial = 1; + +#line 427 "hb-ot-shape-complex-indic-machine.hh" + { + int _slen; + int _trans; + const unsigned char *_keys; + const unsigned char *_inds; + if ( p == pe ) + goto _test_eof; +_resume: + switch ( _indic_syllable_machine_from_state_actions[cs] ) { + case 10: +#line 1 "NONE" + {ts = p;} + break; +#line 441 "hb-ot-shape-complex-indic-machine.hh" + } + + _keys = _indic_syllable_machine_trans_keys + (cs<<1); + _inds = _indic_syllable_machine_indicies + _indic_syllable_machine_index_offsets[cs]; + + _slen = _indic_syllable_machine_key_spans[cs]; + _trans = _inds[ _slen > 0 && _keys[0] <=( info[p].indic_category()) && + ( info[p].indic_category()) <= _keys[1] ? + ( info[p].indic_category()) - _keys[0] : _slen ]; + +_eof_trans: + cs = _indic_syllable_machine_trans_targs[_trans]; + + if ( _indic_syllable_machine_trans_actions[_trans] == 0 ) + goto _again; + + switch ( _indic_syllable_machine_trans_actions[_trans] ) { + case 2: +#line 1 "NONE" + {te = p+1;} + break; + case 11: +#line 89 "hb-ot-shape-complex-indic-machine.rl" + {te = p+1;{ found_syllable (non_indic_cluster); }} + break; + case 13: +#line 84 "hb-ot-shape-complex-indic-machine.rl" + {te = p;p--;{ found_syllable (consonant_syllable); }} + break; + case 14: +#line 85 "hb-ot-shape-complex-indic-machine.rl" + {te = p;p--;{ found_syllable (vowel_syllable); }} + break; + case 17: +#line 86 "hb-ot-shape-complex-indic-machine.rl" + {te = p;p--;{ found_syllable (standalone_cluster); }} + break; + case 19: +#line 87 "hb-ot-shape-complex-indic-machine.rl" + {te = p;p--;{ found_syllable (symbol_cluster); }} + break; + case 15: +#line 88 "hb-ot-shape-complex-indic-machine.rl" + {te = p;p--;{ found_syllable (broken_cluster); }} + break; + case 16: +#line 89 "hb-ot-shape-complex-indic-machine.rl" + {te = p;p--;{ found_syllable (non_indic_cluster); }} + break; + case 1: +#line 84 "hb-ot-shape-complex-indic-machine.rl" + {{p = ((te))-1;}{ found_syllable (consonant_syllable); }} + break; + case 3: +#line 85 "hb-ot-shape-complex-indic-machine.rl" + {{p = ((te))-1;}{ found_syllable (vowel_syllable); }} + break; + case 7: +#line 86 "hb-ot-shape-complex-indic-machine.rl" + {{p = ((te))-1;}{ found_syllable (standalone_cluster); }} + break; + case 8: +#line 87 "hb-ot-shape-complex-indic-machine.rl" + {{p = ((te))-1;}{ found_syllable (symbol_cluster); }} + break; + case 4: +#line 88 "hb-ot-shape-complex-indic-machine.rl" + {{p = ((te))-1;}{ found_syllable (broken_cluster); }} + break; + case 6: +#line 1 "NONE" + { switch( act ) { + case 1: + {{p = ((te))-1;} found_syllable (consonant_syllable); } + break; + case 5: + {{p = ((te))-1;} found_syllable (broken_cluster); } + break; + case 6: + {{p = ((te))-1;} found_syllable (non_indic_cluster); } + break; + } + } + break; + case 18: +#line 1 "NONE" + {te = p+1;} +#line 84 "hb-ot-shape-complex-indic-machine.rl" + {act = 1;} + break; + case 5: +#line 1 "NONE" + {te = p+1;} +#line 88 "hb-ot-shape-complex-indic-machine.rl" + {act = 5;} + break; + case 12: +#line 1 "NONE" + {te = p+1;} +#line 89 "hb-ot-shape-complex-indic-machine.rl" + {act = 6;} + break; +#line 544 "hb-ot-shape-complex-indic-machine.hh" + } + +_again: + switch ( _indic_syllable_machine_to_state_actions[cs] ) { + case 9: +#line 1 "NONE" + {ts = 0;} + break; +#line 553 "hb-ot-shape-complex-indic-machine.hh" + } + + if ( ++p != pe ) + goto _resume; + _test_eof: {} + if ( p == eof ) + { + if ( _indic_syllable_machine_eof_trans[cs] > 0 ) { + _trans = _indic_syllable_machine_eof_trans[cs] - 1; + goto _eof_trans; + } + } + + } + +#line 121 "hb-ot-shape-complex-indic-machine.rl" + +} + +#undef found_syllable + +#endif /* HB_OT_SHAPE_COMPLEX_INDIC_MACHINE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-table.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-table.cc new file mode 100644 index 0000000000..a150fd2486 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic-table.cc @@ -0,0 +1,501 @@ +/* == Start of generated table == */ +/* + * The following table is generated by running: + * + * ./gen-indic-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt Blocks.txt + * + * on files with these headers: + * + * # IndicSyllabicCategory-13.0.0.txt + * # Date: 2019-07-22, 19:55:00 GMT [KW, RP] + * # IndicPositionalCategory-13.0.0.txt + * # Date: 2019-07-23, 00:01:00 GMT [KW, RP] + * # Blocks-13.0.0.txt + * # Date: 2019-07-10, 19:06:00 GMT [KW] + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-complex-indic.hh" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-macros" + +#define ISC_A INDIC_SYLLABIC_CATEGORY_AVAGRAHA /* 17 chars; Avagraha */ +#define ISC_Bi INDIC_SYLLABIC_CATEGORY_BINDU /* 91 chars; Bindu */ +#define ISC_BJN INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER /* 20 chars; Brahmi_Joining_Number */ +#define ISC_Ca INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK /* 59 chars; Cantillation_Mark */ +#define ISC_C INDIC_SYLLABIC_CATEGORY_CONSONANT /* 2195 chars; Consonant */ +#define ISC_CD INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD /* 12 chars; Consonant_Dead */ +#define ISC_CF INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL /* 67 chars; Consonant_Final */ +#define ISC_CHL INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER /* 5 chars; Consonant_Head_Letter */ +#define ISC_CIP INDIC_SYLLABIC_CATEGORY_CONSONANT_INITIAL_POSTFIXED /* 1 chars; Consonant_Initial_Postfixed */ +#define ISC_CK INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER /* 2 chars; Consonant_Killer */ +#define ISC_CM INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL /* 31 chars; Consonant_Medial */ +#define ISC_CP INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER /* 22 chars; Consonant_Placeholder */ +#define ISC_CPR INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA /* 3 chars; Consonant_Preceding_Repha */ +#define ISC_CPrf INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED /* 10 chars; Consonant_Prefixed */ +#define ISC_CS INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED /* 94 chars; Consonant_Subjoined */ +#define ISC_CSR INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA /* 4 chars; Consonant_Succeeding_Repha */ +#define ISC_CWS INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER /* 8 chars; Consonant_With_Stacker */ +#define ISC_GM INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK /* 3 chars; Gemination_Mark */ +#define ISC_IS INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER /* 12 chars; Invisible_Stacker */ +#define ISC_ZWJ INDIC_SYLLABIC_CATEGORY_JOINER /* 1 chars; Joiner */ +#define ISC_ML INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER /* 1 chars; Modifying_Letter */ +#define ISC_ZWNJ INDIC_SYLLABIC_CATEGORY_NON_JOINER /* 1 chars; Non_Joiner */ +#define ISC_N INDIC_SYLLABIC_CATEGORY_NUKTA /* 31 chars; Nukta */ +#define ISC_Nd INDIC_SYLLABIC_CATEGORY_NUMBER /* 491 chars; Number */ +#define ISC_NJ INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER /* 1 chars; Number_Joiner */ +#define ISC_x INDIC_SYLLABIC_CATEGORY_OTHER /* 1 chars; Other */ +#define ISC_PK INDIC_SYLLABIC_CATEGORY_PURE_KILLER /* 23 chars; Pure_Killer */ +#define ISC_RS INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER /* 2 chars; Register_Shifter */ +#define ISC_SM INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER /* 25 chars; Syllable_Modifier */ +#define ISC_TL INDIC_SYLLABIC_CATEGORY_TONE_LETTER /* 7 chars; Tone_Letter */ +#define ISC_TM INDIC_SYLLABIC_CATEGORY_TONE_MARK /* 42 chars; Tone_Mark */ +#define ISC_V INDIC_SYLLABIC_CATEGORY_VIRAMA /* 27 chars; Virama */ +#define ISC_Vs INDIC_SYLLABIC_CATEGORY_VISARGA /* 35 chars; Visarga */ +#define ISC_Vo INDIC_SYLLABIC_CATEGORY_VOWEL /* 30 chars; Vowel */ +#define ISC_M INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT /* 683 chars; Vowel_Dependent */ +#define ISC_VI INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT /* 484 chars; Vowel_Independent */ + +#define IMC_B INDIC_MATRA_CATEGORY_BOTTOM /* 351 chars; Bottom */ +#define IMC_BL INDIC_MATRA_CATEGORY_BOTTOM_AND_LEFT /* 1 chars; Bottom_And_Left */ +#define IMC_BR INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT /* 4 chars; Bottom_And_Right */ +#define IMC_L INDIC_MATRA_CATEGORY_LEFT /* 64 chars; Left */ +#define IMC_LR INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT /* 22 chars; Left_And_Right */ +#define IMC_x INDIC_MATRA_CATEGORY_NOT_APPLICABLE /* 1 chars; Not_Applicable */ +#define IMC_O INDIC_MATRA_CATEGORY_OVERSTRUCK /* 10 chars; Overstruck */ +#define IMC_R INDIC_MATRA_CATEGORY_RIGHT /* 288 chars; Right */ +#define IMC_T INDIC_MATRA_CATEGORY_TOP /* 415 chars; Top */ +#define IMC_TB INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM /* 10 chars; Top_And_Bottom */ +#define IMC_TBL INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_LEFT /* 2 chars; Top_And_Bottom_And_Left */ +#define IMC_TBR INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT /* 1 chars; Top_And_Bottom_And_Right */ +#define IMC_TL INDIC_MATRA_CATEGORY_TOP_AND_LEFT /* 6 chars; Top_And_Left */ +#define IMC_TLR INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT /* 4 chars; Top_And_Left_And_Right */ +#define IMC_TR INDIC_MATRA_CATEGORY_TOP_AND_RIGHT /* 13 chars; Top_And_Right */ +#define IMC_VOL INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT /* 19 chars; Visual_Order_Left */ + +#pragma GCC diagnostic pop + +#define _(S,M) INDIC_COMBINE_CATEGORIES (ISC_##S, IMC_##M) + + +static const INDIC_TABLE_ELEMENT_TYPE indic_table[] = { + + +#define indic_offset_0x0028u 0 + + + /* Basic Latin */ + + /* 0028 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CP,x), _(x,x), _(x,x), + /* 0030 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0038 */ _(Nd,x), _(Nd,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + +#define indic_offset_0x00b0u 24 + + + /* Latin-1 Supplement */ + + /* 00B0 */ _(x,x), _(x,x), _(SM,x), _(SM,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 00B8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 00C0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 00C8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 00D0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(CP,x), + +#define indic_offset_0x0900u 64 + + + /* Devanagari */ + + /* 0900 */ _(Bi,T), _(Bi,T), _(Bi,T), _(Vs,R), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 0908 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 0910 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0918 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0920 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0928 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0930 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0938 */ _(C,x), _(C,x), _(M,T), _(M,R), _(N,B), _(A,x), _(M,R), _(M,L), + /* 0940 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(M,T), _(M,T), + /* 0948 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(V,B), _(M,L), _(M,R), + /* 0950 */ _(x,x), _(Ca,T), _(Ca,B), _(x,T), _(x,T), _(M,T), _(M,B), _(M,B), + /* 0958 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0960 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0968 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0970 */ _(x,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 0978 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + + /* Bengali */ + + /* 0980 */ _(CP,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0988 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(VI,x), + /* 0990 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0998 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 09A0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 09A8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 09B0 */ _(C,x), _(x,x), _(C,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), + /* 09B8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,L), + /* 09C0 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(x,x), _(x,x), _(M,L), + /* 09C8 */ _(M,L), _(x,x), _(x,x), _(M,LR), _(M,LR), _(V,B), _(CD,x), _(x,x), + /* 09D0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), + /* 09D8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), + /* 09E0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 09E8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 09F0 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 09F8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(Bi,x), _(x,x), _(SM,T), _(x,x), + + /* Gurmukhi */ + + /* 0A00 */ _(x,x), _(Bi,T), _(Bi,T), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0A08 */ _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x), + /* 0A10 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0A18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0A20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0A28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0A30 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), + /* 0A38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(x,x), _(M,R), _(M,L), + /* 0A40 */ _(M,R), _(M,B), _(M,B), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), + /* 0A48 */ _(M,T), _(x,x), _(x,x), _(M,T), _(M,T), _(V,B), _(x,x), _(x,x), + /* 0A50 */ _(x,x), _(Ca,B), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0A58 */ _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), + /* 0A60 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0A68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0A70 */ _(Bi,T), _(GM,T), _(CP,x), _(CP,x), _(x,x), _(CM,B), _(x,x), _(x,x), + /* 0A78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + + /* Gujarati */ + + /* 0A80 */ _(x,x), _(Bi,T), _(Bi,T), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0A88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), + /* 0A90 */ _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0A98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0AA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0AA8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0AB0 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), + /* 0AB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,L), + /* 0AC0 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(M,T), _(x,x), _(M,T), + /* 0AC8 */ _(M,T), _(M,TR), _(x,x), _(M,R), _(M,R), _(V,B), _(x,x), _(x,x), + /* 0AD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0AD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0AE0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0AE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0AF0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0AF8 */ _(x,x), _(C,x), _(Ca,T), _(Ca,T), _(Ca,T), _(N,T), _(N,T), _(N,T), + + /* Oriya */ + + /* 0B00 */ _(x,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0B08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(VI,x), + /* 0B10 */ _(VI,x), _(x,x), _(x,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0B18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0B20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0B28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0B30 */ _(C,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), + /* 0B38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,T), + /* 0B40 */ _(M,R), _(M,B), _(M,B), _(M,B), _(M,B), _(x,x), _(x,x), _(M,L), + /* 0B48 */ _(M,TL), _(x,x), _(x,x), _(M,LR),_(M,TLR), _(V,B), _(x,x), _(x,x), + /* 0B50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), _(M,T), _(M,TR), + /* 0B58 */ _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), + /* 0B60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0B68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0B70 */ _(x,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0B78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + + /* Tamil */ + + /* 0B80 */ _(x,x), _(x,x), _(Bi,T), _(ML,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0B88 */ _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(x,x), _(VI,x), _(VI,x), + /* 0B90 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(x,x), _(x,x), + /* 0B98 */ _(x,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), _(C,x), _(C,x), + /* 0BA0 */ _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), + /* 0BA8 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(C,x), _(C,x), + /* 0BB0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0BB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), _(M,R), + /* 0BC0 */ _(M,T), _(M,R), _(M,R), _(x,x), _(x,x), _(x,x), _(M,L), _(M,L), + /* 0BC8 */ _(M,L), _(x,x), _(M,LR), _(M,LR), _(M,LR), _(V,T), _(x,x), _(x,x), + /* 0BD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), + /* 0BD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0BE0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0BE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0BF0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0BF8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + + /* Telugu */ + + /* 0C00 */ _(Bi,T), _(Bi,R), _(Bi,R), _(Vs,R), _(Bi,T), _(VI,x), _(VI,x), _(VI,x), + /* 0C08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), + /* 0C10 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0C18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0C20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0C28 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0C30 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0C38 */ _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(A,x), _(M,T), _(M,T), + /* 0C40 */ _(M,T), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,T), + /* 0C48 */ _(M,TB), _(x,x), _(M,T), _(M,T), _(M,T), _(V,T), _(x,x), _(x,x), + /* 0C50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,T), _(M,B), _(x,x), + /* 0C58 */ _(C,x), _(C,x), _(C,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0C60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0C68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0C70 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0C78 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + + /* Kannada */ + + /* 0C80 */ _(Bi,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0C88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), + /* 0C90 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0C98 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0CA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0CA8 */ _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0CB0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), + /* 0CB8 */ _(C,x), _(C,x), _(x,x), _(x,x), _(N,B), _(A,x), _(M,R), _(M,T), + /* 0CC0 */ _(M,TR), _(M,R), _(M,R), _(M,R), _(M,R), _(x,x), _(M,T), _(M,TR), + /* 0CC8 */ _(M,TR), _(x,x), _(M,TR), _(M,TR), _(M,T), _(V,T), _(x,x), _(x,x), + /* 0CD0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), _(M,R), _(x,x), + /* 0CD8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(C,x), _(x,x), + /* 0CE0 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0CE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0CF0 */ _(x,x),_(CWS,x),_(CWS,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0CF8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + + /* Malayalam */ + + /* 0D00 */ _(Bi,T), _(Bi,T), _(Bi,R), _(Vs,R), _(Bi,x), _(VI,x), _(VI,x), _(VI,x), + /* 0D08 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(VI,x), _(VI,x), + /* 0D10 */ _(VI,x), _(x,x), _(VI,x), _(VI,x), _(VI,x), _(C,x), _(C,x), _(C,x), + /* 0D18 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0D20 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0D28 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0D30 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0D38 */ _(C,x), _(C,x), _(C,x), _(PK,T), _(PK,T), _(A,x), _(M,R), _(M,R), + /* 0D40 */ _(M,R), _(M,R), _(M,R), _(M,B), _(M,B), _(x,x), _(M,L), _(M,L), + /* 0D48 */ _(M,L), _(x,x), _(M,LR), _(M,LR), _(M,LR), _(V,T),_(CPR,T), _(x,x), + /* 0D50 */ _(x,x), _(x,x), _(x,x), _(x,x), _(CD,x), _(CD,x), _(CD,x), _(M,R), + /* 0D58 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x), + /* 0D60 */ _(VI,x), _(VI,x), _(M,B), _(M,B), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0D68 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0D70 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 0D78 */ _(x,x), _(x,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), _(CD,x), + + /* Sinhala */ + + /* 0D80 */ _(x,x), _(Bi,T), _(Bi,R), _(Vs,R), _(x,x), _(VI,x), _(VI,x), _(VI,x), + /* 0D88 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 0D90 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), + /* 0D98 */ _(x,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0DA0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0DA8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0DB0 */ _(C,x), _(C,x), _(x,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 0DB8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), _(C,x), _(x,x), _(x,x), + /* 0DC0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), + /* 0DC8 */ _(x,x), _(x,x), _(V,T), _(x,x), _(x,x), _(x,x), _(x,x), _(M,R), + /* 0DD0 */ _(M,R), _(M,R), _(M,T), _(M,T), _(M,B), _(x,x), _(M,B), _(x,x), + /* 0DD8 */ _(M,R), _(M,L), _(M,TL), _(M,L), _(M,LR),_(M,TLR), _(M,LR), _(M,R), + /* 0DE0 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(Nd,x), _(Nd,x), + /* 0DE8 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 0DF0 */ _(x,x), _(x,x), _(M,R), _(M,R), _(x,x), _(x,x), _(x,x), _(x,x), + +#define indic_offset_0x1000u 1336 + + + /* Myanmar */ + + /* 1000 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1008 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1010 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1018 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1020 */ _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 1028 */ _(VI,x), _(VI,x), _(VI,x), _(M,R), _(M,R), _(M,T), _(M,T), _(M,B), + /* 1030 */ _(M,B), _(M,L), _(M,T), _(M,T), _(M,T), _(M,T), _(Bi,T), _(TM,B), + /* 1038 */ _(Vs,R), _(IS,x), _(PK,T), _(CM,R),_(CM,TBL), _(CM,B), _(CM,B), _(C,x), + /* 1040 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 1048 */ _(Nd,x), _(Nd,x), _(x,x), _(CP,x), _(x,x), _(x,x), _(CP,x), _(x,x), + /* 1050 */ _(C,x), _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(M,R), _(M,R), + /* 1058 */ _(M,B), _(M,B), _(C,x), _(C,x), _(C,x), _(C,x), _(CM,B), _(CM,B), + /* 1060 */ _(CM,B), _(C,x), _(M,R), _(TM,R), _(TM,R), _(C,x), _(C,x), _(M,R), + /* 1068 */ _(M,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(C,x), _(C,x), + /* 1070 */ _(C,x), _(M,T), _(M,T), _(M,T), _(M,T), _(C,x), _(C,x), _(C,x), + /* 1078 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1080 */ _(C,x), _(C,x), _(CM,B), _(M,R), _(M,L), _(M,T), _(M,T), _(TM,R), + /* 1088 */ _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,R), _(TM,B), _(C,x), _(TM,R), + /* 1090 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 1098 */ _(Nd,x), _(Nd,x), _(TM,R), _(TM,R), _(M,R), _(M,T), _(x,x), _(x,x), + +#define indic_offset_0x1780u 1496 + + + /* Khmer */ + + /* 1780 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1788 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1790 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 1798 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* 17A0 */ _(C,x), _(C,x), _(C,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 17A8 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(VI,x), + /* 17B0 */ _(VI,x), _(VI,x), _(VI,x), _(VI,x), _(x,x), _(x,x), _(M,R), _(M,T), + /* 17B8 */ _(M,T), _(M,T), _(M,T), _(M,B), _(M,B), _(M,B), _(M,TL),_(M,TLR), + /* 17C0 */ _(M,LR), _(M,L), _(M,L), _(M,L), _(M,LR), _(M,LR), _(Bi,T), _(Vs,R), + /* 17C8 */ _(M,R), _(RS,T), _(RS,T), _(SM,T),_(CSR,T), _(CK,T), _(SM,T), _(SM,T), + /* 17D0 */ _(SM,T), _(PK,T), _(IS,x), _(SM,T), _(x,x), _(x,x), _(x,x), _(x,x), + /* 17D8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(A,x), _(SM,T), _(x,x), _(x,x), + /* 17E0 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* 17E8 */ _(Nd,x), _(Nd,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + +#define indic_offset_0x1cd0u 1608 + + + /* Vedic Extensions */ + + /* 1CD0 */ _(Ca,T), _(Ca,T), _(Ca,T), _(x,x), _(Ca,O), _(Ca,B), _(Ca,B), _(Ca,B), + /* 1CD8 */ _(Ca,B), _(Ca,B), _(Ca,T), _(Ca,T), _(Ca,B), _(Ca,B), _(Ca,B), _(Ca,B), + /* 1CE0 */ _(Ca,T), _(Ca,R), _(x,O), _(x,O), _(x,O), _(x,O), _(x,O), _(x,O), + /* 1CE8 */ _(x,O), _(x,x), _(x,x), _(x,x), _(x,x), _(x,B), _(x,x), _(x,x), + /* 1CF0 */ _(x,x), _(x,x), _(CD,x), _(CD,x), _(Ca,T),_(CWS,x),_(CWS,x), _(Ca,R), + /* 1CF8 */ _(Ca,x), _(Ca,x), _(CP,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + +#define indic_offset_0x2008u 1656 + + + /* General Punctuation */ + + /* 2008 */ _(x,x), _(x,x), _(x,x), _(x,x),_(ZWNJ,x),_(ZWJ,x), _(x,x), _(x,x), + /* 2010 */ _(CP,x), _(CP,x), _(CP,x), _(CP,x), _(CP,x), _(x,x), _(x,x), _(x,x), + +#define indic_offset_0x2070u 1672 + + + /* Superscripts and Subscripts */ + + /* 2070 */ _(x,x), _(x,x), _(x,x), _(x,x), _(SM,x), _(x,x), _(x,x), _(x,x), + /* 2078 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* 2080 */ _(x,x), _(x,x), _(SM,x), _(SM,x), _(SM,x), _(x,x), _(x,x), _(x,x), + +#define indic_offset_0xa8e0u 1696 + + + /* Devanagari Extended */ + + /* A8E0 */ _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), + /* A8E8 */ _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), _(Ca,T), + /* A8F0 */ _(Ca,T), _(Ca,T), _(Bi,x), _(Bi,x), _(x,x), _(x,x), _(x,x), _(x,x), + /* A8F8 */ _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(x,x), _(VI,x), _(M,T), + +#define indic_offset_0xa9e0u 1728 + + + /* Myanmar Extended-B */ + + /* A9E0 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(M,T), _(x,x), _(C,x), + /* A9E8 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* A9F0 */ _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), _(Nd,x), + /* A9F8 */ _(Nd,x), _(Nd,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(x,x), + +#define indic_offset_0xaa60u 1760 + + + /* Myanmar Extended-A */ + + /* AA60 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* AA68 */ _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), _(C,x), + /* AA70 */ _(x,x), _(C,x), _(C,x), _(C,x), _(CP,x), _(CP,x), _(CP,x), _(x,x), + /* AA78 */ _(x,x), _(x,x), _(C,x), _(TM,R), _(TM,T), _(TM,R), _(C,x), _(C,x), + +}; /* Table items: 1792; occupancy: 70% */ + +INDIC_TABLE_ELEMENT_TYPE +hb_indic_get_categories (hb_codepoint_t u) +{ + switch (u >> 12) + { + case 0x0u: + if (unlikely (u == 0x00A0u)) return _(CP,x); + if (hb_in_range<hb_codepoint_t> (u, 0x0028u, 0x003Fu)) return indic_table[u - 0x0028u + indic_offset_0x0028u]; + if (hb_in_range<hb_codepoint_t> (u, 0x00B0u, 0x00D7u)) return indic_table[u - 0x00B0u + indic_offset_0x00b0u]; + if (hb_in_range<hb_codepoint_t> (u, 0x0900u, 0x0DF7u)) return indic_table[u - 0x0900u + indic_offset_0x0900u]; + break; + + case 0x1u: + if (hb_in_range<hb_codepoint_t> (u, 0x1000u, 0x109Fu)) return indic_table[u - 0x1000u + indic_offset_0x1000u]; + if (hb_in_range<hb_codepoint_t> (u, 0x1780u, 0x17EFu)) return indic_table[u - 0x1780u + indic_offset_0x1780u]; + if (hb_in_range<hb_codepoint_t> (u, 0x1CD0u, 0x1CFFu)) return indic_table[u - 0x1CD0u + indic_offset_0x1cd0u]; + break; + + case 0x2u: + if (unlikely (u == 0x25CCu)) return _(CP,x); + if (hb_in_range<hb_codepoint_t> (u, 0x2008u, 0x2017u)) return indic_table[u - 0x2008u + indic_offset_0x2008u]; + if (hb_in_range<hb_codepoint_t> (u, 0x2070u, 0x2087u)) return indic_table[u - 0x2070u + indic_offset_0x2070u]; + break; + + case 0xAu: + if (hb_in_range<hb_codepoint_t> (u, 0xA8E0u, 0xA8FFu)) return indic_table[u - 0xA8E0u + indic_offset_0xa8e0u]; + if (hb_in_range<hb_codepoint_t> (u, 0xA9E0u, 0xA9FFu)) return indic_table[u - 0xA9E0u + indic_offset_0xa9e0u]; + if (hb_in_range<hb_codepoint_t> (u, 0xAA60u, 0xAA7Fu)) return indic_table[u - 0xAA60u + indic_offset_0xaa60u]; + break; + + default: + break; + } + return _(x,x); +} + +#undef _ + +#undef ISC_A +#undef ISC_Bi +#undef ISC_BJN +#undef ISC_Ca +#undef ISC_C +#undef ISC_CD +#undef ISC_CF +#undef ISC_CHL +#undef ISC_CIP +#undef ISC_CK +#undef ISC_CM +#undef ISC_CP +#undef ISC_CPR +#undef ISC_CPrf +#undef ISC_CS +#undef ISC_CSR +#undef ISC_CWS +#undef ISC_GM +#undef ISC_IS +#undef ISC_ZWJ +#undef ISC_ML +#undef ISC_ZWNJ +#undef ISC_N +#undef ISC_Nd +#undef ISC_NJ +#undef ISC_x +#undef ISC_PK +#undef ISC_RS +#undef ISC_SM +#undef ISC_TL +#undef ISC_TM +#undef ISC_V +#undef ISC_Vs +#undef ISC_Vo +#undef ISC_M +#undef ISC_VI + +#undef IMC_B +#undef IMC_BL +#undef IMC_BR +#undef IMC_L +#undef IMC_LR +#undef IMC_x +#undef IMC_O +#undef IMC_R +#undef IMC_T +#undef IMC_TB +#undef IMC_TBL +#undef IMC_TBR +#undef IMC_TL +#undef IMC_TLR +#undef IMC_TR +#undef IMC_VOL + +#endif + +/* == End of generated table == */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.cc new file mode 100644 index 0000000000..34972f81e2 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.cc @@ -0,0 +1,1615 @@ +/* + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-complex-indic.hh" +#include "hb-ot-shape-complex-vowel-constraints.hh" +#include "hb-ot-layout.hh" + + +/* + * Indic shaper. + */ + + +/* + * Indic configurations. Note that we do not want to keep every single script-specific + * behavior in these tables necessarily. This should mainly be used for per-script + * properties that are cheaper keeping here, than in the code. Ie. if, say, one and + * only one script has an exception, that one script can be if'ed directly in the code, + * instead of adding a new flag in these structs. + */ + +enum base_position_t { + BASE_POS_LAST_SINHALA, + BASE_POS_LAST +}; +enum reph_position_t { + REPH_POS_AFTER_MAIN = POS_AFTER_MAIN, + REPH_POS_BEFORE_SUB = POS_BEFORE_SUB, + REPH_POS_AFTER_SUB = POS_AFTER_SUB, + REPH_POS_BEFORE_POST = POS_BEFORE_POST, + REPH_POS_AFTER_POST = POS_AFTER_POST +}; +enum reph_mode_t { + REPH_MODE_IMPLICIT, /* Reph formed out of initial Ra,H sequence. */ + REPH_MODE_EXPLICIT, /* Reph formed out of initial Ra,H,ZWJ sequence. */ + REPH_MODE_LOG_REPHA /* Encoded Repha character, needs reordering. */ +}; +enum blwf_mode_t { + BLWF_MODE_PRE_AND_POST, /* Below-forms feature applied to pre-base and post-base. */ + BLWF_MODE_POST_ONLY /* Below-forms feature applied to post-base only. */ +}; +struct indic_config_t +{ + hb_script_t script; + bool has_old_spec; + hb_codepoint_t virama; + base_position_t base_pos; + reph_position_t reph_pos; + reph_mode_t reph_mode; + blwf_mode_t blwf_mode; +}; + +static const indic_config_t indic_configs[] = +{ + /* Default. Should be first. */ + {HB_SCRIPT_INVALID, false, 0,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_DEVANAGARI,true, 0x094Du,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_BENGALI, true, 0x09CDu,BASE_POS_LAST, REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_GURMUKHI, true, 0x0A4Du,BASE_POS_LAST, REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_GUJARATI, true, 0x0ACDu,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_ORIYA, true, 0x0B4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_TAMIL, true, 0x0BCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_TELUGU, true, 0x0C4Du,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY}, + {HB_SCRIPT_KANNADA, true, 0x0CCDu,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY}, + {HB_SCRIPT_MALAYALAM, true, 0x0D4Du,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST}, + {HB_SCRIPT_SINHALA, false,0x0DCAu,BASE_POS_LAST_SINHALA, + REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_PRE_AND_POST}, +}; + + + +/* + * Indic shaper. + */ + +static const hb_ot_map_feature_t +indic_features[] = +{ + /* + * Basic features. + * These features are applied in order, one at a time, after initial_reordering. + */ + {HB_TAG('n','u','k','t'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('a','k','h','n'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('r','p','h','f'), F_MANUAL_JOINERS}, + {HB_TAG('r','k','r','f'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS}, + {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS}, + {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS}, + {HB_TAG('h','a','l','f'), F_MANUAL_JOINERS}, + {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS}, + {HB_TAG('v','a','t','u'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('c','j','c','t'), F_GLOBAL_MANUAL_JOINERS}, + /* + * Other features. + * These features are applied all at once, after final_reordering + * but before clearing syllables. + * Default Bengali font in Windows for example has intermixed + * lookups for init,pres,abvs,blws features. + */ + {HB_TAG('i','n','i','t'), F_MANUAL_JOINERS}, + {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('h','a','l','n'), F_GLOBAL_MANUAL_JOINERS}, +}; + +/* + * Must be in the same order as the indic_features array. + */ +enum { + _INDIC_NUKT, + _INDIC_AKHN, + INDIC_RPHF, + _INDIC_RKRF, + INDIC_PREF, + INDIC_BLWF, + INDIC_ABVF, + INDIC_HALF, + INDIC_PSTF, + _INDIC_VATU, + _INDIC_CJCT, + + INDIC_INIT, + _INDIC_PRES, + _INDIC_ABVS, + _INDIC_BLWS, + _INDIC_PSTS, + _INDIC_HALN, + + INDIC_NUM_FEATURES, + INDIC_BASIC_FEATURES = INDIC_INIT, /* Don't forget to update this! */ +}; + +static void +setup_syllables_indic (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); +static void +initial_reordering_indic (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); +static void +final_reordering_indic (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + +static void +collect_features_indic (hb_ot_shape_planner_t *plan) +{ + hb_ot_map_builder_t *map = &plan->map; + + /* Do this before any lookups have been applied. */ + map->add_gsub_pause (setup_syllables_indic); + + map->enable_feature (HB_TAG('l','o','c','l')); + /* The Indic specs do not require ccmp, but we apply it here since if + * there is a use of it, it's typically at the beginning. */ + map->enable_feature (HB_TAG('c','c','m','p')); + + + unsigned int i = 0; + map->add_gsub_pause (initial_reordering_indic); + + for (; i < INDIC_BASIC_FEATURES; i++) { + map->add_feature (indic_features[i]); + map->add_gsub_pause (nullptr); + } + + map->add_gsub_pause (final_reordering_indic); + + for (; i < INDIC_NUM_FEATURES; i++) + map->add_feature (indic_features[i]); + + map->enable_feature (HB_TAG('c','a','l','t')); + map->enable_feature (HB_TAG('c','l','i','g')); + + map->add_gsub_pause (_hb_clear_syllables); +} + +static void +override_features_indic (hb_ot_shape_planner_t *plan) +{ + plan->map.disable_feature (HB_TAG('l','i','g','a')); +} + + +struct indic_shape_plan_t +{ + bool load_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const + { + hb_codepoint_t glyph = virama_glyph.get_relaxed (); + if (unlikely (glyph == (hb_codepoint_t) -1)) + { + if (!config->virama || !font->get_nominal_glyph (config->virama, &glyph)) + glyph = 0; + /* Technically speaking, the spec says we should apply 'locl' to virama too. + * Maybe one day... */ + + /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph + * during shape planning... Instead, overwrite it here. */ + virama_glyph.set_relaxed ((int) glyph); + } + + *pglyph = glyph; + return glyph != 0; + } + + const indic_config_t *config; + + bool is_old_spec; +#ifndef HB_NO_UNISCRIBE_BUG_COMPATIBLE + bool uniscribe_bug_compatible; +#else + static constexpr bool uniscribe_bug_compatible = false; +#endif + mutable hb_atomic_int_t virama_glyph; + + hb_indic_would_substitute_feature_t rphf; + hb_indic_would_substitute_feature_t pref; + hb_indic_would_substitute_feature_t blwf; + hb_indic_would_substitute_feature_t pstf; + hb_indic_would_substitute_feature_t vatu; + + hb_mask_t mask_array[INDIC_NUM_FEATURES]; +}; + +static void * +data_create_indic (const hb_ot_shape_plan_t *plan) +{ + indic_shape_plan_t *indic_plan = (indic_shape_plan_t *) calloc (1, sizeof (indic_shape_plan_t)); + if (unlikely (!indic_plan)) + return nullptr; + + indic_plan->config = &indic_configs[0]; + for (unsigned int i = 1; i < ARRAY_LENGTH (indic_configs); i++) + if (plan->props.script == indic_configs[i].script) { + indic_plan->config = &indic_configs[i]; + break; + } + + indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[0] & 0x000000FFu) != '2'); +#ifndef HB_NO_UNISCRIBE_BUG_COMPATIBLE + indic_plan->uniscribe_bug_compatible = hb_options ().uniscribe_bug_compatible; +#endif + indic_plan->virama_glyph.set_relaxed (-1); + + /* Use zero-context would_substitute() matching for new-spec of the main + * Indic scripts, and scripts with one spec only, but not for old-specs. + * The new-spec for all dual-spec scripts says zero-context matching happens. + * + * However, testing with Malayalam shows that old and new spec both allow + * context. Testing with Bengali new-spec however shows that it doesn't. + * So, the heuristic here is the way it is. It should *only* be changed, + * as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE. + */ + bool zero_context = !indic_plan->is_old_spec && plan->props.script != HB_SCRIPT_MALAYALAM; + indic_plan->rphf.init (&plan->map, HB_TAG('r','p','h','f'), zero_context); + indic_plan->pref.init (&plan->map, HB_TAG('p','r','e','f'), zero_context); + indic_plan->blwf.init (&plan->map, HB_TAG('b','l','w','f'), zero_context); + indic_plan->pstf.init (&plan->map, HB_TAG('p','s','t','f'), zero_context); + indic_plan->vatu.init (&plan->map, HB_TAG('v','a','t','u'), zero_context); + + for (unsigned int i = 0; i < ARRAY_LENGTH (indic_plan->mask_array); i++) + indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ? + 0 : plan->map.get_1_mask (indic_features[i].tag); + + return indic_plan; +} + +static void +data_destroy_indic (void *data) +{ + free (data); +} + +static indic_position_t +consonant_position_from_face (const indic_shape_plan_t *indic_plan, + const hb_codepoint_t consonant, + const hb_codepoint_t virama, + hb_face_t *face) +{ + /* For old-spec, the order of glyphs is Consonant,Virama, + * whereas for new-spec, it's Virama,Consonant. However, + * some broken fonts (like Free Sans) simply copied lookups + * from old-spec to new-spec without modification. + * And oddly enough, Uniscribe seems to respect those lookups. + * Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds + * base at 0. The font however, only has lookups matching + * 930,94D in 'blwf', not the expected 94D,930 (with new-spec + * table). As such, we simply match both sequences. Seems + * to work. + * + * Vatu is done as well, for: + * https://github.com/harfbuzz/harfbuzz/issues/1587 + */ + hb_codepoint_t glyphs[3] = {virama, consonant, virama}; + if (indic_plan->blwf.would_substitute (glyphs , 2, face) || + indic_plan->blwf.would_substitute (glyphs+1, 2, face) || + indic_plan->vatu.would_substitute (glyphs , 2, face) || + indic_plan->vatu.would_substitute (glyphs+1, 2, face)) + return POS_BELOW_C; + if (indic_plan->pstf.would_substitute (glyphs , 2, face) || + indic_plan->pstf.would_substitute (glyphs+1, 2, face)) + return POS_POST_C; + if (indic_plan->pref.would_substitute (glyphs , 2, face) || + indic_plan->pref.would_substitute (glyphs+1, 2, face)) + return POS_POST_C; + return POS_BASE_C; +} + + +enum indic_syllable_type_t { + indic_consonant_syllable, + indic_vowel_syllable, + indic_standalone_cluster, + indic_symbol_cluster, + indic_broken_cluster, + indic_non_indic_cluster, +}; + +#include "hb-ot-shape-complex-indic-machine.hh" + + +static void +setup_masks_indic (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_buffer_t *buffer, + hb_font_t *font HB_UNUSED) +{ + HB_BUFFER_ALLOCATE_VAR (buffer, indic_category); + HB_BUFFER_ALLOCATE_VAR (buffer, indic_position); + + /* We cannot setup masks here. We save information about characters + * and setup masks later on in a pause-callback. */ + + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + set_indic_properties (info[i]); +} + +static void +setup_syllables_indic (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font HB_UNUSED, + hb_buffer_t *buffer) +{ + find_syllables_indic (buffer); + foreach_syllable (buffer, start, end) + buffer->unsafe_to_break (start, end); +} + +static int +compare_indic_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) +{ + int a = pa->indic_position(); + int b = pb->indic_position(); + + return a < b ? -1 : a == b ? 0 : +1; +} + + + +static void +update_consonant_positions_indic (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ + const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; + + if (indic_plan->config->base_pos != BASE_POS_LAST) + return; + + hb_codepoint_t virama; + if (indic_plan->load_virama_glyph (font, &virama)) + { + hb_face_t *face = font->face; + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + if (info[i].indic_position() == POS_BASE_C) + { + hb_codepoint_t consonant = info[i].codepoint; + info[i].indic_position() = consonant_position_from_face (indic_plan, consonant, virama, face); + } + } +} + + +/* Rules from: + * https://docs.microsqoft.com/en-us/typography/script-development/devanagari */ + +static void +initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, + hb_face_t *face, + hb_buffer_t *buffer, + unsigned int start, unsigned int end) +{ + const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; + hb_glyph_info_t *info = buffer->info; + + /* https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167 + * // For compatibility with legacy usage in Kannada, + * // Ra+h+ZWJ must behave like Ra+ZWJ+h... + */ + if (buffer->props.script == HB_SCRIPT_KANNADA && + start + 3 <= end && + is_one_of (info[start ], FLAG (OT_Ra)) && + is_one_of (info[start+1], FLAG (OT_H)) && + is_one_of (info[start+2], FLAG (OT_ZWJ))) + { + buffer->merge_clusters (start+1, start+3); + hb_glyph_info_t tmp = info[start+1]; + info[start+1] = info[start+2]; + info[start+2] = tmp; + } + + /* 1. Find base consonant: + * + * The shaping engine finds the base consonant of the syllable, using the + * following algorithm: starting from the end of the syllable, move backwards + * until a consonant is found that does not have a below-base or post-base + * form (post-base forms have to follow below-base forms), or that is not a + * pre-base-reordering Ra, or arrive at the first consonant. The consonant + * stopped at will be the base. + * + * o If the syllable starts with Ra + Halant (in a script that has Reph) + * and has more than one consonant, Ra is excluded from candidates for + * base consonants. + */ + + unsigned int base = end; + bool has_reph = false; + + { + /* -> If the syllable starts with Ra + Halant (in a script that has Reph) + * and has more than one consonant, Ra is excluded from candidates for + * base consonants. */ + unsigned int limit = start; + if (indic_plan->mask_array[INDIC_RPHF] && + start + 3 <= end && + ( + (indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + 2])) || + (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].indic_category() == OT_ZWJ) + )) + { + /* See if it matches the 'rphf' feature. */ + hb_codepoint_t glyphs[3] = {info[start].codepoint, + info[start + 1].codepoint, + indic_plan->config->reph_mode == REPH_MODE_EXPLICIT ? + info[start + 2].codepoint : 0}; + if (indic_plan->rphf.would_substitute (glyphs, 2, face) || + (indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && + indic_plan->rphf.would_substitute (glyphs, 3, face))) + { + limit += 2; + while (limit < end && is_joiner (info[limit])) + limit++; + base = start; + has_reph = true; + } + } else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == OT_Repha) + { + limit += 1; + while (limit < end && is_joiner (info[limit])) + limit++; + base = start; + has_reph = true; + } + + switch (indic_plan->config->base_pos) + { + case BASE_POS_LAST: + { + /* -> starting from the end of the syllable, move backwards */ + unsigned int i = end; + bool seen_below = false; + do { + i--; + /* -> until a consonant is found */ + if (is_consonant (info[i])) + { + /* -> that does not have a below-base or post-base form + * (post-base forms have to follow below-base forms), */ + if (info[i].indic_position() != POS_BELOW_C && + (info[i].indic_position() != POS_POST_C || seen_below)) + { + base = i; + break; + } + if (info[i].indic_position() == POS_BELOW_C) + seen_below = true; + + /* -> or that is not a pre-base-reordering Ra, + * + * IMPLEMENTATION NOTES: + * + * Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped + * by the logic above already. + */ + + /* -> or arrive at the first consonant. The consonant stopped at will + * be the base. */ + base = i; + } + else + { + /* A ZWJ after a Halant stops the base search, and requests an explicit + * half form. + * A ZWJ before a Halant, requests a subjoined form instead, and hence + * search continues. This is particularly important for Bengali + * sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */ + if (start < i && + info[i].indic_category() == OT_ZWJ && + info[i - 1].indic_category() == OT_H) + break; + } + } while (i > limit); + } + break; + + case BASE_POS_LAST_SINHALA: + { + /* Sinhala base positioning is slightly different from main Indic, in that: + * 1. Its ZWJ behavior is different, + * 2. We don't need to look into the font for consonant positions. + */ + + if (!has_reph) + base = limit; + + /* Find the last base consonant that is not blocked by ZWJ. If there is + * a ZWJ right before a base consonant, that would request a subjoined form. */ + for (unsigned int i = limit; i < end; i++) + if (is_consonant (info[i])) + { + if (limit < i && info[i - 1].indic_category() == OT_ZWJ) + break; + else + base = i; + } + + /* Mark all subsequent consonants as below. */ + for (unsigned int i = base + 1; i < end; i++) + if (is_consonant (info[i])) + info[i].indic_position() = POS_BELOW_C; + } + break; + } + + /* -> If the syllable starts with Ra + Halant (in a script that has Reph) + * and has more than one consonant, Ra is excluded from candidates for + * base consonants. + * + * Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. */ + if (has_reph && base == start && limit - base <= 2) { + /* Have no other consonant, so Reph is not formed and Ra becomes base. */ + has_reph = false; + } + } + + + /* 2. Decompose and reorder Matras: + * + * Each matra and any syllable modifier sign in the syllable are moved to the + * appropriate position relative to the consonant(s) in the syllable. The + * shaping engine decomposes two- or three-part matras into their constituent + * parts before any repositioning. Matra characters are classified by which + * consonant in a conjunct they have affinity for and are reordered to the + * following positions: + * + * o Before first half form in the syllable + * o After subjoined consonants + * o After post-form consonant + * o After main consonant (for above marks) + * + * IMPLEMENTATION NOTES: + * + * The normalize() routine has already decomposed matras for us, so we don't + * need to worry about that. + */ + + + /* 3. Reorder marks to canonical order: + * + * Adjacent nukta and halant or nukta and vedic sign are always repositioned + * if necessary, so that the nukta is first. + * + * IMPLEMENTATION NOTES: + * + * We don't need to do this: the normalize() routine already did this for us. + */ + + + /* Reorder characters */ + + for (unsigned int i = start; i < base; i++) + info[i].indic_position() = hb_min (POS_PRE_C, (indic_position_t) info[i].indic_position()); + + if (base < end) + info[base].indic_position() = POS_BASE_C; + + /* Mark final consonants. A final consonant is one appearing after a matra. + * Happens in Sinhala. */ + for (unsigned int i = base + 1; i < end; i++) + if (info[i].indic_category() == OT_M) { + for (unsigned int j = i + 1; j < end; j++) + if (is_consonant (info[j])) { + info[j].indic_position() = POS_FINAL_C; + break; + } + break; + } + + /* Handle beginning Ra */ + if (has_reph) + info[start].indic_position() = POS_RA_TO_BECOME_REPH; + + /* For old-style Indic script tags, move the first post-base Halant after + * last consonant. + * + * Reports suggest that in some scripts Uniscribe does this only if there + * is *not* a Halant after last consonant already. We know that is the + * case for Kannada, while it reorders unconditionally in other scripts, + * eg. Malayalam, Bengali, and Devanagari. We don't currently know about + * other scripts, so we block Kannada. + * + * Kannada test case: + * U+0C9A,U+0CCD,U+0C9A,U+0CCD + * With some versions of Lohit Kannada. + * https://bugs.freedesktop.org/show_bug.cgi?id=59118 + * + * Malayalam test case: + * U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D + * With lohit-ttf-20121122/Lohit-Malayalam.ttf + * + * Bengali test case: + * U+0998,U+09CD,U+09AF,U+09CD + * With Windows XP vrinda.ttf + * https://github.com/harfbuzz/harfbuzz/issues/1073 + * + * Devanagari test case: + * U+091F,U+094D,U+0930,U+094D + * With chandas.ttf + * https://github.com/harfbuzz/harfbuzz/issues/1071 + */ + if (indic_plan->is_old_spec) + { + bool disallow_double_halants = buffer->props.script == HB_SCRIPT_KANNADA; + for (unsigned int i = base + 1; i < end; i++) + if (info[i].indic_category() == OT_H) + { + unsigned int j; + for (j = end - 1; j > i; j--) + if (is_consonant (info[j]) || + (disallow_double_halants && info[j].indic_category() == OT_H)) + break; + if (info[j].indic_category() != OT_H && j > i) { + /* Move Halant to after last consonant. */ + hb_glyph_info_t t = info[i]; + memmove (&info[i], &info[i + 1], (j - i) * sizeof (info[0])); + info[j] = t; + } + break; + } + } + + /* Attach misc marks to previous char to move with them. */ + { + indic_position_t last_pos = POS_START; + for (unsigned int i = start; i < end; i++) + { + if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | MEDIAL_FLAGS | FLAG (OT_H)))) + { + info[i].indic_position() = last_pos; + if (unlikely (info[i].indic_category() == OT_H && + info[i].indic_position() == POS_PRE_M)) + { + /* + * Uniscribe doesn't move the Halant with Left Matra. + * TEST: U+092B,U+093F,U+094DE + * We follow. This is important for the Sinhala + * U+0DDA split matra since it decomposes to U+0DD9,U+0DCA + * where U+0DD9 is a left matra and U+0DCA is the virama. + * We don't want to move the virama with the left matra. + * TEST: U+0D9A,U+0DDA + */ + for (unsigned int j = i; j > start; j--) + if (info[j - 1].indic_position() != POS_PRE_M) { + info[i].indic_position() = info[j - 1].indic_position(); + break; + } + } + } else if (info[i].indic_position() != POS_SMVD) { + last_pos = (indic_position_t) info[i].indic_position(); + } + } + } + /* For post-base consonants let them own anything before them + * since the last consonant or matra. */ + { + unsigned int last = base; + for (unsigned int i = base + 1; i < end; i++) + if (is_consonant (info[i])) + { + for (unsigned int j = last + 1; j < i; j++) + if (info[j].indic_position() < POS_SMVD) + info[j].indic_position() = info[i].indic_position(); + last = i; + } else if (info[i].indic_category() == OT_M) + last = i; + } + + + { + /* Use syllable() for sort accounting temporarily. */ + unsigned int syllable = info[start].syllable(); + for (unsigned int i = start; i < end; i++) + info[i].syllable() = i - start; + + /* Sit tight, rock 'n roll! */ + hb_stable_sort (info + start, end - start, compare_indic_order); + /* Find base again */ + base = end; + for (unsigned int i = start; i < end; i++) + if (info[i].indic_position() == POS_BASE_C) + { + base = i; + break; + } + /* Things are out-of-control for post base positions, they may shuffle + * around like crazy. In old-spec mode, we move halants around, so in + * that case merge all clusters after base. Otherwise, check the sort + * order and merge as needed. + * For pre-base stuff, we handle cluster issues in final reordering. + * + * We could use buffer->sort() for this, if there was no special + * reordering of pre-base stuff happening later... + * We don't want to merge_clusters all of that, which buffer->sort() + * would. + */ + if (indic_plan->is_old_spec || end - start > 127) + buffer->merge_clusters (base, end); + else + { + /* Note! syllable() is a one-byte field. */ + for (unsigned int i = base; i < end; i++) + if (info[i].syllable() != 255) + { + unsigned int max = i; + unsigned int j = start + info[i].syllable(); + while (j != i) + { + max = hb_max (max, j); + unsigned int next = start + info[j].syllable(); + info[j].syllable() = 255; /* So we don't process j later again. */ + j = next; + } + if (i != max) + buffer->merge_clusters (i, max + 1); + } + } + + /* Put syllable back in. */ + for (unsigned int i = start; i < end; i++) + info[i].syllable() = syllable; + } + + /* Setup masks now */ + + { + hb_mask_t mask; + + /* Reph */ + for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_TO_BECOME_REPH; i++) + info[i].mask |= indic_plan->mask_array[INDIC_RPHF]; + + /* Pre-base */ + mask = indic_plan->mask_array[INDIC_HALF]; + if (!indic_plan->is_old_spec && + indic_plan->config->blwf_mode == BLWF_MODE_PRE_AND_POST) + mask |= indic_plan->mask_array[INDIC_BLWF]; + for (unsigned int i = start; i < base; i++) + info[i].mask |= mask; + /* Base */ + mask = 0; + if (base < end) + info[base].mask |= mask; + /* Post-base */ + mask = indic_plan->mask_array[INDIC_BLWF] | + indic_plan->mask_array[INDIC_ABVF] | + indic_plan->mask_array[INDIC_PSTF]; + for (unsigned int i = base + 1; i < end; i++) + info[i].mask |= mask; + } + + if (indic_plan->is_old_spec && + buffer->props.script == HB_SCRIPT_DEVANAGARI) + { + /* Old-spec eye-lash Ra needs special handling. From the + * spec: + * + * "The feature 'below-base form' is applied to consonants + * having below-base forms and following the base consonant. + * The exception is vattu, which may appear below half forms + * as well as below the base glyph. The feature 'below-base + * form' will be applied to all such occurrences of Ra as well." + * + * Test case: U+0924,U+094D,U+0930,U+094d,U+0915 + * with Sanskrit 2003 font. + * + * However, note that Ra,Halant,ZWJ is the correct way to + * request eyelash form of Ra, so we wouldbn't inhibit it + * in that sequence. + * + * Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915 + */ + for (unsigned int i = start; i + 1 < base; i++) + if (info[i ].indic_category() == OT_Ra && + info[i+1].indic_category() == OT_H && + (i + 2 == base || + info[i+2].indic_category() != OT_ZWJ)) + { + info[i ].mask |= indic_plan->mask_array[INDIC_BLWF]; + info[i+1].mask |= indic_plan->mask_array[INDIC_BLWF]; + } + } + + unsigned int pref_len = 2; + if (indic_plan->mask_array[INDIC_PREF] && base + pref_len < end) + { + /* Find a Halant,Ra sequence and mark it for pre-base-reordering processing. */ + for (unsigned int i = base + 1; i + pref_len - 1 < end; i++) { + hb_codepoint_t glyphs[2]; + for (unsigned int j = 0; j < pref_len; j++) + glyphs[j] = info[i + j].codepoint; + if (indic_plan->pref.would_substitute (glyphs, pref_len, face)) + { + for (unsigned int j = 0; j < pref_len; j++) + info[i++].mask |= indic_plan->mask_array[INDIC_PREF]; + break; + } + } + } + + /* Apply ZWJ/ZWNJ effects */ + for (unsigned int i = start + 1; i < end; i++) + if (is_joiner (info[i])) { + bool non_joiner = info[i].indic_category() == OT_ZWNJ; + unsigned int j = i; + + do { + j--; + + /* ZWJ/ZWNJ should disable CJCT. They do that by simply + * being there, since we don't skip them for the CJCT + * feature (ie. F_MANUAL_ZWJ) */ + + /* A ZWNJ disables HALF. */ + if (non_joiner) + info[j].mask &= ~indic_plan->mask_array[INDIC_HALF]; + + } while (j > start && !is_consonant (info[j])); + } +} + +static void +initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan, + hb_face_t *face, + hb_buffer_t *buffer, + unsigned int start, unsigned int end) +{ + /* We treat placeholder/dotted-circle as if they are consonants, so we + * should just chain. Only if not in compatibility mode that is... */ + + const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; + if (indic_plan->uniscribe_bug_compatible) + { + /* For dotted-circle, this is what Uniscribe does: + * If dotted-circle is the last glyph, it just does nothing. + * Ie. It doesn't form Reph. */ + if (buffer->info[end - 1].indic_category() == OT_DOTTEDCIRCLE) + return; + } + + initial_reordering_consonant_syllable (plan, face, buffer, start, end); +} + +static void +initial_reordering_syllable_indic (const hb_ot_shape_plan_t *plan, + hb_face_t *face, + hb_buffer_t *buffer, + unsigned int start, unsigned int end) +{ + indic_syllable_type_t syllable_type = (indic_syllable_type_t) (buffer->info[start].syllable() & 0x0F); + switch (syllable_type) + { + case indic_vowel_syllable: /* We made the vowels look like consonants. So let's call the consonant logic! */ + case indic_consonant_syllable: + initial_reordering_consonant_syllable (plan, face, buffer, start, end); + break; + + case indic_broken_cluster: /* We already inserted dotted-circles, so just call the standalone_cluster. */ + case indic_standalone_cluster: + initial_reordering_standalone_cluster (plan, face, buffer, start, end); + break; + + case indic_symbol_cluster: + case indic_non_indic_cluster: + break; + } +} + +static inline void +insert_dotted_circles_indic (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font, + hb_buffer_t *buffer) +{ + if (unlikely (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE)) + return; + + /* Note: This loop is extra overhead, but should not be measurable. + * TODO Use a buffer scratch flag to remove the loop. */ + bool has_broken_syllables = false; + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + if ((info[i].syllable() & 0x0F) == indic_broken_cluster) + { + has_broken_syllables = true; + break; + } + if (likely (!has_broken_syllables)) + return; + + + hb_codepoint_t dottedcircle_glyph; + if (!font->get_nominal_glyph (0x25CCu, &dottedcircle_glyph)) + return; + + hb_glyph_info_t dottedcircle = {0}; + dottedcircle.codepoint = 0x25CCu; + set_indic_properties (dottedcircle); + dottedcircle.codepoint = dottedcircle_glyph; + + buffer->clear_output (); + + buffer->idx = 0; + unsigned int last_syllable = 0; + while (buffer->idx < buffer->len && buffer->successful) + { + unsigned int syllable = buffer->cur().syllable(); + indic_syllable_type_t syllable_type = (indic_syllable_type_t) (syllable & 0x0F); + if (unlikely (last_syllable != syllable && syllable_type == indic_broken_cluster)) + { + last_syllable = syllable; + + hb_glyph_info_t ginfo = dottedcircle; + ginfo.cluster = buffer->cur().cluster; + ginfo.mask = buffer->cur().mask; + ginfo.syllable() = buffer->cur().syllable(); + + /* Insert dottedcircle after possible Repha. */ + while (buffer->idx < buffer->len && buffer->successful && + last_syllable == buffer->cur().syllable() && + buffer->cur().indic_category() == OT_Repha) + buffer->next_glyph (); + + buffer->output_info (ginfo); + } + else + buffer->next_glyph (); + } + buffer->swap_buffers (); +} + +static void +initial_reordering_indic (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ + update_consonant_positions_indic (plan, font, buffer); + insert_dotted_circles_indic (plan, font, buffer); + + foreach_syllable (buffer, start, end) + initial_reordering_syllable_indic (plan, font->face, buffer, start, end); +} + +static void +final_reordering_syllable_indic (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + unsigned int start, unsigned int end) +{ + const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) plan->data; + hb_glyph_info_t *info = buffer->info; + + + /* This function relies heavily on halant glyphs. Lots of ligation + * and possibly multiple substitutions happened prior to this + * phase, and that might have messed up our properties. Recover + * from a particular case of that where we're fairly sure that a + * class of OT_H is desired but has been lost. */ + /* We don't call load_virama_glyph(), since we know it's already + * loaded. */ + hb_codepoint_t virama_glyph = indic_plan->virama_glyph.get_relaxed (); + if (virama_glyph) + { + for (unsigned int i = start; i < end; i++) + if (info[i].codepoint == virama_glyph && + _hb_glyph_info_ligated (&info[i]) && + _hb_glyph_info_multiplied (&info[i])) + { + /* This will make sure that this glyph passes is_halant() test. */ + info[i].indic_category() = OT_H; + _hb_glyph_info_clear_ligated_and_multiplied (&info[i]); + } + } + + + /* 4. Final reordering: + * + * After the localized forms and basic shaping forms GSUB features have been + * applied (see below), the shaping engine performs some final glyph + * reordering before applying all the remaining font features to the entire + * syllable. + */ + + bool try_pref = !!indic_plan->mask_array[INDIC_PREF]; + + /* Find base again */ + unsigned int base; + for (base = start; base < end; base++) + if (info[base].indic_position() >= POS_BASE_C) + { + if (try_pref && base + 1 < end) + { + for (unsigned int i = base + 1; i < end; i++) + if ((info[i].mask & indic_plan->mask_array[INDIC_PREF]) != 0) + { + if (!(_hb_glyph_info_substituted (&info[i]) && + _hb_glyph_info_ligated_and_didnt_multiply (&info[i]))) + { + /* Ok, this was a 'pref' candidate but didn't form any. + * Base is around here... */ + base = i; + while (base < end && is_halant (info[base])) + base++; + info[base].indic_position() = POS_BASE_C; + + try_pref = false; + } + break; + } + } + /* For Malayalam, skip over unformed below- (but NOT post-) forms. */ + if (buffer->props.script == HB_SCRIPT_MALAYALAM) + { + for (unsigned int i = base + 1; i < end; i++) + { + while (i < end && is_joiner (info[i])) + i++; + if (i == end || !is_halant (info[i])) + break; + i++; /* Skip halant. */ + while (i < end && is_joiner (info[i])) + i++; + if (i < end && is_consonant (info[i]) && info[i].indic_position() == POS_BELOW_C) + { + base = i; + info[base].indic_position() = POS_BASE_C; + } + } + } + + if (start < base && info[base].indic_position() > POS_BASE_C) + base--; + break; + } + if (base == end && start < base && + is_one_of (info[base - 1], FLAG (OT_ZWJ))) + base--; + if (base < end) + while (start < base && + is_one_of (info[base], (FLAG (OT_N) | FLAG (OT_H)))) + base--; + + + /* o Reorder matras: + * + * If a pre-base matra character had been reordered before applying basic + * features, the glyph can be moved closer to the main consonant based on + * whether half-forms had been formed. Actual position for the matra is + * defined as “after last standalone halant glyph, after initial matra + * position and before the main consonant”. If ZWJ or ZWNJ follow this + * halant, position is moved after it. + * + * IMPLEMENTATION NOTES: + * + * It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe + * and Devanagari shows that the behavior is best described as: + * + * "If ZWJ follows this halant, matra is NOT repositioned after this halant. + * If ZWNJ follows this halant, position is moved after it." + * + * Test case, with Adobe Devanagari or Nirmala UI: + * + * U+091F,U+094D,U+200C,U+092F,U+093F + * (Matra moves to the middle, after ZWNJ.) + * + * U+091F,U+094D,U+200D,U+092F,U+093F + * (Matra does NOT move, stays to the left.) + * + * https://github.com/harfbuzz/harfbuzz/issues/1070 + */ + + if (start + 1 < end && start < base) /* Otherwise there can't be any pre-base matra characters. */ + { + /* If we lost track of base, alas, position before last thingy. */ + unsigned int new_pos = base == end ? base - 2 : base - 1; + + /* Malayalam / Tamil do not have "half" forms or explicit virama forms. + * The glyphs formed by 'half' are Chillus or ligated explicit viramas. + * We want to position matra after them. + */ + if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL) + { + search: + while (new_pos > start && + !(is_one_of (info[new_pos], (FLAG (OT_M) | FLAG (OT_H))))) + new_pos--; + + /* If we found no Halant we are done. + * Otherwise only proceed if the Halant does + * not belong to the Matra itself! */ + if (is_halant (info[new_pos]) && + info[new_pos].indic_position() != POS_PRE_M) + { +#if 0 // See comment above + /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ + if (new_pos + 1 < end && is_joiner (info[new_pos + 1])) + new_pos++; +#endif + if (new_pos + 1 < end) + { + /* -> If ZWJ follows this halant, matra is NOT repositioned after this halant. */ + if (info[new_pos + 1].indic_category() == OT_ZWJ) + { + /* Keep searching. */ + if (new_pos > start) + { + new_pos--; + goto search; + } + } + /* -> If ZWNJ follows this halant, position is moved after it. + * + * IMPLEMENTATION NOTES: + * + * This is taken care of by the state-machine. A Halant,ZWNJ is a terminating + * sequence for a consonant syllable; any pre-base matras occurring after it + * will belong to the subsequent syllable. + */ + } + } + else + new_pos = start; /* No move. */ + } + + if (start < new_pos && info[new_pos].indic_position () != POS_PRE_M) + { + /* Now go see if there's actually any matras... */ + for (unsigned int i = new_pos; i > start; i--) + if (info[i - 1].indic_position () == POS_PRE_M) + { + unsigned int old_pos = i - 1; + if (old_pos < base && base <= new_pos) /* Shouldn't actually happen. */ + base--; + + hb_glyph_info_t tmp = info[old_pos]; + memmove (&info[old_pos], &info[old_pos + 1], (new_pos - old_pos) * sizeof (info[0])); + info[new_pos] = tmp; + + /* Note: this merge_clusters() is intentionally *after* the reordering. + * Indic matra reordering is special and tricky... */ + buffer->merge_clusters (new_pos, hb_min (end, base + 1)); + + new_pos--; + } + } else { + for (unsigned int i = start; i < base; i++) + if (info[i].indic_position () == POS_PRE_M) { + buffer->merge_clusters (i, hb_min (end, base + 1)); + break; + } + } + } + + + /* o Reorder reph: + * + * Reph’s original position is always at the beginning of the syllable, + * (i.e. it is not reordered at the character reordering stage). However, + * it will be reordered according to the basic-forms shaping results. + * Possible positions for reph, depending on the script, are; after main, + * before post-base consonant forms, and after post-base consonant forms. + */ + + /* Two cases: + * + * - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then + * we should only move it if the sequence ligated to the repha form. + * + * - If repha is encoded separately and in the logical position, we should only + * move it if it did NOT ligate. If it ligated, it's probably the font trying + * to make it work without the reordering. + */ + if (start + 1 < end && + info[start].indic_position() == POS_RA_TO_BECOME_REPH && + ((info[start].indic_category() == OT_Repha) ^ + _hb_glyph_info_ligated_and_didnt_multiply (&info[start]))) + { + unsigned int new_reph_pos; + reph_position_t reph_pos = indic_plan->config->reph_pos; + + /* 1. If reph should be positioned after post-base consonant forms, + * proceed to step 5. + */ + if (reph_pos == REPH_POS_AFTER_POST) + { + goto reph_step_5; + } + + /* 2. If the reph repositioning class is not after post-base: target + * position is after the first explicit halant glyph between the + * first post-reph consonant and last main consonant. If ZWJ or ZWNJ + * are following this halant, position is moved after it. If such + * position is found, this is the target position. Otherwise, + * proceed to the next step. + * + * Note: in old-implementation fonts, where classifications were + * fixed in shaping engine, there was no case where reph position + * will be found on this step. + */ + { + new_reph_pos = start + 1; + while (new_reph_pos < base && !is_halant (info[new_reph_pos])) + new_reph_pos++; + + if (new_reph_pos < base && is_halant (info[new_reph_pos])) + { + /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ + if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) + new_reph_pos++; + goto reph_move; + } + } + + /* 3. If reph should be repositioned after the main consonant: find the + * first consonant not ligated with main, or find the first + * consonant that is not a potential pre-base-reordering Ra. + */ + if (reph_pos == REPH_POS_AFTER_MAIN) + { + new_reph_pos = base; + while (new_reph_pos + 1 < end && info[new_reph_pos + 1].indic_position() <= POS_AFTER_MAIN) + new_reph_pos++; + if (new_reph_pos < end) + goto reph_move; + } + + /* 4. If reph should be positioned before post-base consonant, find + * first post-base classified consonant not ligated with main. If no + * consonant is found, the target position should be before the + * first matra, syllable modifier sign or vedic sign. + */ + /* This is our take on what step 4 is trying to say (and failing, BADLY). */ + if (reph_pos == REPH_POS_AFTER_SUB) + { + new_reph_pos = base; + while (new_reph_pos + 1 < end && + !( FLAG_UNSAFE (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD)))) + new_reph_pos++; + if (new_reph_pos < end) + goto reph_move; + } + + /* 5. If no consonant is found in steps 3 or 4, move reph to a position + * immediately before the first post-base matra, syllable modifier + * sign or vedic sign that has a reordering class after the intended + * reph position. For example, if the reordering position for reph + * is post-main, it will skip above-base matras that also have a + * post-main position. + */ + reph_step_5: + { + /* Copied from step 2. */ + new_reph_pos = start + 1; + while (new_reph_pos < base && !is_halant (info[new_reph_pos])) + new_reph_pos++; + + if (new_reph_pos < base && is_halant (info[new_reph_pos])) + { + /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ + if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) + new_reph_pos++; + goto reph_move; + } + } + /* See https://github.com/harfbuzz/harfbuzz/issues/2298#issuecomment-615318654 */ + + /* 6. Otherwise, reorder reph to the end of the syllable. + */ + { + new_reph_pos = end - 1; + while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_SMVD) + new_reph_pos--; + + /* + * If the Reph is to be ending up after a Matra,Halant sequence, + * position it before that Halant so it can interact with the Matra. + * However, if it's a plain Consonant,Halant we shouldn't do that. + * Uniscribe doesn't do this. + * TEST: U+0930,U+094D,U+0915,U+094B,U+094D + */ + if (!indic_plan->uniscribe_bug_compatible && + unlikely (is_halant (info[new_reph_pos]))) + { + for (unsigned int i = base + 1; i < new_reph_pos; i++) + if (info[i].indic_category() == OT_M) { + /* Ok, got it. */ + new_reph_pos--; + } + } + + goto reph_move; + } + + reph_move: + { + /* Move */ + buffer->merge_clusters (start, new_reph_pos + 1); + hb_glyph_info_t reph = info[start]; + memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0])); + info[new_reph_pos] = reph; + + if (start < base && base <= new_reph_pos) + base--; + } + } + + + /* o Reorder pre-base-reordering consonants: + * + * If a pre-base-reordering consonant is found, reorder it according to + * the following rules: + */ + + if (try_pref && base + 1 < end) /* Otherwise there can't be any pre-base-reordering Ra. */ + { + for (unsigned int i = base + 1; i < end; i++) + if ((info[i].mask & indic_plan->mask_array[INDIC_PREF]) != 0) + { + /* 1. Only reorder a glyph produced by substitution during application + * of the <pref> feature. (Note that a font may shape a Ra consonant with + * the feature generally but block it in certain contexts.) + */ + /* Note: We just check that something got substituted. We don't check that + * the <pref> feature actually did it... + * + * Reorder pref only if it ligated. */ + if (_hb_glyph_info_ligated_and_didnt_multiply (&info[i])) + { + /* + * 2. Try to find a target position the same way as for pre-base matra. + * If it is found, reorder pre-base consonant glyph. + * + * 3. If position is not found, reorder immediately before main + * consonant. + */ + + unsigned int new_pos = base; + /* Malayalam / Tamil do not have "half" forms or explicit virama forms. + * The glyphs formed by 'half' are Chillus or ligated explicit viramas. + * We want to position matra after them. + */ + if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL) + { + while (new_pos > start && + !(is_one_of (info[new_pos - 1], FLAG(OT_M) | FLAG (OT_H)))) + new_pos--; + } + + if (new_pos > start && is_halant (info[new_pos - 1])) + { + /* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */ + if (new_pos < end && is_joiner (info[new_pos])) + new_pos++; + } + + { + unsigned int old_pos = i; + + buffer->merge_clusters (new_pos, old_pos + 1); + hb_glyph_info_t tmp = info[old_pos]; + memmove (&info[new_pos + 1], &info[new_pos], (old_pos - new_pos) * sizeof (info[0])); + info[new_pos] = tmp; + + if (new_pos <= base && base < old_pos) + base++; + } + } + + break; + } + } + + + /* Apply 'init' to the Left Matra if it's a word start. */ + if (info[start].indic_position () == POS_PRE_M) + { + if (!start || + !(FLAG_UNSAFE (_hb_glyph_info_get_general_category (&info[start - 1])) & + FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))) + info[start].mask |= indic_plan->mask_array[INDIC_INIT]; + else + buffer->unsafe_to_break (start - 1, start + 1); + } + + + /* + * Finish off the clusters and go home! + */ + if (indic_plan->uniscribe_bug_compatible) + { + switch ((hb_tag_t) plan->props.script) + { + case HB_SCRIPT_TAMIL: + case HB_SCRIPT_SINHALA: + break; + + default: + /* Uniscribe merges the entire syllable into a single cluster... Except for Tamil & Sinhala. + * This means, half forms are submerged into the main consonant's cluster. + * This is unnecessary, and makes cursor positioning harder, but that's what + * Uniscribe does. */ + buffer->merge_clusters (start, end); + break; + } + } +} + + +static void +final_reordering_indic (const hb_ot_shape_plan_t *plan, + hb_font_t *font HB_UNUSED, + hb_buffer_t *buffer) +{ + unsigned int count = buffer->len; + if (unlikely (!count)) return; + + foreach_syllable (buffer, start, end) + final_reordering_syllable_indic (plan, buffer, start, end); + + HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category); + HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position); +} + + +static void +preprocess_text_indic (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font) +{ + _hb_preprocess_text_vowel_constraints (plan, buffer, font); +} + +static bool +decompose_indic (const hb_ot_shape_normalize_context_t *c, + hb_codepoint_t ab, + hb_codepoint_t *a, + hb_codepoint_t *b) +{ + switch (ab) + { + /* Don't decompose these. */ + case 0x0931u : return false; /* DEVANAGARI LETTER RRA */ + // https://github.com/harfbuzz/harfbuzz/issues/779 + case 0x09DCu : return false; /* BENGALI LETTER RRA */ + case 0x09DDu : return false; /* BENGALI LETTER RHA */ + case 0x0B94u : return false; /* TAMIL LETTER AU */ + + + /* + * Decompose split matras that don't have Unicode decompositions. + */ + +#if 0 + /* Gujarati */ + /* This one has no decomposition in Unicode, but needs no decomposition either. */ + /* case 0x0AC9u : return false; */ + + /* Oriya */ + case 0x0B57u : *a = no decomp, -> RIGHT; return true; +#endif + } + + if ((ab == 0x0DDAu || hb_in_range<hb_codepoint_t> (ab, 0x0DDCu, 0x0DDEu))) + { + /* + * Sinhala split matras... Let the fun begin. + * + * These four characters have Unicode decompositions. However, Uniscribe + * decomposes them "Khmer-style", that is, it uses the character itself to + * get the second half. The first half of all four decompositions is always + * U+0DD9. + * + * Now, there are buggy fonts, namely, the widely used lklug.ttf, that are + * broken with Uniscribe. But we need to support them. As such, we only + * do the Uniscribe-style decomposition if the character is transformed into + * its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to + * Unicode decomposition. + * + * Note that we can't unconditionally use Unicode decomposition. That would + * break some other fonts, that are designed to work with Uniscribe, and + * don't have positioning features for the Unicode-style decomposition. + * + * Argh... + * + * The Uniscribe behavior is now documented in the newly published Sinhala + * spec in 2012: + * + * https://docs.microsoft.com/en-us/typography/script-development/sinhala#shaping + */ + + + const indic_shape_plan_t *indic_plan = (const indic_shape_plan_t *) c->plan->data; + hb_codepoint_t glyph; + if (indic_plan->uniscribe_bug_compatible || + (c->font->get_nominal_glyph (ab, &glyph) && + indic_plan->pstf.would_substitute (&glyph, 1, c->font->face))) + { + /* Ok, safe to use Uniscribe-style decomposition. */ + *a = 0x0DD9u; + *b = ab; + return true; + } + } + + return (bool) c->unicode->decompose (ab, a, b); +} + +static bool +compose_indic (const hb_ot_shape_normalize_context_t *c, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab) +{ + /* Avoid recomposing split matras. */ + if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) + return false; + + /* Composition-exclusion exceptions that we want to recompose. */ + if (a == 0x09AFu && b == 0x09BCu) { *ab = 0x09DFu; return true; } + + return (bool) c->unicode->compose (a, b, ab); +} + + +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic = +{ + collect_features_indic, + override_features_indic, + data_create_indic, + data_destroy_indic, + preprocess_text_indic, + nullptr, /* postprocess_glyphs */ + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, + decompose_indic, + compose_indic, + setup_masks_indic, + HB_TAG_NONE, /* gpos_tag */ + nullptr, /* reorder_marks */ + HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, + false, /* fallback_position */ +}; + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.hh new file mode 100644 index 0000000000..41bd8bd6cc --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.hh @@ -0,0 +1,436 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_INDIC_HH +#define HB_OT_SHAPE_COMPLEX_INDIC_HH + +#include "hb.hh" + +#include "hb-ot-shape-complex.hh" + + +/* buffer var allocations */ +#define indic_category() complex_var_u8_0() /* indic_category_t */ +#define indic_position() complex_var_u8_1() /* indic_position_t */ + + +#define INDIC_TABLE_ELEMENT_TYPE uint16_t + +/* Cateories used in the OpenType spec: + * https://docs.microsoft.com/en-us/typography/script-development/devanagari + */ +/* Note: This enum is duplicated in the -machine.rl source file. + * Not sure how to avoid duplication. */ +enum indic_category_t { + OT_X = 0, + OT_C = 1, + OT_V = 2, + OT_N = 3, + OT_H = 4, + OT_ZWNJ = 5, + OT_ZWJ = 6, + OT_M = 7, + OT_SM = 8, + /* OT_VD = 9, UNUSED; we use OT_A instead. */ + OT_A = 10, + OT_PLACEHOLDER = 11, + OT_DOTTEDCIRCLE = 12, + OT_RS = 13, /* Register Shifter, used in Khmer OT spec. */ + OT_Coeng = 14, /* Khmer-style Virama. */ + OT_Repha = 15, /* Atomically-encoded logical or visual repha. */ + OT_Ra = 16, + OT_CM = 17, /* Consonant-Medial. */ + OT_Symbol = 18, /* Avagraha, etc that take marks (SM,A,VD). */ + OT_CS = 19, + + /* The following are used by Khmer & Myanmar shapers. Defined + * here for them to share. */ + OT_VAbv = 26, + OT_VBlw = 27, + OT_VPre = 28, + OT_VPst = 29, +}; + +#define MEDIAL_FLAGS (FLAG (OT_CM)) + +/* Note: + * + * We treat Vowels and placeholders as if they were consonants. This is safe because Vowels + * cannot happen in a consonant syllable. The plus side however is, we can call the + * consonant syllable logic from the vowel syllable function and get it all right! */ +#define CONSONANT_FLAGS (FLAG (OT_C) | FLAG (OT_CS) | FLAG (OT_Ra) | MEDIAL_FLAGS | FLAG (OT_V) | FLAG (OT_PLACEHOLDER) | FLAG (OT_DOTTEDCIRCLE)) +#define JOINER_FLAGS (FLAG (OT_ZWJ) | FLAG (OT_ZWNJ)) + + +/* Visual positions in a syllable from left to right. */ +enum indic_position_t { + POS_START = 0, + + POS_RA_TO_BECOME_REPH = 1, + POS_PRE_M = 2, + POS_PRE_C = 3, + + POS_BASE_C = 4, + POS_AFTER_MAIN = 5, + + POS_ABOVE_C = 6, + + POS_BEFORE_SUB = 7, + POS_BELOW_C = 8, + POS_AFTER_SUB = 9, + + POS_BEFORE_POST = 10, + POS_POST_C = 11, + POS_AFTER_POST = 12, + + POS_FINAL_C = 13, + POS_SMVD = 14, + + POS_END = 15 +}; + +/* Categories used in IndicSyllabicCategory.txt from UCD. */ +enum indic_syllabic_category_t { + INDIC_SYLLABIC_CATEGORY_OTHER = OT_X, + + INDIC_SYLLABIC_CATEGORY_AVAGRAHA = OT_Symbol, + INDIC_SYLLABIC_CATEGORY_BINDU = OT_SM, + INDIC_SYLLABIC_CATEGORY_BRAHMI_JOINING_NUMBER = OT_PLACEHOLDER, /* Don't care. */ + INDIC_SYLLABIC_CATEGORY_CANTILLATION_MARK = OT_A, + INDIC_SYLLABIC_CATEGORY_CONSONANT = OT_C, + INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD = OT_C, + INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL = OT_CM, + INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER = OT_C, + INDIC_SYLLABIC_CATEGORY_CONSONANT_KILLER = OT_M, /* U+17CD only. */ + INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL = OT_CM, + INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER = OT_PLACEHOLDER, + INDIC_SYLLABIC_CATEGORY_CONSONANT_PRECEDING_REPHA = OT_Repha, + INDIC_SYLLABIC_CATEGORY_CONSONANT_PREFIXED = OT_X, /* Don't care. */ + INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED = OT_CM, + INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA = OT_CM, + INDIC_SYLLABIC_CATEGORY_CONSONANT_WITH_STACKER = OT_CS, + INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK = OT_SM, /* https://github.com/harfbuzz/harfbuzz/issues/552 */ + INDIC_SYLLABIC_CATEGORY_INVISIBLE_STACKER = OT_Coeng, + INDIC_SYLLABIC_CATEGORY_JOINER = OT_ZWJ, + INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER = OT_X, + INDIC_SYLLABIC_CATEGORY_NON_JOINER = OT_ZWNJ, + INDIC_SYLLABIC_CATEGORY_NUKTA = OT_N, + INDIC_SYLLABIC_CATEGORY_NUMBER = OT_PLACEHOLDER, + INDIC_SYLLABIC_CATEGORY_NUMBER_JOINER = OT_PLACEHOLDER, /* Don't care. */ + INDIC_SYLLABIC_CATEGORY_PURE_KILLER = OT_M, /* Is like a vowel matra. */ + INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER = OT_RS, + INDIC_SYLLABIC_CATEGORY_SYLLABLE_MODIFIER = OT_SM, + INDIC_SYLLABIC_CATEGORY_TONE_LETTER = OT_X, + INDIC_SYLLABIC_CATEGORY_TONE_MARK = OT_N, + INDIC_SYLLABIC_CATEGORY_VIRAMA = OT_H, + INDIC_SYLLABIC_CATEGORY_VISARGA = OT_SM, + INDIC_SYLLABIC_CATEGORY_VOWEL = OT_V, + INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT = OT_M, + INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT = OT_V +}; + +/* Categories used in IndicSMatraCategory.txt from UCD */ +enum indic_matra_category_t { + INDIC_MATRA_CATEGORY_NOT_APPLICABLE = POS_END, + + INDIC_MATRA_CATEGORY_LEFT = POS_PRE_C, + INDIC_MATRA_CATEGORY_TOP = POS_ABOVE_C, + INDIC_MATRA_CATEGORY_BOTTOM = POS_BELOW_C, + INDIC_MATRA_CATEGORY_RIGHT = POS_POST_C, + + /* These should resolve to the position of the last part of the split sequence. */ + INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, + INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, + INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM = INDIC_MATRA_CATEGORY_BOTTOM, + INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_LEFT = INDIC_MATRA_CATEGORY_BOTTOM, + INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, + INDIC_MATRA_CATEGORY_TOP_AND_LEFT = INDIC_MATRA_CATEGORY_TOP, + INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, + INDIC_MATRA_CATEGORY_TOP_AND_RIGHT = INDIC_MATRA_CATEGORY_RIGHT, + + INDIC_MATRA_CATEGORY_OVERSTRUCK = POS_AFTER_MAIN, + INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT = POS_PRE_M +}; + +#define INDIC_COMBINE_CATEGORIES(S,M) \ + ( \ + ASSERT_STATIC_EXPR_ZERO (S < 255 && M < 255) + \ + ( S | \ + ( \ + ( \ + S == INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL || \ + S == INDIC_SYLLABIC_CATEGORY_GEMINATION_MARK || \ + S == INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER || \ + S == INDIC_SYLLABIC_CATEGORY_CONSONANT_SUCCEEDING_REPHA || \ + S == INDIC_SYLLABIC_CATEGORY_VIRAMA || \ + S == INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT || \ + false \ + ? M : INDIC_MATRA_CATEGORY_NOT_APPLICABLE \ + ) << 8 \ + ) \ + ) \ + ) + +HB_INTERNAL INDIC_TABLE_ELEMENT_TYPE +hb_indic_get_categories (hb_codepoint_t u); + + +static inline bool +is_one_of (const hb_glyph_info_t &info, unsigned int flags) +{ + /* If it ligated, all bets are off. */ + if (_hb_glyph_info_ligated (&info)) return false; + return !!(FLAG_UNSAFE (info.indic_category()) & flags); +} + +static inline bool +is_joiner (const hb_glyph_info_t &info) +{ + return is_one_of (info, JOINER_FLAGS); +} + +static inline bool +is_consonant (const hb_glyph_info_t &info) +{ + return is_one_of (info, CONSONANT_FLAGS); +} + +static inline bool +is_halant (const hb_glyph_info_t &info) +{ + return is_one_of (info, FLAG (OT_H)); +} + +#define IN_HALF_BLOCK(u, Base) (((u) & ~0x7Fu) == (Base)) + +#define IS_DEVA(u) (IN_HALF_BLOCK (u, 0x0900u)) +#define IS_BENG(u) (IN_HALF_BLOCK (u, 0x0980u)) +#define IS_GURU(u) (IN_HALF_BLOCK (u, 0x0A00u)) +#define IS_GUJR(u) (IN_HALF_BLOCK (u, 0x0A80u)) +#define IS_ORYA(u) (IN_HALF_BLOCK (u, 0x0B00u)) +#define IS_TAML(u) (IN_HALF_BLOCK (u, 0x0B80u)) +#define IS_TELU(u) (IN_HALF_BLOCK (u, 0x0C00u)) +#define IS_KNDA(u) (IN_HALF_BLOCK (u, 0x0C80u)) +#define IS_MLYM(u) (IN_HALF_BLOCK (u, 0x0D00u)) +#define IS_SINH(u) (IN_HALF_BLOCK (u, 0x0D80u)) + + +#define MATRA_POS_LEFT(u) POS_PRE_M +#define MATRA_POS_RIGHT(u) ( \ + IS_DEVA(u) ? POS_AFTER_SUB : \ + IS_BENG(u) ? POS_AFTER_POST : \ + IS_GURU(u) ? POS_AFTER_POST : \ + IS_GUJR(u) ? POS_AFTER_POST : \ + IS_ORYA(u) ? POS_AFTER_POST : \ + IS_TAML(u) ? POS_AFTER_POST : \ + IS_TELU(u) ? (u <= 0x0C42u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ + IS_KNDA(u) ? (u < 0x0CC3u || u > 0xCD6u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ + IS_MLYM(u) ? POS_AFTER_POST : \ + IS_SINH(u) ? POS_AFTER_SUB : \ + /*default*/ POS_AFTER_SUB \ + ) +#define MATRA_POS_TOP(u) ( /* BENG and MLYM don't have top matras. */ \ + IS_DEVA(u) ? POS_AFTER_SUB : \ + IS_GURU(u) ? POS_AFTER_POST : /* Deviate from spec */ \ + IS_GUJR(u) ? POS_AFTER_SUB : \ + IS_ORYA(u) ? POS_AFTER_MAIN : \ + IS_TAML(u) ? POS_AFTER_SUB : \ + IS_TELU(u) ? POS_BEFORE_SUB : \ + IS_KNDA(u) ? POS_BEFORE_SUB : \ + IS_SINH(u) ? POS_AFTER_SUB : \ + /*default*/ POS_AFTER_SUB \ + ) +#define MATRA_POS_BOTTOM(u) ( \ + IS_DEVA(u) ? POS_AFTER_SUB : \ + IS_BENG(u) ? POS_AFTER_SUB : \ + IS_GURU(u) ? POS_AFTER_POST : \ + IS_GUJR(u) ? POS_AFTER_POST : \ + IS_ORYA(u) ? POS_AFTER_SUB : \ + IS_TAML(u) ? POS_AFTER_POST : \ + IS_TELU(u) ? POS_BEFORE_SUB : \ + IS_KNDA(u) ? POS_BEFORE_SUB : \ + IS_MLYM(u) ? POS_AFTER_POST : \ + IS_SINH(u) ? POS_AFTER_SUB : \ + /*default*/ POS_AFTER_SUB \ + ) + +static inline indic_position_t +matra_position_indic (hb_codepoint_t u, indic_position_t side) +{ + switch ((int) side) + { + case POS_PRE_C: return MATRA_POS_LEFT (u); + case POS_POST_C: return MATRA_POS_RIGHT (u); + case POS_ABOVE_C: return MATRA_POS_TOP (u); + case POS_BELOW_C: return MATRA_POS_BOTTOM (u); + } + return side; +} + +/* XXX + * This is a hack for now. We should move this data into the main Indic table. + * Or completely remove it and just check in the tables. + */ +static const hb_codepoint_t ra_chars[] = { + 0x0930u, /* Devanagari */ + 0x09B0u, /* Bengali */ + 0x09F0u, /* Bengali */ + 0x0A30u, /* Gurmukhi */ /* No Reph */ + 0x0AB0u, /* Gujarati */ + 0x0B30u, /* Oriya */ + 0x0BB0u, /* Tamil */ /* No Reph */ + 0x0C30u, /* Telugu */ /* Reph formed only with ZWJ */ + 0x0CB0u, /* Kannada */ + 0x0D30u, /* Malayalam */ /* No Reph, Logical Repha */ + + 0x0DBBu, /* Sinhala */ /* Reph formed only with ZWJ */ + + 0x179Au, /* Khmer */ +}; + +static inline bool +is_ra (hb_codepoint_t u) +{ + for (unsigned int i = 0; i < ARRAY_LENGTH (ra_chars); i++) + if (u == ra_chars[i]) + return true; + return false; +} + +static inline void +set_indic_properties (hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + unsigned int type = hb_indic_get_categories (u); + indic_category_t cat = (indic_category_t) (type & 0x7Fu); + indic_position_t pos = (indic_position_t) (type >> 8); + + + /* + * Re-assign category + */ + + /* The following act more like the Bindus. */ + if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0953u, 0x0954u))) + cat = OT_SM; + /* The following act like consonants. */ + else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0x0A72u, 0x0A73u, + 0x1CF5u, 0x1CF6u))) + cat = OT_C; + /* TODO: The following should only be allowed after a Visarga. + * For now, just treat them like regular tone marks. */ + else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CE2u, 0x1CE8u))) + cat = OT_A; + /* TODO: The following should only be allowed after some of + * the nasalization marks, maybe only for U+1CE9..U+1CF1. + * For now, just treat them like tone marks. */ + else if (unlikely (u == 0x1CEDu)) + cat = OT_A; + /* The following take marks in standalone clusters, similar to Avagraha. */ + else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0xA8F2u, 0xA8F7u, + 0x1CE9u, 0x1CECu, + 0x1CEEu, 0x1CF1u))) + { + cat = OT_Symbol; + static_assert (((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol), ""); + } + else if (unlikely (u == 0x0A51u)) + { + /* https://github.com/harfbuzz/harfbuzz/issues/524 */ + cat = OT_M; + pos = POS_BELOW_C; + } + + /* According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil, + * so the Indic shaper needs to know their categories. */ + else if (unlikely (u == 0x11301u || u == 0x11303u)) cat = OT_SM; + else if (unlikely (u == 0x1133Bu || u == 0x1133Cu)) cat = OT_N; + + else if (unlikely (u == 0x0AFBu)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/552 */ + + else if (unlikely (u == 0x0980u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/issues/538 */ + else if (unlikely (u == 0x09FCu)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/1613 */ + else if (unlikely (u == 0x0C80u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/623 */ + else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) + cat = OT_PLACEHOLDER; + else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; + + + /* + * Re-assign position. + */ + + if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS)) + { + pos = POS_BASE_C; + if (is_ra (u)) + cat = OT_Ra; + } + else if (cat == OT_M) + { + pos = matra_position_indic (u, pos); + } + else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) /* | FLAG (OT_VD) */ | FLAG (OT_A) | FLAG (OT_Symbol)))) + { + pos = POS_SMVD; + } + + if (unlikely (u == 0x0B01u)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */ + + + + info.indic_category() = cat; + info.indic_position() = pos; +} + +struct hb_indic_would_substitute_feature_t +{ + void init (const hb_ot_map_t *map, hb_tag_t feature_tag, bool zero_context_) + { + zero_context = zero_context_; + map->get_stage_lookups (0/*GSUB*/, + map->get_feature_stage (0/*GSUB*/, feature_tag), + &lookups, &count); + } + + bool would_substitute (const hb_codepoint_t *glyphs, + unsigned int glyphs_count, + hb_face_t *face) const + { + for (unsigned int i = 0; i < count; i++) + if (hb_ot_layout_lookup_would_substitute (face, lookups[i].index, glyphs, glyphs_count, zero_context)) + return true; + return false; + } + + private: + const hb_ot_map_t::lookup_map_t *lookups; + unsigned int count; + bool zero_context; +}; + + +#endif /* HB_OT_SHAPE_COMPLEX_INDIC_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.hh new file mode 100644 index 0000000000..a040318d34 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.hh @@ -0,0 +1,372 @@ + +#line 1 "hb-ot-shape-complex-khmer-machine.rl" +/* + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH +#define HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH + +#include "hb.hh" + + +#line 36 "hb-ot-shape-complex-khmer-machine.hh" +static const unsigned char _khmer_syllable_machine_trans_keys[] = { + 5u, 26u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 16u, 5u, 21u, 5u, 26u, 5u, 21u, + 5u, 26u, 5u, 21u, 5u, 21u, 5u, 26u, 5u, 21u, 1u, 16u, 5u, 21u, 5u, 26u, + 5u, 21u, 5u, 26u, 5u, 21u, 5u, 26u, 1u, 29u, 5u, 29u, 5u, 29u, 5u, 29u, + 22u, 22u, 5u, 22u, 5u, 29u, 5u, 29u, 5u, 29u, 1u, 16u, 5u, 26u, 5u, 29u, + 5u, 29u, 22u, 22u, 5u, 22u, 5u, 29u, 5u, 29u, 1u, 16u, 5u, 29u, 5u, 29u, + 0 +}; + +static const char _khmer_syllable_machine_key_spans[] = { + 22, 17, 22, 17, 16, 17, 22, 17, + 22, 17, 17, 22, 17, 16, 17, 22, + 17, 22, 17, 22, 29, 25, 25, 25, + 1, 18, 25, 25, 25, 16, 22, 25, + 25, 1, 18, 25, 25, 16, 25, 25 +}; + +static const short _khmer_syllable_machine_index_offsets[] = { + 0, 23, 41, 64, 82, 99, 117, 140, + 158, 181, 199, 217, 240, 258, 275, 293, + 316, 334, 357, 375, 398, 428, 454, 480, + 506, 508, 527, 553, 579, 605, 622, 645, + 671, 697, 699, 718, 744, 770, 787, 813 +}; + +static const char _khmer_syllable_machine_indicies[] = { + 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 2, + 3, 0, 0, 0, 0, 4, 0, 1, + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 3, + 0, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 0, 0, 0, 0, 4, 0, + 5, 5, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 0, 6, 6, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 6, 0, 7, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 8, 0, 9, 9, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 10, 0, 0, + 0, 0, 4, 0, 9, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 10, 0, 11, 11, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 12, 0, + 0, 0, 0, 4, 0, 11, 11, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 12, 0, 14, + 14, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 15, + 13, 14, 14, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 15, 16, 16, 16, 16, 17, 16, + 18, 18, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 17, 16, 19, 19, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 19, 16, 20, 20, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 21, 16, 22, 22, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 23, 16, 16, + 16, 16, 17, 16, 22, 22, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 23, 16, 24, 24, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 25, 16, + 16, 16, 16, 17, 16, 24, 24, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 25, 16, 14, + 14, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 26, 15, + 16, 16, 16, 16, 17, 16, 28, 28, + 27, 27, 29, 29, 27, 27, 27, 27, + 2, 2, 27, 30, 27, 28, 27, 27, + 27, 27, 15, 19, 27, 27, 27, 17, + 23, 25, 21, 27, 32, 32, 31, 31, + 31, 31, 31, 31, 31, 33, 31, 31, + 31, 31, 31, 2, 3, 6, 31, 31, + 31, 4, 10, 12, 8, 31, 34, 34, + 31, 31, 31, 31, 31, 31, 31, 35, + 31, 31, 31, 31, 31, 31, 3, 6, + 31, 31, 31, 4, 10, 12, 8, 31, + 5, 5, 31, 31, 31, 31, 31, 31, + 31, 35, 31, 31, 31, 31, 31, 31, + 4, 6, 31, 31, 31, 31, 31, 31, + 8, 31, 6, 31, 7, 7, 31, 31, + 31, 31, 31, 31, 31, 35, 31, 31, + 31, 31, 31, 31, 8, 6, 31, 36, + 36, 31, 31, 31, 31, 31, 31, 31, + 35, 31, 31, 31, 31, 31, 31, 10, + 6, 31, 31, 31, 4, 31, 31, 8, + 31, 37, 37, 31, 31, 31, 31, 31, + 31, 31, 35, 31, 31, 31, 31, 31, + 31, 12, 6, 31, 31, 31, 4, 10, + 31, 8, 31, 34, 34, 31, 31, 31, + 31, 31, 31, 31, 33, 31, 31, 31, + 31, 31, 31, 3, 6, 31, 31, 31, + 4, 10, 12, 8, 31, 28, 28, 31, + 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 28, 31, 14, 14, + 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 15, 38, + 38, 38, 38, 17, 38, 40, 40, 39, + 39, 39, 39, 39, 39, 39, 41, 39, + 39, 39, 39, 39, 39, 15, 19, 39, + 39, 39, 17, 23, 25, 21, 39, 18, + 18, 39, 39, 39, 39, 39, 39, 39, + 41, 39, 39, 39, 39, 39, 39, 17, + 19, 39, 39, 39, 39, 39, 39, 21, + 39, 19, 39, 20, 20, 39, 39, 39, + 39, 39, 39, 39, 41, 39, 39, 39, + 39, 39, 39, 21, 19, 39, 42, 42, + 39, 39, 39, 39, 39, 39, 39, 41, + 39, 39, 39, 39, 39, 39, 23, 19, + 39, 39, 39, 17, 39, 39, 21, 39, + 43, 43, 39, 39, 39, 39, 39, 39, + 39, 41, 39, 39, 39, 39, 39, 39, + 25, 19, 39, 39, 39, 17, 23, 39, + 21, 39, 44, 44, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, + 39, 44, 39, 45, 45, 39, 39, 39, + 39, 39, 39, 39, 30, 39, 39, 39, + 39, 39, 26, 15, 19, 39, 39, 39, + 17, 23, 25, 21, 39, 40, 40, 39, + 39, 39, 39, 39, 39, 39, 30, 39, + 39, 39, 39, 39, 39, 15, 19, 39, + 39, 39, 17, 23, 25, 21, 39, 0 +}; + +static const char _khmer_syllable_machine_trans_targs[] = { + 20, 1, 28, 22, 23, 3, 24, 5, + 25, 7, 26, 9, 27, 20, 10, 31, + 20, 32, 12, 33, 14, 34, 16, 35, + 18, 36, 39, 20, 21, 30, 37, 20, + 0, 29, 2, 4, 6, 8, 20, 20, + 11, 13, 15, 17, 38, 19 +}; + +static const char _khmer_syllable_machine_trans_actions[] = { + 1, 0, 2, 2, 2, 0, 0, 0, + 2, 0, 2, 0, 2, 3, 0, 4, + 5, 2, 0, 0, 0, 2, 0, 2, + 0, 2, 4, 8, 2, 9, 0, 10, + 0, 0, 0, 0, 0, 0, 11, 12, + 0, 0, 0, 0, 4, 0 +}; + +static const char _khmer_syllable_machine_to_state_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 6, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static const char _khmer_syllable_machine_from_state_actions[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static const unsigned char _khmer_syllable_machine_eof_trans[] = { + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 14, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 0, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 39, 40, + 40, 40, 40, 40, 40, 40, 40, 40 +}; + +static const int khmer_syllable_machine_start = 20; +static const int khmer_syllable_machine_first_final = 20; +static const int khmer_syllable_machine_error = -1; + +static const int khmer_syllable_machine_en_main = 20; + + +#line 36 "hb-ot-shape-complex-khmer-machine.rl" + + + +#line 80 "hb-ot-shape-complex-khmer-machine.rl" + + +#define found_syllable(syllable_type) \ + HB_STMT_START { \ + if (0) fprintf (stderr, "syllable %d..%d %s\n", ts, te, #syllable_type); \ + for (unsigned int i = ts; i < te; i++) \ + info[i].syllable() = (syllable_serial << 4) | khmer_##syllable_type; \ + syllable_serial++; \ + if (unlikely (syllable_serial == 16)) syllable_serial = 1; \ + } HB_STMT_END + +static void +find_syllables_khmer (hb_buffer_t *buffer) +{ + unsigned int p, pe, eof, ts, te, act HB_UNUSED; + int cs; + hb_glyph_info_t *info = buffer->info; + +#line 242 "hb-ot-shape-complex-khmer-machine.hh" + { + cs = khmer_syllable_machine_start; + ts = 0; + te = 0; + act = 0; + } + +#line 100 "hb-ot-shape-complex-khmer-machine.rl" + + + p = 0; + pe = eof = buffer->len; + + unsigned int syllable_serial = 1; + +#line 258 "hb-ot-shape-complex-khmer-machine.hh" + { + int _slen; + int _trans; + const unsigned char *_keys; + const char *_inds; + if ( p == pe ) + goto _test_eof; +_resume: + switch ( _khmer_syllable_machine_from_state_actions[cs] ) { + case 7: +#line 1 "NONE" + {ts = p;} + break; +#line 272 "hb-ot-shape-complex-khmer-machine.hh" + } + + _keys = _khmer_syllable_machine_trans_keys + (cs<<1); + _inds = _khmer_syllable_machine_indicies + _khmer_syllable_machine_index_offsets[cs]; + + _slen = _khmer_syllable_machine_key_spans[cs]; + _trans = _inds[ _slen > 0 && _keys[0] <=( info[p].khmer_category()) && + ( info[p].khmer_category()) <= _keys[1] ? + ( info[p].khmer_category()) - _keys[0] : _slen ]; + +_eof_trans: + cs = _khmer_syllable_machine_trans_targs[_trans]; + + if ( _khmer_syllable_machine_trans_actions[_trans] == 0 ) + goto _again; + + switch ( _khmer_syllable_machine_trans_actions[_trans] ) { + case 2: +#line 1 "NONE" + {te = p+1;} + break; + case 8: +#line 76 "hb-ot-shape-complex-khmer-machine.rl" + {te = p+1;{ found_syllable (non_khmer_cluster); }} + break; + case 10: +#line 74 "hb-ot-shape-complex-khmer-machine.rl" + {te = p;p--;{ found_syllable (consonant_syllable); }} + break; + case 12: +#line 75 "hb-ot-shape-complex-khmer-machine.rl" + {te = p;p--;{ found_syllable (broken_cluster); }} + break; + case 11: +#line 76 "hb-ot-shape-complex-khmer-machine.rl" + {te = p;p--;{ found_syllable (non_khmer_cluster); }} + break; + case 1: +#line 74 "hb-ot-shape-complex-khmer-machine.rl" + {{p = ((te))-1;}{ found_syllable (consonant_syllable); }} + break; + case 5: +#line 75 "hb-ot-shape-complex-khmer-machine.rl" + {{p = ((te))-1;}{ found_syllable (broken_cluster); }} + break; + case 3: +#line 1 "NONE" + { switch( act ) { + case 2: + {{p = ((te))-1;} found_syllable (broken_cluster); } + break; + case 3: + {{p = ((te))-1;} found_syllable (non_khmer_cluster); } + break; + } + } + break; + case 4: +#line 1 "NONE" + {te = p+1;} +#line 75 "hb-ot-shape-complex-khmer-machine.rl" + {act = 2;} + break; + case 9: +#line 1 "NONE" + {te = p+1;} +#line 76 "hb-ot-shape-complex-khmer-machine.rl" + {act = 3;} + break; +#line 342 "hb-ot-shape-complex-khmer-machine.hh" + } + +_again: + switch ( _khmer_syllable_machine_to_state_actions[cs] ) { + case 6: +#line 1 "NONE" + {ts = 0;} + break; +#line 351 "hb-ot-shape-complex-khmer-machine.hh" + } + + if ( ++p != pe ) + goto _resume; + _test_eof: {} + if ( p == eof ) + { + if ( _khmer_syllable_machine_eof_trans[cs] > 0 ) { + _trans = _khmer_syllable_machine_eof_trans[cs] - 1; + goto _eof_trans; + } + } + + } + +#line 108 "hb-ot-shape-complex-khmer-machine.rl" + +} + +#undef found_syllable + +#endif /* HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.rl b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.rl new file mode 100644 index 0000000000..e7f14533dd --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer-machine.rl @@ -0,0 +1,113 @@ +/* + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH +#define HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH + +#include "hb.hh" + +%%{ + machine khmer_syllable_machine; + alphtype unsigned char; + write data; +}%% + +%%{ + +# Same order as enum khmer_category_t. Not sure how to avoid duplication. +C = 1; +V = 2; +ZWNJ = 5; +ZWJ = 6; +PLACEHOLDER = 11; +DOTTEDCIRCLE = 12; +Coeng= 14; +Ra = 16; +Robatic = 20; +Xgroup = 21; +Ygroup = 22; +VAbv = 26; +VBlw = 27; +VPre = 28; +VPst = 29; + +c = (C | Ra | V); +cn = c.((ZWJ|ZWNJ)?.Robatic)?; +joiner = (ZWJ | ZWNJ); +xgroup = (joiner*.Xgroup)*; +ygroup = Ygroup*; + +# This grammar was experimentally extracted from what Uniscribe allows. + +matra_group = VPre? xgroup VBlw? xgroup (joiner?.VAbv)? xgroup VPst?; +syllable_tail = xgroup matra_group xgroup (Coeng.c)? ygroup; + + +broken_cluster = (Coeng.cn)* (Coeng | syllable_tail); +consonant_syllable = (cn|PLACEHOLDER|DOTTEDCIRCLE) broken_cluster; +other = any; + +main := |* + consonant_syllable => { found_syllable (consonant_syllable); }; + broken_cluster => { found_syllable (broken_cluster); }; + other => { found_syllable (non_khmer_cluster); }; +*|; + + +}%% + +#define found_syllable(syllable_type) \ + HB_STMT_START { \ + if (0) fprintf (stderr, "syllable %d..%d %s\n", ts, te, #syllable_type); \ + for (unsigned int i = ts; i < te; i++) \ + info[i].syllable() = (syllable_serial << 4) | khmer_##syllable_type; \ + syllable_serial++; \ + if (unlikely (syllable_serial == 16)) syllable_serial = 1; \ + } HB_STMT_END + +static void +find_syllables_khmer (hb_buffer_t *buffer) +{ + unsigned int p, pe, eof, ts, te, act HB_UNUSED; + int cs; + hb_glyph_info_t *info = buffer->info; + %%{ + write init; + getkey info[p].khmer_category(); + }%% + + p = 0; + pe = eof = buffer->len; + + unsigned int syllable_serial = 1; + %%{ + write exec; + }%% +} + +#undef found_syllable + +#endif /* HB_OT_SHAPE_COMPLEX_KHMER_MACHINE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.cc new file mode 100644 index 0000000000..3da8374899 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.cc @@ -0,0 +1,457 @@ +/* + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-complex-khmer.hh" +#include "hb-ot-layout.hh" + + +/* + * Khmer shaper. + */ + +static const hb_ot_map_feature_t +khmer_features[] = +{ + /* + * Basic features. + * These features are applied in order, one at a time, after reordering. + */ + {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS}, + {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS}, + {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS}, + {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS}, + {HB_TAG('c','f','a','r'), F_MANUAL_JOINERS}, + /* + * Other features. + * These features are applied all at once after clearing syllables. + */ + {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS}, +}; + +/* + * Must be in the same order as the khmer_features array. + */ +enum { + KHMER_PREF, + KHMER_BLWF, + KHMER_ABVF, + KHMER_PSTF, + KHMER_CFAR, + + _KHMER_PRES, + _KHMER_ABVS, + _KHMER_BLWS, + _KHMER_PSTS, + + KHMER_NUM_FEATURES, + KHMER_BASIC_FEATURES = _KHMER_PRES, /* Don't forget to update this! */ +}; + +static void +setup_syllables_khmer (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); +static void +reorder_khmer (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + +static void +collect_features_khmer (hb_ot_shape_planner_t *plan) +{ + hb_ot_map_builder_t *map = &plan->map; + + /* Do this before any lookups have been applied. */ + map->add_gsub_pause (setup_syllables_khmer); + map->add_gsub_pause (reorder_khmer); + + /* Testing suggests that Uniscribe does NOT pause between basic + * features. Test with KhmerUI.ttf and the following three + * sequences: + * + * U+1789,U+17BC + * U+1789,U+17D2,U+1789 + * U+1789,U+17D2,U+1789,U+17BC + * + * https://github.com/harfbuzz/harfbuzz/issues/974 + */ + map->enable_feature (HB_TAG('l','o','c','l')); + map->enable_feature (HB_TAG('c','c','m','p')); + + unsigned int i = 0; + for (; i < KHMER_BASIC_FEATURES; i++) + map->add_feature (khmer_features[i]); + + map->add_gsub_pause (_hb_clear_syllables); + + for (; i < KHMER_NUM_FEATURES; i++) + map->add_feature (khmer_features[i]); +} + +static void +override_features_khmer (hb_ot_shape_planner_t *plan) +{ + hb_ot_map_builder_t *map = &plan->map; + + /* Khmer spec has 'clig' as part of required shaping features: + * "Apply feature 'clig' to form ligatures that are desired for + * typographical correctness.", hence in overrides... */ + map->enable_feature (HB_TAG('c','l','i','g')); + + /* Uniscribe does not apply 'kern' in Khmer. */ + if (hb_options ().uniscribe_bug_compatible) + { + map->disable_feature (HB_TAG('k','e','r','n')); + } + + map->disable_feature (HB_TAG('l','i','g','a')); +} + + +struct khmer_shape_plan_t +{ + bool get_virama_glyph (hb_font_t *font, hb_codepoint_t *pglyph) const + { + hb_codepoint_t glyph = virama_glyph; + if (unlikely (virama_glyph == (hb_codepoint_t) -1)) + { + if (!font->get_nominal_glyph (0x17D2u, &glyph)) + glyph = 0; + /* Technically speaking, the spec says we should apply 'locl' to virama too. + * Maybe one day... */ + + /* Our get_nominal_glyph() function needs a font, so we can't get the virama glyph + * during shape planning... Instead, overwrite it here. It's safe. Don't worry! */ + virama_glyph = glyph; + } + + *pglyph = glyph; + return glyph != 0; + } + + mutable hb_codepoint_t virama_glyph; + + hb_mask_t mask_array[KHMER_NUM_FEATURES]; +}; + +static void * +data_create_khmer (const hb_ot_shape_plan_t *plan) +{ + khmer_shape_plan_t *khmer_plan = (khmer_shape_plan_t *) calloc (1, sizeof (khmer_shape_plan_t)); + if (unlikely (!khmer_plan)) + return nullptr; + + khmer_plan->virama_glyph = (hb_codepoint_t) -1; + + for (unsigned int i = 0; i < ARRAY_LENGTH (khmer_plan->mask_array); i++) + khmer_plan->mask_array[i] = (khmer_features[i].flags & F_GLOBAL) ? + 0 : plan->map.get_1_mask (khmer_features[i].tag); + + return khmer_plan; +} + +static void +data_destroy_khmer (void *data) +{ + free (data); +} + + +enum khmer_syllable_type_t { + khmer_consonant_syllable, + khmer_broken_cluster, + khmer_non_khmer_cluster, +}; + +#include "hb-ot-shape-complex-khmer-machine.hh" + +static void +setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_buffer_t *buffer, + hb_font_t *font HB_UNUSED) +{ + HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category); + + /* We cannot setup masks here. We save information about characters + * and setup masks later on in a pause-callback. */ + + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + set_khmer_properties (info[i]); +} + +static void +setup_syllables_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font HB_UNUSED, + hb_buffer_t *buffer) +{ + find_syllables_khmer (buffer); + foreach_syllable (buffer, start, end) + buffer->unsafe_to_break (start, end); +} + + +/* Rules from: + * https://docs.microsoft.com/en-us/typography/script-development/devanagari */ + +static void +reorder_consonant_syllable (const hb_ot_shape_plan_t *plan, + hb_face_t *face HB_UNUSED, + hb_buffer_t *buffer, + unsigned int start, unsigned int end) +{ + const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data; + hb_glyph_info_t *info = buffer->info; + + /* Setup masks. */ + { + /* Post-base */ + hb_mask_t mask = khmer_plan->mask_array[KHMER_BLWF] | + khmer_plan->mask_array[KHMER_ABVF] | + khmer_plan->mask_array[KHMER_PSTF]; + for (unsigned int i = start + 1; i < end; i++) + info[i].mask |= mask; + } + + unsigned int num_coengs = 0; + for (unsigned int i = start + 1; i < end; i++) + { + /* """ + * When a COENG + (Cons | IndV) combination are found (and subscript count + * is less than two) the character combination is handled according to the + * subscript type of the character following the COENG. + * + * ... + * + * Subscript Type 2 - The COENG + RO characters are reordered to immediately + * before the base glyph. Then the COENG + RO characters are assigned to have + * the 'pref' OpenType feature applied to them. + * """ + */ + if (info[i].khmer_category() == OT_Coeng && num_coengs <= 2 && i + 1 < end) + { + num_coengs++; + + if (info[i + 1].khmer_category() == OT_Ra) + { + for (unsigned int j = 0; j < 2; j++) + info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF]; + + /* Move the Coeng,Ro sequence to the start. */ + buffer->merge_clusters (start, i + 2); + hb_glyph_info_t t0 = info[i]; + hb_glyph_info_t t1 = info[i + 1]; + memmove (&info[start + 2], &info[start], (i - start) * sizeof (info[0])); + info[start] = t0; + info[start + 1] = t1; + + /* Mark the subsequent stuff with 'cfar'. Used in Khmer. + * Read the feature spec. + * This allows distinguishing the following cases with MS Khmer fonts: + * U+1784,U+17D2,U+179A,U+17D2,U+1782 + * U+1784,U+17D2,U+1782,U+17D2,U+179A + */ + if (khmer_plan->mask_array[KHMER_CFAR]) + for (unsigned int j = i + 2; j < end; j++) + info[j].mask |= khmer_plan->mask_array[KHMER_CFAR]; + + num_coengs = 2; /* Done. */ + } + } + + /* Reorder left matra piece. */ + else if (info[i].khmer_category() == OT_VPre) + { + /* Move to the start. */ + buffer->merge_clusters (start, i + 1); + hb_glyph_info_t t = info[i]; + memmove (&info[start + 1], &info[start], (i - start) * sizeof (info[0])); + info[start] = t; + } + } +} + +static void +reorder_syllable_khmer (const hb_ot_shape_plan_t *plan, + hb_face_t *face, + hb_buffer_t *buffer, + unsigned int start, unsigned int end) +{ + khmer_syllable_type_t syllable_type = (khmer_syllable_type_t) (buffer->info[start].syllable() & 0x0F); + switch (syllable_type) + { + case khmer_broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */ + case khmer_consonant_syllable: + reorder_consonant_syllable (plan, face, buffer, start, end); + break; + + case khmer_non_khmer_cluster: + break; + } +} + +static inline void +insert_dotted_circles_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font, + hb_buffer_t *buffer) +{ + if (unlikely (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE)) + return; + + /* Note: This loop is extra overhead, but should not be measurable. + * TODO Use a buffer scratch flag to remove the loop. */ + bool has_broken_syllables = false; + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + if ((info[i].syllable() & 0x0F) == khmer_broken_cluster) + { + has_broken_syllables = true; + break; + } + if (likely (!has_broken_syllables)) + return; + + + hb_codepoint_t dottedcircle_glyph; + if (!font->get_nominal_glyph (0x25CCu, &dottedcircle_glyph)) + return; + + hb_glyph_info_t dottedcircle = {0}; + dottedcircle.codepoint = 0x25CCu; + set_khmer_properties (dottedcircle); + dottedcircle.codepoint = dottedcircle_glyph; + + buffer->clear_output (); + + buffer->idx = 0; + unsigned int last_syllable = 0; + while (buffer->idx < buffer->len && buffer->successful) + { + unsigned int syllable = buffer->cur().syllable(); + khmer_syllable_type_t syllable_type = (khmer_syllable_type_t) (syllable & 0x0F); + if (unlikely (last_syllable != syllable && syllable_type == khmer_broken_cluster)) + { + last_syllable = syllable; + + hb_glyph_info_t ginfo = dottedcircle; + ginfo.cluster = buffer->cur().cluster; + ginfo.mask = buffer->cur().mask; + ginfo.syllable() = buffer->cur().syllable(); + + /* Insert dottedcircle after possible Repha. */ + while (buffer->idx < buffer->len && buffer->successful && + last_syllable == buffer->cur().syllable() && + buffer->cur().khmer_category() == OT_Repha) + buffer->next_glyph (); + + buffer->output_info (ginfo); + } + else + buffer->next_glyph (); + } + buffer->swap_buffers (); +} + +static void +reorder_khmer (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ + insert_dotted_circles_khmer (plan, font, buffer); + + foreach_syllable (buffer, start, end) + reorder_syllable_khmer (plan, font->face, buffer, start, end); + + HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category); +} + + +static bool +decompose_khmer (const hb_ot_shape_normalize_context_t *c, + hb_codepoint_t ab, + hb_codepoint_t *a, + hb_codepoint_t *b) +{ + switch (ab) + { + /* + * Decompose split matras that don't have Unicode decompositions. + */ + + /* Khmer */ + case 0x17BEu : *a = 0x17C1u; *b= 0x17BEu; return true; + case 0x17BFu : *a = 0x17C1u; *b= 0x17BFu; return true; + case 0x17C0u : *a = 0x17C1u; *b= 0x17C0u; return true; + case 0x17C4u : *a = 0x17C1u; *b= 0x17C4u; return true; + case 0x17C5u : *a = 0x17C1u; *b= 0x17C5u; return true; + } + + return (bool) c->unicode->decompose (ab, a, b); +} + +static bool +compose_khmer (const hb_ot_shape_normalize_context_t *c, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab) +{ + /* Avoid recomposing split matras. */ + if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) + return false; + + return (bool) c->unicode->compose (a, b, ab); +} + + +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_khmer = +{ + collect_features_khmer, + override_features_khmer, + data_create_khmer, + data_destroy_khmer, + nullptr, /* preprocess_text */ + nullptr, /* postprocess_glyphs */ + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, + decompose_khmer, + compose_khmer, + setup_masks_khmer, + HB_TAG_NONE, /* gpos_tag */ + nullptr, /* reorder_marks */ + HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, + false, /* fallback_position */ +}; + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.hh new file mode 100644 index 0000000000..11a77bfd4b --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.hh @@ -0,0 +1,113 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_KHMER_HH +#define HB_OT_SHAPE_COMPLEX_KHMER_HH + +#include "hb.hh" + +#include "hb-ot-shape-complex-indic.hh" + + +/* buffer var allocations */ +#define khmer_category() indic_category() /* khmer_category_t */ + + +/* Note: This enum is duplicated in the -machine.rl source file. + * Not sure how to avoid duplication. */ +enum khmer_category_t +{ + OT_Robatic = 20, + OT_Xgroup = 21, + OT_Ygroup = 22, + //OT_VAbv = 26, + //OT_VBlw = 27, + //OT_VPre = 28, + //OT_VPst = 29, +}; + +static inline void +set_khmer_properties (hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + unsigned int type = hb_indic_get_categories (u); + khmer_category_t cat = (khmer_category_t) (type & 0x7Fu); + indic_position_t pos = (indic_position_t) (type >> 8); + + + /* + * Re-assign category + * + * These categories are experimentally extracted from what Uniscribe allows. + */ + switch (u) + { + case 0x179Au: + cat = (khmer_category_t) OT_Ra; + break; + + case 0x17CCu: + case 0x17C9u: + case 0x17CAu: + cat = OT_Robatic; + break; + + case 0x17C6u: + case 0x17CBu: + case 0x17CDu: + case 0x17CEu: + case 0x17CFu: + case 0x17D0u: + case 0x17D1u: + cat = OT_Xgroup; + break; + + case 0x17C7u: + case 0x17C8u: + case 0x17DDu: + case 0x17D3u: /* Just guessing. Uniscribe doesn't categorize it. */ + cat = OT_Ygroup; + break; + } + + /* + * Re-assign position. + */ + if (cat == (khmer_category_t) OT_M) + switch ((int) pos) + { + case POS_PRE_C: cat = (khmer_category_t) OT_VPre; break; + case POS_BELOW_C: cat = (khmer_category_t) OT_VBlw; break; + case POS_ABOVE_C: cat = (khmer_category_t) OT_VAbv; break; + case POS_POST_C: cat = (khmer_category_t) OT_VPst; break; + default: assert (0); + } + + info.khmer_category() = cat; +} + + +#endif /* HB_OT_SHAPE_COMPLEX_KHMER_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar-machine.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar-machine.hh new file mode 100644 index 0000000000..c2f4c0045c --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar-machine.hh @@ -0,0 +1,430 @@ + +#line 1 "hb-ot-shape-complex-myanmar-machine.rl" +/* + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_MYANMAR_MACHINE_HH +#define HB_OT_SHAPE_COMPLEX_MYANMAR_MACHINE_HH + +#include "hb.hh" + + +#line 36 "hb-ot-shape-complex-myanmar-machine.hh" +static const unsigned char _myanmar_syllable_machine_trans_keys[] = { + 1u, 32u, 3u, 30u, 5u, 29u, 5u, 8u, 5u, 29u, 3u, 25u, 5u, 25u, 5u, 25u, + 3u, 29u, 3u, 29u, 3u, 29u, 3u, 29u, 1u, 16u, 3u, 29u, 3u, 29u, 3u, 29u, + 3u, 29u, 3u, 29u, 3u, 30u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 29u, + 5u, 29u, 5u, 8u, 5u, 29u, 3u, 25u, 5u, 25u, 5u, 25u, 3u, 29u, 3u, 29u, + 3u, 29u, 3u, 29u, 1u, 16u, 3u, 30u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 29u, + 3u, 29u, 3u, 30u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 29u, 3u, 30u, + 3u, 29u, 1u, 32u, 1u, 32u, 8u, 8u, 0 +}; + +static const char _myanmar_syllable_machine_key_spans[] = { + 32, 28, 25, 4, 25, 23, 21, 21, + 27, 27, 27, 27, 16, 27, 27, 27, + 27, 27, 28, 27, 27, 27, 27, 27, + 25, 4, 25, 23, 21, 21, 27, 27, + 27, 27, 16, 28, 27, 27, 27, 27, + 27, 28, 27, 27, 27, 27, 27, 28, + 27, 32, 32, 1 +}; + +static const short _myanmar_syllable_machine_index_offsets[] = { + 0, 33, 62, 88, 93, 119, 143, 165, + 187, 215, 243, 271, 299, 316, 344, 372, + 400, 428, 456, 485, 513, 541, 569, 597, + 625, 651, 656, 682, 706, 728, 750, 778, + 806, 834, 862, 879, 908, 936, 964, 992, + 1020, 1048, 1077, 1105, 1133, 1161, 1189, 1217, + 1246, 1274, 1307, 1340 +}; + +static const char _myanmar_syllable_machine_indicies[] = { + 1, 1, 2, 3, 4, 4, 0, 5, + 0, 6, 1, 0, 0, 0, 0, 7, + 0, 8, 9, 0, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 1, + 0, 22, 23, 24, 24, 21, 25, 21, + 26, 21, 21, 21, 21, 21, 21, 21, + 27, 21, 21, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 21, 24, 24, + 21, 25, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 38, 21, 21, 21, 21, + 21, 21, 32, 21, 21, 21, 36, 21, + 24, 24, 21, 25, 21, 24, 24, 21, + 25, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 32, 21, 21, 21, 36, 21, 39, + 21, 24, 24, 21, 25, 21, 32, 21, + 21, 21, 21, 21, 21, 21, 40, 21, + 21, 21, 21, 21, 21, 32, 21, 24, + 24, 21, 25, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 40, 21, 21, 21, + 21, 21, 21, 32, 21, 24, 24, 21, + 25, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 32, 21, 22, 21, 24, 24, 21, + 25, 21, 26, 21, 21, 21, 21, 21, + 21, 21, 41, 21, 21, 41, 21, 21, + 21, 32, 42, 21, 21, 36, 21, 22, + 21, 24, 24, 21, 25, 21, 26, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 32, 21, 21, + 21, 36, 21, 22, 21, 24, 24, 21, + 25, 21, 26, 21, 21, 21, 21, 21, + 21, 21, 41, 21, 21, 21, 21, 21, + 21, 32, 42, 21, 21, 36, 21, 22, + 21, 24, 24, 21, 25, 21, 26, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 32, 42, 21, + 21, 36, 21, 1, 1, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 1, 21, 22, 21, 24, 24, + 21, 25, 21, 26, 21, 21, 21, 21, + 21, 21, 21, 27, 21, 21, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 21, + 22, 21, 24, 24, 21, 25, 21, 26, + 21, 21, 21, 21, 21, 21, 21, 43, + 21, 21, 21, 21, 21, 21, 32, 33, + 34, 35, 36, 21, 22, 21, 24, 24, + 21, 25, 21, 26, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 32, 33, 34, 35, 36, 21, + 22, 21, 24, 24, 21, 25, 21, 26, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 32, 33, + 34, 21, 36, 21, 22, 21, 24, 24, + 21, 25, 21, 26, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 32, 21, 34, 21, 36, 21, + 22, 21, 24, 24, 21, 25, 21, 26, + 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 32, 33, + 34, 35, 36, 43, 21, 22, 21, 24, + 24, 21, 25, 21, 26, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 28, + 21, 30, 21, 32, 33, 34, 35, 36, + 21, 22, 21, 24, 24, 21, 25, 21, + 26, 21, 21, 21, 21, 21, 21, 21, + 43, 21, 21, 28, 21, 21, 21, 32, + 33, 34, 35, 36, 21, 22, 21, 24, + 24, 21, 25, 21, 26, 21, 21, 21, + 21, 21, 21, 21, 44, 21, 21, 28, + 29, 30, 21, 32, 33, 34, 35, 36, + 21, 22, 21, 24, 24, 21, 25, 21, + 26, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 28, 29, 30, 21, 32, + 33, 34, 35, 36, 21, 22, 23, 24, + 24, 21, 25, 21, 26, 21, 21, 21, + 21, 21, 21, 21, 27, 21, 21, 28, + 29, 30, 31, 32, 33, 34, 35, 36, + 21, 46, 46, 45, 5, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 47, 45, + 45, 45, 45, 45, 45, 14, 45, 45, + 45, 18, 45, 46, 46, 45, 5, 45, + 46, 46, 45, 5, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 14, 45, 45, 45, + 18, 45, 48, 45, 46, 46, 45, 5, + 45, 14, 45, 45, 45, 45, 45, 45, + 45, 49, 45, 45, 45, 45, 45, 45, + 14, 45, 46, 46, 45, 5, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 49, + 45, 45, 45, 45, 45, 45, 14, 45, + 46, 46, 45, 5, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 14, 45, 2, 45, + 46, 46, 45, 5, 45, 6, 45, 45, + 45, 45, 45, 45, 45, 50, 45, 45, + 50, 45, 45, 45, 14, 51, 45, 45, + 18, 45, 2, 45, 46, 46, 45, 5, + 45, 6, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 14, 45, 45, 45, 18, 45, 2, 45, + 46, 46, 45, 5, 45, 6, 45, 45, + 45, 45, 45, 45, 45, 50, 45, 45, + 45, 45, 45, 45, 14, 51, 45, 45, + 18, 45, 2, 45, 46, 46, 45, 5, + 45, 6, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 14, 51, 45, 45, 18, 45, 52, 52, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 52, 45, 2, + 3, 46, 46, 45, 5, 45, 6, 45, + 45, 45, 45, 45, 45, 45, 8, 45, + 45, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 45, 2, 45, 46, 46, + 45, 5, 45, 6, 45, 45, 45, 45, + 45, 45, 45, 8, 45, 45, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 45, + 2, 45, 46, 46, 45, 5, 45, 6, + 45, 45, 45, 45, 45, 45, 45, 53, + 45, 45, 45, 45, 45, 45, 14, 15, + 16, 17, 18, 45, 2, 45, 46, 46, + 45, 5, 45, 6, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 14, 15, 16, 17, 18, 45, + 2, 45, 46, 46, 45, 5, 45, 6, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 14, 15, + 16, 45, 18, 45, 2, 45, 46, 46, + 45, 5, 45, 6, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 14, 45, 16, 45, 18, 45, + 2, 45, 46, 46, 45, 5, 45, 6, + 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 14, 15, + 16, 17, 18, 53, 45, 2, 45, 46, + 46, 45, 5, 45, 6, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 10, + 45, 12, 45, 14, 15, 16, 17, 18, + 45, 2, 45, 46, 46, 45, 5, 45, + 6, 45, 45, 45, 45, 45, 45, 45, + 53, 45, 45, 10, 45, 45, 45, 14, + 15, 16, 17, 18, 45, 2, 45, 46, + 46, 45, 5, 45, 6, 45, 45, 45, + 45, 45, 45, 45, 54, 45, 45, 10, + 11, 12, 45, 14, 15, 16, 17, 18, + 45, 2, 45, 46, 46, 45, 5, 45, + 6, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 10, 11, 12, 45, 14, + 15, 16, 17, 18, 45, 2, 3, 46, + 46, 45, 5, 45, 6, 45, 45, 45, + 45, 45, 45, 45, 8, 45, 45, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 45, 22, 23, 24, 24, 21, 25, 21, + 26, 21, 21, 21, 21, 21, 21, 21, + 55, 21, 21, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 21, 22, 56, + 24, 24, 21, 25, 21, 26, 21, 21, + 21, 21, 21, 21, 21, 27, 21, 21, + 28, 29, 30, 31, 32, 33, 34, 35, + 36, 21, 1, 1, 2, 3, 46, 46, + 45, 5, 45, 6, 1, 45, 45, 45, + 45, 1, 45, 8, 45, 45, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, + 45, 1, 45, 1, 1, 57, 57, 57, + 57, 57, 57, 57, 57, 1, 57, 57, + 57, 57, 1, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 1, 57, 58, 57, 0 +}; + +static const char _myanmar_syllable_machine_trans_targs[] = { + 0, 1, 24, 34, 0, 25, 31, 47, + 36, 50, 37, 42, 43, 44, 27, 39, + 40, 41, 30, 46, 51, 0, 2, 12, + 0, 3, 9, 13, 14, 19, 20, 21, + 5, 16, 17, 18, 8, 23, 4, 6, + 7, 10, 11, 15, 22, 0, 0, 26, + 28, 29, 32, 33, 35, 38, 45, 48, + 49, 0, 0 +}; + +static const char _myanmar_syllable_machine_trans_actions[] = { + 3, 0, 0, 0, 4, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 5, 0, 0, + 6, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 7, 8, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 9, 10 +}; + +static const char _myanmar_syllable_machine_to_state_actions[] = { + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 +}; + +static const char _myanmar_syllable_machine_from_state_actions[] = { + 2, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 +}; + +static const short _myanmar_syllable_machine_eof_trans[] = { + 0, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 46, 22, + 22, 46, 58, 58 +}; + +static const int myanmar_syllable_machine_start = 0; +static const int myanmar_syllable_machine_first_final = 0; +static const int myanmar_syllable_machine_error = -1; + +static const int myanmar_syllable_machine_en_main = 0; + + +#line 36 "hb-ot-shape-complex-myanmar-machine.rl" + + + +#line 94 "hb-ot-shape-complex-myanmar-machine.rl" + + +#define found_syllable(syllable_type) \ + HB_STMT_START { \ + if (0) fprintf (stderr, "syllable %d..%d %s\n", ts, te, #syllable_type); \ + for (unsigned int i = ts; i < te; i++) \ + info[i].syllable() = (syllable_serial << 4) | myanmar_##syllable_type; \ + syllable_serial++; \ + if (unlikely (syllable_serial == 16)) syllable_serial = 1; \ + } HB_STMT_END + +static void +find_syllables_myanmar (hb_buffer_t *buffer) +{ + unsigned int p, pe, eof, ts, te, act HB_UNUSED; + int cs; + hb_glyph_info_t *info = buffer->info; + +#line 320 "hb-ot-shape-complex-myanmar-machine.hh" + { + cs = myanmar_syllable_machine_start; + ts = 0; + te = 0; + act = 0; + } + +#line 114 "hb-ot-shape-complex-myanmar-machine.rl" + + + p = 0; + pe = eof = buffer->len; + + unsigned int syllable_serial = 1; + +#line 336 "hb-ot-shape-complex-myanmar-machine.hh" + { + int _slen; + int _trans; + const unsigned char *_keys; + const char *_inds; + if ( p == pe ) + goto _test_eof; +_resume: + switch ( _myanmar_syllable_machine_from_state_actions[cs] ) { + case 2: +#line 1 "NONE" + {ts = p;} + break; +#line 350 "hb-ot-shape-complex-myanmar-machine.hh" + } + + _keys = _myanmar_syllable_machine_trans_keys + (cs<<1); + _inds = _myanmar_syllable_machine_indicies + _myanmar_syllable_machine_index_offsets[cs]; + + _slen = _myanmar_syllable_machine_key_spans[cs]; + _trans = _inds[ _slen > 0 && _keys[0] <=( info[p].myanmar_category()) && + ( info[p].myanmar_category()) <= _keys[1] ? + ( info[p].myanmar_category()) - _keys[0] : _slen ]; + +_eof_trans: + cs = _myanmar_syllable_machine_trans_targs[_trans]; + + if ( _myanmar_syllable_machine_trans_actions[_trans] == 0 ) + goto _again; + + switch ( _myanmar_syllable_machine_trans_actions[_trans] ) { + case 6: +#line 86 "hb-ot-shape-complex-myanmar-machine.rl" + {te = p+1;{ found_syllable (consonant_syllable); }} + break; + case 4: +#line 87 "hb-ot-shape-complex-myanmar-machine.rl" + {te = p+1;{ found_syllable (non_myanmar_cluster); }} + break; + case 10: +#line 88 "hb-ot-shape-complex-myanmar-machine.rl" + {te = p+1;{ found_syllable (punctuation_cluster); }} + break; + case 8: +#line 89 "hb-ot-shape-complex-myanmar-machine.rl" + {te = p+1;{ found_syllable (broken_cluster); }} + break; + case 3: +#line 90 "hb-ot-shape-complex-myanmar-machine.rl" + {te = p+1;{ found_syllable (non_myanmar_cluster); }} + break; + case 5: +#line 86 "hb-ot-shape-complex-myanmar-machine.rl" + {te = p;p--;{ found_syllable (consonant_syllable); }} + break; + case 7: +#line 89 "hb-ot-shape-complex-myanmar-machine.rl" + {te = p;p--;{ found_syllable (broken_cluster); }} + break; + case 9: +#line 90 "hb-ot-shape-complex-myanmar-machine.rl" + {te = p;p--;{ found_syllable (non_myanmar_cluster); }} + break; +#line 400 "hb-ot-shape-complex-myanmar-machine.hh" + } + +_again: + switch ( _myanmar_syllable_machine_to_state_actions[cs] ) { + case 1: +#line 1 "NONE" + {ts = 0;} + break; +#line 409 "hb-ot-shape-complex-myanmar-machine.hh" + } + + if ( ++p != pe ) + goto _resume; + _test_eof: {} + if ( p == eof ) + { + if ( _myanmar_syllable_machine_eof_trans[cs] > 0 ) { + _trans = _myanmar_syllable_machine_eof_trans[cs] - 1; + goto _eof_trans; + } + } + + } + +#line 122 "hb-ot-shape-complex-myanmar-machine.rl" + +} + +#undef found_syllable + +#endif /* HB_OT_SHAPE_COMPLEX_MYANMAR_MACHINE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.cc new file mode 100644 index 0000000000..fc3490d716 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.cc @@ -0,0 +1,387 @@ +/* + * Copyright © 2011,2012,2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-complex-myanmar.hh" + + +/* + * Myanmar shaper. + */ + +static const hb_tag_t +myanmar_basic_features[] = +{ + /* + * Basic features. + * These features are applied in order, one at a time, after reordering. + */ + HB_TAG('r','p','h','f'), + HB_TAG('p','r','e','f'), + HB_TAG('b','l','w','f'), + HB_TAG('p','s','t','f'), +}; +static const hb_tag_t +myanmar_other_features[] = +{ + /* + * Other features. + * These features are applied all at once, after clearing syllables. + */ + HB_TAG('p','r','e','s'), + HB_TAG('a','b','v','s'), + HB_TAG('b','l','w','s'), + HB_TAG('p','s','t','s'), +}; + +static void +setup_syllables_myanmar (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); +static void +reorder_myanmar (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + +static void +collect_features_myanmar (hb_ot_shape_planner_t *plan) +{ + hb_ot_map_builder_t *map = &plan->map; + + /* Do this before any lookups have been applied. */ + map->add_gsub_pause (setup_syllables_myanmar); + + map->enable_feature (HB_TAG('l','o','c','l')); + /* The Indic specs do not require ccmp, but we apply it here since if + * there is a use of it, it's typically at the beginning. */ + map->enable_feature (HB_TAG('c','c','m','p')); + + + map->add_gsub_pause (reorder_myanmar); + + for (unsigned int i = 0; i < ARRAY_LENGTH (myanmar_basic_features); i++) + { + map->enable_feature (myanmar_basic_features[i], F_MANUAL_ZWJ); + map->add_gsub_pause (nullptr); + } + + map->add_gsub_pause (_hb_clear_syllables); + + for (unsigned int i = 0; i < ARRAY_LENGTH (myanmar_other_features); i++) + map->enable_feature (myanmar_other_features[i], F_MANUAL_ZWJ); +} + +static void +override_features_myanmar (hb_ot_shape_planner_t *plan) +{ + plan->map.disable_feature (HB_TAG('l','i','g','a')); +} + + +enum myanmar_syllable_type_t { + myanmar_consonant_syllable, + myanmar_punctuation_cluster, + myanmar_broken_cluster, + myanmar_non_myanmar_cluster, +}; + +#include "hb-ot-shape-complex-myanmar-machine.hh" + + +static void +setup_masks_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_buffer_t *buffer, + hb_font_t *font HB_UNUSED) +{ + HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_category); + HB_BUFFER_ALLOCATE_VAR (buffer, myanmar_position); + + /* We cannot setup masks here. We save information about characters + * and setup masks later on in a pause-callback. */ + + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + set_myanmar_properties (info[i]); +} + +static void +setup_syllables_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font HB_UNUSED, + hb_buffer_t *buffer) +{ + find_syllables_myanmar (buffer); + foreach_syllable (buffer, start, end) + buffer->unsafe_to_break (start, end); +} + +static int +compare_myanmar_order (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) +{ + int a = pa->myanmar_position(); + int b = pb->myanmar_position(); + + return a < b ? -1 : a == b ? 0 : +1; +} + + +/* Rules from: + * https://docs.microsoft.com/en-us/typography/script-development/myanmar */ + +static void +initial_reordering_consonant_syllable (hb_buffer_t *buffer, + unsigned int start, unsigned int end) +{ + hb_glyph_info_t *info = buffer->info; + + unsigned int base = end; + bool has_reph = false; + + { + unsigned int limit = start; + if (start + 3 <= end && + info[start ].myanmar_category() == OT_Ra && + info[start+1].myanmar_category() == OT_As && + info[start+2].myanmar_category() == OT_H) + { + limit += 3; + base = start; + has_reph = true; + } + + { + if (!has_reph) + base = limit; + + for (unsigned int i = limit; i < end; i++) + if (is_consonant (info[i])) + { + base = i; + break; + } + } + } + + /* Reorder! */ + { + unsigned int i = start; + for (; i < start + (has_reph ? 3 : 0); i++) + info[i].myanmar_position() = POS_AFTER_MAIN; + for (; i < base; i++) + info[i].myanmar_position() = POS_PRE_C; + if (i < end) + { + info[i].myanmar_position() = POS_BASE_C; + i++; + } + indic_position_t pos = POS_AFTER_MAIN; + /* The following loop may be ugly, but it implements all of + * Myanmar reordering! */ + for (; i < end; i++) + { + if (info[i].myanmar_category() == OT_MR) /* Pre-base reordering */ + { + info[i].myanmar_position() = POS_PRE_C; + continue; + } + if (info[i].myanmar_position() < POS_BASE_C) /* Left matra */ + { + continue; + } + if (info[i].myanmar_category() == OT_VS) + { + info[i].myanmar_position() = info[i - 1].myanmar_position(); + continue; + } + + if (pos == POS_AFTER_MAIN && info[i].myanmar_category() == OT_VBlw) + { + pos = POS_BELOW_C; + info[i].myanmar_position() = pos; + continue; + } + + if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_A) + { + info[i].myanmar_position() = POS_BEFORE_SUB; + continue; + } + if (pos == POS_BELOW_C && info[i].myanmar_category() == OT_VBlw) + { + info[i].myanmar_position() = pos; + continue; + } + if (pos == POS_BELOW_C && info[i].myanmar_category() != OT_A) + { + pos = POS_AFTER_SUB; + info[i].myanmar_position() = pos; + continue; + } + info[i].myanmar_position() = pos; + } + } + + /* Sit tight, rock 'n roll! */ + buffer->sort (start, end, compare_myanmar_order); +} + +static void +reorder_syllable_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_face_t *face HB_UNUSED, + hb_buffer_t *buffer, + unsigned int start, unsigned int end) +{ + myanmar_syllable_type_t syllable_type = (myanmar_syllable_type_t) (buffer->info[start].syllable() & 0x0F); + switch (syllable_type) { + + case myanmar_broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */ + case myanmar_consonant_syllable: + initial_reordering_consonant_syllable (buffer, start, end); + break; + + case myanmar_punctuation_cluster: + case myanmar_non_myanmar_cluster: + break; + } +} + +static inline void +insert_dotted_circles_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font, + hb_buffer_t *buffer) +{ + if (unlikely (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE)) + return; + + /* Note: This loop is extra overhead, but should not be measurable. + * TODO Use a buffer scratch flag to remove the loop. */ + bool has_broken_syllables = false; + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + if ((info[i].syllable() & 0x0F) == myanmar_broken_cluster) + { + has_broken_syllables = true; + break; + } + if (likely (!has_broken_syllables)) + return; + + + hb_codepoint_t dottedcircle_glyph; + if (!font->get_nominal_glyph (0x25CCu, &dottedcircle_glyph)) + return; + + hb_glyph_info_t dottedcircle = {0}; + dottedcircle.codepoint = 0x25CCu; + set_myanmar_properties (dottedcircle); + dottedcircle.codepoint = dottedcircle_glyph; + + buffer->clear_output (); + + buffer->idx = 0; + unsigned int last_syllable = 0; + while (buffer->idx < buffer->len && buffer->successful) + { + unsigned int syllable = buffer->cur().syllable(); + myanmar_syllable_type_t syllable_type = (myanmar_syllable_type_t) (syllable & 0x0F); + if (unlikely (last_syllable != syllable && syllable_type == myanmar_broken_cluster)) + { + last_syllable = syllable; + + hb_glyph_info_t ginfo = dottedcircle; + ginfo.cluster = buffer->cur().cluster; + ginfo.mask = buffer->cur().mask; + ginfo.syllable() = buffer->cur().syllable(); + + buffer->output_info (ginfo); + } + else + buffer->next_glyph (); + } + buffer->swap_buffers (); +} + +static void +reorder_myanmar (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ + insert_dotted_circles_myanmar (plan, font, buffer); + + foreach_syllable (buffer, start, end) + reorder_syllable_myanmar (plan, font->face, buffer, start, end); + + HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_category); + HB_BUFFER_DEALLOCATE_VAR (buffer, myanmar_position); +} + + +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar = +{ + collect_features_myanmar, + override_features_myanmar, + nullptr, /* data_create */ + nullptr, /* data_destroy */ + nullptr, /* preprocess_text */ + nullptr, /* postprocess_glyphs */ + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, + nullptr, /* decompose */ + nullptr, /* compose */ + setup_masks_myanmar, + HB_TAG_NONE, /* gpos_tag */ + nullptr, /* reorder_marks */ + HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY, + false, /* fallback_position */ +}; + + +/* Ugly Zawgyi encoding. + * Disable all auto processing. + * https://github.com/harfbuzz/harfbuzz/issues/1162 */ +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_myanmar_zawgyi = +{ + nullptr, /* collect_features */ + nullptr, /* override_features */ + nullptr, /* data_create */ + nullptr, /* data_destroy */ + nullptr, /* preprocess_text */ + nullptr, /* postprocess_glyphs */ + HB_OT_SHAPE_NORMALIZATION_MODE_NONE, + nullptr, /* decompose */ + nullptr, /* compose */ + nullptr, /* setup_masks */ + HB_TAG_NONE, /* gpos_tag */ + nullptr, /* reorder_marks */ + HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, + false, /* fallback_position */ +}; + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.hh new file mode 100644 index 0000000000..7b9821e6ba --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-myanmar.hh @@ -0,0 +1,171 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_MYANMAR_HH +#define HB_OT_SHAPE_COMPLEX_MYANMAR_HH + +#include "hb.hh" + +#include "hb-ot-shape-complex-indic.hh" + + +/* buffer var allocations */ +#define myanmar_category() indic_category() /* myanmar_category_t */ +#define myanmar_position() indic_position() /* myanmar_position_t */ + + +/* Note: This enum is duplicated in the -machine.rl source file. + * Not sure how to avoid duplication. */ +enum myanmar_category_t { + OT_As = 18, /* Asat */ + OT_D0 = 20, /* Digit zero */ + OT_DB = OT_N, /* Dot below */ + OT_GB = OT_PLACEHOLDER, + OT_MH = 21, /* Various consonant medial types */ + OT_MR = 22, /* Various consonant medial types */ + OT_MW = 23, /* Various consonant medial types */ + OT_MY = 24, /* Various consonant medial types */ + OT_PT = 25, /* Pwo and other tones */ + //OT_VAbv = 26, + //OT_VBlw = 27, + //OT_VPre = 28, + //OT_VPst = 29, + OT_VS = 30, /* Variation selectors */ + OT_P = 31, /* Punctuation */ + OT_D = 32, /* Digits except zero */ +}; + + +static inline void +set_myanmar_properties (hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + unsigned int type = hb_indic_get_categories (u); + unsigned int cat = type & 0x7Fu; + indic_position_t pos = (indic_position_t) (type >> 8); + + /* Myanmar + * https://docs.microsoft.com/en-us/typography/script-development/myanmar#analyze + */ + if (unlikely (hb_in_range<hb_codepoint_t> (u, 0xFE00u, 0xFE0Fu))) + cat = OT_VS; + + switch (u) + { + case 0x104Eu: + cat = OT_C; /* The spec says C, IndicSyllableCategory doesn't have. */ + break; + + case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u: + case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u: + case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu: + case 0x25FEu: + cat = OT_GB; + break; + + case 0x1004u: case 0x101Bu: case 0x105Au: + cat = OT_Ra; + break; + + case 0x1032u: case 0x1036u: + cat = OT_A; + break; + + case 0x1039u: + cat = OT_H; + break; + + case 0x103Au: + cat = OT_As; + break; + + case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u: + case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u: + case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u: + case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u: + case 0x1097u: case 0x1098u: case 0x1099u: + cat = OT_D; + break; + + case 0x1040u: + cat = OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */ + break; + + case 0x103Eu: case 0x1060u: + cat = OT_MH; + break; + + case 0x103Cu: + cat = OT_MR; + break; + + case 0x103Du: case 0x1082u: + cat = OT_MW; + break; + + case 0x103Bu: case 0x105Eu: case 0x105Fu: + cat = OT_MY; + break; + + case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au: + case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu: + cat = OT_PT; + break; + + case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u: + case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du: + case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu: + cat = OT_SM; + break; + + case 0x104Au: case 0x104Bu: + cat = OT_P; + break; + + case 0xAA74u: case 0xAA75u: case 0xAA76u: + /* https://github.com/harfbuzz/harfbuzz/issues/218 */ + cat = OT_C; + break; + } + + if (cat == OT_M) + { + switch ((int) pos) + { + case POS_PRE_C: cat = (myanmar_category_t) OT_VPre; + pos = POS_PRE_M; break; + case POS_ABOVE_C: cat = (myanmar_category_t) OT_VAbv; break; + case POS_BELOW_C: cat = (myanmar_category_t) OT_VBlw; break; + case POS_POST_C: cat = (myanmar_category_t) OT_VPst; break; + } + } + + info.myanmar_category() = cat; + info.myanmar_position() = pos; +} + + +#endif /* HB_OT_SHAPE_COMPLEX_MYANMAR_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-thai.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-thai.cc new file mode 100644 index 0000000000..347ea2e7ac --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-thai.cc @@ -0,0 +1,394 @@ +/* + * Copyright © 2010,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-complex.hh" + + +/* Thai / Lao shaper */ + + +/* PUA shaping */ + + +enum thai_consonant_type_t +{ + NC, + AC, + RC, + DC, + NOT_CONSONANT, + NUM_CONSONANT_TYPES = NOT_CONSONANT +}; + +static thai_consonant_type_t +get_consonant_type (hb_codepoint_t u) +{ + if (u == 0x0E1Bu || u == 0x0E1Du || u == 0x0E1Fu/* || u == 0x0E2Cu*/) + return AC; + if (u == 0x0E0Du || u == 0x0E10u) + return RC; + if (u == 0x0E0Eu || u == 0x0E0Fu) + return DC; + if (hb_in_range<hb_codepoint_t> (u, 0x0E01u, 0x0E2Eu)) + return NC; + return NOT_CONSONANT; +} + + +enum thai_mark_type_t +{ + AV, + BV, + T, + NOT_MARK, + NUM_MARK_TYPES = NOT_MARK +}; + +static thai_mark_type_t +get_mark_type (hb_codepoint_t u) +{ + if (u == 0x0E31u || hb_in_range<hb_codepoint_t> (u, 0x0E34u, 0x0E37u) || + u == 0x0E47u || hb_in_range<hb_codepoint_t> (u, 0x0E4Du, 0x0E4Eu)) + return AV; + if (hb_in_range<hb_codepoint_t> (u, 0x0E38u, 0x0E3Au)) + return BV; + if (hb_in_range<hb_codepoint_t> (u, 0x0E48u, 0x0E4Cu)) + return T; + return NOT_MARK; +} + + +enum thai_action_t +{ + NOP, + SD, /* Shift combining-mark down */ + SL, /* Shift combining-mark left */ + SDL, /* Shift combining-mark down-left */ + RD /* Remove descender from base */ +}; + +static hb_codepoint_t +thai_pua_shape (hb_codepoint_t u, thai_action_t action, hb_font_t *font) +{ + struct thai_pua_mapping_t { + hb_codepoint_t u; + hb_codepoint_t win_pua; + hb_codepoint_t mac_pua; + } const *pua_mappings = nullptr; + static const thai_pua_mapping_t SD_mappings[] = { + {0x0E48u, 0xF70Au, 0xF88Bu}, /* MAI EK */ + {0x0E49u, 0xF70Bu, 0xF88Eu}, /* MAI THO */ + {0x0E4Au, 0xF70Cu, 0xF891u}, /* MAI TRI */ + {0x0E4Bu, 0xF70Du, 0xF894u}, /* MAI CHATTAWA */ + {0x0E4Cu, 0xF70Eu, 0xF897u}, /* THANTHAKHAT */ + {0x0E38u, 0xF718u, 0xF89Bu}, /* SARA U */ + {0x0E39u, 0xF719u, 0xF89Cu}, /* SARA UU */ + {0x0E3Au, 0xF71Au, 0xF89Du}, /* PHINTHU */ + {0x0000u, 0x0000u, 0x0000u} + }; + static const thai_pua_mapping_t SDL_mappings[] = { + {0x0E48u, 0xF705u, 0xF88Cu}, /* MAI EK */ + {0x0E49u, 0xF706u, 0xF88Fu}, /* MAI THO */ + {0x0E4Au, 0xF707u, 0xF892u}, /* MAI TRI */ + {0x0E4Bu, 0xF708u, 0xF895u}, /* MAI CHATTAWA */ + {0x0E4Cu, 0xF709u, 0xF898u}, /* THANTHAKHAT */ + {0x0000u, 0x0000u, 0x0000u} + }; + static const thai_pua_mapping_t SL_mappings[] = { + {0x0E48u, 0xF713u, 0xF88Au}, /* MAI EK */ + {0x0E49u, 0xF714u, 0xF88Du}, /* MAI THO */ + {0x0E4Au, 0xF715u, 0xF890u}, /* MAI TRI */ + {0x0E4Bu, 0xF716u, 0xF893u}, /* MAI CHATTAWA */ + {0x0E4Cu, 0xF717u, 0xF896u}, /* THANTHAKHAT */ + {0x0E31u, 0xF710u, 0xF884u}, /* MAI HAN-AKAT */ + {0x0E34u, 0xF701u, 0xF885u}, /* SARA I */ + {0x0E35u, 0xF702u, 0xF886u}, /* SARA II */ + {0x0E36u, 0xF703u, 0xF887u}, /* SARA UE */ + {0x0E37u, 0xF704u, 0xF888u}, /* SARA UEE */ + {0x0E47u, 0xF712u, 0xF889u}, /* MAITAIKHU */ + {0x0E4Du, 0xF711u, 0xF899u}, /* NIKHAHIT */ + {0x0000u, 0x0000u, 0x0000u} + }; + static const thai_pua_mapping_t RD_mappings[] = { + {0x0E0Du, 0xF70Fu, 0xF89Au}, /* YO YING */ + {0x0E10u, 0xF700u, 0xF89Eu}, /* THO THAN */ + {0x0000u, 0x0000u, 0x0000u} + }; + + switch (action) { + case NOP: return u; + case SD: pua_mappings = SD_mappings; break; + case SDL: pua_mappings = SDL_mappings; break; + case SL: pua_mappings = SL_mappings; break; + case RD: pua_mappings = RD_mappings; break; + } + for (; pua_mappings->u; pua_mappings++) + if (pua_mappings->u == u) + { + hb_codepoint_t glyph; + if (hb_font_get_glyph (font, pua_mappings->win_pua, 0, &glyph)) + return pua_mappings->win_pua; + if (hb_font_get_glyph (font, pua_mappings->mac_pua, 0, &glyph)) + return pua_mappings->mac_pua; + break; + } + return u; +} + + +static enum thai_above_state_t +{ /* Cluster above looks like: */ + T0, /* ⣤ */ + T1, /* ⣼ */ + T2, /* ⣾ */ + T3, /* ⣿ */ + NUM_ABOVE_STATES +} thai_above_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] = +{ + T0, /* NC */ + T1, /* AC */ + T0, /* RC */ + T0, /* DC */ + T3, /* NOT_CONSONANT */ +}; + +static const struct thai_above_state_machine_edge_t { + thai_action_t action; + thai_above_state_t next_state; +} thai_above_state_machine[NUM_ABOVE_STATES][NUM_MARK_TYPES] = +{ /*AV*/ /*BV*/ /*T*/ +/*T0*/ {{NOP,T3}, {NOP,T0}, {SD, T3}}, +/*T1*/ {{SL, T2}, {NOP,T1}, {SDL,T2}}, +/*T2*/ {{NOP,T3}, {NOP,T2}, {SL, T3}}, +/*T3*/ {{NOP,T3}, {NOP,T3}, {NOP,T3}}, +}; + + +static enum thai_below_state_t +{ + B0, /* No descender */ + B1, /* Removable descender */ + B2, /* Strict descender */ + NUM_BELOW_STATES +} thai_below_start_state[NUM_CONSONANT_TYPES + 1/* For NOT_CONSONANT */] = +{ + B0, /* NC */ + B0, /* AC */ + B1, /* RC */ + B2, /* DC */ + B2, /* NOT_CONSONANT */ +}; + +static const struct thai_below_state_machine_edge_t { + thai_action_t action; + thai_below_state_t next_state; +} thai_below_state_machine[NUM_BELOW_STATES][NUM_MARK_TYPES] = +{ /*AV*/ /*BV*/ /*T*/ +/*B0*/ {{NOP,B0}, {NOP,B2}, {NOP, B0}}, +/*B1*/ {{NOP,B1}, {RD, B2}, {NOP, B1}}, +/*B2*/ {{NOP,B2}, {SD, B2}, {NOP, B2}}, +}; + + +static void +do_thai_pua_shaping (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_buffer_t *buffer, + hb_font_t *font) +{ +#ifdef HB_NO_OT_SHAPE_COMPLEX_THAI_FALLBACK + return; +#endif + + thai_above_state_t above_state = thai_above_start_state[NOT_CONSONANT]; + thai_below_state_t below_state = thai_below_start_state[NOT_CONSONANT]; + unsigned int base = 0; + + hb_glyph_info_t *info = buffer->info; + unsigned int count = buffer->len; + for (unsigned int i = 0; i < count; i++) + { + thai_mark_type_t mt = get_mark_type (info[i].codepoint); + + if (mt == NOT_MARK) { + thai_consonant_type_t ct = get_consonant_type (info[i].codepoint); + above_state = thai_above_start_state[ct]; + below_state = thai_below_start_state[ct]; + base = i; + continue; + } + + const thai_above_state_machine_edge_t &above_edge = thai_above_state_machine[above_state][mt]; + const thai_below_state_machine_edge_t &below_edge = thai_below_state_machine[below_state][mt]; + above_state = above_edge.next_state; + below_state = below_edge.next_state; + + /* At least one of the above/below actions is NOP. */ + thai_action_t action = above_edge.action != NOP ? above_edge.action : below_edge.action; + + buffer->unsafe_to_break (base, i); + if (action == RD) + info[base].codepoint = thai_pua_shape (info[base].codepoint, action, font); + else + info[i].codepoint = thai_pua_shape (info[i].codepoint, action, font); + } +} + + +static void +preprocess_text_thai (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font) +{ + /* This function implements the shaping logic documented here: + * + * https://linux.thai.net/~thep/th-otf/shaping.html + * + * The first shaping rule listed there is needed even if the font has Thai + * OpenType tables. The rest do fallback positioning based on PUA codepoints. + * We implement that only if there exist no Thai GSUB in the font. + */ + + /* The following is NOT specified in the MS OT Thai spec, however, it seems + * to be what Uniscribe and other engines implement. According to Eric Muller: + * + * When you have a SARA AM, decompose it in NIKHAHIT + SARA AA, *and* move the + * NIKHAHIT backwards over any tone mark (0E48-0E4B). + * + * <0E14, 0E4B, 0E33> -> <0E14, 0E4D, 0E4B, 0E32> + * + * This reordering is legit only when the NIKHAHIT comes from a SARA AM, not + * when it's there to start with. The string <0E14, 0E4B, 0E4D> is probably + * not what a user wanted, but the rendering is nevertheless nikhahit above + * chattawa. + * + * Same for Lao. + * + * Note: + * + * Uniscribe also does some below-marks reordering. Namely, it positions U+0E3A + * after U+0E38 and U+0E39. We do that by modifying the ccc for U+0E3A. + * See unicode->modified_combining_class (). Lao does NOT have a U+0E3A + * equivalent. + */ + + + /* + * Here are the characters of significance: + * + * Thai Lao + * SARA AM: U+0E33 U+0EB3 + * SARA AA: U+0E32 U+0EB2 + * Nikhahit: U+0E4D U+0ECD + * + * Testing shows that Uniscribe reorder the following marks: + * Thai: <0E31,0E34..0E37,0E47..0E4E> + * Lao: <0EB1,0EB4..0EB7,0EC7..0ECE> + * + * Note how the Lao versions are the same as Thai + 0x80. + */ + + /* We only get one script at a time, so a script-agnostic implementation + * is adequate here. */ +#define IS_SARA_AM(x) (((x) & ~0x0080u) == 0x0E33u) +#define NIKHAHIT_FROM_SARA_AM(x) ((x) - 0x0E33u + 0x0E4Du) +#define SARA_AA_FROM_SARA_AM(x) ((x) - 1) +#define IS_TONE_MARK(x) (hb_in_ranges<hb_codepoint_t> ((x) & ~0x0080u, 0x0E34u, 0x0E37u, 0x0E47u, 0x0E4Eu, 0x0E31u, 0x0E31u)) + + buffer->clear_output (); + unsigned int count = buffer->len; + for (buffer->idx = 0; buffer->idx < count && buffer->successful;) + { + hb_codepoint_t u = buffer->cur().codepoint; + if (likely (!IS_SARA_AM (u))) { + buffer->next_glyph (); + continue; + } + + /* Is SARA AM. Decompose and reorder. */ + hb_glyph_info_t &nikhahit = buffer->output_glyph (NIKHAHIT_FROM_SARA_AM (u)); + _hb_glyph_info_set_continuation (&nikhahit); + buffer->replace_glyph (SARA_AA_FROM_SARA_AM (u)); + if (unlikely (!buffer->successful)) + return; + + /* Make Nikhahit be recognized as a ccc=0 mark when zeroing widths. */ + unsigned int end = buffer->out_len; + _hb_glyph_info_set_general_category (&buffer->out_info[end - 2], HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK); + + /* Ok, let's see... */ + unsigned int start = end - 2; + while (start > 0 && IS_TONE_MARK (buffer->out_info[start - 1].codepoint)) + start--; + + if (start + 2 < end) + { + /* Move Nikhahit (end-2) to the beginning */ + buffer->merge_out_clusters (start, end); + hb_glyph_info_t t = buffer->out_info[end - 2]; + memmove (buffer->out_info + start + 1, + buffer->out_info + start, + sizeof (buffer->out_info[0]) * (end - start - 2)); + buffer->out_info[start] = t; + } + else + { + /* Since we decomposed, and NIKHAHIT is combining, merge clusters with the + * previous cluster. */ + if (start && buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) + buffer->merge_out_clusters (start - 1, end); + } + } + buffer->swap_buffers (); + + /* If font has Thai GSUB, we are done. */ + if (plan->props.script == HB_SCRIPT_THAI && !plan->map.found_script[0]) + do_thai_pua_shaping (plan, buffer, font); +} + +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_thai = +{ + nullptr, /* collect_features */ + nullptr, /* override_features */ + nullptr, /* data_create */ + nullptr, /* data_destroy */ + preprocess_text_thai, + nullptr, /* postprocess_glyphs */ + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, + nullptr, /* decompose */ + nullptr, /* compose */ + nullptr, /* setup_masks */ + HB_TAG_NONE, /* gpos_tag */ + nullptr, /* reorder_marks */ + HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, + false,/* fallback_position */ +}; + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-use-machine.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use-machine.hh new file mode 100644 index 0000000000..462342c618 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use-machine.hh @@ -0,0 +1,562 @@ + +#line 1 "hb-ot-shape-complex-use-machine.rl" +/* + * Copyright © 2015 Mozilla Foundation. + * Copyright © 2015 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Mozilla Author(s): Jonathan Kew + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH +#define HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH + +#include "hb.hh" + + +#line 38 "hb-ot-shape-complex-use-machine.hh" +static const unsigned char _use_syllable_machine_trans_keys[] = { + 12u, 48u, 1u, 15u, 1u, 1u, 12u, 48u, 1u, 1u, 0u, 48u, 21u, 21u, 11u, 48u, + 11u, 48u, 1u, 15u, 1u, 1u, 11u, 48u, 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u, + 26u, 47u, 45u, 46u, 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 1u, 1u, 24u, 48u, + 23u, 48u, 23u, 48u, 23u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 11u, 48u, + 1u, 48u, 11u, 48u, 13u, 21u, 4u, 4u, 13u, 13u, 11u, 48u, 11u, 48u, 41u, 42u, + 42u, 42u, 11u, 48u, 11u, 48u, 22u, 48u, 23u, 48u, 24u, 47u, 25u, 47u, 26u, 47u, + 45u, 46u, 46u, 46u, 24u, 48u, 24u, 48u, 24u, 48u, 24u, 48u, 23u, 48u, 23u, 48u, + 23u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 22u, 48u, 11u, 48u, 1u, 48u, 1u, 15u, + 4u, 4u, 13u, 21u, 13u, 13u, 12u, 48u, 1u, 48u, 11u, 48u, 41u, 42u, 42u, 42u, + 21u, 42u, 1u, 5u, 0 +}; + +static const char _use_syllable_machine_key_spans[] = { + 37, 15, 1, 37, 1, 49, 1, 38, + 38, 15, 1, 38, 27, 26, 24, 23, + 22, 2, 1, 25, 25, 25, 1, 25, + 26, 26, 26, 27, 27, 27, 27, 38, + 48, 38, 9, 1, 1, 38, 38, 2, + 1, 38, 38, 27, 26, 24, 23, 22, + 2, 1, 25, 25, 25, 25, 26, 26, + 26, 27, 27, 27, 27, 38, 48, 15, + 1, 9, 1, 37, 48, 38, 2, 1, + 22, 5 +}; + +static const short _use_syllable_machine_index_offsets[] = { + 0, 38, 54, 56, 94, 96, 146, 148, + 187, 226, 242, 244, 283, 311, 338, 363, + 387, 410, 413, 415, 441, 467, 493, 495, + 521, 548, 575, 602, 630, 658, 686, 714, + 753, 802, 841, 851, 853, 855, 894, 933, + 936, 938, 977, 1016, 1044, 1071, 1096, 1120, + 1143, 1146, 1148, 1174, 1200, 1226, 1252, 1279, + 1306, 1333, 1361, 1389, 1417, 1445, 1484, 1533, + 1549, 1551, 1561, 1563, 1601, 1650, 1689, 1692, + 1694, 1717 +}; + +static const char _use_syllable_machine_indicies[] = { + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 1, 0, 3, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 4, 2, 3, 2, + 6, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 6, 5, 5, 5, 6, 5, 7, 5, + 8, 9, 10, 8, 11, 12, 10, 10, + 10, 10, 10, 3, 13, 14, 10, 15, + 8, 8, 16, 17, 10, 10, 18, 19, + 20, 21, 22, 23, 24, 18, 25, 26, + 27, 28, 29, 30, 10, 31, 32, 33, + 10, 34, 35, 36, 37, 38, 39, 40, + 13, 10, 42, 41, 44, 1, 43, 43, + 45, 43, 43, 43, 43, 43, 46, 47, + 48, 49, 50, 51, 52, 53, 47, 54, + 46, 55, 56, 57, 58, 43, 59, 60, + 61, 43, 43, 43, 43, 62, 63, 64, + 65, 1, 43, 44, 1, 43, 43, 45, + 43, 43, 43, 43, 43, 66, 47, 48, + 49, 50, 51, 52, 53, 47, 54, 55, + 55, 56, 57, 58, 43, 59, 60, 61, + 43, 43, 43, 43, 62, 63, 64, 65, + 1, 43, 44, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 67, + 68, 67, 44, 67, 44, 1, 43, 43, + 45, 43, 43, 43, 43, 43, 43, 47, + 48, 49, 50, 51, 52, 53, 47, 54, + 55, 55, 56, 57, 58, 43, 59, 60, + 61, 43, 43, 43, 43, 62, 63, 64, + 65, 1, 43, 47, 48, 49, 50, 51, + 43, 43, 43, 43, 43, 43, 56, 57, + 58, 43, 59, 60, 61, 43, 43, 43, + 43, 48, 63, 64, 65, 69, 43, 48, + 49, 50, 51, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 59, 60, 61, + 43, 43, 43, 43, 43, 63, 64, 65, + 69, 43, 49, 50, 51, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 63, + 64, 65, 43, 50, 51, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 63, + 64, 65, 43, 51, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 63, 64, + 65, 43, 63, 64, 43, 64, 43, 49, + 50, 51, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 59, 60, 61, 43, + 43, 43, 43, 43, 63, 64, 65, 69, + 43, 49, 50, 51, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 60, + 61, 43, 43, 43, 43, 43, 63, 64, + 65, 69, 43, 49, 50, 51, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 61, 43, 43, 43, 43, 43, + 63, 64, 65, 69, 43, 71, 70, 49, + 50, 51, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 63, 64, 65, 69, + 43, 48, 49, 50, 51, 43, 43, 43, + 43, 43, 43, 56, 57, 58, 43, 59, + 60, 61, 43, 43, 43, 43, 48, 63, + 64, 65, 69, 43, 48, 49, 50, 51, + 43, 43, 43, 43, 43, 43, 43, 57, + 58, 43, 59, 60, 61, 43, 43, 43, + 43, 48, 63, 64, 65, 69, 43, 48, + 49, 50, 51, 43, 43, 43, 43, 43, + 43, 43, 43, 58, 43, 59, 60, 61, + 43, 43, 43, 43, 48, 63, 64, 65, + 69, 43, 47, 48, 49, 50, 51, 43, + 53, 47, 43, 43, 43, 56, 57, 58, + 43, 59, 60, 61, 43, 43, 43, 43, + 48, 63, 64, 65, 69, 43, 47, 48, + 49, 50, 51, 43, 72, 47, 43, 43, + 43, 56, 57, 58, 43, 59, 60, 61, + 43, 43, 43, 43, 48, 63, 64, 65, + 69, 43, 47, 48, 49, 50, 51, 43, + 43, 47, 43, 43, 43, 56, 57, 58, + 43, 59, 60, 61, 43, 43, 43, 43, + 48, 63, 64, 65, 69, 43, 47, 48, + 49, 50, 51, 52, 53, 47, 43, 43, + 43, 56, 57, 58, 43, 59, 60, 61, + 43, 43, 43, 43, 48, 63, 64, 65, + 69, 43, 44, 1, 43, 43, 45, 43, + 43, 43, 43, 43, 43, 47, 48, 49, + 50, 51, 52, 53, 47, 54, 43, 55, + 56, 57, 58, 43, 59, 60, 61, 43, + 43, 43, 43, 62, 63, 64, 65, 1, + 43, 44, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 68, + 67, 67, 67, 67, 67, 67, 67, 48, + 49, 50, 51, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 59, 60, 61, + 67, 67, 67, 67, 67, 63, 64, 65, + 69, 67, 44, 1, 43, 43, 45, 43, + 43, 43, 43, 43, 43, 47, 48, 49, + 50, 51, 52, 53, 47, 54, 46, 55, + 56, 57, 58, 43, 59, 60, 61, 43, + 43, 43, 43, 62, 63, 64, 65, 1, + 43, 74, 73, 73, 73, 73, 73, 73, + 73, 75, 73, 11, 76, 74, 73, 44, + 1, 43, 43, 45, 43, 43, 43, 43, + 43, 77, 47, 48, 49, 50, 51, 52, + 53, 47, 54, 46, 55, 56, 57, 58, + 43, 59, 60, 61, 43, 78, 79, 43, + 62, 63, 64, 65, 1, 43, 44, 1, + 43, 43, 45, 43, 43, 43, 43, 43, + 43, 47, 48, 49, 50, 51, 52, 53, + 47, 54, 46, 55, 56, 57, 58, 43, + 59, 60, 61, 43, 78, 79, 43, 62, + 63, 64, 65, 1, 43, 78, 79, 80, + 79, 80, 3, 6, 81, 81, 82, 81, + 81, 81, 81, 81, 83, 18, 19, 20, + 21, 22, 23, 24, 18, 25, 27, 27, + 28, 29, 30, 81, 31, 32, 33, 81, + 81, 81, 81, 37, 38, 39, 40, 6, + 81, 3, 6, 81, 81, 82, 81, 81, + 81, 81, 81, 81, 18, 19, 20, 21, + 22, 23, 24, 18, 25, 27, 27, 28, + 29, 30, 81, 31, 32, 33, 81, 81, + 81, 81, 37, 38, 39, 40, 6, 81, + 18, 19, 20, 21, 22, 81, 81, 81, + 81, 81, 81, 28, 29, 30, 81, 31, + 32, 33, 81, 81, 81, 81, 19, 38, + 39, 40, 84, 81, 19, 20, 21, 22, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 31, 32, 33, 81, 81, 81, + 81, 81, 38, 39, 40, 84, 81, 20, + 21, 22, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 38, 39, 40, 81, + 21, 22, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 38, 39, 40, 81, + 22, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 38, 39, 40, 81, 38, + 39, 81, 39, 81, 20, 21, 22, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 31, 32, 33, 81, 81, 81, 81, + 81, 38, 39, 40, 84, 81, 20, 21, + 22, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 32, 33, 81, 81, + 81, 81, 81, 38, 39, 40, 84, 81, + 20, 21, 22, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 33, + 81, 81, 81, 81, 81, 38, 39, 40, + 84, 81, 20, 21, 22, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 38, + 39, 40, 84, 81, 19, 20, 21, 22, + 81, 81, 81, 81, 81, 81, 28, 29, + 30, 81, 31, 32, 33, 81, 81, 81, + 81, 19, 38, 39, 40, 84, 81, 19, + 20, 21, 22, 81, 81, 81, 81, 81, + 81, 81, 29, 30, 81, 31, 32, 33, + 81, 81, 81, 81, 19, 38, 39, 40, + 84, 81, 19, 20, 21, 22, 81, 81, + 81, 81, 81, 81, 81, 81, 30, 81, + 31, 32, 33, 81, 81, 81, 81, 19, + 38, 39, 40, 84, 81, 18, 19, 20, + 21, 22, 81, 24, 18, 81, 81, 81, + 28, 29, 30, 81, 31, 32, 33, 81, + 81, 81, 81, 19, 38, 39, 40, 84, + 81, 18, 19, 20, 21, 22, 81, 85, + 18, 81, 81, 81, 28, 29, 30, 81, + 31, 32, 33, 81, 81, 81, 81, 19, + 38, 39, 40, 84, 81, 18, 19, 20, + 21, 22, 81, 81, 18, 81, 81, 81, + 28, 29, 30, 81, 31, 32, 33, 81, + 81, 81, 81, 19, 38, 39, 40, 84, + 81, 18, 19, 20, 21, 22, 23, 24, + 18, 81, 81, 81, 28, 29, 30, 81, + 31, 32, 33, 81, 81, 81, 81, 19, + 38, 39, 40, 84, 81, 3, 6, 81, + 81, 82, 81, 81, 81, 81, 81, 81, + 18, 19, 20, 21, 22, 23, 24, 18, + 25, 81, 27, 28, 29, 30, 81, 31, + 32, 33, 81, 81, 81, 81, 37, 38, + 39, 40, 6, 81, 3, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 4, 81, 81, 81, 81, 81, + 81, 81, 19, 20, 21, 22, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, + 31, 32, 33, 81, 81, 81, 81, 81, + 38, 39, 40, 84, 81, 3, 86, 86, + 86, 86, 86, 86, 86, 86, 86, 86, + 86, 86, 86, 4, 86, 87, 81, 14, + 81, 81, 81, 81, 81, 81, 81, 88, + 81, 14, 81, 6, 86, 86, 86, 86, + 86, 86, 86, 86, 86, 86, 86, 86, + 86, 86, 86, 86, 86, 86, 86, 86, + 86, 86, 86, 86, 86, 86, 86, 86, + 86, 86, 86, 6, 86, 86, 86, 6, + 86, 9, 81, 81, 81, 9, 81, 81, + 81, 81, 81, 3, 6, 14, 81, 82, + 81, 81, 81, 81, 81, 81, 18, 19, + 20, 21, 22, 23, 24, 18, 25, 26, + 27, 28, 29, 30, 81, 31, 32, 33, + 81, 34, 35, 81, 37, 38, 39, 40, + 6, 81, 3, 6, 81, 81, 82, 81, + 81, 81, 81, 81, 81, 18, 19, 20, + 21, 22, 23, 24, 18, 25, 26, 27, + 28, 29, 30, 81, 31, 32, 33, 81, + 81, 81, 81, 37, 38, 39, 40, 6, + 81, 34, 35, 81, 35, 81, 78, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 78, 79, 80, 9, 86, 86, + 86, 9, 86, 0 +}; + +static const char _use_syllable_machine_trans_targs[] = { + 5, 9, 5, 41, 2, 5, 1, 53, + 6, 7, 5, 34, 37, 63, 64, 67, + 68, 72, 43, 44, 45, 46, 47, 57, + 58, 60, 69, 61, 54, 55, 56, 50, + 51, 52, 70, 71, 73, 62, 48, 49, + 5, 5, 5, 5, 8, 0, 33, 12, + 13, 14, 15, 16, 27, 28, 30, 31, + 24, 25, 26, 19, 20, 21, 32, 17, + 18, 5, 11, 5, 10, 22, 5, 23, + 29, 5, 35, 36, 5, 38, 39, 40, + 5, 5, 3, 42, 4, 59, 5, 65, + 66 +}; + +static const char _use_syllable_machine_trans_actions[] = { + 1, 0, 2, 3, 0, 4, 0, 5, + 0, 5, 8, 0, 5, 9, 0, 9, + 3, 0, 5, 5, 0, 0, 0, 5, + 5, 5, 3, 3, 5, 5, 5, 5, + 5, 5, 0, 0, 0, 3, 0, 0, + 10, 11, 12, 13, 5, 0, 5, 0, + 0, 0, 0, 0, 0, 0, 0, 5, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 14, 5, 15, 0, 0, 16, 0, + 0, 17, 0, 0, 18, 5, 0, 0, + 19, 20, 0, 3, 0, 5, 21, 0, + 0 +}; + +static const char _use_syllable_machine_to_state_actions[] = { + 0, 0, 0, 0, 0, 6, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0 +}; + +static const char _use_syllable_machine_from_state_actions[] = { + 0, 0, 0, 0, 0, 7, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0 +}; + +static const short _use_syllable_machine_eof_trans[] = { + 1, 3, 3, 6, 6, 0, 42, 44, + 44, 68, 68, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 71, 44, + 44, 44, 44, 44, 44, 44, 44, 44, + 68, 44, 74, 77, 74, 44, 44, 81, + 81, 82, 82, 82, 82, 82, 82, 82, + 82, 82, 82, 82, 82, 82, 82, 82, + 82, 82, 82, 82, 82, 82, 82, 87, + 82, 82, 82, 87, 82, 82, 82, 82, + 81, 87 +}; + +static const int use_syllable_machine_start = 5; +static const int use_syllable_machine_first_final = 5; +static const int use_syllable_machine_error = -1; + +static const int use_syllable_machine_en_main = 5; + + +#line 38 "hb-ot-shape-complex-use-machine.rl" + + + +#line 162 "hb-ot-shape-complex-use-machine.rl" + + +#define found_syllable(syllable_type) \ + HB_STMT_START { \ + if (0) fprintf (stderr, "syllable %d..%d %s\n", ts, te, #syllable_type); \ + for (unsigned int i = ts; i < te; i++) \ + info[i].syllable() = (syllable_serial << 4) | use_##syllable_type; \ + syllable_serial++; \ + if (unlikely (syllable_serial == 16)) syllable_serial = 1; \ + } HB_STMT_END + +static void +find_syllables_use (hb_buffer_t *buffer) +{ + unsigned int p, pe, eof, ts, te, act; + int cs; + hb_glyph_info_t *info = buffer->info; + +#line 396 "hb-ot-shape-complex-use-machine.hh" + { + cs = use_syllable_machine_start; + ts = 0; + te = 0; + act = 0; + } + +#line 182 "hb-ot-shape-complex-use-machine.rl" + + + p = 0; + pe = eof = buffer->len; + + unsigned int syllable_serial = 1; + +#line 412 "hb-ot-shape-complex-use-machine.hh" + { + int _slen; + int _trans; + const unsigned char *_keys; + const char *_inds; + if ( p == pe ) + goto _test_eof; +_resume: + switch ( _use_syllable_machine_from_state_actions[cs] ) { + case 7: +#line 1 "NONE" + {ts = p;} + break; +#line 426 "hb-ot-shape-complex-use-machine.hh" + } + + _keys = _use_syllable_machine_trans_keys + (cs<<1); + _inds = _use_syllable_machine_indicies + _use_syllable_machine_index_offsets[cs]; + + _slen = _use_syllable_machine_key_spans[cs]; + _trans = _inds[ _slen > 0 && _keys[0] <=( info[p].use_category()) && + ( info[p].use_category()) <= _keys[1] ? + ( info[p].use_category()) - _keys[0] : _slen ]; + +_eof_trans: + cs = _use_syllable_machine_trans_targs[_trans]; + + if ( _use_syllable_machine_trans_actions[_trans] == 0 ) + goto _again; + + switch ( _use_syllable_machine_trans_actions[_trans] ) { + case 5: +#line 1 "NONE" + {te = p+1;} + break; + case 12: +#line 150 "hb-ot-shape-complex-use-machine.rl" + {te = p+1;{ found_syllable (independent_cluster); }} + break; + case 14: +#line 153 "hb-ot-shape-complex-use-machine.rl" + {te = p+1;{ found_syllable (standard_cluster); }} + break; + case 10: +#line 157 "hb-ot-shape-complex-use-machine.rl" + {te = p+1;{ found_syllable (broken_cluster); }} + break; + case 8: +#line 158 "hb-ot-shape-complex-use-machine.rl" + {te = p+1;{ found_syllable (non_cluster); }} + break; + case 11: +#line 150 "hb-ot-shape-complex-use-machine.rl" + {te = p;p--;{ found_syllable (independent_cluster); }} + break; + case 15: +#line 151 "hb-ot-shape-complex-use-machine.rl" + {te = p;p--;{ found_syllable (virama_terminated_cluster); }} + break; + case 16: +#line 152 "hb-ot-shape-complex-use-machine.rl" + {te = p;p--;{ found_syllable (sakot_terminated_cluster); }} + break; + case 13: +#line 153 "hb-ot-shape-complex-use-machine.rl" + {te = p;p--;{ found_syllable (standard_cluster); }} + break; + case 18: +#line 154 "hb-ot-shape-complex-use-machine.rl" + {te = p;p--;{ found_syllable (number_joiner_terminated_cluster); }} + break; + case 17: +#line 155 "hb-ot-shape-complex-use-machine.rl" + {te = p;p--;{ found_syllable (numeral_cluster); }} + break; + case 19: +#line 156 "hb-ot-shape-complex-use-machine.rl" + {te = p;p--;{ found_syllable (symbol_cluster); }} + break; + case 20: +#line 157 "hb-ot-shape-complex-use-machine.rl" + {te = p;p--;{ found_syllable (broken_cluster); }} + break; + case 21: +#line 158 "hb-ot-shape-complex-use-machine.rl" + {te = p;p--;{ found_syllable (non_cluster); }} + break; + case 1: +#line 153 "hb-ot-shape-complex-use-machine.rl" + {{p = ((te))-1;}{ found_syllable (standard_cluster); }} + break; + case 4: +#line 157 "hb-ot-shape-complex-use-machine.rl" + {{p = ((te))-1;}{ found_syllable (broken_cluster); }} + break; + case 2: +#line 1 "NONE" + { switch( act ) { + case 8: + {{p = ((te))-1;} found_syllable (broken_cluster); } + break; + case 9: + {{p = ((te))-1;} found_syllable (non_cluster); } + break; + } + } + break; + case 3: +#line 1 "NONE" + {te = p+1;} +#line 157 "hb-ot-shape-complex-use-machine.rl" + {act = 8;} + break; + case 9: +#line 1 "NONE" + {te = p+1;} +#line 158 "hb-ot-shape-complex-use-machine.rl" + {act = 9;} + break; +#line 532 "hb-ot-shape-complex-use-machine.hh" + } + +_again: + switch ( _use_syllable_machine_to_state_actions[cs] ) { + case 6: +#line 1 "NONE" + {ts = 0;} + break; +#line 541 "hb-ot-shape-complex-use-machine.hh" + } + + if ( ++p != pe ) + goto _resume; + _test_eof: {} + if ( p == eof ) + { + if ( _use_syllable_machine_eof_trans[cs] > 0 ) { + _trans = _use_syllable_machine_eof_trans[cs] - 1; + goto _eof_trans; + } + } + + } + +#line 190 "hb-ot-shape-complex-use-machine.rl" + +} + +#undef found_syllable + +#endif /* HB_OT_SHAPE_COMPLEX_USE_MACHINE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-use-table.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use-table.cc new file mode 100644 index 0000000000..aa9c350862 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use-table.cc @@ -0,0 +1,873 @@ +/* == Start of generated table == */ +/* + * The following table is generated by running: + * + * ./gen-use-table.py IndicSyllabicCategory.txt IndicPositionalCategory.txt UnicodeData.txt Blocks.txt + * + * on files with these headers: + * + * # IndicSyllabicCategory-13.0.0.txt + * # Date: 2019-07-22, 19:55:00 GMT [KW, RP] + * # IndicPositionalCategory-13.0.0.txt + * # Date: 2019-07-23, 00:01:00 GMT [KW, RP] + * # Blocks-13.0.0.txt + * # Date: 2019-07-10, 19:06:00 GMT [KW] + * UnicodeData.txt does not have a header. + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-complex-use.hh" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-macros" +#define B USE_B /* BASE */ +#define CGJ USE_CGJ /* CGJ */ +#define CS USE_CS /* CONS_WITH_STACKER */ +#define GB USE_GB /* BASE_OTHER */ +#define H USE_H /* HALANT */ +#define HN USE_HN /* HALANT_NUM */ +#define HVM USE_HVM /* HALANT_OR_VOWEL_MODIFIER */ +#define IND USE_IND /* BASE_IND */ +#define N USE_N /* BASE_NUM */ +#define O USE_O /* OTHER */ +#define R USE_R /* REPHA */ +#define Rsv USE_Rsv /* Reserved */ +#define S USE_S /* SYM */ +#define SUB USE_SUB /* CONS_SUB */ +#define Sk USE_Sk /* SAKOT */ +#define VS USE_VS /* VARIATION_SELECTOR */ +#define WJ USE_WJ /* Word_Joiner */ +#define ZWJ USE_ZWJ /* ZWJ */ +#define ZWNJ USE_ZWNJ /* ZWNJ */ +#define CMAbv USE_CMAbv +#define CMBlw USE_CMBlw +#define FAbv USE_FAbv +#define FBlw USE_FBlw +#define FPst USE_FPst +#define FMAbv USE_FMAbv +#define FMBlw USE_FMBlw +#define FMPst USE_FMPst +#define MAbv USE_MAbv +#define MBlw USE_MBlw +#define MPst USE_MPst +#define MPre USE_MPre +#define SMAbv USE_SMAbv +#define SMBlw USE_SMBlw +#define VAbv USE_VAbv +#define VBlw USE_VBlw +#define VPst USE_VPst +#define VPre USE_VPre +#define VMAbv USE_VMAbv +#define VMBlw USE_VMBlw +#define VMPst USE_VMPst +#define VMPre USE_VMPre +#pragma GCC diagnostic pop + +static const USE_TABLE_ELEMENT_TYPE use_table[] = { + + +#define use_offset_0x0028u 0 + + + /* Basic Latin */ + O, O, O, O, O, GB, O, O, + /* 0030 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + +#define use_offset_0x00a0u 24 + + + /* Latin-1 Supplement */ + + /* 00A0 */ GB, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 00B0 */ O, O, FMPst, FMPst, O, O, O, O, O, O, O, O, O, O, O, O, + /* 00C0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 00D0 */ O, O, O, O, O, O, O, GB, + +#define use_offset_0x0348u 80 + + + /* Combining Diacritical Marks */ + O, O, O, O, O, O, O, CGJ, + +#define use_offset_0x0900u 88 + + + /* Devanagari */ + + /* 0900 */ VMAbv, VMAbv, VMAbv, VMPst, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0910 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0920 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0930 */ B, B, B, B, B, B, B, B, B, B, VAbv, VPst, CMBlw, B, VPst, VPre, + /* 0940 */ VPst, VBlw, VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VPst, VPst, VPst, VPst, H, VPre, VPst, + /* 0950 */ O, VMAbv, VMBlw, O, O, VAbv, VBlw, VBlw, B, B, B, B, B, B, B, B, + /* 0960 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B, + /* 0970 */ O, O, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + + /* Bengali */ + + /* 0980 */ GB, VMAbv, VMPst, VMPst, O, B, B, B, B, B, B, B, B, O, O, B, + /* 0990 */ B, O, O, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 09A0 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, + /* 09B0 */ B, O, B, O, O, O, B, B, B, B, O, O, CMBlw, B, VPst, VPre, + /* 09C0 */ VPst, VBlw, VBlw, VBlw, VBlw, O, O, VPre, VPre, O, O, VPst, VPst, H, IND, O, + /* 09D0 */ O, O, O, O, O, O, O, VPst, O, O, O, O, B, B, O, B, + /* 09E0 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B, + /* 09F0 */ B, B, O, O, O, O, O, O, O, O, O, O, B, O, FMAbv, O, + + /* Gurmukhi */ + + /* 0A00 */ O, VMAbv, VMAbv, VMPst, O, B, B, B, B, B, B, O, O, O, O, B, + /* 0A10 */ B, O, O, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0A20 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, + /* 0A30 */ B, O, B, B, O, B, B, O, B, B, O, O, CMBlw, O, VPst, VPre, + /* 0A40 */ VPst, VBlw, VBlw, O, O, O, O, VAbv, VAbv, O, O, VAbv, VAbv, H, O, O, + /* 0A50 */ O, VMBlw, O, O, O, O, O, O, O, B, B, B, B, O, B, O, + /* 0A60 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B, + /* 0A70 */ VMAbv, CMAbv, GB, GB, O, MBlw, O, O, O, O, O, O, O, O, O, O, + + /* Gujarati */ + + /* 0A80 */ O, VMAbv, VMAbv, VMPst, O, B, B, B, B, B, B, B, B, B, O, B, + /* 0A90 */ B, B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0AA0 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, + /* 0AB0 */ B, O, B, B, O, B, B, B, B, B, O, O, CMBlw, B, VPst, VPre, + /* 0AC0 */ VPst, VBlw, VBlw, VBlw, VBlw, VAbv, O, VAbv, VAbv, VAbv, O, VPst, VPst, H, O, O, + /* 0AD0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 0AE0 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B, + /* 0AF0 */ O, O, O, O, O, O, O, O, O, B, VMAbv, VMAbv, VMAbv, CMAbv, CMAbv, CMAbv, + + /* Oriya */ + + /* 0B00 */ O, VMAbv, VMPst, VMPst, O, B, B, B, B, B, B, B, B, O, O, B, + /* 0B10 */ B, O, O, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0B20 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, + /* 0B30 */ B, O, B, B, O, B, B, B, B, B, O, O, CMBlw, B, VPst, VAbv, + /* 0B40 */ VPst, VBlw, VBlw, VBlw, VBlw, O, O, VPre, VPst, O, O, VPst, VPst, H, O, O, + /* 0B50 */ O, O, O, O, O, VAbv, VAbv, VAbv, O, O, O, O, B, B, O, B, + /* 0B60 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B, + /* 0B70 */ O, B, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Tamil */ + + /* 0B80 */ O, O, VMAbv, IND, O, B, B, B, B, B, B, O, O, O, B, B, + /* 0B90 */ B, O, B, B, B, B, O, O, O, B, B, O, B, O, B, B, + /* 0BA0 */ O, O, O, B, B, O, O, O, B, B, B, O, O, O, B, B, + /* 0BB0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, VPst, VPst, + /* 0BC0 */ VAbv, VPst, VPst, O, O, O, VPre, VPre, VPre, O, VPst, VPst, VPst, H, O, O, + /* 0BD0 */ O, O, O, O, O, O, O, VPst, O, O, O, O, O, O, O, O, + /* 0BE0 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B, + /* 0BF0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Telugu */ + + /* 0C00 */ VMAbv, VMPst, VMPst, VMPst, VMAbv, B, B, B, B, B, B, B, B, O, B, B, + /* 0C10 */ B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0C20 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, + /* 0C30 */ B, B, B, B, B, B, B, B, B, B, O, O, O, B, VAbv, VAbv, + /* 0C40 */ VAbv, VPst, VPst, VPst, VPst, O, VAbv, VAbv, VAbv, O, VAbv, VAbv, VAbv, H, O, O, + /* 0C50 */ O, O, O, O, O, VAbv, VBlw, O, B, B, B, O, O, O, O, O, + /* 0C60 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B, + /* 0C70 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Kannada */ + + /* 0C80 */ B, VMAbv, VMPst, VMPst, O, B, B, B, B, B, B, B, B, O, B, B, + /* 0C90 */ B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0CA0 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, + /* 0CB0 */ B, B, B, B, O, B, B, B, B, B, O, O, CMBlw, B, VPst, VAbv, + /* 0CC0 */ VAbv, VPst, VPst, VPst, VPst, O, VAbv, VAbv, VAbv, O, VAbv, VAbv, VAbv, H, O, O, + /* 0CD0 */ O, O, O, O, O, VPst, VPst, O, O, O, O, O, O, O, B, O, + /* 0CE0 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B, + /* 0CF0 */ O, CS, CS, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Malayalam */ + + /* 0D00 */ VMAbv, VMAbv, VMPst, VMPst, B, B, B, B, B, B, B, B, B, O, B, B, + /* 0D10 */ B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0D20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0D30 */ B, B, B, B, B, B, B, B, B, B, B, VAbv, VAbv, B, VPst, VPst, + /* 0D40 */ VPst, VPst, VPst, VBlw, VBlw, O, VPre, VPre, VPre, O, VPst, VPst, VPst, H, R, O, + /* 0D50 */ O, O, O, O, IND, IND, IND, VPst, O, O, O, O, O, O, O, B, + /* 0D60 */ B, B, VBlw, VBlw, O, O, B, B, B, B, B, B, B, B, B, B, + /* 0D70 */ O, O, O, O, O, O, O, O, O, O, IND, IND, IND, IND, IND, IND, + + /* Sinhala */ + + /* 0D80 */ O, VMAbv, VMPst, VMPst, O, B, B, B, B, B, B, B, B, B, B, B, + /* 0D90 */ B, B, B, B, B, B, B, O, O, O, B, B, B, B, B, B, + /* 0DA0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0DB0 */ B, B, O, B, B, B, B, B, B, B, B, B, O, B, O, O, + /* 0DC0 */ B, B, B, B, B, B, B, O, O, O, H, O, O, O, O, VPst, + /* 0DD0 */ VPst, VPst, VAbv, VAbv, VBlw, O, VBlw, O, VPst, VPre, VPst, VPre, VPst, VPst, VPst, VPst, + /* 0DE0 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B, + /* 0DF0 */ O, O, VPst, VPst, O, O, O, O, + +#define use_offset_0x0f18u 1360 + + + /* Tibetan */ + VBlw, VBlw, O, O, O, O, O, O, + /* 0F20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0F30 */ B, B, B, B, O, FMBlw, O, FMBlw, O, CMAbv, O, O, O, O, VPst, VPre, + /* 0F40 */ B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, B, + /* 0F50 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 0F60 */ B, B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, + /* 0F70 */ O, VBlw, VBlw, VAbv, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VBlw, VBlw, VMAbv, VMPst, + /* 0F80 */ VBlw, VAbv, VMAbv, VMAbv, VBlw, IND, VMAbv, VMAbv, B, B, B, B, B, SUB, SUB, SUB, + /* 0F90 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB, + /* 0FA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, + /* 0FB0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, O, O, + /* 0FC0 */ O, O, O, O, O, O, FMBlw, O, + +#define use_offset_0x1000u 1536 + + + /* Myanmar */ + + /* 1000 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1010 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1020 */ B, B, B, B, B, B, B, B, B, B, B, VPst, VPst, VAbv, VAbv, VBlw, + /* 1030 */ VBlw, VPre, VAbv, VAbv, VAbv, VAbv, VMAbv, VMBlw, VMPst, H, VAbv, MPst, MPre, MBlw, MBlw, B, + /* 1040 */ B, B, B, B, B, B, B, B, B, B, O, GB, O, O, GB, O, + /* 1050 */ B, B, B, B, B, B, VPst, VPst, VBlw, VBlw, B, B, B, B, MBlw, MBlw, + /* 1060 */ MBlw, B, VPst, VMPst, VMPst, B, B, VPst, VPst, VMPst, VMPst, VMPst, VMPst, VMPst, B, B, + /* 1070 */ B, VAbv, VAbv, VAbv, VAbv, B, B, B, B, B, B, B, B, B, B, B, + /* 1080 */ B, B, MBlw, VPst, VPre, VAbv, VAbv, VMPst, VMPst, VMPst, VMPst, VMPst, VMPst, VMBlw, B, VMPst, + /* 1090 */ B, B, B, B, B, B, B, B, B, B, VMPst, VMPst, VPst, VAbv, O, O, + +#define use_offset_0x1700u 1696 + + + /* Tagalog */ + + /* 1700 */ B, B, B, B, B, B, B, B, B, B, B, B, B, O, B, B, + /* 1710 */ B, B, VAbv, VBlw, VBlw, O, O, O, O, O, O, O, O, O, O, O, + + /* Hanunoo */ + + /* 1720 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1730 */ B, B, VAbv, VBlw, VBlw, O, O, O, O, O, O, O, O, O, O, O, + + /* Buhid */ + + /* 1740 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1750 */ B, B, VAbv, VBlw, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Tagbanwa */ + + /* 1760 */ B, B, B, B, B, B, B, B, B, B, B, B, B, O, B, B, + /* 1770 */ B, O, VAbv, VBlw, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Khmer */ + + /* 1780 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1790 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 17A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 17B0 */ B, B, B, B, O, O, VPst, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VBlw, VPst, VPst, + /* 17C0 */ VPst, VPre, VPre, VPre, VPst, VPst, VMAbv, VMPst, VPst, VMAbv, VMAbv, FMAbv, FAbv, CMAbv, FMAbv, FMAbv, + /* 17D0 */ FMAbv, VAbv, H, FMAbv, O, O, O, O, O, O, O, O, B, FMAbv, O, O, + /* 17E0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + +#define use_offset_0x1900u 1936 + + + /* Limbu */ + + /* 1900 */ GB, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1910 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, O, + /* 1920 */ VAbv, VAbv, VBlw, VPst, VPst, VAbv, VAbv, VAbv, VAbv, SUB, SUB, SUB, O, O, O, O, + /* 1930 */ FPst, FPst, VMBlw, FPst, FPst, FPst, FPst, FPst, FPst, FBlw, VAbv, FMBlw, O, O, O, O, + /* 1940 */ O, O, O, O, O, O, B, B, B, B, B, B, B, B, B, B, + + /* Tai Le */ + + /* 1950 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1960 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, O, O, + /* 1970 */ B, B, B, B, B, O, O, O, O, O, O, O, O, O, O, O, + + /* New Tai Lue */ + + /* 1980 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1990 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 19A0 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O, + /* 19B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 19C0 */ B, B, B, B, B, B, B, B, VMPst, VMPst, O, O, O, O, O, O, + /* 19D0 */ B, B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, + /* 19E0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 19F0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Buginese */ + + /* 1A00 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1A10 */ B, B, B, B, B, B, B, VAbv, VBlw, VPre, VPst, VAbv, O, O, O, O, + + /* Tai Tham */ + + /* 1A20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1A30 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1A40 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1A50 */ B, B, B, B, B, MPre, MBlw, SUB, FAbv, FAbv, MAbv, SUB, SUB, SUB, SUB, O, + /* 1A60 */ Sk, VPst, VAbv, VPst, VPst, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VAbv, VBlw, VPst, VPre, VPre, + /* 1A70 */ VPre, VPre, VPre, VAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VAbv, FMAbv, FMAbv, O, O, FMBlw, + /* 1A80 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + /* 1A90 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + +#define use_offset_0x1b00u 2352 + + + /* Balinese */ + + /* 1B00 */ VMAbv, VMAbv, VMAbv, FAbv, VMPst, B, B, B, B, B, B, B, B, B, B, B, + /* 1B10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1B20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1B30 */ B, B, B, B, CMAbv, VPst, VAbv, VAbv, VBlw, VBlw, VBlw, VBlw, VAbv, VAbv, VPre, VPre, + /* 1B40 */ VPst, VPst, VAbv, VAbv, H, B, B, B, B, B, B, B, O, O, O, O, + /* 1B50 */ B, B, B, B, B, B, B, B, B, B, O, GB, GB, O, O, GB, + /* 1B60 */ O, S, GB, S, S, S, S, S, GB, S, S, SMAbv, SMBlw, SMAbv, SMAbv, SMAbv, + /* 1B70 */ SMAbv, SMAbv, SMAbv, SMAbv, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Sundanese */ + + /* 1B80 */ VMAbv, FAbv, VMPst, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1B90 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1BA0 */ B, SUB, SUB, SUB, VAbv, VBlw, VPre, VPst, VAbv, VAbv, VPst, H, SUB, SUB, B, B, + /* 1BB0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + + /* Batak */ + + /* 1BC0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1BD0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1BE0 */ B, B, B, B, B, B, CMAbv, VPst, VAbv, VAbv, VPst, VPst, VPst, VAbv, VPst, VAbv, + /* 1BF0 */ FAbv, FAbv, CMBlw, CMBlw, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Lepcha */ + + /* 1C00 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1C10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1C20 */ B, B, B, B, SUB, SUB, VPst, VPre, VPre, VPre, VPst, VPst, VBlw, FAbv, FAbv, FAbv, + /* 1C30 */ FAbv, FAbv, FAbv, FAbv, VMPre, VMPre, FMAbv, CMBlw, O, O, O, O, O, O, O, O, + /* 1C40 */ B, B, B, B, B, B, B, B, B, B, O, O, O, B, B, B, + +#define use_offset_0x1cd0u 2688 + + + /* Vedic Extensions */ + + /* 1CD0 */ VMAbv, VMAbv, VMAbv, O, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMAbv, VMAbv, VMBlw, VMBlw, VMBlw, VMBlw, + /* 1CE0 */ VMAbv, VMPst, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, VMBlw, O, O, O, O, VMBlw, O, O, + /* 1CF0 */ O, O, IND, IND, VMAbv, CS, CS, VMPst, VMAbv, VMAbv, GB, O, O, O, O, O, + +#define use_offset_0x1df8u 2736 + + + /* Combining Diacritical Marks Supplement */ + O, O, O, FMAbv, O, O, O, O, + +#define use_offset_0x2008u 2744 + + + /* General Punctuation */ + O, O, O, O, ZWNJ, ZWJ, O, O, + /* 2010 */ GB, GB, GB, GB, GB, O, O, O, + +#define use_offset_0x2060u 2760 + + /* 2060 */ WJ, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Superscripts and Subscripts */ + + /* 2070 */ O, O, O, O, FMPst, O, O, O, O, O, O, O, O, O, O, O, + /* 2080 */ O, O, FMPst, FMPst, FMPst, O, O, O, + +#define use_offset_0x20f0u 2800 + + + /* Combining Diacritical Marks for Symbols */ + + /* 20F0 */ VMAbv, O, O, O, O, O, O, O, + +#define use_offset_0x25c8u 2808 + + + /* Geometric Shapes */ + O, O, O, O, GB, O, O, O, + +#define use_offset_0xa800u 2816 + + + /* Syloti Nagri */ + + /* A800 */ B, B, VAbv, B, B, B, H, B, B, B, B, VMAbv, B, B, B, B, + /* A810 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* A820 */ B, B, B, VPst, VPst, VBlw, VAbv, VPst, O, O, O, O, VBlw, O, O, O, + /* A830 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Phags-pa */ + + /* A840 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* A850 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* A860 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* A870 */ B, B, B, B, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Saurashtra */ + + /* A880 */ VMPst, VMPst, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* A890 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* A8A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* A8B0 */ B, B, B, B, MPst, VPst, VPst, VPst, VPst, VPst, VPst, VPst, VPst, VPst, VPst, VPst, + /* A8C0 */ VPst, VPst, VPst, VPst, H, VMAbv, O, O, O, O, O, O, O, O, O, O, + /* A8D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + + /* Devanagari Extended */ + + /* A8E0 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, + /* A8F0 */ VMAbv, VMAbv, B, B, O, O, O, O, O, O, O, O, O, O, B, VAbv, + + /* Kayah Li */ + + /* A900 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* A910 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* A920 */ B, B, B, B, B, B, VAbv, VAbv, VAbv, VAbv, VAbv, VMBlw, VMBlw, VMBlw, O, O, + + /* Rejang */ + + /* A930 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* A940 */ B, B, B, B, B, B, B, VBlw, VBlw, VBlw, VAbv, VBlw, VBlw, VBlw, VBlw, FAbv, + /* A950 */ FAbv, FAbv, FPst, VPst, O, O, O, O, O, O, O, O, O, O, O, O, + /* A960 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* A970 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Javanese */ + + /* A980 */ VMAbv, VMAbv, FAbv, VMPst, B, B, B, B, B, B, B, B, B, B, B, B, + /* A990 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* A9A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* A9B0 */ B, B, B, CMAbv, VPst, VPst, VAbv, VAbv, VBlw, VBlw, VPre, VPre, VAbv, MBlw, MBlw, MBlw, + /* A9C0 */ H, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* A9D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + + /* Myanmar Extended-B */ + + /* A9E0 */ B, B, B, B, B, VAbv, O, B, B, B, B, B, B, B, B, B, + /* A9F0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, O, + + /* Cham */ + + /* AA00 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* AA10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* AA20 */ B, B, B, B, B, B, B, B, B, VMAbv, VAbv, VAbv, VAbv, VBlw, VAbv, VPre, + /* AA30 */ VPre, VAbv, VBlw, MPst, MPre, MBlw, MBlw, O, O, O, O, O, O, O, O, O, + /* AA40 */ B, B, B, FAbv, B, B, B, B, B, B, B, B, FAbv, FPst, O, O, + /* AA50 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + + /* Myanmar Extended-A */ + + /* AA60 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* AA70 */ O, B, B, B, GB, GB, GB, O, O, O, B, VMPst, VMAbv, VMPst, B, B, + + /* Tai Viet */ + + /* AA80 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* AA90 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* AAA0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* AAB0 */ VAbv, B, VAbv, VAbv, VBlw, B, B, VAbv, VAbv, B, B, B, B, B, VAbv, VMAbv, + /* AAC0 */ B, VMAbv, B, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* AAD0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Meetei Mayek Extensions */ + + /* AAE0 */ B, B, B, B, B, B, B, B, B, B, B, VPre, VBlw, VAbv, VPre, VPst, + /* AAF0 */ O, O, O, O, O, VMPst, H, O, + +#define use_offset_0xabc0u 3576 + + + /* Meetei Mayek */ + + /* ABC0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* ABD0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* ABE0 */ B, B, B, VPst, VPst, VAbv, VPst, VPst, VBlw, VPst, VPst, O, VMPst, VBlw, O, O, + /* ABF0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + +#define use_offset_0xfe00u 3640 + + + /* Variation Selectors */ + + /* FE00 */ VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, VS, + +#define use_offset_0x10a00u 3656 + + + /* Kharoshthi */ + + /* 10A00 */ B, VBlw, VBlw, VBlw, O, VAbv, VBlw, O, O, O, O, O, VBlw, VBlw, VMBlw, VMAbv, + /* 10A10 */ B, B, B, B, O, B, B, B, O, B, B, B, B, B, B, B, + /* 10A20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 10A30 */ B, B, B, B, B, B, O, O, CMAbv, CMBlw, CMBlw, O, O, O, O, H, + /* 10A40 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O, + +#define use_offset_0x11000u 3736 + + + /* Brahmi */ + + /* 11000 */ VMPst, VMAbv, VMPst, CS, CS, B, B, B, B, B, B, B, B, B, B, B, + /* 11010 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11020 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11030 */ B, B, B, B, B, B, B, B, VAbv, VAbv, VAbv, VAbv, VBlw, VBlw, VBlw, VBlw, + /* 11040 */ VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, HVM, O, O, O, O, O, O, O, O, O, + /* 11050 */ O, O, N, N, N, N, N, N, N, N, N, N, N, N, N, N, + /* 11060 */ N, N, N, N, N, N, B, B, B, B, B, B, B, B, B, B, + /* 11070 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, HN, + + /* Kaithi */ + + /* 11080 */ VMAbv, VMAbv, VMPst, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11090 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 110A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 110B0 */ VPst, VPre, VPst, VBlw, VBlw, VAbv, VAbv, VPst, VPst, H, CMBlw, O, O, O, O, O, + +#define use_offset_0x11100u 3928 + + + /* Chakma */ + + /* 11100 */ VMAbv, VMAbv, VMAbv, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11110 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11120 */ B, B, B, B, B, B, B, VBlw, VBlw, VBlw, VAbv, VAbv, VPre, VBlw, VAbv, VAbv, + /* 11130 */ VBlw, VAbv, VAbv, H, CMBlw, O, B, B, B, B, B, B, B, B, B, B, + /* 11140 */ O, O, O, O, B, VPst, VPst, B, O, O, O, O, O, O, O, O, + + /* Mahajani */ + + /* 11150 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11160 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11170 */ B, B, B, CMBlw, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Sharada */ + + /* 11180 */ VMAbv, VMAbv, VMPst, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11190 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 111A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 111B0 */ B, B, B, VPst, VPre, VPst, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, + /* 111C0 */ H, B, R, R, O, O, O, O, GB, FMBlw, CMBlw, VAbv, VBlw, O, VPre, VMAbv, + /* 111D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + + /* Sinhala Archaic Numbers */ + + /* 111E0 */ O, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 111F0 */ B, B, B, B, B, O, O, O, O, O, O, O, O, O, O, O, + + /* Khojki */ + + /* 11200 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11210 */ B, B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11220 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPst, VPst, VBlw, + /* 11230 */ VAbv, VAbv, VAbv, VAbv, VMAbv, H, CMAbv, CMAbv, O, O, O, O, O, O, VMAbv, O, + +#define use_offset_0x11280u 4248 + + + /* Multani */ + + /* 11280 */ B, B, B, B, B, B, B, O, B, O, B, B, B, B, O, B, + /* 11290 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, O, B, + /* 112A0 */ B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, O, + + /* Khudawadi */ + + /* 112B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 112C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 112D0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, VMAbv, + /* 112E0 */ VPst, VPre, VPst, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, CMBlw, VBlw, O, O, O, O, O, + /* 112F0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + + /* Grantha */ + + /* 11300 */ VMAbv, VMAbv, VMAbv, VMAbv, O, B, B, B, B, B, B, B, B, O, O, B, + /* 11310 */ B, O, O, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11320 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, + /* 11330 */ B, O, B, B, O, B, B, B, B, B, O, CMBlw, CMBlw, B, VPst, VPst, + /* 11340 */ VAbv, VPst, VPst, VPst, VPst, O, O, VPre, VPre, O, O, VPst, VPst, HVM, O, O, + /* 11350 */ O, O, O, O, O, O, O, VPst, O, O, O, O, O, O, B, B, + /* 11360 */ B, B, VPst, VPst, O, O, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O, + /* 11370 */ VMAbv, VMAbv, VMAbv, VMAbv, VMAbv, O, O, O, + +#define use_offset_0x11400u 4496 + + + /* Newa */ + + /* 11400 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11410 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11420 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11430 */ B, B, B, B, B, VPst, VPre, VPst, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VAbv, VAbv, + /* 11440 */ VPst, VPst, H, VMAbv, VMAbv, VMPst, CMBlw, B, O, O, O, O, O, O, O, O, + /* 11450 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, FMAbv, B, + /* 11460 */ CS, CS, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 11470 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Tirhuta */ + + /* 11480 */ O, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11490 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 114A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 114B0 */ VPst, VPre, VPst, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VPre, VAbv, VPst, VPst, VPst, VPst, VMAbv, + /* 114C0 */ VMAbv, VMAbv, H, CMBlw, B, O, O, O, O, O, O, O, O, O, O, O, + /* 114D0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + +#define use_offset_0x11580u 4720 + + + /* Siddham */ + + /* 11580 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11590 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 115A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, VPst, + /* 115B0 */ VPre, VPst, VBlw, VBlw, VBlw, VBlw, O, O, VPre, VPst, VPst, VPst, VMAbv, VMAbv, VMPst, H, + /* 115C0 */ CMBlw, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 115D0 */ O, O, O, O, O, O, O, O, B, B, B, B, VBlw, VBlw, O, O, + /* 115E0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 115F0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Modi */ + + /* 11600 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11610 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11620 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11630 */ VPst, VPst, VPst, VBlw, VBlw, VBlw, VBlw, VBlw, VBlw, VAbv, VAbv, VPst, VPst, VMAbv, VMPst, H, + /* 11640 */ VAbv, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 11650 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + /* 11660 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 11670 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Takri */ + + /* 11680 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11690 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 116A0 */ B, B, B, B, B, B, B, B, B, B, B, VMAbv, VMPst, VAbv, VPre, VPst, + /* 116B0 */ VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, H, CMBlw, B, O, O, O, O, O, O, O, + /* 116C0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + /* 116D0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 116E0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + /* 116F0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Ahom */ + + /* 11700 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11710 */ B, B, B, B, B, B, B, B, B, B, B, O, O, MBlw, MPre, MAbv, + /* 11720 */ VPst, VPst, VAbv, VAbv, VBlw, VBlw, VPre, VAbv, VBlw, VAbv, VAbv, VAbv, O, O, O, O, + /* 11730 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O, + +#define use_offset_0x11800u 5168 + + + /* Dogra */ + + /* 11800 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11810 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11820 */ B, B, B, B, B, B, B, B, B, B, B, B, VPst, VPre, VPst, VBlw, + /* 11830 */ VBlw, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VMAbv, VMPst, H, CMBlw, O, O, O, O, O, + +#define use_offset_0x11900u 5232 + + + /* Dives Akuru */ + + /* 11900 */ B, B, B, B, B, B, B, O, O, B, O, O, B, B, B, B, + /* 11910 */ B, B, B, B, O, B, B, O, B, B, B, B, B, B, B, B, + /* 11920 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11930 */ VPst, VPst, VPst, VPst, VPst, VPre, O, VPre, VPst, O, O, VMAbv, VMAbv, VPst, H, R, + /* 11940 */ MPst, R, MBlw, CMBlw, O, O, O, O, O, O, O, O, O, O, O, O, + /* 11950 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + +#define use_offset_0x119a0u 5328 + + + /* Nandinagari */ + + /* 119A0 */ B, B, B, B, B, B, B, B, O, O, B, B, B, B, B, B, + /* 119B0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 119C0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 119D0 */ B, VPst, VPre, VPst, VBlw, VBlw, VBlw, VBlw, O, O, VAbv, VAbv, VPst, VPst, VMPst, VMPst, + /* 119E0 */ H, B, O, O, VPre, O, O, O, O, O, O, O, O, O, O, O, + /* 119F0 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, + + /* Zanabazar Square */ + + /* 11A00 */ B, VAbv, VBlw, VBlw, VAbv, VAbv, VAbv, VAbv, VAbv, VAbv, VBlw, B, B, B, B, B, + /* 11A10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11A20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11A30 */ B, B, B, FMBlw, VBlw, VMAbv, VMAbv, VMAbv, VMAbv, VMPst, R, MBlw, MBlw, MBlw, MBlw, GB, + /* 11A40 */ O, O, O, O, O, GB, O, H, O, O, O, O, O, O, O, O, + + /* Soyombo */ + + /* 11A50 */ B, VAbv, VBlw, VBlw, VAbv, VAbv, VAbv, VPst, VPst, VBlw, VBlw, VBlw, B, B, B, B, + /* 11A60 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11A70 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11A80 */ B, B, B, B, R, R, R, R, R, R, FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, + /* 11A90 */ FBlw, FBlw, FBlw, FBlw, FBlw, FBlw, VMAbv, VMPst, CMAbv, H, O, O, O, B, O, O, + +#define use_offset_0x11c00u 5584 + + + /* Bhaiksuki */ + + /* 11C00 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, + /* 11C10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11C20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, VPst, + /* 11C30 */ VAbv, VAbv, VBlw, VBlw, VBlw, VBlw, VBlw, O, VAbv, VAbv, VAbv, VAbv, VMAbv, VMAbv, VMPst, H, + /* 11C40 */ B, O, O, O, GB, GB, O, O, O, O, O, O, O, O, O, O, + /* 11C50 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11C60 */ B, B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, + + /* Marchen */ + + /* 11C70 */ O, O, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11C80 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11C90 */ O, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, + /* 11CA0 */ SUB, SUB, SUB, SUB, SUB, SUB, SUB, SUB, O, SUB, SUB, SUB, SUB, SUB, SUB, SUB, + /* 11CB0 */ VBlw, VPre, VBlw, VAbv, VPst, VMAbv, VMAbv, O, + +#define use_offset_0x11d00u 5768 + + + /* Masaram Gondi */ + + /* 11D00 */ B, B, B, B, B, B, B, O, B, B, O, B, B, B, B, B, + /* 11D10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11D20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11D30 */ B, VAbv, VAbv, VAbv, VAbv, VAbv, VBlw, O, O, O, VAbv, O, VAbv, VAbv, O, VAbv, + /* 11D40 */ VMAbv, VMAbv, CMBlw, VAbv, VBlw, H, R, MBlw, O, O, O, O, O, O, O, O, + /* 11D50 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + + /* Gunjala Gondi */ + + /* 11D60 */ B, B, B, B, B, B, O, B, B, O, B, B, B, B, B, B, + /* 11D70 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11D80 */ B, B, B, B, B, B, B, B, B, B, VPst, VPst, VPst, VPst, VPst, O, + /* 11D90 */ VAbv, VAbv, O, VPst, VPst, VMAbv, VMPst, H, O, O, O, O, O, O, O, O, + /* 11DA0 */ B, B, B, B, B, B, B, B, B, B, O, O, O, O, O, O, + +#define use_offset_0x11ee0u 5944 + + + /* Makasar */ + + /* 11EE0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 11EF0 */ B, B, GB, VAbv, VBlw, VPre, VPst, O, + +}; /* Table items: 5968; occupancy: 74% */ + +USE_TABLE_ELEMENT_TYPE +hb_use_get_category (hb_codepoint_t u) +{ + switch (u >> 12) + { + case 0x0u: + if (hb_in_range<hb_codepoint_t> (u, 0x0028u, 0x003Fu)) return use_table[u - 0x0028u + use_offset_0x0028u]; + if (hb_in_range<hb_codepoint_t> (u, 0x00A0u, 0x00D7u)) return use_table[u - 0x00A0u + use_offset_0x00a0u]; + if (hb_in_range<hb_codepoint_t> (u, 0x0348u, 0x034Fu)) return use_table[u - 0x0348u + use_offset_0x0348u]; + if (hb_in_range<hb_codepoint_t> (u, 0x0900u, 0x0DF7u)) return use_table[u - 0x0900u + use_offset_0x0900u]; + if (hb_in_range<hb_codepoint_t> (u, 0x0F18u, 0x0FC7u)) return use_table[u - 0x0F18u + use_offset_0x0f18u]; + break; + + case 0x1u: + if (hb_in_range<hb_codepoint_t> (u, 0x1000u, 0x109Fu)) return use_table[u - 0x1000u + use_offset_0x1000u]; + if (hb_in_range<hb_codepoint_t> (u, 0x1700u, 0x17EFu)) return use_table[u - 0x1700u + use_offset_0x1700u]; + if (hb_in_range<hb_codepoint_t> (u, 0x1900u, 0x1A9Fu)) return use_table[u - 0x1900u + use_offset_0x1900u]; + if (hb_in_range<hb_codepoint_t> (u, 0x1B00u, 0x1C4Fu)) return use_table[u - 0x1B00u + use_offset_0x1b00u]; + if (hb_in_range<hb_codepoint_t> (u, 0x1CD0u, 0x1CFFu)) return use_table[u - 0x1CD0u + use_offset_0x1cd0u]; + if (hb_in_range<hb_codepoint_t> (u, 0x1DF8u, 0x1DFFu)) return use_table[u - 0x1DF8u + use_offset_0x1df8u]; + break; + + case 0x2u: + if (hb_in_range<hb_codepoint_t> (u, 0x2008u, 0x2017u)) return use_table[u - 0x2008u + use_offset_0x2008u]; + if (hb_in_range<hb_codepoint_t> (u, 0x2060u, 0x2087u)) return use_table[u - 0x2060u + use_offset_0x2060u]; + if (hb_in_range<hb_codepoint_t> (u, 0x20F0u, 0x20F7u)) return use_table[u - 0x20F0u + use_offset_0x20f0u]; + if (hb_in_range<hb_codepoint_t> (u, 0x25C8u, 0x25CFu)) return use_table[u - 0x25C8u + use_offset_0x25c8u]; + break; + + case 0xAu: + if (hb_in_range<hb_codepoint_t> (u, 0xA800u, 0xAAF7u)) return use_table[u - 0xA800u + use_offset_0xa800u]; + if (hb_in_range<hb_codepoint_t> (u, 0xABC0u, 0xABFFu)) return use_table[u - 0xABC0u + use_offset_0xabc0u]; + break; + + case 0xFu: + if (hb_in_range<hb_codepoint_t> (u, 0xFE00u, 0xFE0Fu)) return use_table[u - 0xFE00u + use_offset_0xfe00u]; + break; + + case 0x10u: + if (hb_in_range<hb_codepoint_t> (u, 0x10A00u, 0x10A4Fu)) return use_table[u - 0x10A00u + use_offset_0x10a00u]; + break; + + case 0x11u: + if (hb_in_range<hb_codepoint_t> (u, 0x11000u, 0x110BFu)) return use_table[u - 0x11000u + use_offset_0x11000u]; + if (hb_in_range<hb_codepoint_t> (u, 0x11100u, 0x1123Fu)) return use_table[u - 0x11100u + use_offset_0x11100u]; + if (hb_in_range<hb_codepoint_t> (u, 0x11280u, 0x11377u)) return use_table[u - 0x11280u + use_offset_0x11280u]; + if (hb_in_range<hb_codepoint_t> (u, 0x11400u, 0x114DFu)) return use_table[u - 0x11400u + use_offset_0x11400u]; + if (hb_in_range<hb_codepoint_t> (u, 0x11580u, 0x1173Fu)) return use_table[u - 0x11580u + use_offset_0x11580u]; + if (hb_in_range<hb_codepoint_t> (u, 0x11800u, 0x1183Fu)) return use_table[u - 0x11800u + use_offset_0x11800u]; + if (hb_in_range<hb_codepoint_t> (u, 0x11900u, 0x1195Fu)) return use_table[u - 0x11900u + use_offset_0x11900u]; + if (hb_in_range<hb_codepoint_t> (u, 0x119A0u, 0x11A9Fu)) return use_table[u - 0x119A0u + use_offset_0x119a0u]; + if (hb_in_range<hb_codepoint_t> (u, 0x11C00u, 0x11CB7u)) return use_table[u - 0x11C00u + use_offset_0x11c00u]; + if (hb_in_range<hb_codepoint_t> (u, 0x11D00u, 0x11DAFu)) return use_table[u - 0x11D00u + use_offset_0x11d00u]; + if (hb_in_range<hb_codepoint_t> (u, 0x11EE0u, 0x11EF7u)) return use_table[u - 0x11EE0u + use_offset_0x11ee0u]; + break; + + default: + break; + } + return USE_O; +} + +#undef B +#undef CGJ +#undef CS +#undef GB +#undef H +#undef HN +#undef HVM +#undef IND +#undef N +#undef O +#undef R +#undef Rsv +#undef S +#undef SUB +#undef Sk +#undef VS +#undef WJ +#undef ZWJ +#undef ZWNJ +#undef CMAbv +#undef CMBlw +#undef FAbv +#undef FBlw +#undef FPst +#undef FMAbv +#undef FMBlw +#undef FMPst +#undef MAbv +#undef MBlw +#undef MPst +#undef MPre +#undef SMAbv +#undef SMBlw +#undef VAbv +#undef VBlw +#undef VPst +#undef VPre +#undef VMAbv +#undef VMBlw +#undef VMPst +#undef VMPre + + +#endif +/* == End of generated table == */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-use.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use.cc new file mode 100644 index 0000000000..a1e25bdd80 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use.cc @@ -0,0 +1,569 @@ +/* + * Copyright © 2015 Mozilla Foundation. + * Copyright © 2015 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Mozilla Author(s): Jonathan Kew + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-complex-use.hh" +#include "hb-ot-shape-complex-arabic.hh" +#include "hb-ot-shape-complex-arabic-joining-list.hh" +#include "hb-ot-shape-complex-vowel-constraints.hh" + +/* buffer var allocations */ +#define use_category() complex_var_u8_1() + + +/* + * Universal Shaping Engine. + * https://docs.microsoft.com/en-us/typography/script-development/use + */ + +static const hb_tag_t +use_basic_features[] = +{ + /* + * Basic features. + * These features are applied all at once, before reordering. + */ + HB_TAG('r','k','r','f'), + HB_TAG('a','b','v','f'), + HB_TAG('b','l','w','f'), + HB_TAG('h','a','l','f'), + HB_TAG('p','s','t','f'), + HB_TAG('v','a','t','u'), + HB_TAG('c','j','c','t'), +}; +static const hb_tag_t +use_topographical_features[] = +{ + HB_TAG('i','s','o','l'), + HB_TAG('i','n','i','t'), + HB_TAG('m','e','d','i'), + HB_TAG('f','i','n','a'), +}; +/* Same order as use_topographical_features. */ +enum joining_form_t { + USE_ISOL, + USE_INIT, + USE_MEDI, + USE_FINA, + _USE_NONE +}; +static const hb_tag_t +use_other_features[] = +{ + /* + * Other features. + * These features are applied all at once, after reordering and + * clearing syllables. + */ + HB_TAG('a','b','v','s'), + HB_TAG('b','l','w','s'), + HB_TAG('h','a','l','n'), + HB_TAG('p','r','e','s'), + HB_TAG('p','s','t','s'), +}; + +static void +setup_syllables_use (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); +static void +record_rphf_use (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); +static void +record_pref_use (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); +static void +reorder_use (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + +static void +collect_features_use (hb_ot_shape_planner_t *plan) +{ + hb_ot_map_builder_t *map = &plan->map; + + /* Do this before any lookups have been applied. */ + map->add_gsub_pause (setup_syllables_use); + + /* "Default glyph pre-processing group" */ + map->enable_feature (HB_TAG('l','o','c','l')); + map->enable_feature (HB_TAG('c','c','m','p')); + map->enable_feature (HB_TAG('n','u','k','t')); + map->enable_feature (HB_TAG('a','k','h','n'), F_MANUAL_ZWJ); + + /* "Reordering group" */ + map->add_gsub_pause (_hb_clear_substitution_flags); + map->add_feature (HB_TAG('r','p','h','f'), F_MANUAL_ZWJ); + map->add_gsub_pause (record_rphf_use); + map->add_gsub_pause (_hb_clear_substitution_flags); + map->enable_feature (HB_TAG('p','r','e','f'), F_MANUAL_ZWJ); + map->add_gsub_pause (record_pref_use); + + /* "Orthographic unit shaping group" */ + for (unsigned int i = 0; i < ARRAY_LENGTH (use_basic_features); i++) + map->enable_feature (use_basic_features[i], F_MANUAL_ZWJ); + + map->add_gsub_pause (reorder_use); + map->add_gsub_pause (_hb_clear_syllables); + + /* "Topographical features" */ + for (unsigned int i = 0; i < ARRAY_LENGTH (use_topographical_features); i++) + map->add_feature (use_topographical_features[i]); + map->add_gsub_pause (nullptr); + + /* "Standard typographic presentation" */ + for (unsigned int i = 0; i < ARRAY_LENGTH (use_other_features); i++) + map->enable_feature (use_other_features[i], F_MANUAL_ZWJ); +} + +struct use_shape_plan_t +{ + hb_mask_t rphf_mask; + + arabic_shape_plan_t *arabic_plan; +}; + +static void * +data_create_use (const hb_ot_shape_plan_t *plan) +{ + use_shape_plan_t *use_plan = (use_shape_plan_t *) calloc (1, sizeof (use_shape_plan_t)); + if (unlikely (!use_plan)) + return nullptr; + + use_plan->rphf_mask = plan->map.get_1_mask (HB_TAG('r','p','h','f')); + + if (has_arabic_joining (plan->props.script)) + { + use_plan->arabic_plan = (arabic_shape_plan_t *) data_create_arabic (plan); + if (unlikely (!use_plan->arabic_plan)) + { + free (use_plan); + return nullptr; + } + } + + return use_plan; +} + +static void +data_destroy_use (void *data) +{ + use_shape_plan_t *use_plan = (use_shape_plan_t *) data; + + if (use_plan->arabic_plan) + data_destroy_arabic (use_plan->arabic_plan); + + free (data); +} + +enum use_syllable_type_t { + use_independent_cluster, + use_virama_terminated_cluster, + use_sakot_terminated_cluster, + use_standard_cluster, + use_number_joiner_terminated_cluster, + use_numeral_cluster, + use_symbol_cluster, + use_broken_cluster, + use_non_cluster, +}; + +#include "hb-ot-shape-complex-use-machine.hh" + + +static void +setup_masks_use (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font HB_UNUSED) +{ + const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data; + + /* Do this before allocating use_category(). */ + if (use_plan->arabic_plan) + { + setup_masks_arabic_plan (use_plan->arabic_plan, buffer, plan->props.script); + } + + HB_BUFFER_ALLOCATE_VAR (buffer, use_category); + + /* We cannot setup masks here. We save information about characters + * and setup masks later on in a pause-callback. */ + + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + info[i].use_category() = hb_use_get_category (info[i].codepoint); +} + +static void +setup_rphf_mask (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer) +{ + const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data; + + hb_mask_t mask = use_plan->rphf_mask; + if (!mask) return; + + hb_glyph_info_t *info = buffer->info; + + foreach_syllable (buffer, start, end) + { + unsigned int limit = info[start].use_category() == USE_R ? 1 : hb_min (3u, end - start); + for (unsigned int i = start; i < start + limit; i++) + info[i].mask |= mask; + } +} + +static void +setup_topographical_masks (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer) +{ + const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data; + if (use_plan->arabic_plan) + return; + + static_assert ((USE_INIT < 4 && USE_ISOL < 4 && USE_MEDI < 4 && USE_FINA < 4), ""); + hb_mask_t masks[4], all_masks = 0; + for (unsigned int i = 0; i < 4; i++) + { + masks[i] = plan->map.get_1_mask (use_topographical_features[i]); + if (masks[i] == plan->map.get_global_mask ()) + masks[i] = 0; + all_masks |= masks[i]; + } + if (!all_masks) + return; + hb_mask_t other_masks = ~all_masks; + + unsigned int last_start = 0; + joining_form_t last_form = _USE_NONE; + hb_glyph_info_t *info = buffer->info; + foreach_syllable (buffer, start, end) + { + use_syllable_type_t syllable_type = (use_syllable_type_t) (info[start].syllable() & 0x0F); + switch (syllable_type) + { + case use_independent_cluster: + case use_symbol_cluster: + case use_non_cluster: + /* These don't join. Nothing to do. */ + last_form = _USE_NONE; + break; + + case use_virama_terminated_cluster: + case use_sakot_terminated_cluster: + case use_standard_cluster: + case use_number_joiner_terminated_cluster: + case use_numeral_cluster: + case use_broken_cluster: + + bool join = last_form == USE_FINA || last_form == USE_ISOL; + + if (join) + { + /* Fixup previous syllable's form. */ + last_form = last_form == USE_FINA ? USE_MEDI : USE_INIT; + for (unsigned int i = last_start; i < start; i++) + info[i].mask = (info[i].mask & other_masks) | masks[last_form]; + } + + /* Form for this syllable. */ + last_form = join ? USE_FINA : USE_ISOL; + for (unsigned int i = start; i < end; i++) + info[i].mask = (info[i].mask & other_masks) | masks[last_form]; + + break; + } + + last_start = start; + } +} + +static void +setup_syllables_use (const hb_ot_shape_plan_t *plan, + hb_font_t *font HB_UNUSED, + hb_buffer_t *buffer) +{ + find_syllables_use (buffer); + foreach_syllable (buffer, start, end) + buffer->unsafe_to_break (start, end); + setup_rphf_mask (plan, buffer); + setup_topographical_masks (plan, buffer); +} + +static void +record_rphf_use (const hb_ot_shape_plan_t *plan, + hb_font_t *font HB_UNUSED, + hb_buffer_t *buffer) +{ + const use_shape_plan_t *use_plan = (const use_shape_plan_t *) plan->data; + + hb_mask_t mask = use_plan->rphf_mask; + if (!mask) return; + hb_glyph_info_t *info = buffer->info; + + foreach_syllable (buffer, start, end) + { + /* Mark a substituted repha as USE_R. */ + for (unsigned int i = start; i < end && (info[i].mask & mask); i++) + if (_hb_glyph_info_substituted (&info[i])) + { + info[i].use_category() = USE_R; + break; + } + } +} + +static void +record_pref_use (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font HB_UNUSED, + hb_buffer_t *buffer) +{ + hb_glyph_info_t *info = buffer->info; + + foreach_syllable (buffer, start, end) + { + /* Mark a substituted pref as VPre, as they behave the same way. */ + for (unsigned int i = start; i < end; i++) + if (_hb_glyph_info_substituted (&info[i])) + { + info[i].use_category() = USE_VPre; + break; + } + } +} + +static inline bool +is_halant_use (const hb_glyph_info_t &info) +{ + return (info.use_category() == USE_H || info.use_category() == USE_HVM) && + !_hb_glyph_info_ligated (&info); +} + +static void +reorder_syllable_use (hb_buffer_t *buffer, unsigned int start, unsigned int end) +{ + use_syllable_type_t syllable_type = (use_syllable_type_t) (buffer->info[start].syllable() & 0x0F); + /* Only a few syllable types need reordering. */ + if (unlikely (!(FLAG_UNSAFE (syllable_type) & + (FLAG (use_virama_terminated_cluster) | + FLAG (use_sakot_terminated_cluster) | + FLAG (use_standard_cluster) | + FLAG (use_broken_cluster) | + 0)))) + return; + + hb_glyph_info_t *info = buffer->info; + +#define POST_BASE_FLAGS64 (FLAG64 (USE_FM) | \ + FLAG64 (USE_FAbv) | \ + FLAG64 (USE_FBlw) | \ + FLAG64 (USE_FPst) | \ + FLAG64 (USE_MAbv) | \ + FLAG64 (USE_MBlw) | \ + FLAG64 (USE_MPst) | \ + FLAG64 (USE_MPre) | \ + FLAG64 (USE_VAbv) | \ + FLAG64 (USE_VBlw) | \ + FLAG64 (USE_VPst) | \ + FLAG64 (USE_VPre) | \ + FLAG64 (USE_VMAbv) | \ + FLAG64 (USE_VMBlw) | \ + FLAG64 (USE_VMPst) | \ + FLAG64 (USE_VMPre)) + + /* Move things forward. */ + if (info[start].use_category() == USE_R && end - start > 1) + { + /* Got a repha. Reorder it towards the end, but before the first post-base + * glyph. */ + for (unsigned int i = start + 1; i < end; i++) + { + bool is_post_base_glyph = (FLAG64_UNSAFE (info[i].use_category()) & POST_BASE_FLAGS64) || + is_halant_use (info[i]); + if (is_post_base_glyph || i == end - 1) + { + /* If we hit a post-base glyph, move before it; otherwise move to the + * end. Shift things in between backward. */ + + if (is_post_base_glyph) + i--; + + buffer->merge_clusters (start, i + 1); + hb_glyph_info_t t = info[start]; + memmove (&info[start], &info[start + 1], (i - start) * sizeof (info[0])); + info[i] = t; + + break; + } + } + } + + /* Move things back. */ + unsigned int j = start; + for (unsigned int i = start; i < end; i++) + { + uint32_t flag = FLAG_UNSAFE (info[i].use_category()); + if (is_halant_use (info[i])) + { + /* If we hit a halant, move after it; otherwise move to the beginning, and + * shift things in between forward. */ + j = i + 1; + } + else if (((flag) & (FLAG (USE_VPre) | FLAG (USE_VMPre))) && + /* Only move the first component of a MultipleSubst. */ + 0 == _hb_glyph_info_get_lig_comp (&info[i]) && + j < i) + { + buffer->merge_clusters (j, i + 1); + hb_glyph_info_t t = info[i]; + memmove (&info[j + 1], &info[j], (i - j) * sizeof (info[0])); + info[j] = t; + } + } +} + +static inline void +insert_dotted_circles_use (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font, + hb_buffer_t *buffer) +{ + if (unlikely (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE)) + return; + + /* Note: This loop is extra overhead, but should not be measurable. + * TODO Use a buffer scratch flag to remove the loop. */ + bool has_broken_syllables = false; + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + if ((info[i].syllable() & 0x0F) == use_broken_cluster) + { + has_broken_syllables = true; + break; + } + if (likely (!has_broken_syllables)) + return; + + hb_glyph_info_t dottedcircle = {0}; + if (!font->get_nominal_glyph (0x25CCu, &dottedcircle.codepoint)) + return; + dottedcircle.use_category() = hb_use_get_category (0x25CC); + + buffer->clear_output (); + + buffer->idx = 0; + unsigned int last_syllable = 0; + while (buffer->idx < buffer->len && buffer->successful) + { + unsigned int syllable = buffer->cur().syllable(); + use_syllable_type_t syllable_type = (use_syllable_type_t) (syllable & 0x0F); + if (unlikely (last_syllable != syllable && syllable_type == use_broken_cluster)) + { + last_syllable = syllable; + + hb_glyph_info_t ginfo = dottedcircle; + ginfo.cluster = buffer->cur().cluster; + ginfo.mask = buffer->cur().mask; + ginfo.syllable() = buffer->cur().syllable(); + + /* Insert dottedcircle after possible Repha. */ + while (buffer->idx < buffer->len && buffer->successful && + last_syllable == buffer->cur().syllable() && + buffer->cur().use_category() == USE_R) + buffer->next_glyph (); + + buffer->output_info (ginfo); + } + else + buffer->next_glyph (); + } + buffer->swap_buffers (); +} + +static void +reorder_use (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ + insert_dotted_circles_use (plan, font, buffer); + + foreach_syllable (buffer, start, end) + reorder_syllable_use (buffer, start, end); + + HB_BUFFER_DEALLOCATE_VAR (buffer, use_category); +} + + +static void +preprocess_text_use (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font) +{ + _hb_preprocess_text_vowel_constraints (plan, buffer, font); +} + +static bool +compose_use (const hb_ot_shape_normalize_context_t *c, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab) +{ + /* Avoid recomposing split matras. */ + if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) + return false; + + return (bool)c->unicode->compose (a, b, ab); +} + + +const hb_ot_complex_shaper_t _hb_ot_complex_shaper_use = +{ + collect_features_use, + nullptr, /* override_features */ + data_create_use, + data_destroy_use, + preprocess_text_use, + nullptr, /* postprocess_glyphs */ + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, + nullptr, /* decompose */ + compose_use, + setup_masks_use, + HB_TAG_NONE, /* gpos_tag */ + nullptr, /* reorder_marks */ + HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY, + false, /* fallback_position */ +}; + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-use.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use.hh new file mode 100644 index 0000000000..ce6645ecd3 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-use.hh @@ -0,0 +1,105 @@ +/* + * Copyright © 2015 Mozilla Foundation. + * Copyright © 2015 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Mozilla Author(s): Jonathan Kew + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_USE_HH +#define HB_OT_SHAPE_COMPLEX_USE_HH + +#include "hb.hh" + + +#include "hb-ot-shape-complex.hh" + + +#define USE_TABLE_ELEMENT_TYPE uint8_t + +/* Cateories used in the Universal Shaping Engine spec: + * https://docs.microsoft.com/en-us/typography/script-development/use + */ +/* Note: This enum is duplicated in the -machine.rl source file. + * Not sure how to avoid duplication. */ +enum use_category_t { + USE_O = 0, /* OTHER */ + + USE_B = 1, /* BASE */ + USE_IND = 3, /* BASE_IND */ + USE_N = 4, /* BASE_NUM */ + USE_GB = 5, /* BASE_OTHER */ + USE_CGJ = 6, /* CGJ */ +// USE_F = 7, /* CONS_FINAL */ + USE_FM = 8, /* CONS_FINAL_MOD */ +// USE_M = 9, /* CONS_MED */ +// USE_CM = 10, /* CONS_MOD */ + USE_SUB = 11, /* CONS_SUB */ + USE_H = 12, /* HALANT */ + + USE_HN = 13, /* HALANT_NUM */ + USE_ZWNJ = 14, /* Zero width non-joiner */ + USE_ZWJ = 15, /* Zero width joiner */ + USE_WJ = 16, /* Word joiner */ + USE_Rsv = 17, /* Reserved characters */ + USE_R = 18, /* REPHA */ + USE_S = 19, /* SYM */ +// USE_SM = 20, /* SYM_MOD */ + USE_VS = 21, /* VARIATION_SELECTOR */ +// USE_V = 36, /* VOWEL */ +// USE_VM = 40, /* VOWEL_MOD */ + USE_CS = 43, /* CONS_WITH_STACKER */ + + /* https://github.com/harfbuzz/harfbuzz/issues/1102 */ + USE_HVM = 44, /* HALANT_OR_VOWEL_MODIFIER */ + + USE_Sk = 48, /* SAKOT */ + + USE_FAbv = 24, /* CONS_FINAL_ABOVE */ + USE_FBlw = 25, /* CONS_FINAL_BELOW */ + USE_FPst = 26, /* CONS_FINAL_POST */ + USE_MAbv = 27, /* CONS_MED_ABOVE */ + USE_MBlw = 28, /* CONS_MED_BELOW */ + USE_MPst = 29, /* CONS_MED_POST */ + USE_MPre = 30, /* CONS_MED_PRE */ + USE_CMAbv = 31, /* CONS_MOD_ABOVE */ + USE_CMBlw = 32, /* CONS_MOD_BELOW */ + USE_VAbv = 33, /* VOWEL_ABOVE / VOWEL_ABOVE_BELOW / VOWEL_ABOVE_BELOW_POST / VOWEL_ABOVE_POST */ + USE_VBlw = 34, /* VOWEL_BELOW / VOWEL_BELOW_POST */ + USE_VPst = 35, /* VOWEL_POST UIPC = Right */ + USE_VPre = 22, /* VOWEL_PRE / VOWEL_PRE_ABOVE / VOWEL_PRE_ABOVE_POST / VOWEL_PRE_POST */ + USE_VMAbv = 37, /* VOWEL_MOD_ABOVE */ + USE_VMBlw = 38, /* VOWEL_MOD_BELOW */ + USE_VMPst = 39, /* VOWEL_MOD_POST */ + USE_VMPre = 23, /* VOWEL_MOD_PRE */ + USE_SMAbv = 41, /* SYM_MOD_ABOVE */ + USE_SMBlw = 42, /* SYM_MOD_BELOW */ + USE_FMAbv = 45, /* CONS_FINAL_MOD UIPC = Top */ + USE_FMBlw = 46, /* CONS_FINAL_MOD UIPC = Bottom */ + USE_FMPst = 47, /* CONS_FINAL_MOD UIPC = Not_Applicable */ +}; + +HB_INTERNAL USE_TABLE_ELEMENT_TYPE +hb_use_get_category (hb_codepoint_t u); + +#endif /* HB_OT_SHAPE_COMPLEX_USE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.cc b/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.cc new file mode 100644 index 0000000000..c3368c6ec2 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.cc @@ -0,0 +1,464 @@ +/* == Start of generated functions == */ +/* + * The following functions are generated by running: + * + * ./gen-vowel-constraints.py ms-use/IndicShapingInvalidCluster.txt Scripts.txt + * + * on files with these headers: + * + * # IndicShapingInvalidCluster.txt + * # Date: 2015-03-12, 21:17:00 GMT [AG] + * # Date: 2019-11-08, 23:22:00 GMT [AG] + * + * # Scripts-13.0.0.txt + * # Date: 2020-01-22, 00:07:43 GMT + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-complex-vowel-constraints.hh" + +static void +_output_dotted_circle (hb_buffer_t *buffer) +{ + hb_glyph_info_t &dottedcircle = buffer->output_glyph (0x25CCu); + _hb_glyph_info_reset_continuation (&dottedcircle); +} + +static void +_output_with_dotted_circle (hb_buffer_t *buffer) +{ + _output_dotted_circle (buffer); + buffer->next_glyph (); +} + +void +_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_buffer_t *buffer, + hb_font_t *font HB_UNUSED) +{ +#ifdef HB_NO_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS + return; +#endif + if (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE) + return; + + /* UGLY UGLY UGLY business of adding dotted-circle in the middle of + * vowel-sequences that look like another vowel. Data for each script + * collected from the USE script development spec. + * + * https://github.com/harfbuzz/harfbuzz/issues/1019 + */ + bool processed = false; + buffer->clear_output (); + unsigned int count = buffer->len; + switch ((unsigned) buffer->props.script) + { + case HB_SCRIPT_DEVANAGARI: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x0905u: + switch (buffer->cur (1).codepoint) + { + case 0x093Au: case 0x093Bu: case 0x093Eu: case 0x0945u: + case 0x0946u: case 0x0949u: case 0x094Au: case 0x094Bu: + case 0x094Cu: case 0x094Fu: case 0x0956u: case 0x0957u: + matched = true; + break; + } + break; + case 0x0906u: + switch (buffer->cur (1).codepoint) + { + case 0x093Au: case 0x0945u: case 0x0946u: case 0x0947u: + case 0x0948u: + matched = true; + break; + } + break; + case 0x0909u: + matched = 0x0941u == buffer->cur (1).codepoint; + break; + case 0x090Fu: + switch (buffer->cur (1).codepoint) + { + case 0x0945u: case 0x0946u: case 0x0947u: + matched = true; + break; + } + break; + case 0x0930u: + if (0x094Du == buffer->cur (1).codepoint && + buffer->idx + 2 < count && + 0x0907u == buffer->cur (2).codepoint) + { + buffer->next_glyph (); + matched = true; + } + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_BENGALI: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x0985u: + matched = 0x09BEu == buffer->cur (1).codepoint; + break; + case 0x098Bu: + matched = 0x09C3u == buffer->cur (1).codepoint; + break; + case 0x098Cu: + matched = 0x09E2u == buffer->cur (1).codepoint; + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_GURMUKHI: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x0A05u: + switch (buffer->cur (1).codepoint) + { + case 0x0A3Eu: case 0x0A48u: case 0x0A4Cu: + matched = true; + break; + } + break; + case 0x0A72u: + switch (buffer->cur (1).codepoint) + { + case 0x0A3Fu: case 0x0A40u: case 0x0A47u: + matched = true; + break; + } + break; + case 0x0A73u: + switch (buffer->cur (1).codepoint) + { + case 0x0A41u: case 0x0A42u: case 0x0A4Bu: + matched = true; + break; + } + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_GUJARATI: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x0A85u: + switch (buffer->cur (1).codepoint) + { + case 0x0ABEu: case 0x0AC5u: case 0x0AC7u: case 0x0AC8u: + case 0x0AC9u: case 0x0ACBu: case 0x0ACCu: + matched = true; + break; + } + break; + case 0x0AC5u: + matched = 0x0ABEu == buffer->cur (1).codepoint; + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_ORIYA: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x0B05u: + matched = 0x0B3Eu == buffer->cur (1).codepoint; + break; + case 0x0B0Fu: case 0x0B13u: + matched = 0x0B57u == buffer->cur (1).codepoint; + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_TAMIL: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + if (0x0B85u == buffer->cur ().codepoint && + 0x0BC2u == buffer->cur (1).codepoint) + { + matched = true; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_TELUGU: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x0C12u: + switch (buffer->cur (1).codepoint) + { + case 0x0C4Cu: case 0x0C55u: + matched = true; + break; + } + break; + case 0x0C3Fu: case 0x0C46u: case 0x0C4Au: + matched = 0x0C55u == buffer->cur (1).codepoint; + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_KANNADA: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x0C89u: case 0x0C8Bu: + matched = 0x0CBEu == buffer->cur (1).codepoint; + break; + case 0x0C92u: + matched = 0x0CCCu == buffer->cur (1).codepoint; + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_MALAYALAM: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x0D07u: case 0x0D09u: + matched = 0x0D57u == buffer->cur (1).codepoint; + break; + case 0x0D0Eu: + matched = 0x0D46u == buffer->cur (1).codepoint; + break; + case 0x0D12u: + switch (buffer->cur (1).codepoint) + { + case 0x0D3Eu: case 0x0D57u: + matched = true; + break; + } + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_SINHALA: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x0D85u: + switch (buffer->cur (1).codepoint) + { + case 0x0DCFu: case 0x0DD0u: case 0x0DD1u: + matched = true; + break; + } + break; + case 0x0D8Bu: case 0x0D8Fu: case 0x0D94u: + matched = 0x0DDFu == buffer->cur (1).codepoint; + break; + case 0x0D8Du: + matched = 0x0DD8u == buffer->cur (1).codepoint; + break; + case 0x0D91u: + switch (buffer->cur (1).codepoint) + { + case 0x0DCAu: case 0x0DD9u: case 0x0DDAu: case 0x0DDCu: + case 0x0DDDu: + matched = true; + break; + } + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_BRAHMI: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x11005u: + matched = 0x11038u == buffer->cur (1).codepoint; + break; + case 0x1100Bu: + matched = 0x1103Eu == buffer->cur (1).codepoint; + break; + case 0x1100Fu: + matched = 0x11042u == buffer->cur (1).codepoint; + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_KHUDAWADI: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x112B0u: + switch (buffer->cur (1).codepoint) + { + case 0x112E0u: case 0x112E5u: case 0x112E6u: case 0x112E7u: + case 0x112E8u: + matched = true; + break; + } + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_TIRHUTA: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x11481u: + matched = 0x114B0u == buffer->cur (1).codepoint; + break; + case 0x1148Bu: case 0x1148Du: + matched = 0x114BAu == buffer->cur (1).codepoint; + break; + case 0x114AAu: + switch (buffer->cur (1).codepoint) + { + case 0x114B5u: case 0x114B6u: + matched = true; + break; + } + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_MODI: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x11600u: case 0x11601u: + switch (buffer->cur (1).codepoint) + { + case 0x11639u: case 0x1163Au: + matched = true; + break; + } + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + case HB_SCRIPT_TAKRI: + for (buffer->idx = 0; buffer->idx + 1 < count && buffer->successful;) + { + bool matched = false; + switch (buffer->cur ().codepoint) + { + case 0x11680u: + switch (buffer->cur (1).codepoint) + { + case 0x116ADu: case 0x116B4u: case 0x116B5u: + matched = true; + break; + } + break; + case 0x11686u: + matched = 0x116B2u == buffer->cur (1).codepoint; + break; + } + buffer->next_glyph (); + if (matched) _output_with_dotted_circle (buffer); + } + processed = true; + break; + + default: + break; + } + if (processed) + { + if (buffer->idx < count) + buffer->next_glyph (); + buffer->swap_buffers (); + } +} + + +#endif +/* == End of generated functions == */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.hh new file mode 100644 index 0000000000..d9082d4ead --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex-vowel-constraints.hh @@ -0,0 +1,39 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH +#define HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH + +#include "hb.hh" + +#include "hb-ot-shape-complex.hh" + +HB_INTERNAL void +_hb_preprocess_text_vowel_constraints (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font); + +#endif /* HB_OT_SHAPE_COMPLEX_VOWEL_CONSTRAINTS_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-complex.hh b/thirdparty/harfbuzz/src/hb-ot-shape-complex.hh new file mode 100644 index 0000000000..61f4c0e158 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-complex.hh @@ -0,0 +1,402 @@ +/* + * Copyright © 2010,2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_HH +#define HB_OT_SHAPE_COMPLEX_HH + +#include "hb.hh" + +#include "hb-ot-layout.hh" +#include "hb-ot-shape.hh" +#include "hb-ot-shape-normalize.hh" + + +/* buffer var allocations, used by complex shapers */ +#define complex_var_u8_0() var2.u8[2] +#define complex_var_u8_1() var2.u8[3] + + +#define HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS 32 + +enum hb_ot_shape_zero_width_marks_type_t { + HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, + HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY, + HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE +}; + + +/* Master OT shaper list */ +#define HB_COMPLEX_SHAPERS_IMPLEMENT_SHAPERS \ + HB_COMPLEX_SHAPER_IMPLEMENT (arabic) \ + HB_COMPLEX_SHAPER_IMPLEMENT (default) \ + HB_COMPLEX_SHAPER_IMPLEMENT (dumber) \ + HB_COMPLEX_SHAPER_IMPLEMENT (hangul) \ + HB_COMPLEX_SHAPER_IMPLEMENT (hebrew) \ + HB_COMPLEX_SHAPER_IMPLEMENT (indic) \ + HB_COMPLEX_SHAPER_IMPLEMENT (khmer) \ + HB_COMPLEX_SHAPER_IMPLEMENT (myanmar) \ + HB_COMPLEX_SHAPER_IMPLEMENT (myanmar_zawgyi) \ + HB_COMPLEX_SHAPER_IMPLEMENT (thai) \ + HB_COMPLEX_SHAPER_IMPLEMENT (use) \ + /* ^--- Add new shapers here; keep sorted. */ + + +struct hb_ot_complex_shaper_t +{ + /* collect_features() + * Called during shape_plan(). + * Shapers should use plan->map to add their features and callbacks. + * May be NULL. + */ + void (*collect_features) (hb_ot_shape_planner_t *plan); + + /* override_features() + * Called during shape_plan(). + * Shapers should use plan->map to override features and add callbacks after + * common features are added. + * May be NULL. + */ + void (*override_features) (hb_ot_shape_planner_t *plan); + + + /* data_create() + * Called at the end of shape_plan(). + * Whatever shapers return will be accessible through plan->data later. + * If nullptr is returned, means a plan failure. + */ + void *(*data_create) (const hb_ot_shape_plan_t *plan); + + /* data_destroy() + * Called when the shape_plan is being destroyed. + * plan->data is passed here for destruction. + * If nullptr is returned, means a plan failure. + * May be NULL. + */ + void (*data_destroy) (void *data); + + + /* preprocess_text() + * Called during shape(). + * Shapers can use to modify text before shaping starts. + * May be NULL. + */ + void (*preprocess_text) (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font); + + /* postprocess_glyphs() + * Called during shape(). + * Shapers can use to modify glyphs after shaping ends. + * May be NULL. + */ + void (*postprocess_glyphs) (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font); + + + hb_ot_shape_normalization_mode_t normalization_preference; + + /* decompose() + * Called during shape()'s normalization. + * May be NULL. + */ + bool (*decompose) (const hb_ot_shape_normalize_context_t *c, + hb_codepoint_t ab, + hb_codepoint_t *a, + hb_codepoint_t *b); + + /* compose() + * Called during shape()'s normalization. + * May be NULL. + */ + bool (*compose) (const hb_ot_shape_normalize_context_t *c, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab); + + /* setup_masks() + * Called during shape(). + * Shapers should use map to get feature masks and set on buffer. + * Shapers may NOT modify characters. + * May be NULL. + */ + void (*setup_masks) (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font); + + /* gpos_tag() + * If not HB_TAG_NONE, then must match found GPOS script tag for + * GPOS to be applied. Otherwise, fallback positioning will be used. + */ + hb_tag_t gpos_tag; + + /* reorder_marks() + * Called during shape(). + * Shapers can use to modify ordering of combining marks. + * May be NULL. + */ + void (*reorder_marks) (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + unsigned int start, + unsigned int end); + + hb_ot_shape_zero_width_marks_type_t zero_width_marks; + + bool fallback_position; +}; + +#define HB_COMPLEX_SHAPER_IMPLEMENT(name) extern HB_INTERNAL const hb_ot_complex_shaper_t _hb_ot_complex_shaper_##name; +HB_COMPLEX_SHAPERS_IMPLEMENT_SHAPERS +#undef HB_COMPLEX_SHAPER_IMPLEMENT + + +static inline const hb_ot_complex_shaper_t * +hb_ot_shape_complex_categorize (const hb_ot_shape_planner_t *planner) +{ + switch ((hb_tag_t) planner->props.script) + { + default: + return &_hb_ot_complex_shaper_default; + + + /* Unicode-1.1 additions */ + case HB_SCRIPT_ARABIC: + + /* Unicode-3.0 additions */ + case HB_SCRIPT_MONGOLIAN: + case HB_SCRIPT_SYRIAC: + + /* Unicode-5.0 additions */ + case HB_SCRIPT_NKO: + case HB_SCRIPT_PHAGS_PA: + + /* Unicode-6.0 additions */ + case HB_SCRIPT_MANDAIC: + + /* Unicode-7.0 additions */ + case HB_SCRIPT_MANICHAEAN: + case HB_SCRIPT_PSALTER_PAHLAVI: + + /* Unicode-9.0 additions */ + case HB_SCRIPT_ADLAM: + + /* Unicode-11.0 additions */ + case HB_SCRIPT_HANIFI_ROHINGYA: + case HB_SCRIPT_SOGDIAN: + + /* For Arabic script, use the Arabic shaper even if no OT script tag was found. + * This is because we do fallback shaping for Arabic script (and not others). + * But note that Arabic shaping is applicable only to horizontal layout; for + * vertical text, just use the generic shaper instead. */ + if ((planner->map.chosen_script[0] != HB_OT_TAG_DEFAULT_SCRIPT || + planner->props.script == HB_SCRIPT_ARABIC) && + HB_DIRECTION_IS_HORIZONTAL(planner->props.direction)) + return &_hb_ot_complex_shaper_arabic; + else + return &_hb_ot_complex_shaper_default; + + + /* Unicode-1.1 additions */ + case HB_SCRIPT_THAI: + case HB_SCRIPT_LAO: + + return &_hb_ot_complex_shaper_thai; + + + /* Unicode-1.1 additions */ + case HB_SCRIPT_HANGUL: + + return &_hb_ot_complex_shaper_hangul; + + + /* Unicode-1.1 additions */ + case HB_SCRIPT_HEBREW: + + return &_hb_ot_complex_shaper_hebrew; + + + /* Unicode-1.1 additions */ + case HB_SCRIPT_BENGALI: + case HB_SCRIPT_DEVANAGARI: + case HB_SCRIPT_GUJARATI: + case HB_SCRIPT_GURMUKHI: + case HB_SCRIPT_KANNADA: + case HB_SCRIPT_MALAYALAM: + case HB_SCRIPT_ORIYA: + case HB_SCRIPT_TAMIL: + case HB_SCRIPT_TELUGU: + + /* Unicode-3.0 additions */ + case HB_SCRIPT_SINHALA: + + /* If the designer designed the font for the 'DFLT' script, + * (or we ended up arbitrarily pick 'latn'), use the default shaper. + * Otherwise, use the specific shaper. + * + * If it's indy3 tag, send to USE. */ + if (planner->map.chosen_script[0] == HB_TAG ('D','F','L','T') || + planner->map.chosen_script[0] == HB_TAG ('l','a','t','n')) + return &_hb_ot_complex_shaper_default; + else if ((planner->map.chosen_script[0] & 0x000000FF) == '3') + return &_hb_ot_complex_shaper_use; + else + return &_hb_ot_complex_shaper_indic; + + case HB_SCRIPT_KHMER: + return &_hb_ot_complex_shaper_khmer; + + case HB_SCRIPT_MYANMAR: + /* If the designer designed the font for the 'DFLT' script, + * (or we ended up arbitrarily pick 'latn'), use the default shaper. + * Otherwise, use the specific shaper. + * + * If designer designed for 'mymr' tag, also send to default + * shaper. That's tag used from before Myanmar shaping spec + * was developed. The shaping spec uses 'mym2' tag. */ + if (planner->map.chosen_script[0] == HB_TAG ('D','F','L','T') || + planner->map.chosen_script[0] == HB_TAG ('l','a','t','n') || + planner->map.chosen_script[0] == HB_TAG ('m','y','m','r')) + return &_hb_ot_complex_shaper_default; + else + return &_hb_ot_complex_shaper_myanmar; + + + /* https://github.com/harfbuzz/harfbuzz/issues/1162 */ + case HB_SCRIPT_MYANMAR_ZAWGYI: + + return &_hb_ot_complex_shaper_myanmar_zawgyi; + + + /* Unicode-2.0 additions */ + case HB_SCRIPT_TIBETAN: + + /* Unicode-3.0 additions */ + //case HB_SCRIPT_MONGOLIAN: + //case HB_SCRIPT_SINHALA: + + /* Unicode-3.2 additions */ + case HB_SCRIPT_BUHID: + case HB_SCRIPT_HANUNOO: + case HB_SCRIPT_TAGALOG: + case HB_SCRIPT_TAGBANWA: + + /* Unicode-4.0 additions */ + case HB_SCRIPT_LIMBU: + case HB_SCRIPT_TAI_LE: + + /* Unicode-4.1 additions */ + case HB_SCRIPT_BUGINESE: + case HB_SCRIPT_KHAROSHTHI: + case HB_SCRIPT_SYLOTI_NAGRI: + case HB_SCRIPT_TIFINAGH: + + /* Unicode-5.0 additions */ + case HB_SCRIPT_BALINESE: + //case HB_SCRIPT_NKO: + //case HB_SCRIPT_PHAGS_PA: + + /* Unicode-5.1 additions */ + case HB_SCRIPT_CHAM: + case HB_SCRIPT_KAYAH_LI: + case HB_SCRIPT_LEPCHA: + case HB_SCRIPT_REJANG: + case HB_SCRIPT_SAURASHTRA: + case HB_SCRIPT_SUNDANESE: + + /* Unicode-5.2 additions */ + case HB_SCRIPT_EGYPTIAN_HIEROGLYPHS: + case HB_SCRIPT_JAVANESE: + case HB_SCRIPT_KAITHI: + case HB_SCRIPT_MEETEI_MAYEK: + case HB_SCRIPT_TAI_THAM: + case HB_SCRIPT_TAI_VIET: + + /* Unicode-6.0 additions */ + case HB_SCRIPT_BATAK: + case HB_SCRIPT_BRAHMI: + //case HB_SCRIPT_MANDAIC: + + /* Unicode-6.1 additions */ + case HB_SCRIPT_CHAKMA: + case HB_SCRIPT_SHARADA: + case HB_SCRIPT_TAKRI: + + /* Unicode-7.0 additions */ + case HB_SCRIPT_DUPLOYAN: + case HB_SCRIPT_GRANTHA: + case HB_SCRIPT_KHOJKI: + case HB_SCRIPT_KHUDAWADI: + case HB_SCRIPT_MAHAJANI: + //case HB_SCRIPT_MANICHAEAN: + case HB_SCRIPT_MODI: + case HB_SCRIPT_PAHAWH_HMONG: + //case HB_SCRIPT_PSALTER_PAHLAVI: + case HB_SCRIPT_SIDDHAM: + case HB_SCRIPT_TIRHUTA: + + /* Unicode-8.0 additions */ + case HB_SCRIPT_AHOM: + + /* Unicode-9.0 additions */ + //case HB_SCRIPT_ADLAM: + case HB_SCRIPT_BHAIKSUKI: + case HB_SCRIPT_MARCHEN: + case HB_SCRIPT_NEWA: + + /* Unicode-10.0 additions */ + case HB_SCRIPT_MASARAM_GONDI: + case HB_SCRIPT_SOYOMBO: + case HB_SCRIPT_ZANABAZAR_SQUARE: + + /* Unicode-11.0 additions */ + case HB_SCRIPT_DOGRA: + case HB_SCRIPT_GUNJALA_GONDI: + //case HB_SCRIPT_HANIFI_ROHINGYA: + case HB_SCRIPT_MAKASAR: + //case HB_SCRIPT_SOGDIAN: + + /* Unicode-12.0 additions */ + case HB_SCRIPT_NANDINAGARI: + + /* Unicode-13.0 additions */ + case HB_SCRIPT_CHORASMIAN: + case HB_SCRIPT_DIVES_AKURU: + + /* If the designer designed the font for the 'DFLT' script, + * (or we ended up arbitrarily pick 'latn'), use the default shaper. + * Otherwise, use the specific shaper. + * Note that for some simple scripts, there may not be *any* + * GSUB/GPOS needed, so there may be no scripts found! */ + if (planner->map.chosen_script[0] == HB_TAG ('D','F','L','T') || + planner->map.chosen_script[0] == HB_TAG ('l','a','t','n')) + return &_hb_ot_complex_shaper_default; + else + return &_hb_ot_complex_shaper_use; + } +} + + +#endif /* HB_OT_SHAPE_COMPLEX_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-fallback.cc b/thirdparty/harfbuzz/src/hb-ot-shape-fallback.cc new file mode 100644 index 0000000000..42bf524d16 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-fallback.cc @@ -0,0 +1,596 @@ +/* + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-fallback.hh" +#include "hb-kern.hh" + +static unsigned int +recategorize_combining_class (hb_codepoint_t u, + unsigned int klass) +{ + if (klass >= 200) + return klass; + + /* Thai / Lao need some per-character work. */ + if ((u & ~0xFF) == 0x0E00u) + { + if (unlikely (klass == 0)) + { + switch (u) + { + case 0x0E31u: + case 0x0E34u: + case 0x0E35u: + case 0x0E36u: + case 0x0E37u: + case 0x0E47u: + case 0x0E4Cu: + case 0x0E4Du: + case 0x0E4Eu: + klass = HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT; + break; + + case 0x0EB1u: + case 0x0EB4u: + case 0x0EB5u: + case 0x0EB6u: + case 0x0EB7u: + case 0x0EBBu: + case 0x0ECCu: + case 0x0ECDu: + klass = HB_UNICODE_COMBINING_CLASS_ABOVE; + break; + + case 0x0EBCu: + klass = HB_UNICODE_COMBINING_CLASS_BELOW; + break; + } + } else { + /* Thai virama is below-right */ + if (u == 0x0E3Au) + klass = HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT; + } + } + + switch (klass) + { + + /* Hebrew */ + + case HB_MODIFIED_COMBINING_CLASS_CCC10: /* sheva */ + case HB_MODIFIED_COMBINING_CLASS_CCC11: /* hataf segol */ + case HB_MODIFIED_COMBINING_CLASS_CCC12: /* hataf patah */ + case HB_MODIFIED_COMBINING_CLASS_CCC13: /* hataf qamats */ + case HB_MODIFIED_COMBINING_CLASS_CCC14: /* hiriq */ + case HB_MODIFIED_COMBINING_CLASS_CCC15: /* tsere */ + case HB_MODIFIED_COMBINING_CLASS_CCC16: /* segol */ + case HB_MODIFIED_COMBINING_CLASS_CCC17: /* patah */ + case HB_MODIFIED_COMBINING_CLASS_CCC18: /* qamats */ + case HB_MODIFIED_COMBINING_CLASS_CCC20: /* qubuts */ + case HB_MODIFIED_COMBINING_CLASS_CCC22: /* meteg */ + return HB_UNICODE_COMBINING_CLASS_BELOW; + + case HB_MODIFIED_COMBINING_CLASS_CCC23: /* rafe */ + return HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE; + + case HB_MODIFIED_COMBINING_CLASS_CCC24: /* shin dot */ + return HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT; + + case HB_MODIFIED_COMBINING_CLASS_CCC25: /* sin dot */ + case HB_MODIFIED_COMBINING_CLASS_CCC19: /* holam */ + return HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT; + + case HB_MODIFIED_COMBINING_CLASS_CCC26: /* point varika */ + return HB_UNICODE_COMBINING_CLASS_ABOVE; + + case HB_MODIFIED_COMBINING_CLASS_CCC21: /* dagesh */ + break; + + + /* Arabic and Syriac */ + + case HB_MODIFIED_COMBINING_CLASS_CCC27: /* fathatan */ + case HB_MODIFIED_COMBINING_CLASS_CCC28: /* dammatan */ + case HB_MODIFIED_COMBINING_CLASS_CCC30: /* fatha */ + case HB_MODIFIED_COMBINING_CLASS_CCC31: /* damma */ + case HB_MODIFIED_COMBINING_CLASS_CCC33: /* shadda */ + case HB_MODIFIED_COMBINING_CLASS_CCC34: /* sukun */ + case HB_MODIFIED_COMBINING_CLASS_CCC35: /* superscript alef */ + case HB_MODIFIED_COMBINING_CLASS_CCC36: /* superscript alaph */ + return HB_UNICODE_COMBINING_CLASS_ABOVE; + + case HB_MODIFIED_COMBINING_CLASS_CCC29: /* kasratan */ + case HB_MODIFIED_COMBINING_CLASS_CCC32: /* kasra */ + return HB_UNICODE_COMBINING_CLASS_BELOW; + + + /* Thai */ + + case HB_MODIFIED_COMBINING_CLASS_CCC103: /* sara u / sara uu */ + return HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT; + + case HB_MODIFIED_COMBINING_CLASS_CCC107: /* mai */ + return HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT; + + + /* Lao */ + + case HB_MODIFIED_COMBINING_CLASS_CCC118: /* sign u / sign uu */ + return HB_UNICODE_COMBINING_CLASS_BELOW; + + case HB_MODIFIED_COMBINING_CLASS_CCC122: /* mai */ + return HB_UNICODE_COMBINING_CLASS_ABOVE; + + + /* Tibetan */ + + case HB_MODIFIED_COMBINING_CLASS_CCC129: /* sign aa */ + return HB_UNICODE_COMBINING_CLASS_BELOW; + + case HB_MODIFIED_COMBINING_CLASS_CCC130: /* sign i*/ + return HB_UNICODE_COMBINING_CLASS_ABOVE; + + case HB_MODIFIED_COMBINING_CLASS_CCC132: /* sign u */ + return HB_UNICODE_COMBINING_CLASS_BELOW; + + } + + return klass; +} + +void +_hb_ot_shape_fallback_mark_position_recategorize_marks (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font HB_UNUSED, + hb_buffer_t *buffer) +{ +#ifdef HB_NO_OT_SHAPE_FALLBACK + return; +#endif + + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + if (_hb_glyph_info_get_general_category (&info[i]) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) { + unsigned int combining_class = _hb_glyph_info_get_modified_combining_class (&info[i]); + combining_class = recategorize_combining_class (info[i].codepoint, combining_class); + _hb_glyph_info_set_modified_combining_class (&info[i], combining_class); + } +} + + +static void +zero_mark_advances (hb_buffer_t *buffer, + unsigned int start, + unsigned int end, + bool adjust_offsets_when_zeroing) +{ + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = start; i < end; i++) + if (_hb_glyph_info_get_general_category (&info[i]) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) + { + if (adjust_offsets_when_zeroing) + { + buffer->pos[i].x_offset -= buffer->pos[i].x_advance; + buffer->pos[i].y_offset -= buffer->pos[i].y_advance; + } + buffer->pos[i].x_advance = 0; + buffer->pos[i].y_advance = 0; + } +} + +static inline void +position_mark (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font, + hb_buffer_t *buffer, + hb_glyph_extents_t &base_extents, + unsigned int i, + unsigned int combining_class) +{ + hb_glyph_extents_t mark_extents; + if (!font->get_glyph_extents (buffer->info[i].codepoint, &mark_extents)) + return; + + hb_position_t y_gap = font->y_scale / 16; + + hb_glyph_position_t &pos = buffer->pos[i]; + pos.x_offset = pos.y_offset = 0; + + + /* We don't position LEFT and RIGHT marks. */ + + /* X positioning */ + switch (combining_class) + { + case HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW: + case HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE: + if (buffer->props.direction == HB_DIRECTION_LTR) { + pos.x_offset += base_extents.x_bearing + base_extents.width - mark_extents.width / 2 - mark_extents.x_bearing; + break; + } else if (buffer->props.direction == HB_DIRECTION_RTL) { + pos.x_offset += base_extents.x_bearing - mark_extents.width / 2 - mark_extents.x_bearing; + break; + } + HB_FALLTHROUGH; + + default: + case HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW: + case HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE: + case HB_UNICODE_COMBINING_CLASS_BELOW: + case HB_UNICODE_COMBINING_CLASS_ABOVE: + /* Center align. */ + pos.x_offset += base_extents.x_bearing + (base_extents.width - mark_extents.width) / 2 - mark_extents.x_bearing; + break; + + case HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT: + case HB_UNICODE_COMBINING_CLASS_BELOW_LEFT: + case HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT: + /* Left align. */ + pos.x_offset += base_extents.x_bearing - mark_extents.x_bearing; + break; + + case HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT: + case HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT: + case HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT: + /* Right align. */ + pos.x_offset += base_extents.x_bearing + base_extents.width - mark_extents.width - mark_extents.x_bearing; + break; + } + + /* Y positioning */ + switch (combining_class) + { + case HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW: + case HB_UNICODE_COMBINING_CLASS_BELOW_LEFT: + case HB_UNICODE_COMBINING_CLASS_BELOW: + case HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT: + /* Add gap, fall-through. */ + base_extents.height -= y_gap; + HB_FALLTHROUGH; + + case HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT: + case HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW: + pos.y_offset = base_extents.y_bearing + base_extents.height - mark_extents.y_bearing; + /* Never shift up "below" marks. */ + if ((y_gap > 0) == (pos.y_offset > 0)) + { + base_extents.height -= pos.y_offset; + pos.y_offset = 0; + } + base_extents.height += mark_extents.height; + break; + + case HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE: + case HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT: + case HB_UNICODE_COMBINING_CLASS_ABOVE: + case HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT: + /* Add gap, fall-through. */ + base_extents.y_bearing += y_gap; + base_extents.height -= y_gap; + HB_FALLTHROUGH; + + case HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE: + case HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT: + pos.y_offset = base_extents.y_bearing - (mark_extents.y_bearing + mark_extents.height); + /* Don't shift down "above" marks too much. */ + if ((y_gap > 0) != (pos.y_offset > 0)) + { + unsigned int correction = -pos.y_offset / 2; + base_extents.y_bearing += correction; + base_extents.height -= correction; + pos.y_offset += correction; + } + base_extents.y_bearing -= mark_extents.height; + base_extents.height += mark_extents.height; + break; + } +} + +static inline void +position_around_base (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer, + unsigned int base, + unsigned int end, + bool adjust_offsets_when_zeroing) +{ + hb_direction_t horiz_dir = HB_DIRECTION_INVALID; + + buffer->unsafe_to_break (base, end); + + hb_glyph_extents_t base_extents; + if (!font->get_glyph_extents (buffer->info[base].codepoint, + &base_extents)) + { + /* If extents don't work, zero marks and go home. */ + zero_mark_advances (buffer, base + 1, end, adjust_offsets_when_zeroing); + return; + } + base_extents.y_bearing += buffer->pos[base].y_offset; + /* Use horizontal advance for horizontal positioning. + * Generally a better idea. Also works for zero-ink glyphs. See: + * https://github.com/harfbuzz/harfbuzz/issues/1532 */ + base_extents.x_bearing = 0; + base_extents.width = font->get_glyph_h_advance (buffer->info[base].codepoint); + + unsigned int lig_id = _hb_glyph_info_get_lig_id (&buffer->info[base]); + /* Use integer for num_lig_components such that it doesn't convert to unsigned + * when we divide or multiply by it. */ + int num_lig_components = _hb_glyph_info_get_lig_num_comps (&buffer->info[base]); + + hb_position_t x_offset = 0, y_offset = 0; + if (HB_DIRECTION_IS_FORWARD (buffer->props.direction)) { + x_offset -= buffer->pos[base].x_advance; + y_offset -= buffer->pos[base].y_advance; + } + + hb_glyph_extents_t component_extents = base_extents; + int last_lig_component = -1; + unsigned int last_combining_class = 255; + hb_glyph_extents_t cluster_extents = base_extents; /* Initialization is just to shut gcc up. */ + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = base + 1; i < end; i++) + if (_hb_glyph_info_get_modified_combining_class (&info[i])) + { + if (num_lig_components > 1) { + unsigned int this_lig_id = _hb_glyph_info_get_lig_id (&info[i]); + int this_lig_component = _hb_glyph_info_get_lig_comp (&info[i]) - 1; + /* Conditions for attaching to the last component. */ + if (!lig_id || lig_id != this_lig_id || this_lig_component >= num_lig_components) + this_lig_component = num_lig_components - 1; + if (last_lig_component != this_lig_component) + { + last_lig_component = this_lig_component; + last_combining_class = 255; + component_extents = base_extents; + if (unlikely (horiz_dir == HB_DIRECTION_INVALID)) { + if (HB_DIRECTION_IS_HORIZONTAL (plan->props.direction)) + horiz_dir = plan->props.direction; + else + horiz_dir = hb_script_get_horizontal_direction (plan->props.script); + } + if (horiz_dir == HB_DIRECTION_LTR) + component_extents.x_bearing += (this_lig_component * component_extents.width) / num_lig_components; + else + component_extents.x_bearing += ((num_lig_components - 1 - this_lig_component) * component_extents.width) / num_lig_components; + component_extents.width /= num_lig_components; + } + } + + unsigned int this_combining_class = _hb_glyph_info_get_modified_combining_class (&info[i]); + if (last_combining_class != this_combining_class) + { + last_combining_class = this_combining_class; + cluster_extents = component_extents; + } + + position_mark (plan, font, buffer, cluster_extents, i, this_combining_class); + + buffer->pos[i].x_advance = 0; + buffer->pos[i].y_advance = 0; + buffer->pos[i].x_offset += x_offset; + buffer->pos[i].y_offset += y_offset; + + } else { + if (HB_DIRECTION_IS_FORWARD (buffer->props.direction)) { + x_offset -= buffer->pos[i].x_advance; + y_offset -= buffer->pos[i].y_advance; + } else { + x_offset += buffer->pos[i].x_advance; + y_offset += buffer->pos[i].y_advance; + } + } +} + +static inline void +position_cluster (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer, + unsigned int start, + unsigned int end, + bool adjust_offsets_when_zeroing) +{ + if (end - start < 2) + return; + + /* Find the base glyph */ + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = start; i < end; i++) + if (!_hb_glyph_info_is_unicode_mark (&info[i])) + { + /* Find mark glyphs */ + unsigned int j; + for (j = i + 1; j < end; j++) + if (!_hb_glyph_info_is_unicode_mark (&info[j])) + break; + + position_around_base (plan, font, buffer, i, j, adjust_offsets_when_zeroing); + + i = j - 1; + } +} + +void +_hb_ot_shape_fallback_mark_position (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer, + bool adjust_offsets_when_zeroing) +{ +#ifdef HB_NO_OT_SHAPE_FALLBACK + return; +#endif + + _hb_buffer_assert_gsubgpos_vars (buffer); + + unsigned int start = 0; + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 1; i < count; i++) + if (likely (!_hb_glyph_info_is_unicode_mark (&info[i]))) { + position_cluster (plan, font, buffer, start, i, adjust_offsets_when_zeroing); + start = i; + } + position_cluster (plan, font, buffer, start, count, adjust_offsets_when_zeroing); +} + + +#ifndef HB_DISABLE_DEPRECATED +struct hb_ot_shape_fallback_kern_driver_t +{ + hb_ot_shape_fallback_kern_driver_t (hb_font_t *font_, + hb_buffer_t *buffer) : + font (font_), direction (buffer->props.direction) {} + + hb_position_t get_kerning (hb_codepoint_t first, hb_codepoint_t second) const + { + hb_position_t kern = 0; + font->get_glyph_kerning_for_direction (first, second, + direction, + &kern, &kern); + return kern; + } + + hb_font_t *font; + hb_direction_t direction; +}; +#endif + +/* Performs font-assisted kerning. */ +void +_hb_ot_shape_fallback_kern (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ +#ifdef HB_NO_OT_SHAPE_FALLBACK + return; +#endif + +#ifndef HB_DISABLE_DEPRECATED + if (HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction) ? + !font->has_glyph_h_kerning_func () : + !font->has_glyph_v_kerning_func ()) + return; + + bool reverse = HB_DIRECTION_IS_BACKWARD (buffer->props.direction); + + if (reverse) + buffer->reverse (); + + hb_ot_shape_fallback_kern_driver_t driver (font, buffer); + OT::hb_kern_machine_t<hb_ot_shape_fallback_kern_driver_t> machine (driver); + machine.kern (font, buffer, plan->kern_mask, false); + + if (reverse) + buffer->reverse (); +#endif +} + + +/* Adjusts width of various spaces. */ +void +_hb_ot_shape_fallback_spaces (const hb_ot_shape_plan_t *plan HB_UNUSED, + hb_font_t *font, + hb_buffer_t *buffer) +{ + hb_glyph_info_t *info = buffer->info; + hb_glyph_position_t *pos = buffer->pos; + bool horizontal = HB_DIRECTION_IS_HORIZONTAL (buffer->props.direction); + unsigned int count = buffer->len; + for (unsigned int i = 0; i < count; i++) + if (_hb_glyph_info_is_unicode_space (&info[i]) && !_hb_glyph_info_ligated (&info[i])) + { + hb_unicode_funcs_t::space_t space_type = _hb_glyph_info_get_unicode_space_fallback_type (&info[i]); + hb_codepoint_t glyph; + typedef hb_unicode_funcs_t t; + switch (space_type) + { + case t::NOT_SPACE: /* Shouldn't happen. */ + case t::SPACE: + break; + + case t::SPACE_EM: + case t::SPACE_EM_2: + case t::SPACE_EM_3: + case t::SPACE_EM_4: + case t::SPACE_EM_5: + case t::SPACE_EM_6: + case t::SPACE_EM_16: + if (horizontal) + pos[i].x_advance = +(font->x_scale + ((int) space_type)/2) / (int) space_type; + else + pos[i].y_advance = -(font->y_scale + ((int) space_type)/2) / (int) space_type; + break; + + case t::SPACE_4_EM_18: + if (horizontal) + pos[i].x_advance = (int64_t) +font->x_scale * 4 / 18; + else + pos[i].y_advance = (int64_t) -font->y_scale * 4 / 18; + break; + + case t::SPACE_FIGURE: + for (char u = '0'; u <= '9'; u++) + if (font->get_nominal_glyph (u, &glyph)) + { + if (horizontal) + pos[i].x_advance = font->get_glyph_h_advance (glyph); + else + pos[i].y_advance = font->get_glyph_v_advance (glyph); + break; + } + break; + + case t::SPACE_PUNCTUATION: + if (font->get_nominal_glyph ('.', &glyph) || + font->get_nominal_glyph (',', &glyph)) + { + if (horizontal) + pos[i].x_advance = font->get_glyph_h_advance (glyph); + else + pos[i].y_advance = font->get_glyph_v_advance (glyph); + } + break; + + case t::SPACE_NARROW: + /* Half-space? + * Unicode doc https://unicode.org/charts/PDF/U2000.pdf says ~1/4 or 1/5 of EM. + * However, in my testing, many fonts have their regular space being about that + * size. To me, a percentage of the space width makes more sense. Half is as + * good as any. */ + if (horizontal) + pos[i].x_advance /= 2; + else + pos[i].y_advance /= 2; + break; + } + } +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-fallback.hh b/thirdparty/harfbuzz/src/hb-ot-shape-fallback.hh new file mode 100644 index 0000000000..5faf5f2dfb --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-fallback.hh @@ -0,0 +1,54 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_FALLBACK_HH +#define HB_OT_SHAPE_FALLBACK_HH + +#include "hb.hh" + +#include "hb-ot-shape.hh" + + +HB_INTERNAL void _hb_ot_shape_fallback_mark_position (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer, + bool adjust_offsets_when_zeroing); + +HB_INTERNAL void _hb_ot_shape_fallback_mark_position_recategorize_marks (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + + +HB_INTERNAL void _hb_ot_shape_fallback_kern (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + +HB_INTERNAL void _hb_ot_shape_fallback_spaces (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer); + + +#endif /* HB_OT_SHAPE_FALLBACK_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-normalize.cc b/thirdparty/harfbuzz/src/hb-ot-shape-normalize.cc new file mode 100644 index 0000000000..50b5829c4a --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-normalize.cc @@ -0,0 +1,478 @@ +/* + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#include "hb-ot-shape-normalize.hh" +#include "hb-ot-shape-complex.hh" +#include "hb-ot-shape.hh" + + +/* + * HIGHLEVEL DESIGN: + * + * This file exports one main function: _hb_ot_shape_normalize(). + * + * This function closely reflects the Unicode Normalization Algorithm, + * yet it's different. + * + * Each shaper specifies whether it prefers decomposed (NFD) or composed (NFC). + * The logic however tries to use whatever the font can support. + * + * In general what happens is that: each grapheme is decomposed in a chain + * of 1:2 decompositions, marks reordered, and then recomposed if desired, + * so far it's like Unicode Normalization. However, the decomposition and + * recomposition only happens if the font supports the resulting characters. + * + * The goals are: + * + * - Try to render all canonically equivalent strings similarly. To really + * achieve this we have to always do the full decomposition and then + * selectively recompose from there. It's kinda too expensive though, so + * we skip some cases. For example, if composed is desired, we simply + * don't touch 1-character clusters that are supported by the font, even + * though their NFC may be different. + * + * - When a font has a precomposed character for a sequence but the 'ccmp' + * feature in the font is not adequate, use the precomposed character + * which typically has better mark positioning. + * + * - When a font does not support a combining mark, but supports it precomposed + * with previous base, use that. This needs the itemizer to have this + * knowledge too. We need to provide assistance to the itemizer. + * + * - When a font does not support a character but supports its canonical + * decomposition, well, use the decomposition. + * + * - The complex shapers can customize the compose and decompose functions to + * offload some of their requirements to the normalizer. For example, the + * Indic shaper may want to disallow recomposing of two matras. + */ + +static bool +decompose_unicode (const hb_ot_shape_normalize_context_t *c, + hb_codepoint_t ab, + hb_codepoint_t *a, + hb_codepoint_t *b) +{ + return (bool) c->unicode->decompose (ab, a, b); +} + +static bool +compose_unicode (const hb_ot_shape_normalize_context_t *c, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab) +{ + return (bool) c->unicode->compose (a, b, ab); +} + +static inline void +set_glyph (hb_glyph_info_t &info, hb_font_t *font) +{ + (void) font->get_nominal_glyph (info.codepoint, &info.glyph_index()); +} + +static inline void +output_char (hb_buffer_t *buffer, hb_codepoint_t unichar, hb_codepoint_t glyph) +{ + buffer->cur().glyph_index() = glyph; + buffer->output_glyph (unichar); /* This is very confusing indeed. */ + _hb_glyph_info_set_unicode_props (&buffer->prev(), buffer); +} + +static inline void +next_char (hb_buffer_t *buffer, hb_codepoint_t glyph) +{ + buffer->cur().glyph_index() = glyph; + buffer->next_glyph (); +} + +static inline void +skip_char (hb_buffer_t *buffer) +{ + buffer->skip_glyph (); +} + +/* Returns 0 if didn't decompose, number of resulting characters otherwise. */ +static inline unsigned int +decompose (const hb_ot_shape_normalize_context_t *c, bool shortest, hb_codepoint_t ab) +{ + hb_codepoint_t a = 0, b = 0, a_glyph = 0, b_glyph = 0; + hb_buffer_t * const buffer = c->buffer; + hb_font_t * const font = c->font; + + if (!c->decompose (c, ab, &a, &b) || + (b && !font->get_nominal_glyph (b, &b_glyph))) + return 0; + + bool has_a = (bool) font->get_nominal_glyph (a, &a_glyph); + if (shortest && has_a) { + /* Output a and b */ + output_char (buffer, a, a_glyph); + if (likely (b)) { + output_char (buffer, b, b_glyph); + return 2; + } + return 1; + } + + unsigned int ret; + if ((ret = decompose (c, shortest, a))) { + if (b) { + output_char (buffer, b, b_glyph); + return ret + 1; + } + return ret; + } + + if (has_a) { + output_char (buffer, a, a_glyph); + if (likely (b)) { + output_char (buffer, b, b_glyph); + return 2; + } + return 1; + } + + return 0; +} + +static inline void +decompose_current_character (const hb_ot_shape_normalize_context_t *c, bool shortest) +{ + hb_buffer_t * const buffer = c->buffer; + hb_codepoint_t u = buffer->cur().codepoint; + hb_codepoint_t glyph = 0; + + if (shortest && c->font->get_nominal_glyph (u, &glyph)) + { + next_char (buffer, glyph); + return; + } + + if (decompose (c, shortest, u)) + { + skip_char (buffer); + return; + } + + if (!shortest && c->font->get_nominal_glyph (u, &glyph)) + { + next_char (buffer, glyph); + return; + } + + if (_hb_glyph_info_is_unicode_space (&buffer->cur())) + { + hb_codepoint_t space_glyph; + hb_unicode_funcs_t::space_t space_type = buffer->unicode->space_fallback_type (u); + if (space_type != hb_unicode_funcs_t::NOT_SPACE && c->font->get_nominal_glyph (0x0020u, &space_glyph)) + { + _hb_glyph_info_set_unicode_space_fallback_type (&buffer->cur(), space_type); + next_char (buffer, space_glyph); + buffer->scratch_flags |= HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK; + return; + } + } + + if (u == 0x2011u) + { + /* U+2011 is the only sensible character that is a no-break version of another character + * and not a space. The space ones are handled already. Handle this lone one. */ + hb_codepoint_t other_glyph; + if (c->font->get_nominal_glyph (0x2010u, &other_glyph)) + { + next_char (buffer, other_glyph); + return; + } + } + + next_char (buffer, glyph); /* glyph is initialized in earlier branches. */ +} + +static inline void +handle_variation_selector_cluster (const hb_ot_shape_normalize_context_t *c, + unsigned int end, + bool short_circuit HB_UNUSED) +{ + /* TODO Currently if there's a variation-selector we give-up, it's just too hard. */ + hb_buffer_t * const buffer = c->buffer; + hb_font_t * const font = c->font; + for (; buffer->idx < end - 1 && buffer->successful;) { + if (unlikely (buffer->unicode->is_variation_selector (buffer->cur(+1).codepoint))) { + if (font->get_variation_glyph (buffer->cur().codepoint, buffer->cur(+1).codepoint, &buffer->cur().glyph_index())) + { + hb_codepoint_t unicode = buffer->cur().codepoint; + buffer->replace_glyphs (2, 1, &unicode); + } + else + { + /* Just pass on the two characters separately, let GSUB do its magic. */ + set_glyph (buffer->cur(), font); + buffer->next_glyph (); + set_glyph (buffer->cur(), font); + buffer->next_glyph (); + } + /* Skip any further variation selectors. */ + while (buffer->idx < end && unlikely (buffer->unicode->is_variation_selector (buffer->cur().codepoint))) + { + set_glyph (buffer->cur(), font); + buffer->next_glyph (); + } + } else { + set_glyph (buffer->cur(), font); + buffer->next_glyph (); + } + } + if (likely (buffer->idx < end)) { + set_glyph (buffer->cur(), font); + buffer->next_glyph (); + } +} + +static inline void +decompose_multi_char_cluster (const hb_ot_shape_normalize_context_t *c, unsigned int end, bool short_circuit) +{ + hb_buffer_t * const buffer = c->buffer; + for (unsigned int i = buffer->idx; i < end && buffer->successful; i++) + if (unlikely (buffer->unicode->is_variation_selector (buffer->info[i].codepoint))) { + handle_variation_selector_cluster (c, end, short_circuit); + return; + } + + while (buffer->idx < end && buffer->successful) + decompose_current_character (c, short_circuit); +} + + +static int +compare_combining_class (const hb_glyph_info_t *pa, const hb_glyph_info_t *pb) +{ + unsigned int a = _hb_glyph_info_get_modified_combining_class (pa); + unsigned int b = _hb_glyph_info_get_modified_combining_class (pb); + + return a < b ? -1 : a == b ? 0 : +1; +} + + +void +_hb_ot_shape_normalize (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + hb_font_t *font) +{ + if (unlikely (!buffer->len)) return; + + _hb_buffer_assert_unicode_vars (buffer); + + hb_ot_shape_normalization_mode_t mode = plan->shaper->normalization_preference; + if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_AUTO) + { + if (plan->has_gpos_mark) + // https://github.com/harfbuzz/harfbuzz/issues/653#issuecomment-423905920 + //mode = HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED; + mode = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS; + else + mode = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS; + } + + const hb_ot_shape_normalize_context_t c = { + plan, + buffer, + font, + buffer->unicode, + plan->shaper->decompose ? plan->shaper->decompose : decompose_unicode, + plan->shaper->compose ? plan->shaper->compose : compose_unicode + }; + + bool always_short_circuit = mode == HB_OT_SHAPE_NORMALIZATION_MODE_NONE; + bool might_short_circuit = always_short_circuit || + (mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED && + mode != HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT); + unsigned int count; + + /* We do a fairly straightforward yet custom normalization process in three + * separate rounds: decompose, reorder, recompose (if desired). Currently + * this makes two buffer swaps. We can make it faster by moving the last + * two rounds into the inner loop for the first round, but it's more readable + * this way. */ + + + /* First round, decompose */ + + bool all_simple = true; + { + buffer->clear_output (); + count = buffer->len; + buffer->idx = 0; + do + { + unsigned int end; + for (end = buffer->idx + 1; end < count; end++) + if (unlikely (_hb_glyph_info_is_unicode_mark (&buffer->info[end]))) + break; + + if (end < count) + end--; /* Leave one base for the marks to cluster with. */ + + /* From idx to end are simple clusters. */ + if (might_short_circuit) + { + unsigned int done = font->get_nominal_glyphs (end - buffer->idx, + &buffer->cur().codepoint, + sizeof (buffer->info[0]), + &buffer->cur().glyph_index(), + sizeof (buffer->info[0])); + buffer->next_glyphs (done); + } + while (buffer->idx < end && buffer->successful) + decompose_current_character (&c, might_short_circuit); + + if (buffer->idx == count || !buffer->successful) + break; + + all_simple = false; + + /* Find all the marks now. */ + for (end = buffer->idx + 1; end < count; end++) + if (!_hb_glyph_info_is_unicode_mark(&buffer->info[end])) + break; + + /* idx to end is one non-simple cluster. */ + decompose_multi_char_cluster (&c, end, always_short_circuit); + } + while (buffer->idx < count && buffer->successful); + buffer->swap_buffers (); + } + + + /* Second round, reorder (inplace) */ + + if (!all_simple) + { + count = buffer->len; + for (unsigned int i = 0; i < count; i++) + { + if (_hb_glyph_info_get_modified_combining_class (&buffer->info[i]) == 0) + continue; + + unsigned int end; + for (end = i + 1; end < count; end++) + if (_hb_glyph_info_get_modified_combining_class (&buffer->info[end]) == 0) + break; + + /* We are going to do a O(n^2). Only do this if the sequence is short. */ + if (end - i > HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS) { + i = end; + continue; + } + + buffer->sort (i, end, compare_combining_class); + + if (plan->shaper->reorder_marks) + plan->shaper->reorder_marks (plan, buffer, i, end); + + i = end; + } + } + if (buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_CGJ) + { + /* For all CGJ, check if it prevented any reordering at all. + * If it did NOT, then make it skippable. + * https://github.com/harfbuzz/harfbuzz/issues/554 + */ + for (unsigned int i = 1; i + 1 < buffer->len; i++) + if (buffer->info[i].codepoint == 0x034Fu/*CGJ*/ && + info_cc(buffer->info[i-1]) <= info_cc(buffer->info[i+1])) + { + _hb_glyph_info_unhide (&buffer->info[i]); + } + } + + + /* Third round, recompose */ + + if (!all_simple && + (mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS || + mode == HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT)) + { + /* As noted in the comment earlier, we don't try to combine + * ccc=0 chars with their previous Starter. */ + + buffer->clear_output (); + count = buffer->len; + unsigned int starter = 0; + buffer->next_glyph (); + while (buffer->idx < count && buffer->successful) + { + hb_codepoint_t composed, glyph; + if (/* We don't try to compose a non-mark character with it's preceding starter. + * This is both an optimization to avoid trying to compose every two neighboring + * glyphs in most scripts AND a desired feature for Hangul. Apparently Hangul + * fonts are not designed to mix-and-match pre-composed syllables and Jamo. */ + _hb_glyph_info_is_unicode_mark(&buffer->cur())) + { + if (/* If there's anything between the starter and this char, they should have CCC + * smaller than this character's. */ + (starter == buffer->out_len - 1 || + info_cc (buffer->prev()) < info_cc (buffer->cur())) && + /* And compose. */ + c.compose (&c, + buffer->out_info[starter].codepoint, + buffer->cur().codepoint, + &composed) && + /* And the font has glyph for the composite. */ + font->get_nominal_glyph (composed, &glyph)) + { + /* Composes. */ + buffer->next_glyph (); /* Copy to out-buffer. */ + if (unlikely (!buffer->successful)) + return; + buffer->merge_out_clusters (starter, buffer->out_len); + buffer->out_len--; /* Remove the second composable. */ + /* Modify starter and carry on. */ + buffer->out_info[starter].codepoint = composed; + buffer->out_info[starter].glyph_index() = glyph; + _hb_glyph_info_set_unicode_props (&buffer->out_info[starter], buffer); + + continue; + } + } + + /* Blocked, or doesn't compose. */ + buffer->next_glyph (); + + if (info_cc (buffer->prev()) == 0) + starter = buffer->out_len - 1; + } + buffer->swap_buffers (); + } +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-shape-normalize.hh b/thirdparty/harfbuzz/src/hb-ot-shape-normalize.hh new file mode 100644 index 0000000000..04f1a80091 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape-normalize.hh @@ -0,0 +1,70 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_NORMALIZE_HH +#define HB_OT_SHAPE_NORMALIZE_HH + +#include "hb.hh" + + +/* buffer var allocations, used during the normalization process */ +#define glyph_index() var1.u32 + +struct hb_ot_shape_plan_t; + +enum hb_ot_shape_normalization_mode_t { + HB_OT_SHAPE_NORMALIZATION_MODE_NONE, + HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED, + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* Never composes base-to-base */ + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* Always fully decomposes and then recompose back */ + + HB_OT_SHAPE_NORMALIZATION_MODE_AUTO, /* See hb-ot-shape-normalize.cc for logic. */ + HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_AUTO +}; + +HB_INTERNAL void _hb_ot_shape_normalize (const hb_ot_shape_plan_t *shaper, + hb_buffer_t *buffer, + hb_font_t *font); + + +struct hb_ot_shape_normalize_context_t +{ + const hb_ot_shape_plan_t *plan; + hb_buffer_t *buffer; + hb_font_t *font; + hb_unicode_funcs_t *unicode; + bool (*decompose) (const hb_ot_shape_normalize_context_t *c, + hb_codepoint_t ab, + hb_codepoint_t *a, + hb_codepoint_t *b); + bool (*compose) (const hb_ot_shape_normalize_context_t *c, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab); +}; + + +#endif /* HB_OT_SHAPE_NORMALIZE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape.cc b/thirdparty/harfbuzz/src/hb-ot-shape.cc new file mode 100644 index 0000000000..fe0444987a --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape.cc @@ -0,0 +1,1223 @@ +/* + * Copyright © 2009,2010 Red Hat, Inc. + * Copyright © 2010,2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_SHAPE + +#ifdef HB_NO_OT_LAYOUT +#error "Cannot compile 'ot' shaper with HB_NO_OT_LAYOUT." +#endif + +#include "hb-shaper-impl.hh" + +#include "hb-ot-shape.hh" +#include "hb-ot-shape-complex.hh" +#include "hb-ot-shape-fallback.hh" +#include "hb-ot-shape-normalize.hh" + +#include "hb-ot-face.hh" + +#include "hb-set.hh" + +#include "hb-aat-layout.hh" + + +#ifndef HB_NO_AAT_SHAPE +static inline bool +_hb_apply_morx (hb_face_t *face, const hb_segment_properties_t *props) +{ + /* https://github.com/harfbuzz/harfbuzz/issues/2124 */ + return hb_aat_layout_has_substitution (face) && + (HB_DIRECTION_IS_HORIZONTAL (props->direction) || !hb_ot_layout_has_substitution (face)); +} +#endif + +/** + * SECTION:hb-ot-shape + * @title: hb-ot-shape + * @short_description: OpenType shaping support + * @include: hb-ot.h + * + * Support functions for OpenType shaping related queries. + **/ + + +static void +hb_ot_shape_collect_features (hb_ot_shape_planner_t *planner, + const hb_feature_t *user_features, + unsigned int num_user_features); + +hb_ot_shape_planner_t::hb_ot_shape_planner_t (hb_face_t *face, + const hb_segment_properties_t *props) : + face (face), + props (*props), + map (face, props), + aat_map (face, props) +#ifndef HB_NO_AAT_SHAPE + , apply_morx (_hb_apply_morx (face, props)) +#endif +{ + shaper = hb_ot_shape_complex_categorize (this); + + script_zero_marks = shaper->zero_width_marks != HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE; + script_fallback_mark_positioning = shaper->fallback_position; + + /* https://github.com/harfbuzz/harfbuzz/issues/1528 */ + if (apply_morx && shaper != &_hb_ot_complex_shaper_default) + shaper = &_hb_ot_complex_shaper_dumber; +} + +void +hb_ot_shape_planner_t::compile (hb_ot_shape_plan_t &plan, + const hb_ot_shape_plan_key_t &key) +{ + plan.props = props; + plan.shaper = shaper; + map.compile (plan.map, key); +#ifndef HB_NO_AAT_SHAPE + if (apply_morx) + aat_map.compile (plan.aat_map); +#endif + +#ifndef HB_NO_OT_SHAPE_FRACTIONS + plan.frac_mask = plan.map.get_1_mask (HB_TAG ('f','r','a','c')); + plan.numr_mask = plan.map.get_1_mask (HB_TAG ('n','u','m','r')); + plan.dnom_mask = plan.map.get_1_mask (HB_TAG ('d','n','o','m')); + plan.has_frac = plan.frac_mask || (plan.numr_mask && plan.dnom_mask); +#endif + + plan.rtlm_mask = plan.map.get_1_mask (HB_TAG ('r','t','l','m')); + plan.has_vert = !!plan.map.get_1_mask (HB_TAG ('v','e','r','t')); + + hb_tag_t kern_tag = HB_DIRECTION_IS_HORIZONTAL (props.direction) ? + HB_TAG ('k','e','r','n') : HB_TAG ('v','k','r','n'); +#ifndef HB_NO_OT_KERN + plan.kern_mask = plan.map.get_mask (kern_tag); + plan.requested_kerning = !!plan.kern_mask; +#endif +#ifndef HB_NO_AAT_SHAPE + plan.trak_mask = plan.map.get_mask (HB_TAG ('t','r','a','k')); + plan.requested_tracking = !!plan.trak_mask; +#endif + + bool has_gpos_kern = plan.map.get_feature_index (1, kern_tag) != HB_OT_LAYOUT_NO_FEATURE_INDEX; + bool disable_gpos = plan.shaper->gpos_tag && + plan.shaper->gpos_tag != plan.map.chosen_script[1]; + + /* + * Decide who provides glyph classes. GDEF or Unicode. + */ + + if (!hb_ot_layout_has_glyph_classes (face)) + plan.fallback_glyph_classes = true; + + /* + * Decide who does substitutions. GSUB, morx, or fallback. + */ + +#ifndef HB_NO_AAT_SHAPE + plan.apply_morx = apply_morx; +#endif + + /* + * Decide who does positioning. GPOS, kerx, kern, or fallback. + */ + + if (0) + ; +#ifndef HB_NO_AAT_SHAPE + else if (hb_aat_layout_has_positioning (face)) + plan.apply_kerx = true; +#endif + else if (!apply_morx && !disable_gpos && hb_ot_layout_has_positioning (face)) + plan.apply_gpos = true; + + if (!plan.apply_kerx && (!has_gpos_kern || !plan.apply_gpos)) + { + /* Apparently Apple applies kerx if GPOS kern was not applied. */ +#ifndef HB_NO_AAT_SHAPE + if (hb_aat_layout_has_positioning (face)) + plan.apply_kerx = true; + else +#endif +#ifndef HB_NO_OT_KERN + if (hb_ot_layout_has_kerning (face)) + plan.apply_kern = true; +#endif + } + + plan.zero_marks = script_zero_marks && + !plan.apply_kerx && + (!plan.apply_kern +#ifndef HB_NO_OT_KERN + || !hb_ot_layout_has_machine_kerning (face) +#endif + ); + plan.has_gpos_mark = !!plan.map.get_1_mask (HB_TAG ('m','a','r','k')); + + plan.adjust_mark_positioning_when_zeroing = !plan.apply_gpos && + !plan.apply_kerx && + (!plan.apply_kern +#ifndef HB_NO_OT_KERN + || !hb_ot_layout_has_cross_kerning (face) +#endif + ); + + plan.fallback_mark_positioning = plan.adjust_mark_positioning_when_zeroing && + script_fallback_mark_positioning; + +#ifndef HB_NO_AAT_SHAPE + /* Currently we always apply trak. */ + plan.apply_trak = plan.requested_tracking && hb_aat_layout_has_tracking (face); +#endif +} + +bool +hb_ot_shape_plan_t::init0 (hb_face_t *face, + const hb_shape_plan_key_t *key) +{ + map.init (); +#ifndef HB_NO_AAT_SHAPE + aat_map.init (); +#endif + + hb_ot_shape_planner_t planner (face, + &key->props); + + hb_ot_shape_collect_features (&planner, + key->user_features, + key->num_user_features); + + planner.compile (*this, key->ot); + + if (shaper->data_create) + { + data = shaper->data_create (this); + if (unlikely (!data)) + { + map.fini (); +#ifndef HB_NO_AAT_SHAPE + aat_map.fini (); +#endif + return false; + } + } + + return true; +} + +void +hb_ot_shape_plan_t::fini () +{ + if (shaper->data_destroy) + shaper->data_destroy (const_cast<void *> (data)); + + map.fini (); +#ifndef HB_NO_AAT_SHAPE + aat_map.fini (); +#endif +} + +void +hb_ot_shape_plan_t::substitute (hb_font_t *font, + hb_buffer_t *buffer) const +{ +#ifndef HB_NO_AAT_SHAPE + if (unlikely (apply_morx)) + hb_aat_layout_substitute (this, font, buffer); + else +#endif + map.substitute (this, font, buffer); +} + +void +hb_ot_shape_plan_t::position (hb_font_t *font, + hb_buffer_t *buffer) const +{ + if (this->apply_gpos) + map.position (this, font, buffer); +#ifndef HB_NO_AAT_SHAPE + else if (this->apply_kerx) + hb_aat_layout_position (this, font, buffer); +#endif +#ifndef HB_NO_OT_KERN + else if (this->apply_kern) + hb_ot_layout_kern (this, font, buffer); +#endif + else + _hb_ot_shape_fallback_kern (this, font, buffer); + +#ifndef HB_NO_AAT_SHAPE + if (this->apply_trak) + hb_aat_layout_track (this, font, buffer); +#endif +} + + +static const hb_ot_map_feature_t +common_features[] = +{ + {HB_TAG('a','b','v','m'), F_GLOBAL}, + {HB_TAG('b','l','w','m'), F_GLOBAL}, + {HB_TAG('c','c','m','p'), F_GLOBAL}, + {HB_TAG('l','o','c','l'), F_GLOBAL}, + {HB_TAG('m','a','r','k'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('m','k','m','k'), F_GLOBAL_MANUAL_JOINERS}, + {HB_TAG('r','l','i','g'), F_GLOBAL}, +}; + + +static const hb_ot_map_feature_t +horizontal_features[] = +{ + {HB_TAG('c','a','l','t'), F_GLOBAL}, + {HB_TAG('c','l','i','g'), F_GLOBAL}, + {HB_TAG('c','u','r','s'), F_GLOBAL}, + {HB_TAG('d','i','s','t'), F_GLOBAL}, + {HB_TAG('k','e','r','n'), F_GLOBAL_HAS_FALLBACK}, + {HB_TAG('l','i','g','a'), F_GLOBAL}, + {HB_TAG('r','c','l','t'), F_GLOBAL}, +}; + +static void +hb_ot_shape_collect_features (hb_ot_shape_planner_t *planner, + const hb_feature_t *user_features, + unsigned int num_user_features) +{ + hb_ot_map_builder_t *map = &planner->map; + + map->enable_feature (HB_TAG('r','v','r','n')); + map->add_gsub_pause (nullptr); + + switch (planner->props.direction) { + case HB_DIRECTION_LTR: + map->enable_feature (HB_TAG ('l','t','r','a')); + map->enable_feature (HB_TAG ('l','t','r','m')); + break; + case HB_DIRECTION_RTL: + map->enable_feature (HB_TAG ('r','t','l','a')); + map->add_feature (HB_TAG ('r','t','l','m')); + break; + case HB_DIRECTION_TTB: + case HB_DIRECTION_BTT: + case HB_DIRECTION_INVALID: + default: + break; + } + +#ifndef HB_NO_OT_SHAPE_FRACTIONS + /* Automatic fractions. */ + map->add_feature (HB_TAG ('f','r','a','c')); + map->add_feature (HB_TAG ('n','u','m','r')); + map->add_feature (HB_TAG ('d','n','o','m')); +#endif + + /* Random! */ + map->enable_feature (HB_TAG ('r','a','n','d'), F_RANDOM, HB_OT_MAP_MAX_VALUE); + +#ifndef HB_NO_AAT_SHAPE + /* Tracking. We enable dummy feature here just to allow disabling + * AAT 'trak' table using features. + * https://github.com/harfbuzz/harfbuzz/issues/1303 */ + map->enable_feature (HB_TAG ('t','r','a','k'), F_HAS_FALLBACK); +#endif + + map->enable_feature (HB_TAG ('H','A','R','F')); + + if (planner->shaper->collect_features) + planner->shaper->collect_features (planner); + + map->enable_feature (HB_TAG ('B','U','Z','Z')); + + for (unsigned int i = 0; i < ARRAY_LENGTH (common_features); i++) + map->add_feature (common_features[i]); + + if (HB_DIRECTION_IS_HORIZONTAL (planner->props.direction)) + for (unsigned int i = 0; i < ARRAY_LENGTH (horizontal_features); i++) + map->add_feature (horizontal_features[i]); + else + { + /* We really want to find a 'vert' feature if there's any in the font, no + * matter which script/langsys it is listed (or not) under. + * See various bugs referenced from: + * https://github.com/harfbuzz/harfbuzz/issues/63 */ + map->enable_feature (HB_TAG ('v','e','r','t'), F_GLOBAL_SEARCH); + } + + for (unsigned int i = 0; i < num_user_features; i++) + { + const hb_feature_t *feature = &user_features[i]; + map->add_feature (feature->tag, + (feature->start == HB_FEATURE_GLOBAL_START && + feature->end == HB_FEATURE_GLOBAL_END) ? F_GLOBAL : F_NONE, + feature->value); + } + +#ifndef HB_NO_AAT_SHAPE + if (planner->apply_morx) + { + hb_aat_map_builder_t *aat_map = &planner->aat_map; + for (unsigned int i = 0; i < num_user_features; i++) + { + const hb_feature_t *feature = &user_features[i]; + aat_map->add_feature (feature->tag, feature->value); + } + } +#endif + + if (planner->shaper->override_features) + planner->shaper->override_features (planner); +} + + +/* + * shaper face data + */ + +struct hb_ot_face_data_t {}; + +hb_ot_face_data_t * +_hb_ot_shaper_face_data_create (hb_face_t *face) +{ + return (hb_ot_face_data_t *) HB_SHAPER_DATA_SUCCEEDED; +} + +void +_hb_ot_shaper_face_data_destroy (hb_ot_face_data_t *data) +{ +} + + +/* + * shaper font data + */ + +struct hb_ot_font_data_t {}; + +hb_ot_font_data_t * +_hb_ot_shaper_font_data_create (hb_font_t *font HB_UNUSED) +{ + return (hb_ot_font_data_t *) HB_SHAPER_DATA_SUCCEEDED; +} + +void +_hb_ot_shaper_font_data_destroy (hb_ot_font_data_t *data HB_UNUSED) +{ +} + + +/* + * shaper + */ + +struct hb_ot_shape_context_t +{ + hb_ot_shape_plan_t *plan; + hb_font_t *font; + hb_face_t *face; + hb_buffer_t *buffer; + const hb_feature_t *user_features; + unsigned int num_user_features; + + /* Transient stuff */ + hb_direction_t target_direction; +}; + + + +/* Main shaper */ + + +/* Prepare */ + +static void +hb_set_unicode_props (hb_buffer_t *buffer) +{ + /* Implement enough of Unicode Graphemes here that shaping + * in reverse-direction wouldn't break graphemes. Namely, + * we mark all marks and ZWJ and ZWJ,Extended_Pictographic + * sequences as continuations. The foreach_grapheme() + * macro uses this bit. + * + * https://www.unicode.org/reports/tr29/#Regex_Definitions + */ + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + { + _hb_glyph_info_set_unicode_props (&info[i], buffer); + + /* Marks are already set as continuation by the above line. + * Handle Emoji_Modifier and ZWJ-continuation. */ + if (unlikely (_hb_glyph_info_get_general_category (&info[i]) == HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL && + hb_in_range<hb_codepoint_t> (info[i].codepoint, 0x1F3FBu, 0x1F3FFu))) + { + _hb_glyph_info_set_continuation (&info[i]); + } +#ifndef HB_NO_EMOJI_SEQUENCES + else if (unlikely (_hb_glyph_info_is_zwj (&info[i]))) + { + _hb_glyph_info_set_continuation (&info[i]); + if (i + 1 < count && + _hb_unicode_is_emoji_Extended_Pictographic (info[i + 1].codepoint)) + { + i++; + _hb_glyph_info_set_unicode_props (&info[i], buffer); + _hb_glyph_info_set_continuation (&info[i]); + } + } +#endif + /* Or part of the Other_Grapheme_Extend that is not marks. + * As of Unicode 11 that is just: + * + * 200C ; Other_Grapheme_Extend # Cf ZERO WIDTH NON-JOINER + * FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK + * E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG + * + * ZWNJ is special, we don't want to merge it as there's no need, and keeping + * it separate results in more granular clusters. Ignore Katakana for now. + * Tags are used for Emoji sub-region flag sequences: + * https://github.com/harfbuzz/harfbuzz/issues/1556 + */ + else if (unlikely (hb_in_range<hb_codepoint_t> (info[i].codepoint, 0xE0020u, 0xE007Fu))) + _hb_glyph_info_set_continuation (&info[i]); + } +} + +static void +hb_insert_dotted_circle (hb_buffer_t *buffer, hb_font_t *font) +{ + if (unlikely (buffer->flags & HB_BUFFER_FLAG_DO_NOT_INSERT_DOTTED_CIRCLE)) + return; + + if (!(buffer->flags & HB_BUFFER_FLAG_BOT) || + buffer->context_len[0] || + !_hb_glyph_info_is_unicode_mark (&buffer->info[0])) + return; + + if (!font->has_glyph (0x25CCu)) + return; + + hb_glyph_info_t dottedcircle = {0}; + dottedcircle.codepoint = 0x25CCu; + _hb_glyph_info_set_unicode_props (&dottedcircle, buffer); + + buffer->clear_output (); + + buffer->idx = 0; + hb_glyph_info_t info = dottedcircle; + info.cluster = buffer->cur().cluster; + info.mask = buffer->cur().mask; + buffer->output_info (info); + while (buffer->idx < buffer->len && buffer->successful) + buffer->next_glyph (); + buffer->swap_buffers (); +} + +static void +hb_form_clusters (hb_buffer_t *buffer) +{ + if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_NON_ASCII)) + return; + + if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES) + foreach_grapheme (buffer, start, end) + buffer->merge_clusters (start, end); + else + foreach_grapheme (buffer, start, end) + buffer->unsafe_to_break (start, end); +} + +static void +hb_ensure_native_direction (hb_buffer_t *buffer) +{ + hb_direction_t direction = buffer->props.direction; + hb_direction_t horiz_dir = hb_script_get_horizontal_direction (buffer->props.script); + + /* TODO vertical: + * The only BTT vertical script is Ogham, but it's not clear to me whether OpenType + * Ogham fonts are supposed to be implemented BTT or not. Need to research that + * first. */ + if ((HB_DIRECTION_IS_HORIZONTAL (direction) && + direction != horiz_dir && horiz_dir != HB_DIRECTION_INVALID) || + (HB_DIRECTION_IS_VERTICAL (direction) && + direction != HB_DIRECTION_TTB)) + { + + if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) + foreach_grapheme (buffer, start, end) + { + buffer->merge_clusters (start, end); + buffer->reverse_range (start, end); + } + else + foreach_grapheme (buffer, start, end) + /* form_clusters() merged clusters already, we don't merge. */ + buffer->reverse_range (start, end); + + buffer->reverse (); + + buffer->props.direction = HB_DIRECTION_REVERSE (buffer->props.direction); + } +} + + +/* + * Substitute + */ + +static hb_codepoint_t +hb_vert_char_for (hb_codepoint_t u) +{ + switch (u >> 8) + { + case 0x20: switch (u) { + case 0x2013u: return 0xfe32u; // EN DASH + case 0x2014u: return 0xfe31u; // EM DASH + case 0x2025u: return 0xfe30u; // TWO DOT LEADER + case 0x2026u: return 0xfe19u; // HORIZONTAL ELLIPSIS + } break; + case 0x30: switch (u) { + case 0x3001u: return 0xfe11u; // IDEOGRAPHIC COMMA + case 0x3002u: return 0xfe12u; // IDEOGRAPHIC FULL STOP + case 0x3008u: return 0xfe3fu; // LEFT ANGLE BRACKET + case 0x3009u: return 0xfe40u; // RIGHT ANGLE BRACKET + case 0x300au: return 0xfe3du; // LEFT DOUBLE ANGLE BRACKET + case 0x300bu: return 0xfe3eu; // RIGHT DOUBLE ANGLE BRACKET + case 0x300cu: return 0xfe41u; // LEFT CORNER BRACKET + case 0x300du: return 0xfe42u; // RIGHT CORNER BRACKET + case 0x300eu: return 0xfe43u; // LEFT WHITE CORNER BRACKET + case 0x300fu: return 0xfe44u; // RIGHT WHITE CORNER BRACKET + case 0x3010u: return 0xfe3bu; // LEFT BLACK LENTICULAR BRACKET + case 0x3011u: return 0xfe3cu; // RIGHT BLACK LENTICULAR BRACKET + case 0x3014u: return 0xfe39u; // LEFT TORTOISE SHELL BRACKET + case 0x3015u: return 0xfe3au; // RIGHT TORTOISE SHELL BRACKET + case 0x3016u: return 0xfe17u; // LEFT WHITE LENTICULAR BRACKET + case 0x3017u: return 0xfe18u; // RIGHT WHITE LENTICULAR BRACKET + } break; + case 0xfe: switch (u) { + case 0xfe4fu: return 0xfe34u; // WAVY LOW LINE + } break; + case 0xff: switch (u) { + case 0xff01u: return 0xfe15u; // FULLWIDTH EXCLAMATION MARK + case 0xff08u: return 0xfe35u; // FULLWIDTH LEFT PARENTHESIS + case 0xff09u: return 0xfe36u; // FULLWIDTH RIGHT PARENTHESIS + case 0xff0cu: return 0xfe10u; // FULLWIDTH COMMA + case 0xff1au: return 0xfe13u; // FULLWIDTH COLON + case 0xff1bu: return 0xfe14u; // FULLWIDTH SEMICOLON + case 0xff1fu: return 0xfe16u; // FULLWIDTH QUESTION MARK + case 0xff3bu: return 0xfe47u; // FULLWIDTH LEFT SQUARE BRACKET + case 0xff3du: return 0xfe48u; // FULLWIDTH RIGHT SQUARE BRACKET + case 0xff3fu: return 0xfe33u; // FULLWIDTH LOW LINE + case 0xff5bu: return 0xfe37u; // FULLWIDTH LEFT CURLY BRACKET + case 0xff5du: return 0xfe38u; // FULLWIDTH RIGHT CURLY BRACKET + } break; + } + + return u; +} + +static inline void +hb_ot_rotate_chars (const hb_ot_shape_context_t *c) +{ + hb_buffer_t *buffer = c->buffer; + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + + if (HB_DIRECTION_IS_BACKWARD (c->target_direction)) + { + hb_unicode_funcs_t *unicode = buffer->unicode; + hb_mask_t rtlm_mask = c->plan->rtlm_mask; + + for (unsigned int i = 0; i < count; i++) { + hb_codepoint_t codepoint = unicode->mirroring (info[i].codepoint); + if (unlikely (codepoint != info[i].codepoint && c->font->has_glyph (codepoint))) + info[i].codepoint = codepoint; + else + info[i].mask |= rtlm_mask; + } + } + + if (HB_DIRECTION_IS_VERTICAL (c->target_direction) && !c->plan->has_vert) + { + for (unsigned int i = 0; i < count; i++) { + hb_codepoint_t codepoint = hb_vert_char_for (info[i].codepoint); + if (unlikely (codepoint != info[i].codepoint && c->font->has_glyph (codepoint))) + info[i].codepoint = codepoint; + } + } +} + +static inline void +hb_ot_shape_setup_masks_fraction (const hb_ot_shape_context_t *c) +{ +#ifdef HB_NO_OT_SHAPE_FRACTIONS + return; +#endif + + if (!(c->buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_NON_ASCII) || + !c->plan->has_frac) + return; + + hb_buffer_t *buffer = c->buffer; + + hb_mask_t pre_mask, post_mask; + if (HB_DIRECTION_IS_FORWARD (buffer->props.direction)) + { + pre_mask = c->plan->numr_mask | c->plan->frac_mask; + post_mask = c->plan->frac_mask | c->plan->dnom_mask; + } + else + { + pre_mask = c->plan->frac_mask | c->plan->dnom_mask; + post_mask = c->plan->numr_mask | c->plan->frac_mask; + } + + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + { + if (info[i].codepoint == 0x2044u) /* FRACTION SLASH */ + { + unsigned int start = i, end = i + 1; + while (start && + _hb_glyph_info_get_general_category (&info[start - 1]) == + HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) + start--; + while (end < count && + _hb_glyph_info_get_general_category (&info[end]) == + HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) + end++; + + buffer->unsafe_to_break (start, end); + + for (unsigned int j = start; j < i; j++) + info[j].mask |= pre_mask; + info[i].mask |= c->plan->frac_mask; + for (unsigned int j = i + 1; j < end; j++) + info[j].mask |= post_mask; + + i = end - 1; + } + } +} + +static inline void +hb_ot_shape_initialize_masks (const hb_ot_shape_context_t *c) +{ + hb_ot_map_t *map = &c->plan->map; + hb_buffer_t *buffer = c->buffer; + + hb_mask_t global_mask = map->get_global_mask (); + buffer->reset_masks (global_mask); +} + +static inline void +hb_ot_shape_setup_masks (const hb_ot_shape_context_t *c) +{ + hb_ot_map_t *map = &c->plan->map; + hb_buffer_t *buffer = c->buffer; + + hb_ot_shape_setup_masks_fraction (c); + + if (c->plan->shaper->setup_masks) + c->plan->shaper->setup_masks (c->plan, buffer, c->font); + + for (unsigned int i = 0; i < c->num_user_features; i++) + { + const hb_feature_t *feature = &c->user_features[i]; + if (!(feature->start == HB_FEATURE_GLOBAL_START && feature->end == HB_FEATURE_GLOBAL_END)) { + unsigned int shift; + hb_mask_t mask = map->get_mask (feature->tag, &shift); + buffer->set_masks (feature->value << shift, mask, feature->start, feature->end); + } + } +} + +static void +hb_ot_zero_width_default_ignorables (const hb_buffer_t *buffer) +{ + if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES) || + (buffer->flags & HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES) || + (buffer->flags & HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES)) + return; + + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + hb_glyph_position_t *pos = buffer->pos; + unsigned int i = 0; + for (i = 0; i < count; i++) + if (unlikely (_hb_glyph_info_is_default_ignorable (&info[i]))) + pos[i].x_advance = pos[i].y_advance = pos[i].x_offset = pos[i].y_offset = 0; +} + +static void +hb_ot_hide_default_ignorables (hb_buffer_t *buffer, + hb_font_t *font) +{ + if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_DEFAULT_IGNORABLES) || + (buffer->flags & HB_BUFFER_FLAG_PRESERVE_DEFAULT_IGNORABLES)) + return; + + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + + hb_codepoint_t invisible = buffer->invisible; + if (!(buffer->flags & HB_BUFFER_FLAG_REMOVE_DEFAULT_IGNORABLES) && + (invisible || font->get_nominal_glyph (' ', &invisible))) + { + /* Replace default-ignorables with a zero-advance invisible glyph. */ + for (unsigned int i = 0; i < count; i++) + { + if (_hb_glyph_info_is_default_ignorable (&info[i])) + info[i].codepoint = invisible; + } + } + else + hb_ot_layout_delete_glyphs_inplace (buffer, _hb_glyph_info_is_default_ignorable); +} + + +static inline void +hb_ot_map_glyphs_fast (hb_buffer_t *buffer) +{ + /* Normalization process sets up glyph_index(), we just copy it. */ + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + info[i].codepoint = info[i].glyph_index(); + + buffer->content_type = HB_BUFFER_CONTENT_TYPE_GLYPHS; +} + +static inline void +hb_synthesize_glyph_classes (hb_buffer_t *buffer) +{ + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + { + hb_ot_layout_glyph_props_flags_t klass; + + /* Never mark default-ignorables as marks. + * They won't get in the way of lookups anyway, + * but having them as mark will cause them to be skipped + * over if the lookup-flag says so, but at least for the + * Mongolian variation selectors, looks like Uniscribe + * marks them as non-mark. Some Mongolian fonts without + * GDEF rely on this. Another notable character that + * this applies to is COMBINING GRAPHEME JOINER. */ + klass = (_hb_glyph_info_get_general_category (&info[i]) != + HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK || + _hb_glyph_info_is_default_ignorable (&info[i])) ? + HB_OT_LAYOUT_GLYPH_PROPS_BASE_GLYPH : + HB_OT_LAYOUT_GLYPH_PROPS_MARK; + _hb_glyph_info_set_glyph_props (&info[i], klass); + } +} + +static inline void +hb_ot_substitute_default (const hb_ot_shape_context_t *c) +{ + hb_buffer_t *buffer = c->buffer; + + hb_ot_rotate_chars (c); + + HB_BUFFER_ALLOCATE_VAR (buffer, glyph_index); + + _hb_ot_shape_normalize (c->plan, buffer, c->font); + + hb_ot_shape_setup_masks (c); + + /* This is unfortunate to go here, but necessary... */ + if (c->plan->fallback_mark_positioning) + _hb_ot_shape_fallback_mark_position_recategorize_marks (c->plan, c->font, buffer); + + hb_ot_map_glyphs_fast (buffer); + + HB_BUFFER_DEALLOCATE_VAR (buffer, glyph_index); +} + +static inline void +hb_ot_substitute_complex (const hb_ot_shape_context_t *c) +{ + hb_buffer_t *buffer = c->buffer; + + hb_ot_layout_substitute_start (c->font, buffer); + + if (c->plan->fallback_glyph_classes) + hb_synthesize_glyph_classes (c->buffer); + + c->plan->substitute (c->font, buffer); +} + +static inline void +hb_ot_substitute_pre (const hb_ot_shape_context_t *c) +{ + hb_ot_substitute_default (c); + + _hb_buffer_allocate_gsubgpos_vars (c->buffer); + + hb_ot_substitute_complex (c); +} + +static inline void +hb_ot_substitute_post (const hb_ot_shape_context_t *c) +{ + hb_ot_hide_default_ignorables (c->buffer, c->font); +#ifndef HB_NO_AAT_SHAPE + if (c->plan->apply_morx) + hb_aat_layout_remove_deleted_glyphs (c->buffer); +#endif + + if (c->plan->shaper->postprocess_glyphs) + c->plan->shaper->postprocess_glyphs (c->plan, c->buffer, c->font); +} + + +/* + * Position + */ + +static inline void +adjust_mark_offsets (hb_glyph_position_t *pos) +{ + pos->x_offset -= pos->x_advance; + pos->y_offset -= pos->y_advance; +} + +static inline void +zero_mark_width (hb_glyph_position_t *pos) +{ + pos->x_advance = 0; + pos->y_advance = 0; +} + +static inline void +zero_mark_widths_by_gdef (hb_buffer_t *buffer, bool adjust_offsets) +{ + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + if (_hb_glyph_info_is_mark (&info[i])) + { + if (adjust_offsets) + adjust_mark_offsets (&buffer->pos[i]); + zero_mark_width (&buffer->pos[i]); + } +} + +static inline void +hb_ot_position_default (const hb_ot_shape_context_t *c) +{ + hb_direction_t direction = c->buffer->props.direction; + unsigned int count = c->buffer->len; + hb_glyph_info_t *info = c->buffer->info; + hb_glyph_position_t *pos = c->buffer->pos; + + if (HB_DIRECTION_IS_HORIZONTAL (direction)) + { + c->font->get_glyph_h_advances (count, &info[0].codepoint, sizeof(info[0]), + &pos[0].x_advance, sizeof(pos[0])); + /* The nil glyph_h_origin() func returns 0, so no need to apply it. */ + if (c->font->has_glyph_h_origin_func ()) + for (unsigned int i = 0; i < count; i++) + c->font->subtract_glyph_h_origin (info[i].codepoint, + &pos[i].x_offset, + &pos[i].y_offset); + } + else + { + c->font->get_glyph_v_advances (count, &info[0].codepoint, sizeof(info[0]), + &pos[0].y_advance, sizeof(pos[0])); + for (unsigned int i = 0; i < count; i++) + { + c->font->subtract_glyph_v_origin (info[i].codepoint, + &pos[i].x_offset, + &pos[i].y_offset); + } + } + if (c->buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_SPACE_FALLBACK) + _hb_ot_shape_fallback_spaces (c->plan, c->font, c->buffer); +} + +static inline void +hb_ot_position_complex (const hb_ot_shape_context_t *c) +{ + unsigned int count = c->buffer->len; + hb_glyph_info_t *info = c->buffer->info; + hb_glyph_position_t *pos = c->buffer->pos; + + /* If the font has no GPOS and direction is forward, then when + * zeroing mark widths, we shift the mark with it, such that the + * mark is positioned hanging over the previous glyph. When + * direction is backward we don't shift and it will end up + * hanging over the next glyph after the final reordering. + * + * Note: If fallback positinoing happens, we don't care about + * this as it will be overriden. + */ + bool adjust_offsets_when_zeroing = c->plan->adjust_mark_positioning_when_zeroing && + HB_DIRECTION_IS_FORWARD (c->buffer->props.direction); + + /* We change glyph origin to what GPOS expects (horizontal), apply GPOS, change it back. */ + + /* The nil glyph_h_origin() func returns 0, so no need to apply it. */ + if (c->font->has_glyph_h_origin_func ()) + for (unsigned int i = 0; i < count; i++) + c->font->add_glyph_h_origin (info[i].codepoint, + &pos[i].x_offset, + &pos[i].y_offset); + + hb_ot_layout_position_start (c->font, c->buffer); + + if (c->plan->zero_marks) + switch (c->plan->shaper->zero_width_marks) + { + case HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY: + zero_mark_widths_by_gdef (c->buffer, adjust_offsets_when_zeroing); + break; + + default: + case HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE: + case HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE: + break; + } + + c->plan->position (c->font, c->buffer); + + if (c->plan->zero_marks) + switch (c->plan->shaper->zero_width_marks) + { + case HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE: + zero_mark_widths_by_gdef (c->buffer, adjust_offsets_when_zeroing); + break; + + default: + case HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE: + case HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_EARLY: + break; + } + + /* Finish off. Has to follow a certain order. */ + hb_ot_layout_position_finish_advances (c->font, c->buffer); + hb_ot_zero_width_default_ignorables (c->buffer); +#ifndef HB_NO_AAT_SHAPE + if (c->plan->apply_morx) + hb_aat_layout_zero_width_deleted_glyphs (c->buffer); +#endif + hb_ot_layout_position_finish_offsets (c->font, c->buffer); + + /* The nil glyph_h_origin() func returns 0, so no need to apply it. */ + if (c->font->has_glyph_h_origin_func ()) + for (unsigned int i = 0; i < count; i++) + c->font->subtract_glyph_h_origin (info[i].codepoint, + &pos[i].x_offset, + &pos[i].y_offset); + + if (c->plan->fallback_mark_positioning) + _hb_ot_shape_fallback_mark_position (c->plan, c->font, c->buffer, + adjust_offsets_when_zeroing); +} + +static inline void +hb_ot_position (const hb_ot_shape_context_t *c) +{ + c->buffer->clear_positions (); + + hb_ot_position_default (c); + + hb_ot_position_complex (c); + + if (HB_DIRECTION_IS_BACKWARD (c->buffer->props.direction)) + hb_buffer_reverse (c->buffer); + + _hb_buffer_deallocate_gsubgpos_vars (c->buffer); +} + +static inline void +hb_propagate_flags (hb_buffer_t *buffer) +{ + /* Propagate cluster-level glyph flags to be the same on all cluster glyphs. + * Simplifies using them. */ + + if (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_HAS_UNSAFE_TO_BREAK)) + return; + + hb_glyph_info_t *info = buffer->info; + + foreach_cluster (buffer, start, end) + { + unsigned int mask = 0; + for (unsigned int i = start; i < end; i++) + if (info[i].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK) + { + mask = HB_GLYPH_FLAG_UNSAFE_TO_BREAK; + break; + } + if (mask) + for (unsigned int i = start; i < end; i++) + info[i].mask |= mask; + } +} + +/* Pull it all together! */ + +static void +hb_ot_shape_internal (hb_ot_shape_context_t *c) +{ + c->buffer->deallocate_var_all (); + c->buffer->scratch_flags = HB_BUFFER_SCRATCH_FLAG_DEFAULT; + if (likely (!hb_unsigned_mul_overflows (c->buffer->len, HB_BUFFER_MAX_LEN_FACTOR))) + { + c->buffer->max_len = hb_max (c->buffer->len * HB_BUFFER_MAX_LEN_FACTOR, + (unsigned) HB_BUFFER_MAX_LEN_MIN); + } + if (likely (!hb_unsigned_mul_overflows (c->buffer->len, HB_BUFFER_MAX_OPS_FACTOR))) + { + c->buffer->max_ops = hb_max (c->buffer->len * HB_BUFFER_MAX_OPS_FACTOR, + (unsigned) HB_BUFFER_MAX_OPS_MIN); + } + + /* Save the original direction, we use it later. */ + c->target_direction = c->buffer->props.direction; + + _hb_buffer_allocate_unicode_vars (c->buffer); + + c->buffer->clear_output (); + + hb_ot_shape_initialize_masks (c); + hb_set_unicode_props (c->buffer); + hb_insert_dotted_circle (c->buffer, c->font); + + hb_form_clusters (c->buffer); + + hb_ensure_native_direction (c->buffer); + + if (c->plan->shaper->preprocess_text) + c->plan->shaper->preprocess_text (c->plan, c->buffer, c->font); + + hb_ot_substitute_pre (c); + hb_ot_position (c); + hb_ot_substitute_post (c); + + hb_propagate_flags (c->buffer); + + _hb_buffer_deallocate_unicode_vars (c->buffer); + + c->buffer->props.direction = c->target_direction; + + c->buffer->max_len = HB_BUFFER_MAX_LEN_DEFAULT; + c->buffer->max_ops = HB_BUFFER_MAX_OPS_DEFAULT; + c->buffer->deallocate_var_all (); +} + + +hb_bool_t +_hb_ot_shape (hb_shape_plan_t *shape_plan, + hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features) +{ + hb_ot_shape_context_t c = {&shape_plan->ot, font, font->face, buffer, features, num_features}; + hb_ot_shape_internal (&c); + + return true; +} + + +/** + * hb_ot_shape_plan_collect_lookups: + * + * Since: 0.9.7 + **/ +void +hb_ot_shape_plan_collect_lookups (hb_shape_plan_t *shape_plan, + hb_tag_t table_tag, + hb_set_t *lookup_indexes /* OUT */) +{ + shape_plan->ot.collect_lookups (table_tag, lookup_indexes); +} + + +/* TODO Move this to hb-ot-shape-normalize, make it do decompose, and make it public. */ +static void +add_char (hb_font_t *font, + hb_unicode_funcs_t *unicode, + hb_bool_t mirror, + hb_codepoint_t u, + hb_set_t *glyphs) +{ + hb_codepoint_t glyph; + if (font->get_nominal_glyph (u, &glyph)) + glyphs->add (glyph); + if (mirror) + { + hb_codepoint_t m = unicode->mirroring (u); + if (m != u && font->get_nominal_glyph (m, &glyph)) + glyphs->add (glyph); + } +} + + +/** + * hb_ot_shape_glyphs_closure: + * + * Since: 0.9.2 + **/ +void +hb_ot_shape_glyphs_closure (hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features, + hb_set_t *glyphs) +{ + const char *shapers[] = {"ot", nullptr}; + hb_shape_plan_t *shape_plan = hb_shape_plan_create_cached (font->face, &buffer->props, + features, num_features, shapers); + + bool mirror = hb_script_get_horizontal_direction (buffer->props.script) == HB_DIRECTION_RTL; + + unsigned int count = buffer->len; + hb_glyph_info_t *info = buffer->info; + for (unsigned int i = 0; i < count; i++) + add_char (font, buffer->unicode, mirror, info[i].codepoint, glyphs); + + hb_set_t *lookups = hb_set_create (); + hb_ot_shape_plan_collect_lookups (shape_plan, HB_OT_TAG_GSUB, lookups); + hb_ot_layout_lookups_substitute_closure (font->face, lookups, glyphs); + + hb_set_destroy (lookups); + + hb_shape_plan_destroy (shape_plan); +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-shape.h b/thirdparty/harfbuzz/src/hb-ot-shape.h new file mode 100644 index 0000000000..7b1bcc0637 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape.h @@ -0,0 +1,53 @@ +/* + * Copyright © 2013 Red Hat, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_H_IN +#error "Include <hb-ot.h> instead." +#endif + +#ifndef HB_OT_SHAPE_H +#define HB_OT_SHAPE_H + +#include "hb.h" + +HB_BEGIN_DECLS + +/* TODO port to shape-plan / set. */ +HB_EXTERN void +hb_ot_shape_glyphs_closure (hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features, + hb_set_t *glyphs); + +HB_EXTERN void +hb_ot_shape_plan_collect_lookups (hb_shape_plan_t *shape_plan, + hb_tag_t table_tag, + hb_set_t *lookup_indexes /* OUT */); + +HB_END_DECLS + +#endif /* HB_OT_SHAPE_H */ diff --git a/thirdparty/harfbuzz/src/hb-ot-shape.hh b/thirdparty/harfbuzz/src/hb-ot-shape.hh new file mode 100644 index 0000000000..acc98772a9 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-shape.hh @@ -0,0 +1,170 @@ +/* + * Copyright © 2010 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_HH +#define HB_OT_SHAPE_HH + +#include "hb.hh" + +#include "hb-ot-map.hh" +#include "hb-aat-map.hh" + + +struct hb_ot_shape_plan_key_t +{ + unsigned int variations_index[2]; + + void init (hb_face_t *face, + const int *coords, + unsigned num_coords) + { + for (unsigned int table_index = 0; table_index < 2; table_index++) + hb_ot_layout_table_find_feature_variations (face, + table_tags[table_index], + coords, + num_coords, + &variations_index[table_index]); + } + + bool equal (const hb_ot_shape_plan_key_t *other) + { + return 0 == memcmp (this, other, sizeof (*this)); + } +}; + + +struct hb_shape_plan_key_t; + +struct hb_ot_shape_plan_t +{ + hb_segment_properties_t props; + const struct hb_ot_complex_shaper_t *shaper; + hb_ot_map_t map; + hb_aat_map_t aat_map; + const void *data; +#ifndef HB_NO_OT_SHAPE_FRACTIONS + hb_mask_t frac_mask, numr_mask, dnom_mask; +#else + static constexpr hb_mask_t frac_mask = 0; + static constexpr hb_mask_t numr_mask = 0; + static constexpr hb_mask_t dnom_mask = 0; +#endif + hb_mask_t rtlm_mask; +#ifndef HB_NO_OT_KERN + hb_mask_t kern_mask; +#else + static constexpr hb_mask_t kern_mask = 0; +#endif +#ifndef HB_NO_AAT_SHAPE + hb_mask_t trak_mask; +#else + static constexpr hb_mask_t trak_mask = 0; +#endif + +#ifndef HB_NO_OT_KERN + bool requested_kerning : 1; +#else + static constexpr bool requested_kerning = false; +#endif +#ifndef HB_NO_AAT_SHAPE + bool requested_tracking : 1; +#else + static constexpr bool requested_tracking = false; +#endif +#ifndef HB_NO_OT_SHAPE_FRACTIONS + bool has_frac : 1; +#else + static constexpr bool has_frac = false; +#endif + bool has_vert : 1; + bool has_gpos_mark : 1; + bool zero_marks : 1; + bool fallback_glyph_classes : 1; + bool fallback_mark_positioning : 1; + bool adjust_mark_positioning_when_zeroing : 1; + + bool apply_gpos : 1; +#ifndef HB_NO_OT_KERN + bool apply_kern : 1; +#else + static constexpr bool apply_kern = false; +#endif +#ifndef HB_NO_AAT_SHAPE + bool apply_kerx : 1; + bool apply_morx : 1; + bool apply_trak : 1; +#else + static constexpr bool apply_kerx = false; + static constexpr bool apply_morx = false; + static constexpr bool apply_trak = false; +#endif + + void collect_lookups (hb_tag_t table_tag, hb_set_t *lookups) const + { + unsigned int table_index; + switch (table_tag) { + case HB_OT_TAG_GSUB: table_index = 0; break; + case HB_OT_TAG_GPOS: table_index = 1; break; + default: return; + } + map.collect_lookups (table_index, lookups); + } + + HB_INTERNAL bool init0 (hb_face_t *face, + const hb_shape_plan_key_t *key); + HB_INTERNAL void fini (); + + HB_INTERNAL void substitute (hb_font_t *font, hb_buffer_t *buffer) const; + HB_INTERNAL void position (hb_font_t *font, hb_buffer_t *buffer) const; +}; + +struct hb_shape_plan_t; + +struct hb_ot_shape_planner_t +{ + /* In the order that they are filled in. */ + hb_face_t *face; + hb_segment_properties_t props; + hb_ot_map_builder_t map; + hb_aat_map_builder_t aat_map; +#ifndef HB_NO_AAT_SHAPE + bool apply_morx : 1; +#else + static constexpr bool apply_morx = false; +#endif + bool script_zero_marks : 1; + bool script_fallback_mark_positioning : 1; + const struct hb_ot_complex_shaper_t *shaper; + + HB_INTERNAL hb_ot_shape_planner_t (hb_face_t *face, + const hb_segment_properties_t *props); + + HB_INTERNAL void compile (hb_ot_shape_plan_t &plan, + const hb_ot_shape_plan_key_t &key); +}; + + +#endif /* HB_OT_SHAPE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-stat-table.hh b/thirdparty/harfbuzz/src/hb-ot-stat-table.hh new file mode 100644 index 0000000000..6aa4fa4492 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-stat-table.hh @@ -0,0 +1,404 @@ +/* + * Copyright © 2018 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_OT_STAT_TABLE_HH +#define HB_OT_STAT_TABLE_HH + +#include "hb-open-type.hh" +#include "hb-ot-layout-common.hh" + +/* + * STAT -- Style Attributes + * https://docs.microsoft.com/en-us/typography/opentype/spec/stat + */ +#define HB_OT_TAG_STAT HB_TAG('S','T','A','T') + + +namespace OT { + +enum +{ + OLDER_SIBLING_FONT_ATTRIBUTE = 0x0001, /* If set, this axis value table + * provides axis value information + * that is applicable to other fonts + * within the same font family. This + * is used if the other fonts were + * released earlier and did not include + * information about values for some axis. + * If newer versions of the other + * fonts include the information + * themselves and are present, + * then this record is ignored. */ + ELIDABLE_AXIS_VALUE_NAME = 0x0002 /* If set, it indicates that the axis + * value represents the “normal” value + * for the axis and may be omitted when + * composing name strings. */ + // Reserved = 0xFFFC /* Reserved for future use — set to zero. */ +}; + +struct AxisValueFormat1 +{ + unsigned int get_axis_index () const { return axisIndex; } + float get_value () const { return value.to_float (); } + + hb_ot_name_id_t get_value_name_id () const { return valueNameID; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + HBUINT16 format; /* Format identifier — set to 1. */ + HBUINT16 axisIndex; /* Zero-base index into the axis record array + * identifying the axis of design variation + * to which the axis value record applies. + * Must be less than designAxisCount. */ + HBUINT16 flags; /* Flags — see below for details. */ + NameID valueNameID; /* The name ID for entries in the 'name' table + * that provide a display string for this + * attribute value. */ + HBFixed value; /* A numeric value for this attribute value. */ + public: + DEFINE_SIZE_STATIC (12); +}; + +struct AxisValueFormat2 +{ + unsigned int get_axis_index () const { return axisIndex; } + float get_value () const { return nominalValue.to_float (); } + + hb_ot_name_id_t get_value_name_id () const { return valueNameID; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + HBUINT16 format; /* Format identifier — set to 2. */ + HBUINT16 axisIndex; /* Zero-base index into the axis record array + * identifying the axis of design variation + * to which the axis value record applies. + * Must be less than designAxisCount. */ + HBUINT16 flags; /* Flags — see below for details. */ + NameID valueNameID; /* The name ID for entries in the 'name' table + * that provide a display string for this + * attribute value. */ + HBFixed nominalValue; /* A numeric value for this attribute value. */ + HBFixed rangeMinValue; /* The minimum value for a range associated + * with the specified name ID. */ + HBFixed rangeMaxValue; /* The maximum value for a range associated + * with the specified name ID. */ + public: + DEFINE_SIZE_STATIC (20); +}; + +struct AxisValueFormat3 +{ + unsigned int get_axis_index () const { return axisIndex; } + float get_value () const { return value.to_float (); } + + hb_ot_name_id_t get_value_name_id () const { return valueNameID; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + HBUINT16 format; /* Format identifier — set to 3. */ + HBUINT16 axisIndex; /* Zero-base index into the axis record array + * identifying the axis of design variation + * to which the axis value record applies. + * Must be less than designAxisCount. */ + HBUINT16 flags; /* Flags — see below for details. */ + NameID valueNameID; /* The name ID for entries in the 'name' table + * that provide a display string for this + * attribute value. */ + HBFixed value; /* A numeric value for this attribute value. */ + HBFixed linkedValue; /* The numeric value for a style-linked mapping + * from this value. */ + public: + DEFINE_SIZE_STATIC (16); +}; + +struct AxisValueRecord +{ + unsigned int get_axis_index () const { return axisIndex; } + float get_value () const { return value.to_float (); } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + HBUINT16 axisIndex; /* Zero-base index into the axis record array + * identifying the axis to which this value + * applies. Must be less than designAxisCount. */ + HBFixed value; /* A numeric value for this attribute value. */ + public: + DEFINE_SIZE_STATIC (6); +}; + +struct AxisValueFormat4 +{ + const AxisValueRecord &get_axis_record (unsigned int axis_index) const + { return axisValues.as_array (axisCount)[axis_index]; } + + hb_ot_name_id_t get_value_name_id () const { return valueNameID; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + HBUINT16 format; /* Format identifier — set to 4. */ + HBUINT16 axisCount; /* The total number of axes contributing to + * this axis-values combination. */ + HBUINT16 flags; /* Flags — see below for details. */ + NameID valueNameID; /* The name ID for entries in the 'name' table + * that provide a display string for this + * attribute value. */ + UnsizedArrayOf<AxisValueRecord> + axisValues; /* Array of AxisValue records that provide the + * combination of axis values, one for each + * contributing axis. */ + public: + DEFINE_SIZE_ARRAY (8, axisValues); +}; + +struct AxisValue +{ + bool get_value (unsigned int axis_index) const + { + switch (u.format) + { + case 1: return u.format1.get_value (); + case 2: return u.format2.get_value (); + case 3: return u.format3.get_value (); + case 4: return u.format4.get_axis_record (axis_index).get_value (); + default:return 0; + } + } + + unsigned int get_axis_index () const + { + switch (u.format) + { + case 1: return u.format1.get_axis_index (); + case 2: return u.format2.get_axis_index (); + case 3: return u.format3.get_axis_index (); + /* case 4: Makes more sense for variable fonts which are handled by fvar in hb-style */ + default:return -1; + } + } + + hb_ot_name_id_t get_value_name_id () const + { + switch (u.format) + { + case 1: return u.format1.get_value_name_id (); + case 2: return u.format2.get_value_name_id (); + case 3: return u.format3.get_value_name_id (); + case 4: return u.format4.get_value_name_id (); + default:return HB_OT_NAME_ID_INVALID; + } + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!c->check_struct (this))) + return_trace (false); + + switch (u.format) + { + case 1: return_trace (u.format1.sanitize (c)); + case 2: return_trace (u.format2.sanitize (c)); + case 3: return_trace (u.format3.sanitize (c)); + case 4: return_trace (u.format4.sanitize (c)); + default:return_trace (true); + } + } + + protected: + union + { + HBUINT16 format; + AxisValueFormat1 format1; + AxisValueFormat2 format2; + AxisValueFormat3 format3; + AxisValueFormat4 format4; + } u; + public: + DEFINE_SIZE_UNION (2, format); +}; + +struct StatAxisRecord +{ + int cmp (hb_tag_t key) const { return tag.cmp (key); } + + hb_ot_name_id_t get_name_id () const { return nameID; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this))); + } + + protected: + Tag tag; /* A tag identifying the axis of design variation. */ + NameID nameID; /* The name ID for entries in the 'name' table that + * provide a display string for this axis. */ + HBUINT16 ordering; /* A value that applications can use to determine + * primary sorting of face names, or for ordering + * of descriptors when composing family or face names. */ + public: + DEFINE_SIZE_STATIC (8); +}; + +struct STAT +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_STAT; + + bool has_data () const { return version.to_int (); } + + bool get_value (hb_tag_t tag, float *value) const + { + unsigned int axis_index; + if (!get_design_axes ().lfind (tag, &axis_index)) return false; + + hb_array_t<const OffsetTo<AxisValue>> axis_values = get_axis_value_offsets (); + for (unsigned int i = 0; i < axis_values.length; i++) + { + const AxisValue& axis_value = this+axis_values[i]; + if (axis_value.get_axis_index () == axis_index) + { + if (value) + *value = axis_value.get_value (axis_index); + return true; + } + } + return false; + } + + unsigned get_design_axis_count () const { return designAxisCount; } + + hb_ot_name_id_t get_axis_record_name_id (unsigned axis_record_index) const + { + if (unlikely (axis_record_index >= designAxisCount)) return HB_OT_NAME_ID_INVALID; + const StatAxisRecord &axis_record = get_design_axes ()[axis_record_index]; + return axis_record.get_name_id (); + } + + unsigned get_axis_value_count () const { return axisValueCount; } + + hb_ot_name_id_t get_axis_value_name_id (unsigned axis_value_index) const + { + if (unlikely (axis_value_index >= axisValueCount)) return HB_OT_NAME_ID_INVALID; + const AxisValue &axis_value = (this + get_axis_value_offsets ()[axis_value_index]); + return axis_value.get_value_name_id (); + } + + void collect_name_ids (hb_set_t *nameids_to_retain) const + { + if (!has_data ()) return; + + + get_design_axes () + | hb_map (&StatAxisRecord::get_name_id) + | hb_sink (nameids_to_retain) + ; + + + get_axis_value_offsets () + | hb_map (hb_add (&(this + offsetToAxisValueOffsets))) + | hb_map (&AxisValue::get_value_name_id) + | hb_sink (nameids_to_retain) + ; + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (likely (c->check_struct (this) && + version.major == 1 && + version.minor > 0 && + designAxesOffset.sanitize (c, this, designAxisCount) && + offsetToAxisValueOffsets.sanitize (c, this, axisValueCount, &(this+offsetToAxisValueOffsets)))); + } + + protected: + hb_array_t<const StatAxisRecord> const get_design_axes () const + { return (this+designAxesOffset).as_array (designAxisCount); } + + hb_array_t<const OffsetTo<AxisValue>> const get_axis_value_offsets () const + { return (this+offsetToAxisValueOffsets).as_array (axisValueCount); } + + + protected: + FixedVersion<>version; /* Version of the stat table + * initially set to 0x00010002u */ + HBUINT16 designAxisSize; /* The size in bytes of each axis record. */ + HBUINT16 designAxisCount;/* The number of design axis records. In a + * font with an 'fvar' table, this value must be + * greater than or equal to the axisCount value + * in the 'fvar' table. In all fonts, must + * be greater than zero if axisValueCount + * is greater than zero. */ + LNNOffsetTo<UnsizedArrayOf<StatAxisRecord>> + designAxesOffset; + /* Offset in bytes from the beginning of + * the STAT table to the start of the design + * axes array. If designAxisCount is zero, + * set to zero; if designAxisCount is greater + * than zero, must be greater than zero. */ + HBUINT16 axisValueCount; /* The number of axis value tables. */ + LNNOffsetTo<UnsizedArrayOf<OffsetTo<AxisValue>>> + offsetToAxisValueOffsets; + /* Offset in bytes from the beginning of + * the STAT table to the start of the design + * axes value offsets array. If axisValueCount + * is zero, set to zero; if axisValueCount is + * greater than zero, must be greater than zero. */ + NameID elidedFallbackNameID; + /* Name ID used as fallback when projection of + * names into a particular font model produces + * a subfamily name containing only elidable + * elements. */ + public: + DEFINE_SIZE_STATIC (20); +}; + + +} /* namespace OT */ + + +#endif /* HB_OT_STAT_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-tag-table.hh b/thirdparty/harfbuzz/src/hb-ot-tag-table.hh new file mode 100644 index 0000000000..99937d9f69 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-tag-table.hh @@ -0,0 +1,2176 @@ +/* == Start of generated table == */ +/* + * The following table is generated by running: + * + * ./gen-tag-table.py languagetags language-subtag-registry + * + * on files with these headers: + * + * <meta name="updated_at" content="2019-05-22 06:05 PM" /> + * File-Date: 2020-05-12 + */ + +#ifndef HB_OT_TAG_TABLE_HH +#define HB_OT_TAG_TABLE_HH + +static const LangTag ot_languages[] = { + {"aa", HB_TAG('A','F','R',' ')}, /* Afar */ + {"aae", HB_TAG('S','Q','I',' ')}, /* Arbëreshë Albanian -> Albanian */ + {"aao", HB_TAG('A','R','A',' ')}, /* Algerian Saharan Arabic -> Arabic */ + {"aat", HB_TAG('S','Q','I',' ')}, /* Arvanitika Albanian -> Albanian */ + {"ab", HB_TAG('A','B','K',' ')}, /* Abkhazian */ + {"abh", HB_TAG('A','R','A',' ')}, /* Tajiki Arabic -> Arabic */ + {"abq", HB_TAG('A','B','A',' ')}, /* Abaza */ + {"abv", HB_TAG('A','R','A',' ')}, /* Baharna Arabic -> Arabic */ + {"acf", HB_TAG('F','A','N',' ')}, /* Saint Lucian Creole French -> French Antillean */ +/*{"ach", HB_TAG('A','C','H',' ')},*/ /* Acoli -> Acholi */ + {"acm", HB_TAG('A','R','A',' ')}, /* Mesopotamian Arabic -> Arabic */ + {"acq", HB_TAG('A','R','A',' ')}, /* Ta'izzi-Adeni Arabic -> Arabic */ +/*{"acr", HB_TAG('A','C','R',' ')},*/ /* Achi */ + {"acw", HB_TAG('A','R','A',' ')}, /* Hijazi Arabic -> Arabic */ + {"acx", HB_TAG('A','R','A',' ')}, /* Omani Arabic -> Arabic */ + {"acy", HB_TAG('A','R','A',' ')}, /* Cypriot Arabic -> Arabic */ + {"ada", HB_TAG('D','N','G',' ')}, /* Adangme -> Dangme */ + {"adf", HB_TAG('A','R','A',' ')}, /* Dhofari Arabic -> Arabic */ + {"adp", HB_TAG('D','Z','N',' ')}, /* Adap (retired code) -> Dzongkha */ +/*{"ady", HB_TAG('A','D','Y',' ')},*/ /* Adyghe */ + {"aeb", HB_TAG('A','R','A',' ')}, /* Tunisian Arabic -> Arabic */ + {"aec", HB_TAG('A','R','A',' ')}, /* Saidi Arabic -> Arabic */ + {"af", HB_TAG('A','F','K',' ')}, /* Afrikaans */ + {"afb", HB_TAG('A','R','A',' ')}, /* Gulf Arabic -> Arabic */ + {"ahg", HB_TAG('A','G','W',' ')}, /* Qimant -> Agaw */ + {"aht", HB_TAG('A','T','H',' ')}, /* Ahtena -> Athapaskan */ + {"aii", HB_TAG('S','W','A',' ')}, /* Assyrian Neo-Aramaic -> Swadaya Aramaic */ + {"aii", HB_TAG('S','Y','R',' ')}, /* Assyrian Neo-Aramaic -> Syriac */ +/*{"aio", HB_TAG('A','I','O',' ')},*/ /* Aiton */ + {"aiw", HB_TAG('A','R','I',' ')}, /* Aari */ + {"ajp", HB_TAG('A','R','A',' ')}, /* South Levantine Arabic -> Arabic */ + {"ak", HB_TAG('A','K','A',' ')}, /* Akan [macrolanguage] */ + {"ak", HB_TAG('T','W','I',' ')}, /* Akan [macrolanguage] -> Twi */ + {"aln", HB_TAG('S','Q','I',' ')}, /* Gheg Albanian -> Albanian */ + {"als", HB_TAG('S','Q','I',' ')}, /* Tosk Albanian -> Albanian */ +/*{"alt", HB_TAG('A','L','T',' ')},*/ /* Southern Altai -> Altai */ + {"am", HB_TAG('A','M','H',' ')}, /* Amharic */ + {"amf", HB_TAG('H','B','N',' ')}, /* Hamer-Banna -> Hammer-Banna */ + {"amw", HB_TAG('S','Y','R',' ')}, /* Western Neo-Aramaic -> Syriac */ + {"an", HB_TAG('A','R','G',' ')}, /* Aragonese */ +/*{"ang", HB_TAG('A','N','G',' ')},*/ /* Old English (ca. 450-1100) -> Anglo-Saxon */ + {"apc", HB_TAG('A','R','A',' ')}, /* North Levantine Arabic -> Arabic */ + {"apd", HB_TAG('A','R','A',' ')}, /* Sudanese Arabic -> Arabic */ + {"apj", HB_TAG('A','T','H',' ')}, /* Jicarilla Apache -> Athapaskan */ + {"apk", HB_TAG('A','T','H',' ')}, /* Kiowa Apache -> Athapaskan */ + {"apl", HB_TAG('A','T','H',' ')}, /* Lipan Apache -> Athapaskan */ + {"apm", HB_TAG('A','T','H',' ')}, /* Mescalero-Chiricahua Apache -> Athapaskan */ + {"apw", HB_TAG('A','T','H',' ')}, /* Western Apache -> Athapaskan */ + {"ar", HB_TAG('A','R','A',' ')}, /* Arabic [macrolanguage] */ + {"arb", HB_TAG('A','R','A',' ')}, /* Standard Arabic -> Arabic */ + {"arn", HB_TAG('M','A','P',' ')}, /* Mapudungun */ + {"arq", HB_TAG('A','R','A',' ')}, /* Algerian Arabic -> Arabic */ + {"ars", HB_TAG('A','R','A',' ')}, /* Najdi Arabic -> Arabic */ + {"ary", HB_TAG('M','O','R',' ')}, /* Moroccan Arabic -> Moroccan */ + {"arz", HB_TAG('A','R','A',' ')}, /* Egyptian Arabic -> Arabic */ + {"as", HB_TAG('A','S','M',' ')}, /* Assamese */ +/*{"ast", HB_TAG('A','S','T',' ')},*/ /* Asturian */ +/*{"ath", HB_TAG('A','T','H',' ')},*/ /* Athapascan [family] -> Athapaskan */ + {"atj", HB_TAG('R','C','R',' ')}, /* Atikamekw -> R-Cree */ + {"atv", HB_TAG('A','L','T',' ')}, /* Northern Altai -> Altai */ + {"auz", HB_TAG('A','R','A',' ')}, /* Uzbeki Arabic -> Arabic */ + {"av", HB_TAG('A','V','R',' ')}, /* Avaric -> Avar */ + {"avl", HB_TAG('A','R','A',' ')}, /* Eastern Egyptian Bedawi Arabic -> Arabic */ +/*{"awa", HB_TAG('A','W','A',' ')},*/ /* Awadhi */ + {"ay", HB_TAG('A','Y','M',' ')}, /* Aymara [macrolanguage] */ + {"ayc", HB_TAG('A','Y','M',' ')}, /* Southern Aymara -> Aymara */ + {"ayh", HB_TAG('A','R','A',' ')}, /* Hadrami Arabic -> Arabic */ + {"ayl", HB_TAG('A','R','A',' ')}, /* Libyan Arabic -> Arabic */ + {"ayn", HB_TAG('A','R','A',' ')}, /* Sanaani Arabic -> Arabic */ + {"ayp", HB_TAG('A','R','A',' ')}, /* North Mesopotamian Arabic -> Arabic */ + {"ayr", HB_TAG('A','Y','M',' ')}, /* Central Aymara -> Aymara */ + {"az", HB_TAG('A','Z','E',' ')}, /* Azerbaijani [macrolanguage] */ +/*{"azb", HB_TAG('A','Z','B',' ')},*/ /* South Azerbaijani -> Torki */ + {"azj", HB_TAG('A','Z','E',' ')}, /* North Azerbaijani -> Azerbaijani */ + {"ba", HB_TAG('B','S','H',' ')}, /* Bashkir */ + {"bad", HB_TAG('B','A','D','0')}, /* Banda [family] */ + {"bai", HB_TAG('B','M','L',' ')}, /* Bamileke [family] */ + {"bal", HB_TAG('B','L','I',' ')}, /* Baluchi [macrolanguage] */ +/*{"ban", HB_TAG('B','A','N',' ')},*/ /* Balinese */ +/*{"bar", HB_TAG('B','A','R',' ')},*/ /* Bavarian */ +/*{"bbc", HB_TAG('B','B','C',' ')},*/ /* Batak Toba */ + {"bbz", HB_TAG('A','R','A',' ')}, /* Babalia Creole Arabic (retired code) -> Arabic */ + {"bcc", HB_TAG('B','L','I',' ')}, /* Southern Balochi -> Baluchi */ + {"bci", HB_TAG('B','A','U',' ')}, /* Baoulé -> Baulé */ + {"bcl", HB_TAG('B','I','K',' ')}, /* Central Bikol -> Bikol */ + {"bcq", HB_TAG('B','C','H',' ')}, /* Bench */ + {"bcr", HB_TAG('A','T','H',' ')}, /* Babine -> Athapaskan */ +/*{"bdy", HB_TAG('B','D','Y',' ')},*/ /* Bandjalang */ + {"be", HB_TAG('B','E','L',' ')}, /* Belarusian -> Belarussian */ + {"bea", HB_TAG('A','T','H',' ')}, /* Beaver -> Athapaskan */ + {"beb", HB_TAG('B','T','I',' ')}, /* Bebele -> Beti */ +/*{"bem", HB_TAG('B','E','M',' ')},*/ /* Bemba (Zambia) */ + {"ber", HB_TAG('B','B','R',' ')}, /* Berber [family] */ + {"bfq", HB_TAG('B','A','D',' ')}, /* Badaga */ + {"bft", HB_TAG('B','L','T',' ')}, /* Balti */ + {"bfu", HB_TAG('L','A','H',' ')}, /* Gahri -> Lahuli */ + {"bfy", HB_TAG('B','A','G',' ')}, /* Bagheli -> Baghelkhandi */ + {"bg", HB_TAG('B','G','R',' ')}, /* Bulgarian */ +/*{"bgc", HB_TAG('B','G','C',' ')},*/ /* Haryanvi */ + {"bgn", HB_TAG('B','L','I',' ')}, /* Western Balochi -> Baluchi */ + {"bgp", HB_TAG('B','L','I',' ')}, /* Eastern Balochi -> Baluchi */ +/*{"bgq", HB_TAG('B','G','Q',' ')},*/ /* Bagri */ + {"bgr", HB_TAG('Q','I','N',' ')}, /* Bawm Chin -> Chin */ + {"bhb", HB_TAG('B','H','I',' ')}, /* Bhili */ +/*{"bhi", HB_TAG('B','H','I',' ')},*/ /* Bhilali -> Bhili */ + {"bhk", HB_TAG('B','I','K',' ')}, /* Albay Bicolano (retired code) -> Bikol */ +/*{"bho", HB_TAG('B','H','O',' ')},*/ /* Bhojpuri */ + {"bhr", HB_TAG('M','L','G',' ')}, /* Bara Malagasy -> Malagasy */ + {"bi", HB_TAG('B','I','S',' ')}, /* Bislama */ +/*{"bik", HB_TAG('B','I','K',' ')},*/ /* Bikol [macrolanguage] */ + {"bin", HB_TAG('E','D','O',' ')}, /* Edo */ +/*{"bjj", HB_TAG('B','J','J',' ')},*/ /* Kanauji */ + {"bjn", HB_TAG('M','L','Y',' ')}, /* Banjar -> Malay */ + {"bjq", HB_TAG('M','L','G',' ')}, /* Southern Betsimisaraka Malagasy (retired code) -> Malagasy */ + {"bjt", HB_TAG('B','L','N',' ')}, /* Balanta-Ganja -> Balante */ + {"bla", HB_TAG('B','K','F',' ')}, /* Siksika -> Blackfoot */ + {"ble", HB_TAG('B','L','N',' ')}, /* Balanta-Kentohe -> Balante */ +/*{"blk", HB_TAG('B','L','K',' ')},*/ /* Pa’o Karen */ + {"bln", HB_TAG('B','I','K',' ')}, /* Southern Catanduanes Bikol -> Bikol */ + {"bm", HB_TAG('B','M','B',' ')}, /* Bambara (Bamanankan) */ + {"bmm", HB_TAG('M','L','G',' ')}, /* Northern Betsimisaraka Malagasy -> Malagasy */ + {"bn", HB_TAG('B','E','N',' ')}, /* Bengali */ + {"bo", HB_TAG('T','I','B',' ')}, /* Tibetan */ +/*{"bpy", HB_TAG('B','P','Y',' ')},*/ /* Bishnupriya -> Bishnupriya Manipuri */ + {"bqi", HB_TAG('L','R','C',' ')}, /* Bakhtiari -> Luri */ + {"br", HB_TAG('B','R','E',' ')}, /* Breton */ + {"bra", HB_TAG('B','R','I',' ')}, /* Braj -> Braj Bhasha */ +/*{"brh", HB_TAG('B','R','H',' ')},*/ /* Brahui */ +/*{"brx", HB_TAG('B','R','X',' ')},*/ /* Bodo (India) */ + {"bs", HB_TAG('B','O','S',' ')}, /* Bosnian */ +/*{"bsk", HB_TAG('B','S','K',' ')},*/ /* Burushaski */ + {"btb", HB_TAG('B','T','I',' ')}, /* Beti (Cameroon) (retired code) */ + {"btj", HB_TAG('M','L','Y',' ')}, /* Bacanese Malay -> Malay */ + {"bto", HB_TAG('B','I','K',' ')}, /* Rinconada Bikol -> Bikol */ +/*{"bts", HB_TAG('B','T','S',' ')},*/ /* Batak Simalungun */ +/*{"bug", HB_TAG('B','U','G',' ')},*/ /* Buginese -> Bugis */ + {"bum", HB_TAG('B','T','I',' ')}, /* Bulu (Cameroon) -> Beti */ + {"bve", HB_TAG('M','L','Y',' ')}, /* Berau Malay -> Malay */ + {"bvu", HB_TAG('M','L','Y',' ')}, /* Bukit Malay -> Malay */ + {"bxk", HB_TAG('L','U','H',' ')}, /* Bukusu -> Luyia */ + {"bxp", HB_TAG('B','T','I',' ')}, /* Bebil -> Beti */ + {"bxr", HB_TAG('R','B','U',' ')}, /* Russia Buriat -> Russian Buriat */ + {"byn", HB_TAG('B','I','L',' ')}, /* Bilin -> Bilen */ +/*{"byv", HB_TAG('B','Y','V',' ')},*/ /* Medumba */ + {"bzc", HB_TAG('M','L','G',' ')}, /* Southern Betsimisaraka Malagasy -> Malagasy */ + {"ca", HB_TAG('C','A','T',' ')}, /* Catalan */ + {"caf", HB_TAG('C','R','R',' ')}, /* Southern Carrier -> Carrier */ + {"caf", HB_TAG('A','T','H',' ')}, /* Southern Carrier -> Athapaskan */ +/*{"cak", HB_TAG('C','A','K',' ')},*/ /* Kaqchikel */ +/*{"cbk", HB_TAG('C','B','K',' ')},*/ /* Chavacano -> Zamboanga Chavacano */ + {"cbl", HB_TAG('Q','I','N',' ')}, /* Bualkhaw Chin -> Chin */ + {"cco", HB_TAG('C','C','H','N')}, /* Comaltepec Chinantec -> Chinantec */ + {"ccq", HB_TAG('A','R','K',' ')}, /* Chaungtha (retired code) -> Rakhine */ + {"cdo", HB_TAG('Z','H','S',' ')}, /* Min Dong Chinese -> Chinese Simplified */ + {"ce", HB_TAG('C','H','E',' ')}, /* Chechen */ +/*{"ceb", HB_TAG('C','E','B',' ')},*/ /* Cebuano */ + {"cfm", HB_TAG('H','A','L',' ')}, /* Halam (Falam Chin) */ +/*{"cgg", HB_TAG('C','G','G',' ')},*/ /* Chiga */ + {"ch", HB_TAG('C','H','A',' ')}, /* Chamorro */ + {"chj", HB_TAG('C','C','H','N')}, /* Ojitlán Chinantec -> Chinantec */ + {"chk", HB_TAG('C','H','K','0')}, /* Chuukese */ +/*{"cho", HB_TAG('C','H','O',' ')},*/ /* Choctaw */ + {"chp", HB_TAG('C','H','P',' ')}, /* Chipewyan */ + {"chp", HB_TAG('S','A','Y',' ')}, /* Chipewyan -> Sayisi */ + {"chp", HB_TAG('A','T','H',' ')}, /* Chipewyan -> Athapaskan */ + {"chq", HB_TAG('C','C','H','N')}, /* Quiotepec Chinantec -> Chinantec */ +/*{"chr", HB_TAG('C','H','R',' ')},*/ /* Cherokee */ +/*{"chy", HB_TAG('C','H','Y',' ')},*/ /* Cheyenne */ + {"chz", HB_TAG('C','C','H','N')}, /* Ozumacín Chinantec -> Chinantec */ + {"ciw", HB_TAG('O','J','B',' ')}, /* Chippewa -> Ojibway */ +/*{"cja", HB_TAG('C','J','A',' ')},*/ /* Western Cham */ +/*{"cjm", HB_TAG('C','J','M',' ')},*/ /* Eastern Cham */ + {"cjy", HB_TAG('Z','H','S',' ')}, /* Jinyu Chinese -> Chinese Simplified */ + {"cka", HB_TAG('Q','I','N',' ')}, /* Khumi Awa Chin (retired code) -> Chin */ + {"ckb", HB_TAG('K','U','R',' ')}, /* Central Kurdish -> Kurdish */ + {"ckt", HB_TAG('C','H','K',' ')}, /* Chukot -> Chukchi */ + {"clc", HB_TAG('A','T','H',' ')}, /* Chilcotin -> Athapaskan */ + {"cld", HB_TAG('S','Y','R',' ')}, /* Chaldean Neo-Aramaic -> Syriac */ + {"cle", HB_TAG('C','C','H','N')}, /* Lealao Chinantec -> Chinantec */ + {"cmn", HB_TAG('Z','H','S',' ')}, /* Mandarin Chinese -> Chinese Simplified */ + {"cmr", HB_TAG('Q','I','N',' ')}, /* Mro-Khimi Chin -> Chin */ + {"cnb", HB_TAG('Q','I','N',' ')}, /* Chinbon Chin -> Chin */ + {"cnh", HB_TAG('Q','I','N',' ')}, /* Hakha Chin -> Chin */ + {"cnk", HB_TAG('Q','I','N',' ')}, /* Khumi Chin -> Chin */ + {"cnl", HB_TAG('C','C','H','N')}, /* Lalana Chinantec -> Chinantec */ + {"cnp", HB_TAG('Z','H','S',' ')}, /* Northern Ping Chinese -> Chinese Simplified */ + {"cnt", HB_TAG('C','C','H','N')}, /* Tepetotutla Chinantec -> Chinantec */ + {"cnw", HB_TAG('Q','I','N',' ')}, /* Ngawn Chin -> Chin */ + {"co", HB_TAG('C','O','S',' ')}, /* Corsican */ + {"coa", HB_TAG('M','L','Y',' ')}, /* Cocos Islands Malay -> Malay */ +/*{"cop", HB_TAG('C','O','P',' ')},*/ /* Coptic */ + {"coq", HB_TAG('A','T','H',' ')}, /* Coquille -> Athapaskan */ + {"cpa", HB_TAG('C','C','H','N')}, /* Palantla Chinantec -> Chinantec */ + {"cpe", HB_TAG('C','P','P',' ')}, /* English-based creoles and pidgins [family] -> Creoles */ + {"cpf", HB_TAG('C','P','P',' ')}, /* French-based creoles and pidgins [family] -> Creoles */ +/*{"cpp", HB_TAG('C','P','P',' ')},*/ /* Portuguese-based creoles and pidgins [family] -> Creoles */ + {"cpx", HB_TAG('Z','H','S',' ')}, /* Pu-Xian Chinese -> Chinese Simplified */ + {"cqd", HB_TAG('H','M','N',' ')}, /* Chuanqiandian Cluster Miao -> Hmong */ + {"cqu", HB_TAG('Q','U','H',' ')}, /* Chilean Quechua (retired code) -> Quechua (Bolivia) */ + {"cr", HB_TAG('C','R','E',' ')}, /* Cree [macrolanguage] */ + {"cr", HB_TAG('Y','C','R',' ')}, /* Cree [macrolanguage] -> Y-Cree */ + {"crh", HB_TAG('C','R','T',' ')}, /* Crimean Tatar */ + {"crj", HB_TAG('E','C','R',' ')}, /* Southern East Cree -> Eastern Cree */ + {"crk", HB_TAG('W','C','R',' ')}, /* Plains Cree -> West-Cree */ + {"crl", HB_TAG('E','C','R',' ')}, /* Northern East Cree -> Eastern Cree */ + {"crm", HB_TAG('M','C','R',' ')}, /* Moose Cree */ + {"crm", HB_TAG('L','C','R',' ')}, /* Moose Cree -> L-Cree */ + {"crp", HB_TAG('C','P','P',' ')}, /* Creoles and pidgins [family] -> Creoles */ + {"crx", HB_TAG('C','R','R',' ')}, /* Carrier */ + {"crx", HB_TAG('A','T','H',' ')}, /* Carrier -> Athapaskan */ + {"cs", HB_TAG('C','S','Y',' ')}, /* Czech */ + {"csa", HB_TAG('C','C','H','N')}, /* Chiltepec Chinantec -> Chinantec */ +/*{"csb", HB_TAG('C','S','B',' ')},*/ /* Kashubian */ + {"csh", HB_TAG('Q','I','N',' ')}, /* Asho Chin -> Chin */ + {"cso", HB_TAG('C','C','H','N')}, /* Sochiapam Chinantec -> Chinantec */ + {"csp", HB_TAG('Z','H','S',' ')}, /* Southern Ping Chinese -> Chinese Simplified */ + {"csw", HB_TAG('N','C','R',' ')}, /* Swampy Cree -> N-Cree */ + {"csw", HB_TAG('N','H','C',' ')}, /* Swampy Cree -> Norway House Cree */ + {"csy", HB_TAG('Q','I','N',' ')}, /* Siyin Chin -> Chin */ + {"ctc", HB_TAG('A','T','H',' ')}, /* Chetco -> Athapaskan */ + {"ctd", HB_TAG('Q','I','N',' ')}, /* Tedim Chin -> Chin */ + {"cte", HB_TAG('C','C','H','N')}, /* Tepinapa Chinantec -> Chinantec */ +/*{"ctg", HB_TAG('C','T','G',' ')},*/ /* Chittagonian */ + {"ctl", HB_TAG('C','C','H','N')}, /* Tlacoatzintepec Chinantec -> Chinantec */ + {"cts", HB_TAG('B','I','K',' ')}, /* Northern Catanduanes Bikol -> Bikol */ + {"cu", HB_TAG('C','S','L',' ')}, /* Church Slavonic */ + {"cuc", HB_TAG('C','C','H','N')}, /* Usila Chinantec -> Chinantec */ +/*{"cuk", HB_TAG('C','U','K',' ')},*/ /* San Blas Kuna */ + {"cv", HB_TAG('C','H','U',' ')}, /* Chuvash */ + {"cvn", HB_TAG('C','C','H','N')}, /* Valle Nacional Chinantec -> Chinantec */ + {"cwd", HB_TAG('D','C','R',' ')}, /* Woods Cree */ + {"cwd", HB_TAG('T','C','R',' ')}, /* Woods Cree -> TH-Cree */ + {"cy", HB_TAG('W','E','L',' ')}, /* Welsh */ + {"czh", HB_TAG('Z','H','S',' ')}, /* Huizhou Chinese -> Chinese Simplified */ + {"czo", HB_TAG('Z','H','S',' ')}, /* Min Zhong Chinese -> Chinese Simplified */ + {"czt", HB_TAG('Q','I','N',' ')}, /* Zotung Chin -> Chin */ + {"da", HB_TAG('D','A','N',' ')}, /* Danish */ + {"dao", HB_TAG('Q','I','N',' ')}, /* Daai Chin -> Chin */ + {"dap", HB_TAG('N','I','S',' ')}, /* Nisi (India) (retired code) */ +/*{"dar", HB_TAG('D','A','R',' ')},*/ /* Dargwa */ +/*{"dax", HB_TAG('D','A','X',' ')},*/ /* Dayi */ + {"de", HB_TAG('D','E','U',' ')}, /* German */ + {"den", HB_TAG('S','L','A',' ')}, /* Slave (Athapascan) [macrolanguage] -> Slavey */ + {"den", HB_TAG('A','T','H',' ')}, /* Slave (Athapascan) [macrolanguage] -> Athapaskan */ +/*{"dgo", HB_TAG('D','G','O',' ')},*/ /* Dogri */ + {"dgr", HB_TAG('A','T','H',' ')}, /* Dogrib -> Athapaskan */ + {"dhd", HB_TAG('M','A','W',' ')}, /* Dhundari -> Marwari */ +/*{"dhg", HB_TAG('D','H','G',' ')},*/ /* Dhangu */ + {"dib", HB_TAG('D','N','K',' ')}, /* South Central Dinka -> Dinka */ + {"dik", HB_TAG('D','N','K',' ')}, /* Southwestern Dinka -> Dinka */ + {"din", HB_TAG('D','N','K',' ')}, /* Dinka [macrolanguage] */ + {"dip", HB_TAG('D','N','K',' ')}, /* Northeastern Dinka -> Dinka */ +/*{"diq", HB_TAG('D','I','Q',' ')},*/ /* Dimli */ + {"diw", HB_TAG('D','N','K',' ')}, /* Northwestern Dinka -> Dinka */ + {"dje", HB_TAG('D','J','R',' ')}, /* Zarma */ + {"djr", HB_TAG('D','J','R','0')}, /* Djambarrpuyngu */ + {"dks", HB_TAG('D','N','K',' ')}, /* Southeastern Dinka -> Dinka */ + {"dng", HB_TAG('D','U','N',' ')}, /* Dungan */ +/*{"dnj", HB_TAG('D','N','J',' ')},*/ /* Dan */ + {"doi", HB_TAG('D','G','R',' ')}, /* Dogri [macrolanguage] */ + {"drh", HB_TAG('M','N','G',' ')}, /* Darkhat (retired code) -> Mongolian */ + {"drw", HB_TAG('D','R','I',' ')}, /* Darwazi (retired code) -> Dari */ + {"dsb", HB_TAG('L','S','B',' ')}, /* Lower Sorbian */ + {"dty", HB_TAG('N','E','P',' ')}, /* Dotyali -> Nepali */ +/*{"duj", HB_TAG('D','U','J',' ')},*/ /* Dhuwal (retired code) */ + {"dup", HB_TAG('M','L','Y',' ')}, /* Duano -> Malay */ + {"dv", HB_TAG('D','I','V',' ')}, /* Divehi (Dhivehi, Maldivian) */ + {"dv", HB_TAG('D','H','V',' ')}, /* Divehi (Dhivehi, Maldivian) (deprecated) */ + {"dwk", HB_TAG('K','U','I',' ')}, /* Dawik Kui -> Kui */ + {"dwu", HB_TAG('D','U','J',' ')}, /* Dhuwal */ + {"dwy", HB_TAG('D','U','J',' ')}, /* Dhuwaya -> Dhuwal */ + {"dyu", HB_TAG('J','U','L',' ')}, /* Dyula -> Jula */ + {"dz", HB_TAG('D','Z','N',' ')}, /* Dzongkha */ + {"ee", HB_TAG('E','W','E',' ')}, /* Ewe */ +/*{"efi", HB_TAG('E','F','I',' ')},*/ /* Efik */ + {"ekk", HB_TAG('E','T','I',' ')}, /* Standard Estonian -> Estonian */ + {"el", HB_TAG('E','L','L',' ')}, /* Modern Greek (1453-) -> Greek */ + {"emk", HB_TAG('E','M','K',' ')}, /* Eastern Maninkakan */ + {"emk", HB_TAG('M','N','K',' ')}, /* Eastern Maninkakan -> Maninka */ + {"en", HB_TAG('E','N','G',' ')}, /* English */ + {"enb", HB_TAG('K','A','L',' ')}, /* Markweeta -> Kalenjin */ + {"enf", HB_TAG('F','N','E',' ')}, /* Forest Enets -> Forest Nenets */ + {"enh", HB_TAG('T','N','E',' ')}, /* Tundra Enets -> Tundra Nenets */ + {"eo", HB_TAG('N','T','O',' ')}, /* Esperanto */ + {"es", HB_TAG('E','S','P',' ')}, /* Spanish */ + {"esg", HB_TAG('G','O','N',' ')}, /* Aheri Gondi -> Gondi */ + {"esi", HB_TAG('I','P','K',' ')}, /* North Alaskan Inupiatun -> Inupiat */ + {"esk", HB_TAG('I','P','K',' ')}, /* Northwest Alaska Inupiatun -> Inupiat */ +/*{"esu", HB_TAG('E','S','U',' ')},*/ /* Central Yupik */ + {"et", HB_TAG('E','T','I',' ')}, /* Estonian [macrolanguage] */ + {"eto", HB_TAG('B','T','I',' ')}, /* Eton (Cameroon) -> Beti */ + {"eu", HB_TAG('E','U','Q',' ')}, /* Basque */ + {"eve", HB_TAG('E','V','N',' ')}, /* Even */ + {"evn", HB_TAG('E','V','K',' ')}, /* Evenki */ + {"ewo", HB_TAG('B','T','I',' ')}, /* Ewondo -> Beti */ + {"eyo", HB_TAG('K','A','L',' ')}, /* Keiyo -> Kalenjin */ + {"fa", HB_TAG('F','A','R',' ')}, /* Persian [macrolanguage] */ + {"fan", HB_TAG('F','A','N','0')}, /* Fang (Equatorial Guinea) */ +/*{"fat", HB_TAG('F','A','T',' ')},*/ /* Fanti */ + {"fbl", HB_TAG('B','I','K',' ')}, /* West Albay Bikol -> Bikol */ + {"ff", HB_TAG('F','U','L',' ')}, /* Fulah [macrolanguage] */ + {"ffm", HB_TAG('F','U','L',' ')}, /* Maasina Fulfulde -> Fulah */ + {"fi", HB_TAG('F','I','N',' ')}, /* Finnish */ + {"fil", HB_TAG('P','I','L',' ')}, /* Filipino */ + {"fj", HB_TAG('F','J','I',' ')}, /* Fijian */ + {"flm", HB_TAG('H','A','L',' ')}, /* Halam (Falam Chin) (retired code) */ + {"flm", HB_TAG('Q','I','N',' ')}, /* Falam Chin (retired code) -> Chin */ +/*{"fmp", HB_TAG('F','M','P',' ')},*/ /* Fe’fe’ */ + {"fo", HB_TAG('F','O','S',' ')}, /* Faroese */ +/*{"fon", HB_TAG('F','O','N',' ')},*/ /* Fon */ + {"fr", HB_TAG('F','R','A',' ')}, /* French */ +/*{"frc", HB_TAG('F','R','C',' ')},*/ /* Cajun French */ +/*{"frp", HB_TAG('F','R','P',' ')},*/ /* Arpitan */ + {"fub", HB_TAG('F','U','L',' ')}, /* Adamawa Fulfulde -> Fulah */ + {"fuc", HB_TAG('F','U','L',' ')}, /* Pulaar -> Fulah */ + {"fue", HB_TAG('F','U','L',' ')}, /* Borgu Fulfulde -> Fulah */ + {"fuf", HB_TAG('F','T','A',' ')}, /* Pular -> Futa */ + {"fuh", HB_TAG('F','U','L',' ')}, /* Western Niger Fulfulde -> Fulah */ + {"fui", HB_TAG('F','U','L',' ')}, /* Bagirmi Fulfulde -> Fulah */ + {"fuq", HB_TAG('F','U','L',' ')}, /* Central-Eastern Niger Fulfulde -> Fulah */ + {"fur", HB_TAG('F','R','L',' ')}, /* Friulian */ +/*{"fuv", HB_TAG('F','U','V',' ')},*/ /* Nigerian Fulfulde */ + {"fy", HB_TAG('F','R','I',' ')}, /* Western Frisian -> Frisian */ + {"ga", HB_TAG('I','R','I',' ')}, /* Irish */ + {"gaa", HB_TAG('G','A','D',' ')}, /* Ga */ +/*{"gag", HB_TAG('G','A','G',' ')},*/ /* Gagauz */ + {"gan", HB_TAG('Z','H','S',' ')}, /* Gan Chinese -> Chinese Simplified */ + {"gax", HB_TAG('O','R','O',' ')}, /* Borana-Arsi-Guji Oromo -> Oromo */ + {"gaz", HB_TAG('O','R','O',' ')}, /* West Central Oromo -> Oromo */ + {"gbm", HB_TAG('G','A','W',' ')}, /* Garhwali */ + {"gce", HB_TAG('A','T','H',' ')}, /* Galice -> Athapaskan */ + {"gd", HB_TAG('G','A','E',' ')}, /* Scottish Gaelic (Gaelic) */ + {"gda", HB_TAG('R','A','J',' ')}, /* Gade Lohar -> Rajasthani */ +/*{"gez", HB_TAG('G','E','Z',' ')},*/ /* Geez */ + {"ggo", HB_TAG('G','O','N',' ')}, /* Southern Gondi (retired code) -> Gondi */ +/*{"gih", HB_TAG('G','I','H',' ')},*/ /* Githabul */ + {"gil", HB_TAG('G','I','L','0')}, /* Kiribati (Gilbertese) */ + {"gju", HB_TAG('R','A','J',' ')}, /* Gujari -> Rajasthani */ +/*{"gkp", HB_TAG('G','K','P',' ')},*/ /* Guinea Kpelle -> Kpelle (Guinea) */ + {"gl", HB_TAG('G','A','L',' ')}, /* Galician */ + {"gld", HB_TAG('N','A','N',' ')}, /* Nanai */ +/*{"glk", HB_TAG('G','L','K',' ')},*/ /* Gilaki */ + {"gn", HB_TAG('G','U','A',' ')}, /* Guarani [macrolanguage] */ +/*{"gnn", HB_TAG('G','N','N',' ')},*/ /* Gumatj */ + {"gno", HB_TAG('G','O','N',' ')}, /* Northern Gondi -> Gondi */ + {"gnw", HB_TAG('G','U','A',' ')}, /* Western Bolivian Guaraní -> Guarani */ +/*{"gog", HB_TAG('G','O','G',' ')},*/ /* Gogo */ + {"gom", HB_TAG('K','O','K',' ')}, /* Goan Konkani -> Konkani */ +/*{"gon", HB_TAG('G','O','N',' ')},*/ /* Gondi [macrolanguage] */ + {"grt", HB_TAG('G','R','O',' ')}, /* Garo */ + {"gru", HB_TAG('S','O','G',' ')}, /* Kistane -> Sodo Gurage */ + {"gsw", HB_TAG('A','L','S',' ')}, /* Alsatian */ + {"gu", HB_TAG('G','U','J',' ')}, /* Gujarati */ +/*{"guc", HB_TAG('G','U','C',' ')},*/ /* Wayuu */ +/*{"guf", HB_TAG('G','U','F',' ')},*/ /* Gupapuyngu */ + {"gug", HB_TAG('G','U','A',' ')}, /* Paraguayan Guaraní -> Guarani */ + {"gui", HB_TAG('G','U','A',' ')}, /* Eastern Bolivian Guaraní -> Guarani */ + {"guk", HB_TAG('G','M','Z',' ')}, /* Gumuz */ + {"guk", HB_TAG('G','U','K',' ')}, /* Gumuz (SIL fonts) */ + {"gun", HB_TAG('G','U','A',' ')}, /* Mbyá Guaraní -> Guarani */ +/*{"guz", HB_TAG('G','U','Z',' ')},*/ /* Gusii */ + {"gv", HB_TAG('M','N','X',' ')}, /* Manx */ + {"gwi", HB_TAG('A','T','H',' ')}, /* Gwichʼin -> Athapaskan */ + {"ha", HB_TAG('H','A','U',' ')}, /* Hausa */ + {"haa", HB_TAG('A','T','H',' ')}, /* Han -> Athapaskan */ + {"hae", HB_TAG('O','R','O',' ')}, /* Eastern Oromo -> Oromo */ + {"hak", HB_TAG('Z','H','S',' ')}, /* Hakka Chinese -> Chinese Simplified */ + {"har", HB_TAG('H','R','I',' ')}, /* Harari */ +/*{"haw", HB_TAG('H','A','W',' ')},*/ /* Hawaiian */ +/*{"hay", HB_TAG('H','A','Y',' ')},*/ /* Haya */ +/*{"haz", HB_TAG('H','A','Z',' ')},*/ /* Hazaragi */ + {"he", HB_TAG('I','W','R',' ')}, /* Hebrew */ + {"hea", HB_TAG('H','M','N',' ')}, /* Northern Qiandong Miao -> Hmong */ + {"hi", HB_TAG('H','I','N',' ')}, /* Hindi */ +/*{"hil", HB_TAG('H','I','L',' ')},*/ /* Hiligaynon */ + {"hji", HB_TAG('M','L','Y',' ')}, /* Haji -> Malay */ + {"hlt", HB_TAG('Q','I','N',' ')}, /* Matu Chin -> Chin */ + {"hma", HB_TAG('H','M','N',' ')}, /* Southern Mashan Hmong -> Hmong */ + {"hmc", HB_TAG('H','M','N',' ')}, /* Central Huishui Hmong -> Hmong */ + {"hmd", HB_TAG('H','M','N',' ')}, /* Large Flowery Miao -> Hmong */ + {"hme", HB_TAG('H','M','N',' ')}, /* Eastern Huishui Hmong -> Hmong */ + {"hmg", HB_TAG('H','M','N',' ')}, /* Southwestern Guiyang Hmong -> Hmong */ + {"hmh", HB_TAG('H','M','N',' ')}, /* Southwestern Huishui Hmong -> Hmong */ + {"hmi", HB_TAG('H','M','N',' ')}, /* Northern Huishui Hmong -> Hmong */ + {"hmj", HB_TAG('H','M','N',' ')}, /* Ge -> Hmong */ + {"hml", HB_TAG('H','M','N',' ')}, /* Luopohe Hmong -> Hmong */ + {"hmm", HB_TAG('H','M','N',' ')}, /* Central Mashan Hmong -> Hmong */ +/*{"hmn", HB_TAG('H','M','N',' ')},*/ /* Hmong [macrolanguage] */ + {"hmp", HB_TAG('H','M','N',' ')}, /* Northern Mashan Hmong -> Hmong */ + {"hmq", HB_TAG('H','M','N',' ')}, /* Eastern Qiandong Miao -> Hmong */ + {"hms", HB_TAG('H','M','N',' ')}, /* Southern Qiandong Miao -> Hmong */ + {"hmw", HB_TAG('H','M','N',' ')}, /* Western Mashan Hmong -> Hmong */ + {"hmy", HB_TAG('H','M','N',' ')}, /* Southern Guiyang Hmong -> Hmong */ + {"hmz", HB_TAG('H','M','N',' ')}, /* Hmong Shua -> Hmong */ +/*{"hnd", HB_TAG('H','N','D',' ')},*/ /* Southern Hindko -> Hindko */ + {"hne", HB_TAG('C','H','H',' ')}, /* Chhattisgarhi -> Chattisgarhi */ + {"hnj", HB_TAG('H','M','N',' ')}, /* Hmong Njua -> Hmong */ + {"hno", HB_TAG('H','N','D',' ')}, /* Northern Hindko -> Hindko */ + {"ho", HB_TAG('H','M','O',' ')}, /* Hiri Motu */ + {"hoc", HB_TAG('H','O',' ',' ')}, /* Ho */ + {"hoi", HB_TAG('A','T','H',' ')}, /* Holikachuk -> Athapaskan */ + {"hoj", HB_TAG('H','A','R',' ')}, /* Hadothi -> Harauti */ + {"hr", HB_TAG('H','R','V',' ')}, /* Croatian */ + {"hrm", HB_TAG('H','M','N',' ')}, /* Horned Miao -> Hmong */ + {"hsb", HB_TAG('U','S','B',' ')}, /* Upper Sorbian */ + {"hsn", HB_TAG('Z','H','S',' ')}, /* Xiang Chinese -> Chinese Simplified */ + {"ht", HB_TAG('H','A','I',' ')}, /* Haitian (Haitian Creole) */ + {"hu", HB_TAG('H','U','N',' ')}, /* Hungarian */ + {"huj", HB_TAG('H','M','N',' ')}, /* Northern Guiyang Hmong -> Hmong */ + {"hup", HB_TAG('A','T','H',' ')}, /* Hupa -> Athapaskan */ + {"hy", HB_TAG('H','Y','E','0')}, /* Armenian -> Armenian East */ + {"hy", HB_TAG('H','Y','E',' ')}, /* Armenian */ + {"hyw", HB_TAG('H','Y','E',' ')}, /* Western Armenian -> Armenian */ + {"hz", HB_TAG('H','E','R',' ')}, /* Herero */ + {"ia", HB_TAG('I','N','A',' ')}, /* Interlingua (International Auxiliary Language Association) */ +/*{"iba", HB_TAG('I','B','A',' ')},*/ /* Iban */ +/*{"ibb", HB_TAG('I','B','B',' ')},*/ /* Ibibio */ + {"id", HB_TAG('I','N','D',' ')}, /* Indonesian */ + {"ida", HB_TAG('L','U','H',' ')}, /* Idakho-Isukha-Tiriki -> Luyia */ + {"ie", HB_TAG('I','L','E',' ')}, /* Interlingue */ + {"ig", HB_TAG('I','B','O',' ')}, /* Igbo */ + {"igb", HB_TAG('E','B','I',' ')}, /* Ebira */ + {"ii", HB_TAG('Y','I','M',' ')}, /* Sichuan Yi -> Yi Modern */ + {"ijc", HB_TAG('I','J','O',' ')}, /* Izon -> Ijo */ +/*{"ijo", HB_TAG('I','J','O',' ')},*/ /* Ijo [family] */ + {"ik", HB_TAG('I','P','K',' ')}, /* Inupiaq [macrolanguage] -> Inupiat */ + {"ike", HB_TAG('I','N','U',' ')}, /* Eastern Canadian Inuktitut -> Inuktitut */ + {"ikt", HB_TAG('I','N','U',' ')}, /* Inuinnaqtun -> Inuktitut */ +/*{"ilo", HB_TAG('I','L','O',' ')},*/ /* Iloko -> Ilokano */ + {"in", HB_TAG('I','N','D',' ')}, /* Indonesian (retired code) */ + {"ing", HB_TAG('A','T','H',' ')}, /* Degexit'an -> Athapaskan */ + {"inh", HB_TAG('I','N','G',' ')}, /* Ingush */ + {"io", HB_TAG('I','D','O',' ')}, /* Ido */ + {"is", HB_TAG('I','S','L',' ')}, /* Icelandic */ + {"it", HB_TAG('I','T','A',' ')}, /* Italian */ + {"iu", HB_TAG('I','N','U',' ')}, /* Inuktitut [macrolanguage] */ + {"iw", HB_TAG('I','W','R',' ')}, /* Hebrew (retired code) */ + {"ja", HB_TAG('J','A','N',' ')}, /* Japanese */ + {"jak", HB_TAG('M','L','Y',' ')}, /* Jakun -> Malay */ +/*{"jam", HB_TAG('J','A','M',' ')},*/ /* Jamaican Creole English -> Jamaican Creole */ + {"jax", HB_TAG('M','L','Y',' ')}, /* Jambi Malay -> Malay */ +/*{"jbo", HB_TAG('J','B','O',' ')},*/ /* Lojban */ +/*{"jct", HB_TAG('J','C','T',' ')},*/ /* Krymchak */ + {"ji", HB_TAG('J','I','I',' ')}, /* Yiddish (retired code) */ + {"jv", HB_TAG('J','A','V',' ')}, /* Javanese */ + {"jw", HB_TAG('J','A','V',' ')}, /* Javanese (retired code) */ + {"ka", HB_TAG('K','A','T',' ')}, /* Georgian */ + {"kaa", HB_TAG('K','R','K',' ')}, /* Karakalpak */ + {"kab", HB_TAG('K','A','B','0')}, /* Kabyle */ + {"kam", HB_TAG('K','M','B',' ')}, /* Kamba (Kenya) */ + {"kar", HB_TAG('K','R','N',' ')}, /* Karen [family] */ + {"kbd", HB_TAG('K','A','B',' ')}, /* Kabardian */ + {"kby", HB_TAG('K','N','R',' ')}, /* Manga Kanuri -> Kanuri */ + {"kca", HB_TAG('K','H','K',' ')}, /* Khanty -> Khanty-Kazim */ + {"kca", HB_TAG('K','H','S',' ')}, /* Khanty -> Khanty-Shurishkar */ + {"kca", HB_TAG('K','H','V',' ')}, /* Khanty -> Khanty-Vakhi */ +/*{"kde", HB_TAG('K','D','E',' ')},*/ /* Makonde */ + {"kdr", HB_TAG('K','R','M',' ')}, /* Karaim */ + {"kdt", HB_TAG('K','U','Y',' ')}, /* Kuy */ +/*{"kea", HB_TAG('K','E','A',' ')},*/ /* Kabuverdianu (Crioulo) */ +/*{"kek", HB_TAG('K','E','K',' ')},*/ /* Kekchi */ + {"kex", HB_TAG('K','K','N',' ')}, /* Kukna -> Kokni */ + {"kfa", HB_TAG('K','O','D',' ')}, /* Kodava -> Kodagu */ + {"kfr", HB_TAG('K','A','C',' ')}, /* Kachhi -> Kachchi */ + {"kfx", HB_TAG('K','U','L',' ')}, /* Kullu Pahari -> Kulvi */ + {"kfy", HB_TAG('K','M','N',' ')}, /* Kumaoni */ + {"kg", HB_TAG('K','O','N','0')}, /* Kongo [macrolanguage] */ + {"kha", HB_TAG('K','S','I',' ')}, /* Khasi */ + {"khb", HB_TAG('X','B','D',' ')}, /* Lü */ + {"khk", HB_TAG('M','N','G',' ')}, /* Halh Mongolian -> Mongolian */ + {"kht", HB_TAG('K','H','N',' ')}, /* Khamti -> Khamti Shan (Microsoft fonts) */ + {"kht", HB_TAG('K','H','T',' ')}, /* Khamti -> Khamti Shan (OpenType spec and SIL fonts) */ +/*{"khw", HB_TAG('K','H','W',' ')},*/ /* Khowar */ + {"ki", HB_TAG('K','I','K',' ')}, /* Kikuyu (Gikuyu) */ +/*{"kiu", HB_TAG('K','I','U',' ')},*/ /* Kirmanjki */ + {"kj", HB_TAG('K','U','A',' ')}, /* Kuanyama */ +/*{"kjd", HB_TAG('K','J','D',' ')},*/ /* Southern Kiwai */ + {"kjh", HB_TAG('K','H','A',' ')}, /* Khakas -> Khakass */ +/*{"kjp", HB_TAG('K','J','P',' ')},*/ /* Pwo Eastern Karen -> Eastern Pwo Karen */ +/*{"kjz", HB_TAG('K','J','Z',' ')},*/ /* Bumthangkha */ + {"kk", HB_TAG('K','A','Z',' ')}, /* Kazakh */ + {"kkz", HB_TAG('A','T','H',' ')}, /* Kaska -> Athapaskan */ + {"kl", HB_TAG('G','R','N',' ')}, /* Greenlandic */ + {"kln", HB_TAG('K','A','L',' ')}, /* Kalenjin [macrolanguage] */ + {"km", HB_TAG('K','H','M',' ')}, /* Khmer */ + {"kmb", HB_TAG('M','B','N',' ')}, /* Kimbundu -> Mbundu */ + {"kmr", HB_TAG('K','U','R',' ')}, /* Northern Kurdish -> Kurdish */ + {"kmw", HB_TAG('K','M','O',' ')}, /* Komo (Democratic Republic of Congo) */ +/*{"kmz", HB_TAG('K','M','Z',' ')},*/ /* Khorasani Turkish -> Khorasani Turkic */ + {"kn", HB_TAG('K','A','N',' ')}, /* Kannada */ + {"knc", HB_TAG('K','N','R',' ')}, /* Central Kanuri -> Kanuri */ + {"kng", HB_TAG('K','O','N','0')}, /* Koongo -> Kongo */ + {"knn", HB_TAG('K','O','K',' ')}, /* Konkani */ + {"ko", HB_TAG('K','O','R',' ')}, /* Korean */ + {"koi", HB_TAG('K','O','P',' ')}, /* Komi-Permyak */ +/*{"kok", HB_TAG('K','O','K',' ')},*/ /* Konkani [macrolanguage] */ +/*{"kos", HB_TAG('K','O','S',' ')},*/ /* Kosraean */ + {"koy", HB_TAG('A','T','H',' ')}, /* Koyukon -> Athapaskan */ + {"kpe", HB_TAG('K','P','L',' ')}, /* Kpelle [macrolanguage] */ + {"kpv", HB_TAG('K','O','Z',' ')}, /* Komi-Zyrian */ + {"kpy", HB_TAG('K','Y','K',' ')}, /* Koryak */ + {"kqs", HB_TAG('K','I','S',' ')}, /* Northern Kissi -> Kisii */ + {"kqy", HB_TAG('K','R','T',' ')}, /* Koorete */ + {"kr", HB_TAG('K','N','R',' ')}, /* Kanuri [macrolanguage] */ + {"krc", HB_TAG('K','A','R',' ')}, /* Karachay-Balkar -> Karachay */ + {"krc", HB_TAG('B','A','L',' ')}, /* Karachay-Balkar -> Balkar */ +/*{"kri", HB_TAG('K','R','I',' ')},*/ /* Krio */ +/*{"krl", HB_TAG('K','R','L',' ')},*/ /* Karelian */ + {"krt", HB_TAG('K','N','R',' ')}, /* Tumari Kanuri -> Kanuri */ + {"kru", HB_TAG('K','U','U',' ')}, /* Kurukh */ + {"ks", HB_TAG('K','S','H',' ')}, /* Kashmiri */ + {"ksh", HB_TAG('K','S','H','0')}, /* Kölsch -> Ripuarian */ + {"kss", HB_TAG('K','I','S',' ')}, /* Southern Kisi -> Kisii */ +/*{"ksw", HB_TAG('K','S','W',' ')},*/ /* S’gaw Karen */ + {"ktb", HB_TAG('K','E','B',' ')}, /* Kambaata -> Kebena */ + {"ktu", HB_TAG('K','O','N',' ')}, /* Kituba (Democratic Republic of Congo) -> Kikongo */ + {"ktw", HB_TAG('A','T','H',' ')}, /* Kato -> Athapaskan */ + {"ku", HB_TAG('K','U','R',' ')}, /* Kurdish [macrolanguage] */ +/*{"kum", HB_TAG('K','U','M',' ')},*/ /* Kumyk */ + {"kuu", HB_TAG('A','T','H',' ')}, /* Upper Kuskokwim -> Athapaskan */ + {"kv", HB_TAG('K','O','M',' ')}, /* Komi [macrolanguage] */ + {"kvb", HB_TAG('M','L','Y',' ')}, /* Kubu -> Malay */ + {"kvr", HB_TAG('M','L','Y',' ')}, /* Kerinci -> Malay */ + {"kw", HB_TAG('C','O','R',' ')}, /* Cornish */ + {"kwy", HB_TAG('K','O','N','0')}, /* San Salvador Kongo -> Kongo */ + {"kxc", HB_TAG('K','M','S',' ')}, /* Konso -> Komso */ + {"kxd", HB_TAG('M','L','Y',' ')}, /* Brunei -> Malay */ + {"kxl", HB_TAG('K','U','U',' ')}, /* Nepali Kurux (retired code) -> Kurukh */ + {"kxu", HB_TAG('K','U','I',' ')}, /* Kui (India) (retired code) */ + {"ky", HB_TAG('K','I','R',' ')}, /* Kirghiz (Kyrgyz) */ +/*{"kyu", HB_TAG('K','Y','U',' ')},*/ /* Western Kayah */ + {"la", HB_TAG('L','A','T',' ')}, /* Latin */ + {"lad", HB_TAG('J','U','D',' ')}, /* Ladino */ + {"lb", HB_TAG('L','T','Z',' ')}, /* Luxembourgish */ + {"lbe", HB_TAG('L','A','K',' ')}, /* Lak */ + {"lbj", HB_TAG('L','D','K',' ')}, /* Ladakhi */ + {"lbl", HB_TAG('B','I','K',' ')}, /* Libon Bikol -> Bikol */ + {"lce", HB_TAG('M','L','Y',' ')}, /* Loncong -> Malay */ + {"lcf", HB_TAG('M','L','Y',' ')}, /* Lubu -> Malay */ + {"ldi", HB_TAG('K','O','N','0')}, /* Laari -> Kongo */ +/*{"lez", HB_TAG('L','E','Z',' ')},*/ /* Lezghian -> Lezgi */ + {"lg", HB_TAG('L','U','G',' ')}, /* Ganda */ + {"li", HB_TAG('L','I','M',' ')}, /* Limburgish */ + {"lif", HB_TAG('L','M','B',' ')}, /* Limbu */ +/*{"lij", HB_TAG('L','I','J',' ')},*/ /* Ligurian */ +/*{"lis", HB_TAG('L','I','S',' ')},*/ /* Lisu */ + {"liw", HB_TAG('M','L','Y',' ')}, /* Col -> Malay */ +/*{"ljp", HB_TAG('L','J','P',' ')},*/ /* Lampung Api -> Lampung */ + {"lkb", HB_TAG('L','U','H',' ')}, /* Kabras -> Luyia */ +/*{"lki", HB_TAG('L','K','I',' ')},*/ /* Laki */ + {"lko", HB_TAG('L','U','H',' ')}, /* Khayo -> Luyia */ + {"lks", HB_TAG('L','U','H',' ')}, /* Kisa -> Luyia */ + {"lld", HB_TAG('L','A','D',' ')}, /* Ladin */ + {"lmn", HB_TAG('L','A','M',' ')}, /* Lambadi -> Lambani */ +/*{"lmo", HB_TAG('L','M','O',' ')},*/ /* Lombard */ + {"ln", HB_TAG('L','I','N',' ')}, /* Lingala */ + {"lo", HB_TAG('L','A','O',' ')}, /* Lao */ +/*{"lom", HB_TAG('L','O','M',' ')},*/ /* Loma (Liberia) */ +/*{"lrc", HB_TAG('L','R','C',' ')},*/ /* Northern Luri -> Luri */ + {"lri", HB_TAG('L','U','H',' ')}, /* Marachi -> Luyia */ + {"lrm", HB_TAG('L','U','H',' ')}, /* Marama -> Luyia */ + {"lsm", HB_TAG('L','U','H',' ')}, /* Saamia -> Luyia */ + {"lt", HB_TAG('L','T','H',' ')}, /* Lithuanian */ + {"ltg", HB_TAG('L','V','I',' ')}, /* Latgalian -> Latvian */ + {"lto", HB_TAG('L','U','H',' ')}, /* Tsotso -> Luyia */ + {"lts", HB_TAG('L','U','H',' ')}, /* Tachoni -> Luyia */ + {"lu", HB_TAG('L','U','B',' ')}, /* Luba-Katanga */ +/*{"lua", HB_TAG('L','U','A',' ')},*/ /* Luba-Lulua */ +/*{"luo", HB_TAG('L','U','O',' ')},*/ /* Luo (Kenya and Tanzania) */ + {"lus", HB_TAG('M','I','Z',' ')}, /* Lushai -> Mizo */ + {"luy", HB_TAG('L','U','H',' ')}, /* Luyia [macrolanguage] */ + {"luz", HB_TAG('L','R','C',' ')}, /* Southern Luri -> Luri */ + {"lv", HB_TAG('L','V','I',' ')}, /* Latvian [macrolanguage] */ + {"lvs", HB_TAG('L','V','I',' ')}, /* Standard Latvian -> Latvian */ + {"lwg", HB_TAG('L','U','H',' ')}, /* Wanga -> Luyia */ + {"lzh", HB_TAG('Z','H','T',' ')}, /* Literary Chinese -> Chinese Traditional */ + {"lzz", HB_TAG('L','A','Z',' ')}, /* Laz */ +/*{"mad", HB_TAG('M','A','D',' ')},*/ /* Madurese -> Madura */ +/*{"mag", HB_TAG('M','A','G',' ')},*/ /* Magahi */ + {"mai", HB_TAG('M','T','H',' ')}, /* Maithili */ + {"mak", HB_TAG('M','K','R',' ')}, /* Makasar */ +/*{"mam", HB_TAG('M','A','M',' ')},*/ /* Mam */ + {"man", HB_TAG('M','N','K',' ')}, /* Mandingo [macrolanguage] -> Maninka */ + {"max", HB_TAG('M','L','Y',' ')}, /* North Moluccan Malay -> Malay */ +/*{"mbo", HB_TAG('M','B','O',' ')},*/ /* Mbo (Cameroon) */ + {"mct", HB_TAG('B','T','I',' ')}, /* Mengisa -> Beti */ + {"mdf", HB_TAG('M','O','K',' ')}, /* Moksha */ +/*{"mdr", HB_TAG('M','D','R',' ')},*/ /* Mandar */ + {"mdy", HB_TAG('M','L','E',' ')}, /* Male (Ethiopia) */ + {"men", HB_TAG('M','D','E',' ')}, /* Mende (Sierra Leone) */ + {"meo", HB_TAG('M','L','Y',' ')}, /* Kedah Malay -> Malay */ +/*{"mer", HB_TAG('M','E','R',' ')},*/ /* Meru */ +/*{"mfa", HB_TAG('M','F','A',' ')},*/ /* Pattani Malay */ + {"mfb", HB_TAG('M','L','Y',' ')}, /* Bangka -> Malay */ +/*{"mfe", HB_TAG('M','F','E',' ')},*/ /* Morisyen */ + {"mg", HB_TAG('M','L','G',' ')}, /* Malagasy [macrolanguage] */ + {"mh", HB_TAG('M','A','H',' ')}, /* Marshallese */ + {"mhr", HB_TAG('L','M','A',' ')}, /* Eastern Mari -> Low Mari */ + {"mhv", HB_TAG('A','R','K',' ')}, /* Arakanese (retired code) -> Rakhine */ + {"mi", HB_TAG('M','R','I',' ')}, /* Maori */ +/*{"min", HB_TAG('M','I','N',' ')},*/ /* Minangkabau */ + {"mk", HB_TAG('M','K','D',' ')}, /* Macedonian */ + {"mku", HB_TAG('M','N','K',' ')}, /* Konyanka Maninka -> Maninka */ +/*{"mkw", HB_TAG('M','K','W',' ')},*/ /* Kituba (Congo) */ + {"ml", HB_TAG('M','A','L',' ')}, /* Malayalam -> Malayalam Traditional */ + {"ml", HB_TAG('M','L','R',' ')}, /* Malayalam -> Malayalam Reformed */ + {"mlq", HB_TAG('M','L','N',' ')}, /* Western Maninkakan -> Malinke */ + {"mlq", HB_TAG('M','N','K',' ')}, /* Western Maninkakan -> Maninka */ + {"mmr", HB_TAG('H','M','N',' ')}, /* Western Xiangxi Miao -> Hmong */ + {"mn", HB_TAG('M','N','G',' ')}, /* Mongolian [macrolanguage] */ + {"mnc", HB_TAG('M','C','H',' ')}, /* Manchu */ +/*{"mni", HB_TAG('M','N','I',' ')},*/ /* Manipuri */ + {"mnk", HB_TAG('M','N','D',' ')}, /* Mandinka */ + {"mnk", HB_TAG('M','N','K',' ')}, /* Mandinka -> Maninka */ + {"mnp", HB_TAG('Z','H','S',' ')}, /* Min Bei Chinese -> Chinese Simplified */ + {"mns", HB_TAG('M','A','N',' ')}, /* Mansi */ + {"mnw", HB_TAG('M','O','N',' ')}, /* Mon */ + {"mo", HB_TAG('M','O','L',' ')}, /* Moldavian (retired code) */ +/*{"moh", HB_TAG('M','O','H',' ')},*/ /* Mohawk */ +/*{"mos", HB_TAG('M','O','S',' ')},*/ /* Mossi */ + {"mpe", HB_TAG('M','A','J',' ')}, /* Majang */ + {"mqg", HB_TAG('M','L','Y',' ')}, /* Kota Bangun Kutai Malay -> Malay */ + {"mr", HB_TAG('M','A','R',' ')}, /* Marathi */ + {"mrh", HB_TAG('Q','I','N',' ')}, /* Mara Chin -> Chin */ + {"mrj", HB_TAG('H','M','A',' ')}, /* Western Mari -> High Mari */ + {"ms", HB_TAG('M','L','Y',' ')}, /* Malay [macrolanguage] */ + {"msc", HB_TAG('M','N','K',' ')}, /* Sankaran Maninka -> Maninka */ + {"msh", HB_TAG('M','L','G',' ')}, /* Masikoro Malagasy -> Malagasy */ + {"msi", HB_TAG('M','L','Y',' ')}, /* Sabah Malay -> Malay */ + {"mt", HB_TAG('M','T','S',' ')}, /* Maltese */ + {"mtr", HB_TAG('M','A','W',' ')}, /* Mewari -> Marwari */ + {"mui", HB_TAG('M','L','Y',' ')}, /* Musi -> Malay */ + {"mup", HB_TAG('R','A','J',' ')}, /* Malvi -> Rajasthani */ + {"muq", HB_TAG('H','M','N',' ')}, /* Eastern Xiangxi Miao -> Hmong */ +/*{"mus", HB_TAG('M','U','S',' ')},*/ /* Creek -> Muscogee */ + {"mvb", HB_TAG('A','T','H',' ')}, /* Mattole -> Athapaskan */ + {"mve", HB_TAG('M','A','W',' ')}, /* Marwari (Pakistan) */ + {"mvf", HB_TAG('M','N','G',' ')}, /* Peripheral Mongolian -> Mongolian */ + {"mwk", HB_TAG('M','N','K',' ')}, /* Kita Maninkakan -> Maninka */ +/*{"mwl", HB_TAG('M','W','L',' ')},*/ /* Mirandese */ + {"mwr", HB_TAG('M','A','W',' ')}, /* Marwari [macrolanguage] */ +/*{"mww", HB_TAG('M','W','W',' ')},*/ /* Hmong Daw */ + {"my", HB_TAG('B','R','M',' ')}, /* Burmese */ + {"mym", HB_TAG('M','E','N',' ')}, /* Me’en */ +/*{"myn", HB_TAG('M','Y','N',' ')},*/ /* Mayan [family] */ + {"myq", HB_TAG('M','N','K',' ')}, /* Forest Maninka (retired code) -> Maninka */ + {"myv", HB_TAG('E','R','Z',' ')}, /* Erzya */ +/*{"mzn", HB_TAG('M','Z','N',' ')},*/ /* Mazanderani */ + {"na", HB_TAG('N','A','U',' ')}, /* Nauru -> Nauruan */ +/*{"nag", HB_TAG('N','A','G',' ')},*/ /* Naga Pidgin -> Naga-Assamese */ +/*{"nah", HB_TAG('N','A','H',' ')},*/ /* Nahuatl [family] */ + {"nan", HB_TAG('Z','H','S',' ')}, /* Min Nan Chinese -> Chinese Simplified */ +/*{"nap", HB_TAG('N','A','P',' ')},*/ /* Neapolitan */ + {"nb", HB_TAG('N','O','R',' ')}, /* Norwegian Bokmål -> Norwegian */ + {"nd", HB_TAG('N','D','B',' ')}, /* North Ndebele -> Ndebele */ +/*{"ndc", HB_TAG('N','D','C',' ')},*/ /* Ndau */ +/*{"nds", HB_TAG('N','D','S',' ')},*/ /* Low Saxon */ + {"ne", HB_TAG('N','E','P',' ')}, /* Nepali [macrolanguage] */ +/*{"new", HB_TAG('N','E','W',' ')},*/ /* Newari */ + {"ng", HB_TAG('N','D','G',' ')}, /* Ndonga */ +/*{"nga", HB_TAG('N','G','A',' ')},*/ /* Ngbaka */ + {"ngl", HB_TAG('L','M','W',' ')}, /* Lomwe */ + {"ngo", HB_TAG('S','X','T',' ')}, /* Ngoni -> Sutu */ + {"nhd", HB_TAG('G','U','A',' ')}, /* Chiripá -> Guarani */ + {"niq", HB_TAG('K','A','L',' ')}, /* Nandi -> Kalenjin */ +/*{"niu", HB_TAG('N','I','U',' ')},*/ /* Niuean */ + {"niv", HB_TAG('G','I','L',' ')}, /* Gilyak */ + {"njz", HB_TAG('N','I','S',' ')}, /* Nyishi -> Nisi */ + {"nl", HB_TAG('N','L','D',' ')}, /* Dutch */ + {"nle", HB_TAG('L','U','H',' ')}, /* East Nyala -> Luyia */ + {"nn", HB_TAG('N','Y','N',' ')}, /* Norwegian Nynorsk (Nynorsk, Norwegian) */ + {"no", HB_TAG('N','O','R',' ')}, /* Norwegian [macrolanguage] */ + {"nod", HB_TAG('N','T','A',' ')}, /* Northern Thai -> Northern Tai */ +/*{"noe", HB_TAG('N','O','E',' ')},*/ /* Nimadi */ +/*{"nog", HB_TAG('N','O','G',' ')},*/ /* Nogai */ +/*{"nov", HB_TAG('N','O','V',' ')},*/ /* Novial */ + {"npi", HB_TAG('N','E','P',' ')}, /* Nepali */ + {"nqo", HB_TAG('N','K','O',' ')}, /* N’Ko */ + {"nr", HB_TAG('N','D','B',' ')}, /* South Ndebele -> Ndebele */ + {"nsk", HB_TAG('N','A','S',' ')}, /* Naskapi */ +/*{"nso", HB_TAG('N','S','O',' ')},*/ /* Pedi -> Sotho, Northern */ + {"nv", HB_TAG('N','A','V',' ')}, /* Navajo */ + {"nv", HB_TAG('A','T','H',' ')}, /* Navajo -> Athapaskan */ + {"ny", HB_TAG('C','H','I',' ')}, /* Chichewa (Chewa, Nyanja) */ + {"nyd", HB_TAG('L','U','H',' ')}, /* Nyore -> Luyia */ +/*{"nym", HB_TAG('N','Y','M',' ')},*/ /* Nyamwezi */ + {"nyn", HB_TAG('N','K','L',' ')}, /* Nyankole */ +/*{"nza", HB_TAG('N','Z','A',' ')},*/ /* Tigon Mbembe -> Mbembe Tigon */ + {"oc", HB_TAG('O','C','I',' ')}, /* Occitan (post 1500) */ + {"oj", HB_TAG('O','J','B',' ')}, /* Ojibwa [macrolanguage] -> Ojibway */ +/*{"ojb", HB_TAG('O','J','B',' ')},*/ /* Northwestern Ojibwa -> Ojibway */ + {"ojc", HB_TAG('O','J','B',' ')}, /* Central Ojibwa -> Ojibway */ + {"ojg", HB_TAG('O','J','B',' ')}, /* Eastern Ojibwa -> Ojibway */ + {"ojs", HB_TAG('O','C','R',' ')}, /* Severn Ojibwa -> Oji-Cree */ + {"ojw", HB_TAG('O','J','B',' ')}, /* Western Ojibwa -> Ojibway */ + {"oki", HB_TAG('K','A','L',' ')}, /* Okiek -> Kalenjin */ + {"okm", HB_TAG('K','O','H',' ')}, /* Middle Korean (10th-16th cent.) -> Korean Old Hangul */ + {"om", HB_TAG('O','R','O',' ')}, /* Oromo [macrolanguage] */ + {"or", HB_TAG('O','R','I',' ')}, /* Odia (formerly Oriya) [macrolanguage] */ + {"orc", HB_TAG('O','R','O',' ')}, /* Orma -> Oromo */ + {"orn", HB_TAG('M','L','Y',' ')}, /* Orang Kanaq -> Malay */ + {"ors", HB_TAG('M','L','Y',' ')}, /* Orang Seletar -> Malay */ + {"ory", HB_TAG('O','R','I',' ')}, /* Odia (formerly Oriya) */ + {"os", HB_TAG('O','S','S',' ')}, /* Ossetian */ + {"otw", HB_TAG('O','J','B',' ')}, /* Ottawa -> Ojibway */ + {"pa", HB_TAG('P','A','N',' ')}, /* Punjabi */ +/*{"pag", HB_TAG('P','A','G',' ')},*/ /* Pangasinan */ +/*{"pam", HB_TAG('P','A','M',' ')},*/ /* Pampanga -> Pampangan */ + {"pap", HB_TAG('P','A','P','0')}, /* Papiamento -> Papiamentu */ +/*{"pau", HB_TAG('P','A','U',' ')},*/ /* Palauan */ + {"pbt", HB_TAG('P','A','S',' ')}, /* Southern Pashto -> Pashto */ + {"pbu", HB_TAG('P','A','S',' ')}, /* Northern Pashto -> Pashto */ +/*{"pcc", HB_TAG('P','C','C',' ')},*/ /* Bouyei */ +/*{"pcd", HB_TAG('P','C','D',' ')},*/ /* Picard */ + {"pce", HB_TAG('P','L','G',' ')}, /* Ruching Palaung -> Palaung */ + {"pck", HB_TAG('Q','I','N',' ')}, /* Paite Chin -> Chin */ +/*{"pdc", HB_TAG('P','D','C',' ')},*/ /* Pennsylvania German */ + {"pel", HB_TAG('M','L','Y',' ')}, /* Pekal -> Malay */ + {"pes", HB_TAG('F','A','R',' ')}, /* Iranian Persian -> Persian */ + {"pga", HB_TAG('A','R','A',' ')}, /* Sudanese Creole Arabic -> Arabic */ +/*{"phk", HB_TAG('P','H','K',' ')},*/ /* Phake */ + {"pi", HB_TAG('P','A','L',' ')}, /* Pali */ +/*{"pih", HB_TAG('P','I','H',' ')},*/ /* Pitcairn-Norfolk -> Norfolk */ + {"pko", HB_TAG('K','A','L',' ')}, /* Pökoot -> Kalenjin */ + {"pl", HB_TAG('P','L','K',' ')}, /* Polish */ + {"pll", HB_TAG('P','L','G',' ')}, /* Shwe Palaung -> Palaung */ + {"plp", HB_TAG('P','A','P',' ')}, /* Palpa (retired code) */ + {"plt", HB_TAG('M','L','G',' ')}, /* Plateau Malagasy -> Malagasy */ +/*{"pms", HB_TAG('P','M','S',' ')},*/ /* Piemontese */ +/*{"pnb", HB_TAG('P','N','B',' ')},*/ /* Western Panjabi */ +/*{"poh", HB_TAG('P','O','H',' ')},*/ /* Poqomchi' -> Pocomchi */ +/*{"pon", HB_TAG('P','O','N',' ')},*/ /* Pohnpeian */ + {"ppa", HB_TAG('B','A','G',' ')}, /* Pao (retired code) -> Baghelkhandi */ +/*{"pro", HB_TAG('P','R','O',' ')},*/ /* Old Provençal (to 1500) -> Provençal / Old Provençal */ + {"prs", HB_TAG('D','R','I',' ')}, /* Dari */ + {"ps", HB_TAG('P','A','S',' ')}, /* Pashto [macrolanguage] */ + {"pse", HB_TAG('M','L','Y',' ')}, /* Central Malay -> Malay */ + {"pst", HB_TAG('P','A','S',' ')}, /* Central Pashto -> Pashto */ + {"pt", HB_TAG('P','T','G',' ')}, /* Portuguese */ +/*{"pwo", HB_TAG('P','W','O',' ')},*/ /* Pwo Western Karen -> Western Pwo Karen */ + {"qu", HB_TAG('Q','U','Z',' ')}, /* Quechua [macrolanguage] */ + {"qub", HB_TAG('Q','W','H',' ')}, /* Huallaga Huánuco Quechua -> Quechua (Peru) */ +/*{"quc", HB_TAG('Q','U','C',' ')},*/ /* K’iche’ */ + {"qud", HB_TAG('Q','V','I',' ')}, /* Calderón Highland Quichua -> Quechua (Ecuador) */ + {"quf", HB_TAG('Q','U','Z',' ')}, /* Lambayeque Quechua -> Quechua */ + {"qug", HB_TAG('Q','V','I',' ')}, /* Chimborazo Highland Quichua -> Quechua (Ecuador) */ +/*{"quh", HB_TAG('Q','U','H',' ')},*/ /* South Bolivian Quechua -> Quechua (Bolivia) */ + {"quk", HB_TAG('Q','U','Z',' ')}, /* Chachapoyas Quechua -> Quechua */ + {"qul", HB_TAG('Q','U','Z',' ')}, /* North Bolivian Quechua -> Quechua */ + {"qup", HB_TAG('Q','V','I',' ')}, /* Southern Pastaza Quechua -> Quechua (Ecuador) */ + {"qur", HB_TAG('Q','W','H',' ')}, /* Yanahuanca Pasco Quechua -> Quechua (Peru) */ + {"qus", HB_TAG('Q','U','H',' ')}, /* Santiago del Estero Quichua -> Quechua (Bolivia) */ + {"quw", HB_TAG('Q','V','I',' ')}, /* Tena Lowland Quichua -> Quechua (Ecuador) */ + {"qux", HB_TAG('Q','W','H',' ')}, /* Yauyos Quechua -> Quechua (Peru) */ + {"quy", HB_TAG('Q','U','Z',' ')}, /* Ayacucho Quechua -> Quechua */ +/*{"quz", HB_TAG('Q','U','Z',' ')},*/ /* Cusco Quechua -> Quechua */ + {"qva", HB_TAG('Q','W','H',' ')}, /* Ambo-Pasco Quechua -> Quechua (Peru) */ + {"qvc", HB_TAG('Q','U','Z',' ')}, /* Cajamarca Quechua -> Quechua */ + {"qve", HB_TAG('Q','U','Z',' ')}, /* Eastern Apurímac Quechua -> Quechua */ + {"qvh", HB_TAG('Q','W','H',' ')}, /* Huamalíes-Dos de Mayo Huánuco Quechua -> Quechua (Peru) */ +/*{"qvi", HB_TAG('Q','V','I',' ')},*/ /* Imbabura Highland Quichua -> Quechua (Ecuador) */ + {"qvj", HB_TAG('Q','V','I',' ')}, /* Loja Highland Quichua -> Quechua (Ecuador) */ + {"qvl", HB_TAG('Q','W','H',' ')}, /* Cajatambo North Lima Quechua -> Quechua (Peru) */ + {"qvm", HB_TAG('Q','W','H',' ')}, /* Margos-Yarowilca-Lauricocha Quechua -> Quechua (Peru) */ + {"qvn", HB_TAG('Q','W','H',' ')}, /* North Junín Quechua -> Quechua (Peru) */ + {"qvo", HB_TAG('Q','V','I',' ')}, /* Napo Lowland Quechua -> Quechua (Ecuador) */ + {"qvp", HB_TAG('Q','W','H',' ')}, /* Pacaraos Quechua -> Quechua (Peru) */ + {"qvs", HB_TAG('Q','U','Z',' ')}, /* San Martín Quechua -> Quechua */ + {"qvw", HB_TAG('Q','W','H',' ')}, /* Huaylla Wanca Quechua -> Quechua (Peru) */ + {"qvz", HB_TAG('Q','V','I',' ')}, /* Northern Pastaza Quichua -> Quechua (Ecuador) */ + {"qwa", HB_TAG('Q','W','H',' ')}, /* Corongo Ancash Quechua -> Quechua (Peru) */ + {"qwc", HB_TAG('Q','U','Z',' ')}, /* Classical Quechua -> Quechua */ +/*{"qwh", HB_TAG('Q','W','H',' ')},*/ /* Huaylas Ancash Quechua -> Quechua (Peru) */ + {"qws", HB_TAG('Q','W','H',' ')}, /* Sihuas Ancash Quechua -> Quechua (Peru) */ + {"qxa", HB_TAG('Q','W','H',' ')}, /* Chiquián Ancash Quechua -> Quechua (Peru) */ + {"qxc", HB_TAG('Q','W','H',' ')}, /* Chincha Quechua -> Quechua (Peru) */ + {"qxh", HB_TAG('Q','W','H',' ')}, /* Panao Huánuco Quechua -> Quechua (Peru) */ + {"qxl", HB_TAG('Q','V','I',' ')}, /* Salasaca Highland Quichua -> Quechua (Ecuador) */ + {"qxn", HB_TAG('Q','W','H',' ')}, /* Northern Conchucos Ancash Quechua -> Quechua (Peru) */ + {"qxo", HB_TAG('Q','W','H',' ')}, /* Southern Conchucos Ancash Quechua -> Quechua (Peru) */ + {"qxp", HB_TAG('Q','U','Z',' ')}, /* Puno Quechua -> Quechua */ + {"qxr", HB_TAG('Q','V','I',' ')}, /* Cañar Highland Quichua -> Quechua (Ecuador) */ + {"qxt", HB_TAG('Q','W','H',' ')}, /* Santa Ana de Tusi Pasco Quechua -> Quechua (Peru) */ + {"qxu", HB_TAG('Q','U','Z',' ')}, /* Arequipa-La Unión Quechua -> Quechua */ + {"qxw", HB_TAG('Q','W','H',' ')}, /* Jauja Wanca Quechua -> Quechua (Peru) */ + {"rag", HB_TAG('L','U','H',' ')}, /* Logooli -> Luyia */ +/*{"raj", HB_TAG('R','A','J',' ')},*/ /* Rajasthani [macrolanguage] */ +/*{"rar", HB_TAG('R','A','R',' ')},*/ /* Rarotongan */ + {"rbb", HB_TAG('P','L','G',' ')}, /* Rumai Palaung -> Palaung */ + {"rbl", HB_TAG('B','I','K',' ')}, /* Miraya Bikol -> Bikol */ +/*{"rej", HB_TAG('R','E','J',' ')},*/ /* Rejang */ +/*{"ria", HB_TAG('R','I','A',' ')},*/ /* Riang (India) */ +/*{"rif", HB_TAG('R','I','F',' ')},*/ /* Tarifit */ +/*{"rit", HB_TAG('R','I','T',' ')},*/ /* Ritharrngu -> Ritarungo */ + {"rki", HB_TAG('A','R','K',' ')}, /* Rakhine */ +/*{"rkw", HB_TAG('R','K','W',' ')},*/ /* Arakwal */ + {"rm", HB_TAG('R','M','S',' ')}, /* Romansh */ + {"rmc", HB_TAG('R','O','Y',' ')}, /* Carpathian Romani -> Romany */ + {"rmf", HB_TAG('R','O','Y',' ')}, /* Kalo Finnish Romani -> Romany */ + {"rml", HB_TAG('R','O','Y',' ')}, /* Baltic Romani -> Romany */ + {"rmn", HB_TAG('R','O','Y',' ')}, /* Balkan Romani -> Romany */ + {"rmo", HB_TAG('R','O','Y',' ')}, /* Sinte Romani -> Romany */ + {"rmw", HB_TAG('R','O','Y',' ')}, /* Welsh Romani -> Romany */ +/*{"rmy", HB_TAG('R','M','Y',' ')},*/ /* Vlax Romani */ + {"rmz", HB_TAG('A','R','K',' ')}, /* Marma -> Rakhine */ + {"rn", HB_TAG('R','U','N',' ')}, /* Rundi */ + {"rnl", HB_TAG('H','A','L',' ')}, /* Ranglong -> Halam (Falam Chin) */ + {"ro", HB_TAG('R','O','M',' ')}, /* Romanian */ + {"rom", HB_TAG('R','O','Y',' ')}, /* Romany [macrolanguage] */ +/*{"rtm", HB_TAG('R','T','M',' ')},*/ /* Rotuman */ + {"ru", HB_TAG('R','U','S',' ')}, /* Russian */ + {"rue", HB_TAG('R','S','Y',' ')}, /* Rusyn */ +/*{"rup", HB_TAG('R','U','P',' ')},*/ /* Aromanian */ + {"rw", HB_TAG('R','U','A',' ')}, /* Kinyarwanda */ + {"rwr", HB_TAG('M','A','W',' ')}, /* Marwari (India) */ + {"sa", HB_TAG('S','A','N',' ')}, /* Sanskrit */ + {"sah", HB_TAG('Y','A','K',' ')}, /* Yakut -> Sakha */ + {"sam", HB_TAG('P','A','A',' ')}, /* Samaritan Aramaic -> Palestinian Aramaic */ +/*{"sas", HB_TAG('S','A','S',' ')},*/ /* Sasak */ +/*{"sat", HB_TAG('S','A','T',' ')},*/ /* Santali */ + {"sc", HB_TAG('S','R','D',' ')}, /* Sardinian [macrolanguage] */ + {"sck", HB_TAG('S','A','D',' ')}, /* Sadri */ +/*{"scn", HB_TAG('S','C','N',' ')},*/ /* Sicilian */ +/*{"sco", HB_TAG('S','C','O',' ')},*/ /* Scots */ + {"scs", HB_TAG('S','C','S',' ')}, /* North Slavey */ + {"scs", HB_TAG('S','L','A',' ')}, /* North Slavey -> Slavey */ + {"scs", HB_TAG('A','T','H',' ')}, /* North Slavey -> Athapaskan */ + {"sd", HB_TAG('S','N','D',' ')}, /* Sindhi */ + {"sdc", HB_TAG('S','R','D',' ')}, /* Sassarese Sardinian -> Sardinian */ + {"sdh", HB_TAG('K','U','R',' ')}, /* Southern Kurdish -> Kurdish */ + {"sdn", HB_TAG('S','R','D',' ')}, /* Gallurese Sardinian -> Sardinian */ + {"se", HB_TAG('N','S','M',' ')}, /* Northern Sami */ + {"seh", HB_TAG('S','N','A',' ')}, /* Sena */ + {"sek", HB_TAG('A','T','H',' ')}, /* Sekani -> Athapaskan */ +/*{"sel", HB_TAG('S','E','L',' ')},*/ /* Selkup */ + {"sez", HB_TAG('Q','I','N',' ')}, /* Senthang Chin -> Chin */ + {"sfm", HB_TAG('H','M','N',' ')}, /* Small Flowery Miao -> Hmong */ + {"sg", HB_TAG('S','G','O',' ')}, /* Sango */ +/*{"sga", HB_TAG('S','G','A',' ')},*/ /* Old Irish (to 900) */ + {"sgc", HB_TAG('K','A','L',' ')}, /* Kipsigis -> Kalenjin */ +/*{"sgs", HB_TAG('S','G','S',' ')},*/ /* Samogitian */ + {"sgw", HB_TAG('C','H','G',' ')}, /* Sebat Bet Gurage -> Chaha Gurage */ + {"sgw", HB_TAG('S','G','W',' ')}, /* Sebat Bet Gurage -> Chaha Gurage (SIL fonts) */ +/*{"shi", HB_TAG('S','H','I',' ')},*/ /* Tachelhit */ +/*{"shn", HB_TAG('S','H','N',' ')},*/ /* Shan */ + {"shu", HB_TAG('A','R','A',' ')}, /* Chadian Arabic -> Arabic */ + {"si", HB_TAG('S','N','H',' ')}, /* Sinhala (Sinhalese) */ +/*{"sid", HB_TAG('S','I','D',' ')},*/ /* Sidamo */ + {"sjd", HB_TAG('K','S','M',' ')}, /* Kildin Sami */ + {"sjo", HB_TAG('S','I','B',' ')}, /* Xibe -> Sibe */ + {"sk", HB_TAG('S','K','Y',' ')}, /* Slovak */ + {"skg", HB_TAG('M','L','G',' ')}, /* Sakalava Malagasy -> Malagasy */ + {"skr", HB_TAG('S','R','K',' ')}, /* Saraiki */ + {"sl", HB_TAG('S','L','V',' ')}, /* Slovenian */ + {"sm", HB_TAG('S','M','O',' ')}, /* Samoan */ + {"sma", HB_TAG('S','S','M',' ')}, /* Southern Sami */ + {"smj", HB_TAG('L','S','M',' ')}, /* Lule Sami */ + {"smn", HB_TAG('I','S','M',' ')}, /* Inari Sami */ + {"sms", HB_TAG('S','K','S',' ')}, /* Skolt Sami */ + {"sn", HB_TAG('S','N','A','0')}, /* Shona */ +/*{"snk", HB_TAG('S','N','K',' ')},*/ /* Soninke */ + {"so", HB_TAG('S','M','L',' ')}, /* Somali */ +/*{"sop", HB_TAG('S','O','P',' ')},*/ /* Songe */ + {"spv", HB_TAG('O','R','I',' ')}, /* Sambalpuri -> Odia (formerly Oriya) */ + {"spy", HB_TAG('K','A','L',' ')}, /* Sabaot -> Kalenjin */ + {"sq", HB_TAG('S','Q','I',' ')}, /* Albanian [macrolanguage] */ + {"sr", HB_TAG('S','R','B',' ')}, /* Serbian */ + {"src", HB_TAG('S','R','D',' ')}, /* Logudorese Sardinian -> Sardinian */ + {"sro", HB_TAG('S','R','D',' ')}, /* Campidanese Sardinian -> Sardinian */ +/*{"srr", HB_TAG('S','R','R',' ')},*/ /* Serer */ + {"srs", HB_TAG('A','T','H',' ')}, /* Sarsi -> Athapaskan */ + {"ss", HB_TAG('S','W','Z',' ')}, /* Swati */ + {"ssh", HB_TAG('A','R','A',' ')}, /* Shihhi Arabic -> Arabic */ + {"st", HB_TAG('S','O','T',' ')}, /* Southern Sotho -> Sotho, Southern */ +/*{"stq", HB_TAG('S','T','Q',' ')},*/ /* Saterfriesisch -> Saterland Frisian */ + {"stv", HB_TAG('S','I','G',' ')}, /* Silt'e -> Silte Gurage */ + {"su", HB_TAG('S','U','N',' ')}, /* Sundanese */ +/*{"suk", HB_TAG('S','U','K',' ')},*/ /* Sukuma */ + {"suq", HB_TAG('S','U','R',' ')}, /* Suri */ + {"sv", HB_TAG('S','V','E',' ')}, /* Swedish */ +/*{"sva", HB_TAG('S','V','A',' ')},*/ /* Svan */ + {"sw", HB_TAG('S','W','K',' ')}, /* Swahili [macrolanguage] */ + {"swb", HB_TAG('C','M','R',' ')}, /* Maore Comorian -> Comorian */ + {"swc", HB_TAG('S','W','K',' ')}, /* Congo Swahili -> Swahili */ + {"swh", HB_TAG('S','W','K',' ')}, /* Swahili */ + {"swv", HB_TAG('M','A','W',' ')}, /* Shekhawati -> Marwari */ +/*{"sxu", HB_TAG('S','X','U',' ')},*/ /* Upper Saxon */ + {"syc", HB_TAG('S','Y','R',' ')}, /* Classical Syriac -> Syriac */ +/*{"syl", HB_TAG('S','Y','L',' ')},*/ /* Sylheti */ +/*{"syr", HB_TAG('S','Y','R',' ')},*/ /* Syriac [macrolanguage] */ +/*{"szl", HB_TAG('S','Z','L',' ')},*/ /* Silesian */ + {"ta", HB_TAG('T','A','M',' ')}, /* Tamil */ + {"taa", HB_TAG('A','T','H',' ')}, /* Lower Tanana -> Athapaskan */ +/*{"tab", HB_TAG('T','A','B',' ')},*/ /* Tabassaran -> Tabasaran */ + {"taq", HB_TAG('T','M','H',' ')}, /* Tamasheq -> Tamashek */ + {"tau", HB_TAG('A','T','H',' ')}, /* Upper Tanana -> Athapaskan */ + {"tcb", HB_TAG('A','T','H',' ')}, /* Tanacross -> Athapaskan */ + {"tce", HB_TAG('A','T','H',' ')}, /* Southern Tutchone -> Athapaskan */ + {"tcp", HB_TAG('Q','I','N',' ')}, /* Tawr Chin -> Chin */ + {"tcy", HB_TAG('T','U','L',' ')}, /* Tulu -> Tumbuka */ + {"tcz", HB_TAG('Q','I','N',' ')}, /* Thado Chin -> Chin */ +/*{"tdd", HB_TAG('T','D','D',' ')},*/ /* Tai Nüa -> Dehong Dai */ + {"tdx", HB_TAG('M','L','G',' ')}, /* Tandroy-Mahafaly Malagasy -> Malagasy */ + {"te", HB_TAG('T','E','L',' ')}, /* Telugu */ + {"tec", HB_TAG('K','A','L',' ')}, /* Terik -> Kalenjin */ + {"tem", HB_TAG('T','M','N',' ')}, /* Timne -> Temne */ +/*{"tet", HB_TAG('T','E','T',' ')},*/ /* Tetum */ + {"tfn", HB_TAG('A','T','H',' ')}, /* Tanaina -> Athapaskan */ + {"tg", HB_TAG('T','A','J',' ')}, /* Tajik -> Tajiki */ + {"tgj", HB_TAG('N','I','S',' ')}, /* Tagin -> Nisi */ + {"tgx", HB_TAG('A','T','H',' ')}, /* Tagish -> Athapaskan */ + {"th", HB_TAG('T','H','A',' ')}, /* Thai */ + {"tht", HB_TAG('A','T','H',' ')}, /* Tahltan -> Athapaskan */ + {"thv", HB_TAG('T','M','H',' ')}, /* Tahaggart Tamahaq -> Tamashek */ + {"thz", HB_TAG('T','M','H',' ')}, /* Tayart Tamajeq -> Tamashek */ + {"ti", HB_TAG('T','G','Y',' ')}, /* Tigrinya */ + {"tig", HB_TAG('T','G','R',' ')}, /* Tigre */ +/*{"tiv", HB_TAG('T','I','V',' ')},*/ /* Tiv */ + {"tk", HB_TAG('T','K','M',' ')}, /* Turkmen */ + {"tkg", HB_TAG('M','L','G',' ')}, /* Tesaka Malagasy -> Malagasy */ + {"tl", HB_TAG('T','G','L',' ')}, /* Tagalog */ +/*{"tmh", HB_TAG('T','M','H',' ')},*/ /* Tamashek [macrolanguage] */ + {"tmw", HB_TAG('M','L','Y',' ')}, /* Temuan -> Malay */ + {"tn", HB_TAG('T','N','A',' ')}, /* Tswana */ + {"tnf", HB_TAG('D','R','I',' ')}, /* Tangshewi (retired code) -> Dari */ + {"to", HB_TAG('T','G','N',' ')}, /* Tonga (Tonga Islands) -> Tongan */ + {"tod", HB_TAG('T','O','D','0')}, /* Toma */ + {"toi", HB_TAG('T','N','G',' ')}, /* Tonga (Zambia) */ + {"tol", HB_TAG('A','T','H',' ')}, /* Tolowa -> Athapaskan */ +/*{"tpi", HB_TAG('T','P','I',' ')},*/ /* Tok Pisin */ + {"tr", HB_TAG('T','R','K',' ')}, /* Turkish */ + {"tru", HB_TAG('T','U','A',' ')}, /* Turoyo -> Turoyo Aramaic */ + {"tru", HB_TAG('S','Y','R',' ')}, /* Turoyo -> Syriac */ + {"ts", HB_TAG('T','S','G',' ')}, /* Tsonga */ +/*{"tsj", HB_TAG('T','S','J',' ')},*/ /* Tshangla */ + {"tt", HB_TAG('T','A','T',' ')}, /* Tatar */ + {"ttm", HB_TAG('A','T','H',' ')}, /* Northern Tutchone -> Athapaskan */ + {"ttq", HB_TAG('T','M','H',' ')}, /* Tawallammat Tamajaq -> Tamashek */ +/*{"tum", HB_TAG('T','U','M',' ')},*/ /* Tumbuka -> Tulu */ + {"tuu", HB_TAG('A','T','H',' ')}, /* Tututni -> Athapaskan */ + {"tuy", HB_TAG('K','A','L',' ')}, /* Tugen -> Kalenjin */ +/*{"tvl", HB_TAG('T','V','L',' ')},*/ /* Tuvalu */ + {"tw", HB_TAG('T','W','I',' ')}, /* Twi */ + {"tw", HB_TAG('A','K','A',' ')}, /* Twi -> Akan */ + {"txc", HB_TAG('A','T','H',' ')}, /* Tsetsaut -> Athapaskan */ + {"txy", HB_TAG('M','L','G',' ')}, /* Tanosy Malagasy -> Malagasy */ + {"ty", HB_TAG('T','H','T',' ')}, /* Tahitian */ + {"tyv", HB_TAG('T','U','V',' ')}, /* Tuvinian -> Tuvin */ +/*{"tyz", HB_TAG('T','Y','Z',' ')},*/ /* Tày */ +/*{"tzm", HB_TAG('T','Z','M',' ')},*/ /* Central Atlas Tamazight -> Tamazight */ +/*{"tzo", HB_TAG('T','Z','O',' ')},*/ /* Tzotzil */ + {"ubl", HB_TAG('B','I','K',' ')}, /* Buhi'non Bikol -> Bikol */ +/*{"udm", HB_TAG('U','D','M',' ')},*/ /* Udmurt */ + {"ug", HB_TAG('U','Y','G',' ')}, /* Uyghur */ + {"uk", HB_TAG('U','K','R',' ')}, /* Ukrainian */ + {"uki", HB_TAG('K','U','I',' ')}, /* Kui (India) */ +/*{"umb", HB_TAG('U','M','B',' ')},*/ /* Umbundu */ + {"unr", HB_TAG('M','U','N',' ')}, /* Mundari */ + {"ur", HB_TAG('U','R','D',' ')}, /* Urdu */ + {"urk", HB_TAG('M','L','Y',' ')}, /* Urak Lawoi' -> Malay */ + {"uz", HB_TAG('U','Z','B',' ')}, /* Uzbek [macrolanguage] */ + {"uzn", HB_TAG('U','Z','B',' ')}, /* Northern Uzbek -> Uzbek */ + {"uzs", HB_TAG('U','Z','B',' ')}, /* Southern Uzbek -> Uzbek */ + {"ve", HB_TAG('V','E','N',' ')}, /* Venda */ +/*{"vec", HB_TAG('V','E','C',' ')},*/ /* Venetian */ + {"vi", HB_TAG('V','I','T',' ')}, /* Vietnamese */ + {"vkk", HB_TAG('M','L','Y',' ')}, /* Kaur -> Malay */ + {"vkt", HB_TAG('M','L','Y',' ')}, /* Tenggarong Kutai Malay -> Malay */ + {"vls", HB_TAG('F','L','E',' ')}, /* Vlaams -> Dutch (Flemish) */ + {"vmw", HB_TAG('M','A','K',' ')}, /* Makhuwa */ + {"vo", HB_TAG('V','O','L',' ')}, /* Volapük */ +/*{"vro", HB_TAG('V','R','O',' ')},*/ /* Võro */ + {"wa", HB_TAG('W','L','N',' ')}, /* Walloon */ +/*{"war", HB_TAG('W','A','R',' ')},*/ /* Waray (Philippines) -> Waray-Waray */ + {"wbm", HB_TAG('W','A',' ',' ')}, /* Wa */ + {"wbr", HB_TAG('W','A','G',' ')}, /* Wagdi */ + {"wlc", HB_TAG('C','M','R',' ')}, /* Mwali Comorian -> Comorian */ + {"wle", HB_TAG('S','I','G',' ')}, /* Wolane -> Silte Gurage */ + {"wlk", HB_TAG('A','T','H',' ')}, /* Wailaki -> Athapaskan */ + {"wni", HB_TAG('C','M','R',' ')}, /* Ndzwani Comorian -> Comorian */ + {"wo", HB_TAG('W','L','F',' ')}, /* Wolof */ + {"wry", HB_TAG('M','A','W',' ')}, /* Merwari -> Marwari */ + {"wsg", HB_TAG('G','O','N',' ')}, /* Adilabad Gondi -> Gondi */ +/*{"wtm", HB_TAG('W','T','M',' ')},*/ /* Mewati */ + {"wuu", HB_TAG('Z','H','S',' ')}, /* Wu Chinese -> Chinese Simplified */ + {"xal", HB_TAG('K','L','M',' ')}, /* Kalmyk */ + {"xal", HB_TAG('T','O','D',' ')}, /* Kalmyk -> Todo */ + {"xan", HB_TAG('S','E','K',' ')}, /* Xamtanga -> Sekota */ + {"xh", HB_TAG('X','H','S',' ')}, /* Xhosa */ +/*{"xjb", HB_TAG('X','J','B',' ')},*/ /* Minjungbal -> Minjangbal */ +/*{"xkf", HB_TAG('X','K','F',' ')},*/ /* Khengkha */ + {"xmm", HB_TAG('M','L','Y',' ')}, /* Manado Malay -> Malay */ + {"xmv", HB_TAG('M','L','G',' ')}, /* Antankarana Malagasy -> Malagasy */ + {"xmw", HB_TAG('M','L','G',' ')}, /* Tsimihety Malagasy -> Malagasy */ + {"xnr", HB_TAG('D','G','R',' ')}, /* Kangri -> Dogri */ +/*{"xog", HB_TAG('X','O','G',' ')},*/ /* Soga */ +/*{"xpe", HB_TAG('X','P','E',' ')},*/ /* Liberia Kpelle -> Kpelle (Liberia) */ + {"xsl", HB_TAG('S','S','L',' ')}, /* South Slavey */ + {"xsl", HB_TAG('S','L','A',' ')}, /* South Slavey -> Slavey */ + {"xsl", HB_TAG('A','T','H',' ')}, /* South Slavey -> Athapaskan */ + {"xst", HB_TAG('S','I','G',' ')}, /* Silt'e (retired code) -> Silte Gurage */ + {"xwo", HB_TAG('T','O','D',' ')}, /* Written Oirat -> Todo */ +/*{"yao", HB_TAG('Y','A','O',' ')},*/ /* Yao */ +/*{"yap", HB_TAG('Y','A','P',' ')},*/ /* Yapese */ + {"ybd", HB_TAG('A','R','K',' ')}, /* Yangbye (retired code) -> Rakhine */ + {"ydd", HB_TAG('J','I','I',' ')}, /* Eastern Yiddish -> Yiddish */ + {"yi", HB_TAG('J','I','I',' ')}, /* Yiddish [macrolanguage] */ + {"yih", HB_TAG('J','I','I',' ')}, /* Western Yiddish -> Yiddish */ + {"yo", HB_TAG('Y','B','A',' ')}, /* Yoruba */ + {"yos", HB_TAG('Q','I','N',' ')}, /* Yos (retired code) -> Chin */ + {"yrk", HB_TAG('T','N','E',' ')}, /* Nenets -> Tundra Nenets */ + {"yrk", HB_TAG('F','N','E',' ')}, /* Nenets -> Forest Nenets */ + {"yue", HB_TAG('Z','H','H',' ')}, /* Yue Chinese -> Chinese, Hong Kong SAR */ + {"za", HB_TAG('Z','H','A',' ')}, /* Zhuang [macrolanguage] */ + {"zch", HB_TAG('Z','H','A',' ')}, /* Central Hongshuihe Zhuang -> Zhuang */ + {"zdj", HB_TAG('C','M','R',' ')}, /* Ngazidja Comorian -> Comorian */ +/*{"zea", HB_TAG('Z','E','A',' ')},*/ /* Zeeuws -> Zealandic */ + {"zeh", HB_TAG('Z','H','A',' ')}, /* Eastern Hongshuihe Zhuang -> Zhuang */ + {"zgb", HB_TAG('Z','H','A',' ')}, /* Guibei Zhuang -> Zhuang */ +/*{"zgh", HB_TAG('Z','G','H',' ')},*/ /* Standard Moroccan Tamazight */ + {"zgm", HB_TAG('Z','H','A',' ')}, /* Minz Zhuang -> Zhuang */ + {"zgn", HB_TAG('Z','H','A',' ')}, /* Guibian Zhuang -> Zhuang */ + {"zh", HB_TAG('Z','H','S',' ')}, /* Chinese [macrolanguage] -> Chinese Simplified */ + {"zhd", HB_TAG('Z','H','A',' ')}, /* Dai Zhuang -> Zhuang */ + {"zhn", HB_TAG('Z','H','A',' ')}, /* Nong Zhuang -> Zhuang */ + {"zlj", HB_TAG('Z','H','A',' ')}, /* Liujiang Zhuang -> Zhuang */ + {"zlm", HB_TAG('M','L','Y',' ')}, /* Malay */ + {"zln", HB_TAG('Z','H','A',' ')}, /* Lianshan Zhuang -> Zhuang */ + {"zlq", HB_TAG('Z','H','A',' ')}, /* Liuqian Zhuang -> Zhuang */ + {"zmi", HB_TAG('M','L','Y',' ')}, /* Negeri Sembilan Malay -> Malay */ + {"zne", HB_TAG('Z','N','D',' ')}, /* Zande */ + {"zom", HB_TAG('Q','I','N',' ')}, /* Zou -> Chin */ + {"zqe", HB_TAG('Z','H','A',' ')}, /* Qiubei Zhuang -> Zhuang */ + {"zsm", HB_TAG('M','L','Y',' ')}, /* Standard Malay -> Malay */ + {"zu", HB_TAG('Z','U','L',' ')}, /* Zulu */ + {"zum", HB_TAG('L','R','C',' ')}, /* Kumzari -> Luri */ + {"zyb", HB_TAG('Z','H','A',' ')}, /* Yongbei Zhuang -> Zhuang */ + {"zyg", HB_TAG('Z','H','A',' ')}, /* Yang Zhuang -> Zhuang */ + {"zyj", HB_TAG('Z','H','A',' ')}, /* Youjiang Zhuang -> Zhuang */ + {"zyn", HB_TAG('Z','H','A',' ')}, /* Yongnan Zhuang -> Zhuang */ +/*{"zza", HB_TAG('Z','Z','A',' ')},*/ /* Zazaki [macrolanguage] */ + {"zzj", HB_TAG('Z','H','A',' ')}, /* Zuojiang Zhuang -> Zhuang */ +}; + +/** + * hb_ot_tags_from_complex_language: + * @lang_str: a BCP 47 language tag to convert. + * @limit: a pointer to the end of the substring of @lang_str to consider for + * conversion. + * @count: maximum number of language tags to retrieve (IN) and actual number of + * language tags retrieved (OUT). If no tags are retrieved, it is not modified. + * @tags: array of size at least @language_count to store the language tag + * results + * + * Converts a multi-subtag BCP 47 language tag to language tags. + * + * Return value: Whether any language systems were retrieved. + **/ +static bool +hb_ot_tags_from_complex_language (const char *lang_str, + const char *limit, + unsigned int *count /* IN/OUT */, + hb_tag_t *tags /* OUT */) +{ + if (subtag_matches (lang_str, limit, "-fonnapa")) + { + /* Undetermined; North American Phonetic Alphabet */ + tags[0] = HB_TAG('A','P','P','H'); /* Phonetic transcription—Americanist conventions */ + *count = 1; + return true; + } + if (subtag_matches (lang_str, limit, "-polyton")) + { + /* Modern Greek (1453-); Polytonic Greek */ + tags[0] = HB_TAG('P','G','R',' '); /* Polytonic Greek */ + *count = 1; + return true; + } + if (subtag_matches (lang_str, limit, "-provenc")) + { + /* Occitan (post 1500); Provençal */ + tags[0] = HB_TAG('P','R','O',' '); /* Provençal / Old Provençal */ + *count = 1; + return true; + } + if (subtag_matches (lang_str, limit, "-fonipa")) + { + /* Undetermined; International Phonetic Alphabet */ + tags[0] = HB_TAG('I','P','P','H'); /* Phonetic transcription—IPA conventions */ + *count = 1; + return true; + } + if (subtag_matches (lang_str, limit, "-geok")) + { + /* Undetermined; Khutsuri (Asomtavruli and Nuskhuri) */ + tags[0] = HB_TAG('K','G','E',' '); /* Khutsuri Georgian */ + *count = 1; + return true; + } + if (subtag_matches (lang_str, limit, "-syre")) + { + /* Undetermined; Syriac (Estrangelo variant) */ + tags[0] = HB_TAG('S','Y','R','E'); /* Syriac, Estrangela script-variant (equivalent to ISO 15924 'Syre') */ + *count = 1; + return true; + } + if (subtag_matches (lang_str, limit, "-syrj")) + { + /* Undetermined; Syriac (Western variant) */ + tags[0] = HB_TAG('S','Y','R','J'); /* Syriac, Western script-variant (equivalent to ISO 15924 'Syrj') */ + *count = 1; + return true; + } + if (subtag_matches (lang_str, limit, "-syrn")) + { + /* Undetermined; Syriac (Eastern variant) */ + tags[0] = HB_TAG('S','Y','R','N'); /* Syriac, Eastern script-variant (equivalent to ISO 15924 'Syrn') */ + *count = 1; + return true; + } + switch (lang_str[0]) + { + case 'a': + if (0 == strcmp (&lang_str[1], "rt-lojban")) + { + /* Lojban */ + tags[0] = HB_TAG('J','B','O',' '); /* Lojban */ + *count = 1; + return true; + } + break; + case 'c': + if (lang_matches (&lang_str[1], "do-hant-hk")) + { + /* Min Dong Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "do-hant-mo")) + { + /* Min Dong Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "jy-hant-hk")) + { + /* Jinyu Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "jy-hant-mo")) + { + /* Jinyu Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "mn-hant-hk")) + { + /* Mandarin Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "mn-hant-mo")) + { + /* Mandarin Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "np-hant-hk")) + { + /* Northern Ping Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "np-hant-mo")) + { + /* Northern Ping Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "px-hant-hk")) + { + /* Pu-Xian Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "px-hant-mo")) + { + /* Pu-Xian Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "sp-hant-hk")) + { + /* Southern Ping Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "sp-hant-mo")) + { + /* Southern Ping Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "zh-hant-hk")) + { + /* Huizhou Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "zh-hant-mo")) + { + /* Huizhou Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "zo-hant-hk")) + { + /* Min Zhong Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "zo-hant-mo")) + { + /* Min Zhong Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "do-hans")) + { + /* Min Dong Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "do-hant")) + { + /* Min Dong Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "jy-hans")) + { + /* Jinyu Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "jy-hant")) + { + /* Jinyu Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "mn-hans")) + { + /* Mandarin Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "mn-hant")) + { + /* Mandarin Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "np-hans")) + { + /* Northern Ping Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "np-hant")) + { + /* Northern Ping Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "px-hans")) + { + /* Pu-Xian Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "px-hant")) + { + /* Pu-Xian Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "sp-hans")) + { + /* Southern Ping Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "sp-hant")) + { + /* Southern Ping Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "zh-hans")) + { + /* Huizhou Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "zh-hant")) + { + /* Huizhou Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "zo-hans")) + { + /* Min Zhong Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "zo-hant")) + { + /* Min Zhong Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "do-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Min Dong Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "do-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Min Dong Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "do-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Min Dong Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "jy-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Jinyu Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "jy-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Jinyu Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "jy-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Jinyu Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "mn-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Mandarin Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "mn-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Mandarin Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "mn-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Mandarin Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "np-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Northern Ping Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "np-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Northern Ping Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "np-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Northern Ping Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "px-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Pu-Xian Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "px-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Pu-Xian Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "px-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Pu-Xian Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "sp-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Southern Ping Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "sp-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Southern Ping Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "sp-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Southern Ping Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "zh-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Huizhou Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "zh-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Huizhou Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "zh-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Huizhou Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "zo-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Min Zhong Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "zo-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Min Zhong Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "zo-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Min Zhong Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + break; + case 'g': + if (lang_matches (&lang_str[1], "an-hant-hk")) + { + /* Gan Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "an-hant-mo")) + { + /* Gan Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "an-hans")) + { + /* Gan Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "an-hant")) + { + /* Gan Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "a-latg")) + { + /* Irish */ + tags[0] = HB_TAG('I','R','T',' '); /* Irish Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "an-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Gan Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "an-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Gan Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "an-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Gan Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + break; + case 'h': + if (lang_matches (&lang_str[1], "ak-hant-hk")) + { + /* Hakka Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "ak-hant-mo")) + { + /* Hakka Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "sn-hant-hk")) + { + /* Xiang Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "sn-hant-mo")) + { + /* Xiang Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "ak-hans")) + { + /* Hakka Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "ak-hant")) + { + /* Hakka Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "sn-hans")) + { + /* Xiang Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "sn-hant")) + { + /* Xiang Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "ak-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Hakka Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "ak-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Hakka Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "ak-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Hakka Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "sn-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Xiang Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "sn-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Xiang Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "sn-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Xiang Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + break; + case 'i': + if (0 == strcmp (&lang_str[1], "-navajo")) + { + /* Navajo */ + unsigned int i; + hb_tag_t possible_tags[] = { + HB_TAG('N','A','V',' '), /* Navajo */ + HB_TAG('A','T','H',' '), /* Athapaskan */ + }; + for (i = 0; i < 2 && i < *count; i++) + tags[i] = possible_tags[i]; + *count = i; + return true; + } + if (0 == strcmp (&lang_str[1], "-hak")) + { + /* Hakka */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (0 == strcmp (&lang_str[1], "-lux")) + { + /* Luxembourgish */ + tags[0] = HB_TAG('L','T','Z',' '); /* Luxembourgish */ + *count = 1; + return true; + } + break; + case 'l': + if (lang_matches (&lang_str[1], "zh-hans")) + { + /* Literary Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + break; + case 'm': + if (lang_matches (&lang_str[1], "np-hant-hk")) + { + /* Min Bei Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "np-hant-mo")) + { + /* Min Bei Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "np-hans")) + { + /* Min Bei Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "np-hant")) + { + /* Min Bei Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "np-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Min Bei Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "np-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Min Bei Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "np-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Min Bei Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + break; + case 'n': + if (lang_matches (&lang_str[1], "an-hant-hk")) + { + /* Min Nan Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "an-hant-mo")) + { + /* Min Nan Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "an-hans")) + { + /* Min Nan Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "an-hant")) + { + /* Min Nan Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "an-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Min Nan Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "an-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Min Nan Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "an-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Min Nan Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strcmp (&lang_str[1], "o-bok")) + { + /* Norwegian Bokmal */ + tags[0] = HB_TAG('N','O','R',' '); /* Norwegian */ + *count = 1; + return true; + } + if (0 == strcmp (&lang_str[1], "o-nyn")) + { + /* Norwegian Nynorsk */ + tags[0] = HB_TAG('N','Y','N',' '); /* Norwegian Nynorsk (Nynorsk, Norwegian) */ + *count = 1; + return true; + } + break; + case 'r': + if (0 == strncmp (&lang_str[1], "o-", 2) + && subtag_matches (lang_str, limit, "-md")) + { + /* Romanian; Moldova */ + tags[0] = HB_TAG('M','O','L',' '); /* Moldavian */ + *count = 1; + return true; + } + break; + case 'w': + if (lang_matches (&lang_str[1], "uu-hant-hk")) + { + /* Wu Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "uu-hant-mo")) + { + /* Wu Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "uu-hans")) + { + /* Wu Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "uu-hant")) + { + /* Wu Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "uu-", 3) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Wu Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "uu-", 3) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Wu Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "uu-", 3) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Wu Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + break; + case 'y': + if (lang_matches (&lang_str[1], "ue-hans")) + { + /* Yue Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + break; + case 'z': + if (lang_matches (&lang_str[1], "h-hant-hk")) + { + /* Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "h-hant-mo")) + { + /* Chinese */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strcmp (&lang_str[1], "h-min-nan")) + { + /* Minnan, Hokkien, Amoy, Taiwanese, Southern Min, Southern Fujian, Hoklo, Southern Fukien, Ho-lo */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "h-hans")) + { + /* Chinese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (lang_matches (&lang_str[1], "h-hant")) + { + /* Chinese */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + if (0 == strcmp (&lang_str[1], "h-min")) + { + /* Min, Fuzhou, Hokkien, Amoy, or Taiwanese */ + tags[0] = HB_TAG('Z','H','S',' '); /* Chinese Simplified */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "h-", 2) + && subtag_matches (lang_str, limit, "-hk")) + { + /* Chinese; Hong Kong */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "h-", 2) + && subtag_matches (lang_str, limit, "-mo")) + { + /* Chinese; Macao */ + tags[0] = HB_TAG('Z','H','H',' '); /* Chinese, Hong Kong SAR */ + *count = 1; + return true; + } + if (0 == strncmp (&lang_str[1], "h-", 2) + && subtag_matches (lang_str, limit, "-tw")) + { + /* Chinese; Taiwan, Province of China */ + tags[0] = HB_TAG('Z','H','T',' '); /* Chinese Traditional */ + *count = 1; + return true; + } + break; + } + return false; +} + +/** + * hb_ot_ambiguous_tag_to_language + * @tag: A language tag. + * + * Converts @tag to a BCP 47 language tag if it is ambiguous (it corresponds to + * many language tags) and the best tag is not the alphabetically first, or if + * the best tag consists of multiple subtags, or if the best tag does not appear + * in #ot_languages. + * + * Return value: The #hb_language_t corresponding to the BCP 47 language tag, + * or #HB_LANGUAGE_INVALID if @tag is not ambiguous. + **/ +static hb_language_t +hb_ot_ambiguous_tag_to_language (hb_tag_t tag) +{ + switch (tag) + { + case HB_TAG('A','L','T',' '): /* Altai */ + return hb_language_from_string ("alt", -1); /* Southern Altai */ + case HB_TAG('A','P','P','H'): /* Phonetic transcription—Americanist conventions */ + return hb_language_from_string ("und-fonnapa", -1); /* Undetermined; North American Phonetic Alphabet */ + case HB_TAG('A','R','A',' '): /* Arabic */ + return hb_language_from_string ("ar", -1); /* Arabic */ + case HB_TAG('A','R','K',' '): /* Rakhine */ + return hb_language_from_string ("rki", -1); /* Rakhine */ + case HB_TAG('A','T','H',' '): /* Athapaskan */ + return hb_language_from_string ("ath", -1); /* Athapascan */ + case HB_TAG('B','I','K',' '): /* Bikol */ + return hb_language_from_string ("bik", -1); /* Bikol */ + case HB_TAG('C','P','P',' '): /* Creoles */ + return hb_language_from_string ("crp", -1); /* Creoles and pidgins */ + case HB_TAG('C','R','R',' '): /* Carrier */ + return hb_language_from_string ("crx", -1); /* Carrier */ + case HB_TAG('D','N','K',' '): /* Dinka */ + return hb_language_from_string ("din", -1); /* Dinka */ + case HB_TAG('D','R','I',' '): /* Dari */ + return hb_language_from_string ("prs", -1); /* Dari */ + case HB_TAG('D','Z','N',' '): /* Dzongkha */ + return hb_language_from_string ("dz", -1); /* Dzongkha */ + case HB_TAG('E','T','I',' '): /* Estonian */ + return hb_language_from_string ("et", -1); /* Estonian */ + case HB_TAG('G','O','N',' '): /* Gondi */ + return hb_language_from_string ("gon", -1); /* Gondi */ + case HB_TAG('H','M','N',' '): /* Hmong */ + return hb_language_from_string ("hmn", -1); /* Hmong */ + case HB_TAG('H','N','D',' '): /* Hindko */ + return hb_language_from_string ("hnd", -1); /* Southern Hindko */ + case HB_TAG('I','J','O',' '): /* Ijo */ + return hb_language_from_string ("ijo", -1); /* Ijo */ + case HB_TAG('I','N','U',' '): /* Inuktitut */ + return hb_language_from_string ("iu", -1); /* Inuktitut */ + case HB_TAG('I','P','K',' '): /* Inupiat */ + return hb_language_from_string ("ik", -1); /* Inupiaq */ + case HB_TAG('I','P','P','H'): /* Phonetic transcription—IPA conventions */ + return hb_language_from_string ("und-fonipa", -1); /* Undetermined; International Phonetic Alphabet */ + case HB_TAG('I','R','T',' '): /* Irish Traditional */ + return hb_language_from_string ("ga-Latg", -1); /* Irish; Latin (Gaelic variant) */ + case HB_TAG('J','I','I',' '): /* Yiddish */ + return hb_language_from_string ("yi", -1); /* Yiddish */ + case HB_TAG('K','A','L',' '): /* Kalenjin */ + return hb_language_from_string ("kln", -1); /* Kalenjin */ + case HB_TAG('K','G','E',' '): /* Khutsuri Georgian */ + return hb_language_from_string ("und-Geok", -1); /* Undetermined; Khutsuri (Asomtavruli and Nuskhuri) */ + case HB_TAG('K','N','R',' '): /* Kanuri */ + return hb_language_from_string ("kr", -1); /* Kanuri */ + case HB_TAG('K','O','K',' '): /* Konkani */ + return hb_language_from_string ("kok", -1); /* Konkani */ + case HB_TAG('K','U','I',' '): /* Kui */ + return hb_language_from_string ("uki", -1); /* Kui (India) */ + case HB_TAG('K','U','R',' '): /* Kurdish */ + return hb_language_from_string ("ku", -1); /* Kurdish */ + case HB_TAG('L','U','H',' '): /* Luyia */ + return hb_language_from_string ("luy", -1); /* Luyia */ + case HB_TAG('L','V','I',' '): /* Latvian */ + return hb_language_from_string ("lv", -1); /* Latvian */ + case HB_TAG('M','A','W',' '): /* Marwari */ + return hb_language_from_string ("mwr", -1); /* Marwari */ + case HB_TAG('M','L','G',' '): /* Malagasy */ + return hb_language_from_string ("mg", -1); /* Malagasy */ + case HB_TAG('M','L','Y',' '): /* Malay */ + return hb_language_from_string ("ms", -1); /* Malay */ + case HB_TAG('M','N','G',' '): /* Mongolian */ + return hb_language_from_string ("mn", -1); /* Mongolian */ + case HB_TAG('M','O','L',' '): /* Moldavian */ + return hb_language_from_string ("ro-MD", -1); /* Romanian; Moldova */ + case HB_TAG('N','E','P',' '): /* Nepali */ + return hb_language_from_string ("ne", -1); /* Nepali */ + case HB_TAG('N','I','S',' '): /* Nisi */ + return hb_language_from_string ("njz", -1); /* Nyishi */ + case HB_TAG('N','O','R',' '): /* Norwegian */ + return hb_language_from_string ("no", -1); /* Norwegian */ + case HB_TAG('O','J','B',' '): /* Ojibway */ + return hb_language_from_string ("oj", -1); /* Ojibwa */ + case HB_TAG('O','R','O',' '): /* Oromo */ + return hb_language_from_string ("om", -1); /* Oromo */ + case HB_TAG('P','A','S',' '): /* Pashto */ + return hb_language_from_string ("ps", -1); /* Pashto */ + case HB_TAG('P','G','R',' '): /* Polytonic Greek */ + return hb_language_from_string ("el-polyton", -1); /* Modern Greek (1453-); Polytonic Greek */ + case HB_TAG('P','R','O',' '): /* Provençal / Old Provençal */ + return hb_language_from_string ("pro", -1); /* Old Provençal (to 1500) */ + case HB_TAG('Q','U','H',' '): /* Quechua (Bolivia) */ + return hb_language_from_string ("quh", -1); /* South Bolivian Quechua */ + case HB_TAG('Q','V','I',' '): /* Quechua (Ecuador) */ + return hb_language_from_string ("qvi", -1); /* Imbabura Highland Quichua */ + case HB_TAG('Q','W','H',' '): /* Quechua (Peru) */ + return hb_language_from_string ("qwh", -1); /* Huaylas Ancash Quechua */ + case HB_TAG('R','A','J',' '): /* Rajasthani */ + return hb_language_from_string ("raj", -1); /* Rajasthani */ + case HB_TAG('R','O','Y',' '): /* Romany */ + return hb_language_from_string ("rom", -1); /* Romany */ + case HB_TAG('S','Q','I',' '): /* Albanian */ + return hb_language_from_string ("sq", -1); /* Albanian */ + case HB_TAG('S','Y','R',' '): /* Syriac */ + return hb_language_from_string ("syr", -1); /* Syriac */ + case HB_TAG('S','Y','R','E'): /* Syriac, Estrangela script-variant (equivalent to ISO 15924 'Syre') */ + return hb_language_from_string ("und-Syre", -1); /* Undetermined; Syriac (Estrangelo variant) */ + case HB_TAG('S','Y','R','J'): /* Syriac, Western script-variant (equivalent to ISO 15924 'Syrj') */ + return hb_language_from_string ("und-Syrj", -1); /* Undetermined; Syriac (Western variant) */ + case HB_TAG('S','Y','R','N'): /* Syriac, Eastern script-variant (equivalent to ISO 15924 'Syrn') */ + return hb_language_from_string ("und-Syrn", -1); /* Undetermined; Syriac (Eastern variant) */ + case HB_TAG('T','M','H',' '): /* Tamashek */ + return hb_language_from_string ("tmh", -1); /* Tamashek */ + case HB_TAG('T','N','E',' '): /* Tundra Nenets */ + return hb_language_from_string ("yrk", -1); /* Nenets */ + case HB_TAG('Z','H','H',' '): /* Chinese, Hong Kong SAR */ + return hb_language_from_string ("zh-HK", -1); /* Chinese; Hong Kong */ + case HB_TAG('Z','H','S',' '): /* Chinese Simplified */ + return hb_language_from_string ("zh-Hans", -1); /* Chinese; Han (Simplified variant) */ + case HB_TAG('Z','H','T',' '): /* Chinese Traditional */ + return hb_language_from_string ("zh-Hant", -1); /* Chinese; Han (Traditional variant) */ + default: + return HB_LANGUAGE_INVALID; + } +} + +#endif /* HB_OT_TAG_TABLE_HH */ + +/* == End of generated table == */ diff --git a/thirdparty/harfbuzz/src/hb-ot-tag.cc b/thirdparty/harfbuzz/src/hb-ot-tag.cc new file mode 100644 index 0000000000..7ec91c5815 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-tag.cc @@ -0,0 +1,567 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod, Roozbeh Pournader + */ + +#include "hb.hh" + +#ifndef HB_NO_OT_TAG + + +/* hb_script_t */ + +static hb_tag_t +hb_ot_old_tag_from_script (hb_script_t script) +{ + /* This seems to be accurate as of end of 2012. */ + + switch ((hb_tag_t) script) + { + case HB_SCRIPT_INVALID: return HB_OT_TAG_DEFAULT_SCRIPT; + + /* KATAKANA and HIRAGANA both map to 'kana' */ + case HB_SCRIPT_HIRAGANA: return HB_TAG('k','a','n','a'); + + /* Spaces at the end are preserved, unlike ISO 15924 */ + case HB_SCRIPT_LAO: return HB_TAG('l','a','o',' '); + case HB_SCRIPT_YI: return HB_TAG('y','i',' ',' '); + /* Unicode-5.0 additions */ + case HB_SCRIPT_NKO: return HB_TAG('n','k','o',' '); + /* Unicode-5.1 additions */ + case HB_SCRIPT_VAI: return HB_TAG('v','a','i',' '); + } + + /* Else, just change first char to lowercase and return */ + return ((hb_tag_t) script) | 0x20000000u; +} + +static hb_script_t +hb_ot_old_tag_to_script (hb_tag_t tag) +{ + if (unlikely (tag == HB_OT_TAG_DEFAULT_SCRIPT)) + return HB_SCRIPT_INVALID; + + /* This side of the conversion is fully algorithmic. */ + + /* Any spaces at the end of the tag are replaced by repeating the last + * letter. Eg 'nko ' -> 'Nkoo' */ + if (unlikely ((tag & 0x0000FF00u) == 0x00002000u)) + tag |= (tag >> 8) & 0x0000FF00u; /* Copy second letter to third */ + if (unlikely ((tag & 0x000000FFu) == 0x00000020u)) + tag |= (tag >> 8) & 0x000000FFu; /* Copy third letter to fourth */ + + /* Change first char to uppercase and return */ + return (hb_script_t) (tag & ~0x20000000u); +} + +static hb_tag_t +hb_ot_new_tag_from_script (hb_script_t script) +{ + switch ((hb_tag_t) script) { + case HB_SCRIPT_BENGALI: return HB_TAG('b','n','g','2'); + case HB_SCRIPT_DEVANAGARI: return HB_TAG('d','e','v','2'); + case HB_SCRIPT_GUJARATI: return HB_TAG('g','j','r','2'); + case HB_SCRIPT_GURMUKHI: return HB_TAG('g','u','r','2'); + case HB_SCRIPT_KANNADA: return HB_TAG('k','n','d','2'); + case HB_SCRIPT_MALAYALAM: return HB_TAG('m','l','m','2'); + case HB_SCRIPT_ORIYA: return HB_TAG('o','r','y','2'); + case HB_SCRIPT_TAMIL: return HB_TAG('t','m','l','2'); + case HB_SCRIPT_TELUGU: return HB_TAG('t','e','l','2'); + case HB_SCRIPT_MYANMAR: return HB_TAG('m','y','m','2'); + } + + return HB_OT_TAG_DEFAULT_SCRIPT; +} + +static hb_script_t +hb_ot_new_tag_to_script (hb_tag_t tag) +{ + switch (tag) { + case HB_TAG('b','n','g','2'): return HB_SCRIPT_BENGALI; + case HB_TAG('d','e','v','2'): return HB_SCRIPT_DEVANAGARI; + case HB_TAG('g','j','r','2'): return HB_SCRIPT_GUJARATI; + case HB_TAG('g','u','r','2'): return HB_SCRIPT_GURMUKHI; + case HB_TAG('k','n','d','2'): return HB_SCRIPT_KANNADA; + case HB_TAG('m','l','m','2'): return HB_SCRIPT_MALAYALAM; + case HB_TAG('o','r','y','2'): return HB_SCRIPT_ORIYA; + case HB_TAG('t','m','l','2'): return HB_SCRIPT_TAMIL; + case HB_TAG('t','e','l','2'): return HB_SCRIPT_TELUGU; + case HB_TAG('m','y','m','2'): return HB_SCRIPT_MYANMAR; + } + + return HB_SCRIPT_UNKNOWN; +} + +#ifndef HB_DISABLE_DEPRECATED +void +hb_ot_tags_from_script (hb_script_t script, + hb_tag_t *script_tag_1, + hb_tag_t *script_tag_2) +{ + unsigned int count = 2; + hb_tag_t tags[2]; + hb_ot_tags_from_script_and_language (script, HB_LANGUAGE_INVALID, &count, tags, nullptr, nullptr); + *script_tag_1 = count > 0 ? tags[0] : HB_OT_TAG_DEFAULT_SCRIPT; + *script_tag_2 = count > 1 ? tags[1] : HB_OT_TAG_DEFAULT_SCRIPT; +} +#endif + +/* + * Complete list at: + * https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags + * + * Most of the script tags are the same as the ISO 15924 tag but lowercased. + * So we just do that, and handle the exceptional cases in a switch. + */ + +static void +hb_ot_all_tags_from_script (hb_script_t script, + unsigned int *count /* IN/OUT */, + hb_tag_t *tags /* OUT */) +{ + unsigned int i = 0; + + hb_tag_t new_tag = hb_ot_new_tag_from_script (script); + if (unlikely (new_tag != HB_OT_TAG_DEFAULT_SCRIPT)) + { + /* HB_SCRIPT_MYANMAR maps to 'mym2', but there is no 'mym3'. */ + if (new_tag != HB_TAG('m','y','m','2')) + tags[i++] = new_tag | '3'; + if (*count > i) + tags[i++] = new_tag; + } + + if (*count > i) + { + hb_tag_t old_tag = hb_ot_old_tag_from_script (script); + if (old_tag != HB_OT_TAG_DEFAULT_SCRIPT) + tags[i++] = old_tag; + } + + *count = i; +} + +hb_script_t +hb_ot_tag_to_script (hb_tag_t tag) +{ + unsigned char digit = tag & 0x000000FFu; + if (unlikely (digit == '2' || digit == '3')) + return hb_ot_new_tag_to_script (tag & 0xFFFFFF32); + + return hb_ot_old_tag_to_script (tag); +} + + +/* hb_language_t */ + +static bool +subtag_matches (const char *lang_str, + const char *limit, + const char *subtag) +{ + do { + const char *s = strstr (lang_str, subtag); + if (!s || s >= limit) + return false; + if (!ISALNUM (s[strlen (subtag)])) + return true; + lang_str = s + strlen (subtag); + } while (true); +} + +static hb_bool_t +lang_matches (const char *lang_str, const char *spec) +{ + unsigned int len = strlen (spec); + + return strncmp (lang_str, spec, len) == 0 && + (lang_str[len] == '\0' || lang_str[len] == '-'); +} + +struct LangTag +{ + char language[4]; + hb_tag_t tag; + + int cmp (const char *a) const + { + const char *b = this->language; + unsigned int da, db; + const char *p; + + p = strchr (a, '-'); + da = p ? (unsigned int) (p - a) : strlen (a); + + p = strchr (b, '-'); + db = p ? (unsigned int) (p - b) : strlen (b); + + return strncmp (a, b, hb_max (da, db)); + } + int cmp (const LangTag *that) const + { return cmp (that->language); } +}; + +#include "hb-ot-tag-table.hh" + +/* The corresponding languages IDs for the following IDs are unclear, + * overlap, or are architecturally weird. Needs more research. */ + +/*{"??", {HB_TAG('B','C','R',' ')}},*/ /* Bible Cree */ +/*{"zh?", {HB_TAG('C','H','N',' ')}},*/ /* Chinese (seen in Microsoft fonts) */ +/*{"ar-Syrc?", {HB_TAG('G','A','R',' ')}},*/ /* Garshuni */ +/*{"??", {HB_TAG('N','G','R',' ')}},*/ /* Nagari */ +/*{"??", {HB_TAG('Y','I','C',' ')}},*/ /* Yi Classic */ +/*{"zh?", {HB_TAG('Z','H','P',' ')}},*/ /* Chinese Phonetic */ + +#ifndef HB_DISABLE_DEPRECATED +hb_tag_t +hb_ot_tag_from_language (hb_language_t language) +{ + unsigned int count = 1; + hb_tag_t tags[1]; + hb_ot_tags_from_script_and_language (HB_SCRIPT_UNKNOWN, language, nullptr, nullptr, &count, tags); + return count > 0 ? tags[0] : HB_OT_TAG_DEFAULT_LANGUAGE; +} +#endif + +static void +hb_ot_tags_from_language (const char *lang_str, + const char *limit, + unsigned int *count, + hb_tag_t *tags) +{ + const char *s; + unsigned int tag_idx; + + /* Check for matches of multiple subtags. */ + if (hb_ot_tags_from_complex_language (lang_str, limit, count, tags)) + return; + + /* Find a language matching in the first component. */ + s = strchr (lang_str, '-'); + { + if (s && limit - lang_str >= 6) + { + const char *extlang_end = strchr (s + 1, '-'); + /* If there is an extended language tag, use it. */ + if (3 == (extlang_end ? extlang_end - s - 1 : strlen (s + 1)) && + ISALPHA (s[1])) + lang_str = s + 1; + } + if (hb_sorted_array (ot_languages).bfind (lang_str, &tag_idx)) + { + unsigned int i; + while (tag_idx != 0 && + 0 == strcmp (ot_languages[tag_idx].language, ot_languages[tag_idx - 1].language)) + tag_idx--; + for (i = 0; + i < *count && + tag_idx + i < ARRAY_LENGTH (ot_languages) && + 0 == strcmp (ot_languages[tag_idx + i].language, ot_languages[tag_idx].language); + i++) + tags[i] = ot_languages[tag_idx + i].tag; + *count = i; + return; + } + } + + if (!s) + s = lang_str + strlen (lang_str); + if (s - lang_str == 3) { + /* Assume it's ISO-639-3 and upper-case and use it. */ + tags[0] = hb_tag_from_string (lang_str, s - lang_str) & ~0x20202000u; + *count = 1; + return; + } + + *count = 0; +} + +static bool +parse_private_use_subtag (const char *private_use_subtag, + unsigned int *count, + hb_tag_t *tags, + const char *prefix, + unsigned char (*normalize) (unsigned char)) +{ +#ifdef HB_NO_LANGUAGE_PRIVATE_SUBTAG + return false; +#endif + + if (!(private_use_subtag && count && tags && *count)) return false; + + const char *s = strstr (private_use_subtag, prefix); + if (!s) return false; + + char tag[4]; + int i; + s += strlen (prefix); + if (s[0] == '-') { + s += 1; + char c; + for (i = 0; i < 8 && ISHEX (s[i]); i++) + { + c = FROMHEX (s[i]); + if (i % 2 == 0) + tag[i / 2] = c << 4; + else + tag[i / 2] += c; + } + if (i != 8) return false; + } else { + for (i = 0; i < 4 && ISALNUM (s[i]); i++) + tag[i] = normalize (s[i]); + if (!i) return false; + + for (; i < 4; i++) + tag[i] = ' '; + } + tags[0] = HB_TAG (tag[0], tag[1], tag[2], tag[3]); + if ((tags[0] & 0xDFDFDFDF) == HB_OT_TAG_DEFAULT_SCRIPT) + tags[0] ^= ~0xDFDFDFDF; + *count = 1; + return true; +} + +/** + * hb_ot_tags_from_script_and_language: + * @script: an #hb_script_t to convert. + * @language: an #hb_language_t to convert. + * @script_count: (allow-none): maximum number of script tags to retrieve (IN) + * and actual number of script tags retrieved (OUT) + * @script_tags: (out) (allow-none): array of size at least @script_count to store the + * script tag results + * @language_count: (allow-none): maximum number of language tags to retrieve + * (IN) and actual number of language tags retrieved (OUT) + * @language_tags: (out) (allow-none): array of size at least @language_count to store + * the language tag results + * + * Converts an #hb_script_t and an #hb_language_t to script and language tags. + * + * Since: 2.0.0 + **/ +void +hb_ot_tags_from_script_and_language (hb_script_t script, + hb_language_t language, + unsigned int *script_count /* IN/OUT */, + hb_tag_t *script_tags /* OUT */, + unsigned int *language_count /* IN/OUT */, + hb_tag_t *language_tags /* OUT */) +{ + bool needs_script = true; + + if (language == HB_LANGUAGE_INVALID) + { + if (language_count && language_tags && *language_count) + *language_count = 0; + } + else + { + const char *lang_str, *s, *limit, *private_use_subtag; + bool needs_language; + + lang_str = hb_language_to_string (language); + limit = nullptr; + private_use_subtag = nullptr; + if (lang_str[0] == 'x' && lang_str[1] == '-') + { + private_use_subtag = lang_str; + } else { + for (s = lang_str + 1; *s; s++) + { + if (s[-1] == '-' && s[1] == '-') + { + if (s[0] == 'x') + { + private_use_subtag = s; + if (!limit) + limit = s - 1; + break; + } else if (!limit) + { + limit = s - 1; + } + } + } + if (!limit) + limit = s; + } + + needs_script = !parse_private_use_subtag (private_use_subtag, script_count, script_tags, "-hbsc", TOLOWER); + needs_language = !parse_private_use_subtag (private_use_subtag, language_count, language_tags, "-hbot", TOUPPER); + + if (needs_language && language_count && language_tags && *language_count) + hb_ot_tags_from_language (lang_str, limit, language_count, language_tags); + } + + if (needs_script && script_count && script_tags && *script_count) + hb_ot_all_tags_from_script (script, script_count, script_tags); +} + +/** + * hb_ot_tag_to_language: + * + * + * + * Return value: (transfer none): + * + * Since: 0.9.2 + **/ +hb_language_t +hb_ot_tag_to_language (hb_tag_t tag) +{ + unsigned int i; + + if (tag == HB_OT_TAG_DEFAULT_LANGUAGE) + return nullptr; + + { + hb_language_t disambiguated_tag = hb_ot_ambiguous_tag_to_language (tag); + if (disambiguated_tag != HB_LANGUAGE_INVALID) + return disambiguated_tag; + } + + for (i = 0; i < ARRAY_LENGTH (ot_languages); i++) + if (ot_languages[i].tag == tag) + return hb_language_from_string (ot_languages[i].language, -1); + + /* Return a custom language in the form of "x-hbot-AABBCCDD". + * If it's three letters long, also guess it's ISO 639-3 and lower-case and + * prepend it (if it's not a registered tag, the private use subtags will + * ensure that calling hb_ot_tag_from_language on the result will still return + * the same tag as the original tag). + */ + { + char buf[20]; + char *str = buf; + if (ISALPHA (tag >> 24) + && ISALPHA ((tag >> 16) & 0xFF) + && ISALPHA ((tag >> 8) & 0xFF) + && (tag & 0xFF) == ' ') + { + buf[0] = TOLOWER (tag >> 24); + buf[1] = TOLOWER ((tag >> 16) & 0xFF); + buf[2] = TOLOWER ((tag >> 8) & 0xFF); + buf[3] = '-'; + str += 4; + } + snprintf (str, 16, "x-hbot-%08x", tag); + return hb_language_from_string (&*buf, -1); + } +} + +/** + * hb_ot_tags_to_script_and_language: + * @script_tag: a script tag + * @language_tag: a language tag + * @script: (allow-none): the #hb_script_t corresponding to @script_tag (OUT). + * @language: (allow-none): the #hb_language_t corresponding to @script_tag and + * @language_tag (OUT). + * + * Converts a script tag and a language tag to an #hb_script_t and an + * #hb_language_t. + * + * Since: 2.0.0 + **/ +void +hb_ot_tags_to_script_and_language (hb_tag_t script_tag, + hb_tag_t language_tag, + hb_script_t *script /* OUT */, + hb_language_t *language /* OUT */) +{ + hb_script_t script_out = hb_ot_tag_to_script (script_tag); + if (script) + *script = script_out; + if (language) + { + unsigned int script_count = 1; + hb_tag_t primary_script_tag[1]; + hb_ot_tags_from_script_and_language (script_out, + HB_LANGUAGE_INVALID, + &script_count, + primary_script_tag, + nullptr, nullptr); + *language = hb_ot_tag_to_language (language_tag); + if (script_count == 0 || primary_script_tag[0] != script_tag) + { + unsigned char *buf; + const char *lang_str = hb_language_to_string (*language); + size_t len = strlen (lang_str); + buf = (unsigned char *) malloc (len + 16); + if (unlikely (!buf)) + { + *language = nullptr; + } + else + { + int shift; + memcpy (buf, lang_str, len); + if (lang_str[0] != 'x' || lang_str[1] != '-') { + buf[len++] = '-'; + buf[len++] = 'x'; + } + buf[len++] = '-'; + buf[len++] = 'h'; + buf[len++] = 'b'; + buf[len++] = 's'; + buf[len++] = 'c'; + buf[len++] = '-'; + for (shift = 28; shift >= 0; shift -= 4) + buf[len++] = TOHEX (script_tag >> shift); + *language = hb_language_from_string ((char *) buf, len); + free (buf); + } + } + } +} + +#ifdef MAIN +static inline void +test_langs_sorted () +{ + for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages); i++) + { + int c = ot_languages[i].cmp (&ot_languages[i - 1]); + if (c > 0) + { + fprintf (stderr, "ot_languages not sorted at index %d: %s %d %s\n", + i, ot_languages[i-1].language, c, ot_languages[i].language); + abort(); + } + } +} + +int +main () +{ + test_langs_sorted (); + return 0; +} + +#endif + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-var-avar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-avar-table.hh new file mode 100644 index 0000000000..29219adb0a --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-var-avar-table.hh @@ -0,0 +1,169 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_VAR_AVAR_TABLE_HH +#define HB_OT_VAR_AVAR_TABLE_HH + +#include "hb-open-type.hh" + +/* + * avar -- Axis Variations + * https://docs.microsoft.com/en-us/typography/opentype/spec/avar + */ + +#define HB_OT_TAG_avar HB_TAG('a','v','a','r') + + +namespace OT { + + +struct AxisValueMap +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + public: + F2DOT14 coords[2]; +// F2DOT14 fromCoord; /* A normalized coordinate value obtained using +// * default normalization. */ +// F2DOT14 toCoord; /* The modified, normalized coordinate value. */ + + public: + DEFINE_SIZE_STATIC (4); +}; + +struct SegmentMaps : ArrayOf<AxisValueMap> +{ + int map (int value, unsigned int from_offset = 0, unsigned int to_offset = 1) const + { +#define fromCoord coords[from_offset] +#define toCoord coords[to_offset] + /* The following special-cases are not part of OpenType, which requires + * that at least -1, 0, and +1 must be mapped. But we include these as + * part of a better error recovery scheme. */ + if (len < 2) + { + if (!len) + return value; + else /* len == 1*/ + return value - arrayZ[0].fromCoord + arrayZ[0].toCoord; + } + + if (value <= arrayZ[0].fromCoord) + return value - arrayZ[0].fromCoord + arrayZ[0].toCoord; + + unsigned int i; + unsigned int count = len - 1; + for (i = 1; i < count && value > arrayZ[i].fromCoord; i++) + ; + + if (value >= arrayZ[i].fromCoord) + return value - arrayZ[i].fromCoord + arrayZ[i].toCoord; + + if (unlikely (arrayZ[i-1].fromCoord == arrayZ[i].fromCoord)) + return arrayZ[i-1].toCoord; + + int denom = arrayZ[i].fromCoord - arrayZ[i-1].fromCoord; + return roundf (arrayZ[i-1].toCoord + ((float) (arrayZ[i].toCoord - arrayZ[i-1].toCoord) * + (value - arrayZ[i-1].fromCoord)) / denom); +#undef toCoord +#undef fromCoord + } + + int unmap (int value) const { return map (value, 1, 0); } + + public: + DEFINE_SIZE_ARRAY (2, *this); +}; + +struct avar +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_avar; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + if (unlikely (!(version.sanitize (c) && + version.major == 1 && + c->check_struct (this)))) + return_trace (false); + + const SegmentMaps *map = &firstAxisSegmentMaps; + unsigned int count = axisCount; + for (unsigned int i = 0; i < count; i++) + { + if (unlikely (!map->sanitize (c))) + return_trace (false); + map = &StructAfter<SegmentMaps> (*map); + } + + return_trace (true); + } + + void map_coords (int *coords, unsigned int coords_length) const + { + unsigned int count = hb_min (coords_length, axisCount); + + const SegmentMaps *map = &firstAxisSegmentMaps; + for (unsigned int i = 0; i < count; i++) + { + coords[i] = map->map (coords[i]); + map = &StructAfter<SegmentMaps> (*map); + } + } + + void unmap_coords (int *coords, unsigned int coords_length) const + { + unsigned int count = hb_min (coords_length, axisCount); + + const SegmentMaps *map = &firstAxisSegmentMaps; + for (unsigned int i = 0; i < count; i++) + { + coords[i] = map->unmap (coords[i]); + map = &StructAfter<SegmentMaps> (*map); + } + } + + protected: + FixedVersion<>version; /* Version of the avar table + * initially set to 0x00010000u */ + HBUINT16 reserved; /* This field is permanently reserved. Set to 0. */ + HBUINT16 axisCount; /* The number of variation axes in the font. This + * must be the same number as axisCount in the + * 'fvar' table. */ + SegmentMaps firstAxisSegmentMaps; + + public: + DEFINE_SIZE_MIN (8); +}; + +} /* namespace OT */ + + +#endif /* HB_OT_VAR_AVAR_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-var-fvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-fvar-table.hh new file mode 100644 index 0000000000..f9e933fb2b --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-var-fvar-table.hh @@ -0,0 +1,327 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_VAR_FVAR_TABLE_HH +#define HB_OT_VAR_FVAR_TABLE_HH + +#include "hb-open-type.hh" + +/* + * fvar -- Font Variations + * https://docs.microsoft.com/en-us/typography/opentype/spec/fvar + */ + +#define HB_OT_TAG_fvar HB_TAG('f','v','a','r') + + +namespace OT { + + +struct InstanceRecord +{ + friend struct fvar; + + hb_array_t<const HBFixed> get_coordinates (unsigned int axis_count) const + { return coordinatesZ.as_array (axis_count); } + + bool sanitize (hb_sanitize_context_t *c, unsigned int axis_count) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + c->check_array (coordinatesZ.arrayZ, axis_count)); + } + + protected: + NameID subfamilyNameID;/* The name ID for entries in the 'name' table + * that provide subfamily names for this instance. */ + HBUINT16 flags; /* Reserved for future use — set to 0. */ + UnsizedArrayOf<HBFixed> + coordinatesZ; /* The coordinates array for this instance. */ + //NameID postScriptNameIDX;/*Optional. The name ID for entries in the 'name' + // * table that provide PostScript names for this + // * instance. */ + + public: + DEFINE_SIZE_UNBOUNDED (4); +}; + +struct AxisRecord +{ + int cmp (hb_tag_t key) const { return axisTag.cmp (key); } + + enum + { + AXIS_FLAG_HIDDEN = 0x0001, + }; + +#ifndef HB_DISABLE_DEPRECATED + void get_axis_deprecated (hb_ot_var_axis_t *info) const + { + info->tag = axisTag; + info->name_id = axisNameID; + get_coordinates (info->min_value, info->default_value, info->max_value); + } +#endif + + void get_axis_info (unsigned axis_index, hb_ot_var_axis_info_t *info) const + { + info->axis_index = axis_index; + info->tag = axisTag; + info->name_id = axisNameID; + info->flags = (hb_ot_var_axis_flags_t) (unsigned int) flags; + get_coordinates (info->min_value, info->default_value, info->max_value); + info->reserved = 0; + } + + int normalize_axis_value (float v) const + { + float min_value, default_value, max_value; + get_coordinates (min_value, default_value, max_value); + + v = hb_clamp (v, min_value, max_value); + + if (v == default_value) + return 0; + else if (v < default_value) + v = (v - default_value) / (default_value - min_value); + else + v = (v - default_value) / (max_value - default_value); + return roundf (v * 16384.f); + } + + float unnormalize_axis_value (int v) const + { + float min_value, default_value, max_value; + get_coordinates (min_value, default_value, max_value); + + if (v == 0) + return default_value; + else if (v < 0) + return v * (default_value - min_value) / 16384.f + default_value; + else + return v * (max_value - default_value) / 16384.f + default_value; + } + + hb_ot_name_id_t get_name_id () const { return axisNameID; } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + protected: + void get_coordinates (float &min, float &default_, float &max) const + { + default_ = defaultValue / 65536.f; + /* Ensure order, to simplify client math. */ + min = hb_min (default_, minValue / 65536.f); + max = hb_max (default_, maxValue / 65536.f); + } + + protected: + Tag axisTag; /* Tag identifying the design variation for the axis. */ + HBFixed minValue; /* The minimum coordinate value for the axis. */ + HBFixed defaultValue; /* The default coordinate value for the axis. */ + HBFixed maxValue; /* The maximum coordinate value for the axis. */ + HBUINT16 flags; /* Axis flags. */ + NameID axisNameID; /* The name ID for entries in the 'name' table that + * provide a display name for this axis. */ + + public: + DEFINE_SIZE_STATIC (20); +}; + +struct fvar +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_fvar; + + bool has_data () const { return version.to_int (); } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (version.sanitize (c) && + likely (version.major == 1) && + c->check_struct (this) && + axisSize == 20 && /* Assumed in our code. */ + instanceSize >= axisCount * 4 + 4 && + get_axes ().sanitize (c) && + c->check_range (get_instance (0), instanceCount, instanceSize)); + } + + unsigned int get_axis_count () const { return axisCount; } + +#ifndef HB_DISABLE_DEPRECATED + unsigned int get_axes_deprecated (unsigned int start_offset, + unsigned int *axes_count /* IN/OUT */, + hb_ot_var_axis_t *axes_array /* OUT */) const + { + if (axes_count) + { + hb_array_t<const AxisRecord> arr = get_axes ().sub_array (start_offset, axes_count); + for (unsigned i = 0; i < arr.length; ++i) + arr[i].get_axis_deprecated (&axes_array[i]); + } + return axisCount; + } +#endif + + unsigned int get_axis_infos (unsigned int start_offset, + unsigned int *axes_count /* IN/OUT */, + hb_ot_var_axis_info_t *axes_array /* OUT */) const + { + if (axes_count) + { + hb_array_t<const AxisRecord> arr = get_axes ().sub_array (start_offset, axes_count); + for (unsigned i = 0; i < arr.length; ++i) + arr[i].get_axis_info (start_offset + i, &axes_array[i]); + } + return axisCount; + } + +#ifndef HB_DISABLE_DEPRECATED + bool + find_axis_deprecated (hb_tag_t tag, unsigned *axis_index, hb_ot_var_axis_t *info) const + { + unsigned i; + if (!axis_index) axis_index = &i; + *axis_index = HB_OT_VAR_NO_AXIS_INDEX; + auto axes = get_axes (); + return axes.lfind (tag, axis_index) && (axes[*axis_index].get_axis_deprecated (info), true); + } +#endif + + bool + find_axis_info (hb_tag_t tag, hb_ot_var_axis_info_t *info) const + { + unsigned i; + auto axes = get_axes (); + return axes.lfind (tag, &i) && (axes[i].get_axis_info (i, info), true); + } + + int normalize_axis_value (unsigned int axis_index, float v) const + { return get_axes ()[axis_index].normalize_axis_value (v); } + + float unnormalize_axis_value (unsigned int axis_index, int v) const + { return get_axes ()[axis_index].unnormalize_axis_value (v); } + + unsigned int get_instance_count () const { return instanceCount; } + + hb_ot_name_id_t get_instance_subfamily_name_id (unsigned int instance_index) const + { + const InstanceRecord *instance = get_instance (instance_index); + if (unlikely (!instance)) return HB_OT_NAME_ID_INVALID; + return instance->subfamilyNameID; + } + + hb_ot_name_id_t get_instance_postscript_name_id (unsigned int instance_index) const + { + const InstanceRecord *instance = get_instance (instance_index); + if (unlikely (!instance)) return HB_OT_NAME_ID_INVALID; + if (instanceSize >= axisCount * 4 + 6) + return StructAfter<NameID> (instance->get_coordinates (axisCount)); + return HB_OT_NAME_ID_INVALID; + } + + unsigned int get_instance_coords (unsigned int instance_index, + unsigned int *coords_length, /* IN/OUT */ + float *coords /* OUT */) const + { + const InstanceRecord *instance = get_instance (instance_index); + if (unlikely (!instance)) + { + if (coords_length) + *coords_length = 0; + return 0; + } + + if (coords_length && *coords_length) + { + hb_array_t<const HBFixed> instanceCoords = instance->get_coordinates (axisCount) + .sub_array (0, *coords_length); + for (unsigned int i = 0; i < instanceCoords.length; i++) + coords[i] = instanceCoords.arrayZ[i].to_float (); + } + return axisCount; + } + + void collect_name_ids (hb_set_t *nameids) const + { + if (!has_data ()) return; + + + get_axes () + | hb_map (&AxisRecord::get_name_id) + | hb_sink (nameids) + ; + + + hb_range ((unsigned) instanceCount) + | hb_map ([this] (const unsigned _) { return get_instance_subfamily_name_id (_); }) + | hb_sink (nameids) + ; + + + hb_range ((unsigned) instanceCount) + | hb_map ([this] (const unsigned _) { return get_instance_postscript_name_id (_); }) + | hb_sink (nameids) + ; + } + + protected: + hb_array_t<const AxisRecord> get_axes () const + { return hb_array (&(this+firstAxis), axisCount); } + + const InstanceRecord *get_instance (unsigned int i) const + { + if (unlikely (i >= instanceCount)) return nullptr; + return &StructAtOffset<InstanceRecord> (&StructAfter<InstanceRecord> (get_axes ()), + i * instanceSize); + } + + protected: + FixedVersion<>version; /* Version of the fvar table + * initially set to 0x00010000u */ + OffsetTo<AxisRecord> + firstAxis; /* Offset in bytes from the beginning of the table + * to the start of the AxisRecord array. */ + HBUINT16 reserved; /* This field is permanently reserved. Set to 2. */ + HBUINT16 axisCount; /* The number of variation axes in the font (the + * number of records in the axes array). */ + HBUINT16 axisSize; /* The size in bytes of each VariationAxisRecord — + * set to 20 (0x0014) for this version. */ + HBUINT16 instanceCount; /* The number of named instances defined in the font + * (the number of records in the instances array). */ + HBUINT16 instanceSize; /* The size in bytes of each InstanceRecord — set + * to either axisCount * sizeof(HBFixed) + 4, or to + * axisCount * sizeof(HBFixed) + 6. */ + + public: + DEFINE_SIZE_STATIC (16); +}; + +} /* namespace OT */ + + +#endif /* HB_OT_VAR_FVAR_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh new file mode 100644 index 0000000000..4d4e6dcae4 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh @@ -0,0 +1,701 @@ +/* + * Copyright © 2019 Adobe Inc. + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#ifndef HB_OT_VAR_GVAR_TABLE_HH +#define HB_OT_VAR_GVAR_TABLE_HH + +#include "hb-open-type.hh" + +/* + * gvar -- Glyph Variation Table + * https://docs.microsoft.com/en-us/typography/opentype/spec/gvar + */ +#define HB_OT_TAG_gvar HB_TAG('g','v','a','r') + +namespace OT { + +struct contour_point_t +{ + void init (float x_ = 0.f, float y_ = 0.f, bool is_end_point_ = false) + { flag = 0; x = x_; y = y_; is_end_point = is_end_point_; } + + void translate (const contour_point_t &p) { x += p.x; y += p.y; } + + uint8_t flag; + float x, y; + bool is_end_point; +}; + +struct contour_point_vector_t : hb_vector_t<contour_point_t> +{ + void extend (const hb_array_t<contour_point_t> &a) + { + unsigned int old_len = length; + resize (old_len + a.length); + for (unsigned int i = 0; i < a.length; i++) + (*this)[old_len + i] = a[i]; + } + + void transform (const float (&matrix)[4]) + { + for (unsigned int i = 0; i < length; i++) + { + contour_point_t &p = (*this)[i]; + float x_ = p.x * matrix[0] + p.y * matrix[2]; + p.y = p.x * matrix[1] + p.y * matrix[3]; + p.x = x_; + } + } + + void translate (const contour_point_t& delta) + { + for (unsigned int i = 0; i < length; i++) + (*this)[i].translate (delta); + } +}; + +/* https://docs.microsoft.com/en-us/typography/opentype/spec/otvarcommonformats#tuplevariationheader */ +struct TupleVariationHeader +{ + unsigned get_size (unsigned axis_count) const + { return min_size + get_all_tuples (axis_count).get_size (); } + + unsigned get_data_size () const { return varDataSize; } + + const TupleVariationHeader &get_next (unsigned axis_count) const + { return StructAtOffset<TupleVariationHeader> (this, get_size (axis_count)); } + + float calculate_scalar (const int *coords, unsigned int coord_count, + const hb_array_t<const F2DOT14> shared_tuples) const + { + hb_array_t<const F2DOT14> peak_tuple; + + if (has_peak ()) + peak_tuple = get_peak_tuple (coord_count); + else + { + unsigned int index = get_index (); + if (unlikely (index * coord_count >= shared_tuples.length)) + return 0.f; + peak_tuple = shared_tuples.sub_array (coord_count * index, coord_count); + } + + hb_array_t<const F2DOT14> start_tuple; + hb_array_t<const F2DOT14> end_tuple; + if (has_intermediate ()) + { + start_tuple = get_start_tuple (coord_count); + end_tuple = get_end_tuple (coord_count); + } + + float scalar = 1.f; + for (unsigned int i = 0; i < coord_count; i++) + { + int v = coords[i]; + int peak = peak_tuple[i]; + if (!peak || v == peak) continue; + + if (has_intermediate ()) + { + int start = start_tuple[i]; + int end = end_tuple[i]; + if (unlikely (start > peak || peak > end || + (start < 0 && end > 0 && peak))) continue; + if (v < start || v > end) return 0.f; + if (v < peak) + { if (peak != start) scalar *= (float) (v - start) / (peak - start); } + else + { if (peak != end) scalar *= (float) (end - v) / (end - peak); } + } + else if (!v || v < hb_min (0, peak) || v > hb_max (0, peak)) return 0.f; + else + scalar *= (float) v / peak; + } + return scalar; + } + + bool has_peak () const { return tupleIndex & TuppleIndex::EmbeddedPeakTuple; } + bool has_intermediate () const { return tupleIndex & TuppleIndex::IntermediateRegion; } + bool has_private_points () const { return tupleIndex & TuppleIndex::PrivatePointNumbers; } + unsigned get_index () const { return tupleIndex & TuppleIndex::TupleIndexMask; } + + protected: + struct TuppleIndex : HBUINT16 + { + enum Flags { + EmbeddedPeakTuple = 0x8000u, + IntermediateRegion = 0x4000u, + PrivatePointNumbers = 0x2000u, + TupleIndexMask = 0x0FFFu + }; + + DEFINE_SIZE_STATIC (2); + }; + + hb_array_t<const F2DOT14> get_all_tuples (unsigned axis_count) const + { return StructAfter<UnsizedArrayOf<F2DOT14>> (tupleIndex).as_array ((has_peak () + has_intermediate () * 2) * axis_count); } + hb_array_t<const F2DOT14> get_peak_tuple (unsigned axis_count) const + { return get_all_tuples (axis_count).sub_array (0, axis_count); } + hb_array_t<const F2DOT14> get_start_tuple (unsigned axis_count) const + { return get_all_tuples (axis_count).sub_array (has_peak () * axis_count, axis_count); } + hb_array_t<const F2DOT14> get_end_tuple (unsigned axis_count) const + { return get_all_tuples (axis_count).sub_array (has_peak () * axis_count + axis_count, axis_count); } + + HBUINT16 varDataSize; /* The size in bytes of the serialized + * data for this tuple variation table. */ + TuppleIndex tupleIndex; /* A packed field. The high 4 bits are flags (see below). + The low 12 bits are an index into a shared tuple + records array. */ + /* UnsizedArrayOf<F2DOT14> peakTuple - optional */ + /* Peak tuple record for this tuple variation table — optional, + * determined by flags in the tupleIndex value. + * + * Note that this must always be included in the 'cvar' table. */ + /* UnsizedArrayOf<F2DOT14> intermediateStartTuple - optional */ + /* Intermediate start tuple record for this tuple variation table — optional, + determined by flags in the tupleIndex value. */ + /* UnsizedArrayOf<F2DOT14> intermediateEndTuple - optional */ + /* Intermediate end tuple record for this tuple variation table — optional, + * determined by flags in the tupleIndex value. */ + public: + DEFINE_SIZE_MIN (4); +}; + +struct GlyphVariationData +{ + const TupleVariationHeader &get_tuple_var_header (void) const + { return StructAfter<TupleVariationHeader> (data); } + + struct tuple_iterator_t + { + void init (hb_bytes_t var_data_bytes_, unsigned int axis_count_) + { + var_data_bytes = var_data_bytes_; + var_data = var_data_bytes_.as<GlyphVariationData> (); + index = 0; + axis_count = axis_count_; + current_tuple = &var_data->get_tuple_var_header (); + data_offset = 0; + } + + bool get_shared_indices (hb_vector_t<unsigned int> &shared_indices /* OUT */) + { + if (var_data->has_shared_point_numbers ()) + { + const HBUINT8 *base = &(var_data+var_data->data); + const HBUINT8 *p = base; + if (!unpack_points (p, shared_indices, var_data_bytes)) return false; + data_offset = p - base; + } + return true; + } + + bool is_valid () const + { + return (index < var_data->tupleVarCount.get_count ()) && + var_data_bytes.check_range (current_tuple, TupleVariationHeader::min_size) && + var_data_bytes.check_range (current_tuple, hb_max (current_tuple->get_data_size (), current_tuple->get_size (axis_count))) && + current_tuple->get_size (axis_count); + } + + bool move_to_next () + { + data_offset += current_tuple->get_data_size (); + current_tuple = ¤t_tuple->get_next (axis_count); + index++; + return is_valid (); + } + + const HBUINT8 *get_serialized_data () const + { return &(var_data+var_data->data) + data_offset; } + + private: + const GlyphVariationData *var_data; + unsigned int index; + unsigned int axis_count; + unsigned int data_offset; + + public: + hb_bytes_t var_data_bytes; + const TupleVariationHeader *current_tuple; + }; + + static bool get_tuple_iterator (hb_bytes_t var_data_bytes, unsigned axis_count, + hb_vector_t<unsigned int> &shared_indices /* OUT */, + tuple_iterator_t *iterator /* OUT */) + { + iterator->init (var_data_bytes, axis_count); + if (!iterator->get_shared_indices (shared_indices)) + return false; + return iterator->is_valid (); + } + + bool has_shared_point_numbers () const { return tupleVarCount.has_shared_point_numbers (); } + + static bool unpack_points (const HBUINT8 *&p /* IN/OUT */, + hb_vector_t<unsigned int> &points /* OUT */, + const hb_bytes_t &bytes) + { + enum packed_point_flag_t + { + POINTS_ARE_WORDS = 0x80, + POINT_RUN_COUNT_MASK = 0x7F + }; + + if (unlikely (!bytes.check_range (p))) return false; + + uint16_t count = *p++; + if (count & POINTS_ARE_WORDS) + { + if (unlikely (!bytes.check_range (p))) return false; + count = ((count & POINT_RUN_COUNT_MASK) << 8) | *p++; + } + points.resize (count); + + unsigned int n = 0; + uint16_t i = 0; + while (i < count) + { + if (unlikely (!bytes.check_range (p))) return false; + uint16_t j; + uint8_t control = *p++; + uint16_t run_count = (control & POINT_RUN_COUNT_MASK) + 1; + if (control & POINTS_ARE_WORDS) + { + for (j = 0; j < run_count && i < count; j++, i++) + { + if (unlikely (!bytes.check_range ((const HBUINT16 *) p))) + return false; + n += *(const HBUINT16 *)p; + points[i] = n; + p += HBUINT16::static_size; + } + } + else + { + for (j = 0; j < run_count && i < count; j++, i++) + { + if (unlikely (!bytes.check_range (p))) return false; + n += *p++; + points[i] = n; + } + } + if (j < run_count) return false; + } + return true; + } + + static bool unpack_deltas (const HBUINT8 *&p /* IN/OUT */, + hb_vector_t<int> &deltas /* IN/OUT */, + const hb_bytes_t &bytes) + { + enum packed_delta_flag_t + { + DELTAS_ARE_ZERO = 0x80, + DELTAS_ARE_WORDS = 0x40, + DELTA_RUN_COUNT_MASK = 0x3F + }; + + unsigned int i = 0; + unsigned int count = deltas.length; + while (i < count) + { + if (unlikely (!bytes.check_range (p))) return false; + uint8_t control = *p++; + unsigned int run_count = (control & DELTA_RUN_COUNT_MASK) + 1; + unsigned int j; + if (control & DELTAS_ARE_ZERO) + for (j = 0; j < run_count && i < count; j++, i++) + deltas[i] = 0; + else if (control & DELTAS_ARE_WORDS) + for (j = 0; j < run_count && i < count; j++, i++) + { + if (unlikely (!bytes.check_range ((const HBUINT16 *) p))) + return false; + deltas[i] = *(const HBINT16 *) p; + p += HBUINT16::static_size; + } + else + for (j = 0; j < run_count && i < count; j++, i++) + { + if (unlikely (!bytes.check_range (p))) + return false; + deltas[i] = *(const HBINT8 *) p++; + } + if (j < run_count) + return false; + } + return true; + } + + bool has_data () const { return tupleVarCount; } + + protected: + struct TupleVarCount : HBUINT16 + { + bool has_shared_point_numbers () const { return ((*this) & SharedPointNumbers); } + unsigned int get_count () const { return (*this) & CountMask; } + + protected: + enum Flags + { + SharedPointNumbers= 0x8000u, + CountMask = 0x0FFFu + }; + public: + DEFINE_SIZE_STATIC (2); + }; + + TupleVarCount tupleVarCount; /* A packed field. The high 4 bits are flags, and the + * low 12 bits are the number of tuple variation tables + * for this glyph. The number of tuple variation tables + * can be any number between 1 and 4095. */ + OffsetTo<HBUINT8> + data; /* Offset from the start of the GlyphVariationData table + * to the serialized data. */ + /* TupleVariationHeader tupleVariationHeaders[] *//* Array of tuple variation headers. */ + public: + DEFINE_SIZE_MIN (4); +}; + +struct gvar +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_gvar; + + bool sanitize_shallow (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && (version.major == 1) && + (glyphCount == c->get_num_glyphs ()) && + sharedTuples.sanitize (c, this, axisCount * sharedTupleCount) && + (is_long_offset () ? + c->check_array (get_long_offset_array (), glyphCount+1) : + c->check_array (get_short_offset_array (), glyphCount+1)) && + c->check_array (((const HBUINT8*)&(this+dataZ)) + get_offset (0), + get_offset (glyphCount) - get_offset (0))); + } + + /* GlyphVariationData not sanitized here; must be checked while accessing each glyph varation data */ + bool sanitize (hb_sanitize_context_t *c) const + { return sanitize_shallow (c); } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + + gvar *out = c->serializer->allocate_min<gvar> (); + if (unlikely (!out)) return_trace (false); + + out->version.major = 1; + out->version.minor = 0; + out->axisCount = axisCount; + out->sharedTupleCount = sharedTupleCount; + + unsigned int num_glyphs = c->plan->num_output_glyphs (); + out->glyphCount = num_glyphs; + + unsigned int subset_data_size = 0; + for (hb_codepoint_t gid = 0; gid < num_glyphs; gid++) + { + hb_codepoint_t old_gid; + if (!c->plan->old_gid_for_new_gid (gid, &old_gid)) continue; + subset_data_size += get_glyph_var_data_bytes (c->source_blob, old_gid).length; + } + + bool long_offset = subset_data_size & ~0xFFFFu; + out->flags = long_offset ? 1 : 0; + + HBUINT8 *subset_offsets = c->serializer->allocate_size<HBUINT8> ((long_offset ? 4 : 2) * (num_glyphs + 1)); + if (!subset_offsets) return_trace (false); + + /* shared tuples */ + if (!sharedTupleCount || !sharedTuples) + out->sharedTuples = 0; + else + { + unsigned int shared_tuple_size = F2DOT14::static_size * axisCount * sharedTupleCount; + F2DOT14 *tuples = c->serializer->allocate_size<F2DOT14> (shared_tuple_size); + if (!tuples) return_trace (false); + out->sharedTuples = (char *) tuples - (char *) out; + memcpy (tuples, this+sharedTuples, shared_tuple_size); + } + + char *subset_data = c->serializer->allocate_size<char> (subset_data_size); + if (!subset_data) return_trace (false); + out->dataZ = subset_data - (char *) out; + + unsigned int glyph_offset = 0; + for (hb_codepoint_t gid = 0; gid < num_glyphs; gid++) + { + hb_codepoint_t old_gid; + hb_bytes_t var_data_bytes = c->plan->old_gid_for_new_gid (gid, &old_gid) + ? get_glyph_var_data_bytes (c->source_blob, old_gid) + : hb_bytes_t (); + + if (long_offset) + ((HBUINT32 *) subset_offsets)[gid] = glyph_offset; + else + ((HBUINT16 *) subset_offsets)[gid] = glyph_offset / 2; + + if (var_data_bytes.length > 0) + memcpy (subset_data, var_data_bytes.arrayZ, var_data_bytes.length); + subset_data += var_data_bytes.length; + glyph_offset += var_data_bytes.length; + } + if (long_offset) + ((HBUINT32 *) subset_offsets)[num_glyphs] = glyph_offset; + else + ((HBUINT16 *) subset_offsets)[num_glyphs] = glyph_offset / 2; + + return_trace (true); + } + + protected: + const hb_bytes_t get_glyph_var_data_bytes (hb_blob_t *blob, hb_codepoint_t glyph) const + { + unsigned start_offset = get_offset (glyph); + unsigned length = get_offset (glyph+1) - start_offset; + hb_bytes_t var_data = blob->as_bytes ().sub_array (((unsigned) dataZ) + start_offset, length); + return likely (var_data.length >= GlyphVariationData::min_size) ? var_data : hb_bytes_t (); + } + + bool is_long_offset () const { return flags & 1; } + + unsigned get_offset (unsigned i) const + { return is_long_offset () ? get_long_offset_array ()[i] : get_short_offset_array ()[i] * 2; } + + const HBUINT32 * get_long_offset_array () const { return (const HBUINT32 *) &offsetZ; } + const HBUINT16 *get_short_offset_array () const { return (const HBUINT16 *) &offsetZ; } + + public: + struct accelerator_t + { + void init (hb_face_t *face) + { table = hb_sanitize_context_t ().reference_table<gvar> (face); } + void fini () { table.destroy (); } + + private: + struct x_getter { static float get (const contour_point_t &p) { return p.x; } }; + struct y_getter { static float get (const contour_point_t &p) { return p.y; } }; + + template <typename T> + static float infer_delta (const hb_array_t<contour_point_t> points, + const hb_array_t<contour_point_t> deltas, + unsigned int target, unsigned int prev, unsigned int next) + { + float target_val = T::get (points[target]); + float prev_val = T::get (points[prev]); + float next_val = T::get (points[next]); + float prev_delta = T::get (deltas[prev]); + float next_delta = T::get (deltas[next]); + + if (prev_val == next_val) + return (prev_delta == next_delta) ? prev_delta : 0.f; + else if (target_val <= hb_min (prev_val, next_val)) + return (prev_val < next_val) ? prev_delta : next_delta; + else if (target_val >= hb_max (prev_val, next_val)) + return (prev_val > next_val) ? prev_delta : next_delta; + + /* linear interpolation */ + float r = (target_val - prev_val) / (next_val - prev_val); + return (1.f - r) * prev_delta + r * next_delta; + } + + static unsigned int next_index (unsigned int i, unsigned int start, unsigned int end) + { return (i >= end) ? start : (i + 1); } + + public: + bool apply_deltas_to_points (hb_codepoint_t glyph, hb_font_t *font, + const hb_array_t<contour_point_t> points) const + { + /* num_coords should exactly match gvar's axisCount due to how GlyphVariationData tuples are aligned */ + if (!font->num_coords || font->num_coords != table->axisCount) return true; + + if (unlikely (glyph >= table->glyphCount)) return true; + + hb_bytes_t var_data_bytes = table->get_glyph_var_data_bytes (table.get_blob (), glyph); + if (!var_data_bytes.as<GlyphVariationData> ()->has_data ()) return true; + hb_vector_t<unsigned int> shared_indices; + GlyphVariationData::tuple_iterator_t iterator; + if (!GlyphVariationData::get_tuple_iterator (var_data_bytes, table->axisCount, + shared_indices, &iterator)) + return true; /* so isn't applied at all */ + + /* Save original points for inferred delta calculation */ + contour_point_vector_t orig_points; + orig_points.resize (points.length); + for (unsigned int i = 0; i < orig_points.length; i++) + orig_points[i] = points[i]; + + contour_point_vector_t deltas; /* flag is used to indicate referenced point */ + deltas.resize (points.length); + + hb_vector_t<unsigned> end_points; + for (unsigned i = 0; i < points.length; ++i) + if (points[i].is_end_point) + end_points.push (i); + + int *coords = font->coords; + unsigned num_coords = font->num_coords; + hb_array_t<const F2DOT14> shared_tuples = (table+table->sharedTuples).as_array (table->sharedTupleCount * table->axisCount); + do + { + float scalar = iterator.current_tuple->calculate_scalar (coords, num_coords, shared_tuples); + if (scalar == 0.f) continue; + const HBUINT8 *p = iterator.get_serialized_data (); + unsigned int length = iterator.current_tuple->get_data_size (); + if (unlikely (!iterator.var_data_bytes.check_range (p, length))) + return false; + + hb_bytes_t bytes ((const char *) p, length); + hb_vector_t<unsigned int> private_indices; + if (iterator.current_tuple->has_private_points () && + !GlyphVariationData::unpack_points (p, private_indices, bytes)) + return false; + const hb_array_t<unsigned int> &indices = private_indices.length ? private_indices : shared_indices; + + bool apply_to_all = (indices.length == 0); + unsigned int num_deltas = apply_to_all ? points.length : indices.length; + hb_vector_t<int> x_deltas; + x_deltas.resize (num_deltas); + if (!GlyphVariationData::unpack_deltas (p, x_deltas, bytes)) + return false; + hb_vector_t<int> y_deltas; + y_deltas.resize (num_deltas); + if (!GlyphVariationData::unpack_deltas (p, y_deltas, bytes)) + return false; + + for (unsigned int i = 0; i < deltas.length; i++) + deltas[i].init (); + for (unsigned int i = 0; i < num_deltas; i++) + { + unsigned int pt_index = apply_to_all ? i : indices[i]; + deltas[pt_index].flag = 1; /* this point is referenced, i.e., explicit deltas specified */ + deltas[pt_index].x += x_deltas[i] * scalar; + deltas[pt_index].y += y_deltas[i] * scalar; + } + + /* infer deltas for unreferenced points */ + unsigned start_point = 0; + for (unsigned c = 0; c < end_points.length; c++) + { + unsigned end_point = end_points[c]; + + /* Check the number of unreferenced points in a contour. If no unref points or no ref points, nothing to do. */ + unsigned unref_count = 0; + for (unsigned i = start_point; i <= end_point; i++) + if (!deltas[i].flag) unref_count++; + + unsigned j = start_point; + if (unref_count == 0 || unref_count > end_point - start_point) + goto no_more_gaps; + + for (;;) + { + /* Locate the next gap of unreferenced points between two referenced points prev and next. + * Note that a gap may wrap around at left (start_point) and/or at right (end_point). + */ + unsigned int prev, next, i; + for (;;) + { + i = j; + j = next_index (i, start_point, end_point); + if (deltas[i].flag && !deltas[j].flag) break; + } + prev = j = i; + for (;;) + { + i = j; + j = next_index (i, start_point, end_point); + if (!deltas[i].flag && deltas[j].flag) break; + } + next = j; + /* Infer deltas for all unref points in the gap between prev and next */ + i = prev; + for (;;) + { + i = next_index (i, start_point, end_point); + if (i == next) break; + deltas[i].x = infer_delta<x_getter> (orig_points.as_array (), deltas.as_array (), i, prev, next); + deltas[i].y = infer_delta<y_getter> (orig_points.as_array (), deltas.as_array (), i, prev, next); + if (--unref_count == 0) goto no_more_gaps; + } + } +no_more_gaps: + start_point = end_point + 1; + } + + /* apply specified / inferred deltas to points */ + for (unsigned int i = 0; i < points.length; i++) + { + points[i].x += roundf (deltas[i].x); + points[i].y += roundf (deltas[i].y); + } + } while (iterator.move_to_next ()); + + return true; + } + + unsigned int get_axis_count () const { return table->axisCount; } + + private: + hb_blob_ptr_t<gvar> table; + }; + + protected: + FixedVersion<>version; /* Version number of the glyph variations table + * Set to 0x00010000u. */ + HBUINT16 axisCount; /* The number of variation axes for this font. This must be + * the same number as axisCount in the 'fvar' table. */ + HBUINT16 sharedTupleCount; + /* The number of shared tuple records. Shared tuple records + * can be referenced within glyph variation data tables for + * multiple glyphs, as opposed to other tuple records stored + * directly within a glyph variation data table. */ + LNNOffsetTo<UnsizedArrayOf<F2DOT14>> + sharedTuples; /* Offset from the start of this table to the shared tuple records. + * Array of tuple records shared across all glyph variation data tables. */ + HBUINT16 glyphCount; /* The number of glyphs in this font. This must match the number of + * glyphs stored elsewhere in the font. */ + HBUINT16 flags; /* Bit-field that gives the format of the offset array that follows. + * If bit 0 is clear, the offsets are uint16; if bit 0 is set, the + * offsets are uint32. */ + LOffsetTo<GlyphVariationData> + dataZ; /* Offset from the start of this table to the array of + * GlyphVariationData tables. */ + UnsizedArrayOf<HBUINT8> + offsetZ; /* Offsets from the start of the GlyphVariationData array + * to each GlyphVariationData table. */ + public: + DEFINE_SIZE_MIN (20); +}; + +struct gvar_accelerator_t : gvar::accelerator_t {}; + +} /* namespace OT */ + +#endif /* HB_OT_VAR_GVAR_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh new file mode 100644 index 0000000000..fdcc88d674 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh @@ -0,0 +1,488 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_VAR_HVAR_TABLE_HH +#define HB_OT_VAR_HVAR_TABLE_HH + +#include "hb-ot-layout-common.hh" + + +namespace OT { + + +struct DeltaSetIndexMap +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + c->check_range (mapDataZ.arrayZ, + mapCount, + get_width ())); + } + + template <typename T> + bool serialize (hb_serialize_context_t *c, const T &plan) + { + unsigned int width = plan.get_width (); + unsigned int inner_bit_count = plan.get_inner_bit_count (); + const hb_array_t<const unsigned int> output_map = plan.get_output_map (); + + TRACE_SERIALIZE (this); + if (unlikely (output_map.length && ((((inner_bit_count-1)&~0xF)!=0) || (((width-1)&~0x3)!=0)))) + return_trace (false); + if (unlikely (!c->extend_min (*this))) return_trace (false); + + format = ((width-1)<<4)|(inner_bit_count-1); + mapCount = output_map.length; + HBUINT8 *p = c->allocate_size<HBUINT8> (width * output_map.length); + if (unlikely (!p)) return_trace (false); + for (unsigned int i = 0; i < output_map.length; i++) + { + unsigned int v = output_map[i]; + unsigned int outer = v >> 16; + unsigned int inner = v & 0xFFFF; + unsigned int u = (outer << inner_bit_count) | inner; + for (unsigned int w = width; w > 0;) + { + p[--w] = u; + u >>= 8; + } + p += width; + } + return_trace (true); + } + + unsigned int map (unsigned int v) const /* Returns 16.16 outer.inner. */ + { + /* If count is zero, pass value unchanged. This takes + * care of direct mapping for advance map. */ + if (!mapCount) + return v; + + if (v >= mapCount) + v = mapCount - 1; + + unsigned int u = 0; + { /* Fetch it. */ + unsigned int w = get_width (); + const HBUINT8 *p = mapDataZ.arrayZ + w * v; + for (; w; w--) + u = (u << 8) + *p++; + } + + { /* Repack it. */ + unsigned int n = get_inner_bit_count (); + unsigned int outer = u >> n; + unsigned int inner = u & ((1 << n) - 1); + u = (outer<<16) | inner; + } + + return u; + } + + unsigned int get_map_count () const { return mapCount; } + unsigned int get_width () const { return ((format >> 4) & 3) + 1; } + unsigned int get_inner_bit_count () const { return (format & 0xF) + 1; } + + protected: + HBUINT16 format; /* A packed field that describes the compressed + * representation of delta-set indices. */ + HBUINT16 mapCount; /* The number of mapping entries. */ + UnsizedArrayOf<HBUINT8> + mapDataZ; /* The delta-set index mapping data. */ + + public: + DEFINE_SIZE_ARRAY (4, mapDataZ); +}; + +struct index_map_subset_plan_t +{ + enum index_map_index_t { + ADV_INDEX, + LSB_INDEX, /* dual as TSB */ + RSB_INDEX, /* dual as BSB */ + VORG_INDEX + }; + + void init (const DeltaSetIndexMap &index_map, + hb_inc_bimap_t &outer_map, + hb_vector_t<hb_set_t *> &inner_sets, + const hb_subset_plan_t *plan) + { + map_count = 0; + outer_bit_count = 0; + inner_bit_count = 1; + max_inners.init (); + output_map.init (); + + if (&index_map == &Null (DeltaSetIndexMap)) return; + + unsigned int last_val = (unsigned int)-1; + hb_codepoint_t last_gid = (hb_codepoint_t)-1; + hb_codepoint_t gid = (hb_codepoint_t) hb_min (index_map.get_map_count (), plan->num_output_glyphs ()); + + outer_bit_count = (index_map.get_width () * 8) - index_map.get_inner_bit_count (); + max_inners.resize (inner_sets.length); + for (unsigned i = 0; i < inner_sets.length; i++) max_inners[i] = 0; + + /* Search backwards for a map value different from the last map value */ + for (; gid > 0; gid--) + { + hb_codepoint_t old_gid; + if (!plan->old_gid_for_new_gid (gid - 1, &old_gid)) + { + if (last_gid == (hb_codepoint_t) -1) + continue; + else + break; + } + + unsigned int v = index_map.map (old_gid); + if (last_gid == (hb_codepoint_t) -1) + { + last_val = v; + last_gid = gid; + continue; + } + if (v != last_val) break; + + last_gid = gid; + } + + if (unlikely (last_gid == (hb_codepoint_t)-1)) return; + map_count = last_gid; + for (gid = 0; gid < map_count; gid++) + { + hb_codepoint_t old_gid; + if (plan->old_gid_for_new_gid (gid, &old_gid)) + { + unsigned int v = index_map.map (old_gid); + unsigned int outer = v >> 16; + unsigned int inner = v & 0xFFFF; + outer_map.add (outer); + if (inner > max_inners[outer]) max_inners[outer] = inner; + if (outer >= inner_sets.length) return; + inner_sets[outer]->add (inner); + } + } + } + + void fini () + { + max_inners.fini (); + output_map.fini (); + } + + void remap (const DeltaSetIndexMap *input_map, + const hb_inc_bimap_t &outer_map, + const hb_vector_t<hb_inc_bimap_t> &inner_maps, + const hb_subset_plan_t *plan) + { + if (input_map == &Null (DeltaSetIndexMap)) return; + + for (unsigned int i = 0; i < max_inners.length; i++) + { + if (inner_maps[i].get_population () == 0) continue; + unsigned int bit_count = (max_inners[i]==0)? 1: hb_bit_storage (inner_maps[i][max_inners[i]]); + if (bit_count > inner_bit_count) inner_bit_count = bit_count; + } + + output_map.resize (map_count); + for (hb_codepoint_t gid = 0; gid < output_map.length; gid++) + { + hb_codepoint_t old_gid; + if (plan->old_gid_for_new_gid (gid, &old_gid)) + { + unsigned int v = input_map->map (old_gid); + unsigned int outer = v >> 16; + output_map[gid] = (outer_map[outer] << 16) | (inner_maps[outer][v & 0xFFFF]); + } + else + output_map[gid] = 0; /* Map unused glyph to outer/inner=0/0 */ + } + } + + unsigned int get_inner_bit_count () const { return inner_bit_count; } + unsigned int get_width () const { return ((outer_bit_count + inner_bit_count + 7) / 8); } + unsigned int get_map_count () const { return map_count; } + + unsigned int get_size () const + { return (map_count? (DeltaSetIndexMap::min_size + get_width () * map_count): 0); } + + bool is_identity () const { return get_output_map ().length == 0; } + hb_array_t<const unsigned int> get_output_map () const { return output_map.as_array (); } + + protected: + unsigned int map_count; + hb_vector_t<unsigned int> max_inners; + unsigned int outer_bit_count; + unsigned int inner_bit_count; + hb_vector_t<unsigned int> output_map; +}; + +struct hvarvvar_subset_plan_t +{ + hvarvvar_subset_plan_t() : inner_maps (), index_map_plans () {} + ~hvarvvar_subset_plan_t() { fini (); } + + void init (const hb_array_t<const DeltaSetIndexMap *> &index_maps, + const VariationStore &_var_store, + const hb_subset_plan_t *plan) + { + index_map_plans.resize (index_maps.length); + + var_store = &_var_store; + inner_sets.resize (var_store->get_sub_table_count ()); + for (unsigned int i = 0; i < inner_sets.length; i++) + inner_sets[i] = hb_set_create (); + adv_set = hb_set_create (); + + inner_maps.resize (var_store->get_sub_table_count ()); + + for (unsigned int i = 0; i < inner_maps.length; i++) + inner_maps[i].init (); + + if (unlikely (!index_map_plans.length || !inner_sets.length || !inner_maps.length)) return; + + bool retain_adv_map = false; + index_map_plans[0].init (*index_maps[0], outer_map, inner_sets, plan); + if (index_maps[0] == &Null (DeltaSetIndexMap)) + { + retain_adv_map = plan->retain_gids; + outer_map.add (0); + for (hb_codepoint_t gid = 0; gid < plan->num_output_glyphs (); gid++) + { + hb_codepoint_t old_gid; + if (plan->old_gid_for_new_gid (gid, &old_gid)) + inner_sets[0]->add (old_gid); + } + hb_set_union (adv_set, inner_sets[0]); + } + + for (unsigned int i = 1; i < index_maps.length; i++) + index_map_plans[i].init (*index_maps[i], outer_map, inner_sets, plan); + + outer_map.sort (); + + if (retain_adv_map) + { + for (hb_codepoint_t gid = 0; gid < plan->num_output_glyphs (); gid++) + if (inner_sets[0]->has (gid)) + inner_maps[0].add (gid); + else + inner_maps[0].skip (); + } + else + { + inner_maps[0].add_set (adv_set); + hb_set_subtract (inner_sets[0], adv_set); + inner_maps[0].add_set (inner_sets[0]); + } + + for (unsigned int i = 1; i < inner_maps.length; i++) + inner_maps[i].add_set (inner_sets[i]); + + for (unsigned int i = 0; i < index_maps.length; i++) + index_map_plans[i].remap (index_maps[i], outer_map, inner_maps, plan); + } + + void fini () + { + for (unsigned int i = 0; i < inner_sets.length; i++) + hb_set_destroy (inner_sets[i]); + hb_set_destroy (adv_set); + inner_maps.fini_deep (); + index_map_plans.fini_deep (); + } + + hb_inc_bimap_t outer_map; + hb_vector_t<hb_inc_bimap_t> inner_maps; + hb_vector_t<index_map_subset_plan_t> index_map_plans; + const VariationStore *var_store; + + protected: + hb_vector_t<hb_set_t *> inner_sets; + hb_set_t *adv_set; +}; + +/* + * HVAR -- Horizontal Metrics Variations + * https://docs.microsoft.com/en-us/typography/opentype/spec/hvar + * VVAR -- Vertical Metrics Variations + * https://docs.microsoft.com/en-us/typography/opentype/spec/vvar + */ +#define HB_OT_TAG_HVAR HB_TAG('H','V','A','R') +#define HB_OT_TAG_VVAR HB_TAG('V','V','A','R') + +struct HVARVVAR +{ + static constexpr hb_tag_t HVARTag = HB_OT_TAG_HVAR; + static constexpr hb_tag_t VVARTag = HB_OT_TAG_VVAR; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (version.sanitize (c) && + likely (version.major == 1) && + varStore.sanitize (c, this) && + advMap.sanitize (c, this) && + lsbMap.sanitize (c, this) && + rsbMap.sanitize (c, this)); + } + + void listup_index_maps (hb_vector_t<const DeltaSetIndexMap *> &index_maps) const + { + index_maps.push (&(this+advMap)); + index_maps.push (&(this+lsbMap)); + index_maps.push (&(this+rsbMap)); + } + + bool serialize_index_maps (hb_serialize_context_t *c, + const hb_array_t<index_map_subset_plan_t> &im_plans) + { + TRACE_SERIALIZE (this); + if (im_plans[index_map_subset_plan_t::ADV_INDEX].is_identity ()) + advMap = 0; + else if (unlikely (!advMap.serialize (c, this).serialize (c, im_plans[index_map_subset_plan_t::ADV_INDEX]))) + return_trace (false); + if (im_plans[index_map_subset_plan_t::LSB_INDEX].is_identity ()) + lsbMap = 0; + else if (unlikely (!lsbMap.serialize (c, this).serialize (c, im_plans[index_map_subset_plan_t::LSB_INDEX]))) + return_trace (false); + if (im_plans[index_map_subset_plan_t::RSB_INDEX].is_identity ()) + rsbMap = 0; + else if (unlikely (!rsbMap.serialize (c, this).serialize (c, im_plans[index_map_subset_plan_t::RSB_INDEX]))) + return_trace (false); + + return_trace (true); + } + + template <typename T> + bool _subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + hvarvvar_subset_plan_t hvar_plan; + hb_vector_t<const DeltaSetIndexMap *> + index_maps; + + ((T*)this)->listup_index_maps (index_maps); + hvar_plan.init (index_maps.as_array (), this+varStore, c->plan); + + T *out = c->serializer->allocate_min<T> (); + if (unlikely (!out)) return_trace (false); + + out->version.major = 1; + out->version.minor = 0; + + if (unlikely (!out->varStore.serialize (c->serializer, out) + .serialize (c->serializer, hvar_plan.var_store, hvar_plan.inner_maps.as_array ()))) + return_trace (false); + + return_trace (out->T::serialize_index_maps (c->serializer, + hvar_plan.index_map_plans.as_array ())); + } + + float get_advance_var (hb_codepoint_t glyph, hb_font_t *font) const + { + unsigned int varidx = (this+advMap).map (glyph); + return (this+varStore).get_delta (varidx, font->coords, font->num_coords); + } + + float get_side_bearing_var (hb_codepoint_t glyph, + const int *coords, unsigned int coord_count) const + { + if (!has_side_bearing_deltas ()) return 0.f; + unsigned int varidx = (this+lsbMap).map (glyph); + return (this+varStore).get_delta (varidx, coords, coord_count); + } + + bool has_side_bearing_deltas () const { return lsbMap && rsbMap; } + + protected: + FixedVersion<>version; /* Version of the metrics variation table + * initially set to 0x00010000u */ + LOffsetTo<VariationStore> + varStore; /* Offset to item variation store table. */ + LOffsetTo<DeltaSetIndexMap> + advMap; /* Offset to advance var-idx mapping. */ + LOffsetTo<DeltaSetIndexMap> + lsbMap; /* Offset to lsb/tsb var-idx mapping. */ + LOffsetTo<DeltaSetIndexMap> + rsbMap; /* Offset to rsb/bsb var-idx mapping. */ + + public: + DEFINE_SIZE_STATIC (20); +}; + +struct HVAR : HVARVVAR { + static constexpr hb_tag_t tableTag = HB_OT_TAG_HVAR; + bool subset (hb_subset_context_t *c) const { return HVARVVAR::_subset<HVAR> (c); } +}; +struct VVAR : HVARVVAR { + static constexpr hb_tag_t tableTag = HB_OT_TAG_VVAR; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (static_cast<const HVARVVAR *> (this)->sanitize (c) && + vorgMap.sanitize (c, this)); + } + + void listup_index_maps (hb_vector_t<const DeltaSetIndexMap *> &index_maps) const + { + HVARVVAR::listup_index_maps (index_maps); + index_maps.push (&(this+vorgMap)); + } + + bool serialize_index_maps (hb_serialize_context_t *c, + const hb_array_t<index_map_subset_plan_t> &im_plans) + { + TRACE_SERIALIZE (this); + if (unlikely (!HVARVVAR::serialize_index_maps (c, im_plans))) + return_trace (false); + if (!im_plans[index_map_subset_plan_t::VORG_INDEX].get_map_count ()) + vorgMap = 0; + else if (unlikely (!vorgMap.serialize (c, this).serialize (c, im_plans[index_map_subset_plan_t::VORG_INDEX]))) + return_trace (false); + + return_trace (true); + } + + bool subset (hb_subset_context_t *c) const { return HVARVVAR::_subset<VVAR> (c); } + + protected: + LOffsetTo<DeltaSetIndexMap> + vorgMap; /* Offset to vertical-origin var-idx mapping. */ + + public: + DEFINE_SIZE_STATIC (24); +}; + +} /* namespace OT */ + + +#endif /* HB_OT_VAR_HVAR_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-var-mvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-mvar-table.hh new file mode 100644 index 0000000000..1b7fad9cec --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-var-mvar-table.hh @@ -0,0 +1,119 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_VAR_MVAR_TABLE_HH +#define HB_OT_VAR_MVAR_TABLE_HH + +#include "hb-ot-layout-common.hh" + + +namespace OT { + + +struct VariationValueRecord +{ + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + public: + Tag valueTag; /* Four-byte tag identifying a font-wide measure. */ + HBUINT32 varIdx; /* Outer/inner index into VariationStore item. */ + + public: + DEFINE_SIZE_STATIC (8); +}; + + +/* + * MVAR -- Metrics Variations + * https://docs.microsoft.com/en-us/typography/opentype/spec/mvar + */ +#define HB_OT_TAG_MVAR HB_TAG('M','V','A','R') + +struct MVAR +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_MVAR; + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (version.sanitize (c) && + likely (version.major == 1) && + c->check_struct (this) && + valueRecordSize >= VariationValueRecord::static_size && + varStore.sanitize (c, this) && + c->check_range (valuesZ.arrayZ, + valueRecordCount, + valueRecordSize)); + } + + float get_var (hb_tag_t tag, + const int *coords, unsigned int coord_count) const + { + const VariationValueRecord *record; + record = (VariationValueRecord *) hb_bsearch (tag, + (const VariationValueRecord *) + (const HBUINT8 *) valuesZ, + valueRecordCount, valueRecordSize, + tag_compare); + if (!record) + return 0.; + + return (this+varStore).get_delta (record->varIdx, coords, coord_count); + } + +protected: + static int tag_compare (const void *pa, const void *pb) + { + const hb_tag_t *a = (const hb_tag_t *) pa; + const Tag *b = (const Tag *) pb; + return b->cmp (*a); + } + + protected: + FixedVersion<>version; /* Version of the metrics variation table + * initially set to 0x00010000u */ + HBUINT16 reserved; /* Not used; set to 0. */ + HBUINT16 valueRecordSize;/* The size in bytes of each value record — + * must be greater than zero. */ + HBUINT16 valueRecordCount;/* The number of value records — may be zero. */ + OffsetTo<VariationStore> + varStore; /* Offset to item variation store table. */ + UnsizedArrayOf<HBUINT8> + valuesZ; /* Array of value records. The records must be + * in binary order of their valueTag field. */ + + public: + DEFINE_SIZE_ARRAY (12, valuesZ); +}; + +} /* namespace OT */ + + +#endif /* HB_OT_VAR_MVAR_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot-var.cc b/thirdparty/harfbuzz/src/hb-ot-var.cc new file mode 100644 index 0000000000..6b8b09b6ba --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-var.cc @@ -0,0 +1,220 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifndef HB_NO_VAR + +#include "hb-ot-var.h" + +#include "hb-ot-var-avar-table.hh" +#include "hb-ot-var-fvar-table.hh" +#include "hb-ot-var-mvar-table.hh" + + +/** + * SECTION:hb-ot-var + * @title: hb-ot-var + * @short_description: OpenType Font Variations + * @include: hb-ot.h + * + * Functions for fetching information about OpenType Variable Fonts. + **/ + + +/* + * fvar/avar + */ + + +/** + * hb_ot_var_has_data: + * @face: #hb_face_t to test + * + * This function allows to verify the presence of OpenType variation data on the face. + * + * Return value: true if face has a `fvar' table and false otherwise + * + * Since: 1.4.2 + **/ +hb_bool_t +hb_ot_var_has_data (hb_face_t *face) +{ + return face->table.fvar->has_data (); +} + +/** + * hb_ot_var_get_axis_count: + * + * Since: 1.4.2 + **/ +unsigned int +hb_ot_var_get_axis_count (hb_face_t *face) +{ + return face->table.fvar->get_axis_count (); +} + +#ifndef HB_DISABLE_DEPRECATED +/** + * hb_ot_var_get_axes: + * + * Since: 1.4.2 + * Deprecated: 2.2.0 + **/ +unsigned int +hb_ot_var_get_axes (hb_face_t *face, + unsigned int start_offset, + unsigned int *axes_count /* IN/OUT */, + hb_ot_var_axis_t *axes_array /* OUT */) +{ + return face->table.fvar->get_axes_deprecated (start_offset, axes_count, axes_array); +} + +/** + * hb_ot_var_find_axis: + * + * Since: 1.4.2 + * Deprecated: 2.2.0 + **/ +hb_bool_t +hb_ot_var_find_axis (hb_face_t *face, + hb_tag_t axis_tag, + unsigned int *axis_index, + hb_ot_var_axis_t *axis_info) +{ + return face->table.fvar->find_axis_deprecated (axis_tag, axis_index, axis_info); +} +#endif + +/** + * hb_ot_var_get_axis_infos: + * + * Since: 2.2.0 + **/ +HB_EXTERN unsigned int +hb_ot_var_get_axis_infos (hb_face_t *face, + unsigned int start_offset, + unsigned int *axes_count /* IN/OUT */, + hb_ot_var_axis_info_t *axes_array /* OUT */) +{ + return face->table.fvar->get_axis_infos (start_offset, axes_count, axes_array); +} + +/** + * hb_ot_var_find_axis_info: + * + * Since: 2.2.0 + **/ +HB_EXTERN hb_bool_t +hb_ot_var_find_axis_info (hb_face_t *face, + hb_tag_t axis_tag, + hb_ot_var_axis_info_t *axis_info) +{ + return face->table.fvar->find_axis_info (axis_tag, axis_info); +} + + +/* + * Named instances. + */ + +unsigned int +hb_ot_var_get_named_instance_count (hb_face_t *face) +{ + return face->table.fvar->get_instance_count (); +} + +hb_ot_name_id_t +hb_ot_var_named_instance_get_subfamily_name_id (hb_face_t *face, + unsigned int instance_index) +{ + return face->table.fvar->get_instance_subfamily_name_id (instance_index); +} + +hb_ot_name_id_t +hb_ot_var_named_instance_get_postscript_name_id (hb_face_t *face, + unsigned int instance_index) +{ + return face->table.fvar->get_instance_postscript_name_id (instance_index); +} + +unsigned int +hb_ot_var_named_instance_get_design_coords (hb_face_t *face, + unsigned int instance_index, + unsigned int *coords_length, /* IN/OUT */ + float *coords /* OUT */) +{ + return face->table.fvar->get_instance_coords (instance_index, coords_length, coords); +} + + +/** + * hb_ot_var_normalize_variations: + * + * Since: 1.4.2 + **/ +void +hb_ot_var_normalize_variations (hb_face_t *face, + const hb_variation_t *variations, /* IN */ + unsigned int variations_length, + int *coords, /* OUT */ + unsigned int coords_length) +{ + for (unsigned int i = 0; i < coords_length; i++) + coords[i] = 0; + + const OT::fvar &fvar = *face->table.fvar; + for (unsigned int i = 0; i < variations_length; i++) + { + hb_ot_var_axis_info_t info; + if (hb_ot_var_find_axis_info (face, variations[i].tag, &info) && + info.axis_index < coords_length) + coords[info.axis_index] = fvar.normalize_axis_value (info.axis_index, variations[i].value); + } + + face->table.avar->map_coords (coords, coords_length); +} + +/** + * hb_ot_var_normalize_coords: + * + * Since: 1.4.2 + **/ +void +hb_ot_var_normalize_coords (hb_face_t *face, + unsigned int coords_length, + const float *design_coords, /* IN */ + int *normalized_coords /* OUT */) +{ + const OT::fvar &fvar = *face->table.fvar; + for (unsigned int i = 0; i < coords_length; i++) + normalized_coords[i] = fvar.normalize_axis_value (i, design_coords[i]); + + face->table.avar->map_coords (normalized_coords, coords_length); +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-ot-var.h b/thirdparty/harfbuzz/src/hb-ot-var.h new file mode 100644 index 0000000000..df89bc5a23 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-var.h @@ -0,0 +1,146 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_H_IN +#error "Include <hb-ot.h> instead." +#endif + +#ifndef HB_OT_VAR_H +#define HB_OT_VAR_H + +#include "hb.h" + +HB_BEGIN_DECLS + + +#define HB_OT_TAG_VAR_AXIS_ITALIC HB_TAG('i','t','a','l') +#define HB_OT_TAG_VAR_AXIS_OPTICAL_SIZE HB_TAG('o','p','s','z') +#define HB_OT_TAG_VAR_AXIS_SLANT HB_TAG('s','l','n','t') +#define HB_OT_TAG_VAR_AXIS_WIDTH HB_TAG('w','d','t','h') +#define HB_OT_TAG_VAR_AXIS_WEIGHT HB_TAG('w','g','h','t') + + +/* + * fvar / avar + */ + +HB_EXTERN hb_bool_t +hb_ot_var_has_data (hb_face_t *face); + + +/* + * Variation axes. + */ + + +HB_EXTERN unsigned int +hb_ot_var_get_axis_count (hb_face_t *face); + +/** + * hb_ot_var_axis_flags_t: + * @HB_OT_VAR_AXIS_FLAG_HIDDEN: The axis should not be exposed directly in user interfaces. + * + * Since: 2.2.0 + */ +typedef enum { /*< flags >*/ + HB_OT_VAR_AXIS_FLAG_HIDDEN = 0x00000001u, + + _HB_OT_VAR_AXIS_FLAG_MAX_VALUE= HB_TAG_MAX_SIGNED /*< skip >*/ +} hb_ot_var_axis_flags_t; + +/** + * hb_ot_var_axis_info_t: + * + * Since: 2.2.0 + */ +typedef struct hb_ot_var_axis_info_t +{ + unsigned int axis_index; + hb_tag_t tag; + hb_ot_name_id_t name_id; + hb_ot_var_axis_flags_t flags; + float min_value; + float default_value; + float max_value; + /*< private >*/ + unsigned int reserved; +} hb_ot_var_axis_info_t; + +HB_EXTERN unsigned int +hb_ot_var_get_axis_infos (hb_face_t *face, + unsigned int start_offset, + unsigned int *axes_count /* IN/OUT */, + hb_ot_var_axis_info_t *axes_array /* OUT */); + +HB_EXTERN hb_bool_t +hb_ot_var_find_axis_info (hb_face_t *face, + hb_tag_t axis_tag, + hb_ot_var_axis_info_t *axis_info); + + +/* + * Named instances. + */ + +HB_EXTERN unsigned int +hb_ot_var_get_named_instance_count (hb_face_t *face); + +HB_EXTERN hb_ot_name_id_t +hb_ot_var_named_instance_get_subfamily_name_id (hb_face_t *face, + unsigned int instance_index); + +HB_EXTERN hb_ot_name_id_t +hb_ot_var_named_instance_get_postscript_name_id (hb_face_t *face, + unsigned int instance_index); + +HB_EXTERN unsigned int +hb_ot_var_named_instance_get_design_coords (hb_face_t *face, + unsigned int instance_index, + unsigned int *coords_length, /* IN/OUT */ + float *coords /* OUT */); + + +/* + * Conversions. + */ + +HB_EXTERN void +hb_ot_var_normalize_variations (hb_face_t *face, + const hb_variation_t *variations, /* IN */ + unsigned int variations_length, + int *coords, /* OUT */ + unsigned int coords_length); + +HB_EXTERN void +hb_ot_var_normalize_coords (hb_face_t *face, + unsigned int coords_length, + const float *design_coords, /* IN */ + int *normalized_coords /* OUT */); + + +HB_END_DECLS + +#endif /* HB_OT_VAR_H */ diff --git a/thirdparty/harfbuzz/src/hb-ot-vorg-table.hh b/thirdparty/harfbuzz/src/hb-ot-vorg-table.hh new file mode 100644 index 0000000000..c6803200f9 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot-vorg-table.hh @@ -0,0 +1,136 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#ifndef HB_OT_VORG_TABLE_HH +#define HB_OT_VORG_TABLE_HH + +#include "hb-open-type.hh" + +/* + * VORG -- Vertical Origin Table + * https://docs.microsoft.com/en-us/typography/opentype/spec/vorg + */ +#define HB_OT_TAG_VORG HB_TAG('V','O','R','G') + +namespace OT { + +struct VertOriginMetric +{ + int cmp (hb_codepoint_t g) const { return glyph.cmp (g); } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this)); + } + + public: + HBGlyphID glyph; + FWORD vertOriginY; + + public: + DEFINE_SIZE_STATIC (4); +}; + +struct VORG +{ + static constexpr hb_tag_t tableTag = HB_OT_TAG_VORG; + + bool has_data () const { return version.to_int (); } + + int get_y_origin (hb_codepoint_t glyph) const + { + unsigned int i; + if (!vertYOrigins.bfind (glyph, &i)) + return defaultVertOriginY; + return vertYOrigins[i].vertOriginY; + } + + template <typename Iterator, + hb_requires (hb_is_iterator (Iterator))> + void serialize (hb_serialize_context_t *c, + Iterator it, + FWORD defaultVertOriginY) + { + + if (unlikely (!c->extend_min ((*this)))) return; + + this->version.major = 1; + this->version.minor = 0; + + this->defaultVertOriginY = defaultVertOriginY; + this->vertYOrigins.len = it.len (); + + c->copy_all (it); + } + + bool subset (hb_subset_context_t *c) const + { + TRACE_SUBSET (this); + VORG *vorg_prime = c->serializer->start_embed<VORG> (); + if (unlikely (!c->serializer->check_success (vorg_prime))) return_trace (false); + + auto it = + + vertYOrigins.as_array () + | hb_filter (c->plan->glyphset (), &VertOriginMetric::glyph) + | hb_map ([&] (const VertOriginMetric& _) + { + hb_codepoint_t new_glyph = HB_SET_VALUE_INVALID; + c->plan->new_gid_for_old_gid (_.glyph, &new_glyph); + + VertOriginMetric metric; + metric.glyph = new_glyph; + metric.vertOriginY = _.vertOriginY; + return metric; + }) + ; + + /* serialize the new table */ + vorg_prime->serialize (c->serializer, it, defaultVertOriginY); + return_trace (true); + } + + bool sanitize (hb_sanitize_context_t *c) const + { + TRACE_SANITIZE (this); + return_trace (c->check_struct (this) && + version.major == 1 && + vertYOrigins.sanitize (c)); + } + + protected: + FixedVersion<>version; /* Version of VORG table. Set to 0x00010000u. */ + FWORD defaultVertOriginY; + /* The default vertical origin. */ + SortedArrayOf<VertOriginMetric> + vertYOrigins; /* The array of vertical origins. */ + + public: + DEFINE_SIZE_ARRAY(8, vertYOrigins); +}; +} /* namespace OT */ + +#endif /* HB_OT_VORG_TABLE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ot.h b/thirdparty/harfbuzz/src/hb-ot.h new file mode 100644 index 0000000000..f2dbaa1b31 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ot.h @@ -0,0 +1,49 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_H +#define HB_OT_H +#define HB_OT_H_IN + +#include "hb.h" + +#include "hb-ot-color.h" +#include "hb-ot-deprecated.h" +#include "hb-ot-font.h" +#include "hb-ot-layout.h" +#include "hb-ot-math.h" +#include "hb-ot-meta.h" +#include "hb-ot-metrics.h" +#include "hb-ot-name.h" +#include "hb-ot-shape.h" +#include "hb-ot-var.h" + +HB_BEGIN_DECLS + +HB_END_DECLS + +#undef HB_OT_H_IN +#endif /* HB_OT_H */ diff --git a/thirdparty/harfbuzz/src/hb-pool.hh b/thirdparty/harfbuzz/src/hb-pool.hh new file mode 100644 index 0000000000..dcf0faf2a9 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-pool.hh @@ -0,0 +1,100 @@ +/* + * Copyright © 2019 Facebook, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Facebook Author(s): Behdad Esfahbod + */ + +#ifndef HB_POOL_HH +#define HB_POOL_HH + +#include "hb.hh" + +/* Memory pool for persistent allocation of small objects. */ + +template <typename T, unsigned ChunkLen = 16> +struct hb_pool_t +{ + hb_pool_t () : next (nullptr) {} + ~hb_pool_t () { fini (); } + + void fini () + { + next = nullptr; + + for (chunk_t *_ : chunks) ::free (_); + + chunks.fini (); + } + + T* alloc () + { + if (unlikely (!next)) + { + if (unlikely (!chunks.alloc (chunks.length + 1))) return nullptr; + chunk_t *chunk = (chunk_t *) calloc (1, sizeof (chunk_t)); + if (unlikely (!chunk)) return nullptr; + chunks.push (chunk); + next = chunk->thread (); + } + + T* obj = next; + next = * ((T**) next); + + memset (obj, 0, sizeof (T)); + + return obj; + } + + void free (T* obj) + { + * (T**) obj = next; + next = obj; + } + + private: + + static_assert (ChunkLen > 1, ""); + static_assert (sizeof (T) >= sizeof (void *), ""); + static_assert (alignof (T) % alignof (void *) == 0, ""); + + struct chunk_t + { + T* thread () + { + for (unsigned i = 0; i < ARRAY_LENGTH (arrayZ) - 1; i++) + * (T**) &arrayZ[i] = &arrayZ[i + 1]; + + * (T**) &arrayZ[ARRAY_LENGTH (arrayZ) - 1] = nullptr; + + return arrayZ; + } + + T arrayZ[ChunkLen]; + }; + + T* next; + hb_vector_t<chunk_t *> chunks; +}; + + +#endif /* HB_POOL_HH */ diff --git a/thirdparty/harfbuzz/src/hb-sanitize.hh b/thirdparty/harfbuzz/src/hb-sanitize.hh new file mode 100644 index 0000000000..024b4d1c99 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-sanitize.hh @@ -0,0 +1,412 @@ +/* + * Copyright © 2007,2008,2009,2010 Red Hat, Inc. + * Copyright © 2012,2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_SANITIZE_HH +#define HB_SANITIZE_HH + +#include "hb.hh" +#include "hb-blob.hh" +#include "hb-dispatch.hh" + + +/* + * Sanitize + * + * + * === Introduction === + * + * The sanitize machinery is at the core of our zero-cost font loading. We + * mmap() font file into memory and create a blob out of it. Font subtables + * are returned as a readonly sub-blob of the main font blob. These table + * blobs are then sanitized before use, to ensure invalid memory access does + * not happen. The toplevel sanitize API use is like, eg. to load the 'head' + * table: + * + * hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table<OT::head> (face); + * + * The blob then can be converted to a head table struct with: + * + * const head *head_table = head_blob->as<head> (); + * + * What the reference_table does is, to call hb_face_reference_table() to load + * the table blob, sanitize it and return either the sanitized blob, or empty + * blob if sanitization failed. The blob->as() function returns the null + * object of its template type argument if the blob is empty. Otherwise, it + * just casts the blob contents to the desired type. + * + * Sanitizing a blob of data with a type T works as follows (with minor + * simplification): + * + * - Cast blob content to T*, call sanitize() method of it, + * - If sanitize succeeded, return blob. + * - Otherwise, if blob is not writable, try making it writable, + * or copy if cannot be made writable in-place, + * - Call sanitize() again. Return blob if sanitize succeeded. + * - Return empty blob otherwise. + * + * + * === The sanitize() contract === + * + * The sanitize() method of each object type shall return true if it's safe to + * call other methods of the object, and false otherwise. + * + * Note that what sanitize() checks for might align with what the specification + * describes as valid table data, but does not have to be. In particular, we + * do NOT want to be pedantic and concern ourselves with validity checks that + * are irrelevant to our use of the table. On the contrary, we want to be + * lenient with error handling and accept invalid data to the extent that it + * does not impose extra burden on us. + * + * Based on the sanitize contract, one can see that what we check for depends + * on how we use the data in other table methods. Ie. if other table methods + * assume that offsets do NOT point out of the table data block, then that's + * something sanitize() must check for (GSUB/GPOS/GDEF/etc work this way). On + * the other hand, if other methods do such checks themselves, then sanitize() + * does not have to bother with them (glyf/local work this way). The choice + * depends on the table structure and sanitize() performance. For example, to + * check glyf/loca offsets in sanitize() would cost O(num-glyphs). We try hard + * to avoid such costs during font loading. By postponing such checks to the + * actual glyph loading, we reduce the sanitize cost to O(1) and total runtime + * cost to O(used-glyphs). As such, this is preferred. + * + * The same argument can be made re GSUB/GPOS/GDEF, but there, the table + * structure is so complicated that by checking all offsets at sanitize() time, + * we make the code much simpler in other methods, as offsets and referenced + * objects do not need to be validated at each use site. + */ + +/* This limits sanitizing time on really broken fonts. */ +#ifndef HB_SANITIZE_MAX_EDITS +#define HB_SANITIZE_MAX_EDITS 32 +#endif +#ifndef HB_SANITIZE_MAX_OPS_FACTOR +#define HB_SANITIZE_MAX_OPS_FACTOR 8 +#endif +#ifndef HB_SANITIZE_MAX_OPS_MIN +#define HB_SANITIZE_MAX_OPS_MIN 16384 +#endif +#ifndef HB_SANITIZE_MAX_OPS_MAX +#define HB_SANITIZE_MAX_OPS_MAX 0x3FFFFFFF +#endif +#ifndef HB_SANITIZE_MAX_SUTABLES +#define HB_SANITIZE_MAX_SUTABLES 0x4000 +#endif + +struct hb_sanitize_context_t : + hb_dispatch_context_t<hb_sanitize_context_t, bool, HB_DEBUG_SANITIZE> +{ + hb_sanitize_context_t () : + start (nullptr), end (nullptr), + max_ops (0), max_subtables (0), + writable (false), edit_count (0), + blob (nullptr), + num_glyphs (65536), + num_glyphs_set (false) {} + + const char *get_name () { return "SANITIZE"; } + template <typename T, typename F> + bool may_dispatch (const T *obj HB_UNUSED, const F *format) + { return format->sanitize (this); } + static return_t default_return_value () { return true; } + static return_t no_dispatch_return_value () { return false; } + bool stop_sublookup_iteration (const return_t r) const { return !r; } + + bool visit_subtables (unsigned count) + { + max_subtables += count; + return max_subtables < HB_SANITIZE_MAX_SUTABLES; + } + + private: + template <typename T, typename ...Ts> auto + _dispatch (const T &obj, hb_priority<1>, Ts&&... ds) HB_AUTO_RETURN + ( obj.sanitize (this, hb_forward<Ts> (ds)...) ) + template <typename T, typename ...Ts> auto + _dispatch (const T &obj, hb_priority<0>, Ts&&... ds) HB_AUTO_RETURN + ( obj.dispatch (this, hb_forward<Ts> (ds)...) ) + public: + template <typename T, typename ...Ts> auto + dispatch (const T &obj, Ts&&... ds) HB_AUTO_RETURN + ( _dispatch (obj, hb_prioritize, hb_forward<Ts> (ds)...) ) + + + void init (hb_blob_t *b) + { + this->blob = hb_blob_reference (b); + this->writable = false; + } + + void set_num_glyphs (unsigned int num_glyphs_) + { + num_glyphs = num_glyphs_; + num_glyphs_set = true; + } + unsigned int get_num_glyphs () { return num_glyphs; } + + void set_max_ops (int max_ops_) { max_ops = max_ops_; } + + template <typename T> + void set_object (const T *obj) + { + reset_object (); + + if (!obj) return; + + const char *obj_start = (const char *) obj; + if (unlikely (obj_start < this->start || this->end <= obj_start)) + this->start = this->end = nullptr; + else + { + this->start = obj_start; + this->end = obj_start + hb_min (size_t (this->end - obj_start), obj->get_size ()); + } + } + + void reset_object () + { + this->start = this->blob->data; + this->end = this->start + this->blob->length; + assert (this->start <= this->end); /* Must not overflow. */ + } + + void start_processing () + { + reset_object (); + if (unlikely (hb_unsigned_mul_overflows (this->end - this->start, HB_SANITIZE_MAX_OPS_FACTOR))) + this->max_ops = HB_SANITIZE_MAX_OPS_MAX; + else + this->max_ops = hb_clamp ((unsigned) (this->end - this->start) * HB_SANITIZE_MAX_OPS_FACTOR, + (unsigned) HB_SANITIZE_MAX_OPS_MIN, + (unsigned) HB_SANITIZE_MAX_OPS_MAX); + this->edit_count = 0; + this->debug_depth = 0; + + DEBUG_MSG_LEVEL (SANITIZE, start, 0, +1, + "start [%p..%p] (%lu bytes)", + this->start, this->end, + (unsigned long) (this->end - this->start)); + } + + void end_processing () + { + DEBUG_MSG_LEVEL (SANITIZE, this->start, 0, -1, + "end [%p..%p] %u edit requests", + this->start, this->end, this->edit_count); + + hb_blob_destroy (this->blob); + this->blob = nullptr; + this->start = this->end = nullptr; + } + + unsigned get_edit_count () { return edit_count; } + + bool check_range (const void *base, + unsigned int len) const + { + const char *p = (const char *) base; + bool ok = !len || + (this->start <= p && + p <= this->end && + (unsigned int) (this->end - p) >= len && + this->max_ops-- > 0); + + DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0, + "check_range [%p..%p]" + " (%d bytes) in [%p..%p] -> %s", + p, p + len, len, + this->start, this->end, + ok ? "OK" : "OUT-OF-RANGE"); + + return likely (ok); + } + + template <typename T> + bool check_range (const T *base, + unsigned int a, + unsigned int b) const + { + return !hb_unsigned_mul_overflows (a, b) && + this->check_range (base, a * b); + } + + template <typename T> + bool check_range (const T *base, + unsigned int a, + unsigned int b, + unsigned int c) const + { + return !hb_unsigned_mul_overflows (a, b) && + this->check_range (base, a * b, c); + } + + template <typename T> + bool check_array (const T *base, unsigned int len) const + { + return this->check_range (base, len, hb_static_size (T)); + } + + template <typename T> + bool check_array (const T *base, + unsigned int a, + unsigned int b) const + { + return this->check_range (base, a, b, hb_static_size (T)); + } + + template <typename Type> + bool check_struct (const Type *obj) const + { return likely (this->check_range (obj, obj->min_size)); } + + bool may_edit (const void *base, unsigned int len) + { + if (this->edit_count >= HB_SANITIZE_MAX_EDITS) + return false; + + const char *p = (const char *) base; + this->edit_count++; + + DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0, + "may_edit(%u) [%p..%p] (%d bytes) in [%p..%p] -> %s", + this->edit_count, + p, p + len, len, + this->start, this->end, + this->writable ? "GRANTED" : "DENIED"); + + return this->writable; + } + + template <typename Type, typename ValueType> + bool try_set (const Type *obj, const ValueType &v) + { + if (this->may_edit (obj, hb_static_size (Type))) + { + * const_cast<Type *> (obj) = v; + return true; + } + return false; + } + + template <typename Type> + hb_blob_t *sanitize_blob (hb_blob_t *blob) + { + bool sane; + + init (blob); + + retry: + DEBUG_MSG_FUNC (SANITIZE, start, "start"); + + start_processing (); + + if (unlikely (!start)) + { + end_processing (); + return blob; + } + + Type *t = reinterpret_cast<Type *> (const_cast<char *> (start)); + + sane = t->sanitize (this); + if (sane) + { + if (edit_count) + { + DEBUG_MSG_FUNC (SANITIZE, start, "passed first round with %d edits; going for second round", edit_count); + + /* sanitize again to ensure no toe-stepping */ + edit_count = 0; + sane = t->sanitize (this); + if (edit_count) { + DEBUG_MSG_FUNC (SANITIZE, start, "requested %d edits in second round; FAILLING", edit_count); + sane = false; + } + } + } + else + { + if (edit_count && !writable) { + start = hb_blob_get_data_writable (blob, nullptr); + end = start + blob->length; + + if (start) + { + writable = true; + /* ok, we made it writable by relocating. try again */ + DEBUG_MSG_FUNC (SANITIZE, start, "retry"); + goto retry; + } + } + } + + end_processing (); + + DEBUG_MSG_FUNC (SANITIZE, start, sane ? "PASSED" : "FAILED"); + if (sane) + { + hb_blob_make_immutable (blob); + return blob; + } + else + { + hb_blob_destroy (blob); + return hb_blob_get_empty (); + } + } + + template <typename Type> + hb_blob_t *reference_table (const hb_face_t *face, hb_tag_t tableTag = Type::tableTag) + { + if (!num_glyphs_set) + set_num_glyphs (hb_face_get_glyph_count (face)); + return sanitize_blob<Type> (hb_face_reference_table (face, tableTag)); + } + + const char *start, *end; + mutable int max_ops, max_subtables; + private: + bool writable; + unsigned int edit_count; + hb_blob_t *blob; + unsigned int num_glyphs; + bool num_glyphs_set; +}; + +struct hb_sanitize_with_object_t +{ + template <typename T> + hb_sanitize_with_object_t (hb_sanitize_context_t *c, const T& obj) : c (c) + { c->set_object (obj); } + ~hb_sanitize_with_object_t () + { c->reset_object (); } + + private: + hb_sanitize_context_t *c; +}; + + +#endif /* HB_SANITIZE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-serialize.hh b/thirdparty/harfbuzz/src/hb-serialize.hh new file mode 100644 index 0000000000..4566153a59 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-serialize.hh @@ -0,0 +1,553 @@ +/* + * Copyright © 2007,2008,2009,2010 Red Hat, Inc. + * Copyright © 2012,2018 Google, Inc. + * Copyright © 2019 Facebook, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + * Facebook Author(s): Behdad Esfahbod + */ + +#ifndef HB_SERIALIZE_HH +#define HB_SERIALIZE_HH + +#include "hb.hh" +#include "hb-blob.hh" +#include "hb-map.hh" +#include "hb-pool.hh" + + +/* + * Serialize + */ + +struct hb_serialize_context_t +{ + typedef unsigned objidx_t; + + enum whence_t { + Head, /* Relative to the current object head (default). */ + Tail, /* Relative to the current object tail after packed. */ + Absolute /* Absolute: from the start of the serialize buffer. */ + }; + + struct object_t + { + void fini () { links.fini (); } + + bool operator == (const object_t &o) const + { + return (tail - head == o.tail - o.head) + && (links.length == o.links.length) + && 0 == hb_memcmp (head, o.head, tail - head) + && links.as_bytes () == o.links.as_bytes (); + } + uint32_t hash () const + { + return hb_bytes_t (head, tail - head).hash () ^ + links.as_bytes ().hash (); + } + + struct link_t + { + bool is_wide: 1; + bool is_signed: 1; + unsigned whence: 2; + unsigned position: 28; + unsigned bias; + objidx_t objidx; + }; + + char *head; + char *tail; + hb_vector_t<link_t> links; + object_t *next; + }; + + struct snapshot_t + { + char *head; + char *tail; + object_t *current; // Just for sanity check + unsigned num_links; + }; + + snapshot_t snapshot () + { return snapshot_t { head, tail, current, current->links.length }; } + + hb_serialize_context_t (void *start_, unsigned int size) : + start ((char *) start_), + end (start + size), + current (nullptr) + { reset (); } + ~hb_serialize_context_t () { fini (); } + + void fini () + { + for (object_t *_ : ++hb_iter (packed)) _->fini (); + packed.fini (); + this->packed_map.fini (); + + while (current) + { + auto *_ = current; + current = current->next; + _->fini (); + } + object_pool.fini (); + } + + bool in_error () const { return !this->successful; } + + void reset () + { + this->successful = true; + this->ran_out_of_room = false; + this->head = this->start; + this->tail = this->end; + this->debug_depth = 0; + + fini (); + this->packed.push (nullptr); + } + + bool check_success (bool success) + { return this->successful && (success || (err_other_error (), false)); } + + template <typename T1, typename T2> + bool check_equal (T1 &&v1, T2 &&v2) + { return check_success ((long long) v1 == (long long) v2); } + + template <typename T1, typename T2> + bool check_assign (T1 &v1, T2 &&v2) + { return check_equal (v1 = v2, v2); } + + template <typename T> bool propagate_error (T &&obj) + { return check_success (!hb_deref (obj).in_error ()); } + + template <typename T1, typename... Ts> bool propagate_error (T1 &&o1, Ts&&... os) + { return propagate_error (hb_forward<T1> (o1)) && + propagate_error (hb_forward<Ts> (os)...); } + + /* To be called around main operation. */ + template <typename Type> + Type *start_serialize () + { + DEBUG_MSG_LEVEL (SERIALIZE, this->start, 0, +1, + "start [%p..%p] (%lu bytes)", + this->start, this->end, + (unsigned long) (this->end - this->start)); + + assert (!current); + return push<Type> (); + } + void end_serialize () + { + DEBUG_MSG_LEVEL (SERIALIZE, this->start, 0, -1, + "end [%p..%p] serialized %u bytes; %s", + this->start, this->end, + (unsigned) (this->head - this->start), + this->successful ? "successful" : "UNSUCCESSFUL"); + + propagate_error (packed, packed_map); + + if (unlikely (!current)) return; + if (unlikely (in_error())) return; + + assert (!current->next); + + /* Only "pack" if there exist other objects... Otherwise, don't bother. + * Saves a move. */ + if (packed.length <= 1) + return; + + pop_pack (false); + + resolve_links (); + } + + template <typename Type = void> + Type *push () + { + if (unlikely (in_error ())) return start_embed<Type> (); + + object_t *obj = object_pool.alloc (); + if (unlikely (!obj)) + check_success (false); + else + { + obj->head = head; + obj->tail = tail; + obj->next = current; + current = obj; + } + return start_embed<Type> (); + } + void pop_discard () + { + object_t *obj = current; + if (unlikely (!obj)) return; + if (unlikely (in_error())) return; + + current = current->next; + revert (obj->head, obj->tail); + obj->fini (); + object_pool.free (obj); + } + + /* Set share to false when an object is unlikely sharable with others + * so not worth an attempt, or a contiguous table is serialized as + * multiple consecutive objects in the reverse order so can't be shared. + */ + objidx_t pop_pack (bool share=true) + { + object_t *obj = current; + if (unlikely (!obj)) return 0; + if (unlikely (in_error())) return 0; + + current = current->next; + obj->tail = head; + obj->next = nullptr; + unsigned len = obj->tail - obj->head; + head = obj->head; /* Rewind head. */ + + if (!len) + { + assert (!obj->links.length); + return 0; + } + + objidx_t objidx; + if (share) + { + objidx = packed_map.get (obj); + if (objidx) + { + obj->fini (); + return objidx; + } + } + + tail -= len; + memmove (tail, obj->head, len); + + obj->head = tail; + obj->tail = tail + len; + + packed.push (obj); + + if (unlikely (packed.in_error ())) { + // obj wasn't successfully added to packed, so clean it up otherwise it's + // links will be leaked. + propagate_error (packed); + obj->fini (); + return 0; + } + + objidx = packed.length - 1; + + if (share) packed_map.set (obj, objidx); + propagate_error (packed_map); + + return objidx; + } + + void revert (snapshot_t snap) + { + if (unlikely (in_error ())) return; + assert (snap.current == current); + current->links.shrink (snap.num_links); + revert (snap.head, snap.tail); + } + + void revert (char *snap_head, + char *snap_tail) + { + if (unlikely (in_error ())) return; + assert (snap_head <= head); + assert (tail <= snap_tail); + head = snap_head; + tail = snap_tail; + discard_stale_objects (); + } + + void discard_stale_objects () + { + if (unlikely (in_error ())) return; + while (packed.length > 1 && + packed.tail ()->head < tail) + { + packed_map.del (packed.tail ()); + assert (!packed.tail ()->next); + packed.tail ()->fini (); + packed.pop (); + } + if (packed.length > 1) + assert (packed.tail ()->head == tail); + } + + template <typename T> + void add_link (T &ofs, objidx_t objidx, + whence_t whence = Head, + unsigned bias = 0) + { + static_assert (sizeof (T) == 2 || sizeof (T) == 4, ""); + if (unlikely (in_error ())) return; + + if (!objidx) + return; + + assert (current); + assert (current->head <= (const char *) &ofs); + + auto& link = *current->links.push (); + + link.is_wide = sizeof (T) == 4; + link.is_signed = hb_is_signed (hb_unwrap_type (T)); + link.whence = (unsigned) whence; + link.position = (const char *) &ofs - current->head; + link.bias = bias; + link.objidx = objidx; + } + + unsigned to_bias (const void *base) const + { + if (unlikely (in_error ())) return 0; + if (!base) return 0; + assert (current); + assert (current->head <= (const char *) base); + return (const char *) base - current->head; + } + + void resolve_links () + { + if (unlikely (in_error ())) return; + + assert (!current); + assert (packed.length > 1); + + for (const object_t* parent : ++hb_iter (packed)) + for (const object_t::link_t &link : parent->links) + { + const object_t* child = packed[link.objidx]; + if (unlikely (!child)) { err_other_error(); return; } + unsigned offset = 0; + switch ((whence_t) link.whence) { + case Head: offset = child->head - parent->head; break; + case Tail: offset = child->head - parent->tail; break; + case Absolute: offset = (head - start) + (child->head - tail); break; + } + + assert (offset >= link.bias); + offset -= link.bias; + if (link.is_signed) + { + if (link.is_wide) + assign_offset<int32_t> (parent, link, offset); + else + assign_offset<int16_t> (parent, link, offset); + } + else + { + if (link.is_wide) + assign_offset<uint32_t> (parent, link, offset); + else + assign_offset<uint16_t> (parent, link, offset); + } + } + } + + unsigned int length () const + { + if (unlikely (!current)) return 0; + return this->head - current->head; + } + + void align (unsigned int alignment) + { + unsigned int l = length () % alignment; + if (l) + allocate_size<void> (alignment - l); + } + + template <typename Type = void> + Type *start_embed (const Type *obj HB_UNUSED = nullptr) const + { return reinterpret_cast<Type *> (this->head); } + template <typename Type> + Type *start_embed (const Type &obj) const + { return start_embed (hb_addressof (obj)); } + + /* Following two functions exist to allow setting breakpoint on. */ + void err_ran_out_of_room () { this->ran_out_of_room = true; } + void err_other_error () { this->successful = false; } + + template <typename Type> + Type *allocate_size (unsigned int size) + { + if (unlikely (!this->successful)) return nullptr; + + if (this->tail - this->head < ptrdiff_t (size)) + { + err_ran_out_of_room (); + this->successful = false; + return nullptr; + } + memset (this->head, 0, size); + char *ret = this->head; + this->head += size; + return reinterpret_cast<Type *> (ret); + } + + template <typename Type> + Type *allocate_min () + { return this->allocate_size<Type> (Type::min_size); } + + template <typename Type> + Type *embed (const Type *obj) + { + unsigned int size = obj->get_size (); + Type *ret = this->allocate_size<Type> (size); + if (unlikely (!ret)) return nullptr; + memcpy (ret, obj, size); + return ret; + } + template <typename Type> + Type *embed (const Type &obj) + { return embed (hb_addressof (obj)); } + + template <typename Type, typename ...Ts> auto + _copy (const Type &src, hb_priority<1>, Ts&&... ds) HB_RETURN + (Type *, src.copy (this, hb_forward<Ts> (ds)...)) + + template <typename Type> auto + _copy (const Type &src, hb_priority<0>) -> decltype (&(hb_declval<Type> () = src)) + { + Type *ret = this->allocate_size<Type> (sizeof (Type)); + if (unlikely (!ret)) return nullptr; + *ret = src; + return ret; + } + + /* Like embed, but active: calls obj.operator=() or obj.copy() to transfer data + * instead of memcpy(). */ + template <typename Type, typename ...Ts> + Type *copy (const Type &src, Ts&&... ds) + { return _copy (src, hb_prioritize, hb_forward<Ts> (ds)...); } + template <typename Type, typename ...Ts> + Type *copy (const Type *src, Ts&&... ds) + { return copy (*src, hb_forward<Ts> (ds)...); } + + template<typename Iterator, + hb_requires (hb_is_iterator (Iterator)), + typename ...Ts> + void copy_all (Iterator it, Ts&&... ds) + { for (decltype (*it) _ : it) copy (_, hb_forward<Ts> (ds)...); } + + template <typename Type> + hb_serialize_context_t& operator << (const Type &obj) & { embed (obj); return *this; } + + template <typename Type> + Type *extend_size (Type *obj, unsigned int size) + { + if (unlikely (in_error ())) return nullptr; + + assert (this->start <= (char *) obj); + assert ((char *) obj <= this->head); + assert ((char *) obj + size >= this->head); + if (unlikely (!this->allocate_size<Type> (((char *) obj) + size - this->head))) return nullptr; + return reinterpret_cast<Type *> (obj); + } + template <typename Type> + Type *extend_size (Type &obj, unsigned int size) + { return extend_size (hb_addressof (obj), size); } + + template <typename Type> + Type *extend_min (Type *obj) { return extend_size (obj, obj->min_size); } + template <typename Type> + Type *extend_min (Type &obj) { return extend_min (hb_addressof (obj)); } + + template <typename Type, typename ...Ts> + Type *extend (Type *obj, Ts&&... ds) + { return extend_size (obj, obj->get_size (hb_forward<Ts> (ds)...)); } + template <typename Type, typename ...Ts> + Type *extend (Type &obj, Ts&&... ds) + { return extend (hb_addressof (obj), hb_forward<Ts> (ds)...); } + + /* Output routines. */ + hb_bytes_t copy_bytes () const + { + assert (this->successful); + /* Copy both items from head side and tail side... */ + unsigned int len = (this->head - this->start) + + (this->end - this->tail); + + char *p = (char *) malloc (len); + if (unlikely (!p)) return hb_bytes_t (); + + memcpy (p, this->start, this->head - this->start); + memcpy (p + (this->head - this->start), this->tail, this->end - this->tail); + return hb_bytes_t (p, len); + } + template <typename Type> + Type *copy () const + { return reinterpret_cast<Type *> ((char *) copy_bytes ().arrayZ); } + hb_blob_t *copy_blob () const + { + hb_bytes_t b = copy_bytes (); + return hb_blob_create (b.arrayZ, b.length, + HB_MEMORY_MODE_WRITABLE, + (char *) b.arrayZ, free); + } + + private: + template <typename T> + void assign_offset (const object_t* parent, const object_t::link_t &link, unsigned offset) + { + auto &off = * ((BEInt<T, sizeof (T)> *) (parent->head + link.position)); + assert (0 == off); + check_assign (off, offset); + } + + public: /* TODO Make private. */ + char *start, *head, *tail, *end; + unsigned int debug_depth; + bool successful; + bool ran_out_of_room; + + private: + + /* Object memory pool. */ + hb_pool_t<object_t> object_pool; + + /* Stack of currently under construction objects. */ + object_t *current; + + /* Stack of packed objects. Object 0 is always nil object. */ + hb_vector_t<object_t *> packed; + + /* Map view of packed objects. */ + hb_hashmap_t<const object_t *, objidx_t, nullptr, 0> packed_map; +}; + + +#endif /* HB_SERIALIZE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-set-digest.hh b/thirdparty/harfbuzz/src/hb-set-digest.hh new file mode 100644 index 0000000000..b97526f775 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-set-digest.hh @@ -0,0 +1,174 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_SET_DIGEST_HH +#define HB_SET_DIGEST_HH + +#include "hb.hh" + +/* + * The set digests here implement various "filters" that support + * "approximate member query". Conceptually these are like Bloom + * Filter and Quotient Filter, however, much smaller, faster, and + * designed to fit the requirements of our uses for glyph coverage + * queries. + * + * Our filters are highly accurate if the lookup covers fairly local + * set of glyphs, but fully flooded and ineffective if coverage is + * all over the place. + * + * The frozen-set can be used instead of a digest, to trade more + * memory for 100% accuracy, but in practice, that doesn't look like + * an attractive trade-off. + */ + +template <typename mask_t, unsigned int shift> +struct hb_set_digest_lowest_bits_t +{ + static constexpr unsigned mask_bytes = sizeof (mask_t); + static constexpr unsigned mask_bits = sizeof (mask_t) * 8; + static constexpr unsigned num_bits = 0 + + (mask_bytes >= 1 ? 3 : 0) + + (mask_bytes >= 2 ? 1 : 0) + + (mask_bytes >= 4 ? 1 : 0) + + (mask_bytes >= 8 ? 1 : 0) + + (mask_bytes >= 16? 1 : 0) + + 0; + + static_assert ((shift < sizeof (hb_codepoint_t) * 8), ""); + static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), ""); + + void init () { mask = 0; } + + void add (hb_codepoint_t g) { mask |= mask_for (g); } + + bool add_range (hb_codepoint_t a, hb_codepoint_t b) + { + if ((b >> shift) - (a >> shift) >= mask_bits - 1) + mask = (mask_t) -1; + else { + mask_t ma = mask_for (a); + mask_t mb = mask_for (b); + mask |= mb + (mb - ma) - (mb < ma); + } + return true; + } + + template <typename T> + void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) + { + for (unsigned int i = 0; i < count; i++) + { + add (*array); + array = (const T *) (stride + (const char *) array); + } + } + template <typename T> + bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) + { + for (unsigned int i = 0; i < count; i++) + { + add (*array); + array = (const T *) (stride + (const char *) array); + } + return true; + } + + bool may_have (hb_codepoint_t g) const + { return !!(mask & mask_for (g)); } + + private: + + static mask_t mask_for (hb_codepoint_t g) + { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); } + mask_t mask; +}; + +template <typename head_t, typename tail_t> +struct hb_set_digest_combiner_t +{ + void init () + { + head.init (); + tail.init (); + } + + void add (hb_codepoint_t g) + { + head.add (g); + tail.add (g); + } + + bool add_range (hb_codepoint_t a, hb_codepoint_t b) + { + head.add_range (a, b); + tail.add_range (a, b); + return true; + } + template <typename T> + void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) + { + head.add_array (array, count, stride); + tail.add_array (array, count, stride); + } + template <typename T> + bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) + { + head.add_sorted_array (array, count, stride); + tail.add_sorted_array (array, count, stride); + return true; + } + + bool may_have (hb_codepoint_t g) const + { + return head.may_have (g) && tail.may_have (g); + } + + private: + head_t head; + tail_t tail; +}; + + +/* + * hb_set_digest_t + * + * This is a combination of digests that performs "best". + * There is not much science to this: it's a result of intuition + * and testing. + */ +typedef hb_set_digest_combiner_t +< + hb_set_digest_lowest_bits_t<unsigned long, 4>, + hb_set_digest_combiner_t + < + hb_set_digest_lowest_bits_t<unsigned long, 0>, + hb_set_digest_lowest_bits_t<unsigned long, 9> + > +> hb_set_digest_t; + + +#endif /* HB_SET_DIGEST_HH */ diff --git a/thirdparty/harfbuzz/src/hb-set.cc b/thirdparty/harfbuzz/src/hb-set.cc new file mode 100644 index 0000000000..0551ed80f2 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-set.cc @@ -0,0 +1,541 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-set.hh" + + +/** + * SECTION:hb-set + * @title: hb-set + * @short_description: Object representing a set of integers + * @include: hb.h + * + * Set objects represent a mathematical set of integer values. They are + * used in non-shaping API to query certain set of characters or glyphs, + * or other integer values. + **/ + + +/** + * hb_set_create: (Xconstructor) + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_set_t * +hb_set_create () +{ + hb_set_t *set; + + if (!(set = hb_object_create<hb_set_t> ())) + return hb_set_get_empty (); + + set->init_shallow (); + + return set; +} + +/** + * hb_set_get_empty: + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_set_t * +hb_set_get_empty () +{ + return const_cast<hb_set_t *> (&Null (hb_set_t)); +} + +/** + * hb_set_reference: (skip) + * @set: a set. + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_set_t * +hb_set_reference (hb_set_t *set) +{ + return hb_object_reference (set); +} + +/** + * hb_set_destroy: (skip) + * @set: a set. + * + * Since: 0.9.2 + **/ +void +hb_set_destroy (hb_set_t *set) +{ + if (!hb_object_destroy (set)) return; + + set->fini_shallow (); + + free (set); +} + +/** + * hb_set_set_user_data: (skip) + * @set: a set. + * @key: + * @data: + * @destroy: + * @replace: + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_set_set_user_data (hb_set_t *set, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace) +{ + return hb_object_set_user_data (set, key, data, destroy, replace); +} + +/** + * hb_set_get_user_data: (skip) + * @set: a set. + * @key: + * + * Return value: (transfer none): + * + * Since: 0.9.2 + **/ +void * +hb_set_get_user_data (hb_set_t *set, + hb_user_data_key_t *key) +{ + return hb_object_get_user_data (set, key); +} + + +/** + * hb_set_allocation_successful: + * @set: a set. + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_set_allocation_successful (const hb_set_t *set) +{ + return set->successful; +} + +/** + * hb_set_clear: + * @set: a set. + * + * + * + * Since: 0.9.2 + **/ +void +hb_set_clear (hb_set_t *set) +{ + set->clear (); +} + +/** + * hb_set_is_empty: + * @set: a set. + * + * + * + * Return value: + * + * Since: 0.9.7 + **/ +hb_bool_t +hb_set_is_empty (const hb_set_t *set) +{ + return set->is_empty (); +} + +/** + * hb_set_has: + * @set: a set. + * @codepoint: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_set_has (const hb_set_t *set, + hb_codepoint_t codepoint) +{ + return set->has (codepoint); +} + +/** + * hb_set_add: + * @set: a set. + * @codepoint: + * + * + * + * Since: 0.9.2 + **/ +void +hb_set_add (hb_set_t *set, + hb_codepoint_t codepoint) +{ + set->add (codepoint); +} + +/** + * hb_set_add_range: + * @set: a set. + * @first: + * @last: + * + * + * + * Since: 0.9.7 + **/ +void +hb_set_add_range (hb_set_t *set, + hb_codepoint_t first, + hb_codepoint_t last) +{ + set->add_range (first, last); +} + +/** + * hb_set_del: + * @set: a set. + * @codepoint: + * + * + * + * Since: 0.9.2 + **/ +void +hb_set_del (hb_set_t *set, + hb_codepoint_t codepoint) +{ + set->del (codepoint); +} + +/** + * hb_set_del_range: + * @set: a set. + * @first: + * @last: + * + * + * + * Since: 0.9.7 + **/ +void +hb_set_del_range (hb_set_t *set, + hb_codepoint_t first, + hb_codepoint_t last) +{ + set->del_range (first, last); +} + +/** + * hb_set_is_equal: + * @set: a set. + * @other: other set. + * + * + * + * Return value: %TRUE if the two sets are equal, %FALSE otherwise. + * + * Since: 0.9.7 + **/ +hb_bool_t +hb_set_is_equal (const hb_set_t *set, + const hb_set_t *other) +{ + return set->is_equal (other); +} + +/** + * hb_set_is_subset: + * @set: a set. + * @larger_set: other set. + * + * + * + * Return value: %TRUE if the @set is a subset of (or equal to) @larger_set, %FALSE otherwise. + * + * Since: 1.8.1 + **/ +hb_bool_t +hb_set_is_subset (const hb_set_t *set, + const hb_set_t *larger_set) +{ + return set->is_subset (larger_set); +} + +/** + * hb_set_set: + * @set: a set. + * @other: + * + * + * + * Since: 0.9.2 + **/ +void +hb_set_set (hb_set_t *set, + const hb_set_t *other) +{ + set->set (other); +} + +/** + * hb_set_union: + * @set: a set. + * @other: + * + * + * + * Since: 0.9.2 + **/ +void +hb_set_union (hb_set_t *set, + const hb_set_t *other) +{ + set->union_ (other); +} + +/** + * hb_set_intersect: + * @set: a set. + * @other: + * + * + * + * Since: 0.9.2 + **/ +void +hb_set_intersect (hb_set_t *set, + const hb_set_t *other) +{ + set->intersect (other); +} + +/** + * hb_set_subtract: + * @set: a set. + * @other: + * + * + * + * Since: 0.9.2 + **/ +void +hb_set_subtract (hb_set_t *set, + const hb_set_t *other) +{ + set->subtract (other); +} + +/** + * hb_set_symmetric_difference: + * @set: a set. + * @other: + * + * + * + * Since: 0.9.2 + **/ +void +hb_set_symmetric_difference (hb_set_t *set, + const hb_set_t *other) +{ + set->symmetric_difference (other); +} + +#ifndef HB_DISABLE_DEPRECATED +/** + * hb_set_invert: + * @set: a set. + * + * + * + * Since: 0.9.10 + * + * Deprecated: 1.6.1 + **/ +void +hb_set_invert (hb_set_t *set HB_UNUSED) +{ +} +#endif + +/** + * hb_set_get_population: + * @set: a set. + * + * Returns the number of numbers in the set. + * + * Return value: set population. + * + * Since: 0.9.7 + **/ +unsigned int +hb_set_get_population (const hb_set_t *set) +{ + return set->get_population (); +} + +/** + * hb_set_get_min: + * @set: a set. + * + * Finds the minimum number in the set. + * + * Return value: minimum of the set, or %HB_SET_VALUE_INVALID if set is empty. + * + * Since: 0.9.7 + **/ +hb_codepoint_t +hb_set_get_min (const hb_set_t *set) +{ + return set->get_min (); +} + +/** + * hb_set_get_max: + * @set: a set. + * + * Finds the maximum number in the set. + * + * Return value: minimum of the set, or %HB_SET_VALUE_INVALID if set is empty. + * + * Since: 0.9.7 + **/ +hb_codepoint_t +hb_set_get_max (const hb_set_t *set) +{ + return set->get_max (); +} + +/** + * hb_set_next: + * @set: a set. + * @codepoint: (inout): + * + * Gets the next number in @set that is greater than current value of @codepoint. + * + * Set @codepoint to %HB_SET_VALUE_INVALID to get started. + * + * Return value: whether there was a next value. + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_set_next (const hb_set_t *set, + hb_codepoint_t *codepoint) +{ + return set->next (codepoint); +} + +/** + * hb_set_previous: + * @set: a set. + * @codepoint: (inout): + * + * Gets the previous number in @set that is lower than current value of @codepoint. + * + * Set @codepoint to %HB_SET_VALUE_INVALID to get started. + * + * Return value: whether there was a previous value. + * + * Since: 1.8.0 + **/ +hb_bool_t +hb_set_previous (const hb_set_t *set, + hb_codepoint_t *codepoint) +{ + return set->previous (codepoint); +} + +/** + * hb_set_next_range: + * @set: a set. + * @first: (out): output first codepoint in the range. + * @last: (inout): input current last and output last codepoint in the range. + * + * Gets the next consecutive range of numbers in @set that + * are greater than current value of @last. + * + * Set @last to %HB_SET_VALUE_INVALID to get started. + * + * Return value: whether there was a next range. + * + * Since: 0.9.7 + **/ +hb_bool_t +hb_set_next_range (const hb_set_t *set, + hb_codepoint_t *first, + hb_codepoint_t *last) +{ + return set->next_range (first, last); +} + +/** + * hb_set_previous_range: + * @set: a set. + * @first: (inout): input current first and output first codepoint in the range. + * @last: (out): output last codepoint in the range. + * + * Gets the previous consecutive range of numbers in @set that + * are less than current value of @first. + * + * Set @first to %HB_SET_VALUE_INVALID to get started. + * + * Return value: whether there was a previous range. + * + * Since: 1.8.0 + **/ +hb_bool_t +hb_set_previous_range (const hb_set_t *set, + hb_codepoint_t *first, + hb_codepoint_t *last) +{ + return set->previous_range (first, last); +} diff --git a/thirdparty/harfbuzz/src/hb-set.h b/thirdparty/harfbuzz/src/hb-set.h new file mode 100644 index 0000000000..ed0e05db2e --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-set.h @@ -0,0 +1,167 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_SET_H +#define HB_SET_H + +#include "hb-common.h" + +HB_BEGIN_DECLS + + +/* + * Since: 0.9.21 + */ +#define HB_SET_VALUE_INVALID ((hb_codepoint_t) -1) + +typedef struct hb_set_t hb_set_t; + + +HB_EXTERN hb_set_t * +hb_set_create (void); + +HB_EXTERN hb_set_t * +hb_set_get_empty (void); + +HB_EXTERN hb_set_t * +hb_set_reference (hb_set_t *set); + +HB_EXTERN void +hb_set_destroy (hb_set_t *set); + +HB_EXTERN hb_bool_t +hb_set_set_user_data (hb_set_t *set, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace); + +HB_EXTERN void * +hb_set_get_user_data (hb_set_t *set, + hb_user_data_key_t *key); + + +/* Returns false if allocation has failed before */ +HB_EXTERN hb_bool_t +hb_set_allocation_successful (const hb_set_t *set); + +HB_EXTERN void +hb_set_clear (hb_set_t *set); + +HB_EXTERN hb_bool_t +hb_set_is_empty (const hb_set_t *set); + +HB_EXTERN hb_bool_t +hb_set_has (const hb_set_t *set, + hb_codepoint_t codepoint); + +HB_EXTERN void +hb_set_add (hb_set_t *set, + hb_codepoint_t codepoint); + +HB_EXTERN void +hb_set_add_range (hb_set_t *set, + hb_codepoint_t first, + hb_codepoint_t last); + +HB_EXTERN void +hb_set_del (hb_set_t *set, + hb_codepoint_t codepoint); + +HB_EXTERN void +hb_set_del_range (hb_set_t *set, + hb_codepoint_t first, + hb_codepoint_t last); + +HB_EXTERN hb_bool_t +hb_set_is_equal (const hb_set_t *set, + const hb_set_t *other); + +HB_EXTERN hb_bool_t +hb_set_is_subset (const hb_set_t *set, + const hb_set_t *larger_set); + +HB_EXTERN void +hb_set_set (hb_set_t *set, + const hb_set_t *other); + +HB_EXTERN void +hb_set_union (hb_set_t *set, + const hb_set_t *other); + +HB_EXTERN void +hb_set_intersect (hb_set_t *set, + const hb_set_t *other); + +HB_EXTERN void +hb_set_subtract (hb_set_t *set, + const hb_set_t *other); + +HB_EXTERN void +hb_set_symmetric_difference (hb_set_t *set, + const hb_set_t *other); + +HB_EXTERN unsigned int +hb_set_get_population (const hb_set_t *set); + +/* Returns HB_SET_VALUE_INVALID if set empty. */ +HB_EXTERN hb_codepoint_t +hb_set_get_min (const hb_set_t *set); + +/* Returns HB_SET_VALUE_INVALID if set empty. */ +HB_EXTERN hb_codepoint_t +hb_set_get_max (const hb_set_t *set); + +/* Pass HB_SET_VALUE_INVALID in to get started. */ +HB_EXTERN hb_bool_t +hb_set_next (const hb_set_t *set, + hb_codepoint_t *codepoint); + +/* Pass HB_SET_VALUE_INVALID in to get started. */ +HB_EXTERN hb_bool_t +hb_set_previous (const hb_set_t *set, + hb_codepoint_t *codepoint); + +/* Pass HB_SET_VALUE_INVALID for first and last to get started. */ +HB_EXTERN hb_bool_t +hb_set_next_range (const hb_set_t *set, + hb_codepoint_t *first, + hb_codepoint_t *last); + +/* Pass HB_SET_VALUE_INVALID for first and last to get started. */ +HB_EXTERN hb_bool_t +hb_set_previous_range (const hb_set_t *set, + hb_codepoint_t *first, + hb_codepoint_t *last); + + +HB_END_DECLS + +#endif /* HB_SET_H */ diff --git a/thirdparty/harfbuzz/src/hb-set.hh b/thirdparty/harfbuzz/src/hb-set.hh new file mode 100644 index 0000000000..b6e2086a2e --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-set.hh @@ -0,0 +1,884 @@ +/* + * Copyright © 2012,2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_SET_HH +#define HB_SET_HH + +#include "hb.hh" +#include "hb-machinery.hh" + + +/* + * hb_set_t + */ + +/* TODO Keep a free-list so we can free pages that are completely zeroed. At that + * point maybe also use a sentinel value for "all-1" pages? */ + +struct hb_set_t +{ + HB_DELETE_COPY_ASSIGN (hb_set_t); + hb_set_t () { init (); } + ~hb_set_t () { fini (); } + + struct page_map_t + { + int cmp (const page_map_t &o) const { return (int) o.major - (int) major; } + + uint32_t major; + uint32_t index; + }; + + struct page_t + { + void init0 () { v.clear (); } + void init1 () { v.clear (0xFF); } + + unsigned int len () const + { return ARRAY_LENGTH_CONST (v); } + + bool is_empty () const + { + for (unsigned int i = 0; i < len (); i++) + if (v[i]) + return false; + return true; + } + + void add (hb_codepoint_t g) { elt (g) |= mask (g); } + void del (hb_codepoint_t g) { elt (g) &= ~mask (g); } + bool get (hb_codepoint_t g) const { return elt (g) & mask (g); } + + void add_range (hb_codepoint_t a, hb_codepoint_t b) + { + elt_t *la = &elt (a); + elt_t *lb = &elt (b); + if (la == lb) + *la |= (mask (b) << 1) - mask(a); + else + { + *la |= ~(mask (a) - 1); + la++; + + memset (la, 0xff, (char *) lb - (char *) la); + + *lb |= ((mask (b) << 1) - 1); + } + } + + void del_range (hb_codepoint_t a, hb_codepoint_t b) + { + elt_t *la = &elt (a); + elt_t *lb = &elt (b); + if (la == lb) + *la &= ~((mask (b) << 1) - mask(a)); + else + { + *la &= mask (a) - 1; + la++; + + memset (la, 0, (char *) lb - (char *) la); + + *lb &= ~((mask (b) << 1) - 1); + } + } + + bool is_equal (const page_t *other) const + { + return 0 == hb_memcmp (&v, &other->v, sizeof (v)); + } + + unsigned int get_population () const + { + unsigned int pop = 0; + for (unsigned int i = 0; i < len (); i++) + pop += hb_popcount (v[i]); + return pop; + } + + bool next (hb_codepoint_t *codepoint) const + { + unsigned int m = (*codepoint + 1) & MASK; + if (!m) + { + *codepoint = INVALID; + return false; + } + unsigned int i = m / ELT_BITS; + unsigned int j = m & ELT_MASK; + + const elt_t vv = v[i] & ~((elt_t (1) << j) - 1); + for (const elt_t *p = &vv; i < len (); p = &v[++i]) + if (*p) + { + *codepoint = i * ELT_BITS + elt_get_min (*p); + return true; + } + + *codepoint = INVALID; + return false; + } + bool previous (hb_codepoint_t *codepoint) const + { + unsigned int m = (*codepoint - 1) & MASK; + if (m == MASK) + { + *codepoint = INVALID; + return false; + } + unsigned int i = m / ELT_BITS; + unsigned int j = m & ELT_MASK; + + /* Fancy mask to avoid shifting by elt_t bitsize, which is undefined. */ + const elt_t mask = j < 8 * sizeof (elt_t) - 1 ? + ((elt_t (1) << (j + 1)) - 1) : + (elt_t) -1; + const elt_t vv = v[i] & mask; + const elt_t *p = &vv; + while (true) + { + if (*p) + { + *codepoint = i * ELT_BITS + elt_get_max (*p); + return true; + } + if ((int) i <= 0) break; + p = &v[--i]; + } + + *codepoint = INVALID; + return false; + } + hb_codepoint_t get_min () const + { + for (unsigned int i = 0; i < len (); i++) + if (v[i]) + return i * ELT_BITS + elt_get_min (v[i]); + return INVALID; + } + hb_codepoint_t get_max () const + { + for (int i = len () - 1; i >= 0; i--) + if (v[i]) + return i * ELT_BITS + elt_get_max (v[i]); + return 0; + } + + typedef unsigned long long elt_t; + static constexpr unsigned PAGE_BITS = 512; + static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, ""); + + static unsigned int elt_get_min (const elt_t &elt) { return hb_ctz (elt); } + static unsigned int elt_get_max (const elt_t &elt) { return hb_bit_storage (elt) - 1; } + + typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t; + + static constexpr unsigned ELT_BITS = sizeof (elt_t) * 8; + static constexpr unsigned ELT_MASK = ELT_BITS - 1; + static constexpr unsigned BITS = sizeof (vector_t) * 8; + static constexpr unsigned MASK = BITS - 1; + static_assert ((unsigned) PAGE_BITS == (unsigned) BITS, ""); + + elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; } + elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; } + elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); } + + vector_t v; + }; + static_assert (page_t::PAGE_BITS == sizeof (page_t) * 8, ""); + + hb_object_header_t header; + bool successful; /* Allocations successful */ + mutable unsigned int population; + hb_sorted_vector_t<page_map_t> page_map; + hb_vector_t<page_t> pages; + + void init_shallow () + { + successful = true; + population = 0; + page_map.init (); + pages.init (); + } + void init () + { + hb_object_init (this); + init_shallow (); + } + void fini_shallow () + { + population = 0; + page_map.fini (); + pages.fini (); + } + void fini () + { + hb_object_fini (this); + fini_shallow (); + } + + bool in_error () const { return !successful; } + + bool resize (unsigned int count) + { + if (unlikely (!successful)) return false; + if (!pages.resize (count) || !page_map.resize (count)) + { + pages.resize (page_map.length); + successful = false; + return false; + } + return true; + } + + void reset () + { + if (unlikely (hb_object_is_immutable (this))) + return; + clear (); + successful = true; + } + + void clear () + { + if (unlikely (hb_object_is_immutable (this))) + return; + population = 0; + page_map.resize (0); + pages.resize (0); + } + bool is_empty () const + { + unsigned int count = pages.length; + for (unsigned int i = 0; i < count; i++) + if (!pages[i].is_empty ()) + return false; + return true; + } + + void dirty () { population = UINT_MAX; } + + void add (hb_codepoint_t g) + { + if (unlikely (!successful)) return; + if (unlikely (g == INVALID)) return; + dirty (); + page_t *page = page_for_insert (g); if (unlikely (!page)) return; + page->add (g); + } + bool add_range (hb_codepoint_t a, hb_codepoint_t b) + { + if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ + if (unlikely (a > b || a == INVALID || b == INVALID)) return false; + dirty (); + unsigned int ma = get_major (a); + unsigned int mb = get_major (b); + if (ma == mb) + { + page_t *page = page_for_insert (a); if (unlikely (!page)) return false; + page->add_range (a, b); + } + else + { + page_t *page = page_for_insert (a); if (unlikely (!page)) return false; + page->add_range (a, major_start (ma + 1) - 1); + + for (unsigned int m = ma + 1; m < mb; m++) + { + page = page_for_insert (major_start (m)); if (unlikely (!page)) return false; + page->init1 (); + } + + page = page_for_insert (b); if (unlikely (!page)) return false; + page->add_range (major_start (mb), b); + } + return true; + } + + template <typename T> + void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) + { + if (unlikely (!successful)) return; + if (!count) return; + dirty (); + hb_codepoint_t g = *array; + while (count) + { + unsigned int m = get_major (g); + page_t *page = page_for_insert (g); if (unlikely (!page)) return; + unsigned int start = major_start (m); + unsigned int end = major_start (m + 1); + do + { + page->add (g); + + array = &StructAtOffsetUnaligned<T> (array, stride); + count--; + } + while (count && (g = *array, start <= g && g < end)); + } + } + + /* Might return false if array looks unsorted. + * Used for faster rejection of corrupt data. */ + template <typename T> + bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) + { + if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ + if (!count) return true; + dirty (); + hb_codepoint_t g = *array; + hb_codepoint_t last_g = g; + while (count) + { + unsigned int m = get_major (g); + page_t *page = page_for_insert (g); if (unlikely (!page)) return false; + unsigned int end = major_start (m + 1); + do + { + /* If we try harder we can change the following comparison to <=; + * Not sure if it's worth it. */ + if (g < last_g) return false; + last_g = g; + page->add (g); + + array = (const T *) ((const char *) array + stride); + count--; + } + while (count && (g = *array, g < end)); + } + return true; + } + + void del (hb_codepoint_t g) + { + /* TODO perform op even if !successful. */ + if (unlikely (!successful)) return; + page_t *page = page_for (g); + if (!page) + return; + dirty (); + page->del (g); + } + + private: + void del_pages (int ds, int de) + { + if (ds <= de) + { + unsigned int write_index = 0; + for (unsigned int i = 0; i < page_map.length; i++) + { + int m = (int) page_map[i].major; + if (m < ds || de < m) + page_map[write_index++] = page_map[i]; + } + compact (write_index); + resize (write_index); + } + } + + public: + void del_range (hb_codepoint_t a, hb_codepoint_t b) + { + /* TODO perform op even if !successful. */ + if (unlikely (!successful)) return; + if (unlikely (a > b || a == INVALID || b == INVALID)) return; + dirty (); + unsigned int ma = get_major (a); + unsigned int mb = get_major (b); + /* Delete pages from ds through de if ds <= de. */ + int ds = (a == major_start (ma))? (int) ma: (int) (ma + 1); + int de = (b + 1 == major_start (mb + 1))? (int) mb: ((int) mb - 1); + if (ds > de || (int) ma < ds) + { + page_t *page = page_for (a); + if (page) + { + if (ma == mb) + page->del_range (a, b); + else + page->del_range (a, major_start (ma + 1) - 1); + } + } + if (de < (int) mb && ma != mb) + { + page_t *page = page_for (b); + if (page) + page->del_range (major_start (mb), b); + } + del_pages (ds, de); + } + + bool get (hb_codepoint_t g) const + { + const page_t *page = page_for (g); + if (!page) + return false; + return page->get (g); + } + + /* Has interface. */ + static constexpr bool SENTINEL = false; + typedef bool value_t; + value_t operator [] (hb_codepoint_t k) const { return get (k); } + bool has (hb_codepoint_t k) const { return (*this)[k] != SENTINEL; } + /* Predicate. */ + bool operator () (hb_codepoint_t k) const { return has (k); } + + /* Sink interface. */ + hb_set_t& operator << (hb_codepoint_t v) + { add (v); return *this; } + hb_set_t& operator << (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& range) + { add_range (range.first, range.second); return *this; } + + bool intersects (hb_codepoint_t first, hb_codepoint_t last) const + { + hb_codepoint_t c = first - 1; + return next (&c) && c <= last; + } + void set (const hb_set_t *other) + { + if (unlikely (!successful)) return; + unsigned int count = other->pages.length; + if (!resize (count)) + return; + population = other->population; + memcpy ((void *) pages, (const void *) other->pages, count * pages.item_size); + memcpy ((void *) page_map, (const void *) other->page_map, count * page_map.item_size); + } + + bool is_equal (const hb_set_t *other) const + { + if (get_population () != other->get_population ()) + return false; + + unsigned int na = pages.length; + unsigned int nb = other->pages.length; + + unsigned int a = 0, b = 0; + for (; a < na && b < nb; ) + { + if (page_at (a).is_empty ()) { a++; continue; } + if (other->page_at (b).is_empty ()) { b++; continue; } + if (page_map[a].major != other->page_map[b].major || + !page_at (a).is_equal (&other->page_at (b))) + return false; + a++; + b++; + } + for (; a < na; a++) + if (!page_at (a).is_empty ()) { return false; } + for (; b < nb; b++) + if (!other->page_at (b).is_empty ()) { return false; } + + return true; + } + + bool is_subset (const hb_set_t *larger_set) const + { + if (get_population () > larger_set->get_population ()) + return false; + + /* TODO Optimize to use pages. */ + hb_codepoint_t c = INVALID; + while (next (&c)) + if (!larger_set->has (c)) + return false; + + return true; + } + + void compact (unsigned int length) + { + hb_vector_t<uint32_t> old_index_to_page_map_index; + old_index_to_page_map_index.resize(pages.length); + for (uint32_t i = 0; i < old_index_to_page_map_index.length; i++) + old_index_to_page_map_index[i] = 0xFFFFFFFF; + + for (uint32_t i = 0; i < length; i++) + old_index_to_page_map_index[page_map[i].index] = i; + + compact_pages (old_index_to_page_map_index); + } + + void compact_pages (const hb_vector_t<uint32_t>& old_index_to_page_map_index) + { + unsigned int write_index = 0; + for (unsigned int i = 0; i < pages.length; i++) + { + if (old_index_to_page_map_index[i] == 0xFFFFFFFF) continue; + + if (write_index < i) + pages[write_index] = pages[i]; + + page_map[old_index_to_page_map_index[i]].index = write_index; + write_index++; + } + } + + template <typename Op> + void process (const Op& op, const hb_set_t *other) + { + if (unlikely (!successful)) return; + + dirty (); + + unsigned int na = pages.length; + unsigned int nb = other->pages.length; + unsigned int next_page = na; + + unsigned int count = 0, newCount = 0; + unsigned int a = 0, b = 0; + unsigned int write_index = 0; + for (; a < na && b < nb; ) + { + if (page_map[a].major == other->page_map[b].major) + { + if (!Op::passthru_left) + { + // Move page_map entries that we're keeping from the left side set + // to the front of the page_map vector. This isn't necessary if + // passthru_left is set since no left side pages will be removed + // in that case. + if (write_index < a) + page_map[write_index] = page_map[a]; + write_index++; + } + + count++; + a++; + b++; + } + else if (page_map[a].major < other->page_map[b].major) + { + if (Op::passthru_left) + count++; + a++; + } + else + { + if (Op::passthru_right) + count++; + b++; + } + } + if (Op::passthru_left) + count += na - a; + if (Op::passthru_right) + count += nb - b; + + if (!Op::passthru_left) + { + na = write_index; + next_page = write_index; + compact (write_index); + } + + if (!resize (count)) + return; + + newCount = count; + + /* Process in-place backward. */ + a = na; + b = nb; + for (; a && b; ) + { + if (page_map[a - 1].major == other->page_map[b - 1].major) + { + a--; + b--; + count--; + page_map[count] = page_map[a]; + page_at (count).v = op (page_at (a).v, other->page_at (b).v); + } + else if (page_map[a - 1].major > other->page_map[b - 1].major) + { + a--; + if (Op::passthru_left) + { + count--; + page_map[count] = page_map[a]; + } + } + else + { + b--; + if (Op::passthru_right) + { + count--; + page_map[count].major = other->page_map[b].major; + page_map[count].index = next_page++; + page_at (count).v = other->page_at (b).v; + } + } + } + if (Op::passthru_left) + while (a) + { + a--; + count--; + page_map[count] = page_map [a]; + } + if (Op::passthru_right) + while (b) + { + b--; + count--; + page_map[count].major = other->page_map[b].major; + page_map[count].index = next_page++; + page_at (count).v = other->page_at (b).v; + } + assert (!count); + if (pages.length > newCount) + resize (newCount); + } + + void union_ (const hb_set_t *other) + { + process (hb_bitwise_or, other); + } + void intersect (const hb_set_t *other) + { + process (hb_bitwise_and, other); + } + void subtract (const hb_set_t *other) + { + process (hb_bitwise_sub, other); + } + void symmetric_difference (const hb_set_t *other) + { + process (hb_bitwise_xor, other); + } + bool next (hb_codepoint_t *codepoint) const + { + if (unlikely (*codepoint == INVALID)) { + *codepoint = get_min (); + return *codepoint != INVALID; + } + + page_map_t map = {get_major (*codepoint), 0}; + unsigned int i; + page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST); + if (i < page_map.length && page_map[i].major == map.major) + { + if (pages[page_map[i].index].next (codepoint)) + { + *codepoint += page_map[i].major * page_t::PAGE_BITS; + return true; + } + i++; + } + for (; i < page_map.length; i++) + { + hb_codepoint_t m = pages[page_map[i].index].get_min (); + if (m != INVALID) + { + *codepoint = page_map[i].major * page_t::PAGE_BITS + m; + return true; + } + } + *codepoint = INVALID; + return false; + } + bool previous (hb_codepoint_t *codepoint) const + { + if (unlikely (*codepoint == INVALID)) { + *codepoint = get_max (); + return *codepoint != INVALID; + } + + page_map_t map = {get_major (*codepoint), 0}; + unsigned int i; + page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST); + if (i < page_map.length && page_map[i].major == map.major) + { + if (pages[page_map[i].index].previous (codepoint)) + { + *codepoint += page_map[i].major * page_t::PAGE_BITS; + return true; + } + } + i--; + for (; (int) i >= 0; i--) + { + hb_codepoint_t m = pages[page_map[i].index].get_max (); + if (m != INVALID) + { + *codepoint = page_map[i].major * page_t::PAGE_BITS + m; + return true; + } + } + *codepoint = INVALID; + return false; + } + bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const + { + hb_codepoint_t i; + + i = *last; + if (!next (&i)) + { + *last = *first = INVALID; + return false; + } + + /* TODO Speed up. */ + *last = *first = i; + while (next (&i) && i == *last + 1) + (*last)++; + + return true; + } + bool previous_range (hb_codepoint_t *first, hb_codepoint_t *last) const + { + hb_codepoint_t i; + + i = *first; + if (!previous (&i)) + { + *last = *first = INVALID; + return false; + } + + /* TODO Speed up. */ + *last = *first = i; + while (previous (&i) && i == *first - 1) + (*first)--; + + return true; + } + + unsigned int get_population () const + { + if (population != UINT_MAX) + return population; + + unsigned int pop = 0; + unsigned int count = pages.length; + for (unsigned int i = 0; i < count; i++) + pop += pages[i].get_population (); + + population = pop; + return pop; + } + hb_codepoint_t get_min () const + { + unsigned int count = pages.length; + for (unsigned int i = 0; i < count; i++) + if (!page_at (i).is_empty ()) + return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_min (); + return INVALID; + } + hb_codepoint_t get_max () const + { + unsigned int count = pages.length; + for (int i = count - 1; i >= 0; i++) + if (!page_at (i).is_empty ()) + return page_map[(unsigned) i].major * page_t::PAGE_BITS + page_at (i).get_max (); + return INVALID; + } + + static constexpr hb_codepoint_t INVALID = HB_SET_VALUE_INVALID; + + /* + * Iterator implementation. + */ + struct iter_t : hb_iter_with_fallback_t<iter_t, hb_codepoint_t> + { + static constexpr bool is_sorted_iterator = true; + iter_t (const hb_set_t &s_ = Null (hb_set_t), + bool init = true) : s (&s_), v (INVALID), l(0) + { + if (init) + { + l = s->get_population () + 1; + __next__ (); + } + } + + typedef hb_codepoint_t __item_t__; + hb_codepoint_t __item__ () const { return v; } + bool __more__ () const { return v != INVALID; } + void __next__ () { s->next (&v); if (l) l--; } + void __prev__ () { s->previous (&v); } + unsigned __len__ () const { return l; } + iter_t end () const { return iter_t (*s, false); } + bool operator != (const iter_t& o) const + { return s != o.s || v != o.v; } + + protected: + const hb_set_t *s; + hb_codepoint_t v; + unsigned l; + }; + iter_t iter () const { return iter_t (*this); } + operator iter_t () const { return iter (); } + + protected: + + page_t *page_for_insert (hb_codepoint_t g) + { + page_map_t map = {get_major (g), pages.length}; + unsigned int i; + if (!page_map.bfind (map, &i, HB_BFIND_NOT_FOUND_STORE_CLOSEST)) + { + if (!resize (pages.length + 1)) + return nullptr; + + pages[map.index].init0 (); + memmove (page_map + i + 1, + page_map + i, + (page_map.length - 1 - i) * page_map.item_size); + page_map[i] = map; + } + return &pages[page_map[i].index]; + } + page_t *page_for (hb_codepoint_t g) + { + page_map_t key = {get_major (g)}; + const page_map_t *found = page_map.bsearch (key); + if (found) + return &pages[found->index]; + return nullptr; + } + const page_t *page_for (hb_codepoint_t g) const + { + page_map_t key = {get_major (g)}; + const page_map_t *found = page_map.bsearch (key); + if (found) + return &pages[found->index]; + return nullptr; + } + page_t &page_at (unsigned int i) { return pages[page_map[i].index]; } + const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; } + unsigned int get_major (hb_codepoint_t g) const { return g / page_t::PAGE_BITS; } + hb_codepoint_t major_start (unsigned int major) const { return major * page_t::PAGE_BITS; } +}; + + +#endif /* HB_SET_HH */ diff --git a/thirdparty/harfbuzz/src/hb-shape-plan.cc b/thirdparty/harfbuzz/src/hb-shape-plan.cc new file mode 100644 index 0000000000..666470b4f1 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-shape-plan.cc @@ -0,0 +1,513 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" +#include "hb-shape-plan.hh" +#include "hb-shaper.hh" +#include "hb-font.hh" +#include "hb-buffer.hh" + + +/** + * SECTION:hb-shape-plan + * @title: hb-shape-plan + * @short_description: Object representing a shaping plan + * @include: hb.h + * + * Shape plans are not used for shaping directly, but can be access to query + * certain information about how shaping will perform given a set of input + * parameters (script, language, direction, features, etc.) + * Most client would not need to deal with shape plans directly. + **/ + + +/* + * hb_shape_plan_key_t + */ + +bool +hb_shape_plan_key_t::init (bool copy, + hb_face_t *face, + const hb_segment_properties_t *props, + const hb_feature_t *user_features, + unsigned int num_user_features, + const int *coords, + unsigned int num_coords, + const char * const *shaper_list) +{ + hb_feature_t *features = nullptr; + if (copy && num_user_features && !(features = (hb_feature_t *) calloc (num_user_features, sizeof (hb_feature_t)))) + goto bail; + + this->props = *props; + this->num_user_features = num_user_features; + this->user_features = copy ? features : user_features; + if (copy && num_user_features) + { + memcpy (features, user_features, num_user_features * sizeof (hb_feature_t)); + /* Make start/end uniform to easier catch bugs. */ + for (unsigned int i = 0; i < num_user_features; i++) + { + if (features[0].start != HB_FEATURE_GLOBAL_START) + features[0].start = 1; + if (features[0].end != HB_FEATURE_GLOBAL_END) + features[0].end = 2; + } + } + this->shaper_func = nullptr; + this->shaper_name = nullptr; +#ifndef HB_NO_OT_SHAPE + this->ot.init (face, coords, num_coords); +#endif + + /* + * Choose shaper. + */ + +#define HB_SHAPER_PLAN(shaper) \ + HB_STMT_START { \ + if (face->data.shaper) \ + { \ + this->shaper_func = _hb_##shaper##_shape; \ + this->shaper_name = #shaper; \ + return true; \ + } \ + } HB_STMT_END + + if (unlikely (shaper_list)) + { + for (; *shaper_list; shaper_list++) + if (false) + ; +#define HB_SHAPER_IMPLEMENT(shaper) \ + else if (0 == strcmp (*shaper_list, #shaper)) \ + HB_SHAPER_PLAN (shaper); +#include "hb-shaper-list.hh" +#undef HB_SHAPER_IMPLEMENT + } + else + { + const hb_shaper_entry_t *shapers = _hb_shapers_get (); + for (unsigned int i = 0; i < HB_SHAPERS_COUNT; i++) + if (false) + ; +#define HB_SHAPER_IMPLEMENT(shaper) \ + else if (shapers[i].func == _hb_##shaper##_shape) \ + HB_SHAPER_PLAN (shaper); +#include "hb-shaper-list.hh" +#undef HB_SHAPER_IMPLEMENT + } +#undef HB_SHAPER_PLAN + +bail: + ::free (features); + return false; +} + +bool +hb_shape_plan_key_t::user_features_match (const hb_shape_plan_key_t *other) +{ + if (this->num_user_features != other->num_user_features) + return false; + for (unsigned int i = 0; i < num_user_features; i++) + { + if (this->user_features[i].tag != other->user_features[i].tag || + this->user_features[i].value != other->user_features[i].value || + (this->user_features[i].start == HB_FEATURE_GLOBAL_START && + this->user_features[i].end == HB_FEATURE_GLOBAL_END) != + (other->user_features[i].start == HB_FEATURE_GLOBAL_START && + other->user_features[i].end == HB_FEATURE_GLOBAL_END)) + return false; + } + return true; +} + +bool +hb_shape_plan_key_t::equal (const hb_shape_plan_key_t *other) +{ + return hb_segment_properties_equal (&this->props, &other->props) && + this->user_features_match (other) && +#ifndef HB_NO_OT_SHAPE + this->ot.equal (&other->ot) && +#endif + this->shaper_func == other->shaper_func; +} + + +/* + * hb_shape_plan_t + */ + + +/** + * hb_shape_plan_create: (Xconstructor) + * @face: + * @props: + * @user_features: (array length=num_user_features): + * @num_user_features: + * @shaper_list: (array zero-terminated=1): + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.7 + **/ +hb_shape_plan_t * +hb_shape_plan_create (hb_face_t *face, + const hb_segment_properties_t *props, + const hb_feature_t *user_features, + unsigned int num_user_features, + const char * const *shaper_list) +{ + return hb_shape_plan_create2 (face, props, + user_features, num_user_features, + nullptr, 0, + shaper_list); +} + +hb_shape_plan_t * +hb_shape_plan_create2 (hb_face_t *face, + const hb_segment_properties_t *props, + const hb_feature_t *user_features, + unsigned int num_user_features, + const int *coords, + unsigned int num_coords, + const char * const *shaper_list) +{ + DEBUG_MSG_FUNC (SHAPE_PLAN, nullptr, + "face=%p num_features=%d num_coords=%d shaper_list=%p", + face, + num_user_features, + num_coords, + shaper_list); + + assert (props->direction != HB_DIRECTION_INVALID); + + hb_shape_plan_t *shape_plan; + + if (unlikely (!props)) + goto bail; + if (!(shape_plan = hb_object_create<hb_shape_plan_t> ())) + goto bail; + + if (unlikely (!face)) + face = hb_face_get_empty (); + hb_face_make_immutable (face); + shape_plan->face_unsafe = face; + + if (unlikely (!shape_plan->key.init (true, + face, + props, + user_features, + num_user_features, + coords, + num_coords, + shaper_list))) + goto bail2; +#ifndef HB_NO_OT_SHAPE + if (unlikely (!shape_plan->ot.init0 (face, &shape_plan->key))) + goto bail3; +#endif + + return shape_plan; + +#ifndef HB_NO_OT_SHAPE +bail3: +#endif + shape_plan->key.free (); +bail2: + free (shape_plan); +bail: + return hb_shape_plan_get_empty (); +} + +/** + * hb_shape_plan_get_empty: + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.7 + **/ +hb_shape_plan_t * +hb_shape_plan_get_empty () +{ + return const_cast<hb_shape_plan_t *> (&Null (hb_shape_plan_t)); +} + +/** + * hb_shape_plan_reference: (skip) + * @shape_plan: a shape plan. + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.7 + **/ +hb_shape_plan_t * +hb_shape_plan_reference (hb_shape_plan_t *shape_plan) +{ + return hb_object_reference (shape_plan); +} + +/** + * hb_shape_plan_destroy: (skip) + * @shape_plan: a shape plan. + * + * + * + * Since: 0.9.7 + **/ +void +hb_shape_plan_destroy (hb_shape_plan_t *shape_plan) +{ + if (!hb_object_destroy (shape_plan)) return; + +#ifndef HB_NO_OT_SHAPE + shape_plan->ot.fini (); +#endif + shape_plan->key.free (); + free (shape_plan); +} + +/** + * hb_shape_plan_set_user_data: (skip) + * @shape_plan: a shape plan. + * @key: + * @data: + * @destroy: + * @replace: + * + * + * + * Return value: + * + * Since: 0.9.7 + **/ +hb_bool_t +hb_shape_plan_set_user_data (hb_shape_plan_t *shape_plan, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace) +{ + return hb_object_set_user_data (shape_plan, key, data, destroy, replace); +} + +/** + * hb_shape_plan_get_user_data: (skip) + * @shape_plan: a shape plan. + * @key: + * + * + * + * Return value: (transfer none): + * + * Since: 0.9.7 + **/ +void * +hb_shape_plan_get_user_data (hb_shape_plan_t *shape_plan, + hb_user_data_key_t *key) +{ + return hb_object_get_user_data (shape_plan, key); +} + +/** + * hb_shape_plan_get_shaper: + * @shape_plan: a shape plan. + * + * + * + * Return value: (transfer none): + * + * Since: 0.9.7 + **/ +const char * +hb_shape_plan_get_shaper (hb_shape_plan_t *shape_plan) +{ + return shape_plan->key.shaper_name; +} + + +/** + * hb_shape_plan_execute: + * @shape_plan: a shape plan. + * @font: a font. + * @buffer: a buffer. + * @features: (array length=num_features): + * @num_features: + * + * + * + * Return value: + * + * Since: 0.9.7 + **/ +hb_bool_t +hb_shape_plan_execute (hb_shape_plan_t *shape_plan, + hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features) +{ + DEBUG_MSG_FUNC (SHAPE_PLAN, shape_plan, + "num_features=%d shaper_func=%p, shaper_name=%s", + num_features, + shape_plan->key.shaper_func, + shape_plan->key.shaper_name); + + if (unlikely (!buffer->len)) + return true; + + assert (!hb_object_is_immutable (buffer)); + assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE); + + if (unlikely (hb_object_is_inert (shape_plan))) + return false; + + assert (shape_plan->face_unsafe == font->face); + assert (hb_segment_properties_equal (&shape_plan->key.props, &buffer->props)); + +#define HB_SHAPER_EXECUTE(shaper) \ + HB_STMT_START { \ + return font->data.shaper && \ + _hb_##shaper##_shape (shape_plan, font, buffer, features, num_features); \ + } HB_STMT_END + + if (false) + ; +#define HB_SHAPER_IMPLEMENT(shaper) \ + else if (shape_plan->key.shaper_func == _hb_##shaper##_shape) \ + HB_SHAPER_EXECUTE (shaper); +#include "hb-shaper-list.hh" +#undef HB_SHAPER_IMPLEMENT + +#undef HB_SHAPER_EXECUTE + + return false; +} + + +/* + * Caching + */ + +/** + * hb_shape_plan_create_cached: + * @face: + * @props: + * @user_features: (array length=num_user_features): + * @num_user_features: + * @shaper_list: (array zero-terminated=1): + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.7 + **/ +hb_shape_plan_t * +hb_shape_plan_create_cached (hb_face_t *face, + const hb_segment_properties_t *props, + const hb_feature_t *user_features, + unsigned int num_user_features, + const char * const *shaper_list) +{ + return hb_shape_plan_create_cached2 (face, props, + user_features, num_user_features, + nullptr, 0, + shaper_list); +} + +hb_shape_plan_t * +hb_shape_plan_create_cached2 (hb_face_t *face, + const hb_segment_properties_t *props, + const hb_feature_t *user_features, + unsigned int num_user_features, + const int *coords, + unsigned int num_coords, + const char * const *shaper_list) +{ + DEBUG_MSG_FUNC (SHAPE_PLAN, nullptr, + "face=%p num_features=%d shaper_list=%p", + face, + num_user_features, + shaper_list); + +retry: + hb_face_t::plan_node_t *cached_plan_nodes = face->shape_plans; + + bool dont_cache = hb_object_is_inert (face); + + if (likely (!dont_cache)) + { + hb_shape_plan_key_t key; + if (!key.init (false, + face, + props, + user_features, + num_user_features, + coords, + num_coords, + shaper_list)) + return hb_shape_plan_get_empty (); + + for (hb_face_t::plan_node_t *node = cached_plan_nodes; node; node = node->next) + if (node->shape_plan->key.equal (&key)) + { + DEBUG_MSG_FUNC (SHAPE_PLAN, node->shape_plan, "fulfilled from cache"); + return hb_shape_plan_reference (node->shape_plan); + } + } + + hb_shape_plan_t *shape_plan = hb_shape_plan_create2 (face, props, + user_features, num_user_features, + coords, num_coords, + shaper_list); + + if (unlikely (dont_cache)) + return shape_plan; + + hb_face_t::plan_node_t *node = (hb_face_t::plan_node_t *) calloc (1, sizeof (hb_face_t::plan_node_t)); + if (unlikely (!node)) + return shape_plan; + + node->shape_plan = shape_plan; + node->next = cached_plan_nodes; + + if (unlikely (!face->shape_plans.cmpexch (cached_plan_nodes, node))) + { + hb_shape_plan_destroy (shape_plan); + free (node); + goto retry; + } + DEBUG_MSG_FUNC (SHAPE_PLAN, shape_plan, "inserted into cache"); + + return hb_shape_plan_reference (shape_plan); +} diff --git a/thirdparty/harfbuzz/src/hb-shape-plan.h b/thirdparty/harfbuzz/src/hb-shape-plan.h new file mode 100644 index 0000000000..b62ae7ca35 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-shape-plan.h @@ -0,0 +1,108 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_SHAPE_PLAN_H +#define HB_SHAPE_PLAN_H + +#include "hb-common.h" +#include "hb-font.h" + +HB_BEGIN_DECLS + +typedef struct hb_shape_plan_t hb_shape_plan_t; + +HB_EXTERN hb_shape_plan_t * +hb_shape_plan_create (hb_face_t *face, + const hb_segment_properties_t *props, + const hb_feature_t *user_features, + unsigned int num_user_features, + const char * const *shaper_list); + +HB_EXTERN hb_shape_plan_t * +hb_shape_plan_create_cached (hb_face_t *face, + const hb_segment_properties_t *props, + const hb_feature_t *user_features, + unsigned int num_user_features, + const char * const *shaper_list); + +HB_EXTERN hb_shape_plan_t * +hb_shape_plan_create2 (hb_face_t *face, + const hb_segment_properties_t *props, + const hb_feature_t *user_features, + unsigned int num_user_features, + const int *coords, + unsigned int num_coords, + const char * const *shaper_list); + +HB_EXTERN hb_shape_plan_t * +hb_shape_plan_create_cached2 (hb_face_t *face, + const hb_segment_properties_t *props, + const hb_feature_t *user_features, + unsigned int num_user_features, + const int *coords, + unsigned int num_coords, + const char * const *shaper_list); + + +HB_EXTERN hb_shape_plan_t * +hb_shape_plan_get_empty (void); + +HB_EXTERN hb_shape_plan_t * +hb_shape_plan_reference (hb_shape_plan_t *shape_plan); + +HB_EXTERN void +hb_shape_plan_destroy (hb_shape_plan_t *shape_plan); + +HB_EXTERN hb_bool_t +hb_shape_plan_set_user_data (hb_shape_plan_t *shape_plan, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace); + +HB_EXTERN void * +hb_shape_plan_get_user_data (hb_shape_plan_t *shape_plan, + hb_user_data_key_t *key); + + +HB_EXTERN hb_bool_t +hb_shape_plan_execute (hb_shape_plan_t *shape_plan, + hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features); + +HB_EXTERN const char * +hb_shape_plan_get_shaper (hb_shape_plan_t *shape_plan); + + +HB_END_DECLS + +#endif /* HB_SHAPE_PLAN_H */ diff --git a/thirdparty/harfbuzz/src/hb-shape-plan.hh b/thirdparty/harfbuzz/src/hb-shape-plan.hh new file mode 100644 index 0000000000..6da7edb2f8 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-shape-plan.hh @@ -0,0 +1,76 @@ +/* + * Copyright © 2012,2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_SHAPE_PLAN_HH +#define HB_SHAPE_PLAN_HH + +#include "hb.hh" +#include "hb-shaper.hh" +#include "hb-ot-shape.hh" + + +struct hb_shape_plan_key_t +{ + hb_segment_properties_t props; + + const hb_feature_t *user_features; + unsigned int num_user_features; + +#ifndef HB_NO_OT_SHAPE + hb_ot_shape_plan_key_t ot; +#endif + + hb_shape_func_t *shaper_func; + const char *shaper_name; + + HB_INTERNAL bool init (bool copy, + hb_face_t *face, + const hb_segment_properties_t *props, + const hb_feature_t *user_features, + unsigned int num_user_features, + const int *coords, + unsigned int num_coords, + const char * const *shaper_list); + + HB_INTERNAL void free () { ::free ((void *) user_features); } + + HB_INTERNAL bool user_features_match (const hb_shape_plan_key_t *other); + + HB_INTERNAL bool equal (const hb_shape_plan_key_t *other); +}; + +struct hb_shape_plan_t +{ + hb_object_header_t header; + hb_face_t *face_unsafe; /* We don't carry a reference to face. */ + hb_shape_plan_key_t key; +#ifndef HB_NO_OT_SHAPE + hb_ot_shape_plan_t ot; +#endif +}; + + +#endif /* HB_SHAPE_PLAN_HH */ diff --git a/thirdparty/harfbuzz/src/hb-shape.cc b/thirdparty/harfbuzz/src/hb-shape.cc new file mode 100644 index 0000000000..017fb91b69 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-shape.cc @@ -0,0 +1,172 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#include "hb-shaper.hh" +#include "hb-shape-plan.hh" +#include "hb-buffer.hh" +#include "hb-font.hh" +#include "hb-machinery.hh" + + +/** + * SECTION:hb-shape + * @title: hb-shape + * @short_description: Conversion of text strings into positioned glyphs + * @include: hb.h + * + * Shaping is the central operation of HarfBuzz. Shaping operates on buffers, + * which are sequences of Unicode characters that use the same font and have + * the same text direction, script, and language. After shaping the buffer + * contains the output glyphs and their positions. + **/ + + +#if HB_USE_ATEXIT +static void free_static_shaper_list (); +#endif + +static const char *nil_shaper_list[] = {nullptr}; + +static struct hb_shaper_list_lazy_loader_t : hb_lazy_loader_t<const char *, + hb_shaper_list_lazy_loader_t> +{ + static const char ** create () + { + const char **shaper_list = (const char **) calloc (1 + HB_SHAPERS_COUNT, sizeof (const char *)); + if (unlikely (!shaper_list)) + return nullptr; + + const hb_shaper_entry_t *shapers = _hb_shapers_get (); + unsigned int i; + for (i = 0; i < HB_SHAPERS_COUNT; i++) + shaper_list[i] = shapers[i].name; + shaper_list[i] = nullptr; + +#if HB_USE_ATEXIT + atexit (free_static_shaper_list); +#endif + + return shaper_list; + } + static void destroy (const char **l) + { free (l); } + static const char ** get_null () + { return nil_shaper_list; } +} static_shaper_list; + +#if HB_USE_ATEXIT +static +void free_static_shaper_list () +{ + static_shaper_list.free_instance (); +} +#endif + + +/** + * hb_shape_list_shapers: + * + * Retrieves the list of shapers supported by HarfBuzz. + * + * Return value: (transfer none) (array zero-terminated=1): an array of + * constant strings + * + * Since: 0.9.2 + **/ +const char ** +hb_shape_list_shapers () +{ + return static_shaper_list.get_unconst (); +} + + +/** + * hb_shape_full: + * @font: an #hb_font_t to use for shaping + * @buffer: an #hb_buffer_t to shape + * @features: (array length=num_features) (allow-none): an array of user + * specified #hb_feature_t or %NULL + * @num_features: the length of @features array + * @shaper_list: (array zero-terminated=1) (allow-none): a %NULL-terminated + * array of shapers to use or %NULL + * + * See hb_shape() for details. If @shaper_list is not %NULL, the specified + * shapers will be used in the given order, otherwise the default shapers list + * will be used. + * + * Return value: false if all shapers failed, true otherwise + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_shape_full (hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features, + const char * const *shaper_list) +{ + if (unlikely (hb_object_is_immutable (buffer))) return false; + + hb_shape_plan_t *shape_plan = hb_shape_plan_create_cached2 (font->face, &buffer->props, + features, num_features, + font->coords, font->num_coords, + shaper_list); + hb_bool_t res = hb_shape_plan_execute (shape_plan, font, buffer, features, num_features); + hb_shape_plan_destroy (shape_plan); + + if (res) + buffer->content_type = HB_BUFFER_CONTENT_TYPE_GLYPHS; + return res; +} + +/** + * hb_shape: + * @font: an #hb_font_t to use for shaping + * @buffer: an #hb_buffer_t to shape + * @features: (array length=num_features) (allow-none): an array of user + * specified #hb_feature_t or %NULL + * @num_features: the length of @features array + * + * Shapes @buffer using @font turning its Unicode characters content to + * positioned glyphs. If @features is not %NULL, it will be used to control the + * features applied during shaping. If two @features have the same tag but + * overlapping ranges the value of the feature with the higher index takes + * precedence. + * + * Since: 0.9.2 + **/ +void +hb_shape (hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features) +{ + hb_shape_full (font, buffer, features, num_features, nullptr); +} diff --git a/thirdparty/harfbuzz/src/hb-shape.h b/thirdparty/harfbuzz/src/hb-shape.h new file mode 100644 index 0000000000..39507ff744 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-shape.h @@ -0,0 +1,62 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_SHAPE_H +#define HB_SHAPE_H + +#include "hb-common.h" +#include "hb-buffer.h" +#include "hb-font.h" + +HB_BEGIN_DECLS + + +HB_EXTERN void +hb_shape (hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features); + +HB_EXTERN hb_bool_t +hb_shape_full (hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features, + const char * const *shaper_list); + +HB_EXTERN const char ** +hb_shape_list_shapers (void); + + +HB_END_DECLS + +#endif /* HB_SHAPE_H */ diff --git a/thirdparty/harfbuzz/src/hb-shaper-impl.hh b/thirdparty/harfbuzz/src/hb-shaper-impl.hh new file mode 100644 index 0000000000..b674fceb6a --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-shaper-impl.hh @@ -0,0 +1,38 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_SHAPER_IMPL_HH +#define HB_SHAPER_IMPL_HH + +#include "hb.hh" + +#include "hb-shaper.hh" +#include "hb-face.hh" +#include "hb-font.hh" +#include "hb-shape-plan.hh" +#include "hb-buffer.hh" + +#endif /* HB_SHAPER_IMPL_HH */ diff --git a/thirdparty/harfbuzz/src/hb-shaper-list.hh b/thirdparty/harfbuzz/src/hb-shaper-list.hh new file mode 100644 index 0000000000..0d63933a76 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-shaper-list.hh @@ -0,0 +1,60 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_SHAPER_LIST_HH +#define HB_SHAPER_LIST_HH +#endif /* HB_SHAPER_LIST_HH */ /* Dummy header guards */ + +#ifndef HB_NO_SHAPER + + +/* v--- Add new shapers in the right place here. */ + +#ifdef HAVE_GRAPHITE2 +/* Only picks up fonts that have a "Silf" table. */ +HB_SHAPER_IMPLEMENT (graphite2) +#endif + +#ifndef HB_NO_OT_SHAPE +HB_SHAPER_IMPLEMENT (ot) /* <--- This is our main OpenType shaper. */ +#endif + +#ifdef HAVE_UNISCRIBE +HB_SHAPER_IMPLEMENT (uniscribe) +#endif +#ifdef HAVE_DIRECTWRITE +HB_SHAPER_IMPLEMENT (directwrite) +#endif +#ifdef HAVE_CORETEXT +HB_SHAPER_IMPLEMENT (coretext) +#endif + +#ifndef HB_NO_FALLBACK_SHAPE +HB_SHAPER_IMPLEMENT (fallback) /* <--- This should be last. */ +#endif + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-shaper.cc b/thirdparty/harfbuzz/src/hb-shaper.cc new file mode 100644 index 0000000000..0ea68ad1f5 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-shaper.cc @@ -0,0 +1,108 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" +#include "hb-shaper.hh" +#include "hb-machinery.hh" + + +static const hb_shaper_entry_t all_shapers[] = { +#define HB_SHAPER_IMPLEMENT(name) {#name, _hb_##name##_shape}, +#include "hb-shaper-list.hh" +#undef HB_SHAPER_IMPLEMENT +}; +#ifndef HB_NO_SHAPER +static_assert (0 != ARRAY_LENGTH_CONST (all_shapers), "No shaper enabled."); +#endif + +#if HB_USE_ATEXIT +static void free_static_shapers (); +#endif + +static struct hb_shapers_lazy_loader_t : hb_lazy_loader_t<const hb_shaper_entry_t, + hb_shapers_lazy_loader_t> +{ + static hb_shaper_entry_t *create () + { + char *env = getenv ("HB_SHAPER_LIST"); + if (!env || !*env) + return nullptr; + + hb_shaper_entry_t *shapers = (hb_shaper_entry_t *) calloc (1, sizeof (all_shapers)); + if (unlikely (!shapers)) + return nullptr; + + memcpy (shapers, all_shapers, sizeof (all_shapers)); + + /* Reorder shaper list to prefer requested shapers. */ + unsigned int i = 0; + char *end, *p = env; + for (;;) + { + end = strchr (p, ','); + if (!end) + end = p + strlen (p); + + for (unsigned int j = i; j < ARRAY_LENGTH (all_shapers); j++) + if (end - p == (int) strlen (shapers[j].name) && + 0 == strncmp (shapers[j].name, p, end - p)) + { + /* Reorder this shaper to position i */ + struct hb_shaper_entry_t t = shapers[j]; + memmove (&shapers[i + 1], &shapers[i], sizeof (shapers[i]) * (j - i)); + shapers[i] = t; + i++; + } + + if (!*end) + break; + else + p = end + 1; + } + +#if HB_USE_ATEXIT + atexit (free_static_shapers); +#endif + + return shapers; + } + static void destroy (const hb_shaper_entry_t *p) { free ((void *) p); } + static const hb_shaper_entry_t *get_null () { return all_shapers; } +} static_shapers; + +#if HB_USE_ATEXIT +static +void free_static_shapers () +{ + static_shapers.free_instance (); +} +#endif + +const hb_shaper_entry_t * +_hb_shapers_get () +{ + return static_shapers.get_unconst (); +} diff --git a/thirdparty/harfbuzz/src/hb-shaper.hh b/thirdparty/harfbuzz/src/hb-shaper.hh new file mode 100644 index 0000000000..b4138a324f --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-shaper.hh @@ -0,0 +1,134 @@ +/* + * Copyright © 2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_SHAPER_HH +#define HB_SHAPER_HH + +#include "hb.hh" +#include "hb-machinery.hh" + +typedef hb_bool_t hb_shape_func_t (hb_shape_plan_t *shape_plan, + hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features); + +#define HB_SHAPER_IMPLEMENT(name) \ + extern "C" HB_INTERNAL hb_shape_func_t _hb_##name##_shape; +#include "hb-shaper-list.hh" +#undef HB_SHAPER_IMPLEMENT + +struct hb_shaper_entry_t { + char name[16]; + hb_shape_func_t *func; +}; + +HB_INTERNAL const hb_shaper_entry_t * +_hb_shapers_get (); + + +template <typename Data, unsigned int WheresData, typename T> +struct hb_shaper_lazy_loader_t; + +#define HB_SHAPER_ORDER(Shaper) \ + HB_PASTE (HB_SHAPER_ORDER_, Shaper) +enum hb_shaper_order_t +{ + _HB_SHAPER_ORDER_ORDER_ZERO, +#define HB_SHAPER_IMPLEMENT(Shaper) \ + HB_SHAPER_ORDER (Shaper), +#include "hb-shaper-list.hh" +#undef HB_SHAPER_IMPLEMENT + _HB_SHAPERS_COUNT_PLUS_ONE, + HB_SHAPERS_COUNT = _HB_SHAPERS_COUNT_PLUS_ONE - 1, +}; + +template <enum hb_shaper_order_t order, typename Object> struct hb_shaper_object_data_type_t; + +#define HB_SHAPER_DATA_SUCCEEDED ((void *) +1) +#define HB_SHAPER_DATA_TYPE(shaper, object) hb_##shaper##_##object##_data_t +#define HB_SHAPER_DATA_CREATE_FUNC(shaper, object) _hb_##shaper##_shaper_##object##_data_create +#define HB_SHAPER_DATA_DESTROY_FUNC(shaper, object) _hb_##shaper##_shaper_##object##_data_destroy + +#define HB_SHAPER_DATA_INSTANTIATE_SHAPERS(shaper, object) \ + \ + struct HB_SHAPER_DATA_TYPE (shaper, object); /* Type forward declaration. */ \ + extern "C" HB_INTERNAL HB_SHAPER_DATA_TYPE (shaper, object) * \ + HB_SHAPER_DATA_CREATE_FUNC (shaper, object) (hb_##object##_t *object); \ + extern "C" HB_INTERNAL void \ + HB_SHAPER_DATA_DESTROY_FUNC (shaper, object) (HB_SHAPER_DATA_TYPE (shaper, object) *shaper##_##object); \ + \ + template <> \ + struct hb_shaper_object_data_type_t<HB_SHAPER_ORDER (shaper), hb_##object##_t> \ + { \ + typedef HB_SHAPER_DATA_TYPE(shaper, object) value; \ + }; \ + \ + template <unsigned int WheresData> \ + struct hb_shaper_lazy_loader_t<hb_##object##_t, WheresData, HB_SHAPER_DATA_TYPE(shaper, object)> \ + : hb_lazy_loader_t<HB_SHAPER_DATA_TYPE(shaper, object), \ + hb_shaper_lazy_loader_t<hb_##object##_t, \ + WheresData, \ + HB_SHAPER_DATA_TYPE(shaper, object)>, \ + hb_##object##_t, WheresData> \ + { \ + typedef HB_SHAPER_DATA_TYPE(shaper, object) Type; \ + static Type* create (hb_##object##_t *data) \ + { return HB_SHAPER_DATA_CREATE_FUNC (shaper, object) (data); } \ + static Type *get_null () { return nullptr; } \ + static void destroy (Type *p) { HB_SHAPER_DATA_DESTROY_FUNC (shaper, object) (p); } \ + }; \ + \ + static_assert (true, "") /* Require semicolon after. */ + + +template <typename Object> +struct hb_shaper_object_dataset_t +{ + void init0 (Object *parent_data) + { + this->parent_data = parent_data; +#define HB_SHAPER_IMPLEMENT(shaper) shaper.init0 (); +#include "hb-shaper-list.hh" +#undef HB_SHAPER_IMPLEMENT + } + void fini () + { +#define HB_SHAPER_IMPLEMENT(shaper) shaper.fini (); +#include "hb-shaper-list.hh" +#undef HB_SHAPER_IMPLEMENT + } + + Object *parent_data; /* MUST be JUST before the lazy loaders. */ +#define HB_SHAPER_IMPLEMENT(shaper) \ + hb_shaper_lazy_loader_t<Object, HB_SHAPER_ORDER(shaper), \ + typename hb_shaper_object_data_type_t<HB_SHAPER_ORDER(shaper), Object>::value \ + > shaper; +#include "hb-shaper-list.hh" +#undef HB_SHAPER_IMPLEMENT +}; + +#endif /* HB_SHAPER_HH */ diff --git a/thirdparty/harfbuzz/src/hb-static.cc b/thirdparty/harfbuzz/src/hb-static.cc new file mode 100644 index 0000000000..f5b7fa50a0 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-static.cc @@ -0,0 +1,112 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#include "hb-open-type.hh" +#include "hb-face.hh" + +#include "hb-aat-layout-common.hh" +#include "hb-aat-layout-feat-table.hh" +#include "hb-ot-layout-common.hh" +#include "hb-ot-cmap-table.hh" +#include "hb-ot-head-table.hh" +#include "hb-ot-maxp-table.hh" + +#ifndef HB_NO_VISIBILITY +#include "hb-ot-name-language-static.hh" + +uint64_t const _hb_NullPool[(HB_NULL_POOL_SIZE + sizeof (uint64_t) - 1) / sizeof (uint64_t)] = {}; +/*thread_local*/ uint64_t _hb_CrapPool[(HB_NULL_POOL_SIZE + sizeof (uint64_t) - 1) / sizeof (uint64_t)] = {}; + +DEFINE_NULL_NAMESPACE_BYTES (OT, Index) = {0xFF,0xFF}; +DEFINE_NULL_NAMESPACE_BYTES (OT, LangSys) = {0x00,0x00, 0xFF,0xFF, 0x00,0x00}; +DEFINE_NULL_NAMESPACE_BYTES (OT, RangeRecord) = {0x00,0x01, 0x00,0x00, 0x00, 0x00}; +DEFINE_NULL_NAMESPACE_BYTES (OT, CmapSubtableLongGroup) = {0x00,0x00,0x00,0x01, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00}; +DEFINE_NULL_NAMESPACE_BYTES (AAT, SettingName) = {0xFF,0xFF, 0xFF,0xFF}; +/* Hand-coded because Lookup is a template. Sad. */ +const unsigned char _hb_Null_AAT_Lookup[2] = {0xFF, 0xFF}; + + + +/* hb_face_t */ + +unsigned int +hb_face_t::load_num_glyphs () const +{ + hb_sanitize_context_t c = hb_sanitize_context_t (); + c.set_num_glyphs (0); /* So we don't recurse ad infinitum. */ + hb_blob_t *maxp_blob = c.reference_table<OT::maxp> (this); + const OT::maxp *maxp_table = maxp_blob->as<OT::maxp> (); + + unsigned int ret = maxp_table->get_num_glyphs (); + num_glyphs.set_relaxed (ret); + hb_blob_destroy (maxp_blob); + return ret; +} + +unsigned int +hb_face_t::load_upem () const +{ + unsigned int ret = table.head->get_upem (); + upem.set_relaxed (ret); + return ret; +} + + +/* hb_user_data_array_t */ + +bool +hb_user_data_array_t::set (hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace) +{ + if (!key) + return false; + + if (replace) { + if (!data && !destroy) { + items.remove (key, lock); + return true; + } + } + hb_user_data_item_t item = {key, data, destroy}; + bool ret = !!items.replace_or_insert (item, lock, (bool) replace); + + return ret; +} + +void * +hb_user_data_array_t::get (hb_user_data_key_t *key) +{ + hb_user_data_item_t item = {nullptr, nullptr, nullptr}; + + return items.find (key, &item, lock) ? item.data : nullptr; +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-string-array.hh b/thirdparty/harfbuzz/src/hb-string-array.hh new file mode 100644 index 0000000000..e7ac119232 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-string-array.hh @@ -0,0 +1,85 @@ +/* + * Copyright © 2017 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_STRING_ARRAY_HH +#if 0 /* Make checks happy. */ +#define HB_STRING_ARRAY_HH +#endif + +#include "hb.hh" + +/* Based on Bruno Haible's code in Appendix B of Ulrich Drepper's dsohowto.pdf: + * https://software.intel.com/sites/default/files/m/a/1/e/dsohowto.pdf */ + +#define HB_STRING_ARRAY_TYPE_NAME HB_PASTE(HB_STRING_ARRAY_NAME, _msgstr_t) +#define HB_STRING_ARRAY_POOL_NAME HB_PASTE(HB_STRING_ARRAY_NAME, _msgstr) +#define HB_STRING_ARRAY_OFFS_NAME HB_PASTE(HB_STRING_ARRAY_NAME, _msgidx) +#define HB_STRING_ARRAY_LENG_NAME HB_PASTE(HB_STRING_ARRAY_NAME, _length) + +static const union HB_STRING_ARRAY_TYPE_NAME { + struct { +/* I like to avoid storing the nul-termination byte since we don't need it, + * but C++ does not allow that. + * https://stackoverflow.com/q/28433862 + */ +#define _S(s) char HB_PASTE (str, __LINE__)[sizeof (s)]; +#include HB_STRING_ARRAY_LIST +#undef _S + } st; + char str[HB_VAR_ARRAY]; +} +HB_STRING_ARRAY_POOL_NAME = +{ + { +#define _S(s) s, +#include HB_STRING_ARRAY_LIST +#undef _S + } +}; +static const unsigned int HB_STRING_ARRAY_OFFS_NAME[] = +{ +#define _S(s) offsetof (union HB_STRING_ARRAY_TYPE_NAME, st.HB_PASTE(str, __LINE__)), +#include HB_STRING_ARRAY_LIST +#undef _S + sizeof (HB_STRING_ARRAY_TYPE_NAME) +}; + +static const unsigned int HB_STRING_ARRAY_LENG_NAME = ARRAY_LENGTH_CONST (HB_STRING_ARRAY_OFFS_NAME) - 1; + +static inline hb_bytes_t +HB_STRING_ARRAY_NAME (unsigned int i) +{ + assert (i < ARRAY_LENGTH (HB_STRING_ARRAY_OFFS_NAME) - 1); + return hb_bytes_t (HB_STRING_ARRAY_POOL_NAME.str + HB_STRING_ARRAY_OFFS_NAME[i], + HB_STRING_ARRAY_OFFS_NAME[i + 1] - HB_STRING_ARRAY_OFFS_NAME[i] - 1); +} + +#undef HB_STRING_ARRAY_TYPE_NAME +#undef HB_STRING_ARRAY_POOL_NAME +#undef HB_STRING_ARRAY_OFFS_NAME +#undef HB_STRING_ARRAY_LENG_NAME + +#endif /* HB_STRING_ARRAY_HH */ diff --git a/thirdparty/harfbuzz/src/hb-style.cc b/thirdparty/harfbuzz/src/hb-style.cc new file mode 100644 index 0000000000..86b9f7da5f --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-style.cc @@ -0,0 +1,135 @@ +/* + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "hb.hh" + +#ifndef HB_NO_STYLE +#ifdef HB_EXPERIMENTAL_API + +#include "hb-ot-var-avar-table.hh" +#include "hb-ot-var-fvar-table.hh" +#include "hb-ot-stat-table.hh" +#include "hb-ot-os2-table.hh" +#include "hb-ot-head-table.hh" +#include "hb-ot-post-table.hh" +#include "hb-ot-face.hh" + +/** + * hb_style_tag_t: + * @HB_STYLE_TAG_ITALIC: Used to vary between non-italic and italic. + * A value of 0 can be interpreted as "Roman" (non-italic); a value of 1 can + * be interpreted as (fully) italic. + * @HB_STYLE_TAG_OPTICAL_SIZE: Used to vary design to suit different text sizes. + * Non-zero. Values can be interpreted as text size, in points. + * @HB_STYLE_TAG_SLANT: Used to vary between upright and slanted text. Values + * must be greater than -90 and less than +90. Values can be interpreted as + * the angle, in counter-clockwise degrees, of oblique slant from whatever the + * designer considers to be upright for that font design. + * @HB_STYLE_TAG_WIDTH: Used to vary width of text from narrower to wider. + * Non-zero. Values can be interpreted as a percentage of whatever the font + * designer considers “normal width” for that font design. + * @HB_STYLE_TAG_WEIGHT: Used to vary stroke thicknesses or other design details + * to give variation from lighter to blacker. Values can be interpreted in direct + * comparison to values for usWeightClass in the OS/2 table, + * or the CSS font-weight property. + * + * Defined by https://docs.microsoft.com/en-us/typography/opentype/spec/dvaraxisreg + * + * Since: EXPERIMENTAL + **/ +typedef enum { + HB_STYLE_TAG_ITALIC = HB_TAG ('i','t','a','l'), + HB_STYLE_TAG_OPTICAL_SIZE = HB_TAG ('o','p','s','z'), + HB_STYLE_TAG_SLANT = HB_TAG ('s','l','n','t'), + HB_STYLE_TAG_WIDTH = HB_TAG ('w','d','t','h'), + HB_STYLE_TAG_WEIGHT = HB_TAG ('w','g','h','t'), + + _HB_STYLE_TAG_MAX_VALUE = HB_TAG_MAX_SIGNED /*< skip >*/ +} hb_style_tag_t; + +/** + * hb_style_get_value: + * @font: a #hb_font_t object. + * @style_tag: a style tag. + * + * Searches variation axes of a hb_font_t object for a specific axis first, + * if not set, then tries to get default style values from different + * tables of the font. + * + * Returns: Corresponding axis or default value to a style tag. + * + * Since: EXPERIMENTAL + **/ +float +hb_style_get_value (hb_font_t *font, hb_tag_t tag) +{ + hb_style_tag_t style_tag = (hb_style_tag_t) tag; + hb_face_t *face = font->face; + +#ifndef HB_NO_VAR + hb_ot_var_axis_info_t axis; + if (hb_ot_var_find_axis_info (face, style_tag, &axis)) + { + if (axis.axis_index < font->num_coords) return font->design_coords[axis.axis_index]; + /* If a face is variable, fvar's default_value is better than STAT records */ + return axis.default_value; + } +#endif + + if (style_tag == HB_STYLE_TAG_OPTICAL_SIZE && font->ptem) + return font->ptem; + + /* STAT */ + float value; + if (face->table.STAT->get_value (style_tag, &value)) + return value; + + switch ((unsigned) style_tag) + { + case HB_STYLE_TAG_ITALIC: + return face->table.OS2->is_italic () || face->table.head->is_italic () ? 1 : 0; + case HB_STYLE_TAG_OPTICAL_SIZE: + { + unsigned int lower, upper; + return face->table.OS2->v5 ().get_optical_size (&lower, &upper) + ? (float) (lower + upper) / 2.f + : 12.f; + } + case HB_STYLE_TAG_SLANT: + return face->table.post->table->italicAngle.to_float (); + case HB_STYLE_TAG_WIDTH: + return face->table.OS2->has_data () + ? face->table.OS2->get_width () + : (face->table.head->is_condensed () ? 75 : 100); + case HB_STYLE_TAG_WEIGHT: + return face->table.OS2->has_data () + ? face->table.OS2->usWeightClass + : (face->table.head->is_bold () ? 700 : 400); + default: + return 0; + } +} + +#endif +#endif diff --git a/thirdparty/harfbuzz/src/hb-style.h b/thirdparty/harfbuzz/src/hb-style.h new file mode 100644 index 0000000000..1209c79e94 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-style.h @@ -0,0 +1,43 @@ +/* + * Copyright © 2019 Ebrahim Byagowi + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_STYLE_H +#define HB_STYLE_H + +#include "hb.h" + +HB_BEGIN_DECLS + +#ifdef HB_EXPERIMENTAL_API +HB_EXTERN float +hb_style_get_value (hb_font_t *font, hb_tag_t style_tag); +#endif + +HB_END_DECLS + +#endif /* HB_STYLE_H */ diff --git a/thirdparty/harfbuzz/src/hb-subset-cff-common.cc b/thirdparty/harfbuzz/src/hb-subset-cff-common.cc new file mode 100644 index 0000000000..04e1db24ac --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-subset-cff-common.cc @@ -0,0 +1,227 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#include "hb.hh" + +#ifndef HB_NO_SUBSET_CFF + +#include "hb-ot-cff-common.hh" +#include "hb-ot-cff2-table.hh" +#include "hb-subset-cff-common.hh" + +/* Disable FDSelect format 0 for compatibility with fonttools which doesn't seem choose it. + * Rarely any/much smaller than format 3 anyway. */ +#define CFF_SERIALIZE_FDSELECT_0 0 + +using namespace CFF; + +/** + * hb_plan_subset_cff_fdselect + * Determine an optimal FDSelect format according to a provided plan. + * + * Return value: FDSelect format, size, and ranges for the most compact subset FDSelect + * along with a font index remapping table + **/ + +bool +hb_plan_subset_cff_fdselect (const hb_subset_plan_t *plan, + unsigned int fdCount, + const FDSelect &src, /* IN */ + unsigned int &subset_fd_count /* OUT */, + unsigned int &subset_fdselect_size /* OUT */, + unsigned int &subset_fdselect_format /* OUT */, + hb_vector_t<code_pair_t> &fdselect_ranges /* OUT */, + hb_inc_bimap_t &fdmap /* OUT */) +{ + subset_fd_count = 0; + subset_fdselect_size = 0; + subset_fdselect_format = 0; + unsigned int num_ranges = 0; + + unsigned int subset_num_glyphs = plan->num_output_glyphs (); + if (subset_num_glyphs == 0) + return true; + + { + /* use hb_set to determine the subset of font dicts */ + hb_set_t *set = hb_set_create (); + if (unlikely (set == &Null (hb_set_t))) return false; + hb_codepoint_t prev_fd = CFF_UNDEF_CODE; + for (hb_codepoint_t i = 0; i < subset_num_glyphs; i++) + { + hb_codepoint_t glyph; + hb_codepoint_t fd; + if (!plan->old_gid_for_new_gid (i, &glyph)) + { + /* fonttools retains FDSelect & font dicts for missing glyphs. do the same */ + glyph = i; + } + fd = src.get_fd (glyph); + set->add (fd); + + if (fd != prev_fd) + { + num_ranges++; + prev_fd = fd; + code_pair_t pair = { fd, i }; + fdselect_ranges.push (pair); + } + } + + subset_fd_count = set->get_population (); + if (subset_fd_count == fdCount) + { + /* all font dicts belong to the subset. no need to subset FDSelect & FDArray */ + fdmap.identity (fdCount); + hb_set_destroy (set); + } + else + { + /* create a fdmap */ + fdmap.reset (); + + hb_codepoint_t fd = CFF_UNDEF_CODE; + while (set->next (&fd)) + fdmap.add (fd); + hb_set_destroy (set); + if (unlikely (fdmap.get_population () != subset_fd_count)) + return false; + } + + /* update each font dict index stored as "code" in fdselect_ranges */ + for (unsigned int i = 0; i < fdselect_ranges.length; i++) + fdselect_ranges[i].code = fdmap[fdselect_ranges[i].code]; + } + + /* determine which FDSelect format is most compact */ + if (subset_fd_count > 0xFF) + { + if (unlikely (src.format != 4)) + return false; + subset_fdselect_format = 4; + subset_fdselect_size = FDSelect::min_size + FDSelect4::min_size + FDSelect4_Range::static_size * num_ranges + HBUINT32::static_size; + } + else + { +#if CFF_SERIALIZE_FDSELECT_0 + unsigned int format0_size = FDSelect::min_size + FDSelect0::min_size + HBUINT8::static_size * subset_num_glyphs; +#endif + unsigned int format3_size = FDSelect::min_size + FDSelect3::min_size + FDSelect3_Range::static_size * num_ranges + HBUINT16::static_size; + +#if CFF_SERIALIZE_FDSELECT_0 + if (format0_size <= format3_size) + { + // subset_fdselect_format = 0; + subset_fdselect_size = format0_size; + } + else +#endif + { + subset_fdselect_format = 3; + subset_fdselect_size = format3_size; + } + } + + return true; +} + +template <typename FDSELECT3_4> +static inline bool +serialize_fdselect_3_4 (hb_serialize_context_t *c, + const unsigned int num_glyphs, + const FDSelect &src, + unsigned int size, + const hb_vector_t<code_pair_t> &fdselect_ranges) +{ + TRACE_SERIALIZE (this); + FDSELECT3_4 *p = c->allocate_size<FDSELECT3_4> (size); + if (unlikely (!p)) return_trace (false); + p->nRanges () = fdselect_ranges.length; + for (unsigned int i = 0; i < fdselect_ranges.length; i++) + { + p->ranges[i].first = fdselect_ranges[i].glyph; + p->ranges[i].fd = fdselect_ranges[i].code; + } + p->sentinel () = num_glyphs; + return_trace (true); +} + +/** + * hb_serialize_cff_fdselect + * Serialize a subset FDSelect format planned above. + **/ +bool +hb_serialize_cff_fdselect (hb_serialize_context_t *c, + const unsigned int num_glyphs, + const FDSelect &src, + unsigned int fd_count, + unsigned int fdselect_format, + unsigned int size, + const hb_vector_t<code_pair_t> &fdselect_ranges) +{ + TRACE_SERIALIZE (this); + FDSelect *p = c->allocate_min<FDSelect> (); + if (unlikely (!p)) return_trace (false); + p->format = fdselect_format; + size -= FDSelect::min_size; + + switch (fdselect_format) + { +#if CFF_SERIALIZE_FDSELECT_0 + case 0: + { + FDSelect0 *p = c->allocate_size<FDSelect0> (size); + if (unlikely (!p)) return_trace (false); + unsigned int range_index = 0; + unsigned int fd = fdselect_ranges[range_index++].code; + for (unsigned int i = 0; i < num_glyphs; i++) + { + if ((range_index < fdselect_ranges.len) && + (i >= fdselect_ranges[range_index].glyph)) + { + fd = fdselect_ranges[range_index++].code; + } + p->fds[i] = fd; + } + return_trace (true); + } +#endif /* CFF_SERIALIZE_FDSELECT_0 */ + + case 3: + return serialize_fdselect_3_4<FDSelect3> (c, num_glyphs, src, + size, fdselect_ranges); + + case 4: + return serialize_fdselect_3_4<FDSelect4> (c, num_glyphs, src, + size, fdselect_ranges); + + default: + return_trace (false); + } +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-subset-cff-common.hh b/thirdparty/harfbuzz/src/hb-subset-cff-common.hh new file mode 100644 index 0000000000..422b20b8d0 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-subset-cff-common.hh @@ -0,0 +1,989 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#ifndef HB_SUBSET_CFF_COMMON_HH +#define HB_SUBSET_CFF_COMMON_HH + +#include "hb.hh" + +#include "hb-subset-plan.hh" +#include "hb-cff-interp-cs-common.hh" + +namespace CFF { + +/* Used for writing a temporary charstring */ +struct str_encoder_t +{ + str_encoder_t (str_buff_t &buff_) + : buff (buff_), error (false) {} + + void reset () { buff.resize (0); } + + void encode_byte (unsigned char b) + { + if (unlikely (buff.push (b) == &Crap (unsigned char))) + set_error (); + } + + void encode_int (int v) + { + if ((-1131 <= v) && (v <= 1131)) + { + if ((-107 <= v) && (v <= 107)) + encode_byte (v + 139); + else if (v > 0) + { + v -= 108; + encode_byte ((v >> 8) + OpCode_TwoBytePosInt0); + encode_byte (v & 0xFF); + } + else + { + v = -v - 108; + encode_byte ((v >> 8) + OpCode_TwoByteNegInt0); + encode_byte (v & 0xFF); + } + } + else + { + if (unlikely (v < -32768)) + v = -32768; + else if (unlikely (v > 32767)) + v = 32767; + encode_byte (OpCode_shortint); + encode_byte ((v >> 8) & 0xFF); + encode_byte (v & 0xFF); + } + } + + void encode_num (const number_t& n) + { + if (n.in_int_range ()) + { + encode_int (n.to_int ()); + } + else + { + int32_t v = n.to_fixed (); + encode_byte (OpCode_fixedcs); + encode_byte ((v >> 24) & 0xFF); + encode_byte ((v >> 16) & 0xFF); + encode_byte ((v >> 8) & 0xFF); + encode_byte (v & 0xFF); + } + } + + void encode_op (op_code_t op) + { + if (Is_OpCode_ESC (op)) + { + encode_byte (OpCode_escape); + encode_byte (Unmake_OpCode_ESC (op)); + } + else + encode_byte (op); + } + + void copy_str (const byte_str_t &str) + { + unsigned int offset = buff.length; + if (unlikely (!buff.resize (offset + str.length))) + { + set_error (); + return; + } + if (unlikely (buff.length < offset + str.length)) + { + set_error (); + return; + } + memcpy (&buff[offset], &str[0], str.length); + } + + bool is_error () const { return error; } + + protected: + void set_error () { error = true; } + + str_buff_t &buff; + bool error; +}; + +struct cff_sub_table_info_t { + cff_sub_table_info_t () + : fd_array_link (0), + char_strings_link (0) + { + fd_select.init (); + } + + table_info_t fd_select; + objidx_t fd_array_link; + objidx_t char_strings_link; +}; + +template <typename OPSTR=op_str_t> +struct cff_top_dict_op_serializer_t : op_serializer_t +{ + bool serialize (hb_serialize_context_t *c, + const OPSTR &opstr, + const cff_sub_table_info_t &info) const + { + TRACE_SERIALIZE (this); + + switch (opstr.op) + { + case OpCode_CharStrings: + return_trace (FontDict::serialize_link4_op(c, opstr.op, info.char_strings_link, whence_t::Absolute)); + + case OpCode_FDArray: + return_trace (FontDict::serialize_link4_op(c, opstr.op, info.fd_array_link, whence_t::Absolute)); + + case OpCode_FDSelect: + return_trace (FontDict::serialize_link4_op(c, opstr.op, info.fd_select.link, whence_t::Absolute)); + + default: + return_trace (copy_opstr (c, opstr)); + } + return_trace (true); + } +}; + +struct cff_font_dict_op_serializer_t : op_serializer_t +{ + bool serialize (hb_serialize_context_t *c, + const op_str_t &opstr, + const table_info_t &privateDictInfo) const + { + TRACE_SERIALIZE (this); + + if (opstr.op == OpCode_Private) + { + /* serialize the private dict size & offset as 2-byte & 4-byte integers */ + return_trace (UnsizedByteStr::serialize_int2 (c, privateDictInfo.size) && + Dict::serialize_link4_op (c, opstr.op, privateDictInfo.link, whence_t::Absolute)); + } + else + { + HBUINT8 *d = c->allocate_size<HBUINT8> (opstr.str.length); + if (unlikely (!d)) return_trace (false); + memcpy (d, &opstr.str[0], opstr.str.length); + } + return_trace (true); + } +}; + +struct cff_private_dict_op_serializer_t : op_serializer_t +{ + cff_private_dict_op_serializer_t (bool desubroutinize_, bool drop_hints_) + : desubroutinize (desubroutinize_), drop_hints (drop_hints_) {} + + bool serialize (hb_serialize_context_t *c, + const op_str_t &opstr, + objidx_t subrs_link) const + { + TRACE_SERIALIZE (this); + + if (drop_hints && dict_opset_t::is_hint_op (opstr.op)) + return true; + if (opstr.op == OpCode_Subrs) + { + if (desubroutinize || !subrs_link) + return_trace (true); + else + return_trace (FontDict::serialize_link2_op (c, opstr.op, subrs_link)); + } + else + return_trace (copy_opstr (c, opstr)); + } + + protected: + const bool desubroutinize; + const bool drop_hints; +}; + +struct flatten_param_t +{ + str_buff_t &flatStr; + bool drop_hints; +}; + +template <typename ACC, typename ENV, typename OPSET, op_code_t endchar_op=OpCode_Invalid> +struct subr_flattener_t +{ + subr_flattener_t (const ACC &acc_, + const hb_subset_plan_t *plan_) + : acc (acc_), plan (plan_) {} + + bool flatten (str_buff_vec_t &flat_charstrings) + { + if (!flat_charstrings.resize (plan->num_output_glyphs ())) + return false; + for (unsigned int i = 0; i < plan->num_output_glyphs (); i++) + flat_charstrings[i].init (); + for (unsigned int i = 0; i < plan->num_output_glyphs (); i++) + { + hb_codepoint_t glyph; + if (!plan->old_gid_for_new_gid (i, &glyph)) + { + /* add an endchar only charstring for a missing glyph if CFF1 */ + if (endchar_op != OpCode_Invalid) flat_charstrings[i].push (endchar_op); + continue; + } + const byte_str_t str = (*acc.charStrings)[glyph]; + unsigned int fd = acc.fdSelect->get_fd (glyph); + if (unlikely (fd >= acc.fdCount)) + return false; + cs_interpreter_t<ENV, OPSET, flatten_param_t> interp; + interp.env.init (str, acc, fd); + flatten_param_t param = { flat_charstrings[i], plan->drop_hints }; + if (unlikely (!interp.interpret (param))) + return false; + } + return true; + } + + const ACC &acc; + const hb_subset_plan_t *plan; +}; + +struct subr_closures_t +{ + subr_closures_t () : valid (false), global_closure (nullptr) + { local_closures.init (); } + + void init (unsigned int fd_count) + { + valid = true; + global_closure = hb_set_create (); + if (global_closure == hb_set_get_empty ()) + valid = false; + if (!local_closures.resize (fd_count)) + valid = false; + + for (unsigned int i = 0; i < local_closures.length; i++) + { + local_closures[i] = hb_set_create (); + if (local_closures[i] == hb_set_get_empty ()) + valid = false; + } + } + + void fini () + { + hb_set_destroy (global_closure); + for (unsigned int i = 0; i < local_closures.length; i++) + hb_set_destroy (local_closures[i]); + local_closures.fini (); + } + + void reset () + { + hb_set_clear (global_closure); + for (unsigned int i = 0; i < local_closures.length; i++) + hb_set_clear (local_closures[i]); + } + + bool is_valid () const { return valid; } + bool valid; + hb_set_t *global_closure; + hb_vector_t<hb_set_t *> local_closures; +}; + +struct parsed_cs_op_t : op_str_t +{ + void init (unsigned int subr_num_ = 0) + { + op_str_t::init (); + subr_num = subr_num_; + drop_flag = false; + keep_flag = false; + skip_flag = false; + } + + void fini () { op_str_t::fini (); } + + bool for_drop () const { return drop_flag; } + void set_drop () { if (!for_keep ()) drop_flag = true; } + + bool for_keep () const { return keep_flag; } + void set_keep () { keep_flag = true; } + + bool for_skip () const { return skip_flag; } + void set_skip () { skip_flag = true; } + + unsigned int subr_num; + + protected: + bool drop_flag : 1; + bool keep_flag : 1; + bool skip_flag : 1; +}; + +struct parsed_cs_str_t : parsed_values_t<parsed_cs_op_t> +{ + void init () + { + SUPER::init (); + parsed = false; + hint_dropped = false; + has_prefix_ = false; + } + + void add_op (op_code_t op, const byte_str_ref_t& str_ref) + { + if (!is_parsed ()) + SUPER::add_op (op, str_ref); + } + + void add_call_op (op_code_t op, const byte_str_ref_t& str_ref, unsigned int subr_num) + { + if (!is_parsed ()) + { + unsigned int parsed_len = get_count (); + if (likely (parsed_len > 0)) + values[parsed_len-1].set_skip (); + + parsed_cs_op_t val; + val.init (subr_num); + SUPER::add_op (op, str_ref, val); + } + } + + void set_prefix (const number_t &num, op_code_t op = OpCode_Invalid) + { + has_prefix_ = true; + prefix_op_ = op; + prefix_num_ = num; + } + + bool at_end (unsigned int pos) const + { + return ((pos + 1 >= values.length) /* CFF2 */ + || (values[pos + 1].op == OpCode_return)); + } + + bool is_parsed () const { return parsed; } + void set_parsed () { parsed = true; } + + bool is_hint_dropped () const { return hint_dropped; } + void set_hint_dropped () { hint_dropped = true; } + + bool is_vsindex_dropped () const { return vsindex_dropped; } + void set_vsindex_dropped () { vsindex_dropped = true; } + + bool has_prefix () const { return has_prefix_; } + op_code_t prefix_op () const { return prefix_op_; } + const number_t &prefix_num () const { return prefix_num_; } + + protected: + bool parsed; + bool hint_dropped; + bool vsindex_dropped; + bool has_prefix_; + op_code_t prefix_op_; + number_t prefix_num_; + + private: + typedef parsed_values_t<parsed_cs_op_t> SUPER; +}; + +struct parsed_cs_str_vec_t : hb_vector_t<parsed_cs_str_t> +{ + void init (unsigned int len_ = 0) + { + SUPER::init (); + if (unlikely (!resize (len_))) + return; + for (unsigned int i = 0; i < length; i++) + (*this)[i].init (); + } + void fini () { SUPER::fini_deep (); } + + private: + typedef hb_vector_t<parsed_cs_str_t> SUPER; +}; + +struct subr_subset_param_t +{ + void init (parsed_cs_str_t *parsed_charstring_, + parsed_cs_str_vec_t *parsed_global_subrs_, parsed_cs_str_vec_t *parsed_local_subrs_, + hb_set_t *global_closure_, hb_set_t *local_closure_, + bool drop_hints_) + { + parsed_charstring = parsed_charstring_; + current_parsed_str = parsed_charstring; + parsed_global_subrs = parsed_global_subrs_; + parsed_local_subrs = parsed_local_subrs_; + global_closure = global_closure_; + local_closure = local_closure_; + drop_hints = drop_hints_; + } + + parsed_cs_str_t *get_parsed_str_for_context (call_context_t &context) + { + switch (context.type) + { + case CSType_CharString: + return parsed_charstring; + + case CSType_LocalSubr: + if (likely (context.subr_num < parsed_local_subrs->length)) + return &(*parsed_local_subrs)[context.subr_num]; + break; + + case CSType_GlobalSubr: + if (likely (context.subr_num < parsed_global_subrs->length)) + return &(*parsed_global_subrs)[context.subr_num]; + break; + } + return nullptr; + } + + template <typename ENV> + void set_current_str (ENV &env, bool calling) + { + parsed_cs_str_t *parsed_str = get_parsed_str_for_context (env.context); + if (unlikely (!parsed_str)) + { + env.set_error (); + return; + } + /* If the called subroutine is parsed partially but not completely yet, + * it must be because we are calling it recursively. + * Handle it as an error. */ + if (unlikely (calling && !parsed_str->is_parsed () && (parsed_str->values.length > 0))) + env.set_error (); + else + current_parsed_str = parsed_str; + } + + parsed_cs_str_t *current_parsed_str; + + parsed_cs_str_t *parsed_charstring; + parsed_cs_str_vec_t *parsed_global_subrs; + parsed_cs_str_vec_t *parsed_local_subrs; + hb_set_t *global_closure; + hb_set_t *local_closure; + bool drop_hints; +}; + +struct subr_remap_t : hb_inc_bimap_t +{ + void create (hb_set_t *closure) + { + /* create a remapping of subroutine numbers from old to new. + * no optimization based on usage counts. fonttools doesn't appear doing that either. + */ + + hb_codepoint_t old_num = HB_SET_VALUE_INVALID; + while (hb_set_next (closure, &old_num)) + add (old_num); + + if (get_population () < 1240) + bias = 107; + else if (get_population () < 33900) + bias = 1131; + else + bias = 32768; + } + + int biased_num (unsigned int old_num) const + { + hb_codepoint_t new_num = get (old_num); + return (int)new_num - bias; + } + + protected: + int bias; +}; + +struct subr_remaps_t +{ + subr_remaps_t () + { + global_remap.init (); + local_remaps.init (); + } + + ~subr_remaps_t () { fini (); } + + void init (unsigned int fdCount) + { + if (unlikely (!local_remaps.resize (fdCount))) return; + for (unsigned int i = 0; i < fdCount; i++) + local_remaps[i].init (); + } + + bool in_error() + { + return local_remaps.in_error (); + } + + void create (subr_closures_t& closures) + { + global_remap.create (closures.global_closure); + for (unsigned int i = 0; i < local_remaps.length; i++) + local_remaps[i].create (closures.local_closures[i]); + } + + void fini () + { + global_remap.fini (); + local_remaps.fini_deep (); + } + + subr_remap_t global_remap; + hb_vector_t<subr_remap_t> local_remaps; +}; + +template <typename SUBSETTER, typename SUBRS, typename ACC, typename ENV, typename OPSET, op_code_t endchar_op=OpCode_Invalid> +struct subr_subsetter_t +{ + subr_subsetter_t (ACC &acc_, const hb_subset_plan_t *plan_) + : acc (acc_), plan (plan_) + { + parsed_charstrings.init (); + parsed_global_subrs.init (); + parsed_local_subrs.init (); + } + + ~subr_subsetter_t () + { + closures.fini (); + remaps.fini (); + parsed_charstrings.fini_deep (); + parsed_global_subrs.fini_deep (); + parsed_local_subrs.fini_deep (); + } + + /* Subroutine subsetting with --no-desubroutinize runs in phases: + * + * 1. execute charstrings/subroutines to determine subroutine closures + * 2. parse out all operators and numbers + * 3. mark hint operators and operands for removal if --no-hinting + * 4. re-encode all charstrings and subroutines with new subroutine numbers + * + * Phases #1 and #2 are done at the same time in collect_subrs (). + * Phase #3 walks charstrings/subroutines forward then backward (hence parsing required), + * because we can't tell if a number belongs to a hint op until we see the first moveto. + * + * Assumption: a callsubr/callgsubr operator must immediately follow a (biased) subroutine number + * within the same charstring/subroutine, e.g., not split across a charstring and a subroutine. + */ + bool subset (void) + { + closures.init (acc.fdCount); + remaps.init (acc.fdCount); + + parsed_charstrings.init (plan->num_output_glyphs ()); + parsed_global_subrs.init (acc.globalSubrs->count); + + if (unlikely (remaps.in_error() + || parsed_charstrings.in_error () + || parsed_global_subrs.in_error ())) { + return false; + } + + if (unlikely (!parsed_local_subrs.resize (acc.fdCount))) return false; + + for (unsigned int i = 0; i < acc.fdCount; i++) + { + parsed_local_subrs[i].init (acc.privateDicts[i].localSubrs->count); + if (unlikely (parsed_local_subrs[i].in_error ())) return false; + } + if (unlikely (!closures.valid)) + return false; + + /* phase 1 & 2 */ + for (unsigned int i = 0; i < plan->num_output_glyphs (); i++) + { + hb_codepoint_t glyph; + if (!plan->old_gid_for_new_gid (i, &glyph)) + continue; + const byte_str_t str = (*acc.charStrings)[glyph]; + unsigned int fd = acc.fdSelect->get_fd (glyph); + if (unlikely (fd >= acc.fdCount)) + return false; + + cs_interpreter_t<ENV, OPSET, subr_subset_param_t> interp; + interp.env.init (str, acc, fd); + + subr_subset_param_t param; + param.init (&parsed_charstrings[i], + &parsed_global_subrs, &parsed_local_subrs[fd], + closures.global_closure, closures.local_closures[fd], + plan->drop_hints); + + if (unlikely (!interp.interpret (param))) + return false; + + /* complete parsed string esp. copy CFF1 width or CFF2 vsindex to the parsed charstring for encoding */ + SUBSETTER::complete_parsed_str (interp.env, param, parsed_charstrings[i]); + } + + if (plan->drop_hints) + { + /* mark hint ops and arguments for drop */ + for (unsigned int i = 0; i < plan->num_output_glyphs (); i++) + { + hb_codepoint_t glyph; + if (!plan->old_gid_for_new_gid (i, &glyph)) + continue; + unsigned int fd = acc.fdSelect->get_fd (glyph); + if (unlikely (fd >= acc.fdCount)) + return false; + subr_subset_param_t param; + param.init (&parsed_charstrings[i], + &parsed_global_subrs, &parsed_local_subrs[fd], + closures.global_closure, closures.local_closures[fd], + plan->drop_hints); + + drop_hints_param_t drop; + if (drop_hints_in_str (parsed_charstrings[i], param, drop)) + { + parsed_charstrings[i].set_hint_dropped (); + if (drop.vsindex_dropped) + parsed_charstrings[i].set_vsindex_dropped (); + } + } + + /* after dropping hints recreate closures of actually used subrs */ + closures.reset (); + for (unsigned int i = 0; i < plan->num_output_glyphs (); i++) + { + hb_codepoint_t glyph; + if (!plan->old_gid_for_new_gid (i, &glyph)) + continue; + unsigned int fd = acc.fdSelect->get_fd (glyph); + if (unlikely (fd >= acc.fdCount)) + return false; + subr_subset_param_t param; + param.init (&parsed_charstrings[i], + &parsed_global_subrs, &parsed_local_subrs[fd], + closures.global_closure, closures.local_closures[fd], + plan->drop_hints); + collect_subr_refs_in_str (parsed_charstrings[i], param); + } + } + + remaps.create (closures); + + return true; + } + + bool encode_charstrings (str_buff_vec_t &buffArray) const + { + if (unlikely (!buffArray.resize (plan->num_output_glyphs ()))) + return false; + for (unsigned int i = 0; i < plan->num_output_glyphs (); i++) + { + hb_codepoint_t glyph; + if (!plan->old_gid_for_new_gid (i, &glyph)) + { + /* add an endchar only charstring for a missing glyph if CFF1 */ + if (endchar_op != OpCode_Invalid) buffArray[i].push (endchar_op); + continue; + } + unsigned int fd = acc.fdSelect->get_fd (glyph); + if (unlikely (fd >= acc.fdCount)) + return false; + if (unlikely (!encode_str (parsed_charstrings[i], fd, buffArray[i]))) + return false; + } + return true; + } + + bool encode_subrs (const parsed_cs_str_vec_t &subrs, const subr_remap_t& remap, unsigned int fd, str_buff_vec_t &buffArray) const + { + unsigned int count = remap.get_population (); + + if (unlikely (!buffArray.resize (count))) + return false; + for (unsigned int old_num = 0; old_num < subrs.length; old_num++) + { + hb_codepoint_t new_num = remap[old_num]; + if (new_num != CFF_UNDEF_CODE) + { + if (unlikely (!encode_str (subrs[old_num], fd, buffArray[new_num]))) + return false; + } + } + return true; + } + + bool encode_globalsubrs (str_buff_vec_t &buffArray) + { + return encode_subrs (parsed_global_subrs, remaps.global_remap, 0, buffArray); + } + + bool encode_localsubrs (unsigned int fd, str_buff_vec_t &buffArray) const + { + return encode_subrs (parsed_local_subrs[fd], remaps.local_remaps[fd], fd, buffArray); + } + + protected: + struct drop_hints_param_t + { + drop_hints_param_t () + : seen_moveto (false), + ends_in_hint (false), + all_dropped (false), + vsindex_dropped (false) {} + + bool seen_moveto; + bool ends_in_hint; + bool all_dropped; + bool vsindex_dropped; + }; + + bool drop_hints_in_subr (parsed_cs_str_t &str, unsigned int pos, + parsed_cs_str_vec_t &subrs, unsigned int subr_num, + const subr_subset_param_t ¶m, drop_hints_param_t &drop) + { + drop.ends_in_hint = false; + bool has_hint = drop_hints_in_str (subrs[subr_num], param, drop); + + /* if this subr ends with a stem hint (i.e., not a number; potential argument for moveto), + * then this entire subroutine must be a hint. drop its call. */ + if (drop.ends_in_hint) + { + str.values[pos].set_drop (); + /* if this subr call is at the end of the parent subr, propagate the flag + * otherwise reset the flag */ + if (!str.at_end (pos)) + drop.ends_in_hint = false; + } + else if (drop.all_dropped) + { + str.values[pos].set_drop (); + } + + return has_hint; + } + + /* returns true if it sees a hint op before the first moveto */ + bool drop_hints_in_str (parsed_cs_str_t &str, const subr_subset_param_t ¶m, drop_hints_param_t &drop) + { + bool seen_hint = false; + + for (unsigned int pos = 0; pos < str.values.length; pos++) + { + bool has_hint = false; + switch (str.values[pos].op) + { + case OpCode_callsubr: + has_hint = drop_hints_in_subr (str, pos, + *param.parsed_local_subrs, str.values[pos].subr_num, + param, drop); + break; + + case OpCode_callgsubr: + has_hint = drop_hints_in_subr (str, pos, + *param.parsed_global_subrs, str.values[pos].subr_num, + param, drop); + break; + + case OpCode_rmoveto: + case OpCode_hmoveto: + case OpCode_vmoveto: + drop.seen_moveto = true; + break; + + case OpCode_hintmask: + case OpCode_cntrmask: + if (drop.seen_moveto) + { + str.values[pos].set_drop (); + break; + } + HB_FALLTHROUGH; + + case OpCode_hstemhm: + case OpCode_vstemhm: + case OpCode_hstem: + case OpCode_vstem: + has_hint = true; + str.values[pos].set_drop (); + if (str.at_end (pos)) + drop.ends_in_hint = true; + break; + + case OpCode_dotsection: + str.values[pos].set_drop (); + break; + + default: + /* NONE */ + break; + } + if (has_hint) + { + for (int i = pos - 1; i >= 0; i--) + { + parsed_cs_op_t &csop = str.values[(unsigned)i]; + if (csop.for_drop ()) + break; + csop.set_drop (); + if (csop.op == OpCode_vsindexcs) + drop.vsindex_dropped = true; + } + seen_hint |= has_hint; + } + } + + /* Raise all_dropped flag if all operators except return are dropped from a subr. + * It may happen even after seeing the first moveto if a subr contains + * only (usually one) hintmask operator, then calls to this subr can be dropped. + */ + drop.all_dropped = true; + for (unsigned int pos = 0; pos < str.values.length; pos++) + { + parsed_cs_op_t &csop = str.values[pos]; + if (csop.op == OpCode_return) + break; + if (!csop.for_drop ()) + { + drop.all_dropped = false; + break; + } + } + + return seen_hint; + } + + void collect_subr_refs_in_subr (parsed_cs_str_t &str, unsigned int pos, + unsigned int subr_num, parsed_cs_str_vec_t &subrs, + hb_set_t *closure, + const subr_subset_param_t ¶m) + { + closure->add (subr_num); + collect_subr_refs_in_str (subrs[subr_num], param); + } + + void collect_subr_refs_in_str (parsed_cs_str_t &str, const subr_subset_param_t ¶m) + { + for (unsigned int pos = 0; pos < str.values.length; pos++) + { + if (!str.values[pos].for_drop ()) + { + switch (str.values[pos].op) + { + case OpCode_callsubr: + collect_subr_refs_in_subr (str, pos, + str.values[pos].subr_num, *param.parsed_local_subrs, + param.local_closure, param); + break; + + case OpCode_callgsubr: + collect_subr_refs_in_subr (str, pos, + str.values[pos].subr_num, *param.parsed_global_subrs, + param.global_closure, param); + break; + + default: break; + } + } + } + } + + bool encode_str (const parsed_cs_str_t &str, const unsigned int fd, str_buff_t &buff) const + { + buff.init (); + str_encoder_t encoder (buff); + encoder.reset (); + /* if a prefix (CFF1 width or CFF2 vsindex) has been removed along with hints, + * re-insert it at the beginning of charstreing */ + if (str.has_prefix () && str.is_hint_dropped ()) + { + encoder.encode_num (str.prefix_num ()); + if (str.prefix_op () != OpCode_Invalid) + encoder.encode_op (str.prefix_op ()); + } + for (unsigned int i = 0; i < str.get_count(); i++) + { + const parsed_cs_op_t &opstr = str.values[i]; + if (!opstr.for_drop () && !opstr.for_skip ()) + { + switch (opstr.op) + { + case OpCode_callsubr: + encoder.encode_int (remaps.local_remaps[fd].biased_num (opstr.subr_num)); + encoder.encode_op (OpCode_callsubr); + break; + + case OpCode_callgsubr: + encoder.encode_int (remaps.global_remap.biased_num (opstr.subr_num)); + encoder.encode_op (OpCode_callgsubr); + break; + + default: + encoder.copy_str (opstr.str); + break; + } + } + } + return !encoder.is_error (); + } + + protected: + const ACC &acc; + const hb_subset_plan_t *plan; + + subr_closures_t closures; + + parsed_cs_str_vec_t parsed_charstrings; + parsed_cs_str_vec_t parsed_global_subrs; + hb_vector_t<parsed_cs_str_vec_t> parsed_local_subrs; + + subr_remaps_t remaps; + + private: + typedef typename SUBRS::count_type subr_count_type; +}; + +} /* namespace CFF */ + +HB_INTERNAL bool +hb_plan_subset_cff_fdselect (const hb_subset_plan_t *plan, + unsigned int fdCount, + const CFF::FDSelect &src, /* IN */ + unsigned int &subset_fd_count /* OUT */, + unsigned int &subset_fdselect_size /* OUT */, + unsigned int &subset_fdselect_format /* OUT */, + hb_vector_t<CFF::code_pair_t> &fdselect_ranges /* OUT */, + hb_inc_bimap_t &fdmap /* OUT */); + +HB_INTERNAL bool +hb_serialize_cff_fdselect (hb_serialize_context_t *c, + unsigned int num_glyphs, + const CFF::FDSelect &src, + unsigned int fd_count, + unsigned int fdselect_format, + unsigned int size, + const hb_vector_t<CFF::code_pair_t> &fdselect_ranges); + +#endif /* HB_SUBSET_CFF_COMMON_HH */ diff --git a/thirdparty/harfbuzz/src/hb-subset-cff1.cc b/thirdparty/harfbuzz/src/hb-subset-cff1.cc new file mode 100644 index 0000000000..df322f8451 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-subset-cff1.cc @@ -0,0 +1,940 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#include "hb.hh" + +#ifndef HB_NO_SUBSET_CFF + +#include "hb-open-type.hh" +#include "hb-ot-cff1-table.hh" +#include "hb-set.h" +#include "hb-bimap.hh" +#include "hb-subset-cff1.hh" +#include "hb-subset-plan.hh" +#include "hb-subset-cff-common.hh" +#include "hb-cff1-interp-cs.hh" + +using namespace CFF; + +struct remap_sid_t : hb_inc_bimap_t +{ + unsigned int add (unsigned int sid) + { + if ((sid != CFF_UNDEF_SID) && !is_std_std (sid)) + return offset_sid (hb_inc_bimap_t::add (unoffset_sid (sid))); + else + return sid; + } + + unsigned int operator[] (unsigned int sid) const + { + if (is_std_std (sid) || (sid == CFF_UNDEF_SID)) + return sid; + else + return offset_sid (get (unoffset_sid (sid))); + } + + static const unsigned int num_std_strings = 391; + + static bool is_std_std (unsigned int sid) { return sid < num_std_strings; } + static unsigned int offset_sid (unsigned int sid) { return sid + num_std_strings; } + static unsigned int unoffset_sid (unsigned int sid) { return sid - num_std_strings; } +}; + +struct cff1_sub_table_info_t : cff_sub_table_info_t +{ + cff1_sub_table_info_t () + : cff_sub_table_info_t (), + encoding_link (0), + charset_link (0) + { + privateDictInfo.init (); + } + + objidx_t encoding_link; + objidx_t charset_link; + table_info_t privateDictInfo; +}; + +/* a copy of a parsed out cff1_top_dict_values_t augmented with additional operators */ +struct cff1_top_dict_values_mod_t : cff1_top_dict_values_t +{ + void init (const cff1_top_dict_values_t *base_= &Null (cff1_top_dict_values_t)) + { + SUPER::init (); + base = base_; + } + + void fini () { SUPER::fini (); } + + unsigned get_count () const { return base->get_count () + SUPER::get_count (); } + const cff1_top_dict_val_t &get_value (unsigned int i) const + { + if (i < base->get_count ()) + return (*base)[i]; + else + return SUPER::values[i - base->get_count ()]; + } + const cff1_top_dict_val_t &operator [] (unsigned int i) const { return get_value (i); } + + void reassignSIDs (const remap_sid_t& sidmap) + { + for (unsigned int i = 0; i < name_dict_values_t::ValCount; i++) + nameSIDs[i] = sidmap[base->nameSIDs[i]]; + } + + protected: + typedef cff1_top_dict_values_t SUPER; + const cff1_top_dict_values_t *base; +}; + +struct top_dict_modifiers_t +{ + top_dict_modifiers_t (const cff1_sub_table_info_t &info_, + const unsigned int (&nameSIDs_)[name_dict_values_t::ValCount]) + : info (info_), + nameSIDs (nameSIDs_) + {} + + const cff1_sub_table_info_t &info; + const unsigned int (&nameSIDs)[name_dict_values_t::ValCount]; +}; + +struct cff1_top_dict_op_serializer_t : cff_top_dict_op_serializer_t<cff1_top_dict_val_t> +{ + bool serialize (hb_serialize_context_t *c, + const cff1_top_dict_val_t &opstr, + const top_dict_modifiers_t &mod) const + { + TRACE_SERIALIZE (this); + + op_code_t op = opstr.op; + switch (op) + { + case OpCode_charset: + if (mod.info.charset_link) + return_trace (FontDict::serialize_link4_op(c, op, mod.info.charset_link, whence_t::Absolute)); + else + goto fall_back; + + case OpCode_Encoding: + if (mod.info.encoding_link) + return_trace (FontDict::serialize_link4_op(c, op, mod.info.encoding_link, whence_t::Absolute)); + else + goto fall_back; + + case OpCode_Private: + return_trace (UnsizedByteStr::serialize_int2 (c, mod.info.privateDictInfo.size) && + Dict::serialize_link4_op (c, op, mod.info.privateDictInfo.link, whence_t::Absolute)); + + case OpCode_version: + case OpCode_Notice: + case OpCode_Copyright: + case OpCode_FullName: + case OpCode_FamilyName: + case OpCode_Weight: + case OpCode_PostScript: + case OpCode_BaseFontName: + case OpCode_FontName: + return_trace (FontDict::serialize_int2_op (c, op, mod.nameSIDs[name_dict_values_t::name_op_to_index (op)])); + + case OpCode_ROS: + { + /* for registry & ordering, reassigned SIDs are serialized + * for supplement, the original byte string is copied along with the op code */ + op_str_t supp_op; + supp_op.op = op; + if ( unlikely (!(opstr.str.length >= opstr.last_arg_offset + 3))) + return_trace (false); + supp_op.str = byte_str_t (&opstr.str + opstr.last_arg_offset, opstr.str.length - opstr.last_arg_offset); + return_trace (UnsizedByteStr::serialize_int2 (c, mod.nameSIDs[name_dict_values_t::registry]) && + UnsizedByteStr::serialize_int2 (c, mod.nameSIDs[name_dict_values_t::ordering]) && + copy_opstr (c, supp_op)); + } + fall_back: + default: + return_trace (cff_top_dict_op_serializer_t<cff1_top_dict_val_t>::serialize (c, opstr, mod.info)); + } + return_trace (true); + } + +}; + +struct cff1_font_dict_op_serializer_t : cff_font_dict_op_serializer_t +{ + bool serialize (hb_serialize_context_t *c, + const op_str_t &opstr, + const cff1_font_dict_values_mod_t &mod) const + { + TRACE_SERIALIZE (this); + + if (opstr.op == OpCode_FontName) + return_trace (FontDict::serialize_int2_op (c, opstr.op, mod.fontName)); + else + return_trace (SUPER::serialize (c, opstr, mod.privateDictInfo)); + } + + private: + typedef cff_font_dict_op_serializer_t SUPER; +}; + +struct cff1_cs_opset_flatten_t : cff1_cs_opset_t<cff1_cs_opset_flatten_t, flatten_param_t> +{ + static void flush_args_and_op (op_code_t op, cff1_cs_interp_env_t &env, flatten_param_t& param) + { + if (env.arg_start > 0) + flush_width (env, param); + + switch (op) + { + case OpCode_hstem: + case OpCode_hstemhm: + case OpCode_vstem: + case OpCode_vstemhm: + case OpCode_hintmask: + case OpCode_cntrmask: + case OpCode_dotsection: + if (param.drop_hints) + { + env.clear_args (); + return; + } + HB_FALLTHROUGH; + + default: + SUPER::flush_args_and_op (op, env, param); + break; + } + } + static void flush_args (cff1_cs_interp_env_t &env, flatten_param_t& param) + { + str_encoder_t encoder (param.flatStr); + for (unsigned int i = env.arg_start; i < env.argStack.get_count (); i++) + encoder.encode_num (env.eval_arg (i)); + SUPER::flush_args (env, param); + } + + static void flush_op (op_code_t op, cff1_cs_interp_env_t &env, flatten_param_t& param) + { + str_encoder_t encoder (param.flatStr); + encoder.encode_op (op); + } + + static void flush_width (cff1_cs_interp_env_t &env, flatten_param_t& param) + { + assert (env.has_width); + str_encoder_t encoder (param.flatStr); + encoder.encode_num (env.width); + } + + static void flush_hintmask (op_code_t op, cff1_cs_interp_env_t &env, flatten_param_t& param) + { + SUPER::flush_hintmask (op, env, param); + if (!param.drop_hints) + { + str_encoder_t encoder (param.flatStr); + for (unsigned int i = 0; i < env.hintmask_size; i++) + encoder.encode_byte (env.str_ref[i]); + } + } + + private: + typedef cff1_cs_opset_t<cff1_cs_opset_flatten_t, flatten_param_t> SUPER; +}; + +struct range_list_t : hb_vector_t<code_pair_t> +{ + /* replace the first glyph ID in the "glyph" field each range with a nLeft value */ + bool complete (unsigned int last_glyph) + { + bool two_byte = false; + for (unsigned int i = (*this).length; i > 0; i--) + { + code_pair_t &pair = (*this)[i - 1]; + unsigned int nLeft = last_glyph - pair.glyph - 1; + if (nLeft >= 0x100) + two_byte = true; + last_glyph = pair.glyph; + pair.glyph = nLeft; + } + return two_byte; + } +}; + +struct cff1_cs_opset_subr_subset_t : cff1_cs_opset_t<cff1_cs_opset_subr_subset_t, subr_subset_param_t> +{ + static void process_op (op_code_t op, cff1_cs_interp_env_t &env, subr_subset_param_t& param) + { + switch (op) { + + case OpCode_return: + param.current_parsed_str->add_op (op, env.str_ref); + param.current_parsed_str->set_parsed (); + env.return_from_subr (); + param.set_current_str (env, false); + break; + + case OpCode_endchar: + param.current_parsed_str->add_op (op, env.str_ref); + param.current_parsed_str->set_parsed (); + SUPER::process_op (op, env, param); + break; + + case OpCode_callsubr: + process_call_subr (op, CSType_LocalSubr, env, param, env.localSubrs, param.local_closure); + break; + + case OpCode_callgsubr: + process_call_subr (op, CSType_GlobalSubr, env, param, env.globalSubrs, param.global_closure); + break; + + default: + SUPER::process_op (op, env, param); + param.current_parsed_str->add_op (op, env.str_ref); + break; + } + } + + protected: + static void process_call_subr (op_code_t op, cs_type_t type, + cff1_cs_interp_env_t &env, subr_subset_param_t& param, + cff1_biased_subrs_t& subrs, hb_set_t *closure) + { + byte_str_ref_t str_ref = env.str_ref; + env.call_subr (subrs, type); + param.current_parsed_str->add_call_op (op, str_ref, env.context.subr_num); + closure->add (env.context.subr_num); + param.set_current_str (env, true); + } + + private: + typedef cff1_cs_opset_t<cff1_cs_opset_subr_subset_t, subr_subset_param_t> SUPER; +}; + +struct cff1_subr_subsetter_t : subr_subsetter_t<cff1_subr_subsetter_t, CFF1Subrs, const OT::cff1::accelerator_subset_t, cff1_cs_interp_env_t, cff1_cs_opset_subr_subset_t, OpCode_endchar> +{ + cff1_subr_subsetter_t (const OT::cff1::accelerator_subset_t &acc_, const hb_subset_plan_t *plan_) + : subr_subsetter_t (acc_, plan_) {} + + static void complete_parsed_str (cff1_cs_interp_env_t &env, subr_subset_param_t& param, parsed_cs_str_t &charstring) + { + /* insert width at the beginning of the charstring as necessary */ + if (env.has_width) + charstring.set_prefix (env.width); + + /* subroutines/charstring left on the call stack are legally left unmarked + * unmarked when a subroutine terminates with endchar. mark them. + */ + param.current_parsed_str->set_parsed (); + for (unsigned int i = 0; i < env.callStack.get_count (); i++) + { + parsed_cs_str_t *parsed_str = param.get_parsed_str_for_context (env.callStack[i]); + if (likely (parsed_str)) + parsed_str->set_parsed (); + else + env.set_error (); + } + } +}; + +struct cff_subset_plan { + cff_subset_plan () + : info (), + orig_fdcount (0), + subset_fdcount (1), + subset_fdselect_format (0), + drop_hints (false), + desubroutinize(false) + { + topdict_mod.init (); + subset_fdselect_ranges.init (); + fdmap.init (); + subset_charstrings.init (); + subset_globalsubrs.init (); + subset_localsubrs.init (); + fontdicts_mod.init (); + subset_enc_code_ranges.init (); + subset_enc_supp_codes.init (); + subset_charset_ranges.init (); + sidmap.init (); + for (unsigned int i = 0; i < name_dict_values_t::ValCount; i++) + topDictModSIDs[i] = CFF_UNDEF_SID; + } + + ~cff_subset_plan () + { + topdict_mod.fini (); + subset_fdselect_ranges.fini (); + fdmap.fini (); + subset_charstrings.fini_deep (); + subset_globalsubrs.fini_deep (); + subset_localsubrs.fini_deep (); + fontdicts_mod.fini (); + subset_enc_code_ranges.fini (); + subset_enc_supp_codes.fini (); + subset_charset_ranges.fini (); + sidmap.fini (); + } + + void plan_subset_encoding (const OT::cff1::accelerator_subset_t &acc, hb_subset_plan_t *plan) + { + const Encoding *encoding = acc.encoding; + unsigned int size0, size1, supp_size; + hb_codepoint_t code, last_code = CFF_UNDEF_CODE; + hb_vector_t<hb_codepoint_t> supp_codes; + + if (unlikely (!subset_enc_code_ranges.resize (0))) + { + plan->check_success (false); + return; + } + + supp_size = 0; + supp_codes.init (); + + subset_enc_num_codes = plan->num_output_glyphs () - 1; + unsigned int glyph; + for (glyph = 1; glyph < plan->num_output_glyphs (); glyph++) + { + hb_codepoint_t old_glyph; + if (!plan->old_gid_for_new_gid (glyph, &old_glyph)) + { + /* Retain the code for the old missing glyph ID */ + old_glyph = glyph; + } + code = acc.glyph_to_code (old_glyph); + if (code == CFF_UNDEF_CODE) + { + subset_enc_num_codes = glyph - 1; + break; + } + + if ((last_code == CFF_UNDEF_CODE) || (code != last_code + 1)) + { + code_pair_t pair = { code, glyph }; + subset_enc_code_ranges.push (pair); + } + last_code = code; + + if (encoding != &Null (Encoding)) + { + hb_codepoint_t sid = acc.glyph_to_sid (old_glyph); + encoding->get_supplement_codes (sid, supp_codes); + for (unsigned int i = 0; i < supp_codes.length; i++) + { + code_pair_t pair = { supp_codes[i], sid }; + subset_enc_supp_codes.push (pair); + } + supp_size += SuppEncoding::static_size * supp_codes.length; + } + } + supp_codes.fini (); + + subset_enc_code_ranges.complete (glyph); + + assert (subset_enc_num_codes <= 0xFF); + size0 = Encoding0::min_size + HBUINT8::static_size * subset_enc_num_codes; + size1 = Encoding1::min_size + Encoding1_Range::static_size * subset_enc_code_ranges.length; + + if (size0 < size1) + subset_enc_format = 0; + else + subset_enc_format = 1; + } + + void plan_subset_charset (const OT::cff1::accelerator_subset_t &acc, hb_subset_plan_t *plan) + { + unsigned int size0, size_ranges; + hb_codepoint_t sid, last_sid = CFF_UNDEF_CODE; + + if (unlikely (!subset_charset_ranges.resize (0))) + { + plan->check_success (false); + return; + } + + unsigned int glyph; + for (glyph = 1; glyph < plan->num_output_glyphs (); glyph++) + { + hb_codepoint_t old_glyph; + if (!plan->old_gid_for_new_gid (glyph, &old_glyph)) + { + /* Retain the SID for the old missing glyph ID */ + old_glyph = glyph; + } + sid = acc.glyph_to_sid (old_glyph); + + if (!acc.is_CID ()) + sid = sidmap.add (sid); + + if ((last_sid == CFF_UNDEF_CODE) || (sid != last_sid + 1)) + { + code_pair_t pair = { sid, glyph }; + subset_charset_ranges.push (pair); + } + last_sid = sid; + } + + bool two_byte = subset_charset_ranges.complete (glyph); + + size0 = Charset0::min_size + HBUINT16::static_size * (plan->num_output_glyphs () - 1); + if (!two_byte) + size_ranges = Charset1::min_size + Charset1_Range::static_size * subset_charset_ranges.length; + else + size_ranges = Charset2::min_size + Charset2_Range::static_size * subset_charset_ranges.length; + + if (size0 < size_ranges) + subset_charset_format = 0; + else if (!two_byte) + subset_charset_format = 1; + else + subset_charset_format = 2; + } + + bool collect_sids_in_dicts (const OT::cff1::accelerator_subset_t &acc) + { + sidmap.reset (); + + for (unsigned int i = 0; i < name_dict_values_t::ValCount; i++) + { + unsigned int sid = acc.topDict.nameSIDs[i]; + if (sid != CFF_UNDEF_SID) + { + (void)sidmap.add (sid); + topDictModSIDs[i] = sidmap[sid]; + } + } + + if (acc.fdArray != &Null (CFF1FDArray)) + for (unsigned int i = 0; i < orig_fdcount; i++) + if (fdmap.has (i)) + (void)sidmap.add (acc.fontDicts[i].fontName); + + return true; + } + + bool create (const OT::cff1::accelerator_subset_t &acc, + hb_subset_plan_t *plan) + { + /* make sure notdef is first */ + hb_codepoint_t old_glyph; + if (!plan->old_gid_for_new_gid (0, &old_glyph) || (old_glyph != 0)) return false; + + num_glyphs = plan->num_output_glyphs (); + orig_fdcount = acc.fdCount; + drop_hints = plan->drop_hints; + desubroutinize = plan->desubroutinize; + + /* check whether the subset renumbers any glyph IDs */ + gid_renum = false; + for (hb_codepoint_t new_glyph = 0; new_glyph < plan->num_output_glyphs (); new_glyph++) + { + if (!plan->old_gid_for_new_gid(new_glyph, &old_glyph)) + continue; + if (new_glyph != old_glyph) { + gid_renum = true; + break; + } + } + + subset_charset = gid_renum || !acc.is_predef_charset (); + subset_encoding = !acc.is_CID() && !acc.is_predef_encoding (); + + /* top dict INDEX */ + { + /* Add encoding/charset to a (copy of) top dict as necessary */ + topdict_mod.init (&acc.topDict); + bool need_to_add_enc = (subset_encoding && !acc.topDict.has_op (OpCode_Encoding)); + bool need_to_add_set = (subset_charset && !acc.topDict.has_op (OpCode_charset)); + if (need_to_add_enc || need_to_add_set) + { + if (need_to_add_enc) + topdict_mod.add_op (OpCode_Encoding); + if (need_to_add_set) + topdict_mod.add_op (OpCode_charset); + } + } + + /* Determine re-mapping of font index as fdmap among other info */ + if (acc.fdSelect != &Null (CFF1FDSelect)) + { + if (unlikely (!hb_plan_subset_cff_fdselect (plan, + orig_fdcount, + *acc.fdSelect, + subset_fdcount, + info.fd_select.size, + subset_fdselect_format, + subset_fdselect_ranges, + fdmap))) + return false; + } + else + fdmap.identity (1); + + /* remove unused SIDs & reassign SIDs */ + { + /* SIDs for name strings in dicts are added before glyph names so they fit in 16-bit int range */ + if (unlikely (!collect_sids_in_dicts (acc))) + return false; + if (unlikely (sidmap.get_population () > 0x8000)) /* assumption: a dict won't reference that many strings */ + return false; + + if (subset_charset) plan_subset_charset (acc, plan); + + topdict_mod.reassignSIDs (sidmap); + } + + if (desubroutinize) + { + /* Flatten global & local subrs */ + subr_flattener_t<const OT::cff1::accelerator_subset_t, cff1_cs_interp_env_t, cff1_cs_opset_flatten_t, OpCode_endchar> + flattener(acc, plan); + if (!flattener.flatten (subset_charstrings)) + return false; + } + else + { + cff1_subr_subsetter_t subr_subsetter (acc, plan); + + /* Subset subrs: collect used subroutines, leaving all unused ones behind */ + if (!subr_subsetter.subset ()) + return false; + + /* encode charstrings, global subrs, local subrs with new subroutine numbers */ + if (!subr_subsetter.encode_charstrings (subset_charstrings)) + return false; + + if (!subr_subsetter.encode_globalsubrs (subset_globalsubrs)) + return false; + + /* local subrs */ + if (!subset_localsubrs.resize (orig_fdcount)) + return false; + for (unsigned int fd = 0; fd < orig_fdcount; fd++) + { + subset_localsubrs[fd].init (); + if (fdmap.has (fd)) + { + if (!subr_subsetter.encode_localsubrs (fd, subset_localsubrs[fd])) + return false; + } + } + } + + /* Encoding */ + if (subset_encoding) + plan_subset_encoding (acc, plan); + + /* private dicts & local subrs */ + if (!acc.is_CID ()) + fontdicts_mod.push (cff1_font_dict_values_mod_t ()); + else + { + + hb_iter (acc.fontDicts) + | hb_filter ([&] (const cff1_font_dict_values_t &_) + { return fdmap.has (&_ - &acc.fontDicts[0]); } ) + | hb_map ([&] (const cff1_font_dict_values_t &_) + { + cff1_font_dict_values_mod_t mod; + mod.init (&_, sidmap[_.fontName]); + return mod; + }) + | hb_sink (fontdicts_mod) + ; + } + + return ((subset_charstrings.length == plan->num_output_glyphs ()) + && (fontdicts_mod.length == subset_fdcount)); + } + + cff1_top_dict_values_mod_t topdict_mod; + cff1_sub_table_info_t info; + + unsigned int num_glyphs; + unsigned int orig_fdcount; + unsigned int subset_fdcount; + unsigned int subset_fdselect_format; + hb_vector_t<code_pair_t> subset_fdselect_ranges; + + /* font dict index remap table from fullset FDArray to subset FDArray. + * set to CFF_UNDEF_CODE if excluded from subset */ + hb_inc_bimap_t fdmap; + + str_buff_vec_t subset_charstrings; + str_buff_vec_t subset_globalsubrs; + hb_vector_t<str_buff_vec_t> subset_localsubrs; + hb_vector_t<cff1_font_dict_values_mod_t> fontdicts_mod; + + bool drop_hints; + + bool gid_renum; + bool subset_encoding; + uint8_t subset_enc_format; + unsigned int subset_enc_num_codes; + range_list_t subset_enc_code_ranges; + hb_vector_t<code_pair_t> subset_enc_supp_codes; + + uint8_t subset_charset_format; + range_list_t subset_charset_ranges; + bool subset_charset; + + remap_sid_t sidmap; + unsigned int topDictModSIDs[name_dict_values_t::ValCount]; + + bool desubroutinize; +}; + +static bool _serialize_cff1 (hb_serialize_context_t *c, + cff_subset_plan &plan, + const OT::cff1::accelerator_subset_t &acc, + unsigned int num_glyphs) +{ + /* private dicts & local subrs */ + for (int i = (int)acc.privateDicts.length; --i >= 0 ;) + { + if (plan.fdmap.has (i)) + { + objidx_t subrs_link = 0; + if (plan.subset_localsubrs[i].length > 0) + { + CFF1Subrs *dest = c->start_embed <CFF1Subrs> (); + if (unlikely (!dest)) return false; + c->push (); + if (likely (dest && dest->serialize (c, plan.subset_localsubrs[i]))) + subrs_link = c->pop_pack (); + else + { + c->pop_discard (); + return false; + } + } + + PrivateDict *pd = c->start_embed<PrivateDict> (); + if (unlikely (!pd)) return false; + c->push (); + cff_private_dict_op_serializer_t privSzr (plan.desubroutinize, plan.drop_hints); + /* N.B. local subrs immediately follows its corresponding private dict. i.e., subr offset == private dict size */ + if (likely (pd->serialize (c, acc.privateDicts[i], privSzr, subrs_link))) + { + unsigned fd = plan.fdmap[i]; + plan.fontdicts_mod[fd].privateDictInfo.size = c->length (); + plan.fontdicts_mod[fd].privateDictInfo.link = c->pop_pack (); + } + else + { + c->pop_discard (); + return false; + } + } + } + + if (!acc.is_CID ()) + plan.info.privateDictInfo = plan.fontdicts_mod[0].privateDictInfo; + + /* CharStrings */ + { + CFF1CharStrings *cs = c->start_embed<CFF1CharStrings> (); + if (unlikely (!cs)) return false; + c->push (); + if (likely (cs->serialize (c, plan.subset_charstrings))) + plan.info.char_strings_link = c->pop_pack (); + else + { + c->pop_discard (); + return false; + } + } + + /* FDArray (FD Index) */ + if (acc.fdArray != &Null (CFF1FDArray)) + { + CFF1FDArray *fda = c->start_embed<CFF1FDArray> (); + if (unlikely (!fda)) return false; + c->push (); + cff1_font_dict_op_serializer_t fontSzr; + auto it = + hb_zip (+ hb_iter (plan.fontdicts_mod), + hb_iter (plan.fontdicts_mod)); + if (likely (fda->serialize (c, it, fontSzr))) + plan.info.fd_array_link = c->pop_pack (false); + else + { + c->pop_discard (); + return false; + } + } + + /* FDSelect */ + if (acc.fdSelect != &Null (CFF1FDSelect)) + { + c->push (); + if (likely (hb_serialize_cff_fdselect (c, num_glyphs, *acc.fdSelect, acc.fdCount, + plan.subset_fdselect_format, plan.info.fd_select.size, + plan.subset_fdselect_ranges))) + plan.info.fd_select.link = c->pop_pack (); + else + { + c->pop_discard (); + return false; + } + } + + /* Charset */ + if (plan.subset_charset) + { + Charset *dest = c->start_embed<Charset> (); + if (unlikely (!dest)) return false; + c->push (); + if (likely (dest->serialize (c, + plan.subset_charset_format, + plan.num_glyphs, + plan.subset_charset_ranges))) + plan.info.charset_link = c->pop_pack (); + else + { + c->pop_discard (); + return false; + } + } + + /* Encoding */ + if (plan.subset_encoding) + { + Encoding *dest = c->start_embed<Encoding> (); + if (unlikely (!dest)) return false; + c->push (); + if (likely (dest->serialize (c, + plan.subset_enc_format, + plan.subset_enc_num_codes, + plan.subset_enc_code_ranges, + plan.subset_enc_supp_codes))) + plan.info.encoding_link = c->pop_pack (); + else + { + c->pop_discard (); + return false; + } + } + + /* global subrs */ + { + c->push (); + CFF1Subrs *dest = c->start_embed <CFF1Subrs> (); + if (unlikely (!dest)) return false; + if (likely (dest->serialize (c, plan.subset_globalsubrs))) + c->pop_pack (); + else + { + c->pop_discard (); + return false; + } + } + + /* String INDEX */ + { + CFF1StringIndex *dest = c->start_embed<CFF1StringIndex> (); + if (unlikely (!dest)) return false; + c->push (); + if (likely (dest->serialize (c, *acc.stringIndex, plan.sidmap))) + c->pop_pack (); + else + { + c->pop_discard (); + return false; + } + } + + OT::cff1 *cff = c->allocate_min<OT::cff1> (); + if (unlikely (!cff)) + return false; + + /* header */ + cff->version.major = 0x01; + cff->version.minor = 0x00; + cff->nameIndex = cff->min_size; + cff->offSize = 4; /* unused? */ + + /* name INDEX */ + if (unlikely (!(*acc.nameIndex).copy (c))) return false; + + /* top dict INDEX */ + { + /* serialize singleton TopDict */ + TopDict *top = c->start_embed<TopDict> (); + if (!top) return false; + c->push (); + cff1_top_dict_op_serializer_t topSzr; + unsigned top_size = 0; + top_dict_modifiers_t modifier (plan.info, plan.topDictModSIDs); + if (likely (top->serialize (c, plan.topdict_mod, topSzr, modifier))) + { + top_size = c->length (); + c->pop_pack (false); + } + else + { + c->pop_discard (); + return false; + } + /* serialize INDEX header for above */ + CFF1Index *dest = c->start_embed<CFF1Index> (); + if (!dest) return false; + return dest->serialize_header (c, hb_iter (hb_array_t<unsigned> (&top_size, 1))); + } +} + +static bool +_hb_subset_cff1 (const OT::cff1::accelerator_subset_t &acc, + hb_subset_context_t *c) +{ + cff_subset_plan cff_plan; + + if (unlikely (!cff_plan.create (acc, c->plan))) + { + DEBUG_MSG(SUBSET, nullptr, "Failed to generate a cff subsetting plan."); + return false; + } + + return _serialize_cff1 (c->serializer, cff_plan, acc, c->plan->num_output_glyphs ()); +} + +/** + * hb_subset_cff1: + * Subsets the CFF table according to a provided plan. + * + * Return value: subsetted cff table. + **/ +bool +hb_subset_cff1 (hb_subset_context_t *c) +{ + OT::cff1::accelerator_subset_t acc; + acc.init (c->plan->source); + bool result = likely (acc.is_valid ()) && _hb_subset_cff1 (acc, c); + acc.fini (); + + return result; +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-subset-cff1.hh b/thirdparty/harfbuzz/src/hb-subset-cff1.hh new file mode 100644 index 0000000000..aaf5def1ed --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-subset-cff1.hh @@ -0,0 +1,37 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#ifndef HB_SUBSET_CFF1_HH +#define HB_SUBSET_CFF1_HH + +#include "hb.hh" + +#include "hb-subset-plan.hh" + +HB_INTERNAL bool +hb_subset_cff1 (hb_subset_context_t *c); + +#endif /* HB_SUBSET_CFF1_HH */ diff --git a/thirdparty/harfbuzz/src/hb-subset-cff2.cc b/thirdparty/harfbuzz/src/hb-subset-cff2.cc new file mode 100644 index 0000000000..17ee040deb --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-subset-cff2.cc @@ -0,0 +1,488 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#include "hb.hh" + +#ifndef HB_NO_SUBSET_CFF + +#include "hb-open-type.hh" +#include "hb-ot-cff2-table.hh" +#include "hb-set.h" +#include "hb-subset-cff2.hh" +#include "hb-subset-plan.hh" +#include "hb-subset-cff-common.hh" +#include "hb-cff2-interp-cs.hh" + +using namespace CFF; + +struct cff2_sub_table_info_t : cff_sub_table_info_t +{ + cff2_sub_table_info_t () + : cff_sub_table_info_t (), + var_store_link (0) + {} + + objidx_t var_store_link; +}; + +struct cff2_top_dict_op_serializer_t : cff_top_dict_op_serializer_t<> +{ + bool serialize (hb_serialize_context_t *c, + const op_str_t &opstr, + const cff2_sub_table_info_t &info) const + { + TRACE_SERIALIZE (this); + + switch (opstr.op) + { + case OpCode_vstore: + return_trace (FontDict::serialize_link4_op(c, opstr.op, info.var_store_link)); + + default: + return_trace (cff_top_dict_op_serializer_t<>::serialize (c, opstr, info)); + } + } +}; + +struct cff2_cs_opset_flatten_t : cff2_cs_opset_t<cff2_cs_opset_flatten_t, flatten_param_t> +{ + static void flush_args_and_op (op_code_t op, cff2_cs_interp_env_t &env, flatten_param_t& param) + { + switch (op) + { + case OpCode_return: + case OpCode_endchar: + /* dummy opcodes in CFF2. ignore */ + break; + + case OpCode_hstem: + case OpCode_hstemhm: + case OpCode_vstem: + case OpCode_vstemhm: + case OpCode_hintmask: + case OpCode_cntrmask: + if (param.drop_hints) + { + env.clear_args (); + return; + } + HB_FALLTHROUGH; + + default: + SUPER::flush_args_and_op (op, env, param); + break; + } + } + + static void flush_args (cff2_cs_interp_env_t &env, flatten_param_t& param) + { + for (unsigned int i = 0; i < env.argStack.get_count ();) + { + const blend_arg_t &arg = env.argStack[i]; + if (arg.blending ()) + { + if (unlikely (!((arg.numValues > 0) && (env.argStack.get_count () >= arg.numValues)))) + { + env.set_error (); + return; + } + flatten_blends (arg, i, env, param); + i += arg.numValues; + } + else + { + str_encoder_t encoder (param.flatStr); + encoder.encode_num (arg); + i++; + } + } + SUPER::flush_args (env, param); + } + + static void flatten_blends (const blend_arg_t &arg, unsigned int i, cff2_cs_interp_env_t &env, flatten_param_t& param) + { + /* flatten the default values */ + str_encoder_t encoder (param.flatStr); + for (unsigned int j = 0; j < arg.numValues; j++) + { + const blend_arg_t &arg1 = env.argStack[i + j]; + if (unlikely (!((arg1.blending () && (arg.numValues == arg1.numValues) && (arg1.valueIndex == j) && + (arg1.deltas.length == env.get_region_count ()))))) + { + env.set_error (); + return; + } + encoder.encode_num (arg1); + } + /* flatten deltas for each value */ + for (unsigned int j = 0; j < arg.numValues; j++) + { + const blend_arg_t &arg1 = env.argStack[i + j]; + for (unsigned int k = 0; k < arg1.deltas.length; k++) + encoder.encode_num (arg1.deltas[k]); + } + /* flatten the number of values followed by blend operator */ + encoder.encode_int (arg.numValues); + encoder.encode_op (OpCode_blendcs); + } + + static void flush_op (op_code_t op, cff2_cs_interp_env_t &env, flatten_param_t& param) + { + switch (op) + { + case OpCode_return: + case OpCode_endchar: + return; + default: + str_encoder_t encoder (param.flatStr); + encoder.encode_op (op); + } + } + + private: + typedef cff2_cs_opset_t<cff2_cs_opset_flatten_t, flatten_param_t> SUPER; + typedef cs_opset_t<blend_arg_t, cff2_cs_opset_flatten_t, cff2_cs_opset_flatten_t, cff2_cs_interp_env_t, flatten_param_t> CSOPSET; +}; + +struct cff2_cs_opset_subr_subset_t : cff2_cs_opset_t<cff2_cs_opset_subr_subset_t, subr_subset_param_t> +{ + static void process_op (op_code_t op, cff2_cs_interp_env_t &env, subr_subset_param_t& param) + { + switch (op) { + + case OpCode_return: + param.current_parsed_str->set_parsed (); + env.return_from_subr (); + param.set_current_str (env, false); + break; + + case OpCode_endchar: + param.current_parsed_str->set_parsed (); + SUPER::process_op (op, env, param); + break; + + case OpCode_callsubr: + process_call_subr (op, CSType_LocalSubr, env, param, env.localSubrs, param.local_closure); + break; + + case OpCode_callgsubr: + process_call_subr (op, CSType_GlobalSubr, env, param, env.globalSubrs, param.global_closure); + break; + + default: + SUPER::process_op (op, env, param); + param.current_parsed_str->add_op (op, env.str_ref); + break; + } + } + + protected: + static void process_call_subr (op_code_t op, cs_type_t type, + cff2_cs_interp_env_t &env, subr_subset_param_t& param, + cff2_biased_subrs_t& subrs, hb_set_t *closure) + { + byte_str_ref_t str_ref = env.str_ref; + env.call_subr (subrs, type); + param.current_parsed_str->add_call_op (op, str_ref, env.context.subr_num); + closure->add (env.context.subr_num); + param.set_current_str (env, true); + } + + private: + typedef cff2_cs_opset_t<cff2_cs_opset_subr_subset_t, subr_subset_param_t> SUPER; +}; + +struct cff2_subr_subsetter_t : subr_subsetter_t<cff2_subr_subsetter_t, CFF2Subrs, const OT::cff2::accelerator_subset_t, cff2_cs_interp_env_t, cff2_cs_opset_subr_subset_t> +{ + cff2_subr_subsetter_t (const OT::cff2::accelerator_subset_t &acc_, const hb_subset_plan_t *plan_) + : subr_subsetter_t (acc_, plan_) {} + + static void complete_parsed_str (cff2_cs_interp_env_t &env, subr_subset_param_t& param, parsed_cs_str_t &charstring) + { + /* vsindex is inserted at the beginning of the charstring as necessary */ + if (env.seen_vsindex ()) + { + number_t ivs; + ivs.set_int ((int)env.get_ivs ()); + charstring.set_prefix (ivs, OpCode_vsindexcs); + } + } +}; + +struct cff2_subset_plan { + cff2_subset_plan () + : orig_fdcount (0), + subset_fdcount(1), + subset_fdselect_size (0), + subset_fdselect_format (0), + drop_hints (false), + desubroutinize (false) + { + subset_fdselect_ranges.init (); + fdmap.init (); + subset_charstrings.init (); + subset_globalsubrs.init (); + subset_localsubrs.init (); + } + + ~cff2_subset_plan () + { + subset_fdselect_ranges.fini (); + fdmap.fini (); + subset_charstrings.fini_deep (); + subset_globalsubrs.fini_deep (); + subset_localsubrs.fini_deep (); + } + + bool create (const OT::cff2::accelerator_subset_t &acc, + hb_subset_plan_t *plan) + { + orig_fdcount = acc.fdArray->count; + + drop_hints = plan->drop_hints; + desubroutinize = plan->desubroutinize; + + if (desubroutinize) + { + /* Flatten global & local subrs */ + subr_flattener_t<const OT::cff2::accelerator_subset_t, cff2_cs_interp_env_t, cff2_cs_opset_flatten_t> + flattener(acc, plan); + if (!flattener.flatten (subset_charstrings)) + return false; + } + else + { + cff2_subr_subsetter_t subr_subsetter (acc, plan); + + /* Subset subrs: collect used subroutines, leaving all unused ones behind */ + if (!subr_subsetter.subset ()) + return false; + + /* encode charstrings, global subrs, local subrs with new subroutine numbers */ + if (!subr_subsetter.encode_charstrings (subset_charstrings)) + return false; + + if (!subr_subsetter.encode_globalsubrs (subset_globalsubrs)) + return false; + + /* local subrs */ + if (!subset_localsubrs.resize (orig_fdcount)) + return false; + for (unsigned int fd = 0; fd < orig_fdcount; fd++) + { + subset_localsubrs[fd].init (); + if (!subr_subsetter.encode_localsubrs (fd, subset_localsubrs[fd])) + return false; + } + } + + /* FDSelect */ + if (acc.fdSelect != &Null (CFF2FDSelect)) + { + if (unlikely (!hb_plan_subset_cff_fdselect (plan, + orig_fdcount, + *(const FDSelect *)acc.fdSelect, + subset_fdcount, + subset_fdselect_size, + subset_fdselect_format, + subset_fdselect_ranges, + fdmap))) + return false; + } + else + fdmap.identity (1); + + return true; + } + + cff2_sub_table_info_t info; + + unsigned int orig_fdcount; + unsigned int subset_fdcount; + unsigned int subset_fdselect_size; + unsigned int subset_fdselect_format; + hb_vector_t<code_pair_t> subset_fdselect_ranges; + + hb_inc_bimap_t fdmap; + + str_buff_vec_t subset_charstrings; + str_buff_vec_t subset_globalsubrs; + hb_vector_t<str_buff_vec_t> subset_localsubrs; + + bool drop_hints; + bool desubroutinize; +}; + +static bool _serialize_cff2 (hb_serialize_context_t *c, + cff2_subset_plan &plan, + const OT::cff2::accelerator_subset_t &acc, + unsigned int num_glyphs) +{ + /* private dicts & local subrs */ + hb_vector_t<table_info_t> private_dict_infos; + if (unlikely (!private_dict_infos.resize (plan.subset_fdcount))) return false; + + for (int i = (int)acc.privateDicts.length; --i >= 0 ;) + { + if (plan.fdmap.has (i)) + { + objidx_t subrs_link = 0; + + if (plan.subset_localsubrs[i].length > 0) + { + CFF2Subrs *dest = c->start_embed <CFF2Subrs> (); + if (unlikely (!dest)) return false; + c->push (); + if (likely (dest->serialize (c, plan.subset_localsubrs[i]))) + subrs_link = c->pop_pack (); + else + { + c->pop_discard (); + return false; + } + } + PrivateDict *pd = c->start_embed<PrivateDict> (); + if (unlikely (!pd)) return false; + c->push (); + cff_private_dict_op_serializer_t privSzr (plan.desubroutinize, plan.drop_hints); + if (likely (pd->serialize (c, acc.privateDicts[i], privSzr, subrs_link))) + { + unsigned fd = plan.fdmap[i]; + private_dict_infos[fd].size = c->length (); + private_dict_infos[fd].link = c->pop_pack (); + } + else + { + c->pop_discard (); + return false; + } + } + } + + /* CharStrings */ + { + CFF2CharStrings *cs = c->start_embed<CFF2CharStrings> (); + if (unlikely (!cs)) return false; + c->push (); + if (likely (cs->serialize (c, plan.subset_charstrings))) + plan.info.char_strings_link = c->pop_pack (); + else + { + c->pop_discard (); + return false; + } + } + + /* FDSelect */ + if (acc.fdSelect != &Null (CFF2FDSelect)) + { + c->push (); + if (likely (hb_serialize_cff_fdselect (c, num_glyphs, *(const FDSelect *)acc.fdSelect, plan.orig_fdcount, + plan.subset_fdselect_format, plan.subset_fdselect_size, + plan.subset_fdselect_ranges))) + plan.info.fd_select.link = c->pop_pack (); + else + { + c->pop_discard (); + return false; + } + } + + /* FDArray (FD Index) */ + { + c->push (); + CFF2FDArray *fda = c->start_embed<CFF2FDArray> (); + if (unlikely (!fda)) return false; + cff_font_dict_op_serializer_t fontSzr; + auto it = + + hb_zip (+ hb_iter (acc.fontDicts) + | hb_filter ([&] (const cff2_font_dict_values_t &_) + { return plan.fdmap.has (&_ - &acc.fontDicts[0]); }), + hb_iter (private_dict_infos)) + ; + if (unlikely (!fda->serialize (c, it, fontSzr))) return false; + plan.info.fd_array_link = c->pop_pack (); + } + + /* variation store */ + if (acc.varStore != &Null (CFF2VariationStore)) + { + c->push (); + CFF2VariationStore *dest = c->start_embed<CFF2VariationStore> (); + if (unlikely (!dest || !dest->serialize (c, acc.varStore))) return false; + plan.info.var_store_link = c->pop_pack (); + } + + OT::cff2 *cff2 = c->allocate_min<OT::cff2> (); + if (unlikely (!cff2)) return false; + + /* header */ + cff2->version.major = 0x02; + cff2->version.minor = 0x00; + cff2->topDict = OT::cff2::static_size; + + /* top dict */ + { + TopDict &dict = cff2 + cff2->topDict; + cff2_top_dict_op_serializer_t topSzr; + if (unlikely (!dict.serialize (c, acc.topDict, topSzr, plan.info))) return false; + cff2->topDictSize = c->head - (const char *)&dict; + } + + /* global subrs */ + { + CFF2Subrs *dest = c->start_embed <CFF2Subrs> (); + if (unlikely (!dest)) return false; + return dest->serialize (c, plan.subset_globalsubrs); + } +} + +static bool +_hb_subset_cff2 (const OT::cff2::accelerator_subset_t &acc, + hb_subset_context_t *c) +{ + cff2_subset_plan cff2_plan; + + if (unlikely (!cff2_plan.create (acc, c->plan))) return false; + return _serialize_cff2 (c->serializer, cff2_plan, acc, c->plan->num_output_glyphs ()); +} + +/** + * hb_subset_cff2: + * Subsets the CFF2 table according to a provided subset context. + **/ +bool +hb_subset_cff2 (hb_subset_context_t *c) +{ + OT::cff2::accelerator_subset_t acc; + acc.init (c->plan->source); + bool result = likely (acc.is_valid ()) && _hb_subset_cff2 (acc, c); + acc.fini (); + + return result; +} + +#endif diff --git a/thirdparty/harfbuzz/src/hb-subset-cff2.hh b/thirdparty/harfbuzz/src/hb-subset-cff2.hh new file mode 100644 index 0000000000..f10556ddd7 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-subset-cff2.hh @@ -0,0 +1,37 @@ +/* + * Copyright © 2018 Adobe Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Adobe Author(s): Michiharu Ariza + */ + +#ifndef HB_SUBSET_CFF2_HH +#define HB_SUBSET_CFF2_HH + +#include "hb.hh" + +#include "hb-subset-plan.hh" + +HB_INTERNAL bool +hb_subset_cff2 (hb_subset_context_t *c); + +#endif /* HB_SUBSET_CFF2_HH */ diff --git a/thirdparty/harfbuzz/src/hb-subset-input.cc b/thirdparty/harfbuzz/src/hb-subset-input.cc new file mode 100644 index 0000000000..fe9be3ce02 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-subset-input.cc @@ -0,0 +1,229 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Garret Rieger, Rod Sheeter, Behdad Esfahbod + */ + +#include "hb-subset.hh" +#include "hb-set.hh" + +/** + * hb_subset_input_create_or_fail: + * + * Return value: New subset input. + * + * Since: 1.8.0 + **/ +hb_subset_input_t * +hb_subset_input_create_or_fail () +{ + hb_subset_input_t *input = hb_object_create<hb_subset_input_t>(); + + if (unlikely (!input)) + return nullptr; + + input->unicodes = hb_set_create (); + input->glyphs = hb_set_create (); + input->name_ids = hb_set_create (); + hb_set_add_range (input->name_ids, 0, 6); + input->name_languages = hb_set_create (); + hb_set_add (input->name_languages, 0x0409); + input->drop_tables = hb_set_create (); + input->drop_hints = false; + input->desubroutinize = false; + input->retain_gids = false; + input->name_legacy = false; + + hb_tag_t default_drop_tables[] = { + // Layout disabled by default + HB_TAG ('G', 'S', 'U', 'B'), + HB_TAG ('G', 'P', 'O', 'S'), + HB_TAG ('G', 'D', 'E', 'F'), + HB_TAG ('m', 'o', 'r', 'x'), + HB_TAG ('m', 'o', 'r', 't'), + HB_TAG ('k', 'e', 'r', 'x'), + HB_TAG ('k', 'e', 'r', 'n'), + + // Copied from fontTools: + HB_TAG ('B', 'A', 'S', 'E'), + HB_TAG ('J', 'S', 'T', 'F'), + HB_TAG ('D', 'S', 'I', 'G'), + HB_TAG ('E', 'B', 'D', 'T'), + HB_TAG ('E', 'B', 'L', 'C'), + HB_TAG ('E', 'B', 'S', 'C'), + HB_TAG ('S', 'V', 'G', ' '), + HB_TAG ('P', 'C', 'L', 'T'), + HB_TAG ('L', 'T', 'S', 'H'), + // Graphite tables + HB_TAG ('F', 'e', 'a', 't'), + HB_TAG ('G', 'l', 'a', 't'), + HB_TAG ('G', 'l', 'o', 'c'), + HB_TAG ('S', 'i', 'l', 'f'), + HB_TAG ('S', 'i', 'l', 'l'), + }; + + input->drop_tables->add_array (default_drop_tables, ARRAY_LENGTH (default_drop_tables)); + + return input; +} + +/** + * hb_subset_input_reference: (skip) + * @subset_input: a subset_input. + * + * + * + * Return value: + * + * Since: 1.8.0 + **/ +hb_subset_input_t * +hb_subset_input_reference (hb_subset_input_t *subset_input) +{ + return hb_object_reference (subset_input); +} + +/** + * hb_subset_input_destroy: + * @subset_input: a subset_input. + * + * Since: 1.8.0 + **/ +void +hb_subset_input_destroy (hb_subset_input_t *subset_input) +{ + if (!hb_object_destroy (subset_input)) return; + + hb_set_destroy (subset_input->unicodes); + hb_set_destroy (subset_input->glyphs); + hb_set_destroy (subset_input->name_ids); + hb_set_destroy (subset_input->name_languages); + hb_set_destroy (subset_input->drop_tables); + + free (subset_input); +} + +/** + * hb_subset_input_unicode_set: + * @subset_input: a subset_input. + * + * Since: 1.8.0 + **/ +HB_EXTERN hb_set_t * +hb_subset_input_unicode_set (hb_subset_input_t *subset_input) +{ + return subset_input->unicodes; +} + +/** + * hb_subset_input_glyph_set: + * @subset_input: a subset_input. + * + * Since: 1.8.0 + **/ +HB_EXTERN hb_set_t * +hb_subset_input_glyph_set (hb_subset_input_t *subset_input) +{ + return subset_input->glyphs; +} + +HB_EXTERN hb_set_t * +hb_subset_input_nameid_set (hb_subset_input_t *subset_input) +{ + return subset_input->name_ids; +} + +HB_EXTERN hb_set_t * +hb_subset_input_namelangid_set (hb_subset_input_t *subset_input) +{ + return subset_input->name_languages; +} + +HB_EXTERN hb_set_t * +hb_subset_input_drop_tables_set (hb_subset_input_t *subset_input) +{ + return subset_input->drop_tables; +} + +HB_EXTERN void +hb_subset_input_set_drop_hints (hb_subset_input_t *subset_input, + hb_bool_t drop_hints) +{ + subset_input->drop_hints = drop_hints; +} + +HB_EXTERN hb_bool_t +hb_subset_input_get_drop_hints (hb_subset_input_t *subset_input) +{ + return subset_input->drop_hints; +} + +HB_EXTERN void +hb_subset_input_set_desubroutinize (hb_subset_input_t *subset_input, + hb_bool_t desubroutinize) +{ + subset_input->desubroutinize = desubroutinize; +} + +HB_EXTERN hb_bool_t +hb_subset_input_get_desubroutinize (hb_subset_input_t *subset_input) +{ + return subset_input->desubroutinize; +} + +/** + * hb_subset_input_set_retain_gids: + * @subset_input: a subset_input. + * @retain_gids: If true the subsetter will not renumber glyph ids. + * Since: 2.4.0 + **/ +HB_EXTERN void +hb_subset_input_set_retain_gids (hb_subset_input_t *subset_input, + hb_bool_t retain_gids) +{ + subset_input->retain_gids = retain_gids; +} + +/** + * hb_subset_input_get_retain_gids: + * Returns: value of retain_gids. + * Since: 2.4.0 + **/ +HB_EXTERN hb_bool_t +hb_subset_input_get_retain_gids (hb_subset_input_t *subset_input) +{ + return subset_input->retain_gids; +} + +HB_EXTERN void +hb_subset_input_set_name_legacy (hb_subset_input_t *subset_input, + hb_bool_t name_legacy) +{ + subset_input->name_legacy = name_legacy; +} + +HB_EXTERN hb_bool_t +hb_subset_input_get_name_legacy (hb_subset_input_t *subset_input) +{ + return subset_input->name_legacy; +} diff --git a/thirdparty/harfbuzz/src/hb-subset-input.hh b/thirdparty/harfbuzz/src/hb-subset-input.hh new file mode 100644 index 0000000000..0aeb96695b --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-subset-input.hh @@ -0,0 +1,61 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Garret Rieger, Roderick Sheeter + */ + +#ifndef HB_SUBSET_INPUT_HH +#define HB_SUBSET_INPUT_HH + + +#include "hb.hh" + +#include "hb-subset.h" + +#include "hb-font.hh" + +struct hb_subset_input_t +{ + hb_object_header_t header; + + hb_set_t *unicodes; + hb_set_t *glyphs; + hb_set_t *name_ids; + hb_set_t *name_languages; + hb_set_t *drop_tables; + + bool drop_hints; + bool desubroutinize; + bool retain_gids; + bool name_legacy; + /* TODO + * + * features + * lookups + * name_ids + * ... + */ +}; + + +#endif /* HB_SUBSET_INPUT_HH */ diff --git a/thirdparty/harfbuzz/src/hb-subset-plan.cc b/thirdparty/harfbuzz/src/hb-subset-plan.cc new file mode 100644 index 0000000000..24beada3e8 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-subset-plan.cc @@ -0,0 +1,395 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Garret Rieger, Roderick Sheeter + */ + +#include "hb-subset-plan.hh" +#include "hb-map.hh" +#include "hb-set.hh" + +#include "hb-ot-cmap-table.hh" +#include "hb-ot-glyf-table.hh" +#include "hb-ot-layout-gdef-table.hh" +#include "hb-ot-layout-gpos-table.hh" +#include "hb-ot-layout-gsub-table.hh" +#include "hb-ot-cff1-table.hh" +#include "hb-ot-color-colr-table.hh" +#include "hb-ot-var-fvar-table.hh" +#include "hb-ot-stat-table.hh" + + +#ifndef HB_NO_SUBSET_CFF +static inline void +_add_cff_seac_components (const OT::cff1::accelerator_t &cff, + hb_codepoint_t gid, + hb_set_t *gids_to_retain) +{ + hb_codepoint_t base_gid, accent_gid; + if (cff.get_seac_components (gid, &base_gid, &accent_gid)) + { + gids_to_retain->add (base_gid); + gids_to_retain->add (accent_gid); + } +} +#endif + +#ifndef HB_NO_SUBSET_LAYOUT +static void +_remap_indexes (const hb_set_t *indexes, + hb_map_t *mapping /* OUT */) +{ + unsigned count = indexes->get_population (); + + for (auto _ : + hb_zip (indexes->iter (), hb_range (count))) + mapping->set (_.first, _.second); + +} + +static inline void +_gsub_closure_glyphs_lookups_features (hb_face_t *face, + hb_set_t *gids_to_retain, + hb_map_t *gsub_lookups, + hb_map_t *gsub_features) +{ + hb_set_t lookup_indices; + hb_ot_layout_collect_lookups (face, + HB_OT_TAG_GSUB, + nullptr, + nullptr, + nullptr, + &lookup_indices); + hb_ot_layout_lookups_substitute_closure (face, + &lookup_indices, + gids_to_retain); + hb_blob_ptr_t<OT::GSUB> gsub = hb_sanitize_context_t ().reference_table<OT::GSUB> (face); + gsub->closure_lookups (face, + gids_to_retain, + &lookup_indices); + _remap_indexes (&lookup_indices, gsub_lookups); + + //closure features + hb_set_t feature_indices; + gsub->closure_features (gsub_lookups, &feature_indices); + _remap_indexes (&feature_indices, gsub_features); + gsub.destroy (); +} + +static inline void +_gpos_closure_lookups_features (hb_face_t *face, + const hb_set_t *gids_to_retain, + hb_map_t *gpos_lookups, + hb_map_t *gpos_features) +{ + hb_set_t lookup_indices; + hb_ot_layout_collect_lookups (face, + HB_OT_TAG_GPOS, + nullptr, + nullptr, + nullptr, + &lookup_indices); + hb_blob_ptr_t<OT::GPOS> gpos = hb_sanitize_context_t ().reference_table<OT::GPOS> (face); + gpos->closure_lookups (face, + gids_to_retain, + &lookup_indices); + _remap_indexes (&lookup_indices, gpos_lookups); + + //closure features + hb_set_t feature_indices; + gpos->closure_features (gpos_lookups, &feature_indices); + _remap_indexes (&feature_indices, gpos_features); + gpos.destroy (); +} +#endif + +#ifndef HB_NO_VAR +static inline void + _collect_layout_variation_indices (hb_face_t *face, + const hb_set_t *glyphset, + const hb_map_t *gpos_lookups, + hb_set_t *layout_variation_indices, + hb_map_t *layout_variation_idx_map) +{ + hb_blob_ptr_t<OT::GDEF> gdef = hb_sanitize_context_t ().reference_table<OT::GDEF> (face); + hb_blob_ptr_t<OT::GPOS> gpos = hb_sanitize_context_t ().reference_table<OT::GPOS> (face); + + if (!gdef->has_data ()) + { + gdef.destroy (); + gpos.destroy (); + return; + } + OT::hb_collect_variation_indices_context_t c (layout_variation_indices, glyphset, gpos_lookups); + gdef->collect_variation_indices (&c); + + if (hb_ot_layout_has_positioning (face)) + gpos->collect_variation_indices (&c); + + gdef->remap_layout_variation_indices (layout_variation_indices, layout_variation_idx_map); + + gdef.destroy (); + gpos.destroy (); +} +#endif + +static inline void +_cmap_closure (hb_face_t *face, + const hb_set_t *unicodes, + hb_set_t *glyphset) +{ + OT::cmap::accelerator_t cmap; + cmap.init (face); + cmap.table->closure_glyphs (unicodes, glyphset); + cmap.fini (); +} + +static inline void +_remove_invalid_gids (hb_set_t *glyphs, + unsigned int num_glyphs) +{ + hb_codepoint_t gid = HB_SET_VALUE_INVALID; + while (glyphs->next (&gid)) + { + if (gid >= num_glyphs) + glyphs->del (gid); + } +} + +static void +_populate_gids_to_retain (hb_subset_plan_t* plan, + const hb_set_t *unicodes, + const hb_set_t *input_glyphs_to_retain, + bool close_over_gsub, + bool close_over_gpos, + bool close_over_gdef) +{ + OT::cmap::accelerator_t cmap; + OT::glyf::accelerator_t glyf; +#ifndef HB_NO_SUBSET_CFF + OT::cff1::accelerator_t cff; +#endif + OT::COLR::accelerator_t colr; + cmap.init (plan->source); + glyf.init (plan->source); +#ifndef HB_NO_SUBSET_CFF + cff.init (plan->source); +#endif + colr.init (plan->source); + + plan->_glyphset_gsub->add (0); // Not-def + hb_set_union (plan->_glyphset_gsub, input_glyphs_to_retain); + + hb_codepoint_t cp = HB_SET_VALUE_INVALID; + while (unicodes->next (&cp)) + { + hb_codepoint_t gid; + if (!cmap.get_nominal_glyph (cp, &gid)) + { + DEBUG_MSG(SUBSET, nullptr, "Drop U+%04X; no gid", cp); + continue; + } + plan->unicodes->add (cp); + plan->codepoint_to_glyph->set (cp, gid); + plan->_glyphset_gsub->add (gid); + } + + _cmap_closure (plan->source, plan->unicodes, plan->_glyphset_gsub); + +#ifndef HB_NO_SUBSET_LAYOUT + if (close_over_gsub) + // closure all glyphs/lookups/features needed for GSUB substitutions. + _gsub_closure_glyphs_lookups_features (plan->source, plan->_glyphset_gsub, plan->gsub_lookups, plan->gsub_features); + + if (close_over_gpos) + _gpos_closure_lookups_features (plan->source, plan->_glyphset_gsub, plan->gpos_lookups, plan->gpos_features); +#endif + _remove_invalid_gids (plan->_glyphset_gsub, plan->source->get_num_glyphs ()); + + // Populate a full set of glyphs to retain by adding all referenced + // composite glyphs. + hb_codepoint_t gid = HB_SET_VALUE_INVALID; + while (plan->_glyphset_gsub->next (&gid)) + { + glyf.add_gid_and_children (gid, plan->_glyphset); +#ifndef HB_NO_SUBSET_CFF + if (cff.is_valid ()) + _add_cff_seac_components (cff, gid, plan->_glyphset); +#endif + if (colr.is_valid ()) + colr.closure_glyphs (gid, plan->_glyphset); + } + + _remove_invalid_gids (plan->_glyphset, plan->source->get_num_glyphs ()); + +#ifndef HB_NO_VAR + if (close_over_gdef) + _collect_layout_variation_indices (plan->source, plan->_glyphset, plan->gpos_lookups, plan->layout_variation_indices, plan->layout_variation_idx_map); +#endif + +#ifndef HB_NO_SUBSET_CFF + cff.fini (); +#endif + glyf.fini (); + cmap.fini (); +} + +static void +_create_old_gid_to_new_gid_map (const hb_face_t *face, + bool retain_gids, + const hb_set_t *all_gids_to_retain, + hb_map_t *glyph_map, /* OUT */ + hb_map_t *reverse_glyph_map, /* OUT */ + unsigned int *num_glyphs /* OUT */) +{ + if (!retain_gids) + { + + hb_enumerate (hb_iter (all_gids_to_retain), (hb_codepoint_t) 0) + | hb_sink (reverse_glyph_map) + ; + *num_glyphs = reverse_glyph_map->get_population (); + } else { + + hb_iter (all_gids_to_retain) + | hb_map ([] (hb_codepoint_t _) { + return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (_, _); + }) + | hb_sink (reverse_glyph_map) + ; + + unsigned max_glyph = + + hb_iter (all_gids_to_retain) + | hb_reduce (hb_max, 0u) + ; + *num_glyphs = max_glyph + 1; + } + + + reverse_glyph_map->iter () + | hb_map (&hb_pair_t<hb_codepoint_t, hb_codepoint_t>::reverse) + | hb_sink (glyph_map) + ; +} + +static void +_nameid_closure (hb_face_t *face, + hb_set_t *nameids) +{ +#ifndef HB_NO_STYLE + face->table.STAT->collect_name_ids (nameids); +#endif +#ifndef HB_NO_VAR + face->table.fvar->collect_name_ids (nameids); +#endif +} + +/** + * hb_subset_plan_create: + * Computes a plan for subsetting the supplied face according + * to a provided input. The plan describes + * which tables and glyphs should be retained. + * + * Return value: New subset plan. + * + * Since: 1.7.5 + **/ +hb_subset_plan_t * +hb_subset_plan_create (hb_face_t *face, + hb_subset_input_t *input) +{ + hb_subset_plan_t *plan; + if (unlikely (!(plan = hb_object_create<hb_subset_plan_t> ()))) + return const_cast<hb_subset_plan_t *> (&Null (hb_subset_plan_t)); + + plan->successful = true; + plan->drop_hints = input->drop_hints; + plan->desubroutinize = input->desubroutinize; + plan->retain_gids = input->retain_gids; + plan->name_legacy = input->name_legacy; + plan->unicodes = hb_set_create (); + plan->name_ids = hb_set_reference (input->name_ids); + _nameid_closure (face, plan->name_ids); + plan->name_languages = hb_set_reference (input->name_languages); + plan->glyphs_requested = hb_set_reference (input->glyphs); + plan->drop_tables = hb_set_reference (input->drop_tables); + plan->source = hb_face_reference (face); + plan->dest = hb_face_builder_create (); + + plan->_glyphset = hb_set_create (); + plan->_glyphset_gsub = hb_set_create (); + plan->codepoint_to_glyph = hb_map_create (); + plan->glyph_map = hb_map_create (); + plan->reverse_glyph_map = hb_map_create (); + plan->gsub_lookups = hb_map_create (); + plan->gpos_lookups = hb_map_create (); + plan->gsub_features = hb_map_create (); + plan->gpos_features = hb_map_create (); + plan->layout_variation_indices = hb_set_create (); + plan->layout_variation_idx_map = hb_map_create (); + + _populate_gids_to_retain (plan, + input->unicodes, + input->glyphs, + !input->drop_tables->has (HB_OT_TAG_GSUB), + !input->drop_tables->has (HB_OT_TAG_GPOS), + !input->drop_tables->has (HB_OT_TAG_GDEF)); + + _create_old_gid_to_new_gid_map (face, + input->retain_gids, + plan->_glyphset, + plan->glyph_map, + plan->reverse_glyph_map, + &plan->_num_output_glyphs); + + return plan; +} + +/** + * hb_subset_plan_destroy: + * + * Since: 1.7.5 + **/ +void +hb_subset_plan_destroy (hb_subset_plan_t *plan) +{ + if (!hb_object_destroy (plan)) return; + + hb_set_destroy (plan->unicodes); + hb_set_destroy (plan->name_ids); + hb_set_destroy (plan->name_languages); + hb_set_destroy (plan->glyphs_requested); + hb_set_destroy (plan->drop_tables); + hb_face_destroy (plan->source); + hb_face_destroy (plan->dest); + hb_map_destroy (plan->codepoint_to_glyph); + hb_map_destroy (plan->glyph_map); + hb_map_destroy (plan->reverse_glyph_map); + hb_set_destroy (plan->_glyphset); + hb_set_destroy (plan->_glyphset_gsub); + hb_map_destroy (plan->gsub_lookups); + hb_map_destroy (plan->gpos_lookups); + hb_map_destroy (plan->gsub_features); + hb_map_destroy (plan->gpos_features); + hb_set_destroy (plan->layout_variation_indices); + hb_map_destroy (plan->layout_variation_idx_map); + + + free (plan); +} diff --git a/thirdparty/harfbuzz/src/hb-subset-plan.hh b/thirdparty/harfbuzz/src/hb-subset-plan.hh new file mode 100644 index 0000000000..e9f603dd1d --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-subset-plan.hh @@ -0,0 +1,194 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Garret Rieger, Roderick Sheeter + */ + +#ifndef HB_SUBSET_PLAN_HH +#define HB_SUBSET_PLAN_HH + +#include "hb.hh" + +#include "hb-subset.h" +#include "hb-subset-input.hh" + +#include "hb-map.hh" +#include "hb-set.hh" + +struct hb_subset_plan_t +{ + hb_object_header_t header; + + bool successful : 1; + bool drop_hints : 1; + bool desubroutinize : 1; + bool retain_gids : 1; + bool name_legacy : 1; + + // For each cp that we'd like to retain maps to the corresponding gid. + hb_set_t *unicodes; + + // name_ids we would like to retain + hb_set_t *name_ids; + + // name_languages we would like to retain + hb_set_t *name_languages; + + //glyph ids requested to retain + hb_set_t *glyphs_requested; + + // Tables which should be dropped. + hb_set_t *drop_tables; + + // The glyph subset + hb_map_t *codepoint_to_glyph; + + // Old -> New glyph id mapping + hb_map_t *glyph_map; + hb_map_t *reverse_glyph_map; + + // Plan is only good for a specific source/dest so keep them with it + hb_face_t *source; + hb_face_t *dest; + + unsigned int _num_output_glyphs; + hb_set_t *_glyphset; + hb_set_t *_glyphset_gsub; + + //active lookups we'd like to retain + hb_map_t *gsub_lookups; + hb_map_t *gpos_lookups; + + //active features we'd like to retain + hb_map_t *gsub_features; + hb_map_t *gpos_features; + + //The set of layout item variation store delta set indices to be retained + hb_set_t *layout_variation_indices; + //Old -> New layout item variation store delta set index mapping + hb_map_t *layout_variation_idx_map; + + public: + + bool in_error () const { return !successful; } + + bool check_success(bool success) + { + successful = (successful && success); + return successful; + } + + /* + * The set of input glyph ids which will be retained in the subset. + * Does NOT include ids kept due to retain_gids. You probably want to use + * glyph_map/reverse_glyph_map. + */ + inline const hb_set_t * + glyphset () const + { + return _glyphset; + } + + /* + * The set of input glyph ids which will be retained in the subset. + */ + inline const hb_set_t * + glyphset_gsub () const + { + return _glyphset_gsub; + } + + /* + * The total number of output glyphs in the final subset. + */ + inline unsigned int + num_output_glyphs () const + { + return _num_output_glyphs; + } + + /* + * Given an output gid , returns true if that glyph id is an empty + * glyph (ie. it's a gid that we are dropping all data for). + */ + inline bool is_empty_glyph (hb_codepoint_t gid) const + { + return !_glyphset->has (gid); + } + + inline bool new_gid_for_codepoint (hb_codepoint_t codepoint, + hb_codepoint_t *new_gid) const + { + hb_codepoint_t old_gid = codepoint_to_glyph->get (codepoint); + if (old_gid == HB_MAP_VALUE_INVALID) + return false; + + return new_gid_for_old_gid (old_gid, new_gid); + } + + inline bool new_gid_for_old_gid (hb_codepoint_t old_gid, + hb_codepoint_t *new_gid) const + { + hb_codepoint_t gid = glyph_map->get (old_gid); + if (gid == HB_MAP_VALUE_INVALID) + return false; + + *new_gid = gid; + return true; + } + + inline bool old_gid_for_new_gid (hb_codepoint_t new_gid, + hb_codepoint_t *old_gid) const + { + hb_codepoint_t gid = reverse_glyph_map->get (new_gid); + if (gid == HB_MAP_VALUE_INVALID) + return false; + + *old_gid = gid; + return true; + } + + inline bool + add_table (hb_tag_t tag, + hb_blob_t *contents) + { + hb_blob_t *source_blob = source->reference_table (tag); + DEBUG_MSG(SUBSET, nullptr, "add table %c%c%c%c, dest %d bytes, source %d bytes", + HB_UNTAG(tag), + hb_blob_get_length (contents), + hb_blob_get_length (source_blob)); + hb_blob_destroy (source_blob); + return hb_face_builder_add_table (dest, tag, contents); + } +}; + +typedef struct hb_subset_plan_t hb_subset_plan_t; + +HB_INTERNAL hb_subset_plan_t * +hb_subset_plan_create (hb_face_t *face, + hb_subset_input_t *input); + +HB_INTERNAL void +hb_subset_plan_destroy (hb_subset_plan_t *plan); + +#endif /* HB_SUBSET_PLAN_HH */ diff --git a/thirdparty/harfbuzz/src/hb-subset.cc b/thirdparty/harfbuzz/src/hb-subset.cc new file mode 100644 index 0000000000..8b77ecd45a --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-subset.cc @@ -0,0 +1,269 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Garret Rieger, Rod Sheeter, Behdad Esfahbod + */ + +#include "hb.hh" +#include "hb-open-type.hh" + +#include "hb-subset.hh" + +#include "hb-open-file.hh" +#include "hb-ot-cmap-table.hh" +#include "hb-ot-glyf-table.hh" +#include "hb-ot-hdmx-table.hh" +#include "hb-ot-head-table.hh" +#include "hb-ot-hhea-table.hh" +#include "hb-ot-hmtx-table.hh" +#include "hb-ot-maxp-table.hh" +#include "hb-ot-color-sbix-table.hh" +#include "hb-ot-color-colr-table.hh" +#include "hb-ot-os2-table.hh" +#include "hb-ot-post-table.hh" +#include "hb-ot-cff1-table.hh" +#include "hb-ot-cff2-table.hh" +#include "hb-ot-vorg-table.hh" +#include "hb-ot-name-table.hh" +#include "hb-ot-color-cbdt-table.hh" +#include "hb-ot-layout-gsub-table.hh" +#include "hb-ot-layout-gpos-table.hh" +#include "hb-ot-var-gvar-table.hh" +#include "hb-ot-var-hvar-table.hh" + + +static unsigned +_plan_estimate_subset_table_size (hb_subset_plan_t *plan, unsigned table_len) +{ + unsigned src_glyphs = plan->source->get_num_glyphs (); + unsigned dst_glyphs = plan->glyphset ()->get_population (); + + if (unlikely (!src_glyphs)) + return 512 + table_len; + + return 512 + (unsigned) (table_len * sqrt ((double) dst_glyphs / src_glyphs)); +} + +template<typename TableType> +static bool +_subset (hb_subset_plan_t *plan) +{ + bool result = false; + hb_blob_t *source_blob = hb_sanitize_context_t ().reference_table<TableType> (plan->source); + const TableType *table = source_blob->as<TableType> (); + + hb_tag_t tag = TableType::tableTag; + if (source_blob->data) + { + hb_vector_t<char> buf; + /* TODO Not all tables are glyph-related. 'name' table size for example should not be + * affected by number of glyphs. Accommodate that. */ + unsigned buf_size = _plan_estimate_subset_table_size (plan, source_blob->length); + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c initial estimated table size: %u bytes.", HB_UNTAG (tag), buf_size); + if (unlikely (!buf.alloc (buf_size))) + { + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c failed to allocate %u bytes.", HB_UNTAG (tag), buf_size); + hb_blob_destroy (source_blob); + return false; + } + retry: + hb_serialize_context_t serializer ((void *) buf, buf_size); + serializer.start_serialize<TableType> (); + hb_subset_context_t c (source_blob, plan, &serializer, tag); + bool needed = table->subset (&c); + if (serializer.ran_out_of_room) + { + buf_size += (buf_size >> 1) + 32; + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c ran out of room; reallocating to %u bytes.", HB_UNTAG (tag), buf_size); + if (unlikely (!buf.alloc (buf_size))) + { + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c failed to reallocate %u bytes.", HB_UNTAG (tag), buf_size); + hb_blob_destroy (source_blob); + return false; + } + goto retry; + } + serializer.end_serialize (); + + result = !serializer.in_error (); + + if (result) + { + if (needed) + { + hb_blob_t *dest_blob = serializer.copy_blob (); + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c final subset table size: %u bytes.", HB_UNTAG (tag), dest_blob->length); + result = c.plan->add_table (tag, dest_blob); + hb_blob_destroy (dest_blob); + } + else + { + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset table subsetted to empty.", HB_UNTAG (tag)); + } + } + } + else + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset sanitize failed on source table.", HB_UNTAG (tag)); + + hb_blob_destroy (source_blob); + DEBUG_MSG (SUBSET, nullptr, "OT::%c%c%c%c::subset %s", HB_UNTAG (tag), result ? "success" : "FAILED!"); + return result; +} + +static bool +_is_table_present (hb_face_t *source, hb_tag_t tag) +{ + hb_tag_t table_tags[32]; + unsigned offset = 0, num_tables = ARRAY_LENGTH (table_tags); + while ((hb_face_get_table_tags (source, offset, &num_tables, table_tags), num_tables)) + { + for (unsigned i = 0; i < num_tables; ++i) + if (table_tags[i] == tag) + return true; + offset += num_tables; + } + return false; +} + +static bool +_should_drop_table (hb_subset_plan_t *plan, hb_tag_t tag) +{ + if (plan->drop_tables->has (tag)) + return true; + + switch (tag) + { + case HB_TAG ('c','v','a','r'): /* hint table, fallthrough */ + case HB_TAG ('c','v','t',' '): /* hint table, fallthrough */ + case HB_TAG ('f','p','g','m'): /* hint table, fallthrough */ + case HB_TAG ('p','r','e','p'): /* hint table, fallthrough */ + case HB_TAG ('h','d','m','x'): /* hint table, fallthrough */ + case HB_TAG ('V','D','M','X'): /* hint table, fallthrough */ + return plan->drop_hints; + +#ifdef HB_NO_SUBSET_LAYOUT + // Drop Layout Tables if requested. + case HB_OT_TAG_GDEF: + case HB_OT_TAG_GPOS: + case HB_OT_TAG_GSUB: + case HB_TAG ('m','o','r','x'): + case HB_TAG ('m','o','r','t'): + case HB_TAG ('k','e','r','x'): + case HB_TAG ('k','e','r','n'): + return true; +#endif + + default: + return false; + } +} + +static bool +_subset_table (hb_subset_plan_t *plan, hb_tag_t tag) +{ + DEBUG_MSG (SUBSET, nullptr, "subset %c%c%c%c", HB_UNTAG (tag)); + switch (tag) + { + case HB_OT_TAG_glyf: return _subset<const OT::glyf> (plan); + case HB_OT_TAG_hdmx: return _subset<const OT::hdmx> (plan); + case HB_OT_TAG_name: return _subset<const OT::name> (plan); + case HB_OT_TAG_head: + if (_is_table_present (plan->source, HB_OT_TAG_glyf) && !_should_drop_table (plan, HB_OT_TAG_glyf)) + return true; /* skip head, handled by glyf */ + return _subset<const OT::head> (plan); + case HB_OT_TAG_hhea: return true; /* skip hhea, handled by hmtx */ + case HB_OT_TAG_hmtx: return _subset<const OT::hmtx> (plan); + case HB_OT_TAG_vhea: return true; /* skip vhea, handled by vmtx */ + case HB_OT_TAG_vmtx: return _subset<const OT::vmtx> (plan); + case HB_OT_TAG_maxp: return _subset<const OT::maxp> (plan); + case HB_OT_TAG_sbix: return _subset<const OT::sbix> (plan); + case HB_OT_TAG_loca: return true; /* skip loca, handled by glyf */ + case HB_OT_TAG_cmap: return _subset<const OT::cmap> (plan); + case HB_OT_TAG_OS2 : return _subset<const OT::OS2 > (plan); + case HB_OT_TAG_post: return _subset<const OT::post> (plan); + case HB_OT_TAG_COLR: return _subset<const OT::COLR> (plan); + case HB_OT_TAG_CBLC: return _subset<const OT::CBLC> (plan); + case HB_OT_TAG_CBDT: return true; /* skip CBDT, handled by CBLC */ + +#ifndef HB_NO_SUBSET_CFF + case HB_OT_TAG_cff1: return _subset<const OT::cff1> (plan); + case HB_OT_TAG_cff2: return _subset<const OT::cff2> (plan); + case HB_OT_TAG_VORG: return _subset<const OT::VORG> (plan); +#endif + +#ifndef HB_NO_SUBSET_LAYOUT + case HB_OT_TAG_GDEF: return _subset<const OT::GDEF> (plan); + case HB_OT_TAG_GSUB: return _subset<const OT::GSUB> (plan); + case HB_OT_TAG_GPOS: return _subset<const OT::GPOS> (plan); + case HB_OT_TAG_gvar: return _subset<const OT::gvar> (plan); + case HB_OT_TAG_HVAR: return _subset<const OT::HVAR> (plan); + case HB_OT_TAG_VVAR: return _subset<const OT::VVAR> (plan); +#endif + + default: + hb_blob_t *source_table = hb_face_reference_table (plan->source, tag); + bool result = plan->add_table (tag, source_table); + hb_blob_destroy (source_table); + return result; + } +} + +/** + * hb_subset: + * @source: font face data to be subset. + * @input: input to use for the subsetting. + * + * Subsets a font according to provided input. + **/ +hb_face_t * +hb_subset (hb_face_t *source, hb_subset_input_t *input) +{ + if (unlikely (!input || !source)) return hb_face_get_empty (); + + hb_subset_plan_t *plan = hb_subset_plan_create (source, input); + if (unlikely (plan->in_error ())) + return hb_face_get_empty (); + + hb_set_t tags_set; + bool success = true; + hb_tag_t table_tags[32]; + unsigned offset = 0, num_tables = ARRAY_LENGTH (table_tags); + while ((hb_face_get_table_tags (source, offset, &num_tables, table_tags), num_tables)) + { + for (unsigned i = 0; i < num_tables; ++i) + { + hb_tag_t tag = table_tags[i]; + if (_should_drop_table (plan, tag) && !tags_set.has (tag)) continue; + tags_set.add (tag); + success = _subset_table (plan, tag); + if (unlikely (!success)) goto end; + } + offset += num_tables; + } +end: + + hb_face_t *result = success ? hb_face_reference (plan->dest) : hb_face_get_empty (); + + hb_subset_plan_destroy (plan); + return result; +} diff --git a/thirdparty/harfbuzz/src/hb-subset.h b/thirdparty/harfbuzz/src/hb-subset.h new file mode 100644 index 0000000000..ddf4409734 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-subset.h @@ -0,0 +1,97 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Rod Sheeter + */ + +#ifndef HB_SUBSET_H +#define HB_SUBSET_H + +#include "hb.h" + +HB_BEGIN_DECLS + +/* + * hb_subset_input_t + * + * Things that change based on the input. Characters to keep, etc. + */ + +typedef struct hb_subset_input_t hb_subset_input_t; + +HB_EXTERN hb_subset_input_t * +hb_subset_input_create_or_fail (void); + +HB_EXTERN hb_subset_input_t * +hb_subset_input_reference (hb_subset_input_t *subset_input); + +HB_EXTERN void +hb_subset_input_destroy (hb_subset_input_t *subset_input); + +HB_EXTERN hb_set_t * +hb_subset_input_unicode_set (hb_subset_input_t *subset_input); + +HB_EXTERN hb_set_t * +hb_subset_input_glyph_set (hb_subset_input_t *subset_input); + +HB_EXTERN hb_set_t * +hb_subset_input_nameid_set (hb_subset_input_t *subset_input); + +HB_EXTERN hb_set_t * +hb_subset_input_namelangid_set (hb_subset_input_t *subset_input); + +HB_EXTERN hb_set_t * +hb_subset_input_drop_tables_set (hb_subset_input_t *subset_input); + +HB_EXTERN void +hb_subset_input_set_drop_hints (hb_subset_input_t *subset_input, + hb_bool_t drop_hints); +HB_EXTERN hb_bool_t +hb_subset_input_get_drop_hints (hb_subset_input_t *subset_input); + +HB_EXTERN void +hb_subset_input_set_desubroutinize (hb_subset_input_t *subset_input, + hb_bool_t desubroutinize); +HB_EXTERN hb_bool_t +hb_subset_input_get_desubroutinize (hb_subset_input_t *subset_input); + +HB_EXTERN void +hb_subset_input_set_retain_gids (hb_subset_input_t *subset_input, + hb_bool_t retain_gids); +HB_EXTERN hb_bool_t +hb_subset_input_get_retain_gids (hb_subset_input_t *subset_input); + +HB_EXTERN void +hb_subset_input_set_name_legacy (hb_subset_input_t *subset_input, + hb_bool_t name_legacy); +HB_EXTERN hb_bool_t +hb_subset_input_get_name_legacy (hb_subset_input_t *subset_input); + +/* hb_subset () */ +HB_EXTERN hb_face_t * +hb_subset (hb_face_t *source, hb_subset_input_t *input); + + +HB_END_DECLS + +#endif /* HB_SUBSET_H */ diff --git a/thirdparty/harfbuzz/src/hb-subset.hh b/thirdparty/harfbuzz/src/hb-subset.hh new file mode 100644 index 0000000000..c9b01c67f3 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-subset.hh @@ -0,0 +1,73 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Garret Rieger, Roderick Sheeter + */ + +#ifndef HB_SUBSET_HH +#define HB_SUBSET_HH + + +#include "hb.hh" + +#include "hb-subset.h" + +#include "hb-machinery.hh" +#include "hb-subset-input.hh" +#include "hb-subset-plan.hh" + +struct hb_subset_context_t : + hb_dispatch_context_t<hb_subset_context_t, bool, HB_DEBUG_SUBSET> +{ + const char *get_name () { return "SUBSET"; } + static return_t default_return_value () { return true; } + + private: + template <typename T, typename ...Ts> auto + _dispatch (const T &obj, hb_priority<1>, Ts&&... ds) HB_AUTO_RETURN + ( obj.subset (this, hb_forward<Ts> (ds)...) ) + template <typename T, typename ...Ts> auto + _dispatch (const T &obj, hb_priority<0>, Ts&&... ds) HB_AUTO_RETURN + ( obj.dispatch (this, hb_forward<Ts> (ds)...) ) + public: + template <typename T, typename ...Ts> auto + dispatch (const T &obj, Ts&&... ds) HB_AUTO_RETURN + ( _dispatch (obj, hb_prioritize, hb_forward<Ts> (ds)...) ) + + hb_blob_t *source_blob; + hb_subset_plan_t *plan; + hb_serialize_context_t *serializer; + hb_tag_t table_tag; + + hb_subset_context_t (hb_blob_t *source_blob_, + hb_subset_plan_t *plan_, + hb_serialize_context_t *serializer_, + hb_tag_t table_tag_) : + source_blob (source_blob_), + plan (plan_), + serializer (serializer_), + table_tag (table_tag_) {} +}; + + +#endif /* HB_SUBSET_HH */ diff --git a/thirdparty/harfbuzz/src/hb-ucd-table.hh b/thirdparty/harfbuzz/src/hb-ucd-table.hh new file mode 100644 index 0000000000..88623db338 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ucd-table.hh @@ -0,0 +1,6780 @@ +/* == Start of generated table == */ +/* + * The following table is generated by running: + * + * ./gen-ucd-table.py ucd.nounihan.grouped.xml + * + * on file with this description: Unicode 13.0.0 + */ + +#ifndef HB_UCD_TABLE_HH +#define HB_UCD_TABLE_HH + +#include "hb.hh" + +static const hb_script_t +_hb_ucd_sc_map[157] = +{ + HB_SCRIPT_COMMON, HB_SCRIPT_INHERITED, + HB_SCRIPT_UNKNOWN, HB_SCRIPT_ARABIC, + HB_SCRIPT_ARMENIAN, HB_SCRIPT_BENGALI, + HB_SCRIPT_CYRILLIC, HB_SCRIPT_DEVANAGARI, + HB_SCRIPT_GEORGIAN, HB_SCRIPT_GREEK, + HB_SCRIPT_GUJARATI, HB_SCRIPT_GURMUKHI, + HB_SCRIPT_HANGUL, HB_SCRIPT_HAN, + HB_SCRIPT_HEBREW, HB_SCRIPT_HIRAGANA, + HB_SCRIPT_KANNADA, HB_SCRIPT_KATAKANA, + HB_SCRIPT_LAO, HB_SCRIPT_LATIN, + HB_SCRIPT_MALAYALAM, HB_SCRIPT_ORIYA, + HB_SCRIPT_TAMIL, HB_SCRIPT_TELUGU, + HB_SCRIPT_THAI, HB_SCRIPT_TIBETAN, + HB_SCRIPT_BOPOMOFO, HB_SCRIPT_BRAILLE, + HB_SCRIPT_CANADIAN_SYLLABICS, HB_SCRIPT_CHEROKEE, + HB_SCRIPT_ETHIOPIC, HB_SCRIPT_KHMER, + HB_SCRIPT_MONGOLIAN, HB_SCRIPT_MYANMAR, + HB_SCRIPT_OGHAM, HB_SCRIPT_RUNIC, + HB_SCRIPT_SINHALA, HB_SCRIPT_SYRIAC, + HB_SCRIPT_THAANA, HB_SCRIPT_YI, + HB_SCRIPT_DESERET, HB_SCRIPT_GOTHIC, + HB_SCRIPT_OLD_ITALIC, HB_SCRIPT_BUHID, + HB_SCRIPT_HANUNOO, HB_SCRIPT_TAGALOG, + HB_SCRIPT_TAGBANWA, HB_SCRIPT_CYPRIOT, + HB_SCRIPT_LIMBU, HB_SCRIPT_LINEAR_B, + HB_SCRIPT_OSMANYA, HB_SCRIPT_SHAVIAN, + HB_SCRIPT_TAI_LE, HB_SCRIPT_UGARITIC, + HB_SCRIPT_BUGINESE, HB_SCRIPT_COPTIC, + HB_SCRIPT_GLAGOLITIC, HB_SCRIPT_KHAROSHTHI, + HB_SCRIPT_NEW_TAI_LUE, HB_SCRIPT_OLD_PERSIAN, + HB_SCRIPT_SYLOTI_NAGRI, HB_SCRIPT_TIFINAGH, + HB_SCRIPT_BALINESE, HB_SCRIPT_CUNEIFORM, + HB_SCRIPT_NKO, HB_SCRIPT_PHAGS_PA, + HB_SCRIPT_PHOENICIAN, HB_SCRIPT_CARIAN, + HB_SCRIPT_CHAM, HB_SCRIPT_KAYAH_LI, + HB_SCRIPT_LEPCHA, HB_SCRIPT_LYCIAN, + HB_SCRIPT_LYDIAN, HB_SCRIPT_OL_CHIKI, + HB_SCRIPT_REJANG, HB_SCRIPT_SAURASHTRA, + HB_SCRIPT_SUNDANESE, HB_SCRIPT_VAI, + HB_SCRIPT_AVESTAN, HB_SCRIPT_BAMUM, + HB_SCRIPT_EGYPTIAN_HIEROGLYPHS, HB_SCRIPT_IMPERIAL_ARAMAIC, + HB_SCRIPT_INSCRIPTIONAL_PAHLAVI, HB_SCRIPT_INSCRIPTIONAL_PARTHIAN, + HB_SCRIPT_JAVANESE, HB_SCRIPT_KAITHI, + HB_SCRIPT_LISU, HB_SCRIPT_MEETEI_MAYEK, + HB_SCRIPT_OLD_SOUTH_ARABIAN, HB_SCRIPT_OLD_TURKIC, + HB_SCRIPT_SAMARITAN, HB_SCRIPT_TAI_THAM, + HB_SCRIPT_TAI_VIET, HB_SCRIPT_BATAK, + HB_SCRIPT_BRAHMI, HB_SCRIPT_MANDAIC, + HB_SCRIPT_CHAKMA, HB_SCRIPT_MEROITIC_CURSIVE, + HB_SCRIPT_MEROITIC_HIEROGLYPHS, HB_SCRIPT_MIAO, + HB_SCRIPT_SHARADA, HB_SCRIPT_SORA_SOMPENG, + HB_SCRIPT_TAKRI, HB_SCRIPT_BASSA_VAH, + HB_SCRIPT_CAUCASIAN_ALBANIAN, HB_SCRIPT_DUPLOYAN, + HB_SCRIPT_ELBASAN, HB_SCRIPT_GRANTHA, + HB_SCRIPT_KHOJKI, HB_SCRIPT_KHUDAWADI, + HB_SCRIPT_LINEAR_A, HB_SCRIPT_MAHAJANI, + HB_SCRIPT_MANICHAEAN, HB_SCRIPT_MENDE_KIKAKUI, + HB_SCRIPT_MODI, HB_SCRIPT_MRO, + HB_SCRIPT_NABATAEAN, HB_SCRIPT_OLD_NORTH_ARABIAN, + HB_SCRIPT_OLD_PERMIC, HB_SCRIPT_PAHAWH_HMONG, + HB_SCRIPT_PALMYRENE, HB_SCRIPT_PAU_CIN_HAU, + HB_SCRIPT_PSALTER_PAHLAVI, HB_SCRIPT_SIDDHAM, + HB_SCRIPT_TIRHUTA, HB_SCRIPT_WARANG_CITI, + HB_SCRIPT_AHOM, HB_SCRIPT_ANATOLIAN_HIEROGLYPHS, + HB_SCRIPT_HATRAN, HB_SCRIPT_MULTANI, + HB_SCRIPT_OLD_HUNGARIAN, HB_SCRIPT_SIGNWRITING, + HB_SCRIPT_ADLAM, HB_SCRIPT_BHAIKSUKI, + HB_SCRIPT_MARCHEN, HB_SCRIPT_OSAGE, + HB_SCRIPT_TANGUT, HB_SCRIPT_NEWA, + HB_SCRIPT_MASARAM_GONDI, HB_SCRIPT_NUSHU, + HB_SCRIPT_SOYOMBO, HB_SCRIPT_ZANABAZAR_SQUARE, + HB_SCRIPT_DOGRA, HB_SCRIPT_GUNJALA_GONDI, + HB_SCRIPT_HANIFI_ROHINGYA, HB_SCRIPT_MAKASAR, + HB_SCRIPT_MEDEFAIDRIN, HB_SCRIPT_OLD_SOGDIAN, + HB_SCRIPT_SOGDIAN, HB_SCRIPT_ELYMAIC, + HB_SCRIPT_NANDINAGARI, HB_SCRIPT_NYIAKENG_PUACHUE_HMONG, + HB_SCRIPT_WANCHO, HB_SCRIPT_CHORASMIAN, + HB_SCRIPT_DIVES_AKURU, HB_SCRIPT_KHITAN_SMALL_SCRIPT, + HB_SCRIPT_YEZIDI, +}; +static const uint16_t +_hb_ucd_dm1_p0_map[825] = +{ + 0x003Bu, 0x004Bu, 0x0060u, 0x00B4u, 0x00B7u, 0x00C5u, 0x02B9u, 0x0300u, + 0x0301u, 0x0313u, 0x0385u, 0x0386u, 0x0388u, 0x0389u, 0x038Au, 0x038Cu, + 0x038Eu, 0x038Fu, 0x0390u, 0x03A9u, 0x03ACu, 0x03ADu, 0x03AEu, 0x03AFu, + 0x03B0u, 0x03B9u, 0x03CCu, 0x03CDu, 0x03CEu, 0x2002u, 0x2003u, 0x3008u, + 0x3009u, 0x349Eu, 0x34B9u, 0x34BBu, 0x34DFu, 0x3515u, 0x36EEu, 0x36FCu, + 0x3781u, 0x382Fu, 0x3862u, 0x387Cu, 0x38C7u, 0x38E3u, 0x391Cu, 0x393Au, + 0x3A2Eu, 0x3A6Cu, 0x3AE4u, 0x3B08u, 0x3B19u, 0x3B49u, 0x3B9Du, 0x3C18u, + 0x3C4Eu, 0x3D33u, 0x3D96u, 0x3EACu, 0x3EB8u, 0x3F1Bu, 0x3FFCu, 0x4008u, + 0x4018u, 0x4039u, 0x4046u, 0x4096u, 0x40E3u, 0x412Fu, 0x4202u, 0x4227u, + 0x42A0u, 0x4301u, 0x4334u, 0x4359u, 0x43D5u, 0x43D9u, 0x440Bu, 0x446Bu, + 0x452Bu, 0x455Du, 0x4561u, 0x456Bu, 0x45D7u, 0x45F9u, 0x4635u, 0x46BEu, + 0x46C7u, 0x4995u, 0x49E6u, 0x4A6Eu, 0x4A76u, 0x4AB2u, 0x4B33u, 0x4BCEu, + 0x4CCEu, 0x4CEDu, 0x4CF8u, 0x4D56u, 0x4E0Du, 0x4E26u, 0x4E32u, 0x4E38u, + 0x4E39u, 0x4E3Du, 0x4E41u, 0x4E82u, 0x4E86u, 0x4EAEu, 0x4EC0u, 0x4ECCu, + 0x4EE4u, 0x4F60u, 0x4F80u, 0x4F86u, 0x4F8Bu, 0x4FAEu, 0x4FBBu, 0x4FBFu, + 0x5002u, 0x502Bu, 0x507Au, 0x5099u, 0x50CFu, 0x50DAu, 0x50E7u, 0x5140u, + 0x5145u, 0x514Du, 0x5154u, 0x5164u, 0x5167u, 0x5168u, 0x5169u, 0x516Du, + 0x5177u, 0x5180u, 0x518Du, 0x5192u, 0x5195u, 0x5197u, 0x51A4u, 0x51ACu, + 0x51B5u, 0x51B7u, 0x51C9u, 0x51CCu, 0x51DCu, 0x51DEu, 0x51F5u, 0x5203u, + 0x5207u, 0x5217u, 0x5229u, 0x523Au, 0x523Bu, 0x5246u, 0x5272u, 0x5277u, + 0x5289u, 0x529Bu, 0x52A3u, 0x52B3u, 0x52C7u, 0x52C9u, 0x52D2u, 0x52DEu, + 0x52E4u, 0x52F5u, 0x52FAu, 0x5305u, 0x5306u, 0x5317u, 0x533Fu, 0x5349u, + 0x5351u, 0x535Au, 0x5373u, 0x5375u, 0x537Du, 0x537Fu, 0x53C3u, 0x53CAu, + 0x53DFu, 0x53E5u, 0x53EBu, 0x53F1u, 0x5406u, 0x540Fu, 0x541Du, 0x5438u, + 0x5442u, 0x5448u, 0x5468u, 0x549Eu, 0x54A2u, 0x54BDu, 0x54F6u, 0x5510u, + 0x5553u, 0x5555u, 0x5563u, 0x5584u, 0x5587u, 0x5599u, 0x559Du, 0x55ABu, + 0x55B3u, 0x55C0u, 0x55C2u, 0x55E2u, 0x5606u, 0x5651u, 0x5668u, 0x5674u, + 0x56F9u, 0x5716u, 0x5717u, 0x578Bu, 0x57CEu, 0x57F4u, 0x580Du, 0x5831u, + 0x5832u, 0x5840u, 0x585Au, 0x585Eu, 0x58A8u, 0x58ACu, 0x58B3u, 0x58D8u, + 0x58DFu, 0x58EEu, 0x58F2u, 0x58F7u, 0x5906u, 0x591Au, 0x5922u, 0x5944u, + 0x5948u, 0x5951u, 0x5954u, 0x5962u, 0x5973u, 0x59D8u, 0x59ECu, 0x5A1Bu, + 0x5A27u, 0x5A62u, 0x5A66u, 0x5AB5u, 0x5B08u, 0x5B28u, 0x5B3Eu, 0x5B85u, + 0x5BC3u, 0x5BD8u, 0x5BE7u, 0x5BEEu, 0x5BF3u, 0x5BFFu, 0x5C06u, 0x5C22u, + 0x5C3Fu, 0x5C60u, 0x5C62u, 0x5C64u, 0x5C65u, 0x5C6Eu, 0x5C8Du, 0x5CC0u, + 0x5D19u, 0x5D43u, 0x5D50u, 0x5D6Bu, 0x5D6Eu, 0x5D7Cu, 0x5DB2u, 0x5DBAu, + 0x5DE1u, 0x5DE2u, 0x5DFDu, 0x5E28u, 0x5E3Du, 0x5E69u, 0x5E74u, 0x5EA6u, + 0x5EB0u, 0x5EB3u, 0x5EB6u, 0x5EC9u, 0x5ECAu, 0x5ED2u, 0x5ED3u, 0x5ED9u, + 0x5EECu, 0x5EFEu, 0x5F04u, 0x5F22u, 0x5F53u, 0x5F62u, 0x5F69u, 0x5F6Bu, + 0x5F8Bu, 0x5F9Au, 0x5FA9u, 0x5FADu, 0x5FCDu, 0x5FD7u, 0x5FF5u, 0x5FF9u, + 0x6012u, 0x601Cu, 0x6075u, 0x6081u, 0x6094u, 0x60C7u, 0x60D8u, 0x60E1u, + 0x6108u, 0x6144u, 0x6148u, 0x614Cu, 0x614Eu, 0x6160u, 0x6168u, 0x617Au, + 0x618Eu, 0x6190u, 0x61A4u, 0x61AFu, 0x61B2u, 0x61DEu, 0x61F2u, 0x61F6u, + 0x6200u, 0x6210u, 0x621Bu, 0x622Eu, 0x6234u, 0x625Du, 0x62B1u, 0x62C9u, + 0x62CFu, 0x62D3u, 0x62D4u, 0x62FCu, 0x62FEu, 0x633Du, 0x6350u, 0x6368u, + 0x637Bu, 0x6383u, 0x63A0u, 0x63A9u, 0x63C4u, 0x63C5u, 0x63E4u, 0x641Cu, + 0x6422u, 0x6452u, 0x6469u, 0x6477u, 0x647Eu, 0x649Au, 0x649Du, 0x64C4u, + 0x654Fu, 0x6556u, 0x656Cu, 0x6578u, 0x6599u, 0x65C5u, 0x65E2u, 0x65E3u, + 0x6613u, 0x6649u, 0x6674u, 0x6688u, 0x6691u, 0x669Cu, 0x66B4u, 0x66C6u, + 0x66F4u, 0x66F8u, 0x6700u, 0x6717u, 0x671Bu, 0x6721u, 0x674Eu, 0x6753u, + 0x6756u, 0x675Eu, 0x677Bu, 0x6785u, 0x6797u, 0x67F3u, 0x67FAu, 0x6817u, + 0x681Fu, 0x6852u, 0x6881u, 0x6885u, 0x688Eu, 0x68A8u, 0x6914u, 0x6942u, + 0x69A3u, 0x69EAu, 0x6A02u, 0x6A13u, 0x6AA8u, 0x6AD3u, 0x6ADBu, 0x6B04u, + 0x6B21u, 0x6B54u, 0x6B72u, 0x6B77u, 0x6B79u, 0x6B9Fu, 0x6BAEu, 0x6BBAu, + 0x6BBBu, 0x6C4Eu, 0x6C67u, 0x6C88u, 0x6CBFu, 0x6CCCu, 0x6CCDu, 0x6CE5u, + 0x6D16u, 0x6D1Bu, 0x6D1Eu, 0x6D34u, 0x6D3Eu, 0x6D41u, 0x6D69u, 0x6D6Au, + 0x6D77u, 0x6D78u, 0x6D85u, 0x6DCBu, 0x6DDAu, 0x6DEAu, 0x6DF9u, 0x6E1Au, + 0x6E2Fu, 0x6E6Eu, 0x6E9Cu, 0x6EBAu, 0x6EC7u, 0x6ECBu, 0x6ED1u, 0x6EDBu, + 0x6F0Fu, 0x6F22u, 0x6F23u, 0x6F6Eu, 0x6FC6u, 0x6FEBu, 0x6FFEu, 0x701Bu, + 0x701Eu, 0x7039u, 0x704Au, 0x7070u, 0x7077u, 0x707Du, 0x7099u, 0x70ADu, + 0x70C8u, 0x70D9u, 0x7145u, 0x7149u, 0x716Eu, 0x719Cu, 0x71CEu, 0x71D0u, + 0x7210u, 0x721Bu, 0x7228u, 0x722Bu, 0x7235u, 0x7250u, 0x7262u, 0x7280u, + 0x7295u, 0x72AFu, 0x72C0u, 0x72FCu, 0x732Au, 0x7375u, 0x737Au, 0x7387u, + 0x738Bu, 0x73A5u, 0x73B2u, 0x73DEu, 0x7406u, 0x7409u, 0x7422u, 0x7447u, + 0x745Cu, 0x7469u, 0x7471u, 0x7485u, 0x7489u, 0x7498u, 0x74CAu, 0x7506u, + 0x7524u, 0x753Bu, 0x753Eu, 0x7559u, 0x7565u, 0x7570u, 0x75E2u, 0x7610u, + 0x761Du, 0x761Fu, 0x7642u, 0x7669u, 0x76CAu, 0x76DBu, 0x76E7u, 0x76F4u, + 0x7701u, 0x771Eu, 0x771Fu, 0x7740u, 0x774Au, 0x778Bu, 0x77A7u, 0x784Eu, + 0x786Bu, 0x788Cu, 0x7891u, 0x78CAu, 0x78CCu, 0x78FBu, 0x792Au, 0x793Cu, + 0x793Eu, 0x7948u, 0x7949u, 0x7950u, 0x7956u, 0x795Du, 0x795Eu, 0x7965u, + 0x797Fu, 0x798Du, 0x798Eu, 0x798Fu, 0x79AEu, 0x79CAu, 0x79EBu, 0x7A1Cu, + 0x7A40u, 0x7A4Au, 0x7A4Fu, 0x7A81u, 0x7AB1u, 0x7ACBu, 0x7AEEu, 0x7B20u, + 0x7BC0u, 0x7BC6u, 0x7BC9u, 0x7C3Eu, 0x7C60u, 0x7C7Bu, 0x7C92u, 0x7CBEu, + 0x7CD2u, 0x7CD6u, 0x7CE3u, 0x7CE7u, 0x7CE8u, 0x7D00u, 0x7D10u, 0x7D22u, + 0x7D2Fu, 0x7D5Bu, 0x7D63u, 0x7DA0u, 0x7DBEu, 0x7DC7u, 0x7DF4u, 0x7E02u, + 0x7E09u, 0x7E37u, 0x7E41u, 0x7E45u, 0x7F3Eu, 0x7F72u, 0x7F79u, 0x7F7Au, + 0x7F85u, 0x7F95u, 0x7F9Au, 0x7FBDu, 0x7FFAu, 0x8001u, 0x8005u, 0x8046u, + 0x8060u, 0x806Fu, 0x8070u, 0x807Eu, 0x808Bu, 0x80ADu, 0x80B2u, 0x8103u, + 0x813Eu, 0x81D8u, 0x81E8u, 0x81EDu, 0x8201u, 0x8204u, 0x8218u, 0x826Fu, + 0x8279u, 0x828Bu, 0x8291u, 0x829Du, 0x82B1u, 0x82B3u, 0x82BDu, 0x82E5u, + 0x82E6u, 0x831Du, 0x8323u, 0x8336u, 0x8352u, 0x8353u, 0x8363u, 0x83ADu, + 0x83BDu, 0x83C9u, 0x83CAu, 0x83CCu, 0x83DCu, 0x83E7u, 0x83EFu, 0x83F1u, + 0x843Du, 0x8449u, 0x8457u, 0x84EEu, 0x84F1u, 0x84F3u, 0x84FCu, 0x8516u, + 0x8564u, 0x85CDu, 0x85FAu, 0x8606u, 0x8612u, 0x862Du, 0x863Fu, 0x8650u, + 0x865Cu, 0x8667u, 0x8669u, 0x8688u, 0x86A9u, 0x86E2u, 0x870Eu, 0x8728u, + 0x876Bu, 0x8779u, 0x8786u, 0x87BAu, 0x87E1u, 0x8801u, 0x881Fu, 0x884Cu, + 0x8860u, 0x8863u, 0x88C2u, 0x88CFu, 0x88D7u, 0x88DEu, 0x88E1u, 0x88F8u, + 0x88FAu, 0x8910u, 0x8941u, 0x8964u, 0x8986u, 0x898Bu, 0x8996u, 0x8AA0u, + 0x8AAAu, 0x8ABFu, 0x8ACBu, 0x8AD2u, 0x8AD6u, 0x8AEDu, 0x8AF8u, 0x8AFEu, + 0x8B01u, 0x8B39u, 0x8B58u, 0x8B80u, 0x8B8Au, 0x8C48u, 0x8C55u, 0x8CABu, + 0x8CC1u, 0x8CC2u, 0x8CC8u, 0x8CD3u, 0x8D08u, 0x8D1Bu, 0x8D77u, 0x8DBCu, + 0x8DCBu, 0x8DEFu, 0x8DF0u, 0x8ECAu, 0x8ED4u, 0x8F26u, 0x8F2Au, 0x8F38u, + 0x8F3Bu, 0x8F62u, 0x8F9Eu, 0x8FB0u, 0x8FB6u, 0x9023u, 0x9038u, 0x9072u, + 0x907Cu, 0x908Fu, 0x9094u, 0x90CEu, 0x90DEu, 0x90F1u, 0x90FDu, 0x9111u, + 0x911Bu, 0x916Au, 0x9199u, 0x91B4u, 0x91CCu, 0x91CFu, 0x91D1u, 0x9234u, + 0x9238u, 0x9276u, 0x927Cu, 0x92D7u, 0x92D8u, 0x9304u, 0x934Au, 0x93F9u, + 0x9415u, 0x958Bu, 0x95ADu, 0x95B7u, 0x962Eu, 0x964Bu, 0x964Du, 0x9675u, + 0x9678u, 0x967Cu, 0x9686u, 0x96A3u, 0x96B7u, 0x96B8u, 0x96C3u, 0x96E2u, + 0x96E3u, 0x96F6u, 0x96F7u, 0x9723u, 0x9732u, 0x9748u, 0x9756u, 0x97DBu, + 0x97E0u, 0x97FFu, 0x980Bu, 0x9818u, 0x9829u, 0x983Bu, 0x985Eu, 0x98E2u, + 0x98EFu, 0x98FCu, 0x9928u, 0x9929u, 0x99A7u, 0x99C2u, 0x99F1u, 0x99FEu, + 0x9A6Au, 0x9B12u, 0x9B6Fu, 0x9C40u, 0x9C57u, 0x9CFDu, 0x9D67u, 0x9DB4u, + 0x9DFAu, 0x9E1Eu, 0x9E7Fu, 0x9E97u, 0x9E9Fu, 0x9EBBu, 0x9ECEu, 0x9EF9u, + 0x9EFEu, 0x9F05u, 0x9F0Fu, 0x9F16u, 0x9F3Bu, 0x9F43u, 0x9F8Du, 0x9F8Eu, + 0x9F9Cu, +}; +static const uint16_t +_hb_ucd_dm1_p2_map[110] = +{ + 0x0122u, 0x051Cu, 0x0525u, 0x054Bu, 0x063Au, 0x0804u, 0x08DEu, 0x0A2Cu, + 0x0B63u, 0x14E4u, 0x16A8u, 0x16EAu, 0x19C8u, 0x1B18u, 0x1D0Bu, 0x1DE4u, + 0x1DE6u, 0x2183u, 0x219Fu, 0x2331u, 0x26D4u, 0x2844u, 0x284Au, 0x2B0Cu, + 0x2BF1u, 0x300Au, 0x32B8u, 0x335Fu, 0x3393u, 0x339Cu, 0x33C3u, 0x33D5u, + 0x346Du, 0x36A3u, 0x38A7u, 0x3A8Du, 0x3AFAu, 0x3CBCu, 0x3D1Eu, 0x3ED1u, + 0x3F5Eu, 0x3F8Eu, 0x4263u, 0x42EEu, 0x43ABu, 0x4608u, 0x4735u, 0x4814u, + 0x4C36u, 0x4C92u, 0x4FA1u, 0x4FB8u, 0x5044u, 0x50F2u, 0x50F3u, 0x5119u, + 0x5133u, 0x5249u, 0x541Du, 0x5626u, 0x569Au, 0x56C5u, 0x597Cu, 0x5AA7u, + 0x5BABu, 0x5C80u, 0x5CD0u, 0x5F86u, 0x61DAu, 0x6228u, 0x6247u, 0x62D9u, + 0x633Eu, 0x64DAu, 0x6523u, 0x65A8u, 0x67A7u, 0x67B5u, 0x6B3Cu, 0x6C36u, + 0x6CD5u, 0x6D6Bu, 0x6F2Cu, 0x6FB1u, 0x70D2u, 0x73CAu, 0x7667u, 0x78AEu, + 0x7966u, 0x7CA8u, 0x7ED3u, 0x7F2Fu, 0x85D2u, 0x85EDu, 0x872Eu, 0x8BFAu, + 0x8D77u, 0x9145u, 0x91DFu, 0x921Au, 0x940Au, 0x9496u, 0x95B6u, 0x9B30u, + 0xA0CEu, 0xA105u, 0xA20Eu, 0xA291u, 0xA392u, 0xA600u, +}; +static const uint32_t +_hb_ucd_dm2_u32_map[638] = +{ + HB_CODEPOINT_ENCODE3_11_7_14 (0x003Cu, 0x0338u, 0x226Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x003Du, 0x0338u, 0x2260u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x003Eu, 0x0338u, 0x226Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0300u, 0x00C0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0301u, 0x00C1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0302u, 0x00C2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0303u, 0x00C3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0304u, 0x0100u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0306u, 0x0102u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0307u, 0x0226u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0308u, 0x00C4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0309u, 0x1EA2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x030Au, 0x00C5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x030Cu, 0x01CDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x030Fu, 0x0200u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0311u, 0x0202u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0323u, 0x1EA0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0325u, 0x1E00u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0041u, 0x0328u, 0x0104u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0042u, 0x0307u, 0x1E02u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0042u, 0x0323u, 0x1E04u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0042u, 0x0331u, 0x1E06u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0043u, 0x0301u, 0x0106u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0043u, 0x0302u, 0x0108u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0043u, 0x0307u, 0x010Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0043u, 0x030Cu, 0x010Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0043u, 0x0327u, 0x00C7u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0044u, 0x0307u, 0x1E0Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0044u, 0x030Cu, 0x010Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0044u, 0x0323u, 0x1E0Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0044u, 0x0327u, 0x1E10u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0044u, 0x032Du, 0x1E12u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0044u, 0x0331u, 0x1E0Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0300u, 0x00C8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0301u, 0x00C9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0302u, 0x00CAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0303u, 0x1EBCu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0304u, 0x0112u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0306u, 0x0114u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0307u, 0x0116u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0308u, 0x00CBu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0309u, 0x1EBAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x030Cu, 0x011Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x030Fu, 0x0204u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0311u, 0x0206u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0323u, 0x1EB8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0327u, 0x0228u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0328u, 0x0118u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x032Du, 0x1E18u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0045u, 0x0330u, 0x1E1Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0046u, 0x0307u, 0x1E1Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x0301u, 0x01F4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x0302u, 0x011Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x0304u, 0x1E20u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x0306u, 0x011Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x0307u, 0x0120u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x030Cu, 0x01E6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0047u, 0x0327u, 0x0122u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x0302u, 0x0124u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x0307u, 0x1E22u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x0308u, 0x1E26u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x030Cu, 0x021Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x0323u, 0x1E24u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x0327u, 0x1E28u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0048u, 0x032Eu, 0x1E2Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0300u, 0x00CCu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0301u, 0x00CDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0302u, 0x00CEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0303u, 0x0128u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0304u, 0x012Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0306u, 0x012Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0307u, 0x0130u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0308u, 0x00CFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0309u, 0x1EC8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x030Cu, 0x01CFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x030Fu, 0x0208u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0311u, 0x020Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0323u, 0x1ECAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0328u, 0x012Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0049u, 0x0330u, 0x1E2Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Au, 0x0302u, 0x0134u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Bu, 0x0301u, 0x1E30u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Bu, 0x030Cu, 0x01E8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Bu, 0x0323u, 0x1E32u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Bu, 0x0327u, 0x0136u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Bu, 0x0331u, 0x1E34u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Cu, 0x0301u, 0x0139u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Cu, 0x030Cu, 0x013Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Cu, 0x0323u, 0x1E36u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Cu, 0x0327u, 0x013Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Cu, 0x032Du, 0x1E3Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Cu, 0x0331u, 0x1E3Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Du, 0x0301u, 0x1E3Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Du, 0x0307u, 0x1E40u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Du, 0x0323u, 0x1E42u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0300u, 0x01F8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0301u, 0x0143u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0303u, 0x00D1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0307u, 0x1E44u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x030Cu, 0x0147u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0323u, 0x1E46u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0327u, 0x0145u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x032Du, 0x1E4Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Eu, 0x0331u, 0x1E48u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0300u, 0x00D2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0301u, 0x00D3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0302u, 0x00D4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0303u, 0x00D5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0304u, 0x014Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0306u, 0x014Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0307u, 0x022Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0308u, 0x00D6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0309u, 0x1ECEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x030Bu, 0x0150u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x030Cu, 0x01D1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x030Fu, 0x020Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0311u, 0x020Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x031Bu, 0x01A0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0323u, 0x1ECCu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x004Fu, 0x0328u, 0x01EAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0050u, 0x0301u, 0x1E54u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0050u, 0x0307u, 0x1E56u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x0301u, 0x0154u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x0307u, 0x1E58u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x030Cu, 0x0158u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x030Fu, 0x0210u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x0311u, 0x0212u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x0323u, 0x1E5Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x0327u, 0x0156u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0052u, 0x0331u, 0x1E5Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x0301u, 0x015Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x0302u, 0x015Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x0307u, 0x1E60u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x030Cu, 0x0160u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x0323u, 0x1E62u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x0326u, 0x0218u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0053u, 0x0327u, 0x015Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x0307u, 0x1E6Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x030Cu, 0x0164u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x0323u, 0x1E6Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x0326u, 0x021Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x0327u, 0x0162u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x032Du, 0x1E70u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0054u, 0x0331u, 0x1E6Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0300u, 0x00D9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0301u, 0x00DAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0302u, 0x00DBu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0303u, 0x0168u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0304u, 0x016Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0306u, 0x016Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0308u, 0x00DCu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0309u, 0x1EE6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x030Au, 0x016Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x030Bu, 0x0170u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x030Cu, 0x01D3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x030Fu, 0x0214u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0311u, 0x0216u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x031Bu, 0x01AFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0323u, 0x1EE4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0324u, 0x1E72u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0328u, 0x0172u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x032Du, 0x1E76u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0055u, 0x0330u, 0x1E74u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0056u, 0x0303u, 0x1E7Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0056u, 0x0323u, 0x1E7Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0057u, 0x0300u, 0x1E80u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0057u, 0x0301u, 0x1E82u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0057u, 0x0302u, 0x0174u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0057u, 0x0307u, 0x1E86u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0057u, 0x0308u, 0x1E84u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0057u, 0x0323u, 0x1E88u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0058u, 0x0307u, 0x1E8Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0058u, 0x0308u, 0x1E8Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0300u, 0x1EF2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0301u, 0x00DDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0302u, 0x0176u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0303u, 0x1EF8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0304u, 0x0232u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0307u, 0x1E8Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0308u, 0x0178u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0309u, 0x1EF6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0059u, 0x0323u, 0x1EF4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x005Au, 0x0301u, 0x0179u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x005Au, 0x0302u, 0x1E90u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x005Au, 0x0307u, 0x017Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x005Au, 0x030Cu, 0x017Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x005Au, 0x0323u, 0x1E92u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x005Au, 0x0331u, 0x1E94u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0300u, 0x00E0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0301u, 0x00E1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0302u, 0x00E2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0303u, 0x00E3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0304u, 0x0101u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0306u, 0x0103u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0307u, 0x0227u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0308u, 0x00E4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0309u, 0x1EA3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x030Au, 0x00E5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x030Cu, 0x01CEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x030Fu, 0x0201u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0311u, 0x0203u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0323u, 0x1EA1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0325u, 0x1E01u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0061u, 0x0328u, 0x0105u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0062u, 0x0307u, 0x1E03u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0062u, 0x0323u, 0x1E05u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0062u, 0x0331u, 0x1E07u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0063u, 0x0301u, 0x0107u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0063u, 0x0302u, 0x0109u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0063u, 0x0307u, 0x010Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0063u, 0x030Cu, 0x010Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0063u, 0x0327u, 0x00E7u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0064u, 0x0307u, 0x1E0Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0064u, 0x030Cu, 0x010Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0064u, 0x0323u, 0x1E0Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0064u, 0x0327u, 0x1E11u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0064u, 0x032Du, 0x1E13u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0064u, 0x0331u, 0x1E0Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0300u, 0x00E8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0301u, 0x00E9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0302u, 0x00EAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0303u, 0x1EBDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0304u, 0x0113u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0306u, 0x0115u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0307u, 0x0117u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0308u, 0x00EBu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0309u, 0x1EBBu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x030Cu, 0x011Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x030Fu, 0x0205u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0311u, 0x0207u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0323u, 0x1EB9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0327u, 0x0229u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0328u, 0x0119u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x032Du, 0x1E19u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0065u, 0x0330u, 0x1E1Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0066u, 0x0307u, 0x1E1Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x0301u, 0x01F5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x0302u, 0x011Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x0304u, 0x1E21u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x0306u, 0x011Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x0307u, 0x0121u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x030Cu, 0x01E7u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0067u, 0x0327u, 0x0123u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x0302u, 0x0125u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x0307u, 0x1E23u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x0308u, 0x1E27u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x030Cu, 0x021Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x0323u, 0x1E25u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x0327u, 0x1E29u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x032Eu, 0x1E2Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0068u, 0x0331u, 0x1E96u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0300u, 0x00ECu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0301u, 0x00EDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0302u, 0x00EEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0303u, 0x0129u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0304u, 0x012Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0306u, 0x012Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0308u, 0x00EFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0309u, 0x1EC9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x030Cu, 0x01D0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x030Fu, 0x0209u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0311u, 0x020Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0323u, 0x1ECBu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0328u, 0x012Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0069u, 0x0330u, 0x1E2Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Au, 0x0302u, 0x0135u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Au, 0x030Cu, 0x01F0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Bu, 0x0301u, 0x1E31u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Bu, 0x030Cu, 0x01E9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Bu, 0x0323u, 0x1E33u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Bu, 0x0327u, 0x0137u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Bu, 0x0331u, 0x1E35u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Cu, 0x0301u, 0x013Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Cu, 0x030Cu, 0x013Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Cu, 0x0323u, 0x1E37u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Cu, 0x0327u, 0x013Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Cu, 0x032Du, 0x1E3Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Cu, 0x0331u, 0x1E3Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Du, 0x0301u, 0x1E3Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Du, 0x0307u, 0x1E41u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Du, 0x0323u, 0x1E43u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0300u, 0x01F9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0301u, 0x0144u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0303u, 0x00F1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0307u, 0x1E45u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x030Cu, 0x0148u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0323u, 0x1E47u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0327u, 0x0146u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x032Du, 0x1E4Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Eu, 0x0331u, 0x1E49u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0300u, 0x00F2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0301u, 0x00F3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0302u, 0x00F4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0303u, 0x00F5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0304u, 0x014Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0306u, 0x014Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0307u, 0x022Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0308u, 0x00F6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0309u, 0x1ECFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x030Bu, 0x0151u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x030Cu, 0x01D2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x030Fu, 0x020Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0311u, 0x020Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x031Bu, 0x01A1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0323u, 0x1ECDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x006Fu, 0x0328u, 0x01EBu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0070u, 0x0301u, 0x1E55u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0070u, 0x0307u, 0x1E57u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x0301u, 0x0155u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x0307u, 0x1E59u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x030Cu, 0x0159u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x030Fu, 0x0211u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x0311u, 0x0213u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x0323u, 0x1E5Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x0327u, 0x0157u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0072u, 0x0331u, 0x1E5Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x0301u, 0x015Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x0302u, 0x015Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x0307u, 0x1E61u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x030Cu, 0x0161u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x0323u, 0x1E63u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x0326u, 0x0219u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0073u, 0x0327u, 0x015Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x0307u, 0x1E6Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x0308u, 0x1E97u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x030Cu, 0x0165u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x0323u, 0x1E6Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x0326u, 0x021Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x0327u, 0x0163u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x032Du, 0x1E71u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0074u, 0x0331u, 0x1E6Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0300u, 0x00F9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0301u, 0x00FAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0302u, 0x00FBu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0303u, 0x0169u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0304u, 0x016Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0306u, 0x016Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0308u, 0x00FCu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0309u, 0x1EE7u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x030Au, 0x016Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x030Bu, 0x0171u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x030Cu, 0x01D4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x030Fu, 0x0215u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0311u, 0x0217u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x031Bu, 0x01B0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0323u, 0x1EE5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0324u, 0x1E73u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0328u, 0x0173u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x032Du, 0x1E77u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0075u, 0x0330u, 0x1E75u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0076u, 0x0303u, 0x1E7Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0076u, 0x0323u, 0x1E7Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x0300u, 0x1E81u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x0301u, 0x1E83u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x0302u, 0x0175u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x0307u, 0x1E87u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x0308u, 0x1E85u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x030Au, 0x1E98u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0077u, 0x0323u, 0x1E89u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0078u, 0x0307u, 0x1E8Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0078u, 0x0308u, 0x1E8Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0300u, 0x1EF3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0301u, 0x00FDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0302u, 0x0177u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0303u, 0x1EF9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0304u, 0x0233u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0307u, 0x1E8Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0308u, 0x00FFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0309u, 0x1EF7u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x030Au, 0x1E99u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0079u, 0x0323u, 0x1EF5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x007Au, 0x0301u, 0x017Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x007Au, 0x0302u, 0x1E91u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x007Au, 0x0307u, 0x017Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x007Au, 0x030Cu, 0x017Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x007Au, 0x0323u, 0x1E93u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x007Au, 0x0331u, 0x1E95u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00A8u, 0x0300u, 0x1FEDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00A8u, 0x0301u, 0x0385u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00A8u, 0x0342u, 0x1FC1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00C2u, 0x0300u, 0x1EA6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00C2u, 0x0301u, 0x1EA4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00C2u, 0x0303u, 0x1EAAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00C2u, 0x0309u, 0x1EA8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00C4u, 0x0304u, 0x01DEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00C5u, 0x0301u, 0x01FAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00C6u, 0x0301u, 0x01FCu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00C6u, 0x0304u, 0x01E2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00C7u, 0x0301u, 0x1E08u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00CAu, 0x0300u, 0x1EC0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00CAu, 0x0301u, 0x1EBEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00CAu, 0x0303u, 0x1EC4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00CAu, 0x0309u, 0x1EC2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00CFu, 0x0301u, 0x1E2Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00D4u, 0x0300u, 0x1ED2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00D4u, 0x0301u, 0x1ED0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00D4u, 0x0303u, 0x1ED6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00D4u, 0x0309u, 0x1ED4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00D5u, 0x0301u, 0x1E4Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00D5u, 0x0304u, 0x022Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00D5u, 0x0308u, 0x1E4Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00D6u, 0x0304u, 0x022Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00D8u, 0x0301u, 0x01FEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00DCu, 0x0300u, 0x01DBu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00DCu, 0x0301u, 0x01D7u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00DCu, 0x0304u, 0x01D5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00DCu, 0x030Cu, 0x01D9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00E2u, 0x0300u, 0x1EA7u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00E2u, 0x0301u, 0x1EA5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00E2u, 0x0303u, 0x1EABu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00E2u, 0x0309u, 0x1EA9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00E4u, 0x0304u, 0x01DFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00E5u, 0x0301u, 0x01FBu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00E6u, 0x0301u, 0x01FDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00E6u, 0x0304u, 0x01E3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00E7u, 0x0301u, 0x1E09u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00EAu, 0x0300u, 0x1EC1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00EAu, 0x0301u, 0x1EBFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00EAu, 0x0303u, 0x1EC5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00EAu, 0x0309u, 0x1EC3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00EFu, 0x0301u, 0x1E2Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00F4u, 0x0300u, 0x1ED3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00F4u, 0x0301u, 0x1ED1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00F4u, 0x0303u, 0x1ED7u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00F4u, 0x0309u, 0x1ED5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00F5u, 0x0301u, 0x1E4Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00F5u, 0x0304u, 0x022Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00F5u, 0x0308u, 0x1E4Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00F6u, 0x0304u, 0x022Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00F8u, 0x0301u, 0x01FFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00FCu, 0x0300u, 0x01DCu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00FCu, 0x0301u, 0x01D8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00FCu, 0x0304u, 0x01D6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x00FCu, 0x030Cu, 0x01DAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0102u, 0x0300u, 0x1EB0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0102u, 0x0301u, 0x1EAEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0102u, 0x0303u, 0x1EB4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0102u, 0x0309u, 0x1EB2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0103u, 0x0300u, 0x1EB1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0103u, 0x0301u, 0x1EAFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0103u, 0x0303u, 0x1EB5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0103u, 0x0309u, 0x1EB3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0112u, 0x0300u, 0x1E14u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0112u, 0x0301u, 0x1E16u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0113u, 0x0300u, 0x1E15u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0113u, 0x0301u, 0x1E17u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x014Cu, 0x0300u, 0x1E50u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x014Cu, 0x0301u, 0x1E52u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x014Du, 0x0300u, 0x1E51u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x014Du, 0x0301u, 0x1E53u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x015Au, 0x0307u, 0x1E64u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x015Bu, 0x0307u, 0x1E65u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0160u, 0x0307u, 0x1E66u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0161u, 0x0307u, 0x1E67u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0168u, 0x0301u, 0x1E78u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0169u, 0x0301u, 0x1E79u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x016Au, 0x0308u, 0x1E7Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x016Bu, 0x0308u, 0x1E7Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x017Fu, 0x0307u, 0x1E9Bu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01A0u, 0x0300u, 0x1EDCu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01A0u, 0x0301u, 0x1EDAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01A0u, 0x0303u, 0x1EE0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01A0u, 0x0309u, 0x1EDEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01A0u, 0x0323u, 0x1EE2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01A1u, 0x0300u, 0x1EDDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01A1u, 0x0301u, 0x1EDBu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01A1u, 0x0303u, 0x1EE1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01A1u, 0x0309u, 0x1EDFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01A1u, 0x0323u, 0x1EE3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01AFu, 0x0300u, 0x1EEAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01AFu, 0x0301u, 0x1EE8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01AFu, 0x0303u, 0x1EEEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01AFu, 0x0309u, 0x1EECu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01AFu, 0x0323u, 0x1EF0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01B0u, 0x0300u, 0x1EEBu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01B0u, 0x0301u, 0x1EE9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01B0u, 0x0303u, 0x1EEFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01B0u, 0x0309u, 0x1EEDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01B0u, 0x0323u, 0x1EF1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01B7u, 0x030Cu, 0x01EEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01EAu, 0x0304u, 0x01ECu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x01EBu, 0x0304u, 0x01EDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0226u, 0x0304u, 0x01E0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0227u, 0x0304u, 0x01E1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0228u, 0x0306u, 0x1E1Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0229u, 0x0306u, 0x1E1Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x022Eu, 0x0304u, 0x0230u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x022Fu, 0x0304u, 0x0231u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0292u, 0x030Cu, 0x01EFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0308u, 0x0301u, 0x0000u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0300u, 0x1FBAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0301u, 0x0386u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0304u, 0x1FB9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0306u, 0x1FB8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0313u, 0x1F08u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0314u, 0x1F09u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0391u, 0x0345u, 0x1FBCu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0395u, 0x0300u, 0x1FC8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0395u, 0x0301u, 0x0388u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0395u, 0x0313u, 0x1F18u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0395u, 0x0314u, 0x1F19u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0397u, 0x0300u, 0x1FCAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0397u, 0x0301u, 0x0389u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0397u, 0x0313u, 0x1F28u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0397u, 0x0314u, 0x1F29u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0397u, 0x0345u, 0x1FCCu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0300u, 0x1FDAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0301u, 0x038Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0304u, 0x1FD9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0306u, 0x1FD8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0308u, 0x03AAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0313u, 0x1F38u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0399u, 0x0314u, 0x1F39u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x039Fu, 0x0300u, 0x1FF8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x039Fu, 0x0301u, 0x038Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x039Fu, 0x0313u, 0x1F48u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x039Fu, 0x0314u, 0x1F49u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03A1u, 0x0314u, 0x1FECu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03A5u, 0x0300u, 0x1FEAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03A5u, 0x0301u, 0x038Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03A5u, 0x0304u, 0x1FE9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03A5u, 0x0306u, 0x1FE8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03A5u, 0x0308u, 0x03ABu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03A5u, 0x0314u, 0x1F59u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03A9u, 0x0300u, 0x1FFAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03A9u, 0x0301u, 0x038Fu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03A9u, 0x0313u, 0x1F68u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03A9u, 0x0314u, 0x1F69u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03A9u, 0x0345u, 0x1FFCu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03ACu, 0x0345u, 0x1FB4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03AEu, 0x0345u, 0x1FC4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0300u, 0x1F70u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0301u, 0x03ACu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0304u, 0x1FB1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0306u, 0x1FB0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0313u, 0x1F00u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0314u, 0x1F01u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0342u, 0x1FB6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B1u, 0x0345u, 0x1FB3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B5u, 0x0300u, 0x1F72u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B5u, 0x0301u, 0x03ADu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B5u, 0x0313u, 0x1F10u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B5u, 0x0314u, 0x1F11u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B7u, 0x0300u, 0x1F74u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B7u, 0x0301u, 0x03AEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B7u, 0x0313u, 0x1F20u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B7u, 0x0314u, 0x1F21u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B7u, 0x0342u, 0x1FC6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B7u, 0x0345u, 0x1FC3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0300u, 0x1F76u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0301u, 0x03AFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0304u, 0x1FD1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0306u, 0x1FD0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0308u, 0x03CAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0313u, 0x1F30u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0314u, 0x1F31u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03B9u, 0x0342u, 0x1FD6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03BFu, 0x0300u, 0x1F78u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03BFu, 0x0301u, 0x03CCu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03BFu, 0x0313u, 0x1F40u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03BFu, 0x0314u, 0x1F41u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C1u, 0x0313u, 0x1FE4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C1u, 0x0314u, 0x1FE5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0300u, 0x1F7Au), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0301u, 0x03CDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0304u, 0x1FE1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0306u, 0x1FE0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0308u, 0x03CBu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0313u, 0x1F50u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0314u, 0x1F51u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C5u, 0x0342u, 0x1FE6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C9u, 0x0300u, 0x1F7Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C9u, 0x0301u, 0x03CEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C9u, 0x0313u, 0x1F60u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C9u, 0x0314u, 0x1F61u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C9u, 0x0342u, 0x1FF6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03C9u, 0x0345u, 0x1FF3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03CAu, 0x0300u, 0x1FD2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03CAu, 0x0301u, 0x0390u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03CAu, 0x0342u, 0x1FD7u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03CBu, 0x0300u, 0x1FE2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03CBu, 0x0301u, 0x03B0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03CBu, 0x0342u, 0x1FE7u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03CEu, 0x0345u, 0x1FF4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03D2u, 0x0301u, 0x03D3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x03D2u, 0x0308u, 0x03D4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0406u, 0x0308u, 0x0407u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0410u, 0x0306u, 0x04D0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0410u, 0x0308u, 0x04D2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0413u, 0x0301u, 0x0403u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0415u, 0x0300u, 0x0400u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0415u, 0x0306u, 0x04D6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0415u, 0x0308u, 0x0401u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0416u, 0x0306u, 0x04C1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0416u, 0x0308u, 0x04DCu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0417u, 0x0308u, 0x04DEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0418u, 0x0300u, 0x040Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0418u, 0x0304u, 0x04E2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0418u, 0x0306u, 0x0419u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0418u, 0x0308u, 0x04E4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x041Au, 0x0301u, 0x040Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x041Eu, 0x0308u, 0x04E6u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0423u, 0x0304u, 0x04EEu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0423u, 0x0306u, 0x040Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0423u, 0x0308u, 0x04F0u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0423u, 0x030Bu, 0x04F2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0427u, 0x0308u, 0x04F4u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x042Bu, 0x0308u, 0x04F8u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x042Du, 0x0308u, 0x04ECu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0430u, 0x0306u, 0x04D1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0430u, 0x0308u, 0x04D3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0433u, 0x0301u, 0x0453u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0435u, 0x0300u, 0x0450u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0435u, 0x0306u, 0x04D7u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0435u, 0x0308u, 0x0451u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0436u, 0x0306u, 0x04C2u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0436u, 0x0308u, 0x04DDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0437u, 0x0308u, 0x04DFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0438u, 0x0300u, 0x045Du), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0438u, 0x0304u, 0x04E3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0438u, 0x0306u, 0x0439u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0438u, 0x0308u, 0x04E5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x043Au, 0x0301u, 0x045Cu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x043Eu, 0x0308u, 0x04E7u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0443u, 0x0304u, 0x04EFu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0443u, 0x0306u, 0x045Eu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0443u, 0x0308u, 0x04F1u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0443u, 0x030Bu, 0x04F3u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0447u, 0x0308u, 0x04F5u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x044Bu, 0x0308u, 0x04F9u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x044Du, 0x0308u, 0x04EDu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0456u, 0x0308u, 0x0457u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0474u, 0x030Fu, 0x0476u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x0475u, 0x030Fu, 0x0477u), + HB_CODEPOINT_ENCODE3_11_7_14 (0x04D8u, 0x0308u, 0x04DAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x04D9u, 0x0308u, 0x04DBu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x04E8u, 0x0308u, 0x04EAu), + HB_CODEPOINT_ENCODE3_11_7_14 (0x04E9u, 0x0308u, 0x04EBu), +}; +static const uint64_t +_hb_ucd_dm2_u64_map[388] = +{ + HB_CODEPOINT_ENCODE3 (0x05D0u, 0x05B7u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D0u, 0x05B8u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05D0u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D1u, 0x05BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05D1u, 0x05BFu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D2u, 0x05BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05D3u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D4u, 0x05BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05D5u, 0x05B9u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D5u, 0x05BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05D6u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D8u, 0x05BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05D9u, 0x05B4u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05D9u, 0x05BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05DAu, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05DBu, 0x05BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05DBu, 0x05BFu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05DCu, 0x05BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05DEu, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05E0u, 0x05BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05E1u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05E3u, 0x05BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05E4u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05E4u, 0x05BFu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05E6u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05E7u, 0x05BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05E8u, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05E9u, 0x05BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05E9u, 0x05C1u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05E9u, 0x05C2u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x05EAu, 0x05BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x05F2u, 0x05B7u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0627u, 0x0653u, 0x0622u), HB_CODEPOINT_ENCODE3 (0x0627u, 0x0654u, 0x0623u), + HB_CODEPOINT_ENCODE3 (0x0627u, 0x0655u, 0x0625u), HB_CODEPOINT_ENCODE3 (0x0648u, 0x0654u, 0x0624u), + HB_CODEPOINT_ENCODE3 (0x064Au, 0x0654u, 0x0626u), HB_CODEPOINT_ENCODE3 (0x06C1u, 0x0654u, 0x06C2u), + HB_CODEPOINT_ENCODE3 (0x06D2u, 0x0654u, 0x06D3u), HB_CODEPOINT_ENCODE3 (0x06D5u, 0x0654u, 0x06C0u), + HB_CODEPOINT_ENCODE3 (0x0915u, 0x093Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0916u, 0x093Cu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0917u, 0x093Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x091Cu, 0x093Cu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0921u, 0x093Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0922u, 0x093Cu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0928u, 0x093Cu, 0x0929u), HB_CODEPOINT_ENCODE3 (0x092Bu, 0x093Cu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x092Fu, 0x093Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0930u, 0x093Cu, 0x0931u), + HB_CODEPOINT_ENCODE3 (0x0933u, 0x093Cu, 0x0934u), HB_CODEPOINT_ENCODE3 (0x09A1u, 0x09BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x09A2u, 0x09BCu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x09AFu, 0x09BCu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x09C7u, 0x09BEu, 0x09CBu), HB_CODEPOINT_ENCODE3 (0x09C7u, 0x09D7u, 0x09CCu), + HB_CODEPOINT_ENCODE3 (0x0A16u, 0x0A3Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0A17u, 0x0A3Cu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0A1Cu, 0x0A3Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0A2Bu, 0x0A3Cu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0A32u, 0x0A3Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0A38u, 0x0A3Cu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0B21u, 0x0B3Cu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0B22u, 0x0B3Cu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0B47u, 0x0B3Eu, 0x0B4Bu), HB_CODEPOINT_ENCODE3 (0x0B47u, 0x0B56u, 0x0B48u), + HB_CODEPOINT_ENCODE3 (0x0B47u, 0x0B57u, 0x0B4Cu), HB_CODEPOINT_ENCODE3 (0x0B92u, 0x0BD7u, 0x0B94u), + HB_CODEPOINT_ENCODE3 (0x0BC6u, 0x0BBEu, 0x0BCAu), HB_CODEPOINT_ENCODE3 (0x0BC6u, 0x0BD7u, 0x0BCCu), + HB_CODEPOINT_ENCODE3 (0x0BC7u, 0x0BBEu, 0x0BCBu), HB_CODEPOINT_ENCODE3 (0x0C46u, 0x0C56u, 0x0C48u), + HB_CODEPOINT_ENCODE3 (0x0CBFu, 0x0CD5u, 0x0CC0u), HB_CODEPOINT_ENCODE3 (0x0CC6u, 0x0CC2u, 0x0CCAu), + HB_CODEPOINT_ENCODE3 (0x0CC6u, 0x0CD5u, 0x0CC7u), HB_CODEPOINT_ENCODE3 (0x0CC6u, 0x0CD6u, 0x0CC8u), + HB_CODEPOINT_ENCODE3 (0x0CCAu, 0x0CD5u, 0x0CCBu), HB_CODEPOINT_ENCODE3 (0x0D46u, 0x0D3Eu, 0x0D4Au), + HB_CODEPOINT_ENCODE3 (0x0D46u, 0x0D57u, 0x0D4Cu), HB_CODEPOINT_ENCODE3 (0x0D47u, 0x0D3Eu, 0x0D4Bu), + HB_CODEPOINT_ENCODE3 (0x0DD9u, 0x0DCAu, 0x0DDAu), HB_CODEPOINT_ENCODE3 (0x0DD9u, 0x0DCFu, 0x0DDCu), + HB_CODEPOINT_ENCODE3 (0x0DD9u, 0x0DDFu, 0x0DDEu), HB_CODEPOINT_ENCODE3 (0x0DDCu, 0x0DCAu, 0x0DDDu), + HB_CODEPOINT_ENCODE3 (0x0F40u, 0x0FB5u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0F42u, 0x0FB7u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0F4Cu, 0x0FB7u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0F51u, 0x0FB7u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0F56u, 0x0FB7u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0F5Bu, 0x0FB7u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0F71u, 0x0F72u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0F71u, 0x0F74u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0F71u, 0x0F80u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0F90u, 0x0FB5u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0F92u, 0x0FB7u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0F9Cu, 0x0FB7u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0FA1u, 0x0FB7u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0FA6u, 0x0FB7u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0FABu, 0x0FB7u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x0FB2u, 0x0F80u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x0FB3u, 0x0F80u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1025u, 0x102Eu, 0x1026u), + HB_CODEPOINT_ENCODE3 (0x1B05u, 0x1B35u, 0x1B06u), HB_CODEPOINT_ENCODE3 (0x1B07u, 0x1B35u, 0x1B08u), + HB_CODEPOINT_ENCODE3 (0x1B09u, 0x1B35u, 0x1B0Au), HB_CODEPOINT_ENCODE3 (0x1B0Bu, 0x1B35u, 0x1B0Cu), + HB_CODEPOINT_ENCODE3 (0x1B0Du, 0x1B35u, 0x1B0Eu), HB_CODEPOINT_ENCODE3 (0x1B11u, 0x1B35u, 0x1B12u), + HB_CODEPOINT_ENCODE3 (0x1B3Au, 0x1B35u, 0x1B3Bu), HB_CODEPOINT_ENCODE3 (0x1B3Cu, 0x1B35u, 0x1B3Du), + HB_CODEPOINT_ENCODE3 (0x1B3Eu, 0x1B35u, 0x1B40u), HB_CODEPOINT_ENCODE3 (0x1B3Fu, 0x1B35u, 0x1B41u), + HB_CODEPOINT_ENCODE3 (0x1B42u, 0x1B35u, 0x1B43u), HB_CODEPOINT_ENCODE3 (0x1E36u, 0x0304u, 0x1E38u), + HB_CODEPOINT_ENCODE3 (0x1E37u, 0x0304u, 0x1E39u), HB_CODEPOINT_ENCODE3 (0x1E5Au, 0x0304u, 0x1E5Cu), + HB_CODEPOINT_ENCODE3 (0x1E5Bu, 0x0304u, 0x1E5Du), HB_CODEPOINT_ENCODE3 (0x1E62u, 0x0307u, 0x1E68u), + HB_CODEPOINT_ENCODE3 (0x1E63u, 0x0307u, 0x1E69u), HB_CODEPOINT_ENCODE3 (0x1EA0u, 0x0302u, 0x1EACu), + HB_CODEPOINT_ENCODE3 (0x1EA0u, 0x0306u, 0x1EB6u), HB_CODEPOINT_ENCODE3 (0x1EA1u, 0x0302u, 0x1EADu), + HB_CODEPOINT_ENCODE3 (0x1EA1u, 0x0306u, 0x1EB7u), HB_CODEPOINT_ENCODE3 (0x1EB8u, 0x0302u, 0x1EC6u), + HB_CODEPOINT_ENCODE3 (0x1EB9u, 0x0302u, 0x1EC7u), HB_CODEPOINT_ENCODE3 (0x1ECCu, 0x0302u, 0x1ED8u), + HB_CODEPOINT_ENCODE3 (0x1ECDu, 0x0302u, 0x1ED9u), HB_CODEPOINT_ENCODE3 (0x1F00u, 0x0300u, 0x1F02u), + HB_CODEPOINT_ENCODE3 (0x1F00u, 0x0301u, 0x1F04u), HB_CODEPOINT_ENCODE3 (0x1F00u, 0x0342u, 0x1F06u), + HB_CODEPOINT_ENCODE3 (0x1F00u, 0x0345u, 0x1F80u), HB_CODEPOINT_ENCODE3 (0x1F01u, 0x0300u, 0x1F03u), + HB_CODEPOINT_ENCODE3 (0x1F01u, 0x0301u, 0x1F05u), HB_CODEPOINT_ENCODE3 (0x1F01u, 0x0342u, 0x1F07u), + HB_CODEPOINT_ENCODE3 (0x1F01u, 0x0345u, 0x1F81u), HB_CODEPOINT_ENCODE3 (0x1F02u, 0x0345u, 0x1F82u), + HB_CODEPOINT_ENCODE3 (0x1F03u, 0x0345u, 0x1F83u), HB_CODEPOINT_ENCODE3 (0x1F04u, 0x0345u, 0x1F84u), + HB_CODEPOINT_ENCODE3 (0x1F05u, 0x0345u, 0x1F85u), HB_CODEPOINT_ENCODE3 (0x1F06u, 0x0345u, 0x1F86u), + HB_CODEPOINT_ENCODE3 (0x1F07u, 0x0345u, 0x1F87u), HB_CODEPOINT_ENCODE3 (0x1F08u, 0x0300u, 0x1F0Au), + HB_CODEPOINT_ENCODE3 (0x1F08u, 0x0301u, 0x1F0Cu), HB_CODEPOINT_ENCODE3 (0x1F08u, 0x0342u, 0x1F0Eu), + HB_CODEPOINT_ENCODE3 (0x1F08u, 0x0345u, 0x1F88u), HB_CODEPOINT_ENCODE3 (0x1F09u, 0x0300u, 0x1F0Bu), + HB_CODEPOINT_ENCODE3 (0x1F09u, 0x0301u, 0x1F0Du), HB_CODEPOINT_ENCODE3 (0x1F09u, 0x0342u, 0x1F0Fu), + HB_CODEPOINT_ENCODE3 (0x1F09u, 0x0345u, 0x1F89u), HB_CODEPOINT_ENCODE3 (0x1F0Au, 0x0345u, 0x1F8Au), + HB_CODEPOINT_ENCODE3 (0x1F0Bu, 0x0345u, 0x1F8Bu), HB_CODEPOINT_ENCODE3 (0x1F0Cu, 0x0345u, 0x1F8Cu), + HB_CODEPOINT_ENCODE3 (0x1F0Du, 0x0345u, 0x1F8Du), HB_CODEPOINT_ENCODE3 (0x1F0Eu, 0x0345u, 0x1F8Eu), + HB_CODEPOINT_ENCODE3 (0x1F0Fu, 0x0345u, 0x1F8Fu), HB_CODEPOINT_ENCODE3 (0x1F10u, 0x0300u, 0x1F12u), + HB_CODEPOINT_ENCODE3 (0x1F10u, 0x0301u, 0x1F14u), HB_CODEPOINT_ENCODE3 (0x1F11u, 0x0300u, 0x1F13u), + HB_CODEPOINT_ENCODE3 (0x1F11u, 0x0301u, 0x1F15u), HB_CODEPOINT_ENCODE3 (0x1F18u, 0x0300u, 0x1F1Au), + HB_CODEPOINT_ENCODE3 (0x1F18u, 0x0301u, 0x1F1Cu), HB_CODEPOINT_ENCODE3 (0x1F19u, 0x0300u, 0x1F1Bu), + HB_CODEPOINT_ENCODE3 (0x1F19u, 0x0301u, 0x1F1Du), HB_CODEPOINT_ENCODE3 (0x1F20u, 0x0300u, 0x1F22u), + HB_CODEPOINT_ENCODE3 (0x1F20u, 0x0301u, 0x1F24u), HB_CODEPOINT_ENCODE3 (0x1F20u, 0x0342u, 0x1F26u), + HB_CODEPOINT_ENCODE3 (0x1F20u, 0x0345u, 0x1F90u), HB_CODEPOINT_ENCODE3 (0x1F21u, 0x0300u, 0x1F23u), + HB_CODEPOINT_ENCODE3 (0x1F21u, 0x0301u, 0x1F25u), HB_CODEPOINT_ENCODE3 (0x1F21u, 0x0342u, 0x1F27u), + HB_CODEPOINT_ENCODE3 (0x1F21u, 0x0345u, 0x1F91u), HB_CODEPOINT_ENCODE3 (0x1F22u, 0x0345u, 0x1F92u), + HB_CODEPOINT_ENCODE3 (0x1F23u, 0x0345u, 0x1F93u), HB_CODEPOINT_ENCODE3 (0x1F24u, 0x0345u, 0x1F94u), + HB_CODEPOINT_ENCODE3 (0x1F25u, 0x0345u, 0x1F95u), HB_CODEPOINT_ENCODE3 (0x1F26u, 0x0345u, 0x1F96u), + HB_CODEPOINT_ENCODE3 (0x1F27u, 0x0345u, 0x1F97u), HB_CODEPOINT_ENCODE3 (0x1F28u, 0x0300u, 0x1F2Au), + HB_CODEPOINT_ENCODE3 (0x1F28u, 0x0301u, 0x1F2Cu), HB_CODEPOINT_ENCODE3 (0x1F28u, 0x0342u, 0x1F2Eu), + HB_CODEPOINT_ENCODE3 (0x1F28u, 0x0345u, 0x1F98u), HB_CODEPOINT_ENCODE3 (0x1F29u, 0x0300u, 0x1F2Bu), + HB_CODEPOINT_ENCODE3 (0x1F29u, 0x0301u, 0x1F2Du), HB_CODEPOINT_ENCODE3 (0x1F29u, 0x0342u, 0x1F2Fu), + HB_CODEPOINT_ENCODE3 (0x1F29u, 0x0345u, 0x1F99u), HB_CODEPOINT_ENCODE3 (0x1F2Au, 0x0345u, 0x1F9Au), + HB_CODEPOINT_ENCODE3 (0x1F2Bu, 0x0345u, 0x1F9Bu), HB_CODEPOINT_ENCODE3 (0x1F2Cu, 0x0345u, 0x1F9Cu), + HB_CODEPOINT_ENCODE3 (0x1F2Du, 0x0345u, 0x1F9Du), HB_CODEPOINT_ENCODE3 (0x1F2Eu, 0x0345u, 0x1F9Eu), + HB_CODEPOINT_ENCODE3 (0x1F2Fu, 0x0345u, 0x1F9Fu), HB_CODEPOINT_ENCODE3 (0x1F30u, 0x0300u, 0x1F32u), + HB_CODEPOINT_ENCODE3 (0x1F30u, 0x0301u, 0x1F34u), HB_CODEPOINT_ENCODE3 (0x1F30u, 0x0342u, 0x1F36u), + HB_CODEPOINT_ENCODE3 (0x1F31u, 0x0300u, 0x1F33u), HB_CODEPOINT_ENCODE3 (0x1F31u, 0x0301u, 0x1F35u), + HB_CODEPOINT_ENCODE3 (0x1F31u, 0x0342u, 0x1F37u), HB_CODEPOINT_ENCODE3 (0x1F38u, 0x0300u, 0x1F3Au), + HB_CODEPOINT_ENCODE3 (0x1F38u, 0x0301u, 0x1F3Cu), HB_CODEPOINT_ENCODE3 (0x1F38u, 0x0342u, 0x1F3Eu), + HB_CODEPOINT_ENCODE3 (0x1F39u, 0x0300u, 0x1F3Bu), HB_CODEPOINT_ENCODE3 (0x1F39u, 0x0301u, 0x1F3Du), + HB_CODEPOINT_ENCODE3 (0x1F39u, 0x0342u, 0x1F3Fu), HB_CODEPOINT_ENCODE3 (0x1F40u, 0x0300u, 0x1F42u), + HB_CODEPOINT_ENCODE3 (0x1F40u, 0x0301u, 0x1F44u), HB_CODEPOINT_ENCODE3 (0x1F41u, 0x0300u, 0x1F43u), + HB_CODEPOINT_ENCODE3 (0x1F41u, 0x0301u, 0x1F45u), HB_CODEPOINT_ENCODE3 (0x1F48u, 0x0300u, 0x1F4Au), + HB_CODEPOINT_ENCODE3 (0x1F48u, 0x0301u, 0x1F4Cu), HB_CODEPOINT_ENCODE3 (0x1F49u, 0x0300u, 0x1F4Bu), + HB_CODEPOINT_ENCODE3 (0x1F49u, 0x0301u, 0x1F4Du), HB_CODEPOINT_ENCODE3 (0x1F50u, 0x0300u, 0x1F52u), + HB_CODEPOINT_ENCODE3 (0x1F50u, 0x0301u, 0x1F54u), HB_CODEPOINT_ENCODE3 (0x1F50u, 0x0342u, 0x1F56u), + HB_CODEPOINT_ENCODE3 (0x1F51u, 0x0300u, 0x1F53u), HB_CODEPOINT_ENCODE3 (0x1F51u, 0x0301u, 0x1F55u), + HB_CODEPOINT_ENCODE3 (0x1F51u, 0x0342u, 0x1F57u), HB_CODEPOINT_ENCODE3 (0x1F59u, 0x0300u, 0x1F5Bu), + HB_CODEPOINT_ENCODE3 (0x1F59u, 0x0301u, 0x1F5Du), HB_CODEPOINT_ENCODE3 (0x1F59u, 0x0342u, 0x1F5Fu), + HB_CODEPOINT_ENCODE3 (0x1F60u, 0x0300u, 0x1F62u), HB_CODEPOINT_ENCODE3 (0x1F60u, 0x0301u, 0x1F64u), + HB_CODEPOINT_ENCODE3 (0x1F60u, 0x0342u, 0x1F66u), HB_CODEPOINT_ENCODE3 (0x1F60u, 0x0345u, 0x1FA0u), + HB_CODEPOINT_ENCODE3 (0x1F61u, 0x0300u, 0x1F63u), HB_CODEPOINT_ENCODE3 (0x1F61u, 0x0301u, 0x1F65u), + HB_CODEPOINT_ENCODE3 (0x1F61u, 0x0342u, 0x1F67u), HB_CODEPOINT_ENCODE3 (0x1F61u, 0x0345u, 0x1FA1u), + HB_CODEPOINT_ENCODE3 (0x1F62u, 0x0345u, 0x1FA2u), HB_CODEPOINT_ENCODE3 (0x1F63u, 0x0345u, 0x1FA3u), + HB_CODEPOINT_ENCODE3 (0x1F64u, 0x0345u, 0x1FA4u), HB_CODEPOINT_ENCODE3 (0x1F65u, 0x0345u, 0x1FA5u), + HB_CODEPOINT_ENCODE3 (0x1F66u, 0x0345u, 0x1FA6u), HB_CODEPOINT_ENCODE3 (0x1F67u, 0x0345u, 0x1FA7u), + HB_CODEPOINT_ENCODE3 (0x1F68u, 0x0300u, 0x1F6Au), HB_CODEPOINT_ENCODE3 (0x1F68u, 0x0301u, 0x1F6Cu), + HB_CODEPOINT_ENCODE3 (0x1F68u, 0x0342u, 0x1F6Eu), HB_CODEPOINT_ENCODE3 (0x1F68u, 0x0345u, 0x1FA8u), + HB_CODEPOINT_ENCODE3 (0x1F69u, 0x0300u, 0x1F6Bu), HB_CODEPOINT_ENCODE3 (0x1F69u, 0x0301u, 0x1F6Du), + HB_CODEPOINT_ENCODE3 (0x1F69u, 0x0342u, 0x1F6Fu), HB_CODEPOINT_ENCODE3 (0x1F69u, 0x0345u, 0x1FA9u), + HB_CODEPOINT_ENCODE3 (0x1F6Au, 0x0345u, 0x1FAAu), HB_CODEPOINT_ENCODE3 (0x1F6Bu, 0x0345u, 0x1FABu), + HB_CODEPOINT_ENCODE3 (0x1F6Cu, 0x0345u, 0x1FACu), HB_CODEPOINT_ENCODE3 (0x1F6Du, 0x0345u, 0x1FADu), + HB_CODEPOINT_ENCODE3 (0x1F6Eu, 0x0345u, 0x1FAEu), HB_CODEPOINT_ENCODE3 (0x1F6Fu, 0x0345u, 0x1FAFu), + HB_CODEPOINT_ENCODE3 (0x1F70u, 0x0345u, 0x1FB2u), HB_CODEPOINT_ENCODE3 (0x1F74u, 0x0345u, 0x1FC2u), + HB_CODEPOINT_ENCODE3 (0x1F7Cu, 0x0345u, 0x1FF2u), HB_CODEPOINT_ENCODE3 (0x1FB6u, 0x0345u, 0x1FB7u), + HB_CODEPOINT_ENCODE3 (0x1FBFu, 0x0300u, 0x1FCDu), HB_CODEPOINT_ENCODE3 (0x1FBFu, 0x0301u, 0x1FCEu), + HB_CODEPOINT_ENCODE3 (0x1FBFu, 0x0342u, 0x1FCFu), HB_CODEPOINT_ENCODE3 (0x1FC6u, 0x0345u, 0x1FC7u), + HB_CODEPOINT_ENCODE3 (0x1FF6u, 0x0345u, 0x1FF7u), HB_CODEPOINT_ENCODE3 (0x1FFEu, 0x0300u, 0x1FDDu), + HB_CODEPOINT_ENCODE3 (0x1FFEu, 0x0301u, 0x1FDEu), HB_CODEPOINT_ENCODE3 (0x1FFEu, 0x0342u, 0x1FDFu), + HB_CODEPOINT_ENCODE3 (0x2190u, 0x0338u, 0x219Au), HB_CODEPOINT_ENCODE3 (0x2192u, 0x0338u, 0x219Bu), + HB_CODEPOINT_ENCODE3 (0x2194u, 0x0338u, 0x21AEu), HB_CODEPOINT_ENCODE3 (0x21D0u, 0x0338u, 0x21CDu), + HB_CODEPOINT_ENCODE3 (0x21D2u, 0x0338u, 0x21CFu), HB_CODEPOINT_ENCODE3 (0x21D4u, 0x0338u, 0x21CEu), + HB_CODEPOINT_ENCODE3 (0x2203u, 0x0338u, 0x2204u), HB_CODEPOINT_ENCODE3 (0x2208u, 0x0338u, 0x2209u), + HB_CODEPOINT_ENCODE3 (0x220Bu, 0x0338u, 0x220Cu), HB_CODEPOINT_ENCODE3 (0x2223u, 0x0338u, 0x2224u), + HB_CODEPOINT_ENCODE3 (0x2225u, 0x0338u, 0x2226u), HB_CODEPOINT_ENCODE3 (0x223Cu, 0x0338u, 0x2241u), + HB_CODEPOINT_ENCODE3 (0x2243u, 0x0338u, 0x2244u), HB_CODEPOINT_ENCODE3 (0x2245u, 0x0338u, 0x2247u), + HB_CODEPOINT_ENCODE3 (0x2248u, 0x0338u, 0x2249u), HB_CODEPOINT_ENCODE3 (0x224Du, 0x0338u, 0x226Du), + HB_CODEPOINT_ENCODE3 (0x2261u, 0x0338u, 0x2262u), HB_CODEPOINT_ENCODE3 (0x2264u, 0x0338u, 0x2270u), + HB_CODEPOINT_ENCODE3 (0x2265u, 0x0338u, 0x2271u), HB_CODEPOINT_ENCODE3 (0x2272u, 0x0338u, 0x2274u), + HB_CODEPOINT_ENCODE3 (0x2273u, 0x0338u, 0x2275u), HB_CODEPOINT_ENCODE3 (0x2276u, 0x0338u, 0x2278u), + HB_CODEPOINT_ENCODE3 (0x2277u, 0x0338u, 0x2279u), HB_CODEPOINT_ENCODE3 (0x227Au, 0x0338u, 0x2280u), + HB_CODEPOINT_ENCODE3 (0x227Bu, 0x0338u, 0x2281u), HB_CODEPOINT_ENCODE3 (0x227Cu, 0x0338u, 0x22E0u), + HB_CODEPOINT_ENCODE3 (0x227Du, 0x0338u, 0x22E1u), HB_CODEPOINT_ENCODE3 (0x2282u, 0x0338u, 0x2284u), + HB_CODEPOINT_ENCODE3 (0x2283u, 0x0338u, 0x2285u), HB_CODEPOINT_ENCODE3 (0x2286u, 0x0338u, 0x2288u), + HB_CODEPOINT_ENCODE3 (0x2287u, 0x0338u, 0x2289u), HB_CODEPOINT_ENCODE3 (0x2291u, 0x0338u, 0x22E2u), + HB_CODEPOINT_ENCODE3 (0x2292u, 0x0338u, 0x22E3u), HB_CODEPOINT_ENCODE3 (0x22A2u, 0x0338u, 0x22ACu), + HB_CODEPOINT_ENCODE3 (0x22A8u, 0x0338u, 0x22ADu), HB_CODEPOINT_ENCODE3 (0x22A9u, 0x0338u, 0x22AEu), + HB_CODEPOINT_ENCODE3 (0x22ABu, 0x0338u, 0x22AFu), HB_CODEPOINT_ENCODE3 (0x22B2u, 0x0338u, 0x22EAu), + HB_CODEPOINT_ENCODE3 (0x22B3u, 0x0338u, 0x22EBu), HB_CODEPOINT_ENCODE3 (0x22B4u, 0x0338u, 0x22ECu), + HB_CODEPOINT_ENCODE3 (0x22B5u, 0x0338u, 0x22EDu), HB_CODEPOINT_ENCODE3 (0x2ADDu, 0x0338u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x3046u, 0x3099u, 0x3094u), HB_CODEPOINT_ENCODE3 (0x304Bu, 0x3099u, 0x304Cu), + HB_CODEPOINT_ENCODE3 (0x304Du, 0x3099u, 0x304Eu), HB_CODEPOINT_ENCODE3 (0x304Fu, 0x3099u, 0x3050u), + HB_CODEPOINT_ENCODE3 (0x3051u, 0x3099u, 0x3052u), HB_CODEPOINT_ENCODE3 (0x3053u, 0x3099u, 0x3054u), + HB_CODEPOINT_ENCODE3 (0x3055u, 0x3099u, 0x3056u), HB_CODEPOINT_ENCODE3 (0x3057u, 0x3099u, 0x3058u), + HB_CODEPOINT_ENCODE3 (0x3059u, 0x3099u, 0x305Au), HB_CODEPOINT_ENCODE3 (0x305Bu, 0x3099u, 0x305Cu), + HB_CODEPOINT_ENCODE3 (0x305Du, 0x3099u, 0x305Eu), HB_CODEPOINT_ENCODE3 (0x305Fu, 0x3099u, 0x3060u), + HB_CODEPOINT_ENCODE3 (0x3061u, 0x3099u, 0x3062u), HB_CODEPOINT_ENCODE3 (0x3064u, 0x3099u, 0x3065u), + HB_CODEPOINT_ENCODE3 (0x3066u, 0x3099u, 0x3067u), HB_CODEPOINT_ENCODE3 (0x3068u, 0x3099u, 0x3069u), + HB_CODEPOINT_ENCODE3 (0x306Fu, 0x3099u, 0x3070u), HB_CODEPOINT_ENCODE3 (0x306Fu, 0x309Au, 0x3071u), + HB_CODEPOINT_ENCODE3 (0x3072u, 0x3099u, 0x3073u), HB_CODEPOINT_ENCODE3 (0x3072u, 0x309Au, 0x3074u), + HB_CODEPOINT_ENCODE3 (0x3075u, 0x3099u, 0x3076u), HB_CODEPOINT_ENCODE3 (0x3075u, 0x309Au, 0x3077u), + HB_CODEPOINT_ENCODE3 (0x3078u, 0x3099u, 0x3079u), HB_CODEPOINT_ENCODE3 (0x3078u, 0x309Au, 0x307Au), + HB_CODEPOINT_ENCODE3 (0x307Bu, 0x3099u, 0x307Cu), HB_CODEPOINT_ENCODE3 (0x307Bu, 0x309Au, 0x307Du), + HB_CODEPOINT_ENCODE3 (0x309Du, 0x3099u, 0x309Eu), HB_CODEPOINT_ENCODE3 (0x30A6u, 0x3099u, 0x30F4u), + HB_CODEPOINT_ENCODE3 (0x30ABu, 0x3099u, 0x30ACu), HB_CODEPOINT_ENCODE3 (0x30ADu, 0x3099u, 0x30AEu), + HB_CODEPOINT_ENCODE3 (0x30AFu, 0x3099u, 0x30B0u), HB_CODEPOINT_ENCODE3 (0x30B1u, 0x3099u, 0x30B2u), + HB_CODEPOINT_ENCODE3 (0x30B3u, 0x3099u, 0x30B4u), HB_CODEPOINT_ENCODE3 (0x30B5u, 0x3099u, 0x30B6u), + HB_CODEPOINT_ENCODE3 (0x30B7u, 0x3099u, 0x30B8u), HB_CODEPOINT_ENCODE3 (0x30B9u, 0x3099u, 0x30BAu), + HB_CODEPOINT_ENCODE3 (0x30BBu, 0x3099u, 0x30BCu), HB_CODEPOINT_ENCODE3 (0x30BDu, 0x3099u, 0x30BEu), + HB_CODEPOINT_ENCODE3 (0x30BFu, 0x3099u, 0x30C0u), HB_CODEPOINT_ENCODE3 (0x30C1u, 0x3099u, 0x30C2u), + HB_CODEPOINT_ENCODE3 (0x30C4u, 0x3099u, 0x30C5u), HB_CODEPOINT_ENCODE3 (0x30C6u, 0x3099u, 0x30C7u), + HB_CODEPOINT_ENCODE3 (0x30C8u, 0x3099u, 0x30C9u), HB_CODEPOINT_ENCODE3 (0x30CFu, 0x3099u, 0x30D0u), + HB_CODEPOINT_ENCODE3 (0x30CFu, 0x309Au, 0x30D1u), HB_CODEPOINT_ENCODE3 (0x30D2u, 0x3099u, 0x30D3u), + HB_CODEPOINT_ENCODE3 (0x30D2u, 0x309Au, 0x30D4u), HB_CODEPOINT_ENCODE3 (0x30D5u, 0x3099u, 0x30D6u), + HB_CODEPOINT_ENCODE3 (0x30D5u, 0x309Au, 0x30D7u), HB_CODEPOINT_ENCODE3 (0x30D8u, 0x3099u, 0x30D9u), + HB_CODEPOINT_ENCODE3 (0x30D8u, 0x309Au, 0x30DAu), HB_CODEPOINT_ENCODE3 (0x30DBu, 0x3099u, 0x30DCu), + HB_CODEPOINT_ENCODE3 (0x30DBu, 0x309Au, 0x30DDu), HB_CODEPOINT_ENCODE3 (0x30EFu, 0x3099u, 0x30F7u), + HB_CODEPOINT_ENCODE3 (0x30F0u, 0x3099u, 0x30F8u), HB_CODEPOINT_ENCODE3 (0x30F1u, 0x3099u, 0x30F9u), + HB_CODEPOINT_ENCODE3 (0x30F2u, 0x3099u, 0x30FAu), HB_CODEPOINT_ENCODE3 (0x30FDu, 0x3099u, 0x30FEu), + HB_CODEPOINT_ENCODE3 (0xFB49u, 0x05C1u, 0x0000u), HB_CODEPOINT_ENCODE3 (0xFB49u, 0x05C2u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x11099u, 0x110BAu, 0x1109Au),HB_CODEPOINT_ENCODE3 (0x1109Bu, 0x110BAu, 0x1109Cu), + HB_CODEPOINT_ENCODE3 (0x110A5u, 0x110BAu, 0x110ABu),HB_CODEPOINT_ENCODE3 (0x11131u, 0x11127u, 0x1112Eu), + HB_CODEPOINT_ENCODE3 (0x11132u, 0x11127u, 0x1112Fu),HB_CODEPOINT_ENCODE3 (0x11347u, 0x1133Eu, 0x1134Bu), + HB_CODEPOINT_ENCODE3 (0x11347u, 0x11357u, 0x1134Cu),HB_CODEPOINT_ENCODE3 (0x114B9u, 0x114B0u, 0x114BCu), + HB_CODEPOINT_ENCODE3 (0x114B9u, 0x114BAu, 0x114BBu),HB_CODEPOINT_ENCODE3 (0x114B9u, 0x114BDu, 0x114BEu), + HB_CODEPOINT_ENCODE3 (0x115B8u, 0x115AFu, 0x115BAu),HB_CODEPOINT_ENCODE3 (0x115B9u, 0x115AFu, 0x115BBu), + HB_CODEPOINT_ENCODE3 (0x11935u, 0x11930u, 0x11938u), HB_CODEPOINT_ENCODE3 (0x1D157u, 0x1D165u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x1D158u, 0x1D165u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1D15Fu, 0x1D16Eu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x1D15Fu, 0x1D16Fu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1D15Fu, 0x1D170u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x1D15Fu, 0x1D171u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1D15Fu, 0x1D172u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x1D1B9u, 0x1D165u, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1D1BAu, 0x1D165u, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x1D1BBu, 0x1D16Eu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1D1BBu, 0x1D16Fu, 0x0000u), + HB_CODEPOINT_ENCODE3 (0x1D1BCu, 0x1D16Eu, 0x0000u), HB_CODEPOINT_ENCODE3 (0x1D1BCu, 0x1D16Fu, 0x0000u), +}; + +#ifndef HB_OPTIMIZE_SIZE + +static const uint8_t +_hb_ucd_u8[32480] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 27, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 28, + 29, 26, 30, 31, 32, 33, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 34, 35, 35, 35, 35, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, + 26, 57, 58, 59, 59, 59, 59, 59, 26, 26, 60, 59, 59, 59, 59, 59, + 59, 59, 26, 61, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 26, 62, 59, 63, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 64, 26, 26, 65, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 66, 67, 59, 59, 59, 59, 68, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 69, 70, 71, 72, 73, 74, 59, 59, + 75, 76, 59, 59, 77, 59, 78, 79, 80, 81, 73, 82, 83, 84, 59, 59, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 85, 26, 26, 26, 26, 26, 26, 26, 86, 87, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 88, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 89, 59, 59, 59, 59, 59, 59, 26, 90, 59, 59, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 91, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 92, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 93, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 94, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 29, 21, 21, 21, 23, 21, 21, 21, 22, 18, 21, 25, 21, 17, 21, 21, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 21, 21, 25, 25, 25, 21, + 21, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 21, 18, 24, 16, + 24, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 22, 25, 18, 25, 0, + 29, 21, 23, 23, 23, 23, 26, 21, 24, 26, 7, 20, 25, 1, 26, 24, + 26, 25, 15, 15, 24, 5, 21, 21, 24, 15, 7, 19, 15, 15, 15, 21, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 25, 9, 9, 9, 9, 9, 9, 9, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 25, 5, 5, 5, 5, 5, 5, 5, 5, + 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, + 9, 5, 9, 5, 9, 5, 9, 5, 5, 9, 5, 9, 5, 9, 5, 9, + 5, 9, 5, 9, 5, 9, 5, 9, 5, 5, 9, 5, 9, 5, 9, 5, + 9, 5, 9, 5, 9, 5, 9, 5, 9, 9, 5, 9, 5, 9, 5, 5, + 5, 9, 9, 5, 9, 5, 9, 9, 5, 9, 9, 9, 5, 5, 9, 9, + 9, 9, 5, 9, 9, 5, 9, 9, 9, 5, 5, 5, 9, 9, 5, 9, + 9, 5, 9, 5, 9, 5, 9, 9, 5, 9, 5, 5, 9, 5, 9, 9, + 5, 9, 9, 9, 5, 9, 5, 9, 9, 5, 5, 7, 9, 5, 5, 5, + 7, 7, 7, 7, 9, 8, 5, 9, 8, 5, 9, 8, 5, 9, 5, 9, + 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 5, 9, 5, + 5, 9, 8, 5, 9, 5, 9, 9, 9, 5, 9, 5, 9, 5, 9, 5, + 9, 5, 9, 5, 5, 5, 5, 5, 5, 5, 9, 9, 5, 9, 9, 5, + 5, 9, 5, 9, 9, 9, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, + 5, 5, 5, 5, 7, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 24, 24, 24, 24, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 6, 6, 6, 6, 6, 24, 24, 24, 24, 24, 24, 24, 6, 24, 6, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 9, 5, 9, 5, 6, 24, 9, 5, 2, 2, 6, 5, 5, 5, 21, 9, + 2, 2, 2, 2, 24, 24, 9, 21, 9, 9, 9, 2, 9, 2, 9, 9, + 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 9, + 5, 5, 9, 9, 9, 5, 5, 5, 9, 5, 9, 5, 9, 5, 9, 5, + 5, 5, 5, 5, 9, 5, 25, 9, 5, 9, 9, 5, 5, 9, 9, 9, + 9, 5, 26, 12, 12, 12, 12, 12, 11, 11, 9, 5, 9, 5, 9, 5, + 9, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 5, + 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 2, 2, 6, 21, 21, 21, 21, 21, 21, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 21, 17, 2, 2, 26, 26, 23, + 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 17, 12, + 21, 12, 12, 21, 12, 12, 21, 12, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 7, + 7, 7, 7, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 25, 25, 25, 21, 21, 23, 21, 21, 26, 26, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 21, 1, 2, 21, 21, + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 21, 21, 21, 21, 7, 7, + 12, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 21, 7, 12, 12, 12, 12, 12, 12, 12, 1, 26, 12, + 12, 12, 12, 12, 12, 6, 6, 12, 12, 26, 12, 12, 12, 12, 7, 7, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 7, 7, 7, 26, 26, 7, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, 1, + 7, 12, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 2, 2, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 6, 6, 26, 21, 21, 21, 6, 2, 2, 12, 23, 23, + 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 6, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 6, 12, 12, 12, 6, 12, 12, 12, 12, 12, 2, 2, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 2, 2, 21, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 1, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 10, 12, 7, 10, 10, + 10, 12, 12, 12, 12, 12, 12, 12, 12, 10, 10, 10, 10, 12, 10, 10, + 7, 12, 12, 12, 12, 12, 12, 12, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 12, 12, 21, 21, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 21, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 12, 10, 10, 2, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 7, + 7, 2, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7, + 7, 2, 7, 2, 2, 2, 7, 7, 7, 7, 2, 2, 12, 7, 10, 10, + 10, 12, 12, 12, 12, 2, 2, 10, 10, 2, 2, 10, 10, 12, 7, 2, + 2, 2, 2, 2, 2, 2, 2, 10, 2, 2, 2, 2, 7, 7, 2, 7, + 7, 7, 12, 12, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 7, 7, 23, 23, 15, 15, 15, 15, 15, 15, 26, 23, 7, 21, 12, 2, + 2, 12, 12, 10, 2, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 7, + 7, 2, 7, 7, 2, 7, 7, 2, 7, 7, 2, 2, 12, 2, 10, 10, + 10, 12, 12, 2, 2, 2, 2, 12, 12, 2, 2, 12, 12, 12, 2, 2, + 2, 12, 2, 2, 2, 2, 2, 2, 2, 7, 7, 7, 7, 2, 7, 2, + 2, 2, 2, 2, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 12, 12, 7, 7, 7, 12, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 12, 12, 10, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, + 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 2, 7, 7, 2, 7, 7, 7, 7, 7, 2, 2, 12, 7, 10, 10, + 10, 12, 12, 12, 12, 12, 2, 12, 12, 10, 2, 10, 10, 12, 2, 2, + 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 21, 23, 2, 2, 2, 2, 2, 2, 2, 7, 12, 12, 12, 12, 12, 12, + 2, 12, 10, 10, 2, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 7, + 7, 2, 7, 7, 2, 7, 7, 7, 7, 7, 2, 2, 12, 7, 10, 12, + 10, 12, 12, 12, 12, 2, 2, 10, 10, 2, 2, 10, 10, 12, 2, 2, + 2, 2, 2, 2, 2, 12, 12, 10, 2, 2, 2, 2, 7, 7, 2, 7, + 26, 7, 15, 15, 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 12, 7, 2, 7, 7, 7, 7, 7, 7, 2, 2, 2, 7, 7, + 7, 2, 7, 7, 7, 7, 2, 2, 2, 7, 7, 2, 7, 2, 7, 7, + 2, 2, 2, 7, 7, 2, 2, 2, 7, 7, 7, 2, 2, 2, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 10, 10, + 12, 10, 10, 2, 2, 2, 10, 10, 10, 2, 10, 10, 10, 12, 2, 2, + 7, 2, 2, 2, 2, 2, 2, 10, 2, 2, 2, 2, 2, 2, 2, 2, + 15, 15, 15, 26, 26, 26, 26, 26, 26, 23, 26, 2, 2, 2, 2, 2, + 12, 10, 10, 10, 12, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, + 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 7, 12, 12, + 12, 10, 10, 10, 10, 2, 12, 12, 12, 2, 12, 12, 12, 12, 2, 2, + 2, 2, 2, 2, 2, 12, 12, 2, 7, 7, 7, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 21, 15, 15, 15, 15, 15, 15, 15, 26, + 7, 12, 10, 10, 21, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, + 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 2, 2, 12, 7, 10, 12, + 10, 10, 10, 10, 10, 2, 12, 10, 10, 2, 10, 10, 12, 12, 2, 2, + 2, 2, 2, 2, 2, 10, 10, 2, 2, 2, 2, 2, 2, 2, 7, 2, + 2, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 12, 12, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 7, 10, 10, + 10, 12, 12, 12, 12, 2, 10, 10, 10, 2, 10, 10, 10, 12, 7, 26, + 2, 2, 2, 2, 7, 7, 7, 10, 15, 15, 15, 15, 15, 15, 15, 7, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 26, 7, 7, 7, 7, 7, 7, + 2, 12, 10, 10, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 7, 7, 7, 7, 7, 7, + 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 12, 2, 2, 2, 2, 10, + 10, 10, 12, 12, 12, 2, 12, 2, 10, 10, 10, 10, 10, 10, 10, 10, + 2, 2, 10, 10, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 12, 7, 7, 12, 12, 12, 12, 12, 12, 12, 2, 2, 2, 2, 23, + 7, 7, 7, 7, 7, 7, 6, 12, 12, 12, 12, 12, 12, 12, 12, 21, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 21, 21, 2, 2, 2, 2, + 2, 7, 7, 2, 7, 2, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, + 7, 7, 7, 7, 2, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 12, 7, 7, 12, 12, 12, 12, 12, 12, 12, 12, 12, 7, 2, 2, + 7, 7, 7, 7, 7, 2, 6, 2, 12, 12, 12, 12, 12, 12, 2, 2, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 7, 7, 7, 7, + 7, 26, 26, 26, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 26, 21, 26, 26, 26, 12, 12, 26, 26, 26, 26, 26, 26, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 26, 12, 26, 12, 26, 12, 22, 18, 22, 18, 10, 10, + 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, + 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 10, + 12, 12, 12, 12, 12, 21, 12, 12, 7, 7, 7, 7, 7, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 2, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 2, 26, 26, + 26, 26, 26, 26, 26, 26, 12, 26, 26, 26, 26, 26, 26, 2, 26, 26, + 21, 21, 21, 21, 21, 26, 26, 26, 26, 21, 21, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 10, 12, 12, 12, + 12, 10, 12, 12, 12, 12, 12, 12, 10, 12, 12, 10, 10, 12, 12, 7, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 21, 21, 21, 21, 21, 21, + 7, 7, 7, 7, 7, 7, 10, 10, 12, 12, 7, 7, 7, 7, 12, 12, + 12, 7, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 7, 7, + 7, 12, 12, 12, 12, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 12, 10, 10, 12, 12, 10, 10, 10, 10, 10, 10, 12, 7, 10, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 10, 10, 10, 12, 26, 26, + 9, 9, 9, 9, 9, 9, 2, 9, 2, 2, 2, 2, 2, 9, 2, 2, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 21, 6, 5, 5, 5, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 2, 7, 2, 7, 7, 7, 7, 2, 2, + 7, 2, 7, 7, 7, 7, 2, 2, 7, 7, 7, 7, 7, 7, 7, 2, + 7, 2, 7, 7, 7, 7, 2, 2, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 12, 12, 12, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2, 2, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, 2, 2, 2, + 9, 9, 9, 9, 9, 9, 2, 2, 5, 5, 5, 5, 5, 5, 2, 2, + 17, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 26, 21, 7, + 29, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 22, 18, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 21, 21, 21, 14, 14, + 14, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, + 7, 7, 12, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 12, 12, 12, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 2, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 12, 12, 10, 12, 12, 12, 12, 12, 12, 12, 10, 10, + 10, 10, 10, 10, 10, 10, 12, 10, 10, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 21, 21, 21, 6, 21, 21, 21, 23, 7, 12, 2, 2, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, 2, 2, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, + 21, 21, 21, 21, 21, 21, 17, 21, 21, 21, 21, 12, 12, 12, 1, 2, + 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 12, 12, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 7, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, + 12, 12, 12, 10, 10, 10, 10, 12, 12, 10, 10, 10, 2, 2, 2, 2, + 10, 10, 12, 10, 10, 10, 10, 10, 10, 12, 12, 12, 2, 2, 2, 2, + 26, 2, 2, 2, 21, 21, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, + 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 15, 2, 2, 2, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 7, 7, 7, 7, 7, 7, 7, 12, 12, 10, 10, 12, 2, 2, 21, 21, + 7, 7, 7, 7, 7, 10, 12, 10, 12, 12, 12, 12, 12, 12, 12, 2, + 12, 10, 12, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, 10, 10, 10, + 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 2, 2, 12, + 21, 21, 21, 21, 21, 21, 21, 6, 21, 21, 21, 21, 21, 21, 2, 2, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 12, + 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 12, 12, 12, 12, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 12, 10, 12, 12, 12, 12, 12, 10, 12, 10, 10, 10, + 10, 10, 12, 10, 10, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, + 21, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, + 12, 12, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 10, 12, 12, 12, 12, 10, 10, 12, 12, 10, 12, 12, 12, 7, 7, + 7, 7, 7, 7, 7, 7, 12, 10, 12, 12, 10, 10, 10, 12, 10, 12, + 12, 12, 10, 10, 2, 2, 2, 2, 2, 2, 2, 2, 21, 21, 21, 21, + 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 12, 12, 12, 12, + 12, 12, 12, 12, 10, 10, 12, 12, 2, 2, 2, 21, 21, 21, 21, 21, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 21, 21, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2, 2, 9, 9, 9, + 21, 21, 21, 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, + 12, 12, 12, 21, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 10, 12, 12, 12, 12, 12, 12, 12, 7, 7, 7, 7, 12, 7, 7, + 7, 7, 7, 7, 12, 7, 7, 10, 12, 12, 7, 2, 2, 2, 2, 2, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 2, 12, 12, 12, 12, 12, + 9, 5, 9, 5, 9, 5, 5, 5, 5, 5, 5, 5, 5, 5, 9, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9, 9, 9, 9, + 5, 5, 5, 5, 5, 5, 2, 2, 9, 9, 9, 9, 9, 9, 2, 2, + 5, 5, 5, 5, 5, 5, 5, 5, 2, 9, 2, 9, 2, 9, 2, 9, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, + 5, 5, 5, 5, 5, 5, 5, 5, 8, 8, 8, 8, 8, 8, 8, 8, + 5, 5, 5, 5, 5, 2, 5, 5, 9, 9, 9, 9, 8, 24, 5, 24, + 24, 24, 5, 5, 5, 2, 5, 5, 9, 9, 9, 9, 8, 24, 24, 24, + 5, 5, 5, 5, 2, 2, 5, 5, 9, 9, 9, 9, 2, 24, 24, 24, + 5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9, 24, 24, 24, + 2, 2, 5, 5, 5, 2, 5, 5, 9, 9, 9, 9, 8, 24, 24, 2, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 1, 1, 1, 1, 1, + 17, 17, 17, 17, 17, 17, 21, 21, 20, 19, 22, 20, 20, 19, 22, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 27, 28, 1, 1, 1, 1, 1, 29, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 20, 19, 21, 21, 21, 21, 16, + 16, 21, 21, 21, 25, 22, 18, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 25, 21, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 29, + 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 15, 6, 2, 2, 15, 15, 15, 15, 15, 15, 25, 25, 25, 22, 18, 6, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 25, 25, 25, 22, 18, 2, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, 11, 11, + 11, 12, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 26, 26, 9, 26, 26, 26, 26, 9, 26, 26, 5, 9, 9, 9, 5, 5, + 9, 9, 9, 5, 26, 9, 26, 26, 25, 9, 9, 9, 9, 9, 26, 26, + 26, 26, 26, 26, 9, 26, 9, 26, 9, 26, 9, 9, 9, 9, 26, 5, + 9, 9, 9, 9, 5, 7, 7, 7, 7, 5, 26, 26, 5, 5, 9, 9, + 25, 25, 25, 25, 25, 9, 5, 5, 5, 5, 26, 25, 26, 26, 5, 26, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 9, 5, 14, 14, 14, 14, 15, 26, 26, 2, 2, 2, 2, + 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 25, 25, 26, 26, 26, 26, + 25, 26, 26, 25, 26, 26, 25, 26, 26, 26, 26, 26, 26, 26, 25, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, + 26, 26, 25, 26, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 26, 26, 26, 26, 26, 26, 26, 26, 22, 18, 22, 18, 26, 26, 26, 26, + 25, 25, 26, 26, 26, 26, 26, 26, 26, 22, 18, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, 25, 25, + 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, 2, 2, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 15, 15, 15, 15, 15, 15, + 26, 26, 26, 26, 26, 26, 26, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, 25, 25, 25, 25, 25, 25, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 25, + 26, 26, 26, 26, 26, 26, 26, 26, 22, 18, 22, 18, 22, 18, 22, 18, + 22, 18, 22, 18, 22, 18, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 25, 25, 25, 25, 25, 22, 18, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 22, 18, 22, 18, 22, 18, 22, 18, 22, 18, + 25, 25, 25, 22, 18, 22, 18, 22, 18, 22, 18, 22, 18, 22, 18, 22, + 18, 22, 18, 22, 18, 22, 18, 22, 18, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 22, 18, 22, 18, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 22, 18, 25, 25, + 25, 25, 25, 25, 25, 26, 26, 25, 25, 25, 25, 25, 25, 26, 26, 26, + 26, 26, 26, 26, 2, 2, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 2, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, + 9, 5, 9, 9, 9, 5, 5, 9, 5, 9, 5, 9, 5, 9, 9, 9, + 9, 5, 9, 5, 5, 9, 5, 5, 5, 5, 5, 5, 6, 6, 9, 9, + 9, 5, 9, 5, 5, 26, 26, 26, 26, 26, 26, 9, 5, 9, 5, 12, + 12, 12, 9, 5, 2, 2, 2, 2, 2, 21, 21, 21, 21, 15, 21, 21, + 5, 5, 5, 5, 5, 5, 2, 5, 2, 2, 2, 2, 2, 5, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 6, + 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 12, + 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 2, + 21, 21, 20, 19, 20, 19, 21, 21, 21, 20, 19, 21, 20, 19, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 17, 21, 21, 17, 21, 20, 19, 21, 21, + 20, 19, 22, 18, 22, 18, 22, 18, 22, 18, 21, 21, 21, 21, 21, 6, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 17, 17, 21, 21, 21, 21, + 17, 21, 22, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 26, 26, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 26, 26, 26, 26, 26, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, 2, + 29, 21, 21, 21, 26, 6, 7, 14, 22, 18, 22, 18, 22, 18, 22, 18, + 22, 18, 26, 26, 22, 18, 22, 18, 22, 18, 22, 18, 17, 22, 18, 18, + 26, 14, 14, 14, 14, 14, 14, 14, 14, 14, 12, 12, 12, 12, 10, 10, + 17, 6, 6, 6, 6, 6, 26, 26, 14, 14, 14, 6, 7, 21, 26, 26, + 7, 7, 7, 7, 7, 7, 7, 2, 2, 12, 12, 24, 24, 6, 6, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 21, 6, 6, 6, 7, + 2, 2, 2, 2, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 26, 26, 15, 15, 15, 15, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 15, 15, 15, 15, 15, 15, 15, 15, + 26, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 21, 21, 21, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 7, 7, 2, 2, 2, 2, + 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 7, 12, + 11, 11, 11, 21, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 21, 6, + 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 6, 6, 12, 12, + 7, 7, 7, 7, 7, 7, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 12, 12, 21, 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, + 24, 24, 24, 24, 24, 24, 24, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 24, 24, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, + 5, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, + 6, 5, 5, 5, 5, 5, 5, 5, 5, 9, 5, 9, 5, 9, 9, 5, + 9, 5, 9, 5, 9, 5, 9, 5, 6, 24, 24, 9, 5, 9, 5, 7, + 9, 5, 9, 5, 5, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, + 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 9, 9, 9, 9, 5, + 9, 9, 9, 9, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, 9, 5, + 2, 2, 9, 5, 9, 9, 9, 9, 5, 9, 5, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 9, 5, 7, 6, 6, 5, 7, 7, 7, 7, 7, + 7, 7, 12, 7, 7, 7, 12, 7, 7, 7, 7, 12, 7, 7, 7, 7, + 7, 7, 7, 10, 10, 12, 12, 10, 26, 26, 26, 26, 12, 2, 2, 2, + 15, 15, 15, 15, 15, 15, 26, 26, 23, 26, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, + 10, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 21, 21, + 12, 12, 7, 7, 7, 7, 7, 7, 21, 21, 21, 7, 21, 7, 7, 12, + 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 12, 12, 12, 21, 21, + 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 10, 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 21, + 7, 7, 7, 12, 10, 10, 12, 12, 12, 12, 10, 10, 12, 12, 10, 10, + 10, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, 6, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, 21, 21, + 7, 7, 7, 7, 7, 12, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 7, 7, 7, 7, 7, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 12, 10, + 10, 12, 12, 10, 10, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 12, 7, 7, 7, 7, 7, 7, 7, 7, 12, 10, 2, 2, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 21, 21, 21, 21, + 6, 7, 7, 7, 7, 7, 7, 26, 26, 26, 7, 10, 12, 10, 7, 7, + 12, 7, 12, 12, 12, 7, 7, 12, 12, 7, 7, 7, 7, 7, 12, 12, + 7, 12, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 7, 6, 21, 21, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 12, 12, 10, 10, + 21, 21, 7, 6, 6, 10, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 7, 7, 7, 7, 7, 7, 2, 2, 7, 7, 7, 7, 7, 7, 2, + 2, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 24, 6, 6, 6, 6, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 24, 24, 2, 2, 2, 2, + 7, 7, 7, 10, 10, 12, 10, 10, 12, 10, 10, 21, 10, 12, 2, 2, + 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 7, 7, 7, 7, 7, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 7, 12, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 25, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 2, 7, 2, + 7, 7, 2, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 18, 22, + 2, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 23, 26, 2, 2, + 21, 21, 21, 21, 21, 21, 21, 22, 18, 21, 2, 2, 2, 2, 2, 2, + 21, 17, 17, 16, 16, 22, 18, 22, 18, 22, 18, 22, 18, 22, 18, 22, + 18, 22, 18, 22, 18, 21, 21, 22, 18, 21, 21, 21, 21, 16, 16, 16, + 21, 21, 21, 2, 21, 21, 21, 21, 17, 22, 18, 22, 18, 22, 18, 21, + 21, 21, 25, 17, 25, 25, 25, 2, 21, 23, 21, 21, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 1, + 2, 21, 21, 21, 23, 21, 21, 21, 22, 18, 21, 25, 21, 17, 21, 21, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 22, 25, 18, 25, 22, + 18, 21, 22, 18, 21, 21, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, + 2, 2, 7, 7, 7, 7, 7, 7, 2, 2, 7, 7, 7, 7, 7, 7, + 2, 2, 7, 7, 7, 7, 7, 7, 2, 2, 7, 7, 7, 2, 2, 2, + 23, 23, 25, 24, 26, 23, 23, 2, 26, 25, 25, 25, 25, 26, 26, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 26, 26, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 2, 7, + 21, 21, 21, 2, 2, 2, 2, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 2, 2, 2, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 14, 14, 14, 14, 14, 15, 15, 15, 15, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 15, 15, 26, 26, 26, 2, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, + 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 12, 2, 2, + 12, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2, 2, 2, + 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 7, 7, + 7, 14, 7, 7, 7, 7, 7, 7, 7, 7, 14, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 21, + 7, 7, 7, 7, 2, 2, 2, 2, 7, 7, 7, 7, 7, 7, 7, 7, + 21, 14, 14, 14, 14, 14, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5, 5, 5, 5, + 9, 9, 9, 9, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, + 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 21, + 7, 7, 7, 7, 7, 7, 2, 2, 7, 2, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 2, 7, 7, 2, 2, 2, 7, 2, 2, 7, + 7, 7, 7, 7, 7, 7, 2, 21, 15, 15, 15, 15, 15, 15, 15, 15, + 7, 7, 7, 7, 7, 7, 7, 26, 26, 15, 15, 15, 15, 15, 15, 15, + 2, 2, 2, 2, 2, 2, 2, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 7, 7, 7, 2, 7, 7, 2, 2, 2, 2, 2, 15, 15, 15, 15, 15, + 7, 7, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 2, 2, 2, 21, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 21, + 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 15, 15, 7, 7, + 2, 2, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 7, 12, 12, 12, 2, 12, 12, 2, 2, 2, 2, 2, 12, 12, 12, 12, + 7, 7, 7, 7, 2, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 2, 2, 12, 12, 12, 2, 2, 2, 2, 12, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 15, 15, 21, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 15, 15, 15, + 7, 7, 7, 7, 7, 7, 7, 7, 26, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 12, 12, 2, 2, 2, 2, 15, 15, 15, 15, 15, + 21, 21, 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 2, 2, 2, 21, 21, 21, 21, 21, 21, 21, + 7, 7, 7, 7, 7, 7, 2, 2, 15, 15, 15, 15, 15, 15, 15, 15, + 7, 7, 7, 2, 2, 2, 2, 2, 15, 15, 15, 15, 15, 15, 15, 15, + 7, 7, 2, 2, 2, 2, 2, 2, 2, 21, 21, 21, 21, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 15, 15, 15, 15, 15, 15, 15, + 9, 9, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 15, 15, 15, 15, 15, 15, + 7, 7, 7, 7, 12, 12, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 12, 12, 17, 2, 2, + 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 15, 15, 15, 15, 15, 15, 15, 7, 2, 2, 2, 2, 2, 2, 2, 2, + 12, 15, 15, 15, 15, 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 2, 2, 2, 2, + 10, 12, 10, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 21, 21, 21, 21, 21, 21, 21, 2, 2, + 15, 15, 15, 15, 15, 15, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 12, + 10, 10, 10, 12, 12, 12, 12, 10, 10, 12, 12, 21, 21, 1, 21, 21, + 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, + 12, 12, 12, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 10, 12, 12, 12, + 12, 12, 12, 12, 12, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 21, 21, 21, 21, 7, 10, 10, 7, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 12, 21, 21, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, 12, 10, + 10, 7, 7, 7, 7, 21, 21, 21, 21, 12, 12, 12, 12, 21, 10, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 7, 21, 7, 21, 21, 21, + 2, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 12, + 12, 12, 10, 10, 12, 10, 12, 12, 21, 21, 21, 21, 21, 21, 12, 2, + 7, 7, 7, 7, 7, 7, 7, 2, 7, 2, 7, 7, 7, 7, 2, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 21, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, + 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, 2, 2, 2, 2, 2, + 12, 12, 10, 10, 2, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 7, + 7, 2, 7, 7, 2, 7, 7, 7, 7, 7, 2, 12, 12, 7, 10, 10, + 12, 10, 10, 10, 10, 2, 2, 10, 10, 2, 2, 10, 10, 10, 2, 2, + 7, 2, 2, 2, 2, 2, 2, 10, 2, 2, 2, 2, 2, 7, 7, 7, + 7, 7, 10, 10, 2, 2, 12, 12, 12, 12, 12, 12, 12, 2, 2, 2, + 12, 12, 12, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, + 10, 10, 12, 12, 12, 10, 12, 7, 7, 7, 7, 21, 21, 21, 21, 21, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 21, 21, 2, 21, 12, 7, + 10, 10, 10, 12, 12, 12, 12, 12, 12, 10, 12, 10, 10, 10, 10, 12, + 12, 10, 12, 12, 7, 7, 21, 7, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, + 10, 10, 12, 12, 12, 12, 2, 2, 10, 10, 10, 10, 12, 12, 10, 12, + 12, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 7, 7, 7, 7, 12, 12, 2, 2, + 10, 10, 10, 12, 12, 12, 12, 12, 12, 12, 12, 10, 10, 12, 10, 12, + 12, 21, 21, 21, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 10, 12, 10, 10, + 12, 12, 12, 12, 12, 12, 10, 12, 7, 2, 2, 2, 2, 2, 2, 2, + 10, 10, 12, 12, 12, 12, 10, 12, 12, 12, 12, 12, 2, 2, 2, 2, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 15, 15, 21, 21, 21, 26, + 12, 12, 12, 12, 12, 12, 12, 12, 10, 12, 12, 21, 2, 2, 2, 2, + 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, + 7, 7, 7, 7, 7, 7, 7, 2, 2, 7, 2, 2, 7, 7, 7, 7, + 7, 7, 7, 7, 2, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, + 10, 10, 10, 10, 10, 10, 2, 10, 10, 2, 2, 12, 12, 10, 12, 7, + 10, 7, 10, 12, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 7, 7, 7, 7, 7, 7, + 7, 10, 10, 10, 12, 12, 12, 12, 2, 2, 12, 12, 10, 10, 10, 10, + 12, 7, 21, 7, 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 7, 7, 7, 7, 7, + 7, 7, 7, 12, 12, 12, 12, 12, 12, 10, 7, 12, 12, 12, 12, 21, + 21, 21, 21, 21, 21, 21, 21, 12, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 12, 12, 12, 12, 12, 12, 10, 10, 12, 12, 12, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 10, 12, 12, 21, 21, 21, 7, 21, 21, + 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 12, 12, 12, 12, 12, 12, 12, 2, 12, 12, 12, 12, 12, 12, 10, 12, + 7, 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 21, 21, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 2, 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 2, 10, 12, 12, 12, 12, 12, 12, + 12, 10, 12, 12, 10, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 2, 7, 7, 7, 7, 7, + 7, 12, 12, 12, 12, 12, 12, 2, 2, 2, 12, 2, 12, 12, 2, 12, + 12, 12, 12, 12, 12, 12, 7, 12, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 2, 7, 7, 2, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10, 2, + 12, 12, 2, 10, 10, 12, 10, 12, 7, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 12, 12, 10, 10, 21, 21, 2, 2, 2, 2, 2, 2, 2, + 15, 15, 15, 15, 15, 26, 26, 26, 26, 26, 26, 26, 26, 23, 23, 23, + 23, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 21, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 2, + 21, 21, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, + 12, 12, 12, 12, 12, 21, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 12, 12, 12, 12, 12, 12, 12, 21, 21, 21, 21, 21, 26, 26, 26, 26, + 6, 6, 6, 6, 21, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 15, 15, 15, 15, 15, + 15, 15, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 2, 7, 7, 7, + 15, 15, 15, 15, 15, 15, 15, 21, 21, 21, 21, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 2, 2, 12, + 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 2, 2, 2, 2, 2, 2, 2, 12, + 12, 12, 12, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 21, 6, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 10, 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 2, 26, 12, 12, 21, + 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 26, 26, 26, 26, 26, 26, 26, 2, 2, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 10, 10, 12, 12, 12, 26, 26, 26, 10, 10, 10, + 10, 10, 10, 1, 1, 1, 1, 1, 1, 1, 1, 12, 12, 12, 12, 12, + 12, 12, 12, 26, 26, 12, 12, 12, 12, 12, 12, 12, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 12, 12, 12, 12, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, 2, 2, 2, 2, + 26, 26, 12, 12, 12, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, + 5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 9, 9, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 9, 2, 9, 9, + 2, 2, 9, 2, 2, 9, 9, 2, 2, 9, 9, 9, 9, 2, 9, 9, + 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 2, 5, 2, 5, 5, 5, + 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 9, 9, 2, 9, 9, 9, 9, 2, 2, 9, 9, 9, + 9, 9, 9, 9, 9, 2, 9, 9, 9, 9, 9, 9, 9, 2, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 2, 9, 9, 9, 9, 2, + 9, 9, 9, 9, 9, 2, 9, 2, 2, 2, 9, 9, 9, 9, 9, 9, + 9, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 2, 2, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 25, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 5, 5, 5, 5, + 5, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 25, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 25, 5, 5, 5, 5, 5, 5, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 25, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, + 5, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 25, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 25, 5, 5, 5, 5, 5, 5, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 25, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 25, 5, 5, 5, 5, 5, 5, 9, 5, 2, 2, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 12, 12, 12, 12, 12, 12, 12, 26, 26, 26, 26, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 26, 26, 26, + 26, 26, 26, 26, 26, 12, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 12, 26, 26, 21, 21, 21, 21, 21, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 2, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 2, 2, 12, 12, 12, 12, 12, + 12, 12, 2, 12, 12, 2, 12, 12, 12, 12, 12, 2, 2, 2, 2, 2, + 12, 12, 12, 12, 12, 12, 12, 6, 6, 6, 6, 6, 6, 6, 2, 2, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, 7, 26, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, 2, 23, + 7, 7, 7, 7, 7, 2, 2, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 12, 12, 12, 12, 12, 12, 12, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 5, 5, 5, 5, 12, 12, 12, 12, 12, 12, 12, 6, 2, 2, 2, 2, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 26, 15, 15, 15, + 23, 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 26, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2, + 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 2, 7, 7, 2, 7, 2, 2, 7, 2, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 2, 7, 7, 7, 7, 2, 7, 2, 7, 2, 2, 2, 2, + 2, 2, 7, 2, 2, 2, 2, 7, 2, 7, 2, 7, 2, 7, 7, 7, + 2, 7, 7, 2, 7, 2, 2, 7, 2, 7, 2, 7, 2, 7, 2, 7, + 2, 7, 7, 2, 7, 2, 2, 7, 7, 7, 7, 2, 7, 7, 7, 7, + 7, 7, 7, 2, 7, 7, 7, 7, 2, 7, 7, 7, 7, 2, 7, 2, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, + 2, 7, 7, 7, 2, 7, 7, 7, 7, 7, 2, 7, 7, 7, 7, 7, + 25, 25, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, + 2, 2, 2, 2, 2, 2, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 26, 26, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 24, 24, 24, 24, 24, + 26, 26, 26, 26, 26, 26, 26, 26, 2, 2, 2, 2, 2, 2, 2, 2, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 2, 26, 26, 26, + 26, 26, 26, 26, 26, 2, 2, 2, 26, 26, 26, 2, 2, 2, 2, 2, + 26, 26, 26, 2, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 12, 12, 13, 14, 12, 15, 16, 17, 18, 19, 20, + 21, 22, 0, 0, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 24, 25, + 0, 26, 27, 0, 28, 29, 30, 31, 32, 33, 0, 34, 0, 0, 0, 0, + 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 37, 38, 0, 0, 0, 0, + 39, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 0, 0, + 43, 44, 45, 46, 0, 47, 0, 48, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 0, 0, 50, 0, 0, 0, + 0, 0, 0, 51, 0, 52, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 54, 55, 0, 0, 0, 0, 56, 0, 0, 57, 58, 0, + 59, 60, 61, 62, 63, 64, 65, 0, 66, 67, 0, 68, 69, 70, 71, 0, + 60, 0, 72, 73, 74, 75, 0, 0, 69, 0, 76, 77, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 78, 79, 0, 0, 0, 0, 0, 0, 0, 0, 80, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 82, 83, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 85, 0, 79, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 87, 88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 1, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 14, 15, 16, 17, 18, 19, 20, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 21, 0, 0, 0, 0, 0, 22, 23, 24, + 0, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 27, + 28, 29, 0, 0, 0, 0, 30, 0, 0, 0, 31, 32, 33, 34, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 13, 35, 36, 0, 0, 26, 37, 38, 39, 0, 0, 0, 0, 0, 40, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 1, + 42, 43, 44, 45, 0, 0, 0, 0, 0, 0, 0, 46, 0, 47, 48, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 0, 47, 0, 0, + 0, 0, 0, 49, 0, 0, 0, 0, 0, 0, 0, 46, 0, 47, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 47, 50, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 51, 0, 47, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 0, 54, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 56, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, 58, 59, 0, 0, 0, 0, + 0, 0, 60, 61, 62, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 65, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66, + 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 67, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, 68, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 70, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 71, 72, 0, 0, 0, 0, 0, 0, 0, 0, + 73, 0, 66, 74, 0, 0, 0, 0, 0, 0, 75, 76, 72, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 46, 0, 67, 0, 0, 0, + 0, 77, 78, 0, 0, 0, 0, 0, 0, 79, 0, 0, 0, 0, 0, 0, + 80, 0, 79, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 82, + 83, 84, 85, 86, 0, 0, 0, 0, 0, 0, 0, 0, 87, 88, 89, 1, + 1, 1, 90, 91, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 93, + 94, 95, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 71, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 98, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 99, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 71,100,101, 0, 0, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 86, 0,102, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, + 1, 1, 86, 0, 0, 0, 0, 0, 0,103, 0, 0, 0, 0,104, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,105, 0, 73, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,106,107,108, 0, 0, 0, + 0, 0,102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 47, 0, 0, 0, 0, 0,109, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,110,111, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 26,112, 0,113, 0, 0, 0, 0, 0,114, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 115, 0, 0, 0, 0, 0, 0, 0,100, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,116, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,117,118, 72, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,102, 0, 0, 0, + 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0,119, 0, 0, 0, 0, + 0, 0, 0, 0,112, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, + 0, 0,105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73,120, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,121, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0,122, 0, 0, 0, 0, 0, 0, 0, 0, 0,123, 0, 47, 0, 0, + 26,124,124, 0, 0, 0, 0, 0, 0, 0, 0, 0,125, 0, 0, 49, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,126, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97,127, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,128, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,104, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,129,105, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 73, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 0, 97, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0,130, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,131, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 97, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0,132, 0, 0, 0, 0, 0, 0, 0,133, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0,134, 0, 0, 0, 0,135, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 136,137,138,139,140,141, 0, 0, 0,142, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,143, 0, 0, 0, + 0, 0, 0, 0,133, 1, 1,144,145,112, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0,100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,146, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,100,147, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,230,230,230,230, + 230,230,230,230,230,230,230,230,230,232,220,220,220,220,232,216, + 220,220,220,220,220,202,202,220,220,220,220,202,202,220,220,220, + 220,220,220,220,220,220,220,220, 1, 1, 1, 1, 1,220,220,220, + 220,230,230,230,230,230,230,230,230,240,230,220,220,220,230,230, + 230,220,220, 0,230,230,230,220,220,220,220,230,232,220,220,230, + 233,234,234,233,234,234,233,230,230,230,230,230, 0, 0, 0,230, + 230,230,230,230, 0,220,230,230,230,230,220,230,230,230,222,220, + 230,230,230,230,230,230,220,220,220,220,220,220,230,230,220,230, + 230,222,228,230, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20, + 21, 22, 0, 23, 0, 24, 25, 0,230,220, 0, 18, 30, 31, 32, 0, + 0, 0, 0, 0, 0, 0, 0, 27, 28, 29, 30, 31, 32, 33, 34,230, + 230,220,220,230,230,230,230,230,220,230,230,220, 35, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,230,230,230,230,230,230, + 230, 0, 0,230,230,230,230,220,230, 0, 0,230,230, 0,220,230, + 230,220, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0,230,220,230,230, + 220,230,230,220,220,220,230,220,220,230,220,230,230,230,220,230, + 220,230,220,230,220,230,230, 0, 0, 0, 0, 0,230,230,220,230, + 0, 0, 0, 0, 0, 0, 0, 0, 0,220, 0, 0,230,230, 0,230, + 230,230,230,230,230,230,230,230, 0,230,230,230, 0,230,230,230, + 230,230, 0, 0, 0,220,220,220, 0, 0, 0, 0, 0, 0, 0,220, + 230,230,230,230,230,230, 0,220,230,230,220,230,230,220,230,230, + 230,220,220,220, 27, 28, 29,230,230,230,220,230,230,220,220,230, + 230,230,230,230, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, + 0, 9, 0, 0, 0,230,220,230,230, 0, 0, 0, 0, 0, 0, 0, + 0, 0,230, 0, 0, 0, 0, 0, 0, 84, 91, 0, 0, 0, 0, 9, + 9, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0,103,103, 9, 0, + 0, 0, 0, 0,107,107,107,107, 0, 0, 0, 0,118,118, 9, 0, + 0, 0, 0, 0,122,122,122,122, 0, 0, 0, 0,220,220, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,220, 0,220, 0,216, 0, 0, + 0, 0, 0, 0, 0,129,130, 0,132, 0, 0, 0, 0, 0,130,130, + 130,130, 0, 0,130, 0,230,230, 9, 0,230,230, 0, 0, 0, 0, + 0, 0,220, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 9, 9, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,230,230,230, 0, 0, 0, 0, + 9, 0, 0, 0, 0, 0, 0, 0, 0,230, 0, 0, 0,228, 0, 0, + 0, 0, 0, 0, 0,222,230,220, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,230,220, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, + 0, 0, 0, 0,230,230,230,230,230, 0, 0,220,230,230,230,230, + 230,220,220,220,220,220,220,230,230,220, 0,220, 0, 0, 0,230, + 220,230,230,230,230,230,230,230, 0, 0, 0, 0, 0, 0, 9, 9, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0,230,230,230, 0, + 1,220,220,220,220,220,230,230,220,220,220,220,230, 0, 1, 1, + 1, 1, 1, 1, 1, 0, 0, 0, 0,220, 0, 0, 0, 0, 0, 0, + 230, 0, 0, 0,230,230, 0, 0, 0, 0, 0, 0,230,230,220,230, + 230,230,230,230,230,230,220,230,230,234,214,220,202,230,230,230, + 230,230,230,230,230,230,230,230,230,230,232,228,228,220, 0,230, + 233,220,230,220,230,230, 1, 1,230,230,230,230, 1, 1, 1,230, + 230, 0, 0, 0, 0,230, 0, 0, 0, 1, 1,230,220,230, 1, 1, + 220,220,220,220,230, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 9, 0, 0,218,228,232,222,224,224, 0, 8, 8, 0, + 0, 0, 0, 0, 0, 0, 0, 0,230,230,230,230,230,230,230,230, + 230,230, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,220, + 220,220, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 7, + 0, 0, 0, 0,230, 0,230,230,220, 0, 0,230,230, 0, 0, 0, + 0, 0,230,230, 0,230, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 26, 0,230,230,230,230,230,230,230,220,220,220,220,220, + 220,220,230,230,230,230,230, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0,220, 0,230,230, 1,220, 0, 0, 0, 0, 9, 0, 0, 0, 0, + 0,230,220, 0, 0, 0, 0,230,230, 0, 0, 0, 0, 0, 0, 0, + 0, 0,220,220,230,230,230,220,230,220,220,220, 0, 9, 7, 0, + 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 9, 7, 0, 0, 7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 7, + 7, 0, 0, 0,230,230,230,230,230, 0, 0, 0, 0, 0, 9, 0, + 0, 0, 7, 0, 0, 0, 9, 7, 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 7, 0, 0, 0, 0, + 0, 9, 9, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, + 9, 9, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0,230,230,230,230, + 230,230,230, 0, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, 0,216,216, 1, 1, 1, 0, 0, + 0,226,216,216,216,216,216, 0, 0, 0, 0, 0, 0, 0, 0,220, + 220,220,220,220,220,220,220, 0, 0,230,230,230,230,230,220,220, + 0, 0, 0, 0, 0, 0,230,230,230,230, 0, 0, 0, 0,230,230, + 230, 0, 0, 0,230, 0, 0,230,230,230,230,230,230,230, 0,230, + 230, 0,230,230,220,220,220,220,220,220,220, 0,230,230, 7, 0, + 0, 0, 0, 0, 16, 17, 17, 17, 17, 17, 17, 33, 17, 17, 17, 19, + 17, 17, 17, 17, 20,101, 17,113,129,169, 17, 27, 28, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17,237, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 0, 0, + 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, + 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 10, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 10, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 11, 12, 0, 13, 0, 14, 15, 16, 0, 0, + 0, 0, 0, 1, 17, 18, 0, 19, 7, 1, 0, 0, 0, 20, 20, 7, + 20, 20, 20, 20, 20, 20, 20, 8, 21, 0, 22, 0, 7, 23, 24, 0, + 20, 20, 25, 0, 0, 0, 26, 27, 1, 7, 20, 20, 20, 20, 20, 1, + 28, 29, 30, 31, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 10, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 20, + 20, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 8, 21, 32, 4, 0, 10, 0, 33, 7, 20, 20, 20, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 8, 34, 34, 35, 36, 34, 37, 0, 38, 1, 20, 20, + 0, 0, 39, 0, 1, 1, 0, 8, 21, 1, 20, 0, 0, 0, 1, 0, + 0, 40, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 21, + 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 26, 34, 34, 34, 34, 34, 34, 34, 34, 34, 21, 7, 20, 41, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 21, 0, 42, 43, 44, 0, 45, + 0, 8, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 46, 7, 1, 10, 1, 0, 0, 0, 1, 20, 20, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 20, 20, 1, 20, 20, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 21, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, + 0, 0, 3, 4, 0, 0, 0, 0, 0, 0, 3, 47, 48, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 0, 0, + 0, 0, 3, 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 17, 19, 20, 21, 22, 23, 24, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 27, 28, 28, 29, 30, 31, 32, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 34, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 35, 35, 35, 35, 35, 59, 59, 60, 35, + 35, 35, 35, 35, 35, 35, 61, 62, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 63, 64, 35, 65, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 67, 66, 68, 69, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 70, 71, 35, 35, + 35, 35, 72, 35, 35, 35, 35, 35, 35, 35, 35, 35, 73, 74, 75, 76, + 77, 78, 35, 35, 79, 80, 35, 35, 81, 35, 82, 83, 84, 85, 17, 86, + 87, 88, 35, 35, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 89, 25, 25, 25, 25, 25, 25, 25, 90, + 91, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 92, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 93, 35, 35, 35, 35, 35, 35, + 25, 94, 35, 35, 25, 25, 25, 25, 25, 25, 25, 25, 25, 95, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 96, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 19, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, + 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 0, 0, 0, 0, 0, + 26, 26, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 0, 9, 9, 9, 2, 2, + 9, 9, 9, 9, 0, 9, 2, 2, 2, 2, 9, 0, 9, 0, 9, 9, + 9, 2, 9, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 2, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1, 1, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 2, 2, 4, 4, 4, 2, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 2, 2, + 2, 2, 2, 2, 2, 2, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 2, 2, 2, 2, 14, 14, 14, 14, 14, 14, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, + 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 0, 3, 2, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 0, 3, 3, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 2, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 2, 2, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + 64, 2, 2, 64, 64, 64, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 2, 2, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 2, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, + 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, + 95, 95, 2, 2, 95, 2, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, + 5, 5, 5, 2, 2, 5, 5, 2, 2, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, + 5, 5, 5, 5, 5, 5, 5, 2, 5, 2, 2, 2, 5, 5, 5, 5, + 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 5, 5, 2, + 2, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2, 2, 5, 2, 2, + 2, 2, 5, 5, 2, 5, 5, 5, 5, 5, 2, 2, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 2, 2, 11, 11, 11, 2, 11, 11, 11, 11, 11, + 11, 2, 2, 2, 2, 11, 11, 2, 2, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2, + 11, 11, 11, 11, 11, 11, 11, 2, 11, 11, 2, 11, 11, 2, 11, 11, + 2, 2, 11, 2, 11, 11, 11, 11, 11, 2, 2, 2, 2, 11, 11, 2, + 2, 11, 11, 11, 2, 2, 2, 11, 2, 2, 2, 2, 2, 2, 2, 11, + 11, 11, 11, 2, 11, 2, 2, 2, 2, 2, 2, 2, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 2, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 2, 10, 10, 10, 2, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2, + 10, 10, 10, 10, 10, 10, 10, 2, 10, 10, 2, 10, 10, 10, 10, 10, + 2, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2, 10, 10, 10, + 2, 10, 10, 10, 2, 2, 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 2, 2, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 2, 2, 2, 2, 2, 2, 2, 10, + 10, 10, 10, 10, 10, 10, 2, 21, 21, 21, 2, 21, 21, 21, 21, 21, + 21, 21, 21, 2, 2, 21, 21, 2, 2, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, + 21, 21, 21, 21, 21, 21, 21, 2, 21, 21, 2, 21, 21, 21, 21, 21, + 2, 2, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, 2, 21, 21, 2, + 2, 21, 21, 21, 2, 2, 2, 2, 2, 2, 2, 21, 21, 21, 2, 2, + 2, 2, 21, 21, 2, 21, 21, 21, 21, 21, 2, 2, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 22, 22, 2, 22, 22, 22, 22, 22, + 22, 2, 2, 2, 22, 22, 22, 2, 22, 22, 22, 22, 2, 2, 2, 22, + 22, 2, 22, 2, 22, 22, 2, 2, 2, 22, 22, 2, 2, 2, 22, 22, + 22, 2, 2, 2, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 2, 2, 2, 2, 22, 22, 22, 22, 22, 2, 2, 2, 22, 22, 22, 2, + 22, 22, 22, 22, 2, 2, 22, 2, 2, 2, 2, 2, 2, 22, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 2, 2, 2, 2, 2, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 2, 23, 23, 23, 2, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 2, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 2, 2, 2, 23, 23, 23, 23, 23, 23, 23, 23, 2, 23, 23, 23, 2, + 23, 23, 23, 23, 2, 2, 2, 2, 2, 2, 2, 23, 23, 2, 23, 23, + 23, 2, 2, 2, 2, 2, 23, 23, 23, 23, 2, 2, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 2, 2, 2, 2, 2, 2, 2, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 2, 16, 16, 16, 2, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 16, 16, 16, 16, 16, + 2, 2, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 16, 16, 16, 2, + 16, 16, 16, 16, 2, 2, 2, 2, 2, 2, 2, 16, 16, 2, 2, 2, + 2, 2, 2, 2, 16, 2, 16, 16, 16, 16, 2, 2, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 2, 16, 16, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 2, 20, 20, 20, 2, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 2, 20, 20, 20, 2, + 20, 20, 20, 20, 20, 20, 2, 2, 2, 2, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 2, 2, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 2, 36, 36, 36, 2, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 2, 2, 2, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 2, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 2, 36, 2, 2, 36, 36, 36, 36, 36, 36, 36, 2, 2, 2, + 36, 2, 2, 2, 2, 36, 36, 36, 36, 36, 36, 2, 36, 2, 36, 36, + 36, 36, 36, 36, 36, 36, 2, 2, 2, 2, 2, 2, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 2, 2, 36, 36, 36, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 2, 2, 2, 2, 0, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 2, 2, 2, 2, 2, 18, 18, 2, 18, 2, 18, 18, 18, 18, + 18, 2, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 2, 18, 2, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 2, 2, 18, 18, 18, 18, 18, 2, 18, 2, 18, 18, + 18, 18, 18, 18, 2, 2, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 2, 2, 18, 18, 18, 18, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 2, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 2, 2, 2, 2, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 2, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 25, + 25, 2, 2, 2, 2, 2, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2, 8, 2, 2, + 2, 2, 2, 8, 2, 2, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 0, 8, 8, 8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 2, + 30, 30, 30, 30, 2, 2, 30, 30, 30, 30, 30, 30, 30, 2, 30, 2, + 30, 30, 30, 30, 2, 2, 30, 2, 30, 30, 30, 30, 2, 2, 30, 30, + 30, 30, 30, 30, 30, 2, 30, 2, 30, 30, 30, 30, 2, 2, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 2, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 2, 2, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 30, 2, 2, 2, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 2, 2, 2, 2, 2, 2, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 2, 2, 29, 29, + 29, 29, 29, 29, 2, 2, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, + 28, 28, 28, 28, 28, 28, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 2, 2, 2, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 0, 0, 0, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, 2, + 2, 2, 2, 2, 2, 2, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 2, 45, 45, 45, 45, 45, 45, 45, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 0, 0, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 2, 46, 46, 46, 2, 46, 46, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 2, 2, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 2, 2, 2, 2, 2, 2, 32, 32, 0, 0, 32, 0, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 2, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 2, 2, 2, 2, 2, 2, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 2, + 2, 2, 2, 2, 2, 2, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 2, 2, 2, 2, 2, 28, 28, 28, 28, 28, 28, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 2, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 2, 2, 2, 2, 48, 2, 2, 2, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, + 52, 52, 52, 52, 2, 2, 52, 52, 52, 52, 52, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + 58, 58, 2, 2, 2, 2, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + 2, 2, 2, 2, 2, 2, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + 58, 2, 2, 2, 58, 58, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 2, 2, 54, 54, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, + 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, + 91, 91, 91, 91, 91, 2, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, + 91, 91, 91, 2, 2, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, + 2, 2, 2, 2, 2, 2, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, + 91, 91, 91, 91, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 2, 2, 2, 2, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 2, 2, 2, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, + 76, 76, 76, 76, 76, 76, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, + 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 2, 2, 2, 2, 2, 2, + 2, 2, 93, 93, 93, 93, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 2, 2, + 2, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 2, 2, 2, 70, 70, 70, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, + 73, 73, 73, 73, 73, 73, 6, 6, 6, 6, 6, 6, 6, 6, 6, 2, + 2, 2, 2, 2, 2, 2, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 2, 2, 8, 8, 8, 76, 76, 76, 76, 76, 76, 76, 76, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, + 0, 2, 2, 2, 2, 2, 19, 19, 19, 19, 19, 19, 9, 9, 9, 9, + 9, 6, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 9, 9, 9, 9, 9, 19, 19, 19, 19, 9, 9, 9, 9, + 9, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 6, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 2, 2, 9, 9, + 9, 9, 9, 9, 2, 2, 9, 9, 9, 9, 9, 9, 9, 9, 2, 9, + 2, 9, 2, 9, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 2, 2, 9, 9, 9, 9, 9, 2, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2, 2, 9, 9, 9, 9, + 9, 9, 2, 9, 9, 9, 2, 2, 9, 9, 9, 2, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 19, 2, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 2, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 2, 2, 2, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, + 19, 19, 0, 0, 0, 0, 0, 0, 19, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 19, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, + 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 2, 2, 2, 2, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 2, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 2, 2, 2, 2, 2, 55, + 55, 55, 55, 55, 55, 55, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, + 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 2, 2, + 2, 2, 2, 2, 2, 61, 61, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 61, 30, 30, 30, 30, 30, 30, 30, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 30, 30, 30, 30, 30, 30, 30, 2, 30, 30, + 30, 30, 30, 30, 30, 2, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 2, 2, 0, 0, 0, 0, 0, 13, 0, 13, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 1, 1, 1, 1, 12, 12, 0, 0, 0, 0, 0, 0, 0, 0, 13, 13, + 13, 13, 0, 0, 0, 0, 2, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2, 1, + 1, 0, 0, 15, 15, 15, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 0, 0, 17, 17, 17, 2, 2, 2, 2, 2, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 0, 17, 17, 17, 17, 17, 17, 17, 17, 0, 0, + 0, 0, 0, 0, 0, 0, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 2, 2, 2, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 39, 39, 2, 2, 2, 39, 39, 39, 39, 39, 39, 39, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 86, 86, 86, 86, 86, 86, 86, 86, 86, 86, + 86, 86, 86, 86, 86, 86, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, + 77, 77, 2, 2, 2, 2, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, + 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 79, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0, + 0, 19, 19, 19, 19, 19, 2, 2, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, + 60, 60, 60, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, + 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 2, 2, + 2, 2, 2, 2, 2, 2, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 2, 2, 2, 2, + 2, 2, 2, 2, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, + 2, 2, 2, 2, 2, 2, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 0, 69, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 74, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 2, 2, 2, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 2, 0, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, + 2, 2, 2, 2, 84, 84, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 33, 33, 33, 33, 33, 2, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 68, 68, 68, 68, 2, 2, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, + 2, 2, 68, 68, 68, 68, 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, + 92, 92, 92, 92, 92, 92, 92, 92, 92, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 92, 92, 92, 92, 92, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 30, 30, 30, 30, 30, 30, 2, 2, 30, + 30, 30, 30, 30, 30, 2, 2, 30, 30, 30, 30, 30, 30, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, 9, 19, 19, 19, 19, + 0, 0, 2, 2, 2, 2, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 2, 2, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, + 2, 2, 2, 2, 2, 2, 12, 12, 12, 12, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 12, 12, 12, 12, 12, 12, 12, 2, 2, 2, + 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 2, 2, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 2, 2, 2, 2, 2, 2, 19, 19, 19, 19, 19, 19, 19, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 2, 2, + 2, 2, 2, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 2, 14, 14, + 14, 14, 14, 2, 14, 2, 14, 14, 2, 14, 14, 2, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 0, 0, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 6, 6, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, + 0, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 0, 0, 2, 2, 12, 12, 12, 12, 12, 12, 2, 2, + 12, 12, 12, 12, 12, 12, 2, 2, 12, 12, 12, 12, 12, 12, 2, 2, + 12, 12, 12, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, + 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, + 0, 0, 0, 0, 2, 2, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 2, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 2, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, + 49, 2, 49, 49, 2, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 2, 2, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, + 49, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 2, 2, 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 2, 2, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, + 71, 71, 71, 2, 2, 2, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 42, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 41, 2, 2, 2, 2, 2,118,118,118,118,118,118,118,118,118,118, + 118,118,118,118,118,118,118,118,118,118,118,118,118,118,118,118, + 118, 2, 2, 2, 2, 2, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, + 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, + 53, 53, 53, 53, 2, 53, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 2, 2, 2, 2, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, + 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 2, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 2, 2, 2, 2, 2, 2,135,135,135,135,135,135,135,135,135,135, + 135,135,135,135,135,135,135,135,135,135, 2, 2, 2, 2,135,135, + 135,135,135,135,135,135,135,135,135,135,135,135,135,135,135,135, + 135,135, 2, 2, 2, 2,106,106,106,106,106,106,106,106,106,106, + 106,106,106,106,106,106,106,106,106,106,106,106,106,106, 2, 2, + 2, 2, 2, 2, 2, 2,104,104,104,104,104,104,104,104,104,104, + 104,104,104,104,104,104,104,104,104,104, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2,104,110,110,110,110,110,110,110,110,110,110, + 110,110,110,110,110,110,110,110,110,110,110,110,110, 2, 2, 2, + 2, 2, 2, 2, 2, 2,110,110,110,110,110,110, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,110,110,110,110,110,110,110,110, 2, 2, + 2, 2, 2, 2, 2, 2, 47, 47, 47, 47, 47, 47, 2, 2, 47, 2, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 2, 47, 47, 2, + 2, 2, 47, 2, 2, 47, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 2, 81, 81, 81, + 81, 81, 81, 81, 81, 81,120,120,120,120,120,120,120,120,120,120, + 120,120,120,120,120,120,116,116,116,116,116,116,116,116,116,116, + 116,116,116,116,116,116,116,116,116,116,116,116,116,116,116,116, + 116,116,116,116,116, 2, 2, 2, 2, 2, 2, 2, 2,116,116,116, + 116,116,116,116,116,116,128,128,128,128,128,128,128,128,128,128, + 128,128,128,128,128,128,128,128,128, 2,128,128, 2, 2, 2, 2, + 2,128,128,128,128,128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 2, 2, 2, 66, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 2, 2, 2, 2, 2, 72, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, + 98, 98, 98, 98, 98, 98, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 2, 2, + 2, 2, 97, 97, 97, 97, 2, 2, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 57, 57, 57, 57, 2, 57, 57, 2, 2, 2, + 2, 2, 57, 57, 57, 57, 57, 57, 57, 57, 2, 57, 57, 57, 2, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 2, 2, 57, 57, + 57, 2, 2, 2, 2, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 2, + 2, 2, 2, 2, 2, 2, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, + 88, 88, 88, 88, 88, 88,117,117,117,117,117,117,117,117,117,117, + 117,117,117,117,117,117,112,112,112,112,112,112,112,112,112,112, + 112,112,112,112,112,112,112,112,112,112,112,112,112, 2, 2, 2, + 2,112,112,112,112,112,112,112,112,112,112,112,112, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 2, 2, 2, 78, + 78, 78, 78, 78, 78, 78, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 2, 2, 83, 83, + 83, 83, 83, 83, 83, 83, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, + 82, 82, 82, 82, 82, 82, 82, 82, 82, 2, 2, 2, 2, 2, 82, 82, + 82, 82, 82, 82, 82, 82,122,122,122,122,122,122,122,122,122,122, + 122,122,122,122,122,122,122,122, 2, 2, 2, 2, 2, 2, 2,122, + 122,122,122, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,122, + 122,122,122,122,122,122, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, + 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 2, + 2, 2, 2, 2, 2, 2,130,130,130,130,130,130,130,130,130,130, + 130,130,130,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,130,130,130, 2, 2, 2, 2, 2, 2, 2, + 130,130,130,130,130,130,144,144,144,144,144,144,144,144,144,144, + 144,144,144,144,144,144,144,144,144,144,144,144,144,144, 2, 2, + 2, 2, 2, 2, 2, 2,144,144,144,144,144,144,144,144,144,144, + 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 2,156,156,156,156,156,156,156,156,156,156, + 156,156,156,156,156,156,156,156,156,156,156,156,156,156,156,156, + 2,156,156,156, 2, 2,156,156, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,147,147,147,147,147,147,147,147,147,147, + 147,147,147,147,147,147,147,147,147,147,147,147,147,147, 2, 2, + 2, 2, 2, 2, 2, 2,148,148,148,148,148,148,148,148,148,148, + 148,148,148,148,148,148,148,148,148,148,148,148,148,148,148,148, + 2, 2, 2, 2, 2, 2,153,153,153,153,153,153,153,153,153,153, + 153,153,153,153,153,153,153,153,153,153,153,153,153,153,153,153, + 153,153, 2, 2, 2, 2,149,149,149,149,149,149,149,149,149,149, + 149,149,149,149,149,149,149,149,149,149,149,149,149, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, + 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, + 94, 94, 94, 94, 2, 2, 2, 2, 94, 94, 94, 94, 94, 94, 94, 94, + 94, 94, 94, 94, 94, 94, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 94, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, + 85, 85, 85, 85, 85, 85, 85, 85, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 85, 2, 2,101,101,101,101,101,101,101,101,101,101, + 101,101,101,101,101,101,101,101,101,101,101,101,101,101,101, 2, + 2, 2, 2, 2, 2, 2,101,101,101,101,101,101,101,101,101,101, + 2, 2, 2, 2, 2, 2, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 2, 96, 96, 96, 96, + 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 2, 2, + 2, 2, 2, 2, 2, 2,111,111,111,111,111,111,111,111,111,111, + 111,111,111,111,111,111,111,111,111,111,111,111,111, 2, 2, 2, + 2, 2, 2, 2, 2, 2,100,100,100,100,100,100,100,100,100,100, + 100,100,100,100,100,100, 2, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,108,108,108,108,108,108,108,108,108,108, + 108,108,108,108,108,108,108,108, 2,108,108,108,108,108,108,108, + 108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,108, + 108,108,108,108,108, 2,129,129,129,129,129,129,129, 2,129, 2, + 129,129,129,129, 2,129,129,129,129,129,129,129,129,129,129,129, + 129,129,129,129, 2,129,129,129,129,129,129,129,129,129,129,129, + 2, 2, 2, 2, 2, 2,109,109,109,109,109,109,109,109,109,109, + 109,109,109,109,109,109,109,109,109,109,109,109,109,109,109,109, + 109, 2, 2, 2, 2, 2,109,109,109,109,109,109,109,109,109,109, + 2, 2, 2, 2, 2, 2,107,107,107,107, 2,107,107,107,107,107, + 107,107,107, 2, 2,107,107, 2, 2,107,107,107,107,107,107,107, + 107,107,107,107,107,107,107,107,107,107,107,107,107,107,107, 2, + 107,107,107,107,107,107,107, 2,107,107, 2,107,107,107,107,107, + 2, 1,107,107,107,107,107,107,107,107,107, 2, 2,107,107, 2, + 2,107,107,107, 2, 2,107, 2, 2, 2, 2, 2, 2,107, 2, 2, + 2, 2, 2,107,107,107,107,107,107,107, 2, 2,107,107,107,107, + 107,107,107, 2, 2, 2,107,107,107,107,107, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,137,137,137,137,137,137,137,137,137,137, + 137,137,137,137,137,137,137,137,137,137,137,137,137,137,137,137, + 137,137, 2,137,137,137,137,137, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,124,124,124,124,124,124,124,124,124,124, + 124,124,124,124,124,124,124,124,124,124,124,124,124,124, 2, 2, + 2, 2, 2, 2, 2, 2,124,124,124,124,124,124,124,124,124,124, + 2, 2, 2, 2, 2, 2,123,123,123,123,123,123,123,123,123,123, + 123,123,123,123,123,123,123,123,123,123,123,123, 2, 2,123,123, + 123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, + 123,123,123,123, 2, 2,114,114,114,114,114,114,114,114,114,114, + 114,114,114,114,114,114,114,114,114,114,114, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,114,114,114,114,114,114,114,114,114,114, + 2, 2, 2, 2, 2, 2, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 2, 2, 2,102,102,102,102,102,102,102,102,102,102, + 102,102,102,102,102,102,102,102,102,102,102,102,102,102,102, 2, + 2, 2, 2, 2, 2, 2,102,102,102,102,102,102,102,102,102,102, + 2, 2, 2, 2, 2, 2,126,126,126,126,126,126,126,126,126,126, + 126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126, + 126, 2, 2,126,126,126,126,126,126,126,126,126,126,126,126,126, + 126,126, 2, 2, 2, 2,142,142,142,142,142,142,142,142,142,142, + 142,142,142,142,142,142,142,142,142,142,142,142,142,142,142,142, + 142,142, 2, 2, 2, 2,125,125,125,125,125,125,125,125,125,125, + 125,125,125,125,125,125,125,125,125, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2,125,154,154,154,154,154,154,154, 2, 2,154, + 2, 2,154,154,154,154,154,154,154,154, 2,154,154, 2,154,154, + 154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154, + 154,154,154,154,154,154,154,154,154,154,154,154, 2,154,154, 2, + 2,154,154,154,154,154,154,154,154,154,154,154,154, 2, 2, 2, + 2, 2, 2, 2, 2, 2,154,154,154,154,154,154,154,154,154,154, + 2, 2, 2, 2, 2, 2,150,150,150,150,150,150,150,150, 2, 2, + 150,150,150,150,150,150,150,150,150,150,150,150,150,150,150,150, + 150,150,150,150,150,150,150,150,150,150,150, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,141,141,141,141,141,141,141,141,141,141, + 141,141,141,141,141,141,141,141,141,141,141,141,141,141, 2, 2, + 2, 2, 2, 2, 2, 2,140,140,140,140,140,140,140,140,140,140, + 140,140,140,140,140,140,140,140,140, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,121,121,121,121,121,121,121,121,121,121, + 121,121,121,121,121,121,121,121,121,121,121,121,121,121,121, 2, + 2, 2, 2, 2, 2, 2,133,133,133,133,133,133,133,133,133, 2, + 133,133,133,133,133,133,133,133,133,133,133,133,133,133,133,133, + 133,133,133,133,133,133,133,133,133,133,133,133,133, 2,133,133, + 133,133,133,133,133,133,133,133,133,133,133,133, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,133,133,133,133,133,133,133,133,133,133, + 133,133,133, 2, 2, 2,134,134,134,134,134,134,134,134,134,134, + 134,134,134,134,134,134, 2, 2,134,134,134,134,134,134,134,134, + 134,134,134,134,134,134,134,134,134,134,134,134,134,134, 2,134, + 134,134,134,134,134,134,134,134,134,134,134,134,134, 2, 2, 2, + 2, 2, 2, 2, 2, 2,138,138,138,138,138,138,138, 2,138,138, + 2,138,138,138,138,138,138,138,138,138,138,138,138,138,138,138, + 138,138,138,138,138,138,138,138,138,138,138,138,138, 2, 2, 2, + 138, 2,138,138, 2,138,138,138,138,138,138,138,138,138, 2, 2, + 2, 2, 2, 2, 2, 2,138,138,138,138,138,138,138,138,138,138, + 2, 2, 2, 2, 2, 2,143,143,143,143,143,143, 2,143,143, 2, + 143,143,143,143,143,143,143,143,143,143,143,143,143,143,143,143, + 143,143,143,143,143,143,143,143,143,143,143,143,143,143,143,143, + 143,143,143,143,143, 2,143,143, 2,143,143,143,143,143,143, 2, + 2, 2, 2, 2, 2, 2,143,143,143,143,143,143,143,143,143,143, + 2, 2, 2, 2, 2, 2,145,145,145,145,145,145,145,145,145,145, + 145,145,145,145,145,145,145,145,145,145,145,145,145,145,145, 2, + 2, 2, 2, 2, 2, 2, 86, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 22, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, + 2, 2, 2, 2, 2, 2, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 2, 63, 63, 63, 63, 63, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 63, 63, 63, 63, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 2, 80, 80, 80, 80, 80, 80, 80, 80, 80, 2, + 2, 2, 2, 2, 2, 2,127,127,127,127,127,127,127,127,127,127, + 127,127,127,127,127,127,127,127,127,127,127,127,127, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 79, 79, 79, 79, 79, 79, 79, 79, 79, 2, + 2, 2, 2, 2, 2, 2,115,115,115,115,115,115,115,115,115,115, + 115,115,115,115,115,115,115,115,115,115,115,115,115,115,115,115, + 115,115,115,115,115, 2,115,115,115,115,115,115,115,115,115,115, + 2, 2, 2, 2,115,115,103,103,103,103,103,103,103,103,103,103, + 103,103,103,103,103,103,103,103,103,103,103,103,103,103,103,103, + 103,103,103,103, 2, 2,103,103,103,103,103,103, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,119,119,119,119,119,119,119,119,119,119, + 119,119,119,119,119,119,119,119,119,119,119,119, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,119,119,119,119,119,119,119,119,119,119, + 2,119,119,119,119,119,119,119, 2,119,119,119,119,119,119,119, + 119,119,119,119,119,119,119,119,119,119,119,119,119,119, 2, 2, + 2, 2, 2,119,119,119,146,146,146,146,146,146,146,146,146,146, + 146,146,146,146,146,146,146,146,146,146,146,146,146,146,146,146, + 146, 2, 2, 2, 2, 2, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 2, 2, 2, 2, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, + 2, 2, 2, 2, 2, 99,136,139, 0, 0,155, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 13, 13, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,136,136,136,136,136,136,136,136,136,136, + 136,136,136,136,136,136,136,136,136,136,136,136,136,136, 2, 2, + 2, 2, 2, 2, 2, 2,155,155,155,155,155,155,155,155,155,155, + 155,155,155,155,155,155,155,155,155,155,155,155, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2,136,136,136,136,136,136,136,136,136, 2, + 2, 2, 2, 2, 2, 2, 17, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 2, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 17, 17, 17, 17, 2, 2, + 2, 2, 2, 2, 2, 2,139,139,139,139,139,139,139,139,139,139, + 139,139,139,139,139,139,139,139,139,139,139,139,139,139,139,139, + 139,139, 2, 2, 2, 2,105,105,105,105,105,105,105,105,105,105, + 105,105,105,105,105,105,105,105,105,105,105,105,105,105,105,105, + 105, 2, 2, 2, 2, 2,105,105,105,105,105,105,105,105,105,105, + 105,105,105, 2, 2, 2,105,105,105,105,105,105,105,105,105, 2, + 2, 2, 2, 2, 2, 2,105,105,105,105,105,105,105,105,105,105, + 2, 2,105,105,105,105, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, + 2, 2, 2, 2, 2, 2, 9, 9, 9, 9, 9, 9, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 2, 0, 0, 2, 2, 0, 2, 2, 0, 0, 2, 2, 0, + 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, + 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, + 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 2, 0, 0,131,131,131,131,131,131,131,131,131,131, + 131,131,131,131,131,131,131,131,131,131,131,131,131,131,131,131, + 131,131, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2,131,131,131,131,131, 2,131,131,131,131,131,131,131,131,131, + 131,131,131,131,131,131, 56, 56, 56, 56, 56, 56, 56, 2, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 2, + 2, 56, 56, 56, 56, 56, 56, 56, 2, 56, 56, 2, 56, 56, 56, 56, + 56, 2, 2, 2, 2, 2,151,151,151,151,151,151,151,151,151,151, + 151,151,151,151,151,151,151,151,151,151,151,151,151,151,151,151, + 151,151,151, 2, 2, 2,151,151,151,151,151,151,151,151,151,151, + 151,151,151,151, 2, 2,151,151,151,151,151,151,151,151,151,151, + 2, 2, 2, 2,151,151,152,152,152,152,152,152,152,152,152,152, + 152,152,152,152,152,152,152,152,152,152,152,152,152,152,152,152, + 2, 2, 2, 2, 2,152,113,113,113,113,113,113,113,113,113,113, + 113,113,113,113,113,113,113,113,113,113,113, 2, 2,113,113,113, + 113,113,113,113,113,113,113,113,113,113,113,113,113, 2, 2, 2, + 2, 2, 2, 2, 2, 2,132,132,132,132,132,132,132,132,132,132, + 132,132,132,132,132,132,132,132,132,132,132,132,132,132,132,132, + 132,132, 2, 2, 2, 2,132,132,132,132,132,132,132,132,132,132, + 2, 2, 2, 2,132,132, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 2, 2, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 2, 3, 3, 2, 3, 2, 2, 3, 2, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 2, 3, + 2, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 3, 2, 3, + 2, 3, 2, 3, 3, 3, 2, 3, 3, 2, 3, 2, 2, 3, 2, 3, + 2, 3, 2, 3, 2, 3, 2, 3, 3, 2, 3, 2, 2, 3, 3, 3, + 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 2, 3, + 3, 3, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 2, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 3, 3, + 2, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 15, 0, 0, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, + 0, 2, 2, 2, 2, 2, 13, 13, 13, 13, 13, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 13, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 2, 3, 4, 5, 6, 0, + 0, 0, 0, 7, 8, 9, 10, 11, 0, 12, 0, 0, 0, 0, 13, 0, + 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 15, 16, 0, 17, 18, 19, + 0, 0, 0, 20, 21, 22, 0, 23, 0, 24, 0, 25, 0, 26, 0, 0, + 0, 0, 0, 27, 28, 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 0, 0, 0, 41, 0, 42, 43, 44, 45, + 46, 47, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 50, 51, 52, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 64, 0, 0, 0, 0, 0, + 0, 0, 0, 65, 0, 0, 0, 0, 66, 0, 0, 0, 67, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 70, 71, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 72, 73, 74, 75, 76, 77, 78, 79, 80, 0, +}; +static const uint16_t +_hb_ucd_u16[11328] = +{ + 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 7, 8, 9, 10, 11, 12, + 13, 13, 13, 14, 15, 13, 13, 16, 17, 18, 19, 20, 21, 22, 13, 23, + 13, 13, 13, 24, 25, 11, 11, 11, 11, 26, 11, 27, 28, 29, 30, 31, + 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 11, 37, 38, 13, 39, + 9, 9, 9, 11, 11, 11, 13, 13, 40, 13, 13, 13, 41, 13, 13, 13, + 13, 13, 13, 42, 9, 43, 11, 11, 44, 45, 32, 46, 47, 48, 49, 50, + 51, 52, 48, 48, 53, 32, 54, 55, 48, 48, 48, 48, 48, 56, 57, 58, + 59, 60, 48, 32, 61, 48, 48, 48, 48, 48, 62, 63, 64, 48, 65, 66, + 48, 67, 68, 69, 48, 70, 71, 72, 72, 72, 48, 73, 74, 75, 76, 32, + 77, 48, 48, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 84, 85, 92, 93, 94, 95, 96, 97, 98, 85, 99, 100, 101, 89, 102, + 103, 84, 85, 104, 105, 106, 89, 107, 108, 109, 110, 111, 112, 113, 95, 114, + 115, 116, 85, 117, 118, 119, 89, 120, 121, 116, 85, 122, 123, 124, 89, 125, + 126, 116, 48, 127, 128, 129, 89, 130, 131, 132, 48, 133, 134, 135, 95, 136, + 137, 48, 48, 138, 139, 140, 72, 72, 141, 48, 142, 143, 144, 145, 72, 72, + 146, 147, 148, 149, 150, 48, 151, 152, 153, 154, 32, 155, 156, 157, 72, 72, + 48, 48, 158, 159, 160, 161, 162, 163, 164, 165, 9, 9, 166, 11, 11, 167, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 168, 169, 48, 48, 168, 48, 48, 170, 171, 172, 48, 48, + 48, 171, 48, 48, 48, 173, 174, 175, 48, 176, 9, 9, 9, 9, 9, 177, + 178, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 179, 48, 180, 181, 48, 48, 48, 48, 182, 183, + 184, 185, 48, 186, 48, 187, 184, 188, 48, 48, 48, 189, 190, 191, 192, 193, + 194, 192, 48, 48, 195, 48, 48, 196, 197, 48, 198, 48, 48, 48, 48, 199, + 48, 200, 201, 202, 203, 48, 204, 205, 48, 48, 206, 48, 207, 208, 209, 209, + 48, 210, 48, 48, 48, 211, 212, 213, 192, 192, 214, 215, 216, 72, 72, 72, + 217, 48, 48, 218, 219, 160, 220, 221, 222, 48, 223, 64, 48, 48, 224, 225, + 48, 48, 226, 227, 228, 64, 48, 229, 230, 9, 9, 231, 232, 233, 234, 235, + 11, 11, 236, 27, 27, 27, 237, 238, 11, 239, 27, 27, 32, 32, 32, 240, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 241, 13, 13, 13, 13, 13, 13, + 242, 243, 242, 242, 243, 244, 242, 245, 246, 246, 246, 247, 248, 249, 250, 251, + 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 262, 72, 263, 264, 216, + 265, 266, 267, 268, 269, 270, 271, 271, 272, 273, 274, 209, 275, 276, 209, 277, + 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, + 279, 209, 280, 209, 209, 209, 209, 281, 209, 282, 278, 283, 209, 284, 285, 209, + 209, 209, 286, 72, 287, 72, 270, 270, 270, 288, 209, 209, 209, 209, 289, 270, + 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 290, 291, 209, 209, 292, + 209, 209, 209, 209, 209, 209, 293, 209, 209, 209, 209, 209, 209, 209, 209, 209, + 209, 209, 209, 209, 209, 209, 294, 295, 270, 296, 209, 209, 297, 278, 298, 278, + 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, + 278, 278, 278, 278, 278, 278, 278, 278, 299, 300, 278, 278, 278, 301, 278, 302, + 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, + 209, 209, 209, 278, 303, 209, 209, 304, 209, 305, 209, 209, 209, 209, 209, 209, + 9, 9, 306, 11, 11, 307, 308, 309, 13, 13, 13, 13, 13, 13, 310, 311, + 11, 11, 312, 48, 48, 48, 313, 314, 48, 315, 316, 316, 316, 316, 32, 32, + 317, 318, 319, 320, 321, 322, 72, 72, 209, 323, 209, 209, 209, 209, 209, 324, + 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 325, 72, 326, + 327, 328, 329, 330, 137, 48, 48, 48, 48, 331, 178, 48, 48, 48, 48, 332, + 333, 48, 48, 137, 48, 48, 48, 48, 200, 334, 48, 48, 209, 209, 324, 48, + 209, 335, 336, 209, 337, 338, 209, 209, 336, 209, 209, 338, 209, 209, 209, 209, + 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 209, 209, 209, 209, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 151, + 48, 339, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 151, 209, 209, 209, 286, 48, 48, 229, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 340, 48, 341, 72, 13, 13, 342, 343, 13, 344, 48, 48, 48, 48, 345, 346, + 31, 347, 348, 349, 13, 13, 13, 350, 351, 352, 353, 354, 355, 72, 72, 356, + 357, 48, 358, 359, 48, 48, 48, 360, 361, 48, 48, 362, 363, 192, 32, 364, + 64, 48, 365, 48, 366, 367, 48, 151, 77, 48, 48, 368, 369, 370, 371, 372, + 48, 48, 373, 374, 375, 376, 48, 377, 48, 48, 48, 378, 379, 380, 381, 382, + 383, 384, 316, 11, 11, 385, 386, 11, 11, 11, 11, 11, 48, 48, 387, 192, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 388, 48, 389, 48, 48, 206, + 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, + 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, 390, + 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, + 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, + 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 204, 48, 48, 48, 48, 48, 48, 207, 72, 72, + 392, 393, 394, 395, 396, 48, 48, 48, 48, 48, 48, 397, 398, 399, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 400, 72, 48, 48, 48, 48, 401, 48, 48, 74, 72, 72, 402, + 32, 403, 32, 404, 405, 406, 407, 73, 48, 48, 48, 48, 48, 48, 48, 408, + 409, 2, 3, 4, 5, 410, 411, 412, 48, 413, 48, 200, 414, 415, 416, 417, + 418, 48, 172, 419, 204, 204, 72, 72, 48, 48, 48, 48, 48, 48, 48, 71, + 420, 270, 270, 421, 271, 271, 271, 422, 423, 424, 425, 72, 72, 209, 209, 426, + 72, 72, 72, 72, 72, 72, 72, 72, 48, 151, 48, 48, 48, 101, 427, 428, + 48, 48, 429, 48, 430, 48, 48, 431, 48, 432, 48, 48, 433, 434, 72, 72, + 9, 9, 435, 11, 11, 48, 48, 48, 48, 204, 192, 9, 9, 436, 11, 437, + 48, 48, 74, 48, 48, 48, 438, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 315, 48, 199, 74, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 439, 48, 48, 440, 48, 441, 48, 442, 48, 200, 443, 72, 72, 72, 48, 444, + 48, 445, 48, 446, 72, 72, 72, 72, 48, 48, 48, 447, 270, 448, 270, 270, + 449, 450, 48, 451, 452, 453, 48, 454, 48, 455, 72, 72, 456, 48, 457, 458, + 48, 48, 48, 459, 48, 460, 48, 461, 48, 462, 463, 72, 72, 72, 72, 72, + 48, 48, 48, 48, 196, 72, 72, 72, 9, 9, 9, 464, 11, 11, 11, 465, + 48, 48, 466, 192, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 270, 467, 48, 48, 468, 469, 72, 72, 72, 72, + 48, 455, 470, 48, 62, 471, 72, 72, 72, 72, 72, 48, 472, 72, 48, 315, + 473, 48, 48, 474, 475, 448, 476, 477, 222, 48, 48, 478, 479, 48, 196, 192, + 480, 48, 481, 482, 483, 48, 48, 484, 222, 48, 48, 485, 486, 487, 488, 489, + 48, 98, 490, 491, 72, 72, 72, 72, 492, 493, 494, 48, 48, 495, 496, 192, + 497, 84, 85, 498, 499, 500, 501, 502, 72, 72, 72, 72, 72, 72, 72, 72, + 48, 48, 48, 503, 504, 505, 469, 72, 48, 48, 48, 506, 507, 192, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 48, 48, 508, 509, 510, 511, 72, 72, + 48, 48, 48, 512, 513, 192, 514, 72, 48, 48, 515, 516, 192, 72, 72, 72, + 48, 173, 517, 518, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 48, 48, 490, 519, 72, 72, 72, 72, 72, 72, 9, 9, 11, 11, 148, 520, + 521, 522, 48, 523, 524, 192, 72, 72, 72, 72, 525, 48, 48, 526, 527, 72, + 528, 48, 48, 529, 530, 531, 48, 48, 532, 533, 534, 72, 48, 48, 48, 196, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 85, 48, 508, 535, 536, 148, 175, 537, 48, 538, 539, 540, 72, 72, 72, 72, + 541, 48, 48, 542, 543, 192, 544, 48, 545, 546, 192, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 48, 547, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 101, 270, 548, 549, 550, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 207, 72, 72, 72, 72, 72, 72, + 271, 271, 271, 271, 271, 271, 551, 552, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 388, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 48, 48, 200, 553, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 48, 48, 48, 48, 315, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 48, 48, 48, 196, 48, 200, 370, 72, 72, 72, 72, 72, 72, 48, 204, 554, + 48, 48, 48, 555, 556, 557, 558, 559, 48, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 9, 9, 11, 11, 270, 560, 72, 72, 72, 72, 72, 72, + 48, 48, 48, 48, 561, 562, 563, 563, 564, 565, 72, 72, 72, 72, 566, 567, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 74, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 199, 72, 72, + 196, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 200, 72, 72, 72, 568, 569, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 206, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 48, 48, 48, 48, 48, 48, 71, 151, 196, 570, 571, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 325, + 209, 209, 572, 209, 209, 209, 573, 574, 575, 209, 576, 209, 209, 209, 577, 72, + 209, 209, 209, 209, 578, 72, 72, 72, 72, 72, 72, 72, 72, 72, 270, 579, + 209, 209, 209, 209, 209, 286, 270, 452, 72, 72, 72, 72, 72, 72, 72, 72, + 9, 580, 11, 581, 582, 583, 242, 9, 584, 585, 586, 587, 588, 9, 580, 11, + 589, 590, 11, 591, 592, 593, 594, 9, 595, 11, 9, 580, 11, 581, 582, 11, + 242, 9, 584, 594, 9, 595, 11, 9, 580, 11, 596, 9, 597, 598, 599, 600, + 11, 601, 9, 602, 603, 604, 605, 11, 606, 9, 607, 11, 608, 609, 609, 609, + 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, + 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, + 32, 32, 32, 610, 32, 32, 611, 612, 613, 614, 45, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 615, 616, 617, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 48, 48, 151, 618, 619, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 48, 48, 620, 621, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 622, 623, 72, 72, + 9, 9, 584, 11, 624, 370, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 488, 270, 270, 625, 626, 72, 72, 72, 72, + 488, 270, 627, 628, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 629, 48, 630, 631, 632, 633, 634, 635, 636, 206, 637, 206, 72, 72, 72, 638, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 209, 209, 326, 209, 209, 209, 209, 209, 209, 324, 335, 639, 639, 639, 209, 325, + 640, 209, 209, 209, 209, 209, 209, 209, 209, 209, 641, 72, 72, 72, 642, 209, + 643, 209, 209, 326, 577, 644, 325, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 645, + 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 646, 424, 424, + 209, 209, 209, 209, 209, 209, 209, 324, 209, 209, 209, 209, 209, 577, 326, 72, + 326, 209, 209, 209, 646, 176, 209, 209, 646, 209, 641, 644, 72, 72, 72, 72, + 209, 209, 209, 209, 209, 209, 209, 647, 209, 209, 209, 209, 648, 209, 209, 209, + 209, 209, 209, 209, 209, 324, 641, 649, 286, 209, 577, 286, 643, 286, 72, 72, + 209, 209, 209, 209, 209, 209, 209, 209, 209, 650, 209, 209, 287, 72, 72, 192, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 204, 72, 72, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 205, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 204, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 469, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 101, 72, + 48, 204, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 71, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 651, 72, 652, 652, 652, 652, 652, 652, 72, 72, 72, 72, 72, 72, 72, 72, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 72, + 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, + 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 653, + 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, + 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 391, 654, + 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 3, 3, 4, 5, 4, 5, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 6, 0, 0, 7, 0, + 8, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 11, 11, 11, 13, 11, + 14, 14, 14, 14, 14, 14, 14, 14, 15, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 16, 17, 18, 17, 17, 19, 20, 21, 21, 22, 21, 23, 24, + 25, 26, 27, 27, 28, 29, 27, 30, 27, 27, 27, 27, 27, 31, 27, 27, + 32, 33, 33, 33, 34, 27, 27, 27, 35, 35, 35, 36, 37, 37, 37, 38, + 39, 39, 40, 41, 42, 43, 44, 45, 45, 45, 27, 46, 47, 48, 49, 27, + 50, 50, 50, 50, 50, 51, 52, 50, 53, 54, 55, 56, 57, 58, 59, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, + 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, + 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, + 109, 110, 111, 111, 112, 113, 114, 111, 115, 116, 117, 118, 119, 120, 121, 122, + 123, 124, 124, 125, 124, 126, 45, 45, 127, 128, 129, 130, 131, 132, 45, 45, + 133, 133, 133, 133, 134, 133, 135, 136, 133, 134, 133, 137, 137, 138, 45, 45, + 139, 139, 139, 139, 139, 139, 139, 139, 139, 139, 140, 140, 141, 140, 140, 142, + 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, + 144, 144, 144, 144, 145, 146, 144, 144, 145, 144, 144, 147, 148, 149, 144, 144, + 144, 148, 144, 144, 144, 150, 144, 151, 144, 152, 153, 153, 153, 153, 153, 154, + 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, + 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, 155, + 155, 155, 155, 155, 155, 155, 155, 155, 156, 157, 158, 158, 158, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 169, 169, 169, 170, 171, 171, + 172, 173, 174, 174, 174, 174, 174, 175, 174, 174, 176, 155, 155, 155, 155, 177, + 178, 179, 180, 180, 181, 182, 183, 184, 185, 185, 186, 185, 187, 188, 169, 169, + 189, 190, 191, 191, 191, 192, 191, 193, 194, 194, 195, 8, 196, 45, 45, 45, + 197, 197, 197, 197, 198, 197, 197, 199, 200, 200, 200, 200, 201, 201, 201, 202, + 203, 203, 203, 204, 205, 206, 206, 206, 207, 140, 140, 208, 209, 210, 211, 212, + 4, 4, 213, 4, 4, 214, 215, 216, 4, 4, 4, 217, 8, 8, 8, 218, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 11, 219, 11, 11, 219, 220, 11, 221, 11, 11, 11, 222, 222, 223, 11, 224, + 225, 0, 0, 0, 0, 0, 226, 227, 228, 229, 0, 0, 45, 8, 8, 196, + 0, 0, 230, 231, 232, 0, 4, 4, 233, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 234, 45, 235, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, 236, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 237, 0, 238, 0, 0, 0, 0, 0, 0, + 239, 239, 240, 239, 239, 240, 4, 4, 241, 241, 241, 241, 241, 241, 241, 242, + 140, 140, 141, 243, 243, 243, 244, 245, 144, 246, 247, 247, 247, 247, 14, 14, + 0, 0, 0, 0, 0, 248, 45, 45, 249, 250, 249, 249, 249, 249, 249, 251, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 252, 45, 253, + 254, 0, 255, 256, 257, 258, 258, 258, 258, 259, 260, 261, 261, 261, 261, 262, + 263, 264, 264, 265, 143, 143, 143, 143, 266, 0, 264, 264, 0, 0, 267, 261, + 143, 266, 0, 0, 0, 0, 143, 268, 0, 0, 0, 0, 0, 261, 261, 269, + 261, 261, 261, 261, 261, 270, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 0, 0, 0, 0, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 271, + 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, 272, + 272, 272, 272, 272, 272, 272, 272, 272, 273, 272, 272, 272, 274, 275, 275, 275, + 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, 276, + 276, 276, 277, 45, 14, 14, 14, 14, 14, 14, 278, 278, 278, 278, 278, 279, + 0, 0, 280, 4, 4, 4, 4, 4, 281, 4, 4, 4, 282, 45, 45, 283, + 284, 284, 285, 286, 287, 287, 287, 288, 289, 289, 289, 289, 290, 291, 50, 50, + 292, 292, 293, 294, 294, 295, 143, 296, 297, 297, 297, 297, 298, 299, 139, 300, + 301, 301, 301, 302, 303, 304, 139, 139, 305, 305, 305, 305, 306, 307, 308, 309, + 310, 311, 247, 4, 4, 312, 313, 153, 153, 153, 153, 153, 308, 308, 314, 315, + 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, + 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, + 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, + 143, 143, 143, 143, 143, 143, 143, 143, 143, 143, 316, 143, 317, 143, 143, 318, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 319, 249, 249, 249, 249, 249, 249, 320, 45, 45, + 321, 322, 21, 323, 324, 27, 27, 27, 27, 27, 27, 27, 325, 48, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 326, 45, 27, 27, 27, 27, 327, 27, 27, 47, 45, 45, 328, + 8, 286, 329, 0, 0, 330, 331, 46, 27, 27, 27, 27, 27, 27, 27, 332, + 333, 0, 1, 2, 1, 2, 334, 260, 261, 335, 143, 266, 336, 337, 338, 339, + 340, 341, 342, 343, 344, 344, 45, 45, 341, 341, 341, 341, 341, 341, 341, 345, + 346, 0, 0, 347, 11, 11, 11, 11, 348, 349, 350, 45, 45, 0, 0, 351, + 45, 45, 45, 45, 45, 45, 45, 45, 352, 353, 354, 354, 354, 355, 356, 253, + 357, 357, 358, 359, 360, 361, 361, 362, 363, 364, 365, 365, 366, 367, 45, 45, + 368, 368, 368, 368, 368, 369, 369, 369, 370, 371, 372, 373, 373, 374, 373, 375, + 376, 376, 377, 378, 378, 378, 379, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 380, 380, 380, 380, 380, 380, 380, 380, 380, 380, 380, 380, 380, 380, 380, 380, + 380, 380, 380, 381, 380, 382, 383, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 384, 385, 385, 386, 387, 388, 389, 389, 390, 391, 392, 45, 45, 45, 393, 394, + 395, 396, 397, 398, 45, 45, 45, 45, 399, 399, 400, 401, 400, 402, 400, 400, + 403, 404, 405, 406, 407, 407, 408, 408, 409, 409, 45, 45, 410, 410, 411, 412, + 413, 413, 413, 414, 415, 416, 417, 418, 419, 420, 421, 45, 45, 45, 45, 45, + 422, 422, 422, 422, 423, 45, 45, 45, 424, 424, 424, 425, 424, 424, 424, 426, + 427, 427, 428, 429, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 27, 430, 431, 431, 432, 433, 45, 45, 45, 45, + 434, 434, 435, 436, 436, 437, 45, 45, 45, 45, 45, 438, 439, 45, 440, 441, + 442, 442, 442, 442, 443, 444, 442, 445, 446, 446, 446, 446, 447, 448, 449, 450, + 451, 451, 451, 452, 453, 454, 454, 455, 456, 456, 456, 456, 456, 456, 457, 458, + 459, 460, 459, 461, 45, 45, 45, 45, 462, 463, 464, 465, 465, 465, 466, 467, + 468, 469, 470, 471, 472, 473, 474, 475, 45, 45, 45, 45, 45, 45, 45, 45, + 476, 476, 476, 476, 476, 477, 478, 45, 479, 479, 479, 479, 480, 481, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 482, 482, 482, 483, 482, 484, 45, 45, + 485, 485, 485, 485, 486, 487, 488, 45, 489, 489, 489, 490, 491, 45, 45, 45, + 492, 493, 494, 492, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 495, 495, 495, 496, 45, 45, 45, 45, 45, 45, 497, 497, 497, 497, 497, 498, + 499, 500, 501, 502, 503, 504, 45, 45, 45, 45, 505, 506, 506, 505, 507, 45, + 508, 508, 508, 508, 509, 510, 510, 510, 510, 510, 511, 45, 512, 512, 512, 513, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 514, 515, 515, 516, 517, 515, 518, 519, 519, 520, 521, 522, 45, 45, 45, 45, + 523, 524, 524, 525, 526, 527, 528, 529, 530, 531, 532, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 533, 534, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 535, 536, 536, 536, 537, + 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, + 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, + 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, 538, + 538, 538, 538, 538, 538, 538, 538, 538, 538, 539, 45, 45, 45, 45, 45, 45, + 538, 538, 538, 538, 538, 538, 540, 541, 538, 538, 538, 538, 538, 538, 538, 538, + 538, 538, 538, 538, 542, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, + 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, 543, + 543, 543, 544, 545, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, + 546, 546, 546, 546, 547, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, + 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, + 278, 278, 278, 548, 549, 550, 551, 45, 45, 45, 45, 45, 45, 552, 553, 554, + 555, 555, 555, 555, 556, 557, 558, 559, 555, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 560, 560, 560, 560, 560, 561, 45, 45, 45, 45, 45, 45, + 562, 562, 562, 562, 563, 562, 562, 562, 564, 562, 45, 45, 45, 45, 565, 566, + 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, + 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, + 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, + 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 568, + 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, 567, + 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, + 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 570, 45, 45, + 571, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 572, 258, 258, 258, 258, 258, 258, 258, 258, 258, 258, 258, 258, 258, 258, 258, + 258, 573, 45, 45, 45, 574, 575, 576, 576, 576, 576, 576, 576, 576, 576, 576, + 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 577, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 578, 578, 578, 578, 578, 578, 579, 580, 581, 582, 267, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 583, + 0, 0, 584, 0, 0, 0, 585, 586, 587, 0, 588, 0, 0, 0, 589, 45, + 11, 11, 11, 11, 590, 45, 45, 45, 45, 45, 45, 45, 45, 45, 0, 267, + 0, 0, 0, 0, 0, 234, 0, 589, 45, 45, 45, 45, 45, 45, 45, 45, + 0, 0, 0, 0, 0, 226, 0, 0, 0, 591, 592, 593, 594, 0, 0, 0, + 595, 596, 0, 597, 598, 599, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 600, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 601, 0, 0, 0, + 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, + 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, + 602, 602, 602, 602, 602, 602, 602, 602, 603, 604, 605, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 606, 607, 608, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 609, 609, 610, 611, 612, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 613, 613, 613, 614, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 615, 615, 615, 615, 615, 615, 615, 615, 615, 615, 615, 615, 616, 617, 45, 45, + 618, 618, 618, 618, 619, 620, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 333, 0, 0, 0, 621, 45, 45, 45, 45, + 333, 0, 0, 622, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 623, 27, 624, 625, 626, 627, 628, 629, 630, 631, 632, 631, 45, 45, 45, 325, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 0, 0, 253, 0, 0, 0, 0, 0, 0, 267, 228, 333, 333, 333, 0, 583, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 622, 45, 45, 45, 633, 0, + 634, 0, 0, 253, 589, 635, 583, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 636, 349, 349, + 0, 0, 0, 0, 0, 0, 0, 267, 0, 0, 0, 0, 0, 589, 253, 45, + 253, 0, 0, 0, 636, 286, 0, 0, 636, 0, 622, 635, 45, 45, 45, 45, + 0, 0, 0, 0, 0, 0, 0, 637, 0, 0, 0, 0, 638, 0, 0, 0, + 0, 0, 0, 0, 0, 267, 622, 639, 234, 0, 589, 234, 248, 234, 45, 45, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 330, 0, 0, 235, 45, 45, 286, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 319, 45, 45, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 640, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, + 249, 319, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 566, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 641, 45, + 249, 319, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, + 249, 249, 249, 249, 642, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 643, 45, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 45, 45, 45, 45, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 939, 940, 941, 942, 946, 948, 0, 962, 969, 970, 971, 976,1001,1002,1003,1008, + 0,1033,1040,1041,1042,1043,1047, 0, 0,1080,1081,1082,1086,1110, 0, 0, + 1124,1125,1126,1127,1131,1133, 0,1147,1154,1155,1156,1161,1187,1188,1189,1193, + 0,1219,1226,1227,1228,1229,1233, 0, 0,1267,1268,1269,1273,1298, 0,1303, + 943,1128, 944,1129, 954,1139, 958,1143, 959,1144, 960,1145, 961,1146, 964,1149, + 0, 0, 973,1158, 974,1159, 975,1160, 983,1168, 978,1163, 988,1173, 990,1175, + 991,1176, 993,1178, 994,1179, 0, 0,1004,1190,1005,1191,1006,1192,1014,1199, + 1007, 0, 0, 0,1016,1201,1020,1206, 0,1022,1208,1025,1211,1023,1209, 0, + 0, 0, 0,1032,1218,1037,1223,1035,1221, 0, 0, 0,1044,1230,1045,1231, + 1049,1235, 0, 0,1058,1244,1064,1250,1060,1246,1066,1252,1067,1253,1072,1258, + 1069,1255,1077,1264,1074,1261, 0, 0,1083,1270,1084,1271,1085,1272,1088,1275, + 1089,1276,1096,1283,1103,1290,1111,1299,1115,1118,1307,1120,1309,1121,1310, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1053,1239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1093, + 1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 949,1134,1010, + 1195,1050,1236,1090,1277,1341,1368,1340,1367,1342,1369,1339,1366, 0,1320,1347, + 1418,1419,1323,1350, 0, 0, 992,1177,1018,1204,1055,1241,1416,1417,1415,1424, + 1202, 0, 0, 0, 987,1172, 0, 0,1031,1217,1321,1348,1322,1349,1338,1365, + 950,1135, 951,1136, 979,1164, 980,1165,1011,1196,1012,1197,1051,1237,1052,1238, + 1061,1247,1062,1248,1091,1278,1092,1279,1071,1257,1076,1263, 0, 0, 997,1182, + 0, 0, 0, 0, 0, 0, 945,1130, 982,1167,1337,1364,1335,1362,1046,1232, + 1422,1423,1113,1301, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 8, 9, 0, 10,1425, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0,1314,1427, 5,1434,1438,1443, 0,1450, 0,1455,1461, + 1514, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1446,1458,1468,1476,1480,1486, + 1517, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1489,1503,1494,1500,1508, 0, + 0, 0, 0,1520,1521, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1526,1528, 0,1525, 0, 0, 0,1522, 0, 0, 0, 0,1536,1532,1539, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1534, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1556, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1548,1550, 0,1547, 0, 0, 0,1567, 0, 0, 0, 0,1558,1554,1561, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0,1568,1569, 0, 0, 0, 0, 0, 0, 0, 0, + 0,1529,1551, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1523,1545,1524,1546, 0, 0,1527,1549, 0, 0,1570,1571,1530,1552,1531,1553, + 0, 0,1533,1555,1535,1557,1537,1559, 0, 0,1572,1573,1544,1566,1538,1560, + 1540,1562,1541,1563,1542,1564, 0, 0,1543,1565, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0,1606,1607,1609,1608,1610, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1613, 0,1611, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1612, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1620, 0, 0, 0, 0, 0, 0, + 0,1623, 0, 0,1624, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,1614,1615,1616,1617,1618,1619,1621,1622, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1628,1629, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1625,1626, 0,1627, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1634, 0, 0,1635, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1630,1631,1632, 0, 0,1633, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,1639, 0, 0,1638,1640, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1636,1637, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,1641, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1642,1644,1643, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,1645, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1646, 0, 0, 0, 0, 0, 0,1648,1649, 0,1647,1650, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1651,1653,1652, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1654, 0,1655,1657,1656, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1659, 0, 0, 0, 0, 0, 0, 0, 0, 0,1660, 0, 0, + 0, 0,1661, 0, 0, 0, 0,1662, 0, 0, 0, 0,1663, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1658, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1664, 0,1665,1673, 0,1674, 0, 0, 0, 0, 0, 0, 0, + 0,1666, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1668, 0, 0, 0, 0, 0, 0, 0, 0, 0,1669, 0, 0, + 0, 0,1670, 0, 0, 0, 0,1671, 0, 0, 0, 0,1672, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1667, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0,1675, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0,1676, 0,1677, 0,1678, 0,1679, 0,1680, 0, + 0, 0,1681, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1682, 0,1683, 0, 0, + 1684,1685, 0,1686, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 953,1138, 955,1140, 956,1141, 957,1142,1324,1351, 963,1148, 965,1150, 968,1153, + 966,1151, 967,1152,1378,1380,1379,1381, 984,1169, 985,1170,1420,1421, 986,1171, + 989,1174, 995,1180, 998,1183, 996,1181, 999,1184,1000,1185,1015,1200,1329,1356, + 1017,1203,1019,1205,1021,1207,1024,1210,1687,1688,1027,1213,1026,1212,1028,1214, + 1029,1215,1030,1216,1034,1220,1036,1222,1039,1225,1038,1224,1334,1361,1336,1363, + 1382,1384,1383,1385,1056,1242,1057,1243,1059,1245,1063,1249,1689,1690,1065,1251, + 1068,1254,1070,1256,1386,1387,1388,1389,1691,1692,1073,1259,1075,1262,1079,1266, + 1078,1265,1095,1282,1098,1285,1097,1284,1390,1391,1392,1393,1099,1286,1100,1287, + 1101,1288,1102,1289,1105,1292,1104,1291,1106,1294,1107,1295,1108,1296,1114,1302, + 1119,1308,1122,1311,1123,1312,1186,1260,1293,1305, 0,1394, 0, 0, 0, 0, + 952,1137, 947,1132,1317,1344,1316,1343,1319,1346,1318,1345,1693,1695,1371,1375, + 1370,1374,1373,1377,1372,1376,1694,1696, 981,1166, 977,1162, 972,1157,1326,1353, + 1325,1352,1328,1355,1327,1354,1697,1698,1009,1194,1013,1198,1054,1240,1048,1234, + 1331,1358,1330,1357,1333,1360,1332,1359,1699,1700,1396,1401,1395,1400,1398,1403, + 1397,1402,1399,1404,1094,1281,1087,1274,1406,1411,1405,1410,1408,1413,1407,1412, + 1409,1414,1109,1297,1117,1306,1116,1304,1112,1300, 0, 0, 0, 0, 0, 0, + 1471,1472,1701,1705,1702,1706,1703,1707,1430,1431,1715,1719,1716,1720,1717,1721, + 1477,1478,1729,1731,1730,1732, 0, 0,1435,1436,1733,1735,1734,1736, 0, 0, + 1481,1482,1737,1741,1738,1742,1739,1743,1439,1440,1751,1755,1752,1756,1753,1757, + 1490,1491,1765,1768,1766,1769,1767,1770,1447,1448,1771,1774,1772,1775,1773,1776, + 1495,1496,1777,1779,1778,1780, 0, 0,1451,1452,1781,1783,1782,1784, 0, 0, + 1504,1505,1785,1788,1786,1789,1787,1790, 0,1459, 0,1791, 0,1792, 0,1793, + 1509,1510,1794,1798,1795,1799,1796,1800,1462,1463,1808,1812,1809,1813,1810,1814, + 1467, 21,1475, 22,1479, 23,1485, 24,1493, 27,1499, 28,1507, 29, 0, 0, + 1704,1708,1709,1710,1711,1712,1713,1714,1718,1722,1723,1724,1725,1726,1727,1728, + 1740,1744,1745,1746,1747,1748,1749,1750,1754,1758,1759,1760,1761,1762,1763,1764, + 1797,1801,1802,1803,1804,1805,1806,1807,1811,1815,1816,1817,1818,1819,1820,1821, + 1470,1469,1822,1474,1465, 0,1473,1825,1429,1428,1426, 12,1432, 0, 26, 0, + 0,1315,1823,1484,1466, 0,1483,1829,1433, 13,1437, 14,1441,1826,1827,1828, + 1488,1487,1513, 19, 0, 0,1492,1515,1445,1444,1442, 15, 0,1831,1832,1833, + 1502,1501,1516, 25,1497,1498,1506,1518,1457,1456,1454, 17,1453,1313, 11, 3, + 0, 0,1824,1512,1519, 0,1511,1830,1449, 16,1460, 18,1464, 4, 0, 0, + 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 2, 6, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1834,1835, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1836, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1837,1839,1838, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,1840, 0, 0, 0, 0,1841, 0, 0,1842, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,1843, 0,1844, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0,1845, 0, 0,1846, 0, 0,1847, 0,1848, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 937, 0,1850, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1849, 936, 938, + 1851,1852, 0, 0,1853,1854, 0, 0,1855,1856, 0, 0, 0, 0, 0, 0, + 1857,1858, 0, 0,1861,1862, 0, 0,1863,1864, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1867,1868,1869,1870, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1859,1860,1865,1866, 0, 0, 0, 0, 0, 0,1871,1872,1873,1874, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 33, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1875, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1877, 0,1878, 0, + 1879, 0,1880, 0,1881, 0,1882, 0,1883, 0,1884, 0,1885, 0,1886, 0, + 1887, 0,1888, 0, 0,1889, 0,1890, 0,1891, 0, 0, 0, 0, 0, 0, + 1892,1893, 0,1894,1895, 0,1896,1897, 0,1898,1899, 0,1900,1901, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,1876, 0, 0, 0, 0, 0, 0, 0, 0, 0,1902, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1904, 0,1905, 0, + 1906, 0,1907, 0,1908, 0,1909, 0,1910, 0,1911, 0,1912, 0,1913, 0, + 1914, 0,1915, 0, 0,1916, 0,1917, 0,1918, 0, 0, 0, 0, 0, 0, + 1919,1920, 0,1921,1922, 0,1923,1924, 0,1925,1926, 0,1927,1928, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,1903, 0, 0,1929,1930,1931,1932, 0, 0, 0,1933, 0, + 710, 385, 724, 715, 455, 103, 186, 825, 825, 242, 751, 205, 241, 336, 524, 601, + 663, 676, 688, 738, 411, 434, 474, 500, 649, 746, 799, 108, 180, 416, 482, 662, + 810, 275, 462, 658, 692, 344, 618, 679, 293, 388, 440, 492, 740, 116, 146, 168, + 368, 414, 481, 527, 606, 660, 665, 722, 781, 803, 809, 538, 553, 588, 642, 758, + 811, 701, 233, 299, 573, 612, 487, 540, 714, 779, 232, 267, 412, 445, 457, 585, + 594, 766, 167, 613, 149, 148, 560, 589, 648, 768, 708, 345, 411, 704, 105, 259, + 313, 496, 518, 174, 542, 120, 307, 101, 430, 372, 584, 183, 228, 529, 650, 697, + 424, 732, 428, 349, 632, 355, 517, 110, 135, 147, 403, 580, 624, 700, 750, 170, + 193, 245, 297, 374, 463, 543, 763, 801, 812, 815, 162, 384, 420, 730, 287, 330, + 337, 366, 459, 476, 509, 558, 591, 610, 726, 652, 734, 759, 154, 163, 198, 473, + 683, 697, 292, 311, 353, 423, 572, 494, 113, 217, 259, 280, 314, 499, 506, 603, + 608, 752, 778, 782, 788, 117, 557, 748, 774, 320, 109, 126, 260, 265, 373, 411, + 479, 523, 655, 737, 823, 380, 765, 161, 395, 398, 438, 451, 502, 516, 537, 583, + 791, 136, 340, 769, 122, 273, 446, 727, 305, 322, 400, 496, 771, 155, 190, 269, + 377, 391, 406, 432, 501, 519, 599, 684, 687, 749, 776, 175, 452, 191, 480, 510, + 659, 772, 805, 813, 397, 444, 619, 566, 568, 575, 491, 471, 707, 111, 636, 156, + 153, 288, 346, 578, 256, 435, 383, 729, 680, 767, 694, 295, 128, 210, 0, 0, + 227, 0, 379, 0, 0, 150, 493, 525, 544, 551, 552, 556, 783, 576, 604, 0, + 661, 0, 703, 0, 0, 735, 743, 0, 0, 0, 793, 794, 795, 808, 741, 773, + 118, 127, 130, 166, 169, 177, 207, 213, 215, 226, 229, 268, 270, 317, 327, 329, + 335, 369, 375, 381, 404, 441, 448, 458, 477, 484, 503, 539, 545, 547, 546, 548, + 549, 550, 554, 555, 561, 564, 569, 591, 593, 595, 598, 607, 620, 625, 625, 651, + 690, 695, 705, 706, 716, 717, 733, 735, 777, 786, 790, 315, 869, 623, 0, 0, + 102, 145, 134, 115, 129, 138, 165, 171, 207, 202, 206, 212, 227, 231, 240, 243, + 250, 254, 294, 296, 303, 308, 319, 325, 321, 329, 326, 335, 341, 357, 360, 362, + 370, 379, 388, 389, 393, 421, 424, 438, 456, 454, 458, 465, 477, 535, 485, 490, + 493, 507, 512, 514, 521, 522, 525, 526, 528, 533, 532, 541, 565, 569, 574, 586, + 591, 597, 607, 637, 647, 674, 691, 693, 695, 698, 703, 699, 705, 704, 702, 706, + 709, 717, 728, 736, 747, 754, 770, 777, 783, 784, 786, 787, 790, 802, 825, 848, + 847, 857, 55, 65, 66, 883, 892, 916, 822, 824, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1586, 0,1605, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1602,1603,1934,1935,1574,1575, + 1576,1577,1579,1580,1581,1583,1584, 0,1585,1587,1588,1589,1591, 0,1592, 0, + 1593,1594, 0,1595,1596, 0,1598,1599,1600,1601,1604,1582,1578,1590,1597, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1936, 0,1937, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1938, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1939,1940, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1941,1942, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1944,1943, 0,1945, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1946,1947, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,1948, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1949,1950, + 1951,1952,1953,1954,1955, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1956,1957,1958,1960,1959, + 1961, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 106, 104, 107, 826, 114, 118, 119, 121, 123, 124, 127, 125, 34, 830, 130, 131, + 132, 137, 827, 35, 133, 139, 829, 142, 143, 112, 144, 145, 924, 151, 152, 37, + 157, 158, 159, 160, 38, 165, 166, 169, 171, 172, 173, 174, 176, 177, 178, 179, + 181, 182, 182, 182, 833, 468, 184, 185, 834, 187, 188, 189, 196, 192, 194, 195, + 197, 199, 200, 201, 203, 204, 204, 206, 208, 209, 211, 218, 213, 219, 214, 216, + 153, 234, 221, 222, 223, 220, 225, 224, 230, 835, 235, 236, 237, 238, 239, 244, + 836, 837, 247, 248, 249, 246, 251, 39, 40, 253, 255, 255, 838, 257, 258, 259, + 261, 839, 262, 263, 301, 264, 41, 266, 270, 272, 271, 841, 274, 842, 277, 276, + 278, 281, 282, 42, 283, 284, 285, 286, 43, 843, 44, 289, 290, 291, 293, 934, + 298, 845, 845, 621, 300, 300, 45, 852, 894, 302, 304, 46, 306, 309, 310, 312, + 316, 48, 47, 317, 846, 318, 323, 324, 325, 324, 328, 329, 333, 331, 332, 334, + 335, 336, 338, 339, 342, 343, 347, 351, 849, 350, 348, 352, 354, 359, 850, 361, + 358, 356, 49, 363, 365, 367, 364, 50, 369, 371, 851, 376, 386, 378, 53, 381, + 52, 51, 140, 141, 387, 382, 614, 78, 388, 389, 390, 394, 392, 856, 54, 399, + 396, 402, 404, 858, 405, 401, 407, 55, 408, 409, 410, 413, 859, 415, 56, 417, + 860, 418, 57, 419, 422, 424, 425, 861, 840, 862, 426, 863, 429, 431, 427, 433, + 437, 441, 438, 439, 442, 443, 864, 436, 449, 450, 58, 454, 453, 865, 447, 460, + 866, 867, 461, 466, 465, 464, 59, 467, 470, 469, 472, 828, 475, 868, 478, 870, + 483, 485, 486, 871, 488, 489, 872, 873, 495, 497, 60, 498, 61, 61, 504, 505, + 507, 508, 511, 62, 513, 874, 515, 875, 518, 844, 520, 876, 877, 878, 63, 64, + 528, 880, 879, 881, 882, 530, 531, 531, 533, 66, 534, 67, 68, 884, 536, 538, + 541, 69, 885, 549, 886, 887, 556, 559, 70, 561, 562, 563, 888, 889, 889, 567, + 71, 890, 570, 571, 72, 891, 577, 73, 581, 579, 582, 893, 587, 74, 590, 592, + 596, 75, 895, 896, 76, 897, 600, 898, 602, 605, 607, 899, 900, 609, 901, 611, + 853, 77, 615, 616, 79, 617, 252, 902, 903, 854, 855, 621, 622, 731, 80, 627, + 626, 628, 164, 629, 630, 631, 633, 904, 632, 634, 639, 640, 635, 641, 646, 651, + 638, 643, 644, 645, 905, 907, 906, 81, 653, 654, 656, 911, 657, 908, 82, 83, + 909, 910, 84, 664, 665, 666, 667, 669, 668, 671, 670, 674, 672, 673, 675, 85, + 677, 678, 86, 681, 682, 912, 685, 686, 87, 689, 36, 913, 914, 88, 89, 696, + 702, 709, 711, 915, 712, 713, 718, 719, 917, 831, 721, 720, 723, 832, 725, 728, + 918, 919, 739, 742, 744, 920, 745, 753, 756, 757, 755, 760, 761, 921, 762, 90, + 764, 922, 91, 775, 279, 780, 923, 925, 92, 93, 785, 926, 94, 927, 787, 787, + 789, 928, 792, 95, 796, 797, 798, 800, 96, 929, 802, 804, 806, 97, 98, 807, + 930, 99, 931, 932, 933, 814, 100, 816, 817, 818, 819, 820, 821, 935, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; +static const int16_t +_hb_ucd_i16[196] = +{ + 0, 0, 0, 0, 1, -1, 0, 0, 2, 0, -2, 0, 0, 0, 0, 2, + 0, -2, 0, 0, 0, 0, 0, 16, 0, 0, 0, -16, 0, 0, 1, -1, + 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, -1, 0, 3, 3, 3, -3, + -3, -3, 0, 0, 0, 2016, 0, 0, 0, 0, 0, 2527, 1923, 1914, 1918, 0, + 2250, 0, 0, 0, 0, 0, 0, 138, 0, 7, 0, 0, -7, 0, 0, 0, + 1, -1, 1, -1, -1, 1, -1, 0, 1824, 0, 0, 0, 0, 0, 2104, 0, + 2108, 2106, 0, 2106, 1316, 0, 0, 0, 0, 1, -1, 1, -1, -138, 0, 0, + 1, -1, 8, 8, 8, 0, 7, 7, 0, 0, -8, -8, -8, -7, -7, 0, + 1, -1, 0, 2,-1316, 1, -1, 0, -1, 1, -1, 1, -1, 3, 1, -1, + -3, 1, -1, 1, -1, 0, 0,-1914,-1918, 0, 0,-1923,-1824, 0, 0, 0, + 0,-2016, 0, 0, 1, -1, 0, 1, 0, 0,-2104, 0, 0, 0, 0,-2106, + -2108,-2106, 0, 0, 1, -1,-2250, 0, 0, 0,-2527, 0, 0, -2, 0, 1, + -1, 0, 1, -1, +}; + +static inline uint_fast8_t +_hb_ucd_gc (unsigned u) +{ + return u<1114110u?_hb_ucd_u8[2176+(((_hb_ucd_u16[((_hb_ucd_u8[u>>4>>5])<<5)+((u>>4)&31u)])<<4)+((u)&15u))]:2; +} +static inline uint_fast8_t +_hb_ucd_ccc (unsigned u) +{ + return u<125259u?_hb_ucd_u8[15060+(((_hb_ucd_u8[13636+(((_hb_ucd_u8[12656+(u>>3>>4)])<<4)+((u>>3)&15u))])<<3)+((u)&7u))]:0; +} +static inline unsigned +_hb_ucd_b4 (const uint8_t* a, unsigned i) +{ + return (a[i>>1]>>((i&1u)<<2))&15u; +} +static inline int_fast16_t +_hb_ucd_bmg (unsigned u) +{ + return u<65380u?_hb_ucd_i16[((_hb_ucd_u8[16372+(((_hb_ucd_b4(16244+_hb_ucd_u8,u>>2>>6))<<6)+((u>>2)&63u))])<<2)+((u)&3u)]:0; +} +static inline uint_fast8_t +_hb_ucd_sc (unsigned u) +{ + return u<918000u?_hb_ucd_u8[19126+(((_hb_ucd_u16[3040+(((_hb_ucd_u8[17332+(u>>4>>5)])<<5)+((u>>4)&31u))])<<4)+((u)&15u))]:2; +} +static inline uint_fast16_t +_hb_ucd_dm (unsigned u) +{ + return u<195102u?_hb_ucd_u16[6144+(((_hb_ucd_u8[29430+(u>>6)])<<6)+((u)&63u))]:0; +} + + +#elif !defined(HB_NO_UCD_UNASSIGNED) + +static const uint8_t +_hb_ucd_u8[17508] = +{ + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 9, 10, 11, 7, 7, 7, 7, 12, 13, 14, 14, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 21, 23, 21, 21, 21, 21, 24, 7, 7, + 25, 26, 21, 21, 21, 21, 27, 28, 21, 21, 29, 30, 31, 32, 33, 34, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 35, 7, 36, 37, 7, 38, 7, 7, 7, 39, 21, 40, + 7, 7, 41, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 42, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 43, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 44, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 34, 35, 36, 37, 38, 39, 34, 34, 34, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 64, 65, 66, 67, 68, 69, 70, 71, 69, 72, 73, + 69, 69, 64, 74, 64, 64, 75, 76, 77, 78, 79, 80, 81, 82, 69, 83, + 84, 85, 86, 87, 88, 89, 69, 69, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 90, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 91, + 92, 34, 34, 34, 34, 34, 34, 34, 34, 93, 34, 34, 94, 95, 96, 97, + 98, 99,100,101,102,103,104,105, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,106, + 107,107,107,107,107,107,107,107,107,107,107,107,107,107,107,107, + 108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,108, + 108,108, 34, 34,109,110,111,112, 34, 34,113,114,115,116,117,118, + 119,120,121,122,123,124,125,126,127,128,129,123, 34, 34,130,123, + 131,132,133,134,135,136,137,138,139,140,141,123,142,143,144,145, + 146,147,148,149,150,151,152,123,153,154,123,155,156,157,158,123, + 159,160,161,162,163,164,123,123,165,166,167,168,123,169,123,170, + 34, 34, 34, 34, 34, 34, 34,171,172, 34,173,123,123,123,123,123, + 123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123, + 34, 34, 34, 34, 34, 34, 34, 34,174,123,123,123,123,123,123,123, + 123,123,123,123,123,123,123,123, 34, 34, 34, 34,175,123,123,123, + 34, 34, 34, 34,176,177,178,179,123,123,123,123,180,181,182,183, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,184, + 34, 34, 34, 34, 34, 34, 34, 34, 34,185,186,123,123,123,123,123, + 34, 34,187, 34, 34,188,123,123,123,123,123,123,123,123,123,123, + 123,123,123,123,123,123,123,123,189,190,123,123,123,123,123,123, + 69,191,192,193,194,195,196,123,197,198,199,200,201,202,203,204, + 69, 69, 69, 69,205,206,123,123,123,123,123,123,123,123,123,123, + 207,123,208,123,123,209,123,123,123,123,123,123,123,123,123,123, + 34,210,211,123,123,123,123,123,212,213,214,123,215,216,123,123, + 217,218,219,220,221,123, 69,222, 69, 69, 69, 69, 69,223,224,225, + 226,227,228,229,230,231, 69,232,123,123,123,123,123,123,123,123, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,233, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,234, 34, + 235, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,236, 34, 34, + 34, 34, 34, 34, 34, 34, 34,237,123,123,123,123,123,123,123,123, + 34, 34, 34, 34,238,123,123,123,123,123,123,123,123,123,123,123, + 34, 34, 34, 34, 34, 34,239,123,123,123,123,123,123,123,123,123, + 240,123,241,242,123,123,123,123,123,123,123,123,123,123,123,123, + 108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,243, + 108,108,108,108,108,108,108,108,108,108,108,108,108,108,108,244, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 2, 4, 5, 6, 2, + 7, 7, 7, 7, 7, 2, 8, 9, 10, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 17, 18, 19, 1, 20, 20, 21, 22, 23, 24, 25, + 26, 27, 15, 2, 28, 29, 27, 30, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 31, 11, 11, 11, 32, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 33, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 34, 34, 34, 34, 34, 34, 34, 34, 16, 32, 32, 32, + 32, 32, 32, 32, 11, 34, 34, 16, 34, 32, 32, 11, 34, 11, 16, 11, + 11, 34, 32, 11, 32, 16, 11, 34, 32, 32, 32, 11, 34, 16, 32, 11, + 34, 11, 34, 34, 32, 35, 32, 16, 36, 36, 37, 34, 38, 37, 34, 34, + 34, 34, 34, 34, 34, 34, 16, 32, 34, 38, 32, 11, 32, 32, 32, 32, + 32, 32, 16, 16, 16, 11, 34, 32, 34, 34, 11, 32, 32, 32, 32, 32, + 16, 16, 39, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 41, 41, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, + 40, 40, 42, 41, 41, 41, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41, + 43, 43, 43, 43, 43, 43, 43, 43, 32, 32, 42, 32, 44, 45, 16, 10, + 44, 44, 41, 46, 11, 47, 47, 11, 34, 11, 11, 11, 11, 11, 11, 11, + 11, 48, 11, 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 34, + 16, 11, 32, 16, 32, 32, 32, 32, 16, 16, 32, 49, 34, 32, 34, 11, + 32, 50, 43, 43, 51, 32, 32, 32, 11, 34, 34, 34, 34, 34, 34, 16, + 48, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 47, 52, 2, 2, 2, + 16, 16, 16, 16, 53, 54, 55, 56, 57, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 58, 59, 60, 43, 59, 44, 44, 44, 44, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 44, 62, + 36, 63, 64, 44, 44, 44, 44, 44, 65, 65, 65, 8, 9, 66, 2, 67, + 43, 43, 43, 43, 43, 60, 68, 2, 69, 36, 36, 36, 36, 70, 43, 43, + 7, 7, 7, 7, 7, 2, 2, 36, 71, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 72, 43, 43, 43, 73, 50, 43, 43, 74, 75, 76, 43, 43, 36, + 7, 7, 7, 7, 7, 36, 77, 78, 2, 2, 2, 2, 2, 2, 2, 79, + 70, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, 43, 80, 62, 36, + 36, 36, 36, 43, 43, 43, 43, 43, 71, 44, 44, 44, 44, 44, 44, 44, + 7, 7, 7, 7, 7, 36, 36, 36, 36, 36, 36, 36, 36, 70, 43, 43, + 43, 43, 40, 21, 2, 81, 57, 20, 36, 36, 36, 43, 43, 75, 43, 43, + 43, 43, 75, 43, 75, 43, 43, 44, 2, 2, 2, 2, 2, 2, 2, 64, + 36, 36, 36, 36, 70, 43, 44, 64, 36, 36, 36, 36, 36, 61, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, 36, 36, 61, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 44, 44, 44, 44, 44, 57, 43, 43, 43, 43, 43, 43, + 43, 82, 43, 43, 43, 43, 43, 43, 43, 83, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 83, 71, 84, 85, 43, 43, 43, 83, 84, 85, 84, + 70, 43, 43, 43, 36, 36, 36, 36, 36, 43, 2, 7, 7, 7, 7, 7, + 86, 36, 36, 36, 36, 36, 36, 36, 70, 84, 62, 36, 36, 36, 61, 62, + 61, 62, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 36, 36, 36, + 61, 61, 44, 36, 36, 44, 71, 84, 85, 43, 80, 87, 88, 87, 85, 61, + 44, 44, 44, 87, 44, 44, 36, 62, 36, 43, 44, 7, 7, 7, 7, 7, + 36, 20, 27, 27, 27, 56, 63, 80, 57, 83, 62, 36, 36, 61, 44, 62, + 61, 36, 62, 61, 36, 44, 80, 84, 85, 80, 44, 57, 80, 57, 43, 44, + 57, 44, 44, 44, 62, 36, 61, 61, 44, 44, 44, 7, 7, 7, 7, 7, + 43, 36, 70, 64, 44, 44, 44, 44, 57, 83, 62, 36, 36, 36, 36, 62, + 36, 62, 36, 36, 36, 36, 36, 36, 61, 36, 62, 36, 36, 44, 71, 84, + 85, 43, 43, 57, 83, 87, 85, 44, 61, 44, 44, 44, 44, 44, 44, 44, + 66, 44, 44, 44, 62, 43, 43, 43, 57, 84, 62, 36, 36, 36, 61, 62, + 61, 36, 62, 36, 36, 44, 71, 85, 85, 43, 80, 87, 88, 87, 85, 44, + 44, 44, 57, 83, 44, 44, 36, 62, 78, 27, 27, 27, 44, 44, 44, 44, + 44, 71, 62, 36, 36, 61, 44, 36, 61, 36, 36, 44, 62, 61, 61, 36, + 44, 62, 61, 44, 36, 61, 44, 36, 36, 36, 36, 36, 36, 44, 44, 84, + 83, 88, 44, 84, 88, 84, 85, 44, 61, 44, 44, 87, 44, 44, 44, 44, + 27, 89, 67, 67, 56, 90, 44, 44, 83, 84, 71, 36, 36, 36, 61, 36, + 61, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 44, 62, 43, + 83, 84, 88, 43, 80, 43, 43, 44, 44, 44, 57, 80, 36, 61, 44, 44, + 44, 44, 44, 91, 27, 27, 27, 89, 70, 84, 72, 36, 36, 36, 61, 36, + 36, 36, 62, 36, 36, 44, 71, 85, 84, 84, 88, 83, 88, 84, 43, 44, + 44, 44, 87, 88, 44, 44, 44, 61, 62, 61, 44, 44, 44, 44, 44, 44, + 43, 84, 36, 36, 36, 36, 61, 36, 36, 36, 36, 36, 36, 70, 71, 84, + 85, 43, 80, 84, 88, 84, 85, 77, 44, 44, 36, 92, 27, 27, 27, 93, + 27, 27, 27, 27, 89, 36, 36, 36, 57, 84, 62, 36, 36, 36, 36, 36, + 36, 36, 36, 61, 44, 36, 36, 36, 36, 62, 36, 36, 36, 36, 62, 44, + 36, 36, 36, 61, 44, 80, 44, 87, 84, 43, 80, 80, 84, 84, 84, 84, + 44, 84, 64, 44, 44, 44, 44, 44, 62, 36, 36, 36, 36, 36, 36, 36, + 70, 36, 43, 43, 43, 80, 44, 94, 36, 36, 36, 75, 43, 43, 43, 60, + 7, 7, 7, 7, 7, 2, 44, 44, 62, 61, 61, 36, 36, 61, 36, 36, + 36, 36, 62, 62, 36, 36, 36, 36, 70, 36, 43, 43, 43, 43, 71, 44, + 36, 36, 61, 81, 43, 43, 43, 44, 7, 7, 7, 7, 7, 44, 36, 36, + 77, 67, 2, 2, 2, 2, 2, 2, 2, 95, 95, 67, 43, 67, 67, 67, + 7, 7, 7, 7, 7, 27, 27, 27, 27, 27, 50, 50, 50, 4, 4, 84, + 36, 36, 36, 36, 62, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 44, + 57, 43, 43, 43, 43, 43, 43, 83, 43, 43, 60, 43, 36, 36, 70, 43, + 43, 43, 43, 43, 57, 43, 43, 43, 43, 43, 43, 43, 43, 43, 80, 67, + 67, 67, 67, 76, 67, 67, 90, 67, 2, 2, 95, 67, 21, 64, 44, 44, + 36, 36, 36, 36, 36, 92, 85, 43, 83, 43, 43, 43, 85, 83, 85, 71, + 7, 7, 7, 7, 7, 2, 2, 2, 36, 36, 36, 84, 43, 36, 36, 43, + 71, 84, 96, 92, 84, 84, 84, 36, 70, 43, 71, 36, 36, 36, 36, 36, + 36, 83, 85, 83, 84, 84, 85, 92, 7, 7, 7, 7, 7, 84, 85, 67, + 11, 11, 11, 48, 44, 44, 48, 44, 16, 16, 16, 16, 16, 53, 45, 16, + 36, 36, 36, 36, 61, 36, 36, 44, 36, 36, 36, 61, 61, 36, 36, 44, + 61, 36, 36, 44, 36, 36, 36, 61, 61, 36, 36, 44, 36, 36, 36, 36, + 36, 36, 36, 61, 36, 36, 36, 36, 36, 36, 36, 36, 36, 61, 57, 43, + 2, 2, 2, 2, 97, 27, 27, 27, 27, 27, 27, 27, 27, 27, 98, 44, + 67, 67, 67, 67, 67, 44, 44, 44, 11, 11, 11, 44, 16, 16, 16, 44, + 99, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 77, 72, + 100, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,101,102, 44, + 36, 36, 36, 36, 36, 63, 2,103,104, 36, 36, 36, 61, 44, 44, 44, + 36, 36, 36, 36, 36, 36, 61, 36, 36, 43, 80, 44, 44, 44, 44, 44, + 36, 43, 60, 64, 44, 44, 44, 44, 36, 43, 44, 44, 44, 44, 44, 44, + 61, 43, 44, 44, 44, 44, 44, 44, 36, 36, 43, 85, 43, 43, 43, 84, + 84, 84, 84, 83, 85, 43, 43, 43, 43, 43, 2, 86, 2, 66, 70, 44, + 7, 7, 7, 7, 7, 44, 44, 44, 27, 27, 27, 27, 27, 44, 44, 44, + 2, 2, 2,105, 2, 59, 43, 68, 36,106, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 61, 44, 44, 44, 36, 36, 70, 71, 36, 36, 36, 36, + 36, 36, 36, 36, 70, 61, 44, 44, 36, 36, 36, 44, 44, 44, 44, 44, + 36, 36, 36, 36, 36, 36, 36, 61, 43, 83, 84, 85, 83, 84, 44, 44, + 84, 83, 84, 84, 85, 43, 44, 44, 90, 44, 2, 7, 7, 7, 7, 7, + 36, 36, 36, 36, 36, 36, 36, 44, 36, 36, 61, 44, 44, 44, 44, 44, + 36, 36, 36, 36, 36, 36, 44, 44, 36, 36, 36, 36, 36, 44, 44, 44, + 7, 7, 7, 7, 7, 98, 44, 67, 67, 67, 67, 67, 67, 67, 67, 67, + 36, 36, 36, 70, 83, 85, 44, 2, 36, 36, 92, 83, 43, 43, 43, 80, + 83, 83, 85, 43, 43, 43, 83, 84, 84, 85, 43, 43, 43, 43, 80, 57, + 2, 2, 2, 86, 2, 2, 2, 44, 43, 43, 43, 43, 43, 43, 43,107, + 80, 44, 44, 44, 44, 44, 44, 44, 43, 43, 96, 36, 36, 36, 36, 36, + 36, 36, 83, 43, 43, 83, 83, 84, 84, 83, 96, 36, 36, 36, 44, 44, + 95, 67, 67, 67, 67, 50, 43, 43, 43, 43, 67, 67, 67, 67, 90, 44, + 43, 96, 36, 36, 36, 36, 36, 36, 92, 43, 43, 84, 43, 85, 43, 36, + 36, 36, 36, 83, 43, 84, 85, 85, 43, 84, 44, 44, 44, 44, 2, 2, + 36, 36, 84, 84, 84, 84, 43, 43, 43, 43, 84, 43, 44, 91, 2, 2, + 7, 7, 7, 7, 7, 44, 62, 36, 36, 36, 36, 36, 40, 40, 40, 2, + 16, 16, 16, 16,108, 44, 44, 44, 11, 11, 11, 11, 11, 47, 48, 11, + 2, 2, 2, 2, 44, 44, 44, 44, 43, 60, 43, 43, 43, 43, 43, 43, + 83, 43, 43, 43, 71, 36, 70, 36, 36, 36, 71, 92, 43, 61, 44, 44, + 16, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 45, 16, 16, + 16, 16, 16, 16, 45, 16, 16, 16, 16, 16, 16, 16, 16,109, 40, 40, + 43, 43, 43, 43, 43, 57, 43, 43, 32, 32, 32, 16, 16, 16, 16, 32, + 16, 16, 16, 16, 11, 11, 11, 11, 16, 16, 16, 44, 11, 11, 11, 44, + 16, 16, 16, 16, 48, 48, 48, 48, 16, 16, 16, 16, 16, 16, 16, 44, + 16, 16, 16, 16,110,110,110,110, 16, 16,108, 16, 11, 11,111,112, + 41, 16,108, 16, 11, 11,111, 41, 16, 16, 44, 16, 11, 11,113, 41, + 16, 16, 16, 16, 11, 11,114, 41, 44, 16,108, 16, 11, 11,111,115, + 116,116,116,116,116,117, 65, 65,118,118,118, 2,119,120,119,120, + 2, 2, 2, 2,121, 65, 65,122, 2, 2, 2, 2,123,124, 2,125, + 126, 2,127,128, 2, 2, 2, 2, 2, 9,126, 2, 2, 2, 2,129, + 65, 65, 68, 65, 65, 65, 65, 65,130, 44, 27, 27, 27, 8,127,131, + 27, 27, 27, 27, 27, 8,127,102, 40, 40, 40, 40, 40, 40, 81, 44, + 20, 20, 20, 20, 20, 20, 20, 20, 43, 43, 43, 43, 43, 43,132, 51, + 107, 51,107, 43, 43, 43, 43, 43, 67,133, 67,134, 67, 34, 11, 16, + 11, 32,134, 67, 49, 11, 11, 67, 67, 67,133,133,133, 11, 11,135, + 11, 11, 35, 36, 39, 67, 16, 11, 8, 8, 49, 16, 16, 26, 67,136, + 27, 27, 27, 27, 27, 27, 27, 27,103,103,103,103,103,103,103,103, + 103,137,138,103,139, 67, 44, 44, 8, 8,140, 67, 67, 8, 67, 67, + 140, 26, 67,140, 67, 67, 67,140, 67, 67, 67, 67, 67, 67, 67, 8, + 67,140,140, 67, 67, 67, 67, 67, 67, 67, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 67, 67, 67, 67, 4, 4, 67, 67, + 8, 67, 67, 67,141,142, 67, 67, 67, 67, 67, 67, 67, 67,140, 67, + 67, 67, 67, 67, 67, 26, 8, 8, 8, 8, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 8, 8, 8, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 90, 44, 44, 44, 44, 67, 67, 67, 67, 67, 90, 44, 44, + 27, 27, 27, 27, 27, 27, 67, 67, 67, 67, 67, 67, 67, 27, 27, 27, + 67, 67, 67, 26, 67, 67, 67, 67, 26, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 8, 8, 8, 8, 67, 67, 67, 67, 67, 67, 67, 26, + 67, 67, 67, 67, 4, 4, 4, 4, 4, 4, 4, 27, 27, 27, 27, 27, + 27, 27, 67, 67, 67, 67, 67, 67, 8, 8,127,143, 8, 8, 8, 8, + 8, 8, 8, 4, 4, 4, 4, 4, 8,127,144,144,144,144,144,144, + 144,144,144,144,143, 8, 8, 8, 8, 8, 8, 8, 4, 4, 8, 8, + 8, 8, 8, 8, 8, 8, 4, 8, 8, 8,140, 26, 8, 8,140, 67, + 67, 67, 44, 67, 67, 67, 67, 67, 67, 67, 67, 55, 67, 67, 67, 67, + 11, 11, 11, 11, 11, 11, 11, 47, 16, 16, 16, 16, 16, 16, 16,108, + 32, 11, 32, 34, 34, 34, 34, 11, 32, 32, 34, 16, 16, 16, 40, 11, + 32, 32,136, 67, 67,134, 34,145, 43, 32, 44, 44, 91, 2, 97, 2, + 16, 16, 16,146, 44, 44,146, 44, 36, 36, 36, 36, 44, 44, 44, 52, + 64, 44, 44, 44, 44, 44, 44, 57, 36, 36, 36, 61, 44, 44, 44, 44, + 36, 36, 36, 61, 36, 36, 36, 61, 2,119,119, 2,123,124,119, 2, + 2, 2, 2, 6, 2,105,119, 2,119, 4, 4, 4, 4, 2, 2, 86, + 2, 2, 2, 2, 2,118, 2, 2,105,147, 2, 2, 2, 2, 2, 2, + 67, 64, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 55, 67, 67, + 67, 67, 44, 44, 44, 44, 44, 44, 67, 67, 67, 44, 44, 44, 44, 44, + 67, 67, 67, 67, 67, 67, 44, 44, 1, 2,148,149, 4, 4, 4, 4, + 4, 67, 4, 4, 4, 4,150,151,152,103,103,103,103, 43, 43, 84, + 153, 40, 40, 67,103,154, 63, 67, 36, 36, 36, 61, 57,155,156, 69, + 36, 36, 36, 36, 36, 63, 40, 69, 44, 44, 62, 36, 36, 36, 36, 36, + 67, 27, 27, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 90, + 27, 27, 27, 27, 27, 67, 67, 67, 67, 67, 67, 67, 27, 27, 27, 27, + 157, 27, 27, 27, 27, 27, 27, 27, 36, 36,106, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36,158, 2, 7, 7, 7, 7, 7, 36, 44, 44, + 32, 32, 32, 32, 32, 32, 32, 70, 51,159, 43, 43, 43, 43, 43, 86, + 32, 32, 32, 32, 32, 32, 40, 43, 36, 36, 36,103,103,103,103,103, + 43, 2, 2, 2, 44, 44, 44, 44, 41, 41, 41,156, 40, 40, 40, 40, + 41, 32, 32, 32, 32, 32, 32, 32, 16, 32, 32, 32, 32, 32, 32, 32, + 45, 16, 16, 16, 34, 34, 34, 32, 32, 32, 32, 32, 42,160, 34, 35, + 32, 32, 16, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 11, 11, 32, + 11, 11, 32, 32, 32, 32, 32, 32, 44, 32, 11, 11, 34,108, 44, 44, + 44, 44, 48, 35, 40, 35, 36, 36, 36, 71, 36, 71, 36, 70, 36, 36, + 36, 92, 85, 83, 67, 67, 80, 44, 27, 27, 27, 67,161, 44, 44, 44, + 36, 36, 2, 2, 44, 44, 44, 44, 84, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 84, 84, 84, 84, 84, 84, 84, 84, 43, 44, 44, 44, 44, 2, + 43, 36, 36, 36, 2, 72, 72, 70, 36, 36, 36, 43, 43, 43, 43, 2, + 36, 36, 36, 70, 43, 43, 43, 43, 43, 84, 44, 44, 44, 44, 44, 91, + 36, 70, 84, 43, 43, 84, 43, 84,162, 2, 2, 2, 2, 2, 2, 52, + 7, 7, 7, 7, 7, 44, 44, 2, 36, 36, 70, 69, 36, 36, 36, 36, + 7, 7, 7, 7, 7, 36, 36, 61, 36, 36, 36, 36, 70, 43, 43, 83, + 85, 83, 85, 80, 44, 44, 44, 44, 36, 70, 36, 36, 36, 36, 83, 44, + 7, 7, 7, 7, 7, 44, 2, 2, 69, 36, 36, 77, 67, 92, 83, 36, + 71, 43, 71, 70, 71, 36, 36, 43, 70, 61, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 62,106, 2, 36, 36, 36, 36, 36, 92, 43, 84, + 2,106,163, 80, 44, 44, 44, 44, 62, 36, 36, 61, 62, 36, 36, 61, + 62, 36, 36, 61, 44, 44, 44, 44, 16, 16, 16, 16, 16,112, 40, 40, + 16, 16, 16, 16,109, 41, 44, 44, 36, 92, 85, 84, 83,162, 85, 44, + 36, 36, 44, 44, 44, 44, 44, 44, 36, 36, 36, 61, 44, 62, 36, 36, + 164,164,164,164,164,164,164,164,165,165,165,165,165,165,165,165, + 16, 16, 16,108, 44, 44, 44, 44, 44,146, 16, 16, 44, 44, 62, 71, + 36, 36, 36, 36,166, 36, 36, 36, 36, 36, 36, 61, 36, 36, 61, 61, + 36, 62, 61, 36, 36, 36, 36, 36, 36, 41, 41, 41, 41, 41, 41, 41, + 41, 44, 44, 44, 44, 44, 44, 44, 44, 62, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36,144, 44, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36,161, 44, 2, 2, 2,167,128, 44, 44, 44, + 6,168,169,144,144,144,144,144,144,144,128,167,128, 2,125,170, + 2, 64, 2, 2,150,144,144,128, 2,171, 8,172, 66, 2, 44, 44, + 36, 36, 36, 36, 36, 36, 61, 79, 91, 2, 3, 2, 4, 5, 6, 2, + 16, 16, 16, 16, 16, 17, 18,127,128, 4, 2, 36, 36, 36, 36, 36, + 69, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 40, + 44, 36, 36, 36, 44, 36, 36, 36, 44, 36, 36, 36, 44, 36, 61, 44, + 20,173, 56,174, 26, 8,140, 90, 44, 44, 44, 44, 79, 65, 67, 44, + 36, 36, 36, 36, 36, 36, 62, 36, 36, 36, 36, 36, 36, 61, 36, 62, + 2, 64, 44,175, 27, 27, 27, 27, 27, 27, 44, 55, 67, 67, 67, 67, + 103,103,139, 27, 89, 67, 67, 67, 67, 67, 67, 67, 67, 27, 67, 90, + 67, 67, 67, 67, 67, 67, 90, 44, 90, 44, 44, 44, 44, 44, 44, 44, + 67, 67, 67, 67, 67, 67, 50, 44,176, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 44, 44, 27, 27, 44, 44, 44, 44, 62, 36, + 149, 36, 36, 36, 36,177, 44, 44, 36, 36, 36, 43, 43, 80, 44, 44, + 36, 36, 36, 36, 36, 36, 36, 91, 36, 36, 44, 44, 36, 36, 36, 36, + 178,103,103, 44, 44, 44, 44, 44, 11, 11, 11, 11, 16, 16, 16, 16, + 11, 11, 44, 44, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 44, 44, + 36, 36, 44, 44, 44, 44, 44, 91, 36, 36, 36, 44, 61, 36, 36, 36, + 36, 36, 36, 62, 61, 44, 61, 62, 36, 36, 36, 91, 27, 27, 27, 27, + 36, 36, 36, 77,157, 27, 27, 27, 44, 44, 44,175, 27, 27, 27, 27, + 36, 61, 36, 44, 44,175, 27, 27, 36, 36, 36, 27, 27, 27, 44, 91, + 36, 36, 36, 36, 36, 44, 44, 91, 36, 36, 36, 36, 44, 44, 27, 36, + 44, 27, 27, 27, 27, 27, 27, 27, 70, 43, 57, 80, 44, 44, 43, 43, + 36, 36, 62, 36, 62, 36, 36, 36, 36, 36, 36, 44, 43, 80, 44, 57, + 27, 27, 27, 27, 98, 44, 44, 44, 2, 2, 2, 2, 64, 44, 44, 44, + 36, 36, 36, 36, 36, 36,179, 30, 36, 36, 36, 36, 36, 36,179, 27, + 36, 36, 36, 36, 78, 36, 36, 36, 36, 36, 70, 80, 44,175, 27, 27, + 2, 2, 2, 64, 44, 44, 44, 44, 36, 36, 36, 44, 91, 2, 2, 2, + 36, 36, 36, 44, 27, 27, 27, 27, 36, 61, 44, 44, 27, 27, 27, 27, + 36, 44, 44, 44, 91, 2, 64, 44, 44, 44, 44, 44,175, 27, 27, 27, + 11, 47, 44, 44, 44, 44, 44, 44, 16,108, 44, 44, 44, 27, 27, 27, + 36, 36, 43, 43, 44, 44, 44, 44, 27, 27, 27, 27, 27, 27, 27, 98, + 36, 36, 36, 36, 36, 57,180, 44, 36, 44, 44, 44, 44, 44, 44, 44, + 27, 27, 27, 93, 44, 44, 44, 44,176, 27, 30, 2, 2, 44, 44, 44, + 36, 36,179, 27, 27, 27, 44, 44, 85, 96, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 43, 43, 43, 43, 43, 43, 43, 60, 2, 2, 2, 44, + 27, 27, 27, 7, 7, 7, 7, 7, 44, 44, 44, 44, 44, 44, 44, 57, + 84, 85, 43, 83, 85, 60,181, 2, 2, 44, 44, 44, 44, 44, 79, 44, + 43, 71, 36, 36, 36, 36, 36, 36, 36, 36, 36, 70, 43, 43, 85, 43, + 43, 43, 80, 7, 7, 7, 7, 7, 2, 2, 92, 96, 44, 44, 44, 44, + 36, 70, 2, 61, 44, 44, 44, 44, 36, 92, 84, 43, 43, 43, 43, 83, + 96, 36, 63, 2, 59, 43, 60, 85, 7, 7, 7, 7, 7, 63, 63, 2, + 175, 27, 27, 27, 27, 27, 27, 27, 27, 27, 98, 44, 44, 44, 44, 44, + 36, 36, 36, 36, 36, 36, 84, 85, 43, 84, 83, 43, 2, 2, 2, 80, + 36, 36, 36, 61, 61, 36, 36, 62, 36, 36, 36, 36, 36, 36, 36, 62, + 36, 36, 36, 36, 63, 44, 44, 44, 36, 36, 36, 36, 36, 36, 36, 70, + 84, 85, 43, 43, 43, 80, 44, 44, 43, 84, 62, 36, 36, 36, 61, 62, + 61, 36, 62, 36, 36, 57, 71, 84, 83, 84, 88, 87, 88, 87, 84, 44, + 61, 44, 44, 87, 44, 44, 62, 36, 36, 84, 44, 43, 43, 43, 80, 44, + 43, 43, 80, 44, 44, 44, 44, 44, 36, 36, 92, 84, 43, 43, 43, 43, + 84, 43, 83, 71, 36, 63, 2, 2, 7, 7, 7, 7, 7, 2, 91, 71, + 84, 85, 43, 43, 83, 83, 84, 85, 83, 43, 36, 72, 44, 44, 44, 44, + 36, 36, 36, 36, 36, 36, 36, 92, 84, 43, 43, 44, 84, 84, 43, 85, + 60, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 36, 36, 43, 44, + 84, 85, 43, 43, 43, 83, 85, 85, 60, 2, 61, 44, 44, 44, 44, 44, + 2, 2, 2, 2, 2, 2, 64, 44, 36, 36, 36, 36, 36, 70, 85, 84, + 43, 43, 43, 85, 61, 44, 44, 44, 84, 43, 43, 85, 43, 43, 44, 44, + 7, 7, 7, 7, 7, 27, 2, 95, 43, 43, 43, 43, 85, 60, 44, 44, + 27, 98, 44, 44, 44, 44, 44, 62, 36, 36, 36, 61, 62, 44, 36, 36, + 36, 36, 62, 61, 36, 36, 36, 36, 84, 84, 84, 87, 88, 57, 83, 71, + 96, 85, 2, 64, 44, 44, 44, 44, 36, 36, 36, 36, 44, 36, 36, 36, + 92, 84, 43, 43, 44, 43, 84, 84, 71, 72, 88, 44, 44, 44, 44, 44, + 70, 43, 43, 43, 43, 71, 36, 36, 36, 70, 43, 43, 83, 70, 43, 60, + 2, 2, 2, 59, 44, 44, 44, 44, 70, 43, 43, 83, 85, 43, 36, 36, + 36, 36, 36, 36, 36, 43, 43, 43, 43, 43, 43, 83, 43, 2, 72, 2, + 2, 64, 44, 44, 44, 44, 44, 44, 43, 43, 43, 80, 43, 43, 43, 85, + 63, 2, 2, 44, 44, 44, 44, 44, 2, 36, 36, 36, 36, 36, 36, 36, + 44, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 87, 43, 43, 43, + 83, 43, 85, 80, 44, 44, 44, 44, 36, 36, 36, 61, 36, 62, 36, 36, + 70, 43, 43, 80, 44, 80, 43, 57, 43, 43, 43, 70, 44, 44, 44, 44, + 36, 36, 36, 62, 61, 36, 36, 36, 36, 36, 36, 36, 36, 84, 84, 88, + 43, 87, 85, 85, 61, 44, 44, 44, 36, 70, 83,162, 64, 44, 44, 44, + 27, 27, 89, 67, 67, 67, 56, 20,161, 67, 67, 67, 67, 67, 67, 67, + 67, 44, 44, 44, 44, 44, 44, 91,103,103,103,103,103,103,103,177, + 2, 2, 64, 44, 44, 44, 44, 44, 65, 65, 65, 65, 68, 44, 44, 44, + 43, 43, 60, 44, 44, 44, 44, 44, 43, 43, 43, 60, 2, 2, 67, 67, + 40, 40, 95, 44, 44, 44, 44, 44, 7, 7, 7, 7, 7,175, 27, 27, + 27, 62, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 44, 44, 62, 36, + 27, 27, 27, 30, 2, 64, 44, 44, 36, 36, 36, 36, 36, 61, 44, 57, + 92, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 44, 44, 44, 57, 43, 74, 40, 40, 40, 40, 40, 40, + 40, 86, 80, 44, 44, 44, 44, 44, 84, 44, 44, 44, 44, 44, 44, 44, + 36, 61, 44, 44, 44, 44, 44, 44, 44, 44, 36, 36, 44, 44, 44, 44, + 36, 36, 36, 36, 36, 44, 50, 60, 65, 65, 44, 44, 44, 44, 44, 44, + 67, 67, 67, 90, 55, 67, 67, 67, 67, 67,182, 85, 43, 67,182, 84, + 84,183, 65, 65, 65, 82, 43, 43, 43, 76, 50, 43, 43, 43, 67, 67, + 67, 67, 67, 67, 67, 43, 43, 67, 67, 67, 67, 67, 90, 44, 44, 44, + 67, 43, 76, 44, 44, 44, 44, 44, 27, 27, 44, 44, 44, 44, 44, 44, + 11, 11, 11, 11, 11, 16, 16, 16, 16, 16, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 16, 16, 16,108, 16, 16, 16, 16, 16, + 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 47, 11, + 44, 47, 48, 47, 48, 11, 47, 11, 11, 11, 11, 16, 16,146,146, 16, + 16, 16,146, 16, 16, 16, 16, 16, 16, 16, 11, 48, 11, 47, 48, 11, + 11, 11, 47, 11, 11, 11, 47, 16, 16, 16, 16, 16, 11, 48, 11, 47, + 11, 11, 47, 47, 44, 11, 11, 11, 47, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 11, 11, 11, 11, 11, 16, 16, 16, 16, 16, + 16, 16, 16, 44, 11, 11, 11, 11, 31, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, + 11, 11, 31, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 33, + 16, 16, 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 31, + 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 31, 16, 16, 16, + 16, 33, 16, 16, 16, 32, 44, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 43, 43, 43, 76, 67, 50, 43, 43, 43, 43, 43, 43, 43, 43, 76, 67, + 67, 67, 50, 67, 67, 67, 67, 67, 67, 67, 76, 21, 2, 2, 44, 44, + 44, 44, 44, 44, 44, 57, 43, 43, 43, 43, 43, 80, 43, 43, 43, 43, + 43, 43, 43, 43, 80, 57, 43, 43, 43, 57, 80, 43, 43, 80, 44, 44, + 43, 43, 43, 74, 40, 40, 40, 44, 7, 7, 7, 7, 7, 44, 44, 77, + 36, 36, 36, 36, 36, 36, 43, 43, 7, 7, 7, 7, 7, 44, 44, 94, + 36, 36, 61,175, 27, 27, 27, 27, 43, 43, 43, 80, 44, 44, 44, 44, + 16, 16, 43, 43, 43, 74, 44, 44, 27, 27, 27, 27, 27, 27,157, 27, + 184, 27, 98, 44, 44, 44, 44, 44, 27, 27, 27, 27, 27, 27, 27,157, + 27, 27, 27, 27, 27, 27, 27, 44, 36, 36, 62, 36, 36, 36, 36, 36, + 62, 61, 61, 62, 62, 36, 36, 36, 36, 61, 36, 36, 62, 62, 44, 44, + 44, 61, 44, 62, 62, 62, 62, 36, 62, 61, 61, 62, 62, 62, 62, 62, + 62, 61, 61, 62, 36, 61, 36, 36, 36, 61, 36, 36, 62, 36, 61, 61, + 36, 36, 36, 36, 36, 62, 36, 36, 62, 36, 62, 36, 36, 62, 36, 36, + 8, 44, 44, 44, 44, 44, 44, 44, 55, 67, 67, 67, 67, 67, 67, 67, + 27, 27, 27, 27, 27, 27, 89, 67, 67, 67, 67, 67, 67, 67, 67, 44, + 44, 44, 44, 67, 67, 67, 67, 67, 67, 90, 44, 44, 44, 44, 44, 44, + 67, 44, 44, 44, 44, 44, 44, 44, 67, 67, 67, 67, 67, 25, 41, 41, + 67, 67, 67, 67, 44, 44, 44, 44, 67, 67, 67, 67, 90, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 55, 67, 67, 67, 90, 44, 67, 90, 44, 44, + 67, 90, 67, 67, 67, 67, 67, 67, 79, 44, 44, 44, 44, 44, 44, 44, + 65, 65, 65, 65, 65, 65, 65, 65,165,165,165,165,165,165,165, 44, + 165,165,165,165,165,165,165, 0, 0, 0, 29, 21, 21, 21, 23, 21, + 22, 18, 21, 25, 21, 17, 13, 13, 25, 25, 25, 21, 21, 9, 9, 9, + 9, 22, 21, 18, 24, 16, 24, 5, 5, 5, 5, 22, 25, 18, 25, 0, + 23, 23, 26, 21, 24, 26, 7, 20, 25, 1, 26, 24, 26, 25, 15, 15, + 24, 15, 7, 19, 15, 21, 9, 25, 9, 5, 5, 25, 5, 9, 5, 7, + 7, 7, 9, 8, 8, 5, 7, 5, 6, 6, 24, 24, 6, 24, 12, 12, + 2, 2, 6, 5, 9, 21, 9, 2, 2, 9, 25, 9, 26, 12, 11, 11, + 2, 6, 5, 21, 17, 2, 2, 26, 26, 23, 2, 12, 17, 12, 21, 12, + 12, 21, 7, 2, 2, 7, 7, 21, 21, 2, 1, 1, 21, 23, 26, 26, + 1, 2, 6, 7, 7, 12, 12, 7, 21, 7, 12, 1, 12, 6, 6, 12, + 12, 26, 7, 26, 26, 7, 2, 1, 12, 2, 6, 2, 1, 12, 12, 10, + 10, 10, 10, 12, 21, 6, 2, 10, 10, 2, 15, 26, 26, 2, 2, 21, + 7, 10, 15, 7, 2, 23, 21, 26, 10, 7, 21, 15, 15, 2, 17, 7, + 29, 7, 7, 22, 18, 2, 14, 14, 14, 7, 17, 21, 7, 6, 11, 12, + 5, 2, 5, 6, 8, 8, 8, 24, 5, 24, 2, 24, 9, 24, 24, 2, + 29, 29, 29, 1, 17, 17, 20, 19, 22, 20, 27, 28, 1, 29, 21, 20, + 19, 21, 21, 16, 16, 21, 25, 22, 18, 21, 21, 29, 15, 6, 18, 6, + 12, 11, 9, 26, 26, 9, 26, 5, 5, 26, 14, 9, 5, 14, 14, 15, + 25, 26, 26, 22, 18, 26, 18, 25, 18, 22, 5, 12, 2, 5, 22, 21, + 26, 6, 7, 14, 17, 22, 18, 18, 26, 14, 17, 6, 14, 6, 12, 24, + 24, 6, 26, 15, 6, 21, 11, 21, 24, 9, 23, 26, 10, 21, 6, 10, + 4, 4, 3, 3, 7, 25, 21, 22, 17, 16, 16, 22, 16, 16, 25, 17, + 25, 2, 25, 24, 23, 2, 2, 15, 12, 15, 14, 2, 21, 14, 7, 15, + 12, 17, 21, 1, 26, 10, 10, 1, 23, 15, 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 0, 10, 11, 12, 13, 0, 14, 0, 0, 0, 0, 0, + 15, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 20, 0, 21, 22, 23, 0, 0, 0, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34, + 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 37, 38, 0, 0, 0, 0, 0, 0, 39, 40, 0, 0, 41, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, + 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 0, + 9, 0, 10, 11, 0, 0, 12, 13, 14, 15, 16, 0, 0, 0, 0, 17, + 18, 19, 20, 0, 0, 0, 21, 22, 0, 23, 24, 0, 0, 23, 25, 26, + 0, 23, 25, 0, 0, 23, 25, 0, 0, 23, 25, 0, 0, 0, 25, 0, + 0, 0, 27, 0, 0, 23, 25, 0, 0, 28, 25, 0, 0, 0, 29, 0, + 0, 30, 31, 0, 0, 32, 33, 0, 34, 35, 0, 36, 37, 0, 38, 0, + 0, 39, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 42, 42, 0, 0, 0, 0, 43, 0, + 0, 0, 0, 0, 0, 44, 0, 0, 0, 45, 0, 0, 0, 0, 0, 0, + 46, 0, 0, 47, 0, 48, 49, 0, 0, 50, 51, 52, 0, 53, 0, 54, + 0, 55, 0, 0, 0, 0, 56, 57, 0, 0, 0, 0, 0, 0, 58, 59, + 0, 0, 0, 0, 0, 0, 60, 61, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 63, 0, 0, 0, 64, + 0, 65, 0, 0, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 67, 68, 0, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, + 70, 71, 0, 0, 0, 0, 51, 72, 0, 73, 74, 0, 0, 75, 76, 0, + 0, 0, 0, 0, 0, 77, 78, 79, 0, 0, 0, 0, 0, 0, 0, 25, + 0, 0, 0, 0, 0, 0, 0, 0, 80, 0, 0, 0, 0, 0, 0, 0, + 0, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, + 0, 0, 0, 0, 0, 0, 0, 49, 0, 0, 0, 83, 0, 0, 0, 0, + 84, 85, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 87, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 88, 0, 0, 0, 0, 89, 0, 0, 0, 0, 0, + 0, 0, 70, 63, 0, 90, 0, 0, 91, 92, 0, 75, 0, 0, 93, 0, + 0, 94, 0, 0, 0, 0, 0, 95, 0, 96, 25, 97, 0, 0, 0, 0, + 0, 0, 98, 0, 0, 0, 99, 0, 0, 0, 0, 0, 0, 63,100, 0, + 0, 63, 0, 0, 0,101, 0, 0, 0,102, 0, 0, 0, 0, 0, 0, + 0, 90, 0, 0, 0, 0, 0, 0, 0,103,104, 0, 0, 0, 0, 76, + 0, 42,105, 0,106, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 63, 0, 0, 0, 0, 0, 0, 0, 0,107, 0,108, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0,109, 0,110, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,111, + 0, 0, 0, 0,112, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,113,114,115, 0, 0, + 0, 0,116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 117,118, 0, 0, 0, 0, 0, 0, 0,110, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0,119, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0,120, 0, 0, 0,121, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, + 5, 6, 7, 4, 4, 8, 9, 10, 1, 11, 12, 13, 14, 15, 16, 17, + 18, 1, 1, 1, 0, 0, 0, 0, 19, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 20, 21, 22, 1, 23, 4, 21, 24, 25, 26, 27, 28, + 29, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 31, 0, + 0, 0, 32, 33, 34, 35, 1, 36, 0, 0, 0, 0, 37, 0, 0, 0, + 0, 0, 0, 0, 0, 38, 1, 39, 14, 39, 40, 41, 0, 0, 0, 0, + 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 43, 36, 44, 45, + 21, 45, 46, 0, 0, 0, 0, 0, 0, 0, 19, 1, 21, 0, 0, 47, + 0, 0, 0, 0, 0, 38, 48, 1, 1, 49, 49, 50, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 52, 1, 1, 1, + 53, 21, 43, 54, 55, 21, 35, 1, 0, 0, 0, 0, 0, 0, 0, 56, + 0, 0, 0, 57, 58, 59, 0, 0, 0, 0, 0, 57, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 60, 0, 0, 0, 57, 0, 61, 0, 0, + 0, 0, 0, 0, 0, 0, 62, 63, 0, 0, 64, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 65, 0, 0, 0, 66, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 67, 0, 0, 0, 68, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 69, 0, 0, 0, 0, 0, 0, 70, 71, 0, + 0, 0, 0, 0, 72, 73, 74, 75, 76, 77, 0, 0, 0, 0, 0, 0, + 0, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79, 80, 0, + 0, 0, 0, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 49, + 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, 0, 0, 64, 0, 0, 81, + 0, 0, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 0, + 0, 0, 0, 0, 0, 19, 84, 0, 63, 0, 0, 0, 0, 49, 1, 85, + 0, 0, 0, 0, 1, 54, 15, 86, 84, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 56, 0, 0, 0, 63, 0, 0, 0, 0, 0, 0, + 0, 0, 19, 10, 1, 0, 0, 0, 0, 0, 87, 0, 0, 0, 0, 0, + 0, 88, 0, 0, 87, 0, 0, 0, 0, 0, 0, 0, 0, 79, 0, 0, + 0, 0, 0, 0, 89, 9, 12, 4, 90, 8, 91, 47, 0, 59, 50, 0, + 21, 1, 21, 92, 93, 1, 1, 1, 1, 1, 1, 1, 1, 94, 95, 96, + 0, 0, 0, 0, 97, 1, 98, 59, 81, 99,100, 4, 59, 0, 0, 0, + 0, 0, 0, 19, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,101,102, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0,103, 0, 0, 0, 0, 19, 0, 1, 1, 50, + 0, 0, 0, 0, 0, 0, 0, 38, 0, 0, 0, 0, 50, 0, 0, 0, + 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 0, 0, 0, 0, + 1, 1, 1, 1, 50, 0, 0, 0, 0, 0, 52, 69, 0, 0, 0, 0, + 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0, 0, 79, 0, 0, 0, + 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,104,105, 59, 38, + 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, + 0, 0, 0, 0, 0, 0, 0,106, 1, 14, 4, 12, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 47, 0, 0, 0, 0, 0, 38, 89, 0, + 0, 0, 0,107, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,108, 62, + 0,109, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 19, 59, 0, 0, 0, 0, 0,110, 14, 54, 84, 0, 0, 0, + 0, 0, 0, 0, 0, 0,111, 0, 89, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 62, 63, 0, 0, 63, 0, 88, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0,111, 0, 0, 0, 0,112, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 79, 56, 0, 38, 1, 59, 1, 59, 0, 0, + 64, 88, 0, 0, 0, 0, 0, 60,113, 0, 0, 0, 0, 0, 0, 0, + 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,113, 0, 0, + 0, 0, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80, + 79, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 57, 0, 88,114, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 0, 8, 91, 0, 0, + 0, 0, 0, 0, 1, 89, 0, 0, 0, 0, 0, 0,115, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0,116, 0,117,118,119,120, 0, 52, 4, + 121, 49, 23, 0, 0, 0, 0, 0, 0, 0, 38, 50, 0, 0, 0, 0, + 38, 59, 0, 0, 0, 0, 0, 0, 1, 89, 1, 1, 1, 1, 39, 1, + 48,104, 89, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 4,121, 0, 0, 0, 1,122, 0, 0, 0, 0, 0, + 0, 0, 0, 0,230,230,230,230,230,232,220,220,220,220,232,216, + 220,220,220,220,220,202,202,220,220,220,220,202,202,220,220,220, + 1, 1, 1, 1, 1,220,220,220,220,230,230,230,230,240,230,220, + 220,220,230,230,230,220,220, 0,230,230,230,220,220,220,220,230, + 232,220,220,230,233,234,234,233,234,234,233,230, 0, 0, 0,230, + 0,220,230,230,230,230,220,230,230,230,222,220,230,230,220,220, + 230,222,228,230, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20, + 21, 22, 0, 23, 0, 24, 25, 0,230,220, 0, 18, 30, 31, 32, 0, + 0, 0, 0, 27, 28, 29, 30, 31, 32, 33, 34,230,230,220,220,230, + 220,230,230,220, 35, 0, 0, 0, 0, 0,230,230,230, 0, 0,230, + 230, 0,220,230,230,220, 0, 0, 0, 36, 0, 0,230,220,230,230, + 220,220,230,220,220,230,220,230,220,230,230, 0, 0,220, 0, 0, + 230,230, 0,230, 0,230,230,230,230,230, 0, 0, 0,220,220,220, + 0, 0, 0,220,230,230, 0,220,230,220,220,220, 27, 28, 29,230, + 7, 0, 0, 0, 0, 9, 0, 0, 0,230,220,230,230, 0, 0, 0, + 0, 0,230, 0, 0, 84, 91, 0, 0, 0, 0, 9, 9, 0, 0, 0, + 0, 0, 9, 0,103,103, 9, 0,107,107,107,107,118,118, 9, 0, + 122,122,122,122,220,220, 0, 0, 0,220, 0,220, 0,216, 0, 0, + 0,129,130, 0,132, 0, 0, 0, 0, 0,130,130,130,130, 0, 0, + 130, 0,230,230, 9, 0,230,230, 0, 0,220, 0, 0, 0, 0, 7, + 0, 9, 9, 0, 0,230, 0, 0, 0,228, 0, 0, 0,222,230,220, + 220, 0, 0, 0,230, 0, 0,220,230,220, 0,220, 0, 0, 9, 9, + 0, 0, 7, 0,230,230,230, 0,230, 0, 1, 1, 1, 0, 0, 0, + 230,234,214,220,202,230,230,230,230,230,232,228,228,220, 0,230, + 233,220,230,220,230,230, 1, 1, 1, 1, 1,230, 0, 1, 1,230, + 220,230, 1, 1, 0, 0,218,228,232,222,224,224, 0, 8, 8, 0, + 230, 0,230,230,220, 0, 0,230, 0, 0, 26, 0, 0,220, 0,230, + 230, 1,220, 0, 0,230,220, 0, 0, 0,220,220, 0, 9, 7, 0, + 0, 7, 9, 0, 0, 0, 9, 7, 9, 9, 0, 0, 6, 6, 0, 0, + 0, 0, 1, 0, 0,216,216, 1, 1, 1, 0, 0, 0,226,216,216, + 216,216,216, 0,220,220,220, 0,230,230, 7, 0, 16, 17, 17, 17, + 17, 17, 17, 33, 17, 17, 17, 19, 17, 17, 17, 17, 20,101, 17,113, + 129,169, 17, 27, 28, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,237, 0, 1, 2, 2, + 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 6, 7, 8, + 9, 0, 0, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 21, 22, 0, 0, 0, 0, + 23, 24, 25, 26, 0, 27, 0, 28, 29, 30, 31, 32, 0, 0, 0, 0, + 0, 0, 0, 33, 34, 35, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 37, 38, 0, 0, 0, 0, 1, 2, 39, 40, + 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 5, 0, + 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 7, 1, 0, 0, 0, 0, + 0, 0, 8, 9, 0, 0, 0, 0, 0, 0, 10, 0, 0, 10, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 10, + 0, 0, 0, 0, 0, 0, 11, 12, 0, 13, 0, 14, 15, 16, 0, 0, + 0, 0, 0, 1, 17, 18, 0, 19, 7, 1, 0, 0, 0, 20, 20, 7, + 20, 20, 20, 20, 20, 20, 20, 8, 21, 0, 22, 0, 7, 23, 24, 0, + 20, 20, 25, 0, 0, 0, 26, 27, 1, 7, 20, 20, 20, 20, 20, 1, + 28, 29, 30, 31, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 10, 0, + 0, 0, 0, 0, 0, 0, 20, 20, 20, 1, 0, 0, 8, 21, 32, 4, + 0, 10, 0, 33, 7, 20, 20, 20, 0, 0, 0, 0, 8, 34, 34, 35, + 36, 34, 37, 0, 38, 1, 20, 20, 0, 0, 39, 0, 1, 1, 0, 8, + 21, 1, 20, 0, 0, 0, 1, 0, 0, 40, 1, 1, 0, 0, 8, 21, + 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 26, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 21, 7, 20, 41, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 21, 0, 42, 43, 44, 0, 45, 0, 8, 21, 0, 0, 0, 0, 0, + 0, 0, 0, 46, 7, 1, 10, 1, 0, 0, 0, 1, 20, 20, 1, 0, + 0, 0, 0, 0, 0, 0, 20, 20, 1, 20, 20, 0, 0, 0, 0, 0, + 0, 0, 26, 21, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 3, 47, 48, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, + 4, 5, 6, 7, 7, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, + 10, 11, 12, 12, 12, 12, 13, 14, 14, 14, 14, 15, 16, 17, 18, 19, + 20, 14, 21, 14, 22, 14, 14, 14, 14, 23, 24, 24, 25, 26, 14, 14, + 14, 14, 27, 28, 14, 14, 29, 30, 31, 32, 33, 34, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 35, 7, 36, 37, 7, 38, 7, 7, 7, 39, 14, 40, 7, 7, 41, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 42, 0, 0, 1, + 2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 34, 35, 36, 37, 37, 37, 37, 37, 38, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 2, 2, 53, 54, 55, 56, + 57, 58, 59, 59, 59, 59, 60, 59, 59, 59, 59, 59, 59, 59, 61, 61, + 59, 59, 59, 59, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, + 74, 75, 76, 77, 78, 59, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 79, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 80, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 82, 83, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 96, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 70, 70, 98, 99,100,101,102,102,103,104,105,106,107,108,109,110, + 111,112, 97,113,114,115,116,117,118, 97,119,119,120, 97,121,122, + 123,124,125,126,127,128,129,130,131, 97,132,133,134,135,136,137, + 138,139,140,141,142, 97,143,144, 97,145,146,147,148, 97,149,150, + 151,152,153,154, 97, 97,155,156,157,158, 97,159, 97,160,161,161, + 161,161,161,161,161,162,163,161,164, 97, 97, 97, 97, 97,165,165, + 165,165,165,165,165,165,166, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97,167,167,167,167,168, 97, 97, 97,169,169, + 169,169,170,171,172,173, 97, 97, 97, 97,174,175,176,177,178,178, + 178,178,178,178,178,178,178,178,178,178,178,178,178,178,178,178, + 178,178,178,178,178,178,178,178,178,178,178,178,178,179,178,178, + 178,178,178,178,180,180,180,181,182, 97, 97, 97, 97, 97,183,184, + 185,186,186,187, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97,188,189, 97, 97, 97, 97, 97, 97, 59,190, + 191,192,193,194,195, 97,196,197,198, 59, 59,199, 59,200,201,201, + 201,201,201,202, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,203, 97, + 204, 97, 97,205, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97,206,207, + 208, 97, 97, 97, 97, 97,209,210,211, 97,212,213, 97, 97,214,215, + 59,216,217, 97, 59, 59, 59, 59, 59, 59, 59,218,219,220,221,222, + 223,224,225,226, 59,227, 97, 97, 97, 97, 97, 97, 97, 97, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,228, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,229, 70,230, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,231, 70, 70, 70, 70, + 70, 70, 70, 70, 70,232, 97, 97, 97, 97, 97, 97, 97, 97, 70, 70, + 70, 70,233, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, 70, 70, + 70, 70, 70, 70,234, 97, 97, 97, 97, 97, 97, 97, 97, 97,235, 97, + 236,237, 0, 1, 2, 2, 0, 1, 2, 2, 2, 3, 4, 5, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0, 0, 0, 0, 0, 0, + 19, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 0, 19, 0, + 0, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 0, 0, 0, 0, 0, + 26, 26, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, + 9, 9, 0, 9, 9, 9, 2, 2, 9, 9, 9, 9, 0, 9, 2, 2, + 2, 2, 9, 0, 9, 0, 9, 9, 9, 2, 9, 2, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 2, 9, 9, 9, 9, 9, 9, 9, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 1, 1, 6, 2, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 2, 4, 4, 4, 2, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 2, 2, + 2, 2, 2, 2, 2, 2, 14, 14, 14, 2, 2, 2, 2, 14, 14, 14, + 14, 14, 14, 2, 2, 2, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, + 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 0, 3, 2, 3, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 3, + 3, 3, 3, 3, 3, 3, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 37, 37, 37, 37, 2, 37, 37, 37, 37, 2, 2, 37, 37, 37, 38, 38, + 38, 38, 38, 38, 38, 38, 38, 38, 2, 2, 2, 2, 2, 2, 64, 64, + 64, 64, 64, 64, 64, 64, 64, 64, 64, 2, 2, 64, 64, 64, 90, 90, + 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 90, 2, 2, 90, 90, + 90, 90, 90, 90, 90, 2, 95, 95, 95, 95, 95, 95, 95, 95, 95, 95, + 95, 95, 2, 2, 95, 2, 37, 37, 37, 2, 2, 2, 2, 2, 3, 3, + 3, 3, 3, 2, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, + 0, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, + 1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 0, 0, 7, 7, 5, 5, + 5, 5, 2, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 5, 5, 2, + 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, + 5, 5, 5, 5, 5, 5, 5, 2, 5, 2, 2, 2, 5, 5, 5, 5, + 2, 2, 5, 5, 5, 5, 5, 2, 2, 5, 5, 5, 5, 2, 2, 2, + 2, 2, 2, 2, 2, 5, 2, 2, 2, 2, 5, 5, 2, 5, 5, 5, + 5, 5, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 11, + 11, 11, 2, 11, 11, 11, 11, 11, 11, 2, 2, 2, 2, 11, 11, 2, + 2, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2, + 11, 11, 11, 11, 11, 11, 11, 2, 11, 11, 2, 11, 11, 2, 11, 11, + 2, 2, 11, 2, 11, 11, 11, 2, 2, 11, 11, 11, 2, 2, 2, 11, + 2, 2, 2, 2, 2, 2, 2, 11, 11, 11, 11, 2, 11, 2, 2, 2, + 2, 2, 2, 2, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2, 2, 10, + 10, 10, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2, 10, 10, 10, + 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2, + 10, 10, 10, 10, 10, 10, 10, 2, 10, 10, 2, 10, 10, 10, 10, 10, + 2, 2, 10, 10, 10, 10, 10, 10, 2, 10, 10, 10, 2, 2, 10, 2, + 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 2, 2, 10, 10, 10, 10, + 2, 2, 2, 2, 2, 2, 2, 10, 10, 10, 10, 10, 10, 10, 2, 21, + 21, 21, 2, 21, 21, 21, 21, 21, 21, 21, 21, 2, 2, 21, 21, 2, + 2, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 2, + 21, 21, 21, 21, 21, 21, 21, 2, 21, 21, 2, 21, 21, 21, 21, 21, + 2, 2, 21, 21, 21, 21, 21, 2, 2, 21, 21, 21, 2, 2, 2, 2, + 2, 2, 2, 21, 21, 21, 2, 2, 2, 2, 21, 21, 2, 21, 21, 21, + 21, 21, 2, 2, 21, 21, 2, 2, 22, 22, 2, 22, 22, 22, 22, 22, + 22, 2, 2, 2, 22, 22, 22, 2, 22, 22, 22, 22, 2, 2, 2, 22, + 22, 2, 22, 2, 22, 22, 2, 2, 2, 22, 22, 2, 2, 2, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 2, 2, 2, 2, 22, 22, 22, 2, + 2, 2, 2, 2, 2, 22, 2, 2, 2, 2, 2, 2, 22, 22, 22, 22, + 22, 2, 2, 2, 2, 2, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 2, 23, 23, 23, 2, 23, 23, 23, 23, 23, 23, 23, 23, + 2, 2, 2, 23, 23, 23, 23, 2, 23, 23, 23, 23, 2, 2, 2, 2, + 2, 2, 2, 23, 23, 2, 23, 23, 23, 2, 2, 2, 2, 2, 23, 23, + 23, 23, 2, 2, 23, 23, 2, 2, 2, 2, 2, 2, 2, 23, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 16, 16, 16, 2, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 16, 16, 16, 16, 16, + 2, 2, 16, 16, 16, 16, 16, 2, 16, 16, 16, 16, 2, 2, 2, 2, + 2, 2, 2, 16, 16, 2, 2, 2, 2, 2, 2, 2, 16, 2, 16, 16, + 16, 16, 2, 2, 16, 16, 2, 16, 16, 2, 2, 2, 2, 2, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 2, 20, 20, 20, 2, + 20, 20, 20, 20, 20, 20, 2, 2, 2, 2, 20, 20, 20, 20, 20, 20, + 20, 20, 2, 2, 20, 20, 2, 36, 36, 36, 2, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 2, 2, 2, + 36, 36, 36, 36, 36, 36, 36, 36, 2, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 2, 36, 2, 2, 2, 2, 36, 2, 2, 2, 2, 36, 36, 36, + 36, 36, 36, 2, 36, 2, 2, 2, 2, 2, 2, 2, 36, 36, 2, 2, + 36, 36, 36, 2, 2, 2, 2, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 2, 2, 2, 2, 0, 24, 24, + 24, 24, 2, 2, 2, 2, 2, 18, 18, 2, 18, 2, 18, 18, 18, 18, + 18, 2, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 2, 18, 2, 18, 18, 18, 18, 18, 18, 18, 2, 2, 18, 18, + 18, 18, 18, 2, 18, 2, 18, 18, 2, 2, 18, 18, 18, 18, 25, 25, + 25, 25, 25, 25, 25, 25, 2, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 2, 2, 2, 25, 25, 25, 25, 25, 2, 25, 25, 25, 25, + 25, 25, 25, 0, 0, 0, 0, 25, 25, 2, 2, 2, 2, 2, 33, 33, + 33, 33, 33, 33, 33, 33, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 2, 8, 2, 2, 2, 2, 2, 8, 2, 2, 8, 8, + 8, 0, 8, 8, 8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 30, 30, + 30, 30, 30, 30, 30, 30, 30, 2, 30, 30, 30, 30, 2, 2, 30, 30, + 30, 30, 30, 30, 30, 2, 30, 30, 30, 2, 2, 30, 30, 30, 30, 30, + 30, 30, 30, 2, 2, 2, 30, 30, 2, 2, 2, 2, 2, 2, 29, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 2, 2, 28, 28, + 28, 28, 28, 28, 28, 28, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 2, 2, 2, 35, 35, 35, 35, 35, 35, 35, 35, 35, 35, + 35, 0, 0, 0, 35, 35, 35, 2, 2, 2, 2, 2, 2, 2, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 2, 45, 45, 45, 45, + 45, 45, 45, 2, 2, 2, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 0, 0, 2, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 2, 2, 2, 2, 46, 46, 46, 46, 46, 46, 46, 46, 46, 46, + 46, 46, 46, 2, 46, 46, 46, 2, 46, 46, 2, 2, 2, 2, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 2, 2, 31, 31, + 2, 2, 2, 2, 2, 2, 32, 32, 0, 0, 32, 0, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 2, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 2, 2, 2, 2, 2, 2, 32, 2, 2, 2, 2, 2, 2, 2, 32, 32, + 32, 2, 2, 2, 2, 2, 28, 28, 28, 28, 28, 28, 2, 2, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 2, 48, 48, + 48, 48, 2, 2, 2, 2, 48, 2, 2, 2, 48, 48, 48, 48, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 2, 2, 52, 52, + 52, 52, 52, 2, 2, 2, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, + 58, 58, 2, 2, 2, 2, 58, 58, 2, 2, 2, 2, 2, 2, 58, 58, + 58, 2, 2, 2, 58, 58, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, + 54, 54, 2, 2, 54, 54, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, + 91, 91, 91, 91, 91, 2, 91, 91, 91, 91, 91, 2, 2, 91, 91, 91, + 2, 2, 2, 2, 2, 2, 91, 91, 91, 91, 91, 91, 2, 2, 1, 2, + 2, 2, 2, 2, 2, 2, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 2, 2, 2, 2, 62, 62, 62, 62, 62, 2, 2, 2, 76, 76, + 76, 76, 76, 76, 76, 76, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, + 93, 93, 2, 2, 2, 2, 2, 2, 2, 2, 93, 93, 93, 93, 70, 70, + 70, 70, 70, 70, 70, 70, 2, 2, 2, 70, 70, 70, 70, 70, 70, 70, + 2, 2, 2, 70, 70, 70, 73, 73, 73, 73, 73, 73, 73, 73, 6, 2, + 2, 2, 2, 2, 2, 2, 8, 8, 8, 2, 2, 8, 8, 8, 1, 1, + 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, + 0, 2, 2, 2, 2, 2, 19, 19, 19, 19, 19, 19, 9, 9, 9, 9, + 9, 6, 19, 19, 19, 19, 19, 19, 19, 19, 19, 9, 9, 9, 9, 9, + 19, 19, 19, 19, 9, 9, 9, 9, 9, 19, 19, 19, 19, 19, 6, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 9, 1, 1, + 2, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 2, 2, 2, 9, + 2, 9, 2, 9, 2, 9, 9, 9, 9, 9, 9, 2, 9, 9, 9, 9, + 9, 9, 2, 2, 9, 9, 9, 9, 9, 9, 2, 9, 9, 9, 2, 2, + 9, 9, 9, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2, 0, 0, + 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 19, + 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, 0, + 0, 0, 0, 0, 0, 2, 19, 19, 19, 19, 19, 2, 2, 2, 0, 0, + 0, 0, 0, 0, 9, 0, 0, 0, 19, 19, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 19, 0, 19, 0, 0, 0, 2, 2, 2, 2, 0, 0, + 0, 2, 2, 2, 2, 2, 27, 27, 27, 27, 27, 27, 27, 27, 0, 0, + 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 2, 55, 55, + 55, 55, 2, 2, 2, 2, 2, 55, 55, 55, 55, 55, 55, 55, 61, 61, + 61, 61, 61, 61, 61, 61, 2, 2, 2, 2, 2, 2, 2, 61, 61, 2, + 2, 2, 2, 2, 2, 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 2, 13, 13, 13, 13, 13, 13, 13, 13, 13, 2, 2, 2, 2, 13, 13, + 13, 13, 13, 13, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, + 0, 0, 0, 13, 0, 13, 0, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 1, 1, 1, 1, 12, 12, 13, 13, 13, 13, 0, 0, 0, 0, 2, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 2, 2, 1, 1, 0, 0, 15, 15, 15, 0, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 0, 0, 17, 17, 17, 2, 2, 2, 2, 2, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 2, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 2, 12, 12, 12, 12, 12, 12, 12, 0, 17, 17, + 17, 17, 17, 17, 17, 0, 13, 13, 13, 13, 13, 2, 2, 2, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 2, 2, 2, 39, 39, + 39, 39, 39, 39, 39, 2, 86, 86, 86, 86, 86, 86, 86, 86, 77, 77, + 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 2, 2, 2, 2, 79, 79, + 79, 79, 79, 79, 79, 79, 0, 0, 19, 19, 19, 19, 19, 19, 0, 0, + 0, 19, 19, 19, 19, 19, 2, 2, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 19, 19, 19, 60, 60, + 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 2, 2, 2, 0, 0, + 2, 2, 2, 2, 2, 2, 65, 65, 65, 65, 65, 65, 65, 65, 75, 75, + 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 2, 2, 2, 2, + 2, 2, 2, 2, 75, 75, 75, 75, 2, 2, 2, 2, 2, 2, 69, 69, + 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 69, 0, 69, 74, 74, + 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 74, 12, 12, 12, 12, 12, 2, 2, 2, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 2, 0, 84, 84, + 2, 2, 2, 2, 84, 84, 33, 33, 33, 33, 33, 33, 33, 2, 68, 68, + 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 2, 68, 68, + 68, 68, 68, 68, 2, 2, 68, 68, 2, 2, 68, 68, 68, 68, 92, 92, + 92, 92, 92, 92, 92, 92, 92, 92, 92, 2, 2, 2, 2, 2, 2, 2, + 2, 92, 92, 92, 92, 92, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 2, 2, 30, 30, 30, 30, 30, 30, 2, 19, 19, + 19, 0, 19, 19, 19, 19, 19, 19, 19, 19, 19, 9, 19, 19, 19, 19, + 0, 0, 2, 2, 2, 2, 87, 87, 87, 87, 87, 87, 2, 2, 87, 87, + 2, 2, 2, 2, 2, 2, 12, 12, 12, 12, 2, 2, 2, 2, 2, 2, + 2, 12, 12, 12, 12, 12, 13, 13, 2, 2, 2, 2, 2, 2, 19, 19, + 19, 19, 19, 19, 19, 2, 2, 2, 2, 4, 4, 4, 4, 4, 2, 2, + 2, 2, 2, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 2, 14, 14, + 14, 14, 14, 2, 14, 2, 14, 14, 2, 14, 14, 2, 14, 14, 3, 3, + 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 0, 0, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 1, 1, + 1, 1, 1, 1, 6, 6, 0, 0, 0, 2, 0, 0, 0, 0, 3, 3, + 3, 3, 3, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 0, 0, 2, 2, + 12, 12, 12, 12, 12, 12, 2, 2, 12, 12, 12, 2, 2, 2, 2, 0, + 0, 0, 0, 0, 2, 2, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 2, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 2, 49, 49, + 49, 2, 49, 49, 2, 49, 49, 49, 49, 49, 49, 49, 2, 2, 49, 49, + 49, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0, + 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 9, 2, + 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 1, 2, 2, 71, 71, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 2, 2, 2, 67, 67, + 67, 67, 67, 67, 67, 67, 67, 2, 2, 2, 2, 2, 2, 2, 1, 0, + 0, 0, 0, 0, 0, 0, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 2, 2, 2, 2, 2, 2, 2, 2, 2, 42, 42, 42, 41, 41, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 2, 2, 2, 2, 2,118,118, + 118,118,118,118,118,118,118,118,118, 2, 2, 2, 2, 2, 53, 53, + 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 2, 53, 59, 59, + 59, 59, 59, 59, 59, 59, 59, 59, 59, 59, 2, 2, 2, 2, 59, 59, + 59, 59, 59, 59, 2, 2, 40, 40, 40, 40, 40, 40, 40, 40, 51, 51, + 51, 51, 51, 51, 51, 51, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, + 50, 50, 50, 50, 2, 2, 50, 50, 2, 2, 2, 2, 2, 2,135,135, + 135,135,135,135,135,135,135,135,135,135, 2, 2, 2, 2,106,106, + 106,106,106,106,106,106,104,104,104,104,104,104,104,104,104,104, + 104,104, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,104,110,110, + 110,110,110,110,110,110,110,110,110,110,110,110,110, 2,110,110, + 110,110,110,110, 2, 2, 47, 47, 47, 47, 47, 47, 2, 2, 47, 2, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 2, 47, 47, 2, 2, 2, 47, 2, 2, 47, 81, 81, + 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 2, 81,120,120, + 120,120,120,120,120,120,116,116,116,116,116,116,116,116,116,116, + 116,116,116,116,116, 2, 2, 2, 2, 2, 2, 2, 2,116,128,128, + 128,128,128,128,128,128,128,128,128, 2,128,128, 2, 2, 2, 2, + 2,128,128,128,128,128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, + 66, 66, 2, 2, 2, 66, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, + 2, 2, 2, 2, 2, 72, 98, 98, 98, 98, 98, 98, 98, 98, 97, 97, + 97, 97, 97, 97, 97, 97, 2, 2, 2, 2, 97, 97, 97, 97, 2, 2, + 97, 97, 97, 97, 97, 97, 57, 57, 57, 57, 2, 57, 57, 2, 2, 2, + 2, 2, 57, 57, 57, 57, 57, 57, 57, 57, 2, 57, 57, 57, 2, 57, + 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, + 57, 57, 57, 57, 2, 2, 57, 57, 57, 2, 2, 2, 2, 57, 57, 2, + 2, 2, 2, 2, 2, 2, 88, 88, 88, 88, 88, 88, 88, 88,117,117, + 117,117,117,117,117,117,112,112,112,112,112,112,112,112,112,112, + 112,112,112,112,112, 2, 2, 2, 2,112,112,112,112,112, 78, 78, + 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 2, 2, 2, 78, + 78, 78, 78, 78, 78, 78, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, + 83, 83, 83, 83, 2, 2, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, + 82, 2, 2, 2, 2, 2,122,122,122,122,122,122,122,122,122,122, + 2, 2, 2, 2, 2, 2, 2,122,122,122,122, 2, 2, 2, 2,122, + 122,122,122,122,122,122, 89, 89, 89, 89, 89, 89, 89, 89, 89, 2, + 2, 2, 2, 2, 2, 2,130,130,130,130,130,130,130,130,130,130, + 130, 2, 2, 2, 2, 2, 2, 2,130,130,130,130,130,130,144,144, + 144,144,144,144,144,144,144,144, 2, 2, 2, 2, 2, 2, 3, 3, + 3, 3, 3, 3, 3, 2,156,156,156,156,156,156,156,156,156,156, + 2,156,156,156, 2, 2,156,156, 2, 2, 2, 2, 2, 2,147,147, + 147,147,147,147,147,147,148,148,148,148,148,148,148,148,148,148, + 2, 2, 2, 2, 2, 2,153,153,153,153,153,153,153,153,153,153, + 153,153, 2, 2, 2, 2,149,149,149,149,149,149,149,149,149,149, + 149,149,149,149,149, 2, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, + 94, 94, 94, 94, 2, 2, 2, 2, 94, 94, 94, 94, 94, 94, 2, 2, + 2, 2, 2, 2, 2, 94, 85, 85, 85, 85, 85, 85, 85, 85, 85, 85, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 85, 2, 2,101,101, + 101,101,101,101,101,101,101, 2, 2, 2, 2, 2, 2, 2,101,101, + 2, 2, 2, 2, 2, 2, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, + 96, 96, 96, 2, 96, 96,111,111,111,111,111,111,111,111,111,111, + 111,111,111,111,111, 2,100,100,100,100,100,100,100,100, 2, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 2, 2, 2,108,108, + 108,108,108,108,108,108,108,108, 2,108,108,108,108,108,108,108, + 108,108,108,108,108, 2,129,129,129,129,129,129,129, 2,129, 2, + 129,129,129,129, 2,129,129,129,129,129,129,129,129,129,129,129, + 129,129,129,129, 2,129,129,129, 2, 2, 2, 2, 2, 2,109,109, + 109,109,109,109,109,109,109,109,109, 2, 2, 2, 2, 2,109,109, + 2, 2, 2, 2, 2, 2,107,107,107,107, 2,107,107,107,107,107, + 107,107,107, 2, 2,107,107, 2, 2,107,107,107,107,107,107,107, + 107,107,107,107,107,107,107, 2,107,107,107,107,107,107,107, 2, + 107,107, 2,107,107,107,107,107, 2, 1,107,107,107,107,107, 2, + 2,107,107,107, 2, 2,107, 2, 2, 2, 2, 2, 2,107, 2, 2, + 2, 2, 2,107,107,107,107,107,107,107, 2, 2,107,107,107,107, + 107,107,107, 2, 2, 2,137,137,137,137,137,137,137,137,137,137, + 137,137, 2,137,137,137,137,137, 2, 2, 2, 2, 2, 2,124,124, + 124,124,124,124,124,124,124,124, 2, 2, 2, 2, 2, 2,123,123, + 123,123,123,123,123,123,123,123,123,123,123,123, 2, 2,114,114, + 114,114,114,114,114,114,114,114,114,114,114, 2, 2, 2,114,114, + 2, 2, 2, 2, 2, 2, 32, 32, 32, 32, 32, 2, 2, 2,102,102, + 102,102,102,102,102,102,102, 2, 2, 2, 2, 2, 2, 2,102,102, + 2, 2, 2, 2, 2, 2,126,126,126,126,126,126,126,126,126,126, + 126, 2, 2,126,126,126,126,126,126,126, 2, 2, 2, 2,142,142, + 142,142,142,142,142,142,142,142,142,142, 2, 2, 2, 2,125,125, + 125,125,125,125,125,125,125,125,125, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2,125,154,154,154,154,154,154,154, 2, 2,154, + 2, 2,154,154,154,154,154,154,154,154, 2,154,154, 2,154,154, + 154,154,154,154,154,154,154,154,154,154,154,154, 2,154,154, 2, + 2,154,154,154,154,154,154,154, 2, 2, 2, 2, 2, 2,150,150, + 150,150,150,150,150,150, 2, 2,150,150,150,150,150,150,150,150, + 150,150,150, 2, 2, 2,141,141,141,141,141,141,141,141,140,140, + 140,140,140,140,140,140,140,140,140, 2, 2, 2, 2, 2,121,121, + 121,121,121,121,121,121,121, 2, 2, 2, 2, 2, 2, 2,133,133, + 133,133,133,133,133,133,133, 2,133,133,133,133,133,133,133,133, + 133,133,133,133,133, 2,133,133,133,133,133,133, 2, 2,133,133, + 133,133,133, 2, 2, 2,134,134,134,134,134,134,134,134, 2, 2, + 134,134,134,134,134,134, 2,134,134,134,134,134,134,134,134,134, + 134,134,134,134,134, 2,138,138,138,138,138,138,138, 2,138,138, + 2,138,138,138,138,138,138,138,138,138,138,138,138,138, 2, 2, + 138, 2,138,138, 2,138,138,138, 2, 2, 2, 2, 2, 2,143,143, + 143,143,143,143, 2,143,143, 2,143,143,143,143,143,143,143,143, + 143,143,143,143,143,143,143,143,143,143,143,143,143, 2,143,143, + 2,143,143,143,143,143,143, 2, 2, 2, 2, 2, 2, 2,143,143, + 2, 2, 2, 2, 2, 2,145,145,145,145,145,145,145,145,145, 2, + 2, 2, 2, 2, 2, 2, 86, 2, 2, 2, 2, 2, 2, 2, 22, 22, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 22, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63, 2, 2, 2, 2, 2, 2, 63, 63, + 63, 63, 63, 63, 63, 2, 63, 63, 63, 63, 63, 2, 2, 2, 63, 63, + 63, 63, 2, 2, 2, 2, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 2, 80, 2, 2, 2, 2, 2, 2, 2,127,127, + 127,127,127,127,127,127,127,127,127,127,127,127,127, 2, 79, 2, + 2, 2, 2, 2, 2, 2,115,115,115,115,115,115,115,115,115,115, + 115,115,115,115,115, 2,115,115, 2, 2, 2, 2,115,115,103,103, + 103,103,103,103,103,103,103,103,103,103,103,103, 2, 2,119,119, + 119,119,119,119,119,119,119,119,119,119,119,119, 2, 2,119,119, + 2,119,119,119,119,119, 2, 2, 2, 2, 2,119,119,119,146,146, + 146,146,146,146,146,146,146,146,146, 2, 2, 2, 2, 2, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 99, 2, 2, + 2, 2, 2, 2, 2, 99,136,139, 0, 0,155, 2, 2, 2,136,136, + 136,136,136,136,136,136,155,155,155,155,155,155,155,155,155,155, + 155,155,155,155, 2, 2,136, 2, 2, 2, 2, 2, 2, 2, 17, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 17, 17, 17, 17,139,139,139,139,139,139,139,139,139,139, + 139,139, 2, 2, 2, 2,105,105,105,105,105,105,105,105,105,105, + 105, 2, 2, 2, 2, 2,105,105,105,105,105, 2, 2, 2,105, 2, + 2, 2, 2, 2, 2, 2,105,105, 2, 2,105,105,105,105, 0, 0, + 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 0, 2, 2, 0, 0, 2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, + 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, + 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, + 0, 0, 0, 2, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0,131,131,131,131,131,131,131,131,131,131, + 131,131, 2, 2, 2, 2, 2, 2, 2,131,131,131,131,131, 2,131, + 131,131,131,131,131,131, 56, 2, 2, 56, 56, 56, 56, 56, 56, 56, + 2, 56, 56, 2, 56, 56, 56, 56, 56, 2, 2, 2, 2, 2,151,151, + 151,151,151,151,151,151,151,151,151,151,151, 2, 2, 2,151,151, + 151,151,151,151, 2, 2,151,151, 2, 2, 2, 2,151,151,152,152, + 152,152,152,152,152,152,152,152, 2, 2, 2, 2, 2,152,113,113, + 113,113,113,113,113,113,113,113,113,113,113, 2, 2,113,113,113, + 113,113,113,113,113, 2,132,132,132,132,132,132,132,132,132,132, + 132,132, 2, 2, 2, 2,132,132, 2, 2, 2, 2,132,132, 3, 3, + 3, 3, 2, 3, 3, 3, 2, 3, 3, 2, 3, 2, 2, 3, 2, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 2, 3, + 2, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 3, 2, 3, + 2, 3, 2, 3, 3, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 3, 3, 3, 2, 3, 2, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 2, 2, 2, 2, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, + 2, 2, 2, 2, 0, 0, 15, 0, 0, 2, 2, 2, 2, 2, 13, 2, + 2, 2, 2, 2, 2, 2, 13, 13, 13, 2, 2, 2, 2, 2, 2, 0, + 2, 2, 2, 2, 2, 2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 9, 9, 9, 10, 9, 11, 12, 13, 9, 9, 9, 14, 9, 9, 15, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 16, 17, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 18, 19, + 20, 9, 21, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 23, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 0, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, 24, 25, 26, 27, 28, + 29, 30, 0, 0, 31, 32, 0, 33, 0, 34, 0, 35, 0, 0, 0, 0, + 36, 37, 38, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 41, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 43, 44, 0, 45, 0, 0, 0, 0, 0, 0, + 46, 47, 0, 0, 0, 0, 0, 48, 0, 49, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 50, 51, 0, 0, 0, 52, 0, 0, + 53, 0, 0, 0, 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, + 55, 0, 0, 0, 0, 0, 0, 0, 56, 0, 0, 0, 0, 0, 0, 0, + 0, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 58, 59, 60, 61, 62, 63, 64, 65, + 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 67, 68, 0, 69, 70, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, + 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, + 99,100,101,102,103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0,104, 0, 0, 0, 0, 0, 0,105,106, 0, + 107, 0, 0, 0,108, 0,109, 0,110, 0,111,112,113, 0,114, 0, + 0, 0,115, 0, 0, 0,116, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0,117, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,118,119,120,121, 0,122,123,124, + 125,126, 0,127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,128,129,130,131,132,133,134,135,136,137,138,139, + 140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155, + 156,157, 0, 0, 0,158,159,160,161, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,162,163, 0, + 0, 0, 0, 0, 0, 0,164, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,165, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,166, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,167, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0,168, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,169,170, 0, 0, 0, 0,171, + 172, 0, 0, 0,173,174,175,176,177,178,179,180,181,182,183,184, + 185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200, + 201,202,203,204,205,206, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 2, 3, 4, +}; +static const uint16_t +_hb_ucd_u16[9080] = +{ + 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 7, 8, 9, 10, 11, 12, + 13, 13, 13, 14, 15, 13, 13, 16, 17, 18, 19, 20, 21, 22, 13, 23, + 13, 13, 13, 24, 25, 11, 11, 11, 11, 26, 11, 27, 28, 29, 30, 31, + 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 11, 37, 38, 13, 39, + 9, 9, 9, 11, 11, 11, 13, 13, 40, 13, 13, 13, 41, 13, 13, 13, + 13, 13, 13, 42, 9, 43, 11, 11, 44, 45, 32, 46, 47, 48, 49, 50, + 51, 52, 48, 48, 53, 32, 54, 55, 48, 48, 48, 48, 48, 56, 57, 58, + 59, 60, 48, 32, 61, 48, 48, 48, 48, 48, 62, 63, 64, 48, 65, 66, + 48, 67, 68, 69, 48, 70, 71, 72, 72, 72, 48, 73, 74, 75, 76, 32, + 77, 48, 48, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + 91, 84, 85, 92, 93, 94, 95, 96, 97, 98, 85, 99, 100, 101, 89, 102, + 103, 84, 85, 104, 105, 106, 89, 107, 108, 109, 110, 111, 112, 113, 95, 114, + 115, 116, 85, 117, 118, 119, 89, 120, 121, 116, 85, 122, 123, 124, 89, 125, + 126, 116, 48, 127, 128, 129, 89, 130, 131, 132, 48, 133, 134, 135, 95, 136, + 137, 48, 48, 138, 139, 140, 72, 72, 141, 48, 142, 143, 144, 145, 72, 72, + 146, 147, 148, 149, 150, 48, 151, 152, 153, 154, 32, 155, 156, 157, 72, 72, + 48, 48, 158, 159, 160, 161, 162, 163, 164, 165, 9, 9, 166, 11, 11, 167, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 168, 169, 48, 48, + 168, 48, 48, 170, 171, 172, 48, 48, 48, 171, 48, 48, 48, 173, 174, 175, + 48, 176, 9, 9, 9, 9, 9, 177, 178, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 179, 48, 180, 181, 48, 48, 48, 48, 182, 183, + 184, 185, 48, 186, 48, 187, 184, 188, 48, 48, 48, 189, 190, 191, 192, 193, + 194, 192, 48, 48, 195, 48, 48, 196, 197, 48, 198, 48, 48, 48, 48, 199, + 48, 200, 201, 202, 203, 48, 204, 205, 48, 48, 206, 48, 207, 208, 209, 209, + 48, 210, 48, 48, 48, 211, 212, 213, 192, 192, 214, 215, 216, 72, 72, 72, + 217, 48, 48, 218, 219, 160, 220, 221, 222, 48, 223, 64, 48, 48, 224, 225, + 48, 48, 226, 227, 228, 64, 48, 229, 230, 9, 9, 231, 232, 233, 234, 235, + 11, 11, 236, 27, 27, 27, 237, 238, 11, 239, 27, 27, 32, 32, 32, 240, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 241, 13, 13, 13, 13, 13, 13, + 242, 243, 242, 242, 243, 244, 242, 245, 246, 246, 246, 247, 248, 249, 250, 251, + 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 262, 72, 263, 264, 216, + 265, 266, 267, 268, 269, 270, 271, 271, 272, 273, 274, 209, 275, 276, 209, 277, + 278, 278, 278, 278, 278, 278, 278, 278, 279, 209, 280, 209, 209, 209, 209, 281, + 209, 282, 278, 283, 209, 284, 285, 209, 209, 209, 286, 72, 287, 72, 270, 270, + 270, 288, 209, 209, 209, 209, 289, 270, 209, 209, 209, 209, 209, 209, 209, 209, + 209, 209, 209, 290, 291, 209, 209, 292, 209, 209, 209, 209, 209, 209, 293, 209, + 209, 209, 209, 209, 209, 209, 294, 295, 270, 296, 209, 209, 297, 278, 298, 278, + 299, 300, 278, 278, 278, 301, 278, 302, 209, 209, 209, 278, 303, 209, 209, 304, + 209, 305, 209, 209, 209, 209, 209, 209, 9, 9, 306, 11, 11, 307, 308, 309, + 13, 13, 13, 13, 13, 13, 310, 311, 11, 11, 312, 48, 48, 48, 313, 314, + 48, 315, 316, 316, 316, 316, 32, 32, 317, 318, 319, 320, 321, 322, 72, 72, + 209, 323, 209, 209, 209, 209, 209, 324, 209, 209, 209, 209, 209, 325, 72, 326, + 327, 328, 329, 330, 137, 48, 48, 48, 48, 331, 178, 48, 48, 48, 48, 332, + 333, 48, 48, 137, 48, 48, 48, 48, 200, 334, 48, 48, 209, 209, 324, 48, + 209, 335, 336, 209, 337, 338, 209, 209, 336, 209, 209, 338, 209, 209, 209, 209, + 48, 48, 48, 48, 209, 209, 209, 209, 48, 48, 48, 48, 48, 48, 48, 151, + 48, 339, 48, 48, 48, 48, 48, 48, 151, 209, 209, 209, 286, 48, 48, 229, + 340, 48, 341, 72, 13, 13, 342, 343, 13, 344, 48, 48, 48, 48, 345, 346, + 31, 347, 348, 349, 13, 13, 13, 350, 351, 352, 353, 354, 355, 72, 72, 356, + 357, 48, 358, 359, 48, 48, 48, 360, 361, 48, 48, 362, 363, 192, 32, 364, + 64, 48, 365, 48, 366, 367, 48, 151, 77, 48, 48, 368, 369, 370, 371, 372, + 48, 48, 373, 374, 375, 376, 48, 377, 48, 48, 48, 378, 379, 380, 381, 382, + 383, 384, 316, 11, 11, 385, 386, 11, 11, 11, 11, 11, 48, 48, 387, 192, + 48, 48, 388, 48, 389, 48, 48, 206, 390, 390, 390, 390, 390, 390, 390, 390, + 391, 391, 391, 391, 391, 391, 391, 391, 48, 48, 48, 48, 48, 48, 204, 48, + 48, 48, 48, 48, 48, 207, 72, 72, 392, 393, 394, 395, 396, 48, 48, 48, + 48, 48, 48, 397, 398, 399, 48, 48, 48, 48, 48, 400, 72, 48, 48, 48, + 48, 401, 48, 48, 74, 72, 72, 402, 32, 403, 32, 404, 405, 406, 407, 73, + 48, 48, 48, 48, 48, 48, 48, 408, 409, 2, 3, 4, 5, 410, 411, 412, + 48, 413, 48, 200, 414, 415, 416, 417, 418, 48, 172, 419, 204, 204, 72, 72, + 48, 48, 48, 48, 48, 48, 48, 71, 420, 270, 270, 421, 271, 271, 271, 422, + 423, 424, 425, 72, 72, 209, 209, 426, 72, 72, 72, 72, 72, 72, 72, 72, + 48, 151, 48, 48, 48, 101, 427, 428, 48, 48, 429, 48, 430, 48, 48, 431, + 48, 432, 48, 48, 433, 434, 72, 72, 9, 9, 435, 11, 11, 48, 48, 48, + 48, 204, 192, 9, 9, 436, 11, 437, 48, 48, 74, 48, 48, 48, 438, 72, + 48, 48, 48, 315, 48, 199, 74, 72, 439, 48, 48, 440, 48, 441, 48, 442, + 48, 200, 443, 72, 72, 72, 48, 444, 48, 445, 48, 446, 72, 72, 72, 72, + 48, 48, 48, 447, 270, 448, 270, 270, 449, 450, 48, 451, 452, 453, 48, 454, + 48, 455, 72, 72, 456, 48, 457, 458, 48, 48, 48, 459, 48, 460, 48, 461, + 48, 462, 463, 72, 72, 72, 72, 72, 48, 48, 48, 48, 196, 72, 72, 72, + 9, 9, 9, 464, 11, 11, 11, 465, 48, 48, 466, 192, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 270, 467, 48, 48, 468, 469, 72, 72, 72, 72, + 48, 455, 470, 48, 62, 471, 72, 72, 72, 72, 72, 48, 472, 72, 48, 315, + 473, 48, 48, 474, 475, 448, 476, 477, 222, 48, 48, 478, 479, 48, 196, 192, + 480, 48, 481, 482, 483, 48, 48, 484, 222, 48, 48, 485, 486, 487, 488, 489, + 48, 98, 490, 491, 72, 72, 72, 72, 492, 493, 494, 48, 48, 495, 496, 192, + 497, 84, 85, 498, 499, 500, 501, 502, 48, 48, 48, 503, 504, 505, 469, 72, + 48, 48, 48, 506, 507, 192, 72, 72, 48, 48, 508, 509, 510, 511, 72, 72, + 48, 48, 48, 512, 513, 192, 514, 72, 48, 48, 515, 516, 192, 72, 72, 72, + 48, 173, 517, 518, 72, 72, 72, 72, 48, 48, 490, 519, 72, 72, 72, 72, + 72, 72, 9, 9, 11, 11, 148, 520, 521, 522, 48, 523, 524, 192, 72, 72, + 72, 72, 525, 48, 48, 526, 527, 72, 528, 48, 48, 529, 530, 531, 48, 48, + 532, 533, 534, 72, 48, 48, 48, 196, 85, 48, 508, 535, 536, 148, 175, 537, + 48, 538, 539, 540, 72, 72, 72, 72, 541, 48, 48, 542, 543, 192, 544, 48, + 545, 546, 192, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 72, 48, 547, + 72, 72, 72, 101, 270, 548, 549, 550, 48, 207, 72, 72, 72, 72, 72, 72, + 271, 271, 271, 271, 271, 271, 551, 552, 48, 48, 48, 48, 388, 72, 72, 72, + 48, 48, 200, 553, 72, 72, 72, 72, 48, 48, 48, 48, 315, 72, 72, 72, + 48, 48, 48, 196, 48, 200, 370, 72, 72, 72, 72, 72, 72, 48, 204, 554, + 48, 48, 48, 555, 556, 557, 558, 559, 48, 72, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 72, 9, 9, 11, 11, 270, 560, 72, 72, 72, 72, 72, 72, + 48, 48, 48, 48, 561, 562, 563, 563, 564, 565, 72, 72, 72, 72, 566, 567, + 48, 48, 48, 48, 48, 48, 48, 74, 48, 48, 48, 48, 48, 199, 72, 72, + 196, 72, 72, 72, 72, 72, 72, 72, 48, 200, 72, 72, 72, 568, 569, 48, + 48, 48, 48, 48, 48, 48, 48, 206, 48, 48, 48, 48, 48, 48, 71, 151, + 196, 570, 571, 72, 72, 72, 72, 72, 209, 209, 209, 209, 209, 209, 209, 325, + 209, 209, 572, 209, 209, 209, 573, 574, 575, 209, 576, 209, 209, 209, 577, 72, + 209, 209, 209, 209, 578, 72, 72, 72, 72, 72, 72, 72, 72, 72, 270, 579, + 209, 209, 209, 209, 209, 286, 270, 452, 9, 580, 11, 581, 582, 583, 242, 9, + 584, 585, 586, 587, 588, 9, 580, 11, 589, 590, 11, 591, 592, 593, 594, 9, + 595, 11, 9, 580, 11, 581, 582, 11, 242, 9, 584, 594, 9, 595, 11, 9, + 580, 11, 596, 9, 597, 598, 599, 600, 11, 601, 9, 602, 603, 604, 605, 11, + 606, 9, 607, 11, 608, 609, 609, 609, 32, 32, 32, 610, 32, 32, 611, 612, + 613, 614, 45, 72, 72, 72, 72, 72, 615, 616, 617, 72, 72, 72, 72, 72, + 48, 48, 151, 618, 619, 72, 72, 72, 72, 72, 72, 72, 48, 48, 620, 621, + 48, 48, 48, 48, 622, 623, 72, 72, 9, 9, 584, 11, 624, 370, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 488, 270, 270, 625, 626, 72, 72, 72, 72, + 488, 270, 627, 628, 72, 72, 72, 72, 629, 48, 630, 631, 632, 633, 634, 635, + 636, 206, 637, 206, 72, 72, 72, 638, 209, 209, 326, 209, 209, 209, 209, 209, + 209, 324, 335, 639, 639, 639, 209, 325, 640, 209, 209, 209, 209, 209, 209, 209, + 209, 209, 641, 72, 72, 72, 642, 209, 643, 209, 209, 326, 577, 644, 325, 72, + 209, 209, 209, 209, 209, 209, 209, 645, 209, 209, 209, 209, 209, 646, 424, 424, + 209, 209, 209, 209, 209, 209, 209, 324, 209, 209, 209, 209, 209, 577, 326, 72, + 326, 209, 209, 209, 646, 176, 209, 209, 646, 209, 641, 644, 72, 72, 72, 72, + 209, 209, 209, 209, 209, 209, 209, 647, 209, 209, 209, 209, 648, 209, 209, 209, + 209, 209, 209, 209, 209, 324, 641, 649, 286, 209, 577, 286, 643, 286, 72, 72, + 209, 650, 209, 209, 287, 72, 72, 192, 48, 48, 48, 48, 48, 204, 72, 72, + 48, 48, 48, 205, 48, 48, 48, 48, 48, 204, 48, 48, 48, 48, 48, 48, + 48, 48, 469, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 101, 72, + 48, 204, 72, 72, 72, 72, 72, 72, 48, 48, 48, 48, 71, 72, 72, 72, + 651, 72, 652, 652, 652, 652, 652, 652, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 72, 391, 391, 391, 391, 391, 391, 391, 653, + 391, 391, 391, 391, 391, 391, 391, 654, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 2, 2, 3, 1, 2, 2, 3, 0, 0, 0, 0, 0, 4, 0, 4, + 2, 2, 5, 2, 2, 2, 5, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, + 0, 0, 0, 0, 7, 8, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 10, 11, 12, 13, 14, 14, 15, 14, 14, 14, + 14, 14, 14, 14, 16, 17, 14, 14, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 19, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 20, 21, + 21, 21, 22, 20, 21, 21, 21, 21, 21, 23, 24, 25, 25, 25, 25, 25, + 25, 26, 25, 25, 25, 27, 28, 26, 29, 30, 31, 32, 31, 31, 31, 31, + 33, 34, 35, 31, 31, 31, 36, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 29, 31, 31, 31, 31, 37, 38, 37, 37, 37, 37, 37, 37, + 37, 39, 31, 31, 31, 31, 31, 31, 40, 40, 40, 40, 40, 40, 41, 26, + 42, 42, 42, 42, 42, 42, 42, 43, 44, 44, 44, 44, 44, 45, 44, 46, + 47, 47, 47, 48, 37, 49, 26, 26, 26, 26, 26, 26, 31, 31, 50, 31, + 31, 26, 51, 31, 52, 31, 31, 31, 53, 53, 53, 53, 53, 53, 53, 53, + 53, 53, 54, 53, 55, 53, 53, 53, 56, 57, 58, 59, 59, 60, 61, 62, + 57, 63, 64, 65, 66, 59, 59, 67, 68, 69, 70, 71, 71, 72, 73, 74, + 69, 75, 76, 77, 78, 71, 79, 26, 80, 81, 82, 83, 83, 84, 85, 86, + 81, 87, 88, 26, 89, 83, 90, 91, 92, 93, 94, 95, 95, 96, 97, 98, + 93, 99, 100, 101, 102, 95, 95, 26, 103, 104, 105, 106, 107, 104, 108, 109, + 104, 105, 110, 26, 111, 108, 108, 112, 113, 114, 115, 113, 113, 115, 113, 116, + 114, 117, 118, 119, 120, 113, 121, 113, 122, 123, 124, 122, 122, 124, 125, 126, + 123, 127, 128, 129, 130, 122, 131, 26, 132, 133, 134, 132, 132, 132, 132, 132, + 133, 134, 135, 132, 136, 132, 132, 132, 137, 138, 139, 140, 138, 138, 141, 142, + 139, 143, 144, 138, 145, 138, 146, 26, 147, 148, 148, 148, 148, 148, 148, 149, + 148, 148, 148, 150, 26, 26, 26, 26, 151, 152, 153, 153, 154, 153, 153, 155, + 156, 155, 153, 157, 26, 26, 26, 26, 158, 158, 158, 158, 158, 158, 158, 158, + 158, 159, 158, 158, 158, 160, 159, 158, 158, 158, 158, 159, 158, 158, 158, 161, + 158, 161, 162, 163, 26, 26, 26, 26, 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 165, 165, 165, 165, + 166, 167, 165, 165, 165, 165, 165, 168, 169, 169, 169, 169, 169, 169, 169, 169, + 169, 169, 169, 169, 169, 169, 169, 169, 170, 170, 170, 170, 170, 170, 170, 170, + 170, 171, 172, 171, 170, 170, 170, 170, 170, 171, 170, 170, 170, 170, 171, 172, + 171, 170, 172, 170, 170, 170, 170, 170, 170, 170, 171, 170, 170, 170, 170, 170, + 170, 170, 170, 173, 170, 170, 170, 174, 170, 170, 170, 175, 176, 176, 176, 176, + 176, 176, 176, 176, 176, 176, 177, 177, 178, 178, 178, 178, 178, 178, 178, 178, + 178, 178, 178, 178, 178, 178, 178, 178, 179, 179, 179, 180, 181, 181, 181, 181, + 181, 181, 181, 181, 181, 182, 181, 183, 184, 185, 186, 26, 187, 187, 188, 26, + 189, 189, 190, 26, 191, 192, 193, 26, 194, 194, 194, 194, 194, 194, 194, 194, + 194, 194, 194, 195, 194, 196, 194, 196, 197, 198, 199, 200, 199, 199, 199, 199, + 199, 199, 199, 199, 199, 199, 199, 201, 199, 199, 199, 199, 199, 202, 178, 178, + 178, 178, 178, 178, 178, 178, 203, 26, 204, 204, 204, 205, 204, 206, 204, 206, + 207, 204, 208, 208, 208, 209, 210, 26, 211, 211, 211, 211, 211, 212, 211, 211, + 211, 213, 211, 214, 194, 194, 194, 194, 215, 215, 215, 216, 217, 217, 217, 217, + 217, 217, 217, 218, 217, 217, 217, 219, 217, 220, 217, 220, 217, 221, 9, 9, + 222, 26, 26, 26, 26, 26, 26, 26, 223, 223, 223, 223, 223, 223, 223, 223, + 223, 224, 223, 223, 223, 223, 223, 225, 226, 226, 226, 226, 226, 226, 226, 226, + 227, 227, 227, 227, 227, 227, 228, 229, 230, 230, 230, 230, 230, 230, 230, 231, + 230, 232, 233, 233, 233, 233, 233, 233, 18, 234, 165, 165, 165, 165, 165, 235, + 226, 26, 236, 9, 237, 238, 239, 240, 2, 2, 2, 2, 241, 242, 2, 2, + 2, 2, 2, 243, 244, 245, 2, 246, 2, 2, 2, 2, 2, 2, 2, 247, + 9, 9, 9, 9, 9, 9, 9, 248, 14, 14, 249, 249, 14, 14, 14, 14, + 249, 249, 14, 250, 14, 14, 14, 249, 14, 14, 14, 14, 14, 14, 251, 14, + 251, 14, 252, 253, 14, 14, 254, 255, 0, 256, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 257, 0, 258, 259, 0, 260, 2, 261, 0, 0, 0, 0, + 26, 26, 9, 9, 9, 9, 222, 26, 0, 0, 0, 0, 262, 263, 4, 0, + 0, 264, 0, 0, 2, 2, 2, 2, 2, 265, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 260, 26, 26, 26, + 0, 266, 26, 26, 0, 0, 0, 0, 267, 267, 267, 267, 267, 267, 267, 267, + 267, 267, 267, 267, 267, 267, 267, 267, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 268, 0, 0, 0, 269, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 270, 270, 270, 270, 270, 271, 270, 270, + 270, 270, 270, 271, 2, 2, 2, 2, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 272, 273, 165, 165, 165, 165, 166, 167, 274, 274, + 274, 274, 274, 274, 274, 275, 276, 275, 170, 170, 172, 26, 172, 172, 172, 172, + 172, 172, 172, 172, 18, 18, 18, 18, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 266, 26, 26, 26, 26, 26, 277, 277, 277, 278, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 279, 26, 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 280, 26, 26, 26, 0, 281, 282, 0, 0, 0, 283, 284, 0, 285, + 286, 287, 287, 287, 287, 287, 287, 287, 287, 287, 288, 289, 290, 291, 291, 291, + 291, 291, 291, 291, 291, 291, 291, 292, 293, 294, 294, 294, 294, 294, 295, 169, + 169, 169, 169, 169, 169, 169, 169, 169, 169, 296, 0, 0, 294, 294, 294, 294, + 0, 0, 0, 0, 281, 26, 291, 291, 169, 169, 169, 296, 0, 0, 0, 0, + 0, 0, 0, 0, 169, 169, 169, 297, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 291, 291, 291, 291, 291, 298, 291, 291, 291, 291, 291, 291, 291, 291, + 291, 291, 291, 0, 0, 0, 0, 0, 277, 277, 277, 277, 277, 277, 277, 277, + 0, 0, 0, 0, 0, 0, 0, 0, 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 299, 300, 300, 300, 300, 300, 300, 300, 300, + 300, 300, 300, 300, 300, 300, 300, 300, 300, 301, 300, 300, 300, 300, 300, 300, + 302, 26, 303, 303, 303, 303, 303, 303, 304, 304, 304, 304, 304, 304, 304, 304, + 304, 304, 304, 304, 304, 304, 304, 304, 304, 304, 304, 304, 304, 305, 26, 26, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 306, 306, 306, 306, + 306, 306, 306, 306, 306, 306, 306, 26, 0, 0, 0, 0, 307, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 308, 2, 2, 2, 2, 2, 2, + 309, 310, 26, 26, 26, 26, 311, 2, 312, 312, 312, 312, 312, 313, 0, 314, + 315, 315, 315, 315, 315, 315, 315, 26, 316, 316, 316, 316, 316, 316, 316, 316, + 317, 318, 316, 319, 53, 53, 53, 53, 320, 320, 320, 320, 320, 321, 322, 322, + 322, 322, 323, 324, 169, 169, 169, 325, 326, 326, 326, 326, 326, 326, 326, 326, + 326, 327, 326, 328, 164, 164, 164, 329, 330, 330, 330, 330, 330, 330, 331, 26, + 330, 332, 330, 333, 164, 164, 164, 164, 334, 334, 334, 334, 334, 334, 334, 334, + 335, 26, 26, 336, 337, 337, 338, 26, 339, 339, 339, 26, 172, 172, 2, 2, + 2, 2, 2, 340, 341, 342, 176, 176, 176, 176, 176, 176, 176, 176, 176, 176, + 337, 337, 337, 337, 337, 343, 337, 344, 169, 169, 169, 169, 345, 26, 169, 169, + 296, 346, 169, 169, 169, 169, 169, 345, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 280, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 347, 26, 26, 26, 26, 348, 26, 349, 350, 25, 25, 351, 352, + 353, 25, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 354, 26, 51, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 355, + 26, 26, 31, 31, 31, 31, 31, 31, 31, 31, 356, 31, 31, 31, 31, 31, + 31, 26, 26, 26, 26, 26, 31, 357, 9, 9, 0, 314, 9, 358, 0, 0, + 0, 0, 359, 0, 260, 281, 50, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 31, 360, 361, 0, 0, 0, 1, 2, 2, 3, + 1, 2, 2, 3, 362, 291, 290, 291, 291, 291, 291, 363, 169, 169, 169, 296, + 364, 364, 364, 365, 260, 260, 26, 366, 367, 368, 367, 367, 369, 367, 367, 370, + 367, 371, 367, 371, 26, 26, 26, 26, 367, 367, 367, 367, 367, 367, 367, 367, + 367, 367, 367, 367, 367, 367, 367, 372, 373, 0, 0, 0, 0, 0, 374, 0, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 255, 0, 375, 376, 26, 26, 26, + 26, 26, 0, 0, 0, 0, 0, 377, 378, 378, 378, 379, 380, 380, 380, 380, + 380, 380, 381, 26, 382, 0, 0, 281, 383, 383, 383, 383, 384, 385, 386, 386, + 386, 387, 388, 388, 388, 388, 388, 389, 390, 390, 390, 391, 392, 392, 392, 392, + 393, 392, 394, 26, 26, 26, 26, 26, 395, 395, 395, 395, 395, 395, 395, 395, + 395, 395, 396, 396, 396, 396, 396, 396, 397, 397, 397, 398, 397, 399, 400, 400, + 400, 400, 401, 400, 400, 400, 400, 401, 402, 402, 402, 402, 402, 26, 403, 403, + 403, 403, 403, 403, 404, 405, 26, 26, 406, 406, 406, 406, 406, 406, 406, 406, + 406, 406, 406, 406, 406, 406, 406, 406, 406, 406, 406, 406, 406, 406, 407, 26, + 406, 406, 408, 26, 406, 26, 26, 26, 409, 410, 411, 411, 411, 411, 412, 413, + 414, 414, 415, 414, 416, 416, 416, 416, 417, 417, 417, 418, 419, 417, 26, 26, + 26, 26, 26, 26, 420, 420, 421, 422, 423, 423, 423, 424, 425, 425, 425, 426, + 26, 26, 26, 26, 26, 26, 26, 26, 427, 427, 427, 427, 428, 428, 428, 429, + 428, 428, 430, 428, 428, 428, 428, 428, 431, 432, 433, 434, 435, 435, 436, 437, + 435, 438, 435, 438, 439, 439, 439, 439, 440, 440, 440, 440, 26, 26, 26, 26, + 441, 441, 441, 441, 442, 443, 442, 26, 444, 444, 444, 444, 444, 444, 445, 446, + 447, 447, 448, 447, 449, 449, 450, 449, 451, 451, 452, 453, 26, 454, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 455, 455, 455, 455, 455, 455, 455, 455, + 455, 456, 26, 26, 26, 26, 26, 26, 457, 457, 457, 457, 457, 457, 458, 26, + 457, 457, 457, 457, 457, 457, 458, 459, 460, 460, 460, 460, 460, 26, 460, 461, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 31, 31, 31, 462, 463, 463, 463, 463, 463, 464, 465, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 466, 466, 466, 466, 466, 26, 467, 467, + 467, 467, 467, 468, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 469, 469, + 469, 470, 26, 26, 471, 471, 472, 26, 473, 473, 473, 473, 473, 473, 473, 473, + 473, 474, 475, 473, 473, 473, 26, 476, 477, 477, 477, 477, 477, 477, 477, 477, + 478, 479, 480, 480, 480, 481, 480, 482, 483, 483, 483, 483, 483, 483, 484, 483, + 483, 26, 485, 485, 485, 485, 486, 26, 487, 487, 487, 487, 487, 487, 487, 487, + 487, 487, 487, 487, 488, 138, 489, 26, 490, 490, 491, 490, 490, 490, 490, 492, + 26, 26, 26, 26, 26, 26, 26, 26, 493, 494, 495, 496, 495, 497, 498, 498, + 498, 498, 498, 498, 498, 499, 498, 500, 501, 502, 503, 504, 504, 505, 506, 507, + 502, 508, 509, 510, 511, 512, 512, 26, 513, 513, 513, 513, 513, 513, 513, 513, + 513, 513, 513, 514, 515, 26, 26, 26, 516, 516, 516, 516, 516, 516, 516, 516, + 516, 26, 516, 517, 26, 26, 26, 26, 518, 518, 518, 518, 518, 518, 519, 518, + 518, 518, 518, 519, 26, 26, 26, 26, 520, 520, 520, 520, 520, 520, 520, 520, + 521, 26, 520, 522, 199, 523, 26, 26, 524, 524, 524, 524, 524, 524, 524, 525, + 524, 526, 26, 26, 26, 26, 26, 26, 527, 527, 527, 528, 527, 529, 527, 527, + 26, 26, 26, 26, 26, 26, 26, 26, 530, 530, 530, 530, 530, 530, 530, 531, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 532, 532, 532, 532, + 532, 532, 532, 532, 532, 532, 533, 534, 535, 536, 537, 538, 538, 538, 539, 540, + 535, 26, 538, 541, 26, 26, 26, 26, 26, 26, 26, 26, 542, 543, 542, 542, + 542, 542, 542, 543, 544, 26, 26, 26, 545, 545, 545, 545, 545, 545, 545, 545, + 545, 26, 546, 546, 546, 546, 546, 546, 546, 546, 546, 546, 547, 26, 26, 26, + 548, 548, 548, 548, 548, 548, 548, 549, 550, 551, 550, 550, 550, 550, 552, 550, + 553, 26, 550, 550, 550, 554, 555, 555, 555, 555, 556, 555, 555, 557, 558, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 559, 560, 561, 561, 561, 561, 559, 562, + 561, 26, 561, 563, 564, 565, 566, 566, 566, 567, 568, 569, 566, 570, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 571, 571, 571, 572, 26, 26, 26, 26, 26, 26, 573, 26, + 108, 108, 108, 108, 108, 108, 574, 575, 576, 576, 576, 576, 576, 576, 576, 576, + 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 576, 577, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 576, 576, 576, 576, 576, 576, 576, 576, + 576, 576, 576, 576, 576, 578, 579, 26, 576, 576, 576, 576, 576, 576, 576, 576, + 580, 26, 26, 26, 26, 26, 26, 26, 581, 581, 581, 581, 581, 581, 581, 581, + 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 581, 582, 581, 583, + 26, 26, 26, 26, 26, 26, 26, 26, 584, 584, 584, 584, 584, 584, 584, 584, + 584, 584, 584, 584, 584, 584, 584, 584, 584, 584, 584, 584, 584, 584, 584, 584, + 585, 26, 26, 26, 26, 26, 26, 26, 306, 306, 306, 306, 306, 306, 306, 306, + 306, 306, 306, 306, 306, 306, 306, 306, 306, 306, 306, 306, 306, 306, 306, 586, + 587, 587, 587, 588, 587, 589, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 590, 590, 590, 591, 591, 26, 592, 592, 592, 592, 592, 592, 592, 592, + 593, 26, 592, 594, 594, 592, 592, 595, 592, 592, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 596, 596, 596, 596, 596, 596, 596, 596, 596, 596, 596, 597, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 598, 598, 598, 598, 598, 598, 598, 598, + 598, 599, 598, 598, 598, 598, 598, 598, 598, 600, 598, 598, 26, 26, 26, 26, + 26, 26, 26, 26, 601, 26, 347, 26, 602, 602, 602, 602, 602, 602, 602, 602, + 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, 602, + 602, 602, 602, 602, 602, 602, 602, 26, 603, 603, 603, 603, 603, 603, 603, 603, + 603, 603, 603, 603, 603, 603, 603, 603, 603, 603, 603, 603, 603, 603, 603, 603, + 603, 603, 604, 26, 26, 26, 26, 26, 602, 605, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 606, 287, 287, 287, 287, 287, 287, 287, + 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, + 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 287, 288, 26, 26, 26, 26, + 26, 26, 607, 26, 608, 26, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, + 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, 609, + 609, 609, 609, 609, 609, 609, 609, 610, 611, 611, 611, 611, 611, 611, 611, 611, + 611, 611, 611, 611, 611, 612, 611, 613, 611, 614, 611, 615, 281, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 616, 26, 0, 0, 0, 0, 260, 361, 0, 0, + 0, 0, 0, 0, 617, 618, 0, 619, 620, 621, 0, 0, 0, 622, 0, 0, + 0, 0, 0, 0, 0, 623, 26, 26, 14, 14, 14, 14, 14, 14, 14, 14, + 249, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 0, 0, 281, 26, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 260, 26, 0, 0, 0, 623, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 257, 0, 0, 0, 0, 0, 0, 0, 0, 257, 624, 625, 0, 626, + 627, 0, 0, 0, 0, 0, 0, 0, 269, 628, 257, 257, 0, 0, 0, 629, + 630, 631, 632, 0, 0, 0, 0, 0, 0, 0, 0, 0, 616, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 268, 0, 0, 0, 0, 0, 0, 633, 633, 633, 633, 633, 633, 633, 633, + 633, 633, 633, 633, 633, 633, 633, 633, 633, 634, 26, 635, 636, 633, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 271, 270, 270, 637, 638, 639, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 640, 640, 640, 640, 640, 641, 640, 642, + 640, 643, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 644, 644, 644, 644, 644, 644, 644, 645, 646, 646, 646, 646, 646, 646, 646, 646, + 646, 646, 646, 646, 646, 646, 646, 646, 646, 646, 646, 646, 646, 646, 646, 646, + 647, 646, 648, 26, 26, 26, 26, 26, 649, 649, 649, 649, 649, 649, 649, 649, + 649, 650, 649, 651, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 361, 0, 0, 0, 0, 0, 0, 0, 375, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 361, 0, 0, 0, 0, 0, 0, 616, + 26, 26, 26, 26, 26, 26, 26, 26, 652, 31, 31, 31, 653, 654, 655, 656, + 657, 658, 653, 659, 653, 655, 655, 660, 31, 661, 31, 662, 663, 661, 31, 662, + 26, 26, 26, 26, 26, 26, 354, 26, 0, 0, 0, 0, 0, 281, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 281, 26, 0, 260, 361, 0, + 361, 0, 361, 0, 0, 0, 616, 26, 0, 0, 0, 0, 0, 616, 26, 26, + 26, 26, 26, 26, 664, 0, 0, 0, 665, 26, 0, 0, 0, 0, 0, 281, + 0, 623, 314, 26, 616, 26, 26, 26, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 26, 0, 375, 0, 375, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 281, 26, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 623, 0, 281, 26, 26, 0, 281, 0, 0, 0, 0, 0, 0, + 0, 26, 0, 314, 0, 0, 0, 0, 0, 26, 0, 0, 0, 616, 314, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 632, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 627, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 281, 26, 0, 616, 375, 266, 260, 26, 0, 0, 0, 623, 260, 26, + 266, 26, 260, 26, 26, 26, 26, 26, 0, 0, 359, 0, 0, 0, 0, 0, + 0, 266, 26, 26, 26, 26, 0, 314, 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 280, 26, 26, 26, 26, 277, 277, 277, 277, 277, 277, 299, 26, + 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 280, 277, 277, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 347, 26, 277, 277, + 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, + 277, 277, 277, 277, 666, 26, 26, 26, 277, 277, 277, 280, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 277, 277, 277, 277, 277, 277, 277, 277, + 277, 667, 26, 26, 26, 26, 26, 26, 668, 26, 26, 26, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 939, 940, 941, 942, 946, 948, 0, 962, + 969, 970, 971, 976,1001,1002,1003,1008, 0,1033,1040,1041,1042,1043,1047, 0, + 0,1080,1081,1082,1086,1110, 0, 0,1124,1125,1126,1127,1131,1133, 0,1147, + 1154,1155,1156,1161,1187,1188,1189,1193, 0,1219,1226,1227,1228,1229,1233, 0, + 0,1267,1268,1269,1273,1298, 0,1303, 943,1128, 944,1129, 954,1139, 958,1143, + 959,1144, 960,1145, 961,1146, 964,1149, 0, 0, 973,1158, 974,1159, 975,1160, + 983,1168, 978,1163, 988,1173, 990,1175, 991,1176, 993,1178, 994,1179, 0, 0, + 1004,1190,1005,1191,1006,1192,1014,1199,1007, 0, 0, 0,1016,1201,1020,1206, + 0,1022,1208,1025,1211,1023,1209, 0, 0, 0, 0,1032,1218,1037,1223,1035, + 1221, 0, 0, 0,1044,1230,1045,1231,1049,1235, 0, 0,1058,1244,1064,1250, + 1060,1246,1066,1252,1067,1253,1072,1258,1069,1255,1077,1264,1074,1261, 0, 0, + 1083,1270,1084,1271,1085,1272,1088,1275,1089,1276,1096,1283,1103,1290,1111,1299, + 1115,1118,1307,1120,1309,1121,1310, 0,1053,1239, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0,1093,1280, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 949,1134,1010,1195,1050,1236,1090,1277,1341,1368,1340, + 1367,1342,1369,1339,1366, 0,1320,1347,1418,1419,1323,1350, 0, 0, 992,1177, + 1018,1204,1055,1241,1416,1417,1415,1424,1202, 0, 0, 0, 987,1172, 0, 0, + 1031,1217,1321,1348,1322,1349,1338,1365, 950,1135, 951,1136, 979,1164, 980,1165, + 1011,1196,1012,1197,1051,1237,1052,1238,1061,1247,1062,1248,1091,1278,1092,1279, + 1071,1257,1076,1263, 0, 0, 997,1182, 0, 0, 0, 0, 0, 0, 945,1130, + 982,1167,1337,1364,1335,1362,1046,1232,1422,1423,1113,1301, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 0, 10,1425, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,1314,1427, 5, + 1434,1438,1443, 0,1450, 0,1455,1461,1514, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0,1446,1458,1468,1476,1480,1486,1517, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0,1489,1503,1494,1500,1508, 0, 0, 0, 0,1520,1521, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,1526,1528, 0,1525, 0, 0, 0,1522, + 0, 0, 0, 0,1536,1532,1539, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0,1534, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0,1556, 0, 0, 0, 0, 0, 0,1548,1550, 0,1547, 0, 0, 0,1567, + 0, 0, 0, 0,1558,1554,1561, 0, 0, 0, 0, 0, 0, 0,1568,1569, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1529,1551, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,1523,1545,1524,1546, 0, 0,1527,1549, + 0, 0,1570,1571,1530,1552,1531,1553, 0, 0,1533,1555,1535,1557,1537,1559, + 0, 0,1572,1573,1544,1566,1538,1560,1540,1562,1541,1563,1542,1564, 0, 0, + 1543,1565, 0, 0, 0, 0, 0, 0, 0, 0,1606,1607,1609,1608,1610, 0, + 0, 0, 0, 0, 0, 0, 0, 0,1613, 0,1611, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1612, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0,1620, 0, 0, 0, 0, 0, 0, 0,1623, 0, 0,1624, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1614,1615,1616,1617,1618,1619,1621,1622, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1628,1629, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,1625,1626, 0,1627, 0, 0, 0,1634, 0, 0,1635, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0,1630,1631,1632, 0, 0,1633, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1639, 0, 0,1638,1640, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,1636,1637, 0, 0, 0, 0, 0, 0,1641, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0,1642,1644,1643, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1645, 0, 0, 0, 0, 0, 0, 0,1646, 0, 0, 0, 0, 0, 0,1648, + 1649, 0,1647,1650, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0,1651,1653,1652, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0,1654, 0,1655,1657,1656, 0, 0, 0, 0,1659, 0, 0, 0, 0, + 0, 0, 0, 0, 0,1660, 0, 0, 0, 0,1661, 0, 0, 0, 0,1662, + 0, 0, 0, 0,1663, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0,1658, 0, 0, 0, 0, 0, 0, 0, 0, 0,1664, 0,1665,1673, 0, + 1674, 0, 0, 0, 0, 0, 0, 0, 0,1666, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1668, 0, 0, 0, 0, + 0, 0, 0, 0, 0,1669, 0, 0, 0, 0,1670, 0, 0, 0, 0,1671, + 0, 0, 0, 0,1672, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0,1667, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1675, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1676, 0, + 1677, 0,1678, 0,1679, 0,1680, 0, 0, 0,1681, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1682, 0,1683, 0, 0,1684,1685, 0,1686, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 953,1138, 955,1140, 956,1141, 957,1142, + 1324,1351, 963,1148, 965,1150, 968,1153, 966,1151, 967,1152,1378,1380,1379,1381, + 984,1169, 985,1170,1420,1421, 986,1171, 989,1174, 995,1180, 998,1183, 996,1181, + 999,1184,1000,1185,1015,1200,1329,1356,1017,1203,1019,1205,1021,1207,1024,1210, + 1687,1688,1027,1213,1026,1212,1028,1214,1029,1215,1030,1216,1034,1220,1036,1222, + 1039,1225,1038,1224,1334,1361,1336,1363,1382,1384,1383,1385,1056,1242,1057,1243, + 1059,1245,1063,1249,1689,1690,1065,1251,1068,1254,1070,1256,1386,1387,1388,1389, + 1691,1692,1073,1259,1075,1262,1079,1266,1078,1265,1095,1282,1098,1285,1097,1284, + 1390,1391,1392,1393,1099,1286,1100,1287,1101,1288,1102,1289,1105,1292,1104,1291, + 1106,1294,1107,1295,1108,1296,1114,1302,1119,1308,1122,1311,1123,1312,1186,1260, + 1293,1305, 0,1394, 0, 0, 0, 0, 952,1137, 947,1132,1317,1344,1316,1343, + 1319,1346,1318,1345,1693,1695,1371,1375,1370,1374,1373,1377,1372,1376,1694,1696, + 981,1166, 977,1162, 972,1157,1326,1353,1325,1352,1328,1355,1327,1354,1697,1698, + 1009,1194,1013,1198,1054,1240,1048,1234,1331,1358,1330,1357,1333,1360,1332,1359, + 1699,1700,1396,1401,1395,1400,1398,1403,1397,1402,1399,1404,1094,1281,1087,1274, + 1406,1411,1405,1410,1408,1413,1407,1412,1409,1414,1109,1297,1117,1306,1116,1304, + 1112,1300, 0, 0, 0, 0, 0, 0,1471,1472,1701,1705,1702,1706,1703,1707, + 1430,1431,1715,1719,1716,1720,1717,1721,1477,1478,1729,1731,1730,1732, 0, 0, + 1435,1436,1733,1735,1734,1736, 0, 0,1481,1482,1737,1741,1738,1742,1739,1743, + 1439,1440,1751,1755,1752,1756,1753,1757,1490,1491,1765,1768,1766,1769,1767,1770, + 1447,1448,1771,1774,1772,1775,1773,1776,1495,1496,1777,1779,1778,1780, 0, 0, + 1451,1452,1781,1783,1782,1784, 0, 0,1504,1505,1785,1788,1786,1789,1787,1790, + 0,1459, 0,1791, 0,1792, 0,1793,1509,1510,1794,1798,1795,1799,1796,1800, + 1462,1463,1808,1812,1809,1813,1810,1814,1467, 21,1475, 22,1479, 23,1485, 24, + 1493, 27,1499, 28,1507, 29, 0, 0,1704,1708,1709,1710,1711,1712,1713,1714, + 1718,1722,1723,1724,1725,1726,1727,1728,1740,1744,1745,1746,1747,1748,1749,1750, + 1754,1758,1759,1760,1761,1762,1763,1764,1797,1801,1802,1803,1804,1805,1806,1807, + 1811,1815,1816,1817,1818,1819,1820,1821,1470,1469,1822,1474,1465, 0,1473,1825, + 1429,1428,1426, 12,1432, 0, 26, 0, 0,1315,1823,1484,1466, 0,1483,1829, + 1433, 13,1437, 14,1441,1826,1827,1828,1488,1487,1513, 19, 0, 0,1492,1515, + 1445,1444,1442, 15, 0,1831,1832,1833,1502,1501,1516, 25,1497,1498,1506,1518, + 1457,1456,1454, 17,1453,1313, 11, 3, 0, 0,1824,1512,1519, 0,1511,1830, + 1449, 16,1460, 18,1464, 4, 0, 0, 30, 31, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, + 0, 0, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0,1834,1835, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0,1836, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0,1837,1839,1838, 0, 0, 0, 0,1840, 0, 0, 0, + 0,1841, 0, 0,1842, 0, 0, 0, 0, 0, 0, 0,1843, 0,1844, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1845, 0, 0,1846, 0, 0,1847, + 0,1848, 0, 0, 0, 0, 0, 0, 937, 0,1850, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0,1849, 936, 938,1851,1852, 0, 0,1853,1854, 0, 0, + 1855,1856, 0, 0, 0, 0, 0, 0,1857,1858, 0, 0,1861,1862, 0, 0, + 1863,1864, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,1867,1868,1869,1870,1859,1860,1865,1866, 0, 0, 0, 0, + 0, 0,1871,1872,1873,1874, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 32, 33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,1875, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,1877, 0,1878, 0,1879, 0,1880, 0,1881, 0,1882, 0, + 1883, 0,1884, 0,1885, 0,1886, 0,1887, 0,1888, 0, 0,1889, 0,1890, + 0,1891, 0, 0, 0, 0, 0, 0,1892,1893, 0,1894,1895, 0,1896,1897, + 0,1898,1899, 0,1900,1901, 0, 0, 0, 0, 0, 0,1876, 0, 0, 0, + 0, 0, 0, 0, 0, 0,1902, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,1904, 0,1905, 0,1906, 0,1907, 0,1908, 0,1909, 0, + 1910, 0,1911, 0,1912, 0,1913, 0,1914, 0,1915, 0, 0,1916, 0,1917, + 0,1918, 0, 0, 0, 0, 0, 0,1919,1920, 0,1921,1922, 0,1923,1924, + 0,1925,1926, 0,1927,1928, 0, 0, 0, 0, 0, 0,1903, 0, 0,1929, + 1930,1931,1932, 0, 0, 0,1933, 0, 710, 385, 724, 715, 455, 103, 186, 825, + 825, 242, 751, 205, 241, 336, 524, 601, 663, 676, 688, 738, 411, 434, 474, 500, + 649, 746, 799, 108, 180, 416, 482, 662, 810, 275, 462, 658, 692, 344, 618, 679, + 293, 388, 440, 492, 740, 116, 146, 168, 368, 414, 481, 527, 606, 660, 665, 722, + 781, 803, 809, 538, 553, 588, 642, 758, 811, 701, 233, 299, 573, 612, 487, 540, + 714, 779, 232, 267, 412, 445, 457, 585, 594, 766, 167, 613, 149, 148, 560, 589, + 648, 768, 708, 345, 411, 704, 105, 259, 313, 496, 518, 174, 542, 120, 307, 101, + 430, 372, 584, 183, 228, 529, 650, 697, 424, 732, 428, 349, 632, 355, 517, 110, + 135, 147, 403, 580, 624, 700, 750, 170, 193, 245, 297, 374, 463, 543, 763, 801, + 812, 815, 162, 384, 420, 730, 287, 330, 337, 366, 459, 476, 509, 558, 591, 610, + 726, 652, 734, 759, 154, 163, 198, 473, 683, 697, 292, 311, 353, 423, 572, 494, + 113, 217, 259, 280, 314, 499, 506, 603, 608, 752, 778, 782, 788, 117, 557, 748, + 774, 320, 109, 126, 260, 265, 373, 411, 479, 523, 655, 737, 823, 380, 765, 161, + 395, 398, 438, 451, 502, 516, 537, 583, 791, 136, 340, 769, 122, 273, 446, 727, + 305, 322, 400, 496, 771, 155, 190, 269, 377, 391, 406, 432, 501, 519, 599, 684, + 687, 749, 776, 175, 452, 191, 480, 510, 659, 772, 805, 813, 397, 444, 619, 566, + 568, 575, 491, 471, 707, 111, 636, 156, 153, 288, 346, 578, 256, 435, 383, 729, + 680, 767, 694, 295, 128, 210, 0, 0, 227, 0, 379, 0, 0, 150, 493, 525, + 544, 551, 552, 556, 783, 576, 604, 0, 661, 0, 703, 0, 0, 735, 743, 0, + 0, 0, 793, 794, 795, 808, 741, 773, 118, 127, 130, 166, 169, 177, 207, 213, + 215, 226, 229, 268, 270, 317, 327, 329, 335, 369, 375, 381, 404, 441, 448, 458, + 477, 484, 503, 539, 545, 547, 546, 548, 549, 550, 554, 555, 561, 564, 569, 591, + 593, 595, 598, 607, 620, 625, 625, 651, 690, 695, 705, 706, 716, 717, 733, 735, + 777, 786, 790, 315, 869, 623, 0, 0, 102, 145, 134, 115, 129, 138, 165, 171, + 207, 202, 206, 212, 227, 231, 240, 243, 250, 254, 294, 296, 303, 308, 319, 325, + 321, 329, 326, 335, 341, 357, 360, 362, 370, 379, 388, 389, 393, 421, 424, 438, + 456, 454, 458, 465, 477, 535, 485, 490, 493, 507, 512, 514, 521, 522, 525, 526, + 528, 533, 532, 541, 565, 569, 574, 586, 591, 597, 607, 637, 647, 674, 691, 693, + 695, 698, 703, 699, 705, 704, 702, 706, 709, 717, 728, 736, 747, 754, 770, 777, + 783, 784, 786, 787, 790, 802, 825, 848, 847, 857, 55, 65, 66, 883, 892, 916, + 822, 824, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0,1586, 0,1605, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0,1602,1603,1934,1935,1574,1575,1576,1577,1579,1580,1581,1583,1584, 0, + 1585,1587,1588,1589,1591, 0,1592, 0,1593,1594, 0,1595,1596, 0,1598,1599, + 1600,1601,1604,1582,1578,1590,1597, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0,1936, 0,1937, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1938, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0,1939,1940, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1941,1942, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1944,1943, 0,1945, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0,1946,1947, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1948, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0,1949,1950,1951,1952,1953,1954,1955, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1956,1957,1958,1960,1959,1961, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 106, 104, 107, 826, 114, 118, 119, 121, + 123, 124, 127, 125, 34, 830, 130, 131, 132, 137, 827, 35, 133, 139, 829, 142, + 143, 112, 144, 145, 924, 151, 152, 37, 157, 158, 159, 160, 38, 165, 166, 169, + 171, 172, 173, 174, 176, 177, 178, 179, 181, 182, 182, 182, 833, 468, 184, 185, + 834, 187, 188, 189, 196, 192, 194, 195, 197, 199, 200, 201, 203, 204, 204, 206, + 208, 209, 211, 218, 213, 219, 214, 216, 153, 234, 221, 222, 223, 220, 225, 224, + 230, 835, 235, 236, 237, 238, 239, 244, 836, 837, 247, 248, 249, 246, 251, 39, + 40, 253, 255, 255, 838, 257, 258, 259, 261, 839, 262, 263, 301, 264, 41, 266, + 270, 272, 271, 841, 274, 842, 277, 276, 278, 281, 282, 42, 283, 284, 285, 286, + 43, 843, 44, 289, 290, 291, 293, 934, 298, 845, 845, 621, 300, 300, 45, 852, + 894, 302, 304, 46, 306, 309, 310, 312, 316, 48, 47, 317, 846, 318, 323, 324, + 325, 324, 328, 329, 333, 331, 332, 334, 335, 336, 338, 339, 342, 343, 347, 351, + 849, 350, 348, 352, 354, 359, 850, 361, 358, 356, 49, 363, 365, 367, 364, 50, + 369, 371, 851, 376, 386, 378, 53, 381, 52, 51, 140, 141, 387, 382, 614, 78, + 388, 389, 390, 394, 392, 856, 54, 399, 396, 402, 404, 858, 405, 401, 407, 55, + 408, 409, 410, 413, 859, 415, 56, 417, 860, 418, 57, 419, 422, 424, 425, 861, + 840, 862, 426, 863, 429, 431, 427, 433, 437, 441, 438, 439, 442, 443, 864, 436, + 449, 450, 58, 454, 453, 865, 447, 460, 866, 867, 461, 466, 465, 464, 59, 467, + 470, 469, 472, 828, 475, 868, 478, 870, 483, 485, 486, 871, 488, 489, 872, 873, + 495, 497, 60, 498, 61, 61, 504, 505, 507, 508, 511, 62, 513, 874, 515, 875, + 518, 844, 520, 876, 877, 878, 63, 64, 528, 880, 879, 881, 882, 530, 531, 531, + 533, 66, 534, 67, 68, 884, 536, 538, 541, 69, 885, 549, 886, 887, 556, 559, + 70, 561, 562, 563, 888, 889, 889, 567, 71, 890, 570, 571, 72, 891, 577, 73, + 581, 579, 582, 893, 587, 74, 590, 592, 596, 75, 895, 896, 76, 897, 600, 898, + 602, 605, 607, 899, 900, 609, 901, 611, 853, 77, 615, 616, 79, 617, 252, 902, + 903, 854, 855, 621, 622, 731, 80, 627, 626, 628, 164, 629, 630, 631, 633, 904, + 632, 634, 639, 640, 635, 641, 646, 651, 638, 643, 644, 645, 905, 907, 906, 81, + 653, 654, 656, 911, 657, 908, 82, 83, 909, 910, 84, 664, 665, 666, 667, 669, + 668, 671, 670, 674, 672, 673, 675, 85, 677, 678, 86, 681, 682, 912, 685, 686, + 87, 689, 36, 913, 914, 88, 89, 696, 702, 709, 711, 915, 712, 713, 718, 719, + 917, 831, 721, 720, 723, 832, 725, 728, 918, 919, 739, 742, 744, 920, 745, 753, + 756, 757, 755, 760, 761, 921, 762, 90, 764, 922, 91, 775, 279, 780, 923, 925, + 92, 93, 785, 926, 94, 927, 787, 787, 789, 928, 792, 95, 796, 797, 798, 800, + 96, 929, 802, 804, 806, 97, 98, 807, 930, 99, 931, 932, 933, 814, 100, 816, + 817, 818, 819, 820, 821, 935, 0, 0, +}; +static const int16_t +_hb_ucd_i16[196] = +{ + 0, 0, 0, 0, 1, -1, 0, 0, 2, 0, -2, 0, 0, 0, 0, 2, + 0, -2, 0, 0, 0, 0, 0, 16, 0, 0, 0, -16, 0, 0, 1, -1, + 0, 0, 0, 1, -1, 0, 0, 0, 0, 1, -1, 0, 3, 3, 3, -3, + -3, -3, 0, 0, 0, 2016, 0, 0, 0, 0, 0, 2527, 1923, 1914, 1918, 0, + 2250, 0, 0, 0, 0, 0, 0, 138, 0, 7, 0, 0, -7, 0, 0, 0, + 1, -1, 1, -1, -1, 1, -1, 0, 1824, 0, 0, 0, 0, 0, 2104, 0, + 2108, 2106, 0, 2106, 1316, 0, 0, 0, 0, 1, -1, 1, -1, -138, 0, 0, + 1, -1, 8, 8, 8, 0, 7, 7, 0, 0, -8, -8, -8, -7, -7, 0, + 1, -1, 0, 2,-1316, 1, -1, 0, -1, 1, -1, 1, -1, 3, 1, -1, + -3, 1, -1, 1, -1, 0, 0,-1914,-1918, 0, 0,-1923,-1824, 0, 0, 0, + 0,-2016, 0, 0, 1, -1, 0, 1, 0, 0,-2104, 0, 0, 0, 0,-2106, + -2108,-2106, 0, 0, 1, -1,-2250, 0, 0, 0,-2527, 0, 0, -2, 0, 1, + -1, 0, 1, -1, +}; + +static inline uint_fast8_t +_hb_ucd_gc (unsigned u) +{ + return u<1114110u?_hb_ucd_u8[6504+(((_hb_ucd_u8[1264+(((_hb_ucd_u16[((_hb_ucd_u8[544+(((_hb_ucd_u8[u>>1>>3>>3>>4])<<4)+((u>>1>>3>>3)&15u))])<<3)+((u>>1>>3)&7u)])<<3)+((u>>1)&7u))])<<1)+((u)&1u))]:2; +} +static inline uint_fast8_t +_hb_ucd_ccc (unsigned u) +{ + return u<125259u?_hb_ucd_u8[8768+(((_hb_ucd_u8[7792+(((_hb_ucd_u8[7120+(((_hb_ucd_u8[6874+(u>>2>>3>>4)])<<4)+((u>>2>>3)&15u))])<<3)+((u>>2)&7u))])<<2)+((u)&3u))]:0; +} +static inline unsigned +_hb_ucd_b4 (const uint8_t* a, unsigned i) +{ + return (a[i>>1]>>((i&1u)<<2))&15u; +} +static inline int_fast16_t +_hb_ucd_bmg (unsigned u) +{ + return u<65380u?_hb_ucd_i16[((_hb_ucd_u8[9508+(((_hb_ucd_u8[9388+(((_hb_ucd_b4(9260+_hb_ucd_u8,u>>2>>3>>3))<<3)+((u>>2>>3)&7u))])<<3)+((u>>2)&7u))])<<2)+((u)&3u)]:0; +} +static inline uint_fast8_t +_hb_ucd_sc (unsigned u) +{ + return u<918000u?_hb_ucd_u8[10974+(((_hb_ucd_u16[1960+(((_hb_ucd_u8[10286+(((_hb_ucd_u8[9836+(u>>3>>4>>4)])<<4)+((u>>3>>4)&15u))])<<4)+((u>>3)&15u))])<<3)+((u)&7u))]:2; +} +static inline uint_fast16_t +_hb_ucd_dm (unsigned u) +{ + return u<195102u?_hb_ucd_u16[5768+(((_hb_ucd_u8[16708+(((_hb_ucd_u8[16326+(u>>4>>5)])<<5)+((u>>4)&31u))])<<4)+((u)&15u))]:0; +} + + +#else + +static const uint8_t +_hb_ucd_u8[13344] = +{ + 0, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 5, 5, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 5, 17, 15, 15, 18, 15, 19, 20, 21, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 22, 23, + 5, 24, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 25, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 34, 34, 34, 35, 36, 37, 34, 34, 34, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + 58, 59, 60, 61, 62, 62, 63, 64, 65, 66, 67, 68, 69, 67, 70, 71, + 67, 67, 62, 72, 62, 62, 73, 67, 74, 75, 76, 77, 78, 67, 67, 67, + 79, 80, 34, 81, 82, 83, 67, 67, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 84, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 85, 34, 34, 34, 34, 34, 34, 34, 34, 86, 34, 34, 87, 88, 89, 90, + 91, 92, 93, 94, 95, 96, 97, 98, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100, + 100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100, + 100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100, + 100,100, 34, 34, 34, 34,101,102, 34, 34,103,104,105,106,107,108, + 34, 34,109,110,111,112,113,114,115,116,117,111, 34, 34, 34,111, + 118,119,120,121,122,123,124,125, 34,126,127,111,128,129,130,131, + 132,133,134,135,136,137,138,111,139,140,111,141,142,143,144,111, + 145,146,147,148,149,150,111,111,151,152,153,154,111,155,111,156, + 34, 34, 34, 34, 34, 34, 34, 34,157, 34, 34,111,111,111,111,111, + 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, + 34, 34, 34, 34, 34, 34, 34, 34,158,111,111,111,111,111,111,111, + 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, + 111,111,111,111,111,111,111,111, 34, 34, 34, 34, 34,111,111,111, + 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, + 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, + 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, + 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, + 34, 34, 34, 34,159,160,161, 34,111,111,111,111,162,163,164,165, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,111,111,111,111,111, + 34, 34, 34, 34, 34, 34,111,111,111,111,111,111,111,111,111,111, + 111,111,111,111,111,111,111,111, 34,166,111,111,111,111,111,111, + 67, 67,167,168,169,128, 65,111,170,171,172,173,174,175,176,177, + 67, 67, 67, 67,178,179,111,111,111,111,111,111,111,111,111,111, + 180,111,181,111,111,182,111,111,111,111,111,111,111,111,111,111, + 34,183,184,111,111,111,111,111,128,185,186,111, 34,187,111,111, + 67, 67,188, 67, 67,111, 67,189, 67, 67, 67, 67, 67, 67, 67, 67, + 67, 67, 67, 67, 67, 67, 67,190,111,111,111,111,111,111,111,111, + 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 34, 34, 34, 34, 34, 34, 34, 34,111,111,111,111,111,111,111,111, + 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, + 34, 34, 34, 34, 34,111,111,111,111,111,111,111,111,111,111,111, + 34, 34, 34, 34, 34, 34, 34,111,111,111,111,111,111,111,111,111, + 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, + 191,111,180,180,111,111,111,111,111,111,111,111,111,111,111,111, + 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 2, 4, 5, 6, 2, + 7, 7, 7, 7, 7, 2, 8, 9, 10, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 12, 13, 14, 15, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 17, 18, 19, 1, 20, 20, 21, 22, 23, 24, 25, + 26, 27, 15, 2, 28, 29, 27, 30, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 31, 11, 11, 11, 32, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 33, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 34, 34, 34, 34, 34, 34, 34, 34, 16, 32, 32, 32, + 32, 32, 32, 32, 11, 34, 34, 16, 34, 32, 32, 11, 34, 11, 16, 11, + 11, 34, 32, 11, 32, 16, 11, 34, 32, 32, 32, 11, 34, 16, 32, 11, + 34, 11, 34, 34, 32, 35, 32, 16, 36, 36, 37, 34, 38, 37, 34, 34, + 34, 34, 34, 34, 34, 34, 16, 32, 34, 38, 32, 11, 32, 32, 32, 32, + 32, 32, 16, 16, 16, 11, 34, 32, 34, 34, 11, 32, 32, 32, 32, 32, + 16, 16, 39, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 41, 41, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 41, 41, + 40, 40, 42, 41, 41, 41, 42, 42, 41, 41, 41, 41, 41, 41, 41, 41, + 43, 43, 43, 43, 43, 43, 43, 43, 32, 32, 42, 32, 16, 44, 16, 10, + 41, 41, 41, 45, 11, 11, 11, 11, 34, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 16, 16, 16, 16, 16, 16, 16, 16, 16, 34, + 16, 11, 32, 16, 32, 32, 32, 32, 16, 16, 32, 46, 34, 32, 34, 11, + 32, 47, 43, 43, 48, 32, 32, 32, 11, 34, 34, 34, 34, 34, 34, 16, + 11, 11, 11, 11, 49, 2, 2, 2, 16, 16, 16, 16, 50, 51, 52, 53, + 54, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 55, + 56, 57, 43, 56, 43, 43, 43, 43, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 58, 2, 2, 2, 2, 2, 2, 59, 59, 59, 8, 9, 60, 2, 61, + 43, 43, 43, 43, 43, 57, 59, 2, 62, 36, 36, 36, 36, 63, 43, 43, + 7, 7, 7, 7, 7, 2, 2, 36, 64, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 65, 43, 43, 43, 66, 47, 43, 43, 67, 68, 69, 43, 43, 36, + 7, 7, 7, 7, 7, 36, 70, 71, 2, 2, 2, 2, 2, 2, 2, 72, + 63, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, 43, 43, 64, 36, + 36, 36, 36, 43, 43, 43, 43, 43, 7, 7, 7, 7, 7, 36, 36, 36, + 36, 36, 36, 36, 36, 63, 43, 43, 43, 43, 40, 21, 2, 40, 68, 20, + 36, 36, 36, 43, 43, 68, 43, 43, 43, 43, 68, 43, 68, 43, 43, 43, + 2, 2, 2, 2, 2, 2, 2, 2, 36, 36, 36, 36, 63, 43, 43, 2, + 36, 63, 43, 43, 43, 43, 43, 43, 43, 73, 43, 43, 43, 43, 43, 43, + 43, 74, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 74, 64, 75, + 76, 43, 43, 43, 74, 75, 76, 75, 63, 43, 43, 43, 36, 36, 36, 36, + 36, 43, 2, 7, 7, 7, 7, 7, 77, 36, 36, 36, 36, 36, 36, 36, + 63, 75, 78, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 64, 75, + 76, 43, 43, 74, 75, 75, 76, 36, 36, 36, 36, 79, 75, 75, 36, 36, + 36, 43, 43, 7, 7, 7, 7, 7, 36, 20, 27, 27, 27, 53, 58, 43, + 43, 74, 78, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 43, 75, + 76, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 43, 64, 36, 36, 36, + 36, 36, 36, 7, 7, 7, 7, 7, 43, 36, 63, 2, 2, 2, 2, 2, + 76, 43, 43, 43, 74, 75, 76, 43, 60, 20, 20, 20, 80, 43, 43, 43, + 43, 75, 78, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 64, 76, + 76, 43, 43, 74, 75, 75, 76, 43, 43, 43, 43, 74, 75, 75, 36, 36, + 71, 27, 27, 27, 27, 27, 27, 27, 43, 64, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 75, 74, 75, 75, 75, 75, 75, 76, 43, + 36, 36, 36, 79, 75, 75, 75, 75, 75, 75, 75, 7, 7, 7, 7, 7, + 27, 81, 61, 61, 53, 61, 61, 61, 74, 75, 64, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 43, 74, 75, 75, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 36, 36, 36, 36, 7, 7, 7, 82, 27, 27, 27, 81, + 63, 75, 65, 36, 36, 36, 36, 36, 75, 75, 75, 74, 75, 75, 43, 43, + 43, 43, 74, 75, 75, 75, 75, 36, 83, 36, 36, 36, 36, 36, 36, 36, + 43, 75, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 63, 64, 75, + 76, 43, 43, 75, 75, 75, 76, 70, 61, 61, 36, 79, 27, 27, 27, 84, + 27, 27, 27, 27, 81, 36, 36, 36, 36, 36, 36, 36, 36, 43, 43, 74, + 75, 43, 43, 43, 75, 75, 75, 75, 7, 75, 2, 2, 2, 2, 2, 2, + 63, 36, 43, 43, 43, 43, 43, 85, 36, 36, 36, 68, 43, 43, 43, 57, + 7, 7, 7, 7, 7, 2, 2, 2, 63, 36, 43, 43, 43, 43, 64, 36, + 36, 36, 36, 40, 43, 43, 43, 43, 7, 7, 7, 7, 7, 7, 36, 36, + 70, 61, 2, 2, 2, 2, 2, 2, 2, 86, 86, 61, 43, 61, 61, 61, + 7, 7, 7, 7, 7, 27, 27, 27, 27, 27, 47, 47, 47, 4, 4, 75, + 63, 43, 43, 43, 43, 43, 43, 74, 43, 43, 57, 43, 36, 36, 63, 43, + 43, 43, 43, 43, 43, 43, 43, 61, 61, 61, 61, 69, 61, 61, 61, 61, + 2, 2, 86, 61, 21, 2, 2, 2, 36, 36, 36, 36, 36, 79, 76, 43, + 74, 43, 43, 43, 76, 74, 76, 64, 36, 36, 36, 75, 43, 36, 36, 43, + 64, 75, 78, 79, 75, 75, 75, 36, 63, 43, 64, 36, 36, 36, 36, 36, + 36, 74, 76, 74, 75, 75, 76, 79, 7, 7, 7, 7, 7, 75, 76, 61, + 16, 16, 16, 16, 16, 50, 44, 16, 36, 36, 36, 36, 36, 36, 63, 43, + 2, 2, 2, 2, 87, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 61, 61, 61, 61, 61, 61, 61, 61, 11, 11, 11, 11, 16, 16, 16, 16, + 88, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 70, 65, + 89, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 90, 91, 91, + 36, 36, 36, 36, 36, 58, 2, 92, 93, 36, 36, 36, 36, 36, 36, 36, + 36, 43, 43, 43, 43, 43, 43, 43, 36, 43, 57, 2, 2, 2, 2, 2, + 36, 36, 43, 76, 43, 43, 43, 75, 75, 75, 75, 74, 76, 43, 43, 43, + 43, 43, 2, 77, 2, 60, 63, 43, 7, 7, 7, 7, 7, 7, 7, 7, + 2, 2, 2, 94, 2, 56, 43, 59, 36, 95, 36, 36, 36, 36, 36, 36, + 36, 36, 63, 64, 36, 36, 36, 36, 36, 36, 36, 36, 63, 36, 36, 36, + 43, 74, 75, 76, 74, 75, 75, 75, 75, 74, 75, 75, 76, 43, 43, 43, + 61, 61, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 27, 27, 61, + 36, 36, 36, 63, 74, 76, 43, 2, 36, 36, 79, 74, 43, 43, 43, 43, + 74, 74, 76, 43, 43, 43, 74, 75, 75, 76, 43, 43, 43, 43, 43, 43, + 2, 2, 2, 77, 2, 2, 2, 2, 43, 43, 43, 43, 43, 43, 43, 96, + 43, 43, 78, 36, 36, 36, 36, 36, 36, 36, 74, 43, 43, 74, 74, 75, + 75, 74, 78, 36, 36, 36, 36, 36, 86, 61, 61, 61, 61, 47, 43, 43, + 43, 43, 61, 61, 61, 61, 61, 61, 43, 78, 36, 36, 36, 36, 36, 36, + 79, 43, 43, 75, 43, 76, 43, 36, 36, 36, 36, 74, 43, 75, 76, 76, + 43, 75, 75, 75, 75, 75, 2, 2, 36, 36, 75, 75, 75, 75, 43, 43, + 43, 43, 75, 43, 43, 57, 2, 2, 7, 7, 7, 7, 7, 7, 83, 36, + 36, 36, 36, 36, 40, 40, 40, 2, 43, 57, 43, 43, 43, 43, 43, 43, + 74, 43, 43, 43, 64, 36, 63, 36, 36, 36, 64, 79, 43, 36, 36, 36, + 16, 16, 16, 16, 16, 16, 40, 40, 40, 40, 40, 40, 40, 44, 16, 16, + 16, 16, 16, 16, 44, 16, 16, 16, 16, 16, 16, 16, 16, 97, 40, 40, + 32, 32, 32, 16, 16, 16, 16, 32, 16, 16, 16, 16, 11, 11, 11, 11, + 16, 16, 16, 16, 34, 11, 11, 11, 16, 16, 16, 16, 98, 98, 98, 98, + 16, 16, 16, 16, 11, 11, 99,100, 41, 16, 16, 16, 11, 11, 99, 41, + 16, 16, 16, 16, 11, 11,101, 41,102,102,102,102,102,103, 59, 59, + 51, 51, 51, 2,104,105,104,105, 2, 2, 2, 2,106, 59, 59,107, + 2, 2, 2, 2,108,109, 2,110,111, 2,112,113, 2, 2, 2, 2, + 2, 9,111, 2, 2, 2, 2,114, 59, 59, 59, 59, 59, 59, 59, 59, + 115, 40, 27, 27, 27, 8,112,116, 27, 27, 27, 27, 27, 8,112, 91, + 20, 20, 20, 20, 20, 20, 20, 20, 43, 43, 43, 43, 43, 43,117, 48, + 96, 48, 96, 43, 43, 43, 43, 43, 61,118, 61,119, 61, 34, 11, 16, + 11, 32,119, 61, 46, 11, 11, 61, 61, 61,118,118,118, 11, 11,120, + 11, 11, 35, 36, 39, 61, 16, 11, 8, 8, 46, 16, 16, 26, 61,121, + 92, 92, 92, 92, 92, 92, 92, 92, 92,122,123, 92,124, 61, 61, 61, + 8, 8,125, 61, 61, 8, 61, 61,125, 26, 61,125, 61, 61, 61,125, + 61, 61, 61, 61, 61, 61, 61, 8, 61,125,125, 61, 61, 61, 61, 61, + 61, 61, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 61, 61, 61, 61, 4, 4, 61, 61, 8, 61, 61, 61,126,127, 61, 61, + 61, 61, 61, 61, 61, 61,125, 61, 61, 61, 61, 61, 61, 26, 8, 8, + 8, 8, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 8, 8, + 8, 61, 61, 61, 61, 61, 61, 61, 27, 27, 27, 27, 27, 27, 61, 61, + 61, 61, 61, 61, 61, 27, 27, 27, 61, 61, 61, 26, 61, 61, 61, 61, + 26, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 8, 8, 8, 8, + 61, 61, 61, 61, 61, 61, 61, 26, 61, 61, 61, 61, 4, 4, 4, 4, + 4, 4, 4, 27, 27, 27, 27, 27, 27, 27, 61, 61, 61, 61, 61, 61, + 8, 8,112,128, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, + 8,112,129,129,129,129,129,129,129,129,129,129,128, 8, 8, 8, + 8, 8, 8, 8, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, + 8, 8,125, 26, 8, 8,125, 61, 32, 11, 32, 34, 34, 34, 34, 11, + 32, 32, 34, 16, 16, 16, 40, 11, 32, 32,121, 61, 61,119, 34,130, + 43, 32, 16, 16, 50, 2, 87, 2, 36, 36, 36, 36, 36, 36, 36, 95, + 2, 2, 2, 2, 2, 2, 2, 56, 2,104,104, 2,108,109,104, 2, + 2, 2, 2, 6, 2, 94,104, 2,104, 4, 4, 4, 4, 2, 2, 77, + 2, 2, 2, 2, 2, 51, 2, 2, 94,131, 2, 2, 2, 2, 2, 2, + 61, 2, 2, 2, 2, 2, 2, 2, 1, 2,132,133, 4, 4, 4, 4, + 4, 61, 4, 4, 4, 4,134, 91,135, 92, 92, 92, 92, 43, 43, 75, + 136, 40, 40, 61, 92,137, 58, 61, 71, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 63,138,139, 62, 36, 36, 36, 36, 36, 58, 40, 62, + 61, 27, 27, 61, 61, 61, 61, 61, 27, 27, 27, 27, 27, 61, 61, 61, + 61, 61, 61, 61, 27, 27, 27, 27,140, 27, 27, 27, 27, 27, 27, 27, + 36, 36, 95, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,141, 2, + 32, 32, 32, 32, 32, 32, 32, 63, 48,142, 43, 43, 43, 43, 43, 77, + 32, 32, 32, 32, 32, 32, 40, 43, 36, 36, 36, 92, 92, 92, 92, 92, + 43, 2, 2, 2, 2, 2, 2, 2, 41, 41, 41,139, 40, 40, 40, 40, + 41, 32, 32, 32, 32, 32, 32, 32, 16, 32, 32, 32, 32, 32, 32, 32, + 44, 16, 16, 16, 34, 34, 34, 32, 32, 32, 32, 32, 42,143, 34, 35, + 32, 32, 16, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 11, 11, 32, + 11, 11, 32, 32, 32, 32, 32, 32, 16, 32, 11, 11, 34, 16, 16, 16, + 16, 16, 34, 35, 40, 35, 36, 36, 36, 64, 36, 64, 36, 63, 36, 36, + 36, 79, 76, 74, 61, 61, 43, 43, 27, 27, 27, 61,144, 61, 61, 61, + 36, 36, 2, 2, 2, 2, 2, 2, 75, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 75, 75, 75, 75, 75, 75, 75, 75, 43, 43, 43, 43, 43, 2, + 43, 36, 36, 36, 2, 65, 65, 63, 36, 36, 36, 43, 43, 43, 43, 2, + 36, 36, 36, 63, 43, 43, 43, 43, 43, 75, 75, 75, 75, 75, 75,145, + 36, 63, 75, 43, 43, 75, 43, 75,145, 2, 2, 2, 2, 2, 2, 77, + 7, 7, 7, 7, 7, 7, 7, 2, 36, 36, 63, 62, 36, 36, 36, 36, + 36, 36, 36, 36, 63, 43, 43, 74, 76, 74, 76, 43, 43, 43, 43, 43, + 36, 63, 36, 36, 36, 36, 74, 75, 7, 7, 7, 7, 7, 7, 2, 2, + 62, 36, 36, 70, 61, 79, 74, 36, 64, 43, 64, 63, 64, 36, 36, 43, + 36, 36, 36, 36, 36, 36, 95, 2, 36, 36, 36, 36, 36, 79, 43, 75, + 2, 95,146, 43, 43, 43, 43, 43, 16, 16, 16, 16, 16,100, 40, 40, + 16, 16, 16, 16, 97, 41, 41, 41, 36, 79, 76, 75, 74,145, 76, 43, + 147,147,147,147,147,147,147,147,148,148,148,148,148,148,148,148, + 16, 16, 16, 16, 16, 16, 35, 64, 36, 36, 36, 36,149, 36, 36, 36, + 36, 41, 41, 41, 41, 41, 41, 41, 41,150, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36,129,151,151,151,151,151,151,151,151, + 36, 36, 36, 36, 36, 36,144, 61, 2, 2, 2,152,113, 2, 2, 2, + 6,153,154,129,129,129,129,129,129,129,113,152,113, 2,110,155, + 2, 2, 2, 2,134,129,129,113, 2,156, 8, 8, 60, 2, 2, 2, + 36, 36, 36, 36, 36, 36, 36,157, 2, 2, 3, 2, 4, 5, 6, 2, + 16, 16, 16, 16, 16, 17, 18,112,113, 4, 2, 36, 36, 36, 36, 36, + 62, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 40, + 20,158, 53, 20, 26, 8,125, 61, 61, 61, 61, 61,159, 59, 61, 61, + 2, 2, 2, 87, 27, 27, 27, 27, 27, 27, 27, 81, 61, 61, 61, 61, + 92, 92,124, 27, 81, 61, 61, 61, 61, 61, 61, 61, 61, 27, 61, 61, + 61, 61, 61, 61, 61, 61, 47, 43,160,160,160,160,160,160,160,160, + 161, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 84, 36, + 133, 36, 36, 36, 36, 92, 92, 92, 36, 36, 36, 36, 36, 36, 36, 58, + 162, 92, 92, 92, 92, 92, 92, 92, 36, 36, 36, 58, 27, 27, 27, 27, + 36, 36, 36, 70,140, 27, 27, 27, 36, 36, 36,163, 27, 27, 27, 27, + 36, 36, 36, 36, 36,163, 27, 27, 36, 36, 36, 27, 27, 27, 27, 30, + 36, 36, 36, 36, 36, 36, 27, 36, 63, 43, 43, 43, 43, 43, 43, 43, + 36, 36, 36, 36, 43, 43, 43, 43, 36, 36, 36, 36, 36, 36,163, 30, + 36, 36, 36, 36, 36, 36,163, 27, 36, 36, 36, 36, 71, 36, 36, 36, + 36, 36, 63, 43, 43,161, 27, 27, 36, 36, 36, 36, 58, 2, 2, 2, + 36, 36, 36, 36, 27, 27, 27, 27, 16, 16, 16, 16, 16, 27, 27, 27, + 36, 36, 43, 43, 43, 43, 43, 43, 36, 36, 36, 36, 36, 63,164, 51, + 27, 27, 27, 84, 36, 36, 36, 36,161, 27, 30, 2, 2, 2, 2, 2, + 36, 36,163, 27, 27, 27, 27, 27, 76, 78, 36, 36, 36, 36, 36, 36, + 43, 43, 43, 57, 2, 2, 2, 2, 2, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,165, + 75, 76, 43, 74, 76, 57, 72, 2, 2, 2, 2, 2, 2, 2, 72, 59, + 36, 36, 36, 63, 43, 43, 76, 43, 43, 43, 43, 7, 7, 7, 7, 7, + 2, 2, 79, 78, 36, 36, 36, 36, 36, 63, 2, 36, 36, 36, 36, 36, + 36, 79, 75, 43, 43, 43, 43, 74, 78, 36, 58, 2, 56, 43, 57, 76, + 7, 7, 7, 7, 7, 58, 58, 2, 87, 27, 27, 27, 27, 27, 27, 27, + 36, 36, 36, 36, 36, 36, 75, 76, 43, 75, 74, 43, 2, 2, 2, 43, + 36, 36, 36, 36, 36, 36, 36, 63, 74, 75, 75, 75, 75, 75, 75, 75, + 36, 36, 36, 79, 75, 75, 78, 36, 36, 75, 75, 43, 43, 43, 43, 43, + 36, 36, 79, 75, 43, 43, 43, 43, 75, 43, 74, 64, 36, 58, 2, 2, + 7, 7, 7, 7, 7, 2, 2, 64, 75, 76, 43, 43, 74, 74, 75, 76, + 74, 43, 36, 65, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 79, + 75, 43, 43, 43, 75, 75, 43, 76, 57, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 36, 36, 43, 43, 75, 76, 43, 43, 43, 74, 76, 76, + 57, 2, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 63, 76, 75, + 43, 43, 43, 76, 36, 36, 36, 36, 75, 43, 43, 76, 43, 43, 43, 43, + 7, 7, 7, 7, 7, 27, 2, 86, 43, 43, 43, 43, 76, 57, 2, 2, + 27, 27, 27, 27, 27, 27, 27, 84, 75, 75, 75, 75, 75, 76, 74, 64, + 78, 76, 2, 2, 2, 2, 2, 2, 79, 75, 43, 43, 43, 43, 75, 75, + 64, 65, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, + 63, 43, 43, 43, 43, 64, 36, 36, 36, 63, 43, 43, 74, 63, 43, 57, + 2, 2, 2, 56, 43, 43, 43, 43, 63, 43, 43, 74, 76, 43, 36, 36, + 36, 36, 36, 36, 36, 43, 43, 43, 43, 43, 43, 74, 43, 2, 65, 2, + 43, 43, 43, 43, 43, 43, 43, 76, 58, 2, 2, 2, 2, 2, 2, 2, + 2, 36, 36, 36, 36, 36, 36, 36, 43, 43, 43, 43, 74, 43, 43, 43, + 74, 43, 76, 43, 43, 43, 43, 43, 43, 43, 43, 63, 43, 43, 43, 43, + 36, 36, 36, 36, 36, 75, 75, 75, 43, 74, 76, 76, 36, 36, 36, 36, + 36, 63, 74,145, 2, 2, 2, 2, 27, 27, 81, 61, 61, 61, 53, 20, + 144, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 21, + 43, 43, 57, 2, 2, 2, 2, 2, 43, 43, 43, 57, 2, 2, 61, 61, + 40, 40, 86, 61, 61, 61, 61, 61, 7, 7, 7, 7, 7,166, 27, 27, + 27, 84, 36, 36, 36, 36, 36, 36, 27, 27, 27, 30, 2, 2, 2, 2, + 79, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76, + 43, 67, 40, 40, 40, 40, 40, 40, 40, 77, 43, 43, 43, 43, 43, 43, + 36, 36, 36, 36, 36, 36, 47, 57, 61, 61,167, 76, 43, 61,167, 75, + 75,168, 59, 59, 59, 73, 43, 43, 43, 69, 47, 43, 43, 43, 61, 61, + 61, 61, 61, 61, 61, 43, 43, 61, 61, 43, 69, 61, 61, 61, 61, 61, + 11, 11, 11, 11, 11, 16, 16, 16, 16, 16, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 16, 11, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 11, 11, 11, 11, 11, 16, 16, 16, 16, 16, + 31, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 33, 16, 16, + 16, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 31, 16, 16, + 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 31, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 33, 16, 16, 16, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, + 11, 11, 11, 11, 31, 16, 16, 16, 16, 33, 16, 16, 16, 32, 16, 7, + 43, 43, 43, 69, 61, 47, 43, 43, 43, 43, 43, 43, 43, 43, 69, 61, + 61, 61, 47, 61, 61, 61, 61, 61, 61, 61, 69, 21, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 56, 43, 43, 43, 43, 43, 67, 40, 40, 40, 40, + 7, 7, 7, 7, 7, 7, 7, 70, 36, 36, 36, 36, 36, 36, 43, 43, + 7, 7, 7, 7, 7, 7, 7,169, 16, 16, 43, 43, 43, 67, 40, 40, + 27, 27, 27, 27, 27, 27,140, 27,170, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27,140, 27, 27, 27, 27, 27, 27, 81, 61, + 61, 61, 61, 61, 61, 25, 41, 41, 0, 0, 29, 21, 21, 21, 23, 21, + 22, 18, 21, 25, 21, 17, 13, 13, 25, 25, 25, 21, 21, 9, 9, 9, + 9, 22, 21, 18, 24, 16, 24, 5, 5, 5, 5, 22, 25, 18, 25, 0, + 23, 23, 26, 21, 24, 26, 7, 20, 25, 1, 26, 24, 26, 25, 15, 15, + 24, 15, 7, 19, 15, 21, 9, 25, 9, 5, 5, 25, 5, 9, 5, 7, + 7, 7, 9, 8, 8, 5, 7, 5, 6, 6, 24, 24, 6, 24, 12, 12, + 6, 5, 9, 21, 25, 9, 26, 12, 11, 11, 9, 6, 5, 21, 17, 17, + 17, 26, 26, 23, 23, 12, 17, 12, 21, 12, 12, 21, 7, 21, 1, 1, + 21, 23, 26, 26, 6, 7, 7, 12, 12, 7, 21, 7, 12, 1, 12, 6, + 6, 12, 12, 26, 7, 26, 26, 7, 21, 1, 1, 12, 12, 10, 10, 10, + 10, 12, 21, 6, 10, 7, 7, 10, 23, 7, 15, 26, 13, 21, 13, 7, + 15, 7, 12, 23, 21, 26, 21, 15, 17, 7, 29, 7, 7, 22, 18, 18, + 14, 14, 14, 7, 17, 21, 7, 6, 11, 12, 5, 6, 8, 8, 8, 24, + 5, 24, 9, 24, 29, 29, 29, 1, 20, 19, 22, 20, 27, 28, 1, 29, + 21, 20, 19, 21, 21, 16, 16, 21, 25, 22, 18, 21, 21, 29, 15, 6, + 18, 6, 12, 11, 9, 26, 26, 9, 26, 5, 5, 26, 14, 9, 5, 14, + 14, 15, 25, 26, 26, 22, 18, 26, 18, 25, 18, 22, 5, 12, 22, 21, + 26, 6, 7, 14, 17, 22, 26, 14, 17, 6, 14, 6, 12, 24, 24, 6, + 26, 15, 6, 21, 11, 21, 24, 9, 23, 26, 10, 21, 6, 10, 4, 4, + 3, 3, 7, 25, 24, 7, 22, 22, 21, 22, 17, 16, 16, 22, 16, 16, + 25, 17, 7, 1, 25, 24, 26, 1, 2, 2, 12, 15, 21, 14, 7, 15, + 12, 17, 13, 12, 13, 15, 26, 10, 10, 1, 13, 23, 23, 15, 0, 1, + 2, 3, 4, 5, 6, 7, 8, 9, 0, 10, 11, 12, 13, 0, 14, 0, + 0, 0, 0, 0, 15, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 17, 18, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 20, 0, 21, 22, 23, + 0, 0, 0, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 34, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 37, 38, 0, 0, 0, 0, 0, 0, 39, 40, + 0, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 2, 0, 0, 0, 0, 3, 0, 0, 0, 4, 5, 6, 7, 0, 8, + 9, 10, 0, 11, 12, 13, 0, 14, 15, 16, 15, 17, 15, 18, 15, 18, + 15, 18, 0, 18, 0, 19, 15, 18, 20, 18, 0, 21, 22, 23, 24, 25, + 26, 27, 28, 29, 30, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 32, 0, 0, 0, 0, 0, 0, 33, 0, 0, 34, 0, 0, 35, 0, + 36, 0, 0, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 0, 46, + 0, 0, 0, 47, 0, 0, 0, 48, 0, 0, 0, 0, 0, 0, 0, 49, + 0, 50, 0, 51, 52, 0, 53, 0, 0, 0, 0, 0, 0, 54, 55, 56, + 0, 0, 0, 0, 57, 0, 0, 58, 59, 60, 61, 62, 0, 0, 63, 64, + 0, 0, 0, 65, 0, 0, 0, 0, 66, 0, 0, 0, 67, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 69, + 0, 70, 0, 0, 71, 0, 0, 72, 0, 0, 0, 0, 0, 0, 0, 0, + 73, 0, 0, 0, 0, 0, 74, 0, 0, 75, 0, 0, 0, 76, 77, 0, + 78, 61, 0, 79, 80, 0, 0, 81, 82, 83, 0, 0, 0, 84, 0, 85, + 0, 0, 50, 86, 50, 0, 87, 0, 88, 0, 0, 0, 77, 0, 0, 0, + 89, 90, 0, 91, 92, 93, 94, 0, 0, 0, 0, 0, 50, 0, 0, 0, + 0, 95, 96, 0, 0, 0, 0, 97, 98, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 99, 0, 0,100, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0,101,102, 0, 0,103, 0, 0, 0, 0, 0, 0,104, 0, 0, 0, + 98, 0, 0, 0, 0, 0, 0,105, 0, 0, 0, 0, 0, 0, 0,106, + 0,107, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 0, + 8, 0, 0, 0, 0, 9, 10, 11, 12, 0, 0, 0, 0, 13, 0, 0, + 14, 15, 0, 16, 0, 17, 18, 0, 0, 19, 0, 20, 21, 0, 0, 0, + 0, 0, 22, 23, 0, 24, 25, 0, 0, 26, 0, 0, 0, 27, 28, 29, + 0, 0, 0, 30, 31, 32, 0, 0, 31, 0, 0, 33, 31, 0, 0, 0, + 31, 34, 0, 0, 0, 0, 0, 35, 36, 0, 0, 0, 0, 0, 0, 37, + 38, 0, 0, 0, 0, 0, 0, 39, 40, 0, 0, 0, 0, 41, 0, 42, + 0, 0, 0, 43, 44, 0, 0, 0, 45, 0, 0, 0, 0, 0, 0, 46, + 47, 0, 0, 0, 0, 48, 0, 0, 0, 49, 0, 49, 0, 50, 0, 0, + 0, 0, 51, 0, 0, 0, 0, 52, 0, 53, 0, 0, 0, 0, 54, 55, + 0, 0, 0, 56, 57, 0, 0, 0, 0, 0, 0, 58, 49, 0, 59, 60, + 0, 0, 61, 0, 0, 0, 62, 63, 0, 0, 0, 64, 0, 65, 66, 67, + 68, 69, 1, 70, 0, 71, 72, 73, 0, 0, 74, 75, 0, 0, 0, 76, + 0, 0, 1, 1, 0, 0, 77, 0, 0, 78, 0, 0, 0, 0, 74, 79, + 0, 80, 0, 0, 0, 0, 0, 75, 81, 0, 82, 0, 49, 0, 1, 75, + 0, 0, 83, 0, 0, 84, 0, 0, 0, 0, 0, 85, 54, 0, 0, 0, + 0, 0, 0, 86, 87, 0, 0, 81, 0, 0, 31, 0, 0, 88, 0, 0, + 0, 0, 89, 0, 0, 0, 0, 47, 0, 0, 57, 0, 0, 0, 0, 90, + 91, 0, 0, 92, 0, 0, 93, 0, 0, 0, 94, 0, 0, 0, 95, 0, + 96, 57, 0, 0, 81, 0, 0, 76, 0, 0, 0, 97, 98, 0, 0, 99, + 100, 0, 0, 0, 0, 0, 0,101, 0, 0,102, 0, 0, 0, 0,103, + 31, 0,104,105,106, 33, 0, 0,107, 0, 0, 0,108, 0, 0, 0, + 0, 0, 0,109, 0, 0,110, 0, 0, 0, 0,111, 85, 0, 0, 0, + 0, 0, 54, 0, 0, 0, 0, 49,112, 0, 0, 0, 0,113, 0, 0, + 114, 0, 0, 0, 0,112, 0, 0, 0, 0, 0,115, 0, 0, 0,116, + 0, 0, 0,117, 0,118, 0, 0, 0, 0,119,120,121, 0,122, 0, + 123, 0, 0, 0,124,125,126, 0, 0, 0,127, 0, 0,128, 0, 0, + 129, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, + 5, 6, 7, 4, 4, 8, 9, 10, 1, 11, 12, 13, 14, 15, 16, 17, + 18, 1, 1, 1, 19, 1, 0, 0, 20, 21, 22, 1, 23, 4, 21, 24, + 25, 26, 27, 28, 29, 30, 0, 0, 1, 1, 31, 0, 0, 0, 32, 33, + 34, 35, 1, 36, 37, 0, 0, 0, 0, 38, 1, 39, 14, 39, 40, 41, + 42, 0, 0, 0, 43, 36, 44, 45, 21, 45, 46, 0, 0, 0, 19, 1, + 21, 0, 0, 47, 0, 38, 48, 1, 1, 49, 49, 50, 0, 0, 51, 0, + 52, 1, 1, 1, 53, 21, 43, 54, 55, 21, 35, 1, 0, 0, 0, 56, + 0, 0, 0, 57, 58, 59, 0, 0, 0, 0, 0, 60, 0, 61, 0, 0, + 0, 0, 62, 63, 0, 0, 64, 0, 0, 0, 65, 0, 0, 0, 66, 0, + 0, 0, 67, 0, 0, 0, 68, 0, 0, 0, 69, 0, 0, 70, 71, 0, + 72, 73, 74, 75, 76, 77, 0, 0, 0, 78, 0, 0, 0, 79, 80, 0, + 0, 0, 0, 47, 0, 0, 0, 49, 0, 63, 0, 0, 64, 0, 0, 81, + 0, 0, 82, 0, 0, 0, 83, 0, 0, 19, 84, 0, 63, 0, 0, 0, + 0, 49, 1, 85, 1, 54, 15, 86, 84, 0, 0, 0, 0, 56, 0, 0, + 0, 0, 19, 10, 1, 0, 0, 0, 0, 0, 87, 0, 0, 88, 0, 0, + 87, 0, 0, 0, 0, 79, 0, 0, 89, 9, 12, 4, 90, 8, 91, 47, + 0, 59, 50, 0, 21, 1, 21, 92, 93, 1, 1, 1, 1, 94, 95, 96, + 97, 1, 98, 59, 81, 99,100, 4, 59, 0, 0, 0, 0, 0, 0, 19, + 50, 0, 0, 0, 0, 0, 0, 62, 0, 0,101,102, 0, 0,103, 0, + 0, 1, 1, 50, 0, 0, 0, 38, 0, 64, 0, 0, 0, 0, 0, 63, + 0, 0, 52, 69, 62, 0, 0, 0, 79, 0, 0, 0,104,105, 59, 38, + 81, 0, 0, 0, 0, 0, 0,106, 1, 14, 4, 12, 0, 38, 89, 0, + 0, 0, 0,107, 0, 0,108, 62, 0,109, 0, 0, 0, 1, 0, 0, + 0, 0, 19, 59, 0,110, 14, 54, 0, 0,111, 0, 89, 0, 0, 0, + 62, 63, 0, 0, 63, 0, 88, 0, 0,111, 0, 0, 0, 0,112, 0, + 0, 0, 79, 56, 0, 38, 1, 59, 1, 59, 0, 0, 64, 88, 0, 0, + 113, 0, 0, 0, 56, 0, 0, 0, 0,113, 0, 0, 0, 0, 62, 0, + 0, 0, 0, 80, 0, 62, 0, 0, 0, 0, 57, 0, 88,114, 0, 0, + 8, 91, 0, 0, 1, 89, 0, 0,115, 0, 0, 0, 0, 0, 0,116, + 0,117,118,119,120, 0, 52, 4,121, 49, 23, 0, 0, 0, 38, 50, + 38, 59, 0, 0, 1, 89, 1, 1, 1, 1, 39, 1, 48,104, 89, 0, + 0, 0, 0, 1, 4,121, 0, 0, 0, 1,122, 0, 0, 0, 0, 0, + 230,230,230,230,230,232,220,220,220,220,232,216,220,220,220,220, + 220,202,202,220,220,220,220,202,202,220,220,220, 1, 1, 1, 1, + 1,220,220,220,220,230,230,230,230,240,230,220,220,220,230,230, + 230,220,220, 0,230,230,230,220,220,220,220,230,232,220,220,230, + 233,234,234,233,234,234,233,230, 0, 0, 0,230, 0,220,230,230, + 230,230,220,230,230,230,222,220,230,230,220,220,230,222,228,230, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20, 21, 22, 0, 23, + 0, 24, 25, 0,230,220, 0, 18, 30, 31, 32, 0, 0, 0, 0, 27, + 28, 29, 30, 31, 32, 33, 34,230,230,220,220,230,220,230,230,220, + 35, 0, 0, 0, 0, 0,230,230,230, 0, 0,230,230, 0,220,230, + 230,220, 0, 0, 0, 36, 0, 0,230,220,230,230,220,220,230,220, + 220,230,220,230,220,230,230, 0, 0,220, 0, 0,230,230, 0,230, + 0,230,230,230,230,230, 0, 0, 0,220,220,220, 0, 0, 0,220, + 230,230, 0,220,230,220,220,220, 27, 28, 29,230, 7, 0, 0, 0, + 0, 9, 0, 0, 0,230,220,230,230, 0, 0, 0, 0, 0,230, 0, + 0, 84, 91, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 9, 0, + 103,103, 9, 0,107,107,107,107,118,118, 9, 0,122,122,122,122, + 220,220, 0, 0, 0,220, 0,220, 0,216, 0, 0, 0,129,130, 0, + 132, 0, 0, 0, 0, 0,130,130,130,130, 0, 0,130, 0,230,230, + 9, 0,230,230, 0, 0,220, 0, 0, 0, 0, 7, 0, 9, 9, 0, + 0,230, 0, 0, 0,228, 0, 0, 0,222,230,220,220, 0, 0, 0, + 230, 0, 0,220,230,220, 0,220, 0, 0, 9, 9, 0, 0, 7, 0, + 230,230,230, 0,230, 0, 1, 1, 1, 0, 0, 0,230,234,214,220, + 202,230,230,230,230,230,232,228,228,220, 0,230,233,220,230,220, + 230,230, 1, 1, 1, 1, 1,230, 0, 1, 1,230,220,230, 1, 1, + 0, 0,218,228,232,222,224,224, 0, 8, 8, 0,230, 0,230,230, + 220, 0, 0,230, 0, 0, 26, 0, 0,220, 0,230,230, 1,220, 0, + 0,230,220, 0, 0, 0,220,220, 0, 9, 7, 0, 0, 7, 9, 0, + 0, 0, 9, 7, 9, 9, 0, 0, 6, 6, 0, 0, 0, 0, 1, 0, + 0,216,216, 1, 1, 1, 0, 0, 0,226,216,216,216,216,216, 0, + 220,220,220, 0,230,230, 7, 0, 16, 17, 17, 33, 17, 49, 17, 17, + 84, 97,135,145, 26, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17,177, 0, 1, 2, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, + 3, 3, 5, 3, 3, 3, 3, 3, 6, 7, 8, 3, 3, 3, 3, 3, + 9, 10, 11, 12, 13, 3, 3, 3, 3, 3, 3, 3, 3, 14, 3, 15, + 3, 3, 3, 3, 3, 3, 16, 17, 18, 19, 20, 21, 3, 3, 3, 22, + 23, 3, 3, 3, 3, 3, 3, 3, 24, 3, 3, 3, 3, 3, 3, 3, + 3, 25, 3, 3, 26, 27, 0, 1, 0, 0, 0, 0, 0, 1, 0, 2, + 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 0, 0, 4, 0, 5, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, + 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, + 9, 0, 0, 0, 0, 0, 0, 9, 0, 9, 0, 0, 0, 0, 0, 0, + 0, 10, 11, 12, 13, 0, 0, 14, 15, 16, 6, 0, 17, 18, 19, 19, + 19, 20, 21, 22, 23, 24, 19, 25, 0, 26, 27, 19, 19, 28, 29, 30, + 0, 31, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 19, 28, 0, + 32, 33, 9, 34, 35, 19, 0, 0, 36, 37, 38, 39, 40, 19, 0, 41, + 42, 43, 44, 31, 0, 1, 45, 42, 0, 0, 0, 0, 0, 32, 14, 14, + 0, 0, 0, 0, 14, 0, 0, 46, 47, 47, 47, 47, 48, 49, 47, 47, + 47, 47, 50, 51, 52, 53, 43, 21, 0, 0, 0, 0, 0, 0, 0, 54, + 6, 55, 0, 14, 19, 1, 0, 0, 0, 19, 56, 31, 0, 0, 0, 0, + 0, 0, 0, 57, 14, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 3, + 0, 0, 0, 58, 59, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 2, 3, 0, 4, 5, 0, 0, 6, 0, 0, 0, 7, 0, 0, + 0, 1, 1, 0, 0, 8, 9, 0, 8, 9, 0, 0, 0, 0, 8, 9, + 10, 11, 12, 0, 0, 0, 13, 0, 0, 0, 0, 14, 15, 16, 17, 0, + 0, 0, 1, 0, 0, 18, 19, 0, 0, 0, 20, 0, 0, 0, 1, 1, + 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 8, 21, 9, 0, 0, + 22, 0, 0, 0, 0, 1, 0, 23, 24, 25, 0, 0, 26, 0, 0, 0, + 8, 21, 27, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 28, 29, 30, + 0, 31, 32, 20, 1, 1, 0, 0, 0, 8, 21, 9, 1, 4, 5, 0, + 0, 0, 33, 9, 0, 1, 1, 1, 0, 8, 21, 21, 21, 21, 34, 1, + 35, 21, 21, 21, 9, 36, 0, 0, 37, 38, 1, 0, 39, 0, 0, 0, + 1, 0, 1, 0, 0, 0, 0, 8, 21, 9, 1, 0, 0, 0, 40, 0, + 8, 21, 21, 21, 21, 21, 21, 21, 21, 9, 0, 1, 1, 1, 1, 8, + 21, 21, 21, 9, 0, 0, 0, 41, 0, 42, 43, 0, 0, 0, 1, 44, + 0, 0, 0, 45, 8, 9, 1, 0, 1, 0, 1, 1, 8, 21, 21, 9, + 0, 4, 5, 8, 9, 1, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, + 7, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 9, 10, 11, 11, + 11, 11, 11, 12, 12, 12, 12, 13, 14, 15, 16, 17, 18, 12, 19, 12, + 20, 12, 12, 12, 12, 21, 22, 22, 22, 23, 12, 12, 12, 12, 24, 25, + 12, 12, 26, 27, 28, 29, 30, 31, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 32, 12, 33, 7, 7, 34, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 35, 0, 0, 1, 2, 2, 2, 3, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 33, 33, + 33, 34, 35, 35, 35, 35, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 45, 46, 47, 48, 49, 50, 2, 2, 51, 51, 52, 53, 54, 55, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 57, 57, 56, 56, 56, 56, + 56, 56, 58, 59, 60, 61, 56, 62, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 56, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 62, + 62, 62, 62, 62, 62, 71, 62, 62, 62, 62, 72, 72, 72, 72, 72, 72, + 72, 72, 72, 73, 74, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 85, 86, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 87, 87, 87, 87, 87, 87, + 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 62, 62, 62, 62, + 88, 89, 89, 89, 90, 89, 91, 92, 93, 94, 95, 95, 96, 97, 87, 98, + 99,100,101,102,103, 87,104,104,104, 87,105,106,107,108,109,110, + 111,112,113,114,115, 87, 89,116,117,118,119,120,121,122,123,124, + 125, 87,126,127, 87,128,129,130,131, 87,132,133,134,135,136,137, + 87, 87,138,139,140,141, 87,142, 87,143,144,144,144,144,144,144, + 144,144,144,144,144, 87, 87, 87, 87, 87,145,145,145,145,145,145, + 145,145,145, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87,146,146,146,146,146, 87, 87, 87,147,147,147,147,148,149, + 150,150, 87, 87, 87, 87,151,151,152,153,154,154,154,154,154,154, + 154,154,154,154,154,154,154,154,154,154,154,154,154,154,154,154, + 155,155,155,155,154, 87, 87, 87, 87, 87,156,157,158,159,159,159, + 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, + 87, 87,160,161, 87, 87, 87, 87, 87, 87, 56, 56,162,163, 51, 56, + 56, 87, 56, 56, 56, 56, 56, 56, 56, 56,164,164,164,164,164,164, + 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,165, 87,166, 87, 87,167, + 87, 87, 87, 87, 87, 87, 87, 87, 87, 87,168,168,169, 87, 87, 87, + 87, 87, 56, 56, 56, 87, 89, 89, 87, 87, 56, 56, 56, 56,170, 87, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 87, 87, 87, 87, 87, 87, 87, 87, 62, 62, 62, 62, 62, 62, + 62, 62, 87, 87, 87, 87, 87, 87, 87, 87, 62, 62, 62, 62, 62, 87, + 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 62, 62, 62, 62, 62, 62, + 62, 87, 87, 87, 87, 87, 87, 87, 87, 87, 56, 87,171,171, 0, 1, + 2, 2, 0, 1, 2, 2, 2, 3, 4, 5, 0, 0, 0, 0, 1, 2, + 1, 2, 0, 0, 3, 3, 4, 5, 4, 5, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 6, 0, 0, 7, 0, 8, 8, 8, 8, 8, 8, + 8, 9, 10, 11, 11, 11, 11, 11, 12, 11, 13, 13, 13, 13, 13, 13, + 13, 13, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 15, 16, 16, + 16, 16, 16, 17, 18, 18, 18, 18, 18, 18, 19, 20, 21, 21, 22, 23, + 21, 24, 21, 21, 21, 21, 21, 25, 21, 21, 26, 26, 26, 26, 26, 21, + 21, 21, 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, + 26, 26, 21, 21, 21, 21, 21, 21, 31, 21, 32, 32, 32, 32, 32, 33, + 34, 32, 35, 35, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, + 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, + 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, + 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, + 42, 42, 43, 43, 43, 43, 43, 43, 43, 43, 44, 44, 44, 45, 44, 44, + 44, 44, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 48, 47, 47, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 50, 50, 50, 50, 50, 51, 52, 52, 52, 52, 52, 52, + 52, 52, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 54, 54, 54, 54, + 54, 54, 55, 55, 55, 55, 55, 55, 55, 55, 56, 56, 57, 57, 57, 57, + 58, 57, 59, 59, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64, 64, + 64, 64, 65, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 55, 55, 55, + 55, 55, 67, 67, 67, 67, 67, 68, 68, 68, 69, 69, 69, 69, 69, 69, + 64, 64, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 71, 8, 8, 8, + 8, 8, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, + 74, 74, 75, 75, 75, 75, 75, 76, 76, 76, 13, 50, 50, 50, 73, 77, + 78, 79, 4, 4, 80, 4, 4, 81, 82, 83, 4, 4, 4, 84, 8, 8, + 8, 8, 11, 11, 11, 11, 11, 11, 11, 11, 85, 0, 0, 0, 0, 0, + 0, 86, 0, 4, 0, 0, 0, 8, 8, 8, 0, 0, 87, 88, 89, 0, + 4, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 90, 90, 90, 90, 90, 90, 90, 90, 91, 91, 91, 91, 91, 91, + 4, 4, 92, 92, 92, 92, 92, 92, 92, 92, 50, 50, 50, 93, 93, 93, + 93, 93, 53, 53, 53, 53, 53, 53, 13, 13, 94, 94, 94, 94, 94, 94, + 94, 94, 94, 94, 94, 94, 94, 94, 94, 0, 95, 0, 96, 97, 98, 99, + 99, 99, 99,100,101,102,102,102,102,103,104,104,104,105, 52, 52, + 52, 52, 52, 0,104,104, 0, 0, 0,102, 52, 52, 0, 0, 0, 0, + 52,106, 0, 0, 0, 0, 0,102,102,107,102,102,102,102,102,108, + 0, 0, 94, 94, 94, 94, 0, 0, 0, 0,109,109,109,109,109,109, + 109,109,109,109,109,109,109,110,110,110,111,111,111,111,111,111, + 111,111,111,111,111,111, 13, 13, 13, 13, 13, 13,112,112,112,112, + 112,112, 0, 0,113, 4, 4, 4, 4, 4,114, 4, 4, 4, 4, 4, + 4, 4,115,115,115, 0,116,116,116,116,117,117,117,117,117,117, + 32, 32,118,118,119,120,120,120, 52, 52,121,121,121,121,122,121, + 49, 49,123,123,123,123,123,123, 49, 49,124,124,124,124,124,124, + 125,125, 53, 53, 53, 4, 4,126,127, 54, 54, 54, 54, 54,125,125, + 125,125,128,128,128,128,128,128,128,128, 4,129, 18, 18, 18, 21, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,130, 0, 21, + 21, 21, 8, 0,131, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, + 21,132, 0, 0, 1, 2, 1, 2,133,101,102,134, 52, 52, 52, 52, + 0, 0,135,135,135,135,135,135,135,135, 0, 0, 0, 0, 11, 11, + 11, 11, 11, 0, 11, 11, 11, 0, 0,136,137,137,138,138,138,138, + 139, 0,140,140,140,141,141,142,142,142,143,143,144,144,144,144, + 144,144,145,145,145,145,145,146,146,146,147,147,147,148,148,148, + 148,148,149,149,149,150,150,150,150,150,151,151,151,151,151,151, + 151,151,152,152,152,152,153,153,154,154,155,155,155,155,155,155, + 156,156,157,157,158,158,158,158,158,158,159,159,160,160,160,160, + 160,160,161,161,161,161,161,161,162,162,163,163,163,163,164,164, + 164,164,165,165,165,165,166,166,167,167,168,168,168,168,168,168, + 168,168,169,169,169,169,169,169,169,169,170,170,170,170,170,170, + 170,170,171,171,171,171,171,171,171,171,172,172,172,172,172,172, + 172,172,173,173,173,174,174,174,174,174,175,175,175,175,175,175, + 176,176,177,177,177,177,177,177,177,177,178,178,178,178,178,179, + 179,179,180,180,180,180,180,181,181,181,182,182,182,182,182,182, + 183, 43,184,184,184,184,184,184,184,184,185,185,185,186,186,186, + 186,186,187,187,187,188,187,187,187,187,189,189,189,189,189,189, + 189,189,190,190,190,190,190,190,190,190,191,191,191,191,191,191, + 191,191,192,192,192,192,192,192, 66, 66,193,193,193,193,193,193, + 193,193,194,194,194,194,194,194,194,194,195,195,195,195,195,195, + 195,195,196,196,196,196,196,196,196,196,197,197,197,197,197,197, + 197,197,198,198,198,198,198,198,198,198,199,199,199,199,199,200, + 200,200,200,200,200,200,201,201,201,201,202,202,202,202,202,202, + 202,203,203,203,203,203,203,203,203,203,204,204,204,204,204,204, + 205,205,205,205,205,205,205,205,205,205,206,206,206,206,206,206, + 206,206,110,110,110,110, 39, 39, 39, 39,207,207,207,207,207,207, + 207,207,208,208,208,208,208,208,208,208,209,209,209,209,209,209, + 209,209,112,112,112,112,112,112,112,112,112,112,112,112,210,210, + 210,210,211,211,211,211,211,211,211,211,212,212,212,212,212,212, + 212,212,213,213,213,213,213,213,213,213,214,214,214,214,214,214, + 214,214,214,214,214,214,214,214,215, 94,216,216,216,216,216,216, + 216,216,217,217,217,217,217,217,217,217,218, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 219,220,220,220,220,220,220,220,220,220,221,221,221,221,221,221, + 221,221,221,221, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 222,223,224, 0,225, 0, 0, 0, 0, 0,226,226,226,226,226,226, + 226,226, 91, 91, 91, 91, 91, 91, 91, 91,227,227,227,227,227,227, + 227,227,228,228,228,228,228,228,228,228,229,229,229,229,229,229, + 229,229,230,230,230,230,230,230,230,230,231, 0, 0, 0, 0, 0, + 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 1, 2, + 2, 2, 2, 2, 3, 0, 0, 0, 4, 0, 2, 2, 2, 2, 2, 3, + 2, 2, 2, 2, 5, 0, 2, 5, 6, 0, 7, 7, 7, 7, 8, 9, + 8, 10, 8, 11, 8, 8, 8, 8, 8, 8, 12, 13, 13, 13, 14, 14, + 14, 14, 14, 15, 14, 14, 16, 17, 17, 17, 17, 17, 17, 17, 18, 19, + 19, 19, 19, 19, 19, 19, 20, 21, 20, 22, 20, 20, 23, 23, 20, 20, + 20, 20, 22, 20, 24, 7, 7, 25, 20, 20, 26, 20, 20, 20, 20, 20, + 20, 21, 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, + 30, 30, 31, 31, 31, 31, 32, 20, 20, 20, 33, 33, 33, 33, 34, 35, + 33, 33, 33, 36, 33, 33, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, + 39, 39, 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, + 43, 43, 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, + 46, 47, 48, 48, 48, 48, 49, 49, 49, 49, 49, 50, 51, 49, 52, 52, + 52, 52, 53, 53, 53, 53, 53, 53, 54, 53, 55, 55, 55, 55, 56, 56, + 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, + 60, 60, 60, 60, 61, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 65, + 0, 0, 66, 66, 66, 66, 67, 67, 67, 67, 68, 68, 68, 68, 69, 70, + 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, 73, 73, 73, 73, 74, 74, + 74, 74, 75, 75, 75, 75, 76, 76, 76, 76, 77, 77, 77, 77, 78, 78, + 78, 78, 79, 79, 79, 79, 80, 80, 80, 80, 81, 81, 81, 81, 82, 7, + 7, 7, 83, 7, 84, 85, 0, 84, 86, 0, 2, 87, 88, 2, 2, 2, + 2, 89, 90, 87, 91, 2, 2, 2, 92, 2, 2, 2, 2, 93, 0, 0, + 0, 86, 1, 0, 0, 94, 0, 95, 96, 0, 4, 0, 0, 0, 0, 0, + 0, 4, 97, 97, 97, 97, 98, 98, 98, 98, 13, 13, 13, 13, 99, 99, + 99, 99,100,100,100,100, 0,101, 0, 0,102,100,103,104, 0, 0, + 100, 0,105,106,106,106,106,106,106,106,106,106,107,105,108,109, + 109,109,109,109,109,109,109,109,110,108,111,111,111,111,112, 55, + 55, 55, 55, 55, 55,113,109,109,109,110,109,109, 0, 0,114,114, + 114,114,115,115,115,115,116,116,116,116,117,117,117,117, 96, 2, + 2, 2, 2, 2, 94, 2,118,118,118,118,119,119,119,119,120,120, + 120,120,121,121,121,121,121,121,121,122,123,123,123,123,124,124, + 124,124,124,124,124,125,126,126,126,126,127,127,127,127,128,128, + 128,128, 2, 2, 3, 2, 2,129,130, 0,131,131,131,131,132, 17, + 17, 18, 20, 20, 20,133, 7, 7, 7,134, 20, 20, 20, 23, 0,135, + 109,109,109,109,109,136,137,137,137,137, 0, 0, 0,138,139,139, + 139,139,140,140,140,140, 84, 0, 0, 0,141,141,141,141,142,142, + 142,142,143,143,143,143,144,144,144,144,145,145,145,145,146,146, + 146,146,147,147,147,147,148,148,148,148,149,149,149,149,150,150, + 150,150,151,151,151,151,152,152,152,152,153,153,153,153,154,154, + 154,154,155,155,155,155,156,156,156,156,157,157,157,157,158,158, + 158,158,159,159,159,159,160,160,160,160,161,161,161,161,162,162, + 162,162,163,163,163,163,164,164,164,164,165,165,165,165,166,166, + 166,166,167,167,167,167,168,168,168,168,169,169,169,169,170,170, + 170,170,171,171,171,171,172,172,172,172,173,173,173,173,174,174, + 174,174,175,175,175,175,176,176,176,176,177,177,177,177,178,178, + 178,178,179,179,179,179,180,180,180,180,181,181,181,181,182,182, + 182,182,183,183,183,183,184, 45, 45, 45,185,185,185,185,186,186, + 186,186,187,187,187,187,188,188,188,188,188,188,189,188,190,190, + 190,190,191,191,191,191,192,192,192,192,193,193,193,193,194,194, + 194,194,195,195,195,195,196,196,196,196,197,197,197,197,198,198, + 198,198,199,199,199,199,200,200,200,200,201,201,201,201,202,202, + 202,202,203,203,203,203,204,204,204,204,205,205,205,205,206,206, + 206,206,207,207,207,207,208,208,208,208,209,209,209,209,210,210, + 210,210,211,211,211,211,212,212,212,212,213,213,213,213,214,214, + 214,214,215,215,215,215,216,217,217,217,218,218,218,218,217,217, + 217,217,219,106,106,106,106,109,109,109,220,220,220,220,221,221, + 221,221, 0,222, 86, 0, 0, 0,222, 7, 82,138, 7, 0, 0, 0, + 223, 86,224,224,224,224,225,225,225,225,226,226,226,226,227,227, + 227,227,228,228,228,228,229, 0, 0, 0, 0, 0, 0, 0, 0, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0, 0, 19, 0, 19, 0, + 0, 0, 0, 0, 26, 26, 1, 1, 1, 1, 9, 9, 9, 9, 0, 9, + 9, 9, 9, 9, 0, 9, 9, 0, 9, 0, 9, 9, 55, 55, 55, 55, + 55, 55, 6, 6, 6, 6, 6, 1, 1, 6, 6, 4, 4, 4, 4, 4, + 4, 4, 4, 14, 14, 14, 14, 14, 14, 14, 3, 3, 3, 3, 3, 0, + 3, 3, 0, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 1, 1, 1, + 3, 3, 1, 3, 3, 3, 37, 37, 37, 37, 38, 38, 38, 38, 64, 64, + 64, 64, 90, 90, 90, 90, 95, 95, 95, 95, 3, 3, 0, 3, 7, 7, + 7, 7, 7, 1, 1, 1, 1, 7, 7, 7, 0, 0, 7, 7, 5, 5, + 5, 5, 11, 11, 11, 11, 10, 10, 10, 10, 21, 21, 21, 21, 22, 22, + 22, 22, 23, 23, 23, 23, 16, 16, 16, 16, 20, 20, 20, 20, 36, 36, + 36, 36, 24, 24, 24, 24, 24, 24, 24, 0, 18, 18, 18, 18, 25, 25, + 25, 25, 25, 0, 0, 0, 0, 25, 25, 25, 33, 33, 33, 33, 8, 8, + 8, 8, 8, 8, 8, 0, 12, 12, 12, 12, 30, 30, 30, 30, 29, 29, + 29, 29, 28, 28, 28, 28, 34, 34, 34, 34, 35, 35, 35, 35, 35, 35, + 35, 0, 0, 0, 35, 35, 45, 45, 45, 45, 44, 44, 44, 44, 44, 0, + 0, 0, 43, 43, 43, 43, 46, 46, 46, 46, 31, 31, 31, 31, 32, 32, + 0, 0, 32, 0, 32, 32, 32, 32, 32, 32, 48, 48, 48, 48, 52, 52, + 52, 52, 58, 58, 58, 58, 54, 54, 54, 54, 91, 91, 91, 91, 62, 62, + 62, 62, 76, 76, 76, 76, 93, 93, 93, 93, 70, 70, 70, 70, 73, 73, + 73, 73, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, + 0, 0, 1, 1, 0, 0, 19, 19, 9, 9, 9, 9, 9, 6, 19, 9, + 9, 9, 9, 9, 19, 19, 9, 9, 9, 19, 6, 19, 19, 19, 19, 19, + 19, 9, 0, 0, 0, 19, 0, 0, 9, 0, 0, 0, 19, 19, 27, 27, + 27, 27, 56, 56, 56, 56, 61, 61, 61, 61, 13, 13, 13, 13, 0, 13, + 0, 13, 0, 13, 13, 13, 13, 13, 1, 1, 1, 1, 12, 12, 0, 15, + 15, 15, 15, 15, 15, 15, 15, 1, 1, 0, 0, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 0, 26, 26, 26, 26, 26, 12, 12, 12, 12, 12, + 12, 0, 39, 39, 39, 39, 86, 86, 86, 86, 77, 77, 77, 77, 79, 79, + 79, 79, 60, 60, 60, 60, 65, 65, 65, 65, 75, 75, 75, 75, 69, 69, + 69, 69, 69, 69, 0, 69, 74, 74, 74, 74, 84, 84, 84, 84, 84, 84, + 84, 0, 68, 68, 68, 68, 92, 92, 92, 92, 87, 87, 87, 87, 19, 9, + 19, 19, 19, 19, 0, 0, 2, 2, 2, 2, 19, 19, 19, 4, 3, 3, + 0, 0, 1, 1, 6, 6, 0, 0, 17, 17, 17, 17, 0, 0, 49, 49, + 49, 49, 0, 1, 1, 1, 71, 71, 71, 71, 67, 67, 67, 67, 42, 42, + 42, 42, 41, 41, 41, 41,118,118,118,118, 53, 53, 53, 53, 59, 59, + 59, 59, 40, 40, 40, 40, 51, 51, 51, 51, 50, 50, 50, 50,135,135, + 135,135,106,106,106,106,104,104,104,104,110,110,110,110, 47, 47, + 47, 47, 81, 81, 81, 81,120,120,120,120,116,116,116,116,128,128, + 128,128, 66, 66, 66, 66, 72, 72, 72, 72, 98, 98, 98, 98, 97, 97, + 97, 97, 57, 57, 57, 57, 88, 88, 88, 88,117,117,117,117,112,112, + 112,112, 78, 78, 78, 78, 83, 83, 83, 83, 82, 82, 82, 82,122,122, + 122,122, 89, 89, 89, 89,130,130,130,130,144,144,144,144,156,156, + 156,156,147,147,147,147,148,148,148,148,153,153,153,153,149,149, + 149,149, 94, 94, 94, 94, 85, 85, 85, 85,101,101,101,101, 96, 96, + 96, 96,111,111,111,111,100,100,100,100,100, 36, 36, 36,108,108, + 108,108,129,129,129,129,109,109,109,109,107,107,107,107,107,107, + 107, 1,137,137,137,137,124,124,124,124,123,123,123,123,114,114, + 114,114,102,102,102,102,126,126,126,126,142,142,142,142,125,125, + 125,125,154,154,154,154,150,150,150,150,141,141,141,141,140,140, + 140,140,121,121,121,121,133,133,133,133,134,134,134,134,138,138, + 138,138,143,143,143,143,145,145,145,145, 63, 63, 63, 63, 80, 80, + 80, 80,127,127,127,127,115,115,115,115,103,103,103,103,119,119, + 119,119,146,146,146,146, 99, 99, 99, 99,136,139, 0, 0,155,155, + 155,155,136,136,136,136, 17, 15, 15, 15,139,139,139,139,105,105, + 105,105, 0, 0, 0, 1, 0, 0, 1, 1,131,131,131,131,151,151, + 151,151,152,152,152,152,113,113,113,113,132,132,132,132, 15, 0, + 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 9, 10, + 9, 11, 12, 13, 9, 9, 9, 14, 9, 9, 15, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 16, 17, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 18, 19, 20, 9, 21, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 23, 24, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 0, 0, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 23, 0, 0, 24, 25, 26, 27, 28, 29, 30, 0, 0, + 31, 32, 0, 33, 0, 34, 0, 35, 0, 0, 0, 0, 36, 37, 38, 39, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 43, 44, 0, 45, 0, 0, 0, 0, 0, 0, 46, 47, 0, 0, + 0, 0, 0, 48, 0, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 50, 51, 0, 0, 0, 52, 0, 0, 53, 0, 0, 0, + 0, 0, 0, 0, 54, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0, + 0, 0, 0, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 57, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 58, 59, 60, 61, 62, 63, 64, 65, 0, 0, 0, 0, + 0, 0, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 67, 68, 0, 69, 70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, + 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102, + 103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0,104, 0, 0, 0, 0, 0, 0,105,106, 0,107, 0, 0, 0, + 108, 0,109, 0,110, 0,111,112,113, 0,114, 0, 0, 0,115, 0, + 0, 0,116, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,117, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,118,119,120,121, 0,122,123,124,125,126, 0,127, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157, 0, 0, + 0,158,159,160,161, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,162,163, 0, 0, 0, 0, 0, + 0, 0,164, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0,165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,166, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,167, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,168, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0,169,170, 0, 0, 0, 0,171,172, 0, 0, 0, + 173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188, + 189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204, + 205,206, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, +}; +static const uint16_t +_hb_ucd_u16[4848] = +{ + 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 7, 8, 9, 10, 11, 12, + 13, 13, 13, 14, 15, 13, 13, 16, 17, 18, 19, 20, 21, 22, 13, 23, + 13, 13, 13, 24, 25, 11, 11, 11, 11, 26, 11, 27, 28, 29, 30, 31, + 32, 32, 32, 32, 32, 32, 32, 33, 34, 35, 36, 11, 37, 38, 13, 39, + 9, 9, 9, 11, 11, 11, 13, 13, 40, 13, 13, 13, 41, 13, 13, 13, + 13, 13, 13, 35, 9, 42, 11, 11, 43, 44, 32, 45, 46, 47, 47, 48, + 49, 50, 47, 47, 51, 32, 52, 53, 47, 47, 47, 47, 47, 54, 55, 56, + 57, 58, 47, 32, 59, 47, 47, 47, 47, 47, 60, 53, 61, 47, 62, 63, + 47, 64, 65, 66, 47, 67, 47, 47, 47, 47, 47, 47, 47, 68, 69, 32, + 70, 47, 47, 71, 72, 73, 74, 75, 76, 47, 47, 77, 78, 79, 80, 81, + 82, 47, 47, 83, 84, 85, 86, 87, 82, 47, 47, 77, 88, 47, 80, 89, + 90, 47, 47, 91, 92, 93, 80, 94, 95, 47, 47, 96, 97, 98, 99, 100, + 101, 47, 47, 102, 103, 104, 80, 105, 106, 47, 47, 91, 107, 108, 80, 109, + 110, 47, 47, 111, 112, 113, 80, 114, 90, 47, 47, 47, 115, 116, 99, 117, + 47, 47, 47, 118, 119, 120, 66, 66, 47, 47, 47, 121, 122, 123, 47, 47, + 124, 125, 126, 127, 47, 47, 47, 128, 129, 32, 32, 130, 131, 132, 66, 66, + 47, 47, 133, 134, 120, 135, 136, 137, 138, 139, 9, 9, 9, 11, 11, 140, + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 141, 142, 143, + 47, 144, 9, 9, 9, 9, 9, 145, 146, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 147, 47, 148, 149, 47, 47, 47, 47, 150, 151, + 47, 152, 47, 153, 47, 152, 47, 152, 47, 47, 47, 154, 155, 156, 157, 143, + 158, 157, 47, 47, 159, 47, 47, 47, 160, 47, 161, 47, 47, 47, 47, 47, + 47, 47, 162, 163, 164, 47, 47, 47, 47, 47, 47, 47, 47, 165, 144, 144, + 47, 166, 47, 47, 47, 167, 168, 169, 157, 157, 170, 171, 32, 32, 32, 32, + 172, 47, 47, 173, 174, 120, 175, 176, 177, 47, 178, 61, 47, 47, 179, 180, + 47, 47, 181, 182, 183, 61, 47, 184, 11, 9, 9, 9, 66, 185, 186, 187, + 11, 11, 188, 27, 27, 27, 189, 190, 11, 191, 27, 27, 32, 32, 32, 32, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 192, 13, 13, 13, 13, 13, 13, + 193, 193, 193, 193, 193, 194, 193, 11, 195, 195, 195, 196, 197, 198, 198, 197, + 199, 200, 201, 202, 203, 204, 205, 206, 207, 27, 208, 208, 208, 209, 210, 32, + 211, 212, 213, 214, 215, 143, 216, 216, 217, 218, 219, 144, 220, 221, 144, 222, + 223, 223, 223, 223, 223, 223, 223, 223, 224, 144, 225, 144, 144, 144, 144, 226, + 144, 227, 223, 228, 144, 229, 230, 144, 144, 144, 144, 144, 144, 144, 143, 143, + 143, 231, 144, 144, 144, 144, 232, 143, 144, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 233, 234, 144, 144, 235, 144, 144, 144, 144, 144, 144, 236, 144, + 144, 144, 144, 144, 144, 144, 237, 238, 143, 239, 144, 144, 240, 223, 241, 223, + 242, 243, 223, 223, 223, 244, 223, 245, 144, 144, 144, 223, 246, 144, 144, 144, + 9, 9, 9, 11, 11, 11, 247, 248, 13, 13, 13, 13, 13, 13, 249, 250, + 11, 11, 11, 47, 47, 47, 251, 252, 47, 47, 47, 47, 47, 47, 32, 32, + 253, 254, 255, 256, 257, 258, 66, 66, 259, 260, 261, 262, 263, 47, 47, 47, + 47, 264, 146, 47, 47, 47, 47, 265, 47, 266, 47, 47, 144, 144, 144, 47, + 144, 144, 267, 144, 268, 269, 144, 144, 267, 144, 144, 269, 144, 144, 144, 144, + 47, 47, 47, 47, 144, 144, 144, 144, 47, 270, 47, 47, 47, 47, 47, 47, + 47, 144, 144, 144, 144, 47, 47, 184, 271, 47, 61, 47, 13, 13, 272, 273, + 13, 274, 47, 47, 47, 47, 275, 276, 31, 277, 278, 279, 13, 13, 13, 280, + 281, 282, 283, 284, 285, 11, 11, 286, 287, 47, 288, 289, 47, 47, 47, 290, + 291, 47, 47, 292, 293, 157, 32, 294, 61, 47, 295, 47, 296, 297, 47, 47, + 70, 47, 47, 298, 299, 300, 301, 61, 47, 47, 302, 303, 304, 305, 47, 306, + 47, 47, 47, 307, 58, 308, 309, 310, 47, 47, 47, 11, 11, 311, 312, 11, + 11, 11, 11, 11, 47, 47, 313, 157, 314, 314, 314, 314, 314, 314, 314, 314, + 315, 315, 315, 315, 315, 315, 315, 315, 11, 316, 317, 47, 47, 47, 47, 47, + 47, 47, 47, 318, 31, 319, 47, 47, 47, 47, 47, 320, 321, 47, 47, 47, + 47, 47, 47, 47, 47, 47, 47, 322, 32, 323, 32, 324, 325, 326, 327, 47, + 47, 47, 47, 47, 47, 47, 47, 328, 329, 2, 3, 4, 5, 330, 331, 332, + 47, 333, 47, 47, 47, 47, 334, 335, 336, 143, 143, 337, 216, 216, 216, 338, + 339, 144, 144, 144, 144, 144, 144, 340, 341, 341, 341, 341, 341, 341, 341, 341, + 47, 47, 47, 47, 47, 47, 342, 143, 47, 47, 343, 47, 344, 47, 47, 60, + 47, 345, 47, 47, 47, 346, 216, 216, 9, 9, 145, 11, 11, 47, 47, 47, + 47, 47, 157, 9, 9, 145, 11, 11, 47, 47, 47, 47, 47, 47, 345, 66, + 47, 47, 47, 47, 47, 347, 47, 348, 47, 47, 349, 143, 143, 143, 47, 350, + 47, 351, 47, 345, 66, 66, 66, 66, 47, 47, 47, 352, 143, 143, 143, 143, + 353, 47, 47, 354, 143, 66, 47, 355, 47, 356, 143, 143, 357, 47, 358, 66, + 47, 47, 47, 359, 47, 360, 47, 360, 47, 359, 142, 143, 143, 143, 143, 143, + 9, 9, 9, 9, 11, 11, 11, 361, 47, 47, 362, 157, 157, 157, 157, 157, + 143, 143, 143, 143, 143, 143, 143, 143, 47, 47, 363, 47, 47, 47, 47, 47, + 47, 356, 364, 47, 60, 365, 66, 66, 47, 47, 47, 47, 366, 143, 47, 47, + 367, 47, 47, 354, 368, 369, 370, 371, 177, 47, 47, 372, 373, 47, 47, 157, + 95, 47, 374, 375, 376, 47, 47, 377, 177, 47, 47, 378, 379, 380, 381, 143, + 47, 47, 382, 383, 32, 32, 32, 32, 47, 47, 359, 47, 47, 384, 169, 157, + 90, 47, 47, 111, 385, 386, 387, 32, 47, 47, 47, 388, 389, 390, 47, 47, + 47, 47, 47, 391, 392, 157, 157, 157, 47, 47, 393, 394, 395, 396, 32, 32, + 47, 47, 47, 397, 398, 157, 66, 66, 47, 47, 399, 400, 157, 157, 157, 157, + 47, 141, 401, 402, 144, 144, 144, 144, 47, 47, 382, 403, 66, 66, 66, 66, + 9, 9, 9, 9, 11, 11, 126, 404, 47, 47, 47, 405, 406, 157, 157, 157, + 47, 47, 47, 47, 47, 407, 408, 409, 410, 47, 47, 411, 412, 413, 47, 47, + 414, 415, 66, 66, 47, 47, 47, 47, 47, 47, 393, 416, 417, 126, 143, 418, + 47, 152, 419, 420, 32, 32, 32, 32, 47, 47, 47, 353, 421, 157, 47, 47, + 422, 423, 157, 157, 157, 157, 157, 157, 47, 47, 47, 47, 47, 47, 47, 424, + 47, 47, 47, 47, 143, 425, 426, 427, 216, 216, 216, 216, 216, 216, 216, 66, + 47, 47, 47, 205, 205, 205, 205, 205, 47, 47, 47, 47, 47, 47, 300, 66, + 47, 47, 47, 47, 47, 47, 47, 428, 47, 47, 47, 429, 430, 431, 432, 47, + 9, 9, 9, 9, 9, 9, 11, 11, 143, 433, 66, 66, 66, 66, 66, 66, + 47, 47, 47, 47, 384, 434, 409, 409, 435, 436, 27, 27, 27, 27, 437, 409, + 47, 438, 205, 205, 205, 205, 205, 205, 144, 144, 144, 144, 144, 144, 439, 440, + 441, 144, 442, 144, 144, 144, 144, 144, 144, 144, 144, 144, 443, 144, 144, 144, + 9, 444, 11, 445, 446, 11, 193, 9, 447, 448, 9, 449, 11, 9, 444, 11, + 445, 446, 11, 193, 9, 447, 448, 9, 449, 11, 9, 444, 11, 445, 446, 11, + 193, 9, 447, 448, 9, 449, 11, 9, 444, 11, 193, 9, 450, 451, 452, 453, + 11, 454, 9, 455, 456, 457, 458, 11, 459, 9, 460, 11, 461, 157, 157, 157, + 32, 32, 32, 462, 32, 32, 463, 464, 465, 466, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 47, 47, 47, 467, 468, 144, 144, 144, + 47, 47, 47, 47, 47, 47, 469, 470, 47, 47, 47, 47, 349, 32, 32, 32, + 9, 9, 447, 11, 471, 300, 66, 66, 143, 143, 472, 473, 143, 143, 143, 143, + 143, 143, 474, 143, 143, 143, 143, 143, 47, 47, 47, 47, 47, 47, 47, 223, + 475, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 476, + 144, 144, 144, 144, 144, 144, 144, 157, 205, 205, 205, 205, 205, 205, 205, 205, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 939, 940, 941, 942, 946, 948, 0, 962, 969, 970, 971, 976,1001,1002,1003,1008, + 0,1033,1040,1041,1042,1043,1047, 0, 0,1080,1081,1082,1086,1110, 0, 0, + 1124,1125,1126,1127,1131,1133, 0,1147,1154,1155,1156,1161,1187,1188,1189,1193, + 0,1219,1226,1227,1228,1229,1233, 0, 0,1267,1268,1269,1273,1298, 0,1303, + 943,1128, 944,1129, 954,1139, 958,1143, 959,1144, 960,1145, 961,1146, 964,1149, + 0, 0, 973,1158, 974,1159, 975,1160, 983,1168, 978,1163, 988,1173, 990,1175, + 991,1176, 993,1178, 994,1179, 0, 0,1004,1190,1005,1191,1006,1192,1014,1199, + 1007, 0, 0, 0,1016,1201,1020,1206, 0,1022,1208,1025,1211,1023,1209, 0, + 0, 0, 0,1032,1218,1037,1223,1035,1221, 0, 0, 0,1044,1230,1045,1231, + 1049,1235, 0, 0,1058,1244,1064,1250,1060,1246,1066,1252,1067,1253,1072,1258, + 1069,1255,1077,1264,1074,1261, 0, 0,1083,1270,1084,1271,1085,1272,1088,1275, + 1089,1276,1096,1283,1103,1290,1111,1299,1115,1118,1307,1120,1309,1121,1310, 0, + 1053,1239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1093, + 1280, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 949,1134,1010, + 1195,1050,1236,1090,1277,1341,1368,1340,1367,1342,1369,1339,1366, 0,1320,1347, + 1418,1419,1323,1350, 0, 0, 992,1177,1018,1204,1055,1241,1416,1417,1415,1424, + 1202, 0, 0, 0, 987,1172, 0, 0,1031,1217,1321,1348,1322,1349,1338,1365, + 950,1135, 951,1136, 979,1164, 980,1165,1011,1196,1012,1197,1051,1237,1052,1238, + 1061,1247,1062,1248,1091,1278,1092,1279,1071,1257,1076,1263, 0, 0, 997,1182, + 0, 0, 0, 0, 0, 0, 945,1130, 982,1167,1337,1364,1335,1362,1046,1232, + 1422,1423,1113,1301, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 8, 9, 0, 10,1425, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0,1314,1427, 5,1434,1438,1443, 0,1450, 0,1455,1461, + 1514, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1446,1458,1468,1476,1480,1486, + 1517, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1489,1503,1494,1500,1508, 0, + 0, 0, 0,1520,1521, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1526,1528, 0,1525, 0, 0, 0,1522, 0, 0, 0, 0,1536,1532,1539, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1534, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1556, 0, 0, 0, 0, 0, 0, + 1548,1550, 0,1547, 0, 0, 0,1567, 0, 0, 0, 0,1558,1554,1561, 0, + 0, 0, 0, 0, 0, 0,1568,1569, 0, 0, 0, 0, 0, 0, 0, 0, + 0,1529,1551, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1523,1545,1524,1546, 0, 0,1527,1549, 0, 0,1570,1571,1530,1552,1531,1553, + 0, 0,1533,1555,1535,1557,1537,1559, 0, 0,1572,1573,1544,1566,1538,1560, + 1540,1562,1541,1563,1542,1564, 0, 0,1543,1565, 0, 0, 0, 0, 0, 0, + 0, 0,1606,1607,1609,1608,1610, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1613, 0,1611, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1612, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1620, 0, 0, 0, 0, 0, 0, + 0,1623, 0, 0,1624, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,1614,1615,1616,1617,1618,1619,1621,1622, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1628,1629, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1625,1626, 0,1627, + 0, 0, 0,1634, 0, 0,1635, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1630,1631,1632, 0, 0,1633, 0, + 0, 0, 0, 0, 0, 0, 0, 0,1639, 0, 0,1638,1640, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1636,1637, 0, 0, + 0, 0, 0, 0,1641, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1642,1644,1643, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,1645, 0, 0, 0, 0, 0, 0, 0, + 1646, 0, 0, 0, 0, 0, 0,1648,1649, 0,1647,1650, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1651,1653,1652, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1654, 0,1655,1657,1656, 0, + 0, 0, 0,1659, 0, 0, 0, 0, 0, 0, 0, 0, 0,1660, 0, 0, + 0, 0,1661, 0, 0, 0, 0,1662, 0, 0, 0, 0,1663, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1658, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1664, 0,1665,1673, 0,1674, 0, 0, 0, 0, 0, 0, 0, + 0,1666, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0,1668, 0, 0, 0, 0, 0, 0, 0, 0, 0,1669, 0, 0, + 0, 0,1670, 0, 0, 0, 0,1671, 0, 0, 0, 0,1672, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0,1667, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0,1675, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0,1676, 0,1677, 0,1678, 0,1679, 0,1680, 0, + 0, 0,1681, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1682, 0,1683, 0, 0, + 1684,1685, 0,1686, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 953,1138, 955,1140, 956,1141, 957,1142,1324,1351, 963,1148, 965,1150, 968,1153, + 966,1151, 967,1152,1378,1380,1379,1381, 984,1169, 985,1170,1420,1421, 986,1171, + 989,1174, 995,1180, 998,1183, 996,1181, 999,1184,1000,1185,1015,1200,1329,1356, + 1017,1203,1019,1205,1021,1207,1024,1210,1687,1688,1027,1213,1026,1212,1028,1214, + 1029,1215,1030,1216,1034,1220,1036,1222,1039,1225,1038,1224,1334,1361,1336,1363, + 1382,1384,1383,1385,1056,1242,1057,1243,1059,1245,1063,1249,1689,1690,1065,1251, + 1068,1254,1070,1256,1386,1387,1388,1389,1691,1692,1073,1259,1075,1262,1079,1266, + 1078,1265,1095,1282,1098,1285,1097,1284,1390,1391,1392,1393,1099,1286,1100,1287, + 1101,1288,1102,1289,1105,1292,1104,1291,1106,1294,1107,1295,1108,1296,1114,1302, + 1119,1308,1122,1311,1123,1312,1186,1260,1293,1305, 0,1394, 0, 0, 0, 0, + 952,1137, 947,1132,1317,1344,1316,1343,1319,1346,1318,1345,1693,1695,1371,1375, + 1370,1374,1373,1377,1372,1376,1694,1696, 981,1166, 977,1162, 972,1157,1326,1353, + 1325,1352,1328,1355,1327,1354,1697,1698,1009,1194,1013,1198,1054,1240,1048,1234, + 1331,1358,1330,1357,1333,1360,1332,1359,1699,1700,1396,1401,1395,1400,1398,1403, + 1397,1402,1399,1404,1094,1281,1087,1274,1406,1411,1405,1410,1408,1413,1407,1412, + 1409,1414,1109,1297,1117,1306,1116,1304,1112,1300, 0, 0, 0, 0, 0, 0, + 1471,1472,1701,1705,1702,1706,1703,1707,1430,1431,1715,1719,1716,1720,1717,1721, + 1477,1478,1729,1731,1730,1732, 0, 0,1435,1436,1733,1735,1734,1736, 0, 0, + 1481,1482,1737,1741,1738,1742,1739,1743,1439,1440,1751,1755,1752,1756,1753,1757, + 1490,1491,1765,1768,1766,1769,1767,1770,1447,1448,1771,1774,1772,1775,1773,1776, + 1495,1496,1777,1779,1778,1780, 0, 0,1451,1452,1781,1783,1782,1784, 0, 0, + 1504,1505,1785,1788,1786,1789,1787,1790, 0,1459, 0,1791, 0,1792, 0,1793, + 1509,1510,1794,1798,1795,1799,1796,1800,1462,1463,1808,1812,1809,1813,1810,1814, + 1467, 21,1475, 22,1479, 23,1485, 24,1493, 27,1499, 28,1507, 29, 0, 0, + 1704,1708,1709,1710,1711,1712,1713,1714,1718,1722,1723,1724,1725,1726,1727,1728, + 1740,1744,1745,1746,1747,1748,1749,1750,1754,1758,1759,1760,1761,1762,1763,1764, + 1797,1801,1802,1803,1804,1805,1806,1807,1811,1815,1816,1817,1818,1819,1820,1821, + 1470,1469,1822,1474,1465, 0,1473,1825,1429,1428,1426, 12,1432, 0, 26, 0, + 0,1315,1823,1484,1466, 0,1483,1829,1433, 13,1437, 14,1441,1826,1827,1828, + 1488,1487,1513, 19, 0, 0,1492,1515,1445,1444,1442, 15, 0,1831,1832,1833, + 1502,1501,1516, 25,1497,1498,1506,1518,1457,1456,1454, 17,1453,1313, 11, 3, + 0, 0,1824,1512,1519, 0,1511,1830,1449, 16,1460, 18,1464, 4, 0, 0, + 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 2, 6, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1834,1835, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1836, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1837,1839,1838, + 0, 0, 0, 0,1840, 0, 0, 0, 0,1841, 0, 0,1842, 0, 0, 0, + 0, 0, 0, 0,1843, 0,1844, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0,1845, 0, 0,1846, 0, 0,1847, 0,1848, 0, 0, 0, 0, 0, 0, + 937, 0,1850, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1849, 936, 938, + 1851,1852, 0, 0,1853,1854, 0, 0,1855,1856, 0, 0, 0, 0, 0, 0, + 1857,1858, 0, 0,1861,1862, 0, 0,1863,1864, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1867,1868,1869,1870, + 1859,1860,1865,1866, 0, 0, 0, 0, 0, 0,1871,1872,1873,1874, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 33, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1875, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1877, 0,1878, 0, + 1879, 0,1880, 0,1881, 0,1882, 0,1883, 0,1884, 0,1885, 0,1886, 0, + 1887, 0,1888, 0, 0,1889, 0,1890, 0,1891, 0, 0, 0, 0, 0, 0, + 1892,1893, 0,1894,1895, 0,1896,1897, 0,1898,1899, 0,1900,1901, 0, 0, + 0, 0, 0, 0,1876, 0, 0, 0, 0, 0, 0, 0, 0, 0,1902, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1904, 0,1905, 0, + 1906, 0,1907, 0,1908, 0,1909, 0,1910, 0,1911, 0,1912, 0,1913, 0, + 1914, 0,1915, 0, 0,1916, 0,1917, 0,1918, 0, 0, 0, 0, 0, 0, + 1919,1920, 0,1921,1922, 0,1923,1924, 0,1925,1926, 0,1927,1928, 0, 0, + 0, 0, 0, 0,1903, 0, 0,1929,1930,1931,1932, 0, 0, 0,1933, 0, + 710, 385, 724, 715, 455, 103, 186, 825, 825, 242, 751, 205, 241, 336, 524, 601, + 663, 676, 688, 738, 411, 434, 474, 500, 649, 746, 799, 108, 180, 416, 482, 662, + 810, 275, 462, 658, 692, 344, 618, 679, 293, 388, 440, 492, 740, 116, 146, 168, + 368, 414, 481, 527, 606, 660, 665, 722, 781, 803, 809, 538, 553, 588, 642, 758, + 811, 701, 233, 299, 573, 612, 487, 540, 714, 779, 232, 267, 412, 445, 457, 585, + 594, 766, 167, 613, 149, 148, 560, 589, 648, 768, 708, 345, 411, 704, 105, 259, + 313, 496, 518, 174, 542, 120, 307, 101, 430, 372, 584, 183, 228, 529, 650, 697, + 424, 732, 428, 349, 632, 355, 517, 110, 135, 147, 403, 580, 624, 700, 750, 170, + 193, 245, 297, 374, 463, 543, 763, 801, 812, 815, 162, 384, 420, 730, 287, 330, + 337, 366, 459, 476, 509, 558, 591, 610, 726, 652, 734, 759, 154, 163, 198, 473, + 683, 697, 292, 311, 353, 423, 572, 494, 113, 217, 259, 280, 314, 499, 506, 603, + 608, 752, 778, 782, 788, 117, 557, 748, 774, 320, 109, 126, 260, 265, 373, 411, + 479, 523, 655, 737, 823, 380, 765, 161, 395, 398, 438, 451, 502, 516, 537, 583, + 791, 136, 340, 769, 122, 273, 446, 727, 305, 322, 400, 496, 771, 155, 190, 269, + 377, 391, 406, 432, 501, 519, 599, 684, 687, 749, 776, 175, 452, 191, 480, 510, + 659, 772, 805, 813, 397, 444, 619, 566, 568, 575, 491, 471, 707, 111, 636, 156, + 153, 288, 346, 578, 256, 435, 383, 729, 680, 767, 694, 295, 128, 210, 0, 0, + 227, 0, 379, 0, 0, 150, 493, 525, 544, 551, 552, 556, 783, 576, 604, 0, + 661, 0, 703, 0, 0, 735, 743, 0, 0, 0, 793, 794, 795, 808, 741, 773, + 118, 127, 130, 166, 169, 177, 207, 213, 215, 226, 229, 268, 270, 317, 327, 329, + 335, 369, 375, 381, 404, 441, 448, 458, 477, 484, 503, 539, 545, 547, 546, 548, + 549, 550, 554, 555, 561, 564, 569, 591, 593, 595, 598, 607, 620, 625, 625, 651, + 690, 695, 705, 706, 716, 717, 733, 735, 777, 786, 790, 315, 869, 623, 0, 0, + 102, 145, 134, 115, 129, 138, 165, 171, 207, 202, 206, 212, 227, 231, 240, 243, + 250, 254, 294, 296, 303, 308, 319, 325, 321, 329, 326, 335, 341, 357, 360, 362, + 370, 379, 388, 389, 393, 421, 424, 438, 456, 454, 458, 465, 477, 535, 485, 490, + 493, 507, 512, 514, 521, 522, 525, 526, 528, 533, 532, 541, 565, 569, 574, 586, + 591, 597, 607, 637, 647, 674, 691, 693, 695, 698, 703, 699, 705, 704, 702, 706, + 709, 717, 728, 736, 747, 754, 770, 777, 783, 784, 786, 787, 790, 802, 825, 848, + 847, 857, 55, 65, 66, 883, 892, 916, 822, 824, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1586, 0,1605, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1602,1603,1934,1935,1574,1575, + 1576,1577,1579,1580,1581,1583,1584, 0,1585,1587,1588,1589,1591, 0,1592, 0, + 1593,1594, 0,1595,1596, 0,1598,1599,1600,1601,1604,1582,1578,1590,1597, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1936, 0,1937, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1938, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1939,1940, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1941,1942, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1944,1943, 0,1945, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1946,1947, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0,1948, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1949,1950, + 1951,1952,1953,1954,1955, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,1956,1957,1958,1960,1959, + 1961, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 106, 104, 107, 826, 114, 118, 119, 121, 123, 124, 127, 125, 34, 830, 130, 131, + 132, 137, 827, 35, 133, 139, 829, 142, 143, 112, 144, 145, 924, 151, 152, 37, + 157, 158, 159, 160, 38, 165, 166, 169, 171, 172, 173, 174, 176, 177, 178, 179, + 181, 182, 182, 182, 833, 468, 184, 185, 834, 187, 188, 189, 196, 192, 194, 195, + 197, 199, 200, 201, 203, 204, 204, 206, 208, 209, 211, 218, 213, 219, 214, 216, + 153, 234, 221, 222, 223, 220, 225, 224, 230, 835, 235, 236, 237, 238, 239, 244, + 836, 837, 247, 248, 249, 246, 251, 39, 40, 253, 255, 255, 838, 257, 258, 259, + 261, 839, 262, 263, 301, 264, 41, 266, 270, 272, 271, 841, 274, 842, 277, 276, + 278, 281, 282, 42, 283, 284, 285, 286, 43, 843, 44, 289, 290, 291, 293, 934, + 298, 845, 845, 621, 300, 300, 45, 852, 894, 302, 304, 46, 306, 309, 310, 312, + 316, 48, 47, 317, 846, 318, 323, 324, 325, 324, 328, 329, 333, 331, 332, 334, + 335, 336, 338, 339, 342, 343, 347, 351, 849, 350, 348, 352, 354, 359, 850, 361, + 358, 356, 49, 363, 365, 367, 364, 50, 369, 371, 851, 376, 386, 378, 53, 381, + 52, 51, 140, 141, 387, 382, 614, 78, 388, 389, 390, 394, 392, 856, 54, 399, + 396, 402, 404, 858, 405, 401, 407, 55, 408, 409, 410, 413, 859, 415, 56, 417, + 860, 418, 57, 419, 422, 424, 425, 861, 840, 862, 426, 863, 429, 431, 427, 433, + 437, 441, 438, 439, 442, 443, 864, 436, 449, 450, 58, 454, 453, 865, 447, 460, + 866, 867, 461, 466, 465, 464, 59, 467, 470, 469, 472, 828, 475, 868, 478, 870, + 483, 485, 486, 871, 488, 489, 872, 873, 495, 497, 60, 498, 61, 61, 504, 505, + 507, 508, 511, 62, 513, 874, 515, 875, 518, 844, 520, 876, 877, 878, 63, 64, + 528, 880, 879, 881, 882, 530, 531, 531, 533, 66, 534, 67, 68, 884, 536, 538, + 541, 69, 885, 549, 886, 887, 556, 559, 70, 561, 562, 563, 888, 889, 889, 567, + 71, 890, 570, 571, 72, 891, 577, 73, 581, 579, 582, 893, 587, 74, 590, 592, + 596, 75, 895, 896, 76, 897, 600, 898, 602, 605, 607, 899, 900, 609, 901, 611, + 853, 77, 615, 616, 79, 617, 252, 902, 903, 854, 855, 621, 622, 731, 80, 627, + 626, 628, 164, 629, 630, 631, 633, 904, 632, 634, 639, 640, 635, 641, 646, 651, + 638, 643, 644, 645, 905, 907, 906, 81, 653, 654, 656, 911, 657, 908, 82, 83, + 909, 910, 84, 664, 665, 666, 667, 669, 668, 671, 670, 674, 672, 673, 675, 85, + 677, 678, 86, 681, 682, 912, 685, 686, 87, 689, 36, 913, 914, 88, 89, 696, + 702, 709, 711, 915, 712, 713, 718, 719, 917, 831, 721, 720, 723, 832, 725, 728, + 918, 919, 739, 742, 744, 920, 745, 753, 756, 757, 755, 760, 761, 921, 762, 90, + 764, 922, 91, 775, 279, 780, 923, 925, 92, 93, 785, 926, 94, 927, 787, 787, + 789, 928, 792, 95, 796, 797, 798, 800, 96, 929, 802, 804, 806, 97, 98, 807, + 930, 99, 931, 932, 933, 814, 100, 816, 817, 818, 819, 820, 821, 935, 0, 0, +}; +static const int16_t +_hb_ucd_i16[92] = +{ + 0, 0, 1, -1, 2, 0, -2, 0, 0, 2, 0, -2, 0, 16, 0, -16, + 0, 1, -1, 0, 3, 3, 3, -3, -3, -3, 0, 2016, 0, 2527, 1923, 1914, + 1918, 0, 2250, 0, 0, 138, 0, 7, -7, 0, -1, 1, 1824, 0, 2104, 0, + 2108, 2106, 0, 2106, 1316, 0, -1, -138, 8, 8, 8, 0, 7, 7, -8, -8, + -8, -7,-1316, 1, -1, 3, -3, 1, 0,-1914,-1918, 0, 0,-1923,-1824, 0, + 0,-2016,-2104, 0, 0,-2106,-2108,-2106,-2250, 0,-2527, 0, +}; + +static inline uint_fast8_t +_hb_ucd_gc (unsigned u) +{ + return u<1114112u?_hb_ucd_u8[4920+(((_hb_ucd_u8[1104+(((_hb_ucd_u16[((_hb_ucd_u8[272+(((_hb_ucd_u8[u>>1>>3>>3>>5])<<5)+((u>>1>>3>>3)&31u))])<<3)+((u>>1>>3)&7u)])<<3)+((u>>1)&7u))])<<1)+((u)&1u))]:2; +} +static inline uint_fast8_t +_hb_ucd_ccc (unsigned u) +{ + return u<125259u?_hb_ucd_u8[6796+(((_hb_ucd_u8[6276+(((_hb_ucd_u8[5844+(((_hb_ucd_u8[5508+(((_hb_ucd_u8[5262+(u>>2>>2>>2>>3)])<<3)+((u>>2>>2>>2)&7u))])<<2)+((u>>2>>2)&3u))])<<2)+((u>>2)&3u))])<<2)+((u)&3u))]:0; +} +static inline unsigned +_hb_ucd_b4 (const uint8_t* a, unsigned i) +{ + return (a[i>>1]>>((i&1u)<<2))&15u; +} +static inline int_fast16_t +_hb_ucd_bmg (unsigned u) +{ + return u<65380u?_hb_ucd_i16[((_hb_ucd_u8[7672+(((_hb_ucd_u8[7448+(((_hb_ucd_u8[7352+(((_hb_ucd_b4(7288+_hb_ucd_u8,u>>1>>2>>3>>3))<<3)+((u>>1>>2>>3)&7u))])<<3)+((u>>1>>2)&7u))])<<2)+((u>>1)&3u))])<<1)+((u)&1u)]:0; +} +static inline uint_fast8_t +_hb_ucd_sc (unsigned u) +{ + return u<918016u?_hb_ucd_u8[11242+(((_hb_ucd_u8[10314+(((_hb_ucd_u8[8938+(((_hb_ucd_u8[8362+(((_hb_ucd_u8[7912+(u>>2>>2>>3>>4)])<<4)+((u>>2>>2>>3)&15u))])<<3)+((u>>2>>2)&7u))])<<2)+((u>>2)&3u))])<<2)+((u)&3u))]:2; +} +static inline uint_fast16_t +_hb_ucd_dm (unsigned u) +{ + return u<195102u?_hb_ucd_u16[1536+(((_hb_ucd_u8[12544+(((_hb_ucd_u8[12162+(u>>4>>5)])<<5)+((u>>4)&31u))])<<4)+((u)&15u))]:0; +} + +#endif + + +#endif /* HB_UCD_TABLE_HH */ + +/* == End of generated table == */ diff --git a/thirdparty/harfbuzz/src/hb-ucd.cc b/thirdparty/harfbuzz/src/hb-ucd.cc new file mode 100644 index 0000000000..ad72a26c04 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-ucd.cc @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "hb.hh" +#include "hb-unicode.hh" +#include "hb-machinery.hh" + +#include "hb-ucd-table.hh" + +static hb_unicode_combining_class_t +hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode, + void *user_data HB_UNUSED) +{ + return (hb_unicode_combining_class_t) _hb_ucd_ccc (unicode); +} + +static hb_unicode_general_category_t +hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode, + void *user_data HB_UNUSED) +{ + return (hb_unicode_general_category_t) _hb_ucd_gc (unicode); +} + +static hb_codepoint_t +hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode, + void *user_data HB_UNUSED) +{ + return unicode + _hb_ucd_bmg (unicode); +} + +static hb_script_t +hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode, + void *user_data HB_UNUSED) +{ + return _hb_ucd_sc_map[_hb_ucd_sc (unicode)]; +} + + +#define SBASE 0xAC00u +#define LBASE 0x1100u +#define VBASE 0x1161u +#define TBASE 0x11A7u +#define SCOUNT 11172u +#define LCOUNT 19u +#define VCOUNT 21u +#define TCOUNT 28u +#define NCOUNT (VCOUNT * TCOUNT) + +static inline bool +_hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b) +{ + unsigned si = ab - SBASE; + + if (si >= SCOUNT) + return false; + + if (si % TCOUNT) + { + /* LV,T */ + *a = SBASE + (si / TCOUNT) * TCOUNT; + *b = TBASE + (si % TCOUNT); + return true; + } else { + /* L,V */ + *a = LBASE + (si / NCOUNT); + *b = VBASE + (si % NCOUNT) / TCOUNT; + return true; + } +} + +static inline bool +_hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab) +{ + if (a >= SBASE && a < (SBASE + SCOUNT) && b > TBASE && b < (TBASE + TCOUNT) && + !((a - SBASE) % TCOUNT)) + { + /* LV,T */ + *ab = a + (b - TBASE); + return true; + } + else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT)) + { + /* L,V */ + int li = a - LBASE; + int vi = b - VBASE; + *ab = SBASE + li * NCOUNT + vi * TCOUNT; + return true; + } + else + return false; +} + +static int +_cmp_pair (const void *_key, const void *_item) +{ + uint64_t& a = * (uint64_t*) _key; + uint64_t b = (* (uint64_t*) _item) & HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0); + + return a < b ? -1 : a > b ? +1 : 0; +} +static int +_cmp_pair_11_7_14 (const void *_key, const void *_item) +{ + uint32_t& a = * (uint32_t*) _key; + uint32_t b = (* (uint32_t*) _item) & HB_CODEPOINT_ENCODE3_11_7_14(0x1FFFFFu, 0x1FFFFFu, 0); + + return a < b ? -1 : a > b ? +1 : 0; +} + +static hb_bool_t +hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab, + void *user_data HB_UNUSED) +{ + if (_hb_ucd_compose_hangul (a, b, ab)) return true; + + hb_codepoint_t u = 0; + + if ((a & 0xFFFFF800u) == 0x0000u && (b & 0xFFFFFF80) == 0x0300u) + { + uint32_t k = HB_CODEPOINT_ENCODE3_11_7_14 (a, b, 0); + const uint32_t *v = hb_bsearch (k, + _hb_ucd_dm2_u32_map, + ARRAY_LENGTH (_hb_ucd_dm2_u32_map), + sizeof (*_hb_ucd_dm2_u32_map), + _cmp_pair_11_7_14); + if (likely (!v)) return false; + u = HB_CODEPOINT_DECODE3_11_7_14_3 (*v); + } + else + { + uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0); + const uint64_t *v = hb_bsearch (k, + _hb_ucd_dm2_u64_map, + ARRAY_LENGTH (_hb_ucd_dm2_u64_map), + sizeof (*_hb_ucd_dm2_u64_map), + _cmp_pair); + if (likely (!v)) return false; + u = HB_CODEPOINT_DECODE3_3 (*v); + } + + if (unlikely (!u)) return false; + *ab = u; + return true; +} + +static hb_bool_t +hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b, + void *user_data HB_UNUSED) +{ + if (_hb_ucd_decompose_hangul (ab, a, b)) return true; + + unsigned i = _hb_ucd_dm (ab); + + if (likely (!i)) return false; + i--; + + if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map)) + { + if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map)) + *a = _hb_ucd_dm1_p0_map[i]; + else + { + i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map); + *a = 0x20000 | _hb_ucd_dm1_p2_map[i]; + } + *b = 0; + return true; + } + i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map); + + if (i < ARRAY_LENGTH (_hb_ucd_dm2_u32_map)) + { + uint32_t v = _hb_ucd_dm2_u32_map[i]; + *a = HB_CODEPOINT_DECODE3_11_7_14_1 (v); + *b = HB_CODEPOINT_DECODE3_11_7_14_2 (v); + return true; + } + i -= ARRAY_LENGTH (_hb_ucd_dm2_u32_map); + + uint64_t v = _hb_ucd_dm2_u64_map[i]; + *a = HB_CODEPOINT_DECODE3_1 (v); + *b = HB_CODEPOINT_DECODE3_2 (v); + return true; +} + + +#if HB_USE_ATEXIT +static void free_static_ucd_funcs (); +#endif + +static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t> +{ + static hb_unicode_funcs_t *create () + { + hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr); + + hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr); + hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr); + hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr); + hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr); + hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr); + hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr); + + hb_unicode_funcs_make_immutable (funcs); + +#if HB_USE_ATEXIT + atexit (free_static_ucd_funcs); +#endif + + return funcs; + } +} static_ucd_funcs; + +#if HB_USE_ATEXIT +static +void free_static_ucd_funcs () +{ + static_ucd_funcs.free_instance (); +} +#endif + +hb_unicode_funcs_t * +hb_ucd_get_unicode_funcs () +{ +#ifdef HB_NO_UCD + return hb_unicode_funcs_get_empty (); +#endif + return static_ucd_funcs.get_unconst (); +} diff --git a/thirdparty/harfbuzz/src/hb-unicode-emoji-table.hh b/thirdparty/harfbuzz/src/hb-unicode-emoji-table.hh new file mode 100644 index 0000000000..eb7776eecb --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-unicode-emoji-table.hh @@ -0,0 +1,78 @@ +/* == Start of generated table == */ +/* + * The following tables are generated by running: + * + * ./gen-emoji-table.py emoji-data.txt + * + * on file with this header: + * + * # emoji-data.txt + * # Date: 2020-01-28, 20:52:38 GMT + * # © 2020 Unicode®, Inc. + * # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. + * # For terms of use, see http://www.unicode.org/terms_of_use.html + * # + * # Emoji Data for UTS #51 + * # Version: 13.0 + * # + * # For documentation and usage, see http://www.unicode.org/reports/tr51 + */ + +#ifndef HB_UNICODE_EMOJI_TABLE_HH +#define HB_UNICODE_EMOJI_TABLE_HH + +#include "hb-unicode.hh" + +static const uint8_t +_hb_emoji_u8[448] = +{ + 0, 0, 0, 0, 33, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84,118, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 3, + 0, 0, 0, 0, 0, 0, 4, 5, 6, 7, 8, 7, 9, 10, 11, 0, + 0, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, + 7, 7, 7, 14, 15, 16, 17, 18, 19, 20, 7, 7, 7, 7, 7, 21, + 7, 7, 7, 7, 22, 23, 7, 7, 7, 24, 7, 14, 0, 25, 0, 26, + 27, 28, 29, 14, 30, 31, 7, 7, 7, 7, 7, 14, 0, 0, 0, 0, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 22, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,240, 1, 0, 2, 0, 0, + 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,254, 7, 3, + 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56, + 159,255,243,255,255,255,255,255,255,255,255,255,255,255,255,255, + 31, 0,255,255,255,255,255,255, 31,255, 3, 0, 0, 0, 8, 0, + 0, 0, 24, 0,120, 0, 0, 0, 0, 0, 96, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 16, 0, 96, 0, 0, 8, 0, 0, 0, 0, + 255,255,255,255,255,255,255,127, 0, 96, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0,240, 1, 64, 0, 0,254, 3, 0,224,255,255, + 255,255,255,255, 31, 0, 0, 0,254,127, 0, 0, 0, 0,252,115, + 0,254,255,255,255,255,255,255,255,255,255,255,255,255,255, 3, + 255,255,255,255,255,255,255, 31,192,255,255,255,255,255,255,255, + 255,127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,240,127, + 0, 0,224,255,255,255,255,127, 0,112, 0, 0, 0, 0, 0, 0, + 0,127, 0,124, 0, 0, 0, 0, 0,127, 0, 0, 0,192,255,255, + 0,240,255,255,255,255,255,243,159,255,255,255,255,255,255,255, +}; + +static inline unsigned +_hb_emoji_b4 (const uint8_t* a, unsigned i) +{ + return (a[i>>1]>>((i&1u)<<2))&15u; +} +static inline unsigned +_hb_emoji_b1 (const uint8_t* a, unsigned i) +{ + return (a[i>>3]>>((i&7u)<<0))&1u; +} +static inline uint_fast8_t +_hb_emoji_is_Extended_Pictographic (unsigned u) +{ + return u<131069u?_hb_emoji_b1(192+_hb_emoji_u8,((_hb_emoji_u8[64+(((_hb_emoji_b4(_hb_emoji_u8,u>>6>>4))<<4)+((u>>6)&15u))])<<6)+((u)&63u)):0; +} + + +#endif /* HB_UNICODE_EMOJI_TABLE_HH */ + +/* == End of generated table == */ diff --git a/thirdparty/harfbuzz/src/hb-unicode.cc b/thirdparty/harfbuzz/src/hb-unicode.cc new file mode 100644 index 0000000000..36070a7f18 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-unicode.cc @@ -0,0 +1,586 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2011 Codethink Limited + * Copyright © 2010,2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Codethink Author(s): Ryan Lortie + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#include "hb-unicode.hh" + + +/** + * SECTION: hb-unicode + * @title: hb-unicode + * @short_description: Unicode character property access + * @include: hb.h + * + * Unicode functions are used to access Unicode character properties. + * Client can pass its own Unicode functions to HarfBuzz, or access + * the built-in Unicode functions that come with HarfBuzz. + * + * With the Unicode functions, one can query variour Unicode character + * properties, such as General Category, Script, Combining Class, etc. + **/ + + +/* + * hb_unicode_funcs_t + */ + +static hb_unicode_combining_class_t +hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode HB_UNUSED, + void *user_data HB_UNUSED) +{ + return HB_UNICODE_COMBINING_CLASS_NOT_REORDERED; +} + +#ifndef HB_DISABLE_DEPRECATED +static unsigned int +hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode HB_UNUSED, + void *user_data HB_UNUSED) +{ + return 1; +} +#endif + +static hb_unicode_general_category_t +hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode HB_UNUSED, + void *user_data HB_UNUSED) +{ + return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER; +} + +static hb_codepoint_t +hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode, + void *user_data HB_UNUSED) +{ + return unicode; +} + +static hb_script_t +hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t unicode HB_UNUSED, + void *user_data HB_UNUSED) +{ + return HB_SCRIPT_UNKNOWN; +} + +static hb_bool_t +hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t a HB_UNUSED, + hb_codepoint_t b HB_UNUSED, + hb_codepoint_t *ab HB_UNUSED, + void *user_data HB_UNUSED) +{ + return false; +} + +static hb_bool_t +hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t ab HB_UNUSED, + hb_codepoint_t *a HB_UNUSED, + hb_codepoint_t *b HB_UNUSED, + void *user_data HB_UNUSED) +{ + return false; +} + + +#ifndef HB_DISABLE_DEPRECATED +static unsigned int +hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, + hb_codepoint_t u HB_UNUSED, + hb_codepoint_t *decomposed HB_UNUSED, + void *user_data HB_UNUSED) +{ + return 0; +} +#endif + +#if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB) +#include "hb-glib.h" +#endif +#if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN) +#include "hb-icu.h" +#endif + +hb_unicode_funcs_t * +hb_unicode_funcs_get_default () +{ +#if !defined(HB_NO_UNICODE_FUNCS) && !defined(HB_NO_UCD) + return hb_ucd_get_unicode_funcs (); +#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB) + return hb_glib_get_unicode_funcs (); +#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN) + return hb_icu_get_unicode_funcs (); +#else +#define HB_UNICODE_FUNCS_NIL 1 + return hb_unicode_funcs_get_empty (); +#endif +} + +#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL) +#error "Could not find any Unicode functions implementation, you have to provide your own" +#error "Consider building hb-ucd.cc. If you absolutely want to build without any, check the code." +#endif + +/** + * hb_unicode_funcs_create: (Xconstructor) + * @parent: (nullable): + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_unicode_funcs_t * +hb_unicode_funcs_create (hb_unicode_funcs_t *parent) +{ + hb_unicode_funcs_t *ufuncs; + + if (!(ufuncs = hb_object_create<hb_unicode_funcs_t> ())) + return hb_unicode_funcs_get_empty (); + + if (!parent) + parent = hb_unicode_funcs_get_empty (); + + hb_unicode_funcs_make_immutable (parent); + ufuncs->parent = hb_unicode_funcs_reference (parent); + + ufuncs->func = parent->func; + + /* We can safely copy user_data from parent since we hold a reference + * onto it and it's immutable. We should not copy the destroy notifiers + * though. */ + ufuncs->user_data = parent->user_data; + + return ufuncs; +} + + +DEFINE_NULL_INSTANCE (hb_unicode_funcs_t) = +{ + HB_OBJECT_HEADER_STATIC, + + nullptr, /* parent */ + { +#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil, + HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_UNICODE_FUNC_IMPLEMENT + } +}; + +/** + * hb_unicode_funcs_get_empty: + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_unicode_funcs_t * +hb_unicode_funcs_get_empty () +{ + return const_cast<hb_unicode_funcs_t *> (&Null (hb_unicode_funcs_t)); +} + +/** + * hb_unicode_funcs_reference: (skip) + * @ufuncs: Unicode functions. + * + * + * + * Return value: (transfer full): + * + * Since: 0.9.2 + **/ +hb_unicode_funcs_t * +hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs) +{ + return hb_object_reference (ufuncs); +} + +/** + * hb_unicode_funcs_destroy: (skip) + * @ufuncs: Unicode functions. + * + * + * + * Since: 0.9.2 + **/ +void +hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs) +{ + if (!hb_object_destroy (ufuncs)) return; + +#define HB_UNICODE_FUNC_IMPLEMENT(name) \ + if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name); + HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_UNICODE_FUNC_IMPLEMENT + + hb_unicode_funcs_destroy (ufuncs->parent); + + free (ufuncs); +} + +/** + * hb_unicode_funcs_set_user_data: (skip) + * @ufuncs: Unicode functions. + * @key: + * @data: + * @destroy: + * @replace: + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace) +{ + return hb_object_set_user_data (ufuncs, key, data, destroy, replace); +} + +/** + * hb_unicode_funcs_get_user_data: (skip) + * @ufuncs: Unicode functions. + * @key: + * + * + * + * Return value: (transfer none): + * + * Since: 0.9.2 + **/ +void * +hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs, + hb_user_data_key_t *key) +{ + return hb_object_get_user_data (ufuncs, key); +} + + +/** + * hb_unicode_funcs_make_immutable: + * @ufuncs: Unicode functions. + * + * + * + * Since: 0.9.2 + **/ +void +hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs) +{ + if (hb_object_is_immutable (ufuncs)) + return; + + hb_object_make_immutable (ufuncs); +} + +/** + * hb_unicode_funcs_is_immutable: + * @ufuncs: Unicode functions. + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs) +{ + return hb_object_is_immutable (ufuncs); +} + +/** + * hb_unicode_funcs_get_parent: + * @ufuncs: Unicode functions. + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_unicode_funcs_t * +hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs) +{ + return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty (); +} + + +#define HB_UNICODE_FUNC_IMPLEMENT(name) \ + \ +void \ +hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \ + hb_unicode_##name##_func_t func, \ + void *user_data, \ + hb_destroy_func_t destroy) \ +{ \ + if (hb_object_is_immutable (ufuncs)) \ + return; \ + \ + if (ufuncs->destroy.name) \ + ufuncs->destroy.name (ufuncs->user_data.name); \ + \ + if (func) { \ + ufuncs->func.name = func; \ + ufuncs->user_data.name = user_data; \ + ufuncs->destroy.name = destroy; \ + } else { \ + ufuncs->func.name = ufuncs->parent->func.name; \ + ufuncs->user_data.name = ufuncs->parent->user_data.name; \ + ufuncs->destroy.name = nullptr; \ + } \ +} + +HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_UNICODE_FUNC_IMPLEMENT + + +#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \ + \ +return_type \ +hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \ + hb_codepoint_t unicode) \ +{ \ + return ufuncs->name (unicode); \ +} +HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE +#undef HB_UNICODE_FUNC_IMPLEMENT + +/** + * hb_unicode_compose: + * @ufuncs: Unicode functions. + * @a: + * @b: + * @ab: (out): + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_unicode_compose (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab) +{ + return ufuncs->compose (a, b, ab); +} + +/** + * hb_unicode_decompose: + * @ufuncs: Unicode functions. + * @ab: + * @a: (out): + * @b: (out): + * + * + * + * Return value: + * + * Since: 0.9.2 + **/ +hb_bool_t +hb_unicode_decompose (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t ab, + hb_codepoint_t *a, + hb_codepoint_t *b) +{ + return ufuncs->decompose (ab, a, b); +} + +#ifndef HB_DISABLE_DEPRECATED +/** + * hb_unicode_decompose_compatibility: + * @ufuncs: Unicode functions. + * @u: + * @decomposed: (out): + * + * + * + * Return value: + * + * Since: 0.9.2 + * Deprecated: 2.0.0 + **/ +unsigned int +hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t u, + hb_codepoint_t *decomposed) +{ + return ufuncs->decompose_compatibility (u, decomposed); +} +#endif + + +#ifndef HB_NO_OT_SHAPE +/* See hb-unicode.hh for details. */ +const uint8_t +_hb_modified_combining_class[256] = +{ + 0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */ + 1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */ + 2, 3, 4, 5, 6, + 7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */ + 8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */ + 9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */ + + /* Hebrew */ + HB_MODIFIED_COMBINING_CLASS_CCC10, + HB_MODIFIED_COMBINING_CLASS_CCC11, + HB_MODIFIED_COMBINING_CLASS_CCC12, + HB_MODIFIED_COMBINING_CLASS_CCC13, + HB_MODIFIED_COMBINING_CLASS_CCC14, + HB_MODIFIED_COMBINING_CLASS_CCC15, + HB_MODIFIED_COMBINING_CLASS_CCC16, + HB_MODIFIED_COMBINING_CLASS_CCC17, + HB_MODIFIED_COMBINING_CLASS_CCC18, + HB_MODIFIED_COMBINING_CLASS_CCC19, + HB_MODIFIED_COMBINING_CLASS_CCC20, + HB_MODIFIED_COMBINING_CLASS_CCC21, + HB_MODIFIED_COMBINING_CLASS_CCC22, + HB_MODIFIED_COMBINING_CLASS_CCC23, + HB_MODIFIED_COMBINING_CLASS_CCC24, + HB_MODIFIED_COMBINING_CLASS_CCC25, + HB_MODIFIED_COMBINING_CLASS_CCC26, + + /* Arabic */ + HB_MODIFIED_COMBINING_CLASS_CCC27, + HB_MODIFIED_COMBINING_CLASS_CCC28, + HB_MODIFIED_COMBINING_CLASS_CCC29, + HB_MODIFIED_COMBINING_CLASS_CCC30, + HB_MODIFIED_COMBINING_CLASS_CCC31, + HB_MODIFIED_COMBINING_CLASS_CCC32, + HB_MODIFIED_COMBINING_CLASS_CCC33, + HB_MODIFIED_COMBINING_CLASS_CCC34, + HB_MODIFIED_COMBINING_CLASS_CCC35, + + /* Syriac */ + HB_MODIFIED_COMBINING_CLASS_CCC36, + + 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, + + /* Telugu */ + HB_MODIFIED_COMBINING_CLASS_CCC84, + 85, 86, 87, 88, 89, 90, + HB_MODIFIED_COMBINING_CLASS_CCC91, + 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, + + /* Thai */ + HB_MODIFIED_COMBINING_CLASS_CCC103, + 104, 105, 106, + HB_MODIFIED_COMBINING_CLASS_CCC107, + 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, + + /* Lao */ + HB_MODIFIED_COMBINING_CLASS_CCC118, + 119, 120, 121, + HB_MODIFIED_COMBINING_CLASS_CCC122, + 123, 124, 125, 126, 127, 128, + + /* Tibetan */ + HB_MODIFIED_COMBINING_CLASS_CCC129, + HB_MODIFIED_COMBINING_CLASS_CCC130, + 131, + HB_MODIFIED_COMBINING_CLASS_CCC132, + 133, 134, 135, 136, 137, 138, 139, + + + 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, + 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, + 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, + + 200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */ + 201, + 202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */ + 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, + 214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */ + 215, + 216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */ + 217, + 218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */ + 219, + 220, /* HB_UNICODE_COMBINING_CLASS_BELOW */ + 221, + 222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */ + 223, + 224, /* HB_UNICODE_COMBINING_CLASS_LEFT */ + 225, + 226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */ + 227, + 228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */ + 229, + 230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */ + 231, + 232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */ + 233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */ + 234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */ + 235, 236, 237, 238, 239, + 240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */ + 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, + 255, /* HB_UNICODE_COMBINING_CLASS_INVALID */ +}; +#endif + + +/* + * Emoji + */ +#ifndef HB_NO_EMOJI_SEQUENCES + +#include "hb-unicode-emoji-table.hh" + +bool +_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp) +{ + return _hb_emoji_is_Extended_Pictographic (cp); +} +#endif diff --git a/thirdparty/harfbuzz/src/hb-unicode.h b/thirdparty/harfbuzz/src/hb-unicode.h new file mode 100644 index 0000000000..61b1b0ba1f --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-unicode.h @@ -0,0 +1,404 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2011 Codethink Limited + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Codethink Author(s): Ryan Lortie + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_UNICODE_H +#define HB_UNICODE_H + +#include "hb-common.h" + +HB_BEGIN_DECLS + + +/** + * HB_UNICODE_MAX + * + * Since: 1.9.0 + **/ +#define HB_UNICODE_MAX 0x10FFFFu + + +/* hb_unicode_general_category_t */ + +/* Unicode Character Database property: General_Category (gc) */ +typedef enum +{ + HB_UNICODE_GENERAL_CATEGORY_CONTROL, /* Cc */ + HB_UNICODE_GENERAL_CATEGORY_FORMAT, /* Cf */ + HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED, /* Cn */ + HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE, /* Co */ + HB_UNICODE_GENERAL_CATEGORY_SURROGATE, /* Cs */ + HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER, /* Ll */ + HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER, /* Lm */ + HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER, /* Lo */ + HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER, /* Lt */ + HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER, /* Lu */ + HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK, /* Mc */ + HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK, /* Me */ + HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK, /* Mn */ + HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER, /* Nd */ + HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER, /* Nl */ + HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER, /* No */ + HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION, /* Pc */ + HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION, /* Pd */ + HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION, /* Pe */ + HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION, /* Pf */ + HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION, /* Pi */ + HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION, /* Po */ + HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION, /* Ps */ + HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL, /* Sc */ + HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL, /* Sk */ + HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL, /* Sm */ + HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL, /* So */ + HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR, /* Zl */ + HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR, /* Zp */ + HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR /* Zs */ +} hb_unicode_general_category_t; + +/* hb_unicode_combining_class_t */ + +/* Note: newer versions of Unicode may add new values. Clients should be ready to handle + * any value in the 0..254 range being returned from hb_unicode_combining_class(). + */ + +/* Unicode Character Database property: Canonical_Combining_Class (ccc) */ +typedef enum +{ + HB_UNICODE_COMBINING_CLASS_NOT_REORDERED = 0, + HB_UNICODE_COMBINING_CLASS_OVERLAY = 1, + HB_UNICODE_COMBINING_CLASS_NUKTA = 7, + HB_UNICODE_COMBINING_CLASS_KANA_VOICING = 8, + HB_UNICODE_COMBINING_CLASS_VIRAMA = 9, + + /* Hebrew */ + HB_UNICODE_COMBINING_CLASS_CCC10 = 10, + HB_UNICODE_COMBINING_CLASS_CCC11 = 11, + HB_UNICODE_COMBINING_CLASS_CCC12 = 12, + HB_UNICODE_COMBINING_CLASS_CCC13 = 13, + HB_UNICODE_COMBINING_CLASS_CCC14 = 14, + HB_UNICODE_COMBINING_CLASS_CCC15 = 15, + HB_UNICODE_COMBINING_CLASS_CCC16 = 16, + HB_UNICODE_COMBINING_CLASS_CCC17 = 17, + HB_UNICODE_COMBINING_CLASS_CCC18 = 18, + HB_UNICODE_COMBINING_CLASS_CCC19 = 19, + HB_UNICODE_COMBINING_CLASS_CCC20 = 20, + HB_UNICODE_COMBINING_CLASS_CCC21 = 21, + HB_UNICODE_COMBINING_CLASS_CCC22 = 22, + HB_UNICODE_COMBINING_CLASS_CCC23 = 23, + HB_UNICODE_COMBINING_CLASS_CCC24 = 24, + HB_UNICODE_COMBINING_CLASS_CCC25 = 25, + HB_UNICODE_COMBINING_CLASS_CCC26 = 26, + + /* Arabic */ + HB_UNICODE_COMBINING_CLASS_CCC27 = 27, + HB_UNICODE_COMBINING_CLASS_CCC28 = 28, + HB_UNICODE_COMBINING_CLASS_CCC29 = 29, + HB_UNICODE_COMBINING_CLASS_CCC30 = 30, + HB_UNICODE_COMBINING_CLASS_CCC31 = 31, + HB_UNICODE_COMBINING_CLASS_CCC32 = 32, + HB_UNICODE_COMBINING_CLASS_CCC33 = 33, + HB_UNICODE_COMBINING_CLASS_CCC34 = 34, + HB_UNICODE_COMBINING_CLASS_CCC35 = 35, + + /* Syriac */ + HB_UNICODE_COMBINING_CLASS_CCC36 = 36, + + /* Telugu */ + HB_UNICODE_COMBINING_CLASS_CCC84 = 84, + HB_UNICODE_COMBINING_CLASS_CCC91 = 91, + + /* Thai */ + HB_UNICODE_COMBINING_CLASS_CCC103 = 103, + HB_UNICODE_COMBINING_CLASS_CCC107 = 107, + + /* Lao */ + HB_UNICODE_COMBINING_CLASS_CCC118 = 118, + HB_UNICODE_COMBINING_CLASS_CCC122 = 122, + + /* Tibetan */ + HB_UNICODE_COMBINING_CLASS_CCC129 = 129, + HB_UNICODE_COMBINING_CLASS_CCC130 = 130, + HB_UNICODE_COMBINING_CLASS_CCC133 = 132, + + + HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT = 200, + HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW = 202, + HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE = 214, + HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT = 216, + HB_UNICODE_COMBINING_CLASS_BELOW_LEFT = 218, + HB_UNICODE_COMBINING_CLASS_BELOW = 220, + HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT = 222, + HB_UNICODE_COMBINING_CLASS_LEFT = 224, + HB_UNICODE_COMBINING_CLASS_RIGHT = 226, + HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT = 228, + HB_UNICODE_COMBINING_CLASS_ABOVE = 230, + HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT = 232, + HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW = 233, + HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE = 234, + + HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT = 240, + + HB_UNICODE_COMBINING_CLASS_INVALID = 255 +} hb_unicode_combining_class_t; + + +/* + * hb_unicode_funcs_t + */ + +typedef struct hb_unicode_funcs_t hb_unicode_funcs_t; + + +/* + * just give me the best implementation you've got there. + */ +HB_EXTERN hb_unicode_funcs_t * +hb_unicode_funcs_get_default (void); + + +HB_EXTERN hb_unicode_funcs_t * +hb_unicode_funcs_create (hb_unicode_funcs_t *parent); + +HB_EXTERN hb_unicode_funcs_t * +hb_unicode_funcs_get_empty (void); + +HB_EXTERN hb_unicode_funcs_t * +hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs); + +HB_EXTERN void +hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs); + +HB_EXTERN hb_bool_t +hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs, + hb_user_data_key_t *key, + void * data, + hb_destroy_func_t destroy, + hb_bool_t replace); + + +HB_EXTERN void * +hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs, + hb_user_data_key_t *key); + + +HB_EXTERN void +hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs); + +HB_EXTERN hb_bool_t +hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs); + +HB_EXTERN hb_unicode_funcs_t * +hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs); + + +/* + * funcs + */ + +/* typedefs */ + +typedef hb_unicode_combining_class_t (*hb_unicode_combining_class_func_t) (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode, + void *user_data); +typedef hb_unicode_general_category_t (*hb_unicode_general_category_func_t) (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode, + void *user_data); +typedef hb_codepoint_t (*hb_unicode_mirroring_func_t) (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode, + void *user_data); +typedef hb_script_t (*hb_unicode_script_func_t) (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode, + void *user_data); + +typedef hb_bool_t (*hb_unicode_compose_func_t) (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab, + void *user_data); +typedef hb_bool_t (*hb_unicode_decompose_func_t) (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t ab, + hb_codepoint_t *a, + hb_codepoint_t *b, + void *user_data); + +/* setters */ + +/** + * hb_unicode_funcs_set_combining_class_func: + * @ufuncs: a Unicode function structure + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs, + hb_unicode_combining_class_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_unicode_funcs_set_general_category_func: + * @ufuncs: a Unicode function structure + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs, + hb_unicode_general_category_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_unicode_funcs_set_mirroring_func: + * @ufuncs: a Unicode function structure + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs, + hb_unicode_mirroring_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_unicode_funcs_set_script_func: + * @ufuncs: a Unicode function structure + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs, + hb_unicode_script_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_unicode_funcs_set_compose_func: + * @ufuncs: a Unicode function structure + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs, + hb_unicode_compose_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/** + * hb_unicode_funcs_set_decompose_func: + * @ufuncs: a Unicode function structure + * @func: (closure user_data) (destroy destroy) (scope notified): + * @user_data: + * @destroy: + * + * + * + * Since: 0.9.2 + **/ +HB_EXTERN void +hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs, + hb_unicode_decompose_func_t func, + void *user_data, hb_destroy_func_t destroy); + +/* accessors */ + +/** + * hb_unicode_combining_class: + * + * Since: 0.9.2 + **/ +HB_EXTERN hb_unicode_combining_class_t +hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode); + +/** + * hb_unicode_general_category: + * + * Since: 0.9.2 + **/ +HB_EXTERN hb_unicode_general_category_t +hb_unicode_general_category (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode); + +/** + * hb_unicode_mirroring: + * + * Since: 0.9.2 + **/ +HB_EXTERN hb_codepoint_t +hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode); + +/** + * hb_unicode_script: + * + * Since: 0.9.2 + **/ +HB_EXTERN hb_script_t +hb_unicode_script (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t unicode); + +HB_EXTERN hb_bool_t +hb_unicode_compose (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t a, + hb_codepoint_t b, + hb_codepoint_t *ab); + +HB_EXTERN hb_bool_t +hb_unicode_decompose (hb_unicode_funcs_t *ufuncs, + hb_codepoint_t ab, + hb_codepoint_t *a, + hb_codepoint_t *b); + +HB_END_DECLS + +#endif /* HB_UNICODE_H */ diff --git a/thirdparty/harfbuzz/src/hb-unicode.hh b/thirdparty/harfbuzz/src/hb-unicode.hh new file mode 100644 index 0000000000..34d66d7aa3 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-unicode.hh @@ -0,0 +1,398 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * Copyright © 2011 Codethink Limited + * Copyright © 2010,2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Codethink Author(s): Ryan Lortie + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_UNICODE_HH +#define HB_UNICODE_HH + +#include "hb.hh" + + +extern HB_INTERNAL const uint8_t _hb_modified_combining_class[256]; + +/* + * hb_unicode_funcs_t + */ + +#define HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS \ + HB_UNICODE_FUNC_IMPLEMENT (combining_class) \ + HB_IF_NOT_DEPRECATED (HB_UNICODE_FUNC_IMPLEMENT (eastasian_width)) \ + HB_UNICODE_FUNC_IMPLEMENT (general_category) \ + HB_UNICODE_FUNC_IMPLEMENT (mirroring) \ + HB_UNICODE_FUNC_IMPLEMENT (script) \ + HB_UNICODE_FUNC_IMPLEMENT (compose) \ + HB_UNICODE_FUNC_IMPLEMENT (decompose) \ + HB_IF_NOT_DEPRECATED (HB_UNICODE_FUNC_IMPLEMENT (decompose_compatibility)) \ + /* ^--- Add new callbacks here */ + +/* Simple callbacks are those taking a hb_codepoint_t and returning a hb_codepoint_t */ +#define HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE \ + HB_UNICODE_FUNC_IMPLEMENT (hb_unicode_combining_class_t, combining_class) \ + HB_IF_NOT_DEPRECATED (HB_UNICODE_FUNC_IMPLEMENT (unsigned int, eastasian_width)) \ + HB_UNICODE_FUNC_IMPLEMENT (hb_unicode_general_category_t, general_category) \ + HB_UNICODE_FUNC_IMPLEMENT (hb_codepoint_t, mirroring) \ + HB_UNICODE_FUNC_IMPLEMENT (hb_script_t, script) \ + /* ^--- Add new simple callbacks here */ + +struct hb_unicode_funcs_t +{ + hb_object_header_t header; + + hb_unicode_funcs_t *parent; + +#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \ + return_type name (hb_codepoint_t unicode) { return func.name (this, unicode, user_data.name); } +HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE +#undef HB_UNICODE_FUNC_IMPLEMENT + + hb_bool_t compose (hb_codepoint_t a, hb_codepoint_t b, + hb_codepoint_t *ab) + { + *ab = 0; + if (unlikely (!a || !b)) return false; + return func.compose (this, a, b, ab, user_data.compose); + } + + hb_bool_t decompose (hb_codepoint_t ab, + hb_codepoint_t *a, hb_codepoint_t *b) + { + *a = ab; *b = 0; + return func.decompose (this, ab, a, b, user_data.decompose); + } + + unsigned int decompose_compatibility (hb_codepoint_t u, + hb_codepoint_t *decomposed) + { +#ifdef HB_DISABLE_DEPRECATED + unsigned int ret = 0; +#else + unsigned int ret = func.decompose_compatibility (this, u, decomposed, user_data.decompose_compatibility); +#endif + if (ret == 1 && u == decomposed[0]) { + decomposed[0] = 0; + return 0; + } + decomposed[ret] = 0; + return ret; + } + + unsigned int + modified_combining_class (hb_codepoint_t u) + { + /* XXX This hack belongs to the USE shaper (for Tai Tham): + * Reorder SAKOT to ensure it comes after any tone marks. */ + if (unlikely (u == 0x1A60u)) return 254; + + /* XXX This hack belongs to the Tibetan shaper: + * Reorder PADMA to ensure it comes after any vowel marks. */ + if (unlikely (u == 0x0FC6u)) return 254; + /* Reorder TSA -PHRU to reorder before U+0F74 */ + if (unlikely (u == 0x0F39u)) return 127; + + return _hb_modified_combining_class[combining_class (u)]; + } + + static hb_bool_t + is_variation_selector (hb_codepoint_t unicode) + { + /* U+180B..180D MONGOLIAN FREE VARIATION SELECTORs are handled in the + * Arabic shaper. No need to match them here. */ + return unlikely (hb_in_ranges<hb_codepoint_t> (unicode, + 0xFE00u, 0xFE0Fu, /* VARIATION SELECTOR-1..16 */ + 0xE0100u, 0xE01EFu)); /* VARIATION SELECTOR-17..256 */ + } + + /* Default_Ignorable codepoints: + * + * Note: While U+115F, U+1160, U+3164 and U+FFA0 are Default_Ignorable, + * we do NOT want to hide them, as the way Uniscribe has implemented them + * is with regular spacing glyphs, and that's the way fonts are made to work. + * As such, we make exceptions for those four. + * Also ignoring U+1BCA0..1BCA3. https://github.com/harfbuzz/harfbuzz/issues/503 + * + * Unicode 7.0: + * $ grep '; Default_Ignorable_Code_Point ' DerivedCoreProperties.txt | sed 's/;.*#/#/' + * 00AD # Cf SOFT HYPHEN + * 034F # Mn COMBINING GRAPHEME JOINER + * 061C # Cf ARABIC LETTER MARK + * 115F..1160 # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER + * 17B4..17B5 # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA + * 180B..180D # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE + * 180E # Cf MONGOLIAN VOWEL SEPARATOR + * 200B..200F # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK + * 202A..202E # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE + * 2060..2064 # Cf [5] WORD JOINER..INVISIBLE PLUS + * 2065 # Cn <reserved-2065> + * 2066..206F # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES + * 3164 # Lo HANGUL FILLER + * FE00..FE0F # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 + * FEFF # Cf ZERO WIDTH NO-BREAK SPACE + * FFA0 # Lo HALFWIDTH HANGUL FILLER + * FFF0..FFF8 # Cn [9] <reserved-FFF0>..<reserved-FFF8> + * 1BCA0..1BCA3 # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP + * 1D173..1D17A # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE + * E0000 # Cn <reserved-E0000> + * E0001 # Cf LANGUAGE TAG + * E0002..E001F # Cn [30] <reserved-E0002>..<reserved-E001F> + * E0020..E007F # Cf [96] TAG SPACE..CANCEL TAG + * E0080..E00FF # Cn [128] <reserved-E0080>..<reserved-E00FF> + * E0100..E01EF # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + * E01F0..E0FFF # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> + */ + static hb_bool_t + is_default_ignorable (hb_codepoint_t ch) + { + hb_codepoint_t plane = ch >> 16; + if (likely (plane == 0)) + { + /* BMP */ + hb_codepoint_t page = ch >> 8; + switch (page) { + case 0x00: return unlikely (ch == 0x00ADu); + case 0x03: return unlikely (ch == 0x034Fu); + case 0x06: return unlikely (ch == 0x061Cu); + case 0x17: return hb_in_range<hb_codepoint_t> (ch, 0x17B4u, 0x17B5u); + case 0x18: return hb_in_range<hb_codepoint_t> (ch, 0x180Bu, 0x180Eu); + case 0x20: return hb_in_ranges<hb_codepoint_t> (ch, 0x200Bu, 0x200Fu, + 0x202Au, 0x202Eu, + 0x2060u, 0x206Fu); + case 0xFE: return hb_in_range<hb_codepoint_t> (ch, 0xFE00u, 0xFE0Fu) || ch == 0xFEFFu; + case 0xFF: return hb_in_range<hb_codepoint_t> (ch, 0xFFF0u, 0xFFF8u); + default: return false; + } + } + else + { + /* Other planes */ + switch (plane) { + case 0x01: return hb_in_range<hb_codepoint_t> (ch, 0x1D173u, 0x1D17Au); + case 0x0E: return hb_in_range<hb_codepoint_t> (ch, 0xE0000u, 0xE0FFFu); + default: return false; + } + } + } + + /* Space estimates based on: + * https://unicode.org/charts/PDF/U2000.pdf + * https://docs.microsoft.com/en-us/typography/develop/character-design-standards/whitespace + */ + enum space_t { + NOT_SPACE = 0, + SPACE_EM = 1, + SPACE_EM_2 = 2, + SPACE_EM_3 = 3, + SPACE_EM_4 = 4, + SPACE_EM_5 = 5, + SPACE_EM_6 = 6, + SPACE_EM_16 = 16, + SPACE_4_EM_18, /* 4/18th of an EM! */ + SPACE, + SPACE_FIGURE, + SPACE_PUNCTUATION, + SPACE_NARROW, + }; + static space_t + space_fallback_type (hb_codepoint_t u) + { + switch (u) + { + /* All GC=Zs chars that can use a fallback. */ + default: return NOT_SPACE; /* U+1680 OGHAM SPACE MARK */ + case 0x0020u: return SPACE; /* U+0020 SPACE */ + case 0x00A0u: return SPACE; /* U+00A0 NO-BREAK SPACE */ + case 0x2000u: return SPACE_EM_2; /* U+2000 EN QUAD */ + case 0x2001u: return SPACE_EM; /* U+2001 EM QUAD */ + case 0x2002u: return SPACE_EM_2; /* U+2002 EN SPACE */ + case 0x2003u: return SPACE_EM; /* U+2003 EM SPACE */ + case 0x2004u: return SPACE_EM_3; /* U+2004 THREE-PER-EM SPACE */ + case 0x2005u: return SPACE_EM_4; /* U+2005 FOUR-PER-EM SPACE */ + case 0x2006u: return SPACE_EM_6; /* U+2006 SIX-PER-EM SPACE */ + case 0x2007u: return SPACE_FIGURE; /* U+2007 FIGURE SPACE */ + case 0x2008u: return SPACE_PUNCTUATION; /* U+2008 PUNCTUATION SPACE */ + case 0x2009u: return SPACE_EM_5; /* U+2009 THIN SPACE */ + case 0x200Au: return SPACE_EM_16; /* U+200A HAIR SPACE */ + case 0x202Fu: return SPACE_NARROW; /* U+202F NARROW NO-BREAK SPACE */ + case 0x205Fu: return SPACE_4_EM_18; /* U+205F MEDIUM MATHEMATICAL SPACE */ + case 0x3000u: return SPACE_EM; /* U+3000 IDEOGRAPHIC SPACE */ + } + } + + struct { +#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_func_t name; + HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_UNICODE_FUNC_IMPLEMENT + } func; + + struct { +#define HB_UNICODE_FUNC_IMPLEMENT(name) void *name; + HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_UNICODE_FUNC_IMPLEMENT + } user_data; + + struct { +#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_destroy_func_t name; + HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS +#undef HB_UNICODE_FUNC_IMPLEMENT + } destroy; +}; +DECLARE_NULL_INSTANCE (hb_unicode_funcs_t); + + +/* + * Modified combining marks + */ + +/* Hebrew + * + * We permute the "fixed-position" classes 10-26 into the order + * described in the SBL Hebrew manual: + * + * https://www.sbl-site.org/Fonts/SBLHebrewUserManual1.5x.pdf + * + * (as recommended by: + * https://forum.fontlab.com/archive-old-microsoft-volt-group/vista-and-diacritic-ordering/msg22823/) + * + * More details here: + * https://bugzilla.mozilla.org/show_bug.cgi?id=662055 + */ +#define HB_MODIFIED_COMBINING_CLASS_CCC10 22 /* sheva */ +#define HB_MODIFIED_COMBINING_CLASS_CCC11 15 /* hataf segol */ +#define HB_MODIFIED_COMBINING_CLASS_CCC12 16 /* hataf patah */ +#define HB_MODIFIED_COMBINING_CLASS_CCC13 17 /* hataf qamats */ +#define HB_MODIFIED_COMBINING_CLASS_CCC14 23 /* hiriq */ +#define HB_MODIFIED_COMBINING_CLASS_CCC15 18 /* tsere */ +#define HB_MODIFIED_COMBINING_CLASS_CCC16 19 /* segol */ +#define HB_MODIFIED_COMBINING_CLASS_CCC17 20 /* patah */ +#define HB_MODIFIED_COMBINING_CLASS_CCC18 21 /* qamats */ +#define HB_MODIFIED_COMBINING_CLASS_CCC19 14 /* holam */ +#define HB_MODIFIED_COMBINING_CLASS_CCC20 24 /* qubuts */ +#define HB_MODIFIED_COMBINING_CLASS_CCC21 12 /* dagesh */ +#define HB_MODIFIED_COMBINING_CLASS_CCC22 25 /* meteg */ +#define HB_MODIFIED_COMBINING_CLASS_CCC23 13 /* rafe */ +#define HB_MODIFIED_COMBINING_CLASS_CCC24 10 /* shin dot */ +#define HB_MODIFIED_COMBINING_CLASS_CCC25 11 /* sin dot */ +#define HB_MODIFIED_COMBINING_CLASS_CCC26 26 /* point varika */ + +/* + * Arabic + * + * Modify to move Shadda (ccc=33) before other marks. See: + * https://unicode.org/faq/normalization.html#8 + * https://unicode.org/faq/normalization.html#9 + */ +#define HB_MODIFIED_COMBINING_CLASS_CCC27 28 /* fathatan */ +#define HB_MODIFIED_COMBINING_CLASS_CCC28 29 /* dammatan */ +#define HB_MODIFIED_COMBINING_CLASS_CCC29 30 /* kasratan */ +#define HB_MODIFIED_COMBINING_CLASS_CCC30 31 /* fatha */ +#define HB_MODIFIED_COMBINING_CLASS_CCC31 32 /* damma */ +#define HB_MODIFIED_COMBINING_CLASS_CCC32 33 /* kasra */ +#define HB_MODIFIED_COMBINING_CLASS_CCC33 27 /* shadda */ +#define HB_MODIFIED_COMBINING_CLASS_CCC34 34 /* sukun */ +#define HB_MODIFIED_COMBINING_CLASS_CCC35 35 /* superscript alef */ + +/* Syriac */ +#define HB_MODIFIED_COMBINING_CLASS_CCC36 36 /* superscript alaph */ + +/* Telugu + * + * Modify Telugu length marks (ccc=84, ccc=91). + * These are the only matras in the main Indic scripts range that have + * a non-zero ccc. That makes them reorder with the Halant (ccc=9). + * Assign 4 and 5, which are otherwise unassigned. + */ +#define HB_MODIFIED_COMBINING_CLASS_CCC84 4 /* length mark */ +#define HB_MODIFIED_COMBINING_CLASS_CCC91 5 /* ai length mark */ + +/* Thai + * + * Modify U+0E38 and U+0E39 (ccc=103) to be reordered before U+0E3A (ccc=9). + * Assign 3, which is unassigned otherwise. + * Uniscribe does this reordering too. + */ +#define HB_MODIFIED_COMBINING_CLASS_CCC103 3 /* sara u / sara uu */ +#define HB_MODIFIED_COMBINING_CLASS_CCC107 107 /* mai * */ + +/* Lao */ +#define HB_MODIFIED_COMBINING_CLASS_CCC118 118 /* sign u / sign uu */ +#define HB_MODIFIED_COMBINING_CLASS_CCC122 122 /* mai * */ + +/* Tibetan + * + * In case of multiple vowel-signs, use u first (but after achung) + * this allows Dzongkha multi-vowel shortcuts to render correctly + */ +#define HB_MODIFIED_COMBINING_CLASS_CCC129 129 /* sign aa */ +#define HB_MODIFIED_COMBINING_CLASS_CCC130 132 /* sign i */ +#define HB_MODIFIED_COMBINING_CLASS_CCC132 131 /* sign u */ + +/* Misc */ + +#define HB_UNICODE_GENERAL_CATEGORY_IS_MARK(gen_cat) \ + (FLAG_UNSAFE (gen_cat) & \ + (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \ + FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))) + + +/* + * Ranges, used for bsearch tables. + */ + +struct hb_unicode_range_t +{ + static int + cmp (const void *_key, const void *_item) + { + hb_codepoint_t cp = *((hb_codepoint_t *) _key); + const hb_unicode_range_t *range = (hb_unicode_range_t *) _item; + + if (cp < range->start) + return -1; + else if (cp <= range->end) + return 0; + else + return +1; + } + + hb_codepoint_t start; + hb_codepoint_t end; +}; + +/* + * Emoji. + */ + +HB_INTERNAL bool +_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp); + + +extern "C" HB_INTERNAL hb_unicode_funcs_t *hb_ucd_get_unicode_funcs (); + + +#endif /* HB_UNICODE_HH */ diff --git a/thirdparty/harfbuzz/src/hb-uniscribe.cc b/thirdparty/harfbuzz/src/hb-uniscribe.cc new file mode 100644 index 0000000000..48a5dc50ad --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-uniscribe.cc @@ -0,0 +1,1047 @@ +/* + * Copyright © 2011,2012,2013 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb.hh" + +#ifdef HAVE_UNISCRIBE + +#ifdef HB_NO_OT_TAG +#error "Cannot compile 'uniscribe' shaper with HB_NO_OT_TAG." +#endif + +#include "hb-shaper-impl.hh" + +#include <windows.h> +#include <usp10.h> +#include <rpc.h> + +#ifndef E_NOT_SUFFICIENT_BUFFER +#define E_NOT_SUFFICIENT_BUFFER HRESULT_FROM_WIN32 (ERROR_INSUFFICIENT_BUFFER) +#endif + +#include "hb-uniscribe.h" + +#include "hb-open-file.hh" +#include "hb-ot-name-table.hh" +#include "hb-ot-layout.h" + + +/** + * SECTION:hb-uniscribe + * @title: hb-uniscribe + * @short_description: Windows integration + * @include: hb-uniscribe.h + * + * Functions for using HarfBuzz with Windows fonts. + **/ + +typedef HRESULT (WINAPI *SIOT) /*ScriptItemizeOpenType*/( + const WCHAR *pwcInChars, + int cInChars, + int cMaxItems, + const SCRIPT_CONTROL *psControl, + const SCRIPT_STATE *psState, + SCRIPT_ITEM *pItems, + OPENTYPE_TAG *pScriptTags, + int *pcItems +); + +typedef HRESULT (WINAPI *SSOT) /*ScriptShapeOpenType*/( + HDC hdc, + SCRIPT_CACHE *psc, + SCRIPT_ANALYSIS *psa, + OPENTYPE_TAG tagScript, + OPENTYPE_TAG tagLangSys, + int *rcRangeChars, + TEXTRANGE_PROPERTIES **rpRangeProperties, + int cRanges, + const WCHAR *pwcChars, + int cChars, + int cMaxGlyphs, + WORD *pwLogClust, + SCRIPT_CHARPROP *pCharProps, + WORD *pwOutGlyphs, + SCRIPT_GLYPHPROP *pOutGlyphProps, + int *pcGlyphs +); + +typedef HRESULT (WINAPI *SPOT) /*ScriptPlaceOpenType*/( + HDC hdc, + SCRIPT_CACHE *psc, + SCRIPT_ANALYSIS *psa, + OPENTYPE_TAG tagScript, + OPENTYPE_TAG tagLangSys, + int *rcRangeChars, + TEXTRANGE_PROPERTIES **rpRangeProperties, + int cRanges, + const WCHAR *pwcChars, + WORD *pwLogClust, + SCRIPT_CHARPROP *pCharProps, + int cChars, + const WORD *pwGlyphs, + const SCRIPT_GLYPHPROP *pGlyphProps, + int cGlyphs, + int *piAdvance, + GOFFSET *pGoffset, + ABC *pABC +); + + +/* Fallback implementations. */ + +static HRESULT WINAPI +hb_ScriptItemizeOpenType( + const WCHAR *pwcInChars, + int cInChars, + int cMaxItems, + const SCRIPT_CONTROL *psControl, + const SCRIPT_STATE *psState, + SCRIPT_ITEM *pItems, + OPENTYPE_TAG *pScriptTags, + int *pcItems +) +{ +{ + return ScriptItemize (pwcInChars, + cInChars, + cMaxItems, + psControl, + psState, + pItems, + pcItems); +} +} + +static HRESULT WINAPI +hb_ScriptShapeOpenType( + HDC hdc, + SCRIPT_CACHE *psc, + SCRIPT_ANALYSIS *psa, + OPENTYPE_TAG tagScript, + OPENTYPE_TAG tagLangSys, + int *rcRangeChars, + TEXTRANGE_PROPERTIES **rpRangeProperties, + int cRanges, + const WCHAR *pwcChars, + int cChars, + int cMaxGlyphs, + WORD *pwLogClust, + SCRIPT_CHARPROP *pCharProps, + WORD *pwOutGlyphs, + SCRIPT_GLYPHPROP *pOutGlyphProps, + int *pcGlyphs +) +{ + SCRIPT_VISATTR *psva = (SCRIPT_VISATTR *) pOutGlyphProps; + return ScriptShape (hdc, + psc, + pwcChars, + cChars, + cMaxGlyphs, + psa, + pwOutGlyphs, + pwLogClust, + psva, + pcGlyphs); +} + +static HRESULT WINAPI +hb_ScriptPlaceOpenType( + HDC hdc, + SCRIPT_CACHE *psc, + SCRIPT_ANALYSIS *psa, + OPENTYPE_TAG tagScript, + OPENTYPE_TAG tagLangSys, + int *rcRangeChars, + TEXTRANGE_PROPERTIES **rpRangeProperties, + int cRanges, + const WCHAR *pwcChars, + WORD *pwLogClust, + SCRIPT_CHARPROP *pCharProps, + int cChars, + const WORD *pwGlyphs, + const SCRIPT_GLYPHPROP *pGlyphProps, + int cGlyphs, + int *piAdvance, + GOFFSET *pGoffset, + ABC *pABC +) +{ + SCRIPT_VISATTR *psva = (SCRIPT_VISATTR *) pGlyphProps; + return ScriptPlace (hdc, + psc, + pwGlyphs, + cGlyphs, + psva, + psa, + piAdvance, + pGoffset, + pABC); +} + + +struct hb_uniscribe_shaper_funcs_t +{ + SIOT ScriptItemizeOpenType; + SSOT ScriptShapeOpenType; + SPOT ScriptPlaceOpenType; + + void init () + { + HMODULE hinstLib; + this->ScriptItemizeOpenType = nullptr; + this->ScriptShapeOpenType = nullptr; + this->ScriptPlaceOpenType = nullptr; + + hinstLib = GetModuleHandle (TEXT ("usp10.dll")); + if (hinstLib) + { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-function-type" + this->ScriptItemizeOpenType = (SIOT) GetProcAddress (hinstLib, "ScriptItemizeOpenType"); + this->ScriptShapeOpenType = (SSOT) GetProcAddress (hinstLib, "ScriptShapeOpenType"); + this->ScriptPlaceOpenType = (SPOT) GetProcAddress (hinstLib, "ScriptPlaceOpenType"); +#pragma GCC diagnostic pop + } + if (!this->ScriptItemizeOpenType || + !this->ScriptShapeOpenType || + !this->ScriptPlaceOpenType) + { + DEBUG_MSG (UNISCRIBE, nullptr, "OpenType versions of functions not found; falling back."); + this->ScriptItemizeOpenType = hb_ScriptItemizeOpenType; + this->ScriptShapeOpenType = hb_ScriptShapeOpenType; + this->ScriptPlaceOpenType = hb_ScriptPlaceOpenType; + } + } +}; + +#if HB_USE_ATEXIT +static void free_static_uniscribe_shaper_funcs (); +#endif + +static struct hb_uniscribe_shaper_funcs_lazy_loader_t : hb_lazy_loader_t<hb_uniscribe_shaper_funcs_t, + hb_uniscribe_shaper_funcs_lazy_loader_t> +{ + static hb_uniscribe_shaper_funcs_t *create () + { + hb_uniscribe_shaper_funcs_t *funcs = (hb_uniscribe_shaper_funcs_t *) calloc (1, sizeof (hb_uniscribe_shaper_funcs_t)); + if (unlikely (!funcs)) + return nullptr; + + funcs->init (); + +#if HB_USE_ATEXIT + atexit (free_static_uniscribe_shaper_funcs); +#endif + + return funcs; + } + static void destroy (hb_uniscribe_shaper_funcs_t *p) + { + free ((void *) p); + } + static hb_uniscribe_shaper_funcs_t *get_null () + { + return nullptr; + } +} static_uniscribe_shaper_funcs; + +#if HB_USE_ATEXIT +static +void free_static_uniscribe_shaper_funcs () +{ + static_uniscribe_shaper_funcs.free_instance (); +} +#endif + +static hb_uniscribe_shaper_funcs_t * +hb_uniscribe_shaper_get_funcs () +{ + return static_uniscribe_shaper_funcs.get_unconst (); +} + + +struct active_feature_t { + OPENTYPE_FEATURE_RECORD rec; + unsigned int order; + + HB_INTERNAL static int cmp (const void *pa, const void *pb) { + const active_feature_t *a = (const active_feature_t *) pa; + const active_feature_t *b = (const active_feature_t *) pb; + return a->rec.tagFeature < b->rec.tagFeature ? -1 : a->rec.tagFeature > b->rec.tagFeature ? 1 : + a->order < b->order ? -1 : a->order > b->order ? 1 : + a->rec.lParameter < b->rec.lParameter ? -1 : a->rec.lParameter > b->rec.lParameter ? 1 : + 0; + } + bool operator== (const active_feature_t *f) + { return cmp (this, f) == 0; } +}; + +struct feature_event_t { + unsigned int index; + bool start; + active_feature_t feature; + + HB_INTERNAL static int cmp (const void *pa, const void *pb) + { + const feature_event_t *a = (const feature_event_t *) pa; + const feature_event_t *b = (const feature_event_t *) pb; + return a->index < b->index ? -1 : a->index > b->index ? 1 : + a->start < b->start ? -1 : a->start > b->start ? 1 : + active_feature_t::cmp (&a->feature, &b->feature); + } +}; + +struct range_record_t { + TEXTRANGE_PROPERTIES props; + unsigned int index_first; /* == start */ + unsigned int index_last; /* == end - 1 */ +}; + + +/* + * shaper face data + */ + +struct hb_uniscribe_face_data_t { + HANDLE fh; + hb_uniscribe_shaper_funcs_t *funcs; + wchar_t face_name[LF_FACESIZE]; +}; + +/* face_name should point to a wchar_t[LF_FACESIZE] object. */ +static void +_hb_generate_unique_face_name (wchar_t *face_name, unsigned int *plen) +{ + /* We'll create a private name for the font from a UUID using a simple, + * somewhat base64-like encoding scheme */ + const char *enc = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-"; + UUID id; + UuidCreate ((UUID*) &id); + static_assert ((2 + 3 * (16/2) < LF_FACESIZE), ""); + unsigned int name_str_len = 0; + face_name[name_str_len++] = 'F'; + face_name[name_str_len++] = '_'; + unsigned char *p = (unsigned char *) &id; + for (unsigned int i = 0; i < 16; i += 2) + { + /* Spread the 16 bits from two bytes of the UUID across three chars of face_name, + * using the bits in groups of 5,5,6 to select chars from enc. + * This will generate 24 characters; with the 'F_' prefix we already provided, + * the name will be 26 chars (plus the NUL terminator), so will always fit within + * face_name (LF_FACESIZE = 32). */ + face_name[name_str_len++] = enc[p[i] >> 3]; + face_name[name_str_len++] = enc[((p[i] << 2) | (p[i + 1] >> 6)) & 0x1f]; + face_name[name_str_len++] = enc[p[i + 1] & 0x3f]; + } + face_name[name_str_len] = 0; + if (plen) + *plen = name_str_len; +} + +/* Destroys blob. */ +static hb_blob_t * +_hb_rename_font (hb_blob_t *blob, wchar_t *new_name) +{ + /* Create a copy of the font data, with the 'name' table replaced by a + * table that names the font with our private F_* name created above. + * For simplicity, we just append a new 'name' table and update the + * sfnt directory; the original table is left in place, but unused. + * + * The new table will contain just 5 name IDs: family, style, unique, + * full, PS. All of them point to the same name data with our unique name. + */ + + blob = hb_sanitize_context_t ().sanitize_blob<OT::OpenTypeFontFile> (blob); + + unsigned int length, new_length, name_str_len; + const char *orig_sfnt_data = hb_blob_get_data (blob, &length); + + _hb_generate_unique_face_name (new_name, &name_str_len); + + static const uint16_t name_IDs[] = { 1, 2, 3, 4, 6 }; + + unsigned int name_table_length = OT::name::min_size + + ARRAY_LENGTH (name_IDs) * OT::NameRecord::static_size + + name_str_len * 2; /* for name data in UTF16BE form */ + unsigned int padded_name_table_length = ((name_table_length + 3) & ~3); + unsigned int name_table_offset = (length + 3) & ~3; + + new_length = name_table_offset + padded_name_table_length; + void *new_sfnt_data = calloc (1, new_length); + if (!new_sfnt_data) + { + hb_blob_destroy (blob); + return nullptr; + } + + memcpy(new_sfnt_data, orig_sfnt_data, length); + + OT::name &name = StructAtOffset<OT::name> (new_sfnt_data, name_table_offset); + name.format = 0; + name.count = ARRAY_LENGTH (name_IDs); + name.stringOffset = name.get_size (); + for (unsigned int i = 0; i < ARRAY_LENGTH (name_IDs); i++) + { + OT::NameRecord &record = name.nameRecordZ[i]; + record.platformID = 3; + record.encodingID = 1; + record.languageID = 0x0409u; /* English */ + record.nameID = name_IDs[i]; + record.length = name_str_len * 2; + record.offset = 0; + } + + /* Copy string data from new_name, converting wchar_t to UTF16BE. */ + unsigned char *p = &StructAfter<unsigned char> (name); + for (unsigned int i = 0; i < name_str_len; i++) + { + *p++ = new_name[i] >> 8; + *p++ = new_name[i] & 0xff; + } + + /* Adjust name table entry to point to new name table */ + const OT::OpenTypeFontFile &file = * (OT::OpenTypeFontFile *) (new_sfnt_data); + unsigned int face_count = file.get_face_count (); + for (unsigned int face_index = 0; face_index < face_count; face_index++) + { + /* Note: doing multiple edits (ie. TTC) can be unsafe. There may be + * toe-stepping. But we don't really care. */ + const OT::OpenTypeFontFace &face = file.get_face (face_index); + unsigned int index; + if (face.find_table_index (HB_OT_TAG_name, &index)) + { + OT::TableRecord &record = const_cast<OT::TableRecord &> (face.get_table (index)); + record.checkSum.set_for_data (&name, padded_name_table_length); + record.offset = name_table_offset; + record.length = name_table_length; + } + else if (face_index == 0) /* Fail if first face doesn't have 'name' table. */ + { + free (new_sfnt_data); + hb_blob_destroy (blob); + return nullptr; + } + } + + /* The checkSumAdjustment field in the 'head' table is now wrong, + * but that doesn't actually seem to cause any problems so we don't + * bother. */ + + hb_blob_destroy (blob); + return hb_blob_create ((const char *) new_sfnt_data, new_length, + HB_MEMORY_MODE_WRITABLE, new_sfnt_data, free); +} + +hb_uniscribe_face_data_t * +_hb_uniscribe_shaper_face_data_create (hb_face_t *face) +{ + hb_uniscribe_face_data_t *data = (hb_uniscribe_face_data_t *) calloc (1, sizeof (hb_uniscribe_face_data_t)); + if (unlikely (!data)) + return nullptr; + + data->funcs = hb_uniscribe_shaper_get_funcs (); + if (unlikely (!data->funcs)) + { + free (data); + return nullptr; + } + + hb_blob_t *blob = hb_face_reference_blob (face); + if (unlikely (!hb_blob_get_length (blob))) + DEBUG_MSG (UNISCRIBE, face, "Face has empty blob"); + + blob = _hb_rename_font (blob, data->face_name); + if (unlikely (!blob)) + { + free (data); + return nullptr; + } + + DWORD num_fonts_installed; + data->fh = AddFontMemResourceEx ((void *) hb_blob_get_data (blob, nullptr), + hb_blob_get_length (blob), + 0, &num_fonts_installed); + if (unlikely (!data->fh)) + { + DEBUG_MSG (UNISCRIBE, face, "Face AddFontMemResourceEx() failed"); + free (data); + return nullptr; + } + + return data; +} + +void +_hb_uniscribe_shaper_face_data_destroy (hb_uniscribe_face_data_t *data) +{ + RemoveFontMemResourceEx (data->fh); + free (data); +} + + +/* + * shaper font data + */ + +struct hb_uniscribe_font_data_t +{ + HDC hdc; + mutable LOGFONTW log_font; + HFONT hfont; + mutable SCRIPT_CACHE script_cache; + double x_mult, y_mult; /* From LOGFONT space to HB space. */ +}; + +static bool +populate_log_font (LOGFONTW *lf, + hb_font_t *font, + unsigned int font_size) +{ + memset (lf, 0, sizeof (*lf)); + lf->lfHeight = - (int) font_size; + lf->lfCharSet = DEFAULT_CHARSET; + + memcpy (lf->lfFaceName, font->face->data.uniscribe->face_name, sizeof (lf->lfFaceName)); + + return true; +} + +hb_uniscribe_font_data_t * +_hb_uniscribe_shaper_font_data_create (hb_font_t *font) +{ + hb_uniscribe_font_data_t *data = (hb_uniscribe_font_data_t *) calloc (1, sizeof (hb_uniscribe_font_data_t)); + if (unlikely (!data)) + return nullptr; + + int font_size = font->face->get_upem (); /* Default... */ + /* No idea if the following is even a good idea. */ + if (font->y_ppem) + font_size = font->y_ppem; + + if (font_size < 0) + font_size = -font_size; + data->x_mult = (double) font->x_scale / font_size; + data->y_mult = (double) font->y_scale / font_size; + + data->hdc = GetDC (nullptr); + + if (unlikely (!populate_log_font (&data->log_font, font, font_size))) { + DEBUG_MSG (UNISCRIBE, font, "Font populate_log_font() failed"); + _hb_uniscribe_shaper_font_data_destroy (data); + return nullptr; + } + + data->hfont = CreateFontIndirectW (&data->log_font); + if (unlikely (!data->hfont)) { + DEBUG_MSG (UNISCRIBE, font, "Font CreateFontIndirectW() failed"); + _hb_uniscribe_shaper_font_data_destroy (data); + return nullptr; + } + + if (!SelectObject (data->hdc, data->hfont)) { + DEBUG_MSG (UNISCRIBE, font, "Font SelectObject() failed"); + _hb_uniscribe_shaper_font_data_destroy (data); + return nullptr; + } + + return data; +} + +void +_hb_uniscribe_shaper_font_data_destroy (hb_uniscribe_font_data_t *data) +{ + if (data->hdc) + ReleaseDC (nullptr, data->hdc); + if (data->hfont) + DeleteObject (data->hfont); + if (data->script_cache) + ScriptFreeCache (&data->script_cache); + free (data); +} + +/** + * hb_uniscribe_font_get_logfontw: + * @font: The #hb_font_t to work upon + * + * Fetches the LOGFONTW structure that corresponds to the + * specified #hb_font_t font. + * + * Return value: a pointer to the LOGFONTW retrieved + * + **/ +LOGFONTW * +hb_uniscribe_font_get_logfontw (hb_font_t *font) +{ + const hb_uniscribe_font_data_t *data = font->data.uniscribe; + return data ? &data->log_font : nullptr; +} + +/** + * hb_uniscribe_font_get_hfont: + * @font: The #hb_font_t to work upon + * + * Fetches the HFONT handle that corresponds to the + * specified #hb_font_t font. + * + * Return value: the HFONT retreieved + * + **/ +HFONT +hb_uniscribe_font_get_hfont (hb_font_t *font) +{ + const hb_uniscribe_font_data_t *data = font->data.uniscribe; + return data ? data->hfont : nullptr; +} + + +/* + * shaper + */ + + +hb_bool_t +_hb_uniscribe_shape (hb_shape_plan_t *shape_plan, + hb_font_t *font, + hb_buffer_t *buffer, + const hb_feature_t *features, + unsigned int num_features) +{ + hb_face_t *face = font->face; + const hb_uniscribe_face_data_t *face_data = face->data.uniscribe; + const hb_uniscribe_font_data_t *font_data = font->data.uniscribe; + hb_uniscribe_shaper_funcs_t *funcs = face_data->funcs; + + /* + * Set up features. + */ + hb_vector_t<OPENTYPE_FEATURE_RECORD> feature_records; + hb_vector_t<range_record_t> range_records; + if (num_features) + { + /* Sort features by start/end events. */ + hb_vector_t<feature_event_t> feature_events; + for (unsigned int i = 0; i < num_features; i++) + { + active_feature_t feature; + feature.rec.tagFeature = hb_uint32_swap (features[i].tag); + feature.rec.lParameter = features[i].value; + feature.order = i; + + feature_event_t *event; + + event = feature_events.push (); + event->index = features[i].start; + event->start = true; + event->feature = feature; + + event = feature_events.push (); + event->index = features[i].end; + event->start = false; + event->feature = feature; + } + feature_events.qsort (); + /* Add a strategic final event. */ + { + active_feature_t feature; + feature.rec.tagFeature = 0; + feature.rec.lParameter = 0; + feature.order = num_features + 1; + + feature_event_t *event = feature_events.push (); + event->index = 0; /* This value does magic. */ + event->start = false; + event->feature = feature; + } + + /* Scan events and save features for each range. */ + hb_vector_t<active_feature_t> active_features; + unsigned int last_index = 0; + for (unsigned int i = 0; i < feature_events.length; i++) + { + feature_event_t *event = &feature_events[i]; + + if (event->index != last_index) + { + /* Save a snapshot of active features and the range. */ + range_record_t *range = range_records.push (); + + unsigned int offset = feature_records.length; + + active_features.qsort (); + for (unsigned int j = 0; j < active_features.length; j++) + { + if (!j || active_features[j].rec.tagFeature != feature_records[feature_records.length - 1].tagFeature) + { + feature_records.push (active_features[j].rec); + } + else + { + /* Overrides value for existing feature. */ + feature_records[feature_records.length - 1].lParameter = active_features[j].rec.lParameter; + } + } + + /* Will convert to pointer after all is ready, since feature_records.array + * may move as we grow it. */ + range->props.potfRecords = reinterpret_cast<OPENTYPE_FEATURE_RECORD *> (offset); + range->props.cotfRecords = feature_records.length - offset; + range->index_first = last_index; + range->index_last = event->index - 1; + + last_index = event->index; + } + + if (event->start) + { + active_features.push (event->feature); + } + else + { + active_feature_t *feature = active_features.find (&event->feature); + if (feature) + active_features.remove (feature - active_features.arrayZ); + } + } + + if (!range_records.length) /* No active feature found. */ + num_features = 0; + + /* Fixup the pointers. */ + for (unsigned int i = 0; i < range_records.length; i++) + { + range_record_t *range = &range_records[i]; + range->props.potfRecords = (OPENTYPE_FEATURE_RECORD *) feature_records + reinterpret_cast<uintptr_t> (range->props.potfRecords); + } + } + +#define FAIL(...) \ + HB_STMT_START { \ + DEBUG_MSG (UNISCRIBE, nullptr, __VA_ARGS__); \ + return false; \ + } HB_STMT_END + + HRESULT hr; + +retry: + + unsigned int scratch_size; + hb_buffer_t::scratch_buffer_t *scratch = buffer->get_scratch_buffer (&scratch_size); + +#define ALLOCATE_ARRAY(Type, name, len) \ + Type *name = (Type *) scratch; \ + do { \ + unsigned int _consumed = DIV_CEIL ((len) * sizeof (Type), sizeof (*scratch)); \ + assert (_consumed <= scratch_size); \ + scratch += _consumed; \ + scratch_size -= _consumed; \ + } while (0) + +#define utf16_index() var1.u32 + + ALLOCATE_ARRAY (WCHAR, pchars, buffer->len * 2); + + unsigned int chars_len = 0; + for (unsigned int i = 0; i < buffer->len; i++) + { + hb_codepoint_t c = buffer->info[i].codepoint; + buffer->info[i].utf16_index() = chars_len; + if (likely (c <= 0xFFFFu)) + pchars[chars_len++] = c; + else if (unlikely (c > 0x10FFFFu)) + pchars[chars_len++] = 0xFFFDu; + else { + pchars[chars_len++] = 0xD800u + ((c - 0x10000u) >> 10); + pchars[chars_len++] = 0xDC00u + ((c - 0x10000u) & ((1u << 10) - 1)); + } + } + + ALLOCATE_ARRAY (WORD, log_clusters, chars_len); + ALLOCATE_ARRAY (SCRIPT_CHARPROP, char_props, chars_len); + + if (num_features) + { + /* Need log_clusters to assign features. */ + chars_len = 0; + for (unsigned int i = 0; i < buffer->len; i++) + { + hb_codepoint_t c = buffer->info[i].codepoint; + unsigned int cluster = buffer->info[i].cluster; + log_clusters[chars_len++] = cluster; + if (hb_in_range (c, 0x10000u, 0x10FFFFu)) + log_clusters[chars_len++] = cluster; /* Surrogates. */ + } + } + + /* The -2 in the following is to compensate for possible + * alignment needed after the WORD array. sizeof(WORD) == 2. */ + unsigned int glyphs_size = (scratch_size * sizeof (int) - 2) + / (sizeof (WORD) + + sizeof (SCRIPT_GLYPHPROP) + + sizeof (int) + + sizeof (GOFFSET) + + sizeof (uint32_t)); + + ALLOCATE_ARRAY (WORD, glyphs, glyphs_size); + ALLOCATE_ARRAY (SCRIPT_GLYPHPROP, glyph_props, glyphs_size); + ALLOCATE_ARRAY (int, advances, glyphs_size); + ALLOCATE_ARRAY (GOFFSET, offsets, glyphs_size); + ALLOCATE_ARRAY (uint32_t, vis_clusters, glyphs_size); + + /* Note: + * We can't touch the contents of glyph_props. Our fallback + * implementations of Shape and Place functions use that buffer + * by casting it to a different type. It works because they + * both agree about it, but if we want to access it here we + * need address that issue first. + */ + +#undef ALLOCATE_ARRAY + +#define MAX_ITEMS 256 + + SCRIPT_ITEM items[MAX_ITEMS + 1]; + SCRIPT_CONTROL bidi_control = {0}; + SCRIPT_STATE bidi_state = {0}; + ULONG script_tags[MAX_ITEMS]; + int item_count; + + /* MinGW32 doesn't define fMergeNeutralItems, so we bruteforce */ + //bidi_control.fMergeNeutralItems = true; + *(uint32_t*)&bidi_control |= 1u<<24; + + bidi_state.uBidiLevel = HB_DIRECTION_IS_FORWARD (buffer->props.direction) ? 0 : 1; + bidi_state.fOverrideDirection = 1; + + hr = funcs->ScriptItemizeOpenType (pchars, + chars_len, + MAX_ITEMS, + &bidi_control, + &bidi_state, + items, + script_tags, + &item_count); + if (unlikely (FAILED (hr))) + FAIL ("ScriptItemizeOpenType() failed: 0x%08lx", hr); + +#undef MAX_ITEMS + + hb_tag_t lang_tag; + unsigned int lang_count = 1; + hb_ot_tags_from_script_and_language (buffer->props.script, + buffer->props.language, + nullptr, nullptr, + &lang_count, &lang_tag); + OPENTYPE_TAG language_tag = hb_uint32_swap (lang_count ? lang_tag : HB_TAG_NONE); + hb_vector_t<TEXTRANGE_PROPERTIES*> range_properties; + hb_vector_t<int> range_char_counts; + + unsigned int glyphs_offset = 0; + unsigned int glyphs_len; + bool backward = HB_DIRECTION_IS_BACKWARD (buffer->props.direction); + for (int i = 0; i < item_count; i++) + { + unsigned int chars_offset = items[i].iCharPos; + unsigned int item_chars_len = items[i + 1].iCharPos - chars_offset; + + if (num_features) + { + range_properties.shrink (0); + range_char_counts.shrink (0); + + range_record_t *last_range = &range_records[0]; + + for (unsigned int k = chars_offset; k < chars_offset + item_chars_len; k++) + { + range_record_t *range = last_range; + while (log_clusters[k] < range->index_first) + range--; + while (log_clusters[k] > range->index_last) + range++; + if (!range_properties.length || + &range->props != range_properties[range_properties.length - 1]) + { + TEXTRANGE_PROPERTIES **props = range_properties.push (); + int *c = range_char_counts.push (); + if (unlikely (!props || !c)) + { + range_properties.shrink (0); + range_char_counts.shrink (0); + break; + } + *props = &range->props; + *c = 1; + } + else + { + range_char_counts[range_char_counts.length - 1]++; + } + + last_range = range; + } + } + + /* Asking for glyphs in logical order circumvents at least + * one bug in Uniscribe. */ + items[i].a.fLogicalOrder = true; + + retry_shape: + hr = funcs->ScriptShapeOpenType (font_data->hdc, + &font_data->script_cache, + &items[i].a, + script_tags[i], + language_tag, + range_char_counts.arrayZ, + range_properties.arrayZ, + range_properties.length, + pchars + chars_offset, + item_chars_len, + glyphs_size - glyphs_offset, + /* out */ + log_clusters + chars_offset, + char_props + chars_offset, + glyphs + glyphs_offset, + glyph_props + glyphs_offset, + (int *) &glyphs_len); + + if (unlikely (items[i].a.fNoGlyphIndex)) + FAIL ("ScriptShapeOpenType() set fNoGlyphIndex"); + if (unlikely (hr == E_OUTOFMEMORY || hr == E_NOT_SUFFICIENT_BUFFER)) + { + if (unlikely (!buffer->ensure (buffer->allocated * 2))) + FAIL ("Buffer resize failed"); + goto retry; + } + if (unlikely (hr == USP_E_SCRIPT_NOT_IN_FONT)) + { + if (items[i].a.eScript == SCRIPT_UNDEFINED) + FAIL ("ScriptShapeOpenType() failed: Font doesn't support script"); + items[i].a.eScript = SCRIPT_UNDEFINED; + goto retry_shape; + } + if (unlikely (FAILED (hr))) + { + FAIL ("ScriptShapeOpenType() failed: 0x%08lx", hr); + } + + for (unsigned int j = chars_offset; j < chars_offset + item_chars_len; j++) + log_clusters[j] += glyphs_offset; + + hr = funcs->ScriptPlaceOpenType (font_data->hdc, + &font_data->script_cache, + &items[i].a, + script_tags[i], + language_tag, + range_char_counts.arrayZ, + range_properties.arrayZ, + range_properties.length, + pchars + chars_offset, + log_clusters + chars_offset, + char_props + chars_offset, + item_chars_len, + glyphs + glyphs_offset, + glyph_props + glyphs_offset, + glyphs_len, + /* out */ + advances + glyphs_offset, + offsets + glyphs_offset, + nullptr); + if (unlikely (FAILED (hr))) + FAIL ("ScriptPlaceOpenType() failed: 0x%08lx", hr); + + if (DEBUG_ENABLED (UNISCRIBE)) + fprintf (stderr, "Item %d RTL %d LayoutRTL %d LogicalOrder %d ScriptTag %c%c%c%c\n", + i, + items[i].a.fRTL, + items[i].a.fLayoutRTL, + items[i].a.fLogicalOrder, + HB_UNTAG (hb_uint32_swap (script_tags[i]))); + + glyphs_offset += glyphs_len; + } + glyphs_len = glyphs_offset; + + /* Ok, we've got everything we need, now compose output buffer, + * very, *very*, carefully! */ + + /* Calculate visual-clusters. That's what we ship. */ + for (unsigned int i = 0; i < glyphs_len; i++) + vis_clusters[i] = (uint32_t) -1; + for (unsigned int i = 0; i < buffer->len; i++) { + uint32_t *p = &vis_clusters[log_clusters[buffer->info[i].utf16_index()]]; + *p = hb_min (*p, buffer->info[i].cluster); + } + for (unsigned int i = 1; i < glyphs_len; i++) + if (vis_clusters[i] == (uint32_t) -1) + vis_clusters[i] = vis_clusters[i - 1]; + +#undef utf16_index + + if (unlikely (!buffer->ensure (glyphs_len))) + FAIL ("Buffer in error"); + +#undef FAIL + + /* Set glyph infos */ + buffer->len = 0; + for (unsigned int i = 0; i < glyphs_len; i++) + { + hb_glyph_info_t *info = &buffer->info[buffer->len++]; + + info->codepoint = glyphs[i]; + info->cluster = vis_clusters[i]; + + /* The rest is crap. Let's store position info there for now. */ + info->mask = advances[i]; + info->var1.i32 = offsets[i].du; + info->var2.i32 = offsets[i].dv; + } + + /* Set glyph positions */ + buffer->clear_positions (); + double x_mult = font_data->x_mult, y_mult = font_data->y_mult; + for (unsigned int i = 0; i < glyphs_len; i++) + { + hb_glyph_info_t *info = &buffer->info[i]; + hb_glyph_position_t *pos = &buffer->pos[i]; + + /* TODO vertical */ + pos->x_advance = x_mult * (int32_t) info->mask; + pos->x_offset = x_mult * (backward ? -info->var1.i32 : info->var1.i32); + pos->y_offset = y_mult * info->var2.i32; + } + + if (backward) + hb_buffer_reverse (buffer); + + buffer->unsafe_to_break_all (); + + /* Wow, done! */ + return true; +} + + +#endif diff --git a/thirdparty/harfbuzz/src/hb-uniscribe.h b/thirdparty/harfbuzz/src/hb-uniscribe.h new file mode 100644 index 0000000000..4e4ef9986a --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-uniscribe.h @@ -0,0 +1,46 @@ +/* + * Copyright © 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_UNISCRIBE_H +#define HB_UNISCRIBE_H + +#include "hb.h" + +#include <windows.h> + +HB_BEGIN_DECLS + + +HB_EXTERN LOGFONTW * +hb_uniscribe_font_get_logfontw (hb_font_t *font); + +HB_EXTERN HFONT +hb_uniscribe_font_get_hfont (hb_font_t *font); + + +HB_END_DECLS + +#endif /* HB_UNISCRIBE_H */ diff --git a/thirdparty/harfbuzz/src/hb-utf.hh b/thirdparty/harfbuzz/src/hb-utf.hh new file mode 100644 index 0000000000..ff5712d16d --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-utf.hh @@ -0,0 +1,453 @@ +/* + * Copyright © 2011,2012,2014 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_UTF_HH +#define HB_UTF_HH + +#include "hb.hh" + +#include "hb-open-type.hh" + + +struct hb_utf8_t +{ + typedef uint8_t codepoint_t; + + static const codepoint_t * + next (const codepoint_t *text, + const codepoint_t *end, + hb_codepoint_t *unicode, + hb_codepoint_t replacement) + { + /* Written to only accept well-formed sequences. + * Based on ideas from ICU's U8_NEXT. + * Generates one "replacement" for each ill-formed byte. */ + + hb_codepoint_t c = *text++; + + if (c > 0x7Fu) + { + if (hb_in_range<hb_codepoint_t> (c, 0xC2u, 0xDFu)) /* Two-byte */ + { + unsigned int t1; + if (likely (text < end && + (t1 = text[0] - 0x80u) <= 0x3Fu)) + { + c = ((c&0x1Fu)<<6) | t1; + text++; + } + else + goto error; + } + else if (hb_in_range<hb_codepoint_t> (c, 0xE0u, 0xEFu)) /* Three-byte */ + { + unsigned int t1, t2; + if (likely (1 < end - text && + (t1 = text[0] - 0x80u) <= 0x3Fu && + (t2 = text[1] - 0x80u) <= 0x3Fu)) + { + c = ((c&0xFu)<<12) | (t1<<6) | t2; + if (unlikely (c < 0x0800u || hb_in_range<hb_codepoint_t> (c, 0xD800u, 0xDFFFu))) + goto error; + text += 2; + } + else + goto error; + } + else if (hb_in_range<hb_codepoint_t> (c, 0xF0u, 0xF4u)) /* Four-byte */ + { + unsigned int t1, t2, t3; + if (likely (2 < end - text && + (t1 = text[0] - 0x80u) <= 0x3Fu && + (t2 = text[1] - 0x80u) <= 0x3Fu && + (t3 = text[2] - 0x80u) <= 0x3Fu)) + { + c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3; + if (unlikely (!hb_in_range<hb_codepoint_t> (c, 0x10000u, 0x10FFFFu))) + goto error; + text += 3; + } + else + goto error; + } + else + goto error; + } + + *unicode = c; + return text; + + error: + *unicode = replacement; + return text; + } + + static const codepoint_t * + prev (const codepoint_t *text, + const codepoint_t *start, + hb_codepoint_t *unicode, + hb_codepoint_t replacement) + { + const codepoint_t *end = text--; + while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) + text--; + + if (likely (next (text, end, unicode, replacement) == end)) + return text; + + *unicode = replacement; + return end - 1; + } + + static unsigned int + strlen (const codepoint_t *text) + { return ::strlen ((const char *) text); } + + static unsigned int + encode_len (hb_codepoint_t unicode) + { + if (unicode < 0x0080u) return 1; + if (unicode < 0x0800u) return 2; + if (unicode < 0x10000u) return 3; + if (unicode < 0x110000u) return 4; + return 3; + } + + static codepoint_t * + encode (codepoint_t *text, + const codepoint_t *end, + hb_codepoint_t unicode) + { + if (unlikely (unicode >= 0xD800u && (unicode <= 0xDFFFu || unicode > 0x10FFFFu))) + unicode = 0xFFFDu; + if (unicode < 0x0080u) + *text++ = unicode; + else if (unicode < 0x0800u) + { + if (end - text >= 2) + { + *text++ = 0xC0u + (0x1Fu & (unicode >> 6)); + *text++ = 0x80u + (0x3Fu & (unicode )); + } + } + else if (unicode < 0x10000u) + { + if (end - text >= 3) + { + *text++ = 0xE0u + (0x0Fu & (unicode >> 12)); + *text++ = 0x80u + (0x3Fu & (unicode >> 6)); + *text++ = 0x80u + (0x3Fu & (unicode )); + } + } + else + { + if (end - text >= 4) + { + *text++ = 0xF0u + (0x07u & (unicode >> 18)); + *text++ = 0x80u + (0x3Fu & (unicode >> 12)); + *text++ = 0x80u + (0x3Fu & (unicode >> 6)); + *text++ = 0x80u + (0x3Fu & (unicode )); + } + } + return text; + } +}; + + +template <typename TCodepoint> +struct hb_utf16_xe_t +{ + static_assert (sizeof (TCodepoint) == 2, ""); + typedef TCodepoint codepoint_t; + + static const codepoint_t * + next (const codepoint_t *text, + const codepoint_t *end, + hb_codepoint_t *unicode, + hb_codepoint_t replacement) + { + hb_codepoint_t c = *text++; + + if (likely (!hb_in_range<hb_codepoint_t> (c, 0xD800u, 0xDFFFu))) + { + *unicode = c; + return text; + } + + if (likely (c <= 0xDBFFu && text < end)) + { + /* High-surrogate in c */ + hb_codepoint_t l = *text; + if (likely (hb_in_range<hb_codepoint_t> (l, 0xDC00u, 0xDFFFu))) + { + /* Low-surrogate in l */ + *unicode = (c << 10) + l - ((0xD800u << 10) - 0x10000u + 0xDC00u); + text++; + return text; + } + } + + /* Lonely / out-of-order surrogate. */ + *unicode = replacement; + return text; + } + + static const codepoint_t * + prev (const codepoint_t *text, + const codepoint_t *start, + hb_codepoint_t *unicode, + hb_codepoint_t replacement) + { + hb_codepoint_t c = *--text; + + if (likely (!hb_in_range<hb_codepoint_t> (c, 0xD800u, 0xDFFFu))) + { + *unicode = c; + return text; + } + + if (likely (c >= 0xDC00u && start < text)) + { + /* Low-surrogate in c */ + hb_codepoint_t h = text[-1]; + if (likely (hb_in_range<hb_codepoint_t> (h, 0xD800u, 0xDBFFu))) + { + /* High-surrogate in h */ + *unicode = (h << 10) + c - ((0xD800u << 10) - 0x10000u + 0xDC00u); + text--; + return text; + } + } + + /* Lonely / out-of-order surrogate. */ + *unicode = replacement; + return text; + } + + + static unsigned int + strlen (const codepoint_t *text) + { + unsigned int l = 0; + while (*text++) l++; + return l; + } + + static unsigned int + encode_len (hb_codepoint_t unicode) + { + return unicode < 0x10000 ? 1 : 2; + } + + static codepoint_t * + encode (codepoint_t *text, + const codepoint_t *end, + hb_codepoint_t unicode) + { + if (unlikely (unicode >= 0xD800u && (unicode <= 0xDFFFu || unicode > 0x10FFFFu))) + unicode = 0xFFFDu; + if (unicode < 0x10000u) + *text++ = unicode; + else if (end - text >= 2) + { + unicode -= 0x10000u; + *text++ = 0xD800u + (unicode >> 10); + *text++ = 0xDC00u + (unicode & 0x03FFu); + } + return text; + } +}; + +typedef hb_utf16_xe_t<uint16_t> hb_utf16_t; +typedef hb_utf16_xe_t<OT::HBUINT16> hb_utf16_be_t; + + +template <typename TCodepoint, bool validate=true> +struct hb_utf32_xe_t +{ + static_assert (sizeof (TCodepoint) == 4, ""); + typedef TCodepoint codepoint_t; + + static const TCodepoint * + next (const TCodepoint *text, + const TCodepoint *end HB_UNUSED, + hb_codepoint_t *unicode, + hb_codepoint_t replacement) + { + hb_codepoint_t c = *unicode = *text++; + if (validate && unlikely (c >= 0xD800u && (c <= 0xDFFFu || c > 0x10FFFFu))) + *unicode = replacement; + return text; + } + + static const TCodepoint * + prev (const TCodepoint *text, + const TCodepoint *start HB_UNUSED, + hb_codepoint_t *unicode, + hb_codepoint_t replacement) + { + hb_codepoint_t c = *unicode = *--text; + if (validate && unlikely (c >= 0xD800u && (c <= 0xDFFFu || c > 0x10FFFFu))) + *unicode = replacement; + return text; + } + + static unsigned int + strlen (const TCodepoint *text) + { + unsigned int l = 0; + while (*text++) l++; + return l; + } + + static unsigned int + encode_len (hb_codepoint_t unicode HB_UNUSED) + { + return 1; + } + + static codepoint_t * + encode (codepoint_t *text, + const codepoint_t *end HB_UNUSED, + hb_codepoint_t unicode) + { + if (validate && unlikely (unicode >= 0xD800u && (unicode <= 0xDFFFu || unicode > 0x10FFFFu))) + unicode = 0xFFFDu; + *text++ = unicode; + return text; + } +}; + +typedef hb_utf32_xe_t<uint32_t> hb_utf32_t; +typedef hb_utf32_xe_t<uint32_t, false> hb_utf32_novalidate_t; + + +struct hb_latin1_t +{ + typedef uint8_t codepoint_t; + + static const codepoint_t * + next (const codepoint_t *text, + const codepoint_t *end HB_UNUSED, + hb_codepoint_t *unicode, + hb_codepoint_t replacement HB_UNUSED) + { + *unicode = *text++; + return text; + } + + static const codepoint_t * + prev (const codepoint_t *text, + const codepoint_t *start HB_UNUSED, + hb_codepoint_t *unicode, + hb_codepoint_t replacement HB_UNUSED) + { + *unicode = *--text; + return text; + } + + static unsigned int + strlen (const codepoint_t *text) + { + unsigned int l = 0; + while (*text++) l++; + return l; + } + + static unsigned int + encode_len (hb_codepoint_t unicode HB_UNUSED) + { + return 1; + } + + static codepoint_t * + encode (codepoint_t *text, + const codepoint_t *end HB_UNUSED, + hb_codepoint_t unicode) + { + if (unlikely (unicode >= 0x0100u)) + unicode = '?'; + *text++ = unicode; + return text; + } +}; + + +struct hb_ascii_t +{ + typedef uint8_t codepoint_t; + + static const codepoint_t * + next (const codepoint_t *text, + const codepoint_t *end HB_UNUSED, + hb_codepoint_t *unicode, + hb_codepoint_t replacement HB_UNUSED) + { + *unicode = *text++; + if (*unicode >= 0x0080u) + *unicode = replacement; + return text; + } + + static const codepoint_t * + prev (const codepoint_t *text, + const codepoint_t *start HB_UNUSED, + hb_codepoint_t *unicode, + hb_codepoint_t replacement) + { + *unicode = *--text; + if (*unicode >= 0x0080u) + *unicode = replacement; + return text; + } + + static unsigned int + strlen (const codepoint_t *text) + { + unsigned int l = 0; + while (*text++) l++; + return l; + } + + static unsigned int + encode_len (hb_codepoint_t unicode HB_UNUSED) + { + return 1; + } + + static codepoint_t * + encode (codepoint_t *text, + const codepoint_t *end HB_UNUSED, + hb_codepoint_t unicode) + { + if (unlikely (unicode >= 0x0080u)) + unicode = '?'; + *text++ = unicode; + return text; + } +}; + +#endif /* HB_UTF_HH */ diff --git a/thirdparty/harfbuzz/src/hb-vector.hh b/thirdparty/harfbuzz/src/hb-vector.hh new file mode 100644 index 0000000000..079b94a6b4 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-vector.hh @@ -0,0 +1,313 @@ +/* + * Copyright © 2017,2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_VECTOR_HH +#define HB_VECTOR_HH + +#include "hb.hh" +#include "hb-array.hh" +#include "hb-null.hh" + + +template <typename Type> +struct hb_vector_t +{ + typedef Type item_t; + static constexpr unsigned item_size = hb_static_size (Type); + + hb_vector_t () { init (); } + hb_vector_t (const hb_vector_t &o) + { + init (); + alloc (o.length); + hb_copy (o, *this); + } + hb_vector_t (hb_vector_t &&o) + { + allocated = o.allocated; + length = o.length; + arrayZ = o.arrayZ; + o.init (); + } + ~hb_vector_t () { fini (); } + + private: + int allocated; /* == -1 means allocation failed. */ + public: + unsigned int length; + public: + Type *arrayZ; + + void init () + { + allocated = length = 0; + arrayZ = nullptr; + } + + void fini () + { + free (arrayZ); + init (); + } + void fini_deep () + { + unsigned int count = length; + for (unsigned int i = 0; i < count; i++) + arrayZ[i].fini (); + fini (); + } + + void reset () { resize (0); } + + hb_vector_t& operator = (const hb_vector_t &o) + { + reset (); + alloc (o.length); + hb_copy (o, *this); + return *this; + } + hb_vector_t& operator = (hb_vector_t &&o) + { + fini (); + allocated = o.allocated; + length = o.length; + arrayZ = o.arrayZ; + o.init (); + return *this; + } + + hb_bytes_t as_bytes () const + { return hb_bytes_t ((const char *) arrayZ, length * item_size); } + + bool operator == (const hb_vector_t &o) const { return as_array () == o.as_array (); } + bool operator != (const hb_vector_t &o) const { return !(*this == o); } + uint32_t hash () const { return as_array ().hash (); } + + Type& operator [] (int i_) + { + unsigned int i = (unsigned int) i_; + if (unlikely (i >= length)) + return Crap (Type); + return arrayZ[i]; + } + const Type& operator [] (int i_) const + { + unsigned int i = (unsigned int) i_; + if (unlikely (i >= length)) + return Null (Type); + return arrayZ[i]; + } + + Type& tail () { return (*this)[length - 1]; } + const Type& tail () const { return (*this)[length - 1]; } + + explicit operator bool () const { return length; } + unsigned get_size () const { return length * item_size; } + + /* Sink interface. */ + template <typename T> + hb_vector_t& operator << (T&& v) { push (hb_forward<T> (v)); return *this; } + + hb_array_t< Type> as_array () { return hb_array (arrayZ, length); } + hb_array_t<const Type> as_array () const { return hb_array (arrayZ, length); } + + /* Iterator. */ + typedef hb_array_t<const Type> iter_t; + typedef hb_array_t< Type> writer_t; + iter_t iter () const { return as_array (); } + writer_t writer () { return as_array (); } + operator iter_t () const { return iter (); } + operator writer_t () { return writer (); } + + hb_array_t<const Type> sub_array (unsigned int start_offset, unsigned int count) const + { return as_array ().sub_array (start_offset, count); } + hb_array_t<const Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */) const + { return as_array ().sub_array (start_offset, count); } + hb_array_t<Type> sub_array (unsigned int start_offset, unsigned int count) + { return as_array ().sub_array (start_offset, count); } + hb_array_t<Type> sub_array (unsigned int start_offset, unsigned int *count = nullptr /* IN/OUT */) + { return as_array ().sub_array (start_offset, count); } + + hb_sorted_array_t<Type> as_sorted_array () + { return hb_sorted_array (arrayZ, length); } + hb_sorted_array_t<const Type> as_sorted_array () const + { return hb_sorted_array (arrayZ, length); } + + template <typename T> explicit operator T * () { return arrayZ; } + template <typename T> explicit operator const T * () const { return arrayZ; } + + Type * operator + (unsigned int i) { return arrayZ + i; } + const Type * operator + (unsigned int i) const { return arrayZ + i; } + + Type *push () + { + if (unlikely (!resize (length + 1))) + return &Crap (Type); + return &arrayZ[length - 1]; + } + template <typename T> + Type *push (T&& v) + { + Type *p = push (); + *p = hb_forward<T> (v); + return p; + } + + bool in_error () const { return allocated < 0; } + + /* Allocate for size but don't adjust length. */ + bool alloc (unsigned int size) + { + if (unlikely (allocated < 0)) + return false; + + if (likely (size <= (unsigned) allocated)) + return true; + + /* Reallocate */ + + unsigned int new_allocated = allocated; + while (size >= new_allocated) + new_allocated += (new_allocated >> 1) + 8; + + Type *new_array = nullptr; + bool overflows = + (int) new_allocated < 0 || + (new_allocated < (unsigned) allocated) || + hb_unsigned_mul_overflows (new_allocated, sizeof (Type)); + if (likely (!overflows)) + new_array = (Type *) realloc (arrayZ, new_allocated * sizeof (Type)); + + if (unlikely (!new_array)) + { + allocated = -1; + return false; + } + + arrayZ = new_array; + allocated = new_allocated; + + return true; + } + + bool resize (int size_) + { + unsigned int size = size_ < 0 ? 0u : (unsigned int) size_; + if (!alloc (size)) + return false; + + if (size > length) + memset (arrayZ + length, 0, (size - length) * sizeof (*arrayZ)); + + length = size; + return true; + } + + Type pop () + { + if (!length) return Null (Type); + return hb_move (arrayZ[--length]); /* Does this move actually work? */ + } + + void remove (unsigned int i) + { + if (unlikely (i >= length)) + return; + memmove (static_cast<void *> (&arrayZ[i]), + static_cast<void *> (&arrayZ[i + 1]), + (length - i - 1) * sizeof (Type)); + length--; + } + + void shrink (int size_) + { + unsigned int size = size_ < 0 ? 0u : (unsigned int) size_; + if (size < length) + length = size; + } + + template <typename T> + Type *find (T v) + { + for (unsigned int i = 0; i < length; i++) + if (arrayZ[i] == v) + return &arrayZ[i]; + return nullptr; + } + template <typename T> + const Type *find (T v) const + { + for (unsigned int i = 0; i < length; i++) + if (arrayZ[i] == v) + return &arrayZ[i]; + return nullptr; + } + + void qsort (int (*cmp)(const void*, const void*)) + { as_array ().qsort (cmp); } + void qsort (unsigned int start = 0, unsigned int end = (unsigned int) -1) + { as_array ().qsort (start, end); } + + template <typename T> + Type *lsearch (const T &x, Type *not_found = nullptr) + { return as_array ().lsearch (x, not_found); } + template <typename T> + const Type *lsearch (const T &x, const Type *not_found = nullptr) const + { return as_array ().lsearch (x, not_found); } + template <typename T> + bool lfind (const T &x, unsigned *pos = nullptr) const + { return as_array ().lfind (x, pos); } +}; + +template <typename Type> +struct hb_sorted_vector_t : hb_vector_t<Type> +{ + hb_sorted_array_t< Type> as_array () { return hb_sorted_array (this->arrayZ, this->length); } + hb_sorted_array_t<const Type> as_array () const { return hb_sorted_array (this->arrayZ, this->length); } + + /* Iterator. */ + typedef hb_sorted_array_t<const Type> const_iter_t; + typedef hb_sorted_array_t< Type> iter_t; + const_iter_t iter () const { return as_array (); } + const_iter_t citer () const { return as_array (); } + iter_t iter () { return as_array (); } + operator iter_t () { return iter (); } + operator const_iter_t () const { return iter (); } + + template <typename T> + Type *bsearch (const T &x, Type *not_found = nullptr) + { return as_array ().bsearch (x, not_found); } + template <typename T> + const Type *bsearch (const T &x, const Type *not_found = nullptr) const + { return as_array ().bsearch (x, not_found); } + template <typename T> + bool bfind (const T &x, unsigned int *i = nullptr, + hb_bfind_not_found_t not_found = HB_BFIND_NOT_FOUND_DONT_STORE, + unsigned int to_store = (unsigned int) -1) const + { return as_array ().bfind (x, i, not_found, to_store); } +}; + +#endif /* HB_VECTOR_HH */ diff --git a/thirdparty/harfbuzz/src/hb-version.h b/thirdparty/harfbuzz/src/hb-version.h new file mode 100644 index 0000000000..92d61b8cdb --- /dev/null +++ b/thirdparty/harfbuzz/src/hb-version.h @@ -0,0 +1,66 @@ +/* + * Copyright © 2011 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_H_IN +#error "Include <hb.h> instead." +#endif + +#ifndef HB_VERSION_H +#define HB_VERSION_H + +#include "hb-common.h" + +HB_BEGIN_DECLS + + +#define HB_VERSION_MAJOR 2 +#define HB_VERSION_MINOR 7 +#define HB_VERSION_MICRO 2 + +#define HB_VERSION_STRING "2.7.2" + +#define HB_VERSION_ATLEAST(major,minor,micro) \ + ((major)*10000+(minor)*100+(micro) <= \ + HB_VERSION_MAJOR*10000+HB_VERSION_MINOR*100+HB_VERSION_MICRO) + + +HB_EXTERN void +hb_version (unsigned int *major, + unsigned int *minor, + unsigned int *micro); + +HB_EXTERN const char * +hb_version_string (void); + +HB_EXTERN hb_bool_t +hb_version_atleast (unsigned int major, + unsigned int minor, + unsigned int micro); + + +HB_END_DECLS + +#endif /* HB_VERSION_H */ diff --git a/thirdparty/harfbuzz/src/hb.h b/thirdparty/harfbuzz/src/hb.h new file mode 100644 index 0000000000..360686ca68 --- /dev/null +++ b/thirdparty/harfbuzz/src/hb.h @@ -0,0 +1,50 @@ +/* + * Copyright © 2009 Red Hat, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + */ + +#ifndef HB_H +#define HB_H +#define HB_H_IN + +#include "hb-blob.h" +#include "hb-buffer.h" +#include "hb-common.h" +#include "hb-deprecated.h" +#include "hb-draw.h" +#include "hb-face.h" +#include "hb-font.h" +#include "hb-map.h" +#include "hb-set.h" +#include "hb-shape.h" +#include "hb-shape-plan.h" +#include "hb-style.h" +#include "hb-unicode.h" +#include "hb-version.h" + +HB_BEGIN_DECLS +HB_END_DECLS + +#undef HB_H_IN +#endif /* HB_H */ diff --git a/thirdparty/harfbuzz/src/hb.hh b/thirdparty/harfbuzz/src/hb.hh new file mode 100644 index 0000000000..274a0e98db --- /dev/null +++ b/thirdparty/harfbuzz/src/hb.hh @@ -0,0 +1,634 @@ +/* + * Copyright © 2007,2008,2009 Red Hat, Inc. + * Copyright © 2011,2012 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Red Hat Author(s): Behdad Esfahbod + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_HH +#define HB_HH + + +#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC +#ifdef _MSC_VER +#pragma warning( disable: 4068 ) /* Unknown pragma */ +#endif +#if defined(__GNUC__) || defined(__clang__) +/* Rules: + * + * - All pragmas are declared GCC even if they are clang ones. Otherwise GCC + * nags, even though we instruct it to ignore -Wunknown-pragmas. ¯\_(ツ)_/¯ + * + * - Within each category, keep sorted. + * + * - Warnings whose scope can be expanded in future compiler versions shall + * be declared as "warning". Otherwise, either ignored or error. + */ + +/* Setup. Don't sort order within this category. */ +#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_WARNING +#pragma GCC diagnostic warning "-Wall" +#pragma GCC diagnostic warning "-Wextra" +#endif +#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_IGNORED +#pragma GCC diagnostic ignored "-Wpragmas" +#pragma GCC diagnostic ignored "-Wunknown-pragmas" +#pragma GCC diagnostic ignored "-Wunknown-warning-option" +#endif +#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_WARNING +//#pragma GCC diagnostic warning "-Weverything" +#endif + +/* Error. Should never happen. */ +#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_ERROR +#pragma GCC diagnostic error "-Wc++11-narrowing" +#pragma GCC diagnostic error "-Wcast-align" +#pragma GCC diagnostic error "-Wcast-function-type" +#pragma GCC diagnostic error "-Wdelete-non-virtual-dtor" +#pragma GCC diagnostic error "-Wembedded-directive" +#pragma GCC diagnostic error "-Wextra-semi-stmt" +#pragma GCC diagnostic error "-Wformat-security" +#pragma GCC diagnostic error "-Wimplicit-function-declaration" +#pragma GCC diagnostic error "-Winit-self" +#pragma GCC diagnostic error "-Winjected-class-name" +#pragma GCC diagnostic error "-Wmissing-braces" +#pragma GCC diagnostic error "-Wmissing-declarations" +#pragma GCC diagnostic error "-Wmissing-prototypes" +#pragma GCC diagnostic error "-Wnested-externs" +#pragma GCC diagnostic error "-Wold-style-definition" +#pragma GCC diagnostic error "-Wpointer-arith" +#pragma GCC diagnostic error "-Wredundant-decls" +#pragma GCC diagnostic error "-Wreorder" +#pragma GCC diagnostic error "-Wsign-compare" +#pragma GCC diagnostic error "-Wstrict-prototypes" +#pragma GCC diagnostic error "-Wstring-conversion" +#pragma GCC diagnostic error "-Wswitch-enum" +#pragma GCC diagnostic error "-Wtautological-overlap-compare" +#pragma GCC diagnostic error "-Wunneeded-internal-declaration" +#pragma GCC diagnostic error "-Wunused" +#pragma GCC diagnostic error "-Wunused-local-typedefs" +#pragma GCC diagnostic error "-Wunused-value" +#pragma GCC diagnostic error "-Wunused-variable" +#pragma GCC diagnostic error "-Wvla" +#pragma GCC diagnostic error "-Wwrite-strings" +#endif + +/* Warning. To be investigated if happens. */ +#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_WARNING +#pragma GCC diagnostic warning "-Wbuiltin-macro-redefined" +#pragma GCC diagnostic warning "-Wdeprecated" +#pragma GCC diagnostic warning "-Wdeprecated-declarations" +#pragma GCC diagnostic warning "-Wdisabled-optimization" +#pragma GCC diagnostic warning "-Wdouble-promotion" +#pragma GCC diagnostic warning "-Wformat=2" +#pragma GCC diagnostic warning "-Wignored-pragma-optimize" +#pragma GCC diagnostic warning "-Wlogical-op" +#pragma GCC diagnostic warning "-Wmaybe-uninitialized" +#pragma GCC diagnostic warning "-Wmissing-format-attribute" +#pragma GCC diagnostic warning "-Wundef" +#pragma GCC diagnostic warning "-Wunused-but-set-variable" +#endif + +/* Ignored currently, but should be fixed at some point. */ +#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_IGNORED +#pragma GCC diagnostic ignored "-Wconversion" // TODO fix +#pragma GCC diagnostic ignored "-Wformat-signedness" // TODO fix +#pragma GCC diagnostic ignored "-Wshadow" // TODO fix +#pragma GCC diagnostic ignored "-Wunsafe-loop-optimizations" // TODO fix +#pragma GCC diagnostic ignored "-Wunused-parameter" // TODO fix +#endif + +/* Ignored intentionally. */ +#ifndef HB_NO_PRAGMA_GCC_DIAGNOSTIC_IGNORED +#pragma GCC diagnostic ignored "-Wclass-memaccess" +#pragma GCC diagnostic ignored "-Wformat-nonliteral" +#pragma GCC diagnostic ignored "-Wformat-zero-length" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#pragma GCC diagnostic ignored "-Wpacked" // Erratic impl in clang +#pragma GCC diagnostic ignored "-Wstrict-aliasing" +#pragma GCC diagnostic ignored "-Wtype-limits" +#pragma GCC diagnostic ignored "-Wc++11-compat" // only gcc raises it +#endif + +#endif +#endif + + +#include "hb-config.hh" + + +/* + * Following added based on what AC_USE_SYSTEM_EXTENSIONS adds to + * config.h.in. Copied here for the convenience of those embedding + * HarfBuzz and not using our build system. + */ +/* Enable extensions on AIX 3, Interix. */ +#ifndef _ALL_SOURCE +# define _ALL_SOURCE 1 +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# define _TANDEM_SOURCE 1 +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif + +#if defined (_MSC_VER) && defined (HB_DLL_EXPORT) +#define HB_EXTERN __declspec (dllexport) extern +#endif + +#include "hb.h" +#define HB_H_IN +#include "hb-ot.h" +#define HB_OT_H_IN +#include "hb-aat.h" +#define HB_AAT_H_IN + +#include <limits.h> +#include <math.h> +#include <float.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <assert.h> +#include <stdio.h> +#include <stdarg.h> + +#if (defined(_MSC_VER) && _MSC_VER >= 1500) || defined(__MINGW32__) +#ifdef __MINGW32_VERSION +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#else +#include <intrin.h> +#endif +#endif + +#ifdef _WIN32 +#include <windows.h> +#include <winapifamily.h> +#endif + +#define HB_PASTE1(a,b) a##b +#define HB_PASTE(a,b) HB_PASTE1(a,b) + + +/* Compile-time custom allocator support. */ + +#if !defined(HB_CUSTOM_MALLOC) \ + && defined(hb_malloc_impl) \ + && defined(hb_calloc_impl) \ + && defined(hb_realloc_impl) \ + && defined(hb_free_impl) +#define HB_CUSTOM_MALLOC +#endif + +#ifdef HB_CUSTOM_MALLOC +extern "C" void* hb_malloc_impl(size_t size); +extern "C" void* hb_calloc_impl(size_t nmemb, size_t size); +extern "C" void* hb_realloc_impl(void *ptr, size_t size); +extern "C" void hb_free_impl(void *ptr); +#define malloc hb_malloc_impl +#define calloc hb_calloc_impl +#define realloc hb_realloc_impl +#define free hb_free_impl +#endif + + +/* + * Compiler attributes + */ + +#if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE__) +#define likely(expr) (__builtin_expect (!!(expr), 1)) +#define unlikely(expr) (__builtin_expect (!!(expr), 0)) +#else +#define likely(expr) (expr) +#define unlikely(expr) (expr) +#endif + +#if !defined(__GNUC__) && !defined(__clang__) +#undef __attribute__ +#define __attribute__(x) +#endif + +#if defined(__GNUC__) && (__GNUC__ >= 3) +#define HB_PURE_FUNC __attribute__((pure)) +#define HB_CONST_FUNC __attribute__((const)) +#define HB_PRINTF_FUNC(format_idx, arg_idx) __attribute__((__format__ (__printf__, format_idx, arg_idx))) +#else +#define HB_PURE_FUNC +#define HB_CONST_FUNC +#define HB_PRINTF_FUNC(format_idx, arg_idx) +#endif +#if defined(__GNUC__) && (__GNUC__ >= 4) || (__clang__) +#define HB_UNUSED __attribute__((unused)) +#elif defined(_MSC_VER) /* https://github.com/harfbuzz/harfbuzz/issues/635 */ +#define HB_UNUSED __pragma(warning(suppress: 4100 4101)) +#else +#define HB_UNUSED +#endif + +#ifndef HB_INTERNAL +# if !defined(HB_NO_VISIBILITY) && !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(_MSC_VER) && !defined(__SUNPRO_CC) +# define HB_INTERNAL __attribute__((__visibility__("hidden"))) +# elif defined(__MINGW32__) + /* We use -export-symbols on mingw32, since it does not support visibility attributes. */ +# define HB_INTERNAL +# elif defined (_MSC_VER) && defined (HB_DLL_EXPORT) + /* We do not try to export internal symbols on Visual Studio */ +# define HB_INTERNAL +#else +# define HB_INTERNAL +# define HB_NO_VISIBILITY 1 +# endif +#endif + +/* https://github.com/harfbuzz/harfbuzz/issues/1651 */ +#if defined(__clang__) && __clang_major__ < 10 +#define static_const static +#else +#define static_const static const +#endif + +#if defined(__GNUC__) && (__GNUC__ >= 3) +#define HB_FUNC __PRETTY_FUNCTION__ +#elif defined(_MSC_VER) +#define HB_FUNC __FUNCSIG__ +#else +#define HB_FUNC __func__ +#endif + +#if defined(__SUNPRO_CC) && (__SUNPRO_CC < 0x5140) +/* https://github.com/harfbuzz/harfbuzz/issues/630 */ +#define __restrict +#endif + +/* + * Borrowed from https://bugzilla.mozilla.org/show_bug.cgi?id=1215411 + * HB_FALLTHROUGH is an annotation to suppress compiler warnings about switch + * cases that fall through without a break or return statement. HB_FALLTHROUGH + * is only needed on cases that have code: + * + * switch (foo) { + * case 1: // These cases have no code. No fallthrough annotations are needed. + * case 2: + * case 3: + * foo = 4; // This case has code, so a fallthrough annotation is needed: + * HB_FALLTHROUGH; + * default: + * return foo; + * } + */ +#if defined(__clang__) && __cplusplus >= 201103L + /* clang's fallthrough annotations are only available starting in C++11. */ +# define HB_FALLTHROUGH [[clang::fallthrough]] +#elif defined(__GNUC__) && (__GNUC__ >= 7) + /* GNU fallthrough attribute is available from GCC7 */ +# define HB_FALLTHROUGH __attribute__((fallthrough)) +#elif defined(_MSC_VER) + /* + * MSVC's __fallthrough annotations are checked by /analyze (Code Analysis): + * https://msdn.microsoft.com/en-us/library/ms235402%28VS.80%29.aspx + */ +# include <sal.h> +# define HB_FALLTHROUGH __fallthrough +#else +# define HB_FALLTHROUGH /* FALLTHROUGH */ +#endif + +/* A tag to enforce use of return value for a function */ +#if __cplusplus >= 201703L +# define HB_NODISCARD [[nodiscard]] +#elif defined(__GNUC__) || defined(__clang__) +# define HB_NODISCARD __attribute__((warn_unused_result)) +#elif defined(_MSC_VER) +# define HB_NODISCARD _Check_return_ +#else +# define HB_NODISCARD +#endif +#define hb_success_t HB_NODISCARD bool + +/* https://github.com/harfbuzz/harfbuzz/issues/1852 */ +#if defined(__clang__) && !(defined(_AIX) && (defined(__IBMCPP__) || defined(__ibmxl__))) +/* Disable certain sanitizer errors. */ +/* https://github.com/harfbuzz/harfbuzz/issues/1247 */ +#define HB_NO_SANITIZE_SIGNED_INTEGER_OVERFLOW __attribute__((no_sanitize("signed-integer-overflow"))) +#else +#define HB_NO_SANITIZE_SIGNED_INTEGER_OVERFLOW +#endif + + +#ifdef _WIN32 + /* We need Windows Vista for both Uniscribe backend and for + * MemoryBarrier. We don't support compiling on Windows XP, + * though we run on it fine. */ +# if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x0600 +# undef _WIN32_WINNT +# endif +# ifndef _WIN32_WINNT +# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) +# define _WIN32_WINNT 0x0600 +# endif +# endif +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN 1 +# endif +# ifndef STRICT +# define STRICT 1 +# endif + +# if defined(_WIN32_WCE) + /* Some things not defined on Windows CE. */ +# define vsnprintf _vsnprintf +# ifndef HB_NO_GETENV +# define HB_NO_GETENV +# endif +# if _WIN32_WCE < 0x800 +# define HB_NO_SETLOCALE +# define HB_NO_ERRNO +# endif +# elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) +# ifndef HB_NO_GETENV +# define HB_NO_GETENV +# endif +# endif +# if defined(_MSC_VER) && _MSC_VER < 1900 +# define snprintf _snprintf +# endif +#endif + +#ifdef HB_NO_GETENV +#define getenv(Name) nullptr +#endif + +#ifndef HB_NO_ERRNO +# include <errno.h> +#else +static int HB_UNUSED _hb_errno = 0; +# undef errno +# define errno _hb_errno +#endif + +#if defined(HAVE_ATEXIT) && !defined(HB_USE_ATEXIT) +/* atexit() is only safe to be called from shared libraries on certain + * platforms. Whitelist. + * https://bugs.freedesktop.org/show_bug.cgi?id=82246 */ +# if defined(__linux) && defined(__GLIBC_PREREQ) +# if __GLIBC_PREREQ(2,3) +/* From atexit() manpage, it's safe with glibc 2.2.3 on Linux. */ +# define HB_USE_ATEXIT 1 +# endif +# elif defined(_MSC_VER) || defined(__MINGW32__) +/* For MSVC: + * https://msdn.microsoft.com/en-us/library/tze57ck3.aspx + * https://msdn.microsoft.com/en-us/library/zk17ww08.aspx + * mingw32 headers say atexit is safe to use in shared libraries. + */ +# define HB_USE_ATEXIT 1 +# elif defined(__ANDROID__) +/* This is available since Android NKD r8 or r8b: + * https://issuetracker.google.com/code/p/android/issues/detail?id=6455 + */ +# define HB_USE_ATEXIT 1 +# elif defined(__APPLE__) +/* For macOS and related platforms, the atexit man page indicates + * that it will be invoked when the library is unloaded, not only + * at application exit. + */ +# define HB_USE_ATEXIT 1 +# endif +#endif +#ifdef HB_NO_ATEXIT +# undef HB_USE_ATEXIT +#endif +#ifndef HB_USE_ATEXIT +# define HB_USE_ATEXIT 0 +#endif + +#define HB_STMT_START do +#define HB_STMT_END while (0) + +/* Static-assert as expression. */ +template <unsigned int cond> class hb_assert_constant_t; +template <> class hb_assert_constant_t<1> {}; +#define ASSERT_STATIC_EXPR_ZERO(_cond) (0 * (unsigned int) sizeof (hb_assert_constant_t<_cond>)) + +/* Lets assert int types. Saves trouble down the road. */ +static_assert ((sizeof (int8_t) == 1), ""); +static_assert ((sizeof (uint8_t) == 1), ""); +static_assert ((sizeof (int16_t) == 2), ""); +static_assert ((sizeof (uint16_t) == 2), ""); +static_assert ((sizeof (int32_t) == 4), ""); +static_assert ((sizeof (uint32_t) == 4), ""); +static_assert ((sizeof (int64_t) == 8), ""); +static_assert ((sizeof (uint64_t) == 8), ""); +static_assert ((sizeof (hb_codepoint_t) == 4), ""); +static_assert ((sizeof (hb_position_t) == 4), ""); +static_assert ((sizeof (hb_mask_t) == 4), ""); +static_assert ((sizeof (hb_var_int_t) == 4), ""); + +#define HB_DELETE_COPY_ASSIGN(TypeName) \ + TypeName(const TypeName&) = delete; \ + void operator=(const TypeName&) = delete +#define HB_DELETE_CREATE_COPY_ASSIGN(TypeName) \ + TypeName() = delete; \ + TypeName(const TypeName&) = delete; \ + void operator=(const TypeName&) = delete + + +/* Flags */ + +/* Enable bitwise ops on enums marked as flags_t */ +/* To my surprise, looks like the function resolver is happy to silently cast + * one enum to another... So this doesn't provide the type-checking that I + * originally had in mind... :(. + * + * For MSVC warnings, see: https://github.com/harfbuzz/harfbuzz/pull/163 + */ +#ifdef _MSC_VER +# pragma warning(disable:4200) +# pragma warning(disable:4800) +#endif +#define HB_MARK_AS_FLAG_T(T) \ + extern "C++" { \ + static inline T operator | (T l, T r) { return T ((unsigned) l | (unsigned) r); } \ + static inline T operator & (T l, T r) { return T ((unsigned) l & (unsigned) r); } \ + static inline T operator ^ (T l, T r) { return T ((unsigned) l ^ (unsigned) r); } \ + static inline T operator ~ (T r) { return T (~(unsigned int) r); } \ + static inline T& operator |= (T &l, T r) { l = l | r; return l; } \ + static inline T& operator &= (T& l, T r) { l = l & r; return l; } \ + static inline T& operator ^= (T& l, T r) { l = l ^ r; return l; } \ + } \ + static_assert (true, "") + +/* Useful for set-operations on small enums. + * For example, for testing "x ∈ {x1, x2, x3}" use: + * (FLAG_UNSAFE(x) & (FLAG(x1) | FLAG(x2) | FLAG(x3))) + */ +#define FLAG(x) (ASSERT_STATIC_EXPR_ZERO ((unsigned)(x) < 32) + (((uint32_t) 1U) << (unsigned)(x))) +#define FLAG_UNSAFE(x) ((unsigned)(x) < 32 ? (((uint32_t) 1U) << (unsigned)(x)) : 0) +#define FLAG_RANGE(x,y) (ASSERT_STATIC_EXPR_ZERO ((x) < (y)) + FLAG(y+1) - FLAG(x)) +#define FLAG64(x) (ASSERT_STATIC_EXPR_ZERO ((unsigned)(x) < 64) + (((uint64_t) 1ULL) << (unsigned)(x))) +#define FLAG64_UNSAFE(x) ((unsigned)(x) < 64 ? (((uint64_t) 1ULL) << (unsigned)(x)) : 0) + + +/* Size signifying variable-sized array */ +#ifndef HB_VAR_ARRAY +#define HB_VAR_ARRAY 1 +#endif + +static inline float +_hb_roundf (float x) { return floorf (x + .5f); } +#define roundf(x) _hb_roundf(x) + +/* Endian swap, used in Windows related backends */ +static inline uint16_t hb_uint16_swap (const uint16_t v) +{ return (v >> 8) | (v << 8); } +static inline uint32_t hb_uint32_swap (const uint32_t v) +{ return (hb_uint16_swap (v) << 16) | hb_uint16_swap (v >> 16); } + +/* + * Big-endian integers. Here because fundamental. + */ + +template <typename Type, int Bytes> struct BEInt; + +template <typename Type> +struct BEInt<Type, 1> +{ + public: + BEInt<Type, 1>& operator = (Type V) + { + v = V; + return *this; + } + operator Type () const { return v; } + private: uint8_t v; +}; +template <typename Type> +struct BEInt<Type, 2> +{ + public: + BEInt<Type, 2>& operator = (Type V) + { + v[0] = (V >> 8) & 0xFF; + v[1] = (V ) & 0xFF; + return *this; + } + operator Type () const + { +#if ((defined(__GNUC__) && __GNUC__ >= 5) || defined(__clang__)) && \ + defined(__BYTE_ORDER) && \ + (__BYTE_ORDER == __LITTLE_ENDIAN || __BYTE_ORDER == __BIG_ENDIAN) + /* Spoon-feed the compiler a big-endian integer with alignment 1. + * https://github.com/harfbuzz/harfbuzz/pull/1398 */ + struct __attribute__((packed)) packed_uint16_t { uint16_t v; }; +#if __BYTE_ORDER == __LITTLE_ENDIAN + return __builtin_bswap16 (((packed_uint16_t *) this)->v); +#else /* __BYTE_ORDER == __BIG_ENDIAN */ + return ((packed_uint16_t *) this)->v; +#endif +#endif + return (v[0] << 8) + + (v[1] ); + } + private: uint8_t v[2]; +}; +template <typename Type> +struct BEInt<Type, 3> +{ + public: + BEInt<Type, 3>& operator = (Type V) + { + v[0] = (V >> 16) & 0xFF; + v[1] = (V >> 8) & 0xFF; + v[2] = (V ) & 0xFF; + return *this; + } + operator Type () const + { + return (v[0] << 16) + + (v[1] << 8) + + (v[2] ); + } + private: uint8_t v[3]; +}; +template <typename Type> +struct BEInt<Type, 4> +{ + public: + BEInt<Type, 4>& operator = (Type V) + { + v[0] = (V >> 24) & 0xFF; + v[1] = (V >> 16) & 0xFF; + v[2] = (V >> 8) & 0xFF; + v[3] = (V ) & 0xFF; + return *this; + } + operator Type () const + { + return (v[0] << 24) + + (v[1] << 16) + + (v[2] << 8) + + (v[3] ); + } + private: uint8_t v[4]; +}; + + +/* + * For lack of a better place, put Zawgyi script hack here. + * https://github.com/harfbuzz/harfbuzz/issues/1162 + */ + +#define HB_SCRIPT_MYANMAR_ZAWGYI ((hb_script_t) HB_TAG ('Q','a','a','g')) + + +/* Headers we include for everyone. Keep topologically sorted by dependency. + * They express dependency amongst themselves, but no other file should include + * them directly.*/ +#include "hb-meta.hh" +#include "hb-mutex.hh" +#include "hb-number.hh" +#include "hb-atomic.hh" // Requires: hb-meta +#include "hb-null.hh" // Requires: hb-meta +#include "hb-algs.hh" // Requires: hb-meta hb-null hb-number +#include "hb-iter.hh" // Requires: hb-algs hb-meta +#include "hb-debug.hh" // Requires: hb-algs hb-atomic +#include "hb-array.hh" // Requires: hb-algs hb-iter hb-null +#include "hb-vector.hh" // Requires: hb-array hb-null +#include "hb-object.hh" // Requires: hb-atomic hb-mutex hb-vector + +#endif /* HB_HH */ diff --git a/thirdparty/icu4c/APIChangeReport.md b/thirdparty/icu4c/APIChangeReport.md new file mode 100644 index 0000000000..0cf9ed5bfc --- /dev/null +++ b/thirdparty/icu4c/APIChangeReport.md @@ -0,0 +1,380 @@ + + +<!-- + Copyright © 2019 and later: Unicode, Inc. and others. + License & terms of use: http://www.unicode.org/copyright.html +--> + +# ICU4C API Comparison: ICU 67 with ICU 68 + +> _Note_ Markdown format of this document is new for ICU 65. + +- [Removed from ICU 67](#removed) +- [Deprecated or Obsoleted in ICU 68](#deprecated) +- [Changed in ICU 68](#changed) +- [Promoted to stable in ICU 68](#promoted) +- [Added in ICU 68](#added) +- [Other existing drafts in ICU 68](#other) +- [Signature Simplifications](#simplifications) + +## Removed + +Removed from ICU 67 + +| File | API | ICU 67 | ICU 68 | +|---|---|---|---| +| measunit.h | LocalArray<MeasureUnit> icu::MeasureUnit::splitToSingleUnits(int32_t&, UErrorCode&) const | InternalICU 67 | (missing) +| measunit.h | int32_t icu::MeasureUnit::getIndex() const | Internal | (missing) +| measunit.h | <tt>static</tt> int32_t icu::MeasureUnit::getIndexCount() | Internal | (missing) +| measunit.h | <tt>static</tt> int32_t icu::MeasureUnit::internalGetIndexForTypeAndSubtype(const char*, const char*) | Internal | (missing) +| nounit.h | UClassID icu::NoUnit::getDynamicClassID() const | DraftICU 60 | (missing) +| nounit.h | icu::NoUnit::NoUnit(const NoUnit&) | DraftICU 60 | (missing) +| nounit.h | icu::NoUnit::~NoUnit() | DraftICU 60 | (missing) +| nounit.h | <tt>static</tt> NoUnit icu::NoUnit::base() | DraftICU 60 | (missing) +| nounit.h | <tt>static</tt> NoUnit icu::NoUnit::percent() | DraftICU 60 | (missing) +| nounit.h | <tt>static</tt> NoUnit icu::NoUnit::permille() | DraftICU 60 | (missing) +| nounit.h | <tt>static</tt> UClassID icu::NoUnit::getStaticClassID() | DraftICU 60 | (missing) +| nounit.h | void* icu::NoUnit::clone() const | DraftICU 60 | (missing) + +## Deprecated + +Deprecated or Obsoleted in ICU 68 + +| File | API | ICU 67 | ICU 68 | +|---|---|---|---| +| numberrangeformatter.h | UnicodeString icu::number::FormattedNumberRange::getFirstDecimal(UErrorCode&) const | DraftICU 63 | DeprecatedICU 68 +| numberrangeformatter.h | UnicodeString icu::number::FormattedNumberRange::getSecondDecimal(UErrorCode&) const | DraftICU 63 | DeprecatedICU 68 +| umachine.h | <tt>#define</tt> FALSE | StableICU 2.0 | DeprecatedICU 68 +| umachine.h | <tt>#define</tt> TRUE | StableICU 2.0 | DeprecatedICU 68 + +## Changed + +Changed in ICU 68 (old, new) + + + +| File | API | ICU 67 | ICU 68 | +|---|---|---|---| +| bytestrie.h | BytesTrie& icu::BytesTrie::resetToState64(uint64_t) | Draft→StableICU 65 +| bytestrie.h | uint64_t icu::BytesTrie::getState64() const | Draft→StableICU 65 +| localebuilder.h | UBool icu::LocaleBuilder::copyErrorTo(UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::addSupportedLocale(const Locale&) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::operator=(Builder&&) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setDefaultLocale(const Locale*) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setSupportedLocales(Iter, Iter) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator&) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setSupportedLocalesFromListString(StringPiece) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setSupportedLocalesViaConverter(Iter, Iter, Conv) | Draft→StableICU 65 +| localematcher.h | Locale icu::LocaleMatcher::Result::makeResolvedLocale(UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | LocaleMatcher icu::LocaleMatcher::Builder::build(UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | LocaleMatcher& icu::LocaleMatcher::operator=(LocaleMatcher&&) | Draft→StableICU 65 +| localematcher.h | Result icu::LocaleMatcher::getBestMatchResult(Locale::Iterator&, UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | Result icu::LocaleMatcher::getBestMatchResult(const Locale&, UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | Result& icu::LocaleMatcher::Result::operator=(Result&&) | Draft→StableICU 65 +| localematcher.h | UBool icu::LocaleMatcher::Builder::copyErrorTo(UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | const Locale* icu::LocaleMatcher::Result::getDesiredLocale() const | Draft→StableICU 65 +| localematcher.h | const Locale* icu::LocaleMatcher::Result::getSupportedLocale() const | Draft→StableICU 65 +| localematcher.h | const Locale* icu::LocaleMatcher::getBestMatch(Locale::Iterator&, UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | const Locale* icu::LocaleMatcher::getBestMatch(const Locale&, UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | const Locale* icu::LocaleMatcher::getBestMatchForListString(StringPiece, UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | <tt>enum</tt> ULocMatchDemotion::ULOCMATCH_DEMOTION_NONE | Draft→StableICU 65 +| localematcher.h | <tt>enum</tt> ULocMatchDemotion::ULOCMATCH_DEMOTION_REGION | Draft→StableICU 65 +| localematcher.h | <tt>enum</tt> ULocMatchFavorSubtag::ULOCMATCH_FAVOR_LANGUAGE | Draft→StableICU 65 +| localematcher.h | <tt>enum</tt> ULocMatchFavorSubtag::ULOCMATCH_FAVOR_SCRIPT | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::Builder::Builder() | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::Builder::Builder(Builder&&) | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::Builder::~Builder() | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::LocaleMatcher(LocaleMatcher&&) | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::Result::Result(Result&&) | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::Result::~Result() | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::~LocaleMatcher() | Draft→StableICU 65 +| localematcher.h | int32_t icu::LocaleMatcher::Result::getDesiredIndex() const | Draft→StableICU 65 +| localematcher.h | int32_t icu::LocaleMatcher::Result::getSupportedIndex() const | Draft→StableICU 65 +| locid.h | UBool icu::Locale::ConvertingIterator< Iter, Conv >::hasNext() const override | Draft→StableICU 65 +| locid.h | UBool icu::Locale::Iterator::hasNext() const | Draft→StableICU 65 +| locid.h | UBool icu::Locale::RangeIterator< Iter >::hasNext() const override | Draft→StableICU 65 +| locid.h | const Locale& icu::Locale::ConvertingIterator< Iter, Conv >::next() override | Draft→StableICU 65 +| locid.h | const Locale& icu::Locale::Iterator::next() | Draft→StableICU 65 +| locid.h | const Locale& icu::Locale::RangeIterator< Iter >::next() override | Draft→StableICU 65 +| locid.h | icu::Locale::ConvertingIterator< Iter, Conv >::ConvertingIterator(Iter, Iter, Conv) | Draft→StableICU 65 +| locid.h | icu::Locale::Iterator::~Iterator() | Draft→StableICU 65 +| locid.h | icu::Locale::RangeIterator< Iter >::RangeIterator(Iter, Iter) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getBar() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDecade() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDotPerCentimeter() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDotPerInch() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getEm() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getMegapixel() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPascal() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPixel() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPixelPerCentimeter() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPixelPerInch() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getThermUs() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createBar(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDecade(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDotPerCentimeter(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDotPerInch(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createEm(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createMegapixel(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPascal(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPixel(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPixelPerCentimeter(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPixelPerInch(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createThermUs(UErrorCode&) | Draft→StableICU 65 +| numberformatter.h | StringClass icu::number::FormattedNumber::toDecimalNumber(UErrorCode&) const | Draft→StableICU 65 +| numberrangeformatter.h | UnicodeString icu::number::FormattedNumberRange::getFirstDecimal(UErrorCode&) const | DraftICU 63 | DeprecatedICU 68 +| numberrangeformatter.h | UnicodeString icu::number::FormattedNumberRange::getSecondDecimal(UErrorCode&) const | DraftICU 63 | DeprecatedICU 68 +| reldatefmt.h | <tt>enum</tt> UDateAbsoluteUnit::UDAT_ABSOLUTE_HOUR | Draft→StableICU 65 +| reldatefmt.h | <tt>enum</tt> UDateAbsoluteUnit::UDAT_ABSOLUTE_MINUTE | Draft→StableICU 65 +| stringpiece.h | icu::StringPiece::StringPiece(T) | Draft→StableICU 65 +| ucal.h | int32_t ucal_getHostTimeZone(UChar*, int32_t, UErrorCode*) | Draft→StableICU 65 +| ucharstrie.h | UCharsTrie& icu::UCharsTrie::resetToState64(uint64_t) | Draft→StableICU 65 +| ucharstrie.h | uint64_t icu::UCharsTrie::getState64() const | Draft→StableICU 65 +| uloc.h | UEnumeration* uloc_openAvailableByType(ULocAvailableType, UErrorCode*) | Draft→StableICU 65 +| uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_DEFAULT | Draft→StableICU 65 +| uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_ONLY_LEGACY_ALIASES | Draft→StableICU 65 +| uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_WITH_LEGACY_ALIASES | Draft→StableICU 65 +| umachine.h | <tt>#define</tt> FALSE | StableICU 2.0 | DeprecatedICU 68 +| umachine.h | <tt>#define</tt> TRUE | StableICU 2.0 | DeprecatedICU 68 +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_BUNDLE | Draft→StableICU 65 +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_DATA_FILE | Draft→StableICU 65 +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_RES_FILE | Draft→StableICU 65 +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_START | Draft→StableICU 65 + +## Promoted + +Promoted to stable in ICU 68 + +| File | API | ICU 67 | ICU 68 | +|---|---|---|---| +| bytestrie.h | BytesTrie& icu::BytesTrie::resetToState64(uint64_t) | Draft→StableICU 65 +| bytestrie.h | uint64_t icu::BytesTrie::getState64() const | Draft→StableICU 65 +| localebuilder.h | UBool icu::LocaleBuilder::copyErrorTo(UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::addSupportedLocale(const Locale&) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::operator=(Builder&&) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setDefaultLocale(const Locale*) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setSupportedLocales(Iter, Iter) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator&) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setSupportedLocalesFromListString(StringPiece) | Draft→StableICU 65 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setSupportedLocalesViaConverter(Iter, Iter, Conv) | Draft→StableICU 65 +| localematcher.h | Locale icu::LocaleMatcher::Result::makeResolvedLocale(UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | LocaleMatcher icu::LocaleMatcher::Builder::build(UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | LocaleMatcher& icu::LocaleMatcher::operator=(LocaleMatcher&&) | Draft→StableICU 65 +| localematcher.h | Result icu::LocaleMatcher::getBestMatchResult(Locale::Iterator&, UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | Result icu::LocaleMatcher::getBestMatchResult(const Locale&, UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | Result& icu::LocaleMatcher::Result::operator=(Result&&) | Draft→StableICU 65 +| localematcher.h | UBool icu::LocaleMatcher::Builder::copyErrorTo(UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | const Locale* icu::LocaleMatcher::Result::getDesiredLocale() const | Draft→StableICU 65 +| localematcher.h | const Locale* icu::LocaleMatcher::Result::getSupportedLocale() const | Draft→StableICU 65 +| localematcher.h | const Locale* icu::LocaleMatcher::getBestMatch(Locale::Iterator&, UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | const Locale* icu::LocaleMatcher::getBestMatch(const Locale&, UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | const Locale* icu::LocaleMatcher::getBestMatchForListString(StringPiece, UErrorCode&) const | Draft→StableICU 65 +| localematcher.h | <tt>enum</tt> ULocMatchDemotion::ULOCMATCH_DEMOTION_NONE | Draft→StableICU 65 +| localematcher.h | <tt>enum</tt> ULocMatchDemotion::ULOCMATCH_DEMOTION_REGION | Draft→StableICU 65 +| localematcher.h | <tt>enum</tt> ULocMatchFavorSubtag::ULOCMATCH_FAVOR_LANGUAGE | Draft→StableICU 65 +| localematcher.h | <tt>enum</tt> ULocMatchFavorSubtag::ULOCMATCH_FAVOR_SCRIPT | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::Builder::Builder() | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::Builder::Builder(Builder&&) | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::Builder::~Builder() | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::LocaleMatcher(LocaleMatcher&&) | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::Result::Result(Result&&) | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::Result::~Result() | Draft→StableICU 65 +| localematcher.h | icu::LocaleMatcher::~LocaleMatcher() | Draft→StableICU 65 +| localematcher.h | int32_t icu::LocaleMatcher::Result::getDesiredIndex() const | Draft→StableICU 65 +| localematcher.h | int32_t icu::LocaleMatcher::Result::getSupportedIndex() const | Draft→StableICU 65 +| locid.h | UBool icu::Locale::ConvertingIterator< Iter, Conv >::hasNext() const override | Draft→StableICU 65 +| locid.h | UBool icu::Locale::Iterator::hasNext() const | Draft→StableICU 65 +| locid.h | UBool icu::Locale::RangeIterator< Iter >::hasNext() const override | Draft→StableICU 65 +| locid.h | const Locale& icu::Locale::ConvertingIterator< Iter, Conv >::next() override | Draft→StableICU 65 +| locid.h | const Locale& icu::Locale::Iterator::next() | Draft→StableICU 65 +| locid.h | const Locale& icu::Locale::RangeIterator< Iter >::next() override | Draft→StableICU 65 +| locid.h | icu::Locale::ConvertingIterator< Iter, Conv >::ConvertingIterator(Iter, Iter, Conv) | Draft→StableICU 65 +| locid.h | icu::Locale::Iterator::~Iterator() | Draft→StableICU 65 +| locid.h | icu::Locale::RangeIterator< Iter >::RangeIterator(Iter, Iter) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getBar() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDecade() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDotPerCentimeter() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDotPerInch() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getEm() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getMegapixel() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPascal() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPixel() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPixelPerCentimeter() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPixelPerInch() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getThermUs() | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createBar(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDecade(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDotPerCentimeter(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDotPerInch(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createEm(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createMegapixel(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPascal(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPixel(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPixelPerCentimeter(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPixelPerInch(UErrorCode&) | Draft→StableICU 65 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createThermUs(UErrorCode&) | Draft→StableICU 65 +| numberformatter.h | StringClass icu::number::FormattedNumber::toDecimalNumber(UErrorCode&) const | Draft→StableICU 65 +| reldatefmt.h | <tt>enum</tt> UDateAbsoluteUnit::UDAT_ABSOLUTE_HOUR | Draft→StableICU 65 +| reldatefmt.h | <tt>enum</tt> UDateAbsoluteUnit::UDAT_ABSOLUTE_MINUTE | Draft→StableICU 65 +| stringpiece.h | icu::StringPiece::StringPiece(T) | Draft→StableICU 65 +| ucal.h | int32_t ucal_getHostTimeZone(UChar*, int32_t, UErrorCode*) | Draft→StableICU 65 +| ucharstrie.h | UCharsTrie& icu::UCharsTrie::resetToState64(uint64_t) | Draft→StableICU 65 +| ucharstrie.h | uint64_t icu::UCharsTrie::getState64() const | Draft→StableICU 65 +| uloc.h | UEnumeration* uloc_openAvailableByType(ULocAvailableType, UErrorCode*) | Draft→StableICU 65 +| uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_DEFAULT | Draft→StableICU 65 +| uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_ONLY_LEGACY_ALIASES | Draft→StableICU 65 +| uloc.h | <tt>enum</tt> ULocAvailableType::ULOC_AVAILABLE_WITH_LEGACY_ALIASES | Draft→StableICU 65 +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_BUNDLE | Draft→StableICU 65 +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_DATA_FILE | Draft→StableICU 65 +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_RES_FILE | Draft→StableICU 65 +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UDATA_START | Draft→StableICU 65 + +## Added + +Added in ICU 68 + +| File | API | ICU 67 | ICU 68 | +|---|---|---|---| +| dtitvfmt.h | UDisplayContext icu::DateIntervalFormat::getContext(UDisplayContextType, UErrorCode&) const | (missing) | DraftICU 68 +| dtitvfmt.h | void icu::DateIntervalFormat::setContext(UDisplayContext, UErrorCode&) | (missing) | DraftICU 68 +| dtptngen.h | <tt>static</tt> DateTimePatternGenerator* icu::DateTimePatternGenerator::createInstanceNoStdPat(const Locale&, UErrorCode&) | (missing) | Internal +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setMaxDistance(const Locale&, const Locale&) | (missing) | DraftICU 68 +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setNoDefaultLocale() | (missing) | DraftICU 68 +| localematcher.h | UBool icu::LocaleMatcher::isMatch(const Locale&, const Locale&, UErrorCode&) const | (missing) | DraftICU 68 +| measunit.h | int32_t icu::MeasureUnit::getOffset() const | (missing) | Internal +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getCandela() | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDessertSpoon() | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDessertSpoonImperial() | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDot() | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDram() | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getDrop() | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getEarthRadius() | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getGrain() | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getJigger() | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getLumen() | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getPinch() | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::getQuartImperial() | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createCandela(UErrorCode&) | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDessertSpoon(UErrorCode&) | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDessertSpoonImperial(UErrorCode&) | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDot(UErrorCode&) | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDram(UErrorCode&) | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createDrop(UErrorCode&) | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createEarthRadius(UErrorCode&) | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createGrain(UErrorCode&) | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createJigger(UErrorCode&) | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createLumen(UErrorCode&) | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createPinch(UErrorCode&) | (missing) | DraftICU 68 +| measunit.h | <tt>static</tt> MeasureUnit* icu::MeasureUnit::createQuartImperial(UErrorCode&) | (missing) | DraftICU 68 +| measunit.h | std::pair< LocalArray< MeasureUnit >, int32_t > icu::MeasureUnit::splitToSingleUnits(UErrorCode&) const | (missing) | DraftICU 68 +| numberformatter.h | Derived icu::number::NumberFormatterSettings< Derived >::usage(StringPiece) const& | (missing) | DraftICU 68 +| numberformatter.h | Derived icu::number::NumberFormatterSettings< Derived >::usage(StringPiece)&& | (missing) | DraftICU 68 +| numberformatter.h | MeasureUnit icu::number::FormattedNumber::getOutputUnit(UErrorCode&) const | (missing) | DraftICU 68 +| numberformatter.h | Usage& icu::number::impl::Usage::operator=(Usage&&) | (missing) | Internal +| numberformatter.h | Usage& icu::number::impl::Usage::operator=(const Usage&) | (missing) | Internal +| numberformatter.h | bool icu::number::impl::Usage::isSet() const | (missing) | Internal +| numberformatter.h | icu::number::impl::Usage::Usage(Usage&&) | (missing) | Internal +| numberformatter.h | icu::number::impl::Usage::Usage(const Usage&) | (missing) | Internal +| numberformatter.h | icu::number::impl::Usage::~Usage() | (missing) | Internal +| numberformatter.h | int16_t icu::number::impl::Usage::length() const | (missing) | Internal +| numberformatter.h | void icu::number::impl::Usage::set(StringPiece) | (missing) | Internal +| numberrangeformatter.h | std::pair< StringClass, StringClass > icu::number::FormattedNumberRange::getDecimalNumbers(UErrorCode&) const | (missing) | DraftICU 68 +| plurrule.h | UnicodeString icu::PluralRules::select(const number::FormattedNumberRange&, UErrorCode&) const | (missing) | DraftICU 68 +| plurrule.h | UnicodeString icu::PluralRules::select(const number::impl::UFormattedNumberRangeData*, UErrorCode&) const | (missing) | Internal +| timezone.h | <tt>static</tt> TimeZone* icu::TimeZone::forLocaleOrDefault(const Locale&) | (missing) | Internal +| ucurr.h | <tt>enum</tt> UCurrNameStyle::UCURR_FORMAL_SYMBOL_NAME | (missing) | DraftICU 68 +| ucurr.h | <tt>enum</tt> UCurrNameStyle::UCURR_VARIANT_SYMBOL_NAME | (missing) | DraftICU 68 +| udateintervalformat.h | UDisplayContext udtitvfmt_getContext(const UDateIntervalFormat*, UDisplayContextType, UErrorCode*) | (missing) | DraftICU 68 +| udateintervalformat.h | void udtitvfmt_setContext(UDateIntervalFormat*, UDisplayContext, UErrorCode*) | (missing) | DraftICU 68 +| umachine.h | <tt>#define</tt> U_DEFINE_FALSE_AND_TRUE | (missing) | InternalICU 68 +| unum.h | <tt>enum</tt> UNumberFormatMinimumGroupingDigits::UNUM_MINIMUM_GROUPING_DIGITS_AUTO | (missing) | DraftICU 68 +| unum.h | <tt>enum</tt> UNumberFormatMinimumGroupingDigits::UNUM_MINIMUM_GROUPING_DIGITS_MIN2 | (missing) | DraftICU 68 +| unumberformatter.h | <tt>enum</tt> UNumberUnitWidth::UNUM_UNIT_WIDTH_FORMAL | (missing) | DraftICU 68 +| unumberformatter.h | <tt>enum</tt> UNumberUnitWidth::UNUM_UNIT_WIDTH_VARIANT | (missing) | DraftICU 68 +| unumberformatter.h | int32_t unumf_resultToDecimalNumber(const UFormattedNumber*, char*, int32_t, UErrorCode*) | (missing) | DraftICU 68 +| unumberrangeformatter.h | UFormattedNumberRange* unumrf_openResult(UErrorCode*) | (missing) | DraftICU 68 +| unumberrangeformatter.h | UNumberRangeFormatter* unumrf_openForSkeletonWithCollapseAndIdentityFallback(const UChar*, int32_t, UNumberRangeCollapse, UNumberRangeIdentityFallback, const char*, UParseError*, UErrorCode*) | (missing) | DraftICU 68 +| unumberrangeformatter.h | UNumberRangeIdentityResult unumrf_resultGetIdentityResult(const UFormattedNumberRange*, UErrorCode*) | (missing) | DraftICU 68 +| unumberrangeformatter.h | const UFormattedValue* unumrf_resultAsValue(const UFormattedNumberRange*, UErrorCode*) | (missing) | DraftICU 68 +| unumberrangeformatter.h | int32_t unumrf_resultGetFirstDecimalNumber(const UFormattedNumberRange*, char*, int32_t, UErrorCode*) | (missing) | DraftICU 68 +| unumberrangeformatter.h | int32_t unumrf_resultGetSecondDecimalNumber(const UFormattedNumberRange*, char*, int32_t, UErrorCode*) | (missing) | DraftICU 68 +| unumberrangeformatter.h | void unumrf_close(UNumberRangeFormatter*) | (missing) | DraftICU 68 +| unumberrangeformatter.h | void unumrf_closeResult(UFormattedNumberRange*) | (missing) | DraftICU 68 +| unumberrangeformatter.h | void unumrf_formatDecimalRange(const UNumberRangeFormatter*, const char*, int32_t, const char*, int32_t, UFormattedNumberRange*, UErrorCode*) | (missing) | DraftICU 68 +| unumberrangeformatter.h | void unumrf_formatDoubleRange(const UNumberRangeFormatter*, double, double, UFormattedNumberRange*, UErrorCode*) | (missing) | DraftICU 68 +| upluralrules.h | int32_t uplrules_selectForRange(const UPluralRules*, const struct UFormattedNumberRange*, UChar*, int32_t, UErrorCode*) | (missing) | DraftICU 68 + +## Other + +Other existing drafts in ICU 68 + +| File | API | ICU 67 | ICU 68 | +|---|---|---|---| +| bytestream.h | void icu::ByteSink::AppendU8(const char*, int32_t) | DraftICU 67 | +| bytestream.h | void icu::ByteSink::AppendU8(const char8_t*, int32_t) | DraftICU 67 | +| dtptngen.h | UDateFormatHourCycle icu::DateTimePatternGenerator::getDefaultHourCycle(UErrorCode&) const | DraftICU 67 | +| listformatter.h | <tt>static</tt> ListFormatter* icu::ListFormatter::createInstance(const Locale&, UListFormatterType, UListFormatterWidth, UErrorCode&) | DraftICU 67 | +| localematcher.h | Builder& icu::LocaleMatcher::Builder::setDirection(ULocMatchDirection) | DraftICU 67 | +| localematcher.h | <tt>enum</tt> ULocMatchDirection::ULOCMATCH_DIRECTION_ONLY_TWO_WAY | DraftICU 67 | +| localematcher.h | <tt>enum</tt> ULocMatchDirection::ULOCMATCH_DIRECTION_WITH_ONE_WAY | DraftICU 67 | +| locid.h | void icu::Locale::canonicalize(UErrorCode&) | DraftICU 67 | +| measfmt.h | void icu::MeasureFormat::parseObject(const UnicodeString&, Formattable&, ParsePosition&) const | DraftICU 53 | +| measunit.h | MeasureUnit icu::MeasureUnit::product(const MeasureUnit&, UErrorCode&) const | DraftICU 67 | +| measunit.h | MeasureUnit icu::MeasureUnit::reciprocal(UErrorCode&) const | DraftICU 67 | +| measunit.h | MeasureUnit icu::MeasureUnit::withDimensionality(int32_t, UErrorCode&) const | DraftICU 67 | +| measunit.h | MeasureUnit icu::MeasureUnit::withSIPrefix(UMeasureSIPrefix, UErrorCode&) const | DraftICU 67 | +| measunit.h | MeasureUnit& icu::MeasureUnit::operator=(MeasureUnit&&) noexcept | DraftICU 67 | +| measunit.h | UMeasureSIPrefix icu::MeasureUnit::getSIPrefix(UErrorCode&) const | DraftICU 67 | +| measunit.h | UMeasureUnitComplexity icu::MeasureUnit::getComplexity(UErrorCode&) const | DraftICU 67 | +| measunit.h | const char* icu::MeasureUnit::getIdentifier() const | DraftICU 67 | +| measunit.h | icu::MeasureUnit::MeasureUnit(MeasureUnit&&) noexcept | DraftICU 67 | +| measunit.h | int32_t icu::MeasureUnit::getDimensionality(UErrorCode&) const | DraftICU 67 | +| measunit.h | <tt>static</tt> MeasureUnit icu::MeasureUnit::forIdentifier(StringPiece, UErrorCode&) | DraftICU 67 | +| stringpiece.h | icu::StringPiece::StringPiece(const char8_t*) | DraftICU 67 | +| stringpiece.h | icu::StringPiece::StringPiece(const char8_t*, int32_t) | DraftICU 67 | +| stringpiece.h | icu::StringPiece::StringPiece(const std::u8string&) | DraftICU 67 | +| stringpiece.h | icu::StringPiece::StringPiece(std::nullptr_t) | DraftICU 67 | +| stringpiece.h | int32_t icu::StringPiece::compare(StringPiece) | DraftICU 67 | +| stringpiece.h | int32_t icu::StringPiece::find(StringPiece, int32_t) | DraftICU 67 | +| stringpiece.h | void icu::StringPiece::set(const char8_t*) | DraftICU 67 | +| stringpiece.h | void icu::StringPiece::set(const char8_t*, int32_t) | DraftICU 67 | +| udat.h | <tt>enum</tt> UDateFormatHourCycle::UDAT_HOUR_CYCLE_11 | DraftICU 67 | +| udat.h | <tt>enum</tt> UDateFormatHourCycle::UDAT_HOUR_CYCLE_12 | DraftICU 67 | +| udat.h | <tt>enum</tt> UDateFormatHourCycle::UDAT_HOUR_CYCLE_23 | DraftICU 67 | +| udat.h | <tt>enum</tt> UDateFormatHourCycle::UDAT_HOUR_CYCLE_24 | DraftICU 67 | +| udateintervalformat.h | void udtitvfmt_formatCalendarToResult(const UDateIntervalFormat*, UCalendar*, UCalendar*, UFormattedDateInterval*, UErrorCode*) | DraftICU 67 | +| udateintervalformat.h | void udtitvfmt_formatToResult(const UDateIntervalFormat*, UDate, UDate, UFormattedDateInterval*, UErrorCode*) | DraftICU 67 | +| udatpg.h | UDateFormatHourCycle udatpg_getDefaultHourCycle(const UDateTimePatternGenerator*, UErrorCode*) | DraftICU 67 | +| ulistformatter.h | UListFormatter* ulistfmt_openForType(const char*, UListFormatterType, UListFormatterWidth, UErrorCode*) | DraftICU 67 | +| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_AND | DraftICU 67 | +| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_OR | DraftICU 67 | +| ulistformatter.h | <tt>enum</tt> UListFormatterType::ULISTFMT_TYPE_UNITS | DraftICU 67 | +| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_NARROW | DraftICU 67 | +| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_SHORT | DraftICU 67 | +| ulistformatter.h | <tt>enum</tt> UListFormatterWidth::ULISTFMT_WIDTH_WIDE | DraftICU 67 | +| uregex.h | <tt>enum</tt> URegexpFlag::UREGEX_CANON_EQ | DraftICU 2.4 | +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_BREAK_ENGINE | DraftICU 67 | +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_CHARACTER | DraftICU 67 | +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_LINE | DraftICU 67 | +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_SENTENCE | DraftICU 67 | +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_TITLE | DraftICU 67 | +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_CREATE_WORD | DraftICU 67 | +| utrace.h | <tt>enum</tt> UTraceFunctionNumber::UTRACE_UBRK_START | DraftICU 67 | + +## Simplifications + +This section shows cases where the signature was "simplified" for the sake of comparison. The simplified form is in bold, followed by + all possible variations in "original" form. + + +## Colophon + +Contents generated by StableAPI tool on Wed Sep 30 17:44:26 PDT 2020 + +Copyright © 2019 and later: Unicode, Inc. and others. +License & terms of use: http://www.unicode.org/copyright.html +
\ No newline at end of file diff --git a/thirdparty/icu4c/LICENSE b/thirdparty/icu4c/LICENSE new file mode 100644 index 0000000000..5d664a083b --- /dev/null +++ b/thirdparty/icu4c/LICENSE @@ -0,0 +1,414 @@ +COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later) + +Copyright © 1991-2020 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in https://www.unicode.org/copyright.html. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. + +--------------------- + +Third-Party Software Licenses + +This section contains third-party software notices and/or additional +terms for licensed third-party software components included within ICU +libraries. + +1. ICU License - ICU 1.8.1 to ICU 57.1 + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1995-2016 International Business Machines Corporation and others +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, provided that the above +copyright notice(s) and this permission notice appear in all copies of +the Software and that both the above copyright notice(s) and this +permission notice appear in supporting documentation. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY +SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER +RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, use +or other dealings in this Software without prior written authorization +of the copyright holder. + +All trademarks and registered trademarks mentioned herein are the +property of their respective owners. + +2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt) + + # The Google Chrome software developed by Google is licensed under + # the BSD license. Other software included in this distribution is + # provided under other licenses, as set forth below. + # + # The BSD License + # http://opensource.org/licenses/bsd-license.php + # Copyright (C) 2006-2008, Google Inc. + # + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions are met: + # + # Redistributions of source code must retain the above copyright notice, + # this list of conditions and the following disclaimer. + # Redistributions in binary form must reproduce the above + # copyright notice, this list of conditions and the following + # disclaimer in the documentation and/or other materials provided with + # the distribution. + # Neither the name of Google Inc. nor the names of its + # contributors may be used to endorse or promote products derived from + # this software without specific prior written permission. + # + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + # + # + # The word list in cjdict.txt are generated by combining three word lists + # listed below with further processing for compound word breaking. The + # frequency is generated with an iterative training against Google web + # corpora. + # + # * Libtabe (Chinese) + # - https://sourceforge.net/project/?group_id=1519 + # - Its license terms and conditions are shown below. + # + # * IPADIC (Japanese) + # - http://chasen.aist-nara.ac.jp/chasen/distribution.html + # - Its license terms and conditions are shown below. + # + # ---------COPYING.libtabe ---- BEGIN-------------------- + # + # /* + # * Copyright (c) 1999 TaBE Project. + # * Copyright (c) 1999 Pai-Hsiang Hsiao. + # * All rights reserved. + # * + # * Redistribution and use in source and binary forms, with or without + # * modification, are permitted provided that the following conditions + # * are met: + # * + # * . Redistributions of source code must retain the above copyright + # * notice, this list of conditions and the following disclaimer. + # * . Redistributions in binary form must reproduce the above copyright + # * notice, this list of conditions and the following disclaimer in + # * the documentation and/or other materials provided with the + # * distribution. + # * . Neither the name of the TaBE Project nor the names of its + # * contributors may be used to endorse or promote products derived + # * from this software without specific prior written permission. + # * + # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # * OF THE POSSIBILITY OF SUCH DAMAGE. + # */ + # + # /* + # * Copyright (c) 1999 Computer Systems and Communication Lab, + # * Institute of Information Science, Academia + # * Sinica. All rights reserved. + # * + # * Redistribution and use in source and binary forms, with or without + # * modification, are permitted provided that the following conditions + # * are met: + # * + # * . Redistributions of source code must retain the above copyright + # * notice, this list of conditions and the following disclaimer. + # * . Redistributions in binary form must reproduce the above copyright + # * notice, this list of conditions and the following disclaimer in + # * the documentation and/or other materials provided with the + # * distribution. + # * . Neither the name of the Computer Systems and Communication Lab + # * nor the names of its contributors may be used to endorse or + # * promote products derived from this software without specific + # * prior written permission. + # * + # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # * OF THE POSSIBILITY OF SUCH DAMAGE. + # */ + # + # Copyright 1996 Chih-Hao Tsai @ Beckman Institute, + # University of Illinois + # c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4 + # + # ---------------COPYING.libtabe-----END-------------------------------- + # + # + # ---------------COPYING.ipadic-----BEGIN------------------------------- + # + # Copyright 2000, 2001, 2002, 2003 Nara Institute of Science + # and Technology. All Rights Reserved. + # + # Use, reproduction, and distribution of this software is permitted. + # Any copy of this software, whether in its original form or modified, + # must include both the above copyright notice and the following + # paragraphs. + # + # Nara Institute of Science and Technology (NAIST), + # the copyright holders, disclaims all warranties with regard to this + # software, including all implied warranties of merchantability and + # fitness, in no event shall NAIST be liable for + # any special, indirect or consequential damages or any damages + # whatsoever resulting from loss of use, data or profits, whether in an + # action of contract, negligence or other tortuous action, arising out + # of or in connection with the use or performance of this software. + # + # A large portion of the dictionary entries + # originate from ICOT Free Software. The following conditions for ICOT + # Free Software applies to the current dictionary as well. + # + # Each User may also freely distribute the Program, whether in its + # original form or modified, to any third party or parties, PROVIDED + # that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear + # on, or be attached to, the Program, which is distributed substantially + # in the same form as set out herein and that such intended + # distribution, if actually made, will neither violate or otherwise + # contravene any of the laws and regulations of the countries having + # jurisdiction over the User or the intended distribution itself. + # + # NO WARRANTY + # + # The program was produced on an experimental basis in the course of the + # research and development conducted during the project and is provided + # to users as so produced on an experimental basis. Accordingly, the + # program is provided without any warranty whatsoever, whether express, + # implied, statutory or otherwise. The term "warranty" used herein + # includes, but is not limited to, any warranty of the quality, + # performance, merchantability and fitness for a particular purpose of + # the program and the nonexistence of any infringement or violation of + # any right of any third party. + # + # Each user of the program will agree and understand, and be deemed to + # have agreed and understood, that there is no warranty whatsoever for + # the program and, accordingly, the entire risk arising from or + # otherwise connected with the program is assumed by the user. + # + # Therefore, neither ICOT, the copyright holder, or any other + # organization that participated in or was otherwise related to the + # development of the program and their respective officials, directors, + # officers and other employees shall be held liable for any and all + # damages, including, without limitation, general, special, incidental + # and consequential damages, arising out of or otherwise in connection + # with the use or inability to use the program or any product, material + # or result produced or otherwise obtained by using the program, + # regardless of whether they have been advised of, or otherwise had + # knowledge of, the possibility of such damages at any time during the + # project or thereafter. Each user will be deemed to have agreed to the + # foregoing by his or her commencement of use of the program. The term + # "use" as used herein includes, but is not limited to, the use, + # modification, copying and distribution of the program and the + # production of secondary products from the program. + # + # In the case where the program, whether in its original form or + # modified, was distributed or delivered to or received by a user from + # any person, organization or entity other than ICOT, unless it makes or + # grants independently of ICOT any specific warranty to the user in + # writing, such person, organization or entity, will also be exempted + # from and not be held liable to the user for any such damages as noted + # above as far as the program is concerned. + # + # ---------------COPYING.ipadic-----END---------------------------------- + +3. Lao Word Break Dictionary Data (laodict.txt) + + # Copyright (c) 2013 International Business Machines Corporation + # and others. All Rights Reserved. + # + # Project: https://github.com/veer66/lao-dictionary + # Dictionary: https://github.com/veer66/lao-dictionary/blob/master/Lao-Dictionary.txt + # License: https://github.com/veer66/lao-dictionary/blob/master/Lao-Dictionary-LICENSE.txt + # (copied below) + # + # This file is derived from the above dictionary, with slight + # modifications. + # ---------------------------------------------------------------------- + # Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell. + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, + # are permitted provided that the following conditions are met: + # + # + # Redistributions of source code must retain the above copyright notice, this + # list of conditions and the following disclaimer. Redistributions in + # binary form must reproduce the above copyright notice, this list of + # conditions and the following disclaimer in the documentation and/or + # other materials provided with the distribution. + # + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # OF THE POSSIBILITY OF SUCH DAMAGE. + # -------------------------------------------------------------------------- + +4. Burmese Word Break Dictionary Data (burmesedict.txt) + + # Copyright (c) 2014 International Business Machines Corporation + # and others. All Rights Reserved. + # + # This list is part of a project hosted at: + # github.com/kanyawtech/myanmar-karen-word-lists + # + # -------------------------------------------------------------------------- + # Copyright (c) 2013, LeRoy Benjamin Sharon + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: Redistributions of source code must retain the above + # copyright notice, this list of conditions and the following + # disclaimer. Redistributions in binary form must reproduce the + # above copyright notice, this list of conditions and the following + # disclaimer in the documentation and/or other materials provided + # with the distribution. + # + # Neither the name Myanmar Karen Word Lists, nor the names of its + # contributors may be used to endorse or promote products derived + # from this software without specific prior written permission. + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + # SUCH DAMAGE. + # -------------------------------------------------------------------------- + +5. Time Zone Database + + ICU uses the public domain data and code derived from Time Zone +Database for its time zone support. The ownership of the TZ database +is explained in BCP 175: Procedure for Maintaining the Time Zone +Database section 7. + + # 7. Database Ownership + # + # The TZ database itself is not an IETF Contribution or an IETF + # document. Rather it is a pre-existing and regularly updated work + # that is in the public domain, and is intended to remain in the + # public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do + # not apply to the TZ Database or contributions that individuals make + # to it. Should any claims be made and substantiated against the TZ + # Database, the organization that is providing the IANA + # Considerations defined in this RFC, under the memorandum of + # understanding with the IETF, currently ICANN, may act in accordance + # with all competent court orders. No ownership claims will be made + # by ICANN or the IETF Trust on the database or the code. Any person + # making a contribution to the database or code waives all rights to + # future claims in that contribution or in the TZ Database. + +6. Google double-conversion + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/thirdparty/icu4c/common/appendable.cpp b/thirdparty/icu4c/common/appendable.cpp new file mode 100644 index 0000000000..fca3c1e413 --- /dev/null +++ b/thirdparty/icu4c/common/appendable.cpp @@ -0,0 +1,74 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011-2012, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: appendable.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010dec07 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/appendable.h" +#include "unicode/utf16.h" + +U_NAMESPACE_BEGIN + +Appendable::~Appendable() {} + +UBool +Appendable::appendCodePoint(UChar32 c) { + if(c<=0xffff) { + return appendCodeUnit((UChar)c); + } else { + return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c)); + } +} + +UBool +Appendable::appendString(const UChar *s, int32_t length) { + if(length<0) { + UChar c; + while((c=*s++)!=0) { + if(!appendCodeUnit(c)) { + return FALSE; + } + } + } else if(length>0) { + const UChar *limit=s+length; + do { + if(!appendCodeUnit(*s++)) { + return FALSE; + } + } while(s<limit); + } + return TRUE; +} + +UBool +Appendable::reserveAppendCapacity(int32_t /*appendCapacity*/) { + return TRUE; +} + +UChar * +Appendable::getAppendBuffer(int32_t minCapacity, + int32_t /*desiredCapacityHint*/, + UChar *scratch, int32_t scratchCapacity, + int32_t *resultCapacity) { + if(minCapacity<1 || scratchCapacity<minCapacity) { + *resultCapacity=0; + return NULL; + } + *resultCapacity=scratchCapacity; + return scratch; +} + +// UnicodeStringAppendable is implemented in unistr.cpp. + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/bmpset.cpp b/thirdparty/icu4c/common/bmpset.cpp new file mode 100644 index 0000000000..bc79f5e5a6 --- /dev/null +++ b/thirdparty/icu4c/common/bmpset.cpp @@ -0,0 +1,741 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2007-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: bmpset.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2007jan29 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/uniset.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "bmpset.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + +BMPSet::BMPSet(const int32_t *parentList, int32_t parentListLength) : + list(parentList), listLength(parentListLength) { + uprv_memset(latin1Contains, 0, sizeof(latin1Contains)); + uprv_memset(table7FF, 0, sizeof(table7FF)); + uprv_memset(bmpBlockBits, 0, sizeof(bmpBlockBits)); + + /* + * Set the list indexes for binary searches for + * U+0800, U+1000, U+2000, .., U+F000, U+10000. + * U+0800 is the first 3-byte-UTF-8 code point. Lower code points are + * looked up in the bit tables. + * The last pair of indexes is for finding supplementary code points. + */ + list4kStarts[0]=findCodePoint(0x800, 0, listLength-1); + int32_t i; + for(i=1; i<=0x10; ++i) { + list4kStarts[i]=findCodePoint(i<<12, list4kStarts[i-1], listLength-1); + } + list4kStarts[0x11]=listLength-1; + containsFFFD=containsSlow(0xfffd, list4kStarts[0xf], list4kStarts[0x10]); + + initBits(); + overrideIllegal(); +} + +BMPSet::BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength) : + containsFFFD(otherBMPSet.containsFFFD), + list(newParentList), listLength(newParentListLength) { + uprv_memcpy(latin1Contains, otherBMPSet.latin1Contains, sizeof(latin1Contains)); + uprv_memcpy(table7FF, otherBMPSet.table7FF, sizeof(table7FF)); + uprv_memcpy(bmpBlockBits, otherBMPSet.bmpBlockBits, sizeof(bmpBlockBits)); + uprv_memcpy(list4kStarts, otherBMPSet.list4kStarts, sizeof(list4kStarts)); +} + +BMPSet::~BMPSet() { +} + +/* + * Set bits in a bit rectangle in "vertical" bit organization. + * start<limit<=0x800 + */ +static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) { + U_ASSERT(start<limit); + U_ASSERT(limit<=0x800); + + int32_t lead=start>>6; // Named for UTF-8 2-byte lead byte with upper 5 bits. + int32_t trail=start&0x3f; // Named for UTF-8 2-byte trail byte with lower 6 bits. + + // Set one bit indicating an all-one block. + uint32_t bits=(uint32_t)1<<lead; + if((start+1)==limit) { // Single-character shortcut. + table[trail]|=bits; + return; + } + + int32_t limitLead=limit>>6; + int32_t limitTrail=limit&0x3f; + + if(lead==limitLead) { + // Partial vertical bit column. + while(trail<limitTrail) { + table[trail++]|=bits; + } + } else { + // Partial vertical bit column, + // followed by a bit rectangle, + // followed by another partial vertical bit column. + if(trail>0) { + do { + table[trail++]|=bits; + } while(trail<64); + ++lead; + } + if(lead<limitLead) { + bits=~(((unsigned)1<<lead)-1); + if(limitLead<0x20) { + bits&=((unsigned)1<<limitLead)-1; + } + for(trail=0; trail<64; ++trail) { + table[trail]|=bits; + } + } + // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0. + // In that case, bits=1<<limitLead is undefined but the bits value + // is not used because trail<limitTrail is already false. + bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead); + for(trail=0; trail<limitTrail; ++trail) { + table[trail]|=bits; + } + } +} + +void BMPSet::initBits() { + UChar32 start, limit; + int32_t listIndex=0; + + // Set latin1Contains[]. + do { + start=list[listIndex++]; + if(listIndex<listLength) { + limit=list[listIndex++]; + } else { + limit=0x110000; + } + if(start>=0x100) { + break; + } + do { + latin1Contains[start++]=1; + } while(start<limit && start<0x100); + } while(limit<=0x100); + + // Find the first range overlapping with (or after) 80..FF again, + // to include them in table7FF as well. + for(listIndex=0;;) { + start=list[listIndex++]; + if(listIndex<listLength) { + limit=list[listIndex++]; + } else { + limit=0x110000; + } + if(limit>0x80) { + if(start<0x80) { + start=0x80; + } + break; + } + } + + // Set table7FF[]. + while(start<0x800) { + set32x64Bits(table7FF, start, limit<=0x800 ? limit : 0x800); + if(limit>0x800) { + start=0x800; + break; + } + + start=list[listIndex++]; + if(listIndex<listLength) { + limit=list[listIndex++]; + } else { + limit=0x110000; + } + } + + // Set bmpBlockBits[]. + int32_t minStart=0x800; + while(start<0x10000) { + if(limit>0x10000) { + limit=0x10000; + } + + if(start<minStart) { + start=minStart; + } + if(start<limit) { // Else: Another range entirely in a known mixed-value block. + if(start&0x3f) { + // Mixed-value block of 64 code points. + start>>=6; + bmpBlockBits[start&0x3f]|=0x10001<<(start>>6); + start=(start+1)<<6; // Round up to the next block boundary. + minStart=start; // Ignore further ranges in this block. + } + if(start<limit) { + if(start<(limit&~0x3f)) { + // Multiple all-ones blocks of 64 code points each. + set32x64Bits(bmpBlockBits, start>>6, limit>>6); + } + + if(limit&0x3f) { + // Mixed-value block of 64 code points. + limit>>=6; + bmpBlockBits[limit&0x3f]|=0x10001<<(limit>>6); + limit=(limit+1)<<6; // Round up to the next block boundary. + minStart=limit; // Ignore further ranges in this block. + } + } + } + + if(limit==0x10000) { + break; + } + + start=list[listIndex++]; + if(listIndex<listLength) { + limit=list[listIndex++]; + } else { + limit=0x110000; + } + } +} + +/* + * Override some bits and bytes to the result of contains(FFFD) + * for faster validity checking at runtime. + * No need to set 0 values where they were reset to 0 in the constructor + * and not modified by initBits(). + * (table7FF[] 0..7F, bmpBlockBits[] 0..7FF) + * Need to set 0 values for surrogates D800..DFFF. + */ +void BMPSet::overrideIllegal() { + uint32_t bits, mask; + int32_t i; + + if(containsFFFD) { + bits=3; // Lead bytes 0xC0 and 0xC1. + for(i=0; i<64; ++i) { + table7FF[i]|=bits; + } + + bits=1; // Lead byte 0xE0. + for(i=0; i<32; ++i) { // First half of 4k block. + bmpBlockBits[i]|=bits; + } + + mask= static_cast<uint32_t>(~(0x10001<<0xd)); // Lead byte 0xED. + bits=1<<0xd; + for(i=32; i<64; ++i) { // Second half of 4k block. + bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits; + } + } else { + mask= static_cast<uint32_t>(~(0x10001<<0xd)); // Lead byte 0xED. + for(i=32; i<64; ++i) { // Second half of 4k block. + bmpBlockBits[i]&=mask; + } + } +} + +int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const { + /* Examples: + findCodePoint(c) + set list[] c=0 1 3 4 7 8 + === ============== =========== + [] [110000] 0 0 0 0 0 0 + [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 + [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 + [:Any:] [0, 110000] 1 1 1 1 1 1 + */ + + // Return the smallest i such that c < list[i]. Assume + // list[len - 1] == HIGH and that c is legal (0..HIGH-1). + if (c < list[lo]) + return lo; + // High runner test. c is often after the last range, so an + // initial check for this condition pays off. + if (lo >= hi || c >= list[hi-1]) + return hi; + // invariant: c >= list[lo] + // invariant: c < list[hi] + for (;;) { + int32_t i = (lo + hi) >> 1; + if (i == lo) { + break; // Found! + } else if (c < list[i]) { + hi = i; + } else { + lo = i; + } + } + return hi; +} + +UBool +BMPSet::contains(UChar32 c) const { + if((uint32_t)c<=0xff) { + return (UBool)latin1Contains[c]; + } else if((uint32_t)c<=0x7ff) { + return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0); + } else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) { + int lead=c>>12; + uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001; + if(twoBits<=1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + return (UBool)twoBits; + } else { + // Look up the code point in its 4k block of code points. + return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]); + } + } else if((uint32_t)c<=0x10ffff) { + // surrogate or supplementary code point + return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]); + } else { + // Out-of-range code points get FALSE, consistent with long-standing + // behavior of UnicodeSet::contains(c). + return FALSE; + } +} + +/* + * Check for sufficient length for trail unit for each surrogate pair. + * Handle single surrogates as surrogate code points as usual in ICU. + */ +const UChar * +BMPSet::span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const { + UChar c, c2; + + if(spanCondition) { + // span + do { + c=*s; + if(c<=0xff) { + if(!latin1Contains[c]) { + break; + } + } else if(c<=0x7ff) { + if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) { + break; + } + } else if(c<0xd800 || c>=0xe000) { + int lead=c>>12; + uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001; + if(twoBits<=1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + if(twoBits==0) { + break; + } + } else { + // Look up the code point in its 4k block of code points. + if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) { + break; + } + } + } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) { + // surrogate code point + if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) { + break; + } + } else { + // surrogate pair + if(!containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) { + break; + } + ++s; + } + } while(++s<limit); + } else { + // span not + do { + c=*s; + if(c<=0xff) { + if(latin1Contains[c]) { + break; + } + } else if(c<=0x7ff) { + if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) { + break; + } + } else if(c<0xd800 || c>=0xe000) { + int lead=c>>12; + uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001; + if(twoBits<=1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + if(twoBits!=0) { + break; + } + } else { + // Look up the code point in its 4k block of code points. + if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) { + break; + } + } + } else if(c>=0xdc00 || (s+1)==limit || (c2=s[1])<0xdc00 || c2>=0xe000) { + // surrogate code point + if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) { + break; + } + } else { + // surrogate pair + if(containsSlow(U16_GET_SUPPLEMENTARY(c, c2), list4kStarts[0x10], list4kStarts[0x11])) { + break; + } + ++s; + } + } while(++s<limit); + } + return s; +} + +/* Symmetrical with span(). */ +const UChar * +BMPSet::spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const { + UChar c, c2; + + if(spanCondition) { + // span + for(;;) { + c=*(--limit); + if(c<=0xff) { + if(!latin1Contains[c]) { + break; + } + } else if(c<=0x7ff) { + if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) { + break; + } + } else if(c<0xd800 || c>=0xe000) { + int lead=c>>12; + uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001; + if(twoBits<=1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + if(twoBits==0) { + break; + } + } else { + // Look up the code point in its 4k block of code points. + if(!containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) { + break; + } + } + } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) { + // surrogate code point + if(!containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) { + break; + } + } else { + // surrogate pair + if(!containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) { + break; + } + --limit; + } + if(s==limit) { + return s; + } + } + } else { + // span not + for(;;) { + c=*(--limit); + if(c<=0xff) { + if(latin1Contains[c]) { + break; + } + } else if(c<=0x7ff) { + if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) { + break; + } + } else if(c<0xd800 || c>=0xe000) { + int lead=c>>12; + uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001; + if(twoBits<=1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + if(twoBits!=0) { + break; + } + } else { + // Look up the code point in its 4k block of code points. + if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1])) { + break; + } + } + } else if(c<0xdc00 || s==limit || (c2=*(limit-1))<0xd800 || c2>=0xdc00) { + // surrogate code point + if(containsSlow(c, list4kStarts[0xd], list4kStarts[0xe])) { + break; + } + } else { + // surrogate pair + if(containsSlow(U16_GET_SUPPLEMENTARY(c2, c), list4kStarts[0x10], list4kStarts[0x11])) { + break; + } + --limit; + } + if(s==limit) { + return s; + } + } + } + return limit+1; +} + +/* + * Precheck for sufficient trail bytes at end of string only once per span. + * Check validity. + */ +const uint8_t * +BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const { + const uint8_t *limit=s+length; + uint8_t b=*s; + if(U8_IS_SINGLE(b)) { + // Initial all-ASCII span. + if(spanCondition) { + do { + if(!latin1Contains[b] || ++s==limit) { + return s; + } + b=*s; + } while(U8_IS_SINGLE(b)); + } else { + do { + if(latin1Contains[b] || ++s==limit) { + return s; + } + b=*s; + } while(U8_IS_SINGLE(b)); + } + length=(int32_t)(limit-s); + } + + if(spanCondition!=USET_SPAN_NOT_CONTAINED) { + spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. + } + + const uint8_t *limit0=limit; + + /* + * Make sure that the last 1/2/3/4-byte sequence before limit is complete + * or runs into a lead byte. + * In the span loop compare s with limit only once + * per multi-byte character. + * + * Give a trailing illegal sequence the same value as the result of contains(FFFD), + * including it if that is part of the span, otherwise set limit0 to before + * the truncated sequence. + */ + b=*(limit-1); + if((int8_t)b<0) { + // b>=0x80: lead or trail byte + if(b<0xc0) { + // single trail byte, check for preceding 3- or 4-byte lead byte + if(length>=2 && (b=*(limit-2))>=0xe0) { + limit-=2; + if(containsFFFD!=spanCondition) { + limit0=limit; + } + } else if(b<0xc0 && b>=0x80 && length>=3 && (b=*(limit-3))>=0xf0) { + // 4-byte lead byte with only two trail bytes + limit-=3; + if(containsFFFD!=spanCondition) { + limit0=limit; + } + } + } else { + // lead byte with no trail bytes + --limit; + if(containsFFFD!=spanCondition) { + limit0=limit; + } + } + } + + uint8_t t1, t2, t3; + + while(s<limit) { + b=*s; + if(U8_IS_SINGLE(b)) { + // ASCII + if(spanCondition) { + do { + if(!latin1Contains[b]) { + return s; + } else if(++s==limit) { + return limit0; + } + b=*s; + } while(U8_IS_SINGLE(b)); + } else { + do { + if(latin1Contains[b]) { + return s; + } else if(++s==limit) { + return limit0; + } + b=*s; + } while(U8_IS_SINGLE(b)); + } + } + ++s; // Advance past the lead byte. + if(b>=0xe0) { + if(b<0xf0) { + if( /* handle U+0000..U+FFFF inline */ + (t1=(uint8_t)(s[0]-0x80)) <= 0x3f && + (t2=(uint8_t)(s[1]-0x80)) <= 0x3f + ) { + b&=0xf; + uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001; + if(twoBits<=1) { + // All 64 code points with this lead byte and middle trail byte + // are either in the set or not. + if(twoBits!=(uint32_t)spanCondition) { + return s-1; + } + } else { + // Look up the code point in its 4k block of code points. + UChar32 c=(b<<12)|(t1<<6)|t2; + if(containsSlow(c, list4kStarts[b], list4kStarts[b+1]) != spanCondition) { + return s-1; + } + } + s+=2; + continue; + } + } else if( /* handle U+10000..U+10FFFF inline */ + (t1=(uint8_t)(s[0]-0x80)) <= 0x3f && + (t2=(uint8_t)(s[1]-0x80)) <= 0x3f && + (t3=(uint8_t)(s[2]-0x80)) <= 0x3f + ) { + // Give an illegal sequence the same value as the result of contains(FFFD). + UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3; + if( ( (0x10000<=c && c<=0x10ffff) ? + containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) : + containsFFFD + ) != spanCondition + ) { + return s-1; + } + s+=3; + continue; + } + } else { + if( /* handle U+0000..U+07FF inline */ + b>=0xc0 && + (t1=(uint8_t)(*s-0x80)) <= 0x3f + ) { + if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) { + return s-1; + } + ++s; + continue; + } + } + + // Give an illegal sequence the same value as the result of contains(FFFD). + // Handle each byte of an illegal sequence separately to simplify the code; + // no need to optimize error handling. + if(containsFFFD!=spanCondition) { + return s-1; + } + } + + return limit0; +} + +/* + * While going backwards through UTF-8 optimize only for ASCII. + * Unlike UTF-16, UTF-8 is not forward-backward symmetrical, that is, it is not + * possible to tell from the last byte in a multi-byte sequence how many + * preceding bytes there should be. Therefore, going backwards through UTF-8 + * is much harder than going forward. + */ +int32_t +BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const { + if(spanCondition!=USET_SPAN_NOT_CONTAINED) { + spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. + } + + uint8_t b; + + do { + b=s[--length]; + if(U8_IS_SINGLE(b)) { + // ASCII sub-span + if(spanCondition) { + do { + if(!latin1Contains[b]) { + return length+1; + } else if(length==0) { + return 0; + } + b=s[--length]; + } while(U8_IS_SINGLE(b)); + } else { + do { + if(latin1Contains[b]) { + return length+1; + } else if(length==0) { + return 0; + } + b=s[--length]; + } while(U8_IS_SINGLE(b)); + } + } + + int32_t prev=length; + UChar32 c; + // trail byte: collect a multi-byte character + // (or lead byte in last-trail position) + c=utf8_prevCharSafeBody(s, 0, &length, b, -3); + // c is a valid code point, not ASCII, not a surrogate + if(c<=0x7ff) { + if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) { + return prev+1; + } + } else if(c<=0xffff) { + int lead=c>>12; + uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001; + if(twoBits<=1) { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + if(twoBits!=(uint32_t)spanCondition) { + return prev+1; + } + } else { + // Look up the code point in its 4k block of code points. + if(containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]) != spanCondition) { + return prev+1; + } + } + } else { + if(containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) != spanCondition) { + return prev+1; + } + } + } while(length>0); + return 0; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/bmpset.h b/thirdparty/icu4c/common/bmpset.h new file mode 100644 index 0000000000..e1982ac669 --- /dev/null +++ b/thirdparty/icu4c/common/bmpset.h @@ -0,0 +1,164 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2007, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: bmpset.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2007jan29 +* created by: Markus W. Scherer +*/ + +#ifndef __BMPSET_H__ +#define __BMPSET_H__ + +#include "unicode/utypes.h" +#include "unicode/uniset.h" + +U_NAMESPACE_BEGIN + +/* + * Helper class for frozen UnicodeSets, implements contains() and span() + * optimized for BMP code points. Structured to be UTF-8-friendly. + * + * Latin-1: Look up bytes. + * 2-byte characters: Bits organized vertically. + * 3-byte characters: Use zero/one/mixed data per 64-block in U+0000..U+FFFF, + * with mixed for illegal ranges. + * Supplementary characters: Binary search over + * the supplementary part of the parent set's inversion list. + */ +class BMPSet : public UMemory { +public: + BMPSet(const int32_t *parentList, int32_t parentListLength); + BMPSet(const BMPSet &otherBMPSet, const int32_t *newParentList, int32_t newParentListLength); + virtual ~BMPSet(); + + virtual UBool contains(UChar32 c) const; + + /* + * Span the initial substring for which each character c has spanCondition==contains(c). + * It must be s<limit and spanCondition==0 or 1. + * @return The string pointer which limits the span. + */ + const UChar *span(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const; + + /* + * Span the trailing substring for which each character c has spanCondition==contains(c). + * It must be s<limit and spanCondition==0 or 1. + * @return The string pointer which starts the span. + */ + const UChar *spanBack(const UChar *s, const UChar *limit, USetSpanCondition spanCondition) const; + + /* + * Span the initial substring for which each character c has spanCondition==contains(c). + * It must be length>0 and spanCondition==0 or 1. + * @return The string pointer which limits the span. + */ + const uint8_t *spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const; + + /* + * Span the trailing substring for which each character c has spanCondition==contains(c). + * It must be length>0 and spanCondition==0 or 1. + * @return The start of the span. + */ + int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const; + +private: + void initBits(); + void overrideIllegal(); + + /** + * Same as UnicodeSet::findCodePoint(UChar32 c) const except that the + * binary search is restricted for finding code points in a certain range. + * + * For restricting the search for finding in the range start..end, + * pass in + * lo=findCodePoint(start) and + * hi=findCodePoint(end) + * with 0<=lo<=hi<len. + * findCodePoint(c) defaults to lo=0 and hi=len-1. + * + * @param c a character in a subrange of MIN_VALUE..MAX_VALUE + * @param lo The lowest index to be returned. + * @param hi The highest index to be returned. + * @return the smallest integer i in the range lo..hi, + * inclusive, such that c < list[i] + */ + int32_t findCodePoint(UChar32 c, int32_t lo, int32_t hi) const; + + inline UBool containsSlow(UChar32 c, int32_t lo, int32_t hi) const; + + /* + * One byte 0 or 1 per Latin-1 character. + */ + UBool latin1Contains[0x100]; + + /* true if contains(U+FFFD). */ + UBool containsFFFD; + + /* + * One bit per code point from U+0000..U+07FF. + * The bits are organized vertically; consecutive code points + * correspond to the same bit positions in consecutive table words. + * With code point parts + * lead=c{10..6} + * trail=c{5..0} + * it is set.contains(c)==(table7FF[trail] bit lead) + * + * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD) + * for faster validity checking at runtime. + */ + uint32_t table7FF[64]; + + /* + * One bit per 64 BMP code points. + * The bits are organized vertically; consecutive 64-code point blocks + * correspond to the same bit position in consecutive table words. + * With code point parts + * lead=c{15..12} + * t1=c{11..6} + * test bits (lead+16) and lead in bmpBlockBits[t1]. + * If the upper bit is 0, then the lower bit indicates if contains(c) + * for all code points in the 64-block. + * If the upper bit is 1, then the block is mixed and set.contains(c) + * must be called. + * + * Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to + * the result of contains(FFFD) for faster validity checking at runtime. + */ + uint32_t bmpBlockBits[64]; + + /* + * Inversion list indexes for restricted binary searches in + * findCodePoint(), from + * findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000). + * U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are + * always looked up in the bit tables. + * The last pair of indexes is for finding supplementary code points. + */ + int32_t list4kStarts[18]; + + /* + * The inversion list of the parent set, for the slower contains() implementation + * for mixed BMP blocks and for supplementary code points. + * The list is terminated with list[listLength-1]=0x110000. + */ + const int32_t *list; + int32_t listLength; +}; + +inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const { + return (UBool)(findCodePoint(c, lo, hi) & 1); +} + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/brkeng.cpp b/thirdparty/icu4c/common/brkeng.cpp new file mode 100644 index 0000000000..78492db662 --- /dev/null +++ b/thirdparty/icu4c/common/brkeng.cpp @@ -0,0 +1,284 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ************************************************************************************ + * Copyright (C) 2006-2016, International Business Machines Corporation + * and others. All Rights Reserved. + ************************************************************************************ + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/uchar.h" +#include "unicode/uniset.h" +#include "unicode/chariter.h" +#include "unicode/ures.h" +#include "unicode/udata.h" +#include "unicode/putil.h" +#include "unicode/ustring.h" +#include "unicode/uscript.h" +#include "unicode/ucharstrie.h" +#include "unicode/bytestrie.h" + +#include "brkeng.h" +#include "cmemory.h" +#include "dictbe.h" +#include "charstr.h" +#include "dictionarydata.h" +#include "mutex.h" +#include "uvector.h" +#include "umutex.h" +#include "uresimp.h" +#include "ubrkimpl.h" + +U_NAMESPACE_BEGIN + +/* + ****************************************************************** + */ + +LanguageBreakEngine::LanguageBreakEngine() { +} + +LanguageBreakEngine::~LanguageBreakEngine() { +} + +/* + ****************************************************************** + */ + +LanguageBreakFactory::LanguageBreakFactory() { +} + +LanguageBreakFactory::~LanguageBreakFactory() { +} + +/* + ****************************************************************** + */ + +UnhandledEngine::UnhandledEngine(UErrorCode &status) : fHandled(nullptr) { + (void)status; +} + +UnhandledEngine::~UnhandledEngine() { + delete fHandled; + fHandled = nullptr; +} + +UBool +UnhandledEngine::handles(UChar32 c) const { + return fHandled && fHandled->contains(c); +} + +int32_t +UnhandledEngine::findBreaks( UText *text, + int32_t /* startPos */, + int32_t endPos, + UVector32 &/*foundBreaks*/ ) const { + UChar32 c = utext_current32(text); + while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) { + utext_next32(text); // TODO: recast loop to work with post-increment operations. + c = utext_current32(text); + } + return 0; +} + +void +UnhandledEngine::handleCharacter(UChar32 c) { + if (fHandled == nullptr) { + fHandled = new UnicodeSet(); + if (fHandled == nullptr) { + return; + } + } + if (!fHandled->contains(c)) { + UErrorCode status = U_ZERO_ERROR; + // Apply the entire script of the character. + int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT); + fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status); + } +} + +/* + ****************************************************************** + */ + +ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) { + fEngines = 0; +} + +ICULanguageBreakFactory::~ICULanguageBreakFactory() { + if (fEngines != 0) { + delete fEngines; + } +} + +U_NAMESPACE_END +U_CDECL_BEGIN +static void U_CALLCONV _deleteEngine(void *obj) { + delete (const icu::LanguageBreakEngine *) obj; +} +U_CDECL_END +U_NAMESPACE_BEGIN + +const LanguageBreakEngine * +ICULanguageBreakFactory::getEngineFor(UChar32 c) { + const LanguageBreakEngine *lbe = NULL; + UErrorCode status = U_ZERO_ERROR; + + static UMutex gBreakEngineMutex; + Mutex m(&gBreakEngineMutex); + + if (fEngines == NULL) { + UStack *engines = new UStack(_deleteEngine, NULL, status); + if (U_FAILURE(status) || engines == NULL) { + // Note: no way to return error code to caller. + delete engines; + return NULL; + } + fEngines = engines; + } else { + int32_t i = fEngines->size(); + while (--i >= 0) { + lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); + if (lbe != NULL && lbe->handles(c)) { + return lbe; + } + } + } + + // We didn't find an engine. Create one. + lbe = loadEngineFor(c); + if (lbe != NULL) { + fEngines->push((void *)lbe, status); + } + return lbe; +} + +const LanguageBreakEngine * +ICULanguageBreakFactory::loadEngineFor(UChar32 c) { + UErrorCode status = U_ZERO_ERROR; + UScriptCode code = uscript_getScript(c, &status); + if (U_SUCCESS(status)) { + DictionaryMatcher *m = loadDictionaryMatcherFor(code); + if (m != NULL) { + const LanguageBreakEngine *engine = NULL; + switch(code) { + case USCRIPT_THAI: + engine = new ThaiBreakEngine(m, status); + break; + case USCRIPT_LAO: + engine = new LaoBreakEngine(m, status); + break; + case USCRIPT_MYANMAR: + engine = new BurmeseBreakEngine(m, status); + break; + case USCRIPT_KHMER: + engine = new KhmerBreakEngine(m, status); + break; + +#if !UCONFIG_NO_NORMALIZATION + // CJK not available w/o normalization + case USCRIPT_HANGUL: + engine = new CjkBreakEngine(m, kKorean, status); + break; + + // use same BreakEngine and dictionary for both Chinese and Japanese + case USCRIPT_HIRAGANA: + case USCRIPT_KATAKANA: + case USCRIPT_HAN: + engine = new CjkBreakEngine(m, kChineseJapanese, status); + break; +#if 0 + // TODO: Have to get some characters with script=common handled + // by CjkBreakEngine (e.g. U+309B). Simply subjecting + // them to CjkBreakEngine does not work. The engine has to + // special-case them. + case USCRIPT_COMMON: + { + UBlockCode block = ublock_getCode(code); + if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA) + engine = new CjkBreakEngine(dict, kChineseJapanese, status); + break; + } +#endif +#endif + + default: + break; + } + if (engine == NULL) { + delete m; + } + else if (U_FAILURE(status)) { + delete engine; + engine = NULL; + } + return engine; + } + } + return NULL; +} + +DictionaryMatcher * +ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) { + UErrorCode status = U_ZERO_ERROR; + // open root from brkitr tree. + UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); + b = ures_getByKeyWithFallback(b, "dictionaries", b, &status); + int32_t dictnlength = 0; + const UChar *dictfname = + ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status); + if (U_FAILURE(status)) { + ures_close(b); + return NULL; + } + CharString dictnbuf; + CharString ext; + const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength); // last dot + if (extStart != NULL) { + int32_t len = (int32_t)(extStart - dictfname); + ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status); + dictnlength = len; + } + dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status); + ures_close(b); + + UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status); + if (U_SUCCESS(status)) { + // build trie + const uint8_t *data = (const uint8_t *)udata_getMemory(file); + const int32_t *indexes = (const int32_t *)data; + const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET]; + const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK; + DictionaryMatcher *m = NULL; + if (trieType == DictionaryData::TRIE_TYPE_BYTES) { + const int32_t transform = indexes[DictionaryData::IX_TRANSFORM]; + const char *characters = (const char *)(data + offset); + m = new BytesDictionaryMatcher(characters, transform, file); + } + else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) { + const UChar *characters = (const UChar *)(data + offset); + m = new UCharsDictionaryMatcher(characters, file); + } + if (m == NULL) { + // no matcher exists to take ownership - either we are an invalid + // type or memory allocation failed + udata_close(file); + } + return m; + } else if (dictfname != NULL) { + // we don't have a dictionary matcher. + // returning NULL here will cause us to fail to find a dictionary break engine, as expected + status = U_ZERO_ERROR; + return NULL; + } + return NULL; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/thirdparty/icu4c/common/brkeng.h b/thirdparty/icu4c/common/brkeng.h new file mode 100644 index 0000000000..155433b89a --- /dev/null +++ b/thirdparty/icu4c/common/brkeng.h @@ -0,0 +1,271 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** + ************************************************************************************ + * Copyright (C) 2006-2012, International Business Machines Corporation and others. * + * All Rights Reserved. * + ************************************************************************************ + */ + +#ifndef BRKENG_H +#define BRKENG_H + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "unicode/utext.h" +#include "unicode/uscript.h" + +U_NAMESPACE_BEGIN + +class UnicodeSet; +class UStack; +class UVector32; +class DictionaryMatcher; + +/******************************************************************* + * LanguageBreakEngine + */ + +/** + * <p>LanguageBreakEngines implement language-specific knowledge for + * finding text boundaries within a run of characters belonging to a + * specific set. The boundaries will be of a specific kind, e.g. word, + * line, etc.</p> + * + * <p>LanguageBreakEngines should normally be implemented so as to + * be shared between threads without locking.</p> + */ +class LanguageBreakEngine : public UMemory { + public: + + /** + * <p>Default constructor.</p> + * + */ + LanguageBreakEngine(); + + /** + * <p>Virtual destructor.</p> + */ + virtual ~LanguageBreakEngine(); + + /** + * <p>Indicate whether this engine handles a particular character for + * a particular kind of break.</p> + * + * @param c A character which begins a run that the engine might handle + * @return true if this engine handles the particular character and break + * type. + */ + virtual UBool handles(UChar32 c) const = 0; + + /** + * <p>Find any breaks within a run in the supplied text.</p> + * + * @param text A UText representing the text. The + * iterator is left at the end of the run of characters which the engine + * is capable of handling. + * @param startPos The start of the run within the supplied text. + * @param endPos The end of the run within the supplied text. + * @param foundBreaks A Vector of int32_t to receive the breaks. + * @return The number of breaks found. + */ + virtual int32_t findBreaks( UText *text, + int32_t startPos, + int32_t endPos, + UVector32 &foundBreaks ) const = 0; + +}; + +/******************************************************************* + * LanguageBreakFactory + */ + +/** + * <p>LanguageBreakFactorys find and return a LanguageBreakEngine + * that can determine breaks for characters in a specific set, if + * such an object can be found.</p> + * + * <p>If a LanguageBreakFactory is to be shared between threads, + * appropriate synchronization must be used; there is none internal + * to the factory.</p> + * + * <p>A LanguageBreakEngine returned by a LanguageBreakFactory can + * normally be shared between threads without synchronization, unless + * the specific subclass of LanguageBreakFactory indicates otherwise.</p> + * + * <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine + * it returns when it itself is deleted, unless the specific subclass of + * LanguageBreakFactory indicates otherwise. Naturally, the factory should + * not be deleted until the LanguageBreakEngines it has returned are no + * longer needed.</p> + */ +class LanguageBreakFactory : public UMemory { + public: + + /** + * <p>Default constructor.</p> + * + */ + LanguageBreakFactory(); + + /** + * <p>Virtual destructor.</p> + */ + virtual ~LanguageBreakFactory(); + + /** + * <p>Find and return a LanguageBreakEngine that can find the desired + * kind of break for the set of characters to which the supplied + * character belongs. It is up to the set of available engines to + * determine what the sets of characters are.</p> + * + * @param c A character that begins a run for which a LanguageBreakEngine is + * sought. + * @return A LanguageBreakEngine with the desired characteristics, or 0. + */ + virtual const LanguageBreakEngine *getEngineFor(UChar32 c) = 0; + +}; + +/******************************************************************* + * UnhandledEngine + */ + +/** + * <p>UnhandledEngine is a special subclass of LanguageBreakEngine that + * handles characters that no other LanguageBreakEngine is available to + * handle. It is told the character and the type of break; at its + * discretion it may handle more than the specified character (e.g., + * the entire script to which that character belongs.</p> + * + * <p>UnhandledEngines may not be shared between threads without + * external synchronization.</p> + */ + +class UnhandledEngine : public LanguageBreakEngine { + private: + + /** + * The sets of characters handled. + * @internal + */ + + UnicodeSet *fHandled; + + public: + + /** + * <p>Default constructor.</p> + * + */ + UnhandledEngine(UErrorCode &status); + + /** + * <p>Virtual destructor.</p> + */ + virtual ~UnhandledEngine(); + + /** + * <p>Indicate whether this engine handles a particular character for + * a particular kind of break.</p> + * + * @param c A character which begins a run that the engine might handle + * @return true if this engine handles the particular character and break + * type. + */ + virtual UBool handles(UChar32 c) const; + + /** + * <p>Find any breaks within a run in the supplied text.</p> + * + * @param text A UText representing the text (TODO: UText). The + * iterator is left at the end of the run of characters which the engine + * is capable of handling. + * @param startPos The start of the run within the supplied text. + * @param endPos The end of the run within the supplied text. + * @param foundBreaks An allocated C array of the breaks found, if any + * @return The number of breaks found. + */ + virtual int32_t findBreaks( UText *text, + int32_t startPos, + int32_t endPos, + UVector32 &foundBreaks ) const; + + /** + * <p>Tell the engine to handle a particular character and break type.</p> + * + * @param c A character which the engine should handle + */ + virtual void handleCharacter(UChar32 c); + +}; + +/******************************************************************* + * ICULanguageBreakFactory + */ + +/** + * <p>ICULanguageBreakFactory is the default LanguageBreakFactory for + * ICU. It creates dictionary-based LanguageBreakEngines from dictionary + * data in the ICU data file.</p> + */ +class ICULanguageBreakFactory : public LanguageBreakFactory { + private: + + /** + * The stack of break engines created by this factory + * @internal + */ + + UStack *fEngines; + + public: + + /** + * <p>Standard constructor.</p> + * + */ + ICULanguageBreakFactory(UErrorCode &status); + + /** + * <p>Virtual destructor.</p> + */ + virtual ~ICULanguageBreakFactory(); + + /** + * <p>Find and return a LanguageBreakEngine that can find the desired + * kind of break for the set of characters to which the supplied + * character belongs. It is up to the set of available engines to + * determine what the sets of characters are.</p> + * + * @param c A character that begins a run for which a LanguageBreakEngine is + * sought. + * @return A LanguageBreakEngine with the desired characteristics, or 0. + */ + virtual const LanguageBreakEngine *getEngineFor(UChar32 c); + +protected: + /** + * <p>Create a LanguageBreakEngine for the set of characters to which + * the supplied character belongs, for the specified break type.</p> + * + * @param c A character that begins a run for which a LanguageBreakEngine is + * sought. + * @return A LanguageBreakEngine with the desired characteristics, or 0. + */ + virtual const LanguageBreakEngine *loadEngineFor(UChar32 c); + + /** + * <p>Create a DictionaryMatcher for the specified script and break type.</p> + * @param script An ISO 15924 script code that identifies the dictionary to be + * created. + * @return A DictionaryMatcher with the desired characteristics, or NULL. + */ + virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script); +}; + +U_NAMESPACE_END + + /* BRKENG_H */ +#endif diff --git a/thirdparty/icu4c/common/brkiter.cpp b/thirdparty/icu4c/common/brkiter.cpp new file mode 100644 index 0000000000..b9b6ca65cd --- /dev/null +++ b/thirdparty/icu4c/common/brkiter.cpp @@ -0,0 +1,527 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 1997-2015, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +* +* File brkiter.cpp +* +* Modification History: +* +* Date Name Description +* 02/18/97 aliu Converted from OpenClass. Added DONE. +* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. +***************************************************************************************** +*/ + +// ***************************************************************************** +// This file was generated from the java source file BreakIterator.java +// ***************************************************************************** + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/rbbi.h" +#include "unicode/brkiter.h" +#include "unicode/udata.h" +#include "unicode/ures.h" +#include "unicode/ustring.h" +#include "unicode/filteredbrk.h" +#include "ucln_cmn.h" +#include "cstring.h" +#include "umutex.h" +#include "servloc.h" +#include "locbased.h" +#include "uresimp.h" +#include "uassert.h" +#include "ubrkimpl.h" +#include "utracimp.h" +#include "charstr.h" + +// ***************************************************************************** +// class BreakIterator +// This class implements methods for finding the location of boundaries in text. +// Instances of BreakIterator maintain a current position and scan over text +// returning the index of characters where boundaries occur. +// ***************************************************************************** + +U_NAMESPACE_BEGIN + +// ------------------------------------- + +BreakIterator* +BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &status) +{ + char fnbuff[256]; + char ext[4]={'\0'}; + CharString actualLocale; + int32_t size; + const UChar* brkfname = NULL; + UResourceBundle brkRulesStack; + UResourceBundle brkNameStack; + UResourceBundle *brkRules = &brkRulesStack; + UResourceBundle *brkName = &brkNameStack; + RuleBasedBreakIterator *result = NULL; + + if (U_FAILURE(status)) + return NULL; + + ures_initStackObject(brkRules); + ures_initStackObject(brkName); + + // Get the locale + UResourceBundle *b = ures_openNoDefault(U_ICUDATA_BRKITR, loc.getName(), &status); + + // Get the "boundaries" array. + if (U_SUCCESS(status)) { + brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status); + // Get the string object naming the rules file + brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status); + // Get the actual string + brkfname = ures_getString(brkName, &size, &status); + U_ASSERT((size_t)size<sizeof(fnbuff)); + if ((size_t)size>=sizeof(fnbuff)) { + size=0; + if (U_SUCCESS(status)) { + status = U_BUFFER_OVERFLOW_ERROR; + } + } + + // Use the string if we found it + if (U_SUCCESS(status) && brkfname) { + actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status); + + UChar* extStart=u_strchr(brkfname, 0x002e); + int len = 0; + if(extStart!=NULL){ + len = (int)(extStart-brkfname); + u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff + u_UCharsToChars(brkfname, fnbuff, len); + } + fnbuff[len]=0; // nul terminate + } + } + + ures_close(brkRules); + ures_close(brkName); + + UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status); + if (U_FAILURE(status)) { + ures_close(b); + return NULL; + } + + // Create a RuleBasedBreakIterator + result = new RuleBasedBreakIterator(file, status); + + // If there is a result, set the valid locale and actual locale, and the kind + if (U_SUCCESS(status) && result != NULL) { + U_LOCALE_BASED(locBased, *(BreakIterator*)result); + locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), + actualLocale.data()); + } + + ures_close(b); + + if (U_FAILURE(status) && result != NULL) { // Sometimes redundant check, but simple + delete result; + return NULL; + } + + if (result == NULL) { + udata_close(file); + if (U_SUCCESS(status)) { + status = U_MEMORY_ALLOCATION_ERROR; + } + } + + return result; +} + +// Creates a break iterator for word breaks. +BreakIterator* U_EXPORT2 +BreakIterator::createWordInstance(const Locale& key, UErrorCode& status) +{ + return createInstance(key, UBRK_WORD, status); +} + +// ------------------------------------- + +// Creates a break iterator for line breaks. +BreakIterator* U_EXPORT2 +BreakIterator::createLineInstance(const Locale& key, UErrorCode& status) +{ + return createInstance(key, UBRK_LINE, status); +} + +// ------------------------------------- + +// Creates a break iterator for character breaks. +BreakIterator* U_EXPORT2 +BreakIterator::createCharacterInstance(const Locale& key, UErrorCode& status) +{ + return createInstance(key, UBRK_CHARACTER, status); +} + +// ------------------------------------- + +// Creates a break iterator for sentence breaks. +BreakIterator* U_EXPORT2 +BreakIterator::createSentenceInstance(const Locale& key, UErrorCode& status) +{ + return createInstance(key, UBRK_SENTENCE, status); +} + +// ------------------------------------- + +// Creates a break iterator for title casing breaks. +BreakIterator* U_EXPORT2 +BreakIterator::createTitleInstance(const Locale& key, UErrorCode& status) +{ + return createInstance(key, UBRK_TITLE, status); +} + +// ------------------------------------- + +// Gets all the available locales that has localized text boundary data. +const Locale* U_EXPORT2 +BreakIterator::getAvailableLocales(int32_t& count) +{ + return Locale::getAvailableLocales(count); +} + +// ------------------------------------------ +// +// Constructors, destructor and assignment operator +// +//------------------------------------------- + +BreakIterator::BreakIterator() +{ + *validLocale = *actualLocale = 0; +} + +BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) { + uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); + uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); +} + +BreakIterator &BreakIterator::operator =(const BreakIterator &other) { + if (this != &other) { + uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); + uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); + } + return *this; +} + +BreakIterator::~BreakIterator() +{ +} + +// ------------------------------------------ +// +// Registration +// +//------------------------------------------- +#if !UCONFIG_NO_SERVICE + +// ------------------------------------- + +class ICUBreakIteratorFactory : public ICUResourceBundleFactory { +public: + virtual ~ICUBreakIteratorFactory(); +protected: + virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* /*service*/, UErrorCode& status) const { + return BreakIterator::makeInstance(loc, kind, status); + } +}; + +ICUBreakIteratorFactory::~ICUBreakIteratorFactory() {} + +// ------------------------------------- + +class ICUBreakIteratorService : public ICULocaleService { +public: + ICUBreakIteratorService() + : ICULocaleService(UNICODE_STRING("Break Iterator", 14)) + { + UErrorCode status = U_ZERO_ERROR; + registerFactory(new ICUBreakIteratorFactory(), status); + } + + virtual ~ICUBreakIteratorService(); + + virtual UObject* cloneInstance(UObject* instance) const { + return ((BreakIterator*)instance)->clone(); + } + + virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* /*actualID*/, UErrorCode& status) const { + LocaleKey& lkey = (LocaleKey&)key; + int32_t kind = lkey.kind(); + Locale loc; + lkey.currentLocale(loc); + return BreakIterator::makeInstance(loc, kind, status); + } + + virtual UBool isDefault() const { + return countFactories() == 1; + } +}; + +ICUBreakIteratorService::~ICUBreakIteratorService() {} + +// ------------------------------------- + +// defined in ucln_cmn.h +U_NAMESPACE_END + +static icu::UInitOnce gInitOnceBrkiter = U_INITONCE_INITIALIZER; +static icu::ICULocaleService* gService = NULL; + + + +/** + * Release all static memory held by breakiterator. + */ +U_CDECL_BEGIN +static UBool U_CALLCONV breakiterator_cleanup(void) { +#if !UCONFIG_NO_SERVICE + if (gService) { + delete gService; + gService = NULL; + } + gInitOnceBrkiter.reset(); +#endif + return TRUE; +} +U_CDECL_END +U_NAMESPACE_BEGIN + +static void U_CALLCONV +initService(void) { + gService = new ICUBreakIteratorService(); + ucln_common_registerCleanup(UCLN_COMMON_BREAKITERATOR, breakiterator_cleanup); +} + +static ICULocaleService* +getService(void) +{ + umtx_initOnce(gInitOnceBrkiter, &initService); + return gService; +} + + +// ------------------------------------- + +static inline UBool +hasService(void) +{ + return !gInitOnceBrkiter.isReset() && getService() != NULL; +} + +// ------------------------------------- + +URegistryKey U_EXPORT2 +BreakIterator::registerInstance(BreakIterator* toAdopt, const Locale& locale, UBreakIteratorType kind, UErrorCode& status) +{ + ICULocaleService *service = getService(); + if (service == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + return service->registerInstance(toAdopt, locale, kind, status); +} + +// ------------------------------------- + +UBool U_EXPORT2 +BreakIterator::unregister(URegistryKey key, UErrorCode& status) +{ + if (U_SUCCESS(status)) { + if (hasService()) { + return gService->unregister(key, status); + } + status = U_MEMORY_ALLOCATION_ERROR; + } + return FALSE; +} + +// ------------------------------------- + +StringEnumeration* U_EXPORT2 +BreakIterator::getAvailableLocales(void) +{ + ICULocaleService *service = getService(); + if (service == NULL) { + return NULL; + } + return service->getAvailableLocales(); +} +#endif /* UCONFIG_NO_SERVICE */ + +// ------------------------------------- + +BreakIterator* +BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& status) +{ + if (U_FAILURE(status)) { + return NULL; + } + +#if !UCONFIG_NO_SERVICE + if (hasService()) { + Locale actualLoc(""); + BreakIterator *result = (BreakIterator*)gService->get(loc, kind, &actualLoc, status); + // TODO: The way the service code works in ICU 2.8 is that if + // there is a real registered break iterator, the actualLoc + // will be populated, but if the handleDefault path is taken + // (because nothing is registered that can handle the + // requested locale) then the actualLoc comes back empty. In + // that case, the returned object already has its actual/valid + // locale data populated (by makeInstance, which is what + // handleDefault calls), so we don't touch it. YES, A COMMENT + // THIS LONG is a sign of bad code -- so the action item is to + // revisit this in ICU 3.0 and clean it up/fix it/remove it. + if (U_SUCCESS(status) && (result != NULL) && *actualLoc.getName() != 0) { + U_LOCALE_BASED(locBased, *result); + locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); + } + return result; + } + else +#endif + { + return makeInstance(loc, kind, status); + } +} + +// ------------------------------------- +enum { kKeyValueLenMax = 32 }; + +BreakIterator* +BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) +{ + + if (U_FAILURE(status)) { + return NULL; + } + char lbType[kKeyValueLenMax]; + + BreakIterator *result = NULL; + switch (kind) { + case UBRK_CHARACTER: + { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_CHARACTER); + result = BreakIterator::buildInstance(loc, "grapheme", status); + UTRACE_EXIT_STATUS(status); + } + break; + case UBRK_WORD: + { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_WORD); + result = BreakIterator::buildInstance(loc, "word", status); + UTRACE_EXIT_STATUS(status); + } + break; + case UBRK_LINE: + { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE); + uprv_strcpy(lbType, "line"); + char lbKeyValue[kKeyValueLenMax] = {0}; + UErrorCode kvStatus = U_ZERO_ERROR; + int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus); + if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) { + uprv_strcat(lbType, "_"); + uprv_strcat(lbType, lbKeyValue); + } + result = BreakIterator::buildInstance(loc, lbType, status); + + UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue); + UTRACE_EXIT_STATUS(status); + } + break; + case UBRK_SENTENCE: + { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_SENTENCE); + result = BreakIterator::buildInstance(loc, "sentence", status); +#if !UCONFIG_NO_FILTERED_BREAK_ITERATION + char ssKeyValue[kKeyValueLenMax] = {0}; + UErrorCode kvStatus = U_ZERO_ERROR; + int32_t kLen = loc.getKeywordValue("ss", ssKeyValue, kKeyValueLenMax, kvStatus); + if (U_SUCCESS(kvStatus) && kLen > 0 && uprv_strcmp(ssKeyValue,"standard")==0) { + FilteredBreakIteratorBuilder* fbiBuilder = FilteredBreakIteratorBuilder::createInstance(loc, kvStatus); + if (U_SUCCESS(kvStatus)) { + result = fbiBuilder->build(result, status); + delete fbiBuilder; + } + } +#endif + UTRACE_EXIT_STATUS(status); + } + break; + case UBRK_TITLE: + { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_TITLE); + result = BreakIterator::buildInstance(loc, "title", status); + UTRACE_EXIT_STATUS(status); + } + break; + default: + status = U_ILLEGAL_ARGUMENT_ERROR; + } + + if (U_FAILURE(status)) { + return NULL; + } + + return result; +} + +Locale +BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { + U_LOCALE_BASED(locBased, *this); + return locBased.getLocale(type, status); +} + +const char * +BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { + U_LOCALE_BASED(locBased, *this); + return locBased.getLocaleID(type, status); +} + + +// This implementation of getRuleStatus is a do-nothing stub, here to +// provide a default implementation for any derived BreakIterator classes that +// do not implement it themselves. +int32_t BreakIterator::getRuleStatus() const { + return 0; +} + +// This implementation of getRuleStatusVec is a do-nothing stub, here to +// provide a default implementation for any derived BreakIterator classes that +// do not implement it themselves. +int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) { + if (U_FAILURE(status)) { + return 0; + } + if (capacity < 1) { + status = U_BUFFER_OVERFLOW_ERROR; + return 1; + } + *fillInVec = 0; + return 1; +} + +BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) { + U_LOCALE_BASED(locBased, (*this)); + locBased.setLocaleIDs(valid, actual); +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ + +//eof diff --git a/thirdparty/icu4c/common/bytesinkutil.cpp b/thirdparty/icu4c/common/bytesinkutil.cpp new file mode 100644 index 0000000000..c64a845f87 --- /dev/null +++ b/thirdparty/icu4c/common/bytesinkutil.cpp @@ -0,0 +1,161 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// bytesinkutil.cpp +// created: 2017sep14 Markus W. Scherer + +#include "unicode/utypes.h" +#include "unicode/bytestream.h" +#include "unicode/edits.h" +#include "unicode/stringoptions.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "bytesinkutil.h" +#include "charstr.h" +#include "cmemory.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + +UBool +ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length, + ByteSink &sink, Edits *edits, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return FALSE; } + char scratch[200]; + int32_t s8Length = 0; + for (int32_t i = 0; i < s16Length;) { + int32_t capacity; + int32_t desiredCapacity = s16Length - i; + if (desiredCapacity < (INT32_MAX / 3)) { + desiredCapacity *= 3; // max 3 UTF-8 bytes per UTF-16 code unit + } else if (desiredCapacity < (INT32_MAX / 2)) { + desiredCapacity *= 2; + } else { + desiredCapacity = INT32_MAX; + } + char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity, + scratch, UPRV_LENGTHOF(scratch), &capacity); + capacity -= U8_MAX_LENGTH - 1; + int32_t j = 0; + for (; i < s16Length && j < capacity;) { + UChar32 c; + U16_NEXT_UNSAFE(s16, i, c); + U8_APPEND_UNSAFE(buffer, j, c); + } + if (j > (INT32_MAX - s8Length)) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return FALSE; + } + sink.Append(buffer, j); + s8Length += j; + } + if (edits != nullptr) { + edits->addReplace(length, s8Length); + } + return TRUE; +} + +UBool +ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit, + const char16_t *s16, int32_t s16Length, + ByteSink &sink, Edits *edits, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return FALSE; } + if ((limit - s) > INT32_MAX) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return FALSE; + } + return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode); +} + +void +ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) { + char s8[U8_MAX_LENGTH]; + int32_t s8Length = 0; + U8_APPEND_UNSAFE(s8, s8Length, c); + if (edits != nullptr) { + edits->addReplace(length, s8Length); + } + sink.Append(s8, s8Length); +} + +namespace { + +// See unicode/utf8.h U8_APPEND_UNSAFE(). +inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); } +inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); } + +} // namespace + +void +ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) { + U_ASSERT(0x80 <= c && c <= 0x7ff); // 2-byte UTF-8 + char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) }; + sink.Append(s8, 2); +} + +void +ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length, + ByteSink &sink, uint32_t options, Edits *edits) { + U_ASSERT(length > 0); + if (edits != nullptr) { + edits->addUnchanged(length); + } + if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { + sink.Append(reinterpret_cast<const char *>(s), length); + } +} + +UBool +ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit, + ByteSink &sink, uint32_t options, Edits *edits, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return FALSE; } + if ((limit - s) > INT32_MAX) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return FALSE; + } + int32_t length = (int32_t)(limit - s); + if (length > 0) { + appendNonEmptyUnchanged(s, length, sink, options, edits); + } + return TRUE; +} + +CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) { +} + +CharStringByteSink::~CharStringByteSink() = default; + +void +CharStringByteSink::Append(const char* bytes, int32_t n) { + UErrorCode status = U_ZERO_ERROR; + dest_.append(bytes, n, status); + // Any errors are silently ignored. +} + +char* +CharStringByteSink::GetAppendBuffer(int32_t min_capacity, + int32_t desired_capacity_hint, + char* scratch, + int32_t scratch_capacity, + int32_t* result_capacity) { + if (min_capacity < 1 || scratch_capacity < min_capacity) { + *result_capacity = 0; + return nullptr; + } + + UErrorCode status = U_ZERO_ERROR; + char* result = dest_.getAppendBuffer( + min_capacity, + desired_capacity_hint, + *result_capacity, + status); + if (U_SUCCESS(status)) { + return result; + } + + *result_capacity = scratch_capacity; + return scratch; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/bytesinkutil.h b/thirdparty/icu4c/common/bytesinkutil.h new file mode 100644 index 0000000000..ab2516432d --- /dev/null +++ b/thirdparty/icu4c/common/bytesinkutil.h @@ -0,0 +1,83 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// bytesinkutil.h +// created: 2017sep14 Markus W. Scherer + +#include "unicode/utypes.h" +#include "unicode/bytestream.h" +#include "unicode/edits.h" +#include "cmemory.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + +class ByteSink; +class CharString; +class Edits; + +class U_COMMON_API ByteSinkUtil { +public: + ByteSinkUtil() = delete; // all static + + /** (length) bytes were mapped to valid (s16, s16Length). */ + static UBool appendChange(int32_t length, + const char16_t *s16, int32_t s16Length, + ByteSink &sink, Edits *edits, UErrorCode &errorCode); + + /** The bytes at [s, limit[ were mapped to valid (s16, s16Length). */ + static UBool appendChange(const uint8_t *s, const uint8_t *limit, + const char16_t *s16, int32_t s16Length, + ByteSink &sink, Edits *edits, UErrorCode &errorCode); + + /** (length) bytes were mapped/changed to valid code point c. */ + static void appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits = nullptr); + + /** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */ + static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c, + ByteSink &sink, Edits *edits = nullptr) { + appendCodePoint((int32_t)(nextSrc - src), c, sink, edits); + } + + /** Append the two-byte character (U+0080..U+07FF). */ + static void appendTwoBytes(UChar32 c, ByteSink &sink); + + static UBool appendUnchanged(const uint8_t *s, int32_t length, + ByteSink &sink, uint32_t options, Edits *edits, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return false; } + if (length > 0) { appendNonEmptyUnchanged(s, length, sink, options, edits); } + return true; + } + + static UBool appendUnchanged(const uint8_t *s, const uint8_t *limit, + ByteSink &sink, uint32_t options, Edits *edits, + UErrorCode &errorCode); + +private: + static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length, + ByteSink &sink, uint32_t options, Edits *edits); +}; + +class U_COMMON_API CharStringByteSink : public ByteSink { +public: + CharStringByteSink(CharString* dest); + ~CharStringByteSink() override; + + CharStringByteSink() = delete; + CharStringByteSink(const CharStringByteSink&) = delete; + CharStringByteSink& operator=(const CharStringByteSink&) = delete; + + void Append(const char* bytes, int32_t n) override; + + char* GetAppendBuffer(int32_t min_capacity, + int32_t desired_capacity_hint, + char* scratch, + int32_t scratch_capacity, + int32_t* result_capacity) override; + +private: + CharString& dest_; +}; + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/bytestream.cpp b/thirdparty/icu4c/common/bytestream.cpp new file mode 100644 index 0000000000..0d0e4dda39 --- /dev/null +++ b/thirdparty/icu4c/common/bytestream.cpp @@ -0,0 +1,85 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// Copyright (C) 2009-2011, International Business Machines +// Corporation and others. All Rights Reserved. +// +// Copyright 2007 Google Inc. All Rights Reserved. +// Author: sanjay@google.com (Sanjay Ghemawat) + +#include "unicode/utypes.h" +#include "unicode/bytestream.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + +ByteSink::~ByteSink() {} + +char* ByteSink::GetAppendBuffer(int32_t min_capacity, + int32_t /*desired_capacity_hint*/, + char* scratch, int32_t scratch_capacity, + int32_t* result_capacity) { + if (min_capacity < 1 || scratch_capacity < min_capacity) { + *result_capacity = 0; + return NULL; + } + *result_capacity = scratch_capacity; + return scratch; +} + +void ByteSink::Flush() {} + +CheckedArrayByteSink::CheckedArrayByteSink(char* outbuf, int32_t capacity) + : outbuf_(outbuf), capacity_(capacity < 0 ? 0 : capacity), + size_(0), appended_(0), overflowed_(FALSE) { +} + +CheckedArrayByteSink::~CheckedArrayByteSink() {} + +CheckedArrayByteSink& CheckedArrayByteSink::Reset() { + size_ = appended_ = 0; + overflowed_ = FALSE; + return *this; +} + +void CheckedArrayByteSink::Append(const char* bytes, int32_t n) { + if (n <= 0) { + return; + } + if (n > (INT32_MAX - appended_)) { + // TODO: Report as integer overflow, not merely buffer overflow. + appended_ = INT32_MAX; + overflowed_ = TRUE; + return; + } + appended_ += n; + int32_t available = capacity_ - size_; + if (n > available) { + n = available; + overflowed_ = TRUE; + } + if (n > 0 && bytes != (outbuf_ + size_)) { + uprv_memcpy(outbuf_ + size_, bytes, n); + } + size_ += n; +} + +char* CheckedArrayByteSink::GetAppendBuffer(int32_t min_capacity, + int32_t /*desired_capacity_hint*/, + char* scratch, + int32_t scratch_capacity, + int32_t* result_capacity) { + if (min_capacity < 1 || scratch_capacity < min_capacity) { + *result_capacity = 0; + return NULL; + } + int32_t available = capacity_ - size_; + if (available >= min_capacity) { + *result_capacity = available; + return outbuf_ + size_; + } else { + *result_capacity = scratch_capacity; + return scratch; + } +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/bytestrie.cpp b/thirdparty/icu4c/common/bytestrie.cpp new file mode 100644 index 0000000000..c4d498c4bf --- /dev/null +++ b/thirdparty/icu4c/common/bytestrie.cpp @@ -0,0 +1,441 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: bytestrie.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010sep25 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/bytestream.h" +#include "unicode/bytestrie.h" +#include "unicode/uobject.h" +#include "cmemory.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + +BytesTrie::~BytesTrie() { + uprv_free(ownedArray_); +} + +// lead byte already shifted right by 1. +int32_t +BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) { + int32_t value; + if(leadByte<kMinTwoByteValueLead) { + value=leadByte-kMinOneByteValueLead; + } else if(leadByte<kMinThreeByteValueLead) { + value=((leadByte-kMinTwoByteValueLead)<<8)|*pos; + } else if(leadByte<kFourByteValueLead) { + value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1]; + } else if(leadByte==kFourByteValueLead) { + value=(pos[0]<<16)|(pos[1]<<8)|pos[2]; + } else { + value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3]; + } + return value; +} + +const uint8_t * +BytesTrie::jumpByDelta(const uint8_t *pos) { + int32_t delta=*pos++; + if(delta<kMinTwoByteDeltaLead) { + // nothing to do + } else if(delta<kMinThreeByteDeltaLead) { + delta=((delta-kMinTwoByteDeltaLead)<<8)|*pos++; + } else if(delta<kFourByteDeltaLead) { + delta=((delta-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1]; + pos+=2; + } else if(delta==kFourByteDeltaLead) { + delta=(pos[0]<<16)|(pos[1]<<8)|pos[2]; + pos+=3; + } else { + delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3]; + pos+=4; + } + return pos+delta; +} + +UStringTrieResult +BytesTrie::current() const { + const uint8_t *pos=pos_; + if(pos==NULL) { + return USTRINGTRIE_NO_MATCH; + } else { + int32_t node; + return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ? + valueResult(node) : USTRINGTRIE_NO_VALUE; + } +} + +UStringTrieResult +BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) { + // Branch according to the current byte. + if(length==0) { + length=*pos++; + } + ++length; + // The length of the branch is the number of bytes to select from. + // The data structure encodes a binary search. + while(length>kMaxBranchLinearSubNodeLength) { + if(inByte<*pos++) { + length>>=1; + pos=jumpByDelta(pos); + } else { + length=length-(length>>1); + pos=skipDelta(pos); + } + } + // Drop down to linear search for the last few bytes. + // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3 + // and divides length by 2. + do { + if(inByte==*pos++) { + UStringTrieResult result; + int32_t node=*pos; + U_ASSERT(node>=kMinValueLead); + if(node&kValueIsFinal) { + // Leave the final value for getValue() to read. + result=USTRINGTRIE_FINAL_VALUE; + } else { + // Use the non-final value as the jump delta. + ++pos; + // int32_t delta=readValue(pos, node>>1); + node>>=1; + int32_t delta; + if(node<kMinTwoByteValueLead) { + delta=node-kMinOneByteValueLead; + } else if(node<kMinThreeByteValueLead) { + delta=((node-kMinTwoByteValueLead)<<8)|*pos++; + } else if(node<kFourByteValueLead) { + delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1]; + pos+=2; + } else if(node==kFourByteValueLead) { + delta=(pos[0]<<16)|(pos[1]<<8)|pos[2]; + pos+=3; + } else { + delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3]; + pos+=4; + } + // end readValue() + pos+=delta; + node=*pos; + result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; + } + pos_=pos; + return result; + } + --length; + pos=skipValue(pos); + } while(length>1); + if(inByte==*pos++) { + pos_=pos; + int32_t node=*pos; + return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; + } else { + stop(); + return USTRINGTRIE_NO_MATCH; + } +} + +UStringTrieResult +BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) { + for(;;) { + int32_t node=*pos++; + if(node<kMinLinearMatch) { + return branchNext(pos, node, inByte); + } else if(node<kMinValueLead) { + // Match the first of length+1 bytes. + int32_t length=node-kMinLinearMatch; // Actual match length minus 1. + if(inByte==*pos++) { + remainingMatchLength_=--length; + pos_=pos; + return (length<0 && (node=*pos)>=kMinValueLead) ? + valueResult(node) : USTRINGTRIE_NO_VALUE; + } else { + // No match. + break; + } + } else if(node&kValueIsFinal) { + // No further matching bytes. + break; + } else { + // Skip intermediate value. + pos=skipValue(pos, node); + // The next node must not also be a value node. + U_ASSERT(*pos<kMinValueLead); + } + } + stop(); + return USTRINGTRIE_NO_MATCH; +} + +UStringTrieResult +BytesTrie::next(int32_t inByte) { + const uint8_t *pos=pos_; + if(pos==NULL) { + return USTRINGTRIE_NO_MATCH; + } + if(inByte<0) { + inByte+=0x100; + } + int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. + if(length>=0) { + // Remaining part of a linear-match node. + if(inByte==*pos++) { + remainingMatchLength_=--length; + pos_=pos; + int32_t node; + return (length<0 && (node=*pos)>=kMinValueLead) ? + valueResult(node) : USTRINGTRIE_NO_VALUE; + } else { + stop(); + return USTRINGTRIE_NO_MATCH; + } + } + return nextImpl(pos, inByte); +} + +UStringTrieResult +BytesTrie::next(const char *s, int32_t sLength) { + if(sLength<0 ? *s==0 : sLength==0) { + // Empty input. + return current(); + } + const uint8_t *pos=pos_; + if(pos==NULL) { + return USTRINGTRIE_NO_MATCH; + } + int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. + for(;;) { + // Fetch the next input byte, if there is one. + // Continue a linear-match node without rechecking sLength<0. + int32_t inByte; + if(sLength<0) { + for(;;) { + if((inByte=*s++)==0) { + remainingMatchLength_=length; + pos_=pos; + int32_t node; + return (length<0 && (node=*pos)>=kMinValueLead) ? + valueResult(node) : USTRINGTRIE_NO_VALUE; + } + if(length<0) { + remainingMatchLength_=length; + break; + } + if(inByte!=*pos) { + stop(); + return USTRINGTRIE_NO_MATCH; + } + ++pos; + --length; + } + } else { + for(;;) { + if(sLength==0) { + remainingMatchLength_=length; + pos_=pos; + int32_t node; + return (length<0 && (node=*pos)>=kMinValueLead) ? + valueResult(node) : USTRINGTRIE_NO_VALUE; + } + inByte=*s++; + --sLength; + if(length<0) { + remainingMatchLength_=length; + break; + } + if(inByte!=*pos) { + stop(); + return USTRINGTRIE_NO_MATCH; + } + ++pos; + --length; + } + } + for(;;) { + int32_t node=*pos++; + if(node<kMinLinearMatch) { + UStringTrieResult result=branchNext(pos, node, inByte); + if(result==USTRINGTRIE_NO_MATCH) { + return USTRINGTRIE_NO_MATCH; + } + // Fetch the next input byte, if there is one. + if(sLength<0) { + if((inByte=*s++)==0) { + return result; + } + } else { + if(sLength==0) { + return result; + } + inByte=*s++; + --sLength; + } + if(result==USTRINGTRIE_FINAL_VALUE) { + // No further matching bytes. + stop(); + return USTRINGTRIE_NO_MATCH; + } + pos=pos_; // branchNext() advanced pos and wrote it to pos_ . + } else if(node<kMinValueLead) { + // Match length+1 bytes. + length=node-kMinLinearMatch; // Actual match length minus 1. + if(inByte!=*pos) { + stop(); + return USTRINGTRIE_NO_MATCH; + } + ++pos; + --length; + break; + } else if(node&kValueIsFinal) { + // No further matching bytes. + stop(); + return USTRINGTRIE_NO_MATCH; + } else { + // Skip intermediate value. + pos=skipValue(pos, node); + // The next node must not also be a value node. + U_ASSERT(*pos<kMinValueLead); + } + } + } +} + +const uint8_t * +BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length, + UBool haveUniqueValue, int32_t &uniqueValue) { + while(length>kMaxBranchLinearSubNodeLength) { + ++pos; // ignore the comparison byte + if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) { + return NULL; + } + length=length-(length>>1); + pos=skipDelta(pos); + } + do { + ++pos; // ignore a comparison byte + // handle its value + int32_t node=*pos++; + UBool isFinal=(UBool)(node&kValueIsFinal); + int32_t value=readValue(pos, node>>1); + pos=skipValue(pos, node); + if(isFinal) { + if(haveUniqueValue) { + if(value!=uniqueValue) { + return NULL; + } + } else { + uniqueValue=value; + haveUniqueValue=TRUE; + } + } else { + if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) { + return NULL; + } + haveUniqueValue=TRUE; + } + } while(--length>1); + return pos+1; // ignore the last comparison byte +} + +UBool +BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) { + for(;;) { + int32_t node=*pos++; + if(node<kMinLinearMatch) { + if(node==0) { + node=*pos++; + } + pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue); + if(pos==NULL) { + return FALSE; + } + haveUniqueValue=TRUE; + } else if(node<kMinValueLead) { + // linear-match node + pos+=node-kMinLinearMatch+1; // Ignore the match bytes. + } else { + UBool isFinal=(UBool)(node&kValueIsFinal); + int32_t value=readValue(pos, node>>1); + if(haveUniqueValue) { + if(value!=uniqueValue) { + return FALSE; + } + } else { + uniqueValue=value; + haveUniqueValue=TRUE; + } + if(isFinal) { + return TRUE; + } + pos=skipValue(pos, node); + } + } +} + +int32_t +BytesTrie::getNextBytes(ByteSink &out) const { + const uint8_t *pos=pos_; + if(pos==NULL) { + return 0; + } + if(remainingMatchLength_>=0) { + append(out, *pos); // Next byte of a pending linear-match node. + return 1; + } + int32_t node=*pos++; + if(node>=kMinValueLead) { + if(node&kValueIsFinal) { + return 0; + } else { + pos=skipValue(pos, node); + node=*pos++; + U_ASSERT(node<kMinValueLead); + } + } + if(node<kMinLinearMatch) { + if(node==0) { + node=*pos++; + } + getNextBranchBytes(pos, ++node, out); + return node; + } else { + // First byte of the linear-match node. + append(out, *pos); + return 1; + } +} + +void +BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) { + while(length>kMaxBranchLinearSubNodeLength) { + ++pos; // ignore the comparison byte + getNextBranchBytes(jumpByDelta(pos), length>>1, out); + length=length-(length>>1); + pos=skipDelta(pos); + } + do { + append(out, *pos++); + pos=skipValue(pos); + } while(--length>1); + append(out, *pos); +} + +void +BytesTrie::append(ByteSink &out, int c) { + char ch=(char)c; + out.Append(&ch, 1); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/bytestriebuilder.cpp b/thirdparty/icu4c/common/bytestriebuilder.cpp new file mode 100644 index 0000000000..ec1ab7d8f5 --- /dev/null +++ b/thirdparty/icu4c/common/bytestriebuilder.cpp @@ -0,0 +1,504 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2012, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: bytestriebuilder.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010sep25 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/bytestrie.h" +#include "unicode/bytestriebuilder.h" +#include "unicode/stringpiece.h" +#include "charstr.h" +#include "cmemory.h" +#include "uhash.h" +#include "uarrsort.h" +#include "uassert.h" +#include "ustr_imp.h" + +U_NAMESPACE_BEGIN + +/* + * Note: This builder implementation stores (bytes, value) pairs with full copies + * of the byte sequences, until the BytesTrie is built. + * It might(!) take less memory if we collected the data in a temporary, dynamic trie. + */ + +class BytesTrieElement : public UMemory { +public: + // Use compiler's default constructor, initializes nothing. + + void setTo(StringPiece s, int32_t val, CharString &strings, UErrorCode &errorCode); + + StringPiece getString(const CharString &strings) const { + int32_t offset=stringOffset; + int32_t length; + if(offset>=0) { + length=(uint8_t)strings[offset++]; + } else { + offset=~offset; + length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1]; + offset+=2; + } + return StringPiece(strings.data()+offset, length); + } + int32_t getStringLength(const CharString &strings) const { + int32_t offset=stringOffset; + if(offset>=0) { + return (uint8_t)strings[offset]; + } else { + offset=~offset; + return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1]; + } + } + + char charAt(int32_t index, const CharString &strings) const { return data(strings)[index]; } + + int32_t getValue() const { return value; } + + int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const; + +private: + const char *data(const CharString &strings) const { + int32_t offset=stringOffset; + if(offset>=0) { + ++offset; + } else { + offset=~offset+2; + } + return strings.data()+offset; + } + + // If the stringOffset is non-negative, then the first strings byte contains + // the string length. + // If the stringOffset is negative, then the first two strings bytes contain + // the string length (big-endian), and the offset needs to be bit-inverted. + // (Compared with a stringLength field here, this saves 3 bytes per string for most strings.) + int32_t stringOffset; + int32_t value; +}; + +void +BytesTrieElement::setTo(StringPiece s, int32_t val, + CharString &strings, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + int32_t length=s.length(); + if(length>0xffff) { + // Too long: We store the length in 1 or 2 bytes. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + int32_t offset=strings.length(); + if(length>0xff) { + offset=~offset; + strings.append((char)(length>>8), errorCode); + } + strings.append((char)length, errorCode); + stringOffset=offset; + value=val; + strings.append(s, errorCode); +} + +int32_t +BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const { + // TODO: add StringPiece::compare(), see ticket #8187 + StringPiece thisString=getString(strings); + StringPiece otherString=other.getString(strings); + int32_t lengthDiff=thisString.length()-otherString.length(); + int32_t commonLength; + if(lengthDiff<=0) { + commonLength=thisString.length(); + } else { + commonLength=otherString.length(); + } + int32_t diff=uprv_memcmp(thisString.data(), otherString.data(), commonLength); + return diff!=0 ? diff : lengthDiff; +} + +BytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode) + : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0), + bytes(NULL), bytesCapacity(0), bytesLength(0) { + if(U_FAILURE(errorCode)) { + return; + } + strings=new CharString(); + if(strings==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } +} + +BytesTrieBuilder::~BytesTrieBuilder() { + delete strings; + delete[] elements; + uprv_free(bytes); +} + +BytesTrieBuilder & +BytesTrieBuilder::add(StringPiece s, int32_t value, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return *this; + } + if(bytesLength>0) { + // Cannot add elements after building. + errorCode=U_NO_WRITE_PERMISSION; + return *this; + } + if(elementsLength==elementsCapacity) { + int32_t newCapacity; + if(elementsCapacity==0) { + newCapacity=1024; + } else { + newCapacity=4*elementsCapacity; + } + BytesTrieElement *newElements=new BytesTrieElement[newCapacity]; + if(newElements==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return *this; // error instead of dereferencing null + } + if(elementsLength>0) { + uprv_memcpy(newElements, elements, (size_t)elementsLength*sizeof(BytesTrieElement)); + } + delete[] elements; + elements=newElements; + elementsCapacity=newCapacity; + } + elements[elementsLength++].setTo(s, value, *strings, errorCode); + return *this; +} + +U_CDECL_BEGIN + +static int32_t U_CALLCONV +compareElementStrings(const void *context, const void *left, const void *right) { + const CharString *strings=static_cast<const CharString *>(context); + const BytesTrieElement *leftElement=static_cast<const BytesTrieElement *>(left); + const BytesTrieElement *rightElement=static_cast<const BytesTrieElement *>(right); + return leftElement->compareStringTo(*rightElement, *strings); +} + +U_CDECL_END + +BytesTrie * +BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { + buildBytes(buildOption, errorCode); + BytesTrie *newTrie=NULL; + if(U_SUCCESS(errorCode)) { + newTrie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength)); + if(newTrie==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } else { + bytes=NULL; // The new trie now owns the array. + bytesCapacity=0; + } + } + return newTrie; +} + +StringPiece +BytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { + buildBytes(buildOption, errorCode); + StringPiece result; + if(U_SUCCESS(errorCode)) { + result.set(bytes+(bytesCapacity-bytesLength), bytesLength); + } + return result; +} + +void +BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + if(bytes!=NULL && bytesLength>0) { + // Already built. + return; + } + if(bytesLength==0) { + if(elementsLength==0) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement), + compareElementStrings, strings, + FALSE, // need not be a stable sort + &errorCode); + if(U_FAILURE(errorCode)) { + return; + } + // Duplicate strings are not allowed. + StringPiece prev=elements[0].getString(*strings); + for(int32_t i=1; i<elementsLength; ++i) { + StringPiece current=elements[i].getString(*strings); + if(prev==current) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + prev=current; + } + } + // Create and byte-serialize the trie for the elements. + bytesLength=0; + int32_t capacity=strings->length(); + if(capacity<1024) { + capacity=1024; + } + if(bytesCapacity<capacity) { + uprv_free(bytes); + bytes=static_cast<char *>(uprv_malloc(capacity)); + if(bytes==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + bytesCapacity=0; + return; + } + bytesCapacity=capacity; + } + StringTrieBuilder::build(buildOption, elementsLength, errorCode); + if(bytes==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } +} + +BytesTrieBuilder & +BytesTrieBuilder::clear() { + strings->clear(); + elementsLength=0; + bytesLength=0; + return *this; +} + +int32_t +BytesTrieBuilder::getElementStringLength(int32_t i) const { + return elements[i].getStringLength(*strings); +} + +UChar +BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const { + return (uint8_t)elements[i].charAt(byteIndex, *strings); +} + +int32_t +BytesTrieBuilder::getElementValue(int32_t i) const { + return elements[i].getValue(); +} + +int32_t +BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const { + const BytesTrieElement &firstElement=elements[first]; + const BytesTrieElement &lastElement=elements[last]; + int32_t minStringLength=firstElement.getStringLength(*strings); + while(++byteIndex<minStringLength && + firstElement.charAt(byteIndex, *strings)== + lastElement.charAt(byteIndex, *strings)) {} + return byteIndex; +} + +int32_t +BytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const { + int32_t length=0; // Number of different bytes at byteIndex. + int32_t i=start; + do { + char byte=elements[i++].charAt(byteIndex, *strings); + while(i<limit && byte==elements[i].charAt(byteIndex, *strings)) { + ++i; + } + ++length; + } while(i<limit); + return length; +} + +int32_t +BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const { + do { + char byte=elements[i++].charAt(byteIndex, *strings); + while(byte==elements[i].charAt(byteIndex, *strings)) { + ++i; + } + } while(--count>0); + return i; +} + +int32_t +BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const { + char b=(char)byte; + while(b==elements[i].charAt(byteIndex, *strings)) { + ++i; + } + return i; +} + +BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode) + : LinearMatchNode(len, nextNode), s(bytes) { + hash=static_cast<int32_t>( + static_cast<uint32_t>(hash)*37u + static_cast<uint32_t>(ustr_hashCharsN(bytes, len))); +} + +UBool +BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const { + if(this==&other) { + return TRUE; + } + if(!LinearMatchNode::operator==(other)) { + return FALSE; + } + const BTLinearMatchNode &o=(const BTLinearMatchNode &)other; + return 0==uprv_memcmp(s, o.s, length); +} + +void +BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) { + BytesTrieBuilder &b=(BytesTrieBuilder &)builder; + next->write(builder); + b.write(s, length); + offset=b.write(b.getMinLinearMatch()+length-1); +} + +StringTrieBuilder::Node * +BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, + Node *nextNode) const { + return new BTLinearMatchNode( + elements[i].getString(*strings).data()+byteIndex, + length, + nextNode); +} + +UBool +BytesTrieBuilder::ensureCapacity(int32_t length) { + if(bytes==NULL) { + return FALSE; // previous memory allocation had failed + } + if(length>bytesCapacity) { + int32_t newCapacity=bytesCapacity; + do { + newCapacity*=2; + } while(newCapacity<=length); + char *newBytes=static_cast<char *>(uprv_malloc(newCapacity)); + if(newBytes==NULL) { + // unable to allocate memory + uprv_free(bytes); + bytes=NULL; + bytesCapacity=0; + return FALSE; + } + uprv_memcpy(newBytes+(newCapacity-bytesLength), + bytes+(bytesCapacity-bytesLength), bytesLength); + uprv_free(bytes); + bytes=newBytes; + bytesCapacity=newCapacity; + } + return TRUE; +} + +int32_t +BytesTrieBuilder::write(int32_t byte) { + int32_t newLength=bytesLength+1; + if(ensureCapacity(newLength)) { + bytesLength=newLength; + bytes[bytesCapacity-bytesLength]=(char)byte; + } + return bytesLength; +} + +int32_t +BytesTrieBuilder::write(const char *b, int32_t length) { + int32_t newLength=bytesLength+length; + if(ensureCapacity(newLength)) { + bytesLength=newLength; + uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length); + } + return bytesLength; +} + +int32_t +BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) { + return write(elements[i].getString(*strings).data()+byteIndex, length); +} + +int32_t +BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) { + if(0<=i && i<=BytesTrie::kMaxOneByteValue) { + return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal); + } + char intBytes[5]; + int32_t length=1; + if(i<0 || i>0xffffff) { + intBytes[0]=(char)BytesTrie::kFiveByteValueLead; + intBytes[1]=(char)((uint32_t)i>>24); + intBytes[2]=(char)((uint32_t)i>>16); + intBytes[3]=(char)((uint32_t)i>>8); + intBytes[4]=(char)i; + length=5; + // } else if(i<=BytesTrie::kMaxOneByteValue) { + // intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i); + } else { + if(i<=BytesTrie::kMaxTwoByteValue) { + intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8)); + } else { + if(i<=BytesTrie::kMaxThreeByteValue) { + intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16)); + } else { + intBytes[0]=(char)BytesTrie::kFourByteValueLead; + intBytes[1]=(char)(i>>16); + length=2; + } + intBytes[length++]=(char)(i>>8); + } + intBytes[length++]=(char)i; + } + intBytes[0]=(char)((intBytes[0]<<1)|isFinal); + return write(intBytes, length); +} + +int32_t +BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) { + int32_t offset=write(node); + if(hasValue) { + offset=writeValueAndFinal(value, FALSE); + } + return offset; +} + +int32_t +BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) { + int32_t i=bytesLength-jumpTarget; + U_ASSERT(i>=0); + if(i<=BytesTrie::kMaxOneByteDelta) { + return write(i); + } + char intBytes[5]; + int32_t length; + if(i<=BytesTrie::kMaxTwoByteDelta) { + intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8)); + length=1; + } else { + if(i<=BytesTrie::kMaxThreeByteDelta) { + intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16)); + length=2; + } else { + if(i<=0xffffff) { + intBytes[0]=(char)BytesTrie::kFourByteDeltaLead; + length=3; + } else { + intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead; + intBytes[1]=(char)(i>>24); + length=4; + } + intBytes[1]=(char)(i>>16); + } + intBytes[1]=(char)(i>>8); + } + intBytes[length++]=(char)i; + return write(intBytes, length); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/bytestrieiterator.cpp b/thirdparty/icu4c/common/bytestrieiterator.cpp new file mode 100644 index 0000000000..e64961a1f1 --- /dev/null +++ b/thirdparty/icu4c/common/bytestrieiterator.cpp @@ -0,0 +1,214 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2012, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: bytestrieiterator.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010nov03 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/bytestrie.h" +#include "unicode/stringpiece.h" +#include "charstr.h" +#include "uvectr32.h" + +U_NAMESPACE_BEGIN + +BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength, + UErrorCode &errorCode) + : bytes_(static_cast<const uint8_t *>(trieBytes)), + pos_(bytes_), initialPos_(bytes_), + remainingMatchLength_(-1), initialRemainingMatchLength_(-1), + str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { + if(U_FAILURE(errorCode)) { + return; + } + // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into + // a public API header for which we would want it to depend only on + // other public headers. + // Unlike BytesTrie itself, its Iterator performs memory allocations anyway + // via the CharString and UVector32 implementations, so this additional + // cost is minimal. + str_=new CharString(); + stack_=new UVector32(errorCode); + if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } +} + +BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength, + UErrorCode &errorCode) + : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_), + remainingMatchLength_(trie.remainingMatchLength_), + initialRemainingMatchLength_(trie.remainingMatchLength_), + str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { + if(U_FAILURE(errorCode)) { + return; + } + str_=new CharString(); + stack_=new UVector32(errorCode); + if(U_FAILURE(errorCode)) { + return; + } + if(str_==NULL || stack_==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. + if(length>=0) { + // Pending linear-match node, append remaining bytes to str_. + ++length; + if(maxLength_>0 && length>maxLength_) { + length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. + } + str_->append(reinterpret_cast<const char *>(pos_), length, errorCode); + pos_+=length; + remainingMatchLength_-=length; + } +} + +BytesTrie::Iterator::~Iterator() { + delete str_; + delete stack_; +} + +BytesTrie::Iterator & +BytesTrie::Iterator::reset() { + pos_=initialPos_; + remainingMatchLength_=initialRemainingMatchLength_; + int32_t length=remainingMatchLength_+1; // Remaining match length. + if(maxLength_>0 && length>maxLength_) { + length=maxLength_; + } + str_->truncate(length); + pos_+=length; + remainingMatchLength_-=length; + stack_->setSize(0); + return *this; +} + +UBool +BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } + +UBool +BytesTrie::Iterator::next(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return FALSE; + } + const uint8_t *pos=pos_; + if(pos==NULL) { + if(stack_->isEmpty()) { + return FALSE; + } + // Pop the state off the stack and continue with the next outbound edge of + // the branch node. + int32_t stackSize=stack_->size(); + int32_t length=stack_->elementAti(stackSize-1); + pos=bytes_+stack_->elementAti(stackSize-2); + stack_->setSize(stackSize-2); + str_->truncate(length&0xffff); + length=(int32_t)((uint32_t)length>>16); + if(length>1) { + pos=branchNext(pos, length, errorCode); + if(pos==NULL) { + return TRUE; // Reached a final value. + } + } else { + str_->append((char)*pos++, errorCode); + } + } + if(remainingMatchLength_>=0) { + // We only get here if we started in a pending linear-match node + // with more than maxLength remaining bytes. + return truncateAndStop(); + } + for(;;) { + int32_t node=*pos++; + if(node>=kMinValueLead) { + // Deliver value for the byte sequence so far. + UBool isFinal=(UBool)(node&kValueIsFinal); + value_=readValue(pos, node>>1); + if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) { + pos_=NULL; + } else { + pos_=skipValue(pos, node); + } + return TRUE; + } + if(maxLength_>0 && str_->length()==maxLength_) { + return truncateAndStop(); + } + if(node<kMinLinearMatch) { + if(node==0) { + node=*pos++; + } + pos=branchNext(pos, node+1, errorCode); + if(pos==NULL) { + return TRUE; // Reached a final value. + } + } else { + // Linear-match node, append length bytes to str_. + int32_t length=node-kMinLinearMatch+1; + if(maxLength_>0 && str_->length()+length>maxLength_) { + str_->append(reinterpret_cast<const char *>(pos), + maxLength_-str_->length(), errorCode); + return truncateAndStop(); + } + str_->append(reinterpret_cast<const char *>(pos), length, errorCode); + pos+=length; + } + } +} + +StringPiece +BytesTrie::Iterator::getString() const { + return str_ == NULL ? StringPiece() : str_->toStringPiece(); +} + +UBool +BytesTrie::Iterator::truncateAndStop() { + pos_=NULL; + value_=-1; // no real value for str + return TRUE; +} + +// Branch node, needs to take the first outbound edge and push state for the rest. +const uint8_t * +BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { + while(length>kMaxBranchLinearSubNodeLength) { + ++pos; // ignore the comparison byte + // Push state for the greater-or-equal edge. + stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode); + stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode); + // Follow the less-than edge. + length>>=1; + pos=jumpByDelta(pos); + } + // List of key-value pairs where values are either final values or jump deltas. + // Read the first (key, value) pair. + uint8_t trieByte=*pos++; + int32_t node=*pos++; + UBool isFinal=(UBool)(node&kValueIsFinal); + int32_t value=readValue(pos, node>>1); + pos=skipValue(pos, node); + stack_->addElement((int32_t)(pos-bytes_), errorCode); + stack_->addElement(((length-1)<<16)|str_->length(), errorCode); + str_->append((char)trieByte, errorCode); + if(isFinal) { + pos_=NULL; + value_=value; + return NULL; + } else { + return pos+value; + } +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/caniter.cpp b/thirdparty/icu4c/common/caniter.cpp new file mode 100644 index 0000000000..b28acfc84e --- /dev/null +++ b/thirdparty/icu4c/common/caniter.cpp @@ -0,0 +1,586 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ***************************************************************************** + * Copyright (C) 1996-2015, International Business Machines Corporation and + * others. All Rights Reserved. + ***************************************************************************** + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/caniter.h" +#include "unicode/normalizer2.h" +#include "unicode/uchar.h" +#include "unicode/uniset.h" +#include "unicode/usetiter.h" +#include "unicode/ustring.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "hash.h" +#include "normalizer2impl.h" + +/** + * This class allows one to iterate through all the strings that are canonically equivalent to a given + * string. For example, here are some sample results: +Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +1: \u0041\u030A\u0064\u0307\u0327 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +2: \u0041\u030A\u0064\u0327\u0307 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} +3: \u0041\u030A\u1E0B\u0327 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} +4: \u0041\u030A\u1E11\u0307 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} +5: \u00C5\u0064\u0307\u0327 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +6: \u00C5\u0064\u0327\u0307 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} +7: \u00C5\u1E0B\u0327 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} +8: \u00C5\u1E11\u0307 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} +9: \u212B\u0064\u0307\u0327 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +10: \u212B\u0064\u0327\u0307 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} +11: \u212B\u1E0B\u0327 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} +12: \u212B\u1E11\u0307 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} + *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones, + * since it has not been optimized for that situation. + *@author M. Davis + *@draft + */ + +// public + +U_NAMESPACE_BEGIN + +// TODO: add boilerplate methods. + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator) + +/** + *@param source string to get results for + */ +CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode &status) : + pieces(NULL), + pieces_length(0), + pieces_lengths(NULL), + current(NULL), + current_length(0), + nfd(*Normalizer2::getNFDInstance(status)), + nfcImpl(*Normalizer2Factory::getNFCImpl(status)) +{ + if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) { + setSource(sourceStr, status); + } +} + +CanonicalIterator::~CanonicalIterator() { + cleanPieces(); +} + +void CanonicalIterator::cleanPieces() { + int32_t i = 0; + if(pieces != NULL) { + for(i = 0; i < pieces_length; i++) { + if(pieces[i] != NULL) { + delete[] pieces[i]; + } + } + uprv_free(pieces); + pieces = NULL; + pieces_length = 0; + } + if(pieces_lengths != NULL) { + uprv_free(pieces_lengths); + pieces_lengths = NULL; + } + if(current != NULL) { + uprv_free(current); + current = NULL; + current_length = 0; + } +} + +/** + *@return gets the source: NOTE: it is the NFD form of source + */ +UnicodeString CanonicalIterator::getSource() { + return source; +} + +/** + * Resets the iterator so that one can start again from the beginning. + */ +void CanonicalIterator::reset() { + done = FALSE; + for (int i = 0; i < current_length; ++i) { + current[i] = 0; + } +} + +/** + *@return the next string that is canonically equivalent. The value null is returned when + * the iteration is done. + */ +UnicodeString CanonicalIterator::next() { + int32_t i = 0; + + if (done) { + buffer.setToBogus(); + return buffer; + } + + // delete old contents + buffer.remove(); + + // construct return value + + for (i = 0; i < pieces_length; ++i) { + buffer.append(pieces[i][current[i]]); + } + //String result = buffer.toString(); // not needed + + // find next value for next time + + for (i = current_length - 1; ; --i) { + if (i < 0) { + done = TRUE; + break; + } + current[i]++; + if (current[i] < pieces_lengths[i]) break; // got sequence + current[i] = 0; + } + return buffer; +} + +/** + *@param set the source string to iterate against. This allows the same iterator to be used + * while changing the source string, saving object creation. + */ +void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &status) { + int32_t list_length = 0; + UChar32 cp = 0; + int32_t start = 0; + int32_t i = 0; + UnicodeString *list = NULL; + + nfd.normalize(newSource, source, status); + if(U_FAILURE(status)) { + return; + } + done = FALSE; + + cleanPieces(); + + // catch degenerate case + if (newSource.length() == 0) { + pieces = (UnicodeString **)uprv_malloc(sizeof(UnicodeString *)); + pieces_lengths = (int32_t*)uprv_malloc(1 * sizeof(int32_t)); + pieces_length = 1; + current = (int32_t*)uprv_malloc(1 * sizeof(int32_t)); + current_length = 1; + if (pieces == NULL || pieces_lengths == NULL || current == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + goto CleanPartialInitialization; + } + current[0] = 0; + pieces[0] = new UnicodeString[1]; + pieces_lengths[0] = 1; + if (pieces[0] == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + goto CleanPartialInitialization; + } + return; + } + + + list = new UnicodeString[source.length()]; + if (list == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + goto CleanPartialInitialization; + } + + // i should initialy be the number of code units at the + // start of the string + i = U16_LENGTH(source.char32At(0)); + //int32_t i = 1; + // find the segments + // This code iterates through the source string and + // extracts segments that end up on a codepoint that + // doesn't start any decompositions. (Analysis is done + // on the NFD form - see above). + for (; i < source.length(); i += U16_LENGTH(cp)) { + cp = source.char32At(i); + if (nfcImpl.isCanonSegmentStarter(cp)) { + source.extract(start, i-start, list[list_length++]); // add up to i + start = i; + } + } + source.extract(start, i-start, list[list_length++]); // add last one + + + // allocate the arrays, and find the strings that are CE to each segment + pieces = (UnicodeString **)uprv_malloc(list_length * sizeof(UnicodeString *)); + pieces_length = list_length; + pieces_lengths = (int32_t*)uprv_malloc(list_length * sizeof(int32_t)); + current = (int32_t*)uprv_malloc(list_length * sizeof(int32_t)); + current_length = list_length; + if (pieces == NULL || pieces_lengths == NULL || current == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + goto CleanPartialInitialization; + } + + for (i = 0; i < current_length; i++) { + current[i] = 0; + } + // for each segment, get all the combinations that can produce + // it after NFD normalization + for (i = 0; i < pieces_length; ++i) { + //if (PROGRESS) printf("SEGMENT\n"); + pieces[i] = getEquivalents(list[i], pieces_lengths[i], status); + } + + delete[] list; + return; +// Common section to cleanup all local variables and reset object variables. +CleanPartialInitialization: + if (list != NULL) { + delete[] list; + } + cleanPieces(); +} + +/** + * Dumb recursive implementation of permutation. + * TODO: optimize + * @param source the string to find permutations for + * @return the results in a set. + */ +void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) { + if(U_FAILURE(status)) { + return; + } + //if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source))); + int32_t i = 0; + + // optimization: + // if zero or one character, just return a set with it + // we check for length < 2 to keep from counting code points all the time + if (source.length() <= 2 && source.countChar32() <= 1) { + UnicodeString *toPut = new UnicodeString(source); + /* test for NULL */ + if (toPut == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + result->put(source, toPut, status); + return; + } + + // otherwise iterate through the string, and recursively permute all the other characters + UChar32 cp; + Hashtable subpermute(status); + if(U_FAILURE(status)) { + return; + } + subpermute.setValueDeleter(uprv_deleteUObject); + + for (i = 0; i < source.length(); i += U16_LENGTH(cp)) { + cp = source.char32At(i); + const UHashElement *ne = NULL; + int32_t el = UHASH_FIRST; + UnicodeString subPermuteString = source; + + // optimization: + // if the character is canonical combining class zero, + // don't permute it + if (skipZeros && i != 0 && u_getCombiningClass(cp) == 0) { + //System.out.println("Skipping " + Utility.hex(UTF16.valueOf(source, i))); + continue; + } + + subpermute.removeAll(); + + // see what the permutations of the characters before and after this one are + //Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp))); + permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status); + /* Test for buffer overflows */ + if(U_FAILURE(status)) { + return; + } + // The upper remove is destructive. The question is do we have to make a copy, or we don't care about the contents + // of source at this point. + + // prefix this character to all of them + ne = subpermute.nextElement(el); + while (ne != NULL) { + UnicodeString *permRes = (UnicodeString *)(ne->value.pointer); + UnicodeString *chStr = new UnicodeString(cp); + //test for NULL + if (chStr == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + chStr->append(*permRes); //*((UnicodeString *)(ne->value.pointer)); + //if (PROGRESS) printf(" Piece: %s\n", UToS(*chStr)); + result->put(*chStr, chStr, status); + ne = subpermute.nextElement(el); + } + } + //return result; +} + +// privates + +// we have a segment, in NFD. Find all the strings that are canonically equivalent to it. +UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status) { + Hashtable result(status); + Hashtable permutations(status); + Hashtable basic(status); + if (U_FAILURE(status)) { + return 0; + } + result.setValueDeleter(uprv_deleteUObject); + permutations.setValueDeleter(uprv_deleteUObject); + basic.setValueDeleter(uprv_deleteUObject); + + UChar USeg[256]; + int32_t segLen = segment.extract(USeg, 256, status); + getEquivalents2(&basic, USeg, segLen, status); + + // now get all the permutations + // add only the ones that are canonically equivalent + // TODO: optimize by not permuting any class zero. + + const UHashElement *ne = NULL; + int32_t el = UHASH_FIRST; + //Iterator it = basic.iterator(); + ne = basic.nextElement(el); + //while (it.hasNext()) + while (ne != NULL) { + //String item = (String) it.next(); + UnicodeString item = *((UnicodeString *)(ne->value.pointer)); + + permutations.removeAll(); + permute(item, CANITER_SKIP_ZEROES, &permutations, status); + const UHashElement *ne2 = NULL; + int32_t el2 = UHASH_FIRST; + //Iterator it2 = permutations.iterator(); + ne2 = permutations.nextElement(el2); + //while (it2.hasNext()) + while (ne2 != NULL) { + //String possible = (String) it2.next(); + //UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer))); + UnicodeString possible(*((UnicodeString *)(ne2->value.pointer))); + UnicodeString attempt; + nfd.normalize(possible, attempt, status); + + // TODO: check if operator == is semanticaly the same as attempt.equals(segment) + if (attempt==segment) { + //if (PROGRESS) printf("Adding Permutation: %s\n", UToS(Tr(*possible))); + // TODO: use the hashtable just to catch duplicates - store strings directly (somehow). + result.put(possible, new UnicodeString(possible), status); //add(possible); + } else { + //if (PROGRESS) printf("-Skipping Permutation: %s\n", UToS(Tr(*possible))); + } + + ne2 = permutations.nextElement(el2); + } + ne = basic.nextElement(el); + } + + /* Test for buffer overflows */ + if(U_FAILURE(status)) { + return 0; + } + // convert into a String[] to clean up storage + //String[] finalResult = new String[result.size()]; + UnicodeString *finalResult = NULL; + int32_t resultCount; + if((resultCount = result.count()) != 0) { + finalResult = new UnicodeString[resultCount]; + if (finalResult == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + } + else { + status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + //result.toArray(finalResult); + result_len = 0; + el = UHASH_FIRST; + ne = result.nextElement(el); + while(ne != NULL) { + finalResult[result_len++] = *((UnicodeString *)(ne->value.pointer)); + ne = result.nextElement(el); + } + + + return finalResult; +} + +Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) { + + if (U_FAILURE(status)) { + return NULL; + } + + //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment))); + + UnicodeString toPut(segment, segLen); + + fillinResult->put(toPut, new UnicodeString(toPut), status); + + UnicodeSet starts; + + // cycle through all the characters + UChar32 cp; + for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) { + // see if any character is at the start of some decomposition + U16_GET(segment, 0, i, segLen, cp); + if (!nfcImpl.getCanonStartSet(cp, starts)) { + continue; + } + // if so, see which decompositions match + UnicodeSetIterator iter(starts); + while (iter.next()) { + UChar32 cp2 = iter.getCodepoint(); + Hashtable remainder(status); + remainder.setValueDeleter(uprv_deleteUObject); + if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) { + continue; + } + + // there were some matches, so add all the possibilities to the set. + UnicodeString prefix(segment, i); + prefix += cp2; + + int32_t el = UHASH_FIRST; + const UHashElement *ne = remainder.nextElement(el); + while (ne != NULL) { + UnicodeString item = *((UnicodeString *)(ne->value.pointer)); + UnicodeString *toAdd = new UnicodeString(prefix); + /* test for NULL */ + if (toAdd == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + *toAdd += item; + fillinResult->put(*toAdd, toAdd, status); + + //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd))); + + ne = remainder.nextElement(el); + } + } + } + + /* Test for buffer overflows */ + if(U_FAILURE(status)) { + return NULL; + } + return fillinResult; +} + +/** + * See if the decomposition of cp2 is at segment starting at segmentPos + * (with canonical rearrangment!) + * If so, take the remainder, and return the equivalents + */ +Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) { +//Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) { + //if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp)))); + //if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos); + + if (U_FAILURE(status)) { + return NULL; + } + + UnicodeString temp(comp); + int32_t inputLen=temp.length(); + UnicodeString decompString; + nfd.normalize(temp, decompString, status); + if (U_FAILURE(status)) { + return NULL; + } + if (decompString.isBogus()) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + const UChar *decomp=decompString.getBuffer(); + int32_t decompLen=decompString.length(); + + // See if it matches the start of segment (at segmentPos) + UBool ok = FALSE; + UChar32 cp; + int32_t decompPos = 0; + UChar32 decompCp; + U16_NEXT(decomp, decompPos, decompLen, decompCp); + + int32_t i = segmentPos; + while(i < segLen) { + U16_NEXT(segment, i, segLen, cp); + + if (cp == decompCp) { // if equal, eat another cp from decomp + + //if (PROGRESS) printf(" matches: %s\n", UToS(Tr(UnicodeString(cp)))); + + if (decompPos == decompLen) { // done, have all decomp characters! + temp.append(segment+i, segLen-i); + ok = TRUE; + break; + } + U16_NEXT(decomp, decompPos, decompLen, decompCp); + } else { + //if (PROGRESS) printf(" buffer: %s\n", UToS(Tr(UnicodeString(cp)))); + + // brute force approach + temp.append(cp); + + /* TODO: optimize + // since we know that the classes are monotonically increasing, after zero + // e.g. 0 5 7 9 0 3 + // we can do an optimization + // there are only a few cases that work: zero, less, same, greater + // if both classes are the same, we fail + // if the decomp class < the segment class, we fail + + segClass = getClass(cp); + if (decompClass <= segClass) return null; + */ + } + } + if (!ok) + return NULL; // we failed, characters left over + + //if (PROGRESS) printf("Matches\n"); + + if (inputLen == temp.length()) { + fillinResult->put(UnicodeString(), new UnicodeString(), status); + return fillinResult; // succeed, but no remainder + } + + // brute force approach + // check to make sure result is canonically equivalent + UnicodeString trial; + nfd.normalize(temp, trial, status); + if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) { + return NULL; + } + + return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status); +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/thirdparty/icu4c/common/capi_helper.h b/thirdparty/icu4c/common/capi_helper.h new file mode 100644 index 0000000000..54b1db9e33 --- /dev/null +++ b/thirdparty/icu4c/common/capi_helper.h @@ -0,0 +1,97 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef __CAPI_HELPER_H__ +#define __CAPI_HELPER_H__ + +#include "unicode/utypes.h" + +U_NAMESPACE_BEGIN + +/** + * An internal helper class to help convert between C and C++ APIs. + */ +template<typename CType, typename CPPType, int32_t kMagic> +class IcuCApiHelper { + public: + /** + * Convert from the C type to the C++ type (const version). + */ + static const CPPType* validate(const CType* input, UErrorCode& status); + + /** + * Convert from the C type to the C++ type (non-const version). + */ + static CPPType* validate(CType* input, UErrorCode& status); + + /** + * Convert from the C++ type to the C type (const version). + */ + const CType* exportConstForC() const; + + /** + * Convert from the C++ type to the C type (non-const version). + */ + CType* exportForC(); + + /** + * Invalidates the object. + */ + ~IcuCApiHelper(); + + private: + /** + * While the object is valid, fMagic equals kMagic. + */ + int32_t fMagic = kMagic; +}; + + +template<typename CType, typename CPPType, int32_t kMagic> +const CPPType* +IcuCApiHelper<CType, CPPType, kMagic>::validate(const CType* input, UErrorCode& status) { + if (U_FAILURE(status)) { + return nullptr; + } + if (input == nullptr) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + auto* impl = reinterpret_cast<const CPPType*>(input); + if (static_cast<const IcuCApiHelper<CType, CPPType, kMagic>*>(impl)->fMagic != kMagic) { + status = U_INVALID_FORMAT_ERROR; + return nullptr; + } + return impl; +} + +template<typename CType, typename CPPType, int32_t kMagic> +CPPType* +IcuCApiHelper<CType, CPPType, kMagic>::validate(CType* input, UErrorCode& status) { + auto* constInput = static_cast<const CType*>(input); + auto* validated = validate(constInput, status); + return const_cast<CPPType*>(validated); +} + +template<typename CType, typename CPPType, int32_t kMagic> +const CType* +IcuCApiHelper<CType, CPPType, kMagic>::exportConstForC() const { + return reinterpret_cast<const CType*>(static_cast<const CPPType*>(this)); +} + +template<typename CType, typename CPPType, int32_t kMagic> +CType* +IcuCApiHelper<CType, CPPType, kMagic>::exportForC() { + return reinterpret_cast<CType*>(static_cast<CPPType*>(this)); +} + +template<typename CType, typename CPPType, int32_t kMagic> +IcuCApiHelper<CType, CPPType, kMagic>::~IcuCApiHelper() { + // head off application errors by preventing use of of deleted objects. + fMagic = 0; +} + + +U_NAMESPACE_END + +#endif // __CAPI_HELPER_H__ diff --git a/thirdparty/icu4c/common/characterproperties.cpp b/thirdparty/icu4c/common/characterproperties.cpp new file mode 100644 index 0000000000..7b50a4e205 --- /dev/null +++ b/thirdparty/icu4c/common/characterproperties.cpp @@ -0,0 +1,383 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// characterproperties.cpp +// created: 2018sep03 Markus W. Scherer + +#include "unicode/utypes.h" +#include "unicode/localpointer.h" +#include "unicode/uchar.h" +#include "unicode/ucpmap.h" +#include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "unicode/uniset.h" +#include "unicode/uscript.h" +#include "unicode/uset.h" +#include "cmemory.h" +#include "mutex.h" +#include "normalizer2impl.h" +#include "uassert.h" +#include "ubidi_props.h" +#include "ucase.h" +#include "ucln_cmn.h" +#include "umutex.h" +#include "uprops.h" + +using icu::LocalPointer; +#if !UCONFIG_NO_NORMALIZATION +using icu::Normalizer2Factory; +using icu::Normalizer2Impl; +#endif +using icu::UInitOnce; +using icu::UnicodeSet; + +namespace { + +UBool U_CALLCONV characterproperties_cleanup(); + +constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START; + +struct Inclusion { + UnicodeSet *fSet = nullptr; + UInitOnce fInitOnce = U_INITONCE_INITIALIZER; +}; +Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions() + +UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {}; + +UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {}; + +icu::UMutex cpMutex; + +//---------------------------------------------------------------- +// Inclusions list +//---------------------------------------------------------------- + +// USetAdder implementation +// Does not use uset.h to reduce code dependencies +void U_CALLCONV +_set_add(USet *set, UChar32 c) { + ((UnicodeSet *)set)->add(c); +} + +void U_CALLCONV +_set_addRange(USet *set, UChar32 start, UChar32 end) { + ((UnicodeSet *)set)->add(start, end); +} + +void U_CALLCONV +_set_addString(USet *set, const UChar *str, int32_t length) { + ((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length)); +} + +UBool U_CALLCONV characterproperties_cleanup() { + for (Inclusion &in: gInclusions) { + delete in.fSet; + in.fSet = nullptr; + in.fInitOnce.reset(); + } + for (int32_t i = 0; i < UPRV_LENGTHOF(sets); ++i) { + delete sets[i]; + sets[i] = nullptr; + } + for (int32_t i = 0; i < UPRV_LENGTHOF(maps); ++i) { + ucptrie_close(reinterpret_cast<UCPTrie *>(maps[i])); + maps[i] = nullptr; + } + return TRUE; +} + +void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) { + // This function is invoked only via umtx_initOnce(). + U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT); + if (src == UPROPS_SRC_NONE) { + errorCode = U_INTERNAL_PROGRAM_ERROR; + return; + } + U_ASSERT(gInclusions[src].fSet == nullptr); + + LocalPointer<UnicodeSet> incl(new UnicodeSet()); + if (incl.isNull()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + USetAdder sa = { + (USet *)incl.getAlias(), + _set_add, + _set_addRange, + _set_addString, + nullptr, // don't need remove() + nullptr // don't need removeRange() + }; + + switch(src) { + case UPROPS_SRC_CHAR: + uchar_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_PROPSVEC: + upropsvec_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_CHAR_AND_PROPSVEC: + uchar_addPropertyStarts(&sa, &errorCode); + upropsvec_addPropertyStarts(&sa, &errorCode); + break; +#if !UCONFIG_NO_NORMALIZATION + case UPROPS_SRC_CASE_AND_NORM: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + ucase_addPropertyStarts(&sa, &errorCode); + break; + } + case UPROPS_SRC_NFC: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + break; + } + case UPROPS_SRC_NFKC: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + break; + } + case UPROPS_SRC_NFKC_CF: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + break; + } + case UPROPS_SRC_NFC_CANON_ITER: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addCanonIterPropertyStarts(&sa, errorCode); + } + break; + } +#endif + case UPROPS_SRC_CASE: + ucase_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_BIDI: + ubidi_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_INPC: + case UPROPS_SRC_INSC: + case UPROPS_SRC_VO: + uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode); + break; + default: + errorCode = U_INTERNAL_PROGRAM_ERROR; + break; + } + + if (U_FAILURE(errorCode)) { + return; + } + if (incl->isBogus()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + // Compact for caching. + incl->compact(); + gInclusions[src].fSet = incl.orphan(); + ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup); +} + +const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + if (src < 0 || UPROPS_SRC_COUNT <= src) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + Inclusion &i = gInclusions[src]; + umtx_initOnce(i.fInitOnce, &initInclusion, src, errorCode); + return i.fSet; +} + +void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) { + // This function is invoked only via umtx_initOnce(). + U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT); + int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START; + U_ASSERT(gInclusions[inclIndex].fSet == nullptr); + UPropertySource src = uprops_getSource(prop); + const UnicodeSet *incl = getInclusionsForSource(src, errorCode); + if (U_FAILURE(errorCode)) { + return; + } + + LocalPointer<UnicodeSet> intPropIncl(new UnicodeSet(0, 0)); + if (intPropIncl.isNull()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + int32_t numRanges = incl->getRangeCount(); + int32_t prevValue = 0; + for (int32_t i = 0; i < numRanges; ++i) { + UChar32 rangeEnd = incl->getRangeEnd(i); + for (UChar32 c = incl->getRangeStart(i); c <= rangeEnd; ++c) { + // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch. + int32_t value = u_getIntPropertyValue(c, prop); + if (value != prevValue) { + intPropIncl->add(c); + prevValue = value; + } + } + } + + if (intPropIncl->isBogus()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + // Compact for caching. + intPropIncl->compact(); + gInclusions[inclIndex].fSet = intPropIncl.orphan(); + ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup); +} + +} // namespace + +U_NAMESPACE_BEGIN + +const UnicodeSet *CharacterProperties::getInclusionsForProperty( + UProperty prop, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) { + int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START; + Inclusion &i = gInclusions[inclIndex]; + umtx_initOnce(i.fInitOnce, &initIntPropInclusion, prop, errorCode); + return i.fSet; + } else { + UPropertySource src = uprops_getSource(prop); + return getInclusionsForSource(src, errorCode); + } +} + +U_NAMESPACE_END + +namespace { + +UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + LocalPointer<UnicodeSet> set(new UnicodeSet()); + if (set.isNull()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + const UnicodeSet *inclusions = + icu::CharacterProperties::getInclusionsForProperty(property, errorCode); + if (U_FAILURE(errorCode)) { return nullptr; } + int32_t numRanges = inclusions->getRangeCount(); + UChar32 startHasProperty = -1; + + for (int32_t i = 0; i < numRanges; ++i) { + UChar32 rangeEnd = inclusions->getRangeEnd(i); + for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) { + // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch. + if (u_hasBinaryProperty(c, property)) { + if (startHasProperty < 0) { + // Transition from false to true. + startHasProperty = c; + } + } else if (startHasProperty >= 0) { + // Transition from true to false. + set->add(startHasProperty, c - 1); + startHasProperty = -1; + } + } + } + if (startHasProperty >= 0) { + set->add(startHasProperty, 0x10FFFF); + } + set->freeze(); + return set.orphan(); +} + +UCPMap *makeMap(UProperty property, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0; + icu::LocalUMutableCPTriePointer mutableTrie( + umutablecptrie_open(nullValue, nullValue, &errorCode)); + const UnicodeSet *inclusions = + icu::CharacterProperties::getInclusionsForProperty(property, errorCode); + if (U_FAILURE(errorCode)) { return nullptr; } + int32_t numRanges = inclusions->getRangeCount(); + UChar32 start = 0; + uint32_t value = nullValue; + + for (int32_t i = 0; i < numRanges; ++i) { + UChar32 rangeEnd = inclusions->getRangeEnd(i); + for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) { + // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch. + uint32_t nextValue = u_getIntPropertyValue(c, property); + if (value != nextValue) { + if (value != nullValue) { + umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode); + } + start = c; + value = nextValue; + } + } + } + if (value != 0) { + umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode); + } + + UCPTrieType type; + if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) { + type = UCPTRIE_TYPE_FAST; + } else { + type = UCPTRIE_TYPE_SMALL; + } + UCPTrieValueWidth valueWidth; + // TODO: UCharacterProperty.IntProperty + int32_t max = u_getIntPropertyMaxValue(property); + if (max <= 0xff) { + valueWidth = UCPTRIE_VALUE_BITS_8; + } else if (max <= 0xffff) { + valueWidth = UCPTRIE_VALUE_BITS_16; + } else { + valueWidth = UCPTRIE_VALUE_BITS_32; + } + return reinterpret_cast<UCPMap *>( + umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode)); +} + +} // namespace + +U_NAMESPACE_USE + +U_CAPI const USet * U_EXPORT2 +u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { return nullptr; } + if (property < 0 || UCHAR_BINARY_LIMIT <= property) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + Mutex m(&cpMutex); + UnicodeSet *set = sets[property]; + if (set == nullptr) { + sets[property] = set = makeSet(property, *pErrorCode); + } + if (U_FAILURE(*pErrorCode)) { return nullptr; } + return set->toUSet(); +} + +U_CAPI const UCPMap * U_EXPORT2 +u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { return nullptr; } + if (property < UCHAR_INT_START || UCHAR_INT_LIMIT <= property) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + Mutex m(&cpMutex); + UCPMap *map = maps[property - UCHAR_INT_START]; + if (map == nullptr) { + maps[property - UCHAR_INT_START] = map = makeMap(property, *pErrorCode); + } + return map; +} diff --git a/thirdparty/icu4c/common/chariter.cpp b/thirdparty/icu4c/common/chariter.cpp new file mode 100644 index 0000000000..887119a0eb --- /dev/null +++ b/thirdparty/icu4c/common/chariter.cpp @@ -0,0 +1,100 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#include "unicode/chariter.h" + +U_NAMESPACE_BEGIN + +ForwardCharacterIterator::~ForwardCharacterIterator() {} +ForwardCharacterIterator::ForwardCharacterIterator() +: UObject() +{} +ForwardCharacterIterator::ForwardCharacterIterator(const ForwardCharacterIterator &other) +: UObject(other) +{} + + +CharacterIterator::CharacterIterator() +: textLength(0), pos(0), begin(0), end(0) { +} + +CharacterIterator::CharacterIterator(int32_t length) +: textLength(length), pos(0), begin(0), end(length) { + if(textLength < 0) { + textLength = end = 0; + } +} + +CharacterIterator::CharacterIterator(int32_t length, int32_t position) +: textLength(length), pos(position), begin(0), end(length) { + if(textLength < 0) { + textLength = end = 0; + } + if(pos < 0) { + pos = 0; + } else if(pos > end) { + pos = end; + } +} + +CharacterIterator::CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position) +: textLength(length), pos(position), begin(textBegin), end(textEnd) { + if(textLength < 0) { + textLength = 0; + } + if(begin < 0) { + begin = 0; + } else if(begin > textLength) { + begin = textLength; + } + if(end < begin) { + end = begin; + } else if(end > textLength) { + end = textLength; + } + if(pos < begin) { + pos = begin; + } else if(pos > end) { + pos = end; + } +} + +CharacterIterator::~CharacterIterator() {} + +CharacterIterator::CharacterIterator(const CharacterIterator &that) : +ForwardCharacterIterator(that), +textLength(that.textLength), pos(that.pos), begin(that.begin), end(that.end) +{ +} + +CharacterIterator & +CharacterIterator::operator=(const CharacterIterator &that) { + ForwardCharacterIterator::operator=(that); + textLength = that.textLength; + pos = that.pos; + begin = that.begin; + end = that.end; + return *this; +} + +// implementing first[32]PostInc() directly in a subclass should be faster +// but these implementations make subclassing a little easier +UChar +CharacterIterator::firstPostInc(void) { + setToStart(); + return nextPostInc(); +} + +UChar32 +CharacterIterator::first32PostInc(void) { + setToStart(); + return next32PostInc(); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/charstr.cpp b/thirdparty/icu4c/common/charstr.cpp new file mode 100644 index 0000000000..318a185b3f --- /dev/null +++ b/thirdparty/icu4c/common/charstr.cpp @@ -0,0 +1,239 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2015, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: charstr.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010may19 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "charstr.h" +#include "cmemory.h" +#include "cstring.h" +#include "uinvchar.h" +#include "ustr_imp.h" + +U_NAMESPACE_BEGIN + +CharString::CharString(CharString&& src) U_NOEXCEPT + : buffer(std::move(src.buffer)), len(src.len) { + src.len = 0; // not strictly necessary because we make no guarantees on the source string +} + +CharString& CharString::operator=(CharString&& src) U_NOEXCEPT { + buffer = std::move(src.buffer); + len = src.len; + src.len = 0; // not strictly necessary because we make no guarantees on the source string + return *this; +} + +char *CharString::cloneData(UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { return nullptr; } + char *p = static_cast<char *>(uprv_malloc(len + 1)); + if (p == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + uprv_memcpy(p, buffer.getAlias(), len + 1); + return p; +} + +int32_t CharString::extract(char *dest, int32_t capacity, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { return len; } + if (capacity < 0 || (capacity > 0 && dest == nullptr)) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return len; + } + const char *src = buffer.getAlias(); + if (0 < len && len <= capacity && src != dest) { + uprv_memcpy(dest, src, len); + } + return u_terminateChars(dest, capacity, len, &errorCode); +} + +CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) { + if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) { + len=s.len; + uprv_memcpy(buffer.getAlias(), s.buffer.getAlias(), len+1); + } + return *this; +} + +int32_t CharString::lastIndexOf(char c) const { + for(int32_t i=len; i>0;) { + if(buffer[--i]==c) { + return i; + } + } + return -1; +} + +bool CharString::contains(StringPiece s) const { + if (s.empty()) { return false; } + const char *p = buffer.getAlias(); + int32_t lastStart = len - s.length(); + for (int32_t i = 0; i <= lastStart; ++i) { + if (uprv_memcmp(p + i, s.data(), s.length()) == 0) { + return true; + } + } + return false; +} + +CharString &CharString::truncate(int32_t newLength) { + if(newLength<0) { + newLength=0; + } + if(newLength<len) { + buffer[len=newLength]=0; + } + return *this; +} + +CharString &CharString::append(char c, UErrorCode &errorCode) { + if(ensureCapacity(len+2, 0, errorCode)) { + buffer[len++]=c; + buffer[len]=0; + } + return *this; +} + +CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return *this; + } + if(sLength<-1 || (s==NULL && sLength!=0)) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return *this; + } + if(sLength<0) { + sLength= static_cast<int32_t>(uprv_strlen(s)); + } + if(sLength>0) { + if(s==(buffer.getAlias()+len)) { + // The caller wrote into the getAppendBuffer(). + if(sLength>=(buffer.getCapacity()-len)) { + // The caller wrote too much. + errorCode=U_INTERNAL_PROGRAM_ERROR; + } else { + buffer[len+=sLength]=0; + } + } else if(buffer.getAlias()<=s && s<(buffer.getAlias()+len) && + sLength>=(buffer.getCapacity()-len) + ) { + // (Part of) this string is appended to itself which requires reallocation, + // so we have to make a copy of the substring and append that. + return append(CharString(s, sLength, errorCode), errorCode); + } else if(ensureCapacity(len+sLength+1, 0, errorCode)) { + uprv_memcpy(buffer.getAlias()+len, s, sLength); + buffer[len+=sLength]=0; + } + } + return *this; +} + +char *CharString::getAppendBuffer(int32_t minCapacity, + int32_t desiredCapacityHint, + int32_t &resultCapacity, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + resultCapacity=0; + return NULL; + } + int32_t appendCapacity=buffer.getCapacity()-len-1; // -1 for NUL + if(appendCapacity>=minCapacity) { + resultCapacity=appendCapacity; + return buffer.getAlias()+len; + } + if(ensureCapacity(len+minCapacity+1, len+desiredCapacityHint+1, errorCode)) { + resultCapacity=buffer.getCapacity()-len-1; + return buffer.getAlias()+len; + } + resultCapacity=0; + return NULL; +} + +CharString &CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) { + return appendInvariantChars(s.getBuffer(), s.length(), errorCode); +} + +CharString &CharString::appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return *this; + } + if (!uprv_isInvariantUString(uchars, ucharsLen)) { + errorCode = U_INVARIANT_CONVERSION_ERROR; + return *this; + } + if(ensureCapacity(len+ucharsLen+1, 0, errorCode)) { + u_UCharsToChars(uchars, buffer.getAlias()+len, ucharsLen); + len += ucharsLen; + buffer[len] = 0; + } + return *this; +} + +UBool CharString::ensureCapacity(int32_t capacity, + int32_t desiredCapacityHint, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return FALSE; + } + if(capacity>buffer.getCapacity()) { + if(desiredCapacityHint==0) { + desiredCapacityHint=capacity+buffer.getCapacity(); + } + if( (desiredCapacityHint<=capacity || buffer.resize(desiredCapacityHint, len+1)==NULL) && + buffer.resize(capacity, len+1)==NULL + ) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + } + return TRUE; +} + +CharString &CharString::appendPathPart(StringPiece s, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return *this; + } + if(s.length()==0) { + return *this; + } + char c; + if(len>0 && (c=buffer[len-1])!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) { + append(getDirSepChar(), errorCode); + } + append(s, errorCode); + return *this; +} + +CharString &CharString::ensureEndsWithFileSeparator(UErrorCode &errorCode) { + char c; + if(U_SUCCESS(errorCode) && len>0 && + (c=buffer[len-1])!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) { + append(getDirSepChar(), errorCode); + } + return *this; +} + +char CharString::getDirSepChar() const { + char dirSepChar = U_FILE_SEP_CHAR; +#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) + // We may need to return a different directory separator when building for Cygwin or MSYS2. + if(len>0 && !uprv_strchr(data(), U_FILE_SEP_CHAR) && uprv_strchr(data(), U_FILE_ALT_SEP_CHAR)) + dirSepChar = U_FILE_ALT_SEP_CHAR; +#endif + return dirSepChar; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/charstr.h b/thirdparty/icu4c/common/charstr.h new file mode 100644 index 0000000000..6619faac61 --- /dev/null +++ b/thirdparty/icu4c/common/charstr.h @@ -0,0 +1,190 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2001-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 11/19/2001 aliu Creation. +* 05/19/2010 markus Rewritten from scratch +********************************************************************** +*/ + +#ifndef CHARSTRING_H +#define CHARSTRING_H + +#include "unicode/utypes.h" +#include "unicode/unistr.h" +#include "unicode/uobject.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + +// Windows needs us to DLL-export the MaybeStackArray template specialization, +// but MacOS X cannot handle it. Same as in digitlst.h. +#if !U_PLATFORM_IS_DARWIN_BASED +template class U_COMMON_API MaybeStackArray<char, 40>; +#endif + +/** + * ICU-internal char * string class. + * This class does not assume or enforce any particular character encoding. + * Raw bytes can be stored. The string object owns its characters. + * A terminating NUL is stored, but the class does not prevent embedded NUL characters. + * + * This class wants to be convenient but is also deliberately minimalist. + * Please do not add methods if they only add minor convenience. + * For example: + * cs.data()[5]='a'; // no need for setCharAt(5, 'a') + */ +class U_COMMON_API CharString : public UMemory { +public: + CharString() : len(0) { buffer[0]=0; } + CharString(StringPiece s, UErrorCode &errorCode) : len(0) { + buffer[0]=0; + append(s, errorCode); + } + CharString(const CharString &s, UErrorCode &errorCode) : len(0) { + buffer[0]=0; + append(s, errorCode); + } + CharString(const char *s, int32_t sLength, UErrorCode &errorCode) : len(0) { + buffer[0]=0; + append(s, sLength, errorCode); + } + ~CharString() {} + + /** + * Move constructor; might leave src in an undefined state. + * This string will have the same contents and state that the source string had. + */ + CharString(CharString &&src) U_NOEXCEPT; + /** + * Move assignment operator; might leave src in an undefined state. + * This string will have the same contents and state that the source string had. + * The behavior is undefined if *this and src are the same object. + */ + CharString &operator=(CharString &&src) U_NOEXCEPT; + + /** + * Replaces this string's contents with the other string's contents. + * CharString does not support the standard copy constructor nor + * the assignment operator, to make copies explicit and to + * use a UErrorCode where memory allocations might be needed. + */ + CharString ©From(const CharString &other, UErrorCode &errorCode); + + UBool isEmpty() const { return len==0; } + int32_t length() const { return len; } + char operator[](int32_t index) const { return buffer[index]; } + StringPiece toStringPiece() const { return StringPiece(buffer.getAlias(), len); } + + const char *data() const { return buffer.getAlias(); } + char *data() { return buffer.getAlias(); } + /** + * Allocates length()+1 chars and copies the NUL-terminated data(). + * The caller must uprv_free() the result. + */ + char *cloneData(UErrorCode &errorCode) const; + /** + * Copies the contents of the string into dest. + * Checks if there is enough space in dest, extracts the entire string if possible, + * and NUL-terminates dest if possible. + * + * If the string fits into dest but cannot be NUL-terminated (length()==capacity), + * then the error code is set to U_STRING_NOT_TERMINATED_WARNING. + * If the string itself does not fit into dest (length()>capacity), + * then the error code is set to U_BUFFER_OVERFLOW_ERROR. + * + * @param dest Destination string buffer. + * @param capacity Size of the dest buffer (number of chars). + * @param errorCode ICU error code. + * @return length() + */ + int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const; + + bool operator==(StringPiece other) const { + return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0); + } + bool operator!=(StringPiece other) const { + return !operator==(other); + } + + /** @return last index of c, or -1 if c is not in this string */ + int32_t lastIndexOf(char c) const; + + bool contains(StringPiece s) const; + + CharString &clear() { len=0; buffer[0]=0; return *this; } + CharString &truncate(int32_t newLength); + + CharString &append(char c, UErrorCode &errorCode); + CharString &append(StringPiece s, UErrorCode &errorCode) { + return append(s.data(), s.length(), errorCode); + } + CharString &append(const CharString &s, UErrorCode &errorCode) { + return append(s.data(), s.length(), errorCode); + } + CharString &append(const char *s, int32_t sLength, UErrorCode &status); + /** + * Returns a writable buffer for appending and writes the buffer's capacity to + * resultCapacity. Guarantees resultCapacity>=minCapacity if U_SUCCESS(). + * There will additionally be space for a terminating NUL right at resultCapacity. + * (This function is similar to ByteSink.GetAppendBuffer().) + * + * The returned buffer is only valid until the next write operation + * on this string. + * + * After writing at most resultCapacity bytes, call append() with the + * pointer returned from this function and the number of bytes written. + * + * @param minCapacity required minimum capacity of the returned buffer; + * must be non-negative + * @param desiredCapacityHint desired capacity of the returned buffer; + * must be non-negative + * @param resultCapacity will be set to the capacity of the returned buffer + * @param errorCode in/out error code + * @return a buffer with resultCapacity>=min_capacity + */ + char *getAppendBuffer(int32_t minCapacity, + int32_t desiredCapacityHint, + int32_t &resultCapacity, + UErrorCode &errorCode); + + CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode); + CharString &appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode& errorCode); + + /** + * Appends a filename/path part, e.g., a directory name. + * First appends a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR if necessary. + * Does nothing if s is empty. + */ + CharString &appendPathPart(StringPiece s, UErrorCode &errorCode); + + /** + * Appends a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR if this string is not empty + * and does not already end with a U_FILE_SEP_CHAR or U_FILE_ALT_SEP_CHAR. + */ + CharString &ensureEndsWithFileSeparator(UErrorCode &errorCode); + +private: + MaybeStackArray<char, 40> buffer; + int32_t len; + + UBool ensureCapacity(int32_t capacity, int32_t desiredCapacityHint, UErrorCode &errorCode); + + CharString(const CharString &other); // forbid copying of this class + CharString &operator=(const CharString &other); // forbid copying of this class + + /** + * Returns U_FILE_ALT_SEP_CHAR if found in string, and U_FILE_SEP_CHAR is not found. + * Otherwise returns U_FILE_SEP_CHAR. + */ + char getDirSepChar() const; +}; + +U_NAMESPACE_END + +#endif +//eof diff --git a/thirdparty/icu4c/common/charstrmap.h b/thirdparty/icu4c/common/charstrmap.h new file mode 100644 index 0000000000..3320a46208 --- /dev/null +++ b/thirdparty/icu4c/common/charstrmap.h @@ -0,0 +1,55 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// charstrmap.h +// created: 2020sep01 Frank Yung-Fong Tang + +#ifndef __CHARSTRMAP_H__ +#define __CHARSTRMAP_H__ + +#include <utility> +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "uhash.h" + +U_NAMESPACE_BEGIN + +/** + * Map of const char * keys & values. + * Stores pointers as is: Does not own/copy/adopt/release strings. + */ +class CharStringMap final : public UMemory { +public: + /** Constructs an unusable non-map. */ + CharStringMap() : map(nullptr) {} + CharStringMap(int32_t size, UErrorCode &errorCode) { + map = uhash_openSize(uhash_hashChars, uhash_compareChars, uhash_compareChars, + size, &errorCode); + } + CharStringMap(CharStringMap &&other) U_NOEXCEPT : map(other.map) { + other.map = nullptr; + } + CharStringMap(const CharStringMap &other) = delete; + ~CharStringMap() { + uhash_close(map); + } + + CharStringMap &operator=(CharStringMap &&other) U_NOEXCEPT { + map = other.map; + other.map = nullptr; + return *this; + } + CharStringMap &operator=(const CharStringMap &other) = delete; + + const char *get(const char *key) const { return static_cast<const char *>(uhash_get(map, key)); } + void put(const char *key, const char *value, UErrorCode &errorCode) { + uhash_put(map, const_cast<char *>(key), const_cast<char *>(value), &errorCode); + } + +private: + UHashtable *map; +}; + +U_NAMESPACE_END + +#endif // __CHARSTRMAP_H__ diff --git a/thirdparty/icu4c/common/cmemory.cpp b/thirdparty/icu4c/common/cmemory.cpp new file mode 100644 index 0000000000..663c1411e4 --- /dev/null +++ b/thirdparty/icu4c/common/cmemory.cpp @@ -0,0 +1,138 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2002-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File cmemory.c ICU Heap allocation. +* All ICU heap allocation, both for C and C++ new of ICU +* class types, comes through these functions. +* +* If you have a need to replace ICU allocation, this is the +* place to do it. +* +* Note that uprv_malloc(0) returns a non-NULL pointer, and +* that a subsequent free of that pointer value is a NOP. +* +****************************************************************************** +*/ +#include "unicode/uclean.h" +#include "cmemory.h" +#include "putilimp.h" +#include "uassert.h" +#include <stdlib.h> + +/* uprv_malloc(0) returns a pointer to this read-only data. */ +static const int32_t zeroMem[] = {0, 0, 0, 0, 0, 0}; + +/* Function Pointers for user-supplied heap functions */ +static const void *pContext; +static UMemAllocFn *pAlloc; +static UMemReallocFn *pRealloc; +static UMemFreeFn *pFree; + +#if U_DEBUG && defined(UPRV_MALLOC_COUNT) +#include <stdio.h> +static int n=0; +static long b=0; +#endif + +U_CAPI void * U_EXPORT2 +uprv_malloc(size_t s) { +#if U_DEBUG && defined(UPRV_MALLOC_COUNT) +#if 1 + putchar('>'); + fflush(stdout); +#else + fprintf(stderr,"MALLOC\t#%d\t%ul bytes\t%ul total\n", ++n,s,(b+=s)); fflush(stderr); +#endif +#endif + if (s > 0) { + if (pAlloc) { + return (*pAlloc)(pContext, s); + } else { + return uprv_default_malloc(s); + } + } else { + return (void *)zeroMem; + } +} + +U_CAPI void * U_EXPORT2 +uprv_realloc(void * buffer, size_t size) { +#if U_DEBUG && defined(UPRV_MALLOC_COUNT) + putchar('~'); + fflush(stdout); +#endif + if (buffer == zeroMem) { + return uprv_malloc(size); + } else if (size == 0) { + if (pFree) { + (*pFree)(pContext, buffer); + } else { + uprv_default_free(buffer); + } + return (void *)zeroMem; + } else { + if (pRealloc) { + return (*pRealloc)(pContext, buffer, size); + } else { + return uprv_default_realloc(buffer, size); + } + } +} + +U_CAPI void U_EXPORT2 +uprv_free(void *buffer) { +#if U_DEBUG && defined(UPRV_MALLOC_COUNT) + putchar('<'); + fflush(stdout); +#endif + if (buffer != zeroMem) { + if (pFree) { + (*pFree)(pContext, buffer); + } else { + uprv_default_free(buffer); + } + } +} + +U_CAPI void * U_EXPORT2 +uprv_calloc(size_t num, size_t size) { + void *mem = NULL; + size *= num; + mem = uprv_malloc(size); + if (mem) { + uprv_memset(mem, 0, size); + } + return mem; +} + +U_CAPI void U_EXPORT2 +u_setMemoryFunctions(const void *context, UMemAllocFn *a, UMemReallocFn *r, UMemFreeFn *f, UErrorCode *status) +{ + if (U_FAILURE(*status)) { + return; + } + if (a==NULL || r==NULL || f==NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + pContext = context; + pAlloc = a; + pRealloc = r; + pFree = f; +} + + +U_CFUNC UBool cmemory_cleanup(void) { + pContext = NULL; + pAlloc = NULL; + pRealloc = NULL; + pFree = NULL; + return TRUE; +} diff --git a/thirdparty/icu4c/common/cmemory.h b/thirdparty/icu4c/common/cmemory.h new file mode 100644 index 0000000000..210bc7645e --- /dev/null +++ b/thirdparty/icu4c/common/cmemory.h @@ -0,0 +1,849 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File CMEMORY.H +* +* Contains stdlib.h/string.h memory functions +* +* @author Bertrand A. Damiba +* +* Modification History: +* +* Date Name Description +* 6/20/98 Bertrand Created. +* 05/03/99 stephen Changed from functions to macros. +* +****************************************************************************** +*/ + +#ifndef CMEMORY_H +#define CMEMORY_H + +#include "unicode/utypes.h" + +#include <stddef.h> +#include <string.h> +#include "unicode/localpointer.h" + +#if U_DEBUG && defined(UPRV_MALLOC_COUNT) +#include <stdio.h> +#endif + + +#define uprv_memcpy(dst, src, size) U_STANDARD_CPP_NAMESPACE memcpy(dst, src, size) +#define uprv_memmove(dst, src, size) U_STANDARD_CPP_NAMESPACE memmove(dst, src, size) + +/** + * \def UPRV_LENGTHOF + * Convenience macro to determine the length of a fixed array at compile-time. + * @param array A fixed length array + * @return The length of the array, in elements + * @internal + */ +#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) +#define uprv_memset(buffer, mark, size) U_STANDARD_CPP_NAMESPACE memset(buffer, mark, size) +#define uprv_memcmp(buffer1, buffer2, size) U_STANDARD_CPP_NAMESPACE memcmp(buffer1, buffer2,size) +#define uprv_memchr(ptr, value, num) U_STANDARD_CPP_NAMESPACE memchr(ptr, value, num) + +U_CAPI void * U_EXPORT2 +uprv_malloc(size_t s) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR(1); + +U_CAPI void * U_EXPORT2 +uprv_realloc(void *mem, size_t size) U_ALLOC_SIZE_ATTR(2); + +U_CAPI void U_EXPORT2 +uprv_free(void *mem); + +U_CAPI void * U_EXPORT2 +uprv_calloc(size_t num, size_t size) U_MALLOC_ATTR U_ALLOC_SIZE_ATTR2(1,2); + +/** + * Get the least significant bits of a pointer (a memory address). + * For example, with a mask of 3, the macro gets the 2 least significant bits, + * which will be 0 if the pointer is 32-bit (4-byte) aligned. + * + * uintptr_t is the most appropriate integer type to cast to. + */ +#define U_POINTER_MASK_LSB(ptr, mask) ((uintptr_t)(ptr) & (mask)) + +/** + * Create & return an instance of "type" in statically allocated storage. + * e.g. + * static std::mutex *myMutex = STATIC_NEW(std::mutex); + * To destroy an object created in this way, invoke the destructor explicitly, e.g. + * myMutex->~mutex(); + * DO NOT use delete. + * DO NOT use with class UMutex, which has specific support for static instances. + * + * STATIC_NEW is intended for use when + * - We want a static (or global) object. + * - We don't want it to ever be destructed, or to explicitly control destruction, + * to avoid use-after-destruction problems. + * - We want to avoid an ordinary heap allocated object, + * to avoid the possibility of memory allocation failures, and + * to avoid memory leak reports, from valgrind, for example. + * This is defined as a macro rather than a template function because each invocation + * must define distinct static storage for the object being returned. + */ +#define STATIC_NEW(type) [] () { \ + alignas(type) static char storage[sizeof(type)]; \ + return new(storage) type();} () + +/** + * Heap clean up function, called from u_cleanup() + * Clears any user heap functions from u_setMemoryFunctions() + * Does NOT deallocate any remaining allocated memory. + */ +U_CFUNC UBool +cmemory_cleanup(void); + +/** + * A function called by <TT>uhash_remove</TT>, + * <TT>uhash_close</TT>, or <TT>uhash_put</TT> to delete + * an existing key or value. + * @param obj A key or value stored in a hashtable + * @see uprv_deleteUObject + */ +typedef void U_CALLCONV UObjectDeleter(void* obj); + +/** + * Deleter for UObject instances. + * Works for all subclasses of UObject because it has a virtual destructor. + */ +U_CAPI void U_EXPORT2 +uprv_deleteUObject(void *obj); + +#ifdef __cplusplus + +#include <utility> +#include "unicode/uobject.h" + +U_NAMESPACE_BEGIN + +/** + * "Smart pointer" class, deletes memory via uprv_free(). + * For most methods see the LocalPointerBase base class. + * Adds operator[] for array item access. + * + * @see LocalPointerBase + */ +template<typename T> +class LocalMemory : public LocalPointerBase<T> { +public: + using LocalPointerBase<T>::operator*; + using LocalPointerBase<T>::operator->; + /** + * Constructor takes ownership. + * @param p simple pointer to an array of T items that is adopted + */ + explicit LocalMemory(T *p=NULL) : LocalPointerBase<T>(p) {} + /** + * Move constructor, leaves src with isNull(). + * @param src source smart pointer + */ + LocalMemory(LocalMemory<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) { + src.ptr=NULL; + } + /** + * Destructor deletes the memory it owns. + */ + ~LocalMemory() { + uprv_free(LocalPointerBase<T>::ptr); + } + /** + * Move assignment operator, leaves src with isNull(). + * The behavior is undefined if *this and src are the same object. + * @param src source smart pointer + * @return *this + */ + LocalMemory<T> &operator=(LocalMemory<T> &&src) U_NOEXCEPT { + uprv_free(LocalPointerBase<T>::ptr); + LocalPointerBase<T>::ptr=src.ptr; + src.ptr=NULL; + return *this; + } + /** + * Swap pointers. + * @param other other smart pointer + */ + void swap(LocalMemory<T> &other) U_NOEXCEPT { + T *temp=LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=other.ptr; + other.ptr=temp; + } + /** + * Non-member LocalMemory swap function. + * @param p1 will get p2's pointer + * @param p2 will get p1's pointer + */ + friend inline void swap(LocalMemory<T> &p1, LocalMemory<T> &p2) U_NOEXCEPT { + p1.swap(p2); + } + /** + * Deletes the array it owns, + * and adopts (takes ownership of) the one passed in. + * @param p simple pointer to an array of T items that is adopted + */ + void adoptInstead(T *p) { + uprv_free(LocalPointerBase<T>::ptr); + LocalPointerBase<T>::ptr=p; + } + /** + * Deletes the array it owns, allocates a new one and reset its bytes to 0. + * Returns the new array pointer. + * If the allocation fails, then the current array is unchanged and + * this method returns NULL. + * @param newCapacity must be >0 + * @return the allocated array pointer, or NULL if the allocation failed + */ + inline T *allocateInsteadAndReset(int32_t newCapacity=1); + /** + * Deletes the array it owns and allocates a new one, copying length T items. + * Returns the new array pointer. + * If the allocation fails, then the current array is unchanged and + * this method returns NULL. + * @param newCapacity must be >0 + * @param length number of T items to be copied from the old array to the new one; + * must be no more than the capacity of the old array, + * which the caller must track because the LocalMemory does not track it + * @return the allocated array pointer, or NULL if the allocation failed + */ + inline T *allocateInsteadAndCopy(int32_t newCapacity=1, int32_t length=0); + /** + * Array item access (writable). + * No index bounds check. + * @param i array index + * @return reference to the array item + */ + T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; } +}; + +template<typename T> +inline T *LocalMemory<T>::allocateInsteadAndReset(int32_t newCapacity) { + if(newCapacity>0) { + T *p=(T *)uprv_malloc(newCapacity*sizeof(T)); + if(p!=NULL) { + uprv_memset(p, 0, newCapacity*sizeof(T)); + uprv_free(LocalPointerBase<T>::ptr); + LocalPointerBase<T>::ptr=p; + } + return p; + } else { + return NULL; + } +} + + +template<typename T> +inline T *LocalMemory<T>::allocateInsteadAndCopy(int32_t newCapacity, int32_t length) { + if(newCapacity>0) { + T *p=(T *)uprv_malloc(newCapacity*sizeof(T)); + if(p!=NULL) { + if(length>0) { + if(length>newCapacity) { + length=newCapacity; + } + uprv_memcpy(p, LocalPointerBase<T>::ptr, (size_t)length*sizeof(T)); + } + uprv_free(LocalPointerBase<T>::ptr); + LocalPointerBase<T>::ptr=p; + } + return p; + } else { + return NULL; + } +} + +/** + * Simple array/buffer management class using uprv_malloc() and uprv_free(). + * Provides an internal array with fixed capacity. Can alias another array + * or allocate one. + * + * The array address is properly aligned for type T. It might not be properly + * aligned for types larger than T (or larger than the largest subtype of T). + * + * Unlike LocalMemory and LocalArray, this class never adopts + * (takes ownership of) another array. + * + * WARNING: MaybeStackArray only works with primitive (plain-old data) types. + * It does NOT know how to call a destructor! If you work with classes with + * destructors, consider: + * + * - LocalArray in localpointer.h if you know the length ahead of time + * - MaybeStackVector if you know the length at runtime + */ +template<typename T, int32_t stackCapacity> +class MaybeStackArray { +public: + // No heap allocation. Use only on the stack. + static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete; + static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete; +#if U_HAVE_PLACEMENT_NEW + static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete; +#endif + + /** + * Default constructor initializes with internal T[stackCapacity] buffer. + */ + MaybeStackArray() : ptr(stackArray), capacity(stackCapacity), needToRelease(false) {} + /** + * Automatically allocates the heap array if the argument is larger than the stack capacity. + * Intended for use when an approximate capacity is known at compile time but the true + * capacity is not known until runtime. + */ + MaybeStackArray(int32_t newCapacity, UErrorCode status) : MaybeStackArray() { + if (U_FAILURE(status)) { + return; + } + if (capacity < newCapacity) { + if (resize(newCapacity) == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } + } + } + /** + * Destructor deletes the array (if owned). + */ + ~MaybeStackArray() { releaseArray(); } + /** + * Move constructor: transfers ownership or copies the stack array. + */ + MaybeStackArray(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT; + /** + * Move assignment: transfers ownership or copies the stack array. + */ + MaybeStackArray<T, stackCapacity> &operator=(MaybeStackArray<T, stackCapacity> &&src) U_NOEXCEPT; + /** + * Returns the array capacity (number of T items). + * @return array capacity + */ + int32_t getCapacity() const { return capacity; } + /** + * Access without ownership change. + * @return the array pointer + */ + T *getAlias() const { return ptr; } + /** + * Returns the array limit. Simple convenience method. + * @return getAlias()+getCapacity() + */ + T *getArrayLimit() const { return getAlias()+capacity; } + // No "operator T *() const" because that can make + // expressions like mbs[index] ambiguous for some compilers. + /** + * Array item access (const). + * No index bounds check. + * @param i array index + * @return reference to the array item + */ + const T &operator[](ptrdiff_t i) const { return ptr[i]; } + /** + * Array item access (writable). + * No index bounds check. + * @param i array index + * @return reference to the array item + */ + T &operator[](ptrdiff_t i) { return ptr[i]; } + /** + * Deletes the array (if owned) and aliases another one, no transfer of ownership. + * If the arguments are illegal, then the current array is unchanged. + * @param otherArray must not be NULL + * @param otherCapacity must be >0 + */ + void aliasInstead(T *otherArray, int32_t otherCapacity) { + if(otherArray!=NULL && otherCapacity>0) { + releaseArray(); + ptr=otherArray; + capacity=otherCapacity; + needToRelease=false; + } + } + /** + * Deletes the array (if owned) and allocates a new one, copying length T items. + * Returns the new array pointer. + * If the allocation fails, then the current array is unchanged and + * this method returns NULL. + * @param newCapacity can be less than or greater than the current capacity; + * must be >0 + * @param length number of T items to be copied from the old array to the new one + * @return the allocated array pointer, or NULL if the allocation failed + */ + inline T *resize(int32_t newCapacity, int32_t length=0); + /** + * Gives up ownership of the array if owned, or else clones it, + * copying length T items; resets itself to the internal stack array. + * Returns NULL if the allocation failed. + * @param length number of T items to copy when cloning, + * and capacity of the clone when cloning + * @param resultCapacity will be set to the returned array's capacity (output-only) + * @return the array pointer; + * caller becomes responsible for deleting the array + */ + inline T *orphanOrClone(int32_t length, int32_t &resultCapacity); + +protected: + // Resizes the array to the size of src, then copies the contents of src. + void copyFrom(const MaybeStackArray &src, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + if (this->resize(src.capacity, 0) == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memcpy(this->ptr, src.ptr, (size_t)capacity * sizeof(T)); + } + +private: + T *ptr; + int32_t capacity; + UBool needToRelease; + T stackArray[stackCapacity]; + void releaseArray() { + if(needToRelease) { + uprv_free(ptr); + } + } + void resetToStackArray() { + ptr=stackArray; + capacity=stackCapacity; + needToRelease=false; + } + /* No comparison operators with other MaybeStackArray's. */ + bool operator==(const MaybeStackArray & /*other*/) = delete; + bool operator!=(const MaybeStackArray & /*other*/) = delete; + /* No ownership transfer: No copy constructor, no assignment operator. */ + MaybeStackArray(const MaybeStackArray & /*other*/) = delete; + void operator=(const MaybeStackArray & /*other*/) = delete; +}; + +template<typename T, int32_t stackCapacity> +icu::MaybeStackArray<T, stackCapacity>::MaybeStackArray( + MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT + : ptr(src.ptr), capacity(src.capacity), needToRelease(src.needToRelease) { + if (src.ptr == src.stackArray) { + ptr = stackArray; + uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity); + } else { + src.resetToStackArray(); // take ownership away from src + } +} + +template<typename T, int32_t stackCapacity> +inline MaybeStackArray <T, stackCapacity>& +MaybeStackArray<T, stackCapacity>::operator=(MaybeStackArray <T, stackCapacity>&& src) U_NOEXCEPT { + releaseArray(); // in case this instance had its own memory allocated + capacity = src.capacity; + needToRelease = src.needToRelease; + if (src.ptr == src.stackArray) { + ptr = stackArray; + uprv_memcpy(stackArray, src.stackArray, sizeof(T) * src.capacity); + } else { + ptr = src.ptr; + src.resetToStackArray(); // take ownership away from src + } + return *this; +} + +template<typename T, int32_t stackCapacity> +inline T *MaybeStackArray<T, stackCapacity>::resize(int32_t newCapacity, int32_t length) { + if(newCapacity>0) { +#if U_DEBUG && defined(UPRV_MALLOC_COUNT) + ::fprintf(::stderr, "MaybeStackArray (resize) alloc %d * %lu\n", newCapacity, sizeof(T)); +#endif + T *p=(T *)uprv_malloc(newCapacity*sizeof(T)); + if(p!=NULL) { + if(length>0) { + if(length>capacity) { + length=capacity; + } + if(length>newCapacity) { + length=newCapacity; + } + uprv_memcpy(p, ptr, (size_t)length*sizeof(T)); + } + releaseArray(); + ptr=p; + capacity=newCapacity; + needToRelease=true; + } + return p; + } else { + return NULL; + } +} + +template<typename T, int32_t stackCapacity> +inline T *MaybeStackArray<T, stackCapacity>::orphanOrClone(int32_t length, int32_t &resultCapacity) { + T *p; + if(needToRelease) { + p=ptr; + } else if(length<=0) { + return NULL; + } else { + if(length>capacity) { + length=capacity; + } + p=(T *)uprv_malloc(length*sizeof(T)); +#if U_DEBUG && defined(UPRV_MALLOC_COUNT) + ::fprintf(::stderr,"MaybeStacArray (orphan) alloc %d * %lu\n", length,sizeof(T)); +#endif + if(p==NULL) { + return NULL; + } + uprv_memcpy(p, ptr, (size_t)length*sizeof(T)); + } + resultCapacity=length; + resetToStackArray(); + return p; +} + +/** + * Variant of MaybeStackArray that allocates a header struct and an array + * in one contiguous memory block, using uprv_malloc() and uprv_free(). + * Provides internal memory with fixed array capacity. Can alias another memory + * block or allocate one. + * The stackCapacity is the number of T items in the internal memory, + * not counting the H header. + * Unlike LocalMemory and LocalArray, this class never adopts + * (takes ownership of) another memory block. + */ +template<typename H, typename T, int32_t stackCapacity> +class MaybeStackHeaderAndArray { +public: + // No heap allocation. Use only on the stack. + static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete; + static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete; +#if U_HAVE_PLACEMENT_NEW + static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete; +#endif + + /** + * Default constructor initializes with internal H+T[stackCapacity] buffer. + */ + MaybeStackHeaderAndArray() : ptr(&stackHeader), capacity(stackCapacity), needToRelease(false) {} + /** + * Destructor deletes the memory (if owned). + */ + ~MaybeStackHeaderAndArray() { releaseMemory(); } + /** + * Returns the array capacity (number of T items). + * @return array capacity + */ + int32_t getCapacity() const { return capacity; } + /** + * Access without ownership change. + * @return the header pointer + */ + H *getAlias() const { return ptr; } + /** + * Returns the array start. + * @return array start, same address as getAlias()+1 + */ + T *getArrayStart() const { return reinterpret_cast<T *>(getAlias()+1); } + /** + * Returns the array limit. + * @return array limit + */ + T *getArrayLimit() const { return getArrayStart()+capacity; } + /** + * Access without ownership change. Same as getAlias(). + * A class instance can be used directly in expressions that take a T *. + * @return the header pointer + */ + operator H *() const { return ptr; } + /** + * Array item access (writable). + * No index bounds check. + * @param i array index + * @return reference to the array item + */ + T &operator[](ptrdiff_t i) { return getArrayStart()[i]; } + /** + * Deletes the memory block (if owned) and aliases another one, no transfer of ownership. + * If the arguments are illegal, then the current memory is unchanged. + * @param otherArray must not be NULL + * @param otherCapacity must be >0 + */ + void aliasInstead(H *otherMemory, int32_t otherCapacity) { + if(otherMemory!=NULL && otherCapacity>0) { + releaseMemory(); + ptr=otherMemory; + capacity=otherCapacity; + needToRelease=false; + } + } + /** + * Deletes the memory block (if owned) and allocates a new one, + * copying the header and length T array items. + * Returns the new header pointer. + * If the allocation fails, then the current memory is unchanged and + * this method returns NULL. + * @param newCapacity can be less than or greater than the current capacity; + * must be >0 + * @param length number of T items to be copied from the old array to the new one + * @return the allocated pointer, or NULL if the allocation failed + */ + inline H *resize(int32_t newCapacity, int32_t length=0); + /** + * Gives up ownership of the memory if owned, or else clones it, + * copying the header and length T array items; resets itself to the internal memory. + * Returns NULL if the allocation failed. + * @param length number of T items to copy when cloning, + * and array capacity of the clone when cloning + * @param resultCapacity will be set to the returned array's capacity (output-only) + * @return the header pointer; + * caller becomes responsible for deleting the array + */ + inline H *orphanOrClone(int32_t length, int32_t &resultCapacity); +private: + H *ptr; + int32_t capacity; + UBool needToRelease; + // stackHeader must precede stackArray immediately. + H stackHeader; + T stackArray[stackCapacity]; + void releaseMemory() { + if(needToRelease) { + uprv_free(ptr); + } + } + /* No comparison operators with other MaybeStackHeaderAndArray's. */ + bool operator==(const MaybeStackHeaderAndArray & /*other*/) {return false;} + bool operator!=(const MaybeStackHeaderAndArray & /*other*/) {return true;} + /* No ownership transfer: No copy constructor, no assignment operator. */ + MaybeStackHeaderAndArray(const MaybeStackHeaderAndArray & /*other*/) {} + void operator=(const MaybeStackHeaderAndArray & /*other*/) {} +}; + +template<typename H, typename T, int32_t stackCapacity> +inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::resize(int32_t newCapacity, + int32_t length) { + if(newCapacity>=0) { +#if U_DEBUG && defined(UPRV_MALLOC_COUNT) + ::fprintf(::stderr,"MaybeStackHeaderAndArray alloc %d + %d * %ul\n", sizeof(H),newCapacity,sizeof(T)); +#endif + H *p=(H *)uprv_malloc(sizeof(H)+newCapacity*sizeof(T)); + if(p!=NULL) { + if(length<0) { + length=0; + } else if(length>0) { + if(length>capacity) { + length=capacity; + } + if(length>newCapacity) { + length=newCapacity; + } + } + uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T)); + releaseMemory(); + ptr=p; + capacity=newCapacity; + needToRelease=true; + } + return p; + } else { + return NULL; + } +} + +template<typename H, typename T, int32_t stackCapacity> +inline H *MaybeStackHeaderAndArray<H, T, stackCapacity>::orphanOrClone(int32_t length, + int32_t &resultCapacity) { + H *p; + if(needToRelease) { + p=ptr; + } else { + if(length<0) { + length=0; + } else if(length>capacity) { + length=capacity; + } +#if U_DEBUG && defined(UPRV_MALLOC_COUNT) + ::fprintf(::stderr,"MaybeStackHeaderAndArray (orphan) alloc %ul + %d * %lu\n", sizeof(H),length,sizeof(T)); +#endif + p=(H *)uprv_malloc(sizeof(H)+length*sizeof(T)); + if(p==NULL) { + return NULL; + } + uprv_memcpy(p, ptr, sizeof(H)+(size_t)length*sizeof(T)); + } + resultCapacity=length; + ptr=&stackHeader; + capacity=stackCapacity; + needToRelease=false; + return p; +} + +/** + * A simple memory management class that creates new heap allocated objects (of + * any class that has a public constructor), keeps track of them and eventually + * deletes them all in its own destructor. + * + * A typical use-case would be code like this: + * + * MemoryPool<MyType> pool; + * + * MyType* o1 = pool.create(); + * if (o1 != nullptr) { + * foo(o1); + * } + * + * MyType* o2 = pool.create(1, 2, 3); + * if (o2 != nullptr) { + * bar(o2); + * } + * + * // MemoryPool will take care of deleting the MyType objects. + * + * It doesn't do anything more than that, and is intentionally kept minimalist. + */ +template<typename T, int32_t stackCapacity = 8> +class MemoryPool : public UMemory { +public: + MemoryPool() : fCount(0), fPool() {} + + ~MemoryPool() { + for (int32_t i = 0; i < fCount; ++i) { + delete fPool[i]; + } + } + + MemoryPool(const MemoryPool&) = delete; + MemoryPool& operator=(const MemoryPool&) = delete; + + MemoryPool(MemoryPool&& other) U_NOEXCEPT : fCount(other.fCount), + fPool(std::move(other.fPool)) { + other.fCount = 0; + } + + MemoryPool& operator=(MemoryPool&& other) U_NOEXCEPT { + fCount = other.fCount; + fPool = std::move(other.fPool); + other.fCount = 0; + return *this; + } + + /** + * Creates a new object of typename T, by forwarding any and all arguments + * to the typename T constructor. + * + * @param args Arguments to be forwarded to the typename T constructor. + * @return A pointer to the newly created object, or nullptr on error. + */ + template<typename... Args> + T* create(Args&&... args) { + int32_t capacity = fPool.getCapacity(); + if (fCount == capacity && + fPool.resize(capacity == stackCapacity ? 4 * capacity : 2 * capacity, + capacity) == nullptr) { + return nullptr; + } + return fPool[fCount++] = new T(std::forward<Args>(args)...); + } + + template <typename... Args> + T* createAndCheckErrorCode(UErrorCode &status, Args &&... args) { + if (U_FAILURE(status)) { + return nullptr; + } + T *pointer = this->create(args...); + if (U_SUCCESS(status) && pointer == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return pointer; + } + + /** + * @return Number of elements that have been allocated. + */ + int32_t count() const { + return fCount; + } + +protected: + int32_t fCount; + MaybeStackArray<T*, stackCapacity> fPool; +}; + +/** + * An internal Vector-like implementation based on MemoryPool. + * + * Heap-allocates each element and stores pointers. + * + * To append an item to the vector, use emplaceBack. + * + * MaybeStackVector<MyType> vector; + * MyType* element = vector.emplaceBack(); + * if (!element) { + * status = U_MEMORY_ALLOCATION_ERROR; + * } + * // do stuff with element + * + * To loop over the vector, use a for loop with indices: + * + * for (int32_t i = 0; i < vector.length(); i++) { + * MyType* element = vector[i]; + * } + */ +template<typename T, int32_t stackCapacity = 8> +class MaybeStackVector : protected MemoryPool<T, stackCapacity> { +public: + using MemoryPool<T, stackCapacity>::MemoryPool; + using MemoryPool<T, stackCapacity>::operator=; + + template<typename... Args> + T* emplaceBack(Args&&... args) { + return this->create(args...); + } + + template <typename... Args> + T *emplaceBackAndCheckErrorCode(UErrorCode &status, Args &&... args) { + return this->createAndCheckErrorCode(status, args...); + } + + int32_t length() const { + return this->fCount; + } + + T** getAlias() { + return this->fPool.getAlias(); + } + + const T *const *getAlias() const { + return this->fPool.getAlias(); + } + + /** + * Array item access (read-only). + * No index bounds check. + * @param i array index + * @return reference to the array item + */ + const T* operator[](ptrdiff_t i) const { + return this->fPool[i]; + } + + /** + * Array item access (writable). + * No index bounds check. + * @param i array index + * @return reference to the array item + */ + T* operator[](ptrdiff_t i) { + return this->fPool[i]; + } +}; + + +U_NAMESPACE_END + +#endif /* __cplusplus */ +#endif /* CMEMORY_H */ diff --git a/thirdparty/icu4c/common/cpputils.h b/thirdparty/icu4c/common/cpputils.h new file mode 100644 index 0000000000..307e570486 --- /dev/null +++ b/thirdparty/icu4c/common/cpputils.h @@ -0,0 +1,97 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: cpputils.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +*/ + +#ifndef CPPUTILS_H +#define CPPUTILS_H + +#include "unicode/utypes.h" +#include "unicode/unistr.h" +#include "cmemory.h" + +/*==========================================================================*/ +/* Array copy utility functions */ +/*==========================================================================*/ + +static +inline void uprv_arrayCopy(const double* src, double* dst, int32_t count) +{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); } + +static +inline void uprv_arrayCopy(const double* src, int32_t srcStart, + double* dst, int32_t dstStart, int32_t count) +{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } + +static +inline void uprv_arrayCopy(const int8_t* src, int8_t* dst, int32_t count) + { uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); } + +static +inline void uprv_arrayCopy(const int8_t* src, int32_t srcStart, + int8_t* dst, int32_t dstStart, int32_t count) +{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } + +static +inline void uprv_arrayCopy(const int16_t* src, int16_t* dst, int32_t count) +{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); } + +static +inline void uprv_arrayCopy(const int16_t* src, int32_t srcStart, + int16_t* dst, int32_t dstStart, int32_t count) +{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } + +static +inline void uprv_arrayCopy(const int32_t* src, int32_t* dst, int32_t count) +{ uprv_memcpy(dst, src, (size_t)count * sizeof(*src)); } + +static +inline void uprv_arrayCopy(const int32_t* src, int32_t srcStart, + int32_t* dst, int32_t dstStart, int32_t count) +{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } + +static +inline void +uprv_arrayCopy(const UChar *src, int32_t srcStart, + UChar *dst, int32_t dstStart, int32_t count) +{ uprv_memcpy(dst+dstStart, src+srcStart, (size_t)count * sizeof(*src)); } + +/** + * Copy an array of UnicodeString OBJECTS (not pointers). + * @internal + */ +static inline void +uprv_arrayCopy(const icu::UnicodeString *src, icu::UnicodeString *dst, int32_t count) +{ while(count-- > 0) *dst++ = *src++; } + +/** + * Copy an array of UnicodeString OBJECTS (not pointers). + * @internal + */ +static inline void +uprv_arrayCopy(const icu::UnicodeString *src, int32_t srcStart, + icu::UnicodeString *dst, int32_t dstStart, int32_t count) +{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); } + +/** + * Checks that the string is readable and writable. + * Sets U_ILLEGAL_ARGUMENT_ERROR if the string isBogus() or has an open getBuffer(). + */ +inline void +uprv_checkCanGetBuffer(const icu::UnicodeString &s, UErrorCode &errorCode) { + if(U_SUCCESS(errorCode) && s.isBogus()) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + } +} + +#endif /* _CPPUTILS */ diff --git a/thirdparty/icu4c/common/cstr.cpp b/thirdparty/icu4c/common/cstr.cpp new file mode 100644 index 0000000000..24654f8fc2 --- /dev/null +++ b/thirdparty/icu4c/common/cstr.cpp @@ -0,0 +1,54 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2015-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: charstr.cpp +*/ +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "unicode/unistr.h" + +#include "cstr.h" + +#include "charstr.h" +#include "uinvchar.h" + +U_NAMESPACE_BEGIN + +CStr::CStr(const UnicodeString &in) { + UErrorCode status = U_ZERO_ERROR; +#if !UCONFIG_NO_CONVERSION || U_CHARSET_IS_UTF8 + int32_t length = in.extract(0, in.length(), static_cast<char *>(NULL), static_cast<uint32_t>(0)); + int32_t resultCapacity = 0; + char *buf = s.getAppendBuffer(length, length, resultCapacity, status); + if (U_SUCCESS(status)) { + in.extract(0, in.length(), buf, resultCapacity); + s.append(buf, length, status); + } +#else + // No conversion available. Convert any invariant characters; substitute '?' for the rest. + // Note: can't just call u_UCharsToChars() or CharString.appendInvariantChars() on the + // whole string because they require that the entire input be invariant. + char buf[2]; + for (int i=0; i<in.length(); i = in.moveIndex32(i, 1)) { + if (uprv_isInvariantUString(in.getBuffer()+i, 1)) { + u_UCharsToChars(in.getBuffer()+i, buf, 1); + } else { + buf[0] = '?'; + } + s.append(buf, 1, status); + } +#endif +} + +CStr::~CStr() { +} + +const char * CStr::operator ()() const { + return s.data(); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/cstr.h b/thirdparty/icu4c/common/cstr.h new file mode 100644 index 0000000000..c33f487ea1 --- /dev/null +++ b/thirdparty/icu4c/common/cstr.h @@ -0,0 +1,60 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File: cstr.h +*/ + +#ifndef CSTR_H +#define CSTR_H + +#include "unicode/unistr.h" +#include "unicode/uobject.h" +#include "unicode/utypes.h" + +#include "charstr.h" + +/** + * ICU-internal class CStr, a small helper class to facilitate passing UnicodeStrings + * to functions needing (const char *) strings, such as printf(). + * + * It is intended primarily for use in debugging or in tests. Uses platform + * default code page conversion, which will do the best job possible, + * but may be lossy, depending on the platform. + * + * If no other conversion is available, use invariant conversion and substitue + * '?' for non-invariant characters. + * + * Example Usage: + * UnicodeString s = whatever; + * printf("%s", CStr(s)()); + * + * The explicit call to the CStr() constructor creates a temporary object. + * Operator () on the temporary object returns a (const char *) pointer. + * The lifetime of the (const char *) data is that of the temporary object, + * which works well when passing it as a parameter to another function, such as printf. + */ + +U_NAMESPACE_BEGIN + +class U_COMMON_API CStr : public UMemory { + public: + CStr(const UnicodeString &in); + ~CStr(); + const char * operator ()() const; + + private: + CharString s; + CStr(const CStr &other); // Forbid copying of this class. + CStr &operator =(const CStr &other); // Forbid assignment. +}; + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/cstring.cpp b/thirdparty/icu4c/common/cstring.cpp new file mode 100644 index 0000000000..06275c4b56 --- /dev/null +++ b/thirdparty/icu4c/common/cstring.cpp @@ -0,0 +1,341 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File CSTRING.C +* +* @author Helena Shih +* +* Modification History: +* +* Date Name Description +* 6/18/98 hshih Created +* 09/08/98 stephen Added include for ctype, for Mac Port +* 11/15/99 helena Integrated S/390 IEEE changes. +****************************************************************************** +*/ + + + +#include <stdlib.h> +#include <stdio.h> +#include "unicode/utypes.h" +#include "cmemory.h" +#include "cstring.h" +#include "uassert.h" + +/* + * We hardcode case conversion for invariant characters to match our expectation + * and the compiler execution charset. + * This prevents problems on systems + * - with non-default casing behavior, like Turkish system locales where + * tolower('I') maps to dotless i and toupper('i') maps to dotted I + * - where there are no lowercase Latin characters at all, or using different + * codes (some old EBCDIC codepages) + * + * This works because the compiler usually runs on a platform where the execution + * charset includes all of the invariant characters at their expected + * code positions, so that the char * string literals in ICU code match + * the char literals here. + * + * Note that the set of lowercase Latin letters is discontiguous in EBCDIC + * and the set of uppercase Latin letters is discontiguous as well. + */ + +U_CAPI UBool U_EXPORT2 +uprv_isASCIILetter(char c) { +#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY + return + ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') || + ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z'); +#else + return ('a'<=c && c<='z') || ('A'<=c && c<='Z'); +#endif +} + +U_CAPI char U_EXPORT2 +uprv_toupper(char c) { +#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY + if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) { + c=(char)(c+('A'-'a')); + } +#else + if('a'<=c && c<='z') { + c=(char)(c+('A'-'a')); + } +#endif + return c; +} + + +#if 0 +/* + * Commented out because cstring.h defines uprv_tolower() to be + * the same as either uprv_asciitolower() or uprv_ebcdictolower() + * to reduce the amount of code to cover with tests. + * + * Note that this uprv_tolower() definition is likely to work for most + * charset families, not just ASCII and EBCDIC, because its #else branch + * is written generically. + */ +U_CAPI char U_EXPORT2 +uprv_tolower(char c) { +#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY + if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) { + c=(char)(c+('a'-'A')); + } +#else + if('A'<=c && c<='Z') { + c=(char)(c+('a'-'A')); + } +#endif + return c; +} +#endif + +U_CAPI char U_EXPORT2 +uprv_asciitolower(char c) { + if(0x41<=c && c<=0x5a) { + c=(char)(c+0x20); + } + return c; +} + +U_CAPI char U_EXPORT2 +uprv_ebcdictolower(char c) { + if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) || + (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) || + (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9) + ) { + c=(char)(c-0x40); + } + return c; +} + + +U_CAPI char* U_EXPORT2 +T_CString_toLowerCase(char* str) +{ + char* origPtr = str; + + if (str) { + do + *str = (char)uprv_tolower(*str); + while (*(str++)); + } + + return origPtr; +} + +U_CAPI char* U_EXPORT2 +T_CString_toUpperCase(char* str) +{ + char* origPtr = str; + + if (str) { + do + *str = (char)uprv_toupper(*str); + while (*(str++)); + } + + return origPtr; +} + +/* + * Takes a int32_t and fills in a char* string with that number "radix"-based. + * Does not handle negative values (makes an empty string for them). + * Writes at most 12 chars ("-2147483647" plus NUL). + * Returns the length of the string (not including the NUL). + */ +U_CAPI int32_t U_EXPORT2 +T_CString_integerToString(char* buffer, int32_t v, int32_t radix) +{ + char tbuf[30]; + int32_t tbx = sizeof(tbuf); + uint8_t digit; + int32_t length = 0; + uint32_t uval; + + U_ASSERT(radix>=2 && radix<=16); + uval = (uint32_t) v; + if(v<0 && radix == 10) { + /* Only in base 10 do we conside numbers to be signed. */ + uval = (uint32_t)(-v); + buffer[length++] = '-'; + } + + tbx = sizeof(tbuf)-1; + tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ + do { + digit = (uint8_t)(uval % radix); + tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); + uval = uval / radix; + } while (uval != 0); + + /* copy converted number into user buffer */ + uprv_strcpy(buffer+length, tbuf+tbx); + length += sizeof(tbuf) - tbx -1; + return length; +} + + + +/* + * Takes a int64_t and fills in a char* string with that number "radix"-based. + * Writes at most 21: chars ("-9223372036854775807" plus NUL). + * Returns the length of the string, not including the terminating NULL. + */ +U_CAPI int32_t U_EXPORT2 +T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix) +{ + char tbuf[30]; + int32_t tbx = sizeof(tbuf); + uint8_t digit; + int32_t length = 0; + uint64_t uval; + + U_ASSERT(radix>=2 && radix<=16); + uval = (uint64_t) v; + if(v<0 && radix == 10) { + /* Only in base 10 do we conside numbers to be signed. */ + uval = (uint64_t)(-v); + buffer[length++] = '-'; + } + + tbx = sizeof(tbuf)-1; + tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ + do { + digit = (uint8_t)(uval % radix); + tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); + uval = uval / radix; + } while (uval != 0); + + /* copy converted number into user buffer */ + uprv_strcpy(buffer+length, tbuf+tbx); + length += sizeof(tbuf) - tbx -1; + return length; +} + + +U_CAPI int32_t U_EXPORT2 +T_CString_stringToInteger(const char *integerString, int32_t radix) +{ + char *end; + return uprv_strtoul(integerString, &end, radix); + +} + +U_CAPI int U_EXPORT2 +uprv_stricmp(const char *str1, const char *str2) { + if(str1==NULL) { + if(str2==NULL) { + return 0; + } else { + return -1; + } + } else if(str2==NULL) { + return 1; + } else { + /* compare non-NULL strings lexically with lowercase */ + int rc; + unsigned char c1, c2; + + for(;;) { + c1=(unsigned char)*str1; + c2=(unsigned char)*str2; + if(c1==0) { + if(c2==0) { + return 0; + } else { + return -1; + } + } else if(c2==0) { + return 1; + } else { + /* compare non-zero characters with lowercase */ + rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); + if(rc!=0) { + return rc; + } + } + ++str1; + ++str2; + } + } +} + +U_CAPI int U_EXPORT2 +uprv_strnicmp(const char *str1, const char *str2, uint32_t n) { + if(str1==NULL) { + if(str2==NULL) { + return 0; + } else { + return -1; + } + } else if(str2==NULL) { + return 1; + } else { + /* compare non-NULL strings lexically with lowercase */ + int rc; + unsigned char c1, c2; + + for(; n--;) { + c1=(unsigned char)*str1; + c2=(unsigned char)*str2; + if(c1==0) { + if(c2==0) { + return 0; + } else { + return -1; + } + } else if(c2==0) { + return 1; + } else { + /* compare non-zero characters with lowercase */ + rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); + if(rc!=0) { + return rc; + } + } + ++str1; + ++str2; + } + } + + return 0; +} + +U_CAPI char* U_EXPORT2 +uprv_strdup(const char *src) { + size_t len = uprv_strlen(src) + 1; + char *dup = (char *) uprv_malloc(len); + + if (dup) { + uprv_memcpy(dup, src, len); + } + + return dup; +} + +U_CAPI char* U_EXPORT2 +uprv_strndup(const char *src, int32_t n) { + char *dup; + + if(n < 0) { + dup = uprv_strdup(src); + } else { + dup = (char*)uprv_malloc(n+1); + if (dup) { + uprv_memcpy(dup, src, n); + dup[n] = 0; + } + } + + return dup; +} diff --git a/thirdparty/icu4c/common/cstring.h b/thirdparty/icu4c/common/cstring.h new file mode 100644 index 0000000000..3a14e4216c --- /dev/null +++ b/thirdparty/icu4c/common/cstring.h @@ -0,0 +1,126 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File CSTRING.H +* +* Contains CString interface +* +* @author Helena Shih +* +* Modification History: +* +* Date Name Description +* 6/17/98 hshih Created. +* 05/03/99 stephen Changed from functions to macros. +* 06/14/99 stephen Added icu_strncat, icu_strncmp, icu_tolower +* +****************************************************************************** +*/ + +#ifndef CSTRING_H +#define CSTRING_H 1 + +#include "unicode/utypes.h" +#include "cmemory.h" +#include <string.h> +#include <stdlib.h> +#include <ctype.h> + +#define uprv_strcpy(dst, src) U_STANDARD_CPP_NAMESPACE strcpy(dst, src) +#define uprv_strlen(str) U_STANDARD_CPP_NAMESPACE strlen(str) +#define uprv_strcmp(s1, s2) U_STANDARD_CPP_NAMESPACE strcmp(s1, s2) +#define uprv_strcat(dst, src) U_STANDARD_CPP_NAMESPACE strcat(dst, src) +#define uprv_strchr(s, c) U_STANDARD_CPP_NAMESPACE strchr(s, c) +#define uprv_strstr(s, c) U_STANDARD_CPP_NAMESPACE strstr(s, c) +#define uprv_strrchr(s, c) U_STANDARD_CPP_NAMESPACE strrchr(s, c) +#define uprv_strncpy(dst, src, size) U_STANDARD_CPP_NAMESPACE strncpy(dst, src, size) +#define uprv_strncmp(s1, s2, n) U_STANDARD_CPP_NAMESPACE strncmp(s1, s2, n) +#define uprv_strncat(dst, src, n) U_STANDARD_CPP_NAMESPACE strncat(dst, src, n) + +/** + * Is c an ASCII-repertoire letter a-z or A-Z? + * Note: The implementation is specific to whether ICU is compiled for + * an ASCII-based or EBCDIC-based machine. There just does not seem to be a better name for this. + */ +U_CAPI UBool U_EXPORT2 +uprv_isASCIILetter(char c); + +// NOTE: For u_asciiToUpper that takes a UChar, see ustr_imp.h + +U_CAPI char U_EXPORT2 +uprv_toupper(char c); + + +U_CAPI char U_EXPORT2 +uprv_asciitolower(char c); + +U_CAPI char U_EXPORT2 +uprv_ebcdictolower(char c); + +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define uprv_tolower uprv_asciitolower +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define uprv_tolower uprv_ebcdictolower +#else +# error U_CHARSET_FAMILY is not valid +#endif + +#define uprv_strtod(source, end) U_STANDARD_CPP_NAMESPACE strtod(source, end) +#define uprv_strtoul(str, end, base) U_STANDARD_CPP_NAMESPACE strtoul(str, end, base) +#define uprv_strtol(str, end, base) U_STANDARD_CPP_NAMESPACE strtol(str, end, base) + +/* Conversion from a digit to the character with radix base from 2-19 */ +/* May need to use U_UPPER_ORDINAL*/ +#define T_CString_itosOffset(a) ((a)<=9?('0'+(a)):('A'+(a)-10)) + +U_CAPI char* U_EXPORT2 +uprv_strdup(const char *src); + +/** + * uprv_malloc n+1 bytes, and copy n bytes from src into the new string. + * Terminate with a null at offset n. If n is -1, works like uprv_strdup + * @param src + * @param n length of the input string, not including null. + * @return new string (owned by caller, use uprv_free to free). + * @internal + */ +U_CAPI char* U_EXPORT2 +uprv_strndup(const char *src, int32_t n); + +U_CAPI char* U_EXPORT2 +T_CString_toLowerCase(char* str); + +U_CAPI char* U_EXPORT2 +T_CString_toUpperCase(char* str); + +U_CAPI int32_t U_EXPORT2 +T_CString_integerToString(char *buffer, int32_t n, int32_t radix); + +U_CAPI int32_t U_EXPORT2 +T_CString_int64ToString(char *buffer, int64_t n, uint32_t radix); + +U_CAPI int32_t U_EXPORT2 +T_CString_stringToInteger(const char *integerString, int32_t radix); + +/** + * Case-insensitive, language-independent string comparison + * limited to the ASCII character repertoire. + */ +U_CAPI int U_EXPORT2 +uprv_stricmp(const char *str1, const char *str2); + +/** + * Case-insensitive, language-independent string comparison + * limited to the ASCII character repertoire. + */ +U_CAPI int U_EXPORT2 +uprv_strnicmp(const char *str1, const char *str2, uint32_t n); + +#endif /* ! CSTRING_H */ diff --git a/thirdparty/icu4c/common/cwchar.cpp b/thirdparty/icu4c/common/cwchar.cpp new file mode 100644 index 0000000000..20c7d71e0f --- /dev/null +++ b/thirdparty/icu4c/common/cwchar.cpp @@ -0,0 +1,55 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: cwchar.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001may25 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" + +#if !U_HAVE_WCSCPY + +#include "cwchar.h" + +U_CAPI wchar_t *uprv_wcscat(wchar_t *dst, const wchar_t *src) { + wchar_t *start=dst; + while(*dst!=0) { + ++dst; + } + while((*dst=*src)!=0) { + ++dst; + ++src; + } + return start; +} + +U_CAPI wchar_t *uprv_wcscpy(wchar_t *dst, const wchar_t *src) { + wchar_t *start=dst; + while((*dst=*src)!=0) { + ++dst; + ++src; + } + return start; +} + +U_CAPI size_t uprv_wcslen(const wchar_t *src) { + const wchar_t *start=src; + while(*src!=0) { + ++src; + } + return src-start; +} + +#endif + diff --git a/thirdparty/icu4c/common/cwchar.h b/thirdparty/icu4c/common/cwchar.h new file mode 100644 index 0000000000..8fd041a1b9 --- /dev/null +++ b/thirdparty/icu4c/common/cwchar.h @@ -0,0 +1,58 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: cwchar.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001may25 +* created by: Markus W. Scherer +* +* This file contains ICU-internal definitions of wchar_t operations. +* These definitions were moved here from cstring.h so that fewer +* ICU implementation files include wchar.h. +*/ + +#ifndef __CWCHAR_H__ +#define __CWCHAR_H__ + +#include <string.h> +#include <stdlib.h> +#include "unicode/utypes.h" + +/* Do this after utypes.h so that we have U_HAVE_WCHAR_H . */ +#if U_HAVE_WCHAR_H +# include <wchar.h> +#endif + +/*===========================================================================*/ +/* Wide-character functions */ +/*===========================================================================*/ + +/* The following are not available on all systems, defined in wchar.h or string.h. */ +#if U_HAVE_WCSCPY +# define uprv_wcscpy wcscpy +# define uprv_wcscat wcscat +# define uprv_wcslen wcslen +#else +U_CAPI wchar_t* U_EXPORT2 +uprv_wcscpy(wchar_t *dst, const wchar_t *src); +U_CAPI wchar_t* U_EXPORT2 +uprv_wcscat(wchar_t *dst, const wchar_t *src); +U_CAPI size_t U_EXPORT2 +uprv_wcslen(const wchar_t *src); +#endif + +/* The following are part of the ANSI C standard, defined in stdlib.h . */ +#define uprv_wcstombs(mbstr, wcstr, count) U_STANDARD_CPP_NAMESPACE wcstombs(mbstr, wcstr, count) +#define uprv_mbstowcs(wcstr, mbstr, count) U_STANDARD_CPP_NAMESPACE mbstowcs(wcstr, mbstr, count) + + +#endif diff --git a/thirdparty/icu4c/common/dictbe.cpp b/thirdparty/icu4c/common/dictbe.cpp new file mode 100644 index 0000000000..b42cdf03fa --- /dev/null +++ b/thirdparty/icu4c/common/dictbe.cpp @@ -0,0 +1,1410 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** + ******************************************************************************* + * Copyright (C) 2006-2016, International Business Machines Corporation + * and others. All Rights Reserved. + ******************************************************************************* + */ + +#include <utility> + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "brkeng.h" +#include "dictbe.h" +#include "unicode/uniset.h" +#include "unicode/chariter.h" +#include "unicode/ubrk.h" +#include "utracimp.h" +#include "uvectr32.h" +#include "uvector.h" +#include "uassert.h" +#include "unicode/normlzr.h" +#include "cmemory.h" +#include "dictionarydata.h" + +U_NAMESPACE_BEGIN + +/* + ****************************************************************** + */ + +DictionaryBreakEngine::DictionaryBreakEngine() { +} + +DictionaryBreakEngine::~DictionaryBreakEngine() { +} + +UBool +DictionaryBreakEngine::handles(UChar32 c) const { + return fSet.contains(c); +} + +int32_t +DictionaryBreakEngine::findBreaks( UText *text, + int32_t startPos, + int32_t endPos, + UVector32 &foundBreaks ) const { + (void)startPos; // TODO: remove this param? + int32_t result = 0; + + // Find the span of characters included in the set. + // The span to break begins at the current position in the text, and + // extends towards the start or end of the text, depending on 'reverse'. + + int32_t start = (int32_t)utext_getNativeIndex(text); + int32_t current; + int32_t rangeStart; + int32_t rangeEnd; + UChar32 c = utext_current32(text); + while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) { + utext_next32(text); // TODO: recast loop for postincrement + c = utext_current32(text); + } + rangeStart = start; + rangeEnd = current; + result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); + utext_setNativeIndex(text, current); + + return result; +} + +void +DictionaryBreakEngine::setCharacters( const UnicodeSet &set ) { + fSet = set; + // Compact for caching + fSet.compact(); +} + +/* + ****************************************************************** + * PossibleWord + */ + +// Helper class for improving readability of the Thai/Lao/Khmer word break +// algorithm. The implementation is completely inline. + +// List size, limited by the maximum number of words in the dictionary +// that form a nested sequence. +static const int32_t POSSIBLE_WORD_LIST_MAX = 20; + +class PossibleWord { +private: + // list of word candidate lengths, in increasing length order + // TODO: bytes would be sufficient for word lengths. + int32_t count; // Count of candidates + int32_t prefix; // The longest match with a dictionary word + int32_t offset; // Offset in the text of these candidates + int32_t mark; // The preferred candidate's offset + int32_t current; // The candidate we're currently looking at + int32_t cuLengths[POSSIBLE_WORD_LIST_MAX]; // Word Lengths, in code units. + int32_t cpLengths[POSSIBLE_WORD_LIST_MAX]; // Word Lengths, in code points. + +public: + PossibleWord() : count(0), prefix(0), offset(-1), mark(0), current(0) {} + ~PossibleWord() {} + + // Fill the list of candidates if needed, select the longest, and return the number found + int32_t candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd ); + + // Select the currently marked candidate, point after it in the text, and invalidate self + int32_t acceptMarked( UText *text ); + + // Back up from the current candidate to the next shorter one; return TRUE if that exists + // and point the text after it + UBool backUp( UText *text ); + + // Return the longest prefix this candidate location shares with a dictionary word + // Return value is in code points. + int32_t longestPrefix() { return prefix; } + + // Mark the current candidate as the one we like + void markCurrent() { mark = current; } + + // Get length in code points of the marked word. + int32_t markedCPLength() { return cpLengths[mark]; } +}; + + +int32_t PossibleWord::candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd ) { + // TODO: If getIndex is too slow, use offset < 0 and add discardAll() + int32_t start = (int32_t)utext_getNativeIndex(text); + if (start != offset) { + offset = start; + count = dict->matches(text, rangeEnd-start, UPRV_LENGTHOF(cuLengths), cuLengths, cpLengths, NULL, &prefix); + // Dictionary leaves text after longest prefix, not longest word. Back up. + if (count <= 0) { + utext_setNativeIndex(text, start); + } + } + if (count > 0) { + utext_setNativeIndex(text, start+cuLengths[count-1]); + } + current = count-1; + mark = current; + return count; +} + +int32_t +PossibleWord::acceptMarked( UText *text ) { + utext_setNativeIndex(text, offset + cuLengths[mark]); + return cuLengths[mark]; +} + + +UBool +PossibleWord::backUp( UText *text ) { + if (current > 0) { + utext_setNativeIndex(text, offset + cuLengths[--current]); + return TRUE; + } + return FALSE; +} + +/* + ****************************************************************** + * ThaiBreakEngine + */ + +// How many words in a row are "good enough"? +static const int32_t THAI_LOOKAHEAD = 3; + +// Will not combine a non-word with a preceding dictionary word longer than this +static const int32_t THAI_ROOT_COMBINE_THRESHOLD = 3; + +// Will not combine a non-word that shares at least this much prefix with a +// dictionary word, with a preceding word +static const int32_t THAI_PREFIX_COMBINE_THRESHOLD = 3; + +// Ellision character +static const int32_t THAI_PAIYANNOI = 0x0E2F; + +// Repeat character +static const int32_t THAI_MAIYAMOK = 0x0E46; + +// Minimum word size +static const int32_t THAI_MIN_WORD = 2; + +// Minimum number of characters for two words +static const int32_t THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2; + +ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) + : DictionaryBreakEngine(), + fDictionary(adoptDictionary) +{ + UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); + UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai"); + fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status); + if (U_SUCCESS(status)) { + setCharacters(fThaiWordSet); + } + fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status); + fMarkSet.add(0x0020); + fEndWordSet = fThaiWordSet; + fEndWordSet.remove(0x0E31); // MAI HAN-AKAT + fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI + fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK + fBeginWordSet.add(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI + fSuffixSet.add(THAI_PAIYANNOI); + fSuffixSet.add(THAI_MAIYAMOK); + + // Compact for caching. + fMarkSet.compact(); + fEndWordSet.compact(); + fBeginWordSet.compact(); + fSuffixSet.compact(); + UTRACE_EXIT_STATUS(status); +} + +ThaiBreakEngine::~ThaiBreakEngine() { + delete fDictionary; +} + +int32_t +ThaiBreakEngine::divideUpDictionaryRange( UText *text, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks ) const { + utext_setNativeIndex(text, rangeStart); + utext_moveIndex32(text, THAI_MIN_WORD_SPAN); + if (utext_getNativeIndex(text) >= rangeEnd) { + return 0; // Not enough characters for two words + } + utext_setNativeIndex(text, rangeStart); + + + uint32_t wordsFound = 0; + int32_t cpWordLength = 0; // Word Length in Code Points. + int32_t cuWordLength = 0; // Word length in code units (UText native indexing) + int32_t current; + UErrorCode status = U_ZERO_ERROR; + PossibleWord words[THAI_LOOKAHEAD]; + + utext_setNativeIndex(text, rangeStart); + + while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) { + cpWordLength = 0; + cuWordLength = 0; + + // Look for candidate words at the current position + int32_t candidates = words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); + + // If we found exactly one, use that + if (candidates == 1) { + cuWordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text); + cpWordLength = words[wordsFound % THAI_LOOKAHEAD].markedCPLength(); + wordsFound += 1; + } + // If there was more than one, see which one can take us forward the most words + else if (candidates > 1) { + // If we're already at the end of the range, we're done + if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { + goto foundBest; + } + do { + int32_t wordsMatched = 1; + if (words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) { + if (wordsMatched < 2) { + // Followed by another dictionary word; mark first word as a good candidate + words[wordsFound%THAI_LOOKAHEAD].markCurrent(); + wordsMatched = 2; + } + + // If we're already at the end of the range, we're done + if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { + goto foundBest; + } + + // See if any of the possible second words is followed by a third word + do { + // If we find a third word, stop right away + if (words[(wordsFound + 2) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) { + words[wordsFound % THAI_LOOKAHEAD].markCurrent(); + goto foundBest; + } + } + while (words[(wordsFound + 1) % THAI_LOOKAHEAD].backUp(text)); + } + } + while (words[wordsFound % THAI_LOOKAHEAD].backUp(text)); +foundBest: + // Set UText position to after the accepted word. + cuWordLength = words[wordsFound % THAI_LOOKAHEAD].acceptMarked(text); + cpWordLength = words[wordsFound % THAI_LOOKAHEAD].markedCPLength(); + wordsFound += 1; + } + + // We come here after having either found a word or not. We look ahead to the + // next word. If it's not a dictionary word, we will combine it with the word we + // just found (if there is one), but only if the preceding word does not exceed + // the threshold. + // The text iterator should now be positioned at the end of the word we found. + + UChar32 uc = 0; + if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < THAI_ROOT_COMBINE_THRESHOLD) { + // if it is a dictionary word, do nothing. If it isn't, then if there is + // no preceding word, or the non-word shares less than the minimum threshold + // of characters with a dictionary word, then scan to resynchronize + if (words[wordsFound % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 + && (cuWordLength == 0 + || words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) { + // Look for a plausible word boundary + int32_t remaining = rangeEnd - (current+cuWordLength); + UChar32 pc; + int32_t chars = 0; + for (;;) { + int32_t pcIndex = (int32_t)utext_getNativeIndex(text); + pc = utext_next32(text); + int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex; + chars += pcSize; + remaining -= pcSize; + if (remaining <= 0) { + break; + } + uc = utext_current32(text); + if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) { + // Maybe. See if it's in the dictionary. + // NOTE: In the original Apple code, checked that the next + // two characters after uc were not 0x0E4C THANTHAKHAT before + // checking the dictionary. That is just a performance filter, + // but it's not clear it's faster than checking the trie. + int32_t num_candidates = words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); + utext_setNativeIndex(text, current + cuWordLength + chars); + if (num_candidates > 0) { + break; + } + } + } + + // Bump the word count if there wasn't already one + if (cuWordLength <= 0) { + wordsFound += 1; + } + + // Update the length with the passed-over characters + cuWordLength += chars; + } + else { + // Back up to where we were for next iteration + utext_setNativeIndex(text, current+cuWordLength); + } + } + + // Never stop before a combining mark. + int32_t currPos; + while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) { + utext_next32(text); + cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos; + } + + // Look ahead for possible suffixes if a dictionary word does not follow. + // We do this in code rather than using a rule so that the heuristic + // resynch continues to function. For example, one of the suffix characters + // could be a typo in the middle of a word. + if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cuWordLength > 0) { + if (words[wordsFound%THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 + && fSuffixSet.contains(uc = utext_current32(text))) { + if (uc == THAI_PAIYANNOI) { + if (!fSuffixSet.contains(utext_previous32(text))) { + // Skip over previous end and PAIYANNOI + utext_next32(text); + int32_t paiyannoiIndex = (int32_t)utext_getNativeIndex(text); + utext_next32(text); + cuWordLength += (int32_t)utext_getNativeIndex(text) - paiyannoiIndex; // Add PAIYANNOI to word + uc = utext_current32(text); // Fetch next character + } + else { + // Restore prior position + utext_next32(text); + } + } + if (uc == THAI_MAIYAMOK) { + if (utext_previous32(text) != THAI_MAIYAMOK) { + // Skip over previous end and MAIYAMOK + utext_next32(text); + int32_t maiyamokIndex = (int32_t)utext_getNativeIndex(text); + utext_next32(text); + cuWordLength += (int32_t)utext_getNativeIndex(text) - maiyamokIndex; // Add MAIYAMOK to word + } + else { + // Restore prior position + utext_next32(text); + } + } + } + else { + utext_setNativeIndex(text, current+cuWordLength); + } + } + + // Did we find a word on this iteration? If so, push it on the break stack + if (cuWordLength > 0) { + foundBreaks.push((current+cuWordLength), status); + } + } + + // Don't return a break for the end of the dictionary range if there is one there. + if (foundBreaks.peeki() >= rangeEnd) { + (void) foundBreaks.popi(); + wordsFound -= 1; + } + + return wordsFound; +} + +/* + ****************************************************************** + * LaoBreakEngine + */ + +// How many words in a row are "good enough"? +static const int32_t LAO_LOOKAHEAD = 3; + +// Will not combine a non-word with a preceding dictionary word longer than this +static const int32_t LAO_ROOT_COMBINE_THRESHOLD = 3; + +// Will not combine a non-word that shares at least this much prefix with a +// dictionary word, with a preceding word +static const int32_t LAO_PREFIX_COMBINE_THRESHOLD = 3; + +// Minimum word size +static const int32_t LAO_MIN_WORD = 2; + +// Minimum number of characters for two words +static const int32_t LAO_MIN_WORD_SPAN = LAO_MIN_WORD * 2; + +LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) + : DictionaryBreakEngine(), + fDictionary(adoptDictionary) +{ + UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); + UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo"); + fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status); + if (U_SUCCESS(status)) { + setCharacters(fLaoWordSet); + } + fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status); + fMarkSet.add(0x0020); + fEndWordSet = fLaoWordSet; + fEndWordSet.remove(0x0EC0, 0x0EC4); // prefix vowels + fBeginWordSet.add(0x0E81, 0x0EAE); // basic consonants (including holes for corresponding Thai characters) + fBeginWordSet.add(0x0EDC, 0x0EDD); // digraph consonants (no Thai equivalent) + fBeginWordSet.add(0x0EC0, 0x0EC4); // prefix vowels + + // Compact for caching. + fMarkSet.compact(); + fEndWordSet.compact(); + fBeginWordSet.compact(); + UTRACE_EXIT_STATUS(status); +} + +LaoBreakEngine::~LaoBreakEngine() { + delete fDictionary; +} + +int32_t +LaoBreakEngine::divideUpDictionaryRange( UText *text, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks ) const { + if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) { + return 0; // Not enough characters for two words + } + + uint32_t wordsFound = 0; + int32_t cpWordLength = 0; + int32_t cuWordLength = 0; + int32_t current; + UErrorCode status = U_ZERO_ERROR; + PossibleWord words[LAO_LOOKAHEAD]; + + utext_setNativeIndex(text, rangeStart); + + while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) { + cuWordLength = 0; + cpWordLength = 0; + + // Look for candidate words at the current position + int32_t candidates = words[wordsFound%LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); + + // If we found exactly one, use that + if (candidates == 1) { + cuWordLength = words[wordsFound % LAO_LOOKAHEAD].acceptMarked(text); + cpWordLength = words[wordsFound % LAO_LOOKAHEAD].markedCPLength(); + wordsFound += 1; + } + // If there was more than one, see which one can take us forward the most words + else if (candidates > 1) { + // If we're already at the end of the range, we're done + if (utext_getNativeIndex(text) >= rangeEnd) { + goto foundBest; + } + do { + int32_t wordsMatched = 1; + if (words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) { + if (wordsMatched < 2) { + // Followed by another dictionary word; mark first word as a good candidate + words[wordsFound%LAO_LOOKAHEAD].markCurrent(); + wordsMatched = 2; + } + + // If we're already at the end of the range, we're done + if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { + goto foundBest; + } + + // See if any of the possible second words is followed by a third word + do { + // If we find a third word, stop right away + if (words[(wordsFound + 2) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) { + words[wordsFound % LAO_LOOKAHEAD].markCurrent(); + goto foundBest; + } + } + while (words[(wordsFound + 1) % LAO_LOOKAHEAD].backUp(text)); + } + } + while (words[wordsFound % LAO_LOOKAHEAD].backUp(text)); +foundBest: + cuWordLength = words[wordsFound % LAO_LOOKAHEAD].acceptMarked(text); + cpWordLength = words[wordsFound % LAO_LOOKAHEAD].markedCPLength(); + wordsFound += 1; + } + + // We come here after having either found a word or not. We look ahead to the + // next word. If it's not a dictionary word, we will combine it withe the word we + // just found (if there is one), but only if the preceding word does not exceed + // the threshold. + // The text iterator should now be positioned at the end of the word we found. + if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < LAO_ROOT_COMBINE_THRESHOLD) { + // if it is a dictionary word, do nothing. If it isn't, then if there is + // no preceding word, or the non-word shares less than the minimum threshold + // of characters with a dictionary word, then scan to resynchronize + if (words[wordsFound % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 + && (cuWordLength == 0 + || words[wordsFound%LAO_LOOKAHEAD].longestPrefix() < LAO_PREFIX_COMBINE_THRESHOLD)) { + // Look for a plausible word boundary + int32_t remaining = rangeEnd - (current + cuWordLength); + UChar32 pc; + UChar32 uc; + int32_t chars = 0; + for (;;) { + int32_t pcIndex = (int32_t)utext_getNativeIndex(text); + pc = utext_next32(text); + int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex; + chars += pcSize; + remaining -= pcSize; + if (remaining <= 0) { + break; + } + uc = utext_current32(text); + if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) { + // Maybe. See if it's in the dictionary. + // TODO: this looks iffy; compare with old code. + int32_t num_candidates = words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); + utext_setNativeIndex(text, current + cuWordLength + chars); + if (num_candidates > 0) { + break; + } + } + } + + // Bump the word count if there wasn't already one + if (cuWordLength <= 0) { + wordsFound += 1; + } + + // Update the length with the passed-over characters + cuWordLength += chars; + } + else { + // Back up to where we were for next iteration + utext_setNativeIndex(text, current + cuWordLength); + } + } + + // Never stop before a combining mark. + int32_t currPos; + while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) { + utext_next32(text); + cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos; + } + + // Look ahead for possible suffixes if a dictionary word does not follow. + // We do this in code rather than using a rule so that the heuristic + // resynch continues to function. For example, one of the suffix characters + // could be a typo in the middle of a word. + // NOT CURRENTLY APPLICABLE TO LAO + + // Did we find a word on this iteration? If so, push it on the break stack + if (cuWordLength > 0) { + foundBreaks.push((current+cuWordLength), status); + } + } + + // Don't return a break for the end of the dictionary range if there is one there. + if (foundBreaks.peeki() >= rangeEnd) { + (void) foundBreaks.popi(); + wordsFound -= 1; + } + + return wordsFound; +} + +/* + ****************************************************************** + * BurmeseBreakEngine + */ + +// How many words in a row are "good enough"? +static const int32_t BURMESE_LOOKAHEAD = 3; + +// Will not combine a non-word with a preceding dictionary word longer than this +static const int32_t BURMESE_ROOT_COMBINE_THRESHOLD = 3; + +// Will not combine a non-word that shares at least this much prefix with a +// dictionary word, with a preceding word +static const int32_t BURMESE_PREFIX_COMBINE_THRESHOLD = 3; + +// Minimum word size +static const int32_t BURMESE_MIN_WORD = 2; + +// Minimum number of characters for two words +static const int32_t BURMESE_MIN_WORD_SPAN = BURMESE_MIN_WORD * 2; + +BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) + : DictionaryBreakEngine(), + fDictionary(adoptDictionary) +{ + UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); + UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr"); + fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status); + if (U_SUCCESS(status)) { + setCharacters(fBurmeseWordSet); + } + fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status); + fMarkSet.add(0x0020); + fEndWordSet = fBurmeseWordSet; + fBeginWordSet.add(0x1000, 0x102A); // basic consonants and independent vowels + + // Compact for caching. + fMarkSet.compact(); + fEndWordSet.compact(); + fBeginWordSet.compact(); + UTRACE_EXIT_STATUS(status); +} + +BurmeseBreakEngine::~BurmeseBreakEngine() { + delete fDictionary; +} + +int32_t +BurmeseBreakEngine::divideUpDictionaryRange( UText *text, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks ) const { + if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) { + return 0; // Not enough characters for two words + } + + uint32_t wordsFound = 0; + int32_t cpWordLength = 0; + int32_t cuWordLength = 0; + int32_t current; + UErrorCode status = U_ZERO_ERROR; + PossibleWord words[BURMESE_LOOKAHEAD]; + + utext_setNativeIndex(text, rangeStart); + + while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) { + cuWordLength = 0; + cpWordLength = 0; + + // Look for candidate words at the current position + int32_t candidates = words[wordsFound%BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); + + // If we found exactly one, use that + if (candidates == 1) { + cuWordLength = words[wordsFound % BURMESE_LOOKAHEAD].acceptMarked(text); + cpWordLength = words[wordsFound % BURMESE_LOOKAHEAD].markedCPLength(); + wordsFound += 1; + } + // If there was more than one, see which one can take us forward the most words + else if (candidates > 1) { + // If we're already at the end of the range, we're done + if (utext_getNativeIndex(text) >= rangeEnd) { + goto foundBest; + } + do { + int32_t wordsMatched = 1; + if (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) { + if (wordsMatched < 2) { + // Followed by another dictionary word; mark first word as a good candidate + words[wordsFound%BURMESE_LOOKAHEAD].markCurrent(); + wordsMatched = 2; + } + + // If we're already at the end of the range, we're done + if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { + goto foundBest; + } + + // See if any of the possible second words is followed by a third word + do { + // If we find a third word, stop right away + if (words[(wordsFound + 2) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) { + words[wordsFound % BURMESE_LOOKAHEAD].markCurrent(); + goto foundBest; + } + } + while (words[(wordsFound + 1) % BURMESE_LOOKAHEAD].backUp(text)); + } + } + while (words[wordsFound % BURMESE_LOOKAHEAD].backUp(text)); +foundBest: + cuWordLength = words[wordsFound % BURMESE_LOOKAHEAD].acceptMarked(text); + cpWordLength = words[wordsFound % BURMESE_LOOKAHEAD].markedCPLength(); + wordsFound += 1; + } + + // We come here after having either found a word or not. We look ahead to the + // next word. If it's not a dictionary word, we will combine it withe the word we + // just found (if there is one), but only if the preceding word does not exceed + // the threshold. + // The text iterator should now be positioned at the end of the word we found. + if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < BURMESE_ROOT_COMBINE_THRESHOLD) { + // if it is a dictionary word, do nothing. If it isn't, then if there is + // no preceding word, or the non-word shares less than the minimum threshold + // of characters with a dictionary word, then scan to resynchronize + if (words[wordsFound % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 + && (cuWordLength == 0 + || words[wordsFound%BURMESE_LOOKAHEAD].longestPrefix() < BURMESE_PREFIX_COMBINE_THRESHOLD)) { + // Look for a plausible word boundary + int32_t remaining = rangeEnd - (current + cuWordLength); + UChar32 pc; + UChar32 uc; + int32_t chars = 0; + for (;;) { + int32_t pcIndex = (int32_t)utext_getNativeIndex(text); + pc = utext_next32(text); + int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex; + chars += pcSize; + remaining -= pcSize; + if (remaining <= 0) { + break; + } + uc = utext_current32(text); + if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) { + // Maybe. See if it's in the dictionary. + // TODO: this looks iffy; compare with old code. + int32_t num_candidates = words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); + utext_setNativeIndex(text, current + cuWordLength + chars); + if (num_candidates > 0) { + break; + } + } + } + + // Bump the word count if there wasn't already one + if (cuWordLength <= 0) { + wordsFound += 1; + } + + // Update the length with the passed-over characters + cuWordLength += chars; + } + else { + // Back up to where we were for next iteration + utext_setNativeIndex(text, current + cuWordLength); + } + } + + // Never stop before a combining mark. + int32_t currPos; + while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) { + utext_next32(text); + cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos; + } + + // Look ahead for possible suffixes if a dictionary word does not follow. + // We do this in code rather than using a rule so that the heuristic + // resynch continues to function. For example, one of the suffix characters + // could be a typo in the middle of a word. + // NOT CURRENTLY APPLICABLE TO BURMESE + + // Did we find a word on this iteration? If so, push it on the break stack + if (cuWordLength > 0) { + foundBreaks.push((current+cuWordLength), status); + } + } + + // Don't return a break for the end of the dictionary range if there is one there. + if (foundBreaks.peeki() >= rangeEnd) { + (void) foundBreaks.popi(); + wordsFound -= 1; + } + + return wordsFound; +} + +/* + ****************************************************************** + * KhmerBreakEngine + */ + +// How many words in a row are "good enough"? +static const int32_t KHMER_LOOKAHEAD = 3; + +// Will not combine a non-word with a preceding dictionary word longer than this +static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 3; + +// Will not combine a non-word that shares at least this much prefix with a +// dictionary word, with a preceding word +static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 3; + +// Minimum word size +static const int32_t KHMER_MIN_WORD = 2; + +// Minimum number of characters for two words +static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2; + +KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status) + : DictionaryBreakEngine(), + fDictionary(adoptDictionary) +{ + UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); + UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr"); + fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status); + if (U_SUCCESS(status)) { + setCharacters(fKhmerWordSet); + } + fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status); + fMarkSet.add(0x0020); + fEndWordSet = fKhmerWordSet; + fBeginWordSet.add(0x1780, 0x17B3); + //fBeginWordSet.add(0x17A3, 0x17A4); // deprecated vowels + //fEndWordSet.remove(0x17A5, 0x17A9); // Khmer independent vowels that can't end a word + //fEndWordSet.remove(0x17B2); // Khmer independent vowel that can't end a word + fEndWordSet.remove(0x17D2); // KHMER SIGN COENG that combines some following characters + //fEndWordSet.remove(0x17B6, 0x17C5); // Remove dependent vowels +// fEndWordSet.remove(0x0E31); // MAI HAN-AKAT +// fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI +// fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK +// fBeginWordSet.add(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI +// fSuffixSet.add(THAI_PAIYANNOI); +// fSuffixSet.add(THAI_MAIYAMOK); + + // Compact for caching. + fMarkSet.compact(); + fEndWordSet.compact(); + fBeginWordSet.compact(); +// fSuffixSet.compact(); + UTRACE_EXIT_STATUS(status); +} + +KhmerBreakEngine::~KhmerBreakEngine() { + delete fDictionary; +} + +int32_t +KhmerBreakEngine::divideUpDictionaryRange( UText *text, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks ) const { + if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) { + return 0; // Not enough characters for two words + } + + uint32_t wordsFound = 0; + int32_t cpWordLength = 0; + int32_t cuWordLength = 0; + int32_t current; + UErrorCode status = U_ZERO_ERROR; + PossibleWord words[KHMER_LOOKAHEAD]; + + utext_setNativeIndex(text, rangeStart); + + while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) { + cuWordLength = 0; + cpWordLength = 0; + + // Look for candidate words at the current position + int32_t candidates = words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); + + // If we found exactly one, use that + if (candidates == 1) { + cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text); + cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength(); + wordsFound += 1; + } + + // If there was more than one, see which one can take us forward the most words + else if (candidates > 1) { + // If we're already at the end of the range, we're done + if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { + goto foundBest; + } + do { + int32_t wordsMatched = 1; + if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) { + if (wordsMatched < 2) { + // Followed by another dictionary word; mark first word as a good candidate + words[wordsFound % KHMER_LOOKAHEAD].markCurrent(); + wordsMatched = 2; + } + + // If we're already at the end of the range, we're done + if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) { + goto foundBest; + } + + // See if any of the possible second words is followed by a third word + do { + // If we find a third word, stop right away + if (words[(wordsFound + 2) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) { + words[wordsFound % KHMER_LOOKAHEAD].markCurrent(); + goto foundBest; + } + } + while (words[(wordsFound + 1) % KHMER_LOOKAHEAD].backUp(text)); + } + } + while (words[wordsFound % KHMER_LOOKAHEAD].backUp(text)); +foundBest: + cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text); + cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength(); + wordsFound += 1; + } + + // We come here after having either found a word or not. We look ahead to the + // next word. If it's not a dictionary word, we will combine it with the word we + // just found (if there is one), but only if the preceding word does not exceed + // the threshold. + // The text iterator should now be positioned at the end of the word we found. + if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < KHMER_ROOT_COMBINE_THRESHOLD) { + // if it is a dictionary word, do nothing. If it isn't, then if there is + // no preceding word, or the non-word shares less than the minimum threshold + // of characters with a dictionary word, then scan to resynchronize + if (words[wordsFound % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 + && (cuWordLength == 0 + || words[wordsFound % KHMER_LOOKAHEAD].longestPrefix() < KHMER_PREFIX_COMBINE_THRESHOLD)) { + // Look for a plausible word boundary + int32_t remaining = rangeEnd - (current+cuWordLength); + UChar32 pc; + UChar32 uc; + int32_t chars = 0; + for (;;) { + int32_t pcIndex = (int32_t)utext_getNativeIndex(text); + pc = utext_next32(text); + int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex; + chars += pcSize; + remaining -= pcSize; + if (remaining <= 0) { + break; + } + uc = utext_current32(text); + if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) { + // Maybe. See if it's in the dictionary. + int32_t num_candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); + utext_setNativeIndex(text, current+cuWordLength+chars); + if (num_candidates > 0) { + break; + } + } + } + + // Bump the word count if there wasn't already one + if (cuWordLength <= 0) { + wordsFound += 1; + } + + // Update the length with the passed-over characters + cuWordLength += chars; + } + else { + // Back up to where we were for next iteration + utext_setNativeIndex(text, current+cuWordLength); + } + } + + // Never stop before a combining mark. + int32_t currPos; + while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) { + utext_next32(text); + cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos; + } + + // Look ahead for possible suffixes if a dictionary word does not follow. + // We do this in code rather than using a rule so that the heuristic + // resynch continues to function. For example, one of the suffix characters + // could be a typo in the middle of a word. +// if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength > 0) { +// if (words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0 +// && fSuffixSet.contains(uc = utext_current32(text))) { +// if (uc == KHMER_PAIYANNOI) { +// if (!fSuffixSet.contains(utext_previous32(text))) { +// // Skip over previous end and PAIYANNOI +// utext_next32(text); +// utext_next32(text); +// wordLength += 1; // Add PAIYANNOI to word +// uc = utext_current32(text); // Fetch next character +// } +// else { +// // Restore prior position +// utext_next32(text); +// } +// } +// if (uc == KHMER_MAIYAMOK) { +// if (utext_previous32(text) != KHMER_MAIYAMOK) { +// // Skip over previous end and MAIYAMOK +// utext_next32(text); +// utext_next32(text); +// wordLength += 1; // Add MAIYAMOK to word +// } +// else { +// // Restore prior position +// utext_next32(text); +// } +// } +// } +// else { +// utext_setNativeIndex(text, current+wordLength); +// } +// } + + // Did we find a word on this iteration? If so, push it on the break stack + if (cuWordLength > 0) { + foundBreaks.push((current+cuWordLength), status); + } + } + + // Don't return a break for the end of the dictionary range if there is one there. + if (foundBreaks.peeki() >= rangeEnd) { + (void) foundBreaks.popi(); + wordsFound -= 1; + } + + return wordsFound; +} + +#if !UCONFIG_NO_NORMALIZATION +/* + ****************************************************************** + * CjkBreakEngine + */ +static const uint32_t kuint32max = 0xFFFFFFFF; +CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status) +: DictionaryBreakEngine(), fDictionary(adoptDictionary) { + UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); + UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani"); + // Korean dictionary only includes Hangul syllables + fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status); + fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status); + fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status); + fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status); + nfkcNorm2 = Normalizer2::getNFKCInstance(status); + + if (U_SUCCESS(status)) { + // handle Korean and Japanese/Chinese using different dictionaries + if (type == kKorean) { + setCharacters(fHangulWordSet); + } else { //Chinese and Japanese + UnicodeSet cjSet; + cjSet.addAll(fHanWordSet); + cjSet.addAll(fKatakanaWordSet); + cjSet.addAll(fHiraganaWordSet); + cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK + cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK + setCharacters(cjSet); + } + } + UTRACE_EXIT_STATUS(status); +} + +CjkBreakEngine::~CjkBreakEngine(){ + delete fDictionary; +} + +// The katakanaCost values below are based on the length frequencies of all +// katakana phrases in the dictionary +static const int32_t kMaxKatakanaLength = 8; +static const int32_t kMaxKatakanaGroupLength = 20; +static const uint32_t maxSnlp = 255; + +static inline uint32_t getKatakanaCost(int32_t wordLength){ + //TODO: fill array with actual values from dictionary! + static const uint32_t katakanaCost[kMaxKatakanaLength + 1] + = {8192, 984, 408, 240, 204, 252, 300, 372, 480}; + return (wordLength > kMaxKatakanaLength) ? 8192 : katakanaCost[wordLength]; +} + +static inline bool isKatakana(UChar32 value) { + return (value >= 0x30A1 && value <= 0x30FE && value != 0x30FB) || + (value >= 0xFF66 && value <= 0xFF9f); +} + + +// Function for accessing internal utext flags. +// Replicates an internal UText function. + +static inline int32_t utext_i32_flag(int32_t bitIndex) { + return (int32_t)1 << bitIndex; +} + + +/* + * @param text A UText representing the text + * @param rangeStart The start of the range of dictionary characters + * @param rangeEnd The end of the range of dictionary characters + * @param foundBreaks vector<int32> to receive the break positions + * @return The number of breaks found + */ +int32_t +CjkBreakEngine::divideUpDictionaryRange( UText *inText, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks ) const { + if (rangeStart >= rangeEnd) { + return 0; + } + + // UnicodeString version of input UText, NFKC normalized if necessary. + UnicodeString inString; + + // inputMap[inStringIndex] = corresponding native index from UText inText. + // If NULL then mapping is 1:1 + LocalPointer<UVector32> inputMap; + + UErrorCode status = U_ZERO_ERROR; + + + // if UText has the input string as one contiguous UTF-16 chunk + if ((inText->providerProperties & utext_i32_flag(UTEXT_PROVIDER_STABLE_CHUNKS)) && + inText->chunkNativeStart <= rangeStart && + inText->chunkNativeLimit >= rangeEnd && + inText->nativeIndexingLimit >= rangeEnd - inText->chunkNativeStart) { + + // Input UText is in one contiguous UTF-16 chunk. + // Use Read-only aliasing UnicodeString. + inString.setTo(FALSE, + inText->chunkContents + rangeStart - inText->chunkNativeStart, + rangeEnd - rangeStart); + } else { + // Copy the text from the original inText (UText) to inString (UnicodeString). + // Create a map from UnicodeString indices -> UText offsets. + utext_setNativeIndex(inText, rangeStart); + int32_t limit = rangeEnd; + U_ASSERT(limit <= utext_nativeLength(inText)); + if (limit > utext_nativeLength(inText)) { + limit = (int32_t)utext_nativeLength(inText); + } + inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status); + if (U_FAILURE(status)) { + return 0; + } + while (utext_getNativeIndex(inText) < limit) { + int32_t nativePosition = (int32_t)utext_getNativeIndex(inText); + UChar32 c = utext_next32(inText); + U_ASSERT(c != U_SENTINEL); + inString.append(c); + while (inputMap->size() < inString.length()) { + inputMap->addElement(nativePosition, status); + } + } + inputMap->addElement(limit, status); + } + + + if (!nfkcNorm2->isNormalized(inString, status)) { + UnicodeString normalizedInput; + // normalizedMap[normalizedInput position] == original UText position. + LocalPointer<UVector32> normalizedMap(new UVector32(status), status); + if (U_FAILURE(status)) { + return 0; + } + + UnicodeString fragment; + UnicodeString normalizedFragment; + for (int32_t srcI = 0; srcI < inString.length();) { // Once per normalization chunk + fragment.remove(); + int32_t fragmentStartI = srcI; + UChar32 c = inString.char32At(srcI); + for (;;) { + fragment.append(c); + srcI = inString.moveIndex32(srcI, 1); + if (srcI == inString.length()) { + break; + } + c = inString.char32At(srcI); + if (nfkcNorm2->hasBoundaryBefore(c)) { + break; + } + } + nfkcNorm2->normalize(fragment, normalizedFragment, status); + normalizedInput.append(normalizedFragment); + + // Map every position in the normalized chunk to the start of the chunk + // in the original input. + int32_t fragmentOriginalStart = inputMap.isValid() ? + inputMap->elementAti(fragmentStartI) : fragmentStartI+rangeStart; + while (normalizedMap->size() < normalizedInput.length()) { + normalizedMap->addElement(fragmentOriginalStart, status); + if (U_FAILURE(status)) { + break; + } + } + } + U_ASSERT(normalizedMap->size() == normalizedInput.length()); + int32_t nativeEnd = inputMap.isValid() ? + inputMap->elementAti(inString.length()) : inString.length()+rangeStart; + normalizedMap->addElement(nativeEnd, status); + + inputMap = std::move(normalizedMap); + inString = std::move(normalizedInput); + } + + int32_t numCodePts = inString.countChar32(); + if (numCodePts != inString.length()) { + // There are supplementary characters in the input. + // The dictionary will produce boundary positions in terms of code point indexes, + // not in terms of code unit string indexes. + // Use the inputMap mechanism to take care of this in addition to indexing differences + // from normalization and/or UTF-8 input. + UBool hadExistingMap = inputMap.isValid(); + if (!hadExistingMap) { + inputMap.adoptInsteadAndCheckErrorCode(new UVector32(status), status); + if (U_FAILURE(status)) { + return 0; + } + } + int32_t cpIdx = 0; + for (int32_t cuIdx = 0; ; cuIdx = inString.moveIndex32(cuIdx, 1)) { + U_ASSERT(cuIdx >= cpIdx); + if (hadExistingMap) { + inputMap->setElementAt(inputMap->elementAti(cuIdx), cpIdx); + } else { + inputMap->addElement(cuIdx+rangeStart, status); + } + cpIdx++; + if (cuIdx == inString.length()) { + break; + } + } + } + + // bestSnlp[i] is the snlp of the best segmentation of the first i + // code points in the range to be matched. + UVector32 bestSnlp(numCodePts + 1, status); + bestSnlp.addElement(0, status); + for(int32_t i = 1; i <= numCodePts; i++) { + bestSnlp.addElement(kuint32max, status); + } + + + // prev[i] is the index of the last CJK code point in the previous word in + // the best segmentation of the first i characters. + UVector32 prev(numCodePts + 1, status); + for(int32_t i = 0; i <= numCodePts; i++){ + prev.addElement(-1, status); + } + + const int32_t maxWordSize = 20; + UVector32 values(numCodePts, status); + values.setSize(numCodePts); + UVector32 lengths(numCodePts, status); + lengths.setSize(numCodePts); + + UText fu = UTEXT_INITIALIZER; + utext_openUnicodeString(&fu, &inString, &status); + + // Dynamic programming to find the best segmentation. + + // In outer loop, i is the code point index, + // ix is the corresponding string (code unit) index. + // They differ when the string contains supplementary characters. + int32_t ix = 0; + bool is_prev_katakana = false; + for (int32_t i = 0; i < numCodePts; ++i, ix = inString.moveIndex32(ix, 1)) { + if ((uint32_t)bestSnlp.elementAti(i) == kuint32max) { + continue; + } + + int32_t count; + utext_setNativeIndex(&fu, ix); + count = fDictionary->matches(&fu, maxWordSize, numCodePts, + NULL, lengths.getBuffer(), values.getBuffer(), NULL); + // Note: lengths is filled with code point lengths + // The NULL parameter is the ignored code unit lengths. + + // if there are no single character matches found in the dictionary + // starting with this character, treat character as a 1-character word + // with the highest value possible, i.e. the least likely to occur. + // Exclude Korean characters from this treatment, as they should be left + // together by default. + if ((count == 0 || lengths.elementAti(0) != 1) && + !fHangulWordSet.contains(inString.char32At(ix))) { + values.setElementAt(maxSnlp, count); // 255 + lengths.setElementAt(1, count++); + } + + for (int32_t j = 0; j < count; j++) { + uint32_t newSnlp = (uint32_t)bestSnlp.elementAti(i) + (uint32_t)values.elementAti(j); + int32_t ln_j_i = lengths.elementAti(j) + i; + if (newSnlp < (uint32_t)bestSnlp.elementAti(ln_j_i)) { + bestSnlp.setElementAt(newSnlp, ln_j_i); + prev.setElementAt(i, ln_j_i); + } + } + + // In Japanese, + // Katakana word in single character is pretty rare. So we apply + // the following heuristic to Katakana: any continuous run of Katakana + // characters is considered a candidate word with a default cost + // specified in the katakanaCost table according to its length. + + bool is_katakana = isKatakana(inString.char32At(ix)); + int32_t katakanaRunLength = 1; + if (!is_prev_katakana && is_katakana) { + int32_t j = inString.moveIndex32(ix, 1); + // Find the end of the continuous run of Katakana characters + while (j < inString.length() && katakanaRunLength < kMaxKatakanaGroupLength && + isKatakana(inString.char32At(j))) { + j = inString.moveIndex32(j, 1); + katakanaRunLength++; + } + if (katakanaRunLength < kMaxKatakanaGroupLength) { + uint32_t newSnlp = bestSnlp.elementAti(i) + getKatakanaCost(katakanaRunLength); + if (newSnlp < (uint32_t)bestSnlp.elementAti(i+katakanaRunLength)) { + bestSnlp.setElementAt(newSnlp, i+katakanaRunLength); + prev.setElementAt(i, i+katakanaRunLength); // prev[j] = i; + } + } + } + is_prev_katakana = is_katakana; + } + utext_close(&fu); + + // Start pushing the optimal offset index into t_boundary (t for tentative). + // prev[numCodePts] is guaranteed to be meaningful. + // We'll first push in the reverse order, i.e., + // t_boundary[0] = numCodePts, and afterwards do a swap. + UVector32 t_boundary(numCodePts+1, status); + + int32_t numBreaks = 0; + // No segmentation found, set boundary to end of range + if ((uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) { + t_boundary.addElement(numCodePts, status); + numBreaks++; + } else { + for (int32_t i = numCodePts; i > 0; i = prev.elementAti(i)) { + t_boundary.addElement(i, status); + numBreaks++; + } + U_ASSERT(prev.elementAti(t_boundary.elementAti(numBreaks - 1)) == 0); + } + + // Add a break for the start of the dictionary range if there is not one + // there already. + if (foundBreaks.size() == 0 || foundBreaks.peeki() < rangeStart) { + t_boundary.addElement(0, status); + numBreaks++; + } + + // Now that we're done, convert positions in t_boundary[] (indices in + // the normalized input string) back to indices in the original input UText + // while reversing t_boundary and pushing values to foundBreaks. + int32_t prevCPPos = -1; + int32_t prevUTextPos = -1; + for (int32_t i = numBreaks-1; i >= 0; i--) { + int32_t cpPos = t_boundary.elementAti(i); + U_ASSERT(cpPos > prevCPPos); + int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart; + U_ASSERT(utextPos >= prevUTextPos); + if (utextPos > prevUTextPos) { + // Boundaries are added to foundBreaks output in ascending order. + U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos); + foundBreaks.push(utextPos, status); + } else { + // Normalization expanded the input text, the dictionary found a boundary + // within the expansion, giving two boundaries with the same index in the + // original text. Ignore the second. See ticket #12918. + --numBreaks; + } + prevCPPos = cpPos; + prevUTextPos = utextPos; + } + (void)prevCPPos; // suppress compiler warnings about unused variable + + // inString goes out of scope + // inputMap goes out of scope + return numBreaks; +} +#endif + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ + diff --git a/thirdparty/icu4c/common/dictbe.h b/thirdparty/icu4c/common/dictbe.h new file mode 100644 index 0000000000..4ea676fc71 --- /dev/null +++ b/thirdparty/icu4c/common/dictbe.h @@ -0,0 +1,402 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** + ******************************************************************************* + * Copyright (C) 2006-2014, International Business Machines Corporation * + * and others. All Rights Reserved. * + ******************************************************************************* + */ + +#ifndef DICTBE_H +#define DICTBE_H + +#include "unicode/utypes.h" +#include "unicode/uniset.h" +#include "unicode/utext.h" + +#include "brkeng.h" +#include "uvectr32.h" + +U_NAMESPACE_BEGIN + +class DictionaryMatcher; +class Normalizer2; + +/******************************************************************* + * DictionaryBreakEngine + */ + +/** + * <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a + * dictionary to determine language-specific breaks.</p> + * + * <p>After it is constructed a DictionaryBreakEngine may be shared between + * threads without synchronization.</p> + */ +class DictionaryBreakEngine : public LanguageBreakEngine { + private: + /** + * The set of characters handled by this engine + * @internal + */ + + UnicodeSet fSet; + + public: + + /** + * <p>Constructor </p> + */ + DictionaryBreakEngine(); + + /** + * <p>Virtual destructor.</p> + */ + virtual ~DictionaryBreakEngine(); + + /** + * <p>Indicate whether this engine handles a particular character for + * a particular kind of break.</p> + * + * @param c A character which begins a run that the engine might handle + * @return true if this engine handles the particular character and break + * type. + */ + virtual UBool handles(UChar32 c) const; + + /** + * <p>Find any breaks within a run in the supplied text.</p> + * + * @param text A UText representing the text. The iterator is left at + * the end of the run of characters which the engine is capable of handling + * that starts from the first character in the range. + * @param startPos The start of the run within the supplied text. + * @param endPos The end of the run within the supplied text. + * @param foundBreaks vector of int32_t to receive the break positions + * @return The number of breaks found. + */ + virtual int32_t findBreaks( UText *text, + int32_t startPos, + int32_t endPos, + UVector32 &foundBreaks ) const; + + protected: + + /** + * <p>Set the character set handled by this engine.</p> + * + * @param set A UnicodeSet of the set of characters handled by the engine + */ + virtual void setCharacters( const UnicodeSet &set ); + + /** + * <p>Divide up a range of known dictionary characters handled by this break engine.</p> + * + * @param text A UText representing the text + * @param rangeStart The start of the range of dictionary characters + * @param rangeEnd The end of the range of dictionary characters + * @param foundBreaks Output of C array of int32_t break positions, or 0 + * @return The number of breaks found + */ + virtual int32_t divideUpDictionaryRange( UText *text, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks ) const = 0; + +}; + +/******************************************************************* + * ThaiBreakEngine + */ + +/** + * <p>ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a + * dictionary and heuristics to determine Thai-specific breaks.</p> + * + * <p>After it is constructed a ThaiBreakEngine may be shared between + * threads without synchronization.</p> + */ +class ThaiBreakEngine : public DictionaryBreakEngine { + private: + /** + * The set of characters handled by this engine + * @internal + */ + + UnicodeSet fThaiWordSet; + UnicodeSet fEndWordSet; + UnicodeSet fBeginWordSet; + UnicodeSet fSuffixSet; + UnicodeSet fMarkSet; + DictionaryMatcher *fDictionary; + + public: + + /** + * <p>Default constructor.</p> + * + * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the + * engine is deleted. + */ + ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); + + /** + * <p>Virtual destructor.</p> + */ + virtual ~ThaiBreakEngine(); + + protected: + /** + * <p>Divide up a range of known dictionary characters handled by this break engine.</p> + * + * @param text A UText representing the text + * @param rangeStart The start of the range of dictionary characters + * @param rangeEnd The end of the range of dictionary characters + * @param foundBreaks Output of C array of int32_t break positions, or 0 + * @return The number of breaks found + */ + virtual int32_t divideUpDictionaryRange( UText *text, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks ) const; + +}; + +/******************************************************************* + * LaoBreakEngine + */ + +/** + * <p>LaoBreakEngine is a kind of DictionaryBreakEngine that uses a + * dictionary and heuristics to determine Lao-specific breaks.</p> + * + * <p>After it is constructed a LaoBreakEngine may be shared between + * threads without synchronization.</p> + */ +class LaoBreakEngine : public DictionaryBreakEngine { + private: + /** + * The set of characters handled by this engine + * @internal + */ + + UnicodeSet fLaoWordSet; + UnicodeSet fEndWordSet; + UnicodeSet fBeginWordSet; + UnicodeSet fMarkSet; + DictionaryMatcher *fDictionary; + + public: + + /** + * <p>Default constructor.</p> + * + * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the + * engine is deleted. + */ + LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); + + /** + * <p>Virtual destructor.</p> + */ + virtual ~LaoBreakEngine(); + + protected: + /** + * <p>Divide up a range of known dictionary characters handled by this break engine.</p> + * + * @param text A UText representing the text + * @param rangeStart The start of the range of dictionary characters + * @param rangeEnd The end of the range of dictionary characters + * @param foundBreaks Output of C array of int32_t break positions, or 0 + * @return The number of breaks found + */ + virtual int32_t divideUpDictionaryRange( UText *text, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks ) const; + +}; + +/******************************************************************* + * BurmeseBreakEngine + */ + +/** + * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a + * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p> + * + * <p>After it is constructed a BurmeseBreakEngine may be shared between + * threads without synchronization.</p> + */ +class BurmeseBreakEngine : public DictionaryBreakEngine { + private: + /** + * The set of characters handled by this engine + * @internal + */ + + UnicodeSet fBurmeseWordSet; + UnicodeSet fEndWordSet; + UnicodeSet fBeginWordSet; + UnicodeSet fMarkSet; + DictionaryMatcher *fDictionary; + + public: + + /** + * <p>Default constructor.</p> + * + * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the + * engine is deleted. + */ + BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); + + /** + * <p>Virtual destructor.</p> + */ + virtual ~BurmeseBreakEngine(); + + protected: + /** + * <p>Divide up a range of known dictionary characters.</p> + * + * @param text A UText representing the text + * @param rangeStart The start of the range of dictionary characters + * @param rangeEnd The end of the range of dictionary characters + * @param foundBreaks Output of C array of int32_t break positions, or 0 + * @return The number of breaks found + */ + virtual int32_t divideUpDictionaryRange( UText *text, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks ) const; + +}; + +/******************************************************************* + * KhmerBreakEngine + */ + +/** + * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a + * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p> + * + * <p>After it is constructed a KhmerBreakEngine may be shared between + * threads without synchronization.</p> + */ +class KhmerBreakEngine : public DictionaryBreakEngine { + private: + /** + * The set of characters handled by this engine + * @internal + */ + + UnicodeSet fKhmerWordSet; + UnicodeSet fEndWordSet; + UnicodeSet fBeginWordSet; + UnicodeSet fMarkSet; + DictionaryMatcher *fDictionary; + + public: + + /** + * <p>Default constructor.</p> + * + * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the + * engine is deleted. + */ + KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status); + + /** + * <p>Virtual destructor.</p> + */ + virtual ~KhmerBreakEngine(); + + protected: + /** + * <p>Divide up a range of known dictionary characters.</p> + * + * @param text A UText representing the text + * @param rangeStart The start of the range of dictionary characters + * @param rangeEnd The end of the range of dictionary characters + * @param foundBreaks Output of C array of int32_t break positions, or 0 + * @return The number of breaks found + */ + virtual int32_t divideUpDictionaryRange( UText *text, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks ) const; + +}; + +#if !UCONFIG_NO_NORMALIZATION + +/******************************************************************* + * CjkBreakEngine + */ + +//indicates language/script that the CjkBreakEngine will handle +enum LanguageType { + kKorean, + kChineseJapanese +}; + +/** + * <p>CjkBreakEngine is a kind of DictionaryBreakEngine that uses a + * dictionary with costs associated with each word and + * Viterbi decoding to determine CJK-specific breaks.</p> + */ +class CjkBreakEngine : public DictionaryBreakEngine { + protected: + /** + * The set of characters handled by this engine + * @internal + */ + UnicodeSet fHangulWordSet; + UnicodeSet fHanWordSet; + UnicodeSet fKatakanaWordSet; + UnicodeSet fHiraganaWordSet; + + DictionaryMatcher *fDictionary; + const Normalizer2 *nfkcNorm2; + + public: + + /** + * <p>Default constructor.</p> + * + * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the + * engine is deleted. The DictionaryMatcher must contain costs for each word + * in order for the dictionary to work properly. + */ + CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status); + + /** + * <p>Virtual destructor.</p> + */ + virtual ~CjkBreakEngine(); + + protected: + /** + * <p>Divide up a range of known dictionary characters handled by this break engine.</p> + * + * @param text A UText representing the text + * @param rangeStart The start of the range of dictionary characters + * @param rangeEnd The end of the range of dictionary characters + * @param foundBreaks Output of C array of int32_t break positions, or 0 + * @return The number of breaks found + */ + virtual int32_t divideUpDictionaryRange( UText *text, + int32_t rangeStart, + int32_t rangeEnd, + UVector32 &foundBreaks ) const; + +}; + +#endif + +U_NAMESPACE_END + + /* DICTBE_H */ +#endif diff --git a/thirdparty/icu4c/common/dictionarydata.cpp b/thirdparty/icu4c/common/dictionarydata.cpp new file mode 100644 index 0000000000..6e2dbee5b6 --- /dev/null +++ b/thirdparty/icu4c/common/dictionarydata.cpp @@ -0,0 +1,242 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2014-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* dictionarydata.h +* +* created on: 2012may31 +* created by: Markus W. Scherer & Maxime Serrano +*/ + +#include "dictionarydata.h" +#include "unicode/ucharstrie.h" +#include "unicode/bytestrie.h" +#include "unicode/udata.h" +#include "cmemory.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +U_NAMESPACE_BEGIN + +const int32_t DictionaryData::TRIE_TYPE_BYTES = 0; +const int32_t DictionaryData::TRIE_TYPE_UCHARS = 1; +const int32_t DictionaryData::TRIE_TYPE_MASK = 7; +const int32_t DictionaryData::TRIE_HAS_VALUES = 8; + +const int32_t DictionaryData::TRANSFORM_NONE = 0; +const int32_t DictionaryData::TRANSFORM_TYPE_OFFSET = 0x1000000; +const int32_t DictionaryData::TRANSFORM_TYPE_MASK = 0x7f000000; +const int32_t DictionaryData::TRANSFORM_OFFSET_MASK = 0x1fffff; + +DictionaryMatcher::~DictionaryMatcher() { +} + +UCharsDictionaryMatcher::~UCharsDictionaryMatcher() { + udata_close(file); +} + +int32_t UCharsDictionaryMatcher::getType() const { + return DictionaryData::TRIE_TYPE_UCHARS; +} + +int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit, + int32_t *lengths, int32_t *cpLengths, int32_t *values, + int32_t *prefix) const { + + UCharsTrie uct(characters); + int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text); + int32_t wordCount = 0; + int32_t codePointsMatched = 0; + + for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) { + UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c); + int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex; + codePointsMatched += 1; + if (USTRINGTRIE_HAS_VALUE(result)) { + if (wordCount < limit) { + if (values != NULL) { + values[wordCount] = uct.getValue(); + } + if (lengths != NULL) { + lengths[wordCount] = lengthMatched; + } + if (cpLengths != NULL) { + cpLengths[wordCount] = codePointsMatched; + } + ++wordCount; + } + if (result == USTRINGTRIE_FINAL_VALUE) { + break; + } + } + else if (result == USTRINGTRIE_NO_MATCH) { + break; + } + if (lengthMatched >= maxLength) { + break; + } + } + + if (prefix != NULL) { + *prefix = codePointsMatched; + } + return wordCount; +} + +BytesDictionaryMatcher::~BytesDictionaryMatcher() { + udata_close(file); +} + +UChar32 BytesDictionaryMatcher::transform(UChar32 c) const { + if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) { + if (c == 0x200D) { + return 0xFF; + } else if (c == 0x200C) { + return 0xFE; + } + int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK); + if (delta < 0 || 0xFD < delta) { + return U_SENTINEL; + } + return (UChar32)delta; + } + return c; +} + +int32_t BytesDictionaryMatcher::getType() const { + return DictionaryData::TRIE_TYPE_BYTES; +} + +int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit, + int32_t *lengths, int32_t *cpLengths, int32_t *values, + int32_t *prefix) const { + BytesTrie bt(characters); + int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text); + int32_t wordCount = 0; + int32_t codePointsMatched = 0; + + for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) { + UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c)); + int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex; + codePointsMatched += 1; + if (USTRINGTRIE_HAS_VALUE(result)) { + if (wordCount < limit) { + if (values != NULL) { + values[wordCount] = bt.getValue(); + } + if (lengths != NULL) { + lengths[wordCount] = lengthMatched; + } + if (cpLengths != NULL) { + cpLengths[wordCount] = codePointsMatched; + } + ++wordCount; + } + if (result == USTRINGTRIE_FINAL_VALUE) { + break; + } + } + else if (result == USTRINGTRIE_NO_MATCH) { + break; + } + if (lengthMatched >= maxLength) { + break; + } + } + + if (prefix != NULL) { + *prefix = codePointsMatched; + } + return wordCount; +} + + +U_NAMESPACE_END + +U_NAMESPACE_USE + +U_CAPI int32_t U_EXPORT2 +udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, + void *outData, UErrorCode *pErrorCode) { + const UDataInfo *pInfo; + int32_t headerSize; + const uint8_t *inBytes; + uint8_t *outBytes; + const int32_t *inIndexes; + int32_t indexes[DictionaryData::IX_COUNT]; + int32_t i, offset, size; + + headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0; + pInfo = (const UDataInfo *)((const char *)inData + 4); + if (!(pInfo->dataFormat[0] == 0x44 && + pInfo->dataFormat[1] == 0x69 && + pInfo->dataFormat[2] == 0x63 && + pInfo->dataFormat[3] == 0x74 && + pInfo->formatVersion[0] == 1)) { + udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); + *pErrorCode = U_UNSUPPORTED_ERROR; + return 0; + } + + inBytes = (const uint8_t *)inData + headerSize; + outBytes = (uint8_t *)outData + headerSize; + + inIndexes = (const int32_t *)inBytes; + if (length >= 0) { + length -= headerSize; + if (length < (int32_t)(sizeof(indexes))) { + udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length); + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + + for (i = 0; i < DictionaryData::IX_COUNT; i++) { + indexes[i] = udata_readInt32(ds, inIndexes[i]); + } + + size = indexes[DictionaryData::IX_TOTAL_SIZE]; + + if (length >= 0) { + if (length < size) { + udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length); + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + if (inBytes != outBytes) { + uprv_memcpy(outBytes, inBytes, size); + } + + offset = 0; + ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode); + offset = (int32_t)sizeof(indexes); + int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK; + int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET]; + + if (trieType == DictionaryData::TRIE_TYPE_UCHARS) { + ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode); + } else if (trieType == DictionaryData::TRIE_TYPE_BYTES) { + // nothing to do + } else { + udata_printError(ds, "udict_swap(): unknown trie type!\n"); + *pErrorCode = U_UNSUPPORTED_ERROR; + return 0; + } + + // these next two sections are empty in the current format, + // but may be used later. + offset = nextOffset; + nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET]; + offset = nextOffset; + nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE]; + offset = nextOffset; + } + return headerSize + size; +} +#endif diff --git a/thirdparty/icu4c/common/dictionarydata.h b/thirdparty/icu4c/common/dictionarydata.h new file mode 100644 index 0000000000..0d303d9a8d --- /dev/null +++ b/thirdparty/icu4c/common/dictionarydata.h @@ -0,0 +1,191 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2014, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* dictionarydata.h +* +* created on: 2012may31 +* created by: Markus W. Scherer & Maxime Serrano +*/ + +#ifndef __DICTIONARYDATA_H__ +#define __DICTIONARYDATA_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/utext.h" +#include "unicode/udata.h" +#include "udataswp.h" +#include "unicode/uobject.h" +#include "unicode/ustringtrie.h" + +U_NAMESPACE_BEGIN + +class UCharsTrie; +class BytesTrie; + +class U_COMMON_API DictionaryData : public UMemory { +public: + static const int32_t TRIE_TYPE_BYTES; // = 0; + static const int32_t TRIE_TYPE_UCHARS; // = 1; + static const int32_t TRIE_TYPE_MASK; // = 7; + static const int32_t TRIE_HAS_VALUES; // = 8; + + static const int32_t TRANSFORM_NONE; // = 0; + static const int32_t TRANSFORM_TYPE_OFFSET; // = 0x1000000; + static const int32_t TRANSFORM_TYPE_MASK; // = 0x7f000000; + static const int32_t TRANSFORM_OFFSET_MASK; // = 0x1fffff; + + enum { + // Byte offsets from the start of the data, after the generic header. + IX_STRING_TRIE_OFFSET, + IX_RESERVED1_OFFSET, + IX_RESERVED2_OFFSET, + IX_TOTAL_SIZE, + + // Trie type: TRIE_HAS_VALUES | TRIE_TYPE_BYTES etc. + IX_TRIE_TYPE, + // Transform specification: TRANSFORM_TYPE_OFFSET | 0xe00 etc. + IX_TRANSFORM, + + IX_RESERVED6, + IX_RESERVED7, + IX_COUNT + }; +}; + +/** + * Wrapper class around generic dictionaries, implementing matches(). + * getType() should return a TRIE_TYPE_??? constant from DictionaryData. + * + * All implementations of this interface must be thread-safe if they are to be used inside of the + * dictionary-based break iteration code. + */ +class U_COMMON_API DictionaryMatcher : public UMemory { +public: + DictionaryMatcher() {} + virtual ~DictionaryMatcher(); + // this should emulate CompactTrieDictionary::matches() + /* @param text The text in which to look for matching words. Matching begins + * at the current position of the UText. + * @param maxLength The max length of match to consider. Units are the native indexing + * units of the UText. + * @param limit Capacity of output arrays, which is also the maximum number of + * matching words to be found. + * @param lengths output array, filled with the lengths of the matches, in order, + * from shortest to longest. Lengths are in native indexing units + * of the UText. May be NULL. + * @param cpLengths output array, filled with the lengths of the matches, in order, + * from shortest to longest. Lengths are the number of Unicode code points. + * May be NULL. + * @param values Output array, filled with the values associated with the words found. + * May be NULL. + * @param prefix Output parameter, the code point length of the prefix match, even if that + * prefix didn't lead to a complete word. Will always be >= the cpLength + * of the longest complete word matched. May be NULL. + * @return Number of matching words found. + */ + virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit, + int32_t *lengths, int32_t *cpLengths, int32_t *values, + int32_t *prefix) const = 0; + + /** @return DictionaryData::TRIE_TYPE_XYZ */ + virtual int32_t getType() const = 0; +}; + +// Implementation of the DictionaryMatcher interface for a UCharsTrie dictionary +class U_COMMON_API UCharsDictionaryMatcher : public DictionaryMatcher { +public: + // constructs a new UCharsDictionaryMatcher. + // The UDataMemory * will be closed on this object's destruction. + UCharsDictionaryMatcher(const UChar *c, UDataMemory *f) : characters(c), file(f) { } + virtual ~UCharsDictionaryMatcher(); + virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit, + int32_t *lengths, int32_t *cpLengths, int32_t *values, + int32_t *prefix) const; + virtual int32_t getType() const; +private: + const UChar *characters; + UDataMemory *file; +}; + +// Implementation of the DictionaryMatcher interface for a BytesTrie dictionary +class U_COMMON_API BytesDictionaryMatcher : public DictionaryMatcher { +public: + // constructs a new BytesTrieDictionaryMatcher + // the transform constant should be the constant read from the file, not a masked version! + // the UDataMemory * fed in here will be closed on this object's destruction + BytesDictionaryMatcher(const char *c, int32_t t, UDataMemory *f) + : characters(c), transformConstant(t), file(f) { } + virtual ~BytesDictionaryMatcher(); + virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit, + int32_t *lengths, int32_t *cpLengths, int32_t *values, + int32_t *prefix) const; + virtual int32_t getType() const; +private: + UChar32 transform(UChar32 c) const; + + const char *characters; + int32_t transformConstant; + UDataMemory *file; +}; + +U_NAMESPACE_END + +U_CAPI int32_t U_EXPORT2 +udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, UErrorCode *pErrorCode); + +/** + * Format of dictionary .dict data files. + * Format version 1.0. + * + * A dictionary .dict data file contains a byte-serialized BytesTrie or + * a UChars-serialized UCharsTrie. + * Such files are used in dictionary-based break iteration (DBBI). + * + * For a BytesTrie, a transformation type is specified for + * transforming Unicode strings into byte sequences. + * + * A .dict file begins with a standard ICU data file header + * (DataHeader, see ucmndata.h and unicode/udata.h). + * The UDataInfo.dataVersion field is currently unused (set to 0.0.0.0). + * + * After the header, the file contains the following parts. + * Constants are defined in the DictionaryData class. + * + * For the data structure of BytesTrie & UCharsTrie see + * http://site.icu-project.org/design/struct/tries + * and the bytestrie.h and ucharstrie.h header files. + * + * int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_STRING_TRIE_OFFSET]/4; + * + * The first four indexes are byte offsets in ascending order. + * Each byte offset marks the start of the next part in the data file, + * and the end of the previous one. + * When two consecutive byte offsets are the same, then the corresponding part is empty. + * Byte offsets are offsets from after the header, + * that is, from the beginning of the indexes[]. + * Each part starts at an offset with proper alignment for its data. + * If necessary, the previous part may include padding bytes to achieve this alignment. + * + * trieType=indexes[IX_TRIE_TYPE] defines the trie type. + * transform=indexes[IX_TRANSFORM] defines the Unicode-to-bytes transformation. + * If the transformation type is TRANSFORM_TYPE_OFFSET, + * then the lower 21 bits contain the offset code point. + * Each code point c is mapped to byte b = (c - offset). + * Code points outside the range offset..(offset+0xff) cannot be mapped + * and do not occur in the dictionary. + * + * stringTrie; -- a serialized BytesTrie or UCharsTrie + * + * The dictionary maps strings to specific values (TRIE_HAS_VALUES bit set in trieType), + * or it maps all strings to 0 (TRIE_HAS_VALUES bit not set). + */ + +#endif /* !UCONFIG_NO_BREAK_ITERATION */ +#endif /* __DICTIONARYDATA_H__ */ diff --git a/thirdparty/icu4c/common/dtintrv.cpp b/thirdparty/icu4c/common/dtintrv.cpp new file mode 100644 index 0000000000..80bb5d6dbd --- /dev/null +++ b/thirdparty/icu4c/common/dtintrv.cpp @@ -0,0 +1,63 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/******************************************************************************* +* Copyright (C) 2008, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +* +* File DTINTRV.CPP +* +******************************************************************************* +*/ + + + +#include "unicode/dtintrv.h" + + +U_NAMESPACE_BEGIN + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(DateInterval) + +//DateInterval::DateInterval(){} + + +DateInterval::DateInterval(UDate from, UDate to) +: fromDate(from), + toDate(to) +{} + + +DateInterval::~DateInterval(){} + + +DateInterval::DateInterval(const DateInterval& other) +: UObject(other) { + *this = other; +} + + +DateInterval& +DateInterval::operator=(const DateInterval& other) { + if ( this != &other ) { + fromDate = other.fromDate; + toDate = other.toDate; + } + return *this; +} + + +DateInterval* +DateInterval::clone() const { + return new DateInterval(*this); +} + + +UBool +DateInterval::operator==(const DateInterval& other) const { + return ( fromDate == other.fromDate && toDate == other.toDate ); +} + + +U_NAMESPACE_END + diff --git a/thirdparty/icu4c/common/edits.cpp b/thirdparty/icu4c/common/edits.cpp new file mode 100644 index 0000000000..95f0c19a72 --- /dev/null +++ b/thirdparty/icu4c/common/edits.cpp @@ -0,0 +1,803 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// edits.cpp +// created: 2017feb08 Markus W. Scherer + +#include "unicode/edits.h" +#include "unicode/unistr.h" +#include "unicode/utypes.h" +#include "cmemory.h" +#include "uassert.h" +#include "util.h" + +U_NAMESPACE_BEGIN + +namespace { + +// 0000uuuuuuuuuuuu records u+1 unchanged text units. +const int32_t MAX_UNCHANGED_LENGTH = 0x1000; +const int32_t MAX_UNCHANGED = MAX_UNCHANGED_LENGTH - 1; + +// 0mmmnnnccccccccc with m=1..6 records ccc+1 replacements of m:n text units. +const int32_t MAX_SHORT_CHANGE_OLD_LENGTH = 6; +const int32_t MAX_SHORT_CHANGE_NEW_LENGTH = 7; +const int32_t SHORT_CHANGE_NUM_MASK = 0x1ff; +const int32_t MAX_SHORT_CHANGE = 0x6fff; + +// 0111mmmmmmnnnnnn records a replacement of m text units with n. +// m or n = 61: actual length follows in the next edits array unit. +// m or n = 62..63: actual length follows in the next two edits array units. +// Bit 30 of the actual length is in the head unit. +// Trailing units have bit 15 set. +const int32_t LENGTH_IN_1TRAIL = 61; +const int32_t LENGTH_IN_2TRAIL = 62; + +} // namespace + +void Edits::releaseArray() U_NOEXCEPT { + if (array != stackArray) { + uprv_free(array); + } +} + +Edits &Edits::copyArray(const Edits &other) { + if (U_FAILURE(errorCode_)) { + length = delta = numChanges = 0; + return *this; + } + if (length > capacity) { + uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)length * 2); + if (newArray == nullptr) { + length = delta = numChanges = 0; + errorCode_ = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + releaseArray(); + array = newArray; + capacity = length; + } + if (length > 0) { + uprv_memcpy(array, other.array, (size_t)length * 2); + } + return *this; +} + +Edits &Edits::moveArray(Edits &src) U_NOEXCEPT { + if (U_FAILURE(errorCode_)) { + length = delta = numChanges = 0; + return *this; + } + releaseArray(); + if (length > STACK_CAPACITY) { + array = src.array; + capacity = src.capacity; + src.array = src.stackArray; + src.capacity = STACK_CAPACITY; + src.reset(); + return *this; + } + array = stackArray; + capacity = STACK_CAPACITY; + if (length > 0) { + uprv_memcpy(array, src.array, (size_t)length * 2); + } + return *this; +} + +Edits &Edits::operator=(const Edits &other) { + length = other.length; + delta = other.delta; + numChanges = other.numChanges; + errorCode_ = other.errorCode_; + return copyArray(other); +} + +Edits &Edits::operator=(Edits &&src) U_NOEXCEPT { + length = src.length; + delta = src.delta; + numChanges = src.numChanges; + errorCode_ = src.errorCode_; + return moveArray(src); +} + +Edits::~Edits() { + releaseArray(); +} + +void Edits::reset() U_NOEXCEPT { + length = delta = numChanges = 0; + errorCode_ = U_ZERO_ERROR; +} + +void Edits::addUnchanged(int32_t unchangedLength) { + if(U_FAILURE(errorCode_) || unchangedLength == 0) { return; } + if(unchangedLength < 0) { + errorCode_ = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + // Merge into previous unchanged-text record, if any. + int32_t last = lastUnit(); + if(last < MAX_UNCHANGED) { + int32_t remaining = MAX_UNCHANGED - last; + if (remaining >= unchangedLength) { + setLastUnit(last + unchangedLength); + return; + } + setLastUnit(MAX_UNCHANGED); + unchangedLength -= remaining; + } + // Split large lengths into multiple units. + while(unchangedLength >= MAX_UNCHANGED_LENGTH) { + append(MAX_UNCHANGED); + unchangedLength -= MAX_UNCHANGED_LENGTH; + } + // Write a small (remaining) length. + if(unchangedLength > 0) { + append(unchangedLength - 1); + } +} + +void Edits::addReplace(int32_t oldLength, int32_t newLength) { + if(U_FAILURE(errorCode_)) { return; } + if(oldLength < 0 || newLength < 0) { + errorCode_ = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if (oldLength == 0 && newLength == 0) { + return; + } + ++numChanges; + int32_t newDelta = newLength - oldLength; + if (newDelta != 0) { + if ((newDelta > 0 && delta >= 0 && newDelta > (INT32_MAX - delta)) || + (newDelta < 0 && delta < 0 && newDelta < (INT32_MIN - delta))) { + // Integer overflow or underflow. + errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + delta += newDelta; + } + + if(0 < oldLength && oldLength <= MAX_SHORT_CHANGE_OLD_LENGTH && + newLength <= MAX_SHORT_CHANGE_NEW_LENGTH) { + // Merge into previous same-lengths short-replacement record, if any. + int32_t u = (oldLength << 12) | (newLength << 9); + int32_t last = lastUnit(); + if(MAX_UNCHANGED < last && last < MAX_SHORT_CHANGE && + (last & ~SHORT_CHANGE_NUM_MASK) == u && + (last & SHORT_CHANGE_NUM_MASK) < SHORT_CHANGE_NUM_MASK) { + setLastUnit(last + 1); + return; + } + append(u); + return; + } + + int32_t head = 0x7000; + if (oldLength < LENGTH_IN_1TRAIL && newLength < LENGTH_IN_1TRAIL) { + head |= oldLength << 6; + head |= newLength; + append(head); + } else if ((capacity - length) >= 5 || growArray()) { + int32_t limit = length + 1; + if(oldLength < LENGTH_IN_1TRAIL) { + head |= oldLength << 6; + } else if(oldLength <= 0x7fff) { + head |= LENGTH_IN_1TRAIL << 6; + array[limit++] = (uint16_t)(0x8000 | oldLength); + } else { + head |= (LENGTH_IN_2TRAIL + (oldLength >> 30)) << 6; + array[limit++] = (uint16_t)(0x8000 | (oldLength >> 15)); + array[limit++] = (uint16_t)(0x8000 | oldLength); + } + if(newLength < LENGTH_IN_1TRAIL) { + head |= newLength; + } else if(newLength <= 0x7fff) { + head |= LENGTH_IN_1TRAIL; + array[limit++] = (uint16_t)(0x8000 | newLength); + } else { + head |= LENGTH_IN_2TRAIL + (newLength >> 30); + array[limit++] = (uint16_t)(0x8000 | (newLength >> 15)); + array[limit++] = (uint16_t)(0x8000 | newLength); + } + array[length] = (uint16_t)head; + length = limit; + } +} + +void Edits::append(int32_t r) { + if(length < capacity || growArray()) { + array[length++] = (uint16_t)r; + } +} + +UBool Edits::growArray() { + int32_t newCapacity; + if (array == stackArray) { + newCapacity = 2000; + } else if (capacity == INT32_MAX) { + // Not U_BUFFER_OVERFLOW_ERROR because that could be confused on a string transform API + // with a result-string-buffer overflow. + errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR; + return FALSE; + } else if (capacity >= (INT32_MAX / 2)) { + newCapacity = INT32_MAX; + } else { + newCapacity = 2 * capacity; + } + // Grow by at least 5 units so that a maximal change record will fit. + if ((newCapacity - capacity) < 5) { + errorCode_ = U_INDEX_OUTOFBOUNDS_ERROR; + return FALSE; + } + uint16_t *newArray = (uint16_t *)uprv_malloc((size_t)newCapacity * 2); + if (newArray == NULL) { + errorCode_ = U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + uprv_memcpy(newArray, array, (size_t)length * 2); + releaseArray(); + array = newArray; + capacity = newCapacity; + return TRUE; +} + +UBool Edits::copyErrorTo(UErrorCode &outErrorCode) const { + if (U_FAILURE(outErrorCode)) { return TRUE; } + if (U_SUCCESS(errorCode_)) { return FALSE; } + outErrorCode = errorCode_; + return TRUE; +} + +Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode) { + if (copyErrorTo(errorCode)) { return *this; } + // Picture string a --(Edits ab)--> string b --(Edits bc)--> string c. + // Parallel iteration over both Edits. + Iterator abIter = ab.getFineIterator(); + Iterator bcIter = bc.getFineIterator(); + UBool abHasNext = TRUE, bcHasNext = TRUE; + // Copy iterator state into local variables, so that we can modify and subdivide spans. + // ab old & new length, bc old & new length + int32_t aLength = 0, ab_bLength = 0, bc_bLength = 0, cLength = 0; + // When we have different-intermediate-length changes, we accumulate a larger change. + int32_t pending_aLength = 0, pending_cLength = 0; + for (;;) { + // At this point, for each of the two iterators: + // Either we are done with the locally cached current edit, + // and its intermediate-string length has been reset, + // or we will continue to work with a truncated remainder of this edit. + // + // If the current edit is done, and the iterator has not yet reached the end, + // then we fetch the next edit. This is true for at least one of the iterators. + // + // Normally it does not matter whether we fetch from ab and then bc or vice versa. + // However, the result is observably different when + // ab deletions meet bc insertions at the same intermediate-string index. + // Some users expect the bc insertions to come first, so we fetch from bc first. + if (bc_bLength == 0) { + if (bcHasNext && (bcHasNext = bcIter.next(errorCode)) != 0) { + bc_bLength = bcIter.oldLength(); + cLength = bcIter.newLength(); + if (bc_bLength == 0) { + // insertion + if (ab_bLength == 0 || !abIter.hasChange()) { + addReplace(pending_aLength, pending_cLength + cLength); + pending_aLength = pending_cLength = 0; + } else { + pending_cLength += cLength; + } + continue; + } + } + // else see if the other iterator is done, too. + } + if (ab_bLength == 0) { + if (abHasNext && (abHasNext = abIter.next(errorCode)) != 0) { + aLength = abIter.oldLength(); + ab_bLength = abIter.newLength(); + if (ab_bLength == 0) { + // deletion + if (bc_bLength == bcIter.oldLength() || !bcIter.hasChange()) { + addReplace(pending_aLength + aLength, pending_cLength); + pending_aLength = pending_cLength = 0; + } else { + pending_aLength += aLength; + } + continue; + } + } else if (bc_bLength == 0) { + // Both iterators are done at the same time: + // The intermediate-string lengths match. + break; + } else { + // The ab output string is shorter than the bc input string. + if (!copyErrorTo(errorCode)) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + } + return *this; + } + } + if (bc_bLength == 0) { + // The bc input string is shorter than the ab output string. + if (!copyErrorTo(errorCode)) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + } + return *this; + } + // Done fetching: ab_bLength > 0 && bc_bLength > 0 + + // The current state has two parts: + // - Past: We accumulate a longer ac edit in the "pending" variables. + // - Current: We have copies of the current ab/bc edits in local variables. + // At least one side is newly fetched. + // One side might be a truncated remainder of an edit we fetched earlier. + + if (!abIter.hasChange() && !bcIter.hasChange()) { + // An unchanged span all the way from string a to string c. + if (pending_aLength != 0 || pending_cLength != 0) { + addReplace(pending_aLength, pending_cLength); + pending_aLength = pending_cLength = 0; + } + int32_t unchangedLength = aLength <= cLength ? aLength : cLength; + addUnchanged(unchangedLength); + ab_bLength = aLength -= unchangedLength; + bc_bLength = cLength -= unchangedLength; + // At least one of the unchanged spans is now empty. + continue; + } + if (!abIter.hasChange() && bcIter.hasChange()) { + // Unchanged a->b but changed b->c. + if (ab_bLength >= bc_bLength) { + // Split the longer unchanged span into change + remainder. + addReplace(pending_aLength + bc_bLength, pending_cLength + cLength); + pending_aLength = pending_cLength = 0; + aLength = ab_bLength -= bc_bLength; + bc_bLength = 0; + continue; + } + // Handle the shorter unchanged span below like a change. + } else if (abIter.hasChange() && !bcIter.hasChange()) { + // Changed a->b and then unchanged b->c. + if (ab_bLength <= bc_bLength) { + // Split the longer unchanged span into change + remainder. + addReplace(pending_aLength + aLength, pending_cLength + ab_bLength); + pending_aLength = pending_cLength = 0; + cLength = bc_bLength -= ab_bLength; + ab_bLength = 0; + continue; + } + // Handle the shorter unchanged span below like a change. + } else { // both abIter.hasChange() && bcIter.hasChange() + if (ab_bLength == bc_bLength) { + // Changes on both sides up to the same position. Emit & reset. + addReplace(pending_aLength + aLength, pending_cLength + cLength); + pending_aLength = pending_cLength = 0; + ab_bLength = bc_bLength = 0; + continue; + } + } + // Accumulate the a->c change, reset the shorter side, + // keep a remainder of the longer one. + pending_aLength += aLength; + pending_cLength += cLength; + if (ab_bLength < bc_bLength) { + bc_bLength -= ab_bLength; + cLength = ab_bLength = 0; + } else { // ab_bLength > bc_bLength + ab_bLength -= bc_bLength; + aLength = bc_bLength = 0; + } + } + if (pending_aLength != 0 || pending_cLength != 0) { + addReplace(pending_aLength, pending_cLength); + } + copyErrorTo(errorCode); + return *this; +} + +Edits::Iterator::Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs) : + array(a), index(0), length(len), remaining(0), + onlyChanges_(oc), coarse(crs), + dir(0), changed(FALSE), oldLength_(0), newLength_(0), + srcIndex(0), replIndex(0), destIndex(0) {} + +int32_t Edits::Iterator::readLength(int32_t head) { + if (head < LENGTH_IN_1TRAIL) { + return head; + } else if (head < LENGTH_IN_2TRAIL) { + U_ASSERT(index < length); + U_ASSERT(array[index] >= 0x8000); + return array[index++] & 0x7fff; + } else { + U_ASSERT((index + 2) <= length); + U_ASSERT(array[index] >= 0x8000); + U_ASSERT(array[index + 1] >= 0x8000); + int32_t len = ((head & 1) << 30) | + ((int32_t)(array[index] & 0x7fff) << 15) | + (array[index + 1] & 0x7fff); + index += 2; + return len; + } +} + +void Edits::Iterator::updateNextIndexes() { + srcIndex += oldLength_; + if (changed) { + replIndex += newLength_; + } + destIndex += newLength_; +} + +void Edits::Iterator::updatePreviousIndexes() { + srcIndex -= oldLength_; + if (changed) { + replIndex -= newLength_; + } + destIndex -= newLength_; +} + +UBool Edits::Iterator::noNext() { + // No change before or beyond the string. + dir = 0; + changed = FALSE; + oldLength_ = newLength_ = 0; + return FALSE; +} + +UBool Edits::Iterator::next(UBool onlyChanges, UErrorCode &errorCode) { + // Forward iteration: Update the string indexes to the limit of the current span, + // and post-increment-read array units to assemble a new span. + // Leaves the array index one after the last unit of that span. + if (U_FAILURE(errorCode)) { return FALSE; } + // We have an errorCode in case we need to start guarding against integer overflows. + // It is also convenient for caller loops if we bail out when an error was set elsewhere. + if (dir > 0) { + updateNextIndexes(); + } else { + if (dir < 0) { + // Turn around from previous() to next(). + // Post-increment-read the same span again. + if (remaining > 0) { + // Fine-grained iterator: + // Stay on the current one of a sequence of compressed changes. + ++index; // next() rests on the index after the sequence unit. + dir = 1; + return TRUE; + } + } + dir = 1; + } + if (remaining >= 1) { + // Fine-grained iterator: Continue a sequence of compressed changes. + if (remaining > 1) { + --remaining; + return TRUE; + } + remaining = 0; + } + if (index >= length) { + return noNext(); + } + int32_t u = array[index++]; + if (u <= MAX_UNCHANGED) { + // Combine adjacent unchanged ranges. + changed = FALSE; + oldLength_ = u + 1; + while (index < length && (u = array[index]) <= MAX_UNCHANGED) { + ++index; + oldLength_ += u + 1; + } + newLength_ = oldLength_; + if (onlyChanges) { + updateNextIndexes(); + if (index >= length) { + return noNext(); + } + // already fetched u > MAX_UNCHANGED at index + ++index; + } else { + return TRUE; + } + } + changed = TRUE; + if (u <= MAX_SHORT_CHANGE) { + int32_t oldLen = u >> 12; + int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH; + int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1; + if (coarse) { + oldLength_ = num * oldLen; + newLength_ = num * newLen; + } else { + // Split a sequence of changes that was compressed into one unit. + oldLength_ = oldLen; + newLength_ = newLen; + if (num > 1) { + remaining = num; // This is the first of two or more changes. + } + return TRUE; + } + } else { + U_ASSERT(u <= 0x7fff); + oldLength_ = readLength((u >> 6) & 0x3f); + newLength_ = readLength(u & 0x3f); + if (!coarse) { + return TRUE; + } + } + // Combine adjacent changes. + while (index < length && (u = array[index]) > MAX_UNCHANGED) { + ++index; + if (u <= MAX_SHORT_CHANGE) { + int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1; + oldLength_ += (u >> 12) * num; + newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num; + } else { + U_ASSERT(u <= 0x7fff); + oldLength_ += readLength((u >> 6) & 0x3f); + newLength_ += readLength(u & 0x3f); + } + } + return TRUE; +} + +UBool Edits::Iterator::previous(UErrorCode &errorCode) { + // Backward iteration: Pre-decrement-read array units to assemble a new span, + // then update the string indexes to the start of that span. + // Leaves the array index on the head unit of that span. + if (U_FAILURE(errorCode)) { return FALSE; } + // We have an errorCode in case we need to start guarding against integer overflows. + // It is also convenient for caller loops if we bail out when an error was set elsewhere. + if (dir >= 0) { + if (dir > 0) { + // Turn around from next() to previous(). + // Set the string indexes to the span limit and + // pre-decrement-read the same span again. + if (remaining > 0) { + // Fine-grained iterator: + // Stay on the current one of a sequence of compressed changes. + --index; // previous() rests on the sequence unit. + dir = -1; + return TRUE; + } + updateNextIndexes(); + } + dir = -1; + } + if (remaining > 0) { + // Fine-grained iterator: Continue a sequence of compressed changes. + int32_t u = array[index]; + U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE); + if (remaining <= (u & SHORT_CHANGE_NUM_MASK)) { + ++remaining; + updatePreviousIndexes(); + return TRUE; + } + remaining = 0; + } + if (index <= 0) { + return noNext(); + } + int32_t u = array[--index]; + if (u <= MAX_UNCHANGED) { + // Combine adjacent unchanged ranges. + changed = FALSE; + oldLength_ = u + 1; + while (index > 0 && (u = array[index - 1]) <= MAX_UNCHANGED) { + --index; + oldLength_ += u + 1; + } + newLength_ = oldLength_; + // No need to handle onlyChanges as long as previous() is called only from findIndex(). + updatePreviousIndexes(); + return TRUE; + } + changed = TRUE; + if (u <= MAX_SHORT_CHANGE) { + int32_t oldLen = u >> 12; + int32_t newLen = (u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH; + int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1; + if (coarse) { + oldLength_ = num * oldLen; + newLength_ = num * newLen; + } else { + // Split a sequence of changes that was compressed into one unit. + oldLength_ = oldLen; + newLength_ = newLen; + if (num > 1) { + remaining = 1; // This is the last of two or more changes. + } + updatePreviousIndexes(); + return TRUE; + } + } else { + if (u <= 0x7fff) { + // The change is encoded in u alone. + oldLength_ = readLength((u >> 6) & 0x3f); + newLength_ = readLength(u & 0x3f); + } else { + // Back up to the head of the change, read the lengths, + // and reset the index to the head again. + U_ASSERT(index > 0); + while ((u = array[--index]) > 0x7fff) {} + U_ASSERT(u > MAX_SHORT_CHANGE); + int32_t headIndex = index++; + oldLength_ = readLength((u >> 6) & 0x3f); + newLength_ = readLength(u & 0x3f); + index = headIndex; + } + if (!coarse) { + updatePreviousIndexes(); + return TRUE; + } + } + // Combine adjacent changes. + while (index > 0 && (u = array[index - 1]) > MAX_UNCHANGED) { + --index; + if (u <= MAX_SHORT_CHANGE) { + int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1; + oldLength_ += (u >> 12) * num; + newLength_ += ((u >> 9) & MAX_SHORT_CHANGE_NEW_LENGTH) * num; + } else if (u <= 0x7fff) { + // Read the lengths, and reset the index to the head again. + int32_t headIndex = index++; + oldLength_ += readLength((u >> 6) & 0x3f); + newLength_ += readLength(u & 0x3f); + index = headIndex; + } + } + updatePreviousIndexes(); + return TRUE; +} + +int32_t Edits::Iterator::findIndex(int32_t i, UBool findSource, UErrorCode &errorCode) { + if (U_FAILURE(errorCode) || i < 0) { return -1; } + int32_t spanStart, spanLength; + if (findSource) { // find source index + spanStart = srcIndex; + spanLength = oldLength_; + } else { // find destination index + spanStart = destIndex; + spanLength = newLength_; + } + if (i < spanStart) { + if (i >= (spanStart / 2)) { + // Search backwards. + for (;;) { + UBool hasPrevious = previous(errorCode); + U_ASSERT(hasPrevious); // because i>=0 and the first span starts at 0 + (void)hasPrevious; // avoid unused-variable warning + spanStart = findSource ? srcIndex : destIndex; + if (i >= spanStart) { + // The index is in the current span. + return 0; + } + if (remaining > 0) { + // Is the index in one of the remaining compressed edits? + // spanStart is the start of the current span, first of the remaining ones. + spanLength = findSource ? oldLength_ : newLength_; + int32_t u = array[index]; + U_ASSERT(MAX_UNCHANGED < u && u <= MAX_SHORT_CHANGE); + int32_t num = (u & SHORT_CHANGE_NUM_MASK) + 1 - remaining; + int32_t len = num * spanLength; + if (i >= (spanStart - len)) { + int32_t n = ((spanStart - i - 1) / spanLength) + 1; + // 1 <= n <= num + srcIndex -= n * oldLength_; + replIndex -= n * newLength_; + destIndex -= n * newLength_; + remaining += n; + return 0; + } + // Skip all of these edits at once. + srcIndex -= num * oldLength_; + replIndex -= num * newLength_; + destIndex -= num * newLength_; + remaining = 0; + } + } + } + // Reset the iterator to the start. + dir = 0; + index = remaining = oldLength_ = newLength_ = srcIndex = replIndex = destIndex = 0; + } else if (i < (spanStart + spanLength)) { + // The index is in the current span. + return 0; + } + while (next(FALSE, errorCode)) { + if (findSource) { + spanStart = srcIndex; + spanLength = oldLength_; + } else { + spanStart = destIndex; + spanLength = newLength_; + } + if (i < (spanStart + spanLength)) { + // The index is in the current span. + return 0; + } + if (remaining > 1) { + // Is the index in one of the remaining compressed edits? + // spanStart is the start of the current span, first of the remaining ones. + int32_t len = remaining * spanLength; + if (i < (spanStart + len)) { + int32_t n = (i - spanStart) / spanLength; // 1 <= n <= remaining - 1 + srcIndex += n * oldLength_; + replIndex += n * newLength_; + destIndex += n * newLength_; + remaining -= n; + return 0; + } + // Make next() skip all of these edits at once. + oldLength_ *= remaining; + newLength_ *= remaining; + remaining = 0; + } + } + return 1; +} + +int32_t Edits::Iterator::destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode) { + int32_t where = findIndex(i, TRUE, errorCode); + if (where < 0) { + // Error or before the string. + return 0; + } + if (where > 0 || i == srcIndex) { + // At or after string length, or at start of the found span. + return destIndex; + } + if (changed) { + // In a change span, map to its end. + return destIndex + newLength_; + } else { + // In an unchanged span, offset 1:1 within it. + return destIndex + (i - srcIndex); + } +} + +int32_t Edits::Iterator::sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode) { + int32_t where = findIndex(i, FALSE, errorCode); + if (where < 0) { + // Error or before the string. + return 0; + } + if (where > 0 || i == destIndex) { + // At or after string length, or at start of the found span. + return srcIndex; + } + if (changed) { + // In a change span, map to its end. + return srcIndex + oldLength_; + } else { + // In an unchanged span, offset within it. + return srcIndex + (i - destIndex); + } +} + +UnicodeString& Edits::Iterator::toString(UnicodeString& sb) const { + sb.append(u"{ src[", -1); + ICU_Utility::appendNumber(sb, srcIndex); + sb.append(u"..", -1); + ICU_Utility::appendNumber(sb, srcIndex + oldLength_); + if (changed) { + sb.append(u"] ⇝ dest[", -1); + } else { + sb.append(u"] ≡ dest[", -1); + } + ICU_Utility::appendNumber(sb, destIndex); + sb.append(u"..", -1); + ICU_Utility::appendNumber(sb, destIndex + newLength_); + if (changed) { + sb.append(u"], repl[", -1); + ICU_Utility::appendNumber(sb, replIndex); + sb.append(u"..", -1); + ICU_Utility::appendNumber(sb, replIndex + newLength_); + sb.append(u"] }", -1); + } else { + sb.append(u"] (no-change) }", -1); + } + return sb; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/errorcode.cpp b/thirdparty/icu4c/common/errorcode.cpp new file mode 100644 index 0000000000..e7ac43b527 --- /dev/null +++ b/thirdparty/icu4c/common/errorcode.cpp @@ -0,0 +1,42 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2009-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: errorcode.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009mar10 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/errorcode.h" + +U_NAMESPACE_BEGIN + +ErrorCode::~ErrorCode() {} + +UErrorCode ErrorCode::reset() { + UErrorCode code = errorCode; + errorCode = U_ZERO_ERROR; + return code; +} + +void ErrorCode::assertSuccess() const { + if(isFailure()) { + handleFailure(); + } +} + +const char* ErrorCode::errorName() const { + return u_errorName(errorCode); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/filteredbrk.cpp b/thirdparty/icu4c/common/filteredbrk.cpp new file mode 100644 index 0000000000..c07128cbce --- /dev/null +++ b/thirdparty/icu4c/common/filteredbrk.cpp @@ -0,0 +1,710 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2014-2015, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +*/ + +#include "unicode/utypes.h" +#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION + +#include "cmemory.h" + +#include "unicode/filteredbrk.h" +#include "unicode/ucharstriebuilder.h" +#include "unicode/ures.h" + +#include "uresimp.h" // ures_getByKeyWithFallback +#include "ubrkimpl.h" // U_ICUDATA_BRKITR +#include "uvector.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + +#ifndef FB_DEBUG +#define FB_DEBUG 0 +#endif + +#if FB_DEBUG +#include <stdio.h> +static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d, const char *f, int l) { + char buf[2048]; + if(s) { + s->extract(0,s->length(),buf,2048); + } else { + strcpy(buf,"NULL"); + } + fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n", + f, l, m, buf, (const void*)s, b?'T':'F',(int)d); +} + +#define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__) +#else +#define FB_TRACE(m,s,b,d) +#endif + +/** + * Used with sortedInsert() + */ +static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { + const UnicodeString &a = *(const UnicodeString*)t1.pointer; + const UnicodeString &b = *(const UnicodeString*)t2.pointer; + return a.compare(b); +} + +/** + * A UVector which implements a set of strings. + */ +class U_COMMON_API UStringSet : public UVector { + public: + UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject, + uhash_compareUnicodeString, + 1, + status) {} + virtual ~UStringSet(); + /** + * Is this UnicodeSet contained? + */ + inline UBool contains(const UnicodeString& s) { + return contains((void*) &s); + } + using UVector::contains; + /** + * Return the ith UnicodeString alias + */ + inline const UnicodeString* getStringAt(int32_t i) const { + return (const UnicodeString*)elementAt(i); + } + /** + * Adopt the UnicodeString if not already contained. + * Caller no longer owns the pointer in any case. + * @return true if adopted successfully, false otherwise (error, or else duplicate) + */ + inline UBool adopt(UnicodeString *str, UErrorCode &status) { + if(U_FAILURE(status) || contains(*str)) { + delete str; + return false; + } else { + sortedInsert(str, compareUnicodeString, status); + if(U_FAILURE(status)) { + delete str; + return false; + } + return true; + } + } + /** + * Add by value. + * @return true if successfully adopted. + */ + inline UBool add(const UnicodeString& str, UErrorCode &status) { + if(U_FAILURE(status)) return false; + UnicodeString *t = new UnicodeString(str); + if(t==NULL) { + status = U_MEMORY_ALLOCATION_ERROR; return false; + } + return adopt(t, status); + } + /** + * Remove this string. + * @return true if successfully removed, false otherwise (error, or else it wasn't there) + */ + inline UBool remove(const UnicodeString &s, UErrorCode &status) { + if(U_FAILURE(status)) return false; + return removeElement((void*) &s); + } +}; + +/** + * Virtual, won't be inlined + */ +UStringSet::~UStringSet() {} + +/* ----------------------------------------------------------- */ + + +/* Filtered Break constants */ +static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie +static const int32_t kMATCH = (1<<1); //< exact match - skip this one. +static const int32_t kSuppressInReverse = (1<<0); +static const int32_t kAddToForward = (1<<1); +static const UChar kFULLSTOP = 0x002E; // '.' + +/** + * Shared data for SimpleFilteredSentenceBreakIterator + */ +class SimpleFilteredSentenceBreakData : public UMemory { +public: + SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards ) + : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { } + SimpleFilteredSentenceBreakData *incr() { refcount++; return this; } + SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; } + virtual ~SimpleFilteredSentenceBreakData(); + + LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M." + LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs. + int32_t refcount; +}; + +SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {} + +/** + * Concrete implementation + */ +class SimpleFilteredSentenceBreakIterator : public BreakIterator { +public: + SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status); + SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other); + virtual ~SimpleFilteredSentenceBreakIterator(); +private: + SimpleFilteredSentenceBreakData *fData; + LocalPointer<BreakIterator> fDelegate; + LocalUTextPointer fText; + + /* -- subclass interface -- */ +public: + /* -- cloning and other subclass stuff -- */ + virtual BreakIterator * createBufferClone(void * /*stackBuffer*/, + int32_t &/*BufferSize*/, + UErrorCode &status) { + // for now - always deep clone + status = U_SAFECLONE_ALLOCATED_WARNING; + return clone(); + } + virtual SimpleFilteredSentenceBreakIterator* clone() const { return new SimpleFilteredSentenceBreakIterator(*this); } + virtual UClassID getDynamicClassID(void) const { return NULL; } + virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return true; return false; } + + /* -- text modifying -- */ + virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(text,status); } + virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fDelegate->refreshInputText(input,status); return *this; } + virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); } + virtual void setText(const UnicodeString &text) { fDelegate->setText(text); } + + /* -- other functions that are just delegated -- */ + virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDelegate->getUText(fillIn,status); } + virtual CharacterIterator& getText(void) const { return fDelegate->getText(); } + + /* -- ITERATION -- */ + virtual int32_t first(void); + virtual int32_t preceding(int32_t offset); + virtual int32_t previous(void); + virtual UBool isBoundary(int32_t offset); + virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct. + + virtual int32_t next(void); + + virtual int32_t next(int32_t n); + virtual int32_t following(int32_t offset); + virtual int32_t last(void); + +private: + /** + * Given that the fDelegate has already given its "initial" answer, + * find the NEXT actual (non-excepted) break. + * @param n initial position from delegate + * @return new break position or UBRK_DONE + */ + int32_t internalNext(int32_t n); + /** + * Given that the fDelegate has already given its "initial" answer, + * find the PREV actual (non-excepted) break. + * @param n initial position from delegate + * @return new break position or UBRK_DONE + */ + int32_t internalPrev(int32_t n); + /** + * set up the UText with the value of the fDelegate. + * Call this before calling breakExceptionAt. + * May be able to avoid excess calls + */ + void resetState(UErrorCode &status); + /** + * Is there a match (exception) at this spot? + */ + enum EFBMatchResult { kNoExceptionHere, kExceptionHere }; + /** + * Determine if there is an exception at this spot + * @param n spot to check + * @return kNoExceptionHere or kExceptionHere + **/ + enum EFBMatchResult breakExceptionAt(int32_t n); +}; + +SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other) + : BreakIterator(other), fData(other.fData->incr()), fDelegate(other.fDelegate->clone()) +{ +} + + +SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) : + BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC_ACTUAL_LOCALE,status)), + fData(new SimpleFilteredSentenceBreakData(forwards, backwards)), + fDelegate(adopt) +{ + // all set.. +} + +SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() { + fData = fData->decr(); +} + +void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) { + fText.adoptInstead(fDelegate->getUText(fText.orphan(), status)); +} + +SimpleFilteredSentenceBreakIterator::EFBMatchResult +SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) { + int64_t bestPosn = -1; + int32_t bestValue = -1; + // loops while 'n' points to an exception. + utext_setNativeIndex(fText.getAlias(), n); // from n.. + fData->fBackwardsTrie->reset(); + UChar32 uch; + + //if(debug2) u_printf(" n@ %d\n", n); + // Assume a space is following the '.' (so we handle the case: "Mr. /Brown") + if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: skip a class of chars here?? + // TODO only do this the 1st time? + //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch); + } else { + //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch); + uch = utext_next32(fText.getAlias()); + //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch); + } + + UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE; + + while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and.. + USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie + if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far + bestPosn = utext_getNativeIndex(fText.getAlias()); + bestValue = fData->fBackwardsTrie->getValue(); + } + //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getNativeIndex(fText.getAlias())); + } + + if(USTRINGTRIE_MATCHES(r)) { // exact match? + //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue); + bestValue = fData->fBackwardsTrie->getValue(); + bestPosn = utext_getNativeIndex(fText.getAlias()); + //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue); + } + + if(bestPosn>=0) { + //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue=%d\n", (UChar)uch, r, bestPosn, bestValue); + + //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what? + //int32_t bestValue = fBackwardsTrie->getValue(); + ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UChar)uch, r, bestValue); + + if(bestValue == kMATCH) { // exact match! + //if(debug2) u_printf(" exact backward match\n"); + return kExceptionHere; // See if the next is another exception. + } else if(bestValue == kPARTIAL + && fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie + //if(debug2) u_printf(" partial backward match\n"); + // We matched the "Ph." in "Ph.D." - now we need to run everything through the forwards trie + // to see if it matches something going forward. + fData->fForwardsPartialTrie->reset(); + UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE; + utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close .. + //if(debug2) u_printf("Retrying at %d\n", bestPosn); + while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL && + USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCodePoint(uch))) { + //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, utext_getNativeIndex(fText.getAlias())); + } + if(USTRINGTRIE_MATCHES(rfwd)) { + //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch); + // only full matches here, nothing to check + // skip the next: + return kExceptionHere; + } else { + //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch); + // no match (no exception) -return the 'underlying' break + return kNoExceptionHere; + } + } else { + return kNoExceptionHere; // internal error and/or no forwards trie + } + } else { + //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // no best match + return kNoExceptionHere; // No match - so exit. Not an exception. + } +} + +// the workhorse single next. +int32_t +SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) { + if(n == UBRK_DONE || // at end or + fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions + return n; + } + // OK, do we need to break here? + UErrorCode status = U_ZERO_ERROR; + // refresh text + resetState(status); + if(U_FAILURE(status)) return UBRK_DONE; // bail out + int64_t utextLen = utext_nativeLength(fText.getAlias()); + + //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias())); + while (n != UBRK_DONE && n != utextLen) { // outer loop runs once per underlying break (from fDelegate). + SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n); + + switch(m) { + case kExceptionHere: + n = fDelegate->next(); // skip this one. Find the next lowerlevel break. + continue; + + default: + case kNoExceptionHere: + return n; + } + } + return n; +} + +int32_t +SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) { + if(n == 0 || n == UBRK_DONE || // at end or + fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions + return n; + } + // OK, do we need to break here? + UErrorCode status = U_ZERO_ERROR; + // refresh text + resetState(status); + if(U_FAILURE(status)) return UBRK_DONE; // bail out + + //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlias())); + while (n != UBRK_DONE && n != 0) { // outer loop runs once per underlying break (from fDelegate). + SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n); + + switch(m) { + case kExceptionHere: + n = fDelegate->previous(); // skip this one. Find the next lowerlevel break. + continue; + + default: + case kNoExceptionHere: + return n; + } + } + return n; +} + + +int32_t +SimpleFilteredSentenceBreakIterator::next() { + return internalNext(fDelegate->next()); +} + +int32_t +SimpleFilteredSentenceBreakIterator::first(void) { + // Don't suppress a break opportunity at the beginning of text. + return fDelegate->first(); +} + +int32_t +SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) { + return internalPrev(fDelegate->preceding(offset)); +} + +int32_t +SimpleFilteredSentenceBreakIterator::previous(void) { + return internalPrev(fDelegate->previous()); +} + +UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) { + if (!fDelegate->isBoundary(offset)) return false; // no break to suppress + + if (fData->fBackwardsTrie.isNull()) return true; // no data = no suppressions + + UErrorCode status = U_ZERO_ERROR; + resetState(status); + + SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offset); + + switch(m) { + case kExceptionHere: + return false; + default: + case kNoExceptionHere: + return true; + } +} + +int32_t +SimpleFilteredSentenceBreakIterator::next(int32_t offset) { + return internalNext(fDelegate->next(offset)); +} + +int32_t +SimpleFilteredSentenceBreakIterator::following(int32_t offset) { + return internalNext(fDelegate->following(offset)); +} + +int32_t +SimpleFilteredSentenceBreakIterator::last(void) { + // Don't suppress a break opportunity at the end of text. + return fDelegate->last(); +} + + +/** + * Concrete implementation of builder class. + */ +class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIteratorBuilder { +public: + virtual ~SimpleFilteredBreakIteratorBuilder(); + SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status); + SimpleFilteredBreakIteratorBuilder(UErrorCode &status); + virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& status); + virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status); + virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status); +private: + UStringSet fSet; +}; + +SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder() +{ +} + +SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCode &status) + : fSet(status) +{ +} + +SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &status) + : fSet(status) +{ + if(U_SUCCESS(status)) { + UErrorCode subStatus = U_ZERO_ERROR; + LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBaseName(), &subStatus)); + if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) { + status = subStatus; // copy the failing status +#if FB_DEBUG + fprintf(stderr, "open BUNDLE %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status)); +#endif + return; // leaves the builder empty, if you try to use it. + } + LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(), "exceptions", NULL, &subStatus)); + if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) { + status = subStatus; // copy the failing status +#if FB_DEBUG + fprintf(stderr, "open EXCEPTIONS %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status)); +#endif + return; // leaves the builder empty, if you try to use it. + } + LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getAlias(), "SentenceBreak", NULL, &subStatus)); + +#if FB_DEBUG + { + UErrorCode subsub = subStatus; + fprintf(stderr, "open SentenceBreak %s => %s, %s\n", fromLocale.getBaseName(), ures_getLocale(breaks.getAlias(), &subsub), u_errorName(subStatus)); + } +#endif + + if (U_FAILURE(subStatus) || (subStatus == U_USING_DEFAULT_WARNING) ) { + status = subStatus; // copy the failing status +#if FB_DEBUG + fprintf(stderr, "open %s : %s, %s\n", fromLocale.getBaseName(), "[exit]", u_errorName(status)); +#endif + return; // leaves the builder empty, if you try to use it. + } + + LocalUResourceBundlePointer strs; + subStatus = status; // Pick up inherited warning status now + do { + strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &subStatus)); + if(strs.isValid() && U_SUCCESS(subStatus)) { + UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status)); + suppressBreakAfter(str, status); // load the string + } + } while (strs.isValid() && U_SUCCESS(subStatus)); + if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(status)) { + status = subStatus; + } + } +} + +UBool +SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exception, UErrorCode& status) +{ + UBool r = fSet.add(exception, status); + FB_TRACE("suppressBreakAfter",&exception,r,0); + return r; +} + +UBool +SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status) +{ + UBool r = fSet.remove(exception, status); + FB_TRACE("unsuppressBreakAfter",&exception,r,0); + return r; +} + +/** + * Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly. + * Work around this. + * + * Note: "new UnicodeString[subCount]" ends up calling global operator new + * on MSVC2012 for some reason. + */ +static inline UnicodeString* newUnicodeStringArray(size_t count) { + return new UnicodeString[count ? count : 1]; +} + +BreakIterator * +SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UErrorCode& status) { + LocalPointer<BreakIterator> adopt(adoptBreakIterator); + + LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status); + LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status); + if(U_FAILURE(status)) { + return NULL; + } + + int32_t revCount = 0; + int32_t fwdCount = 0; + + int32_t subCount = fSet.size(); + + UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount); + + LocalArray<UnicodeString> ustrs(ustrs_ptr); + + LocalMemory<int> partials; + partials.allocateInsteadAndReset(subCount); + + LocalPointer<UCharsTrie> backwardsTrie; // i.e. ".srM" for Mrs. + LocalPointer<UCharsTrie> forwardsPartialTrie; // Has ".a" for "a.M." + + int n=0; + for ( int32_t i = 0; + i<fSet.size(); + i++) { + const UnicodeString *abbr = fSet.getStringAt(i); + if(abbr) { + FB_TRACE("build",abbr,TRUE,i); + ustrs[n] = *abbr; // copy by value + FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i); + } else { + FB_TRACE("build",abbr,FALSE,i); + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + partials[n] = 0; // default: not partial + n++; + } + // first pass - find partials. + for(int i=0;i<subCount;i++) { + int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations + if(nn>-1 && (nn+1)!=ustrs[i].length()) { + FB_TRACE("partial",&ustrs[i],FALSE,i); + // is partial. + // is it unique? + int sameAs = -1; + for(int j=0;j<subCount;j++) { + if(j==i) continue; + if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) { + FB_TRACE("prefix",&ustrs[j],FALSE,nn+1); + //UBool otherIsPartial = ((nn+1)!=ustrs[j].length()); // true if ustrs[j] doesn't end at nn + if(partials[j]==0) { // hasn't been processed yet + partials[j] = kSuppressInReverse | kAddToForward; + FB_TRACE("suppressing",&ustrs[j],FALSE,j); + } else if(partials[j] & kSuppressInReverse) { + sameAs = j; // the other entry is already in the reverse table. + } + } + } + FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs); + FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]); + UnicodeString prefix(ustrs[i], 0, nn+1); + if(sameAs == -1 && partials[i] == 0) { + // first one - add the prefix to the reverse table. + prefix.reverse(); + builder->add(prefix, kPARTIAL, status); + revCount++; + FB_TRACE("Added partial",&prefix,FALSE, i); + FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i); + partials[i] = kSuppressInReverse | kAddToForward; + } else { + FB_TRACE("NOT adding partial",&prefix,FALSE, i); + FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i); + } + } + } + for(int i=0;i<subCount;i++) { + if(partials[i]==0) { + ustrs[i].reverse(); + builder->add(ustrs[i], kMATCH, status); + revCount++; + FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i); + } else { + FB_TRACE("Adding fwd",&ustrs[i], FALSE, i); + + // an optimization would be to only add the portion after the '.' + // for example, for "Ph.D." we store ".hP" in the reverse table. We could just store "D." in the forward, + // instead of "Ph.D." since we already know the "Ph." part is a match. + // would need the trie to be able to hold 0-length strings, though. + builder2->add(ustrs[i], kMATCH, status); // forward + fwdCount++; + //ustrs[i].reverse(); + ////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",partials[i], ustrs[i].getTerminatedBuffer(), u_errorName(status)); + } + } + FB_TRACE("AbbrCount",NULL,FALSE, subCount); + + if(revCount>0) { + backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status)); + if(U_FAILURE(status)) { + FB_TRACE(u_errorName(status),NULL,FALSE, -1); + return NULL; + } + } + + if(fwdCount>0) { + forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, status)); + if(U_FAILURE(status)) { + FB_TRACE(u_errorName(status),NULL,FALSE, -1); + return NULL; + } + } + + return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartialTrie.orphan(), backwardsTrie.orphan(), status); +} + + +// ----------- Base class implementation + +FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() { +} + +FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() { +} + +FilteredBreakIteratorBuilder * +FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& status) { + if(U_FAILURE(status)) return NULL; + LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(where, status), status); + return (U_SUCCESS(status))? ret.orphan(): NULL; +} + +FilteredBreakIteratorBuilder * +FilteredBreakIteratorBuilder::createInstance(UErrorCode &status) { + return createEmptyInstance(status); +} + +FilteredBreakIteratorBuilder * +FilteredBreakIteratorBuilder::createEmptyInstance(UErrorCode& status) { + if(U_FAILURE(status)) return NULL; + LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIteratorBuilder(status), status); + return (U_SUCCESS(status))? ret.orphan(): NULL; +} + +U_NAMESPACE_END + +#endif //#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION diff --git a/thirdparty/icu4c/common/filterednormalizer2.cpp b/thirdparty/icu4c/common/filterednormalizer2.cpp new file mode 100644 index 0000000000..1a0914d3f7 --- /dev/null +++ b/thirdparty/icu4c/common/filterednormalizer2.cpp @@ -0,0 +1,363 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2009-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: filterednormalizer2.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009dec10 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/edits.h" +#include "unicode/normalizer2.h" +#include "unicode/stringoptions.h" +#include "unicode/uniset.h" +#include "unicode/unistr.h" +#include "unicode/unorm.h" +#include "cpputils.h" + +U_NAMESPACE_BEGIN + +FilteredNormalizer2::~FilteredNormalizer2() {} + +UnicodeString & +FilteredNormalizer2::normalize(const UnicodeString &src, + UnicodeString &dest, + UErrorCode &errorCode) const { + uprv_checkCanGetBuffer(src, errorCode); + if(U_FAILURE(errorCode)) { + dest.setToBogus(); + return dest; + } + if(&dest==&src) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return dest; + } + dest.remove(); + return normalize(src, dest, USET_SPAN_SIMPLE, errorCode); +} + +// Internal: No argument checking, and appends to dest. +// Pass as input spanCondition the one that is likely to yield a non-zero +// span length at the start of src. +// For set=[:age=3.2:], since almost all common characters were in Unicode 3.2, +// USET_SPAN_SIMPLE should be passed in for the start of src +// and USET_SPAN_NOT_CONTAINED should be passed in if we continue after +// an in-filter prefix. +UnicodeString & +FilteredNormalizer2::normalize(const UnicodeString &src, + UnicodeString &dest, + USetSpanCondition spanCondition, + UErrorCode &errorCode) const { + UnicodeString tempDest; // Don't throw away destination buffer between iterations. + for(int32_t prevSpanLimit=0; prevSpanLimit<src.length();) { + int32_t spanLimit=set.span(src, prevSpanLimit, spanCondition); + int32_t spanLength=spanLimit-prevSpanLimit; + if(spanCondition==USET_SPAN_NOT_CONTAINED) { + if(spanLength!=0) { + dest.append(src, prevSpanLimit, spanLength); + } + spanCondition=USET_SPAN_SIMPLE; + } else { + if(spanLength!=0) { + // Not norm2.normalizeSecondAndAppend() because we do not want + // to modify the non-filter part of dest. + dest.append(norm2.normalize(src.tempSubStringBetween(prevSpanLimit, spanLimit), + tempDest, errorCode)); + if(U_FAILURE(errorCode)) { + break; + } + } + spanCondition=USET_SPAN_NOT_CONTAINED; + } + prevSpanLimit=spanLimit; + } + return dest; +} + +void +FilteredNormalizer2::normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { + return; + } + if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { + edits->reset(); + } + options |= U_EDITS_NO_RESET; // Do not reset for each span. + normalizeUTF8(options, src.data(), src.length(), sink, edits, USET_SPAN_SIMPLE, errorCode); +} + +void +FilteredNormalizer2::normalizeUTF8(uint32_t options, const char *src, int32_t length, + ByteSink &sink, Edits *edits, + USetSpanCondition spanCondition, + UErrorCode &errorCode) const { + while (length > 0) { + int32_t spanLength = set.spanUTF8(src, length, spanCondition); + if (spanCondition == USET_SPAN_NOT_CONTAINED) { + if (spanLength != 0) { + if (edits != nullptr) { + edits->addUnchanged(spanLength); + } + if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { + sink.Append(src, spanLength); + } + } + spanCondition = USET_SPAN_SIMPLE; + } else { + if (spanLength != 0) { + // Not norm2.normalizeSecondAndAppend() because we do not want + // to modify the non-filter part of dest. + norm2.normalizeUTF8(options, StringPiece(src, spanLength), sink, edits, errorCode); + if (U_FAILURE(errorCode)) { + break; + } + } + spanCondition = USET_SPAN_NOT_CONTAINED; + } + src += spanLength; + length -= spanLength; + } +} + +UnicodeString & +FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const { + return normalizeSecondAndAppend(first, second, TRUE, errorCode); +} + +UnicodeString & +FilteredNormalizer2::append(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const { + return normalizeSecondAndAppend(first, second, FALSE, errorCode); +} + +UnicodeString & +FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UBool doNormalize, + UErrorCode &errorCode) const { + uprv_checkCanGetBuffer(first, errorCode); + uprv_checkCanGetBuffer(second, errorCode); + if(U_FAILURE(errorCode)) { + return first; + } + if(&first==&second) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return first; + } + if(first.isEmpty()) { + if(doNormalize) { + return normalize(second, first, errorCode); + } else { + return first=second; + } + } + // merge the in-filter suffix of the first string with the in-filter prefix of the second + int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE); + if(prefixLimit!=0) { + UnicodeString prefix(second.tempSubString(0, prefixLimit)); + int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE); + if(suffixStart==0) { + if(doNormalize) { + norm2.normalizeSecondAndAppend(first, prefix, errorCode); + } else { + norm2.append(first, prefix, errorCode); + } + } else { + UnicodeString middle(first, suffixStart, INT32_MAX); + if(doNormalize) { + norm2.normalizeSecondAndAppend(middle, prefix, errorCode); + } else { + norm2.append(middle, prefix, errorCode); + } + first.replace(suffixStart, INT32_MAX, middle); + } + } + if(prefixLimit<second.length()) { + UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX)); + if(doNormalize) { + normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode); + } else { + first.append(rest); + } + } + return first; +} + +UBool +FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const { + return set.contains(c) && norm2.getDecomposition(c, decomposition); +} + +UBool +FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { + return set.contains(c) && norm2.getRawDecomposition(c, decomposition); +} + +UChar32 +FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const { + return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : U_SENTINEL; +} + +uint8_t +FilteredNormalizer2::getCombiningClass(UChar32 c) const { + return set.contains(c) ? norm2.getCombiningClass(c) : 0; +} + +UBool +FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { + uprv_checkCanGetBuffer(s, errorCode); + if(U_FAILURE(errorCode)) { + return FALSE; + } + USetSpanCondition spanCondition=USET_SPAN_SIMPLE; + for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) { + int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition); + if(spanCondition==USET_SPAN_NOT_CONTAINED) { + spanCondition=USET_SPAN_SIMPLE; + } else { + if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) || + U_FAILURE(errorCode) + ) { + return FALSE; + } + spanCondition=USET_SPAN_NOT_CONTAINED; + } + prevSpanLimit=spanLimit; + } + return TRUE; +} + +UBool +FilteredNormalizer2::isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return FALSE; + } + const char *s = sp.data(); + int32_t length = sp.length(); + USetSpanCondition spanCondition = USET_SPAN_SIMPLE; + while (length > 0) { + int32_t spanLength = set.spanUTF8(s, length, spanCondition); + if (spanCondition == USET_SPAN_NOT_CONTAINED) { + spanCondition = USET_SPAN_SIMPLE; + } else { + if (!norm2.isNormalizedUTF8(StringPiece(s, spanLength), errorCode) || + U_FAILURE(errorCode)) { + return FALSE; + } + spanCondition = USET_SPAN_NOT_CONTAINED; + } + s += spanLength; + length -= spanLength; + } + return TRUE; +} + +UNormalizationCheckResult +FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { + uprv_checkCanGetBuffer(s, errorCode); + if(U_FAILURE(errorCode)) { + return UNORM_MAYBE; + } + UNormalizationCheckResult result=UNORM_YES; + USetSpanCondition spanCondition=USET_SPAN_SIMPLE; + for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) { + int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition); + if(spanCondition==USET_SPAN_NOT_CONTAINED) { + spanCondition=USET_SPAN_SIMPLE; + } else { + UNormalizationCheckResult qcResult= + norm2.quickCheck(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode); + if(U_FAILURE(errorCode) || qcResult==UNORM_NO) { + return qcResult; + } else if(qcResult==UNORM_MAYBE) { + result=qcResult; + } + spanCondition=USET_SPAN_NOT_CONTAINED; + } + prevSpanLimit=spanLimit; + } + return result; +} + +int32_t +FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { + uprv_checkCanGetBuffer(s, errorCode); + if(U_FAILURE(errorCode)) { + return 0; + } + USetSpanCondition spanCondition=USET_SPAN_SIMPLE; + for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) { + int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition); + if(spanCondition==USET_SPAN_NOT_CONTAINED) { + spanCondition=USET_SPAN_SIMPLE; + } else { + int32_t yesLimit= + prevSpanLimit+ + norm2.spanQuickCheckYes( + s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode); + if(U_FAILURE(errorCode) || yesLimit<spanLimit) { + return yesLimit; + } + spanCondition=USET_SPAN_NOT_CONTAINED; + } + prevSpanLimit=spanLimit; + } + return s.length(); +} + +UBool +FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const { + return !set.contains(c) || norm2.hasBoundaryBefore(c); +} + +UBool +FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const { + return !set.contains(c) || norm2.hasBoundaryAfter(c); +} + +UBool +FilteredNormalizer2::isInert(UChar32 c) const { + return !set.contains(c) || norm2.isInert(c); +} + +U_NAMESPACE_END + +// C API ------------------------------------------------------------------- *** + +U_NAMESPACE_USE + +U_CAPI UNormalizer2 * U_EXPORT2 +unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(filterSet==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2, + *UnicodeSet::fromUSet(filterSet)); + if(fn2==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + } + return (UNormalizer2 *)fn2; +} + +#endif // !UCONFIG_NO_NORMALIZATION diff --git a/thirdparty/icu4c/common/hash.h b/thirdparty/icu4c/common/hash.h new file mode 100644 index 0000000000..f02cb7087a --- /dev/null +++ b/thirdparty/icu4c/common/hash.h @@ -0,0 +1,248 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1997-2014, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* Date Name Description +* 03/28/00 aliu Creation. +****************************************************************************** +*/ + +#ifndef HASH_H +#define HASH_H + +#include "unicode/unistr.h" +#include "unicode/uobject.h" +#include "cmemory.h" +#include "uhash.h" + +U_NAMESPACE_BEGIN + +/** + * Hashtable is a thin C++ wrapper around UHashtable, a general-purpose void* + * hashtable implemented in C. Hashtable is designed to be idiomatic and + * easy-to-use in C++. + * + * Hashtable is an INTERNAL CLASS. + */ +class U_COMMON_API Hashtable : public UMemory { + UHashtable* hash; + UHashtable hashObj; + + inline void init(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status); + + inline void initSize(UHashFunction *keyHash, UKeyComparator *keyComp, UValueComparator *valueComp, int32_t size, UErrorCode& status); + +public: + /** + * Construct a hashtable + * @param ignoreKeyCase If true, keys are case insensitive. + * @param status Error code + */ + inline Hashtable(UBool ignoreKeyCase, UErrorCode& status); + + /** + * Construct a hashtable + * @param ignoreKeyCase If true, keys are case insensitive. + * @param size initial size allocation + * @param status Error code + */ + inline Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status); + + /** + * Construct a hashtable + * @param keyComp Comparator for comparing the keys + * @param valueComp Comparator for comparing the values + * @param status Error code + */ + inline Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, UErrorCode& status); + + /** + * Construct a hashtable + * @param status Error code + */ + inline Hashtable(UErrorCode& status); + + /** + * Construct a hashtable, _disregarding any error_. Use this constructor + * with caution. + */ + inline Hashtable(); + + /** + * Non-virtual destructor; make this virtual if Hashtable is subclassed + * in the future. + */ + inline ~Hashtable(); + + inline UObjectDeleter *setValueDeleter(UObjectDeleter *fn); + + inline int32_t count() const; + + inline void* put(const UnicodeString& key, void* value, UErrorCode& status); + + inline int32_t puti(const UnicodeString& key, int32_t value, UErrorCode& status); + + inline void* get(const UnicodeString& key) const; + + inline int32_t geti(const UnicodeString& key) const; + + inline void* remove(const UnicodeString& key); + + inline int32_t removei(const UnicodeString& key); + + inline void removeAll(void); + + inline const UHashElement* find(const UnicodeString& key) const; + + /** + * @param pos - must be UHASH_FIRST on first call, and untouched afterwards. + * @see uhash_nextElement + */ + inline const UHashElement* nextElement(int32_t& pos) const; + + inline UKeyComparator* setKeyComparator(UKeyComparator*keyComp); + + inline UValueComparator* setValueComparator(UValueComparator* valueComp); + + inline UBool equals(const Hashtable& that) const; +private: + Hashtable(const Hashtable &other); // forbid copying of this class + Hashtable &operator=(const Hashtable &other); // forbid copying of this class +}; + +/********************************************************************* + * Implementation + ********************************************************************/ + +inline void Hashtable::init(UHashFunction *keyHash, UKeyComparator *keyComp, + UValueComparator *valueComp, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + uhash_init(&hashObj, keyHash, keyComp, valueComp, &status); + if (U_SUCCESS(status)) { + hash = &hashObj; + uhash_setKeyDeleter(hash, uprv_deleteUObject); + } +} + +inline void Hashtable::initSize(UHashFunction *keyHash, UKeyComparator *keyComp, + UValueComparator *valueComp, int32_t size, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + uhash_initSize(&hashObj, keyHash, keyComp, valueComp, size, &status); + if (U_SUCCESS(status)) { + hash = &hashObj; + uhash_setKeyDeleter(hash, uprv_deleteUObject); + } +} + +inline Hashtable::Hashtable(UKeyComparator *keyComp, UValueComparator *valueComp, + UErrorCode& status) : hash(0) { + init( uhash_hashUnicodeString, keyComp, valueComp, status); +} + +inline Hashtable::Hashtable(UBool ignoreKeyCase, UErrorCode& status) + : hash(0) +{ + init(ignoreKeyCase ? uhash_hashCaselessUnicodeString + : uhash_hashUnicodeString, + ignoreKeyCase ? uhash_compareCaselessUnicodeString + : uhash_compareUnicodeString, + NULL, + status); +} + +inline Hashtable::Hashtable(UBool ignoreKeyCase, int32_t size, UErrorCode& status) + : hash(0) +{ + initSize(ignoreKeyCase ? uhash_hashCaselessUnicodeString + : uhash_hashUnicodeString, + ignoreKeyCase ? uhash_compareCaselessUnicodeString + : uhash_compareUnicodeString, + NULL, size, + status); +} + +inline Hashtable::Hashtable(UErrorCode& status) + : hash(0) +{ + init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status); +} + +inline Hashtable::Hashtable() + : hash(0) +{ + UErrorCode status = U_ZERO_ERROR; + init(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, status); +} + +inline Hashtable::~Hashtable() { + if (hash != NULL) { + uhash_close(hash); + } +} + +inline UObjectDeleter *Hashtable::setValueDeleter(UObjectDeleter *fn) { + return uhash_setValueDeleter(hash, fn); +} + +inline int32_t Hashtable::count() const { + return uhash_count(hash); +} + +inline void* Hashtable::put(const UnicodeString& key, void* value, UErrorCode& status) { + return uhash_put(hash, new UnicodeString(key), value, &status); +} + +inline int32_t Hashtable::puti(const UnicodeString& key, int32_t value, UErrorCode& status) { + return uhash_puti(hash, new UnicodeString(key), value, &status); +} + +inline void* Hashtable::get(const UnicodeString& key) const { + return uhash_get(hash, &key); +} + +inline int32_t Hashtable::geti(const UnicodeString& key) const { + return uhash_geti(hash, &key); +} + +inline void* Hashtable::remove(const UnicodeString& key) { + return uhash_remove(hash, &key); +} + +inline int32_t Hashtable::removei(const UnicodeString& key) { + return uhash_removei(hash, &key); +} + +inline const UHashElement* Hashtable::find(const UnicodeString& key) const { + return uhash_find(hash, &key); +} + +inline const UHashElement* Hashtable::nextElement(int32_t& pos) const { + return uhash_nextElement(hash, &pos); +} + +inline void Hashtable::removeAll(void) { + uhash_removeAll(hash); +} + +inline UKeyComparator* Hashtable::setKeyComparator(UKeyComparator*keyComp){ + return uhash_setKeyComparator(hash, keyComp); +} + +inline UValueComparator* Hashtable::setValueComparator(UValueComparator* valueComp){ + return uhash_setValueComparator(hash, valueComp); +} + +inline UBool Hashtable::equals(const Hashtable& that)const{ + return uhash_equals(hash, that.hash); +} +U_NAMESPACE_END + +#endif + diff --git a/thirdparty/icu4c/common/icudataver.cpp b/thirdparty/icu4c/common/icudataver.cpp new file mode 100644 index 0000000000..d314411374 --- /dev/null +++ b/thirdparty/icu4c/common/icudataver.cpp @@ -0,0 +1,31 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2009-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +*/ + +#include "unicode/utypes.h" +#include "unicode/icudataver.h" +#include "unicode/ures.h" +#include "uresimp.h" /* for ures_getVersionByKey */ + +U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status) { + UResourceBundle *icudatares = NULL; + + if (U_FAILURE(*status)) { + return; + } + + if (dataVersionFillin != NULL) { + icudatares = ures_openDirect(NULL, U_ICU_VERSION_BUNDLE , status); + if (U_SUCCESS(*status)) { + ures_getVersionByKey(icudatares, U_ICU_DATA_KEY, dataVersionFillin, status); + } + ures_close(icudatares); + } +} diff --git a/thirdparty/icu4c/common/icuplug.cpp b/thirdparty/icu4c/common/icuplug.cpp new file mode 100644 index 0000000000..4ab8c66ebe --- /dev/null +++ b/thirdparty/icu4c/common/icuplug.cpp @@ -0,0 +1,884 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2009-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : icuplug.c +* +* Date Name Description +* 10/29/2009 sl New. +****************************************************************************** +*/ + +#include "unicode/icuplug.h" + + +#if UCONFIG_ENABLE_PLUGINS + + +#include "icuplugimp.h" +#include "cstring.h" +#include "cmemory.h" +#include "putilimp.h" +#include "ucln.h" +#include <stdio.h> +#ifdef __MVS__ /* defined by z/OS compiler */ +#define _POSIX_SOURCE +#include <cics.h> /* 12 Nov 2011 JAM iscics() function */ +#endif +#include "charstr.h" + +using namespace icu; + +#ifndef UPLUG_TRACE +#define UPLUG_TRACE 0 +#endif + +#if UPLUG_TRACE +#include <stdio.h> +#define DBG(x) fprintf(stderr, "%s:%d: ",__FILE__,__LINE__); fprintf x +#endif + +/** + * Internal structure of an ICU plugin. + */ + +struct UPlugData { + UPlugEntrypoint *entrypoint; /**< plugin entrypoint */ + uint32_t structSize; /**< initialized to the size of this structure */ + uint32_t token; /**< must be U_PLUG_TOKEN */ + void *lib; /**< plugin library, or NULL */ + char libName[UPLUG_NAME_MAX]; /**< library name */ + char sym[UPLUG_NAME_MAX]; /**< plugin symbol, or NULL */ + char config[UPLUG_NAME_MAX]; /**< configuration data */ + void *context; /**< user context data */ + char name[UPLUG_NAME_MAX]; /**< name of plugin */ + UPlugLevel level; /**< level of plugin */ + UBool awaitingLoad; /**< TRUE if the plugin is awaiting a load call */ + UBool dontUnload; /**< TRUE if plugin must stay resident (leak plugin and lib) */ + UErrorCode pluginStatus; /**< status code of plugin */ +}; + + + +#define UPLUG_LIBRARY_INITIAL_COUNT 8 +#define UPLUG_PLUGIN_INITIAL_COUNT 12 + +/** + * Remove an item + * @param list the full list + * @param listSize the number of entries in the list + * @param memberSize the size of one member + * @param itemToRemove the item number of the member + * @return the new listsize + */ +static int32_t uplug_removeEntryAt(void *list, int32_t listSize, int32_t memberSize, int32_t itemToRemove) { + uint8_t *bytePtr = (uint8_t *)list; + + /* get rid of some bad cases first */ + if(listSize<1) { + return listSize; + } + + /* is there anything to move? */ + if(listSize > itemToRemove+1) { + memmove(bytePtr+(itemToRemove*memberSize), bytePtr+((itemToRemove+1)*memberSize), memberSize); + } + + return listSize-1; +} + + + + +#if U_ENABLE_DYLOAD +/** + * Library management. Internal. + * @internal + */ +struct UPlugLibrary; + +/** + * Library management. Internal. + * @internal + */ +typedef struct UPlugLibrary { + void *lib; /**< library ptr */ + char name[UPLUG_NAME_MAX]; /**< library name */ + uint32_t ref; /**< reference count */ +} UPlugLibrary; + +static UPlugLibrary staticLibraryList[UPLUG_LIBRARY_INITIAL_COUNT]; +static UPlugLibrary * libraryList = staticLibraryList; +static int32_t libraryCount = 0; +static int32_t libraryMax = UPLUG_LIBRARY_INITIAL_COUNT; + +/** + * Search for a library. Doesn't lock + * @param libName libname to search for + * @return the library's struct + */ +static int32_t searchForLibraryName(const char *libName) { + int32_t i; + + for(i=0;i<libraryCount;i++) { + if(!uprv_strcmp(libName, libraryList[i].name)) { + return i; + } + } + return -1; +} + +static int32_t searchForLibrary(void *lib) { + int32_t i; + + for(i=0;i<libraryCount;i++) { + if(lib==libraryList[i].lib) { + return i; + } + } + return -1; +} + +U_CAPI char * U_EXPORT2 +uplug_findLibrary(void *lib, UErrorCode *status) { + int32_t libEnt; + char *ret = NULL; + if(U_FAILURE(*status)) { + return NULL; + } + libEnt = searchForLibrary(lib); + if(libEnt!=-1) { + ret = libraryList[libEnt].name; + } else { + *status = U_MISSING_RESOURCE_ERROR; + } + return ret; +} + +U_CAPI void * U_EXPORT2 +uplug_openLibrary(const char *libName, UErrorCode *status) { + int32_t libEntry = -1; + void *lib = NULL; + + if(U_FAILURE(*status)) return NULL; + + libEntry = searchForLibraryName(libName); + if(libEntry == -1) { + libEntry = libraryCount++; + if(libraryCount >= libraryMax) { + /* Ran out of library slots. Statically allocated because we can't depend on allocating memory.. */ + *status = U_MEMORY_ALLOCATION_ERROR; +#if UPLUG_TRACE + DBG((stderr, "uplug_openLibrary() - out of library slots (max %d)\n", libraryMax)); +#endif + return NULL; + } + /* Some operating systems don't want + DL operations from multiple threads. */ + libraryList[libEntry].lib = uprv_dl_open(libName, status); +#if UPLUG_TRACE + DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib)); +#endif + + if(libraryList[libEntry].lib == NULL || U_FAILURE(*status)) { + /* cleanup. */ + libraryList[libEntry].lib = NULL; /* failure with open */ + libraryList[libEntry].name[0] = 0; +#if UPLUG_TRACE + DBG((stderr, "uplug_openLibrary(%s,%s) libEntry %d, lib %p\n", libName, u_errorName(*status), libEntry, lib)); +#endif + /* no need to free - just won't increase the count. */ + libraryCount--; + } else { /* is it still there? */ + /* link it in */ + uprv_strncpy(libraryList[libEntry].name,libName,UPLUG_NAME_MAX); + libraryList[libEntry].ref=1; + lib = libraryList[libEntry].lib; + } + + } else { + lib = libraryList[libEntry].lib; + libraryList[libEntry].ref++; + } + return lib; +} + +U_CAPI void U_EXPORT2 +uplug_closeLibrary(void *lib, UErrorCode *status) { + int32_t i; + +#if UPLUG_TRACE + DBG((stderr, "uplug_closeLibrary(%p,%s) list %p\n", lib, u_errorName(*status), (void*)libraryList)); +#endif + if(U_FAILURE(*status)) return; + + for(i=0;i<libraryCount;i++) { + if(lib==libraryList[i].lib) { + if(--(libraryList[i].ref) == 0) { + uprv_dl_close(libraryList[i].lib, status); + libraryCount = uplug_removeEntryAt(libraryList, libraryCount, sizeof(*libraryList), i); + } + return; + } + } + *status = U_INTERNAL_PROGRAM_ERROR; /* could not find the entry! */ +} + +#endif + +static UPlugData pluginList[UPLUG_PLUGIN_INITIAL_COUNT]; +static int32_t pluginCount = 0; + + + + +static int32_t uplug_pluginNumber(UPlugData* d) { + UPlugData *pastPlug = &pluginList[pluginCount]; + if(d<=pluginList) { + return 0; + } else if(d>=pastPlug) { + return pluginCount; + } else { + return (d-pluginList)/sizeof(pluginList[0]); + } +} + + +U_CAPI UPlugData * U_EXPORT2 +uplug_nextPlug(UPlugData *prior) { + if(prior==NULL) { + return pluginList; + } else { + UPlugData *nextPlug = &prior[1]; + UPlugData *pastPlug = &pluginList[pluginCount]; + + if(nextPlug>=pastPlug) { + return NULL; + } else { + return nextPlug; + } + } +} + + + +/** + * Call the plugin with some params + */ +static void uplug_callPlug(UPlugData *plug, UPlugReason reason, UErrorCode *status) { + UPlugTokenReturn token; + if(plug==NULL||U_FAILURE(*status)) { + return; + } + token = (*(plug->entrypoint))(plug, reason, status); + if(token!=UPLUG_TOKEN) { + *status = U_INTERNAL_PROGRAM_ERROR; + } +} + + +static void uplug_unloadPlug(UPlugData *plug, UErrorCode *status) { + if(plug->awaitingLoad) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/ + *status = U_INTERNAL_PROGRAM_ERROR; + return; + } + if(U_SUCCESS(plug->pluginStatus)) { + /* Don't unload a plug which has a failing load status - means it didn't actually load. */ + uplug_callPlug(plug, UPLUG_REASON_UNLOAD, status); + } +} + +static void uplug_queryPlug(UPlugData *plug, UErrorCode *status) { + if(!plug->awaitingLoad || !(plug->level == UPLUG_LEVEL_UNKNOWN) ) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/ + *status = U_INTERNAL_PROGRAM_ERROR; + return; + } + plug->level = UPLUG_LEVEL_INVALID; + uplug_callPlug(plug, UPLUG_REASON_QUERY, status); + if(U_SUCCESS(*status)) { + if(plug->level == UPLUG_LEVEL_INVALID) { + plug->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL; + plug->awaitingLoad = FALSE; + } + } else { + plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR; + plug->awaitingLoad = FALSE; + } +} + + +static void uplug_loadPlug(UPlugData *plug, UErrorCode *status) { + if(U_FAILURE(*status)) { + return; + } + if(!plug->awaitingLoad || (plug->level < UPLUG_LEVEL_LOW) ) { /* shouldn't happen. Plugin hasn'tbeen loaded yet.*/ + *status = U_INTERNAL_PROGRAM_ERROR; + return; + } + uplug_callPlug(plug, UPLUG_REASON_LOAD, status); + plug->awaitingLoad = FALSE; + if(!U_SUCCESS(*status)) { + plug->pluginStatus = U_INTERNAL_PROGRAM_ERROR; + } +} + +static UPlugData *uplug_allocateEmptyPlug(UErrorCode *status) +{ + UPlugData *plug = NULL; + + if(U_FAILURE(*status)) { + return NULL; + } + + if(pluginCount == UPLUG_PLUGIN_INITIAL_COUNT) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + plug = &pluginList[pluginCount++]; + + plug->token = UPLUG_TOKEN; + plug->structSize = sizeof(UPlugData); + plug->name[0]=0; + plug->level = UPLUG_LEVEL_UNKNOWN; /* initialize to null state */ + plug->awaitingLoad = TRUE; + plug->dontUnload = FALSE; + plug->pluginStatus = U_ZERO_ERROR; + plug->libName[0] = 0; + plug->config[0]=0; + plug->sym[0]=0; + plug->lib=NULL; + plug->entrypoint=NULL; + + + return plug; +} + +static UPlugData *uplug_allocatePlug(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *symName, + UErrorCode *status) { + UPlugData *plug = uplug_allocateEmptyPlug(status); + if(U_FAILURE(*status)) { + return NULL; + } + + if(config!=NULL) { + uprv_strncpy(plug->config, config, UPLUG_NAME_MAX); + } else { + plug->config[0] = 0; + } + + if(symName!=NULL) { + uprv_strncpy(plug->sym, symName, UPLUG_NAME_MAX); + } else { + plug->sym[0] = 0; + } + + plug->entrypoint = entrypoint; + plug->lib = lib; + uplug_queryPlug(plug, status); + + return plug; +} + +static void uplug_deallocatePlug(UPlugData *plug, UErrorCode *status) { + UErrorCode subStatus = U_ZERO_ERROR; + if(!plug->dontUnload) { +#if U_ENABLE_DYLOAD + uplug_closeLibrary(plug->lib, &subStatus); +#endif + } + plug->lib = NULL; + if(U_SUCCESS(*status) && U_FAILURE(subStatus)) { + *status = subStatus; + } + /* shift plugins up and decrement count. */ + if(U_SUCCESS(*status)) { + /* all ok- remove. */ + pluginCount = uplug_removeEntryAt(pluginList, pluginCount, sizeof(plug[0]), uplug_pluginNumber(plug)); + } else { + /* not ok- leave as a message. */ + plug->awaitingLoad=FALSE; + plug->entrypoint=0; + plug->dontUnload=TRUE; + } +} + +static void uplug_doUnloadPlug(UPlugData *plugToRemove, UErrorCode *status) { + if(plugToRemove != NULL) { + uplug_unloadPlug(plugToRemove, status); + uplug_deallocatePlug(plugToRemove, status); + } +} + +U_CAPI void U_EXPORT2 +uplug_removePlug(UPlugData *plug, UErrorCode *status) { + UPlugData *cursor = NULL; + UPlugData *plugToRemove = NULL; + if(U_FAILURE(*status)) return; + + for(cursor=pluginList;cursor!=NULL;) { + if(cursor==plug) { + plugToRemove = plug; + cursor=NULL; + } else { + cursor = uplug_nextPlug(cursor); + } + } + + uplug_doUnloadPlug(plugToRemove, status); +} + + + + +U_CAPI void U_EXPORT2 +uplug_setPlugNoUnload(UPlugData *data, UBool dontUnload) +{ + data->dontUnload = dontUnload; +} + + +U_CAPI void U_EXPORT2 +uplug_setPlugLevel(UPlugData *data, UPlugLevel level) { + data->level = level; +} + + +U_CAPI UPlugLevel U_EXPORT2 +uplug_getPlugLevel(UPlugData *data) { + return data->level; +} + + +U_CAPI void U_EXPORT2 +uplug_setPlugName(UPlugData *data, const char *name) { + uprv_strncpy(data->name, name, UPLUG_NAME_MAX); +} + + +U_CAPI const char * U_EXPORT2 +uplug_getPlugName(UPlugData *data) { + return data->name; +} + + +U_CAPI const char * U_EXPORT2 +uplug_getSymbolName(UPlugData *data) { + return data->sym; +} + +U_CAPI const char * U_EXPORT2 +uplug_getLibraryName(UPlugData *data, UErrorCode *status) { + if(data->libName[0]) { + return data->libName; + } else { +#if U_ENABLE_DYLOAD + return uplug_findLibrary(data->lib, status); +#else + return NULL; +#endif + } +} + +U_CAPI void * U_EXPORT2 +uplug_getLibrary(UPlugData *data) { + return data->lib; +} + +U_CAPI void * U_EXPORT2 +uplug_getContext(UPlugData *data) { + return data->context; +} + + +U_CAPI void U_EXPORT2 +uplug_setContext(UPlugData *data, void *context) { + data->context = context; +} + +U_CAPI const char* U_EXPORT2 +uplug_getConfiguration(UPlugData *data) { + return data->config; +} + +U_CAPI UPlugData* U_EXPORT2 +uplug_getPlugInternal(int32_t n) { + if(n <0 || n >= pluginCount) { + return NULL; + } else { + return &(pluginList[n]); + } +} + + +U_CAPI UErrorCode U_EXPORT2 +uplug_getPlugLoadStatus(UPlugData *plug) { + return plug->pluginStatus; +} + + + + +/** + * Initialize a plugin fron an entrypoint and library - but don't load it. + */ +static UPlugData* uplug_initPlugFromEntrypointAndLibrary(UPlugEntrypoint *entrypoint, const char *config, void *lib, const char *sym, + UErrorCode *status) { + UPlugData *plug = NULL; + + plug = uplug_allocatePlug(entrypoint, config, lib, sym, status); + + if(U_SUCCESS(*status)) { + return plug; + } else { + uplug_deallocatePlug(plug, status); + return NULL; + } +} + +U_CAPI UPlugData* U_EXPORT2 +uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status) { + UPlugData* plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, NULL, NULL, status); + uplug_loadPlug(plug, status); + return plug; +} + +#if U_ENABLE_DYLOAD + +static UPlugData* +uplug_initErrorPlug(const char *libName, const char *sym, const char *config, const char *nameOrError, UErrorCode loadStatus, UErrorCode *status) +{ + UPlugData *plug = uplug_allocateEmptyPlug(status); + if(U_FAILURE(*status)) return NULL; + + plug->pluginStatus = loadStatus; + plug->awaitingLoad = FALSE; /* Won't load. */ + plug->dontUnload = TRUE; /* cannot unload. */ + + if(sym!=NULL) { + uprv_strncpy(plug->sym, sym, UPLUG_NAME_MAX); + } + + if(libName!=NULL) { + uprv_strncpy(plug->libName, libName, UPLUG_NAME_MAX); + } + + if(nameOrError!=NULL) { + uprv_strncpy(plug->name, nameOrError, UPLUG_NAME_MAX); + } + + if(config!=NULL) { + uprv_strncpy(plug->config, config, UPLUG_NAME_MAX); + } + + return plug; +} + +/** + * Fetch a plugin from DLL, and then initialize it from a library- but don't load it. + */ +static UPlugData* +uplug_initPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) { + void *lib = NULL; + UPlugData *plug = NULL; + if(U_FAILURE(*status)) { return NULL; } + lib = uplug_openLibrary(libName, status); + if(lib!=NULL && U_SUCCESS(*status)) { + UPlugEntrypoint *entrypoint = NULL; + entrypoint = (UPlugEntrypoint*)uprv_dlsym_func(lib, sym, status); + + if(entrypoint!=NULL&&U_SUCCESS(*status)) { + plug = uplug_initPlugFromEntrypointAndLibrary(entrypoint, config, lib, sym, status); + if(plug!=NULL&&U_SUCCESS(*status)) { + plug->lib = lib; /* plug takes ownership of library */ + lib = NULL; /* library is now owned by plugin. */ + } + } else { + UErrorCode subStatus = U_ZERO_ERROR; + plug = uplug_initErrorPlug(libName,sym,config,"ERROR: Could not load entrypoint",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus); + } + if(lib!=NULL) { /* still need to close the lib */ + UErrorCode subStatus = U_ZERO_ERROR; + uplug_closeLibrary(lib, &subStatus); /* don't care here */ + } + } else { + UErrorCode subStatus = U_ZERO_ERROR; + plug = uplug_initErrorPlug(libName,sym,config,"ERROR: could not load library",(lib==NULL)?U_MISSING_RESOURCE_ERROR:*status,&subStatus); + } + return plug; +} + +U_CAPI UPlugData* U_EXPORT2 +uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status) { + UPlugData *plug = NULL; + if(U_FAILURE(*status)) { return NULL; } + plug = uplug_initPlugFromLibrary(libName, sym, config, status); + uplug_loadPlug(plug, status); + + return plug; +} + +#endif + +static UPlugLevel gCurrentLevel = UPLUG_LEVEL_LOW; + +U_CAPI UPlugLevel U_EXPORT2 uplug_getCurrentLevel() { + return gCurrentLevel; +} + +static UBool U_CALLCONV uplug_cleanup(void) +{ + int32_t i; + + UPlugData *pluginToRemove; + /* cleanup plugs */ + for(i=0;i<pluginCount;i++) { + UErrorCode subStatus = U_ZERO_ERROR; + pluginToRemove = &pluginList[i]; + /* unload and deallocate */ + uplug_doUnloadPlug(pluginToRemove, &subStatus); + } + /* close other held libs? */ + gCurrentLevel = UPLUG_LEVEL_LOW; + return TRUE; +} + +#if U_ENABLE_DYLOAD + +static void uplug_loadWaitingPlugs(UErrorCode *status) { + int32_t i; + UPlugLevel currentLevel = uplug_getCurrentLevel(); + + if(U_FAILURE(*status)) { + return; + } +#if UPLUG_TRACE + DBG((stderr, "uplug_loadWaitingPlugs() Level: %d\n", currentLevel)); +#endif + /* pass #1: low level plugs */ + for(i=0;i<pluginCount;i++) { + UErrorCode subStatus = U_ZERO_ERROR; + UPlugData *pluginToLoad = &pluginList[i]; + if(pluginToLoad->awaitingLoad) { + if(pluginToLoad->level == UPLUG_LEVEL_LOW) { + if(currentLevel > UPLUG_LEVEL_LOW) { + pluginToLoad->pluginStatus = U_PLUGIN_TOO_HIGH; + } else { + UPlugLevel newLevel; + uplug_loadPlug(pluginToLoad, &subStatus); + newLevel = uplug_getCurrentLevel(); + if(newLevel > currentLevel) { + pluginToLoad->pluginStatus = U_PLUGIN_CHANGED_LEVEL_WARNING; + currentLevel = newLevel; + } + } + pluginToLoad->awaitingLoad = FALSE; + } + } + } + for(i=0;i<pluginCount;i++) { + UErrorCode subStatus = U_ZERO_ERROR; + UPlugData *pluginToLoad = &pluginList[i]; + + if(pluginToLoad->awaitingLoad) { + if(pluginToLoad->level == UPLUG_LEVEL_INVALID) { + pluginToLoad->pluginStatus = U_PLUGIN_DIDNT_SET_LEVEL; + } else if(pluginToLoad->level == UPLUG_LEVEL_UNKNOWN) { + pluginToLoad->pluginStatus = U_INTERNAL_PROGRAM_ERROR; + } else { + uplug_loadPlug(pluginToLoad, &subStatus); + } + pluginToLoad->awaitingLoad = FALSE; + } + } + +#if UPLUG_TRACE + DBG((stderr, " Done Loading Plugs. Level: %d\n", (int32_t)uplug_getCurrentLevel())); +#endif +} + +/* Name of the plugin config file */ +static char plugin_file[2048] = ""; +#endif + +U_CAPI const char* U_EXPORT2 +uplug_getPluginFile() { +#if U_ENABLE_DYLOAD && !UCONFIG_NO_FILE_IO + return plugin_file; +#else + return NULL; +#endif +} + + +// uplug_init() is called first thing from u_init(). + +U_CAPI void U_EXPORT2 +uplug_init(UErrorCode *status) { +#if !U_ENABLE_DYLOAD + (void)status; /* unused */ +#elif !UCONFIG_NO_FILE_IO + CharString plugin_dir; + const char *env = getenv("ICU_PLUGINS"); + + if(U_FAILURE(*status)) return; + if(env != NULL) { + plugin_dir.append(env, -1, *status); + } + if(U_FAILURE(*status)) return; + +#if defined(DEFAULT_ICU_PLUGINS) + if(plugin_dir.isEmpty()) { + plugin_dir.append(DEFAULT_ICU_PLUGINS, -1, *status); + } +#endif + +#if UPLUG_TRACE + DBG((stderr, "ICU_PLUGINS=%s\n", plugin_dir.data())); +#endif + + if(!plugin_dir.isEmpty()) { + FILE *f; + + CharString pluginFile; +#ifdef OS390BATCH +/* There are potentially a lot of ways to implement a plugin directory on OS390/zOS */ +/* Keeping in mind that unauthorized file access is logged, monitored, and enforced */ +/* I've chosen to open a DDNAME if BATCH and leave it alone for (presumably) UNIX */ +/* System Services. Alternative techniques might be allocating a member in */ +/* SYS1.PARMLIB or setting an environment variable "ICU_PLUGIN_PATH" (?). The */ +/* DDNAME can be connected to a file in the HFS if need be. */ + + pluginFile.append("//DD:ICUPLUG", -1, *status); /* JAM 20 Oct 2011 */ +#else + pluginFile.append(plugin_dir, *status); + pluginFile.append(U_FILE_SEP_STRING, -1, *status); + pluginFile.append("icuplugins", -1, *status); + pluginFile.append(U_ICU_VERSION_SHORT, -1, *status); + pluginFile.append(".txt", -1, *status); +#endif + +#if UPLUG_TRACE + DBG((stderr, "status=%s\n", u_errorName(*status))); +#endif + + if(U_FAILURE(*status)) { + return; + } + if((size_t)pluginFile.length() > (sizeof(plugin_file)-1)) { + *status = U_BUFFER_OVERFLOW_ERROR; +#if UPLUG_TRACE + DBG((stderr, "status=%s\n", u_errorName(*status))); +#endif + return; + } + + /* plugin_file is not used for processing - it is only used + so that uplug_getPluginFile() works (i.e. icuinfo) + */ + pluginFile.extract(plugin_file, sizeof(plugin_file), *status); + +#if UPLUG_TRACE + DBG((stderr, "pluginfile= %s len %d/%d\n", plugin_file, (int)strlen(plugin_file), (int)sizeof(plugin_file))); +#endif + +#ifdef __MVS__ + if (iscics()) /* 12 Nov 2011 JAM */ + { + f = NULL; + } + else +#endif + { + f = fopen(pluginFile.data(), "r"); + } + + if(f != NULL) { + char linebuf[1024]; + char *p, *libName=NULL, *symName=NULL, *config=NULL; + int32_t line = 0; + + + while(fgets(linebuf,1023,f)) { + line++; + + if(!*linebuf || *linebuf=='#') { + continue; + } else { + p = linebuf; + while(*p&&isspace((int)*p)) + p++; + if(!*p || *p=='#') continue; + libName = p; + while(*p&&!isspace((int)*p)) { + p++; + } + if(!*p || *p=='#') continue; /* no tab after libname */ + *p=0; /* end of libname */ + p++; + while(*p&&isspace((int)*p)) { + p++; + } + if(!*p||*p=='#') continue; /* no symname after libname +tab */ + symName = p; + while(*p&&!isspace((int)*p)) { + p++; + } + + if(*p) { /* has config */ + *p=0; + ++p; + while(*p&&isspace((int)*p)) { + p++; + } + if(*p) { + config = p; + } + } + + /* chop whitespace at the end of the config */ + if(config!=NULL&&*config!=0) { + p = config+strlen(config); + while(p>config&&isspace((int)*(--p))) { + *p=0; + } + } + + /* OK, we're good. */ + { + UErrorCode subStatus = U_ZERO_ERROR; + UPlugData *plug = uplug_initPlugFromLibrary(libName, symName, config, &subStatus); + if(U_FAILURE(subStatus) && U_SUCCESS(*status)) { + *status = subStatus; + } +#if UPLUG_TRACE + DBG((stderr, "PLUGIN libName=[%s], sym=[%s], config=[%s]\n", libName, symName, config)); + DBG((stderr, " -> %p, %s\n", (void*)plug, u_errorName(subStatus))); +#else + (void)plug; /* unused */ +#endif + } + } + } + fclose(f); + } else { +#if UPLUG_TRACE + DBG((stderr, "Can't open plugin file %s\n", plugin_file)); +#endif + } + } + uplug_loadWaitingPlugs(status); +#endif /* U_ENABLE_DYLOAD */ + gCurrentLevel = UPLUG_LEVEL_HIGH; + ucln_registerCleanup(UCLN_UPLUG, uplug_cleanup); +} + +#endif + + diff --git a/thirdparty/icu4c/common/icuplugimp.h b/thirdparty/icu4c/common/icuplugimp.h new file mode 100644 index 0000000000..9df309204e --- /dev/null +++ b/thirdparty/icu4c/common/icuplugimp.h @@ -0,0 +1,93 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2009-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : icuplugimp.h +* +* Internal functions for the ICU plugin system +* +* Date Name Description +* 10/29/2009 sl New. +****************************************************************************** +*/ + + +#ifndef ICUPLUGIMP_H +#define ICUPLUGIMP_H + +#include "unicode/icuplug.h" + +#if UCONFIG_ENABLE_PLUGINS + +/*========================*/ +/** @{ Library Manipulation + */ + +/** + * Open a library, adding a reference count if needed. + * @param libName library name to load + * @param status error code + * @return the library pointer, or NULL + * @internal internal use only + */ +U_CAPI void * U_EXPORT2 +uplug_openLibrary(const char *libName, UErrorCode *status); + +/** + * Close a library, if its reference count is 0 + * @param lib the library to close + * @param status error code + * @internal internal use only + */ +U_CAPI void U_EXPORT2 +uplug_closeLibrary(void *lib, UErrorCode *status); + +/** + * Get a library's name, or NULL if not found. + * @param lib the library's name + * @param status error code + * @return the library name, or NULL if not found. + * @internal internal use only + */ +U_CAPI char * U_EXPORT2 +uplug_findLibrary(void *lib, UErrorCode *status); + +/** @} */ + +/*========================*/ +/** {@ ICU Plugin internal interfaces + */ + +/** + * Initialize the plugins + * @param status error result + * @internal - Internal use only. + */ +U_CAPI void U_EXPORT2 +uplug_init(UErrorCode *status); + +/** + * Get raw plug N + * @internal - Internal use only + */ +U_CAPI UPlugData* U_EXPORT2 +uplug_getPlugInternal(int32_t n); + +/** + * Get the name of the plugin file. + * @internal - Internal use only. + */ +U_CAPI const char* U_EXPORT2 +uplug_getPluginFile(void); + +/** @} */ + +#endif + +#endif diff --git a/thirdparty/icu4c/common/loadednormalizer2impl.cpp b/thirdparty/icu4c/common/loadednormalizer2impl.cpp new file mode 100644 index 0000000000..e4b36f1055 --- /dev/null +++ b/thirdparty/icu4c/common/loadednormalizer2impl.cpp @@ -0,0 +1,418 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2014, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* loadednormalizer2impl.cpp +* +* created on: 2014sep03 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/udata.h" +#include "unicode/localpointer.h" +#include "unicode/normalizer2.h" +#include "unicode/ucptrie.h" +#include "unicode/unistr.h" +#include "unicode/unorm.h" +#include "cstring.h" +#include "mutex.h" +#include "norm2allmodes.h" +#include "normalizer2impl.h" +#include "uassert.h" +#include "ucln_cmn.h" +#include "uhash.h" + +U_NAMESPACE_BEGIN + +class LoadedNormalizer2Impl : public Normalizer2Impl { +public: + LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {} + virtual ~LoadedNormalizer2Impl(); + + void load(const char *packageName, const char *name, UErrorCode &errorCode); + +private: + static UBool U_CALLCONV + isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo); + + UDataMemory *memory; + UCPTrie *ownedTrie; +}; + +LoadedNormalizer2Impl::~LoadedNormalizer2Impl() { + udata_close(memory); + ucptrie_close(ownedTrie); +} + +UBool U_CALLCONV +LoadedNormalizer2Impl::isAcceptable(void * /*context*/, + const char * /* type */, const char * /*name*/, + const UDataInfo *pInfo) { + if( + pInfo->size>=20 && + pInfo->isBigEndian==U_IS_BIG_ENDIAN && + pInfo->charsetFamily==U_CHARSET_FAMILY && + pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ + pInfo->dataFormat[1]==0x72 && + pInfo->dataFormat[2]==0x6d && + pInfo->dataFormat[3]==0x32 && + pInfo->formatVersion[0]==4 + ) { + // Normalizer2Impl *me=(Normalizer2Impl *)context; + // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); + return TRUE; + } else { + return FALSE; + } +} + +void +LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode); + if(U_FAILURE(errorCode)) { + return; + } + const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory); + const int32_t *inIndexes=(const int32_t *)inBytes; + int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4; + if(indexesLength<=IX_MIN_LCCC_CP) { + errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes. + return; + } + + int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET]; + int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; + ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, + inBytes+offset, nextOffset-offset, NULL, + &errorCode); + if(U_FAILURE(errorCode)) { + return; + } + + offset=nextOffset; + nextOffset=inIndexes[IX_SMALL_FCD_OFFSET]; + const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset); + + // smallFCD: new in formatVersion 2 + offset=nextOffset; + const uint8_t *inSmallFCD=inBytes+offset; + + init(inIndexes, ownedTrie, inExtraData, inSmallFCD); +} + +// instance cache ---------------------------------------------------------- *** + +Norm2AllModes * +Norm2AllModes::createInstance(const char *packageName, + const char *name, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return NULL; + } + LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl; + if(impl==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + impl->load(packageName, name, errorCode); + return createInstance(impl, errorCode); +} + +U_CDECL_BEGIN +static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup(); +U_CDECL_END + +#if !NORM2_HARDCODE_NFC_DATA +static Norm2AllModes *nfcSingleton; +static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; +#endif + +static Norm2AllModes *nfkcSingleton; +static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER; + +static Norm2AllModes *nfkc_cfSingleton; +static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER; + +static UHashtable *cache=NULL; + +// UInitOnce singleton initialization function +static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { +#if !NORM2_HARDCODE_NFC_DATA + if (uprv_strcmp(what, "nfc") == 0) { + nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode); + } else +#endif + if (uprv_strcmp(what, "nfkc") == 0) { + nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode); + } else if (uprv_strcmp(what, "nfkc_cf") == 0) { + nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode); + } else { + UPRV_UNREACHABLE; // Unknown singleton + } + ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); +} + +U_CDECL_BEGIN + +static void U_CALLCONV deleteNorm2AllModes(void *allModes) { + delete (Norm2AllModes *)allModes; +} + +static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() { +#if !NORM2_HARDCODE_NFC_DATA + delete nfcSingleton; + nfcSingleton = NULL; + nfcInitOnce.reset(); +#endif + + delete nfkcSingleton; + nfkcSingleton = NULL; + nfkcInitOnce.reset(); + + delete nfkc_cfSingleton; + nfkc_cfSingleton = NULL; + nfkc_cfInitOnce.reset(); + + uhash_close(cache); + cache=NULL; + return TRUE; +} + +U_CDECL_END + +#if !NORM2_HARDCODE_NFC_DATA +const Norm2AllModes * +Norm2AllModes::getNFCInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); + return nfcSingleton; +} +#endif + +const Norm2AllModes * +Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); + return nfkcSingleton; +} + +const Norm2AllModes * +Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); + return nfkc_cfSingleton; +} + +#if !NORM2_HARDCODE_NFC_DATA +const Normalizer2 * +Normalizer2::getNFCInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->comp : NULL; +} + +const Normalizer2 * +Normalizer2::getNFDInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->decomp : NULL; +} + +const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->fcd : NULL; +} + +const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->fcc : NULL; +} + +const Normalizer2Impl * +Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? allModes->impl : NULL; +} +#endif + +const Normalizer2 * +Normalizer2::getNFKCInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); + return allModes!=NULL ? &allModes->comp : NULL; +} + +const Normalizer2 * +Normalizer2::getNFKDInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); + return allModes!=NULL ? &allModes->decomp : NULL; +} + +const Normalizer2 * +Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); + return allModes!=NULL ? &allModes->comp : NULL; +} + +const Normalizer2 * +Normalizer2::getInstance(const char *packageName, + const char *name, + UNormalization2Mode mode, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return NULL; + } + if(name==NULL || *name==0) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + const Norm2AllModes *allModes=NULL; + if(packageName==NULL) { + if(0==uprv_strcmp(name, "nfc")) { + allModes=Norm2AllModes::getNFCInstance(errorCode); + } else if(0==uprv_strcmp(name, "nfkc")) { + allModes=Norm2AllModes::getNFKCInstance(errorCode); + } else if(0==uprv_strcmp(name, "nfkc_cf")) { + allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); + } + } + if(allModes==NULL && U_SUCCESS(errorCode)) { + { + Mutex lock; + if(cache!=NULL) { + allModes=(Norm2AllModes *)uhash_get(cache, name); + } + } + if(allModes==NULL) { + ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); + LocalPointer<Norm2AllModes> localAllModes( + Norm2AllModes::createInstance(packageName, name, errorCode)); + if(U_SUCCESS(errorCode)) { + Mutex lock; + if(cache==NULL) { + cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); + if(U_FAILURE(errorCode)) { + return NULL; + } + uhash_setKeyDeleter(cache, uprv_free); + uhash_setValueDeleter(cache, deleteNorm2AllModes); + } + void *temp=uhash_get(cache, name); + if(temp==NULL) { + int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1); + char *nameCopy=(char *)uprv_malloc(keyLength); + if(nameCopy==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memcpy(nameCopy, name, keyLength); + allModes=localAllModes.getAlias(); + uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode); + } else { + // race condition + allModes=(Norm2AllModes *)temp; + } + } + } + } + if(allModes!=NULL && U_SUCCESS(errorCode)) { + switch(mode) { + case UNORM2_COMPOSE: + return &allModes->comp; + case UNORM2_DECOMPOSE: + return &allModes->decomp; + case UNORM2_FCD: + return &allModes->fcd; + case UNORM2_COMPOSE_CONTIGUOUS: + return &allModes->fcc; + default: + break; // do nothing + } + } + return NULL; +} + +const Normalizer2 * +Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return NULL; + } + switch(mode) { + case UNORM_NFD: + return Normalizer2::getNFDInstance(errorCode); + case UNORM_NFKD: + return Normalizer2::getNFKDInstance(errorCode); + case UNORM_NFC: + return Normalizer2::getNFCInstance(errorCode); + case UNORM_NFKC: + return Normalizer2::getNFKCInstance(errorCode); + case UNORM_FCD: + return getFCDInstance(errorCode); + default: // UNORM_NONE + return getNoopInstance(errorCode); + } +} + +const Normalizer2Impl * +Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); + return allModes!=NULL ? allModes->impl : NULL; +} + +const Normalizer2Impl * +Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); + return allModes!=NULL ? allModes->impl : NULL; +} + +U_NAMESPACE_END + +// C API ------------------------------------------------------------------- *** + +U_NAMESPACE_USE + +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); +} + +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFKDInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); +} + +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); +} + +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getInstance(const char *packageName, + const char *name, + UNormalization2Mode mode, + UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); +} + +U_CFUNC UNormalizationCheckResult +unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { + if(mode<=UNORM_NONE || UNORM_FCD<=mode) { + return UNORM_YES; + } + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); + if(U_SUCCESS(errorCode)) { + return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); + } else { + return UNORM_MAYBE; + } +} + +#endif // !UCONFIG_NO_NORMALIZATION diff --git a/thirdparty/icu4c/common/localebuilder.cpp b/thirdparty/icu4c/common/localebuilder.cpp new file mode 100644 index 0000000000..1dd8131e58 --- /dev/null +++ b/thirdparty/icu4c/common/localebuilder.cpp @@ -0,0 +1,468 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include <utility> + +#include "bytesinkutil.h" // CharStringByteSink +#include "charstr.h" +#include "cstring.h" +#include "ulocimp.h" +#include "unicode/localebuilder.h" +#include "unicode/locid.h" + +U_NAMESPACE_BEGIN + +#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) +#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) + +const char* kAttributeKey = "attribute"; + +static bool _isExtensionSubtags(char key, const char* s, int32_t len) { + switch (uprv_tolower(key)) { + case 'u': + return ultag_isUnicodeExtensionSubtags(s, len); + case 't': + return ultag_isTransformedExtensionSubtags(s, len); + case 'x': + return ultag_isPrivateuseValueSubtags(s, len); + default: + return ultag_isExtensionSubtags(s, len); + } +} + +LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(), + script_(), region_(), variant_(nullptr), extensions_(nullptr) +{ + language_[0] = 0; + script_[0] = 0; + region_[0] = 0; +} + +LocaleBuilder::~LocaleBuilder() +{ + delete variant_; + delete extensions_; +} + +LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale) +{ + clear(); + setLanguage(locale.getLanguage()); + setScript(locale.getScript()); + setRegion(locale.getCountry()); + setVariant(locale.getVariant()); + extensions_ = locale.clone(); + if (extensions_ == nullptr) { + status_ = U_MEMORY_ALLOCATION_ERROR; + } + return *this; +} + +LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag) +{ + Locale l = Locale::forLanguageTag(tag, status_); + if (U_FAILURE(status_)) { return *this; } + // Because setLocale will reset status_ we need to return + // first if we have error in forLanguageTag. + setLocale(l); + return *this; +} + +static void setField(StringPiece input, char* dest, UErrorCode& errorCode, + UBool (*test)(const char*, int32_t)) { + if (U_FAILURE(errorCode)) { return; } + if (input.empty()) { + dest[0] = '\0'; + } else if (test(input.data(), input.length())) { + uprv_memcpy(dest, input.data(), input.length()); + dest[input.length()] = '\0'; + } else { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + } +} + +LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language) +{ + setField(language, language_, status_, &ultag_isLanguageSubtag); + return *this; +} + +LocaleBuilder& LocaleBuilder::setScript(StringPiece script) +{ + setField(script, script_, status_, &ultag_isScriptSubtag); + return *this; +} + +LocaleBuilder& LocaleBuilder::setRegion(StringPiece region) +{ + setField(region, region_, status_, &ultag_isRegionSubtag); + return *this; +} + +static void transform(char* data, int32_t len) { + for (int32_t i = 0; i < len; i++, data++) { + if (*data == '_') { + *data = '-'; + } else { + *data = uprv_tolower(*data); + } + } +} + +LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant) +{ + if (U_FAILURE(status_)) { return *this; } + if (variant.empty()) { + delete variant_; + variant_ = nullptr; + return *this; + } + CharString* new_variant = new CharString(variant, status_); + if (U_FAILURE(status_)) { return *this; } + if (new_variant == nullptr) { + status_ = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + transform(new_variant->data(), new_variant->length()); + if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) { + delete new_variant; + status_ = U_ILLEGAL_ARGUMENT_ERROR; + return *this; + } + delete variant_; + variant_ = new_variant; + return *this; +} + +static bool +_isKeywordValue(const char* key, const char* value, int32_t value_len) +{ + if (key[1] == '\0') { + // one char key + return (UPRV_ISALPHANUM(uprv_tolower(key[0])) && + _isExtensionSubtags(key[0], value, value_len)); + } else if (uprv_strcmp(key, kAttributeKey) == 0) { + // unicode attributes + return ultag_isUnicodeLocaleAttributes(value, value_len); + } + // otherwise: unicode extension value + // We need to convert from legacy key/value to unicode + // key/value + const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key); + const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value); + + return unicode_locale_key && unicode_locale_type && + ultag_isUnicodeLocaleKey(unicode_locale_key, -1) && + ultag_isUnicodeLocaleType(unicode_locale_type, -1); +} + +static void +_copyExtensions(const Locale& from, icu::StringEnumeration *keywords, + Locale& to, bool validate, UErrorCode& errorCode) +{ + if (U_FAILURE(errorCode)) { return; } + LocalPointer<icu::StringEnumeration> ownedKeywords; + if (keywords == nullptr) { + ownedKeywords.adoptInstead(from.createKeywords(errorCode)); + if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; } + keywords = ownedKeywords.getAlias(); + } + const char* key; + while ((key = keywords->next(nullptr, errorCode)) != nullptr) { + CharString value; + CharStringByteSink sink(&value); + from.getKeywordValue(key, sink, errorCode); + if (U_FAILURE(errorCode)) { return; } + if (uprv_strcmp(key, kAttributeKey) == 0) { + transform(value.data(), value.length()); + } + if (validate && + !_isKeywordValue(key, value.data(), value.length())) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + to.setKeywordValue(key, value.data(), errorCode); + if (U_FAILURE(errorCode)) { return; } + } +} + +void static +_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode) +{ + // Clear Unicode attributes + locale.setKeywordValue(kAttributeKey, "", errorCode); + + // Clear all Unicode keyword values + LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode)); + if (U_FAILURE(errorCode) || iter.isNull()) { return; } + const char* key; + while ((key = iter->next(nullptr, errorCode)) != nullptr) { + locale.setUnicodeKeywordValue(key, nullptr, errorCode); + } +} + +static void +_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode) +{ + // Add the unicode extensions to extensions_ + CharString locale_str("und-u-", errorCode); + locale_str.append(value, errorCode); + _copyExtensions( + Locale::forLanguageTag(locale_str.data(), errorCode), nullptr, + locale, false, errorCode); +} + +LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value) +{ + if (U_FAILURE(status_)) { return *this; } + if (!UPRV_ISALPHANUM(key)) { + status_ = U_ILLEGAL_ARGUMENT_ERROR; + return *this; + } + CharString value_str(value, status_); + if (U_FAILURE(status_)) { return *this; } + transform(value_str.data(), value_str.length()); + if (!value_str.isEmpty() && + !_isExtensionSubtags(key, value_str.data(), value_str.length())) { + status_ = U_ILLEGAL_ARGUMENT_ERROR; + return *this; + } + if (extensions_ == nullptr) { + extensions_ = new Locale(); + if (extensions_ == nullptr) { + status_ = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + } + if (uprv_tolower(key) != 'u') { + // for t, x and others extension. + extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(), + status_); + return *this; + } + _clearUAttributesAndKeyType(*extensions_, status_); + if (U_FAILURE(status_)) { return *this; } + if (!value.empty()) { + _setUnicodeExtensions(*extensions_, value_str, status_); + } + return *this; +} + +LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword( + StringPiece key, StringPiece type) +{ + if (U_FAILURE(status_)) { return *this; } + if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) || + (!type.empty() && + !ultag_isUnicodeLocaleType(type.data(), type.length()))) { + status_ = U_ILLEGAL_ARGUMENT_ERROR; + return *this; + } + if (extensions_ == nullptr) { + extensions_ = new Locale(); + } + if (extensions_ == nullptr) { + status_ = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + extensions_->setUnicodeKeywordValue(key, type, status_); + return *this; +} + +LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute( + StringPiece value) +{ + CharString value_str(value, status_); + if (U_FAILURE(status_)) { return *this; } + transform(value_str.data(), value_str.length()); + if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) { + status_ = U_ILLEGAL_ARGUMENT_ERROR; + return *this; + } + if (extensions_ == nullptr) { + extensions_ = new Locale(); + if (extensions_ == nullptr) { + status_ = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_); + return *this; + } + + CharString attributes; + CharStringByteSink sink(&attributes); + UErrorCode localErrorCode = U_ZERO_ERROR; + extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode); + if (U_FAILURE(localErrorCode)) { + CharString new_attributes(value_str.data(), status_); + // No attributes, set the attribute. + extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); + return *this; + } + + transform(attributes.data(),attributes.length()); + const char* start = attributes.data(); + const char* limit = attributes.data() + attributes.length(); + CharString new_attributes; + bool inserted = false; + while (start < limit) { + if (!inserted) { + int cmp = uprv_strcmp(start, value_str.data()); + if (cmp == 0) { return *this; } // Found it in attributes: Just return + if (cmp > 0) { + if (!new_attributes.isEmpty()) new_attributes.append('_', status_); + new_attributes.append(value_str.data(), status_); + inserted = true; + } + } + if (!new_attributes.isEmpty()) { + new_attributes.append('_', status_); + } + new_attributes.append(start, status_); + start += uprv_strlen(start) + 1; + } + if (!inserted) { + if (!new_attributes.isEmpty()) { + new_attributes.append('_', status_); + } + new_attributes.append(value_str.data(), status_); + } + // Not yet in the attributes, set the attribute. + extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); + return *this; +} + +LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute( + StringPiece value) +{ + CharString value_str(value, status_); + if (U_FAILURE(status_)) { return *this; } + transform(value_str.data(), value_str.length()); + if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) { + status_ = U_ILLEGAL_ARGUMENT_ERROR; + return *this; + } + if (extensions_ == nullptr) { return *this; } + UErrorCode localErrorCode = U_ZERO_ERROR; + CharString attributes; + CharStringByteSink sink(&attributes); + extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode); + // get failure, just return + if (U_FAILURE(localErrorCode)) { return *this; } + // Do not have any attributes, just return. + if (attributes.isEmpty()) { return *this; } + + char* p = attributes.data(); + // Replace null terminiator in place for _ and - so later + // we can use uprv_strcmp to compare. + for (int32_t i = 0; i < attributes.length(); i++, p++) { + *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p); + } + + const char* start = attributes.data(); + const char* limit = attributes.data() + attributes.length(); + CharString new_attributes; + bool found = false; + while (start < limit) { + if (uprv_strcmp(start, value_str.data()) == 0) { + found = true; + } else { + if (!new_attributes.isEmpty()) { + new_attributes.append('_', status_); + } + new_attributes.append(start, status_); + } + start += uprv_strlen(start) + 1; + } + // Found the value in attributes, set the attribute. + if (found) { + extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); + } + return *this; +} + +LocaleBuilder& LocaleBuilder::clear() +{ + status_ = U_ZERO_ERROR; + language_[0] = 0; + script_[0] = 0; + region_[0] = 0; + delete variant_; + variant_ = nullptr; + clearExtensions(); + return *this; +} + +LocaleBuilder& LocaleBuilder::clearExtensions() +{ + delete extensions_; + extensions_ = nullptr; + return *this; +} + +Locale makeBogusLocale() { + Locale bogus; + bogus.setToBogus(); + return bogus; +} + +void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode) +{ + if (U_FAILURE(errorCode)) { return; } + LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode)); + if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) { + // Error, or no extensions to copy. + return; + } + if (extensions_ == nullptr) { + extensions_ = new Locale(); + if (extensions_ == nullptr) { + status_ = U_MEMORY_ALLOCATION_ERROR; + return; + } + } + _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode); +} + +Locale LocaleBuilder::build(UErrorCode& errorCode) +{ + if (U_FAILURE(errorCode)) { + return makeBogusLocale(); + } + if (U_FAILURE(status_)) { + errorCode = status_; + return makeBogusLocale(); + } + CharString locale_str(language_, errorCode); + if (uprv_strlen(script_) > 0) { + locale_str.append('-', errorCode).append(StringPiece(script_), errorCode); + } + if (uprv_strlen(region_) > 0) { + locale_str.append('-', errorCode).append(StringPiece(region_), errorCode); + } + if (variant_ != nullptr) { + locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode); + } + if (U_FAILURE(errorCode)) { + return makeBogusLocale(); + } + Locale product(locale_str.data()); + if (extensions_ != nullptr) { + _copyExtensions(*extensions_, nullptr, product, true, errorCode); + } + if (U_FAILURE(errorCode)) { + return makeBogusLocale(); + } + return product; +} + +UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const { + if (U_FAILURE(outErrorCode)) { + // Do not overwrite the older error code + return TRUE; + } + outErrorCode = status_; + return U_FAILURE(outErrorCode); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/localematcher.cpp b/thirdparty/icu4c/common/localematcher.cpp new file mode 100644 index 0000000000..5795cbf87e --- /dev/null +++ b/thirdparty/icu4c/common/localematcher.cpp @@ -0,0 +1,846 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// localematcher.cpp +// created: 2019may08 Markus W. Scherer + +#include "unicode/utypes.h" +#include "unicode/localebuilder.h" +#include "unicode/localematcher.h" +#include "unicode/locid.h" +#include "unicode/stringpiece.h" +#include "unicode/uloc.h" +#include "unicode/uobject.h" +#include "cstring.h" +#include "localeprioritylist.h" +#include "loclikelysubtags.h" +#include "locdistance.h" +#include "lsr.h" +#include "uassert.h" +#include "uhash.h" +#include "ustr_imp.h" +#include "uvector.h" + +#define UND_LSR LSR("und", "", "", LSR::EXPLICIT_LSR) + +/** + * Indicator for the lifetime of desired-locale objects passed into the LocaleMatcher. + * + * @draft ICU 65 + */ +enum ULocMatchLifetime { + /** + * Locale objects are temporary. + * The matcher will make a copy of a locale that will be used beyond one function call. + * + * @draft ICU 65 + */ + ULOCMATCH_TEMPORARY_LOCALES, + /** + * Locale objects are stored at least as long as the matcher is used. + * The matcher will keep only a pointer to a locale that will be used beyond one function call, + * avoiding a copy. + * + * @draft ICU 65 + */ + ULOCMATCH_STORED_LOCALES // TODO: permanent? cached? clone? +}; +#ifndef U_IN_DOXYGEN +typedef enum ULocMatchLifetime ULocMatchLifetime; +#endif + +U_NAMESPACE_BEGIN + +LocaleMatcher::Result::Result(LocaleMatcher::Result &&src) U_NOEXCEPT : + desiredLocale(src.desiredLocale), + supportedLocale(src.supportedLocale), + desiredIndex(src.desiredIndex), + supportedIndex(src.supportedIndex), + desiredIsOwned(src.desiredIsOwned) { + if (desiredIsOwned) { + src.desiredLocale = nullptr; + src.desiredIndex = -1; + src.desiredIsOwned = FALSE; + } +} + +LocaleMatcher::Result::~Result() { + if (desiredIsOwned) { + delete desiredLocale; + } +} + +LocaleMatcher::Result &LocaleMatcher::Result::operator=(LocaleMatcher::Result &&src) U_NOEXCEPT { + this->~Result(); + + desiredLocale = src.desiredLocale; + supportedLocale = src.supportedLocale; + desiredIndex = src.desiredIndex; + supportedIndex = src.supportedIndex; + desiredIsOwned = src.desiredIsOwned; + + if (desiredIsOwned) { + src.desiredLocale = nullptr; + src.desiredIndex = -1; + src.desiredIsOwned = FALSE; + } + return *this; +} + +Locale LocaleMatcher::Result::makeResolvedLocale(UErrorCode &errorCode) const { + if (U_FAILURE(errorCode) || supportedLocale == nullptr) { + return Locale::getRoot(); + } + const Locale *bestDesired = getDesiredLocale(); + if (bestDesired == nullptr || *supportedLocale == *bestDesired) { + return *supportedLocale; + } + LocaleBuilder b; + b.setLocale(*supportedLocale); + + // Copy the region from bestDesired, if there is one. + const char *region = bestDesired->getCountry(); + if (*region != 0) { + b.setRegion(region); + } + + // Copy the variants from bestDesired, if there are any. + // Note that this will override any supportedLocale variants. + // For example, "sco-ulster-fonipa" + "...-fonupa" => "sco-fonupa" (replacing ulster). + const char *variants = bestDesired->getVariant(); + if (*variants != 0) { + b.setVariant(variants); + } + + // Copy the extensions from bestDesired, if there are any. + // C++ note: The following note, copied from Java, may not be true, + // as long as C++ copies by legacy ICU keyword, not by extension singleton. + // Note that this will override any supportedLocale extensions. + // For example, "th-u-nu-latn-ca-buddhist" + "...-u-nu-native" => "th-u-nu-native" + // (replacing calendar). + b.copyExtensionsFrom(*bestDesired, errorCode); + return b.build(errorCode); +} + +LocaleMatcher::Builder::Builder(LocaleMatcher::Builder &&src) U_NOEXCEPT : + errorCode_(src.errorCode_), + supportedLocales_(src.supportedLocales_), + thresholdDistance_(src.thresholdDistance_), + demotion_(src.demotion_), + defaultLocale_(src.defaultLocale_), + withDefault_(src.withDefault_), + favor_(src.favor_), + direction_(src.direction_) { + src.supportedLocales_ = nullptr; + src.defaultLocale_ = nullptr; +} + +LocaleMatcher::Builder::~Builder() { + delete supportedLocales_; + delete defaultLocale_; + delete maxDistanceDesired_; + delete maxDistanceSupported_; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::operator=(LocaleMatcher::Builder &&src) U_NOEXCEPT { + this->~Builder(); + + errorCode_ = src.errorCode_; + supportedLocales_ = src.supportedLocales_; + thresholdDistance_ = src.thresholdDistance_; + demotion_ = src.demotion_; + defaultLocale_ = src.defaultLocale_; + withDefault_ = src.withDefault_, + favor_ = src.favor_; + direction_ = src.direction_; + + src.supportedLocales_ = nullptr; + src.defaultLocale_ = nullptr; + return *this; +} + +void LocaleMatcher::Builder::clearSupportedLocales() { + if (supportedLocales_ != nullptr) { + supportedLocales_->removeAllElements(); + } +} + +bool LocaleMatcher::Builder::ensureSupportedLocaleVector() { + if (U_FAILURE(errorCode_)) { return false; } + if (supportedLocales_ != nullptr) { return true; } + supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_); + if (U_FAILURE(errorCode_)) { return false; } + if (supportedLocales_ == nullptr) { + errorCode_ = U_MEMORY_ALLOCATION_ERROR; + return false; + } + return true; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListString( + StringPiece locales) { + LocalePriorityList list(locales, errorCode_); + if (U_FAILURE(errorCode_)) { return *this; } + clearSupportedLocales(); + if (!ensureSupportedLocaleVector()) { return *this; } + int32_t length = list.getLengthIncludingRemoved(); + for (int32_t i = 0; i < length; ++i) { + Locale *locale = list.orphanLocaleAt(i); + if (locale == nullptr) { continue; } + supportedLocales_->addElement(locale, errorCode_); + if (U_FAILURE(errorCode_)) { + delete locale; + break; + } + } + return *this; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) { + if (U_FAILURE(errorCode_)) { return *this; } + clearSupportedLocales(); + if (!ensureSupportedLocaleVector()) { return *this; } + while (locales.hasNext()) { + const Locale &locale = locales.next(); + Locale *clone = locale.clone(); + if (clone == nullptr) { + errorCode_ = U_MEMORY_ALLOCATION_ERROR; + break; + } + supportedLocales_->addElement(clone, errorCode_); + if (U_FAILURE(errorCode_)) { + delete clone; + break; + } + } + return *this; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) { + if (!ensureSupportedLocaleVector()) { return *this; } + Locale *clone = locale.clone(); + if (clone == nullptr) { + errorCode_ = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + supportedLocales_->addElement(clone, errorCode_); + if (U_FAILURE(errorCode_)) { + delete clone; + } + return *this; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::setNoDefaultLocale() { + if (U_FAILURE(errorCode_)) { return *this; } + delete defaultLocale_; + defaultLocale_ = nullptr; + withDefault_ = false; + return *this; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::setDefaultLocale(const Locale *defaultLocale) { + if (U_FAILURE(errorCode_)) { return *this; } + Locale *clone = nullptr; + if (defaultLocale != nullptr) { + clone = defaultLocale->clone(); + if (clone == nullptr) { + errorCode_ = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + } + delete defaultLocale_; + defaultLocale_ = clone; + withDefault_ = true; + return *this; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::setFavorSubtag(ULocMatchFavorSubtag subtag) { + if (U_FAILURE(errorCode_)) { return *this; } + favor_ = subtag; + return *this; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::setDemotionPerDesiredLocale(ULocMatchDemotion demotion) { + if (U_FAILURE(errorCode_)) { return *this; } + demotion_ = demotion; + return *this; +} + +LocaleMatcher::Builder &LocaleMatcher::Builder::setMaxDistance(const Locale &desired, + const Locale &supported) { + if (U_FAILURE(errorCode_)) { return *this; } + Locale *desiredClone = desired.clone(); + Locale *supportedClone = supported.clone(); + if (desiredClone == nullptr || supportedClone == nullptr) { + delete desiredClone; // in case only one could not be allocated + delete supportedClone; + errorCode_ = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + delete maxDistanceDesired_; + delete maxDistanceSupported_; + maxDistanceDesired_ = desiredClone; + maxDistanceSupported_ = supportedClone; + return *this; +} + +#if 0 +/** + * <i>Internal only!</i> + * + * @param thresholdDistance the thresholdDistance to set, with -1 = default + * @return this Builder object + * @internal + * @deprecated This API is ICU internal only. + */ +@Deprecated +LocaleMatcher::Builder &LocaleMatcher::Builder::internalSetThresholdDistance(int32_t thresholdDistance) { + if (U_FAILURE(errorCode_)) { return *this; } + if (thresholdDistance > 100) { + thresholdDistance = 100; + } + thresholdDistance_ = thresholdDistance; + return *this; +} +#endif + +UBool LocaleMatcher::Builder::copyErrorTo(UErrorCode &outErrorCode) const { + if (U_FAILURE(outErrorCode)) { return TRUE; } + if (U_SUCCESS(errorCode_)) { return FALSE; } + outErrorCode = errorCode_; + return TRUE; +} + +LocaleMatcher LocaleMatcher::Builder::build(UErrorCode &errorCode) const { + if (U_SUCCESS(errorCode) && U_FAILURE(errorCode_)) { + errorCode = errorCode_; + } + return LocaleMatcher(*this, errorCode); +} + +namespace { + +LSR getMaximalLsrOrUnd(const XLikelySubtags &likelySubtags, const Locale &locale, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode) || locale.isBogus() || *locale.getName() == 0 /* "und" */) { + return UND_LSR; + } else { + return likelySubtags.makeMaximizedLsrFrom(locale, errorCode); + } +} + +int32_t hashLSR(const UHashTok token) { + const LSR *lsr = static_cast<const LSR *>(token.pointer); + return lsr->hashCode; +} + +UBool compareLSRs(const UHashTok t1, const UHashTok t2) { + const LSR *lsr1 = static_cast<const LSR *>(t1.pointer); + const LSR *lsr2 = static_cast<const LSR *>(t2.pointer); + return *lsr1 == *lsr2; +} + +} // namespace + +int32_t LocaleMatcher::putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return suppLength; } + int32_t index = uhash_geti(supportedLsrToIndex, &lsr); + if (index == 0) { + uhash_puti(supportedLsrToIndex, const_cast<LSR *>(&lsr), i + 1, &errorCode); + if (U_SUCCESS(errorCode)) { + supportedLSRs[suppLength] = &lsr; + supportedIndexes[suppLength++] = i; + } + } + return suppLength; +} + +LocaleMatcher::LocaleMatcher(const Builder &builder, UErrorCode &errorCode) : + likelySubtags(*XLikelySubtags::getSingleton(errorCode)), + localeDistance(*LocaleDistance::getSingleton(errorCode)), + thresholdDistance(builder.thresholdDistance_), + demotionPerDesiredLocale(0), + favorSubtag(builder.favor_), + direction(builder.direction_), + supportedLocales(nullptr), lsrs(nullptr), supportedLocalesLength(0), + supportedLsrToIndex(nullptr), + supportedLSRs(nullptr), supportedIndexes(nullptr), supportedLSRsLength(0), + ownedDefaultLocale(nullptr), defaultLocale(nullptr) { + if (U_FAILURE(errorCode)) { return; } + const Locale *def = builder.defaultLocale_; + LSR builderDefaultLSR; + const LSR *defLSR = nullptr; + if (def != nullptr) { + ownedDefaultLocale = def->clone(); + if (ownedDefaultLocale == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + def = ownedDefaultLocale; + builderDefaultLSR = getMaximalLsrOrUnd(likelySubtags, *def, errorCode); + if (U_FAILURE(errorCode)) { return; } + defLSR = &builderDefaultLSR; + } + supportedLocalesLength = builder.supportedLocales_ != nullptr ? + builder.supportedLocales_->size() : 0; + if (supportedLocalesLength > 0) { + // Store the supported locales in input order, + // so that when different types are used (e.g., language tag strings) + // we can return those by parallel index. + supportedLocales = static_cast<const Locale **>( + uprv_malloc(supportedLocalesLength * sizeof(const Locale *))); + // Supported LRSs in input order. + // In C++, we store these permanently to simplify ownership management + // in the hash tables. Duplicate LSRs (if any) are unused overhead. + lsrs = new LSR[supportedLocalesLength]; + if (supportedLocales == nullptr || lsrs == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + // If the constructor fails partway, we need null pointers for destructibility. + uprv_memset(supportedLocales, 0, supportedLocalesLength * sizeof(const Locale *)); + for (int32_t i = 0; i < supportedLocalesLength; ++i) { + const Locale &locale = *static_cast<Locale *>(builder.supportedLocales_->elementAt(i)); + supportedLocales[i] = locale.clone(); + if (supportedLocales[i] == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + const Locale &supportedLocale = *supportedLocales[i]; + LSR &lsr = lsrs[i] = getMaximalLsrOrUnd(likelySubtags, supportedLocale, errorCode); + lsr.setHashCode(); + if (U_FAILURE(errorCode)) { return; } + } + + // We need an unordered map from LSR to first supported locale with that LSR, + // and an ordered list of (LSR, supported index) for + // the supported locales in the following order: + // 1. Default locale, if it is supported. + // 2. Priority locales (aka "paradigm locales") in builder order. + // 3. Remaining locales in builder order. + supportedLsrToIndex = uhash_openSize(hashLSR, compareLSRs, uhash_compareLong, + supportedLocalesLength, &errorCode); + if (U_FAILURE(errorCode)) { return; } + supportedLSRs = static_cast<const LSR **>( + uprv_malloc(supportedLocalesLength * sizeof(const LSR *))); + supportedIndexes = static_cast<int32_t *>( + uprv_malloc(supportedLocalesLength * sizeof(int32_t))); + if (supportedLSRs == nullptr || supportedIndexes == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + int32_t suppLength = 0; + // Determine insertion order. + // Add locales immediately that are equivalent to the default. + MaybeStackArray<int8_t, 100> order(supportedLocalesLength, errorCode); + if (U_FAILURE(errorCode)) { return; } + int32_t numParadigms = 0; + for (int32_t i = 0; i < supportedLocalesLength; ++i) { + const Locale &locale = *supportedLocales[i]; + const LSR &lsr = lsrs[i]; + if (defLSR == nullptr && builder.withDefault_) { + // Implicit default locale = first supported locale, if not turned off. + U_ASSERT(i == 0); + def = &locale; + defLSR = &lsr; + order[i] = 1; + suppLength = putIfAbsent(lsr, 0, suppLength, errorCode); + } else if (defLSR != nullptr && lsr.isEquivalentTo(*defLSR)) { + order[i] = 1; + suppLength = putIfAbsent(lsr, i, suppLength, errorCode); + } else if (localeDistance.isParadigmLSR(lsr)) { + order[i] = 2; + ++numParadigms; + } else { + order[i] = 3; + } + if (U_FAILURE(errorCode)) { return; } + } + // Add supported paradigm locales. + int32_t paradigmLimit = suppLength + numParadigms; + for (int32_t i = 0; i < supportedLocalesLength && suppLength < paradigmLimit; ++i) { + if (order[i] == 2) { + suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode); + } + } + // Add remaining supported locales. + for (int32_t i = 0; i < supportedLocalesLength; ++i) { + if (order[i] == 3) { + suppLength = putIfAbsent(lsrs[i], i, suppLength, errorCode); + } + } + supportedLSRsLength = suppLength; + // If supportedLSRsLength < supportedLocalesLength then + // we waste as many array slots as there are duplicate supported LSRs, + // but the amount of wasted space is small as long as there are few duplicates. + } + + defaultLocale = def; + + if (builder.demotion_ == ULOCMATCH_DEMOTION_REGION) { + demotionPerDesiredLocale = localeDistance.getDefaultDemotionPerDesiredLocale(); + } + + if (thresholdDistance >= 0) { + // already copied + } else if (builder.maxDistanceDesired_ != nullptr) { + LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceSupported_, errorCode); + const LSR *pSuppLSR = &suppLSR; + int32_t indexAndDistance = localeDistance.getBestIndexAndDistance( + getMaximalLsrOrUnd(likelySubtags, *builder.maxDistanceDesired_, errorCode), + &pSuppLSR, 1, + LocaleDistance::shiftDistance(100), favorSubtag, direction); + if (U_SUCCESS(errorCode)) { + // +1 for an exclusive threshold from an inclusive max. + thresholdDistance = LocaleDistance::getDistanceFloor(indexAndDistance) + 1; + } else { + thresholdDistance = 0; + } + } else { + thresholdDistance = localeDistance.getDefaultScriptDistance(); + } +} + +LocaleMatcher::LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT : + likelySubtags(src.likelySubtags), + localeDistance(src.localeDistance), + thresholdDistance(src.thresholdDistance), + demotionPerDesiredLocale(src.demotionPerDesiredLocale), + favorSubtag(src.favorSubtag), + direction(src.direction), + supportedLocales(src.supportedLocales), lsrs(src.lsrs), + supportedLocalesLength(src.supportedLocalesLength), + supportedLsrToIndex(src.supportedLsrToIndex), + supportedLSRs(src.supportedLSRs), + supportedIndexes(src.supportedIndexes), + supportedLSRsLength(src.supportedLSRsLength), + ownedDefaultLocale(src.ownedDefaultLocale), defaultLocale(src.defaultLocale) { + src.supportedLocales = nullptr; + src.lsrs = nullptr; + src.supportedLocalesLength = 0; + src.supportedLsrToIndex = nullptr; + src.supportedLSRs = nullptr; + src.supportedIndexes = nullptr; + src.supportedLSRsLength = 0; + src.ownedDefaultLocale = nullptr; + src.defaultLocale = nullptr; +} + +LocaleMatcher::~LocaleMatcher() { + for (int32_t i = 0; i < supportedLocalesLength; ++i) { + delete supportedLocales[i]; + } + uprv_free(supportedLocales); + delete[] lsrs; + uhash_close(supportedLsrToIndex); + uprv_free(supportedLSRs); + uprv_free(supportedIndexes); + delete ownedDefaultLocale; +} + +LocaleMatcher &LocaleMatcher::operator=(LocaleMatcher &&src) U_NOEXCEPT { + this->~LocaleMatcher(); + + thresholdDistance = src.thresholdDistance; + demotionPerDesiredLocale = src.demotionPerDesiredLocale; + favorSubtag = src.favorSubtag; + direction = src.direction; + supportedLocales = src.supportedLocales; + lsrs = src.lsrs; + supportedLocalesLength = src.supportedLocalesLength; + supportedLsrToIndex = src.supportedLsrToIndex; + supportedLSRs = src.supportedLSRs; + supportedIndexes = src.supportedIndexes; + supportedLSRsLength = src.supportedLSRsLength; + ownedDefaultLocale = src.ownedDefaultLocale; + defaultLocale = src.defaultLocale; + + src.supportedLocales = nullptr; + src.lsrs = nullptr; + src.supportedLocalesLength = 0; + src.supportedLsrToIndex = nullptr; + src.supportedLSRs = nullptr; + src.supportedIndexes = nullptr; + src.supportedLSRsLength = 0; + src.ownedDefaultLocale = nullptr; + src.defaultLocale = nullptr; + return *this; +} + +class LocaleLsrIterator { +public: + LocaleLsrIterator(const XLikelySubtags &likelySubtags, Locale::Iterator &locales, + ULocMatchLifetime lifetime) : + likelySubtags(likelySubtags), locales(locales), lifetime(lifetime) {} + + ~LocaleLsrIterator() { + if (lifetime == ULOCMATCH_TEMPORARY_LOCALES) { + delete remembered; + } + } + + bool hasNext() const { + return locales.hasNext(); + } + + LSR next(UErrorCode &errorCode) { + current = &locales.next(); + return getMaximalLsrOrUnd(likelySubtags, *current, errorCode); + } + + void rememberCurrent(int32_t desiredIndex, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + bestDesiredIndex = desiredIndex; + if (lifetime == ULOCMATCH_STORED_LOCALES) { + remembered = current; + } else { + // ULOCMATCH_TEMPORARY_LOCALES + delete remembered; + remembered = new Locale(*current); + if (remembered == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + } + } + + const Locale *orphanRemembered() { + const Locale *rem = remembered; + remembered = nullptr; + return rem; + } + + int32_t getBestDesiredIndex() const { + return bestDesiredIndex; + } + +private: + const XLikelySubtags &likelySubtags; + Locale::Iterator &locales; + ULocMatchLifetime lifetime; + const Locale *current = nullptr, *remembered = nullptr; + int32_t bestDesiredIndex = -1; +}; + +const Locale *LocaleMatcher::getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { return nullptr; } + int32_t suppIndex = getBestSuppIndex( + getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode), + nullptr, errorCode); + return U_SUCCESS(errorCode) && suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale; +} + +const Locale *LocaleMatcher::getBestMatch(Locale::Iterator &desiredLocales, + UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { return nullptr; } + if (!desiredLocales.hasNext()) { + return defaultLocale; + } + LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES); + int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode); + return U_SUCCESS(errorCode) && suppIndex >= 0 ? supportedLocales[suppIndex] : defaultLocale; +} + +const Locale *LocaleMatcher::getBestMatchForListString( + StringPiece desiredLocaleList, UErrorCode &errorCode) const { + LocalePriorityList list(desiredLocaleList, errorCode); + LocalePriorityList::Iterator iter = list.iterator(); + return getBestMatch(iter, errorCode); +} + +LocaleMatcher::Result LocaleMatcher::getBestMatchResult( + const Locale &desiredLocale, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { + return Result(nullptr, defaultLocale, -1, -1, FALSE); + } + int32_t suppIndex = getBestSuppIndex( + getMaximalLsrOrUnd(likelySubtags, desiredLocale, errorCode), + nullptr, errorCode); + if (U_FAILURE(errorCode) || suppIndex < 0) { + return Result(nullptr, defaultLocale, -1, -1, FALSE); + } else { + return Result(&desiredLocale, supportedLocales[suppIndex], 0, suppIndex, FALSE); + } +} + +LocaleMatcher::Result LocaleMatcher::getBestMatchResult( + Locale::Iterator &desiredLocales, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode) || !desiredLocales.hasNext()) { + return Result(nullptr, defaultLocale, -1, -1, FALSE); + } + LocaleLsrIterator lsrIter(likelySubtags, desiredLocales, ULOCMATCH_TEMPORARY_LOCALES); + int32_t suppIndex = getBestSuppIndex(lsrIter.next(errorCode), &lsrIter, errorCode); + if (U_FAILURE(errorCode) || suppIndex < 0) { + return Result(nullptr, defaultLocale, -1, -1, FALSE); + } else { + return Result(lsrIter.orphanRemembered(), supportedLocales[suppIndex], + lsrIter.getBestDesiredIndex(), suppIndex, TRUE); + } +} + +int32_t LocaleMatcher::getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, + UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { return -1; } + int32_t desiredIndex = 0; + int32_t bestSupportedLsrIndex = -1; + for (int32_t bestShiftedDistance = LocaleDistance::shiftDistance(thresholdDistance);;) { + // Quick check for exact maximized LSR. + // Returns suppIndex+1 where 0 means not found. + if (supportedLsrToIndex != nullptr) { + desiredLSR.setHashCode(); + int32_t index = uhash_geti(supportedLsrToIndex, &desiredLSR); + if (index != 0) { + int32_t suppIndex = index - 1; + if (remainingIter != nullptr) { + remainingIter->rememberCurrent(desiredIndex, errorCode); + } + return suppIndex; + } + } + int32_t bestIndexAndDistance = localeDistance.getBestIndexAndDistance( + desiredLSR, supportedLSRs, supportedLSRsLength, + bestShiftedDistance, favorSubtag, direction); + if (bestIndexAndDistance >= 0) { + bestShiftedDistance = LocaleDistance::getShiftedDistance(bestIndexAndDistance); + if (remainingIter != nullptr) { + remainingIter->rememberCurrent(desiredIndex, errorCode); + if (U_FAILURE(errorCode)) { return -1; } + } + bestSupportedLsrIndex = LocaleDistance::getIndex(bestIndexAndDistance); + } + if ((bestShiftedDistance -= LocaleDistance::shiftDistance(demotionPerDesiredLocale)) <= 0) { + break; + } + if (remainingIter == nullptr || !remainingIter->hasNext()) { + break; + } + desiredLSR = remainingIter->next(errorCode); + if (U_FAILURE(errorCode)) { return -1; } + ++desiredIndex; + } + if (bestSupportedLsrIndex < 0) { + // no good match + return -1; + } + return supportedIndexes[bestSupportedLsrIndex]; +} + +UBool LocaleMatcher::isMatch(const Locale &desired, const Locale &supported, + UErrorCode &errorCode) const { + LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode); + if (U_FAILURE(errorCode)) { return 0; } + const LSR *pSuppLSR = &suppLSR; + int32_t indexAndDistance = localeDistance.getBestIndexAndDistance( + getMaximalLsrOrUnd(likelySubtags, desired, errorCode), + &pSuppLSR, 1, + LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction); + return indexAndDistance >= 0; +} + +double LocaleMatcher::internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const { + // Returns the inverse of the distance: That is, 1-distance(desired, supported). + LSR suppLSR = getMaximalLsrOrUnd(likelySubtags, supported, errorCode); + if (U_FAILURE(errorCode)) { return 0; } + const LSR *pSuppLSR = &suppLSR; + int32_t indexAndDistance = localeDistance.getBestIndexAndDistance( + getMaximalLsrOrUnd(likelySubtags, desired, errorCode), + &pSuppLSR, 1, + LocaleDistance::shiftDistance(thresholdDistance), favorSubtag, direction); + double distance = LocaleDistance::getDistanceDouble(indexAndDistance); + return (100.0 - distance) / 100.0; +} + +U_NAMESPACE_END + +// uloc_acceptLanguage() --------------------------------------------------- *** + +U_NAMESPACE_USE + +namespace { + +class LocaleFromTag { +public: + LocaleFromTag() : locale(Locale::getRoot()) {} + const Locale &operator()(const char *tag) { return locale = Locale(tag); } + +private: + // Store the locale in the converter, rather than return a reference to a temporary, + // or a value which could go out of scope with the caller's reference to it. + Locale locale; +}; + +int32_t acceptLanguage(UEnumeration &supportedLocales, Locale::Iterator &desiredLocales, + char *dest, int32_t capacity, UAcceptResult *acceptResult, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return 0; } + LocaleMatcher::Builder builder; + const char *locString; + while ((locString = uenum_next(&supportedLocales, nullptr, &errorCode)) != nullptr) { + Locale loc(locString); + if (loc.isBogus()) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + builder.addSupportedLocale(loc); + } + LocaleMatcher matcher = builder.build(errorCode); + LocaleMatcher::Result result = matcher.getBestMatchResult(desiredLocales, errorCode); + if (U_FAILURE(errorCode)) { return 0; } + if (result.getDesiredIndex() >= 0) { + if (acceptResult != nullptr) { + *acceptResult = *result.getDesiredLocale() == *result.getSupportedLocale() ? + ULOC_ACCEPT_VALID : ULOC_ACCEPT_FALLBACK; + } + const char *bestStr = result.getSupportedLocale()->getName(); + int32_t bestLength = (int32_t)uprv_strlen(bestStr); + if (bestLength <= capacity) { + uprv_memcpy(dest, bestStr, bestLength); + } + return u_terminateChars(dest, capacity, bestLength, &errorCode); + } else { + if (acceptResult != nullptr) { + *acceptResult = ULOC_ACCEPT_FAILED; + } + return u_terminateChars(dest, capacity, 0, &errorCode); + } +} + +} // namespace + +U_CAPI int32_t U_EXPORT2 +uloc_acceptLanguage(char *result, int32_t resultAvailable, + UAcceptResult *outResult, + const char **acceptList, int32_t acceptListCount, + UEnumeration *availableLocales, + UErrorCode *status) { + if (U_FAILURE(*status)) { return 0; } + if ((result == nullptr ? resultAvailable != 0 : resultAvailable < 0) || + (acceptList == nullptr ? acceptListCount != 0 : acceptListCount < 0) || + availableLocales == nullptr) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + LocaleFromTag converter; + Locale::ConvertingIterator<const char **, LocaleFromTag> desiredLocales( + acceptList, acceptList + acceptListCount, converter); + return acceptLanguage(*availableLocales, desiredLocales, + result, resultAvailable, outResult, *status); +} + +U_CAPI int32_t U_EXPORT2 +uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, + UAcceptResult *outResult, + const char *httpAcceptLanguage, + UEnumeration *availableLocales, + UErrorCode *status) { + if (U_FAILURE(*status)) { return 0; } + if ((result == nullptr ? resultAvailable != 0 : resultAvailable < 0) || + httpAcceptLanguage == nullptr || availableLocales == nullptr) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + LocalePriorityList list(httpAcceptLanguage, *status); + LocalePriorityList::Iterator desiredLocales = list.iterator(); + return acceptLanguage(*availableLocales, desiredLocales, + result, resultAvailable, outResult, *status); +} diff --git a/thirdparty/icu4c/common/localeprioritylist.cpp b/thirdparty/icu4c/common/localeprioritylist.cpp new file mode 100644 index 0000000000..8916b121be --- /dev/null +++ b/thirdparty/icu4c/common/localeprioritylist.cpp @@ -0,0 +1,239 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// localeprioritylist.cpp +// created: 2019jul11 Markus W. Scherer + +#include "unicode/utypes.h" +#include "unicode/localpointer.h" +#include "unicode/locid.h" +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" +#include "charstr.h" +#include "cmemory.h" +#include "localeprioritylist.h" +#include "uarrsort.h" +#include "uassert.h" +#include "uhash.h" + +U_NAMESPACE_BEGIN + +namespace { + +int32_t hashLocale(const UHashTok token) { + auto *locale = static_cast<const Locale *>(token.pointer); + return locale->hashCode(); +} + +UBool compareLocales(const UHashTok t1, const UHashTok t2) { + auto *l1 = static_cast<const Locale *>(t1.pointer); + auto *l2 = static_cast<const Locale *>(t2.pointer); + return *l1 == *l2; +} + +constexpr int32_t WEIGHT_ONE = 1000; + +struct LocaleAndWeight { + Locale *locale; + int32_t weight; // 0..1000 = 0.0..1.0 + int32_t index; // force stable sort + + int32_t compare(const LocaleAndWeight &other) const { + int32_t diff = other.weight - weight; // descending: other-this + if (diff != 0) { return diff; } + return index - other.index; + } +}; + +int32_t U_CALLCONV +compareLocaleAndWeight(const void * /*context*/, const void *left, const void *right) { + return static_cast<const LocaleAndWeight *>(left)-> + compare(*static_cast<const LocaleAndWeight *>(right)); +} + +const char *skipSpaces(const char *p, const char *limit) { + while (p < limit && *p == ' ') { ++p; } + return p; +} + +int32_t findTagLength(const char *p, const char *limit) { + // Look for accept-language delimiters. + // Leave other validation up to the Locale constructor. + const char *q; + for (q = p; q < limit; ++q) { + char c = *q; + if (c == ' ' || c == ',' || c == ';') { break; } + } + return static_cast<int32_t>(q - p); +} + +/** + * Parses and returns a qvalue weight in millis. + * Advances p to after the parsed substring. + * Returns a negative value if parsing fails. + */ +int32_t parseWeight(const char *&p, const char *limit) { + p = skipSpaces(p, limit); + char c; + if (p == limit || ((c = *p) != '0' && c != '1')) { return -1; } + int32_t weight = (c - '0') * 1000; + if (++p == limit || *p != '.') { return weight; } + int32_t multiplier = 100; + while (++p != limit && '0' <= (c = *p) && c <= '9') { + c -= '0'; + if (multiplier > 0) { + weight += c * multiplier; + multiplier /= 10; + } else if (multiplier == 0) { + // round up + if (c >= 5) { ++weight; } + multiplier = -1; + } // else ignore further fraction digits + } + return weight <= WEIGHT_ONE ? weight : -1; // bad if > 1.0 +} + +} // namespace + +/** + * Nothing but a wrapper over a MaybeStackArray of LocaleAndWeight. + * + * This wrapper exists (and is not in an anonymous namespace) + * so that we can forward-declare it in the header file and + * don't have to expose the MaybeStackArray specialization and + * the LocaleAndWeight to code (like the test) that #includes localeprioritylist.h. + * Also, otherwise we would have to do a platform-specific + * template export declaration of some kind for the MaybeStackArray specialization + * to be properly exported from the common DLL. + */ +struct LocaleAndWeightArray : public UMemory { + MaybeStackArray<LocaleAndWeight, 20> array; +}; + +LocalePriorityList::LocalePriorityList(StringPiece s, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + list = new LocaleAndWeightArray(); + if (list == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + const char *p = s.data(); + const char *limit = p + s.length(); + while ((p = skipSpaces(p, limit)) != limit) { + if (*p == ',') { // empty range field + ++p; + continue; + } + int32_t tagLength = findTagLength(p, limit); + if (tagLength == 0) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + CharString tag(p, tagLength, errorCode); + if (U_FAILURE(errorCode)) { return; } + Locale locale = Locale(tag.data()); + if (locale.isBogus()) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + int32_t weight = WEIGHT_ONE; + if ((p = skipSpaces(p + tagLength, limit)) != limit && *p == ';') { + if ((p = skipSpaces(p + 1, limit)) == limit || *p != 'q' || + (p = skipSpaces(p + 1, limit)) == limit || *p != '=' || + (++p, (weight = parseWeight(p, limit)) < 0)) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + p = skipSpaces(p, limit); + } + if (p != limit && *p != ',') { // trailing junk + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + add(locale, weight, errorCode); + if (p == limit) { break; } + ++p; + } + sort(errorCode); +} + +LocalePriorityList::~LocalePriorityList() { + if (list != nullptr) { + for (int32_t i = 0; i < listLength; ++i) { + delete list->array[i].locale; + } + delete list; + } + uhash_close(map); +} + +const Locale *LocalePriorityList::localeAt(int32_t i) const { + return list->array[i].locale; +} + +Locale *LocalePriorityList::orphanLocaleAt(int32_t i) { + if (list == nullptr) { return nullptr; } + LocaleAndWeight &lw = list->array[i]; + Locale *l = lw.locale; + lw.locale = nullptr; + return l; +} + +bool LocalePriorityList::add(const Locale &locale, int32_t weight, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return false; } + if (map == nullptr) { + if (weight <= 0) { return true; } // do not add q=0 + map = uhash_open(hashLocale, compareLocales, uhash_compareLong, &errorCode); + if (U_FAILURE(errorCode)) { return false; } + } + LocalPointer<Locale> clone; + int32_t index = uhash_geti(map, &locale); + if (index != 0) { + // Duplicate: Remove the old item and append it anew. + LocaleAndWeight &lw = list->array[index - 1]; + clone.adoptInstead(lw.locale); + lw.locale = nullptr; + lw.weight = 0; + ++numRemoved; + } + if (weight <= 0) { // do not add q=0 + if (index != 0) { + // Not strictly necessary but cleaner. + uhash_removei(map, &locale); + } + return true; + } + if (clone.isNull()) { + clone.adoptInstead(locale.clone()); + if (clone.isNull() || (clone->isBogus() && !locale.isBogus())) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return false; + } + } + if (listLength == list->array.getCapacity()) { + int32_t newCapacity = listLength < 50 ? 100 : 4 * listLength; + if (list->array.resize(newCapacity, listLength) == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return false; + } + } + uhash_puti(map, clone.getAlias(), listLength + 1, &errorCode); + if (U_FAILURE(errorCode)) { return false; } + LocaleAndWeight &lw = list->array[listLength]; + lw.locale = clone.orphan(); + lw.weight = weight; + lw.index = listLength++; + if (weight < WEIGHT_ONE) { hasWeights = true; } + U_ASSERT(uhash_count(map) == getLength()); + return true; +} + +void LocalePriorityList::sort(UErrorCode &errorCode) { + // Sort by descending weights if there is a mix of weights. + // The comparator forces a stable sort via the item index. + if (U_FAILURE(errorCode) || getLength() <= 1 || !hasWeights) { return; } + uprv_sortArray(list->array.getAlias(), listLength, sizeof(LocaleAndWeight), + compareLocaleAndWeight, nullptr, FALSE, &errorCode); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/localeprioritylist.h b/thirdparty/icu4c/common/localeprioritylist.h new file mode 100644 index 0000000000..41e9d3ea08 --- /dev/null +++ b/thirdparty/icu4c/common/localeprioritylist.h @@ -0,0 +1,115 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// localeprioritylist.h +// created: 2019jul11 Markus W. Scherer + +#ifndef __LOCALEPRIORITYLIST_H__ +#define __LOCALEPRIORITYLIST_H__ + +#include "unicode/utypes.h" +#include "unicode/locid.h" +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" + +struct UHashtable; + +U_NAMESPACE_BEGIN + +struct LocaleAndWeightArray; + +/** + * Parses a list of locales from an accept-language string. + * We are a bit more lenient than the spec: + * We accept extra whitespace in more places, empty range fields, + * and any number of qvalue fraction digits. + * + * https://tools.ietf.org/html/rfc2616#section-14.4 + * 14.4 Accept-Language + * + * Accept-Language = "Accept-Language" ":" + * 1#( language-range [ ";" "q" "=" qvalue ] ) + * language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" ) + * + * Each language-range MAY be given an associated quality value which + * represents an estimate of the user's preference for the languages + * specified by that range. The quality value defaults to "q=1". For + * example, + * + * Accept-Language: da, en-gb;q=0.8, en;q=0.7 + * + * https://tools.ietf.org/html/rfc2616#section-3.9 + * 3.9 Quality Values + * + * HTTP content negotiation (section 12) uses short "floating point" + * numbers to indicate the relative importance ("weight") of various + * negotiable parameters. A weight is normalized to a real number in + * the range 0 through 1, where 0 is the minimum and 1 the maximum + * value. If a parameter has a quality value of 0, then content with + * this parameter is `not acceptable' for the client. HTTP/1.1 + * applications MUST NOT generate more than three digits after the + * decimal point. User configuration of these values SHOULD also be + * limited in this fashion. + * + * qvalue = ( "0" [ "." 0*3DIGIT ] ) + * | ( "1" [ "." 0*3("0") ] ) + */ +class U_COMMON_API LocalePriorityList : public UMemory { +public: + class Iterator : public Locale::Iterator { + public: + UBool hasNext() const override { return count < length; } + + const Locale &next() override { + for(;;) { + const Locale *locale = list.localeAt(index++); + if (locale != nullptr) { + ++count; + return *locale; + } + } + } + + private: + friend class LocalePriorityList; + + Iterator(const LocalePriorityList &list) : list(list), length(list.getLength()) {} + + const LocalePriorityList &list; + int32_t index = 0; + int32_t count = 0; + const int32_t length; + }; + + LocalePriorityList(StringPiece s, UErrorCode &errorCode); + + ~LocalePriorityList(); + + int32_t getLength() const { return listLength - numRemoved; } + + int32_t getLengthIncludingRemoved() const { return listLength; } + + Iterator iterator() const { return Iterator(*this); } + + const Locale *localeAt(int32_t i) const; + + Locale *orphanLocaleAt(int32_t i); + +private: + LocalePriorityList(const LocalePriorityList &) = delete; + LocalePriorityList &operator=(const LocalePriorityList &) = delete; + + bool add(const Locale &locale, int32_t weight, UErrorCode &errorCode); + + void sort(UErrorCode &errorCode); + + LocaleAndWeightArray *list = nullptr; + int32_t listLength = 0; + int32_t numRemoved = 0; + bool hasWeights = false; // other than 1.0 + UHashtable *map = nullptr; +}; + +U_NAMESPACE_END + +#endif // __LOCALEPRIORITYLIST_H__ diff --git a/thirdparty/icu4c/common/localsvc.h b/thirdparty/icu4c/common/localsvc.h new file mode 100644 index 0000000000..3364019513 --- /dev/null +++ b/thirdparty/icu4c/common/localsvc.h @@ -0,0 +1,27 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +*************************************************************************** +* Copyright (C) 2006 International Business Machines Corporation * +* and others. All rights reserved. * +*************************************************************************** +*/ + +#ifndef LOCALSVC_H +#define LOCALSVC_H + +#include "unicode/utypes.h" + +#if defined(U_LOCAL_SERVICE_HOOK) && U_LOCAL_SERVICE_HOOK +/** + * Prototype for user-supplied service hook. This function is expected to return + * a type of factory object specific to the requested service. + * + * @param what service-specific string identifying the specific user hook + * @param status error status + * @return a service-specific hook, or NULL on failure. + */ +U_CAPI void* uprv_svc_hook(const char *what, UErrorCode *status); +#endif + +#endif diff --git a/thirdparty/icu4c/common/locavailable.cpp b/thirdparty/icu4c/common/locavailable.cpp new file mode 100644 index 0000000000..e8ec512e37 --- /dev/null +++ b/thirdparty/icu4c/common/locavailable.cpp @@ -0,0 +1,270 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1997-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: locavailable.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010feb25 +* created by: Markus W. Scherer +* +* Code for available locales, separated out from other .cpp files +* that then do not depend on resource bundle code and res_index bundles. +*/ + +#include "unicode/errorcode.h" +#include "unicode/utypes.h" +#include "unicode/locid.h" +#include "unicode/uloc.h" +#include "unicode/ures.h" +#include "cmemory.h" +#include "cstring.h" +#include "ucln_cmn.h" +#include "uassert.h" +#include "umutex.h" +#include "uresimp.h" + +// C++ API ----------------------------------------------------------------- *** + +U_NAMESPACE_BEGIN + +static icu::Locale* availableLocaleList = NULL; +static int32_t availableLocaleListCount; +static icu::UInitOnce gInitOnceLocale = U_INITONCE_INITIALIZER; + +U_NAMESPACE_END + +U_CDECL_BEGIN + +static UBool U_CALLCONV locale_available_cleanup(void) +{ + U_NAMESPACE_USE + + if (availableLocaleList) { + delete []availableLocaleList; + availableLocaleList = NULL; + } + availableLocaleListCount = 0; + gInitOnceLocale.reset(); + + return TRUE; +} + +U_CDECL_END + +U_NAMESPACE_BEGIN + +void U_CALLCONV locale_available_init() { + // This function is a friend of class Locale. + // This function is only invoked via umtx_initOnce(). + + // for now, there is a hardcoded list, so just walk through that list and set it up. + // Note: this function is a friend of class Locale. + availableLocaleListCount = uloc_countAvailable(); + if(availableLocaleListCount) { + availableLocaleList = new Locale[availableLocaleListCount]; + } + if (availableLocaleList == NULL) { + availableLocaleListCount= 0; + } + for (int32_t locCount=availableLocaleListCount-1; locCount>=0; --locCount) { + availableLocaleList[locCount].setFromPOSIXID(uloc_getAvailable(locCount)); + } + ucln_common_registerCleanup(UCLN_COMMON_LOCALE_AVAILABLE, locale_available_cleanup); +} + +const Locale* U_EXPORT2 +Locale::getAvailableLocales(int32_t& count) +{ + umtx_initOnce(gInitOnceLocale, &locale_available_init); + count = availableLocaleListCount; + return availableLocaleList; +} + + +U_NAMESPACE_END + +// C API ------------------------------------------------------------------- *** + +U_NAMESPACE_USE + +/* ### Constants **************************************************/ + +namespace { + +// Enough capacity for the two lists in the res_index.res file +const char** gAvailableLocaleNames[2] = {}; +int32_t gAvailableLocaleCounts[2] = {}; +icu::UInitOnce ginstalledLocalesInitOnce = U_INITONCE_INITIALIZER; + +class AvailableLocalesSink : public ResourceSink { + public: + void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE { + ResourceTable resIndexTable = value.getTable(status); + if (U_FAILURE(status)) { + return; + } + for (int32_t i = 0; resIndexTable.getKeyAndValue(i, key, value); ++i) { + ULocAvailableType type; + if (uprv_strcmp(key, "InstalledLocales") == 0) { + type = ULOC_AVAILABLE_DEFAULT; + } else if (uprv_strcmp(key, "AliasLocales") == 0) { + type = ULOC_AVAILABLE_ONLY_LEGACY_ALIASES; + } else { + // CLDRVersion, etc. + continue; + } + ResourceTable availableLocalesTable = value.getTable(status); + if (U_FAILURE(status)) { + return; + } + gAvailableLocaleCounts[type] = availableLocalesTable.getSize(); + gAvailableLocaleNames[type] = static_cast<const char**>( + uprv_malloc(gAvailableLocaleCounts[type] * sizeof(const char*))); + if (gAvailableLocaleNames[type] == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + for (int32_t j = 0; availableLocalesTable.getKeyAndValue(j, key, value); ++j) { + gAvailableLocaleNames[type][j] = key; + } + } + } +}; + +class AvailableLocalesStringEnumeration : public StringEnumeration { + public: + AvailableLocalesStringEnumeration(ULocAvailableType type) : fType(type) { + } + + const char* next(int32_t *resultLength, UErrorCode&) override { + ULocAvailableType actualType = fType; + int32_t actualIndex = fIndex++; + + // If the "combined" list was requested, resolve that now + if (fType == ULOC_AVAILABLE_WITH_LEGACY_ALIASES) { + int32_t defaultLocalesCount = gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT]; + if (actualIndex < defaultLocalesCount) { + actualType = ULOC_AVAILABLE_DEFAULT; + } else { + actualIndex -= defaultLocalesCount; + actualType = ULOC_AVAILABLE_ONLY_LEGACY_ALIASES; + } + } + + // Return the requested string + int32_t count = gAvailableLocaleCounts[actualType]; + const char* result; + if (actualIndex < count) { + result = gAvailableLocaleNames[actualType][actualIndex]; + if (resultLength != nullptr) { + *resultLength = static_cast<int32_t>(uprv_strlen(result)); + } + } else { + result = nullptr; + if (resultLength != nullptr) { + *resultLength = 0; + } + } + return result; + } + + void reset(UErrorCode&) override { + fIndex = 0; + } + + int32_t count(UErrorCode&) const override { + if (fType == ULOC_AVAILABLE_WITH_LEGACY_ALIASES) { + return gAvailableLocaleCounts[ULOC_AVAILABLE_DEFAULT] + + gAvailableLocaleCounts[ULOC_AVAILABLE_ONLY_LEGACY_ALIASES]; + } else { + return gAvailableLocaleCounts[fType]; + } + } + + private: + ULocAvailableType fType; + int32_t fIndex = 0; +}; + +/* ### Get available **************************************************/ + +static UBool U_CALLCONV uloc_cleanup(void) { + for (int32_t i = 0; i < UPRV_LENGTHOF(gAvailableLocaleNames); i++) { + uprv_free(gAvailableLocaleNames[i]); + gAvailableLocaleNames[i] = nullptr; + gAvailableLocaleCounts[i] = 0; + } + ginstalledLocalesInitOnce.reset(); + return TRUE; +} + +// Load Installed Locales. This function will be called exactly once +// via the initOnce mechanism. + +static void U_CALLCONV loadInstalledLocales(UErrorCode& status) { + ucln_common_registerCleanup(UCLN_COMMON_ULOC, uloc_cleanup); + + icu::LocalUResourceBundlePointer rb(ures_openDirect(NULL, "res_index", &status)); + AvailableLocalesSink sink; + ures_getAllItemsWithFallback(rb.getAlias(), "", sink, status); +} + +void _load_installedLocales(UErrorCode& status) { + umtx_initOnce(ginstalledLocalesInitOnce, &loadInstalledLocales, status); +} + +} // namespace + +U_CAPI const char* U_EXPORT2 +uloc_getAvailable(int32_t offset) { + icu::ErrorCode status; + _load_installedLocales(status); + if (status.isFailure()) { + return nullptr; + } + if (offset > gAvailableLocaleCounts[0]) { + // *status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + return gAvailableLocaleNames[0][offset]; +} + +U_CAPI int32_t U_EXPORT2 +uloc_countAvailable() { + icu::ErrorCode status; + _load_installedLocales(status); + if (status.isFailure()) { + return 0; + } + return gAvailableLocaleCounts[0]; +} + +U_CAPI UEnumeration* U_EXPORT2 +uloc_openAvailableByType(ULocAvailableType type, UErrorCode* status) { + if (U_FAILURE(*status)) { + return nullptr; + } + if (type < 0 || type >= ULOC_AVAILABLE_COUNT) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + _load_installedLocales(*status); + if (U_FAILURE(*status)) { + return nullptr; + } + LocalPointer<AvailableLocalesStringEnumeration> result( + new AvailableLocalesStringEnumeration(type), *status); + if (U_FAILURE(*status)) { + return nullptr; + } + return uenum_openFromStringEnumeration(result.orphan(), status); +} + diff --git a/thirdparty/icu4c/common/locbased.cpp b/thirdparty/icu4c/common/locbased.cpp new file mode 100644 index 0000000000..ff378b4cc7 --- /dev/null +++ b/thirdparty/icu4c/common/locbased.cpp @@ -0,0 +1,55 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2004-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: January 16 2004 +* Since: ICU 2.8 +********************************************************************** +*/ +#include "locbased.h" +#include "cstring.h" + +U_NAMESPACE_BEGIN + +Locale LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const { + const char* id = getLocaleID(type, status); + return Locale((id != 0) ? id : ""); +} + +const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { + if (U_FAILURE(status)) { + return NULL; + } + + switch(type) { + case ULOC_VALID_LOCALE: + return valid; + case ULOC_ACTUAL_LOCALE: + return actual; + default: + status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } +} + +void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) { + if (validID != 0) { + uprv_strncpy(valid, validID, ULOC_FULLNAME_CAPACITY); + valid[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate + } + if (actualID != 0) { + uprv_strncpy(actual, actualID, ULOC_FULLNAME_CAPACITY); + actual[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate + } +} + +void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) { + uprv_strcpy(valid, validID.getName()); + uprv_strcpy(actual, actualID.getName()); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/locbased.h b/thirdparty/icu4c/common/locbased.h new file mode 100644 index 0000000000..45738863b5 --- /dev/null +++ b/thirdparty/icu4c/common/locbased.h @@ -0,0 +1,107 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2004-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: January 16 2004 +* Since: ICU 2.8 +********************************************************************** +*/ +#ifndef LOCBASED_H +#define LOCBASED_H + +#include "unicode/locid.h" +#include "unicode/uobject.h" + +/** + * Macro to declare a locale LocaleBased wrapper object for the given + * object, which must have two members named `validLocale' and + * `actualLocale' of size ULOC_FULLNAME_CAPACITY + */ +#define U_LOCALE_BASED(varname, objname) \ + LocaleBased varname((objname).validLocale, (objname).actualLocale) + +U_NAMESPACE_BEGIN + +/** + * A utility class that unifies the implementation of getLocale() by + * various ICU services. This class is likely to be removed in the + * ICU 3.0 time frame in favor of an integrated approach with the + * services framework. + * @since ICU 2.8 + */ +class U_COMMON_API LocaleBased : public UMemory { + + public: + + /** + * Construct a LocaleBased wrapper around the two pointers. These + * will be aliased for the lifetime of this object. + */ + inline LocaleBased(char* validAlias, char* actualAlias); + + /** + * Construct a LocaleBased wrapper around the two const pointers. + * These will be aliased for the lifetime of this object. + */ + inline LocaleBased(const char* validAlias, const char* actualAlias); + + /** + * Return locale meta-data for the service object wrapped by this + * object. Either the valid or the actual locale may be + * retrieved. + * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE + * @param status input-output error code + * @return the indicated locale + */ + Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; + + /** + * Return the locale ID for the service object wrapped by this + * object. Either the valid or the actual locale may be + * retrieved. + * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE + * @param status input-output error code + * @return the indicated locale ID + */ + const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const; + + /** + * Set the locale meta-data for the service object wrapped by this + * object. If either parameter is zero, it is ignored. + * @param valid the ID of the valid locale + * @param actual the ID of the actual locale + */ + void setLocaleIDs(const char* valid, const char* actual); + + /** + * Set the locale meta-data for the service object wrapped by this + * object. + * @param valid the ID of the valid locale + * @param actual the ID of the actual locale + */ + void setLocaleIDs(const Locale& valid, const Locale& actual); + + private: + + char* valid; + + char* actual; +}; + +inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) : + valid(validAlias), actual(actualAlias) { +} + +inline LocaleBased::LocaleBased(const char* validAlias, + const char* actualAlias) : + // ugh: cast away const + valid((char*)validAlias), actual((char*)actualAlias) { +} + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/locdispnames.cpp b/thirdparty/icu4c/common/locdispnames.cpp new file mode 100644 index 0000000000..47c0667417 --- /dev/null +++ b/thirdparty/icu4c/common/locdispnames.cpp @@ -0,0 +1,890 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: locdispnames.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010feb25 +* created by: Markus W. Scherer +* +* Code for locale display names, separated out from other .cpp files +* that then do not depend on resource bundle code and display name data. +*/ + +#include "unicode/utypes.h" +#include "unicode/brkiter.h" +#include "unicode/locid.h" +#include "unicode/uenum.h" +#include "unicode/uloc.h" +#include "unicode/ures.h" +#include "unicode/ustring.h" +#include "bytesinkutil.h" +#include "charstr.h" +#include "cmemory.h" +#include "cstring.h" +#include "putilimp.h" +#include "ulocimp.h" +#include "uresimp.h" +#include "ureslocs.h" +#include "ustr_imp.h" + +// C++ API ----------------------------------------------------------------- *** + +U_NAMESPACE_BEGIN + +UnicodeString& +Locale::getDisplayLanguage(UnicodeString& dispLang) const +{ + return this->getDisplayLanguage(getDefault(), dispLang); +} + +/*We cannot make any assumptions on the size of the output display strings +* Yet, since we are calling through to a C API, we need to set limits on +* buffer size. For all the following getDisplay functions we first attempt +* to fill up a stack allocated buffer. If it is to small we heap allocated +* the exact buffer we need copy it to the UnicodeString and delete it*/ + +UnicodeString& +Locale::getDisplayLanguage(const Locale &displayLocale, + UnicodeString &result) const { + UChar *buffer; + UErrorCode errorCode=U_ZERO_ERROR; + int32_t length; + + buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY); + if(buffer==0) { + result.truncate(0); + return result; + } + + length=uloc_getDisplayLanguage(fullName, displayLocale.fullName, + buffer, result.getCapacity(), + &errorCode); + result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); + + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + buffer=result.getBuffer(length); + if(buffer==0) { + result.truncate(0); + return result; + } + errorCode=U_ZERO_ERROR; + length=uloc_getDisplayLanguage(fullName, displayLocale.fullName, + buffer, result.getCapacity(), + &errorCode); + result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); + } + + return result; +} + +UnicodeString& +Locale::getDisplayScript(UnicodeString& dispScript) const +{ + return this->getDisplayScript(getDefault(), dispScript); +} + +UnicodeString& +Locale::getDisplayScript(const Locale &displayLocale, + UnicodeString &result) const { + UChar *buffer; + UErrorCode errorCode=U_ZERO_ERROR; + int32_t length; + + buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY); + if(buffer==0) { + result.truncate(0); + return result; + } + + length=uloc_getDisplayScript(fullName, displayLocale.fullName, + buffer, result.getCapacity(), + &errorCode); + result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); + + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + buffer=result.getBuffer(length); + if(buffer==0) { + result.truncate(0); + return result; + } + errorCode=U_ZERO_ERROR; + length=uloc_getDisplayScript(fullName, displayLocale.fullName, + buffer, result.getCapacity(), + &errorCode); + result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); + } + + return result; +} + +UnicodeString& +Locale::getDisplayCountry(UnicodeString& dispCntry) const +{ + return this->getDisplayCountry(getDefault(), dispCntry); +} + +UnicodeString& +Locale::getDisplayCountry(const Locale &displayLocale, + UnicodeString &result) const { + UChar *buffer; + UErrorCode errorCode=U_ZERO_ERROR; + int32_t length; + + buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY); + if(buffer==0) { + result.truncate(0); + return result; + } + + length=uloc_getDisplayCountry(fullName, displayLocale.fullName, + buffer, result.getCapacity(), + &errorCode); + result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); + + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + buffer=result.getBuffer(length); + if(buffer==0) { + result.truncate(0); + return result; + } + errorCode=U_ZERO_ERROR; + length=uloc_getDisplayCountry(fullName, displayLocale.fullName, + buffer, result.getCapacity(), + &errorCode); + result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); + } + + return result; +} + +UnicodeString& +Locale::getDisplayVariant(UnicodeString& dispVar) const +{ + return this->getDisplayVariant(getDefault(), dispVar); +} + +UnicodeString& +Locale::getDisplayVariant(const Locale &displayLocale, + UnicodeString &result) const { + UChar *buffer; + UErrorCode errorCode=U_ZERO_ERROR; + int32_t length; + + buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY); + if(buffer==0) { + result.truncate(0); + return result; + } + + length=uloc_getDisplayVariant(fullName, displayLocale.fullName, + buffer, result.getCapacity(), + &errorCode); + result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); + + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + buffer=result.getBuffer(length); + if(buffer==0) { + result.truncate(0); + return result; + } + errorCode=U_ZERO_ERROR; + length=uloc_getDisplayVariant(fullName, displayLocale.fullName, + buffer, result.getCapacity(), + &errorCode); + result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); + } + + return result; +} + +UnicodeString& +Locale::getDisplayName( UnicodeString& name ) const +{ + return this->getDisplayName(getDefault(), name); +} + +UnicodeString& +Locale::getDisplayName(const Locale &displayLocale, + UnicodeString &result) const { + UChar *buffer; + UErrorCode errorCode=U_ZERO_ERROR; + int32_t length; + + buffer=result.getBuffer(ULOC_FULLNAME_CAPACITY); + if(buffer==0) { + result.truncate(0); + return result; + } + + length=uloc_getDisplayName(fullName, displayLocale.fullName, + buffer, result.getCapacity(), + &errorCode); + result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); + + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + buffer=result.getBuffer(length); + if(buffer==0) { + result.truncate(0); + return result; + } + errorCode=U_ZERO_ERROR; + length=uloc_getDisplayName(fullName, displayLocale.fullName, + buffer, result.getCapacity(), + &errorCode); + result.releaseBuffer(U_SUCCESS(errorCode) ? length : 0); + } + + return result; +} + +#if ! UCONFIG_NO_BREAK_ITERATION + +// ------------------------------------- +// Gets the objectLocale display name in the default locale language. +UnicodeString& U_EXPORT2 +BreakIterator::getDisplayName(const Locale& objectLocale, + UnicodeString& name) +{ + return objectLocale.getDisplayName(name); +} + +// ------------------------------------- +// Gets the objectLocale display name in the displayLocale language. +UnicodeString& U_EXPORT2 +BreakIterator::getDisplayName(const Locale& objectLocale, + const Locale& displayLocale, + UnicodeString& name) +{ + return objectLocale.getDisplayName(displayLocale, name); +} + +#endif + + +U_NAMESPACE_END + +// C API ------------------------------------------------------------------- *** + +U_NAMESPACE_USE + +/* ### Constants **************************************************/ + +/* These strings describe the resources we attempt to load from + the locale ResourceBundle data file.*/ +static const char _kLanguages[] = "Languages"; +static const char _kScripts[] = "Scripts"; +static const char _kScriptsStandAlone[] = "Scripts%stand-alone"; +static const char _kCountries[] = "Countries"; +static const char _kVariants[] = "Variants"; +static const char _kKeys[] = "Keys"; +static const char _kTypes[] = "Types"; +//static const char _kRootName[] = "root"; +static const char _kCurrency[] = "currency"; +static const char _kCurrencies[] = "Currencies"; +static const char _kLocaleDisplayPattern[] = "localeDisplayPattern"; +static const char _kPattern[] = "pattern"; +static const char _kSeparator[] = "separator"; + +/* ### Display name **************************************************/ + +static int32_t +_getStringOrCopyKey(const char *path, const char *locale, + const char *tableKey, + const char* subTableKey, + const char *itemKey, + const char *substitute, + UChar *dest, int32_t destCapacity, + UErrorCode *pErrorCode) { + const UChar *s = NULL; + int32_t length = 0; + + if(itemKey==NULL) { + /* top-level item: normal resource bundle access */ + icu::LocalUResourceBundlePointer rb(ures_open(path, locale, pErrorCode)); + + if(U_SUCCESS(*pErrorCode)) { + s=ures_getStringByKey(rb.getAlias(), tableKey, &length, pErrorCode); + /* see comment about closing rb near "return item;" in _res_getTableStringWithFallback() */ + } + } else { + /* Language code should not be a number. If it is, set the error code. */ + if (!uprv_strncmp(tableKey, "Languages", 9) && uprv_strtol(itemKey, NULL, 10)) { + *pErrorCode = U_MISSING_RESOURCE_ERROR; + } else { + /* second-level item, use special fallback */ + s=uloc_getTableStringWithFallback(path, locale, + tableKey, + subTableKey, + itemKey, + &length, + pErrorCode); + } + } + + if(U_SUCCESS(*pErrorCode)) { + int32_t copyLength=uprv_min(length, destCapacity); + if(copyLength>0 && s != NULL) { + u_memcpy(dest, s, copyLength); + } + } else { + /* no string from a resource bundle: convert the substitute */ + length=(int32_t)uprv_strlen(substitute); + u_charsToUChars(substitute, dest, uprv_min(length, destCapacity)); + *pErrorCode=U_USING_DEFAULT_WARNING; + } + + return u_terminateUChars(dest, destCapacity, length, pErrorCode); +} + +typedef int32_t U_CALLCONV UDisplayNameGetter(const char *, char *, int32_t, UErrorCode *); + +static int32_t +_getDisplayNameForComponent(const char *locale, + const char *displayLocale, + UChar *dest, int32_t destCapacity, + UDisplayNameGetter *getter, + const char *tag, + UErrorCode *pErrorCode) { + char localeBuffer[ULOC_FULLNAME_CAPACITY*4]; + int32_t length; + UErrorCode localStatus; + const char* root = NULL; + + /* argument checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + localStatus = U_ZERO_ERROR; + length=(*getter)(locale, localeBuffer, sizeof(localeBuffer), &localStatus); + if(U_FAILURE(localStatus) || localStatus==U_STRING_NOT_TERMINATED_WARNING) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + if(length==0) { + // For the display name, we treat this as unknown language (ICU-20273). + if (getter == uloc_getLanguage) { + uprv_strcpy(localeBuffer, "und"); + } else { + return u_terminateUChars(dest, destCapacity, 0, pErrorCode); + } + } + + root = tag == _kCountries ? U_ICUDATA_REGION : U_ICUDATA_LANG; + + return _getStringOrCopyKey(root, displayLocale, + tag, NULL, localeBuffer, + localeBuffer, + dest, destCapacity, + pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayLanguage(const char *locale, + const char *displayLocale, + UChar *dest, int32_t destCapacity, + UErrorCode *pErrorCode) { + return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity, + uloc_getLanguage, _kLanguages, pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayScript(const char* locale, + const char* displayLocale, + UChar *dest, int32_t destCapacity, + UErrorCode *pErrorCode) +{ + UErrorCode err = U_ZERO_ERROR; + int32_t res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity, + uloc_getScript, _kScriptsStandAlone, &err); + + if (destCapacity == 0 && err == U_BUFFER_OVERFLOW_ERROR) { + // For preflight, return the max of the value and the fallback. + int32_t fallback_res = _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity, + uloc_getScript, _kScripts, pErrorCode); + return (fallback_res > res) ? fallback_res : res; + } + if ( err == U_USING_DEFAULT_WARNING ) { + return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity, + uloc_getScript, _kScripts, pErrorCode); + } else { + *pErrorCode = err; + return res; + } +} + +static int32_t +uloc_getDisplayScriptInContext(const char* locale, + const char* displayLocale, + UChar *dest, int32_t destCapacity, + UErrorCode *pErrorCode) +{ + return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity, + uloc_getScript, _kScripts, pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayCountry(const char *locale, + const char *displayLocale, + UChar *dest, int32_t destCapacity, + UErrorCode *pErrorCode) { + return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity, + uloc_getCountry, _kCountries, pErrorCode); +} + +/* + * TODO separate variant1_variant2_variant3... + * by getting each tag's display string and concatenating them with ", " + * in between - similar to uloc_getDisplayName() + */ +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayVariant(const char *locale, + const char *displayLocale, + UChar *dest, int32_t destCapacity, + UErrorCode *pErrorCode) { + return _getDisplayNameForComponent(locale, displayLocale, dest, destCapacity, + uloc_getVariant, _kVariants, pErrorCode); +} + +/* Instead of having a separate pass for 'special' patterns, reintegrate the two + * so we don't get bitten by preflight bugs again. We can be reasonably efficient + * without two separate code paths, this code isn't that performance-critical. + * + * This code is general enough to deal with patterns that have a prefix or swap the + * language and remainder components, since we gave developers enough rope to do such + * things if they futz with the pattern data. But since we don't give them a way to + * specify a pattern for arbitrary combinations of components, there's not much use in + * that. I don't think our data includes such patterns, the only variable I know if is + * whether there is a space before the open paren, or not. Oh, and zh uses different + * chars than the standard open/close paren (which ja and ko use, btw). + */ +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayName(const char *locale, + const char *displayLocale, + UChar *dest, int32_t destCapacity, + UErrorCode *pErrorCode) +{ + static const UChar defaultSeparator[9] = { 0x007b, 0x0030, 0x007d, 0x002c, 0x0020, 0x007b, 0x0031, 0x007d, 0x0000 }; /* "{0}, {1}" */ + static const UChar sub0[4] = { 0x007b, 0x0030, 0x007d , 0x0000 } ; /* {0} */ + static const UChar sub1[4] = { 0x007b, 0x0031, 0x007d , 0x0000 } ; /* {1} */ + static const int32_t subLen = 3; + static const UChar defaultPattern[10] = { + 0x007b, 0x0030, 0x007d, 0x0020, 0x0028, 0x007b, 0x0031, 0x007d, 0x0029, 0x0000 + }; /* {0} ({1}) */ + static const int32_t defaultPatLen = 9; + static const int32_t defaultSub0Pos = 0; + static const int32_t defaultSub1Pos = 5; + + int32_t length; /* of formatted result */ + + const UChar *separator; + int32_t sepLen = 0; + const UChar *pattern; + int32_t patLen = 0; + int32_t sub0Pos, sub1Pos; + + UChar formatOpenParen = 0x0028; // ( + UChar formatReplaceOpenParen = 0x005B; // [ + UChar formatCloseParen = 0x0029; // ) + UChar formatReplaceCloseParen = 0x005D; // ] + + UBool haveLang = TRUE; /* assume true, set false if we find we don't have + a lang component in the locale */ + UBool haveRest = TRUE; /* assume true, set false if we find we don't have + any other component in the locale */ + UBool retry = FALSE; /* set true if we need to retry, see below */ + + int32_t langi = 0; /* index of the language substitution (0 or 1), virtually always 0 */ + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + { + UErrorCode status = U_ZERO_ERROR; + + icu::LocalUResourceBundlePointer locbundle( + ures_open(U_ICUDATA_LANG, displayLocale, &status)); + icu::LocalUResourceBundlePointer dspbundle( + ures_getByKeyWithFallback(locbundle.getAlias(), _kLocaleDisplayPattern, NULL, &status)); + + separator=ures_getStringByKeyWithFallback(dspbundle.getAlias(), _kSeparator, &sepLen, &status); + pattern=ures_getStringByKeyWithFallback(dspbundle.getAlias(), _kPattern, &patLen, &status); + } + + /* If we couldn't find any data, then use the defaults */ + if(sepLen == 0) { + separator = defaultSeparator; + } + /* #10244: Even though separator is now a pattern, it is awkward to handle it as such + * here since we are trying to build the display string in place in the dest buffer, + * and to handle it as a pattern would entail having separate storage for the + * substrings that need to be combined (the first of which may be the result of + * previous such combinations). So for now we continue to treat the portion between + * {0} and {1} as a string to be appended when joining substrings, ignoring anything + * that is before {0} or after {1} (no existing separator pattern has any such thing). + * This is similar to how pattern is handled below. + */ + { + UChar *p0=u_strstr(separator, sub0); + UChar *p1=u_strstr(separator, sub1); + if (p0==NULL || p1==NULL || p1<p0) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + separator = (const UChar *)p0 + subLen; + sepLen = static_cast<int32_t>(p1 - separator); + } + + if(patLen==0 || (patLen==defaultPatLen && !u_strncmp(pattern, defaultPattern, patLen))) { + pattern=defaultPattern; + patLen=defaultPatLen; + sub0Pos=defaultSub0Pos; + sub1Pos=defaultSub1Pos; + // use default formatOpenParen etc. set above + } else { /* non-default pattern */ + UChar *p0=u_strstr(pattern, sub0); + UChar *p1=u_strstr(pattern, sub1); + if (p0==NULL || p1==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + sub0Pos = static_cast<int32_t>(p0-pattern); + sub1Pos = static_cast<int32_t>(p1-pattern); + if (sub1Pos < sub0Pos) { /* a very odd pattern */ + int32_t t=sub0Pos; sub0Pos=sub1Pos; sub1Pos=t; + langi=1; + } + if (u_strchr(pattern, 0xFF08) != NULL) { + formatOpenParen = 0xFF08; // fullwidth ( + formatReplaceOpenParen = 0xFF3B; // fullwidth [ + formatCloseParen = 0xFF09; // fullwidth ) + formatReplaceCloseParen = 0xFF3D; // fullwidth ] + } + } + + /* We loop here because there is one case in which after the first pass we could need to + * reextract the data. If there's initial padding before the first element, we put in + * the padding and then write that element. If it turns out there's no second element, + * we didn't need the padding. If we do need the data (no preflight), and the first element + * would have fit but for the padding, we need to reextract. In this case (only) we + * adjust the parameters so padding is not added, and repeat. + */ + do { + UChar* p=dest; + int32_t patPos=0; /* position in the pattern, used for non-substitution portions */ + int32_t langLen=0; /* length of language substitution */ + int32_t langPos=0; /* position in output of language substitution */ + int32_t restLen=0; /* length of 'everything else' substitution */ + int32_t restPos=0; /* position in output of 'everything else' substitution */ + icu::LocalUEnumerationPointer kenum; /* keyword enumeration */ + + /* prefix of pattern, extremely likely to be empty */ + if(sub0Pos) { + if(destCapacity >= sub0Pos) { + while (patPos < sub0Pos) { + *p++ = pattern[patPos++]; + } + } else { + patPos=sub0Pos; + } + length=sub0Pos; + } else { + length=0; + } + + for(int32_t subi=0,resti=0;subi<2;) { /* iterate through patterns 0 and 1*/ + UBool subdone = FALSE; /* set true when ready to move to next substitution */ + + /* prep p and cap for calls to get display components, pin cap to 0 since + they complain if cap is negative */ + int32_t cap=destCapacity-length; + if (cap <= 0) { + cap=0; + } else { + p=dest+length; + } + + if (subi == langi) { /* {0}*/ + if(haveLang) { + langPos=length; + langLen=uloc_getDisplayLanguage(locale, displayLocale, p, cap, pErrorCode); + length+=langLen; + haveLang=langLen>0; + } + subdone=TRUE; + } else { /* {1} */ + if(!haveRest) { + subdone=TRUE; + } else { + int32_t len; /* length of component (plus other stuff) we just fetched */ + switch(resti++) { + case 0: + restPos=length; + len=uloc_getDisplayScriptInContext(locale, displayLocale, p, cap, pErrorCode); + break; + case 1: + len=uloc_getDisplayCountry(locale, displayLocale, p, cap, pErrorCode); + break; + case 2: + len=uloc_getDisplayVariant(locale, displayLocale, p, cap, pErrorCode); + break; + case 3: + kenum.adoptInstead(uloc_openKeywords(locale, pErrorCode)); + U_FALLTHROUGH; + default: { + const char* kw=uenum_next(kenum.getAlias(), &len, pErrorCode); + if (kw == NULL) { + len=0; /* mark that we didn't add a component */ + subdone=TRUE; + } else { + /* incorporating this behavior into the loop made it even more complex, + so just special case it here */ + len = uloc_getDisplayKeyword(kw, displayLocale, p, cap, pErrorCode); + if(len) { + if(len < cap) { + p[len]=0x3d; /* '=', assume we'll need it */ + } + len+=1; + + /* adjust for call to get keyword */ + cap-=len; + if(cap <= 0) { + cap=0; + } else { + p+=len; + } + } + /* reset for call below */ + if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) { + *pErrorCode=U_ZERO_ERROR; + } + int32_t vlen = uloc_getDisplayKeywordValue(locale, kw, displayLocale, + p, cap, pErrorCode); + if(len) { + if(vlen==0) { + --len; /* remove unneeded '=' */ + } + /* restore cap and p to what they were at start */ + cap=destCapacity-length; + if(cap <= 0) { + cap=0; + } else { + p=dest+length; + } + } + len+=vlen; /* total we added for key + '=' + value */ + } + } break; + } /* end switch */ + + if (len>0) { + /* we addeed a component, so add separator and write it if there's room. */ + if(len+sepLen<=cap) { + const UChar * plimit = p + len; + for (; p < plimit; p++) { + if (*p == formatOpenParen) { + *p = formatReplaceOpenParen; + } else if (*p == formatCloseParen) { + *p = formatReplaceCloseParen; + } + } + for(int32_t i=0;i<sepLen;++i) { + *p++=separator[i]; + } + } + length+=len+sepLen; + } else if(subdone) { + /* remove separator if we added it */ + if (length!=restPos) { + length-=sepLen; + } + restLen=length-restPos; + haveRest=restLen>0; + } + } + } + + if(*pErrorCode == U_BUFFER_OVERFLOW_ERROR) { + *pErrorCode=U_ZERO_ERROR; + } + + if(subdone) { + if(haveLang && haveRest) { + /* append internal portion of pattern, the first time, + or last portion of pattern the second time */ + int32_t padLen; + patPos+=subLen; + padLen=(subi==0 ? sub1Pos : patLen)-patPos; + if(length+padLen <= destCapacity) { + p=dest+length; + for(int32_t i=0;i<padLen;++i) { + *p++=pattern[patPos++]; + } + } else { + patPos+=padLen; + } + length+=padLen; + } else if(subi==0) { + /* don't have first component, reset for second component */ + sub0Pos=0; + length=0; + } else if(length>0) { + /* true length is the length of just the component we got. */ + length=haveLang?langLen:restLen; + if(dest && sub0Pos!=0) { + if (sub0Pos+length<=destCapacity) { + /* first component not at start of result, + but we have full component in buffer. */ + u_memmove(dest, dest+(haveLang?langPos:restPos), length); + } else { + /* would have fit, but didn't because of pattern prefix. */ + sub0Pos=0; /* stops initial padding (and a second retry, + so we won't end up here again) */ + retry=TRUE; + } + } + } + + ++subi; /* move on to next substitution */ + } + } + } while(retry); + + return u_terminateUChars(dest, destCapacity, length, pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayKeyword(const char* keyword, + const char* displayLocale, + UChar* dest, + int32_t destCapacity, + UErrorCode* status){ + + /* argument checking */ + if(status==NULL || U_FAILURE(*status)) { + return 0; + } + + if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { + *status=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + + /* pass itemKey=NULL to look for a top-level item */ + return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale, + _kKeys, NULL, + keyword, + keyword, + dest, destCapacity, + status); + +} + + +#define UCURRENCY_DISPLAY_NAME_INDEX 1 + +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayKeywordValue( const char* locale, + const char* keyword, + const char* displayLocale, + UChar* dest, + int32_t destCapacity, + UErrorCode* status){ + + + /* argument checking */ + if(status==NULL || U_FAILURE(*status)) { + return 0; + } + + if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { + *status=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* get the keyword value */ + CharString keywordValue; + { + CharStringByteSink sink(&keywordValue); + ulocimp_getKeywordValue(locale, keyword, sink, status); + } + + /* + * if the keyword is equal to currency .. then to get the display name + * we need to do the fallback ourselves + */ + if(uprv_stricmp(keyword, _kCurrency)==0){ + + int32_t dispNameLen = 0; + const UChar *dispName = NULL; + + icu::LocalUResourceBundlePointer bundle( + ures_open(U_ICUDATA_CURR, displayLocale, status)); + icu::LocalUResourceBundlePointer currencies( + ures_getByKey(bundle.getAlias(), _kCurrencies, NULL, status)); + icu::LocalUResourceBundlePointer currency( + ures_getByKeyWithFallback(currencies.getAlias(), keywordValue.data(), NULL, status)); + + dispName = ures_getStringByIndex(currency.getAlias(), UCURRENCY_DISPLAY_NAME_INDEX, &dispNameLen, status); + + if(U_FAILURE(*status)){ + if(*status == U_MISSING_RESOURCE_ERROR){ + /* we just want to write the value over if nothing is available */ + *status = U_USING_DEFAULT_WARNING; + }else{ + return 0; + } + } + + /* now copy the dispName over if not NULL */ + if(dispName != NULL){ + if(dispNameLen <= destCapacity){ + u_memcpy(dest, dispName, dispNameLen); + return u_terminateUChars(dest, destCapacity, dispNameLen, status); + }else{ + *status = U_BUFFER_OVERFLOW_ERROR; + return dispNameLen; + } + }else{ + /* we have not found the display name for the value .. just copy over */ + if(keywordValue.length() <= destCapacity){ + u_charsToUChars(keywordValue.data(), dest, keywordValue.length()); + return u_terminateUChars(dest, destCapacity, keywordValue.length(), status); + }else{ + *status = U_BUFFER_OVERFLOW_ERROR; + return keywordValue.length(); + } + } + + + }else{ + + return _getStringOrCopyKey(U_ICUDATA_LANG, displayLocale, + _kTypes, keyword, + keywordValue.data(), + keywordValue.data(), + dest, destCapacity, + status); + } +} diff --git a/thirdparty/icu4c/common/locdistance.cpp b/thirdparty/icu4c/common/locdistance.cpp new file mode 100644 index 0000000000..ff8892791b --- /dev/null +++ b/thirdparty/icu4c/common/locdistance.cpp @@ -0,0 +1,415 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// locdistance.cpp +// created: 2019may08 Markus W. Scherer + +#include "unicode/utypes.h" +#include "unicode/bytestrie.h" +#include "unicode/localematcher.h" +#include "unicode/locid.h" +#include "unicode/uobject.h" +#include "unicode/ures.h" +#include "cstring.h" +#include "locdistance.h" +#include "loclikelysubtags.h" +#include "uassert.h" +#include "ucln_cmn.h" +#include "uinvchar.h" +#include "umutex.h" + +U_NAMESPACE_BEGIN + +namespace { + +/** + * Bit flag used on the last character of a subtag in the trie. + * Must be set consistently by the builder and the lookup code. + */ +constexpr int32_t END_OF_SUBTAG = 0x80; +/** Distance value bit flag, set by the builder. */ +constexpr int32_t DISTANCE_SKIP_SCRIPT = 0x80; +/** Distance value bit flag, set by trieNext(). */ +constexpr int32_t DISTANCE_IS_FINAL = 0x100; +constexpr int32_t DISTANCE_IS_FINAL_OR_SKIP_SCRIPT = DISTANCE_IS_FINAL | DISTANCE_SKIP_SCRIPT; + +constexpr int32_t ABOVE_THRESHOLD = 100; + +// Indexes into array of distances. +enum { + IX_DEF_LANG_DISTANCE, + IX_DEF_SCRIPT_DISTANCE, + IX_DEF_REGION_DISTANCE, + IX_MIN_REGION_DISTANCE, + IX_LIMIT +}; + +LocaleDistance *gLocaleDistance = nullptr; +UInitOnce gInitOnce = U_INITONCE_INITIALIZER; + +UBool U_CALLCONV cleanup() { + delete gLocaleDistance; + gLocaleDistance = nullptr; + gInitOnce.reset(); + return TRUE; +} + +} // namespace + +void U_CALLCONV LocaleDistance::initLocaleDistance(UErrorCode &errorCode) { + // This function is invoked only via umtx_initOnce(). + U_ASSERT(gLocaleDistance == nullptr); + const XLikelySubtags &likely = *XLikelySubtags::getSingleton(errorCode); + if (U_FAILURE(errorCode)) { return; } + const LocaleDistanceData &data = likely.getDistanceData(); + if (data.distanceTrieBytes == nullptr || + data.regionToPartitions == nullptr || data.partitions == nullptr || + // ok if no paradigms + data.distances == nullptr) { + errorCode = U_MISSING_RESOURCE_ERROR; + return; + } + gLocaleDistance = new LocaleDistance(data, likely); + if (gLocaleDistance == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + ucln_common_registerCleanup(UCLN_COMMON_LOCALE_DISTANCE, cleanup); +} + +const LocaleDistance *LocaleDistance::getSingleton(UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + umtx_initOnce(gInitOnce, &LocaleDistance::initLocaleDistance, errorCode); + return gLocaleDistance; +} + +LocaleDistance::LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely) : + likelySubtags(likely), + trie(data.distanceTrieBytes), + regionToPartitionsIndex(data.regionToPartitions), partitionArrays(data.partitions), + paradigmLSRs(data.paradigms), paradigmLSRsLength(data.paradigmsLength), + defaultLanguageDistance(data.distances[IX_DEF_LANG_DISTANCE]), + defaultScriptDistance(data.distances[IX_DEF_SCRIPT_DISTANCE]), + defaultRegionDistance(data.distances[IX_DEF_REGION_DISTANCE]), + minRegionDistance(data.distances[IX_MIN_REGION_DISTANCE]) { + // For the default demotion value, use the + // default region distance between unrelated Englishes. + // Thus, unless demotion is turned off, + // a mere region difference for one desired locale + // is as good as a perfect match for the next following desired locale. + // As of CLDR 36, we have <languageMatch desired="en_*_*" supported="en_*_*" distance="5"/>. + LSR en("en", "Latn", "US", LSR::EXPLICIT_LSR); + LSR enGB("en", "Latn", "GB", LSR::EXPLICIT_LSR); + const LSR *p_enGB = &enGB; + int32_t indexAndDistance = getBestIndexAndDistance(en, &p_enGB, 1, + shiftDistance(50), ULOCMATCH_FAVOR_LANGUAGE, ULOCMATCH_DIRECTION_WITH_ONE_WAY); + defaultDemotionPerDesiredLocale = getDistanceFloor(indexAndDistance); +} + +int32_t LocaleDistance::getBestIndexAndDistance( + const LSR &desired, + const LSR **supportedLSRs, int32_t supportedLSRsLength, + int32_t shiftedThreshold, + ULocMatchFavorSubtag favorSubtag, ULocMatchDirection direction) const { + BytesTrie iter(trie); + // Look up the desired language only once for all supported LSRs. + // Its "distance" is either a match point value of 0, or a non-match negative value. + // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules. + int32_t desLangDistance = trieNext(iter, desired.language, false); + uint64_t desLangState = desLangDistance >= 0 && supportedLSRsLength > 1 ? iter.getState64() : 0; + // Index of the supported LSR with the lowest distance. + int32_t bestIndex = -1; + // Cached lookup info from XLikelySubtags.compareLikely(). + int32_t bestLikelyInfo = -1; + for (int32_t slIndex = 0; slIndex < supportedLSRsLength; ++slIndex) { + const LSR &supported = *supportedLSRs[slIndex]; + bool star = false; + int32_t distance = desLangDistance; + if (distance >= 0) { + U_ASSERT((distance & DISTANCE_IS_FINAL) == 0); + if (slIndex != 0) { + iter.resetToState64(desLangState); + } + distance = trieNext(iter, supported.language, true); + } + // Note: The data builder verifies that there are no rules with "any" (*) language and + // real (non *) script or region subtags. + // This means that if the lookup for either language fails we can use + // the default distances without further lookups. + int32_t flags; + if (distance >= 0) { + flags = distance & DISTANCE_IS_FINAL_OR_SKIP_SCRIPT; + distance &= ~DISTANCE_IS_FINAL_OR_SKIP_SCRIPT; + } else { // <*, *> + if (uprv_strcmp(desired.language, supported.language) == 0) { + distance = 0; + } else { + distance = defaultLanguageDistance; + } + flags = 0; + star = true; + } + U_ASSERT(0 <= distance && distance <= 100); + // Round up the shifted threshold (if fraction bits are not 0) + // for comparison with un-shifted distances until we need fraction bits. + // (If we simply shifted non-zero fraction bits away, then we might ignore a language + // when it's really still a micro distance below the threshold.) + int32_t roundedThreshold = (shiftedThreshold + DISTANCE_FRACTION_MASK) >> DISTANCE_SHIFT; + // We implement "favor subtag" by reducing the language subtag distance + // (unscientifically reducing it to a quarter of the normal value), + // so that the script distance is relatively more important. + // For example, given a default language distance of 80, we reduce it to 20, + // which is below the default threshold of 50, which is the default script distance. + if (favorSubtag == ULOCMATCH_FAVOR_SCRIPT) { + distance >>= 2; + } + // Let distance == roundedThreshold pass until the tie-breaker logic + // at the end of the loop. + if (distance > roundedThreshold) { + continue; + } + + int32_t scriptDistance; + if (star || flags != 0) { + if (uprv_strcmp(desired.script, supported.script) == 0) { + scriptDistance = 0; + } else { + scriptDistance = defaultScriptDistance; + } + } else { + scriptDistance = getDesSuppScriptDistance(iter, iter.getState64(), + desired.script, supported.script); + flags = scriptDistance & DISTANCE_IS_FINAL; + scriptDistance &= ~DISTANCE_IS_FINAL; + } + distance += scriptDistance; + if (distance > roundedThreshold) { + continue; + } + + if (uprv_strcmp(desired.region, supported.region) == 0) { + // regionDistance = 0 + } else if (star || (flags & DISTANCE_IS_FINAL) != 0) { + distance += defaultRegionDistance; + } else { + int32_t remainingThreshold = roundedThreshold - distance; + if (minRegionDistance > remainingThreshold) { + continue; + } + + // From here on we know the regions are not equal. + // Map each region to zero or more partitions. (zero = one non-matching string) + // (Each array of single-character partition strings is encoded as one string.) + // If either side has more than one, then we find the maximum distance. + // This could be optimized by adding some more structure, but probably not worth it. + distance += getRegionPartitionsDistance( + iter, iter.getState64(), + partitionsForRegion(desired), + partitionsForRegion(supported), + remainingThreshold); + } + int32_t shiftedDistance = shiftDistance(distance); + if (shiftedDistance == 0) { + // Distinguish between equivalent but originally unequal locales via an + // additional micro distance. + shiftedDistance |= (desired.flags ^ supported.flags); + if (shiftedDistance < shiftedThreshold) { + if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY || + // Is there also a match when we swap desired/supported? + isMatch(supported, desired, shiftedThreshold, favorSubtag)) { + if (shiftedDistance == 0) { + return slIndex << INDEX_SHIFT; + } + bestIndex = slIndex; + shiftedThreshold = shiftedDistance; + bestLikelyInfo = -1; + } + } + } else { + if (shiftedDistance < shiftedThreshold) { + if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY || + // Is there also a match when we swap desired/supported? + isMatch(supported, desired, shiftedThreshold, favorSubtag)) { + bestIndex = slIndex; + shiftedThreshold = shiftedDistance; + bestLikelyInfo = -1; + } + } else if (shiftedDistance == shiftedThreshold && bestIndex >= 0) { + if (direction != ULOCMATCH_DIRECTION_ONLY_TWO_WAY || + // Is there also a match when we swap desired/supported? + isMatch(supported, desired, shiftedThreshold, favorSubtag)) { + bestLikelyInfo = likelySubtags.compareLikely( + supported, *supportedLSRs[bestIndex], bestLikelyInfo); + if ((bestLikelyInfo & 1) != 0) { + // This supported locale matches as well as the previous best match, + // and neither matches perfectly, + // but this one is "more likely" (has more-default subtags). + bestIndex = slIndex; + } + } + } + } + } + return bestIndex >= 0 ? + (bestIndex << INDEX_SHIFT) | shiftedThreshold : + INDEX_NEG_1 | shiftDistance(ABOVE_THRESHOLD); +} + +int32_t LocaleDistance::getDesSuppScriptDistance( + BytesTrie &iter, uint64_t startState, const char *desired, const char *supported) { + // Note: The data builder verifies that there are no <*, supported> or <desired, *> rules. + int32_t distance = trieNext(iter, desired, false); + if (distance >= 0) { + distance = trieNext(iter, supported, true); + } + if (distance < 0) { + UStringTrieResult result = iter.resetToState64(startState).next(u'*'); // <*, *> + U_ASSERT(USTRINGTRIE_HAS_VALUE(result)); + if (uprv_strcmp(desired, supported) == 0) { + distance = 0; // same script + } else { + distance = iter.getValue(); + U_ASSERT(distance >= 0); + } + if (result == USTRINGTRIE_FINAL_VALUE) { + distance |= DISTANCE_IS_FINAL; + } + } + return distance; +} + +int32_t LocaleDistance::getRegionPartitionsDistance( + BytesTrie &iter, uint64_t startState, + const char *desiredPartitions, const char *supportedPartitions, int32_t threshold) { + char desired = *desiredPartitions++; + char supported = *supportedPartitions++; + U_ASSERT(desired != 0 && supported != 0); + // See if we have single desired/supported partitions, from NUL-terminated + // partition strings without explicit length. + bool suppLengthGt1 = *supportedPartitions != 0; // gt1: more than 1 character + // equivalent to: if (desLength == 1 && suppLength == 1) + if (*desiredPartitions == 0 && !suppLengthGt1) { + // Fastpath for single desired/supported partitions. + UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG); + if (USTRINGTRIE_HAS_NEXT(result)) { + result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG); + if (USTRINGTRIE_HAS_VALUE(result)) { + return iter.getValue(); + } + } + return getFallbackRegionDistance(iter, startState); + } + + const char *supportedStart = supportedPartitions - 1; // for restart of inner loop + int32_t regionDistance = 0; + // Fall back to * only once, not for each pair of partition strings. + bool star = false; + for (;;) { + // Look up each desired-partition string only once, + // not for each (desired, supported) pair. + UStringTrieResult result = iter.next(uprv_invCharToAscii(desired) | END_OF_SUBTAG); + if (USTRINGTRIE_HAS_NEXT(result)) { + uint64_t desState = suppLengthGt1 ? iter.getState64() : 0; + for (;;) { + result = iter.next(uprv_invCharToAscii(supported) | END_OF_SUBTAG); + int32_t d; + if (USTRINGTRIE_HAS_VALUE(result)) { + d = iter.getValue(); + } else if (star) { + d = 0; + } else { + d = getFallbackRegionDistance(iter, startState); + star = true; + } + if (d > threshold) { + return d; + } else if (regionDistance < d) { + regionDistance = d; + } + if ((supported = *supportedPartitions++) != 0) { + iter.resetToState64(desState); + } else { + break; + } + } + } else if (!star) { + int32_t d = getFallbackRegionDistance(iter, startState); + if (d > threshold) { + return d; + } else if (regionDistance < d) { + regionDistance = d; + } + star = true; + } + if ((desired = *desiredPartitions++) != 0) { + iter.resetToState64(startState); + supportedPartitions = supportedStart; + supported = *supportedPartitions++; + } else { + break; + } + } + return regionDistance; +} + +int32_t LocaleDistance::getFallbackRegionDistance(BytesTrie &iter, uint64_t startState) { +#if U_DEBUG + UStringTrieResult result = +#endif + iter.resetToState64(startState).next(u'*'); // <*, *> + U_ASSERT(USTRINGTRIE_HAS_VALUE(result)); + int32_t distance = iter.getValue(); + U_ASSERT(distance >= 0); + return distance; +} + +int32_t LocaleDistance::trieNext(BytesTrie &iter, const char *s, bool wantValue) { + uint8_t c; + if ((c = *s) == 0) { + return -1; // no empty subtags in the distance data + } + for (;;) { + c = uprv_invCharToAscii(c); + // EBCDIC: If *s is not an invariant character, + // then c is now 0 and will simply not match anything, which is harmless. + uint8_t next = *++s; + if (next != 0) { + if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) { + return -1; + } + } else { + // last character of this subtag + UStringTrieResult result = iter.next(c | END_OF_SUBTAG); + if (wantValue) { + if (USTRINGTRIE_HAS_VALUE(result)) { + int32_t value = iter.getValue(); + if (result == USTRINGTRIE_FINAL_VALUE) { + value |= DISTANCE_IS_FINAL; + } + return value; + } + } else { + if (USTRINGTRIE_HAS_NEXT(result)) { + return 0; + } + } + return -1; + } + c = next; + } +} + +UBool LocaleDistance::isParadigmLSR(const LSR &lsr) const { + // Linear search for a very short list (length 6 as of 2019), + // because we look for equivalence not equality, and + // because it's easy. + // If there are many paradigm LSRs we should use a hash set + // with custom comparator and hasher. + U_ASSERT(paradigmLSRsLength <= 15); + for (int32_t i = 0; i < paradigmLSRsLength; ++i) { + if (lsr.isEquivalentTo(paradigmLSRs[i])) { return true; } + } + return false; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/locdistance.h b/thirdparty/icu4c/common/locdistance.h new file mode 100644 index 0000000000..51b777e627 --- /dev/null +++ b/thirdparty/icu4c/common/locdistance.h @@ -0,0 +1,151 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// locdistance.h +// created: 2019may08 Markus W. Scherer + +#ifndef __LOCDISTANCE_H__ +#define __LOCDISTANCE_H__ + +#include "unicode/utypes.h" +#include "unicode/bytestrie.h" +#include "unicode/localematcher.h" +#include "unicode/locid.h" +#include "unicode/uobject.h" +#include "lsr.h" + +U_NAMESPACE_BEGIN + +struct LocaleDistanceData; + +/** + * Offline-built data for LocaleMatcher. + * Mostly but not only the data for mapping locales to their maximized forms. + */ +class LocaleDistance final : public UMemory { +public: + static const LocaleDistance *getSingleton(UErrorCode &errorCode); + + static int32_t shiftDistance(int32_t distance) { + return distance << DISTANCE_SHIFT; + } + + static int32_t getShiftedDistance(int32_t indexAndDistance) { + return indexAndDistance & DISTANCE_MASK; + } + + static double getDistanceDouble(int32_t indexAndDistance) { + double shiftedDistance = getShiftedDistance(indexAndDistance); + return shiftedDistance / (1 << DISTANCE_SHIFT); + } + + static int32_t getDistanceFloor(int32_t indexAndDistance) { + return (indexAndDistance & DISTANCE_MASK) >> DISTANCE_SHIFT; + } + + static int32_t getIndex(int32_t indexAndDistance) { + // assert indexAndDistance >= 0; + return indexAndDistance >> INDEX_SHIFT; + } + + /** + * Finds the supported LSR with the smallest distance from the desired one. + * Equivalent LSR subtags must be normalized into a canonical form. + * + * <p>Returns the index of the lowest-distance supported LSR in the high bits + * (negative if none has a distance below the threshold), + * and its distance (0..ABOVE_THRESHOLD) in the low bits. + */ + int32_t getBestIndexAndDistance(const LSR &desired, + const LSR **supportedLSRs, int32_t supportedLSRsLength, + int32_t shiftedThreshold, + ULocMatchFavorSubtag favorSubtag, + ULocMatchDirection direction) const; + + UBool isParadigmLSR(const LSR &lsr) const; + + int32_t getDefaultScriptDistance() const { + return defaultScriptDistance; + } + + int32_t getDefaultDemotionPerDesiredLocale() const { + return defaultDemotionPerDesiredLocale; + } + +private: + // The distance is shifted left to gain some fraction bits. + static constexpr int32_t DISTANCE_SHIFT = 3; + static constexpr int32_t DISTANCE_FRACTION_MASK = 7; + // 7 bits for 0..100 + static constexpr int32_t DISTANCE_INT_SHIFT = 7; + static constexpr int32_t INDEX_SHIFT = DISTANCE_INT_SHIFT + DISTANCE_SHIFT; + static constexpr int32_t DISTANCE_MASK = 0x3ff; + // tic constexpr int32_t MAX_INDEX = 0x1fffff; // avoids sign bit + static constexpr int32_t INDEX_NEG_1 = 0xfffffc00; + + LocaleDistance(const LocaleDistanceData &data, const XLikelySubtags &likely); + LocaleDistance(const LocaleDistance &other) = delete; + LocaleDistance &operator=(const LocaleDistance &other) = delete; + + static void initLocaleDistance(UErrorCode &errorCode); + + UBool isMatch(const LSR &desired, const LSR &supported, + int32_t shiftedThreshold, ULocMatchFavorSubtag favorSubtag) const { + const LSR *pSupp = &supported; + return getBestIndexAndDistance( + desired, &pSupp, 1, + shiftedThreshold, favorSubtag, ULOCMATCH_DIRECTION_WITH_ONE_WAY) >= 0; + } + + static int32_t getDesSuppScriptDistance(BytesTrie &iter, uint64_t startState, + const char *desired, const char *supported); + + static int32_t getRegionPartitionsDistance( + BytesTrie &iter, uint64_t startState, + const char *desiredPartitions, const char *supportedPartitions, + int32_t threshold); + + static int32_t getFallbackRegionDistance(BytesTrie &iter, uint64_t startState); + + static int32_t trieNext(BytesTrie &iter, const char *s, bool wantValue); + + const char *partitionsForRegion(const LSR &lsr) const { + // ill-formed region -> one non-matching string + int32_t pIndex = regionToPartitionsIndex[lsr.regionIndex]; + return partitionArrays[pIndex]; + } + + int32_t getDefaultRegionDistance() const { + return defaultRegionDistance; + } + + const XLikelySubtags &likelySubtags; + + // The trie maps each dlang+slang+dscript+sscript+dregion+sregion + // (encoded in ASCII with bit 7 set on the last character of each subtag) to a distance. + // There is also a trie value for each subsequence of whole subtags. + // One '*' is used for a (desired, supported) pair of "und", "Zzzz"/"", or "ZZ"/"". + BytesTrie trie; + + /** + * Maps each region to zero or more single-character partitions. + */ + const uint8_t *regionToPartitionsIndex; + const char **partitionArrays; + + /** + * Used to get the paradigm region for a cluster, if there is one. + */ + const LSR *paradigmLSRs; + int32_t paradigmLSRsLength; + + int32_t defaultLanguageDistance; + int32_t defaultScriptDistance; + int32_t defaultRegionDistance; + int32_t minRegionDistance; + int32_t defaultDemotionPerDesiredLocale; +}; + +U_NAMESPACE_END + +#endif // __LOCDISTANCE_H__ diff --git a/thirdparty/icu4c/common/locdspnm.cpp b/thirdparty/icu4c/common/locdspnm.cpp new file mode 100644 index 0000000000..43334f5196 --- /dev/null +++ b/thirdparty/icu4c/common/locdspnm.cpp @@ -0,0 +1,1110 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2016, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/locdspnm.h" +#include "unicode/simpleformatter.h" +#include "unicode/ucasemap.h" +#include "unicode/ures.h" +#include "unicode/udisplaycontext.h" +#include "unicode/brkiter.h" +#include "unicode/ucurr.h" +#include "cmemory.h" +#include "cstring.h" +#include "mutex.h" +#include "ulocimp.h" +#include "umutex.h" +#include "ureslocs.h" +#include "uresimp.h" + +#include <stdarg.h> + +/** + * Concatenate a number of null-terminated strings to buffer, leaving a + * null-terminated string. The last argument should be the null pointer. + * Return the length of the string in the buffer, not counting the trailing + * null. Return -1 if there is an error (buffer is null, or buflen < 1). + */ +static int32_t ncat(char *buffer, uint32_t buflen, ...) { + va_list args; + char *str; + char *p = buffer; + const char* e = buffer + buflen - 1; + + if (buffer == NULL || buflen < 1) { + return -1; + } + + va_start(args, buflen); + while ((str = va_arg(args, char *)) != 0) { + char c; + while (p != e && (c = *str++) != 0) { + *p++ = c; + } + } + *p = 0; + va_end(args); + + return static_cast<int32_t>(p - buffer); +} + +U_NAMESPACE_BEGIN + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +// Access resource data for locale components. +// Wrap code in uloc.c for now. +class ICUDataTable { + const char* path; + Locale locale; + +public: + ICUDataTable(const char* path, const Locale& locale); + ~ICUDataTable(); + + const Locale& getLocale(); + + UnicodeString& get(const char* tableKey, const char* itemKey, + UnicodeString& result) const; + UnicodeString& get(const char* tableKey, const char* subTableKey, const char* itemKey, + UnicodeString& result) const; + + UnicodeString& getNoFallback(const char* tableKey, const char* itemKey, + UnicodeString &result) const; + UnicodeString& getNoFallback(const char* tableKey, const char* subTableKey, const char* itemKey, + UnicodeString &result) const; +}; + +inline UnicodeString & +ICUDataTable::get(const char* tableKey, const char* itemKey, UnicodeString& result) const { + return get(tableKey, NULL, itemKey, result); +} + +inline UnicodeString & +ICUDataTable::getNoFallback(const char* tableKey, const char* itemKey, UnicodeString& result) const { + return getNoFallback(tableKey, NULL, itemKey, result); +} + +ICUDataTable::ICUDataTable(const char* path, const Locale& locale) + : path(NULL), locale(Locale::getRoot()) +{ + if (path) { + int32_t len = static_cast<int32_t>(uprv_strlen(path)); + this->path = (const char*) uprv_malloc(len + 1); + if (this->path) { + uprv_strcpy((char *)this->path, path); + this->locale = locale; + } + } +} + +ICUDataTable::~ICUDataTable() { + if (path) { + uprv_free((void*) path); + path = NULL; + } +} + +const Locale& +ICUDataTable::getLocale() { + return locale; +} + +UnicodeString & +ICUDataTable::get(const char* tableKey, const char* subTableKey, const char* itemKey, + UnicodeString &result) const { + UErrorCode status = U_ZERO_ERROR; + int32_t len = 0; + + const UChar *s = uloc_getTableStringWithFallback(path, locale.getName(), + tableKey, subTableKey, itemKey, + &len, &status); + if (U_SUCCESS(status) && len > 0) { + return result.setTo(s, len); + } + return result.setTo(UnicodeString(itemKey, -1, US_INV)); +} + +UnicodeString & +ICUDataTable::getNoFallback(const char* tableKey, const char* subTableKey, const char* itemKey, + UnicodeString& result) const { + UErrorCode status = U_ZERO_ERROR; + int32_t len = 0; + + const UChar *s = uloc_getTableStringWithFallback(path, locale.getName(), + tableKey, subTableKey, itemKey, + &len, &status); + if (U_SUCCESS(status)) { + return result.setTo(s, len); + } + + result.setToBogus(); + return result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +LocaleDisplayNames::~LocaleDisplayNames() {} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#if 0 // currently unused + +class DefaultLocaleDisplayNames : public LocaleDisplayNames { + UDialectHandling dialectHandling; + +public: + // constructor + DefaultLocaleDisplayNames(UDialectHandling dialectHandling); + + virtual ~DefaultLocaleDisplayNames(); + + virtual const Locale& getLocale() const; + virtual UDialectHandling getDialectHandling() const; + + virtual UnicodeString& localeDisplayName(const Locale& locale, + UnicodeString& result) const; + virtual UnicodeString& localeDisplayName(const char* localeId, + UnicodeString& result) const; + virtual UnicodeString& languageDisplayName(const char* lang, + UnicodeString& result) const; + virtual UnicodeString& scriptDisplayName(const char* script, + UnicodeString& result) const; + virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode, + UnicodeString& result) const; + virtual UnicodeString& regionDisplayName(const char* region, + UnicodeString& result) const; + virtual UnicodeString& variantDisplayName(const char* variant, + UnicodeString& result) const; + virtual UnicodeString& keyDisplayName(const char* key, + UnicodeString& result) const; + virtual UnicodeString& keyValueDisplayName(const char* key, + const char* value, + UnicodeString& result) const; +}; + +DefaultLocaleDisplayNames::DefaultLocaleDisplayNames(UDialectHandling dialectHandling) + : dialectHandling(dialectHandling) { +} + +DefaultLocaleDisplayNames::~DefaultLocaleDisplayNames() { +} + +const Locale& +DefaultLocaleDisplayNames::getLocale() const { + return Locale::getRoot(); +} + +UDialectHandling +DefaultLocaleDisplayNames::getDialectHandling() const { + return dialectHandling; +} + +UnicodeString& +DefaultLocaleDisplayNames::localeDisplayName(const Locale& locale, + UnicodeString& result) const { + return result = UnicodeString(locale.getName(), -1, US_INV); +} + +UnicodeString& +DefaultLocaleDisplayNames::localeDisplayName(const char* localeId, + UnicodeString& result) const { + return result = UnicodeString(localeId, -1, US_INV); +} + +UnicodeString& +DefaultLocaleDisplayNames::languageDisplayName(const char* lang, + UnicodeString& result) const { + return result = UnicodeString(lang, -1, US_INV); +} + +UnicodeString& +DefaultLocaleDisplayNames::scriptDisplayName(const char* script, + UnicodeString& result) const { + return result = UnicodeString(script, -1, US_INV); +} + +UnicodeString& +DefaultLocaleDisplayNames::scriptDisplayName(UScriptCode scriptCode, + UnicodeString& result) const { + const char* name = uscript_getName(scriptCode); + if (name) { + return result = UnicodeString(name, -1, US_INV); + } + return result.remove(); +} + +UnicodeString& +DefaultLocaleDisplayNames::regionDisplayName(const char* region, + UnicodeString& result) const { + return result = UnicodeString(region, -1, US_INV); +} + +UnicodeString& +DefaultLocaleDisplayNames::variantDisplayName(const char* variant, + UnicodeString& result) const { + return result = UnicodeString(variant, -1, US_INV); +} + +UnicodeString& +DefaultLocaleDisplayNames::keyDisplayName(const char* key, + UnicodeString& result) const { + return result = UnicodeString(key, -1, US_INV); +} + +UnicodeString& +DefaultLocaleDisplayNames::keyValueDisplayName(const char* /* key */, + const char* value, + UnicodeString& result) const { + return result = UnicodeString(value, -1, US_INV); +} + +#endif // currently unused class DefaultLocaleDisplayNames + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +class LocaleDisplayNamesImpl : public LocaleDisplayNames { + Locale locale; + UDialectHandling dialectHandling; + ICUDataTable langData; + ICUDataTable regionData; + SimpleFormatter separatorFormat; + SimpleFormatter format; + SimpleFormatter keyTypeFormat; + UDisplayContext capitalizationContext; +#if !UCONFIG_NO_BREAK_ITERATION + BreakIterator* capitalizationBrkIter; +#else + UObject* capitalizationBrkIter; +#endif + UnicodeString formatOpenParen; + UnicodeString formatReplaceOpenParen; + UnicodeString formatCloseParen; + UnicodeString formatReplaceCloseParen; + UDisplayContext nameLength; + UDisplayContext substitute; + + // Constants for capitalization context usage types. + enum CapContextUsage { + kCapContextUsageLanguage, + kCapContextUsageScript, + kCapContextUsageTerritory, + kCapContextUsageVariant, + kCapContextUsageKey, + kCapContextUsageKeyValue, + kCapContextUsageCount + }; + // Capitalization transforms. For each usage type, indicates whether to titlecase for + // the context specified in capitalizationContext (which we know at construction time) + UBool fCapitalization[kCapContextUsageCount]; + +public: + // constructor + LocaleDisplayNamesImpl(const Locale& locale, UDialectHandling dialectHandling); + LocaleDisplayNamesImpl(const Locale& locale, UDisplayContext *contexts, int32_t length); + virtual ~LocaleDisplayNamesImpl(); + + virtual const Locale& getLocale() const; + virtual UDialectHandling getDialectHandling() const; + virtual UDisplayContext getContext(UDisplayContextType type) const; + + virtual UnicodeString& localeDisplayName(const Locale& locale, + UnicodeString& result) const; + virtual UnicodeString& localeDisplayName(const char* localeId, + UnicodeString& result) const; + virtual UnicodeString& languageDisplayName(const char* lang, + UnicodeString& result) const; + virtual UnicodeString& scriptDisplayName(const char* script, + UnicodeString& result) const; + virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode, + UnicodeString& result) const; + virtual UnicodeString& regionDisplayName(const char* region, + UnicodeString& result) const; + virtual UnicodeString& variantDisplayName(const char* variant, + UnicodeString& result) const; + virtual UnicodeString& keyDisplayName(const char* key, + UnicodeString& result) const; + virtual UnicodeString& keyValueDisplayName(const char* key, + const char* value, + UnicodeString& result) const; +private: + UnicodeString& localeIdName(const char* localeId, + UnicodeString& result, bool substitute) const; + UnicodeString& appendWithSep(UnicodeString& buffer, const UnicodeString& src) const; + UnicodeString& adjustForUsageAndContext(CapContextUsage usage, UnicodeString& result) const; + UnicodeString& scriptDisplayName(const char* script, UnicodeString& result, UBool skipAdjust) const; + UnicodeString& regionDisplayName(const char* region, UnicodeString& result, UBool skipAdjust) const; + UnicodeString& variantDisplayName(const char* variant, UnicodeString& result, UBool skipAdjust) const; + UnicodeString& keyDisplayName(const char* key, UnicodeString& result, UBool skipAdjust) const; + UnicodeString& keyValueDisplayName(const char* key, const char* value, + UnicodeString& result, UBool skipAdjust) const; + void initialize(void); + + struct CapitalizationContextSink; +}; + +LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale, + UDialectHandling dialectHandling) + : dialectHandling(dialectHandling) + , langData(U_ICUDATA_LANG, locale) + , regionData(U_ICUDATA_REGION, locale) + , capitalizationContext(UDISPCTX_CAPITALIZATION_NONE) + , capitalizationBrkIter(NULL) + , nameLength(UDISPCTX_LENGTH_FULL) + , substitute(UDISPCTX_SUBSTITUTE) +{ + initialize(); +} + +LocaleDisplayNamesImpl::LocaleDisplayNamesImpl(const Locale& locale, + UDisplayContext *contexts, int32_t length) + : dialectHandling(ULDN_STANDARD_NAMES) + , langData(U_ICUDATA_LANG, locale) + , regionData(U_ICUDATA_REGION, locale) + , capitalizationContext(UDISPCTX_CAPITALIZATION_NONE) + , capitalizationBrkIter(NULL) + , nameLength(UDISPCTX_LENGTH_FULL) + , substitute(UDISPCTX_SUBSTITUTE) +{ + while (length-- > 0) { + UDisplayContext value = *contexts++; + UDisplayContextType selector = (UDisplayContextType)((uint32_t)value >> 8); + switch (selector) { + case UDISPCTX_TYPE_DIALECT_HANDLING: + dialectHandling = (UDialectHandling)value; + break; + case UDISPCTX_TYPE_CAPITALIZATION: + capitalizationContext = value; + break; + case UDISPCTX_TYPE_DISPLAY_LENGTH: + nameLength = value; + break; + case UDISPCTX_TYPE_SUBSTITUTE_HANDLING: + substitute = value; + break; + default: + break; + } + } + initialize(); +} + +struct LocaleDisplayNamesImpl::CapitalizationContextSink : public ResourceSink { + UBool hasCapitalizationUsage; + LocaleDisplayNamesImpl& parent; + + CapitalizationContextSink(LocaleDisplayNamesImpl& _parent) + : hasCapitalizationUsage(FALSE), parent(_parent) {} + virtual ~CapitalizationContextSink(); + + virtual void put(const char *key, ResourceValue &value, UBool /*noFallback*/, + UErrorCode &errorCode) { + ResourceTable contexts = value.getTable(errorCode); + if (U_FAILURE(errorCode)) { return; } + for (int i = 0; contexts.getKeyAndValue(i, key, value); ++i) { + + CapContextUsage usageEnum; + if (uprv_strcmp(key, "key") == 0) { + usageEnum = kCapContextUsageKey; + } else if (uprv_strcmp(key, "keyValue") == 0) { + usageEnum = kCapContextUsageKeyValue; + } else if (uprv_strcmp(key, "languages") == 0) { + usageEnum = kCapContextUsageLanguage; + } else if (uprv_strcmp(key, "script") == 0) { + usageEnum = kCapContextUsageScript; + } else if (uprv_strcmp(key, "territory") == 0) { + usageEnum = kCapContextUsageTerritory; + } else if (uprv_strcmp(key, "variant") == 0) { + usageEnum = kCapContextUsageVariant; + } else { + continue; + } + + int32_t len = 0; + const int32_t* intVector = value.getIntVector(len, errorCode); + if (U_FAILURE(errorCode)) { return; } + if (len < 2) { continue; } + + int32_t titlecaseInt = (parent.capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU) ? intVector[0] : intVector[1]; + if (titlecaseInt == 0) { continue; } + + parent.fCapitalization[usageEnum] = TRUE; + hasCapitalizationUsage = TRUE; + } + } +}; + +// Virtual destructors must be defined out of line. +LocaleDisplayNamesImpl::CapitalizationContextSink::~CapitalizationContextSink() {} + +void +LocaleDisplayNamesImpl::initialize(void) { + LocaleDisplayNamesImpl *nonConstThis = (LocaleDisplayNamesImpl *)this; + nonConstThis->locale = langData.getLocale() == Locale::getRoot() + ? regionData.getLocale() + : langData.getLocale(); + + UnicodeString sep; + langData.getNoFallback("localeDisplayPattern", "separator", sep); + if (sep.isBogus()) { + sep = UnicodeString("{0}, {1}", -1, US_INV); + } + UErrorCode status = U_ZERO_ERROR; + separatorFormat.applyPatternMinMaxArguments(sep, 2, 2, status); + + UnicodeString pattern; + langData.getNoFallback("localeDisplayPattern", "pattern", pattern); + if (pattern.isBogus()) { + pattern = UnicodeString("{0} ({1})", -1, US_INV); + } + format.applyPatternMinMaxArguments(pattern, 2, 2, status); + if (pattern.indexOf((UChar)0xFF08) >= 0) { + formatOpenParen.setTo((UChar)0xFF08); // fullwidth ( + formatReplaceOpenParen.setTo((UChar)0xFF3B); // fullwidth [ + formatCloseParen.setTo((UChar)0xFF09); // fullwidth ) + formatReplaceCloseParen.setTo((UChar)0xFF3D); // fullwidth ] + } else { + formatOpenParen.setTo((UChar)0x0028); // ( + formatReplaceOpenParen.setTo((UChar)0x005B); // [ + formatCloseParen.setTo((UChar)0x0029); // ) + formatReplaceCloseParen.setTo((UChar)0x005D); // ] + } + + UnicodeString ktPattern; + langData.get("localeDisplayPattern", "keyTypePattern", ktPattern); + if (ktPattern.isBogus()) { + ktPattern = UnicodeString("{0}={1}", -1, US_INV); + } + keyTypeFormat.applyPatternMinMaxArguments(ktPattern, 2, 2, status); + + uprv_memset(fCapitalization, 0, sizeof(fCapitalization)); +#if !UCONFIG_NO_BREAK_ITERATION + // Only get the context data if we need it! This is a const object so we know now... + // Also check whether we will need a break iterator (depends on the data) + UBool needBrkIter = FALSE; + if (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE) { + LocalUResourceBundlePointer resource(ures_open(NULL, locale.getName(), &status)); + if (U_FAILURE(status)) { return; } + CapitalizationContextSink sink(*this); + ures_getAllItemsWithFallback(resource.getAlias(), "contextTransforms", sink, status); + if (status == U_MISSING_RESOURCE_ERROR) { + // Silently ignore. Not every locale has contextTransforms. + status = U_ZERO_ERROR; + } else if (U_FAILURE(status)) { + return; + } + needBrkIter = sink.hasCapitalizationUsage; + } + // Get a sentence break iterator if we will need it + if (needBrkIter || capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE) { + status = U_ZERO_ERROR; + capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status); + if (U_FAILURE(status)) { + delete capitalizationBrkIter; + capitalizationBrkIter = NULL; + } + } +#endif +} + +LocaleDisplayNamesImpl::~LocaleDisplayNamesImpl() { +#if !UCONFIG_NO_BREAK_ITERATION + delete capitalizationBrkIter; +#endif +} + +const Locale& +LocaleDisplayNamesImpl::getLocale() const { + return locale; +} + +UDialectHandling +LocaleDisplayNamesImpl::getDialectHandling() const { + return dialectHandling; +} + +UDisplayContext +LocaleDisplayNamesImpl::getContext(UDisplayContextType type) const { + switch (type) { + case UDISPCTX_TYPE_DIALECT_HANDLING: + return (UDisplayContext)dialectHandling; + case UDISPCTX_TYPE_CAPITALIZATION: + return capitalizationContext; + case UDISPCTX_TYPE_DISPLAY_LENGTH: + return nameLength; + case UDISPCTX_TYPE_SUBSTITUTE_HANDLING: + return substitute; + default: + break; + } + return (UDisplayContext)0; +} + +UnicodeString& +LocaleDisplayNamesImpl::adjustForUsageAndContext(CapContextUsage usage, + UnicodeString& result) const { +#if !UCONFIG_NO_BREAK_ITERATION + // check to see whether we need to titlecase result + if ( result.length() > 0 && u_islower(result.char32At(0)) && capitalizationBrkIter!= NULL && + ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || fCapitalization[usage] ) ) { + // note fCapitalization[usage] won't be set unless capitalizationContext is UI_LIST_OR_MENU or STANDALONE + static UMutex capitalizationBrkIterLock; + Mutex lock(&capitalizationBrkIterLock); + result.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); + } +#endif + return result; +} + +UnicodeString& +LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc, + UnicodeString& result) const { + if (loc.isBogus()) { + result.setToBogus(); + return result; + } + UnicodeString resultName; + + const char* lang = loc.getLanguage(); + if (uprv_strlen(lang) == 0) { + lang = "root"; + } + const char* script = loc.getScript(); + const char* country = loc.getCountry(); + const char* variant = loc.getVariant(); + + UBool hasScript = uprv_strlen(script) > 0; + UBool hasCountry = uprv_strlen(country) > 0; + UBool hasVariant = uprv_strlen(variant) > 0; + + if (dialectHandling == ULDN_DIALECT_NAMES) { + char buffer[ULOC_FULLNAME_CAPACITY]; + do { // loop construct is so we can break early out of search + if (hasScript && hasCountry) { + ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, "_", country, (char *)0); + localeIdName(buffer, resultName, false); + if (!resultName.isBogus()) { + hasScript = FALSE; + hasCountry = FALSE; + break; + } + } + if (hasScript) { + ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", script, (char *)0); + localeIdName(buffer, resultName, false); + if (!resultName.isBogus()) { + hasScript = FALSE; + break; + } + } + if (hasCountry) { + ncat(buffer, ULOC_FULLNAME_CAPACITY, lang, "_", country, (char*)0); + localeIdName(buffer, resultName, false); + if (!resultName.isBogus()) { + hasCountry = FALSE; + break; + } + } + } while (FALSE); + } + if (resultName.isBogus() || resultName.isEmpty()) { + localeIdName(lang, resultName, substitute == UDISPCTX_SUBSTITUTE); + if (resultName.isBogus()) { + result.setToBogus(); + return result; + } + } + + UnicodeString resultRemainder; + UnicodeString temp; + UErrorCode status = U_ZERO_ERROR; + + if (hasScript) { + UnicodeString script_str = scriptDisplayName(script, temp, TRUE); + if (script_str.isBogus()) { + result.setToBogus(); + return result; + } + resultRemainder.append(script_str); + } + if (hasCountry) { + UnicodeString region_str = regionDisplayName(country, temp, TRUE); + if (region_str.isBogus()) { + result.setToBogus(); + return result; + } + appendWithSep(resultRemainder, region_str); + } + if (hasVariant) { + UnicodeString variant_str = variantDisplayName(variant, temp, TRUE); + if (variant_str.isBogus()) { + result.setToBogus(); + return result; + } + appendWithSep(resultRemainder, variant_str); + } + resultRemainder.findAndReplace(formatOpenParen, formatReplaceOpenParen); + resultRemainder.findAndReplace(formatCloseParen, formatReplaceCloseParen); + + LocalPointer<StringEnumeration> e(loc.createKeywords(status)); + if (e.isValid() && U_SUCCESS(status)) { + UnicodeString temp2; + char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY + const char* key; + while ((key = e->next((int32_t *)0, status)) != NULL) { + value[0] = 0; + loc.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status); + if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) { + return result; + } + keyDisplayName(key, temp, TRUE); + temp.findAndReplace(formatOpenParen, formatReplaceOpenParen); + temp.findAndReplace(formatCloseParen, formatReplaceCloseParen); + keyValueDisplayName(key, value, temp2, TRUE); + temp2.findAndReplace(formatOpenParen, formatReplaceOpenParen); + temp2.findAndReplace(formatCloseParen, formatReplaceCloseParen); + if (temp2 != UnicodeString(value, -1, US_INV)) { + appendWithSep(resultRemainder, temp2); + } else if (temp != UnicodeString(key, -1, US_INV)) { + UnicodeString temp3; + keyTypeFormat.format(temp, temp2, temp3, status); + appendWithSep(resultRemainder, temp3); + } else { + appendWithSep(resultRemainder, temp) + .append((UChar)0x3d /* = */) + .append(temp2); + } + } + } + + if (!resultRemainder.isEmpty()) { + format.format(resultName, resultRemainder, result.remove(), status); + return adjustForUsageAndContext(kCapContextUsageLanguage, result); + } + + result = resultName; + return adjustForUsageAndContext(kCapContextUsageLanguage, result); +} + +UnicodeString& +LocaleDisplayNamesImpl::appendWithSep(UnicodeString& buffer, const UnicodeString& src) const { + if (buffer.isEmpty()) { + buffer.setTo(src); + } else { + const UnicodeString *values[2] = { &buffer, &src }; + UErrorCode status = U_ZERO_ERROR; + separatorFormat.formatAndReplace(values, 2, buffer, NULL, 0, status); + } + return buffer; +} + +UnicodeString& +LocaleDisplayNamesImpl::localeDisplayName(const char* localeId, + UnicodeString& result) const { + return localeDisplayName(Locale(localeId), result); +} + +// private +UnicodeString& +LocaleDisplayNamesImpl::localeIdName(const char* localeId, + UnicodeString& result, bool substitute) const { + if (nameLength == UDISPCTX_LENGTH_SHORT) { + langData.getNoFallback("Languages%short", localeId, result); + if (!result.isBogus()) { + return result; + } + } + if (substitute) { + return langData.get("Languages", localeId, result); + } else { + return langData.getNoFallback("Languages", localeId, result); + } +} + +UnicodeString& +LocaleDisplayNamesImpl::languageDisplayName(const char* lang, + UnicodeString& result) const { + if (uprv_strcmp("root", lang) == 0 || uprv_strchr(lang, '_') != NULL) { + return result = UnicodeString(lang, -1, US_INV); + } + if (nameLength == UDISPCTX_LENGTH_SHORT) { + langData.getNoFallback("Languages%short", lang, result); + if (!result.isBogus()) { + return adjustForUsageAndContext(kCapContextUsageLanguage, result); + } + } + if (substitute == UDISPCTX_SUBSTITUTE) { + langData.get("Languages", lang, result); + } else { + langData.getNoFallback("Languages", lang, result); + } + return adjustForUsageAndContext(kCapContextUsageLanguage, result); +} + +UnicodeString& +LocaleDisplayNamesImpl::scriptDisplayName(const char* script, + UnicodeString& result, + UBool skipAdjust) const { + if (nameLength == UDISPCTX_LENGTH_SHORT) { + langData.getNoFallback("Scripts%short", script, result); + if (!result.isBogus()) { + return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageScript, result); + } + } + if (substitute == UDISPCTX_SUBSTITUTE) { + langData.get("Scripts", script, result); + } else { + langData.getNoFallback("Scripts", script, result); + } + return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageScript, result); +} + +UnicodeString& +LocaleDisplayNamesImpl::scriptDisplayName(const char* script, + UnicodeString& result) const { + return scriptDisplayName(script, result, FALSE); +} + +UnicodeString& +LocaleDisplayNamesImpl::scriptDisplayName(UScriptCode scriptCode, + UnicodeString& result) const { + return scriptDisplayName(uscript_getName(scriptCode), result, FALSE); +} + +UnicodeString& +LocaleDisplayNamesImpl::regionDisplayName(const char* region, + UnicodeString& result, + UBool skipAdjust) const { + if (nameLength == UDISPCTX_LENGTH_SHORT) { + regionData.getNoFallback("Countries%short", region, result); + if (!result.isBogus()) { + return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageTerritory, result); + } + } + if (substitute == UDISPCTX_SUBSTITUTE) { + regionData.get("Countries", region, result); + } else { + regionData.getNoFallback("Countries", region, result); + } + return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageTerritory, result); +} + +UnicodeString& +LocaleDisplayNamesImpl::regionDisplayName(const char* region, + UnicodeString& result) const { + return regionDisplayName(region, result, FALSE); +} + + +UnicodeString& +LocaleDisplayNamesImpl::variantDisplayName(const char* variant, + UnicodeString& result, + UBool skipAdjust) const { + // don't have a resource for short variant names + if (substitute == UDISPCTX_SUBSTITUTE) { + langData.get("Variants", variant, result); + } else { + langData.getNoFallback("Variants", variant, result); + } + return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageVariant, result); +} + +UnicodeString& +LocaleDisplayNamesImpl::variantDisplayName(const char* variant, + UnicodeString& result) const { + return variantDisplayName(variant, result, FALSE); +} + +UnicodeString& +LocaleDisplayNamesImpl::keyDisplayName(const char* key, + UnicodeString& result, + UBool skipAdjust) const { + // don't have a resource for short key names + if (substitute == UDISPCTX_SUBSTITUTE) { + langData.get("Keys", key, result); + } else { + langData.getNoFallback("Keys", key, result); + } + return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKey, result); +} + +UnicodeString& +LocaleDisplayNamesImpl::keyDisplayName(const char* key, + UnicodeString& result) const { + return keyDisplayName(key, result, FALSE); +} + +UnicodeString& +LocaleDisplayNamesImpl::keyValueDisplayName(const char* key, + const char* value, + UnicodeString& result, + UBool skipAdjust) const { + if (uprv_strcmp(key, "currency") == 0) { + // ICU4C does not have ICU4J CurrencyDisplayInfo equivalent for now. + UErrorCode sts = U_ZERO_ERROR; + UnicodeString ustrValue(value, -1, US_INV); + int32_t len; + const UChar *currencyName = ucurr_getName(ustrValue.getTerminatedBuffer(), + locale.getBaseName(), UCURR_LONG_NAME, nullptr /* isChoiceFormat */, &len, &sts); + if (U_FAILURE(sts)) { + // Return the value as is on failure + result = ustrValue; + return result; + } + result.setTo(currencyName, len); + return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKeyValue, result); + } + + if (nameLength == UDISPCTX_LENGTH_SHORT) { + langData.getNoFallback("Types%short", key, value, result); + if (!result.isBogus()) { + return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKeyValue, result); + } + } + if (substitute == UDISPCTX_SUBSTITUTE) { + langData.get("Types", key, value, result); + } else { + langData.getNoFallback("Types", key, value, result); + } + return skipAdjust? result: adjustForUsageAndContext(kCapContextUsageKeyValue, result); +} + +UnicodeString& +LocaleDisplayNamesImpl::keyValueDisplayName(const char* key, + const char* value, + UnicodeString& result) const { + return keyValueDisplayName(key, value, result, FALSE); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +LocaleDisplayNames* +LocaleDisplayNames::createInstance(const Locale& locale, + UDialectHandling dialectHandling) { + return new LocaleDisplayNamesImpl(locale, dialectHandling); +} + +LocaleDisplayNames* +LocaleDisplayNames::createInstance(const Locale& locale, + UDisplayContext *contexts, int32_t length) { + if (contexts == NULL) { + length = 0; + } + return new LocaleDisplayNamesImpl(locale, contexts, length); +} + +U_NAMESPACE_END + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +U_NAMESPACE_USE + +U_CAPI ULocaleDisplayNames * U_EXPORT2 +uldn_open(const char * locale, + UDialectHandling dialectHandling, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (locale == NULL) { + locale = uloc_getDefault(); + } + return (ULocaleDisplayNames *)LocaleDisplayNames::createInstance(Locale(locale), dialectHandling); +} + +U_CAPI ULocaleDisplayNames * U_EXPORT2 +uldn_openForContext(const char * locale, + UDisplayContext *contexts, int32_t length, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (locale == NULL) { + locale = uloc_getDefault(); + } + return (ULocaleDisplayNames *)LocaleDisplayNames::createInstance(Locale(locale), contexts, length); +} + + +U_CAPI void U_EXPORT2 +uldn_close(ULocaleDisplayNames *ldn) { + delete (LocaleDisplayNames *)ldn; +} + +U_CAPI const char * U_EXPORT2 +uldn_getLocale(const ULocaleDisplayNames *ldn) { + if (ldn) { + return ((const LocaleDisplayNames *)ldn)->getLocale().getName(); + } + return NULL; +} + +U_CAPI UDialectHandling U_EXPORT2 +uldn_getDialectHandling(const ULocaleDisplayNames *ldn) { + if (ldn) { + return ((const LocaleDisplayNames *)ldn)->getDialectHandling(); + } + return ULDN_STANDARD_NAMES; +} + +U_CAPI UDisplayContext U_EXPORT2 +uldn_getContext(const ULocaleDisplayNames *ldn, + UDisplayContextType type, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return (UDisplayContext)0; + } + return ((const LocaleDisplayNames *)ldn)->getContext(type); +} + +U_CAPI int32_t U_EXPORT2 +uldn_localeDisplayName(const ULocaleDisplayNames *ldn, + const char *locale, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (ldn == NULL || locale == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString temp(result, 0, maxResultSize); + ((const LocaleDisplayNames *)ldn)->localeDisplayName(locale, temp); + if (temp.isBogus()) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + return temp.extract(result, maxResultSize, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uldn_languageDisplayName(const ULocaleDisplayNames *ldn, + const char *lang, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (ldn == NULL || lang == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString temp(result, 0, maxResultSize); + ((const LocaleDisplayNames *)ldn)->languageDisplayName(lang, temp); + return temp.extract(result, maxResultSize, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uldn_scriptDisplayName(const ULocaleDisplayNames *ldn, + const char *script, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (ldn == NULL || script == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString temp(result, 0, maxResultSize); + ((const LocaleDisplayNames *)ldn)->scriptDisplayName(script, temp); + return temp.extract(result, maxResultSize, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn, + UScriptCode scriptCode, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode) { + return uldn_scriptDisplayName(ldn, uscript_getName(scriptCode), result, maxResultSize, pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uldn_regionDisplayName(const ULocaleDisplayNames *ldn, + const char *region, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (ldn == NULL || region == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString temp(result, 0, maxResultSize); + ((const LocaleDisplayNames *)ldn)->regionDisplayName(region, temp); + return temp.extract(result, maxResultSize, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uldn_variantDisplayName(const ULocaleDisplayNames *ldn, + const char *variant, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (ldn == NULL || variant == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString temp(result, 0, maxResultSize); + ((const LocaleDisplayNames *)ldn)->variantDisplayName(variant, temp); + return temp.extract(result, maxResultSize, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uldn_keyDisplayName(const ULocaleDisplayNames *ldn, + const char *key, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (ldn == NULL || key == NULL || (result == NULL && maxResultSize > 0) || maxResultSize < 0) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString temp(result, 0, maxResultSize); + ((const LocaleDisplayNames *)ldn)->keyDisplayName(key, temp); + return temp.extract(result, maxResultSize, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn, + const char *key, + const char *value, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (ldn == NULL || key == NULL || value == NULL || (result == NULL && maxResultSize > 0) + || maxResultSize < 0) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString temp(result, 0, maxResultSize); + ((const LocaleDisplayNames *)ldn)->keyValueDisplayName(key, value, temp); + return temp.extract(result, maxResultSize, *pErrorCode); +} + +#endif diff --git a/thirdparty/icu4c/common/locid.cpp b/thirdparty/icu4c/common/locid.cpp new file mode 100644 index 0000000000..2804e36bf6 --- /dev/null +++ b/thirdparty/icu4c/common/locid.cpp @@ -0,0 +1,2536 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ********************************************************************** + * Copyright (C) 1997-2016, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** +* +* File locid.cpp +* +* Created by: Richard Gillam +* +* Modification History: +* +* Date Name Description +* 02/11/97 aliu Changed gLocPath to fgDataDirectory and added +* methods to get and set it. +* 04/02/97 aliu Made operator!= inline; fixed return value +* of getName(). +* 04/15/97 aliu Cleanup for AIX/Win32. +* 04/24/97 aliu Numerous changes per code review. +* 08/18/98 stephen Changed getDisplayName() +* Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE +* Added getISOCountries(), getISOLanguages(), +* getLanguagesForCountry() +* 03/16/99 bertrand rehaul. +* 07/21/99 stephen Added U_CFUNC setDefault +* 11/09/99 weiv Added const char * getName() const; +* 04/12/00 srl removing unicodestring api's and cached hash code +* 08/10/01 grhoten Change the static Locales to accessor functions +****************************************************************************** +*/ + +#include <utility> + +#include "unicode/bytestream.h" +#include "unicode/locid.h" +#include "unicode/strenum.h" +#include "unicode/stringpiece.h" +#include "unicode/uloc.h" +#include "unicode/ures.h" + +#include "bytesinkutil.h" +#include "charstr.h" +#include "charstrmap.h" +#include "cmemory.h" +#include "cstring.h" +#include "mutex.h" +#include "putilimp.h" +#include "uassert.h" +#include "ucln_cmn.h" +#include "uhash.h" +#include "ulocimp.h" +#include "umutex.h" +#include "uniquecharstr.h" +#include "ustr_imp.h" +#include "uvector.h" + +U_CDECL_BEGIN +static UBool U_CALLCONV locale_cleanup(void); +U_CDECL_END + +U_NAMESPACE_BEGIN + +static Locale *gLocaleCache = NULL; +static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER; + +// gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale. +static UMutex gDefaultLocaleMutex; +static UHashtable *gDefaultLocalesHashT = NULL; +static Locale *gDefaultLocale = NULL; + +/** + * \def ULOC_STRING_LIMIT + * strings beyond this value crash in CharString + */ +#define ULOC_STRING_LIMIT 357913941 + +U_NAMESPACE_END + +typedef enum ELocalePos { + eENGLISH, + eFRENCH, + eGERMAN, + eITALIAN, + eJAPANESE, + eKOREAN, + eCHINESE, + + eFRANCE, + eGERMANY, + eITALY, + eJAPAN, + eKOREA, + eCHINA, /* Alias for PRC */ + eTAIWAN, + eUK, + eUS, + eCANADA, + eCANADA_FRENCH, + eROOT, + + + //eDEFAULT, + eMAX_LOCALES +} ELocalePos; + +U_CDECL_BEGIN +// +// Deleter function for Locales owned by the default Locale hash table/ +// +static void U_CALLCONV +deleteLocale(void *obj) { + delete (icu::Locale *) obj; +} + +static UBool U_CALLCONV locale_cleanup(void) +{ + U_NAMESPACE_USE + + delete [] gLocaleCache; + gLocaleCache = NULL; + gLocaleCacheInitOnce.reset(); + + if (gDefaultLocalesHashT) { + uhash_close(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func. + gDefaultLocalesHashT = NULL; + } + gDefaultLocale = NULL; + return TRUE; +} + + +static void U_CALLCONV locale_init(UErrorCode &status) { + U_NAMESPACE_USE + + U_ASSERT(gLocaleCache == NULL); + gLocaleCache = new Locale[(int)eMAX_LOCALES]; + if (gLocaleCache == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup); + gLocaleCache[eROOT] = Locale(""); + gLocaleCache[eENGLISH] = Locale("en"); + gLocaleCache[eFRENCH] = Locale("fr"); + gLocaleCache[eGERMAN] = Locale("de"); + gLocaleCache[eITALIAN] = Locale("it"); + gLocaleCache[eJAPANESE] = Locale("ja"); + gLocaleCache[eKOREAN] = Locale("ko"); + gLocaleCache[eCHINESE] = Locale("zh"); + gLocaleCache[eFRANCE] = Locale("fr", "FR"); + gLocaleCache[eGERMANY] = Locale("de", "DE"); + gLocaleCache[eITALY] = Locale("it", "IT"); + gLocaleCache[eJAPAN] = Locale("ja", "JP"); + gLocaleCache[eKOREA] = Locale("ko", "KR"); + gLocaleCache[eCHINA] = Locale("zh", "CN"); + gLocaleCache[eTAIWAN] = Locale("zh", "TW"); + gLocaleCache[eUK] = Locale("en", "GB"); + gLocaleCache[eUS] = Locale("en", "US"); + gLocaleCache[eCANADA] = Locale("en", "CA"); + gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA"); +} + +U_CDECL_END + +U_NAMESPACE_BEGIN + +Locale *locale_set_default_internal(const char *id, UErrorCode& status) { + // Synchronize this entire function. + Mutex lock(&gDefaultLocaleMutex); + + UBool canonicalize = FALSE; + + // If given a NULL string for the locale id, grab the default + // name from the system. + // (Different from most other locale APIs, where a null name means use + // the current ICU default locale.) + if (id == NULL) { + id = uprv_getDefaultLocaleID(); // This function not thread safe? TODO: verify. + canonicalize = TRUE; // always canonicalize host ID + } + + CharString localeNameBuf; + { + CharStringByteSink sink(&localeNameBuf); + if (canonicalize) { + ulocimp_canonicalize(id, sink, &status); + } else { + ulocimp_getName(id, sink, &status); + } + } + + if (U_FAILURE(status)) { + return gDefaultLocale; + } + + if (gDefaultLocalesHashT == NULL) { + gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status); + if (U_FAILURE(status)) { + return gDefaultLocale; + } + uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale); + ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup); + } + + Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf.data()); + if (newDefault == NULL) { + newDefault = new Locale(Locale::eBOGUS); + if (newDefault == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return gDefaultLocale; + } + newDefault->init(localeNameBuf.data(), FALSE); + uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status); + if (U_FAILURE(status)) { + return gDefaultLocale; + } + } + gDefaultLocale = newDefault; + return gDefaultLocale; +} + +U_NAMESPACE_END + +/* sfb 07/21/99 */ +U_CFUNC void +locale_set_default(const char *id) +{ + U_NAMESPACE_USE + UErrorCode status = U_ZERO_ERROR; + locale_set_default_internal(id, status); +} +/* end */ + +U_CFUNC const char * +locale_get_default(void) +{ + U_NAMESPACE_USE + return Locale::getDefault().getName(); +} + + +U_NAMESPACE_BEGIN + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale) + +/*Character separating the posix id fields*/ +// '_' +// In the platform codepage. +#define SEP_CHAR '_' +#define NULL_CHAR '\0' + +Locale::~Locale() +{ + if (baseName != fullName) { + uprv_free(baseName); + } + baseName = NULL; + /*if fullName is on the heap, we free it*/ + if (fullName != fullNameBuffer) + { + uprv_free(fullName); + fullName = NULL; + } +} + +Locale::Locale() + : UObject(), fullName(fullNameBuffer), baseName(NULL) +{ + init(NULL, FALSE); +} + +/* + * Internal constructor to allow construction of a locale object with + * NO side effects. (Default constructor tries to get + * the default locale.) + */ +Locale::Locale(Locale::ELocaleType) + : UObject(), fullName(fullNameBuffer), baseName(NULL) +{ + setToBogus(); +} + + +Locale::Locale( const char * newLanguage, + const char * newCountry, + const char * newVariant, + const char * newKeywords) + : UObject(), fullName(fullNameBuffer), baseName(NULL) +{ + if( (newLanguage==NULL) && (newCountry == NULL) && (newVariant == NULL) ) + { + init(NULL, FALSE); /* shortcut */ + } + else + { + UErrorCode status = U_ZERO_ERROR; + int32_t size = 0; + int32_t lsize = 0; + int32_t csize = 0; + int32_t vsize = 0; + int32_t ksize = 0; + + // Calculate the size of the resulting string. + + // Language + if ( newLanguage != NULL ) + { + lsize = (int32_t)uprv_strlen(newLanguage); + if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap + setToBogus(); + return; + } + size = lsize; + } + + CharString togo(newLanguage, lsize, status); // start with newLanguage + + // _Country + if ( newCountry != NULL ) + { + csize = (int32_t)uprv_strlen(newCountry); + if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap + setToBogus(); + return; + } + size += csize; + } + + // _Variant + if ( newVariant != NULL ) + { + // remove leading _'s + while(newVariant[0] == SEP_CHAR) + { + newVariant++; + } + + // remove trailing _'s + vsize = (int32_t)uprv_strlen(newVariant); + if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap + setToBogus(); + return; + } + while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) ) + { + vsize--; + } + } + + if( vsize > 0 ) + { + size += vsize; + } + + // Separator rules: + if ( vsize > 0 ) + { + size += 2; // at least: __v + } + else if ( csize > 0 ) + { + size += 1; // at least: _v + } + + if ( newKeywords != NULL) + { + ksize = (int32_t)uprv_strlen(newKeywords); + if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) { + setToBogus(); + return; + } + size += ksize + 1; + } + + // NOW we have the full locale string.. + // Now, copy it back. + + // newLanguage is already copied + + if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v + { // ^ + togo.append(SEP_CHAR, status); + } + + if ( csize != 0 ) + { + togo.append(newCountry, status); + } + + if ( vsize != 0) + { + togo.append(SEP_CHAR, status) + .append(newVariant, vsize, status); + } + + if ( ksize != 0) + { + if (uprv_strchr(newKeywords, '=')) { + togo.append('@', status); /* keyword parsing */ + } + else { + togo.append('_', status); /* Variant parsing with a script */ + if ( vsize == 0) { + togo.append('_', status); /* No country found */ + } + } + togo.append(newKeywords, status); + } + + if (U_FAILURE(status)) { + // Something went wrong with appending, etc. + setToBogus(); + return; + } + // Parse it, because for example 'language' might really be a complete + // string. + init(togo.data(), FALSE); + } +} + +Locale::Locale(const Locale &other) + : UObject(other), fullName(fullNameBuffer), baseName(NULL) +{ + *this = other; +} + +Locale::Locale(Locale&& other) U_NOEXCEPT + : UObject(other), fullName(fullNameBuffer), baseName(fullName) { + *this = std::move(other); +} + +Locale& Locale::operator=(const Locale& other) { + if (this == &other) { + return *this; + } + + setToBogus(); + + if (other.fullName == other.fullNameBuffer) { + uprv_strcpy(fullNameBuffer, other.fullNameBuffer); + } else if (other.fullName == nullptr) { + fullName = nullptr; + } else { + fullName = uprv_strdup(other.fullName); + if (fullName == nullptr) return *this; + } + + if (other.baseName == other.fullName) { + baseName = fullName; + } else if (other.baseName != nullptr) { + baseName = uprv_strdup(other.baseName); + if (baseName == nullptr) return *this; + } + + uprv_strcpy(language, other.language); + uprv_strcpy(script, other.script); + uprv_strcpy(country, other.country); + + variantBegin = other.variantBegin; + fIsBogus = other.fIsBogus; + + return *this; +} + +Locale& Locale::operator=(Locale&& other) U_NOEXCEPT { + if (baseName != fullName) uprv_free(baseName); + if (fullName != fullNameBuffer) uprv_free(fullName); + + if (other.fullName == other.fullNameBuffer) { + uprv_strcpy(fullNameBuffer, other.fullNameBuffer); + fullName = fullNameBuffer; + } else { + fullName = other.fullName; + } + + if (other.baseName == other.fullName) { + baseName = fullName; + } else { + baseName = other.baseName; + } + + uprv_strcpy(language, other.language); + uprv_strcpy(script, other.script); + uprv_strcpy(country, other.country); + + variantBegin = other.variantBegin; + fIsBogus = other.fIsBogus; + + other.baseName = other.fullName = other.fullNameBuffer; + + return *this; +} + +Locale * +Locale::clone() const { + return new Locale(*this); +} + +UBool +Locale::operator==( const Locale& other) const +{ + return (uprv_strcmp(other.fullName, fullName) == 0); +} + +namespace { + +UInitOnce gKnownCanonicalizedInitOnce = U_INITONCE_INITIALIZER; +UHashtable *gKnownCanonicalized = nullptr; + +static const char* const KNOWN_CANONICALIZED[] = { + "c", + // Commonly used locales known are already canonicalized + "af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ", + "be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA", "ca", "ca_ES", + "cs", "cs_CZ", "cy", "cy_GB", "da", "da_DK", "de", "de_DE", "el", "el_GR", + "en", "en_GB", "en_US", "es", "es_419", "es_ES", "et", "et_EE", "eu", + "eu_ES", "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR", + "ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN", "he", "he_IL", "hi", "hi_IN", + "hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM", "id", "id_ID", "is", "is_IS", + "it", "it_IT", "ja", "ja_JP", "jv", "jv_ID", "ka", "ka_GE", "kk", "kk_KZ", + "km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA", + "lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN", + "mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP", + "nl", "nl_NL", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF", + "pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si", + "si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr", + "sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta", + "ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR", "uk", + "uk_UA", "ur", "ur_PK", "uz", "uz_UZ", "vi", "vi_VN", "yue", "yue_Hant", + "yue_Hant_HK", "yue_HK", "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant", + "zh_Hant_TW", "zh_TW", "zu", "zu_ZA" +}; + +static UBool U_CALLCONV cleanupKnownCanonicalized() { + gKnownCanonicalizedInitOnce.reset(); + if (gKnownCanonicalized) { uhash_close(gKnownCanonicalized); } + return TRUE; +} + +static void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) { + ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED, + cleanupKnownCanonicalized); + LocalUHashtablePointer newKnownCanonicalizedMap( + uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &status)); + for (int32_t i = 0; + U_SUCCESS(status) && i < UPRV_LENGTHOF(KNOWN_CANONICALIZED); + i++) { + uhash_puti(newKnownCanonicalizedMap.getAlias(), + (void*)KNOWN_CANONICALIZED[i], + 1, &status); + } + if (U_FAILURE(status)) { + return; + } + + gKnownCanonicalized = newKnownCanonicalizedMap.orphan(); +} + +class AliasData; + +/** + * A Builder class to build the alias data. + */ +class AliasDataBuilder { +public: + AliasDataBuilder() { + } + + // Build the AliasData from resource. + AliasData* build(UErrorCode &status); + +private: + void readAlias(UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory<const char*>& types, + LocalMemory<int32_t>& replacementIndexes, + int32_t &length, + void (*checkType)(const char* type), + void (*checkReplacement)(const UnicodeString& replacement), + UErrorCode &status); + + // Read the languageAlias data from alias to + // strings+types+replacementIndexes + // The number of record will be stored into length. + // Allocate length items for types, to store the type field. + // Allocate length items for replacementIndexes, + // to store the index in the strings for the replacement script. + void readLanguageAlias(UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory<const char*>& types, + LocalMemory<int32_t>& replacementIndexes, + int32_t &length, + UErrorCode &status); + + // Read the scriptAlias data from alias to + // strings+types+replacementIndexes + // Allocate length items for types, to store the type field. + // Allocate length items for replacementIndexes, + // to store the index in the strings for the replacement script. + void readScriptAlias(UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory<const char*>& types, + LocalMemory<int32_t>& replacementIndexes, + int32_t &length, UErrorCode &status); + + // Read the territoryAlias data from alias to + // strings+types+replacementIndexes + // Allocate length items for types, to store the type field. + // Allocate length items for replacementIndexes, + // to store the index in the strings for the replacement script. + void readTerritoryAlias(UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory<const char*>& types, + LocalMemory<int32_t>& replacementIndexes, + int32_t &length, UErrorCode &status); + + // Read the variantAlias data from alias to + // strings+types+replacementIndexes + // Allocate length items for types, to store the type field. + // Allocate length items for replacementIndexes, + // to store the index in the strings for the replacement variant. + void readVariantAlias(UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory<const char*>& types, + LocalMemory<int32_t>& replacementIndexes, + int32_t &length, UErrorCode &status); +}; + +/** + * A class to hold the Alias Data. + */ +class AliasData : public UMemory { +public: + static const AliasData* singleton(UErrorCode& status) { + if (U_FAILURE(status)) { + // Do not get into loadData if the status already has error. + return nullptr; + } + umtx_initOnce(AliasData::gInitOnce, &AliasData::loadData, status); + return gSingleton; + } + + const CharStringMap& languageMap() const { return language; } + const CharStringMap& scriptMap() const { return script; } + const CharStringMap& territoryMap() const { return territory; } + const CharStringMap& variantMap() const { return variant; } + + static void U_CALLCONV loadData(UErrorCode &status); + static UBool U_CALLCONV cleanup(); + + static UInitOnce gInitOnce; + +private: + AliasData(CharStringMap languageMap, + CharStringMap scriptMap, + CharStringMap territoryMap, + CharStringMap variantMap, + CharString* strings) + : language(std::move(languageMap)), + script(std::move(scriptMap)), + territory(std::move(territoryMap)), + variant(std::move(variantMap)), + strings(strings) { + } + + ~AliasData() { + delete strings; + } + + static const AliasData* gSingleton; + + CharStringMap language; + CharStringMap script; + CharStringMap territory; + CharStringMap variant; + CharString* strings; + + friend class AliasDataBuilder; +}; + + +const AliasData* AliasData::gSingleton = nullptr; +UInitOnce AliasData::gInitOnce = U_INITONCE_INITIALIZER; + +UBool U_CALLCONV +AliasData::cleanup() +{ + gInitOnce.reset(); + delete gSingleton; + return TRUE; +} + +void +AliasDataBuilder::readAlias( + UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory<const char*>& types, + LocalMemory<int32_t>& replacementIndexes, + int32_t &length, + void (*checkType)(const char* type), + void (*checkReplacement)(const UnicodeString& replacement), + UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + length = ures_getSize(alias); + const char** rawTypes = types.allocateInsteadAndCopy(length); + if (rawTypes == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + int32_t* rawIndexes = replacementIndexes.allocateInsteadAndCopy(length); + if (rawIndexes == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + int i = 0; + while (ures_hasNext(alias)) { + LocalUResourceBundlePointer res( + ures_getNextResource(alias, nullptr, &status)); + const char* aliasFrom = ures_getKey(res.getAlias()); + UnicodeString aliasTo = + ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status); + + checkType(aliasFrom); + checkReplacement(aliasTo); + + rawTypes[i] = aliasFrom; + rawIndexes[i] = strings->add(aliasTo, status); + i++; + } +} + +/** + * Read the languageAlias data from alias to strings+types+replacementIndexes. + * Allocate length items for types, to store the type field. Allocate length + * items for replacementIndexes, to store the index in the strings for the + * replacement language. + */ +void +AliasDataBuilder::readLanguageAlias( + UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory<const char*>& types, + LocalMemory<int32_t>& replacementIndexes, + int32_t &length, + UErrorCode &status) +{ + return readAlias( + alias, strings, types, replacementIndexes, length, +#if U_DEBUG + [](const char* type) { + // Assert the aliasFrom only contains the following possibilties + // language_REGION_variant + // language_REGION + // language_variant + // language + // und_variant + Locale test(type); + // Assert no script in aliasFrom + U_ASSERT(test.getScript()[0] == '\0'); + // Assert when language is und, no REGION in aliasFrom. + U_ASSERT(test.getLanguage()[0] != '\0' || test.getCountry()[0] == '\0'); + }, +#else + [](const char*) {}, +#endif + [](const UnicodeString&) {}, status); +} + +/** + * Read the scriptAlias data from alias to strings+types+replacementIndexes. + * Allocate length items for types, to store the type field. Allocate length + * items for replacementIndexes, to store the index in the strings for the + * replacement script. + */ +void +AliasDataBuilder::readScriptAlias( + UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory<const char*>& types, + LocalMemory<int32_t>& replacementIndexes, + int32_t &length, + UErrorCode &status) +{ + return readAlias( + alias, strings, types, replacementIndexes, length, +#if U_DEBUG + [](const char* type) { + U_ASSERT(uprv_strlen(type) == 4); + }, + [](const UnicodeString& replacement) { + U_ASSERT(replacement.length() == 4); + }, +#else + [](const char*) {}, + [](const UnicodeString&) { }, +#endif + status); +} + +/** + * Read the territoryAlias data from alias to strings+types+replacementIndexes. + * Allocate length items for types, to store the type field. Allocate length + * items for replacementIndexes, to store the index in the strings for the + * replacement regions. + */ +void +AliasDataBuilder::readTerritoryAlias( + UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory<const char*>& types, + LocalMemory<int32_t>& replacementIndexes, + int32_t &length, + UErrorCode &status) +{ + return readAlias( + alias, strings, types, replacementIndexes, length, +#if U_DEBUG + [](const char* type) { + U_ASSERT(uprv_strlen(type) == 2 || uprv_strlen(type) == 3); + }, +#else + [](const char*) {}, +#endif + [](const UnicodeString&) { }, + status); +} + +/** + * Read the variantAlias data from alias to strings+types+replacementIndexes. + * Allocate length items for types, to store the type field. Allocate length + * items for replacementIndexes, to store the index in the strings for the + * replacement variant. + */ +void +AliasDataBuilder::readVariantAlias( + UResourceBundle* alias, + UniqueCharStrings* strings, + LocalMemory<const char*>& types, + LocalMemory<int32_t>& replacementIndexes, + int32_t &length, + UErrorCode &status) +{ + return readAlias( + alias, strings, types, replacementIndexes, length, +#if U_DEBUG + [](const char* type) { + U_ASSERT(uprv_strlen(type) >= 4 && uprv_strlen(type) <= 8); + U_ASSERT(uprv_strlen(type) != 4 || + (type[0] >= '0' && type[0] <= '9')); + }, + [](const UnicodeString& replacement) { + U_ASSERT(replacement.length() >= 4 && replacement.length() <= 8); + U_ASSERT(replacement.length() != 4 || + (replacement.charAt(0) >= u'0' && + replacement.charAt(0) <= u'9')); + }, +#else + [](const char*) {}, + [](const UnicodeString&) { }, +#endif + status); +} + +/** + * Initializes the alias data from the ICU resource bundles. The alias data + * contains alias of language, country, script and variants. + * + * If the alias data has already loaded, then this method simply returns without + * doing anything meaningful. + */ +void U_CALLCONV +AliasData::loadData(UErrorCode &status) +{ +#ifdef LOCALE_CANONICALIZATION_DEBUG + UDate start = uprv_getRawUTCtime(); +#endif // LOCALE_CANONICALIZATION_DEBUG + ucln_common_registerCleanup(UCLN_COMMON_LOCALE_ALIAS, cleanup); + AliasDataBuilder builder; + gSingleton = builder.build(status); +#ifdef LOCALE_CANONICALIZATION_DEBUG + UDate end = uprv_getRawUTCtime(); + printf("AliasData::loadData took total %f ms\n", end - start); +#endif // LOCALE_CANONICALIZATION_DEBUG +} + +/** + * Build the alias data from resources. + */ +AliasData* +AliasDataBuilder::build(UErrorCode &status) { + LocalUResourceBundlePointer metadata( + ures_openDirect(nullptr, "metadata", &status)); + LocalUResourceBundlePointer metadataAlias( + ures_getByKey(metadata.getAlias(), "alias", nullptr, &status)); + LocalUResourceBundlePointer languageAlias( + ures_getByKey(metadataAlias.getAlias(), "language", nullptr, &status)); + LocalUResourceBundlePointer scriptAlias( + ures_getByKey(metadataAlias.getAlias(), "script", nullptr, &status)); + LocalUResourceBundlePointer territoryAlias( + ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status)); + LocalUResourceBundlePointer variantAlias( + ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status)); + + if (U_FAILURE(status)) { + return nullptr; + } + int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0, + variantLength = 0; + + // Read the languageAlias into languageTypes, languageReplacementIndexes + // and strings + UniqueCharStrings strings(status); + LocalMemory<const char*> languageTypes; + LocalMemory<int32_t> languageReplacementIndexes; + readLanguageAlias(languageAlias.getAlias(), + &strings, + languageTypes, + languageReplacementIndexes, + languagesLength, + status); + + // Read the scriptAlias into scriptTypes, scriptReplacementIndexes + // and strings + LocalMemory<const char*> scriptTypes; + LocalMemory<int32_t> scriptReplacementIndexes; + readScriptAlias(scriptAlias.getAlias(), + &strings, + scriptTypes, + scriptReplacementIndexes, + scriptLength, + status); + + // Read the territoryAlias into territoryTypes, territoryReplacementIndexes + // and strings + LocalMemory<const char*> territoryTypes; + LocalMemory<int32_t> territoryReplacementIndexes; + readTerritoryAlias(territoryAlias.getAlias(), + &strings, + territoryTypes, + territoryReplacementIndexes, + territoryLength, status); + + // Read the variantAlias into variantTypes, variantReplacementIndexes + // and strings + LocalMemory<const char*> variantTypes; + LocalMemory<int32_t> variantReplacementIndexes; + readVariantAlias(variantAlias.getAlias(), + &strings, + variantTypes, + variantReplacementIndexes, + variantLength, status); + + if (U_FAILURE(status)) { + return nullptr; + } + + // We can only use strings after freeze it. + strings.freeze(); + + // Build the languageMap from languageTypes & languageReplacementIndexes + CharStringMap languageMap(490, status); + for (int32_t i = 0; U_SUCCESS(status) && i < languagesLength; i++) { + languageMap.put(languageTypes[i], + strings.get(languageReplacementIndexes[i]), + status); + } + + // Build the scriptMap from scriptTypes & scriptReplacementIndexes + CharStringMap scriptMap(1, status); + for (int32_t i = 0; U_SUCCESS(status) && i < scriptLength; i++) { + scriptMap.put(scriptTypes[i], + strings.get(scriptReplacementIndexes[i]), + status); + } + + // Build the territoryMap from territoryTypes & territoryReplacementIndexes + CharStringMap territoryMap(650, status); + for (int32_t i = 0; U_SUCCESS(status) && i < territoryLength; i++) { + territoryMap.put(territoryTypes[i], + strings.get(territoryReplacementIndexes[i]), + status); + } + + // Build the variantMap from variantTypes & variantReplacementIndexes. + CharStringMap variantMap(2, status); + for (int32_t i = 0; U_SUCCESS(status) && i < variantLength; i++) { + variantMap.put(variantTypes[i], + strings.get(variantReplacementIndexes[i]), + status); + } + + if (U_FAILURE(status)) { + return nullptr; + } + + // copy hashtables + auto *data = new AliasData( + std::move(languageMap), + std::move(scriptMap), + std::move(territoryMap), + std::move(variantMap), + strings.orphanCharStrings()); + + if (data == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return data; +} + +/** + * A class that find the replacement values of locale fields by using AliasData. + */ +class AliasReplacer { +public: + AliasReplacer(UErrorCode status) : + language(nullptr), script(nullptr), region(nullptr), + extensions(nullptr), variants(status), + data(nullptr) { + } + ~AliasReplacer() { + } + + // Check the fields inside locale, if need to replace fields, + // place the the replaced locale ID in out and return true. + // Otherwise return false for no replacement or error. + bool replace( + const Locale& locale, CharString& out, UErrorCode status); + +private: + const char* language; + const char* script; + const char* region; + const char* extensions; + UVector variants; + + const AliasData* data; + + inline bool notEmpty(const char* str) { + return str && str[0] != NULL_CHAR; + } + + /** + * If replacement is neither null nor empty and input is either null or empty, + * return replacement. + * If replacement is neither null nor empty but input is not empty, return input. + * If replacement is either null or empty and type is either null or empty, + * return input. + * Otherwise return null. + * replacement input type return + * AAA nullptr * AAA + * AAA BBB * BBB + * nullptr || "" CCC nullptr CCC + * nullptr || "" * DDD nullptr + */ + inline const char* deleteOrReplace( + const char* input, const char* type, const char* replacement) { + return notEmpty(replacement) ? + ((input == nullptr) ? replacement : input) : + ((type == nullptr) ? input : nullptr); + } + + inline bool same(const char* a, const char* b) { + if (a == nullptr && b == nullptr) { + return true; + } + if ((a == nullptr && b != nullptr) || + (a != nullptr && b == nullptr)) { + return false; + } + return uprv_strcmp(a, b) == 0; + } + + // Gather fields and generate locale ID into out. + CharString& outputToString(CharString& out, UErrorCode status); + + // Generate the lookup key. + CharString& generateKey(const char* language, const char* region, + const char* variant, CharString& out, + UErrorCode status); + + void parseLanguageReplacement(const char* replacement, + const char*& replaceLanguage, + const char*& replaceScript, + const char*& replaceRegion, + const char*& replaceVariant, + const char*& replaceExtensions, + UVector& toBeFreed, + UErrorCode& status); + + // Replace by using languageAlias. + bool replaceLanguage(bool checkLanguage, bool checkRegion, + bool checkVariants, UVector& toBeFreed, + UErrorCode& status); + + // Replace by using territoryAlias. + bool replaceTerritory(UVector& toBeFreed, UErrorCode& status); + + // Replace by using scriptAlias. + bool replaceScript(UErrorCode& status); + + // Replace by using variantAlias. + bool replaceVariant(UErrorCode& status); +}; + +CharString& +AliasReplacer::generateKey( + const char* language, const char* region, const char* variant, + CharString& out, UErrorCode status) +{ + out.append(language, status); + if (notEmpty(region)) { + out.append(SEP_CHAR, status) + .append(region, status); + } + if (notEmpty(variant)) { + out.append(SEP_CHAR, status) + .append(variant, status); + } + return out; +} + +void +AliasReplacer::parseLanguageReplacement( + const char* replacement, + const char*& replacedLanguage, + const char*& replacedScript, + const char*& replacedRegion, + const char*& replacedVariant, + const char*& replacedExtensions, + UVector& toBeFreed, + UErrorCode& status) +{ + if (U_FAILURE(status)) { + return; + } + replacedScript = replacedRegion = replacedVariant + = replacedExtensions = nullptr; + if (uprv_strchr(replacement, '_') == nullptr) { + replacedLanguage = replacement; + // reach the end, just return it. + return; + } + // We have multiple field so we have to allocate and parse + CharString* str = new CharString( + replacement, (int32_t)uprv_strlen(replacement), status); + if (U_FAILURE(status)) { + return; + } + if (str == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + toBeFreed.addElement(str, status); + char* data = str->data(); + replacedLanguage = (const char*) data; + char* endOfField = uprv_strchr(data, '_'); + *endOfField = '\0'; // null terminiate it. + endOfField++; + const char* start = endOfField; + endOfField = (char*) uprv_strchr(start, '_'); + size_t len = 0; + if (endOfField == nullptr) { + len = uprv_strlen(start); + } else { + len = endOfField - start; + *endOfField = '\0'; // null terminiate it. + } + if (len == 4 && uprv_isASCIILetter(*start)) { + // Got a script + replacedScript = start; + if (endOfField == nullptr) { + return; + } + start = endOfField++; + endOfField = (char*)uprv_strchr(start, '_'); + if (endOfField == nullptr) { + len = uprv_strlen(start); + } else { + len = endOfField - start; + *endOfField = '\0'; // null terminiate it. + } + } + if (len >= 2 && len <= 3) { + // Got a region + replacedRegion = start; + if (endOfField == nullptr) { + return; + } + start = endOfField++; + endOfField = (char*)uprv_strchr(start, '_'); + if (endOfField == nullptr) { + len = uprv_strlen(start); + } else { + len = endOfField - start; + *endOfField = '\0'; // null terminiate it. + } + } + if (len >= 4) { + // Got a variant + replacedVariant = start; + if (endOfField == nullptr) { + return; + } + start = endOfField++; + } + replacedExtensions = start; +} + +bool +AliasReplacer::replaceLanguage( + bool checkLanguage, bool checkRegion, + bool checkVariants, UVector& toBeFreed, UErrorCode& status) +{ + if (U_FAILURE(status)) { + return false; + } + if ( (checkRegion && region == nullptr) || + (checkVariants && variants.size() == 0)) { + // Nothing to search. + return false; + } + int32_t variant_size = checkVariants ? variants.size() : 1; + // Since we may have more than one variant, we need to loop through them. + const char* searchLanguage = checkLanguage ? language : "und"; + const char* searchRegion = checkRegion ? region : nullptr; + const char* searchVariant = nullptr; + for (int32_t variant_index = 0; + variant_index < variant_size; + variant_index++) { + if (checkVariants) { + U_ASSERT(variant_index < variant_size); + searchVariant = (const char*)(variants.elementAt(variant_index)); + } + + if (searchVariant != nullptr && uprv_strlen(searchVariant) < 4) { + // Do not consider ill-formed variant subtag. + searchVariant = nullptr; + } + CharString typeKey; + generateKey(searchLanguage, searchRegion, searchVariant, typeKey, + status); + if (U_FAILURE(status)) { + return false; + } + const char *replacement = data->languageMap().get(typeKey.data()); + if (replacement == nullptr) { + // Found no replacement data. + continue; + } + + const char* replacedLanguage = nullptr; + const char* replacedScript = nullptr; + const char* replacedRegion = nullptr; + const char* replacedVariant = nullptr; + const char* replacedExtensions = nullptr; + parseLanguageReplacement(replacement, + replacedLanguage, + replacedScript, + replacedRegion, + replacedVariant, + replacedExtensions, + toBeFreed, + status); + replacedLanguage = + (replacedLanguage != nullptr && uprv_strcmp(replacedLanguage, "und") == 0) ? + language : replacedLanguage; + replacedScript = deleteOrReplace(script, nullptr, replacedScript); + replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion); + replacedVariant = deleteOrReplace( + searchVariant, searchVariant, replacedVariant); + + if ( same(language, replacedLanguage) && + same(script, replacedScript) && + same(region, replacedRegion) && + same(searchVariant, replacedVariant) && + replacedExtensions == nullptr) { + // Replacement produce no changes. + continue; + } + + language = replacedLanguage; + region = replacedRegion; + script = replacedScript; + if (searchVariant != nullptr) { + if (notEmpty(replacedVariant)) { + variants.setElementAt((void*)replacedVariant, variant_index); + } else { + variants.removeElementAt(variant_index); + } + } + if (replacedExtensions != nullptr) { + // TODO(ICU-21292) + // DO NOTHING + // UTS35 does not specifiy what should we do if we have extensions in the + // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have + // extensions in them languageAlias: + // i_default => en_x_i_default + // i_enochian => und_x_i_enochian + // i_mingo => see_x_i_mingo + // zh_min => nan_x_zh_min + // But all of them are already changed by code inside ultag_parse() before + // hitting this code. + } + + // Something changed by language alias data. + return true; + } + // Nothing changed by language alias data. + return false; +} + +bool +AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status) +{ + if (U_FAILURE(status)) { + return false; + } + if (region == nullptr) { + // No region to search. + return false; + } + const char *replacement = data->territoryMap().get(region); + if (replacement == nullptr) { + // Found no replacement data for this region. + return false; + } + const char* replacedRegion = replacement; + const char* firstSpace = uprv_strchr(replacement, ' '); + if (firstSpace != nullptr) { + // If there are are more than one region in the replacement. + // We need to check which one match based on the language. + // Cannot use nullptr for language because that will construct + // the default locale, in that case, use "und" to get the correct + // locale. + Locale l(language == nullptr ? "und" : language, nullptr, script); + l.addLikelySubtags(status); + const char* likelyRegion = l.getCountry(); + CharString* item = nullptr; + if (likelyRegion != nullptr && uprv_strlen(likelyRegion) > 0) { + size_t len = uprv_strlen(likelyRegion); + const char* foundInReplacement = uprv_strstr(replacement, + likelyRegion); + if (foundInReplacement != nullptr) { + // Assuming the case there are no three letter region code in + // the replacement of territoryAlias + U_ASSERT(foundInReplacement == replacement || + *(foundInReplacement-1) == ' '); + U_ASSERT(foundInReplacement[len] == ' ' || + foundInReplacement[len] == '\0'); + item = new CharString(foundInReplacement, (int32_t)len, status); + } + } + if (item == nullptr) { + item = new CharString(replacement, + (int32_t)(firstSpace - replacement), status); + } + if (U_FAILURE(status)) { return false; } + if (item == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return false; + } + replacedRegion = item->data(); + toBeFreed.addElement(item, status); + } + U_ASSERT(!same(region, replacedRegion)); + region = replacedRegion; + // The region is changed by data in territory alias. + return true; +} + +bool +AliasReplacer::replaceScript(UErrorCode& status) +{ + if (U_FAILURE(status)) { + return false; + } + if (script == nullptr) { + // No script to search. + return false; + } + const char *replacement = data->scriptMap().get(script); + if (replacement == nullptr) { + // Found no replacement data for this script. + return false; + } + U_ASSERT(!same(script, replacement)); + script = replacement; + // The script is changed by data in script alias. + return true; +} + +bool +AliasReplacer::replaceVariant(UErrorCode& status) +{ + if (U_FAILURE(status)) { + return false; + } + // Since we may have more than one variant, we need to loop through them. + for (int32_t i = 0; i < variants.size(); i++) { + const char *variant = (const char*)(variants.elementAt(i)); + const char *replacement = data->variantMap().get(variant); + if (replacement == nullptr) { + // Found no replacement data for this variant. + continue; + } + U_ASSERT((uprv_strlen(replacement) >= 5 && + uprv_strlen(replacement) <= 8) || + (uprv_strlen(replacement) == 4 && + replacement[0] >= '0' && + replacement[0] <= '9')); + if (!same(variant, replacement)) { + variants.setElementAt((void*)replacement, i); + // Special hack to handle hepburn-heploc => alalc97 + if (uprv_strcmp(variant, "heploc") == 0) { + for (int32_t j = 0; j < variants.size(); j++) { + if (uprv_strcmp((const char*)(variants.elementAt(j)), + "hepburn") == 0) { + variants.removeElementAt(j); + } + } + } + return true; + } + } + return false; +} + +CharString& +AliasReplacer::outputToString( + CharString& out, UErrorCode status) +{ + out.append(language, status); + if (notEmpty(script)) { + out.append(SEP_CHAR, status) + .append(script, status); + } + if (notEmpty(region)) { + out.append(SEP_CHAR, status) + .append(region, status); + } + if (variants.size() > 0) { + if (!notEmpty(script) && !notEmpty(region)) { + out.append(SEP_CHAR, status); + } + variants.sort([](UElement e1, UElement e2) -> int8_t { + return uprv_strcmp( + (const char*)e1.pointer, (const char*)e2.pointer); + }, status); + int32_t variantsStart = out.length(); + for (int32_t i = 0; i < variants.size(); i++) { + out.append(SEP_CHAR, status) + .append((const char*)((UVector*)variants.elementAt(i)), + status); + } + T_CString_toUpperCase(out.data() + variantsStart); + } + if (notEmpty(extensions)) { + CharString tmp("und_", status); + tmp.append(extensions, status); + Locale tmpLocale(tmp.data()); + // only support x extension inside CLDR for now. + U_ASSERT(extensions[0] == 'x'); + out.append(tmpLocale.getName() + 1, status); + } + return out; +} + +bool +AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode status) +{ + data = AliasData::singleton(status); + if (U_FAILURE(status)) { + return false; + } + U_ASSERT(data != nullptr); + out.clear(); + language = locale.getLanguage(); + if (!notEmpty(language)) { + language = nullptr; + } + script = locale.getScript(); + if (!notEmpty(script)) { + script = nullptr; + } + region = locale.getCountry(); + if (!notEmpty(region)) { + region = nullptr; + } + const char* variantsStr = locale.getVariant(); + const char* extensionsStr = locale_getKeywordsStart(locale.getName()); + CharString variantsBuff(variantsStr, -1, status); + if (!variantsBuff.isEmpty()) { + if (U_FAILURE(status)) { return false; } + char* start = variantsBuff.data(); + T_CString_toLowerCase(start); + char* end; + while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr && + U_SUCCESS(status)) { + *end = NULL_CHAR; // null terminate inside variantsBuff + variants.addElement(start, status); + start = end + 1; + } + variants.addElement(start, status); + } + if (U_FAILURE(status)) { return false; } + + // Sort the variants + variants.sort([](UElement e1, UElement e2) -> int8_t { + return uprv_strcmp( + (const char*)e1.pointer, (const char*)e2.pointer); + }, status); + + // A changed count to assert when loop too many times. + int changed = 0; + // A UVector to to hold CharString allocated by the replace* method + // and freed when out of scope from his function. + UVector stringsToBeFreed([](void *obj){ delete ((CharString*) obj); }, + nullptr, 10, status); + while (U_SUCCESS(status)) { + // Something wrong with the data cause looping here more than 10 times + // already. + U_ASSERT(changed < 5); + // From observation of key in data/misc/metadata.txt + // we know currently we only need to search in the following combination + // of fields for type in languageAlias: + // * lang_region_variant + // * lang_region + // * lang_variant + // * lang + // * und_variant + // This assumption is ensured by the U_ASSERT in readLanguageAlias + // + // lang REGION variant + if ( replaceLanguage(true, true, true, stringsToBeFreed, status) || + replaceLanguage(true, true, false, stringsToBeFreed, status) || + replaceLanguage(true, false, true, stringsToBeFreed, status) || + replaceLanguage(true, false, false, stringsToBeFreed, status) || + replaceLanguage(false,false, true, stringsToBeFreed, status) || + replaceTerritory(stringsToBeFreed, status) || + replaceScript(status) || + replaceVariant(status)) { + // Some values in data is changed, try to match from the beginning + // again. + changed++; + continue; + } + // Nothing changed. Break out. + break; + } // while(1) + + if (U_FAILURE(status)) { return false; } + // Nothing changed and we know the order of the vaiants are not change + // because we have no variant or only one. + if (changed == 0 && variants.size() <= 1) { + return false; + } + outputToString(out, status); + if (extensionsStr != nullptr) { + out.append(extensionsStr, status); + } + if (U_FAILURE(status)) { + return false; + } + // If the tag is not changed, return. + if (uprv_strcmp(out.data(), locale.getName()) == 0) { + U_ASSERT(changed == 0); + U_ASSERT(variants.size() > 1); + out.clear(); + return false; + } + return true; +} + +// Return true if the locale is changed during canonicalization. +// The replaced value then will be put into out. +bool +canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status) +{ + AliasReplacer replacer(status); + return replacer.replace(locale, out, status); +} + +// Function to optimize for known cases without so we can skip the loading +// of resources in the startup time until we really need it. +bool +isKnownCanonicalizedLocale(const char* locale, UErrorCode& status) +{ + if ( uprv_strcmp(locale, "c") == 0 || + uprv_strcmp(locale, "en") == 0 || + uprv_strcmp(locale, "en_US") == 0) { + return true; + } + + // common well-known Canonicalized. + umtx_initOnce(gKnownCanonicalizedInitOnce, + &loadKnownCanonicalized, status); + if (U_FAILURE(status)) { + return false; + } + U_ASSERT(gKnownCanonicalized != nullptr); + return uhash_geti(gKnownCanonicalized, locale) != 0; +} + +} // namespace + +// Function for testing. +U_CAPI const char* const* +ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length) +{ + *length = UPRV_LENGTHOF(KNOWN_CANONICALIZED); + return KNOWN_CANONICALIZED; +} + +// Function for testing. +U_CAPI bool +ulocimp_isCanonicalizedLocaleForTest(const char* localeName) +{ + Locale l(localeName); + UErrorCode status = U_ZERO_ERROR; + CharString temp; + return !canonicalizeLocale(l, temp, status) && U_SUCCESS(status); +} + +/*This function initializes a Locale from a C locale ID*/ +Locale& Locale::init(const char* localeID, UBool canonicalize) +{ + fIsBogus = FALSE; + /* Free our current storage */ + if (baseName != fullName) { + uprv_free(baseName); + } + baseName = NULL; + if(fullName != fullNameBuffer) { + uprv_free(fullName); + fullName = fullNameBuffer; + } + + // not a loop: + // just an easy way to have a common error-exit + // without goto and without another function + do { + char *separator; + char *field[5] = {0}; + int32_t fieldLen[5] = {0}; + int32_t fieldIdx; + int32_t variantField; + int32_t length; + UErrorCode err; + + if(localeID == NULL) { + // not an error, just set the default locale + return *this = getDefault(); + } + + /* preset all fields to empty */ + language[0] = script[0] = country[0] = 0; + + // "canonicalize" the locale ID to ICU/Java format + err = U_ZERO_ERROR; + length = canonicalize ? + uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) : + uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err); + + if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) { + /*Go to heap for the fullName if necessary*/ + fullName = (char *)uprv_malloc(sizeof(char)*(length + 1)); + if(fullName == 0) { + fullName = fullNameBuffer; + break; // error: out of memory + } + err = U_ZERO_ERROR; + length = canonicalize ? + uloc_canonicalize(localeID, fullName, length+1, &err) : + uloc_getName(localeID, fullName, length+1, &err); + } + if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) { + /* should never occur */ + break; + } + + variantBegin = length; + + /* after uloc_getName/canonicalize() we know that only '_' are separators */ + /* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */ + separator = field[0] = fullName; + fieldIdx = 1; + char* at = uprv_strchr(fullName, '@'); + while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 && + fieldIdx < UPRV_LENGTHOF(field)-1 && + (at == nullptr || separator < at)) { + field[fieldIdx] = separator + 1; + fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]); + fieldIdx++; + } + // variant may contain @foo or .foo POSIX cruft; remove it + separator = uprv_strchr(field[fieldIdx-1], '@'); + char* sep2 = uprv_strchr(field[fieldIdx-1], '.'); + if (separator!=NULL || sep2!=NULL) { + if (separator==NULL || (sep2!=NULL && separator > sep2)) { + separator = sep2; + } + fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]); + } else { + fieldLen[fieldIdx-1] = length - (int32_t)(field[fieldIdx-1] - fullName); + } + + if (fieldLen[0] >= (int32_t)(sizeof(language))) + { + break; // error: the language field is too long + } + + variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */ + if (fieldLen[0] > 0) { + /* We have a language */ + uprv_memcpy(language, fullName, fieldLen[0]); + language[fieldLen[0]] = 0; + } + if (fieldLen[1] == 4 && uprv_isASCIILetter(field[1][0]) && + uprv_isASCIILetter(field[1][1]) && uprv_isASCIILetter(field[1][2]) && + uprv_isASCIILetter(field[1][3])) { + /* We have at least a script */ + uprv_memcpy(script, field[1], fieldLen[1]); + script[fieldLen[1]] = 0; + variantField++; + } + + if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) { + /* We have a country */ + uprv_memcpy(country, field[variantField], fieldLen[variantField]); + country[fieldLen[variantField]] = 0; + variantField++; + } else if (fieldLen[variantField] == 0) { + variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */ + } + + if (fieldLen[variantField] > 0) { + /* We have a variant */ + variantBegin = (int32_t)(field[variantField] - fullName); + } + + err = U_ZERO_ERROR; + initBaseName(err); + if (U_FAILURE(err)) { + break; + } + + if (canonicalize) { + if (!isKnownCanonicalizedLocale(fullName, err)) { + CharString replaced; + // Not sure it is already canonicalized + if (canonicalizeLocale(*this, replaced, err)) { + U_ASSERT(U_SUCCESS(err)); + // If need replacement, call init again. + init(replaced.data(), false); + } + if (U_FAILURE(err)) { + break; + } + } + } // if (canonicalize) { + + // successful end of init() + return *this; + } while(0); /*loop doesn't iterate*/ + + // when an error occurs, then set this object to "bogus" (there is no UErrorCode here) + setToBogus(); + + return *this; +} + +/* + * Set up the base name. + * If there are no key words, it's exactly the full name. + * If key words exist, it's the full name truncated at the '@' character. + * Need to set up both at init() and after setting a keyword. + */ +void +Locale::initBaseName(UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + U_ASSERT(baseName==NULL || baseName==fullName); + const char *atPtr = uprv_strchr(fullName, '@'); + const char *eqPtr = uprv_strchr(fullName, '='); + if (atPtr && eqPtr && atPtr < eqPtr) { + // Key words exist. + int32_t baseNameLength = (int32_t)(atPtr - fullName); + baseName = (char *)uprv_malloc(baseNameLength + 1); + if (baseName == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_strncpy(baseName, fullName, baseNameLength); + baseName[baseNameLength] = 0; + + // The original computation of variantBegin leaves it equal to the length + // of fullName if there is no variant. It should instead be + // the length of the baseName. + if (variantBegin > baseNameLength) { + variantBegin = baseNameLength; + } + } else { + baseName = fullName; + } +} + + +int32_t +Locale::hashCode() const +{ + return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName))); +} + +void +Locale::setToBogus() { + /* Free our current storage */ + if(baseName != fullName) { + uprv_free(baseName); + } + baseName = NULL; + if(fullName != fullNameBuffer) { + uprv_free(fullName); + fullName = fullNameBuffer; + } + *fullNameBuffer = 0; + *language = 0; + *script = 0; + *country = 0; + fIsBogus = TRUE; + variantBegin = 0; +} + +const Locale& U_EXPORT2 +Locale::getDefault() +{ + { + Mutex lock(&gDefaultLocaleMutex); + if (gDefaultLocale != NULL) { + return *gDefaultLocale; + } + } + UErrorCode status = U_ZERO_ERROR; + return *locale_set_default_internal(NULL, status); +} + + + +void U_EXPORT2 +Locale::setDefault( const Locale& newLocale, + UErrorCode& status) +{ + if (U_FAILURE(status)) { + return; + } + + /* Set the default from the full name string of the supplied locale. + * This is a convenient way to access the default locale caching mechanisms. + */ + const char *localeID = newLocale.getName(); + locale_set_default_internal(localeID, status); +} + +void +Locale::addLikelySubtags(UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + + CharString maximizedLocaleID; + { + CharStringByteSink sink(&maximizedLocaleID); + ulocimp_addLikelySubtags(fullName, sink, &status); + } + + if (U_FAILURE(status)) { + return; + } + + init(maximizedLocaleID.data(), /*canonicalize=*/FALSE); + if (isBogus()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } +} + +void +Locale::minimizeSubtags(UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + + CharString minimizedLocaleID; + { + CharStringByteSink sink(&minimizedLocaleID); + ulocimp_minimizeSubtags(fullName, sink, &status); + } + + if (U_FAILURE(status)) { + return; + } + + init(minimizedLocaleID.data(), /*canonicalize=*/FALSE); + if (isBogus()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } +} + +void +Locale::canonicalize(UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + if (isBogus()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + CharString uncanonicalized(fullName, status); + if (U_FAILURE(status)) { + return; + } + init(uncanonicalized.data(), /*canonicalize=*/TRUE); + if (isBogus()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } +} + +Locale U_EXPORT2 +Locale::forLanguageTag(StringPiece tag, UErrorCode& status) +{ + Locale result(Locale::eBOGUS); + + if (U_FAILURE(status)) { + return result; + } + + // If a BCP 47 language tag is passed as the language parameter to the + // normal Locale constructor, it will actually fall back to invoking + // uloc_forLanguageTag() to parse it if it somehow is able to detect that + // the string actually is BCP 47. This works well for things like strings + // using BCP 47 extensions, but it does not at all work for things like + // legacy language tags (marked as “Type: grandfathered” in BCP 47, + // e.g., "en-GB-oed") which are possible to also + // interpret as ICU locale IDs and because of that won't trigger the BCP 47 + // parsing. Therefore the code here explicitly calls uloc_forLanguageTag() + // and then Locale::init(), instead of just calling the normal constructor. + + CharString localeID; + int32_t parsedLength; + { + CharStringByteSink sink(&localeID); + ulocimp_forLanguageTag( + tag.data(), + tag.length(), + sink, + &parsedLength, + &status); + } + + if (U_FAILURE(status)) { + return result; + } + + if (parsedLength != tag.size()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return result; + } + + result.init(localeID.data(), /*canonicalize=*/FALSE); + if (result.isBogus()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } + return result; +} + +void +Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const +{ + if (U_FAILURE(status)) { + return; + } + + if (fIsBogus) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + ulocimp_toLanguageTag(fullName, sink, /*strict=*/FALSE, &status); +} + +Locale U_EXPORT2 +Locale::createFromName (const char *name) +{ + if (name) { + Locale l(""); + l.init(name, FALSE); + return l; + } + else { + return getDefault(); + } +} + +Locale U_EXPORT2 +Locale::createCanonical(const char* name) { + Locale loc(""); + loc.init(name, TRUE); + return loc; +} + +const char * +Locale::getISO3Language() const +{ + return uloc_getISO3Language(fullName); +} + + +const char * +Locale::getISO3Country() const +{ + return uloc_getISO3Country(fullName); +} + +/** + * Return the LCID value as specified in the "LocaleID" resource for this + * locale. The LocaleID must be expressed as a hexadecimal number, from + * one to four digits. If the LocaleID resource is not present, or is + * in an incorrect format, 0 is returned. The LocaleID is for use in + * Windows (it is an LCID), but is available on all platforms. + */ +uint32_t +Locale::getLCID() const +{ + return uloc_getLCID(fullName); +} + +const char* const* U_EXPORT2 Locale::getISOCountries() +{ + return uloc_getISOCountries(); +} + +const char* const* U_EXPORT2 Locale::getISOLanguages() +{ + return uloc_getISOLanguages(); +} + +// Set the locale's data based on a posix id. +void Locale::setFromPOSIXID(const char *posixID) +{ + init(posixID, TRUE); +} + +const Locale & U_EXPORT2 +Locale::getRoot(void) +{ + return getLocale(eROOT); +} + +const Locale & U_EXPORT2 +Locale::getEnglish(void) +{ + return getLocale(eENGLISH); +} + +const Locale & U_EXPORT2 +Locale::getFrench(void) +{ + return getLocale(eFRENCH); +} + +const Locale & U_EXPORT2 +Locale::getGerman(void) +{ + return getLocale(eGERMAN); +} + +const Locale & U_EXPORT2 +Locale::getItalian(void) +{ + return getLocale(eITALIAN); +} + +const Locale & U_EXPORT2 +Locale::getJapanese(void) +{ + return getLocale(eJAPANESE); +} + +const Locale & U_EXPORT2 +Locale::getKorean(void) +{ + return getLocale(eKOREAN); +} + +const Locale & U_EXPORT2 +Locale::getChinese(void) +{ + return getLocale(eCHINESE); +} + +const Locale & U_EXPORT2 +Locale::getSimplifiedChinese(void) +{ + return getLocale(eCHINA); +} + +const Locale & U_EXPORT2 +Locale::getTraditionalChinese(void) +{ + return getLocale(eTAIWAN); +} + + +const Locale & U_EXPORT2 +Locale::getFrance(void) +{ + return getLocale(eFRANCE); +} + +const Locale & U_EXPORT2 +Locale::getGermany(void) +{ + return getLocale(eGERMANY); +} + +const Locale & U_EXPORT2 +Locale::getItaly(void) +{ + return getLocale(eITALY); +} + +const Locale & U_EXPORT2 +Locale::getJapan(void) +{ + return getLocale(eJAPAN); +} + +const Locale & U_EXPORT2 +Locale::getKorea(void) +{ + return getLocale(eKOREA); +} + +const Locale & U_EXPORT2 +Locale::getChina(void) +{ + return getLocale(eCHINA); +} + +const Locale & U_EXPORT2 +Locale::getPRC(void) +{ + return getLocale(eCHINA); +} + +const Locale & U_EXPORT2 +Locale::getTaiwan(void) +{ + return getLocale(eTAIWAN); +} + +const Locale & U_EXPORT2 +Locale::getUK(void) +{ + return getLocale(eUK); +} + +const Locale & U_EXPORT2 +Locale::getUS(void) +{ + return getLocale(eUS); +} + +const Locale & U_EXPORT2 +Locale::getCanada(void) +{ + return getLocale(eCANADA); +} + +const Locale & U_EXPORT2 +Locale::getCanadaFrench(void) +{ + return getLocale(eCANADA_FRENCH); +} + +const Locale & +Locale::getLocale(int locid) +{ + Locale *localeCache = getLocaleCache(); + U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0)); + if (localeCache == NULL) { + // Failure allocating the locale cache. + // The best we can do is return a NULL reference. + locid = 0; + } + return localeCache[locid]; /*operating on NULL*/ +} + +/* +This function is defined this way in order to get around static +initialization and static destruction. + */ +Locale * +Locale::getLocaleCache(void) +{ + UErrorCode status = U_ZERO_ERROR; + umtx_initOnce(gLocaleCacheInitOnce, locale_init, status); + return gLocaleCache; +} + +class KeywordEnumeration : public StringEnumeration { +private: + char *keywords; + char *current; + int32_t length; + UnicodeString currUSKey; + static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */ + +public: + static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; } + virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); } +public: + KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status) + : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) { + if(U_SUCCESS(status) && keywordLen != 0) { + if(keys == NULL || keywordLen < 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } else { + keywords = (char *)uprv_malloc(keywordLen+1); + if (keywords == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + } + else { + uprv_memcpy(keywords, keys, keywordLen); + keywords[keywordLen] = 0; + current = keywords + currentIndex; + length = keywordLen; + } + } + } + } + + virtual ~KeywordEnumeration(); + + virtual StringEnumeration * clone() const + { + UErrorCode status = U_ZERO_ERROR; + return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status); + } + + virtual int32_t count(UErrorCode &/*status*/) const { + char *kw = keywords; + int32_t result = 0; + while(*kw) { + result++; + kw += uprv_strlen(kw)+1; + } + return result; + } + + virtual const char* next(int32_t* resultLength, UErrorCode& status) { + const char* result; + int32_t len; + if(U_SUCCESS(status) && *current != 0) { + result = current; + len = (int32_t)uprv_strlen(current); + current += len+1; + if(resultLength != NULL) { + *resultLength = len; + } + } else { + if(resultLength != NULL) { + *resultLength = 0; + } + result = NULL; + } + return result; + } + + virtual const UnicodeString* snext(UErrorCode& status) { + int32_t resultLength = 0; + const char *s = next(&resultLength, status); + return setChars(s, resultLength, status); + } + + virtual void reset(UErrorCode& /*status*/) { + current = keywords; + } +}; + +const char KeywordEnumeration::fgClassID = '\0'; + +KeywordEnumeration::~KeywordEnumeration() { + uprv_free(keywords); +} + +// A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in +// the next() method for each keyword before returning it. +class UnicodeKeywordEnumeration : public KeywordEnumeration { +public: + using KeywordEnumeration::KeywordEnumeration; + virtual ~UnicodeKeywordEnumeration(); + + virtual const char* next(int32_t* resultLength, UErrorCode& status) { + const char* legacy_key = KeywordEnumeration::next(nullptr, status); + if (U_SUCCESS(status) && legacy_key != nullptr) { + const char* key = uloc_toUnicodeLocaleKey(legacy_key); + if (key == nullptr) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } else { + if (resultLength != nullptr) { + *resultLength = static_cast<int32_t>(uprv_strlen(key)); + } + return key; + } + } + if (resultLength != nullptr) *resultLength = 0; + return nullptr; + } +}; + +// Out-of-line virtual destructor to serve as the "key function". +UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default; + +StringEnumeration * +Locale::createKeywords(UErrorCode &status) const +{ + StringEnumeration *result = NULL; + + if (U_FAILURE(status)) { + return result; + } + + const char* variantStart = uprv_strchr(fullName, '@'); + const char* assignment = uprv_strchr(fullName, '='); + if(variantStart) { + if(assignment > variantStart) { + CharString keywords; + CharStringByteSink sink(&keywords); + ulocimp_getKeywords(variantStart+1, '@', sink, FALSE, &status); + if (U_SUCCESS(status) && !keywords.isEmpty()) { + result = new KeywordEnumeration(keywords.data(), keywords.length(), 0, status); + if (!result) { + status = U_MEMORY_ALLOCATION_ERROR; + } + } + } else { + status = U_INVALID_FORMAT_ERROR; + } + } + return result; +} + +StringEnumeration * +Locale::createUnicodeKeywords(UErrorCode &status) const +{ + StringEnumeration *result = NULL; + + if (U_FAILURE(status)) { + return result; + } + + const char* variantStart = uprv_strchr(fullName, '@'); + const char* assignment = uprv_strchr(fullName, '='); + if(variantStart) { + if(assignment > variantStart) { + CharString keywords; + CharStringByteSink sink(&keywords); + ulocimp_getKeywords(variantStart+1, '@', sink, FALSE, &status); + if (U_SUCCESS(status) && !keywords.isEmpty()) { + result = new UnicodeKeywordEnumeration(keywords.data(), keywords.length(), 0, status); + if (!result) { + status = U_MEMORY_ALLOCATION_ERROR; + } + } + } else { + status = U_INVALID_FORMAT_ERROR; + } + } + return result; +} + +int32_t +Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const +{ + return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status); +} + +void +Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + + if (fIsBogus) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + // TODO: Remove the need for a const char* to a NUL terminated buffer. + const CharString keywordName_nul(keywordName, status); + if (U_FAILURE(status)) { + return; + } + + ulocimp_getKeywordValue(fullName, keywordName_nul.data(), sink, &status); +} + +void +Locale::getUnicodeKeywordValue(StringPiece keywordName, + ByteSink& sink, + UErrorCode& status) const { + // TODO: Remove the need for a const char* to a NUL terminated buffer. + const CharString keywordName_nul(keywordName, status); + if (U_FAILURE(status)) { + return; + } + + const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data()); + + if (legacy_key == nullptr) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + CharString legacy_value; + { + CharStringByteSink sink(&legacy_value); + getKeywordValue(legacy_key, sink, status); + } + + if (U_FAILURE(status)) { + return; + } + + const char* unicode_value = uloc_toUnicodeLocaleType( + keywordName_nul.data(), legacy_value.data()); + + if (unicode_value == nullptr) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value))); +} + +void +Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status) +{ + if (U_FAILURE(status)) { + return; + } + int32_t bufferLength = uprv_max((int32_t)(uprv_strlen(fullName) + 1), ULOC_FULLNAME_CAPACITY); + int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName, + bufferLength, &status) + 1; + /* Handle the case the current buffer is not enough to hold the new id */ + if (status == U_BUFFER_OVERFLOW_ERROR) { + U_ASSERT(newLength > bufferLength); + char* newFullName = (char *)uprv_malloc(newLength); + if (newFullName == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_strcpy(newFullName, fullName); + if (fullName != fullNameBuffer) { + // if full Name is already on the heap, need to free it. + uprv_free(fullName); + } + fullName = newFullName; + status = U_ZERO_ERROR; + uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status); + } else { + U_ASSERT(newLength <= bufferLength); + } + if (U_SUCCESS(status) && baseName == fullName) { + // May have added the first keyword, meaning that the fullName is no longer also the baseName. + initBaseName(status); + } +} + +void +Locale::setKeywordValue(StringPiece keywordName, + StringPiece keywordValue, + UErrorCode& status) { + // TODO: Remove the need for a const char* to a NUL terminated buffer. + const CharString keywordName_nul(keywordName, status); + const CharString keywordValue_nul(keywordValue, status); + setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status); +} + +void +Locale::setUnicodeKeywordValue(StringPiece keywordName, + StringPiece keywordValue, + UErrorCode& status) { + // TODO: Remove the need for a const char* to a NUL terminated buffer. + const CharString keywordName_nul(keywordName, status); + const CharString keywordValue_nul(keywordValue, status); + + if (U_FAILURE(status)) { + return; + } + + const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data()); + + if (legacy_key == nullptr) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + const char* legacy_value = nullptr; + + if (!keywordValue_nul.isEmpty()) { + legacy_value = + uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data()); + + if (legacy_value == nullptr) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + } + + setKeywordValue(legacy_key, legacy_value, status); +} + +const char * +Locale::getBaseName() const { + return baseName; +} + +Locale::Iterator::~Iterator() = default; + +//eof +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/loclikely.cpp b/thirdparty/icu4c/common/loclikely.cpp new file mode 100644 index 0000000000..94a60aba3e --- /dev/null +++ b/thirdparty/icu4c/common/loclikely.cpp @@ -0,0 +1,1410 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: loclikely.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010feb25 +* created by: Markus W. Scherer +* +* Code for likely and minimized locale subtags, separated out from other .cpp files +* that then do not depend on resource bundle code and likely-subtags data. +*/ + +#include "unicode/bytestream.h" +#include "unicode/utypes.h" +#include "unicode/locid.h" +#include "unicode/putil.h" +#include "unicode/uchar.h" +#include "unicode/uloc.h" +#include "unicode/ures.h" +#include "unicode/uscript.h" +#include "bytesinkutil.h" +#include "charstr.h" +#include "cmemory.h" +#include "cstring.h" +#include "ulocimp.h" +#include "ustr_imp.h" + +/** + * These are the canonical strings for unknown languages, scripts and regions. + **/ +static const char* const unknownLanguage = "und"; +static const char* const unknownScript = "Zzzz"; +static const char* const unknownRegion = "ZZ"; + +/** + * This function looks for the localeID in the likelySubtags resource. + * + * @param localeID The tag to find. + * @param buffer A buffer to hold the matching entry + * @param bufferLength The length of the output buffer + * @return A pointer to "buffer" if found, or a null pointer if not. + */ +static const char* U_CALLCONV +findLikelySubtags(const char* localeID, + char* buffer, + int32_t bufferLength, + UErrorCode* err) { + const char* result = NULL; + + if (!U_FAILURE(*err)) { + int32_t resLen = 0; + const UChar* s = NULL; + UErrorCode tmpErr = U_ZERO_ERROR; + icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr)); + if (U_SUCCESS(tmpErr)) { + icu::CharString und; + if (localeID != NULL) { + if (*localeID == '\0') { + localeID = unknownLanguage; + } else if (*localeID == '_') { + und.append(unknownLanguage, *err); + und.append(localeID, *err); + if (U_FAILURE(*err)) { + return NULL; + } + localeID = und.data(); + } + } + s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr); + + if (U_FAILURE(tmpErr)) { + /* + * If a resource is missing, it's not really an error, it's + * just that we don't have any data for that particular locale ID. + */ + if (tmpErr != U_MISSING_RESOURCE_ERROR) { + *err = tmpErr; + } + } + else if (resLen >= bufferLength) { + /* The buffer should never overflow. */ + *err = U_INTERNAL_PROGRAM_ERROR; + } + else { + u_UCharsToChars(s, buffer, resLen + 1); + if (resLen >= 3 && + uprv_strnicmp(buffer, unknownLanguage, 3) == 0 && + (resLen == 3 || buffer[3] == '_')) { + uprv_memmove(buffer, buffer + 3, resLen - 3 + 1); + } + result = buffer; + } + } else { + *err = tmpErr; + } + } + + return result; +} + +/** + * Append a tag to a buffer, adding the separator if necessary. The buffer + * must be large enough to contain the resulting tag plus any separator + * necessary. The tag must not be a zero-length string. + * + * @param tag The tag to add. + * @param tagLength The length of the tag. + * @param buffer The output buffer. + * @param bufferLength The length of the output buffer. This is an input/ouput parameter. + **/ +static void U_CALLCONV +appendTag( + const char* tag, + int32_t tagLength, + char* buffer, + int32_t* bufferLength, + UBool withSeparator) { + + if (withSeparator) { + buffer[*bufferLength] = '_'; + ++(*bufferLength); + } + + uprv_memmove( + &buffer[*bufferLength], + tag, + tagLength); + + *bufferLength += tagLength; +} + +/** + * Create a tag string from the supplied parameters. The lang, script and region + * parameters may be NULL pointers. If they are, their corresponding length parameters + * must be less than or equal to 0. + * + * If any of the language, script or region parameters are empty, and the alternateTags + * parameter is not NULL, it will be parsed for potential language, script and region tags + * to be used when constructing the new tag. If the alternateTags parameter is NULL, or + * it contains no language tag, the default tag for the unknown language is used. + * + * If the length of the new string exceeds the capacity of the output buffer, + * the function copies as many bytes to the output buffer as it can, and returns + * the error U_BUFFER_OVERFLOW_ERROR. + * + * If an illegal argument is provided, the function returns the error + * U_ILLEGAL_ARGUMENT_ERROR. + * + * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if + * the tag string fits in the output buffer, but the null terminator doesn't. + * + * @param lang The language tag to use. + * @param langLength The length of the language tag. + * @param script The script tag to use. + * @param scriptLength The length of the script tag. + * @param region The region tag to use. + * @param regionLength The length of the region tag. + * @param trailing Any trailing data to append to the new tag. + * @param trailingLength The length of the trailing data. + * @param alternateTags A string containing any alternate tags. + * @param sink The output sink receiving the tag string. + * @param err A pointer to a UErrorCode for error reporting. + **/ +static void U_CALLCONV +createTagStringWithAlternates( + const char* lang, + int32_t langLength, + const char* script, + int32_t scriptLength, + const char* region, + int32_t regionLength, + const char* trailing, + int32_t trailingLength, + const char* alternateTags, + icu::ByteSink& sink, + UErrorCode* err) { + + if (U_FAILURE(*err)) { + goto error; + } + else if (langLength >= ULOC_LANG_CAPACITY || + scriptLength >= ULOC_SCRIPT_CAPACITY || + regionLength >= ULOC_COUNTRY_CAPACITY) { + goto error; + } + else { + /** + * ULOC_FULLNAME_CAPACITY will provide enough capacity + * that we can build a string that contains the language, + * script and region code without worrying about overrunning + * the user-supplied buffer. + **/ + char tagBuffer[ULOC_FULLNAME_CAPACITY]; + int32_t tagLength = 0; + UBool regionAppended = FALSE; + + if (langLength > 0) { + appendTag( + lang, + langLength, + tagBuffer, + &tagLength, + /*withSeparator=*/FALSE); + } + else if (alternateTags == NULL) { + /* + * Use the empty string for an unknown language, if + * we found no language. + */ + } + else { + /* + * Parse the alternateTags string for the language. + */ + char alternateLang[ULOC_LANG_CAPACITY]; + int32_t alternateLangLength = sizeof(alternateLang); + + alternateLangLength = + uloc_getLanguage( + alternateTags, + alternateLang, + alternateLangLength, + err); + if(U_FAILURE(*err) || + alternateLangLength >= ULOC_LANG_CAPACITY) { + goto error; + } + else if (alternateLangLength == 0) { + /* + * Use the empty string for an unknown language, if + * we found no language. + */ + } + else { + appendTag( + alternateLang, + alternateLangLength, + tagBuffer, + &tagLength, + /*withSeparator=*/FALSE); + } + } + + if (scriptLength > 0) { + appendTag( + script, + scriptLength, + tagBuffer, + &tagLength, + /*withSeparator=*/TRUE); + } + else if (alternateTags != NULL) { + /* + * Parse the alternateTags string for the script. + */ + char alternateScript[ULOC_SCRIPT_CAPACITY]; + + const int32_t alternateScriptLength = + uloc_getScript( + alternateTags, + alternateScript, + sizeof(alternateScript), + err); + + if (U_FAILURE(*err) || + alternateScriptLength >= ULOC_SCRIPT_CAPACITY) { + goto error; + } + else if (alternateScriptLength > 0) { + appendTag( + alternateScript, + alternateScriptLength, + tagBuffer, + &tagLength, + /*withSeparator=*/TRUE); + } + } + + if (regionLength > 0) { + appendTag( + region, + regionLength, + tagBuffer, + &tagLength, + /*withSeparator=*/TRUE); + + regionAppended = TRUE; + } + else if (alternateTags != NULL) { + /* + * Parse the alternateTags string for the region. + */ + char alternateRegion[ULOC_COUNTRY_CAPACITY]; + + const int32_t alternateRegionLength = + uloc_getCountry( + alternateTags, + alternateRegion, + sizeof(alternateRegion), + err); + if (U_FAILURE(*err) || + alternateRegionLength >= ULOC_COUNTRY_CAPACITY) { + goto error; + } + else if (alternateRegionLength > 0) { + appendTag( + alternateRegion, + alternateRegionLength, + tagBuffer, + &tagLength, + /*withSeparator=*/TRUE); + + regionAppended = TRUE; + } + } + + /** + * Copy the partial tag from our internal buffer to the supplied + * target. + **/ + sink.Append(tagBuffer, tagLength); + + if (trailingLength > 0) { + if (*trailing != '@') { + sink.Append("_", 1); + if (!regionAppended) { + /* extra separator is required */ + sink.Append("_", 1); + } + } + + /* + * Copy the trailing data into the supplied buffer. + */ + sink.Append(trailing, trailingLength); + } + + return; + } + +error: + + /** + * An overflow indicates the locale ID passed in + * is ill-formed. If we got here, and there was + * no previous error, it's an implicit overflow. + **/ + if (*err == U_BUFFER_OVERFLOW_ERROR || + U_SUCCESS(*err)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } +} + +/** + * Create a tag string from the supplied parameters. The lang, script and region + * parameters may be NULL pointers. If they are, their corresponding length parameters + * must be less than or equal to 0. If the lang parameter is an empty string, the + * default value for an unknown language is written to the output buffer. + * + * If the length of the new string exceeds the capacity of the output buffer, + * the function copies as many bytes to the output buffer as it can, and returns + * the error U_BUFFER_OVERFLOW_ERROR. + * + * If an illegal argument is provided, the function returns the error + * U_ILLEGAL_ARGUMENT_ERROR. + * + * @param lang The language tag to use. + * @param langLength The length of the language tag. + * @param script The script tag to use. + * @param scriptLength The length of the script tag. + * @param region The region tag to use. + * @param regionLength The length of the region tag. + * @param trailing Any trailing data to append to the new tag. + * @param trailingLength The length of the trailing data. + * @param sink The output sink receiving the tag string. + * @param err A pointer to a UErrorCode for error reporting. + **/ +static void U_CALLCONV +createTagString( + const char* lang, + int32_t langLength, + const char* script, + int32_t scriptLength, + const char* region, + int32_t regionLength, + const char* trailing, + int32_t trailingLength, + icu::ByteSink& sink, + UErrorCode* err) +{ + createTagStringWithAlternates( + lang, + langLength, + script, + scriptLength, + region, + regionLength, + trailing, + trailingLength, + NULL, + sink, + err); +} + +/** + * Parse the language, script, and region subtags from a tag string, and copy the + * results into the corresponding output parameters. The buffers are null-terminated, + * unless overflow occurs. + * + * The langLength, scriptLength, and regionLength parameters are input/output + * parameters, and must contain the capacity of their corresponding buffers on + * input. On output, they will contain the actual length of the buffers, not + * including the null terminator. + * + * If the length of any of the output subtags exceeds the capacity of the corresponding + * buffer, the function copies as many bytes to the output buffer as it can, and returns + * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow + * occurs. + * + * If an illegal argument is provided, the function returns the error + * U_ILLEGAL_ARGUMENT_ERROR. + * + * @param localeID The locale ID to parse. + * @param lang The language tag buffer. + * @param langLength The length of the language tag. + * @param script The script tag buffer. + * @param scriptLength The length of the script tag. + * @param region The region tag buffer. + * @param regionLength The length of the region tag. + * @param err A pointer to a UErrorCode for error reporting. + * @return The number of chars of the localeID parameter consumed. + **/ +static int32_t U_CALLCONV +parseTagString( + const char* localeID, + char* lang, + int32_t* langLength, + char* script, + int32_t* scriptLength, + char* region, + int32_t* regionLength, + UErrorCode* err) +{ + const char* position = localeID; + int32_t subtagLength = 0; + + if(U_FAILURE(*err) || + localeID == NULL || + lang == NULL || + langLength == NULL || + script == NULL || + scriptLength == NULL || + region == NULL || + regionLength == NULL) { + goto error; + } + + subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err); + + /* + * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING + * to be an error, because it indicates the user-supplied tag is + * not well-formed. + */ + if(U_FAILURE(*err)) { + goto error; + } + + *langLength = subtagLength; + + /* + * If no language was present, use the empty string instead. + * Otherwise, move past any separator. + */ + if (_isIDSeparator(*position)) { + ++position; + } + + subtagLength = ulocimp_getScript(position, &position, *err).extract(script, *scriptLength, *err); + + if(U_FAILURE(*err)) { + goto error; + } + + *scriptLength = subtagLength; + + if (*scriptLength > 0) { + if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) { + /** + * If the script part is the "unknown" script, then don't return it. + **/ + *scriptLength = 0; + } + + /* + * Move past any separator. + */ + if (_isIDSeparator(*position)) { + ++position; + } + } + + subtagLength = ulocimp_getCountry(position, &position, *err).extract(region, *regionLength, *err); + + if(U_FAILURE(*err)) { + goto error; + } + + *regionLength = subtagLength; + + if (*regionLength > 0) { + if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) { + /** + * If the region part is the "unknown" region, then don't return it. + **/ + *regionLength = 0; + } + } else if (*position != 0 && *position != '@') { + /* back up over consumed trailing separator */ + --position; + } + +exit: + + return (int32_t)(position - localeID); + +error: + + /** + * If we get here, we have no explicit error, it's the result of an + * illegal argument. + **/ + if (!U_FAILURE(*err)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } + + goto exit; +} + +static UBool U_CALLCONV +createLikelySubtagsString( + const char* lang, + int32_t langLength, + const char* script, + int32_t scriptLength, + const char* region, + int32_t regionLength, + const char* variants, + int32_t variantsLength, + icu::ByteSink& sink, + UErrorCode* err) { + /** + * ULOC_FULLNAME_CAPACITY will provide enough capacity + * that we can build a string that contains the language, + * script and region code without worrying about overrunning + * the user-supplied buffer. + **/ + char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY]; + + if(U_FAILURE(*err)) { + goto error; + } + + /** + * Try the language with the script and region first. + **/ + if (scriptLength > 0 && regionLength > 0) { + + const char* likelySubtags = NULL; + + icu::CharString tagBuffer; + { + icu::CharStringByteSink sink(&tagBuffer); + createTagString( + lang, + langLength, + script, + scriptLength, + region, + regionLength, + NULL, + 0, + sink, + err); + } + if(U_FAILURE(*err)) { + goto error; + } + + likelySubtags = + findLikelySubtags( + tagBuffer.data(), + likelySubtagsBuffer, + sizeof(likelySubtagsBuffer), + err); + if(U_FAILURE(*err)) { + goto error; + } + + if (likelySubtags != NULL) { + /* Always use the language tag from the + maximal string, since it may be more + specific than the one provided. */ + createTagStringWithAlternates( + NULL, + 0, + NULL, + 0, + NULL, + 0, + variants, + variantsLength, + likelySubtags, + sink, + err); + return TRUE; + } + } + + /** + * Try the language with just the script. + **/ + if (scriptLength > 0) { + + const char* likelySubtags = NULL; + + icu::CharString tagBuffer; + { + icu::CharStringByteSink sink(&tagBuffer); + createTagString( + lang, + langLength, + script, + scriptLength, + NULL, + 0, + NULL, + 0, + sink, + err); + } + if(U_FAILURE(*err)) { + goto error; + } + + likelySubtags = + findLikelySubtags( + tagBuffer.data(), + likelySubtagsBuffer, + sizeof(likelySubtagsBuffer), + err); + if(U_FAILURE(*err)) { + goto error; + } + + if (likelySubtags != NULL) { + /* Always use the language tag from the + maximal string, since it may be more + specific than the one provided. */ + createTagStringWithAlternates( + NULL, + 0, + NULL, + 0, + region, + regionLength, + variants, + variantsLength, + likelySubtags, + sink, + err); + return TRUE; + } + } + + /** + * Try the language with just the region. + **/ + if (regionLength > 0) { + + const char* likelySubtags = NULL; + + icu::CharString tagBuffer; + { + icu::CharStringByteSink sink(&tagBuffer); + createTagString( + lang, + langLength, + NULL, + 0, + region, + regionLength, + NULL, + 0, + sink, + err); + } + if(U_FAILURE(*err)) { + goto error; + } + + likelySubtags = + findLikelySubtags( + tagBuffer.data(), + likelySubtagsBuffer, + sizeof(likelySubtagsBuffer), + err); + if(U_FAILURE(*err)) { + goto error; + } + + if (likelySubtags != NULL) { + /* Always use the language tag from the + maximal string, since it may be more + specific than the one provided. */ + createTagStringWithAlternates( + NULL, + 0, + script, + scriptLength, + NULL, + 0, + variants, + variantsLength, + likelySubtags, + sink, + err); + return TRUE; + } + } + + /** + * Finally, try just the language. + **/ + { + const char* likelySubtags = NULL; + + icu::CharString tagBuffer; + { + icu::CharStringByteSink sink(&tagBuffer); + createTagString( + lang, + langLength, + NULL, + 0, + NULL, + 0, + NULL, + 0, + sink, + err); + } + if(U_FAILURE(*err)) { + goto error; + } + + likelySubtags = + findLikelySubtags( + tagBuffer.data(), + likelySubtagsBuffer, + sizeof(likelySubtagsBuffer), + err); + if(U_FAILURE(*err)) { + goto error; + } + + if (likelySubtags != NULL) { + /* Always use the language tag from the + maximal string, since it may be more + specific than the one provided. */ + createTagStringWithAlternates( + NULL, + 0, + script, + scriptLength, + region, + regionLength, + variants, + variantsLength, + likelySubtags, + sink, + err); + return TRUE; + } + } + + return FALSE; + +error: + + if (!U_FAILURE(*err)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } + + return FALSE; +} + +#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t count = 0; \ + int32_t i; \ + for (i = 0; i < trailingLength; i++) { \ + if (trailing[i] == '-' || trailing[i] == '_') { \ + count = 0; \ + if (count > 8) { \ + goto error; \ + } \ + } else if (trailing[i] == '@') { \ + break; \ + } else if (count > 8) { \ + goto error; \ + } else { \ + count++; \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +static UBool +_uloc_addLikelySubtags(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err) { + char lang[ULOC_LANG_CAPACITY]; + int32_t langLength = sizeof(lang); + char script[ULOC_SCRIPT_CAPACITY]; + int32_t scriptLength = sizeof(script); + char region[ULOC_COUNTRY_CAPACITY]; + int32_t regionLength = sizeof(region); + const char* trailing = ""; + int32_t trailingLength = 0; + int32_t trailingIndex = 0; + UBool success = FALSE; + + if(U_FAILURE(*err)) { + goto error; + } + if (localeID == NULL) { + goto error; + } + + trailingIndex = parseTagString( + localeID, + lang, + &langLength, + script, + &scriptLength, + region, + ®ionLength, + err); + if(U_FAILURE(*err)) { + /* Overflow indicates an illegal argument error */ + if (*err == U_BUFFER_OVERFLOW_ERROR) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } + + goto error; + } + + /* Find the length of the trailing portion. */ + while (_isIDSeparator(localeID[trailingIndex])) { + trailingIndex++; + } + trailing = &localeID[trailingIndex]; + trailingLength = (int32_t)uprv_strlen(trailing); + + CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); + + success = + createLikelySubtagsString( + lang, + langLength, + script, + scriptLength, + region, + regionLength, + trailing, + trailingLength, + sink, + err); + + if (!success) { + const int32_t localIDLength = (int32_t)uprv_strlen(localeID); + + /* + * If we get here, we need to return localeID. + */ + sink.Append(localeID, localIDLength); + } + + return success; + +error: + + if (!U_FAILURE(*err)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } + return FALSE; +} + +// Add likely subtags to the sink +// return true if the value in the sink is produced by a match during the lookup +// return false if the value in the sink is the same as input because there are +// no match after the lookup. +static UBool _ulocimp_addLikelySubtags(const char*, icu::ByteSink&, UErrorCode*); + +static void +_uloc_minimizeSubtags(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err) { + icu::CharString maximizedTagBuffer; + + char lang[ULOC_LANG_CAPACITY]; + int32_t langLength = sizeof(lang); + char script[ULOC_SCRIPT_CAPACITY]; + int32_t scriptLength = sizeof(script); + char region[ULOC_COUNTRY_CAPACITY]; + int32_t regionLength = sizeof(region); + const char* trailing = ""; + int32_t trailingLength = 0; + int32_t trailingIndex = 0; + UBool successGetMax = FALSE; + + if(U_FAILURE(*err)) { + goto error; + } + else if (localeID == NULL) { + goto error; + } + + trailingIndex = + parseTagString( + localeID, + lang, + &langLength, + script, + &scriptLength, + region, + ®ionLength, + err); + if(U_FAILURE(*err)) { + + /* Overflow indicates an illegal argument error */ + if (*err == U_BUFFER_OVERFLOW_ERROR) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } + + goto error; + } + + /* Find the spot where the variants or the keywords begin, if any. */ + while (_isIDSeparator(localeID[trailingIndex])) { + trailingIndex++; + } + trailing = &localeID[trailingIndex]; + trailingLength = (int32_t)uprv_strlen(trailing); + + CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength); + + { + icu::CharString base; + { + icu::CharStringByteSink baseSink(&base); + createTagString( + lang, + langLength, + script, + scriptLength, + region, + regionLength, + NULL, + 0, + baseSink, + err); + } + + /** + * First, we need to first get the maximization + * from AddLikelySubtags. + **/ + { + icu::CharStringByteSink maxSink(&maximizedTagBuffer); + successGetMax = _ulocimp_addLikelySubtags(base.data(), maxSink, err); + } + } + + if(U_FAILURE(*err)) { + goto error; + } + + if (!successGetMax) { + /** + * If we got here, return the locale ID parameter unchanged. + **/ + const int32_t localeIDLength = (int32_t)uprv_strlen(localeID); + sink.Append(localeID, localeIDLength); + return; + } + + // In the following, the lang, script, region are referring to those in + // the maximizedTagBuffer, not the one in the localeID. + langLength = sizeof(lang); + scriptLength = sizeof(script); + regionLength = sizeof(region); + parseTagString( + maximizedTagBuffer.data(), + lang, + &langLength, + script, + &scriptLength, + region, + ®ionLength, + err); + if(U_FAILURE(*err)) { + goto error; + } + + /** + * Start first with just the language. + **/ + { + icu::CharString tagBuffer; + { + icu::CharStringByteSink tagSink(&tagBuffer); + createLikelySubtagsString( + lang, + langLength, + NULL, + 0, + NULL, + 0, + NULL, + 0, + tagSink, + err); + } + + if(U_FAILURE(*err)) { + goto error; + } + else if (!tagBuffer.isEmpty() && + uprv_strnicmp( + maximizedTagBuffer.data(), + tagBuffer.data(), + tagBuffer.length()) == 0) { + + createTagString( + lang, + langLength, + NULL, + 0, + NULL, + 0, + trailing, + trailingLength, + sink, + err); + return; + } + } + + /** + * Next, try the language and region. + **/ + if (regionLength > 0) { + + icu::CharString tagBuffer; + { + icu::CharStringByteSink tagSink(&tagBuffer); + createLikelySubtagsString( + lang, + langLength, + NULL, + 0, + region, + regionLength, + NULL, + 0, + tagSink, + err); + } + + if(U_FAILURE(*err)) { + goto error; + } + else if (!tagBuffer.isEmpty() && + uprv_strnicmp( + maximizedTagBuffer.data(), + tagBuffer.data(), + tagBuffer.length()) == 0) { + + createTagString( + lang, + langLength, + NULL, + 0, + region, + regionLength, + trailing, + trailingLength, + sink, + err); + return; + } + } + + /** + * Finally, try the language and script. This is our last chance, + * since trying with all three subtags would only yield the + * maximal version that we already have. + **/ + if (scriptLength > 0) { + icu::CharString tagBuffer; + { + icu::CharStringByteSink tagSink(&tagBuffer); + createLikelySubtagsString( + lang, + langLength, + script, + scriptLength, + NULL, + 0, + NULL, + 0, + tagSink, + err); + } + + if(U_FAILURE(*err)) { + goto error; + } + else if (!tagBuffer.isEmpty() && + uprv_strnicmp( + maximizedTagBuffer.data(), + tagBuffer.data(), + tagBuffer.length()) == 0) { + + createTagString( + lang, + langLength, + script, + scriptLength, + NULL, + 0, + trailing, + trailingLength, + sink, + err); + return; + } + } + + { + /** + * If we got here, return the max + trail. + **/ + createTagString( + lang, + langLength, + script, + scriptLength, + region, + regionLength, + trailing, + trailingLength, + sink, + err); + return; + } + +error: + + if (!U_FAILURE(*err)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } +} + +static UBool +do_canonicalize(const char* localeID, + char* buffer, + int32_t bufferCapacity, + UErrorCode* err) +{ + uloc_canonicalize( + localeID, + buffer, + bufferCapacity, + err); + + if (*err == U_STRING_NOT_TERMINATED_WARNING || + *err == U_BUFFER_OVERFLOW_ERROR) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + + return FALSE; + } + else if (U_FAILURE(*err)) { + + return FALSE; + } + else { + return TRUE; + } +} + +U_CAPI int32_t U_EXPORT2 +uloc_addLikelySubtags(const char* localeID, + char* maximizedLocaleID, + int32_t maximizedLocaleIDCapacity, + UErrorCode* status) { + if (U_FAILURE(*status)) { + return 0; + } + + icu::CheckedArrayByteSink sink( + maximizedLocaleID, maximizedLocaleIDCapacity); + + ulocimp_addLikelySubtags(localeID, sink, status); + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*status)) { + return sink.Overflowed() ? reslen : -1; + } + + if (sink.Overflowed()) { + *status = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars( + maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status); + } + + return reslen; +} + +static UBool +_ulocimp_addLikelySubtags(const char* localeID, + icu::ByteSink& sink, + UErrorCode* status) { + char localeBuffer[ULOC_FULLNAME_CAPACITY]; + + if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) { + return _uloc_addLikelySubtags(localeBuffer, sink, status); + } + return FALSE; +} + +U_CAPI void U_EXPORT2 +ulocimp_addLikelySubtags(const char* localeID, + icu::ByteSink& sink, + UErrorCode* status) { + _ulocimp_addLikelySubtags(localeID, sink, status); +} + +U_CAPI int32_t U_EXPORT2 +uloc_minimizeSubtags(const char* localeID, + char* minimizedLocaleID, + int32_t minimizedLocaleIDCapacity, + UErrorCode* status) { + if (U_FAILURE(*status)) { + return 0; + } + + icu::CheckedArrayByteSink sink( + minimizedLocaleID, minimizedLocaleIDCapacity); + + ulocimp_minimizeSubtags(localeID, sink, status); + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*status)) { + return sink.Overflowed() ? reslen : -1; + } + + if (sink.Overflowed()) { + *status = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars( + minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status); + } + + return reslen; +} + +U_CAPI void U_EXPORT2 +ulocimp_minimizeSubtags(const char* localeID, + icu::ByteSink& sink, + UErrorCode* status) { + char localeBuffer[ULOC_FULLNAME_CAPACITY]; + + if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) { + _uloc_minimizeSubtags(localeBuffer, sink, status); + } +} + +// Pairs of (language subtag, + or -) for finding out fast if common languages +// are LTR (minus) or RTL (plus). +static const char LANG_DIR_STRING[] = + "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-"; + +// Implemented here because this calls ulocimp_addLikelySubtags(). +U_CAPI UBool U_EXPORT2 +uloc_isRightToLeft(const char *locale) { + UErrorCode errorCode = U_ZERO_ERROR; + char script[8]; + int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode); + if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || + scriptLength == 0) { + // Fastpath: We know the likely scripts and their writing direction + // for some common languages. + errorCode = U_ZERO_ERROR; + char lang[8]; + int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode); + if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) { + return FALSE; + } + if (langLength > 0) { + const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang); + if (langPtr != NULL) { + switch (langPtr[langLength]) { + case '-': return FALSE; + case '+': return TRUE; + default: break; // partial match of a longer code + } + } + } + // Otherwise, find the likely script. + errorCode = U_ZERO_ERROR; + icu::CharString likely; + { + icu::CharStringByteSink sink(&likely); + ulocimp_addLikelySubtags(locale, sink, &errorCode); + } + if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) { + return FALSE; + } + scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode); + if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING || + scriptLength == 0) { + return FALSE; + } + } + UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); + return uscript_isRightToLeft(scriptCode); +} + +U_NAMESPACE_BEGIN + +UBool +Locale::isRightToLeft() const { + return uloc_isRightToLeft(getBaseName()); +} + +U_NAMESPACE_END + +// The following must at least allow for rg key value (6) plus terminator (1). +#define ULOC_RG_BUFLEN 8 + +U_CAPI int32_t U_EXPORT2 +ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, + char *region, int32_t regionCapacity, UErrorCode* status) { + if (U_FAILURE(*status)) { + return 0; + } + char rgBuf[ULOC_RG_BUFLEN]; + UErrorCode rgStatus = U_ZERO_ERROR; + + // First check for rg keyword value + int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus); + if (U_FAILURE(rgStatus) || rgLen != 6) { + rgLen = 0; + } else { + // rgBuf guaranteed to be zero terminated here, with text len 6 + char *rgPtr = rgBuf; + for (; *rgPtr!= 0; rgPtr++) { + *rgPtr = uprv_toupper(*rgPtr); + } + rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0; + } + + if (rgLen == 0) { + // No valid rg keyword value, try for unicode_region_subtag + rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status); + if (U_FAILURE(*status)) { + rgLen = 0; + } else if (rgLen == 0 && inferRegion) { + // no unicode_region_subtag but inferRegion TRUE, try likely subtags + rgStatus = U_ZERO_ERROR; + icu::CharString locBuf; + { + icu::CharStringByteSink sink(&locBuf); + ulocimp_addLikelySubtags(localeID, sink, &rgStatus); + } + if (U_SUCCESS(rgStatus)) { + rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status); + if (U_FAILURE(*status)) { + rgLen = 0; + } + } + } + } + + rgBuf[rgLen] = 0; + uprv_strncpy(region, rgBuf, regionCapacity); + return u_terminateChars(region, regionCapacity, rgLen, status); +} + diff --git a/thirdparty/icu4c/common/loclikelysubtags.cpp b/thirdparty/icu4c/common/loclikelysubtags.cpp new file mode 100644 index 0000000000..a031bfa587 --- /dev/null +++ b/thirdparty/icu4c/common/loclikelysubtags.cpp @@ -0,0 +1,682 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// loclikelysubtags.cpp +// created: 2019may08 Markus W. Scherer + +#include <utility> +#include "unicode/utypes.h" +#include "unicode/bytestrie.h" +#include "unicode/localpointer.h" +#include "unicode/locid.h" +#include "unicode/uobject.h" +#include "unicode/ures.h" +#include "charstr.h" +#include "cstring.h" +#include "loclikelysubtags.h" +#include "lsr.h" +#include "uassert.h" +#include "ucln_cmn.h" +#include "uhash.h" +#include "uinvchar.h" +#include "umutex.h" +#include "uniquecharstr.h" +#include "uresdata.h" +#include "uresimp.h" + +U_NAMESPACE_BEGIN + +namespace { + +constexpr char PSEUDO_ACCENTS_PREFIX = '\''; // -XA, -PSACCENT +constexpr char PSEUDO_BIDI_PREFIX = '+'; // -XB, -PSBIDI +constexpr char PSEUDO_CRACKED_PREFIX = ','; // -XC, -PSCRACK + +} // namespace + +LocaleDistanceData::LocaleDistanceData(LocaleDistanceData &&data) : + distanceTrieBytes(data.distanceTrieBytes), + regionToPartitions(data.regionToPartitions), + partitions(data.partitions), + paradigms(data.paradigms), paradigmsLength(data.paradigmsLength), + distances(data.distances) { + data.partitions = nullptr; + data.paradigms = nullptr; +} + +LocaleDistanceData::~LocaleDistanceData() { + uprv_free(partitions); + delete[] paradigms; +} + +// TODO(ICU-20777): Rename to just LikelySubtagsData. +struct XLikelySubtagsData { + UResourceBundle *langInfoBundle = nullptr; + UniqueCharStrings strings; + CharStringMap languageAliases; + CharStringMap regionAliases; + const uint8_t *trieBytes = nullptr; + LSR *lsrs = nullptr; + int32_t lsrsLength = 0; + + LocaleDistanceData distanceData; + + XLikelySubtagsData(UErrorCode &errorCode) : strings(errorCode) {} + + ~XLikelySubtagsData() { + ures_close(langInfoBundle); + delete[] lsrs; + } + + void load(UErrorCode &errorCode) { + langInfoBundle = ures_openDirect(nullptr, "langInfo", &errorCode); + if (U_FAILURE(errorCode)) { return; } + StackUResourceBundle stackTempBundle; + ResourceDataValue value; + ures_getValueWithFallback(langInfoBundle, "likely", stackTempBundle.getAlias(), + value, errorCode); + ResourceTable likelyTable = value.getTable(errorCode); + if (U_FAILURE(errorCode)) { return; } + + // Read all strings in the resource bundle and convert them to invariant char *. + LocalMemory<int32_t> languageIndexes, regionIndexes, lsrSubtagIndexes; + int32_t languagesLength = 0, regionsLength = 0, lsrSubtagsLength = 0; + if (!readStrings(likelyTable, "languageAliases", value, + languageIndexes, languagesLength, errorCode) || + !readStrings(likelyTable, "regionAliases", value, + regionIndexes, regionsLength, errorCode) || + !readStrings(likelyTable, "lsrs", value, + lsrSubtagIndexes,lsrSubtagsLength, errorCode)) { + return; + } + if ((languagesLength & 1) != 0 || + (regionsLength & 1) != 0 || + (lsrSubtagsLength % 3) != 0) { + errorCode = U_INVALID_FORMAT_ERROR; + return; + } + if (lsrSubtagsLength == 0) { + errorCode = U_MISSING_RESOURCE_ERROR; + return; + } + + if (!likelyTable.findValue("trie", value)) { + errorCode = U_MISSING_RESOURCE_ERROR; + return; + } + int32_t length; + trieBytes = value.getBinary(length, errorCode); + if (U_FAILURE(errorCode)) { return; } + + // Also read distance/matcher data if available, + // to open & keep only one resource bundle pointer + // and to use one single UniqueCharStrings. + UErrorCode matchErrorCode = U_ZERO_ERROR; + ures_getValueWithFallback(langInfoBundle, "match", stackTempBundle.getAlias(), + value, matchErrorCode); + LocalMemory<int32_t> partitionIndexes, paradigmSubtagIndexes; + int32_t partitionsLength = 0, paradigmSubtagsLength = 0; + if (U_SUCCESS(matchErrorCode)) { + ResourceTable matchTable = value.getTable(errorCode); + if (U_FAILURE(errorCode)) { return; } + + if (matchTable.findValue("trie", value)) { + distanceData.distanceTrieBytes = value.getBinary(length, errorCode); + if (U_FAILURE(errorCode)) { return; } + } + + if (matchTable.findValue("regionToPartitions", value)) { + distanceData.regionToPartitions = value.getBinary(length, errorCode); + if (U_FAILURE(errorCode)) { return; } + if (length < LSR::REGION_INDEX_LIMIT) { + errorCode = U_INVALID_FORMAT_ERROR; + return; + } + } + + if (!readStrings(matchTable, "partitions", value, + partitionIndexes, partitionsLength, errorCode) || + !readStrings(matchTable, "paradigms", value, + paradigmSubtagIndexes, paradigmSubtagsLength, errorCode)) { + return; + } + if ((paradigmSubtagsLength % 3) != 0) { + errorCode = U_INVALID_FORMAT_ERROR; + return; + } + + if (matchTable.findValue("distances", value)) { + distanceData.distances = value.getIntVector(length, errorCode); + if (U_FAILURE(errorCode)) { return; } + if (length < 4) { // LocaleDistance IX_LIMIT + errorCode = U_INVALID_FORMAT_ERROR; + return; + } + } + } else if (matchErrorCode == U_MISSING_RESOURCE_ERROR) { + // ok for likely subtags + } else { // error other than missing resource + errorCode = matchErrorCode; + return; + } + + // Fetch & store invariant-character versions of strings + // only after we have collected and de-duplicated all of them. + strings.freeze(); + + languageAliases = CharStringMap(languagesLength / 2, errorCode); + for (int32_t i = 0; i < languagesLength; i += 2) { + languageAliases.put(strings.get(languageIndexes[i]), + strings.get(languageIndexes[i + 1]), errorCode); + } + + regionAliases = CharStringMap(regionsLength / 2, errorCode); + for (int32_t i = 0; i < regionsLength; i += 2) { + regionAliases.put(strings.get(regionIndexes[i]), + strings.get(regionIndexes[i + 1]), errorCode); + } + if (U_FAILURE(errorCode)) { return; } + + lsrsLength = lsrSubtagsLength / 3; + lsrs = new LSR[lsrsLength]; + if (lsrs == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + for (int32_t i = 0, j = 0; i < lsrSubtagsLength; i += 3, ++j) { + lsrs[j] = LSR(strings.get(lsrSubtagIndexes[i]), + strings.get(lsrSubtagIndexes[i + 1]), + strings.get(lsrSubtagIndexes[i + 2]), + LSR::IMPLICIT_LSR); + } + + if (partitionsLength > 0) { + distanceData.partitions = static_cast<const char **>( + uprv_malloc(partitionsLength * sizeof(const char *))); + if (distanceData.partitions == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + for (int32_t i = 0; i < partitionsLength; ++i) { + distanceData.partitions[i] = strings.get(partitionIndexes[i]); + } + } + + if (paradigmSubtagsLength > 0) { + distanceData.paradigmsLength = paradigmSubtagsLength / 3; + LSR *paradigms = new LSR[distanceData.paradigmsLength]; + if (paradigms == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + for (int32_t i = 0, j = 0; i < paradigmSubtagsLength; i += 3, ++j) { + paradigms[j] = LSR(strings.get(paradigmSubtagIndexes[i]), + strings.get(paradigmSubtagIndexes[i + 1]), + strings.get(paradigmSubtagIndexes[i + 2]), + LSR::DONT_CARE_FLAGS); + } + distanceData.paradigms = paradigms; + } + } + +private: + bool readStrings(const ResourceTable &table, const char *key, ResourceValue &value, + LocalMemory<int32_t> &indexes, int32_t &length, UErrorCode &errorCode) { + if (table.findValue(key, value)) { + ResourceArray stringArray = value.getArray(errorCode); + if (U_FAILURE(errorCode)) { return false; } + length = stringArray.getSize(); + if (length == 0) { return true; } + int32_t *rawIndexes = indexes.allocateInsteadAndCopy(length); + if (rawIndexes == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return false; + } + for (int i = 0; i < length; ++i) { + stringArray.getValue(i, value); // returns TRUE because i < length + rawIndexes[i] = strings.add(value.getUnicodeString(errorCode), errorCode); + if (U_FAILURE(errorCode)) { return false; } + } + } + return true; + } +}; + +namespace { + +XLikelySubtags *gLikelySubtags = nullptr; +UInitOnce gInitOnce = U_INITONCE_INITIALIZER; + +UBool U_CALLCONV cleanup() { + delete gLikelySubtags; + gLikelySubtags = nullptr; + gInitOnce.reset(); + return TRUE; +} + +} // namespace + +void U_CALLCONV XLikelySubtags::initLikelySubtags(UErrorCode &errorCode) { + // This function is invoked only via umtx_initOnce(). + U_ASSERT(gLikelySubtags == nullptr); + XLikelySubtagsData data(errorCode); + data.load(errorCode); + if (U_FAILURE(errorCode)) { return; } + gLikelySubtags = new XLikelySubtags(data); + if (gLikelySubtags == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + ucln_common_registerCleanup(UCLN_COMMON_LIKELY_SUBTAGS, cleanup); +} + +const XLikelySubtags *XLikelySubtags::getSingleton(UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + umtx_initOnce(gInitOnce, &XLikelySubtags::initLikelySubtags, errorCode); + return gLikelySubtags; +} + +XLikelySubtags::XLikelySubtags(XLikelySubtagsData &data) : + langInfoBundle(data.langInfoBundle), + strings(data.strings.orphanCharStrings()), + languageAliases(std::move(data.languageAliases)), + regionAliases(std::move(data.regionAliases)), + trie(data.trieBytes), + lsrs(data.lsrs), +#if U_DEBUG + lsrsLength(data.lsrsLength), +#endif + distanceData(std::move(data.distanceData)) { + data.langInfoBundle = nullptr; + data.lsrs = nullptr; + + // Cache the result of looking up language="und" encoded as "*", and "und-Zzzz" ("**"). + UStringTrieResult result = trie.next(u'*'); + U_ASSERT(USTRINGTRIE_HAS_NEXT(result)); + trieUndState = trie.getState64(); + result = trie.next(u'*'); + U_ASSERT(USTRINGTRIE_HAS_NEXT(result)); + trieUndZzzzState = trie.getState64(); + result = trie.next(u'*'); + U_ASSERT(USTRINGTRIE_HAS_VALUE(result)); + defaultLsrIndex = trie.getValue(); + trie.reset(); + + for (char16_t c = u'a'; c <= u'z'; ++c) { + result = trie.next(c); + if (result == USTRINGTRIE_NO_VALUE) { + trieFirstLetterStates[c - u'a'] = trie.getState64(); + } + trie.reset(); + } +} + +XLikelySubtags::~XLikelySubtags() { + ures_close(langInfoBundle); + delete strings; + delete[] lsrs; +} + +LSR XLikelySubtags::makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const { + const char *name = locale.getName(); + if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=") + // Private use language tag x-subtag-subtag... + return LSR(name, "", "", LSR::EXPLICIT_LSR); + } + return makeMaximizedLsr(locale.getLanguage(), locale.getScript(), locale.getCountry(), + locale.getVariant(), errorCode); +} + +namespace { + +const char *getCanonical(const CharStringMap &aliases, const char *alias) { + const char *canonical = aliases.get(alias); + return canonical == nullptr ? alias : canonical; +} + +} // namespace + +LSR XLikelySubtags::makeMaximizedLsr(const char *language, const char *script, const char *region, + const char *variant, UErrorCode &errorCode) const { + // Handle pseudolocales like en-XA, ar-XB, fr-PSCRACK. + // They should match only themselves, + // not other locales with what looks like the same language and script subtags. + char c1; + if (region[0] == 'X' && (c1 = region[1]) != 0 && region[2] == 0) { + switch (c1) { + case 'A': + return LSR(PSEUDO_ACCENTS_PREFIX, language, script, region, + LSR::EXPLICIT_LSR, errorCode); + case 'B': + return LSR(PSEUDO_BIDI_PREFIX, language, script, region, + LSR::EXPLICIT_LSR, errorCode); + case 'C': + return LSR(PSEUDO_CRACKED_PREFIX, language, script, region, + LSR::EXPLICIT_LSR, errorCode); + default: // normal locale + break; + } + } + + if (variant[0] == 'P' && variant[1] == 'S') { + int32_t lsrFlags = *region == 0 ? + LSR::EXPLICIT_LANGUAGE | LSR::EXPLICIT_SCRIPT : LSR::EXPLICIT_LSR; + if (uprv_strcmp(variant, "PSACCENT") == 0) { + return LSR(PSEUDO_ACCENTS_PREFIX, language, script, + *region == 0 ? "XA" : region, lsrFlags, errorCode); + } else if (uprv_strcmp(variant, "PSBIDI") == 0) { + return LSR(PSEUDO_BIDI_PREFIX, language, script, + *region == 0 ? "XB" : region, lsrFlags, errorCode); + } else if (uprv_strcmp(variant, "PSCRACK") == 0) { + return LSR(PSEUDO_CRACKED_PREFIX, language, script, + *region == 0 ? "XC" : region, lsrFlags, errorCode); + } + // else normal locale + } + + language = getCanonical(languageAliases, language); + // (We have no script mappings.) + region = getCanonical(regionAliases, region); + return maximize(language, script, region); +} + +LSR XLikelySubtags::maximize(const char *language, const char *script, const char *region) const { + if (uprv_strcmp(language, "und") == 0) { + language = ""; + } + if (uprv_strcmp(script, "Zzzz") == 0) { + script = ""; + } + if (uprv_strcmp(region, "ZZ") == 0) { + region = ""; + } + if (*script != 0 && *region != 0 && *language != 0) { + return LSR(language, script, region, LSR::EXPLICIT_LSR); // already maximized + } + + uint32_t retainOldMask = 0; + BytesTrie iter(trie); + uint64_t state; + int32_t value; + // Small optimization: Array lookup for first language letter. + int32_t c0; + if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 && + language[1] != 0 && // language.length() >= 2 + (state = trieFirstLetterStates[c0]) != 0) { + value = trieNext(iter.resetToState64(state), language, 1); + } else { + value = trieNext(iter, language, 0); + } + if (value >= 0) { + if (*language != 0) { + retainOldMask |= 4; + } + state = iter.getState64(); + } else { + retainOldMask |= 4; + iter.resetToState64(trieUndState); // "und" ("*") + state = 0; + } + + if (value > 0) { + // Intermediate or final value from just language. + if (value == SKIP_SCRIPT) { + value = 0; + } + if (*script != 0) { + retainOldMask |= 2; + } + } else { + value = trieNext(iter, script, 0); + if (value >= 0) { + if (*script != 0) { + retainOldMask |= 2; + } + state = iter.getState64(); + } else { + retainOldMask |= 2; + if (state == 0) { + iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**") + } else { + iter.resetToState64(state); + value = trieNext(iter, "", 0); + U_ASSERT(value >= 0); + state = iter.getState64(); + } + } + } + + if (value > 0) { + // Final value from just language or language+script. + if (*region != 0) { + retainOldMask |= 1; + } + } else { + value = trieNext(iter, region, 0); + if (value >= 0) { + if (*region != 0) { + retainOldMask |= 1; + } + } else { + retainOldMask |= 1; + if (state == 0) { + value = defaultLsrIndex; + } else { + iter.resetToState64(state); + value = trieNext(iter, "", 0); + U_ASSERT(value > 0); + } + } + } + U_ASSERT(value < lsrsLength); + const LSR &result = lsrs[value]; + + if (*language == 0) { + language = "und"; + } + + if (retainOldMask == 0) { + // Quickly return a copy of the lookup-result LSR + // without new allocation of the subtags. + return LSR(result.language, result.script, result.region, result.flags); + } + if ((retainOldMask & 4) == 0) { + language = result.language; + } + if ((retainOldMask & 2) == 0) { + script = result.script; + } + if ((retainOldMask & 1) == 0) { + region = result.region; + } + // retainOldMask flags = LSR explicit-subtag flags + return LSR(language, script, region, retainOldMask); +} + +int32_t XLikelySubtags::compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const { + // If likelyInfo >= 0: + // likelyInfo bit 1 is set if the previous comparison with lsr + // was for equal language and script. + // Otherwise the scripts differed. + if (uprv_strcmp(lsr.language, other.language) != 0) { + return 0xfffffffc; // negative, lsr not better than other + } + if (uprv_strcmp(lsr.script, other.script) != 0) { + int32_t index; + if (likelyInfo >= 0 && (likelyInfo & 2) == 0) { + index = likelyInfo >> 2; + } else { + index = getLikelyIndex(lsr.language, ""); + likelyInfo = index << 2; + } + const LSR &likely = lsrs[index]; + if (uprv_strcmp(lsr.script, likely.script) == 0) { + return likelyInfo | 1; + } else { + return likelyInfo & ~1; + } + } + if (uprv_strcmp(lsr.region, other.region) != 0) { + int32_t index; + if (likelyInfo >= 0 && (likelyInfo & 2) != 0) { + index = likelyInfo >> 2; + } else { + index = getLikelyIndex(lsr.language, lsr.region); + likelyInfo = (index << 2) | 2; + } + const LSR &likely = lsrs[index]; + if (uprv_strcmp(lsr.region, likely.region) == 0) { + return likelyInfo | 1; + } else { + return likelyInfo & ~1; + } + } + return likelyInfo & ~1; // lsr not better than other +} + +// Subset of maximize(). +int32_t XLikelySubtags::getLikelyIndex(const char *language, const char *script) const { + if (uprv_strcmp(language, "und") == 0) { + language = ""; + } + if (uprv_strcmp(script, "Zzzz") == 0) { + script = ""; + } + + BytesTrie iter(trie); + uint64_t state; + int32_t value; + // Small optimization: Array lookup for first language letter. + int32_t c0; + if (0 <= (c0 = uprv_lowerOrdinal(language[0])) && c0 <= 25 && + language[1] != 0 && // language.length() >= 2 + (state = trieFirstLetterStates[c0]) != 0) { + value = trieNext(iter.resetToState64(state), language, 1); + } else { + value = trieNext(iter, language, 0); + } + if (value >= 0) { + state = iter.getState64(); + } else { + iter.resetToState64(trieUndState); // "und" ("*") + state = 0; + } + + if (value > 0) { + // Intermediate or final value from just language. + if (value == SKIP_SCRIPT) { + value = 0; + } + } else { + value = trieNext(iter, script, 0); + if (value >= 0) { + state = iter.getState64(); + } else { + if (state == 0) { + iter.resetToState64(trieUndZzzzState); // "und-Zzzz" ("**") + } else { + iter.resetToState64(state); + value = trieNext(iter, "", 0); + U_ASSERT(value >= 0); + state = iter.getState64(); + } + } + } + + if (value > 0) { + // Final value from just language or language+script. + } else { + value = trieNext(iter, "", 0); + U_ASSERT(value > 0); + } + U_ASSERT(value < lsrsLength); + return value; +} + +int32_t XLikelySubtags::trieNext(BytesTrie &iter, const char *s, int32_t i) { + UStringTrieResult result; + uint8_t c; + if ((c = s[i]) == 0) { + result = iter.next(u'*'); + } else { + for (;;) { + c = uprv_invCharToAscii(c); + // EBCDIC: If s[i] is not an invariant character, + // then c is now 0 and will simply not match anything, which is harmless. + uint8_t next = s[++i]; + if (next != 0) { + if (!USTRINGTRIE_HAS_NEXT(iter.next(c))) { + return -1; + } + } else { + // last character of this subtag + result = iter.next(c | 0x80); + break; + } + c = next; + } + } + switch (result) { + case USTRINGTRIE_NO_MATCH: return -1; + case USTRINGTRIE_NO_VALUE: return 0; + case USTRINGTRIE_INTERMEDIATE_VALUE: + U_ASSERT(iter.getValue() == SKIP_SCRIPT); + return SKIP_SCRIPT; + case USTRINGTRIE_FINAL_VALUE: return iter.getValue(); + default: return -1; + } +} + +// TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code +// in loclikely.cpp to this new code, including activating this +// minimizeSubtags() function. The LocaleMatcher does not minimize. +#if 0 +LSR XLikelySubtags::minimizeSubtags(const char *languageIn, const char *scriptIn, + const char *regionIn, ULocale.Minimize fieldToFavor, + UErrorCode &errorCode) const { + LSR result = maximize(languageIn, scriptIn, regionIn); + + // We could try just a series of checks, like: + // LSR result2 = addLikelySubtags(languageIn, "", ""); + // if result.equals(result2) return result2; + // However, we can optimize 2 of the cases: + // (languageIn, "", "") + // (languageIn, "", regionIn) + + // value00 = lookup(result.language, "", "") + BytesTrie iter = new BytesTrie(trie); + int value = trieNext(iter, result.language, 0); + U_ASSERT(value >= 0); + if (value == 0) { + value = trieNext(iter, "", 0); + U_ASSERT(value >= 0); + if (value == 0) { + value = trieNext(iter, "", 0); + } + } + U_ASSERT(value > 0); + LSR value00 = lsrs[value]; + boolean favorRegionOk = false; + if (result.script.equals(value00.script)) { //script is default + if (result.region.equals(value00.region)) { + return new LSR(result.language, "", "", LSR.DONT_CARE_FLAGS); + } else if (fieldToFavor == ULocale.Minimize.FAVOR_REGION) { + return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS); + } else { + favorRegionOk = true; + } + } + + // The last case is not as easy to optimize. + // Maybe do later, but for now use the straightforward code. + LSR result2 = maximize(languageIn, scriptIn, ""); + if (result2.equals(result)) { + return new LSR(result.language, result.script, "", LSR.DONT_CARE_FLAGS); + } else if (favorRegionOk) { + return new LSR(result.language, "", result.region, LSR.DONT_CARE_FLAGS); + } + return result; +} +#endif + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/loclikelysubtags.h b/thirdparty/icu4c/common/loclikelysubtags.h new file mode 100644 index 0000000000..14a01a5eac --- /dev/null +++ b/thirdparty/icu4c/common/loclikelysubtags.h @@ -0,0 +1,121 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// loclikelysubtags.h +// created: 2019may08 Markus W. Scherer + +#ifndef __LOCLIKELYSUBTAGS_H__ +#define __LOCLIKELYSUBTAGS_H__ + +#include <utility> +#include "unicode/utypes.h" +#include "unicode/bytestrie.h" +#include "unicode/locid.h" +#include "unicode/uobject.h" +#include "unicode/ures.h" +#include "charstrmap.h" +#include "lsr.h" + +U_NAMESPACE_BEGIN + +struct XLikelySubtagsData; + +struct LocaleDistanceData { + LocaleDistanceData() = default; + LocaleDistanceData(LocaleDistanceData &&data); + ~LocaleDistanceData(); + + const uint8_t *distanceTrieBytes = nullptr; + const uint8_t *regionToPartitions = nullptr; + const char **partitions = nullptr; + const LSR *paradigms = nullptr; + int32_t paradigmsLength = 0; + const int32_t *distances = nullptr; + +private: + LocaleDistanceData &operator=(const LocaleDistanceData &) = delete; +}; + +// TODO(ICU-20777): Rename to just LikelySubtags. +class XLikelySubtags final : public UMemory { +public: + ~XLikelySubtags(); + + static constexpr int32_t SKIP_SCRIPT = 1; + + // VisibleForTesting + static const XLikelySubtags *getSingleton(UErrorCode &errorCode); + + // VisibleForTesting + LSR makeMaximizedLsrFrom(const Locale &locale, UErrorCode &errorCode) const; + + /** + * Tests whether lsr is "more likely" than other. + * For example, fr-Latn-FR is more likely than fr-Latn-CH because + * FR is the default region for fr-Latn. + * + * The likelyInfo caches lookup information between calls. + * The return value is an updated likelyInfo value, + * with bit 0 set if lsr is "more likely". + * The initial value of likelyInfo must be negative. + */ + int32_t compareLikely(const LSR &lsr, const LSR &other, int32_t likelyInfo) const; + + // TODO(ICU-20777): Switch Locale/uloc_ likely-subtags API from the old code + // in loclikely.cpp to this new code, including activating this + // minimizeSubtags() function. The LocaleMatcher does not minimize. +#if 0 + LSR minimizeSubtags(const char *languageIn, const char *scriptIn, const char *regionIn, + ULocale.Minimize fieldToFavor, UErrorCode &errorCode) const; +#endif + + // visible for LocaleDistance + const LocaleDistanceData &getDistanceData() const { return distanceData; } + +private: + XLikelySubtags(XLikelySubtagsData &data); + XLikelySubtags(const XLikelySubtags &other) = delete; + XLikelySubtags &operator=(const XLikelySubtags &other) = delete; + + static void initLikelySubtags(UErrorCode &errorCode); + + LSR makeMaximizedLsr(const char *language, const char *script, const char *region, + const char *variant, UErrorCode &errorCode) const; + + /** + * Raw access to addLikelySubtags. Input must be in canonical format, eg "en", not "eng" or "EN". + */ + LSR maximize(const char *language, const char *script, const char *region) const; + + int32_t getLikelyIndex(const char *language, const char *script) const; + + static int32_t trieNext(BytesTrie &iter, const char *s, int32_t i); + + UResourceBundle *langInfoBundle; + // We could store the strings by value, except that if there were few enough strings, + // moving the contents could copy it to a different array, + // invalidating the pointers stored in the maps. + CharString *strings; + CharStringMap languageAliases; + CharStringMap regionAliases; + + // The trie maps each lang+script+region (encoded in ASCII) to an index into lsrs. + // There is also a trie value for each intermediate lang and lang+script. + // '*' is used instead of "und", "Zzzz"/"" and "ZZ"/"". + BytesTrie trie; + uint64_t trieUndState; + uint64_t trieUndZzzzState; + int32_t defaultLsrIndex; + uint64_t trieFirstLetterStates[26]; + const LSR *lsrs; +#if U_DEBUG + int32_t lsrsLength; +#endif + + // distance/matcher data: see comment in XLikelySubtagsData::load() + LocaleDistanceData distanceData; +}; + +U_NAMESPACE_END + +#endif // __LOCLIKELYSUBTAGS_H__ diff --git a/thirdparty/icu4c/common/locmap.cpp b/thirdparty/icu4c/common/locmap.cpp new file mode 100644 index 0000000000..29a5646385 --- /dev/null +++ b/thirdparty/icu4c/common/locmap.cpp @@ -0,0 +1,1315 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ********************************************************************** + * Copyright (C) 1996-2016, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** + * + * Provides functionality for mapping between + * LCID and Posix IDs or ICU locale to codepage + * + * Note: All classes and code in this file are + * intended for internal use only. + * + * Methods of interest: + * unsigned long convertToLCID(const char*); + * const char* convertToPosix(unsigned long); + * + * Kathleen Wilson, 4/30/96 + * + * Date Name Description + * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added + * setId() method and safety check against + * MAX_ID_LENGTH. + * 04/23/99 stephen Added C wrapper for convertToPosix. + * 09/18/00 george Removed the memory leaks. + * 08/23/01 george Convert to C + */ + +#include "locmap.h" +#include "bytesinkutil.h" +#include "charstr.h" +#include "cstring.h" +#include "cmemory.h" +#include "ulocimp.h" +#include "unicode/uloc.h" + +#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API +#include <windows.h> +#include <winnls.h> // LCIDToLocaleName and LocaleNameToLCID +#endif + +/* + * Note: + * The mapping from Win32 locale ID numbers to POSIX locale strings should + * be the faster one. + * + * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx + * [MS-LCID] Windows Language Code Identifier (LCID) Reference + */ + +/* +//////////////////////////////////////////////// +// +// Internal Classes for LCID <--> POSIX Mapping +// +///////////////////////////////////////////////// +*/ + +typedef struct ILcidPosixElement +{ + const uint32_t hostID; + const char * const posixID; +} ILcidPosixElement; + +typedef struct ILcidPosixMap +{ + const uint32_t numRegions; + const struct ILcidPosixElement* const regionMaps; +} ILcidPosixMap; + + +/* +///////////////////////////////////////////////// +// +// Easy macros to make the LCID <--> POSIX Mapping +// +///////////////////////////////////////////////// +*/ + +/** + * The standard one language/one country mapping for LCID. + * The first element must be the language, and the following + * elements are the language with the country. + * @param hostID LCID in host format such as 0x044d + * @param languageID posix ID of just the language such as 'de' + * @param posixID posix ID of the language_TERRITORY such as 'de_CH' + */ +#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \ +static const ILcidPosixElement locmap_ ## languageID [] = { \ + {LANGUAGE_LCID(hostID), #languageID}, /* parent locale */ \ + {hostID, #posixID}, \ +}; + +/** + * Define a subtable by ID + * @param id the POSIX ID, either a language or language_TERRITORY + */ +#define ILCID_POSIX_SUBTABLE(id) \ +static const ILcidPosixElement locmap_ ## id [] = + + +/** + * Create the map for the posixID. This macro supposes that the language string + * name is the same as the global variable name, and that the first element + * in the ILcidPosixElement is just the language. + * @param _posixID the full POSIX ID for this entry. + */ +#define ILCID_POSIX_MAP(_posixID) \ + {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID} + +/* +//////////////////////////////////////////// +// +// Create the table of LCID to POSIX Mapping +// None of it should be dynamically created. +// +// Keep static locale variables inside the function so that +// it can be created properly during static init. +// +// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier +// (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx +// +// Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be +// maintained for support of older Windows version. +// Update: Windows 7 (091130) +// +// Note: Microsoft assign a different LCID if a locale has a sorting variant. POSIX IDs below may contain +// @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is +// called from uloc_getLCID(), keywords other than collation are already removed. If we really need +// to support other keywords in this mapping data, we must update the implementation. +//////////////////////////////////////////// +*/ + +// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as +// LocaleNameToLCID and LCIDToLocaleName provide 90% of these. + +ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA) + +ILCID_POSIX_SUBTABLE(ar) { + {0x01, "ar"}, + {0x3801, "ar_AE"}, + {0x3c01, "ar_BH"}, + {0x1401, "ar_DZ"}, + {0x0c01, "ar_EG"}, + {0x0801, "ar_IQ"}, + {0x2c01, "ar_JO"}, + {0x3401, "ar_KW"}, + {0x3001, "ar_LB"}, + {0x1001, "ar_LY"}, + {0x1801, "ar_MA"}, + {0x1801, "ar_MO"}, + {0x2001, "ar_OM"}, + {0x4001, "ar_QA"}, + {0x0401, "ar_SA"}, + {0x2801, "ar_SY"}, + {0x1c01, "ar_TN"}, + {0x2401, "ar_YE"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN) +ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET) +ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL) + +ILCID_POSIX_SUBTABLE(az) { + {0x2c, "az"}, + {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */ + {0x742c, "az_Cyrl"}, /* Cyrillic based */ + {0x042c, "az_Latn_AZ"}, /* Latin based */ + {0x782c, "az_Latn"}, /* Latin based */ + {0x042c, "az_AZ"} /* Latin based */ +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU) +ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY) + +/*ILCID_POSIX_SUBTABLE(ber) { + {0x5f, "ber"}, + {0x045f, "ber_Arab_DZ"}, + {0x045f, "ber_Arab"}, + {0x085f, "ber_Latn_DZ"}, + {0x085f, "ber_Latn"} +};*/ + +ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG) + +ILCID_POSIX_SUBTABLE(bin) { + {0x66, "bin"}, + {0x0466, "bin_NG"} +}; + +ILCID_POSIX_SUBTABLE(bn) { + {0x45, "bn"}, + {0x0845, "bn_BD"}, + {0x0445, "bn_IN"} +}; + +ILCID_POSIX_SUBTABLE(bo) { + {0x51, "bo"}, + {0x0851, "bo_BT"}, + {0x0451, "bo_CN"}, + {0x0c51, "dz_BT"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR) + +ILCID_POSIX_SUBTABLE(ca) { + {0x03, "ca"}, + {0x0403, "ca_ES"}, + {0x0803, "ca_ES_VALENCIA"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR) + +ILCID_POSIX_SUBTABLE(chr) { + {0x05c, "chr"}, + {0x7c5c, "chr_Cher"}, + {0x045c, "chr_Cher_US"}, + {0x045c, "chr_US"} +}; + +// ICU has chosen different names for these. +ILCID_POSIX_SUBTABLE(ckb) { + {0x92, "ckb"}, + {0x7c92, "ckb_Arab"}, + {0x0492, "ckb_Arab_IQ"} +}; + +/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */ +ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ) + +ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB) +ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK) + +// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names +ILCID_POSIX_SUBTABLE(de) { + {0x07, "de"}, + {0x0c07, "de_AT"}, + {0x0807, "de_CH"}, + {0x0407, "de_DE"}, + {0x1407, "de_LI"}, + {0x1007, "de_LU"}, + {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/ + {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/ +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV) +ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR) + +// Windows uses an empty string for 'invariant' +ILCID_POSIX_SUBTABLE(en) { + {0x09, "en"}, + {0x0c09, "en_AU"}, + {0x2809, "en_BZ"}, + {0x1009, "en_CA"}, + {0x0809, "en_GB"}, + {0x3c09, "en_HK"}, + {0x3809, "en_ID"}, + {0x1809, "en_IE"}, + {0x4009, "en_IN"}, + {0x2009, "en_JM"}, + {0x4409, "en_MY"}, + {0x1409, "en_NZ"}, + {0x3409, "en_PH"}, + {0x4809, "en_SG"}, + {0x2C09, "en_TT"}, + {0x0409, "en_US"}, + {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */ + {0x2409, "en_029"}, + {0x1c09, "en_ZA"}, + {0x3009, "en_ZW"}, + {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */ + {0x0409, "en_AS"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ + {0x0409, "en_GU"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ + {0x0409, "en_MH"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ + {0x0409, "en_MP"}, /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ + {0x0409, "en_UM"} /* Alias for en_US. Leave last. On Windows8+ This is 0x1000 or dynamically assigned */ +}; + +ILCID_POSIX_SUBTABLE(en_US_POSIX) { + {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */ +}; + +// Windows doesn't know POSIX or BCP47 Unicode traditional sort names +ILCID_POSIX_SUBTABLE(es) { + {0x0a, "es"}, + {0x2c0a, "es_AR"}, + {0x400a, "es_BO"}, + {0x340a, "es_CL"}, + {0x240a, "es_CO"}, + {0x140a, "es_CR"}, + {0x5c0a, "es_CU"}, + {0x1c0a, "es_DO"}, + {0x300a, "es_EC"}, + {0x0c0a, "es_ES"}, /*Modern sort.*/ + {0x100a, "es_GT"}, + {0x480a, "es_HN"}, + {0x080a, "es_MX"}, + {0x4c0a, "es_NI"}, + {0x180a, "es_PA"}, + {0x280a, "es_PE"}, + {0x500a, "es_PR"}, + {0x3c0a, "es_PY"}, + {0x440a, "es_SV"}, + {0x540a, "es_US"}, + {0x380a, "es_UY"}, + {0x200a, "es_VE"}, + {0x580a, "es_419"}, + {0x040a, "es_ES@collation=traditional"}, + {0x040a, "es@collation=traditional"} // Windows will treat this as es-ES@collation=traditional +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE) +ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES) + +/* ISO-639 doesn't distinguish between Persian and Dari.*/ +ILCID_POSIX_SUBTABLE(fa) { + {0x29, "fa"}, + {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */ + {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */ +}; + + +/* duplicate for roundtripping */ +ILCID_POSIX_SUBTABLE(fa_AF) { + {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */ + {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */ +}; + +ILCID_POSIX_SUBTABLE(ff) { + {0x67, "ff"}, + {0x7c67, "ff_Latn"}, + {0x0867, "ff_Latn_SN"}, + {0x0467, "ff_NG"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI) +ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH) +ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO) + +ILCID_POSIX_SUBTABLE(fr) { + {0x0c, "fr"}, + {0x080c, "fr_BE"}, + {0x0c0c, "fr_CA"}, + {0x240c, "fr_CD"}, + {0x240c, "fr_CG"}, + {0x100c, "fr_CH"}, + {0x300c, "fr_CI"}, + {0x2c0c, "fr_CM"}, + {0x040c, "fr_FR"}, + {0x3c0c, "fr_HT"}, + {0x140c, "fr_LU"}, + {0x380c, "fr_MA"}, + {0x180c, "fr_MC"}, + {0x340c, "fr_ML"}, + {0x200c, "fr_RE"}, + {0x280c, "fr_SN"}, + {0xe40c, "fr_015"}, + {0x1c0c, "fr_029"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG) + +ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL) + +ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */ + {0x3c, "ga"}, + {0x083c, "ga_IE"}, + {0x043c, "gd_GB"} +}; + +ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */ + {0x91, "gd"}, + {0x0491, "gd_GB"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES) +ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN) +ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY) +ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR) + +ILCID_POSIX_SUBTABLE(ha) { + {0x68, "ha"}, + {0x7c68, "ha_Latn"}, + {0x0468, "ha_Latn_NG"}, +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US) +ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL) +ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN) + +/* This LCID is really four different locales.*/ +ILCID_POSIX_SUBTABLE(hr) { + {0x1a, "hr"}, + {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */ + {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */ + {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */ + {0x781a, "bs"}, /* Bosnian */ + {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */ + {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */ + {0x101a, "hr_BA"}, /* Croatian in Bosnia */ + {0x041a, "hr_HR"}, /* Croatian*/ + {0x2c1a, "sr_Latn_ME"}, + {0x241a, "sr_Latn_RS"}, + {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */ + {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/ + {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */ + {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */ + {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/ + {0x301a, "sr_Cyrl_ME"}, + {0x281a, "sr_Cyrl_RS"}, + {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */ + {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */ +}; + +ILCID_POSIX_SUBTABLE(hsb) { + {0x2E, "hsb"}, + {0x042E, "hsb_DE"}, + {0x082E, "dsb_DE"}, + {0x7C2E, "dsb"}, +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU) +ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM) + +ILCID_POSIX_SUBTABLE(ibb) { + {0x69, "ibb"}, + {0x0469, "ibb_NG"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID) +ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG) +ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN) +ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS) + +ILCID_POSIX_SUBTABLE(it) { + {0x10, "it"}, + {0x0810, "it_CH"}, + {0x0410, "it_IT"} +}; + +ILCID_POSIX_SUBTABLE(iu) { + {0x5d, "iu"}, + {0x045d, "iu_Cans_CA"}, + {0x785d, "iu_Cans"}, + {0x085d, "iu_Latn_CA"}, + {0x7c5d, "iu_Latn"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/ +ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP) +ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE) +ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ) +ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL) +ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH) +ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN) + +ILCID_POSIX_SUBTABLE(ko) { + {0x12, "ko"}, + {0x0812, "ko_KP"}, + {0x0412, "ko_KR"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN) +ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG) + +ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */ + {0x60, "ks"}, + {0x0460, "ks_Arab_IN"}, + {0x0860, "ks_Deva_IN"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */ + +ILCID_POSIX_SUBTABLE(la) { + {0x76, "la"}, + {0x0476, "la_001"}, + {0x0476, "la_IT"} /*Left in for compatibility*/ +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU) +ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA) +ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT) +ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV) +ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ) +ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK) +ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN) + +ILCID_POSIX_SUBTABLE(mn) { + {0x50, "mn"}, + {0x0450, "mn_MN"}, + {0x7c50, "mn_Mong"}, + {0x0850, "mn_Mong_CN"}, + {0x0850, "mn_CN"}, + {0x7850, "mn_Cyrl"}, + {0x0c50, "mn_Mong_MN"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN) +ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA) +ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN) + +ILCID_POSIX_SUBTABLE(ms) { + {0x3e, "ms"}, + {0x083e, "ms_BN"}, /* Brunei Darussalam*/ + {0x043e, "ms_MY"} /* Malaysia*/ +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT) +ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM) + +ILCID_POSIX_SUBTABLE(ne) { + {0x61, "ne"}, + {0x0861, "ne_IN"}, /* India*/ + {0x0461, "ne_NP"} /* Nepal*/ +}; + +ILCID_POSIX_SUBTABLE(nl) { + {0x13, "nl"}, + {0x0813, "nl_BE"}, + {0x0413, "nl_NL"} +}; + +/* The "no" locale split into nb and nn. By default in ICU, "no" is nb.*/ +// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here. +ILCID_POSIX_SUBTABLE(no) { + {0x14, "no"}, /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */ + {0x7c14, "nb"}, /* really nb */ + {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */ + {0x0414, "no_NO"}, /* really nb_NO */ + {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */ + {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */ + {0x0814, "no_NO_NY"}/* really nn_NO */ +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */ +ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR) + +ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */ + {0x72, "om"}, + {0x0472, "om_ET"}, + {0x0472, "gaz_ET"} +}; + +/* Declared as or_IN to get around compiler errors*/ +ILCID_POSIX_SUBTABLE(or_IN) { + {0x48, "or"}, + {0x0448, "or_IN"}, +}; + +ILCID_POSIX_SUBTABLE(pa) { + {0x46, "pa"}, + {0x0446, "pa_IN"}, + {0x0846, "pa_Arab_PK"}, + {0x0846, "pa_PK"} +}; + +ILCID_POSIX_SUBTABLE(pap) { + {0x79, "pap"}, + {0x0479, "pap_029"}, + {0x0479, "pap_AN"} /*Left in for compatibility*/ +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL) +ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF) + +ILCID_POSIX_SUBTABLE(pt) { + {0x16, "pt"}, + {0x0416, "pt_BR"}, + {0x0816, "pt_PT"} +}; + +ILCID_POSIX_SUBTABLE(qu) { + {0x6b, "qu"}, + {0x046b, "qu_BO"}, + {0x086b, "qu_EC"}, + {0x0C6b, "qu_PE"}, + {0x046b, "quz_BO"}, + {0x086b, "quz_EC"}, + {0x0C6b, "quz_PE"} +}; + +ILCID_POSIX_SUBTABLE(quc) { + {0x93, "quc"}, + {0x0493, "quc_CO"}, + /* + "quc_Latn_GT" is an exceptional case. Language ID of "quc" + is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be + under the group of "qut". "qut" is a retired ISO 639-3 language + code for West Central Quiche, and merged to "quc". + It looks Windows previously reserved "qut" for K'iche', but, + decided to use "quc" when adding a locale for K'iche' (Guatemala). + + This data structure used here assumes language ID bits in + LCID is unique for alphabetic language code. But this is not true + for "quc_Latn_GT". If we don't have the data below, LCID look up + by alphabetic locale ID (POSIX) will fail. The same entry is found + under "qut" below, which is required for reverse look up. + */ + {0x0486, "quc_Latn_GT"} +}; + +ILCID_POSIX_SUBTABLE(qut) { + {0x86, "qut"}, + {0x0486, "qut_GT"}, + /* + See the note in "quc" above. + */ + {0x0486, "quc_Latn_GT"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH) + +ILCID_POSIX_SUBTABLE(ro) { + {0x18, "ro"}, + {0x0418, "ro_RO"}, + {0x0818, "ro_MD"} +}; + +// TODO: This is almost certainly 'wrong'. 0 in Windows is a synonym for LOCALE_USER_DEFAULT. +// More likely this is a similar concept to the Windows 0x7f Invariant locale "" +// (Except that it's not invariant in ICU) +ILCID_POSIX_SUBTABLE(root) { + {0x00, "root"} +}; + +ILCID_POSIX_SUBTABLE(ru) { + {0x19, "ru"}, + {0x0419, "ru_RU"}, + {0x0819, "ru_MD"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW) +ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN) +ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU) + +ILCID_POSIX_SUBTABLE(sd) { + {0x59, "sd"}, + {0x0459, "sd_Deva_IN"}, + {0x0459, "sd_IN"}, + {0x0859, "sd_Arab_PK"}, + {0x0859, "sd_PK"}, + {0x7c59, "sd_Arab"} +}; + +ILCID_POSIX_SUBTABLE(se) { + {0x3b, "se"}, + {0x0c3b, "se_FI"}, + {0x043b, "se_NO"}, + {0x083b, "se_SE"}, + {0x783b, "sma"}, + {0x183b, "sma_NO"}, + {0x1c3b, "sma_SE"}, + {0x7c3b, "smj"}, + {0x703b, "smn"}, + {0x743b, "sms"}, + {0x103b, "smj_NO"}, + {0x143b, "smj_SE"}, + {0x243b, "smn_FI"}, + {0x203b, "sms_FI"}, +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK) +ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK) +ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI) + +ILCID_POSIX_SUBTABLE(so) { + {0x77, "so"}, + {0x0477, "so_SO"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL) +ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA) + +ILCID_POSIX_SUBTABLE(sv) { + {0x1d, "sv"}, + {0x081d, "sv_FI"}, + {0x041d, "sv_SE"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE) +ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY) + +ILCID_POSIX_SUBTABLE(ta) { + {0x49, "ta"}, + {0x0449, "ta_IN"}, + {0x0849, "ta_LK"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN) + +/* Cyrillic based by default */ +ILCID_POSIX_SUBTABLE(tg) { + {0x28, "tg"}, + {0x7c28, "tg_Cyrl"}, + {0x0428, "tg_Cyrl_TJ"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH) + +ILCID_POSIX_SUBTABLE(ti) { + {0x73, "ti"}, + {0x0873, "ti_ER"}, + {0x0473, "ti_ET"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM) + +ILCID_POSIX_SUBTABLE(tn) { + {0x32, "tn"}, + {0x0832, "tn_BW"}, + {0x0432, "tn_ZA"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR) +ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA) +ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU) + +ILCID_POSIX_SUBTABLE(tzm) { + {0x5f, "tzm"}, + {0x7c5f, "tzm_Latn"}, + {0x085f, "tzm_Latn_DZ"}, + {0x105f, "tzm_Tfng_MA"}, + {0x045f, "tzm_Arab_MA"}, + {0x045f, "tmz"} +}; + +ILCID_POSIX_SUBTABLE(ug) { + {0x80, "ug"}, + {0x0480, "ug_CN"}, + {0x0480, "ug_Arab_CN"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA) + +ILCID_POSIX_SUBTABLE(ur) { + {0x20, "ur"}, + {0x0820, "ur_IN"}, + {0x0420, "ur_PK"} +}; + +ILCID_POSIX_SUBTABLE(uz) { + {0x43, "uz"}, + {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */ + {0x7843, "uz_Cyrl"}, /* Cyrillic based */ + {0x0843, "uz_UZ"}, /* Cyrillic based */ + {0x0443, "uz_Latn_UZ"}, /* Latin based */ + {0x7c43, "uz_Latn"} /* Latin based */ +}; + +ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */ + {0x33, "ve"}, + {0x0433, "ve_ZA"}, + {0x0433, "ven_ZA"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN) +ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN) +ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA) + +ILCID_POSIX_SUBTABLE(yi) { + {0x003d, "yi"}, + {0x043d, "yi_001"} +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG) + +// Windows & ICU tend to different names for some of these +// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works. +ILCID_POSIX_SUBTABLE(zh) { + {0x0004, "zh_Hans"}, + {0x7804, "zh"}, + {0x0804, "zh_CN"}, + {0x0804, "zh_Hans_CN"}, + {0x0c04, "zh_Hant_HK"}, + {0x0c04, "zh_HK"}, + {0x1404, "zh_Hant_MO"}, + {0x1404, "zh_MO"}, + {0x1004, "zh_Hans_SG"}, + {0x1004, "zh_SG"}, + {0x0404, "zh_Hant_TW"}, + {0x7c04, "zh_Hant"}, + {0x0404, "zh_TW"}, + {0x30404,"zh_Hant_TW"}, /* Bopomofo order */ + {0x30404,"zh_TW"}, /* Bopomofo order */ + {0x20004,"zh@collation=stroke"}, + {0x20404,"zh_Hant@collation=stroke"}, + {0x20404,"zh_Hant_TW@collation=stroke"}, + {0x20404,"zh_TW@collation=stroke"}, + {0x20804,"zh_Hans@collation=stroke"}, + {0x20804,"zh_Hans_CN@collation=stroke"}, + {0x20804,"zh_CN@collation=stroke"} + // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804 +}; + +ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA) + +/* This must be static and grouped by LCID. */ +static const ILcidPosixMap gPosixIDmap[] = { + ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */ + ILCID_POSIX_MAP(am), /* am Amharic 0x5e */ + ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */ + ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */ + ILCID_POSIX_MAP(as), /* as Assamese 0x4d */ + ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */ + ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */ + ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */ +/* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */ + ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */ + ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */ + ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */ + ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */ + ILCID_POSIX_MAP(br), /* br Breton 0x7e */ + ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */ + ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */ + ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */ + ILCID_POSIX_MAP(co), /* co Corsican 0x83 */ + ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */ + ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */ + ILCID_POSIX_MAP(da), /* da Danish 0x06 */ + ILCID_POSIX_MAP(de), /* de German 0x07 */ + ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */ + ILCID_POSIX_MAP(el), /* el Greek 0x08 */ + ILCID_POSIX_MAP(en), /* en English 0x09 */ + ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */ + ILCID_POSIX_MAP(es), /* es Spanish 0x0a */ + ILCID_POSIX_MAP(et), /* et Estonian 0x25 */ + ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */ + ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */ + ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */ + ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */ + ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */ + ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */ + ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */ + ILCID_POSIX_MAP(fr), /* fr French 0x0c */ + ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */ + ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */ + ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */ + ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */ + ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */ + ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */ + ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */ + ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */ + ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */ + ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */ + ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */ + ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */ + ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */ + ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */ + ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */ + ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */ + ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */ + ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */ + ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */ + ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */ + ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */ + ILCID_POSIX_MAP(it), /* it Italian 0x10 */ + ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */ + ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */ + ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */ + ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */ + ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */ + ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */ + ILCID_POSIX_MAP(km), /* km Khmer 0x53 */ + ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */ + ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */ + ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */ + ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */ + ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */ + ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */ + ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */ + ILCID_POSIX_MAP(la), /* la Latin 0x76 */ + ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */ + ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */ + ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */ + ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */ + ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */ + ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */ + ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */ + ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */ + ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */ + ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */ + ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */ + ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */ + ILCID_POSIX_MAP(my), /* my Burmese 0x55 */ +/* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */ + ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */ + ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */ +/* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */ + ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */ + ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */ + ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */ + ILCID_POSIX_MAP(om), /* om Oromo 0x72 */ + ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */ + ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */ + ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */ + ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */ + ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */ + ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */ + ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */ + ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */ + ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */ + ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */ + ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */ + ILCID_POSIX_MAP(root), /* root 0x00 */ + ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */ + ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */ + ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */ + ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */ + ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */ + ILCID_POSIX_MAP(se), /* se Sami 0x3b */ +/* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */ + ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */ + ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */ + ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */ + ILCID_POSIX_MAP(so), /* so Somali 0x77 */ + ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */ +/* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */ + ILCID_POSIX_MAP(st), /* st Sutu 0x30 */ + ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */ + ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */ + ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */ + ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */ + ILCID_POSIX_MAP(te), /* te Telugu 0x4a */ + ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */ + ILCID_POSIX_MAP(th), /* th Thai 0x1e */ + ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */ + ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */ + ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */ + ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */ + ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */ + ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */ + ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */ + ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */ + ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */ + ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */ + ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */ + ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */ + ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */ + ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */ + ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */ + ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */ + ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */ + ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */ + ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */ +}; + +static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap); + +/** + * Do not call this function. It is called by hostID. + * The function is not private because this struct must stay as a C struct, + * and this is an internal class. + */ +static int32_t +idCmp(const char* id1, const char* id2) +{ + int32_t diffIdx = 0; + while (*id1 == *id2 && *id1 != 0) { + diffIdx++; + id1++; + id2++; + } + return diffIdx; +} + +/** + * Searches for a Windows LCID + * + * @param posixID the Posix style locale id. + * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has + * no equivalent Windows LCID. + * @return the LCID + */ +static uint32_t +getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status) +{ + int32_t bestIdx = 0; + int32_t bestIdxDiff = 0; + int32_t posixIDlen = (int32_t)uprv_strlen(posixID); + uint32_t idx; + + for (idx = 0; idx < this_0->numRegions; idx++ ) { + int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID); + if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) { + if (posixIDlen == sameChars) { + /* Exact match */ + return this_0->regionMaps[idx].hostID; + } + bestIdxDiff = sameChars; + bestIdx = idx; + } + } + /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */ + /* We also have to make sure that sid and si and similar string subsets don't match. */ + if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@') + && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0) + { + *status = U_USING_FALLBACK_WARNING; + return this_0->regionMaps[bestIdx].hostID; + } + + /*no match found */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return this_0->regionMaps->hostID; +} + +static const char* +getPosixID(const ILcidPosixMap *this_0, uint32_t hostID) +{ + uint32_t i; + for (i = 0; i < this_0->numRegions; i++) + { + if (this_0->regionMaps[i].hostID == hostID) + { + return this_0->regionMaps[i].posixID; + } + } + + /* If you get here, then no matching region was found, + so return the language id with the wild card region. */ + return this_0->regionMaps[0].posixID; +} + +/* +////////////////////////////////////// +// +// LCID --> POSIX +// +///////////////////////////////////// +*/ +#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API +/* + * Various language tags needs to be changed: + * quz -> qu + * prs -> fa + */ +#define FIX_LANGUAGE_ID_TAG(buffer, len) \ + if (len >= 3) { \ + if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') {\ + buffer[2] = 0; \ + uprv_strcat(buffer, buffer+3); \ + } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') {\ + buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0; \ + uprv_strcat(buffer, buffer+3); \ + } \ + } + +#endif + +U_CAPI int32_t +uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status) +{ + uint16_t langID; + uint32_t localeIndex; + UBool bLookup = TRUE; + const char *pPosixID = NULL; + +#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API + static_assert(ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH, "Windows locale names have smaller length than ICU locale names."); + + char locName[LOCALE_NAME_MAX_LENGTH] = {}; + + // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and + // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for + // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot + // use the Windows API to resolve locale ID for this specific case. + if ((hostid & 0x3FF) != 0x92) { + int32_t tmpLen = 0; + char16_t windowsLocaleName[LOCALE_NAME_MAX_LENGTH] = {}; + + // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names. + tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES); + if (tmpLen > 1) { + int32_t i = 0; + // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort. + bLookup = FALSE; + for (i = 0; i < UPRV_LENGTHOF(locName); i++) + { + locName[i] = (char)(windowsLocaleName[i]); + + // Windows locale name may contain sorting variant, such as "es-ES_tradnl". + // In such cases, we need special mapping data found in the hardcoded table + // in this source file. + if (windowsLocaleName[i] == L'_') + { + // Keep the base locale, without variant + // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.? + locName[i] = '\0'; + tmpLen = i; + bLookup = TRUE; + break; + } + else if (windowsLocaleName[i] == L'-') + { + // Windows names use -, ICU uses _ + locName[i] = '_'; + } + else if (windowsLocaleName[i] == L'\0') + { + // No point in doing more work than necessary + break; + } + } + // TODO: Need to understand this better, why isn't it an alias? + FIX_LANGUAGE_ID_TAG(locName, tmpLen); + pPosixID = locName; + } + } +#endif + + if (bLookup) { + const char *pCandidate = NULL; + langID = LANGUAGE_LCID(hostid); + + for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) { + if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) { + pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid); + break; + } + } + + /* On Windows, when locale name has a variant, we still look up the hardcoded table. + If a match in the hardcoded table is longer than the Windows locale name without + variant, we use the one as the result */ + if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) { + pPosixID = pCandidate; + } + } + + if (pPosixID) { + int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID)); + int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity; + uprv_memcpy(posixID, pPosixID, copyLen); + if (resLen < posixIDCapacity) { + posixID[resLen] = 0; + if (*status == U_STRING_NOT_TERMINATED_WARNING) { + *status = U_ZERO_ERROR; + } + } else if (resLen == posixIDCapacity) { + *status = U_STRING_NOT_TERMINATED_WARNING; + } else { + *status = U_BUFFER_OVERFLOW_ERROR; + } + return resLen; + } + + /* no match found */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; +} + +/* +////////////////////////////////////// +// +// POSIX --> LCID +// This should only be called from uloc_getLCID. +// The locale ID must be in canonical form. +// +///////////////////////////////////// +*/ +U_CAPI uint32_t +uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status) +{ + if (U_FAILURE(*status)) { + return 0; + } + + // The purpose of this function is to leverage the Windows platform name->lcid + // conversion functionality when available. +#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API + int32_t len; + char baseName[ULOC_FULLNAME_CAPACITY] = {}; + const char * mylocaleID = localeID; + + // Check any for keywords. + if (uprv_strchr(localeID, '@')) + { + icu::CharString collVal; + { + icu::CharStringByteSink sink(&collVal); + ulocimp_getKeywordValue(localeID, "collation", sink, status); + } + if (U_SUCCESS(*status) && !collVal.isEmpty()) + { + // If it contains the keyword collation, return 0 so that the LCID lookup table will be used. + return 0; + } + else + { + // If the locale ID contains keywords other than collation, just use the base name. + len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, status); + + if (U_SUCCESS(*status) && len > 0) + { + baseName[len] = 0; + mylocaleID = baseName; + } + } + } + + char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {}; + // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form + (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, status); + + if (U_SUCCESS(*status)) + { + // Need it to be UTF-16, not 8-bit + wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {}; + int32_t i; + for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++) + { + if (asciiBCP47Tag[i] == '\0') + { + break; + } + else + { + // Copy the character + bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]); + } + } + + if (i < (UPRV_LENGTHOF(bcp47Tag) - 1)) + { + // Ensure it's null terminated + bcp47Tag[i] = L'\0'; + LCID lcid = LocaleNameToLCID(bcp47Tag, LOCALE_ALLOW_NEUTRAL_NAMES); + if (lcid > 0) + { + // Found LCID from windows, return that one, unless its completely ambiguous + // LOCALE_USER_DEFAULT and transients are OK because they will round trip + // for this process. + if (lcid != LOCALE_CUSTOM_UNSPECIFIED) + { + return lcid; + } + } + } + } +#else + (void) localeID; // Suppress unused variable warning. +#endif + + // Nothing found, or not implemented. + return 0; +} + +U_CAPI uint32_t +uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status) +{ + // This function does the table lookup when native platform name->lcid conversion isn't available, + // or for locales that don't follow patterns the platform expects. + uint32_t low = 0; + uint32_t high = gLocaleCount; + uint32_t mid; + uint32_t oldmid = 0; + int32_t compVal; + + uint32_t value = 0; + uint32_t fallbackValue = (uint32_t)-1; + UErrorCode myStatus; + uint32_t idx; + + /* Check for incomplete id. */ + if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) { + return 0; + } + + /*Binary search for the map entry for normal cases */ + + while (high > low) /*binary search*/{ + + mid = (high+low) >> 1; /*Finds median*/ + + if (mid == oldmid) + break; + + compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID); + if (compVal < 0){ + high = mid; + } + else if (compVal > 0){ + low = mid; + } + else /*we found it*/{ + return getHostID(&gPosixIDmap[mid], posixID, status); + } + oldmid = mid; + } + + /* + * Sometimes we can't do a binary search on posixID because some LCIDs + * go to different locales. We hit one of those special cases. + */ + for (idx = 0; idx < gLocaleCount; idx++ ) { + myStatus = U_ZERO_ERROR; + value = getHostID(&gPosixIDmap[idx], posixID, &myStatus); + if (myStatus == U_ZERO_ERROR) { + return value; + } + else if (myStatus == U_USING_FALLBACK_WARNING) { + fallbackValue = value; + } + } + + if (fallbackValue != (uint32_t)-1) { + *status = U_USING_FALLBACK_WARNING; + return fallbackValue; + } + + /* no match found */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; /* return international (root) */ +} diff --git a/thirdparty/icu4c/common/locmap.h b/thirdparty/icu4c/common/locmap.h new file mode 100644 index 0000000000..e669873a14 --- /dev/null +++ b/thirdparty/icu4c/common/locmap.h @@ -0,0 +1,40 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1996-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File locmap.h : Locale Mapping Classes +* +* +* Created by: Helena Shih +* +* Modification History: +* +* Date Name Description +* 3/11/97 aliu Added setId(). +* 4/20/99 Madhu Added T_convertToPosix() +* 09/18/00 george Removed the memory leaks. +* 08/23/01 george Convert to C +*============================================================================ +*/ + +#ifndef LOCMAP_H +#define LOCMAP_H + +#include "unicode/utypes.h" + +#define LANGUAGE_LCID(hostID) (uint16_t)(0x03FF & hostID) + +U_CAPI int32_t uprv_convertToPosix(uint32_t hostid, char* posixID, int32_t posixIDCapacity, UErrorCode* status); + +/* Don't call these functions directly. Use uloc_getLCID instead. */ +U_CAPI uint32_t uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status); // Leverage platform conversion if possible +U_CAPI uint32_t uprv_convertToLCID(const char* langID, const char* posixID, UErrorCode* status); + +#endif /* LOCMAP_H */ + diff --git a/thirdparty/icu4c/common/locresdata.cpp b/thirdparty/icu4c/common/locresdata.cpp new file mode 100644 index 0000000000..d1d9a4729f --- /dev/null +++ b/thirdparty/icu4c/common/locresdata.cpp @@ -0,0 +1,220 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1997-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: loclikely.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010feb25 +* created by: Markus W. Scherer +* +* Code for miscellaneous locale-related resource bundle data access, +* separated out from other .cpp files +* that then do not depend on resource bundle code and this data. +*/ + +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "unicode/uloc.h" +#include "unicode/ures.h" +#include "cstring.h" +#include "ulocimp.h" +#include "uresimp.h" + +/* + * Lookup a resource bundle table item with fallback on the table level. + * Regular resource bundle lookups perform fallback to parent locale bundles + * and eventually the root bundle, but only for top-level items. + * This function takes the name of a top-level table and of an item in that table + * and performs a lookup of both, falling back until a bundle contains a table + * with this item. + * + * Note: Only the opening of entire bundles falls back through the default locale + * before root. Once a bundle is open, item lookups do not go through the + * default locale because that would result in a mix of languages that is + * unpredictable to the programmer and most likely useless. + */ +U_CAPI const UChar * U_EXPORT2 +uloc_getTableStringWithFallback(const char *path, const char *locale, + const char *tableKey, const char *subTableKey, + const char *itemKey, + int32_t *pLength, + UErrorCode *pErrorCode) +{ +/* char localeBuffer[ULOC_FULLNAME_CAPACITY*4];*/ + const UChar *item=NULL; + UErrorCode errorCode; + char explicitFallbackName[ULOC_FULLNAME_CAPACITY] = {0}; + + /* + * open the bundle for the current locale + * this falls back through the locale's chain to root + */ + errorCode=U_ZERO_ERROR; + icu::LocalUResourceBundlePointer rb(ures_open(path, locale, &errorCode)); + + if(U_FAILURE(errorCode)) { + /* total failure, not even root could be opened */ + *pErrorCode=errorCode; + return NULL; + } else if(errorCode==U_USING_DEFAULT_WARNING || + (errorCode==U_USING_FALLBACK_WARNING && *pErrorCode!=U_USING_DEFAULT_WARNING) + ) { + /* set the "strongest" error code (success->fallback->default->failure) */ + *pErrorCode=errorCode; + } + + for(;;){ + icu::StackUResourceBundle table; + icu::StackUResourceBundle subTable; + ures_getByKeyWithFallback(rb.getAlias(), tableKey, table.getAlias(), &errorCode); + + if (subTableKey != NULL) { + /* + ures_getByKeyWithFallback(table.getAlias(), subTableKey, subTable.getAlias(), &errorCode); + item = ures_getStringByKeyWithFallback(subTable.getAlias(), itemKey, pLength, &errorCode); + if(U_FAILURE(errorCode)){ + *pErrorCode = errorCode; + } + + break;*/ + + ures_getByKeyWithFallback(table.getAlias(), subTableKey, table.getAlias(), &errorCode); + } + if(U_SUCCESS(errorCode)){ + item = ures_getStringByKeyWithFallback(table.getAlias(), itemKey, pLength, &errorCode); + if(U_FAILURE(errorCode)){ + const char* replacement = NULL; + *pErrorCode = errorCode; /*save the errorCode*/ + errorCode = U_ZERO_ERROR; + /* may be a deprecated code */ + if(uprv_strcmp(tableKey, "Countries")==0){ + replacement = uloc_getCurrentCountryID(itemKey); + }else if(uprv_strcmp(tableKey, "Languages")==0){ + replacement = uloc_getCurrentLanguageID(itemKey); + } + /*pointer comparison is ok since uloc_getCurrentCountryID & uloc_getCurrentLanguageID return the key itself is replacement is not found*/ + if(replacement!=NULL && itemKey != replacement){ + item = ures_getStringByKeyWithFallback(table.getAlias(), replacement, pLength, &errorCode); + if(U_SUCCESS(errorCode)){ + *pErrorCode = errorCode; + break; + } + } + }else{ + break; + } + } + + if(U_FAILURE(errorCode)){ + + /* still can't figure out ?.. try the fallback mechanism */ + int32_t len = 0; + const UChar* fallbackLocale = NULL; + *pErrorCode = errorCode; + errorCode = U_ZERO_ERROR; + + fallbackLocale = ures_getStringByKeyWithFallback(table.getAlias(), "Fallback", &len, &errorCode); + if(U_FAILURE(errorCode)){ + *pErrorCode = errorCode; + break; + } + + u_UCharsToChars(fallbackLocale, explicitFallbackName, len); + + /* guard against recursive fallback */ + if(uprv_strcmp(explicitFallbackName, locale)==0){ + *pErrorCode = U_INTERNAL_PROGRAM_ERROR; + break; + } + rb.adoptInstead(ures_open(path, explicitFallbackName, &errorCode)); + if(U_FAILURE(errorCode)){ + *pErrorCode = errorCode; + break; + } + /* succeeded in opening the fallback bundle .. continue and try to fetch the item */ + }else{ + break; + } + } + + return item; +} + +static ULayoutType +_uloc_getOrientationHelper(const char* localeId, + const char* key, + UErrorCode *status) +{ + ULayoutType result = ULOC_LAYOUT_UNKNOWN; + + if (!U_FAILURE(*status)) { + int32_t length = 0; + char localeBuffer[ULOC_FULLNAME_CAPACITY]; + + uloc_canonicalize(localeId, localeBuffer, sizeof(localeBuffer), status); + + if (!U_FAILURE(*status)) { + const UChar* const value = + uloc_getTableStringWithFallback( + NULL, + localeBuffer, + "layout", + NULL, + key, + &length, + status); + + if (!U_FAILURE(*status) && length != 0) { + switch(value[0]) + { + case 0x0062: /* 'b' */ + result = ULOC_LAYOUT_BTT; + break; + case 0x006C: /* 'l' */ + result = ULOC_LAYOUT_LTR; + break; + case 0x0072: /* 'r' */ + result = ULOC_LAYOUT_RTL; + break; + case 0x0074: /* 't' */ + result = ULOC_LAYOUT_TTB; + break; + default: + *status = U_INTERNAL_PROGRAM_ERROR; + break; + } + } + } + } + + return result; +} + +U_CAPI ULayoutType U_EXPORT2 +uloc_getCharacterOrientation(const char* localeId, + UErrorCode *status) +{ + return _uloc_getOrientationHelper(localeId, "characters", status); +} + +/** + * Get the layout line orientation for the specified locale. + * + * @param localeID locale name + * @param status Error status + * @return an enum indicating the layout orientation for lines. + */ +U_CAPI ULayoutType U_EXPORT2 +uloc_getLineOrientation(const char* localeId, + UErrorCode *status) +{ + return _uloc_getOrientationHelper(localeId, "lines", status); +} diff --git a/thirdparty/icu4c/common/locutil.cpp b/thirdparty/icu4c/common/locutil.cpp new file mode 100644 index 0000000000..3d9d69ff7e --- /dev/null +++ b/thirdparty/icu4c/common/locutil.cpp @@ -0,0 +1,275 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 2002-2014, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + */ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION + +#include "unicode/resbund.h" +#include "unicode/uenum.h" +#include "cmemory.h" +#include "ustrfmt.h" +#include "locutil.h" +#include "charstr.h" +#include "ucln_cmn.h" +#include "uassert.h" +#include "umutex.h" + +// see LocaleUtility::getAvailableLocaleNames +static icu::UInitOnce LocaleUtilityInitOnce = U_INITONCE_INITIALIZER; +static icu::Hashtable * LocaleUtility_cache = NULL; + +#define UNDERSCORE_CHAR ((UChar)0x005f) +#define AT_SIGN_CHAR ((UChar)64) +#define PERIOD_CHAR ((UChar)46) + +/* + ****************************************************************** + */ + +/** + * Release all static memory held by Locale Utility. + */ +U_CDECL_BEGIN +static UBool U_CALLCONV service_cleanup(void) { + if (LocaleUtility_cache) { + delete LocaleUtility_cache; + LocaleUtility_cache = NULL; + } + return TRUE; +} + + +static void U_CALLCONV locale_utility_init(UErrorCode &status) { + using namespace icu; + U_ASSERT(LocaleUtility_cache == NULL); + ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup); + LocaleUtility_cache = new Hashtable(status); + if (U_FAILURE(status)) { + delete LocaleUtility_cache; + LocaleUtility_cache = NULL; + return; + } + if (LocaleUtility_cache == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + LocaleUtility_cache->setValueDeleter(uhash_deleteHashtable); +} + +U_CDECL_END + +U_NAMESPACE_BEGIN + +UnicodeString& +LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result) +{ + if (id == NULL) { + result.setToBogus(); + } else { + // Fix case only (no other changes) up to the first '@' or '.' or + // end of string, whichever comes first. In 3.0 I changed this to + // stop at first '@' or '.'. It used to run out to the end of + // string. My fix makes the tests pass but is probably + // structurally incorrect. See below. [alan 3.0] + + // TODO: Doug, you might want to revise this... + result = *id; + int32_t i = 0; + int32_t end = result.indexOf(AT_SIGN_CHAR); + int32_t n = result.indexOf(PERIOD_CHAR); + if (n >= 0 && n < end) { + end = n; + } + if (end < 0) { + end = result.length(); + } + n = result.indexOf(UNDERSCORE_CHAR); + if (n < 0) { + n = end; + } + for (; i < n; ++i) { + UChar c = result.charAt(i); + if (c >= 0x0041 && c <= 0x005a) { + c += 0x20; + result.setCharAt(i, c); + } + } + for (n = end; i < n; ++i) { + UChar c = result.charAt(i); + if (c >= 0x0061 && c <= 0x007a) { + c -= 0x20; + result.setCharAt(i, c); + } + } + } + return result; + +#if 0 + // This code does a proper full level 2 canonicalization of id. + // It's nasty to go from UChar to char to char to UChar -- but + // that's what you have to do to use the uloc_canonicalize + // function on UnicodeStrings. + + // I ended up doing the alternate fix (see above) not for + // performance reasons, although performance will certainly be + // better, but because doing a full level 2 canonicalization + // causes some tests to fail. [alan 3.0] + + // TODO: Doug, you might want to revisit this... + result.setToBogus(); + if (id != 0) { + int32_t buflen = id->length() + 8; // space for NUL + char* buf = (char*) uprv_malloc(buflen); + char* canon = (buf == 0) ? 0 : (char*) uprv_malloc(buflen); + if (buf != 0 && canon != 0) { + U_ASSERT(id->extract(0, INT32_MAX, buf, buflen) < buflen); + UErrorCode ec = U_ZERO_ERROR; + uloc_canonicalize(buf, canon, buflen, &ec); + if (U_SUCCESS(ec)) { + result = UnicodeString(canon); + } + } + uprv_free(buf); + uprv_free(canon); + } + return result; +#endif +} + +Locale& +LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result) +{ + enum { BUFLEN = 128 }; // larger than ever needed + + if (id.isBogus() || id.length() >= BUFLEN) { + result.setToBogus(); + } else { + /* + * We need to convert from a UnicodeString to char * in order to + * create a Locale. + * + * Problem: Locale ID strings may contain '@' which is a variant + * character and cannot be handled by invariant-character conversion. + * + * Hack: Since ICU code can handle locale IDs with multiple encodings + * of '@' (at least for EBCDIC; it's not known to be a problem for + * ASCII-based systems), + * we use regular invariant-character conversion for everything else + * and manually convert U+0040 into a compiler-char-constant '@'. + * While this compilation-time constant may not match the runtime + * encoding of '@', it should be one of the encodings which ICU + * recognizes. + * + * There should be only at most one '@' in a locale ID. + */ + char buffer[BUFLEN]; + int32_t prev, i; + prev = 0; + for(;;) { + i = id.indexOf((UChar)0x40, prev); + if(i < 0) { + // no @ between prev and the rest of the string + id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV); + break; // done + } else { + // normal invariant-character conversion for text between @s + id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV); + // manually "convert" U+0040 at id[i] into '@' at buffer[i] + buffer[i] = '@'; + prev = i + 1; + } + } + result = Locale::createFromName(buffer); + } + return result; +} + +UnicodeString& +LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result) +{ + if (locale.isBogus()) { + result.setToBogus(); + } else { + result.append(UnicodeString(locale.getName(), -1, US_INV)); + } + return result; +} + +const Hashtable* +LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID) +{ + // LocaleUtility_cache is a hash-of-hashes. The top-level keys + // are path strings ('bundleID') passed to + // ures_openAvailableLocales. The top-level values are + // second-level hashes. The second-level keys are result strings + // from ures_openAvailableLocales. The second-level values are + // garbage ((void*)1 or other random pointer). + + UErrorCode status = U_ZERO_ERROR; + umtx_initOnce(LocaleUtilityInitOnce, locale_utility_init, status); + Hashtable *cache = LocaleUtility_cache; + if (cache == NULL) { + // Catastrophic failure. + return NULL; + } + + Hashtable* htp; + umtx_lock(NULL); + htp = (Hashtable*) cache->get(bundleID); + umtx_unlock(NULL); + + if (htp == NULL) { + htp = new Hashtable(status); + if (htp && U_SUCCESS(status)) { + CharString cbundleID; + cbundleID.appendInvariantChars(bundleID, status); + const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data(); + icu::LocalUEnumerationPointer uenum(ures_openAvailableLocales(path, &status)); + for (;;) { + const UChar* id = uenum_unext(uenum.getAlias(), NULL, &status); + if (id == NULL) { + break; + } + htp->put(UnicodeString(id), (void*)htp, status); + } + if (U_FAILURE(status)) { + delete htp; + return NULL; + } + umtx_lock(NULL); + Hashtable *t = static_cast<Hashtable *>(cache->get(bundleID)); + if (t != NULL) { + // Another thread raced through this code, creating the cache entry first. + // Discard ours and return theirs. + umtx_unlock(NULL); + delete htp; + htp = t; + } else { + cache->put(bundleID, (void*)htp, status); + umtx_unlock(NULL); + } + } + } + return htp; +} + +UBool +LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child) +{ + return child.indexOf(root) == 0 && + (child.length() == root.length() || + child.charAt(root.length()) == UNDERSCORE_CHAR); +} + +U_NAMESPACE_END + +/* !UCONFIG_NO_SERVICE */ +#endif + + diff --git a/thirdparty/icu4c/common/locutil.h b/thirdparty/icu4c/common/locutil.h new file mode 100644 index 0000000000..31bfffd7a5 --- /dev/null +++ b/thirdparty/icu4c/common/locutil.h @@ -0,0 +1,39 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** + ******************************************************************************* + * Copyright (C) 2002-2005, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + ******************************************************************************* + */ +#ifndef LOCUTIL_H +#define LOCUTIL_H + +#include "unicode/utypes.h" +#include "hash.h" + +#if !UCONFIG_NO_SERVICE || !UCONFIG_NO_TRANSLITERATION + + +U_NAMESPACE_BEGIN + +// temporary utility functions, till I know where to find them +// in header so tests can also access them + +class U_COMMON_API LocaleUtility { +public: + static UnicodeString& canonicalLocaleString(const UnicodeString* id, UnicodeString& result); + static Locale& initLocaleFromName(const UnicodeString& id, Locale& result); + static UnicodeString& initNameFromLocale(const Locale& locale, UnicodeString& result); + static const Hashtable* getAvailableLocaleNames(const UnicodeString& bundleID); + static UBool isFallbackOf(const UnicodeString& root, const UnicodeString& child); +}; + +U_NAMESPACE_END + + +#endif + +#endif diff --git a/thirdparty/icu4c/common/lsr.cpp b/thirdparty/icu4c/common/lsr.cpp new file mode 100644 index 0000000000..b81808f2c4 --- /dev/null +++ b/thirdparty/icu4c/common/lsr.cpp @@ -0,0 +1,114 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// lsr.cpp +// created: 2019may08 Markus W. Scherer + +#include "unicode/utypes.h" +#include "charstr.h" +#include "cmemory.h" +#include "cstring.h" +#include "lsr.h" +#include "uinvchar.h" +#include "ustr_imp.h" + +U_NAMESPACE_BEGIN + +LSR::LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f, + UErrorCode &errorCode) : + language(nullptr), script(nullptr), region(r), + regionIndex(indexForRegion(region)), flags(f) { + if (U_SUCCESS(errorCode)) { + CharString langScript; + langScript.append(prefix, errorCode).append(lang, errorCode).append('\0', errorCode); + int32_t scriptOffset = langScript.length(); + langScript.append(prefix, errorCode).append(scr, errorCode); + owned = langScript.cloneData(errorCode); + if (U_SUCCESS(errorCode)) { + language = owned; + script = owned + scriptOffset; + } + } +} + +LSR::LSR(LSR &&other) U_NOEXCEPT : + language(other.language), script(other.script), region(other.region), owned(other.owned), + regionIndex(other.regionIndex), flags(other.flags), + hashCode(other.hashCode) { + if (owned != nullptr) { + other.language = other.script = ""; + other.owned = nullptr; + other.hashCode = 0; + } +} + +void LSR::deleteOwned() { + uprv_free(owned); +} + +LSR &LSR::operator=(LSR &&other) U_NOEXCEPT { + this->~LSR(); + language = other.language; + script = other.script; + region = other.region; + regionIndex = other.regionIndex; + flags = other.flags; + owned = other.owned; + hashCode = other.hashCode; + if (owned != nullptr) { + other.language = other.script = ""; + other.owned = nullptr; + other.hashCode = 0; + } + return *this; +} + +UBool LSR::isEquivalentTo(const LSR &other) const { + return + uprv_strcmp(language, other.language) == 0 && + uprv_strcmp(script, other.script) == 0 && + regionIndex == other.regionIndex && + // Compare regions if both are ill-formed (and their indexes are 0). + (regionIndex > 0 || uprv_strcmp(region, other.region) == 0); +} + +UBool LSR::operator==(const LSR &other) const { + return + uprv_strcmp(language, other.language) == 0 && + uprv_strcmp(script, other.script) == 0 && + regionIndex == other.regionIndex && + // Compare regions if both are ill-formed (and their indexes are 0). + (regionIndex > 0 || uprv_strcmp(region, other.region) == 0) && + flags == other.flags; +} + +int32_t LSR::indexForRegion(const char *region) { + int32_t c = region[0]; + int32_t a = c - '0'; + if (0 <= a && a <= 9) { // digits: "419" + int32_t b = region[1] - '0'; + if (b < 0 || 9 < b) { return 0; } + c = region[2] - '0'; + if (c < 0 || 9 < c || region[3] != 0) { return 0; } + return (10 * a + b) * 10 + c + 1; + } else { // letters: "DE" + a = uprv_upperOrdinal(c); + if (a < 0 || 25 < a) { return 0; } + int32_t b = uprv_upperOrdinal(region[1]); + if (b < 0 || 25 < b || region[2] != 0) { return 0; } + return 26 * a + b + 1001; + } + return 0; +} + +LSR &LSR::setHashCode() { + if (hashCode == 0) { + uint32_t h = ustr_hashCharsN(language, static_cast<int32_t>(uprv_strlen(language))); + h = h * 37 + ustr_hashCharsN(script, static_cast<int32_t>(uprv_strlen(script))); + h = h * 37 + regionIndex; + hashCode = h * 37 + flags; + } + return *this; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/lsr.h b/thirdparty/icu4c/common/lsr.h new file mode 100644 index 0000000000..a33f855245 --- /dev/null +++ b/thirdparty/icu4c/common/lsr.h @@ -0,0 +1,82 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// lsr.h +// created: 2019may08 Markus W. Scherer + +#ifndef __LSR_H__ +#define __LSR_H__ + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "cstring.h" + +U_NAMESPACE_BEGIN + +struct LSR final : public UMemory { + static constexpr int32_t REGION_INDEX_LIMIT = 1001 + 26 * 26; + + static constexpr int32_t EXPLICIT_LSR = 7; + static constexpr int32_t EXPLICIT_LANGUAGE = 4; + static constexpr int32_t EXPLICIT_SCRIPT = 2; + static constexpr int32_t EXPLICIT_REGION = 1; + static constexpr int32_t IMPLICIT_LSR = 0; + static constexpr int32_t DONT_CARE_FLAGS = 0; + + const char *language; + const char *script; + const char *region; + char *owned = nullptr; + /** Index for region, 0 if ill-formed. @see indexForRegion */ + int32_t regionIndex = 0; + int32_t flags = 0; + /** Only set for LSRs that will be used in a hash table. */ + int32_t hashCode = 0; + + LSR() : language("und"), script(""), region("") {} + + /** Constructor which aliases all subtag pointers. */ + LSR(const char *lang, const char *scr, const char *r, int32_t f) : + language(lang), script(scr), region(r), + regionIndex(indexForRegion(region)), flags(f) {} + /** + * Constructor which prepends the prefix to the language and script, + * copies those into owned memory, and aliases the region. + */ + LSR(char prefix, const char *lang, const char *scr, const char *r, int32_t f, + UErrorCode &errorCode); + LSR(LSR &&other) U_NOEXCEPT; + LSR(const LSR &other) = delete; + inline ~LSR() { + // Pure inline code for almost all instances. + if (owned != nullptr) { + deleteOwned(); + } + } + + LSR &operator=(LSR &&other) U_NOEXCEPT; + LSR &operator=(const LSR &other) = delete; + + /** + * Returns a positive index (>0) for a well-formed region code. + * Do not rely on a particular region->index mapping; it may change. + * Returns 0 for ill-formed strings. + */ + static int32_t indexForRegion(const char *region); + + UBool isEquivalentTo(const LSR &other) const; + UBool operator==(const LSR &other) const; + + inline UBool operator!=(const LSR &other) const { + return !operator==(other); + } + + LSR &setHashCode(); + +private: + void deleteOwned(); +}; + +U_NAMESPACE_END + +#endif // __LSR_H__ diff --git a/thirdparty/icu4c/common/messageimpl.h b/thirdparty/icu4c/common/messageimpl.h new file mode 100644 index 0000000000..a56479066b --- /dev/null +++ b/thirdparty/icu4c/common/messageimpl.h @@ -0,0 +1,65 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: messageimpl.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011apr04 +* created by: Markus W. Scherer +*/ + +#ifndef __MESSAGEIMPL_H__ +#define __MESSAGEIMPL_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messagepattern.h" + +U_NAMESPACE_BEGIN + +/** + * Helper functions for use of MessagePattern. + * In Java, these are package-private methods in MessagePattern itself. + * In C++, they are declared here and implemented in messagepattern.cpp. + */ +class U_COMMON_API MessageImpl { +public: + /** + * @return true if getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED + */ + static UBool jdkAposMode(const MessagePattern &msgPattern) { + return msgPattern.getApostropheMode()==UMSGPAT_APOS_DOUBLE_REQUIRED; + } + + /** + * Appends the s[start, limit[ substring to sb, but with only half of the apostrophes + * according to JDK pattern behavior. + */ + static void appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit, + UnicodeString &sb); + + /** + * Appends the sub-message to the result string. + * Omits SKIP_SYNTAX and appends whole arguments using appendReducedApostrophes(). + */ + static UnicodeString &appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern, + int32_t msgStart, + UnicodeString &result); + +private: + MessageImpl(); // no constructor: all static methods +}; + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_FORMATTING + +#endif // __MESSAGEIMPL_H__ diff --git a/thirdparty/icu4c/common/messagepattern.cpp b/thirdparty/icu4c/common/messagepattern.cpp new file mode 100644 index 0000000000..f223d06711 --- /dev/null +++ b/thirdparty/icu4c/common/messagepattern.cpp @@ -0,0 +1,1233 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011-2012, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: messagepattern.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011mar14 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/messagepattern.h" +#include "unicode/unistr.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "cstring.h" +#include "messageimpl.h" +#include "patternprops.h" +#include "putilimp.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + +// Unicode character/code point constants ---------------------------------- *** + +static const UChar u_pound=0x23; +static const UChar u_apos=0x27; +static const UChar u_plus=0x2B; +static const UChar u_comma=0x2C; +static const UChar u_minus=0x2D; +static const UChar u_dot=0x2E; +static const UChar u_colon=0x3A; +static const UChar u_lessThan=0x3C; +static const UChar u_equal=0x3D; +static const UChar u_A=0x41; +static const UChar u_C=0x43; +static const UChar u_D=0x44; +static const UChar u_E=0x45; +static const UChar u_H=0x48; +static const UChar u_I=0x49; +static const UChar u_L=0x4C; +static const UChar u_N=0x4E; +static const UChar u_O=0x4F; +static const UChar u_P=0x50; +static const UChar u_R=0x52; +static const UChar u_S=0x53; +static const UChar u_T=0x54; +static const UChar u_U=0x55; +static const UChar u_Z=0x5A; +static const UChar u_a=0x61; +static const UChar u_c=0x63; +static const UChar u_d=0x64; +static const UChar u_e=0x65; +static const UChar u_f=0x66; +static const UChar u_h=0x68; +static const UChar u_i=0x69; +static const UChar u_l=0x6C; +static const UChar u_n=0x6E; +static const UChar u_o=0x6F; +static const UChar u_p=0x70; +static const UChar u_r=0x72; +static const UChar u_s=0x73; +static const UChar u_t=0x74; +static const UChar u_u=0x75; +static const UChar u_z=0x7A; +static const UChar u_leftCurlyBrace=0x7B; +static const UChar u_pipe=0x7C; +static const UChar u_rightCurlyBrace=0x7D; +static const UChar u_lessOrEqual=0x2264; // U+2264 is <= + +static const UChar kOffsetColon[]={ // "offset:" + u_o, u_f, u_f, u_s, u_e, u_t, u_colon +}; + +static const UChar kOther[]={ // "other" + u_o, u_t, u_h, u_e, u_r +}; + +// MessagePatternList ------------------------------------------------------ *** + +template<typename T, int32_t stackCapacity> +class MessagePatternList : public UMemory { +public: + MessagePatternList() {} + void copyFrom(const MessagePatternList<T, stackCapacity> &other, + int32_t length, + UErrorCode &errorCode); + UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode); + UBool equals(const MessagePatternList<T, stackCapacity> &other, int32_t length) const { + for(int32_t i=0; i<length; ++i) { + if(a[i]!=other.a[i]) { return FALSE; } + } + return TRUE; + } + + MaybeStackArray<T, stackCapacity> a; +}; + +template<typename T, int32_t stackCapacity> +void +MessagePatternList<T, stackCapacity>::copyFrom( + const MessagePatternList<T, stackCapacity> &other, + int32_t length, + UErrorCode &errorCode) { + if(U_SUCCESS(errorCode) && length>0) { + if(length>a.getCapacity() && NULL==a.resize(length)) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memcpy(a.getAlias(), other.a.getAlias(), (size_t)length*sizeof(T)); + } +} + +template<typename T, int32_t stackCapacity> +UBool +MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return FALSE; + } + if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=NULL) { + return TRUE; + } + errorCode=U_MEMORY_ALLOCATION_ERROR; + return FALSE; +} + +// MessagePatternList specializations -------------------------------------- *** + +class MessagePatternDoubleList : public MessagePatternList<double, 8> { +}; + +class MessagePatternPartsList : public MessagePatternList<MessagePattern::Part, 32> { +}; + +// MessagePattern constructors etc. ---------------------------------------- *** + +MessagePattern::MessagePattern(UErrorCode &errorCode) + : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE), + partsList(NULL), parts(NULL), partsLength(0), + numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), + hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { + init(errorCode); +} + +MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode) + : aposMode(mode), + partsList(NULL), parts(NULL), partsLength(0), + numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), + hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { + init(errorCode); +} + +MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) + : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE), + partsList(NULL), parts(NULL), partsLength(0), + numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), + hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { + if(init(errorCode)) { + parse(pattern, parseError, errorCode); + } +} + +UBool +MessagePattern::init(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return FALSE; + } + partsList=new MessagePatternPartsList(); + if(partsList==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + parts=partsList->a.getAlias(); + return TRUE; +} + +MessagePattern::MessagePattern(const MessagePattern &other) + : UObject(other), aposMode(other.aposMode), msg(other.msg), + partsList(NULL), parts(NULL), partsLength(0), + numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), + hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers), + needsAutoQuoting(other.needsAutoQuoting) { + UErrorCode errorCode=U_ZERO_ERROR; + if(!copyStorage(other, errorCode)) { + clear(); + } +} + +MessagePattern & +MessagePattern::operator=(const MessagePattern &other) { + if(this==&other) { + return *this; + } + aposMode=other.aposMode; + msg=other.msg; + hasArgNames=other.hasArgNames; + hasArgNumbers=other.hasArgNumbers; + needsAutoQuoting=other.needsAutoQuoting; + UErrorCode errorCode=U_ZERO_ERROR; + if(!copyStorage(other, errorCode)) { + clear(); + } + return *this; +} + +UBool +MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return FALSE; + } + parts=NULL; + partsLength=0; + numericValues=NULL; + numericValuesLength=0; + if(partsList==NULL) { + partsList=new MessagePatternPartsList(); + if(partsList==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + parts=partsList->a.getAlias(); + } + if(other.partsLength>0) { + partsList->copyFrom(*other.partsList, other.partsLength, errorCode); + if(U_FAILURE(errorCode)) { + return FALSE; + } + parts=partsList->a.getAlias(); + partsLength=other.partsLength; + } + if(other.numericValuesLength>0) { + if(numericValuesList==NULL) { + numericValuesList=new MessagePatternDoubleList(); + if(numericValuesList==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + numericValues=numericValuesList->a.getAlias(); + } + numericValuesList->copyFrom( + *other.numericValuesList, other.numericValuesLength, errorCode); + if(U_FAILURE(errorCode)) { + return FALSE; + } + numericValues=numericValuesList->a.getAlias(); + numericValuesLength=other.numericValuesLength; + } + return TRUE; +} + +MessagePattern::~MessagePattern() { + delete partsList; + delete numericValuesList; +} + +// MessagePattern API ------------------------------------------------------ *** + +MessagePattern & +MessagePattern::parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) { + preParse(pattern, parseError, errorCode); + parseMessage(0, 0, 0, UMSGPAT_ARG_TYPE_NONE, parseError, errorCode); + postParse(); + return *this; +} + +MessagePattern & +MessagePattern::parseChoiceStyle(const UnicodeString &pattern, + UParseError *parseError, UErrorCode &errorCode) { + preParse(pattern, parseError, errorCode); + parseChoiceStyle(0, 0, parseError, errorCode); + postParse(); + return *this; +} + +MessagePattern & +MessagePattern::parsePluralStyle(const UnicodeString &pattern, + UParseError *parseError, UErrorCode &errorCode) { + preParse(pattern, parseError, errorCode); + parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_PLURAL, 0, 0, parseError, errorCode); + postParse(); + return *this; +} + +MessagePattern & +MessagePattern::parseSelectStyle(const UnicodeString &pattern, + UParseError *parseError, UErrorCode &errorCode) { + preParse(pattern, parseError, errorCode); + parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_SELECT, 0, 0, parseError, errorCode); + postParse(); + return *this; +} + +void +MessagePattern::clear() { + // Mostly the same as preParse(). + msg.remove(); + hasArgNames=hasArgNumbers=FALSE; + needsAutoQuoting=FALSE; + partsLength=0; + numericValuesLength=0; +} + +UBool +MessagePattern::operator==(const MessagePattern &other) const { + if(this==&other) { + return TRUE; + } + return + aposMode==other.aposMode && + msg==other.msg && + // parts.equals(o.parts) + partsLength==other.partsLength && + (partsLength==0 || partsList->equals(*other.partsList, partsLength)); + // No need to compare numericValues if msg and parts are the same. +} + +int32_t +MessagePattern::hashCode() const { + int32_t hash=(aposMode*37+msg.hashCode())*37+partsLength; + for(int32_t i=0; i<partsLength; ++i) { + hash=hash*37+parts[i].hashCode(); + } + return hash; +} + +int32_t +MessagePattern::validateArgumentName(const UnicodeString &name) { + if(!PatternProps::isIdentifier(name.getBuffer(), name.length())) { + return UMSGPAT_ARG_NAME_NOT_VALID; + } + return parseArgNumber(name, 0, name.length()); +} + +UnicodeString +MessagePattern::autoQuoteApostropheDeep() const { + if(!needsAutoQuoting) { + return msg; + } + UnicodeString modified(msg); + // Iterate backward so that the insertion indexes do not change. + int32_t count=countParts(); + for(int32_t i=count; i>0;) { + const Part &part=getPart(--i); + if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) { + modified.insert(part.index, (UChar)part.value); + } + } + return modified; +} + +double +MessagePattern::getNumericValue(const Part &part) const { + UMessagePatternPartType type=part.type; + if(type==UMSGPAT_PART_TYPE_ARG_INT) { + return part.value; + } else if(type==UMSGPAT_PART_TYPE_ARG_DOUBLE) { + return numericValues[part.value]; + } else { + return UMSGPAT_NO_NUMERIC_VALUE; + } +} + +/** + * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. + * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) + * @return the "offset:" value. + * @draft ICU 4.8 + */ +double +MessagePattern::getPluralOffset(int32_t pluralStart) const { + const Part &part=getPart(pluralStart); + if(Part::hasNumericValue(part.type)) { + return getNumericValue(part); + } else { + return 0; + } +} + +// MessagePattern::Part ---------------------------------------------------- *** + +UBool +MessagePattern::Part::operator==(const Part &other) const { + if(this==&other) { + return TRUE; + } + return + type==other.type && + index==other.index && + length==other.length && + value==other.value && + limitPartIndex==other.limitPartIndex; +} + +// MessagePattern parser --------------------------------------------------- *** + +void +MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + if(parseError!=NULL) { + parseError->line=0; + parseError->offset=0; + parseError->preContext[0]=0; + parseError->postContext[0]=0; + } + msg=pattern; + hasArgNames=hasArgNumbers=FALSE; + needsAutoQuoting=FALSE; + partsLength=0; + numericValuesLength=0; +} + +void +MessagePattern::postParse() { + if(partsList!=NULL) { + parts=partsList->a.getAlias(); + } + if(numericValuesList!=NULL) { + numericValues=numericValuesList->a.getAlias(); + } +} + +int32_t +MessagePattern::parseMessage(int32_t index, int32_t msgStartLength, + int32_t nestingLevel, UMessagePatternArgType parentType, + UParseError *parseError, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return 0; + } + if(nestingLevel>Part::MAX_VALUE) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + int32_t msgStart=partsLength; + addPart(UMSGPAT_PART_TYPE_MSG_START, index, msgStartLength, nestingLevel, errorCode); + index+=msgStartLength; + for(;;) { // while(index<msg.length()) with U_FAILURE(errorCode) check + if(U_FAILURE(errorCode)) { + return 0; + } + if(index>=msg.length()) { + break; + } + UChar c=msg.charAt(index++); + if(c==u_apos) { + if(index==msg.length()) { + // The apostrophe is the last character in the pattern. + // Add a Part for auto-quoting. + addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, + u_apos, errorCode); // value=char to be inserted + needsAutoQuoting=TRUE; + } else { + c=msg.charAt(index); + if(c==u_apos) { + // double apostrophe, skip the second one + addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode); + } else if( + aposMode==UMSGPAT_APOS_DOUBLE_REQUIRED || + c==u_leftCurlyBrace || c==u_rightCurlyBrace || + (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe) || + (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) + ) { + // skip the quote-starting apostrophe + addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index-1, 1, 0, errorCode); + // find the end of the quoted literal text + for(;;) { + index=msg.indexOf(u_apos, index+1); + if(index>=0) { + if(/*(index+1)<msg.length() &&*/ msg.charAt(index+1)==u_apos) { + // double apostrophe inside quoted literal text + // still encodes a single apostrophe, skip the second one + addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, ++index, 1, 0, errorCode); + } else { + // skip the quote-ending apostrophe + addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode); + break; + } + } else { + // The quoted text reaches to the end of the of the message. + index=msg.length(); + // Add a Part for auto-quoting. + addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, + u_apos, errorCode); // value=char to be inserted + needsAutoQuoting=TRUE; + break; + } + } + } else { + // Interpret the apostrophe as literal text. + // Add a Part for auto-quoting. + addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, + u_apos, errorCode); // value=char to be inserted + needsAutoQuoting=TRUE; + } + } + } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) { + // The unquoted # in a plural message fragment will be replaced + // with the (number-offset). + addPart(UMSGPAT_PART_TYPE_REPLACE_NUMBER, index-1, 1, 0, errorCode); + } else if(c==u_leftCurlyBrace) { + index=parseArg(index-1, 1, nestingLevel, parseError, errorCode); + } else if((nestingLevel>0 && c==u_rightCurlyBrace) || + (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe)) { + // Finish the message before the terminator. + // In a choice style, report the "}" substring only for the following ARG_LIMIT, + // not for this MSG_LIMIT. + int32_t limitLength=(parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_rightCurlyBrace) ? 0 : 1; + addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index-1, limitLength, + nestingLevel, errorCode); + if(parentType==UMSGPAT_ARG_TYPE_CHOICE) { + // Let the choice style parser see the '}' or '|'. + return index-1; + } else { + // continue parsing after the '}' + return index; + } + } // else: c is part of literal text + } + if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) { + setParseError(parseError, 0); // Unmatched '{' braces in message. + errorCode=U_UNMATCHED_BRACES; + return 0; + } + addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index, 0, nestingLevel, errorCode); + return index; +} + +int32_t +MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, + UParseError *parseError, UErrorCode &errorCode) { + int32_t argStart=partsLength; + UMessagePatternArgType argType=UMSGPAT_ARG_TYPE_NONE; + addPart(UMSGPAT_PART_TYPE_ARG_START, index, argStartLength, argType, errorCode); + if(U_FAILURE(errorCode)) { + return 0; + } + int32_t nameIndex=index=skipWhiteSpace(index+argStartLength); + if(index==msg.length()) { + setParseError(parseError, 0); // Unmatched '{' braces in message. + errorCode=U_UNMATCHED_BRACES; + return 0; + } + // parse argument name or number + index=skipIdentifier(index); + int32_t number=parseArgNumber(nameIndex, index); + if(number>=0) { + int32_t length=index-nameIndex; + if(length>Part::MAX_LENGTH || number>Part::MAX_VALUE) { + setParseError(parseError, nameIndex); // Argument number too large. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + hasArgNumbers=TRUE; + addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode); + } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) { + int32_t length=index-nameIndex; + if(length>Part::MAX_LENGTH) { + setParseError(parseError, nameIndex); // Argument name too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + hasArgNames=TRUE; + addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode); + } else { // number<-1 (ARG_NAME_NOT_VALID) + setParseError(parseError, nameIndex); // Bad argument syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + index=skipWhiteSpace(index); + if(index==msg.length()) { + setParseError(parseError, 0); // Unmatched '{' braces in message. + errorCode=U_UNMATCHED_BRACES; + return 0; + } + UChar c=msg.charAt(index); + if(c==u_rightCurlyBrace) { + // all done + } else if(c!=u_comma) { + setParseError(parseError, nameIndex); // Bad argument syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } else /* ',' */ { + // parse argument type: case-sensitive a-zA-Z + int32_t typeIndex=index=skipWhiteSpace(index+1); + while(index<msg.length() && isArgTypeChar(msg.charAt(index))) { + ++index; + } + int32_t length=index-typeIndex; + index=skipWhiteSpace(index); + if(index==msg.length()) { + setParseError(parseError, 0); // Unmatched '{' braces in message. + errorCode=U_UNMATCHED_BRACES; + return 0; + } + if(length==0 || ((c=msg.charAt(index))!=u_comma && c!=u_rightCurlyBrace)) { + setParseError(parseError, nameIndex); // Bad argument syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + if(length>Part::MAX_LENGTH) { + setParseError(parseError, nameIndex); // Argument type name too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + argType=UMSGPAT_ARG_TYPE_SIMPLE; + if(length==6) { + // case-insensitive comparisons for complex-type names + if(isChoice(typeIndex)) { + argType=UMSGPAT_ARG_TYPE_CHOICE; + } else if(isPlural(typeIndex)) { + argType=UMSGPAT_ARG_TYPE_PLURAL; + } else if(isSelect(typeIndex)) { + argType=UMSGPAT_ARG_TYPE_SELECT; + } + } else if(length==13) { + if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) { + argType=UMSGPAT_ARG_TYPE_SELECTORDINAL; + } + } + // change the ARG_START type from NONE to argType + partsList->a[argStart].value=(int16_t)argType; + if(argType==UMSGPAT_ARG_TYPE_SIMPLE) { + addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode); + } + // look for an argument style (pattern) + if(c==u_rightCurlyBrace) { + if(argType!=UMSGPAT_ARG_TYPE_SIMPLE) { + setParseError(parseError, nameIndex); // No style field for complex argument. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + } else /* ',' */ { + ++index; + if(argType==UMSGPAT_ARG_TYPE_SIMPLE) { + index=parseSimpleStyle(index, parseError, errorCode); + } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) { + index=parseChoiceStyle(index, nestingLevel, parseError, errorCode); + } else { + index=parsePluralOrSelectStyle(argType, index, nestingLevel, parseError, errorCode); + } + } + } + // Argument parsing stopped on the '}'. + addLimitPart(argStart, UMSGPAT_PART_TYPE_ARG_LIMIT, index, 1, argType, errorCode); + return index+1; +} + +int32_t +MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return 0; + } + int32_t start=index; + int32_t nestedBraces=0; + while(index<msg.length()) { + UChar c=msg.charAt(index++); + if(c==u_apos) { + // Treat apostrophe as quoting but include it in the style part. + // Find the end of the quoted literal text. + index=msg.indexOf(u_apos, index); + if(index<0) { + // Quoted literal argument style text reaches to the end of the message. + setParseError(parseError, start); + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + // skip the quote-ending apostrophe + ++index; + } else if(c==u_leftCurlyBrace) { + ++nestedBraces; + } else if(c==u_rightCurlyBrace) { + if(nestedBraces>0) { + --nestedBraces; + } else { + int32_t length=--index-start; + if(length>Part::MAX_LENGTH) { + setParseError(parseError, start); // Argument style text too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + addPart(UMSGPAT_PART_TYPE_ARG_STYLE, start, length, 0, errorCode); + return index; + } + } // c is part of literal text + } + setParseError(parseError, 0); // Unmatched '{' braces in message. + errorCode=U_UNMATCHED_BRACES; + return 0; +} + +int32_t +MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel, + UParseError *parseError, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return 0; + } + int32_t start=index; + index=skipWhiteSpace(index); + if(index==msg.length() || msg.charAt(index)==u_rightCurlyBrace) { + setParseError(parseError, 0); // Missing choice argument pattern. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + for(;;) { + // The choice argument style contains |-separated (number, separator, message) triples. + // Parse the number. + int32_t numberIndex=index; + index=skipDouble(index); + int32_t length=index-numberIndex; + if(length==0) { + setParseError(parseError, start); // Bad choice pattern syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + if(length>Part::MAX_LENGTH) { + setParseError(parseError, numberIndex); // Choice number too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + parseDouble(numberIndex, index, TRUE, parseError, errorCode); // adds ARG_INT or ARG_DOUBLE + if(U_FAILURE(errorCode)) { + return 0; + } + // Parse the separator. + index=skipWhiteSpace(index); + if(index==msg.length()) { + setParseError(parseError, start); // Bad choice pattern syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + UChar c=msg.charAt(index); + if(!(c==u_pound || c==u_lessThan || c==u_lessOrEqual)) { // U+2264 is <= + setParseError(parseError, start); // Expected choice separator (#<\u2264) instead of c. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, index, 1, 0, errorCode); + // Parse the message fragment. + index=parseMessage(++index, 0, nestingLevel+1, UMSGPAT_ARG_TYPE_CHOICE, parseError, errorCode); + if(U_FAILURE(errorCode)) { + return 0; + } + // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length(). + if(index==msg.length()) { + return index; + } + if(msg.charAt(index)==u_rightCurlyBrace) { + if(!inMessageFormatPattern(nestingLevel)) { + setParseError(parseError, start); // Bad choice pattern syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + return index; + } // else the terminator is '|' + index=skipWhiteSpace(index+1); + } +} + +int32_t +MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType, + int32_t index, int32_t nestingLevel, + UParseError *parseError, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return 0; + } + int32_t start=index; + UBool isEmpty=TRUE; + UBool hasOther=FALSE; + for(;;) { + // First, collect the selector looking for a small set of terminators. + // It would be a little faster to consider the syntax of each possible + // token right here, but that makes the code too complicated. + index=skipWhiteSpace(index); + UBool eos=index==msg.length(); + if(eos || msg.charAt(index)==u_rightCurlyBrace) { + if(eos==inMessageFormatPattern(nestingLevel)) { + setParseError(parseError, start); // Bad plural/select pattern syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + if(!hasOther) { + setParseError(parseError, 0); // Missing 'other' keyword in plural/select pattern. + errorCode=U_DEFAULT_KEYWORD_MISSING; + return 0; + } + return index; + } + int32_t selectorIndex=index; + if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && msg.charAt(selectorIndex)==u_equal) { + // explicit-value plural selector: =double + index=skipDouble(index+1); + int32_t length=index-selectorIndex; + if(length==1) { + setParseError(parseError, start); // Bad plural/select pattern syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + if(length>Part::MAX_LENGTH) { + setParseError(parseError, selectorIndex); // Argument selector too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode); + parseDouble(selectorIndex+1, index, FALSE, + parseError, errorCode); // adds ARG_INT or ARG_DOUBLE + } else { + index=skipIdentifier(index); + int32_t length=index-selectorIndex; + if(length==0) { + setParseError(parseError, start); // Bad plural/select pattern syntax. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + // Note: The ':' in "offset:" is just beyond the skipIdentifier() range. + if( UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && length==6 && index<msg.length() && + 0==msg.compare(selectorIndex, 7, kOffsetColon, 0, 7) + ) { + // plural offset, not a selector + if(!isEmpty) { + // Plural argument 'offset:' (if present) must precede key-message pairs. + setParseError(parseError, start); + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + // allow whitespace between offset: and its value + int32_t valueIndex=skipWhiteSpace(index+1); // The ':' is at index. + index=skipDouble(valueIndex); + if(index==valueIndex) { + setParseError(parseError, start); // Missing value for plural 'offset:'. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + if((index-valueIndex)>Part::MAX_LENGTH) { + setParseError(parseError, valueIndex); // Plural offset value too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + parseDouble(valueIndex, index, FALSE, + parseError, errorCode); // adds ARG_INT or ARG_DOUBLE + if(U_FAILURE(errorCode)) { + return 0; + } + isEmpty=FALSE; + continue; // no message fragment after the offset + } else { + // normal selector word + if(length>Part::MAX_LENGTH) { + setParseError(parseError, selectorIndex); // Argument selector too long. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode); + if(0==msg.compare(selectorIndex, length, kOther, 0, 5)) { + hasOther=TRUE; + } + } + } + if(U_FAILURE(errorCode)) { + return 0; + } + + // parse the message fragment following the selector + index=skipWhiteSpace(index); + if(index==msg.length() || msg.charAt(index)!=u_leftCurlyBrace) { + setParseError(parseError, selectorIndex); // No message fragment after plural/select selector. + errorCode=U_PATTERN_SYNTAX_ERROR; + return 0; + } + index=parseMessage(index, 1, nestingLevel+1, argType, parseError, errorCode); + if(U_FAILURE(errorCode)) { + return 0; + } + isEmpty=FALSE; + } +} + +int32_t +MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) { + // If the identifier contains only ASCII digits, then it is an argument _number_ + // and must not have leading zeros (except "0" itself). + // Otherwise it is an argument _name_. + if(start>=limit) { + return UMSGPAT_ARG_NAME_NOT_VALID; + } + int32_t number; + // Defer numeric errors until we know there are only digits. + UBool badNumber; + UChar c=s.charAt(start++); + if(c==0x30) { + if(start==limit) { + return 0; + } else { + number=0; + badNumber=TRUE; // leading zero + } + } else if(0x31<=c && c<=0x39) { + number=c-0x30; + badNumber=FALSE; + } else { + return UMSGPAT_ARG_NAME_NOT_NUMBER; + } + while(start<limit) { + c=s.charAt(start++); + if(0x30<=c && c<=0x39) { + if(number>=INT32_MAX/10) { + badNumber=TRUE; // overflow + } + number=number*10+(c-0x30); + } else { + return UMSGPAT_ARG_NAME_NOT_NUMBER; + } + } + // There are only ASCII digits. + if(badNumber) { + return UMSGPAT_ARG_NAME_NOT_VALID; + } else { + return number; + } +} + +void +MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity, + UParseError *parseError, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + U_ASSERT(start<limit); + // fake loop for easy exit and single throw statement + for(;;) { /*loop doesn't iterate*/ + // fast path for small integers and infinity + int32_t value=0; + int32_t isNegative=0; // not boolean so that we can easily add it to value + int32_t index=start; + UChar c=msg.charAt(index++); + if(c==u_minus) { + isNegative=1; + if(index==limit) { + break; // no number + } + c=msg.charAt(index++); + } else if(c==u_plus) { + if(index==limit) { + break; // no number + } + c=msg.charAt(index++); + } + if(c==0x221e) { // infinity + if(allowInfinity && index==limit) { + double infinity=uprv_getInfinity(); + addArgDoublePart( + isNegative!=0 ? -infinity : infinity, + start, limit-start, errorCode); + return; + } else { + break; + } + } + // try to parse the number as a small integer but fall back to a double + while('0'<=c && c<='9') { + value=value*10+(c-'0'); + if(value>(Part::MAX_VALUE+isNegative)) { + break; // not a small-enough integer + } + if(index==limit) { + addPart(UMSGPAT_PART_TYPE_ARG_INT, start, limit-start, + isNegative!=0 ? -value : value, errorCode); + return; + } + c=msg.charAt(index++); + } + // Let Double.parseDouble() throw a NumberFormatException. + char numberChars[128]; + int32_t capacity=(int32_t)sizeof(numberChars); + int32_t length=limit-start; + if(length>=capacity) { + break; // number too long + } + msg.extract(start, length, numberChars, capacity, US_INV); + if((int32_t)uprv_strlen(numberChars)<length) { + break; // contains non-invariant character that was turned into NUL + } + char *end; + double numericValue=uprv_strtod(numberChars, &end); + if(end!=(numberChars+length)) { + break; // parsing error + } + addArgDoublePart(numericValue, start, length, errorCode); + return; + } + setParseError(parseError, start /*, limit*/); // Bad syntax for numeric value. + errorCode=U_PATTERN_SYNTAX_ERROR; + return; +} + +int32_t +MessagePattern::skipWhiteSpace(int32_t index) { + const UChar *s=msg.getBuffer(); + int32_t msgLength=msg.length(); + const UChar *t=PatternProps::skipWhiteSpace(s+index, msgLength-index); + return (int32_t)(t-s); +} + +int32_t +MessagePattern::skipIdentifier(int32_t index) { + const UChar *s=msg.getBuffer(); + int32_t msgLength=msg.length(); + const UChar *t=PatternProps::skipIdentifier(s+index, msgLength-index); + return (int32_t)(t-s); +} + +int32_t +MessagePattern::skipDouble(int32_t index) { + int32_t msgLength=msg.length(); + while(index<msgLength) { + UChar c=msg.charAt(index); + // U+221E: Allow the infinity symbol, for ChoiceFormat patterns. + if((c<0x30 && c!=u_plus && c!=u_minus && c!=u_dot) || (c>0x39 && c!=u_e && c!=u_E && c!=0x221e)) { + break; + } + ++index; + } + return index; +} + +UBool +MessagePattern::isArgTypeChar(UChar32 c) { + return (u_a<=c && c<=u_z) || (u_A<=c && c<=u_Z); +} + +UBool +MessagePattern::isChoice(int32_t index) { + UChar c; + return + ((c=msg.charAt(index++))==u_c || c==u_C) && + ((c=msg.charAt(index++))==u_h || c==u_H) && + ((c=msg.charAt(index++))==u_o || c==u_O) && + ((c=msg.charAt(index++))==u_i || c==u_I) && + ((c=msg.charAt(index++))==u_c || c==u_C) && + ((c=msg.charAt(index))==u_e || c==u_E); +} + +UBool +MessagePattern::isPlural(int32_t index) { + UChar c; + return + ((c=msg.charAt(index++))==u_p || c==u_P) && + ((c=msg.charAt(index++))==u_l || c==u_L) && + ((c=msg.charAt(index++))==u_u || c==u_U) && + ((c=msg.charAt(index++))==u_r || c==u_R) && + ((c=msg.charAt(index++))==u_a || c==u_A) && + ((c=msg.charAt(index))==u_l || c==u_L); +} + +UBool +MessagePattern::isSelect(int32_t index) { + UChar c; + return + ((c=msg.charAt(index++))==u_s || c==u_S) && + ((c=msg.charAt(index++))==u_e || c==u_E) && + ((c=msg.charAt(index++))==u_l || c==u_L) && + ((c=msg.charAt(index++))==u_e || c==u_E) && + ((c=msg.charAt(index++))==u_c || c==u_C) && + ((c=msg.charAt(index))==u_t || c==u_T); +} + +UBool +MessagePattern::isOrdinal(int32_t index) { + UChar c; + return + ((c=msg.charAt(index++))==u_o || c==u_O) && + ((c=msg.charAt(index++))==u_r || c==u_R) && + ((c=msg.charAt(index++))==u_d || c==u_D) && + ((c=msg.charAt(index++))==u_i || c==u_I) && + ((c=msg.charAt(index++))==u_n || c==u_N) && + ((c=msg.charAt(index++))==u_a || c==u_A) && + ((c=msg.charAt(index))==u_l || c==u_L); +} + +UBool +MessagePattern::inMessageFormatPattern(int32_t nestingLevel) { + return nestingLevel>0 || partsList->a[0].type==UMSGPAT_PART_TYPE_MSG_START; +} + +UBool +MessagePattern::inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType) { + return + nestingLevel==1 && + parentType==UMSGPAT_ARG_TYPE_CHOICE && + partsList->a[0].type!=UMSGPAT_PART_TYPE_MSG_START; +} + +void +MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t length, + int32_t value, UErrorCode &errorCode) { + if(partsList->ensureCapacityForOneMore(partsLength, errorCode)) { + Part &part=partsList->a[partsLength++]; + part.type=type; + part.index=index; + part.length=(uint16_t)length; + part.value=(int16_t)value; + part.limitPartIndex=0; + } +} + +void +MessagePattern::addLimitPart(int32_t start, + UMessagePatternPartType type, int32_t index, int32_t length, + int32_t value, UErrorCode &errorCode) { + partsList->a[start].limitPartIndex=partsLength; + addPart(type, index, length, value, errorCode); +} + +void +MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t length, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + int32_t numericIndex=numericValuesLength; + if(numericValuesList==NULL) { + numericValuesList=new MessagePatternDoubleList(); + if(numericValuesList==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + } else if(!numericValuesList->ensureCapacityForOneMore(numericValuesLength, errorCode)) { + return; + } else { + if(numericIndex>Part::MAX_VALUE) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + } + numericValuesList->a[numericValuesLength++]=numericValue; + addPart(UMSGPAT_PART_TYPE_ARG_DOUBLE, start, length, numericIndex, errorCode); +} + +void +MessagePattern::setParseError(UParseError *parseError, int32_t index) { + if(parseError==NULL) { + return; + } + parseError->offset=index; + + // Set preContext to some of msg before index. + // Avoid splitting a surrogate pair. + int32_t length=index; + if(length>=U_PARSE_CONTEXT_LEN) { + length=U_PARSE_CONTEXT_LEN-1; + if(length>0 && U16_IS_TRAIL(msg[index-length])) { + --length; + } + } + msg.extract(index-length, length, parseError->preContext); + parseError->preContext[length]=0; + + // Set postContext to some of msg starting at index. + length=msg.length()-index; + if(length>=U_PARSE_CONTEXT_LEN) { + length=U_PARSE_CONTEXT_LEN-1; + if(length>0 && U16_IS_LEAD(msg[index+length-1])) { + --length; + } + } + msg.extract(index, length, parseError->postContext); + parseError->postContext[length]=0; +} + +// MessageImpl ------------------------------------------------------------- *** + +void +MessageImpl::appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit, + UnicodeString &sb) { + int32_t doubleApos=-1; + for(;;) { + int32_t i=s.indexOf(u_apos, start); + if(i<0 || i>=limit) { + sb.append(s, start, limit-start); + break; + } + if(i==doubleApos) { + // Double apostrophe at start-1 and start==i, append one. + sb.append(u_apos); + ++start; + doubleApos=-1; + } else { + // Append text between apostrophes and skip this one. + sb.append(s, start, i-start); + doubleApos=start=i+1; + } + } +} + +// Ported from second half of ICU4J SelectFormat.format(String). +UnicodeString & +MessageImpl::appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern, + int32_t msgStart, + UnicodeString &result) { + const UnicodeString &msgString=msgPattern.getPatternString(); + int32_t prevIndex=msgPattern.getPart(msgStart).getLimit(); + for(int32_t i=msgStart;;) { + const MessagePattern::Part &part=msgPattern.getPart(++i); + UMessagePatternPartType type=part.getType(); + int32_t index=part.getIndex(); + if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) { + return result.append(msgString, prevIndex, index-prevIndex); + } else if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX) { + result.append(msgString, prevIndex, index-prevIndex); + prevIndex=part.getLimit(); + } else if(type==UMSGPAT_PART_TYPE_ARG_START) { + result.append(msgString, prevIndex, index-prevIndex); + prevIndex=index; + i=msgPattern.getLimitPartIndex(i); + index=msgPattern.getPart(i).getLimit(); + appendReducedApostrophes(msgString, prevIndex, index, result); + prevIndex=index; + } + } +} + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_FORMATTING diff --git a/thirdparty/icu4c/common/msvcres.h b/thirdparty/icu4c/common/msvcres.h new file mode 100644 index 0000000000..0cace85e74 --- /dev/null +++ b/thirdparty/icu4c/common/msvcres.h @@ -0,0 +1,25 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +//{{NO_DEPENDENCIES}} +// Copyright (c) 2003-2010 International Business Machines +// Corporation and others. All Rights Reserved. +// +// Used by common.rc and other .rc files. +//Do not edit with Microsoft Developer Studio because it will modify this +//header the wrong way. This is here to prevent Visual Studio .NET from +//unnessarily building the resource files when it's not needed. +// + +/* +These are defined before unicode/uversion.h in order to prevent +STLPort's broken stddef.h from being used when rc.exe parses this file. +*/ +#define _STLP_OUTERMOST_HEADER_ID 0 +#define _STLP_WINCE 1 + +#include "unicode/uversion.h" + +#define ICU_WEBSITE "http://icu-project.org" +#define ICU_COMPANY "The ICU Project" +#define ICU_PRODUCT_PREFIX "ICU" +#define ICU_PRODUCT "International Components for Unicode" diff --git a/thirdparty/icu4c/common/mutex.h b/thirdparty/icu4c/common/mutex.h new file mode 100644 index 0000000000..44b1f90ba0 --- /dev/null +++ b/thirdparty/icu4c/common/mutex.h @@ -0,0 +1,77 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +*/ +//---------------------------------------------------------------------------- +// File: mutex.h +// +// Lightweight C++ wrapper for umtx_ C mutex functions +// +// Author: Alan Liu 1/31/97 +// History: +// 06/04/97 helena Updated setImplementation as per feedback from 5/21 drop. +// 04/07/1999 srl refocused as a thin wrapper +// +//---------------------------------------------------------------------------- +#ifndef MUTEX_H +#define MUTEX_H + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "umutex.h" + +U_NAMESPACE_BEGIN + +/** + * Mutex is a helper class for convenient locking and unlocking of a UMutex. + * + * Creating a local scope Mutex will lock a UMutex, holding the lock until the Mutex + * goes out of scope. + * + * If no UMutex is specified, the ICU global mutex is implied. + * + * For example: + * + * static UMutex myMutex; + * + * void Function(int arg1, int arg2) + * { + * static Object* foo; // Shared read-write object + * Mutex mutex(&myMutex); // or no args for the global lock + * foo->Method(); + * // When 'mutex' goes out of scope and gets destroyed here, the lock is released + * } + * + * Note: Do NOT use the form 'Mutex mutex();' as that merely forward-declares a function + * returning a Mutex. This is a common mistake which silently slips through the + * compiler!! + */ + +class U_COMMON_API Mutex : public UMemory { +public: + Mutex(UMutex *mutex = nullptr) : fMutex(mutex) { + umtx_lock(fMutex); + } + ~Mutex() { + umtx_unlock(fMutex); + } + + Mutex(const Mutex &other) = delete; // forbid assigning of this class + Mutex &operator=(const Mutex &other) = delete; // forbid copying of this class + void *operator new(size_t s) = delete; // forbid heap allocation. Locals only. + +private: + UMutex *fMutex; +}; + + +U_NAMESPACE_END + +#endif //_MUTEX_ +//eof diff --git a/thirdparty/icu4c/common/norm2_nfc_data.h b/thirdparty/icu4c/common/norm2_nfc_data.h new file mode 100644 index 0000000000..455cc0c428 --- /dev/null +++ b/thirdparty/icu4c/common/norm2_nfc_data.h @@ -0,0 +1,1149 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// Copyright (C) 1999-2016, International Business Machines +// Corporation and others. All Rights Reserved. +// +// file name: norm2_nfc_data.h +// +// machine-generated by: icu/source/tools/gennorm2/n2builder.cpp + + +#ifdef INCLUDED_FROM_NORMALIZER2_CPP + +static const UVersionInfo norm2_nfc_data_formatVersion={4,0,0,0}; +static const UVersionInfo norm2_nfc_data_dataVersion={0xd,0,0,0}; + +static const int32_t norm2_nfc_data_indexes[Normalizer2Impl::IX_COUNT]={ +0x50,0x4bac,0x8814,0x8914,0x8914,0x8914,0x8914,0x8914,0xc0,0x300,0xae2,0x29e0,0x3c66,0xfc00,0x1288,0x3b9c, +0x3c34,0x3c66,0x300,0 +}; + +static const uint16_t norm2_nfc_data_trieIndex[1746]={ +0,0x40,0x7b,0xbb,0xfb,0x13a,0x17a,0x1b2,0x1f2,0x226,0x254,0x226,0x294,0x2d4,0x313,0x353, +0x393,0x3d2,0x40f,0x44e,0x226,0x226,0x488,0x4c8,0x4f8,0x530,0x226,0x570,0x59f,0x5de,0x226,0x5f3, +0x631,0x65f,0x226,0x68c,0x6cc,0x709,0x729,0x768,0x7a7,0x7e4,0x803,0x840,0x729,0x879,0x8a7,0x8e6, +0x226,0x920,0x937,0x977,0x98e,0x9cd,0x226,0xa03,0xa23,0xa5e,0xa6a,0xaa5,0xacd,0xb0a,0xb4a,0xb84, +0xb9f,0x226,0xbda,0x226,0xc1a,0xc39,0xc6f,0xcac,0x226,0x226,0x226,0x226,0x226,0xccf,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xcfb,0x226,0x226,0xd30, +0x226,0x226,0xd4e,0x226,0xd78,0x226,0x226,0x226,0xdb4,0xdd4,0xe14,0xe53,0xe8e,0xece,0xf02,0xf2e, +0x808,0x226,0x226,0xf62,0x226,0x226,0x226,0xfa2,0xfe2,0x1022,0x1062,0x10a2,0x10e2,0x1122,0x1162,0x11a2, +0x11e2,0x226,0x226,0x1212,0x1243,0x226,0x1273,0x12a6,0x12e3,0x1322,0x1362,0x1398,0x13c6,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x13f1,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0xcbd,0x226,0x140e,0x226,0x144e,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x148e,0x14c8,0x1506,0x1546,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1585,0x15c3,0x15e3,0x226,0x226,0x226,0x226, +0x161d,0x226,0x226,0x1645,0x1677,0x16a5,0x80c,0x16b8,0x226,0x226,0x16c8,0x1708,0x226,0x226,0x226,0x1420, +0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750, +0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760, +0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754, +0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748, +0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758, +0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c, +0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c, +0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750, +0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760, +0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754, +0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x175c,0x1748,0x1750,0x1758,0x1760,0x174c,0x1754,0x1794,0x226, +0x17d4,0x180f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x184f,0x188f,0x18cf,0x190f,0x194f,0x198f,0x19cf,0x1a0f,0x1a32,0x1a72,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1a92,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x655,0x664,0x67c,0x69b,0x6b0,0x6b0,0x6b0,0x6b4,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbda,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x40c, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ac5,0x226,0x226,0x1ad5,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0xdc6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ae5,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x15d6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x1aef,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7eb,0x226,0x226, +0x9ba,0x226,0x1aff,0x1b0c,0x1b18,0x226,0x226,0x226,0x226,0x414,0x226,0x1b23,0x1b33,0x226,0x226,0x226, +0x7e0,0x226,0x226,0x226,0x226,0x1b43,0x226,0x226,0x226,0x1b4e,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x1b55,0x226,0x226,0x226,0x226,0x1b60,0x1b6f,0x8f6,0x1b7d,0x412,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x1b8b,0x798,0x226,0x226,0x226,0x226,0x226,0x1b9b,0x1baa,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x8d6,0x1bb2,0x1bc2,0x226, +0x226,0x226,0x9ba,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bcc,0x226,0x226,0x226,0x226,0x226, +0x226,0x7e6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bc9, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bdc, +0x7e0,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x84d,0x226,0x226,0x226,0x7ed,0x7ea, +0x226,0x226,0x226,0x226,0x7e8,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x9ba,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbd4,0x226,0x226,0x226, +0x226,0x7ea,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x1bec,0x226,0x226,0x226,0xefb,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x1bfc,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bfe, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x1c0d,0x1c1d,0x1c2b,0x1c38,0x226,0x1c44,0x1c52,0x1c62,0x226,0x226, +0x226,0x226,0xcea,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c72,0x1c7a, +0x1c88,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0xefb,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x4fc,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x1c98,0x226,0x226,0x226,0x226,0x226,0x226,0x1ca4,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x1cb4,0x1cc4,0x1cd4,0x1ce4,0x1cf4,0x1d04,0x1d14,0x1d24,0x1d34,0x1d44,0x1d54, +0x1d64,0x1d74,0x1d84,0x1d94,0x1da4,0x1db4,0x1dc4,0x1dd4,0x1de4,0x1df4,0x1e04,0x1e14,0x1e24,0x1e34,0x1e44,0x1e54, +0x1e64,0x1e74,0x1e84,0x1e94,0x1ea4,0x1eb4,0x1ec4,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x408,0x428,0xc4,0xc4,0xc4,0x448,0x457,0x46d,0x489,0x4a6,0x4c2, +0x4df,0x4fc,0x51b,0x538,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x552,0xc4,0x566,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x586,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0x591,0x5ae,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5ce,0x5e2,0xc4,0xc4,0x5f5, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0x615,0x635 +}; + +static const uint16_t norm2_nfc_data_trieData[7892]={ +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,4,8,0xc,1, +1,0x10,0x50,0x5c,0x70,0x88,0xcc,0xd0,0xec,0x108,0x144,0x148,0x15c,0x174,0x180,0x1a4, +0x1e4,1,0x1ec,0x20c,0x228,0x244,0x290,0x298,0x2b0,0x2b8,0x2dc,1,1,1,1,1, +1,0x2f4,0x334,0x340,0x354,0x36c,0x3b0,0x3b4,0x3d0,0x3f0,0x428,0x430,0x444,0x45c,0x468,0x48c, +0x4cc,1,0x4d4,0x4f4,0x510,0x530,0x57c,0x584,0x5a0,0x5a8,0x5d0,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0x5e8,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0x128a,0x1290,0xae4,0x1296,0xafa, +0xb04,0x5f4,0xb0e,0x129c,0x12a2,0xb18,0x12a8,0x12ae,0x12b4,0x12ba,0xb2e,1,0x12c0,0x12c6,0x12cc,0xb38, +0xb4e,0xb60,1,0x5fc,0x12d2,0x12d8,0x12de,0xb6a,0x12e4,1,1,0x12ea,0x12f0,0xb80,0x12f6,0xb96, +0xba0,0x600,0xbaa,0x12fc,0x1302,0xbb4,0x1308,0x130e,0x1314,0x131a,0xbca,1,0x1320,0x1326,0x132c,0xbd4, +0xbea,0xbfc,1,0x608,0x1332,0x1338,0x133e,0xc06,0x1344,1,0x134a,0x1350,0x1356,0xc1c,0xc32,0x135d, +0x1363,0x1368,0x136e,0x1374,0x137a,0x1380,0x1386,0x138c,0x1392,0x1398,0x139e,1,1,0xc48,0xc56,0x13a4, +0x13aa,0x13b0,0x13b6,0x13bd,0x13c3,0x13c8,0x13ce,0x13d4,0x13da,0x13e0,0x13e6,0x13ec,0x13f2,0x13f9,0x13ff,0x1404, +0x140a,1,1,0x1410,0x1416,0x141c,0x1422,0x1428,0x142e,0x1435,0x143b,0x1440,1,1,1,0x1447, +0x144d,0x1453,0x1459,1,0x145e,0x1464,0x146b,0x1471,0x1476,0x147c,1,1,1,0x1482,0x1488,0x148f, +0x1495,0x149a,0x14a0,1,1,1,0xc64,0xc72,0x14a6,0x14ac,0x14b2,0x14b8,1,1,0x14be,0x14c4, +0x14cb,0x14d1,0x14d6,0x14dc,0xc80,0xc8a,0x14e2,0x14e8,0x14ef,0x14f5,0xc94,0xc9e,0x14fb,0x1501,0x1506,0x150c, +1,1,0xca8,0xcb2,0xcbc,0xcc6,0x1512,0x1518,0x151e,0x1524,0x152a,0x1530,0x1537,0x153d,0x1542,0x1548, +0x154e,0x1554,0x155a,0x1560,0x1566,0x156c,0x1572,0x1578,0x157e,0x60c,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,0xcd0,0xcea,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xd04,0xd1e,1,1,1,1,1, +1,0x610,1,1,1,1,1,1,1,1,1,1,1,1,1,0x1584, +0x158a,0x1590,0x1596,0x159c,0x15a2,0x15a8,0x15ae,0x15b6,0x15c0,0x15ca,0x15d4,0x15de,0x15e8,0x15f2,0x15fc,1, +0x1606,0x1610,0x161a,0x1624,0x162d,0x1633,1,1,0x1638,0x163e,0x1644,0x164a,0xd38,0xd42,0x1653,0x165d, +0x1665,0x166b,0x1671,1,1,1,0x1676,0x167c,1,1,0x1682,0x1688,0x1690,0x169a,0x16a3,0x16a9, +0x16af,0x16b5,0x16ba,0x16c0,0x16c6,0x16cc,0x16d2,0x16d8,0x16de,0x16e4,0x16ea,0x16f0,0x16f6,0x16fc,0x1702,0x1708, +0x170e,0x1714,0x171a,0x1720,0x1726,0x172c,0x1732,0x1738,0x173e,0x1744,0x174a,0x1750,0x1756,0x175c,1,1, +0x1762,0x1768,1,1,1,1,1,1,0xd4c,0xd56,0xd60,0xd6a,0x1770,0x177a,0x1784,0x178e, +0xd74,0xd7e,0x1798,0x17a2,0x17aa,0x17b0,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0x614,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xffcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc, +0xfdcc,0xffcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xfdcc,0xffd0,0xffb8,0xffb8,0xffb8,0xffb8,0xffd0,0xfdb0, +0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xff94,0xff94,0xfdb8,0xfdb8,0xfdb8,0xfdb8,0xfd94,0xfd94,0xffb8,0xffb8,0xffb8, +0xffb8,0xfdb8,0xfdb8,0xffb8,0xfdb8,0xfdb8,0xffb8,0xffb8,0xfe02,0xfe02,0xfe02,0xfe02,0xfc02,0xffb8,0xffb8,0xffb8, +0xffb8,0xffcc,0xffcc,0xffcc,0x3c36,0x3c3c,0xfdcc,0x3c42,0x3c48,0xfde0,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc, +0xffcc,0xffb8,0xffb8,1,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffd0,0xffb8,0xffb8,0xffcc, +0xffd2,0xffd4,0xffd4,0xffd2,0xffd4,0xffd4,0xffd2,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,0x29e1,1,1,1,1,1,1,1, +1,1,0x29e5,1,1,1,1,1,0x17b7,0x17bd,0x29e9,0x17c3,0x17c9,0x17cf,1,0x17d5, +1,0x17db,0x17e1,0x17e9,0x618,1,1,1,0x634,1,0x644,1,0x658,1,1,1, +1,1,0x674,1,0x684,1,1,1,0x688,1,1,1,0x6a0,0x17f1,0x17f7,0xd88, +0x17fd,0xd92,0x1803,0x180b,0x6b4,1,1,1,0x6d4,1,0x6e4,1,0x6fc,1,1,1, +1,1,0x71c,1,0x72c,1,1,1,0x734,1,1,1,0x754,0xd9c,0xdae,0x1813, +0x1819,0xdc0,1,1,1,0x76c,0x181f,0x1825,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0x182b,0x1831,1,0x1837,1,1,0x774,0x183d,1,1,1,1,0x1843, +0x1849,0x184f,1,0x778,1,1,0x780,1,0x784,0x790,0x798,0x79c,0x1855,0x7ac,1,1, +1,0x7b0,1,1,1,1,0x7b4,1,1,1,0x7c4,1,1,1,0x7c8,1, +0x7cc,1,1,0x7d0,1,1,0x7d8,1,0x7dc,0x7e8,0x7f0,0x7f4,0x185b,0x804,1,1, +1,0x808,1,1,1,0x80c,1,1,1,0x81c,1,1,1,0x820,1,0x824, +1,1,0x1861,0x1867,1,0x186d,1,1,0x828,0x1873,1,1,1,1,0x1879,0x187f, +0x1885,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0x82c,0x830,0x188b,0x1891,1,1,1,1,1,1, +1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x1897, +0x189d,1,1,1,1,1,1,1,1,1,1,1,1,1,0x18a3,0x18a9, +0x18af,0x18b5,1,1,0x18bb,0x18c1,0x834,0x838,0x18c7,0x18cd,0x18d3,0x18d9,0x18df,0x18e5,1,1, +0x18eb,0x18f1,0x18f7,0x18fd,0x1903,0x1909,0x83c,0x840,0x190f,0x1915,0x191b,0x1921,0x1927,0x192d,0x1933,0x1939, +0x193f,0x1945,0x194b,0x1951,1,1,0x1957,0x195d,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc, +0xffcc,0xffcc,0xffbc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8, +0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffbc,0xffc8,0xffcc,0xfe14,0xfe16,0xfe18,0xfe1a,0xfe1c,0xfe1e,0xfe20,0xfe22, +0xfe24,0xfe26,0xfe26,0xfe28,0xfe2a,0xfe2c,1,0xfe2e,1,0xfe30,0xfe32,1,0xffcc,0xffb8,1,0xfe24, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xfe3c,0xfe3e,0xfe40,1,1,1,1,1,1,1,0x1962,0x1968,0x196f,0x1975,0x197b,0x844, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0x850,1,0x854,0xfe36,0xfe38,0xfe3a,0xfe3c,0xfe3e, +0xfe40,0xfe42,0xfe44,0xfdcc,0xfdcc,0xfdb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +0xfe46,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +0x1981,0x858,0x1987,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0x85c,0x198d,1,0x860,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffcc, +0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,0xffcc,0xffcc,1,0xffb8,0xffcc,0xffcc,0xffb8,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +0xfe48,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc, +0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffb8,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc, +0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1, +0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffb8, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffb8, +0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xfe36,0xfe38,0xfe3a,0xffcc, +0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0x864,0x1993,1,1,1,1,1,1,0x868,0x1999,1,0x86c, +0x199f,1,1,1,1,1,1,1,0xfc0e,1,1,1,1,1,1,1, +1,1,1,1,1,1,0xfe12,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc,1,1, +1,0x29ec,0x29f2,0x29f8,0x29fe,0x2a04,0x2a0a,0x2a10,0x2a16,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xfe0e,1,0xfc00,1,1,1,1,1,1,1,0x870, +1,1,1,0x19a5,0x19ab,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00, +1,1,1,1,0x2a1c,0x2a22,1,0x2a28,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0xffcc,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,0x2a2e,1,1,0x2a34,1,1, +1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1, +1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,0x2a3a,0x2a40,0x2a46, +1,1,0x2a4c,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe0e, +1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +0x878,0x19b1,1,1,0x19b7,0x19bd,0xfe12,1,1,1,1,1,1,1,1,0xfc00, +0xfc00,1,1,1,1,0x2a52,0x2a58,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0x884,1,0x19c3,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xfc00,1,1,1,1,1,1,0x888,0x890,1,1, +0x19c9,0x19cf,0x19d5,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0x894,1,0x19db,1,1,1,1,0xfe12,1,1, +1,1,1,1,1,0xfea8,0xfcb6,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0xfe0e,1,1,0x898,0x19e1,1,0xfc00,1,1,1,0x89c,0x19e7,0x19ed, +1,0xdca,0x19f5,1,0xfe12,1,1,1,1,1,1,1,0xfc00,0xfc00,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,0xfc00,1,1,1, +1,1,1,0x8a8,0x8b0,1,1,0x19fd,0x1a03,0x1a09,0xfe12,1,1,1,1,1, +1,1,1,1,0xfc00,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,0xfc12,1,1, +1,1,0xfc00,1,1,1,1,1,1,1,1,1,0x8b4,0x1a0f,1,0xdd4, +0x1a17,0x1a1f,0xfc00,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0xfece,0xfece,0xfe12,1,1, +1,1,1,1,1,1,0xfed6,0xfed6,0xfed6,0xfed6,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xfeec,0xfeec,0xfe12,1,1,1,1,1,1,1,1,0xfef4,0xfef4,0xfef4, +0xfef4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xffb8,1,0xffb8,1,0xffb0,1,1,1,1,1,1,0x2a5f,1,1, +1,1,1,1,1,1,1,0x2a65,1,1,1,1,0x2a6b,1,1,1, +1,0x2a71,1,1,1,1,0x2a77,1,1,1,1,1,1,1,1,1, +1,1,1,0x2a7d,1,1,1,1,1,1,1,0xff02,0xff04,0x3c50,0xff08,0x3c58, +0x2a82,1,0x2a88,1,0xff04,0xff04,0xff04,0xff04,1,1,0xff04,0x3c60,0xffcc,0xffcc,0xfe12,1, +0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,0x2a8f,1,1, +1,1,1,1,1,1,1,0x2a95,1,1,1,1,0x2a9b,1,1,1, +1,0x2aa1,1,1,1,1,0x2aa7,1,1,1,1,1,1,1,1,1, +1,1,1,0x2aad,1,1,1,1,1,1,0xffb8,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0x8c0,0x1a25,1,1,1,1,1,1,1,0xfc00,1,1, +1,1,1,1,1,1,0xfe0e,1,0xfe12,0xfe12,1,1,1,1,1,1, +1,1,1,1,1,1,1,0xffb8,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00, +0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00, +0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,0xffcc,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,0xffc8,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,0xffbc,0xffcc,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,1,1,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc, +0xffcc,0xffb8,1,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0x8c4,0x1a2b,0x8c8,0x1a31,0x8cc,0x1a37,0x8d0,0x1a3d,0x8d4,0x1a43,1,1,0x8d8, +0x1a49,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xfe0e,0xfc00,1,1,1,1,0x8dc,0x1a4f,0x8e0,0x1a55,0x8e4,0x8e8,0x1a5b,0x1a61, +0x8ec,0x1a67,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1, +0xfe12,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xffcc,0xffcc,0xffcc,1,0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8, +0xffb8,0xffb8,0xffcc,1,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1,1,1,0xffb8, +1,1,1,1,1,1,0xffcc,1,1,1,0xffcc,0xffcc,1,1,1,1, +1,1,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffd4, +0xffac,0xffb8,0xff94,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffd0,0xffc8,0xffc8,0xffb8,1,0xffcc,0xffd2,0xffb8, +0xffcc,0xffb8,0x1a6c,0x1a72,0x1a78,0x1a7e,0x1a85,0x1a8b,0x1a91,0x1a97,0x1a9f,0x1aa9,0x1ab0,0x1ab6,0x1abc,0x1ac2, +0x1ac8,0x1ace,0x1ad5,0x1adb,0x1ae0,0x1ae6,0x1aee,0x1af8,0x1b02,0x1b0c,0x1b14,0x1b1a,0x1b20,0x1b26,0x1b2f,0x1b39, +0x1b41,0x1b47,0x1b4c,0x1b52,0x1b58,0x1b5e,0x1b64,0x1b6a,0x1b70,0x1b76,0x1b7d,0x1b83,0x1b88,0x1b8e,0x1b94,0x1b9a, +0x1ba2,0x1bac,0x1bb4,0x1bba,0x1bc0,0x1bc6,0x1bcc,0x1bd2,0xdde,0xde8,0x1bda,0x1be4,0x1bec,0x1bf2,0x1bf8,0x1bfe, +0x1c04,0x1c0a,0x1c10,0x1c16,0x1c1d,0x1c23,0x1c28,0x1c2e,0x1c34,0x1c3a,0x1c40,0x1c46,0x1c4c,0x1c52,0x1c5a,0x1c64, +0x1c6e,0x1c78,0x1c82,0x1c8c,0x1c96,0x1ca0,0x1ca9,0x1caf,0x1cb5,0x1cbb,0x1cc0,0x1cc6,0xdf2,0xdfc,0x1cce,0x1cd8, +0x1ce0,0x1ce6,0x1cec,0x1cf2,0xe06,0xe10,0x1cfa,0x1d04,0x1d0e,0x1d18,0x1d22,0x1d2c,0x1d34,0x1d3a,0x1d40,0x1d46, +0x1d4c,0x1d52,0x1d58,0x1d5e,0x1d64,0x1d6a,0x1d70,0x1d76,0x1d7c,0x1d82,0x1d8a,0x1d94,0x1d9e,0x1da8,0x1db0,0x1db6, +0x1dbd,0x1dc3,0x1dc8,0x1dce,0x1dd4,0x1dda,0x1de0,0x1de6,0x1dec,0x1df2,0x1df9,0x1dff,0x1e05,0x1e0b,0x1e11,0x1e17, +0x1e1c,0x1e22,0x1e28,0x1e2e,0x1e35,0x1e3b,0x1e41,0x1e47,0x1e4c,0x1e52,0x1e58,0x1e5e,1,0x1e65,1,1, +1,1,0xe1a,0xe28,0x1e6a,0x1e70,0x1e78,0x1e82,0x1e8c,0x1e96,0x1ea0,0x1eaa,0x1eb4,0x1ebe,0x1ec8,0x1ed2, +0x1edc,0x1ee6,0x1ef0,0x1efa,0x1f04,0x1f0e,0x1f18,0x1f22,0x1f2c,0x1f36,0xe36,0xe40,0x1f3e,0x1f44,0x1f4a,0x1f50, +0x1f58,0x1f62,0x1f6c,0x1f76,0x1f80,0x1f8a,0x1f94,0x1f9e,0x1fa8,0x1fb2,0x1fba,0x1fc0,0x1fc6,0x1fcc,0xe4a,0xe54, +0x1fd2,0x1fd8,0x1fe0,0x1fea,0x1ff4,0x1ffe,0x2008,0x2012,0x201c,0x2026,0x2030,0x203a,0x2044,0x204e,0x2058,0x2062, +0x206c,0x2076,0x2080,0x208a,0x2094,0x209e,0x20a6,0x20ac,0x20b2,0x20b8,0x20c0,0x20ca,0x20d4,0x20de,0x20e8,0x20f2, +0x20fc,0x2106,0x2110,0x211a,0x2122,0x2128,0x212f,0x2135,0x213a,0x2140,0x2146,0x214c,1,1,1,1, +1,1,0xe5e,0xe74,0xe8c,0xe9a,0xea8,0xeb6,0xec4,0xed2,0xede,0xef4,0xf0c,0xf1a,0xf28,0xf36, +0xf44,0xf52,0xf5e,0xf6c,0x2155,0x215f,0x2169,0x2173,1,1,0xf7a,0xf88,0x217d,0x2187,0x2191,0x219b, +1,1,0xf96,0xfac,0xfc4,0xfd2,0xfe0,0xfee,0xffc,0x100a,0x1016,0x102c,0x1044,0x1052,0x1060,0x106e, +0x107c,0x108a,0x1096,0x10a8,0x21a5,0x21af,0x21b9,0x21c3,0x21cd,0x21d7,0x10ba,0x10cc,0x21e1,0x21eb,0x21f5,0x21ff, +0x2209,0x2213,0x10de,0x10ec,0x221d,0x2227,0x2231,0x223b,1,1,0x10fa,0x1108,0x2245,0x224f,0x2259,0x2263, +1,1,0x1116,0x1128,0x226d,0x2277,0x2281,0x228b,0x2295,0x229f,1,0x113a,1,0x22a9,1,0x22b3, +1,0x22bd,0x114c,0x1162,0x117a,0x1188,0x1196,0x11a4,0x11b2,0x11c0,0x11cc,0x11e2,0x11fa,0x1208,0x1216,0x1224, +0x1232,0x1240,0x124c,0x3b9e,0x22c5,0x3ba6,0x1256,0x3bae,0x22cb,0x3bb6,0x22d1,0x3bbe,0x22d7,0x3bc6,0x1260,0x3bce, +1,1,0x22de,0x22e8,0x22f7,0x2307,0x2317,0x2327,0x2337,0x2347,0x2352,0x235c,0x236b,0x237b,0x238b,0x239b, +0x23ab,0x23bb,0x23c6,0x23d0,0x23df,0x23ef,0x23ff,0x240f,0x241f,0x242f,0x243a,0x2444,0x2453,0x2463,0x2473,0x2483, +0x2493,0x24a3,0x24ae,0x24b8,0x24c7,0x24d7,0x24e7,0x24f7,0x2507,0x2517,0x2522,0x252c,0x253b,0x254b,0x255b,0x256b, +0x257b,0x258b,0x2595,0x259b,0x25a3,0x25aa,0x25b3,1,0x126a,0x25bd,0x25c5,0x25cb,0x25d1,0x3bd6,0x25d6,1, +0x2ab2,0x8f0,1,0x25dd,0x25e5,0x25ec,0x25f5,1,0x1274,0x25ff,0x2607,0x3bde,0x260d,0x3be6,0x2612,0x2619, +0x261f,0x2625,0x262b,0x2631,0x2639,0x3bf0,1,1,0x2641,0x2649,0x2651,0x2657,0x265d,0x3bfa,1,0x2663, +0x2669,0x266f,0x2675,0x267b,0x2683,0x3c04,0x268b,0x2691,0x2697,0x269f,0x26a7,0x26ad,0x26b3,0x3c0e,0x26b9,0x26bf, +0x3c16,0x2ab7,1,1,0x26c7,0x26ce,0x26d7,1,0x127e,0x26e1,0x26e9,0x3c1e,0x26ef,0x3c26,0x26f4,0x2abb, +0x8fc,1,0xfa09,0xfa09,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xffcc,0xffcc,0xfe02,0xfe02,0xffcc,0xffcc,0xffcc,0xffcc,0xfe02,0xfe02,0xfe02,0xffcc,0xffcc,1, +1,1,1,0xffcc,1,1,1,0xfe02,0xfe02,0xffcc,0xffb8,0xffcc,0xfe02,0xfe02,0xffb8,0xffb8, +0xffb8,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0x2abe,1,1,1,0x2ac2,0x3c2e,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0x908,1,0x90c,1,0x910,1,1,1,1,1,0x26fb,0x2701,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,0x2707,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0x270d,0x2713,0x2719,0x914,1,0x918,1,0x91c,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0x920,0x271f,1,1,1,0x924,0x2725,1,0x928,0x272b, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0x92c,0x2731,0x930,0x2737,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x934, +1,1,1,0x273d,1,0x938,0x2743,0x93c,1,0x2749,0x940,0x274f,1,1,1,0x944, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0x2755,0x948,0x275b,1,0x94c,0x950,1,1,1,1,1,1,1,0x2761, +0x2767,0x276d,0x2773,0x2779,0x954,0x958,0x277f,0x2785,0x95c,0x960,0x278b,0x2791,0x964,0x968,0x96c,0x970, +1,1,0x2797,0x279d,0x974,0x978,0x27a3,0x27a9,0x97c,0x980,0x27af,0x27b5,1,1,1,1, +1,1,1,0x984,0x988,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0x98c,1,1,1,1,1,0x990,0x994,1,0x998,0x27bb,0x27c1, +0x27c7,0x27cd,1,1,0x99c,0x9a0,0x9a4,0x9a8,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0x27d3,0x27d9,0x27df,0x27e5,1,1,1,1, +1,1,0x27eb,0x27f1,0x27f7,0x27fd,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x2ac7, +0x2acb,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,0x2acf,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0xffb4,0xffc8,0xffd0,0xffbc,0xffc0,0xffc0,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x9ac,1, +1,1,1,0x9b0,0x2803,0x9b4,0x2809,0x9b8,0x280f,0x9bc,0x2815,0x9c0,0x281b,0x9c4,0x2821,0x9c8, +0x2827,0x9cc,0x282d,0x9d0,0x2833,0x9d4,0x2839,0x9d8,0x283f,0x9dc,0x2845,1,0x9e0,0x284b,0x9e4,0x2851, +0x9e8,0x2857,1,1,1,1,1,0x9ec,0x285d,0x2863,0x9f4,0x2869,0x286f,0x9fc,0x2875,0x287b, +0xa04,0x2881,0x2887,0xa0c,0x288d,0x2893,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,0x2899,1,1,1,1,0xfc10, +0xfc10,1,1,0xa14,0x289f,1,1,1,1,1,1,1,0xa18,1,1,1, +1,0xa1c,0x28a5,0xa20,0x28ab,0xa24,0x28b1,0xa28,0x28b7,0xa2c,0x28bd,0xa30,0x28c3,0xa34,0x28c9,0xa38, +0x28cf,0xa3c,0x28d5,0xa40,0x28db,0xa44,0x28e1,0xa48,0x28e7,1,0xa4c,0x28ed,0xa50,0x28f3,0xa54,0x28f9, +1,1,1,1,1,0xa58,0x28ff,0x2905,0xa60,0x290b,0x2911,0xa68,0x2917,0x291d,0xa70,0x2923, +0x2929,0xa78,0x292f,0x2935,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xa80,0xa84,0xa88,0xa8c,1,0x293b,1,1,0x2941,0x2947,0x294d, +0x2953,1,1,0xa90,0x2959,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0xffcc,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0xffcc,1,0xffcc,0xffcc,0xffb8,1,1,0xffcc, +0xffcc,1,1,1,1,1,0xffcc,0xffcc,1,0xffcc,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1, +1,1,1,1,1,1,1,1,0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289, +0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289, +0x1289,0x1289,0x1289,0x1289,0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289, +0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289, +0xae2,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289, +0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0xae2,0x1289,0x1289,0x1289, +0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289, +0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,0x1289,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0x3c66,1,0x3c66,0x3c66,0x3c66,0x3c66,0x3c66,0x3c66,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x3c66,0x3c66, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0x3c66,1,1,1,1,0x3c66,1,1,1,0x3c66,1,0x3c66,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,0x3b97,1,0x2ad5, +0x2ad9,0x2add,0x2ae1,0x2ae5,0x2ae9,0x2aed,0x2af1,0x2af1,0x2af5,0x2af9,0x2afd,0x2b01,0x2b05,0x2b09,0x2b0d,0x2b11, +0x2b15,0x2b19,0x2b1d,0x2b21,0x2b25,0x2b29,0x2b2d,0x2b31,0x2b35,0x2b39,0x2b3d,0x2b41,0x2b45,0x2b49,0x2b4d,0x2b51, +0x2b55,0x2b59,0x2b5d,0x2b61,0x2b65,0x2b69,0x2b6d,0x2b71,0x2b75,0x2b79,0x2b7d,0x2b81,0x2b85,0x2b89,0x2b8d,0x2b91, +0x2b95,0x2b99,0x2b9d,0x2ba1,0x2ba5,0x2ba9,0x2bad,0x2bb1,0x2bb5,0x2bb9,0x2bbd,0x2bc1,0x2bc5,0x2bc9,0x2bcd,0x2bd1, +0x2bd5,0x2bd9,0x2bdd,0x2be1,0x2be5,0x2be9,0x2bed,0x2bf1,0x2bf5,0x2bf9,0x2bfd,0x2c01,0x2c05,0x2c09,0x2c0d,0x2c11, +0x2c15,0x2c19,0x2c1d,0x2c21,0x2c25,0x2c29,0x2c2d,0x2c31,0x2c35,0x2c39,0x2c3d,0x2b21,0x2c41,0x2c45,0x2c49,0x2c4d, +0x2c51,0x2c55,0x2c59,0x2c5d,0x2c61,0x2c65,0x2c69,0x2c6d,0x2c71,0x2c75,0x2c79,0x2c7d,0x2c81,0x2c85,0x2c89,0x2c8d, +0x2c91,0x2c95,0x2c99,0x2c9d,0x2ca1,0x2ca5,0x2ca9,0x2cad,0x2cb1,0x2cb5,0x2cb9,0x2cbd,0x2cc1,0x2cc5,0x2cc9,0x2ccd, +0x2cd1,0x2cd5,0x2cd9,0x2cdd,0x2ce1,0x2ce5,0x2ce9,0x2ced,0x2cf1,0x2cf5,0x2cf9,0x2cfd,0x2d01,0x2d05,0x2d09,0x2d0d, +0x2d11,0x2d15,0x2d19,0x2d1d,0x2d21,0x2d25,0x2d29,0x2d2d,0x2d31,0x2d35,0x2d39,0x2d3d,0x2d41,0x2d45,0x2d49,0x2d4d, +0x2c89,0x2d51,0x2d55,0x2d59,0x2d5d,0x2d61,0x2d65,0x2d69,0x2d6d,0x2c49,0x2d71,0x2d75,0x2d79,0x2d7d,0x2d81,0x2d85, +0x2d89,0x2d8d,0x2d91,0x2d95,0x2d99,0x2d9d,0x2da1,0x2da5,0x2da9,0x2dad,0x2db1,0x2db5,0x2db9,0x2dbd,0x2b21,0x2dc1, +0x2dc5,0x2dc9,0x2dcd,0x2dd1,0x2dd5,0x2dd9,0x2ddd,0x2de1,0x2de5,0x2de9,0x2ded,0x2df1,0x2df5,0x2df9,0x2dfd,0x2e01, +0x2e05,0x2e09,0x2e0d,0x2e11,0x2e15,0x2e19,0x2e1d,0x2e21,0x2e25,0x2e29,0x2c51,0x2e2d,0x2e31,0x2e35,0x2e39,0x2e3d, +0x2e41,0x2e45,0x2e49,0x2e4d,0x2e51,0x2e55,0x2e59,0x2e5d,0x2e61,0x2e65,0x2e69,0x2e6d,0x2e71,0x2e75,0x2e79,0x2e7d, +0x2e81,0x2e85,0x2e89,0x2e8d,0x2e91,0x2e95,0x2e99,0x2e9d,0x2ea1,0x2ea5,0x2ea9,0x2ead,0x2eb1,0x2eb5,0x2eb9,0x2ebd, +0x2ec1,0x2ec5,0x2ec9,0x2ecd,0x2ed1,0x2ed5,0x2ed9,0x2edd,0x2ee1,0x2ee5,0x2ee9,0x2eed,0x2ef1,1,1,0x2ef5, +1,0x2ef9,1,1,0x2efd,0x2f01,0x2f05,0x2f09,0x2f0d,0x2f11,0x2f15,0x2f19,0x2f1d,0x2f21,1,0x2f25, +1,0x2f29,1,1,0x2f2d,0x2f31,1,1,1,0x2f35,0x2f39,0x2f3d,0x2f41,0x2f45,0x2f49,0x2f4d, +0x2f51,0x2f55,0x2f59,0x2f5d,0x2f61,0x2f65,0x2f69,0x2f6d,0x2f71,0x2f75,0x2f79,0x2f7d,0x2f81,0x2f85,0x2f89,0x2f8d, +0x2f91,0x2f95,0x2f99,0x2f9d,0x2fa1,0x2fa5,0x2fa9,0x2fad,0x2fb1,0x2fb5,0x2fb9,0x2fbd,0x2fc1,0x2fc5,0x2fc9,0x2fcd, +0x2fd1,0x2fd5,0x2fd9,0x2fdd,0x2fe1,0x2fe5,0x2d25,0x2fe9,0x2fed,0x2ff1,0x2ff5,0x2ff9,0x2ffd,0x2ffd,0x3001,0x3005, +0x3009,0x300d,0x3011,0x3015,0x3019,0x301d,0x2f2d,0x3021,0x3025,0x3029,0x302d,0x3031,0x3037,1,1,0x303b, +0x303f,0x3043,0x3047,0x304b,0x304f,0x3053,0x3057,0x2f65,0x305b,0x305f,0x3063,0x2ef5,0x3067,0x306b,0x306f,0x3073, +0x3077,0x307b,0x307f,0x3083,0x3087,0x308b,0x308f,0x3093,0x2f89,0x3097,0x2f8d,0x309b,0x309f,0x30a3,0x30a7,0x30ab, +0x2ef9,0x2b75,0x30af,0x30b3,0x30b7,0x2c8d,0x2de9,0x30bb,0x30bf,0x2fa9,0x30c3,0x2fad,0x30c7,0x30cb,0x30cf,0x2f01, +0x30d3,0x30d7,0x30db,0x30df,0x30e3,0x2f05,0x30e7,0x30eb,0x30ef,0x30f3,0x30f7,0x30fb,0x2fe5,0x30ff,0x3103,0x2d25, +0x3107,0x2ff5,0x310b,0x310f,0x3113,0x3117,0x311b,0x3009,0x311f,0x2f29,0x3123,0x300d,0x2c41,0x3127,0x3011,0x312b, +0x3019,0x312f,0x3133,0x3137,0x313b,0x313f,0x3021,0x2f19,0x3143,0x3025,0x3147,0x3029,0x314b,0x2af1,0x314f,0x3155, +0x315b,0x3161,0x3165,0x3169,0x316d,0x3173,0x3179,0x317f,0x3183,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x3186, +0xfe34,0x318c,1,1,1,1,1,1,1,1,1,1,0x3192,0x3198,0x31a0,0x31aa, +0x31b2,0x31b8,0x31be,0x31c4,0x31ca,0x31d0,0x31d6,0x31dc,0x31e2,1,0x31e8,0x31ee,0x31f4,0x31fa,0x3200,1, +0x3206,1,0x320c,0x3212,1,0x3218,0x321e,1,0x3224,0x322a,0x3230,0x3236,0x323c,0x3242,0x3248,0x324e, +0x3254,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8, +0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xffb8,1,0xffcc,1,1,1,1,1,1,1,1,0xffcc,0xfe02,0xffb8, +1,1,1,1,0xfe12,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1, +1,1,1,1,1,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8,1, +1,1,1,1,1,1,1,1,0xa94,0x295f,0xa9a,0x2969,1,1,1,1, +1,0xaa0,1,1,1,1,1,0x2973,1,1,1,1,1,1,1,1, +1,0xfe12,0xfc0e,1,1,1,1,1,1,1,0xfc00,1,1,1,1,1, +1,0x297d,0x2987,1,0xaa6,0xaac,0xfe12,0xfe12,1,1,1,1,1,1,1,1, +1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,0xfe0e,1,1, +1,1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,0xfe0e,0xfe12, +1,1,1,1,1,1,1,1,1,1,1,0xfe0e,0xfe0e,1,0xfc00,1, +1,1,1,1,1,1,0xab2,1,1,1,0x2991,0x299b,0xfe12,1,1,1, +1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,0xfe12,1,1, +1,0xfe0e,1,1,1,1,1,1,1,1,1,0xfc00,1,1,1,1, +1,1,1,1,0xabe,0xfc00,0x29a5,0x29af,0xfc00,0x29b9,1,1,0xfe12,0xfe0e,1,1, +1,1,1,1,1,1,1,1,1,1,0xad0,0xad6,0x29c3,0x29cd,1,1, +1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,0xfc00,1,1,1, +1,0xadc,1,1,0x29d7,1,1,1,1,0xfe12,0xfe12,1,0xfe02,0xfe02,0xfe02,0xfe02, +0xfe02,1,1,1,1,1,1,1,1,1,1,1,0xfe0c,0xfe0c,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0xfe02,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0x325a,0x3264,0x3278,0x3290,0x32a8, +0x32c0,0x32d8,0xffb0,0xffb0,0xfe02,0xfe02,0xfe02,1,1,1,0xffc4,0xffb0,0xffb0,0xffb0,1,1, +1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc, +0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,0x32e6,0x32f0,0x3304, +0x331c,0x3334,0x334c,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,1,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1, +1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xfe0e,1, +1,1,1,1,0x335b,0x335f,0x3363,0x3367,0x336d,0x2f4d,0x3371,0x3375,0x3379,0x337d,0x2f51,0x3381, +0x3385,0x3389,0x2f55,0x338f,0x3393,0x3397,0x339b,0x33a1,0x33a5,0x33a9,0x33ad,0x33b3,0x33b7,0x33bb,0x33bf,0x303f, +0x33c3,0x33c9,0x33cd,0x33d1,0x33d5,0x33d9,0x33dd,0x33e1,0x33e5,0x3053,0x2f59,0x2f5d,0x3057,0x33e9,0x33ed,0x2c59, +0x33f1,0x2f61,0x33f5,0x33f9,0x33fd,0x3401,0x3401,0x3401,0x3405,0x340b,0x340f,0x3413,0x3417,0x341d,0x3421,0x3425, +0x3429,0x342d,0x3431,0x3435,0x3439,0x343d,0x3441,0x3445,0x3449,0x344d,0x344d,0x305f,0x3451,0x3455,0x3459,0x345d, +0x2f69,0x3461,0x3465,0x3469,0x2ebd,0x346d,0x3471,0x3475,0x3479,0x347d,0x3481,0x3485,0x3489,0x348d,0x3493,0x3497, +0x349b,0x349f,0x34a3,0x34a7,0x34ab,0x34b1,0x34b7,0x34bb,0x34bf,0x34c3,0x34c7,0x34cb,0x34cf,0x34d3,0x34d7,0x34d7, +0x34db,0x34e1,0x34e5,0x2c49,0x34e9,0x34ed,0x34f3,0x34f7,0x34fb,0x34ff,0x3503,0x3507,0x2f7d,0x350b,0x350f,0x3513, +0x3519,0x351d,0x3523,0x3527,0x352b,0x352f,0x3533,0x3537,0x353b,0x353f,0x3543,0x3547,0x354b,0x354f,0x3555,0x3559, +0x355d,0x3561,0x2b71,0x3565,0x356b,0x356f,0x356f,0x3575,0x3579,0x3579,0x357d,0x3581,0x3587,0x358d,0x3591,0x3595, +0x3599,0x359d,0x35a1,0x35a5,0x35a9,0x35ad,0x35b1,0x2f81,0x35b5,0x35bb,0x35bf,0x35c3,0x308f,0x35c3,0x35c7,0x2f89, +0x35cb,0x35cf,0x35d3,0x35d7,0x2f8d,0x2b05,0x35db,0x35df,0x35e3,0x35e7,0x35eb,0x35ef,0x35f3,0x35f9,0x35fd,0x3601, +0x3605,0x3609,0x360d,0x3613,0x3617,0x361b,0x361f,0x3623,0x3627,0x362b,0x362f,0x3633,0x2f91,0x3637,0x363b,0x3641, +0x3645,0x3649,0x364d,0x2f99,0x3651,0x3655,0x3659,0x365d,0x3661,0x3665,0x3669,0x366d,0x2b75,0x30af,0x3671,0x3675, +0x3679,0x367d,0x3683,0x3687,0x368b,0x368f,0x2f9d,0x3693,0x3699,0x369d,0x36a1,0x3161,0x36a5,0x36a9,0x36ad,0x36b1, +0x36b5,0x36bb,0x36bf,0x36c3,0x36c7,0x36cd,0x36d1,0x36d5,0x36d9,0x2c8d,0x36dd,0x36e1,0x36e7,0x36ed,0x36f3,0x36f7, +0x36fd,0x3701,0x3705,0x3709,0x370d,0x2fa1,0x2de9,0x3711,0x3715,0x3719,0x371d,0x3723,0x3727,0x372b,0x372f,0x30bf, +0x3733,0x3737,0x373d,0x3741,0x3745,0x374b,0x3751,0x3755,0x30c3,0x3759,0x375d,0x3761,0x3765,0x3769,0x376d,0x3771, +0x3777,0x377b,0x3781,0x3785,0x378b,0x30cb,0x378f,0x3793,0x3799,0x379d,0x37a1,0x37a7,0x37ad,0x37b1,0x37b5,0x37b9, +0x37bd,0x37bd,0x37c1,0x37c5,0x30d3,0x37c9,0x37cd,0x37d1,0x37d5,0x37d9,0x37df,0x37e3,0x2c55,0x37e9,0x37ef,0x37f3, +0x37f9,0x37ff,0x3805,0x3809,0x30eb,0x380d,0x3813,0x3819,0x381f,0x3825,0x3829,0x3829,0x30ef,0x3169,0x382d,0x3831, +0x3835,0x3839,0x383f,0x2bbd,0x30f7,0x3843,0x3847,0x2fcd,0x384d,0x3853,0x2f15,0x3859,0x385d,0x2fdd,0x3861,0x3865, +0x3869,0x386f,0x386f,0x3875,0x3879,0x387d,0x3883,0x3887,0x388b,0x388f,0x3895,0x3899,0x389d,0x38a1,0x38a5,0x38a9, +0x38af,0x38b3,0x38b7,0x38bb,0x38bf,0x38c3,0x38c7,0x38cd,0x38d3,0x38d7,0x38dd,0x38e1,0x38e7,0x38eb,0x2ff5,0x38ef, +0x38f5,0x38fb,0x38ff,0x3905,0x3909,0x390f,0x3913,0x3917,0x391b,0x391f,0x3923,0x3927,0x392d,0x3933,0x3939,0x3575, +0x393f,0x3943,0x3947,0x394b,0x394f,0x3953,0x3957,0x395b,0x395f,0x3963,0x3967,0x396b,0x2c9d,0x3971,0x3975,0x3979, +0x397d,0x3981,0x3985,0x3001,0x3989,0x398d,0x3991,0x3995,0x3999,0x399f,0x39a5,0x39ab,0x39af,0x39b3,0x39b7,0x39bb, +0x39c1,0x39c5,0x39cb,0x39cf,0x39d3,0x39d9,0x39df,0x39e3,0x2ba9,0x39e7,0x39eb,0x39ef,0x39f3,0x39f7,0x39fb,0x3113, +0x39ff,0x3a03,0x3a07,0x3a0b,0x3a0f,0x3a13,0x3a17,0x3a1b,0x3a1f,0x3a23,0x3a29,0x3a2d,0x3a31,0x3a35,0x3a39,0x3a3d, +0x3a43,0x3a49,0x3a4d,0x3a51,0x3127,0x312b,0x3a55,0x3a59,0x3a5f,0x3a63,0x3a67,0x3a6b,0x3a6f,0x3a75,0x3a7b,0x3a7f, +0x3a83,0x3a87,0x3a8d,0x312f,0x3a91,0x3a97,0x3a9d,0x3aa1,0x3aa5,0x3aa9,0x3aaf,0x3ab3,0x3ab7,0x3abb,0x3abf,0x3ac3, +0x3ac7,0x3acb,0x3ad1,0x3ad5,0x3ad9,0x3add,0x3ae3,0x3ae7,0x3aeb,0x3aef,0x3af3,0x3af9,0x3aff,0x3b03,0x3b07,0x3b0b, +0x3b11,0x3b15,0x3147,0x3147,0x3b1b,0x3b1f,0x3b25,0x3b29,0x3b2d,0x3b31,0x3b35,0x3b39,0x3b3d,0x3b41,0x314b,0x3b47, +0x3b4b,0x3b4f,0x3b53,0x3b57,0x3b5b,0x3b61,0x3b65,0x3b6b,0x3b71,0x3b77,0x3b7b,0x3b7f,0x3b83,0x3b87,0x3b8b,0x3b8f, +0x3b93,0x3b97,1,1 +}; + +static const UCPTrie norm2_nfc_data_trie={ + norm2_nfc_data_trieIndex, + { norm2_nfc_data_trieData }, + 1746, 7892, + 0x2fc00, 0x30, + 0, 0, + 0, 0, + 0xc4, 0x226, + 0x1, +}; + +static const uint16_t norm2_nfc_data_extraData[7732]={ +0xffff,0xffff,0x8670,0x44dc,0x8670,0x44c0,0x8670,0x44de,0x600,0x180,0x602,0x182,0x604,0x185,0x606,0x186, +0x608,0x200,0x60c,0x205,0x60e,0x44d,0x610,0x189,0x612,0x3d44,0x614,0x18b,0x618,0x39a,0x61e,0x400, +0x622,0x404,0x646,0x3d41,0x64a,0x3c00,0x8650,0x208,0x60e,0x3c04,0x646,0x3c08,0x8662,0x3c0c,0x602,0x20c, +0x604,0x210,0x60e,0x214,0x618,0x218,0x864e,0x18f,0x60e,0x3c14,0x618,0x21c,0x646,0x3c18,0x64e,0x3c20, +0x65a,0x3c24,0x8662,0x3c1c,0x600,0x190,0x602,0x192,0x604,0x195,0x606,0x3d78,0x608,0x225,0x60c,0x228, +0x60e,0x22c,0x610,0x196,0x612,0x3d74,0x618,0x234,0x61e,0x408,0x622,0x40c,0x646,0x3d71,0x64e,0x451, +0x650,0x230,0x65a,0x3c30,0x8660,0x3c34,0x860e,0x3c3c,0x602,0x3e8,0x604,0x238,0x608,0x3c40,0x60c,0x23c, +0x60e,0x240,0x618,0x3cc,0x864e,0x244,0x604,0x248,0x60e,0x3c44,0x610,0x3c4c,0x618,0x43c,0x646,0x3c48, +0x64e,0x3c50,0x865c,0x3c54,0x600,0x198,0x602,0x19a,0x604,0x19c,0x606,0x250,0x608,0x254,0x60c,0x258, +0x60e,0x260,0x610,0x19f,0x612,0x3d90,0x618,0x39e,0x61e,0x410,0x622,0x414,0x646,0x3d94,0x650,0x25c, +0x8660,0x3c58,0x8604,0x268,0x602,0x3c60,0x618,0x3d0,0x646,0x3c64,0x64e,0x26c,0x8662,0x3c68,0x602,0x272, +0x618,0x27a,0x646,0x3c6d,0x64e,0x276,0x65a,0x3c78,0x8662,0x3c74,0x602,0x3c7c,0x60e,0x3c80,0x8646,0x3c84, +0x600,0x3f0,0x602,0x286,0x606,0x1a2,0x60e,0x3c88,0x618,0x28e,0x646,0x3c8c,0x64e,0x28a,0x65a,0x3c94, +0x8662,0x3c90,0x600,0x1a4,0x602,0x1a6,0x604,0x1a9,0x606,0x1ab,0x608,0x299,0x60c,0x29c,0x60e,0x45d, +0x610,0x1ad,0x612,0x3d9c,0x616,0x2a0,0x618,0x3a2,0x61e,0x418,0x622,0x41c,0x636,0x341,0x646,0x3d99, +0x8650,0x3d5,0x602,0x3ca8,0x860e,0x3cac,0x602,0x2a8,0x60e,0x3cb0,0x618,0x2b0,0x61e,0x420,0x622,0x424, +0x646,0x3cb5,0x64e,0x2ac,0x8662,0x3cbc,0x602,0x2b5,0x604,0x2b8,0x60e,0x3cc0,0x618,0x2c1,0x646,0x3cc5, +0x64c,0x430,0x864e,0x2bc,0x60e,0x3cd4,0x618,0x2c8,0x646,0x3cd8,0x64c,0x434,0x64e,0x2c4,0x65a,0x3ce0, +0x8662,0x3cdc,0x600,0x1b2,0x602,0x1b4,0x604,0x1b6,0x606,0x2d1,0x608,0x2d5,0x60c,0x2d8,0x610,0x1b9, +0x612,0x3dcc,0x614,0x2dc,0x616,0x2e0,0x618,0x3a6,0x61e,0x428,0x622,0x42c,0x636,0x35f,0x646,0x3dc8, +0x648,0x3ce4,0x650,0x2e4,0x65a,0x3cec,0x8660,0x3ce8,0x606,0x3cf8,0x8646,0x3cfc,0x600,0x3d00,0x602,0x3d04, +0x604,0x2e8,0x60e,0x3d0c,0x610,0x3d08,0x8646,0x3d10,0x60e,0x3d14,0x8610,0x3d18,0x600,0x3de4,0x602,0x1ba, +0x604,0x2ec,0x606,0x3df0,0x608,0x464,0x60e,0x3d1c,0x610,0x2f0,0x612,0x3dec,0x8646,0x3de8,0x602,0x2f2, +0x604,0x3d20,0x60e,0x2f6,0x618,0x2fa,0x646,0x3d24,0x8662,0x3d28,0x600,0x1c0,0x602,0x1c2,0x604,0x1c5, +0x606,0x1c6,0x608,0x202,0x60c,0x207,0x60e,0x44f,0x610,0x1c9,0x612,0x3d46,0x614,0x1cb,0x618,0x39c, +0x61e,0x402,0x622,0x406,0x646,0x3d43,0x64a,0x3c02,0x8650,0x20a,0x60e,0x3c06,0x646,0x3c0a,0x8662,0x3c0e, +0x602,0x20e,0x604,0x212,0x60e,0x216,0x618,0x21a,0x864e,0x1cf,0x60e,0x3c16,0x618,0x21e,0x646,0x3c1a, +0x64e,0x3c22,0x65a,0x3c26,0x8662,0x3c1e,0x600,0x1d0,0x602,0x1d2,0x604,0x1d5,0x606,0x3d7a,0x608,0x227, +0x60c,0x22a,0x60e,0x22e,0x610,0x1d6,0x612,0x3d76,0x618,0x236,0x61e,0x40a,0x622,0x40e,0x646,0x3d73, +0x64e,0x453,0x650,0x232,0x65a,0x3c32,0x8660,0x3c36,0x860e,0x3c3e,0x602,0x3ea,0x604,0x23a,0x608,0x3c42, +0x60c,0x23e,0x60e,0x242,0x618,0x3ce,0x864e,0x246,0x604,0x24a,0x60e,0x3c46,0x610,0x3c4e,0x618,0x43e, +0x646,0x3c4a,0x64e,0x3c52,0x65c,0x3c56,0x8662,0x3d2c,0x600,0x1d8,0x602,0x1da,0x604,0x1dc,0x606,0x252, +0x608,0x256,0x60c,0x25a,0x610,0x1df,0x612,0x3d92,0x618,0x3a0,0x61e,0x412,0x622,0x416,0x646,0x3d96, +0x650,0x25e,0x8660,0x3c5a,0x604,0x26a,0x8618,0x3e0,0x602,0x3c62,0x618,0x3d2,0x646,0x3c66,0x64e,0x26e, +0x8662,0x3c6a,0x602,0x274,0x618,0x27c,0x646,0x3c6f,0x64e,0x278,0x65a,0x3c7a,0x8662,0x3c76,0x602,0x3c7e, +0x60e,0x3c82,0x8646,0x3c86,0x600,0x3f2,0x602,0x288,0x606,0x1e2,0x60e,0x3c8a,0x618,0x290,0x646,0x3c8e, +0x64e,0x28c,0x65a,0x3c96,0x8662,0x3c92,0x600,0x1e4,0x602,0x1e6,0x604,0x1e9,0x606,0x1eb,0x608,0x29b, +0x60c,0x29e,0x60e,0x45f,0x610,0x1ed,0x612,0x3d9e,0x616,0x2a2,0x618,0x3a4,0x61e,0x41a,0x622,0x41e, +0x636,0x343,0x646,0x3d9b,0x8650,0x3d7,0x602,0x3caa,0x860e,0x3cae,0x602,0x2aa,0x60e,0x3cb2,0x618,0x2b2, +0x61e,0x422,0x622,0x426,0x646,0x3cb7,0x64e,0x2ae,0x8662,0x3cbe,0x602,0x2b7,0x604,0x2ba,0x60e,0x3cc2, +0x618,0x2c3,0x646,0x3cc7,0x64c,0x432,0x864e,0x2be,0x60e,0x3cd6,0x610,0x3d2e,0x618,0x2ca,0x646,0x3cda, +0x64c,0x436,0x64e,0x2c6,0x65a,0x3ce2,0x8662,0x3cde,0x600,0x1f2,0x602,0x1f4,0x604,0x1f6,0x606,0x2d3, +0x608,0x2d7,0x60c,0x2da,0x610,0x1f9,0x612,0x3dce,0x614,0x2de,0x616,0x2e2,0x618,0x3a8,0x61e,0x42a, +0x622,0x42e,0x636,0x361,0x646,0x3dca,0x648,0x3ce6,0x650,0x2e6,0x65a,0x3cee,0x8660,0x3cea,0x606,0x3cfa, +0x8646,0x3cfe,0x600,0x3d02,0x602,0x3d06,0x604,0x2ea,0x60e,0x3d0e,0x610,0x3d0a,0x614,0x3d30,0x8646,0x3d12, +0x60e,0x3d16,0x8610,0x3d1a,0x600,0x3de6,0x602,0x1fa,0x604,0x2ee,0x606,0x3df2,0x608,0x466,0x60e,0x3d1e, +0x610,0x1fe,0x612,0x3dee,0x614,0x3d32,0x8646,0x3dea,0x602,0x2f4,0x604,0x3d22,0x60e,0x2f8,0x618,0x2fc, +0x646,0x3d26,0x8662,0x3d2a,0x600,0x3fda,0x602,0x70a,0x8684,0x3f82,0x602,0x3f8,0x8608,0x3c4,0x8602,0x3fc, +0x602,0x3fa,0x8608,0x3c6,0x8602,0x3fe,0x860e,0x3d36,0x8618,0x3dc,0x8618,0x3de,0x600,0x3f74,0x602,0x70c, +0x608,0x3f72,0x60c,0x3f70,0x626,0x3e11,0x628,0x3e13,0x868a,0x3f78,0x600,0x3f90,0x602,0x710,0x626,0x3e31, +0x8628,0x3e33,0x600,0x3f94,0x602,0x712,0x626,0x3e51,0x628,0x3e53,0x868a,0x3f98,0x600,0x3fb4,0x602,0x714, +0x608,0x3fb2,0x60c,0x3fb0,0x610,0x754,0x626,0x3e71,0x8628,0x3e73,0x600,0x3ff0,0x602,0x718,0x626,0x3e91, +0x8628,0x3e93,0x8628,0x3fd8,0x600,0x3fd4,0x602,0x71c,0x608,0x3fd2,0x60c,0x3fd0,0x610,0x756,0x8628,0x3eb3, +0x600,0x3ff4,0x602,0x71e,0x626,0x3ed1,0x628,0x3ed3,0x868a,0x3ff8,0x600,0x3ee1,0x602,0x759,0x608,0x3f62, +0x60c,0x3f60,0x626,0x3e01,0x628,0x3e03,0x684,0x3f6d,0x868a,0x3f66,0x600,0x3ee4,0x602,0x75a,0x626,0x3e21, +0x8628,0x3e23,0x600,0x3ee9,0x602,0x75d,0x626,0x3e41,0x628,0x3e43,0x684,0x3f8d,0x868a,0x3f86,0x600,0x3eec, +0x602,0x75e,0x608,0x3fa2,0x60c,0x3fa0,0x610,0x795,0x626,0x3e61,0x628,0x3e63,0x8684,0x3fac,0x600,0x3ef0, +0x602,0x798,0x626,0x3e81,0x8628,0x3e83,0x626,0x3fc8,0x8628,0x3fca,0x600,0x3ef4,0x602,0x79a,0x608,0x3fc2, +0x60c,0x3fc0,0x610,0x797,0x626,0x3ea1,0x628,0x3ea3,0x8684,0x3fcc,0x600,0x3ef9,0x602,0x79d,0x626,0x3ec1, +0x628,0x3ec3,0x684,0x3fed,0x868a,0x3fe6,0x602,0x7a6,0x8610,0x7a8,0x8610,0x80e,0x60c,0x9a0,0x8610,0x9a4, +0x8602,0x806,0x600,0x800,0x60c,0x9ac,0x8610,0x802,0x60c,0x982,0x8610,0x9b8,0x8610,0x9bc,0x600,0x81a, +0x608,0x9c4,0x60c,0x832,0x8610,0x9c8,0x8602,0x818,0x8610,0x9cc,0x608,0x9dc,0x60c,0x81c,0x610,0x9e0, +0x8616,0x9e4,0x8610,0x9e8,0x8610,0x9f0,0x8610,0x9d8,0x60c,0x9a2,0x8610,0x9a6,0x8602,0x8a6,0x600,0x8a0, +0x60c,0x9ae,0x8610,0x8a2,0x60c,0x984,0x8610,0x9ba,0x8610,0x9be,0x600,0x8ba,0x608,0x9c6,0x60c,0x872, +0x8610,0x9ca,0x8602,0x8b8,0x8610,0x9ce,0x608,0x9de,0x60c,0x8bc,0x610,0x9e2,0x8616,0x9e6,0x8610,0x9ea, +0x8610,0x9f2,0x8610,0x9da,0x8610,0x8ae,0x861e,0x8ec,0x861e,0x8ee,0x8610,0x9b4,0x8610,0x9b6,0x8610,0x9d4, +0x8610,0x9d6,0xca6,0xc44,0xca8,0xc46,0x8caa,0xc4a,0x8ca8,0xc48,0x8ca8,0xc4c,0x8ca8,0xd84,0x8ca8,0xda6, +0x8ca8,0xd80,0x9278,0x1252,0x9278,0x1262,0x9278,0x1268,0x137c,0x1396,0x93ae,0x1398,0x167c,0x1696,0x16ac,0x1690, +0x96ae,0x1698,0x97ae,0x1728,0x177c,0x1794,0x97ae,0x1798,0x977c,0x1796,0x98ac,0x1890,0x99aa,0x1980,0x1984,0x1995, +0x19aa,0x198e,0x99ac,0x1990,0x1a7c,0x1a94,0x9aae,0x1a98,0x9a7c,0x1a96,0x1b94,0x1bb4,0x1b9e,0x1bb9,0x9bbe,0x1bbc, +0xa05c,0x204c,0xb66a,0x360c,0xb66a,0x3610,0xb66a,0x3614,0xb66a,0x3618,0xb66a,0x361c,0xb66a,0x3624,0xb66a,0x3676, +0xb66a,0x367a,0xb66a,0x3680,0xb66a,0x3682,0xb66a,0x3686,0x600,0x3f9a,0x602,0x3f9c,0x8684,0x3f9e,0x600,0x3fba, +0x602,0x3fbc,0x8684,0x3fbe,0x8670,0x4334,0x8670,0x4336,0x8670,0x435c,0x8670,0x439a,0x8670,0x439e,0x8670,0x439c, +0x8670,0x4408,0x8670,0x4412,0x8670,0x4418,0x8670,0x4448,0x8670,0x444c,0x8670,0x4482,0x8670,0x4488,0x8670,0x448e, +0x8670,0x4492,0x8670,0x44da,0x8670,0x44c4,0x8670,0x44e0,0x8670,0x44e2,0x8670,0x44e8,0x8670,0x44ea,0x8670,0x44f0, +0x8670,0x44f2,0x8670,0x4500,0x8670,0x4502,0x8670,0x45c0,0x8670,0x45c2,0x8670,0x4508,0x8670,0x450a,0x8670,0x4510, +0x8670,0x4512,0x8670,0x45c4,0x8670,0x45c6,0x8670,0x4558,0x8670,0x455a,0x8670,0x455c,0x8670,0x455e,0x8670,0x45d4, +0x8670,0x45d6,0x8670,0x45d8,0x8670,0x45da,0xe132,0x6128,0xe132,0x6098,0xe132,0x609c,0xe132,0x60a0,0xe132,0x60a4, +0xe132,0x60a8,0xe132,0x60ac,0xe132,0x60b0,0xe132,0x60b4,0xe132,0x60b8,0xe132,0x60bc,0xe132,0x60c0,0xe132,0x60c4, +0xe132,0x60ca,0xe132,0x60ce,0xe132,0x60d2,0x6132,0x60e0,0xe134,0x60e2,0x6132,0x60e6,0xe134,0x60e8,0x6132,0x60ec, +0xe134,0x60ee,0x6132,0x60f2,0xe134,0x60f4,0x6132,0x60f8,0xe134,0x60fa,0xe132,0x613c,0xe132,0x61e8,0xe132,0x6158, +0xe132,0x615c,0xe132,0x6160,0xe132,0x6164,0xe132,0x6168,0xe132,0x616c,0xe132,0x6170,0xe132,0x6174,0xe132,0x6178, +0xe132,0x617c,0xe132,0x6180,0xe132,0x6184,0xe132,0x618a,0xe132,0x618e,0xe132,0x6192,0x6132,0x61a0,0xe134,0x61a2, +0x6132,0x61a6,0xe134,0x61a8,0x6132,0x61ac,0xe134,0x61ae,0x6132,0x61b2,0xe134,0x61b4,0x6132,0x61b8,0xe134,0x61ba, +0xe132,0x61ee,0xe132,0x61f0,0xe132,0x61f2,0xe132,0x61f4,0xe132,0x61fc,0xb489,0x2e82,0x2134,0xb489,0x2e82,0x2138, +0xb489,0x2e82,0x2156,0xb489,0x49c2,0x225c,0xb489,0x49c2,0x225e,0x3489,0xcf82,0x2696,0xb489,0xd5c2,0x2698,0x348b, +0x2c02,0x2978,0x348b,0x2e82,0x2976,0xb48b,0x2f42,0x297c,0xb48b,0x6bc2,0x2b74,0xb48b,0x6bc2,0x2b76,0xb48d,0x4c02, +0x3270,2,0xe602,0x41,0x302,0x600,0x3d4c,0x602,0x3d48,0x606,0x3d54,0x8612,0x3d50,0xe602,0x41,0x308, +0x8608,0x3bc,0xe602,0x41,0x30a,0x8602,0x3f4,0xca02,0x43,0x327,0x8602,0x3c10,0xe602,0x45,0x302,0x600, +0x3d80,0x602,0x3d7c,0x606,0x3d88,0x8612,0x3d84,0xe602,0x49,0x308,0x8602,0x3c5c,0xe602,0x4f,0x302,0x600, +0x3da4,0x602,0x3da0,0x606,0x3dac,0x8612,0x3da8,0xe602,0x4f,0x303,0x602,0x3c98,0x608,0x458,0x8610,0x3c9c, +0xe602,0x4f,0x308,0x8608,0x454,0xe602,0x55,0x308,0x600,0x3b6,0x602,0x3ae,0x608,0x3aa,0x8618,0x3b2, +0xe602,0x61,0x302,0x600,0x3d4e,0x602,0x3d4a,0x606,0x3d56,0x8612,0x3d52,0xe602,0x61,0x308,0x8608,0x3be, +0xe602,0x61,0x30a,0x8602,0x3f6,0xca02,0x63,0x327,0x8602,0x3c12,0xe602,0x65,0x302,0x600,0x3d82,0x602, +0x3d7e,0x606,0x3d8a,0x8612,0x3d86,0xe602,0x69,0x308,0x8602,0x3c5e,0xe602,0x6f,0x302,0x600,0x3da6,0x602, +0x3da2,0x606,0x3dae,0x8612,0x3daa,0xe602,0x6f,0x303,0x602,0x3c9a,0x608,0x45a,0x8610,0x3c9e,0xe602,0x6f, +0x308,0x8608,0x456,0xe602,0x75,0x308,0x600,0x3b8,0x602,0x3b0,0x608,0x3ac,0x8618,0x3b4,0xe602,0x41, +0x306,0x600,0x3d60,0x602,0x3d5c,0x606,0x3d68,0x8612,0x3d64,0xe602,0x61,0x306,0x600,0x3d62,0x602,0x3d5e, +0x606,0x3d6a,0x8612,0x3d66,0xe602,0x45,0x304,0x600,0x3c28,0x8602,0x3c2c,0xe602,0x65,0x304,0x600,0x3c2a, +0x8602,0x3c2e,0xe602,0x4f,0x304,0x600,0x3ca0,0x8602,0x3ca4,0xe602,0x6f,0x304,0x600,0x3ca2,0x8602,0x3ca6, +0xe602,0x53,0x301,0x860e,0x3cc8,0xe602,0x73,0x301,0x860e,0x3cca,0xe602,0x53,0x30c,0x860e,0x3ccc,0xe602, +0x73,0x30c,0x860e,0x3cce,0xe602,0x55,0x303,0x8602,0x3cf0,0xe602,0x75,0x303,0x8602,0x3cf2,0xe602,0x55, +0x304,0x8610,0x3cf4,0xe602,0x75,0x304,0x8610,0x3cf6,0xd802,0x4f,0x31b,0x600,0x3db8,0x602,0x3db4,0x606, +0x3dc0,0x612,0x3dbc,0x8646,0x3dc4,0xd802,0x6f,0x31b,0x600,0x3dba,0x602,0x3db6,0x606,0x3dc2,0x612,0x3dbe, +0x8646,0x3dc6,0xd802,0x55,0x31b,0x600,0x3dd4,0x602,0x3dd0,0x606,0x3ddc,0x612,0x3dd8,0x8646,0x3de0,0xd802, +0x75,0x31b,0x600,0x3dd6,0x602,0x3dd2,0x606,0x3dde,0x612,0x3dda,0x8646,0x3de2,0xca02,0x4f,0x328,0x8608, +0x3d8,0xca02,0x6f,0x328,0x8608,0x3da,0xe602,0x41,0x307,0x8608,0x3c0,0xe602,0x61,0x307,0x8608,0x3c2, +0xca02,0x45,0x327,0x860c,0x3c38,0xca02,0x65,0x327,0x860c,0x3c3a,0xe602,0x4f,0x307,0x8608,0x460,0xe602, +0x6f,0x307,0x8608,0x462,0xe602,0x3b1,0x301,0x868a,0x3f68,0xe602,0x3b7,0x301,0x868a,0x3f88,0xe602,0x3b9, +0x308,0x600,0x3fa4,0x602,0x720,0x8684,0x3fae,0xe602,0x3c5,0x308,0x600,0x3fc4,0x602,0x760,0x8684,0x3fce, +0xe602,0x3c9,0x301,0x868a,0x3fe8,2,0xcc6,0xcc2,0x99aa,0x1996,2,0xdd9,0xdcf,0x9b94,0x1bba,0xdc02, +0x4c,0x323,0x8608,0x3c70,0xdc02,0x6c,0x323,0x8608,0x3c72,0xdc02,0x52,0x323,0x8608,0x3cb8,0xdc02,0x72, +0x323,0x8608,0x3cba,0xdc02,0x53,0x323,0x860e,0x3cd0,0xdc02,0x73,0x323,0x860e,0x3cd2,0xdc02,0x41,0x323, +0x604,0x3d58,0x860c,0x3d6c,0xdc02,0x61,0x323,0x604,0x3d5a,0x860c,0x3d6e,0xdc02,0x45,0x323,0x8604,0x3d8c, +0xdc02,0x65,0x323,0x8604,0x3d8e,0xdc02,0x4f,0x323,0x8604,0x3db0,0xdc02,0x6f,0x323,0x8604,0x3db2,0xe602, +0x3b1,0x313,0x600,0x3e05,0x602,0x3e09,0x684,0x3e0d,0x868a,0x3f00,0xe602,0x3b1,0x314,0x600,0x3e07,0x602, +0x3e0b,0x684,0x3e0f,0x868a,0x3f02,0x1f00,0xe643,0x3b1,0x313,0x300,0x868a,0x3f04,0x1f01,0xe643,0x3b1,0x314, +0x300,0x868a,0x3f06,0x1f00,0xe643,0x3b1,0x313,0x301,0x868a,0x3f08,0x1f01,0xe643,0x3b1,0x314,0x301,0x868a, +0x3f0a,0x1f00,0xe643,0x3b1,0x313,0x342,0x868a,0x3f0c,0x1f01,0xe643,0x3b1,0x314,0x342,0x868a,0x3f0e,0xe602, +0x391,0x313,0x600,0x3e15,0x602,0x3e19,0x684,0x3e1d,0x868a,0x3f10,0xe602,0x391,0x314,0x600,0x3e17,0x602, +0x3e1b,0x684,0x3e1f,0x868a,0x3f12,0x1f08,0xe643,0x391,0x313,0x300,0x868a,0x3f14,0x1f09,0xe643,0x391,0x314, +0x300,0x868a,0x3f16,0x1f08,0xe643,0x391,0x313,0x301,0x868a,0x3f18,0x1f09,0xe643,0x391,0x314,0x301,0x868a, +0x3f1a,0x1f08,0xe643,0x391,0x313,0x342,0x868a,0x3f1c,0x1f09,0xe643,0x391,0x314,0x342,0x868a,0x3f1e,0xe602, +0x3b5,0x313,0x600,0x3e24,0x8602,0x3e28,0xe602,0x3b5,0x314,0x600,0x3e26,0x8602,0x3e2a,0xe602,0x395,0x313, +0x600,0x3e34,0x8602,0x3e38,0xe602,0x395,0x314,0x600,0x3e36,0x8602,0x3e3a,0xe602,0x3b7,0x313,0x600,0x3e45, +0x602,0x3e49,0x684,0x3e4d,0x868a,0x3f20,0xe602,0x3b7,0x314,0x600,0x3e47,0x602,0x3e4b,0x684,0x3e4f,0x868a, +0x3f22,0x1f20,0xe643,0x3b7,0x313,0x300,0x868a,0x3f24,0x1f21,0xe643,0x3b7,0x314,0x300,0x868a,0x3f26,0x1f20, +0xe643,0x3b7,0x313,0x301,0x868a,0x3f28,0x1f21,0xe643,0x3b7,0x314,0x301,0x868a,0x3f2a,0x1f20,0xe643,0x3b7, +0x313,0x342,0x868a,0x3f2c,0x1f21,0xe643,0x3b7,0x314,0x342,0x868a,0x3f2e,0xe602,0x397,0x313,0x600,0x3e55, +0x602,0x3e59,0x684,0x3e5d,0x868a,0x3f30,0xe602,0x397,0x314,0x600,0x3e57,0x602,0x3e5b,0x684,0x3e5f,0x868a, +0x3f32,0x1f28,0xe643,0x397,0x313,0x300,0x868a,0x3f34,0x1f29,0xe643,0x397,0x314,0x300,0x868a,0x3f36,0x1f28, +0xe643,0x397,0x313,0x301,0x868a,0x3f38,0x1f29,0xe643,0x397,0x314,0x301,0x868a,0x3f3a,0x1f28,0xe643,0x397, +0x313,0x342,0x868a,0x3f3c,0x1f29,0xe643,0x397,0x314,0x342,0x868a,0x3f3e,0xe602,0x3b9,0x313,0x600,0x3e64, +0x602,0x3e68,0x8684,0x3e6c,0xe602,0x3b9,0x314,0x600,0x3e66,0x602,0x3e6a,0x8684,0x3e6e,0xe602,0x399,0x313, +0x600,0x3e74,0x602,0x3e78,0x8684,0x3e7c,0xe602,0x399,0x314,0x600,0x3e76,0x602,0x3e7a,0x8684,0x3e7e,0xe602, +0x3bf,0x313,0x600,0x3e84,0x8602,0x3e88,0xe602,0x3bf,0x314,0x600,0x3e86,0x8602,0x3e8a,0xe602,0x39f,0x313, +0x600,0x3e94,0x8602,0x3e98,0xe602,0x39f,0x314,0x600,0x3e96,0x8602,0x3e9a,0xe602,0x3c5,0x313,0x600,0x3ea4, +0x602,0x3ea8,0x8684,0x3eac,0xe602,0x3c5,0x314,0x600,0x3ea6,0x602,0x3eaa,0x8684,0x3eae,0xe602,0x3a5,0x314, +0x600,0x3eb6,0x602,0x3eba,0x8684,0x3ebe,0xe602,0x3c9,0x313,0x600,0x3ec5,0x602,0x3ec9,0x684,0x3ecd,0x868a, +0x3f40,0xe602,0x3c9,0x314,0x600,0x3ec7,0x602,0x3ecb,0x684,0x3ecf,0x868a,0x3f42,0x1f60,0xe643,0x3c9,0x313, +0x300,0x868a,0x3f44,0x1f61,0xe643,0x3c9,0x314,0x300,0x868a,0x3f46,0x1f60,0xe643,0x3c9,0x313,0x301,0x868a, +0x3f48,0x1f61,0xe643,0x3c9,0x314,0x301,0x868a,0x3f4a,0x1f60,0xe643,0x3c9,0x313,0x342,0x868a,0x3f4c,0x1f61, +0xe643,0x3c9,0x314,0x342,0x868a,0x3f4e,0xe602,0x3a9,0x313,0x600,0x3ed5,0x602,0x3ed9,0x684,0x3edd,0x868a, +0x3f50,0xe602,0x3a9,0x314,0x600,0x3ed7,0x602,0x3edb,0x684,0x3edf,0x868a,0x3f52,0x1f68,0xe643,0x3a9,0x313, +0x300,0x868a,0x3f54,0x1f69,0xe643,0x3a9,0x314,0x300,0x868a,0x3f56,0x1f68,0xe643,0x3a9,0x313,0x301,0x868a, +0x3f58,0x1f69,0xe643,0x3a9,0x314,0x301,0x868a,0x3f5a,0x1f68,0xe643,0x3a9,0x313,0x342,0x868a,0x3f5c,0x1f69, +0xe643,0x3a9,0x314,0x342,0x868a,0x3f5e,0xe602,0x3b1,0x300,0x868a,0x3f64,0xe602,0x3b7,0x300,0x868a,0x3f84, +0xe602,0x3c9,0x300,0x868a,0x3fe4,0xe602,0x3b1,0x342,0x868a,0x3f6e,0xe602,0x3b7,0x342,0x868a,0x3f8e,0xe602, +0x3c9,0x342,0x868a,0x3fee,3,0xe602,0x41,0x300,0xe602,0x41,0x301,0xe602,0x41,0x303,0xe602,0x45, +0x300,0xe602,0x45,0x301,0xe602,0x45,0x308,0xe602,0x49,0x300,0xe602,0x49,0x301,0xe602,0x49,0x302, +0xe602,0x4e,0x303,0xe602,0x4f,0x300,0xe602,0x4f,0x301,0xe602,0x55,0x300,0xe602,0x55,0x301,0xe602, +0x55,0x302,0xe602,0x59,0x301,0xe602,0x61,0x300,0xe602,0x61,0x301,0xe602,0x61,0x303,0xe602,0x65, +0x300,0xe602,0x65,0x301,0xe602,0x65,0x308,0xe602,0x69,0x300,0xe602,0x69,0x301,0xe602,0x69,0x302, +0xe602,0x6e,0x303,0xe602,0x6f,0x300,0xe602,0x6f,0x301,0xe602,0x75,0x300,0xe602,0x75,0x301,0xe602, +0x75,0x302,0xe602,0x79,0x301,0xe602,0x79,0x308,0xe602,0x41,0x304,0xe602,0x61,0x304,0xca02,0x41, +0x328,0xca02,0x61,0x328,0xe602,0x43,0x301,0xe602,0x63,0x301,0xe602,0x43,0x302,0xe602,0x63,0x302, +0xe602,0x43,0x307,0xe602,0x63,0x307,0xe602,0x43,0x30c,0xe602,0x63,0x30c,0xe602,0x44,0x30c,0xe602, +0x64,0x30c,0xe602,0x45,0x306,0xe602,0x65,0x306,0xe602,0x45,0x307,0xe602,0x65,0x307,0xca02,0x45, +0x328,0xca02,0x65,0x328,0xe602,0x45,0x30c,0xe602,0x65,0x30c,0xe602,0x47,0x302,0xe602,0x67,0x302, +0xe602,0x47,0x306,0xe602,0x67,0x306,0xe602,0x47,0x307,0xe602,0x67,0x307,0xca02,0x47,0x327,0xca02, +0x67,0x327,0xe602,0x48,0x302,0xe602,0x68,0x302,0xe602,0x49,0x303,0xe602,0x69,0x303,0xe602,0x49, +0x304,0xe602,0x69,0x304,0xe602,0x49,0x306,0xe602,0x69,0x306,0xca02,0x49,0x328,0xca02,0x69,0x328, +0xe602,0x49,0x307,0xe602,0x4a,0x302,0xe602,0x6a,0x302,0xca02,0x4b,0x327,0xca02,0x6b,0x327,0xe602, +0x4c,0x301,0xe602,0x6c,0x301,0xca02,0x4c,0x327,0xca02,0x6c,0x327,0xe602,0x4c,0x30c,0xe602,0x6c, +0x30c,0xe602,0x4e,0x301,0xe602,0x6e,0x301,0xca02,0x4e,0x327,0xca02,0x6e,0x327,0xe602,0x4e,0x30c, +0xe602,0x6e,0x30c,0xe602,0x4f,0x306,0xe602,0x6f,0x306,0xe602,0x4f,0x30b,0xe602,0x6f,0x30b,0xe602, +0x52,0x301,0xe602,0x72,0x301,0xca02,0x52,0x327,0xca02,0x72,0x327,0xe602,0x52,0x30c,0xe602,0x72, +0x30c,0xe602,0x53,0x302,0xe602,0x73,0x302,0xca02,0x53,0x327,0xca02,0x73,0x327,0xca02,0x54,0x327, +0xca02,0x74,0x327,0xe602,0x54,0x30c,0xe602,0x74,0x30c,0xe602,0x55,0x306,0xe602,0x75,0x306,0xe602, +0x55,0x30a,0xe602,0x75,0x30a,0xe602,0x55,0x30b,0xe602,0x75,0x30b,0xca02,0x55,0x328,0xca02,0x75, +0x328,0xe602,0x57,0x302,0xe602,0x77,0x302,0xe602,0x59,0x302,0xe602,0x79,0x302,0xe602,0x59,0x308, +0xe602,0x5a,0x301,0xe602,0x7a,0x301,0xe602,0x5a,0x307,0xe602,0x7a,0x307,0xe602,0x5a,0x30c,0xe602, +0x7a,0x30c,0xe602,0x41,0x30c,0xe602,0x61,0x30c,0xe602,0x49,0x30c,0xe602,0x69,0x30c,0xe602,0x4f, +0x30c,0xe602,0x6f,0x30c,0xe602,0x55,0x30c,0xe602,0x75,0x30c,0xdc,0xe643,0x55,0x308,0x304,0xfc, +0xe643,0x75,0x308,0x304,0xdc,0xe643,0x55,0x308,0x301,0xfc,0xe643,0x75,0x308,0x301,0xdc,0xe643, +0x55,0x308,0x30c,0xfc,0xe643,0x75,0x308,0x30c,0xdc,0xe643,0x55,0x308,0x300,0xfc,0xe643,0x75, +0x308,0x300,0xc4,0xe643,0x41,0x308,0x304,0xe4,0xe643,0x61,0x308,0x304,0x226,0xe643,0x41,0x307, +0x304,0x227,0xe643,0x61,0x307,0x304,0xe602,0xc6,0x304,0xe602,0xe6,0x304,0xe602,0x47,0x30c,0xe602, +0x67,0x30c,0xe602,0x4b,0x30c,0xe602,0x6b,0x30c,0x1ea,0xe643,0x4f,0x328,0x304,0x1eb,0xe643,0x6f, +0x328,0x304,0xe602,0x1b7,0x30c,0xe602,0x292,0x30c,0xe602,0x6a,0x30c,0xe602,0x47,0x301,0xe602,0x67, +0x301,0xe602,0x4e,0x300,0xe602,0x6e,0x300,0xc5,0xe643,0x41,0x30a,0x301,0xe5,0xe643,0x61,0x30a, +0x301,0xe602,0xc6,0x301,0xe602,0xe6,0x301,0xe602,0xd8,0x301,0xe602,0xf8,0x301,0xe602,0x41,0x30f, +0xe602,0x61,0x30f,0xe602,0x41,0x311,0xe602,0x61,0x311,0xe602,0x45,0x30f,0xe602,0x65,0x30f,0xe602, +0x45,0x311,0xe602,0x65,0x311,0xe602,0x49,0x30f,0xe602,0x69,0x30f,0xe602,0x49,0x311,0xe602,0x69, +0x311,0xe602,0x4f,0x30f,0xe602,0x6f,0x30f,0xe602,0x4f,0x311,0xe602,0x6f,0x311,0xe602,0x52,0x30f, +0xe602,0x72,0x30f,0xe602,0x52,0x311,0xe602,0x72,0x311,0xe602,0x55,0x30f,0xe602,0x75,0x30f,0xe602, +0x55,0x311,0xe602,0x75,0x311,0xdc02,0x53,0x326,0xdc02,0x73,0x326,0xdc02,0x54,0x326,0xdc02,0x74, +0x326,0xe602,0x48,0x30c,0xe602,0x68,0x30c,0xd6,0xe643,0x4f,0x308,0x304,0xf6,0xe643,0x6f,0x308, +0x304,0xd5,0xe643,0x4f,0x303,0x304,0xf5,0xe643,0x6f,0x303,0x304,0x22e,0xe643,0x4f,0x307,0x304, +0x22f,0xe643,0x6f,0x307,0x304,0xe602,0x59,0x304,0xe602,0x79,0x304,0xe602,0xa8,0x301,0xe602,0x391, +0x301,0xe602,0x395,0x301,0xe602,0x397,0x301,0xe602,0x399,0x301,0xe602,0x39f,0x301,0xe602,0x3a5,0x301, +0xe602,0x3a9,0x301,0x3ca,0xe643,0x3b9,0x308,0x301,0xe602,0x399,0x308,0xe602,0x3a5,0x308,0xe602,0x3b5, +0x301,0xe602,0x3b9,0x301,0x3cb,0xe643,0x3c5,0x308,0x301,0xe602,0x3bf,0x301,0xe602,0x3c5,0x301,0xe602, +0x3d2,0x301,0xe602,0x3d2,0x308,0xe602,0x415,0x300,0xe602,0x415,0x308,0xe602,0x413,0x301,0xe602,0x406, +0x308,0xe602,0x41a,0x301,0xe602,0x418,0x300,0xe602,0x423,0x306,0xe602,0x418,0x306,0xe602,0x438,0x306, +0xe602,0x435,0x300,0xe602,0x435,0x308,0xe602,0x433,0x301,0xe602,0x456,0x308,0xe602,0x43a,0x301,0xe602, +0x438,0x300,0xe602,0x443,0x306,0xe602,0x474,0x30f,0xe602,0x475,0x30f,0xe602,0x416,0x306,0xe602,0x436, +0x306,0xe602,0x410,0x306,0xe602,0x430,0x306,0xe602,0x410,0x308,0xe602,0x430,0x308,0xe602,0x415,0x306, +0xe602,0x435,0x306,0xe602,0x4d8,0x308,0xe602,0x4d9,0x308,0xe602,0x416,0x308,0xe602,0x436,0x308,0xe602, +0x417,0x308,0xe602,0x437,0x308,0xe602,0x418,0x304,0xe602,0x438,0x304,0xe602,0x418,0x308,0xe602,0x438, +0x308,0xe602,0x41e,0x308,0xe602,0x43e,0x308,0xe602,0x4e8,0x308,0xe602,0x4e9,0x308,0xe602,0x42d,0x308, +0xe602,0x44d,0x308,0xe602,0x423,0x304,0xe602,0x443,0x304,0xe602,0x423,0x308,0xe602,0x443,0x308,0xe602, +0x423,0x30b,0xe602,0x443,0x30b,0xe602,0x427,0x308,0xe602,0x447,0x308,0xe602,0x42b,0x308,0xe602,0x44b, +0x308,0xe602,0x627,0x653,0xe602,0x627,0x654,0xe602,0x648,0x654,0xdc02,0x627,0x655,0xe602,0x64a,0x654, +0xe602,0x6d5,0x654,0xe602,0x6c1,0x654,0xe602,0x6d2,0x654,0x702,0x928,0x93c,0x702,0x930,0x93c,0x702, +0x933,0x93c,2,0x9c7,0x9be,2,0x9c7,0x9d7,2,0xb47,0xb56,2,0xb47,0xb3e,2,0xb47, +0xb57,2,0xb92,0xbd7,2,0xbc6,0xbbe,2,0xbc7,0xbbe,2,0xbc6,0xbd7,0x5b02,0xc46,0xc56, +2,0xcbf,0xcd5,2,0xcc6,0xcd5,2,0xcc6,0xcd6,0xcca,0x43,0xcc6,0xcc2,0xcd5,2,0xd46, +0xd3e,2,0xd47,0xd3e,2,0xd46,0xd57,0x902,0xdd9,0xdca,0xddc,0x943,0xdd9,0xdcf,0xdca,2, +0xdd9,0xddf,2,0x1025,0x102e,2,0x1b05,0x1b35,2,0x1b07,0x1b35,2,0x1b09,0x1b35,2,0x1b0b, +0x1b35,2,0x1b0d,0x1b35,2,0x1b11,0x1b35,2,0x1b3a,0x1b35,2,0x1b3c,0x1b35,2,0x1b3e,0x1b35, +2,0x1b3f,0x1b35,2,0x1b42,0x1b35,0xdc02,0x41,0x325,0xdc02,0x61,0x325,0xe602,0x42,0x307,0xe602, +0x62,0x307,0xdc02,0x42,0x323,0xdc02,0x62,0x323,0xdc02,0x42,0x331,0xdc02,0x62,0x331,0xc7,0xe643, +0x43,0x327,0x301,0xe7,0xe643,0x63,0x327,0x301,0xe602,0x44,0x307,0xe602,0x64,0x307,0xdc02,0x44, +0x323,0xdc02,0x64,0x323,0xdc02,0x44,0x331,0xdc02,0x64,0x331,0xca02,0x44,0x327,0xca02,0x64,0x327, +0xdc02,0x44,0x32d,0xdc02,0x64,0x32d,0x112,0xe643,0x45,0x304,0x300,0x113,0xe643,0x65,0x304,0x300, +0x112,0xe643,0x45,0x304,0x301,0x113,0xe643,0x65,0x304,0x301,0xdc02,0x45,0x32d,0xdc02,0x65,0x32d, +0xdc02,0x45,0x330,0xdc02,0x65,0x330,0x228,0xe643,0x45,0x327,0x306,0x229,0xe643,0x65,0x327,0x306, +0xe602,0x46,0x307,0xe602,0x66,0x307,0xe602,0x47,0x304,0xe602,0x67,0x304,0xe602,0x48,0x307,0xe602, +0x68,0x307,0xdc02,0x48,0x323,0xdc02,0x68,0x323,0xe602,0x48,0x308,0xe602,0x68,0x308,0xca02,0x48, +0x327,0xca02,0x68,0x327,0xdc02,0x48,0x32e,0xdc02,0x68,0x32e,0xdc02,0x49,0x330,0xdc02,0x69,0x330, +0xcf,0xe643,0x49,0x308,0x301,0xef,0xe643,0x69,0x308,0x301,0xe602,0x4b,0x301,0xe602,0x6b,0x301, +0xdc02,0x4b,0x323,0xdc02,0x6b,0x323,0xdc02,0x4b,0x331,0xdc02,0x6b,0x331,0x1e36,0xe643,0x4c,0x323, +0x304,0x1e37,0xe643,0x6c,0x323,0x304,0xdc02,0x4c,0x331,0xdc02,0x6c,0x331,0xdc02,0x4c,0x32d,0xdc02, +0x6c,0x32d,0xe602,0x4d,0x301,0xe602,0x6d,0x301,0xe602,0x4d,0x307,0xe602,0x6d,0x307,0xdc02,0x4d, +0x323,0xdc02,0x6d,0x323,0xe602,0x4e,0x307,0xe602,0x6e,0x307,0xdc02,0x4e,0x323,0xdc02,0x6e,0x323, +0xdc02,0x4e,0x331,0xdc02,0x6e,0x331,0xdc02,0x4e,0x32d,0xdc02,0x6e,0x32d,0xd5,0xe643,0x4f,0x303, +0x301,0xf5,0xe643,0x6f,0x303,0x301,0xd5,0xe643,0x4f,0x303,0x308,0xf5,0xe643,0x6f,0x303,0x308, +0x14c,0xe643,0x4f,0x304,0x300,0x14d,0xe643,0x6f,0x304,0x300,0x14c,0xe643,0x4f,0x304,0x301,0x14d, +0xe643,0x6f,0x304,0x301,0xe602,0x50,0x301,0xe602,0x70,0x301,0xe602,0x50,0x307,0xe602,0x70,0x307, +0xe602,0x52,0x307,0xe602,0x72,0x307,0x1e5a,0xe643,0x52,0x323,0x304,0x1e5b,0xe643,0x72,0x323,0x304, +0xdc02,0x52,0x331,0xdc02,0x72,0x331,0xe602,0x53,0x307,0xe602,0x73,0x307,0x15a,0xe643,0x53,0x301, +0x307,0x15b,0xe643,0x73,0x301,0x307,0x160,0xe643,0x53,0x30c,0x307,0x161,0xe643,0x73,0x30c,0x307, +0x1e62,0xe643,0x53,0x323,0x307,0x1e63,0xe643,0x73,0x323,0x307,0xe602,0x54,0x307,0xe602,0x74,0x307, +0xdc02,0x54,0x323,0xdc02,0x74,0x323,0xdc02,0x54,0x331,0xdc02,0x74,0x331,0xdc02,0x54,0x32d,0xdc02, +0x74,0x32d,0xdc02,0x55,0x324,0xdc02,0x75,0x324,0xdc02,0x55,0x330,0xdc02,0x75,0x330,0xdc02,0x55, +0x32d,0xdc02,0x75,0x32d,0x168,0xe643,0x55,0x303,0x301,0x169,0xe643,0x75,0x303,0x301,0x16a,0xe643, +0x55,0x304,0x308,0x16b,0xe643,0x75,0x304,0x308,0xe602,0x56,0x303,0xe602,0x76,0x303,0xdc02,0x56, +0x323,0xdc02,0x76,0x323,0xe602,0x57,0x300,0xe602,0x77,0x300,0xe602,0x57,0x301,0xe602,0x77,0x301, +0xe602,0x57,0x308,0xe602,0x77,0x308,0xe602,0x57,0x307,0xe602,0x77,0x307,0xdc02,0x57,0x323,0xdc02, +0x77,0x323,0xe602,0x58,0x307,0xe602,0x78,0x307,0xe602,0x58,0x308,0xe602,0x78,0x308,0xe602,0x59, +0x307,0xe602,0x79,0x307,0xe602,0x5a,0x302,0xe602,0x7a,0x302,0xdc02,0x5a,0x323,0xdc02,0x7a,0x323, +0xdc02,0x5a,0x331,0xdc02,0x7a,0x331,0xdc02,0x68,0x331,0xe602,0x74,0x308,0xe602,0x77,0x30a,0xe602, +0x79,0x30a,0xe602,0x17f,0x307,0xe602,0x41,0x309,0xe602,0x61,0x309,0xc2,0xe643,0x41,0x302,0x301, +0xe2,0xe643,0x61,0x302,0x301,0xc2,0xe643,0x41,0x302,0x300,0xe2,0xe643,0x61,0x302,0x300,0xc2, +0xe643,0x41,0x302,0x309,0xe2,0xe643,0x61,0x302,0x309,0xc2,0xe643,0x41,0x302,0x303,0xe2,0xe643, +0x61,0x302,0x303,0x1ea0,0xe643,0x41,0x323,0x302,0x1ea1,0xe643,0x61,0x323,0x302,0x102,0xe643,0x41, +0x306,0x301,0x103,0xe643,0x61,0x306,0x301,0x102,0xe643,0x41,0x306,0x300,0x103,0xe643,0x61,0x306, +0x300,0x102,0xe643,0x41,0x306,0x309,0x103,0xe643,0x61,0x306,0x309,0x102,0xe643,0x41,0x306,0x303, +0x103,0xe643,0x61,0x306,0x303,0x1ea0,0xe643,0x41,0x323,0x306,0x1ea1,0xe643,0x61,0x323,0x306,0xe602, +0x45,0x309,0xe602,0x65,0x309,0xe602,0x45,0x303,0xe602,0x65,0x303,0xca,0xe643,0x45,0x302,0x301, +0xea,0xe643,0x65,0x302,0x301,0xca,0xe643,0x45,0x302,0x300,0xea,0xe643,0x65,0x302,0x300,0xca, +0xe643,0x45,0x302,0x309,0xea,0xe643,0x65,0x302,0x309,0xca,0xe643,0x45,0x302,0x303,0xea,0xe643, +0x65,0x302,0x303,0x1eb8,0xe643,0x45,0x323,0x302,0x1eb9,0xe643,0x65,0x323,0x302,0xe602,0x49,0x309, +0xe602,0x69,0x309,0xdc02,0x49,0x323,0xdc02,0x69,0x323,0xe602,0x4f,0x309,0xe602,0x6f,0x309,0xd4, +0xe643,0x4f,0x302,0x301,0xf4,0xe643,0x6f,0x302,0x301,0xd4,0xe643,0x4f,0x302,0x300,0xf4,0xe643, +0x6f,0x302,0x300,0xd4,0xe643,0x4f,0x302,0x309,0xf4,0xe643,0x6f,0x302,0x309,0xd4,0xe643,0x4f, +0x302,0x303,0xf4,0xe643,0x6f,0x302,0x303,0x1ecc,0xe643,0x4f,0x323,0x302,0x1ecd,0xe643,0x6f,0x323, +0x302,0x1a0,0xe643,0x4f,0x31b,0x301,0x1a1,0xe643,0x6f,0x31b,0x301,0x1a0,0xe643,0x4f,0x31b,0x300, +0x1a1,0xe643,0x6f,0x31b,0x300,0x1a0,0xe643,0x4f,0x31b,0x309,0x1a1,0xe643,0x6f,0x31b,0x309,0x1a0, +0xe643,0x4f,0x31b,0x303,0x1a1,0xe643,0x6f,0x31b,0x303,0x1a0,0xdc43,0x4f,0x31b,0x323,0x1a1,0xdc43, +0x6f,0x31b,0x323,0xdc02,0x55,0x323,0xdc02,0x75,0x323,0xe602,0x55,0x309,0xe602,0x75,0x309,0x1af, +0xe643,0x55,0x31b,0x301,0x1b0,0xe643,0x75,0x31b,0x301,0x1af,0xe643,0x55,0x31b,0x300,0x1b0,0xe643, +0x75,0x31b,0x300,0x1af,0xe643,0x55,0x31b,0x309,0x1b0,0xe643,0x75,0x31b,0x309,0x1af,0xe643,0x55, +0x31b,0x303,0x1b0,0xe643,0x75,0x31b,0x303,0x1af,0xdc43,0x55,0x31b,0x323,0x1b0,0xdc43,0x75,0x31b, +0x323,0xe602,0x59,0x300,0xe602,0x79,0x300,0xdc02,0x59,0x323,0xdc02,0x79,0x323,0xe602,0x59,0x309, +0xe602,0x79,0x309,0xe602,0x59,0x303,0xe602,0x79,0x303,0x1f10,0xe643,0x3b5,0x313,0x300,0x1f11,0xe643, +0x3b5,0x314,0x300,0x1f10,0xe643,0x3b5,0x313,0x301,0x1f11,0xe643,0x3b5,0x314,0x301,0x1f18,0xe643,0x395, +0x313,0x300,0x1f19,0xe643,0x395,0x314,0x300,0x1f18,0xe643,0x395,0x313,0x301,0x1f19,0xe643,0x395,0x314, +0x301,0x1f30,0xe643,0x3b9,0x313,0x300,0x1f31,0xe643,0x3b9,0x314,0x300,0x1f30,0xe643,0x3b9,0x313,0x301, +0x1f31,0xe643,0x3b9,0x314,0x301,0x1f30,0xe643,0x3b9,0x313,0x342,0x1f31,0xe643,0x3b9,0x314,0x342,0x1f38, +0xe643,0x399,0x313,0x300,0x1f39,0xe643,0x399,0x314,0x300,0x1f38,0xe643,0x399,0x313,0x301,0x1f39,0xe643, +0x399,0x314,0x301,0x1f38,0xe643,0x399,0x313,0x342,0x1f39,0xe643,0x399,0x314,0x342,0x1f40,0xe643,0x3bf, +0x313,0x300,0x1f41,0xe643,0x3bf,0x314,0x300,0x1f40,0xe643,0x3bf,0x313,0x301,0x1f41,0xe643,0x3bf,0x314, +0x301,0x1f48,0xe643,0x39f,0x313,0x300,0x1f49,0xe643,0x39f,0x314,0x300,0x1f48,0xe643,0x39f,0x313,0x301, +0x1f49,0xe643,0x39f,0x314,0x301,0x1f50,0xe643,0x3c5,0x313,0x300,0x1f51,0xe643,0x3c5,0x314,0x300,0x1f50, +0xe643,0x3c5,0x313,0x301,0x1f51,0xe643,0x3c5,0x314,0x301,0x1f50,0xe643,0x3c5,0x313,0x342,0x1f51,0xe643, +0x3c5,0x314,0x342,0x1f59,0xe643,0x3a5,0x314,0x300,0x1f59,0xe643,0x3a5,0x314,0x301,0x1f59,0xe643,0x3a5, +0x314,0x342,0xe602,0x3b5,0x300,0xe602,0x3b9,0x300,0xe602,0x3bf,0x300,0xe602,0x3c5,0x300,0x1f00,0xf043, +0x3b1,0x313,0x345,0x1f01,0xf043,0x3b1,0x314,0x345,0x1f02,0x345,2,0xf044,0x3b1,0x313,0x300,0x345, +0x1f03,0x345,2,0xf044,0x3b1,0x314,0x300,0x345,0x1f04,0x345,2,0xf044,0x3b1,0x313,0x301,0x345, +0x1f05,0x345,2,0xf044,0x3b1,0x314,0x301,0x345,0x1f06,0x345,2,0xf044,0x3b1,0x313,0x342,0x345, +0x1f07,0x345,2,0xf044,0x3b1,0x314,0x342,0x345,0x1f08,0xf043,0x391,0x313,0x345,0x1f09,0xf043,0x391, +0x314,0x345,0x1f0a,0x345,2,0xf044,0x391,0x313,0x300,0x345,0x1f0b,0x345,2,0xf044,0x391,0x314, +0x300,0x345,0x1f0c,0x345,2,0xf044,0x391,0x313,0x301,0x345,0x1f0d,0x345,2,0xf044,0x391,0x314, +0x301,0x345,0x1f0e,0x345,2,0xf044,0x391,0x313,0x342,0x345,0x1f0f,0x345,2,0xf044,0x391,0x314, +0x342,0x345,0x1f20,0xf043,0x3b7,0x313,0x345,0x1f21,0xf043,0x3b7,0x314,0x345,0x1f22,0x345,2,0xf044, +0x3b7,0x313,0x300,0x345,0x1f23,0x345,2,0xf044,0x3b7,0x314,0x300,0x345,0x1f24,0x345,2,0xf044, +0x3b7,0x313,0x301,0x345,0x1f25,0x345,2,0xf044,0x3b7,0x314,0x301,0x345,0x1f26,0x345,2,0xf044, +0x3b7,0x313,0x342,0x345,0x1f27,0x345,2,0xf044,0x3b7,0x314,0x342,0x345,0x1f28,0xf043,0x397,0x313, +0x345,0x1f29,0xf043,0x397,0x314,0x345,0x1f2a,0x345,2,0xf044,0x397,0x313,0x300,0x345,0x1f2b,0x345, +2,0xf044,0x397,0x314,0x300,0x345,0x1f2c,0x345,2,0xf044,0x397,0x313,0x301,0x345,0x1f2d,0x345, +2,0xf044,0x397,0x314,0x301,0x345,0x1f2e,0x345,2,0xf044,0x397,0x313,0x342,0x345,0x1f2f,0x345, +2,0xf044,0x397,0x314,0x342,0x345,0x1f60,0xf043,0x3c9,0x313,0x345,0x1f61,0xf043,0x3c9,0x314,0x345, +0x1f62,0x345,2,0xf044,0x3c9,0x313,0x300,0x345,0x1f63,0x345,2,0xf044,0x3c9,0x314,0x300,0x345, +0x1f64,0x345,2,0xf044,0x3c9,0x313,0x301,0x345,0x1f65,0x345,2,0xf044,0x3c9,0x314,0x301,0x345, +0x1f66,0x345,2,0xf044,0x3c9,0x313,0x342,0x345,0x1f67,0x345,2,0xf044,0x3c9,0x314,0x342,0x345, +0x1f68,0xf043,0x3a9,0x313,0x345,0x1f69,0xf043,0x3a9,0x314,0x345,0x1f6a,0x345,2,0xf044,0x3a9,0x313, +0x300,0x345,0x1f6b,0x345,2,0xf044,0x3a9,0x314,0x300,0x345,0x1f6c,0x345,2,0xf044,0x3a9,0x313, +0x301,0x345,0x1f6d,0x345,2,0xf044,0x3a9,0x314,0x301,0x345,0x1f6e,0x345,2,0xf044,0x3a9,0x313, +0x342,0x345,0x1f6f,0x345,2,0xf044,0x3a9,0x314,0x342,0x345,0xe602,0x3b1,0x306,0xe602,0x3b1,0x304, +0x1f70,0xf043,0x3b1,0x300,0x345,0xf002,0x3b1,0x345,0x3ac,0xf043,0x3b1,0x301,0x345,0x1fb6,0xf043,0x3b1, +0x342,0x345,0xe602,0x391,0x306,0xe602,0x391,0x304,0xe602,0x391,0x300,0xf002,0x391,0x345,0xe602,0xa8, +0x342,0x1f74,0xf043,0x3b7,0x300,0x345,0xf002,0x3b7,0x345,0x3ae,0xf043,0x3b7,0x301,0x345,0x1fc6,0xf043, +0x3b7,0x342,0x345,0xe602,0x395,0x300,0xe602,0x397,0x300,0xf002,0x397,0x345,0xe602,0x1fbf,0x300,0xe602, +0x1fbf,0x301,0xe602,0x1fbf,0x342,0xe602,0x3b9,0x306,0xe602,0x3b9,0x304,0x3ca,0xe643,0x3b9,0x308,0x300, +0xe602,0x3b9,0x342,0x3ca,0xe643,0x3b9,0x308,0x342,0xe602,0x399,0x306,0xe602,0x399,0x304,0xe602,0x399, +0x300,0xe602,0x1ffe,0x300,0xe602,0x1ffe,0x301,0xe602,0x1ffe,0x342,0xe602,0x3c5,0x306,0xe602,0x3c5,0x304, +0x3cb,0xe643,0x3c5,0x308,0x300,0xe602,0x3c1,0x313,0xe602,0x3c1,0x314,0xe602,0x3c5,0x342,0x3cb,0xe643, +0x3c5,0x308,0x342,0xe602,0x3a5,0x306,0xe602,0x3a5,0x304,0xe602,0x3a5,0x300,0xe602,0x3a1,0x314,0xe602, +0xa8,0x300,0x1f7c,0xf043,0x3c9,0x300,0x345,0xf002,0x3c9,0x345,0x3ce,0xf043,0x3c9,0x301,0x345,0x1ff6, +0xf043,0x3c9,0x342,0x345,0xe602,0x39f,0x300,0xe602,0x3a9,0x300,0xf002,0x3a9,0x345,0x102,0x2190,0x338, +0x102,0x2192,0x338,0x102,0x2194,0x338,0x102,0x21d0,0x338,0x102,0x21d4,0x338,0x102,0x21d2,0x338,0x102, +0x2203,0x338,0x102,0x2208,0x338,0x102,0x220b,0x338,0x102,0x2223,0x338,0x102,0x2225,0x338,0x102,0x223c, +0x338,0x102,0x2243,0x338,0x102,0x2245,0x338,0x102,0x2248,0x338,0x102,0x3d,0x338,0x102,0x2261,0x338, +0x102,0x224d,0x338,0x102,0x3c,0x338,0x102,0x3e,0x338,0x102,0x2264,0x338,0x102,0x2265,0x338,0x102, +0x2272,0x338,0x102,0x2273,0x338,0x102,0x2276,0x338,0x102,0x2277,0x338,0x102,0x227a,0x338,0x102,0x227b, +0x338,0x102,0x2282,0x338,0x102,0x2283,0x338,0x102,0x2286,0x338,0x102,0x2287,0x338,0x102,0x22a2,0x338, +0x102,0x22a8,0x338,0x102,0x22a9,0x338,0x102,0x22ab,0x338,0x102,0x227c,0x338,0x102,0x227d,0x338,0x102, +0x2291,0x338,0x102,0x2292,0x338,0x102,0x22b2,0x338,0x102,0x22b3,0x338,0x102,0x22b4,0x338,0x102,0x22b5, +0x338,0x802,0x304b,0x3099,0x802,0x304d,0x3099,0x802,0x304f,0x3099,0x802,0x3051,0x3099,0x802,0x3053,0x3099, +0x802,0x3055,0x3099,0x802,0x3057,0x3099,0x802,0x3059,0x3099,0x802,0x305b,0x3099,0x802,0x305d,0x3099,0x802, +0x305f,0x3099,0x802,0x3061,0x3099,0x802,0x3064,0x3099,0x802,0x3066,0x3099,0x802,0x3068,0x3099,0x802,0x306f, +0x3099,0x802,0x306f,0x309a,0x802,0x3072,0x3099,0x802,0x3072,0x309a,0x802,0x3075,0x3099,0x802,0x3075,0x309a, +0x802,0x3078,0x3099,0x802,0x3078,0x309a,0x802,0x307b,0x3099,0x802,0x307b,0x309a,0x802,0x3046,0x3099,0x802, +0x309d,0x3099,0x802,0x30ab,0x3099,0x802,0x30ad,0x3099,0x802,0x30af,0x3099,0x802,0x30b1,0x3099,0x802,0x30b3, +0x3099,0x802,0x30b5,0x3099,0x802,0x30b7,0x3099,0x802,0x30b9,0x3099,0x802,0x30bb,0x3099,0x802,0x30bd,0x3099, +0x802,0x30bf,0x3099,0x802,0x30c1,0x3099,0x802,0x30c4,0x3099,0x802,0x30c6,0x3099,0x802,0x30c8,0x3099,0x802, +0x30cf,0x3099,0x802,0x30cf,0x309a,0x802,0x30d2,0x3099,0x802,0x30d2,0x309a,0x802,0x30d5,0x3099,0x802,0x30d5, +0x309a,0x802,0x30d8,0x3099,0x802,0x30d8,0x309a,0x802,0x30db,0x3099,0x802,0x30db,0x309a,0x802,0x30a6,0x3099, +0x802,0x30ef,0x3099,0x802,0x30f0,0x3099,0x802,0x30f1,0x3099,0x802,0x30f2,0x3099,0x802,0x30fd,0x3099,0x704, +0xd804,0xdc99,0xd804,0xdcba,0x704,0xd804,0xdc9b,0xd804,0xdcba,0x704,0xd804,0xdca5,0xd804,0xdcba,4,0xd804, +0xdd31,0xd804,0xdd27,4,0xd804,0xdd32,0xd804,0xdd27,4,0xd804,0xdf47,0xd804,0xdf3e,4,0xd804,0xdf47, +0xd804,0xdf57,4,0xd805,0xdcb9,0xd805,0xdcba,4,0xd805,0xdcb9,0xd805,0xdcb0,4,0xd805,0xdcb9,0xd805, +0xdcbd,4,0xd805,0xddb8,0xd805,0xddaf,4,0xd805,0xddb9,0xd805,0xddaf,4,0xd806,0xdd35,0xd806,0xdd30, +1,0x2b9,1,0x3b,1,0xb7,0x702,0x915,0x93c,0x702,0x916,0x93c,0x702,0x917,0x93c,0x702, +0x91c,0x93c,0x702,0x921,0x93c,0x702,0x922,0x93c,0x702,0x92b,0x93c,0x702,0x92f,0x93c,0x702,0x9a1, +0x9bc,0x702,0x9a2,0x9bc,0x702,0x9af,0x9bc,0x702,0xa32,0xa3c,0x702,0xa38,0xa3c,0x702,0xa16,0xa3c, +0x702,0xa17,0xa3c,0x702,0xa1c,0xa3c,0x702,0xa2b,0xa3c,0x702,0xb21,0xb3c,0x702,0xb22,0xb3c,2, +0xf42,0xfb7,2,0xf4c,0xfb7,2,0xf51,0xfb7,2,0xf56,0xfb7,2,0xf5b,0xfb7,2,0xf40, +0xfb5,0x8202,0xfb2,0xf80,0x8202,0xfb3,0xf80,2,0xf92,0xfb7,2,0xf9c,0xfb7,2,0xfa1,0xfb7, +2,0xfa6,0xfb7,2,0xfab,0xfb7,2,0xf90,0xfb5,1,0x3b9,1,0x60,1,0xb4,1, +0x3a9,1,0x4b,1,0x3008,1,0x3009,0x102,0x2add,0x338,1,0x8c48,1,0x66f4,1,0x8eca, +1,0x8cc8,1,0x6ed1,1,0x4e32,1,0x53e5,1,0x9f9c,1,0x5951,1,0x91d1,1,0x5587, +1,0x5948,1,0x61f6,1,0x7669,1,0x7f85,1,0x863f,1,0x87ba,1,0x88f8,1,0x908f, +1,0x6a02,1,0x6d1b,1,0x70d9,1,0x73de,1,0x843d,1,0x916a,1,0x99f1,1,0x4e82, +1,0x5375,1,0x6b04,1,0x721b,1,0x862d,1,0x9e1e,1,0x5d50,1,0x6feb,1,0x85cd, +1,0x8964,1,0x62c9,1,0x81d8,1,0x881f,1,0x5eca,1,0x6717,1,0x6d6a,1,0x72fc, +1,0x90ce,1,0x4f86,1,0x51b7,1,0x52de,1,0x64c4,1,0x6ad3,1,0x7210,1,0x76e7, +1,0x8001,1,0x8606,1,0x865c,1,0x8def,1,0x9732,1,0x9b6f,1,0x9dfa,1,0x788c, +1,0x797f,1,0x7da0,1,0x83c9,1,0x9304,1,0x9e7f,1,0x8ad6,1,0x58df,1,0x5f04, +1,0x7c60,1,0x807e,1,0x7262,1,0x78ca,1,0x8cc2,1,0x96f7,1,0x58d8,1,0x5c62, +1,0x6a13,1,0x6dda,1,0x6f0f,1,0x7d2f,1,0x7e37,1,0x964b,1,0x52d2,1,0x808b, +1,0x51dc,1,0x51cc,1,0x7a1c,1,0x7dbe,1,0x83f1,1,0x9675,1,0x8b80,1,0x62cf, +1,0x8afe,1,0x4e39,1,0x5be7,1,0x6012,1,0x7387,1,0x7570,1,0x5317,1,0x78fb, +1,0x4fbf,1,0x5fa9,1,0x4e0d,1,0x6ccc,1,0x6578,1,0x7d22,1,0x53c3,1,0x585e, +1,0x7701,1,0x8449,1,0x8aaa,1,0x6bba,1,0x8fb0,1,0x6c88,1,0x62fe,1,0x82e5, +1,0x63a0,1,0x7565,1,0x4eae,1,0x5169,1,0x51c9,1,0x6881,1,0x7ce7,1,0x826f, +1,0x8ad2,1,0x91cf,1,0x52f5,1,0x5442,1,0x5973,1,0x5eec,1,0x65c5,1,0x6ffe, +1,0x792a,1,0x95ad,1,0x9a6a,1,0x9e97,1,0x9ece,1,0x529b,1,0x66c6,1,0x6b77, +1,0x8f62,1,0x5e74,1,0x6190,1,0x6200,1,0x649a,1,0x6f23,1,0x7149,1,0x7489, +1,0x79ca,1,0x7df4,1,0x806f,1,0x8f26,1,0x84ee,1,0x9023,1,0x934a,1,0x5217, +1,0x52a3,1,0x54bd,1,0x70c8,1,0x88c2,1,0x5ec9,1,0x5ff5,1,0x637b,1,0x6bae, +1,0x7c3e,1,0x7375,1,0x4ee4,1,0x56f9,1,0x5dba,1,0x601c,1,0x73b2,1,0x7469, +1,0x7f9a,1,0x8046,1,0x9234,1,0x96f6,1,0x9748,1,0x9818,1,0x4f8b,1,0x79ae, +1,0x91b4,1,0x96b8,1,0x60e1,1,0x4e86,1,0x50da,1,0x5bee,1,0x5c3f,1,0x6599, +1,0x71ce,1,0x7642,1,0x84fc,1,0x907c,1,0x9f8d,1,0x6688,1,0x962e,1,0x5289, +1,0x677b,1,0x67f3,1,0x6d41,1,0x6e9c,1,0x7409,1,0x7559,1,0x786b,1,0x7d10, +1,0x985e,1,0x516d,1,0x622e,1,0x9678,1,0x502b,1,0x5d19,1,0x6dea,1,0x8f2a, +1,0x5f8b,1,0x6144,1,0x6817,1,0x9686,1,0x5229,1,0x540f,1,0x5c65,1,0x6613, +1,0x674e,1,0x68a8,1,0x6ce5,1,0x7406,1,0x75e2,1,0x7f79,1,0x88cf,1,0x88e1, +1,0x91cc,1,0x96e2,1,0x533f,1,0x6eba,1,0x541d,1,0x71d0,1,0x7498,1,0x85fa, +1,0x96a3,1,0x9c57,1,0x9e9f,1,0x6797,1,0x6dcb,1,0x81e8,1,0x7acb,1,0x7b20, +1,0x7c92,1,0x72c0,1,0x7099,1,0x8b58,1,0x4ec0,1,0x8336,1,0x523a,1,0x5207, +1,0x5ea6,1,0x62d3,1,0x7cd6,1,0x5b85,1,0x6d1e,1,0x66b4,1,0x8f3b,1,0x884c, +1,0x964d,1,0x898b,1,0x5ed3,1,0x5140,1,0x55c0,1,0x585a,1,0x6674,1,0x51de, +1,0x732a,1,0x76ca,1,0x793c,1,0x795e,1,0x7965,1,0x798f,1,0x9756,1,0x7cbe, +1,0x7fbd,1,0x8612,1,0x8af8,1,0x9038,1,0x90fd,1,0x98ef,1,0x98fc,1,0x9928, +1,0x9db4,1,0x90de,1,0x96b7,1,0x4fae,1,0x50e7,1,0x514d,1,0x52c9,1,0x52e4, +1,0x5351,1,0x559d,1,0x5606,1,0x5668,1,0x5840,1,0x58a8,1,0x5c64,1,0x5c6e, +1,0x6094,1,0x6168,1,0x618e,1,0x61f2,1,0x654f,1,0x65e2,1,0x6691,1,0x6885, +1,0x6d77,1,0x6e1a,1,0x6f22,1,0x716e,1,0x722b,1,0x7422,1,0x7891,1,0x793e, +1,0x7949,1,0x7948,1,0x7950,1,0x7956,1,0x795d,1,0x798d,1,0x798e,1,0x7a40, +1,0x7a81,1,0x7bc0,1,0x7e09,1,0x7e41,1,0x7f72,1,0x8005,1,0x81ed,1,0x8279, +1,0x8457,1,0x8910,1,0x8996,1,0x8b01,1,0x8b39,1,0x8cd3,1,0x8d08,1,0x8fb6, +1,0x96e3,1,0x97ff,1,0x983b,1,0x6075,2,0xd850,0xdeee,1,0x8218,1,0x4e26,1, +0x51b5,1,0x5168,1,0x4f80,1,0x5145,1,0x5180,1,0x52c7,1,0x52fa,1,0x5555,1, +0x5599,1,0x55e2,1,0x58b3,1,0x5944,1,0x5954,1,0x5a62,1,0x5b28,1,0x5ed2,1, +0x5ed9,1,0x5f69,1,0x5fad,1,0x60d8,1,0x614e,1,0x6108,1,0x6160,1,0x6234,1, +0x63c4,1,0x641c,1,0x6452,1,0x6556,1,0x671b,1,0x6756,1,0x6b79,1,0x6edb,1, +0x6ecb,1,0x701e,1,0x77a7,1,0x7235,1,0x72af,1,0x7471,1,0x7506,1,0x753b,1, +0x761d,1,0x761f,1,0x76db,1,0x76f4,1,0x774a,1,0x7740,1,0x78cc,1,0x7ab1,1, +0x7c7b,1,0x7d5b,1,0x7f3e,1,0x8352,1,0x83ef,1,0x8779,1,0x8941,1,0x8986,1, +0x8abf,1,0x8acb,1,0x8aed,1,0x8b8a,1,0x8f38,1,0x9072,1,0x9199,1,0x9276,1, +0x967c,1,0x97db,1,0x980b,1,0x9b12,2,0xd84a,0xdc4a,2,0xd84a,0xdc44,2,0xd84c,0xdfd5, +1,0x3b9d,1,0x4018,1,0x4039,2,0xd854,0xde49,2,0xd857,0xdcd0,2,0xd85f,0xded3,1, +0x9f43,1,0x9f8e,0xe02,0x5d9,0x5b4,0x1102,0x5f2,0x5b7,0x1802,0x5e9,0x5c1,0x1902,0x5e9,0x5c2,0xfb49, +0x1843,0x5e9,0x5bc,0x5c1,0xfb49,0x1943,0x5e9,0x5bc,0x5c2,0x1102,0x5d0,0x5b7,0x1202,0x5d0,0x5b8,0x1502, +0x5d0,0x5bc,0x1502,0x5d1,0x5bc,0x1502,0x5d2,0x5bc,0x1502,0x5d3,0x5bc,0x1502,0x5d4,0x5bc,0x1502,0x5d5, +0x5bc,0x1502,0x5d6,0x5bc,0x1502,0x5d8,0x5bc,0x1502,0x5d9,0x5bc,0x1502,0x5da,0x5bc,0x1502,0x5db,0x5bc, +0x1502,0x5dc,0x5bc,0x1502,0x5de,0x5bc,0x1502,0x5e0,0x5bc,0x1502,0x5e1,0x5bc,0x1502,0x5e3,0x5bc,0x1502, +0x5e4,0x5bc,0x1502,0x5e6,0x5bc,0x1502,0x5e7,0x5bc,0x1502,0x5e8,0x5bc,0x1502,0x5e9,0x5bc,0x1502,0x5ea, +0x5bc,0x1302,0x5d5,0x5b9,0x1702,0x5d1,0x5bf,0x1702,0x5db,0x5bf,0x1702,0x5e4,0x5bf,0xd804,0xd834,0xdd57, +0xd834,0xdd65,0xd804,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd5f,0xd834,0xdd6e,4,0xd846,0xd834,0xdd58,0xd834, +0xdd65,0xd834,0xdd6e,0xd834,0xdd5f,0xd834,0xdd6f,4,0xd846,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd6f,0xd834, +0xdd5f,0xd834,0xdd70,4,0xd846,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd70,0xd834,0xdd5f,0xd834,0xdd71,4, +0xd846,0xd834,0xdd58,0xd834,0xdd65,0xd834,0xdd71,0xd834,0xdd5f,0xd834,0xdd72,4,0xd846,0xd834,0xdd58,0xd834, +0xdd65,0xd834,0xdd72,0xd804,0xd834,0xddb9,0xd834,0xdd65,0xd804,0xd834,0xddba,0xd834,0xdd65,0xd834,0xddbb,0xd834, +0xdd6e,4,0xd846,0xd834,0xddb9,0xd834,0xdd65,0xd834,0xdd6e,0xd834,0xddbc,0xd834,0xdd6e,4,0xd846,0xd834, +0xddba,0xd834,0xdd65,0xd834,0xdd6e,0xd834,0xddbb,0xd834,0xdd6f,4,0xd846,0xd834,0xddb9,0xd834,0xdd65,0xd834, +0xdd6f,0xd834,0xddbc,0xd834,0xdd6f,4,0xd846,0xd834,0xddba,0xd834,0xdd65,0xd834,0xdd6f,1,0x4e3d,1, +0x4e38,1,0x4e41,2,0xd840,0xdd22,1,0x4f60,1,0x4fbb,1,0x5002,1,0x507a,1,0x5099, +1,0x50cf,1,0x349e,2,0xd841,0xde3a,1,0x5154,1,0x5164,1,0x5177,2,0xd841,0xdd1c, +1,0x34b9,1,0x5167,1,0x518d,2,0xd841,0xdd4b,1,0x5197,1,0x51a4,1,0x4ecc,1, +0x51ac,2,0xd864,0xdddf,1,0x51f5,1,0x5203,1,0x34df,1,0x523b,1,0x5246,1,0x5272, +1,0x5277,1,0x3515,1,0x5305,1,0x5306,1,0x5349,1,0x535a,1,0x5373,1,0x537d, +1,0x537f,2,0xd842,0xde2c,1,0x7070,1,0x53ca,1,0x53df,2,0xd842,0xdf63,1,0x53eb, +1,0x53f1,1,0x5406,1,0x549e,1,0x5438,1,0x5448,1,0x5468,1,0x54a2,1,0x54f6, +1,0x5510,1,0x5553,1,0x5563,1,0x5584,1,0x55ab,1,0x55b3,1,0x55c2,1,0x5716, +1,0x5717,1,0x5651,1,0x5674,1,0x58ee,1,0x57ce,1,0x57f4,1,0x580d,1,0x578b, +1,0x5832,1,0x5831,1,0x58ac,2,0xd845,0xdce4,1,0x58f2,1,0x58f7,1,0x5906,1, +0x591a,1,0x5922,1,0x5962,2,0xd845,0xdea8,2,0xd845,0xdeea,1,0x59ec,1,0x5a1b,1, +0x5a27,1,0x59d8,1,0x5a66,1,0x36ee,1,0x36fc,1,0x5b08,1,0x5b3e,2,0xd846,0xddc8, +1,0x5bc3,1,0x5bd8,1,0x5bf3,2,0xd846,0xdf18,1,0x5bff,1,0x5c06,1,0x5f53,1, +0x5c22,1,0x3781,1,0x5c60,1,0x5cc0,1,0x5c8d,2,0xd847,0xdde4,1,0x5d43,2,0xd847, +0xdde6,1,0x5d6e,1,0x5d6b,1,0x5d7c,1,0x5de1,1,0x5de2,1,0x382f,1,0x5dfd,1, +0x5e28,1,0x5e3d,1,0x5e69,1,0x3862,2,0xd848,0xdd83,1,0x387c,1,0x5eb0,1,0x5eb3, +1,0x5eb6,2,0xd868,0xdf92,1,0x5efe,2,0xd848,0xdf31,1,0x8201,1,0x5f22,1,0x38c7, +2,0xd84c,0xdeb8,2,0xd858,0xddda,1,0x5f62,1,0x5f6b,1,0x38e3,1,0x5f9a,1,0x5fcd, +1,0x5fd7,1,0x5ff9,1,0x6081,1,0x393a,1,0x391c,2,0xd849,0xded4,1,0x60c7,1, +0x6148,1,0x614c,1,0x617a,1,0x61b2,1,0x61a4,1,0x61af,1,0x61de,1,0x6210,1, +0x621b,1,0x625d,1,0x62b1,1,0x62d4,1,0x6350,2,0xd84a,0xdf0c,1,0x633d,1,0x62fc, +1,0x6368,1,0x6383,1,0x63e4,2,0xd84a,0xdff1,1,0x6422,1,0x63c5,1,0x63a9,1, +0x3a2e,1,0x6469,1,0x647e,1,0x649d,1,0x6477,1,0x3a6c,1,0x656c,2,0xd84c,0xdc0a, +1,0x65e3,1,0x66f8,1,0x6649,1,0x3b19,1,0x3b08,1,0x3ae4,1,0x5192,1,0x5195, +1,0x6700,1,0x669c,1,0x80ad,1,0x43d9,1,0x6721,1,0x675e,1,0x6753,2,0xd84c, +0xdfc3,1,0x3b49,1,0x67fa,1,0x6785,1,0x6852,2,0xd84d,0xdc6d,1,0x688e,1,0x681f, +1,0x6914,1,0x6942,1,0x69a3,1,0x69ea,1,0x6aa8,2,0xd84d,0xdea3,1,0x6adb,1, +0x3c18,1,0x6b21,2,0xd84e,0xdca7,1,0x6b54,1,0x3c4e,1,0x6b72,1,0x6b9f,1,0x6bbb, +2,0xd84e,0xde8d,2,0xd847,0xdd0b,2,0xd84e,0xdefa,1,0x6c4e,2,0xd84f,0xdcbc,1,0x6cbf, +1,0x6ccd,1,0x6c67,1,0x6d16,1,0x6d3e,1,0x6d69,1,0x6d78,1,0x6d85,2,0xd84f, +0xdd1e,1,0x6d34,1,0x6e2f,1,0x6e6e,1,0x3d33,1,0x6ec7,2,0xd84f,0xded1,1,0x6df9, +1,0x6f6e,2,0xd84f,0xdf5e,2,0xd84f,0xdf8e,1,0x6fc6,1,0x7039,1,0x701b,1,0x3d96, +1,0x704a,1,0x707d,1,0x7077,1,0x70ad,2,0xd841,0xdd25,1,0x7145,2,0xd850,0xde63, +1,0x719c,2,0xd850,0xdfab,1,0x7228,1,0x7250,2,0xd851,0xde08,1,0x7280,1,0x7295, +2,0xd851,0xdf35,2,0xd852,0xdc14,1,0x737a,1,0x738b,1,0x3eac,1,0x73a5,1,0x3eb8, +1,0x7447,1,0x745c,1,0x7485,1,0x74ca,1,0x3f1b,1,0x7524,2,0xd853,0xdc36,1, +0x753e,2,0xd853,0xdc92,2,0xd848,0xdd9f,1,0x7610,2,0xd853,0xdfa1,2,0xd853,0xdfb8,2, +0xd854,0xdc44,1,0x3ffc,1,0x4008,2,0xd854,0xdcf3,2,0xd854,0xdcf2,2,0xd854,0xdd19,2, +0xd854,0xdd33,1,0x771e,1,0x771f,1,0x778b,1,0x4046,1,0x4096,2,0xd855,0xdc1d,1, +0x784e,1,0x40e3,2,0xd855,0xde26,2,0xd855,0xde9a,2,0xd855,0xdec5,1,0x79eb,1,0x412f, +1,0x7a4a,1,0x7a4f,2,0xd856,0xdd7c,2,0xd856,0xdea7,1,0x7aee,1,0x4202,2,0xd856, +0xdfab,1,0x7bc6,1,0x7bc9,1,0x4227,2,0xd857,0xdc80,1,0x7cd2,1,0x42a0,1,0x7ce8, +1,0x7ce3,1,0x7d00,2,0xd857,0xdf86,1,0x7d63,1,0x4301,1,0x7dc7,1,0x7e02,1, +0x7e45,1,0x4334,2,0xd858,0xde28,2,0xd858,0xde47,1,0x4359,2,0xd858,0xded9,1,0x7f7a, +2,0xd858,0xdf3e,1,0x7f95,1,0x7ffa,2,0xd859,0xdcda,2,0xd859,0xdd23,1,0x8060,2, +0xd859,0xdda8,1,0x8070,2,0xd84c,0xdf5f,1,0x43d5,1,0x80b2,1,0x8103,1,0x440b,1, +0x813e,1,0x5ab5,2,0xd859,0xdfa7,2,0xd859,0xdfb5,2,0xd84c,0xdf93,2,0xd84c,0xdf9c,1, +0x8204,1,0x8f9e,1,0x446b,1,0x8291,1,0x828b,1,0x829d,1,0x52b3,1,0x82b1,1, +0x82b3,1,0x82bd,1,0x82e6,2,0xd85a,0xdf3c,1,0x831d,1,0x8363,1,0x83ad,1,0x8323, +1,0x83bd,1,0x83e7,1,0x8353,1,0x83ca,1,0x83cc,1,0x83dc,2,0xd85b,0xdc36,2, +0xd85b,0xdd6b,2,0xd85b,0xdcd5,1,0x452b,1,0x84f1,1,0x84f3,1,0x8516,2,0xd85c,0xdfca, +1,0x8564,2,0xd85b,0xdf2c,1,0x455d,1,0x4561,2,0xd85b,0xdfb1,2,0xd85c,0xdcd2,1, +0x456b,1,0x8650,1,0x8667,1,0x8669,1,0x86a9,1,0x8688,1,0x870e,1,0x86e2,1, +0x8728,1,0x876b,1,0x8786,1,0x45d7,1,0x87e1,1,0x8801,1,0x45f9,1,0x8860,1, +0x8863,2,0xd85d,0xde67,1,0x88d7,1,0x88de,1,0x4635,1,0x88fa,1,0x34bb,2,0xd85e, +0xdcae,2,0xd85e,0xdd66,1,0x46be,1,0x46c7,1,0x8aa0,1,0x8c55,2,0xd85f,0xdca8,1, +0x8cab,1,0x8cc1,1,0x8d1b,1,0x8d77,2,0xd85f,0xdf2f,2,0xd842,0xdc04,1,0x8dcb,1, +0x8dbc,1,0x8df0,2,0xd842,0xdcde,1,0x8ed4,2,0xd861,0xddd2,2,0xd861,0xdded,1,0x9094, +1,0x90f1,1,0x9111,2,0xd861,0xdf2e,1,0x911b,1,0x9238,1,0x92d7,1,0x92d8,1, +0x927c,1,0x93f9,1,0x9415,2,0xd862,0xdffa,1,0x958b,1,0x4995,1,0x95b7,2,0xd863, +0xdd77,1,0x49e6,1,0x96c3,1,0x5db2,1,0x9723,2,0xd864,0xdd45,2,0xd864,0xde1a,1, +0x4a6e,1,0x4a76,1,0x97e0,2,0xd865,0xdc0a,1,0x4ab2,2,0xd865,0xdc96,1,0x9829,2, +0xd865,0xddb6,1,0x98e2,1,0x4b33,1,0x9929,1,0x99a7,1,0x99c2,1,0x99fe,1,0x4bce, +2,0xd866,0xdf30,1,0x9c40,1,0x9cfd,1,0x4cce,1,0x4ced,1,0x9d67,2,0xd868,0xdcce, +1,0x4cf8,2,0xd868,0xdd05,2,0xd868,0xde0e,2,0xd868,0xde91,1,0x9ebb,1,0x4d56,1, +0x9ef9,1,0x9efe,1,0x9f05,1,0x9f0f,1,0x9f16,1,0x9f3b,2,0xd869,0xde00,0x3ac,0xe642, +0x3b1,0x301,0x3ad,0xe642,0x3b5,0x301,0x3ae,0xe642,0x3b7,0x301,0x3af,0xe642,0x3b9,0x301,0x3cc,0xe642, +0x3bf,0x301,0x3cd,0xe642,0x3c5,0x301,0x3ce,0xe642,0x3c9,0x301,0x386,0xe642,0x391,0x301,0x388,0xe642, +0x395,0x301,0x389,0xe642,0x397,0x301,0x390,1,0xe643,0x3b9,0x308,0x301,0x38a,0xe642,0x399,0x301, +0x3b0,1,0xe643,0x3c5,0x308,0x301,0x38e,0xe642,0x3a5,0x301,0x385,0xe642,0xa8,0x301,0x38c,0xe642, +0x39f,0x301,0x38f,0xe642,0x3a9,0x301,0xc5,0xe642,0x41,0x30a,0xe6e6,0xe681,0x300,0xe6e6,0xe681,0x301, +0xe6e6,0xe681,0x313,0xe6e6,0xe682,0x308,0x301,0x8100,0x8282,0xf71,0xf72,0x8100,0x8482,0xf71,0xf74,0x8100, +0x8282,0xf71,0xf80,0 +}; + +static const uint8_t norm2_nfc_data_smallFCD[256]={ +0xc0,0xef,3,0x7f,0xdf,0x70,0xcf,0x87,0xc7,0xe6,0x66,0x46,0x64,0x46,0x66,0x5b, +0x12,0,0,4,0,0,0,0x43,0x20,2,0x69,0xae,0xc2,0xc0,0xff,0xff, +0xc0,0x72,0xbf,0,0,0,0,0,0,0,0x40,0,0x80,0x88,0,0, +0xfe,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0x98,0,0xc3,0x66,0xe0,0x80,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,7,0,0,2,0 +}; + +#endif // INCLUDED_FROM_NORMALIZER2_CPP diff --git a/thirdparty/icu4c/common/norm2allmodes.h b/thirdparty/icu4c/common/norm2allmodes.h new file mode 100644 index 0000000000..e8bd52c6ae --- /dev/null +++ b/thirdparty/icu4c/common/norm2allmodes.h @@ -0,0 +1,369 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2014, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* norm2allmodes.h +* +* created on: 2014sep07 +* created by: Markus W. Scherer +*/ + +#ifndef __NORM2ALLMODES_H__ +#define __NORM2ALLMODES_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/edits.h" +#include "unicode/normalizer2.h" +#include "unicode/stringoptions.h" +#include "unicode/unistr.h" +#include "cpputils.h" +#include "normalizer2impl.h" + +U_NAMESPACE_BEGIN + +// Intermediate class: +// Has Normalizer2Impl and does boilerplate argument checking and setup. +class Normalizer2WithImpl : public Normalizer2 { +public: + Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} + virtual ~Normalizer2WithImpl(); + + // normalize + virtual UnicodeString & + normalize(const UnicodeString &src, + UnicodeString &dest, + UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + dest.setToBogus(); + return dest; + } + const UChar *sArray=src.getBuffer(); + if(&dest==&src || sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + dest.setToBogus(); + return dest; + } + dest.remove(); + ReorderingBuffer buffer(impl, dest); + if(buffer.init(src.length(), errorCode)) { + normalize(sArray, sArray+src.length(), buffer, errorCode); + } + return dest; + } + virtual void + normalize(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; + + // normalize and append + virtual UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const { + return normalizeSecondAndAppend(first, second, true, errorCode); + } + virtual UnicodeString & + append(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const { + return normalizeSecondAndAppend(first, second, false, errorCode); + } + UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UBool doNormalize, + UErrorCode &errorCode) const { + uprv_checkCanGetBuffer(first, errorCode); + if(U_FAILURE(errorCode)) { + return first; + } + const UChar *secondArray=second.getBuffer(); + if(&first==&second || secondArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return first; + } + int32_t firstLength=first.length(); + UnicodeString safeMiddle; + { + ReorderingBuffer buffer(impl, first); + if(buffer.init(firstLength+second.length(), errorCode)) { + normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, + safeMiddle, buffer, errorCode); + } + } // The ReorderingBuffer destructor finalizes the first string. + if(U_FAILURE(errorCode)) { + // Restore the modified suffix of the first string. + first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); + } + return first; + } + virtual void + normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; + virtual UBool + getDecomposition(UChar32 c, UnicodeString &decomposition) const { + UChar buffer[4]; + int32_t length; + const UChar *d=impl.getDecomposition(c, buffer, length); + if(d==NULL) { + return false; + } + if(d==buffer) { + decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) + } else { + decomposition.setTo(false, d, length); // read-only alias + } + return true; + } + virtual UBool + getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { + UChar buffer[30]; + int32_t length; + const UChar *d=impl.getRawDecomposition(c, buffer, length); + if(d==NULL) { + return false; + } + if(d==buffer) { + decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) + } else { + decomposition.setTo(false, d, length); // read-only alias + } + return true; + } + virtual UChar32 + composePair(UChar32 a, UChar32 b) const { + return impl.composePair(a, b); + } + + virtual uint8_t + getCombiningClass(UChar32 c) const { + return impl.getCC(impl.getNorm16(c)); + } + + // quick checks + virtual UBool + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return false; + } + const UChar *sArray=s.getBuffer(); + if(sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return false; + } + const UChar *sLimit=sArray+s.length(); + return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); + } + virtual UNormalizationCheckResult + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { + return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; + } + virtual int32_t + spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } + const UChar *sArray=s.getBuffer(); + if(sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); + } + virtual const UChar * + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; + + virtual UNormalizationCheckResult getQuickCheck(UChar32) const { + return UNORM_YES; + } + + const Normalizer2Impl &impl; +}; + +class DecomposeNormalizer2 : public Normalizer2WithImpl { +public: + DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} + virtual ~DecomposeNormalizer2(); + +private: + virtual void + normalize(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.decompose(src, limit, &buffer, errorCode); + } + using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. + virtual void + normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); + } + virtual const UChar * + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { + return impl.decompose(src, limit, NULL, errorCode); + } + using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. + virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { + return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; + } + virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); } + virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); } + virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } +}; + +class ComposeNormalizer2 : public Normalizer2WithImpl { +public: + ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : + Normalizer2WithImpl(ni), onlyContiguous(fcc) {} + virtual ~ComposeNormalizer2(); + +private: + virtual void + normalize(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { + impl.compose(src, limit, onlyContiguous, true, buffer, errorCode); + } + using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. + + void + normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const U_OVERRIDE { + if (U_FAILURE(errorCode)) { + return; + } + if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { + edits->reset(); + } + const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data()); + impl.composeUTF8(options, onlyContiguous, s, s + src.length(), + &sink, edits, errorCode); + sink.Flush(); + } + + virtual void + normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { + impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); + } + + virtual UBool + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { + if(U_FAILURE(errorCode)) { + return false; + } + const UChar *sArray=s.getBuffer(); + if(sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return false; + } + UnicodeString temp; + ReorderingBuffer buffer(impl, temp); + if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization + return false; + } + return impl.compose(sArray, sArray+s.length(), onlyContiguous, false, buffer, errorCode); + } + virtual UBool + isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE { + if(U_FAILURE(errorCode)) { + return false; + } + const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data()); + return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode); + } + virtual UNormalizationCheckResult + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { + if(U_FAILURE(errorCode)) { + return UNORM_MAYBE; + } + const UChar *sArray=s.getBuffer(); + if(sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return UNORM_MAYBE; + } + UNormalizationCheckResult qcResult=UNORM_YES; + impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); + return qcResult; + } + virtual const UChar * + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE { + return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); + } + using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. + virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE { + return impl.getCompQuickCheck(impl.getNorm16(c)); + } + virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE { + return impl.hasCompBoundaryBefore(c); + } + virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE { + return impl.hasCompBoundaryAfter(c, onlyContiguous); + } + virtual UBool isInert(UChar32 c) const U_OVERRIDE { + return impl.isCompInert(c, onlyContiguous); + } + + const UBool onlyContiguous; +}; + +class FCDNormalizer2 : public Normalizer2WithImpl { +public: + FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} + virtual ~FCDNormalizer2(); + +private: + virtual void + normalize(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.makeFCD(src, limit, &buffer, errorCode); + } + using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. + virtual void + normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); + } + virtual const UChar * + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { + return impl.makeFCD(src, limit, NULL, errorCode); + } + using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. + virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } + virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } + virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } +}; + +struct Norm2AllModes : public UMemory { + Norm2AllModes(Normalizer2Impl *i) + : impl(i), comp(*i, false), decomp(*i), fcd(*i), fcc(*i, true) {} + ~Norm2AllModes(); + + static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode); + static Norm2AllModes *createNFCInstance(UErrorCode &errorCode); + static Norm2AllModes *createInstance(const char *packageName, + const char *name, + UErrorCode &errorCode); + + static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode); + static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode); + static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode); + + Normalizer2Impl *impl; + ComposeNormalizer2 comp; + DecomposeNormalizer2 decomp; + FCDNormalizer2 fcd; + ComposeNormalizer2 fcc; +}; + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_NORMALIZATION +#endif // __NORM2ALLMODES_H__ diff --git a/thirdparty/icu4c/common/normalizer2.cpp b/thirdparty/icu4c/common/normalizer2.cpp new file mode 100644 index 0000000000..6be7e0b21a --- /dev/null +++ b/thirdparty/icu4c/common/normalizer2.cpp @@ -0,0 +1,572 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2009-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: normalizer2.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009nov22 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/edits.h" +#include "unicode/normalizer2.h" +#include "unicode/stringoptions.h" +#include "unicode/unistr.h" +#include "unicode/unorm.h" +#include "cstring.h" +#include "mutex.h" +#include "norm2allmodes.h" +#include "normalizer2impl.h" +#include "uassert.h" +#include "ucln_cmn.h" + +using icu::Normalizer2Impl; + +#if NORM2_HARDCODE_NFC_DATA +// NFC/NFD data machine-generated by gennorm2 --csource +#define INCLUDED_FROM_NORMALIZER2_CPP +#include "norm2_nfc_data.h" +#endif + +U_NAMESPACE_BEGIN + +// Public API dispatch via Normalizer2 subclasses -------------------------- *** + +Normalizer2::~Normalizer2() {} + +void +Normalizer2::normalizeUTF8(uint32_t /*options*/, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { + return; + } + if (edits != nullptr) { + errorCode = U_UNSUPPORTED_ERROR; + return; + } + UnicodeString src16 = UnicodeString::fromUTF8(src); + normalize(src16, errorCode).toUTF8(sink); +} + +UBool +Normalizer2::getRawDecomposition(UChar32, UnicodeString &) const { + return FALSE; +} + +UChar32 +Normalizer2::composePair(UChar32, UChar32) const { + return U_SENTINEL; +} + +uint8_t +Normalizer2::getCombiningClass(UChar32 /*c*/) const { + return 0; +} + +UBool +Normalizer2::isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const { + return U_SUCCESS(errorCode) && isNormalized(UnicodeString::fromUTF8(s), errorCode); +} + +// Normalizer2 implementation for the old UNORM_NONE. +class NoopNormalizer2 : public Normalizer2 { + virtual ~NoopNormalizer2(); + + virtual UnicodeString & + normalize(const UnicodeString &src, + UnicodeString &dest, + UErrorCode &errorCode) const U_OVERRIDE { + if(U_SUCCESS(errorCode)) { + if(&dest!=&src) { + dest=src; + } else { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + } + } + return dest; + } + virtual void + normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const U_OVERRIDE { + if(U_SUCCESS(errorCode)) { + if (edits != nullptr) { + if ((options & U_EDITS_NO_RESET) == 0) { + edits->reset(); + } + edits->addUnchanged(src.length()); + } + if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { + sink.Append(src.data(), src.length()); + } + sink.Flush(); + } + } + + virtual UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const U_OVERRIDE { + if(U_SUCCESS(errorCode)) { + if(&first!=&second) { + first.append(second); + } else { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + } + } + return first; + } + virtual UnicodeString & + append(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const U_OVERRIDE { + if(U_SUCCESS(errorCode)) { + if(&first!=&second) { + first.append(second); + } else { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + } + } + return first; + } + virtual UBool + getDecomposition(UChar32, UnicodeString &) const U_OVERRIDE { + return FALSE; + } + // No need to U_OVERRIDE the default getRawDecomposition(). + virtual UBool + isNormalized(const UnicodeString &, UErrorCode &errorCode) const U_OVERRIDE { + return U_SUCCESS(errorCode); + } + virtual UBool + isNormalizedUTF8(StringPiece, UErrorCode &errorCode) const U_OVERRIDE { + return U_SUCCESS(errorCode); + } + virtual UNormalizationCheckResult + quickCheck(const UnicodeString &, UErrorCode &) const U_OVERRIDE { + return UNORM_YES; + } + virtual int32_t + spanQuickCheckYes(const UnicodeString &s, UErrorCode &) const U_OVERRIDE { + return s.length(); + } + virtual UBool hasBoundaryBefore(UChar32) const U_OVERRIDE { return TRUE; } + virtual UBool hasBoundaryAfter(UChar32) const U_OVERRIDE { return TRUE; } + virtual UBool isInert(UChar32) const U_OVERRIDE { return TRUE; } +}; + +NoopNormalizer2::~NoopNormalizer2() {} + +Normalizer2WithImpl::~Normalizer2WithImpl() {} + +DecomposeNormalizer2::~DecomposeNormalizer2() {} + +ComposeNormalizer2::~ComposeNormalizer2() {} + +FCDNormalizer2::~FCDNormalizer2() {} + +// instance cache ---------------------------------------------------------- *** + +U_CDECL_BEGIN +static UBool U_CALLCONV uprv_normalizer2_cleanup(); +U_CDECL_END + +static Normalizer2 *noopSingleton; +static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER; + +static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + noopSingleton=new NoopNormalizer2; + if(noopSingleton==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); +} + +const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode); + return noopSingleton; +} + +const Normalizer2Impl * +Normalizer2Factory::getImpl(const Normalizer2 *norm2) { + return &((Normalizer2WithImpl *)norm2)->impl; +} + +Norm2AllModes::~Norm2AllModes() { + delete impl; +} + +Norm2AllModes * +Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + delete impl; + return NULL; + } + Norm2AllModes *allModes=new Norm2AllModes(impl); + if(allModes==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + delete impl; + return NULL; + } + return allModes; +} + +#if NORM2_HARDCODE_NFC_DATA +Norm2AllModes * +Norm2AllModes::createNFCInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return NULL; + } + Normalizer2Impl *impl=new Normalizer2Impl; + if(impl==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + impl->init(norm2_nfc_data_indexes, &norm2_nfc_data_trie, + norm2_nfc_data_extraData, norm2_nfc_data_smallFCD); + return createInstance(impl, errorCode); +} + +static Norm2AllModes *nfcSingleton; + +static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; + +static void U_CALLCONV initNFCSingleton(UErrorCode &errorCode) { + nfcSingleton=Norm2AllModes::createNFCInstance(errorCode); + ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); +} + +const Norm2AllModes * +Norm2AllModes::getNFCInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + umtx_initOnce(nfcInitOnce, &initNFCSingleton, errorCode); + return nfcSingleton; +} + +const Normalizer2 * +Normalizer2::getNFCInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->comp : NULL; +} + +const Normalizer2 * +Normalizer2::getNFDInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->decomp : NULL; +} + +const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->fcd : NULL; +} + +const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->fcc : NULL; +} + +const Normalizer2Impl * +Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? allModes->impl : NULL; +} +#endif // NORM2_HARDCODE_NFC_DATA + +U_CDECL_BEGIN + +static UBool U_CALLCONV uprv_normalizer2_cleanup() { + delete noopSingleton; + noopSingleton = NULL; + noopInitOnce.reset(); +#if NORM2_HARDCODE_NFC_DATA + delete nfcSingleton; + nfcSingleton = NULL; + nfcInitOnce.reset(); +#endif + return TRUE; +} + +U_CDECL_END + +U_NAMESPACE_END + +// C API ------------------------------------------------------------------- *** + +U_NAMESPACE_USE + +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFCInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFCInstance(*pErrorCode); +} + +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFDInstance(UErrorCode *pErrorCode) { + return (const UNormalizer2 *)Normalizer2::getNFDInstance(*pErrorCode); +} + +U_CAPI void U_EXPORT2 +unorm2_close(UNormalizer2 *norm2) { + delete (Normalizer2 *)norm2; +} + +U_CAPI int32_t U_EXPORT2 +unorm2_normalize(const UNormalizer2 *norm2, + const UChar *src, int32_t length, + UChar *dest, int32_t capacity, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if( (src==NULL ? length!=0 : length<-1) || + (dest==NULL ? capacity!=0 : capacity<0) || + (src==dest && src!=NULL) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString destString(dest, 0, capacity); + // length==0: Nothing to do, and n2wi->normalize(NULL, NULL, buffer, ...) would crash. + if(length!=0) { + const Normalizer2 *n2=(const Normalizer2 *)norm2; + const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); + if(n2wi!=NULL) { + // Avoid duplicate argument checking and support NUL-terminated src. + ReorderingBuffer buffer(n2wi->impl, destString); + if(buffer.init(length, *pErrorCode)) { + n2wi->normalize(src, length>=0 ? src+length : NULL, buffer, *pErrorCode); + } + } else { + UnicodeString srcString(length<0, src, length); + n2->normalize(srcString, destString, *pErrorCode); + } + } + return destString.extract(dest, capacity, *pErrorCode); +} + +static int32_t +normalizeSecondAndAppend(const UNormalizer2 *norm2, + UChar *first, int32_t firstLength, int32_t firstCapacity, + const UChar *second, int32_t secondLength, + UBool doNormalize, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if( (second==NULL ? secondLength!=0 : secondLength<-1) || + (first==NULL ? (firstCapacity!=0 || firstLength!=0) : + (firstCapacity<0 || firstLength<-1)) || + (first==second && first!=NULL) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString firstString(first, firstLength, firstCapacity); + firstLength=firstString.length(); // In case it was -1. + // secondLength==0: Nothing to do, and n2wi->normalizeAndAppend(NULL, NULL, buffer, ...) would crash. + if(secondLength!=0) { + const Normalizer2 *n2=(const Normalizer2 *)norm2; + const Normalizer2WithImpl *n2wi=dynamic_cast<const Normalizer2WithImpl *>(n2); + if(n2wi!=NULL) { + // Avoid duplicate argument checking and support NUL-terminated src. + UnicodeString safeMiddle; + { + ReorderingBuffer buffer(n2wi->impl, firstString); + if(buffer.init(firstLength+secondLength+1, *pErrorCode)) { // destCapacity>=-1 + n2wi->normalizeAndAppend(second, secondLength>=0 ? second+secondLength : NULL, + doNormalize, safeMiddle, buffer, *pErrorCode); + } + } // The ReorderingBuffer destructor finalizes firstString. + if(U_FAILURE(*pErrorCode) || firstString.length()>firstCapacity) { + // Restore the modified suffix of the first string. + // This does not restore first[] array contents between firstLength and firstCapacity. + // (That might be uninitialized memory, as far as we know.) + if(first!=NULL) { /* don't dereference NULL */ + safeMiddle.extract(0, 0x7fffffff, first+firstLength-safeMiddle.length()); + if(firstLength<firstCapacity) { + first[firstLength]=0; // NUL-terminate in case it was originally. + } + } + } + } else { + UnicodeString secondString(secondLength<0, second, secondLength); + if(doNormalize) { + n2->normalizeSecondAndAppend(firstString, secondString, *pErrorCode); + } else { + n2->append(firstString, secondString, *pErrorCode); + } + } + } + return firstString.extract(first, firstCapacity, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, + UChar *first, int32_t firstLength, int32_t firstCapacity, + const UChar *second, int32_t secondLength, + UErrorCode *pErrorCode) { + return normalizeSecondAndAppend(norm2, + first, firstLength, firstCapacity, + second, secondLength, + TRUE, pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +unorm2_append(const UNormalizer2 *norm2, + UChar *first, int32_t firstLength, int32_t firstCapacity, + const UChar *second, int32_t secondLength, + UErrorCode *pErrorCode) { + return normalizeSecondAndAppend(norm2, + first, firstLength, firstCapacity, + second, secondLength, + FALSE, pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +unorm2_getDecomposition(const UNormalizer2 *norm2, + UChar32 c, UChar *decomposition, int32_t capacity, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(decomposition==NULL ? capacity!=0 : capacity<0) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString destString(decomposition, 0, capacity); + if(reinterpret_cast<const Normalizer2 *>(norm2)->getDecomposition(c, destString)) { + return destString.extract(decomposition, capacity, *pErrorCode); + } else { + return -1; + } +} + +U_CAPI int32_t U_EXPORT2 +unorm2_getRawDecomposition(const UNormalizer2 *norm2, + UChar32 c, UChar *decomposition, int32_t capacity, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(decomposition==NULL ? capacity!=0 : capacity<0) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString destString(decomposition, 0, capacity); + if(reinterpret_cast<const Normalizer2 *>(norm2)->getRawDecomposition(c, destString)) { + return destString.extract(decomposition, capacity, *pErrorCode); + } else { + return -1; + } +} + +U_CAPI UChar32 U_EXPORT2 +unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b) { + return reinterpret_cast<const Normalizer2 *>(norm2)->composePair(a, b); +} + +U_CAPI uint8_t U_EXPORT2 +unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c) { + return reinterpret_cast<const Normalizer2 *>(norm2)->getCombiningClass(c); +} + +U_CAPI UBool U_EXPORT2 +unorm2_isNormalized(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if((s==NULL && length!=0) || length<-1) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString sString(length<0, s, length); + return ((const Normalizer2 *)norm2)->isNormalized(sString, *pErrorCode); +} + +U_CAPI UNormalizationCheckResult U_EXPORT2 +unorm2_quickCheck(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return UNORM_NO; + } + if((s==NULL && length!=0) || length<-1) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return UNORM_NO; + } + UnicodeString sString(length<0, s, length); + return ((const Normalizer2 *)norm2)->quickCheck(sString, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if((s==NULL && length!=0) || length<-1) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + UnicodeString sString(length<0, s, length); + return ((const Normalizer2 *)norm2)->spanQuickCheckYes(sString, *pErrorCode); +} + +U_CAPI UBool U_EXPORT2 +unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c) { + return ((const Normalizer2 *)norm2)->hasBoundaryBefore(c); +} + +U_CAPI UBool U_EXPORT2 +unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c) { + return ((const Normalizer2 *)norm2)->hasBoundaryAfter(c); +} + +U_CAPI UBool U_EXPORT2 +unorm2_isInert(const UNormalizer2 *norm2, UChar32 c) { + return ((const Normalizer2 *)norm2)->isInert(c); +} + +// Some properties APIs ---------------------------------------------------- *** + +U_CAPI uint8_t U_EXPORT2 +u_getCombiningClass(UChar32 c) { + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode); + if(U_SUCCESS(errorCode)) { + return nfd->getCombiningClass(c); + } else { + return 0; + } +} + +U_CFUNC uint16_t +unorm_getFCD16(UChar32 c) { + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + return impl->getFCD16(c); + } else { + return 0; + } +} + +#endif // !UCONFIG_NO_NORMALIZATION diff --git a/thirdparty/icu4c/common/normalizer2impl.cpp b/thirdparty/icu4c/common/normalizer2impl.cpp new file mode 100644 index 0000000000..cbf6b4d980 --- /dev/null +++ b/thirdparty/icu4c/common/normalizer2impl.cpp @@ -0,0 +1,2669 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2009-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: normalizer2impl.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009nov22 +* created by: Markus W. Scherer +*/ + +// #define UCPTRIE_DEBUG + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/bytestream.h" +#include "unicode/edits.h" +#include "unicode/normalizer2.h" +#include "unicode/stringoptions.h" +#include "unicode/ucptrie.h" +#include "unicode/udata.h" +#include "unicode/umutablecptrie.h" +#include "unicode/ustring.h" +#include "unicode/utf16.h" +#include "unicode/utf8.h" +#include "bytesinkutil.h" +#include "cmemory.h" +#include "mutex.h" +#include "normalizer2impl.h" +#include "putilimp.h" +#include "uassert.h" +#include "ucptrie_impl.h" +#include "uset_imp.h" +#include "uvector.h" + +U_NAMESPACE_BEGIN + +namespace { + +/** + * UTF-8 lead byte for minNoMaybeCP. + * Can be lower than the actual lead byte for c. + * Typically U+0300 for NFC/NFD, U+00A0 for NFKC/NFKD, U+0041 for NFKC_Casefold. + */ +inline uint8_t leadByteForCP(UChar32 c) { + if (c <= 0x7f) { + return (uint8_t)c; + } else if (c <= 0x7ff) { + return (uint8_t)(0xc0+(c>>6)); + } else { + // Should not occur because ccc(U+0300)!=0. + return 0xe0; + } +} + +/** + * Returns the code point from one single well-formed UTF-8 byte sequence + * between cpStart and cpLimit. + * + * Trie UTF-8 macros do not assemble whole code points (for efficiency). + * When we do need the code point, we call this function. + * We should not need it for normalization-inert data (norm16==0). + * Illegal sequences yield the error value norm16==0 just like real normalization-inert code points. + */ +UChar32 codePointFromValidUTF8(const uint8_t *cpStart, const uint8_t *cpLimit) { + // Similar to U8_NEXT_UNSAFE(s, i, c). + U_ASSERT(cpStart < cpLimit); + uint8_t c = *cpStart; + switch(cpLimit-cpStart) { + case 1: + return c; + case 2: + return ((c&0x1f)<<6) | (cpStart[1]&0x3f); + case 3: + // no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) + return (UChar)((c<<12) | ((cpStart[1]&0x3f)<<6) | (cpStart[2]&0x3f)); + case 4: + return ((c&7)<<18) | ((cpStart[1]&0x3f)<<12) | ((cpStart[2]&0x3f)<<6) | (cpStart[3]&0x3f); + default: + UPRV_UNREACHABLE; // Should not occur. + } +} + +/** + * Returns the last code point in [start, p[ if it is valid and in U+1000..U+D7FF. + * Otherwise returns a negative value. + */ +UChar32 previousHangulOrJamo(const uint8_t *start, const uint8_t *p) { + if ((p - start) >= 3) { + p -= 3; + uint8_t l = *p; + uint8_t t1, t2; + if (0xe1 <= l && l <= 0xed && + (t1 = (uint8_t)(p[1] - 0x80)) <= 0x3f && + (t2 = (uint8_t)(p[2] - 0x80)) <= 0x3f && + (l < 0xed || t1 <= 0x1f)) { + return ((l & 0xf) << 12) | (t1 << 6) | t2; + } + } + return U_SENTINEL; +} + +/** + * Returns the offset from the Jamo T base if [src, limit[ starts with a single Jamo T code point. + * Otherwise returns a negative value. + */ +int32_t getJamoTMinusBase(const uint8_t *src, const uint8_t *limit) { + // Jamo T: E1 86 A8..E1 87 82 + if ((limit - src) >= 3 && *src == 0xe1) { + if (src[1] == 0x86) { + uint8_t t = src[2]; + // The first Jamo T is U+11A8 but JAMO_T_BASE is 11A7. + // Offset 0 does not correspond to any conjoining Jamo. + if (0xa8 <= t && t <= 0xbf) { + return t - 0xa7; + } + } else if (src[1] == 0x87) { + uint8_t t = src[2]; + if ((int8_t)t <= (int8_t)0x82u) { + return t - (0xa7 - 0x40); + } + } + } + return -1; +} + +void +appendCodePointDelta(const uint8_t *cpStart, const uint8_t *cpLimit, int32_t delta, + ByteSink &sink, Edits *edits) { + char buffer[U8_MAX_LENGTH]; + int32_t length; + int32_t cpLength = (int32_t)(cpLimit - cpStart); + if (cpLength == 1) { + // The builder makes ASCII map to ASCII. + buffer[0] = (uint8_t)(*cpStart + delta); + length = 1; + } else { + int32_t trail = *(cpLimit-1) + delta; + if (0x80 <= trail && trail <= 0xbf) { + // The delta only changes the last trail byte. + --cpLimit; + length = 0; + do { buffer[length++] = *cpStart++; } while (cpStart < cpLimit); + buffer[length++] = (uint8_t)trail; + } else { + // Decode the code point, add the delta, re-encode. + UChar32 c = codePointFromValidUTF8(cpStart, cpLimit) + delta; + length = 0; + U8_APPEND_UNSAFE(buffer, length, c); + } + } + if (edits != nullptr) { + edits->addReplace(cpLength, length); + } + sink.Append(buffer, length); +} + +} // namespace + +// ReorderingBuffer -------------------------------------------------------- *** + +ReorderingBuffer::ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest, + UErrorCode &errorCode) : + impl(ni), str(dest), + start(str.getBuffer(8)), reorderStart(start), limit(start), + remainingCapacity(str.getCapacity()), lastCC(0) { + if (start == nullptr && U_SUCCESS(errorCode)) { + // getBuffer() already did str.setToBogus() + errorCode = U_MEMORY_ALLOCATION_ERROR; + } +} + +UBool ReorderingBuffer::init(int32_t destCapacity, UErrorCode &errorCode) { + int32_t length=str.length(); + start=str.getBuffer(destCapacity); + if(start==NULL) { + // getBuffer() already did str.setToBogus() + errorCode=U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + limit=start+length; + remainingCapacity=str.getCapacity()-length; + reorderStart=start; + if(start==limit) { + lastCC=0; + } else { + setIterator(); + lastCC=previousCC(); + // Set reorderStart after the last code point with cc<=1 if there is one. + if(lastCC>1) { + while(previousCC()>1) {} + } + reorderStart=codePointLimit; + } + return TRUE; +} + +UBool ReorderingBuffer::equals(const UChar *otherStart, const UChar *otherLimit) const { + int32_t length=(int32_t)(limit-start); + return + length==(int32_t)(otherLimit-otherStart) && + 0==u_memcmp(start, otherStart, length); +} + +UBool ReorderingBuffer::equals(const uint8_t *otherStart, const uint8_t *otherLimit) const { + U_ASSERT((otherLimit - otherStart) <= INT32_MAX); // ensured by caller + int32_t length = (int32_t)(limit - start); + int32_t otherLength = (int32_t)(otherLimit - otherStart); + // For equal strings, UTF-8 is at least as long as UTF-16, and at most three times as long. + if (otherLength < length || (otherLength / 3) > length) { + return FALSE; + } + // Compare valid strings from between normalization boundaries. + // (Invalid sequences are normalization-inert.) + for (int32_t i = 0, j = 0;;) { + if (i >= length) { + return j >= otherLength; + } else if (j >= otherLength) { + return FALSE; + } + // Not at the end of either string yet. + UChar32 c, other; + U16_NEXT_UNSAFE(start, i, c); + U8_NEXT_UNSAFE(otherStart, j, other); + if (c != other) { + return FALSE; + } + } +} + +UBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode) { + if(remainingCapacity<2 && !resize(2, errorCode)) { + return FALSE; + } + if(lastCC<=cc || cc==0) { + limit[0]=U16_LEAD(c); + limit[1]=U16_TRAIL(c); + limit+=2; + lastCC=cc; + if(cc<=1) { + reorderStart=limit; + } + } else { + insert(c, cc); + } + remainingCapacity-=2; + return TRUE; +} + +UBool ReorderingBuffer::append(const UChar *s, int32_t length, UBool isNFD, + uint8_t leadCC, uint8_t trailCC, + UErrorCode &errorCode) { + if(length==0) { + return TRUE; + } + if(remainingCapacity<length && !resize(length, errorCode)) { + return FALSE; + } + remainingCapacity-=length; + if(lastCC<=leadCC || leadCC==0) { + if(trailCC<=1) { + reorderStart=limit+length; + } else if(leadCC<=1) { + reorderStart=limit+1; // Ok if not a code point boundary. + } + const UChar *sLimit=s+length; + do { *limit++=*s++; } while(s!=sLimit); + lastCC=trailCC; + } else { + int32_t i=0; + UChar32 c; + U16_NEXT(s, i, length, c); + insert(c, leadCC); // insert first code point + while(i<length) { + U16_NEXT(s, i, length, c); + if(i<length) { + if (isNFD) { + leadCC = Normalizer2Impl::getCCFromYesOrMaybe(impl.getRawNorm16(c)); + } else { + leadCC = impl.getCC(impl.getNorm16(c)); + } + } else { + leadCC=trailCC; + } + append(c, leadCC, errorCode); + } + } + return TRUE; +} + +UBool ReorderingBuffer::appendZeroCC(UChar32 c, UErrorCode &errorCode) { + int32_t cpLength=U16_LENGTH(c); + if(remainingCapacity<cpLength && !resize(cpLength, errorCode)) { + return FALSE; + } + remainingCapacity-=cpLength; + if(cpLength==1) { + *limit++=(UChar)c; + } else { + limit[0]=U16_LEAD(c); + limit[1]=U16_TRAIL(c); + limit+=2; + } + lastCC=0; + reorderStart=limit; + return TRUE; +} + +UBool ReorderingBuffer::appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode) { + if(s==sLimit) { + return TRUE; + } + int32_t length=(int32_t)(sLimit-s); + if(remainingCapacity<length && !resize(length, errorCode)) { + return FALSE; + } + u_memcpy(limit, s, length); + limit+=length; + remainingCapacity-=length; + lastCC=0; + reorderStart=limit; + return TRUE; +} + +void ReorderingBuffer::remove() { + reorderStart=limit=start; + remainingCapacity=str.getCapacity(); + lastCC=0; +} + +void ReorderingBuffer::removeSuffix(int32_t suffixLength) { + if(suffixLength<(limit-start)) { + limit-=suffixLength; + remainingCapacity+=suffixLength; + } else { + limit=start; + remainingCapacity=str.getCapacity(); + } + lastCC=0; + reorderStart=limit; +} + +UBool ReorderingBuffer::resize(int32_t appendLength, UErrorCode &errorCode) { + int32_t reorderStartIndex=(int32_t)(reorderStart-start); + int32_t length=(int32_t)(limit-start); + str.releaseBuffer(length); + int32_t newCapacity=length+appendLength; + int32_t doubleCapacity=2*str.getCapacity(); + if(newCapacity<doubleCapacity) { + newCapacity=doubleCapacity; + } + if(newCapacity<256) { + newCapacity=256; + } + start=str.getBuffer(newCapacity); + if(start==NULL) { + // getBuffer() already did str.setToBogus() + errorCode=U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + reorderStart=start+reorderStartIndex; + limit=start+length; + remainingCapacity=str.getCapacity()-length; + return TRUE; +} + +void ReorderingBuffer::skipPrevious() { + codePointLimit=codePointStart; + UChar c=*--codePointStart; + if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(*(codePointStart-1))) { + --codePointStart; + } +} + +uint8_t ReorderingBuffer::previousCC() { + codePointLimit=codePointStart; + if(reorderStart>=codePointStart) { + return 0; + } + UChar32 c=*--codePointStart; + UChar c2; + if(U16_IS_TRAIL(c) && start<codePointStart && U16_IS_LEAD(c2=*(codePointStart-1))) { + --codePointStart; + c=U16_GET_SUPPLEMENTARY(c2, c); + } + return impl.getCCFromYesOrMaybeCP(c); +} + +// Inserts c somewhere before the last character. +// Requires 0<cc<lastCC which implies reorderStart<limit. +void ReorderingBuffer::insert(UChar32 c, uint8_t cc) { + for(setIterator(), skipPrevious(); previousCC()>cc;) {} + // insert c at codePointLimit, after the character with prevCC<=cc + UChar *q=limit; + UChar *r=limit+=U16_LENGTH(c); + do { + *--r=*--q; + } while(codePointLimit!=q); + writeCodePoint(q, c); + if(cc<=1) { + reorderStart=r; + } +} + +// Normalizer2Impl --------------------------------------------------------- *** + +struct CanonIterData : public UMemory { + CanonIterData(UErrorCode &errorCode); + ~CanonIterData(); + void addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode); + UMutableCPTrie *mutableTrie; + UCPTrie *trie; + UVector canonStartSets; // contains UnicodeSet * +}; + +Normalizer2Impl::~Normalizer2Impl() { + delete fCanonIterData; +} + +void +Normalizer2Impl::init(const int32_t *inIndexes, const UCPTrie *inTrie, + const uint16_t *inExtraData, const uint8_t *inSmallFCD) { + minDecompNoCP = static_cast<UChar>(inIndexes[IX_MIN_DECOMP_NO_CP]); + minCompNoMaybeCP = static_cast<UChar>(inIndexes[IX_MIN_COMP_NO_MAYBE_CP]); + minLcccCP = static_cast<UChar>(inIndexes[IX_MIN_LCCC_CP]); + + minYesNo = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO]); + minYesNoMappingsOnly = static_cast<uint16_t>(inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]); + minNoNo = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO]); + minNoNoCompBoundaryBefore = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE]); + minNoNoCompNoMaybeCC = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]); + minNoNoEmpty = static_cast<uint16_t>(inIndexes[IX_MIN_NO_NO_EMPTY]); + limitNoNo = static_cast<uint16_t>(inIndexes[IX_LIMIT_NO_NO]); + minMaybeYes = static_cast<uint16_t>(inIndexes[IX_MIN_MAYBE_YES]); + U_ASSERT((minMaybeYes & 7) == 0); // 8-aligned for noNoDelta bit fields + centerNoNoDelta = (minMaybeYes >> DELTA_SHIFT) - MAX_DELTA - 1; + + normTrie=inTrie; + + maybeYesCompositions=inExtraData; + extraData=maybeYesCompositions+((MIN_NORMAL_MAYBE_YES-minMaybeYes)>>OFFSET_SHIFT); + + smallFCD=inSmallFCD; +} + +U_CDECL_BEGIN + +static uint32_t U_CALLCONV +segmentStarterMapper(const void * /*context*/, uint32_t value) { + return value&CANON_NOT_SEGMENT_STARTER; +} + +U_CDECL_END + +void +Normalizer2Impl::addLcccChars(UnicodeSet &set) const { + UChar32 start = 0, end; + uint32_t norm16; + while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT, + nullptr, nullptr, &norm16)) >= 0) { + if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES && + norm16 != Normalizer2Impl::JAMO_VT) { + set.add(start, end); + } else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) { + uint16_t fcd16 = getFCD16(start); + if (fcd16 > 0xff) { set.add(start, end); } + } + start = end + 1; + } +} + +void +Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const { + // Add the start code point of each same-value range of the trie. + UChar32 start = 0, end; + uint32_t value; + while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT, + nullptr, nullptr, &value)) >= 0) { + sa->add(sa->set, start); + if (start != end && isAlgorithmicNoNo((uint16_t)value) && + (value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) { + // Range of code points with same-norm16-value algorithmic decompositions. + // They might have different non-zero FCD16 values. + uint16_t prevFCD16 = getFCD16(start); + while (++start <= end) { + uint16_t fcd16 = getFCD16(start); + if (fcd16 != prevFCD16) { + sa->add(sa->set, start); + prevFCD16 = fcd16; + } + } + } + start = end + 1; + } + + /* add Hangul LV syllables and LV+1 because of skippables */ + for(UChar c=Hangul::HANGUL_BASE; c<Hangul::HANGUL_LIMIT; c+=Hangul::JAMO_T_COUNT) { + sa->add(sa->set, c); + sa->add(sa->set, c+1); + } + sa->add(sa->set, Hangul::HANGUL_LIMIT); /* add Hangul+1 to continue with other properties */ +} + +void +Normalizer2Impl::addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const { + // Add the start code point of each same-value range of the canonical iterator data trie. + if (!ensureCanonIterData(errorCode)) { return; } + // Currently only used for the SEGMENT_STARTER property. + UChar32 start = 0, end; + uint32_t value; + while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPMAP_RANGE_NORMAL, 0, + segmentStarterMapper, nullptr, &value)) >= 0) { + sa->add(sa->set, start); + start = end + 1; + } +} + +const UChar * +Normalizer2Impl::copyLowPrefixFromNulTerminated(const UChar *src, + UChar32 minNeedDataCP, + ReorderingBuffer *buffer, + UErrorCode &errorCode) const { + // Make some effort to support NUL-terminated strings reasonably. + // Take the part of the fast quick check loop that does not look up + // data and check the first part of the string. + // After this prefix, determine the string length to simplify the rest + // of the code. + const UChar *prevSrc=src; + UChar c; + while((c=*src++)<minNeedDataCP && c!=0) {} + // Back out the last character for full processing. + // Copy this prefix. + if(--src!=prevSrc) { + if(buffer!=NULL) { + buffer->appendZeroCC(prevSrc, src, errorCode); + } + } + return src; +} + +UnicodeString & +Normalizer2Impl::decompose(const UnicodeString &src, UnicodeString &dest, + UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + dest.setToBogus(); + return dest; + } + const UChar *sArray=src.getBuffer(); + if(&dest==&src || sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + dest.setToBogus(); + return dest; + } + decompose(sArray, sArray+src.length(), dest, src.length(), errorCode); + return dest; +} + +void +Normalizer2Impl::decompose(const UChar *src, const UChar *limit, + UnicodeString &dest, + int32_t destLengthEstimate, + UErrorCode &errorCode) const { + if(destLengthEstimate<0 && limit!=NULL) { + destLengthEstimate=(int32_t)(limit-src); + } + dest.remove(); + ReorderingBuffer buffer(*this, dest); + if(buffer.init(destLengthEstimate, errorCode)) { + decompose(src, limit, &buffer, errorCode); + } +} + +// Dual functionality: +// buffer!=NULL: normalize +// buffer==NULL: isNormalized/spanQuickCheckYes +const UChar * +Normalizer2Impl::decompose(const UChar *src, const UChar *limit, + ReorderingBuffer *buffer, + UErrorCode &errorCode) const { + UChar32 minNoCP=minDecompNoCP; + if(limit==NULL) { + src=copyLowPrefixFromNulTerminated(src, minNoCP, buffer, errorCode); + if(U_FAILURE(errorCode)) { + return src; + } + limit=u_strchr(src, 0); + } + + const UChar *prevSrc; + UChar32 c=0; + uint16_t norm16=0; + + // only for quick check + const UChar *prevBoundary=src; + uint8_t prevCC=0; + + for(;;) { + // count code units below the minimum or with irrelevant data for the quick check + for(prevSrc=src; src!=limit;) { + if( (c=*src)<minNoCP || + isMostDecompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c)) + ) { + ++src; + } else if(!U16_IS_LEAD(c)) { + break; + } else { + UChar c2; + if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) { + c=U16_GET_SUPPLEMENTARY(c, c2); + norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c); + if(isMostDecompYesAndZeroCC(norm16)) { + src+=2; + } else { + break; + } + } else { + ++src; // unpaired lead surrogate: inert + } + } + } + // copy these code units all at once + if(src!=prevSrc) { + if(buffer!=NULL) { + if(!buffer->appendZeroCC(prevSrc, src, errorCode)) { + break; + } + } else { + prevCC=0; + prevBoundary=src; + } + } + if(src==limit) { + break; + } + + // Check one above-minimum, relevant code point. + src+=U16_LENGTH(c); + if(buffer!=NULL) { + if(!decompose(c, norm16, *buffer, errorCode)) { + break; + } + } else { + if(isDecompYes(norm16)) { + uint8_t cc=getCCFromYesOrMaybe(norm16); + if(prevCC<=cc || cc==0) { + prevCC=cc; + if(cc<=1) { + prevBoundary=src; + } + continue; + } + } + return prevBoundary; // "no" or cc out of order + } + } + return src; +} + +// Decompose a short piece of text which is likely to contain characters that +// fail the quick check loop and/or where the quick check loop's overhead +// is unlikely to be amortized. +// Called by the compose() and makeFCD() implementations. +const UChar * +Normalizer2Impl::decomposeShort(const UChar *src, const UChar *limit, + UBool stopAtCompBoundary, UBool onlyContiguous, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { + return nullptr; + } + while(src<limit) { + if (stopAtCompBoundary && *src < minCompNoMaybeCP) { + return src; + } + const UChar *prevSrc = src; + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16); + if (stopAtCompBoundary && norm16HasCompBoundaryBefore(norm16)) { + return prevSrc; + } + if(!decompose(c, norm16, buffer, errorCode)) { + return nullptr; + } + if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { + return src; + } + } + return src; +} + +UBool Normalizer2Impl::decompose(UChar32 c, uint16_t norm16, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const { + // get the decomposition and the lead and trail cc's + if (norm16 >= limitNoNo) { + if (isMaybeOrNonZeroCC(norm16)) { + return buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode); + } + // Maps to an isCompYesAndZeroCC. + c=mapAlgorithmic(c, norm16); + norm16=getRawNorm16(c); + } + if (norm16 < minYesNo) { + // c does not decompose + return buffer.append(c, 0, errorCode); + } else if(isHangulLV(norm16) || isHangulLVT(norm16)) { + // Hangul syllable: decompose algorithmically + UChar jamos[3]; + return buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode); + } + // c decomposes, get everything from the variable-length extra data + const uint16_t *mapping=getMapping(norm16); + uint16_t firstUnit=*mapping; + int32_t length=firstUnit&MAPPING_LENGTH_MASK; + uint8_t leadCC, trailCC; + trailCC=(uint8_t)(firstUnit>>8); + if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) { + leadCC=(uint8_t)(*(mapping-1)>>8); + } else { + leadCC=0; + } + return buffer.append((const UChar *)mapping+1, length, TRUE, leadCC, trailCC, errorCode); +} + +const uint8_t * +Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit, + UBool stopAtCompBoundary, UBool onlyContiguous, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { + return nullptr; + } + while (src < limit) { + const uint8_t *prevSrc = src; + uint16_t norm16; + UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); + // Get the decomposition and the lead and trail cc's. + UChar32 c = U_SENTINEL; + if (norm16 >= limitNoNo) { + if (isMaybeOrNonZeroCC(norm16)) { + // No boundaries around this character. + c = codePointFromValidUTF8(prevSrc, src); + if (!buffer.append(c, getCCFromYesOrMaybe(norm16), errorCode)) { + return nullptr; + } + continue; + } + // Maps to an isCompYesAndZeroCC. + if (stopAtCompBoundary) { + return prevSrc; + } + c = codePointFromValidUTF8(prevSrc, src); + c = mapAlgorithmic(c, norm16); + norm16 = getRawNorm16(c); + } else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) { + return prevSrc; + } + // norm16!=INERT guarantees that [prevSrc, src[ is valid UTF-8. + // We do not see invalid UTF-8 here because + // its norm16==INERT is normalization-inert, + // so it gets copied unchanged in the fast path, + // and we stop the slow path where invalid UTF-8 begins. + U_ASSERT(norm16 != INERT); + if (norm16 < minYesNo) { + if (c < 0) { + c = codePointFromValidUTF8(prevSrc, src); + } + // does not decompose + if (!buffer.append(c, 0, errorCode)) { + return nullptr; + } + } else if (isHangulLV(norm16) || isHangulLVT(norm16)) { + // Hangul syllable: decompose algorithmically + if (c < 0) { + c = codePointFromValidUTF8(prevSrc, src); + } + char16_t jamos[3]; + if (!buffer.appendZeroCC(jamos, jamos+Hangul::decompose(c, jamos), errorCode)) { + return nullptr; + } + } else { + // The character decomposes, get everything from the variable-length extra data. + const uint16_t *mapping = getMapping(norm16); + uint16_t firstUnit = *mapping; + int32_t length = firstUnit & MAPPING_LENGTH_MASK; + uint8_t trailCC = (uint8_t)(firstUnit >> 8); + uint8_t leadCC; + if (firstUnit & MAPPING_HAS_CCC_LCCC_WORD) { + leadCC = (uint8_t)(*(mapping-1) >> 8); + } else { + leadCC = 0; + } + if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) { + return nullptr; + } + } + if (stopAtCompBoundary && norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { + return src; + } + } + return src; +} + +const UChar * +Normalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const { + uint16_t norm16; + if(c<minDecompNoCP || isMaybeOrNonZeroCC(norm16=getNorm16(c))) { + // c does not decompose + return nullptr; + } + const UChar *decomp = nullptr; + if(isDecompNoAlgorithmic(norm16)) { + // Maps to an isCompYesAndZeroCC. + c=mapAlgorithmic(c, norm16); + decomp=buffer; + length=0; + U16_APPEND_UNSAFE(buffer, length, c); + // The mapping might decompose further. + norm16 = getRawNorm16(c); + } + if (norm16 < minYesNo) { + return decomp; + } else if(isHangulLV(norm16) || isHangulLVT(norm16)) { + // Hangul syllable: decompose algorithmically + length=Hangul::decompose(c, buffer); + return buffer; + } + // c decomposes, get everything from the variable-length extra data + const uint16_t *mapping=getMapping(norm16); + length=*mapping&MAPPING_LENGTH_MASK; + return (const UChar *)mapping+1; +} + +// The capacity of the buffer must be 30=MAPPING_LENGTH_MASK-1 +// so that a raw mapping fits that consists of one unit ("rm0") +// plus all but the first two code units of the normal mapping. +// The maximum length of a normal mapping is 31=MAPPING_LENGTH_MASK. +const UChar * +Normalizer2Impl::getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const { + uint16_t norm16; + if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) { + // c does not decompose + return NULL; + } else if(isHangulLV(norm16) || isHangulLVT(norm16)) { + // Hangul syllable: decompose algorithmically + Hangul::getRawDecomposition(c, buffer); + length=2; + return buffer; + } else if(isDecompNoAlgorithmic(norm16)) { + c=mapAlgorithmic(c, norm16); + length=0; + U16_APPEND_UNSAFE(buffer, length, c); + return buffer; + } + // c decomposes, get everything from the variable-length extra data + const uint16_t *mapping=getMapping(norm16); + uint16_t firstUnit=*mapping; + int32_t mLength=firstUnit&MAPPING_LENGTH_MASK; // length of normal mapping + if(firstUnit&MAPPING_HAS_RAW_MAPPING) { + // Read the raw mapping from before the firstUnit and before the optional ccc/lccc word. + // Bit 7=MAPPING_HAS_CCC_LCCC_WORD + const uint16_t *rawMapping=mapping-((firstUnit>>7)&1)-1; + uint16_t rm0=*rawMapping; + if(rm0<=MAPPING_LENGTH_MASK) { + length=rm0; + return (const UChar *)rawMapping-rm0; + } else { + // Copy the normal mapping and replace its first two code units with rm0. + buffer[0]=(UChar)rm0; + u_memcpy(buffer+1, (const UChar *)mapping+1+2, mLength-2); + length=mLength-1; + return buffer; + } + } else { + length=mLength; + return (const UChar *)mapping+1; + } +} + +void Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit, + UBool doDecompose, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const { + buffer.copyReorderableSuffixTo(safeMiddle); + if(doDecompose) { + decompose(src, limit, &buffer, errorCode); + return; + } + // Just merge the strings at the boundary. + bool isFirst = true; + uint8_t firstCC = 0, prevCC = 0, cc; + const UChar *p = src; + while (p != limit) { + const UChar *codePointStart = p; + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); + if ((cc = getCC(norm16)) == 0) { + p = codePointStart; + break; + } + if (isFirst) { + firstCC = cc; + isFirst = false; + } + prevCC = cc; + } + if(limit==NULL) { // appendZeroCC() needs limit!=NULL + limit=u_strchr(p, 0); + } + + if (buffer.append(src, (int32_t)(p - src), FALSE, firstCC, prevCC, errorCode)) { + buffer.appendZeroCC(p, limit, errorCode); + } +} + +UBool Normalizer2Impl::hasDecompBoundaryBefore(UChar32 c) const { + return c < minLcccCP || (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) || + norm16HasDecompBoundaryBefore(getNorm16(c)); +} + +UBool Normalizer2Impl::norm16HasDecompBoundaryBefore(uint16_t norm16) const { + if (norm16 < minNoNoCompNoMaybeCC) { + return TRUE; + } + if (norm16 >= limitNoNo) { + return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT; + } + // c decomposes, get everything from the variable-length extra data + const uint16_t *mapping=getMapping(norm16); + uint16_t firstUnit=*mapping; + // TRUE if leadCC==0 (hasFCDBoundaryBefore()) + return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0; +} + +UBool Normalizer2Impl::hasDecompBoundaryAfter(UChar32 c) const { + if (c < minDecompNoCP) { + return TRUE; + } + if (c <= 0xffff && !singleLeadMightHaveNonZeroFCD16(c)) { + return TRUE; + } + return norm16HasDecompBoundaryAfter(getNorm16(c)); +} + +UBool Normalizer2Impl::norm16HasDecompBoundaryAfter(uint16_t norm16) const { + if(norm16 <= minYesNo || isHangulLVT(norm16)) { + return TRUE; + } + if (norm16 >= limitNoNo) { + if (isMaybeOrNonZeroCC(norm16)) { + return norm16 <= MIN_NORMAL_MAYBE_YES || norm16 == JAMO_VT; + } + // Maps to an isCompYesAndZeroCC. + return (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1; + } + // c decomposes, get everything from the variable-length extra data + const uint16_t *mapping=getMapping(norm16); + uint16_t firstUnit=*mapping; + // decomp after-boundary: same as hasFCDBoundaryAfter(), + // fcd16<=1 || trailCC==0 + if(firstUnit>0x1ff) { + return FALSE; // trailCC>1 + } + if(firstUnit<=0xff) { + return TRUE; // trailCC==0 + } + // if(trailCC==1) test leadCC==0, same as checking for before-boundary + // TRUE if leadCC==0 (hasFCDBoundaryBefore()) + return (firstUnit&MAPPING_HAS_CCC_LCCC_WORD)==0 || (*(mapping-1)&0xff00)==0; +} + +/* + * Finds the recomposition result for + * a forward-combining "lead" character, + * specified with a pointer to its compositions list, + * and a backward-combining "trail" character. + * + * If the lead and trail characters combine, then this function returns + * the following "compositeAndFwd" value: + * Bits 21..1 composite character + * Bit 0 set if the composite is a forward-combining starter + * otherwise it returns -1. + * + * The compositions list has (trail, compositeAndFwd) pair entries, + * encoded as either pairs or triples of 16-bit units. + * The last entry has the high bit of its first unit set. + * + * The list is sorted by ascending trail characters (there are no duplicates). + * A linear search is used. + * + * See normalizer2impl.h for a more detailed description + * of the compositions list format. + */ +int32_t Normalizer2Impl::combine(const uint16_t *list, UChar32 trail) { + uint16_t key1, firstUnit; + if(trail<COMP_1_TRAIL_LIMIT) { + // trail character is 0..33FF + // result entry may have 2 or 3 units + key1=(uint16_t)(trail<<1); + while(key1>(firstUnit=*list)) { + list+=2+(firstUnit&COMP_1_TRIPLE); + } + if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { + if(firstUnit&COMP_1_TRIPLE) { + return ((int32_t)list[1]<<16)|list[2]; + } else { + return list[1]; + } + } + } else { + // trail character is 3400..10FFFF + // result entry has 3 units + key1=(uint16_t)(COMP_1_TRAIL_LIMIT+ + (((trail>>COMP_1_TRAIL_SHIFT))& + ~COMP_1_TRIPLE)); + uint16_t key2=(uint16_t)(trail<<COMP_2_TRAIL_SHIFT); + uint16_t secondUnit; + for(;;) { + if(key1>(firstUnit=*list)) { + list+=2+(firstUnit&COMP_1_TRIPLE); + } else if(key1==(firstUnit&COMP_1_TRAIL_MASK)) { + if(key2>(secondUnit=list[1])) { + if(firstUnit&COMP_1_LAST_TUPLE) { + break; + } else { + list+=3; + } + } else if(key2==(secondUnit&COMP_2_TRAIL_MASK)) { + return ((int32_t)(secondUnit&~COMP_2_TRAIL_MASK)<<16)|list[2]; + } else { + break; + } + } else { + break; + } + } + } + return -1; +} + +/** + * @param list some character's compositions list + * @param set recursively receives the composites from these compositions + */ +void Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const { + uint16_t firstUnit; + int32_t compositeAndFwd; + do { + firstUnit=*list; + if((firstUnit&COMP_1_TRIPLE)==0) { + compositeAndFwd=list[1]; + list+=2; + } else { + compositeAndFwd=(((int32_t)list[1]&~COMP_2_TRAIL_MASK)<<16)|list[2]; + list+=3; + } + UChar32 composite=compositeAndFwd>>1; + if((compositeAndFwd&1)!=0) { + addComposites(getCompositionsListForComposite(getRawNorm16(composite)), set); + } + set.add(composite); + } while((firstUnit&COMP_1_LAST_TUPLE)==0); +} + +/* + * Recomposes the buffer text starting at recomposeStartIndex + * (which is in NFD - decomposed and canonically ordered), + * and truncates the buffer contents. + * + * Note that recomposition never lengthens the text: + * Any character consists of either one or two code units; + * a composition may contain at most one more code unit than the original starter, + * while the combining mark that is removed has at least one code unit. + */ +void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex, + UBool onlyContiguous) const { + UChar *p=buffer.getStart()+recomposeStartIndex; + UChar *limit=buffer.getLimit(); + if(p==limit) { + return; + } + + UChar *starter, *pRemove, *q, *r; + const uint16_t *compositionsList; + UChar32 c, compositeAndFwd; + uint16_t norm16; + uint8_t cc, prevCC; + UBool starterIsSupplementary; + + // Some of the following variables are not used until we have a forward-combining starter + // and are only initialized now to avoid compiler warnings. + compositionsList=NULL; // used as indicator for whether we have a forward-combining starter + starter=NULL; + starterIsSupplementary=FALSE; + prevCC=0; + + for(;;) { + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); + cc=getCCFromYesOrMaybe(norm16); + if( // this character combines backward and + isMaybe(norm16) && + // we have seen a starter that combines forward and + compositionsList!=NULL && + // the backward-combining character is not blocked + (prevCC<cc || prevCC==0) + ) { + if(isJamoVT(norm16)) { + // c is a Jamo V/T, see if we can compose it with the previous character. + if(c<Hangul::JAMO_T_BASE) { + // c is a Jamo Vowel, compose with previous Jamo L and following Jamo T. + UChar prev=(UChar)(*starter-Hangul::JAMO_L_BASE); + if(prev<Hangul::JAMO_L_COUNT) { + pRemove=p-1; + UChar syllable=(UChar) + (Hangul::HANGUL_BASE+ + (prev*Hangul::JAMO_V_COUNT+(c-Hangul::JAMO_V_BASE))* + Hangul::JAMO_T_COUNT); + UChar t; + if(p!=limit && (t=(UChar)(*p-Hangul::JAMO_T_BASE))<Hangul::JAMO_T_COUNT) { + ++p; + syllable+=t; // The next character was a Jamo T. + } + *starter=syllable; + // remove the Jamo V/T + q=pRemove; + r=p; + while(r<limit) { + *q++=*r++; + } + limit=q; + p=pRemove; + } + } + /* + * No "else" for Jamo T: + * Since the input is in NFD, there are no Hangul LV syllables that + * a Jamo T could combine with. + * All Jamo Ts are combined above when handling Jamo Vs. + */ + if(p==limit) { + break; + } + compositionsList=NULL; + continue; + } else if((compositeAndFwd=combine(compositionsList, c))>=0) { + // The starter and the combining mark (c) do combine. + UChar32 composite=compositeAndFwd>>1; + + // Replace the starter with the composite, remove the combining mark. + pRemove=p-U16_LENGTH(c); // pRemove & p: start & limit of the combining mark + if(starterIsSupplementary) { + if(U_IS_SUPPLEMENTARY(composite)) { + // both are supplementary + starter[0]=U16_LEAD(composite); + starter[1]=U16_TRAIL(composite); + } else { + *starter=(UChar)composite; + // The composite is shorter than the starter, + // move the intermediate characters forward one. + starterIsSupplementary=FALSE; + q=starter+1; + r=q+1; + while(r<pRemove) { + *q++=*r++; + } + --pRemove; + } + } else if(U_IS_SUPPLEMENTARY(composite)) { + // The composite is longer than the starter, + // move the intermediate characters back one. + starterIsSupplementary=TRUE; + ++starter; // temporarily increment for the loop boundary + q=pRemove; + r=++pRemove; + while(starter<q) { + *--r=*--q; + } + *starter=U16_TRAIL(composite); + *--starter=U16_LEAD(composite); // undo the temporary increment + } else { + // both are on the BMP + *starter=(UChar)composite; + } + + /* remove the combining mark by moving the following text over it */ + if(pRemove<p) { + q=pRemove; + r=p; + while(r<limit) { + *q++=*r++; + } + limit=q; + p=pRemove; + } + // Keep prevCC because we removed the combining mark. + + if(p==limit) { + break; + } + // Is the composite a starter that combines forward? + if(compositeAndFwd&1) { + compositionsList= + getCompositionsListForComposite(getRawNorm16(composite)); + } else { + compositionsList=NULL; + } + + // We combined; continue with looking for compositions. + continue; + } + } + + // no combination this time + prevCC=cc; + if(p==limit) { + break; + } + + // If c did not combine, then check if it is a starter. + if(cc==0) { + // Found a new starter. + if((compositionsList=getCompositionsListForDecompYes(norm16))!=NULL) { + // It may combine with something, prepare for it. + if(U_IS_BMP(c)) { + starterIsSupplementary=FALSE; + starter=p-1; + } else { + starterIsSupplementary=TRUE; + starter=p-2; + } + } + } else if(onlyContiguous) { + // FCC: no discontiguous compositions; any intervening character blocks. + compositionsList=NULL; + } + } + buffer.setReorderingLimit(limit); +} + +UChar32 +Normalizer2Impl::composePair(UChar32 a, UChar32 b) const { + uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16 + const uint16_t *list; + if(isInert(norm16)) { + return U_SENTINEL; + } else if(norm16<minYesNoMappingsOnly) { + // a combines forward. + if(isJamoL(norm16)) { + b-=Hangul::JAMO_V_BASE; + if(0<=b && b<Hangul::JAMO_V_COUNT) { + return + (Hangul::HANGUL_BASE+ + ((a-Hangul::JAMO_L_BASE)*Hangul::JAMO_V_COUNT+b)* + Hangul::JAMO_T_COUNT); + } else { + return U_SENTINEL; + } + } else if(isHangulLV(norm16)) { + b-=Hangul::JAMO_T_BASE; + if(0<b && b<Hangul::JAMO_T_COUNT) { // not b==0! + return a+b; + } else { + return U_SENTINEL; + } + } else { + // 'a' has a compositions list in extraData + list=getMapping(norm16); + if(norm16>minYesNo) { // composite 'a' has both mapping & compositions list + list+= // mapping pointer + 1+ // +1 to skip the first unit with the mapping length + (*list&MAPPING_LENGTH_MASK); // + mapping length + } + } + } else if(norm16<minMaybeYes || MIN_NORMAL_MAYBE_YES<=norm16) { + return U_SENTINEL; + } else { + list=getCompositionsListForMaybe(norm16); + } + if(b<0 || 0x10ffff<b) { // combine(list, b) requires a valid code point b + return U_SENTINEL; + } +#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC + return combine(list, b)>>1; +#else + int32_t compositeAndFwd=combine(list, b); + return compositeAndFwd>=0 ? compositeAndFwd>>1 : U_SENTINEL; +#endif +} + +// Very similar to composeQuickCheck(): Make the same changes in both places if relevant. +// doCompose: normalize +// !doCompose: isNormalized (buffer must be empty and initialized) +UBool +Normalizer2Impl::compose(const UChar *src, const UChar *limit, + UBool onlyContiguous, + UBool doCompose, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const { + const UChar *prevBoundary=src; + UChar32 minNoMaybeCP=minCompNoMaybeCP; + if(limit==NULL) { + src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, + doCompose ? &buffer : NULL, + errorCode); + if(U_FAILURE(errorCode)) { + return FALSE; + } + limit=u_strchr(src, 0); + if (prevBoundary != src) { + if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) { + prevBoundary = src; + } else { + buffer.removeSuffix(1); + prevBoundary = --src; + } + } + } + + for (;;) { + // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point, + // or with (compYes && ccc==0) properties. + const UChar *prevSrc; + UChar32 c = 0; + uint16_t norm16 = 0; + for (;;) { + if (src == limit) { + if (prevBoundary != limit && doCompose) { + buffer.appendZeroCC(prevBoundary, limit, errorCode); + } + return TRUE; + } + if( (c=*src)<minNoMaybeCP || + isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c)) + ) { + ++src; + } else { + prevSrc = src++; + if(!U16_IS_LEAD(c)) { + break; + } else { + UChar c2; + if(src!=limit && U16_IS_TRAIL(c2=*src)) { + ++src; + c=U16_GET_SUPPLEMENTARY(c, c2); + norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c); + if(!isCompYesAndZeroCC(norm16)) { + break; + } + } + } + } + } + // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo. + // The current character is either a "noNo" (has a mapping) + // or a "maybeYes" (combines backward) + // or a "yesYes" with ccc!=0. + // It is not a Hangul syllable or Jamo L because those have "yes" properties. + + // Medium-fast path: Handle cases that do not require full decomposition and recomposition. + if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes + if (!doCompose) { + return FALSE; + } + // Fast path for mapping a character that is immediately surrounded by boundaries. + // In this case, we need not decompose around the current character. + if (isDecompNoAlgorithmic(norm16)) { + // Maps to a single isCompYesAndZeroCC character + // which also implies hasCompBoundaryBefore. + if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) || + hasCompBoundaryBefore(src, limit)) { + if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) { + break; + } + if(!buffer.append(mapAlgorithmic(c, norm16), 0, errorCode)) { + break; + } + prevBoundary = src; + continue; + } + } else if (norm16 < minNoNoCompBoundaryBefore) { + // The mapping is comp-normalized which also implies hasCompBoundaryBefore. + if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) || + hasCompBoundaryBefore(src, limit)) { + if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) { + break; + } + const UChar *mapping = reinterpret_cast<const UChar *>(getMapping(norm16)); + int32_t length = *mapping++ & MAPPING_LENGTH_MASK; + if(!buffer.appendZeroCC(mapping, mapping + length, errorCode)) { + break; + } + prevBoundary = src; + continue; + } + } else if (norm16 >= minNoNoEmpty) { + // The current character maps to nothing. + // Simply omit it from the output if there is a boundary before _or_ after it. + // The character itself implies no boundaries. + if (hasCompBoundaryBefore(src, limit) || + hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) { + if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) { + break; + } + prevBoundary = src; + continue; + } + } + // Other "noNo" type, or need to examine more text around this character: + // Fall through to the slow path. + } else if (isJamoVT(norm16) && prevBoundary != prevSrc) { + UChar prev=*(prevSrc-1); + if(c<Hangul::JAMO_T_BASE) { + // The current character is a Jamo Vowel, + // compose with previous Jamo L and following Jamo T. + UChar l = (UChar)(prev-Hangul::JAMO_L_BASE); + if(l<Hangul::JAMO_L_COUNT) { + if (!doCompose) { + return FALSE; + } + int32_t t; + if (src != limit && + 0 < (t = ((int32_t)*src - Hangul::JAMO_T_BASE)) && + t < Hangul::JAMO_T_COUNT) { + // The next character is a Jamo T. + ++src; + } else if (hasCompBoundaryBefore(src, limit)) { + // No Jamo T follows, not even via decomposition. + t = 0; + } else { + t = -1; + } + if (t >= 0) { + UChar32 syllable = Hangul::HANGUL_BASE + + (l*Hangul::JAMO_V_COUNT + (c-Hangul::JAMO_V_BASE)) * + Hangul::JAMO_T_COUNT + t; + --prevSrc; // Replace the Jamo L as well. + if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) { + break; + } + if(!buffer.appendBMP((UChar)syllable, 0, errorCode)) { + break; + } + prevBoundary = src; + continue; + } + // If we see L+V+x where x!=T then we drop to the slow path, + // decompose and recompose. + // This is to deal with NFKC finding normal L and V but a + // compatibility variant of a T. + // We need to either fully compose that combination here + // (which would complicate the code and may not work with strange custom data) + // or use the slow path. + } + } else if (Hangul::isHangulLV(prev)) { + // The current character is a Jamo Trailing consonant, + // compose with previous Hangul LV that does not contain a Jamo T. + if (!doCompose) { + return FALSE; + } + UChar32 syllable = prev + c - Hangul::JAMO_T_BASE; + --prevSrc; // Replace the Hangul LV as well. + if (prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) { + break; + } + if(!buffer.appendBMP((UChar)syllable, 0, errorCode)) { + break; + } + prevBoundary = src; + continue; + } + // No matching context, or may need to decompose surrounding text first: + // Fall through to the slow path. + } else if (norm16 > JAMO_VT) { // norm16 >= MIN_YES_YES_WITH_CC + // One or more combining marks that do not combine-back: + // Check for canonical order, copy unchanged if ok and + // if followed by a character with a boundary-before. + uint8_t cc = getCCFromNormalYesOrMaybe(norm16); // cc!=0 + if (onlyContiguous /* FCC */ && getPreviousTrailCC(prevBoundary, prevSrc) > cc) { + // Fails FCD test, need to decompose and contiguously recompose. + if (!doCompose) { + return FALSE; + } + } else { + // If !onlyContiguous (not FCC), then we ignore the tccc of + // the previous character which passed the quick check "yes && ccc==0" test. + const UChar *nextSrc; + uint16_t n16; + for (;;) { + if (src == limit) { + if (doCompose) { + buffer.appendZeroCC(prevBoundary, limit, errorCode); + } + return TRUE; + } + uint8_t prevCC = cc; + nextSrc = src; + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, n16); + if (n16 >= MIN_YES_YES_WITH_CC) { + cc = getCCFromNormalYesOrMaybe(n16); + if (prevCC > cc) { + if (!doCompose) { + return FALSE; + } + break; + } + } else { + break; + } + src = nextSrc; + } + // src is after the last in-order combining mark. + // If there is a boundary here, then we continue with no change. + if (norm16HasCompBoundaryBefore(n16)) { + if (isCompYesAndZeroCC(n16)) { + src = nextSrc; + } + continue; + } + // Use the slow path. There is no boundary in [prevSrc, src[. + } + } + + // Slow path: Find the nearest boundaries around the current character, + // decompose and recompose. + if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) { + const UChar *p = prevSrc; + UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, norm16); + if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { + prevSrc = p; + } + } + if (doCompose && prevBoundary != prevSrc && !buffer.appendZeroCC(prevBoundary, prevSrc, errorCode)) { + break; + } + int32_t recomposeStartIndex=buffer.length(); + // We know there is not a boundary here. + decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous, + buffer, errorCode); + // Decompose until the next boundary. + src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous, + buffer, errorCode); + if (U_FAILURE(errorCode)) { + break; + } + if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals() + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return TRUE; + } + recompose(buffer, recomposeStartIndex, onlyContiguous); + if(!doCompose) { + if(!buffer.equals(prevSrc, src)) { + return FALSE; + } + buffer.remove(); + } + prevBoundary=src; + } + return TRUE; +} + +// Very similar to compose(): Make the same changes in both places if relevant. +// pQCResult==NULL: spanQuickCheckYes +// pQCResult!=NULL: quickCheck (*pQCResult must be UNORM_YES) +const UChar * +Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit, + UBool onlyContiguous, + UNormalizationCheckResult *pQCResult) const { + const UChar *prevBoundary=src; + UChar32 minNoMaybeCP=minCompNoMaybeCP; + if(limit==NULL) { + UErrorCode errorCode=U_ZERO_ERROR; + src=copyLowPrefixFromNulTerminated(src, minNoMaybeCP, NULL, errorCode); + limit=u_strchr(src, 0); + if (prevBoundary != src) { + if (hasCompBoundaryAfter(*(src-1), onlyContiguous)) { + prevBoundary = src; + } else { + prevBoundary = --src; + } + } + } + + for(;;) { + // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point, + // or with (compYes && ccc==0) properties. + const UChar *prevSrc; + UChar32 c = 0; + uint16_t norm16 = 0; + for (;;) { + if(src==limit) { + return src; + } + if( (c=*src)<minNoMaybeCP || + isCompYesAndZeroCC(norm16=UCPTRIE_FAST_BMP_GET(normTrie, UCPTRIE_16, c)) + ) { + ++src; + } else { + prevSrc = src++; + if(!U16_IS_LEAD(c)) { + break; + } else { + UChar c2; + if(src!=limit && U16_IS_TRAIL(c2=*src)) { + ++src; + c=U16_GET_SUPPLEMENTARY(c, c2); + norm16=UCPTRIE_FAST_SUPP_GET(normTrie, UCPTRIE_16, c); + if(!isCompYesAndZeroCC(norm16)) { + break; + } + } + } + } + } + // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo. + // The current character is either a "noNo" (has a mapping) + // or a "maybeYes" (combines backward) + // or a "yesYes" with ccc!=0. + // It is not a Hangul syllable or Jamo L because those have "yes" properties. + + uint16_t prevNorm16 = INERT; + if (prevBoundary != prevSrc) { + if (norm16HasCompBoundaryBefore(norm16)) { + prevBoundary = prevSrc; + } else { + const UChar *p = prevSrc; + uint16_t n16; + UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, n16); + if (norm16HasCompBoundaryAfter(n16, onlyContiguous)) { + prevBoundary = prevSrc; + } else { + prevBoundary = p; + prevNorm16 = n16; + } + } + } + + if(isMaybeOrNonZeroCC(norm16)) { + uint8_t cc=getCCFromYesOrMaybe(norm16); + if (onlyContiguous /* FCC */ && cc != 0 && + getTrailCCFromCompYesAndZeroCC(prevNorm16) > cc) { + // The [prevBoundary..prevSrc[ character + // passed the quick check "yes && ccc==0" test + // but is out of canonical order with the current combining mark. + } else { + // If !onlyContiguous (not FCC), then we ignore the tccc of + // the previous character which passed the quick check "yes && ccc==0" test. + const UChar *nextSrc; + for (;;) { + if (norm16 < MIN_YES_YES_WITH_CC) { + if (pQCResult != nullptr) { + *pQCResult = UNORM_MAYBE; + } else { + return prevBoundary; + } + } + if (src == limit) { + return src; + } + uint8_t prevCC = cc; + nextSrc = src; + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, c, norm16); + if (isMaybeOrNonZeroCC(norm16)) { + cc = getCCFromYesOrMaybe(norm16); + if (!(prevCC <= cc || cc == 0)) { + break; + } + } else { + break; + } + src = nextSrc; + } + // src is after the last in-order combining mark. + if (isCompYesAndZeroCC(norm16)) { + prevBoundary = src; + src = nextSrc; + continue; + } + } + } + if(pQCResult!=NULL) { + *pQCResult=UNORM_NO; + } + return prevBoundary; + } +} + +void Normalizer2Impl::composeAndAppend(const UChar *src, const UChar *limit, + UBool doCompose, + UBool onlyContiguous, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const { + if(!buffer.isEmpty()) { + const UChar *firstStarterInSrc=findNextCompBoundary(src, limit, onlyContiguous); + if(src!=firstStarterInSrc) { + const UChar *lastStarterInDest=findPreviousCompBoundary(buffer.getStart(), + buffer.getLimit(), onlyContiguous); + int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastStarterInDest); + UnicodeString middle(lastStarterInDest, destSuffixLength); + buffer.removeSuffix(destSuffixLength); + safeMiddle=middle; + middle.append(src, (int32_t)(firstStarterInSrc-src)); + const UChar *middleStart=middle.getBuffer(); + compose(middleStart, middleStart+middle.length(), onlyContiguous, + TRUE, buffer, errorCode); + if(U_FAILURE(errorCode)) { + return; + } + src=firstStarterInSrc; + } + } + if(doCompose) { + compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); + } else { + if(limit==NULL) { // appendZeroCC() needs limit!=NULL + limit=u_strchr(src, 0); + } + buffer.appendZeroCC(src, limit, errorCode); + } +} + +UBool +Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous, + const uint8_t *src, const uint8_t *limit, + ByteSink *sink, Edits *edits, UErrorCode &errorCode) const { + U_ASSERT(limit != nullptr); + UnicodeString s16; + uint8_t minNoMaybeLead = leadByteForCP(minCompNoMaybeCP); + const uint8_t *prevBoundary = src; + + for (;;) { + // Fast path: Scan over a sequence of characters below the minimum "no or maybe" code point, + // or with (compYes && ccc==0) properties. + const uint8_t *prevSrc; + uint16_t norm16 = 0; + for (;;) { + if (src == limit) { + if (prevBoundary != limit && sink != nullptr) { + ByteSinkUtil::appendUnchanged(prevBoundary, limit, + *sink, options, edits, errorCode); + } + return TRUE; + } + if (*src < minNoMaybeLead) { + ++src; + } else { + prevSrc = src; + UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); + if (!isCompYesAndZeroCC(norm16)) { + break; + } + } + } + // isCompYesAndZeroCC(norm16) is false, that is, norm16>=minNoNo. + // The current character is either a "noNo" (has a mapping) + // or a "maybeYes" (combines backward) + // or a "yesYes" with ccc!=0. + // It is not a Hangul syllable or Jamo L because those have "yes" properties. + + // Medium-fast path: Handle cases that do not require full decomposition and recomposition. + if (!isMaybeOrNonZeroCC(norm16)) { // minNoNo <= norm16 < minMaybeYes + if (sink == nullptr) { + return FALSE; + } + // Fast path for mapping a character that is immediately surrounded by boundaries. + // In this case, we need not decompose around the current character. + if (isDecompNoAlgorithmic(norm16)) { + // Maps to a single isCompYesAndZeroCC character + // which also implies hasCompBoundaryBefore. + if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) || + hasCompBoundaryBefore(src, limit)) { + if (prevBoundary != prevSrc && + !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, + *sink, options, edits, errorCode)) { + break; + } + appendCodePointDelta(prevSrc, src, getAlgorithmicDelta(norm16), *sink, edits); + prevBoundary = src; + continue; + } + } else if (norm16 < minNoNoCompBoundaryBefore) { + // The mapping is comp-normalized which also implies hasCompBoundaryBefore. + if (norm16HasCompBoundaryAfter(norm16, onlyContiguous) || + hasCompBoundaryBefore(src, limit)) { + if (prevBoundary != prevSrc && + !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, + *sink, options, edits, errorCode)) { + break; + } + const uint16_t *mapping = getMapping(norm16); + int32_t length = *mapping++ & MAPPING_LENGTH_MASK; + if (!ByteSinkUtil::appendChange(prevSrc, src, (const UChar *)mapping, length, + *sink, edits, errorCode)) { + break; + } + prevBoundary = src; + continue; + } + } else if (norm16 >= minNoNoEmpty) { + // The current character maps to nothing. + // Simply omit it from the output if there is a boundary before _or_ after it. + // The character itself implies no boundaries. + if (hasCompBoundaryBefore(src, limit) || + hasCompBoundaryAfter(prevBoundary, prevSrc, onlyContiguous)) { + if (prevBoundary != prevSrc && + !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, + *sink, options, edits, errorCode)) { + break; + } + if (edits != nullptr) { + edits->addReplace((int32_t)(src - prevSrc), 0); + } + prevBoundary = src; + continue; + } + } + // Other "noNo" type, or need to examine more text around this character: + // Fall through to the slow path. + } else if (isJamoVT(norm16)) { + // Jamo L: E1 84 80..92 + // Jamo V: E1 85 A1..B5 + // Jamo T: E1 86 A8..E1 87 82 + U_ASSERT((src - prevSrc) == 3 && *prevSrc == 0xe1); + UChar32 prev = previousHangulOrJamo(prevBoundary, prevSrc); + if (prevSrc[1] == 0x85) { + // The current character is a Jamo Vowel, + // compose with previous Jamo L and following Jamo T. + UChar32 l = prev - Hangul::JAMO_L_BASE; + if ((uint32_t)l < Hangul::JAMO_L_COUNT) { + if (sink == nullptr) { + return FALSE; + } + int32_t t = getJamoTMinusBase(src, limit); + if (t >= 0) { + // The next character is a Jamo T. + src += 3; + } else if (hasCompBoundaryBefore(src, limit)) { + // No Jamo T follows, not even via decomposition. + t = 0; + } + if (t >= 0) { + UChar32 syllable = Hangul::HANGUL_BASE + + (l*Hangul::JAMO_V_COUNT + (prevSrc[2]-0xa1)) * + Hangul::JAMO_T_COUNT + t; + prevSrc -= 3; // Replace the Jamo L as well. + if (prevBoundary != prevSrc && + !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, + *sink, options, edits, errorCode)) { + break; + } + ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits); + prevBoundary = src; + continue; + } + // If we see L+V+x where x!=T then we drop to the slow path, + // decompose and recompose. + // This is to deal with NFKC finding normal L and V but a + // compatibility variant of a T. + // We need to either fully compose that combination here + // (which would complicate the code and may not work with strange custom data) + // or use the slow path. + } + } else if (Hangul::isHangulLV(prev)) { + // The current character is a Jamo Trailing consonant, + // compose with previous Hangul LV that does not contain a Jamo T. + if (sink == nullptr) { + return FALSE; + } + UChar32 syllable = prev + getJamoTMinusBase(prevSrc, src); + prevSrc -= 3; // Replace the Hangul LV as well. + if (prevBoundary != prevSrc && + !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, + *sink, options, edits, errorCode)) { + break; + } + ByteSinkUtil::appendCodePoint(prevSrc, src, syllable, *sink, edits); + prevBoundary = src; + continue; + } + // No matching context, or may need to decompose surrounding text first: + // Fall through to the slow path. + } else if (norm16 > JAMO_VT) { // norm16 >= MIN_YES_YES_WITH_CC + // One or more combining marks that do not combine-back: + // Check for canonical order, copy unchanged if ok and + // if followed by a character with a boundary-before. + uint8_t cc = getCCFromNormalYesOrMaybe(norm16); // cc!=0 + if (onlyContiguous /* FCC */ && getPreviousTrailCC(prevBoundary, prevSrc) > cc) { + // Fails FCD test, need to decompose and contiguously recompose. + if (sink == nullptr) { + return FALSE; + } + } else { + // If !onlyContiguous (not FCC), then we ignore the tccc of + // the previous character which passed the quick check "yes && ccc==0" test. + const uint8_t *nextSrc; + uint16_t n16; + for (;;) { + if (src == limit) { + if (sink != nullptr) { + ByteSinkUtil::appendUnchanged(prevBoundary, limit, + *sink, options, edits, errorCode); + } + return TRUE; + } + uint8_t prevCC = cc; + nextSrc = src; + UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, nextSrc, limit, n16); + if (n16 >= MIN_YES_YES_WITH_CC) { + cc = getCCFromNormalYesOrMaybe(n16); + if (prevCC > cc) { + if (sink == nullptr) { + return FALSE; + } + break; + } + } else { + break; + } + src = nextSrc; + } + // src is after the last in-order combining mark. + // If there is a boundary here, then we continue with no change. + if (norm16HasCompBoundaryBefore(n16)) { + if (isCompYesAndZeroCC(n16)) { + src = nextSrc; + } + continue; + } + // Use the slow path. There is no boundary in [prevSrc, src[. + } + } + + // Slow path: Find the nearest boundaries around the current character, + // decompose and recompose. + if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) { + const uint8_t *p = prevSrc; + UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, prevBoundary, p, norm16); + if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { + prevSrc = p; + } + } + ReorderingBuffer buffer(*this, s16, errorCode); + if (U_FAILURE(errorCode)) { + break; + } + // We know there is not a boundary here. + decomposeShort(prevSrc, src, FALSE /* !stopAtCompBoundary */, onlyContiguous, + buffer, errorCode); + // Decompose until the next boundary. + src = decomposeShort(src, limit, TRUE /* stopAtCompBoundary */, onlyContiguous, + buffer, errorCode); + if (U_FAILURE(errorCode)) { + break; + } + if ((src - prevSrc) > INT32_MAX) { // guard before buffer.equals() + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return TRUE; + } + recompose(buffer, 0, onlyContiguous); + if (!buffer.equals(prevSrc, src)) { + if (sink == nullptr) { + return FALSE; + } + if (prevBoundary != prevSrc && + !ByteSinkUtil::appendUnchanged(prevBoundary, prevSrc, + *sink, options, edits, errorCode)) { + break; + } + if (!ByteSinkUtil::appendChange(prevSrc, src, buffer.getStart(), buffer.length(), + *sink, edits, errorCode)) { + break; + } + prevBoundary = src; + } + } + return TRUE; +} + +UBool Normalizer2Impl::hasCompBoundaryBefore(const UChar *src, const UChar *limit) const { + if (src == limit || *src < minCompNoMaybeCP) { + return TRUE; + } + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16); + return norm16HasCompBoundaryBefore(norm16); +} + +UBool Normalizer2Impl::hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const { + if (src == limit) { + return TRUE; + } + uint16_t norm16; + UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); + return norm16HasCompBoundaryBefore(norm16); +} + +UBool Normalizer2Impl::hasCompBoundaryAfter(const UChar *start, const UChar *p, + UBool onlyContiguous) const { + if (start == p) { + return TRUE; + } + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); + return norm16HasCompBoundaryAfter(norm16, onlyContiguous); +} + +UBool Normalizer2Impl::hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p, + UBool onlyContiguous) const { + if (start == p) { + return TRUE; + } + uint16_t norm16; + UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, start, p, norm16); + return norm16HasCompBoundaryAfter(norm16, onlyContiguous); +} + +const UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p, + UBool onlyContiguous) const { + while (p != start) { + const UChar *codePointLimit = p; + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); + if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { + return codePointLimit; + } + if (hasCompBoundaryBefore(c, norm16)) { + return p; + } + } + return p; +} + +const UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit, + UBool onlyContiguous) const { + while (p != limit) { + const UChar *codePointStart = p; + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); + if (hasCompBoundaryBefore(c, norm16)) { + return codePointStart; + } + if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { + return p; + } + } + return p; +} + +uint8_t Normalizer2Impl::getPreviousTrailCC(const UChar *start, const UChar *p) const { + if (start == p) { + return 0; + } + int32_t i = (int32_t)(p - start); + UChar32 c; + U16_PREV(start, 0, i, c); + return (uint8_t)getFCD16(c); +} + +uint8_t Normalizer2Impl::getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const { + if (start == p) { + return 0; + } + int32_t i = (int32_t)(p - start); + UChar32 c; + U8_PREV(start, 0, i, c); + return (uint8_t)getFCD16(c); +} + +// Note: normalizer2impl.cpp r30982 (2011-nov-27) +// still had getFCDTrie() which built and cached an FCD trie. +// That provided faster access to FCD data than getFCD16FromNormData() +// but required synchronization and consumed some 10kB of heap memory +// in any process that uses FCD (e.g., via collation). +// minDecompNoCP etc. and smallFCD[] are intended to help with any loss of performance, +// at least for ASCII & CJK. + +// Ticket 20907 - The optimizer in MSVC/Visual Studio versions below 16.4 has trouble with this +// function on Windows ARM64. As a work-around, we disable optimizations for this function. +// This work-around could/should be removed once the following versions of Visual Studio are no +// longer supported: All versions of VS2017, and versions of VS2019 below 16.4. +#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924)) +#pragma optimize( "", off ) +#endif +// Gets the FCD value from the regular normalization data. +uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const { + uint16_t norm16=getNorm16(c); + if (norm16 >= limitNoNo) { + if(norm16>=MIN_NORMAL_MAYBE_YES) { + // combining mark + norm16=getCCFromNormalYesOrMaybe(norm16); + return norm16|(norm16<<8); + } else if(norm16>=minMaybeYes) { + return 0; + } else { // isDecompNoAlgorithmic(norm16) + uint16_t deltaTrailCC = norm16 & DELTA_TCCC_MASK; + if (deltaTrailCC <= DELTA_TCCC_1) { + return deltaTrailCC >> OFFSET_SHIFT; + } + // Maps to an isCompYesAndZeroCC. + c=mapAlgorithmic(c, norm16); + norm16=getRawNorm16(c); + } + } + if(norm16<=minYesNo || isHangulLVT(norm16)) { + // no decomposition or Hangul syllable, all zeros + return 0; + } + // c decomposes, get everything from the variable-length extra data + const uint16_t *mapping=getMapping(norm16); + uint16_t firstUnit=*mapping; + norm16=firstUnit>>8; // tccc + if(firstUnit&MAPPING_HAS_CCC_LCCC_WORD) { + norm16|=*(mapping-1)&0xff00; // lccc + } + return norm16; +} +#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924)) +#pragma optimize( "", on ) +#endif + +// Dual functionality: +// buffer!=NULL: normalize +// buffer==NULL: isNormalized/quickCheck/spanQuickCheckYes +const UChar * +Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit, + ReorderingBuffer *buffer, + UErrorCode &errorCode) const { + // Tracks the last FCD-safe boundary, before lccc=0 or after properly-ordered tccc<=1. + // Similar to the prevBoundary in the compose() implementation. + const UChar *prevBoundary=src; + int32_t prevFCD16=0; + if(limit==NULL) { + src=copyLowPrefixFromNulTerminated(src, minLcccCP, buffer, errorCode); + if(U_FAILURE(errorCode)) { + return src; + } + if(prevBoundary<src) { + prevBoundary=src; + // We know that the previous character's lccc==0. + // Fetching the fcd16 value was deferred for this below-U+0300 code point. + prevFCD16=getFCD16(*(src-1)); + if(prevFCD16>1) { + --prevBoundary; + } + } + limit=u_strchr(src, 0); + } + + // Note: In this function we use buffer->appendZeroCC() because we track + // the lead and trail combining classes here, rather than leaving it to + // the ReorderingBuffer. + // The exception is the call to decomposeShort() which uses the buffer + // in the normal way. + + const UChar *prevSrc; + UChar32 c=0; + uint16_t fcd16=0; + + for(;;) { + // count code units with lccc==0 + for(prevSrc=src; src!=limit;) { + if((c=*src)<minLcccCP) { + prevFCD16=~c; + ++src; + } else if(!singleLeadMightHaveNonZeroFCD16(c)) { + prevFCD16=0; + ++src; + } else { + if(U16_IS_LEAD(c)) { + UChar c2; + if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) { + c=U16_GET_SUPPLEMENTARY(c, c2); + } + } + if((fcd16=getFCD16FromNormData(c))<=0xff) { + prevFCD16=fcd16; + src+=U16_LENGTH(c); + } else { + break; + } + } + } + // copy these code units all at once + if(src!=prevSrc) { + if(buffer!=NULL && !buffer->appendZeroCC(prevSrc, src, errorCode)) { + break; + } + if(src==limit) { + break; + } + prevBoundary=src; + // We know that the previous character's lccc==0. + if(prevFCD16<0) { + // Fetching the fcd16 value was deferred for this below-minLcccCP code point. + UChar32 prev=~prevFCD16; + if(prev<minDecompNoCP) { + prevFCD16=0; + } else { + prevFCD16=getFCD16FromNormData(prev); + if(prevFCD16>1) { + --prevBoundary; + } + } + } else { + const UChar *p=src-1; + if(U16_IS_TRAIL(*p) && prevSrc<p && U16_IS_LEAD(*(p-1))) { + --p; + // Need to fetch the previous character's FCD value because + // prevFCD16 was just for the trail surrogate code point. + prevFCD16=getFCD16FromNormData(U16_GET_SUPPLEMENTARY(p[0], p[1])); + // Still known to have lccc==0 because its lead surrogate unit had lccc==0. + } + if(prevFCD16>1) { + prevBoundary=p; + } + } + // The start of the current character (c). + prevSrc=src; + } else if(src==limit) { + break; + } + + src+=U16_LENGTH(c); + // The current character (c) at [prevSrc..src[ has a non-zero lead combining class. + // Check for proper order, and decompose locally if necessary. + if((prevFCD16&0xff)<=(fcd16>>8)) { + // proper order: prev tccc <= current lccc + if((fcd16&0xff)<=1) { + prevBoundary=src; + } + if(buffer!=NULL && !buffer->appendZeroCC(c, errorCode)) { + break; + } + prevFCD16=fcd16; + continue; + } else if(buffer==NULL) { + return prevBoundary; // quick check "no" + } else { + /* + * Back out the part of the source that we copied or appended + * already but is now going to be decomposed. + * prevSrc is set to after what was copied/appended. + */ + buffer->removeSuffix((int32_t)(prevSrc-prevBoundary)); + /* + * Find the part of the source that needs to be decomposed, + * up to the next safe boundary. + */ + src=findNextFCDBoundary(src, limit); + /* + * The source text does not fulfill the conditions for FCD. + * Decompose and reorder a limited piece of the text. + */ + decomposeShort(prevBoundary, src, FALSE, FALSE, *buffer, errorCode); + if (U_FAILURE(errorCode)) { + break; + } + prevBoundary=src; + prevFCD16=0; + } + } + return src; +} + +void Normalizer2Impl::makeFCDAndAppend(const UChar *src, const UChar *limit, + UBool doMakeFCD, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const { + if(!buffer.isEmpty()) { + const UChar *firstBoundaryInSrc=findNextFCDBoundary(src, limit); + if(src!=firstBoundaryInSrc) { + const UChar *lastBoundaryInDest=findPreviousFCDBoundary(buffer.getStart(), + buffer.getLimit()); + int32_t destSuffixLength=(int32_t)(buffer.getLimit()-lastBoundaryInDest); + UnicodeString middle(lastBoundaryInDest, destSuffixLength); + buffer.removeSuffix(destSuffixLength); + safeMiddle=middle; + middle.append(src, (int32_t)(firstBoundaryInSrc-src)); + const UChar *middleStart=middle.getBuffer(); + makeFCD(middleStart, middleStart+middle.length(), &buffer, errorCode); + if(U_FAILURE(errorCode)) { + return; + } + src=firstBoundaryInSrc; + } + } + if(doMakeFCD) { + makeFCD(src, limit, &buffer, errorCode); + } else { + if(limit==NULL) { // appendZeroCC() needs limit!=NULL + limit=u_strchr(src, 0); + } + buffer.appendZeroCC(src, limit, errorCode); + } +} + +const UChar *Normalizer2Impl::findPreviousFCDBoundary(const UChar *start, const UChar *p) const { + while(start<p) { + const UChar *codePointLimit = p; + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); + if (c < minDecompNoCP || norm16HasDecompBoundaryAfter(norm16)) { + return codePointLimit; + } + if (norm16HasDecompBoundaryBefore(norm16)) { + return p; + } + } + return p; +} + +const UChar *Normalizer2Impl::findNextFCDBoundary(const UChar *p, const UChar *limit) const { + while(p<limit) { + const UChar *codePointStart=p; + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); + if (c < minLcccCP || norm16HasDecompBoundaryBefore(norm16)) { + return codePointStart; + } + if (norm16HasDecompBoundaryAfter(norm16)) { + return p; + } + } + return p; +} + +// CanonicalIterator data -------------------------------------------------- *** + +CanonIterData::CanonIterData(UErrorCode &errorCode) : + mutableTrie(umutablecptrie_open(0, 0, &errorCode)), trie(nullptr), + canonStartSets(uprv_deleteUObject, NULL, errorCode) {} + +CanonIterData::~CanonIterData() { + umutablecptrie_close(mutableTrie); + ucptrie_close(trie); +} + +void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode &errorCode) { + uint32_t canonValue = umutablecptrie_get(mutableTrie, decompLead); + if((canonValue&(CANON_HAS_SET|CANON_VALUE_MASK))==0 && origin!=0) { + // origin is the first character whose decomposition starts with + // the character for which we are setting the value. + umutablecptrie_set(mutableTrie, decompLead, canonValue|origin, &errorCode); + } else { + // origin is not the first character, or it is U+0000. + UnicodeSet *set; + if((canonValue&CANON_HAS_SET)==0) { + set=new UnicodeSet; + if(set==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK); + canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size(); + umutablecptrie_set(mutableTrie, decompLead, canonValue, &errorCode); + canonStartSets.addElement(set, errorCode); + if(firstOrigin!=0) { + set->add(firstOrigin); + } + } else { + set=(UnicodeSet *)canonStartSets[(int32_t)(canonValue&CANON_VALUE_MASK)]; + } + set->add(origin); + } +} + +// C++ class for friend access to private Normalizer2Impl members. +class InitCanonIterData { +public: + static void doInit(Normalizer2Impl *impl, UErrorCode &errorCode); +}; + +U_CDECL_BEGIN + +// UInitOnce instantiation function for CanonIterData +static void U_CALLCONV +initCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) { + InitCanonIterData::doInit(impl, errorCode); +} + +U_CDECL_END + +void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) { + U_ASSERT(impl->fCanonIterData == NULL); + impl->fCanonIterData = new CanonIterData(errorCode); + if (impl->fCanonIterData == NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } + if (U_SUCCESS(errorCode)) { + UChar32 start = 0, end; + uint32_t value; + while ((end = ucptrie_getRange(impl->normTrie, start, + UCPMAP_RANGE_FIXED_LEAD_SURROGATES, Normalizer2Impl::INERT, + nullptr, nullptr, &value)) >= 0) { + // Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters. + if (value != Normalizer2Impl::INERT) { + impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode); + } + start = end + 1; + } +#ifdef UCPTRIE_DEBUG + umutablecptrie_setName(impl->fCanonIterData->mutableTrie, "CanonIterData"); +#endif + impl->fCanonIterData->trie = umutablecptrie_buildImmutable( + impl->fCanonIterData->mutableTrie, UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_32, &errorCode); + umutablecptrie_close(impl->fCanonIterData->mutableTrie); + impl->fCanonIterData->mutableTrie = nullptr; + } + if (U_FAILURE(errorCode)) { + delete impl->fCanonIterData; + impl->fCanonIterData = NULL; + } +} + +void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16, + CanonIterData &newData, + UErrorCode &errorCode) const { + if(isInert(norm16) || (minYesNo<=norm16 && norm16<minNoNo)) { + // Inert, or 2-way mapping (including Hangul syllable). + // We do not write a canonStartSet for any yesNo character. + // Composites from 2-way mappings are added at runtime from the + // starter's compositions list, and the other characters in + // 2-way mappings get CANON_NOT_SEGMENT_STARTER set because they are + // "maybe" characters. + return; + } + for(UChar32 c=start; c<=end; ++c) { + uint32_t oldValue = umutablecptrie_get(newData.mutableTrie, c); + uint32_t newValue=oldValue; + if(isMaybeOrNonZeroCC(norm16)) { + // not a segment starter if it occurs in a decomposition or has cc!=0 + newValue|=CANON_NOT_SEGMENT_STARTER; + if(norm16<MIN_NORMAL_MAYBE_YES) { + newValue|=CANON_HAS_COMPOSITIONS; + } + } else if(norm16<minYesNo) { + newValue|=CANON_HAS_COMPOSITIONS; + } else { + // c has a one-way decomposition + UChar32 c2=c; + // Do not modify the whole-range norm16 value. + uint16_t norm16_2=norm16; + if (isDecompNoAlgorithmic(norm16_2)) { + // Maps to an isCompYesAndZeroCC. + c2 = mapAlgorithmic(c2, norm16_2); + norm16_2 = getRawNorm16(c2); + // No compatibility mappings for the CanonicalIterator. + U_ASSERT(!(isHangulLV(norm16_2) || isHangulLVT(norm16_2))); + } + if (norm16_2 > minYesNo) { + // c decomposes, get everything from the variable-length extra data + const uint16_t *mapping=getMapping(norm16_2); + uint16_t firstUnit=*mapping; + int32_t length=firstUnit&MAPPING_LENGTH_MASK; + if((firstUnit&MAPPING_HAS_CCC_LCCC_WORD)!=0) { + if(c==c2 && (*(mapping-1)&0xff)!=0) { + newValue|=CANON_NOT_SEGMENT_STARTER; // original c has cc!=0 + } + } + // Skip empty mappings (no characters in the decomposition). + if(length!=0) { + ++mapping; // skip over the firstUnit + // add c to first code point's start set + int32_t i=0; + U16_NEXT_UNSAFE(mapping, i, c2); + newData.addToStartSet(c, c2, errorCode); + // Set CANON_NOT_SEGMENT_STARTER for each remaining code point of a + // one-way mapping. A 2-way mapping is possible here after + // intermediate algorithmic mapping. + if(norm16_2>=minNoNo) { + while(i<length) { + U16_NEXT_UNSAFE(mapping, i, c2); + uint32_t c2Value = umutablecptrie_get(newData.mutableTrie, c2); + if((c2Value&CANON_NOT_SEGMENT_STARTER)==0) { + umutablecptrie_set(newData.mutableTrie, c2, + c2Value|CANON_NOT_SEGMENT_STARTER, &errorCode); + } + } + } + } + } else { + // c decomposed to c2 algorithmically; c has cc==0 + newData.addToStartSet(c, c2, errorCode); + } + } + if(newValue!=oldValue) { + umutablecptrie_set(newData.mutableTrie, c, newValue, &errorCode); + } + } +} + +UBool Normalizer2Impl::ensureCanonIterData(UErrorCode &errorCode) const { + // Logically const: Synchronized instantiation. + Normalizer2Impl *me=const_cast<Normalizer2Impl *>(this); + umtx_initOnce(me->fCanonIterDataInitOnce, &initCanonIterData, me, errorCode); + return U_SUCCESS(errorCode); +} + +int32_t Normalizer2Impl::getCanonValue(UChar32 c) const { + return (int32_t)ucptrie_get(fCanonIterData->trie, c); +} + +const UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const { + return *(const UnicodeSet *)fCanonIterData->canonStartSets[n]; +} + +UBool Normalizer2Impl::isCanonSegmentStarter(UChar32 c) const { + return getCanonValue(c)>=0; +} + +UBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const { + int32_t canonValue=getCanonValue(c)&~CANON_NOT_SEGMENT_STARTER; + if(canonValue==0) { + return FALSE; + } + set.clear(); + int32_t value=canonValue&CANON_VALUE_MASK; + if((canonValue&CANON_HAS_SET)!=0) { + set.addAll(getCanonStartSet(value)); + } else if(value!=0) { + set.add(value); + } + if((canonValue&CANON_HAS_COMPOSITIONS)!=0) { + uint16_t norm16=getRawNorm16(c); + if(norm16==JAMO_L) { + UChar32 syllable= + (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT); + set.add(syllable, syllable+Hangul::JAMO_VT_COUNT-1); + } else { + addComposites(getCompositionsList(norm16), set); + } + } + return TRUE; +} + +U_NAMESPACE_END + +// Normalizer2 data swapping ----------------------------------------------- *** + +U_NAMESPACE_USE + +U_CAPI int32_t U_EXPORT2 +unorm2_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UDataInfo *pInfo; + int32_t headerSize; + + const uint8_t *inBytes; + uint8_t *outBytes; + + const int32_t *inIndexes; + int32_t indexes[Normalizer2Impl::IX_TOTAL_SIZE+1]; + + int32_t i, offset, nextOffset, size; + + /* udata_swapDataHeader checks the arguments */ + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* check data format and format version */ + pInfo=(const UDataInfo *)((const char *)inData+4); + uint8_t formatVersion0=pInfo->formatVersion[0]; + if(!( + pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ + pInfo->dataFormat[1]==0x72 && + pInfo->dataFormat[2]==0x6d && + pInfo->dataFormat[3]==0x32 && + (1<=formatVersion0 && formatVersion0<=4) + )) { + udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + inBytes=(const uint8_t *)inData+headerSize; + outBytes=(uint8_t *)outData+headerSize; + + inIndexes=(const int32_t *)inBytes; + int32_t minIndexesLength; + if(formatVersion0==1) { + minIndexesLength=Normalizer2Impl::IX_MIN_MAYBE_YES+1; + } else if(formatVersion0==2) { + minIndexesLength=Normalizer2Impl::IX_MIN_YES_NO_MAPPINGS_ONLY+1; + } else { + minIndexesLength=Normalizer2Impl::IX_MIN_LCCC_CP+1; + } + + if(length>=0) { + length-=headerSize; + if(length<minIndexesLength*4) { + udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for Normalizer2 data\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + + /* read the first few indexes */ + for(i=0; i<UPRV_LENGTHOF(indexes); ++i) { + indexes[i]=udata_readInt32(ds, inIndexes[i]); + } + + /* get the total length of the data */ + size=indexes[Normalizer2Impl::IX_TOTAL_SIZE]; + + if(length>=0) { + if(length<size) { + udata_printError(ds, "unorm2_swap(): too few bytes (%d after header) for all of Normalizer2 data\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + /* copy the data for inaccessible bytes */ + if(inBytes!=outBytes) { + uprv_memcpy(outBytes, inBytes, size); + } + + offset=0; + + /* swap the int32_t indexes[] */ + nextOffset=indexes[Normalizer2Impl::IX_NORM_TRIE_OFFSET]; + ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode); + offset=nextOffset; + + /* swap the trie */ + nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET]; + utrie_swapAnyVersion(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); + offset=nextOffset; + + /* swap the uint16_t extraData[] */ + nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET]; + ds->swapArray16(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); + offset=nextOffset; + + /* no need to swap the uint8_t smallFCD[] (new in formatVersion 2) */ + nextOffset=indexes[Normalizer2Impl::IX_SMALL_FCD_OFFSET+1]; + offset=nextOffset; + + U_ASSERT(offset==size); + } + + return headerSize+size; +} + +#endif // !UCONFIG_NO_NORMALIZATION diff --git a/thirdparty/icu4c/common/normalizer2impl.h b/thirdparty/icu4c/common/normalizer2impl.h new file mode 100644 index 0000000000..4218a30a34 --- /dev/null +++ b/thirdparty/icu4c/common/normalizer2impl.h @@ -0,0 +1,978 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2009-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: normalizer2impl.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009nov22 +* created by: Markus W. Scherer +*/ + +#ifndef __NORMALIZER2IMPL_H__ +#define __NORMALIZER2IMPL_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/normalizer2.h" +#include "unicode/ucptrie.h" +#include "unicode/unistr.h" +#include "unicode/unorm.h" +#include "unicode/utf.h" +#include "unicode/utf16.h" +#include "mutex.h" +#include "udataswp.h" +#include "uset_imp.h" + +// When the nfc.nrm data is *not* hardcoded into the common library +// (with this constant set to 0), +// then it needs to be built into the data package: +// Add nfc.nrm to icu4c/source/data/Makefile.in DAT_FILES_SHORT +#define NORM2_HARDCODE_NFC_DATA 1 + +U_NAMESPACE_BEGIN + +struct CanonIterData; + +class ByteSink; +class Edits; +class InitCanonIterData; +class LcccContext; + +class U_COMMON_API Hangul { +public: + /* Korean Hangul and Jamo constants */ + enum { + JAMO_L_BASE=0x1100, /* "lead" jamo */ + JAMO_L_END=0x1112, + JAMO_V_BASE=0x1161, /* "vowel" jamo */ + JAMO_V_END=0x1175, + JAMO_T_BASE=0x11a7, /* "trail" jamo */ + JAMO_T_END=0x11c2, + + HANGUL_BASE=0xac00, + HANGUL_END=0xd7a3, + + JAMO_L_COUNT=19, + JAMO_V_COUNT=21, + JAMO_T_COUNT=28, + + JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT, + + HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT, + HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT + }; + + static inline UBool isHangul(UChar32 c) { + return HANGUL_BASE<=c && c<HANGUL_LIMIT; + } + static inline UBool + isHangulLV(UChar32 c) { + c-=HANGUL_BASE; + return 0<=c && c<HANGUL_COUNT && c%JAMO_T_COUNT==0; + } + static inline UBool isJamoL(UChar32 c) { + return (uint32_t)(c-JAMO_L_BASE)<JAMO_L_COUNT; + } + static inline UBool isJamoV(UChar32 c) { + return (uint32_t)(c-JAMO_V_BASE)<JAMO_V_COUNT; + } + static inline UBool isJamoT(UChar32 c) { + int32_t t=c-JAMO_T_BASE; + return 0<t && t<JAMO_T_COUNT; // not JAMO_T_BASE itself + } + static UBool isJamo(UChar32 c) { + return JAMO_L_BASE<=c && c<=JAMO_T_END && + (c<=JAMO_L_END || (JAMO_V_BASE<=c && c<=JAMO_V_END) || JAMO_T_BASE<c); + } + + /** + * Decomposes c, which must be a Hangul syllable, into buffer + * and returns the length of the decomposition (2 or 3). + */ + static inline int32_t decompose(UChar32 c, UChar buffer[3]) { + c-=HANGUL_BASE; + UChar32 c2=c%JAMO_T_COUNT; + c/=JAMO_T_COUNT; + buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT); + buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT); + if(c2==0) { + return 2; + } else { + buffer[2]=(UChar)(JAMO_T_BASE+c2); + return 3; + } + } + + /** + * Decomposes c, which must be a Hangul syllable, into buffer. + * This is the raw, not recursive, decomposition. Its length is always 2. + */ + static inline void getRawDecomposition(UChar32 c, UChar buffer[2]) { + UChar32 orig=c; + c-=HANGUL_BASE; + UChar32 c2=c%JAMO_T_COUNT; + if(c2==0) { + c/=JAMO_T_COUNT; + buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT); + buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT); + } else { + buffer[0]=(UChar)(orig-c2); // LV syllable + buffer[1]=(UChar)(JAMO_T_BASE+c2); + } + } +private: + Hangul(); // no instantiation +}; + +class Normalizer2Impl; + +class U_COMMON_API ReorderingBuffer : public UMemory { +public: + /** Constructs only; init() should be called. */ + ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) : + impl(ni), str(dest), + start(NULL), reorderStart(NULL), limit(NULL), + remainingCapacity(0), lastCC(0) {} + /** Constructs, removes the string contents, and initializes for a small initial capacity. */ + ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest, UErrorCode &errorCode); + ~ReorderingBuffer() { + if(start!=NULL) { + str.releaseBuffer((int32_t)(limit-start)); + } + } + UBool init(int32_t destCapacity, UErrorCode &errorCode); + + UBool isEmpty() const { return start==limit; } + int32_t length() const { return (int32_t)(limit-start); } + UChar *getStart() { return start; } + UChar *getLimit() { return limit; } + uint8_t getLastCC() const { return lastCC; } + + UBool equals(const UChar *start, const UChar *limit) const; + UBool equals(const uint8_t *otherStart, const uint8_t *otherLimit) const; + + UBool append(UChar32 c, uint8_t cc, UErrorCode &errorCode) { + return (c<=0xffff) ? + appendBMP((UChar)c, cc, errorCode) : + appendSupplementary(c, cc, errorCode); + } + UBool append(const UChar *s, int32_t length, UBool isNFD, + uint8_t leadCC, uint8_t trailCC, + UErrorCode &errorCode); + UBool appendBMP(UChar c, uint8_t cc, UErrorCode &errorCode) { + if(remainingCapacity==0 && !resize(1, errorCode)) { + return false; + } + if(lastCC<=cc || cc==0) { + *limit++=c; + lastCC=cc; + if(cc<=1) { + reorderStart=limit; + } + } else { + insert(c, cc); + } + --remainingCapacity; + return true; + } + UBool appendZeroCC(UChar32 c, UErrorCode &errorCode); + UBool appendZeroCC(const UChar *s, const UChar *sLimit, UErrorCode &errorCode); + void remove(); + void removeSuffix(int32_t suffixLength); + void setReorderingLimit(UChar *newLimit) { + remainingCapacity+=(int32_t)(limit-newLimit); + reorderStart=limit=newLimit; + lastCC=0; + } + void copyReorderableSuffixTo(UnicodeString &s) const { + s.setTo(ConstChar16Ptr(reorderStart), (int32_t)(limit-reorderStart)); + } +private: + /* + * TODO: Revisit whether it makes sense to track reorderStart. + * It is set to after the last known character with cc<=1, + * which stops previousCC() before it reads that character and looks up its cc. + * previousCC() is normally only called from insert(). + * In other words, reorderStart speeds up the insertion of a combining mark + * into a multi-combining mark sequence where it does not belong at the end. + * This might not be worth the trouble. + * On the other hand, it's not a huge amount of trouble. + * + * We probably need it for UNORM_SIMPLE_APPEND. + */ + + UBool appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &errorCode); + void insert(UChar32 c, uint8_t cc); + static void writeCodePoint(UChar *p, UChar32 c) { + if(c<=0xffff) { + *p=(UChar)c; + } else { + p[0]=U16_LEAD(c); + p[1]=U16_TRAIL(c); + } + } + UBool resize(int32_t appendLength, UErrorCode &errorCode); + + const Normalizer2Impl &impl; + UnicodeString &str; + UChar *start, *reorderStart, *limit; + int32_t remainingCapacity; + uint8_t lastCC; + + // private backward iterator + void setIterator() { codePointStart=limit; } + void skipPrevious(); // Requires start<codePointStart. + uint8_t previousCC(); // Returns 0 if there is no previous character. + + UChar *codePointStart, *codePointLimit; +}; + +/** + * Low-level implementation of the Unicode Normalization Algorithm. + * For the data structure and details see the documentation at the end of + * this normalizer2impl.h and in the design doc at + * http://site.icu-project.org/design/normalization/custom + */ +class U_COMMON_API Normalizer2Impl : public UObject { +public: + Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) { } + virtual ~Normalizer2Impl(); + + void init(const int32_t *inIndexes, const UCPTrie *inTrie, + const uint16_t *inExtraData, const uint8_t *inSmallFCD); + + void addLcccChars(UnicodeSet &set) const; + void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const; + void addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const; + + // low-level properties ------------------------------------------------ *** + + UBool ensureCanonIterData(UErrorCode &errorCode) const; + + // The trie stores values for lead surrogate code *units*. + // Surrogate code *points* are inert. + uint16_t getNorm16(UChar32 c) const { + return U_IS_LEAD(c) ? + static_cast<uint16_t>(INERT) : + UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); + } + uint16_t getRawNorm16(UChar32 c) const { return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); } + + UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const { + if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) { + return UNORM_YES; + } else if(minMaybeYes<=norm16) { + return UNORM_MAYBE; + } else { + return UNORM_NO; + } + } + UBool isAlgorithmicNoNo(uint16_t norm16) const { return limitNoNo<=norm16 && norm16<minMaybeYes; } + UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeYes; } + UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo || minMaybeYes<=norm16; } + + uint8_t getCC(uint16_t norm16) const { + if(norm16>=MIN_NORMAL_MAYBE_YES) { + return getCCFromNormalYesOrMaybe(norm16); + } + if(norm16<minNoNo || limitNoNo<=norm16) { + return 0; + } + return getCCFromNoNo(norm16); + } + static uint8_t getCCFromNormalYesOrMaybe(uint16_t norm16) { + return (uint8_t)(norm16 >> OFFSET_SHIFT); + } + static uint8_t getCCFromYesOrMaybe(uint16_t norm16) { + return norm16>=MIN_NORMAL_MAYBE_YES ? getCCFromNormalYesOrMaybe(norm16) : 0; + } + uint8_t getCCFromYesOrMaybeCP(UChar32 c) const { + if (c < minCompNoMaybeCP) { return 0; } + return getCCFromYesOrMaybe(getNorm16(c)); + } + + /** + * Returns the FCD data for code point c. + * @param c A Unicode code point. + * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0. + */ + uint16_t getFCD16(UChar32 c) const { + if(c<minDecompNoCP) { + return 0; + } else if(c<=0xffff) { + if(!singleLeadMightHaveNonZeroFCD16(c)) { return 0; } + } + return getFCD16FromNormData(c); + } + /** + * Returns the FCD data for the next code point (post-increment). + * Might skip only a lead surrogate rather than the whole surrogate pair if none of + * the supplementary code points associated with the lead surrogate have non-zero FCD data. + * @param s A valid pointer into a string. Requires s!=limit. + * @param limit The end of the string, or NULL. + * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0. + */ + uint16_t nextFCD16(const UChar *&s, const UChar *limit) const { + UChar32 c=*s++; + if(c<minDecompNoCP || !singleLeadMightHaveNonZeroFCD16(c)) { + return 0; + } + UChar c2; + if(U16_IS_LEAD(c) && s!=limit && U16_IS_TRAIL(c2=*s)) { + c=U16_GET_SUPPLEMENTARY(c, c2); + ++s; + } + return getFCD16FromNormData(c); + } + /** + * Returns the FCD data for the previous code point (pre-decrement). + * @param start The start of the string. + * @param s A valid pointer into a string. Requires start<s. + * @return The lccc(c) in bits 15..8 and tccc(c) in bits 7..0. + */ + uint16_t previousFCD16(const UChar *start, const UChar *&s) const { + UChar32 c=*--s; + if(c<minDecompNoCP) { + return 0; + } + if(!U16_IS_TRAIL(c)) { + if(!singleLeadMightHaveNonZeroFCD16(c)) { + return 0; + } + } else { + UChar c2; + if(start<s && U16_IS_LEAD(c2=*(s-1))) { + c=U16_GET_SUPPLEMENTARY(c2, c); + --s; + } + } + return getFCD16FromNormData(c); + } + + /** Returns true if the single-or-lead code unit c might have non-zero FCD data. */ + UBool singleLeadMightHaveNonZeroFCD16(UChar32 lead) const { + // 0<=lead<=0xffff + uint8_t bits=smallFCD[lead>>8]; + if(bits==0) { return false; } + return (UBool)((bits>>((lead>>5)&7))&1); + } + /** Returns the FCD value from the regular normalization data. */ + uint16_t getFCD16FromNormData(UChar32 c) const; + + /** + * Gets the decomposition for one code point. + * @param c code point + * @param buffer out-only buffer for algorithmic decompositions + * @param length out-only, takes the length of the decomposition, if any + * @return pointer to the decomposition, or NULL if none + */ + const UChar *getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) const; + + /** + * Gets the raw decomposition for one code point. + * @param c code point + * @param buffer out-only buffer for algorithmic decompositions + * @param length out-only, takes the length of the decomposition, if any + * @return pointer to the decomposition, or NULL if none + */ + const UChar *getRawDecomposition(UChar32 c, UChar buffer[30], int32_t &length) const; + + UChar32 composePair(UChar32 a, UChar32 b) const; + + UBool isCanonSegmentStarter(UChar32 c) const; + UBool getCanonStartSet(UChar32 c, UnicodeSet &set) const; + + enum { + // Fixed norm16 values. + MIN_YES_YES_WITH_CC=0xfe02, + JAMO_VT=0xfe00, + MIN_NORMAL_MAYBE_YES=0xfc00, + JAMO_L=2, // offset=1 hasCompBoundaryAfter=false + INERT=1, // offset=0 hasCompBoundaryAfter=true + + // norm16 bit 0 is comp-boundary-after. + HAS_COMP_BOUNDARY_AFTER=1, + OFFSET_SHIFT=1, + + // For algorithmic one-way mappings, norm16 bits 2..1 indicate the + // tccc (0, 1, >1) for quick FCC boundary-after tests. + DELTA_TCCC_0=0, + DELTA_TCCC_1=2, + DELTA_TCCC_GT_1=4, + DELTA_TCCC_MASK=6, + DELTA_SHIFT=3, + + MAX_DELTA=0x40 + }; + + enum { + // Byte offsets from the start of the data, after the generic header. + IX_NORM_TRIE_OFFSET, + IX_EXTRA_DATA_OFFSET, + IX_SMALL_FCD_OFFSET, + IX_RESERVED3_OFFSET, + IX_RESERVED4_OFFSET, + IX_RESERVED5_OFFSET, + IX_RESERVED6_OFFSET, + IX_TOTAL_SIZE, + + // Code point thresholds for quick check codes. + IX_MIN_DECOMP_NO_CP, + IX_MIN_COMP_NO_MAYBE_CP, + + // Norm16 value thresholds for quick check combinations and types of extra data. + + /** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */ + IX_MIN_YES_NO, + /** Mappings are comp-normalized. */ + IX_MIN_NO_NO, + IX_LIMIT_NO_NO, + IX_MIN_MAYBE_YES, + + /** Mappings only in [minYesNoMappingsOnly..minNoNo[. */ + IX_MIN_YES_NO_MAPPINGS_ONLY, + /** Mappings are not comp-normalized but have a comp boundary before. */ + IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE, + /** Mappings do not have a comp boundary before. */ + IX_MIN_NO_NO_COMP_NO_MAYBE_CC, + /** Mappings to the empty string. */ + IX_MIN_NO_NO_EMPTY, + + IX_MIN_LCCC_CP, + IX_RESERVED19, + IX_COUNT + }; + + enum { + MAPPING_HAS_CCC_LCCC_WORD=0x80, + MAPPING_HAS_RAW_MAPPING=0x40, + // unused bit 0x20, + MAPPING_LENGTH_MASK=0x1f + }; + + enum { + COMP_1_LAST_TUPLE=0x8000, + COMP_1_TRIPLE=1, + COMP_1_TRAIL_LIMIT=0x3400, + COMP_1_TRAIL_MASK=0x7ffe, + COMP_1_TRAIL_SHIFT=9, // 10-1 for the "triple" bit + COMP_2_TRAIL_SHIFT=6, + COMP_2_TRAIL_MASK=0xffc0 + }; + + // higher-level functionality ------------------------------------------ *** + + // NFD without an NFD Normalizer2 instance. + UnicodeString &decompose(const UnicodeString &src, UnicodeString &dest, + UErrorCode &errorCode) const; + /** + * Decomposes [src, limit[ and writes the result to dest. + * limit can be NULL if src is NUL-terminated. + * destLengthEstimate is the initial dest buffer capacity and can be -1. + */ + void decompose(const UChar *src, const UChar *limit, + UnicodeString &dest, int32_t destLengthEstimate, + UErrorCode &errorCode) const; + + const UChar *decompose(const UChar *src, const UChar *limit, + ReorderingBuffer *buffer, UErrorCode &errorCode) const; + void decomposeAndAppend(const UChar *src, const UChar *limit, + UBool doDecompose, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const; + UBool compose(const UChar *src, const UChar *limit, + UBool onlyContiguous, + UBool doCompose, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const; + const UChar *composeQuickCheck(const UChar *src, const UChar *limit, + UBool onlyContiguous, + UNormalizationCheckResult *pQCResult) const; + void composeAndAppend(const UChar *src, const UChar *limit, + UBool doCompose, + UBool onlyContiguous, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const; + + /** sink==nullptr: isNormalized() */ + UBool composeUTF8(uint32_t options, UBool onlyContiguous, + const uint8_t *src, const uint8_t *limit, + ByteSink *sink, icu::Edits *edits, UErrorCode &errorCode) const; + + const UChar *makeFCD(const UChar *src, const UChar *limit, + ReorderingBuffer *buffer, UErrorCode &errorCode) const; + void makeFCDAndAppend(const UChar *src, const UChar *limit, + UBool doMakeFCD, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, + UErrorCode &errorCode) const; + + UBool hasDecompBoundaryBefore(UChar32 c) const; + UBool norm16HasDecompBoundaryBefore(uint16_t norm16) const; + UBool hasDecompBoundaryAfter(UChar32 c) const; + UBool norm16HasDecompBoundaryAfter(uint16_t norm16) const; + UBool isDecompInert(UChar32 c) const { return isDecompYesAndZeroCC(getNorm16(c)); } + + UBool hasCompBoundaryBefore(UChar32 c) const { + return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(getNorm16(c)); + } + UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous) const { + return norm16HasCompBoundaryAfter(getNorm16(c), onlyContiguous); + } + UBool isCompInert(UChar32 c, UBool onlyContiguous) const { + uint16_t norm16=getNorm16(c); + return isCompYesAndZeroCC(norm16) && + (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 && + (!onlyContiguous || isInert(norm16) || *getMapping(norm16) <= 0x1ff); + } + + UBool hasFCDBoundaryBefore(UChar32 c) const { return hasDecompBoundaryBefore(c); } + UBool hasFCDBoundaryAfter(UChar32 c) const { return hasDecompBoundaryAfter(c); } + UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; } +private: + friend class InitCanonIterData; + friend class LcccContext; + + UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; } + UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; } + static UBool isInert(uint16_t norm16) { return norm16==INERT; } + static UBool isJamoL(uint16_t norm16) { return norm16==JAMO_L; } + static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; } + uint16_t hangulLVT() const { return minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER; } + UBool isHangulLV(uint16_t norm16) const { return norm16==minYesNo; } + UBool isHangulLVT(uint16_t norm16) const { + return norm16==hangulLVT(); + } + UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; } + // UBool isCompYes(uint16_t norm16) const { + // return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo; + // } + // UBool isCompYesOrMaybe(uint16_t norm16) const { + // return norm16<minNoNo || minMaybeYes<=norm16; + // } + // UBool hasZeroCCFromDecompYes(uint16_t norm16) const { + // return norm16<=MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT; + // } + UBool isDecompYesAndZeroCC(uint16_t norm16) const { + return norm16<minYesNo || + norm16==JAMO_VT || + (minMaybeYes<=norm16 && norm16<=MIN_NORMAL_MAYBE_YES); + } + /** + * A little faster and simpler than isDecompYesAndZeroCC() but does not include + * the MaybeYes which combine-forward and have ccc=0. + * (Standard Unicode 10 normalization does not have such characters.) + */ + UBool isMostDecompYesAndZeroCC(uint16_t norm16) const { + return norm16<minYesNo || norm16==MIN_NORMAL_MAYBE_YES || norm16==JAMO_VT; + } + UBool isDecompNoAlgorithmic(uint16_t norm16) const { return norm16>=limitNoNo; } + + // For use with isCompYes(). + // Perhaps the compiler can combine the two tests for MIN_YES_YES_WITH_CC. + // static uint8_t getCCFromYes(uint16_t norm16) { + // return norm16>=MIN_YES_YES_WITH_CC ? getCCFromNormalYesOrMaybe(norm16) : 0; + // } + uint8_t getCCFromNoNo(uint16_t norm16) const { + const uint16_t *mapping=getMapping(norm16); + if(*mapping&MAPPING_HAS_CCC_LCCC_WORD) { + return (uint8_t)*(mapping-1); + } else { + return 0; + } + } + // requires that the [cpStart..cpLimit[ character passes isCompYesAndZeroCC() + uint8_t getTrailCCFromCompYesAndZeroCC(uint16_t norm16) const { + if(norm16<=minYesNo) { + return 0; // yesYes and Hangul LV have ccc=tccc=0 + } else { + // For Hangul LVT we harmlessly fetch a firstUnit with tccc=0 here. + return (uint8_t)(*getMapping(norm16)>>8); // tccc from yesNo + } + } + uint8_t getPreviousTrailCC(const UChar *start, const UChar *p) const; + uint8_t getPreviousTrailCC(const uint8_t *start, const uint8_t *p) const; + + // Requires algorithmic-NoNo. + UChar32 mapAlgorithmic(UChar32 c, uint16_t norm16) const { + return c+(norm16>>DELTA_SHIFT)-centerNoNoDelta; + } + UChar32 getAlgorithmicDelta(uint16_t norm16) const { + return (norm16>>DELTA_SHIFT)-centerNoNoDelta; + } + + // Requires minYesNo<norm16<limitNoNo. + const uint16_t *getMapping(uint16_t norm16) const { return extraData+(norm16>>OFFSET_SHIFT); } + const uint16_t *getCompositionsListForDecompYes(uint16_t norm16) const { + if(norm16<JAMO_L || MIN_NORMAL_MAYBE_YES<=norm16) { + return NULL; + } else if(norm16<minMaybeYes) { + return getMapping(norm16); // for yesYes; if Jamo L: harmless empty list + } else { + return maybeYesCompositions+norm16-minMaybeYes; + } + } + const uint16_t *getCompositionsListForComposite(uint16_t norm16) const { + // A composite has both mapping & compositions list. + const uint16_t *list=getMapping(norm16); + return list+ // mapping pointer + 1+ // +1 to skip the first unit with the mapping length + (*list&MAPPING_LENGTH_MASK); // + mapping length + } + const uint16_t *getCompositionsListForMaybe(uint16_t norm16) const { + // minMaybeYes<=norm16<MIN_NORMAL_MAYBE_YES + return maybeYesCompositions+((norm16-minMaybeYes)>>OFFSET_SHIFT); + } + /** + * @param c code point must have compositions + * @return compositions list pointer + */ + const uint16_t *getCompositionsList(uint16_t norm16) const { + return isDecompYes(norm16) ? + getCompositionsListForDecompYes(norm16) : + getCompositionsListForComposite(norm16); + } + + const UChar *copyLowPrefixFromNulTerminated(const UChar *src, + UChar32 minNeedDataCP, + ReorderingBuffer *buffer, + UErrorCode &errorCode) const; + const UChar *decomposeShort(const UChar *src, const UChar *limit, + UBool stopAtCompBoundary, UBool onlyContiguous, + ReorderingBuffer &buffer, UErrorCode &errorCode) const; + UBool decompose(UChar32 c, uint16_t norm16, + ReorderingBuffer &buffer, UErrorCode &errorCode) const; + + const uint8_t *decomposeShort(const uint8_t *src, const uint8_t *limit, + UBool stopAtCompBoundary, UBool onlyContiguous, + ReorderingBuffer &buffer, UErrorCode &errorCode) const; + + static int32_t combine(const uint16_t *list, UChar32 trail); + void addComposites(const uint16_t *list, UnicodeSet &set) const; + void recompose(ReorderingBuffer &buffer, int32_t recomposeStartIndex, + UBool onlyContiguous) const; + + UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const { + return c<minCompNoMaybeCP || norm16HasCompBoundaryBefore(norm16); + } + UBool norm16HasCompBoundaryBefore(uint16_t norm16) const { + return norm16 < minNoNoCompNoMaybeCC || isAlgorithmicNoNo(norm16); + } + UBool hasCompBoundaryBefore(const UChar *src, const UChar *limit) const; + UBool hasCompBoundaryBefore(const uint8_t *src, const uint8_t *limit) const; + UBool hasCompBoundaryAfter(const UChar *start, const UChar *p, + UBool onlyContiguous) const; + UBool hasCompBoundaryAfter(const uint8_t *start, const uint8_t *p, + UBool onlyContiguous) const; + UBool norm16HasCompBoundaryAfter(uint16_t norm16, UBool onlyContiguous) const { + return (norm16 & HAS_COMP_BOUNDARY_AFTER) != 0 && + (!onlyContiguous || isTrailCC01ForCompBoundaryAfter(norm16)); + } + /** For FCC: Given norm16 HAS_COMP_BOUNDARY_AFTER, does it have tccc<=1? */ + UBool isTrailCC01ForCompBoundaryAfter(uint16_t norm16) const { + return isInert(norm16) || (isDecompNoAlgorithmic(norm16) ? + (norm16 & DELTA_TCCC_MASK) <= DELTA_TCCC_1 : *getMapping(norm16) <= 0x1ff); + } + + const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p, UBool onlyContiguous) const; + const UChar *findNextCompBoundary(const UChar *p, const UChar *limit, UBool onlyContiguous) const; + + const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const; + const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const; + + void makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16, + CanonIterData &newData, UErrorCode &errorCode) const; + + int32_t getCanonValue(UChar32 c) const; + const UnicodeSet &getCanonStartSet(int32_t n) const; + + // UVersionInfo dataVersion; + + // BMP code point thresholds for quick check loops looking at single UTF-16 code units. + UChar minDecompNoCP; + UChar minCompNoMaybeCP; + UChar minLcccCP; + + // Norm16 value thresholds for quick check combinations and types of extra data. + uint16_t minYesNo; + uint16_t minYesNoMappingsOnly; + uint16_t minNoNo; + uint16_t minNoNoCompBoundaryBefore; + uint16_t minNoNoCompNoMaybeCC; + uint16_t minNoNoEmpty; + uint16_t limitNoNo; + uint16_t centerNoNoDelta; + uint16_t minMaybeYes; + + const UCPTrie *normTrie; + const uint16_t *maybeYesCompositions; + const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters + const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0 + + UInitOnce fCanonIterDataInitOnce = U_INITONCE_INITIALIZER; + CanonIterData *fCanonIterData; +}; + +// bits in canonIterData +#define CANON_NOT_SEGMENT_STARTER 0x80000000 +#define CANON_HAS_COMPOSITIONS 0x40000000 +#define CANON_HAS_SET 0x200000 +#define CANON_VALUE_MASK 0x1fffff + +/** + * ICU-internal shortcut for quick access to standard Unicode normalization. + */ +class U_COMMON_API Normalizer2Factory { +public: + static const Normalizer2 *getFCDInstance(UErrorCode &errorCode); + static const Normalizer2 *getFCCInstance(UErrorCode &errorCode); + static const Normalizer2 *getNoopInstance(UErrorCode &errorCode); + + static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode); + + static const Normalizer2Impl *getNFCImpl(UErrorCode &errorCode); + static const Normalizer2Impl *getNFKCImpl(UErrorCode &errorCode); + static const Normalizer2Impl *getNFKC_CFImpl(UErrorCode &errorCode); + + // Get the Impl instance of the Normalizer2. + // Must be used only when it is known that norm2 is a Normalizer2WithImpl instance. + static const Normalizer2Impl *getImpl(const Normalizer2 *norm2); +private: + Normalizer2Factory(); // No instantiation. +}; + +U_NAMESPACE_END + +U_CAPI int32_t U_EXPORT2 +unorm2_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Get the NF*_QC property for a code point, for u_getIntPropertyValue(). + * @internal + */ +U_CFUNC UNormalizationCheckResult +unorm_getQuickCheck(UChar32 c, UNormalizationMode mode); + +/** + * Gets the 16-bit FCD value (lead & trail CCs) for a code point, for u_getIntPropertyValue(). + * @internal + */ +U_CFUNC uint16_t +unorm_getFCD16(UChar32 c); + +/** + * Format of Normalizer2 .nrm data files. + * Format version 4.0. + * + * Normalizer2 .nrm data files provide data for the Unicode Normalization algorithms. + * ICU ships with data files for standard Unicode Normalization Forms + * NFC and NFD (nfc.nrm), NFKC and NFKD (nfkc.nrm) and NFKC_Casefold (nfkc_cf.nrm). + * Custom (application-specific) data can be built into additional .nrm files + * with the gennorm2 build tool. + * ICU ships with one such file, uts46.nrm, for the implementation of UTS #46. + * + * Normalizer2.getInstance() causes a .nrm file to be loaded, unless it has been + * cached already. Internally, Normalizer2Impl.load() reads the .nrm file. + * + * A .nrm file begins with a standard ICU data file header + * (DataHeader, see ucmndata.h and unicode/udata.h). + * The UDataInfo.dataVersion field usually contains the Unicode version + * for which the data was generated. + * + * After the header, the file contains the following parts. + * Constants are defined as enum values of the Normalizer2Impl class. + * + * Many details of the data structures are described in the design doc + * which is at http://site.icu-project.org/design/normalization/custom + * + * int32_t indexes[indexesLength]; -- indexesLength=indexes[IX_NORM_TRIE_OFFSET]/4; + * + * The first eight indexes are byte offsets in ascending order. + * Each byte offset marks the start of the next part in the data file, + * and the end of the previous one. + * When two consecutive byte offsets are the same, then the corresponding part is empty. + * Byte offsets are offsets from after the header, + * that is, from the beginning of the indexes[]. + * Each part starts at an offset with proper alignment for its data. + * If necessary, the previous part may include padding bytes to achieve this alignment. + * + * minDecompNoCP=indexes[IX_MIN_DECOMP_NO_CP] is the lowest code point + * with a decomposition mapping, that is, with NF*D_QC=No. + * minCompNoMaybeCP=indexes[IX_MIN_COMP_NO_MAYBE_CP] is the lowest code point + * with NF*C_QC=No (has a one-way mapping) or Maybe (combines backward). + * minLcccCP=indexes[IX_MIN_LCCC_CP] (index 18, new in formatVersion 3) + * is the lowest code point with lccc!=0. + * + * The next eight indexes are thresholds of 16-bit trie values for ranges of + * values indicating multiple normalization properties. + * They are listed here in threshold order, not in the order they are stored in the indexes. + * minYesNo=indexes[IX_MIN_YES_NO]; + * minYesNoMappingsOnly=indexes[IX_MIN_YES_NO_MAPPINGS_ONLY]; + * minNoNo=indexes[IX_MIN_NO_NO]; + * minNoNoCompBoundaryBefore=indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE]; + * minNoNoCompNoMaybeCC=indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]; + * minNoNoEmpty=indexes[IX_MIN_NO_NO_EMPTY]; + * limitNoNo=indexes[IX_LIMIT_NO_NO]; + * minMaybeYes=indexes[IX_MIN_MAYBE_YES]; + * See the normTrie description below and the design doc for details. + * + * UCPTrie normTrie; -- see ucptrie_impl.h and ucptrie.h, same as Java CodePointTrie + * + * The trie holds the main normalization data. Each code point is mapped to a 16-bit value. + * Rather than using independent bits in the value (which would require more than 16 bits), + * information is extracted primarily via range checks. + * Except, format version 3 uses bit 0 for hasCompBoundaryAfter(). + * For example, a 16-bit value norm16 in the range minYesNo<=norm16<minNoNo + * means that the character has NF*C_QC=Yes and NF*D_QC=No properties, + * which means it has a two-way (round-trip) decomposition mapping. + * Values in the range 2<=norm16<limitNoNo are also directly indexes into the extraData + * pointing to mappings, compositions lists, or both. + * Value norm16==INERT (0 in versions 1 & 2, 1 in version 3) + * means that the character is normalization-inert, that is, + * it does not have a mapping, does not participate in composition, has a zero + * canonical combining class, and forms a boundary where text before it and after it + * can be normalized independently. + * For details about how multiple properties are encoded in 16-bit values + * see the design doc. + * Note that the encoding cannot express all combinations of the properties involved; + * it only supports those combinations that are allowed by + * the Unicode Normalization algorithms. Details are in the design doc as well. + * The gennorm2 tool only builds .nrm files for data that conforms to the limitations. + * + * The trie has a value for each lead surrogate code unit representing the "worst case" + * properties of the 1024 supplementary characters whose UTF-16 form starts with + * the lead surrogate. If all of the 1024 supplementary characters are normalization-inert, + * then their lead surrogate code unit has the trie value INERT. + * When the lead surrogate unit's value exceeds the quick check minimum during processing, + * the properties for the full supplementary code point need to be looked up. + * + * uint16_t maybeYesCompositions[MIN_NORMAL_MAYBE_YES-minMaybeYes]; + * uint16_t extraData[]; + * + * There is only one byte offset for the end of these two arrays. + * The split between them is given by the constant and variable mentioned above. + * In version 3, the difference must be shifted right by OFFSET_SHIFT. + * + * The maybeYesCompositions array contains compositions lists for characters that + * combine both forward (as starters in composition pairs) + * and backward (as trailing characters in composition pairs). + * Such characters do not occur in Unicode 5.2 but are allowed by + * the Unicode Normalization algorithms. + * If there are no such characters, then minMaybeYes==MIN_NORMAL_MAYBE_YES + * and the maybeYesCompositions array is empty. + * If there are such characters, then minMaybeYes is subtracted from their norm16 values + * to get the index into this array. + * + * The extraData array contains compositions lists for "YesYes" characters, + * followed by mappings and optional compositions lists for "YesNo" characters, + * followed by only mappings for "NoNo" characters. + * (Referring to pairs of NFC/NFD quick check values.) + * The norm16 values of those characters are directly indexes into the extraData array. + * In version 3, the norm16 values must be shifted right by OFFSET_SHIFT + * for accessing extraData. + * + * The data structures for compositions lists and mappings are described in the design doc. + * + * uint8_t smallFCD[0x100]; -- new in format version 2 + * + * This is a bit set to help speed up FCD value lookups in the absence of a full + * UTrie2 or other large data structure with the full FCD value mapping. + * + * Each smallFCD bit is set if any of the corresponding 32 BMP code points + * has a non-zero FCD value (lccc!=0 or tccc!=0). + * Bit 0 of smallFCD[0] is for U+0000..U+001F. Bit 7 of smallFCD[0xff] is for U+FFE0..U+FFFF. + * A bit for 32 lead surrogates is set if any of the 32k corresponding + * _supplementary_ code points has a non-zero FCD value. + * + * This bit set is most useful for the large blocks of CJK characters with FCD=0. + * + * Changes from format version 1 to format version 2 --------------------------- + * + * - Addition of data for raw (not recursively decomposed) mappings. + * + The MAPPING_NO_COMP_BOUNDARY_AFTER bit in the extraData is now also set when + * the mapping is to an empty string or when the character combines-forward. + * This subsumes the one actual use of the MAPPING_PLUS_COMPOSITION_LIST bit which + * is then repurposed for the MAPPING_HAS_RAW_MAPPING bit. + * + For details see the design doc. + * - Addition of indexes[IX_MIN_YES_NO_MAPPINGS_ONLY] and separation of the yesNo extraData into + * distinct ranges (combines-forward vs. not) + * so that a range check can be used to find out if there is a compositions list. + * This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag. + * It is needed for the new (in ICU 49) composePair(), not for other normalization. + * - Addition of the smallFCD[] bit set. + * + * Changes from format version 2 to format version 3 (ICU 60) ------------------ + * + * - norm16 bit 0 indicates hasCompBoundaryAfter(), + * except that for contiguous composition (FCC) the tccc must be checked as well. + * Data indexes and ccc values are shifted left by one (OFFSET_SHIFT). + * Thresholds like minNoNo are tested before shifting. + * + * - Algorithmic mapping deltas are shifted left by two more bits (total DELTA_SHIFT), + * to make room for two bits (three values) indicating whether the tccc is 0, 1, or greater. + * See DELTA_TCCC_MASK etc. + * This helps with fetching tccc/FCD values and FCC hasCompBoundaryAfter(). + * minMaybeYes is 8-aligned so that the DELTA_TCCC_MASK bits can be tested directly. + * + * - Algorithmic mappings are only used for mapping to "comp yes and ccc=0" characters, + * and ASCII characters are mapped algorithmically only to other ASCII characters. + * This helps with hasCompBoundaryBefore() and compose() fast paths. + * It is never necessary any more to loop for algorithmic mappings. + * + * - Addition of indexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE], + * indexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC], and indexes[IX_MIN_NO_NO_EMPTY], + * and separation of the noNo extraData into distinct ranges. + * With this, the noNo norm16 value indicates whether the mapping is + * compose-normalized, not normalized but hasCompBoundaryBefore(), + * not even that, or maps to an empty string. + * hasCompBoundaryBefore() can be determined solely from the norm16 value. + * + * - The norm16 value for Hangul LVT is now different from that for Hangul LV, + * so that hasCompBoundaryAfter() need not check for the syllable type. + * For Hangul LV, minYesNo continues to be used (no comp-boundary-after). + * For Hangul LVT, minYesNoMappingsOnly|HAS_COMP_BOUNDARY_AFTER is used. + * The extraData units at these indexes are set to firstUnit=2 and firstUnit=3, respectively, + * to simplify some code. + * + * - The extraData firstUnit bit 5 is no longer necessary + * (norm16 bit 0 used instead of firstUnit MAPPING_NO_COMP_BOUNDARY_AFTER), + * is reserved again, and always set to 0. + * + * - Addition of indexes[IX_MIN_LCCC_CP], the first code point where lccc!=0. + * This used to be hardcoded to U+0300, but in data like NFKC_Casefold it is lower: + * U+00AD Soft Hyphen maps to an empty string, + * which is artificially assigned "worst case" values lccc=1 and tccc=255. + * + * - A mapping to an empty string has explicit lccc=1 and tccc=255 values. + * + * Changes from format version 3 to format version 4 (ICU 63) ------------------ + * + * Switched from UTrie2 to UCPTrie/CodePointTrie. + * + * The new trie no longer stores different values for surrogate code *units* vs. + * surrogate code *points*. + * Lead surrogates still have values for optimized UTF-16 string processing. + * When looking up code point properties, the code now checks for lead surrogates and + * treats them as inert. + * + * gennorm2 now has to reject mappings for surrogate code points. + * UTS #46 maps unpaired surrogates to U+FFFD in code rather than via its + * custom normalization data file. + */ + +#endif /* !UCONFIG_NO_NORMALIZATION */ +#endif /* __NORMALIZER2IMPL_H__ */ diff --git a/thirdparty/icu4c/common/normlzr.cpp b/thirdparty/icu4c/common/normlzr.cpp new file mode 100644 index 0000000000..2dea0ffc33 --- /dev/null +++ b/thirdparty/icu4c/common/normlzr.cpp @@ -0,0 +1,529 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ************************************************************************* + * COPYRIGHT: + * Copyright (c) 1996-2012, International Business Machines Corporation and + * others. All Rights Reserved. + ************************************************************************* + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/uniset.h" +#include "unicode/unistr.h" +#include "unicode/chariter.h" +#include "unicode/schriter.h" +#include "unicode/uchriter.h" +#include "unicode/normlzr.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "normalizer2impl.h" +#include "uprops.h" // for uniset_getUnicode32Instance() + +#if defined(move32) + // System can define move32 intrinsics, but the char iters define move32 method + // using same undef trick in headers, so undef here to re-enable the method. +#undef move32 +#endif + +U_NAMESPACE_BEGIN + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer) + +//------------------------------------------------------------------------- +// Constructors and other boilerplate +//------------------------------------------------------------------------- + +Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) : + UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), + text(new StringCharacterIterator(str)), + currentIndex(0), nextIndex(0), + buffer(), bufferPos(0) +{ + init(); +} + +Normalizer::Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode) : + UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), + text(new UCharCharacterIterator(str, length)), + currentIndex(0), nextIndex(0), + buffer(), bufferPos(0) +{ + init(); +} + +Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) : + UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), + text(iter.clone()), + currentIndex(0), nextIndex(0), + buffer(), bufferPos(0) +{ + init(); +} + +Normalizer::Normalizer(const Normalizer ©) : + UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions), + text(copy.text->clone()), + currentIndex(copy.currentIndex), nextIndex(copy.nextIndex), + buffer(copy.buffer), bufferPos(copy.bufferPos) +{ + init(); +} + +void +Normalizer::init() { + UErrorCode errorCode=U_ZERO_ERROR; + fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode); + if(fOptions&UNORM_UNICODE_3_2) { + delete fFilteredNorm2; + fNorm2=fFilteredNorm2= + new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode)); + } + if(U_FAILURE(errorCode)) { + errorCode=U_ZERO_ERROR; + fNorm2=Normalizer2Factory::getNoopInstance(errorCode); + } +} + +Normalizer::~Normalizer() +{ + delete fFilteredNorm2; + delete text; +} + +Normalizer* +Normalizer::clone() const +{ + return new Normalizer(*this); +} + +/** + * Generates a hash code for this iterator. + */ +int32_t Normalizer::hashCode() const +{ + return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex; +} + +UBool Normalizer::operator==(const Normalizer& that) const +{ + return + this==&that || + (fUMode==that.fUMode && + fOptions==that.fOptions && + *text==*that.text && + buffer==that.buffer && + bufferPos==that.bufferPos && + nextIndex==that.nextIndex); +} + +//------------------------------------------------------------------------- +// Static utility methods +//------------------------------------------------------------------------- + +void U_EXPORT2 +Normalizer::normalize(const UnicodeString& source, + UNormalizationMode mode, int32_t options, + UnicodeString& result, + UErrorCode &status) { + if(source.isBogus() || U_FAILURE(status)) { + result.setToBogus(); + if(U_SUCCESS(status)) { + status=U_ILLEGAL_ARGUMENT_ERROR; + } + } else { + UnicodeString localDest; + UnicodeString *dest; + + if(&source!=&result) { + dest=&result; + } else { + // the source and result strings are the same object, use a temporary one + dest=&localDest; + } + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); + if(U_SUCCESS(status)) { + if(options&UNORM_UNICODE_3_2) { + FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). + normalize(source, *dest, status); + } else { + n2->normalize(source, *dest, status); + } + } + if(dest==&localDest && U_SUCCESS(status)) { + result=*dest; + } + } +} + +void U_EXPORT2 +Normalizer::compose(const UnicodeString& source, + UBool compat, int32_t options, + UnicodeString& result, + UErrorCode &status) { + normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status); +} + +void U_EXPORT2 +Normalizer::decompose(const UnicodeString& source, + UBool compat, int32_t options, + UnicodeString& result, + UErrorCode &status) { + normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status); +} + +UNormalizationCheckResult +Normalizer::quickCheck(const UnicodeString& source, + UNormalizationMode mode, int32_t options, + UErrorCode &status) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); + if(U_SUCCESS(status)) { + if(options&UNORM_UNICODE_3_2) { + return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). + quickCheck(source, status); + } else { + return n2->quickCheck(source, status); + } + } else { + return UNORM_MAYBE; + } +} + +UBool +Normalizer::isNormalized(const UnicodeString& source, + UNormalizationMode mode, int32_t options, + UErrorCode &status) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); + if(U_SUCCESS(status)) { + if(options&UNORM_UNICODE_3_2) { + return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). + isNormalized(source, status); + } else { + return n2->isNormalized(source, status); + } + } else { + return FALSE; + } +} + +UnicodeString & U_EXPORT2 +Normalizer::concatenate(const UnicodeString &left, const UnicodeString &right, + UnicodeString &result, + UNormalizationMode mode, int32_t options, + UErrorCode &errorCode) { + if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) { + result.setToBogus(); + if(U_SUCCESS(errorCode)) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + } + } else { + UnicodeString localDest; + UnicodeString *dest; + + if(&right!=&result) { + dest=&result; + } else { + // the right and result strings are the same object, use a temporary one + dest=&localDest; + } + *dest=left; + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode); + if(U_SUCCESS(errorCode)) { + if(options&UNORM_UNICODE_3_2) { + FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)). + append(*dest, right, errorCode); + } else { + n2->append(*dest, right, errorCode); + } + } + if(dest==&localDest && U_SUCCESS(errorCode)) { + result=*dest; + } + } + return result; +} + +//------------------------------------------------------------------------- +// Iteration API +//------------------------------------------------------------------------- + +/** + * Return the current character in the normalized text. + */ +UChar32 Normalizer::current() { + if(bufferPos<buffer.length() || nextNormalize()) { + return buffer.char32At(bufferPos); + } else { + return DONE; + } +} + +/** + * Return the next character in the normalized text and advance + * the iteration position by one. If the end + * of the text has already been reached, {@link #DONE} is returned. + */ +UChar32 Normalizer::next() { + if(bufferPos<buffer.length() || nextNormalize()) { + UChar32 c=buffer.char32At(bufferPos); + bufferPos+=U16_LENGTH(c); + return c; + } else { + return DONE; + } +} + +/** + * Return the previous character in the normalized text and decrement + * the iteration position by one. If the beginning + * of the text has already been reached, {@link #DONE} is returned. + */ +UChar32 Normalizer::previous() { + if(bufferPos>0 || previousNormalize()) { + UChar32 c=buffer.char32At(bufferPos-1); + bufferPos-=U16_LENGTH(c); + return c; + } else { + return DONE; + } +} + +void Normalizer::reset() { + currentIndex=nextIndex=text->setToStart(); + clearBuffer(); +} + +void +Normalizer::setIndexOnly(int32_t index) { + text->setIndex(index); // pins index + currentIndex=nextIndex=text->getIndex(); + clearBuffer(); +} + +/** + * Return the first character in the normalized text. This resets + * the <tt>Normalizer's</tt> position to the beginning of the text. + */ +UChar32 Normalizer::first() { + reset(); + return next(); +} + +/** + * Return the last character in the normalized text. This resets + * the <tt>Normalizer's</tt> position to be just before the + * the input text corresponding to that normalized character. + */ +UChar32 Normalizer::last() { + currentIndex=nextIndex=text->setToEnd(); + clearBuffer(); + return previous(); +} + +/** + * Retrieve the current iteration position in the input text that is + * being normalized. This method is useful in applications such as + * searching, where you need to be able to determine the position in + * the input text that corresponds to a given normalized output character. + * <p> + * <b>Note:</b> This method sets the position in the <em>input</em>, while + * {@link #next} and {@link #previous} iterate through characters in the + * <em>output</em>. This means that there is not necessarily a one-to-one + * correspondence between characters returned by <tt>next</tt> and + * <tt>previous</tt> and the indices passed to and returned from + * <tt>setIndex</tt> and {@link #getIndex}. + * + */ +int32_t Normalizer::getIndex() const { + if(bufferPos<buffer.length()) { + return currentIndex; + } else { + return nextIndex; + } +} + +/** + * Retrieve the index of the start of the input text. This is the begin index + * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt> + * over which this <tt>Normalizer</tt> is iterating + */ +int32_t Normalizer::startIndex() const { + return text->startIndex(); +} + +/** + * Retrieve the index of the end of the input text. This is the end index + * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt> + * over which this <tt>Normalizer</tt> is iterating + */ +int32_t Normalizer::endIndex() const { + return text->endIndex(); +} + +//------------------------------------------------------------------------- +// Property access methods +//------------------------------------------------------------------------- + +void +Normalizer::setMode(UNormalizationMode newMode) +{ + fUMode = newMode; + init(); +} + +UNormalizationMode +Normalizer::getUMode() const +{ + return fUMode; +} + +void +Normalizer::setOption(int32_t option, + UBool value) +{ + if (value) { + fOptions |= option; + } else { + fOptions &= (~option); + } + init(); +} + +UBool +Normalizer::getOption(int32_t option) const +{ + return (fOptions & option) != 0; +} + +/** + * Set the input text over which this <tt>Normalizer</tt> will iterate. + * The iteration position is set to the beginning of the input text. + */ +void +Normalizer::setText(const UnicodeString& newText, + UErrorCode &status) +{ + if (U_FAILURE(status)) { + return; + } + CharacterIterator *newIter = new StringCharacterIterator(newText); + if (newIter == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + delete text; + text = newIter; + reset(); +} + +/** + * Set the input text over which this <tt>Normalizer</tt> will iterate. + * The iteration position is set to the beginning of the string. + */ +void +Normalizer::setText(const CharacterIterator& newText, + UErrorCode &status) +{ + if (U_FAILURE(status)) { + return; + } + CharacterIterator *newIter = newText.clone(); + if (newIter == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + delete text; + text = newIter; + reset(); +} + +void +Normalizer::setText(ConstChar16Ptr newText, + int32_t length, + UErrorCode &status) +{ + if (U_FAILURE(status)) { + return; + } + CharacterIterator *newIter = new UCharCharacterIterator(newText, length); + if (newIter == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + delete text; + text = newIter; + reset(); +} + +/** + * Copies the text under iteration into the UnicodeString referred to by "result". + * @param result Receives a copy of the text under iteration. + */ +void +Normalizer::getText(UnicodeString& result) +{ + text->getText(result); +} + +//------------------------------------------------------------------------- +// Private utility methods +//------------------------------------------------------------------------- + +void Normalizer::clearBuffer() { + buffer.remove(); + bufferPos=0; +} + +UBool +Normalizer::nextNormalize() { + clearBuffer(); + currentIndex=nextIndex; + text->setIndex(nextIndex); + if(!text->hasNext()) { + return FALSE; + } + // Skip at least one character so we make progress. + UnicodeString segment(text->next32PostInc()); + while(text->hasNext()) { + UChar32 c; + if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) { + text->move32(-1, CharacterIterator::kCurrent); + break; + } + segment.append(c); + } + nextIndex=text->getIndex(); + UErrorCode errorCode=U_ZERO_ERROR; + fNorm2->normalize(segment, buffer, errorCode); + return U_SUCCESS(errorCode) && !buffer.isEmpty(); +} + +UBool +Normalizer::previousNormalize() { + clearBuffer(); + nextIndex=currentIndex; + text->setIndex(currentIndex); + if(!text->hasPrevious()) { + return FALSE; + } + UnicodeString segment; + while(text->hasPrevious()) { + UChar32 c=text->previous32(); + segment.insert(0, c); + if(fNorm2->hasBoundaryBefore(c)) { + break; + } + } + currentIndex=text->getIndex(); + UErrorCode errorCode=U_ZERO_ERROR; + fNorm2->normalize(segment, buffer, errorCode); + bufferPos=buffer.length(); + return U_SUCCESS(errorCode) && !buffer.isEmpty(); +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/thirdparty/icu4c/common/parsepos.cpp b/thirdparty/icu4c/common/parsepos.cpp new file mode 100644 index 0000000000..56c6c78813 --- /dev/null +++ b/thirdparty/icu4c/common/parsepos.cpp @@ -0,0 +1,23 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2003-2003, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#include "unicode/parsepos.h" + +U_NAMESPACE_BEGIN + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ParsePosition) + +ParsePosition::~ParsePosition() {} + +ParsePosition * +ParsePosition::clone() const { + return new ParsePosition(*this); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/patternprops.cpp b/thirdparty/icu4c/common/patternprops.cpp new file mode 100644 index 0000000000..c38a7e276d --- /dev/null +++ b/thirdparty/icu4c/common/patternprops.cpp @@ -0,0 +1,230 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: patternprops.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011mar13 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "patternprops.h" + +U_NAMESPACE_BEGIN + +/* + * One byte per Latin-1 character. + * Bit 0 is set if either Pattern property is true, + * bit 1 if Pattern_Syntax is true, + * bit 2 if Pattern_White_Space is true. + * That is, Pattern_Syntax is encoded as 3 and Pattern_White_Space as 5. + */ +static const uint8_t latin1[256]={ + // WS: 9..D + 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // WS: 20 Syntax: 21..2F + 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + // Syntax: 3A..40 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: 5B..5E + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, + // Syntax: 60 + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: 7B..7E + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, + // WS: 85 + 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: A1..A7, A9, AB, AC, AE + 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0, + // Syntax: B0, B1, B6, BB, BF + 3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: D7 + 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: F7 + 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* + * One byte per 32 characters from U+2000..U+303F indexing into + * a small table of 32-bit data words. + * The first two data words are all-zeros and all-ones. + */ +static const uint8_t index2000[130]={ + 2, 3, 4, 0, 0, 0, 0, 0, // 20xx + 0, 0, 0, 0, 5, 1, 1, 1, // 21xx + 1, 1, 1, 1, 1, 1, 1, 1, // 22xx + 1, 1, 1, 1, 1, 1, 1, 1, // 23xx + 1, 1, 1, 0, 0, 0, 0, 0, // 24xx + 1, 1, 1, 1, 1, 1, 1, 1, // 25xx + 1, 1, 1, 1, 1, 1, 1, 1, // 26xx + 1, 1, 1, 6, 7, 1, 1, 1, // 27xx + 1, 1, 1, 1, 1, 1, 1, 1, // 28xx + 1, 1, 1, 1, 1, 1, 1, 1, // 29xx + 1, 1, 1, 1, 1, 1, 1, 1, // 2Axx + 1, 1, 1, 1, 1, 1, 1, 1, // 2Bxx + 0, 0, 0, 0, 0, 0, 0, 0, // 2Cxx + 0, 0, 0, 0, 0, 0, 0, 0, // 2Dxx + 1, 1, 1, 1, 0, 0, 0, 0, // 2Exx + 0, 0, 0, 0, 0, 0, 0, 0, // 2Fxx + 8, 9 // 3000..303F +}; + +/* + * One 32-bit integer per 32 characters. Ranges of all-false and all-true + * are mapped to the first two values, other ranges map to appropriate bit patterns. + */ +static const uint32_t syntax2000[]={ + 0, + 0xffffffff, + 0xffff0000, // 2: 2010..201F + 0x7fff00ff, // 3: 2020..2027, 2030..203E + 0x7feffffe, // 4: 2041..2053, 2055..205E + 0xffff0000, // 5: 2190..219F + 0x003fffff, // 6: 2760..2775 + 0xfff00000, // 7: 2794..279F + 0xffffff0e, // 8: 3001..3003, 3008..301F + 0x00010001 // 9: 3020, 3030 +}; + +/* + * Same as syntax2000, but with additional bits set for the + * Pattern_White_Space characters 200E 200F 2028 2029. + */ +static const uint32_t syntaxOrWhiteSpace2000[]={ + 0, + 0xffffffff, + 0xffffc000, // 2: 200E..201F + 0x7fff03ff, // 3: 2020..2029, 2030..203E + 0x7feffffe, // 4: 2041..2053, 2055..205E + 0xffff0000, // 5: 2190..219F + 0x003fffff, // 6: 2760..2775 + 0xfff00000, // 7: 2794..279F + 0xffffff0e, // 8: 3001..3003, 3008..301F + 0x00010001 // 9: 3020, 3030 +}; + +UBool +PatternProps::isSyntax(UChar32 c) { + if(c<0) { + return FALSE; + } else if(c<=0xff) { + return (UBool)(latin1[c]>>1)&1; + } else if(c<0x2010) { + return FALSE; + } else if(c<=0x3030) { + uint32_t bits=syntax2000[index2000[(c-0x2000)>>5]]; + return (UBool)((bits>>(c&0x1f))&1); + } else if(0xfd3e<=c && c<=0xfe46) { + return c<=0xfd3f || 0xfe45<=c; + } else { + return FALSE; + } +} + +UBool +PatternProps::isSyntaxOrWhiteSpace(UChar32 c) { + if(c<0) { + return FALSE; + } else if(c<=0xff) { + return (UBool)(latin1[c]&1); + } else if(c<0x200e) { + return FALSE; + } else if(c<=0x3030) { + uint32_t bits=syntaxOrWhiteSpace2000[index2000[(c-0x2000)>>5]]; + return (UBool)((bits>>(c&0x1f))&1); + } else if(0xfd3e<=c && c<=0xfe46) { + return c<=0xfd3f || 0xfe45<=c; + } else { + return FALSE; + } +} + +UBool +PatternProps::isWhiteSpace(UChar32 c) { + if(c<0) { + return FALSE; + } else if(c<=0xff) { + return (UBool)(latin1[c]>>2)&1; + } else if(0x200e<=c && c<=0x2029) { + return c<=0x200f || 0x2028<=c; + } else { + return FALSE; + } +} + +const UChar * +PatternProps::skipWhiteSpace(const UChar *s, int32_t length) { + while(length>0 && isWhiteSpace(*s)) { + ++s; + --length; + } + return s; +} + +int32_t +PatternProps::skipWhiteSpace(const UnicodeString& s, int32_t start) { + int32_t i = start; + int32_t length = s.length(); + while(i<length && isWhiteSpace(s.charAt(i))) { + ++i; + } + return i; +} + +const UChar * +PatternProps::trimWhiteSpace(const UChar *s, int32_t &length) { + if(length<=0 || (!isWhiteSpace(s[0]) && !isWhiteSpace(s[length-1]))) { + return s; + } + int32_t start=0; + int32_t limit=length; + while(start<limit && isWhiteSpace(s[start])) { + ++start; + } + if(start<limit) { + // There is non-white space at start; we will not move limit below that, + // so we need not test start<limit in the loop. + while(isWhiteSpace(s[limit-1])) { + --limit; + } + } + length=limit-start; + return s+start; +} + +UBool +PatternProps::isIdentifier(const UChar *s, int32_t length) { + if(length<=0) { + return FALSE; + } + const UChar *limit=s+length; + do { + if(isSyntaxOrWhiteSpace(*s++)) { + return FALSE; + } + } while(s<limit); + return TRUE; +} + +const UChar * +PatternProps::skipIdentifier(const UChar *s, int32_t length) { + while(length>0 && !isSyntaxOrWhiteSpace(*s)) { + ++s; + --length; + } + return s; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/patternprops.h b/thirdparty/icu4c/common/patternprops.h new file mode 100644 index 0000000000..95898d580c --- /dev/null +++ b/thirdparty/icu4c/common/patternprops.h @@ -0,0 +1,98 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: patternprops.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011mar13 +* created by: Markus W. Scherer +*/ + +#ifndef __PATTERNPROPS_H__ +#define __PATTERNPROPS_H__ + +#include "unicode/unistr.h" +#include "unicode/utypes.h" + +U_NAMESPACE_BEGIN + +/** + * Implements the immutable Unicode properties Pattern_Syntax and Pattern_White_Space. + * Hardcodes these properties, does not load data, does not depend on other ICU classes. + * <p> + * Note: Both properties include ASCII as well as non-ASCII, non-Latin-1 code points, + * and both properties only include BMP code points (no supplementary ones). + * Pattern_Syntax includes some unassigned code points. + * <p> + * [:Pattern_White_Space:] = + * [\u0009-\u000D\ \u0085\u200E\u200F\u2028\u2029] + * <p> + * [:Pattern_Syntax:] = + * [!-/\:-@\[-\^`\{-~\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE + * \u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7 + * \u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E + * \u2190-\u245F\u2500-\u2775\u2794-\u2BFF\u2E00-\u2E7F + * \u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46] + * @author mscherer + */ +class U_COMMON_API PatternProps { +public: + /** + * @return true if c is a Pattern_Syntax code point. + */ + static UBool isSyntax(UChar32 c); + + /** + * @return true if c is a Pattern_Syntax or Pattern_White_Space code point. + */ + static UBool isSyntaxOrWhiteSpace(UChar32 c); + + /** + * @return true if c is a Pattern_White_Space character. + */ + static UBool isWhiteSpace(UChar32 c); + + /** + * Skips over Pattern_White_Space starting at s. + * @return The smallest pointer at or after s with a non-white space character. + */ + static const UChar *skipWhiteSpace(const UChar *s, int32_t length); + + /** + * Skips over Pattern_White_Space starting at index start in s. + * @return The smallest index at or after start with a non-white space character. + */ + static int32_t skipWhiteSpace(const UnicodeString &s, int32_t start); + + /** + * @return s except with leading and trailing Pattern_White_Space removed and length adjusted. + */ + static const UChar *trimWhiteSpace(const UChar *s, int32_t &length); + + /** + * Tests whether the string contains a "pattern identifier", that is, + * whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters. + * @return true if there are no Pattern_White_Space or Pattern_Syntax characters in s. + */ + static UBool isIdentifier(const UChar *s, int32_t length); + + /** + * Skips over a "pattern identifier" starting at index s. + * @return The smallest pointer at or after s with + * a Pattern_White_Space or Pattern_Syntax character. + */ + static const UChar *skipIdentifier(const UChar *s, int32_t length); + +private: + PatternProps(); // no constructor: all static methods +}; + +U_NAMESPACE_END + +#endif // __PATTERNPROPS_H__ diff --git a/thirdparty/icu4c/common/pluralmap.cpp b/thirdparty/icu4c/common/pluralmap.cpp new file mode 100644 index 0000000000..ec87f0198e --- /dev/null +++ b/thirdparty/icu4c/common/pluralmap.cpp @@ -0,0 +1,44 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + * Copyright (C) 2015, International Business Machines Corporation and + * others. All Rights Reserved. + */ + +#include "unicode/unistr.h" +#include "charstr.h" +#include "cstring.h" +#include "pluralmap.h" + +U_NAMESPACE_BEGIN + +static const char * const gPluralForms[] = { + "other", "zero", "one", "two", "few", "many"}; + +PluralMapBase::Category +PluralMapBase::toCategory(const char *pluralForm) { + for (int32_t i = 0; i < UPRV_LENGTHOF(gPluralForms); ++i) { + if (uprv_strcmp(pluralForm, gPluralForms[i]) == 0) { + return static_cast<Category>(i); + } + } + return NONE; +} + +PluralMapBase::Category +PluralMapBase::toCategory(const UnicodeString &pluralForm) { + CharString cCategory; + UErrorCode status = U_ZERO_ERROR; + cCategory.appendInvariantChars(pluralForm, status); + return U_FAILURE(status) ? NONE : toCategory(cCategory.data()); +} + +const char *PluralMapBase::getCategoryName(Category c) { + int32_t index = c; + return (index < 0 || index >= UPRV_LENGTHOF(gPluralForms)) ? + NULL : gPluralForms[index]; +} + + +U_NAMESPACE_END + diff --git a/thirdparty/icu4c/common/pluralmap.h b/thirdparty/icu4c/common/pluralmap.h new file mode 100644 index 0000000000..d898ac4671 --- /dev/null +++ b/thirdparty/icu4c/common/pluralmap.h @@ -0,0 +1,292 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2015, International Business Machines Corporation and +* others. All Rights Reserved. +****************************************************************************** +* +* File pluralmap.h - PluralMap class that maps plural categories to values. +****************************************************************************** +*/ + +#ifndef __PLURAL_MAP_H__ +#define __PLURAL_MAP_H__ + +#include "unicode/uobject.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + +class UnicodeString; + +class U_COMMON_API PluralMapBase : public UMemory { +public: + /** + * The names of all the plural categories. NONE is not an actual plural + * category, but rather represents the absense of a plural category. + */ + enum Category { + NONE = -1, + OTHER, + ZERO, + ONE, + TWO, + FEW, + MANY, + CATEGORY_COUNT + }; + + /** + * Converts a category name such as "zero", "one", "two", "few", "many" + * or "other" to a category enum. Returns NONE for an unrecognized + * category name. + */ + static Category toCategory(const char *categoryName); + + /** + * Converts a category name such as "zero", "one", "two", "few", "many" + * or "other" to a category enum. Returns NONE for urecongized + * category name. + */ + static Category toCategory(const UnicodeString &categoryName); + + /** + * Converts a category to a name. + * Passing NONE or CATEGORY_COUNT for category returns NULL. + */ + static const char *getCategoryName(Category category); +}; + +/** + * A Map of plural categories to values. It maintains ownership of the + * values. + * + * Type T is the value type. T must provide the followng: + * 1) Default constructor + * 2) Copy constructor + * 3) Assignment operator + * 4) Must extend UMemory + */ +template<typename T> +class PluralMap : public PluralMapBase { +public: + /** + * Other category is maps to a copy of the default value. + */ + PluralMap() : fOtherVariant() { + initializeNew(); + } + + /** + * Other category is mapped to otherVariant. + */ + PluralMap(const T &otherVariant) : fOtherVariant(otherVariant) { + initializeNew(); + } + + PluralMap(const PluralMap<T> &other) : fOtherVariant(other.fOtherVariant) { + fVariants[0] = &fOtherVariant; + for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) { + fVariants[i] = other.fVariants[i] ? + new T(*other.fVariants[i]) : NULL; + } + } + + PluralMap<T> &operator=(const PluralMap<T> &other) { + if (this == &other) { + return *this; + } + for (int32_t i = 0; i < UPRV_LENGTHOF(fVariants); ++i) { + if (fVariants[i] != NULL && other.fVariants[i] != NULL) { + *fVariants[i] = *other.fVariants[i]; + } else if (fVariants[i] != NULL) { + delete fVariants[i]; + fVariants[i] = NULL; + } else if (other.fVariants[i] != NULL) { + fVariants[i] = new T(*other.fVariants[i]); + } else { + // do nothing + } + } + return *this; + } + + ~PluralMap() { + for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) { + delete fVariants[i]; + } + } + + /** + * Removes all mappings and makes 'other' point to the default value. + */ + void clear() { + *fVariants[0] = T(); + for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) { + delete fVariants[i]; + fVariants[i] = NULL; + } + } + + /** + * Iterates through the mappings in this instance, set index to NONE + * prior to using. Call next repeatedly to get the values until it + * returns NULL. Each time next returns, caller may pass index + * to getCategoryName() to get the name of the plural category. + * When this function returns NULL, index is CATEGORY_COUNT + */ + const T *next(Category &index) const { + int32_t idx = index; + ++idx; + for (; idx < UPRV_LENGTHOF(fVariants); ++idx) { + if (fVariants[idx] != NULL) { + index = static_cast<Category>(idx); + return fVariants[idx]; + } + } + index = static_cast<Category>(idx); + return NULL; + } + + /** + * non const version of next. + */ + T *nextMutable(Category &index) { + const T *result = next(index); + return const_cast<T *>(result); + } + + /** + * Returns the 'other' variant. + * Same as calling get(OTHER). + */ + const T &getOther() const { + return get(OTHER); + } + + /** + * Returns the value associated with a category. + * If no value found, or v is NONE or CATEGORY_COUNT, falls + * back to returning the value for the 'other' category. + */ + const T &get(Category v) const { + int32_t index = v; + if (index < 0 || index >= UPRV_LENGTHOF(fVariants) || fVariants[index] == NULL) { + return *fVariants[0]; + } + return *fVariants[index]; + } + + /** + * Convenience routine to get the value by category name. Otherwise + * works just like get(Category). + */ + const T &get(const char *category) const { + return get(toCategory(category)); + } + + /** + * Convenience routine to get the value by category name as a + * UnicodeString. Otherwise works just like get(category). + */ + const T &get(const UnicodeString &category) const { + return get(toCategory(category)); + } + + /** + * Returns a pointer to the value associated with a category + * that caller can safely modify. If the value was defaulting to the 'other' + * variant because no explicit value was stored, this method creates a + * new value using the default constructor at the returned pointer. + * + * @param category the category with the value to change. + * @param status error returned here if index is NONE or CATEGORY_COUNT + * or memory could not be allocated, or any other error happens. + */ + T *getMutable( + Category category, + UErrorCode &status) { + return getMutable(category, NULL, status); + } + + /** + * Convenience routine to get a mutable pointer to a value by category name. + * Otherwise works just like getMutable(Category, UErrorCode &). + * reports an error if the category name is invalid. + */ + T *getMutable( + const char *category, + UErrorCode &status) { + return getMutable(toCategory(category), NULL, status); + } + + /** + * Just like getMutable(Category, UErrorCode &) but copies defaultValue to + * returned pointer if it was defaulting to the 'other' variant + * because no explicit value was stored. + */ + T *getMutableWithDefault( + Category category, + const T &defaultValue, + UErrorCode &status) { + return getMutable(category, &defaultValue, status); + } + + /** + * Returns true if this object equals rhs. + */ + UBool equals( + const PluralMap<T> &rhs, + UBool (*eqFunc)(const T &, const T &)) const { + for (int32_t i = 0; i < UPRV_LENGTHOF(fVariants); ++i) { + if (fVariants[i] == rhs.fVariants[i]) { + continue; + } + if (fVariants[i] == NULL || rhs.fVariants[i] == NULL) { + return false; + } + if (!eqFunc(*fVariants[i], *rhs.fVariants[i])) { + return false; + } + } + return true; + } + +private: + T fOtherVariant; + T* fVariants[6]; + + T *getMutable( + Category category, + const T *defaultValue, + UErrorCode &status) { + if (U_FAILURE(status)) { + return NULL; + } + int32_t index = category; + if (index < 0 || index >= UPRV_LENGTHOF(fVariants)) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + if (fVariants[index] == NULL) { + fVariants[index] = defaultValue == NULL ? + new T() : new T(*defaultValue); + } + if (!fVariants[index]) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return fVariants[index]; + } + + void initializeNew() { + fVariants[0] = &fOtherVariant; + for (int32_t i = 1; i < UPRV_LENGTHOF(fVariants); ++i) { + fVariants[i] = NULL; + } + } +}; + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/propname.cpp b/thirdparty/icu4c/common/propname.cpp new file mode 100644 index 0000000000..a12eb7d913 --- /dev/null +++ b/thirdparty/icu4c/common/propname.cpp @@ -0,0 +1,328 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2002-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: October 30 2002 +* Since: ICU 2.4 +* 2010nov19 Markus Scherer Rewrite for formatVersion 2. +********************************************************************** +*/ +#include "propname.h" +#include "unicode/uchar.h" +#include "unicode/udata.h" +#include "unicode/uscript.h" +#include "umutex.h" +#include "cmemory.h" +#include "cstring.h" +#include "uarrsort.h" +#include "uinvchar.h" + +#define INCLUDED_FROM_PROPNAME_CPP +#include "propname_data.h" + +U_CDECL_BEGIN + +/** + * Get the next non-ignorable ASCII character from a property name + * and lowercases it. + * @return ((advance count for the name)<<8)|character + */ +static inline int32_t +getASCIIPropertyNameChar(const char *name) { + int32_t i; + char c; + + /* Ignore delimiters '-', '_', and ASCII White_Space */ + for(i=0; + (c=name[i++])==0x2d || c==0x5f || + c==0x20 || (0x09<=c && c<=0x0d); + ) {} + + if(c!=0) { + return (i<<8)|(uint8_t)uprv_asciitolower((char)c); + } else { + return i<<8; + } +} + +/** + * Get the next non-ignorable EBCDIC character from a property name + * and lowercases it. + * @return ((advance count for the name)<<8)|character + */ +static inline int32_t +getEBCDICPropertyNameChar(const char *name) { + int32_t i; + char c; + + /* Ignore delimiters '-', '_', and EBCDIC White_Space */ + for(i=0; + (c=name[i++])==0x60 || c==0x6d || + c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d; + ) {} + + if(c!=0) { + return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c); + } else { + return i<<8; + } +} + +/** + * Unicode property names and property value names are compared "loosely". + * + * UCD.html 4.0.1 says: + * For all property names, property value names, and for property values for + * Enumerated, Binary, or Catalog properties, use the following + * loose matching rule: + * + * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. + * + * This function does just that, for (char *) name strings. + * It is almost identical to ucnv_compareNames() but also ignores + * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). + * + * @internal + */ + +U_CAPI int32_t U_EXPORT2 +uprv_compareASCIIPropertyNames(const char *name1, const char *name2) { + int32_t rc, r1, r2; + + for(;;) { + r1=getASCIIPropertyNameChar(name1); + r2=getASCIIPropertyNameChar(name2); + + /* If we reach the ends of both strings then they match */ + if(((r1|r2)&0xff)==0) { + return 0; + } + + /* Compare the lowercased characters */ + if(r1!=r2) { + rc=(r1&0xff)-(r2&0xff); + if(rc!=0) { + return rc; + } + } + + name1+=r1>>8; + name2+=r2>>8; + } +} + +U_CAPI int32_t U_EXPORT2 +uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) { + int32_t rc, r1, r2; + + for(;;) { + r1=getEBCDICPropertyNameChar(name1); + r2=getEBCDICPropertyNameChar(name2); + + /* If we reach the ends of both strings then they match */ + if(((r1|r2)&0xff)==0) { + return 0; + } + + /* Compare the lowercased characters */ + if(r1!=r2) { + rc=(r1&0xff)-(r2&0xff); + if(rc!=0) { + return rc; + } + } + + name1+=r1>>8; + name2+=r2>>8; + } +} + +U_CDECL_END + +U_NAMESPACE_BEGIN + +int32_t PropNameData::findProperty(int32_t property) { + int32_t i=1; // valueMaps index, initially after numRanges + for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) { + // Read and skip the start and limit of this range. + int32_t start=valueMaps[i]; + int32_t limit=valueMaps[i+1]; + i+=2; + if(property<start) { + break; + } + if(property<limit) { + return i+(property-start)*2; + } + i+=(limit-start)*2; // Skip all entries for this range. + } + return 0; +} + +int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) { + if(valueMapIndex==0) { + return 0; // The property does not have named values. + } + ++valueMapIndex; // Skip the BytesTrie offset. + int32_t numRanges=valueMaps[valueMapIndex++]; + if(numRanges<0x10) { + // Ranges of values. + for(; numRanges>0; --numRanges) { + // Read and skip the start and limit of this range. + int32_t start=valueMaps[valueMapIndex]; + int32_t limit=valueMaps[valueMapIndex+1]; + valueMapIndex+=2; + if(value<start) { + break; + } + if(value<limit) { + return valueMaps[valueMapIndex+value-start]; + } + valueMapIndex+=limit-start; // Skip all entries for this range. + } + } else { + // List of values. + int32_t valuesStart=valueMapIndex; + int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10; + do { + int32_t v=valueMaps[valueMapIndex]; + if(value<v) { + break; + } + if(value==v) { + return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart]; + } + } while(++valueMapIndex<nameGroupOffsetsStart); + } + return 0; +} + +const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) { + int32_t numNames=*nameGroup++; + if(nameIndex<0 || numNames<=nameIndex) { + return NULL; + } + // Skip nameIndex names. + for(; nameIndex>0; --nameIndex) { + nameGroup=uprv_strchr(nameGroup, 0)+1; + } + if(*nameGroup==0) { + return NULL; // no name (Property[Value]Aliases.txt has "n/a") + } + return nameGroup; +} + +UBool PropNameData::containsName(BytesTrie &trie, const char *name) { + if(name==NULL) { + return FALSE; + } + UStringTrieResult result=USTRINGTRIE_NO_VALUE; + char c; + while((c=*name++)!=0) { + c=uprv_invCharToLowercaseAscii(c); + // Ignore delimiters '-', '_', and ASCII White_Space. + if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) { + continue; + } + if(!USTRINGTRIE_HAS_NEXT(result)) { + return FALSE; + } + result=trie.next((uint8_t)c); + } + return USTRINGTRIE_HAS_VALUE(result); +} + +const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) { + int32_t valueMapIndex=findProperty(property); + if(valueMapIndex==0) { + return NULL; // Not a known property. + } + return getName(nameGroups+valueMaps[valueMapIndex], nameChoice); +} + +const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) { + int32_t valueMapIndex=findProperty(property); + if(valueMapIndex==0) { + return NULL; // Not a known property. + } + int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value); + if(nameGroupOffset==0) { + return NULL; + } + return getName(nameGroups+nameGroupOffset, nameChoice); +} + +int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) { + BytesTrie trie(bytesTries+bytesTrieOffset); + if(containsName(trie, alias)) { + return trie.getValue(); + } else { + return UCHAR_INVALID_CODE; + } +} + +int32_t PropNameData::getPropertyEnum(const char *alias) { + return getPropertyOrValueEnum(0, alias); +} + +int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) { + int32_t valueMapIndex=findProperty(property); + if(valueMapIndex==0) { + return UCHAR_INVALID_CODE; // Not a known property. + } + valueMapIndex=valueMaps[valueMapIndex+1]; + if(valueMapIndex==0) { + return UCHAR_INVALID_CODE; // The property does not have named values. + } + // valueMapIndex is the start of the property's valueMap, + // where the first word is the BytesTrie offset. + return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias); +} +U_NAMESPACE_END + +//---------------------------------------------------------------------- +// Public API implementation + +U_CAPI const char* U_EXPORT2 +u_getPropertyName(UProperty property, + UPropertyNameChoice nameChoice) { + U_NAMESPACE_USE + return PropNameData::getPropertyName(property, nameChoice); +} + +U_CAPI UProperty U_EXPORT2 +u_getPropertyEnum(const char* alias) { + U_NAMESPACE_USE + return (UProperty)PropNameData::getPropertyEnum(alias); +} + +U_CAPI const char* U_EXPORT2 +u_getPropertyValueName(UProperty property, + int32_t value, + UPropertyNameChoice nameChoice) { + U_NAMESPACE_USE + return PropNameData::getPropertyValueName(property, value, nameChoice); +} + +U_CAPI int32_t U_EXPORT2 +u_getPropertyValueEnum(UProperty property, + const char* alias) { + U_NAMESPACE_USE + return PropNameData::getPropertyValueEnum(property, alias); +} + +U_CAPI const char* U_EXPORT2 +uscript_getName(UScriptCode scriptCode){ + return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, + U_LONG_PROPERTY_NAME); +} + +U_CAPI const char* U_EXPORT2 +uscript_getShortName(UScriptCode scriptCode){ + return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, + U_SHORT_PROPERTY_NAME); +} diff --git a/thirdparty/icu4c/common/propname.h b/thirdparty/icu4c/common/propname.h new file mode 100644 index 0000000000..1a8ced5b87 --- /dev/null +++ b/thirdparty/icu4c/common/propname.h @@ -0,0 +1,212 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2002-2011, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: October 30 2002 +* Since: ICU 2.4 +* 2010nov19 Markus Scherer Rewrite for formatVersion 2. +********************************************************************** +*/ +#ifndef PROPNAME_H +#define PROPNAME_H + +#include "unicode/utypes.h" +#include "unicode/bytestrie.h" +#include "unicode/uchar.h" +#include "udataswp.h" +#include "uprops.h" + +/* + * This header defines the in-memory layout of the property names data + * structure representing the UCD data files PropertyAliases.txt and + * PropertyValueAliases.txt. It is used by: + * propname.cpp - reads data + * genpname - creates data + */ + +/* low-level char * property name comparison -------------------------------- */ + +U_CDECL_BEGIN + +/** + * \var uprv_comparePropertyNames + * Unicode property names and property value names are compared "loosely". + * + * UCD.html 4.0.1 says: + * For all property names, property value names, and for property values for + * Enumerated, Binary, or Catalog properties, use the following + * loose matching rule: + * + * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. + * + * This function does just that, for (char *) name strings. + * It is almost identical to ucnv_compareNames() but also ignores + * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). + * + * @internal + */ + +U_CAPI int32_t U_EXPORT2 +uprv_compareASCIIPropertyNames(const char *name1, const char *name2); + +U_CAPI int32_t U_EXPORT2 +uprv_compareEBCDICPropertyNames(const char *name1, const char *name2); + +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define uprv_comparePropertyNames uprv_compareASCIIPropertyNames +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define uprv_comparePropertyNames uprv_compareEBCDICPropertyNames +#else +# error U_CHARSET_FAMILY is not valid +#endif + +U_CDECL_END + +/* UDataMemory structure and signatures ------------------------------------- */ + +#define PNAME_DATA_NAME "pnames" +#define PNAME_DATA_TYPE "icu" + +/* Fields in UDataInfo: */ + +/* PNAME_SIG[] is encoded as numeric literals for compatibility with the HP compiler */ +#define PNAME_SIG_0 ((uint8_t)0x70) /* p */ +#define PNAME_SIG_1 ((uint8_t)0x6E) /* n */ +#define PNAME_SIG_2 ((uint8_t)0x61) /* a */ +#define PNAME_SIG_3 ((uint8_t)0x6D) /* m */ + +U_NAMESPACE_BEGIN + +class PropNameData { +public: + enum { + // Byte offsets from the start of the data, after the generic header. + IX_VALUE_MAPS_OFFSET, + IX_BYTE_TRIES_OFFSET, + IX_NAME_GROUPS_OFFSET, + IX_RESERVED3_OFFSET, + IX_RESERVED4_OFFSET, + IX_TOTAL_SIZE, + + // Other values. + IX_MAX_NAME_LENGTH, + IX_RESERVED7, + IX_COUNT + }; + + static const char *getPropertyName(int32_t property, int32_t nameChoice); + static const char *getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice); + + static int32_t getPropertyEnum(const char *alias); + static int32_t getPropertyValueEnum(int32_t property, const char *alias); + +private: + static int32_t findProperty(int32_t property); + static int32_t findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value); + static const char *getName(const char *nameGroup, int32_t nameIndex); + static UBool containsName(BytesTrie &trie, const char *name); + + static int32_t getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias); + + static const int32_t indexes[]; + static const int32_t valueMaps[]; + static const uint8_t bytesTries[]; + static const char nameGroups[]; +}; + +/* + * pnames.icu formatVersion 2 + * + * formatVersion 2 is new in ICU 4.8. + * In ICU 4.8, the pnames.icu data file is used only in ICU4J. + * ICU4C 4.8 has the same data structures hardcoded in source/common/propname_data.h. + * + * For documentation of pnames.icu formatVersion 1 see ICU4C 4.6 (2010-dec-01) + * or earlier versions of this header file (source/common/propname.h). + * + * The pnames.icu begins with the standard ICU DataHeader/UDataInfo. + * After that: + * + * int32_t indexes[8]; + * + * (See the PropNameData::IX_... constants.) + * + * The first 6 indexes are byte offsets from the beginning of the data + * (beginning of indexes[]) to following structures. + * The length of each structure is the difference between its offset + * and the next one. + * All offsets are filled in: Where there is no data between two offsets, + * those two offsets are the same. + * The last offset (indexes[PropNameData::IX_TOTAL_SIZE]) indicates the + * total number of bytes in the file. (Not counting the standard headers.) + * + * The sixth index (indexes[PropNameData::IX_MAX_NAME_LENGTH]) has the + * maximum length of any Unicode property (or property value) alias. + * (Without normalization, that is, including underscores etc.) + * + * int32_t valueMaps[]; + * + * The valueMaps[] begins with a map from UProperty enums to properties, + * followed by the per-property value maps from property values to names, + * for those properties that have named values. + * (Binary & enumerated, plus General_Category_Mask.) + * + * valueMaps[0] contains the number of UProperty enum ranges. + * For each range: + * int32_t start, limit -- first and last+1 UProperty enum of a dense range + * Followed by (limit-start) pairs of + * int32_t nameGroupOffset; + * Offset into nameGroups[] for the property's names/aliases. + * int32_t valueMapIndex; + * Offset of the property's value map in the valueMaps[] array. + * If the valueMapIndex is 0, then the property does not have named values. + * + * For each property's value map: + * int32_t bytesTrieOffset; -- Offset into bytesTries[] for name->value mapping. + * int32_t numRanges; + * If numRanges is in the range 1..15, then that many ranges of values follow. + * Per range: + * int32_t start, limit -- first and last+1 UProperty enum of a range + * Followed by (limit-start) entries of + * int32_t nameGroupOffset; + * Offset into nameGroups[] for the property value's names/aliases. + * If the nameGroupOffset is 0, then this is not a named value for this property. + * (That is, the ranges need not be dense.) + * If numRanges is >=0x10, then (numRanges-0x10) sorted values + * and then (numRanges-0x10) corresponding nameGroupOffsets follow. + * Values are sorted as signed integers. + * In this case, the set of values is dense; no nameGroupOffset will be 0. + * + * For both properties and property values, ranges are sorted by their start/limit values. + * + * uint8_t bytesTries[]; + * + * This is a sequence of BytesTrie structures, byte-serialized tries for + * mapping from names/aliases to values. + * The first one maps from property names/aliases to UProperty enum constants. + * The following ones are indexed by property value map bytesTrieOffsets + * for mapping each property's names/aliases to their property values. + * + * char nameGroups[]; + * + * This is a sequence of property name groups. + * Each group is a list of names/aliases (invariant-character strings) for + * one property or property value, in the order of UCharNameChoice. + * The first byte of each group is the number of names in the group. + * It is followed by that many NUL-terminated strings. + * The first string is for the short name; if there is no short name, + * then the first string is empty. + * The second string is the long name. Further strings are additional aliases. + * + * The first name group is for a property rather than a property value, + * so that a nameGroupOffset of 0 can be used to indicate "no value" + * in a property's sparse value ranges. + */ + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/propname_data.h b/thirdparty/icu4c/common/propname_data.h new file mode 100644 index 0000000000..6f63e9cdd4 --- /dev/null +++ b/thirdparty/icu4c/common/propname_data.h @@ -0,0 +1,1919 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// Copyright (C) 1999-2016, International Business Machines +// Corporation and others. All Rights Reserved. +// +// file name: propname_data.h +// +// machine-generated by: icu/tools/unicode/c/genprops/pnamesbuilder.cpp + + +#ifdef INCLUDED_FROM_PROPNAME_CPP + +U_NAMESPACE_BEGIN + +const int32_t PropNameData::indexes[8]={0x20,0x15b8,0x5048,0xa69a,0xa69a,0xa69a,0x2f,0}; + +const int32_t PropNameData::valueMaps[1382]={ +6,0,0x41,0,0xe3,0x368,0xe3,0x37e,0xe3,0x393,0xe3,0x3a9,0xe3,0x3b4,0xe3,0x3d5, +0xe3,0x3e5,0xe3,0x3f4,0xe3,0x402,0xe3,0x426,0xe3,0x43d,0xe3,0x455,0xe3,0x46c,0xe3,0x47b, +0xe3,0x48a,0xe3,0x49b,0xe3,0x4a9,0xe3,0x4bb,0xe3,0x4d5,0xe3,0x4f0,0xe3,0x505,0xe3,0x522, +0xe3,0x533,0xe3,0x53e,0xe3,0x55d,0xe3,0x573,0xe3,0x584,0xe3,0x594,0xe3,0x5af,0xe3,0x5c8, +0xe3,0x5d9,0xe3,0x5f3,0xe3,0x606,0xe3,0x616,0xe3,0x630,0xe3,0x649,0xe3,0x660,0xe3,0x674, +0xe3,0x68a,0xe3,0x69e,0xe3,0x6b4,0xe3,0x6ce,0xe3,0x6e6,0xe3,0x702,0xe3,0x70a,0xe3,0x712, +0xe3,0x71a,0xe3,0x722,0xe3,0x72b,0xe3,0x738,0xe3,0x74b,0xe3,0x768,0xe3,0x785,0xe3,0x7a2, +0xe3,0x7c0,0xe3,0x7de,0xe3,0x802,0xe3,0x80f,0xe3,0x829,0xe3,0x83e,0xe3,0x859,0xe3,0x870, +0xe3,0x887,0xe3,0x8a9,0xe3,0x1000,0x1019,0x8c8,0x15f,0xae8,0x17a,0x2f11,0xe9,0x2f30,0x2b3,0x306e, +0x2c9,0x30c8,0x2d3,0x3325,0x2f5,0x3c20,0x35f,0x3c90,0x369,0x3f2a,0x398,0x3f68,0x3a0,0x4a5b,0x465,0x4ad9, +0x46f,0x4afe,0x475,0x4b18,0x47b,0x4b39,0x482,0x4b53,0xe9,0x4b78,0xe9,0x4b9e,0x489,0x4c48,0x49f,0x4cc1, +0x4b2,0x4d73,0x4cd,0x4daa,0x4d4,0x4f8a,0x4e8,0x540a,0x510,0x2000,0x2001,0x5469,0x518,0x3000,0x3001,0x54f5, +0,0x4000,0x400e,0x5507,0,0x5510,0,0x552a,0,0x553b,0,0x554c,0,0x5562,0,0x556b, +0,0x5588,0,0x55a6,0,0x55c4,0,0x55e2,0,0x55f8,0,0x560c,0,0x5622,0,0x7000, +0x7001,0x563b,0,0x7d6,0x12,0,1,0x12,0x20,0x7f4,0x4a,0,1,6,7,8, +9,0xa,0xb,0xc,0xd,0xe,0xf,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18, +0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x54,0x5b,0x67,0x6b, +0x76,0x7a,0x81,0x82,0x84,0x85,0xc8,0xca,0xd6,0xd8,0xda,0xdc,0xde,0xe0,0xe2,0xe4, +0xe6,0xe8,0xe9,0xea,0xf0,0x2e,0x40,0x4c,0x5e,0x68,0x79,0x84,0x91,0x9e,0xab,0xb8, +0xc5,0xd2,0xdf,0xec,0xf9,0x106,0x113,0x120,0x12d,0x13a,0x147,0x154,0x161,0x16e,0x17b,0x188, +0x195,0x1a2,0x1af,0x1bc,0x1c9,0x1d6,0x1e3,0x1f0,0x1fd,0x20c,0x21b,0x22a,0x239,0x248,0x257,0x266, +0x275,0x28f,0x2a3,0x2b7,0x2d2,0x2e1,0x2ea,0x2fa,0x302,0x30b,0x31a,0x323,0x333,0x344,0x355,0x995, +1,0,0x17,0x8d7,0x8e8,0x8f9,0x90d,0x924,0x93c,0x94e,0x963,0x97a,0x98f,0x99f,0x9b1,0x9ce, +0x9ea,0x9fc,0xa19,0xa35,0xa51,0xa66,0xa7b,0xa95,0xab0,0xacb,0xb37,1,0,0x135,0xaf3,0xb00, +0xb13,0xb3b,0xb59,0xb77,0xb8f,0xbba,0xbe4,0xbfc,0xc0f,0xc22,0xc31,0xc40,0xc4f,0xc5e,0xc75,0xc86, +0xc99,0xcac,0xcb9,0xcc6,0xcd5,0xce6,0xcfb,0xd0c,0xd17,0xd20,0xd31,0xd42,0xd55,0xd67,0xd7a,0xd8d, +0xdcc,0xdd9,0xde6,0xdf3,0xe08,0xe38,0xe52,0xe73,0xe9e,0xec1,0xf1f,0xf46,0xf61,0xf70,0xf97,0xfbf, +0xfe2,0x1005,0x102f,0x1048,0x1067,0x108a,0x10ae,0x10c1,0x10db,0x1105,0x111d,0x1145,0x116e,0x1181,0x1194,0x11a7, +0x11ce,0x11dd,0x11fd,0x122b,0x1249,0x1277,0x1293,0x12ae,0x12c7,0x12e0,0x1301,0x1331,0x1350,0x1372,0x13a6,0x13d3, +0x1418,0x1439,0x1463,0x1484,0x14ad,0x14c0,0x14f3,0x150a,0x1519,0x152a,0x1555,0x156c,0x159d,0x15cb,0x160e,0x1619, +0x1652,0x1663,0x1674,0x1681,0x1694,0x16ce,0x16f2,0x1716,0x1750,0x1788,0x17b3,0x17cb,0x17f7,0x1823,0x1830,0x183f, +0x185c,0x187e,0x18ac,0x18cc,0x18f3,0x191a,0x1939,0x194c,0x195d,0x196e,0x1993,0x19b8,0x19df,0x1a13,0x1a40,0x1a5e, +0x1a71,0x1a8a,0x1ac3,0x1ad2,0x1af2,0x1b14,0x1b36,0x1b4d,0x1b64,0x1b91,0x1baa,0x1bc3,0x1bf4,0x1c1e,0x1c39,0x1c4c, +0x1c6b,0x1c74,0x1c87,0x1ca5,0x1cc3,0x1cd6,0x1ced,0x1d02,0x1d37,0x1d5b,0x1d70,0x1d7f,0x1d92,0x1db6,0x1dbf,0x1de3, +0x1dfa,0x1e0d,0x1e1c,0x1e27,0x1e48,0x1e60,0x1e6f,0x1e7e,0x1e8d,0x1ea4,0x1eb9,0x1ece,0x1f07,0x1f1a,0x1f36,0x1f41, +0x1f4e,0x1f7c,0x1fa0,0x1fc3,0x1fd6,0x1ff8,0x200b,0x2026,0x2049,0x206c,0x2091,0x20a2,0x20d1,0x20fe,0x2115,0x2130, +0x213f,0x216a,0x21a2,0x21dc,0x220a,0x221b,0x2228,0x224c,0x225b,0x2277,0x2291,0x22ae,0x22e6,0x22fb,0x2328,0x2347, +0x2375,0x2395,0x23c9,0x23d8,0x2402,0x2425,0x2450,0x245b,0x246c,0x2487,0x24ab,0x24b8,0x24cd,0x24f4,0x251f,0x2556, +0x2569,0x257a,0x25aa,0x25bb,0x25ca,0x25df,0x25fd,0x2610,0x2623,0x263a,0x2657,0x2662,0x266b,0x268d,0x26a2,0x26c7, +0x26de,0x2707,0x2722,0x2737,0x2750,0x2771,0x27a6,0x27b7,0x27e8,0x280c,0x281d,0x2836,0x2841,0x286e,0x2890,0x28be, +0x28f1,0x2900,0x2911,0x292e,0x2970,0x2997,0x29a4,0x29b9,0x29dd,0x2a03,0x2a3c,0x2a4d,0x2a71,0x2a7c,0x2a89,0x2a98, +0x2abd,0x2aeb,0x2b07,0x2b24,0x2b31,0x2b42,0x2b60,0x2b83,0x2ba0,0x2bad,0x2bcd,0x2bea,0x2c0b,0x2c34,0x2c45,0x2c64, +0x2c7d,0x2c96,0x2ca7,0x2cf0,0x2d01,0x2d1a,0x2d49,0x2d76,0x2d9b,0x2ddd,0x2df9,0x2e08,0x2e1f,0x2e4d,0x2e66,0x2e8f, +0x2ea9,0x2ee4,0x2f02,0x1e85,1,0,0x12,0x2f47,0x2f57,0x2f6a,0x2f7a,0x2f8a,0x2f99,0x2fa9,0x2fbb,0x2fce, +0x2fe0,0x2ff0,0x3000,0x300f,0x301e,0x302e,0x303b,0x304a,0x305e,0x1f43,1,0,6,0x3083,0x308e,0x309b, +0x30a8,0x30b5,0x30c0,0x1f87,1,0,0x1e,0x30dd,0x30ec,0x3101,0x3116,0x312b,0x313f,0x3150,0x3164,0x3177, +0x3188,0x31a1,0x31b3,0x31c4,0x31d8,0x31eb,0x3203,0x3215,0x3220,0x3230,0x323e,0x3253,0x3268,0x327e,0x3298,0x32ae, +0x32be,0x32d2,0x32e6,0x32f7,0x330f,0x21b2,1,0,0x66,0x3337,0x335a,0x3363,0x3370,0x337b,0x3384,0x338f, +0x3398,0x33b1,0x33b6,0x33bf,0x33dc,0x33e5,0x33f2,0x33fb,0x341f,0x3426,0x342f,0x3442,0x344d,0x3456,0x3461,0x347a, +0x3483,0x3492,0x349d,0x34a6,0x34b1,0x34ba,0x34c1,0x34ca,0x34d5,0x34de,0x34f7,0x3500,0x350d,0x3518,0x3529,0x3534, +0x3549,0x3560,0x3569,0x3572,0x358b,0x3596,0x359f,0x35a8,0x35bf,0x35dc,0x35e7,0x35f8,0x3603,0x360a,0x3617,0x3624, +0x3651,0x3666,0x366f,0x368a,0x36ad,0x36ce,0x36ef,0x3714,0x373b,0x375c,0x377f,0x37a0,0x37c7,0x37e8,0x380d,0x382c, +0x384b,0x386a,0x3887,0x38a8,0x38c9,0x38ec,0x3911,0x3930,0x394f,0x3970,0x3997,0x39bc,0x39db,0x39fc,0x3a1f,0x3a3a, +0x3a53,0x3a6e,0x3a87,0x3aa4,0x3abf,0x3adc,0x3afb,0x3b18,0x3b35,0x3b54,0x3b71,0x3b8c,0x3ba9,0x3bc6,0x3bf9,0x24f7, +1,0,6,0x3c31,0x3c40,0x3c50,0x3c60,0x3c70,0x3c81,0x2555,1,0,0x2b,0x3c9f,0x3cab,0x3cb9, +0x3cc8,0x3cd7,0x3ce7,0x3cf8,0x3d0c,0x3d21,0x3d37,0x3d4a,0x3d5e,0x3d6e,0x3d77,0x3d82,0x3d92,0x3dae,0x3dc0,0x3dce, +0x3ddd,0x3de9,0x3dfe,0x3e12,0x3e25,0x3e33,0x3e47,0x3e55,0x3e5f,0x3e71,0x3e7d,0x3e8b,0x3e9b,0x3ea2,0x3ea9,0x3eb0, +0x3eb7,0x3ebe,0x3ed4,0x3ef5,0x870,0x3f07,0x3f12,0x3f21,0x27ae,1,0,4,0x3f3b,0x3f46,0x3f52,0x3f5c, +0x27d4,1,0,0xc1,0x3f73,0x3f80,0x3f95,0x3fa2,0x3fb1,0x3fbf,0x3fce,0x3fdd,0x3fef,0x3ffe,0x400c,0x401d, +0x402c,0x403b,0x4048,0x4054,0x4063,0x4072,0x407c,0x4089,0x4096,0x40a5,0x40b3,0x40c2,0x40ce,0x40d8,0x40e4,0x40f4, +0x4104,0x4112,0x411e,0x412f,0x413b,0x4147,0x4155,0x4162,0x416e,0x417b,0xd0c,0x4188,0x4196,0x41b0,0x41b9,0x41c7, +0x41d5,0x41e1,0x41f0,0x41fe,0x420c,0x4218,0x4227,0x4235,0x4243,0x4250,0x425f,0x427a,0x4289,0x429a,0x42ab,0x42be, +0x42d0,0x42df,0x42f1,0x4300,0x430c,0x4317,0x1e1c,0x4324,0x432f,0x433a,0x4345,0x4350,0x436b,0x4376,0x4381,0x438c, +0x439f,0x43b3,0x43be,0x43cd,0x43dc,0x43e7,0x43f2,0x43ff,0x440e,0x441c,0x4427,0x4442,0x444c,0x445d,0x446e,0x447d, +0x448e,0x4499,0x44a4,0x44af,0x44ba,0x44c5,0x44d0,0x44db,0x44e5,0x44f0,0x4500,0x450b,0x4519,0x4526,0x4531,0x4540, +0x454d,0x455a,0x4569,0x4576,0x4587,0x4599,0x45a9,0x45b4,0x45c7,0x45de,0x45ec,0x45f9,0x4604,0x4611,0x4622,0x463e, +0x4654,0x465f,0x467c,0x468c,0x469b,0x46a6,0x46b1,0x1f36,0x46bd,0x46c8,0x46e0,0x46f0,0x46ff,0x470d,0x471b,0x4726, +0x4731,0x4745,0x475c,0x4774,0x4784,0x4794,0x47a4,0x47b6,0x47c1,0x47cc,0x47d6,0x47e2,0x47f0,0x4803,0x480f,0x481c, +0x4827,0x4843,0x4850,0x485e,0x4877,0x2836,0x4886,0x2657,0x4893,0x48a1,0x48b3,0x48c1,0x48cd,0x48dd,0x2a71,0x48eb, +0x48f7,0x4902,0x490d,0x4918,0x492c,0x493a,0x4951,0x495d,0x4971,0x497f,0x4991,0x49a7,0x49b5,0x49c7,0x49d5,0x49f2, +0x4a04,0x4a11,0x4a22,0x4a34,0x4a4e,0x31cc,1,0,6,0x4a75,0x4a88,0x4a98,0x4aa6,0x4ab7,0x4ac7,0x3228, +0x12,0,1,0x4af1,0x4af7,0x3235,0x12,0,1,0x4af1,0x4af7,0x3242,1,0,3,0x4af1, +0x4af7,0x4b30,0x3258,1,0,3,0x4af1,0x4af7,0x4b30,0x326e,1,0,0x12,0x4bba,0x4bc4,0x4bd0, +0x4bd7,0x4be2,0x4be7,0x4bee,0x4bf5,0x4bfe,0x4c03,0x4c08,0x4c18,0x870,0x3f07,0x4c24,0x3f12,0x4c34,0x3f21,0x3317, +1,0,0xf,0x4bba,0x4c5b,0x4c65,0x4c6f,0x4c7a,0x3ddd,0x4c84,0x4c90,0x4c98,0x4c9f,0x4ca9,0x4bd0,0x4bd7, +0x4be7,0x4cb3,0x339e,1,0,0x17,0x4bba,0x4cd0,0x4c6f,0x4cdc,0x4ce9,0x4cf7,0x3ddd,0x4d02,0x4bd0,0x4d13, +0x4be7,0x4d22,0x4d30,0x870,0x3ef5,0x4d3c,0x4d4d,0x3f07,0x4c24,0x3f12,0x4c34,0x3f21,0x4d5e,0x34bb,1,0, +3,0x4d91,0x4d99,0x4da1,0x34d4,1,0,0x10,0x4dca,0x4dd1,0x4de0,0x4e01,0x4e24,0x4e2f,0x4e4e,0x4e65, +0x4e72,0x4e7b,0x4e9a,0x4ecd,0x4ee8,0x4f17,0x4f34,0x4f59,0x356d,1,0,0x24,0x4fa8,0x4fb5,0x4fc8,0x4fd5, +0x5002,0x5027,0x503c,0x505b,0x507c,0x50a9,0x50e2,0x5105,0x5128,0x5155,0x518a,0x51b1,0x51da,0x5211,0x5240,0x5261, +0x5286,0x5295,0x52b8,0x52cf,0x52dc,0x52eb,0x5308,0x5321,0x5344,0x5369,0x5382,0x5397,0x53a6,0x53b7,0x53c4,0x53e5, +0x373d,1,0,4,0x5423,0x542e,0x5446,0x545e,0x3779,0x36,1,2,4,8,0xe,0x10, +0x20,0x3e,0x40,0x80,0x100,0x1c0,0x200,0x400,0x800,0xe00,0x1000,0x2000,0x4000,0x7000,0x8000,0x10000, +0x20000,0x40000,0x78001,0x80000,0x100000,0x200000,0x400000,0x800000,0x1000000,0x2000000,0x4000000,0x8000000,0xf000000,0x10000000,0x20000000,0x30f80000, +0x30dd,0x30ec,0x3101,0x3116,0x5497,0x312b,0x313f,0x548d,0x3150,0x3164,0x3177,0x54a8,0x3188,0x31a1,0x31b3,0x54bf, +0x31c4,0x31d8,0x31eb,0x54e8,0x3203,0x3215,0x3220,0x3230,0x5484,0x323e,0x3253,0x3268,0x327e,0x3298,0x32ae,0x32be, +0x32d2,0x32e6,0x54de,0x32f7,0x330f,0x54c9 +}; + +const uint8_t PropNameData::bytesTries[14992]={ +0,0x15,0x6d,0xc3,0x78,0x73,0xc2,0x12,0x76,0x7a,0x76,0x6a,0x77,0xa2,0x52,0x78, +1,0x64,0x50,0x69,0x10,0x64,1,0x63,0x30,0x73,0x62,0x13,0x74,0x61,0x72,0x74, +0x63,0x60,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x61,0x13,0x69,0x67,0x69,0x74, +0x81,3,0x61,0x2e,0x65,0x4c,0x6f,0xc3,0x18,0x73,0x69,0x1e,0x72,0x69,0x61,0x74, +0x69,0x6f,0x6e,0x73,0x65,0x6c,0x65,0x63,0x74,0x6f,0x72,0x69,0x10,0x72,0x1f,0x74, +0x69,0x63,0x61,0x6c,0x6f,0x72,0x69,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0xc3, +0x18,3,0x62,0xc3,0x14,0x68,0x32,0x6f,0x42,0x73,0x13,0x70,0x61,0x63,0x65,0x5f, +0x17,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65,0x5f,0x16,0x72,0x64,0x62,0x72,0x65, +0x61,0x6b,0xc3,0x14,0x73,0xa2,0x49,0x74,0xa4,0x3b,0x75,3,0x63,0xd9,0x40,0xc, +0x69,0x52,0x6e,0x58,0x70,0x12,0x70,0x65,0x72,0x5c,0x13,0x63,0x61,0x73,0x65,0x5c, +0x16,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,0xc,0x12,0x64,0x65,0x6f,0x5b, +0x10,0x69,1,0x63,0x3e,0x66,0x1b,0x69,0x65,0x64,0x69,0x64,0x65,0x6f,0x67,0x72, +0x61,0x70,0x68,0x5b,0x17,0x6f,0x64,0x65,0x31,0x6e,0x61,0x6d,0x65,0xd9,0x40,0xb, +0xa,0x69,0x84,0x70,0x19,0x70,0x30,0x74,0x36,0x75,0x10,0x63,0xd9,0x40,9,0x12, +0x61,0x63,0x65,0x5f,1,0x63,0xd9,0x40,8,0x65,0x11,0x72,0x6d,0x67,0x69,0x3c, +0x6c,0xa2,0x5f,0x6f,0x17,0x66,0x74,0x64,0x6f,0x74,0x74,0x65,0x64,0x57,0x13,0x6d, +0x70,0x6c,0x65,3,0x63,0x50,0x6c,0x68,0x74,0x8a,0x75,0x1e,0x70,0x70,0x65,0x72, +0x63,0x61,0x73,0x65,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,9,0x19,0x61, +0x73,0x65,0x66,0x6f,0x6c,0x64,0x69,0x6e,0x67,0xd9,0x40,6,0x1e,0x6f,0x77,0x65, +0x72,0x63,0x61,0x73,0x65,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,7,0x1e, +0x69,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9, +0x40,8,0x10,0x63,0xd9,0x40,7,0x62,0xc3,0x13,0x63,0x34,0x64,0x57,0x65,0x6e, +0x66,0x10,0x63,0xd9,0x40,6,0xc2,0xa,2,0x66,0xd9,0x40,6,0x72,0x28,0x78, +0xd9,0x70,0,0x12,0x69,0x70,0x74,0xc2,0xa,0x19,0x65,0x78,0x74,0x65,0x6e,0x73, +0x69,0x6f,0x6e,0x73,0xd9,0x70,0,1,0x67,0x6a,0x6e,1,0x73,0x54,0x74,0x13, +0x65,0x6e,0x63,0x65,1,0x62,0x34,0x74,0x16,0x65,0x72,0x6d,0x69,0x6e,0x61,0x6c, +0x67,0x13,0x72,0x65,0x61,0x6b,0xc3,0x13,0x14,0x69,0x74,0x69,0x76,0x65,0x65,1, +0x6d,0x2e,0x73,0x13,0x74,0x61,0x72,0x74,0x73,0x19,0x65,0x6e,0x74,0x73,0x74,0x61, +0x72,0x74,0x65,0x72,0x73,3,0x63,0x66,0x65,0x72,0x69,0x98,0x72,0x19,0x61,0x69, +0x6c,0x63,0x61,0x6e,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d,0x62,0x69, +0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,0x11,0xd8,0x40,0xa,0x11,0x63, +0x63,0xc3,0x11,0x11,0x72,0x6d,0x58,0x1e,0x69,0x6e,0x61,0x6c,0x70,0x75,0x6e,0x63, +0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x59,0x1d,0x74,0x6c,0x65,0x63,0x61,0x73,0x65, +0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40,0xa,0x6d,0x70,0x6e,0x76,0x70,0xa2, +0xf1,0x71,0xa4,0x43,0x72,2,0x61,0x28,0x65,0x32,0x69,0x9d,0x14,0x64,0x69,0x63, +0x61,0x6c,0x55,0x1e,0x67,0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64,0x69,0x63,0x61, +0x74,0x6f,0x72,0x9d,0x12,0x61,0x74,0x68,0x4f,6,0x6f,0x39,0x6f,0x32,0x74,0xc3, +9,0x75,0x54,0x76,0xd9,0x30,0,0x12,0x6e,0x63,0x68,0x1f,0x61,0x72,0x61,0x63, +0x74,0x65,0x72,0x63,0x6f,0x64,0x65,0x70,0x6f,0x69,0x6e,0x74,0x51,0x14,0x6d,0x65, +0x72,0x69,0x63,1,0x74,0x32,0x76,0x13,0x61,0x6c,0x75,0x65,0xd9,0x30,0,0x12, +0x79,0x70,0x65,0xc3,9,0x61,0xa2,0x77,0x63,0xa2,0x82,0x66,2,0x63,0x98,0x64, +0xa2,0x53,0x6b,1,0x63,0x56,0x64,1,0x69,0x42,0x71,1,0x63,0xc3,0xd,0x75, +0x17,0x69,0x63,0x6b,0x63,0x68,0x65,0x63,0x6b,0xc3,0xd,0x13,0x6e,0x65,0x72,0x74, +0x6d,1,0x69,0x42,0x71,1,0x63,0xc3,0xf,0x75,0x17,0x69,0x63,0x6b,0x63,0x68, +0x65,0x63,0x6b,0xc3,0xf,0x13,0x6e,0x65,0x72,0x74,0x71,1,0x69,0x42,0x71,1, +0x63,0xc3,0xe,0x75,0x17,0x69,0x63,0x6b,0x63,0x68,0x65,0x63,0x6b,0xc3,0xe,0x13, +0x6e,0x65,0x72,0x74,0x6f,1,0x69,0x42,0x71,1,0x63,0xc3,0xc,0x75,0x17,0x69, +0x63,0x6b,0x63,0x68,0x65,0x63,0x6b,0xc3,0xc,0x13,0x6e,0x65,0x72,0x74,0x6b,0xd8, +0x40,5,1,0x31,0xd9,0x40,0xb,0x6d,0x10,0x65,0xd9,0x40,5,0x12,0x68,0x61, +0x72,0x51,2,0x61,0x6c,0x63,0xa2,0x4c,0x72,1,0x65,0x2a,0x69,0x11,0x6e,0x74, +0x7f,0x16,0x70,0x65,0x6e,0x64,0x65,0x64,0x63,0x1f,0x6f,0x6e,0x63,0x61,0x74,0x65, +0x6e,0x61,0x74,0x69,0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x9f,0x10,0x74,2,0x73,0x2c, +0x74,0x30,0x77,0x10,0x73,0x77,0x11,0x79,0x6e,0x75,0x12,0x65,0x72,0x6e,1,0x73, +0x38,0x77,0x18,0x68,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65,0x77,0x14,0x79,0x6e, +0x74,0x61,0x78,0x75,0x10,0x6d,0x9f,1,0x6d,0x3c,0x75,0x1a,0x6f,0x74,0x61,0x74, +0x69,0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x53,0x12,0x61,0x72,0x6b,0x53,0x66,0xc1,0xf8, +0x69,0xc1,0x3c,0x69,0xa2,0x6f,0x6a,0xa4,9,0x6c,4,0x62,0xc3,8,0x63,0x8c, +0x65,0x98,0x69,0xa2,0x56,0x6f,2,0x65,0x4b,0x67,0x4c,0x77,0x11,0x65,0x72,0x4c, +0x13,0x63,0x61,0x73,0x65,0x4c,0x16,0x6d,0x61,0x70,0x70,0x69,0x6e,0x67,0xd9,0x40, +4,0x11,0x69,0x63,0x1f,0x61,0x6c,0x6f,0x72,0x64,0x65,0x72,0x65,0x78,0x63,0x65, +0x70,0x74,0x69,0x6f,0x6e,0x4b,0xd8,0x40,4,0x11,0x63,0x63,0xc3,0x10,0x18,0x61, +0x64,0x63,0x61,0x6e,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d,0x62,0x69, +0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,0x10,0x16,0x6e,0x65,0x62,0x72, +0x65,0x61,0x6b,0xc3,8,2,0x64,0x4a,0x6e,0xa2,0x5b,0x73,1,0x63,0xd9,0x40, +3,0x6f,0x16,0x63,0x6f,0x6d,0x6d,0x65,0x6e,0x74,0xd9,0x40,3,2,0x63,0x80, +0x65,0x90,0x73,0x40,1,0x62,0x52,0x74,0x46,1,0x61,0x40,0x72,0x1c,0x69,0x6e, +0x61,0x72,0x79,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x47,0x11,0x72,0x74,0x41, +0x44,0x1c,0x69,0x6e,0x61,0x72,0x79,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x45, +0x3e,0x16,0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x3f,0x10,0x6f,0x42,0x16,0x67,0x72, +0x61,0x70,0x68,0x69,0x63,0x43,2,0x64,0x2e,0x70,0x86,0x73,0x10,0x63,0xc3,0x17, +0x11,0x69,0x63,1,0x70,0x46,0x73,0x1e,0x79,0x6c,0x6c,0x61,0x62,0x69,0x63,0x63, +0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc3,0x17,0x10,0x6f,0x1f,0x73,0x69,0x74,0x69, +0x6f,0x6e,0x61,0x6c,0x63,0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc3,0x16,0x10,0x63, +0xc3,0x16,2,0x67,0xc3,6,0x6f,0x26,0x74,0xc3,7,0x11,0x69,0x6e,1,0x63, +0x4a,0x69,0x11,0x6e,0x67,1,0x67,0x2e,0x74,0x12,0x79,0x70,0x65,0xc3,7,0x13, +0x72,0x6f,0x75,0x70,0xc3,6,0x48,0x15,0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x49,0x66, +0x86,0x67,0xa2,0x4a,0x68,3,0x61,0x36,0x65,0x58,0x73,0x68,0x79,0x13,0x70,0x68, +0x65,0x6e,0x3d,0x1f,0x6e,0x67,0x75,0x6c,0x73,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65, +0x74,0x79,0x70,0x65,0xc3,0xb,0x10,0x78,0x3a,0x14,0x64,0x69,0x67,0x69,0x74,0x3b, +0x10,0x74,0xc3,0xb,0x16,0x75,0x6c,0x6c,0x63,0x6f,0x6d,0x70,0x1f,0x6f,0x73,0x69, +0x74,0x69,0x6f,0x6e,0x65,0x78,0x63,0x6c,0x75,0x73,0x69,0x6f,0x6e,0x33,2,0x63, +0xa2,0x44,0x65,0xa2,0x4b,0x72,3,0x61,0x34,0x62,0x84,0x65,0x8a,0x6c,0x12,0x69, +0x6e,0x6b,0x39,0x11,0x70,0x68,0x7c,0x12,0x65,0x6d,0x65,3,0x62,0x5e,0x63,0x30, +0x65,0x48,0x6c,0x12,0x69,0x6e,0x6b,0x39,0x1a,0x6c,0x75,0x73,0x74,0x65,0x72,0x62, +0x72,0x65,0x61,0x6b,0xc3,0x12,0x14,0x78,0x74,0x65,0x6e,0x64,0x37,0x12,0x61,0x73, +0x65,0x35,0x11,0x78,0x74,0x37,0xc2,5,1,0x62,0xc3,0x12,0x6d,0xd9,0x20,0, +0x1c,0x6e,0x65,0x72,0x61,0x6c,0x63,0x61,0x74,0x65,0x67,0x6f,0x72,0x79,0xc2,5, +0x13,0x6d,0x61,0x73,0x6b,0xd9,0x20,0,0x61,0xa2,0x90,0x62,0xa2,0xbe,0x63,0xa4, +0x30,0x64,0xa4,0xfd,0x65,5,0x6d,0x63,0x6d,0x6e,0x70,0xa2,0x59,0x78,0x10,0x74, +0x30,1,0x65,0x2c,0x70,0x12,0x69,0x63,0x74,0xa1,0x12,0x6e,0x64,0x65,1,0x64, +0x24,0x72,0x31,0x1b,0x70,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63, +0xa1,0x10,0x6f,1,0x64,0x97,0x6a,0x10,0x69,0x92,2,0x63,0x40,0x6d,0x50,0x70, +0x1a,0x72,0x65,0x73,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e,0x95,0x17,0x6f,0x6d, +0x70,0x6f,0x6e,0x65,0x6e,0x74,0x9b,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x96, +0x13,0x62,0x61,0x73,0x65,0x99,0x12,0x72,0x65,0x73,0x95,0x61,0x30,0x62,0x4e,0x63, +0x12,0x6f,0x6d,0x70,0x9b,0xc2,4,0x1b,0x73,0x74,0x61,0x73,0x69,0x61,0x6e,0x77, +0x69,0x64,0x74,0x68,0xc3,4,0x12,0x61,0x73,0x65,0x99,3,0x67,0x44,0x68,0x4a, +0x6c,0x4e,0x73,0x1a,0x63,0x69,0x69,0x68,0x65,0x78,0x64,0x69,0x67,0x69,0x74,0x23, +0x10,0x65,0xd9,0x40,0,0x11,0x65,0x78,0x23,1,0x6e,0x38,0x70,0x11,0x68,0x61, +0x20,0x14,0x62,0x65,0x74,0x69,0x63,0x21,0x11,0x75,0x6d,0x79,4,0x63,0xc3,0, +0x69,0x3e,0x6c,0xa2,0x57,0x6d,0xa2,0x64,0x70,1,0x62,0xd9,0x40,0xd,0x74,0xc3, +0x15,0x11,0x64,0x69,2,0x63,0x54,0x6d,0x74,0x70,0x1b,0x61,0x69,0x72,0x65,0x64, +0x62,0x72,0x61,0x63,0x6b,0x65,0x74,0xd8,0x40,0xd,0x13,0x74,0x79,0x70,0x65,0xc3, +0x15,0x24,1,0x6c,0x30,0x6f,0x14,0x6e,0x74,0x72,0x6f,0x6c,0x25,0x12,0x61,0x73, +0x73,0xc3,0,0x26,0x14,0x69,0x72,0x72,0x6f,0x72,1,0x65,0x38,0x69,0x16,0x6e, +0x67,0x67,0x6c,0x79,0x70,0x68,0xd9,0x40,1,0x10,0x64,0x27,2,0x61,0x32,0x6b, +0xc3,1,0x6f,0x11,0x63,0x6b,0xc3,1,0x11,0x6e,0x6b,0x7b,0x10,0x67,0xd9,0x40, +1,6,0x68,0x7c,0x68,0x54,0x69,0x85,0x6f,0xa2,0x6f,0x77,4,0x63,0x30,0x6b, +0x36,0x6c,0x87,0x74,0x8b,0x75,0x89,1,0x66,0x8d,0x6d,0x8f,0x11,0x63,0x66,0x91, +0x18,0x61,0x6e,0x67,0x65,0x73,0x77,0x68,0x65,0x6e,4,0x63,0x44,0x6c,0x6c,0x6e, +0x7e,0x74,0x98,0x75,0x18,0x70,0x70,0x65,0x72,0x63,0x61,0x73,0x65,0x64,0x89,0x12, +0x61,0x73,0x65,1,0x66,0x30,0x6d,0x14,0x61,0x70,0x70,0x65,0x64,0x8f,0x14,0x6f, +0x6c,0x64,0x65,0x64,0x8d,0x18,0x6f,0x77,0x65,0x72,0x63,0x61,0x73,0x65,0x64,0x87, +0x1c,0x66,0x6b,0x63,0x63,0x61,0x73,0x65,0x66,0x6f,0x6c,0x64,0x65,0x64,0x91,0x18, +0x69,0x74,0x6c,0x65,0x63,0x61,0x73,0x65,0x64,0x8b,0x13,0x6d,0x70,0x65,0x78,0x33, +0x61,0x2e,0x63,0xa2,0x48,0x66,0xd9,0x40,2,1,0x6e,0x72,0x73,0x10,0x65,3, +0x64,0x83,0x66,0x3a,0x69,0x4a,0x73,0x17,0x65,0x6e,0x73,0x69,0x74,0x69,0x76,0x65, +0x65,0x15,0x6f,0x6c,0x64,0x69,0x6e,0x67,0xd9,0x40,2,0x17,0x67,0x6e,0x6f,0x72, +0x61,0x62,0x6c,0x65,0x85,0x13,0x6f,0x6e,0x69,0x63,0x1f,0x61,0x6c,0x63,0x6f,0x6d, +0x62,0x69,0x6e,0x69,0x6e,0x67,0x63,0x6c,0x61,0x73,0x73,0xc3,2,0x10,0x63,0xc3, +2,3,0x61,0x30,0x65,0x34,0x69,0xa2,0x41,0x74,0xc3,3,0x11,0x73,0x68,0x29, +2,0x63,0x3a,0x66,0x58,0x70,0x2c,0x16,0x72,0x65,0x63,0x61,0x74,0x65,0x64,0x2d, +0x1d,0x6f,0x6d,0x70,0x6f,0x73,0x69,0x74,0x69,0x6f,0x6e,0x74,0x79,0x70,0x65,0xc3, +3,0x15,0x61,0x75,0x6c,0x74,0x69,0x67,0x1f,0x6e,0x6f,0x72,0x61,0x62,0x6c,0x65, +0x63,0x6f,0x64,0x65,0x70,0x6f,0x69,0x6e,0x74,0x2b,0x2a,0x10,0x61,0x2e,0x15,0x63, +0x72,0x69,0x74,0x69,0x63,0x2f,3,0x66,0x34,0x6e,0x3e,0x74,0x42,0x79,0x22,0x11, +0x65,0x73,0x23,0x20,0x13,0x61,0x6c,0x73,0x65,0x21,0x20,0x10,0x6f,0x21,0x22,0x12, +0x72,0x75,0x65,0x23,0xb,0x6b,0x5b,0x6f,0x23,0x6f,0x3c,0x72,0x4c,0x76,1,0x69, +0x24,0x72,0x33,0x13,0x72,0x61,0x6d,0x61,0x33,0x10,0x76,0x22,0x14,0x65,0x72,0x6c, +0x61,0x79,0x23,0xa2,0xe2,0x13,0x69,0x67,0x68,0x74,0xa3,0xe2,0x6b,0x58,0x6c,0x74, +0x6e,3,0x6b,0x2f,0x6f,0x30,0x72,0x21,0x75,0x12,0x6b,0x74,0x61,0x2f,0x19,0x74, +0x72,0x65,0x6f,0x72,0x64,0x65,0x72,0x65,0x64,0x21,1,0x61,0x24,0x76,0x31,0x18, +0x6e,0x61,0x76,0x6f,0x69,0x63,0x69,0x6e,0x67,0x31,0xa2,0xe0,0x12,0x65,0x66,0x74, +0xa3,0xe0,0x64,0x45,0x64,0x4e,0x68,0x88,0x69,1,0x6f,0x26,0x73,0xa3,0xf0,0x1a, +0x74,0x61,0x73,0x75,0x62,0x73,0x63,0x72,0x69,0x70,0x74,0xa3,0xf0,2,0x61,0xa3, +0xea,0x62,0xa3,0xe9,0x6f,0x13,0x75,0x62,0x6c,0x65,1,0x61,0x30,0x62,0x13,0x65, +0x6c,0x6f,0x77,0xa3,0xe9,0x13,0x62,0x6f,0x76,0x65,0xa3,0xea,0x12,0x61,0x6e,0x72, +0x2c,0x15,0x65,0x61,0x64,0x69,0x6e,0x67,0x2d,0x61,0xa2,0x7b,0x62,0xa2,0xd4,0x63, +0x11,0x63,0x63,4,0x31,0x3c,0x32,0xa2,0x42,0x33,0xa2,0x56,0x38,0xa2,0x64,0x39, +0x10,0x31,0xa3,0x5b,9,0x35,0xa,0x35,0x3f,0x36,0x41,0x37,0x43,0x38,0x45,0x39, +0x47,0x30,0x30,0x31,0x3c,0x32,0x42,0x33,0x4e,0x34,0x3d,0x34,1,0x33,0xa3,0x67, +0x37,0xa3,0x6b,0x36,0x10,0x38,0xa3,0x76,0x38,1,0x32,0xa3,0x7a,0x39,0xa3,0x81, +0x3a,2,0x30,0xa3,0x82,0x32,0xa3,0x84,0x33,0xa3,0x85,9,0x35,0xa,0x35,0x53, +0x36,0x55,0x37,0x57,0x38,0x59,0x39,0x5b,0x30,0x49,0x31,0x4b,0x32,0x4d,0x33,0x4f, +0x34,0x51,6,0x33,8,0x33,0x63,0x34,0x65,0x35,0x67,0x36,0x69,0x30,0x5d,0x31, +0x5f,0x32,0x61,0x10,0x34,0xa3,0x54,0xa2,0xe6,3,0x62,0xa0,0x6c,0xa3,0xe4,0x72, +0xa3,0xe8,0x74,2,0x61,0x74,0x62,0x7c,0x74,0x14,0x61,0x63,0x68,0x65,0x64,1, +0x61,0x3e,0x62,0x13,0x65,0x6c,0x6f,0x77,0xa2,0xca,0x13,0x6c,0x65,0x66,0x74,0xa3, +0xc8,0x13,0x62,0x6f,0x76,0x65,0xa2,0xd6,0x14,0x72,0x69,0x67,0x68,0x74,0xa3,0xd8, +0xa2,0xd6,0x10,0x72,0xa3,0xd8,0xa2,0xca,0x10,0x6c,0xa3,0xc8,0x12,0x6f,0x76,0x65, +0xa2,0xe6,1,0x6c,0x30,0x72,0x13,0x69,0x67,0x68,0x74,0xa3,0xe8,0x12,0x65,0x66, +0x74,0xa3,0xe4,0xa2,0xdc,2,0x65,0x2c,0x6c,0xa3,0xda,0x72,0xa3,0xde,0x12,0x6c, +0x6f,0x77,0xa2,0xdc,1,0x6c,0x30,0x72,0x13,0x69,0x67,0x68,0x74,0xa3,0xde,0x12, +0x65,0x66,0x74,0xa3,0xda,0xb,0x6e,0xc0,0xca,0x72,0x5f,0x72,0x46,0x73,0xa2,0x48, +0x77,1,0x68,0x24,0x73,0x33,0x17,0x69,0x74,0x65,0x73,0x70,0x61,0x63,0x65,0x33, +0x22,1,0x69,0x30,0x6c,2,0x65,0x3d,0x69,0x4b,0x6f,0x3f,0x18,0x67,0x68,0x74, +0x74,0x6f,0x6c,0x65,0x66,0x74,0x22,2,0x65,0x38,0x69,0x48,0x6f,0x16,0x76,0x65, +0x72,0x72,0x69,0x64,0x65,0x3f,0x17,0x6d,0x62,0x65,0x64,0x64,0x69,0x6e,0x67,0x3d, +0x15,0x73,0x6f,0x6c,0x61,0x74,0x65,0x4b,0x30,0x1e,0x65,0x67,0x6d,0x65,0x6e,0x74, +0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x31,0x6e,0xa2,0x41,0x6f,0xa2,0x53, +0x70,2,0x61,0x66,0x64,0x86,0x6f,0x1b,0x70,0x64,0x69,0x72,0x65,0x63,0x74,0x69, +0x6f,0x6e,0x61,0x6c,1,0x66,0x32,0x69,0x15,0x73,0x6f,0x6c,0x61,0x74,0x65,0x4d, +0x14,0x6f,0x72,0x6d,0x61,0x74,0x41,0x1f,0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73, +0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0x2f,1,0x66,0x41,0x69,0x4d,1,0x6f, +0x28,0x73,0x10,0x6d,0x43,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e,0x67,0x6d,0x61, +0x72,0x6b,0x43,1,0x6e,0x35,0x74,0x19,0x68,0x65,0x72,0x6e,0x65,0x75,0x74,0x72, +0x61,0x6c,0x35,0x65,0x88,0x65,0x98,0x66,0xa2,0x6a,0x6c,0x20,1,0x65,0x30,0x72, +2,0x65,0x37,0x69,0x49,0x6f,0x39,0x18,0x66,0x74,0x74,0x6f,0x72,0x69,0x67,0x68, +0x74,0x20,2,0x65,0x38,0x69,0x48,0x6f,0x16,0x76,0x65,0x72,0x72,0x69,0x64,0x65, +0x39,0x17,0x6d,0x62,0x65,0x64,0x64,0x69,0x6e,0x67,0x37,0x15,0x73,0x6f,0x6c,0x61, +0x74,0x65,0x49,3,0x6e,0x25,0x73,0x27,0x74,0x29,0x75,0x15,0x72,0x6f,0x70,0x65, +0x61,0x6e,2,0x6e,0x3c,0x73,0x46,0x74,0x18,0x65,0x72,0x6d,0x69,0x6e,0x61,0x74, +0x6f,0x72,0x29,0x14,0x75,0x6d,0x62,0x65,0x72,0x25,0x17,0x65,0x70,0x61,0x72,0x61, +0x74,0x6f,0x72,0x27,1,0x69,0x28,0x73,0x10,0x69,0x47,0x1f,0x72,0x73,0x74,0x73, +0x74,0x72,0x6f,0x6e,0x67,0x69,0x73,0x6f,0x6c,0x61,0x74,0x65,0x47,0x61,0x4e,0x62, +0x84,0x63,1,0x6f,0x24,0x73,0x2d,0x1c,0x6d,0x6d,0x6f,0x6e,0x73,0x65,0x70,0x61, +0x72,0x61,0x74,0x6f,0x72,0x2d,2,0x6c,0x3b,0x6e,0x2b,0x72,0x13,0x61,0x62,0x69, +0x63,1,0x6c,0x30,0x6e,0x14,0x75,0x6d,0x62,0x65,0x72,0x2b,0x14,0x65,0x74,0x74, +0x65,0x72,0x3b,0x2e,1,0x6e,0x45,0x6f,0x1c,0x75,0x6e,0x64,0x61,0x72,0x79,0x6e, +0x65,0x75,0x74,0x72,0x61,0x6c,0x45,0,0x16,0x6d,0xc8,0xc8,0x74,0xc1,0xee,0x77, +0x6a,0x77,0x48,0x79,0x70,0x7a,0x1d,0x61,0x6e,0x61,0x62,0x61,0x7a,0x61,0x72,0x73, +0x71,0x75,0x61,0x72,0x65,0xa5,0x18,0x10,0x61,1,0x6e,0x36,0x72,0x16,0x61,0x6e, +0x67,0x63,0x69,0x74,0x69,0xa3,0xfc,0x12,0x63,0x68,0x6f,0xa5,0x2c,1,0x65,0x88, +0x69,2,0x6a,0x3c,0x72,0x68,0x73,0x17,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x73, +0xa3,0x48,0x12,0x69,0x6e,0x67,0xa2,0x74,0x1e,0x68,0x65,0x78,0x61,0x67,0x72,0x61, +0x6d,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x74,0x16,0x61,0x64,0x69,0x63,0x61, +0x6c,0x73,0xa3,0x49,0x13,0x7a,0x69,0x64,0x69,0xa5,0x34,0x74,0xa2,0x59,0x75,0xa4, +0x35,0x76,2,0x61,0x36,0x65,0x7a,0x73,0xa2,0x6c,0x12,0x73,0x75,0x70,0xa3,0x7d, +1,0x69,0xa3,0x9f,0x72,0x1e,0x69,0x61,0x74,0x69,0x6f,0x6e,0x73,0x65,0x6c,0x65, +0x63,0x74,0x6f,0x72,0x73,0xa2,0x6c,0x19,0x73,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65, +0x6e,0x74,0xa3,0x7d,1,0x64,0x3c,0x72,0x19,0x74,0x69,0x63,0x61,0x6c,0x66,0x6f, +0x72,0x6d,0x73,0xa3,0x91,0x14,0x69,0x63,0x65,0x78,0x74,0xa2,0xaf,0x16,0x65,0x6e, +0x73,0x69,0x6f,0x6e,0x73,0xa3,0xaf,4,0x61,0x68,0x65,0xa2,0xad,0x68,0xa2,0xb0, +0x69,0xa2,0xb8,0x72,0x1c,0x61,0x6e,0x73,0x70,0x6f,0x72,0x74,0x61,0x6e,0x64,0x6d, +0x61,0x70,0xa2,0xcf,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xcf,4,0x67, +0x7e,0x69,0xa2,0x41,0x6b,0xa2,0x6a,0x6d,0xa2,0x6c,0x6e,0x12,0x67,0x75,0x74,0xa4, +0x10,1,0x63,0x40,0x73,0x11,0x75,0x70,0xa4,0x33,0x16,0x70,0x6c,0x65,0x6d,0x65, +0x6e,0x74,0xa5,0x33,0x18,0x6f,0x6d,0x70,0x6f,0x6e,0x65,0x6e,0x74,0x73,0xa5,0x11, +2,0x61,0x2a,0x62,0x32,0x73,0xa3,0x60,0x12,0x6c,0x6f,0x67,0xa3,0x62,0x13,0x61, +0x6e,0x77,0x61,0xa3,0x65,3,0x6c,0x52,0x74,0x56,0x76,0x5e,0x78,0x16,0x75,0x61, +0x6e,0x6a,0x69,0x6e,0x67,0xa2,0x7c,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3, +0x7c,0x10,0x65,0xa3,0x70,0x12,0x68,0x61,0x6d,0xa3,0xae,0x12,0x69,0x65,0x74,0xa3, +0xb7,0x11,0x72,0x69,0xa3,0xdc,0x11,0x69,0x6c,0x48,0x12,0x73,0x75,0x70,0xa4,0x2b, +0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x2b,0x13,0x6c,0x75,0x67,0x75,0x4b, +0x10,0x61,1,0x61,0x24,0x69,0x53,0x11,0x6e,0x61,0x3d,2,0x62,0x34,0x66,0x3c, +0x72,0x13,0x68,0x75,0x74,0x61,0xa3,0xfb,0x13,0x65,0x74,0x61,0x6e,0x57,0x14,0x69, +0x6e,0x61,0x67,0x68,0xa3,0x90,2,0x63,0x82,0x67,0x92,0x6e,0x1f,0x69,0x66,0x69, +0x65,0x64,0x63,0x61,0x6e,0x61,0x64,0x69,0x61,0x6e,0x61,0x62,0x6f,0x1f,0x72,0x69, +0x67,0x69,0x6e,0x61,0x6c,0x73,0x79,0x6c,0x6c,0x61,0x62,0x69,0x63,0x73,0x62,0x17, +0x65,0x78,0x74,0x65,0x6e,0x64,0x65,0x64,0xa3,0xad,0x11,0x61,0x73,0x62,0x12,0x65, +0x78,0x74,0xa3,0xad,0x15,0x61,0x72,0x69,0x74,0x69,0x63,0xa3,0x78,0x70,0xc3,0x4b, +0x70,0xa6,0x61,0x72,0xa8,0x1d,0x73,7,0x6f,0xc1,0xbe,0x6f,0xa2,0x69,0x70,0xa2, +0x85,0x75,0xa2,0xa4,0x79,2,0x6c,0x50,0x6d,0x62,0x72,0x12,0x69,0x61,0x63,0x3a, +0x12,0x73,0x75,0x70,0xa4,0x17,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x17, +0x17,0x6f,0x74,0x69,0x6e,0x61,0x67,0x72,0x69,0xa3,0x8f,0x13,0x62,0x6f,0x6c,0x73, +1,0x61,0x4c,0x66,0x10,0x6f,0x1f,0x72,0x6c,0x65,0x67,0x61,0x63,0x79,0x63,0x6f, +0x6d,0x70,0x75,0x74,0x69,0x6e,0x67,0xa5,0x32,0x1f,0x6e,0x64,0x70,0x69,0x63,0x74, +0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x78,0x74,1,0x61,0xa5,0x2a,0x65,0x14, +0x6e,0x64,0x65,0x64,0x61,0xa5,0x2a,2,0x67,0x34,0x72,0x3e,0x79,0x13,0x6f,0x6d, +0x62,0x6f,0xa5,0x16,0x13,0x64,0x69,0x61,0x6e,0xa5,0x23,0x17,0x61,0x73,0x6f,0x6d, +0x70,0x65,0x6e,0x67,0xa3,0xda,1,0x61,0x32,0x65,0x14,0x63,0x69,0x61,0x6c,0x73, +0xa3,0x56,0x12,0x63,0x69,0x6e,0x1f,0x67,0x6d,0x6f,0x64,0x69,0x66,0x69,0x65,0x72, +0x6c,0x65,0x74,0x74,0x65,0x72,0x73,0x2d,2,0x6e,0x48,0x70,0x76,0x74,0x1d,0x74, +0x6f,0x6e,0x73,0x69,0x67,0x6e,0x77,0x72,0x69,0x74,0x69,0x6e,0x67,0xa5,6,0x15, +0x64,0x61,0x6e,0x65,0x73,0x65,0xa2,0x9b,0x12,0x73,0x75,0x70,0xa2,0xdb,0x16,0x70, +0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xdb,4,0x61,0xa2,0xa8,0x65,0x5c,0x6d,0x9e, +0x70,0xa2,0x4b,0x73,0x13,0x79,0x6d,0x62,0x6f,0x1f,0x6c,0x73,0x61,0x6e,0x64,0x70, +0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa5,5,0x10,0x72,1,0x61, +0x4e,0x73,0x12,0x63,0x72,0x69,0x1f,0x70,0x74,0x73,0x61,0x6e,0x64,0x73,0x75,0x62, +0x73,0x63,0x72,0x69,0x70,0x74,0x73,0x73,0x14,0x6e,0x64,0x73,0x75,0x62,0x73,0x1b, +0x61,0x74,0x68,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x73,0xa3,0x6a,1,0x6c, +0x40,0x75,1,0x61,0x6e,0x6e,0x17,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3, +0x8e,0x15,0x65,0x6d,0x65,0x6e,0x74,0x61,1,0x6c,0x50,0x72,0x1e,0x79,0x70,0x72, +0x69,0x76,0x61,0x74,0x65,0x75,0x73,0x65,0x61,0x72,0x65,0x61,1,0x61,0xa3,0x6d, +0x62,0xa3,0x6e,3,0x61,0x5c,0x6d,0x78,0x70,0xa2,0x41,0x73,0x13,0x79,0x6d,0x62, +0x6f,0x1f,0x6c,0x73,0x61,0x6e,0x64,0x70,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70, +0x68,0x73,0xa5,5,0x14,0x72,0x72,0x6f,0x77,0x73,2,0x61,0xa3,0x67,0x62,0xa3, +0x68,0x63,0xa3,0xfa,0x13,0x61,0x74,0x68,0x65,0x1f,0x6d,0x61,0x74,0x69,0x63,0x61, +0x6c,0x6f,0x70,0x65,0x72,0x61,0x74,0x6f,0x72,0x73,0xa3,0x6a,0x19,0x75,0x6e,0x63, +0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3,0x8e,0x61,0x88,0x68,0xa2,0x48,0x69,0xa2, +0x71,0x6d,0x12,0x61,0x6c,0x6c,1,0x66,0x46,0x6b,0x15,0x61,0x6e,0x61,0x65,0x78, +0x74,0xa4,0x29,0x15,0x65,0x6e,0x73,0x69,0x6f,0x6e,0xa5,0x29,0x12,0x6f,0x72,0x6d, +1,0x73,0xa3,0x54,0x76,0x16,0x61,0x72,0x69,0x61,0x6e,0x74,0x73,0xa3,0x54,1, +0x6d,0x36,0x75,0x16,0x72,0x61,0x73,0x68,0x74,0x72,0x61,0xa3,0xa1,0x15,0x61,0x72, +0x69,0x74,0x61,0x6e,0xa3,0xac,1,0x61,0x52,0x6f,0x13,0x72,0x74,0x68,0x61,0x1f, +0x6e,0x64,0x66,0x6f,0x72,0x6d,0x61,0x74,0x63,0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x73, +0xa3,0xf7,1,0x72,0x2e,0x76,0x12,0x69,0x61,0x6e,0xa3,0x79,0x12,0x61,0x64,0x61, +0xa3,0xd9,1,0x64,0x50,0x6e,0x13,0x68,0x61,0x6c,0x61,0x50,0x1d,0x61,0x72,0x63, +0x68,0x61,0x69,0x63,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0xf9,0x13,0x64,0x68, +0x61,0x6d,0xa3,0xf8,5,0x72,0x35,0x72,0x44,0x73,0x64,0x75,1,0x61,0xa3,0x4e, +0x6e,0x17,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x71,0x17,0x69,0x76,0x61,0x74, +0x65,0x75,0x73,0x65,0xa2,0x4e,0x13,0x61,0x72,0x65,0x61,0xa3,0x4e,0x1b,0x61,0x6c, +0x74,0x65,0x72,0x70,0x61,0x68,0x6c,0x61,0x76,0x69,0xa3,0xf6,0x61,0x40,0x68,0x82, +0x6c,0x19,0x61,0x79,0x69,0x6e,0x67,0x63,0x61,0x72,0x64,0x73,0xa3,0xcc,2,0x68, +0x38,0x6c,0x4a,0x75,0x15,0x63,0x69,0x6e,0x68,0x61,0x75,0xa3,0xf5,0x17,0x61,0x77, +0x68,0x68,0x6d,0x6f,0x6e,0x67,0xa3,0xf3,0x15,0x6d,0x79,0x72,0x65,0x6e,0x65,0xa3, +0xf4,1,0x61,0x8e,0x6f,1,0x65,0x74,0x6e,0x16,0x65,0x74,0x69,0x63,0x65,0x78, +0x74,0xa2,0x72,1,0x65,0x2c,0x73,0x11,0x75,0x70,0xa3,0x8d,0x15,0x6e,0x73,0x69, +0x6f,0x6e,0x73,0xa2,0x72,0x19,0x73,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74, +0xa3,0x8d,0x15,0x6e,0x69,0x63,0x69,0x61,0x6e,0xa3,0x97,1,0x67,0x3e,0x69,0x13, +0x73,0x74,0x6f,0x73,0xa2,0xa6,0x13,0x64,0x69,0x73,0x63,0xa3,0xa6,0x12,0x73,0x70, +0x61,0xa3,0x96,1,0x65,0x5c,0x75,1,0x6d,0x2a,0x6e,0x11,0x69,0x63,0x67,0x10, +0x69,0xa2,0xc0,0x1d,0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f, +0x6c,0x73,0xa3,0xc0,0x13,0x6a,0x61,0x6e,0x67,0xa3,0xa3,0x6d,0xa2,0xe6,0x6e,0xa8, +0x19,0x6f,6,0x70,0x63,0x70,0x56,0x72,0x8a,0x73,0xa2,0x4c,0x74,0x10,0x74,0x1f, +0x6f,0x6d,0x61,0x6e,0x73,0x69,0x79,0x61,0x71,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73, +0xa5,0x28,0x18,0x74,0x69,0x63,0x61,0x6c,0x63,0x68,0x61,0x72,0x1f,0x61,0x63,0x74, +0x65,0x72,0x72,0x65,0x63,0x6f,0x67,0x6e,0x69,0x74,0x69,0x6f,0x6e,0x85,1,0x69, +0x46,0x6e,0x1e,0x61,0x6d,0x65,0x6e,0x74,0x61,0x6c,0x64,0x69,0x6e,0x67,0x62,0x61, +0x74,0x73,0xa3,0xf2,0x11,0x79,0x61,0x47,1,0x61,0x30,0x6d,0x13,0x61,0x6e,0x79, +0x61,0xa3,0x7a,0x11,0x67,0x65,0xa5,0xf,0x63,0xa2,0x71,0x67,0xa2,0x71,0x6c,1, +0x63,0xa2,0x62,0x64,5,0x70,0x38,0x70,0x36,0x73,0x56,0x74,0x14,0x75,0x72,0x6b, +0x69,0x63,0xa3,0xbf,0x11,0x65,0x72,1,0x6d,0x2e,0x73,0x12,0x69,0x61,0x6e,0xa3, +0x8c,0x11,0x69,0x63,0xa3,0xf1,0x10,0x6f,1,0x67,0x3a,0x75,0x18,0x74,0x68,0x61, +0x72,0x61,0x62,0x69,0x61,0x6e,0xa3,0xbb,0x13,0x64,0x69,0x61,0x6e,0xa5,0x22,0x68, +0x42,0x69,0x54,0x6e,0x1a,0x6f,0x72,0x74,0x68,0x61,0x72,0x61,0x62,0x69,0x61,0x6e, +0xa3,0xf0,0x17,0x75,0x6e,0x67,0x61,0x72,0x69,0x61,0x6e,0xa5,4,0x14,0x74,0x61, +0x6c,0x69,0x63,0xa3,0x58,0x13,0x68,0x69,0x6b,0x69,0xa3,0x9d,0x10,0x72,0x85,0x12, +0x68,0x61,0x6d,0x65,6,0x6f,0x86,0x6f,0x6c,0x72,0xa2,0x61,0x75,0xa2,0x62,0x79, +0x14,0x61,0x6e,0x6d,0x61,0x72,0x58,0x12,0x65,0x78,0x74,2,0x61,0xa3,0xb6,0x62, +0xa3,0xee,0x65,0x13,0x6e,0x64,0x65,0x64,1,0x61,0xa3,0xb6,0x62,0xa3,0xee,1, +0x64,0x52,0x6e,0x15,0x67,0x6f,0x6c,0x69,0x61,0x6e,0x6a,0x12,0x73,0x75,0x70,0xa4, +0xd,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0xd,0x10,0x69,0xa2,0xec,0x13, +0x66,0x69,0x65,0x72,1,0x6c,0x3c,0x74,0x19,0x6f,0x6e,0x65,0x6c,0x65,0x74,0x74, +0x65,0x72,0x73,0xa3,0x8a,0x15,0x65,0x74,0x74,0x65,0x72,0x73,0x2d,0x10,0x6f,0xa3, +0xed,1,0x6c,0x44,0x73,0x11,0x69,0x63,0xa2,0x5c,0x18,0x61,0x6c,0x73,0x79,0x6d, +0x62,0x6f,0x6c,0x73,0xa3,0x5c,0x13,0x74,0x61,0x6e,0x69,0xa5,3,0x61,0xa2,0x9b, +0x65,0xa4,0x4c,0x69,1,0x61,0xa2,0x8f,0x73,0x10,0x63,5,0x70,0x18,0x70,0xa2, +0x71,0x73,0x36,0x74,0x17,0x65,0x63,0x68,0x6e,0x69,0x63,0x61,0x6c,0x81,0x15,0x79, +0x6d,0x62,0x6f,0x6c,0x73,0x8f,0x61,0xa2,0x66,0x65,0x46,0x6d,0x19,0x61,0x74,0x68, +0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,1,0x61,0xa3,0x66,0x62,0xa3,0x69,0x17,0x6c, +0x6c,0x61,0x6e,0x65,0x6f,0x75,0x73,2,0x6d,0x3a,0x73,0x6c,0x74,0x17,0x65,0x63, +0x68,0x6e,0x69,0x63,0x61,0x6c,0x81,0x11,0x61,0x74,0x1f,0x68,0x65,0x6d,0x61,0x74, +0x69,0x63,0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,1,0x61,0xa3,0x66,0x62, +0xa3,0x69,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x8e,0x12,0x61,0x6e,0x64,1,0x61, +0x3c,0x70,0x19,0x69,0x63,0x74,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa3,0xcd,0x14, +0x72,0x72,0x6f,0x77,0x73,0xa3,0x73,0x10,0x6f,0xa3,0xd8,7,0x72,0x6f,0x72,0x44, +0x73,0x4e,0x74,0x62,0x79,0x19,0x61,0x6e,0x6e,0x75,0x6d,0x65,0x72,0x61,0x6c,0x73, +0xa5,0x20,0x13,0x63,0x68,0x65,0x6e,0xa5,0xc,0x18,0x61,0x72,0x61,0x6d,0x67,0x6f, +0x6e,0x64,0x69,0xa5,0x14,0x10,0x68,2,0x61,0x3a,0x65,0x4a,0x6f,0x17,0x70,0x65, +0x72,0x61,0x74,0x6f,0x72,0x73,0x7f,0x16,0x6c,0x70,0x68,0x61,0x6e,0x75,0x6d,0xa3, +0x5d,0x16,0x6d,0x61,0x74,0x69,0x63,0x61,0x6c,1,0x61,0x36,0x6f,0x17,0x70,0x65, +0x72,0x61,0x74,0x6f,0x72,0x73,0x7f,0x11,0x6c,0x70,0x1f,0x68,0x61,0x6e,0x75,0x6d, +0x65,0x72,0x69,0x63,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x5d,0x68,0x50,0x6b, +0x7e,0x6c,0x88,0x6e,1,0x64,0x34,0x69,0x15,0x63,0x68,0x61,0x65,0x61,0x6e,0xa3, +0xea,0x12,0x61,0x69,0x63,0xa3,0xc6,1,0x61,0x3e,0x6a,0x12,0x6f,0x6e,0x67,0xa2, +0xaa,0x14,0x74,0x69,0x6c,0x65,0x73,0xa3,0xaa,0x13,0x6a,0x61,0x6e,0x69,0xa3,0xe9, +0x13,0x61,0x73,0x61,0x72,0xa5,0x1f,0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,0x4f,3, +0x64,0x6c,0x65,0x7e,0x6e,0xa2,0x47,0x72,0x14,0x6f,0x69,0x74,0x69,0x63,1,0x63, +0x3c,0x68,0x19,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0xd7,0x15, +0x75,0x72,0x73,0x69,0x76,0x65,0xa3,0xd6,0x17,0x65,0x66,0x61,0x69,0x64,0x72,0x69, +0x6e,0xa5,0x21,0x17,0x74,0x65,0x69,0x6d,0x61,0x79,0x65,0x6b,0xa2,0xb8,0x12,0x65, +0x78,0x74,0xa2,0xd5,0x16,0x65,0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa3,0xd5,0x18,0x64, +0x65,0x6b,0x69,0x6b,0x61,0x6b,0x75,0x69,0xa3,0xeb,6,0x6b,0x3b,0x6b,0x56,0x6f, +0x5a,0x75,0x64,0x79,0x11,0x69,0x61,0x1f,0x6b,0x65,0x6e,0x67,0x70,0x75,0x61,0x63, +0x68,0x75,0x65,0x68,0x6d,0x6f,0x6e,0x67,0xa5,0x27,0x10,0x6f,0xa3,0x92,0x14,0x62, +0x6c,0x6f,0x63,0x6b,0x21,1,0x6d,0x2c,0x73,0x11,0x68,0x75,0xa5,0x15,0x17,0x62, +0x65,0x72,0x66,0x6f,0x72,0x6d,0x73,0x7b,0x61,0x44,0x62,0x21,0x65,0x10,0x77,1, +0x61,0xa5,0xe,0x74,0x14,0x61,0x69,0x6c,0x75,0x65,0xa3,0x8b,1,0x62,0x38,0x6e, +0x17,0x64,0x69,0x6e,0x61,0x67,0x61,0x72,0x69,0xa5,0x26,0x15,0x61,0x74,0x61,0x65, +0x61,0x6e,0xa3,0xef,0x67,0xc4,0x32,0x6a,0xc1,0xb9,0x6a,0xa2,0xd5,0x6b,0xa2,0xee, +0x6c,4,0x61,0x54,0x65,0xa2,0x61,0x69,0xa2,0x78,0x6f,0xa2,0xb7,0x79,1,0x63, +0x2e,0x64,0x12,0x69,0x61,0x6e,0xa3,0xa9,0x12,0x69,0x61,0x6e,0xa3,0xa7,1,0x6f, +0x55,0x74,0x11,0x69,0x6e,1,0x31,0x82,0x65,0x11,0x78,0x74,4,0x61,0x5c,0x62, +0x29,0x63,0xa3,0x94,0x64,0xa3,0x95,0x65,0xa2,0xe7,0x13,0x6e,0x64,0x65,0x64,4, +0x61,0x36,0x62,0x29,0x63,0xa3,0x94,0x64,0xa3,0x95,0x65,0xa3,0xe7,0x26,0x18,0x64, +0x64,0x69,0x74,0x69,0x6f,0x6e,0x61,0x6c,0x6d,0x24,0x12,0x73,0x75,0x70,0x24,0x16, +0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x25,1,0x70,0x42,0x74,0x1d,0x74,0x65,0x72, +0x6c,0x69,0x6b,0x65,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x79,0x12,0x63,0x68,0x61, +0xa3,0x9c,2,0x6d,0x4e,0x6e,0x54,0x73,0x10,0x75,0xa2,0xb0,0x12,0x73,0x75,0x70, +0xa4,0x31,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0x31,0x11,0x62,0x75,0xa3, +0x6f,0x12,0x65,0x61,0x72,1,0x61,0xa3,0xe8,0x62,1,0x69,0x38,0x73,0x17,0x79, +0x6c,0x6c,0x61,0x62,0x61,0x72,0x79,0xa3,0x75,0x17,0x64,0x65,0x6f,0x67,0x72,0x61, +0x6d,0x73,0xa3,0x76,0x1a,0x77,0x73,0x75,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x73, +0xa3,0x4d,0x10,0x61,1,0x6d,0x32,0x76,0x14,0x61,0x6e,0x65,0x73,0x65,0xa3,0xb5, +0x10,0x6f,0x5c,0x12,0x65,0x78,0x74,1,0x61,0xa3,0xb4,0x62,0xa3,0xb9,1,0x61, +0xa2,0x43,0x68,4,0x61,0x40,0x69,0x50,0x6d,0x6e,0x6f,0x86,0x75,0x15,0x64,0x61, +0x77,0x61,0x64,0x69,0xa3,0xe6,0x16,0x72,0x6f,0x73,0x68,0x74,0x68,0x69,0xa3,0x89, +0x1d,0x74,0x61,0x6e,0x73,0x6d,0x61,0x6c,0x6c,0x73,0x63,0x72,0x69,0x70,0x74,0xa5, +0x30,0x11,0x65,0x72,0x68,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x71,0x12, +0x6a,0x6b,0x69,0xa3,0xe5,3,0x69,0x3a,0x6e,0x42,0x74,0xa2,0x51,0x79,0x13,0x61, +0x68,0x6c,0x69,0xa3,0xa2,0x12,0x74,0x68,0x69,0xa3,0xc1,3,0x61,0x34,0x62,0x76, +0x67,0x7c,0x6e,0x12,0x61,0x64,0x61,0x4d,1,0x65,0x40,0x73,0x11,0x75,0x70,0xa2, +0xcb,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xcb,0x11,0x78,0x74,1,0x61, +0xa5,0x13,0x65,0x14,0x6e,0x64,0x65,0x64,0x61,0xa5,0x13,0x11,0x75,0x6e,0xa3,0x42, +0x11,0x78,0x69,0x96,0x17,0x72,0x61,0x64,0x69,0x63,0x61,0x6c,0x73,0x97,0x14,0x61, +0x6b,0x61,0x6e,0x61,0x9e,1,0x65,0x4c,0x70,0x10,0x68,0x1f,0x6f,0x6e,0x65,0x74, +0x69,0x63,0x65,0x78,0x74,0x65,0x6e,0x73,0x69,0x6f,0x6e,0x73,0xa3,0x6b,0x11,0x78, +0x74,0xa3,0x6b,0x67,0xa2,0xb5,0x68,0xa4,0x84,0x69,3,0x64,0x4c,0x6d,0xa2,0x55, +0x6e,0xa2,0x62,0x70,0x13,0x61,0x65,0x78,0x74,0x2a,0x16,0x65,0x6e,0x73,0x69,0x6f, +0x6e,0x73,0x2b,1,0x63,0x99,0x65,0x17,0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63, +1,0x64,0x56,0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa4,0xb,0x1d,0x61,0x6e, +0x64,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa5,0xb,0x13,0x65, +0x73,0x63,0x72,0x1f,0x69,0x70,0x74,0x69,0x6f,0x6e,0x63,0x68,0x61,0x72,0x61,0x63, +0x74,0x65,0x72,0x73,0x99,0x1c,0x70,0x65,0x72,0x69,0x61,0x6c,0x61,0x72,0x61,0x6d, +0x61,0x69,0x63,0xa3,0xba,1,0x64,0x62,0x73,0x1b,0x63,0x72,0x69,0x70,0x74,0x69, +0x6f,0x6e,0x61,0x6c,0x70,0x61,1,0x68,0x32,0x72,0x14,0x74,0x68,0x69,0x61,0x6e, +0xa3,0xbd,0x13,0x6c,0x61,0x76,0x69,0xa3,0xbe,0x11,0x69,0x63,1,0x6e,0x3e,0x73, +0x1a,0x69,0x79,0x61,0x71,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa5,0x1e,0x19,0x75, +0x6d,0x62,0x65,0x72,0x66,0x6f,0x72,0x6d,0x73,0xa3,0xb2,4,0x65,0x74,0x6c,0xa2, +0x82,0x6f,0xa2,0x9a,0x72,0xa2,0x9e,0x75,2,0x6a,0x34,0x6e,0x3e,0x72,0x14,0x6d, +0x75,0x6b,0x68,0x69,0x43,0x14,0x61,0x72,0x61,0x74,0x69,0x45,0x18,0x6a,0x61,0x6c, +0x61,0x67,0x6f,0x6e,0x64,0x69,0xa5,0x1c,1,0x6e,0xa2,0x46,0x6f,1,0x6d,0x6e, +0x72,0x13,0x67,0x69,0x61,0x6e,0x5a,1,0x65,0x40,0x73,0x11,0x75,0x70,0xa2,0x87, +0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x87,0x11,0x78,0x74,0xa4,0x1b,0x14, +0x65,0x6e,0x64,0x65,0x64,0xa5,0x1b,0x1a,0x65,0x74,0x72,0x69,0x63,0x73,0x68,0x61, +0x70,0x65,0x73,0x8c,0x12,0x65,0x78,0x74,0xa2,0xe3,0x14,0x65,0x6e,0x64,0x65,0x64, +0xa3,0xe3,0x1e,0x65,0x72,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69, +0x6f,0x6e,0x71,0x17,0x61,0x67,0x6f,0x6c,0x69,0x74,0x69,0x63,0xa2,0x88,0x12,0x73, +0x75,0x70,0xa4,0xa,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa5,0xa,0x13,0x74, +0x68,0x69,0x63,0xa3,0x59,1,0x61,0x5c,0x65,0x11,0x65,0x6b,0x30,1,0x61,0x38, +0x65,0x11,0x78,0x74,0x6e,0x14,0x65,0x6e,0x64,0x65,0x64,0x6f,0x17,0x6e,0x64,0x63, +0x6f,0x70,0x74,0x69,0x63,0x31,0x13,0x6e,0x74,0x68,0x61,0xa3,0xe4,2,0x61,0xa2, +0x48,0x65,0xa2,0xdf,0x69,1,0x67,0x30,0x72,0x14,0x61,0x67,0x61,0x6e,0x61,0x9d, +0x10,0x68,1,0x70,0x3a,0x73,0x18,0x75,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x73, +0xa3,0x4b,1,0x72,0x3c,0x75,0x19,0x73,0x75,0x72,0x72,0x6f,0x67,0x61,0x74,0x65, +0x73,0xa3,0x4c,0x11,0x69,0x76,0x1f,0x61,0x74,0x65,0x75,0x73,0x65,0x73,0x75,0x72, +0x72,0x6f,0x67,0x61,0x74,0x65,0x73,0xa3,0x4c,2,0x6c,0x32,0x6e,0x9a,0x74,0x12, +0x72,0x61,0x6e,0xa5,2,0x10,0x66,2,0x61,0x58,0x6d,0x70,0x77,0x14,0x69,0x64, +0x74,0x68,0x61,0x1f,0x6e,0x64,0x66,0x75,0x6c,0x6c,0x77,0x69,0x64,0x74,0x68,0x66, +0x6f,0x72,0x6d,0x73,0xa3,0x57,0x1a,0x6e,0x64,0x66,0x75,0x6c,0x6c,0x66,0x6f,0x72, +0x6d,0x73,0xa3,0x57,0x13,0x61,0x72,0x6b,0x73,0xa3,0x52,2,0x67,0x34,0x69,0xa2, +0x45,0x75,0x12,0x6e,0x6f,0x6f,0xa3,0x63,0x11,0x75,0x6c,0xa2,0x4a,2,0x63,0x3c, +0x6a,0x5e,0x73,0x17,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x73,0xa3,0x4a,0x1f,0x6f, +0x6d,0x70,0x61,0x74,0x69,0x62,0x69,0x6c,0x69,0x74,0x79,0x6a,0x61,0x6d,0x6f,0xa3, +0x41,0x12,0x61,0x6d,0x6f,0x5c,0x17,0x65,0x78,0x74,0x65,0x6e,0x64,0x65,0x64,1, +0x61,0xa3,0xb4,0x62,0xa3,0xb9,0x19,0x66,0x69,0x72,0x6f,0x68,0x69,0x6e,0x67,0x79, +0x61,0xa5,0x1d,0x13,0x62,0x72,0x65,0x77,0x37,0x61,0xa4,5,0x62,0xa6,0x45,0x63, +0xa8,0x1a,0x64,0xac,0xb8,0x65,5,0x6d,0xa2,0x6d,0x86,0x6e,0x96,0x74,0x15,0x68, +0x69,0x6f,0x70,0x69,0x63,0x5e,1,0x65,0x40,0x73,0x11,0x75,0x70,0xa2,0x86,0x16, +0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x86,0x11,0x78,0x74,0xa2,0x85,1,0x61, +0xa3,0xc8,0x65,0x13,0x6e,0x64,0x65,0x64,0xa2,0x85,0x10,0x61,0xa3,0xc8,0x16,0x6f, +0x74,0x69,0x63,0x6f,0x6e,0x73,0xa3,0xce,0x15,0x63,0x6c,0x6f,0x73,0x65,0x64,2, +0x61,0x5a,0x63,0x9e,0x69,0x1c,0x64,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x69,0x63, +0x73,0x75,0x70,0xa2,0xc4,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xc4,0x16, +0x6c,0x70,0x68,0x61,0x6e,0x75,0x6d,0x86,1,0x65,0x2c,0x73,0x11,0x75,0x70,0xa3, +0xc3,0x13,0x72,0x69,0x63,0x73,0x86,0x18,0x75,0x70,0x70,0x6c,0x65,0x6d,0x65,0x6e, +0x74,0xa3,0xc3,0x11,0x6a,0x6b,0xa2,0x44,0x1f,0x6c,0x65,0x74,0x74,0x65,0x72,0x73, +0x61,0x6e,0x64,0x6d,0x6f,0x6e,0x74,0x68,0x73,0xa3,0x44,0x61,0x4a,0x67,0x76,0x6c, +1,0x62,0x30,0x79,0x13,0x6d,0x61,0x69,0x63,0xa5,0x25,0x13,0x61,0x73,0x61,0x6e, +0xa3,0xe2,0x13,0x72,0x6c,0x79,0x64,0x1f,0x79,0x6e,0x61,0x73,0x74,0x69,0x63,0x63, +0x75,0x6e,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa5,1,0x1f,0x79,0x70,0x74,0x69,0x61, +0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,1,0x66,0x26,0x73,0xa3, +0xc2,0x1c,0x6f,0x72,0x6d,0x61,0x74,0x63,0x6f,0x6e,0x74,0x72,0x6f,0x6c,0x73,0xa5, +0x24,7,0x6e,0xc0,0xe5,0x6e,0x3e,0x72,0xa2,0x5d,0x73,0xa2,0xd8,0x76,0x14,0x65, +0x73,0x74,0x61,0x6e,0xa3,0xbc,1,0x61,0x92,0x63,0x13,0x69,0x65,0x6e,0x74,1, +0x67,0x34,0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xa5,0x13,0x72,0x65,0x65, +0x6b,1,0x6d,0x34,0x6e,0x15,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0x7f,0x13,0x75, +0x73,0x69,0x63,0xa2,0x7e,0x19,0x61,0x6c,0x6e,0x6f,0x74,0x61,0x74,0x69,0x6f,0x6e, +0xa3,0x7e,0x10,0x74,0x1f,0x6f,0x6c,0x69,0x61,0x6e,0x68,0x69,0x65,0x72,0x6f,0x67, +0x6c,0x79,0x70,0x68,0x73,0xa3,0xfe,2,0x61,0x32,0x6d,0xa2,0x71,0x72,0x12,0x6f, +0x77,0x73,0x7d,0x12,0x62,0x69,0x63,0x38,3,0x65,0x4a,0x6d,0x66,0x70,0xa2,0x43, +0x73,0x11,0x75,0x70,0xa2,0x80,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x80, +0x11,0x78,0x74,1,0x61,0xa3,0xd2,0x65,0x14,0x6e,0x64,0x65,0x64,0x61,0xa3,0xd2, +0x12,0x61,0x74,0x68,0xa2,0xd3,0x18,0x65,0x6d,0x61,0x74,0x69,0x63,0x61,0x6c,0x61, +0x1f,0x6c,0x70,0x68,0x61,0x62,0x65,0x74,0x69,0x63,0x73,0x79,0x6d,0x62,0x6f,0x6c, +0x73,0xa3,0xd3,1,0x66,0x42,0x72,0x1e,0x65,0x73,0x65,0x6e,0x74,0x61,0x74,0x69, +0x6f,0x6e,0x66,0x6f,0x72,0x6d,0x73,1,0x61,0xa3,0x51,0x62,0xa3,0x55,0x14,0x65, +0x6e,0x69,0x61,0x6e,0x35,0x12,0x63,0x69,0x69,0x23,0x64,0x9e,0x65,0xa2,0x42,0x68, +0xa2,0x4d,0x6c,1,0x63,0x62,0x70,0x17,0x68,0x61,0x62,0x65,0x74,0x69,0x63,0x70, +1,0x66,0xa3,0x50,0x72,0x1e,0x65,0x73,0x65,0x6e,0x74,0x61,0x74,0x69,0x6f,0x6e, +0x66,0x6f,0x72,0x6d,0x73,0xa3,0x50,0x16,0x68,0x65,0x6d,0x69,0x63,0x61,0x6c,0xa2, +0xd0,0x16,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0xd0,0x12,0x6c,0x61,0x6d,0xa5, +7,0x1a,0x67,0x65,0x61,0x6e,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa3,0x77,0x11, +0x6f,0x6d,0xa3,0xfd,7,0x6f,0x71,0x6f,0x64,0x72,0xa2,0x41,0x75,0xa2,0x58,0x79, +0x1b,0x7a,0x61,0x6e,0x74,0x69,0x6e,0x65,0x6d,0x75,0x73,0x69,0x63,0xa2,0x5b,0x18, +0x61,0x6c,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0xa3,0x5b,1,0x70,0x34,0x78,0x16, +0x64,0x72,0x61,0x77,0x69,0x6e,0x67,0x89,0x14,0x6f,0x6d,0x6f,0x66,0x6f,0xa0,0x12, +0x65,0x78,0x74,0xa2,0x43,0x14,0x65,0x6e,0x64,0x65,0x64,0xa3,0x43,0x10,0x61,1, +0x68,0x40,0x69,0x12,0x6c,0x6c,0x65,0x92,0x17,0x70,0x61,0x74,0x74,0x65,0x72,0x6e, +0x73,0x93,0x11,0x6d,0x69,0xa3,0xc9,1,0x67,0x2c,0x68,0x11,0x69,0x64,0xa3,0x64, +0x14,0x69,0x6e,0x65,0x73,0x65,0xa3,0x81,0x61,0x48,0x65,0xa2,0x4e,0x68,0xa2,0x52, +0x6c,0x1a,0x6f,0x63,0x6b,0x65,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x73,0x8b,3,0x6c, +0x34,0x6d,0x40,0x73,0x66,0x74,0x11,0x61,0x6b,0xa3,0xc7,0x14,0x69,0x6e,0x65,0x73, +0x65,0xa3,0x93,0x11,0x75,0x6d,0xa2,0xb1,0x12,0x73,0x75,0x70,0xa2,0xca,0x16,0x70, +0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xca,1,0x69,0x30,0x73,0x13,0x61,0x76,0x61, +0x68,0xa3,0xdd,0x15,0x63,0x6c,0x61,0x74,0x69,0x6e,0x23,0x14,0x6e,0x67,0x61,0x6c, +0x69,0x41,0x16,0x61,0x69,0x6b,0x73,0x75,0x6b,0x69,0xa5,8,5,0x6f,0xc1,0x4c, +0x6f,0xa2,0x55,0x75,0xa4,0x10,0x79,1,0x70,0x9c,0x72,0x14,0x69,0x6c,0x6c,0x69, +0x63,0x32,1,0x65,0x4c,0x73,0x11,0x75,0x70,0xa2,0x61,0x16,0x70,0x6c,0x65,0x6d, +0x65,0x6e,0x74,0xa2,0x61,0x12,0x61,0x72,0x79,0xa3,0x61,0x11,0x78,0x74,3,0x61, +0xa3,0x9e,0x62,0xa3,0xa0,0x63,0xa5,9,0x65,0x13,0x6e,0x64,0x65,0x64,2,0x61, +0xa3,0x9e,0x62,0xa3,0xa0,0x63,0xa5,9,0x1c,0x72,0x69,0x6f,0x74,0x73,0x79,0x6c, +0x6c,0x61,0x62,0x61,0x72,0x79,0xa3,0x7b,3,0x6d,0x5a,0x6e,0xa2,0x95,0x70,0xa2, +0xa0,0x75,0x17,0x6e,0x74,0x69,0x6e,0x67,0x72,0x6f,0x64,0xa2,0x9a,0x17,0x6e,0x75, +0x6d,0x65,0x72,0x61,0x6c,0x73,0xa3,0x9a,2,0x62,0x3a,0x6d,0xa2,0x5f,0x70,0x15, +0x61,0x74,0x6a,0x61,0x6d,0x6f,0xa3,0x41,0x14,0x69,0x6e,0x69,0x6e,0x67,2,0x64, +0x46,0x68,0x9e,0x6d,0x1d,0x61,0x72,0x6b,0x73,0x66,0x6f,0x72,0x73,0x79,0x6d,0x62, +0x6f,0x6c,0x73,0x77,0x1e,0x69,0x61,0x63,0x72,0x69,0x74,0x69,0x63,0x61,0x6c,0x6d, +0x61,0x72,0x6b,0x73,0x2e,2,0x65,0x40,0x66,0xa6,0x41,0x73,0x18,0x75,0x70,0x70, +0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0x83,0x16,0x78,0x74,0x65,0x6e,0x64,0x65,0x64, +0xa3,0xe0,0x17,0x61,0x6c,0x66,0x6d,0x61,0x72,0x6b,0x73,0xa3,0x52,0x11,0x6f,0x6e, +0x1f,0x69,0x6e,0x64,0x69,0x63,0x6e,0x75,0x6d,0x62,0x65,0x72,0x66,0x6f,0x72,0x6d, +0x73,0xa3,0xb2,0x1b,0x74,0x72,0x6f,0x6c,0x70,0x69,0x63,0x74,0x75,0x72,0x65,0x73, +0x83,0x12,0x74,0x69,0x63,0xa2,0x84,0x1b,0x65,0x70,0x61,0x63,0x74,0x6e,0x75,0x6d, +0x62,0x65,0x72,0x73,0xa3,0xdf,1,0x6e,0x3e,0x72,0x1b,0x72,0x65,0x6e,0x63,0x79, +0x73,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x75,0x15,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa2, +0x98,0x16,0x6e,0x75,0x6d,0x62,0x65,0x72,0x73,0xa2,0x99,0x1d,0x61,0x6e,0x64,0x70, +0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xa3,0x99,0x61,0xa2,0xe1,0x68, +0xa4,0xb,0x6a,0x10,0x6b,0xa2,0x47,4,0x63,0x8c,0x65,0xa2,0x80,0x72,0xa2,0x98, +0x73,0xa2,0xaa,0x75,0x1f,0x6e,0x69,0x66,0x69,0x65,0x64,0x69,0x64,0x65,0x6f,0x67, +0x72,0x61,0x70,0x68,0x73,0xa2,0x47,0x18,0x65,0x78,0x74,0x65,0x6e,0x73,0x69,0x6f, +0x6e,6,0x64,0x6b,0x64,0xa3,0xd1,0x65,0xa5,0,0x66,0xa5,0x12,0x67,0xa5,0x2e, +0x14,0x6f,0x6d,0x70,0x61,0x74,0xa2,0x45,1,0x66,0x96,0x69,1,0x62,0x44,0x64, +0x17,0x65,0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa2,0x4f,0x12,0x73,0x75,0x70,0xa3, +0x5f,0x14,0x69,0x6c,0x69,0x74,0x79,0xa2,0x45,1,0x66,0x54,0x69,0x18,0x64,0x65, +0x6f,0x67,0x72,0x61,0x70,0x68,0x73,0xa2,0x4f,0x19,0x73,0x75,0x70,0x70,0x6c,0x65, +0x6d,0x65,0x6e,0x74,0xa3,0x5f,0x13,0x6f,0x72,0x6d,0x73,0xa3,0x53,0x11,0x78,0x74, +6,0x64,0xc,0x64,0xa3,0xd1,0x65,0xa5,0,0x66,0xa5,0x12,0x67,0xa5,0x2e,0x61, +0xa3,0x46,0x62,0xa3,0x5e,0x63,0xa3,0xc5,0x19,0x61,0x64,0x69,0x63,0x61,0x6c,0x73, +0x73,0x75,0x70,0x94,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0x95,1,0x74,0x50, +0x79,0x14,0x6d,0x62,0x6f,0x6c,0x73,0x9a,0x1d,0x61,0x6e,0x64,0x70,0x75,0x6e,0x63, +0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x9b,0x14,0x72,0x6f,0x6b,0x65,0x73,0xa3,0x82, +2,0x6e,0x48,0x72,0x64,0x75,0x1d,0x63,0x61,0x73,0x69,0x61,0x6e,0x61,0x6c,0x62, +0x61,0x6e,0x69,0x61,0x6e,0xa3,0xde,0x1d,0x61,0x64,0x69,0x61,0x6e,0x73,0x79,0x6c, +0x6c,0x61,0x62,0x69,0x63,0x73,0x63,0x12,0x69,0x61,0x6e,0xa3,0xa8,2,0x61,0x3a, +0x65,0x4c,0x6f,0x16,0x72,0x61,0x73,0x6d,0x69,0x61,0x6e,0xa5,0x2d,1,0x6b,0x26, +0x6d,0xa3,0xa4,0x11,0x6d,0x61,0xa3,0xd4,1,0x72,0x38,0x73,0x17,0x73,0x73,0x79, +0x6d,0x62,0x6f,0x6c,0x73,0xa5,0x19,0x13,0x6f,0x6b,0x65,0x65,0x60,0x12,0x73,0x75, +0x70,0xa2,0xff,0x16,0x70,0x6c,0x65,0x6d,0x65,0x6e,0x74,0xa3,0xff,3,0x65,0x3e, +0x69,0x7e,0x6f,0xa2,0x69,0x75,0x15,0x70,0x6c,0x6f,0x79,0x61,0x6e,0xa3,0xe1,1, +0x73,0x50,0x76,0x16,0x61,0x6e,0x61,0x67,0x61,0x72,0x69,0x3e,0x12,0x65,0x78,0x74, +0xa2,0xb3,0x14,0x65,0x6e,0x64,0x65,0x64,0xa3,0xb3,0x13,0x65,0x72,0x65,0x74,0xa3, +0x5a,2,0x61,0x3a,0x6e,0x82,0x76,0x16,0x65,0x73,0x61,0x6b,0x75,0x72,0x75,0xa5, +0x2f,0x18,0x63,0x72,0x69,0x74,0x69,0x63,0x61,0x6c,0x73,0x2e,2,0x65,0x30,0x66, +0x36,0x73,0x11,0x75,0x70,0xa3,0x83,0x11,0x78,0x74,0xa3,0xe0,0x18,0x6f,0x72,0x73, +0x79,0x6d,0x62,0x6f,0x6c,0x73,0x77,0x14,0x67,0x62,0x61,0x74,0x73,0x91,1,0x67, +0x3e,0x6d,0x12,0x69,0x6e,0x6f,0xa2,0xab,0x14,0x74,0x69,0x6c,0x65,0x73,0xa3,0xab, +0x11,0x72,0x61,0xa5,0x1a,8,0x6d,0x5f,0x6d,0x3a,0x6e,0x48,0x73,0x7a,0x76,0xa2, +0x4b,0x77,0x12,0x69,0x64,0x65,0x43,0x11,0x65,0x64,0x32,0x12,0x69,0x61,0x6c,0x33, +2,0x61,0x40,0x62,0x37,0x6f,1,0x62,0x28,0x6e,0x10,0x65,0x21,0x13,0x72,0x65, +0x61,0x6b,0x37,0x10,0x72,0x34,0x12,0x72,0x6f,0x77,0x35,2,0x6d,0x38,0x71,0x46, +0x75,1,0x62,0x3d,0x70,0x3e,0x11,0x65,0x72,0x3f,1,0x61,0x24,0x6c,0x39,0x11, +0x6c,0x6c,0x39,1,0x72,0x3b,0x75,0x12,0x61,0x72,0x65,0x3b,0x12,0x65,0x72,0x74, +0x40,0x13,0x69,0x63,0x61,0x6c,0x41,0x63,0x58,0x65,0x92,0x66,0x96,0x69,1,0x6e, +0x36,0x73,0x10,0x6f,0x30,0x14,0x6c,0x61,0x74,0x65,0x64,0x31,0x11,0x69,0x74,0x2e, +0x12,0x69,0x61,0x6c,0x2f,2,0x61,0x36,0x69,0x48,0x6f,0x10,0x6d,0x24,0x12,0x70, +0x61,0x74,0x25,0x10,0x6e,0x22,0x15,0x6f,0x6e,0x69,0x63,0x61,0x6c,0x23,0x13,0x72, +0x63,0x6c,0x65,0x27,0x11,0x6e,0x63,0x27,2,0x69,0x3a,0x6f,0x44,0x72,0x10,0x61, +0x2c,0x14,0x63,0x74,0x69,0x6f,0x6e,0x2d,0x10,0x6e,0x28,0x11,0x61,0x6c,0x29,0x11, +0x6e,0x74,0x2b,4,0x61,0x3a,0x66,0x4c,0x68,0x5e,0x6e,0x70,0x77,0x2a,0x12,0x69, +0x64,0x65,0x2b,0x22,0x17,0x6d,0x62,0x69,0x67,0x75,0x6f,0x75,0x73,0x23,0x26,0x17, +0x75,0x6c,0x6c,0x77,0x69,0x64,0x74,0x68,0x27,0x24,0x17,0x61,0x6c,0x66,0x77,0x69, +0x64,0x74,0x68,0x25,0x20,1,0x61,0x30,0x65,0x14,0x75,0x74,0x72,0x61,0x6c,0x21, +0x28,0x13,0x72,0x72,0x6f,0x77,0x29,0xd,0x6e,0xc0,0xfb,0x73,0x6d,0x73,0x3a,0x74, +0x98,0x75,0xa2,0x49,0x7a,2,0x6c,0x3b,0x70,0x3d,0x73,0x39,5,0x6f,0x28,0x6f, +0x57,0x70,0x34,0x75,0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x45,0x11,0x61,0x63, +1,0x65,0x32,0x69,0x15,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x31,0x18,0x73,0x65,0x70, +0x61,0x72,0x61,0x74,0x6f,0x72,0x39,0x63,0x53,0x6b,0x55,0x6d,0x51,0x1d,0x69,0x74, +0x6c,0x65,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x27,1,0x6e,0x40, +0x70,0x1c,0x70,0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x23, +0x17,0x61,0x73,0x73,0x69,0x67,0x6e,0x65,0x64,0x21,0x6e,0x8a,0x6f,0xa2,0x47,0x70, +8,0x66,0x14,0x66,0x5b,0x69,0x59,0x6f,0x4f,0x72,0x24,0x73,0x49,0x17,0x69,0x76, +0x61,0x74,0x65,0x75,0x73,0x65,0x43,0x61,0x2c,0x63,0x4d,0x64,0x47,0x65,0x4b,0x1f, +0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72, +0x3d,2,0x64,0x33,0x6c,0x35,0x6f,0x36,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e, +0x67,0x6d,0x61,0x72,0x6b,0x2d,1,0x70,0x7c,0x74,0x12,0x68,0x65,0x72,3,0x6c, +0x38,0x6e,0x42,0x70,0x4c,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0x57,0x14,0x65,0x74, +0x74,0x65,0x72,0x2b,0x14,0x75,0x6d,0x62,0x65,0x72,0x37,0x19,0x75,0x6e,0x63,0x74, +0x75,0x61,0x74,0x69,0x6f,0x6e,0x4f,0x1c,0x65,0x6e,0x70,0x75,0x6e,0x63,0x74,0x75, +0x61,0x74,0x69,0x6f,0x6e,0x49,0x66,0x9e,0x66,0x88,0x69,0xa2,0x4b,0x6c,0xa2,0x5c, +0x6d,4,0x61,0x60,0x63,0x31,0x65,0x2f,0x6e,0x2d,0x6f,0x15,0x64,0x69,0x66,0x69, +0x65,0x72,1,0x6c,0x30,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0x55,0x14,0x65,0x74, +0x74,0x65,0x72,0x29,0x17,0x74,0x68,0x73,0x79,0x6d,0x62,0x6f,0x6c,0x51,1,0x69, +0x2e,0x6f,0x13,0x72,0x6d,0x61,0x74,0x41,0x1d,0x6e,0x61,0x6c,0x70,0x75,0x6e,0x63, +0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x5b,0x10,0x6e,0x1f,0x69,0x74,0x69,0x61,0x6c, +0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x59,6,0x6d,0x18,0x6d, +0x29,0x6f,0x28,0x74,0x27,0x75,0x23,0x2a,0x1c,0x77,0x65,0x72,0x63,0x61,0x73,0x65, +0x6c,0x65,0x74,0x74,0x65,0x72,0x25,0x65,0x28,0x69,0x3c,0x6c,0x25,0x19,0x74,0x74, +0x65,0x72,0x6e,0x75,0x6d,0x62,0x65,0x72,0x35,0x1a,0x6e,0x65,0x73,0x65,0x70,0x61, +0x72,0x61,0x74,0x6f,0x72,0x3b,0x63,0x44,0x64,0xa2,0x60,0x65,0x1b,0x6e,0x63,0x6c, +0x6f,0x73,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x2f,6,0x6e,0x39,0x6e,0x46,0x6f, +0x4e,0x73,0x45,0x75,0x1b,0x72,0x72,0x65,0x6e,0x63,0x79,0x73,0x79,0x6d,0x62,0x6f, +0x6c,0x53,0x20,0x12,0x74,0x72,0x6c,0x3f,0x42,0x10,0x6e,1,0x6e,0x2c,0x74,0x12, +0x72,0x6f,0x6c,0x3f,0x1f,0x65,0x63,0x74,0x6f,0x72,0x70,0x75,0x6e,0x63,0x74,0x75, +0x61,0x74,0x69,0x6f,0x6e,0x4d,0x63,0x3f,0x66,0x41,0x6c,0x1d,0x6f,0x73,0x65,0x70, +0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x4b,2,0x61,0x30,0x65,0x4a, +0x69,0x12,0x67,0x69,0x74,0x33,0x1c,0x73,0x68,0x70,0x75,0x6e,0x63,0x74,0x75,0x61, +0x74,0x69,0x6f,0x6e,0x47,0x1a,0x63,0x69,0x6d,0x61,0x6c,0x6e,0x75,0x6d,0x62,0x65, +0x72,0x33,0,0x12,0x6d,0xc2,0x3f,0x73,0xa1,0x73,0x4e,0x74,0xa2,0x56,0x77,0xa2, +0x72,0x79,0xa2,0x73,0x7a,1,0x61,0x2c,0x68,0x12,0x61,0x69,0x6e,0x8b,0x11,0x69, +0x6e,0x85,5,0x74,0x22,0x74,0x38,0x77,0x4c,0x79,0x16,0x72,0x69,0x61,0x63,0x77, +0x61,0x77,0x6f,0x18,0x72,0x61,0x69,0x67,0x68,0x74,0x77,0x61,0x77,0xa3,0x55,0x15, +0x61,0x73,0x68,0x6b,0x61,0x66,0x6d,0x61,0x2e,0x65,0x38,0x68,0x11,0x69,0x6e,0x6b, +0x10,0x64,0x62,0x11,0x68,0x65,0x65,1,0x65,0x2e,0x6d,0x13,0x6b,0x61,0x74,0x68, +0x69,0x10,0x6e,0x67,1,0x61,0x4e,0x65,1,0x68,0x28,0x74,0x10,0x68,0x77,0x16, +0x6d,0x61,0x72,0x62,0x75,0x74,0x61,0x74,0x13,0x67,0x6f,0x61,0x6c,0x3d,1,0x68, +0x71,0x77,0x73,0x11,0x61,0x77,0x79,1,0x65,0x32,0x75,0x11,0x64,0x68,0x80,0x11, +0x68,0x65,0x83,0x10,0x68,0x7a,1,0x62,0x34,0x77,0x16,0x69,0x74,0x68,0x74,0x61, +0x69,0x6c,0x7f,0x14,0x61,0x72,0x72,0x65,0x65,0x7d,0x6d,0x6c,0x6e,0xa4,0x6b,0x70, +0xa4,0x88,0x71,0xa4,0x88,0x72,1,0x65,0x38,0x6f,0x18,0x68,0x69,0x6e,0x67,0x79, +0x61,0x79,0x65,0x68,0x93,1,0x68,0x5f,0x76,0x16,0x65,0x72,0x73,0x65,0x64,0x70, +0x65,0x61,2,0x61,0x2e,0x65,0xa4,0x3e,0x69,0x10,0x6d,0x53,1,0x6c,0xa2,0xe7, +0x6e,0x16,0x69,0x63,0x68,0x61,0x65,0x61,0x6e,0,0x12,0x6e,0x76,0x73,0x51,0x73, +0x3e,0x74,0x5c,0x77,0xa0,0x79,0xa2,0x42,0x7a,0x13,0x61,0x79,0x69,0x6e,0xa3,0x54, +0x10,0x61,1,0x64,0x2e,0x6d,0x12,0x65,0x6b,0x68,0xa3,0x4c,0x11,0x68,0x65,0xa3, +0x4b,3,0x61,0x38,0x65,0x3c,0x68,0x4a,0x77,0x13,0x65,0x6e,0x74,0x79,0xa3,0x51, +0x10,0x77,0xa3,0x4d,1,0x6e,0xa3,0x4e,0x74,0x10,0x68,0xa3,0x4f,0x14,0x61,0x6d, +0x65,0x64,0x68,0xa3,0x50,0x11,0x61,0x77,0xa3,0x52,0x12,0x6f,0x64,0x68,0xa3,0x53, +0x6e,0x3a,0x6f,0x40,0x70,0x46,0x71,0x4a,0x72,0x12,0x65,0x73,0x68,0xa3,0x4a,0x11, +0x75,0x6e,0xa3,0x46,0x11,0x6e,0x65,0xa3,0x47,0x10,0x65,0xa3,0x48,0x12,0x6f,0x70, +0x68,0xa3,0x49,0x67,0x33,0x67,0x38,0x68,0x40,0x6b,0x5e,0x6c,0x66,0x6d,0x11,0x65, +0x6d,0xa3,0x45,0x13,0x69,0x6d,0x65,0x6c,0xa1,1,0x65,0x32,0x75,0x14,0x6e,0x64, +0x72,0x65,0x64,0xa3,0x42,0x11,0x74,0x68,0xa3,0x41,0x12,0x61,0x70,0x68,0xa3,0x43, +0x14,0x61,0x6d,0x65,0x64,0x68,0xa3,0x44,0x61,0x34,0x62,0x4a,0x64,0x50,0x66,0x12, +0x69,0x76,0x65,0x9f,1,0x6c,0x2a,0x79,0x11,0x69,0x6e,0x97,0x12,0x65,0x70,0x68, +0x95,0x12,0x65,0x74,0x68,0x99,1,0x61,0x30,0x68,0x14,0x61,0x6d,0x65,0x64,0x68, +0x9d,0x13,0x6c,0x65,0x74,0x68,0x9b,0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,6,0x6e, +0x2c,0x6e,0x34,0x72,0x5e,0x73,0x62,0x74,0x11,0x74,0x61,0xa3,0x63,2,0x67,0x2e, +0x6e,0x32,0x79,0x10,0x61,0xa3,0x60,0x10,0x61,0xa3,0x5d,1,0x61,0xa3,0x5e,0x6e, +0x10,0x61,0xa3,0x5f,0x10,0x61,0xa3,0x61,0x11,0x73,0x61,0xa3,0x62,0x62,0x3c,0x6a, +0x42,0x6c,0x10,0x6c,1,0x61,0xa3,0x5b,0x6c,0x10,0x61,0xa3,0x5c,0x11,0x68,0x61, +0xa3,0x59,0x10,0x61,0xa3,0x5a,0x11,0x65,0x6d,0x51,2,0x6f,0x2c,0x75,0x50,0x79, +0x10,0x61,0x91,1,0x6a,0x28,0x6f,0x10,0x6e,0x55,0x1a,0x6f,0x69,0x6e,0x69,0x6e, +0x67,0x67,0x72,0x6f,0x75,0x70,0x21,0x10,0x6e,0x57,0x10,0x65,0x59,0x10,0x61,1, +0x66,0x5b,0x70,0x10,0x68,0x5d,0x66,0x9a,0x66,0x42,0x67,0x7a,0x68,0x8a,0x6b,0xa2, +0x75,0x6c,0x11,0x61,0x6d,0x4c,0x12,0x61,0x64,0x68,0x4f,2,0x61,0x3e,0x65,0x4a, +0x69,0x19,0x6e,0x61,0x6c,0x73,0x65,0x6d,0x6b,0x61,0x74,0x68,0x35,0x15,0x72,0x73, +0x69,0x79,0x65,0x68,0x8f,0x86,0x10,0x68,0x33,0x10,0x61,1,0x66,0x37,0x6d,0x11, +0x61,0x6c,0x39,1,0x61,0x40,0x65,0x3e,1,0x68,0x28,0x74,0x10,0x68,0x45,0x40, +0x13,0x67,0x6f,0x61,0x6c,0x43,2,0x68,0x3b,0x6d,0x5c,0x6e,0x1a,0x69,0x66,0x69, +0x72,0x6f,0x68,0x69,0x6e,0x67,0x79,0x61,1,0x6b,0x2a,0x70,0x10,0x61,0xa3,0x65, +0x15,0x69,0x6e,0x6e,0x61,0x79,0x61,0xa3,0x64,0x1a,0x7a,0x61,0x6f,0x6e,0x68,0x65, +0x68,0x67,0x6f,0x61,0x6c,0x3d,2,0x61,0x3a,0x68,0x44,0x6e,0x17,0x6f,0x74,0x74, +0x65,0x64,0x68,0x65,0x68,0x4b,1,0x66,0x47,0x70,0x10,0x68,0x49,0x12,0x61,0x70, +0x68,0x89,0x61,0x2e,0x62,0x8a,0x64,0xa2,0x51,0x65,0x31,2,0x66,0x3c,0x69,0x70, +0x6c,1,0x61,0x28,0x65,0x10,0x66,0x27,0x11,0x70,0x68,0x25,0x14,0x72,0x69,0x63, +0x61,0x6e,2,0x66,0x30,0x6e,0x36,0x71,0x11,0x61,0x66,0xa3,0x58,0x11,0x65,0x68, +0xa3,0x56,0x12,0x6f,0x6f,0x6e,0xa3,0x57,0x10,0x6e,0x23,1,0x65,0x4a,0x75,0x10, +0x72,0x1f,0x75,0x73,0x68,0x61,0x73,0x6b,0x69,0x79,0x65,0x68,0x62,0x61,0x72,0x72, +0x65,0x65,0x8d,1,0x68,0x29,0x74,0x10,0x68,0x2b,0x11,0x61,0x6c,0x2c,0x16,0x61, +0x74,0x68,0x72,0x69,0x73,0x68,0x2f,7,0x6e,0x2e,0x6e,0x2c,0x72,0x3e,0x74,0x56, +0x75,0x21,0x18,0x6f,0x6e,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x21,0x28,0x1a,0x69, +0x67,0x68,0x74,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x29,0x2a,0x19,0x72,0x61,0x6e, +0x73,0x70,0x61,0x72,0x65,0x6e,0x74,0x2b,0x63,0x23,0x64,0x40,0x6a,0x56,0x6c,0x26, +0x19,0x65,0x66,0x74,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x27,0x24,0x19,0x75,0x61, +0x6c,0x6a,0x6f,0x69,0x6e,0x69,0x6e,0x67,0x25,0x19,0x6f,0x69,0x6e,0x63,0x61,0x75, +0x73,0x69,0x6e,0x67,0x23,0,0x13,0x6e,0xc0,0xd0,0x73,0x49,0x73,0x48,0x75,0x78, +0x77,0x84,0x78,0x9c,0x7a,0x10,0x77,0x58,1,0x6a,0x75,0x73,0x13,0x70,0x61,0x63, +0x65,0x59,4,0x61,0x51,0x67,0x53,0x70,0x28,0x75,0x30,0x79,0x57,0x54,0x12,0x61, +0x63,0x65,0x55,0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0x53,0x15,0x6e,0x6b,0x6e, +0x6f,0x77,0x6e,0x21,1,0x6a,0x5d,0x6f,0x17,0x72,0x64,0x6a,0x6f,0x69,0x6e,0x65, +0x72,0x5d,0x10,0x78,0x21,0x6e,0x60,0x6f,0xa2,0x41,0x70,0xa2,0x50,0x71,0xa2,0x6e, +0x72,1,0x65,0x24,0x69,0x6f,0x1e,0x67,0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64, +0x69,0x63,0x61,0x74,0x6f,0x72,0x6f,4,0x65,0x3e,0x6c,0x5b,0x6f,0x46,0x73,0x45, +0x75,0x46,0x14,0x6d,0x65,0x72,0x69,0x63,0x47,0x15,0x78,0x74,0x6c,0x69,0x6e,0x65, +0x5b,0x17,0x6e,0x73,0x74,0x61,0x72,0x74,0x65,0x72,0x45,0x10,0x70,0x48,0x1c,0x65, +0x6e,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0x49,1,0x6f,0x3e, +0x72,0x4c,0x1a,0x65,0x66,0x69,0x78,0x6e,0x75,0x6d,0x65,0x72,0x69,0x63,0x4d,0x4a, +0x1b,0x73,0x74,0x66,0x69,0x78,0x6e,0x75,0x6d,0x65,0x72,0x69,0x63,0x4b,0x10,0x75, +0x4e,0x16,0x6f,0x74,0x61,0x74,0x69,0x6f,0x6e,0x4f,0x68,0x7b,0x68,0x50,0x69,0x86, +0x6a,0xa2,0x61,0x6c,0xa2,0x65,0x6d,0x1c,0x61,0x6e,0x64,0x61,0x74,0x6f,0x72,0x79, +0x62,0x72,0x65,0x61,0x6b,0x2d,4,0x32,0x5f,0x33,0x61,0x65,0x34,0x6c,0x6d,0x79, +0x3a,0x13,0x70,0x68,0x65,0x6e,0x3b,0x19,0x62,0x72,0x65,0x77,0x6c,0x65,0x74,0x74, +0x65,0x72,0x6d,2,0x64,0x28,0x6e,0x3c,0x73,0x41,0x3c,0x18,0x65,0x6f,0x67,0x72, +0x61,0x70,0x68,0x69,0x63,0x3d,0x3e,1,0x66,0x3e,0x73,0x11,0x65,0x70,1,0x61, +0x22,0x65,0x14,0x72,0x61,0x62,0x6c,0x65,0x3f,0x18,0x69,0x78,0x6e,0x75,0x6d,0x65, +0x72,0x69,0x63,0x41,2,0x6c,0x63,0x74,0x65,0x76,0x67,1,0x66,0x43,0x69,0x15, +0x6e,0x65,0x66,0x65,0x65,0x64,0x43,0x61,0x40,0x62,0x70,0x63,0xa2,0x55,0x65,0xa2, +0xdb,0x67,0x10,0x6c,0x38,0x11,0x75,0x65,0x39,2,0x69,0x23,0x6c,0x34,0x6d,0x16, +0x62,0x69,0x67,0x75,0x6f,0x75,0x73,0x23,0x24,0x17,0x70,0x68,0x61,0x62,0x65,0x74, +0x69,0x63,0x25,4,0x32,0x27,0x61,0x29,0x62,0x2b,0x6b,0x2d,0x72,0x12,0x65,0x61, +0x6b,2,0x61,0x36,0x62,0x3e,0x73,0x15,0x79,0x6d,0x62,0x6f,0x6c,0x73,0x57,0x13, +0x66,0x74,0x65,0x72,0x29,1,0x65,0x2a,0x6f,0x11,0x74,0x68,0x27,0x13,0x66,0x6f, +0x72,0x65,0x2b,7,0x6d,0x51,0x6d,0x33,0x6f,0x28,0x70,0x69,0x72,0x35,1,0x6d, +0x76,0x6e,1,0x64,0x3c,0x74,0x1a,0x69,0x6e,0x67,0x65,0x6e,0x74,0x62,0x72,0x65, +0x61,0x6b,0x2f,0x15,0x69,0x74,0x69,0x6f,0x6e,0x61,0x1f,0x6c,0x6a,0x61,0x70,0x61, +0x6e,0x65,0x73,0x65,0x73,0x74,0x61,0x72,0x74,0x65,0x72,0x6b,1,0x62,0x3a,0x70, +0x19,0x6c,0x65,0x78,0x63,0x6f,0x6e,0x74,0x65,0x78,0x74,0x51,0x18,0x69,0x6e,0x69, +0x6e,0x67,0x6d,0x61,0x72,0x6b,0x33,0x61,0x6a,0x62,0x2f,0x6a,0x6b,0x6c,0x30,0x13, +0x6f,0x73,0x65,0x70,1,0x61,0x38,0x75,0x18,0x6e,0x63,0x74,0x75,0x61,0x74,0x69, +0x6f,0x6e,0x31,0x18,0x72,0x65,0x6e,0x74,0x68,0x65,0x73,0x69,0x73,0x69,0x1b,0x72, +0x72,0x69,0x61,0x67,0x65,0x72,0x65,0x74,0x75,0x72,0x6e,0x35,2,0x62,0x3e,0x6d, +0x46,0x78,0x36,0x18,0x63,0x6c,0x61,0x6d,0x61,0x74,0x69,0x6f,0x6e,0x37,0x70,0x12, +0x61,0x73,0x65,0x71,0x72,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x73,1,0x64, +0x42,0x6e,1,0x6f,0x32,0x75,0x26,0x14,0x6d,0x65,0x72,0x69,0x63,0x27,0x11,0x6e, +0x65,0x21,1,0x65,0x2e,0x69,0x24,0x12,0x67,0x69,0x74,0x25,0x22,0x14,0x63,0x69, +0x6d,0x61,0x6c,0x23,0,0x18,0x6e,0xc4,0x2a,0x74,0xc1,0x6d,0x77,0x96,0x77,0xa2, +0x4c,0x78,0xa2,0x70,0x79,0xa2,0x7a,0x7a,6,0x73,0x1e,0x73,0x34,0x78,0x42,0x79, +0x48,0x7a,0x11,0x7a,0x7a,0xa3,0x67,0x10,0x79,1,0x65,0xa3,0xae,0x6d,0xa3,0x81, +0x11,0x78,0x78,0xa3,0x66,0x11,0x79,0x79,0x21,0x61,0x30,0x69,0x58,0x6d,0x11,0x74, +0x68,0xa3,0x80,0x10,0x6e,1,0x61,0x26,0x62,0xa3,0xb1,0x1a,0x62,0x61,0x7a,0x61, +0x72,0x73,0x71,0x75,0x61,0x72,0x65,0xa3,0xb1,0x11,0x6e,0x68,0x23,2,0x61,0x30, +0x63,0x5a,0x6f,0x11,0x6c,0x65,0xa3,0x9b,1,0x6e,0x3c,0x72,0x10,0x61,0xa2,0x92, +0x15,0x6e,0x67,0x63,0x69,0x74,0x69,0xa3,0x92,0x12,0x63,0x68,0x6f,0xa3,0xbc,0x11, +0x68,0x6f,0xa3,0xbc,1,0x70,0x2c,0x73,0x11,0x75,0x78,0xa3,0x65,0x11,0x65,0x6f, +0x9b,1,0x65,0x2c,0x69,0x72,0x11,0x69,0x69,0x73,0x11,0x7a,0x69,0xa2,0xc0,0x11, +0x64,0x69,0xa3,0xc0,0x74,0x4a,0x75,0xa2,0xba,0x76,1,0x61,0x2c,0x69,0x11,0x73, +0x70,0xa3,0x64,0x10,0x69,0xa2,0x63,0x10,0x69,0xa3,0x63,5,0x67,0x36,0x67,0x68, +0x68,0x6c,0x69,2,0x62,0x3a,0x66,0x4a,0x72,0x10,0x68,0xa2,0x9e,0x12,0x75,0x74, +0x61,0xa3,0x9e,1,0x65,0x24,0x74,0x6f,0x12,0x74,0x61,0x6e,0x6f,0x14,0x69,0x6e, +0x61,0x67,0x68,0x99,0x11,0x6c,0x67,0x75,0x10,0x61,1,0x61,0x24,0x69,0x6d,0x6a, +0x11,0x6e,0x61,0x6b,0x61,0x30,0x65,0xa2,0x5b,0x66,0x11,0x6e,0x67,0x99,6,0x6c, +0x21,0x6c,0x32,0x6d,0x38,0x6e,0x44,0x76,0x10,0x74,0xa3,0x7f,1,0x65,0x89,0x75, +0x97,1,0x69,0x24,0x6c,0x67,0x10,0x6c,0x67,0x10,0x67,0xa2,0x9a,0x11,0x75,0x74, +0xa3,0x9a,0x67,0x36,0x69,0x52,0x6b,0x10,0x72,0xa2,0x99,0x10,0x69,0xa3,0x99,1, +0x61,0x30,0x62,0x7a,0x13,0x61,0x6e,0x77,0x61,0x7b,0x12,0x6c,0x6f,0x67,0x75,2, +0x6c,0x32,0x74,0x34,0x76,0x12,0x69,0x65,0x74,0xa3,0x7f,0x10,0x65,0x89,0x12,0x68, +0x61,0x6d,0xa3,0x6a,1,0x6c,0x2a,0x6e,0x10,0x67,0xa3,0x62,0x10,0x75,0x68,0x11, +0x67,0x75,0x69,1,0x67,0x32,0x6e,0x14,0x6b,0x6e,0x6f,0x77,0x6e,0xa3,0x67,0x11, +0x61,0x72,0x8a,0x13,0x69,0x74,0x69,0x63,0x8b,0x71,0xc1,0x13,0x71,0xa2,0xde,0x72, +0xa2,0xe3,0x73,6,0x69,0x8a,0x69,0x72,0x6f,0xa2,0x4c,0x75,0xa2,0x75,0x79,1, +0x6c,0x46,0x72,4,0x63,0x65,0x65,0xa3,0x5f,0x69,0x2c,0x6a,0xa3,0x60,0x6e,0xa3, +0x61,0x11,0x61,0x63,0x65,0x10,0x6f,0x94,0x16,0x74,0x69,0x6e,0x61,0x67,0x72,0x69, +0x95,2,0x64,0x3c,0x67,0x4c,0x6e,1,0x64,0xa3,0x91,0x68,0x62,0x12,0x61,0x6c, +0x61,0x63,0x10,0x64,0xa2,0xa6,0x12,0x68,0x61,0x6d,0xa3,0xa6,0x17,0x6e,0x77,0x72, +0x69,0x74,0x69,0x6e,0x67,0xa3,0x70,2,0x67,0x3a,0x72,0x52,0x79,0x10,0x6f,0xa2, +0xb0,0x12,0x6d,0x62,0x6f,0xa3,0xb0,1,0x64,0x26,0x6f,0xa3,0xb8,0xa2,0xb7,0x12, +0x69,0x61,0x6e,0xa3,0xb7,0x10,0x61,0xa2,0x98,0x16,0x73,0x6f,0x6d,0x70,0x65,0x6e, +0x67,0xa3,0x98,0x11,0x6e,0x64,0xa2,0x71,0x14,0x61,0x6e,0x65,0x73,0x65,0xa3,0x71, +0x61,0x5c,0x67,0xa2,0x43,0x68,1,0x61,0x2a,0x72,0x10,0x64,0xa3,0x97,2,0x72, +0x28,0x76,0x30,0x77,0x87,0x12,0x61,0x64,0x61,0xa3,0x97,0x12,0x69,0x61,0x6e,0x87, +2,0x6d,0x40,0x72,0x58,0x75,0x10,0x72,0xa2,0x6f,0x15,0x61,0x73,0x68,0x74,0x72, +0x61,0xa3,0x6f,1,0x61,0x26,0x72,0xa3,0x7e,0x14,0x72,0x69,0x74,0x61,0x6e,0xa3, +0x7e,1,0x61,0xa3,0x5e,0x62,0xa3,0x85,0x11,0x6e,0x77,0xa3,0x70,0x11,0x61,0x61, +1,0x63,0x2f,0x69,0x23,3,0x65,0x3e,0x6a,0x48,0x6f,0x4e,0x75,0x10,0x6e,1, +0x69,0x24,0x72,0x61,0x10,0x63,0x61,0x13,0x6a,0x61,0x6e,0x67,0xa3,0x6e,0x11,0x6e, +0x67,0xa3,0x6e,1,0x68,0x2a,0x72,0x10,0x6f,0xa3,0x5d,0x10,0x67,0xa3,0xb6,0x6e, +0xa2,0x83,0x6f,0xa2,0xf2,0x70,5,0x6c,0x1e,0x6c,0x44,0x72,0x4a,0x73,0x1b,0x61, +0x6c,0x74,0x65,0x72,0x70,0x61,0x68,0x6c,0x61,0x76,0x69,0xa3,0x7b,0x11,0x72,0x64, +0xa3,0x5c,0x11,0x74,0x69,0xa3,0x7d,0x61,0x7c,0x65,0xa2,0x54,0x68,3,0x61,0x3e, +0x6c,0x4e,0x6e,0x5e,0x6f,0x16,0x65,0x6e,0x69,0x63,0x69,0x61,0x6e,0xa3,0x5b,0x10, +0x67,0xa2,0x5a,0x12,0x73,0x70,0x61,0xa3,0x5a,2,0x69,0xa3,0x7a,0x70,0xa3,0x7b, +0x76,0xa3,0x7c,0x10,0x78,0xa3,0x5b,2,0x68,0x3e,0x6c,0x50,0x75,0x10,0x63,0xa2, +0xa5,0x14,0x69,0x6e,0x68,0x61,0x75,0xa3,0xa5,0x17,0x61,0x77,0x68,0x68,0x6d,0x6f, +0x6e,0x67,0xa3,0x4b,0x10,0x6d,0xa2,0x90,0x14,0x79,0x72,0x65,0x6e,0x65,0xa3,0x90, +0x11,0x72,0x6d,0xa3,0x59,6,0x6b,0x36,0x6b,0x56,0x73,0x6e,0x75,0x74,0x79,0x11, +0x69,0x61,0x1f,0x6b,0x65,0x6e,0x67,0x70,0x75,0x61,0x63,0x68,0x75,0x65,0x68,0x6d, +0x6f,0x6e,0x67,0xa3,0xba,1,0x67,0x2e,0x6f,0xa2,0x57,0x10,0x6f,0xa3,0x57,0x10, +0x62,0xa3,0x84,0x11,0x68,0x75,0xa3,0x96,0x12,0x73,0x68,0x75,0xa3,0x96,0x61,0x42, +0x62,0x80,0x65,0x10,0x77,1,0x61,0xa3,0xaa,0x74,0x14,0x61,0x69,0x6c,0x75,0x65, +0x97,2,0x62,0x2e,0x6e,0x3c,0x72,0x10,0x62,0xa3,0x8e,0x15,0x61,0x74,0x61,0x65, +0x61,0x6e,0xa3,0x8f,0x10,0x64,0xa2,0xbb,0x16,0x69,0x6e,0x61,0x67,0x61,0x72,0x69, +0xa3,0xbb,0x11,0x61,0x74,0xa3,0x8f,3,0x67,0x5a,0x6c,0x6c,0x72,0xa2,0x93,0x73, +2,0x61,0x36,0x67,0x3c,0x6d,0x10,0x61,0x84,0x12,0x6e,0x79,0x61,0x85,0x11,0x67, +0x65,0xa3,0xab,0x10,0x65,0xa3,0xab,1,0x61,0x2a,0x68,0x11,0x61,0x6d,0x5b,0x10, +0x6d,0x5b,1,0x63,0xa2,0x60,0x64,5,0x70,0x37,0x70,0x36,0x73,0x54,0x74,0x14, +0x75,0x72,0x6b,0x69,0x63,0xa3,0x58,0x11,0x65,0x72,1,0x6d,0x2c,0x73,0x12,0x69, +0x61,0x6e,0x9b,0x11,0x69,0x63,0xa3,0x59,0x10,0x6f,1,0x67,0x3a,0x75,0x18,0x74, +0x68,0x61,0x72,0x61,0x62,0x69,0x61,0x6e,0xa3,0x85,0x13,0x64,0x69,0x61,0x6e,0xa3, +0xb8,0x68,0x42,0x69,0x54,0x6e,0x1a,0x6f,0x72,0x74,0x68,0x61,0x72,0x61,0x62,0x69, +0x61,0x6e,0xa3,0x8e,0x17,0x75,0x6e,0x67,0x61,0x72,0x69,0x61,0x6e,0xa3,0x4c,0x14, +0x74,0x61,0x6c,0x69,0x63,0x5d,1,0x68,0x26,0x6b,0xa3,0x6d,0x12,0x69,0x6b,0x69, +0xa3,0x6d,2,0x69,0x2c,0x6b,0x30,0x79,0x10,0x61,0x5f,0x11,0x79,0x61,0x5f,0x10, +0x68,0xa3,0x58,0x68,0xc3,0xd,0x6b,0xc2,0x24,0x6b,0xa4,0x17,0x6c,0xa4,0xb2,0x6d, +8,0x6f,0x46,0x6f,0x48,0x72,0x74,0x74,0x80,0x75,0x86,0x79,1,0x61,0x28,0x6d, +0x10,0x72,0x59,0x13,0x6e,0x6d,0x61,0x72,0x59,2,0x64,0x2e,0x6e,0x32,0x6f,0x10, +0x6e,0xa3,0x72,0x10,0x69,0xa3,0xa3,0x10,0x67,0x56,0x14,0x6f,0x6c,0x69,0x61,0x6e, +0x57,0x10,0x6f,0xa2,0x95,0x10,0x6f,0xa3,0x95,0x11,0x65,0x69,0xa3,0x73,0x11,0x6c, +0x74,0xa2,0xa4,0x12,0x61,0x6e,0x69,0xa3,0xa4,0x61,0x36,0x65,0xa2,0x67,0x69,0xa2, +0xbd,0x6c,0x11,0x79,0x6d,0x55,6,0x6e,0x38,0x6e,0x32,0x72,0x5c,0x73,0x6c,0x79, +0x10,0x61,0xa3,0x55,1,0x64,0x38,0x69,0xa2,0x79,0x15,0x63,0x68,0x61,0x65,0x61, +0x6e,0xa3,0x79,0xa2,0x54,0x12,0x61,0x69,0x63,0xa3,0x54,0x10,0x63,0xa2,0xa9,0x12, +0x68,0x65,0x6e,0xa3,0xa9,0x18,0x61,0x72,0x61,0x6d,0x67,0x6f,0x6e,0x64,0x69,0xa3, +0xaf,0x68,0x36,0x6b,0x4c,0x6c,0x15,0x61,0x79,0x61,0x6c,0x61,0x6d,0x55,1,0x61, +0x26,0x6a,0xa3,0xa0,0x13,0x6a,0x61,0x6e,0x69,0xa3,0xa0,0x10,0x61,0xa2,0xb4,0x12, +0x73,0x61,0x72,0xa3,0xb4,3,0x64,0x78,0x65,0x94,0x6e,0xa2,0x42,0x72,1,0x63, +0xa3,0x8d,0x6f,0xa2,0x56,0x13,0x69,0x74,0x69,0x63,1,0x63,0x3c,0x68,0x19,0x69, +0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0x56,0x15,0x75,0x72,0x73,0x69, +0x76,0x65,0xa3,0x8d,1,0x65,0x26,0x66,0xa3,0xb5,0x16,0x66,0x61,0x69,0x64,0x72, +0x69,0x6e,0xa3,0xb5,0x17,0x74,0x65,0x69,0x6d,0x61,0x79,0x65,0x6b,0xa3,0x73,0x10, +0x64,0xa2,0x8c,0x17,0x65,0x6b,0x69,0x6b,0x61,0x6b,0x75,0x69,0xa3,0x8c,0x11,0x61, +0x6f,0xa3,0x5c,6,0x6e,0x1a,0x6e,0x34,0x6f,0x38,0x70,0x3e,0x74,0x11,0x68,0x69, +0xa3,0x78,0x11,0x64,0x61,0x4b,0x11,0x72,0x65,0xa3,0x77,0x11,0x65,0x6c,0xa3,0x8a, +0x61,0x30,0x68,0x9a,0x69,0x11,0x74,0x73,0xa3,0xbf,4,0x69,0x3c,0x6c,0x44,0x6e, +0x48,0x74,0x56,0x79,0x13,0x61,0x68,0x6c,0x69,0xa3,0x4f,0x12,0x74,0x68,0x69,0xa3, +0x78,0x10,0x69,0xa3,0x4f,1,0x61,0x4d,0x6e,0x12,0x61,0x64,0x61,0x4b,0x14,0x61, +0x6b,0x61,0x6e,0x61,0x4c,0x19,0x6f,0x72,0x68,0x69,0x72,0x61,0x67,0x61,0x6e,0x61, +0x8d,4,0x61,0x40,0x69,0x52,0x6d,0x70,0x6f,0x7c,0x75,0x15,0x64,0x61,0x77,0x61, +0x64,0x69,0xa3,0x91,0x10,0x72,0x92,0x15,0x6f,0x73,0x68,0x74,0x68,0x69,0x93,0x1d, +0x74,0x61,0x6e,0x73,0x6d,0x61,0x6c,0x6c,0x73,0x63,0x72,0x69,0x70,0x74,0xa3,0xbf, +1,0x65,0x24,0x72,0x4f,0x10,0x72,0x4f,0x10,0x6a,0xa2,0x9d,0x11,0x6b,0x69,0xa3, +0x9d,4,0x61,0x5c,0x65,0x90,0x69,0xa0,0x6f,0xa2,0x5d,0x79,1,0x63,0x34,0x64, +0x10,0x69,0xa2,0x6c,0x11,0x61,0x6e,0xa3,0x6c,0x10,0x69,0xa2,0x6b,0x11,0x61,0x6e, +0xa3,0x6b,2,0x6e,0x42,0x6f,0x46,0x74,3,0x66,0xa3,0x50,0x67,0xa3,0x51,0x69, +0x24,0x6e,0x53,0x10,0x6e,0x53,0x10,0x61,0xa3,0x6a,0x50,0x10,0x6f,0x51,0x11,0x70, +0x63,0xa2,0x52,0x11,0x68,0x61,0xa3,0x52,2,0x6d,0x2e,0x6e,0x36,0x73,0x10,0x75, +0xa3,0x83,0x10,0x62,0x80,0x10,0x75,0x81,2,0x61,0xa3,0x53,0x62,0x83,0x65,0x11, +0x61,0x72,1,0x61,0xa3,0x53,0x62,0x83,0x11,0x6d,0x61,0xa3,0x8b,0x68,0x6e,0x69, +0xa2,0x95,0x6a,2,0x61,0x30,0x70,0x52,0x75,0x11,0x72,0x63,0xa3,0x94,1,0x6d, +0x38,0x76,0x10,0x61,0xa2,0x4e,0x13,0x6e,0x65,0x73,0x65,0xa3,0x4e,0x10,0x6f,0xa3, +0xad,0x11,0x61,0x6e,0xa3,0x69,6,0x6c,0x1e,0x6c,0x34,0x6d,0x3a,0x72,0x48,0x75, +0x11,0x6e,0x67,0xa3,0x4c,0x11,0x75,0x77,0xa3,0x9c,0x10,0x6e,1,0x67,0xa3,0x4b, +0x70,0xa3,0xba,0x11,0x6b,0x74,0x8d,0x61,0x3c,0x65,0xa2,0x43,0x69,0x11,0x72,0x61, +0x48,0x13,0x67,0x61,0x6e,0x61,0x49,1,0x6e,0x34,0x74,0x10,0x72,0xa2,0xa2,0x11, +0x61,0x6e,0xa3,0xa2,0x42,6,0x6f,0xe,0x6f,0x77,0x73,0xa3,0x49,0x74,0xa3,0x4a, +0x75,0x12,0x6e,0x6f,0x6f,0x77,0x62,0xa3,0xac,0x67,0x3e,0x69,0x42,0x19,0x66,0x69, +0x72,0x6f,0x68,0x69,0x6e,0x67,0x79,0x61,0xa3,0xb6,0x44,0x11,0x75,0x6c,0x45,0x11, +0x62,0x72,0x46,0x11,0x65,0x77,0x47,2,0x6d,0x2e,0x6e,0x4a,0x74,0x11,0x61,0x6c, +0x5d,0x1c,0x70,0x65,0x72,0x69,0x61,0x6c,0x61,0x72,0x61,0x6d,0x61,0x69,0x63,0xa3, +0x74,2,0x64,0x66,0x68,0x6a,0x73,0x1b,0x63,0x72,0x69,0x70,0x74,0x69,0x6f,0x6e, +0x61,0x6c,0x70,0x61,1,0x68,0x32,0x72,0x14,0x74,0x68,0x69,0x61,0x6e,0xa3,0x7d, +0x13,0x6c,0x61,0x76,0x69,0xa3,0x7a,0x10,0x73,0xa3,0x4d,0x15,0x65,0x72,0x69,0x74, +0x65,0x64,0x23,0x64,0xc1,0xd,0x64,0xa2,0x7a,0x65,0xa2,0xc1,0x67,4,0x65,0x82, +0x6c,0x9a,0x6f,0xa2,0x46,0x72,0xa2,0x55,0x75,2,0x6a,0x3c,0x6e,0x4e,0x72,1, +0x6d,0x24,0x75,0x41,0x13,0x75,0x6b,0x68,0x69,0x41,1,0x61,0x24,0x72,0x3f,0x13, +0x72,0x61,0x74,0x69,0x3f,0x18,0x6a,0x61,0x6c,0x61,0x67,0x6f,0x6e,0x64,0x69,0xa3, +0xb3,0x10,0x6f,1,0x6b,0xa3,0x48,0x72,0x38,0x13,0x67,0x69,0x61,0x6e,0x39,0x11, +0x61,0x67,0x90,0x15,0x6f,0x6c,0x69,0x74,0x69,0x63,0x91,1,0x6e,0x30,0x74,0x10, +0x68,0x3a,0x11,0x69,0x63,0x3b,1,0x67,0xa3,0xb3,0x6d,0xa3,0xaf,1,0x61,0x32, +0x65,1,0x65,0x24,0x6b,0x3d,0x10,0x6b,0x3d,0x10,0x6e,0xa2,0x89,0x12,0x74,0x68, +0x61,0xa3,0x89,4,0x65,0x46,0x69,0x6c,0x6f,0x8c,0x73,0x9a,0x75,0x11,0x70,0x6c, +0xa2,0x87,0x13,0x6f,0x79,0x61,0x6e,0xa3,0x87,1,0x73,0x38,0x76,0x10,0x61,0x34, +0x15,0x6e,0x61,0x67,0x61,0x72,0x69,0x35,0x13,0x65,0x72,0x65,0x74,0x33,1,0x61, +0x36,0x76,0x16,0x65,0x73,0x61,0x6b,0x75,0x72,0x75,0xa3,0xbe,0x10,0x6b,0xa3,0xbe, +0x11,0x67,0x72,0xa2,0xb2,0x10,0x61,0xa3,0xb2,0x11,0x72,0x74,0x33,2,0x67,0x3a, +0x6c,0x72,0x74,0x11,0x68,0x69,0x36,0x13,0x6f,0x70,0x69,0x63,0x37,0x10,0x79,2, +0x64,0xa3,0x45,0x68,0xa3,0x46,0x70,0xa2,0x47,0x1e,0x74,0x69,0x61,0x6e,0x68,0x69, +0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0x47,1,0x62,0x36,0x79,0x10, +0x6d,0xa2,0xb9,0x12,0x61,0x69,0x63,0xa3,0xb9,0x10,0x61,0xa2,0x88,0x12,0x73,0x61, +0x6e,0xa3,0x88,0x61,0xa2,0xb4,0x62,0xa4,0x19,0x63,6,0x6f,0x3d,0x6f,0x5a,0x70, +0x76,0x75,0x7a,0x79,1,0x70,0x3e,0x72,2,0x69,0x2a,0x6c,0x31,0x73,0xa3,0x44, +0x13,0x6c,0x6c,0x69,0x63,0x31,0x13,0x72,0x69,0x6f,0x74,0x7f,1,0x6d,0x30,0x70, +0x10,0x74,0x2e,0x11,0x69,0x63,0x2f,0x12,0x6d,0x6f,0x6e,0x21,0x11,0x72,0x74,0x7f, +0x16,0x6e,0x65,0x69,0x66,0x6f,0x72,0x6d,0xa3,0x65,0x61,0x32,0x68,0xa2,0x41,0x69, +0x11,0x72,0x74,0xa3,0x43,3,0x6b,0x4c,0x6e,0x50,0x72,0x76,0x75,0x1d,0x63,0x61, +0x73,0x69,0x61,0x6e,0x61,0x6c,0x62,0x61,0x6e,0x69,0x61,0x6e,0xa3,0x9f,0x10,0x6d, +0xa3,0x76,1,0x61,0x24,0x73,0x71,0x1d,0x64,0x69,0x61,0x6e,0x61,0x62,0x6f,0x72, +0x69,0x67,0x69,0x6e,0x61,0x6c,0x71,0x10,0x69,0xa2,0x68,0x11,0x61,0x6e,0xa3,0x68, +3,0x61,0x32,0x65,0x44,0x6f,0x52,0x72,0x10,0x73,0xa3,0xbd,1,0x6b,0x26,0x6d, +0xa3,0x42,0x11,0x6d,0x61,0xa3,0x76,0x10,0x72,0x2c,0x13,0x6f,0x6b,0x65,0x65,0x2d, +0x16,0x72,0x61,0x73,0x6d,0x69,0x61,0x6e,0xa3,0xbd,6,0x68,0x4a,0x68,0x48,0x6e, +0x4e,0x72,0x76,0x76,1,0x65,0x2a,0x73,0x10,0x74,0xa3,0x75,0x13,0x73,0x74,0x61, +0x6e,0xa3,0x75,0x11,0x6f,0x6d,0xa3,0xa1,0x11,0x61,0x74,0x1f,0x6f,0x6c,0x69,0x61, +0x6e,0x68,0x69,0x65,0x72,0x6f,0x67,0x6c,0x79,0x70,0x68,0x73,0xa3,0x9c,1,0x61, +0x3e,0x6d,2,0x65,0x2a,0x69,0xa3,0x74,0x6e,0x27,0x13,0x6e,0x69,0x61,0x6e,0x27, +0x10,0x62,0x24,0x11,0x69,0x63,0x25,0x64,0x30,0x66,0x44,0x67,0x11,0x68,0x62,0xa3, +0x9f,0x10,0x6c,1,0x61,0x26,0x6d,0xa3,0xa7,0x10,0x6d,0xa3,0xa7,0x11,0x61,0x6b, +0xa3,0x93,6,0x6c,0x3c,0x6c,0x52,0x6f,0x56,0x72,0x66,0x75,1,0x67,0x30,0x68, +1,0x64,0x79,0x69,0x10,0x64,0x79,0x10,0x69,0x8e,0x13,0x6e,0x65,0x73,0x65,0x8f, +0x11,0x69,0x73,0xa1,0x11,0x70,0x6f,0x2a,0x13,0x6d,0x6f,0x66,0x6f,0x2b,0x10,0x61, +1,0x68,0x2e,0x69,0x7c,0x12,0x6c,0x6c,0x65,0x7d,0xa2,0x41,0x11,0x6d,0x69,0xa3, +0x41,0x61,0x48,0x65,0x9c,0x68,1,0x61,0x2a,0x6b,0x10,0x73,0xa3,0xa8,0x15,0x69, +0x6b,0x73,0x75,0x6b,0x69,0xa3,0xa8,3,0x6c,0x3a,0x6d,0x48,0x73,0x54,0x74,1, +0x61,0x24,0x6b,0x9f,0x10,0x6b,0x9f,0x10,0x69,0x9c,0x13,0x6e,0x65,0x73,0x65,0x9d, +0x10,0x75,0xa2,0x82,0x10,0x6d,0xa3,0x82,0x10,0x73,0xa2,0x86,0x13,0x61,0x76,0x61, +0x68,0xa3,0x86,0x11,0x6e,0x67,0x28,0x12,0x61,0x6c,0x69,0x29,3,0x6c,0x42,0x6e, +0x90,0x74,0xa2,0x46,0x76,0x24,0x17,0x6f,0x77,0x65,0x6c,0x6a,0x61,0x6d,0x6f,0x25, +0x22,1,0x65,0x54,0x76,0x28,1,0x73,0x38,0x74,0x2a,0x17,0x73,0x79,0x6c,0x6c, +0x61,0x62,0x6c,0x65,0x2b,0x16,0x79,0x6c,0x6c,0x61,0x62,0x6c,0x65,0x29,0x18,0x61, +0x64,0x69,0x6e,0x67,0x6a,0x61,0x6d,0x6f,0x23,1,0x61,0x21,0x6f,0x1a,0x74,0x61, +0x70,0x70,0x6c,0x69,0x63,0x61,0x62,0x6c,0x65,0x21,0x26,0x1a,0x72,0x61,0x69,0x6c, +0x69,0x6e,0x67,0x6a,0x61,0x6d,0x6f,0x27,1,0x6e,0x2c,0x79,0x22,0x11,0x65,0x73, +0x23,0x20,0x10,0x6f,0x21,1,0x6e,0x2c,0x79,0x22,0x11,0x65,0x73,0x23,0x20,0x10, +0x6f,0x21,2,0x6d,0x30,0x6e,0x3a,0x79,0x22,0x11,0x65,0x73,0x23,0x24,0x13,0x61, +0x79,0x62,0x65,0x25,0x20,0x10,0x6f,0x21,2,0x6d,0x30,0x6e,0x3a,0x79,0x22,0x11, +0x65,0x73,0x23,0x24,0x13,0x61,0x79,0x62,0x65,0x25,0x20,0x10,0x6f,0x21,0xb,0x72, +0x39,0x76,0xc,0x76,0x33,0x78,0x2a,0x7a,0x11,0x77,0x6a,0x43,0x10,0x78,0x21,0x72, +0x28,0x73,0x50,0x74,0x31,1,0x65,0x24,0x69,0x39,0x1e,0x67,0x69,0x6f,0x6e,0x61, +0x6c,0x69,0x6e,0x64,0x69,0x63,0x61,0x74,0x6f,0x72,0x39,1,0x6d,0x35,0x70,0x18, +0x61,0x63,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0x35,0x6c,0x1f,0x6c,0x3c,0x6f,0x4a, +0x70,1,0x70,0x37,0x72,0x14,0x65,0x70,0x65,0x6e,0x64,0x37,0x28,1,0x66,0x2b, +0x76,0x2c,0x10,0x74,0x2f,0x13,0x74,0x68,0x65,0x72,0x21,0x63,0x4c,0x65,0x64,0x67, +1,0x61,0x3a,0x6c,0x19,0x75,0x65,0x61,0x66,0x74,0x65,0x72,0x7a,0x77,0x6a,0x41, +0x10,0x7a,0x41,2,0x6e,0x23,0x6f,0x24,0x72,0x25,0x14,0x6e,0x74,0x72,0x6f,0x6c, +0x23,2,0x62,0x34,0x6d,0x4e,0x78,0x26,0x13,0x74,0x65,0x6e,0x64,0x27,0x3a,1, +0x61,0x24,0x67,0x3d,0x11,0x73,0x65,0x3a,0x12,0x67,0x61,0x7a,0x3d,0x3e,0x16,0x6f, +0x64,0x69,0x66,0x69,0x65,0x72,0x3f,9,0x6e,0x4a,0x6e,0x34,0x6f,0x44,0x73,0x60, +0x75,0x94,0x78,0x10,0x78,0x21,0x10,0x75,0x2a,0x14,0x6d,0x65,0x72,0x69,0x63,0x2b, +1,0x6c,0x2c,0x74,0x12,0x68,0x65,0x72,0x21,0x14,0x65,0x74,0x74,0x65,0x72,0x2d, +3,0x63,0x36,0x65,0x46,0x70,0x31,0x74,0x32,0x12,0x65,0x72,0x6d,0x33,0x3c,0x16, +0x6f,0x6e,0x74,0x69,0x6e,0x75,0x65,0x3d,0x2e,0x10,0x70,0x2f,0x10,0x70,0x34,0x12, +0x70,0x65,0x72,0x35,0x61,0x46,0x63,0x52,0x65,0x64,0x66,0x72,0x6c,2,0x65,0x2d, +0x66,0x3b,0x6f,0x28,0x12,0x77,0x65,0x72,0x29,0x10,0x74,0x22,0x12,0x65,0x72,0x6d, +0x23,1,0x6c,0x24,0x72,0x37,0x24,0x12,0x6f,0x73,0x65,0x25,0x10,0x78,0x38,0x13, +0x74,0x65,0x6e,0x64,0x39,0x10,0x6f,0x26,0x13,0x72,0x6d,0x61,0x74,0x27,0,0x10, +0x6c,0x88,0x72,0x40,0x72,0x36,0x73,0x5e,0x77,0x7a,0x78,0x8a,0x7a,0x11,0x77,0x6a, +0x4b,1,0x65,0x24,0x69,0x3b,0x1e,0x67,0x69,0x6f,0x6e,0x61,0x6c,0x69,0x6e,0x64, +0x69,0x63,0x61,0x74,0x6f,0x72,0x3b,1,0x69,0x24,0x71,0x3f,0x18,0x6e,0x67,0x6c, +0x65,0x71,0x75,0x6f,0x74,0x65,0x3f,0x17,0x73,0x65,0x67,0x73,0x70,0x61,0x63,0x65, +0x4d,0x10,0x78,0x21,0x6c,0x36,0x6d,0x3c,0x6e,0x76,0x6f,0x13,0x74,0x68,0x65,0x72, +0x21,1,0x65,0x23,0x66,0x35,3,0x62,0x37,0x69,0x28,0x6c,0x29,0x6e,0x2b,0x10, +0x64,1,0x6c,0x34,0x6e,0x11,0x75,0x6d,0x2a,0x12,0x6c,0x65,0x74,0x37,0x14,0x65, +0x74,0x74,0x65,0x72,0x29,2,0x65,0x36,0x6c,0x39,0x75,0x2c,0x14,0x6d,0x65,0x72, +0x69,0x63,0x2d,0x14,0x77,0x6c,0x69,0x6e,0x65,0x39,0x66,0x3f,0x66,0x40,0x67,0x4e, +0x68,0x70,0x6b,0x10,0x61,0x26,0x15,0x74,0x61,0x6b,0x61,0x6e,0x61,0x27,0x10,0x6f, +0x24,0x13,0x72,0x6d,0x61,0x74,0x25,1,0x61,0x3a,0x6c,0x19,0x75,0x65,0x61,0x66, +0x74,0x65,0x72,0x7a,0x77,0x6a,0x49,0x10,0x7a,0x49,1,0x65,0x24,0x6c,0x3d,0x19, +0x62,0x72,0x65,0x77,0x6c,0x65,0x74,0x74,0x65,0x72,0x3d,0x61,0x86,0x63,0x92,0x64, +0x94,0x65,2,0x62,0x44,0x6d,0x5e,0x78,0x2e,0x13,0x74,0x65,0x6e,0x64,0x32,0x15, +0x6e,0x75,0x6d,0x6c,0x65,0x74,0x2f,0x42,1,0x61,0x24,0x67,0x45,0x11,0x73,0x65, +0x42,0x12,0x67,0x61,0x7a,0x45,0x46,0x16,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x47, +0x15,0x6c,0x65,0x74,0x74,0x65,0x72,0x23,0x10,0x72,0x31,1,0x6f,0x24,0x71,0x41, +0x18,0x75,0x62,0x6c,0x65,0x71,0x75,0x6f,0x74,0x65,0x41,2,0x63,0x32,0x6e,0x3c, +0x6f,0x22,0x12,0x70,0x65,0x6e,0x23,0x24,0x13,0x6c,0x6f,0x73,0x65,0x25,0x20,0x12, +0x6f,0x6e,0x65,0x21,6,0x6f,0x65,0x6f,0x4a,0x72,0x5c,0x74,0x64,0x76,0x1d,0x69, +0x73,0x75,0x61,0x6c,0x6f,0x72,0x64,0x65,0x72,0x6c,0x65,0x66,0x74,0x3d,0x18,0x76, +0x65,0x72,0x73,0x74,0x72,0x75,0x63,0x6b,0x2d,0x13,0x69,0x67,0x68,0x74,0x2f,0x11, +0x6f,0x70,0x30,0x12,0x61,0x6e,0x64,2,0x62,0x32,0x6c,0x62,0x72,0x13,0x69,0x67, +0x68,0x74,0x3b,0x14,0x6f,0x74,0x74,0x6f,0x6d,0x32,0x12,0x61,0x6e,0x64,1,0x6c, +0x2e,0x72,0x13,0x69,0x67,0x68,0x74,0x35,0x12,0x65,0x66,0x74,0x3f,0x12,0x65,0x66, +0x74,0x36,0x17,0x61,0x6e,0x64,0x72,0x69,0x67,0x68,0x74,0x39,0x62,0x2c,0x6c,0x5c, +0x6e,0x10,0x61,0x21,0x14,0x6f,0x74,0x74,0x6f,0x6d,0x22,0x12,0x61,0x6e,0x64,1, +0x6c,0x2e,0x72,0x13,0x69,0x67,0x68,0x74,0x27,0x12,0x65,0x66,0x74,0x25,0x12,0x65, +0x66,0x74,0x28,0x17,0x61,0x6e,0x64,0x72,0x69,0x67,0x68,0x74,0x2b,0xd,0x6e,0xaa, +0x72,0x70,0x72,0x92,0x73,0xa2,0x46,0x74,0xa2,0x54,0x76,1,0x69,0x60,0x6f,0x12, +0x77,0x65,0x6c,0x62,1,0x64,0x3a,0x69,0x19,0x6e,0x64,0x65,0x70,0x65,0x6e,0x64, +0x65,0x6e,0x74,0x67,0x17,0x65,0x70,0x65,0x6e,0x64,0x65,0x6e,0x74,0x65,1,0x72, +0x2e,0x73,0x13,0x61,0x72,0x67,0x61,0x61,0x12,0x61,0x6d,0x61,0x5f,0x1d,0x65,0x67, +0x69,0x73,0x74,0x65,0x72,0x73,0x68,0x69,0x66,0x74,0x65,0x72,0x57,0x1e,0x79,0x6c, +0x6c,0x61,0x62,0x6c,0x65,0x6d,0x6f,0x64,0x69,0x66,0x69,0x65,0x72,0x59,0x12,0x6f, +0x6e,0x65,1,0x6c,0x2c,0x6d,0x12,0x61,0x72,0x6b,0x5d,0x14,0x65,0x74,0x74,0x65, +0x72,0x5b,0x6e,0x3c,0x6f,0x7c,0x70,0x18,0x75,0x72,0x65,0x6b,0x69,0x6c,0x6c,0x65, +0x72,0x55,1,0x6f,0x4c,0x75,1,0x6b,0x3c,0x6d,0x12,0x62,0x65,0x72,0x50,0x15, +0x6a,0x6f,0x69,0x6e,0x65,0x72,0x53,0x11,0x74,0x61,0x4f,0x16,0x6e,0x6a,0x6f,0x69, +0x6e,0x65,0x72,0x4d,0x13,0x74,0x68,0x65,0x72,0x21,0x67,0x3e,0x67,0x4a,0x69,0x64, +0x6a,0x82,0x6d,0x1d,0x6f,0x64,0x69,0x66,0x79,0x69,0x6e,0x67,0x6c,0x65,0x74,0x74, +0x65,0x72,0x4b,0x1c,0x65,0x6d,0x69,0x6e,0x61,0x74,0x69,0x6f,0x6e,0x6d,0x61,0x72, +0x6b,0x45,0x1e,0x6e,0x76,0x69,0x73,0x69,0x62,0x6c,0x65,0x73,0x74,0x61,0x63,0x6b, +0x65,0x72,0x47,0x14,0x6f,0x69,0x6e,0x65,0x72,0x49,0x61,0xa2,0xba,0x62,0xa2,0xc0, +0x63,1,0x61,0xa2,0xa2,0x6f,0x16,0x6e,0x73,0x6f,0x6e,0x61,0x6e,0x74,0x2a,8, +0x6b,0x67,0x6b,0x48,0x6d,0x52,0x70,0x5c,0x73,0xa2,0x42,0x77,0x19,0x69,0x74,0x68, +0x73,0x74,0x61,0x63,0x6b,0x65,0x72,0x43,0x14,0x69,0x6c,0x6c,0x65,0x72,0x35,0x14, +0x65,0x64,0x69,0x61,0x6c,0x37,1,0x6c,0x52,0x72,0x10,0x65,1,0x63,0x2e,0x66, +0x13,0x69,0x78,0x65,0x64,0x3d,0x19,0x65,0x64,0x69,0x6e,0x67,0x72,0x65,0x70,0x68, +0x61,0x3b,0x18,0x61,0x63,0x65,0x68,0x6f,0x6c,0x64,0x65,0x72,0x39,0x10,0x75,1, +0x62,0x3e,0x63,0x1b,0x63,0x65,0x65,0x64,0x69,0x6e,0x67,0x72,0x65,0x70,0x68,0x61, +0x41,0x15,0x6a,0x6f,0x69,0x6e,0x65,0x64,0x3f,0x64,0x4c,0x66,0x52,0x68,0x5a,0x69, +0x1e,0x6e,0x69,0x74,0x69,0x61,0x6c,0x70,0x6f,0x73,0x74,0x66,0x69,0x78,0x65,0x64, +0x33,0x12,0x65,0x61,0x64,0x2d,0x13,0x69,0x6e,0x61,0x6c,0x2f,0x18,0x65,0x61,0x64, +0x6c,0x65,0x74,0x74,0x65,0x72,0x31,0x1d,0x6e,0x74,0x69,0x6c,0x6c,0x61,0x74,0x69, +0x6f,0x6e,0x6d,0x61,0x72,0x6b,0x29,0x16,0x76,0x61,0x67,0x72,0x61,0x68,0x61,0x23, +1,0x69,0x4a,0x72,0x10,0x61,0x1f,0x68,0x6d,0x69,0x6a,0x6f,0x69,0x6e,0x69,0x6e, +0x67,0x6e,0x75,0x6d,0x62,0x65,0x72,0x27,0x12,0x6e,0x64,0x75,0x25,2,0x72,0x38, +0x74,0x46,0x75,0x26,0x15,0x70,0x72,0x69,0x67,0x68,0x74,0x27,0x20,0x15,0x6f,0x74, +0x61,0x74,0x65,0x64,0x21,1,0x72,0x24,0x75,0x25,0x22,0x18,0x61,0x6e,0x73,0x66, +0x6f,0x72,0x6d,0x65,0x64,1,0x72,0x32,0x75,0x15,0x70,0x72,0x69,0x67,0x68,0x74, +0x25,0x15,0x6f,0x74,0x61,0x74,0x65,0x64,0x23,0xd,0x6e,0xc1,0x86,0x73,0xa8,0x73, +0x4c,0x74,0xa2,0x76,0x75,0xa2,0x83,0x7a,0xd8,0x70,0,2,0x6c,0xd9,0x20,0, +0x70,0xd9,0x40,0,0x73,0xc3,0,0xfe,0xf,0,0,0,7,0x6f,0x3c,0x6f, +0xff,8,0,0,0,0x70,0x3a,0x75,0x6e,0x79,0x13,0x6d,0x62,0x6f,0x6c,0xff, +0xf,0,0,0,0x11,0x61,0x63,1,0x65,0x34,0x69,0x15,0x6e,0x67,0x6d,0x61, +0x72,0x6b,0xa5,0,0x18,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xc3,0, +0x16,0x72,0x72,0x6f,0x67,0x61,0x74,0x65,0xe1,0,0,0x63,0xff,2,0,0, +0,0x65,0x38,0x6b,0xff,4,0,0,0,0x6d,0xff,1,0,0,0,0x16, +0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xd9,0x70,0,0x1d,0x69,0x74,0x6c,0x65,0x63, +0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x31,1,0x6e,0x40,0x70,0x1c,0x70, +0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74,0x74,0x65,0x72,0x25,0x17,0x61,0x73, +0x73,0x69,0x67,0x6e,0x65,0x64,0x23,0x6e,0xa2,0x69,0x6f,0xa2,0x89,0x70,0xfe,0x30, +0xf8,0,0,9,0x69,0x33,0x69,0xff,0x10,0,0,0,0x6f,0xfd,0x80,0, +0,0x72,0x54,0x73,0xf9,0,0,0x75,0x12,0x6e,0x63,0x74,0xfe,0x30,0xf8,0, +0,0x15,0x75,0x61,0x74,0x69,0x6f,0x6e,0xff,0x30,0xf8,0,0,0x17,0x69,0x76, +0x61,0x74,0x65,0x75,0x73,0x65,0xdd,0,0,0x61,0x48,0x63,0xfd,0x40,0,0, +0x64,0xe9,0,0,0x65,0xfd,0x20,0,0,0x66,0xff,0x20,0,0,0,0x1f, +0x72,0x61,0x67,0x72,0x61,0x70,0x68,0x73,0x65,0x70,0x61,0x72,0x61,0x74,0x6f,0x72, +0xd9,0x40,0,0xbe,0,3,0x64,0xa7,0,0x6c,0xab,0,0x6f,0x30,0x75,0x13, +0x6d,0x62,0x65,0x72,0xbf,0,0xb2,0,0x1b,0x6e,0x73,0x70,0x61,0x63,0x69,0x6e, +0x67,0x6d,0x61,0x72,0x6b,0xa1,1,0x70,0x92,0x74,0x12,0x68,0x65,0x72,0xe6,0x80, +1,3,0x6c,0x40,0x6e,0x4a,0x70,0x56,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0xff, +8,0,0,0,0x14,0x65,0x74,0x74,0x65,0x72,0x61,0x14,0x75,0x6d,0x62,0x65, +0x72,0xb3,0,0x19,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd,0x80, +0,0,0x1c,0x65,0x6e,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e, +0xf9,0,0,0x66,0xc0,0xc4,0x66,0xa2,0x47,0x69,0xa2,0x64,0x6c,0xa2,0x79,0x6d, +0xa4,0xc0,4,0x61,0x6c,0x63,0xa5,0,0x65,0xa3,0x80,0x6e,0xa1,0x6f,0x15,0x64, +0x69,0x66,0x69,0x65,0x72,1,0x6c,0x38,0x73,0x14,0x79,0x6d,0x62,0x6f,0x6c,0xff, +4,0,0,0,0x14,0x65,0x74,0x74,0x65,0x72,0x41,1,0x72,0x3c,0x74,0x16, +0x68,0x73,0x79,0x6d,0x62,0x6f,0x6c,0xff,1,0,0,0,0x10,0x6b,0xa5,0xc0, +1,0x69,0x32,0x6f,0x13,0x72,0x6d,0x61,0x74,0xdb,0,0,0x1d,0x6e,0x61,0x6c, +0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xff,0x20,0,0,0, +0x10,0x6e,0x1f,0x69,0x74,0x69,0x61,0x6c,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74, +0x69,0x6f,0x6e,0xff,0x10,0,0,0,0x9c,7,0x6d,0x18,0x6d,0x41,0x6f,0x28, +0x74,0x31,0x75,0x25,0x60,0x1c,0x77,0x65,0x72,0x63,0x61,0x73,0x65,0x6c,0x65,0x74, +0x74,0x65,0x72,0x29,0x63,0x3d,0x65,0x28,0x69,0x42,0x6c,0x29,0x13,0x74,0x74,0x65, +0x72,0x9c,0x15,0x6e,0x75,0x6d,0x62,0x65,0x72,0xab,0,0x1a,0x6e,0x65,0x73,0x65, +0x70,0x61,0x72,0x61,0x74,0x6f,0x72,0xd9,0x20,0,0x63,0x46,0x64,0xa2,0x96,0x65, +0x1b,0x6e,0x63,0x6c,0x6f,0x73,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0xa3,0x80,0xe6, +0x80,1,7,0x6e,0x57,0x6e,0x52,0x6f,0x5e,0x73,0xe1,0,0,0x75,0x1b,0x72, +0x72,0x65,0x6e,0x63,0x79,0x73,0x79,0x6d,0x62,0x6f,0x6c,0xff,2,0,0,0, +0x22,0x12,0x74,0x72,0x6c,0xd9,0x80,0,0xdc,0,0,1,0x6d,0x62,0x6e,1, +0x6e,0x30,0x74,0x12,0x72,0x6f,0x6c,0xd9,0x80,0,0x1f,0x65,0x63,0x74,0x6f,0x72, +0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd,0x40,0,0,0x19, +0x62,0x69,0x6e,0x69,0x6e,0x67,0x6d,0x61,0x72,0x6b,0xa5,0xc0,0x61,0x58,0x63,0xd9, +0x80,0,0x66,0xdb,0,0,0x6c,0x1d,0x6f,0x73,0x65,0x70,0x75,0x6e,0x63,0x74, +0x75,0x61,0x74,0x69,0x6f,0x6e,0xfd,0x20,0,0,0x18,0x73,0x65,0x64,0x6c,0x65, +0x74,0x74,0x65,0x72,0x3d,2,0x61,0x32,0x65,0x50,0x69,0x12,0x67,0x69,0x74,0xa7, +0,0x1c,0x73,0x68,0x70,0x75,0x6e,0x63,0x74,0x75,0x61,0x74,0x69,0x6f,0x6e,0xe9, +0,0,0x1a,0x63,0x69,0x6d,0x61,0x6c,0x6e,0x75,0x6d,0x62,0x65,0x72,0xa7,0 +}; + +const char PropNameData::nameGroups[22098]={ +2,'A','l','p','h','a',0,'A','l','p','h','a','b','e','t','i','c',0, +4,'N',0,'N','o',0,'F',0,'F','a','l','s','e',0,4,'Y',0,'Y','e','s',0,'T',0,'T','r','u','e',0, +2,'N','R',0,'N','o','t','_','R','e','o','r','d','e','r','e','d',0, +2,'O','V',0,'O','v','e','r','l','a','y',0,2,'H','A','N','R',0,'H','a','n','_','R','e','a','d','i','n','g',0, +2,'N','K',0,'N','u','k','t','a',0,2,'K','V',0,'K','a','n','a','_','V','o','i','c','i','n','g',0, +2,'V','R',0,'V','i','r','a','m','a',0,2,'C','C','C','1','0',0,'C','C','C','1','0',0, +2,'C','C','C','1','1',0,'C','C','C','1','1',0,2,'C','C','C','1','2',0,'C','C','C','1','2',0, +2,'C','C','C','1','3',0,'C','C','C','1','3',0,2,'C','C','C','1','4',0,'C','C','C','1','4',0, +2,'C','C','C','1','5',0,'C','C','C','1','5',0,2,'C','C','C','1','6',0,'C','C','C','1','6',0, +2,'C','C','C','1','7',0,'C','C','C','1','7',0,2,'C','C','C','1','8',0,'C','C','C','1','8',0, +2,'C','C','C','1','9',0,'C','C','C','1','9',0,2,'C','C','C','2','0',0,'C','C','C','2','0',0, +2,'C','C','C','2','1',0,'C','C','C','2','1',0,2,'C','C','C','2','2',0,'C','C','C','2','2',0, +2,'C','C','C','2','3',0,'C','C','C','2','3',0,2,'C','C','C','2','4',0,'C','C','C','2','4',0, +2,'C','C','C','2','5',0,'C','C','C','2','5',0,2,'C','C','C','2','6',0,'C','C','C','2','6',0, +2,'C','C','C','2','7',0,'C','C','C','2','7',0,2,'C','C','C','2','8',0,'C','C','C','2','8',0, +2,'C','C','C','2','9',0,'C','C','C','2','9',0,2,'C','C','C','3','0',0,'C','C','C','3','0',0, +2,'C','C','C','3','1',0,'C','C','C','3','1',0,2,'C','C','C','3','2',0,'C','C','C','3','2',0, +2,'C','C','C','3','3',0,'C','C','C','3','3',0,2,'C','C','C','3','4',0,'C','C','C','3','4',0, +2,'C','C','C','3','5',0,'C','C','C','3','5',0,2,'C','C','C','3','6',0,'C','C','C','3','6',0, +2,'C','C','C','8','4',0,'C','C','C','8','4',0,2,'C','C','C','9','1',0,'C','C','C','9','1',0, +2,'C','C','C','1','0','3',0,'C','C','C','1','0','3',0,2,'C','C','C','1','0','7',0,'C','C','C','1','0','7',0, +2,'C','C','C','1','1','8',0,'C','C','C','1','1','8',0,2,'C','C','C','1','2','2',0,'C','C','C','1','2','2',0, +2,'C','C','C','1','2','9',0,'C','C','C','1','2','9',0,2,'C','C','C','1','3','0',0,'C','C','C','1','3','0',0, +2,'C','C','C','1','3','2',0,'C','C','C','1','3','2',0,2,'C','C','C','1','3','3',0,'C','C','C','1','3','3',0, +2,'A','T','B','L',0,'A','t','t','a','c','h','e','d','_','B','e','l','o','w','_','L','e','f','t',0, +2,'A','T','B',0,'A','t','t','a','c','h','e','d','_','B','e','l','o','w',0, +2,'A','T','A',0,'A','t','t','a','c','h','e','d','_','A','b','o','v','e',0, +2,'A','T','A','R',0,'A','t','t','a','c','h','e','d','_','A','b','o','v','e','_','R','i','g','h','t',0, +2,'B','L',0,'B','e','l','o','w','_','L','e','f','t',0,2,'B',0,'B','e','l','o','w',0, +2,'B','R',0,'B','e','l','o','w','_','R','i','g','h','t',0, +2,'L',0,'L','e','f','t',0,2,'R',0,'R','i','g','h','t',0, +2,'A','L',0,'A','b','o','v','e','_','L','e','f','t',0,2,'A',0,'A','b','o','v','e',0, +2,'A','R',0,'A','b','o','v','e','_','R','i','g','h','t',0, +2,'D','B',0,'D','o','u','b','l','e','_','B','e','l','o','w',0, +2,'D','A',0,'D','o','u','b','l','e','_','A','b','o','v','e',0, +2,'I','S',0,'I','o','t','a','_','S','u','b','s','c','r','i','p','t',0, +2,'A','H','e','x',0,'A','S','C','I','I','_','H','e','x','_','D','i','g','i','t',0, +2,'B','i','d','i','_','C',0,'B','i','d','i','_','C','o','n','t','r','o','l',0, +2,'B','i','d','i','_','M',0,'B','i','d','i','_','M','i','r','r','o','r','e','d',0, +2,'D','a','s','h',0,'D','a','s','h',0,2,'D','I',0,'D','e','f','a','u','l','t','_','I','g','n','o','r','a','b','l','e', +'_','C','o','d','e','_','P','o','i','n','t',0,2,'D','e','p',0,'D','e','p','r','e','c','a','t','e','d',0, +2,'D','i','a',0,'D','i','a','c','r','i','t','i','c',0,2,'E','x','t',0,'E','x','t','e','n','d','e','r',0, +2,'C','o','m','p','_','E','x',0,'F','u','l','l','_','C','o','m','p','o','s','i','t','i','o','n','_','E','x','c','l','u','s', +'i','o','n',0,2,'G','r','_','B','a','s','e',0,'G','r','a','p','h','e','m','e','_','B','a','s','e',0, +2,'G','r','_','E','x','t',0,'G','r','a','p','h','e','m','e','_','E','x','t','e','n','d',0, +2,'G','r','_','L','i','n','k',0,'G','r','a','p','h','e','m','e','_','L','i','n','k',0, +2,'H','e','x',0,'H','e','x','_','D','i','g','i','t',0,2,'H','y','p','h','e','n',0,'H','y','p','h','e','n',0, +2,'I','D','C',0,'I','D','_','C','o','n','t','i','n','u','e',0, +2,'I','D','S',0,'I','D','_','S','t','a','r','t',0,2,'I','d','e','o',0,'I','d','e','o','g','r','a','p','h','i','c',0, +2,'I','D','S','B',0,'I','D','S','_','B','i','n','a','r','y','_','O','p','e','r','a','t','o','r',0, +2,'I','D','S','T',0,'I','D','S','_','T','r','i','n','a','r','y','_','O','p','e','r','a','t','o','r',0, +2,'J','o','i','n','_','C',0,'J','o','i','n','_','C','o','n','t','r','o','l',0, +2,'L','O','E',0,'L','o','g','i','c','a','l','_','O','r','d','e','r','_','E','x','c','e','p','t','i','o','n',0, +2,'L','o','w','e','r',0,'L','o','w','e','r','c','a','s','e',0, +2,'M','a','t','h',0,'M','a','t','h',0,2,'N','C','h','a','r',0,'N','o','n','c','h','a','r','a','c','t','e','r','_','C', +'o','d','e','_','P','o','i','n','t',0,2,'Q','M','a','r','k',0,'Q','u','o','t','a','t','i','o','n','_','M','a','r','k',0, +2,'R','a','d','i','c','a','l',0,'R','a','d','i','c','a','l',0, +2,'S','D',0,'S','o','f','t','_','D','o','t','t','e','d',0, +2,'T','e','r','m',0,'T','e','r','m','i','n','a','l','_','P','u','n','c','t','u','a','t','i','o','n',0, +2,'U','I','d','e','o',0,'U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h',0, +2,'U','p','p','e','r',0,'U','p','p','e','r','c','a','s','e',0, +3,'W','S','p','a','c','e',0,'W','h','i','t','e','_','S','p','a','c','e',0,'s','p','a','c','e',0, +2,'X','I','D','C',0,'X','I','D','_','C','o','n','t','i','n','u','e',0, +2,'X','I','D','S',0,'X','I','D','_','S','t','a','r','t',0, +2,'S','e','n','s','i','t','i','v','e',0,'C','a','s','e','_','S','e','n','s','i','t','i','v','e',0, +2,'S','T','e','r','m',0,'S','e','n','t','e','n','c','e','_','T','e','r','m','i','n','a','l',0, +2,'V','S',0,'V','a','r','i','a','t','i','o','n','_','S','e','l','e','c','t','o','r',0, +2,'n','f','d','i','n','e','r','t',0,'N','F','D','_','I','n','e','r','t',0, +2,'n','f','k','d','i','n','e','r','t',0,'N','F','K','D','_','I','n','e','r','t',0, +2,'n','f','c','i','n','e','r','t',0,'N','F','C','_','I','n','e','r','t',0, +2,'n','f','k','c','i','n','e','r','t',0,'N','F','K','C','_','I','n','e','r','t',0, +2,'s','e','g','s','t','a','r','t',0,'S','e','g','m','e','n','t','_','S','t','a','r','t','e','r',0, +2,'P','a','t','_','S','y','n',0,'P','a','t','t','e','r','n','_','S','y','n','t','a','x',0, +2,'P','a','t','_','W','S',0,'P','a','t','t','e','r','n','_','W','h','i','t','e','_','S','p','a','c','e',0, +2,0,'a','l','n','u','m',0,2,0,'b','l','a','n','k',0, +2,0,'g','r','a','p','h',0,2,0,'p','r','i','n','t',0, +2,0,'x','d','i','g','i','t',0,2,'C','a','s','e','d',0,'C','a','s','e','d',0, +2,'C','I',0,'C','a','s','e','_','I','g','n','o','r','a','b','l','e',0, +2,'C','W','L',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','L','o','w','e','r','c','a','s','e','d',0, +2,'C','W','U',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','U','p','p','e','r','c','a','s','e','d',0, +2,'C','W','T',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','T','i','t','l','e','c','a','s','e','d',0, +2,'C','W','C','F',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','C','a','s','e','f','o','l','d','e','d',0, +2,'C','W','C','M',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','C','a','s','e','m','a','p','p','e','d',0, +2,'C','W','K','C','F',0,'C','h','a','n','g','e','s','_','W','h','e','n','_','N','F','K','C','_','C','a','s','e','f','o','l', +'d','e','d',0,2,'E','m','o','j','i',0,'E','m','o','j','i',0, +2,'E','P','r','e','s',0,'E','m','o','j','i','_','P','r','e','s','e','n','t','a','t','i','o','n',0, +2,'E','M','o','d',0,'E','m','o','j','i','_','M','o','d','i','f','i','e','r',0, +2,'E','B','a','s','e',0,'E','m','o','j','i','_','M','o','d','i','f','i','e','r','_','B','a','s','e',0, +2,'E','C','o','m','p',0,'E','m','o','j','i','_','C','o','m','p','o','n','e','n','t',0, +2,'R','I',0,'R','e','g','i','o','n','a','l','_','I','n','d','i','c','a','t','o','r',0, +2,'P','C','M',0,'P','r','e','p','e','n','d','e','d','_','C','o','n','c','a','t','e','n','a','t','i','o','n','_','M','a','r', +'k',0,2,'E','x','t','P','i','c','t',0,'E','x','t','e','n','d','e','d','_','P','i','c','t','o','g','r','a','p','h','i','c', +0,2,'b','c',0,'B','i','d','i','_','C','l','a','s','s',0, +2,'L',0,'L','e','f','t','_','T','o','_','R','i','g','h','t',0, +2,'R',0,'R','i','g','h','t','_','T','o','_','L','e','f','t',0, +2,'E','N',0,'E','u','r','o','p','e','a','n','_','N','u','m','b','e','r',0, +2,'E','S',0,'E','u','r','o','p','e','a','n','_','S','e','p','a','r','a','t','o','r',0, +2,'E','T',0,'E','u','r','o','p','e','a','n','_','T','e','r','m','i','n','a','t','o','r',0, +2,'A','N',0,'A','r','a','b','i','c','_','N','u','m','b','e','r',0, +2,'C','S',0,'C','o','m','m','o','n','_','S','e','p','a','r','a','t','o','r',0, +2,'B',0,'P','a','r','a','g','r','a','p','h','_','S','e','p','a','r','a','t','o','r',0, +2,'S',0,'S','e','g','m','e','n','t','_','S','e','p','a','r','a','t','o','r',0, +2,'W','S',0,'W','h','i','t','e','_','S','p','a','c','e',0, +2,'O','N',0,'O','t','h','e','r','_','N','e','u','t','r','a','l',0, +2,'L','R','E',0,'L','e','f','t','_','T','o','_','R','i','g','h','t','_','E','m','b','e','d','d','i','n','g',0, +2,'L','R','O',0,'L','e','f','t','_','T','o','_','R','i','g','h','t','_','O','v','e','r','r','i','d','e',0, +2,'A','L',0,'A','r','a','b','i','c','_','L','e','t','t','e','r',0, +2,'R','L','E',0,'R','i','g','h','t','_','T','o','_','L','e','f','t','_','E','m','b','e','d','d','i','n','g',0, +2,'R','L','O',0,'R','i','g','h','t','_','T','o','_','L','e','f','t','_','O','v','e','r','r','i','d','e',0, +2,'P','D','F',0,'P','o','p','_','D','i','r','e','c','t','i','o','n','a','l','_','F','o','r','m','a','t',0, +2,'N','S','M',0,'N','o','n','s','p','a','c','i','n','g','_','M','a','r','k',0, +2,'B','N',0,'B','o','u','n','d','a','r','y','_','N','e','u','t','r','a','l',0, +2,'F','S','I',0,'F','i','r','s','t','_','S','t','r','o','n','g','_','I','s','o','l','a','t','e',0, +2,'L','R','I',0,'L','e','f','t','_','T','o','_','R','i','g','h','t','_','I','s','o','l','a','t','e',0, +2,'R','L','I',0,'R','i','g','h','t','_','T','o','_','L','e','f','t','_','I','s','o','l','a','t','e',0, +2,'P','D','I',0,'P','o','p','_','D','i','r','e','c','t','i','o','n','a','l','_','I','s','o','l','a','t','e',0, +2,'b','l','k',0,'B','l','o','c','k',0,2,'N','B',0,'N','o','_','B','l','o','c','k',0, +2,'A','S','C','I','I',0,'B','a','s','i','c','_','L','a','t','i','n',0, +3,'L','a','t','i','n','_','1','_','S','u','p',0,'L','a','t','i','n','_','1','_','S','u','p','p','l','e','m','e','n','t',0, +'L','a','t','i','n','_','1',0,2,'L','a','t','i','n','_','E','x','t','_','A',0,'L','a','t','i','n','_','E','x','t','e','n', +'d','e','d','_','A',0,2,'L','a','t','i','n','_','E','x','t','_','B',0,'L','a','t','i','n','_','E','x','t','e','n','d','e', +'d','_','B',0,2,'I','P','A','_','E','x','t',0,'I','P','A','_','E','x','t','e','n','s','i','o','n','s',0, +2,'M','o','d','i','f','i','e','r','_','L','e','t','t','e','r','s',0,'S','p','a','c','i','n','g','_','M','o','d','i','f','i', +'e','r','_','L','e','t','t','e','r','s',0,2,'D','i','a','c','r','i','t','i','c','a','l','s',0, +'C','o','m','b','i','n','i','n','g','_','D','i','a','c','r','i','t','i','c','a','l','_','M','a','r','k','s',0, +2,'G','r','e','e','k',0,'G','r','e','e','k','_','A','n','d','_','C','o','p','t','i','c',0, +2,'C','y','r','i','l','l','i','c',0,'C','y','r','i','l','l','i','c',0, +2,'A','r','m','e','n','i','a','n',0,'A','r','m','e','n','i','a','n',0, +2,'H','e','b','r','e','w',0,'H','e','b','r','e','w',0,2,'A','r','a','b','i','c',0,'A','r','a','b','i','c',0, +2,'S','y','r','i','a','c',0,'S','y','r','i','a','c',0,2,'T','h','a','a','n','a',0,'T','h','a','a','n','a',0, +2,'D','e','v','a','n','a','g','a','r','i',0,'D','e','v','a','n','a','g','a','r','i',0, +2,'B','e','n','g','a','l','i',0,'B','e','n','g','a','l','i',0, +2,'G','u','r','m','u','k','h','i',0,'G','u','r','m','u','k','h','i',0, +2,'G','u','j','a','r','a','t','i',0,'G','u','j','a','r','a','t','i',0, +2,'O','r','i','y','a',0,'O','r','i','y','a',0,2,'T','a','m','i','l',0,'T','a','m','i','l',0, +2,'T','e','l','u','g','u',0,'T','e','l','u','g','u',0,2,'K','a','n','n','a','d','a',0, +'K','a','n','n','a','d','a',0,2,'M','a','l','a','y','a','l','a','m',0,'M','a','l','a','y','a','l','a','m',0, +2,'S','i','n','h','a','l','a',0,'S','i','n','h','a','l','a',0, +2,'T','h','a','i',0,'T','h','a','i',0,2,'L','a','o',0,'L','a','o',0, +2,'T','i','b','e','t','a','n',0,'T','i','b','e','t','a','n',0, +2,'M','y','a','n','m','a','r',0,'M','y','a','n','m','a','r',0, +2,'G','e','o','r','g','i','a','n',0,'G','e','o','r','g','i','a','n',0, +2,'J','a','m','o',0,'H','a','n','g','u','l','_','J','a','m','o',0, +2,'E','t','h','i','o','p','i','c',0,'E','t','h','i','o','p','i','c',0, +2,'C','h','e','r','o','k','e','e',0,'C','h','e','r','o','k','e','e',0, +3,'U','C','A','S',0,'U','n','i','f','i','e','d','_','C','a','n','a','d','i','a','n','_','A','b','o','r','i','g','i','n','a', +'l','_','S','y','l','l','a','b','i','c','s',0,'C','a','n','a','d','i','a','n','_','S','y','l','l','a','b','i','c','s',0, +2,'O','g','h','a','m',0,'O','g','h','a','m',0,2,'R','u','n','i','c',0,'R','u','n','i','c',0, +2,'K','h','m','e','r',0,'K','h','m','e','r',0,2,'M','o','n','g','o','l','i','a','n',0, +'M','o','n','g','o','l','i','a','n',0,2,'L','a','t','i','n','_','E','x','t','_','A','d','d','i','t','i','o','n','a','l',0, +'L','a','t','i','n','_','E','x','t','e','n','d','e','d','_','A','d','d','i','t','i','o','n','a','l',0, +2,'G','r','e','e','k','_','E','x','t',0,'G','r','e','e','k','_','E','x','t','e','n','d','e','d',0, +2,'P','u','n','c','t','u','a','t','i','o','n',0,'G','e','n','e','r','a','l','_','P','u','n','c','t','u','a','t','i','o','n', +0,2,'S','u','p','e','r','_','A','n','d','_','S','u','b',0,'S','u','p','e','r','s','c','r','i','p','t','s','_','A','n','d', +'_','S','u','b','s','c','r','i','p','t','s',0,2,'C','u','r','r','e','n','c','y','_','S','y','m','b','o','l','s',0, +'C','u','r','r','e','n','c','y','_','S','y','m','b','o','l','s',0, +3,'D','i','a','c','r','i','t','i','c','a','l','s','_','F','o','r','_','S','y','m','b','o','l','s',0, +'C','o','m','b','i','n','i','n','g','_','D','i','a','c','r','i','t','i','c','a','l','_','M','a','r','k','s','_','F','o','r','_', +'S','y','m','b','o','l','s',0,'C','o','m','b','i','n','i','n','g','_','M','a','r','k','s','_','F','o','r','_','S','y','m','b', +'o','l','s',0,2,'L','e','t','t','e','r','l','i','k','e','_','S','y','m','b','o','l','s',0, +'L','e','t','t','e','r','l','i','k','e','_','S','y','m','b','o','l','s',0, +2,'N','u','m','b','e','r','_','F','o','r','m','s',0,'N','u','m','b','e','r','_','F','o','r','m','s',0, +2,'A','r','r','o','w','s',0,'A','r','r','o','w','s',0,2,'M','a','t','h','_','O','p','e','r','a','t','o','r','s',0, +'M','a','t','h','e','m','a','t','i','c','a','l','_','O','p','e','r','a','t','o','r','s',0, +2,'M','i','s','c','_','T','e','c','h','n','i','c','a','l',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_','T','e', +'c','h','n','i','c','a','l',0,2,'C','o','n','t','r','o','l','_','P','i','c','t','u','r','e','s',0, +'C','o','n','t','r','o','l','_','P','i','c','t','u','r','e','s',0, +2,'O','C','R',0,'O','p','t','i','c','a','l','_','C','h','a','r','a','c','t','e','r','_','R','e','c','o','g','n','i','t','i', +'o','n',0,2,'E','n','c','l','o','s','e','d','_','A','l','p','h','a','n','u','m',0,'E','n','c','l','o','s','e','d','_','A', +'l','p','h','a','n','u','m','e','r','i','c','s',0,2,'B','o','x','_','D','r','a','w','i','n','g',0, +'B','o','x','_','D','r','a','w','i','n','g',0,2,'B','l','o','c','k','_','E','l','e','m','e','n','t','s',0, +'B','l','o','c','k','_','E','l','e','m','e','n','t','s',0,2,'G','e','o','m','e','t','r','i','c','_','S','h','a','p','e','s', +0,'G','e','o','m','e','t','r','i','c','_','S','h','a','p','e','s',0, +2,'M','i','s','c','_','S','y','m','b','o','l','s',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_','S','y','m','b', +'o','l','s',0,2,'D','i','n','g','b','a','t','s',0,'D','i','n','g','b','a','t','s',0, +2,'B','r','a','i','l','l','e',0,'B','r','a','i','l','l','e','_','P','a','t','t','e','r','n','s',0, +2,'C','J','K','_','R','a','d','i','c','a','l','s','_','S','u','p',0,'C','J','K','_','R','a','d','i','c','a','l','s','_','S', +'u','p','p','l','e','m','e','n','t',0,2,'K','a','n','g','x','i',0,'K','a','n','g','x','i','_','R','a','d','i','c','a','l', +'s',0,2,'I','D','C',0,'I','d','e','o','g','r','a','p','h','i','c','_','D','e','s','c','r','i','p','t','i','o','n','_','C', +'h','a','r','a','c','t','e','r','s',0,2,'C','J','K','_','S','y','m','b','o','l','s',0,'C','J','K','_','S','y','m','b','o', +'l','s','_','A','n','d','_','P','u','n','c','t','u','a','t','i','o','n',0, +2,'H','i','r','a','g','a','n','a',0,'H','i','r','a','g','a','n','a',0, +2,'K','a','t','a','k','a','n','a',0,'K','a','t','a','k','a','n','a',0, +2,'B','o','p','o','m','o','f','o',0,'B','o','p','o','m','o','f','o',0, +2,'C','o','m','p','a','t','_','J','a','m','o',0,'H','a','n','g','u','l','_','C','o','m','p','a','t','i','b','i','l','i','t', +'y','_','J','a','m','o',0,2,'K','a','n','b','u','n',0,'K','a','n','b','u','n',0, +2,'B','o','p','o','m','o','f','o','_','E','x','t',0,'B','o','p','o','m','o','f','o','_','E','x','t','e','n','d','e','d',0, +2,'E','n','c','l','o','s','e','d','_','C','J','K',0,'E','n','c','l','o','s','e','d','_','C','J','K','_','L','e','t','t','e', +'r','s','_','A','n','d','_','M','o','n','t','h','s',0,2,'C','J','K','_','C','o','m','p','a','t',0, +'C','J','K','_','C','o','m','p','a','t','i','b','i','l','i','t','y',0, +2,'C','J','K','_','E','x','t','_','A',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h', +'s','_','E','x','t','e','n','s','i','o','n','_','A',0,2,'C','J','K',0,'C','J','K','_','U','n','i','f','i','e','d','_','I', +'d','e','o','g','r','a','p','h','s',0,2,'Y','i','_','S','y','l','l','a','b','l','e','s',0, +'Y','i','_','S','y','l','l','a','b','l','e','s',0,2,'Y','i','_','R','a','d','i','c','a','l','s',0, +'Y','i','_','R','a','d','i','c','a','l','s',0,2,'H','a','n','g','u','l',0,'H','a','n','g','u','l','_','S','y','l','l','a', +'b','l','e','s',0,2,'H','i','g','h','_','S','u','r','r','o','g','a','t','e','s',0,'H','i','g','h','_','S','u','r','r','o', +'g','a','t','e','s',0,2,'H','i','g','h','_','P','U','_','S','u','r','r','o','g','a','t','e','s',0, +'H','i','g','h','_','P','r','i','v','a','t','e','_','U','s','e','_','S','u','r','r','o','g','a','t','e','s',0, +2,'L','o','w','_','S','u','r','r','o','g','a','t','e','s',0,'L','o','w','_','S','u','r','r','o','g','a','t','e','s',0, +3,'P','U','A',0,'P','r','i','v','a','t','e','_','U','s','e','_','A','r','e','a',0,'P','r','i','v','a','t','e','_','U','s', +'e',0,2,'C','J','K','_','C','o','m','p','a','t','_','I','d','e','o','g','r','a','p','h','s',0, +'C','J','K','_','C','o','m','p','a','t','i','b','i','l','i','t','y','_','I','d','e','o','g','r','a','p','h','s',0, +2,'A','l','p','h','a','b','e','t','i','c','_','P','F',0,'A','l','p','h','a','b','e','t','i','c','_','P','r','e','s','e','n', +'t','a','t','i','o','n','_','F','o','r','m','s',0,3,'A','r','a','b','i','c','_','P','F','_','A',0, +'A','r','a','b','i','c','_','P','r','e','s','e','n','t','a','t','i','o','n','_','F','o','r','m','s','_','A',0, +'A','r','a','b','i','c','_','P','r','e','s','e','n','t','a','t','i','o','n','_','F','o','r','m','s','-','A',0, +2,'H','a','l','f','_','M','a','r','k','s',0,'C','o','m','b','i','n','i','n','g','_','H','a','l','f','_','M','a','r','k','s', +0,2,'C','J','K','_','C','o','m','p','a','t','_','F','o','r','m','s',0,'C','J','K','_','C','o','m','p','a','t','i','b','i', +'l','i','t','y','_','F','o','r','m','s',0,2,'S','m','a','l','l','_','F','o','r','m','s',0, +'S','m','a','l','l','_','F','o','r','m','_','V','a','r','i','a','n','t','s',0, +2,'A','r','a','b','i','c','_','P','F','_','B',0,'A','r','a','b','i','c','_','P','r','e','s','e','n','t','a','t','i','o','n', +'_','F','o','r','m','s','_','B',0,2,'S','p','e','c','i','a','l','s',0,'S','p','e','c','i','a','l','s',0, +2,'H','a','l','f','_','A','n','d','_','F','u','l','l','_','F','o','r','m','s',0,'H','a','l','f','w','i','d','t','h','_','A', +'n','d','_','F','u','l','l','w','i','d','t','h','_','F','o','r','m','s',0, +2,'O','l','d','_','I','t','a','l','i','c',0,'O','l','d','_','I','t','a','l','i','c',0, +2,'G','o','t','h','i','c',0,'G','o','t','h','i','c',0,2,'D','e','s','e','r','e','t',0, +'D','e','s','e','r','e','t',0,2,'B','y','z','a','n','t','i','n','e','_','M','u','s','i','c',0, +'B','y','z','a','n','t','i','n','e','_','M','u','s','i','c','a','l','_','S','y','m','b','o','l','s',0, +2,'M','u','s','i','c',0,'M','u','s','i','c','a','l','_','S','y','m','b','o','l','s',0, +2,'M','a','t','h','_','A','l','p','h','a','n','u','m',0,'M','a','t','h','e','m','a','t','i','c','a','l','_','A','l','p','h', +'a','n','u','m','e','r','i','c','_','S','y','m','b','o','l','s',0, +2,'C','J','K','_','E','x','t','_','B',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h', +'s','_','E','x','t','e','n','s','i','o','n','_','B',0,2,'C','J','K','_','C','o','m','p','a','t','_','I','d','e','o','g','r', +'a','p','h','s','_','S','u','p',0,'C','J','K','_','C','o','m','p','a','t','i','b','i','l','i','t','y','_','I','d','e','o','g', +'r','a','p','h','s','_','S','u','p','p','l','e','m','e','n','t',0, +2,'T','a','g','s',0,'T','a','g','s',0,3,'C','y','r','i','l','l','i','c','_','S','u','p',0, +'C','y','r','i','l','l','i','c','_','S','u','p','p','l','e','m','e','n','t',0,'C','y','r','i','l','l','i','c','_','S','u','p', +'p','l','e','m','e','n','t','a','r','y',0,2,'T','a','g','a','l','o','g',0,'T','a','g','a','l','o','g',0, +2,'H','a','n','u','n','o','o',0,'H','a','n','u','n','o','o',0, +2,'B','u','h','i','d',0,'B','u','h','i','d',0,2,'T','a','g','b','a','n','w','a',0,'T','a','g','b','a','n','w','a',0, +2,'M','i','s','c','_','M','a','t','h','_','S','y','m','b','o','l','s','_','A',0,'M','i','s','c','e','l','l','a','n','e','o', +'u','s','_','M','a','t','h','e','m','a','t','i','c','a','l','_','S','y','m','b','o','l','s','_','A',0, +2,'S','u','p','_','A','r','r','o','w','s','_','A',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','A','r','r','o','w', +'s','_','A',0,2,'S','u','p','_','A','r','r','o','w','s','_','B',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','A', +'r','r','o','w','s','_','B',0,2,'M','i','s','c','_','M','a','t','h','_','S','y','m','b','o','l','s','_','B',0, +'M','i','s','c','e','l','l','a','n','e','o','u','s','_','M','a','t','h','e','m','a','t','i','c','a','l','_','S','y','m','b','o', +'l','s','_','B',0,2,'S','u','p','_','M','a','t','h','_','O','p','e','r','a','t','o','r','s',0, +'S','u','p','p','l','e','m','e','n','t','a','l','_','M','a','t','h','e','m','a','t','i','c','a','l','_','O','p','e','r','a','t', +'o','r','s',0,2,'K','a','t','a','k','a','n','a','_','E','x','t',0,'K','a','t','a','k','a','n','a','_','P','h','o','n','e', +'t','i','c','_','E','x','t','e','n','s','i','o','n','s',0,2,'V','S',0,'V','a','r','i','a','t','i','o','n','_','S','e','l', +'e','c','t','o','r','s',0,2,'S','u','p','_','P','U','A','_','A',0,'S','u','p','p','l','e','m','e','n','t','a','r','y','_', +'P','r','i','v','a','t','e','_','U','s','e','_','A','r','e','a','_','A',0, +2,'S','u','p','_','P','U','A','_','B',0,'S','u','p','p','l','e','m','e','n','t','a','r','y','_','P','r','i','v','a','t','e', +'_','U','s','e','_','A','r','e','a','_','B',0,2,'L','i','m','b','u',0,'L','i','m','b','u',0, +2,'T','a','i','_','L','e',0,'T','a','i','_','L','e',0,2,'K','h','m','e','r','_','S','y','m','b','o','l','s',0, +'K','h','m','e','r','_','S','y','m','b','o','l','s',0,2,'P','h','o','n','e','t','i','c','_','E','x','t',0, +'P','h','o','n','e','t','i','c','_','E','x','t','e','n','s','i','o','n','s',0, +2,'M','i','s','c','_','A','r','r','o','w','s',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_','S','y','m','b','o', +'l','s','_','A','n','d','_','A','r','r','o','w','s',0,2,'Y','i','j','i','n','g',0,'Y','i','j','i','n','g','_','H','e','x', +'a','g','r','a','m','_','S','y','m','b','o','l','s',0,2,'L','i','n','e','a','r','_','B','_','S','y','l','l','a','b','a','r', +'y',0,'L','i','n','e','a','r','_','B','_','S','y','l','l','a','b','a','r','y',0, +2,'L','i','n','e','a','r','_','B','_','I','d','e','o','g','r','a','m','s',0,'L','i','n','e','a','r','_','B','_','I','d','e', +'o','g','r','a','m','s',0,2,'A','e','g','e','a','n','_','N','u','m','b','e','r','s',0,'A','e','g','e','a','n','_','N','u', +'m','b','e','r','s',0,2,'U','g','a','r','i','t','i','c',0,'U','g','a','r','i','t','i','c',0, +2,'S','h','a','v','i','a','n',0,'S','h','a','v','i','a','n',0, +2,'O','s','m','a','n','y','a',0,'O','s','m','a','n','y','a',0, +2,'C','y','p','r','i','o','t','_','S','y','l','l','a','b','a','r','y',0,'C','y','p','r','i','o','t','_','S','y','l','l','a', +'b','a','r','y',0,2,'T','a','i','_','X','u','a','n','_','J','i','n','g',0,'T','a','i','_','X','u','a','n','_','J','i','n', +'g','_','S','y','m','b','o','l','s',0,2,'V','S','_','S','u','p',0,'V','a','r','i','a','t','i','o','n','_','S','e','l','e', +'c','t','o','r','s','_','S','u','p','p','l','e','m','e','n','t',0, +2,'A','n','c','i','e','n','t','_','G','r','e','e','k','_','M','u','s','i','c',0,'A','n','c','i','e','n','t','_','G','r','e', +'e','k','_','M','u','s','i','c','a','l','_','N','o','t','a','t','i','o','n',0, +2,'A','n','c','i','e','n','t','_','G','r','e','e','k','_','N','u','m','b','e','r','s',0,'A','n','c','i','e','n','t','_','G', +'r','e','e','k','_','N','u','m','b','e','r','s',0,2,'A','r','a','b','i','c','_','S','u','p',0, +'A','r','a','b','i','c','_','S','u','p','p','l','e','m','e','n','t',0, +2,'B','u','g','i','n','e','s','e',0,'B','u','g','i','n','e','s','e',0, +2,'C','J','K','_','S','t','r','o','k','e','s',0,'C','J','K','_','S','t','r','o','k','e','s',0, +2,'D','i','a','c','r','i','t','i','c','a','l','s','_','S','u','p',0,'C','o','m','b','i','n','i','n','g','_','D','i','a','c', +'r','i','t','i','c','a','l','_','M','a','r','k','s','_','S','u','p','p','l','e','m','e','n','t',0, +2,'C','o','p','t','i','c',0,'C','o','p','t','i','c',0,2,'E','t','h','i','o','p','i','c','_','E','x','t',0, +'E','t','h','i','o','p','i','c','_','E','x','t','e','n','d','e','d',0, +2,'E','t','h','i','o','p','i','c','_','S','u','p',0,'E','t','h','i','o','p','i','c','_','S','u','p','p','l','e','m','e','n', +'t',0,2,'G','e','o','r','g','i','a','n','_','S','u','p',0,'G','e','o','r','g','i','a','n','_','S','u','p','p','l','e','m', +'e','n','t',0,2,'G','l','a','g','o','l','i','t','i','c',0,'G','l','a','g','o','l','i','t','i','c',0, +2,'K','h','a','r','o','s','h','t','h','i',0,'K','h','a','r','o','s','h','t','h','i',0, +2,'M','o','d','i','f','i','e','r','_','T','o','n','e','_','L','e','t','t','e','r','s',0,'M','o','d','i','f','i','e','r','_', +'T','o','n','e','_','L','e','t','t','e','r','s',0,2,'N','e','w','_','T','a','i','_','L','u','e',0, +'N','e','w','_','T','a','i','_','L','u','e',0,2,'O','l','d','_','P','e','r','s','i','a','n',0, +'O','l','d','_','P','e','r','s','i','a','n',0,2,'P','h','o','n','e','t','i','c','_','E','x','t','_','S','u','p',0, +'P','h','o','n','e','t','i','c','_','E','x','t','e','n','s','i','o','n','s','_','S','u','p','p','l','e','m','e','n','t',0, +2,'S','u','p','_','P','u','n','c','t','u','a','t','i','o','n',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','P','u', +'n','c','t','u','a','t','i','o','n',0,2,'S','y','l','o','t','i','_','N','a','g','r','i',0, +'S','y','l','o','t','i','_','N','a','g','r','i',0,2,'T','i','f','i','n','a','g','h',0,'T','i','f','i','n','a','g','h',0, +2,'V','e','r','t','i','c','a','l','_','F','o','r','m','s',0,'V','e','r','t','i','c','a','l','_','F','o','r','m','s',0, +2,'N','K','o',0,'N','K','o',0,2,'B','a','l','i','n','e','s','e',0,'B','a','l','i','n','e','s','e',0, +2,'L','a','t','i','n','_','E','x','t','_','C',0,'L','a','t','i','n','_','E','x','t','e','n','d','e','d','_','C',0, +2,'L','a','t','i','n','_','E','x','t','_','D',0,'L','a','t','i','n','_','E','x','t','e','n','d','e','d','_','D',0, +2,'P','h','a','g','s','_','P','a',0,'P','h','a','g','s','_','P','a',0, +2,'P','h','o','e','n','i','c','i','a','n',0,'P','h','o','e','n','i','c','i','a','n',0, +2,'C','u','n','e','i','f','o','r','m',0,'C','u','n','e','i','f','o','r','m',0, +2,'C','u','n','e','i','f','o','r','m','_','N','u','m','b','e','r','s',0,'C','u','n','e','i','f','o','r','m','_','N','u','m', +'b','e','r','s','_','A','n','d','_','P','u','n','c','t','u','a','t','i','o','n',0, +2,'C','o','u','n','t','i','n','g','_','R','o','d',0,'C','o','u','n','t','i','n','g','_','R','o','d','_','N','u','m','e','r', +'a','l','s',0,2,'S','u','n','d','a','n','e','s','e',0,'S','u','n','d','a','n','e','s','e',0, +2,'L','e','p','c','h','a',0,'L','e','p','c','h','a',0,2,'O','l','_','C','h','i','k','i',0, +'O','l','_','C','h','i','k','i',0,2,'C','y','r','i','l','l','i','c','_','E','x','t','_','A',0, +'C','y','r','i','l','l','i','c','_','E','x','t','e','n','d','e','d','_','A',0, +2,'V','a','i',0,'V','a','i',0,2,'C','y','r','i','l','l','i','c','_','E','x','t','_','B',0, +'C','y','r','i','l','l','i','c','_','E','x','t','e','n','d','e','d','_','B',0, +2,'S','a','u','r','a','s','h','t','r','a',0,'S','a','u','r','a','s','h','t','r','a',0, +2,'K','a','y','a','h','_','L','i',0,'K','a','y','a','h','_','L','i',0, +2,'R','e','j','a','n','g',0,'R','e','j','a','n','g',0,2,'C','h','a','m',0,'C','h','a','m',0, +2,'A','n','c','i','e','n','t','_','S','y','m','b','o','l','s',0,'A','n','c','i','e','n','t','_','S','y','m','b','o','l','s', +0,2,'P','h','a','i','s','t','o','s',0,'P','h','a','i','s','t','o','s','_','D','i','s','c',0, +2,'L','y','c','i','a','n',0,'L','y','c','i','a','n',0,2,'C','a','r','i','a','n',0,'C','a','r','i','a','n',0, +2,'L','y','d','i','a','n',0,'L','y','d','i','a','n',0,2,'M','a','h','j','o','n','g',0, +'M','a','h','j','o','n','g','_','T','i','l','e','s',0,2,'D','o','m','i','n','o',0,'D','o','m','i','n','o','_','T','i','l', +'e','s',0,2,'S','a','m','a','r','i','t','a','n',0,'S','a','m','a','r','i','t','a','n',0, +2,'U','C','A','S','_','E','x','t',0,'U','n','i','f','i','e','d','_','C','a','n','a','d','i','a','n','_','A','b','o','r','i', +'g','i','n','a','l','_','S','y','l','l','a','b','i','c','s','_','E','x','t','e','n','d','e','d',0, +2,'T','a','i','_','T','h','a','m',0,'T','a','i','_','T','h','a','m',0, +2,'V','e','d','i','c','_','E','x','t',0,'V','e','d','i','c','_','E','x','t','e','n','s','i','o','n','s',0, +2,'L','i','s','u',0,'L','i','s','u',0,2,'B','a','m','u','m',0,'B','a','m','u','m',0, +2,'I','n','d','i','c','_','N','u','m','b','e','r','_','F','o','r','m','s',0,'C','o','m','m','o','n','_','I','n','d','i','c', +'_','N','u','m','b','e','r','_','F','o','r','m','s',0,2,'D','e','v','a','n','a','g','a','r','i','_','E','x','t',0, +'D','e','v','a','n','a','g','a','r','i','_','E','x','t','e','n','d','e','d',0, +2,'J','a','m','o','_','E','x','t','_','A',0,'H','a','n','g','u','l','_','J','a','m','o','_','E','x','t','e','n','d','e','d', +'_','A',0,2,'J','a','v','a','n','e','s','e',0,'J','a','v','a','n','e','s','e',0, +2,'M','y','a','n','m','a','r','_','E','x','t','_','A',0,'M','y','a','n','m','a','r','_','E','x','t','e','n','d','e','d','_', +'A',0,2,'T','a','i','_','V','i','e','t',0,'T','a','i','_','V','i','e','t',0, +2,'M','e','e','t','e','i','_','M','a','y','e','k',0,'M','e','e','t','e','i','_','M','a','y','e','k',0, +2,'J','a','m','o','_','E','x','t','_','B',0,'H','a','n','g','u','l','_','J','a','m','o','_','E','x','t','e','n','d','e','d', +'_','B',0,2,'I','m','p','e','r','i','a','l','_','A','r','a','m','a','i','c',0,'I','m','p','e','r','i','a','l','_','A','r', +'a','m','a','i','c',0,2,'O','l','d','_','S','o','u','t','h','_','A','r','a','b','i','a','n',0, +'O','l','d','_','S','o','u','t','h','_','A','r','a','b','i','a','n',0, +2,'A','v','e','s','t','a','n',0,'A','v','e','s','t','a','n',0, +2,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','r','t','h','i','a','n',0, +'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','r','t','h','i','a','n',0, +2,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','h','l','a','v','i',0,'I','n','s','c','r','i','p','t','i', +'o','n','a','l','_','P','a','h','l','a','v','i',0,2,'O','l','d','_','T','u','r','k','i','c',0, +'O','l','d','_','T','u','r','k','i','c',0,2,'R','u','m','i',0,'R','u','m','i','_','N','u','m','e','r','a','l','_','S','y', +'m','b','o','l','s',0,2,'K','a','i','t','h','i',0,'K','a','i','t','h','i',0, +2,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g','l','y','p','h','s',0,'E','g','y','p','t','i','a','n','_','H', +'i','e','r','o','g','l','y','p','h','s',0,2,'E','n','c','l','o','s','e','d','_','A','l','p','h','a','n','u','m','_','S','u', +'p',0,'E','n','c','l','o','s','e','d','_','A','l','p','h','a','n','u','m','e','r','i','c','_','S','u','p','p','l','e','m','e', +'n','t',0,2,'E','n','c','l','o','s','e','d','_','I','d','e','o','g','r','a','p','h','i','c','_','S','u','p',0, +'E','n','c','l','o','s','e','d','_','I','d','e','o','g','r','a','p','h','i','c','_','S','u','p','p','l','e','m','e','n','t',0, +2,'C','J','K','_','E','x','t','_','C',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h', +'s','_','E','x','t','e','n','s','i','o','n','_','C',0,2,'M','a','n','d','a','i','c',0,'M','a','n','d','a','i','c',0, +2,'B','a','t','a','k',0,'B','a','t','a','k',0,2,'E','t','h','i','o','p','i','c','_','E','x','t','_','A',0, +'E','t','h','i','o','p','i','c','_','E','x','t','e','n','d','e','d','_','A',0, +2,'B','r','a','h','m','i',0,'B','r','a','h','m','i',0,2,'B','a','m','u','m','_','S','u','p',0, +'B','a','m','u','m','_','S','u','p','p','l','e','m','e','n','t',0, +2,'K','a','n','a','_','S','u','p',0,'K','a','n','a','_','S','u','p','p','l','e','m','e','n','t',0, +2,'P','l','a','y','i','n','g','_','C','a','r','d','s',0,'P','l','a','y','i','n','g','_','C','a','r','d','s',0, +2,'M','i','s','c','_','P','i','c','t','o','g','r','a','p','h','s',0,'M','i','s','c','e','l','l','a','n','e','o','u','s','_', +'S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h','s',0, +2,'E','m','o','t','i','c','o','n','s',0,'E','m','o','t','i','c','o','n','s',0, +2,'T','r','a','n','s','p','o','r','t','_','A','n','d','_','M','a','p',0,'T','r','a','n','s','p','o','r','t','_','A','n','d', +'_','M','a','p','_','S','y','m','b','o','l','s',0,2,'A','l','c','h','e','m','i','c','a','l',0, +'A','l','c','h','e','m','i','c','a','l','_','S','y','m','b','o','l','s',0, +2,'C','J','K','_','E','x','t','_','D',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h', +'s','_','E','x','t','e','n','s','i','o','n','_','D',0,2,'A','r','a','b','i','c','_','E','x','t','_','A',0, +'A','r','a','b','i','c','_','E','x','t','e','n','d','e','d','_','A',0, +2,'A','r','a','b','i','c','_','M','a','t','h',0,'A','r','a','b','i','c','_','M','a','t','h','e','m','a','t','i','c','a','l', +'_','A','l','p','h','a','b','e','t','i','c','_','S','y','m','b','o','l','s',0, +2,'C','h','a','k','m','a',0,'C','h','a','k','m','a',0,2,'M','e','e','t','e','i','_','M','a','y','e','k','_','E','x','t', +0,'M','e','e','t','e','i','_','M','a','y','e','k','_','E','x','t','e','n','s','i','o','n','s',0, +2,'M','e','r','o','i','t','i','c','_','C','u','r','s','i','v','e',0,'M','e','r','o','i','t','i','c','_','C','u','r','s','i', +'v','e',0,2,'M','e','r','o','i','t','i','c','_','H','i','e','r','o','g','l','y','p','h','s',0, +'M','e','r','o','i','t','i','c','_','H','i','e','r','o','g','l','y','p','h','s',0, +2,'M','i','a','o',0,'M','i','a','o',0,2,'S','h','a','r','a','d','a',0,'S','h','a','r','a','d','a',0, +2,'S','o','r','a','_','S','o','m','p','e','n','g',0,'S','o','r','a','_','S','o','m','p','e','n','g',0, +2,'S','u','n','d','a','n','e','s','e','_','S','u','p',0,'S','u','n','d','a','n','e','s','e','_','S','u','p','p','l','e','m', +'e','n','t',0,2,'T','a','k','r','i',0,'T','a','k','r','i',0, +2,'B','a','s','s','a','_','V','a','h',0,'B','a','s','s','a','_','V','a','h',0, +2,'C','a','u','c','a','s','i','a','n','_','A','l','b','a','n','i','a','n',0,'C','a','u','c','a','s','i','a','n','_','A','l', +'b','a','n','i','a','n',0,2,'C','o','p','t','i','c','_','E','p','a','c','t','_','N','u','m','b','e','r','s',0, +'C','o','p','t','i','c','_','E','p','a','c','t','_','N','u','m','b','e','r','s',0, +2,'D','i','a','c','r','i','t','i','c','a','l','s','_','E','x','t',0,'C','o','m','b','i','n','i','n','g','_','D','i','a','c', +'r','i','t','i','c','a','l','_','M','a','r','k','s','_','E','x','t','e','n','d','e','d',0, +2,'D','u','p','l','o','y','a','n',0,'D','u','p','l','o','y','a','n',0, +2,'E','l','b','a','s','a','n',0,'E','l','b','a','s','a','n',0, +2,'G','e','o','m','e','t','r','i','c','_','S','h','a','p','e','s','_','E','x','t',0,'G','e','o','m','e','t','r','i','c','_', +'S','h','a','p','e','s','_','E','x','t','e','n','d','e','d',0, +2,'G','r','a','n','t','h','a',0,'G','r','a','n','t','h','a',0, +2,'K','h','o','j','k','i',0,'K','h','o','j','k','i',0,2,'K','h','u','d','a','w','a','d','i',0, +'K','h','u','d','a','w','a','d','i',0,2,'L','a','t','i','n','_','E','x','t','_','E',0,'L','a','t','i','n','_','E','x','t', +'e','n','d','e','d','_','E',0,2,'L','i','n','e','a','r','_','A',0,'L','i','n','e','a','r','_','A',0, +2,'M','a','h','a','j','a','n','i',0,'M','a','h','a','j','a','n','i',0, +2,'M','a','n','i','c','h','a','e','a','n',0,'M','a','n','i','c','h','a','e','a','n',0, +2,'M','e','n','d','e','_','K','i','k','a','k','u','i',0,'M','e','n','d','e','_','K','i','k','a','k','u','i',0, +2,'M','o','d','i',0,'M','o','d','i',0,2,'M','r','o',0,'M','r','o',0, +2,'M','y','a','n','m','a','r','_','E','x','t','_','B',0,'M','y','a','n','m','a','r','_','E','x','t','e','n','d','e','d','_', +'B',0,2,'N','a','b','a','t','a','e','a','n',0,'N','a','b','a','t','a','e','a','n',0, +2,'O','l','d','_','N','o','r','t','h','_','A','r','a','b','i','a','n',0,'O','l','d','_','N','o','r','t','h','_','A','r','a', +'b','i','a','n',0,2,'O','l','d','_','P','e','r','m','i','c',0,'O','l','d','_','P','e','r','m','i','c',0, +2,'O','r','n','a','m','e','n','t','a','l','_','D','i','n','g','b','a','t','s',0,'O','r','n','a','m','e','n','t','a','l','_', +'D','i','n','g','b','a','t','s',0,2,'P','a','h','a','w','h','_','H','m','o','n','g',0,'P','a','h','a','w','h','_','H','m', +'o','n','g',0,2,'P','a','l','m','y','r','e','n','e',0,'P','a','l','m','y','r','e','n','e',0, +2,'P','a','u','_','C','i','n','_','H','a','u',0,'P','a','u','_','C','i','n','_','H','a','u',0, +2,'P','s','a','l','t','e','r','_','P','a','h','l','a','v','i',0,'P','s','a','l','t','e','r','_','P','a','h','l','a','v','i', +0,2,'S','h','o','r','t','h','a','n','d','_','F','o','r','m','a','t','_','C','o','n','t','r','o','l','s',0, +'S','h','o','r','t','h','a','n','d','_','F','o','r','m','a','t','_','C','o','n','t','r','o','l','s',0, +2,'S','i','d','d','h','a','m',0,'S','i','d','d','h','a','m',0, +2,'S','i','n','h','a','l','a','_','A','r','c','h','a','i','c','_','N','u','m','b','e','r','s',0, +'S','i','n','h','a','l','a','_','A','r','c','h','a','i','c','_','N','u','m','b','e','r','s',0, +2,'S','u','p','_','A','r','r','o','w','s','_','C',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','A','r','r','o','w', +'s','_','C',0,2,'T','i','r','h','u','t','a',0,'T','i','r','h','u','t','a',0, +2,'W','a','r','a','n','g','_','C','i','t','i',0,'W','a','r','a','n','g','_','C','i','t','i',0, +2,'A','h','o','m',0,'A','h','o','m',0,2,'A','n','a','t','o','l','i','a','n','_','H','i','e','r','o','g','l','y','p','h', +'s',0,'A','n','a','t','o','l','i','a','n','_','H','i','e','r','o','g','l','y','p','h','s',0, +2,'C','h','e','r','o','k','e','e','_','S','u','p',0,'C','h','e','r','o','k','e','e','_','S','u','p','p','l','e','m','e','n', +'t',0,2,'C','J','K','_','E','x','t','_','E',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a', +'p','h','s','_','E','x','t','e','n','s','i','o','n','_','E',0, +2,'E','a','r','l','y','_','D','y','n','a','s','t','i','c','_','C','u','n','e','i','f','o','r','m',0, +'E','a','r','l','y','_','D','y','n','a','s','t','i','c','_','C','u','n','e','i','f','o','r','m',0, +2,'H','a','t','r','a','n',0,'H','a','t','r','a','n',0,2,'M','u','l','t','a','n','i',0, +'M','u','l','t','a','n','i',0,2,'O','l','d','_','H','u','n','g','a','r','i','a','n',0,'O','l','d','_','H','u','n','g','a', +'r','i','a','n',0,2,'S','u','p','_','S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h', +'s',0,'S','u','p','p','l','e','m','e','n','t','a','l','_','S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o', +'g','r','a','p','h','s',0,2,'S','u','t','t','o','n','_','S','i','g','n','W','r','i','t','i','n','g',0, +'S','u','t','t','o','n','_','S','i','g','n','W','r','i','t','i','n','g',0, +2,'A','d','l','a','m',0,'A','d','l','a','m',0,2,'B','h','a','i','k','s','u','k','i',0, +'B','h','a','i','k','s','u','k','i',0,2,'C','y','r','i','l','l','i','c','_','E','x','t','_','C',0, +'C','y','r','i','l','l','i','c','_','E','x','t','e','n','d','e','d','_','C',0, +2,'G','l','a','g','o','l','i','t','i','c','_','S','u','p',0,'G','l','a','g','o','l','i','t','i','c','_','S','u','p','p','l', +'e','m','e','n','t',0,2,'I','d','e','o','g','r','a','p','h','i','c','_','S','y','m','b','o','l','s',0, +'I','d','e','o','g','r','a','p','h','i','c','_','S','y','m','b','o','l','s','_','A','n','d','_','P','u','n','c','t','u','a','t', +'i','o','n',0,2,'M','a','r','c','h','e','n',0,'M','a','r','c','h','e','n',0, +2,'M','o','n','g','o','l','i','a','n','_','S','u','p',0,'M','o','n','g','o','l','i','a','n','_','S','u','p','p','l','e','m', +'e','n','t',0,2,'N','e','w','a',0,'N','e','w','a',0,2,'O','s','a','g','e',0,'O','s','a','g','e',0, +2,'T','a','n','g','u','t',0,'T','a','n','g','u','t',0,2,'T','a','n','g','u','t','_','C','o','m','p','o','n','e','n','t', +'s',0,'T','a','n','g','u','t','_','C','o','m','p','o','n','e','n','t','s',0, +2,'C','J','K','_','E','x','t','_','F',0,'C','J','K','_','U','n','i','f','i','e','d','_','I','d','e','o','g','r','a','p','h', +'s','_','E','x','t','e','n','s','i','o','n','_','F',0,2,'K','a','n','a','_','E','x','t','_','A',0, +'K','a','n','a','_','E','x','t','e','n','d','e','d','_','A',0, +2,'M','a','s','a','r','a','m','_','G','o','n','d','i',0,'M','a','s','a','r','a','m','_','G','o','n','d','i',0, +2,'N','u','s','h','u',0,'N','u','s','h','u',0,2,'S','o','y','o','m','b','o',0,'S','o','y','o','m','b','o',0, +2,'S','y','r','i','a','c','_','S','u','p',0,'S','y','r','i','a','c','_','S','u','p','p','l','e','m','e','n','t',0, +2,'Z','a','n','a','b','a','z','a','r','_','S','q','u','a','r','e',0,'Z','a','n','a','b','a','z','a','r','_','S','q','u','a', +'r','e',0,2,'C','h','e','s','s','_','S','y','m','b','o','l','s',0,'C','h','e','s','s','_','S','y','m','b','o','l','s',0, +2,'D','o','g','r','a',0,'D','o','g','r','a',0,2,'G','e','o','r','g','i','a','n','_','E','x','t',0, +'G','e','o','r','g','i','a','n','_','E','x','t','e','n','d','e','d',0, +2,'G','u','n','j','a','l','a','_','G','o','n','d','i',0,'G','u','n','j','a','l','a','_','G','o','n','d','i',0, +2,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a',0,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a', +0,2,'I','n','d','i','c','_','S','i','y','a','q','_','N','u','m','b','e','r','s',0,'I','n','d','i','c','_','S','i','y','a', +'q','_','N','u','m','b','e','r','s',0,2,'M','a','k','a','s','a','r',0,'M','a','k','a','s','a','r',0, +2,'M','a','y','a','n','_','N','u','m','e','r','a','l','s',0,'M','a','y','a','n','_','N','u','m','e','r','a','l','s',0, +2,'M','e','d','e','f','a','i','d','r','i','n',0,'M','e','d','e','f','a','i','d','r','i','n',0, +2,'O','l','d','_','S','o','g','d','i','a','n',0,'O','l','d','_','S','o','g','d','i','a','n',0, +2,'S','o','g','d','i','a','n',0,'S','o','g','d','i','a','n',0, +2,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g','l','y','p','h','_','F','o','r','m','a','t','_','C','o','n','t', +'r','o','l','s',0,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g','l','y','p','h','_','F','o','r','m','a','t','_', +'C','o','n','t','r','o','l','s',0,2,'E','l','y','m','a','i','c',0,'E','l','y','m','a','i','c',0, +2,'N','a','n','d','i','n','a','g','a','r','i',0,'N','a','n','d','i','n','a','g','a','r','i',0, +2,'N','y','i','a','k','e','n','g','_','P','u','a','c','h','u','e','_','H','m','o','n','g',0, +'N','y','i','a','k','e','n','g','_','P','u','a','c','h','u','e','_','H','m','o','n','g',0, +2,'O','t','t','o','m','a','n','_','S','i','y','a','q','_','N','u','m','b','e','r','s',0,'O','t','t','o','m','a','n','_','S', +'i','y','a','q','_','N','u','m','b','e','r','s',0,2,'S','m','a','l','l','_','K','a','n','a','_','E','x','t',0, +'S','m','a','l','l','_','K','a','n','a','_','E','x','t','e','n','s','i','o','n',0, +2,'S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h','s','_','E','x','t','_','A',0, +'S','y','m','b','o','l','s','_','A','n','d','_','P','i','c','t','o','g','r','a','p','h','s','_','E','x','t','e','n','d','e','d', +'_','A',0,2,'T','a','m','i','l','_','S','u','p',0,'T','a','m','i','l','_','S','u','p','p','l','e','m','e','n','t',0, +2,'W','a','n','c','h','o',0,'W','a','n','c','h','o',0,2,'C','h','o','r','a','s','m','i','a','n',0, +'C','h','o','r','a','s','m','i','a','n',0,2,'C','J','K','_','E','x','t','_','G',0,'C','J','K','_','U','n','i','f','i','e', +'d','_','I','d','e','o','g','r','a','p','h','s','_','E','x','t','e','n','s','i','o','n','_','G',0, +2,'D','i','v','e','s','_','A','k','u','r','u',0,'D','i','v','e','s','_','A','k','u','r','u',0, +2,'K','h','i','t','a','n','_','S','m','a','l','l','_','S','c','r','i','p','t',0,'K','h','i','t','a','n','_','S','m','a','l', +'l','_','S','c','r','i','p','t',0,2,'L','i','s','u','_','S','u','p',0,'L','i','s','u','_','S','u','p','p','l','e','m','e', +'n','t',0,2,'S','y','m','b','o','l','s','_','F','o','r','_','L','e','g','a','c','y','_','C','o','m','p','u','t','i','n','g', +0,'S','y','m','b','o','l','s','_','F','o','r','_','L','e','g','a','c','y','_','C','o','m','p','u','t','i','n','g',0, +2,'T','a','n','g','u','t','_','S','u','p',0,'T','a','n','g','u','t','_','S','u','p','p','l','e','m','e','n','t',0, +2,'Y','e','z','i','d','i',0,'Y','e','z','i','d','i',0,2,'c','c','c',0,'C','a','n','o','n','i','c','a','l','_','C','o', +'m','b','i','n','i','n','g','_','C','l','a','s','s',0,2,'d','t',0,'D','e','c','o','m','p','o','s','i','t','i','o','n','_', +'T','y','p','e',0,3,'N','o','n','e',0,'N','o','n','e',0,'n','o','n','e',0, +3,'C','a','n',0,'C','a','n','o','n','i','c','a','l',0,'c','a','n',0, +3,'C','o','m',0,'C','o','m','p','a','t',0,'c','o','m',0, +3,'E','n','c',0,'C','i','r','c','l','e',0,'e','n','c',0, +3,'F','i','n',0,'F','i','n','a','l',0,'f','i','n',0,3,'F','o','n','t',0,'F','o','n','t',0, +'f','o','n','t',0,3,'F','r','a',0,'F','r','a','c','t','i','o','n',0,'f','r','a',0, +3,'I','n','i','t',0,'I','n','i','t','i','a','l',0,'i','n','i','t',0, +3,'I','s','o',0,'I','s','o','l','a','t','e','d',0,'i','s','o',0, +3,'M','e','d',0,'M','e','d','i','a','l',0,'m','e','d',0, +3,'N','a','r',0,'N','a','r','r','o','w',0,'n','a','r',0, +3,'N','b',0,'N','o','b','r','e','a','k',0,'n','b',0,3,'S','m','l',0,'S','m','a','l','l',0, +'s','m','l',0,3,'S','q','r',0,'S','q','u','a','r','e',0,'s','q','r',0, +3,'S','u','b',0,'S','u','b',0,'s','u','b',0,3,'S','u','p',0,'S','u','p','e','r',0, +'s','u','p',0,3,'V','e','r','t',0,'V','e','r','t','i','c','a','l',0,'v','e','r','t',0, +3,'W','i','d','e',0,'W','i','d','e',0,'w','i','d','e',0, +2,'e','a',0,'E','a','s','t','_','A','s','i','a','n','_','W','i','d','t','h',0, +2,'N',0,'N','e','u','t','r','a','l',0,2,'A',0,'A','m','b','i','g','u','o','u','s',0, +2,'H',0,'H','a','l','f','w','i','d','t','h',0,2,'F',0,'F','u','l','l','w','i','d','t','h',0, +2,'N','a',0,'N','a','r','r','o','w',0,2,'W',0,'W','i','d','e',0, +2,'g','c',0,'G','e','n','e','r','a','l','_','C','a','t','e','g','o','r','y',0, +2,'C','n',0,'U','n','a','s','s','i','g','n','e','d',0,2,'L','u',0,'U','p','p','e','r','c','a','s','e','_','L','e','t', +'t','e','r',0,2,'L','l',0,'L','o','w','e','r','c','a','s','e','_','L','e','t','t','e','r',0, +2,'L','t',0,'T','i','t','l','e','c','a','s','e','_','L','e','t','t','e','r',0, +2,'L','m',0,'M','o','d','i','f','i','e','r','_','L','e','t','t','e','r',0, +2,'L','o',0,'O','t','h','e','r','_','L','e','t','t','e','r',0, +2,'M','n',0,'N','o','n','s','p','a','c','i','n','g','_','M','a','r','k',0, +2,'M','e',0,'E','n','c','l','o','s','i','n','g','_','M','a','r','k',0, +2,'M','c',0,'S','p','a','c','i','n','g','_','M','a','r','k',0, +3,'N','d',0,'D','e','c','i','m','a','l','_','N','u','m','b','e','r',0,'d','i','g','i','t',0, +2,'N','l',0,'L','e','t','t','e','r','_','N','u','m','b','e','r',0, +2,'N','o',0,'O','t','h','e','r','_','N','u','m','b','e','r',0, +2,'Z','s',0,'S','p','a','c','e','_','S','e','p','a','r','a','t','o','r',0, +2,'Z','l',0,'L','i','n','e','_','S','e','p','a','r','a','t','o','r',0, +2,'Z','p',0,'P','a','r','a','g','r','a','p','h','_','S','e','p','a','r','a','t','o','r',0, +3,'C','c',0,'C','o','n','t','r','o','l',0,'c','n','t','r','l',0, +2,'C','f',0,'F','o','r','m','a','t',0,2,'C','o',0,'P','r','i','v','a','t','e','_','U','s','e',0, +2,'C','s',0,'S','u','r','r','o','g','a','t','e',0,2,'P','d',0,'D','a','s','h','_','P','u','n','c','t','u','a','t','i', +'o','n',0,2,'P','s',0,'O','p','e','n','_','P','u','n','c','t','u','a','t','i','o','n',0, +2,'P','e',0,'C','l','o','s','e','_','P','u','n','c','t','u','a','t','i','o','n',0, +2,'P','c',0,'C','o','n','n','e','c','t','o','r','_','P','u','n','c','t','u','a','t','i','o','n',0, +2,'P','o',0,'O','t','h','e','r','_','P','u','n','c','t','u','a','t','i','o','n',0, +2,'S','m',0,'M','a','t','h','_','S','y','m','b','o','l',0, +2,'S','c',0,'C','u','r','r','e','n','c','y','_','S','y','m','b','o','l',0, +2,'S','k',0,'M','o','d','i','f','i','e','r','_','S','y','m','b','o','l',0, +2,'S','o',0,'O','t','h','e','r','_','S','y','m','b','o','l',0, +2,'P','i',0,'I','n','i','t','i','a','l','_','P','u','n','c','t','u','a','t','i','o','n',0, +2,'P','f',0,'F','i','n','a','l','_','P','u','n','c','t','u','a','t','i','o','n',0, +2,'j','g',0,'J','o','i','n','i','n','g','_','G','r','o','u','p',0, +2,'N','o','_','J','o','i','n','i','n','g','_','G','r','o','u','p',0,'N','o','_','J','o','i','n','i','n','g','_','G','r','o', +'u','p',0,2,'A','i','n',0,'A','i','n',0,2,'A','l','a','p','h',0,'A','l','a','p','h',0, +2,'A','l','e','f',0,'A','l','e','f',0,2,'B','e','h',0,'B','e','h',0, +2,'B','e','t','h',0,'B','e','t','h',0,2,'D','a','l',0,'D','a','l',0, +2,'D','a','l','a','t','h','_','R','i','s','h',0,'D','a','l','a','t','h','_','R','i','s','h',0, +2,'E',0,'E',0,2,'F','e','h',0,'F','e','h',0,2,'F','i','n','a','l','_','S','e','m','k','a','t','h',0, +'F','i','n','a','l','_','S','e','m','k','a','t','h',0,2,'G','a','f',0,'G','a','f',0, +2,'G','a','m','a','l',0,'G','a','m','a','l',0,2,'H','a','h',0,'H','a','h',0, +2,'T','e','h','_','M','a','r','b','u','t','a','_','G','o','a','l',0,'H','a','m','z','a','_','O','n','_','H','e','h','_','G', +'o','a','l',0,2,'H','e',0,'H','e',0,2,'H','e','h',0,'H','e','h',0, +2,'H','e','h','_','G','o','a','l',0,'H','e','h','_','G','o','a','l',0, +2,'H','e','t','h',0,'H','e','t','h',0,2,'K','a','f',0,'K','a','f',0, +2,'K','a','p','h',0,'K','a','p','h',0,2,'K','n','o','t','t','e','d','_','H','e','h',0, +'K','n','o','t','t','e','d','_','H','e','h',0,2,'L','a','m',0,'L','a','m',0, +2,'L','a','m','a','d','h',0,'L','a','m','a','d','h',0,2,'M','e','e','m',0,'M','e','e','m',0, +2,'M','i','m',0,'M','i','m',0,2,'N','o','o','n',0,'N','o','o','n',0, +2,'N','u','n',0,'N','u','n',0,2,'P','e',0,'P','e',0, +2,'Q','a','f',0,'Q','a','f',0,2,'Q','a','p','h',0,'Q','a','p','h',0, +2,'R','e','h',0,'R','e','h',0,2,'R','e','v','e','r','s','e','d','_','P','e',0,'R','e','v','e','r','s','e','d','_','P', +'e',0,2,'S','a','d',0,'S','a','d',0,2,'S','a','d','h','e',0,'S','a','d','h','e',0, +2,'S','e','e','n',0,'S','e','e','n',0,2,'S','e','m','k','a','t','h',0,'S','e','m','k','a','t','h',0, +2,'S','h','i','n',0,'S','h','i','n',0,2,'S','w','a','s','h','_','K','a','f',0,'S','w','a','s','h','_','K','a','f',0, +2,'S','y','r','i','a','c','_','W','a','w',0,'S','y','r','i','a','c','_','W','a','w',0, +2,'T','a','h',0,'T','a','h',0,2,'T','a','w',0,'T','a','w',0, +2,'T','e','h','_','M','a','r','b','u','t','a',0,'T','e','h','_','M','a','r','b','u','t','a',0, +2,'T','e','t','h',0,'T','e','t','h',0,2,'W','a','w',0,'W','a','w',0, +2,'Y','e','h',0,'Y','e','h',0,2,'Y','e','h','_','B','a','r','r','e','e',0,'Y','e','h','_','B','a','r','r','e','e',0, +2,'Y','e','h','_','W','i','t','h','_','T','a','i','l',0,'Y','e','h','_','W','i','t','h','_','T','a','i','l',0, +2,'Y','u','d','h',0,'Y','u','d','h',0,2,'Y','u','d','h','_','H','e',0,'Y','u','d','h','_','H','e',0, +2,'Z','a','i','n',0,'Z','a','i','n',0,2,'F','e',0,'F','e',0, +2,'K','h','a','p','h',0,'K','h','a','p','h',0,2,'Z','h','a','i','n',0,'Z','h','a','i','n',0, +2,'B','u','r','u','s','h','a','s','k','i','_','Y','e','h','_','B','a','r','r','e','e',0,'B','u','r','u','s','h','a','s','k', +'i','_','Y','e','h','_','B','a','r','r','e','e',0,2,'F','a','r','s','i','_','Y','e','h',0, +'F','a','r','s','i','_','Y','e','h',0,2,'N','y','a',0,'N','y','a',0, +2,'R','o','h','i','n','g','y','a','_','Y','e','h',0,'R','o','h','i','n','g','y','a','_','Y','e','h',0, +2,'M','a','n','i','c','h','a','e','a','n','_','A','l','e','p','h',0,'M','a','n','i','c','h','a','e','a','n','_','A','l','e', +'p','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','A','y','i','n',0,'M','a','n','i','c','h','a','e','a','n','_','A', +'y','i','n',0,2,'M','a','n','i','c','h','a','e','a','n','_','B','e','t','h',0,'M','a','n','i','c','h','a','e','a','n','_', +'B','e','t','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','D','a','l','e','t','h',0, +'M','a','n','i','c','h','a','e','a','n','_','D','a','l','e','t','h',0, +2,'M','a','n','i','c','h','a','e','a','n','_','D','h','a','m','e','d','h',0,'M','a','n','i','c','h','a','e','a','n','_','D', +'h','a','m','e','d','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','F','i','v','e',0, +'M','a','n','i','c','h','a','e','a','n','_','F','i','v','e',0, +2,'M','a','n','i','c','h','a','e','a','n','_','G','i','m','e','l',0,'M','a','n','i','c','h','a','e','a','n','_','G','i','m', +'e','l',0,2,'M','a','n','i','c','h','a','e','a','n','_','H','e','t','h',0,'M','a','n','i','c','h','a','e','a','n','_','H', +'e','t','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','H','u','n','d','r','e','d',0, +'M','a','n','i','c','h','a','e','a','n','_','H','u','n','d','r','e','d',0, +2,'M','a','n','i','c','h','a','e','a','n','_','K','a','p','h',0,'M','a','n','i','c','h','a','e','a','n','_','K','a','p','h', +0,2,'M','a','n','i','c','h','a','e','a','n','_','L','a','m','e','d','h',0,'M','a','n','i','c','h','a','e','a','n','_','L', +'a','m','e','d','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','M','e','m',0,'M','a','n','i','c','h','a','e','a','n', +'_','M','e','m',0,2,'M','a','n','i','c','h','a','e','a','n','_','N','u','n',0,'M','a','n','i','c','h','a','e','a','n','_', +'N','u','n',0,2,'M','a','n','i','c','h','a','e','a','n','_','O','n','e',0,'M','a','n','i','c','h','a','e','a','n','_','O', +'n','e',0,2,'M','a','n','i','c','h','a','e','a','n','_','P','e',0,'M','a','n','i','c','h','a','e','a','n','_','P','e',0, +2,'M','a','n','i','c','h','a','e','a','n','_','Q','o','p','h',0,'M','a','n','i','c','h','a','e','a','n','_','Q','o','p','h', +0,2,'M','a','n','i','c','h','a','e','a','n','_','R','e','s','h',0,'M','a','n','i','c','h','a','e','a','n','_','R','e','s', +'h',0,2,'M','a','n','i','c','h','a','e','a','n','_','S','a','d','h','e',0,'M','a','n','i','c','h','a','e','a','n','_','S', +'a','d','h','e',0,2,'M','a','n','i','c','h','a','e','a','n','_','S','a','m','e','k','h',0, +'M','a','n','i','c','h','a','e','a','n','_','S','a','m','e','k','h',0, +2,'M','a','n','i','c','h','a','e','a','n','_','T','a','w',0,'M','a','n','i','c','h','a','e','a','n','_','T','a','w',0, +2,'M','a','n','i','c','h','a','e','a','n','_','T','e','n',0,'M','a','n','i','c','h','a','e','a','n','_','T','e','n',0, +2,'M','a','n','i','c','h','a','e','a','n','_','T','e','t','h',0,'M','a','n','i','c','h','a','e','a','n','_','T','e','t','h', +0,2,'M','a','n','i','c','h','a','e','a','n','_','T','h','a','m','e','d','h',0,'M','a','n','i','c','h','a','e','a','n','_', +'T','h','a','m','e','d','h',0,2,'M','a','n','i','c','h','a','e','a','n','_','T','w','e','n','t','y',0, +'M','a','n','i','c','h','a','e','a','n','_','T','w','e','n','t','y',0, +2,'M','a','n','i','c','h','a','e','a','n','_','W','a','w',0,'M','a','n','i','c','h','a','e','a','n','_','W','a','w',0, +2,'M','a','n','i','c','h','a','e','a','n','_','Y','o','d','h',0,'M','a','n','i','c','h','a','e','a','n','_','Y','o','d','h', +0,2,'M','a','n','i','c','h','a','e','a','n','_','Z','a','y','i','n',0,'M','a','n','i','c','h','a','e','a','n','_','Z','a', +'y','i','n',0,2,'S','t','r','a','i','g','h','t','_','W','a','w',0,'S','t','r','a','i','g','h','t','_','W','a','w',0, +2,'A','f','r','i','c','a','n','_','F','e','h',0,'A','f','r','i','c','a','n','_','F','e','h',0, +2,'A','f','r','i','c','a','n','_','N','o','o','n',0,'A','f','r','i','c','a','n','_','N','o','o','n',0, +2,'A','f','r','i','c','a','n','_','Q','a','f',0,'A','f','r','i','c','a','n','_','Q','a','f',0, +2,'M','a','l','a','y','a','l','a','m','_','B','h','a',0,'M','a','l','a','y','a','l','a','m','_','B','h','a',0, +2,'M','a','l','a','y','a','l','a','m','_','J','a',0,'M','a','l','a','y','a','l','a','m','_','J','a',0, +2,'M','a','l','a','y','a','l','a','m','_','L','l','a',0,'M','a','l','a','y','a','l','a','m','_','L','l','a',0, +2,'M','a','l','a','y','a','l','a','m','_','L','l','l','a',0,'M','a','l','a','y','a','l','a','m','_','L','l','l','a',0, +2,'M','a','l','a','y','a','l','a','m','_','N','g','a',0,'M','a','l','a','y','a','l','a','m','_','N','g','a',0, +2,'M','a','l','a','y','a','l','a','m','_','N','n','a',0,'M','a','l','a','y','a','l','a','m','_','N','n','a',0, +2,'M','a','l','a','y','a','l','a','m','_','N','n','n','a',0,'M','a','l','a','y','a','l','a','m','_','N','n','n','a',0, +2,'M','a','l','a','y','a','l','a','m','_','N','y','a',0,'M','a','l','a','y','a','l','a','m','_','N','y','a',0, +2,'M','a','l','a','y','a','l','a','m','_','R','a',0,'M','a','l','a','y','a','l','a','m','_','R','a',0, +2,'M','a','l','a','y','a','l','a','m','_','S','s','a',0,'M','a','l','a','y','a','l','a','m','_','S','s','a',0, +2,'M','a','l','a','y','a','l','a','m','_','T','t','a',0,'M','a','l','a','y','a','l','a','m','_','T','t','a',0, +2,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a','_','K','i','n','n','a','_','Y','a',0, +'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a','_','K','i','n','n','a','_','Y','a',0, +2,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a','_','P','a',0,'H','a','n','i','f','i','_','R','o','h','i','n', +'g','y','a','_','P','a',0,2,'j','t',0,'J','o','i','n','i','n','g','_','T','y','p','e',0, +2,'U',0,'N','o','n','_','J','o','i','n','i','n','g',0,2,'C',0,'J','o','i','n','_','C','a','u','s','i','n','g',0, +2,'D',0,'D','u','a','l','_','J','o','i','n','i','n','g',0, +2,'L',0,'L','e','f','t','_','J','o','i','n','i','n','g',0, +2,'R',0,'R','i','g','h','t','_','J','o','i','n','i','n','g',0, +2,'T',0,'T','r','a','n','s','p','a','r','e','n','t',0,2,'l','b',0,'L','i','n','e','_','B','r','e','a','k',0, +2,'X','X',0,'U','n','k','n','o','w','n',0,2,'A','I',0,'A','m','b','i','g','u','o','u','s',0, +2,'A','L',0,'A','l','p','h','a','b','e','t','i','c',0,2,'B','2',0,'B','r','e','a','k','_','B','o','t','h',0, +2,'B','A',0,'B','r','e','a','k','_','A','f','t','e','r',0, +2,'B','B',0,'B','r','e','a','k','_','B','e','f','o','r','e',0, +2,'B','K',0,'M','a','n','d','a','t','o','r','y','_','B','r','e','a','k',0, +2,'C','B',0,'C','o','n','t','i','n','g','e','n','t','_','B','r','e','a','k',0, +2,'C','L',0,'C','l','o','s','e','_','P','u','n','c','t','u','a','t','i','o','n',0, +2,'C','M',0,'C','o','m','b','i','n','i','n','g','_','M','a','r','k',0, +2,'C','R',0,'C','a','r','r','i','a','g','e','_','R','e','t','u','r','n',0, +2,'E','X',0,'E','x','c','l','a','m','a','t','i','o','n',0, +2,'G','L',0,'G','l','u','e',0,2,'H','Y',0,'H','y','p','h','e','n',0, +2,'I','D',0,'I','d','e','o','g','r','a','p','h','i','c',0, +3,'I','N',0,'I','n','s','e','p','a','r','a','b','l','e',0,'I','n','s','e','p','e','r','a','b','l','e',0, +2,'I','S',0,'I','n','f','i','x','_','N','u','m','e','r','i','c',0, +2,'L','F',0,'L','i','n','e','_','F','e','e','d',0,2,'N','S',0,'N','o','n','s','t','a','r','t','e','r',0, +2,'N','U',0,'N','u','m','e','r','i','c',0,2,'O','P',0,'O','p','e','n','_','P','u','n','c','t','u','a','t','i','o','n', +0,2,'P','O',0,'P','o','s','t','f','i','x','_','N','u','m','e','r','i','c',0, +2,'P','R',0,'P','r','e','f','i','x','_','N','u','m','e','r','i','c',0, +2,'Q','U',0,'Q','u','o','t','a','t','i','o','n',0,2,'S','A',0,'C','o','m','p','l','e','x','_','C','o','n','t','e','x', +'t',0,2,'S','G',0,'S','u','r','r','o','g','a','t','e',0, +2,'S','P',0,'S','p','a','c','e',0,2,'S','Y',0,'B','r','e','a','k','_','S','y','m','b','o','l','s',0, +2,'Z','W',0,'Z','W','S','p','a','c','e',0,2,'N','L',0,'N','e','x','t','_','L','i','n','e',0, +2,'W','J',0,'W','o','r','d','_','J','o','i','n','e','r',0, +2,'H','2',0,'H','2',0,2,'H','3',0,'H','3',0,2,'J','L',0,'J','L',0, +2,'J','T',0,'J','T',0,2,'J','V',0,'J','V',0,2,'C','P',0,'C','l','o','s','e','_','P','a','r','e','n','t','h','e', +'s','i','s',0,2,'C','J',0,'C','o','n','d','i','t','i','o','n','a','l','_','J','a','p','a','n','e','s','e','_','S','t','a', +'r','t','e','r',0,2,'H','L',0,'H','e','b','r','e','w','_','L','e','t','t','e','r',0, +2,'E','B',0,'E','_','B','a','s','e',0,2,'E','M',0,'E','_','M','o','d','i','f','i','e','r',0, +2,'Z','W','J',0,'Z','W','J',0,2,'n','t',0,'N','u','m','e','r','i','c','_','T','y','p','e',0, +2,'N','o','n','e',0,'N','o','n','e',0,2,'D','e',0,'D','e','c','i','m','a','l',0, +2,'D','i',0,'D','i','g','i','t',0,2,'N','u',0,'N','u','m','e','r','i','c',0, +2,'s','c',0,'S','c','r','i','p','t',0,2,'Z','y','y','y',0,'C','o','m','m','o','n',0, +3,'Z','i','n','h',0,'I','n','h','e','r','i','t','e','d',0,'Q','a','a','i',0, +2,'A','r','a','b',0,'A','r','a','b','i','c',0,2,'A','r','m','n',0,'A','r','m','e','n','i','a','n',0, +2,'B','e','n','g',0,'B','e','n','g','a','l','i',0,2,'B','o','p','o',0,'B','o','p','o','m','o','f','o',0, +2,'C','h','e','r',0,'C','h','e','r','o','k','e','e',0,3,'C','o','p','t',0,'C','o','p','t','i','c',0, +'Q','a','a','c',0,2,'C','y','r','l',0,'C','y','r','i','l','l','i','c',0, +2,'D','s','r','t',0,'D','e','s','e','r','e','t',0,2,'D','e','v','a',0,'D','e','v','a','n','a','g','a','r','i',0, +2,'E','t','h','i',0,'E','t','h','i','o','p','i','c',0,2,'G','e','o','r',0,'G','e','o','r','g','i','a','n',0, +2,'G','o','t','h',0,'G','o','t','h','i','c',0,2,'G','r','e','k',0,'G','r','e','e','k',0, +2,'G','u','j','r',0,'G','u','j','a','r','a','t','i',0,2,'G','u','r','u',0,'G','u','r','m','u','k','h','i',0, +2,'H','a','n','i',0,'H','a','n',0,2,'H','a','n','g',0,'H','a','n','g','u','l',0, +2,'H','e','b','r',0,'H','e','b','r','e','w',0,2,'H','i','r','a',0,'H','i','r','a','g','a','n','a',0, +2,'K','n','d','a',0,'K','a','n','n','a','d','a',0,2,'K','a','n','a',0,'K','a','t','a','k','a','n','a',0, +2,'K','h','m','r',0,'K','h','m','e','r',0,2,'L','a','o','o',0,'L','a','o',0, +2,'L','a','t','n',0,'L','a','t','i','n',0,2,'M','l','y','m',0,'M','a','l','a','y','a','l','a','m',0, +2,'M','o','n','g',0,'M','o','n','g','o','l','i','a','n',0, +2,'M','y','m','r',0,'M','y','a','n','m','a','r',0,2,'O','g','a','m',0,'O','g','h','a','m',0, +2,'I','t','a','l',0,'O','l','d','_','I','t','a','l','i','c',0, +2,'O','r','y','a',0,'O','r','i','y','a',0,2,'R','u','n','r',0,'R','u','n','i','c',0, +2,'S','i','n','h',0,'S','i','n','h','a','l','a',0,2,'S','y','r','c',0,'S','y','r','i','a','c',0, +2,'T','a','m','l',0,'T','a','m','i','l',0,2,'T','e','l','u',0,'T','e','l','u','g','u',0, +2,'T','h','a','a',0,'T','h','a','a','n','a',0,2,'T','i','b','t',0,'T','i','b','e','t','a','n',0, +2,'C','a','n','s',0,'C','a','n','a','d','i','a','n','_','A','b','o','r','i','g','i','n','a','l',0, +2,'Y','i','i','i',0,'Y','i',0,2,'T','g','l','g',0,'T','a','g','a','l','o','g',0, +2,'H','a','n','o',0,'H','a','n','u','n','o','o',0,2,'B','u','h','d',0,'B','u','h','i','d',0, +2,'T','a','g','b',0,'T','a','g','b','a','n','w','a',0,2,'B','r','a','i',0,'B','r','a','i','l','l','e',0, +2,'C','p','r','t',0,'C','y','p','r','i','o','t',0,2,'L','i','m','b',0,'L','i','m','b','u',0, +2,'L','i','n','b',0,'L','i','n','e','a','r','_','B',0,2,'O','s','m','a',0,'O','s','m','a','n','y','a',0, +2,'S','h','a','w',0,'S','h','a','v','i','a','n',0,2,'T','a','l','e',0,'T','a','i','_','L','e',0, +2,'U','g','a','r',0,'U','g','a','r','i','t','i','c',0,2,'H','r','k','t',0,'K','a','t','a','k','a','n','a','_','O','r', +'_','H','i','r','a','g','a','n','a',0,2,'B','u','g','i',0,'B','u','g','i','n','e','s','e',0, +2,'G','l','a','g',0,'G','l','a','g','o','l','i','t','i','c',0, +2,'K','h','a','r',0,'K','h','a','r','o','s','h','t','h','i',0, +2,'S','y','l','o',0,'S','y','l','o','t','i','_','N','a','g','r','i',0, +2,'T','a','l','u',0,'N','e','w','_','T','a','i','_','L','u','e',0, +2,'T','f','n','g',0,'T','i','f','i','n','a','g','h',0,2,'X','p','e','o',0,'O','l','d','_','P','e','r','s','i','a','n', +0,2,'B','a','l','i',0,'B','a','l','i','n','e','s','e',0, +2,'B','a','t','k',0,'B','a','t','a','k',0,2,'B','l','i','s',0,'B','l','i','s',0, +2,'B','r','a','h',0,'B','r','a','h','m','i',0,2,'C','i','r','t',0,'C','i','r','t',0, +2,'C','y','r','s',0,'C','y','r','s',0,2,'E','g','y','d',0,'E','g','y','d',0, +2,'E','g','y','h',0,'E','g','y','h',0,2,'E','g','y','p',0,'E','g','y','p','t','i','a','n','_','H','i','e','r','o','g', +'l','y','p','h','s',0,2,'G','e','o','k',0,'G','e','o','k',0, +2,'H','a','n','s',0,'H','a','n','s',0,2,'H','a','n','t',0,'H','a','n','t',0, +2,'H','m','n','g',0,'P','a','h','a','w','h','_','H','m','o','n','g',0, +2,'H','u','n','g',0,'O','l','d','_','H','u','n','g','a','r','i','a','n',0, +2,'I','n','d','s',0,'I','n','d','s',0,2,'J','a','v','a',0,'J','a','v','a','n','e','s','e',0, +2,'K','a','l','i',0,'K','a','y','a','h','_','L','i',0,2,'L','a','t','f',0,'L','a','t','f',0, +2,'L','a','t','g',0,'L','a','t','g',0,2,'L','e','p','c',0,'L','e','p','c','h','a',0, +2,'L','i','n','a',0,'L','i','n','e','a','r','_','A',0,2,'M','a','n','d',0,'M','a','n','d','a','i','c',0, +2,'M','a','y','a',0,'M','a','y','a',0,2,'M','e','r','o',0,'M','e','r','o','i','t','i','c','_','H','i','e','r','o','g', +'l','y','p','h','s',0,2,'N','k','o','o',0,'N','k','o',0, +2,'O','r','k','h',0,'O','l','d','_','T','u','r','k','i','c',0, +2,'P','e','r','m',0,'O','l','d','_','P','e','r','m','i','c',0, +2,'P','h','a','g',0,'P','h','a','g','s','_','P','a',0,2,'P','h','n','x',0,'P','h','o','e','n','i','c','i','a','n',0, +2,'P','l','r','d',0,'M','i','a','o',0,2,'R','o','r','o',0,'R','o','r','o',0, +2,'S','a','r','a',0,'S','a','r','a',0,2,'S','y','r','e',0,'S','y','r','e',0, +2,'S','y','r','j',0,'S','y','r','j',0,2,'S','y','r','n',0,'S','y','r','n',0, +2,'T','e','n','g',0,'T','e','n','g',0,2,'V','a','i','i',0,'V','a','i',0, +2,'V','i','s','p',0,'V','i','s','p',0,2,'X','s','u','x',0,'C','u','n','e','i','f','o','r','m',0, +2,'Z','x','x','x',0,'Z','x','x','x',0,2,'Z','z','z','z',0,'U','n','k','n','o','w','n',0, +2,'C','a','r','i',0,'C','a','r','i','a','n',0,2,'J','p','a','n',0,'J','p','a','n',0, +2,'L','a','n','a',0,'T','a','i','_','T','h','a','m',0,2,'L','y','c','i',0,'L','y','c','i','a','n',0, +2,'L','y','d','i',0,'L','y','d','i','a','n',0,2,'O','l','c','k',0,'O','l','_','C','h','i','k','i',0, +2,'R','j','n','g',0,'R','e','j','a','n','g',0,2,'S','a','u','r',0,'S','a','u','r','a','s','h','t','r','a',0, +2,'S','g','n','w',0,'S','i','g','n','W','r','i','t','i','n','g',0, +2,'S','u','n','d',0,'S','u','n','d','a','n','e','s','e',0, +2,'M','o','o','n',0,'M','o','o','n',0,2,'M','t','e','i',0,'M','e','e','t','e','i','_','M','a','y','e','k',0, +2,'A','r','m','i',0,'I','m','p','e','r','i','a','l','_','A','r','a','m','a','i','c',0, +2,'A','v','s','t',0,'A','v','e','s','t','a','n',0,2,'C','a','k','m',0,'C','h','a','k','m','a',0, +2,'K','o','r','e',0,'K','o','r','e',0,2,'K','t','h','i',0,'K','a','i','t','h','i',0, +2,'M','a','n','i',0,'M','a','n','i','c','h','a','e','a','n',0, +2,'P','h','l','i',0,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P','a','h','l','a','v','i',0, +2,'P','h','l','p',0,'P','s','a','l','t','e','r','_','P','a','h','l','a','v','i',0, +2,'P','h','l','v',0,'P','h','l','v',0,2,'P','r','t','i',0,'I','n','s','c','r','i','p','t','i','o','n','a','l','_','P', +'a','r','t','h','i','a','n',0,2,'S','a','m','r',0,'S','a','m','a','r','i','t','a','n',0, +2,'T','a','v','t',0,'T','a','i','_','V','i','e','t',0,2,'Z','m','t','h',0,'Z','m','t','h',0, +2,'Z','s','y','m',0,'Z','s','y','m',0,2,'B','a','m','u',0,'B','a','m','u','m',0, +2,'N','k','g','b',0,'N','k','g','b',0,2,'S','a','r','b',0,'O','l','d','_','S','o','u','t','h','_','A','r','a','b','i', +'a','n',0,2,'B','a','s','s',0,'B','a','s','s','a','_','V','a','h',0, +2,'D','u','p','l',0,'D','u','p','l','o','y','a','n',0,2,'E','l','b','a',0,'E','l','b','a','s','a','n',0, +2,'G','r','a','n',0,'G','r','a','n','t','h','a',0,2,'K','p','e','l',0,'K','p','e','l',0, +2,'L','o','m','a',0,'L','o','m','a',0,2,'M','e','n','d',0,'M','e','n','d','e','_','K','i','k','a','k','u','i',0, +2,'M','e','r','c',0,'M','e','r','o','i','t','i','c','_','C','u','r','s','i','v','e',0, +2,'N','a','r','b',0,'O','l','d','_','N','o','r','t','h','_','A','r','a','b','i','a','n',0, +2,'N','b','a','t',0,'N','a','b','a','t','a','e','a','n',0, +2,'P','a','l','m',0,'P','a','l','m','y','r','e','n','e',0, +2,'S','i','n','d',0,'K','h','u','d','a','w','a','d','i',0, +2,'W','a','r','a',0,'W','a','r','a','n','g','_','C','i','t','i',0, +2,'A','f','a','k',0,'A','f','a','k',0,2,'J','u','r','c',0,'J','u','r','c',0, +2,'M','r','o','o',0,'M','r','o',0,2,'N','s','h','u',0,'N','u','s','h','u',0, +2,'S','h','r','d',0,'S','h','a','r','a','d','a',0,2,'S','o','r','a',0,'S','o','r','a','_','S','o','m','p','e','n','g', +0,2,'T','a','k','r',0,'T','a','k','r','i',0,2,'T','a','n','g',0,'T','a','n','g','u','t',0, +2,'W','o','l','e',0,'W','o','l','e',0,2,'H','l','u','w',0,'A','n','a','t','o','l','i','a','n','_','H','i','e','r','o', +'g','l','y','p','h','s',0,2,'K','h','o','j',0,'K','h','o','j','k','i',0, +2,'T','i','r','h',0,'T','i','r','h','u','t','a',0,2,'A','g','h','b',0,'C','a','u','c','a','s','i','a','n','_','A','l', +'b','a','n','i','a','n',0,2,'M','a','h','j',0,'M','a','h','a','j','a','n','i',0, +2,'H','a','t','r',0,'H','a','t','r','a','n',0,2,'M','u','l','t',0,'M','u','l','t','a','n','i',0, +2,'P','a','u','c',0,'P','a','u','_','C','i','n','_','H','a','u',0, +2,'S','i','d','d',0,'S','i','d','d','h','a','m',0,2,'A','d','l','m',0,'A','d','l','a','m',0, +2,'B','h','k','s',0,'B','h','a','i','k','s','u','k','i',0, +2,'M','a','r','c',0,'M','a','r','c','h','e','n',0,2,'O','s','g','e',0,'O','s','a','g','e',0, +2,'H','a','n','b',0,'H','a','n','b',0,2,'J','a','m','o',0,'J','a','m','o',0, +2,'Z','s','y','e',0,'Z','s','y','e',0,2,'G','o','n','m',0,'M','a','s','a','r','a','m','_','G','o','n','d','i',0, +2,'S','o','y','o',0,'S','o','y','o','m','b','o',0,2,'Z','a','n','b',0,'Z','a','n','a','b','a','z','a','r','_','S','q', +'u','a','r','e',0,2,'D','o','g','r',0,'D','o','g','r','a',0, +2,'G','o','n','g',0,'G','u','n','j','a','l','a','_','G','o','n','d','i',0, +2,'M','a','k','a',0,'M','a','k','a','s','a','r',0,2,'M','e','d','f',0,'M','e','d','e','f','a','i','d','r','i','n',0, +2,'R','o','h','g',0,'H','a','n','i','f','i','_','R','o','h','i','n','g','y','a',0, +2,'S','o','g','d',0,'S','o','g','d','i','a','n',0,2,'S','o','g','o',0,'O','l','d','_','S','o','g','d','i','a','n',0, +2,'E','l','y','m',0,'E','l','y','m','a','i','c',0,2,'H','m','n','p',0,'N','y','i','a','k','e','n','g','_','P','u','a', +'c','h','u','e','_','H','m','o','n','g',0,2,'N','a','n','d',0,'N','a','n','d','i','n','a','g','a','r','i',0, +2,'W','c','h','o',0,'W','a','n','c','h','o',0,2,'C','h','r','s',0,'C','h','o','r','a','s','m','i','a','n',0, +2,'D','i','a','k',0,'D','i','v','e','s','_','A','k','u','r','u',0, +2,'K','i','t','s',0,'K','h','i','t','a','n','_','S','m','a','l','l','_','S','c','r','i','p','t',0, +2,'Y','e','z','i',0,'Y','e','z','i','d','i',0,2,'h','s','t',0,'H','a','n','g','u','l','_','S','y','l','l','a','b','l', +'e','_','T','y','p','e',0,2,'N','A',0,'N','o','t','_','A','p','p','l','i','c','a','b','l','e',0, +2,'L',0,'L','e','a','d','i','n','g','_','J','a','m','o',0, +2,'V',0,'V','o','w','e','l','_','J','a','m','o',0,2,'T',0,'T','r','a','i','l','i','n','g','_','J','a','m','o',0, +2,'L','V',0,'L','V','_','S','y','l','l','a','b','l','e',0, +2,'L','V','T',0,'L','V','T','_','S','y','l','l','a','b','l','e',0, +2,'N','F','D','_','Q','C',0,'N','F','D','_','Q','u','i','c','k','_','C','h','e','c','k',0, +2,'N',0,'N','o',0,2,'Y',0,'Y','e','s',0,2,'N','F','K','D','_','Q','C',0,'N','F','K','D','_','Q','u','i','c','k', +'_','C','h','e','c','k',0,2,'N','F','C','_','Q','C',0,'N','F','C','_','Q','u','i','c','k','_','C','h','e','c','k',0, +2,'M',0,'M','a','y','b','e',0,2,'N','F','K','C','_','Q','C',0,'N','F','K','C','_','Q','u','i','c','k','_','C','h','e', +'c','k',0,2,'l','c','c','c',0,'L','e','a','d','_','C','a','n','o','n','i','c','a','l','_','C','o','m','b','i','n','i','n', +'g','_','C','l','a','s','s',0,2,'t','c','c','c',0,'T','r','a','i','l','_','C','a','n','o','n','i','c','a','l','_','C','o', +'m','b','i','n','i','n','g','_','C','l','a','s','s',0,2,'G','C','B',0,'G','r','a','p','h','e','m','e','_','C','l','u','s', +'t','e','r','_','B','r','e','a','k',0,2,'X','X',0,'O','t','h','e','r',0, +2,'C','N',0,'C','o','n','t','r','o','l',0,2,'C','R',0,'C','R',0, +2,'E','X',0,'E','x','t','e','n','d',0,2,'L',0,'L',0, +2,'L','F',0,'L','F',0,2,'L','V',0,'L','V',0,2,'L','V','T',0,'L','V','T',0, +2,'T',0,'T',0,2,'V',0,'V',0,2,'S','M',0,'S','p','a','c','i','n','g','M','a','r','k',0, +2,'P','P',0,'P','r','e','p','e','n','d',0,2,'E','B','G',0,'E','_','B','a','s','e','_','G','A','Z',0, +2,'G','A','Z',0,'G','l','u','e','_','A','f','t','e','r','_','Z','w','j',0, +2,'S','B',0,'S','e','n','t','e','n','c','e','_','B','r','e','a','k',0, +2,'A','T',0,'A','T','e','r','m',0,2,'C','L',0,'C','l','o','s','e',0, +2,'F','O',0,'F','o','r','m','a','t',0,2,'L','O',0,'L','o','w','e','r',0, +2,'L','E',0,'O','L','e','t','t','e','r',0,2,'S','E',0,'S','e','p',0, +2,'S','P',0,'S','p',0,2,'S','T',0,'S','T','e','r','m',0, +2,'U','P',0,'U','p','p','e','r',0,2,'S','C',0,'S','C','o','n','t','i','n','u','e',0, +2,'W','B',0,'W','o','r','d','_','B','r','e','a','k',0,2,'L','E',0,'A','L','e','t','t','e','r',0, +2,'K','A',0,'K','a','t','a','k','a','n','a',0,2,'M','L',0,'M','i','d','L','e','t','t','e','r',0, +2,'M','N',0,'M','i','d','N','u','m',0,2,'E','X',0,'E','x','t','e','n','d','N','u','m','L','e','t',0, +2,'E','x','t','e','n','d',0,'E','x','t','e','n','d',0,2,'M','B',0,'M','i','d','N','u','m','L','e','t',0, +2,'N','L',0,'N','e','w','l','i','n','e',0,2,'S','Q',0,'S','i','n','g','l','e','_','Q','u','o','t','e',0, +2,'D','Q',0,'D','o','u','b','l','e','_','Q','u','o','t','e',0, +2,'W','S','e','g','S','p','a','c','e',0,'W','S','e','g','S','p','a','c','e',0, +2,'b','p','t',0,'B','i','d','i','_','P','a','i','r','e','d','_','B','r','a','c','k','e','t','_','T','y','p','e',0, +2,'n',0,'N','o','n','e',0,2,'o',0,'O','p','e','n',0, +2,'c',0,'C','l','o','s','e',0,2,'I','n','P','C',0,'I','n','d','i','c','_','P','o','s','i','t','i','o','n','a','l','_', +'C','a','t','e','g','o','r','y',0,2,'N','A',0,'N','A',0, +2,'B','o','t','t','o','m',0,'B','o','t','t','o','m',0,2,'B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0, +'B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0, +2,'B','o','t','t','o','m','_','A','n','d','_','R','i','g','h','t',0,'B','o','t','t','o','m','_','A','n','d','_','R','i','g', +'h','t',0,2,'L','e','f','t',0,'L','e','f','t',0,2,'L','e','f','t','_','A','n','d','_','R','i','g','h','t',0, +'L','e','f','t','_','A','n','d','_','R','i','g','h','t',0,2,'O','v','e','r','s','t','r','u','c','k',0, +'O','v','e','r','s','t','r','u','c','k',0,2,'R','i','g','h','t',0,'R','i','g','h','t',0, +2,'T','o','p',0,'T','o','p',0,2,'T','o','p','_','A','n','d','_','B','o','t','t','o','m',0, +'T','o','p','_','A','n','d','_','B','o','t','t','o','m',0,2,'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A', +'n','d','_','R','i','g','h','t',0,'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A','n','d','_','R','i','g','h', +'t',0,2,'T','o','p','_','A','n','d','_','L','e','f','t',0,'T','o','p','_','A','n','d','_','L','e','f','t',0, +2,'T','o','p','_','A','n','d','_','L','e','f','t','_','A','n','d','_','R','i','g','h','t',0, +'T','o','p','_','A','n','d','_','L','e','f','t','_','A','n','d','_','R','i','g','h','t',0, +2,'T','o','p','_','A','n','d','_','R','i','g','h','t',0,'T','o','p','_','A','n','d','_','R','i','g','h','t',0, +2,'V','i','s','u','a','l','_','O','r','d','e','r','_','L','e','f','t',0,'V','i','s','u','a','l','_','O','r','d','e','r','_', +'L','e','f','t',0,2,'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0, +'T','o','p','_','A','n','d','_','B','o','t','t','o','m','_','A','n','d','_','L','e','f','t',0, +2,'I','n','S','C',0,'I','n','d','i','c','_','S','y','l','l','a','b','i','c','_','C','a','t','e','g','o','r','y',0, +2,'O','t','h','e','r',0,'O','t','h','e','r',0,2,'A','v','a','g','r','a','h','a',0,'A','v','a','g','r','a','h','a',0, +2,'B','i','n','d','u',0,'B','i','n','d','u',0,2,'B','r','a','h','m','i','_','J','o','i','n','i','n','g','_','N','u','m', +'b','e','r',0,'B','r','a','h','m','i','_','J','o','i','n','i','n','g','_','N','u','m','b','e','r',0, +2,'C','a','n','t','i','l','l','a','t','i','o','n','_','M','a','r','k',0,'C','a','n','t','i','l','l','a','t','i','o','n','_', +'M','a','r','k',0,2,'C','o','n','s','o','n','a','n','t',0,'C','o','n','s','o','n','a','n','t',0, +2,'C','o','n','s','o','n','a','n','t','_','D','e','a','d',0,'C','o','n','s','o','n','a','n','t','_','D','e','a','d',0, +2,'C','o','n','s','o','n','a','n','t','_','F','i','n','a','l',0,'C','o','n','s','o','n','a','n','t','_','F','i','n','a','l', +0,2,'C','o','n','s','o','n','a','n','t','_','H','e','a','d','_','L','e','t','t','e','r',0, +'C','o','n','s','o','n','a','n','t','_','H','e','a','d','_','L','e','t','t','e','r',0, +2,'C','o','n','s','o','n','a','n','t','_','I','n','i','t','i','a','l','_','P','o','s','t','f','i','x','e','d',0, +'C','o','n','s','o','n','a','n','t','_','I','n','i','t','i','a','l','_','P','o','s','t','f','i','x','e','d',0, +2,'C','o','n','s','o','n','a','n','t','_','K','i','l','l','e','r',0,'C','o','n','s','o','n','a','n','t','_','K','i','l','l', +'e','r',0,2,'C','o','n','s','o','n','a','n','t','_','M','e','d','i','a','l',0,'C','o','n','s','o','n','a','n','t','_','M', +'e','d','i','a','l',0,2,'C','o','n','s','o','n','a','n','t','_','P','l','a','c','e','h','o','l','d','e','r',0, +'C','o','n','s','o','n','a','n','t','_','P','l','a','c','e','h','o','l','d','e','r',0, +2,'C','o','n','s','o','n','a','n','t','_','P','r','e','c','e','d','i','n','g','_','R','e','p','h','a',0, +'C','o','n','s','o','n','a','n','t','_','P','r','e','c','e','d','i','n','g','_','R','e','p','h','a',0, +2,'C','o','n','s','o','n','a','n','t','_','P','r','e','f','i','x','e','d',0,'C','o','n','s','o','n','a','n','t','_','P','r', +'e','f','i','x','e','d',0,2,'C','o','n','s','o','n','a','n','t','_','S','u','b','j','o','i','n','e','d',0, +'C','o','n','s','o','n','a','n','t','_','S','u','b','j','o','i','n','e','d',0, +2,'C','o','n','s','o','n','a','n','t','_','S','u','c','c','e','e','d','i','n','g','_','R','e','p','h','a',0, +'C','o','n','s','o','n','a','n','t','_','S','u','c','c','e','e','d','i','n','g','_','R','e','p','h','a',0, +2,'C','o','n','s','o','n','a','n','t','_','W','i','t','h','_','S','t','a','c','k','e','r',0, +'C','o','n','s','o','n','a','n','t','_','W','i','t','h','_','S','t','a','c','k','e','r',0, +2,'G','e','m','i','n','a','t','i','o','n','_','M','a','r','k',0,'G','e','m','i','n','a','t','i','o','n','_','M','a','r','k', +0,2,'I','n','v','i','s','i','b','l','e','_','S','t','a','c','k','e','r',0,'I','n','v','i','s','i','b','l','e','_','S','t', +'a','c','k','e','r',0,2,'J','o','i','n','e','r',0,'J','o','i','n','e','r',0, +2,'M','o','d','i','f','y','i','n','g','_','L','e','t','t','e','r',0,'M','o','d','i','f','y','i','n','g','_','L','e','t','t', +'e','r',0,2,'N','o','n','_','J','o','i','n','e','r',0,'N','o','n','_','J','o','i','n','e','r',0, +2,'N','u','k','t','a',0,'N','u','k','t','a',0,2,'N','u','m','b','e','r',0,'N','u','m','b','e','r',0, +2,'N','u','m','b','e','r','_','J','o','i','n','e','r',0,'N','u','m','b','e','r','_','J','o','i','n','e','r',0, +2,'P','u','r','e','_','K','i','l','l','e','r',0,'P','u','r','e','_','K','i','l','l','e','r',0, +2,'R','e','g','i','s','t','e','r','_','S','h','i','f','t','e','r',0,'R','e','g','i','s','t','e','r','_','S','h','i','f','t', +'e','r',0,2,'S','y','l','l','a','b','l','e','_','M','o','d','i','f','i','e','r',0,'S','y','l','l','a','b','l','e','_','M', +'o','d','i','f','i','e','r',0,2,'T','o','n','e','_','L','e','t','t','e','r',0,'T','o','n','e','_','L','e','t','t','e','r', +0,2,'T','o','n','e','_','M','a','r','k',0,'T','o','n','e','_','M','a','r','k',0, +2,'V','i','r','a','m','a',0,'V','i','r','a','m','a',0,2,'V','i','s','a','r','g','a',0, +'V','i','s','a','r','g','a',0,2,'V','o','w','e','l',0,'V','o','w','e','l',0, +2,'V','o','w','e','l','_','D','e','p','e','n','d','e','n','t',0,'V','o','w','e','l','_','D','e','p','e','n','d','e','n','t', +0,2,'V','o','w','e','l','_','I','n','d','e','p','e','n','d','e','n','t',0,'V','o','w','e','l','_','I','n','d','e','p','e', +'n','d','e','n','t',0,2,'v','o',0,'V','e','r','t','i','c','a','l','_','O','r','i','e','n','t','a','t','i','o','n',0, +2,'R',0,'R','o','t','a','t','e','d',0,2,'T','r',0,'T','r','a','n','s','f','o','r','m','e','d','_','R','o','t','a','t', +'e','d',0,2,'T','u',0,'T','r','a','n','s','f','o','r','m','e','d','_','U','p','r','i','g','h','t',0, +2,'U',0,'U','p','r','i','g','h','t',0,2,'g','c','m',0,'G','e','n','e','r','a','l','_','C','a','t','e','g','o','r','y', +'_','M','a','s','k',0,2,'C',0,'O','t','h','e','r',0,2,'L',0,'L','e','t','t','e','r',0, +2,'L','C',0,'C','a','s','e','d','_','L','e','t','t','e','r',0, +3,'M',0,'M','a','r','k',0,'C','o','m','b','i','n','i','n','g','_','M','a','r','k',0, +2,'N',0,'N','u','m','b','e','r',0,3,'P',0,'P','u','n','c','t','u','a','t','i','o','n',0, +'p','u','n','c','t',0,2,'S',0,'S','y','m','b','o','l',0, +2,'Z',0,'S','e','p','a','r','a','t','o','r',0,2,'n','v',0,'N','u','m','e','r','i','c','_','V','a','l','u','e',0, +2,'a','g','e',0,'A','g','e',0,2,'b','m','g',0,'B','i','d','i','_','M','i','r','r','o','r','i','n','g','_','G','l','y', +'p','h',0,2,'c','f',0,'C','a','s','e','_','F','o','l','d','i','n','g',0, +2,'i','s','c',0,'I','S','O','_','C','o','m','m','e','n','t',0, +2,'l','c',0,'L','o','w','e','r','c','a','s','e','_','M','a','p','p','i','n','g',0, +2,'n','a',0,'N','a','m','e',0,3,'s','c','f',0,'S','i','m','p','l','e','_','C','a','s','e','_','F','o','l','d','i','n', +'g',0,'s','f','c',0,2,'s','l','c',0,'S','i','m','p','l','e','_','L','o','w','e','r','c','a','s','e','_','M','a','p','p', +'i','n','g',0,2,'s','t','c',0,'S','i','m','p','l','e','_','T','i','t','l','e','c','a','s','e','_','M','a','p','p','i','n', +'g',0,2,'s','u','c',0,'S','i','m','p','l','e','_','U','p','p','e','r','c','a','s','e','_','M','a','p','p','i','n','g',0, +2,'t','c',0,'T','i','t','l','e','c','a','s','e','_','M','a','p','p','i','n','g',0, +2,'n','a','1',0,'U','n','i','c','o','d','e','_','1','_','N','a','m','e',0, +2,'u','c',0,'U','p','p','e','r','c','a','s','e','_','M','a','p','p','i','n','g',0, +2,'b','p','b',0,'B','i','d','i','_','P','a','i','r','e','d','_','B','r','a','c','k','e','t',0, +2,'s','c','x',0,'S','c','r','i','p','t','_','E','x','t','e','n','s','i','o','n','s',0 +}; + +U_NAMESPACE_END + +#endif // INCLUDED_FROM_PROPNAME_CPP diff --git a/thirdparty/icu4c/common/propsvec.cpp b/thirdparty/icu4c/common/propsvec.cpp new file mode 100644 index 0000000000..056fcda9cf --- /dev/null +++ b/thirdparty/icu4c/common/propsvec.cpp @@ -0,0 +1,529 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: propsvec.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002feb22 +* created by: Markus W. Scherer +* +* Store bits (Unicode character properties) in bit set vectors. +*/ + +#include <stdlib.h> +#include "unicode/utypes.h" +#include "cmemory.h" +#include "utrie.h" +#include "utrie2.h" +#include "uarrsort.h" +#include "propsvec.h" +#include "uassert.h" + +struct UPropsVectors { + uint32_t *v; + int32_t columns; /* number of columns, plus two for start & limit values */ + int32_t maxRows; + int32_t rows; + int32_t prevRow; /* search optimization: remember last row seen */ + UBool isCompacted; +}; + +#define UPVEC_INITIAL_ROWS (1<<12) +#define UPVEC_MEDIUM_ROWS ((int32_t)1<<16) +#define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1) + +U_CAPI UPropsVectors * U_EXPORT2 +upvec_open(int32_t columns, UErrorCode *pErrorCode) { + UPropsVectors *pv; + uint32_t *v, *row; + uint32_t cp; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(columns<1) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + columns+=2; /* count range start and limit columns */ + + pv=(UPropsVectors *)uprv_malloc(sizeof(UPropsVectors)); + v=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS*columns*4); + if(pv==NULL || v==NULL) { + uprv_free(pv); + uprv_free(v); + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memset(pv, 0, sizeof(UPropsVectors)); + pv->v=v; + pv->columns=columns; + pv->maxRows=UPVEC_INITIAL_ROWS; + pv->rows=2+(UPVEC_MAX_CP-UPVEC_FIRST_SPECIAL_CP); + + /* set the all-Unicode row and the special-value rows */ + row=pv->v; + uprv_memset(row, 0, pv->rows*columns*4); + row[0]=0; + row[1]=0x110000; + row+=columns; + for(cp=UPVEC_FIRST_SPECIAL_CP; cp<=UPVEC_MAX_CP; ++cp) { + row[0]=cp; + row[1]=cp+1; + row+=columns; + } + return pv; +} + +U_CAPI void U_EXPORT2 +upvec_close(UPropsVectors *pv) { + if(pv!=NULL) { + uprv_free(pv->v); + uprv_free(pv); + } +} + +static uint32_t * +_findRow(UPropsVectors *pv, UChar32 rangeStart) { + uint32_t *row; + int32_t columns, i, start, limit, prevRow; + + columns=pv->columns; + limit=pv->rows; + prevRow=pv->prevRow; + + /* check the vicinity of the last-seen row (start searching with an unrolled loop) */ + row=pv->v+prevRow*columns; + if(rangeStart>=(UChar32)row[0]) { + if(rangeStart<(UChar32)row[1]) { + /* same row as last seen */ + return row; + } else if(rangeStart<(UChar32)(row+=columns)[1]) { + /* next row after the last one */ + pv->prevRow=prevRow+1; + return row; + } else if(rangeStart<(UChar32)(row+=columns)[1]) { + /* second row after the last one */ + pv->prevRow=prevRow+2; + return row; + } else if((rangeStart-(UChar32)row[1])<10) { + /* we are close, continue looping */ + prevRow+=2; + do { + ++prevRow; + row+=columns; + } while(rangeStart>=(UChar32)row[1]); + pv->prevRow=prevRow; + return row; + } + } else if(rangeStart<(UChar32)pv->v[1]) { + /* the very first row */ + pv->prevRow=0; + return pv->v; + } + + /* do a binary search for the start of the range */ + start=0; + while(start<limit-1) { + i=(start+limit)/2; + row=pv->v+i*columns; + if(rangeStart<(UChar32)row[0]) { + limit=i; + } else if(rangeStart<(UChar32)row[1]) { + pv->prevRow=i; + return row; + } else { + start=i; + } + } + + /* must be found because all ranges together always cover all of Unicode */ + pv->prevRow=start; + return pv->v+start*columns; +} + +U_CAPI void U_EXPORT2 +upvec_setValue(UPropsVectors *pv, + UChar32 start, UChar32 end, + int32_t column, + uint32_t value, uint32_t mask, + UErrorCode *pErrorCode) { + uint32_t *firstRow, *lastRow; + int32_t columns; + UChar32 limit; + UBool splitFirstRow, splitLastRow; + + /* argument checking */ + if(U_FAILURE(*pErrorCode)) { + return; + } + if( pv==NULL || + start<0 || start>end || end>UPVEC_MAX_CP || + column<0 || column>=(pv->columns-2) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if(pv->isCompacted) { + *pErrorCode=U_NO_WRITE_PERMISSION; + return; + } + limit=end+1; + + /* initialize */ + columns=pv->columns; + column+=2; /* skip range start and limit columns */ + value&=mask; + + /* find the rows whose ranges overlap with the input range */ + + /* find the first and last rows, always successful */ + firstRow=_findRow(pv, start); + lastRow=_findRow(pv, end); + + /* + * Rows need to be split if they partially overlap with the + * input range (only possible for the first and last rows) + * and if their value differs from the input value. + */ + splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask)); + splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask)); + + /* split first/last rows if necessary */ + if(splitFirstRow || splitLastRow) { + int32_t count, rows; + + rows=pv->rows; + if((rows+splitFirstRow+splitLastRow)>pv->maxRows) { + uint32_t *newVectors; + int32_t newMaxRows; + + if(pv->maxRows<UPVEC_MEDIUM_ROWS) { + newMaxRows=UPVEC_MEDIUM_ROWS; + } else if(pv->maxRows<UPVEC_MAX_ROWS) { + newMaxRows=UPVEC_MAX_ROWS; + } else { + /* Implementation bug, or UPVEC_MAX_ROWS too low. */ + *pErrorCode=U_INTERNAL_PROGRAM_ERROR; + return; + } + newVectors=(uint32_t *)uprv_malloc(newMaxRows*columns*4); + if(newVectors==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memcpy(newVectors, pv->v, (size_t)rows*columns*4); + firstRow=newVectors+(firstRow-pv->v); + lastRow=newVectors+(lastRow-pv->v); + uprv_free(pv->v); + pv->v=newVectors; + pv->maxRows=newMaxRows; + } + + /* count the number of row cells to move after the last row, and move them */ + count = (int32_t)((pv->v+rows*columns)-(lastRow+columns)); + if(count>0) { + uprv_memmove( + lastRow+(1+splitFirstRow+splitLastRow)*columns, + lastRow+columns, + count*4); + } + pv->rows=rows+splitFirstRow+splitLastRow; + + /* split the first row, and move the firstRow pointer to the second part */ + if(splitFirstRow) { + /* copy all affected rows up one and move the lastRow pointer */ + count = (int32_t)((lastRow-firstRow)+columns); + uprv_memmove(firstRow+columns, firstRow, (size_t)count*4); + lastRow+=columns; + + /* split the range and move the firstRow pointer */ + firstRow[1]=firstRow[columns]=(uint32_t)start; + firstRow+=columns; + } + + /* split the last row */ + if(splitLastRow) { + /* copy the last row data */ + uprv_memcpy(lastRow+columns, lastRow, (size_t)columns*4); + + /* split the range and move the firstRow pointer */ + lastRow[1]=lastRow[columns]=(uint32_t)limit; + } + } + + /* set the "row last seen" to the last row for the range */ + pv->prevRow=(int32_t)((lastRow-(pv->v))/columns); + + /* set the input value in all remaining rows */ + firstRow+=column; + lastRow+=column; + mask=~mask; + for(;;) { + *firstRow=(*firstRow&mask)|value; + if(firstRow==lastRow) { + break; + } + firstRow+=columns; + } +} + +U_CAPI uint32_t U_EXPORT2 +upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column) { + uint32_t *row; + UPropsVectors *ncpv; + + if(pv->isCompacted || c<0 || c>UPVEC_MAX_CP || column<0 || column>=(pv->columns-2)) { + return 0; + } + ncpv=(UPropsVectors *)pv; + row=_findRow(ncpv, c); + return row[2+column]; +} + +U_CAPI uint32_t * U_EXPORT2 +upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, + UChar32 *pRangeStart, UChar32 *pRangeEnd) { + uint32_t *row; + int32_t columns; + + if(pv->isCompacted || rowIndex<0 || rowIndex>=pv->rows) { + return NULL; + } + + columns=pv->columns; + row=pv->v+rowIndex*columns; + if(pRangeStart!=NULL) { + *pRangeStart=(UChar32)row[0]; + } + if(pRangeEnd!=NULL) { + *pRangeEnd=(UChar32)row[1]-1; + } + return row+2; +} + +static int32_t U_CALLCONV +upvec_compareRows(const void *context, const void *l, const void *r) { + const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r; + const UPropsVectors *pv=(const UPropsVectors *)context; + int32_t i, count, columns; + + count=columns=pv->columns; /* includes start/limit columns */ + + /* start comparing after start/limit but wrap around to them */ + i=2; + do { + if(left[i]!=right[i]) { + return left[i]<right[i] ? -1 : 1; + } + if(++i==columns) { + i=0; + } + } while(--count>0); + + return 0; +} + +U_CAPI void U_EXPORT2 +upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) { + uint32_t *row; + int32_t i, columns, valueColumns, rows, count; + UChar32 start, limit; + + /* argument checking */ + if(U_FAILURE(*pErrorCode)) { + return; + } + if(handler==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if(pv->isCompacted) { + return; + } + + /* Set the flag now: Sorting and compacting destroys the builder data structure. */ + pv->isCompacted=TRUE; + + rows=pv->rows; + columns=pv->columns; + U_ASSERT(columns>=3); /* upvec_open asserts this */ + valueColumns=columns-2; /* not counting start & limit */ + + /* sort the properties vectors to find unique vector values */ + uprv_sortArray(pv->v, rows, columns*4, + upvec_compareRows, pv, FALSE, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* + * Find and set the special values. + * This has to do almost the same work as the compaction below, + * to find the indexes where the special-value rows will move. + */ + row=pv->v; + count=-valueColumns; + for(i=0; i<rows; ++i) { + start=(UChar32)row[0]; + + /* count a new values vector if it is different from the current one */ + if(count<0 || 0!=uprv_memcmp(row+2, row-valueColumns, valueColumns*4)) { + count+=valueColumns; + } + + if(start>=UPVEC_FIRST_SPECIAL_CP) { + handler(context, start, start, count, row+2, valueColumns, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } + + row+=columns; + } + + /* count is at the beginning of the last vector, add valueColumns to include that last vector */ + count+=valueColumns; + + /* Call the handler once more to signal the start of delivering real values. */ + handler(context, UPVEC_START_REAL_VALUES_CP, UPVEC_START_REAL_VALUES_CP, + count, row-valueColumns, valueColumns, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* + * Move vector contents up to a contiguous array with only unique + * vector values, and call the handler function for each vector. + * + * This destroys the Properties Vector structure and replaces it + * with an array of just vector values. + */ + row=pv->v; + count=-valueColumns; + for(i=0; i<rows; ++i) { + /* fetch these first before memmove() may overwrite them */ + start=(UChar32)row[0]; + limit=(UChar32)row[1]; + + /* add a new values vector if it is different from the current one */ + if(count<0 || 0!=uprv_memcmp(row+2, pv->v+count, valueColumns*4)) { + count+=valueColumns; + uprv_memmove(pv->v+count, row+2, (size_t)valueColumns*4); + } + + if(start<UPVEC_FIRST_SPECIAL_CP) { + handler(context, start, limit-1, count, pv->v+count, valueColumns, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } + + row+=columns; + } + + /* count is at the beginning of the last vector, add one to include that last vector */ + pv->rows=count/valueColumns+1; +} + +U_CAPI const uint32_t * U_EXPORT2 +upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns) { + if(!pv->isCompacted) { + return NULL; + } + if(pRows!=NULL) { + *pRows=pv->rows; + } + if(pColumns!=NULL) { + *pColumns=pv->columns-2; + } + return pv->v; +} + +U_CAPI uint32_t * U_EXPORT2 +upvec_cloneArray(const UPropsVectors *pv, + int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode) { + uint32_t *clonedArray; + int32_t byteLength; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(!pv->isCompacted) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + byteLength=pv->rows*(pv->columns-2)*4; + clonedArray=(uint32_t *)uprv_malloc(byteLength); + if(clonedArray==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memcpy(clonedArray, pv->v, byteLength); + if(pRows!=NULL) { + *pRows=pv->rows; + } + if(pColumns!=NULL) { + *pColumns=pv->columns-2; + } + return clonedArray; +} + +U_CAPI UTrie2 * U_EXPORT2 +upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode) { + UPVecToUTrie2Context toUTrie2={ NULL, 0, 0, 0 }; + upvec_compact(pv, upvec_compactToUTrie2Handler, &toUTrie2, pErrorCode); + utrie2_freeze(toUTrie2.trie, UTRIE2_16_VALUE_BITS, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + utrie2_close(toUTrie2.trie); + toUTrie2.trie=NULL; + } + return toUTrie2.trie; +} + +/* + * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts + * some 16-bit field and builds and returns a UTrie2. + */ + +U_CAPI void U_CALLCONV +upvec_compactToUTrie2Handler(void *context, + UChar32 start, UChar32 end, + int32_t rowIndex, uint32_t *row, int32_t columns, + UErrorCode *pErrorCode) { + (void)row; + (void)columns; + UPVecToUTrie2Context *toUTrie2=(UPVecToUTrie2Context *)context; + if(start<UPVEC_FIRST_SPECIAL_CP) { + utrie2_setRange32(toUTrie2->trie, start, end, (uint32_t)rowIndex, TRUE, pErrorCode); + } else { + switch(start) { + case UPVEC_INITIAL_VALUE_CP: + toUTrie2->initialValue=rowIndex; + break; + case UPVEC_ERROR_VALUE_CP: + toUTrie2->errorValue=rowIndex; + break; + case UPVEC_START_REAL_VALUES_CP: + toUTrie2->maxValue=rowIndex; + if(rowIndex>0xffff) { + /* too many rows for a 16-bit trie */ + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + } else { + toUTrie2->trie=utrie2_open(toUTrie2->initialValue, + toUTrie2->errorValue, pErrorCode); + } + break; + default: + break; + } + } +} diff --git a/thirdparty/icu4c/common/propsvec.h b/thirdparty/icu4c/common/propsvec.h new file mode 100644 index 0000000000..39080615ea --- /dev/null +++ b/thirdparty/icu4c/common/propsvec.h @@ -0,0 +1,178 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2010, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: propsvec.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002feb22 +* created by: Markus W. Scherer +* +* Store bits (Unicode character properties) in bit set vectors. +*/ + +#ifndef __UPROPSVEC_H__ +#define __UPROPSVEC_H__ + +#include "unicode/utypes.h" +#include "utrie.h" +#include "utrie2.h" + +U_CDECL_BEGIN + +/** + * Unicode Properties Vectors associated with code point ranges. + * + * Rows of uint32_t integers in a contiguous array store + * the range limits and the properties vectors. + * + * Logically, each row has a certain number of uint32_t values, + * which is set via the upvec_open() "columns" parameter. + * + * Internally, two additional columns are stored. + * In each internal row, + * row[0] contains the start code point and + * row[1] contains the limit code point, + * which is the start of the next range. + * + * Initially, there is only one "normal" row for + * range [0..0x110000[ with values 0. + * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP. + * + * It would be possible to store only one range boundary per row, + * but self-contained rows allow to later sort them by contents. + */ +struct UPropsVectors; +typedef struct UPropsVectors UPropsVectors; + +/* + * Special pseudo code points for storing the initialValue and the errorValue, + * which are used to initialize a UTrie2 or similar. + */ +#define UPVEC_FIRST_SPECIAL_CP 0x110000 +#define UPVEC_INITIAL_VALUE_CP 0x110000 +#define UPVEC_ERROR_VALUE_CP 0x110001 +#define UPVEC_MAX_CP 0x110001 + +/* + * Special pseudo code point used in upvec_compact() signalling the end of + * delivering special values and the beginning of delivering real ones. + * Stable value, unlike UPVEC_MAX_CP which might grow over time. + */ +#define UPVEC_START_REAL_VALUES_CP 0x200000 + +/* + * Open a UPropsVectors object. + * @param columns Number of value integers (uint32_t) per row. + */ +U_CAPI UPropsVectors * U_EXPORT2 +upvec_open(int32_t columns, UErrorCode *pErrorCode); + +U_CAPI void U_EXPORT2 +upvec_close(UPropsVectors *pv); + +/* + * In rows for code points [start..end], select the column, + * reset the mask bits and set the value bits (ANDed with the mask). + * + * Will set U_NO_WRITE_PERMISSION if called after upvec_compact(). + */ +U_CAPI void U_EXPORT2 +upvec_setValue(UPropsVectors *pv, + UChar32 start, UChar32 end, + int32_t column, + uint32_t value, uint32_t mask, + UErrorCode *pErrorCode); + +/* + * Logically const but must not be used on the same pv concurrently! + * Always returns 0 if called after upvec_compact(). + */ +U_CAPI uint32_t U_EXPORT2 +upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column); + +/* + * pRangeStart and pRangeEnd can be NULL. + * @return NULL if rowIndex out of range and for illegal arguments, + * or if called after upvec_compact() + */ +U_CAPI uint32_t * U_EXPORT2 +upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, + UChar32 *pRangeStart, UChar32 *pRangeEnd); + +/* + * Compact the vectors: + * - modify the memory + * - keep only unique vectors + * - store them contiguously from the beginning of the memory + * - for each (non-unique) row, call the handler function + * + * The handler's rowIndex is the index of the row in the compacted + * memory block. + * (Therefore, it starts at 0 increases in increments of the columns value.) + * + * In a first phase, only special values are delivered (each exactly once), + * with start==end both equalling a special pseudo code point. + * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP + * where rowIndex is the length of the compacted array, + * and the row is arbitrary (but not NULL). + * Then, in the second phase, the handler is called for each row of real values. + */ +typedef void U_CALLCONV +UPVecCompactHandler(void *context, + UChar32 start, UChar32 end, + int32_t rowIndex, uint32_t *row, int32_t columns, + UErrorCode *pErrorCode); + +U_CAPI void U_EXPORT2 +upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); + +/* + * Get the vectors array after calling upvec_compact(). + * The caller must not modify nor release the returned array. + * Returns NULL if called before upvec_compact(). + */ +U_CAPI const uint32_t * U_EXPORT2 +upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns); + +/* + * Get a clone of the vectors array after calling upvec_compact(). + * The caller owns the returned array and must uprv_free() it. + * Returns NULL if called before upvec_compact(). + */ +U_CAPI uint32_t * U_EXPORT2 +upvec_cloneArray(const UPropsVectors *pv, + int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode); + +/* + * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted + * vectors array, and freeze the trie. + */ +U_CAPI UTrie2 * U_EXPORT2 +upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode); + +struct UPVecToUTrie2Context { + UTrie2 *trie; + int32_t initialValue; + int32_t errorValue; + int32_t maxValue; +}; +typedef struct UPVecToUTrie2Context UPVecToUTrie2Context; + +/* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */ +U_CAPI void U_CALLCONV +upvec_compactToUTrie2Handler(void *context, + UChar32 start, UChar32 end, + int32_t rowIndex, uint32_t *row, int32_t columns, + UErrorCode *pErrorCode); + +U_CDECL_END + +#endif diff --git a/thirdparty/icu4c/common/punycode.cpp b/thirdparty/icu4c/common/punycode.cpp new file mode 100644 index 0000000000..4832938ff7 --- /dev/null +++ b/thirdparty/icu4c/common/punycode.cpp @@ -0,0 +1,590 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: punycode.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jan31 +* created by: Markus W. Scherer +*/ + + +/* This ICU code derived from: */ +/* +punycode.c 0.4.0 (2001-Nov-17-Sat) +http://www.cs.berkeley.edu/~amc/idn/ +Adam M. Costello +http://www.nicemice.net/amc/ + +Disclaimer and license + + Regarding this entire document or any portion of it (including + the pseudocode and C code), the author makes no guarantees and + is not responsible for any damage resulting from its use. The + author grants irrevocable permission to anyone to use, modify, + and distribute it in any way that does not diminish the rights + of anyone else to use, modify, and distribute it, provided that + redistributed derivative works do not contain misleading author or + version information. Derivative works need not be licensed under + similar terms. +*/ +/* + * ICU modifications: + * - ICU data types and coding conventions + * - ICU string buffer handling with implicit source lengths + * and destination preflighting + * - UTF-16 handling + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_IDNA + +#include "unicode/ustring.h" +#include "unicode/utf.h" +#include "unicode/utf16.h" +#include "ustr_imp.h" +#include "cstring.h" +#include "cmemory.h" +#include "punycode.h" +#include "uassert.h" + + +/* Punycode ----------------------------------------------------------------- */ + +/* Punycode parameters for Bootstring */ +#define BASE 36 +#define TMIN 1 +#define TMAX 26 +#define SKEW 38 +#define DAMP 700 +#define INITIAL_BIAS 72 +#define INITIAL_N 0x80 + +/* "Basic" Unicode/ASCII code points */ +#define _HYPHEN 0X2d +#define DELIMITER _HYPHEN + +#define _ZERO_ 0X30 +#define _NINE 0x39 + +#define _SMALL_A 0X61 +#define _SMALL_Z 0X7a + +#define _CAPITAL_A 0X41 +#define _CAPITAL_Z 0X5a + +#define IS_BASIC(c) ((c)<0x80) +#define IS_BASIC_UPPERCASE(c) (_CAPITAL_A<=(c) && (c)<=_CAPITAL_Z) + +/** + * digitToBasic() returns the basic code point whose value + * (when used for representing integers) is d, which must be in the + * range 0 to BASE-1. The lowercase form is used unless the uppercase flag is + * nonzero, in which case the uppercase form is used. + */ +static inline char +digitToBasic(int32_t digit, UBool uppercase) { + /* 0..25 map to ASCII a..z or A..Z */ + /* 26..35 map to ASCII 0..9 */ + if(digit<26) { + if(uppercase) { + return (char)(_CAPITAL_A+digit); + } else { + return (char)(_SMALL_A+digit); + } + } else { + return (char)((_ZERO_-26)+digit); + } +} + +/** + * @return the numeric value of a basic code point (for use in representing integers) + * in the range 0 to BASE-1, or a negative value if cp is invalid. + */ +static int32_t decodeDigit(int32_t cp) { + if(cp<=u'Z') { + if(cp<=u'9') { + if(cp<u'0') { + return -1; + } else { + return cp-u'0'+26; // 0..9 -> 26..35 + } + } else { + return cp-u'A'; // A-Z -> 0..25 + } + } else if(cp<=u'z') { + return cp-'a'; // a..z -> 0..25 + } else { + return -1; + } +} + +static inline char +asciiCaseMap(char b, UBool uppercase) { + if(uppercase) { + if(_SMALL_A<=b && b<=_SMALL_Z) { + b-=(_SMALL_A-_CAPITAL_A); + } + } else { + if(_CAPITAL_A<=b && b<=_CAPITAL_Z) { + b+=(_SMALL_A-_CAPITAL_A); + } + } + return b; +} + +/* Punycode-specific Bootstring code ---------------------------------------- */ + +/* + * The following code omits the {parts} of the pseudo-algorithm in the spec + * that are not used with the Punycode parameter set. + */ + +/* Bias adaptation function. */ +static int32_t +adaptBias(int32_t delta, int32_t length, UBool firstTime) { + int32_t count; + + if(firstTime) { + delta/=DAMP; + } else { + delta/=2; + } + + delta+=delta/length; + for(count=0; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) { + delta/=(BASE-TMIN); + } + + return count+(((BASE-TMIN+1)*delta)/(delta+SKEW)); +} + +namespace { + +// ICU-13727: Limit input length for n^2 algorithm +// where well-formed strings are at most 59 characters long. +constexpr int32_t ENCODE_MAX_CODE_UNITS=1000; +constexpr int32_t DECODE_MAX_CHARS=2000; + +} // namespace + +// encode +U_CAPI int32_t +u_strToPunycode(const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, + const UBool *caseFlags, + UErrorCode *pErrorCode) { + + int32_t cpBuffer[ENCODE_MAX_CODE_UNITS]; + int32_t n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount; + UChar c, c2; + + /* argument checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + if (srcLength>ENCODE_MAX_CODE_UNITS) { + *pErrorCode=U_INPUT_TOO_LONG_ERROR; + return 0; + } + + /* + * Handle the basic code points and + * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit): + */ + srcCPCount=destLength=0; + if(srcLength==-1) { + /* NUL-terminated input */ + for(j=0; /* no condition */; ++j) { + if((c=src[j])==0) { + break; + } + if(j>=ENCODE_MAX_CODE_UNITS) { + *pErrorCode=U_INPUT_TOO_LONG_ERROR; + return 0; + } + if(IS_BASIC(c)) { + cpBuffer[srcCPCount++]=0; + if(destLength<destCapacity) { + dest[destLength]= + caseFlags!=NULL ? + asciiCaseMap((char)c, caseFlags[j]) : + (char)c; + } + ++destLength; + } else { + n=(caseFlags!=NULL && caseFlags[j])<<31L; + if(U16_IS_SINGLE(c)) { + n|=c; + } else if(U16_IS_LEAD(c) && U16_IS_TRAIL(c2=src[j+1])) { + ++j; + n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2); + } else { + /* error: unmatched surrogate */ + *pErrorCode=U_INVALID_CHAR_FOUND; + return 0; + } + cpBuffer[srcCPCount++]=n; + } + } + } else { + /* length-specified input */ + for(j=0; j<srcLength; ++j) { + c=src[j]; + if(IS_BASIC(c)) { + cpBuffer[srcCPCount++]=0; + if(destLength<destCapacity) { + dest[destLength]= + caseFlags!=NULL ? + asciiCaseMap((char)c, caseFlags[j]) : + (char)c; + } + ++destLength; + } else { + n=(caseFlags!=NULL && caseFlags[j])<<31L; + if(U16_IS_SINGLE(c)) { + n|=c; + } else if(U16_IS_LEAD(c) && (j+1)<srcLength && U16_IS_TRAIL(c2=src[j+1])) { + ++j; + n|=(int32_t)U16_GET_SUPPLEMENTARY(c, c2); + } else { + /* error: unmatched surrogate */ + *pErrorCode=U_INVALID_CHAR_FOUND; + return 0; + } + cpBuffer[srcCPCount++]=n; + } + } + } + + /* Finish the basic string - if it is not empty - with a delimiter. */ + basicLength=destLength; + if(basicLength>0) { + if(destLength<destCapacity) { + dest[destLength]=DELIMITER; + } + ++destLength; + } + + /* + * handledCPCount is the number of code points that have been handled + * basicLength is the number of basic code points + * destLength is the number of chars that have been output + */ + + /* Initialize the state: */ + n=INITIAL_N; + delta=0; + bias=INITIAL_BIAS; + + /* Main encoding loop: */ + for(handledCPCount=basicLength; handledCPCount<srcCPCount; /* no op */) { + /* + * All non-basic code points < n have been handled already. + * Find the next larger one: + */ + for(m=0x7fffffff, j=0; j<srcCPCount; ++j) { + q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */ + if(n<=q && q<m) { + m=q; + } + } + + /* + * Increase delta enough to advance the decoder's + * <n,i> state to <m,0>, but guard against overflow: + */ + if(m-n>(0x7fffffff-handledCPCount-delta)/(handledCPCount+1)) { + *pErrorCode=U_INTERNAL_PROGRAM_ERROR; + return 0; + } + delta+=(m-n)*(handledCPCount+1); + n=m; + + /* Encode a sequence of same code points n */ + for(j=0; j<srcCPCount; ++j) { + q=cpBuffer[j]&0x7fffffff; /* remove case flag from the sign bit */ + if(q<n) { + ++delta; + } else if(q==n) { + /* Represent delta as a generalized variable-length integer: */ + for(q=delta, k=BASE; /* no condition */; k+=BASE) { + + /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt + + t=k-bias; + if(t<TMIN) { + t=TMIN; + } else if(t>TMAX) { + t=TMAX; + } + */ + + t=k-bias; + if(t<TMIN) { + t=TMIN; + } else if(k>=(bias+TMAX)) { + t=TMAX; + } + + if(q<t) { + break; + } + + if(destLength<destCapacity) { + dest[destLength]=digitToBasic(t+(q-t)%(BASE-t), 0); + } + ++destLength; + q=(q-t)/(BASE-t); + } + + if(destLength<destCapacity) { + dest[destLength]=digitToBasic(q, (UBool)(cpBuffer[j]<0)); + } + ++destLength; + bias=adaptBias(delta, handledCPCount+1, (UBool)(handledCPCount==basicLength)); + delta=0; + ++handledCPCount; + } + } + + ++delta; + ++n; + } + + return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); +} + +// decode +U_CAPI int32_t +u_strFromPunycode(const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, + UBool *caseFlags, + UErrorCode *pErrorCode) { + int32_t n, destLength, i, bias, basicLength, j, in, oldi, w, k, digit, t, + destCPCount, firstSupplementaryIndex, cpLength; + UChar b; + + /* argument checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(srcLength==-1) { + srcLength=u_strlen(src); + } + if (srcLength>DECODE_MAX_CHARS) { + *pErrorCode=U_INPUT_TOO_LONG_ERROR; + return 0; + } + + /* + * Handle the basic code points: + * Let basicLength be the number of input code points + * before the last delimiter, or 0 if there is none, + * then copy the first basicLength code points to the output. + * + * The two following loops iterate backward. + */ + for(j=srcLength; j>0;) { + if(src[--j]==DELIMITER) { + break; + } + } + destLength=basicLength=destCPCount=j; + U_ASSERT(destLength>=0); + + while(j>0) { + b=src[--j]; + if(!IS_BASIC(b)) { + *pErrorCode=U_INVALID_CHAR_FOUND; + return 0; + } + + if(j<destCapacity) { + dest[j]=(UChar)b; + + if(caseFlags!=NULL) { + caseFlags[j]=IS_BASIC_UPPERCASE(b); + } + } + } + + /* Initialize the state: */ + n=INITIAL_N; + i=0; + bias=INITIAL_BIAS; + firstSupplementaryIndex=1000000000; + + /* + * Main decoding loop: + * Start just after the last delimiter if any + * basic code points were copied; start at the beginning otherwise. + */ + for(in=basicLength>0 ? basicLength+1 : 0; in<srcLength; /* no op */) { + /* + * in is the index of the next character to be consumed, and + * destCPCount is the number of code points in the output array. + * + * Decode a generalized variable-length integer into delta, + * which gets added to i. The overflow checking is easier + * if we increase i as we go, then subtract off its starting + * value at the end to obtain delta. + */ + for(oldi=i, w=1, k=BASE; /* no condition */; k+=BASE) { + if(in>=srcLength) { + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + return 0; + } + + digit=decodeDigit(src[in++]); + if(digit<0) { + *pErrorCode=U_INVALID_CHAR_FOUND; + return 0; + } + if(digit>(0x7fffffff-i)/w) { + /* integer overflow */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + return 0; + } + + i+=digit*w; + /** RAM: comment out the old code for conformance with draft-ietf-idn-punycode-03.txt + t=k-bias; + if(t<TMIN) { + t=TMIN; + } else if(t>TMAX) { + t=TMAX; + } + */ + t=k-bias; + if(t<TMIN) { + t=TMIN; + } else if(k>=(bias+TMAX)) { + t=TMAX; + } + if(digit<t) { + break; + } + + if(w>0x7fffffff/(BASE-t)) { + /* integer overflow */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + return 0; + } + w*=BASE-t; + } + + /* + * Modification from sample code: + * Increments destCPCount here, + * where needed instead of in for() loop tail. + */ + ++destCPCount; + bias=adaptBias(i-oldi, destCPCount, (UBool)(oldi==0)); + + /* + * i was supposed to wrap around from (incremented) destCPCount to 0, + * incrementing n each time, so we'll fix that now: + */ + if(i/destCPCount>(0x7fffffff-n)) { + /* integer overflow */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + return 0; + } + + n+=i/destCPCount; + i%=destCPCount; + /* not needed for Punycode: */ + /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */ + + if(n>0x10ffff || U_IS_SURROGATE(n)) { + /* Unicode code point overflow */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + return 0; + } + + /* Insert n at position i of the output: */ + cpLength=U16_LENGTH(n); + if(dest!=NULL && ((destLength+cpLength)<=destCapacity)) { + int32_t codeUnitIndex; + + /* + * Handle indexes when supplementary code points are present. + * + * In almost all cases, there will be only BMP code points before i + * and even in the entire string. + * This is handled with the same efficiency as with UTF-32. + * + * Only the rare cases with supplementary code points are handled + * more slowly - but not too bad since this is an insertion anyway. + */ + if(i<=firstSupplementaryIndex) { + codeUnitIndex=i; + if(cpLength>1) { + firstSupplementaryIndex=codeUnitIndex; + } else { + ++firstSupplementaryIndex; + } + } else { + codeUnitIndex=firstSupplementaryIndex; + U16_FWD_N(dest, codeUnitIndex, destLength, i-codeUnitIndex); + } + + /* use the UChar index codeUnitIndex instead of the code point index i */ + if(codeUnitIndex<destLength) { + uprv_memmove(dest+codeUnitIndex+cpLength, + dest+codeUnitIndex, + (destLength-codeUnitIndex)*U_SIZEOF_UCHAR); + if(caseFlags!=NULL) { + uprv_memmove(caseFlags+codeUnitIndex+cpLength, + caseFlags+codeUnitIndex, + destLength-codeUnitIndex); + } + } + if(cpLength==1) { + /* BMP, insert one code unit */ + dest[codeUnitIndex]=(UChar)n; + } else { + /* supplementary character, insert two code units */ + dest[codeUnitIndex]=U16_LEAD(n); + dest[codeUnitIndex+1]=U16_TRAIL(n); + } + if(caseFlags!=NULL) { + /* Case of last character determines uppercase flag: */ + caseFlags[codeUnitIndex]=IS_BASIC_UPPERCASE(src[in-1]); + if(cpLength==2) { + caseFlags[codeUnitIndex+1]=FALSE; + } + } + } + destLength+=cpLength; + U_ASSERT(destLength>=0); + ++i; + } + + return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); +} + +/* ### check notes on overflow handling - only necessary if not IDNA? are these Punycode functions to be public? */ + +#endif /* #if !UCONFIG_NO_IDNA */ diff --git a/thirdparty/icu4c/common/punycode.h b/thirdparty/icu4c/common/punycode.h new file mode 100644 index 0000000000..9e28f770c4 --- /dev/null +++ b/thirdparty/icu4c/common/punycode.h @@ -0,0 +1,120 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2003, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: punycode.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jan31 +* created by: Markus W. Scherer +*/ + +/* This ICU code derived from: */ +/* +punycode.c 0.4.0 (2001-Nov-17-Sat) +http://www.cs.berkeley.edu/~amc/idn/ +Adam M. Costello +http://www.nicemice.net/amc/ +*/ + +#ifndef __PUNYCODE_H__ +#define __PUNYCODE_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_IDNA + +/** + * u_strToPunycode() converts Unicode to Punycode. + * + * The input string must not contain single, unpaired surrogates. + * The output will be represented as an array of ASCII code points. + * + * The output string is NUL-terminated according to normal ICU + * string output rules. + * + * @param src Input Unicode string. + * This function handles a limited amount of code points + * (the limit is >=64). + * U_INDEX_OUTOFBOUNDS_ERROR is set if the limit is exceeded. + * @param srcLength Number of UChars in src, or -1 if NUL-terminated. + * @param dest Output Punycode array. + * @param destCapacity Size of dest. + * @param caseFlags Vector of boolean values, one per input UChar, + * indicating that the corresponding character is to be + * marked for the decoder optionally + * uppercasing (true) or lowercasing (false) + * the character. + * ASCII characters are output directly in the case as marked. + * Flags corresponding to trail surrogates are ignored. + * If caseFlags==NULL then input characters are not + * case-mapped. + * @param pErrorCode ICU in/out error code parameter. + * U_INVALID_CHAR_FOUND if src contains + * unmatched single surrogates. + * U_INDEX_OUTOFBOUNDS_ERROR if src contains + * too many code points. + * @return Number of ASCII characters in puny. + * + * @see u_strFromPunycode + */ +U_CAPI int32_t +u_strToPunycode(const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, + const UBool *caseFlags, + UErrorCode *pErrorCode); + +/** + * u_strFromPunycode() converts Punycode to Unicode. + * The Unicode string will be at most as long (in UChars) + * than the Punycode string (in chars). + * + * @param src Input Punycode string. + * @param srcLength Length of puny, or -1 if NUL-terminated + * @param dest Output Unicode string buffer. + * @param destCapacity Size of dest in number of UChars, + * and of caseFlags in numbers of UBools. + * @param caseFlags Output array for case flags as + * defined by the Punycode string. + * The caller should uppercase (true) or lowercase (FASLE) + * the corresponding character in dest. + * For supplementary characters, only the lead surrogate + * is marked, and false is stored for the trail surrogate. + * This is redundant and not necessary for ASCII characters + * because they are already in the case indicated. + * Can be NULL if the case flags are not needed. + * @param pErrorCode ICU in/out error code parameter. + * U_INVALID_CHAR_FOUND if a non-ASCII character + * precedes the last delimiter ('-'), + * or if an invalid character (not a-zA-Z0-9) is found + * after the last delimiter. + * U_ILLEGAL_CHAR_FOUND if the delta sequence is ill-formed. + * @return Number of UChars written to dest. + * + * @see u_strToPunycode + */ +U_CAPI int32_t +u_strFromPunycode(const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, + UBool *caseFlags, + UErrorCode *pErrorCode); + +#endif /* #if !UCONFIG_NO_IDNA */ + +#endif + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/thirdparty/icu4c/common/putil.cpp b/thirdparty/icu4c/common/putil.cpp new file mode 100644 index 0000000000..3ed6a05d22 --- /dev/null +++ b/thirdparty/icu4c/common/putil.cpp @@ -0,0 +1,2482 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : putil.c (previously putil.cpp and ptypes.cpp) +* +* Date Name Description +* 04/14/97 aliu Creation. +* 04/24/97 aliu Added getDefaultDataDirectory() and +* getDefaultLocaleID(). +* 04/28/97 aliu Rewritten to assume Unix and apply general methods +* for assumed case. Non-UNIX platforms must be +* special-cased. Rewrote numeric methods dealing +* with NaN and Infinity to be platform independent +* over all IEEE 754 platforms. +* 05/13/97 aliu Restored sign of timezone +* (semantics are hours West of GMT) +* 06/16/98 erm Added IEEE_754 stuff, cleaned up isInfinite, isNan, +* nextDouble.. +* 07/22/98 stephen Added remainder, max, min, trunc +* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity +* 08/24/98 stephen Added longBitsFromDouble +* 09/08/98 stephen Minor changes for Mac Port +* 03/02/99 stephen Removed openFile(). Added AS400 support. +* Fixed EBCDIC tables +* 04/15/99 stephen Converted to C. +* 06/28/99 stephen Removed mutex locking in u_isBigEndian(). +* 08/04/99 jeffrey R. Added OS/2 changes +* 11/15/99 helena Integrated S/390 IEEE support. +* 04/26/01 Barry N. OS/400 support for uprv_getDefaultLocaleID +* 08/15/01 Steven H. OS/400 support for uprv_getDefaultCodepage +* 01/03/08 Steven L. Fake Time Support +****************************************************************************** +*/ + +// Defines _XOPEN_SOURCE for access to POSIX functions. +// Must be before any other #includes. +#include "uposixdefs.h" + +// First, the platform type. Need this for U_PLATFORM. +#include "unicode/platform.h" + +#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__ +/* tzset isn't defined in strict ANSI on MinGW. */ +#undef __STRICT_ANSI__ +#endif + +/* + * Cygwin with GCC requires inclusion of time.h after the above disabling strict asci mode statement. + */ +#include <time.h> + +#if !U_PLATFORM_USES_ONLY_WIN32_API +#include <sys/time.h> +#endif + +/* include the rest of the ICU headers */ +#include "unicode/putil.h" +#include "unicode/ustring.h" +#include "putilimp.h" +#include "uassert.h" +#include "umutex.h" +#include "cmemory.h" +#include "cstring.h" +#include "locmap.h" +#include "ucln_cmn.h" +#include "charstr.h" + +/* Include standard headers. */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <locale.h> +#include <float.h> + +#ifndef U_COMMON_IMPLEMENTATION +#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu +#endif + + +/* include system headers */ +#if U_PLATFORM_USES_ONLY_WIN32_API + /* + * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW. + * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API) + * to use native APIs as much as possible? + */ +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +# define VC_EXTRALEAN +# define NOUSER +# define NOSERVICE +# define NOIME +# define NOMCX +# include <windows.h> +# include "unicode/uloc.h" +# include "wintz.h" +#elif U_PLATFORM == U_PF_OS400 +# include <float.h> +# include <qusec.h> /* error code structure */ +# include <qusrjobi.h> +# include <qliept.h> /* EPT_CALL macro - this include must be after all other "QSYSINCs" */ +# include <mih/testptr.h> /* For uprv_maximumPtr */ +#elif U_PLATFORM == U_PF_OS390 +# include "unicode/ucnv.h" /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */ +#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS +# include <limits.h> +# include <unistd.h> +# if U_PLATFORM == U_PF_SOLARIS +# ifndef _XPG4_2 +# define _XPG4_2 +# endif +# elif U_PLATFORM == U_PF_ANDROID +# include <sys/system_properties.h> +# include <dlfcn.h> +# endif +#elif U_PLATFORM == U_PF_QNX +# include <sys/neutrino.h> +#endif + + +/* + * Only include langinfo.h if we have a way to get the codeset. If we later + * depend on more feature, we can test on U_HAVE_NL_LANGINFO. + * + */ + +#if U_HAVE_NL_LANGINFO_CODESET +#include <langinfo.h> +#endif + +/** + * Simple things (presence of functions, etc) should just go in configure.in and be added to + * icucfg.h via autoheader. + */ +#if U_PLATFORM_IMPLEMENTS_POSIX +# if U_PLATFORM == U_PF_OS400 +# define HAVE_DLFCN_H 0 +# define HAVE_DLOPEN 0 +# else +# ifndef HAVE_DLFCN_H +# define HAVE_DLFCN_H 1 +# endif +# ifndef HAVE_DLOPEN +# define HAVE_DLOPEN 1 +# endif +# endif +# ifndef HAVE_GETTIMEOFDAY +# define HAVE_GETTIMEOFDAY 1 +# endif +#else +# define HAVE_DLFCN_H 0 +# define HAVE_DLOPEN 0 +# define HAVE_GETTIMEOFDAY 0 +#endif + +U_NAMESPACE_USE + +/* Define the extension for data files, again... */ +#define DATA_TYPE "dat" + +/* Leave this copyright notice here! */ +static const char copyright[] = U_COPYRIGHT_STRING; + +/* floating point implementations ------------------------------------------- */ + +/* We return QNAN rather than SNAN*/ +#define SIGN 0x80000000U + +/* Make it easy to define certain types of constants */ +typedef union { + int64_t i64; /* This must be defined first in order to allow the initialization to work. This is a C89 feature. */ + double d64; +} BitPatternConversion; +static const BitPatternConversion gNan = { (int64_t) INT64_C(0x7FF8000000000000) }; +static const BitPatternConversion gInf = { (int64_t) INT64_C(0x7FF0000000000000) }; + +/*--------------------------------------------------------------------------- + Platform utilities + Our general strategy is to assume we're on a POSIX platform. Platforms which + are non-POSIX must declare themselves so. The default POSIX implementation + will sometimes work for non-POSIX platforms as well (e.g., the NaN-related + functions). + ---------------------------------------------------------------------------*/ + +#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400 +# undef U_POSIX_LOCALE +#else +# define U_POSIX_LOCALE 1 +#endif + +/* + WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble + can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2). +*/ +#if !IEEE_754 +static char* +u_topNBytesOfDouble(double* d, int n) +{ +#if U_IS_BIG_ENDIAN + return (char*)d; +#else + return (char*)(d + 1) - n; +#endif +} + +static char* +u_bottomNBytesOfDouble(double* d, int n) +{ +#if U_IS_BIG_ENDIAN + return (char*)(d + 1) - n; +#else + return (char*)d; +#endif +} +#endif /* !IEEE_754 */ + +#if IEEE_754 +static UBool +u_signBit(double d) { + uint8_t hiByte; +#if U_IS_BIG_ENDIAN + hiByte = *(uint8_t *)&d; +#else + hiByte = *(((uint8_t *)&d) + sizeof(double) - 1); +#endif + return (hiByte & 0x80) != 0; +} +#endif + + + +#if defined (U_DEBUG_FAKETIME) +/* Override the clock to test things without having to move the system clock. + * Assumes POSIX gettimeofday() will function + */ +UDate fakeClock_t0 = 0; /** Time to start the clock from **/ +UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/ +UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/ + +static UDate getUTCtime_real() { + struct timeval posixTime; + gettimeofday(&posixTime, NULL); + return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); +} + +static UDate getUTCtime_fake() { + static UMutex fakeClockMutex; + umtx_lock(&fakeClockMutex); + if(!fakeClock_set) { + UDate real = getUTCtime_real(); + const char *fake_start = getenv("U_FAKETIME_START"); + if((fake_start!=NULL) && (fake_start[0]!=0)) { + sscanf(fake_start,"%lf",&fakeClock_t0); + fakeClock_dt = fakeClock_t0 - real; + fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n" + "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n", + fakeClock_t0, fake_start, fakeClock_dt, real); + } else { + fakeClock_dt = 0; + fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n" + "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n"); + } + fakeClock_set = TRUE; + } + umtx_unlock(&fakeClockMutex); + + return getUTCtime_real() + fakeClock_dt; +} +#endif + +#if U_PLATFORM_USES_ONLY_WIN32_API +typedef union { + int64_t int64; + FILETIME fileTime; +} FileTimeConversion; /* This is like a ULARGE_INTEGER */ + +/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */ +#define EPOCH_BIAS INT64_C(116444736000000000) +#define HECTONANOSECOND_PER_MILLISECOND 10000 + +#endif + +/*--------------------------------------------------------------------------- + Universal Implementations + These are designed to work on all platforms. Try these, and if they + don't work on your platform, then special case your platform with new + implementations. +---------------------------------------------------------------------------*/ + +U_CAPI UDate U_EXPORT2 +uprv_getUTCtime() +{ +#if defined(U_DEBUG_FAKETIME) + return getUTCtime_fake(); /* Hook for overriding the clock */ +#else + return uprv_getRawUTCtime(); +#endif +} + +/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/ +U_CAPI UDate U_EXPORT2 +uprv_getRawUTCtime() +{ +#if U_PLATFORM_USES_ONLY_WIN32_API + + FileTimeConversion winTime; + GetSystemTimeAsFileTime(&winTime.fileTime); + return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND); +#else + +#if HAVE_GETTIMEOFDAY + struct timeval posixTime; + gettimeofday(&posixTime, NULL); + return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000)); +#else + time_t epochtime; + time(&epochtime); + return (UDate)epochtime * U_MILLIS_PER_SECOND; +#endif + +#endif +} + +/*----------------------------------------------------------------------------- + IEEE 754 + These methods detect and return NaN and infinity values for doubles + conforming to IEEE 754. Platforms which support this standard include X86, + Mac 680x0, Mac PowerPC, AIX RS/6000, and most others. + If this doesn't work on your platform, you have non-IEEE floating-point, and + will need to code your own versions. A naive implementation is to return 0.0 + for getNaN and getInfinity, and false for isNaN and isInfinite. + ---------------------------------------------------------------------------*/ + +U_CAPI UBool U_EXPORT2 +uprv_isNaN(double number) +{ +#if IEEE_754 + BitPatternConversion convertedNumber; + convertedNumber.d64 = number; + /* Infinity is 0x7FF0000000000000U. Anything greater than that is a NaN */ + return (UBool)((convertedNumber.i64 & U_INT64_MAX) > gInf.i64); + +#elif U_PLATFORM == U_PF_OS390 + uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, + sizeof(uint32_t)); + uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, + sizeof(uint32_t)); + + return ((highBits & 0x7F080000L) == 0x7F080000L) && + (lowBits == 0x00000000L); + +#else + /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ + /* you'll need to replace this default implementation with what's correct*/ + /* for your platform.*/ + return number != number; +#endif +} + +U_CAPI UBool U_EXPORT2 +uprv_isInfinite(double number) +{ +#if IEEE_754 + BitPatternConversion convertedNumber; + convertedNumber.d64 = number; + /* Infinity is exactly 0x7FF0000000000000U. */ + return (UBool)((convertedNumber.i64 & U_INT64_MAX) == gInf.i64); +#elif U_PLATFORM == U_PF_OS390 + uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, + sizeof(uint32_t)); + uint32_t lowBits = *(uint32_t*)u_bottomNBytesOfDouble(&number, + sizeof(uint32_t)); + + return ((highBits & ~SIGN) == 0x70FF0000L) && (lowBits == 0x00000000L); + +#else + /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ + /* value, you'll need to replace this default implementation with what's*/ + /* correct for your platform.*/ + return number == (2.0 * number); +#endif +} + +U_CAPI UBool U_EXPORT2 +uprv_isPositiveInfinity(double number) +{ +#if IEEE_754 || U_PLATFORM == U_PF_OS390 + return (UBool)(number > 0 && uprv_isInfinite(number)); +#else + return uprv_isInfinite(number); +#endif +} + +U_CAPI UBool U_EXPORT2 +uprv_isNegativeInfinity(double number) +{ +#if IEEE_754 || U_PLATFORM == U_PF_OS390 + return (UBool)(number < 0 && uprv_isInfinite(number)); + +#else + uint32_t highBits = *(uint32_t*)u_topNBytesOfDouble(&number, + sizeof(uint32_t)); + return((highBits & SIGN) && uprv_isInfinite(number)); + +#endif +} + +U_CAPI double U_EXPORT2 +uprv_getNaN() +{ +#if IEEE_754 || U_PLATFORM == U_PF_OS390 + return gNan.d64; +#else + /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/ + /* you'll need to replace this default implementation with what's correct*/ + /* for your platform.*/ + return 0.0; +#endif +} + +U_CAPI double U_EXPORT2 +uprv_getInfinity() +{ +#if IEEE_754 || U_PLATFORM == U_PF_OS390 + return gInf.d64; +#else + /* If your platform doesn't support IEEE 754 but *does* have an infinity*/ + /* value, you'll need to replace this default implementation with what's*/ + /* correct for your platform.*/ + return 0.0; +#endif +} + +U_CAPI double U_EXPORT2 +uprv_floor(double x) +{ + return floor(x); +} + +U_CAPI double U_EXPORT2 +uprv_ceil(double x) +{ + return ceil(x); +} + +U_CAPI double U_EXPORT2 +uprv_round(double x) +{ + return uprv_floor(x + 0.5); +} + +U_CAPI double U_EXPORT2 +uprv_fabs(double x) +{ + return fabs(x); +} + +U_CAPI double U_EXPORT2 +uprv_modf(double x, double* y) +{ + return modf(x, y); +} + +U_CAPI double U_EXPORT2 +uprv_fmod(double x, double y) +{ + return fmod(x, y); +} + +U_CAPI double U_EXPORT2 +uprv_pow(double x, double y) +{ + /* This is declared as "double pow(double x, double y)" */ + return pow(x, y); +} + +U_CAPI double U_EXPORT2 +uprv_pow10(int32_t x) +{ + return pow(10.0, (double)x); +} + +U_CAPI double U_EXPORT2 +uprv_fmax(double x, double y) +{ +#if IEEE_754 + /* first handle NaN*/ + if(uprv_isNaN(x) || uprv_isNaN(y)) + return uprv_getNaN(); + + /* check for -0 and 0*/ + if(x == 0.0 && y == 0.0 && u_signBit(x)) + return y; + +#endif + + /* this should work for all flt point w/o NaN and Inf special cases */ + return (x > y ? x : y); +} + +U_CAPI double U_EXPORT2 +uprv_fmin(double x, double y) +{ +#if IEEE_754 + /* first handle NaN*/ + if(uprv_isNaN(x) || uprv_isNaN(y)) + return uprv_getNaN(); + + /* check for -0 and 0*/ + if(x == 0.0 && y == 0.0 && u_signBit(y)) + return y; + +#endif + + /* this should work for all flt point w/o NaN and Inf special cases */ + return (x > y ? y : x); +} + +U_CAPI UBool U_EXPORT2 +uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) { + // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow. + // This function could be optimized by calling one of those primitives. + auto a64 = static_cast<int64_t>(a); + auto b64 = static_cast<int64_t>(b); + int64_t res64 = a64 + b64; + *res = static_cast<int32_t>(res64); + return res64 != *res; +} + +U_CAPI UBool U_EXPORT2 +uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) { + // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow. + // This function could be optimized by calling one of those primitives. + auto a64 = static_cast<int64_t>(a); + auto b64 = static_cast<int64_t>(b); + int64_t res64 = a64 * b64; + *res = static_cast<int32_t>(res64); + return res64 != *res; +} + +/** + * Truncates the given double. + * trunc(3.3) = 3.0, trunc (-3.3) = -3.0 + * This is different than calling floor() or ceil(): + * floor(3.3) = 3, floor(-3.3) = -4 + * ceil(3.3) = 4, ceil(-3.3) = -3 + */ +U_CAPI double U_EXPORT2 +uprv_trunc(double d) +{ +#if IEEE_754 + /* handle error cases*/ + if(uprv_isNaN(d)) + return uprv_getNaN(); + if(uprv_isInfinite(d)) + return uprv_getInfinity(); + + if(u_signBit(d)) /* Signbit() picks up -0.0; d<0 does not. */ + return ceil(d); + else + return floor(d); + +#else + return d >= 0 ? floor(d) : ceil(d); + +#endif +} + +/** + * Return the largest positive number that can be represented by an integer + * type of arbitrary bit length. + */ +U_CAPI double U_EXPORT2 +uprv_maxMantissa(void) +{ + return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0; +} + +U_CAPI double U_EXPORT2 +uprv_log(double d) +{ + return log(d); +} + +U_CAPI void * U_EXPORT2 +uprv_maximumPtr(void * base) +{ +#if U_PLATFORM == U_PF_OS400 + /* + * With the provided function we should never be out of range of a given segment + * (a traditional/typical segment that is). Our segments have 5 bytes for the + * id and 3 bytes for the offset. The key is that the casting takes care of + * only retrieving the offset portion minus x1000. Hence, the smallest offset + * seen in a program is x001000 and when casted to an int would be 0. + * That's why we can only add 0xffefff. Otherwise, we would exceed the segment. + * + * Currently, 16MB is the current addressing limitation on i5/OS if the activation is + * non-TERASPACE. If it is TERASPACE it is 2GB - 4k(header information). + * This function determines the activation based on the pointer that is passed in and + * calculates the appropriate maximum available size for + * each pointer type (TERASPACE and non-TERASPACE) + * + * Unlike other operating systems, the pointer model isn't determined at + * compile time on i5/OS. + */ + if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) { + /* if it is a TERASPACE pointer the max is 2GB - 4k */ + return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0x7fffefff))); + } + /* otherwise 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */ + return ((void *)(((char *)base)-((uint32_t)(base))+((uint32_t)0xffefff))); + +#else + return U_MAX_PTR(base); +#endif +} + +/*--------------------------------------------------------------------------- + Platform-specific Implementations + Try these, and if they don't work on your platform, then special case your + platform with new implementations. + ---------------------------------------------------------------------------*/ + +/* Generic time zone layer -------------------------------------------------- */ + +/* Time zone utilities */ +U_CAPI void U_EXPORT2 +uprv_tzset() +{ +#if defined(U_TZSET) + U_TZSET(); +#else + /* no initialization*/ +#endif +} + +U_CAPI int32_t U_EXPORT2 +uprv_timezone() +{ +#ifdef U_TIMEZONE + return U_TIMEZONE; +#else + time_t t, t1, t2; + struct tm tmrec; + int32_t tdiff = 0; + + time(&t); + uprv_memcpy( &tmrec, localtime(&t), sizeof(tmrec) ); +#if U_PLATFORM != U_PF_IPHONE + UBool dst_checked = (tmrec.tm_isdst != 0); /* daylight savings time is checked*/ +#endif + t1 = mktime(&tmrec); /* local time in seconds*/ + uprv_memcpy( &tmrec, gmtime(&t), sizeof(tmrec) ); + t2 = mktime(&tmrec); /* GMT (or UTC) in seconds*/ + tdiff = t2 - t1; + +#if U_PLATFORM != U_PF_IPHONE + /* imitate NT behaviour, which returns same timezone offset to GMT for + winter and summer. + This does not work on all platforms. For instance, on glibc on Linux + and on Mac OS 10.5, tdiff calculated above remains the same + regardless of whether DST is in effect or not. iOS is another + platform where this does not work. Linux + glibc and Mac OS 10.5 + have U_TIMEZONE defined so that this code is not reached. + */ + if (dst_checked) + tdiff += 3600; +#endif + return tdiff; +#endif +} + +/* Note that U_TZNAME does *not* have to be tzname, but if it is, + some platforms need to have it declared here. */ + +#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED) +/* RS6000 and others reject char **tzname. */ +extern U_IMPORT char *U_TZNAME[]; +#endif + +#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS) +/* These platforms are likely to use Olson timezone IDs. */ +/* common targets of the symbolic link at TZDEFAULT are: + * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12 + * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12 + * "/usr/share/lib/zoneinfo/<olsonID>" Solaris + * "../usr/share/lib/zoneinfo/<olsonID>" Solaris + * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13 + * To avoid checking lots of paths, just check that the target path + * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid. + */ + +#define CHECK_LOCALTIME_LINK 1 +#if U_PLATFORM_IS_DARWIN_BASED +#include <tzfile.h> +#define TZZONEINFO (TZDIR "/") +#elif U_PLATFORM == U_PF_SOLARIS +#define TZDEFAULT "/etc/localtime" +#define TZZONEINFO "/usr/share/lib/zoneinfo/" +#define TZ_ENV_CHECK "localtime" +#else +#define TZDEFAULT "/etc/localtime" +#define TZZONEINFO "/usr/share/zoneinfo/" +#endif +#define TZZONEINFOTAIL "/zoneinfo/" +#if U_HAVE_DIRENT_H +#define TZFILE_SKIP "posixrules" /* tz file to skip when searching. */ +/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo + symlinked to /etc/localtime, which makes searchForTZFile return + 'localtime' when it's the first match. */ +#define TZFILE_SKIP2 "localtime" +#define SEARCH_TZFILE +#include <dirent.h> /* Needed to search through system timezone files */ +#endif +static char gTimeZoneBuffer[PATH_MAX]; +static char *gTimeZoneBufferPtr = NULL; +#endif + +#if !U_PLATFORM_USES_ONLY_WIN32_API +#define isNonDigit(ch) (ch < '0' || '9' < ch) +static UBool isValidOlsonID(const char *id) { + int32_t idx = 0; + + /* Determine if this is something like Iceland (Olson ID) + or AST4ADT (non-Olson ID) */ + while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') { + idx++; + } + + /* If we went through the whole string, then it might be okay. + The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30", + "GRNLNDST3GRNLNDDT" or similar, so we cannot use it. + The rest of the time it could be an Olson ID. George */ + return (UBool)(id[idx] == 0 + || uprv_strcmp(id, "PST8PDT") == 0 + || uprv_strcmp(id, "MST7MDT") == 0 + || uprv_strcmp(id, "CST6CDT") == 0 + || uprv_strcmp(id, "EST5EDT") == 0); +} + +/* On some Unix-like OS, 'posix' subdirectory in + /usr/share/zoneinfo replicates the top-level contents. 'right' + subdirectory has the same set of files, but individual files + are different from those in the top-level directory or 'posix' + because 'right' has files for TAI (Int'l Atomic Time) while 'posix' + has files for UTC. + When the first match for /etc/localtime is in either of them + (usually in posix because 'right' has different file contents), + or TZ environment variable points to one of them, createTimeZone + fails because, say, 'posix/America/New_York' is not an Olson + timezone id ('America/New_York' is). So, we have to skip + 'posix/' and 'right/' at the beginning. */ +static void skipZoneIDPrefix(const char** id) { + if (uprv_strncmp(*id, "posix/", 6) == 0 + || uprv_strncmp(*id, "right/", 6) == 0) + { + *id += 6; + } +} +#endif + +#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API + +#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600) +typedef struct OffsetZoneMapping { + int32_t offsetSeconds; + int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/ + const char *stdID; + const char *dstID; + const char *olsonID; +} OffsetZoneMapping; + +enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 }; + +/* +This list tries to disambiguate a set of abbreviated timezone IDs and offsets +and maps it to an Olson ID. +Before adding anything to this list, take a look at +icu/source/tools/tzcode/tz.alias +Sometimes no daylight savings (0) is important to define due to aliases. +This list can be tested with icu/source/test/compat/tzone.pl +More values could be added to daylightType to increase precision. +*/ +static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = { + {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"}, + {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"}, + {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"}, + {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"}, + {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"}, + {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"}, + {-36000, 2, "EST", "EST", "Australia/Sydney"}, + {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"}, + {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"}, + {-34200, 2, "CST", "CST", "Australia/South"}, + {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"}, + {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"}, + {-31500, 2, "CWST", "CWST", "Australia/Eucla"}, + {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"}, + {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"}, + {-28800, 2, "WST", "WST", "Australia/West"}, + {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"}, + {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"}, + {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"}, + {-21600, 1, "OMST", "OMSST", "Asia/Omsk"}, + {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"}, + {-14400, 1, "SAMT", "SAMST", "Europe/Samara"}, + {-14400, 1, "AMT", "AMST", "Asia/Yerevan"}, + {-14400, 1, "AZT", "AZST", "Asia/Baku"}, + {-10800, 1, "AST", "ADT", "Asia/Baghdad"}, + {-10800, 1, "MSK", "MSD", "Europe/Moscow"}, + {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"}, + {-7200, 0, "EET", "CEST", "Africa/Tripoli"}, + {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */ + {-7200, 1, "IST", "IDT", "Asia/Jerusalem"}, + {-3600, 0, "CET", "WEST", "Africa/Algiers"}, + {-3600, 2, "WAT", "WAST", "Africa/Windhoek"}, + {0, 1, "GMT", "IST", "Europe/Dublin"}, + {0, 1, "GMT", "BST", "Europe/London"}, + {0, 0, "WET", "WEST", "Africa/Casablanca"}, + {0, 0, "WET", "WET", "Africa/El_Aaiun"}, + {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"}, + {3600, 1, "EGT", "EGST", "America/Scoresbysund"}, + {10800, 1, "PMST", "PMDT", "America/Miquelon"}, + {10800, 2, "UYT", "UYST", "America/Montevideo"}, + {10800, 1, "WGT", "WGST", "America/Godthab"}, + {10800, 2, "BRT", "BRST", "Brazil/East"}, + {12600, 1, "NST", "NDT", "America/St_Johns"}, + {14400, 1, "AST", "ADT", "Canada/Atlantic"}, + {14400, 2, "AMT", "AMST", "America/Cuiaba"}, + {14400, 2, "CLT", "CLST", "Chile/Continental"}, + {14400, 2, "FKT", "FKST", "Atlantic/Stanley"}, + {14400, 2, "PYT", "PYST", "America/Asuncion"}, + {18000, 1, "CST", "CDT", "America/Havana"}, + {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */ + {21600, 2, "EAST", "EASST", "Chile/EasterIsland"}, + {21600, 0, "CST", "MDT", "Canada/Saskatchewan"}, + {21600, 0, "CST", "CDT", "America/Guatemala"}, + {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */ + {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */ + {28800, 0, "PST", "PST", "Pacific/Pitcairn"}, + {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */ + {32400, 1, "AKST", "AKDT", "US/Alaska"}, + {36000, 1, "HAST", "HADT", "US/Aleutian"} +}; + +/*#define DEBUG_TZNAME*/ + +static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset) +{ + int32_t idx; +#ifdef DEBUG_TZNAME + fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset); +#endif + for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++) + { + if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds + && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType + && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0 + && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0) + { + return OFFSET_ZONE_MAPPINGS[idx].olsonID; + } + } + return NULL; +} +#endif + +#ifdef SEARCH_TZFILE +#define MAX_READ_SIZE 512 + +typedef struct DefaultTZInfo { + char* defaultTZBuffer; + int64_t defaultTZFileSize; + FILE* defaultTZFilePtr; + UBool defaultTZstatus; + int32_t defaultTZPosition; +} DefaultTZInfo; + +/* + * This method compares the two files given to see if they are a match. + * It is currently use to compare two TZ files. + */ +static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) { + FILE* file; + int64_t sizeFile; + int64_t sizeFileLeft; + int32_t sizeFileRead; + int32_t sizeFileToRead; + char bufferFile[MAX_READ_SIZE]; + UBool result = TRUE; + + if (tzInfo->defaultTZFilePtr == NULL) { + tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r"); + } + file = fopen(TZFileName, "r"); + + tzInfo->defaultTZPosition = 0; /* reset position to begin search */ + + if (file != NULL && tzInfo->defaultTZFilePtr != NULL) { + /* First check that the file size are equal. */ + if (tzInfo->defaultTZFileSize == 0) { + fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END); + tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr); + } + fseek(file, 0, SEEK_END); + sizeFile = ftell(file); + sizeFileLeft = sizeFile; + + if (sizeFile != tzInfo->defaultTZFileSize) { + result = FALSE; + } else { + /* Store the data from the files in seperate buffers and + * compare each byte to determine equality. + */ + if (tzInfo->defaultTZBuffer == NULL) { + rewind(tzInfo->defaultTZFilePtr); + tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize); + sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr); + } + rewind(file); + while(sizeFileLeft > 0) { + uprv_memset(bufferFile, 0, MAX_READ_SIZE); + sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE; + + sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file); + if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) { + result = FALSE; + break; + } + sizeFileLeft -= sizeFileRead; + tzInfo->defaultTZPosition += sizeFileRead; + } + } + } else { + result = FALSE; + } + + if (file != NULL) { + fclose(file); + } + + return result; +} + + +/* dirent also lists two entries: "." and ".." that we can safely ignore. */ +#define SKIP1 "." +#define SKIP2 ".." +static UBool U_CALLCONV putil_cleanup(void); +static CharString *gSearchTZFileResult = NULL; + +/* + * This method recursively traverses the directory given for a matching TZ file and returns the first match. + * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results. + */ +static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) { + DIR* dirp = NULL; + struct dirent* dirEntry = NULL; + char* result = NULL; + UErrorCode status = U_ZERO_ERROR; + + /* Save the current path */ + CharString curpath(path, -1, status); + if (U_FAILURE(status)) { + goto cleanupAndReturn; + } + + dirp = opendir(path); + if (dirp == NULL) { + goto cleanupAndReturn; + } + + if (gSearchTZFileResult == NULL) { + gSearchTZFileResult = new CharString; + if (gSearchTZFileResult == NULL) { + goto cleanupAndReturn; + } + ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); + } + + /* Check each entry in the directory. */ + while((dirEntry = readdir(dirp)) != NULL) { + const char* dirName = dirEntry->d_name; + if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0 + && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) { + /* Create a newpath with the new entry to test each entry in the directory. */ + CharString newpath(curpath, status); + newpath.append(dirName, -1, status); + if (U_FAILURE(status)) { + break; + } + + DIR* subDirp = NULL; + if ((subDirp = opendir(newpath.data())) != NULL) { + /* If this new path is a directory, make a recursive call with the newpath. */ + closedir(subDirp); + newpath.append('/', status); + if (U_FAILURE(status)) { + break; + } + result = searchForTZFile(newpath.data(), tzInfo); + /* + Have to get out here. Otherwise, we'd keep looking + and return the first match in the top-level directory + if there's a match in the top-level. If not, this function + would return NULL and set gTimeZoneBufferPtr to NULL in initDefault(). + It worked without this in most cases because we have a fallback of calling + localtime_r to figure out the default timezone. + */ + if (result != NULL) + break; + } else { + if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) { + int32_t amountToSkip = sizeof(TZZONEINFO) - 1; + if (amountToSkip > newpath.length()) { + amountToSkip = newpath.length(); + } + const char* zoneid = newpath.data() + amountToSkip; + skipZoneIDPrefix(&zoneid); + gSearchTZFileResult->clear(); + gSearchTZFileResult->append(zoneid, -1, status); + if (U_FAILURE(status)) { + break; + } + result = gSearchTZFileResult->data(); + /* Get out after the first one found. */ + break; + } + } + } + } + + cleanupAndReturn: + if (dirp) { + closedir(dirp); + } + return result; +} +#endif + +#if U_PLATFORM == U_PF_ANDROID +typedef int(system_property_read_callback)(const prop_info* info, + void (*callback)(void* cookie, + const char* name, + const char* value, + uint32_t serial), + void* cookie); +typedef int(system_property_get)(const char*, char*); + +static char gAndroidTimeZone[PROP_VALUE_MAX] = { '\0' }; + +static void u_property_read(void* cookie, const char* name, const char* value, + uint32_t serial) { + uprv_strcpy((char* )cookie, value); +} +#endif + +U_CAPI void U_EXPORT2 +uprv_tzname_clear_cache(void) +{ +#if U_PLATFORM == U_PF_ANDROID + /* Android's timezone is stored in system property. */ + gAndroidTimeZone[0] = '\0'; + void* libc = dlopen("libc.so", RTLD_NOLOAD); + if (libc) { + /* Android API 26+ has new API to get system property and old API + * (__system_property_get) is deprecated */ + system_property_read_callback* property_read_callback = + (system_property_read_callback*)dlsym( + libc, "__system_property_read_callback"); + if (property_read_callback) { + const prop_info* info = + __system_property_find("persist.sys.timezone"); + if (info) { + property_read_callback(info, &u_property_read, gAndroidTimeZone); + } + } else { + system_property_get* property_get = + (system_property_get*)dlsym(libc, "__system_property_get"); + if (property_get) { + property_get("persist.sys.timezone", gAndroidTimeZone); + } + } + dlclose(libc); + } +#endif + +#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) + gTimeZoneBufferPtr = NULL; +#endif +} + +U_CAPI const char* U_EXPORT2 +uprv_tzname(int n) +{ + (void)n; // Avoid unreferenced parameter warning. + const char *tzid = NULL; +#if U_PLATFORM_USES_ONLY_WIN32_API + tzid = uprv_detectWindowsTimeZone(); + + if (tzid != NULL) { + return tzid; + } + +#ifndef U_TZNAME + // The return value is free'd in timezone.cpp on Windows because + // the other code path returns a pointer to a heap location. + // If we don't have a name already, then tzname wouldn't be any + // better, so just fall back. + return uprv_strdup(""); +#endif // !U_TZNAME + +#else + +/*#if U_PLATFORM_IS_DARWIN_BASED + int ret; + + tzid = getenv("TZFILE"); + if (tzid != NULL) { + return tzid; + } +#endif*/ + +/* This code can be temporarily disabled to test tzname resolution later on. */ +#ifndef DEBUG_TZNAME +#if U_PLATFORM == U_PF_ANDROID + tzid = gAndroidTimeZone; +#else + tzid = getenv("TZ"); +#endif + if (tzid != NULL && isValidOlsonID(tzid) +#if U_PLATFORM == U_PF_SOLARIS + /* When TZ equals localtime on Solaris, check the /etc/localtime file. */ + && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0 +#endif + ) { + /* The colon forces tzset() to treat the remainder as zoneinfo path */ + if (tzid[0] == ':') { + tzid++; + } + /* This might be a good Olson ID. */ + skipZoneIDPrefix(&tzid); + return tzid; + } + /* else U_TZNAME will give a better result. */ +#endif + +#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK) + /* Caller must handle threading issues */ + if (gTimeZoneBufferPtr == NULL) { + /* + This is a trick to look at the name of the link to get the Olson ID + because the tzfile contents is underspecified. + This isn't guaranteed to work because it may not be a symlink. + */ + int32_t ret = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1); + if (0 < ret) { + int32_t tzZoneInfoTailLen = uprv_strlen(TZZONEINFOTAIL); + gTimeZoneBuffer[ret] = 0; + char * tzZoneInfoTailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL); + + if (tzZoneInfoTailPtr != NULL + && isValidOlsonID(tzZoneInfoTailPtr + tzZoneInfoTailLen)) + { + return (gTimeZoneBufferPtr = tzZoneInfoTailPtr + tzZoneInfoTailLen); + } + } else { +#if defined(SEARCH_TZFILE) + DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo)); + if (tzInfo != NULL) { + tzInfo->defaultTZBuffer = NULL; + tzInfo->defaultTZFileSize = 0; + tzInfo->defaultTZFilePtr = NULL; + tzInfo->defaultTZstatus = FALSE; + tzInfo->defaultTZPosition = 0; + + gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo); + + /* Free previously allocated memory */ + if (tzInfo->defaultTZBuffer != NULL) { + uprv_free(tzInfo->defaultTZBuffer); + } + if (tzInfo->defaultTZFilePtr != NULL) { + fclose(tzInfo->defaultTZFilePtr); + } + uprv_free(tzInfo); + } + + if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) { + return gTimeZoneBufferPtr; + } +#endif + } + } + else { + return gTimeZoneBufferPtr; + } +#endif +#endif + +#ifdef U_TZNAME +#if U_PLATFORM_USES_ONLY_WIN32_API + /* The return value is free'd in timezone.cpp on Windows because + * the other code path returns a pointer to a heap location. */ + return uprv_strdup(U_TZNAME[n]); +#else + /* + U_TZNAME is usually a non-unique abbreviation, which isn't normally usable. + So we remap the abbreviation to an olson ID. + + Since Windows exposes a little more timezone information, + we normally don't use this code on Windows because + uprv_detectWindowsTimeZone should have already given the correct answer. + */ + { + struct tm juneSol, decemberSol; + int daylightType; + static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/ + static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/ + + /* This probing will tell us when daylight savings occurs. */ + localtime_r(&juneSolstice, &juneSol); + localtime_r(&decemberSolstice, &decemberSol); + if(decemberSol.tm_isdst > 0) { + daylightType = U_DAYLIGHT_DECEMBER; + } else if(juneSol.tm_isdst > 0) { + daylightType = U_DAYLIGHT_JUNE; + } else { + daylightType = U_DAYLIGHT_NONE; + } + tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone()); + if (tzid != NULL) { + return tzid; + } + } + return U_TZNAME[n]; +#endif +#else + return ""; +#endif +} + +/* Get and set the ICU data directory --------------------------------------- */ + +static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER; +static char *gDataDirectory = NULL; + +UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER; +static CharString *gTimeZoneFilesDirectory = NULL; + +#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API + static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */ + static bool gCorrectedPOSIXLocaleHeapAllocated = false; +#endif + +static UBool U_CALLCONV putil_cleanup(void) +{ + if (gDataDirectory && *gDataDirectory) { + uprv_free(gDataDirectory); + } + gDataDirectory = NULL; + gDataDirInitOnce.reset(); + + delete gTimeZoneFilesDirectory; + gTimeZoneFilesDirectory = NULL; + gTimeZoneFilesInitOnce.reset(); + +#ifdef SEARCH_TZFILE + delete gSearchTZFileResult; + gSearchTZFileResult = NULL; +#endif + +#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API + if (gCorrectedPOSIXLocale && gCorrectedPOSIXLocaleHeapAllocated) { + uprv_free(const_cast<char *>(gCorrectedPOSIXLocale)); + gCorrectedPOSIXLocale = NULL; + gCorrectedPOSIXLocaleHeapAllocated = false; + } +#endif + return TRUE; +} + +/* + * Set the data directory. + * Make a copy of the passed string, and set the global data dir to point to it. + */ +U_CAPI void U_EXPORT2 +u_setDataDirectory(const char *directory) { + char *newDataDir; + int32_t length; + + if(directory==NULL || *directory==0) { + /* A small optimization to prevent the malloc and copy when the + shared library is used, and this is a way to make sure that NULL + is never returned. + */ + newDataDir = (char *)""; + } + else { + length=(int32_t)uprv_strlen(directory); + newDataDir = (char *)uprv_malloc(length + 2); + /* Exit out if newDataDir could not be created. */ + if (newDataDir == NULL) { + return; + } + uprv_strcpy(newDataDir, directory); + +#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) + { + char *p; + while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) { + *p = U_FILE_SEP_CHAR; + } + } +#endif + } + + if (gDataDirectory && *gDataDirectory) { + uprv_free(gDataDirectory); + } + gDataDirectory = newDataDir; + ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); +} + +U_CAPI UBool U_EXPORT2 +uprv_pathIsAbsolute(const char *path) +{ + if(!path || !*path) { + return FALSE; + } + + if(*path == U_FILE_SEP_CHAR) { + return TRUE; + } + +#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) + if(*path == U_FILE_ALT_SEP_CHAR) { + return TRUE; + } +#endif + +#if U_PLATFORM_USES_ONLY_WIN32_API + if( (((path[0] >= 'A') && (path[0] <= 'Z')) || + ((path[0] >= 'a') && (path[0] <= 'z'))) && + path[1] == ':' ) { + return TRUE; + } +#endif + + return FALSE; +} + +/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR + (needed for some Darwin ICU build environments) */ +#if U_PLATFORM_IS_DARWIN_BASED && TARGET_OS_SIMULATOR +# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR) +# define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT" +# endif +#endif + +#if defined(ICU_DATA_DIR_WINDOWS) +// Helper function to get the ICU Data Directory under the Windows directory location. +static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength) +{ + wchar_t windowsPath[MAX_PATH]; + char windowsPathUtf8[MAX_PATH]; + + UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath)); + if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) { + // Convert UTF-16 to a UTF-8 string. + UErrorCode status = U_ZERO_ERROR; + int32_t windowsPathUtf8Len = 0; + u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)), + &windowsPathUtf8Len, reinterpret_cast<const UChar*>(windowsPath), -1, &status); + + if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) && + (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) { + // Ensure it always has a separator, so we can append the ICU data path. + if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) { + windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR; + windowsPathUtf8[windowsPathUtf8Len] = '\0'; + } + // Check if the concatenated string will fit. + if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) { + uprv_strcpy(directoryBuffer, windowsPathUtf8); + uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS); + return TRUE; + } + } + } + + return FALSE; +} +#endif + +static void U_CALLCONV dataDirectoryInitFn() { + /* If we already have the directory, then return immediately. Will happen if user called + * u_setDataDirectory(). + */ + if (gDataDirectory) { + return; + } + + const char *path = NULL; +#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) + char datadir_path_buffer[PATH_MAX]; +#endif + + /* + When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to + override ICU's data with the ICU_DATA environment variable. This prevents + problems where multiple custom copies of ICU's specific version of data + are installed on a system. Either the application must define the data + directory with u_setDataDirectory, define ICU_DATA_DIR when compiling + ICU, set the data with udata_setCommonData or trust that all of the + required data is contained in ICU's data library that contains + the entry point defined by U_ICUDATA_ENTRY_POINT. + + There may also be some platforms where environment variables + are not allowed. + */ +# if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO + /* First try to get the environment variable */ +# if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP does not support getenv + path=getenv("ICU_DATA"); +# endif +# endif + + /* ICU_DATA_DIR may be set as a compile option. + * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time + * and is used only when data is built in archive mode eliminating the need + * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation + * directory of the data dat file. Users should use ICU_DATA_DIR if they want to + * set their own path. + */ +#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR) + if(path==NULL || *path==0) { +# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) + const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR); +# endif +# ifdef ICU_DATA_DIR + path=ICU_DATA_DIR; +# else + path=U_ICU_DATA_DEFAULT_DIR; +# endif +# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR) + if (prefix != NULL) { + snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path); + path=datadir_path_buffer; + } +# endif + } +#endif + +#if defined(ICU_DATA_DIR_WINDOWS) + char datadir_path_buffer[MAX_PATH]; + if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) { + path = datadir_path_buffer; + } +#endif + + if(path==NULL) { + /* It looks really bad, set it to something. */ + path = ""; + } + + u_setDataDirectory(path); + return; +} + +U_CAPI const char * U_EXPORT2 +u_getDataDirectory(void) { + umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn); + return gDataDirectory; +} + +static void setTimeZoneFilesDir(const char *path, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + gTimeZoneFilesDirectory->clear(); + gTimeZoneFilesDirectory->append(path, status); +#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) + char *p = gTimeZoneFilesDirectory->data(); + while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) { + *p = U_FILE_SEP_CHAR; + } +#endif +} + +#define TO_STRING(x) TO_STRING_2(x) +#define TO_STRING_2(x) #x + +static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) { + U_ASSERT(gTimeZoneFilesDirectory == NULL); + ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); + gTimeZoneFilesDirectory = new CharString(); + if (gTimeZoneFilesDirectory == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + + const char *dir = ""; + +#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR) + char timezonefilesdir_path_buffer[PATH_MAX]; + const char *prefix = getenv(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR); +#endif + +#if U_PLATFORM_HAS_WINUWP_API == 1 +// The UWP version does not support the environment variable setting. + +# if defined(ICU_DATA_DIR_WINDOWS) + // When using the Windows system data, we can possibly pick up time zone data from the Windows directory. + char datadir_path_buffer[MAX_PATH]; + if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) { + dir = datadir_path_buffer; + } +# endif + +#else + dir = getenv("ICU_TIMEZONE_FILES_DIR"); +#endif // U_PLATFORM_HAS_WINUWP_API + +#if defined(U_TIMEZONE_FILES_DIR) + if (dir == NULL) { + // Build time configuration setting. + dir = TO_STRING(U_TIMEZONE_FILES_DIR); + } +#endif + + if (dir == NULL) { + dir = ""; + } + +#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR) + if (prefix != NULL) { + snprintf(timezonefilesdir_path_buffer, PATH_MAX, "%s%s", prefix, dir); + dir = timezonefilesdir_path_buffer; + } +#endif + + setTimeZoneFilesDir(dir, status); +} + + +U_CAPI const char * U_EXPORT2 +u_getTimeZoneFilesDirectory(UErrorCode *status) { + umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); + return U_SUCCESS(*status) ? gTimeZoneFilesDirectory->data() : ""; +} + +U_CAPI void U_EXPORT2 +u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) { + umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status); + setTimeZoneFilesDir(path, *status); + + // Note: this function does some extra churn, first setting based on the + // environment, then immediately replacing with the value passed in. + // The logic is simpler that way, and performance shouldn't be an issue. +} + + +#if U_POSIX_LOCALE +/* A helper function used by uprv_getPOSIXIDForDefaultLocale and + * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for + * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories. + */ +static const char *uprv_getPOSIXIDForCategory(int category) +{ + const char* posixID = NULL; + if (category == LC_MESSAGES || category == LC_CTYPE) { + /* + * On Solaris two different calls to setlocale can result in + * different values. Only get this value once. + * + * We must check this first because an application can set this. + * + * LC_ALL can't be used because it's platform dependent. The LANG + * environment variable seems to affect LC_CTYPE variable by default. + * Here is what setlocale(LC_ALL, NULL) can return. + * HPUX can return 'C C C C C C C' + * Solaris can return /en_US/C/C/C/C/C on the second try. + * Linux can return LC_CTYPE=C;LC_NUMERIC=C;... + * + * The default codepage detection also needs to use LC_CTYPE. + * + * Do not call setlocale(LC_*, "")! Using an empty string instead + * of NULL, will modify the libc behavior. + */ + posixID = setlocale(category, NULL); + if ((posixID == 0) + || (uprv_strcmp("C", posixID) == 0) + || (uprv_strcmp("POSIX", posixID) == 0)) + { + /* Maybe we got some garbage. Try something more reasonable */ + posixID = getenv("LC_ALL"); + /* Solaris speaks POSIX - See IEEE Std 1003.1-2008 + * This is needed to properly handle empty env. variables + */ +#if U_PLATFORM == U_PF_SOLARIS + if ((posixID == 0) || (posixID[0] == '\0')) { + posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); + if ((posixID == 0) || (posixID[0] == '\0')) { +#else + if (posixID == 0) { + posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE"); + if (posixID == 0) { +#endif + posixID = getenv("LANG"); + } + } + } + } + if ((posixID==0) + || (uprv_strcmp("C", posixID) == 0) + || (uprv_strcmp("POSIX", posixID) == 0)) + { + /* Nothing worked. Give it a nice POSIX default value. */ + posixID = "en_US_POSIX"; + // Note: this test will not catch 'C.UTF-8', + // that will be handled in uprv_getDefaultLocaleID(). + // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage() + // caller which expects to see "en_US_POSIX" in many branches. + } + return posixID; +} + +/* Return just the POSIX id for the default locale, whatever happens to be in + * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG. + */ +static const char *uprv_getPOSIXIDForDefaultLocale(void) +{ + static const char* posixID = NULL; + if (posixID == 0) { + posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES); + } + return posixID; +} + +#if !U_CHARSET_IS_UTF8 +/* Return just the POSIX id for the default codepage, whatever happens to be in + * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG. + */ +static const char *uprv_getPOSIXIDForDefaultCodepage(void) +{ + static const char* posixID = NULL; + if (posixID == 0) { + posixID = uprv_getPOSIXIDForCategory(LC_CTYPE); + } + return posixID; +} +#endif +#endif + +/* NOTE: The caller should handle thread safety */ +U_CAPI const char* U_EXPORT2 +uprv_getDefaultLocaleID() +{ +#if U_POSIX_LOCALE +/* + Note that: (a '!' means the ID is improper somehow) + LC_ALL ----> default_loc codepage +-------------------------------------------------------- + ab.CD ab CD + ab@CD ab__CD - + ab@CD.EF ab__CD EF + + ab_CD.EF@GH ab_CD_GH EF + +Some 'improper' ways to do the same as above: + ! ab_CD@GH.EF ab_CD_GH EF + ! ab_CD.EF@GH.IJ ab_CD_GH EF + ! ab_CD@ZZ.EF@GH.IJ ab_CD_GH EF + + _CD@GH _CD_GH - + _CD.EF@GH _CD_GH EF + +The variant cannot have dots in it. +The 'rightmost' variant (@xxx) wins. +The leftmost codepage (.xxx) wins. +*/ + const char* posixID = uprv_getPOSIXIDForDefaultLocale(); + + /* Format: (no spaces) + ll [ _CC ] [ . MM ] [ @ VV] + + l = lang, C = ctry, M = charmap, V = variant + */ + + if (gCorrectedPOSIXLocale != nullptr) { + return gCorrectedPOSIXLocale; + } + + // Copy the ID into owned memory. + // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination + char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1)); + if (correctedPOSIXLocale == nullptr) { + return nullptr; + } + uprv_strcpy(correctedPOSIXLocale, posixID); + + char *limit; + if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) { + *limit = 0; + } + if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) { + *limit = 0; + } + + if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant + || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) { + // Raw input was C.* or POSIX.*, Give it a nice POSIX default value. + // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory()) + uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX"); + } + + /* Note that we scan the *uncorrected* ID. */ + const char *p; + if ((p = uprv_strrchr(posixID, '@')) != nullptr) { + p++; + + /* Take care of any special cases here.. */ + if (!uprv_strcmp(p, "nynorsk")) { + p = "NY"; + /* Don't worry about no__NY. In practice, it won't appear. */ + } + + if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) { + uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */ + } + else { + uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ + } + + const char *q; + if ((q = uprv_strchr(p, '.')) != nullptr) { + /* How big will the resulting string be? */ + int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p)); + uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset + correctedPOSIXLocale[len] = 0; + } + else { + /* Anything following the @ sign */ + uprv_strcat(correctedPOSIXLocale, p); + } + + /* Should there be a map from 'no@nynorsk' -> no_NO_NY here? + * How about 'russian' -> 'ru'? + * Many of the other locales using ISO codes will be handled by the + * canonicalization functions in uloc_getDefault. + */ + } + + if (gCorrectedPOSIXLocale == nullptr) { + gCorrectedPOSIXLocale = correctedPOSIXLocale; + gCorrectedPOSIXLocaleHeapAllocated = true; + ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); + correctedPOSIXLocale = nullptr; + } + posixID = gCorrectedPOSIXLocale; + + if (correctedPOSIXLocale != nullptr) { /* Was already set - clean up. */ + uprv_free(correctedPOSIXLocale); + } + + return posixID; + +#elif U_PLATFORM_USES_ONLY_WIN32_API +#define POSIX_LOCALE_CAPACITY 64 + UErrorCode status = U_ZERO_ERROR; + char *correctedPOSIXLocale = nullptr; + + // If we have already figured this out just use the cached value + if (gCorrectedPOSIXLocale != nullptr) { + return gCorrectedPOSIXLocale; + } + + // No cached value, need to determine the current value + static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; + int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH); + + // Now we should have a Windows locale name that needs converted to the POSIX style. + if (length > 0) // If length is 0, then the GetLocaleInfoEx failed. + { + // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.) + char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; + + int32_t i; + for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++) + { + if (windowsLocale[i] == '_') + { + modifiedWindowsLocale[i] = '-'; + } + else + { + modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]); + } + + if (modifiedWindowsLocale[i] == '\0') + { + break; + } + } + + if (i >= UPRV_LENGTHOF(modifiedWindowsLocale)) + { + // Ran out of room, can't really happen, maybe we'll be lucky about a matching + // locale when tags are dropped + modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0'; + } + + // Now normalize the resulting name + correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1)); + /* TODO: Should we just exit on memory allocation failure? */ + if (correctedPOSIXLocale) + { + int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status); + if (U_SUCCESS(status)) + { + *(correctedPOSIXLocale + posixLen) = 0; + gCorrectedPOSIXLocale = correctedPOSIXLocale; + gCorrectedPOSIXLocaleHeapAllocated = true; + ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup); + } + else + { + uprv_free(correctedPOSIXLocale); + } + } + } + + // If unable to find a locale we can agree upon, use en-US by default + if (gCorrectedPOSIXLocale == nullptr) { + gCorrectedPOSIXLocale = "en_US"; + } + return gCorrectedPOSIXLocale; + +#elif U_PLATFORM == U_PF_OS400 + /* locales are process scoped and are by definition thread safe */ + static char correctedLocale[64]; + const char *localeID = getenv("LC_ALL"); + char *p; + + if (localeID == NULL) + localeID = getenv("LANG"); + if (localeID == NULL) + localeID = setlocale(LC_ALL, NULL); + /* Make sure we have something... */ + if (localeID == NULL) + return "en_US_POSIX"; + + /* Extract the locale name from the path. */ + if((p = uprv_strrchr(localeID, '/')) != NULL) + { + /* Increment p to start of locale name. */ + p++; + localeID = p; + } + + /* Copy to work location. */ + uprv_strcpy(correctedLocale, localeID); + + /* Strip off the '.locale' extension. */ + if((p = uprv_strchr(correctedLocale, '.')) != NULL) { + *p = 0; + } + + /* Upper case the locale name. */ + T_CString_toUpperCase(correctedLocale); + + /* See if we are using the POSIX locale. Any of the + * following are equivalent and use the same QLGPGCMA + * (POSIX) locale. + * QLGPGCMA2 means UCS2 + * QLGPGCMA_4 means UTF-32 + * QLGPGCMA_8 means UTF-8 + */ + if ((uprv_strcmp("C", correctedLocale) == 0) || + (uprv_strcmp("POSIX", correctedLocale) == 0) || + (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0)) + { + uprv_strcpy(correctedLocale, "en_US_POSIX"); + } + else + { + int16_t LocaleLen; + + /* Lower case the lang portion. */ + for(p = correctedLocale; *p != 0 && *p != '_'; p++) + { + *p = uprv_tolower(*p); + } + + /* Adjust for Euro. After '_E' add 'URO'. */ + LocaleLen = uprv_strlen(correctedLocale); + if (correctedLocale[LocaleLen - 2] == '_' && + correctedLocale[LocaleLen - 1] == 'E') + { + uprv_strcat(correctedLocale, "URO"); + } + + /* If using Lotus-based locale then convert to + * equivalent non Lotus. + */ + else if (correctedLocale[LocaleLen - 2] == '_' && + correctedLocale[LocaleLen - 1] == 'L') + { + correctedLocale[LocaleLen - 2] = 0; + } + + /* There are separate simplified and traditional + * locales called zh_HK_S and zh_HK_T. + */ + else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0) + { + uprv_strcpy(correctedLocale, "zh_HK"); + } + + /* A special zh_CN_GBK locale... + */ + else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0) + { + uprv_strcpy(correctedLocale, "zh_CN"); + } + + } + + return correctedLocale; +#endif + +} + +#if !U_CHARSET_IS_UTF8 +#if U_POSIX_LOCALE +/* +Due to various platform differences, one platform may specify a charset, +when they really mean a different charset. Remap the names so that they are +compatible with ICU. Only conflicting/ambiguous aliases should be resolved +here. Before adding anything to this function, please consider adding unique +names to the ICU alias table in the data directory. +*/ +static const char* +remapPlatformDependentCodepage(const char *locale, const char *name) { + if (locale != NULL && *locale == 0) { + /* Make sure that an empty locale is handled the same way. */ + locale = NULL; + } + if (name == NULL) { + return NULL; + } +#if U_PLATFORM == U_PF_AIX + if (uprv_strcmp(name, "IBM-943") == 0) { + /* Use the ASCII compatible ibm-943 */ + name = "Shift-JIS"; + } + else if (uprv_strcmp(name, "IBM-1252") == 0) { + /* Use the windows-1252 that contains the Euro */ + name = "IBM-5348"; + } +#elif U_PLATFORM == U_PF_SOLARIS + if (locale != NULL && uprv_strcmp(name, "EUC") == 0) { + /* Solaris underspecifies the "EUC" name. */ + if (uprv_strcmp(locale, "zh_CN") == 0) { + name = "EUC-CN"; + } + else if (uprv_strcmp(locale, "zh_TW") == 0) { + name = "EUC-TW"; + } + else if (uprv_strcmp(locale, "ko_KR") == 0) { + name = "EUC-KR"; + } + } + else if (uprv_strcmp(name, "eucJP") == 0) { + /* + ibm-954 is the best match. + ibm-33722 is the default for eucJP (similar to Windows). + */ + name = "eucjis"; + } + else if (uprv_strcmp(name, "646") == 0) { + /* + * The default codepage given by Solaris is 646 but the C library routines treat it as if it was + * ISO-8859-1 instead of US-ASCII(646). + */ + name = "ISO-8859-1"; + } +#elif U_PLATFORM_IS_DARWIN_BASED + if (locale == NULL && *name == 0) { + /* + No locale was specified, and an empty name was passed in. + This usually indicates that nl_langinfo didn't return valid information. + Mac OS X uses UTF-8 by default (especially the locale data and console). + */ + name = "UTF-8"; + } + else if (uprv_strcmp(name, "CP949") == 0) { + /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ + name = "EUC-KR"; + } + else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) { + /* + * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. + */ + name = "UTF-8"; + } +#elif U_PLATFORM == U_PF_BSD + if (uprv_strcmp(name, "CP949") == 0) { + /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */ + name = "EUC-KR"; + } +#elif U_PLATFORM == U_PF_HPUX + if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) { + /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */ + /* zh_TW.big5 is not the same charset as zh_HK.big5! */ + name = "hkbig5"; + } + else if (uprv_strcmp(name, "eucJP") == 0) { + /* + ibm-1350 is the best match, but unavailable. + ibm-954 is mostly a superset of ibm-1350. + ibm-33722 is the default for eucJP (similar to Windows). + */ + name = "eucjis"; + } +#elif U_PLATFORM == U_PF_LINUX + if (locale != NULL && uprv_strcmp(name, "euc") == 0) { + /* Linux underspecifies the "EUC" name. */ + if (uprv_strcmp(locale, "korean") == 0) { + name = "EUC-KR"; + } + else if (uprv_strcmp(locale, "japanese") == 0) { + /* See comment below about eucJP */ + name = "eucjis"; + } + } + else if (uprv_strcmp(name, "eucjp") == 0) { + /* + ibm-1350 is the best match, but unavailable. + ibm-954 is mostly a superset of ibm-1350. + ibm-33722 is the default for eucJP (similar to Windows). + */ + name = "eucjis"; + } + else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && + (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) { + /* + * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII. + */ + name = "UTF-8"; + } + /* + * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of + * it by falling back to 'US-ASCII' when NULL is returned from this + * function. So, we don't have to worry about it here. + */ +#endif + /* return NULL when "" is passed in */ + if (*name == 0) { + name = NULL; + } + return name; +} + +static const char* +getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity) +{ + char localeBuf[100]; + const char *name = NULL; + char *variant = NULL; + + if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) { + size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1); + uprv_strncpy(localeBuf, localeName, localeCapacity); + localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */ + name = uprv_strncpy(buffer, name+1, buffCapacity); + buffer[buffCapacity-1] = 0; /* ensure NULL termination */ + if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) { + *variant = 0; + } + name = remapPlatformDependentCodepage(localeBuf, name); + } + return name; +} +#endif + +static const char* +int_getDefaultCodepage() +{ +#if U_PLATFORM == U_PF_OS400 + uint32_t ccsid = 37; /* Default to ibm-37 */ + static char codepage[64]; + Qwc_JOBI0400_t jobinfo; + Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */ + + EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400", + "* ", " ", &error); + + if (error.Bytes_Available == 0) { + if (jobinfo.Coded_Char_Set_ID != 0xFFFF) { + ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID; + } + else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) { + ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id; + } + /* else use the default */ + } + sprintf(codepage,"ibm-%d", ccsid); + return codepage; + +#elif U_PLATFORM == U_PF_OS390 + static char codepage[64]; + + strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING)); + strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING); + codepage[63] = 0; /* NULL terminate */ + + return codepage; + +#elif U_PLATFORM_USES_ONLY_WIN32_API + static char codepage[64]; + DWORD codepageNumber = 0; + +#if U_PLATFORM_HAS_WINUWP_API == 1 + // UWP doesn't have a direct API to get the default ACP as Microsoft would rather + // have folks use Unicode than a "system" code page, however this is the same + // codepage as the system default locale codepage. (FWIW, the system locale is + // ONLY used for codepage, it should never be used for anything else) + GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER, + (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR)); +#else + // Win32 apps can call GetACP + codepageNumber = GetACP(); +#endif + // Special case for UTF-8 + if (codepageNumber == 65001) + { + return "UTF-8"; + } + // Windows codepages can look like windows-1252, so format the found number + // the numbers are eclectic, however all valid system code pages, besides UTF-8 + // are between 3 and 19999 + if (codepageNumber > 0 && codepageNumber < 20000) + { + sprintf(codepage, "windows-%ld", codepageNumber); + return codepage; + } + // If the codepage number call failed then return UTF-8 + return "UTF-8"; + +#elif U_POSIX_LOCALE + static char codesetName[100]; + const char *localeName = NULL; + const char *name = NULL; + + localeName = uprv_getPOSIXIDForDefaultCodepage(); + uprv_memset(codesetName, 0, sizeof(codesetName)); + /* On Solaris nl_langinfo returns C locale values unless setlocale + * was called earlier. + */ +#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS) + /* When available, check nl_langinfo first because it usually gives more + useful names. It depends on LC_CTYPE. + nl_langinfo may use the same buffer as setlocale. */ + { + const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET); +#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED + /* + * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8 + * instead of ASCII. + */ + if (uprv_strcmp(localeName, "en_US_POSIX") != 0) { + codeset = remapPlatformDependentCodepage(localeName, codeset); + } else +#endif + { + codeset = remapPlatformDependentCodepage(NULL, codeset); + } + + if (codeset != NULL) { + uprv_strncpy(codesetName, codeset, sizeof(codesetName)); + codesetName[sizeof(codesetName)-1] = 0; + return codesetName; + } + } +#endif + + /* Use setlocale in a nice way, and then check some environment variables. + Maybe the application used setlocale already. + */ + uprv_memset(codesetName, 0, sizeof(codesetName)); + name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName)); + if (name) { + /* if we can find the codeset name from setlocale, return that. */ + return name; + } + + if (*codesetName == 0) + { + /* Everything failed. Return US ASCII (ISO 646). */ + (void)uprv_strcpy(codesetName, "US-ASCII"); + } + return codesetName; +#else + return "US-ASCII"; +#endif +} + + +U_CAPI const char* U_EXPORT2 +uprv_getDefaultCodepage() +{ + static char const *name = NULL; + umtx_lock(NULL); + if (name == NULL) { + name = int_getDefaultCodepage(); + } + umtx_unlock(NULL); + return name; +} +#endif /* !U_CHARSET_IS_UTF8 */ + + +/* end of platform-specific implementation -------------- */ + +/* version handling --------------------------------------------------------- */ + +U_CAPI void U_EXPORT2 +u_versionFromString(UVersionInfo versionArray, const char *versionString) { + char *end; + uint16_t part=0; + + if(versionArray==NULL) { + return; + } + + if(versionString!=NULL) { + for(;;) { + versionArray[part]=(uint8_t)uprv_strtoul(versionString, &end, 10); + if(end==versionString || ++part==U_MAX_VERSION_LENGTH || *end!=U_VERSION_DELIMITER) { + break; + } + versionString=end+1; + } + } + + while(part<U_MAX_VERSION_LENGTH) { + versionArray[part++]=0; + } +} + +U_CAPI void U_EXPORT2 +u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) { + if(versionArray!=NULL && versionString!=NULL) { + char versionChars[U_MAX_VERSION_STRING_LENGTH+1]; + int32_t len = u_strlen(versionString); + if(len>U_MAX_VERSION_STRING_LENGTH) { + len = U_MAX_VERSION_STRING_LENGTH; + } + u_UCharsToChars(versionString, versionChars, len); + versionChars[len]=0; + u_versionFromString(versionArray, versionChars); + } +} + +U_CAPI void U_EXPORT2 +u_versionToString(const UVersionInfo versionArray, char *versionString) { + uint16_t count, part; + uint8_t field; + + if(versionString==NULL) { + return; + } + + if(versionArray==NULL) { + versionString[0]=0; + return; + } + + /* count how many fields need to be written */ + for(count=4; count>0 && versionArray[count-1]==0; --count) { + } + + if(count <= 1) { + count = 2; + } + + /* write the first part */ + /* write the decimal field value */ + field=versionArray[0]; + if(field>=100) { + *versionString++=(char)('0'+field/100); + field%=100; + } + if(field>=10) { + *versionString++=(char)('0'+field/10); + field%=10; + } + *versionString++=(char)('0'+field); + + /* write the following parts */ + for(part=1; part<count; ++part) { + /* write a dot first */ + *versionString++=U_VERSION_DELIMITER; + + /* write the decimal field value */ + field=versionArray[part]; + if(field>=100) { + *versionString++=(char)('0'+field/100); + field%=100; + } + if(field>=10) { + *versionString++=(char)('0'+field/10); + field%=10; + } + *versionString++=(char)('0'+field); + } + + /* NUL-terminate */ + *versionString=0; +} + +U_CAPI void U_EXPORT2 +u_getVersion(UVersionInfo versionArray) { + (void)copyright; // Suppress unused variable warning from clang. + u_versionFromString(versionArray, U_ICU_VERSION); +} + +/** + * icucfg.h dependent code + */ + +#if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API + +#if HAVE_DLFCN_H +#ifdef __MVS__ +#ifndef __SUSV3 +#define __SUSV3 1 +#endif +#endif +#include <dlfcn.h> +#endif /* HAVE_DLFCN_H */ + +U_CAPI void * U_EXPORT2 +uprv_dl_open(const char *libName, UErrorCode *status) { + void *ret = NULL; + if(U_FAILURE(*status)) return ret; + ret = dlopen(libName, RTLD_NOW|RTLD_GLOBAL); + if(ret==NULL) { +#ifdef U_TRACE_DYLOAD + printf("dlerror on dlopen(%s): %s\n", libName, dlerror()); +#endif + *status = U_MISSING_RESOURCE_ERROR; + } + return ret; +} + +U_CAPI void U_EXPORT2 +uprv_dl_close(void *lib, UErrorCode *status) { + if(U_FAILURE(*status)) return; + dlclose(lib); +} + +U_CAPI UVoidFunction* U_EXPORT2 +uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { + union { + UVoidFunction *fp; + void *vp; + } uret; + uret.fp = NULL; + if(U_FAILURE(*status)) return uret.fp; + uret.vp = dlsym(lib, sym); + if(uret.vp == NULL) { +#ifdef U_TRACE_DYLOAD + printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror()); +#endif + *status = U_MISSING_RESOURCE_ERROR; + } + return uret.fp; +} + +#elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API + +/* Windows API implementation. */ +// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation. */ + +U_CAPI void * U_EXPORT2 +uprv_dl_open(const char *libName, UErrorCode *status) { + HMODULE lib = NULL; + + if(U_FAILURE(*status)) return NULL; + + lib = LoadLibraryA(libName); + + if(lib==NULL) { + *status = U_MISSING_RESOURCE_ERROR; + } + + return (void*)lib; +} + +U_CAPI void U_EXPORT2 +uprv_dl_close(void *lib, UErrorCode *status) { + HMODULE handle = (HMODULE)lib; + if(U_FAILURE(*status)) return; + + FreeLibrary(handle); + + return; +} + +U_CAPI UVoidFunction* U_EXPORT2 +uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { + HMODULE handle = (HMODULE)lib; + UVoidFunction* addr = NULL; + + if(U_FAILURE(*status) || lib==NULL) return NULL; + + addr = (UVoidFunction*)GetProcAddress(handle, sym); + + if(addr==NULL) { + DWORD lastError = GetLastError(); + if(lastError == ERROR_PROC_NOT_FOUND) { + *status = U_MISSING_RESOURCE_ERROR; + } else { + *status = U_UNSUPPORTED_ERROR; /* other unknown error. */ + } + } + + return addr; +} + +#else + +/* No dynamic loading, null (nonexistent) implementation. */ + +U_CAPI void * U_EXPORT2 +uprv_dl_open(const char *libName, UErrorCode *status) { + (void)libName; + if(U_FAILURE(*status)) return NULL; + *status = U_UNSUPPORTED_ERROR; + return NULL; +} + +U_CAPI void U_EXPORT2 +uprv_dl_close(void *lib, UErrorCode *status) { + (void)lib; + if(U_FAILURE(*status)) return; + *status = U_UNSUPPORTED_ERROR; + return; +} + +U_CAPI UVoidFunction* U_EXPORT2 +uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) { + (void)lib; + (void)sym; + if(U_SUCCESS(*status)) { + *status = U_UNSUPPORTED_ERROR; + } + return (UVoidFunction*)NULL; +} + +#endif + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/thirdparty/icu4c/common/putilimp.h b/thirdparty/icu4c/common/putilimp.h new file mode 100644 index 0000000000..a325c6c359 --- /dev/null +++ b/thirdparty/icu4c/common/putilimp.h @@ -0,0 +1,615 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : putilimp.h +* +* Date Name Description +* 10/17/04 grhoten Move internal functions from putil.h to this file. +****************************************************************************** +*/ + +#ifndef PUTILIMP_H +#define PUTILIMP_H + +#include "unicode/utypes.h" +#include "unicode/putil.h" + +/** + * \def U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC + * Nearly all CPUs and compilers implement a right-shift of a signed integer + * as an Arithmetic Shift Right which copies the sign bit (the Most Significant Bit (MSB)) + * into the vacated bits (sign extension). + * For example, (int32_t)0xfff5fff3>>4 becomes 0xffff5fff and -1>>1=-1. + * + * This can be useful for storing a signed value in the upper bits + * and another bit field in the lower bits. + * The signed value can be retrieved by simple right-shifting. + * + * This is consistent with the Java language. + * + * However, the C standard allows compilers to implement a right-shift of a signed integer + * as a Logical Shift Right which copies a 0 into the vacated bits. + * For example, (int32_t)0xfff5fff3>>4 becomes 0x0fff5fff and -1>>1=0x7fffffff. + * + * Code that depends on the natural behavior should be guarded with this macro, + * with an alternate path for unusual platforms. + * @internal + */ +#ifdef U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC + /* Use the predefined value. */ +#else + /* + * Nearly all CPUs & compilers implement a right-shift of a signed integer + * as an Arithmetic Shift Right (with sign extension). + */ +# define U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC 1 +#endif + +/** Define this to 1 if your platform supports IEEE 754 floating point, + to 0 if it does not. */ +#ifndef IEEE_754 +# define IEEE_754 1 +#endif + +/** + * uintptr_t is an optional part of the standard definitions in stdint.h. + * The opengroup.org documentation for stdint.h says + * "On XSI-conformant systems, the intptr_t and uintptr_t types are required; + * otherwise, they are optional." + * We assume that when uintptr_t is defined, UINTPTR_MAX is defined as well. + * + * Do not use ptrdiff_t since it is signed. size_t is unsigned. + */ +/* TODO: This check fails on some z environments. Filed a ticket #9357 for this. */ +#if !defined(__intptr_t_defined) && !defined(UINTPTR_MAX) && (U_PLATFORM != U_PF_OS390) +typedef size_t uintptr_t; +#endif + +/*===========================================================================*/ +/** @{ Information about POSIX support */ +/*===========================================================================*/ + +#ifdef U_HAVE_NL_LANGINFO_CODESET + /* Use the predefined value. */ +#elif U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_ANDROID || U_PLATFORM == U_PF_QNX +# define U_HAVE_NL_LANGINFO_CODESET 0 +#else +# define U_HAVE_NL_LANGINFO_CODESET 1 +#endif + +#ifdef U_NL_LANGINFO_CODESET + /* Use the predefined value. */ +#elif !U_HAVE_NL_LANGINFO_CODESET +# define U_NL_LANGINFO_CODESET -1 +#elif U_PLATFORM == U_PF_OS400 + /* not defined */ +#else +# define U_NL_LANGINFO_CODESET CODESET +#endif + +#if defined(U_TZSET) || defined(U_HAVE_TZSET) + /* Use the predefined value. */ +#elif U_PLATFORM_USES_ONLY_WIN32_API + // UWP doesn't support tzset or environment variables for tz +#if U_PLATFORM_HAS_WINUWP_API == 0 +# define U_TZSET _tzset +#endif +#elif U_PLATFORM == U_PF_OS400 + /* not defined */ +#else +# define U_TZSET tzset +#endif + +#if defined(U_TIMEZONE) || defined(U_HAVE_TIMEZONE) + /* Use the predefined value. */ +#elif U_PLATFORM == U_PF_ANDROID +# define U_TIMEZONE timezone +#elif defined(__UCLIBC__) + // uClibc does not have __timezone or _timezone. +#elif defined(_NEWLIB_VERSION) +# define U_TIMEZONE _timezone +#elif defined(__GLIBC__) + // glibc +# define U_TIMEZONE __timezone +#elif U_PLATFORM_IS_LINUX_BASED + // not defined +#elif U_PLATFORM_USES_ONLY_WIN32_API +# define U_TIMEZONE _timezone +#elif U_PLATFORM == U_PF_BSD && !defined(__NetBSD__) + /* not defined */ +#elif U_PLATFORM == U_PF_OS400 + /* not defined */ +#elif U_PLATFORM == U_PF_IPHONE + /* not defined */ +#else +# define U_TIMEZONE timezone +#endif + +#if defined(U_TZNAME) || defined(U_HAVE_TZNAME) + /* Use the predefined value. */ +#elif U_PLATFORM_USES_ONLY_WIN32_API + /* not usable on all windows platforms */ +#if U_PLATFORM_HAS_WINUWP_API == 0 +# define U_TZNAME _tzname +#endif +#elif U_PLATFORM == U_PF_OS400 + /* not defined */ +#else +# define U_TZNAME tzname +#endif + +#ifdef U_HAVE_MMAP + /* Use the predefined value. */ +#elif U_PLATFORM_USES_ONLY_WIN32_API +# define U_HAVE_MMAP 0 +#else +# define U_HAVE_MMAP 1 +#endif + +#ifdef U_HAVE_POPEN + /* Use the predefined value. */ +#elif U_PLATFORM_USES_ONLY_WIN32_API +# define U_HAVE_POPEN 0 +#elif U_PLATFORM == U_PF_OS400 +# define U_HAVE_POPEN 0 +#else +# define U_HAVE_POPEN 1 +#endif + +/** + * \def U_HAVE_DIRENT_H + * Defines whether dirent.h is available. + * @internal + */ +#ifdef U_HAVE_DIRENT_H + /* Use the predefined value. */ +#elif U_PLATFORM_USES_ONLY_WIN32_API +# define U_HAVE_DIRENT_H 0 +#else +# define U_HAVE_DIRENT_H 1 +#endif + +/** @} */ + +/*===========================================================================*/ +/** @{ Programs used by ICU code */ +/*===========================================================================*/ + +/** + * \def U_MAKE_IS_NMAKE + * Defines whether the "make" program is Windows nmake. + */ +#ifdef U_MAKE_IS_NMAKE + /* Use the predefined value. */ +#elif U_PLATFORM == U_PF_WINDOWS +# define U_MAKE_IS_NMAKE 1 +#else +# define U_MAKE_IS_NMAKE 0 +#endif + +/** @} */ + +/*==========================================================================*/ +/* Platform utilities */ +/*==========================================================================*/ + +/** + * Platform utilities isolates the platform dependencies of the + * library. For each platform which this code is ported to, these + * functions may have to be re-implemented. + */ + +/** + * Floating point utility to determine if a double is Not a Number (NaN). + * @internal + */ +U_CAPI UBool U_EXPORT2 uprv_isNaN(double d); +/** + * Floating point utility to determine if a double has an infinite value. + * @internal + */ +U_CAPI UBool U_EXPORT2 uprv_isInfinite(double d); +/** + * Floating point utility to determine if a double has a positive infinite value. + * @internal + */ +U_CAPI UBool U_EXPORT2 uprv_isPositiveInfinity(double d); +/** + * Floating point utility to determine if a double has a negative infinite value. + * @internal + */ +U_CAPI UBool U_EXPORT2 uprv_isNegativeInfinity(double d); +/** + * Floating point utility that returns a Not a Number (NaN) value. + * @internal + */ +U_CAPI double U_EXPORT2 uprv_getNaN(void); +/** + * Floating point utility that returns an infinite value. + * @internal + */ +U_CAPI double U_EXPORT2 uprv_getInfinity(void); + +/** + * Floating point utility to truncate a double. + * @internal + */ +U_CAPI double U_EXPORT2 uprv_trunc(double d); +/** + * Floating point utility to calculate the floor of a double. + * @internal + */ +U_CAPI double U_EXPORT2 uprv_floor(double d); +/** + * Floating point utility to calculate the ceiling of a double. + * @internal + */ +U_CAPI double U_EXPORT2 uprv_ceil(double d); +/** + * Floating point utility to calculate the absolute value of a double. + * @internal + */ +U_CAPI double U_EXPORT2 uprv_fabs(double d); +/** + * Floating point utility to calculate the fractional and integer parts of a double. + * @internal + */ +U_CAPI double U_EXPORT2 uprv_modf(double d, double* pinteger); +/** + * Floating point utility to calculate the remainder of a double divided by another double. + * @internal + */ +U_CAPI double U_EXPORT2 uprv_fmod(double d, double y); +/** + * Floating point utility to calculate d to the power of exponent (d^exponent). + * @internal + */ +U_CAPI double U_EXPORT2 uprv_pow(double d, double exponent); +/** + * Floating point utility to calculate 10 to the power of exponent (10^exponent). + * @internal + */ +U_CAPI double U_EXPORT2 uprv_pow10(int32_t exponent); +/** + * Floating point utility to calculate the maximum value of two doubles. + * @internal + */ +U_CAPI double U_EXPORT2 uprv_fmax(double d, double y); +/** + * Floating point utility to calculate the minimum value of two doubles. + * @internal + */ +U_CAPI double U_EXPORT2 uprv_fmin(double d, double y); +/** + * Private utility to calculate the maximum value of two integers. + * @internal + */ +U_CAPI int32_t U_EXPORT2 uprv_max(int32_t d, int32_t y); +/** + * Private utility to calculate the minimum value of two integers. + * @internal + */ +U_CAPI int32_t U_EXPORT2 uprv_min(int32_t d, int32_t y); + +#if U_IS_BIG_ENDIAN +# define uprv_isNegative(number) (*((signed char *)&(number))<0) +#else +# define uprv_isNegative(number) (*((signed char *)&(number)+sizeof(number)-1)<0) +#endif + +/** + * Return the largest positive number that can be represented by an integer + * type of arbitrary bit length. + * @internal + */ +U_CAPI double U_EXPORT2 uprv_maxMantissa(void); + +/** + * Floating point utility to calculate the logarithm of a double. + * @internal + */ +U_CAPI double U_EXPORT2 uprv_log(double d); + +/** + * Does common notion of rounding e.g. uprv_floor(x + 0.5); + * @param x the double number + * @return the rounded double + * @internal + */ +U_CAPI double U_EXPORT2 uprv_round(double x); + +/** + * Adds the signed integers a and b, storing the result in res. + * Checks for signed integer overflow. + * Similar to the GCC/Clang extension __builtin_add_overflow + * + * @param a The first operand. + * @param b The second operand. + * @param res a + b + * @return true if overflow occurred; false if no overflow occurred. + * @internal + */ +U_CAPI UBool U_EXPORT2 uprv_add32_overflow(int32_t a, int32_t b, int32_t* res); + +/** + * Multiplies the signed integers a and b, storing the result in res. + * Checks for signed integer overflow. + * Similar to the GCC/Clang extension __builtin_mul_overflow + * + * @param a The first multiplicand. + * @param b The second multiplicand. + * @param res a * b + * @return true if overflow occurred; false if no overflow occurred. + * @internal + */ +U_CAPI UBool U_EXPORT2 uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res); + +#if 0 +/** + * Returns the number of digits after the decimal point in a double number x. + * + * @param x the double number + * @return the number of digits after the decimal point in a double number x. + * @internal + */ +/*U_CAPI int32_t U_EXPORT2 uprv_digitsAfterDecimal(double x);*/ +#endif + +#if !U_CHARSET_IS_UTF8 +/** + * Please use ucnv_getDefaultName() instead. + * Return the default codepage for this platform and locale. + * This function can call setlocale() on Unix platforms. Please read the + * platform documentation on setlocale() before calling this function. + * @return the default codepage for this platform + * @internal + */ +U_CAPI const char* U_EXPORT2 uprv_getDefaultCodepage(void); +#endif + +/** + * Please use uloc_getDefault() instead. + * Return the default locale ID string by querying the system, or + * zero if one cannot be found. + * This function can call setlocale() on Unix platforms. Please read the + * platform documentation on setlocale() before calling this function. + * @return the default locale ID string + * @internal + */ +U_CAPI const char* U_EXPORT2 uprv_getDefaultLocaleID(void); + +/** + * Time zone utilities + * + * Wrappers for C runtime library functions relating to timezones. + * The t_tzset() function (similar to tzset) uses the current setting + * of the environment variable TZ to assign values to three global + * variables: daylight, timezone, and tzname. These variables have the + * following meanings, and are declared in <time.h>. + * + * daylight Nonzero if daylight-saving-time zone (DST) is specified + * in TZ; otherwise, 0. Default value is 1. + * timezone Difference in seconds between coordinated universal + * time and local time. E.g., -28,800 for PST (GMT-8hrs) + * tzname(0) Three-letter time-zone name derived from TZ environment + * variable. E.g., "PST". + * tzname(1) Three-letter DST zone name derived from TZ environment + * variable. E.g., "PDT". If DST zone is omitted from TZ, + * tzname(1) is an empty string. + * + * Notes: For example, to set the TZ environment variable to correspond + * to the current time zone in Germany, you can use one of the + * following statements: + * + * set TZ=GST1GDT + * set TZ=GST+1GDT + * + * If the TZ value is not set, t_tzset() attempts to use the time zone + * information specified by the operating system. Under Windows NT + * and Windows 95, this information is specified in the Control Panel's + * Date/Time application. + * @internal + */ +U_CAPI void U_EXPORT2 uprv_tzset(void); + +/** + * Difference in seconds between coordinated universal + * time and local time. E.g., -28,800 for PST (GMT-8hrs) + * @return the difference in seconds between coordinated universal time and local time. + * @internal + */ +U_CAPI int32_t U_EXPORT2 uprv_timezone(void); + +/** + * tzname(0) Three-letter time-zone name derived from TZ environment + * variable. E.g., "PST". + * tzname(1) Three-letter DST zone name derived from TZ environment + * variable. E.g., "PDT". If DST zone is omitted from TZ, + * tzname(1) is an empty string. + * @internal + */ +U_CAPI const char* U_EXPORT2 uprv_tzname(int n); + +/** + * Reset the global tzname cache. + * @internal + */ +U_CAPI void uprv_tzname_clear_cache(void); + +/** + * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970. + * This function is affected by 'faketime' and should be the bottleneck for all user-visible ICU time functions. + * @return the UTC time measured in milliseconds + * @internal + */ +U_CAPI UDate U_EXPORT2 uprv_getUTCtime(void); + +/** + * Get UTC (GMT) time measured in milliseconds since 0:00 on 1/1/1970. + * This function is not affected by 'faketime', so it should only be used by low level test functions- not by anything that + * exposes time to the end user. + * @return the UTC time measured in milliseconds + * @internal + */ +U_CAPI UDate U_EXPORT2 uprv_getRawUTCtime(void); + +/** + * Determine whether a pathname is absolute or not, as defined by the platform. + * @param path Pathname to test + * @return true if the path is absolute + * @internal (ICU 3.0) + */ +U_CAPI UBool U_EXPORT2 uprv_pathIsAbsolute(const char *path); + +/** + * Use U_MAX_PTR instead of this function. + * @param void pointer to test + * @return the largest possible pointer greater than the base + * @internal (ICU 3.8) + */ +U_CAPI void * U_EXPORT2 uprv_maximumPtr(void *base); + +/** + * Maximum value of a (void*) - use to indicate the limit of an 'infinite' buffer. + * In fact, buffer sizes must not exceed 2GB so that the difference between + * the buffer limit and the buffer start can be expressed in an int32_t. + * + * The definition of U_MAX_PTR must fulfill the following conditions: + * - return the largest possible pointer greater than base + * - return a valid pointer according to the machine architecture (AS/400, 64-bit, etc.) + * - avoid wrapping around at high addresses + * - make sure that the returned pointer is not farther from base than 0x7fffffff bytes + * + * @param base The beginning of a buffer to find the maximum offset from + * @internal + */ +#ifndef U_MAX_PTR +# if U_PLATFORM == U_PF_OS390 && !defined(_LP64) + /* We have 31-bit pointers. */ +# define U_MAX_PTR(base) ((void *)0x7fffffff) +# elif U_PLATFORM == U_PF_OS400 +# define U_MAX_PTR(base) uprv_maximumPtr((void *)base) +# elif 0 + /* + * For platforms where pointers are scalar values (which is normal, but unlike i5/OS) + * but that do not define uintptr_t. + * + * However, this does not work on modern compilers: + * The C++ standard does not define pointer overflow, and allows compilers to + * assume that p+u>p for any pointer p and any integer u>0. + * Thus, modern compilers optimize away the ">" comparison. + * (See ICU tickets #7187 and #8096.) + */ +# define U_MAX_PTR(base) \ + ((void *)(((char *)(base)+0x7fffffffu) > (char *)(base) \ + ? ((char *)(base)+0x7fffffffu) \ + : (char *)-1)) +# else + /* Default version. C++ standard compliant for scalar pointers. */ +# define U_MAX_PTR(base) \ + ((void *)(((uintptr_t)(base)+0x7fffffffu) > (uintptr_t)(base) \ + ? ((uintptr_t)(base)+0x7fffffffu) \ + : (uintptr_t)-1)) +# endif +#endif + + +#ifdef __cplusplus +/** + * Pin a buffer capacity such that doing pointer arithmetic + * on the destination pointer and capacity cannot overflow. + * + * The pinned capacity must fulfill the following conditions (for positive capacities): + * - dest + capacity is a valid pointer according to the machine arcitecture (AS/400, 64-bit, etc.) + * - (dest + capacity) >= dest + * - The size (in bytes) of T[capacity] does not exceed 0x7fffffff + * + * @param dest the destination buffer pointer. + * @param capacity the requested buffer capacity, in units of type T. + * @return the pinned capacity. + * @internal + */ +template <typename T> +inline int32_t pinCapacity(T *dest, int32_t capacity) { + if (capacity <= 0) { return capacity; } + + uintptr_t destInt = (uintptr_t)dest; + uintptr_t maxInt; + +# if U_PLATFORM == U_PF_OS390 && !defined(_LP64) + // We have 31-bit pointers. + maxInt = 0x7fffffff; +# elif U_PLATFORM == U_PF_OS400 + maxInt = (uintptr_t)uprv_maximumPtr((void *)dest); +# else + maxInt = destInt + 0x7fffffffu; + if (maxInt < destInt) { + // Less than 2GB to the end of the address space. + // Pin to that to prevent address overflow. + maxInt = (uintptr_t)-1; + } +# endif + + uintptr_t maxBytes = maxInt - destInt; // max. 2GB + int32_t maxCapacity = (int32_t)(maxBytes / sizeof(T)); + return capacity <= maxCapacity ? capacity : maxCapacity; +} +#endif // __cplusplus + +/* Dynamic Library Functions */ + +typedef void (UVoidFunction)(void); + +#if U_ENABLE_DYLOAD +/** + * Load a library + * @internal (ICU 4.4) + */ +U_CAPI void * U_EXPORT2 uprv_dl_open(const char *libName, UErrorCode *status); + +/** + * Close a library + * @internal (ICU 4.4) + */ +U_CAPI void U_EXPORT2 uprv_dl_close( void *lib, UErrorCode *status); + +/** + * Extract a symbol from a library (function) + * @internal (ICU 4.8) + */ +U_CAPI UVoidFunction* U_EXPORT2 uprv_dlsym_func( void *lib, const char *symbolName, UErrorCode *status); + +/** + * Extract a symbol from a library (function) + * Not implemented, no clients. + * @internal + */ +/* U_CAPI void * U_EXPORT2 uprv_dlsym_data( void *lib, const char *symbolName, UErrorCode *status); */ + +#endif + +/** + * Define malloc and related functions + * @internal + */ +#if U_PLATFORM == U_PF_OS400 +# define uprv_default_malloc(x) _C_TS_malloc(x) +# define uprv_default_realloc(x,y) _C_TS_realloc(x,y) +# define uprv_default_free(x) _C_TS_free(x) +/* also _C_TS_calloc(x) */ +#else +/* C defaults */ +# define uprv_default_malloc(x) malloc(x) +# define uprv_default_realloc(x,y) realloc(x,y) +# define uprv_default_free(x) free(x) +#endif + + +#endif diff --git a/thirdparty/icu4c/common/rbbi.cpp b/thirdparty/icu4c/common/rbbi.cpp new file mode 100644 index 0000000000..9b7e70c3cf --- /dev/null +++ b/thirdparty/icu4c/common/rbbi.cpp @@ -0,0 +1,1301 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +*************************************************************************** +* Copyright (C) 1999-2016 International Business Machines Corporation +* and others. All rights reserved. +*************************************************************************** +*/ +// +// file: rbbi.cpp Contains the implementation of the rule based break iterator +// runtime engine and the API implementation for +// class RuleBasedBreakIterator +// + +#include "utypeinfo.h" // for 'typeid' to work + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include <cinttypes> + +#include "unicode/rbbi.h" +#include "unicode/schriter.h" +#include "unicode/uchriter.h" +#include "unicode/uclean.h" +#include "unicode/udata.h" + +#include "brkeng.h" +#include "ucln_cmn.h" +#include "cmemory.h" +#include "cstring.h" +#include "localsvc.h" +#include "rbbidata.h" +#include "rbbi_cache.h" +#include "rbbirb.h" +#include "uassert.h" +#include "umutex.h" +#include "uvectr32.h" + +#ifdef RBBI_DEBUG +static UBool gTrace = FALSE; +#endif + +U_NAMESPACE_BEGIN + +// The state number of the starting state +constexpr int32_t START_STATE = 1; + +// The state-transition value indicating "stop" +constexpr int32_t STOP_STATE = 0; + + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedBreakIterator) + + +//======================================================================= +// constructors +//======================================================================= + +/** + * Constructs a RuleBasedBreakIterator that uses the already-created + * tables object that is passed in as a parameter. + */ +RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status) + : fSCharIter(UnicodeString()) +{ + init(status); + fData = new RBBIDataWrapper(data, status); // status checked in constructor + if (U_FAILURE(status)) {return;} + if(fData == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + if (fData->fForwardTable->fLookAheadResultsSize > 0) { + fLookAheadMatches = static_cast<int32_t *>( + uprv_malloc(fData->fForwardTable->fLookAheadResultsSize * sizeof(int32_t))); + if (fLookAheadMatches == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + } +} + +// +// Construct from precompiled binary rules (tables). This constructor is public API, +// taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules(). +// +RuleBasedBreakIterator::RuleBasedBreakIterator(const uint8_t *compiledRules, + uint32_t ruleLength, + UErrorCode &status) + : fSCharIter(UnicodeString()) +{ + init(status); + if (U_FAILURE(status)) { + return; + } + if (compiledRules == NULL || ruleLength < sizeof(RBBIDataHeader)) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + const RBBIDataHeader *data = (const RBBIDataHeader *)compiledRules; + if (data->fLength > ruleLength) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + fData = new RBBIDataWrapper(data, RBBIDataWrapper::kDontAdopt, status); + if (U_FAILURE(status)) {return;} + if(fData == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + if (fData->fForwardTable->fLookAheadResultsSize > 0) { + fLookAheadMatches = static_cast<int32_t *>( + uprv_malloc(fData->fForwardTable->fLookAheadResultsSize * sizeof(int32_t))); + if (fLookAheadMatches == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + } +} + + +//------------------------------------------------------------------------------- +// +// Constructor from a UDataMemory handle to precompiled break rules +// stored in an ICU data file. +// +//------------------------------------------------------------------------------- +RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UErrorCode &status) + : fSCharIter(UnicodeString()) +{ + init(status); + fData = new RBBIDataWrapper(udm, status); // status checked in constructor + if (U_FAILURE(status)) {return;} + if(fData == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + if (fData->fForwardTable->fLookAheadResultsSize > 0) { + fLookAheadMatches = static_cast<int32_t *>( + uprv_malloc(fData->fForwardTable->fLookAheadResultsSize * sizeof(int32_t))); + if (fLookAheadMatches == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + } +} + + + +//------------------------------------------------------------------------------- +// +// Constructor from a set of rules supplied as a string. +// +//------------------------------------------------------------------------------- +RuleBasedBreakIterator::RuleBasedBreakIterator( const UnicodeString &rules, + UParseError &parseError, + UErrorCode &status) + : fSCharIter(UnicodeString()) +{ + init(status); + if (U_FAILURE(status)) {return;} + RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *) + RBBIRuleBuilder::createRuleBasedBreakIterator(rules, &parseError, status); + // Note: This is a bit awkward. The RBBI ruleBuilder has a factory method that + // creates and returns a complete RBBI. From here, in a constructor, we + // can't just return the object created by the builder factory, hence + // the assignment of the factory created object to "this". + if (U_SUCCESS(status)) { + *this = *bi; + delete bi; + } +} + + +//------------------------------------------------------------------------------- +// +// Default Constructor. Create an empty shell that can be set up later. +// Used when creating a RuleBasedBreakIterator from a set +// of rules. +//------------------------------------------------------------------------------- +RuleBasedBreakIterator::RuleBasedBreakIterator() + : fSCharIter(UnicodeString()) +{ + UErrorCode status = U_ZERO_ERROR; + init(status); +} + + +//------------------------------------------------------------------------------- +// +// Copy constructor. Will produce a break iterator with the same behavior, +// and which iterates over the same text, as the one passed in. +// +//------------------------------------------------------------------------------- +RuleBasedBreakIterator::RuleBasedBreakIterator(const RuleBasedBreakIterator& other) +: BreakIterator(other), + fSCharIter(UnicodeString()) +{ + UErrorCode status = U_ZERO_ERROR; + this->init(status); + *this = other; +} + + +/** + * Destructor + */ +RuleBasedBreakIterator::~RuleBasedBreakIterator() { + if (fCharIter != &fSCharIter) { + // fCharIter was adopted from the outside. + delete fCharIter; + } + fCharIter = nullptr; + + utext_close(&fText); + + if (fData != nullptr) { + fData->removeReference(); + fData = nullptr; + } + delete fBreakCache; + fBreakCache = nullptr; + + delete fDictionaryCache; + fDictionaryCache = nullptr; + + delete fLanguageBreakEngines; + fLanguageBreakEngines = nullptr; + + delete fUnhandledBreakEngine; + fUnhandledBreakEngine = nullptr; + + uprv_free(fLookAheadMatches); + fLookAheadMatches = nullptr; +} + +/** + * Assignment operator. Sets this iterator to have the same behavior, + * and iterate over the same text, as the one passed in. + * TODO: needs better handling of memory allocation errors. + */ +RuleBasedBreakIterator& +RuleBasedBreakIterator::operator=(const RuleBasedBreakIterator& that) { + if (this == &that) { + return *this; + } + BreakIterator::operator=(that); + + if (fLanguageBreakEngines != NULL) { + delete fLanguageBreakEngines; + fLanguageBreakEngines = NULL; // Just rebuild for now + } + // TODO: clone fLanguageBreakEngines from "that" + UErrorCode status = U_ZERO_ERROR; + utext_clone(&fText, &that.fText, FALSE, TRUE, &status); + + if (fCharIter != &fSCharIter) { + delete fCharIter; + } + fCharIter = &fSCharIter; + + if (that.fCharIter != NULL && that.fCharIter != &that.fSCharIter) { + // This is a little bit tricky - it will intially appear that + // this->fCharIter is adopted, even if that->fCharIter was + // not adopted. That's ok. + fCharIter = that.fCharIter->clone(); + } + fSCharIter = that.fSCharIter; + if (fCharIter == NULL) { + fCharIter = &fSCharIter; + } + + if (fData != NULL) { + fData->removeReference(); + fData = NULL; + } + if (that.fData != NULL) { + fData = that.fData->addReference(); + } + + uprv_free(fLookAheadMatches); + fLookAheadMatches = nullptr; + if (fData && fData->fForwardTable->fLookAheadResultsSize > 0) { + fLookAheadMatches = static_cast<int32_t *>( + uprv_malloc(fData->fForwardTable->fLookAheadResultsSize * sizeof(int32_t))); + } + + + fPosition = that.fPosition; + fRuleStatusIndex = that.fRuleStatusIndex; + fDone = that.fDone; + + // TODO: both the dictionary and the main cache need to be copied. + // Current position could be within a dictionary range. Trying to continue + // the iteration without the caches present would go to the rules, with + // the assumption that the current position is on a rule boundary. + fBreakCache->reset(fPosition, fRuleStatusIndex); + fDictionaryCache->reset(); + + return *this; +} + + + +//----------------------------------------------------------------------------- +// +// init() Shared initialization routine. Used by all the constructors. +// Initializes all fields, leaving the object in a consistent state. +// +//----------------------------------------------------------------------------- +void RuleBasedBreakIterator::init(UErrorCode &status) { + fCharIter = nullptr; + fData = nullptr; + fPosition = 0; + fRuleStatusIndex = 0; + fDone = false; + fDictionaryCharCount = 0; + fLanguageBreakEngines = nullptr; + fUnhandledBreakEngine = nullptr; + fBreakCache = nullptr; + fDictionaryCache = nullptr; + fLookAheadMatches = nullptr; + + // Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER. + // fText = UTEXT_INITIALIZER; + static const UText initializedUText = UTEXT_INITIALIZER; + uprv_memcpy(&fText, &initializedUText, sizeof(UText)); + + if (U_FAILURE(status)) { + return; + } + + utext_openUChars(&fText, NULL, 0, &status); + fDictionaryCache = new DictionaryCache(this, status); + fBreakCache = new BreakCache(this, status); + if (U_SUCCESS(status) && (fDictionaryCache == NULL || fBreakCache == NULL)) { + status = U_MEMORY_ALLOCATION_ERROR; + } + +#ifdef RBBI_DEBUG + static UBool debugInitDone = FALSE; + if (debugInitDone == FALSE) { + char *debugEnv = getenv("U_RBBIDEBUG"); + if (debugEnv && uprv_strstr(debugEnv, "trace")) { + gTrace = TRUE; + } + debugInitDone = TRUE; + } +#endif +} + + + +//----------------------------------------------------------------------------- +// +// clone - Returns a newly-constructed RuleBasedBreakIterator with the same +// behavior, and iterating over the same text, as this one. +// Virtual function: does the right thing with subclasses. +// +//----------------------------------------------------------------------------- +RuleBasedBreakIterator* +RuleBasedBreakIterator::clone() const { + return new RuleBasedBreakIterator(*this); +} + +/** + * Equality operator. Returns TRUE if both BreakIterators are of the + * same class, have the same behavior, and iterate over the same text. + */ +UBool +RuleBasedBreakIterator::operator==(const BreakIterator& that) const { + if (typeid(*this) != typeid(that)) { + return FALSE; + } + if (this == &that) { + return TRUE; + } + + // The base class BreakIterator carries no state that participates in equality, + // and does not implement an equality function that would otherwise be + // checked at this point. + + const RuleBasedBreakIterator& that2 = (const RuleBasedBreakIterator&) that; + + if (!utext_equals(&fText, &that2.fText)) { + // The two break iterators are operating on different text, + // or have a different iteration position. + // Note that fText's position is always the same as the break iterator's position. + return FALSE; + } + + if (!(fPosition == that2.fPosition && + fRuleStatusIndex == that2.fRuleStatusIndex && + fDone == that2.fDone)) { + return FALSE; + } + + if (that2.fData == fData || + (fData != NULL && that2.fData != NULL && *that2.fData == *fData)) { + // The two break iterators are using the same rules. + return TRUE; + } + return FALSE; +} + +/** + * Compute a hash code for this BreakIterator + * @return A hash code + */ +int32_t +RuleBasedBreakIterator::hashCode(void) const { + int32_t hash = 0; + if (fData != NULL) { + hash = fData->hashCode(); + } + return hash; +} + + +void RuleBasedBreakIterator::setText(UText *ut, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + fBreakCache->reset(); + fDictionaryCache->reset(); + utext_clone(&fText, ut, FALSE, TRUE, &status); + + // Set up a dummy CharacterIterator to be returned if anyone + // calls getText(). With input from UText, there is no reasonable + // way to return a characterIterator over the actual input text. + // Return one over an empty string instead - this is the closest + // we can come to signaling a failure. + // (GetText() is obsolete, this failure is sort of OK) + fSCharIter.setText(UnicodeString()); + + if (fCharIter != &fSCharIter) { + // existing fCharIter was adopted from the outside. Delete it now. + delete fCharIter; + } + fCharIter = &fSCharIter; + + this->first(); +} + + +UText *RuleBasedBreakIterator::getUText(UText *fillIn, UErrorCode &status) const { + UText *result = utext_clone(fillIn, &fText, FALSE, TRUE, &status); + return result; +} + + +//======================================================================= +// BreakIterator overrides +//======================================================================= + +/** + * Return a CharacterIterator over the text being analyzed. + */ +CharacterIterator& +RuleBasedBreakIterator::getText() const { + return *fCharIter; +} + +/** + * Set the iterator to analyze a new piece of text. This function resets + * the current iteration position to the beginning of the text. + * @param newText An iterator over the text to analyze. + */ +void +RuleBasedBreakIterator::adoptText(CharacterIterator* newText) { + // If we are holding a CharacterIterator adopted from a + // previous call to this function, delete it now. + if (fCharIter != &fSCharIter) { + delete fCharIter; + } + + fCharIter = newText; + UErrorCode status = U_ZERO_ERROR; + fBreakCache->reset(); + fDictionaryCache->reset(); + if (newText==NULL || newText->startIndex() != 0) { + // startIndex !=0 wants to be an error, but there's no way to report it. + // Make the iterator text be an empty string. + utext_openUChars(&fText, NULL, 0, &status); + } else { + utext_openCharacterIterator(&fText, newText, &status); + } + this->first(); +} + +/** + * Set the iterator to analyze a new piece of text. This function resets + * the current iteration position to the beginning of the text. + * @param newText An iterator over the text to analyze. + */ +void +RuleBasedBreakIterator::setText(const UnicodeString& newText) { + UErrorCode status = U_ZERO_ERROR; + fBreakCache->reset(); + fDictionaryCache->reset(); + utext_openConstUnicodeString(&fText, &newText, &status); + + // Set up a character iterator on the string. + // Needed in case someone calls getText(). + // Can not, unfortunately, do this lazily on the (probably never) + // call to getText(), because getText is const. + fSCharIter.setText(newText); + + if (fCharIter != &fSCharIter) { + // old fCharIter was adopted from the outside. Delete it. + delete fCharIter; + } + fCharIter = &fSCharIter; + + this->first(); +} + + +/** + * Provide a new UText for the input text. Must reference text with contents identical + * to the original. + * Intended for use with text data originating in Java (garbage collected) environments + * where the data may be moved in memory at arbitrary times. + */ +RuleBasedBreakIterator &RuleBasedBreakIterator::refreshInputText(UText *input, UErrorCode &status) { + if (U_FAILURE(status)) { + return *this; + } + if (input == NULL) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return *this; + } + int64_t pos = utext_getNativeIndex(&fText); + // Shallow read-only clone of the new UText into the existing input UText + utext_clone(&fText, input, FALSE, TRUE, &status); + if (U_FAILURE(status)) { + return *this; + } + utext_setNativeIndex(&fText, pos); + if (utext_getNativeIndex(&fText) != pos) { + // Sanity check. The new input utext is supposed to have the exact same + // contents as the old. If we can't set to the same position, it doesn't. + // The contents underlying the old utext might be invalid at this point, + // so it's not safe to check directly. + status = U_ILLEGAL_ARGUMENT_ERROR; + } + return *this; +} + + +/** + * Sets the current iteration position to the beginning of the text, position zero. + * @return The new iterator position, which is zero. + */ +int32_t RuleBasedBreakIterator::first(void) { + UErrorCode status = U_ZERO_ERROR; + if (!fBreakCache->seek(0)) { + fBreakCache->populateNear(0, status); + } + fBreakCache->current(); + U_ASSERT(fPosition == 0); + return 0; +} + +/** + * Sets the current iteration position to the end of the text. + * @return The text's past-the-end offset. + */ +int32_t RuleBasedBreakIterator::last(void) { + int32_t endPos = (int32_t)utext_nativeLength(&fText); + UBool endShouldBeBoundary = isBoundary(endPos); // Has side effect of setting iterator position. + (void)endShouldBeBoundary; + U_ASSERT(endShouldBeBoundary); + U_ASSERT(fPosition == endPos); + return endPos; +} + +/** + * Advances the iterator either forward or backward the specified number of steps. + * Negative values move backward, and positive values move forward. This is + * equivalent to repeatedly calling next() or previous(). + * @param n The number of steps to move. The sign indicates the direction + * (negative is backwards, and positive is forwards). + * @return The character offset of the boundary position n boundaries away from + * the current one. + */ +int32_t RuleBasedBreakIterator::next(int32_t n) { + int32_t result = 0; + if (n > 0) { + for (; n > 0 && result != UBRK_DONE; --n) { + result = next(); + } + } else if (n < 0) { + for (; n < 0 && result != UBRK_DONE; ++n) { + result = previous(); + } + } else { + result = current(); + } + return result; +} + +/** + * Advances the iterator to the next boundary position. + * @return The position of the first boundary after this one. + */ +int32_t RuleBasedBreakIterator::next(void) { + fBreakCache->next(); + return fDone ? UBRK_DONE : fPosition; +} + +/** + * Move the iterator backwards, to the boundary preceding the current one. + * + * Starts from the current position within fText. + * Starting position need not be on a boundary. + * + * @return The position of the boundary position immediately preceding the starting position. + */ +int32_t RuleBasedBreakIterator::previous(void) { + UErrorCode status = U_ZERO_ERROR; + fBreakCache->previous(status); + return fDone ? UBRK_DONE : fPosition; +} + +/** + * Sets the iterator to refer to the first boundary position following + * the specified position. + * @param startPos The position from which to begin searching for a break position. + * @return The position of the first break after the current position. + */ +int32_t RuleBasedBreakIterator::following(int32_t startPos) { + // if the supplied position is before the beginning, return the + // text's starting offset + if (startPos < 0) { + return first(); + } + + // Move requested offset to a code point start. It might be on a trail surrogate, + // or on a trail byte if the input is UTF-8. Or it may be beyond the end of the text. + utext_setNativeIndex(&fText, startPos); + startPos = (int32_t)utext_getNativeIndex(&fText); + + UErrorCode status = U_ZERO_ERROR; + fBreakCache->following(startPos, status); + return fDone ? UBRK_DONE : fPosition; +} + +/** + * Sets the iterator to refer to the last boundary position before the + * specified position. + * @param offset The position to begin searching for a break from. + * @return The position of the last boundary before the starting position. + */ +int32_t RuleBasedBreakIterator::preceding(int32_t offset) { + if (offset > utext_nativeLength(&fText)) { + return last(); + } + + // Move requested offset to a code point start. It might be on a trail surrogate, + // or on a trail byte if the input is UTF-8. + + utext_setNativeIndex(&fText, offset); + int32_t adjustedOffset = static_cast<int32_t>(utext_getNativeIndex(&fText)); + + UErrorCode status = U_ZERO_ERROR; + fBreakCache->preceding(adjustedOffset, status); + return fDone ? UBRK_DONE : fPosition; +} + +/** + * Returns true if the specfied position is a boundary position. As a side + * effect, leaves the iterator pointing to the first boundary position at + * or after "offset". + * + * @param offset the offset to check. + * @return True if "offset" is a boundary position. + */ +UBool RuleBasedBreakIterator::isBoundary(int32_t offset) { + // out-of-range indexes are never boundary positions + if (offset < 0) { + first(); // For side effects on current position, tag values. + return FALSE; + } + + // Adjust offset to be on a code point boundary and not beyond the end of the text. + // Note that isBoundary() is always false for offsets that are not on code point boundaries. + // But we still need the side effect of leaving iteration at the following boundary. + + utext_setNativeIndex(&fText, offset); + int32_t adjustedOffset = static_cast<int32_t>(utext_getNativeIndex(&fText)); + + bool result = false; + UErrorCode status = U_ZERO_ERROR; + if (fBreakCache->seek(adjustedOffset) || fBreakCache->populateNear(adjustedOffset, status)) { + result = (fBreakCache->current() == offset); + } + + if (result && adjustedOffset < offset && utext_char32At(&fText, offset) == U_SENTINEL) { + // Original offset is beyond the end of the text. Return FALSE, it's not a boundary, + // but the iteration position remains set to the end of the text, which is a boundary. + return FALSE; + } + if (!result) { + // Not on a boundary. isBoundary() must leave iterator on the following boundary. + // Cache->seek(), above, left us on the preceding boundary, so advance one. + next(); + } + return result; +} + + +/** + * Returns the current iteration position. + * @return The current iteration position. + */ +int32_t RuleBasedBreakIterator::current(void) const { + return fPosition; +} + + +//======================================================================= +// implementation +//======================================================================= + +// +// RBBIRunMode - the state machine runs an extra iteration at the beginning and end +// of user text. A variable with this enum type keeps track of where we +// are. The state machine only fetches user input while in the RUN mode. +// +enum RBBIRunMode { + RBBI_START, // state machine processing is before first char of input + RBBI_RUN, // state machine processing is in the user text + RBBI_END // state machine processing is after end of user text. +}; + + +// Wrapper functions to select the appropriate handleNext() or handleSafePrevious() +// instantiation, based on whether an 8 or 16 bit table is required. +// +// These Trie access functions will be inlined within the handleNext()/Previous() instantions. +static inline uint16_t TrieFunc8(const UCPTrie *trie, UChar32 c) { + return UCPTRIE_FAST_GET(trie, UCPTRIE_8, c); +} + +static inline uint16_t TrieFunc16(const UCPTrie *trie, UChar32 c) { + return UCPTRIE_FAST_GET(trie, UCPTRIE_16, c); +} + +int32_t RuleBasedBreakIterator::handleNext() { + const RBBIStateTable *statetable = fData->fForwardTable; + bool use8BitsTrie = ucptrie_getValueWidth(fData->fTrie) == UCPTRIE_VALUE_BITS_8; + if (statetable->fFlags & RBBI_8BITS_ROWS) { + if (use8BitsTrie) { + return handleNext<RBBIStateTableRow8, TrieFunc8>(); + } else { + return handleNext<RBBIStateTableRow8, TrieFunc16>(); + } + } else { + if (use8BitsTrie) { + return handleNext<RBBIStateTableRow16, TrieFunc8>(); + } else { + return handleNext<RBBIStateTableRow16, TrieFunc16>(); + } + } +} + +int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) { + const RBBIStateTable *statetable = fData->fReverseTable; + bool use8BitsTrie = ucptrie_getValueWidth(fData->fTrie) == UCPTRIE_VALUE_BITS_8; + if (statetable->fFlags & RBBI_8BITS_ROWS) { + if (use8BitsTrie) { + return handleSafePrevious<RBBIStateTableRow8, TrieFunc8>(fromPosition); + } else { + return handleSafePrevious<RBBIStateTableRow8, TrieFunc16>(fromPosition); + } + } else { + if (use8BitsTrie) { + return handleSafePrevious<RBBIStateTableRow16, TrieFunc8>(fromPosition); + } else { + return handleSafePrevious<RBBIStateTableRow16, TrieFunc16>(fromPosition); + } + } +} + + +//----------------------------------------------------------------------------------- +// +// handleNext() +// Run the state machine to find a boundary +// +//----------------------------------------------------------------------------------- +template <typename RowType, RuleBasedBreakIterator::PTrieFunc trieFunc> +int32_t RuleBasedBreakIterator::handleNext() { + int32_t state; + uint16_t category = 0; + RBBIRunMode mode; + + RowType *row; + UChar32 c; + int32_t result = 0; + int32_t initialPosition = 0; + const RBBIStateTable *statetable = fData->fForwardTable; + const char *tableData = statetable->fTableData; + uint32_t tableRowLen = statetable->fRowLen; + uint32_t dictStart = statetable->fDictCategoriesStart; + #ifdef RBBI_DEBUG + if (gTrace) { + RBBIDebugPuts("Handle Next pos char state category"); + } + #endif + + // handleNext alway sets the break tag value. + // Set the default for it. + fRuleStatusIndex = 0; + + fDictionaryCharCount = 0; + + // if we're already at the end of the text, return DONE. + initialPosition = fPosition; + UTEXT_SETNATIVEINDEX(&fText, initialPosition); + result = initialPosition; + c = UTEXT_NEXT32(&fText); + if (c==U_SENTINEL) { + fDone = TRUE; + return UBRK_DONE; + } + + // Set the initial state for the state machine + state = START_STATE; + row = (RowType *) + //(statetable->fTableData + (statetable->fRowLen * state)); + (tableData + tableRowLen * state); + + + mode = RBBI_RUN; + if (statetable->fFlags & RBBI_BOF_REQUIRED) { + category = 2; + mode = RBBI_START; + } + + + // loop until we reach the end of the text or transition to state 0 + // + for (;;) { + if (c == U_SENTINEL) { + // Reached end of input string. + if (mode == RBBI_END) { + // We have already run the loop one last time with the + // character set to the psueudo {eof} value. Now it is time + // to unconditionally bail out. + break; + } + // Run the loop one last time with the fake end-of-input character category. + mode = RBBI_END; + category = 1; + } + + // + // Get the char category. An incoming category of 1 or 2 means that + // we are preset for doing the beginning or end of input, and + // that we shouldn't get a category from an actual text input character. + // + if (mode == RBBI_RUN) { + // look up the current character's character category, which tells us + // which column in the state table to look at. + category = trieFunc(fData->fTrie, c); + fDictionaryCharCount += (category >= dictStart); + } + + #ifdef RBBI_DEBUG + if (gTrace) { + RBBIDebugPrintf(" %4" PRId64 " ", utext_getNativeIndex(&fText)); + if (0x20<=c && c<0x7f) { + RBBIDebugPrintf("\"%c\" ", c); + } else { + RBBIDebugPrintf("%5x ", c); + } + RBBIDebugPrintf("%3d %3d\n", state, category); + } + #endif + + // State Transition - move machine to its next state + // + + // fNextState is a variable-length array. + U_ASSERT(category<fData->fHeader->fCatCount); + state = row->fNextState[category]; /*Not accessing beyond memory*/ + row = (RowType *) + // (statetable->fTableData + (statetable->fRowLen * state)); + (tableData + tableRowLen * state); + + + uint16_t accepting = row->fAccepting; + if (accepting == ACCEPTING_UNCONDITIONAL) { + // Match found, common case. + if (mode != RBBI_START) { + result = (int32_t)UTEXT_GETNATIVEINDEX(&fText); + } + fRuleStatusIndex = row->fTagsIdx; // Remember the break status (tag) values. + } else if (accepting > ACCEPTING_UNCONDITIONAL) { + // Lookahead match is completed. + U_ASSERT(accepting < fData->fForwardTable->fLookAheadResultsSize); + int32_t lookaheadResult = fLookAheadMatches[accepting]; + if (lookaheadResult >= 0) { + fRuleStatusIndex = row->fTagsIdx; + fPosition = lookaheadResult; + return lookaheadResult; + } + } + + // If we are at the position of the '/' in a look-ahead (hard break) rule; + // record the current position, to be returned later, if the full rule matches. + // TODO: Move this check before the previous check of fAccepting. + // This would enable hard-break rules with no following context. + // But there are line break test failures when trying this. Investigate. + // Issue ICU-20837 + uint16_t rule = row->fLookAhead; + U_ASSERT(rule == 0 || rule > ACCEPTING_UNCONDITIONAL); + U_ASSERT(rule == 0 || rule < fData->fForwardTable->fLookAheadResultsSize); + if (rule > ACCEPTING_UNCONDITIONAL) { + int32_t pos = (int32_t)UTEXT_GETNATIVEINDEX(&fText); + fLookAheadMatches[rule] = pos; + } + + if (state == STOP_STATE) { + // This is the normal exit from the lookup state machine. + // We have advanced through the string until it is certain that no + // longer match is possible, no matter what characters follow. + break; + } + + // Advance to the next character. + // If this is a beginning-of-input loop iteration, don't advance + // the input position. The next iteration will be processing the + // first real input character. + if (mode == RBBI_RUN) { + c = UTEXT_NEXT32(&fText); + } else { + if (mode == RBBI_START) { + mode = RBBI_RUN; + } + } + } + + // The state machine is done. Check whether it found a match... + + // If the iterator failed to advance in the match engine, force it ahead by one. + // (This really indicates a defect in the break rules. They should always match + // at least one character.) + if (result == initialPosition) { + utext_setNativeIndex(&fText, initialPosition); + utext_next32(&fText); + result = (int32_t)utext_getNativeIndex(&fText); + fRuleStatusIndex = 0; + } + + // Leave the iterator at our result position. + fPosition = result; + #ifdef RBBI_DEBUG + if (gTrace) { + RBBIDebugPrintf("result = %d\n\n", result); + } + #endif + return result; +} + + +//----------------------------------------------------------------------------------- +// +// handleSafePrevious() +// +// Iterate backwards using the safe reverse rules. +// The logic of this function is similar to handleNext(), but simpler +// because the safe table does not require as many options. +// +//----------------------------------------------------------------------------------- +template <typename RowType, RuleBasedBreakIterator::PTrieFunc trieFunc> +int32_t RuleBasedBreakIterator::handleSafePrevious(int32_t fromPosition) { + + int32_t state; + uint16_t category = 0; + RowType *row; + UChar32 c; + int32_t result = 0; + + const RBBIStateTable *stateTable = fData->fReverseTable; + UTEXT_SETNATIVEINDEX(&fText, fromPosition); + #ifdef RBBI_DEBUG + if (gTrace) { + RBBIDebugPuts("Handle Previous pos char state category"); + } + #endif + + // if we're already at the start of the text, return DONE. + if (fData == NULL || UTEXT_GETNATIVEINDEX(&fText)==0) { + return BreakIterator::DONE; + } + + // Set the initial state for the state machine + c = UTEXT_PREVIOUS32(&fText); + state = START_STATE; + row = (RowType *) + (stateTable->fTableData + (stateTable->fRowLen * state)); + + // loop until we reach the start of the text or transition to state 0 + // + for (; c != U_SENTINEL; c = UTEXT_PREVIOUS32(&fText)) { + + // look up the current character's character category, which tells us + // which column in the state table to look at. + // + // Off the dictionary flag bit. For reverse iteration it is not used. + category = trieFunc(fData->fTrie, c); + + #ifdef RBBI_DEBUG + if (gTrace) { + RBBIDebugPrintf(" %4d ", (int32_t)utext_getNativeIndex(&fText)); + if (0x20<=c && c<0x7f) { + RBBIDebugPrintf("\"%c\" ", c); + } else { + RBBIDebugPrintf("%5x ", c); + } + RBBIDebugPrintf("%3d %3d\n", state, category); + } + #endif + + // State Transition - move machine to its next state + // + // fNextState is a variable-length array. + U_ASSERT(category<fData->fHeader->fCatCount); + state = row->fNextState[category]; /*Not accessing beyond memory*/ + row = (RowType *) + (stateTable->fTableData + (stateTable->fRowLen * state)); + + if (state == STOP_STATE) { + // This is the normal exit from the lookup state machine. + // Transistion to state zero means we have found a safe point. + break; + } + } + + // The state machine is done. Check whether it found a match... + result = (int32_t)UTEXT_GETNATIVEINDEX(&fText); + #ifdef RBBI_DEBUG + if (gTrace) { + RBBIDebugPrintf("result = %d\n\n", result); + } + #endif + return result; +} + + +//------------------------------------------------------------------------------- +// +// getRuleStatus() Return the break rule tag associated with the current +// iterator position. If the iterator arrived at its current +// position by iterating forwards, the value will have been +// cached by the handleNext() function. +// +//------------------------------------------------------------------------------- + +int32_t RuleBasedBreakIterator::getRuleStatus() const { + + // fLastRuleStatusIndex indexes to the start of the appropriate status record + // (the number of status values.) + // This function returns the last (largest) of the array of status values. + int32_t idx = fRuleStatusIndex + fData->fRuleStatusTable[fRuleStatusIndex]; + int32_t tagVal = fData->fRuleStatusTable[idx]; + + return tagVal; +} + + +int32_t RuleBasedBreakIterator::getRuleStatusVec( + int32_t *fillInVec, int32_t capacity, UErrorCode &status) { + if (U_FAILURE(status)) { + return 0; + } + + int32_t numVals = fData->fRuleStatusTable[fRuleStatusIndex]; + int32_t numValsToCopy = numVals; + if (numVals > capacity) { + status = U_BUFFER_OVERFLOW_ERROR; + numValsToCopy = capacity; + } + int i; + for (i=0; i<numValsToCopy; i++) { + fillInVec[i] = fData->fRuleStatusTable[fRuleStatusIndex + i + 1]; + } + return numVals; +} + + + +//------------------------------------------------------------------------------- +// +// getBinaryRules Access to the compiled form of the rules, +// for use by build system tools that save the data +// for standard iterator types. +// +//------------------------------------------------------------------------------- +const uint8_t *RuleBasedBreakIterator::getBinaryRules(uint32_t &length) { + const uint8_t *retPtr = NULL; + length = 0; + + if (fData != NULL) { + retPtr = (const uint8_t *)fData->fHeader; + length = fData->fHeader->fLength; + } + return retPtr; +} + + +RuleBasedBreakIterator *RuleBasedBreakIterator::createBufferClone( + void * /*stackBuffer*/, int32_t &bufferSize, UErrorCode &status) { + if (U_FAILURE(status)){ + return NULL; + } + + if (bufferSize == 0) { + bufferSize = 1; // preflighting for deprecated functionality + return NULL; + } + + BreakIterator *clonedBI = clone(); + if (clonedBI == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + } else { + status = U_SAFECLONE_ALLOCATED_WARNING; + } + return (RuleBasedBreakIterator *)clonedBI; +} + +U_NAMESPACE_END + + +static icu::UStack *gLanguageBreakFactories = nullptr; +static const icu::UnicodeString *gEmptyString = nullptr; +static icu::UInitOnce gLanguageBreakFactoriesInitOnce = U_INITONCE_INITIALIZER; +static icu::UInitOnce gRBBIInitOnce = U_INITONCE_INITIALIZER; + +/** + * Release all static memory held by breakiterator. + */ +U_CDECL_BEGIN +UBool U_CALLCONV rbbi_cleanup(void) { + delete gLanguageBreakFactories; + gLanguageBreakFactories = nullptr; + delete gEmptyString; + gEmptyString = nullptr; + gLanguageBreakFactoriesInitOnce.reset(); + gRBBIInitOnce.reset(); + return TRUE; +} +U_CDECL_END + +U_CDECL_BEGIN +static void U_CALLCONV _deleteFactory(void *obj) { + delete (icu::LanguageBreakFactory *) obj; +} +U_CDECL_END +U_NAMESPACE_BEGIN + +static void U_CALLCONV rbbiInit() { + gEmptyString = new UnicodeString(); + ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup); +} + +static void U_CALLCONV initLanguageFactories() { + UErrorCode status = U_ZERO_ERROR; + U_ASSERT(gLanguageBreakFactories == NULL); + gLanguageBreakFactories = new UStack(_deleteFactory, NULL, status); + if (gLanguageBreakFactories != NULL && U_SUCCESS(status)) { + ICULanguageBreakFactory *builtIn = new ICULanguageBreakFactory(status); + gLanguageBreakFactories->push(builtIn, status); +#ifdef U_LOCAL_SERVICE_HOOK + LanguageBreakFactory *extra = (LanguageBreakFactory *)uprv_svc_hook("languageBreakFactory", &status); + if (extra != NULL) { + gLanguageBreakFactories->push(extra, status); + } +#endif + } + ucln_common_registerCleanup(UCLN_COMMON_RBBI, rbbi_cleanup); +} + + +static const LanguageBreakEngine* +getLanguageBreakEngineFromFactory(UChar32 c) +{ + umtx_initOnce(gLanguageBreakFactoriesInitOnce, &initLanguageFactories); + if (gLanguageBreakFactories == NULL) { + return NULL; + } + + int32_t i = gLanguageBreakFactories->size(); + const LanguageBreakEngine *lbe = NULL; + while (--i >= 0) { + LanguageBreakFactory *factory = (LanguageBreakFactory *)(gLanguageBreakFactories->elementAt(i)); + lbe = factory->getEngineFor(c); + if (lbe != NULL) { + break; + } + } + return lbe; +} + + +//------------------------------------------------------------------------------- +// +// getLanguageBreakEngine Find an appropriate LanguageBreakEngine for the +// the character c. +// +//------------------------------------------------------------------------------- +const LanguageBreakEngine * +RuleBasedBreakIterator::getLanguageBreakEngine(UChar32 c) { + const LanguageBreakEngine *lbe = NULL; + UErrorCode status = U_ZERO_ERROR; + + if (fLanguageBreakEngines == NULL) { + fLanguageBreakEngines = new UStack(status); + if (fLanguageBreakEngines == NULL || U_FAILURE(status)) { + delete fLanguageBreakEngines; + fLanguageBreakEngines = 0; + return NULL; + } + } + + int32_t i = fLanguageBreakEngines->size(); + while (--i >= 0) { + lbe = (const LanguageBreakEngine *)(fLanguageBreakEngines->elementAt(i)); + if (lbe->handles(c)) { + return lbe; + } + } + + // No existing dictionary took the character. See if a factory wants to + // give us a new LanguageBreakEngine for this character. + lbe = getLanguageBreakEngineFromFactory(c); + + // If we got one, use it and push it on our stack. + if (lbe != NULL) { + fLanguageBreakEngines->push((void *)lbe, status); + // Even if we can't remember it, we can keep looking it up, so + // return it even if the push fails. + return lbe; + } + + // No engine is forthcoming for this character. Add it to the + // reject set. Create the reject break engine if needed. + if (fUnhandledBreakEngine == NULL) { + fUnhandledBreakEngine = new UnhandledEngine(status); + if (U_SUCCESS(status) && fUnhandledBreakEngine == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + // Put it last so that scripts for which we have an engine get tried + // first. + fLanguageBreakEngines->insertElementAt(fUnhandledBreakEngine, 0, status); + // If we can't insert it, or creation failed, get rid of it + if (U_FAILURE(status)) { + delete fUnhandledBreakEngine; + fUnhandledBreakEngine = 0; + return NULL; + } + } + + // Tell the reject engine about the character; at its discretion, it may + // add more than just the one character. + fUnhandledBreakEngine->handleCharacter(c); + + return fUnhandledBreakEngine; +} + +void RuleBasedBreakIterator::dumpCache() { + fBreakCache->dumpCache(); +} + +void RuleBasedBreakIterator::dumpTables() { + fData->printData(); +} + +/** + * Returns the description used to create this iterator + */ + +const UnicodeString& +RuleBasedBreakIterator::getRules() const { + if (fData != NULL) { + return fData->getRuleSourceString(); + } else { + umtx_initOnce(gRBBIInitOnce, &rbbiInit); + return *gEmptyString; + } +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/thirdparty/icu4c/common/rbbi_cache.cpp b/thirdparty/icu4c/common/rbbi_cache.cpp new file mode 100644 index 0000000000..63ff3001c7 --- /dev/null +++ b/thirdparty/icu4c/common/rbbi_cache.cpp @@ -0,0 +1,655 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// file: rbbi_cache.cpp + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/ubrk.h" +#include "unicode/rbbi.h" + +#include "rbbi_cache.h" + +#include "brkeng.h" +#include "cmemory.h" +#include "rbbidata.h" +#include "rbbirb.h" +#include "uassert.h" +#include "uvectr32.h" + +U_NAMESPACE_BEGIN + +/* + * DictionaryCache implementation + */ + +RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) : + fBI(bi), fBreaks(status), fPositionInCache(-1), + fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) { +} + +RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() { +} + +void RuleBasedBreakIterator::DictionaryCache::reset() { + fPositionInCache = -1; + fStart = 0; + fLimit = 0; + fFirstRuleStatusIndex = 0; + fOtherRuleStatusIndex = 0; + fBreaks.removeAllElements(); +} + +UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) { + if (fromPos >= fLimit || fromPos < fStart) { + fPositionInCache = -1; + return FALSE; + } + + // Sequential iteration, move from previous boundary to the following + + int32_t r = 0; + if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) { + ++fPositionInCache; + if (fPositionInCache >= fBreaks.size()) { + fPositionInCache = -1; + return FALSE; + } + r = fBreaks.elementAti(fPositionInCache); + U_ASSERT(r > fromPos); + *result = r; + *statusIndex = fOtherRuleStatusIndex; + return TRUE; + } + + // Random indexing. Linear search for the boundary following the given position. + + for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) { + r= fBreaks.elementAti(fPositionInCache); + if (r > fromPos) { + *result = r; + *statusIndex = fOtherRuleStatusIndex; + return TRUE; + } + } + UPRV_UNREACHABLE; +} + + +UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) { + if (fromPos <= fStart || fromPos > fLimit) { + fPositionInCache = -1; + return FALSE; + } + + if (fromPos == fLimit) { + fPositionInCache = fBreaks.size() - 1; + if (fPositionInCache >= 0) { + U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos); + } + } + + int32_t r; + if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) { + --fPositionInCache; + r = fBreaks.elementAti(fPositionInCache); + U_ASSERT(r < fromPos); + *result = r; + *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex; + return TRUE; + } + + if (fPositionInCache == 0) { + fPositionInCache = -1; + return FALSE; + } + + for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) { + r = fBreaks.elementAti(fPositionInCache); + if (r < fromPos) { + *result = r; + *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex; + return TRUE; + } + } + UPRV_UNREACHABLE; +} + +void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos, + int32_t firstRuleStatus, int32_t otherRuleStatus) { + if ((endPos - startPos) <= 1) { + return; + } + + reset(); + fFirstRuleStatusIndex = firstRuleStatus; + fOtherRuleStatusIndex = otherRuleStatus; + + int32_t rangeStart = startPos; + int32_t rangeEnd = endPos; + + uint16_t category; + int32_t current; + UErrorCode status = U_ZERO_ERROR; + int32_t foundBreakCount = 0; + UText *text = &fBI->fText; + + // Loop through the text, looking for ranges of dictionary characters. + // For each span, find the appropriate break engine, and ask it to find + // any breaks within the span. + + utext_setNativeIndex(text, rangeStart); + UChar32 c = utext_current32(text); + category = ucptrie_get(fBI->fData->fTrie, c); + uint32_t dictStart = fBI->fData->fForwardTable->fDictCategoriesStart; + + while(U_SUCCESS(status)) { + while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd + && (category < dictStart)) { + utext_next32(text); // TODO: cleaner loop structure. + c = utext_current32(text); + category = ucptrie_get(fBI->fData->fTrie, c); + } + if (current >= rangeEnd) { + break; + } + + // We now have a dictionary character. Get the appropriate language object + // to deal with it. + const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c); + + // Ask the language object if there are any breaks. It will add them to the cache and + // leave the text pointer on the other side of its range, ready to search for the next one. + if (lbe != NULL) { + foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks); + } + + // Reload the loop variables for the next go-round + c = utext_current32(text); + category = ucptrie_get(fBI->fData->fTrie, c); + } + + // If we found breaks, ensure that the first and last entries are + // the original starting and ending position. And initialize the + // cache iteration position to the first entry. + + // printf("foundBreakCount = %d\n", foundBreakCount); + if (foundBreakCount > 0) { + U_ASSERT(foundBreakCount == fBreaks.size()); + if (startPos < fBreaks.elementAti(0)) { + // The dictionary did not place a boundary at the start of the segment of text. + // Add one now. This should not commonly happen, but it would be easy for interactions + // of the rules for dictionary segments and the break engine implementations to + // inadvertently cause it. Cover it here, just in case. + fBreaks.insertElementAt(startPos, 0, status); + } + if (endPos > fBreaks.peeki()) { + fBreaks.push(endPos, status); + } + fPositionInCache = 0; + // Note: Dictionary matching may extend beyond the original limit. + fStart = fBreaks.elementAti(0); + fLimit = fBreaks.peeki(); + } else { + // there were no language-based breaks, even though the segment contained + // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache + // for this range will fail, and the calling code will fall back to the rule based boundaries. + } +} + + +/* + * BreakCache implemetation + */ + +RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) : + fBI(bi), fSideBuffer(status) { + reset(); +} + + +RuleBasedBreakIterator::BreakCache::~BreakCache() { +} + + +void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) { + fStartBufIdx = 0; + fEndBufIdx = 0; + fTextIdx = pos; + fBufIdx = 0; + fBoundaries[0] = pos; + fStatuses[0] = (uint16_t)ruleStatus; +} + + +int32_t RuleBasedBreakIterator::BreakCache::current() { + fBI->fPosition = fTextIdx; + fBI->fRuleStatusIndex = fStatuses[fBufIdx]; + fBI->fDone = FALSE; + return fTextIdx; +} + + +void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) { + // startPos is in the cache. Do a next() from that position. + // TODO: an awkward set of interactions with bi->fDone + // seek() does not clear it; it can't because of interactions with populateNear(). + // next() does not clear it in the fast-path case, where everything matters. Maybe it should. + // So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end. + fBI->fDone = false; + next(); + } + return; +} + + +void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) { + if (startPos == fTextIdx) { + previous(status); + } else { + // seek() leaves the BreakCache positioned at the preceding boundary + // if the requested position is between two bounaries. + // current() pushes the BreakCache position out to the BreakIterator itself. + U_ASSERT(startPos > fTextIdx); + current(); + } + } + return; +} + + +/* + * Out-of-line code for BreakCache::next(). + * Cache does not already contain the boundary + */ +void RuleBasedBreakIterator::BreakCache::nextOL() { + fBI->fDone = !populateFollowing(); + fBI->fPosition = fTextIdx; + fBI->fRuleStatusIndex = fStatuses[fBufIdx]; + return; +} + + +void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + int32_t initialBufIdx = fBufIdx; + if (fBufIdx == fStartBufIdx) { + // At start of cache. Prepend to it. + populatePreceding(status); + } else { + // Cache already holds the next boundary + fBufIdx = modChunkSize(fBufIdx - 1); + fTextIdx = fBoundaries[fBufIdx]; + } + fBI->fDone = (fBufIdx == initialBufIdx); + fBI->fPosition = fTextIdx; + fBI->fRuleStatusIndex = fStatuses[fBufIdx]; + return; +} + + +UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) { + if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) { + return FALSE; + } + if (pos == fBoundaries[fStartBufIdx]) { + // Common case: seek(0), from BreakIterator::first() + fBufIdx = fStartBufIdx; + fTextIdx = fBoundaries[fBufIdx]; + return TRUE; + } + if (pos == fBoundaries[fEndBufIdx]) { + fBufIdx = fEndBufIdx; + fTextIdx = fBoundaries[fBufIdx]; + return TRUE; + } + + int32_t min = fStartBufIdx; + int32_t max = fEndBufIdx; + while (min != max) { + int32_t probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2; + probe = modChunkSize(probe); + if (fBoundaries[probe] > pos) { + max = probe; + } else { + min = modChunkSize(probe + 1); + } + } + U_ASSERT(fBoundaries[max] > pos); + fBufIdx = modChunkSize(max - 1); + fTextIdx = fBoundaries[fBufIdx]; + U_ASSERT(fTextIdx <= pos); + return TRUE; +} + + +UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) { + if (U_FAILURE(status)) { + return FALSE; + } + U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]); + + // Find a boundary somewhere in the vicinity of the requested position. + // Depending on the safe rules and the text data, it could be either before, at, or after + // the requested position. + + + // If the requested position is not near already cached positions, clear the existing cache, + // find a near-by boundary and begin new cache contents there. + + if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) { + int32_t aBoundary = 0; + int32_t ruleStatusIndex = 0; + if (position > 20) { + int32_t backupPos = fBI->handleSafePrevious(position); + + if (backupPos > 0) { + // Advance to the boundary following the backup position. + // There is a complication: the safe reverse rules identify pairs of code points + // that are safe. If advancing from the safe point moves forwards by less than + // two code points, we need to advance one more time to ensure that the boundary + // is good, including a correct rules status value. + // + fBI->fPosition = backupPos; + aBoundary = fBI->handleNext(); + if (aBoundary <= backupPos + 4) { + // +4 is a quick test for possibly having advanced only one codepoint. + // Four being the length of the longest potential code point, a supplementary in UTF-8 + utext_setNativeIndex(&fBI->fText, aBoundary); + if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) { + // The initial handleNext() only advanced by a single code point. Go again. + aBoundary = fBI->handleNext(); // Safe rules identify safe pairs. + } + } + ruleStatusIndex = fBI->fRuleStatusIndex; + } + } + reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point. + } + + // Fill in boundaries between existing cache content and the new requested position. + + if (fBoundaries[fEndBufIdx] < position) { + // The last position in the cache precedes the requested position. + // Add following position(s) to the cache. + while (fBoundaries[fEndBufIdx] < position) { + if (!populateFollowing()) { + UPRV_UNREACHABLE; + } + } + fBufIdx = fEndBufIdx; // Set iterator position to the end of the buffer. + fTextIdx = fBoundaries[fBufIdx]; // Required because populateFollowing may add extra boundaries. + while (fTextIdx > position) { // Move backwards to a position at or preceding the requested pos. + previous(status); + } + return true; + } + + if (fBoundaries[fStartBufIdx] > position) { + // The first position in the cache is beyond the requested position. + // back up more until we get a boundary <= the requested position. + while (fBoundaries[fStartBufIdx] > position) { + populatePreceding(status); + } + fBufIdx = fStartBufIdx; // Set iterator position to the start of the buffer. + fTextIdx = fBoundaries[fBufIdx]; // Required because populatePreceding may add extra boundaries. + while (fTextIdx < position) { // Move forwards to a position at or following the requested pos. + next(); + } + if (fTextIdx > position) { + // If position is not itself a boundary, the next() loop above will overshoot. + // Back up one, leaving cache position at the boundary preceding the requested position. + previous(status); + } + return true; + } + + U_ASSERT(fTextIdx == position); + return true; +} + + + +UBool RuleBasedBreakIterator::BreakCache::populateFollowing() { + int32_t fromPosition = fBoundaries[fEndBufIdx]; + int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx]; + int32_t pos = 0; + int32_t ruleStatusIdx = 0; + + if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) { + addFollowing(pos, ruleStatusIdx, UpdateCachePosition); + return TRUE; + } + + fBI->fPosition = fromPosition; + pos = fBI->handleNext(); + if (pos == UBRK_DONE) { + return FALSE; + } + + ruleStatusIdx = fBI->fRuleStatusIndex; + if (fBI->fDictionaryCharCount > 0) { + // The text segment obtained from the rules includes dictionary characters. + // Subdivide it, with subdivided results going into the dictionary cache. + fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx); + if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) { + addFollowing(pos, ruleStatusIdx, UpdateCachePosition); + return TRUE; + // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point. + // But be careful with interactions with populateNear(). + } + } + + // Rule based segment did not include dictionary characters. + // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them, + // meaning that we didn't take the return, above. + // Add its end point to the cache. + addFollowing(pos, ruleStatusIdx, UpdateCachePosition); + + // Add several non-dictionary boundaries at this point, to optimize straight forward iteration. + // (subsequent calls to BreakIterator::next() will take the fast path, getting cached results. + // + for (int count=0; count<6; ++count) { + pos = fBI->handleNext(); + if (pos == UBRK_DONE || fBI->fDictionaryCharCount > 0) { + break; + } + addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition); + } + + return TRUE; +} + + +UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) { + if (U_FAILURE(status)) { + return FALSE; + } + + int32_t fromPosition = fBoundaries[fStartBufIdx]; + if (fromPosition == 0) { + return FALSE; + } + + int32_t position = 0; + int32_t positionStatusIdx = 0; + + if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) { + addPreceding(position, positionStatusIdx, UpdateCachePosition); + return TRUE; + } + + int32_t backupPosition = fromPosition; + + // Find a boundary somewhere preceding the first already-cached boundary + do { + backupPosition = backupPosition - 30; + if (backupPosition <= 0) { + backupPosition = 0; + } else { + backupPosition = fBI->handleSafePrevious(backupPosition); + } + if (backupPosition == UBRK_DONE || backupPosition == 0) { + position = 0; + positionStatusIdx = 0; + } else { + // Advance to the boundary following the backup position. + // There is a complication: the safe reverse rules identify pairs of code points + // that are safe. If advancing from the safe point moves forwards by less than + // two code points, we need to advance one more time to ensure that the boundary + // is good, including a correct rules status value. + // + fBI->fPosition = backupPosition; + position = fBI->handleNext(); + if (position <= backupPosition + 4) { + // +4 is a quick test for possibly having advanced only one codepoint. + // Four being the length of the longest potential code point, a supplementary in UTF-8 + utext_setNativeIndex(&fBI->fText, position); + if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) { + // The initial handleNext() only advanced by a single code point. Go again. + position = fBI->handleNext(); // Safe rules identify safe pairs. + } + } + positionStatusIdx = fBI->fRuleStatusIndex; + } + } while (position >= fromPosition); + + // Find boundaries between the one we just located and the first already-cached boundary + // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer.. + + fSideBuffer.removeAllElements(); + fSideBuffer.addElement(position, status); + fSideBuffer.addElement(positionStatusIdx, status); + + do { + int32_t prevPosition = fBI->fPosition = position; + int32_t prevStatusIdx = positionStatusIdx; + position = fBI->handleNext(); + positionStatusIdx = fBI->fRuleStatusIndex; + if (position == UBRK_DONE) { + break; + } + + UBool segmentHandledByDictionary = FALSE; + if (fBI->fDictionaryCharCount != 0) { + // Segment from the rules includes dictionary characters. + // Subdivide it, with subdivided results going into the dictionary cache. + int32_t dictSegEndPosition = position; + fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx); + while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) { + segmentHandledByDictionary = true; + U_ASSERT(position > prevPosition); + if (position >= fromPosition) { + break; + } + U_ASSERT(position <= dictSegEndPosition); + fSideBuffer.addElement(position, status); + fSideBuffer.addElement(positionStatusIdx, status); + prevPosition = position; + } + U_ASSERT(position==dictSegEndPosition || position>=fromPosition); + } + + if (!segmentHandledByDictionary && position < fromPosition) { + fSideBuffer.addElement(position, status); + fSideBuffer.addElement(positionStatusIdx, status); + } + } while (position < fromPosition); + + // Move boundaries from the side buffer to the main circular buffer. + UBool success = FALSE; + if (!fSideBuffer.isEmpty()) { + positionStatusIdx = fSideBuffer.popi(); + position = fSideBuffer.popi(); + addPreceding(position, positionStatusIdx, UpdateCachePosition); + success = TRUE; + } + + while (!fSideBuffer.isEmpty()) { + positionStatusIdx = fSideBuffer.popi(); + position = fSideBuffer.popi(); + if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) { + // No space in circular buffer to hold a new preceding result while + // also retaining the current cache (iteration) position. + // Bailing out is safe; the cache will refill again if needed. + break; + } + } + + return success; +} + + +void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) { + U_ASSERT(position > fBoundaries[fEndBufIdx]); + U_ASSERT(ruleStatusIdx <= UINT16_MAX); + int32_t nextIdx = modChunkSize(fEndBufIdx + 1); + if (nextIdx == fStartBufIdx) { + fStartBufIdx = modChunkSize(fStartBufIdx + 6); // TODO: experiment. Probably revert to 1. + } + fBoundaries[nextIdx] = position; + fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx); + fEndBufIdx = nextIdx; + if (update == UpdateCachePosition) { + // Set current position to the newly added boundary. + fBufIdx = nextIdx; + fTextIdx = position; + } else { + // Retaining the original cache position. + // Check if the added boundary wraps around the buffer, and would over-write the original position. + // It's the responsibility of callers of this function to not add too many. + U_ASSERT(nextIdx != fBufIdx); + } +} + +bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) { + U_ASSERT(position < fBoundaries[fStartBufIdx]); + U_ASSERT(ruleStatusIdx <= UINT16_MAX); + int32_t nextIdx = modChunkSize(fStartBufIdx - 1); + if (nextIdx == fEndBufIdx) { + if (fBufIdx == fEndBufIdx && update == RetainCachePosition) { + // Failure. The insertion of the new boundary would claim the buffer position that is the + // current iteration position. And we also want to retain the current iteration position. + // (The buffer is already completely full of entries that precede the iteration position.) + return false; + } + fEndBufIdx = modChunkSize(fEndBufIdx - 1); + } + fBoundaries[nextIdx] = position; + fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx); + fStartBufIdx = nextIdx; + if (update == UpdateCachePosition) { + fBufIdx = nextIdx; + fTextIdx = position; + } + return true; +} + + +void RuleBasedBreakIterator::BreakCache::dumpCache() { +#ifdef RBBI_DEBUG + RBBIDebugPrintf("fTextIdx:%d fBufIdx:%d\n", fTextIdx, fBufIdx); + for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+1)) { + RBBIDebugPrintf("%d %d\n", i, fBoundaries[i]); + if (i == fEndBufIdx) { + break; + } + } +#endif +} + +U_NAMESPACE_END + +#endif // #if !UCONFIG_NO_BREAK_ITERATION diff --git a/thirdparty/icu4c/common/rbbi_cache.h b/thirdparty/icu4c/common/rbbi_cache.h new file mode 100644 index 0000000000..597312e85c --- /dev/null +++ b/thirdparty/icu4c/common/rbbi_cache.h @@ -0,0 +1,203 @@ +// Copyright (C) 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// file: rbbi_cache.h +// +#ifndef RBBI_CACHE_H +#define RBBI_CACHE_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/rbbi.h" +#include "unicode/uobject.h" + +#include "uvectr32.h" + +U_NAMESPACE_BEGIN + +/* DictionaryCache stores the boundaries obtained from a run of dictionary characters. + * Dictionary boundaries are moved first to this cache, then from here + * to the main BreakCache, where they may inter-leave with non-dictionary + * boundaries. The public BreakIterator API always fetches directly + * from the main BreakCache, not from here. + * + * In common situations, the number of boundaries in a single dictionary run + * should be quite small, it will be terminated by punctuation, spaces, + * or any other non-dictionary characters. The main BreakCache may end + * up with boundaries from multiple dictionary based runs. + * + * The boundaries are stored in a simple ArrayList (vector), with the + * assumption that they will be accessed sequentially. + */ +class RuleBasedBreakIterator::DictionaryCache: public UMemory { + public: + DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status); + ~DictionaryCache(); + + void reset(); + + UBool following(int32_t fromPos, int32_t *pos, int32_t *statusIndex); + UBool preceding(int32_t fromPos, int32_t *pos, int32_t *statusIndex); + + /** + * Populate the cache with the dictionary based boundaries within a region of text. + * @param startPos The start position of a range of text + * @param endPos The end position of a range of text + * @param firstRuleStatus The rule status index that applies to the break at startPos + * @param otherRuleStatus The rule status index that applies to boundaries other than startPos + * @internal + */ + void populateDictionary(int32_t startPos, int32_t endPos, + int32_t firstRuleStatus, int32_t otherRuleStatus); + + + + RuleBasedBreakIterator *fBI; + + UVector32 fBreaks; // A vector containing the boundaries. + int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following() + // or preceding(). Optimizes sequential access. + int32_t fStart; // Text position of first boundary in cache. + int32_t fLimit; // Last boundary in cache. Which is the limit of the + // text segment being handled by the dictionary. + int32_t fFirstRuleStatusIndex; // Rule status info for first boundary. + int32_t fOtherRuleStatusIndex; // Rule status info for 2nd through last boundaries. +}; + + +/* + * class BreakCache + * + * Cache of break boundary positions and rule status values. + * Break iterator API functions, next(), previous(), etc., will use cached results + * when possible, and otherwise cache new results as they are obtained. + * + * Uniformly caches both dictionary and rule based (non-dictionary) boundaries. + * + * The cache is implemented as a single circular buffer. + */ + +/* + * size of the circular cache buffer. + */ + +class RuleBasedBreakIterator::BreakCache: public UMemory { + public: + BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status); + virtual ~BreakCache(); + void reset(int32_t pos = 0, int32_t ruleStatus = 0); + void next() { if (fBufIdx == fEndBufIdx) { + nextOL(); + } else { + fBufIdx = modChunkSize(fBufIdx + 1); + fTextIdx = fBI->fPosition = fBoundaries[fBufIdx]; + fBI->fRuleStatusIndex = fStatuses[fBufIdx]; + } + } + + + void nextOL(); + void previous(UErrorCode &status); + + // Move the iteration state to the position following the startPosition. + // Input position must be pinned to the input length. + void following(int32_t startPosition, UErrorCode &status); + + void preceding(int32_t startPosition, UErrorCode &status); + + /* + * Update the state of the public BreakIterator (fBI) to reflect the + * current state of the break iterator cache (this). + */ + int32_t current(); + + /** + * Add boundaries to the cache near the specified position. + * The given position need not be a boundary itself. + * The input position must be within the range of the text, and + * on a code point boundary. + * If the requested position is a break boundary, leave the iteration + * position on it. + * If the requested position is not a boundary, leave the iteration + * position on the preceding boundary and include both the + * preceding and following boundaries in the cache. + * Additional boundaries, either preceding or following, may be added + * to the cache as a side effect. + * + * Return false if the operation failed. + */ + UBool populateNear(int32_t position, UErrorCode &status); + + /** + * Add boundary(s) to the cache following the current last boundary. + * Return false if at the end of the text, and no more boundaries can be added. + * Leave iteration position at the first newly added boundary, or unchanged if no boundary was added. + */ + UBool populateFollowing(); + + /** + * Add one or more boundaries to the cache preceding the first currently cached boundary. + * Leave the iteration position on the first added boundary. + * Return false if no boundaries could be added (if at the start of the text.) + */ + UBool populatePreceding(UErrorCode &status); + + enum UpdatePositionValues { + RetainCachePosition = 0, + UpdateCachePosition = 1 + }; + + /* + * Add the boundary following the current position. + * The current position can be left as it was, or changed to the newly added boundary, + * as specified by the update parameter. + */ + void addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update); + + + /* + * Add the boundary preceding the current position. + * The current position can be left as it was, or changed to the newly added boundary, + * as specified by the update parameter. + */ + bool addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update); + + /** + * Set the cache position to the specified position, or, if the position + * falls between to cached boundaries, to the preceding boundary. + * Fails if the requested position is outside of the range of boundaries currently held by the cache. + * The startPosition must be on a code point boundary. + * + * Return true if successful, false if the specified position is after + * the last cached boundary or before the first. + */ + UBool seek(int32_t startPosition); + + void dumpCache(); + + private: + static inline int32_t modChunkSize(int index) { return index & (CACHE_SIZE - 1); } + + static constexpr int32_t CACHE_SIZE = 128; + static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two."); + + RuleBasedBreakIterator *fBI; + int32_t fStartBufIdx; + int32_t fEndBufIdx; // inclusive + + int32_t fTextIdx; + int32_t fBufIdx; + + int32_t fBoundaries[CACHE_SIZE]; + uint16_t fStatuses[CACHE_SIZE]; + + UVector32 fSideBuffer; +}; + +U_NAMESPACE_END + +#endif // #if !UCONFIG_NO_BREAK_ITERATION + +#endif // RBBI_CACHE_H diff --git a/thirdparty/icu4c/common/rbbidata.cpp b/thirdparty/icu4c/common/rbbidata.cpp new file mode 100644 index 0000000000..193acafc44 --- /dev/null +++ b/thirdparty/icu4c/common/rbbidata.cpp @@ -0,0 +1,476 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +*************************************************************************** +* Copyright (C) 1999-2014 International Business Machines Corporation * +* and others. All rights reserved. * +*************************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/ucptrie.h" +#include "unicode/utypes.h" +#include "rbbidata.h" +#include "rbbirb.h" +#include "udatamem.h" +#include "cmemory.h" +#include "cstring.h" +#include "umutex.h" + +#include "uassert.h" + + +U_NAMESPACE_BEGIN + +//----------------------------------------------------------------------------- +// +// Constructors. +// +//----------------------------------------------------------------------------- +RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status) { + init0(); + init(data, status); +} + +RBBIDataWrapper::RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt, UErrorCode &status) { + init0(); + init(data, status); + fDontFreeData = TRUE; +} + +RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) { + init0(); + if (U_FAILURE(status)) { + return; + } + const DataHeader *dh = udm->pHeader; + int32_t headerSize = dh->dataHeader.headerSize; + if ( !(headerSize >= 20 && + dh->info.isBigEndian == U_IS_BIG_ENDIAN && + dh->info.charsetFamily == U_CHARSET_FAMILY && + dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk " + dh->info.dataFormat[1] == 0x72 && + dh->info.dataFormat[2] == 0x6b && + dh->info.dataFormat[3] == 0x20 && + isDataVersionAcceptable(dh->info.formatVersion)) + ) { + status = U_INVALID_FORMAT_ERROR; + return; + } + const char *dataAsBytes = reinterpret_cast<const char *>(dh); + const RBBIDataHeader *rbbidh = reinterpret_cast<const RBBIDataHeader *>(dataAsBytes + headerSize); + init(rbbidh, status); + fUDataMem = udm; +} + +UBool RBBIDataWrapper::isDataVersionAcceptable(const UVersionInfo version) { + return RBBI_DATA_FORMAT_VERSION[0] == version[0]; +} + + +//----------------------------------------------------------------------------- +// +// init(). Does most of the work of construction, shared between the +// constructors. +// +//----------------------------------------------------------------------------- +void RBBIDataWrapper::init0() { + fHeader = NULL; + fForwardTable = NULL; + fReverseTable = NULL; + fRuleSource = NULL; + fRuleStatusTable = NULL; + fTrie = NULL; + fUDataMem = NULL; + fRefCount = 0; + fDontFreeData = TRUE; +} + +void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + fHeader = data; + if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) { + status = U_INVALID_FORMAT_ERROR; + return; + } + // Note: in ICU version 3.2 and earlier, there was a formatVersion 1 + // that is no longer supported. At that time fFormatVersion was + // an int32_t field, rather than an array of 4 bytes. + + fDontFreeData = FALSE; + if (data->fFTableLen != 0) { + fForwardTable = (RBBIStateTable *)((char *)data + fHeader->fFTable); + } + if (data->fRTableLen != 0) { + fReverseTable = (RBBIStateTable *)((char *)data + fHeader->fRTable); + } + + fTrie = ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, + UCPTRIE_VALUE_BITS_ANY, + (uint8_t *)data + fHeader->fTrie, + fHeader->fTrieLen, + nullptr, // *actual length + &status); + if (U_FAILURE(status)) { + return; + } + + UCPTrieValueWidth width = ucptrie_getValueWidth(fTrie); + if (!(width == UCPTRIE_VALUE_BITS_8 || width == UCPTRIE_VALUE_BITS_16)) { + status = U_INVALID_FORMAT_ERROR; + return; + } + + fRuleSource = ((char *)data + fHeader->fRuleSource); + fRuleString = UnicodeString::fromUTF8(StringPiece(fRuleSource, fHeader->fRuleSourceLen)); + U_ASSERT(data->fRuleSourceLen > 0); + + fRuleStatusTable = (int32_t *)((char *)data + fHeader->fStatusTable); + fStatusMaxIdx = data->fStatusTableLen / sizeof(int32_t); + + fRefCount = 1; + +#ifdef RBBI_DEBUG + char *debugEnv = getenv("U_RBBIDEBUG"); + if (debugEnv && uprv_strstr(debugEnv, "data")) {this->printData();} +#endif +} + + +//----------------------------------------------------------------------------- +// +// Destructor. Don't call this - use removeReference() instead. +// +//----------------------------------------------------------------------------- +RBBIDataWrapper::~RBBIDataWrapper() { + U_ASSERT(fRefCount == 0); + ucptrie_close(fTrie); + fTrie = nullptr; + if (fUDataMem) { + udata_close(fUDataMem); + } else if (!fDontFreeData) { + uprv_free((void *)fHeader); + } +} + + + +//----------------------------------------------------------------------------- +// +// Operator == Consider two RBBIDataWrappers to be equal if they +// refer to the same underlying data. Although +// the data wrappers are normally shared between +// iterator instances, it's possible to independently +// open the same data twice, and get two instances, which +// should still be ==. +// +//----------------------------------------------------------------------------- +UBool RBBIDataWrapper::operator ==(const RBBIDataWrapper &other) const { + if (fHeader == other.fHeader) { + return TRUE; + } + if (fHeader->fLength != other.fHeader->fLength) { + return FALSE; + } + if (uprv_memcmp(fHeader, other.fHeader, fHeader->fLength) == 0) { + return TRUE; + } + return FALSE; +} + +int32_t RBBIDataWrapper::hashCode() { + return fHeader->fFTableLen; +} + + + +//----------------------------------------------------------------------------- +// +// Reference Counting. A single RBBIDataWrapper object is shared among +// however many RulesBasedBreakIterator instances are +// referencing the same data. +// +//----------------------------------------------------------------------------- +void RBBIDataWrapper::removeReference() { + if (umtx_atomic_dec(&fRefCount) == 0) { + delete this; + } +} + + +RBBIDataWrapper *RBBIDataWrapper::addReference() { + umtx_atomic_inc(&fRefCount); + return this; +} + + + +//----------------------------------------------------------------------------- +// +// getRuleSourceString +// +//----------------------------------------------------------------------------- +const UnicodeString &RBBIDataWrapper::getRuleSourceString() const { + return fRuleString; +} + + +//----------------------------------------------------------------------------- +// +// print - debugging function to dump the runtime data tables. +// +//----------------------------------------------------------------------------- +#ifdef RBBI_DEBUG +void RBBIDataWrapper::printTable(const char *heading, const RBBIStateTable *table) { + uint32_t c; + uint32_t s; + + RBBIDebugPrintf("%s\n", heading); + + RBBIDebugPrintf(" fDictCategoriesStart: %d\n", table->fDictCategoriesStart); + RBBIDebugPrintf(" fLookAheadResultsSize: %d\n", table->fLookAheadResultsSize); + RBBIDebugPrintf(" Flags: %4x RBBI_LOOKAHEAD_HARD_BREAK=%s RBBI_BOF_REQUIRED=%s RBBI_8BITS_ROWS=%s\n", + table->fFlags, + table->fFlags & RBBI_LOOKAHEAD_HARD_BREAK ? "T" : "F", + table->fFlags & RBBI_BOF_REQUIRED ? "T" : "F", + table->fFlags & RBBI_8BITS_ROWS ? "T" : "F"); + RBBIDebugPrintf("\nState | Acc LA TagIx"); + for (c=0; c<fHeader->fCatCount; c++) {RBBIDebugPrintf("%3d ", c);} + RBBIDebugPrintf("\n------|---------------"); for (c=0;c<fHeader->fCatCount; c++) { + RBBIDebugPrintf("----"); + } + RBBIDebugPrintf("\n"); + + if (table == NULL) { + RBBIDebugPrintf(" N U L L T A B L E\n\n"); + return; + } + UBool use8Bits = table->fFlags & RBBI_8BITS_ROWS; + for (s=0; s<table->fNumStates; s++) { + RBBIStateTableRow *row = (RBBIStateTableRow *) + (table->fTableData + (table->fRowLen * s)); + if (use8Bits) { + RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->r8.fAccepting, row->r8.fLookAhead, row->r8.fTagsIdx); + for (c=0; c<fHeader->fCatCount; c++) { + RBBIDebugPrintf("%3d ", row->r8.fNextState[c]); + } + } else { + RBBIDebugPrintf("%4d | %3d %3d %3d ", s, row->r16.fAccepting, row->r16.fLookAhead, row->r16.fTagsIdx); + for (c=0; c<fHeader->fCatCount; c++) { + RBBIDebugPrintf("%3d ", row->r16.fNextState[c]); + } + } + RBBIDebugPrintf("\n"); + } + RBBIDebugPrintf("\n"); +} +#endif + + +void RBBIDataWrapper::printData() { +#ifdef RBBI_DEBUG + RBBIDebugPrintf("RBBI Data at %p\n", (void *)fHeader); + RBBIDebugPrintf(" Version = {%d %d %d %d}\n", fHeader->fFormatVersion[0], fHeader->fFormatVersion[1], + fHeader->fFormatVersion[2], fHeader->fFormatVersion[3]); + RBBIDebugPrintf(" total length of data = %d\n", fHeader->fLength); + RBBIDebugPrintf(" number of character categories = %d\n\n", fHeader->fCatCount); + + printTable("Forward State Transition Table", fForwardTable); + printTable("Reverse State Transition Table", fReverseTable); + + RBBIDebugPrintf("\nOrignal Rules source:\n"); + for (int32_t c=0; fRuleSource[c] != 0; c++) { + RBBIDebugPrintf("%c", fRuleSource[c]); + } + RBBIDebugPrintf("\n\n"); +#endif +} + + +U_NAMESPACE_END +U_NAMESPACE_USE + +//----------------------------------------------------------------------------- +// +// ubrk_swap - byte swap and char encoding swap of RBBI data +// +//----------------------------------------------------------------------------- + +U_CAPI int32_t U_EXPORT2 +ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData, + UErrorCode *status) { + + if (status == NULL || U_FAILURE(*status)) { + return 0; + } + if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { + *status=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + // + // Check that the data header is for for break data. + // (Header contents are defined in genbrk.cpp) + // + const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData+4); + if(!( pInfo->dataFormat[0]==0x42 && /* dataFormat="Brk " */ + pInfo->dataFormat[1]==0x72 && + pInfo->dataFormat[2]==0x6b && + pInfo->dataFormat[3]==0x20 && + RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) { + udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0]); + *status=U_UNSUPPORTED_ERROR; + return 0; + } + + // + // Swap the data header. (This is the generic ICU Data Header, not the RBBI Specific + // RBBIDataHeader). This swap also conveniently gets us + // the size of the ICU d.h., which lets us locate the start + // of the RBBI specific data. + // + int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status); + + + // + // Get the RRBI Data Header, and check that it appears to be OK. + // + const uint8_t *inBytes =(const uint8_t *)inData+headerSize; + RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes; + if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 || + !RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) || + ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) { + udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n"); + *status=U_UNSUPPORTED_ERROR; + return 0; + } + + // + // Prefight operation? Just return the size + // + int32_t breakDataLength = ds->readUInt32(rbbiDH->fLength); + int32_t totalSize = headerSize + breakDataLength; + if (length < 0) { + return totalSize; + } + + // + // Check that length passed in is consistent with length from RBBI data header. + // + if (length < totalSize) { + udata_printError(ds, "ubrk_swap(): too few bytes (%d after ICU Data header) for break data.\n", + breakDataLength); + *status=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + + // + // Swap the Data. Do the data itself first, then the RBBI Data Header, because + // we need to reference the header to locate the data, and an + // inplace swap of the header leaves it unusable. + // + uint8_t *outBytes = (uint8_t *)outData + headerSize; + RBBIDataHeader *outputDH = (RBBIDataHeader *)outBytes; + + int32_t tableStartOffset; + int32_t tableLength; + + // + // If not swapping in place, zero out the output buffer before starting. + // Individual tables and other data items within are aligned to 8 byte boundaries + // when originally created. Any unused space between items needs to be zero. + // + if (inBytes != outBytes) { + uprv_memset(outBytes, 0, breakDataLength); + } + + // + // Each state table begins with several 32 bit fields. Calculate the size + // in bytes of these. + // + int32_t topSize = offsetof(RBBIStateTable, fTableData); + + // Forward state table. + tableStartOffset = ds->readUInt32(rbbiDH->fFTable); + tableLength = ds->readUInt32(rbbiDH->fFTableLen); + + if (tableLength > 0) { + RBBIStateTable *rbbiST = (RBBIStateTable *)(inBytes+tableStartOffset); + UBool use8Bits = ds->readUInt32(rbbiST->fFlags) & RBBI_8BITS_ROWS; + + ds->swapArray32(ds, inBytes+tableStartOffset, topSize, + outBytes+tableStartOffset, status); + + // Swap the state table if the table is in 16 bits. + if (use8Bits) { + if (outBytes != inBytes) { + uprv_memmove(outBytes+tableStartOffset+topSize, + inBytes+tableStartOffset+topSize, + tableLength-topSize); + } + } else { + ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize, + outBytes+tableStartOffset+topSize, status); + } + } + + // Reverse state table. Same layout as forward table, above. + tableStartOffset = ds->readUInt32(rbbiDH->fRTable); + tableLength = ds->readUInt32(rbbiDH->fRTableLen); + + if (tableLength > 0) { + RBBIStateTable *rbbiST = (RBBIStateTable *)(inBytes+tableStartOffset); + UBool use8Bits = ds->readUInt32(rbbiST->fFlags) & RBBI_8BITS_ROWS; + + ds->swapArray32(ds, inBytes+tableStartOffset, topSize, + outBytes+tableStartOffset, status); + + // Swap the state table if the table is in 16 bits. + if (use8Bits) { + if (outBytes != inBytes) { + uprv_memmove(outBytes+tableStartOffset+topSize, + inBytes+tableStartOffset+topSize, + tableLength-topSize); + } + } else { + ds->swapArray16(ds, inBytes+tableStartOffset+topSize, tableLength-topSize, + outBytes+tableStartOffset+topSize, status); + } + } + + // Trie table for character categories + ucptrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen), + outBytes+ds->readUInt32(rbbiDH->fTrie), status); + + // Source Rules Text. It's UTF8 data + if (outBytes != inBytes) { + uprv_memmove(outBytes+ds->readUInt32(rbbiDH->fRuleSource), + inBytes+ds->readUInt32(rbbiDH->fRuleSource), + ds->readUInt32(rbbiDH->fRuleSourceLen)); + } + + // Table of rule status values. It's all int_32 values + ds->swapArray32(ds, inBytes+ds->readUInt32(rbbiDH->fStatusTable), ds->readUInt32(rbbiDH->fStatusTableLen), + outBytes+ds->readUInt32(rbbiDH->fStatusTable), status); + + // And, last, the header. + // It is all int32_t values except for fFormataVersion, which is an array of four bytes. + // Swap the whole thing as int32_t, then re-swap the one field. + // + ds->swapArray32(ds, inBytes, sizeof(RBBIDataHeader), outBytes, status); + ds->swapArray32(ds, outputDH->fFormatVersion, 4, outputDH->fFormatVersion, status); + + return totalSize; +} + + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/thirdparty/icu4c/common/rbbidata.h b/thirdparty/icu4c/common/rbbidata.h new file mode 100644 index 0000000000..3749f16799 --- /dev/null +++ b/thirdparty/icu4c/common/rbbidata.h @@ -0,0 +1,212 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2014 International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: rbbidata.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* RBBI data formats Includes +* +* Structs that describes the format of the Binary RBBI data, +* as it is stored in ICU's data file. +* +* RBBIDataWrapper - Instances of this class sit between the +* raw data structs and the RulesBasedBreakIterator objects +* that are created by applications. The wrapper class +* provides reference counting for the underlying data, +* and direct pointers to data that would not otherwise +* be accessible without ugly pointer arithmetic. The +* wrapper does not attempt to provide any higher level +* abstractions for the data itself. +* +* There will be only one instance of RBBIDataWrapper for any +* set of RBBI run time data being shared by instances +* (clones) of RulesBasedBreakIterator. +*/ + +#ifndef __RBBIDATA_H__ +#define __RBBIDATA_H__ + +#include "unicode/utypes.h" +#include "unicode/udata.h" +#include "udataswp.h" + +/** + * Swap RBBI data. See udataswp.h. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +ubrk_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +#ifdef __cplusplus + +#include "unicode/ucptrie.h" +#include "unicode/uobject.h" +#include "unicode/unistr.h" +#include "unicode/uversion.h" +#include "umutex.h" + + +U_NAMESPACE_BEGIN + +// The current RBBI data format version. +static const uint8_t RBBI_DATA_FORMAT_VERSION[] = {6, 0, 0, 0}; + +/* + * The following structs map exactly onto the raw data from ICU common data file. + */ +struct RBBIDataHeader { + uint32_t fMagic; /* == 0xbla0 */ + UVersionInfo fFormatVersion; /* Data Format. Same as the value in struct UDataInfo */ + /* if there is one associated with this data. */ + /* (version originates in rbbi, is copied to UDataInfo) */ + uint32_t fLength; /* Total length in bytes of this RBBI Data, */ + /* including all sections, not just the header. */ + uint32_t fCatCount; /* Number of character categories. */ + + /* */ + /* Offsets and sizes of each of the subsections within the RBBI data. */ + /* All offsets are bytes from the start of the RBBIDataHeader. */ + /* All sizes are in bytes. */ + /* */ + uint32_t fFTable; /* forward state transition table. */ + uint32_t fFTableLen; + uint32_t fRTable; /* Offset to the reverse state transition table. */ + uint32_t fRTableLen; + uint32_t fTrie; /* Offset to Trie data for character categories */ + uint32_t fTrieLen; + uint32_t fRuleSource; /* Offset to the source for for the break */ + uint32_t fRuleSourceLen; /* rules. Stored UChar *. */ + uint32_t fStatusTable; /* Offset to the table of rule status values */ + uint32_t fStatusTableLen; + + uint32_t fReserved[6]; /* Reserved for expansion */ + +}; + + + +template <typename T> +struct RBBIStateTableRowT { + T fAccepting; // Non-zero if this row is for an accepting state. + // Value 0: not an accepting state. + // 1: (ACCEPTING_UNCONDITIONAL) Unconditional Accepting state. + // >1: Look-ahead match has completed. + // Actual boundary position happened earlier. + // Value here == fLookAhead in earlier + // state, at actual boundary pos. + T fLookAhead; // Non-zero if this row is for a state that + // corresponds to a '/' in the rule source. + // Value is the same as the fAccepting + // value for the rule (which will appear + // in a different state. + T fTagsIdx; // Non-zero if this row covers a {tagged} position + // from a rule. Value is the index in the + // StatusTable of the set of matching + // tags (rule status values) + T fNextState[1]; // Next State, indexed by char category. + // Variable-length array declared with length 1 + // to disable bounds checkers. + // Array Size is actually fData->fHeader->fCatCount + // CAUTION: see RBBITableBuilder::getTableSize() + // before changing anything here. +}; + +typedef RBBIStateTableRowT<uint8_t> RBBIStateTableRow8; +typedef RBBIStateTableRowT<uint16_t> RBBIStateTableRow16; + +constexpr uint16_t ACCEPTING_UNCONDITIONAL = 1; // Value constant for RBBIStateTableRow::fAccepting + +union RBBIStateTableRow { + RBBIStateTableRow16 r16; + RBBIStateTableRow8 r8; +}; + +struct RBBIStateTable { + uint32_t fNumStates; // Number of states. + uint32_t fRowLen; // Length of a state table row, in bytes. + uint32_t fDictCategoriesStart; // Char category number of the first dictionary + // char class, or the the largest category number + 1 + // if there are no dictionary categories. + uint32_t fLookAheadResultsSize; // Size of run-time array required for holding + // look-ahead results. Indexed by row.fLookAhead. + uint32_t fFlags; // Option Flags for this state table. + char fTableData[1]; // First RBBIStateTableRow begins here. + // Variable-length array declared with length 1 + // to disable bounds checkers. + // (making it char[] simplifies ugly address + // arithmetic for indexing variable length rows.) +}; + +constexpr uint32_t RBBI_LOOKAHEAD_HARD_BREAK = 1; +constexpr uint32_t RBBI_BOF_REQUIRED = 2; +constexpr uint32_t RBBI_8BITS_ROWS = 4; + + +/* */ +/* The reference counting wrapper class */ +/* */ +class RBBIDataWrapper : public UMemory { +public: + enum EDontAdopt { + kDontAdopt + }; + RBBIDataWrapper(const RBBIDataHeader *data, UErrorCode &status); + RBBIDataWrapper(const RBBIDataHeader *data, enum EDontAdopt dontAdopt, UErrorCode &status); + RBBIDataWrapper(UDataMemory* udm, UErrorCode &status); + ~RBBIDataWrapper(); + + static UBool isDataVersionAcceptable(const UVersionInfo version); + + void init0(); + void init(const RBBIDataHeader *data, UErrorCode &status); + RBBIDataWrapper *addReference(); + void removeReference(); + UBool operator ==(const RBBIDataWrapper &other) const; + int32_t hashCode(); + const UnicodeString &getRuleSourceString() const; + void printData(); + void printTable(const char *heading, const RBBIStateTable *table); + + /* */ + /* Pointers to items within the data */ + /* */ + const RBBIDataHeader *fHeader; + const RBBIStateTable *fForwardTable; + const RBBIStateTable *fReverseTable; + const char *fRuleSource; + const int32_t *fRuleStatusTable; + + /* number of int32_t values in the rule status table. Used to sanity check indexing */ + int32_t fStatusMaxIdx; + + UCPTrie *fTrie; + +private: + u_atomic_int32_t fRefCount; + UDataMemory *fUDataMem; + UnicodeString fRuleString; + UBool fDontFreeData; + + RBBIDataWrapper(const RBBIDataWrapper &other) = delete; /* forbid copying of this class */ + RBBIDataWrapper &operator=(const RBBIDataWrapper &other) = delete; /* forbid copying of this class */ +}; + + + +U_NAMESPACE_END + +U_CFUNC UBool rbbi_cleanup(void); + +#endif /* C++ */ + +#endif diff --git a/thirdparty/icu4c/common/rbbinode.cpp b/thirdparty/icu4c/common/rbbinode.cpp new file mode 100644 index 0000000000..69d84151fe --- /dev/null +++ b/thirdparty/icu4c/common/rbbinode.cpp @@ -0,0 +1,372 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +*************************************************************************** +* Copyright (C) 2002-2016 International Business Machines Corporation * +* and others. All rights reserved. * +*************************************************************************** +*/ + +// +// File: rbbinode.cpp +// +// Implementation of class RBBINode, which represents a node in the +// tree generated when parsing the Rules Based Break Iterator rules. +// +// This "Class" is actually closer to a struct. +// Code using it is expected to directly access fields much of the time. +// + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/unistr.h" +#include "unicode/uniset.h" +#include "unicode/uchar.h" +#include "unicode/parsepos.h" + +#include "cstr.h" +#include "uvector.h" + +#include "rbbirb.h" +#include "rbbinode.h" + +#include "uassert.h" + + +U_NAMESPACE_BEGIN + +#ifdef RBBI_DEBUG +static int gLastSerial = 0; +#endif + + +//------------------------------------------------------------------------- +// +// Constructor. Just set the fields to reasonable default values. +// +//------------------------------------------------------------------------- +RBBINode::RBBINode(NodeType t) : UMemory() { +#ifdef RBBI_DEBUG + fSerialNum = ++gLastSerial; +#endif + fType = t; + fParent = NULL; + fLeftChild = NULL; + fRightChild = NULL; + fInputSet = NULL; + fFirstPos = 0; + fLastPos = 0; + fNullable = FALSE; + fLookAheadEnd = FALSE; + fRuleRoot = FALSE; + fChainIn = FALSE; + fVal = 0; + fPrecedence = precZero; + + UErrorCode status = U_ZERO_ERROR; + fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere + fLastPosSet = new UVector(status); + fFollowPos = new UVector(status); + if (t==opCat) {fPrecedence = precOpCat;} + else if (t==opOr) {fPrecedence = precOpOr;} + else if (t==opStart) {fPrecedence = precStart;} + else if (t==opLParen) {fPrecedence = precLParen;} + +} + + +RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { +#ifdef RBBI_DEBUG + fSerialNum = ++gLastSerial; +#endif + fType = other.fType; + fParent = NULL; + fLeftChild = NULL; + fRightChild = NULL; + fInputSet = other.fInputSet; + fPrecedence = other.fPrecedence; + fText = other.fText; + fFirstPos = other.fFirstPos; + fLastPos = other.fLastPos; + fNullable = other.fNullable; + fVal = other.fVal; + fRuleRoot = FALSE; + fChainIn = other.fChainIn; + UErrorCode status = U_ZERO_ERROR; + fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere + fLastPosSet = new UVector(status); + fFollowPos = new UVector(status); +} + + +//------------------------------------------------------------------------- +// +// Destructor. Deletes both this node AND any child nodes, +// except in the case of variable reference nodes. For +// these, the l. child points back to the definition, which +// is common for all references to the variable, meaning +// it can't be deleted here. +// +//------------------------------------------------------------------------- +RBBINode::~RBBINode() { + // printf("deleting node %8x serial %4d\n", this, this->fSerialNum); + delete fInputSet; + fInputSet = NULL; + + switch (this->fType) { + case varRef: + case setRef: + // for these node types, multiple instances point to the same "children" + // Storage ownership of children handled elsewhere. Don't delete here. + break; + + default: + delete fLeftChild; + fLeftChild = NULL; + delete fRightChild; + fRightChild = NULL; + } + + + delete fFirstPosSet; + delete fLastPosSet; + delete fFollowPos; + +} + + +//------------------------------------------------------------------------- +// +// cloneTree Make a copy of the subtree rooted at this node. +// Discard any variable references encountered along the way, +// and replace with copies of the variable's definitions. +// Used to replicate the expression underneath variable +// references in preparation for generating the DFA tables. +// +//------------------------------------------------------------------------- +RBBINode *RBBINode::cloneTree() { + RBBINode *n; + + if (fType == RBBINode::varRef) { + // If the current node is a variable reference, skip over it + // and clone the definition of the variable instead. + n = fLeftChild->cloneTree(); + } else if (fType == RBBINode::uset) { + n = this; + } else { + n = new RBBINode(*this); + // Check for null pointer. + if (n != NULL) { + if (fLeftChild != NULL) { + n->fLeftChild = fLeftChild->cloneTree(); + n->fLeftChild->fParent = n; + } + if (fRightChild != NULL) { + n->fRightChild = fRightChild->cloneTree(); + n->fRightChild->fParent = n; + } + } + } + return n; +} + + + +//------------------------------------------------------------------------- +// +// flattenVariables Walk a parse tree, replacing any variable +// references with a copy of the variable's definition. +// Aside from variables, the tree is not changed. +// +// Return the root of the tree. If the root was not a variable +// reference, it remains unchanged - the root we started with +// is the root we return. If, however, the root was a variable +// reference, the root of the newly cloned replacement tree will +// be returned, and the original tree deleted. +// +// This function works by recursively walking the tree +// without doing anything until a variable reference is +// found, then calling cloneTree() at that point. Any +// nested references are handled by cloneTree(), not here. +// +//------------------------------------------------------------------------- +RBBINode *RBBINode::flattenVariables() { + if (fType == varRef) { + RBBINode *retNode = fLeftChild->cloneTree(); + if (retNode != NULL) { + retNode->fRuleRoot = this->fRuleRoot; + retNode->fChainIn = this->fChainIn; + } + delete this; // TODO: undefined behavior. Fix. + return retNode; + } + + if (fLeftChild != NULL) { + fLeftChild = fLeftChild->flattenVariables(); + fLeftChild->fParent = this; + } + if (fRightChild != NULL) { + fRightChild = fRightChild->flattenVariables(); + fRightChild->fParent = this; + } + return this; +} + + +//------------------------------------------------------------------------- +// +// flattenSets Walk the parse tree, replacing any nodes of type setRef +// with a copy of the expression tree for the set. A set's +// equivalent expression tree is precomputed and saved as +// the left child of the uset node. +// +//------------------------------------------------------------------------- +void RBBINode::flattenSets() { + U_ASSERT(fType != setRef); + + if (fLeftChild != NULL) { + if (fLeftChild->fType==setRef) { + RBBINode *setRefNode = fLeftChild; + RBBINode *usetNode = setRefNode->fLeftChild; + RBBINode *replTree = usetNode->fLeftChild; + fLeftChild = replTree->cloneTree(); + fLeftChild->fParent = this; + delete setRefNode; + } else { + fLeftChild->flattenSets(); + } + } + + if (fRightChild != NULL) { + if (fRightChild->fType==setRef) { + RBBINode *setRefNode = fRightChild; + RBBINode *usetNode = setRefNode->fLeftChild; + RBBINode *replTree = usetNode->fLeftChild; + fRightChild = replTree->cloneTree(); + fRightChild->fParent = this; + delete setRefNode; + } else { + fRightChild->flattenSets(); + } + } +} + + + +//------------------------------------------------------------------------- +// +// findNodes() Locate all the nodes of the specified type, starting +// at the specified root. +// +//------------------------------------------------------------------------- +void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) { + /* test for buffer overflows */ + if (U_FAILURE(status)) { + return; + } + if (fType == kind) { + dest->addElement(this, status); + } + if (fLeftChild != NULL) { + fLeftChild->findNodes(dest, kind, status); + } + if (fRightChild != NULL) { + fRightChild->findNodes(dest, kind, status); + } +} + + +//------------------------------------------------------------------------- +// +// print. Print out a single node, for debugging. +// +//------------------------------------------------------------------------- +#ifdef RBBI_DEBUG + +static int32_t serial(const RBBINode *node) { + return (node == NULL? -1 : node->fSerialNum); +} + + +void RBBINode::printNode(const RBBINode *node) { + static const char * const nodeTypeNames[] = { + "setRef", + "uset", + "varRef", + "leafChar", + "lookAhead", + "tag", + "endMark", + "opStart", + "opCat", + "opOr", + "opStar", + "opPlus", + "opQuestion", + "opBreak", + "opReverse", + "opLParen" + }; + + if (node==NULL) { + RBBIDebugPrintf("%10p", (void *)node); + } else { + RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ", + (void *)node, node->fSerialNum, nodeTypeNames[node->fType], + node->fRuleRoot?'R':' ', node->fChainIn?'C':' ', + serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent), + node->fFirstPos, node->fVal); + if (node->fType == varRef) { + RBBI_DEBUG_printUnicodeString(node->fText); + } + } + RBBIDebugPrintf("\n"); +} +#endif + + +#ifdef RBBI_DEBUG +U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) { + RBBIDebugPrintf("%*s", minWidth, CStr(s)()); +} +#endif + + +//------------------------------------------------------------------------- +// +// print. Print out the tree of nodes rooted at "this" +// +//------------------------------------------------------------------------- +#ifdef RBBI_DEBUG +void RBBINode::printNodeHeader() { + RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n"); +} + +void RBBINode::printTree(const RBBINode *node, UBool printHeading) { + if (printHeading) { + printNodeHeader(); + } + printNode(node); + if (node != NULL) { + // Only dump the definition under a variable reference if asked to. + // Unconditinally dump children of all other node types. + if (node->fType != varRef) { + if (node->fLeftChild != NULL) { + printTree(node->fLeftChild, FALSE); + } + + if (node->fRightChild != NULL) { + printTree(node->fRightChild, FALSE); + } + } + } +} +#endif + + + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/thirdparty/icu4c/common/rbbinode.h b/thirdparty/icu4c/common/rbbinode.h new file mode 100644 index 0000000000..cff3ba70c4 --- /dev/null +++ b/thirdparty/icu4c/common/rbbinode.h @@ -0,0 +1,127 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/******************************************************************** + * COPYRIGHT: + * Copyright (c) 2001-2016, International Business Machines Corporation and + * others. All Rights Reserved. + ********************************************************************/ + +#ifndef RBBINODE_H +#define RBBINODE_H + +#include "unicode/utypes.h" +#include "unicode/unistr.h" +#include "unicode/uobject.h" + +// +// class RBBINode +// +// Represents a node in the parse tree generated when reading +// a rule file. +// + +U_NAMESPACE_BEGIN + +class UnicodeSet; +class UVector; + +class RBBINode : public UMemory { + public: + enum NodeType { + setRef, + uset, + varRef, + leafChar, + lookAhead, + tag, + endMark, + opStart, + opCat, + opOr, + opStar, + opPlus, + opQuestion, + opBreak, + opReverse, + opLParen + }; + + enum OpPrecedence { + precZero, + precStart, + precLParen, + precOpOr, + precOpCat + }; + + NodeType fType; + RBBINode *fParent; + RBBINode *fLeftChild; + RBBINode *fRightChild; + UnicodeSet *fInputSet; // For uset nodes only. + OpPrecedence fPrecedence; // For binary ops only. + + UnicodeString fText; // Text corresponding to this node. + // May be lazily evaluated when (if) needed + // for some node types. + int fFirstPos; // Position in the rule source string of the + // first text associated with the node. + // If there's a left child, this will be the same + // as that child's left pos. + int fLastPos; // Last position in the rule source string + // of any text associated with this node. + // If there's a right child, this will be the same + // as that child's last postion. + + UBool fNullable; // See Aho. + int32_t fVal; // For leafChar nodes, the value. + // Values are the character category, + // corresponds to columns in the final + // state transition table. + + UBool fLookAheadEnd; // For endMark nodes, set true if + // marking the end of a look-ahead rule. + + UBool fRuleRoot; // True if this node is the root of a rule. + UBool fChainIn; // True if chaining into this rule is allowed + // (no '^' present). + + UVector *fFirstPosSet; + UVector *fLastPosSet; // TODO: rename fFirstPos & fLastPos to avoid confusion. + UVector *fFollowPos; + + + RBBINode(NodeType t); + RBBINode(const RBBINode &other); + ~RBBINode(); + + RBBINode *cloneTree(); + RBBINode *flattenVariables(); + void flattenSets(); + void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status); + +#ifdef RBBI_DEBUG + static void printNodeHeader(); + static void printNode(const RBBINode *n); + static void printTree(const RBBINode *n, UBool withHeading); +#endif + + private: + RBBINode &operator = (const RBBINode &other); // No defs. + UBool operator == (const RBBINode &other); // Private, so these functions won't accidently be used. + +#ifdef RBBI_DEBUG + public: + int fSerialNum; // Debugging aids. +#endif +}; + +#ifdef RBBI_DEBUG +U_CFUNC void +RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth=0); +#endif + +U_NAMESPACE_END + +#endif + diff --git a/thirdparty/icu4c/common/rbbirb.cpp b/thirdparty/icu4c/common/rbbirb.cpp new file mode 100644 index 0000000000..e5c250dfe4 --- /dev/null +++ b/thirdparty/icu4c/common/rbbirb.cpp @@ -0,0 +1,361 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// file: rbbirb.cpp +// +// Copyright (C) 2002-2011, International Business Machines Corporation and others. +// All Rights Reserved. +// +// This file contains the RBBIRuleBuilder class implementation. This is the main class for +// building (compiling) break rules into the tables required by the runtime +// RBBI engine. +// + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/brkiter.h" +#include "unicode/rbbi.h" +#include "unicode/ubrk.h" +#include "unicode/unistr.h" +#include "unicode/uniset.h" +#include "unicode/uchar.h" +#include "unicode/uchriter.h" +#include "unicode/ustring.h" +#include "unicode/parsepos.h" +#include "unicode/parseerr.h" + +#include "cmemory.h" +#include "cstring.h" +#include "rbbirb.h" +#include "rbbinode.h" +#include "rbbiscan.h" +#include "rbbisetb.h" +#include "rbbitblb.h" +#include "rbbidata.h" +#include "uassert.h" + + +U_NAMESPACE_BEGIN + + +//---------------------------------------------------------------------------------------- +// +// Constructor. +// +//---------------------------------------------------------------------------------------- +RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules, + UParseError *parseErr, + UErrorCode &status) + : fRules(rules), fStrippedRules(rules) +{ + fStatus = &status; // status is checked below + fParseError = parseErr; + fDebugEnv = NULL; +#ifdef RBBI_DEBUG + fDebugEnv = getenv("U_RBBIDEBUG"); +#endif + + + fForwardTree = NULL; + fReverseTree = NULL; + fSafeFwdTree = NULL; + fSafeRevTree = NULL; + fDefaultTree = &fForwardTree; + fForwardTable = NULL; + fRuleStatusVals = NULL; + fChainRules = FALSE; + fLBCMNoChain = FALSE; + fLookAheadHardBreak = FALSE; + fUSetNodes = NULL; + fRuleStatusVals = NULL; + fScanner = NULL; + fSetBuilder = NULL; + if (parseErr) { + uprv_memset(parseErr, 0, sizeof(UParseError)); + } + + if (U_FAILURE(status)) { + return; + } + + fUSetNodes = new UVector(status); // bcos status gets overwritten here + fRuleStatusVals = new UVector(status); + fScanner = new RBBIRuleScanner(this); + fSetBuilder = new RBBISetBuilder(this); + if (U_FAILURE(status)) { + return; + } + if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + } +} + + + +//---------------------------------------------------------------------------------------- +// +// Destructor +// +//---------------------------------------------------------------------------------------- +RBBIRuleBuilder::~RBBIRuleBuilder() { + + int i; + for (i=0; ; i++) { + RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i); + if (n==NULL) { + break; + } + delete n; + } + + delete fUSetNodes; + delete fSetBuilder; + delete fForwardTable; + delete fForwardTree; + delete fReverseTree; + delete fSafeFwdTree; + delete fSafeRevTree; + delete fScanner; + delete fRuleStatusVals; +} + + + + + +//---------------------------------------------------------------------------------------- +// +// flattenData() - Collect up the compiled RBBI rule data and put it into +// the format for saving in ICU data files, +// which is also the format needed by the RBBI runtime engine. +// +//---------------------------------------------------------------------------------------- +static int32_t align8(int32_t i) {return (i+7) & 0xfffffff8;} + +RBBIDataHeader *RBBIRuleBuilder::flattenData() { + int32_t i; + + if (U_FAILURE(*fStatus)) { + return NULL; + } + + // Remove whitespace from the rules to make it smaller. + // The rule parser has already removed comments. + fStrippedRules = fScanner->stripRules(fStrippedRules); + + // Calculate the size of each section in the data. + // Sizes here are padded up to a multiple of 8 for better memory alignment. + // Sections sizes actually stored in the header are for the actual data + // without the padding. + // + int32_t headerSize = align8(sizeof(RBBIDataHeader)); + int32_t forwardTableSize = align8(fForwardTable->getTableSize()); + int32_t reverseTableSize = align8(fForwardTable->getSafeTableSize()); + int32_t trieSize = align8(fSetBuilder->getTrieSize()); + int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t)); + + int32_t rulesLengthInUTF8 = 0; + u_strToUTF8WithSub(0, 0, &rulesLengthInUTF8, + fStrippedRules.getBuffer(), fStrippedRules.length(), + 0xfffd, nullptr, fStatus); + *fStatus = U_ZERO_ERROR; + + int32_t rulesSize = align8((rulesLengthInUTF8+1)); + + int32_t totalSize = headerSize + + forwardTableSize + + reverseTableSize + + statusTableSize + trieSize + rulesSize; + +#ifdef RBBI_DEBUG + if (fDebugEnv && uprv_strstr(fDebugEnv, "size")) { + RBBIDebugPrintf("Header Size: %8d\n", headerSize); + RBBIDebugPrintf("Forward Table Size: %8d\n", forwardTableSize); + RBBIDebugPrintf("Reverse Table Size: %8d\n", reverseTableSize); + RBBIDebugPrintf("Trie Size: %8d\n", trieSize); + RBBIDebugPrintf("Status Table Size: %8d\n", statusTableSize); + RBBIDebugPrintf("Rules Size: %8d\n", rulesSize); + RBBIDebugPrintf("-----------------------------\n"); + RBBIDebugPrintf("Total Size: %8d\n", totalSize); + } +#endif + + RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize); + if (data == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memset(data, 0, totalSize); + + + data->fMagic = 0xb1a0; + data->fFormatVersion[0] = RBBI_DATA_FORMAT_VERSION[0]; + data->fFormatVersion[1] = RBBI_DATA_FORMAT_VERSION[1]; + data->fFormatVersion[2] = RBBI_DATA_FORMAT_VERSION[2]; + data->fFormatVersion[3] = RBBI_DATA_FORMAT_VERSION[3]; + data->fLength = totalSize; + data->fCatCount = fSetBuilder->getNumCharCategories(); + + data->fFTable = headerSize; + data->fFTableLen = forwardTableSize; + + data->fRTable = data->fFTable + data->fFTableLen; + data->fRTableLen = reverseTableSize; + + data->fTrie = data->fRTable + data->fRTableLen; + data->fTrieLen = trieSize; + data->fStatusTable = data->fTrie + data->fTrieLen; + data->fStatusTableLen= statusTableSize; + data->fRuleSource = data->fStatusTable + statusTableSize; + data->fRuleSourceLen = rulesLengthInUTF8; + + uprv_memset(data->fReserved, 0, sizeof(data->fReserved)); + + fForwardTable->exportTable((uint8_t *)data + data->fFTable); + fForwardTable->exportSafeTable((uint8_t *)data + data->fRTable); + fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie); + + int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable); + for (i=0; i<fRuleStatusVals->size(); i++) { + ruleStatusTable[i] = fRuleStatusVals->elementAti(i); + } + + u_strToUTF8WithSub((char *)data+data->fRuleSource, rulesSize, &rulesLengthInUTF8, + fStrippedRules.getBuffer(), fStrippedRules.length(), + 0xfffd, nullptr, fStatus); + if (U_FAILURE(*fStatus)) { + return NULL; + } + + return data; +} + + +//---------------------------------------------------------------------------------------- +// +// createRuleBasedBreakIterator construct from source rules that are passed in +// in a UnicodeString +// +//---------------------------------------------------------------------------------------- +BreakIterator * +RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, + UParseError *parseError, + UErrorCode &status) +{ + // + // Read the input rules, generate a parse tree, symbol table, + // and list of all Unicode Sets referenced by the rules. + // + RBBIRuleBuilder builder(rules, parseError, status); + if (U_FAILURE(status)) { // status checked here bcos build below doesn't + return NULL; + } + + RBBIDataHeader *data = builder.build(status); + + if (U_FAILURE(status)) { + return nullptr; + } + + // + // Create a break iterator from the compiled rules. + // (Identical to creation from stored pre-compiled rules) + // + // status is checked after init in construction. + RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status); + if (U_FAILURE(status)) { + delete This; + This = NULL; + } + else if(This == NULL) { // test for NULL + status = U_MEMORY_ALLOCATION_ERROR; + } + return This; +} + +RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) { + if (U_FAILURE(status)) { + return nullptr; + } + + fScanner->parse(); + if (U_FAILURE(status)) { + return nullptr; + } + + // + // UnicodeSet processing. + // Munge the Unicode Sets to create an initial set of character categories. + // + fSetBuilder->buildRanges(); + + // + // Generate the DFA state transition table. + // + fForwardTable = new RBBITableBuilder(this, &fForwardTree, status); + if (fForwardTable == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + + fForwardTable->buildForwardTable(); + + // State table and character category optimization. + // Merge equivalent rows and columns. + // Note that this process alters the initial set of character categories, + // causing the representation of UnicodeSets in the parse tree to become invalid. + + optimizeTables(); + fForwardTable->buildSafeReverseTable(status); + + +#ifdef RBBI_DEBUG + if (fDebugEnv && uprv_strstr(fDebugEnv, "states")) { + fForwardTable->printStates(); + fForwardTable->printRuleStatusTable(); + fForwardTable->printReverseTable(); + } +#endif + + // Generate the mapping tables (TRIE) from input code points to + // the character categories. + // + fSetBuilder->buildTrie(); + + // + // Package up the compiled data into a memory image + // in the run-time format. + // + RBBIDataHeader *data = flattenData(); // returns NULL if error + if (U_FAILURE(status)) { + return nullptr; + } + return data; +} + +void RBBIRuleBuilder::optimizeTables() { + bool didSomething; + do { + didSomething = false; + + // Begin looking for duplicates with char class 3. + // Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively, + // and should not have other categories merged into them. + IntPair duplPair = {3, 0}; + while (fForwardTable->findDuplCharClassFrom(&duplPair)) { + fSetBuilder->mergeCategories(duplPair); + fForwardTable->removeColumn(duplPair.second); + didSomething = true; + } + + while (fForwardTable->removeDuplicateStates() > 0) { + didSomething = true; + } + } while (didSomething); +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/thirdparty/icu4c/common/rbbirb.h b/thirdparty/icu4c/common/rbbirb.h new file mode 100644 index 0000000000..037c1dc2ce --- /dev/null +++ b/thirdparty/icu4c/common/rbbirb.h @@ -0,0 +1,237 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// rbbirb.h +// +// Copyright (C) 2002-2008, International Business Machines Corporation and others. +// All Rights Reserved. +// +// This file contains declarations for several classes from the +// Rule Based Break Iterator rule builder. +// + + +#ifndef RBBIRB_H +#define RBBIRB_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include <utility> + +#include "unicode/uobject.h" +#include "unicode/rbbi.h" +#include "unicode/uniset.h" +#include "unicode/parseerr.h" +#include "uhash.h" +#include "uvector.h" +#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that + // looks up references to $variables within a set. + + +U_NAMESPACE_BEGIN + +class RBBIRuleScanner; +struct RBBIRuleTableEl; +class RBBISetBuilder; +class RBBINode; +class RBBITableBuilder; + + + +//-------------------------------------------------------------------------------- +// +// RBBISymbolTable. Implements SymbolTable interface that is used by the +// UnicodeSet parser to resolve references to $variables. +// +//-------------------------------------------------------------------------------- +class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one +public: // of these structs for each entry. + RBBISymbolTableEntry(); + UnicodeString key; + RBBINode *val; + ~RBBISymbolTableEntry(); + +private: + RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class + RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class +}; + + +class RBBISymbolTable : public UMemory, public SymbolTable { +private: + const UnicodeString &fRules; + UHashtable *fHashTable; + RBBIRuleScanner *fRuleScanner; + + // These next two fields are part of the mechanism for passing references to + // already-constructed UnicodeSets back to the UnicodeSet constructor + // when the pattern includes $variable references. + const UnicodeString ffffString; // = "/uffff" + UnicodeSet *fCachedSetLookup; + +public: + // API inherited from class SymbolTable + virtual const UnicodeString* lookup(const UnicodeString& s) const; + virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const; + virtual UnicodeString parseReference(const UnicodeString& text, + ParsePosition& pos, int32_t limit) const; + + // Additional Functions + RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status); + virtual ~RBBISymbolTable(); + + virtual RBBINode *lookupNode(const UnicodeString &key) const; + virtual void addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err); + +#ifdef RBBI_DEBUG + virtual void rbbiSymtablePrint() const; +#else + // A do-nothing inline function for non-debug builds. Member funcs can't be empty + // or the call sites won't compile. + int32_t fFakeField; + #define rbbiSymtablePrint() fFakeField=0; +#endif + +private: + RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class + RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class +}; + + +//-------------------------------------------------------------------------------- +// +// class RBBIRuleBuilder The top-level class handling RBBI rule compiling. +// +//-------------------------------------------------------------------------------- +class RBBIRuleBuilder : public UMemory { +public: + + // Create a rule based break iterator from a set of rules. + // This function is the main entry point into the rule builder. The + // public ICU API for creating RBBIs uses this function to do the actual work. + // + static BreakIterator * createRuleBasedBreakIterator( const UnicodeString &rules, + UParseError *parseError, + UErrorCode &status); + +public: + // The "public" functions and data members that appear below are accessed + // (and shared) by the various parts that make up the rule builder. They + // are NOT intended to be accessed by anything outside of the + // rule builder implementation. + RBBIRuleBuilder(const UnicodeString &rules, + UParseError *parseErr, + UErrorCode &status + ); + + virtual ~RBBIRuleBuilder(); + + /** + * Build the state tables and char class Trie from the source rules. + */ + RBBIDataHeader *build(UErrorCode &status); + + + /** + * Fold together redundant character classes (table columns) and + * redundant states (table rows). Done after initial table generation, + * before serializing the result. + */ + void optimizeTables(); + + char *fDebugEnv; // controls debug trace output + UErrorCode *fStatus; // Error reporting. Keeping status + UParseError *fParseError; // here avoids passing it everywhere. + const UnicodeString &fRules; // The rule string that we are compiling + UnicodeString fStrippedRules; // The rule string, with comments stripped. + + RBBIRuleScanner *fScanner; // The scanner. + RBBINode *fForwardTree; // The parse trees, generated by the scanner, + RBBINode *fReverseTree; // then manipulated by subsequent steps. + RBBINode *fSafeFwdTree; + RBBINode *fSafeRevTree; + + RBBINode **fDefaultTree; // For rules not qualified with a ! + // the tree to which they belong to. + + UBool fChainRules; // True for chained Unicode TR style rules. + // False for traditional regexp rules. + + UBool fLBCMNoChain; // True: suppress chaining of rules on + // chars with LineBreak property == CM. + + UBool fLookAheadHardBreak; // True: Look ahead matches cause an + // immediate break, no continuing for the + // longest match. + + RBBISetBuilder *fSetBuilder; // Set and Character Category builder. + UVector *fUSetNodes; // Vector of all uset nodes. + + RBBITableBuilder *fForwardTable; // State transition table, build time form. + + UVector *fRuleStatusVals; // The values that can be returned + // from getRuleStatus(). + + RBBIDataHeader *flattenData(); // Create the flattened (runtime format) + // data tables.. +private: + RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class + RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class +}; + + + + +//---------------------------------------------------------------------------- +// +// RBBISetTableEl is an entry in the hash table of UnicodeSets that have +// been encountered. The val Node will be of nodetype uset +// and contain pointers to the actual UnicodeSets. +// The Key is the source string for initializing the set. +// +// The hash table is used to avoid creating duplicate +// unnamed (not $var references) UnicodeSets. +// +// Memory Management: +// The Hash Table owns these RBBISetTableEl structs and +// the key strings. It does NOT own the val nodes. +// +//---------------------------------------------------------------------------- +struct RBBISetTableEl { + UnicodeString *key; + RBBINode *val; +}; + +/** + * A pair of ints, used to bundle pairs of states or pairs of character classes. + */ +typedef std::pair<int32_t, int32_t> IntPair; + + +//---------------------------------------------------------------------------- +// +// RBBIDebugPrintf Printf equivalent, for debugging output. +// Conditional compilation of the implementation lets us +// get rid of the stdio dependency in environments where it +// is unavailable. +// +//---------------------------------------------------------------------------- +#ifdef RBBI_DEBUG +#include <stdio.h> +#define RBBIDebugPrintf printf +#define RBBIDebugPuts puts +#else +#undef RBBIDebugPrintf +#define RBBIDebugPuts(arg) +#endif + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ + +#endif + + + diff --git a/thirdparty/icu4c/common/rbbirpt.h b/thirdparty/icu4c/common/rbbirpt.h new file mode 100644 index 0000000000..586953c90c --- /dev/null +++ b/thirdparty/icu4c/common/rbbirpt.h @@ -0,0 +1,296 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +//--------------------------------------------------------------------------------- +// +// Generated Header File. Do not edit by hand. +// This file contains the state table for the ICU Rule Based Break Iterator +// rule parser. +// It is generated by the Perl script "rbbicst.pl" from +// the rule parser state definitions file "rbbirpt.txt". +// +// Copyright (C) 2002-2016 International Business Machines Corporation +// and others. All rights reserved. +// +//--------------------------------------------------------------------------------- +#ifndef RBBIRPT_H +#define RBBIRPT_H + +#include "unicode/utypes.h" + +U_NAMESPACE_BEGIN +// +// Character classes for RBBI rule scanning. +// + static const uint8_t kRuleSet_digit_char = 128; + static const uint8_t kRuleSet_name_char = 129; + static const uint8_t kRuleSet_name_start_char = 130; + static const uint8_t kRuleSet_rule_char = 131; + static const uint8_t kRuleSet_white_space = 132; + + +enum RBBI_RuleParseAction { + doCheckVarDef, + doDotAny, + doEndAssign, + doEndOfRule, + doEndVariableName, + doExit, + doExprCatOperator, + doExprFinished, + doExprOrOperator, + doExprRParen, + doExprStart, + doLParen, + doNOP, + doNoChain, + doOptionEnd, + doOptionStart, + doReverseDir, + doRuleChar, + doRuleError, + doRuleErrorAssignExpr, + doScanUnicodeSet, + doSlash, + doStartAssign, + doStartTagValue, + doStartVariableName, + doTagDigit, + doTagExpectedError, + doTagValue, + doUnaryOpPlus, + doUnaryOpQuestion, + doUnaryOpStar, + doVariableNameExpectedErr, + rbbiLastAction}; + +//------------------------------------------------------------------------------- +// +// RBBIRuleTableEl represents the structure of a row in the transition table +// for the rule parser state machine. +//------------------------------------------------------------------------------- +struct RBBIRuleTableEl { + RBBI_RuleParseAction fAction; + uint8_t fCharClass; // 0-127: an individual ASCII character + // 128-255: character class index + uint8_t fNextState; // 0-250: normal next-stat numbers + // 255: pop next-state from stack. + uint8_t fPushState; + UBool fNextChar; +}; + +static const struct RBBIRuleTableEl gRuleParseStateTable[] = { + {doNOP, 0, 0, 0, TRUE} + , {doExprStart, 254, 29, 9, FALSE} // 1 start + , {doNOP, 132, 1,0, TRUE} // 2 + , {doNoChain, 94 /* ^ */, 12, 9, TRUE} // 3 + , {doExprStart, 36 /* $ */, 88, 98, FALSE} // 4 + , {doNOP, 33 /* ! */, 19,0, TRUE} // 5 + , {doNOP, 59 /* ; */, 1,0, TRUE} // 6 + , {doNOP, 252, 0,0, FALSE} // 7 + , {doExprStart, 255, 29, 9, FALSE} // 8 + , {doEndOfRule, 59 /* ; */, 1,0, TRUE} // 9 break-rule-end + , {doNOP, 132, 9,0, TRUE} // 10 + , {doRuleError, 255, 103,0, FALSE} // 11 + , {doExprStart, 254, 29,0, FALSE} // 12 start-after-caret + , {doNOP, 132, 12,0, TRUE} // 13 + , {doRuleError, 94 /* ^ */, 103,0, FALSE} // 14 + , {doExprStart, 36 /* $ */, 88, 37, FALSE} // 15 + , {doRuleError, 59 /* ; */, 103,0, FALSE} // 16 + , {doRuleError, 252, 103,0, FALSE} // 17 + , {doExprStart, 255, 29,0, FALSE} // 18 + , {doNOP, 33 /* ! */, 21,0, TRUE} // 19 rev-option + , {doReverseDir, 255, 28, 9, FALSE} // 20 + , {doOptionStart, 130, 23,0, TRUE} // 21 option-scan1 + , {doRuleError, 255, 103,0, FALSE} // 22 + , {doNOP, 129, 23,0, TRUE} // 23 option-scan2 + , {doOptionEnd, 255, 25,0, FALSE} // 24 + , {doNOP, 59 /* ; */, 1,0, TRUE} // 25 option-scan3 + , {doNOP, 132, 25,0, TRUE} // 26 + , {doRuleError, 255, 103,0, FALSE} // 27 + , {doExprStart, 255, 29, 9, FALSE} // 28 reverse-rule + , {doRuleChar, 254, 38,0, TRUE} // 29 term + , {doNOP, 132, 29,0, TRUE} // 30 + , {doRuleChar, 131, 38,0, TRUE} // 31 + , {doNOP, 91 /* [ */, 94, 38, FALSE} // 32 + , {doLParen, 40 /* ( */, 29, 38, TRUE} // 33 + , {doNOP, 36 /* $ */, 88, 37, FALSE} // 34 + , {doDotAny, 46 /* . */, 38,0, TRUE} // 35 + , {doRuleError, 255, 103,0, FALSE} // 36 + , {doCheckVarDef, 255, 38,0, FALSE} // 37 term-var-ref + , {doNOP, 132, 38,0, TRUE} // 38 expr-mod + , {doUnaryOpStar, 42 /* * */, 43,0, TRUE} // 39 + , {doUnaryOpPlus, 43 /* + */, 43,0, TRUE} // 40 + , {doUnaryOpQuestion, 63 /* ? */, 43,0, TRUE} // 41 + , {doNOP, 255, 43,0, FALSE} // 42 + , {doExprCatOperator, 254, 29,0, FALSE} // 43 expr-cont + , {doNOP, 132, 43,0, TRUE} // 44 + , {doExprCatOperator, 131, 29,0, FALSE} // 45 + , {doExprCatOperator, 91 /* [ */, 29,0, FALSE} // 46 + , {doExprCatOperator, 40 /* ( */, 29,0, FALSE} // 47 + , {doExprCatOperator, 36 /* $ */, 29,0, FALSE} // 48 + , {doExprCatOperator, 46 /* . */, 29,0, FALSE} // 49 + , {doExprCatOperator, 47 /* / */, 55,0, FALSE} // 50 + , {doExprCatOperator, 123 /* { */, 67,0, TRUE} // 51 + , {doExprOrOperator, 124 /* | */, 29,0, TRUE} // 52 + , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 53 + , {doExprFinished, 255, 255,0, FALSE} // 54 + , {doSlash, 47 /* / */, 57,0, TRUE} // 55 look-ahead + , {doNOP, 255, 103,0, FALSE} // 56 + , {doExprCatOperator, 254, 29,0, FALSE} // 57 expr-cont-no-slash + , {doNOP, 132, 43,0, TRUE} // 58 + , {doExprCatOperator, 131, 29,0, FALSE} // 59 + , {doExprCatOperator, 91 /* [ */, 29,0, FALSE} // 60 + , {doExprCatOperator, 40 /* ( */, 29,0, FALSE} // 61 + , {doExprCatOperator, 36 /* $ */, 29,0, FALSE} // 62 + , {doExprCatOperator, 46 /* . */, 29,0, FALSE} // 63 + , {doExprOrOperator, 124 /* | */, 29,0, TRUE} // 64 + , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 65 + , {doExprFinished, 255, 255,0, FALSE} // 66 + , {doNOP, 132, 67,0, TRUE} // 67 tag-open + , {doStartTagValue, 128, 70,0, FALSE} // 68 + , {doTagExpectedError, 255, 103,0, FALSE} // 69 + , {doNOP, 132, 74,0, TRUE} // 70 tag-value + , {doNOP, 125 /* } */, 74,0, FALSE} // 71 + , {doTagDigit, 128, 70,0, TRUE} // 72 + , {doTagExpectedError, 255, 103,0, FALSE} // 73 + , {doNOP, 132, 74,0, TRUE} // 74 tag-close + , {doTagValue, 125 /* } */, 77,0, TRUE} // 75 + , {doTagExpectedError, 255, 103,0, FALSE} // 76 + , {doExprCatOperator, 254, 29,0, FALSE} // 77 expr-cont-no-tag + , {doNOP, 132, 77,0, TRUE} // 78 + , {doExprCatOperator, 131, 29,0, FALSE} // 79 + , {doExprCatOperator, 91 /* [ */, 29,0, FALSE} // 80 + , {doExprCatOperator, 40 /* ( */, 29,0, FALSE} // 81 + , {doExprCatOperator, 36 /* $ */, 29,0, FALSE} // 82 + , {doExprCatOperator, 46 /* . */, 29,0, FALSE} // 83 + , {doExprCatOperator, 47 /* / */, 55,0, FALSE} // 84 + , {doExprOrOperator, 124 /* | */, 29,0, TRUE} // 85 + , {doExprRParen, 41 /* ) */, 255,0, TRUE} // 86 + , {doExprFinished, 255, 255,0, FALSE} // 87 + , {doStartVariableName, 36 /* $ */, 90,0, TRUE} // 88 scan-var-name + , {doNOP, 255, 103,0, FALSE} // 89 + , {doNOP, 130, 92,0, TRUE} // 90 scan-var-start + , {doVariableNameExpectedErr, 255, 103,0, FALSE} // 91 + , {doNOP, 129, 92,0, TRUE} // 92 scan-var-body + , {doEndVariableName, 255, 255,0, FALSE} // 93 + , {doScanUnicodeSet, 91 /* [ */, 255,0, TRUE} // 94 scan-unicode-set + , {doScanUnicodeSet, 112 /* p */, 255,0, TRUE} // 95 + , {doScanUnicodeSet, 80 /* P */, 255,0, TRUE} // 96 + , {doNOP, 255, 103,0, FALSE} // 97 + , {doNOP, 132, 98,0, TRUE} // 98 assign-or-rule + , {doStartAssign, 61 /* = */, 29, 101, TRUE} // 99 + , {doNOP, 255, 37, 9, FALSE} // 100 + , {doEndAssign, 59 /* ; */, 1,0, TRUE} // 101 assign-end + , {doRuleErrorAssignExpr, 255, 103,0, FALSE} // 102 + , {doExit, 255, 103,0, TRUE} // 103 errorDeath + }; +#ifdef RBBI_DEBUG +static const char * const RBBIRuleStateNames[] = { 0, + "start", + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "break-rule-end", + 0, + 0, + "start-after-caret", + 0, + 0, + 0, + 0, + 0, + 0, + "rev-option", + 0, + "option-scan1", + 0, + "option-scan2", + 0, + "option-scan3", + 0, + 0, + "reverse-rule", + "term", + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "term-var-ref", + "expr-mod", + 0, + 0, + 0, + 0, + "expr-cont", + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "look-ahead", + 0, + "expr-cont-no-slash", + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "tag-open", + 0, + 0, + "tag-value", + 0, + 0, + 0, + "tag-close", + 0, + 0, + "expr-cont-no-tag", + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + "scan-var-name", + 0, + "scan-var-start", + 0, + "scan-var-body", + 0, + "scan-unicode-set", + 0, + 0, + 0, + "assign-or-rule", + 0, + 0, + "assign-end", + 0, + "errorDeath", + 0}; +#endif + +U_NAMESPACE_END +#endif diff --git a/thirdparty/icu4c/common/rbbiscan.cpp b/thirdparty/icu4c/common/rbbiscan.cpp new file mode 100644 index 0000000000..9c406af671 --- /dev/null +++ b/thirdparty/icu4c/common/rbbiscan.cpp @@ -0,0 +1,1281 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// file: rbbiscan.cpp +// +// Copyright (C) 2002-2016, International Business Machines Corporation and others. +// All Rights Reserved. +// +// This file contains the Rule Based Break Iterator Rule Builder functions for +// scanning the rules and assembling a parse tree. This is the first phase +// of compiling the rules. +// +// The overall of the rules is managed by class RBBIRuleBuilder, which will +// create and use an instance of this class as part of the process. +// + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/unistr.h" +#include "unicode/uniset.h" +#include "unicode/uchar.h" +#include "unicode/uchriter.h" +#include "unicode/parsepos.h" +#include "unicode/parseerr.h" +#include "cmemory.h" +#include "cstring.h" + +#include "rbbirpt.h" // Contains state table for the rbbi rules parser. + // generated by a Perl script. +#include "rbbirb.h" +#include "rbbinode.h" +#include "rbbiscan.h" +#include "rbbitblb.h" + +#include "uassert.h" + +//------------------------------------------------------------------------------ +// +// Unicode Set init strings for each of the character classes needed for parsing a rule file. +// (Initialized with hex values for portability to EBCDIC based machines. +// Really ugly, but there's no good way to avoid it.) +// +// The sets are referred to by name in the rbbirpt.txt, which is the +// source form of the state transition table for the RBBI rule parser. +// +//------------------------------------------------------------------------------ +static const UChar gRuleSet_rule_char_pattern[] = { + // Characters that may appear as literals in patterns without escaping or quoting. + // [ ^ [ \ p { Z } \ u 0 0 2 0 + 0x5b, 0x5e, 0x5b, 0x5c, 0x70, 0x7b, 0x5a, 0x7d, 0x5c, 0x75, 0x30, 0x30, 0x32, 0x30, + // - \ u 0 0 7 f ] - [ \ p + 0x2d, 0x5c, 0x75, 0x30, 0x30, 0x37, 0x66, 0x5d, 0x2d, 0x5b, 0x5c, 0x70, + // { L } ] - [ \ p { N } ] ] + 0x7b, 0x4c, 0x7d, 0x5d, 0x2d, 0x5b, 0x5c, 0x70, 0x7b, 0x4e, 0x7d, 0x5d, 0x5d, 0}; + +static const UChar gRuleSet_name_char_pattern[] = { +// [ _ \ p { L } \ p { N } ] + 0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5c, 0x70, 0x7b, 0x4e, 0x7d, 0x5d, 0}; + +static const UChar gRuleSet_digit_char_pattern[] = { +// [ 0 - 9 ] + 0x5b, 0x30, 0x2d, 0x39, 0x5d, 0}; + +static const UChar gRuleSet_name_start_char_pattern[] = { +// [ _ \ p { L } ] + 0x5b, 0x5f, 0x5c, 0x70, 0x7b, 0x4c, 0x7d, 0x5d, 0 }; + +static const UChar kAny[] = {0x61, 0x6e, 0x79, 0x00}; // "any" + + +U_CDECL_BEGIN +static void U_CALLCONV RBBISetTable_deleter(void *p) { + icu::RBBISetTableEl *px = (icu::RBBISetTableEl *)p; + delete px->key; + // Note: px->val is owned by the linked list "fSetsListHead" in scanner. + // Don't delete the value nodes here. + uprv_free(px); +} +U_CDECL_END + +U_NAMESPACE_BEGIN + +//------------------------------------------------------------------------------ +// +// Constructor. +// +//------------------------------------------------------------------------------ +RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb) +{ + fRB = rb; + fScanIndex = 0; + fNextIndex = 0; + fQuoteMode = FALSE; + fLineNum = 1; + fCharNum = 0; + fLastChar = 0; + + fStateTable = NULL; + fStack[0] = 0; + fStackPtr = 0; + fNodeStack[0] = NULL; + fNodeStackPtr = 0; + + fReverseRule = FALSE; + fLookAheadRule = FALSE; + fNoChainInRule = FALSE; + + fSymbolTable = NULL; + fSetTable = NULL; + fRuleNum = 0; + fOptionStart = 0; + + // Do not check status until after all critical fields are sufficiently initialized + // that the destructor can run cleanly. + if (U_FAILURE(*rb->fStatus)) { + return; + } + + // + // Set up the constant Unicode Sets. + // Note: These could be made static, lazily initialized, and shared among + // all instances of RBBIRuleScanners. BUT this is quite a bit simpler, + // and the time to build these few sets should be small compared to a + // full break iterator build. + fRuleSets[kRuleSet_rule_char-128] + = UnicodeSet(UnicodeString(gRuleSet_rule_char_pattern), *rb->fStatus); + // fRuleSets[kRuleSet_white_space-128] = [:Pattern_White_Space:] + fRuleSets[kRuleSet_white_space-128]. + add(9, 0xd).add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029); + fRuleSets[kRuleSet_name_char-128] + = UnicodeSet(UnicodeString(gRuleSet_name_char_pattern), *rb->fStatus); + fRuleSets[kRuleSet_name_start_char-128] + = UnicodeSet(UnicodeString(gRuleSet_name_start_char_pattern), *rb->fStatus); + fRuleSets[kRuleSet_digit_char-128] + = UnicodeSet(UnicodeString(gRuleSet_digit_char_pattern), *rb->fStatus); + if (*rb->fStatus == U_ILLEGAL_ARGUMENT_ERROR) { + // This case happens if ICU's data is missing. UnicodeSet tries to look up property + // names from the init string, can't find them, and claims an illegal argument. + // Change the error so that the actual problem will be clearer to users. + *rb->fStatus = U_BRK_INIT_ERROR; + } + if (U_FAILURE(*rb->fStatus)) { + return; + } + + fSymbolTable = new RBBISymbolTable(this, rb->fRules, *rb->fStatus); + if (fSymbolTable == NULL) { + *rb->fStatus = U_MEMORY_ALLOCATION_ERROR; + return; + } + fSetTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, rb->fStatus); + if (U_FAILURE(*rb->fStatus)) { + return; + } + uhash_setValueDeleter(fSetTable, RBBISetTable_deleter); +} + + + +//------------------------------------------------------------------------------ +// +// Destructor +// +//------------------------------------------------------------------------------ +RBBIRuleScanner::~RBBIRuleScanner() { + delete fSymbolTable; + if (fSetTable != NULL) { + uhash_close(fSetTable); + fSetTable = NULL; + + } + + + // Node Stack. + // Normally has one entry, which is the entire parse tree for the rules. + // If errors occured, there may be additional subtrees left on the stack. + while (fNodeStackPtr > 0) { + delete fNodeStack[fNodeStackPtr]; + fNodeStackPtr--; + } + +} + +//------------------------------------------------------------------------------ +// +// doParseAction Do some action during rule parsing. +// Called by the parse state machine. +// Actions build the parse tree and Unicode Sets, +// and maintain the parse stack for nested expressions. +// +// TODO: unify EParseAction and RBBI_RuleParseAction enum types. +// They represent exactly the same thing. They're separate +// only to work around enum forward declaration restrictions +// in some compilers, while at the same time avoiding multiple +// definitions problems. I'm sure that there's a better way. +// +//------------------------------------------------------------------------------ +UBool RBBIRuleScanner::doParseActions(int32_t action) +{ + RBBINode *n = NULL; + + UBool returnVal = TRUE; + + switch (action) { + + case doExprStart: + pushNewNode(RBBINode::opStart); + fRuleNum++; + break; + + + case doNoChain: + // Scanned a '^' while on the rule start state. + fNoChainInRule = TRUE; + break; + + + case doExprOrOperator: + { + fixOpStack(RBBINode::precOpCat); + RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; + RBBINode *orNode = pushNewNode(RBBINode::opOr); + if (U_FAILURE(*fRB->fStatus)) { + break; + } + orNode->fLeftChild = operandNode; + operandNode->fParent = orNode; + } + break; + + case doExprCatOperator: + // concatenation operator. + // For the implicit concatenation of adjacent terms in an expression that are + // not separated by any other operator. Action is invoked between the + // actions for the two terms. + { + fixOpStack(RBBINode::precOpCat); + RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; + RBBINode *catNode = pushNewNode(RBBINode::opCat); + if (U_FAILURE(*fRB->fStatus)) { + break; + } + catNode->fLeftChild = operandNode; + operandNode->fParent = catNode; + } + break; + + case doLParen: + // Open Paren. + // The openParen node is a dummy operation type with a low precedence, + // which has the affect of ensuring that any real binary op that + // follows within the parens binds more tightly to the operands than + // stuff outside of the parens. + pushNewNode(RBBINode::opLParen); + break; + + case doExprRParen: + fixOpStack(RBBINode::precLParen); + break; + + case doNOP: + break; + + case doStartAssign: + // We've just scanned "$variable = " + // The top of the node stack has the $variable ref node. + + // Save the start position of the RHS text in the StartExpression node + // that precedes the $variableReference node on the stack. + // This will eventually be used when saving the full $variable replacement + // text as a string. + n = fNodeStack[fNodeStackPtr-1]; + n->fFirstPos = fNextIndex; // move past the '=' + + // Push a new start-of-expression node; needed to keep parse of the + // RHS expression happy. + pushNewNode(RBBINode::opStart); + break; + + + + + case doEndAssign: + { + // We have reached the end of an assignement statement. + // Current scan char is the ';' that terminates the assignment. + + // Terminate expression, leaves expression parse tree rooted in TOS node. + fixOpStack(RBBINode::precStart); + + RBBINode *startExprNode = fNodeStack[fNodeStackPtr-2]; + RBBINode *varRefNode = fNodeStack[fNodeStackPtr-1]; + RBBINode *RHSExprNode = fNodeStack[fNodeStackPtr]; + + // Save original text of right side of assignment, excluding the terminating ';' + // in the root of the node for the right-hand-side expression. + RHSExprNode->fFirstPos = startExprNode->fFirstPos; + RHSExprNode->fLastPos = fScanIndex; + fRB->fRules.extractBetween(RHSExprNode->fFirstPos, RHSExprNode->fLastPos, RHSExprNode->fText); + + // Expression parse tree becomes l. child of the $variable reference node. + varRefNode->fLeftChild = RHSExprNode; + RHSExprNode->fParent = varRefNode; + + // Make a symbol table entry for the $variableRef node. + fSymbolTable->addEntry(varRefNode->fText, varRefNode, *fRB->fStatus); + if (U_FAILURE(*fRB->fStatus)) { + // This is a round-about way to get the parse position set + // so that duplicate symbols error messages include a line number. + UErrorCode t = *fRB->fStatus; + *fRB->fStatus = U_ZERO_ERROR; + error(t); + } + + // Clean up the stack. + delete startExprNode; + fNodeStackPtr-=3; + break; + } + + case doEndOfRule: + { + fixOpStack(RBBINode::precStart); // Terminate expression, leaves expression + if (U_FAILURE(*fRB->fStatus)) { // parse tree rooted in TOS node. + break; + } +#ifdef RBBI_DEBUG + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeStack("end of rule");} +#endif + U_ASSERT(fNodeStackPtr == 1); + RBBINode *thisRule = fNodeStack[fNodeStackPtr]; + + // If this rule includes a look-ahead '/', add a endMark node to the + // expression tree. + if (fLookAheadRule) { + RBBINode *endNode = pushNewNode(RBBINode::endMark); + RBBINode *catNode = pushNewNode(RBBINode::opCat); + if (U_FAILURE(*fRB->fStatus)) { + break; + } + fNodeStackPtr -= 2; + catNode->fLeftChild = thisRule; + catNode->fRightChild = endNode; + fNodeStack[fNodeStackPtr] = catNode; + endNode->fVal = fRuleNum; + endNode->fLookAheadEnd = TRUE; + thisRule = catNode; + + // TODO: Disable chaining out of look-ahead (hard break) rules. + // The break on rule match is forced, so there is no point in building up + // the state table to chain into another rule for a longer match. + } + + // Mark this node as being the root of a rule. + thisRule->fRuleRoot = TRUE; + + // Flag if chaining into this rule is wanted. + // + if (fRB->fChainRules && // If rule chaining is enabled globally via !!chain + !fNoChainInRule) { // and no '^' chain-in inhibit was on this rule + thisRule->fChainIn = TRUE; + } + + + // All rule expressions are ORed together. + // The ';' that terminates an expression really just functions as a '|' with + // a low operator prededence. + // + // Each of the four sets of rules are collected separately. + // (forward, reverse, safe_forward, safe_reverse) + // OR this rule into the appropriate group of them. + // + RBBINode **destRules = (fReverseRule? &fRB->fSafeRevTree : fRB->fDefaultTree); + + if (*destRules != NULL) { + // This is not the first rule encounted. + // OR previous stuff (from *destRules) + // with the current rule expression (on the Node Stack) + // with the resulting OR expression going to *destRules + // + thisRule = fNodeStack[fNodeStackPtr]; + RBBINode *prevRules = *destRules; + RBBINode *orNode = pushNewNode(RBBINode::opOr); + if (U_FAILURE(*fRB->fStatus)) { + break; + } + orNode->fLeftChild = prevRules; + prevRules->fParent = orNode; + orNode->fRightChild = thisRule; + thisRule->fParent = orNode; + *destRules = orNode; + } + else + { + // This is the first rule encountered (for this direction). + // Just move its parse tree from the stack to *destRules. + *destRules = fNodeStack[fNodeStackPtr]; + } + fReverseRule = FALSE; // in preparation for the next rule. + fLookAheadRule = FALSE; + fNoChainInRule = FALSE; + fNodeStackPtr = 0; + } + break; + + + case doRuleError: + error(U_BRK_RULE_SYNTAX); + returnVal = FALSE; + break; + + + case doVariableNameExpectedErr: + error(U_BRK_RULE_SYNTAX); + break; + + + // + // Unary operands + ? * + // These all appear after the operand to which they apply. + // When we hit one, the operand (may be a whole sub expression) + // will be on the top of the stack. + // Unary Operator becomes TOS, with the old TOS as its one child. + case doUnaryOpPlus: + { + RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; + RBBINode *plusNode = pushNewNode(RBBINode::opPlus); + if (U_FAILURE(*fRB->fStatus)) { + break; + } + plusNode->fLeftChild = operandNode; + operandNode->fParent = plusNode; + } + break; + + case doUnaryOpQuestion: + { + RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; + RBBINode *qNode = pushNewNode(RBBINode::opQuestion); + if (U_FAILURE(*fRB->fStatus)) { + break; + } + qNode->fLeftChild = operandNode; + operandNode->fParent = qNode; + } + break; + + case doUnaryOpStar: + { + RBBINode *operandNode = fNodeStack[fNodeStackPtr--]; + RBBINode *starNode = pushNewNode(RBBINode::opStar); + if (U_FAILURE(*fRB->fStatus)) { + break; + } + starNode->fLeftChild = operandNode; + operandNode->fParent = starNode; + } + break; + + case doRuleChar: + // A "Rule Character" is any single character that is a literal part + // of the regular expression. Like a, b and c in the expression "(abc*) | [:L:]" + // These are pretty uncommon in break rules; the terms are more commonly + // sets. To keep things uniform, treat these characters like as + // sets that just happen to contain only one character. + { + n = pushNewNode(RBBINode::setRef); + if (U_FAILURE(*fRB->fStatus)) { + break; + } + findSetFor(UnicodeString(fC.fChar), n); + n->fFirstPos = fScanIndex; + n->fLastPos = fNextIndex; + fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); + break; + } + + case doDotAny: + // scanned a ".", meaning match any single character. + { + n = pushNewNode(RBBINode::setRef); + if (U_FAILURE(*fRB->fStatus)) { + break; + } + findSetFor(UnicodeString(TRUE, kAny, 3), n); + n->fFirstPos = fScanIndex; + n->fLastPos = fNextIndex; + fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); + break; + } + + case doSlash: + // Scanned a '/', which identifies a look-ahead break position in a rule. + n = pushNewNode(RBBINode::lookAhead); + if (U_FAILURE(*fRB->fStatus)) { + break; + } + n->fVal = fRuleNum; + n->fFirstPos = fScanIndex; + n->fLastPos = fNextIndex; + fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); + fLookAheadRule = TRUE; + break; + + + case doStartTagValue: + // Scanned a '{', the opening delimiter for a tag value within a rule. + n = pushNewNode(RBBINode::tag); + if (U_FAILURE(*fRB->fStatus)) { + break; + } + n->fVal = 0; + n->fFirstPos = fScanIndex; + n->fLastPos = fNextIndex; + break; + + case doTagDigit: + // Just scanned a decimal digit that's part of a tag value + { + n = fNodeStack[fNodeStackPtr]; + uint32_t v = u_charDigitValue(fC.fChar); + U_ASSERT(v < 10); + n->fVal = n->fVal*10 + v; + break; + } + + case doTagValue: + n = fNodeStack[fNodeStackPtr]; + n->fLastPos = fNextIndex; + fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); + break; + + case doTagExpectedError: + error(U_BRK_MALFORMED_RULE_TAG); + returnVal = FALSE; + break; + + case doOptionStart: + // Scanning a !!option. At the start of string. + fOptionStart = fScanIndex; + break; + + case doOptionEnd: + { + UnicodeString opt(fRB->fRules, fOptionStart, fScanIndex-fOptionStart); + if (opt == UNICODE_STRING("chain", 5)) { + fRB->fChainRules = TRUE; + } else if (opt == UNICODE_STRING("LBCMNoChain", 11)) { + fRB->fLBCMNoChain = TRUE; + } else if (opt == UNICODE_STRING("forward", 7)) { + fRB->fDefaultTree = &fRB->fForwardTree; + } else if (opt == UNICODE_STRING("reverse", 7)) { + fRB->fDefaultTree = &fRB->fReverseTree; + } else if (opt == UNICODE_STRING("safe_forward", 12)) { + fRB->fDefaultTree = &fRB->fSafeFwdTree; + } else if (opt == UNICODE_STRING("safe_reverse", 12)) { + fRB->fDefaultTree = &fRB->fSafeRevTree; + } else if (opt == UNICODE_STRING("lookAheadHardBreak", 18)) { + fRB->fLookAheadHardBreak = TRUE; + } else if (opt == UNICODE_STRING("quoted_literals_only", 20)) { + fRuleSets[kRuleSet_rule_char-128].clear(); + } else if (opt == UNICODE_STRING("unquoted_literals", 17)) { + fRuleSets[kRuleSet_rule_char-128].applyPattern(UnicodeString(gRuleSet_rule_char_pattern), *fRB->fStatus); + } else { + error(U_BRK_UNRECOGNIZED_OPTION); + } + } + break; + + case doReverseDir: + fReverseRule = TRUE; + break; + + case doStartVariableName: + n = pushNewNode(RBBINode::varRef); + if (U_FAILURE(*fRB->fStatus)) { + break; + } + n->fFirstPos = fScanIndex; + break; + + case doEndVariableName: + n = fNodeStack[fNodeStackPtr]; + if (n==NULL || n->fType != RBBINode::varRef) { + error(U_BRK_INTERNAL_ERROR); + break; + } + n->fLastPos = fScanIndex; + fRB->fRules.extractBetween(n->fFirstPos+1, n->fLastPos, n->fText); + // Look the newly scanned name up in the symbol table + // If there's an entry, set the l. child of the var ref to the replacement expression. + // (We also pass through here when scanning assignments, but no harm is done, other + // than a slight wasted effort that seems hard to avoid. Lookup will be null) + n->fLeftChild = fSymbolTable->lookupNode(n->fText); + break; + + case doCheckVarDef: + n = fNodeStack[fNodeStackPtr]; + if (n->fLeftChild == NULL) { + error(U_BRK_UNDEFINED_VARIABLE); + returnVal = FALSE; + } + break; + + case doExprFinished: + break; + + case doRuleErrorAssignExpr: + error(U_BRK_ASSIGN_ERROR); + returnVal = FALSE; + break; + + case doExit: + returnVal = FALSE; + break; + + case doScanUnicodeSet: + scanSet(); + break; + + default: + error(U_BRK_INTERNAL_ERROR); + returnVal = FALSE; + break; + } + return returnVal && U_SUCCESS(*fRB->fStatus); +} + + + + +//------------------------------------------------------------------------------ +// +// Error Report a rule parse error. +// Only report it if no previous error has been recorded. +// +//------------------------------------------------------------------------------ +void RBBIRuleScanner::error(UErrorCode e) { + if (U_SUCCESS(*fRB->fStatus)) { + *fRB->fStatus = e; + if (fRB->fParseError) { + fRB->fParseError->line = fLineNum; + fRB->fParseError->offset = fCharNum; + fRB->fParseError->preContext[0] = 0; + fRB->fParseError->postContext[0] = 0; + } + } +} + + + + +//------------------------------------------------------------------------------ +// +// fixOpStack The parse stack holds partially assembled chunks of the parse tree. +// An entry on the stack may be as small as a single setRef node, +// or as large as the parse tree +// for an entire expression (this will be the one item left on the stack +// when the parsing of an RBBI rule completes. +// +// This function is called when a binary operator is encountered. +// It looks back up the stack for operators that are not yet associated +// with a right operand, and if the precedence of the stacked operator >= +// the precedence of the current operator, binds the operand left, +// to the previously encountered operator. +// +//------------------------------------------------------------------------------ +void RBBIRuleScanner::fixOpStack(RBBINode::OpPrecedence p) { + RBBINode *n; + // printNodeStack("entering fixOpStack()"); + for (;;) { + n = fNodeStack[fNodeStackPtr-1]; // an operator node + if (n->fPrecedence == 0) { + RBBIDebugPuts("RBBIRuleScanner::fixOpStack, bad operator node"); + error(U_BRK_INTERNAL_ERROR); + return; + } + + if (n->fPrecedence < p || n->fPrecedence <= RBBINode::precLParen) { + // The most recent operand goes with the current operator, + // not with the previously stacked one. + break; + } + // Stack operator is a binary op ( '|' or concatenation) + // TOS operand becomes right child of this operator. + // Resulting subexpression becomes the TOS operand. + n->fRightChild = fNodeStack[fNodeStackPtr]; + fNodeStack[fNodeStackPtr]->fParent = n; + fNodeStackPtr--; + // printNodeStack("looping in fixOpStack() "); + } + + if (p <= RBBINode::precLParen) { + // Scan is at a right paren or end of expression. + // The scanned item must match the stack, or else there was an error. + // Discard the left paren (or start expr) node from the stack, + // leaving the completed (sub)expression as TOS. + if (n->fPrecedence != p) { + // Right paren encountered matched start of expression node, or + // end of expression matched with a left paren node. + error(U_BRK_MISMATCHED_PAREN); + } + fNodeStack[fNodeStackPtr-1] = fNodeStack[fNodeStackPtr]; + fNodeStackPtr--; + // Delete the now-discarded LParen or Start node. + delete n; + } + // printNodeStack("leaving fixOpStack()"); +} + + + + +//------------------------------------------------------------------------------ +// +// findSetFor given a UnicodeString, +// - find the corresponding Unicode Set (uset node) +// (create one if necessary) +// - Set fLeftChild of the caller's node (should be a setRef node) +// to the uset node +// Maintain a hash table of uset nodes, so the same one is always used +// for the same string. +// If a "to adopt" set is provided and we haven't seen this key before, +// add the provided set to the hash table. +// If the string is one (32 bit) char in length, the set contains +// just one element which is the char in question. +// If the string is "any", return a set containing all chars. +// +//------------------------------------------------------------------------------ +void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt) { + + RBBISetTableEl *el; + + // First check whether we've already cached a set for this string. + // If so, just use the cached set in the new node. + // delete any set provided by the caller, since we own it. + el = (RBBISetTableEl *)uhash_get(fSetTable, &s); + if (el != NULL) { + delete setToAdopt; + node->fLeftChild = el->val; + U_ASSERT(node->fLeftChild->fType == RBBINode::uset); + return; + } + + // Haven't seen this set before. + // If the caller didn't provide us with a prebuilt set, + // create a new UnicodeSet now. + if (setToAdopt == NULL) { + if (s.compare(kAny, -1) == 0) { + setToAdopt = new UnicodeSet(0x000000, 0x10ffff); + } else { + UChar32 c; + c = s.char32At(0); + setToAdopt = new UnicodeSet(c, c); + } + } + + // + // Make a new uset node to refer to this UnicodeSet + // This new uset node becomes the child of the caller's setReference node. + // + RBBINode *usetNode = new RBBINode(RBBINode::uset); + if (usetNode == NULL) { + error(U_MEMORY_ALLOCATION_ERROR); + return; + } + usetNode->fInputSet = setToAdopt; + usetNode->fParent = node; + node->fLeftChild = usetNode; + usetNode->fText = s; + + + // + // Add the new uset node to the list of all uset nodes. + // + fRB->fUSetNodes->addElement(usetNode, *fRB->fStatus); + + + // + // Add the new set to the set hash table. + // + el = (RBBISetTableEl *)uprv_malloc(sizeof(RBBISetTableEl)); + UnicodeString *tkey = new UnicodeString(s); + if (tkey == NULL || el == NULL || setToAdopt == NULL) { + // Delete to avoid memory leak + delete tkey; + tkey = NULL; + uprv_free(el); + el = NULL; + delete setToAdopt; + setToAdopt = NULL; + + error(U_MEMORY_ALLOCATION_ERROR); + return; + } + el->key = tkey; + el->val = usetNode; + uhash_put(fSetTable, el->key, el, fRB->fStatus); + + return; +} + + + +// +// Assorted Unicode character constants. +// Numeric because there is no portable way to enter them as literals. +// (Think EBCDIC). +// +static const UChar chCR = 0x0d; // New lines, for terminating comments. +static const UChar chLF = 0x0a; +static const UChar chNEL = 0x85; // NEL newline variant +static const UChar chLS = 0x2028; // Unicode Line Separator +static const UChar chApos = 0x27; // single quote, for quoted chars. +static const UChar chPound = 0x23; // '#', introduces a comment. +static const UChar chBackSlash = 0x5c; // '\' introduces a char escape +static const UChar chLParen = 0x28; +static const UChar chRParen = 0x29; + + +//------------------------------------------------------------------------------ +// +// stripRules Return a rules string without extra spaces. +// (Comments are removed separately, during rule parsing.) +// +//------------------------------------------------------------------------------ +UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) { + UnicodeString strippedRules; + int32_t rulesLength = rules.length(); + + for (int32_t idx=0; idx<rulesLength; idx = rules.moveIndex32(idx, 1)) { + UChar32 cp = rules.char32At(idx); + bool whiteSpace = u_hasBinaryProperty(cp, UCHAR_PATTERN_WHITE_SPACE); + if (whiteSpace) { + continue; + } + strippedRules.append(cp); + } + return strippedRules; +} + + +//------------------------------------------------------------------------------ +// +// nextCharLL Low Level Next Char from rule input source. +// Get a char from the input character iterator, +// keep track of input position for error reporting. +// +//------------------------------------------------------------------------------ +UChar32 RBBIRuleScanner::nextCharLL() { + UChar32 ch; + + if (fNextIndex >= fRB->fRules.length()) { + return (UChar32)-1; + } + ch = fRB->fRules.char32At(fNextIndex); + fNextIndex = fRB->fRules.moveIndex32(fNextIndex, 1); + + if (ch == chCR || + ch == chNEL || + ch == chLS || + (ch == chLF && fLastChar != chCR)) { + // Character is starting a new line. Bump up the line number, and + // reset the column to 0. + fLineNum++; + fCharNum=0; + if (fQuoteMode) { + error(U_BRK_NEW_LINE_IN_QUOTED_STRING); + fQuoteMode = FALSE; + } + } + else { + // Character is not starting a new line. Except in the case of a + // LF following a CR, increment the column position. + if (ch != chLF) { + fCharNum++; + } + } + fLastChar = ch; + return ch; +} + + +//------------------------------------------------------------------------------ +// +// nextChar for rules scanning. At this level, we handle stripping +// out comments and processing backslash character escapes. +// The rest of the rules grammar is handled at the next level up. +// +//------------------------------------------------------------------------------ +void RBBIRuleScanner::nextChar(RBBIRuleChar &c) { + + // Unicode Character constants needed for the processing done by nextChar(), + // in hex because literals wont work on EBCDIC machines. + + fScanIndex = fNextIndex; + c.fChar = nextCharLL(); + c.fEscaped = FALSE; + + // + // check for '' sequence. + // These are recognized in all contexts, whether in quoted text or not. + // + if (c.fChar == chApos) { + if (fRB->fRules.char32At(fNextIndex) == chApos) { + c.fChar = nextCharLL(); // get nextChar officially so character counts + c.fEscaped = TRUE; // stay correct. + } + else + { + // Single quote, by itself. + // Toggle quoting mode. + // Return either '(' or ')', because quotes cause a grouping of the quoted text. + fQuoteMode = !fQuoteMode; + if (fQuoteMode == TRUE) { + c.fChar = chLParen; + } else { + c.fChar = chRParen; + } + c.fEscaped = FALSE; // The paren that we return is not escaped. + return; + } + } + + if (fQuoteMode) { + c.fEscaped = TRUE; + } + else + { + // We are not in a 'quoted region' of the source. + // + if (c.fChar == chPound) { + // Start of a comment. Consume the rest of it. + // The new-line char that terminates the comment is always returned. + // It will be treated as white-space, and serves to break up anything + // that might otherwise incorrectly clump together with a comment in + // the middle (a variable name, for example.) + int32_t commentStart = fScanIndex; + for (;;) { + c.fChar = nextCharLL(); + if (c.fChar == (UChar32)-1 || // EOF + c.fChar == chCR || + c.fChar == chLF || + c.fChar == chNEL || + c.fChar == chLS) {break;} + } + for (int32_t i=commentStart; i<fNextIndex-1; ++i) { + fRB->fStrippedRules.setCharAt(i, u' '); + } + } + if (c.fChar == (UChar32)-1) { + return; + } + + // + // check for backslash escaped characters. + // Use UnicodeString::unescapeAt() to handle them. + // + if (c.fChar == chBackSlash) { + c.fEscaped = TRUE; + int32_t startX = fNextIndex; + c.fChar = fRB->fRules.unescapeAt(fNextIndex); + if (fNextIndex == startX) { + error(U_BRK_HEX_DIGITS_EXPECTED); + } + fCharNum += fNextIndex-startX; + } + } + // putc(c.fChar, stdout); +} + +//------------------------------------------------------------------------------ +// +// Parse RBBI rules. The state machine for rules parsing is here. +// The state tables are hand-written in the file rbbirpt.txt, +// and converted to the form used here by a perl +// script rbbicst.pl +// +//------------------------------------------------------------------------------ +void RBBIRuleScanner::parse() { + uint16_t state; + const RBBIRuleTableEl *tableEl; + + if (U_FAILURE(*fRB->fStatus)) { + return; + } + + state = 1; + nextChar(fC); + // + // Main loop for the rule parsing state machine. + // Runs once per state transition. + // Each time through optionally performs, depending on the state table, + // - an advance to the the next input char + // - an action to be performed. + // - pushing or popping a state to/from the local state return stack. + // + for (;;) { + // Bail out if anything has gone wrong. + // RBBI rule file parsing stops on the first error encountered. + if (U_FAILURE(*fRB->fStatus)) { + break; + } + + // Quit if state == 0. This is the normal way to exit the state machine. + // + if (state == 0) { + break; + } + + // Find the state table element that matches the input char from the rule, or the + // class of the input character. Start with the first table row for this + // state, then linearly scan forward until we find a row that matches the + // character. The last row for each state always matches all characters, so + // the search will stop there, if not before. + // + tableEl = &gRuleParseStateTable[state]; + #ifdef RBBI_DEBUG + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { + RBBIDebugPrintf("char, line, col = (\'%c\', %d, %d) state=%s ", + fC.fChar, fLineNum, fCharNum, RBBIRuleStateNames[state]); + } + #endif + + for (;;) { + #ifdef RBBI_DEBUG + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf("."); fflush(stdout);} + #endif + if (tableEl->fCharClass < 127 && fC.fEscaped == FALSE && tableEl->fCharClass == fC.fChar) { + // Table row specified an individual character, not a set, and + // the input character is not escaped, and + // the input character matched it. + break; + } + if (tableEl->fCharClass == 255) { + // Table row specified default, match anything character class. + break; + } + if (tableEl->fCharClass == 254 && fC.fEscaped) { + // Table row specified "escaped" and the char was escaped. + break; + } + if (tableEl->fCharClass == 253 && fC.fEscaped && + (fC.fChar == 0x50 || fC.fChar == 0x70 )) { + // Table row specified "escaped P" and the char is either 'p' or 'P'. + break; + } + if (tableEl->fCharClass == 252 && fC.fChar == (UChar32)-1) { + // Table row specified eof and we hit eof on the input. + break; + } + + if (tableEl->fCharClass >= 128 && tableEl->fCharClass < 240 && // Table specs a char class && + fC.fEscaped == FALSE && // char is not escaped && + fC.fChar != (UChar32)-1) { // char is not EOF + U_ASSERT((tableEl->fCharClass-128) < UPRV_LENGTHOF(fRuleSets)); + if (fRuleSets[tableEl->fCharClass-128].contains(fC.fChar)) { + // Table row specified a character class, or set of characters, + // and the current char matches it. + break; + } + } + + // No match on this row, advance to the next row for this state, + tableEl++; + } + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPuts("");} + + // + // We've found the row of the state table that matches the current input + // character from the rules string. + // Perform any action specified by this row in the state table. + if (doParseActions((int32_t)tableEl->fAction) == FALSE) { + // Break out of the state machine loop if the + // the action signalled some kind of error, or + // the action was to exit, occurs on normal end-of-rules-input. + break; + } + + if (tableEl->fPushState != 0) { + fStackPtr++; + if (fStackPtr >= kStackSize) { + error(U_BRK_INTERNAL_ERROR); + RBBIDebugPuts("RBBIRuleScanner::parse() - state stack overflow."); + fStackPtr--; + } + fStack[fStackPtr] = tableEl->fPushState; + } + + if (tableEl->fNextChar) { + nextChar(fC); + } + + // Get the next state from the table entry, or from the + // state stack if the next state was specified as "pop". + if (tableEl->fNextState != 255) { + state = tableEl->fNextState; + } else { + state = fStack[fStackPtr]; + fStackPtr--; + if (fStackPtr < 0) { + error(U_BRK_INTERNAL_ERROR); + RBBIDebugPuts("RBBIRuleScanner::parse() - state stack underflow."); + fStackPtr++; + } + } + + } + + if (U_FAILURE(*fRB->fStatus)) { + return; + } + + // If there are no forward rules set an error. + // + if (fRB->fForwardTree == NULL) { + error(U_BRK_RULE_SYNTAX); + return; + } + + // + // Parsing of the input RBBI rules is complete. + // We now have a parse tree for the rule expressions + // and a list of all UnicodeSets that are referenced. + // +#ifdef RBBI_DEBUG + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "symbols")) {fSymbolTable->rbbiSymtablePrint();} + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ptree")) { + RBBIDebugPrintf("Completed Forward Rules Parse Tree...\n"); + RBBINode::printTree(fRB->fForwardTree, TRUE); + RBBIDebugPrintf("\nCompleted Reverse Rules Parse Tree...\n"); + RBBINode::printTree(fRB->fReverseTree, TRUE); + RBBIDebugPrintf("\nCompleted Safe Point Forward Rules Parse Tree...\n"); + RBBINode::printTree(fRB->fSafeFwdTree, TRUE); + RBBIDebugPrintf("\nCompleted Safe Point Reverse Rules Parse Tree...\n"); + RBBINode::printTree(fRB->fSafeRevTree, TRUE); + } +#endif +} + + +//------------------------------------------------------------------------------ +// +// printNodeStack for debugging... +// +//------------------------------------------------------------------------------ +#ifdef RBBI_DEBUG +void RBBIRuleScanner::printNodeStack(const char *title) { + int i; + RBBIDebugPrintf("%s. Dumping node stack...\n", title); + for (i=fNodeStackPtr; i>0; i--) {RBBINode::printTree(fNodeStack[i], TRUE);} +} +#endif + + + + +//------------------------------------------------------------------------------ +// +// pushNewNode create a new RBBINode of the specified type and push it +// onto the stack of nodes. +// +//------------------------------------------------------------------------------ +RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) { + if (U_FAILURE(*fRB->fStatus)) { + return NULL; + } + if (fNodeStackPtr >= kStackSize - 1) { + error(U_BRK_RULE_SYNTAX); + RBBIDebugPuts("RBBIRuleScanner::pushNewNode - stack overflow."); + return NULL; + } + fNodeStackPtr++; + fNodeStack[fNodeStackPtr] = new RBBINode(t); + if (fNodeStack[fNodeStackPtr] == NULL) { + *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR; + } + return fNodeStack[fNodeStackPtr]; +} + + + +//------------------------------------------------------------------------------ +// +// scanSet Construct a UnicodeSet from the text at the current scan +// position. Advance the scan position to the first character +// after the set. +// +// A new RBBI setref node referring to the set is pushed onto the node +// stack. +// +// The scan position is normally under the control of the state machine +// that controls rule parsing. UnicodeSets, however, are parsed by +// the UnicodeSet constructor, not by the RBBI rule parser. +// +//------------------------------------------------------------------------------ +void RBBIRuleScanner::scanSet() { + UnicodeSet *uset; + ParsePosition pos; + int startPos; + int i; + + if (U_FAILURE(*fRB->fStatus)) { + return; + } + + pos.setIndex(fScanIndex); + startPos = fScanIndex; + UErrorCode localStatus = U_ZERO_ERROR; + uset = new UnicodeSet(); + if (uset == NULL) { + localStatus = U_MEMORY_ALLOCATION_ERROR; + } else { + uset->applyPatternIgnoreSpace(fRB->fRules, pos, fSymbolTable, localStatus); + } + if (U_FAILURE(localStatus)) { + // TODO: Get more accurate position of the error from UnicodeSet's return info. + // UnicodeSet appears to not be reporting correctly at this time. + #ifdef RBBI_DEBUG + RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex()); + #endif + error(localStatus); + delete uset; + return; + } + + // Verify that the set contains at least one code point. + // + U_ASSERT(uset!=NULL); + if (uset->isEmpty()) { + // This set is empty. + // Make it an error, because it almost certainly is not what the user wanted. + // Also, avoids having to think about corner cases in the tree manipulation code + // that occurs later on. + error(U_BRK_RULE_EMPTY_SET); + delete uset; + return; + } + + + // Advance the RBBI parse postion over the UnicodeSet pattern. + // Don't just set fScanIndex because the line/char positions maintained + // for error reporting would be thrown off. + i = pos.getIndex(); + for (;;) { + if (fNextIndex >= i) { + break; + } + nextCharLL(); + } + + if (U_SUCCESS(*fRB->fStatus)) { + RBBINode *n; + + n = pushNewNode(RBBINode::setRef); + if (U_FAILURE(*fRB->fStatus)) { + return; + } + n->fFirstPos = startPos; + n->fLastPos = fNextIndex; + fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText); + // findSetFor() serves several purposes here: + // - Adopts storage for the UnicodeSet, will be responsible for deleting. + // - Mantains collection of all sets in use, needed later for establishing + // character categories for run time engine. + // - Eliminates mulitiple instances of the same set. + // - Creates a new uset node if necessary (if this isn't a duplicate.) + findSetFor(n->fText, n, uset); + } + +} + +int32_t RBBIRuleScanner::numRules() { + return fRuleNum; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/thirdparty/icu4c/common/rbbiscan.h b/thirdparty/icu4c/common/rbbiscan.h new file mode 100644 index 0000000000..58022002c5 --- /dev/null +++ b/thirdparty/icu4c/common/rbbiscan.h @@ -0,0 +1,167 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// rbbiscan.h +// +// Copyright (C) 2002-2016, International Business Machines Corporation and others. +// All Rights Reserved. +// +// This file contains declarations for class RBBIRuleScanner +// + + +#ifndef RBBISCAN_H +#define RBBISCAN_H + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "unicode/rbbi.h" +#include "unicode/uniset.h" +#include "unicode/parseerr.h" +#include "uhash.h" +#include "uvector.h" +#include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that + // looks up references to $variables within a set. +#include "rbbinode.h" +#include "rbbirpt.h" + +U_NAMESPACE_BEGIN + +class RBBIRuleBuilder; +class RBBISymbolTable; + + +//-------------------------------------------------------------------------------- +// +// class RBBIRuleScanner does the lowest level, character-at-a-time +// scanning of break iterator rules. +// +// The output of the scanner is parse trees for +// the rule expressions and a list of all Unicode Sets +// encountered. +// +//-------------------------------------------------------------------------------- + +class RBBIRuleScanner : public UMemory { +public: + + enum { + kStackSize = 100 // The size of the state stack for + }; // rules parsing. Corresponds roughly + // to the depth of parentheses nesting + // that is allowed in the rules. + + struct RBBIRuleChar { + UChar32 fChar; + UBool fEscaped; + RBBIRuleChar() : fChar(0), fEscaped(false) {} + }; + + RBBIRuleScanner(RBBIRuleBuilder *rb); + + + virtual ~RBBIRuleScanner(); + + void nextChar(RBBIRuleChar &c); // Get the next char from the input stream. + // Return false if at end. + + UBool push(const RBBIRuleChar &c); // Push (unget) one character. + // Only a single character may be pushed. + + void parse(); // Parse the rules, generating two parse + // trees, one each for the forward and + // reverse rules, + // and a list of UnicodeSets encountered. + + int32_t numRules(); // Return the number of rules that have been seen. + + /** + * Return a rules string without unnecessary + * characters. + */ + static UnicodeString stripRules(const UnicodeString &rules); +private: + + UBool doParseActions(int32_t a); + void error(UErrorCode e); // error reporting convenience function. + void fixOpStack(RBBINode::OpPrecedence p); + // a character. + void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL); + + UChar32 nextCharLL(); +#ifdef RBBI_DEBUG + void printNodeStack(const char *title); +#endif + RBBINode *pushNewNode(RBBINode::NodeType t); + void scanSet(); + + + RBBIRuleBuilder *fRB; // The rule builder that we are part of. + + int32_t fScanIndex; // Index of current character being processed + // in the rule input string. + int32_t fNextIndex; // Index of the next character, which + // is the first character not yet scanned. + UBool fQuoteMode; // Scan is in a 'quoted region' + int32_t fLineNum; // Line number in input file. + int32_t fCharNum; // Char position within the line. + UChar32 fLastChar; // Previous char, needed to count CR-LF + // as a single line, not two. + + RBBIRuleChar fC; // Current char for parse state machine + // processing. + UnicodeString fVarName; // $variableName, valid when we've just + // scanned one. + + RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule + // parsing. index by p[state][char-class] + + uint16_t fStack[kStackSize]; // State stack, holds state pushes + int32_t fStackPtr; // and pops as specified in the state + // transition rules. + + RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created + // during the parse of a rule + int32_t fNodeStackPtr; + + + UBool fReverseRule; // True if the rule currently being scanned + // is a reverse direction rule (if it + // starts with a '!') + + UBool fLookAheadRule; // True if the rule includes a '/' + // somewhere within it. + + UBool fNoChainInRule; // True if the current rule starts with a '^'. + + RBBISymbolTable *fSymbolTable; // symbol table, holds definitions of + // $variable symbols. + + UHashtable *fSetTable; // UnicocodeSet hash table, holds indexes to + // the sets created while parsing rules. + // The key is the string used for creating + // the set. + + UnicodeSet fRuleSets[10]; // Unicode Sets that are needed during + // the scanning of RBBI rules. The + // indicies for these are assigned by the + // perl script that builds the state tables. + // See rbbirpt.h. + + int32_t fRuleNum; // Counts each rule as it is scanned. + + int32_t fOptionStart; // Input index of start of a !!option + // keyword, while being scanned. + + UnicodeSet *gRuleSet_rule_char; + UnicodeSet *gRuleSet_white_space; + UnicodeSet *gRuleSet_name_char; + UnicodeSet *gRuleSet_name_start_char; + + RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class + RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class +}; + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/rbbisetb.cpp b/thirdparty/icu4c/common/rbbisetb.cpp new file mode 100644 index 0000000000..29faeb8c45 --- /dev/null +++ b/thirdparty/icu4c/common/rbbisetb.cpp @@ -0,0 +1,694 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// rbbisetb.cpp +// +/* +*************************************************************************** +* Copyright (C) 2002-2008 International Business Machines Corporation * +* and others. All rights reserved. * +*************************************************************************** +*/ +// +// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules +// (part of the rule building process.) +// +// Starting with the rules parse tree from the scanner, +// +// - Enumerate the set of UnicodeSets that are referenced +// by the RBBI rules. +// - compute a set of non-overlapping character ranges +// with all characters within a range belonging to the same +// set of input unicode sets. +// - Derive a set of non-overlapping UnicodeSet (like things) +// that will correspond to columns in the state table for +// the RBBI execution engine. All characters within one +// of these sets belong to the same set of the original +// UnicodeSets from the user's rules. +// - construct the trie table that maps input characters +// to the index of the matching non-overlapping set of set from +// the previous step. +// + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/uniset.h" +#include "uvector.h" +#include "uassert.h" +#include "cmemory.h" +#include "cstring.h" + +#include "rbbisetb.h" +#include "rbbinode.h" + +U_NAMESPACE_BEGIN + +const int32_t kMaxCharCategoriesFor8BitsTrie = 255; +//------------------------------------------------------------------------ +// +// Constructor +// +//------------------------------------------------------------------------ +RBBISetBuilder::RBBISetBuilder(RBBIRuleBuilder *rb) +{ + fRB = rb; + fStatus = rb->fStatus; + fRangeList = nullptr; + fMutableTrie = nullptr; + fTrie = nullptr; + fTrieSize = 0; + fGroupCount = 0; + fSawBOF = false; +} + + +//------------------------------------------------------------------------ +// +// Destructor +// +//------------------------------------------------------------------------ +RBBISetBuilder::~RBBISetBuilder() +{ + RangeDescriptor *nextRangeDesc; + + // Walk through & delete the linked list of RangeDescriptors + for (nextRangeDesc = fRangeList; nextRangeDesc!=NULL;) { + RangeDescriptor *r = nextRangeDesc; + nextRangeDesc = r->fNext; + delete r; + } + + ucptrie_close(fTrie); + umutablecptrie_close(fMutableTrie); +} + + + + +//------------------------------------------------------------------------ +// +// build Build the list of non-overlapping character ranges +// from the Unicode Sets. +// +//------------------------------------------------------------------------ +void RBBISetBuilder::buildRanges() { + RBBINode *usetNode; + RangeDescriptor *rlRange; + + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "usets")) {printSets();} + + // + // Initialize the process by creating a single range encompassing all characters + // that is in no sets. + // + fRangeList = new RangeDescriptor(*fStatus); // will check for status here + if (fRangeList == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + return; + } + fRangeList->fStartChar = 0; + fRangeList->fEndChar = 0x10ffff; + + if (U_FAILURE(*fStatus)) { + return; + } + + // + // Find the set of non-overlapping ranges of characters + // + int ni; + for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules + usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni); + if (usetNode==NULL) { + break; + } + + UnicodeSet *inputSet = usetNode->fInputSet; + int32_t inputSetRangeCount = inputSet->getRangeCount(); + int inputSetRangeIndex = 0; + rlRange = fRangeList; + + for (;;) { + if (inputSetRangeIndex >= inputSetRangeCount) { + break; + } + UChar32 inputSetRangeBegin = inputSet->getRangeStart(inputSetRangeIndex); + UChar32 inputSetRangeEnd = inputSet->getRangeEnd(inputSetRangeIndex); + + // skip over ranges from the range list that are completely + // below the current range from the input unicode set. + while (rlRange->fEndChar < inputSetRangeBegin) { + rlRange = rlRange->fNext; + } + + // If the start of the range from the range list is before with + // the start of the range from the unicode set, split the range list range + // in two, with one part being before (wholly outside of) the unicode set + // and the other containing the rest. + // Then continue the loop; the post-split current range will then be skipped + // over + if (rlRange->fStartChar < inputSetRangeBegin) { + rlRange->split(inputSetRangeBegin, *fStatus); + if (U_FAILURE(*fStatus)) { + return; + } + continue; + } + + // Same thing at the end of the ranges... + // If the end of the range from the range list doesn't coincide with + // the end of the range from the unicode set, split the range list + // range in two. The first part of the split range will be + // wholly inside the Unicode set. + if (rlRange->fEndChar > inputSetRangeEnd) { + rlRange->split(inputSetRangeEnd+1, *fStatus); + if (U_FAILURE(*fStatus)) { + return; + } + } + + // The current rlRange is now entirely within the UnicodeSet range. + // Add this unicode set to the list of sets for this rlRange + if (rlRange->fIncludesSets->indexOf(usetNode) == -1) { + rlRange->fIncludesSets->addElement(usetNode, *fStatus); + if (U_FAILURE(*fStatus)) { + return; + } + } + + // Advance over ranges that we are finished with. + if (inputSetRangeEnd == rlRange->fEndChar) { + inputSetRangeIndex++; + } + rlRange = rlRange->fNext; + } + } + + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();} + + // + // Group the above ranges, with each group consisting of one or more + // ranges that are in exactly the same set of original UnicodeSets. + // The groups are numbered, and these group numbers are the set of + // input symbols recognized by the run-time state machine. + // + // Numbering: # 0 (state table column 0) is unused. + // # 1 is reserved - table column 1 is for end-of-input + // # 2 is reserved - table column 2 is for beginning-of-input + // # 3 is the first range list. + // + RangeDescriptor *rlSearchRange; + int32_t dictGroupCount = 0; + + for (rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) { + for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) { + if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) { + rlRange->fNum = rlSearchRange->fNum; + rlRange->fIncludesDict = rlSearchRange->fIncludesDict; + break; + } + } + if (rlRange->fNum == 0) { + rlRange->fFirstInGroup = true; + if (rlRange->isDictionaryRange()) { + rlRange->fNum = ++dictGroupCount; + rlRange->fIncludesDict = true; + } else { + fGroupCount++; + rlRange->fNum = fGroupCount+2; + addValToSets(rlRange->fIncludesSets, rlRange->fNum); + } + } + } + + // Move the character category numbers for any dictionary ranges up, so that they + // immediately follow the non-dictionary ranges. + + fDictCategoriesStart = fGroupCount + 3; + for (rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) { + if (rlRange->fIncludesDict) { + rlRange->fNum += fDictCategoriesStart - 1; + if (rlRange->fFirstInGroup) { + addValToSets(rlRange->fIncludesSets, rlRange->fNum); + } + } + } + fGroupCount += dictGroupCount; + + + // Handle input sets that contain the special string {eof}. + // Column 1 of the state table is reserved for EOF on input. + // Column 2 is reserved for before-the-start-input. + // (This column can be optimized away later if there are no rule + // references to {bof}.) + // Add this column value (1 or 2) to the equivalent expression + // subtree for each UnicodeSet that contains the string {eof} + // Because {bof} and {eof} are not characters in the normal sense, + // they don't affect the computation of the ranges or TRIE. + + UnicodeString eofString(u"eof"); + UnicodeString bofString(u"bof"); + for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules + usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni); + if (usetNode==NULL) { + break; + } + UnicodeSet *inputSet = usetNode->fInputSet; + if (inputSet->contains(eofString)) { + addValToSet(usetNode, 1); + } + if (inputSet->contains(bofString)) { + addValToSet(usetNode, 2); + fSawBOF = TRUE; + } + } + + + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();} + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();} +} + + +// +// Build the Trie table for mapping UChar32 values to the corresponding +// range group number. +// +void RBBISetBuilder::buildTrie() { + fMutableTrie = umutablecptrie_open( + 0, // Initial value for all code points. + 0, // Error value for out-of-range input. + fStatus); + + for (RangeDescriptor *range = fRangeList; range!=nullptr && U_SUCCESS(*fStatus); range=range->fNext) { + umutablecptrie_setRange(fMutableTrie, + range->fStartChar, // Range start + range->fEndChar, // Range end (inclusive) + range->fNum, // value for range + fStatus); + } +} + + +void RBBISetBuilder::mergeCategories(IntPair categories) { + U_ASSERT(categories.first >= 1); + U_ASSERT(categories.second > categories.first); + U_ASSERT((categories.first < fDictCategoriesStart && categories.second < fDictCategoriesStart) || + (categories.first >= fDictCategoriesStart && categories.second >= fDictCategoriesStart)); + + for (RangeDescriptor *rd = fRangeList; rd != nullptr; rd = rd->fNext) { + int32_t rangeNum = rd->fNum; + if (rangeNum == categories.second) { + rd->fNum = categories.first; + } else if (rangeNum > categories.second) { + rd->fNum--; + } + } + --fGroupCount; + if (categories.second <= fDictCategoriesStart) { + --fDictCategoriesStart; + } +} + + +//----------------------------------------------------------------------------------- +// +// getTrieSize() Return the size that will be required to serialize the Trie. +// +//----------------------------------------------------------------------------------- +int32_t RBBISetBuilder::getTrieSize() { + if (U_FAILURE(*fStatus)) { + return 0; + } + if (fTrie == nullptr) { + bool use8Bits = getNumCharCategories() <= kMaxCharCategoriesFor8BitsTrie; + fTrie = umutablecptrie_buildImmutable( + fMutableTrie, + UCPTRIE_TYPE_FAST, + use8Bits ? UCPTRIE_VALUE_BITS_8 : UCPTRIE_VALUE_BITS_16, + fStatus); + fTrieSize = ucptrie_toBinary(fTrie, nullptr, 0, fStatus); + if (*fStatus == U_BUFFER_OVERFLOW_ERROR) { + *fStatus = U_ZERO_ERROR; + } + } + return fTrieSize; +} + + +//----------------------------------------------------------------------------------- +// +// serializeTrie() Put the serialized trie at the specified address. +// Trust the caller to have given us enough memory. +// getTrieSize() MUST be called first. +// +//----------------------------------------------------------------------------------- +void RBBISetBuilder::serializeTrie(uint8_t *where) { + ucptrie_toBinary(fTrie, + where, // Buffer + fTrieSize, // Capacity + fStatus); +} + +//------------------------------------------------------------------------ +// +// addValToSets Add a runtime-mapped input value to each uset from a +// list of uset nodes. (val corresponds to a state table column.) +// For each of the original Unicode sets - which correspond +// directly to uset nodes - a logically equivalent expression +// is constructed in terms of the remapped runtime input +// symbol set. This function adds one runtime input symbol to +// a list of sets. +// +// The "logically equivalent expression" is the tree for an +// or-ing together of all of the symbols that go into the set. +// +//------------------------------------------------------------------------ +void RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) { + int32_t ix; + + for (ix=0; ix<sets->size(); ix++) { + RBBINode *usetNode = (RBBINode *)sets->elementAt(ix); + addValToSet(usetNode, val); + } +} + +void RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) { + RBBINode *leafNode = new RBBINode(RBBINode::leafChar); + if (leafNode == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + return; + } + leafNode->fVal = (unsigned short)val; + if (usetNode->fLeftChild == NULL) { + usetNode->fLeftChild = leafNode; + leafNode->fParent = usetNode; + } else { + // There are already input symbols present for this set. + // Set up an OR node, with the previous stuff as the left child + // and the new value as the right child. + RBBINode *orNode = new RBBINode(RBBINode::opOr); + if (orNode == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + return; + } + orNode->fLeftChild = usetNode->fLeftChild; + orNode->fRightChild = leafNode; + orNode->fLeftChild->fParent = orNode; + orNode->fRightChild->fParent = orNode; + usetNode->fLeftChild = orNode; + orNode->fParent = usetNode; + } +} + + +//------------------------------------------------------------------------ +// +// getNumCharCategories +// +//------------------------------------------------------------------------ +int32_t RBBISetBuilder::getNumCharCategories() const { + return fGroupCount + 3; +} + + +//------------------------------------------------------------------------ +// +// getDictCategoriesStart +// +//------------------------------------------------------------------------ +int32_t RBBISetBuilder::getDictCategoriesStart() const { + return fDictCategoriesStart; +} + + +//------------------------------------------------------------------------ +// +// sawBOF +// +//------------------------------------------------------------------------ +UBool RBBISetBuilder::sawBOF() const { + return fSawBOF; +} + + +//------------------------------------------------------------------------ +// +// getFirstChar Given a runtime RBBI character category, find +// the first UChar32 that is in the set of chars +// in the category. +//------------------------------------------------------------------------ +UChar32 RBBISetBuilder::getFirstChar(int32_t category) const { + RangeDescriptor *rlRange; + UChar32 retVal = (UChar32)-1; + for (rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) { + if (rlRange->fNum == category) { + retVal = rlRange->fStartChar; + break; + } + } + return retVal; +} + + +//------------------------------------------------------------------------ +// +// printRanges A debugging function. +// dump out all of the range definitions. +// +//------------------------------------------------------------------------ +#ifdef RBBI_DEBUG +void RBBISetBuilder::printRanges() { + RangeDescriptor *rlRange; + int i; + + RBBIDebugPrintf("\n\n Nonoverlapping Ranges ...\n"); + for (rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) { + RBBIDebugPrintf("%4x-%4x ", rlRange->fStartChar, rlRange->fEndChar); + + for (i=0; i<rlRange->fIncludesSets->size(); i++) { + RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i); + UnicodeString setName {u"anon"}; + RBBINode *setRef = usetNode->fParent; + if (setRef != nullptr) { + RBBINode *varRef = setRef->fParent; + if (varRef != nullptr && varRef->fType == RBBINode::varRef) { + setName = varRef->fText; + } + } + RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" "); + } + RBBIDebugPrintf("\n"); + } +} +#endif + + +//------------------------------------------------------------------------ +// +// printRangeGroups A debugging function. +// dump out all of the range groups. +// +//------------------------------------------------------------------------ +#ifdef RBBI_DEBUG +void RBBISetBuilder::printRangeGroups() { + int i; + + RBBIDebugPrintf("\nRanges grouped by Unicode Set Membership...\n"); + for (RangeDescriptor *rlRange = fRangeList; rlRange!=nullptr; rlRange=rlRange->fNext) { + if (rlRange->fFirstInGroup) { + int groupNum = rlRange->fNum; + RBBIDebugPrintf("%2i ", groupNum); + + if (groupNum >= fDictCategoriesStart) { RBBIDebugPrintf(" <DICT> ");} + + for (i=0; i<rlRange->fIncludesSets->size(); i++) { + RBBINode *usetNode = (RBBINode *)rlRange->fIncludesSets->elementAt(i); + UnicodeString setName = UNICODE_STRING("anon", 4); + RBBINode *setRef = usetNode->fParent; + if (setRef != NULL) { + RBBINode *varRef = setRef->fParent; + if (varRef != NULL && varRef->fType == RBBINode::varRef) { + setName = varRef->fText; + } + } + RBBI_DEBUG_printUnicodeString(setName); RBBIDebugPrintf(" "); + } + + i = 0; + for (RangeDescriptor *tRange = rlRange; tRange != nullptr; tRange = tRange->fNext) { + if (tRange->fNum == rlRange->fNum) { + if (i++ % 5 == 0) { + RBBIDebugPrintf("\n "); + } + RBBIDebugPrintf(" %05x-%05x", tRange->fStartChar, tRange->fEndChar); + } + } + RBBIDebugPrintf("\n"); + } + } + RBBIDebugPrintf("\n"); +} +#endif + + +//------------------------------------------------------------------------ +// +// printSets A debugging function. +// dump out all of the set definitions. +// +//------------------------------------------------------------------------ +#ifdef RBBI_DEBUG +void RBBISetBuilder::printSets() { + int i; + + RBBIDebugPrintf("\n\nUnicode Sets List\n------------------\n"); + for (i=0; ; i++) { + RBBINode *usetNode; + RBBINode *setRef; + RBBINode *varRef; + UnicodeString setName; + + usetNode = (RBBINode *)fRB->fUSetNodes->elementAt(i); + if (usetNode == NULL) { + break; + } + + RBBIDebugPrintf("%3d ", i); + setName = UNICODE_STRING("anonymous", 9); + setRef = usetNode->fParent; + if (setRef != NULL) { + varRef = setRef->fParent; + if (varRef != NULL && varRef->fType == RBBINode::varRef) { + setName = varRef->fText; + } + } + RBBI_DEBUG_printUnicodeString(setName); + RBBIDebugPrintf(" "); + RBBI_DEBUG_printUnicodeString(usetNode->fText); + RBBIDebugPrintf("\n"); + if (usetNode->fLeftChild != NULL) { + RBBINode::printTree(usetNode->fLeftChild, TRUE); + } + } + RBBIDebugPrintf("\n"); +} +#endif + + + +//------------------------------------------------------------------------------------- +// +// RangeDescriptor copy constructor +// +//------------------------------------------------------------------------------------- + +RangeDescriptor::RangeDescriptor(const RangeDescriptor &other, UErrorCode &status) : + fStartChar(other.fStartChar), fEndChar {other.fEndChar}, fNum {other.fNum}, + fIncludesDict{other.fIncludesDict}, fFirstInGroup{other.fFirstInGroup} { + + if (U_FAILURE(status)) { + return; + } + fIncludesSets = new UVector(status); + if (this->fIncludesSets == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(status)) { + return; + } + + for (int32_t i=0; i<other.fIncludesSets->size(); i++) { + this->fIncludesSets->addElement(other.fIncludesSets->elementAt(i), status); + } +} + + +//------------------------------------------------------------------------------------- +// +// RangeDesriptor default constructor +// +//------------------------------------------------------------------------------------- +RangeDescriptor::RangeDescriptor(UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + fIncludesSets = new UVector(status); + if (fIncludesSets == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } +} + + +//------------------------------------------------------------------------------------- +// +// RangeDesriptor Destructor +// +//------------------------------------------------------------------------------------- +RangeDescriptor::~RangeDescriptor() { + delete fIncludesSets; + fIncludesSets = nullptr; +} + +//------------------------------------------------------------------------------------- +// +// RangeDesriptor::split() +// +//------------------------------------------------------------------------------------- +void RangeDescriptor::split(UChar32 where, UErrorCode &status) { + U_ASSERT(where>fStartChar && where<=fEndChar); + RangeDescriptor *nr = new RangeDescriptor(*this, status); + if(nr == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + if (U_FAILURE(status)) { + delete nr; + return; + } + // RangeDescriptor copy constructor copies all fields. + // Only need to update those that are different after the split. + nr->fStartChar = where; + this->fEndChar = where-1; + nr->fNext = this->fNext; + this->fNext = nr; +} + + +//------------------------------------------------------------------------------------- +// +// RangeDescriptor::isDictionaryRange +// +// Test whether this range includes characters from +// the original Unicode Set named "dictionary". +// +// This function looks through the Unicode Sets that +// the range includes, checking for one named "dictionary" +// +// TODO: a faster way would be to find the set node for +// "dictionary" just once, rather than looking it +// up by name every time. +// +//------------------------------------------------------------------------------------- +bool RangeDescriptor::isDictionaryRange() { + static const char16_t *dictionary = u"dictionary"; + for (int32_t i=0; i<fIncludesSets->size(); i++) { + RBBINode *usetNode = (RBBINode *)fIncludesSets->elementAt(i); + RBBINode *setRef = usetNode->fParent; + if (setRef != nullptr) { + RBBINode *varRef = setRef->fParent; + if (varRef && varRef->fType == RBBINode::varRef) { + const UnicodeString *setName = &varRef->fText; + if (setName->compare(dictionary, -1) == 0) { + return true; + } + } + } + } + return false; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/thirdparty/icu4c/common/rbbisetb.h b/thirdparty/icu4c/common/rbbisetb.h new file mode 100644 index 0000000000..6409a4ea57 --- /dev/null +++ b/thirdparty/icu4c/common/rbbisetb.h @@ -0,0 +1,147 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// rbbisetb.h +/* +********************************************************************** +* Copyright (c) 2001-2005, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#ifndef RBBISETB_H +#define RBBISETB_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "unicode/uobject.h" +#include "rbbirb.h" +#include "uvector.h" + +U_NAMESPACE_BEGIN + +// +// RBBISetBuilder Derives the character categories used by the runtime RBBI engine +// from the Unicode Sets appearing in the source RBBI rules, and +// creates the TRIE table used to map from Unicode to the +// character categories. +// + + +// +// RangeDescriptor +// +// Each of the non-overlapping character ranges gets one of these descriptors. +// All of them are strung together in a linked list, which is kept in order +// (by character) +// +class RangeDescriptor : public UMemory { +public: + UChar32 fStartChar {}; // Start of range, unicode 32 bit value. + UChar32 fEndChar {}; // End of range, unicode 32 bit value. + int32_t fNum {0}; // runtime-mapped input value for this range. + bool fIncludesDict {false}; // True if the range includes $dictionary. + bool fFirstInGroup {false}; // True if first range in a group with the same fNum. + UVector *fIncludesSets {nullptr}; // vector of the the original + // Unicode sets that include this range. + // (Contains ptrs to uset nodes) + RangeDescriptor *fNext {nullptr}; // Next RangeDescriptor in the linked list. + + RangeDescriptor(UErrorCode &status); + RangeDescriptor(const RangeDescriptor &other, UErrorCode &status); + ~RangeDescriptor(); + void split(UChar32 where, UErrorCode &status); // Spit this range in two at "where", with + // where appearing in the second (higher) part. + bool isDictionaryRange(); // Check whether this range appears as part of + // the Unicode set named "dictionary" + + RangeDescriptor(const RangeDescriptor &other) = delete; // forbid default copying of this class + RangeDescriptor &operator=(const RangeDescriptor &other) = delete; // forbid assigning of this class +}; + + +// +// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules. +// +// Starting with the rules parse tree from the scanner, +// +// - Enumerate the set of UnicodeSets that are referenced +// by the RBBI rules. +// - compute a derived set of non-overlapping UnicodeSets +// that will correspond to columns in the state table for +// the RBBI execution engine. +// - construct the trie table that maps input characters +// to set numbers in the non-overlapping set of sets. +// + + +class RBBISetBuilder : public UMemory { +public: + RBBISetBuilder(RBBIRuleBuilder *rb); + ~RBBISetBuilder(); + + void buildRanges(); + void buildTrie(); + void addValToSets(UVector *sets, uint32_t val); + void addValToSet (RBBINode *usetNode, uint32_t val); + int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the + // runtime state machine, which are the same as + // columns in the DFA state table + int32_t getDictCategoriesStart() const; // First char category that includes $dictionary, or + // last category + 1 if there are no dictionary categories. + int32_t getTrieSize() /*const*/; // Size in bytes of the serialized Trie. + void serializeTrie(uint8_t *where); // write out the serialized Trie. + UChar32 getFirstChar(int32_t val) const; + UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo + // character were encountered. + /** + * Merge two character categories that have been identified as having equivalent behavior. + * The ranges belonging to the second category (table column) will be added to the first. + * @param categories the pair of categories to be merged. + */ + void mergeCategories(IntPair categories); + +#ifdef RBBI_DEBUG + void printSets(); + void printRanges(); + void printRangeGroups(); +#else + #define printSets() + #define printRanges() + #define printRangeGroups() +#endif + +private: + RBBIRuleBuilder *fRB; // The RBBI Rule Compiler that owns us. + UErrorCode *fStatus; + + RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors + + UMutableCPTrie *fMutableTrie; // The mapping TRIE that is the end result of processing + UCPTrie *fTrie; // the Unicode Sets. + uint32_t fTrieSize; + + // Number of range groups, which are groups of ranges that are in the same original UnicodeSets. + int32_t fGroupCount; + + // The number of the first dictionary char category. + // If there are no Dictionary categories, set to the last category + 1. + int32_t fDictCategoriesStart; + + UBool fSawBOF; + + RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class + RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class +}; + + + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ + +#endif diff --git a/thirdparty/icu4c/common/rbbistbl.cpp b/thirdparty/icu4c/common/rbbistbl.cpp new file mode 100644 index 0000000000..5303f76096 --- /dev/null +++ b/thirdparty/icu4c/common/rbbistbl.cpp @@ -0,0 +1,270 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class +// +/* +*************************************************************************** +* Copyright (C) 2002-2014 International Business Machines Corporation +* and others. All rights reserved. +*************************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/unistr.h" +#include "unicode/uniset.h" +#include "unicode/uchar.h" +#include "unicode/parsepos.h" + +#include "cstr.h" +#include "rbbinode.h" +#include "rbbirb.h" +#include "umutex.h" + + +// +// RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents +// when the hash table is deleted. +// +U_CDECL_BEGIN +static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) { + icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p; + delete px; +} +U_CDECL_END + + + +U_NAMESPACE_BEGIN + +RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status) + :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff)) +{ + fHashTable = NULL; + fCachedSetLookup = NULL; + + fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status); + // uhash_open checks status + if (U_FAILURE(status)) { + return; + } + uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter); +} + + + +RBBISymbolTable::~RBBISymbolTable() +{ + uhash_close(fHashTable); +} + + +// +// RBBISymbolTable::lookup This function from the abstract symbol table inteface +// looks up a variable name and returns a UnicodeString +// containing the substitution text. +// +// The variable name does NOT include the leading $. +// +const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const +{ + RBBISymbolTableEntry *el; + RBBINode *varRefNode; + RBBINode *exprNode; + RBBINode *usetNode; + const UnicodeString *retString; + RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const + + el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s); + if (el == NULL) { + return NULL; + } + + varRefNode = el->val; + exprNode = varRefNode->fLeftChild; // Root node of expression for variable + if (exprNode->fType == RBBINode::setRef) { + // The $variable refers to a single UnicodeSet + // return the ffffString, which will subsequently be interpreted as a + // stand-in character for the set by RBBISymbolTable::lookupMatcher() + usetNode = exprNode->fLeftChild; + This->fCachedSetLookup = usetNode->fInputSet; + retString = &ffffString; + } + else + { + // The variable refers to something other than just a set. + // return the original source string for the expression + retString = &exprNode->fText; + This->fCachedSetLookup = NULL; + } + return retString; +} + + + +// +// RBBISymbolTable::lookupMatcher This function from the abstract symbol table +// interface maps a single stand-in character to a +// pointer to a Unicode Set. The Unicode Set code uses this +// mechanism to get all references to the same $variable +// name to refer to a single common Unicode Set instance. +// +// This implementation cheats a little, and does not maintain a map of stand-in chars +// to sets. Instead, it takes advantage of the fact that the UnicodeSet +// constructor will always call this function right after calling lookup(), +// and we just need to remember what set to return between these two calls. +const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const +{ + UnicodeSet *retVal = NULL; + RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const + if (ch == 0xffff) { + retVal = fCachedSetLookup; + This->fCachedSetLookup = 0; + } + return retVal; +} + +// +// RBBISymbolTable::parseReference This function from the abstract symbol table interface +// looks for a $variable name in the source text. +// It does not look it up, only scans for it. +// It is used by the UnicodeSet parser. +// +// This implementation is lifted pretty much verbatim +// from the rules based transliterator implementation. +// I didn't see an obvious way of sharing it. +// +UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text, + ParsePosition& pos, int32_t limit) const +{ + int32_t start = pos.getIndex(); + int32_t i = start; + UnicodeString result; + while (i < limit) { + UChar c = text.charAt(i); + if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { + break; + } + ++i; + } + if (i == start) { // No valid name chars + return result; // Indicate failure with empty string + } + pos.setIndex(i); + text.extractBetween(start, i, result); + return result; +} + + + +// +// RBBISymbolTable::lookupNode Given a key (a variable name), return the +// corresponding RBBI Node. If there is no entry +// in the table for this name, return NULL. +// +RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{ + + RBBINode *retNode = NULL; + RBBISymbolTableEntry *el; + + el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); + if (el != NULL) { + retNode = el->val; + } + return retNode; +} + + +// +// RBBISymbolTable::addEntry Add a new entry to the symbol table. +// Indicate an error if the name already exists - +// this will only occur in the case of duplicate +// variable assignments. +// +void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) { + RBBISymbolTableEntry *e; + /* test for buffer overflows */ + if (U_FAILURE(err)) { + return; + } + e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); + if (e != NULL) { + err = U_BRK_VARIABLE_REDFINITION; + return; + } + + e = new RBBISymbolTableEntry; + if (e == NULL) { + err = U_MEMORY_ALLOCATION_ERROR; + return; + } + e->key = key; + e->val = val; + uhash_put( fHashTable, &e->key, e, &err); +} + + +RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {} + +RBBISymbolTableEntry::~RBBISymbolTableEntry() { + // The "val" of a symbol table entry is a variable reference node. + // The l. child of the val is the rhs expression from the assignment. + // Unlike other node types, children of variable reference nodes are not + // automatically recursively deleted. We do it manually here. + delete val->fLeftChild; + val->fLeftChild = NULL; + + delete val; + + // Note: the key UnicodeString is destructed by virtue of being in the object by value. +} + + +// +// RBBISymbolTable::print Debugging function, dump out the symbol table contents. +// +#ifdef RBBI_DEBUG +void RBBISymbolTable::rbbiSymtablePrint() const { + RBBIDebugPrintf("Variable Definitions Symbol Table\n" + "Name Node serial String Val\n" + "-------------------------------------------------------------------\n"); + + int32_t pos = UHASH_FIRST; + const UHashElement *e = NULL; + for (;;) { + e = uhash_nextElement(fHashTable, &pos); + if (e == NULL ) { + break; + } + RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; + + RBBIDebugPrintf("%-19s %8p %7d ", CStr(s->key)(), (void *)s->val, s->val->fSerialNum); + RBBIDebugPrintf(" %s\n", CStr(s->val->fLeftChild->fText)()); + } + + RBBIDebugPrintf("\nParsed Variable Definitions\n"); + pos = -1; + for (;;) { + e = uhash_nextElement(fHashTable, &pos); + if (e == NULL ) { + break; + } + RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; + RBBIDebugPrintf("%s\n", CStr(s->key)()); + RBBINode::printTree(s->val, TRUE); + RBBINode::printTree(s->val->fLeftChild, FALSE); + RBBIDebugPrintf("\n"); + } +} +#endif + + + + + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/thirdparty/icu4c/common/rbbitblb.cpp b/thirdparty/icu4c/common/rbbitblb.cpp new file mode 100644 index 0000000000..bcbdab9227 --- /dev/null +++ b/thirdparty/icu4c/common/rbbitblb.cpp @@ -0,0 +1,1793 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ +// +// rbbitblb.cpp +// + + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/unistr.h" +#include "rbbitblb.h" +#include "rbbirb.h" +#include "rbbiscan.h" +#include "rbbisetb.h" +#include "rbbidata.h" +#include "cstring.h" +#include "uassert.h" +#include "uvectr32.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + +const int32_t kMaxStateFor8BitsTable = 255; + +RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status) : + fRB(rb), + fTree(*rootNode), + fStatus(&status), + fDStates(nullptr), + fSafeTable(nullptr) { + if (U_FAILURE(status)) { + return; + } + // fDStates is UVector<RBBIStateDescriptor *> + fDStates = new UVector(status); + if (U_SUCCESS(status) && fDStates == nullptr ) { + status = U_MEMORY_ALLOCATION_ERROR; + } +} + + + +RBBITableBuilder::~RBBITableBuilder() { + int i; + for (i=0; i<fDStates->size(); i++) { + delete (RBBIStateDescriptor *)fDStates->elementAt(i); + } + delete fDStates; + delete fSafeTable; + delete fLookAheadRuleMap; +} + + +//----------------------------------------------------------------------------- +// +// RBBITableBuilder::buildForwardTable - This is the main function for building +// the DFA state transition table from the RBBI rules parse tree. +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::buildForwardTable() { + + if (U_FAILURE(*fStatus)) { + return; + } + + // If there were no rules, just return. This situation can easily arise + // for the reverse rules. + if (fTree==NULL) { + return; + } + + // + // Walk through the tree, replacing any references to $variables with a copy of the + // parse tree for the substition expression. + // + fTree = fTree->flattenVariables(); +#ifdef RBBI_DEBUG + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) { + RBBIDebugPuts("\nParse tree after flattening variable references."); + RBBINode::printTree(fTree, TRUE); + } +#endif + + // + // If the rules contained any references to {bof} + // add a {bof} <cat> <former root of tree> to the + // tree. Means that all matches must start out with the + // {bof} fake character. + // + if (fRB->fSetBuilder->sawBOF()) { + RBBINode *bofTop = new RBBINode(RBBINode::opCat); + RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar); + // Delete and exit if memory allocation failed. + if (bofTop == NULL || bofLeaf == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + delete bofTop; + delete bofLeaf; + return; + } + bofTop->fLeftChild = bofLeaf; + bofTop->fRightChild = fTree; + bofLeaf->fParent = bofTop; + bofLeaf->fVal = 2; // Reserved value for {bof}. + fTree = bofTop; + } + + // + // Add a unique right-end marker to the expression. + // Appears as a cat-node, left child being the original tree, + // right child being the end marker. + // + RBBINode *cn = new RBBINode(RBBINode::opCat); + // Exit if memory allocation failed. + if (cn == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + return; + } + cn->fLeftChild = fTree; + fTree->fParent = cn; + RBBINode *endMarkerNode = cn->fRightChild = new RBBINode(RBBINode::endMark); + // Delete and exit if memory allocation failed. + if (cn->fRightChild == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + delete cn; + return; + } + cn->fRightChild->fParent = cn; + fTree = cn; + + // + // Replace all references to UnicodeSets with the tree for the equivalent + // expression. + // + fTree->flattenSets(); +#ifdef RBBI_DEBUG + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) { + RBBIDebugPuts("\nParse tree after flattening Unicode Set references."); + RBBINode::printTree(fTree, TRUE); + } +#endif + + + // + // calculate the functions nullable, firstpos, lastpos and followpos on + // nodes in the parse tree. + // See the alogrithm description in Aho. + // Understanding how this works by looking at the code alone will be + // nearly impossible. + // + calcNullable(fTree); + calcFirstPos(fTree); + calcLastPos(fTree); + calcFollowPos(fTree); + if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) { + RBBIDebugPuts("\n"); + printPosSets(fTree); + } + + // + // For "chained" rules, modify the followPos sets + // + if (fRB->fChainRules) { + calcChainedFollowPos(fTree, endMarkerNode); + } + + // + // BOF (start of input) test fixup. + // + if (fRB->fSetBuilder->sawBOF()) { + bofFixup(); + } + + // + // Build the DFA state transition tables. + // + buildStateTable(); + mapLookAheadRules(); + flagAcceptingStates(); + flagLookAheadStates(); + flagTaggedStates(); + + // + // Update the global table of rule status {tag} values + // The rule builder has a global vector of status values that are common + // for all tables. Merge the ones from this table into the global set. + // + mergeRuleStatusVals(); +} + + + +//----------------------------------------------------------------------------- +// +// calcNullable. Impossible to explain succinctly. See Aho, section 3.9 +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::calcNullable(RBBINode *n) { + if (n == NULL) { + return; + } + if (n->fType == RBBINode::setRef || + n->fType == RBBINode::endMark ) { + // These are non-empty leaf node types. + n->fNullable = FALSE; + return; + } + + if (n->fType == RBBINode::lookAhead || n->fType == RBBINode::tag) { + // Lookahead marker node. It's a leaf, so no recursion on children. + // It's nullable because it does not match any literal text from the input stream. + n->fNullable = TRUE; + return; + } + + + // The node is not a leaf. + // Calculate nullable on its children. + calcNullable(n->fLeftChild); + calcNullable(n->fRightChild); + + // Apply functions from table 3.40 in Aho + if (n->fType == RBBINode::opOr) { + n->fNullable = n->fLeftChild->fNullable || n->fRightChild->fNullable; + } + else if (n->fType == RBBINode::opCat) { + n->fNullable = n->fLeftChild->fNullable && n->fRightChild->fNullable; + } + else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opQuestion) { + n->fNullable = TRUE; + } + else { + n->fNullable = FALSE; + } +} + + + + +//----------------------------------------------------------------------------- +// +// calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9 +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::calcFirstPos(RBBINode *n) { + if (n == NULL) { + return; + } + if (n->fType == RBBINode::leafChar || + n->fType == RBBINode::endMark || + n->fType == RBBINode::lookAhead || + n->fType == RBBINode::tag) { + // These are non-empty leaf node types. + // Note: In order to maintain the sort invariant on the set, + // this function should only be called on a node whose set is + // empty to start with. + n->fFirstPosSet->addElement(n, *fStatus); + return; + } + + // The node is not a leaf. + // Calculate firstPos on its children. + calcFirstPos(n->fLeftChild); + calcFirstPos(n->fRightChild); + + // Apply functions from table 3.40 in Aho + if (n->fType == RBBINode::opOr) { + setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); + setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet); + } + else if (n->fType == RBBINode::opCat) { + setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); + if (n->fLeftChild->fNullable) { + setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet); + } + } + else if (n->fType == RBBINode::opStar || + n->fType == RBBINode::opQuestion || + n->fType == RBBINode::opPlus) { + setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet); + } +} + + + +//----------------------------------------------------------------------------- +// +// calcLastPos. Impossible to explain succinctly. See Aho, section 3.9 +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::calcLastPos(RBBINode *n) { + if (n == NULL) { + return; + } + if (n->fType == RBBINode::leafChar || + n->fType == RBBINode::endMark || + n->fType == RBBINode::lookAhead || + n->fType == RBBINode::tag) { + // These are non-empty leaf node types. + // Note: In order to maintain the sort invariant on the set, + // this function should only be called on a node whose set is + // empty to start with. + n->fLastPosSet->addElement(n, *fStatus); + return; + } + + // The node is not a leaf. + // Calculate lastPos on its children. + calcLastPos(n->fLeftChild); + calcLastPos(n->fRightChild); + + // Apply functions from table 3.40 in Aho + if (n->fType == RBBINode::opOr) { + setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); + setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); + } + else if (n->fType == RBBINode::opCat) { + setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet); + if (n->fRightChild->fNullable) { + setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); + } + } + else if (n->fType == RBBINode::opStar || + n->fType == RBBINode::opQuestion || + n->fType == RBBINode::opPlus) { + setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet); + } +} + + + +//----------------------------------------------------------------------------- +// +// calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9 +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::calcFollowPos(RBBINode *n) { + if (n == NULL || + n->fType == RBBINode::leafChar || + n->fType == RBBINode::endMark) { + return; + } + + calcFollowPos(n->fLeftChild); + calcFollowPos(n->fRightChild); + + // Aho rule #1 + if (n->fType == RBBINode::opCat) { + RBBINode *i; // is 'i' in Aho's description + uint32_t ix; + + UVector *LastPosOfLeftChild = n->fLeftChild->fLastPosSet; + + for (ix=0; ix<(uint32_t)LastPosOfLeftChild->size(); ix++) { + i = (RBBINode *)LastPosOfLeftChild->elementAt(ix); + setAdd(i->fFollowPos, n->fRightChild->fFirstPosSet); + } + } + + // Aho rule #2 + if (n->fType == RBBINode::opStar || + n->fType == RBBINode::opPlus) { + RBBINode *i; // again, n and i are the names from Aho's description. + uint32_t ix; + + for (ix=0; ix<(uint32_t)n->fLastPosSet->size(); ix++) { + i = (RBBINode *)n->fLastPosSet->elementAt(ix); + setAdd(i->fFollowPos, n->fFirstPosSet); + } + } + + + +} + +//----------------------------------------------------------------------------- +// +// addRuleRootNodes Recursively walk a parse tree, adding all nodes flagged +// as roots of a rule to a destination vector. +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::addRuleRootNodes(UVector *dest, RBBINode *node) { + if (node == NULL || U_FAILURE(*fStatus)) { + return; + } + if (node->fRuleRoot) { + dest->addElement(node, *fStatus); + // Note: rules cannot nest. If we found a rule start node, + // no child node can also be a start node. + return; + } + addRuleRootNodes(dest, node->fLeftChild); + addRuleRootNodes(dest, node->fRightChild); +} + +//----------------------------------------------------------------------------- +// +// calcChainedFollowPos. Modify the previously calculated followPos sets +// to implement rule chaining. NOT described by Aho +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree, RBBINode *endMarkNode) { + + UVector leafNodes(*fStatus); + if (U_FAILURE(*fStatus)) { + return; + } + + // get a list all leaf nodes + tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus); + if (U_FAILURE(*fStatus)) { + return; + } + + // Collect all leaf nodes that can start matches for rules + // with inbound chaining enabled, which is the union of the + // firstPosition sets from each of the rule root nodes. + + UVector ruleRootNodes(*fStatus); + addRuleRootNodes(&ruleRootNodes, tree); + + UVector matchStartNodes(*fStatus); + for (int j=0; j<ruleRootNodes.size(); ++j) { + RBBINode *node = static_cast<RBBINode *>(ruleRootNodes.elementAt(j)); + if (node->fChainIn) { + setAdd(&matchStartNodes, node->fFirstPosSet); + } + } + if (U_FAILURE(*fStatus)) { + return; + } + + int32_t endNodeIx; + int32_t startNodeIx; + + for (endNodeIx=0; endNodeIx<leafNodes.size(); endNodeIx++) { + RBBINode *endNode = (RBBINode *)leafNodes.elementAt(endNodeIx); + + // Identify leaf nodes that correspond to overall rule match positions. + // These include the endMarkNode in their followPos sets. + // + // Note: do not consider other end marker nodes, those that are added to + // look-ahead rules. These can't chain; a match immediately stops + // further matching. This leaves exactly one end marker node, the one + // at the end of the complete tree. + + if (!endNode->fFollowPos->contains(endMarkNode)) { + continue; + } + + // We've got a node that can end a match. + + // !!LBCMNoChain implementation: If this node's val correspond to + // the Line Break $CM char class, don't chain from it. + // TODO: Remove this. !!LBCMNoChain is deprecated, and is not used + // by any of the standard ICU rules. + if (fRB->fLBCMNoChain) { + UChar32 c = this->fRB->fSetBuilder->getFirstChar(endNode->fVal); + if (c != -1) { + // c == -1 occurs with sets containing only the {eof} marker string. + ULineBreak cLBProp = (ULineBreak)u_getIntPropertyValue(c, UCHAR_LINE_BREAK); + if (cLBProp == U_LB_COMBINING_MARK) { + continue; + } + } + } + + // Now iterate over the nodes that can start a match, looking for ones + // with the same char class as our ending node. + RBBINode *startNode; + for (startNodeIx = 0; startNodeIx<matchStartNodes.size(); startNodeIx++) { + startNode = (RBBINode *)matchStartNodes.elementAt(startNodeIx); + if (startNode->fType != RBBINode::leafChar) { + continue; + } + + if (endNode->fVal == startNode->fVal) { + // The end val (character class) of one possible match is the + // same as the start of another. + + // Add all nodes from the followPos of the start node to the + // followPos set of the end node, which will have the effect of + // letting matches transition from a match state at endNode + // to the second char of a match starting with startNode. + setAdd(endNode->fFollowPos, startNode->fFollowPos); + } + } + } +} + + +//----------------------------------------------------------------------------- +// +// bofFixup. Fixup for state tables that include {bof} beginning of input testing. +// Do an swizzle similar to chaining, modifying the followPos set of +// the bofNode to include the followPos nodes from other {bot} nodes +// scattered through the tree. +// +// This function has much in common with calcChainedFollowPos(). +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::bofFixup() { + + if (U_FAILURE(*fStatus)) { + return; + } + + // The parse tree looks like this ... + // fTree root ---> <cat> + // / \ . + // <cat> <#end node> + // / \ . + // <bofNode> rest + // of tree + // + // We will be adding things to the followPos set of the <bofNode> + // + RBBINode *bofNode = fTree->fLeftChild->fLeftChild; + U_ASSERT(bofNode->fType == RBBINode::leafChar); + U_ASSERT(bofNode->fVal == 2); + + // Get all nodes that can be the start a match of the user-written rules + // (excluding the fake bofNode) + // We want the nodes that can start a match in the + // part labeled "rest of tree" + // + UVector *matchStartNodes = fTree->fLeftChild->fRightChild->fFirstPosSet; + + RBBINode *startNode; + int startNodeIx; + for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) { + startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx); + if (startNode->fType != RBBINode::leafChar) { + continue; + } + + if (startNode->fVal == bofNode->fVal) { + // We found a leaf node corresponding to a {bof} that was + // explicitly written into a rule. + // Add everything from the followPos set of this node to the + // followPos set of the fake bofNode at the start of the tree. + // + setAdd(bofNode->fFollowPos, startNode->fFollowPos); + } + } +} + +//----------------------------------------------------------------------------- +// +// buildStateTable() Determine the set of runtime DFA states and the +// transition tables for these states, by the algorithm +// of fig. 3.44 in Aho. +// +// Most of the comments are quotes of Aho's psuedo-code. +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::buildStateTable() { + if (U_FAILURE(*fStatus)) { + return; + } + RBBIStateDescriptor *failState; + // Set it to NULL to avoid uninitialized warning + RBBIStateDescriptor *initialState = NULL; + // + // Add a dummy state 0 - the stop state. Not from Aho. + int lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1; + failState = new RBBIStateDescriptor(lastInputSymbol, fStatus); + if (failState == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + goto ExitBuildSTdeleteall; + } + failState->fPositions = new UVector(*fStatus); + if (failState->fPositions == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + } + if (failState->fPositions == NULL || U_FAILURE(*fStatus)) { + goto ExitBuildSTdeleteall; + } + fDStates->addElement(failState, *fStatus); + if (U_FAILURE(*fStatus)) { + goto ExitBuildSTdeleteall; + } + + // initially, the only unmarked state in Dstates is firstpos(root), + // where toot is the root of the syntax tree for (r)#; + initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus); + if (initialState == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(*fStatus)) { + goto ExitBuildSTdeleteall; + } + initialState->fPositions = new UVector(*fStatus); + if (initialState->fPositions == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(*fStatus)) { + goto ExitBuildSTdeleteall; + } + setAdd(initialState->fPositions, fTree->fFirstPosSet); + fDStates->addElement(initialState, *fStatus); + if (U_FAILURE(*fStatus)) { + goto ExitBuildSTdeleteall; + } + + // while there is an unmarked state T in Dstates do begin + for (;;) { + RBBIStateDescriptor *T = NULL; + int32_t tx; + for (tx=1; tx<fDStates->size(); tx++) { + RBBIStateDescriptor *temp; + temp = (RBBIStateDescriptor *)fDStates->elementAt(tx); + if (temp->fMarked == FALSE) { + T = temp; + break; + } + } + if (T == NULL) { + break; + } + + // mark T; + T->fMarked = TRUE; + + // for each input symbol a do begin + int32_t a; + for (a = 1; a<=lastInputSymbol; a++) { + // let U be the set of positions that are in followpos(p) + // for some position p in T + // such that the symbol at position p is a; + UVector *U = NULL; + RBBINode *p; + int32_t px; + for (px=0; px<T->fPositions->size(); px++) { + p = (RBBINode *)T->fPositions->elementAt(px); + if ((p->fType == RBBINode::leafChar) && (p->fVal == a)) { + if (U == NULL) { + U = new UVector(*fStatus); + if (U == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + goto ExitBuildSTdeleteall; + } + } + setAdd(U, p->fFollowPos); + } + } + + // if U is not empty and not in DStates then + int32_t ux = 0; + UBool UinDstates = FALSE; + if (U != NULL) { + U_ASSERT(U->size() > 0); + int ix; + for (ix=0; ix<fDStates->size(); ix++) { + RBBIStateDescriptor *temp2; + temp2 = (RBBIStateDescriptor *)fDStates->elementAt(ix); + if (setEquals(U, temp2->fPositions)) { + delete U; + U = temp2->fPositions; + ux = ix; + UinDstates = TRUE; + break; + } + } + + // Add U as an unmarked state to Dstates + if (!UinDstates) + { + RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus); + if (newState == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(*fStatus)) { + goto ExitBuildSTdeleteall; + } + newState->fPositions = U; + fDStates->addElement(newState, *fStatus); + if (U_FAILURE(*fStatus)) { + return; + } + ux = fDStates->size()-1; + } + + // Dtran[T, a] := U; + T->fDtran->setElementAt(ux, a); + } + } + } + return; + // delete local pointers only if error occured. +ExitBuildSTdeleteall: + delete initialState; + delete failState; +} + + +/** + * mapLookAheadRules + * + */ +void RBBITableBuilder::mapLookAheadRules() { + fLookAheadRuleMap = new UVector32(fRB->fScanner->numRules() + 1, *fStatus); + if (fLookAheadRuleMap == nullptr) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(*fStatus)) { + return; + } + fLookAheadRuleMap->setSize(fRB->fScanner->numRules() + 1); + + for (int32_t n=0; n<fDStates->size(); n++) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); + int32_t laSlotForState = 0; + + // Establish the look-ahead slot for this state, if the state covers + // any look-ahead nodes - corresponding to the '/' in look-ahead rules. + + // If any of the look-ahead nodes already have a slot assigned, use it, + // otherwise assign a new one. + + bool sawLookAheadNode = false; + for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) { + RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos)); + if (node->fType != RBBINode::NodeType::lookAhead) { + continue; + } + sawLookAheadNode = true; + int32_t ruleNum = node->fVal; // Set when rule was originally parsed. + U_ASSERT(ruleNum < fLookAheadRuleMap->size()); + U_ASSERT(ruleNum > 0); + int32_t laSlot = fLookAheadRuleMap->elementAti(ruleNum); + if (laSlot != 0) { + if (laSlotForState == 0) { + laSlotForState = laSlot; + } else { + // TODO: figure out if this can fail, change to setting an error code if so. + U_ASSERT(laSlot == laSlotForState); + } + } + } + if (!sawLookAheadNode) { + continue; + } + + if (laSlotForState == 0) { + laSlotForState = ++fLASlotsInUse; + } + + // For each look ahead node covered by this state, + // set the mapping from the node's rule number to the look ahead slot. + // There can be multiple nodes/rule numbers going to the same la slot. + + for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) { + RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos)); + if (node->fType != RBBINode::NodeType::lookAhead) { + continue; + } + int32_t ruleNum = node->fVal; // Set when rule was originally parsed. + int32_t existingVal = fLookAheadRuleMap->elementAti(ruleNum); + (void)existingVal; + U_ASSERT(existingVal == 0 || existingVal == laSlotForState); + fLookAheadRuleMap->setElementAt(laSlotForState, ruleNum); + } + } + +} + +//----------------------------------------------------------------------------- +// +// flagAcceptingStates Identify accepting states. +// First get a list of all of the end marker nodes. +// Then, for each state s, +// if s contains one of the end marker nodes in its list of tree positions then +// s is an accepting state. +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::flagAcceptingStates() { + if (U_FAILURE(*fStatus)) { + return; + } + UVector endMarkerNodes(*fStatus); + RBBINode *endMarker; + int32_t i; + int32_t n; + + if (U_FAILURE(*fStatus)) { + return; + } + + fTree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus); + if (U_FAILURE(*fStatus)) { + return; + } + + for (i=0; i<endMarkerNodes.size(); i++) { + endMarker = (RBBINode *)endMarkerNodes.elementAt(i); + for (n=0; n<fDStates->size(); n++) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); + if (sd->fPositions->indexOf(endMarker) >= 0) { + // Any non-zero value for fAccepting means this is an accepting node. + // The value is what will be returned to the user as the break status. + // If no other value was specified, force it to ACCEPTING_UNCONDITIONAL (1). + + if (sd->fAccepting==0) { + // State hasn't been marked as accepting yet. Do it now. + sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal); + if (sd->fAccepting == 0) { + sd->fAccepting = ACCEPTING_UNCONDITIONAL; + } + } + if (sd->fAccepting==ACCEPTING_UNCONDITIONAL && endMarker->fVal != 0) { + // Both lookahead and non-lookahead accepting for this state. + // Favor the look-ahead, because a look-ahead match needs to + // immediately stop the run-time engine. First match, not longest. + sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal); + } + // implicit else: + // if sd->fAccepting already had a value other than 0 or 1, leave it be. + } + } + } +} + + +//----------------------------------------------------------------------------- +// +// flagLookAheadStates Very similar to flagAcceptingStates, above. +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::flagLookAheadStates() { + if (U_FAILURE(*fStatus)) { + return; + } + UVector lookAheadNodes(*fStatus); + RBBINode *lookAheadNode; + int32_t i; + int32_t n; + + fTree->findNodes(&lookAheadNodes, RBBINode::lookAhead, *fStatus); + if (U_FAILURE(*fStatus)) { + return; + } + for (i=0; i<lookAheadNodes.size(); i++) { + lookAheadNode = (RBBINode *)lookAheadNodes.elementAt(i); + U_ASSERT(lookAheadNode->fType == RBBINode::NodeType::lookAhead); + + for (n=0; n<fDStates->size(); n++) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); + int32_t positionsIdx = sd->fPositions->indexOf(lookAheadNode); + if (positionsIdx >= 0) { + U_ASSERT(lookAheadNode == sd->fPositions->elementAt(positionsIdx)); + uint32_t lookaheadSlot = fLookAheadRuleMap->elementAti(lookAheadNode->fVal); + U_ASSERT(sd->fLookAhead == 0 || sd->fLookAhead == lookaheadSlot); + // if (sd->fLookAhead != 0 && sd->fLookAhead != lookaheadSlot) { + // printf("%s:%d Bingo. sd->fLookAhead:%d lookaheadSlot:%d\n", + // __FILE__, __LINE__, sd->fLookAhead, lookaheadSlot); + // } + sd->fLookAhead = lookaheadSlot; + } + } + } +} + + + + +//----------------------------------------------------------------------------- +// +// flagTaggedStates +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::flagTaggedStates() { + if (U_FAILURE(*fStatus)) { + return; + } + UVector tagNodes(*fStatus); + RBBINode *tagNode; + int32_t i; + int32_t n; + + if (U_FAILURE(*fStatus)) { + return; + } + fTree->findNodes(&tagNodes, RBBINode::tag, *fStatus); + if (U_FAILURE(*fStatus)) { + return; + } + for (i=0; i<tagNodes.size(); i++) { // For each tag node t (all of 'em) + tagNode = (RBBINode *)tagNodes.elementAt(i); + + for (n=0; n<fDStates->size(); n++) { // For each state s (row in the state table) + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); + if (sd->fPositions->indexOf(tagNode) >= 0) { // if s include the tag node t + sortedAdd(&sd->fTagVals, tagNode->fVal); + } + } + } +} + + + + +//----------------------------------------------------------------------------- +// +// mergeRuleStatusVals +// +// Update the global table of rule status {tag} values +// The rule builder has a global vector of status values that are common +// for all tables. Merge the ones from this table into the global set. +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::mergeRuleStatusVals() { + // + // The basic outline of what happens here is this... + // + // for each state in this state table + // if the status tag list for this state is in the global statuses list + // record where and + // continue with the next state + // else + // add the tag list for this state to the global list. + // + int i; + int n; + + // Pre-set a single tag of {0} into the table. + // We will need this as a default, for rule sets with no explicit tagging. + if (fRB->fRuleStatusVals->size() == 0) { + fRB->fRuleStatusVals->addElement(1, *fStatus); // Num of statuses in group + fRB->fRuleStatusVals->addElement((int32_t)0, *fStatus); // and our single status of zero + } + + // For each state + for (n=0; n<fDStates->size(); n++) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); + UVector *thisStatesTagValues = sd->fTagVals; + if (thisStatesTagValues == NULL) { + // No tag values are explicitly associated with this state. + // Set the default tag value. + sd->fTagsIdx = 0; + continue; + } + + // There are tag(s) associated with this state. + // fTagsIdx will be the index into the global tag list for this state's tag values. + // Initial value of -1 flags that we haven't got it set yet. + sd->fTagsIdx = -1; + int32_t thisTagGroupStart = 0; // indexes into the global rule status vals list + int32_t nextTagGroupStart = 0; + + // Loop runs once per group of tags in the global list + while (nextTagGroupStart < fRB->fRuleStatusVals->size()) { + thisTagGroupStart = nextTagGroupStart; + nextTagGroupStart += fRB->fRuleStatusVals->elementAti(thisTagGroupStart) + 1; + if (thisStatesTagValues->size() != fRB->fRuleStatusVals->elementAti(thisTagGroupStart)) { + // The number of tags for this state is different from + // the number of tags in this group from the global list. + // Continue with the next group from the global list. + continue; + } + // The lengths match, go ahead and compare the actual tag values + // between this state and the group from the global list. + for (i=0; i<thisStatesTagValues->size(); i++) { + if (thisStatesTagValues->elementAti(i) != + fRB->fRuleStatusVals->elementAti(thisTagGroupStart + 1 + i) ) { + // Mismatch. + break; + } + } + + if (i == thisStatesTagValues->size()) { + // We found a set of tag values in the global list that match + // those for this state. Use them. + sd->fTagsIdx = thisTagGroupStart; + break; + } + } + + if (sd->fTagsIdx == -1) { + // No suitable entry in the global tag list already. Add one + sd->fTagsIdx = fRB->fRuleStatusVals->size(); + fRB->fRuleStatusVals->addElement(thisStatesTagValues->size(), *fStatus); + for (i=0; i<thisStatesTagValues->size(); i++) { + fRB->fRuleStatusVals->addElement(thisStatesTagValues->elementAti(i), *fStatus); + } + } + } +} + + + + + + + +//----------------------------------------------------------------------------- +// +// sortedAdd Add a value to a vector of sorted values (ints). +// Do not replicate entries; if the value is already there, do not +// add a second one. +// Lazily create the vector if it does not already exist. +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) { + int32_t i; + + if (*vector == NULL) { + *vector = new UVector(*fStatus); + } + if (*vector == NULL || U_FAILURE(*fStatus)) { + return; + } + UVector *vec = *vector; + int32_t vSize = vec->size(); + for (i=0; i<vSize; i++) { + int32_t valAtI = vec->elementAti(i); + if (valAtI == val) { + // The value is already in the vector. Don't add it again. + return; + } + if (valAtI > val) { + break; + } + } + vec->insertElementAt(val, i, *fStatus); +} + + + +//----------------------------------------------------------------------------- +// +// setAdd Set operation on UVector +// dest = dest union source +// Elements may only appear once and must be sorted. +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::setAdd(UVector *dest, UVector *source) { + int32_t destOriginalSize = dest->size(); + int32_t sourceSize = source->size(); + int32_t di = 0; + MaybeStackArray<void *, 16> destArray, sourceArray; // Handle small cases without malloc + void **destPtr, **sourcePtr; + void **destLim, **sourceLim; + + if (destOriginalSize > destArray.getCapacity()) { + if (destArray.resize(destOriginalSize) == NULL) { + return; + } + } + destPtr = destArray.getAlias(); + destLim = destPtr + destOriginalSize; // destArray.getArrayLimit()? + + if (sourceSize > sourceArray.getCapacity()) { + if (sourceArray.resize(sourceSize) == NULL) { + return; + } + } + sourcePtr = sourceArray.getAlias(); + sourceLim = sourcePtr + sourceSize; // sourceArray.getArrayLimit()? + + // Avoid multiple "get element" calls by getting the contents into arrays + (void) dest->toArray(destPtr); + (void) source->toArray(sourcePtr); + + dest->setSize(sourceSize+destOriginalSize, *fStatus); + + while (sourcePtr < sourceLim && destPtr < destLim) { + if (*destPtr == *sourcePtr) { + dest->setElementAt(*sourcePtr++, di++); + destPtr++; + } + // This check is required for machines with segmented memory, like i5/OS. + // Direct pointer comparison is not recommended. + else if (uprv_memcmp(destPtr, sourcePtr, sizeof(void *)) < 0) { + dest->setElementAt(*destPtr++, di++); + } + else { /* *sourcePtr < *destPtr */ + dest->setElementAt(*sourcePtr++, di++); + } + } + + // At most one of these two cleanup loops will execute + while (destPtr < destLim) { + dest->setElementAt(*destPtr++, di++); + } + while (sourcePtr < sourceLim) { + dest->setElementAt(*sourcePtr++, di++); + } + + dest->setSize(di, *fStatus); +} + + + +//----------------------------------------------------------------------------- +// +// setEqual Set operation on UVector. +// Compare for equality. +// Elements must be sorted. +// +//----------------------------------------------------------------------------- +UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) { + return a->equals(*b); +} + + +//----------------------------------------------------------------------------- +// +// printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos +// for each node in the tree. +// +//----------------------------------------------------------------------------- +#ifdef RBBI_DEBUG +void RBBITableBuilder::printPosSets(RBBINode *n) { + if (n==NULL) { + return; + } + printf("\n"); + RBBINode::printNodeHeader(); + RBBINode::printNode(n); + RBBIDebugPrintf(" Nullable: %s\n", n->fNullable?"TRUE":"FALSE"); + + RBBIDebugPrintf(" firstpos: "); + printSet(n->fFirstPosSet); + + RBBIDebugPrintf(" lastpos: "); + printSet(n->fLastPosSet); + + RBBIDebugPrintf(" followpos: "); + printSet(n->fFollowPos); + + printPosSets(n->fLeftChild); + printPosSets(n->fRightChild); +} +#endif + +// +// findDuplCharClassFrom() +// +bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) { + int32_t numStates = fDStates->size(); + int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); + + for (; categories->first < numCols-1; categories->first++) { + // Note: dictionary & non-dictionary columns cannot be merged. + // The limitSecond value prevents considering mixed pairs. + // Dictionary categories are >= DictCategoriesStart. + // Non dict categories are < DictCategoriesStart. + int limitSecond = categories->first < fRB->fSetBuilder->getDictCategoriesStart() ? + fRB->fSetBuilder->getDictCategoriesStart() : numCols; + for (categories->second=categories->first+1; categories->second < limitSecond; categories->second++) { + // Initialized to different values to prevent returning true if numStates = 0 (implies no duplicates). + uint16_t table_base = 0; + uint16_t table_dupl = 1; + for (int32_t state=0; state<numStates; state++) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); + table_base = (uint16_t)sd->fDtran->elementAti(categories->first); + table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second); + if (table_base != table_dupl) { + break; + } + } + if (table_base == table_dupl) { + return true; + } + } + } + return false; +} + + +// +// removeColumn() +// +void RBBITableBuilder::removeColumn(int32_t column) { + int32_t numStates = fDStates->size(); + for (int32_t state=0; state<numStates; state++) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); + U_ASSERT(column < sd->fDtran->size()); + sd->fDtran->removeElementAt(column); + } +} + +/* + * findDuplicateState + */ +bool RBBITableBuilder::findDuplicateState(IntPair *states) { + int32_t numStates = fDStates->size(); + int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); + + for (; states->first<numStates-1; states->first++) { + RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(states->first); + for (states->second=states->first+1; states->second<numStates; states->second++) { + RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(states->second); + if (firstSD->fAccepting != duplSD->fAccepting || + firstSD->fLookAhead != duplSD->fLookAhead || + firstSD->fTagsIdx != duplSD->fTagsIdx) { + continue; + } + bool rowsMatch = true; + for (int32_t col=0; col < numCols; ++col) { + int32_t firstVal = firstSD->fDtran->elementAti(col); + int32_t duplVal = duplSD->fDtran->elementAti(col); + if (!((firstVal == duplVal) || + ((firstVal == states->first || firstVal == states->second) && + (duplVal == states->first || duplVal == states->second)))) { + rowsMatch = false; + break; + } + } + if (rowsMatch) { + return true; + } + } + } + return false; +} + + +bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) { + int32_t numStates = fSafeTable->size(); + + for (; states->first<numStates-1; states->first++) { + UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->first)); + for (states->second=states->first+1; states->second<numStates; states->second++) { + UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->second)); + bool rowsMatch = true; + int32_t numCols = firstRow->length(); + for (int32_t col=0; col < numCols; ++col) { + int32_t firstVal = firstRow->charAt(col); + int32_t duplVal = duplRow->charAt(col); + if (!((firstVal == duplVal) || + ((firstVal == states->first || firstVal == states->second) && + (duplVal == states->first || duplVal == states->second)))) { + rowsMatch = false; + break; + } + } + if (rowsMatch) { + return true; + } + } + } + return false; +} + + +void RBBITableBuilder::removeState(IntPair duplStates) { + const int32_t keepState = duplStates.first; + const int32_t duplState = duplStates.second; + U_ASSERT(keepState < duplState); + U_ASSERT(duplState < fDStates->size()); + + RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState); + fDStates->removeElementAt(duplState); + delete duplSD; + + int32_t numStates = fDStates->size(); + int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); + for (int32_t state=0; state<numStates; ++state) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); + for (int32_t col=0; col<numCols; col++) { + int32_t existingVal = sd->fDtran->elementAti(col); + int32_t newVal = existingVal; + if (existingVal == duplState) { + newVal = keepState; + } else if (existingVal > duplState) { + newVal = existingVal - 1; + } + sd->fDtran->setElementAt(newVal, col); + } + } +} + +void RBBITableBuilder::removeSafeState(IntPair duplStates) { + const int32_t keepState = duplStates.first; + const int32_t duplState = duplStates.second; + U_ASSERT(keepState < duplState); + U_ASSERT(duplState < fSafeTable->size()); + + fSafeTable->removeElementAt(duplState); // Note that fSafeTable has a deleter function + // and will auto-delete the removed element. + int32_t numStates = fSafeTable->size(); + for (int32_t state=0; state<numStates; ++state) { + UnicodeString *sd = (UnicodeString *)fSafeTable->elementAt(state); + int32_t numCols = sd->length(); + for (int32_t col=0; col<numCols; col++) { + int32_t existingVal = sd->charAt(col); + int32_t newVal = existingVal; + if (existingVal == duplState) { + newVal = keepState; + } else if (existingVal > duplState) { + newVal = existingVal - 1; + } + sd->setCharAt(col, static_cast<char16_t>(newVal)); + } + } +} + + +/* + * RemoveDuplicateStates + */ +int32_t RBBITableBuilder::removeDuplicateStates() { + IntPair dupls = {3, 0}; + int32_t numStatesRemoved = 0; + + while (findDuplicateState(&dupls)) { + // printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second); + removeState(dupls); + ++numStatesRemoved; + } + return numStatesRemoved; +} + + +//----------------------------------------------------------------------------- +// +// getTableSize() Calculate the size of the runtime form of this +// state transition table. +// +//----------------------------------------------------------------------------- +int32_t RBBITableBuilder::getTableSize() const { + int32_t size = 0; + int32_t numRows; + int32_t numCols; + int32_t rowSize; + + if (fTree == NULL) { + return 0; + } + + size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table. + + numRows = fDStates->size(); + numCols = fRB->fSetBuilder->getNumCharCategories(); + + if (use8BitsForTable()) { + rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols; + } else { + rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols; + } + size += numRows * rowSize; + return size; +} + +bool RBBITableBuilder::use8BitsForTable() const { + return fDStates->size() <= kMaxStateFor8BitsTable; +} + +//----------------------------------------------------------------------------- +// +// exportTable() export the state transition table in the format required +// by the runtime engine. getTableSize() bytes of memory +// must be available at the output address "where". +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::exportTable(void *where) { + RBBIStateTable *table = (RBBIStateTable *)where; + uint32_t state; + int col; + + if (U_FAILURE(*fStatus) || fTree == NULL) { + return; + } + + int32_t catCount = fRB->fSetBuilder->getNumCharCategories(); + if (catCount > 0x7fff || + fDStates->size() > 0x7fff) { + *fStatus = U_BRK_INTERNAL_ERROR; + return; + } + + table->fNumStates = fDStates->size(); + table->fDictCategoriesStart = fRB->fSetBuilder->getDictCategoriesStart(); + table->fLookAheadResultsSize = fLASlotsInUse == ACCEPTING_UNCONDITIONAL ? 0 : fLASlotsInUse + 1; + table->fFlags = 0; + if (use8BitsForTable()) { + table->fRowLen = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount; + table->fFlags |= RBBI_8BITS_ROWS; + } else { + table->fRowLen = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount; + } + if (fRB->fLookAheadHardBreak) { + table->fFlags |= RBBI_LOOKAHEAD_HARD_BREAK; + } + if (fRB->fSetBuilder->sawBOF()) { + table->fFlags |= RBBI_BOF_REQUIRED; + } + + for (state=0; state<table->fNumStates; state++) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state); + RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen); + if (use8BitsForTable()) { + U_ASSERT (sd->fAccepting <= 255); + U_ASSERT (sd->fLookAhead <= 255); + U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 255); + row->r8.fAccepting = sd->fAccepting; + row->r8.fLookAhead = sd->fLookAhead; + row->r8.fTagsIdx = sd->fTagsIdx; + for (col=0; col<catCount; col++) { + U_ASSERT (sd->fDtran->elementAti(col) <= kMaxStateFor8BitsTable); + row->r8.fNextState[col] = sd->fDtran->elementAti(col); + } + } else { + U_ASSERT (sd->fAccepting <= 0xffff); + U_ASSERT (sd->fLookAhead <= 0xffff); + U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 0xffff); + row->r16.fAccepting = sd->fAccepting; + row->r16.fLookAhead = sd->fLookAhead; + row->r16.fTagsIdx = sd->fTagsIdx; + for (col=0; col<catCount; col++) { + row->r16.fNextState[col] = sd->fDtran->elementAti(col); + } + } + } +} + + +/** + * Synthesize a safe state table from the main state table. + */ +void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) { + // The safe table creation has three steps: + + // 1. Identifiy pairs of character classes that are "safe." Safe means that boundaries + // following the pair do not depend on context or state before the pair. To test + // whether a pair is safe, run it through the main forward state table, starting + // from each state. If the the final state is the same, no matter what the starting state, + // the pair is safe. + // + // 2. Build a state table that recognizes the safe pairs. It's similar to their + // forward table, with a column for each input character [class], and a row for + // each state. Row 1 is the start state, and row 0 is the stop state. Initially + // create an additional state for each input character category; being in + // one of these states means that the character has been seen, and is potentially + // the first of a pair. In each of these rows, the entry for the second character + // of a safe pair is set to the stop state (0), indicating that a match was found. + // All other table entries are set to the state corresponding the current input + // character, allowing that charcter to be the of a start following pair. + // + // Because the safe rules are to be run in reverse, moving backwards in the text, + // the first and second pair categories are swapped when building the table. + // + // 3. Compress the table. There are typically many rows (states) that are + // equivalent - that have zeroes (match completed) in the same columns - + // and can be folded together. + + // Each safe pair is stored as two UChars in the safePair string. + UnicodeString safePairs; + + int32_t numCharClasses = fRB->fSetBuilder->getNumCharCategories(); + int32_t numStates = fDStates->size(); + + for (int32_t c1=0; c1<numCharClasses; ++c1) { + for (int32_t c2=0; c2 < numCharClasses; ++c2) { + int32_t wantedEndState = -1; + int32_t endState = 0; + for (int32_t startState = 1; startState < numStates; ++startState) { + RBBIStateDescriptor *startStateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(startState)); + int32_t s2 = startStateD->fDtran->elementAti(c1); + RBBIStateDescriptor *s2StateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(s2)); + endState = s2StateD->fDtran->elementAti(c2); + if (wantedEndState < 0) { + wantedEndState = endState; + } else { + if (wantedEndState != endState) { + break; + } + } + } + if (wantedEndState == endState) { + safePairs.append((char16_t)c1); + safePairs.append((char16_t)c2); + // printf("(%d, %d) ", c1, c2); + } + } + // printf("\n"); + } + + // Populate the initial safe table. + // The table as a whole is UVector<UnicodeString> + // Each row is represented by a UnicodeString, being used as a Vector<int16>. + // Row 0 is the stop state. + // Row 1 is the start sate. + // Row 2 and beyond are other states, initially one per char class, but + // after initial construction, many of the states will be combined, compacting the table. + // The String holds the nextState data only. The four leading fields of a row, fAccepting, + // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building. + + U_ASSERT(fSafeTable == nullptr); + fSafeTable = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, numCharClasses + 2, status); + for (int32_t row=0; row<numCharClasses + 2; ++row) { + fSafeTable->addElement(new UnicodeString(numCharClasses, 0, numCharClasses+4), status); + } + + // From the start state, each input char class transitions to the state for that input. + UnicodeString &startState = *static_cast<UnicodeString *>(fSafeTable->elementAt(1)); + for (int32_t charClass=0; charClass < numCharClasses; ++charClass) { + // Note: +2 for the start & stop state. + startState.setCharAt(charClass, static_cast<char16_t>(charClass+2)); + } + + // Initially make every other state table row look like the start state row, + for (int32_t row=2; row<numCharClasses+2; ++row) { + UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(row)); + rowState = startState; // UnicodeString assignment, copies contents. + } + + // Run through the safe pairs, set the next state to zero when pair has been seen. + // Zero being the stop state, meaning we found a safe point. + for (int32_t pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) { + int32_t c1 = safePairs.charAt(pairIdx); + int32_t c2 = safePairs.charAt(pairIdx + 1); + + UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(c2 + 2)); + rowState.setCharAt(c1, 0); + } + + // Remove duplicate or redundant rows from the table. + IntPair states = {1, 0}; + while (findDuplicateSafeState(&states)) { + // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second); + removeSafeState(states); + } +} + + +//----------------------------------------------------------------------------- +// +// getSafeTableSize() Calculate the size of the runtime form of this +// safe state table. +// +//----------------------------------------------------------------------------- +int32_t RBBITableBuilder::getSafeTableSize() const { + int32_t size = 0; + int32_t numRows; + int32_t numCols; + int32_t rowSize; + + if (fSafeTable == nullptr) { + return 0; + } + + size = offsetof(RBBIStateTable, fTableData); // The header, with no rows to the table. + + numRows = fSafeTable->size(); + numCols = fRB->fSetBuilder->getNumCharCategories(); + + if (use8BitsForSafeTable()) { + rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols; + } else { + rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols; + } + size += numRows * rowSize; + return size; +} + +bool RBBITableBuilder::use8BitsForSafeTable() const { + return fSafeTable->size() <= kMaxStateFor8BitsTable; +} + +//----------------------------------------------------------------------------- +// +// exportSafeTable() export the state transition table in the format required +// by the runtime engine. getTableSize() bytes of memory +// must be available at the output address "where". +// +//----------------------------------------------------------------------------- +void RBBITableBuilder::exportSafeTable(void *where) { + RBBIStateTable *table = (RBBIStateTable *)where; + uint32_t state; + int col; + + if (U_FAILURE(*fStatus) || fSafeTable == nullptr) { + return; + } + + int32_t catCount = fRB->fSetBuilder->getNumCharCategories(); + if (catCount > 0x7fff || + fSafeTable->size() > 0x7fff) { + *fStatus = U_BRK_INTERNAL_ERROR; + return; + } + + table->fNumStates = fSafeTable->size(); + table->fFlags = 0; + if (use8BitsForSafeTable()) { + table->fRowLen = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount; + table->fFlags |= RBBI_8BITS_ROWS; + } else { + table->fRowLen = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount; + } + + for (state=0; state<table->fNumStates; state++) { + UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(state); + RBBIStateTableRow *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen); + if (use8BitsForSafeTable()) { + row->r8.fAccepting = 0; + row->r8.fLookAhead = 0; + row->r8.fTagsIdx = 0; + for (col=0; col<catCount; col++) { + U_ASSERT(rowString->charAt(col) <= kMaxStateFor8BitsTable); + row->r8.fNextState[col] = static_cast<uint8_t>(rowString->charAt(col)); + } + } else { + row->r16.fAccepting = 0; + row->r16.fLookAhead = 0; + row->r16.fTagsIdx = 0; + for (col=0; col<catCount; col++) { + row->r16.fNextState[col] = rowString->charAt(col); + } + } + } +} + + + + +//----------------------------------------------------------------------------- +// +// printSet Debug function. Print the contents of a UVector +// +//----------------------------------------------------------------------------- +#ifdef RBBI_DEBUG +void RBBITableBuilder::printSet(UVector *s) { + int32_t i; + for (i=0; i<s->size(); i++) { + const RBBINode *v = static_cast<const RBBINode *>(s->elementAt(i)); + RBBIDebugPrintf("%5d", v==NULL? -1 : v->fSerialNum); + } + RBBIDebugPrintf("\n"); +} +#endif + + +//----------------------------------------------------------------------------- +// +// printStates Debug Function. Dump the fully constructed state transition table. +// +//----------------------------------------------------------------------------- +#ifdef RBBI_DEBUG +void RBBITableBuilder::printStates() { + int c; // input "character" + int n; // state number + + RBBIDebugPrintf("state | i n p u t s y m b o l s \n"); + RBBIDebugPrintf(" | Acc LA Tag"); + for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { + RBBIDebugPrintf(" %3d", c); + } + RBBIDebugPrintf("\n"); + RBBIDebugPrintf(" |---------------"); + for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { + RBBIDebugPrintf("----"); + } + RBBIDebugPrintf("\n"); + + for (n=0; n<fDStates->size(); n++) { + RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n); + RBBIDebugPrintf(" %3d | " , n); + RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagsIdx); + for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { + RBBIDebugPrintf(" %3d", sd->fDtran->elementAti(c)); + } + RBBIDebugPrintf("\n"); + } + RBBIDebugPrintf("\n\n"); +} +#endif + + +//----------------------------------------------------------------------------- +// +// printSafeTable Debug Function. Dump the fully constructed safe table. +// +//----------------------------------------------------------------------------- +#ifdef RBBI_DEBUG +void RBBITableBuilder::printReverseTable() { + int c; // input "character" + int n; // state number + + RBBIDebugPrintf(" Safe Reverse Table \n"); + if (fSafeTable == nullptr) { + RBBIDebugPrintf(" --- nullptr ---\n"); + return; + } + RBBIDebugPrintf("state | i n p u t s y m b o l s \n"); + RBBIDebugPrintf(" | Acc LA Tag"); + for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { + RBBIDebugPrintf(" %2d", c); + } + RBBIDebugPrintf("\n"); + RBBIDebugPrintf(" |---------------"); + for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { + RBBIDebugPrintf("---"); + } + RBBIDebugPrintf("\n"); + + for (n=0; n<fSafeTable->size(); n++) { + UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(n); + RBBIDebugPrintf(" %3d | " , n); + RBBIDebugPrintf("%3d %3d %5d ", 0, 0, 0); // Accepting, LookAhead, Tags + for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { + RBBIDebugPrintf(" %2d", rowString->charAt(c)); + } + RBBIDebugPrintf("\n"); + } + RBBIDebugPrintf("\n\n"); +} +#endif + + + +//----------------------------------------------------------------------------- +// +// printRuleStatusTable Debug Function. Dump the common rule status table +// +//----------------------------------------------------------------------------- +#ifdef RBBI_DEBUG +void RBBITableBuilder::printRuleStatusTable() { + int32_t thisRecord = 0; + int32_t nextRecord = 0; + int i; + UVector *tbl = fRB->fRuleStatusVals; + + RBBIDebugPrintf("index | tags \n"); + RBBIDebugPrintf("-------------------\n"); + + while (nextRecord < tbl->size()) { + thisRecord = nextRecord; + nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1; + RBBIDebugPrintf("%4d ", thisRecord); + for (i=thisRecord+1; i<nextRecord; i++) { + RBBIDebugPrintf(" %5d", tbl->elementAti(i)); + } + RBBIDebugPrintf("\n"); + } + RBBIDebugPrintf("\n\n"); +} +#endif + + +//----------------------------------------------------------------------------- +// +// RBBIStateDescriptor Methods. This is a very struct-like class +// Most access is directly to the fields. +// +//----------------------------------------------------------------------------- + +RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatus) { + fMarked = FALSE; + fAccepting = 0; + fLookAhead = 0; + fTagsIdx = 0; + fTagVals = NULL; + fPositions = NULL; + fDtran = NULL; + + fDtran = new UVector32(lastInputSymbol+1, *fStatus); + if (U_FAILURE(*fStatus)) { + return; + } + if (fDtran == NULL) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + return; + } + fDtran->setSize(lastInputSymbol+1); // fDtran needs to be pre-sized. + // It is indexed by input symbols, and will + // hold the next state number for each + // symbol. +} + + +RBBIStateDescriptor::~RBBIStateDescriptor() { + delete fPositions; + delete fDtran; + delete fTagVals; + fPositions = NULL; + fDtran = NULL; + fTagVals = NULL; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/thirdparty/icu4c/common/rbbitblb.h b/thirdparty/icu4c/common/rbbitblb.h new file mode 100644 index 0000000000..fe3db8d7bf --- /dev/null +++ b/thirdparty/icu4c/common/rbbitblb.h @@ -0,0 +1,232 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// rbbitblb.h +// + +/* +********************************************************************** +* Copyright (c) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#ifndef RBBITBLB_H +#define RBBITBLB_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/uobject.h" +#include "unicode/rbbi.h" +#include "rbbidata.h" +#include "rbbirb.h" +#include "rbbinode.h" + + +U_NAMESPACE_BEGIN + +class RBBIRuleScanner; +class RBBIRuleBuilder; +class UVector32; + +// +// class RBBITableBuilder is part of the RBBI rule compiler. +// It builds the state transition table used by the RBBI runtime +// from the expression syntax tree generated by the rule scanner. +// +// This class is part of the RBBI implementation only. +// There is no user-visible public API here. +// + +class RBBITableBuilder : public UMemory { +public: + RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status); + ~RBBITableBuilder(); + + void buildForwardTable(); + + /** Return the runtime size in bytes of the built state table. */ + int32_t getTableSize() const; + + /** Fill in the runtime state table. Sufficient memory must exist at the specified location. + */ + void exportTable(void *where); + + /** Use 8 bits to encode the forward table */ + bool use8BitsForTable() const; + + /** + * Find duplicate (redundant) character classes. Begin looking with categories.first. + * Duplicate, if found are returned in the categories parameter. + * This is an iterator-like function, used to identify character classes + * (state table columns) that can be eliminated. + * @param categories in/out parameter, specifies where to start looking for duplicates, + * and returns the first pair of duplicates found, if any. + * @return true if duplicate char classes were found, false otherwise. + */ + bool findDuplCharClassFrom(IntPair *categories); + + /** Remove a column from the state table. Used when two character categories + * have been found equivalent, and merged together, to eliminate the uneeded table column. + */ + void removeColumn(int32_t column); + + /** + * Check for, and remove dupicate states (table rows). + * @return the number of states removed. + */ + int32_t removeDuplicateStates(); + + /** Build the safe reverse table from the already-constructed forward table. */ + void buildSafeReverseTable(UErrorCode &status); + + /** Return the runtime size in bytes of the built safe reverse state table. */ + int32_t getSafeTableSize() const; + + /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location. + */ + void exportSafeTable(void *where); + + /** Use 8 bits to encode the safe reverse table */ + bool use8BitsForSafeTable() const; + +private: + void calcNullable(RBBINode *n); + void calcFirstPos(RBBINode *n); + void calcLastPos(RBBINode *n); + void calcFollowPos(RBBINode *n); + void calcChainedFollowPos(RBBINode *n, RBBINode *endMarkNode); + void bofFixup(); + void buildStateTable(); + void mapLookAheadRules(); + void flagAcceptingStates(); + void flagLookAheadStates(); + void flagTaggedStates(); + void mergeRuleStatusVals(); + + /** + * Merge redundant state table columns, eliminating character classes with identical behavior. + * Done after the state tables are generated, just before converting to their run-time format. + */ + int32_t mergeColumns(); + + void addRuleRootNodes(UVector *dest, RBBINode *node); + + /** + * Find duplicate (redundant) states, beginning at the specified pair, + * within this state table. This is an iterator-like function, used to + * identify states (state table rows) that can be eliminated. + * @param states in/out parameter, specifies where to start looking for duplicates, + * and returns the first pair of duplicates found, if any. + * @return true if duplicate states were found, false otherwise. + */ + bool findDuplicateState(IntPair *states); + + /** Remove a duplicate state. + * @param duplStates The duplicate states. The first is kept, the second is removed. + * All references to the second in the state table are retargeted + * to the first. + */ + void removeState(IntPair duplStates); + + /** Find the next duplicate state in the safe reverse table. An iterator function. + * @param states in/out parameter, specifies where to start looking for duplicates, + * and returns the first pair of duplicates found, if any. + * @return true if a duplicate pair of states was found. + */ + bool findDuplicateSafeState(IntPair *states); + + /** Remove a duplicate state from the safe table. + * @param duplStates The duplicate states. The first is kept, the second is removed. + * All references to the second in the state table are retargeted + * to the first. + */ + void removeSafeState(IntPair duplStates); + + // Set functions for UVector. + // TODO: make a USet subclass of UVector + + void setAdd(UVector *dest, UVector *source); + UBool setEquals(UVector *a, UVector *b); + + void sortedAdd(UVector **dest, int32_t val); + +public: +#ifdef RBBI_DEBUG + void printSet(UVector *s); + void printPosSets(RBBINode *n /* = NULL*/); + void printStates(); + void printRuleStatusTable(); + void printReverseTable(); +#else + #define printSet(s) + #define printPosSets(n) + #define printStates() + #define printRuleStatusTable() + #define printReverseTable() +#endif + +private: + RBBIRuleBuilder *fRB; + RBBINode *&fTree; // The root node of the parse tree to build a + // table for. + UErrorCode *fStatus; + + /** State Descriptors, UVector<RBBIStateDescriptor> */ + UVector *fDStates; // D states (Aho's terminology) + // Index is state number + // Contents are RBBIStateDescriptor pointers. + + /** Synthesized safe table, UVector of UnicodeString, one string per table row. */ + UVector *fSafeTable; + + /** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */ + UVector32 *fLookAheadRuleMap = nullptr; + + /* Counter used when assigning lookahead rule numbers. + * Contains the last look-ahead number already in use. + * The first look-ahead number is 2; Number 1 (ACCEPTING_UNCONDITIONAL) is reserved + * for non-lookahead accepting states. See the declarations of RBBIStateTableRowT. */ + int32_t fLASlotsInUse = ACCEPTING_UNCONDITIONAL; + + + RBBITableBuilder(const RBBITableBuilder &other) = delete; // forbid copying of this class + RBBITableBuilder &operator=(const RBBITableBuilder &other) = delete; // forbid copying of this class +}; + +// +// RBBIStateDescriptor - The DFA is constructed as a set of these descriptors, +// one for each state. +class RBBIStateDescriptor : public UMemory { +public: + UBool fMarked; + uint32_t fAccepting; + uint32_t fLookAhead; + UVector *fTagVals; + int32_t fTagsIdx; + UVector *fPositions; // Set of parse tree positions associated + // with this state. Unordered (it's a set). + // UVector contents are RBBINode * + + UVector32 *fDtran; // Transitions out of this state. + // indexed by input character + // contents is int index of dest state + // in RBBITableBuilder.fDStates + + RBBIStateDescriptor(int maxInputSymbol, UErrorCode *fStatus); + ~RBBIStateDescriptor(); + +private: + RBBIStateDescriptor(const RBBIStateDescriptor &other); // forbid copying of this class + RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other); // forbid copying of this class +}; + + + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ + +#endif diff --git a/thirdparty/icu4c/common/resbund.cpp b/thirdparty/icu4c/common/resbund.cpp new file mode 100644 index 0000000000..7c5063b211 --- /dev/null +++ b/thirdparty/icu4c/common/resbund.cpp @@ -0,0 +1,399 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1997-2013, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File resbund.cpp +* +* Modification History: +* +* Date Name Description +* 02/05/97 aliu Fixed bug in chopLocale. Added scanForLocaleInFile +* based on code taken from scanForLocale. Added +* constructor which attempts to read resource bundle +* from a specific file, without searching other files. +* 02/11/97 aliu Added UErrorCode return values to constructors. Fixed +* infinite loops in scanForFile and scanForLocale. +* Modified getRawResourceData to not delete storage in +* localeData and resourceData which it doesn't own. +* Added Mac compatibility #ifdefs for tellp() and +* ios::nocreate. +* 03/04/97 aliu Modified to use ExpandingDataSink objects instead of +* the highly inefficient ostrstream objects. +* 03/13/97 aliu Rewrote to load in entire resource bundle and store +* it as a Hashtable of ResourceBundleData objects. +* Added state table to govern parsing of files. +* Modified to load locale index out of new file distinct +* from default.txt. +* 03/25/97 aliu Modified to support 2-d arrays, needed for timezone data. +* Added support for custom file suffixes. Again, needed +* to support timezone data. Improved error handling to +* detect duplicate tags and subtags. +* 04/07/97 aliu Fixed bug in getHashtableForLocale(). Fixed handling +* of failing UErrorCode values on entry to API methods. +* Fixed bugs in getArrayItem() for negative indices. +* 04/29/97 aliu Update to use new Hashtable deletion protocol. +* 05/06/97 aliu Flattened kTransitionTable for HP compiler. +* Fixed usage of CharString. +* 06/11/99 stephen Removed parsing of .txt files. +* Reworked to use new binary format. +* Cleaned up. +* 06/14/99 stephen Removed methods taking a filename suffix. +* 06/22/99 stephen Added missing T_FileStream_close in parse() +* 11/09/99 weiv Added getLocale(), rewritten constructForLocale() +* March 2000 weiv complete overhaul. +****************************************************************************** +*/ + +#include "unicode/utypes.h" +#include "unicode/resbund.h" + +#include "cmemory.h" +#include "mutex.h" +#include "uassert.h" +#include "umutex.h" + +#include "uresimp.h" + +U_NAMESPACE_BEGIN + +/*----------------------------------------------------------------------------- + * Implementation Notes + * + * Resource bundles are read in once, and thereafter cached. + * ResourceBundle statically keeps track of which files have been + * read, so we are guaranteed that each file is read at most once. + * Resource bundles can be loaded from different data directories and + * will be treated as distinct, even if they are for the same locale. + * + * Resource bundles are lightweight objects, which have pointers to + * one or more shared Hashtable objects containing all the data. + * Copying would be cheap, but there is no copy constructor, since + * there wasn't one in the original API. + * + * The ResourceBundle parsing mechanism is implemented as a transition + * network, for easy maintenance and modification. The network is + * implemented as a matrix (instead of in code) to make this even + * easier. The matrix contains Transition objects. Each Transition + * object describes a destination node and an action to take before + * moving to the destination node. The source node is encoded by the + * index of the object in the array that contains it. The pieces + * needed to understand the transition network are the enums for node + * IDs and actions, the parse() method, which walks through the + * network and implements the actions, and the network itself. The + * network guarantees certain conditions, for example, that a new + * resource will not be closed until one has been opened first; or + * that data will not be stored into a TaggedList until a TaggedList + * has been created. Nonetheless, the code in parse() does some + * consistency checks as it runs the network, and fails with an + * U_INTERNAL_PROGRAM_ERROR if one of these checks fails. If the input + * data has a bad format, an U_INVALID_FORMAT_ERROR is returned. If you + * see an U_INTERNAL_PROGRAM_ERROR the transition matrix has a bug in + * it. + * + * Old functionality of multiple locales in a single file is still + * supported. For this reason, LOCALE names override FILE names. If + * data for en_US is located in the en.txt file, once it is loaded, + * the code will not care where it came from (other than remembering + * which directory it came from). However, if there is an en_US + * resource in en_US.txt, that will take precedence. There is no + * limit to the number or type of resources that can be stored in a + * file, however, files are only searched in a specific way. If + * en_US_CA is requested, then first en_US_CA.txt is searched, then + * en_US.txt, then en.txt, then default.txt. So it only makes sense + * to put certain locales in certain files. In this example, it would + * be logical to put en_US_CA, en_US, and en into the en.txt file, + * since they would be found there if asked for. The extreme example + * is to place all locale resources into default.txt, which should + * also work. + * + * Inheritance is implemented. For example, xx_YY_zz inherits as + * follows: xx_YY_zz, xx_YY, xx, default. Inheritance is implemented + * as an array of hashtables. There will be from 1 to 4 hashtables in + * the array. + * + * Fallback files are implemented. The fallback pattern is Language + * Country Variant (LCV) -> LC -> L. Fallback is first done for the + * requested locale. Then it is done for the default locale, as + * returned by Locale::getDefault(). Then the special file + * default.txt is searched for the default locale. The overall FILE + * fallback path is LCV -> LC -> L -> dLCV -> dLC -> dL -> default. + * + * Note that although file name searching includes the default locale, + * once a ResourceBundle object is constructed, the inheritance path + * no longer includes the default locale. The path is LCV -> LC -> L + * -> default. + * + * File parsing is lazy. Nothing is parsed unless it is called for by + * someone. So when a ResourceBundle for xx_YY_zz is constructed, + * only that locale is parsed (along with anything else in the same + * file). Later, if the FooBar tag is asked for, and if it isn't + * found in xx_YY_zz, then xx_YY.txt will be parsed and checked, and + * so forth, until the chain is exhausted or the tag is found. + * + * Thread-safety is implemented around caches, both the cache that + * stores all the resouce data, and the cache that stores flags + * indicating whether or not a file has been visited. These caches + * delete their storage at static cleanup time, when the process + * quits. + * + * ResourceBundle supports TableCollation as a special case. This + * involves having special ResourceBundle objects which DO own their + * data, since we don't want large collation rule strings in the + * ResourceBundle cache (these are already cached in the + * TableCollation cache). TableCollation files (.ctx files) have the + * same format as normal resource data files, with a different + * interpretation, from the standpoint of ResourceBundle. .ctx files + * are loaded into otherwise ordinary ResourceBundle objects. They + * don't inherit (that's implemented by TableCollation) and they own + * their data (as mentioned above). However, they still support + * possible multiple locales in a single .ctx file. (This is in + * practice a bad idea, since you only want the one locale you're + * looking for, and only one tag will be present + * ("CollationElements"), so you don't need an inheritance chain of + * multiple locales.) Up to 4 locale resources will be loaded from a + * .ctx file; everything after the first 4 is ignored (parsed and + * deleted). (Normal .txt files have no limit.) Instead of being + * loaded into the cache, and then looked up as needed, the locale + * resources are read straight into the ResourceBundle object. + * + * The Index, which used to reside in default.txt, has been moved to a + * new file, index.txt. This file contains a slightly modified format + * with the addition of the "InstalledLocales" tag; it looks like: + * + * Index { + * InstalledLocales { + * ar + * .. + * zh_TW + * } + * } + */ +//----------------------------------------------------------------------------- + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ResourceBundle) + +ResourceBundle::ResourceBundle(UErrorCode &err) + :UObject(), fLocale(NULL) +{ + fResource = ures_open(0, Locale::getDefault().getName(), &err); +} + +ResourceBundle::ResourceBundle(const ResourceBundle &other) + :UObject(other), fLocale(NULL) +{ + UErrorCode status = U_ZERO_ERROR; + + if (other.fResource) { + fResource = ures_copyResb(0, other.fResource, &status); + } else { + /* Copying a bad resource bundle */ + fResource = NULL; + } +} + +ResourceBundle::ResourceBundle(UResourceBundle *res, UErrorCode& err) + :UObject(), fLocale(NULL) +{ + if (res) { + fResource = ures_copyResb(0, res, &err); + } else { + /* Copying a bad resource bundle */ + fResource = NULL; + } +} + +ResourceBundle::ResourceBundle(const char* path, const Locale& locale, UErrorCode& err) + :UObject(), fLocale(NULL) +{ + fResource = ures_open(path, locale.getName(), &err); +} + + +ResourceBundle& ResourceBundle::operator=(const ResourceBundle& other) +{ + if(this == &other) { + return *this; + } + if(fResource != 0) { + ures_close(fResource); + fResource = NULL; + } + if (fLocale != NULL) { + delete fLocale; + fLocale = NULL; + } + UErrorCode status = U_ZERO_ERROR; + if (other.fResource) { + fResource = ures_copyResb(0, other.fResource, &status); + } else { + /* Copying a bad resource bundle */ + fResource = NULL; + } + return *this; +} + +ResourceBundle::~ResourceBundle() +{ + if(fResource != 0) { + ures_close(fResource); + } + if(fLocale != NULL) { + delete(fLocale); + } +} + +ResourceBundle * +ResourceBundle::clone() const { + return new ResourceBundle(*this); +} + +UnicodeString ResourceBundle::getString(UErrorCode& status) const { + int32_t len = 0; + const UChar *r = ures_getString(fResource, &len, &status); + return UnicodeString(TRUE, r, len); +} + +const uint8_t *ResourceBundle::getBinary(int32_t& len, UErrorCode& status) const { + return ures_getBinary(fResource, &len, &status); +} + +const int32_t *ResourceBundle::getIntVector(int32_t& len, UErrorCode& status) const { + return ures_getIntVector(fResource, &len, &status); +} + +uint32_t ResourceBundle::getUInt(UErrorCode& status) const { + return ures_getUInt(fResource, &status); +} + +int32_t ResourceBundle::getInt(UErrorCode& status) const { + return ures_getInt(fResource, &status); +} + +const char *ResourceBundle::getName(void) const { + return ures_getName(fResource); +} + +const char *ResourceBundle::getKey(void) const { + return ures_getKey(fResource); +} + +UResType ResourceBundle::getType(void) const { + return ures_getType(fResource); +} + +int32_t ResourceBundle::getSize(void) const { + return ures_getSize(fResource); +} + +UBool ResourceBundle::hasNext(void) const { + return ures_hasNext(fResource); +} + +void ResourceBundle::resetIterator(void) { + ures_resetIterator(fResource); +} + +ResourceBundle ResourceBundle::getNext(UErrorCode& status) { + UResourceBundle r; + + ures_initStackObject(&r); + ures_getNextResource(fResource, &r, &status); + ResourceBundle res(&r, status); + if (U_SUCCESS(status)) { + ures_close(&r); + } + return res; +} + +UnicodeString ResourceBundle::getNextString(UErrorCode& status) { + int32_t len = 0; + const UChar* r = ures_getNextString(fResource, &len, 0, &status); + return UnicodeString(TRUE, r, len); +} + +UnicodeString ResourceBundle::getNextString(const char ** key, UErrorCode& status) { + int32_t len = 0; + const UChar* r = ures_getNextString(fResource, &len, key, &status); + return UnicodeString(TRUE, r, len); +} + +ResourceBundle ResourceBundle::get(int32_t indexR, UErrorCode& status) const { + UResourceBundle r; + + ures_initStackObject(&r); + ures_getByIndex(fResource, indexR, &r, &status); + ResourceBundle res(&r, status); + if (U_SUCCESS(status)) { + ures_close(&r); + } + return res; +} + +UnicodeString ResourceBundle::getStringEx(int32_t indexS, UErrorCode& status) const { + int32_t len = 0; + const UChar* r = ures_getStringByIndex(fResource, indexS, &len, &status); + return UnicodeString(TRUE, r, len); +} + +ResourceBundle ResourceBundle::get(const char* key, UErrorCode& status) const { + UResourceBundle r; + + ures_initStackObject(&r); + ures_getByKey(fResource, key, &r, &status); + ResourceBundle res(&r, status); + if (U_SUCCESS(status)) { + ures_close(&r); + } + return res; +} + +ResourceBundle ResourceBundle::getWithFallback(const char* key, UErrorCode& status){ + UResourceBundle r; + ures_initStackObject(&r); + ures_getByKeyWithFallback(fResource, key, &r, &status); + ResourceBundle res(&r, status); + if(U_SUCCESS(status)){ + ures_close(&r); + } + return res; +} +UnicodeString ResourceBundle::getStringEx(const char* key, UErrorCode& status) const { + int32_t len = 0; + const UChar* r = ures_getStringByKey(fResource, key, &len, &status); + return UnicodeString(TRUE, r, len); +} + +const char* +ResourceBundle::getVersionNumber() const +{ + return ures_getVersionNumberInternal(fResource); +} + +void ResourceBundle::getVersion(UVersionInfo versionInfo) const { + ures_getVersion(fResource, versionInfo); +} + +const Locale &ResourceBundle::getLocale(void) const { + static UMutex gLocaleLock; + Mutex lock(&gLocaleLock); + if (fLocale != NULL) { + return *fLocale; + } + UErrorCode status = U_ZERO_ERROR; + const char *localeName = ures_getLocaleInternal(fResource, &status); + ResourceBundle *ncThis = const_cast<ResourceBundle *>(this); + ncThis->fLocale = new Locale(localeName); + return ncThis->fLocale != NULL ? *ncThis->fLocale : Locale::getDefault(); +} + +const Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const +{ + return ures_getLocaleByType(fResource, type, &status); +} + +U_NAMESPACE_END +//eof diff --git a/thirdparty/icu4c/common/resbund_cnv.cpp b/thirdparty/icu4c/common/resbund_cnv.cpp new file mode 100644 index 0000000000..45c0b399bf --- /dev/null +++ b/thirdparty/icu4c/common/resbund_cnv.cpp @@ -0,0 +1,57 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1997-2006, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: resbund_cnv.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004aug25 +* created by: Markus W. Scherer +* +* Character conversion functions moved here from resbund.cpp +*/ + +#include "unicode/utypes.h" +#include "unicode/resbund.h" +#include "uinvchar.h" + +U_NAMESPACE_BEGIN + +ResourceBundle::ResourceBundle( const UnicodeString& path, + const Locale& locale, + UErrorCode& error) + :UObject(), fLocale(NULL) +{ + constructForLocale(path, locale, error); +} + +ResourceBundle::ResourceBundle( const UnicodeString& path, + UErrorCode& error) + :UObject(), fLocale(NULL) +{ + constructForLocale(path, Locale::getDefault(), error); +} + +void +ResourceBundle::constructForLocale(const UnicodeString& path, + const Locale& locale, + UErrorCode& error) +{ + if (path.isEmpty()) { + fResource = ures_open(NULL, locale.getName(), &error); + } + else { + UnicodeString nullTerminatedPath(path); + nullTerminatedPath.append((UChar)0); + fResource = ures_openU(nullTerminatedPath.getBuffer(), locale.getName(), &error); + } +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/resource.cpp b/thirdparty/icu4c/common/resource.cpp new file mode 100644 index 0000000000..3d41a16029 --- /dev/null +++ b/thirdparty/icu4c/common/resource.cpp @@ -0,0 +1,22 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2015-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* resource.cpp +* +* created on: 2015nov04 +* created by: Markus W. Scherer +*/ + +#include "resource.h" + +U_NAMESPACE_BEGIN + +ResourceValue::~ResourceValue() {} + +ResourceSink::~ResourceSink() {} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/resource.h b/thirdparty/icu4c/common/resource.h new file mode 100644 index 0000000000..3795694412 --- /dev/null +++ b/thirdparty/icu4c/common/resource.h @@ -0,0 +1,293 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2015-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* resource.h +* +* created on: 2015nov04 +* created by: Markus W. Scherer +*/ + +#ifndef __URESOURCE_H__ +#define __URESOURCE_H__ + +/** + * \file + * \brief ICU resource bundle key and value types. + */ + +// Note: Ported from ICU4J class UResource and its nested classes, +// but the C++ classes are separate, not nested. + +// We use the Resource prefix for C++ classes, as usual. +// The UResource prefix would be used for C types. + +#include "unicode/utypes.h" +#include "unicode/unistr.h" +#include "unicode/ures.h" +#include "restrace.h" + +struct ResourceData; + +U_NAMESPACE_BEGIN + +class ResourceValue; + +// Note: In C++, we use const char * pointers for keys, +// rather than an abstraction like Java UResource.Key. + +/** + * Interface for iterating over a resource bundle array resource. + */ +class U_COMMON_API ResourceArray { +public: + /** Constructs an empty array object. */ + ResourceArray() : items16(NULL), items32(NULL), length(0) {} + + /** Only for implementation use. @internal */ + ResourceArray(const uint16_t *i16, const uint32_t *i32, int32_t len, + const ResourceTracer& traceInfo) : + items16(i16), items32(i32), length(len), + fTraceInfo(traceInfo) {} + + /** + * @return The number of items in the array resource. + */ + int32_t getSize() const { return length; } + /** + * @param i Array item index. + * @param value Output-only, receives the value of the i'th item. + * @return true if i is non-negative and less than getSize(). + */ + UBool getValue(int32_t i, ResourceValue &value) const; + + /** Only for implementation use. @internal */ + uint32_t internalGetResource(const ResourceData *pResData, int32_t i) const; + +private: + const uint16_t *items16; + const uint32_t *items32; + int32_t length; + ResourceTracer fTraceInfo; +}; + +/** + * Interface for iterating over a resource bundle table resource. + */ +class U_COMMON_API ResourceTable { +public: + /** Constructs an empty table object. */ + ResourceTable() : keys16(NULL), keys32(NULL), items16(NULL), items32(NULL), length(0) {} + + /** Only for implementation use. @internal */ + ResourceTable(const uint16_t *k16, const int32_t *k32, + const uint16_t *i16, const uint32_t *i32, int32_t len, + const ResourceTracer& traceInfo) : + keys16(k16), keys32(k32), items16(i16), items32(i32), length(len), + fTraceInfo(traceInfo) {} + + /** + * @return The number of items in the array resource. + */ + int32_t getSize() const { return length; } + /** + * @param i Table item index. + * @param key Output-only, receives the key of the i'th item. + * @param value Output-only, receives the value of the i'th item. + * @return true if i is non-negative and less than getSize(). + */ + UBool getKeyAndValue(int32_t i, const char *&key, ResourceValue &value) const; + + /** + * @param key Key string to find in the table. + * @param value Output-only, receives the value of the item with that key. + * @return true if the table contains the key. + */ + UBool findValue(const char *key, ResourceValue &value) const; + +private: + const uint16_t *keys16; + const int32_t *keys32; + const uint16_t *items16; + const uint32_t *items32; + int32_t length; + ResourceTracer fTraceInfo; +}; + +/** + * Represents a resource bundle item's value. + * Avoids object creations as much as possible. + * Mutable, not thread-safe. + */ +class U_COMMON_API ResourceValue : public UObject { +public: + virtual ~ResourceValue(); + + /** + * @return ICU resource type, for example, URES_STRING + */ + virtual UResType getType() const = 0; + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not a string resource. + * + * @see ures_getString() + */ + virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const = 0; + + inline UnicodeString getUnicodeString(UErrorCode &errorCode) const { + int32_t len = 0; + const UChar *r = getString(len, errorCode); + return UnicodeString(true, r, len); + } + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not an alias resource. + */ + virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const = 0; + + inline UnicodeString getAliasUnicodeString(UErrorCode &errorCode) const { + int32_t len = 0; + const UChar *r = getAliasString(len, errorCode); + return UnicodeString(true, r, len); + } + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not an integer resource. + * + * @see ures_getInt() + */ + virtual int32_t getInt(UErrorCode &errorCode) const = 0; + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not an integer resource. + * + * @see ures_getUInt() + */ + virtual uint32_t getUInt(UErrorCode &errorCode) const = 0; + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not an intvector resource. + * + * @see ures_getIntVector() + */ + virtual const int32_t *getIntVector(int32_t &length, UErrorCode &errorCode) const = 0; + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not a binary-blob resource. + * + * @see ures_getBinary() + */ + virtual const uint8_t *getBinary(int32_t &length, UErrorCode &errorCode) const = 0; + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not an array resource + */ + virtual ResourceArray getArray(UErrorCode &errorCode) const = 0; + + /** + * Sets U_RESOURCE_TYPE_MISMATCH if this is not a table resource + */ + virtual ResourceTable getTable(UErrorCode &errorCode) const = 0; + + /** + * Is this a no-fallback/no-inheritance marker string? + * Such a marker is used for + * CLDR no-fallback data values of (three empty-set symbols)=={2205, 2205, 2205} + * when enumerating tables with fallback from the specific resource bundle to root. + * + * @return true if this is a no-inheritance marker string + */ + virtual UBool isNoInheritanceMarker() const = 0; + + /** + * Sets the dest strings from the string values in this array resource. + * + * @return the number of strings in this array resource. + * If greater than capacity, then an overflow error is set. + * + * Sets U_RESOURCE_TYPE_MISMATCH if this is not an array resource + * or if any of the array items is not a string + */ + virtual int32_t getStringArray(UnicodeString *dest, int32_t capacity, + UErrorCode &errorCode) const = 0; + + /** + * Same as + * <pre> + * if (getType() == URES_STRING) { + * return new String[] { getString(); } + * } else { + * return getStringArray(); + * } + * </pre> + * + * Sets U_RESOURCE_TYPE_MISMATCH if this is + * neither a string resource nor an array resource containing strings + * @see getString() + * @see getStringArray() + */ + virtual int32_t getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity, + UErrorCode &errorCode) const = 0; + + /** + * Same as + * <pre> + * if (getType() == URES_STRING) { + * return getString(); + * } else { + * return getStringArray()[0]; + * } + * </pre> + * + * Sets U_RESOURCE_TYPE_MISMATCH if this is + * neither a string resource nor an array resource containing strings + * @see getString() + * @see getStringArray() + */ + virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const = 0; + +protected: + ResourceValue() {} + +private: + ResourceValue(const ResourceValue &); // no copy constructor + ResourceValue &operator=(const ResourceValue &); // no assignment operator +}; + +/** + * Sink for ICU resource bundle contents. + */ +class U_COMMON_API ResourceSink : public UObject { +public: + ResourceSink() {} + virtual ~ResourceSink(); + + /** + * Called once for each bundle (child-parent-...-root). + * The value is normally an array or table resource, + * and implementations of this method normally iterate over the + * tree of resource items stored there. + * + * @param key The key string of the enumeration-start resource. + * Empty if the enumeration starts at the top level of the bundle. + * @param value Call getArray() or getTable() as appropriate. + * Then reuse for output values from Array and Table getters. + * @param noFallback true if the bundle has no parent; + * that is, its top-level table has the nofallback attribute, + * or it is the root bundle of a locale tree. + */ + virtual void put(const char *key, ResourceValue &value, UBool noFallback, + UErrorCode &errorCode) = 0; + +private: + ResourceSink(const ResourceSink &); // no copy constructor + ResourceSink &operator=(const ResourceSink &); // no assignment operator +}; + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/restrace.cpp b/thirdparty/icu4c/common/restrace.cpp new file mode 100644 index 0000000000..5c6498850e --- /dev/null +++ b/thirdparty/icu4c/common/restrace.cpp @@ -0,0 +1,130 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if U_ENABLE_TRACING + +#include "restrace.h" +#include "charstr.h" +#include "cstring.h" +#include "utracimp.h" +#include "uresimp.h" +#include "uassert.h" +#include "util.h" + +U_NAMESPACE_BEGIN + +ResourceTracer::~ResourceTracer() = default; + +void ResourceTracer::trace(const char* resType) const { + U_ASSERT(fResB || fParent); + UTRACE_ENTRY(UTRACE_UDATA_RESOURCE); + UErrorCode status = U_ZERO_ERROR; + + CharString filePath; + getFilePath(filePath, status); + + CharString resPath; + getResPath(resPath, status); + + // The longest type ("intvector") is 9 chars + const char kSpaces[] = " "; + CharString format; + format.append(kSpaces, sizeof(kSpaces) - 1 - uprv_strlen(resType), status); + format.append("(%s) %s @ %s", status); + + UTRACE_DATA3(UTRACE_VERBOSE, + format.data(), + resType, + filePath.data(), + resPath.data()); + UTRACE_EXIT_STATUS(status); +} + +void ResourceTracer::traceOpen() const { + U_ASSERT(fResB); + UTRACE_ENTRY(UTRACE_UDATA_BUNDLE); + UErrorCode status = U_ZERO_ERROR; + + CharString filePath; + UTRACE_DATA1(UTRACE_VERBOSE, "%s", getFilePath(filePath, status).data()); + UTRACE_EXIT_STATUS(status); +} + +CharString& ResourceTracer::getFilePath(CharString& output, UErrorCode& status) const { + if (fResB) { + output.append(fResB->fData->fPath, status); + output.append('/', status); + output.append(fResB->fData->fName, status); + output.append(".res", status); + } else { + fParent->getFilePath(output, status); + } + return output; +} + +CharString& ResourceTracer::getResPath(CharString& output, UErrorCode& status) const { + if (fResB) { + output.append('/', status); + output.append(fResB->fResPath, status); + // removing the trailing / + U_ASSERT(output[output.length()-1] == '/'); + output.truncate(output.length()-1); + } else { + fParent->getResPath(output, status); + } + if (fKey) { + output.append('/', status); + output.append(fKey, status); + } + if (fIndex != -1) { + output.append('[', status); + UnicodeString indexString; + ICU_Utility::appendNumber(indexString, fIndex); + output.appendInvariantChars(indexString, status); + output.append(']', status); + } + return output; +} + +void FileTracer::traceOpen(const char* path, const char* type, const char* name) { + if (uprv_strcmp(type, "res") == 0) { + traceOpenResFile(path, name); + } else { + traceOpenDataFile(path, type, name); + } +} + +void FileTracer::traceOpenDataFile(const char* path, const char* type, const char* name) { + UTRACE_ENTRY(UTRACE_UDATA_DATA_FILE); + UErrorCode status = U_ZERO_ERROR; + + CharString filePath; + filePath.append(path, status); + filePath.append('/', status); + filePath.append(name, status); + filePath.append('.', status); + filePath.append(type, status); + + UTRACE_DATA1(UTRACE_VERBOSE, "%s", filePath.data()); + UTRACE_EXIT_STATUS(status); +} + +void FileTracer::traceOpenResFile(const char* path, const char* name) { + UTRACE_ENTRY(UTRACE_UDATA_RES_FILE); + UErrorCode status = U_ZERO_ERROR; + + CharString filePath; + filePath.append(path, status); + filePath.append('/', status); + filePath.append(name, status); + filePath.append(".res", status); + + UTRACE_DATA1(UTRACE_VERBOSE, "%s", filePath.data()); + UTRACE_EXIT_STATUS(status); +} + +U_NAMESPACE_END + +#endif // U_ENABLE_TRACING diff --git a/thirdparty/icu4c/common/restrace.h b/thirdparty/icu4c/common/restrace.h new file mode 100644 index 0000000000..ef29eaed57 --- /dev/null +++ b/thirdparty/icu4c/common/restrace.h @@ -0,0 +1,147 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef __RESTRACE_H__ +#define __RESTRACE_H__ + +#include "unicode/utypes.h" + +#if U_ENABLE_TRACING + +struct UResourceBundle; + +U_NAMESPACE_BEGIN + +class CharString; + +/** + * Instances of this class store information used to trace reads from resource + * bundles when ICU is built with --enable-tracing. + * + * All arguments of type const UResourceBundle*, const char*, and + * const ResourceTracer& are stored as pointers. The caller must retain + * ownership for the lifetime of this ResourceTracer. + * + * Exported as U_COMMON_API for Windows because it is a value field + * in other exported types. + */ +class U_COMMON_API ResourceTracer { +public: + ResourceTracer() : + fResB(nullptr), + fParent(nullptr), + fKey(nullptr), + fIndex(-1) {} + + ResourceTracer(const UResourceBundle* resB) : + fResB(resB), + fParent(nullptr), + fKey(nullptr), + fIndex(-1) {} + + ResourceTracer(const UResourceBundle* resB, const char* key) : + fResB(resB), + fParent(nullptr), + fKey(key), + fIndex(-1) {} + + ResourceTracer(const UResourceBundle* resB, int32_t index) : + fResB(resB), + fParent(nullptr), + fKey(nullptr), + fIndex(index) {} + + ResourceTracer(const ResourceTracer& parent, const char* key) : + fResB(nullptr), + fParent(&parent), + fKey(key), + fIndex(-1) {} + + ResourceTracer(const ResourceTracer& parent, int32_t index) : + fResB(nullptr), + fParent(&parent), + fKey(nullptr), + fIndex(index) {} + + ~ResourceTracer(); + + void trace(const char* type) const; + void traceOpen() const; + + /** + * Calls trace() if the resB or parent provided to the constructor was + * non-null; otherwise, does nothing. + */ + void maybeTrace(const char* type) const { + if (fResB || fParent) { + trace(type); + } + } + +private: + const UResourceBundle* fResB; + const ResourceTracer* fParent; + const char* fKey; + int32_t fIndex; + + CharString& getFilePath(CharString& output, UErrorCode& status) const; + + CharString& getResPath(CharString& output, UErrorCode& status) const; +}; + +/** + * This class provides methods to trace data file reads when ICU is built + * with --enable-tracing. + */ +class FileTracer { +public: + static void traceOpen(const char* path, const char* type, const char* name); + +private: + static void traceOpenDataFile(const char* path, const char* type, const char* name); + static void traceOpenResFile(const char* path, const char* name); +}; + +U_NAMESPACE_END + +#else // U_ENABLE_TRACING + +U_NAMESPACE_BEGIN + +/** + * Default trivial implementation when --enable-tracing is not used. + */ +class U_COMMON_API ResourceTracer { +public: + ResourceTracer() {} + + ResourceTracer(const void*) {} + + ResourceTracer(const void*, const char*) {} + + ResourceTracer(const void*, int32_t) {} + + ResourceTracer(const ResourceTracer&, const char*) {} + + ResourceTracer(const ResourceTracer&, int32_t) {} + + void trace(const char*) const {} + + void traceOpen() const {} + + void maybeTrace(const char*) const {} +}; + +/** + * Default trivial implementation when --enable-tracing is not used. + */ +class FileTracer { +public: + static void traceOpen(const char*, const char*, const char*) {} +}; + +U_NAMESPACE_END + +#endif // U_ENABLE_TRACING + +#endif //__RESTRACE_H__ diff --git a/thirdparty/icu4c/common/ruleiter.cpp b/thirdparty/icu4c/common/ruleiter.cpp new file mode 100644 index 0000000000..41eea23c0d --- /dev/null +++ b/thirdparty/icu4c/common/ruleiter.cpp @@ -0,0 +1,162 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2003-2011, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: September 24 2003 +* Since: ICU 2.8 +********************************************************************** +*/ +#include "ruleiter.h" +#include "unicode/parsepos.h" +#include "unicode/symtable.h" +#include "unicode/unistr.h" +#include "unicode/utf16.h" +#include "patternprops.h" + +/* \U87654321 or \ud800\udc00 */ +#define MAX_U_NOTATION_LEN 12 + +U_NAMESPACE_BEGIN + +RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym, + ParsePosition& thePos) : + text(theText), + pos(thePos), + sym(theSym), + buf(0), + bufPos(0) +{} + +UBool RuleCharacterIterator::atEnd() const { + return buf == 0 && pos.getIndex() == text.length(); +} + +UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) { + if (U_FAILURE(ec)) return DONE; + + UChar32 c = DONE; + isEscaped = FALSE; + + for (;;) { + c = _current(); + _advance(U16_LENGTH(c)); + + if (c == SymbolTable::SYMBOL_REF && buf == 0 && + (options & PARSE_VARIABLES) != 0 && sym != 0) { + UnicodeString name = sym->parseReference(text, pos, text.length()); + // If name is empty there was an isolated SYMBOL_REF; + // return it. Caller must be prepared for this. + if (name.length() == 0) { + break; + } + bufPos = 0; + buf = sym->lookup(name); + if (buf == 0) { + ec = U_UNDEFINED_VARIABLE; + return DONE; + } + // Handle empty variable value + if (buf->length() == 0) { + buf = 0; + } + continue; + } + + if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) { + continue; + } + + if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) { + UnicodeString tempEscape; + int32_t offset = 0; + c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset); + jumpahead(offset); + isEscaped = TRUE; + if (c < 0) { + ec = U_MALFORMED_UNICODE_ESCAPE; + return DONE; + } + } + + break; + } + + return c; +} + +void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const { + p.buf = buf; + p.pos = pos.getIndex(); + p.bufPos = bufPos; +} + +void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) { + buf = p.buf; + pos.setIndex(p.pos); + bufPos = p.bufPos; +} + +void RuleCharacterIterator::skipIgnored(int32_t options) { + if ((options & SKIP_WHITESPACE) != 0) { + for (;;) { + UChar32 a = _current(); + if (!PatternProps::isWhiteSpace(a)) break; + _advance(U16_LENGTH(a)); + } + } +} + +UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const { + if (maxLookAhead < 0) { + maxLookAhead = 0x7FFFFFFF; + } + if (buf != 0) { + buf->extract(bufPos, maxLookAhead, result); + } else { + text.extract(pos.getIndex(), maxLookAhead, result); + } + return result; +} + +void RuleCharacterIterator::jumpahead(int32_t count) { + _advance(count); +} + +/* +UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const { + int32_t b = pos.getIndex(); + text.extract(0, b, result); + return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index +} +*/ + +UChar32 RuleCharacterIterator::_current() const { + if (buf != 0) { + return buf->char32At(bufPos); + } else { + int i = pos.getIndex(); + return (i < text.length()) ? text.char32At(i) : (UChar32)DONE; + } +} + +void RuleCharacterIterator::_advance(int32_t count) { + if (buf != 0) { + bufPos += count; + if (bufPos == buf->length()) { + buf = 0; + } + } else { + pos.setIndex(pos.getIndex() + count); + if (pos.getIndex() > text.length()) { + pos.setIndex(text.length()); + } + } +} + +U_NAMESPACE_END + +//eof diff --git a/thirdparty/icu4c/common/ruleiter.h b/thirdparty/icu4c/common/ruleiter.h new file mode 100644 index 0000000000..28e2ca5753 --- /dev/null +++ b/thirdparty/icu4c/common/ruleiter.h @@ -0,0 +1,233 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2003-2011, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: September 24 2003 +* Since: ICU 2.8 +********************************************************************** +*/ +#ifndef _RULEITER_H_ +#define _RULEITER_H_ + +#include "unicode/uobject.h" + +U_NAMESPACE_BEGIN + +class UnicodeString; +class ParsePosition; +class SymbolTable; + +/** + * An iterator that returns 32-bit code points. This class is deliberately + * <em>not</em> related to any of the ICU character iterator classes + * in order to minimize complexity. + * @author Alan Liu + * @since ICU 2.8 + */ +class RuleCharacterIterator : public UMemory { + + // TODO: Ideas for later. (Do not implement if not needed, lest the + // code coverage numbers go down due to unused methods.) + // 1. Add a copy constructor, operator==() method. + // 2. Rather than return DONE, throw an exception if the end + // is reached -- this is an alternate usage model, probably not useful. + +private: + /** + * Text being iterated. + */ + const UnicodeString& text; + + /** + * Position of iterator. + */ + ParsePosition& pos; + + /** + * Symbol table used to parse and dereference variables. May be 0. + */ + const SymbolTable* sym; + + /** + * Current variable expansion, or 0 if none. + */ + const UnicodeString* buf; + + /** + * Position within buf. Meaningless if buf == 0. + */ + int32_t bufPos; + +public: + /** + * Value returned when there are no more characters to iterate. + */ + enum { DONE = -1 }; + + /** + * Bitmask option to enable parsing of variable names. If (options & + * PARSE_VARIABLES) != 0, then an embedded variable will be expanded to + * its value. Variables are parsed using the SymbolTable API. + */ + enum { PARSE_VARIABLES = 1 }; + + /** + * Bitmask option to enable parsing of escape sequences. If (options & + * PARSE_ESCAPES) != 0, then an embedded escape sequence will be expanded + * to its value. Escapes are parsed using Utility.unescapeAt(). + */ + enum { PARSE_ESCAPES = 2 }; + + /** + * Bitmask option to enable skipping of whitespace. If (options & + * SKIP_WHITESPACE) != 0, then Pattern_White_Space characters will be silently + * skipped, as if they were not present in the input. + */ + enum { SKIP_WHITESPACE = 4 }; + + /** + * Constructs an iterator over the given text, starting at the given + * position. + * @param text the text to be iterated + * @param sym the symbol table, or null if there is none. If sym is null, + * then variables will not be deferenced, even if the PARSE_VARIABLES + * option is set. + * @param pos upon input, the index of the next character to return. If a + * variable has been dereferenced, then pos will <em>not</em> increment as + * characters of the variable value are iterated. + */ + RuleCharacterIterator(const UnicodeString& text, const SymbolTable* sym, + ParsePosition& pos); + + /** + * Returns true if this iterator has no more characters to return. + */ + UBool atEnd() const; + + /** + * Returns the next character using the given options, or DONE if there + * are no more characters, and advance the position to the next + * character. + * @param options one or more of the following options, bitwise-OR-ed + * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE. + * @param isEscaped output parameter set to true if the character + * was escaped + * @param ec input-output error code. An error will only be set by + * this routing if options includes PARSE_VARIABLES and an unknown + * variable name is seen, or if options includes PARSE_ESCAPES and + * an invalid escape sequence is seen. + * @return the current 32-bit code point, or DONE + */ + UChar32 next(int32_t options, UBool& isEscaped, UErrorCode& ec); + + /** + * Returns true if this iterator is currently within a variable expansion. + */ + inline UBool inVariable() const; + + /** + * An opaque object representing the position of a RuleCharacterIterator. + */ + struct Pos : public UMemory { + private: + const UnicodeString* buf; + int32_t pos; + int32_t bufPos; + friend class RuleCharacterIterator; + }; + + /** + * Sets an object which, when later passed to setPos(), will + * restore this iterator's position. Usage idiom: + * + * RuleCharacterIterator iterator = ...; + * RuleCharacterIterator::Pos pos; + * iterator.getPos(pos); + * for (;;) { + * iterator.getPos(pos); + * int c = iterator.next(...); + * ... + * } + * iterator.setPos(pos); + * + * @param p a position object to be set to this iterator's + * current position. + */ + void getPos(Pos& p) const; + + /** + * Restores this iterator to the position it had when getPos() + * set the given object. + * @param p a position object previously set by getPos() + */ + void setPos(const Pos& p); + + /** + * Skips ahead past any ignored characters, as indicated by the given + * options. This is useful in conjunction with the lookahead() method. + * + * Currently, this only has an effect for SKIP_WHITESPACE. + * @param options one or more of the following options, bitwise-OR-ed + * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE. + */ + void skipIgnored(int32_t options); + + /** + * Returns a string containing the remainder of the characters to be + * returned by this iterator, without any option processing. If the + * iterator is currently within a variable expansion, this will only + * extend to the end of the variable expansion. This method is provided + * so that iterators may interoperate with string-based APIs. The typical + * sequence of calls is to call skipIgnored(), then call lookahead(), then + * parse the string returned by lookahead(), then call jumpahead() to + * resynchronize the iterator. + * @param result a string to receive the characters to be returned + * by future calls to next() + * @param maxLookAhead The maximum to copy into the result. + * @return a reference to result + */ + UnicodeString& lookahead(UnicodeString& result, int32_t maxLookAhead = -1) const; + + /** + * Advances the position by the given number of 16-bit code units. + * This is useful in conjunction with the lookahead() method. + * @param count the number of 16-bit code units to jump over + */ + void jumpahead(int32_t count); + + /** + * Returns a string representation of this object, consisting of the + * characters being iterated, with a '|' marking the current position. + * Position within an expanded variable is <em>not</em> indicated. + * @param result output parameter to receive a string + * representation of this object + */ +// UnicodeString& toString(UnicodeString& result) const; + +private: + /** + * Returns the current 32-bit code point without parsing escapes, parsing + * variables, or skipping whitespace. + * @return the current 32-bit code point + */ + UChar32 _current() const; + + /** + * Advances the position by the given amount. + * @param count the number of 16-bit code units to advance past + */ + void _advance(int32_t count); +}; + +inline UBool RuleCharacterIterator::inVariable() const { + return buf != 0; +} + +U_NAMESPACE_END + +#endif // _RULEITER_H_ +//eof diff --git a/thirdparty/icu4c/common/schriter.cpp b/thirdparty/icu4c/common/schriter.cpp new file mode 100644 index 0000000000..17b68aee9d --- /dev/null +++ b/thirdparty/icu4c/common/schriter.cpp @@ -0,0 +1,119 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1998-2012, International Business Machines Corporation and +* others. All Rights Reserved. +****************************************************************************** +* +* File schriter.cpp +* +* Modification History: +* +* Date Name Description +* 05/05/99 stephen Cleaned up. +****************************************************************************** +*/ + +#include "utypeinfo.h" // for 'typeid' to work + +#include "unicode/chariter.h" +#include "unicode/schriter.h" + +U_NAMESPACE_BEGIN + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringCharacterIterator) + +StringCharacterIterator::StringCharacterIterator() + : UCharCharacterIterator(), + text() +{ + // NEVER DEFAULT CONSTRUCT! +} + +StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr) + : UCharCharacterIterator(textStr.getBuffer(), textStr.length()), + text(textStr) +{ + // we had set the input parameter's array, now we need to set our copy's array + UCharCharacterIterator::text = this->text.getBuffer(); +} + +StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr, + int32_t textPos) + : UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textPos), + text(textStr) +{ + // we had set the input parameter's array, now we need to set our copy's array + UCharCharacterIterator::text = this->text.getBuffer(); +} + +StringCharacterIterator::StringCharacterIterator(const UnicodeString& textStr, + int32_t textBegin, + int32_t textEnd, + int32_t textPos) + : UCharCharacterIterator(textStr.getBuffer(), textStr.length(), textBegin, textEnd, textPos), + text(textStr) +{ + // we had set the input parameter's array, now we need to set our copy's array + UCharCharacterIterator::text = this->text.getBuffer(); +} + +StringCharacterIterator::StringCharacterIterator(const StringCharacterIterator& that) + : UCharCharacterIterator(that), + text(that.text) +{ + // we had set the input parameter's array, now we need to set our copy's array + UCharCharacterIterator::text = this->text.getBuffer(); +} + +StringCharacterIterator::~StringCharacterIterator() { +} + +StringCharacterIterator& +StringCharacterIterator::operator=(const StringCharacterIterator& that) { + UCharCharacterIterator::operator=(that); + text = that.text; + // we had set the input parameter's array, now we need to set our copy's array + UCharCharacterIterator::text = this->text.getBuffer(); + return *this; +} + +UBool +StringCharacterIterator::operator==(const ForwardCharacterIterator& that) const { + if (this == &that) { + return TRUE; + } + + // do not call UCharCharacterIterator::operator==() + // because that checks for array pointer equality + // while we compare UnicodeString objects + + if (typeid(*this) != typeid(that)) { + return FALSE; + } + + StringCharacterIterator& realThat = (StringCharacterIterator&)that; + + return text == realThat.text + && pos == realThat.pos + && begin == realThat.begin + && end == realThat.end; +} + +StringCharacterIterator* +StringCharacterIterator::clone() const { + return new StringCharacterIterator(*this); +} + +void +StringCharacterIterator::setText(const UnicodeString& newText) { + text = newText; + UCharCharacterIterator::setText(text.getBuffer(), text.length()); +} + +void +StringCharacterIterator::getText(UnicodeString& result) { + result = text; +} +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/serv.cpp b/thirdparty/icu4c/common/serv.cpp new file mode 100644 index 0000000000..ce545b9dbd --- /dev/null +++ b/thirdparty/icu4c/common/serv.cpp @@ -0,0 +1,982 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** +******************************************************************************* +* Copyright (C) 2001-2014, International Business Machines Corporation. +* All Rights Reserved. +******************************************************************************* +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_SERVICE + +#include "serv.h" +#include "umutex.h" + +#undef SERVICE_REFCOUNT + +// in case we use the refcount stuff + +U_NAMESPACE_BEGIN + +/* +****************************************************************** +*/ + +const UChar ICUServiceKey::PREFIX_DELIMITER = 0x002F; /* '/' */ + +ICUServiceKey::ICUServiceKey(const UnicodeString& id) +: _id(id) { +} + +ICUServiceKey::~ICUServiceKey() +{ +} + +const UnicodeString& +ICUServiceKey::getID() const +{ + return _id; +} + +UnicodeString& +ICUServiceKey::canonicalID(UnicodeString& result) const +{ + return result.append(_id); +} + +UnicodeString& +ICUServiceKey::currentID(UnicodeString& result) const +{ + return canonicalID(result); +} + +UnicodeString& +ICUServiceKey::currentDescriptor(UnicodeString& result) const +{ + prefix(result); + result.append(PREFIX_DELIMITER); + return currentID(result); +} + +UBool +ICUServiceKey::fallback() +{ + return FALSE; +} + +UBool +ICUServiceKey::isFallbackOf(const UnicodeString& id) const +{ + return id == _id; +} + +UnicodeString& +ICUServiceKey::prefix(UnicodeString& result) const +{ + return result; +} + +UnicodeString& +ICUServiceKey::parsePrefix(UnicodeString& result) +{ + int32_t n = result.indexOf(PREFIX_DELIMITER); + if (n < 0) { + n = 0; + } + result.remove(n); + return result; +} + +UnicodeString& +ICUServiceKey::parseSuffix(UnicodeString& result) +{ + int32_t n = result.indexOf(PREFIX_DELIMITER); + if (n >= 0) { + result.remove(0, n+1); + } + return result; +} + +#ifdef SERVICE_DEBUG +UnicodeString& +ICUServiceKey::debug(UnicodeString& result) const +{ + debugClass(result); + result.append((UnicodeString)" id: "); + result.append(_id); + return result; +} + +UnicodeString& +ICUServiceKey::debugClass(UnicodeString& result) const +{ + return result.append((UnicodeString)"ICUServiceKey"); +} +#endif + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ICUServiceKey) + +/* +****************************************************************** +*/ + +ICUServiceFactory::~ICUServiceFactory() {} + +SimpleFactory::SimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible) +: _instance(instanceToAdopt), _id(id), _visible(visible) +{ +} + +SimpleFactory::~SimpleFactory() +{ + delete _instance; +} + +UObject* +SimpleFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const +{ + if (U_SUCCESS(status)) { + UnicodeString temp; + if (_id == key.currentID(temp)) { + return service->cloneInstance(_instance); + } + } + return NULL; +} + +void +SimpleFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const +{ + if (_visible) { + result.put(_id, (void*)this, status); // cast away const + } else { + result.remove(_id); + } +} + +UnicodeString& +SimpleFactory::getDisplayName(const UnicodeString& id, const Locale& /* locale */, UnicodeString& result) const +{ + if (_visible && _id == id) { + result = _id; + } else { + result.setToBogus(); + } + return result; +} + +#ifdef SERVICE_DEBUG +UnicodeString& +SimpleFactory::debug(UnicodeString& toAppendTo) const +{ + debugClass(toAppendTo); + toAppendTo.append((UnicodeString)" id: "); + toAppendTo.append(_id); + toAppendTo.append((UnicodeString)", visible: "); + toAppendTo.append(_visible ? (UnicodeString)"T" : (UnicodeString)"F"); + return toAppendTo; +} + +UnicodeString& +SimpleFactory::debugClass(UnicodeString& toAppendTo) const +{ + return toAppendTo.append((UnicodeString)"SimpleFactory"); +} +#endif + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleFactory) + +/* +****************************************************************** +*/ + +ServiceListener::~ServiceListener() {} + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ServiceListener) + +/* +****************************************************************** +*/ + +// Record the actual id for this service in the cache, so we can return it +// even if we succeed later with a different id. +class CacheEntry : public UMemory { +private: + int32_t refcount; + +public: + UnicodeString actualDescriptor; + UObject* service; + + /** + * Releases a reference to the shared resource. + */ + ~CacheEntry() { + delete service; + } + + CacheEntry(const UnicodeString& _actualDescriptor, UObject* _service) + : refcount(1), actualDescriptor(_actualDescriptor), service(_service) { + } + + /** + * Instantiation creates an initial reference, so don't call this + * unless you're creating a new pointer to this. Management of + * that pointer will have to know how to deal with refcounts. + * Return true if the resource has not already been released. + */ + CacheEntry* ref() { + ++refcount; + return this; + } + + /** + * Destructions removes a reference, so don't call this unless + * you're removing pointer to this somewhere. Management of that + * pointer will have to know how to deal with refcounts. Once + * the refcount drops to zero, the resource is released. Return + * false if the resouce has been released. + */ + CacheEntry* unref() { + if ((--refcount) == 0) { + delete this; + return NULL; + } + return this; + } + + /** + * Return TRUE if there is at least one reference to this and the + * resource has not been released. + */ + UBool isShared() const { + return refcount > 1; + } +}; + +// UObjectDeleter for serviceCache +U_CDECL_BEGIN +static void U_CALLCONV +cacheDeleter(void* obj) { + U_NAMESPACE_USE ((CacheEntry*)obj)->unref(); +} + +/** +* Deleter for UObjects +*/ +static void U_CALLCONV +deleteUObject(void *obj) { + U_NAMESPACE_USE delete (UObject*) obj; +} +U_CDECL_END + +/* +****************************************************************** +*/ + +class DNCache : public UMemory { +public: + Hashtable cache; + const Locale locale; + + DNCache(const Locale& _locale) + : cache(), locale(_locale) + { + // cache.setKeyDeleter(uprv_deleteUObject); + } +}; + + +/* +****************************************************************** +*/ + +StringPair* +StringPair::create(const UnicodeString& displayName, + const UnicodeString& id, + UErrorCode& status) +{ + if (U_SUCCESS(status)) { + StringPair* sp = new StringPair(displayName, id); + if (sp == NULL || sp->isBogus()) { + status = U_MEMORY_ALLOCATION_ERROR; + delete sp; + return NULL; + } + return sp; + } + return NULL; +} + +UBool +StringPair::isBogus() const { + return displayName.isBogus() || id.isBogus(); +} + +StringPair::StringPair(const UnicodeString& _displayName, + const UnicodeString& _id) +: displayName(_displayName) +, id(_id) +{ +} + +U_CDECL_BEGIN +static void U_CALLCONV +userv_deleteStringPair(void *obj) { + U_NAMESPACE_USE delete (StringPair*) obj; +} +U_CDECL_END + +/* +****************************************************************** +*/ + +static UMutex lock; + +ICUService::ICUService() +: name() +, timestamp(0) +, factories(NULL) +, serviceCache(NULL) +, idCache(NULL) +, dnCache(NULL) +{ +} + +ICUService::ICUService(const UnicodeString& newName) +: name(newName) +, timestamp(0) +, factories(NULL) +, serviceCache(NULL) +, idCache(NULL) +, dnCache(NULL) +{ +} + +ICUService::~ICUService() +{ + { + Mutex mutex(&lock); + clearCaches(); + delete factories; + factories = NULL; + } +} + +UObject* +ICUService::get(const UnicodeString& descriptor, UErrorCode& status) const +{ + return get(descriptor, NULL, status); +} + +UObject* +ICUService::get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const +{ + UObject* result = NULL; + ICUServiceKey* key = createKey(&descriptor, status); + if (key) { + result = getKey(*key, actualReturn, status); + delete key; + } + return result; +} + +UObject* +ICUService::getKey(ICUServiceKey& key, UErrorCode& status) const +{ + return getKey(key, NULL, status); +} + +// this is a vector that subclasses of ICUService can override to further customize the result object +// before returning it. All other public get functions should call this one. + +UObject* +ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const +{ + return getKey(key, actualReturn, NULL, status); +} + +// make it possible to call reentrantly on systems that don't have reentrant mutexes. +// we can use this simple approach since we know the situation where we're calling +// reentrantly even without knowing the thread. +class XMutex : public UMemory { +public: + inline XMutex(UMutex *mutex, UBool reentering) + : fMutex(mutex) + , fActive(!reentering) + { + if (fActive) umtx_lock(fMutex); + } + inline ~XMutex() { + if (fActive) umtx_unlock(fMutex); + } + +private: + UMutex *fMutex; + UBool fActive; +}; + +struct UVectorDeleter { + UVector* _obj; + UVectorDeleter() : _obj(NULL) {} + ~UVectorDeleter() { delete _obj; } +}; + +// called only by factories, treat as private +UObject* +ICUService::getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const +{ + if (U_FAILURE(status)) { + return NULL; + } + + if (isDefault()) { + return handleDefault(key, actualReturn, status); + } + + ICUService* ncthis = (ICUService*)this; // cast away semantic const + + CacheEntry* result = NULL; + { + // The factory list can't be modified until we're done, + // otherwise we might update the cache with an invalid result. + // The cache has to stay in synch with the factory list. + // ICU doesn't have monitors so we can't use rw locks, so + // we single-thread everything using this service, for now. + + // if factory is not null, we're calling from within the mutex, + // and since some unix machines don't have reentrant mutexes we + // need to make sure not to try to lock it again. + XMutex mutex(&lock, factory != NULL); + + if (serviceCache == NULL) { + ncthis->serviceCache = new Hashtable(status); + if (ncthis->serviceCache == NULL) { + return NULL; + } + if (U_FAILURE(status)) { + delete serviceCache; + return NULL; + } + serviceCache->setValueDeleter(cacheDeleter); + } + + UnicodeString currentDescriptor; + UVectorDeleter cacheDescriptorList; + UBool putInCache = FALSE; + + int32_t startIndex = 0; + int32_t limit = factories->size(); + UBool cacheResult = TRUE; + + if (factory != NULL) { + for (int32_t i = 0; i < limit; ++i) { + if (factory == (const ICUServiceFactory*)factories->elementAt(i)) { + startIndex = i + 1; + break; + } + } + if (startIndex == 0) { + // throw new InternalError("Factory " + factory + "not registered with service: " + this); + status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + cacheResult = FALSE; + } + + do { + currentDescriptor.remove(); + key.currentDescriptor(currentDescriptor); + result = (CacheEntry*)serviceCache->get(currentDescriptor); + if (result != NULL) { + break; + } + + // first test of cache failed, so we'll have to update + // the cache if we eventually succeed-- that is, if we're + // going to update the cache at all. + putInCache = TRUE; + + int32_t index = startIndex; + while (index < limit) { + ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(index++); + UObject* service = f->create(key, this, status); + if (U_FAILURE(status)) { + delete service; + return NULL; + } + if (service != NULL) { + result = new CacheEntry(currentDescriptor, service); + if (result == NULL) { + delete service; + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + goto outerEnd; + } + } + + // prepare to load the cache with all additional ids that + // will resolve to result, assuming we'll succeed. We + // don't want to keep querying on an id that's going to + // fallback to the one that succeeded, we want to hit the + // cache the first time next goaround. + if (cacheDescriptorList._obj == NULL) { + cacheDescriptorList._obj = new UVector(uprv_deleteUObject, NULL, 5, status); + if (U_FAILURE(status)) { + return NULL; + } + } + UnicodeString* idToCache = new UnicodeString(currentDescriptor); + if (idToCache == NULL || idToCache->isBogus()) { + status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + cacheDescriptorList._obj->addElement(idToCache, status); + if (U_FAILURE(status)) { + return NULL; + } + } while (key.fallback()); +outerEnd: + + if (result != NULL) { + if (putInCache && cacheResult) { + serviceCache->put(result->actualDescriptor, result, status); + if (U_FAILURE(status)) { + return NULL; + } + + if (cacheDescriptorList._obj != NULL) { + for (int32_t i = cacheDescriptorList._obj->size(); --i >= 0;) { + UnicodeString* desc = (UnicodeString*)cacheDescriptorList._obj->elementAt(i); + + serviceCache->put(*desc, result, status); + if (U_FAILURE(status)) { + return NULL; + } + + result->ref(); + cacheDescriptorList._obj->removeElementAt(i); + } + } + } + + if (actualReturn != NULL) { + // strip null prefix + if (result->actualDescriptor.indexOf((UChar)0x2f) == 0) { // U+002f=slash (/) + actualReturn->remove(); + actualReturn->append(result->actualDescriptor, + 1, + result->actualDescriptor.length() - 1); + } else { + *actualReturn = result->actualDescriptor; + } + + if (actualReturn->isBogus()) { + status = U_MEMORY_ALLOCATION_ERROR; + delete result; + return NULL; + } + } + + UObject* service = cloneInstance(result->service); + if (putInCache && !cacheResult) { + delete result; + } + return service; + } + } + + return handleDefault(key, actualReturn, status); +} + +UObject* +ICUService::handleDefault(const ICUServiceKey& /* key */, UnicodeString* /* actualIDReturn */, UErrorCode& /* status */) const +{ + return NULL; +} + +UVector& +ICUService::getVisibleIDs(UVector& result, UErrorCode& status) const { + return getVisibleIDs(result, NULL, status); +} + +UVector& +ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorCode& status) const +{ + result.removeAllElements(); + + if (U_FAILURE(status)) { + return result; + } + + { + Mutex mutex(&lock); + const Hashtable* map = getVisibleIDMap(status); + if (map != NULL) { + ICUServiceKey* fallbackKey = createKey(matchID, status); + + for (int32_t pos = UHASH_FIRST;;) { + const UHashElement* e = map->nextElement(pos); + if (e == NULL) { + break; + } + + const UnicodeString* id = (const UnicodeString*)e->key.pointer; + if (fallbackKey != NULL) { + if (!fallbackKey->isFallbackOf(*id)) { + continue; + } + } + + UnicodeString* idClone = new UnicodeString(*id); + if (idClone == NULL || idClone->isBogus()) { + delete idClone; + status = U_MEMORY_ALLOCATION_ERROR; + break; + } + result.addElement(idClone, status); + if (U_FAILURE(status)) { + delete idClone; + break; + } + } + delete fallbackKey; + } + } + if (U_FAILURE(status)) { + result.removeAllElements(); + } + return result; +} + +const Hashtable* +ICUService::getVisibleIDMap(UErrorCode& status) const { + if (U_FAILURE(status)) return NULL; + + // must only be called when lock is already held + + ICUService* ncthis = (ICUService*)this; // cast away semantic const + if (idCache == NULL) { + ncthis->idCache = new Hashtable(status); + if (idCache == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + } else if (factories != NULL) { + for (int32_t pos = factories->size(); --pos >= 0;) { + ICUServiceFactory* f = (ICUServiceFactory*)factories->elementAt(pos); + f->updateVisibleIDs(*idCache, status); + } + if (U_FAILURE(status)) { + delete idCache; + ncthis->idCache = NULL; + } + } + } + + return idCache; +} + + +UnicodeString& +ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result) const +{ + return getDisplayName(id, result, Locale::getDefault()); +} + +UnicodeString& +ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result, const Locale& locale) const +{ + { + UErrorCode status = U_ZERO_ERROR; + Mutex mutex(&lock); + const Hashtable* map = getVisibleIDMap(status); + if (map != NULL) { + ICUServiceFactory* f = (ICUServiceFactory*)map->get(id); + if (f != NULL) { + f->getDisplayName(id, locale, result); + return result; + } + + // fallback + status = U_ZERO_ERROR; + ICUServiceKey* fallbackKey = createKey(&id, status); + while (fallbackKey != NULL && fallbackKey->fallback()) { + UnicodeString us; + fallbackKey->currentID(us); + f = (ICUServiceFactory*)map->get(us); + if (f != NULL) { + f->getDisplayName(id, locale, result); + delete fallbackKey; + return result; + } + } + delete fallbackKey; + } + } + result.setToBogus(); + return result; +} + +UVector& +ICUService::getDisplayNames(UVector& result, UErrorCode& status) const +{ + return getDisplayNames(result, Locale::getDefault(), NULL, status); +} + + +UVector& +ICUService::getDisplayNames(UVector& result, const Locale& locale, UErrorCode& status) const +{ + return getDisplayNames(result, locale, NULL, status); +} + +UVector& +ICUService::getDisplayNames(UVector& result, + const Locale& locale, + const UnicodeString* matchID, + UErrorCode& status) const +{ + result.removeAllElements(); + result.setDeleter(userv_deleteStringPair); + if (U_SUCCESS(status)) { + ICUService* ncthis = (ICUService*)this; // cast away semantic const + Mutex mutex(&lock); + + if (dnCache != NULL && dnCache->locale != locale) { + delete dnCache; + ncthis->dnCache = NULL; + } + + if (dnCache == NULL) { + const Hashtable* m = getVisibleIDMap(status); + if (U_FAILURE(status)) { + return result; + } + ncthis->dnCache = new DNCache(locale); + if (dnCache == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return result; + } + + int32_t pos = UHASH_FIRST; + const UHashElement* entry = NULL; + while ((entry = m->nextElement(pos)) != NULL) { + const UnicodeString* id = (const UnicodeString*)entry->key.pointer; + ICUServiceFactory* f = (ICUServiceFactory*)entry->value.pointer; + UnicodeString dname; + f->getDisplayName(*id, locale, dname); + if (dname.isBogus()) { + status = U_MEMORY_ALLOCATION_ERROR; + } else { + dnCache->cache.put(dname, (void*)id, status); // share pointer with visibleIDMap + if (U_SUCCESS(status)) { + continue; + } + } + delete dnCache; + ncthis->dnCache = NULL; + return result; + } + } + } + + ICUServiceKey* matchKey = createKey(matchID, status); + /* To ensure that all elements in the hashtable are iterated, set pos to -1. + * nextElement(pos) will skip the position at pos and begin the iteration + * at the next position, which in this case will be 0. + */ + int32_t pos = UHASH_FIRST; + const UHashElement *entry = NULL; + while ((entry = dnCache->cache.nextElement(pos)) != NULL) { + const UnicodeString* id = (const UnicodeString*)entry->value.pointer; + if (matchKey != NULL && !matchKey->isFallbackOf(*id)) { + continue; + } + const UnicodeString* dn = (const UnicodeString*)entry->key.pointer; + StringPair* sp = StringPair::create(*id, *dn, status); + result.addElement(sp, status); + if (U_FAILURE(status)) { + result.removeAllElements(); + break; + } + } + delete matchKey; + + return result; +} + +URegistryKey +ICUService::registerInstance(UObject* objToAdopt, const UnicodeString& id, UErrorCode& status) +{ + return registerInstance(objToAdopt, id, TRUE, status); +} + +URegistryKey +ICUService::registerInstance(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status) +{ + ICUServiceKey* key = createKey(&id, status); + if (key != NULL) { + UnicodeString canonicalID; + key->canonicalID(canonicalID); + delete key; + + ICUServiceFactory* f = createSimpleFactory(objToAdopt, canonicalID, visible, status); + if (f != NULL) { + return registerFactory(f, status); + } + } + delete objToAdopt; + return NULL; +} + +ICUServiceFactory* +ICUService::createSimpleFactory(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status) +{ + if (U_SUCCESS(status)) { + if ((objToAdopt != NULL) && (!id.isBogus())) { + return new SimpleFactory(objToAdopt, id, visible); + } + status = U_ILLEGAL_ARGUMENT_ERROR; + } + return NULL; +} + +URegistryKey +ICUService::registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status) +{ + if (U_SUCCESS(status) && factoryToAdopt != NULL) { + Mutex mutex(&lock); + + if (factories == NULL) { + factories = new UVector(deleteUObject, NULL, status); + if (U_FAILURE(status)) { + delete factories; + return NULL; + } + } + factories->insertElementAt(factoryToAdopt, 0, status); + if (U_SUCCESS(status)) { + clearCaches(); + } else { + delete factoryToAdopt; + factoryToAdopt = NULL; + } + } + + if (factoryToAdopt != NULL) { + notifyChanged(); + } + + return (URegistryKey)factoryToAdopt; +} + +UBool +ICUService::unregister(URegistryKey rkey, UErrorCode& status) +{ + ICUServiceFactory *factory = (ICUServiceFactory*)rkey; + UBool result = FALSE; + if (factory != NULL && factories != NULL) { + Mutex mutex(&lock); + + if (factories->removeElement(factory)) { + clearCaches(); + result = TRUE; + } else { + status = U_ILLEGAL_ARGUMENT_ERROR; + delete factory; + } + } + if (result) { + notifyChanged(); + } + return result; +} + +void +ICUService::reset() +{ + { + Mutex mutex(&lock); + reInitializeFactories(); + clearCaches(); + } + notifyChanged(); +} + +void +ICUService::reInitializeFactories() +{ + if (factories != NULL) { + factories->removeAllElements(); + } +} + +UBool +ICUService::isDefault() const +{ + return countFactories() == 0; +} + +ICUServiceKey* +ICUService::createKey(const UnicodeString* id, UErrorCode& status) const +{ + return (U_FAILURE(status) || id == NULL) ? NULL : new ICUServiceKey(*id); +} + +void +ICUService::clearCaches() +{ + // callers synchronize before use + ++timestamp; + delete dnCache; + dnCache = NULL; + delete idCache; + idCache = NULL; + delete serviceCache; serviceCache = NULL; +} + +void +ICUService::clearServiceCache() +{ + // callers synchronize before use + delete serviceCache; serviceCache = NULL; +} + +UBool +ICUService::acceptsListener(const EventListener& l) const +{ + return dynamic_cast<const ServiceListener*>(&l) != NULL; +} + +void +ICUService::notifyListener(EventListener& l) const +{ + ((ServiceListener&)l).serviceChanged(*this); +} + +UnicodeString& +ICUService::getName(UnicodeString& result) const +{ + return result.append(name); +} + +int32_t +ICUService::countFactories() const +{ + return factories == NULL ? 0 : factories->size(); +} + +int32_t +ICUService::getTimestamp() const +{ + return timestamp; +} + +U_NAMESPACE_END + +/* UCONFIG_NO_SERVICE */ +#endif diff --git a/thirdparty/icu4c/common/serv.h b/thirdparty/icu4c/common/serv.h new file mode 100644 index 0000000000..ca070b6c6d --- /dev/null +++ b/thirdparty/icu4c/common/serv.h @@ -0,0 +1,996 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** + ******************************************************************************* + * Copyright (C) 2001-2011, International Business Machines Corporation. * + * All Rights Reserved. * + ******************************************************************************* + */ + +#ifndef ICUSERV_H +#define ICUSERV_H + +#include "unicode/utypes.h" + +#if UCONFIG_NO_SERVICE + +U_NAMESPACE_BEGIN + +/* + * Allow the declaration of APIs with pointers to ICUService + * even when service is removed from the build. + */ +class ICUService; + +U_NAMESPACE_END + +#else + +#include "unicode/unistr.h" +#include "unicode/locid.h" +#include "unicode/umisc.h" + +#include "hash.h" +#include "uvector.h" +#include "servnotf.h" + +class ICUServiceTest; + +U_NAMESPACE_BEGIN + +class ICUServiceKey; +class ICUServiceFactory; +class SimpleFactory; +class ServiceListener; +class ICUService; + +class DNCache; + +/******************************************************************* + * ICUServiceKey + */ + +/** + * <p>ICUServiceKeys are used to communicate with factories to + * generate an instance of the service. ICUServiceKeys define how + * ids are canonicalized, provide both a current id and a current + * descriptor to use in querying the cache and factories, and + * determine the fallback strategy.</p> + * + * <p>ICUServiceKeys provide both a currentDescriptor and a currentID. + * The descriptor contains an optional prefix, followed by '/' + * and the currentID. Factories that handle complex keys, + * for example number format factories that generate multiple + * kinds of formatters for the same locale, use the descriptor + * to provide a fully unique identifier for the service object, + * while using the currentID (in this case, the locale string), + * as the visible IDs that can be localized.</p> + * + * <p>The default implementation of ICUServiceKey has no fallbacks and + * has no custom descriptors.</p> + */ +class U_COMMON_API ICUServiceKey : public UObject { + private: + const UnicodeString _id; + + protected: + static const UChar PREFIX_DELIMITER; + + public: + + /** + * <p>Construct a key from an id.</p> + * + * @param id the ID from which to construct the key. + */ + ICUServiceKey(const UnicodeString& id); + + /** + * <p>Virtual destructor.</p> + */ + virtual ~ICUServiceKey(); + + /** + * <p>Return the original ID used to construct this key.</p> + * + * @return the ID used to construct this key. + */ + virtual const UnicodeString& getID() const; + + /** + * <p>Return the canonical version of the original ID. This implementation + * appends the original ID to result. Result is returned as a convenience.</p> + * + * @param result the output parameter to which the id will be appended. + * @return the modified result. + */ + virtual UnicodeString& canonicalID(UnicodeString& result) const; + + /** + * <p>Return the (canonical) current ID. This implementation appends + * the canonical ID to result. Result is returned as a convenience.</p> + * + * @param result the output parameter to which the current id will be appended. + * @return the modified result. + */ + virtual UnicodeString& currentID(UnicodeString& result) const; + + /** + * <p>Return the current descriptor. This implementation appends + * the current descriptor to result. Result is returned as a convenience.</p> + * + * <p>The current descriptor is used to fully + * identify an instance of the service in the cache. A + * factory may handle all descriptors for an ID, or just a + * particular descriptor. The factory can either parse the + * descriptor or use custom API on the key in order to + * instantiate the service.</p> + * + * @param result the output parameter to which the current id will be appended. + * @return the modified result. + */ + virtual UnicodeString& currentDescriptor(UnicodeString& result) const; + + /** + * <p>If the key has a fallback, modify the key and return true, + * otherwise return false. The current ID will change if there + * is a fallback. No currentIDs should be repeated, and fallback + * must eventually return false. This implementation has no fallbacks + * and always returns false.</p> + * + * @return true if the ICUServiceKey changed to a valid fallback value. + */ + virtual UBool fallback(); + + /** + * <p>Return true if a key created from id matches, or would eventually + * fallback to match, the canonical ID of this ICUServiceKey.</p> + * + * @param id the id to test. + * @return true if this ICUServiceKey's canonical ID is a fallback of id. + */ + virtual UBool isFallbackOf(const UnicodeString& id) const; + + /** + * <p>Return the prefix. This implementation leaves result unchanged. + * Result is returned as a convenience.</p> + * + * @param result the output parameter to which the prefix will be appended. + * @return the modified result. + */ + virtual UnicodeString& prefix(UnicodeString& result) const; + + /** + * <p>A utility to parse the prefix out of a descriptor string. Only + * the (undelimited) prefix, if any, remains in result. Result is returned as a + * convenience.</p> + * + * @param result an input/output parameter that on entry is a descriptor, and + * on exit is the prefix of that descriptor. + * @return the modified result. + */ + static UnicodeString& parsePrefix(UnicodeString& result); + + /** + * <p>A utility to parse the suffix out of a descriptor string. Only + * the (undelimited) suffix, if any, remains in result. Result is returned as a + * convenience.</p> + * + * @param result an input/output parameter that on entry is a descriptor, and + * on exit is the suffix of that descriptor. + * @return the modified result. + */ + static UnicodeString& parseSuffix(UnicodeString& result); + +public: + /** + * UObject RTTI boilerplate. + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * UObject RTTI boilerplate. + */ + virtual UClassID getDynamicClassID() const; + +#ifdef SERVICE_DEBUG + public: + virtual UnicodeString& debug(UnicodeString& result) const; + virtual UnicodeString& debugClass(UnicodeString& result) const; +#endif + +}; + + /******************************************************************* + * ICUServiceFactory + */ + + /** + * <p>An implementing ICUServiceFactory generates the service objects maintained by the + * service. A factory generates a service object from a key, + * updates id->factory mappings, and returns the display name for + * a supported id.</p> + */ +class U_COMMON_API ICUServiceFactory : public UObject { + public: + virtual ~ICUServiceFactory(); + + /** + * <p>Create a service object from the key, if this factory + * supports the key. Otherwise, return NULL.</p> + * + * <p>If the factory supports the key, then it can call + * the service's getKey(ICUServiceKey, String[], ICUServiceFactory) method + * passing itself as the factory to get the object that + * the service would have created prior to the factory's + * registration with the service. This can change the + * key, so any information required from the key should + * be extracted before making such a callback.</p> + * + * @param key the service key. + * @param service the service with which this factory is registered. + * @param status the error code status. + * @return the service object, or NULL if the factory does not support the key. + */ + virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const = 0; + + /** + * <p>Update result to reflect the IDs (not descriptors) that this + * factory publicly handles. Result contains mappings from ID to + * factory. On entry it will contain all (visible) mappings from + * previously-registered factories.</p> + * + * <p>This function, together with getDisplayName, are used to + * support ICUService::getDisplayNames. The factory determines + * which IDs (of those it supports) it will make visible, and of + * those, which it will provide localized display names for. In + * most cases it will register mappings from all IDs it supports + * to itself.</p> + * + * @param result the mapping table to update. + * @param status the error code status. + */ + virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const = 0; + + /** + * <p>Return, in result, the display name of the id in the provided locale. + * This is an id, not a descriptor. If the id is + * not visible, sets result to bogus. If the + * incoming result is bogus, it remains bogus. Result is returned as a + * convenience. Results are not defined if id is not one supported by this + * factory.</p> + * + * @param id a visible id supported by this factory. + * @param locale the locale for which to generate the corresponding localized display name. + * @param result output parameter to hold the display name. + * @return result. + */ + virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const = 0; +}; + +/* + ****************************************************************** + */ + + /** + * <p>A default implementation of factory. This provides default + * implementations for subclasses, and implements a singleton + * factory that matches a single ID and returns a single + * (possibly deferred-initialized) instance. This implements + * updateVisibleIDs to add a mapping from its ID to itself + * if visible is true, or to remove any existing mapping + * for its ID if visible is false. No localization of display + * names is performed.</p> + */ +class U_COMMON_API SimpleFactory : public ICUServiceFactory { + protected: + UObject* _instance; + const UnicodeString _id; + const UBool _visible; + + public: + /** + * <p>Construct a SimpleFactory that maps a single ID to a single + * service instance. If visible is true, the ID will be visible. + * The instance must not be NULL. The SimpleFactory will adopt + * the instance, which must not be changed subsequent to this call.</p> + * + * @param instanceToAdopt the service instance to adopt. + * @param id the ID to assign to this service instance. + * @param visible if true, the ID will be visible. + */ + SimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible = true); + + /** + * <p>Destructor.</p> + */ + virtual ~SimpleFactory(); + + /** + * <p>This implementation returns a clone of the service instance if the factory's ID is equal to + * the key's currentID. Service and prefix are ignored.</p> + * + * @param key the service key. + * @param service the service with which this factory is registered. + * @param status the error code status. + * @return the service object, or NULL if the factory does not support the key. + */ + virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const; + + /** + * <p>This implementation adds a mapping from ID -> this to result if visible is true, + * otherwise it removes ID from result.</p> + * + * @param result the mapping table to update. + * @param status the error code status. + */ + virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const; + + /** + * <p>This implementation returns the factory ID if it equals id and visible is true, + * otherwise it returns the empty string. (This implementation provides + * no localized id information.)</p> + * + * @param id a visible id supported by this factory. + * @param locale the locale for which to generate the corresponding localized display name. + * @param result output parameter to hold the display name. + * @return result. + */ + virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const; + +public: + /** + * UObject RTTI boilerplate. + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * UObject RTTI boilerplate. + */ + virtual UClassID getDynamicClassID() const; + +#ifdef SERVICE_DEBUG + public: + virtual UnicodeString& debug(UnicodeString& toAppendTo) const; + virtual UnicodeString& debugClass(UnicodeString& toAppendTo) const; +#endif + +}; + +/* + ****************************************************************** + */ + +/** + * <p>ServiceListener is the listener that ICUService provides by default. + * ICUService will notifiy this listener when factories are added to + * or removed from the service. Subclasses can provide + * different listener interfaces that extend EventListener, and modify + * acceptsListener and notifyListener as appropriate.</p> + */ +class U_COMMON_API ServiceListener : public EventListener { +public: + virtual ~ServiceListener(); + + /** + * <p>This method is called when the service changes. At the time of the + * call this listener is registered with the service. It must + * not modify the notifier in the context of this call.</p> + * + * @param service the service that changed. + */ + virtual void serviceChanged(const ICUService& service) const = 0; + +public: + /** + * UObject RTTI boilerplate. + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * UObject RTTI boilerplate. + */ + virtual UClassID getDynamicClassID() const; + +}; + +/* + ****************************************************************** + */ + +/** + * <p>A StringPair holds a displayName/ID pair. ICUService uses it + * as the array elements returned by getDisplayNames. + */ +class U_COMMON_API StringPair : public UMemory { +public: + /** + * <p>The display name of the pair.</p> + */ + const UnicodeString displayName; + + /** + * <p>The ID of the pair.</p> + */ + const UnicodeString id; + + /** + * <p>Creates a string pair from a displayName and an ID.</p> + * + * @param displayName the displayName. + * @param id the ID. + * @param status the error code status. + * @return a StringPair if the creation was successful, otherwise NULL. + */ + static StringPair* create(const UnicodeString& displayName, + const UnicodeString& id, + UErrorCode& status); + + /** + * <p>Return true if either string of the pair is bogus.</p> + * @return true if either string of the pair is bogus. + */ + UBool isBogus() const; + +private: + StringPair(const UnicodeString& displayName, const UnicodeString& id); +}; + +/******************************************************************* + * ICUService + */ + + /** + * <p>A Service provides access to service objects that implement a + * particular service, e.g. transliterators. Users provide a String + * id (for example, a locale string) to the service, and get back an + * object for that id. Service objects can be any kind of object. A + * new service object is returned for each query. The caller is + * responsible for deleting it.</p> + * + * <p>Services 'canonicalize' the query ID and use the canonical ID to + * query for the service. The service also defines a mechanism to + * 'fallback' the ID multiple times. Clients can optionally request + * the actual ID that was matched by a query when they use an ID to + * retrieve a service object.</p> + * + * <p>Service objects are instantiated by ICUServiceFactory objects + * registered with the service. The service queries each + * ICUServiceFactory in turn, from most recently registered to + * earliest registered, until one returns a service object. If none + * responds with a service object, a fallback ID is generated, and the + * process repeats until a service object is returned or until the ID + * has no further fallbacks.</p> + * + * <p>In ICU 2.4, UObject (the base class of service instances) does + * not define a polymorphic clone function. ICUService uses clones to + * manage ownership. Thus, for now, ICUService defines an abstract + * method, cloneInstance, that clients must implement to create clones + * of the service instances. This may change in future releases of + * ICU.</p> + * + * <p>ICUServiceFactories can be dynamically registered and + * unregistered with the service. When registered, an + * ICUServiceFactory is installed at the head of the factory list, and + * so gets 'first crack' at any keys or fallback keys. When + * unregistered, it is removed from the service and can no longer be + * located through it. Service objects generated by this factory and + * held by the client are unaffected.</p> + * + * <p>If a service has variants (e.g., the different variants of + * BreakIterator) an ICUServiceFactory can use the prefix of the + * ICUServiceKey to determine the variant of a service to generate. + * If it does not support all variants, it can request + * previously-registered factories to handle the ones it does not + * support.</p> + * + * <p>ICUService uses ICUServiceKeys to query factories and perform + * fallback. The ICUServiceKey defines the canonical form of the ID, + * and implements the fallback strategy. Custom ICUServiceKeys can be + * defined that parse complex IDs into components that + * ICUServiceFactories can more easily use. The ICUServiceKey can + * cache the results of this parsing to save repeated effort. + * ICUService provides convenience APIs that take UnicodeStrings and + * generate default ICUServiceKeys for use in querying.</p> + * + * <p>ICUService provides API to get the list of IDs publicly + * supported by the service (although queries aren't restricted to + * this list). This list contains only 'simple' IDs, and not fully + * unique IDs. ICUServiceFactories are associated with each simple ID + * and the responsible factory can also return a human-readable + * localized version of the simple ID, for use in user interfaces. + * ICUService can also provide an array of the all the localized + * visible IDs and their corresponding internal IDs.</p> + * + * <p>ICUService implements ICUNotifier, so that clients can register + * to receive notification when factories are added or removed from + * the service. ICUService provides a default EventListener + * subinterface, ServiceListener, which can be registered with the + * service. When the service changes, the ServiceListener's + * serviceChanged method is called with the service as the + * argument.</p> + * + * <p>The ICUService API is both rich and generic, and it is expected + * that most implementations will statically 'wrap' ICUService to + * present a more appropriate API-- for example, to declare the type + * of the objects returned from get, to limit the factories that can + * be registered with the service, or to define their own listener + * interface with a custom callback method. They might also customize + * ICUService by overriding it, for example, to customize the + * ICUServiceKey and fallback strategy. ICULocaleService is a + * subclass of ICUService that uses Locale names as IDs and uses + * ICUServiceKeys that implement the standard resource bundle fallback + * strategy. Most clients will wish to subclass it instead of + * ICUService.</p> + */ +class U_COMMON_API ICUService : public ICUNotifier { + protected: + /** + * Name useful for debugging. + */ + const UnicodeString name; + + private: + + /** + * Timestamp so iterators can be fail-fast. + */ + uint32_t timestamp; + + /** + * All the factories registered with this service. + */ + UVector* factories; + + /** + * The service cache. + */ + Hashtable* serviceCache; + + /** + * The ID cache. + */ + Hashtable* idCache; + + /** + * The name cache. + */ + DNCache* dnCache; + + /** + * Constructor. + */ + public: + /** + * <p>Construct a new ICUService.</p> + */ + ICUService(); + + /** + * <p>Construct with a name (useful for debugging).</p> + * + * @param name a name to use in debugging. + */ + ICUService(const UnicodeString& name); + + /** + * <p>Destructor.</p> + */ + virtual ~ICUService(); + + /** + * <p>Return the name of this service. This will be the empty string if none was assigned. + * Returns result as a convenience.</p> + * + * @param result an output parameter to contain the name of this service. + * @return the name of this service. + */ + UnicodeString& getName(UnicodeString& result) const; + + /** + * <p>Convenience override for get(ICUServiceKey&, UnicodeString*). This uses + * createKey to create a key for the provided descriptor.</p> + * + * @param descriptor the descriptor. + * @param status the error code status. + * @return the service instance, or NULL. + */ + UObject* get(const UnicodeString& descriptor, UErrorCode& status) const; + + /** + * <p>Convenience override for get(ICUServiceKey&, UnicodeString*). This uses + * createKey to create a key from the provided descriptor.</p> + * + * @param descriptor the descriptor. + * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL. + * @param status the error code status. + * @return the service instance, or NULL. + */ + UObject* get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const; + + /** + * <p>Convenience override for get(ICUServiceKey&, UnicodeString*).</p> + * + * @param key the key. + * @param status the error code status. + * @return the service instance, or NULL. + */ + UObject* getKey(ICUServiceKey& key, UErrorCode& status) const; + + /** + * <p>Given a key, return a service object, and, if actualReturn + * is not NULL, the descriptor with which it was found in the + * first element of actualReturn. If no service object matches + * this key, returns NULL and leaves actualReturn unchanged.</p> + * + * <p>This queries the cache using the key's descriptor, and if no + * object in the cache matches, tries the key on each + * registered factory, in order. If none generates a service + * object for the key, repeats the process with each fallback of + * the key, until either a factory returns a service object, or the key + * has no fallback. If no object is found, the result of handleDefault + * is returned.</p> + * + * <p>Subclasses can override this method to further customize the + * result before returning it. + * + * @param key the key. + * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL. + * @param status the error code status. + * @return the service instance, or NULL. + */ + virtual UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const; + + /** + * <p>This version of getKey is only called by ICUServiceFactories within the scope + * of a previous getKey call, to determine what previously-registered factories would + * have returned. For details, see getKey(ICUServiceKey&, UErrorCode&). Subclasses + * should not call it directly, but call through one of the other get functions.</p> + * + * @param key the key. + * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL. + * @param factory the factory making the recursive call. + * @param status the error code status. + * @return the service instance, or NULL. + */ + UObject* getKey(ICUServiceKey& key, UnicodeString* actualReturn, const ICUServiceFactory* factory, UErrorCode& status) const; + + /** + * <p>Convenience override for getVisibleIDs(String) that passes null + * as the fallback, thus returning all visible IDs.</p> + * + * @param result a vector to hold the returned IDs. + * @param status the error code status. + * @return the result vector. + */ + UVector& getVisibleIDs(UVector& result, UErrorCode& status) const; + + /** + * <p>Return a snapshot of the visible IDs for this service. This + * list will not change as ICUServiceFactories are added or removed, but the + * supported IDs will, so there is no guarantee that all and only + * the IDs in the returned list will be visible and supported by the + * service in subsequent calls.</p> + * + * <p>The IDs are returned as pointers to UnicodeStrings. The + * caller owns the IDs. Previous contents of result are discarded before + * new elements, if any, are added.</p> + * + * <p>matchID is passed to createKey to create a key. If the key + * is not NULL, its isFallbackOf method is used to filter out IDs + * that don't match the key or have it as a fallback.</p> + * + * @param result a vector to hold the returned IDs. + * @param matchID an ID used to filter the result, or NULL if all IDs are desired. + * @param status the error code status. + * @return the result vector. + */ + UVector& getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorCode& status) const; + + /** + * <p>Convenience override for getDisplayName(const UnicodeString&, const Locale&, UnicodeString&) that + * uses the current default locale.</p> + * + * @param id the ID for which to retrieve the localized displayName. + * @param result an output parameter to hold the display name. + * @return the modified result. + */ + UnicodeString& getDisplayName(const UnicodeString& id, UnicodeString& result) const; + + /** + * <p>Given a visible ID, return the display name in the requested locale. + * If there is no directly supported ID corresponding to this ID, result is + * set to bogus.</p> + * + * @param id the ID for which to retrieve the localized displayName. + * @param result an output parameter to hold the display name. + * @param locale the locale in which to localize the ID. + * @return the modified result. + */ + UnicodeString& getDisplayName(const UnicodeString& id, UnicodeString& result, const Locale& locale) const; + + /** + * <p>Convenience override of getDisplayNames(const Locale&, const UnicodeString*) that + * uses the current default Locale as the locale and NULL for + * the matchID.</p> + * + * @param result a vector to hold the returned displayName/id StringPairs. + * @param status the error code status. + * @return the modified result vector. + */ + UVector& getDisplayNames(UVector& result, UErrorCode& status) const; + + /** + * <p>Convenience override of getDisplayNames(const Locale&, const UnicodeString*) that + * uses NULL for the matchID.</p> + * + * @param result a vector to hold the returned displayName/id StringPairs. + * @param locale the locale in which to localize the ID. + * @param status the error code status. + * @return the modified result vector. + */ + UVector& getDisplayNames(UVector& result, const Locale& locale, UErrorCode& status) const; + + /** + * <p>Return a snapshot of the mapping from display names to visible + * IDs for this service. This set will not change as factories + * are added or removed, but the supported IDs will, so there is + * no guarantee that all and only the IDs in the returned map will + * be visible and supported by the service in subsequent calls, + * nor is there any guarantee that the current display names match + * those in the result.</p> + * + * <p>The names are returned as pointers to StringPairs, which + * contain both the displayName and the corresponding ID. The + * caller owns the StringPairs. Previous contents of result are + * discarded before new elements, if any, are added.</p> + * + * <p>matchID is passed to createKey to create a key. If the key + * is not NULL, its isFallbackOf method is used to filter out IDs + * that don't match the key or have it as a fallback.</p> + * + * @param result a vector to hold the returned displayName/id StringPairs. + * @param locale the locale in which to localize the ID. + * @param matchID an ID used to filter the result, or NULL if all IDs are desired. + * @param status the error code status. + * @return the result vector. */ + UVector& getDisplayNames(UVector& result, + const Locale& locale, + const UnicodeString* matchID, + UErrorCode& status) const; + + /** + * <p>A convenience override of registerInstance(UObject*, const UnicodeString&, UBool) + * that defaults visible to true.</p> + * + * @param objToAdopt the object to register and adopt. + * @param id the ID to assign to this object. + * @param status the error code status. + * @return a registry key that can be passed to unregister to unregister + * (and discard) this instance. + */ + URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& id, UErrorCode& status); + + /** + * <p>Register a service instance with the provided ID. The ID will be + * canonicalized. The canonicalized ID will be returned by + * getVisibleIDs if visible is true. The service instance will be adopted and + * must not be modified subsequent to this call.</p> + * + * <p>This issues a serviceChanged notification to registered listeners.</p> + * + * <p>This implementation wraps the object using + * createSimpleFactory, and calls registerFactory.</p> + * + * @param objToAdopt the object to register and adopt. + * @param id the ID to assign to this object. + * @param visible true if getVisibleIDs is to return this ID. + * @param status the error code status. + * @return a registry key that can be passed to unregister() to unregister + * (and discard) this instance. + */ + virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status); + + /** + * <p>Register an ICUServiceFactory. Returns a registry key that + * can be used to unregister the factory. The factory + * must not be modified subsequent to this call. The service owns + * all registered factories. In case of an error, the factory is + * deleted.</p> + * + * <p>This issues a serviceChanged notification to registered listeners.</p> + * + * <p>The default implementation accepts all factories.</p> + * + * @param factoryToAdopt the factory to register and adopt. + * @param status the error code status. + * @return a registry key that can be passed to unregister to unregister + * (and discard) this factory. + */ + virtual URegistryKey registerFactory(ICUServiceFactory* factoryToAdopt, UErrorCode& status); + + /** + * <p>Unregister a factory using a registry key returned by + * registerInstance or registerFactory. After a successful call, + * the factory will be removed from the service factory list and + * deleted, and the key becomes invalid.</p> + * + * <p>This issues a serviceChanged notification to registered + * listeners.</p> + * + * @param rkey the registry key. + * @param status the error code status. + * @return true if the call successfully unregistered the factory. + */ + virtual UBool unregister(URegistryKey rkey, UErrorCode& status); + + /** + * </p>Reset the service to the default factories. The factory + * lock is acquired and then reInitializeFactories is called.</p> + * + * <p>This issues a serviceChanged notification to registered listeners.</p> + */ + virtual void reset(void); + + /** + * <p>Return true if the service is in its default state.</p> + * + * <p>The default implementation returns true if there are no + * factories registered.</p> + */ + virtual UBool isDefault(void) const; + + /** + * <p>Create a key from an ID. If ID is NULL, returns NULL.</p> + * + * <p>The default implementation creates an ICUServiceKey instance. + * Subclasses can override to define more useful keys appropriate + * to the factories they accept.</p> + * + * @param a pointer to the ID for which to create a default ICUServiceKey. + * @param status the error code status. + * @return the ICUServiceKey corresponding to ID, or NULL. + */ + virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const; + + /** + * <p>Clone object so that caller can own the copy. In ICU2.4, UObject doesn't define + * clone, so we need an instance-aware method that knows how to do this. + * This is public so factories can call it, but should really be protected.</p> + * + * @param instance the service instance to clone. + * @return a clone of the passed-in instance, or NULL if cloning was unsuccessful. + */ + virtual UObject* cloneInstance(UObject* instance) const = 0; + + + /************************************************************************ + * Subclassing API + */ + + protected: + + /** + * <p>Create a factory that wraps a single service object. Called by registerInstance.</p> + * + * <p>The default implementation returns an instance of SimpleFactory.</p> + * + * @param instanceToAdopt the service instance to adopt. + * @param id the ID to assign to this service instance. + * @param visible if true, the ID will be visible. + * @param status the error code status. + * @return an instance of ICUServiceFactory that maps this instance to the provided ID. + */ + virtual ICUServiceFactory* createSimpleFactory(UObject* instanceToAdopt, const UnicodeString& id, UBool visible, UErrorCode& status); + + /** + * <p>Reinitialize the factory list to its default state. After this call, isDefault() + * must return true.</p> + * + * <p>This issues a serviceChanged notification to registered listeners.</p> + * + * <p>The default implementation clears the factory list. + * Subclasses can override to provide other default initialization + * of the factory list. Subclasses must not call this method + * directly, since it must only be called while holding write + * access to the factory list.</p> + */ + virtual void reInitializeFactories(void); + + /** + * <p>Default handler for this service if no factory in the factory list + * handled the key passed to getKey.</p> + * + * <p>The default implementation returns NULL.</p> + * + * @param key the key. + * @param actualReturn a pointer to a UnicodeString to hold the matched descriptor, or NULL. + * @param status the error code status. + * @return the service instance, or NULL. + */ + virtual UObject* handleDefault(const ICUServiceKey& key, UnicodeString* actualReturn, UErrorCode& status) const; + + /** + * <p>Clear caches maintained by this service.</p> + * + * <p>Subclasses can override if they implement additional caches + * that need to be cleared when the service changes. Subclasses + * should generally not call this method directly, as it must only + * be called while synchronized on the factory lock.</p> + */ + virtual void clearCaches(void); + + /** + * <p>Return true if the listener is accepted.</p> + * + * <p>The default implementation accepts the listener if it is + * a ServiceListener. Subclasses can override this to accept + * different listeners.</p> + * + * @param l the listener to test. + * @return true if the service accepts the listener. + */ + virtual UBool acceptsListener(const EventListener& l) const; + + /** + * <p>Notify the listener of a service change.</p> + * + * <p>The default implementation assumes a ServiceListener. + * If acceptsListener has been overridden to accept different + * listeners, this should be overridden as well.</p> + * + * @param l the listener to notify. + */ + virtual void notifyListener(EventListener& l) const; + + /************************************************************************ + * Utilities for subclasses. + */ + + /** + * <p>Clear only the service cache.</p> + * + * <p>This can be called by subclasses when a change affects the service + * cache but not the ID caches, e.g., when the default locale changes + * the resolution of IDs also changes, requiring the cache to be + * flushed, but not the visible IDs themselves.</p> + */ + void clearServiceCache(void); + + /** + * <p>Return a map from visible IDs to factories. + * This must only be called when the mutex is held.</p> + * + * @param status the error code status. + * @return a Hashtable containing mappings from visible + * IDs to factories. + */ + const Hashtable* getVisibleIDMap(UErrorCode& status) const; + + /** + * <p>Allow subclasses to read the time stamp.</p> + * + * @return the timestamp. + */ + int32_t getTimestamp(void) const; + + /** + * <p>Return the number of registered factories.</p> + * + * @return the number of factories registered at the time of the call. + */ + int32_t countFactories(void) const; + +private: + + friend class ::ICUServiceTest; // give tests access to countFactories. +}; + +U_NAMESPACE_END + + /* UCONFIG_NO_SERVICE */ +#endif + + /* ICUSERV_H */ +#endif + diff --git a/thirdparty/icu4c/common/servlk.cpp b/thirdparty/icu4c/common/servlk.cpp new file mode 100644 index 0000000000..538982ca36 --- /dev/null +++ b/thirdparty/icu4c/common/servlk.cpp @@ -0,0 +1,188 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** + ******************************************************************************* + * Copyright (C) 2001-2014, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + ******************************************************************************* + */ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_SERVICE + +#include "unicode/resbund.h" +#include "uresimp.h" +#include "cmemory.h" +#include "servloc.h" +#include "ustrfmt.h" +#include "uhash.h" +#include "charstr.h" +#include "uassert.h" + +#define UNDERSCORE_CHAR ((UChar)0x005f) +#define AT_SIGN_CHAR ((UChar)64) +#define PERIOD_CHAR ((UChar)46) + +U_NAMESPACE_BEGIN + +LocaleKey* +LocaleKey::createWithCanonicalFallback(const UnicodeString* primaryID, + const UnicodeString* canonicalFallbackID, + UErrorCode& status) +{ + return LocaleKey::createWithCanonicalFallback(primaryID, canonicalFallbackID, KIND_ANY, status); +} + +LocaleKey* +LocaleKey::createWithCanonicalFallback(const UnicodeString* primaryID, + const UnicodeString* canonicalFallbackID, + int32_t kind, + UErrorCode& status) +{ + if (primaryID == NULL || U_FAILURE(status)) { + return NULL; + } + UnicodeString canonicalPrimaryID; + LocaleUtility::canonicalLocaleString(primaryID, canonicalPrimaryID); + return new LocaleKey(*primaryID, canonicalPrimaryID, canonicalFallbackID, kind); +} + +LocaleKey::LocaleKey(const UnicodeString& primaryID, + const UnicodeString& canonicalPrimaryID, + const UnicodeString* canonicalFallbackID, + int32_t kind) + : ICUServiceKey(primaryID) + , _kind(kind) + , _primaryID(canonicalPrimaryID) + , _fallbackID() + , _currentID() +{ + _fallbackID.setToBogus(); + if (_primaryID.length() != 0) { + if (canonicalFallbackID != NULL && _primaryID != *canonicalFallbackID) { + _fallbackID = *canonicalFallbackID; + } + } + + _currentID = _primaryID; +} + +LocaleKey::~LocaleKey() {} + +UnicodeString& +LocaleKey::prefix(UnicodeString& result) const { + if (_kind != KIND_ANY) { + UChar buffer[64]; + uprv_itou(buffer, 64, _kind, 10, 0); + UnicodeString temp(buffer); + result.append(temp); + } + return result; +} + +int32_t +LocaleKey::kind() const { + return _kind; +} + +UnicodeString& +LocaleKey::canonicalID(UnicodeString& result) const { + return result.append(_primaryID); +} + +UnicodeString& +LocaleKey::currentID(UnicodeString& result) const { + if (!_currentID.isBogus()) { + result.append(_currentID); + } + return result; +} + +UnicodeString& +LocaleKey::currentDescriptor(UnicodeString& result) const { + if (!_currentID.isBogus()) { + prefix(result).append(PREFIX_DELIMITER).append(_currentID); + } else { + result.setToBogus(); + } + return result; +} + +Locale& +LocaleKey::canonicalLocale(Locale& result) const { + return LocaleUtility::initLocaleFromName(_primaryID, result); +} + +Locale& +LocaleKey::currentLocale(Locale& result) const { + return LocaleUtility::initLocaleFromName(_currentID, result); +} + +UBool +LocaleKey::fallback() { + if (!_currentID.isBogus()) { + int x = _currentID.lastIndexOf(UNDERSCORE_CHAR); + if (x != -1) { + _currentID.remove(x); // truncate current or fallback, whichever we're pointing to + return TRUE; + } + + if (!_fallbackID.isBogus()) { + _currentID = _fallbackID; + _fallbackID.setToBogus(); + return TRUE; + } + + if (_currentID.length() > 0) { + _currentID.remove(0); // completely truncate + return TRUE; + } + + _currentID.setToBogus(); + } + + return FALSE; +} + +UBool +LocaleKey::isFallbackOf(const UnicodeString& id) const { + UnicodeString temp(id); + parseSuffix(temp); + return temp.indexOf(_primaryID) == 0 && + (temp.length() == _primaryID.length() || + temp.charAt(_primaryID.length()) == UNDERSCORE_CHAR); +} + +#ifdef SERVICE_DEBUG +UnicodeString& +LocaleKey::debug(UnicodeString& result) const +{ + ICUServiceKey::debug(result); + result.append((UnicodeString)" kind: "); + result.append(_kind); + result.append((UnicodeString)" primaryID: "); + result.append(_primaryID); + result.append((UnicodeString)" fallbackID: "); + result.append(_fallbackID); + result.append((UnicodeString)" currentID: "); + result.append(_currentID); + return result; +} + +UnicodeString& +LocaleKey::debugClass(UnicodeString& result) const +{ + return result.append((UnicodeString)"LocaleKey "); +} +#endif + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LocaleKey) + +U_NAMESPACE_END + +/* !UCONFIG_NO_SERVICE */ +#endif + + diff --git a/thirdparty/icu4c/common/servlkf.cpp b/thirdparty/icu4c/common/servlkf.cpp new file mode 100644 index 0000000000..84f2347cdd --- /dev/null +++ b/thirdparty/icu4c/common/servlkf.cpp @@ -0,0 +1,152 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** + ******************************************************************************* + * Copyright (C) 2001-2014, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + ******************************************************************************* + */ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_SERVICE + +#include "unicode/resbund.h" +#include "uresimp.h" +#include "cmemory.h" +#include "servloc.h" +#include "ustrfmt.h" +#include "uhash.h" +#include "charstr.h" +#include "ucln_cmn.h" +#include "uassert.h" + +#define UNDERSCORE_CHAR ((UChar)0x005f) +#define AT_SIGN_CHAR ((UChar)64) +#define PERIOD_CHAR ((UChar)46) + + +U_NAMESPACE_BEGIN + +LocaleKeyFactory::LocaleKeyFactory(int32_t coverage) + : _name() + , _coverage(coverage) +{ +} + +LocaleKeyFactory::LocaleKeyFactory(int32_t coverage, const UnicodeString& name) + : _name(name) + , _coverage(coverage) +{ +} + +LocaleKeyFactory::~LocaleKeyFactory() { +} + +UObject* +LocaleKeyFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const { + if (handlesKey(key, status)) { + const LocaleKey& lkey = (const LocaleKey&)key; + int32_t kind = lkey.kind(); + Locale loc; + lkey.currentLocale(loc); + + return handleCreate(loc, kind, service, status); + } + return NULL; +} + +UBool +LocaleKeyFactory::handlesKey(const ICUServiceKey& key, UErrorCode& status) const { + const Hashtable* supported = getSupportedIDs(status); + if (supported) { + UnicodeString id; + key.currentID(id); + return supported->get(id) != NULL; + } + return FALSE; +} + +void +LocaleKeyFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const { + const Hashtable* supported = getSupportedIDs(status); + if (supported) { + UBool visible = (_coverage & 0x1) == 0; + const UHashElement* elem = NULL; + int32_t pos = UHASH_FIRST; + while ((elem = supported->nextElement(pos)) != NULL) { + const UnicodeString& id = *((const UnicodeString*)elem->key.pointer); + if (!visible) { + result.remove(id); + } else { + result.put(id, (void*)this, status); // this is dummy non-void marker used for set semantics + if (U_FAILURE(status)) { + break; + } + } + } + } +} + +UnicodeString& +LocaleKeyFactory::getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const { + if ((_coverage & 0x1) == 0) { + //UErrorCode status = U_ZERO_ERROR; + // assume if this is called on us, we support some fallback of this id + // if (isSupportedID(id, status)) { + Locale loc; + LocaleUtility::initLocaleFromName(id, loc); + return loc.getDisplayName(locale, result); + // } + } + result.setToBogus(); + return result; +} + +UObject* +LocaleKeyFactory::handleCreate(const Locale& /* loc */, + int32_t /* kind */, + const ICUService* /* service */, + UErrorCode& /* status */) const { + return NULL; +} + +//UBool +//LocaleKeyFactory::isSupportedID(const UnicodeString& id, UErrorCode& status) const { +// const Hashtable* ids = getSupportedIDs(status); +// return ids && ids->get(id); +//} + +const Hashtable* +LocaleKeyFactory::getSupportedIDs(UErrorCode& /* status */) const { + return NULL; +} + +#ifdef SERVICE_DEBUG +UnicodeString& +LocaleKeyFactory::debug(UnicodeString& result) const +{ + debugClass(result); + result.append((UnicodeString)", name: "); + result.append(_name); + result.append((UnicodeString)", coverage: "); + result.append(_coverage); + return result; +} + +UnicodeString& +LocaleKeyFactory::debugClass(UnicodeString& result) const +{ + return result.append((UnicodeString)"LocaleKeyFactory"); +} +#endif + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(LocaleKeyFactory) + +U_NAMESPACE_END + +/* !UCONFIG_NO_SERVICE */ +#endif + + diff --git a/thirdparty/icu4c/common/servloc.h b/thirdparty/icu4c/common/servloc.h new file mode 100644 index 0000000000..ccf6433379 --- /dev/null +++ b/thirdparty/icu4c/common/servloc.h @@ -0,0 +1,551 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** + ******************************************************************************* + * Copyright (C) 2001-2011, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + ******************************************************************************* + */ +#ifndef ICULSERV_H +#define ICULSERV_H + +#include "unicode/utypes.h" + +#if UCONFIG_NO_SERVICE + +U_NAMESPACE_BEGIN + +/* + * Allow the declaration of APIs with pointers to ICUService + * even when service is removed from the build. + */ +class ICULocaleService; + +U_NAMESPACE_END + +#else + +#include "unicode/unistr.h" +#include "unicode/locid.h" +#include "unicode/strenum.h" + +#include "hash.h" +#include "uvector.h" + +#include "serv.h" +#include "locutil.h" + +U_NAMESPACE_BEGIN + +class ICULocaleService; + +class LocaleKey; +class LocaleKeyFactory; +class SimpleLocaleKeyFactory; +class ServiceListener; + +/* + ****************************************************************** + */ + +/** + * A subclass of Key that implements a locale fallback mechanism. + * The first locale to search for is the locale provided by the + * client, and the fallback locale to search for is the current + * default locale. If a prefix is present, the currentDescriptor + * includes it before the locale proper, separated by "/". This + * is the default key instantiated by ICULocaleService.</p> + * + * <p>Canonicalization adjusts the locale string so that the + * section before the first understore is in lower case, and the rest + * is in upper case, with no trailing underscores.</p> + */ + +class U_COMMON_API LocaleKey : public ICUServiceKey { + private: + int32_t _kind; + UnicodeString _primaryID; + UnicodeString _fallbackID; + UnicodeString _currentID; + + public: + enum { + KIND_ANY = -1 + }; + + /** + * Create a LocaleKey with canonical primary and fallback IDs. + */ + static LocaleKey* createWithCanonicalFallback(const UnicodeString* primaryID, + const UnicodeString* canonicalFallbackID, + UErrorCode& status); + + /** + * Create a LocaleKey with canonical primary and fallback IDs. + */ + static LocaleKey* createWithCanonicalFallback(const UnicodeString* primaryID, + const UnicodeString* canonicalFallbackID, + int32_t kind, + UErrorCode& status); + + protected: + /** + * PrimaryID is the user's requested locale string, + * canonicalPrimaryID is this string in canonical form, + * fallbackID is the current default locale's string in + * canonical form. + */ + LocaleKey(const UnicodeString& primaryID, + const UnicodeString& canonicalPrimaryID, + const UnicodeString* canonicalFallbackID, + int32_t kind); + + public: + /** + * Append the prefix associated with the kind, or nothing if the kind is KIND_ANY. + */ + virtual UnicodeString& prefix(UnicodeString& result) const; + + /** + * Return the kind code associated with this key. + */ + virtual int32_t kind() const; + + /** + * Return the canonicalID. + */ + virtual UnicodeString& canonicalID(UnicodeString& result) const; + + /** + * Return the currentID. + */ + virtual UnicodeString& currentID(UnicodeString& result) const; + + /** + * Return the (canonical) current descriptor, or null if no current id. + */ + virtual UnicodeString& currentDescriptor(UnicodeString& result) const; + + /** + * Convenience method to return the locale corresponding to the (canonical) original ID. + */ + virtual Locale& canonicalLocale(Locale& result) const; + + /** + * Convenience method to return the locale corresponding to the (canonical) current ID. + */ + virtual Locale& currentLocale(Locale& result) const; + + /** + * If the key has a fallback, modify the key and return true, + * otherwise return false.</p> + * + * <p>First falls back through the primary ID, then through + * the fallbackID. The final fallback is the empty string, + * unless the primary id was the empty string, in which case + * there is no fallback. + */ + virtual UBool fallback(); + + /** + * Return true if a key created from id matches, or would eventually + * fallback to match, the canonical ID of this key. + */ + virtual UBool isFallbackOf(const UnicodeString& id) const; + + public: + /** + * UObject boilerplate. + */ + static UClassID U_EXPORT2 getStaticClassID(); + + virtual UClassID getDynamicClassID() const; + + /** + * Destructor. + */ + virtual ~LocaleKey(); + +#ifdef SERVICE_DEBUG + public: + virtual UnicodeString& debug(UnicodeString& result) const; + virtual UnicodeString& debugClass(UnicodeString& result) const; +#endif + +}; + +/* + ****************************************************************** + */ + +/** + * A subclass of ICUServiceFactory that uses LocaleKeys, and is able to + * 'cover' more specific locales with more general locales that it + * supports. + * + * <p>Coverage may be either of the values VISIBLE or INVISIBLE. + * + * <p>'Visible' indicates that the specific locale(s) supported by + * the factory are registered in getSupportedIDs, 'Invisible' + * indicates that they are not. + * + * <p>Localization of visible ids is handled + * by the handling factory, regardless of kind. + */ +class U_COMMON_API LocaleKeyFactory : public ICUServiceFactory { +protected: + const UnicodeString _name; + const int32_t _coverage; + +public: + enum { + /** + * Coverage value indicating that the factory makes + * its locales visible, and does not cover more specific + * locales. + */ + VISIBLE = 0, + + /** + * Coverage value indicating that the factory does not make + * its locales visible, and does not cover more specific + * locales. + */ + INVISIBLE = 1 + }; + + /** + * Destructor. + */ + virtual ~LocaleKeyFactory(); + +protected: + /** + * Constructor used by subclasses. + */ + LocaleKeyFactory(int32_t coverage); + + /** + * Constructor used by subclasses. + */ + LocaleKeyFactory(int32_t coverage, const UnicodeString& name); + + /** + * Implement superclass abstract method. This checks the currentID of + * the key against the supported IDs, and passes the canonicalLocale and + * kind off to handleCreate (which subclasses must implement). + */ +public: + virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const; + +protected: + virtual UBool handlesKey(const ICUServiceKey& key, UErrorCode& status) const; + +public: + /** + * Override of superclass method. This adjusts the result based + * on the coverage rule for this factory. + */ + virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const; + + /** + * Return a localized name for the locale represented by id. + */ + virtual UnicodeString& getDisplayName(const UnicodeString& id, const Locale& locale, UnicodeString& result) const; + +protected: + /** + * Utility method used by create(ICUServiceKey, ICUService). Subclasses can implement + * this instead of create. The default returns NULL. + */ + virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const; + + /** + * Return true if this id is one the factory supports (visible or + * otherwise). + */ + // virtual UBool isSupportedID(const UnicodeString& id, UErrorCode& status) const; + + /** + * Return the set of ids that this factory supports (visible or + * otherwise). This can be called often and might need to be + * cached if it is expensive to create. + */ + virtual const Hashtable* getSupportedIDs(UErrorCode& status) const; + +public: + /** + * UObject boilerplate. + */ + static UClassID U_EXPORT2 getStaticClassID(); + + virtual UClassID getDynamicClassID() const; + +#ifdef SERVICE_DEBUG + public: + virtual UnicodeString& debug(UnicodeString& result) const; + virtual UnicodeString& debugClass(UnicodeString& result) const; +#endif + +}; + +/* + ****************************************************************** + */ + +/** + * A LocaleKeyFactory that just returns a single object for a kind/locale. + */ + +class U_COMMON_API SimpleLocaleKeyFactory : public LocaleKeyFactory { + private: + UObject* _obj; + UnicodeString _id; + const int32_t _kind; + + public: + SimpleLocaleKeyFactory(UObject* objToAdopt, + const UnicodeString& locale, + int32_t kind, + int32_t coverage); + + SimpleLocaleKeyFactory(UObject* objToAdopt, + const Locale& locale, + int32_t kind, + int32_t coverage); + + /** + * Destructor. + */ + virtual ~SimpleLocaleKeyFactory(); + + /** + * Override of superclass method. Returns the service object if kind/locale match. Service is not used. + */ + virtual UObject* create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const; + + /** + * Override of superclass method. This adjusts the result based + * on the coverage rule for this factory. + */ + virtual void updateVisibleIDs(Hashtable& result, UErrorCode& status) const; + + protected: + /** + * Return true if this id is equal to the locale name. + */ + //virtual UBool isSupportedID(const UnicodeString& id, UErrorCode& status) const; + + +public: + /** + * UObject boilerplate. + */ + static UClassID U_EXPORT2 getStaticClassID(); + + virtual UClassID getDynamicClassID() const; + +#ifdef SERVICE_DEBUG + public: + virtual UnicodeString& debug(UnicodeString& result) const; + virtual UnicodeString& debugClass(UnicodeString& result) const; +#endif + +}; + +/* + ****************************************************************** + */ + +/** + * A LocaleKeyFactory that creates a service based on the ICU locale data. + * This is a base class for most ICU factories. Subclasses instantiate it + * with a constructor that takes a bundle name, which determines the supported + * IDs. Subclasses then override handleCreate to create the actual service + * object. The default implementation returns a resource bundle. + */ +class U_COMMON_API ICUResourceBundleFactory : public LocaleKeyFactory +{ + protected: + UnicodeString _bundleName; + + public: + /** + * Convenience constructor that uses the main ICU bundle name. + */ + ICUResourceBundleFactory(); + + /** + * A service factory based on ICU resource data in resources with + * the given name. This should be a 'path' that can be passed to + * ures_openAvailableLocales, such as U_ICUDATA or U_ICUDATA_COLL. + * The empty string is equivalent to U_ICUDATA. + */ + ICUResourceBundleFactory(const UnicodeString& bundleName); + + /** + * Destructor + */ + virtual ~ICUResourceBundleFactory(); + +protected: + /** + * Return the supported IDs. This is the set of all locale names in ICULocaleData. + */ + virtual const Hashtable* getSupportedIDs(UErrorCode& status) const; + + /** + * Create the service. The default implementation returns the resource bundle + * for the locale, ignoring kind, and service. + */ + virtual UObject* handleCreate(const Locale& loc, int32_t kind, const ICUService* service, UErrorCode& status) const; + +public: + /** + * UObject boilerplate. + */ + static UClassID U_EXPORT2 getStaticClassID(); + virtual UClassID getDynamicClassID() const; + + +#ifdef SERVICE_DEBUG + public: + virtual UnicodeString& debug(UnicodeString& result) const; + virtual UnicodeString& debugClass(UnicodeString& result) const; +#endif + +}; + +/* + ****************************************************************** + */ + +class U_COMMON_API ICULocaleService : public ICUService +{ + private: + Locale fallbackLocale; + UnicodeString fallbackLocaleName; + + public: + /** + * Construct an ICULocaleService. + */ + ICULocaleService(); + + /** + * Construct an ICULocaleService with a name (useful for debugging). + */ + ICULocaleService(const UnicodeString& name); + + /** + * Destructor. + */ + virtual ~ICULocaleService(); + +#if 0 + // redeclare because of overload resolution rules? + // no, causes ambiguities since both UnicodeString and Locale have constructors that take a const char* + // need some compiler flag to remove warnings + UObject* get(const UnicodeString& descriptor, UErrorCode& status) const { + return ICUService::get(descriptor, status); + } + + UObject* get(const UnicodeString& descriptor, UnicodeString* actualReturn, UErrorCode& status) const { + return ICUService::get(descriptor, actualReturn, status); + } +#endif + + /** + * Convenience override for callers using locales. This calls + * get(Locale, int, Locale[]) with KIND_ANY for kind and null for + * actualReturn. + */ + UObject* get(const Locale& locale, UErrorCode& status) const; + + /** + * Convenience override for callers using locales. This calls + * get(Locale, int, Locale[]) with a null actualReturn. + */ + UObject* get(const Locale& locale, int32_t kind, UErrorCode& status) const; + + /** + * Convenience override for callers using locales. This calls + * get(Locale, String, Locale[]) with a null kind. + */ + UObject* get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const; + + /** + * Convenience override for callers using locales. This uses + * createKey(Locale.toString(), kind) to create a key, calls getKey, and then + * if actualReturn is not null, returns the actualResult from + * getKey (stripping any prefix) into a Locale. + */ + UObject* get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const; + + /** + * Convenience override for callers using locales. This calls + * registerObject(Object, Locale, int32_t kind, int coverage) + * passing KIND_ANY for the kind, and VISIBLE for the coverage. + */ + virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status); + + /** + * Convenience function for callers using locales. This calls + * registerObject(Object, Locale, int kind, int coverage) + * passing VISIBLE for the coverage. + */ + virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status); + + /** + * Convenience function for callers using locales. This instantiates + * a SimpleLocaleKeyFactory, and registers the factory. + */ + virtual URegistryKey registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status); + + + /** + * (Stop compiler from complaining about hidden overrides.) + * Since both UnicodeString and Locale have constructors that take const char*, adding a public + * method that takes UnicodeString causes ambiguity at call sites that use const char*. + * We really need a flag that is understood by all compilers that will suppress the warning about + * hidden overrides. + */ + virtual URegistryKey registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status); + + /** + * Convenience method for callers using locales. This returns the standard + * service ID enumeration. + */ + virtual StringEnumeration* getAvailableLocales(void) const; + + protected: + + /** + * Return the name of the current fallback locale. If it has changed since this was + * last accessed, the service cache is cleared. + */ + const UnicodeString& validateFallbackLocale() const; + + /** + * Override superclass createKey method. + */ + virtual ICUServiceKey* createKey(const UnicodeString* id, UErrorCode& status) const; + + /** + * Additional createKey that takes a kind. + */ + virtual ICUServiceKey* createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const; + + friend class ServiceEnumeration; +}; + +U_NAMESPACE_END + + /* UCONFIG_NO_SERVICE */ +#endif + + /* ICULSERV_H */ +#endif + diff --git a/thirdparty/icu4c/common/servls.cpp b/thirdparty/icu4c/common/servls.cpp new file mode 100644 index 0000000000..81dc4f750e --- /dev/null +++ b/thirdparty/icu4c/common/servls.cpp @@ -0,0 +1,295 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** + ******************************************************************************* + * Copyright (C) 2001-2014, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + ******************************************************************************* + */ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_SERVICE + +#include "unicode/resbund.h" +#include "uresimp.h" +#include "cmemory.h" +#include "servloc.h" +#include "ustrfmt.h" +#include "charstr.h" +#include "uassert.h" + +#define UNDERSCORE_CHAR ((UChar)0x005f) +#define AT_SIGN_CHAR ((UChar)64) +#define PERIOD_CHAR ((UChar)46) + +U_NAMESPACE_BEGIN + +ICULocaleService::ICULocaleService() + : fallbackLocale(Locale::getDefault()) +{ +} + +ICULocaleService::ICULocaleService(const UnicodeString& dname) + : ICUService(dname) + , fallbackLocale(Locale::getDefault()) +{ +} + +ICULocaleService::~ICULocaleService() +{ +} + +UObject* +ICULocaleService::get(const Locale& locale, UErrorCode& status) const +{ + return get(locale, LocaleKey::KIND_ANY, NULL, status); +} + +UObject* +ICULocaleService::get(const Locale& locale, int32_t kind, UErrorCode& status) const +{ + return get(locale, kind, NULL, status); +} + +UObject* +ICULocaleService::get(const Locale& locale, Locale* actualReturn, UErrorCode& status) const +{ + return get(locale, LocaleKey::KIND_ANY, actualReturn, status); +} + +UObject* +ICULocaleService::get(const Locale& locale, int32_t kind, Locale* actualReturn, UErrorCode& status) const +{ + UObject* result = NULL; + if (U_FAILURE(status)) { + return result; + } + + UnicodeString locName(locale.getName(), -1, US_INV); + if (locName.isBogus()) { + status = U_MEMORY_ALLOCATION_ERROR; + } else { + ICUServiceKey* key = createKey(&locName, kind, status); + if (key) { + if (actualReturn == NULL) { + result = getKey(*key, status); + } else { + UnicodeString temp; + result = getKey(*key, &temp, status); + + if (result != NULL) { + key->parseSuffix(temp); + LocaleUtility::initLocaleFromName(temp, *actualReturn); + } + } + delete key; + } + } + return result; +} + + +URegistryKey +ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, + UBool visible, UErrorCode& status) +{ + Locale loc; + LocaleUtility::initLocaleFromName(locale, loc); + return registerInstance(objToAdopt, loc, LocaleKey::KIND_ANY, + visible ? LocaleKeyFactory::VISIBLE : LocaleKeyFactory::INVISIBLE, status); +} + +URegistryKey +ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, UErrorCode& status) +{ + return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY, LocaleKeyFactory::VISIBLE, status); +} + +URegistryKey +ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, UErrorCode& status) +{ + return registerInstance(objToAdopt, locale, kind, LocaleKeyFactory::VISIBLE, status); +} + +URegistryKey +ICULocaleService::registerInstance(UObject* objToAdopt, const Locale& locale, int32_t kind, int32_t coverage, UErrorCode& status) +{ + ICUServiceFactory * factory = new SimpleLocaleKeyFactory(objToAdopt, locale, kind, coverage); + if (factory != NULL) { + return registerFactory(factory, status); + } + delete objToAdopt; + return NULL; +} + +#if 0 +URegistryKey +ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, UErrorCode& status) +{ + return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY, LocaleKeyFactory::VISIBLE, status); +} + +URegistryKey +ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, UBool visible, UErrorCode& status) +{ + return registerInstance(objToAdopt, locale, LocaleKey::KIND_ANY, + visible ? LocaleKeyFactory::VISIBLE : LocaleKeyFactory::INVISIBLE, + status); +} + +URegistryKey +ICULocaleService::registerInstance(UObject* objToAdopt, const UnicodeString& locale, int32_t kind, int32_t coverage, UErrorCode& status) +{ + ICUServiceFactory * factory = new SimpleLocaleKeyFactory(objToAdopt, locale, kind, coverage); + if (factory != NULL) { + return registerFactory(factory, status); + } + delete objToAdopt; + return NULL; +} +#endif + +class ServiceEnumeration : public StringEnumeration { +private: + const ICULocaleService* _service; + int32_t _timestamp; + UVector _ids; + int32_t _pos; + +private: + ServiceEnumeration(const ICULocaleService* service, UErrorCode &status) + : _service(service) + , _timestamp(service->getTimestamp()) + , _ids(uprv_deleteUObject, NULL, status) + , _pos(0) + { + _service->getVisibleIDs(_ids, status); + } + + ServiceEnumeration(const ServiceEnumeration &other, UErrorCode &status) + : _service(other._service) + , _timestamp(other._timestamp) + , _ids(uprv_deleteUObject, NULL, status) + , _pos(0) + { + if(U_SUCCESS(status)) { + int32_t i, length; + + length = other._ids.size(); + for(i = 0; i < length; ++i) { + _ids.addElement(((UnicodeString *)other._ids.elementAt(i))->clone(), status); + } + + if(U_SUCCESS(status)) { + _pos = other._pos; + } + } + } + +public: + static ServiceEnumeration* create(const ICULocaleService* service) { + UErrorCode status = U_ZERO_ERROR; + ServiceEnumeration* result = new ServiceEnumeration(service, status); + if (U_SUCCESS(status)) { + return result; + } + delete result; + return NULL; + } + + virtual ~ServiceEnumeration(); + + virtual StringEnumeration *clone() const { + UErrorCode status = U_ZERO_ERROR; + ServiceEnumeration *cl = new ServiceEnumeration(*this, status); + if(U_FAILURE(status)) { + delete cl; + cl = NULL; + } + return cl; + } + + UBool upToDate(UErrorCode& status) const { + if (U_SUCCESS(status)) { + if (_timestamp == _service->getTimestamp()) { + return TRUE; + } + status = U_ENUM_OUT_OF_SYNC_ERROR; + } + return FALSE; + } + + virtual int32_t count(UErrorCode& status) const { + return upToDate(status) ? _ids.size() : 0; + } + + virtual const UnicodeString* snext(UErrorCode& status) { + if (upToDate(status) && (_pos < _ids.size())) { + return (const UnicodeString*)_ids[_pos++]; + } + return NULL; + } + + virtual void reset(UErrorCode& status) { + if (status == U_ENUM_OUT_OF_SYNC_ERROR) { + status = U_ZERO_ERROR; + } + if (U_SUCCESS(status)) { + _timestamp = _service->getTimestamp(); + _pos = 0; + _service->getVisibleIDs(_ids, status); + } + } + +public: + static UClassID U_EXPORT2 getStaticClassID(void); + virtual UClassID getDynamicClassID(void) const; +}; + +ServiceEnumeration::~ServiceEnumeration() {} + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ServiceEnumeration) + +StringEnumeration* +ICULocaleService::getAvailableLocales(void) const +{ + return ServiceEnumeration::create(this); +} + +const UnicodeString& +ICULocaleService::validateFallbackLocale() const +{ + const Locale& loc = Locale::getDefault(); + ICULocaleService* ncThis = (ICULocaleService*)this; + static UMutex llock; + { + Mutex mutex(&llock); + if (loc != fallbackLocale) { + ncThis->fallbackLocale = loc; + LocaleUtility::initNameFromLocale(loc, ncThis->fallbackLocaleName); + ncThis->clearServiceCache(); + } + } + return fallbackLocaleName; +} + +ICUServiceKey* +ICULocaleService::createKey(const UnicodeString* id, UErrorCode& status) const +{ + return LocaleKey::createWithCanonicalFallback(id, &validateFallbackLocale(), status); +} + +ICUServiceKey* +ICULocaleService::createKey(const UnicodeString* id, int32_t kind, UErrorCode& status) const +{ + return LocaleKey::createWithCanonicalFallback(id, &validateFallbackLocale(), kind, status); +} + +U_NAMESPACE_END + +/* !UCONFIG_NO_SERVICE */ +#endif + + diff --git a/thirdparty/icu4c/common/servnotf.cpp b/thirdparty/icu4c/common/servnotf.cpp new file mode 100644 index 0000000000..f577795cae --- /dev/null +++ b/thirdparty/icu4c/common/servnotf.cpp @@ -0,0 +1,120 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** + ******************************************************************************* + * Copyright (C) 2001-2012, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_SERVICE + +#include "servnotf.h" +#ifdef NOTIFIER_DEBUG +#include <stdio.h> +#endif + +U_NAMESPACE_BEGIN + +EventListener::~EventListener() {} +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EventListener) + +static UMutex notifyLock; + +ICUNotifier::ICUNotifier(void) +: listeners(NULL) +{ +} + +ICUNotifier::~ICUNotifier(void) { + { + Mutex lmx(¬ifyLock); + delete listeners; + listeners = NULL; + } +} + + +void +ICUNotifier::addListener(const EventListener* l, UErrorCode& status) +{ + if (U_SUCCESS(status)) { + if (l == NULL) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if (acceptsListener(*l)) { + Mutex lmx(¬ifyLock); + if (listeners == NULL) { + listeners = new UVector(5, status); + } else { + for (int i = 0, e = listeners->size(); i < e; ++i) { + const EventListener* el = (const EventListener*)(listeners->elementAt(i)); + if (l == el) { + return; + } + } + } + + listeners->addElement((void*)l, status); // cast away const + } +#ifdef NOTIFIER_DEBUG + else { + fprintf(stderr, "Listener invalid for this notifier."); + exit(1); + } +#endif + } +} + +void +ICUNotifier::removeListener(const EventListener *l, UErrorCode& status) +{ + if (U_SUCCESS(status)) { + if (l == NULL) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + { + Mutex lmx(¬ifyLock); + if (listeners != NULL) { + // identity equality check + for (int i = 0, e = listeners->size(); i < e; ++i) { + const EventListener* el = (const EventListener*)listeners->elementAt(i); + if (l == el) { + listeners->removeElementAt(i); + if (listeners->size() == 0) { + delete listeners; + listeners = NULL; + } + return; + } + } + } + } + } +} + +void +ICUNotifier::notifyChanged(void) +{ + if (listeners != NULL) { + Mutex lmx(¬ifyLock); + if (listeners != NULL) { + for (int i = 0, e = listeners->size(); i < e; ++i) { + EventListener* el = (EventListener*)listeners->elementAt(i); + notifyListener(*el); + } + } + } +} + +U_NAMESPACE_END + +/* UCONFIG_NO_SERVICE */ +#endif + diff --git a/thirdparty/icu4c/common/servnotf.h b/thirdparty/icu4c/common/servnotf.h new file mode 100644 index 0000000000..305570c1e6 --- /dev/null +++ b/thirdparty/icu4c/common/servnotf.h @@ -0,0 +1,125 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** + ******************************************************************************* + * Copyright (C) 2001-2014, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +#ifndef ICUNOTIF_H +#define ICUNOTIF_H + +#include "unicode/utypes.h" + +#if UCONFIG_NO_SERVICE + +U_NAMESPACE_BEGIN + +/* + * Allow the declaration of APIs with pointers to BreakIterator + * even when break iteration is removed from the build. + */ +class ICUNotifier; + +U_NAMESPACE_END + +#else + +#include "unicode/uobject.h" +#include "unicode/unistr.h" + +#include "mutex.h" +#include "uvector.h" + +U_NAMESPACE_BEGIN + +class U_COMMON_API EventListener : public UObject { +public: + virtual ~EventListener(); + +public: + static UClassID U_EXPORT2 getStaticClassID(); + + virtual UClassID getDynamicClassID() const; + +public: +#ifdef SERVICE_DEBUG + virtual UnicodeString& debug(UnicodeString& result) const { + return debugClass(result); + } + + virtual UnicodeString& debugClass(UnicodeString& result) const { + return result.append((UnicodeString)"Key"); + } +#endif +}; + +/** + * <p>Abstract implementation of a notification facility. Clients add + * EventListeners with addListener and remove them with removeListener. + * Notifiers call notifyChanged when they wish to notify listeners. + * This queues the listener list on the notification thread, which + * eventually dequeues the list and calls notifyListener on each + * listener in the list.</p> + * + * <p>Subclasses override acceptsListener and notifyListener + * to add type-safe notification. AcceptsListener should return + * true if the listener is of the appropriate type; ICUNotifier + * itself will ensure the listener is non-null and that the + * identical listener is not already registered with the Notifier. + * NotifyListener should cast the listener to the appropriate + * type and call the appropriate method on the listener. + */ + +class U_COMMON_API ICUNotifier : public UMemory { +private: UVector* listeners; + +public: + ICUNotifier(void); + + virtual ~ICUNotifier(void); + + /** + * Add a listener to be notified when notifyChanged is called. + * The listener must not be null. AcceptsListener must return + * true for the listener. Attempts to concurrently + * register the identical listener more than once will be + * silently ignored. + */ + virtual void addListener(const EventListener* l, UErrorCode& status); + + /** + * Stop notifying this listener. The listener must + * not be null. Attemps to remove a listener that is + * not registered will be silently ignored. + */ + virtual void removeListener(const EventListener* l, UErrorCode& status); + + /** + * ICU doesn't spawn its own threads. All listeners are notified in + * the thread of the caller. Misbehaved listeners can therefore + * indefinitely block the calling thread. Callers should beware of + * deadlock situations. + */ + virtual void notifyChanged(void); + +protected: + /** + * Subclasses implement this to return true if the listener is + * of the appropriate type. + */ + virtual UBool acceptsListener(const EventListener& l) const = 0; + + /** + * Subclasses implement this to notify the listener. + */ + virtual void notifyListener(EventListener& l) const = 0; +}; + +U_NAMESPACE_END + +/* UCONFIG_NO_SERVICE */ +#endif + +/* ICUNOTIF_H */ +#endif diff --git a/thirdparty/icu4c/common/servrbf.cpp b/thirdparty/icu4c/common/servrbf.cpp new file mode 100644 index 0000000000..94279ab3a1 --- /dev/null +++ b/thirdparty/icu4c/common/servrbf.cpp @@ -0,0 +1,96 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** + ******************************************************************************* + * Copyright (C) 2001-2014, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + ******************************************************************************* + */ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_SERVICE + +#include "unicode/resbund.h" +#include "uresimp.h" +#include "cmemory.h" +#include "servloc.h" +#include "ustrfmt.h" +#include "uhash.h" +#include "charstr.h" +#include "ucln_cmn.h" +#include "uassert.h" + +#define UNDERSCORE_CHAR ((UChar)0x005f) +#define AT_SIGN_CHAR ((UChar)64) +#define PERIOD_CHAR ((UChar)46) + +U_NAMESPACE_BEGIN + +ICUResourceBundleFactory::ICUResourceBundleFactory() + : LocaleKeyFactory(VISIBLE) + , _bundleName() +{ +} + +ICUResourceBundleFactory::ICUResourceBundleFactory(const UnicodeString& bundleName) + : LocaleKeyFactory(VISIBLE) + , _bundleName(bundleName) +{ +} + +ICUResourceBundleFactory::~ICUResourceBundleFactory() {} + +const Hashtable* +ICUResourceBundleFactory::getSupportedIDs(UErrorCode& status) const +{ + if (U_SUCCESS(status)) { + return LocaleUtility::getAvailableLocaleNames(_bundleName); + } + return NULL; +} + +UObject* +ICUResourceBundleFactory::handleCreate(const Locale& loc, int32_t /* kind */, const ICUService* /* service */, UErrorCode& status) const +{ + if (U_SUCCESS(status)) { + // _bundleName is a package name + // and should only contain invariant characters + // ??? is it always true that the max length of the bundle name is 19? + // who made this change? -- dlf + char pkg[20]; + int32_t length; + length=_bundleName.extract(0, INT32_MAX, pkg, (int32_t)sizeof(pkg), US_INV); + if(length>=(int32_t)sizeof(pkg)) { + return NULL; + } + return new ResourceBundle(pkg, loc, status); + } + return NULL; +} + +#ifdef SERVICE_DEBUG +UnicodeString& +ICUResourceBundleFactory::debug(UnicodeString& result) const +{ + LocaleKeyFactory::debug(result); + result.append((UnicodeString)", bundle: "); + return result.append(_bundleName); +} + +UnicodeString& +ICUResourceBundleFactory::debugClass(UnicodeString& result) const +{ + return result.append((UnicodeString)"ICUResourceBundleFactory"); +} +#endif + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ICUResourceBundleFactory) + +U_NAMESPACE_END + +/* !UCONFIG_NO_SERVICE */ +#endif + + diff --git a/thirdparty/icu4c/common/servslkf.cpp b/thirdparty/icu4c/common/servslkf.cpp new file mode 100644 index 0000000000..09154d1b91 --- /dev/null +++ b/thirdparty/icu4c/common/servslkf.cpp @@ -0,0 +1,123 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/** + ******************************************************************************* + * Copyright (C) 2001-2014, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + * + ******************************************************************************* + */ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_SERVICE + +#include "unicode/resbund.h" +#include "uresimp.h" +#include "cmemory.h" +#include "servloc.h" +#include "ustrfmt.h" +#include "uhash.h" +#include "charstr.h" +#include "uassert.h" + +#define UNDERSCORE_CHAR ((UChar)0x005f) +#define AT_SIGN_CHAR ((UChar)64) +#define PERIOD_CHAR ((UChar)46) + +U_NAMESPACE_BEGIN + +/* + ****************************************************************** + */ + +SimpleLocaleKeyFactory::SimpleLocaleKeyFactory(UObject* objToAdopt, + const UnicodeString& locale, + int32_t kind, + int32_t coverage) + : LocaleKeyFactory(coverage) + , _obj(objToAdopt) + , _id(locale) + , _kind(kind) +{ +} + +SimpleLocaleKeyFactory::SimpleLocaleKeyFactory(UObject* objToAdopt, + const Locale& locale, + int32_t kind, + int32_t coverage) + : LocaleKeyFactory(coverage) + , _obj(objToAdopt) + , _id() + , _kind(kind) +{ + LocaleUtility::initNameFromLocale(locale, _id); +} + +SimpleLocaleKeyFactory::~SimpleLocaleKeyFactory() +{ + delete _obj; + _obj = NULL; +} + +UObject* +SimpleLocaleKeyFactory::create(const ICUServiceKey& key, const ICUService* service, UErrorCode& status) const +{ + if (U_SUCCESS(status)) { + const LocaleKey& lkey = (const LocaleKey&)key; + if (_kind == LocaleKey::KIND_ANY || _kind == lkey.kind()) { + UnicodeString keyID; + lkey.currentID(keyID); + if (_id == keyID) { + return service->cloneInstance(_obj); + } + } + } + return NULL; +} + +//UBool +//SimpleLocaleKeyFactory::isSupportedID(const UnicodeString& id, UErrorCode& /* status */) const +//{ +// return id == _id; +//} + +void +SimpleLocaleKeyFactory::updateVisibleIDs(Hashtable& result, UErrorCode& status) const +{ + if (U_SUCCESS(status)) { + if (_coverage & 0x1) { + result.remove(_id); + } else { + result.put(_id, (void*)this, status); + } + } +} + +#ifdef SERVICE_DEBUG +UnicodeString& +SimpleLocaleKeyFactory::debug(UnicodeString& result) const +{ + LocaleKeyFactory::debug(result); + result.append((UnicodeString)", id: "); + result.append(_id); + result.append((UnicodeString)", kind: "); + result.append(_kind); + return result; +} + +UnicodeString& +SimpleLocaleKeyFactory::debugClass(UnicodeString& result) const +{ + return result.append((UnicodeString)"SimpleLocaleKeyFactory"); +} +#endif + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleLocaleKeyFactory) + +U_NAMESPACE_END + +/* !UCONFIG_NO_SERVICE */ +#endif + + diff --git a/thirdparty/icu4c/common/sharedobject.cpp b/thirdparty/icu4c/common/sharedobject.cpp new file mode 100644 index 0000000000..6eeca8605f --- /dev/null +++ b/thirdparty/icu4c/common/sharedobject.cpp @@ -0,0 +1,62 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2015, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* sharedobject.cpp +*/ +#include "sharedobject.h" +#include "mutex.h" +#include "uassert.h" +#include "umutex.h" +#include "unifiedcache.h" + +U_NAMESPACE_BEGIN + +SharedObject::~SharedObject() {} + +UnifiedCacheBase::~UnifiedCacheBase() {} + +void +SharedObject::addRef() const { + umtx_atomic_inc(&hardRefCount); +} + +// removeRef Decrement the reference count and delete if it is zero. +// Note that SharedObjects with a non-null cachePtr are owned by the +// unified cache, and the cache will be responsible for the actual deletion. +// The deletion could be as soon as immediately following the +// update to the reference count, if another thread is running +// a cache eviction cycle concurrently. +// NO ACCESS TO *this PERMITTED AFTER REFERENCE COUNT == 0 for cached objects. +// THE OBJECT MAY ALREADY BE GONE. +void +SharedObject::removeRef() const { + const UnifiedCacheBase *cache = this->cachePtr; + int32_t updatedRefCount = umtx_atomic_dec(&hardRefCount); + U_ASSERT(updatedRefCount >= 0); + if (updatedRefCount == 0) { + if (cache) { + cache->handleUnreferencedObject(); + } else { + delete this; + } + } +} + + +int32_t +SharedObject::getRefCount() const { + return umtx_loadAcquire(hardRefCount); +} + +void +SharedObject::deleteIfZeroRefCount() const { + if (this->cachePtr == nullptr && getRefCount() == 0) { + delete this; + } +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/sharedobject.h b/thirdparty/icu4c/common/sharedobject.h new file mode 100644 index 0000000000..6ccfb27b01 --- /dev/null +++ b/thirdparty/icu4c/common/sharedobject.h @@ -0,0 +1,184 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2015-2016, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* sharedobject.h +*/ + +#ifndef __SHAREDOBJECT_H__ +#define __SHAREDOBJECT_H__ + + +#include "unicode/uobject.h" +#include "umutex.h" + +U_NAMESPACE_BEGIN + +class SharedObject; + +/** + * Base class for unified cache exposing enough methods to SharedObject + * instances to allow their addRef() and removeRef() methods to + * update cache metrics. No other part of ICU, except for SharedObject, + * should directly call the methods of this base class. + */ +class U_COMMON_API UnifiedCacheBase : public UObject { +public: + UnifiedCacheBase() { } + + /** + * Notify the cache implementation that an object was seen transitioning to + * zero hard references. The cache may use this to keep track the number of + * unreferenced SharedObjects, and to trigger evictions. + */ + virtual void handleUnreferencedObject() const = 0; + + virtual ~UnifiedCacheBase(); +private: + UnifiedCacheBase(const UnifiedCacheBase &); + UnifiedCacheBase &operator=(const UnifiedCacheBase &); +}; + +/** + * Base class for shared, reference-counted, auto-deleted objects. + * Subclasses can be immutable. + * If they are mutable, then they must implement their copy constructor + * so that copyOnWrite() works. + * + * Either stack-allocate, use LocalPointer, or use addRef()/removeRef(). + * Sharing requires reference-counting. + */ +class U_COMMON_API SharedObject : public UObject { +public: + /** Initializes totalRefCount, softRefCount to 0. */ + SharedObject() : + softRefCount(0), + hardRefCount(0), + cachePtr(NULL) {} + + /** Initializes totalRefCount, softRefCount to 0. */ + SharedObject(const SharedObject &other) : + UObject(other), + softRefCount(0), + hardRefCount(0), + cachePtr(NULL) {} + + virtual ~SharedObject(); + + /** + * Increments the number of hard references to this object. Thread-safe. + * Not for use from within the Unified Cache implementation. + */ + void addRef() const; + + /** + * Decrements the number of hard references to this object, and + * arrange for possible cache-eviction and/or deletion if ref + * count goes to zero. Thread-safe. + * + * Not for use from within the UnifiedCache implementation. + */ + void removeRef() const; + + /** + * Returns the number of hard references for this object. + * Uses a memory barrier. + */ + int32_t getRefCount() const; + + /** + * If noHardReferences() == true then this object has no hard references. + * Must be called only from within the internals of UnifiedCache. + */ + inline UBool noHardReferences() const { return getRefCount() == 0; } + + /** + * If hasHardReferences() == true then this object has hard references. + * Must be called only from within the internals of UnifiedCache. + */ + inline UBool hasHardReferences() const { return getRefCount() != 0; } + + /** + * Deletes this object if it has no references. + * Available for non-cached SharedObjects only. Ownership of cached objects + * is with the UnifiedCache, which is solely responsible for eviction and deletion. + */ + void deleteIfZeroRefCount() const; + + + /** + * Returns a writable version of ptr. + * If there is exactly one owner, then ptr itself is returned as a + * non-const pointer. + * If there are multiple owners, then ptr is replaced with a + * copy-constructed clone, + * and that is returned. + * Returns NULL if cloning failed. + * + * T must be a subclass of SharedObject. + */ + template<typename T> + static T *copyOnWrite(const T *&ptr) { + const T *p = ptr; + if(p->getRefCount() <= 1) { return const_cast<T *>(p); } + T *p2 = new T(*p); + if(p2 == NULL) { return NULL; } + p->removeRef(); + ptr = p2; + p2->addRef(); + return p2; + } + + /** + * Makes dest an owner of the object pointed to by src while adjusting + * reference counts and deleting the previous object dest pointed to + * if necessary. Before this call is made, dest must either be NULL or + * be included in the reference count of the object it points to. + * + * T must be a subclass of SharedObject. + */ + template<typename T> + static void copyPtr(const T *src, const T *&dest) { + if(src != dest) { + if(dest != NULL) { dest->removeRef(); } + dest = src; + if(src != NULL) { src->addRef(); } + } + } + + /** + * Equivalent to copyPtr(NULL, dest). + */ + template<typename T> + static void clearPtr(const T *&ptr) { + if (ptr != NULL) { + ptr->removeRef(); + ptr = NULL; + } + } + +private: + /** + * The number of references from the UnifiedCache, which is + * the number of times that the sharedObject is stored as a hash table value. + * For use by UnifiedCache implementation code only. + * All access is synchronized by UnifiedCache's gCacheMutex + */ + mutable int32_t softRefCount; + friend class UnifiedCache; + + /** + * Reference count, excluding references from within the UnifiedCache implementation. + */ + mutable u_atomic_int32_t hardRefCount; + + mutable const UnifiedCacheBase *cachePtr; + +}; + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/simpleformatter.cpp b/thirdparty/icu4c/common/simpleformatter.cpp new file mode 100644 index 0000000000..f7f7aead61 --- /dev/null +++ b/thirdparty/icu4c/common/simpleformatter.cpp @@ -0,0 +1,325 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2014-2016, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* simpleformatter.cpp +*/ + +#include "unicode/utypes.h" +#include "unicode/simpleformatter.h" +#include "unicode/unistr.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + +namespace { + +/** + * Argument numbers must be smaller than this limit. + * Text segment lengths are offset by this much. + * This is currently the only unused char value in compiled patterns, + * except it is the maximum value of the first unit (max arg +1). + */ +const int32_t ARG_NUM_LIMIT = 0x100; +/** + * Initial and maximum char/UChar value set for a text segment. + * Segment length char values are from ARG_NUM_LIMIT+1 to this value here. + * Normally 0xffff, but can be as small as ARG_NUM_LIMIT+1 for testing. + */ +const UChar SEGMENT_LENGTH_PLACEHOLDER_CHAR = 0xffff; +/** + * Maximum length of a text segment. Longer segments are split into shorter ones. + */ +const int32_t MAX_SEGMENT_LENGTH = SEGMENT_LENGTH_PLACEHOLDER_CHAR - ARG_NUM_LIMIT; + +enum { + APOS = 0x27, + DIGIT_ZERO = 0x30, + DIGIT_ONE = 0x31, + DIGIT_NINE = 0x39, + OPEN_BRACE = 0x7b, + CLOSE_BRACE = 0x7d +}; + +inline UBool isInvalidArray(const void *array, int32_t length) { + return (length < 0 || (array == NULL && length != 0)); +} + +} // namespace + +SimpleFormatter &SimpleFormatter::operator=(const SimpleFormatter& other) { + if (this == &other) { + return *this; + } + compiledPattern = other.compiledPattern; + return *this; +} + +SimpleFormatter::~SimpleFormatter() {} + +UBool SimpleFormatter::applyPatternMinMaxArguments( + const UnicodeString &pattern, + int32_t min, int32_t max, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return FALSE; + } + // Parse consistent with MessagePattern, but + // - support only simple numbered arguments + // - build a simple binary structure into the result string + const UChar *patternBuffer = pattern.getBuffer(); + int32_t patternLength = pattern.length(); + // Reserve the first char for the number of arguments. + compiledPattern.setTo((UChar)0); + int32_t textLength = 0; + int32_t maxArg = -1; + UBool inQuote = FALSE; + for (int32_t i = 0; i < patternLength;) { + UChar c = patternBuffer[i++]; + if (c == APOS) { + if (i < patternLength && (c = patternBuffer[i]) == APOS) { + // double apostrophe, skip the second one + ++i; + } else if (inQuote) { + // skip the quote-ending apostrophe + inQuote = FALSE; + continue; + } else if (c == OPEN_BRACE || c == CLOSE_BRACE) { + // Skip the quote-starting apostrophe, find the end of the quoted literal text. + ++i; + inQuote = TRUE; + } else { + // The apostrophe is part of literal text. + c = APOS; + } + } else if (!inQuote && c == OPEN_BRACE) { + if (textLength > 0) { + compiledPattern.setCharAt(compiledPattern.length() - textLength - 1, + (UChar)(ARG_NUM_LIMIT + textLength)); + textLength = 0; + } + int32_t argNumber; + if ((i + 1) < patternLength && + 0 <= (argNumber = patternBuffer[i] - DIGIT_ZERO) && argNumber <= 9 && + patternBuffer[i + 1] == CLOSE_BRACE) { + i += 2; + } else { + // Multi-digit argument number (no leading zero) or syntax error. + // MessagePattern permits PatternProps.skipWhiteSpace(pattern, index) + // around the number, but this class does not. + argNumber = -1; + if (i < patternLength && DIGIT_ONE <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) { + argNumber = c - DIGIT_ZERO; + while (i < patternLength && + DIGIT_ZERO <= (c = patternBuffer[i++]) && c <= DIGIT_NINE) { + argNumber = argNumber * 10 + (c - DIGIT_ZERO); + if (argNumber >= ARG_NUM_LIMIT) { + break; + } + } + } + if (argNumber < 0 || c != CLOSE_BRACE) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + } + if (argNumber > maxArg) { + maxArg = argNumber; + } + compiledPattern.append((UChar)argNumber); + continue; + } // else: c is part of literal text + // Append c and track the literal-text segment length. + if (textLength == 0) { + // Reserve a char for the length of a new text segment, preset the maximum length. + compiledPattern.append(SEGMENT_LENGTH_PLACEHOLDER_CHAR); + } + compiledPattern.append(c); + if (++textLength == MAX_SEGMENT_LENGTH) { + textLength = 0; + } + } + if (textLength > 0) { + compiledPattern.setCharAt(compiledPattern.length() - textLength - 1, + (UChar)(ARG_NUM_LIMIT + textLength)); + } + int32_t argCount = maxArg + 1; + if (argCount < min || max < argCount) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + compiledPattern.setCharAt(0, (UChar)argCount); + return TRUE; +} + +UnicodeString& SimpleFormatter::format( + const UnicodeString &value0, + UnicodeString &appendTo, UErrorCode &errorCode) const { + const UnicodeString *values[] = { &value0 }; + return formatAndAppend(values, 1, appendTo, NULL, 0, errorCode); +} + +UnicodeString& SimpleFormatter::format( + const UnicodeString &value0, + const UnicodeString &value1, + UnicodeString &appendTo, UErrorCode &errorCode) const { + const UnicodeString *values[] = { &value0, &value1 }; + return formatAndAppend(values, 2, appendTo, NULL, 0, errorCode); +} + +UnicodeString& SimpleFormatter::format( + const UnicodeString &value0, + const UnicodeString &value1, + const UnicodeString &value2, + UnicodeString &appendTo, UErrorCode &errorCode) const { + const UnicodeString *values[] = { &value0, &value1, &value2 }; + return formatAndAppend(values, 3, appendTo, NULL, 0, errorCode); +} + +UnicodeString& SimpleFormatter::formatAndAppend( + const UnicodeString *const *values, int32_t valuesLength, + UnicodeString &appendTo, + int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { + return appendTo; + } + if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength) || + valuesLength < getArgumentLimit()) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return appendTo; + } + return format(compiledPattern.getBuffer(), compiledPattern.length(), values, + appendTo, NULL, TRUE, + offsets, offsetsLength, errorCode); +} + +UnicodeString &SimpleFormatter::formatAndReplace( + const UnicodeString *const *values, int32_t valuesLength, + UnicodeString &result, + int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const { + if (U_FAILURE(errorCode)) { + return result; + } + if (isInvalidArray(values, valuesLength) || isInvalidArray(offsets, offsetsLength)) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return result; + } + const UChar *cp = compiledPattern.getBuffer(); + int32_t cpLength = compiledPattern.length(); + if (valuesLength < getArgumentLimit(cp, cpLength)) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return result; + } + + // If the pattern starts with an argument whose value is the same object + // as the result, then we keep the result contents and append to it. + // Otherwise we replace its contents. + int32_t firstArg = -1; + // If any non-initial argument value is the same object as the result, + // then we first copy its contents and use that instead while formatting. + UnicodeString resultCopy; + if (getArgumentLimit(cp, cpLength) > 0) { + for (int32_t i = 1; i < cpLength;) { + int32_t n = cp[i++]; + if (n < ARG_NUM_LIMIT) { + if (values[n] == &result) { + if (i == 2) { + firstArg = n; + } else if (resultCopy.isEmpty() && !result.isEmpty()) { + resultCopy = result; + } + } + } else { + i += n - ARG_NUM_LIMIT; + } + } + } + if (firstArg < 0) { + result.remove(); + } + return format(cp, cpLength, values, + result, &resultCopy, FALSE, + offsets, offsetsLength, errorCode); +} + +UnicodeString SimpleFormatter::getTextWithNoArguments( + const UChar *compiledPattern, + int32_t compiledPatternLength, + int32_t* offsets, + int32_t offsetsLength) { + for (int32_t i = 0; i < offsetsLength; i++) { + offsets[i] = -1; + } + int32_t capacity = compiledPatternLength - 1 - + getArgumentLimit(compiledPattern, compiledPatternLength); + UnicodeString sb(capacity, 0, 0); // Java: StringBuilder + for (int32_t i = 1; i < compiledPatternLength;) { + int32_t n = compiledPattern[i++]; + if (n > ARG_NUM_LIMIT) { + n -= ARG_NUM_LIMIT; + sb.append(compiledPattern + i, n); + i += n; + } else if (n < offsetsLength) { + // TODO(ICU-20406): This does not distinguish between "{0}{1}" and "{1}{0}". + // Consider removing this function and replacing it with an iterator interface. + offsets[n] = sb.length(); + } + } + return sb; +} + +UnicodeString &SimpleFormatter::format( + const UChar *compiledPattern, int32_t compiledPatternLength, + const UnicodeString *const *values, + UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue, + int32_t *offsets, int32_t offsetsLength, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return result; + } + for (int32_t i = 0; i < offsetsLength; i++) { + offsets[i] = -1; + } + for (int32_t i = 1; i < compiledPatternLength;) { + int32_t n = compiledPattern[i++]; + if (n < ARG_NUM_LIMIT) { + const UnicodeString *value = values[n]; + if (value == NULL) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return result; + } + if (value == &result) { + if (forbidResultAsValue) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return result; + } + if (i == 2) { + // We are appending to result which is also the first value object. + if (n < offsetsLength) { + offsets[n] = 0; + } + } else { + if (n < offsetsLength) { + offsets[n] = result.length(); + } + result.append(*resultCopy); + } + } else { + if (n < offsetsLength) { + offsets[n] = result.length(); + } + result.append(*value); + } + } else { + int32_t length = n - ARG_NUM_LIMIT; + result.append(compiledPattern + i, length); + i += length; + } + } + return result; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/sprpimpl.h b/thirdparty/icu4c/common/sprpimpl.h new file mode 100644 index 0000000000..ca0bcdb516 --- /dev/null +++ b/thirdparty/icu4c/common/sprpimpl.h @@ -0,0 +1,130 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * + * Copyright (C) 2003-2006, International Business Machines + * Corporation and others. All Rights Reserved. + * + ******************************************************************************* + * file name: sprpimpl.h + * encoding: UTF-8 + * tab size: 8 (not used) + * indentation:4 + * + * created on: 2003feb1 + * created by: Ram Viswanadha + */ + +#ifndef SPRPIMPL_H +#define SPRPIMPL_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_IDNA + +#include "unicode/ustring.h" +#include "unicode/parseerr.h" +#include "unicode/usprep.h" +#include "unicode/udata.h" +#include "utrie.h" +#include "udataswp.h" +#include "ubidi_props.h" + +#define _SPREP_DATA_TYPE "spp" + +enum UStringPrepType{ + USPREP_UNASSIGNED = 0x0000 , + USPREP_MAP = 0x0001 , + USPREP_PROHIBITED = 0x0002 , + USPREP_DELETE = 0x0003 , + USPREP_TYPE_LIMIT = 0x0004 +}; + +typedef enum UStringPrepType UStringPrepType; + +#ifdef USPREP_TYPE_NAMES_ARRAY +static const char* usprepTypeNames[] ={ + "UNASSIGNED" , + "MAP" , + "PROHIBITED" , + "DELETE", + "TYPE_LIMIT" +}; +#endif + +enum{ + _SPREP_NORMALIZATION_ON = 0x0001, + _SPREP_CHECK_BIDI_ON = 0x0002 +}; + +enum{ + _SPREP_TYPE_THRESHOLD = 0xFFF0, + _SPREP_MAX_INDEX_VALUE = 0x3FBF, /*16139*/ + _SPREP_MAX_INDEX_TOP_LENGTH = 0x0003 +}; + +/* indexes[] value names */ +enum { + _SPREP_INDEX_TRIE_SIZE = 0, /* number of bytes in StringPrep trie */ + _SPREP_INDEX_MAPPING_DATA_SIZE = 1, /* The array that contains the mapping */ + _SPREP_NORM_CORRECTNS_LAST_UNI_VERSION = 2, /* The index of Unicode version of last entry in NormalizationCorrections.txt */ + _SPREP_ONE_UCHAR_MAPPING_INDEX_START = 3, /* The starting index of 1 UChar mapping index in the mapping data array */ + _SPREP_TWO_UCHARS_MAPPING_INDEX_START = 4, /* The starting index of 2 UChars mapping index in the mapping data array */ + _SPREP_THREE_UCHARS_MAPPING_INDEX_START = 5, /* The starting index of 3 UChars mapping index in the mapping data array */ + _SPREP_FOUR_UCHARS_MAPPING_INDEX_START = 6, /* The starting index of 4 UChars mapping index in the mapping data array */ + _SPREP_OPTIONS = 7, /* Bit set of options to turn on in the profile */ + _SPREP_INDEX_TOP=16 /* changing this requires a new formatVersion */ +}; + +typedef struct UStringPrepKey UStringPrepKey; + + +struct UStringPrepKey{ + char* name; + char* path; +}; + +struct UStringPrepProfile{ + int32_t indexes[_SPREP_INDEX_TOP]; + UTrie sprepTrie; + const uint16_t* mappingData; + UDataMemory* sprepData; + int32_t refCount; + UBool isDataLoaded; + UBool doNFKC; + UBool checkBiDi; +}; + +/** + * Helper function for populating the UParseError struct + * @internal + */ +U_CAPI void U_EXPORT2 +uprv_syntaxError(const UChar* rules, + int32_t pos, + int32_t rulesLen, + UParseError* parseError); + + +/** + * Swap StringPrep .spp profile data. See udataswp.h. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +usprep_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +#endif /* #if !UCONFIG_NO_IDNA */ + +#endif + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/thirdparty/icu4c/common/static_unicode_sets.cpp b/thirdparty/icu4c/common/static_unicode_sets.cpp new file mode 100644 index 0000000000..5dab3931a7 --- /dev/null +++ b/thirdparty/icu4c/common/static_unicode_sets.cpp @@ -0,0 +1,245 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT + +#include "static_unicode_sets.h" +#include "umutex.h" +#include "ucln_cmn.h" +#include "unicode/uniset.h" +#include "uresimp.h" +#include "cstring.h" +#include "uassert.h" + +using namespace icu; +using namespace icu::unisets; + + +namespace { + +UnicodeSet* gUnicodeSets[UNISETS_KEY_COUNT] = {}; + +// Save the empty instance in static memory to have well-defined behavior if a +// regular UnicodeSet cannot be allocated. +alignas(UnicodeSet) +char gEmptyUnicodeSet[sizeof(UnicodeSet)]; + +// Whether the gEmptyUnicodeSet is initialized and ready to use. +UBool gEmptyUnicodeSetInitialized = FALSE; + +inline UnicodeSet* getImpl(Key key) { + UnicodeSet* candidate = gUnicodeSets[key]; + if (candidate == nullptr) { + return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet); + } + return candidate; +} + +UnicodeSet* computeUnion(Key k1, Key k2) { + UnicodeSet* result = new UnicodeSet(); + if (result == nullptr) { + return nullptr; + } + result->addAll(*getImpl(k1)); + result->addAll(*getImpl(k2)); + result->freeze(); + return result; +} + +UnicodeSet* computeUnion(Key k1, Key k2, Key k3) { + UnicodeSet* result = new UnicodeSet(); + if (result == nullptr) { + return nullptr; + } + result->addAll(*getImpl(k1)); + result->addAll(*getImpl(k2)); + result->addAll(*getImpl(k3)); + result->freeze(); + return result; +} + + +void saveSet(Key key, const UnicodeString& unicodeSetPattern, UErrorCode& status) { + // assert unicodeSets.get(key) == null; + gUnicodeSets[key] = new UnicodeSet(unicodeSetPattern, status); +} + +class ParseDataSink : public ResourceSink { + public: + void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE { + ResourceTable contextsTable = value.getTable(status); + if (U_FAILURE(status)) { return; } + for (int i = 0; contextsTable.getKeyAndValue(i, key, value); i++) { + if (uprv_strcmp(key, "date") == 0) { + // ignore + } else { + ResourceTable strictnessTable = value.getTable(status); + if (U_FAILURE(status)) { return; } + for (int j = 0; strictnessTable.getKeyAndValue(j, key, value); j++) { + bool isLenient = (uprv_strcmp(key, "lenient") == 0); + ResourceArray array = value.getArray(status); + if (U_FAILURE(status)) { return; } + for (int k = 0; k < array.getSize(); k++) { + array.getValue(k, value); + UnicodeString str = value.getUnicodeString(status); + if (U_FAILURE(status)) { return; } + // There is both lenient and strict data for comma/period, + // but not for any of the other symbols. + if (str.indexOf(u'.') != -1) { + saveSet(isLenient ? PERIOD : STRICT_PERIOD, str, status); + } else if (str.indexOf(u',') != -1) { + saveSet(isLenient ? COMMA : STRICT_COMMA, str, status); + } else if (str.indexOf(u'+') != -1) { + saveSet(PLUS_SIGN, str, status); + } else if (str.indexOf(u'-') != -1) { + saveSet(MINUS_SIGN, str, status); + } else if (str.indexOf(u'$') != -1) { + saveSet(DOLLAR_SIGN, str, status); + } else if (str.indexOf(u'£') != -1) { + saveSet(POUND_SIGN, str, status); + } else if (str.indexOf(u'₹') != -1) { + saveSet(RUPEE_SIGN, str, status); + } else if (str.indexOf(u'¥') != -1) { + saveSet(YEN_SIGN, str, status); + } else if (str.indexOf(u'₩') != -1) { + saveSet(WON_SIGN, str, status); + } else if (str.indexOf(u'%') != -1) { + saveSet(PERCENT_SIGN, str, status); + } else if (str.indexOf(u'‰') != -1) { + saveSet(PERMILLE_SIGN, str, status); + } else if (str.indexOf(u'’') != -1) { + saveSet(APOSTROPHE_SIGN, str, status); + } else { + // Unknown class of parse lenients + // TODO(ICU-20428): Make ICU automatically accept new classes? + U_ASSERT(FALSE); + } + if (U_FAILURE(status)) { return; } + } + } + } + } + } +}; + + +icu::UInitOnce gNumberParseUniSetsInitOnce = U_INITONCE_INITIALIZER; + +UBool U_CALLCONV cleanupNumberParseUniSets() { + if (gEmptyUnicodeSetInitialized) { + reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->~UnicodeSet(); + gEmptyUnicodeSetInitialized = FALSE; + } + for (int32_t i = 0; i < UNISETS_KEY_COUNT; i++) { + delete gUnicodeSets[i]; + gUnicodeSets[i] = nullptr; + } + gNumberParseUniSetsInitOnce.reset(); + return TRUE; +} + +void U_CALLCONV initNumberParseUniSets(UErrorCode& status) { + ucln_common_registerCleanup(UCLN_COMMON_NUMPARSE_UNISETS, cleanupNumberParseUniSets); + + // Initialize the empty instance for well-defined fallback behavior + new(gEmptyUnicodeSet) UnicodeSet(); + reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet)->freeze(); + gEmptyUnicodeSetInitialized = TRUE; + + // These sets were decided after discussion with icu-design@. See tickets #13084 and #13309. + // Zs+TAB is "horizontal whitespace" according to UTS #18 (blank property). + gUnicodeSets[DEFAULT_IGNORABLES] = new UnicodeSet( + u"[[:Zs:][\\u0009][:Bidi_Control:][:Variation_Selector:]]", status); + gUnicodeSets[STRICT_IGNORABLES] = new UnicodeSet(u"[[:Bidi_Control:]]", status); + + LocalUResourceBundlePointer rb(ures_open(nullptr, "root", &status)); + if (U_FAILURE(status)) { return; } + ParseDataSink sink; + ures_getAllItemsWithFallback(rb.getAlias(), "parse", sink, status); + if (U_FAILURE(status)) { return; } + + // NOTE: It is OK for these assertions to fail if there was a no-data build. + U_ASSERT(gUnicodeSets[COMMA] != nullptr); + U_ASSERT(gUnicodeSets[STRICT_COMMA] != nullptr); + U_ASSERT(gUnicodeSets[PERIOD] != nullptr); + U_ASSERT(gUnicodeSets[STRICT_PERIOD] != nullptr); + U_ASSERT(gUnicodeSets[APOSTROPHE_SIGN] != nullptr); + + LocalPointer<UnicodeSet> otherGrouping(new UnicodeSet( + u"[٬‘\\u0020\\u00A0\\u2000-\\u200A\\u202F\\u205F\\u3000]", + status + ), status); + if (U_FAILURE(status)) { return; } + otherGrouping->addAll(*gUnicodeSets[APOSTROPHE_SIGN]); + gUnicodeSets[OTHER_GROUPING_SEPARATORS] = otherGrouping.orphan(); + gUnicodeSets[ALL_SEPARATORS] = computeUnion(COMMA, PERIOD, OTHER_GROUPING_SEPARATORS); + gUnicodeSets[STRICT_ALL_SEPARATORS] = computeUnion( + STRICT_COMMA, STRICT_PERIOD, OTHER_GROUPING_SEPARATORS); + + U_ASSERT(gUnicodeSets[MINUS_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[PLUS_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr); + + gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status); + if (U_FAILURE(status)) { return; } + + U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[POUND_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[RUPEE_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[YEN_SIGN] != nullptr); + U_ASSERT(gUnicodeSets[WON_SIGN] != nullptr); + + gUnicodeSets[DIGITS] = new UnicodeSet(u"[:digit:]", status); + if (U_FAILURE(status)) { return; } + gUnicodeSets[DIGITS_OR_ALL_SEPARATORS] = computeUnion(DIGITS, ALL_SEPARATORS); + gUnicodeSets[DIGITS_OR_STRICT_ALL_SEPARATORS] = computeUnion(DIGITS, STRICT_ALL_SEPARATORS); + + for (auto* uniset : gUnicodeSets) { + if (uniset != nullptr) { + uniset->freeze(); + } + } +} + +} + +const UnicodeSet* unisets::get(Key key) { + UErrorCode localStatus = U_ZERO_ERROR; + umtx_initOnce(gNumberParseUniSetsInitOnce, &initNumberParseUniSets, localStatus); + if (U_FAILURE(localStatus)) { + return reinterpret_cast<UnicodeSet*>(gEmptyUnicodeSet); + } + return getImpl(key); +} + +Key unisets::chooseFrom(UnicodeString str, Key key1) { + return get(key1)->contains(str) ? key1 : NONE; +} + +Key unisets::chooseFrom(UnicodeString str, Key key1, Key key2) { + return get(key1)->contains(str) ? key1 : chooseFrom(str, key2); +} + +//Key unisets::chooseCurrency(UnicodeString str) { +// if (get(DOLLAR_SIGN)->contains(str)) { +// return DOLLAR_SIGN; +// } else if (get(POUND_SIGN)->contains(str)) { +// return POUND_SIGN; +// } else if (get(RUPEE_SIGN)->contains(str)) { +// return RUPEE_SIGN; +// } else if (get(YEN_SIGN)->contains(str)) { +// return YEN_SIGN; +// } else { +// return NONE; +// } +//} + + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/thirdparty/icu4c/common/static_unicode_sets.h b/thirdparty/icu4c/common/static_unicode_sets.h new file mode 100644 index 0000000000..5d90ce5908 --- /dev/null +++ b/thirdparty/icu4c/common/static_unicode_sets.h @@ -0,0 +1,140 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// This file contains utilities to deal with static-allocated UnicodeSets. +// +// Common use case: you write a "private static final" UnicodeSet in Java, and +// want something similarly easy in C++. Originally written for number +// parsing, but this header can be used for other applications. +// +// Main entrypoint: `unisets::get(unisets::MY_SET_ID_HERE)` +// +// This file is in common instead of i18n because it is needed by ucurr.cpp. +// +// Author: sffc + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING +#ifndef __STATIC_UNICODE_SETS_H__ +#define __STATIC_UNICODE_SETS_H__ + +#include "unicode/uniset.h" +#include "unicode/unistr.h" + +U_NAMESPACE_BEGIN +namespace unisets { + +enum Key { + // NONE is used to indicate null in chooseFrom(). + // EMPTY is used to get an empty UnicodeSet. + NONE = -1, + EMPTY = 0, + + // Ignorables + DEFAULT_IGNORABLES, + STRICT_IGNORABLES, + + // Separators + // Notes: + // - COMMA is a superset of STRICT_COMMA + // - PERIOD is a superset of SCRICT_PERIOD + // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS + // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and OTHER_GRP_SEPARATORS + COMMA, + PERIOD, + STRICT_COMMA, + STRICT_PERIOD, + APOSTROPHE_SIGN, + OTHER_GROUPING_SEPARATORS, + ALL_SEPARATORS, + STRICT_ALL_SEPARATORS, + + // Symbols + MINUS_SIGN, + PLUS_SIGN, + PERCENT_SIGN, + PERMILLE_SIGN, + INFINITY_SIGN, + + // Currency Symbols + DOLLAR_SIGN, + POUND_SIGN, + RUPEE_SIGN, + YEN_SIGN, + WON_SIGN, + + // Other + DIGITS, + + // Combined Separators with Digits (for lead code points) + DIGITS_OR_ALL_SEPARATORS, + DIGITS_OR_STRICT_ALL_SEPARATORS, + + // The number of elements in the enum. + UNISETS_KEY_COUNT +}; + +/** + * Gets the static-allocated UnicodeSet according to the provided key. The + * pointer will be deleted during u_cleanup(); the caller should NOT delete it. + * + * Exported as U_COMMON_API for ucurr.cpp + * + * This method is always safe and OK to chain: in the case of a memory or other + * error, it returns an empty set from static memory. + * + * Example: + * + * UBool hasIgnorables = unisets::get(unisets::DEFAULT_IGNORABLES)->contains(...); + * + * @param key The desired UnicodeSet according to the enum in this file. + * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but + * may be empty if an error occurred during data loading. + */ +U_COMMON_API const UnicodeSet* get(Key key); + +/** + * Checks if the UnicodeSet given by key1 contains the given string. + * + * Exported as U_COMMON_API for numparse_decimal.cpp + * + * @param str The string to check. + * @param key1 The set to check. + * @return key1 if the set contains str, or NONE if not. + */ +U_COMMON_API Key chooseFrom(UnicodeString str, Key key1); + +/** + * Checks if the UnicodeSet given by either key1 or key2 contains the string. + * + * Exported as U_COMMON_API for numparse_decimal.cpp + * + * @param str The string to check. + * @param key1 The first set to check. + * @param key2 The second set to check. + * @return key1 if that set contains str; key2 if that set contains str; or + * NONE if neither set contains str. + */ +U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2); + +// TODO: Load these from data: ICU-20108 +// Unused in C++: +// Key chooseCurrency(UnicodeString str); +// Used instead: +static const struct { + Key key; + UChar32 exemplar; +} kCurrencyEntries[] = { + {DOLLAR_SIGN, u'$'}, + {POUND_SIGN, u'£'}, + {RUPEE_SIGN, u'₹'}, + {YEN_SIGN, u'¥'}, + {WON_SIGN, u'₩'}, +}; + +} // namespace unisets +U_NAMESPACE_END + +#endif //__STATIC_UNICODE_SETS_H__ +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/thirdparty/icu4c/common/stringpiece.cpp b/thirdparty/icu4c/common/stringpiece.cpp new file mode 100644 index 0000000000..99089e08ef --- /dev/null +++ b/thirdparty/icu4c/common/stringpiece.cpp @@ -0,0 +1,116 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// Copyright (C) 2009-2013, International Business Machines +// Corporation and others. All Rights Reserved. +// +// Copyright 2004 and onwards Google Inc. +// +// Author: wilsonh@google.com (Wilson Hsieh) +// + +#include "unicode/utypes.h" +#include "unicode/stringpiece.h" +#include "cstring.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + +StringPiece::StringPiece(const char* str) + : ptr_(str), length_((str == NULL) ? 0 : static_cast<int32_t>(uprv_strlen(str))) { } + +StringPiece::StringPiece(const StringPiece& x, int32_t pos) { + if (pos < 0) { + pos = 0; + } else if (pos > x.length_) { + pos = x.length_; + } + ptr_ = x.ptr_ + pos; + length_ = x.length_ - pos; +} + +StringPiece::StringPiece(const StringPiece& x, int32_t pos, int32_t len) { + if (pos < 0) { + pos = 0; + } else if (pos > x.length_) { + pos = x.length_; + } + if (len < 0) { + len = 0; + } else if (len > x.length_ - pos) { + len = x.length_ - pos; + } + ptr_ = x.ptr_ + pos; + length_ = len; +} + +void StringPiece::set(const char* str) { + ptr_ = str; + if (str != NULL) + length_ = static_cast<int32_t>(uprv_strlen(str)); + else + length_ = 0; +} + +int32_t StringPiece::find(StringPiece needle, int32_t offset) { + if (length() == 0 && needle.length() == 0) { + return 0; + } + // TODO: Improve to be better than O(N^2)? + for (int32_t i = offset; i < length(); i++) { + int32_t j = 0; + for (; j < needle.length(); i++, j++) { + if (data()[i] != needle.data()[j]) { + i -= j; + goto outer_end; + } + } + return i - j; + outer_end: void(); + } + return -1; +} + +int32_t StringPiece::compare(StringPiece other) { + int32_t i = 0; + for (; i < length(); i++) { + if (i == other.length()) { + // this is longer + return 1; + } + char a = data()[i]; + char b = other.data()[i]; + if (a < b) { + return -1; + } else if (a > b) { + return 1; + } + } + if (i < other.length()) { + // other is longer + return -1; + } + return 0; +} + +U_EXPORT UBool U_EXPORT2 +operator==(const StringPiece& x, const StringPiece& y) { + int32_t len = x.size(); + if (len != y.size()) { + return false; + } + if (len == 0) { + return true; + } + const char* p = x.data(); + const char* p2 = y.data(); + // Test last byte in case strings share large common prefix + --len; + if (p[len] != p2[len]) return false; + // At this point we can, but don't have to, ignore the last byte. + return uprv_memcmp(p, p2, len) == 0; +} + + +const int32_t StringPiece::npos = 0x7fffffff; + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/stringtriebuilder.cpp b/thirdparty/icu4c/common/stringtriebuilder.cpp new file mode 100644 index 0000000000..6f9cc2e5c2 --- /dev/null +++ b/thirdparty/icu4c/common/stringtriebuilder.cpp @@ -0,0 +1,618 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2012, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: stringtriebuilder.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010dec24 +* created by: Markus W. Scherer +*/ + +#include "utypeinfo.h" // for 'typeid' to work +#include "unicode/utypes.h" +#include "unicode/stringtriebuilder.h" +#include "uassert.h" +#include "uhash.h" + +U_CDECL_BEGIN + +static int32_t U_CALLCONV +hashStringTrieNode(const UHashTok key) { + return icu::StringTrieBuilder::hashNode(key.pointer); +} + +static UBool U_CALLCONV +equalStringTrieNodes(const UHashTok key1, const UHashTok key2) { + return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer); +} + +U_CDECL_END + +U_NAMESPACE_BEGIN + +StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {} + +StringTrieBuilder::~StringTrieBuilder() { + deleteCompactBuilder(); +} + +void +StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL, + sizeGuess, &errorCode); + if(U_SUCCESS(errorCode)) { + if(nodes==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } else { + uhash_setKeyDeleter(nodes, uprv_deleteUObject); + } + } +} + +void +StringTrieBuilder::deleteCompactBuilder() { + uhash_close(nodes); + nodes=NULL; +} + +void +StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength, + UErrorCode &errorCode) { + if(buildOption==USTRINGTRIE_BUILD_FAST) { + writeNode(0, elementsLength, 0); + } else /* USTRINGTRIE_BUILD_SMALL */ { + createCompactBuilder(2*elementsLength, errorCode); + Node *root=makeNode(0, elementsLength, 0, errorCode); + if(U_SUCCESS(errorCode)) { + root->markRightEdgesFirst(-1); + root->write(*this); + } + deleteCompactBuilder(); + } +} + +// Requires start<limit, +// and all strings of the [start..limit[ elements must be sorted and +// have a common prefix of length unitIndex. +int32_t +StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) { + UBool hasValue=FALSE; + int32_t value=0; + int32_t type; + if(unitIndex==getElementStringLength(start)) { + // An intermediate or final value. + value=getElementValue(start++); + if(start==limit) { + return writeValueAndFinal(value, TRUE); // final-value node + } + hasValue=TRUE; + } + // Now all [start..limit[ strings are longer than unitIndex. + int32_t minUnit=getElementUnit(start, unitIndex); + int32_t maxUnit=getElementUnit(limit-1, unitIndex); + if(minUnit==maxUnit) { + // Linear-match node: All strings have the same character at unitIndex. + int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex); + writeNode(start, limit, lastUnitIndex); + // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength. + int32_t length=lastUnitIndex-unitIndex; + int32_t maxLinearMatchLength=getMaxLinearMatchLength(); + while(length>maxLinearMatchLength) { + lastUnitIndex-=maxLinearMatchLength; + length-=maxLinearMatchLength; + writeElementUnits(start, lastUnitIndex, maxLinearMatchLength); + write(getMinLinearMatch()+maxLinearMatchLength-1); + } + writeElementUnits(start, unitIndex, length); + type=getMinLinearMatch()+length-1; + } else { + // Branch node. + int32_t length=countElementUnits(start, limit, unitIndex); + // length>=2 because minUnit!=maxUnit. + writeBranchSubNode(start, limit, unitIndex, length); + if(--length<getMinLinearMatch()) { + type=length; + } else { + write(length); + type=0; + } + } + return writeValueAndType(hasValue, value, type); +} + +// start<limit && all strings longer than unitIndex && +// length different units at unitIndex +int32_t +StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) { + UChar middleUnits[kMaxSplitBranchLevels]; + int32_t lessThan[kMaxSplitBranchLevels]; + int32_t ltLength=0; + while(length>getMaxBranchLinearSubNodeLength()) { + // Branch on the middle unit. + // First, find the middle unit. + int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); + // Encode the less-than branch first. + middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit + lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2); + ++ltLength; + // Continue for the greater-or-equal branch. + start=i; + length=length-length/2; + } + // For each unit, find its elements array start and whether it has a final value. + int32_t starts[kMaxBranchLinearSubNodeLength]; + UBool isFinal[kMaxBranchLinearSubNodeLength-1]; + int32_t unitNumber=0; + do { + int32_t i=starts[unitNumber]=start; + UChar unit=getElementUnit(i++, unitIndex); + i=indexOfElementWithNextUnit(i, unitIndex, unit); + isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start); + start=i; + } while(++unitNumber<length-1); + // unitNumber==length-1, and the maxUnit elements range is [start..limit[ + starts[unitNumber]=start; + + // Write the sub-nodes in reverse order: The jump lengths are deltas from + // after their own positions, so if we wrote the minUnit sub-node first, + // then its jump delta would be larger. + // Instead we write the minUnit sub-node last, for a shorter delta. + int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1]; + do { + --unitNumber; + if(!isFinal[unitNumber]) { + jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1); + } + } while(unitNumber>0); + // The maxUnit sub-node is written as the very last one because we do + // not jump for it at all. + unitNumber=length-1; + writeNode(start, limit, unitIndex+1); + int32_t offset=write(getElementUnit(start, unitIndex)); + // Write the rest of this node's unit-value pairs. + while(--unitNumber>=0) { + start=starts[unitNumber]; + int32_t value; + if(isFinal[unitNumber]) { + // Write the final value for the one string ending with this unit. + value=getElementValue(start); + } else { + // Write the delta to the start position of the sub-node. + value=offset-jumpTargets[unitNumber]; + } + writeValueAndFinal(value, isFinal[unitNumber]); + offset=write(getElementUnit(start, unitIndex)); + } + // Write the split-branch nodes. + while(ltLength>0) { + --ltLength; + writeDeltaTo(lessThan[ltLength]); + offset=write(middleUnits[ltLength]); + } + return offset; +} + +// Requires start<limit, +// and all strings of the [start..limit[ elements must be sorted and +// have a common prefix of length unitIndex. +StringTrieBuilder::Node * +StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return NULL; + } + UBool hasValue=FALSE; + int32_t value=0; + if(unitIndex==getElementStringLength(start)) { + // An intermediate or final value. + value=getElementValue(start++); + if(start==limit) { + return registerFinalValue(value, errorCode); + } + hasValue=TRUE; + } + Node *node; + // Now all [start..limit[ strings are longer than unitIndex. + int32_t minUnit=getElementUnit(start, unitIndex); + int32_t maxUnit=getElementUnit(limit-1, unitIndex); + if(minUnit==maxUnit) { + // Linear-match node: All strings have the same character at unitIndex. + int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex); + Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode); + // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength. + int32_t length=lastUnitIndex-unitIndex; + int32_t maxLinearMatchLength=getMaxLinearMatchLength(); + while(length>maxLinearMatchLength) { + lastUnitIndex-=maxLinearMatchLength; + length-=maxLinearMatchLength; + node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode); + nextNode=registerNode(node, errorCode); + } + node=createLinearMatchNode(start, unitIndex, length, nextNode); + } else { + // Branch node. + int32_t length=countElementUnits(start, limit, unitIndex); + // length>=2 because minUnit!=maxUnit. + Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode); + node=new BranchHeadNode(length, subNode); + } + if(hasValue && node!=NULL) { + if(matchNodesCanHaveValues()) { + ((ValueNode *)node)->setValue(value); + } else { + node=new IntermediateValueNode(value, registerNode(node, errorCode)); + } + } + return registerNode(node, errorCode); +} + +// start<limit && all strings longer than unitIndex && +// length different units at unitIndex +StringTrieBuilder::Node * +StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, + int32_t length, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return NULL; + } + UChar middleUnits[kMaxSplitBranchLevels]; + Node *lessThan[kMaxSplitBranchLevels]; + int32_t ltLength=0; + while(length>getMaxBranchLinearSubNodeLength()) { + // Branch on the middle unit. + // First, find the middle unit. + int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); + // Create the less-than branch. + middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit + lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode); + ++ltLength; + // Continue for the greater-or-equal branch. + start=i; + length=length-length/2; + } + if(U_FAILURE(errorCode)) { + return NULL; + } + ListBranchNode *listNode=new ListBranchNode(); + if(listNode==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + // For each unit, find its elements array start and whether it has a final value. + int32_t unitNumber=0; + do { + int32_t i=start; + UChar unit=getElementUnit(i++, unitIndex); + i=indexOfElementWithNextUnit(i, unitIndex, unit); + if(start==i-1 && unitIndex+1==getElementStringLength(start)) { + listNode->add(unit, getElementValue(start)); + } else { + listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode)); + } + start=i; + } while(++unitNumber<length-1); + // unitNumber==length-1, and the maxUnit elements range is [start..limit[ + UChar unit=getElementUnit(start, unitIndex); + if(start==limit-1 && unitIndex+1==getElementStringLength(start)) { + listNode->add(unit, getElementValue(start)); + } else { + listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode)); + } + Node *node=registerNode(listNode, errorCode); + // Create the split-branch nodes. + while(ltLength>0) { + --ltLength; + node=registerNode( + new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode); + } + return node; +} + +StringTrieBuilder::Node * +StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + delete newNode; + return NULL; + } + if(newNode==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + const UHashElement *old=uhash_find(nodes, newNode); + if(old!=NULL) { + delete newNode; + return (Node *)old->key.pointer; + } + // If uhash_puti() returns a non-zero value from an equivalent, previously + // registered node, then uhash_find() failed to find that and we will leak newNode. +#if U_DEBUG + int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. +#endif + uhash_puti(nodes, newNode, 1, &errorCode); + U_ASSERT(oldValue==0); + if(U_FAILURE(errorCode)) { + delete newNode; + return NULL; + } + return newNode; +} + +StringTrieBuilder::Node * +StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return NULL; + } + FinalValueNode key(value); + const UHashElement *old=uhash_find(nodes, &key); + if(old!=NULL) { + return (Node *)old->key.pointer; + } + Node *newNode=new FinalValueNode(value); + if(newNode==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + // If uhash_puti() returns a non-zero value from an equivalent, previously + // registered node, then uhash_find() failed to find that and we will leak newNode. +#if U_DEBUG + int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. +#endif + uhash_puti(nodes, newNode, 1, &errorCode); + U_ASSERT(oldValue==0); + if(U_FAILURE(errorCode)) { + delete newNode; + return NULL; + } + return newNode; +} + +int32_t +StringTrieBuilder::hashNode(const void *node) { + return ((const Node *)node)->hashCode(); +} + +UBool +StringTrieBuilder::equalNodes(const void *left, const void *right) { + return *(const Node *)left==*(const Node *)right; +} + +UBool +StringTrieBuilder::Node::operator==(const Node &other) const { + return this==&other || (typeid(*this)==typeid(other) && hash==other.hash); +} + +int32_t +StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) { + if(offset==0) { + offset=edgeNumber; + } + return edgeNumber; +} + +UBool +StringTrieBuilder::FinalValueNode::operator==(const Node &other) const { + if(this==&other) { + return TRUE; + } + if(!Node::operator==(other)) { + return FALSE; + } + const FinalValueNode &o=(const FinalValueNode &)other; + return value==o.value; +} + +void +StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) { + offset=builder.writeValueAndFinal(value, TRUE); +} + +UBool +StringTrieBuilder::ValueNode::operator==(const Node &other) const { + if(this==&other) { + return TRUE; + } + if(!Node::operator==(other)) { + return FALSE; + } + const ValueNode &o=(const ValueNode &)other; + return hasValue==o.hasValue && (!hasValue || value==o.value); +} + +UBool +StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const { + if(this==&other) { + return TRUE; + } + if(!ValueNode::operator==(other)) { + return FALSE; + } + const IntermediateValueNode &o=(const IntermediateValueNode &)other; + return next==o.next; +} + +int32_t +StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) { + if(offset==0) { + offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); + } + return edgeNumber; +} + +void +StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) { + next->write(builder); + offset=builder.writeValueAndFinal(value, FALSE); +} + +UBool +StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const { + if(this==&other) { + return TRUE; + } + if(!ValueNode::operator==(other)) { + return FALSE; + } + const LinearMatchNode &o=(const LinearMatchNode &)other; + return length==o.length && next==o.next; +} + +int32_t +StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) { + if(offset==0) { + offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); + } + return edgeNumber; +} + +UBool +StringTrieBuilder::ListBranchNode::operator==(const Node &other) const { + if(this==&other) { + return TRUE; + } + if(!Node::operator==(other)) { + return FALSE; + } + const ListBranchNode &o=(const ListBranchNode &)other; + for(int32_t i=0; i<length; ++i) { + if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) { + return FALSE; + } + } + return TRUE; +} + +int32_t +StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) { + if(offset==0) { + firstEdgeNumber=edgeNumber; + int32_t step=0; + int32_t i=length; + do { + Node *edge=equal[--i]; + if(edge!=NULL) { + edgeNumber=edge->markRightEdgesFirst(edgeNumber-step); + } + // For all but the rightmost edge, decrement the edge number. + step=1; + } while(i>0); + offset=edgeNumber; + } + return edgeNumber; +} + +void +StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) { + // Write the sub-nodes in reverse order: The jump lengths are deltas from + // after their own positions, so if we wrote the minUnit sub-node first, + // then its jump delta would be larger. + // Instead we write the minUnit sub-node last, for a shorter delta. + int32_t unitNumber=length-1; + Node *rightEdge=equal[unitNumber]; + int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset(); + do { + --unitNumber; + if(equal[unitNumber]!=NULL) { + equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder); + } + } while(unitNumber>0); + // The maxUnit sub-node is written as the very last one because we do + // not jump for it at all. + unitNumber=length-1; + if(rightEdge==NULL) { + builder.writeValueAndFinal(values[unitNumber], TRUE); + } else { + rightEdge->write(builder); + } + offset=builder.write(units[unitNumber]); + // Write the rest of this node's unit-value pairs. + while(--unitNumber>=0) { + int32_t value; + UBool isFinal; + if(equal[unitNumber]==NULL) { + // Write the final value for the one string ending with this unit. + value=values[unitNumber]; + isFinal=TRUE; + } else { + // Write the delta to the start position of the sub-node. + U_ASSERT(equal[unitNumber]->getOffset()>0); + value=offset-equal[unitNumber]->getOffset(); + isFinal=FALSE; + } + builder.writeValueAndFinal(value, isFinal); + offset=builder.write(units[unitNumber]); + } +} + +UBool +StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const { + if(this==&other) { + return TRUE; + } + if(!Node::operator==(other)) { + return FALSE; + } + const SplitBranchNode &o=(const SplitBranchNode &)other; + return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual; +} + +int32_t +StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) { + if(offset==0) { + firstEdgeNumber=edgeNumber; + edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber); + offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1); + } + return edgeNumber; +} + +void +StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) { + // Encode the less-than branch first. + lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder); + // Encode the greater-or-equal branch last because we do not jump for it at all. + greaterOrEqual->write(builder); + // Write this node. + U_ASSERT(lessThan->getOffset()>0); + builder.writeDeltaTo(lessThan->getOffset()); // less-than + offset=builder.write(unit); +} + +UBool +StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const { + if(this==&other) { + return TRUE; + } + if(!ValueNode::operator==(other)) { + return FALSE; + } + const BranchHeadNode &o=(const BranchHeadNode &)other; + return length==o.length && next==o.next; +} + +int32_t +StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) { + if(offset==0) { + offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); + } + return edgeNumber; +} + +void +StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) { + next->write(builder); + if(length<=builder.getMinLinearMatch()) { + offset=builder.writeValueAndType(hasValue, value, length-1); + } else { + builder.write(length-1); + offset=builder.writeValueAndType(hasValue, value, 0); + } +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/uarrsort.cpp b/thirdparty/icu4c/common/uarrsort.cpp new file mode 100644 index 0000000000..c17dbb2e2b --- /dev/null +++ b/thirdparty/icu4c/common/uarrsort.cpp @@ -0,0 +1,274 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2003-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uarrsort.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003aug04 +* created by: Markus W. Scherer +* +* Internal function for sorting arrays. +*/ + +#include <cstddef> + +#include "unicode/utypes.h" +#include "cmemory.h" +#include "uarrsort.h" + +enum { + /** + * "from Knuth" + * + * A binary search over 8 items performs 4 comparisons: + * log2(8)=3 to subdivide, +1 to check for equality. + * A linear search over 8 items on average also performs 4 comparisons. + */ + MIN_QSORT=9, + STACK_ITEM_SIZE=200 +}; + +static constexpr int32_t sizeInMaxAlignTs(int32_t sizeInBytes) { + return (sizeInBytes + sizeof(std::max_align_t) - 1) / sizeof(std::max_align_t); +} + +/* UComparator convenience implementations ---------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +uprv_uint16Comparator(const void *context, const void *left, const void *right) { + (void)context; + return (int32_t)*(const uint16_t *)left - (int32_t)*(const uint16_t *)right; +} + +U_CAPI int32_t U_EXPORT2 +uprv_int32Comparator(const void *context, const void *left, const void *right) { + (void)context; + return *(const int32_t *)left - *(const int32_t *)right; +} + +U_CAPI int32_t U_EXPORT2 +uprv_uint32Comparator(const void *context, const void *left, const void *right) { + (void)context; + uint32_t l=*(const uint32_t *)left, r=*(const uint32_t *)right; + + /* compare directly because (l-r) would overflow the int32_t result */ + if(l<r) { + return -1; + } else if(l==r) { + return 0; + } else /* l>r */ { + return 1; + } +} + +/* Insertion sort using binary search --------------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +uprv_stableBinarySearch(char *array, int32_t limit, void *item, int32_t itemSize, + UComparator *cmp, const void *context) { + int32_t start=0; + UBool found=FALSE; + + /* Binary search until we get down to a tiny sub-array. */ + while((limit-start)>=MIN_QSORT) { + int32_t i=(start+limit)/2; + int32_t diff=cmp(context, item, array+i*itemSize); + if(diff==0) { + /* + * Found the item. We look for the *last* occurrence of such + * an item, for stable sorting. + * If we knew that there will be only few equal items, + * we could break now and enter the linear search. + * However, if there are many equal items, then it should be + * faster to continue with the binary search. + * It seems likely that we either have all unique items + * (where found will never become TRUE in the insertion sort) + * or potentially many duplicates. + */ + found=TRUE; + start=i+1; + } else if(diff<0) { + limit=i; + } else { + start=i; + } + } + + /* Linear search over the remaining tiny sub-array. */ + while(start<limit) { + int32_t diff=cmp(context, item, array+start*itemSize); + if(diff==0) { + found=TRUE; + } else if(diff<0) { + break; + } + ++start; + } + return found ? (start-1) : ~start; +} + +static void +doInsertionSort(char *array, int32_t length, int32_t itemSize, + UComparator *cmp, const void *context, void *pv) { + int32_t j; + + for(j=1; j<length; ++j) { + char *item=array+j*itemSize; + int32_t insertionPoint=uprv_stableBinarySearch(array, j, item, itemSize, cmp, context); + if(insertionPoint<0) { + insertionPoint=~insertionPoint; + } else { + ++insertionPoint; /* one past the last equal item */ + } + if(insertionPoint<j) { + char *dest=array+insertionPoint*itemSize; + uprv_memcpy(pv, item, itemSize); /* v=array[j] */ + uprv_memmove(dest+itemSize, dest, (j-insertionPoint)*(size_t)itemSize); + uprv_memcpy(dest, pv, itemSize); /* array[insertionPoint]=v */ + } + } +} + +static void +insertionSort(char *array, int32_t length, int32_t itemSize, + UComparator *cmp, const void *context, UErrorCode *pErrorCode) { + + icu::MaybeStackArray<std::max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE)> v; + if (sizeInMaxAlignTs(itemSize) > v.getCapacity() && + v.resize(sizeInMaxAlignTs(itemSize)) == nullptr) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + + doInsertionSort(array, length, itemSize, cmp, context, v.getAlias()); +} + +/* QuickSort ---------------------------------------------------------------- */ + +/* + * This implementation is semi-recursive: + * It recurses for the smaller sub-array to shorten the recursion depth, + * and loops for the larger sub-array. + * + * Loosely after QuickSort algorithms in + * Niklaus Wirth + * Algorithmen und Datenstrukturen mit Modula-2 + * B.G. Teubner Stuttgart + * 4. Auflage 1986 + * ISBN 3-519-02260-5 + */ +static void +subQuickSort(char *array, int32_t start, int32_t limit, int32_t itemSize, + UComparator *cmp, const void *context, + void *px, void *pw) { + int32_t left, right; + + /* start and left are inclusive, limit and right are exclusive */ + do { + if((start+MIN_QSORT)>=limit) { + doInsertionSort(array+start*itemSize, limit-start, itemSize, cmp, context, px); + break; + } + + left=start; + right=limit; + + /* x=array[middle] */ + uprv_memcpy(px, array+(size_t)((start+limit)/2)*itemSize, itemSize); + + do { + while(/* array[left]<x */ + cmp(context, array+left*itemSize, px)<0 + ) { + ++left; + } + while(/* x<array[right-1] */ + cmp(context, px, array+(right-1)*itemSize)<0 + ) { + --right; + } + + /* swap array[left] and array[right-1] via w; ++left; --right */ + if(left<right) { + --right; + + if(left<right) { + uprv_memcpy(pw, array+(size_t)left*itemSize, itemSize); + uprv_memcpy(array+(size_t)left*itemSize, array+(size_t)right*itemSize, itemSize); + uprv_memcpy(array+(size_t)right*itemSize, pw, itemSize); + } + + ++left; + } + } while(left<right); + + /* sort sub-arrays */ + if((right-start)<(limit-left)) { + /* sort [start..right[ */ + if(start<(right-1)) { + subQuickSort(array, start, right, itemSize, cmp, context, px, pw); + } + + /* sort [left..limit[ */ + start=left; + } else { + /* sort [left..limit[ */ + if(left<(limit-1)) { + subQuickSort(array, left, limit, itemSize, cmp, context, px, pw); + } + + /* sort [start..right[ */ + limit=right; + } + } while(start<(limit-1)); +} + +static void +quickSort(char *array, int32_t length, int32_t itemSize, + UComparator *cmp, const void *context, UErrorCode *pErrorCode) { + /* allocate two intermediate item variables (x and w) */ + icu::MaybeStackArray<std::max_align_t, sizeInMaxAlignTs(STACK_ITEM_SIZE) * 2> xw; + if(sizeInMaxAlignTs(itemSize)*2 > xw.getCapacity() && + xw.resize(sizeInMaxAlignTs(itemSize) * 2) == nullptr) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + + subQuickSort(array, 0, length, itemSize, cmp, context, + xw.getAlias(), xw.getAlias() + sizeInMaxAlignTs(itemSize)); +} + +/* uprv_sortArray() API ----------------------------------------------------- */ + +/* + * Check arguments, select an appropriate implementation, + * cast the array to char * so that array+i*itemSize works. + */ +U_CAPI void U_EXPORT2 +uprv_sortArray(void *array, int32_t length, int32_t itemSize, + UComparator *cmp, const void *context, + UBool sortStable, UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return; + } + if((length>0 && array==NULL) || length<0 || itemSize<=0 || cmp==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if(length<=1) { + return; + } else if(length<MIN_QSORT || sortStable) { + insertionSort((char *)array, length, itemSize, cmp, context, pErrorCode); + } else { + quickSort((char *)array, length, itemSize, cmp, context, pErrorCode); + } +} diff --git a/thirdparty/icu4c/common/uarrsort.h b/thirdparty/icu4c/common/uarrsort.h new file mode 100644 index 0000000000..a55dca5b9e --- /dev/null +++ b/thirdparty/icu4c/common/uarrsort.h @@ -0,0 +1,103 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2003-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uarrsort.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003aug04 +* created by: Markus W. Scherer +* +* Internal function for sorting arrays. +*/ + +#ifndef __UARRSORT_H__ +#define __UARRSORT_H__ + +#include "unicode/utypes.h" + +U_CDECL_BEGIN +/** + * Function type for comparing two items as part of sorting an array or similar. + * Callback function for uprv_sortArray(). + * + * @param context Application-specific pointer, passed through by uprv_sortArray(). + * @param left Pointer to the "left" item. + * @param right Pointer to the "right" item. + * @return 32-bit signed integer comparison result: + * <0 if left<right + * ==0 if left==right + * >0 if left>right + * + * @internal + */ +typedef int32_t U_CALLCONV +UComparator(const void *context, const void *left, const void *right); +U_CDECL_END + +/** + * Array sorting function. + * Uses a UComparator for comparing array items to each other, and simple + * memory copying to move items. + * + * @param array The array to be sorted. + * @param length The number of items in the array. + * @param itemSize The size in bytes of each array item. + * @param cmp UComparator function used to compare two items each. + * @param context Application-specific pointer, passed through to the UComparator. + * @param sortStable If true, a stable sorting algorithm must be used. + * @param pErrorCode ICU in/out UErrorCode parameter. + * + * @internal + */ +U_CAPI void U_EXPORT2 +uprv_sortArray(void *array, int32_t length, int32_t itemSize, + UComparator *cmp, const void *context, + UBool sortStable, UErrorCode *pErrorCode); + +/** + * Convenience UComparator implementation for uint16_t arrays. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +uprv_uint16Comparator(const void *context, const void *left, const void *right); + +/** + * Convenience UComparator implementation for int32_t arrays. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +uprv_int32Comparator(const void *context, const void *left, const void *right); + +/** + * Convenience UComparator implementation for uint32_t arrays. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +uprv_uint32Comparator(const void *context, const void *left, const void *right); + +/** + * Much like Java Collections.binarySearch(list, key, comparator). + * + * Except: Java documents "If the list contains multiple elements equal to + * the specified object, there is no guarantee which one will be found." + * + * This version here will return the largest index of any equal item, + * for use in stable sorting. + * + * @return the index>=0 where the item was found: + * the largest such index, if multiple, for stable sorting; + * or the index<0 for inserting the item at ~index in sorted order + */ +U_CAPI int32_t U_EXPORT2 +uprv_stableBinarySearch(char *array, int32_t length, void *item, int32_t itemSize, + UComparator *cmp, const void *context); + +#endif diff --git a/thirdparty/icu4c/common/uassert.h b/thirdparty/icu4c/common/uassert.h new file mode 100644 index 0000000000..afd31eeffd --- /dev/null +++ b/thirdparty/icu4c/common/uassert.h @@ -0,0 +1,51 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2002-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File uassert.h +* +* Contains the U_ASSERT and UPRV_UNREACHABLE macros +* +****************************************************************************** +*/ +#ifndef U_ASSERT_H +#define U_ASSERT_H + +/* utypes.h is included to get the proper define for uint8_t */ +#include "unicode/utypes.h" +/* for abort */ +#include <stdlib.h> + +/** + * \def U_ASSERT + * By default, U_ASSERT just wraps the C library assert macro. + * By changing the definition here, the assert behavior for ICU can be changed + * without affecting other non - ICU uses of the C library assert(). +*/ +#if U_DEBUG +# include <assert.h> +# define U_ASSERT(exp) assert(exp) +#elif U_CPLUSPLUS_VERSION +# define U_ASSERT(exp) (void)0 +#else +# define U_ASSERT(exp) +#endif + +/** + * \def UPRV_UNREACHABLE + * This macro is used to unconditionally abort if unreachable code is ever executed. + * @internal +*/ +#if defined(UPRV_UNREACHABLE) + // Use the predefined value. +#else +# define UPRV_UNREACHABLE abort() +#endif + +#endif diff --git a/thirdparty/icu4c/common/ubidi.cpp b/thirdparty/icu4c/common/ubidi.cpp new file mode 100644 index 0000000000..3ddb45721e --- /dev/null +++ b/thirdparty/icu4c/common/ubidi.cpp @@ -0,0 +1,3036 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ubidi.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999jul27 +* created by: Markus W. Scherer, updated by Matitiahu Allouche +* +*/ + +#include "cmemory.h" +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "unicode/uchar.h" +#include "unicode/ubidi.h" +#include "unicode/utf16.h" +#include "ubidi_props.h" +#include "ubidiimp.h" +#include "uassert.h" + +/* + * General implementation notes: + * + * Throughout the implementation, there are comments like (W2) that refer to + * rules of the BiDi algorithm, in this example to the second rule of the + * resolution of weak types. + * + * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) + * character according to UTF-16, the second UChar gets the directional property of + * the entire character assigned, while the first one gets a BN, a boundary + * neutral, type, which is ignored by most of the algorithm according to + * rule (X9) and the implementation suggestions of the BiDi algorithm. + * + * Later, adjustWSLevels() will set the level for each BN to that of the + * following character (UChar), which results in surrogate pairs getting the + * same level on each of their surrogates. + * + * In a UTF-8 implementation, the same thing could be done: the last byte of + * a multi-byte sequence would get the "real" property, while all previous + * bytes of that sequence would get BN. + * + * It is not possible to assign all those parts of a character the same real + * property because this would fail in the resolution of weak types with rules + * that look at immediately surrounding types. + * + * As a related topic, this implementation does not remove Boundary Neutral + * types from the input, but ignores them wherever this is relevant. + * For example, the loop for the resolution of the weak types reads + * types until it finds a non-BN. + * Also, explicit embedding codes are neither changed into BN nor removed. + * They are only treated the same way real BNs are. + * As stated before, adjustWSLevels() takes care of them at the end. + * For the purpose of conformance, the levels of all these codes + * do not matter. + * + * Note that this implementation modifies the dirProps + * after the initial setup, when applying X5c (replace FSI by LRI or RLI), + * X6, N0 (replace paired brackets by L or R). + * + * In this implementation, the resolution of weak types (W1 to W6), + * neutrals (N1 and N2), and the assignment of the resolved level (In) + * are all done in one single loop, in resolveImplicitLevels(). + * Changes of dirProp values are done on the fly, without writing + * them back to the dirProps array. + * + * + * This implementation contains code that allows to bypass steps of the + * algorithm that are not needed on the specific paragraph + * in order to speed up the most common cases considerably, + * like text that is entirely LTR, or RTL text without numbers. + * + * Most of this is done by setting a bit for each directional property + * in a flags variable and later checking for whether there are + * any LTR characters or any RTL characters, or both, whether + * there are any explicit embedding codes, etc. + * + * If the (Xn) steps are performed, then the flags are re-evaluated, + * because they will then not contain the embedding codes any more + * and will be adjusted for override codes, so that subsequently + * more bypassing may be possible than what the initial flags suggested. + * + * If the text is not mixed-directional, then the + * algorithm steps for the weak type resolution are not performed, + * and all levels are set to the paragraph level. + * + * If there are no explicit embedding codes, then the (Xn) steps + * are not performed. + * + * If embedding levels are supplied as a parameter, then all + * explicit embedding codes are ignored, and the (Xn) steps + * are not performed. + * + * White Space types could get the level of the run they belong to, + * and are checked with a test of (flags&MASK_EMBEDDING) to + * consider if the paragraph direction should be considered in + * the flags variable. + * + * If there are no White Space types in the paragraph, then + * (L1) is not necessary in adjustWSLevels(). + */ + +/* to avoid some conditional statements, use tiny constant arrays */ +static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) }; +static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; +static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; + +#define DIRPROP_FLAG_LR(level) flagLR[(level)&1] +#define DIRPROP_FLAG_E(level) flagE[(level)&1] +#define DIRPROP_FLAG_O(level) flagO[(level)&1] + +#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R) + +#define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE) + +/* UBiDi object management -------------------------------------------------- */ + +U_CAPI UBiDi * U_EXPORT2 +ubidi_open(void) +{ + UErrorCode errorCode=U_ZERO_ERROR; + return ubidi_openSized(0, 0, &errorCode); +} + +U_CAPI UBiDi * U_EXPORT2 +ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) { + UBiDi *pBiDi; + + /* check the argument values */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return NULL; + } else if(maxLength<0 || maxRunCount<0) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; /* invalid arguments */ + } + + /* allocate memory for the object */ + pBiDi=(UBiDi *)uprv_malloc(sizeof(UBiDi)); + if(pBiDi==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */ + uprv_memset(pBiDi, 0, sizeof(UBiDi)); + + /* allocate memory for arrays as requested */ + if(maxLength>0) { + if( !getInitialDirPropsMemory(pBiDi, maxLength) || + !getInitialLevelsMemory(pBiDi, maxLength) + ) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + } + } else { + pBiDi->mayAllocateText=TRUE; + } + + if(maxRunCount>0) { + if(maxRunCount==1) { + /* use simpleRuns[] */ + pBiDi->runsSize=sizeof(Run); + } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + } + } else { + pBiDi->mayAllocateRuns=TRUE; + } + + if(U_SUCCESS(*pErrorCode)) { + return pBiDi; + } else { + ubidi_close(pBiDi); + return NULL; + } +} + +/* + * We are allowed to allocate memory if memory==NULL or + * mayAllocate==TRUE for each array that we need. + * We also try to grow memory as needed if we + * allocate it. + * + * Assume sizeNeeded>0. + * If *pMemory!=NULL, then assume *pSize>0. + * + * ### this realloc() may unnecessarily copy the old data, + * which we know we don't need any more; + * is this the best way to do this?? + */ +U_CFUNC UBool +ubidi_getMemory(BidiMemoryForAllocation *bidiMem, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded) { + void **pMemory = (void **)bidiMem; + /* check for existing memory */ + if(*pMemory==NULL) { + /* we need to allocate memory */ + if(mayAllocate && (*pMemory=uprv_malloc(sizeNeeded))!=NULL) { + *pSize=sizeNeeded; + return TRUE; + } else { + return FALSE; + } + } else { + if(sizeNeeded<=*pSize) { + /* there is already enough memory */ + return TRUE; + } + else if(!mayAllocate) { + /* not enough memory, and we must not allocate */ + return FALSE; + } else { + /* we try to grow */ + void *memory; + /* in most cases, we do not need the copy-old-data part of + * realloc, but it is needed when adding runs using getRunsMemory() + * in setParaRunsOnly() + */ + if((memory=uprv_realloc(*pMemory, sizeNeeded))!=NULL) { + *pMemory=memory; + *pSize=sizeNeeded; + return TRUE; + } else { + /* we failed to grow */ + return FALSE; + } + } + } +} + +U_CAPI void U_EXPORT2 +ubidi_close(UBiDi *pBiDi) { + if(pBiDi!=NULL) { + pBiDi->pParaBiDi=NULL; /* in case one tries to reuse this block */ + if(pBiDi->dirPropsMemory!=NULL) { + uprv_free(pBiDi->dirPropsMemory); + } + if(pBiDi->levelsMemory!=NULL) { + uprv_free(pBiDi->levelsMemory); + } + if(pBiDi->openingsMemory!=NULL) { + uprv_free(pBiDi->openingsMemory); + } + if(pBiDi->parasMemory!=NULL) { + uprv_free(pBiDi->parasMemory); + } + if(pBiDi->runsMemory!=NULL) { + uprv_free(pBiDi->runsMemory); + } + if(pBiDi->isolatesMemory!=NULL) { + uprv_free(pBiDi->isolatesMemory); + } + if(pBiDi->insertPoints.points!=NULL) { + uprv_free(pBiDi->insertPoints.points); + } + + uprv_free(pBiDi); + } +} + +/* set to approximate "inverse BiDi" ---------------------------------------- */ + +U_CAPI void U_EXPORT2 +ubidi_setInverse(UBiDi *pBiDi, UBool isInverse) { + if(pBiDi!=NULL) { + pBiDi->isInverse=isInverse; + pBiDi->reorderingMode = isInverse ? UBIDI_REORDER_INVERSE_NUMBERS_AS_L + : UBIDI_REORDER_DEFAULT; + } +} + +U_CAPI UBool U_EXPORT2 +ubidi_isInverse(UBiDi *pBiDi) { + if(pBiDi!=NULL) { + return pBiDi->isInverse; + } else { + return FALSE; + } +} + +/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of + * algorithm for direct BiDi, algorithm for inverse BiDi and the bizarre + * concept of RUNS_ONLY which is a double operation. + * It could be advantageous to divide this into 3 concepts: + * a) Operation: direct / inverse / RUNS_ONLY + * b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_R + * c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL + * This would allow combinations not possible today like RUNS_ONLY with + * NUMBERS_SPECIAL. + * Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and + * REMOVE_CONTROLS for the inverse step. + * Not all combinations would be supported, and probably not all do make sense. + * This would need to document which ones are supported and what are the + * fallbacks for unsupported combinations. + */ +U_CAPI void U_EXPORT2 +ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode) { + if ((pBiDi!=NULL) && (reorderingMode >= UBIDI_REORDER_DEFAULT) + && (reorderingMode < UBIDI_REORDER_COUNT)) { + pBiDi->reorderingMode = reorderingMode; + pBiDi->isInverse = (UBool)(reorderingMode == UBIDI_REORDER_INVERSE_NUMBERS_AS_L); + } +} + +U_CAPI UBiDiReorderingMode U_EXPORT2 +ubidi_getReorderingMode(UBiDi *pBiDi) { + if (pBiDi!=NULL) { + return pBiDi->reorderingMode; + } else { + return UBIDI_REORDER_DEFAULT; + } +} + +U_CAPI void U_EXPORT2 +ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions) { + if (reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { + reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; + } + if (pBiDi!=NULL) { + pBiDi->reorderingOptions=reorderingOptions; + } +} + +U_CAPI uint32_t U_EXPORT2 +ubidi_getReorderingOptions(UBiDi *pBiDi) { + if (pBiDi!=NULL) { + return pBiDi->reorderingOptions; + } else { + return 0; + } +} + +U_CAPI UBiDiDirection U_EXPORT2 +ubidi_getBaseDirection(const UChar *text, +int32_t length){ + + int32_t i; + UChar32 uchar; + UCharDirection dir; + + if( text==NULL || length<-1 ){ + return UBIDI_NEUTRAL; + } + + if(length==-1) { + length=u_strlen(text); + } + + for( i = 0 ; i < length; ) { + /* i is incremented by U16_NEXT */ + U16_NEXT(text, i, length, uchar); + dir = u_charDirection(uchar); + if( dir == U_LEFT_TO_RIGHT ) + return UBIDI_LTR; + if( dir == U_RIGHT_TO_LEFT || dir ==U_RIGHT_TO_LEFT_ARABIC ) + return UBIDI_RTL; + } + return UBIDI_NEUTRAL; +} + +/* perform (P2)..(P3) ------------------------------------------------------- */ + +/** + * Returns the directionality of the first strong character + * after the last B in prologue, if any. + * Requires prologue!=null. + */ +static DirProp +firstL_R_AL(UBiDi *pBiDi) { + const UChar *text=pBiDi->prologue; + int32_t length=pBiDi->proLength; + int32_t i; + UChar32 uchar; + DirProp dirProp, result=ON; + for(i=0; i<length; ) { + /* i is incremented by U16_NEXT */ + U16_NEXT(text, i, length, uchar); + dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); + if(result==ON) { + if(dirProp==L || dirProp==R || dirProp==AL) { + result=dirProp; + } + } else { + if(dirProp==B) { + result=ON; + } + } + } + return result; +} + +/* + * Check that there are enough entries in the array pointed to by pBiDi->paras + */ +static UBool +checkParaCount(UBiDi *pBiDi) { + int32_t count=pBiDi->paraCount; + if(pBiDi->paras==pBiDi->simpleParas) { + if(count<=SIMPLE_PARAS_COUNT) + return TRUE; + if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_COUNT * 2)) + return FALSE; + pBiDi->paras=pBiDi->parasMemory; + uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_COUNT * sizeof(Para)); + return TRUE; + } + if(!getInitialParasMemory(pBiDi, count * 2)) + return FALSE; + pBiDi->paras=pBiDi->parasMemory; + return TRUE; +} + +/* + * Get the directional properties for the text, calculate the flags bit-set, and + * determine the paragraph level if necessary (in pBiDi->paras[i].level). + * FSI initiators are also resolved and their dirProp replaced with LRI or RLI. + * When encountering an FSI, it is initially replaced with an LRI, which is the + * default. Only if a strong R or AL is found within its scope will the LRI be + * replaced by an RLI. + */ +static UBool +getDirProps(UBiDi *pBiDi) { + const UChar *text=pBiDi->text; + DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ + + int32_t i=0, originalLength=pBiDi->originalLength; + Flags flags=0; /* collect all directionalities in the text */ + UChar32 uchar; + DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */ + UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); + /* for inverse BiDi, the default para level is set to RTL if there is a + strong R or AL character at either end of the text */ + UBool isDefaultLevelInverse=isDefaultLevel && (UBool) + (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || + pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); + int32_t lastArabicPos=-1; + int32_t controlCount=0; + UBool removeBiDiControls = (UBool)(pBiDi->reorderingOptions & + UBIDI_OPTION_REMOVE_CONTROLS); + + enum State { + NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */ + SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */ + SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */ + LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */ + }; + State state; + DirProp lastStrong=ON; /* for default level & inverse BiDi */ + /* The following stacks are used to manage isolate sequences. Those + sequences may be nested, but obviously never more deeply than the + maximum explicit embedding level. + lastStack is the index of the last used entry in the stack. A value of -1 + means that there is no open isolate sequence. + lastStack is reset to -1 on paragraph boundaries. */ + /* The following stack contains the position of the initiator of + each open isolate sequence */ + int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; + /* The following stack contains the last known state before + encountering the initiator of an isolate sequence */ + State previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; + int32_t stackLast=-1; + + if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) + pBiDi->length=0; + defaultParaLevel=pBiDi->paraLevel&1; + if(isDefaultLevel) { + pBiDi->paras[0].level=defaultParaLevel; + lastStrong=defaultParaLevel; + if(pBiDi->proLength>0 && /* there is a prologue */ + (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */ + if(dirProp==L) + pBiDi->paras[0].level=0; /* set the default para level */ + else + pBiDi->paras[0].level=1; /* set the default para level */ + state=NOT_SEEKING_STRONG; + } else { + state=SEEKING_STRONG_FOR_PARA; + } + } else { + pBiDi->paras[0].level=pBiDi->paraLevel; + state=NOT_SEEKING_STRONG; + } + /* count paragraphs and determine the paragraph level (P2..P3) */ + /* + * see comment in ubidi.h: + * the UBIDI_DEFAULT_XXX values are designed so that + * their bit 0 alone yields the intended default + */ + for( /* i=0 above */ ; i<originalLength; ) { + /* i is incremented by U16_NEXT */ + U16_NEXT(text, i, originalLength, uchar); + flags|=DIRPROP_FLAG(dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar)); + dirProps[i-1]=dirProp; + if(uchar>0xffff) { /* set the lead surrogate's property to BN */ + flags|=DIRPROP_FLAG(BN); + dirProps[i-2]=BN; + } + if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) + controlCount++; + if(dirProp==L) { + if(state==SEEKING_STRONG_FOR_PARA) { + pBiDi->paras[pBiDi->paraCount-1].level=0; + state=NOT_SEEKING_STRONG; + } + else if(state==SEEKING_STRONG_FOR_FSI) { + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { + /* no need for next statement, already set by default */ + /* dirProps[isolateStartStack[stackLast]]=LRI; */ + flags|=DIRPROP_FLAG(LRI); + } + state=LOOKING_FOR_PDI; + } + lastStrong=L; + continue; + } + if(dirProp==R || dirProp==AL) { + if(state==SEEKING_STRONG_FOR_PARA) { + pBiDi->paras[pBiDi->paraCount-1].level=1; + state=NOT_SEEKING_STRONG; + } + else if(state==SEEKING_STRONG_FOR_FSI) { + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { + dirProps[isolateStartStack[stackLast]]=RLI; + flags|=DIRPROP_FLAG(RLI); + } + state=LOOKING_FOR_PDI; + } + lastStrong=R; + if(dirProp==AL) + lastArabicPos=i-1; + continue; + } + if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */ + stackLast++; + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { + isolateStartStack[stackLast]=i-1; + previousStateStack[stackLast]=state; + } + if(dirProp==FSI) { + dirProps[i-1]=LRI; /* default if no strong char */ + state=SEEKING_STRONG_FOR_FSI; + } + else + state=LOOKING_FOR_PDI; + continue; + } + if(dirProp==PDI) { + if(state==SEEKING_STRONG_FOR_FSI) { + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { + /* no need for next statement, already set by default */ + /* dirProps[isolateStartStack[stackLast]]=LRI; */ + flags|=DIRPROP_FLAG(LRI); + } + } + if(stackLast>=0) { + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) + state=previousStateStack[stackLast]; + stackLast--; + } + continue; + } + if(dirProp==B) { + if(i<originalLength && uchar==CR && text[i]==LF) /* do nothing on the CR */ + continue; + pBiDi->paras[pBiDi->paraCount-1].limit=i; + if(isDefaultLevelInverse && lastStrong==R) + pBiDi->paras[pBiDi->paraCount-1].level=1; + if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { + /* When streaming, we only process whole paragraphs + thus some updates are only done on paragraph boundaries */ + pBiDi->length=i; /* i is index to next character */ + pBiDi->controlCount=controlCount; + } + if(i<originalLength) { /* B not last char in text */ + pBiDi->paraCount++; + if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */ + return FALSE; + if(isDefaultLevel) { + pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel; + state=SEEKING_STRONG_FOR_PARA; + lastStrong=defaultParaLevel; + } else { + pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel; + state=NOT_SEEKING_STRONG; + } + stackLast=-1; + } + continue; + } + } + /* Ignore still open isolate sequences with overflow */ + if(stackLast>UBIDI_MAX_EXPLICIT_LEVEL) { + stackLast=UBIDI_MAX_EXPLICIT_LEVEL; + state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */ + } + /* Resolve direction of still unresolved open FSI sequences */ + while(stackLast>=0) { + if(state==SEEKING_STRONG_FOR_FSI) { + /* no need for next statement, already set by default */ + /* dirProps[isolateStartStack[stackLast]]=LRI; */ + flags|=DIRPROP_FLAG(LRI); + break; + } + state=previousStateStack[stackLast]; + stackLast--; + } + /* When streaming, ignore text after the last paragraph separator */ + if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { + if(pBiDi->length<originalLength) + pBiDi->paraCount--; + } else { + pBiDi->paras[pBiDi->paraCount-1].limit=originalLength; + pBiDi->controlCount=controlCount; + } + /* For inverse bidi, default para direction is RTL if there is + a strong R or AL at either end of the paragraph */ + if(isDefaultLevelInverse && lastStrong==R) { + pBiDi->paras[pBiDi->paraCount-1].level=1; + } + if(isDefaultLevel) { + pBiDi->paraLevel=static_cast<UBiDiLevel>(pBiDi->paras[0].level); + } + /* The following is needed to resolve the text direction for default level + paragraphs containing no strong character */ + for(i=0; i<pBiDi->paraCount; i++) + flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level); + + if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { + flags|=DIRPROP_FLAG(L); + } + pBiDi->flags=flags; + pBiDi->lastArabicPos=lastArabicPos; + return TRUE; +} + +/* determine the paragraph level at position index */ +U_CFUNC UBiDiLevel +ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) { + int32_t i; + for(i=0; i<pBiDi->paraCount; i++) + if(pindex<pBiDi->paras[i].limit) + break; + if(i>=pBiDi->paraCount) + i=pBiDi->paraCount-1; + return (UBiDiLevel)(pBiDi->paras[i].level); +} + +/* Functions for handling paired brackets ----------------------------------- */ + +/* In the isoRuns array, the first entry is used for text outside of any + isolate sequence. Higher entries are used for each more deeply nested + isolate sequence. isoRunLast is the index of the last used entry. The + openings array is used to note the data of opening brackets not yet + matched by a closing bracket, or matched but still susceptible to change + level. + Each isoRun entry contains the index of the first and + one-after-last openings entries for pending opening brackets it + contains. The next openings entry to use is the one-after-last of the + most deeply nested isoRun entry. + isoRun entries also contain their current embedding level and the last + encountered strong character, since these will be needed to resolve + the level of paired brackets. */ + +static void +bracketInit(UBiDi *pBiDi, BracketData *bd) { + bd->pBiDi=pBiDi; + bd->isoRunLast=0; + bd->isoRuns[0].start=0; + bd->isoRuns[0].limit=0; + bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0); + UBiDiLevel t = GET_PARALEVEL(pBiDi, 0) & 1; + bd->isoRuns[0].lastStrong = bd->isoRuns[0].lastBase = t; + bd->isoRuns[0].contextDir = (UBiDiDirection)t; + bd->isoRuns[0].contextPos=0; + if(pBiDi->openingsMemory) { + bd->openings=pBiDi->openingsMemory; + bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); + } else { + bd->openings=bd->simpleOpenings; + bd->openingsCount=SIMPLE_OPENINGS_COUNT; + } + bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL || + bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL; +} + +/* paragraph boundary */ +static void +bracketProcessB(BracketData *bd, UBiDiLevel level) { + bd->isoRunLast=0; + bd->isoRuns[0].limit=0; + bd->isoRuns[0].level=level; + bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=level&1; + bd->isoRuns[0].contextDir=(UBiDiDirection)(level&1); + bd->isoRuns[0].contextPos=0; +} + +/* LRE, LRO, RLE, RLO, PDF */ +static void +bracketProcessBoundary(BracketData *bd, int32_t lastCcPos, + UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + DirProp *dirProps=bd->pBiDi->dirProps; + if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */ + return; + if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */ + contextLevel=embeddingLevel; + pLastIsoRun->limit=pLastIsoRun->start; + pLastIsoRun->level=embeddingLevel; + pLastIsoRun->lastStrong=pLastIsoRun->lastBase=contextLevel&1; + pLastIsoRun->contextDir=(UBiDiDirection)(contextLevel&1); + pLastIsoRun->contextPos=(UBiDiDirection)lastCcPos; +} + +/* LRI or RLI */ +static void +bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + int16_t lastLimit; + pLastIsoRun->lastBase=ON; + lastLimit=pLastIsoRun->limit; + bd->isoRunLast++; + pLastIsoRun++; + pLastIsoRun->start=pLastIsoRun->limit=lastLimit; + pLastIsoRun->level=level; + pLastIsoRun->lastStrong=pLastIsoRun->lastBase=level&1; + pLastIsoRun->contextDir=(UBiDiDirection)(level&1); + pLastIsoRun->contextPos=0; +} + +/* PDI */ +static void +bracketProcessPDI(BracketData *bd) { + IsoRun *pLastIsoRun; + bd->isoRunLast--; + pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + pLastIsoRun->lastBase=ON; +} + +/* newly found opening bracket: create an openings entry */ +static UBool /* return TRUE if success */ +bracketAddOpening(BracketData *bd, UChar match, int32_t position) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + Opening *pOpening; + if(pLastIsoRun->limit>=bd->openingsCount) { /* no available new entry */ + UBiDi *pBiDi=bd->pBiDi; + if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2)) + return FALSE; + if(bd->openings==bd->simpleOpenings) + uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings, + SIMPLE_OPENINGS_COUNT * sizeof(Opening)); + bd->openings=pBiDi->openingsMemory; /* may have changed */ + bd->openingsCount=pBiDi->openingsSize / sizeof(Opening); + } + pOpening=&bd->openings[pLastIsoRun->limit]; + pOpening->position=position; + pOpening->match=match; + pOpening->contextDir=pLastIsoRun->contextDir; + pOpening->contextPos=pLastIsoRun->contextPos; + pOpening->flags=0; + pLastIsoRun->limit++; + return TRUE; +} + +/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */ +static void +fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) { + /* This function calls itself recursively */ + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + Opening *qOpening; + DirProp *dirProps=bd->pBiDi->dirProps; + int32_t k, openingPosition, closingPosition; + for(k=openingIndex+1, qOpening=&bd->openings[k]; k<pLastIsoRun->limit; k++, qOpening++) { + if(qOpening->match>=0) /* not an N0c match */ + continue; + if(newPropPosition<qOpening->contextPos) + break; + if(newPropPosition>=qOpening->position) + continue; + if(newProp==qOpening->contextDir) + break; + openingPosition=qOpening->position; + dirProps[openingPosition]=newProp; + closingPosition=-(qOpening->match); + dirProps[closingPosition]=newProp; + qOpening->match=0; /* prevent further changes */ + fixN0c(bd, k, openingPosition, newProp); + fixN0c(bd, k, closingPosition, newProp); + } +} + +/* process closing bracket */ +static DirProp /* return L or R if N0b or N0c, ON if N0d */ +bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + Opening *pOpening, *qOpening; + UBiDiDirection direction; + UBool stable; + DirProp newProp; + pOpening=&bd->openings[openIdx]; + direction=(UBiDiDirection)(pLastIsoRun->level&1); + stable=TRUE; /* assume stable until proved otherwise */ + + /* The stable flag is set when brackets are paired and their + level is resolved and cannot be changed by what will be + found later in the source string. + An unstable match can occur only when applying N0c, where + the resolved level depends on the preceding context, and + this context may be affected by text occurring later. + Example: RTL paragraph containing: abc[(latin) HEBREW] + When the closing parenthesis is encountered, it appears + that N0c1 must be applied since 'abc' sets an opposite + direction context and both parentheses receive level 2. + However, when the closing square bracket is processed, + N0b applies because of 'HEBREW' being included within the + brackets, thus the square brackets are treated like R and + receive level 1. However, this changes the preceding + context of the opening parenthesis, and it now appears + that N0c2 must be applied to the parentheses rather than + N0c1. */ + + if((direction==0 && pOpening->flags&FOUND_L) || + (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */ + newProp=static_cast<DirProp>(direction); + } + else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */ + /* it is stable if there is no containing pair or in + conditions too complicated and not worth checking */ + stable=(openIdx==pLastIsoRun->start); + if(direction!=pOpening->contextDir) + newProp= static_cast<DirProp>(pOpening->contextDir); /* N0c1 */ + else + newProp= static_cast<DirProp>(direction); /* N0c2 */ + } else { + /* forget this and any brackets nested within this pair */ + pLastIsoRun->limit= static_cast<uint16_t>(openIdx); + return ON; /* N0d */ + } + bd->pBiDi->dirProps[pOpening->position]=newProp; + bd->pBiDi->dirProps[position]=newProp; + /* Update nested N0c pairs that may be affected */ + fixN0c(bd, openIdx, pOpening->position, newProp); + if(stable) { + pLastIsoRun->limit= static_cast<uint16_t>(openIdx); /* forget any brackets nested within this pair */ + /* remove lower located synonyms if any */ + while(pLastIsoRun->limit>pLastIsoRun->start && + bd->openings[pLastIsoRun->limit-1].position==pOpening->position) + pLastIsoRun->limit--; + } else { + int32_t k; + pOpening->match=-position; + /* neutralize lower located synonyms if any */ + k=openIdx-1; + while(k>=pLastIsoRun->start && + bd->openings[k].position==pOpening->position) + bd->openings[k--].match=0; + /* neutralize any unmatched opening between the current pair; + this will also neutralize higher located synonyms if any */ + for(k=openIdx+1; k<pLastIsoRun->limit; k++) { + qOpening=&bd->openings[k]; + if(qOpening->position>=position) + break; + if(qOpening->match>0) + qOpening->match=0; + } + } + return newProp; +} + +/* handle strong characters, digits and candidates for closing brackets */ +static UBool /* return TRUE if success */ +bracketProcessChar(BracketData *bd, int32_t position) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + DirProp *dirProps, dirProp, newProp; + UBiDiLevel level; + dirProps=bd->pBiDi->dirProps; + dirProp=dirProps[position]; + if(dirProp==ON) { + UChar c, match; + int32_t idx; + /* First see if it is a matching closing bracket. Hopefully, this is + more efficient than checking if it is a closing bracket at all */ + c=bd->pBiDi->text[position]; + for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) { + if(bd->openings[idx].match!=c) + continue; + /* We have a match */ + newProp=bracketProcessClosing(bd, idx, position); + if(newProp==ON) { /* N0d */ + c=0; /* prevent handling as an opening */ + break; + } + pLastIsoRun->lastBase=ON; + pLastIsoRun->contextDir=(UBiDiDirection)newProp; + pLastIsoRun->contextPos=position; + level=bd->pBiDi->levels[position]; + if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */ + uint16_t flag; + int32_t i; + newProp=level&1; + pLastIsoRun->lastStrong=newProp; + flag=DIRPROP_FLAG(newProp); + for(i=pLastIsoRun->start; i<idx; i++) + bd->openings[i].flags|=flag; + /* matching brackets are not overridden by LRO/RLO */ + bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE; + } + /* matching brackets are not overridden by LRO/RLO */ + bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE; + return TRUE; + } + /* We get here only if the ON character is not a matching closing + bracket or it is a case of N0d */ + /* Now see if it is an opening bracket */ + if(c) + match= static_cast<UChar>(u_getBidiPairedBracket(c)); /* get the matching char */ + else + match=0; + if(match!=c && /* has a matching char */ + ubidi_getPairedBracketType(c)==U_BPT_OPEN) { /* opening bracket */ + /* special case: process synonyms + create an opening entry for each synonym */ + if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */ + if(!bracketAddOpening(bd, 0x3009, position)) + return FALSE; + } + else if(match==0x3009) { /* RIGHT ANGLE BRACKET */ + if(!bracketAddOpening(bd, 0x232A, position)) + return FALSE; + } + if(!bracketAddOpening(bd, match, position)) + return FALSE; + } + } + level=bd->pBiDi->levels[position]; + if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */ + newProp=level&1; + if(dirProp!=S && dirProp!=WS && dirProp!=ON) + dirProps[position]=newProp; + pLastIsoRun->lastBase=newProp; + pLastIsoRun->lastStrong=newProp; + pLastIsoRun->contextDir=(UBiDiDirection)newProp; + pLastIsoRun->contextPos=position; + } + else if(dirProp<=R || dirProp==AL) { + newProp= static_cast<DirProp>(DIR_FROM_STRONG(dirProp)); + pLastIsoRun->lastBase=dirProp; + pLastIsoRun->lastStrong=dirProp; + pLastIsoRun->contextDir=(UBiDiDirection)newProp; + pLastIsoRun->contextPos=position; + } + else if(dirProp==EN) { + pLastIsoRun->lastBase=EN; + if(pLastIsoRun->lastStrong==L) { + newProp=L; /* W7 */ + if(!bd->isNumbersSpecial) + dirProps[position]=ENL; + pLastIsoRun->contextDir=(UBiDiDirection)L; + pLastIsoRun->contextPos=position; + } + else { + newProp=R; /* N0 */ + if(pLastIsoRun->lastStrong==AL) + dirProps[position]=AN; /* W2 */ + else + dirProps[position]=ENR; + pLastIsoRun->contextDir=(UBiDiDirection)R; + pLastIsoRun->contextPos=position; + } + } + else if(dirProp==AN) { + newProp=R; /* N0 */ + pLastIsoRun->lastBase=AN; + pLastIsoRun->contextDir=(UBiDiDirection)R; + pLastIsoRun->contextPos=position; + } + else if(dirProp==NSM) { + /* if the last real char was ON, change NSM to ON so that it + will stay ON even if the last real char is a bracket which + may be changed to L or R */ + newProp=pLastIsoRun->lastBase; + if(newProp==ON) + dirProps[position]=newProp; + } + else { + newProp=dirProp; + pLastIsoRun->lastBase=dirProp; + } + if(newProp<=R || newProp==AL) { + int32_t i; + uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp)); + for(i=pLastIsoRun->start; i<pLastIsoRun->limit; i++) + if(position>bd->openings[i].position) + bd->openings[i].flags|=flag; + } + return TRUE; +} + +/* perform (X1)..(X9) ------------------------------------------------------- */ + +/* determine if the text is mixed-directional or single-directional */ +static UBiDiDirection +directionFromFlags(UBiDi *pBiDi) { + Flags flags=pBiDi->flags; + /* if the text contains AN and neutrals, then some neutrals may become RTL */ + if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) { + return UBIDI_LTR; + } else if(!(flags&MASK_LTR)) { + return UBIDI_RTL; + } else { + return UBIDI_MIXED; + } +} + +/* + * Resolve the explicit levels as specified by explicit embedding codes. + * Recalculate the flags to have them reflect the real properties + * after taking the explicit embeddings into account. + * + * The BiDi algorithm is designed to result in the same behavior whether embedding + * levels are externally specified (from "styled text", supposedly the preferred + * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text. + * That is why (X9) instructs to remove all not-isolate explicit codes (and BN). + * However, in a real implementation, the removal of these codes and their index + * positions in the plain text is undesirable since it would result in + * reallocated, reindexed text. + * Instead, this implementation leaves the codes in there and just ignores them + * in the subsequent processing. + * In order to get the same reordering behavior, positions with a BN or a not-isolate + * explicit embedding code just get the same level assigned as the last "real" + * character. + * + * Some implementations, not this one, then overwrite some of these + * directionality properties at "real" same-level-run boundaries by + * L or R codes so that the resolution of weak types can be performed on the + * entire paragraph at once instead of having to parse it once more and + * perform that resolution on same-level-runs. + * This limits the scope of the implicit rules in effectively + * the same way as the run limits. + * + * Instead, this implementation does not modify these codes, except for + * paired brackets whose properties (ON) may be replaced by L or R. + * On one hand, the paragraph has to be scanned for same-level-runs, but + * on the other hand, this saves another loop to reset these codes, + * or saves making and modifying a copy of dirProps[]. + * + * + * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm. + * + * + * Handling the stack of explicit levels (Xn): + * + * With the BiDi stack of explicit levels, as pushed with each + * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI, + * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL. + * + * In order to have a correct push-pop semantics even in the case of overflows, + * overflow counters and a valid isolate counter are used as described in UAX#9 + * section 3.3.2 "Explicit Levels and Directions". + * + * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. + * + * Returns normally the direction; -1 if there was a memory shortage + * + */ +static UBiDiDirection +resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { + DirProp *dirProps=pBiDi->dirProps; + UBiDiLevel *levels=pBiDi->levels; + const UChar *text=pBiDi->text; + + int32_t i=0, length=pBiDi->length; + Flags flags=pBiDi->flags; /* collect all directionalities in the text */ + DirProp dirProp; + UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); + UBiDiDirection direction; + pBiDi->isolateCount=0; + + if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; } + + /* determine if the text is mixed-directional or single-directional */ + direction=directionFromFlags(pBiDi); + + /* we may not need to resolve any explicit levels */ + if((direction!=UBIDI_MIXED)) { + /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ + return direction; + } + if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) { + /* inverse BiDi: mixed, but all characters are at the same embedding level */ + /* set all levels to the paragraph level */ + int32_t paraIndex, start, limit; + for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) { + if(paraIndex==0) + start=0; + else + start=pBiDi->paras[paraIndex-1].limit; + limit=pBiDi->paras[paraIndex].limit; + level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level); + for(i=start; i<limit; i++) + levels[i]=level; + } + return direction; /* no bracket matching for inverse BiDi */ + } + if(!(flags&(MASK_EXPLICIT|MASK_ISO))) { + /* no embeddings, set all levels to the paragraph level */ + /* we still have to perform bracket matching */ + int32_t paraIndex, start, limit; + BracketData bracketData; + bracketInit(pBiDi, &bracketData); + for(paraIndex=0; paraIndex<pBiDi->paraCount; paraIndex++) { + if(paraIndex==0) + start=0; + else + start=pBiDi->paras[paraIndex-1].limit; + limit=pBiDi->paras[paraIndex].limit; + level= static_cast<UBiDiLevel>(pBiDi->paras[paraIndex].level); + for(i=start; i<limit; i++) { + levels[i]=level; + dirProp=dirProps[i]; + if(dirProp==BN) + continue; + if(dirProp==B) { + if((i+1)<length) { + if(text[i]==CR && text[i+1]==LF) + continue; /* skip CR when followed by LF */ + bracketProcessB(&bracketData, level); + } + continue; + } + if(!bracketProcessChar(&bracketData, i)) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return UBIDI_LTR; + } + } + } + return direction; + } + { + /* continue to perform (Xn) */ + + /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */ + /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */ + UBiDiLevel embeddingLevel=level, newLevel; + UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */ + int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */ + + /* The following stack remembers the embedding level and the ISOLATE flag of level runs. + stackLast points to its current entry. */ + uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL + but we need one more entry as base */ + uint32_t stackLast=0; + int32_t overflowIsolateCount=0; + int32_t overflowEmbeddingCount=0; + int32_t validIsolateCount=0; + BracketData bracketData; + bracketInit(pBiDi, &bracketData); + stack[0]=level; /* initialize base entry to para level, no override, no isolate */ + + /* recalculate the flags */ + flags=0; + + for(i=0; i<length; ++i) { + dirProp=dirProps[i]; + switch(dirProp) { + case LRE: + case RLE: + case LRO: + case RLO: + /* (X2, X3, X4, X5) */ + flags|=DIRPROP_FLAG(BN); + levels[i]=previousLevel; + if (dirProp==LRE || dirProp==LRO) + /* least greater even level */ + newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); + else + /* least greater odd level */ + newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1); + if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 && + overflowEmbeddingCount==0) { + lastCcPos=i; + embeddingLevel=newLevel; + if(dirProp==LRO || dirProp==RLO) + embeddingLevel|=UBIDI_LEVEL_OVERRIDE; + stackLast++; + stack[stackLast]=embeddingLevel; + /* we don't need to set UBIDI_LEVEL_OVERRIDE off for LRE and RLE + since this has already been done for newLevel which is + the source for embeddingLevel. + */ + } else { + if(overflowIsolateCount==0) + overflowEmbeddingCount++; + } + break; + case PDF: + /* (X7) */ + flags|=DIRPROP_FLAG(BN); + levels[i]=previousLevel; + /* handle all the overflow cases first */ + if(overflowIsolateCount) { + break; + } + if(overflowEmbeddingCount) { + overflowEmbeddingCount--; + break; + } + if(stackLast>0 && stack[stackLast]<ISOLATE) { /* not an isolate entry */ + lastCcPos=i; + stackLast--; + embeddingLevel=(UBiDiLevel)stack[stackLast]; + } + break; + case LRI: + case RLI: + flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel)); + levels[i]=NO_OVERRIDE(embeddingLevel); + if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) { + bracketProcessBoundary(&bracketData, lastCcPos, + previousLevel, embeddingLevel); + flags|=DIRPROP_FLAG_MULTI_RUNS; + } + previousLevel=embeddingLevel; + /* (X5a, X5b) */ + if(dirProp==LRI) + /* least greater even level */ + newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); + else + /* least greater odd level */ + newLevel=(UBiDiLevel)((NO_OVERRIDE(embeddingLevel)+1)|1); + if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL && overflowIsolateCount==0 && + overflowEmbeddingCount==0) { + flags|=DIRPROP_FLAG(dirProp); + lastCcPos=i; + validIsolateCount++; + if(validIsolateCount>pBiDi->isolateCount) + pBiDi->isolateCount=validIsolateCount; + embeddingLevel=newLevel; + /* we can increment stackLast without checking because newLevel + will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */ + stackLast++; + stack[stackLast]=embeddingLevel+ISOLATE; + bracketProcessLRI_RLI(&bracketData, embeddingLevel); + } else { + /* make it WS so that it is handled by adjustWSLevels() */ + dirProps[i]=WS; + overflowIsolateCount++; + } + break; + case PDI: + if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) { + bracketProcessBoundary(&bracketData, lastCcPos, + previousLevel, embeddingLevel); + flags|=DIRPROP_FLAG_MULTI_RUNS; + } + /* (X6a) */ + if(overflowIsolateCount) { + overflowIsolateCount--; + /* make it WS so that it is handled by adjustWSLevels() */ + dirProps[i]=WS; + } + else if(validIsolateCount) { + flags|=DIRPROP_FLAG(PDI); + lastCcPos=i; + overflowEmbeddingCount=0; + while(stack[stackLast]<ISOLATE) /* pop embedding entries */ + stackLast--; /* until the last isolate entry */ + stackLast--; /* pop also the last isolate entry */ + validIsolateCount--; + bracketProcessPDI(&bracketData); + } else + /* make it WS so that it is handled by adjustWSLevels() */ + dirProps[i]=WS; + embeddingLevel=(UBiDiLevel)stack[stackLast]&~ISOLATE; + flags|=(DIRPROP_FLAG(ON)|DIRPROP_FLAG_LR(embeddingLevel)); + previousLevel=embeddingLevel; + levels[i]=NO_OVERRIDE(embeddingLevel); + break; + case B: + flags|=DIRPROP_FLAG(B); + levels[i]=GET_PARALEVEL(pBiDi, i); + if((i+1)<length) { + if(text[i]==CR && text[i+1]==LF) + break; /* skip CR when followed by LF */ + overflowEmbeddingCount=overflowIsolateCount=0; + validIsolateCount=0; + stackLast=0; + previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1); + stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */ + bracketProcessB(&bracketData, embeddingLevel); + } + break; + case BN: + /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ + /* they will get their levels set correctly in adjustWSLevels() */ + levels[i]=previousLevel; + flags|=DIRPROP_FLAG(BN); + break; + default: + /* all other types are normal characters and get the "real" level */ + if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) { + bracketProcessBoundary(&bracketData, lastCcPos, + previousLevel, embeddingLevel); + flags|=DIRPROP_FLAG_MULTI_RUNS; + if(embeddingLevel&UBIDI_LEVEL_OVERRIDE) + flags|=DIRPROP_FLAG_O(embeddingLevel); + else + flags|=DIRPROP_FLAG_E(embeddingLevel); + } + previousLevel=embeddingLevel; + levels[i]=embeddingLevel; + if(!bracketProcessChar(&bracketData, i)) + return (UBiDiDirection)-1; + /* the dirProp may have been changed in bracketProcessChar() */ + flags|=DIRPROP_FLAG(dirProps[i]); + break; + } + } + if(flags&MASK_EMBEDDING) + flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); + if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) + flags|=DIRPROP_FLAG(L); + /* again, determine if the text is mixed-directional or single-directional */ + pBiDi->flags=flags; + direction=directionFromFlags(pBiDi); + } + return direction; +} + +/* + * Use a pre-specified embedding levels array: + * + * Adjust the directional properties for overrides (->LEVEL_OVERRIDE), + * ignore all explicit codes (X9), + * and check all the preset levels. + * + * Recalculate the flags to have them reflect the real properties + * after taking the explicit embeddings into account. + */ +static UBiDiDirection +checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { + DirProp *dirProps=pBiDi->dirProps; + UBiDiLevel *levels=pBiDi->levels; + int32_t isolateCount=0; + + int32_t length=pBiDi->length; + Flags flags=0; /* collect all directionalities in the text */ + pBiDi->isolateCount=0; + + int32_t currentParaIndex = 0; + int32_t currentParaLimit = pBiDi->paras[0].limit; + int32_t currentParaLevel = pBiDi->paraLevel; + + for(int32_t i=0; i<length; ++i) { + UBiDiLevel level=levels[i]; + DirProp dirProp=dirProps[i]; + if(dirProp==LRI || dirProp==RLI) { + isolateCount++; + if(isolateCount>pBiDi->isolateCount) + pBiDi->isolateCount=isolateCount; + } + else if(dirProp==PDI) + isolateCount--; + else if(dirProp==B) + isolateCount=0; + + // optimized version of int32_t currentParaLevel = GET_PARALEVEL(pBiDi, i); + if (pBiDi->defaultParaLevel != 0 && + i == currentParaLimit && (currentParaIndex + 1) < pBiDi->paraCount) { + currentParaLevel = pBiDi->paras[++currentParaIndex].level; + currentParaLimit = pBiDi->paras[currentParaIndex].limit; + } + + UBiDiLevel overrideFlag = level & UBIDI_LEVEL_OVERRIDE; + level &= ~UBIDI_LEVEL_OVERRIDE; + if (level < currentParaLevel || UBIDI_MAX_EXPLICIT_LEVEL < level) { + if (level == 0) { + if (dirProp == B) { + // Paragraph separators are ok with explicit level 0. + // Prevents reordering of paragraphs. + } else { + // Treat explicit level 0 as a wildcard for the paragraph level. + // Avoid making the caller guess what the paragraph level would be. + level = (UBiDiLevel)currentParaLevel; + levels[i] = level | overrideFlag; + } + } else { + // 1 <= level < currentParaLevel or UBIDI_MAX_EXPLICIT_LEVEL < level + /* level out of bounds */ + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return UBIDI_LTR; + } + } + if (overrideFlag != 0) { + /* keep the override flag in levels[i] but adjust the flags */ + flags|=DIRPROP_FLAG_O(level); + } else { + /* set the flags */ + flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProp); + } + } + if(flags&MASK_EMBEDDING) + flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); + /* determine if the text is mixed-directional or single-directional */ + pBiDi->flags=flags; + return directionFromFlags(pBiDi); +} + +/****************************************************************** + The Properties state machine table +******************************************************************* + + All table cells are 8 bits: + bits 0..4: next state + bits 5..7: action to perform (if > 0) + + Cells may be of format "n" where n represents the next state + (except for the rightmost column). + Cells may also be of format "s(x,y)" where x represents an action + to perform and y represents the next state. + +******************************************************************* + Definitions and type for properties state table +******************************************************************* +*/ +#define IMPTABPROPS_COLUMNS 16 +#define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1) +#define GET_STATEPROPS(cell) ((cell)&0x1f) +#define GET_ACTIONPROPS(cell) ((cell)>>5) +#define s(action, newState) ((uint8_t)(newState+(action<<5))) + +static const uint8_t groupProp[] = /* dirProp regrouped */ +{ +/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */ + 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14 +}; +enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */ + +/****************************************************************** + + PROPERTIES STATE TABLE + + In table impTabProps, + - the ON column regroups ON and WS, FSI, RLI, LRI and PDI + - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF + - the Res column is the reduced property assigned to a run + + Action 1: process current run1, init new run1 + 2: init new run2 + 3: process run1, process run2, init new run1 + 4: process run1, set run1=run2, init new run2 + + Notes: + 1) This table is used in resolveImplicitLevels(). + 2) This table triggers actions when there is a change in the Bidi + property of incoming characters (action 1). + 3) Most such property sequences are processed immediately (in + fact, passed to processPropertySeq(). + 4) However, numbers are assembled as one sequence. This means + that undefined situations (like CS following digits, until + it is known if the next char will be a digit) are held until + following chars define them. + Example: digits followed by CS, then comes another CS or ON; + the digits will be processed, then the CS assigned + as the start of an ON sequence (action 3). + 5) There are cases where more than one sequence must be + processed, for instance digits followed by CS followed by L: + the digits must be processed as one sequence, and the CS + must be processed as an ON sequence, all this before starting + assembling chars for the opening L sequence. + + +*/ +static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] = +{ +/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */ +/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON }, +/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L }, +/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R }, +/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R }, +/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN }, +/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN }, +/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN }, +/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON }, +/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON }, +/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON }, +/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN }, +/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN }, +/*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN }, +/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN }, +/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON }, +/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S }, +/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S }, +/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B }, +/*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L }, +/*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L }, +/*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L }, +/*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN }, +/*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN }, +/*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN } +}; + +/* we must undef macro s because the levels tables have a different + * structure (4 bits for action and 4 bits for next state. + */ +#undef s + +/****************************************************************** + The levels state machine tables +******************************************************************* + + All table cells are 8 bits: + bits 0..3: next state + bits 4..7: action to perform (if > 0) + + Cells may be of format "n" where n represents the next state + (except for the rightmost column). + Cells may also be of format "s(x,y)" where x represents an action + to perform and y represents the next state. + + This format limits each table to 16 states each and to 15 actions. + +******************************************************************* + Definitions and type for levels state tables +******************************************************************* +*/ +#define IMPTABLEVELS_COLUMNS (DirProp_B + 2) +#define IMPTABLEVELS_RES (IMPTABLEVELS_COLUMNS - 1) +#define GET_STATE(cell) ((cell)&0x0f) +#define GET_ACTION(cell) ((cell)>>4) +#define s(action, newState) ((uint8_t)(newState+(action<<4))) + +typedef uint8_t ImpTab[][IMPTABLEVELS_COLUMNS]; +typedef uint8_t ImpAct[]; + +/* FOOD FOR THOUGHT: each ImpTab should have its associated ImpAct, + * instead of having a pair of ImpTab and a pair of ImpAct. + */ +typedef struct ImpTabPair { + const void * pImpTab[2]; + const void * pImpAct[2]; +} ImpTabPair; + +/****************************************************************** + + LEVELS STATE TABLES + + In all levels state tables, + - state 0 is the initial state + - the Res column is the increment to add to the text level + for this property sequence. + + The impAct arrays for each table of a pair map the local action + numbers of the table to the total list of actions. For instance, + action 2 in a given table corresponds to the action number which + appears in entry [2] of the impAct array for that table. + The first entry of all impAct arrays must be 0. + + Action 1: init conditional sequence + 2: prepend conditional sequence to current sequence + 3: set ON sequence to new level - 1 + 4: init EN/AN/ON sequence + 5: fix EN/AN/ON sequence followed by R + 6: set previous level sequence to level 2 + + Notes: + 1) These tables are used in processPropertySeq(). The input + is property sequences as determined by resolveImplicitLevels. + 2) Most such property sequences are processed immediately + (levels are assigned). + 3) However, some sequences cannot be assigned a final level till + one or more following sequences are received. For instance, + ON following an R sequence within an even-level paragraph. + If the following sequence is R, the ON sequence will be + assigned basic run level+1, and so will the R sequence. + 4) S is generally handled like ON, since its level will be fixed + to paragraph level in adjustWSLevels(). + +*/ + +static const ImpTab impTabL_DEFAULT = /* Even paragraph level */ +/* In this table, conditional sequences receive the lower possible level + until proven otherwise. +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 0 , 1 , 0 , 2 , 0 , 0 , 0 , 0 }, +/* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 }, +/* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 }, +/* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 }, +/* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 }, +/* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 } +}; +static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */ +/* In this table, conditional sequences receive the lower possible level + until proven otherwise. +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, +/* 1 : L */ { 1 , 0 , 1 , 3 , s(1,4), s(1,4), 0 , 1 }, +/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, +/* 3 : L+AN */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 1 }, +/* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 }, +/* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 } +}; +static const ImpAct impAct0 = {0,1,2,3,4}; +static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT, + &impTabR_DEFAULT}, + {&impAct0, &impAct0}}; + +static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */ +/* In this table, conditional sequences receive the lower possible level + until proven otherwise. +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, +/* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 }, +/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 }, +/* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 }, +/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 } +}; +static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL, + &impTabR_DEFAULT}, + {&impAct0, &impAct0}}; + +static const ImpTab impTabL_GROUP_NUMBERS_WITH_R = +/* In this table, EN/AN+ON sequences receive levels as if associated with R + until proven that there is L or sor/eor on both sides. AN is handled like EN. +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 init */ { 0 , 3 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, +/* 1 EN/AN */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 2 }, +/* 2 EN/AN+ON */ { s(2,0), 3 , 1 , 1 , 2 , s(2,0), s(2,0), 1 }, +/* 3 R */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 1 }, +/* 4 R+ON */ { s(2,0), 3 , 5 , 5 , 4 , s(2,0), s(2,0), 1 }, +/* 5 R+EN/AN */ { 0 , 3 , 5 , 5 , s(1,4), 0 , 0 , 2 } +}; +static const ImpTab impTabR_GROUP_NUMBERS_WITH_R = +/* In this table, EN/AN+ON sequences receive levels as if associated with R + until proven that there is L on both sides. AN is handled like EN. +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 init */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, +/* 1 EN/AN */ { 2 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, +/* 2 L */ { 2 , 0 , s(1,4), s(1,4), s(1,3), 0 , 0 , 1 }, +/* 3 L+ON */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 0 }, +/* 4 L+EN/AN */ { s(2,2), 0 , 4 , 4 , 3 , 0 , 0 , 1 } +}; +static const ImpTabPair impTab_GROUP_NUMBERS_WITH_R = { + {&impTabL_GROUP_NUMBERS_WITH_R, + &impTabR_GROUP_NUMBERS_WITH_R}, + {&impAct0, &impAct0}}; + + +static const ImpTab impTabL_INVERSE_NUMBERS_AS_L = +/* This table is identical to the Default LTR table except that EN and AN are + handled like L. +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 0 , 1 , 0 , 0 , 0 , 0 , 0 , 0 }, +/* 1 : R */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 1 }, +/* 2 : AN */ { 0 , 1 , 0 , 0 , s(1,5), s(1,5), 0 , 2 }, +/* 3 : R+EN/AN */ { 0 , 1 , 0 , 0 , s(1,4), s(1,4), 0 , 2 }, +/* 4 : R+ON */ { s(2,0), 1 , s(2,0), s(2,0), 4 , 4 , s(2,0), 1 }, +/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), s(2,0), 5 , 5 , s(2,0), 1 } +}; +static const ImpTab impTabR_INVERSE_NUMBERS_AS_L = +/* This table is identical to the Default RTL table except that EN and AN are + handled like L. +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 0 }, +/* 1 : L */ { 1 , 0 , 1 , 1 , s(1,4), s(1,4), 0 , 1 }, +/* 2 : EN/AN */ { 1 , 0 , 1 , 1 , 0 , 0 , 0 , 1 }, +/* 3 : L+AN */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 1 }, +/* 4 : L+ON */ { s(2,1), 0 , s(2,1), s(2,1), 4 , 4 , 0 , 0 }, +/* 5 : L+AN+ON */ { 1 , 0 , 1 , 1 , 5 , 5 , 0 , 0 } +}; +static const ImpTabPair impTab_INVERSE_NUMBERS_AS_L = { + {&impTabL_INVERSE_NUMBERS_AS_L, + &impTabR_INVERSE_NUMBERS_AS_L}, + {&impAct0, &impAct0}}; + +static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */ +/* In this table, conditional sequences receive the lower possible level + until proven otherwise. +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 0 }, +/* 1 : L */ { 1 , 0 , 1 , 2 , s(1,3), s(1,3), 0 , 1 }, +/* 2 : EN/AN */ { 1 , 0 , 2 , 2 , 0 , 0 , 0 , 1 }, +/* 3 : L+ON */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 0 }, +/* 4 : L+ON+AN */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 3 }, +/* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 }, +/* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 } +}; +static const ImpAct impAct1 = {0,1,13,14}; +/* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc" + */ +static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = { + {&impTabL_DEFAULT, + &impTabR_INVERSE_LIKE_DIRECT}, + {&impAct0, &impAct1}}; + +static const ImpTab impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS = +/* The case handled in this table is (visually): R EN L +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 0 , s(6,3), 0 , 1 , 0 , 0 , 0 , 0 }, +/* 1 : L+AN */ { 0 , s(6,3), 0 , 1 , s(1,2), s(3,0), 0 , 4 }, +/* 2 : L+AN+ON */ { s(2,0), s(6,3), s(2,0), 1 , 2 , s(3,0), s(2,0), 3 }, +/* 3 : R */ { 0 , s(6,3), s(5,5), s(5,6), s(1,4), s(3,0), 0 , 3 }, +/* 4 : R+ON */ { s(3,0), s(4,3), s(5,5), s(5,6), 4 , s(3,0), s(3,0), 3 }, +/* 5 : R+EN */ { s(3,0), s(4,3), 5 , s(5,6), s(1,4), s(3,0), s(3,0), 4 }, +/* 6 : R+AN */ { s(3,0), s(4,3), s(5,5), 6 , s(1,4), s(3,0), s(3,0), 4 } +}; +static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = +/* The cases handled in this table are (visually): R EN L + R L AN L +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { s(1,3), 0 , 1 , 1 , 0 , 0 , 0 , 0 }, +/* 1 : R+EN/AN */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 1 }, +/* 2 : R+EN/AN+ON */ { s(2,3), 0 , 1 , 1 , 2 , s(4,0), 0 , 0 }, +/* 3 : L */ { 3 , 0 , 3 , s(3,6), s(1,4), s(4,0), 0 , 1 }, +/* 4 : L+ON */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 0 }, +/* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 }, +/* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 } +}; +static const ImpAct impAct2 = {0,1,2,5,6,7,8}; +static const ImpAct impAct3 = {0,1,9,10,11,12}; +static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = { + {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, + &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, + {&impAct2, &impAct3}}; + +static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = { + {&impTabL_NUMBERS_SPECIAL, + &impTabR_INVERSE_LIKE_DIRECT}, + {&impAct0, &impAct1}}; + +static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = +/* The case handled in this table is (visually): R EN L +*/ +{ +/* L , R , EN , AN , ON , S , B , Res */ +/* 0 : init */ { 0 , s(6,2), 1 , 1 , 0 , 0 , 0 , 0 }, +/* 1 : L+EN/AN */ { 0 , s(6,2), 1 , 1 , 0 , s(3,0), 0 , 4 }, +/* 2 : R */ { 0 , s(6,2), s(5,4), s(5,4), s(1,3), s(3,0), 0 , 3 }, +/* 3 : R+ON */ { s(3,0), s(4,2), s(5,4), s(5,4), 3 , s(3,0), s(3,0), 3 }, +/* 4 : R+EN/AN */ { s(3,0), s(4,2), 4 , 4 , s(1,3), s(3,0), s(3,0), 4 } +}; +static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = { + {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, + &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, + {&impAct2, &impAct3}}; + +#undef s + +typedef struct { + const ImpTab * pImpTab; /* level table pointer */ + const ImpAct * pImpAct; /* action map array */ + int32_t startON; /* start of ON sequence */ + int32_t startL2EN; /* start of level 2 sequence */ + int32_t lastStrongRTL; /* index of last found R or AL */ + int32_t state; /* current state */ + int32_t runStart; /* start position of the run */ + UBiDiLevel runLevel; /* run level before implicit solving */ +} LevState; + +/*------------------------------------------------------------------------*/ + +static void +addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) + /* param pos: position where to insert + param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER + */ +{ +#define FIRSTALLOC 10 + Point point; + InsertPoints * pInsertPoints=&(pBiDi->insertPoints); + + if (pInsertPoints->capacity == 0) + { + pInsertPoints->points=static_cast<Point *>(uprv_malloc(sizeof(Point)*FIRSTALLOC)); + if (pInsertPoints->points == NULL) + { + pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + pInsertPoints->capacity=FIRSTALLOC; + } + if (pInsertPoints->size >= pInsertPoints->capacity) /* no room for new point */ + { + Point * savePoints=pInsertPoints->points; + pInsertPoints->points=static_cast<Point *>(uprv_realloc(pInsertPoints->points, + pInsertPoints->capacity*2*sizeof(Point))); + if (pInsertPoints->points == NULL) + { + pInsertPoints->points=savePoints; + pInsertPoints->errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + else pInsertPoints->capacity*=2; + } + point.pos=pos; + point.flag=flag; + pInsertPoints->points[pInsertPoints->size]=point; + pInsertPoints->size++; +#undef FIRSTALLOC +} + +static void +setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level) +{ + DirProp *dirProps=pBiDi->dirProps, dirProp; + UBiDiLevel *levels=pBiDi->levels; + int32_t isolateCount=0, k; + for(k=start; k<limit; k++) { + dirProp=dirProps[k]; + if(dirProp==PDI) + isolateCount--; + if(isolateCount==0) + levels[k]=level; + if(dirProp==LRI || dirProp==RLI) + isolateCount++; + } +} + +/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */ + +/* + * This implementation of the (Wn) rules applies all rules in one pass. + * In order to do so, it needs a look-ahead of typically 1 character + * (except for W5: sequences of ET) and keeps track of changes + * in a rule Wp that affect a later Wq (p<q). + * + * The (Nn) and (In) rules are also performed in that same single loop, + * but effectively one iteration behind for white space. + * + * Since all implicit rules are performed in one step, it is not necessary + * to actually store the intermediate directional properties in dirProps[]. + */ + +static void +processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, + int32_t start, int32_t limit) { + uint8_t cell, oldStateSeq, actionSeq; + const ImpTab * pImpTab=pLevState->pImpTab; + const ImpAct * pImpAct=pLevState->pImpAct; + UBiDiLevel * levels=pBiDi->levels; + UBiDiLevel level, addLevel; + InsertPoints * pInsertPoints; + int32_t start0, k; + + start0=start; /* save original start position */ + oldStateSeq=(uint8_t)pLevState->state; + cell=(*pImpTab)[oldStateSeq][_prop]; + pLevState->state=GET_STATE(cell); /* isolate the new state */ + actionSeq=(*pImpAct)[GET_ACTION(cell)]; /* isolate the action */ + addLevel=(*pImpTab)[pLevState->state][IMPTABLEVELS_RES]; + + if(actionSeq) { + switch(actionSeq) { + case 1: /* init ON seq */ + pLevState->startON=start0; + break; + + case 2: /* prepend ON seq to current seq */ + start=pLevState->startON; + break; + + case 3: /* EN/AN after R+ON */ + level=pLevState->runLevel+1; + setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level); + break; + + case 4: /* EN/AN before R for NUMBERS_SPECIAL */ + level=pLevState->runLevel+2; + setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level); + break; + + case 5: /* L or S after possible relevant EN/AN */ + /* check if we had EN after R/AL */ + if (pLevState->startL2EN >= 0) { + addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); + } + pLevState->startL2EN=-1; /* not within previous if since could also be -2 */ + /* check if we had any relevant EN/AN after R/AL */ + pInsertPoints=&(pBiDi->insertPoints); + if ((pInsertPoints->capacity == 0) || + (pInsertPoints->size <= pInsertPoints->confirmed)) + { + /* nothing, just clean up */ + pLevState->lastStrongRTL=-1; + /* check if we have a pending conditional segment */ + level=(*pImpTab)[oldStateSeq][IMPTABLEVELS_RES]; + if ((level & 1) && (pLevState->startON > 0)) { /* after ON */ + start=pLevState->startON; /* reset to basic run level */ + } + if (_prop == DirProp_S) /* add LRM before S */ + { + addPoint(pBiDi, start0, LRM_BEFORE); + pInsertPoints->confirmed=pInsertPoints->size; + } + break; + } + /* reset previous RTL cont to level for LTR text */ + for (k=pLevState->lastStrongRTL+1; k<start0; k++) + { + /* reset odd level, leave runLevel+2 as is */ + levels[k]=(levels[k] - 2) & ~1; + } + /* mark insert points as confirmed */ + pInsertPoints->confirmed=pInsertPoints->size; + pLevState->lastStrongRTL=-1; + if (_prop == DirProp_S) /* add LRM before S */ + { + addPoint(pBiDi, start0, LRM_BEFORE); + pInsertPoints->confirmed=pInsertPoints->size; + } + break; + + case 6: /* R/AL after possible relevant EN/AN */ + /* just clean up */ + pInsertPoints=&(pBiDi->insertPoints); + if (pInsertPoints->capacity > 0) + /* remove all non confirmed insert points */ + pInsertPoints->size=pInsertPoints->confirmed; + pLevState->startON=-1; + pLevState->startL2EN=-1; + pLevState->lastStrongRTL=limit - 1; + break; + + case 7: /* EN/AN after R/AL + possible cont */ + /* check for real AN */ + if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) && + (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)) + { + /* real AN */ + if (pLevState->startL2EN == -1) /* if no relevant EN already found */ + { + /* just note the righmost digit as a strong RTL */ + pLevState->lastStrongRTL=limit - 1; + break; + } + if (pLevState->startL2EN >= 0) /* after EN, no AN */ + { + addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); + pLevState->startL2EN=-2; + } + /* note AN */ + addPoint(pBiDi, start0, LRM_BEFORE); + break; + } + /* if first EN/AN after R/AL */ + if (pLevState->startL2EN == -1) { + pLevState->startL2EN=start0; + } + break; + + case 8: /* note location of latest R/AL */ + pLevState->lastStrongRTL=limit - 1; + pLevState->startON=-1; + break; + + case 9: /* L after R+ON/EN/AN */ + /* include possible adjacent number on the left */ + for (k=start0-1; k>=0 && !(levels[k]&1); k--); + if(k>=0) { + addPoint(pBiDi, k, RLM_BEFORE); /* add RLM before */ + pInsertPoints=&(pBiDi->insertPoints); + pInsertPoints->confirmed=pInsertPoints->size; /* confirm it */ + } + pLevState->startON=start0; + break; + + case 10: /* AN after L */ + /* AN numbers between L text on both sides may be trouble. */ + /* tentatively bracket with LRMs; will be confirmed if followed by L */ + addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */ + addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */ + break; + + case 11: /* R after L+ON/EN/AN */ + /* false alert, infirm LRMs around previous AN */ + pInsertPoints=&(pBiDi->insertPoints); + pInsertPoints->size=pInsertPoints->confirmed; + if (_prop == DirProp_S) /* add RLM before S */ + { + addPoint(pBiDi, start0, RLM_BEFORE); + pInsertPoints->confirmed=pInsertPoints->size; + } + break; + + case 12: /* L after L+ON/AN */ + level=pLevState->runLevel + addLevel; + for(k=pLevState->startON; k<start0; k++) { + if (levels[k]<level) + levels[k]=level; + } + pInsertPoints=&(pBiDi->insertPoints); + pInsertPoints->confirmed=pInsertPoints->size; /* confirm inserts */ + pLevState->startON=start0; + break; + + case 13: /* L after L+ON+EN/AN/ON */ + level=pLevState->runLevel; + for(k=start0-1; k>=pLevState->startON; k--) { + if(levels[k]==level+3) { + while(levels[k]==level+3) { + levels[k--]-=2; + } + while(levels[k]==level) { + k--; + } + } + if(levels[k]==level+2) { + levels[k]=level; + continue; + } + levels[k]=level+1; + } + break; + + case 14: /* R after L+ON+EN/AN/ON */ + level=pLevState->runLevel+1; + for(k=start0-1; k>=pLevState->startON; k--) { + if(levels[k]>level) { + levels[k]-=2; + } + } + break; + + default: /* we should never get here */ + UPRV_UNREACHABLE; + } + } + if((addLevel) || (start < start0)) { + level=pLevState->runLevel + addLevel; + if(start>=pLevState->runStart) { + for(k=start; k<limit; k++) { + levels[k]=level; + } + } else { + setLevelsOutsideIsolates(pBiDi, start, limit, level); + } + } +} + +/** + * Returns the directionality of the last strong character at the end of the prologue, if any. + * Requires prologue!=null. + */ +static DirProp +lastL_R_AL(UBiDi *pBiDi) { + const UChar *text=pBiDi->prologue; + int32_t length=pBiDi->proLength; + int32_t i; + UChar32 uchar; + DirProp dirProp; + for(i=length; i>0; ) { + /* i is decremented by U16_PREV */ + U16_PREV(text, 0, i, uchar); + dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); + if(dirProp==L) { + return DirProp_L; + } + if(dirProp==R || dirProp==AL) { + return DirProp_R; + } + if(dirProp==B) { + return DirProp_ON; + } + } + return DirProp_ON; +} + +/** + * Returns the directionality of the first strong character, or digit, in the epilogue, if any. + * Requires epilogue!=null. + */ +static DirProp +firstL_R_AL_EN_AN(UBiDi *pBiDi) { + const UChar *text=pBiDi->epilogue; + int32_t length=pBiDi->epiLength; + int32_t i; + UChar32 uchar; + DirProp dirProp; + for(i=0; i<length; ) { + /* i is incremented by U16_NEXT */ + U16_NEXT(text, i, length, uchar); + dirProp=(DirProp)ubidi_getCustomizedClass(pBiDi, uchar); + if(dirProp==L) { + return DirProp_L; + } + if(dirProp==R || dirProp==AL) { + return DirProp_R; + } + if(dirProp==EN) { + return DirProp_EN; + } + if(dirProp==AN) { + return DirProp_AN; + } + } + return DirProp_ON; +} + +static void +resolveImplicitLevels(UBiDi *pBiDi, + int32_t start, int32_t limit, + DirProp sor, DirProp eor) { + const DirProp *dirProps=pBiDi->dirProps; + DirProp dirProp; + LevState levState; + int32_t i, start1, start2; + uint16_t oldStateImp, stateImp, actionImp; + uint8_t gprop, resProp, cell; + UBool inverseRTL; + DirProp nextStrongProp=R; + int32_t nextStrongPos=-1; + + /* check for RTL inverse BiDi mode */ + /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to + * loop on the text characters from end to start. + * This would need a different properties state table (at least different + * actions) and different levels state tables (maybe very similar to the + * LTR corresponding ones. + */ + inverseRTL=(UBool) + ((start<pBiDi->lastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) && + (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || + pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); + + /* initialize for property and levels state tables */ + levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ + levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ + levState.runStart=start; + levState.runLevel=pBiDi->levels[start]; + levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1]; + levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1]; + if(start==0 && pBiDi->proLength>0) { + DirProp lastStrong=lastL_R_AL(pBiDi); + if(lastStrong!=DirProp_ON) { + sor=lastStrong; + } + } + /* The isolates[] entries contain enough information to + resume the bidi algorithm in the same state as it was + when it was interrupted by an isolate sequence. */ + if(dirProps[start]==PDI && pBiDi->isolateCount >= 0) { + levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON; + start1=pBiDi->isolates[pBiDi->isolateCount].start1; + stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp; + levState.state=pBiDi->isolates[pBiDi->isolateCount].state; + pBiDi->isolateCount--; + } else { + levState.startON=-1; + start1=start; + if(dirProps[start]==NSM) + stateImp = 1 + sor; + else + stateImp=0; + levState.state=0; + processPropertySeq(pBiDi, &levState, sor, start, start); + } + start2=start; /* to make Java compiler happy */ + + for(i=start; i<=limit; i++) { + if(i>=limit) { + int32_t k; + for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--); + dirProp=dirProps[k]; + if(dirProp==LRI || dirProp==RLI) + break; /* no forced closing for sequence ending with LRI/RLI */ + gprop=eor; + } else { + DirProp prop, prop1; + prop=dirProps[i]; + if(prop==B) { + pBiDi->isolateCount=-1; /* current isolates stack entry == none */ + } + if(inverseRTL) { + if(prop==AL) { + /* AL before EN does not make it AN */ + prop=R; + } else if(prop==EN) { + if(nextStrongPos<=i) { + /* look for next strong char (L/R/AL) */ + int32_t j; + nextStrongProp=R; /* set default */ + nextStrongPos=limit; + for(j=i+1; j<limit; j++) { + prop1=dirProps[j]; + if(prop1==L || prop1==R || prop1==AL) { + nextStrongProp=prop1; + nextStrongPos=j; + break; + } + } + } + if(nextStrongProp==AL) { + prop=AN; + } + } + } + gprop=groupProp[prop]; + } + oldStateImp=stateImp; + cell=impTabProps[oldStateImp][gprop]; + stateImp=GET_STATEPROPS(cell); /* isolate the new state */ + actionImp=GET_ACTIONPROPS(cell); /* isolate the action */ + if((i==limit) && (actionImp==0)) { + /* there is an unprocessed sequence if its property == eor */ + actionImp=1; /* process the last sequence */ + } + if(actionImp) { + resProp=impTabProps[oldStateImp][IMPTABPROPS_RES]; + switch(actionImp) { + case 1: /* process current seq1, init new seq1 */ + processPropertySeq(pBiDi, &levState, resProp, start1, i); + start1=i; + break; + case 2: /* init new seq2 */ + start2=i; + break; + case 3: /* process seq1, process seq2, init new seq1 */ + processPropertySeq(pBiDi, &levState, resProp, start1, start2); + processPropertySeq(pBiDi, &levState, DirProp_ON, start2, i); + start1=i; + break; + case 4: /* process seq1, set seq1=seq2, init new seq2 */ + processPropertySeq(pBiDi, &levState, resProp, start1, start2); + start1=start2; + start2=i; + break; + default: /* we should never get here */ + UPRV_UNREACHABLE; + } + } + } + + /* flush possible pending sequence, e.g. ON */ + if(limit==pBiDi->length && pBiDi->epiLength>0) { + DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi); + if(firstStrong!=DirProp_ON) { + eor=firstStrong; + } + } + + /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */ + for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--); + dirProp=dirProps[i]; + if((dirProp==LRI || dirProp==RLI) && limit<pBiDi->length) { + pBiDi->isolateCount++; + pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp; + pBiDi->isolates[pBiDi->isolateCount].state=levState.state; + pBiDi->isolates[pBiDi->isolateCount].start1=start1; + pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON; + } + else + processPropertySeq(pBiDi, &levState, eor, limit, limit); +} + +/* perform (L1) and (X9) ---------------------------------------------------- */ + +/* + * Reset the embedding levels for some non-graphic characters (L1). + * This function also sets appropriate levels for BN, and + * explicit embedding types that are supposed to have been removed + * from the paragraph in (X9). + */ +static void +adjustWSLevels(UBiDi *pBiDi) { + const DirProp *dirProps=pBiDi->dirProps; + UBiDiLevel *levels=pBiDi->levels; + int32_t i; + + if(pBiDi->flags&MASK_WS) { + UBool orderParagraphsLTR=pBiDi->orderParagraphsLTR; + Flags flag; + + i=pBiDi->trailingWSStart; + while(i>0) { + /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ + while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) { + if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { + levels[i]=0; + } else { + levels[i]=GET_PARALEVEL(pBiDi, i); + } + } + + /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ + /* here, i+1 is guaranteed to be <length */ + while(i>0) { + flag=DIRPROP_FLAG(dirProps[--i]); + if(flag&MASK_BN_EXPLICIT) { + levels[i]=levels[i+1]; + } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { + levels[i]=0; + break; + } else if(flag&MASK_B_S) { + levels[i]=GET_PARALEVEL(pBiDi, i); + break; + } + } + } + } +} + +U_CAPI void U_EXPORT2 +ubidi_setContext(UBiDi *pBiDi, + const UChar *prologue, int32_t proLength, + const UChar *epilogue, int32_t epiLength, + UErrorCode *pErrorCode) { + /* check the argument values */ + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + if(pBiDi==NULL || proLength<-1 || epiLength<-1 || + (prologue==NULL && proLength!=0) || (epilogue==NULL && epiLength!=0)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if(proLength==-1) { + pBiDi->proLength=u_strlen(prologue); + } else { + pBiDi->proLength=proLength; + } + if(epiLength==-1) { + pBiDi->epiLength=u_strlen(epilogue); + } else { + pBiDi->epiLength=epiLength; + } + pBiDi->prologue=prologue; + pBiDi->epilogue=epilogue; +} + +static void +setParaSuccess(UBiDi *pBiDi) { + pBiDi->proLength=0; /* forget the last context */ + pBiDi->epiLength=0; + pBiDi->pParaBiDi=pBiDi; /* mark successful setPara */ +} + +#define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) +#define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x))) + +static void +setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, + UBiDiLevel paraLevel, UErrorCode *pErrorCode) { + int32_t *runsOnlyMemory = NULL; + int32_t *visualMap; + UChar *visualText; + int32_t saveLength, saveTrailingWSStart; + const UBiDiLevel *levels; + UBiDiLevel *saveLevels; + UBiDiDirection saveDirection; + UBool saveMayAllocateText; + Run *runs; + int32_t visualLength, i, j, visualStart, logicalStart, + runCount, runLength, addedRuns, insertRemove, + start, limit, step, indexOddBit, logicalPos, + index0, index1; + uint32_t saveOptions; + + pBiDi->reorderingMode=UBIDI_REORDER_DEFAULT; + if(length==0) { + ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); + goto cleanup3; + } + /* obtain memory for mapping table and visual text */ + runsOnlyMemory=static_cast<int32_t *>(uprv_malloc(length*(sizeof(int32_t)+sizeof(UChar)+sizeof(UBiDiLevel)))); + if(runsOnlyMemory==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + goto cleanup3; + } + visualMap=runsOnlyMemory; + visualText=(UChar *)&visualMap[length]; + saveLevels=(UBiDiLevel *)&visualText[length]; + saveOptions=pBiDi->reorderingOptions; + if(saveOptions & UBIDI_OPTION_INSERT_MARKS) { + pBiDi->reorderingOptions&=~UBIDI_OPTION_INSERT_MARKS; + pBiDi->reorderingOptions|=UBIDI_OPTION_REMOVE_CONTROLS; + } + paraLevel&=1; /* accept only 0 or 1 */ + ubidi_setPara(pBiDi, text, length, paraLevel, NULL, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + goto cleanup3; + } + /* we cannot access directly pBiDi->levels since it is not yet set if + * direction is not MIXED + */ + levels=ubidi_getLevels(pBiDi, pErrorCode); + uprv_memcpy(saveLevels, levels, (size_t)pBiDi->length*sizeof(UBiDiLevel)); + saveTrailingWSStart=pBiDi->trailingWSStart; + saveLength=pBiDi->length; + saveDirection=pBiDi->direction; + + /* FOOD FOR THOUGHT: instead of writing the visual text, we could use + * the visual map and the dirProps array to drive the second call + * to ubidi_setPara (but must make provision for possible removal of + * BiDi controls. Alternatively, only use the dirProps array via + * customized classifier callback. + */ + visualLength=ubidi_writeReordered(pBiDi, visualText, length, + UBIDI_DO_MIRRORING, pErrorCode); + ubidi_getVisualMap(pBiDi, visualMap, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + goto cleanup2; + } + pBiDi->reorderingOptions=saveOptions; + + pBiDi->reorderingMode=UBIDI_REORDER_INVERSE_LIKE_DIRECT; + paraLevel^=1; + /* Because what we did with reorderingOptions, visualText may be shorter + * than the original text. But we don't want the levels memory to be + * reallocated shorter than the original length, since we need to restore + * the levels as after the first call to ubidi_setpara() before returning. + * We will force mayAllocateText to FALSE before the second call to + * ubidi_setpara(), and will restore it afterwards. + */ + saveMayAllocateText=pBiDi->mayAllocateText; + pBiDi->mayAllocateText=FALSE; + ubidi_setPara(pBiDi, visualText, visualLength, paraLevel, NULL, pErrorCode); + pBiDi->mayAllocateText=saveMayAllocateText; + ubidi_getRuns(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + goto cleanup1; + } + /* check if some runs must be split, count how many splits */ + addedRuns=0; + runCount=pBiDi->runCount; + runs=pBiDi->runs; + visualStart=0; + for(i=0; i<runCount; i++, visualStart+=runLength) { + runLength=runs[i].visualLimit-visualStart; + if(runLength<2) { + continue; + } + logicalStart=GET_INDEX(runs[i].logicalStart); + for(j=logicalStart+1; j<logicalStart+runLength; j++) { + index0=visualMap[j]; + index1=visualMap[j-1]; + if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { + addedRuns++; + } + } + } + if(addedRuns) { + if(getRunsMemory(pBiDi, runCount+addedRuns)) { + if(runCount==1) { + /* because we switch from UBiDi.simpleRuns to UBiDi.runs */ + pBiDi->runsMemory[0]=runs[0]; + } + runs=pBiDi->runs=pBiDi->runsMemory; + pBiDi->runCount+=addedRuns; + } else { + goto cleanup1; + } + } + /* split runs which are not consecutive in source text */ + for(i=runCount-1; i>=0; i--) { + runLength= i==0 ? runs[0].visualLimit : + runs[i].visualLimit-runs[i-1].visualLimit; + logicalStart=runs[i].logicalStart; + indexOddBit=GET_ODD_BIT(logicalStart); + logicalStart=GET_INDEX(logicalStart); + if(runLength<2) { + if(addedRuns) { + runs[i+addedRuns]=runs[i]; + } + logicalPos=visualMap[logicalStart]; + runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, + saveLevels[logicalPos]^indexOddBit); + continue; + } + if(indexOddBit) { + start=logicalStart; + limit=logicalStart+runLength-1; + step=1; + } else { + start=logicalStart+runLength-1; + limit=logicalStart; + step=-1; + } + for(j=start; j!=limit; j+=step) { + index0=visualMap[j]; + index1=visualMap[j+step]; + if((BIDI_ABS(index0-index1)!=1) || (saveLevels[index0]!=saveLevels[index1])) { + logicalPos=BIDI_MIN(visualMap[start], index0); + runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, + saveLevels[logicalPos]^indexOddBit); + runs[i+addedRuns].visualLimit=runs[i].visualLimit; + runs[i].visualLimit-=BIDI_ABS(j-start)+1; + insertRemove=runs[i].insertRemove&(LRM_AFTER|RLM_AFTER); + runs[i+addedRuns].insertRemove=insertRemove; + runs[i].insertRemove&=~insertRemove; + start=j+step; + addedRuns--; + } + } + if(addedRuns) { + runs[i+addedRuns]=runs[i]; + } + logicalPos=BIDI_MIN(visualMap[start], visualMap[limit]); + runs[i+addedRuns].logicalStart=MAKE_INDEX_ODD_PAIR(logicalPos, + saveLevels[logicalPos]^indexOddBit); + } + + cleanup1: + /* restore initial paraLevel */ + pBiDi->paraLevel^=1; + cleanup2: + /* restore real text */ + pBiDi->text=text; + pBiDi->length=saveLength; + pBiDi->originalLength=length; + pBiDi->direction=saveDirection; + /* the saved levels should never excess levelsSize, but we check anyway */ + if(saveLength>pBiDi->levelsSize) { + saveLength=pBiDi->levelsSize; + } + uprv_memcpy(pBiDi->levels, saveLevels, (size_t)saveLength*sizeof(UBiDiLevel)); + pBiDi->trailingWSStart=saveTrailingWSStart; + if(pBiDi->runCount>1) { + pBiDi->direction=UBIDI_MIXED; + } + cleanup3: + /* free memory for mapping table and visual text */ + uprv_free(runsOnlyMemory); + + pBiDi->reorderingMode=UBIDI_REORDER_RUNS_ONLY; +} + +/* ubidi_setPara ------------------------------------------------------------ */ + +U_CAPI void U_EXPORT2 +ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, + UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, + UErrorCode *pErrorCode) { + UBiDiDirection direction; + DirProp *dirProps; + + /* check the argument values */ + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + if(pBiDi==NULL || text==NULL || length<-1 || + (paraLevel>UBIDI_MAX_EXPLICIT_LEVEL && paraLevel<UBIDI_DEFAULT_LTR)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if(length==-1) { + length=u_strlen(text); + } + + /* special treatment for RUNS_ONLY mode */ + if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) { + setParaRunsOnly(pBiDi, text, length, paraLevel, pErrorCode); + return; + } + + /* initialize the UBiDi structure */ + pBiDi->pParaBiDi=NULL; /* mark unfinished setPara */ + pBiDi->text=text; + pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; + pBiDi->paraLevel=paraLevel; + pBiDi->direction=(UBiDiDirection)(paraLevel&1); + pBiDi->paraCount=1; + + pBiDi->dirProps=NULL; + pBiDi->levels=NULL; + pBiDi->runs=NULL; + pBiDi->insertPoints.size=0; /* clean up from last call */ + pBiDi->insertPoints.confirmed=0; /* clean up from last call */ + + /* + * Save the original paraLevel if contextual; otherwise, set to 0. + */ + pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel); + + if(length==0) { + /* + * For an empty paragraph, create a UBiDi object with the paraLevel and + * the flags and the direction set but without allocating zero-length arrays. + * There is nothing more to do. + */ + if(IS_DEFAULT_LEVEL(paraLevel)) { + pBiDi->paraLevel&=1; + pBiDi->defaultParaLevel=0; + } + pBiDi->flags=DIRPROP_FLAG_LR(paraLevel); + pBiDi->runCount=0; + pBiDi->paraCount=0; + setParaSuccess(pBiDi); /* mark successful setPara */ + return; + } + + pBiDi->runCount=-1; + + /* allocate paras memory */ + if(pBiDi->parasMemory) + pBiDi->paras=pBiDi->parasMemory; + else + pBiDi->paras=pBiDi->simpleParas; + + /* + * Get the directional properties, + * the flags bit-set, and + * determine the paragraph level if necessary. + */ + if(getDirPropsMemory(pBiDi, length)) { + pBiDi->dirProps=pBiDi->dirPropsMemory; + if(!getDirProps(pBiDi)) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + } else { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + dirProps=pBiDi->dirProps; + /* the processed length may have changed if UBIDI_OPTION_STREAMING */ + length= pBiDi->length; + pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ + + /* are explicit levels specified? */ + if(embeddingLevels==NULL) { + /* no: determine explicit levels according to the (Xn) rules */\ + if(getLevelsMemory(pBiDi, length)) { + pBiDi->levels=pBiDi->levelsMemory; + direction=resolveExplicitLevels(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } else { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + } else { + /* set BN for all explicit codes, check that all levels are 0 or paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */ + pBiDi->levels=embeddingLevels; + direction=checkExplicitLevels(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } + + /* allocate isolate memory */ + if(pBiDi->isolateCount<=SIMPLE_ISOLATES_COUNT) + pBiDi->isolates=pBiDi->simpleIsolates; + else + if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize) + pBiDi->isolates=pBiDi->isolatesMemory; + else { + if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) { + pBiDi->isolates=pBiDi->isolatesMemory; + } else { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + } + pBiDi->isolateCount=-1; /* current isolates stack entry == none */ + + /* + * The steps after (X9) in the UBiDi algorithm are performed only if + * the paragraph text has mixed directionality! + */ + pBiDi->direction=direction; + switch(direction) { + case UBIDI_LTR: + /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ + pBiDi->trailingWSStart=0; + break; + case UBIDI_RTL: + /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ + pBiDi->trailingWSStart=0; + break; + default: + /* + * Choose the right implicit state table + */ + switch(pBiDi->reorderingMode) { + case UBIDI_REORDER_DEFAULT: + pBiDi->pImpTabPair=&impTab_DEFAULT; + break; + case UBIDI_REORDER_NUMBERS_SPECIAL: + pBiDi->pImpTabPair=&impTab_NUMBERS_SPECIAL; + break; + case UBIDI_REORDER_GROUP_NUMBERS_WITH_R: + pBiDi->pImpTabPair=&impTab_GROUP_NUMBERS_WITH_R; + break; + case UBIDI_REORDER_INVERSE_NUMBERS_AS_L: + pBiDi->pImpTabPair=&impTab_INVERSE_NUMBERS_AS_L; + break; + case UBIDI_REORDER_INVERSE_LIKE_DIRECT: + if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { + pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT_WITH_MARKS; + } else { + pBiDi->pImpTabPair=&impTab_INVERSE_LIKE_DIRECT; + } + break; + case UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL: + if (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { + pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS; + } else { + pBiDi->pImpTabPair=&impTab_INVERSE_FOR_NUMBERS_SPECIAL; + } + break; + default: + /* we should never get here */ + UPRV_UNREACHABLE; + } + /* + * If there are no external levels specified and there + * are no significant explicit level codes in the text, + * then we can treat the entire paragraph as one run. + * Otherwise, we need to perform the following rules on runs of + * the text with the same embedding levels. (X10) + * "Significant" explicit level codes are ones that actually + * affect non-BN characters. + * Examples for "insignificant" ones are empty embeddings + * LRE-PDF, LRE-RLE-PDF-PDF, etc. + */ + if(embeddingLevels==NULL && pBiDi->paraCount<=1 && + !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) { + resolveImplicitLevels(pBiDi, 0, length, + GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, 0)), + GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, length-1))); + } else { + /* sor, eor: start and end types of same-level-run */ + UBiDiLevel *levels=pBiDi->levels; + int32_t start, limit=0; + UBiDiLevel level, nextLevel; + DirProp sor, eor; + + /* determine the first sor and set eor to it because of the loop body (sor=eor there) */ + level=GET_PARALEVEL(pBiDi, 0); + nextLevel=levels[0]; + if(level<nextLevel) { + eor=GET_LR_FROM_LEVEL(nextLevel); + } else { + eor=GET_LR_FROM_LEVEL(level); + } + + do { + /* determine start and limit of the run (end points just behind the run) */ + + /* the values for this run's start are the same as for the previous run's end */ + start=limit; + level=nextLevel; + if((start>0) && (dirProps[start-1]==B)) { + /* except if this is a new paragraph, then set sor = para level */ + sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); + } else { + sor=eor; + } + + /* search for the limit of this run */ + while((++limit<length) && + ((levels[limit]==level) || + (DIRPROP_FLAG(dirProps[limit])&MASK_BN_EXPLICIT))) {} + + /* get the correct level of the next run */ + if(limit<length) { + nextLevel=levels[limit]; + } else { + nextLevel=GET_PARALEVEL(pBiDi, length-1); + } + + /* determine eor from max(level, nextLevel); sor is last run's eor */ + if(NO_OVERRIDE(level)<NO_OVERRIDE(nextLevel)) { + eor=GET_LR_FROM_LEVEL(nextLevel); + } else { + eor=GET_LR_FROM_LEVEL(level); + } + + /* if the run consists of overridden directional types, then there + are no implicit types to be resolved */ + if(!(level&UBIDI_LEVEL_OVERRIDE)) { + resolveImplicitLevels(pBiDi, start, limit, sor, eor); + } else { + /* remove the UBIDI_LEVEL_OVERRIDE flags */ + do { + levels[start++]&=~UBIDI_LEVEL_OVERRIDE; + } while(start<limit); + } + } while(limit<length); + } + /* check if we got any memory shortage while adding insert points */ + if (U_FAILURE(pBiDi->insertPoints.errorCode)) + { + *pErrorCode=pBiDi->insertPoints.errorCode; + return; + } + /* reset the embedding levels for some non-graphic characters (L1), (X9) */ + adjustWSLevels(pBiDi); + break; + } + /* add RLM for inverse Bidi with contextual orientation resolving + * to RTL which would not round-trip otherwise + */ + if((pBiDi->defaultParaLevel>0) && + (pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) && + ((pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || + (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { + int32_t i, j, start, last; + UBiDiLevel level; + DirProp dirProp; + for(i=0; i<pBiDi->paraCount; i++) { + last=(pBiDi->paras[i].limit)-1; + level= static_cast<UBiDiLevel>(pBiDi->paras[i].level); + if(level==0) + continue; /* LTR paragraph */ + start= i==0 ? 0 : pBiDi->paras[i-1].limit; + for(j=last; j>=start; j--) { + dirProp=dirProps[j]; + if(dirProp==L) { + if(j<last) { + while(dirProps[last]==B) { + last--; + } + } + addPoint(pBiDi, last, RLM_BEFORE); + break; + } + if(DIRPROP_FLAG(dirProp) & MASK_R_AL) { + break; + } + } + } + } + + if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { + pBiDi->resultLength -= pBiDi->controlCount; + } else { + pBiDi->resultLength += pBiDi->insertPoints.size; + } + setParaSuccess(pBiDi); /* mark successful setPara */ +} + +U_CAPI void U_EXPORT2 +ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR) { + if(pBiDi!=NULL) { + pBiDi->orderParagraphsLTR=orderParagraphsLTR; + } +} + +U_CAPI UBool U_EXPORT2 +ubidi_isOrderParagraphsLTR(UBiDi *pBiDi) { + if(pBiDi!=NULL) { + return pBiDi->orderParagraphsLTR; + } else { + return FALSE; + } +} + +U_CAPI UBiDiDirection U_EXPORT2 +ubidi_getDirection(const UBiDi *pBiDi) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { + return pBiDi->direction; + } else { + return UBIDI_LTR; + } +} + +U_CAPI const UChar * U_EXPORT2 +ubidi_getText(const UBiDi *pBiDi) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { + return pBiDi->text; + } else { + return NULL; + } +} + +U_CAPI int32_t U_EXPORT2 +ubidi_getLength(const UBiDi *pBiDi) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { + return pBiDi->originalLength; + } else { + return 0; + } +} + +U_CAPI int32_t U_EXPORT2 +ubidi_getProcessedLength(const UBiDi *pBiDi) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { + return pBiDi->length; + } else { + return 0; + } +} + +U_CAPI int32_t U_EXPORT2 +ubidi_getResultLength(const UBiDi *pBiDi) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { + return pBiDi->resultLength; + } else { + return 0; + } +} + +/* paragraphs API functions ------------------------------------------------- */ + +U_CAPI UBiDiLevel U_EXPORT2 +ubidi_getParaLevel(const UBiDi *pBiDi) { + if(IS_VALID_PARA_OR_LINE(pBiDi)) { + return pBiDi->paraLevel; + } else { + return 0; + } +} + +U_CAPI int32_t U_EXPORT2 +ubidi_countParagraphs(UBiDi *pBiDi) { + if(!IS_VALID_PARA_OR_LINE(pBiDi)) { + return 0; + } else { + return pBiDi->paraCount; + } +} + +U_CAPI void U_EXPORT2 +ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, + int32_t *pParaStart, int32_t *pParaLimit, + UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { + int32_t paraStart; + + /* check the argument values */ + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode); + RETURN_VOID_IF_BAD_RANGE(paraIndex, 0, pBiDi->paraCount, *pErrorCode); + + pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ + if(paraIndex) { + paraStart=pBiDi->paras[paraIndex-1].limit; + } else { + paraStart=0; + } + if(pParaStart!=NULL) { + *pParaStart=paraStart; + } + if(pParaLimit!=NULL) { + *pParaLimit=pBiDi->paras[paraIndex].limit; + } + if(pParaLevel!=NULL) { + *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); + } +} + +U_CAPI int32_t U_EXPORT2 +ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, + int32_t *pParaStart, int32_t *pParaLimit, + UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { + int32_t paraIndex; + + /* check the argument values */ + /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); + pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ + RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1); + + for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++); + ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); + return paraIndex; +} + +U_CAPI void U_EXPORT2 +ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, + const void *newContext, UBiDiClassCallback **oldFn, + const void **oldContext, UErrorCode *pErrorCode) +{ + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + if(pBiDi==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if( oldFn ) + { + *oldFn = pBiDi->fnClassCallback; + } + if( oldContext ) + { + *oldContext = pBiDi->coClassCallback; + } + pBiDi->fnClassCallback = newFn; + pBiDi->coClassCallback = newContext; +} + +U_CAPI void U_EXPORT2 +ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context) +{ + if(pBiDi==NULL) { + return; + } + if( fn ) + { + *fn = pBiDi->fnClassCallback; + } + if( context ) + { + *context = pBiDi->coClassCallback; + } +} + +U_CAPI UCharDirection U_EXPORT2 +ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) +{ + UCharDirection dir; + + if( pBiDi->fnClassCallback == NULL || + (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) + { + dir = ubidi_getClass(c); + } + if(dir >= U_CHAR_DIRECTION_COUNT) { + dir = (UCharDirection)ON; + } + return dir; +} diff --git a/thirdparty/icu4c/common/ubidi_props.cpp b/thirdparty/icu4c/common/ubidi_props.cpp new file mode 100644 index 0000000000..afcc4aaf4f --- /dev/null +++ b/thirdparty/icu4c/common/ubidi_props.cpp @@ -0,0 +1,254 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2004-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ubidi_props.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004dec30 +* created by: Markus W. Scherer +* +* Low-level Unicode bidi/shaping properties access. +*/ + +#include "unicode/utypes.h" +#include "unicode/uset.h" +#include "unicode/udata.h" /* UDataInfo */ +#include "ucmndata.h" /* DataHeader */ +#include "udatamem.h" +#include "uassert.h" +#include "cmemory.h" +#include "utrie2.h" +#include "ubidi_props.h" +#include "ucln_cmn.h" + +struct UBiDiProps { + UDataMemory *mem; + const int32_t *indexes; + const uint32_t *mirrors; + const uint8_t *jgArray; + const uint8_t *jgArray2; + + UTrie2 trie; + uint8_t formatVersion[4]; +}; + +/* ubidi_props_data.h is machine-generated by genbidi --csource */ +#define INCLUDED_FROM_UBIDI_PROPS_C +#include "ubidi_props_data.h" + +/* set of property starts for UnicodeSet ------------------------------------ */ + +static UBool U_CALLCONV +_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { + (void)end; + (void)value; + /* add the start code point to the USet */ + const USetAdder *sa=(const USetAdder *)context; + sa->add(sa->set, start); + return TRUE; +} + +U_CFUNC void +ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { + int32_t i, length; + UChar32 c, start, limit; + + const uint8_t *jgArray; + uint8_t prev, jg; + + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* add the start code point of each same-value range of the trie */ + utrie2_enum(&ubidi_props_singleton.trie, NULL, _enumPropertyStartsRange, sa); + + /* add the code points from the bidi mirroring table */ + length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH]; + for(i=0; i<length; ++i) { + c=UBIDI_GET_MIRROR_CODE_POINT(ubidi_props_singleton.mirrors[i]); + sa->addRange(sa->set, c, c+1); + } + + /* add the code points from the Joining_Group array where the value changes */ + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]; + jgArray=ubidi_props_singleton.jgArray; + for(;;) { + prev=0; + while(start<limit) { + jg=*jgArray++; + if(jg!=prev) { + sa->add(sa->set, start); + prev=jg; + } + ++start; + } + if(prev!=0) { + /* add the limit code point if the last value was not 0 (it is now start==limit) */ + sa->add(sa->set, limit); + } + if(limit==ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]) { + /* switch to the second Joining_Group range */ + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2]; + jgArray=ubidi_props_singleton.jgArray2; + } else { + break; + } + } + + /* add code points with hardcoded properties, plus the ones following them */ + + /* (none right now) */ +} + +/* property access functions ------------------------------------------------ */ + +U_CFUNC int32_t +ubidi_getMaxValue(UProperty which) { + int32_t max=ubidi_props_singleton.indexes[UBIDI_MAX_VALUES_INDEX]; + switch(which) { + case UCHAR_BIDI_CLASS: + return (max&UBIDI_CLASS_MASK); + case UCHAR_JOINING_GROUP: + return (max&UBIDI_MAX_JG_MASK)>>UBIDI_MAX_JG_SHIFT; + case UCHAR_JOINING_TYPE: + return (max&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT; + case UCHAR_BIDI_PAIRED_BRACKET_TYPE: + return (max&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT; + default: + return -1; /* undefined */ + } +} + +U_CAPI UCharDirection +ubidi_getClass(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); + return (UCharDirection)UBIDI_GET_CLASS(props); +} + +U_CFUNC UBool +ubidi_isMirrored(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); + return (UBool)UBIDI_GET_FLAG(props, UBIDI_IS_MIRRORED_SHIFT); +} + +static UChar32 +getMirror(UChar32 c, uint16_t props) { + int32_t delta=UBIDI_GET_MIRROR_DELTA(props); + if(delta!=UBIDI_ESC_MIRROR_DELTA) { + return c+delta; + } else { + /* look for mirror code point in the mirrors[] table */ + const uint32_t *mirrors; + uint32_t m; + int32_t i, length; + UChar32 c2; + + mirrors=ubidi_props_singleton.mirrors; + length=ubidi_props_singleton.indexes[UBIDI_IX_MIRROR_LENGTH]; + + /* linear search */ + for(i=0; i<length; ++i) { + m=mirrors[i]; + c2=UBIDI_GET_MIRROR_CODE_POINT(m); + if(c==c2) { + /* found c, return its mirror code point using the index in m */ + return UBIDI_GET_MIRROR_CODE_POINT(mirrors[UBIDI_GET_MIRROR_INDEX(m)]); + } else if(c<c2) { + break; + } + } + + /* c not found, return it itself */ + return c; + } +} + +U_CFUNC UChar32 +ubidi_getMirror(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); + return getMirror(c, props); +} + +U_CFUNC UBool +ubidi_isBidiControl(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); + return (UBool)UBIDI_GET_FLAG(props, UBIDI_BIDI_CONTROL_SHIFT); +} + +U_CFUNC UBool +ubidi_isJoinControl(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); + return (UBool)UBIDI_GET_FLAG(props, UBIDI_JOIN_CONTROL_SHIFT); +} + +U_CFUNC UJoiningType +ubidi_getJoiningType(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); + return (UJoiningType)((props&UBIDI_JT_MASK)>>UBIDI_JT_SHIFT); +} + +U_CFUNC UJoiningGroup +ubidi_getJoiningGroup(UChar32 c) { + UChar32 start, limit; + + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT]; + if(start<=c && c<limit) { + return (UJoiningGroup)ubidi_props_singleton.jgArray[c-start]; + } + start=ubidi_props_singleton.indexes[UBIDI_IX_JG_START2]; + limit=ubidi_props_singleton.indexes[UBIDI_IX_JG_LIMIT2]; + if(start<=c && c<limit) { + return (UJoiningGroup)ubidi_props_singleton.jgArray2[c-start]; + } + return U_JG_NO_JOINING_GROUP; +} + +U_CFUNC UBidiPairedBracketType +ubidi_getPairedBracketType(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); + return (UBidiPairedBracketType)((props&UBIDI_BPT_MASK)>>UBIDI_BPT_SHIFT); +} + +U_CFUNC UChar32 +ubidi_getPairedBracket(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ubidi_props_singleton.trie, c); + if((props&UBIDI_BPT_MASK)==0) { + return c; + } else { + return getMirror(c, props); + } +} + +/* public API (see uchar.h) ------------------------------------------------- */ + +U_CFUNC UCharDirection +u_charDirection(UChar32 c) { + return ubidi_getClass(c); +} + +U_CFUNC UBool +u_isMirrored(UChar32 c) { + return ubidi_isMirrored(c); +} + +U_CFUNC UChar32 +u_charMirror(UChar32 c) { + return ubidi_getMirror(c); +} + +U_CAPI UChar32 U_EXPORT2 +u_getBidiPairedBracket(UChar32 c) { + return ubidi_getPairedBracket(c); +} diff --git a/thirdparty/icu4c/common/ubidi_props.h b/thirdparty/icu4c/common/ubidi_props.h new file mode 100644 index 0000000000..698ee9c52b --- /dev/null +++ b/thirdparty/icu4c/common/ubidi_props.h @@ -0,0 +1,148 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2004-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ubidi_props.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004dec30 +* created by: Markus W. Scherer +* +* Low-level Unicode bidi/shaping properties access. +*/ + +#ifndef __UBIDI_PROPS_H__ +#define __UBIDI_PROPS_H__ + +#include "unicode/utypes.h" +#include "unicode/uset.h" +#include "putilimp.h" +#include "uset_imp.h" +#include "udataswp.h" + +U_CDECL_BEGIN + +/* library API -------------------------------------------------------------- */ + +U_CFUNC void +ubidi_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); + +/* property access functions */ + +U_CFUNC int32_t +ubidi_getMaxValue(UProperty which); + +U_CAPI UCharDirection +ubidi_getClass(UChar32 c); + +U_CFUNC UBool +ubidi_isMirrored(UChar32 c); + +U_CFUNC UChar32 +ubidi_getMirror(UChar32 c); + +U_CFUNC UBool +ubidi_isBidiControl(UChar32 c); + +U_CFUNC UBool +ubidi_isJoinControl(UChar32 c); + +U_CFUNC UJoiningType +ubidi_getJoiningType(UChar32 c); + +U_CFUNC UJoiningGroup +ubidi_getJoiningGroup(UChar32 c); + +U_CFUNC UBidiPairedBracketType +ubidi_getPairedBracketType(UChar32 c); + +U_CFUNC UChar32 +ubidi_getPairedBracket(UChar32 c); + +/* file definitions --------------------------------------------------------- */ + +#define UBIDI_DATA_NAME "ubidi" +#define UBIDI_DATA_TYPE "icu" + +/* format "BiDi" */ +#define UBIDI_FMT_0 0x42 +#define UBIDI_FMT_1 0x69 +#define UBIDI_FMT_2 0x44 +#define UBIDI_FMT_3 0x69 + +/* indexes into indexes[] */ +enum { + UBIDI_IX_INDEX_TOP, + UBIDI_IX_LENGTH, + UBIDI_IX_TRIE_SIZE, + UBIDI_IX_MIRROR_LENGTH, + + UBIDI_IX_JG_START, + UBIDI_IX_JG_LIMIT, + UBIDI_IX_JG_START2, /* new in format version 2.2, ICU 54 */ + UBIDI_IX_JG_LIMIT2, + + UBIDI_MAX_VALUES_INDEX=15, + UBIDI_IX_TOP=16 +}; + +/* definitions for 16-bit bidi/shaping properties word ---------------------- */ + +enum { + /* UBIDI_CLASS_SHIFT=0, */ /* bidi class: 5 bits (4..0) */ + UBIDI_JT_SHIFT=5, /* joining type: 3 bits (7..5) */ + + UBIDI_BPT_SHIFT=8, /* Bidi_Paired_Bracket_Type(bpt): 2 bits (9..8) */ + + UBIDI_JOIN_CONTROL_SHIFT=10, + UBIDI_BIDI_CONTROL_SHIFT=11, + + UBIDI_IS_MIRRORED_SHIFT=12, /* 'is mirrored' */ + UBIDI_MIRROR_DELTA_SHIFT=13, /* bidi mirroring delta: 3 bits (15..13) */ + + UBIDI_MAX_JG_SHIFT=16 /* max JG value in indexes[UBIDI_MAX_VALUES_INDEX] bits 23..16 */ +}; + +#define UBIDI_CLASS_MASK 0x0000001f +#define UBIDI_JT_MASK 0x000000e0 +#define UBIDI_BPT_MASK 0x00000300 + +#define UBIDI_MAX_JG_MASK 0x00ff0000 + +#define UBIDI_GET_CLASS(props) ((props)&UBIDI_CLASS_MASK) +#define UBIDI_GET_FLAG(props, shift) (((props)>>(shift))&1) + +#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC +# define UBIDI_GET_MIRROR_DELTA(props) ((int16_t)(props)>>UBIDI_MIRROR_DELTA_SHIFT) +#else +# define UBIDI_GET_MIRROR_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UBIDI_MIRROR_DELTA_SHIFT)|0xe000) : ((props)>>UBIDI_MIRROR_DELTA_SHIFT)) +#endif + +enum { + UBIDI_ESC_MIRROR_DELTA=-4, + UBIDI_MIN_MIRROR_DELTA=-3, + UBIDI_MAX_MIRROR_DELTA=3 +}; + +/* definitions for 32-bit mirror table entry -------------------------------- */ + +enum { + /* the source Unicode code point takes 21 bits (20..0) */ + UBIDI_MIRROR_INDEX_SHIFT=21, + UBIDI_MAX_MIRROR_INDEX=0x7ff +}; + +#define UBIDI_GET_MIRROR_CODE_POINT(m) (UChar32)((m)&0x1fffff) + +#define UBIDI_GET_MIRROR_INDEX(m) ((m)>>UBIDI_MIRROR_INDEX_SHIFT) + +U_CDECL_END + +#endif diff --git a/thirdparty/icu4c/common/ubidi_props_data.h b/thirdparty/icu4c/common/ubidi_props_data.h new file mode 100644 index 0000000000..7a34870bd8 --- /dev/null +++ b/thirdparty/icu4c/common/ubidi_props_data.h @@ -0,0 +1,922 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// Copyright (C) 1999-2016, International Business Machines +// Corporation and others. All Rights Reserved. +// +// file name: ubidi_props_data.h +// +// machine-generated by: icu/tools/unicode/c/genprops/bidipropsbuilder.cpp + + +#ifdef INCLUDED_FROM_UBIDI_PROPS_C + +static const UVersionInfo ubidi_props_dataVersion={0xd,0,0,0}; + +static const int32_t ubidi_props_indexes[UBIDI_IX_TOP]={0x10,0x67ec,0x6200,0x28,0x620,0x8c8,0x10ac0,0x10d24,0,0,0,0,0,0,0,0x6502b6}; + +static const uint16_t ubidi_props_trieIndex[12536]={ +0x37c,0x384,0x38c,0x394,0x3ac,0x3b4,0x3bc,0x3c4,0x39c,0x3a4,0x39c,0x3a4,0x39c,0x3a4,0x39c,0x3a4, +0x39c,0x3a4,0x39c,0x3a4,0x3ca,0x3d2,0x3da,0x3e2,0x3ea,0x3f2,0x3ee,0x3f6,0x3fe,0x406,0x401,0x409, +0x39c,0x3a4,0x39c,0x3a4,0x411,0x419,0x39c,0x3a4,0x39c,0x3a4,0x39c,0x3a4,0x41f,0x427,0x42f,0x437, +0x43f,0x447,0x44f,0x457,0x45d,0x465,0x46d,0x475,0x47d,0x485,0x48b,0x493,0x49b,0x4a3,0x4ab,0x4b3, +0x4bf,0x4bb,0x4c7,0x4cf,0x431,0x4df,0x4e6,0x4d7,0x4ee,0x4f0,0x4f8,0x500,0x508,0x509,0x511,0x519, +0x521,0x509,0x529,0x52e,0x521,0x509,0x536,0x53e,0x508,0x546,0x54e,0x500,0x556,0x39c,0x55e,0x562, +0x56a,0x56c,0x574,0x57c,0x508,0x584,0x58c,0x500,0x413,0x590,0x511,0x500,0x508,0x39c,0x598,0x39c, +0x39c,0x59e,0x5a6,0x39c,0x39c,0x5aa,0x5b2,0x39c,0x5b6,0x5bd,0x39c,0x5c5,0x5cd,0x5d4,0x555,0x39c, +0x39c,0x5dc,0x5e4,0x5ec,0x5f4,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x5fc,0x39c,0x604,0x39c,0x39c,0x39c, +0x60c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x614,0x39c,0x39c,0x39c,0x61c,0x61c,0x515,0x515,0x39c,0x622,0x62a,0x604, +0x640,0x632,0x632,0x648,0x64f,0x638,0x39c,0x39c,0x39c,0x657,0x65f,0x39c,0x39c,0x39c,0x661,0x669, +0x671,0x39c,0x678,0x680,0x39c,0x688,0x56b,0x39c,0x545,0x690,0x556,0x698,0x413,0x6a0,0x39c,0x6a7, +0x39c,0x6ac,0x39c,0x39c,0x39c,0x39c,0x6b2,0x6ba,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x3ea,0x6c2, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x6ca,0x6d2,0x6d6, +0x6ee,0x6f4,0x6de,0x6e6,0x6fc,0x704,0x708,0x5d7,0x710,0x718,0x720,0x39c,0x728,0x669,0x669,0x669, +0x738,0x740,0x748,0x750,0x755,0x75d,0x765,0x730,0x76d,0x775,0x39c,0x77b,0x782,0x669,0x669,0x669, +0x669,0x582,0x788,0x669,0x790,0x39c,0x39c,0x666,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669, +0x669,0x669,0x669,0x669,0x669,0x798,0x669,0x669,0x669,0x669,0x669,0x79e,0x669,0x669,0x7a6,0x7ae, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x669,0x669,0x669,0x669,0x7be,0x7c6,0x7ce,0x7b6, +0x7de,0x7e6,0x7ee,0x7f5,0x7fc,0x804,0x808,0x7d6,0x669,0x669,0x669,0x810,0x816,0x669,0x669,0x81c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x824,0x39c,0x39c,0x39c,0x82c,0x39c,0x39c,0x39c,0x3ea, +0x834,0x83c,0x840,0x39c,0x848,0x669,0x669,0x66c,0x669,0x669,0x669,0x669,0x669,0x669,0x84f,0x855, +0x865,0x85d,0x39c,0x39c,0x86d,0x60c,0x39c,0x3c3,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x669,0x82b, +0x3d1,0x39c,0x875,0x87d,0x39c,0x885,0x88d,0x39c,0x39c,0x39c,0x39c,0x891,0x39c,0x39c,0x661,0x3c2, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x669,0x669, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x875,0x669,0x582,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x898,0x39c,0x39c,0x89d,0x56c,0x39c,0x39c,0x5b8,0x669,0x660,0x39c,0x39c,0x8a5,0x39c,0x39c,0x39c, +0x8ad,0x8b4,0x632,0x8bc,0x39c,0x39c,0x58e,0x8c4,0x39c,0x8cc,0x8d3,0x39c,0x4ee,0x8d8,0x39c,0x507, +0x39c,0x8e0,0x8e8,0x509,0x39c,0x8ec,0x508,0x8f4,0x39c,0x39c,0x39c,0x8fa,0x39c,0x39c,0x39c,0x901, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x915,0x909,0x90d,0x49b,0x49b,0x49b,0x49b,0x49b, +0x49b,0x49b,0x49b,0x49b,0x49b,0x49b,0x49b,0x49b,0x49b,0x91d,0x49b,0x49b,0x49b,0x49b,0x925,0x929, +0x931,0x939,0x93d,0x945,0x49b,0x49b,0x49b,0x949,0x951,0x38c,0x959,0x961,0x39c,0x39c,0x39c,0x969, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0xe70,0xe70,0xeb0,0xef0,0xe70,0xe70,0xe70,0xe70,0xe70,0xe70,0xf28,0xf68,0xfa8,0xfb8,0xff8,0x1004, +0xe70,0xe70,0x1044,0xe70,0xe70,0xe70,0x107c,0x10bc,0x10fc,0x113c,0x1174,0x11b4,0x11f4,0x122c,0x126c,0x12ac, +0xa40,0xa80,0xac0,0xaff,0x1a0,0x1a0,0xb3f,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xb68,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xba8,0x1a0,0x1a0,0xbdd,0xc1d,0xc5d,0xc9d,0xcdd,0xd1d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0xd9d,0xdad,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0, +0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0x1a0,0xd5d, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x971,0x39c,0x669,0x669,0x979,0x60c,0x39c,0x501, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x981,0x39c,0x39c,0x39c,0x988,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x990,0x431,0x431,0x431,0x431,0x431,0x431,0x431, +0x998,0x99c,0x431,0x431,0x431,0x431,0x9ac,0x9a4,0x431,0x9b4,0x431,0x431,0x9bc,0x9c2,0x431,0x431, +0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x9d2,0x9ca,0x431,0x431,0x431,0x431,0x431,0x431, +0x431,0x431,0x431,0x9da,0x431,0x9e2,0x431,0x431,0x431,0x9e6,0x9ed,0x9f3,0x431,0x9f7,0x9ff,0x431, +0x508,0xa07,0xa0e,0xa15,0x413,0xa18,0x39c,0x39c,0x4ee,0xa1f,0x39c,0xa25,0x413,0xa2a,0xa32,0x39c, +0x39c,0xa37,0x39c,0x39c,0x39c,0x39c,0x82c,0xa3f,0x413,0x590,0x56b,0xa46,0x39c,0x39c,0x39c,0x39c, +0x39c,0xa07,0xa4e,0x39c,0x39c,0xa56,0xa5e,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xa62,0xa6a,0x39c, +0x39c,0xa72,0x56b,0xa7a,0x39c,0xa80,0x39c,0x39c,0x5fc,0xa88,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0xa8d,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xa94,0xa9c,0x39c,0x39c,0x39c,0xa9f,0x56b,0xaa7, +0xaab,0xab3,0x39c,0xaba,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0xac1,0x39c,0x39c,0xacf,0xac9,0x39c,0x39c,0x39c,0xad7,0xadf,0x39c,0xae3,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x592,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xaf0,0xaeb,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0xaf8,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xaff, +0x39c,0xb05,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0xa26,0x39c,0xb0b,0x39c,0x39c,0xb13,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x522,0xb1b,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xb22,0xb2a,0xb30,0x39c,0x39c,0x669,0x669,0xb38, +0x39c,0x39c,0x39c,0x39c,0x39c,0x669,0x669,0x83f,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0xb3a,0x39c,0xb41,0x39c,0xb3d,0x39c,0xb44,0x39c,0xb4c,0xb50,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x3ea,0xb58,0x3ea, +0xb5f,0xb66,0xb6e,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xb76,0xb7e,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xb05,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0xb83,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x431,0x431,0x431, +0x431,0x431,0x431,0xb8b,0x431,0xb93,0xb93,0xb9a,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431, +0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431, +0x90d,0x49b,0x49b,0x431,0x431,0x49b,0x49b,0x9f3,0x431,0x431,0x431,0x431,0x431,0x49b,0x49b,0x49b, +0x49b,0x49b,0x49b,0x49b,0xba2,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x431,0x669,0xbaa,0x669, +0x669,0x66c,0xbaf,0xbb3,0x84f,0xbbb,0x3be,0x39c,0xbc1,0x39c,0xbc6,0x39c,0x39c,0x39c,0x39c,0x39c, +0x779,0x39c,0x39c,0x39c,0x39c,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669, +0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669,0x669, +0x669,0x669,0x669,0x66b,0x979,0x669,0x669,0x669,0x66c,0x669,0x669,0xbce,0x66e,0xbaa,0x669,0xbd6, +0x669,0xbde,0xbe3,0x39c,0x39c,0x669,0x669,0x669,0xbeb,0x669,0x669,0x798,0x669,0x669,0x669,0x66c, +0xbf2,0xbfa,0xc00,0xc05,0x39c,0x669,0x669,0x669,0x669,0xc0d,0x669,0x788,0xc15,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xc1c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c, +0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0x39c,0xc1c,0xc2c,0xc24,0xc24, +0xc24,0xc2d,0xc2d,0xc2d,0xc2d,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0xc35,0xc2d,0xc2d,0xc2d, +0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d, +0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d, +0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d, +0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0xc2d,0x37b,0x37b,0x37b, +0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,8,7,8,9,7,0x12,0x12, +0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,7,7,7,8, +9,0xa,0xa,4,4,4,0xa,0xa,0x310a,0xf20a,0xa,3,6,3,6,6, +2,2,2,2,2,2,2,2,2,2,6,0xa,0x500a,0xa,0xd00a,0xa, +0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0x510a,0xa,0xd20a,0xa,0xa, +0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0x510a,0xa,0xd20a,0xa,0x12, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x12,0x12,0x12,0x12,0x12,7,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, +0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, +6,0xa,4,4,4,4,0xa,0xa,0xa,0xa,0,0x900a,0xa,0xb2,0xa,0xa, +4,4,2,2,0xa,0,0xa,0xa,0xa,2,0,0x900a,0xa,0xa,0xa,0xa, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0xa,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0xa,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0xa,0xa,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xa,0xa,0,0, +0,0,0,0,0,0,0xa,0,0,0,0,0,0xa,0xa,0,0xa, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0xa,0,0,0,0,0, +0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0xa,0,0,0xa,0xa,4,1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,1,0xb1,1,0xb1,0xb1,1, +0xb1,0xb1,1,0xb1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,5,5,5,5, +5,5,0xa,0xa,0xd,4,4,0xd,6,0xd,0xa,0xa,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0x8ad,0xd,0xd,0xd,0x4d,0xd,0x8d,0x8d, +0x8d,0x8d,0x4d,0x8d,0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,0x4d, +0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x2d,0x4d,0x4d,0x4d, +0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x4d,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,5,5,5,5, +5,5,5,5,5,5,4,5,5,0xd,0x4d,0x4d,0xb1,0x8d,0x8d,0x8d, +0xd,0x8d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d, +0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x4d,0x4d, +0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, +0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, +0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d, +0x4d,0x8d,0x4d,0x8d,0x4d,0x4d,0x8d,0x8d,0xd,0x8d,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,5,0xa,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0xd,0xb1,0xb1,0xa,0xb1,0xb1, +0xb1,0xb1,0x8d,0x8d,2,2,2,2,2,2,2,2,2,2,0x4d,0x4d, +0x4d,0xd,0xd,0x4d,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xad,0x8d,0xb1,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x8d,0x8d,0x4d,0x4d, +0x4d,0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x8d,0x4d, +0x8d,0x4d,0x4d,0x8d,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0xd,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, +0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, +0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x8d,0x4d,0x8d, +0x8d,0x4d,0x4d,0x4d,0x8d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,1,1,1,1, +1,1,1,1,1,1,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41, +0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41, +0x41,0x41,0x41,0x41,0x41,0x41,0x41,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +1,1,0xa,0xa,0xa,0xa,0x21,1,1,0xb1,1,1,0xb1,0xb1,0xb1,0xb1, +1,0xb1,0xb1,0xb1,1,0xb1,0xb1,0xb1,0xb1,0xb1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xb1,0xb1,0xb1,0xb1,1,0xb1,0xb1,0xb1,0xb1,0xb1,0x81,0x41,0x41,0x41, +0x41,0x41,0x81,0x81,0x41,0x81,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41, +0x81,0x41,0x81,0x81,0x81,0xb1,0xb1,0xb1,1,1,1,1,0x4d,0xd,0x4d,0x4d, +0x4d,0x4d,0xd,0x8d,0x4d,0x8d,0x8d,0xd,0xd,0xd,0xd,0xd,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,5,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0x4d,0x4d,0x4d,0x4d, +0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x8d,0x8d,0x8d,0xd,0x8d,0x4d,0x4d,0x8d,0x8d,0x4d, +0x4d,0xd,0x4d,0x4d,0x4d,0x8d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0,0,0, +0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xb1,0,0, +0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, +0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0, +0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,4, +0,0,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0,0,0xb1, +0xb1,0xb1,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0xb1,0,0, +0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1, +0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0, +0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0, +0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xb1,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0,0, +0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0xb1,0xb1,0, +0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0, +0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa, +0xa,4,0xa,0,0,0,0,0,0xb1,0,0,0,0xb1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1, +0xb1,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0,0, +0,0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0, +0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0xa0, +0,0,0,0,0,0,0xa0,0,0,0,0,0,0xb1,0xb1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0,0,0, +0,0,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0,0,0,0,4,0,0,0,0,0,0,0,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0,0xb1,0x310a,0xf20a, +0x310a,0xf20a,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0, +0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1, +0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0,0xb1,0xb1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0,0,0xb1,0xb1, +0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0, +0,0,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0, +0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0x310a,0xf20a,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0xb1,0, +0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0, +0,0,0,4,0,0xb1,0,0,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0x40,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x4a,0xa,0xa,0x2a,0xb1,0xb1,0xb1,0x12,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0,0,0,0,0,0,0, +0,0xb1,0xb1,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xb1,0xb1,0xb1,0, +0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0xb1,0, +0,0,0,0,0,0xb1,0xb1,0xb1,0,0,0,0,0xa,0,0,0, +0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0,0xb1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, +0xb1,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0, +0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0xb1,0,0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0xb1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0,0,0,0,0xb1,0,0,0,0,0,0,0xb1,0,0,0, +0xb1,0xb1,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0, +0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xa,0xa,0xa,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xa,0xa,0,0xa,0xa,0xa,0xa,6,0x310a,0xf20a,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,0xb2,0xb2,0xb2,0xb2,0xb2,0x12,0x814,0x815, +0x813,0x816,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,2,0,0,0,2,2,2,2, +2,2,3,3,0xa,0x310a,0xf20a,0,9,9,9,9,9,9,9,9, +9,9,9,0xb2,0x412,0x432,0x8a0,0x8a1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,9,7,0x8ab,0x8ae,0x8b0,0x8ac,0x8af,6, +4,4,4,4,4,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa, +2,2,2,2,2,2,2,2,2,2,3,3,0xa,0x310a,0xf20a,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xa,0xa,0,0xa,0xa,0xa,0xa,0,0xa,0xa,0,0,0,0,0,0, +0,0,0,0,0xa,0,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa, +0xa,0xa,0xa,0xa,0,0xa,0,0xa,0,0xa,0,0,0,0,4,0, +0,0,0,0,0,0,0,0,0,0,0xa,0xa,0,0,0,0, +0x100a,0xa,0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0,0, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0,0,0,0, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa, +0x300a,0xf00a,0x900a,0x900a,0x900a,0x100a,0x900a,0x900a,0x100a,0x100a,0x900a,0x900a,0x900a,0x900a,0x900a,0x100a, +0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0x700a,0x700a,0x700a,0xb00a,0xb00a,0xb00a,0xa,0xa, +0xa,0x100a,3,4,0xa,0x900a,0x100a,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0x900a, +0x900a,0x900a,0x900a,0xa,0x900a,0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a, +0x100a,0x100a,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x300a,0xf00a,0x100a,0x100a, +0x100a,0x100a,0x100a,0x900a,0x100a,0x900a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x900a,0xa,0xa,0xa, +0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a, +0x100a,0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0x300a,0xf00a, +0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a, +0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x900a,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0x900a,0x100a,0x900a,0x900a,0x100a,0x900a, +0x100a,0x100a,0x100a,0x100a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x900a,0xa,0xa,0xa, +0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a, +0xf00a,0x900a,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a, +0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0x300a,0xf00a,0x310a,0xf20a,0xa, +0x300a,0xf00a,0xa,0x500a,0x100a,0xd00a,0xa,0xa,0xa,0xa,0xa,0x100a,0x100a,0x300a,0xf00a,0xa, +0xa,0xa,0xa,0xa,0x900a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x310a,0xf20a, +0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x100a,0x100a,0x100a,0xa,0xa, +0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x100a,0x100a, +0x300a,0xf00a,0xa,0xa,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a, +0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x710a,0x320a,0xf10a,0xb20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a, +0xf20a,0xa,0xa,0x900a,0x100a,0x100a,0x100a,0x100a,0x900a,0xa,0x100a,0x900a,0x300a,0xf00a,0x100a,0x100a, +0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0x900a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0x100a,0x300a,0xf00a,0xa,0xa, +0xa,0x100a,0xa,0xa,0xa,0xa,0x100a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa, +0x310a,0xf20a,0x310a,0xf20a,0x100a,0xa,0xa,0xa,0xa,0xa,0x100a,0x900a,0x900a,0x900a,0x100a,0xa, +0xa,0xa,0xa,0xa,0x300a,0xf00a,0x900a,0xa,0xa,0xa,0xa,0x100a,0xa,0xa,0xa,0x300a, +0xf00a,0x300a,0xf00a,0x100a,0xa,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a,0x100a, +0x100a,0x100a,0x100a,0x100a,0x100a,0xa,0x100a,0x100a,0x100a,0x100a,0xa,0xa,0x100a,0xa,0x100a,0xa, +0xa,0x100a,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa, +0xa,0xa,0xa,0xa,0x300a,0xf00a,0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x100a, +0x100a,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa,0xa,0xa,0x100a,0x100a, +0x100a,0x100a,0xa,0x100a,0x100a,0xa,0xa,0x100a,0x100a,0xa,0xa,0xa,0xa,0x300a,0xf00a,0x300a, +0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a, +0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a, +0xf00a,0x300a,0xf00a,0x100a,0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0x300a, +0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a, +0xf00a,0x300a,0xf00a,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0xa,0xa,0x100a,0xa,0x900a,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x900a,0xa, +0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0xb1, +0xb1,0xb1,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, +0xa,0xa,0x300a,0xf00a,0x300a,0xf00a,0xa,0xa,0xa,0x300a,0xf00a,0xa,0x300a,0xf00a,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0x300a,0xf00a,0xa,0xa, +0x300a,0xf00a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0xb1,0xb1, +0xb1,0xb1,0,0,0xa,0,0,0,0,0,0xa,0xa,0,0,0,0, +0,0xa,0xa,0xa,9,0xa,0xa,0xa,0xa,0,0,0,0x310a,0xf20a,0x310a,0xf20a, +0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0xa,0xa,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a,0x310a,0xf20a, +0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xa, +0xa,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, +0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0xb1,0xb1,0xb1,0xb1,0xa,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xa,0xa,0,0,0,0,0,0,0,0,0xa,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0xb1,0,0,0,0xb1,0,0,0,0,0xb1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xb1,0xb1,0,0xa,0xa,0xa,0xa,0xb1,0,0,0, +0,0,0,0,0,0,0,0,4,4,0,0,0,0,0,0, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x60,0,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, +0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0, +0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, +0,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0, +0,0,0,0xb1,0,0,0,0,0,0,0,0,0xb1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xb1,0,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0xb1,0xb1, +0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0, +0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0, +0,0,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0xb1,0,0,0xb1,0,0,0, +0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,1,1,1,1,1,1,1,1,1,3,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,0xb1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xa,0xa,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, +0x12,0x12,0x12,0x12,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xa,0xd,0xd,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0, +0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,6,0xa,6,0,0xa,6,0xa,0xa,0xa,0x310a,0xf20a,0x310a, +0xf20a,0x310a,0xf20a,4,0xa,0xa,3,3,0x300a,0xf00a,0xa,0,0xa,4,4,0xa, +0,0,0,0,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xb2,0,0xa,0xa,4,4,4,0xa,0xa,0x310a,0xf20a,0xa,3, +6,3,6,6,2,2,2,2,2,2,2,2,2,2,6,0xa, +0x500a,0xa,0xd00a,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x510a, +0xa,0xd20a,0xa,0x310a,0xf20a,0xa,0x310a,0xf20a,0xa,0xa,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,4,4,0xa,0xa,0xa,4,4,0,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0xaa,0xaa,0xaa, +0xa,0xa,0x12,0x12,0,0xa,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0,0,0,0xb1,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xa, +1,0xb1,0xb1,0xb1,1,0xb1,0xb1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,1,1,1,1,0xb1, +0x41,0x81,1,1,0x81,0xb1,0xb1,1,1,1,1,0x41,0x41,0x41,0x41,0x81, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +0x41,0x41,0x41,0x41,0x41,0x81,1,0x81,1,0x81,0x81,1,1,0x61,0x81,0x81, +0x81,0x81,0x81,0x41,0x41,0x41,0x41,0x61,0x41,0x41,0x41,0x41,0x41,0x81,0x41,0x41, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0x41,0x81,0x41,0x81,0x81,0x81,0x41,0x41,0x41,0x81,0x41,0x41,0x81,0x41,0x81,0x81, +0x41,0x81,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,0x81,0x81,0x81,0x81,0x41,0x41,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0x4d,0x4d,0x8d,0x4d,0xb1,0xb1,0xb1,0xb1, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,5,5,5,5,5,5,5,5, +5,5,0xd,0xd,0xd,0xd,0xd,0xd,0x6d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, +0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d, +0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,1,1,1,1,1,1,1,1,1, +1,1,1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0x4d,0x4d,0x4d,0x8d,0x4d,0x4d,0x4d,0x4d, +0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0x4d,0xd,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0x4d,0x4d,0x4d,0x8d,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0x41,1,0x41,0x41, +0x81,0x81,0x81,1,0x41,0x81,0x81,0x41,0x41,0x81,0x41,0x41,1,0x41,0x81,0x81, +0x41,1,1,1,1,0x81,0x41,0x61,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, +0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0, +0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0, +0,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0, +0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0xb1,0,0,0,0,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0,0xb1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0xb1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0,0xb1,0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0, +0,0,0,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0,0,0xb1,0,0xb1,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xb1,0,0xb1,0,0, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0, +0,0,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0xb1,0, +0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0,0,0,0,0,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xa0,0xa0,0xb1,0xb1,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, +0,0,0,0xb1,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, +0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, +0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0,0xa0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0,0,0,0xb1,0,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0,0, +0,0xb1,0,0xb1,0,0,0,0,0,0,0,0,4,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,4,4,4, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1, +0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0, +0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xb2,0xb2,0xb2,0xb2, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xb1, +0xb1,0xb1,0,0,0,0,0,0,0,0,0,0xb2,0xb2,0xb2,0xb2,0xb2, +0xb2,0xb2,0xb2,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xa,0xa,0xb1,0xb1,0xb1,0xa,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0x100a,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0x100a,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0x100a,0,0,0,0,0,0,0,0,0,0,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0, +0,0xb1,0,0,0,0,0,0,0,0,0,0,0xb1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0, +0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0,0,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0,0xb1,0xb1,0,0xb1,0xb1, +0xb1,0xb1,0xb1,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0xb1,0xb1,0xb1,0xb1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,4,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,1,1,1,1,1,1,1,1,1,0x41,0x41,0x41,0x41, +0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41, +0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xa1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xd,0xd,0xd,0xd,0xd,0xd, +0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xd,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,2,2,2,2, +2,2,2,2,2,2,2,0xa,0xa,0xa,0xa,0xa,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa, +0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xa,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0,0,0xa,0xa,0xa,0xa,0xa,0,0,0, +0xa,0xa,0xa,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0, +0,0,0,0,0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0,0,0,0, +0,0,0,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0xa,0xa,0xa,0,0xa,0xa,0xa,0xa,0xa,0xa,0xa,0xa, +0xa,0xa,0xa,0xa,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,2,2,2,2,2,2,2,2,2,2,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x12,0x12, +0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2, +0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2,0xb2, +0x12,0xb2,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, +0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, +0x12,0x12,0x12,0x12,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, +0x12,0x12,0x12,0x12,0,0,0,0 +}; + +static const uint32_t ubidi_props_mirrors[40]={ +0x2000ab,0xbb,0x4202215,0x4e0221f,0x3e02220,0x3a02221,0x3c02222,0x4c02224,0x2202243,0x1402245,0x120224c,0x4002298,0x44022a6,0x48022a8,0x46022a9,0x4a022ab, +0x38022b8,0x10022cd,0x2e022f2,0x30022f3,0x32022f4,0x34022f6,0x36022f7,0x24022fa,0x26022fb,0x28022fc,0x2a022fd,0x2c022fe,0x20027dc,0xa0299b,0xc029a0,0x8029a3, +0x16029b8,0x4029f5,0x1802ade,0x1c02ae3,0x1a02ae4,0x1e02ae5,0xe02aee,0x602bfe +}; + +static const uint8_t ubidi_props_jgArray[680]={ +0x2d,0,3,3,0x2c,3,0x2d,3,4,0x2a,4,4,0xd,0xd,0xd,6, +6,0x1f,0x1f,0x23,0x23,0x21,0x21,0x28,0x28,1,1,0xb,0xb,0x37,0x37,0x37, +0,9,0x1d,0x13,0x16,0x18,0x1a,0x10,0x2c,0x2d,0x2d,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0x1d, +0,3,3,3,0,3,0x2c,0x2c,0x2d,4,4,4,4,4,4,4, +4,0xd,0xd,0xd,0xd,0xd,0xd,0xd,6,6,6,6,6,6,6,6, +6,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x1f,0x23,0x23,0x23,0x21,0x21,0x28, +1,9,9,9,9,9,9,0x1d,0x1d,0xb,0x26,0xb,0x13,0x13,0x13,0xb, +0xb,0xb,0xb,0xb,0xb,0x16,0x16,0x16,0x16,0x1a,0x1a,0x1a,0x1a,0x38,0x15,0xd, +0x2a,0x11,0x11,0xe,0x2c,0x2c,0x2c,0x2c,0x2c,0x2c,0x2c,0x2c,0x37,0x2f,0x37,0x2c, +0x2d,0x2d,0x2e,0x2e,0,0x2a,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0x1f, +0,0,0,0,0,0,0,0,0,0,0x23,0x21,1,0,0,0x15, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +2,0,5,0xc,0xc,7,7,0xf,0x27,0x32,0x12,0x2b,0x2b,0x30,0x31,0x14, +0x17,0x19,0x1b,0x24,0xa,8,0x1c,0x20,0x22,0x1e,7,0x25,0x29,5,0xc,7, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0x35,0x34,0x33, +4,4,4,4,4,4,4,0xd,0xd,6,6,0x1f,0x23,1,1,1, +9,9,0xb,0xb,0xb,0x18,0x18,0x1a,0x1a,0x1a,0x16,0x1f,0x1f,0x23,0xd,0xd, +0x23,0x1f,0xd,3,3,0x37,0x37,0x2d,0x2c,0x2c,0x36,0x36,0xd,0x23,0x23,0x13, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x5d,0x5a,0x60,0x63,0x5e,0x5f,0x59,0x61,0x5b,0x5c,0x62,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +4,4,0xd,0x28,9,0x1d,0x16,0x18,0x2d,0x2d,0x1f,0x2c,0x39,0,6,0x21, +0xb,0x55,0x1f,1,0x13,0,4,4,4,0x1f,0x2d,0x56,0x58,0x57,4,4, +4,0xd,0xb,1,0x58,0xd,0xd,0x16 +}; + +static const uint8_t ubidi_props_jgArray2[612]={ +0x3a,0x3c,0x3c,0x40,0x40,0x3d,0,0x52,0,0x54,0x54,0,0,0x41,0x4f,0x53, +0x43,0x43,0x43,0x44,0x3e,0x50,0x45,0x46,0x4c,0x3b,0x3b,0x48,0x48,0x4b,0x49,0x49, +0x49,0x4a,0,0,0x4d,0,0,0,0,0,0,0x47,0x3f,0x4e,0x51,0x42, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0x65,0,0,0,0,0,0,0x65,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0x64,0,0,0x65,0,0x64,0, +0x64,0,0,0x64 +}; + +static const UBiDiProps ubidi_props_singleton={ + NULL, + ubidi_props_indexes, + ubidi_props_mirrors, + ubidi_props_jgArray, + ubidi_props_jgArray2, + { + ubidi_props_trieIndex, + ubidi_props_trieIndex+3568, + NULL, + 3568, + 8968, + 0x1a0, + 0xe70, + 0x0, + 0x0, + 0x110000, + 0x30f4, + NULL, 0, FALSE, FALSE, 0, NULL + }, + { 2,2,0,0 } +}; + +#endif // INCLUDED_FROM_UBIDI_PROPS_C diff --git a/thirdparty/icu4c/common/ubidiimp.h b/thirdparty/icu4c/common/ubidiimp.h new file mode 100644 index 0000000000..e48fc6f941 --- /dev/null +++ b/thirdparty/icu4c/common/ubidiimp.h @@ -0,0 +1,484 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ubidiimp.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999aug06 +* created by: Markus W. Scherer, updated by Matitiahu Allouche +*/ + +#ifndef UBIDIIMP_H +#define UBIDIIMP_H + +#include "unicode/utypes.h" +#include "unicode/ubidi.h" +#include "unicode/uchar.h" +#include "ubidi_props.h" + +/* miscellaneous definitions ---------------------------------------------- */ + +// ICU-20853=ICU-20935 Solaris #defines CS and ES in sys/regset.h +#ifdef CS +# undef CS +#endif +#ifdef ES +# undef ES +#endif + +typedef uint8_t DirProp; +typedef uint32_t Flags; + +/* Comparing the description of the BiDi algorithm with this implementation + is easier with the same names for the BiDi types in the code as there. + See UCharDirection in uchar.h . +*/ +enum { + L= U_LEFT_TO_RIGHT, /* 0 */ + R= U_RIGHT_TO_LEFT, /* 1 */ + EN= U_EUROPEAN_NUMBER, /* 2 */ + ES= U_EUROPEAN_NUMBER_SEPARATOR, /* 3 */ + ET= U_EUROPEAN_NUMBER_TERMINATOR, /* 4 */ + AN= U_ARABIC_NUMBER, /* 5 */ + CS= U_COMMON_NUMBER_SEPARATOR, /* 6 */ + B= U_BLOCK_SEPARATOR, /* 7 */ + S= U_SEGMENT_SEPARATOR, /* 8 */ + WS= U_WHITE_SPACE_NEUTRAL, /* 9 */ + ON= U_OTHER_NEUTRAL, /* 10 */ + LRE=U_LEFT_TO_RIGHT_EMBEDDING, /* 11 */ + LRO=U_LEFT_TO_RIGHT_OVERRIDE, /* 12 */ + AL= U_RIGHT_TO_LEFT_ARABIC, /* 13 */ + RLE=U_RIGHT_TO_LEFT_EMBEDDING, /* 14 */ + RLO=U_RIGHT_TO_LEFT_OVERRIDE, /* 15 */ + PDF=U_POP_DIRECTIONAL_FORMAT, /* 16 */ + NSM=U_DIR_NON_SPACING_MARK, /* 17 */ + BN= U_BOUNDARY_NEUTRAL, /* 18 */ + FSI=U_FIRST_STRONG_ISOLATE, /* 19 */ + LRI=U_LEFT_TO_RIGHT_ISOLATE, /* 20 */ + RLI=U_RIGHT_TO_LEFT_ISOLATE, /* 21 */ + PDI=U_POP_DIRECTIONAL_ISOLATE, /* 22 */ + ENL, /* EN after W7 */ /* 23 */ + ENR, /* EN not subject to W7 */ /* 24 */ + dirPropCount +}; + +/* Sometimes, bit values are more appropriate + to deal with directionality properties. + Abbreviations in these macro names refer to names + used in the BiDi algorithm. +*/ +#define DIRPROP_FLAG(dir) (1UL<<(dir)) +#define PURE_DIRPROP(prop) ((prop)&~0xE0) ????????????????????????? + +/* special flag for multiple runs from explicit embedding codes */ +#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31) + +/* are there any characters that are LTR or RTL? */ +#define MASK_LTR (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(ENL)|DIRPROP_FLAG(ENR)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(LRI)) +#define MASK_RTL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(RLI)) +#define MASK_R_AL (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)) +#define MASK_STRONG_EN_AN (DIRPROP_FLAG(L)|DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)) + +/* explicit embedding codes */ +#define MASK_EXPLICIT (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO)|DIRPROP_FLAG(PDF)) + +/* explicit isolate codes */ +#define MASK_ISO (DIRPROP_FLAG(LRI)|DIRPROP_FLAG(RLI)|DIRPROP_FLAG(FSI)|DIRPROP_FLAG(PDI)) + +#define MASK_BN_EXPLICIT (DIRPROP_FLAG(BN)|MASK_EXPLICIT) + +/* paragraph and segment separators */ +#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S)) + +/* all types that are counted as White Space or Neutral in some steps */ +#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT|MASK_ISO) + +/* types that are neutrals or could becomes neutrals in (Wn) */ +#define MASK_POSSIBLE_N (DIRPROP_FLAG(ON)|DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_WS) + +/* + * These types may be changed to "e", + * the embedding type (L or R) of the run, + * in the BiDi algorithm (N2) + */ +#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N) + +/* the dirProp's L and R are defined to 0 and 1 values in UCharDirection */ +#define GET_LR_FROM_LEVEL(level) ((DirProp)((level)&1)) + +#define IS_DEFAULT_LEVEL(level) ((level)>=0xfe) + +/* + * The following bit is used for the directional isolate status. + * Stack entries corresponding to isolate sequences are greater than ISOLATE. + */ +#define ISOLATE 0x0100 + +U_CFUNC UBiDiLevel +ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t index); + +#define GET_PARALEVEL(ubidi, index) \ + ((UBiDiLevel)(!(ubidi)->defaultParaLevel || (index)<(ubidi)->paras[0].limit ? \ + (ubidi)->paraLevel : ubidi_getParaLevelAtIndex((ubidi), (index)))) + +/* number of paras entries allocated initially without malloc */ +#define SIMPLE_PARAS_COUNT 10 +/* number of isolate entries allocated initially without malloc */ +#define SIMPLE_ISOLATES_COUNT 5 +/* number of isolate run entries for paired brackets allocated initially without malloc */ +#define SIMPLE_OPENINGS_COUNT 20 + +#define CR 0x000D +#define LF 0x000A + +/* Run structure for reordering --------------------------------------------- */ +enum { + LRM_BEFORE=1, + LRM_AFTER=2, + RLM_BEFORE=4, + RLM_AFTER=8 +}; + +typedef struct Para { + int32_t limit; + int32_t level; +} Para; + +enum { /* flags for Opening.flags */ + FOUND_L=DIRPROP_FLAG(L), + FOUND_R=DIRPROP_FLAG(R) +}; + +typedef struct Opening { + int32_t position; /* position of opening bracket */ + int32_t match; /* matching char or -position of closing bracket */ + int32_t contextPos; /* position of last strong char found before opening */ + uint16_t flags; /* bits for L or R/AL found within the pair */ + UBiDiDirection contextDir; /* L or R according to last strong char before opening */ + uint8_t filler; /* to complete a nice multiple of 4 chars */ +} Opening; + +typedef struct IsoRun { + int32_t contextPos; /* position of char determining context */ + uint16_t start; /* index of first opening entry for this run */ + uint16_t limit; /* index after last opening entry for this run */ + UBiDiLevel level; /* level of this run */ + DirProp lastStrong; /* bidi class of last strong char found in this run */ + DirProp lastBase; /* bidi class of last base char found in this run */ + UBiDiDirection contextDir; /* L or R to use as context for following openings */ +} IsoRun; + +typedef struct BracketData { + UBiDi *pBiDi; + /* array of opening entries which should be enough in most cases; no malloc() */ + Opening simpleOpenings[SIMPLE_OPENINGS_COUNT]; + Opening *openings; /* pointer to current array of entries */ + int32_t openingsCount; /* number of allocated entries */ + int32_t isoRunLast; /* index of last used entry */ + /* array of nested isolated sequence entries; can never excess UBIDI_MAX_EXPLICIT_LEVEL + + 1 for index 0, + 1 for before the first isolated sequence */ + IsoRun isoRuns[UBIDI_MAX_EXPLICIT_LEVEL+2]; + UBool isNumbersSpecial; /* reordering mode for NUMBERS_SPECIAL */ +} BracketData; + +typedef struct Isolate { + int32_t startON; + int32_t start1; + int32_t state; + int16_t stateImp; +} Isolate; + +typedef struct Run { + int32_t logicalStart, /* first character of the run; b31 indicates even/odd level */ + visualLimit, /* last visual position of the run +1 */ + insertRemove; /* if >0, flags for inserting LRM/RLM before/after run, + if <0, count of bidi controls within run */ +} Run; + +/* in a Run, logicalStart will get this bit set if the run level is odd */ +#define INDEX_ODD_BIT (1UL<<31) + +#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)((level)&1)<<31)) +#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)((level)&1)<<31)) +#define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT) + +#define GET_INDEX(x) ((x)&~INDEX_ODD_BIT) +#define GET_ODD_BIT(x) ((uint32_t)(x)>>31) +#define IS_ODD_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)!=0)) +#define IS_EVEN_RUN(x) ((UBool)(((x)&INDEX_ODD_BIT)==0)) + +U_CFUNC UBool +ubidi_getRuns(UBiDi *pBiDi, UErrorCode *pErrorCode); + +/** BiDi control code points */ +enum { + ZWNJ_CHAR=0x200c, + ZWJ_CHAR, + LRM_CHAR, + RLM_CHAR, + LRE_CHAR=0x202a, + RLE_CHAR, + PDF_CHAR, + LRO_CHAR, + RLO_CHAR, + LRI_CHAR=0x2066, + RLI_CHAR, + FSI_CHAR, + PDI_CHAR +}; + +#define IS_BIDI_CONTROL_CHAR(c) (((uint32_t)(c)&0xfffffffc)==ZWNJ_CHAR || (uint32_t)((c)-LRE_CHAR)<5 || (uint32_t)((c)-LRI_CHAR)<4) + +/* InsertPoints structure for noting where to put BiDi marks ---------------- */ + +typedef struct Point { + int32_t pos; /* position in text */ + int32_t flag; /* flag for LRM/RLM, before/after */ +} Point; + +typedef struct InsertPoints { + int32_t capacity; /* number of points allocated */ + int32_t size; /* number of points used */ + int32_t confirmed; /* number of points confirmed */ + UErrorCode errorCode; /* for eventual memory shortage */ + Point *points; /* pointer to array of points */ +} InsertPoints; + + +/* UBiDi structure ----------------------------------------------------------- */ + +struct UBiDi { + /* pointer to parent paragraph object (pointer to self if this object is + * a paragraph object); set to NULL in a newly opened object; set to a + * real value after a successful execution of ubidi_setPara or ubidi_setLine + */ + const UBiDi * pParaBiDi; + + /* alias pointer to the current text */ + const UChar *text; + + /* length of the current text */ + int32_t originalLength; + + /* if the UBIDI_OPTION_STREAMING option is set, this is the length + * of text actually processed by ubidi_setPara, which may be shorter than + * the original length. + * Otherwise, it is identical to the original length. + */ + int32_t length; + + /* if the UBIDI_OPTION_REMOVE_CONTROLS option is set, and/or + * marks are allowed to be inserted in one of the reordering mode, the + * length of the result string may be different from the processed length. + */ + int32_t resultLength; + + /* memory sizes in bytes */ + int32_t dirPropsSize, levelsSize, openingsSize, parasSize, runsSize, isolatesSize; + + /* allocated memory */ + DirProp *dirPropsMemory; + UBiDiLevel *levelsMemory; + Opening *openingsMemory; + Para *parasMemory; + Run *runsMemory; + Isolate *isolatesMemory; + + /* indicators for whether memory may be allocated after ubidi_open() */ + UBool mayAllocateText, mayAllocateRuns; + + /* arrays with one value per text-character */ + DirProp *dirProps; + UBiDiLevel *levels; + + /* are we performing an approximation of the "inverse BiDi" algorithm? */ + UBool isInverse; + + /* are we using the basic algorithm or its variation? */ + UBiDiReorderingMode reorderingMode; + + /* UBIDI_REORDER_xxx values must be ordered so that all the regular + * logical to visual modes come first, and all inverse BiDi modes + * come last. + */ + #define UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL UBIDI_REORDER_NUMBERS_SPECIAL + + /* bitmask for reordering options */ + uint32_t reorderingOptions; + + /* must block separators receive level 0? */ + UBool orderParagraphsLTR; + + /* the paragraph level */ + UBiDiLevel paraLevel; + /* original paraLevel when contextual */ + /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */ + UBiDiLevel defaultParaLevel; + + /* context data */ + const UChar *prologue; + int32_t proLength; + const UChar *epilogue; + int32_t epiLength; + + /* the following is set in ubidi_setPara, used in processPropertySeq */ + const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */ + + /* the overall paragraph or line directionality - see UBiDiDirection */ + UBiDiDirection direction; + + /* flags is a bit set for which directional properties are in the text */ + Flags flags; + + /* lastArabicPos is index to the last AL in the text, -1 if none */ + int32_t lastArabicPos; + + /* characters after trailingWSStart are WS and are */ + /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ + int32_t trailingWSStart; + + /* fields for paragraph handling */ + int32_t paraCount; /* set in getDirProps() */ + /* filled in getDirProps() */ + Para *paras; + + /* for relatively short text, we only need a tiny array of paras (no malloc()) */ + Para simpleParas[SIMPLE_PARAS_COUNT]; + + /* fields for line reordering */ + int32_t runCount; /* ==-1: runs not set up yet */ + Run *runs; + + /* for non-mixed text, we only need a tiny array of runs (no malloc()) */ + Run simpleRuns[1]; + + /* maximum or current nesting depth of isolate sequences */ + /* Within resolveExplicitLevels() and checkExplicitLevels(), this is the maximal + nesting encountered. + Within resolveImplicitLevels(), this is the index of the current isolates + stack entry. */ + int32_t isolateCount; + Isolate *isolates; + + /* for simple text, have a small stack (no malloc()) */ + Isolate simpleIsolates[SIMPLE_ISOLATES_COUNT]; + + /* for inverse Bidi with insertion of directional marks */ + InsertPoints insertPoints; + + /* for option UBIDI_OPTION_REMOVE_CONTROLS */ + int32_t controlCount; + + /* for Bidi class callback */ + UBiDiClassCallback *fnClassCallback; /* action pointer */ + const void *coClassCallback; /* context pointer */ +}; + +#define IS_VALID_PARA(x) ((x) && ((x)->pParaBiDi==(x))) +#define IS_VALID_PARA_OR_LINE(x) ((x) && ((x)->pParaBiDi==(x) || (((x)->pParaBiDi) && (x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi))) + +typedef union { + DirProp *dirPropsMemory; + UBiDiLevel *levelsMemory; + Opening *openingsMemory; + Para *parasMemory; + Run *runsMemory; + Isolate *isolatesMemory; +} BidiMemoryForAllocation; + +/* Macros for initial checks at function entry */ +#define RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \ + if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return retvalue; \ +} UPRV_BLOCK_MACRO_END +#define RETURN_IF_NOT_VALID_PARA(bidi, errcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \ + if(!IS_VALID_PARA(bidi)) { \ + errcode=U_INVALID_STATE_ERROR; \ + return retvalue; \ + } \ +} UPRV_BLOCK_MACRO_END +#define RETURN_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \ + if(!IS_VALID_PARA_OR_LINE(bidi)) { \ + errcode=U_INVALID_STATE_ERROR; \ + return retvalue; \ + } \ +} UPRV_BLOCK_MACRO_END +#define RETURN_IF_BAD_RANGE(arg, start, limit, errcode, retvalue) UPRV_BLOCK_MACRO_BEGIN { \ + if((arg)<(start) || (arg)>=(limit)) { \ + (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \ + return retvalue; \ + } \ +} UPRV_BLOCK_MACRO_END + +#define RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrcode) UPRV_BLOCK_MACRO_BEGIN { \ + if((pErrcode)==NULL || U_FAILURE(*pErrcode)) return; \ +} UPRV_BLOCK_MACRO_END +#define RETURN_VOID_IF_NOT_VALID_PARA(bidi, errcode) UPRV_BLOCK_MACRO_BEGIN { \ + if(!IS_VALID_PARA(bidi)) { \ + errcode=U_INVALID_STATE_ERROR; \ + return; \ + } \ +} UPRV_BLOCK_MACRO_END +#define RETURN_VOID_IF_NOT_VALID_PARA_OR_LINE(bidi, errcode) UPRV_BLOCK_MACRO_BEGIN { \ + if(!IS_VALID_PARA_OR_LINE(bidi)) { \ + errcode=U_INVALID_STATE_ERROR; \ + return; \ + } \ +} UPRV_BLOCK_MACRO_END +#define RETURN_VOID_IF_BAD_RANGE(arg, start, limit, errcode) UPRV_BLOCK_MACRO_BEGIN { \ + if((arg)<(start) || (arg)>=(limit)) { \ + (errcode)=U_ILLEGAL_ARGUMENT_ERROR; \ + return; \ + } \ +} UPRV_BLOCK_MACRO_END + +/* helper function to (re)allocate memory if allowed */ +U_CFUNC UBool +ubidi_getMemory(BidiMemoryForAllocation *pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded); + +/* helper macros for each allocated array in UBiDi */ +#define getDirPropsMemory(pBiDi, length) \ + ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \ + (pBiDi)->mayAllocateText, (length)) + +#define getLevelsMemory(pBiDi, length) \ + ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \ + (pBiDi)->mayAllocateText, (length)) + +#define getRunsMemory(pBiDi, length) \ + ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \ + (pBiDi)->mayAllocateRuns, (length)*sizeof(Run)) + +/* additional macros used by ubidi_open() - always allow allocation */ +#define getInitialDirPropsMemory(pBiDi, length) \ + ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->dirPropsMemory, &(pBiDi)->dirPropsSize, \ + true, (length)) + +#define getInitialLevelsMemory(pBiDi, length) \ + ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->levelsMemory, &(pBiDi)->levelsSize, \ + true, (length)) + +#define getInitialOpeningsMemory(pBiDi, length) \ + ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->openingsMemory, &(pBiDi)->openingsSize, \ + true, (length)*sizeof(Opening)) + +#define getInitialParasMemory(pBiDi, length) \ + ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->parasMemory, &(pBiDi)->parasSize, \ + true, (length)*sizeof(Para)) + +#define getInitialRunsMemory(pBiDi, length) \ + ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->runsMemory, &(pBiDi)->runsSize, \ + true, (length)*sizeof(Run)) + +#define getInitialIsolatesMemory(pBiDi, length) \ + ubidi_getMemory((BidiMemoryForAllocation *)&(pBiDi)->isolatesMemory, &(pBiDi)->isolatesSize, \ + true, (length)*sizeof(Isolate)) + +#endif diff --git a/thirdparty/icu4c/common/ubidiln.cpp b/thirdparty/icu4c/common/ubidiln.cpp new file mode 100644 index 0000000000..3545f4e111 --- /dev/null +++ b/thirdparty/icu4c/common/ubidiln.cpp @@ -0,0 +1,1347 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ubidiln.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999aug06 +* created by: Markus W. Scherer, updated by Matitiahu Allouche +*/ + +#include "cmemory.h" +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "unicode/uchar.h" +#include "unicode/ubidi.h" +#include "ubidiimp.h" +#include "uassert.h" + +/* + * General remarks about the functions in this file: + * + * These functions deal with the aspects of potentially mixed-directional + * text in a single paragraph or in a line of a single paragraph + * which has already been processed according to + * the Unicode 6.3 BiDi algorithm as defined in + * http://www.unicode.org/unicode/reports/tr9/ , version 28, + * also described in The Unicode Standard, Version 6.3.0 . + * + * This means that there is a UBiDi object with a levels + * and a dirProps array. + * paraLevel and direction are also set. + * Only if the length of the text is zero, then levels==dirProps==NULL. + * + * The overall directionality of the paragraph + * or line is used to bypass the reordering steps if possible. + * Even purely RTL text does not need reordering there because + * the ubidi_getLogical/VisualIndex() functions can compute the + * index on the fly in such a case. + * + * The implementation of the access to same-level-runs and of the reordering + * do attempt to provide better performance and less memory usage compared to + * a direct implementation of especially rule (L2) with an array of + * one (32-bit) integer per text character. + * + * Here, the levels array is scanned as soon as necessary, and a vector of + * same-level-runs is created. Reordering then is done on this vector. + * For each run of text positions that were resolved to the same level, + * only 8 bytes are stored: the first text position of the run and the visual + * position behind the run after reordering. + * One sign bit is used to hold the directionality of the run. + * This is inefficient if there are many very short runs. If the average run + * length is <2, then this uses more memory. + * + * In a further attempt to save memory, the levels array is never changed + * after all the resolution rules (Xn, Wn, Nn, In). + * Many functions have to consider the field trailingWSStart: + * if it is less than length, then there is an implicit trailing run + * at the paraLevel, + * which is not reflected in the levels array. + * This allows a line UBiDi object to use the same levels array as + * its paragraph parent object. + * + * When a UBiDi object is created for a line of a paragraph, then the + * paragraph's levels and dirProps arrays are reused by way of setting + * a pointer into them, not by copying. This again saves memory and forbids to + * change the now shared levels for (L1). + */ + +/* handle trailing WS (L1) -------------------------------------------------- */ + +/* + * setTrailingWSStart() sets the start index for a trailing + * run of WS in the line. This is necessary because we do not modify + * the paragraph's levels array that we just point into. + * Using trailingWSStart is another form of performing (L1). + * + * To make subsequent operations easier, we also include the run + * before the WS if it is at the paraLevel - we merge the two here. + * + * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is + * set correctly for the line even when contextual multiple paragraphs. + */ +static void +setTrailingWSStart(UBiDi *pBiDi) { + /* pBiDi->direction!=UBIDI_MIXED */ + + const DirProp *dirProps=pBiDi->dirProps; + UBiDiLevel *levels=pBiDi->levels; + int32_t start=pBiDi->length; + UBiDiLevel paraLevel=pBiDi->paraLevel; + + /* If the line is terminated by a block separator, all preceding WS etc... + are already set to paragraph level. + Setting trailingWSStart to pBidi->length will avoid changing the + level of B chars from 0 to paraLevel in ubidi_getLevels when + orderParagraphsLTR==TRUE. + */ + if(dirProps[start-1]==B) { + pBiDi->trailingWSStart=start; /* currently == pBiDi->length */ + return; + } + /* go backwards across all WS, BN, explicit codes */ + while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) { + --start; + } + + /* if the WS run can be merged with the previous run then do so here */ + while(start>0 && levels[start-1]==paraLevel) { + --start; + } + + pBiDi->trailingWSStart=start; +} + +/* ubidi_setLine ------------------------------------------------------------ */ + +U_CAPI void U_EXPORT2 +ubidi_setLine(const UBiDi *pParaBiDi, + int32_t start, int32_t limit, + UBiDi *pLineBiDi, + UErrorCode *pErrorCode) { + int32_t length; + + /* check the argument values */ + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode); + RETURN_VOID_IF_BAD_RANGE(start, 0, limit, *pErrorCode); + RETURN_VOID_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode); + if(pLineBiDi==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if(ubidi_getParagraph(pParaBiDi, start, NULL, NULL, NULL, pErrorCode) != + ubidi_getParagraph(pParaBiDi, limit-1, NULL, NULL, NULL, pErrorCode)) { + /* the line crosses a paragraph boundary */ + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* set the values in pLineBiDi from its pParaBiDi parent */ + pLineBiDi->pParaBiDi=NULL; /* mark unfinished setLine */ + pLineBiDi->text=pParaBiDi->text+start; + length=pLineBiDi->length=limit-start; + pLineBiDi->resultLength=pLineBiDi->originalLength=length; + pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start); + pLineBiDi->paraCount=pParaBiDi->paraCount; + pLineBiDi->runs=NULL; + pLineBiDi->flags=0; + pLineBiDi->reorderingMode=pParaBiDi->reorderingMode; + pLineBiDi->reorderingOptions=pParaBiDi->reorderingOptions; + pLineBiDi->controlCount=0; + if(pParaBiDi->controlCount>0) { + int32_t j; + for(j=start; j<limit; j++) { + if(IS_BIDI_CONTROL_CHAR(pParaBiDi->text[j])) { + pLineBiDi->controlCount++; + } + } + pLineBiDi->resultLength-=pLineBiDi->controlCount; + } + + pLineBiDi->dirProps=pParaBiDi->dirProps+start; + pLineBiDi->levels=pParaBiDi->levels+start; + pLineBiDi->runCount=-1; + + if(pParaBiDi->direction!=UBIDI_MIXED) { + /* the parent is already trivial */ + pLineBiDi->direction=pParaBiDi->direction; + + /* + * The parent's levels are all either + * implicitly or explicitly ==paraLevel; + * do the same here. + */ + if(pParaBiDi->trailingWSStart<=start) { + pLineBiDi->trailingWSStart=0; + } else if(pParaBiDi->trailingWSStart<limit) { + pLineBiDi->trailingWSStart=pParaBiDi->trailingWSStart-start; + } else { + pLineBiDi->trailingWSStart=length; + } + } else { + const UBiDiLevel *levels=pLineBiDi->levels; + int32_t i, trailingWSStart; + UBiDiLevel level; + + setTrailingWSStart(pLineBiDi); + trailingWSStart=pLineBiDi->trailingWSStart; + + /* recalculate pLineBiDi->direction */ + if(trailingWSStart==0) { + /* all levels are at paraLevel */ + pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1); + } else { + /* get the level of the first character */ + level=(UBiDiLevel)(levels[0]&1); + + /* if there is anything of a different level, then the line is mixed */ + if(trailingWSStart<length && (pLineBiDi->paraLevel&1)!=level) { + /* the trailing WS is at paraLevel, which differs from levels[0] */ + pLineBiDi->direction=UBIDI_MIXED; + } else { + /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */ + i=1; + for(;;) { + if(i==trailingWSStart) { + /* the direction values match those in level */ + pLineBiDi->direction=(UBiDiDirection)level; + break; + } else if((levels[i]&1)!=level) { + pLineBiDi->direction=UBIDI_MIXED; + break; + } + ++i; + } + } + } + + switch(pLineBiDi->direction) { + case UBIDI_LTR: + /* make sure paraLevel is even */ + pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1); + + /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ + pLineBiDi->trailingWSStart=0; + break; + case UBIDI_RTL: + /* make sure paraLevel is odd */ + pLineBiDi->paraLevel|=1; + + /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ + pLineBiDi->trailingWSStart=0; + break; + default: + break; + } + } + pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */ + return; +} + +U_CAPI UBiDiLevel U_EXPORT2 +ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) { + /* return paraLevel if in the trailing WS run, otherwise the real level */ + if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || pBiDi->length<=charIndex) { + return 0; + } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) { + return GET_PARALEVEL(pBiDi, charIndex); + } else { + return pBiDi->levels[charIndex]; + } +} + +U_CAPI const UBiDiLevel * U_EXPORT2 +ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { + int32_t start, length; + + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, NULL); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, NULL); + if((length=pBiDi->length)<=0) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + if((start=pBiDi->trailingWSStart)==length) { + /* the current levels array reflects the WS run */ + return pBiDi->levels; + } + + /* + * After the previous if(), we know that the levels array + * has an implicit trailing WS run and therefore does not fully + * reflect itself all the levels. + * This must be a UBiDi object for a line, and + * we need to create a new levels array. + */ + if(getLevelsMemory(pBiDi, length)) { + UBiDiLevel *levels=pBiDi->levelsMemory; + + if(start>0 && levels!=pBiDi->levels) { + uprv_memcpy(levels, pBiDi->levels, start); + } + /* pBiDi->paraLevel is ok even if contextual multiple paragraphs, + since pBidi is a line object */ + uprv_memset(levels+start, pBiDi->paraLevel, length-start); + + /* this new levels array is set for the line and reflects the WS run */ + pBiDi->trailingWSStart=length; + return pBiDi->levels=levels; + } else { + /* out of memory */ + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } +} + +U_CAPI void U_EXPORT2 +ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition, + int32_t *pLogicalLimit, UBiDiLevel *pLevel) { + UErrorCode errorCode; + int32_t runCount, visualStart, logicalLimit, logicalFirst, i; + Run iRun; + + errorCode=U_ZERO_ERROR; + RETURN_VOID_IF_BAD_RANGE(logicalPosition, 0, pBiDi->length, errorCode); + /* ubidi_countRuns will check VALID_PARA_OR_LINE */ + runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode); + if(U_FAILURE(errorCode)) { + return; + } + /* this is done based on runs rather than on levels since levels have + a special interpretation when UBIDI_REORDER_RUNS_ONLY + */ + visualStart=logicalLimit=0; + iRun=pBiDi->runs[0]; + + for(i=0; i<runCount; i++) { + iRun = pBiDi->runs[i]; + logicalFirst=GET_INDEX(iRun.logicalStart); + logicalLimit=logicalFirst+iRun.visualLimit-visualStart; + if((logicalPosition>=logicalFirst) && + (logicalPosition<logicalLimit)) { + break; + } + visualStart = iRun.visualLimit; + } + if(pLogicalLimit) { + *pLogicalLimit=logicalLimit; + } + if(pLevel) { + if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) { + *pLevel=(UBiDiLevel)GET_ODD_BIT(iRun.logicalStart); + } + else if(pBiDi->direction!=UBIDI_MIXED || logicalPosition>=pBiDi->trailingWSStart) { + *pLevel=GET_PARALEVEL(pBiDi, logicalPosition); + } else { + *pLevel=pBiDi->levels[logicalPosition]; + } + } +} + +/* runs API functions ------------------------------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); + ubidi_getRuns(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return -1; + } + return pBiDi->runCount; +} + +U_CAPI UBiDiDirection U_EXPORT2 +ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, + int32_t *pLogicalStart, int32_t *pLength) +{ + int32_t start; + UErrorCode errorCode = U_ZERO_ERROR; + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode, UBIDI_LTR); + ubidi_getRuns(pBiDi, &errorCode); + if(U_FAILURE(errorCode)) { + return UBIDI_LTR; + } + RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, errorCode, UBIDI_LTR); + + start=pBiDi->runs[runIndex].logicalStart; + if(pLogicalStart!=NULL) { + *pLogicalStart=GET_INDEX(start); + } + if(pLength!=NULL) { + if(runIndex>0) { + *pLength=pBiDi->runs[runIndex].visualLimit- + pBiDi->runs[runIndex-1].visualLimit; + } else { + *pLength=pBiDi->runs[0].visualLimit; + } + } + return (UBiDiDirection)GET_ODD_BIT(start); +} + +/* in trivial cases there is only one trivial run; called by ubidi_getRuns() */ +static void +getSingleRun(UBiDi *pBiDi, UBiDiLevel level) { + /* simple, single-run case */ + pBiDi->runs=pBiDi->simpleRuns; + pBiDi->runCount=1; + + /* fill and reorder the single run */ + pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level); + pBiDi->runs[0].visualLimit=pBiDi->length; + pBiDi->runs[0].insertRemove=0; +} + +/* reorder the runs array (L2) ---------------------------------------------- */ + +/* + * Reorder the same-level runs in the runs array. + * Here, runCount>1 and maxLevel>=minLevel>=paraLevel. + * All the visualStart fields=logical start before reordering. + * The "odd" bits are not set yet. + * + * Reordering with this data structure lends itself to some handy shortcuts: + * + * Since each run is moved but not modified, and since at the initial maxLevel + * each sequence of same-level runs consists of only one run each, we + * don't need to do anything there and can predecrement maxLevel. + * In many simple cases, the reordering is thus done entirely in the + * index mapping. + * Also, reordering occurs only down to the lowest odd level that occurs, + * which is minLevel|1. However, if the lowest level itself is odd, then + * in the last reordering the sequence of the runs at this level or higher + * will be all runs, and we don't need the elaborate loop to search for them. + * This is covered by ++minLevel instead of minLevel|=1 followed + * by an extra reorder-all after the reorder-some loop. + * About a trailing WS run: + * Such a run would need special treatment because its level is not + * reflected in levels[] if this is not a paragraph object. + * Instead, all characters from trailingWSStart on are implicitly at + * paraLevel. + * However, for all maxLevel>paraLevel, this run will never be reordered + * and does not need to be taken into account. maxLevel==paraLevel is only reordered + * if minLevel==paraLevel is odd, which is done in the extra segment. + * This means that for the main reordering loop we don't need to consider + * this run and can --runCount. If it is later part of the all-runs + * reordering, then runCount is adjusted accordingly. + */ +static void +reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) { + Run *runs, tempRun; + UBiDiLevel *levels; + int32_t firstRun, endRun, limitRun, runCount; + + /* nothing to do? */ + if(maxLevel<=(minLevel|1)) { + return; + } + + /* + * Reorder only down to the lowest odd level + * and reorder at an odd minLevel in a separate, simpler loop. + * See comments above for why minLevel is always incremented. + */ + ++minLevel; + + runs=pBiDi->runs; + levels=pBiDi->levels; + runCount=pBiDi->runCount; + + /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */ + if(pBiDi->trailingWSStart<pBiDi->length) { + --runCount; + } + + while(--maxLevel>=minLevel) { + firstRun=0; + + /* loop for all sequences of runs */ + for(;;) { + /* look for a sequence of runs that are all at >=maxLevel */ + /* look for the first run of such a sequence */ + while(firstRun<runCount && levels[runs[firstRun].logicalStart]<maxLevel) { + ++firstRun; + } + if(firstRun>=runCount) { + break; /* no more such runs */ + } + + /* look for the limit run of such a sequence (the run behind it) */ + for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=maxLevel;) {} + + /* Swap the entire sequence of runs from firstRun to limitRun-1. */ + endRun=limitRun-1; + while(firstRun<endRun) { + tempRun = runs[firstRun]; + runs[firstRun]=runs[endRun]; + runs[endRun]=tempRun; + ++firstRun; + --endRun; + } + + if(limitRun==runCount) { + break; /* no more such runs */ + } else { + firstRun=limitRun+1; + } + } + } + + /* now do maxLevel==old minLevel (==odd!), see above */ + if(!(minLevel&1)) { + firstRun=0; + + /* include the trailing WS run in this complete reordering */ + if(pBiDi->trailingWSStart==pBiDi->length) { + --runCount; + } + + /* Swap the entire sequence of all runs. (endRun==runCount) */ + while(firstRun<runCount) { + tempRun=runs[firstRun]; + runs[firstRun]=runs[runCount]; + runs[runCount]=tempRun; + ++firstRun; + --runCount; + } + } +} + +/* compute the runs array --------------------------------------------------- */ + +static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex) { + Run *runs=pBiDi->runs; + int32_t runCount=pBiDi->runCount, visualStart=0, i, length, logicalStart; + + for(i=0; i<runCount; i++) { + length=runs[i].visualLimit-visualStart; + logicalStart=GET_INDEX(runs[i].logicalStart); + if((logicalIndex>=logicalStart) && (logicalIndex<(logicalStart+length))) { + return i; + } + visualStart+=length; + } + /* we should never get here */ + UPRV_UNREACHABLE; +} + +/* + * Compute the runs array from the levels array. + * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0 + * and the runs are reordered. + * Odd-level runs have visualStart on their visual right edge and + * they progress visually to the left. + * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the + * sum of appropriate LRM/RLM_BEFORE/AFTER flags. + * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the + * negative number of BiDi control characters within this run. + */ +U_CFUNC UBool +ubidi_getRuns(UBiDi *pBiDi, UErrorCode*) { + /* + * This method returns immediately if the runs are already set. This + * includes the case of length==0 (handled in setPara).. + */ + if (pBiDi->runCount>=0) { + return TRUE; + } + + if(pBiDi->direction!=UBIDI_MIXED) { + /* simple, single-run case - this covers length==0 */ + /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */ + getSingleRun(pBiDi, pBiDi->paraLevel); + } else /* UBIDI_MIXED, length>0 */ { + /* mixed directionality */ + int32_t length=pBiDi->length, limit; + UBiDiLevel *levels=pBiDi->levels; + int32_t i, runCount; + UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */ + /* + * If there are WS characters at the end of the line + * and the run preceding them has a level different from + * paraLevel, then they will form their own run at paraLevel (L1). + * Count them separately. + * We need some special treatment for this in order to not + * modify the levels array which a line UBiDi object shares + * with its paragraph parent and its other line siblings. + * In other words, for the trailing WS, it may be + * levels[]!=paraLevel but we have to treat it like it were so. + */ + limit=pBiDi->trailingWSStart; + /* count the runs, there is at least one non-WS run, and limit>0 */ + runCount=0; + for(i=0; i<limit; ++i) { + /* increment runCount at the start of each run */ + if(levels[i]!=level) { + ++runCount; + level=levels[i]; + } + } + + /* + * We don't need to see if the last run can be merged with a trailing + * WS run because setTrailingWSStart() would have done that. + */ + if(runCount==1 && limit==length) { + /* There is only one non-WS run and no trailing WS-run. */ + getSingleRun(pBiDi, levels[0]); + } else /* runCount>1 || limit<length */ { + /* allocate and set the runs */ + Run *runs; + int32_t runIndex, start; + UBiDiLevel minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0; + + /* now, count a (non-mergeable) WS run */ + if(limit<length) { + ++runCount; + } + + /* runCount>1 */ + if(getRunsMemory(pBiDi, runCount)) { + runs=pBiDi->runsMemory; + } else { + return FALSE; + } + + /* set the runs */ + /* FOOD FOR THOUGHT: this could be optimized, e.g.: + * 464->444, 484->444, 575->555, 595->555 + * However, that would take longer. Check also how it would + * interact with BiDi control removal and inserting Marks. + */ + runIndex=0; + + /* search for the run limits and initialize visualLimit values with the run lengths */ + i=0; + do { + /* prepare this run */ + start=i; + level=levels[i]; + if(level<minLevel) { + minLevel=level; + } + if(level>maxLevel) { + maxLevel=level; + } + + /* look for the run limit */ + while(++i<limit && levels[i]==level) {} + + /* i is another run limit */ + runs[runIndex].logicalStart=start; + runs[runIndex].visualLimit=i-start; + runs[runIndex].insertRemove=0; + ++runIndex; + } while(i<limit); + + if(limit<length) { + /* there is a separate WS run */ + runs[runIndex].logicalStart=limit; + runs[runIndex].visualLimit=length-limit; + /* For the trailing WS run, pBiDi->paraLevel is ok even + if contextual multiple paragraphs. */ + if(pBiDi->paraLevel<minLevel) { + minLevel=pBiDi->paraLevel; + } + } + + /* set the object fields */ + pBiDi->runs=runs; + pBiDi->runCount=runCount; + + reorderLine(pBiDi, minLevel, maxLevel); + + /* now add the direction flags and adjust the visualLimit's to be just that */ + /* this loop will also handle the trailing WS run */ + limit=0; + for(i=0; i<runCount; ++i) { + ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]); + limit+=runs[i].visualLimit; + runs[i].visualLimit=limit; + } + + /* Set the "odd" bit for the trailing WS run. */ + /* For a RTL paragraph, it will be the *first* run in visual order. */ + /* For the trailing WS run, pBiDi->paraLevel is ok even if + contextual multiple paragraphs. */ + if(runIndex<runCount) { + int32_t trailingRun = ((pBiDi->paraLevel & 1) != 0)? 0 : runIndex; + + ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel); + } + } + } + + /* handle insert LRM/RLM BEFORE/AFTER run */ + if(pBiDi->insertPoints.size>0) { + Point *point, *start=pBiDi->insertPoints.points, + *limit=start+pBiDi->insertPoints.size; + int32_t runIndex; + for(point=start; point<limit; point++) { + runIndex=getRunFromLogicalIndex(pBiDi, point->pos); + pBiDi->runs[runIndex].insertRemove|=point->flag; + } + } + + /* handle remove BiDi control characters */ + if(pBiDi->controlCount>0) { + int32_t runIndex; + const UChar *start=pBiDi->text, *limit=start+pBiDi->length, *pu; + for(pu=start; pu<limit; pu++) { + if(IS_BIDI_CONTROL_CHAR(*pu)) { + runIndex=getRunFromLogicalIndex(pBiDi, (int32_t)(pu-start)); + pBiDi->runs[runIndex].insertRemove--; + } + } + } + + return TRUE; +} + +static UBool +prepareReorder(const UBiDiLevel *levels, int32_t length, + int32_t *indexMap, + UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) { + int32_t start; + UBiDiLevel level, minLevel, maxLevel; + + if(levels==NULL || length<=0) { + return FALSE; + } + + /* determine minLevel and maxLevel */ + minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1; + maxLevel=0; + for(start=length; start>0;) { + level=levels[--start]; + if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) { + return FALSE; + } + if(level<minLevel) { + minLevel=level; + } + if(level>maxLevel) { + maxLevel=level; + } + } + *pMinLevel=minLevel; + *pMaxLevel=maxLevel; + + /* initialize the index map */ + for(start=length; start>0;) { + --start; + indexMap[start]=start; + } + + return TRUE; +} + +/* reorder a line based on a levels array (L2) ------------------------------ */ + +U_CAPI void U_EXPORT2 +ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { + int32_t start, limit, sumOfSosEos; + UBiDiLevel minLevel = 0, maxLevel = 0; + + if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { + return; + } + + /* nothing to do? */ + if(minLevel==maxLevel && (minLevel&1)==0) { + return; + } + + /* reorder only down to the lowest odd level */ + minLevel|=1; + + /* loop maxLevel..minLevel */ + do { + start=0; + + /* loop for all sequences of levels to reorder at the current maxLevel */ + for(;;) { + /* look for a sequence of levels that are all at >=maxLevel */ + /* look for the first index of such a sequence */ + while(start<length && levels[start]<maxLevel) { + ++start; + } + if(start>=length) { + break; /* no more such sequences */ + } + + /* look for the limit of such a sequence (the index behind it) */ + for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {} + + /* + * sos=start of sequence, eos=end of sequence + * + * The closed (inclusive) interval from sos to eos includes all the logical + * and visual indexes within this sequence. They are logically and + * visually contiguous and in the same range. + * + * For each run, the new visual index=sos+eos-old visual index; + * we pre-add sos+eos into sumOfSosEos -> + * new visual index=sumOfSosEos-old visual index; + */ + sumOfSosEos=start+limit-1; + + /* reorder each index in the sequence */ + do { + indexMap[start]=sumOfSosEos-indexMap[start]; + } while(++start<limit); + + /* start==limit */ + if(limit==length) { + break; /* no more such sequences */ + } else { + start=limit+1; + } + } + } while(--maxLevel>=minLevel); +} + +U_CAPI void U_EXPORT2 +ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { + int32_t start, end, limit, temp; + UBiDiLevel minLevel = 0, maxLevel = 0; + + if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { + return; + } + + /* nothing to do? */ + if(minLevel==maxLevel && (minLevel&1)==0) { + return; + } + + /* reorder only down to the lowest odd level */ + minLevel|=1; + + /* loop maxLevel..minLevel */ + do { + start=0; + + /* loop for all sequences of levels to reorder at the current maxLevel */ + for(;;) { + /* look for a sequence of levels that are all at >=maxLevel */ + /* look for the first index of such a sequence */ + while(start<length && levels[start]<maxLevel) { + ++start; + } + if(start>=length) { + break; /* no more such runs */ + } + + /* look for the limit of such a sequence (the index behind it) */ + for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {} + + /* + * Swap the entire interval of indexes from start to limit-1. + * We don't need to swap the levels for the purpose of this + * algorithm: the sequence of levels that we look at does not + * move anyway. + */ + end=limit-1; + while(start<end) { + temp=indexMap[start]; + indexMap[start]=indexMap[end]; + indexMap[end]=temp; + + ++start; + --end; + } + + if(limit==length) { + break; /* no more such sequences */ + } else { + start=limit+1; + } + } + } while(--maxLevel>=minLevel); +} + +/* API functions for logical<->visual mapping ------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { + int32_t visualIndex=UBIDI_MAP_NOWHERE; + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); + RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1); + + /* we can do the trivial cases without the runs array */ + switch(pBiDi->direction) { + case UBIDI_LTR: + visualIndex=logicalIndex; + break; + case UBIDI_RTL: + visualIndex=pBiDi->length-logicalIndex-1; + break; + default: + if(!ubidi_getRuns(pBiDi, pErrorCode)) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return -1; + } else { + Run *runs=pBiDi->runs; + int32_t i, visualStart=0, offset, length; + + /* linear search for the run, search on the visual runs */ + for(i=0; i<pBiDi->runCount; ++i) { + length=runs[i].visualLimit-visualStart; + offset=logicalIndex-GET_INDEX(runs[i].logicalStart); + if(offset>=0 && offset<length) { + if(IS_EVEN_RUN(runs[i].logicalStart)) { + /* LTR */ + visualIndex=visualStart+offset; + } else { + /* RTL */ + visualIndex=visualStart+length-offset-1; + } + break; /* exit for loop */ + } + visualStart+=length; + } + if(i>=pBiDi->runCount) { + return UBIDI_MAP_NOWHERE; + } + } + } + + if(pBiDi->insertPoints.size>0) { + /* add the number of added marks until the calculated visual index */ + Run *runs=pBiDi->runs; + int32_t i, length, insertRemove; + int32_t visualStart=0, markFound=0; + for(i=0; ; i++, visualStart+=length) { + length=runs[i].visualLimit-visualStart; + insertRemove=runs[i].insertRemove; + if(insertRemove & (LRM_BEFORE|RLM_BEFORE)) { + markFound++; + } + /* is it the run containing the visual index? */ + if(visualIndex<runs[i].visualLimit) { + return visualIndex+markFound; + } + if(insertRemove & (LRM_AFTER|RLM_AFTER)) { + markFound++; + } + } + } + else if(pBiDi->controlCount>0) { + /* subtract the number of controls until the calculated visual index */ + Run *runs=pBiDi->runs; + int32_t i, j, start, limit, length, insertRemove; + int32_t visualStart=0, controlFound=0; + UChar uchar=pBiDi->text[logicalIndex]; + /* is the logical index pointing to a control ? */ + if(IS_BIDI_CONTROL_CHAR(uchar)) { + return UBIDI_MAP_NOWHERE; + } + /* loop on runs */ + for(i=0; ; i++, visualStart+=length) { + length=runs[i].visualLimit-visualStart; + insertRemove=runs[i].insertRemove; + /* calculated visual index is beyond this run? */ + if(visualIndex>=runs[i].visualLimit) { + controlFound-=insertRemove; + continue; + } + /* calculated visual index must be within current run */ + if(insertRemove==0) { + return visualIndex-controlFound; + } + if(IS_EVEN_RUN(runs[i].logicalStart)) { + /* LTR: check from run start to logical index */ + start=runs[i].logicalStart; + limit=logicalIndex; + } else { + /* RTL: check from logical index to run end */ + start=logicalIndex+1; + limit=GET_INDEX(runs[i].logicalStart)+length; + } + for(j=start; j<limit; j++) { + uchar=pBiDi->text[j]; + if(IS_BIDI_CONTROL_CHAR(uchar)) { + controlFound++; + } + } + return visualIndex-controlFound; + } + } + + return visualIndex; +} + +U_CAPI int32_t U_EXPORT2 +ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) { + Run *runs; + int32_t i, runCount, start; + RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1); + RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1); + RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1); + /* we can do the trivial cases without the runs array */ + if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) { + if(pBiDi->direction==UBIDI_LTR) { + return visualIndex; + } + else if(pBiDi->direction==UBIDI_RTL) { + return pBiDi->length-visualIndex-1; + } + } + if(!ubidi_getRuns(pBiDi, pErrorCode)) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return -1; + } + + runs=pBiDi->runs; + runCount=pBiDi->runCount; + if(pBiDi->insertPoints.size>0) { + /* handle inserted LRM/RLM */ + int32_t markFound=0, insertRemove; + int32_t visualStart=0, length; + runs=pBiDi->runs; + /* subtract number of marks until visual index */ + for(i=0; ; i++, visualStart+=length) { + length=runs[i].visualLimit-visualStart; + insertRemove=runs[i].insertRemove; + if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { + if(visualIndex<=(visualStart+markFound)) { + return UBIDI_MAP_NOWHERE; + } + markFound++; + } + /* is adjusted visual index within this run? */ + if(visualIndex<(runs[i].visualLimit+markFound)) { + visualIndex-=markFound; + break; + } + if(insertRemove&(LRM_AFTER|RLM_AFTER)) { + if(visualIndex==(visualStart+length+markFound)) { + return UBIDI_MAP_NOWHERE; + } + markFound++; + } + } + } + else if(pBiDi->controlCount>0) { + /* handle removed BiDi control characters */ + int32_t controlFound=0, insertRemove, length; + int32_t logicalStart, logicalEnd, visualStart=0, j, k; + UChar uchar; + UBool evenRun; + /* add number of controls until visual index */ + for(i=0; ; i++, visualStart+=length) { + length=runs[i].visualLimit-visualStart; + insertRemove=runs[i].insertRemove; + /* is adjusted visual index beyond current run? */ + if(visualIndex>=(runs[i].visualLimit-controlFound+insertRemove)) { + controlFound-=insertRemove; + continue; + } + /* adjusted visual index is within current run */ + if(insertRemove==0) { + visualIndex+=controlFound; + break; + } + /* count non-control chars until visualIndex */ + logicalStart=runs[i].logicalStart; + evenRun=IS_EVEN_RUN(logicalStart); + REMOVE_ODD_BIT(logicalStart); + logicalEnd=logicalStart+length-1; + for(j=0; j<length; j++) { + k= evenRun ? logicalStart+j : logicalEnd-j; + uchar=pBiDi->text[k]; + if(IS_BIDI_CONTROL_CHAR(uchar)) { + controlFound++; + } + if((visualIndex+controlFound)==(visualStart+j)) { + break; + } + } + visualIndex+=controlFound; + break; + } + } + /* handle all cases */ + if(runCount<=10) { + /* linear search for the run */ + for(i=0; visualIndex>=runs[i].visualLimit; ++i) {} + } else { + /* binary search for the run */ + int32_t begin=0, limit=runCount; + + /* the middle if() is guaranteed to find the run, we don't need a loop limit */ + for(;;) { + i=(begin+limit)/2; + if(visualIndex>=runs[i].visualLimit) { + begin=i+1; + } else if(i==0 || visualIndex>=runs[i-1].visualLimit) { + break; + } else { + limit=i; + } + } + } + + start=runs[i].logicalStart; + if(IS_EVEN_RUN(start)) { + /* LTR */ + /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */ + if(i>0) { + visualIndex-=runs[i-1].visualLimit; + } + return start+visualIndex; + } else { + /* RTL */ + return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1; + } +} + +U_CAPI void U_EXPORT2 +ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ + ubidi_countRuns(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + /* no op */ + } else if(indexMap==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } else { + /* fill a logical-to-visual index map using the runs[] */ + int32_t visualStart, visualLimit, i, j, k; + int32_t logicalStart, logicalLimit; + Run *runs=pBiDi->runs; + if (pBiDi->length<=0) { + return; + } + if (pBiDi->length>pBiDi->resultLength) { + uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t)); + } + + visualStart=0; + for(j=0; j<pBiDi->runCount; ++j) { + logicalStart=GET_INDEX(runs[j].logicalStart); + visualLimit=runs[j].visualLimit; + if(IS_EVEN_RUN(runs[j].logicalStart)) { + do { /* LTR */ + indexMap[logicalStart++]=visualStart++; + } while(visualStart<visualLimit); + } else { + logicalStart+=visualLimit-visualStart; /* logicalLimit */ + do { /* RTL */ + indexMap[--logicalStart]=visualStart++; + } while(visualStart<visualLimit); + } + /* visualStart==visualLimit; */ + } + + if(pBiDi->insertPoints.size>0) { + int32_t markFound=0, runCount=pBiDi->runCount; + int32_t length, insertRemove; + visualStart=0; + /* add number of marks found until each index */ + for(i=0; i<runCount; i++, visualStart+=length) { + length=runs[i].visualLimit-visualStart; + insertRemove=runs[i].insertRemove; + if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { + markFound++; + } + if(markFound>0) { + logicalStart=GET_INDEX(runs[i].logicalStart); + logicalLimit=logicalStart+length; + for(j=logicalStart; j<logicalLimit; j++) { + indexMap[j]+=markFound; + } + } + if(insertRemove&(LRM_AFTER|RLM_AFTER)) { + markFound++; + } + } + } + else if(pBiDi->controlCount>0) { + int32_t controlFound=0, runCount=pBiDi->runCount; + int32_t length, insertRemove; + UBool evenRun; + UChar uchar; + visualStart=0; + /* subtract number of controls found until each index */ + for(i=0; i<runCount; i++, visualStart+=length) { + length=runs[i].visualLimit-visualStart; + insertRemove=runs[i].insertRemove; + /* no control found within previous runs nor within this run */ + if((controlFound-insertRemove)==0) { + continue; + } + logicalStart=runs[i].logicalStart; + evenRun=IS_EVEN_RUN(logicalStart); + REMOVE_ODD_BIT(logicalStart); + logicalLimit=logicalStart+length; + /* if no control within this run */ + if(insertRemove==0) { + for(j=logicalStart; j<logicalLimit; j++) { + indexMap[j]-=controlFound; + } + continue; + } + for(j=0; j<length; j++) { + k= evenRun ? logicalStart+j : logicalLimit-j-1; + uchar=pBiDi->text[k]; + if(IS_BIDI_CONTROL_CHAR(uchar)) { + controlFound++; + indexMap[k]=UBIDI_MAP_NOWHERE; + continue; + } + indexMap[k]-=controlFound; + } + } + } + } +} + +U_CAPI void U_EXPORT2 +ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { + RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); + if(indexMap==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */ + ubidi_countRuns(pBiDi, pErrorCode); + if(U_SUCCESS(*pErrorCode)) { + /* fill a visual-to-logical index map using the runs[] */ + Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount; + int32_t logicalStart, visualStart, visualLimit, *pi=indexMap; + + if (pBiDi->resultLength<=0) { + return; + } + visualStart=0; + for(; runs<runsLimit; ++runs) { + logicalStart=runs->logicalStart; + visualLimit=runs->visualLimit; + if(IS_EVEN_RUN(logicalStart)) { + do { /* LTR */ + *pi++ = logicalStart++; + } while(++visualStart<visualLimit); + } else { + REMOVE_ODD_BIT(logicalStart); + logicalStart+=visualLimit-visualStart; /* logicalLimit */ + do { /* RTL */ + *pi++ = --logicalStart; + } while(++visualStart<visualLimit); + } + /* visualStart==visualLimit; */ + } + + if(pBiDi->insertPoints.size>0) { + int32_t markFound=0, runCount=pBiDi->runCount; + int32_t insertRemove, i, j, k; + runs=pBiDi->runs; + /* count all inserted marks */ + for(i=0; i<runCount; i++) { + insertRemove=runs[i].insertRemove; + if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { + markFound++; + } + if(insertRemove&(LRM_AFTER|RLM_AFTER)) { + markFound++; + } + } + /* move back indexes by number of preceding marks */ + k=pBiDi->resultLength; + for(i=runCount-1; i>=0 && markFound>0; i--) { + insertRemove=runs[i].insertRemove; + if(insertRemove&(LRM_AFTER|RLM_AFTER)) { + indexMap[--k]= UBIDI_MAP_NOWHERE; + markFound--; + } + visualStart= i>0 ? runs[i-1].visualLimit : 0; + for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) { + indexMap[--k]=indexMap[j]; + } + if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) { + indexMap[--k]= UBIDI_MAP_NOWHERE; + markFound--; + } + } + } + else if(pBiDi->controlCount>0) { + int32_t runCount=pBiDi->runCount, logicalEnd; + int32_t insertRemove, length, i, j, k, m; + UChar uchar; + UBool evenRun; + runs=pBiDi->runs; + visualStart=0; + /* move forward indexes by number of preceding controls */ + k=0; + for(i=0; i<runCount; i++, visualStart+=length) { + length=runs[i].visualLimit-visualStart; + insertRemove=runs[i].insertRemove; + /* if no control found yet, nothing to do in this run */ + if((insertRemove==0)&&(k==visualStart)) { + k+=length; + continue; + } + /* if no control in this run */ + if(insertRemove==0) { + visualLimit=runs[i].visualLimit; + for(j=visualStart; j<visualLimit; j++) { + indexMap[k++]=indexMap[j]; + } + continue; + } + logicalStart=runs[i].logicalStart; + evenRun=IS_EVEN_RUN(logicalStart); + REMOVE_ODD_BIT(logicalStart); + logicalEnd=logicalStart+length-1; + for(j=0; j<length; j++) { + m= evenRun ? logicalStart+j : logicalEnd-j; + uchar=pBiDi->text[m]; + if(!IS_BIDI_CONTROL_CHAR(uchar)) { + indexMap[k++]=m; + } + } + } + } + } +} + +U_CAPI void U_EXPORT2 +ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) { + if(srcMap!=NULL && destMap!=NULL && length>0) { + const int32_t *pi; + int32_t destLength=-1, count=0; + /* find highest value and count positive indexes in srcMap */ + pi=srcMap+length; + while(pi>srcMap) { + if(*--pi>destLength) { + destLength=*pi; + } + if(*pi>=0) { + count++; + } + } + destLength++; /* add 1 for origin 0 */ + if(count<destLength) { + /* we must fill unmatched destMap entries with -1 */ + uprv_memset(destMap, 0xFF, destLength*sizeof(int32_t)); + } + pi=srcMap+length; + while(length>0) { + if(*--pi>=0) { + destMap[*pi]=--length; + } else { + --length; + } + } + } +} diff --git a/thirdparty/icu4c/common/ubiditransform.cpp b/thirdparty/icu4c/common/ubiditransform.cpp new file mode 100644 index 0000000000..d56bf1518b --- /dev/null +++ b/thirdparty/icu4c/common/ubiditransform.cpp @@ -0,0 +1,530 @@ +/* +****************************************************************************** +* +* © 2016 and later: Unicode, Inc. and others. +* License & terms of use: http://www.unicode.org/copyright.html +* +****************************************************************************** +* file name: ubiditransform.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2016jul24 +* created by: Lina Kemmel +* +*/ + +#include "cmemory.h" +#include "unicode/ubidi.h" +#include "unicode/ustring.h" +#include "unicode/ushape.h" +#include "unicode/utf16.h" +#include "ustr_imp.h" +#include "unicode/ubiditransform.h" + +/* Some convenience defines */ +#define LTR UBIDI_LTR +#define RTL UBIDI_RTL +#define LOGICAL UBIDI_LOGICAL +#define VISUAL UBIDI_VISUAL +#define SHAPE_LOGICAL U_SHAPE_TEXT_DIRECTION_LOGICAL +#define SHAPE_VISUAL U_SHAPE_TEXT_DIRECTION_VISUAL_LTR + +#define CHECK_LEN(STR, LEN, ERROR) UPRV_BLOCK_MACRO_BEGIN { \ + if (LEN == 0) return 0; \ + if (LEN < -1) { *(ERROR) = U_ILLEGAL_ARGUMENT_ERROR; return 0; } \ + if (LEN == -1) LEN = u_strlen(STR); \ +} UPRV_BLOCK_MACRO_END + +#define MAX_ACTIONS 7 + +/** + * Typedef for a pointer to a function, which performs some operation (such as + * reordering, setting "inverse" mode, character mirroring, etc.). Return value + * indicates whether the text was changed in the course of this operation or + * not. + */ +typedef UBool (*UBiDiAction)(UBiDiTransform *, UErrorCode *); + +/** + * Structure that holds a predefined reordering scheme, including the following + * information: + * <ul> + * <li>an input base direction,</li> + * <li>an input order,</li> + * <li>an output base direction,</li> + * <li>an output order,</li> + * <li>a digit shaping direction,</li> + * <li>a letter shaping direction,</li> + * <li>a base direction that should be applied when the reordering engine is + * invoked (which can not always be derived from the caller-defined + * options),</li> + * <li>an array of pointers to functions that accomplish the bidi layout + * transformation.</li> + * </ul> + */ +typedef struct { + UBiDiLevel inLevel; /* input level */ + UBiDiOrder inOrder; /* input order */ + UBiDiLevel outLevel; /* output level */ + UBiDiOrder outOrder; /* output order */ + uint32_t digitsDir; /* digit shaping direction */ + uint32_t lettersDir; /* letter shaping direction */ + UBiDiLevel baseLevel; /* paragraph level to be used with setPara */ + const UBiDiAction actions[MAX_ACTIONS]; /* array of pointers to functions carrying out the transformation */ +} ReorderingScheme; + +struct UBiDiTransform { + UBiDi *pBidi; /* pointer to a UBiDi object */ + const ReorderingScheme *pActiveScheme; /* effective reordering scheme */ + UChar *src; /* input text */ + UChar *dest; /* output text */ + uint32_t srcLength; /* input text length - not really needed as we are zero-terminated and can u_strlen */ + uint32_t srcSize; /* input text capacity excluding the trailing zero */ + uint32_t destSize; /* output text capacity */ + uint32_t *pDestLength; /* number of UChars written to dest */ + uint32_t reorderingOptions; /* reordering options - currently only suppot DO_MIRRORING */ + uint32_t digits; /* digit option for ArabicShaping */ + uint32_t letters; /* letter option for ArabicShaping */ +}; + +U_CAPI UBiDiTransform* U_EXPORT2 +ubiditransform_open(UErrorCode *pErrorCode) +{ + UBiDiTransform *pBiDiTransform = NULL; + if (U_SUCCESS(*pErrorCode)) { + pBiDiTransform = (UBiDiTransform*) uprv_calloc(1, sizeof(UBiDiTransform)); + if (pBiDiTransform == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + } + } + return pBiDiTransform; +} + +U_CAPI void U_EXPORT2 +ubiditransform_close(UBiDiTransform *pBiDiTransform) +{ + if (pBiDiTransform != NULL) { + if (pBiDiTransform->pBidi != NULL) { + ubidi_close(pBiDiTransform->pBidi); + } + if (pBiDiTransform->src != NULL) { + uprv_free(pBiDiTransform->src); + } + uprv_free(pBiDiTransform); + } +} + +/** + * Performs Bidi resolution of text. + * + * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. + */ +static UBool +action_resolve(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_setPara(pTransform->pBidi, pTransform->src, pTransform->srcLength, + pTransform->pActiveScheme->baseLevel, NULL, pErrorCode); + return FALSE; +} + +/** + * Performs basic reordering of text (Logical -> Visual LTR). + * + * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. + */ +static UBool +action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize, + static_cast<uint16_t>(pTransform->reorderingOptions), pErrorCode); + + *pTransform->pDestLength = pTransform->srcLength; + pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; + return TRUE; +} + +/** + * Sets "inverse" mode on the <code>UBiDi</code> object. + * + * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. + */ +static UBool +action_setInverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + (void)pErrorCode; + ubidi_setInverse(pTransform->pBidi, TRUE); + ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_INVERSE_LIKE_DIRECT); + return FALSE; +} + +/** + * Sets "runs only" reordering mode indicating a Logical LTR <-> Logical RTL + * transformation. + * + * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. + */ +static UBool +action_setRunsOnly(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + (void)pErrorCode; + ubidi_setReorderingMode(pTransform->pBidi, UBIDI_REORDER_RUNS_ONLY); + return FALSE; +} + +/** + * Performs string reverse. + * + * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. + */ +static UBool +action_reverse(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + ubidi_writeReverse(pTransform->src, pTransform->srcLength, + pTransform->dest, pTransform->destSize, + UBIDI_REORDER_DEFAULT, pErrorCode); + *pTransform->pDestLength = pTransform->srcLength; + return TRUE; +} + +/** + * Applies a new value to the text that serves as input at the current + * processing step. This value is identical to the original one when we begin + * the processing, but usually changes as the transformation progresses. + * + * @param pTransform A pointer to the <code>UBiDiTransform</code> structure. + * @param newSrc A pointer whose value is to be used as input text. + * @param newLength A length of the new text in <code>UChar</code>s. + * @param newSize A new source capacity in <code>UChar</code>s. + * @param pErrorCode Pointer to the error code value. + */ +static void +updateSrc(UBiDiTransform *pTransform, const UChar *newSrc, uint32_t newLength, + uint32_t newSize, UErrorCode *pErrorCode) +{ + if (newSize < newLength) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + return; + } + if (newSize > pTransform->srcSize) { + newSize += 50; // allocate slightly more than needed right now + if (pTransform->src != NULL) { + uprv_free(pTransform->src); + pTransform->src = NULL; + } + pTransform->src = (UChar *)uprv_malloc(newSize * sizeof(UChar)); + if (pTransform->src == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + //pTransform->srcLength = pTransform->srcSize = 0; + return; + } + pTransform->srcSize = newSize; + } + u_strncpy(pTransform->src, newSrc, newLength); + pTransform->srcLength = u_terminateUChars(pTransform->src, + pTransform->srcSize, newLength, pErrorCode); +} + +/** + * Calls a lower level shaping function. + * + * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. + * @param options Shaping options. + * @param pErrorCode Pointer to the error code value. + */ +static void +doShape(UBiDiTransform *pTransform, uint32_t options, UErrorCode *pErrorCode) +{ + *pTransform->pDestLength = u_shapeArabic(pTransform->src, + pTransform->srcLength, pTransform->dest, pTransform->destSize, + options, pErrorCode); +} + +/** + * Performs digit and letter shaping. + * + * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. + */ +static UBool +action_shapeArabic(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + if ((pTransform->letters | pTransform->digits) == 0) { + return FALSE; + } + if (pTransform->pActiveScheme->lettersDir == pTransform->pActiveScheme->digitsDir) { + doShape(pTransform, pTransform->letters | pTransform->digits | pTransform->pActiveScheme->lettersDir, + pErrorCode); + } else { + doShape(pTransform, pTransform->digits | pTransform->pActiveScheme->digitsDir, pErrorCode); + if (U_SUCCESS(*pErrorCode)) { + updateSrc(pTransform, pTransform->dest, *pTransform->pDestLength, + *pTransform->pDestLength, pErrorCode); + doShape(pTransform, pTransform->letters | pTransform->pActiveScheme->lettersDir, + pErrorCode); + } + } + return TRUE; +} + +/** + * Performs character mirroring. + * + * @param pTransform Pointer to the <code>UBiDiTransform</code> structure. + * @param pErrorCode Pointer to the error code value. + * + * @return Whether or not this function modifies the text. Besides the return + * value, the caller should also check <code>U_SUCCESS(*pErrorCode)</code>. + */ +static UBool +action_mirror(UBiDiTransform *pTransform, UErrorCode *pErrorCode) +{ + UChar32 c; + uint32_t i = 0, j = 0; + if (0 == (pTransform->reorderingOptions & UBIDI_DO_MIRRORING)) { + return FALSE; + } + if (pTransform->destSize < pTransform->srcLength) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + return FALSE; + } + do { + UBool isOdd = ubidi_getLevelAt(pTransform->pBidi, i) & 1; + U16_NEXT(pTransform->src, i, pTransform->srcLength, c); + U16_APPEND_UNSAFE(pTransform->dest, j, isOdd ? u_charMirror(c) : c); + } while (i < pTransform->srcLength); + + *pTransform->pDestLength = pTransform->srcLength; + pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; + return TRUE; +} + +/** + * All possible reordering schemes. + * + */ +static const ReorderingScheme Schemes[] = +{ + /* 0: Logical LTR => Visual LTR */ + {LTR, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_shapeArabic, action_resolve, action_reorder, NULL}}, + /* 1: Logical RTL => Visual LTR */ + {RTL, LOGICAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 2: Logical LTR => Visual RTL */ + {LTR, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_shapeArabic, action_resolve, action_reorder, action_reverse, NULL}}, + /* 3: Logical RTL => Visual RTL */ + {RTL, LOGICAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_resolve, action_reorder, action_shapeArabic, action_reverse, NULL}}, + /* 4: Visual LTR => Logical RTL */ + {LTR, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, + /* 5: Visual RTL => Logical RTL */ + {RTL, VISUAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_VISUAL, RTL, + {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_reorder, NULL}}, + /* 6: Visual LTR => Logical LTR */ + {LTR, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 7: Visual RTL => Logical LTR */ + {RTL, VISUAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_reverse, action_setInverse, action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 8: Logical LTR => Logical RTL */ + {LTR, LOGICAL, RTL, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_shapeArabic, action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, NULL}}, + /* 9: Logical RTL => Logical LTR */ + {RTL, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, RTL, + {action_resolve, action_mirror, action_setRunsOnly, action_resolve, action_reorder, action_shapeArabic, NULL}}, + /* 10: Visual LTR => Visual RTL */ + {LTR, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_shapeArabic, action_setInverse, action_resolve, action_mirror, action_reverse, NULL}}, + /* 11: Visual RTL => Visual LTR */ + {RTL, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_reverse, action_shapeArabic, action_setInverse, action_resolve, action_mirror, NULL}}, + /* 12: Logical LTR => Logical LTR */ + {LTR, LOGICAL, LTR, LOGICAL, SHAPE_LOGICAL, SHAPE_LOGICAL, LTR, + {action_resolve, action_mirror, action_shapeArabic, NULL}}, + /* 13: Logical RTL => Logical RTL */ + {RTL, LOGICAL, RTL, LOGICAL, SHAPE_VISUAL, SHAPE_LOGICAL, RTL, + {action_resolve, action_mirror, action_shapeArabic, NULL}}, + /* 14: Visual LTR => Visual LTR */ + {LTR, VISUAL, LTR, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_resolve, action_mirror, action_shapeArabic, NULL}}, + /* 15: Visual RTL => Visual RTL */ + {RTL, VISUAL, RTL, VISUAL, SHAPE_LOGICAL, SHAPE_VISUAL, LTR, + {action_reverse, action_resolve, action_mirror, action_shapeArabic, action_reverse, NULL}} +}; + +static const uint32_t nSchemes = sizeof(Schemes) / sizeof(*Schemes); + +/** + * When the direction option is <code>UBIDI_DEFAULT_LTR</code> or + * <code>UBIDI_DEFAULT_RTL</code>, resolve the base direction according to that + * of the first strong bidi character. + */ +static void +resolveBaseDirection(const UChar *text, uint32_t length, + UBiDiLevel *pInLevel, UBiDiLevel *pOutLevel) +{ + switch (*pInLevel) { + case UBIDI_DEFAULT_LTR: + case UBIDI_DEFAULT_RTL: { + UBiDiLevel level = static_cast<UBiDiLevel>(ubidi_getBaseDirection(text, length)); + *pInLevel = static_cast<UBiDiLevel>(level != UBIDI_NEUTRAL) ? level + : *pInLevel == UBIDI_DEFAULT_RTL ? static_cast<UBiDiLevel>(RTL) : static_cast<UBiDiLevel>(LTR); + break; + } + default: + *pInLevel &= 1; + break; + } + switch (*pOutLevel) { + case UBIDI_DEFAULT_LTR: + case UBIDI_DEFAULT_RTL: + *pOutLevel = *pInLevel; + break; + default: + *pOutLevel &= 1; + break; + } +} + +/** + * Finds a valid <code>ReorderingScheme</code> matching the + * caller-defined scheme. + * + * @return A valid <code>ReorderingScheme</code> object or NULL + */ +static const ReorderingScheme* +findMatchingScheme(UBiDiLevel inLevel, UBiDiLevel outLevel, + UBiDiOrder inOrder, UBiDiOrder outOrder) +{ + uint32_t i; + for (i = 0; i < nSchemes; i++) { + const ReorderingScheme *pScheme = Schemes + i; + if (inLevel == pScheme->inLevel && outLevel == pScheme->outLevel + && inOrder == pScheme->inOrder && outOrder == pScheme->outOrder) { + return pScheme; + } + } + return NULL; +} + +U_CAPI uint32_t U_EXPORT2 +ubiditransform_transform(UBiDiTransform *pBiDiTransform, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + UBiDiLevel inParaLevel, UBiDiOrder inOrder, + UBiDiLevel outParaLevel, UBiDiOrder outOrder, + UBiDiMirroring doMirroring, uint32_t shapingOptions, + UErrorCode *pErrorCode) +{ + uint32_t destLength = 0; + UBool textChanged = FALSE; + const UBiDiTransform *pOrigTransform = pBiDiTransform; + const UBiDiAction *action = NULL; + + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (src == NULL || dest == NULL) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + CHECK_LEN(src, srcLength, pErrorCode); + CHECK_LEN(dest, destSize, pErrorCode); + + if (pBiDiTransform == NULL) { + pBiDiTransform = ubiditransform_open(pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return 0; + } + } + /* Current limitation: in multiple paragraphs will be resolved according + to the 1st paragraph */ + resolveBaseDirection(src, srcLength, &inParaLevel, &outParaLevel); + + pBiDiTransform->pActiveScheme = findMatchingScheme(inParaLevel, outParaLevel, + inOrder, outOrder); + if (pBiDiTransform->pActiveScheme == NULL) { + goto cleanup; + } + pBiDiTransform->reorderingOptions = doMirroring ? UBIDI_DO_MIRRORING + : UBIDI_REORDER_DEFAULT; + + /* Ignore TEXT_DIRECTION_* flags, as we apply our own depending on the text + scheme at the time shaping is invoked. */ + shapingOptions &= ~U_SHAPE_TEXT_DIRECTION_MASK; + pBiDiTransform->digits = shapingOptions & ~U_SHAPE_LETTERS_MASK; + pBiDiTransform->letters = shapingOptions & ~U_SHAPE_DIGITS_MASK; + + updateSrc(pBiDiTransform, src, srcLength, destSize > srcLength ? destSize : srcLength, pErrorCode); + if (U_FAILURE(*pErrorCode)) { + goto cleanup; + } + if (pBiDiTransform->pBidi == NULL) { + pBiDiTransform->pBidi = ubidi_openSized(0, 0, pErrorCode); + if (U_FAILURE(*pErrorCode)) { + goto cleanup; + } + } + pBiDiTransform->dest = dest; + pBiDiTransform->destSize = destSize; + pBiDiTransform->pDestLength = &destLength; + + /* Checking for U_SUCCESS() within the loop to bail out on first failure. */ + for (action = pBiDiTransform->pActiveScheme->actions; *action && U_SUCCESS(*pErrorCode); action++) { + if ((*action)(pBiDiTransform, pErrorCode)) { + if (action + 1) { + updateSrc(pBiDiTransform, pBiDiTransform->dest, *pBiDiTransform->pDestLength, + *pBiDiTransform->pDestLength, pErrorCode); + } + textChanged = TRUE; + } + } + ubidi_setInverse(pBiDiTransform->pBidi, FALSE); + + if (!textChanged && U_SUCCESS(*pErrorCode)) { + /* Text was not changed - just copy src to dest */ + if (destSize < srcLength) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + } else { + u_strncpy(dest, src, srcLength); + destLength = srcLength; + } + } +cleanup: + if (pOrigTransform != pBiDiTransform) { + ubiditransform_close(pBiDiTransform); + } else { + pBiDiTransform->dest = NULL; + pBiDiTransform->pDestLength = NULL; + pBiDiTransform->srcLength = 0; + pBiDiTransform->destSize = 0; + } + return U_FAILURE(*pErrorCode) ? 0 : destLength; +} diff --git a/thirdparty/icu4c/common/ubidiwrt.cpp b/thirdparty/icu4c/common/ubidiwrt.cpp new file mode 100644 index 0000000000..a69c0a4b8b --- /dev/null +++ b/thirdparty/icu4c/common/ubidiwrt.cpp @@ -0,0 +1,650 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2000-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ubidiwrt.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999aug06 +* created by: Markus W. Scherer, updated by Matitiahu Allouche +* +* This file contains implementations for BiDi functions that use +* the core algorithm and core API to write reordered text. +*/ + +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "unicode/uchar.h" +#include "unicode/ubidi.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "ustr_imp.h" +#include "ubidiimp.h" + +/* + * The function implementations in this file are designed + * for UTF-16 and UTF-32, not for UTF-8. + * + * Assumptions that are not true for UTF-8: + * - Any code point always needs the same number of code units + * ("minimum-length-problem" of UTF-8) + * - The BiDi control characters need only one code unit each + * + * Further assumptions for all UTFs: + * - u_charMirror(c) needs the same number of code units as c + */ +#if defined(UTF_SIZE) && UTF_SIZE==8 +# error reimplement ubidi_writeReordered() for UTF-8, see comment above +#endif + +#define IS_COMBINING(type) ((1UL<<(type))&(1UL<<U_NON_SPACING_MARK|1UL<<U_COMBINING_SPACING_MARK|1UL<<U_ENCLOSING_MARK)) + +/* + * When we have UBIDI_OUTPUT_REVERSE set on ubidi_writeReordered(), then we + * semantically write RTL runs in reverse and later reverse them again. + * Instead, we actually write them in forward order to begin with. + * However, if the RTL run was to be mirrored, we need to mirror here now + * since the implicit second reversal must not do it. + * It looks strange to do mirroring in LTR output, but it is only because + * we are writing RTL output in reverse. + */ +static int32_t +doWriteForward(const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + uint16_t options, + UErrorCode *pErrorCode) { + /* optimize for several combinations of options */ + switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING)) { + case 0: { + /* simply copy the LTR run to the destination */ + int32_t length=srcLength; + if(destSize<length) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return srcLength; + } + do { + *dest++=*src++; + } while(--length>0); + return srcLength; + } + case UBIDI_DO_MIRRORING: { + /* do mirroring */ + int32_t i=0, j=0; + UChar32 c; + + if(destSize<srcLength) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return srcLength; + } + do { + U16_NEXT(src, i, srcLength, c); + c=u_charMirror(c); + U16_APPEND_UNSAFE(dest, j, c); + } while(i<srcLength); + return srcLength; + } + case UBIDI_REMOVE_BIDI_CONTROLS: { + /* copy the LTR run and remove any BiDi control characters */ + int32_t remaining=destSize; + UChar c; + do { + c=*src++; + if(!IS_BIDI_CONTROL_CHAR(c)) { + if(--remaining<0) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + + /* preflight the length */ + while(--srcLength>0) { + c=*src++; + if(!IS_BIDI_CONTROL_CHAR(c)) { + --remaining; + } + } + return destSize-remaining; + } + *dest++=c; + } + } while(--srcLength>0); + return destSize-remaining; + } + default: { + /* remove BiDi control characters and do mirroring */ + int32_t remaining=destSize; + int32_t i, j=0; + UChar32 c; + do { + i=0; + U16_NEXT(src, i, srcLength, c); + src+=i; + srcLength-=i; + if(!IS_BIDI_CONTROL_CHAR(c)) { + remaining-=i; + if(remaining<0) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + + /* preflight the length */ + while(srcLength>0) { + c=*src++; + if(!IS_BIDI_CONTROL_CHAR(c)) { + --remaining; + } + --srcLength; + } + return destSize-remaining; + } + c=u_charMirror(c); + U16_APPEND_UNSAFE(dest, j, c); + } + } while(srcLength>0); + return j; + } + } /* end of switch */ +} + +static int32_t +doWriteReverse(const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + uint16_t options, + UErrorCode *pErrorCode) { + /* + * RTL run - + * + * RTL runs need to be copied to the destination in reverse order + * of code points, not code units, to keep Unicode characters intact. + * + * The general strategy for this is to read the source text + * in backward order, collect all code units for a code point + * (and optionally following combining characters, see below), + * and copy all these code units in ascending order + * to the destination for this run. + * + * Several options request whether combining characters + * should be kept after their base characters, + * whether BiDi control characters should be removed, and + * whether characters should be replaced by their mirror-image + * equivalent Unicode characters. + */ + int32_t i, j; + UChar32 c; + + /* optimize for several combinations of options */ + switch(options&(UBIDI_REMOVE_BIDI_CONTROLS|UBIDI_DO_MIRRORING|UBIDI_KEEP_BASE_COMBINING)) { + case 0: + /* + * With none of the "complicated" options set, the destination + * run will have the same length as the source run, + * and there is no mirroring and no keeping combining characters + * with their base characters. + */ + if(destSize<srcLength) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return srcLength; + } + destSize=srcLength; + + /* preserve character integrity */ + do { + /* i is always after the last code unit known to need to be kept in this segment */ + i=srcLength; + + /* collect code units for one base character */ + U16_BACK_1(src, 0, srcLength); + + /* copy this base character */ + j=srcLength; + do { + *dest++=src[j++]; + } while(j<i); + } while(srcLength>0); + break; + case UBIDI_KEEP_BASE_COMBINING: + /* + * Here, too, the destination + * run will have the same length as the source run, + * and there is no mirroring. + * We do need to keep combining characters with their base characters. + */ + if(destSize<srcLength) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return srcLength; + } + destSize=srcLength; + + /* preserve character integrity */ + do { + /* i is always after the last code unit known to need to be kept in this segment */ + i=srcLength; + + /* collect code units and modifier letters for one base character */ + do { + U16_PREV(src, 0, srcLength, c); + } while(srcLength>0 && IS_COMBINING(u_charType(c))); + + /* copy this "user character" */ + j=srcLength; + do { + *dest++=src[j++]; + } while(j<i); + } while(srcLength>0); + break; + default: + /* + * With several "complicated" options set, this is the most + * general and the slowest copying of an RTL run. + * We will do mirroring, remove BiDi controls, and + * keep combining characters with their base characters + * as requested. + */ + if(!(options&UBIDI_REMOVE_BIDI_CONTROLS)) { + i=srcLength; + } else { + /* we need to find out the destination length of the run, + which will not include the BiDi control characters */ + int32_t length=srcLength; + UChar ch; + + i=0; + do { + ch=*src++; + if(!IS_BIDI_CONTROL_CHAR(ch)) { + ++i; + } + } while(--length>0); + src-=srcLength; + } + + if(destSize<i) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return i; + } + destSize=i; + + /* preserve character integrity */ + do { + /* i is always after the last code unit known to need to be kept in this segment */ + i=srcLength; + + /* collect code units for one base character */ + U16_PREV(src, 0, srcLength, c); + if(options&UBIDI_KEEP_BASE_COMBINING) { + /* collect modifier letters for this base character */ + while(srcLength>0 && IS_COMBINING(u_charType(c))) { + U16_PREV(src, 0, srcLength, c); + } + } + + if(options&UBIDI_REMOVE_BIDI_CONTROLS && IS_BIDI_CONTROL_CHAR(c)) { + /* do not copy this BiDi control character */ + continue; + } + + /* copy this "user character" */ + j=srcLength; + if(options&UBIDI_DO_MIRRORING) { + /* mirror only the base character */ + int32_t k=0; + c=u_charMirror(c); + U16_APPEND_UNSAFE(dest, k, c); + dest+=k; + j+=k; + } + while(j<i) { + *dest++=src[j++]; + } + } while(srcLength>0); + break; + } /* end of switch */ + + return destSize; +} + +U_CAPI int32_t U_EXPORT2 +ubidi_writeReverse(const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + uint16_t options, + UErrorCode *pErrorCode) { + int32_t destLength; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* more error checking */ + if( src==NULL || srcLength<-1 || + destSize<0 || (destSize>0 && dest==NULL)) + { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* do input and output overlap? */ + if( dest!=NULL && + ((src>=dest && src<dest+destSize) || + (dest>=src && dest<src+srcLength))) + { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(srcLength==-1) { + srcLength=u_strlen(src); + } + if(srcLength>0) { + destLength=doWriteReverse(src, srcLength, dest, destSize, options, pErrorCode); + } else { + /* nothing to do */ + destLength=0; + } + + return u_terminateUChars(dest, destSize, destLength, pErrorCode); +} + +// Ticket 20907 - The optimizer in MSVC/Visual Studio versions below 16.4 has trouble with this +// function on Windows ARM64. As a work-around, we disable optimizations for this function. +// This work-around could/should be removed once the following versions of Visual Studio are no +// longer supported: All versions of VS2017, and versions of VS2019 below 16.4. +#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924)) +#pragma optimize( "", off ) +#endif +U_CAPI int32_t U_EXPORT2 +ubidi_writeReordered(UBiDi *pBiDi, + UChar *dest, int32_t destSize, + uint16_t options, + UErrorCode *pErrorCode) { + const UChar *text; + UChar *saveDest; + int32_t length, destCapacity; + int32_t run, runCount, logicalStart, runLength; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* more error checking */ + if( pBiDi==NULL || + (text=pBiDi->text)==NULL || (length=pBiDi->length)<0 || + destSize<0 || (destSize>0 && dest==NULL)) + { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* do input and output overlap? */ + if( dest!=NULL && + ((text>=dest && text<dest+destSize) || + (dest>=text && dest<text+pBiDi->originalLength))) + { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(length==0) { + /* nothing to do */ + return u_terminateUChars(dest, destSize, 0, pErrorCode); + } + + runCount=ubidi_countRuns(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + /* destSize shrinks, later destination length=destCapacity-destSize */ + saveDest=dest; + destCapacity=destSize; + + /* + * Option "insert marks" implies UBIDI_INSERT_LRM_FOR_NUMERIC if the + * reordering mode (checked below) is appropriate. + */ + if(pBiDi->reorderingOptions & UBIDI_OPTION_INSERT_MARKS) { + options|=UBIDI_INSERT_LRM_FOR_NUMERIC; + options&=~UBIDI_REMOVE_BIDI_CONTROLS; + } + /* + * Option "remove controls" implies UBIDI_REMOVE_BIDI_CONTROLS + * and cancels UBIDI_INSERT_LRM_FOR_NUMERIC. + */ + if(pBiDi->reorderingOptions & UBIDI_OPTION_REMOVE_CONTROLS) { + options|=UBIDI_REMOVE_BIDI_CONTROLS; + options&=~UBIDI_INSERT_LRM_FOR_NUMERIC; + } + /* + * If we do not perform the "inverse BiDi" algorithm, then we + * don't need to insert any LRMs, and don't need to test for it. + */ + if((pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_NUMBERS_AS_L) && + (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_LIKE_DIRECT) && + (pBiDi->reorderingMode != UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL) && + (pBiDi->reorderingMode != UBIDI_REORDER_RUNS_ONLY)) { + options&=~UBIDI_INSERT_LRM_FOR_NUMERIC; + } + /* + * Iterate through all visual runs and copy the run text segments to + * the destination, according to the options. + * + * The tests for where to insert LRMs ignore the fact that there may be + * BN codes or non-BMP code points at the beginning and end of a run; + * they may insert LRMs unnecessarily but the tests are faster this way + * (this would have to be improved for UTF-8). + * + * Note that the only errors that are set by doWriteXY() are buffer overflow + * errors. Ignore them until the end, and continue for preflighting. + */ + if(!(options&UBIDI_OUTPUT_REVERSE)) { + /* forward output */ + if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) { + /* do not insert BiDi controls */ + for(run=0; run<runCount; ++run) { + if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) { + runLength=doWriteForward(text+logicalStart, runLength, + dest, destSize, + (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); + } else { + runLength=doWriteReverse(text+logicalStart, runLength, + dest, destSize, + options, pErrorCode); + } + if(dest!=NULL) { + dest+=runLength; + } + destSize-=runLength; + } + } else { + /* insert BiDi controls for "inverse BiDi" */ + const DirProp *dirProps=pBiDi->dirProps; + const UChar *src; + UChar uc; + UBiDiDirection dir; + int32_t markFlag; + + for(run=0; run<runCount; ++run) { + dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength); + src=text+logicalStart; + /* check if something relevant in insertPoints */ + markFlag=pBiDi->runs[run].insertRemove; + if(markFlag<0) { /* BiDi controls count */ + markFlag=0; + } + + if(UBIDI_LTR==dir) { + if((pBiDi->isInverse) && + (/*run>0 &&*/ dirProps[logicalStart]!=L)) { + markFlag |= LRM_BEFORE; + } + if (markFlag & LRM_BEFORE) { + uc=LRM_CHAR; + } + else if (markFlag & RLM_BEFORE) { + uc=RLM_CHAR; + } + else uc=0; + if(uc) { + if(destSize>0) { + *dest++=uc; + } + --destSize; + } + + runLength=doWriteForward(src, runLength, + dest, destSize, + (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); + if(dest!=NULL) { + dest+=runLength; + } + destSize-=runLength; + + if((pBiDi->isInverse) && + (/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L)) { + markFlag |= LRM_AFTER; + } + if (markFlag & LRM_AFTER) { + uc=LRM_CHAR; + } + else if (markFlag & RLM_AFTER) { + uc=RLM_CHAR; + } + else uc=0; + if(uc) { + if(destSize>0) { + *dest++=uc; + } + --destSize; + } + } else { /* RTL run */ + if((pBiDi->isInverse) && + (/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1])))) { + markFlag |= RLM_BEFORE; + } + if (markFlag & LRM_BEFORE) { + uc=LRM_CHAR; + } + else if (markFlag & RLM_BEFORE) { + uc=RLM_CHAR; + } + else uc=0; + if(uc) { + if(destSize>0) { + *dest++=uc; + } + --destSize; + } + + runLength=doWriteReverse(src, runLength, + dest, destSize, + options, pErrorCode); + if(dest!=NULL) { + dest+=runLength; + } + destSize-=runLength; + + if((pBiDi->isInverse) && + (/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart])))) { + markFlag |= RLM_AFTER; + } + if (markFlag & LRM_AFTER) { + uc=LRM_CHAR; + } + else if (markFlag & RLM_AFTER) { + uc=RLM_CHAR; + } + else uc=0; + if(uc) { + if(destSize>0) { + *dest++=uc; + } + --destSize; + } + } + } + } + } else { + /* reverse output */ + if(!(options&UBIDI_INSERT_LRM_FOR_NUMERIC)) { + /* do not insert BiDi controls */ + for(run=runCount; --run>=0;) { + if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength)) { + runLength=doWriteReverse(text+logicalStart, runLength, + dest, destSize, + (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); + } else { + runLength=doWriteForward(text+logicalStart, runLength, + dest, destSize, + options, pErrorCode); + } + if(dest!=NULL) { + dest+=runLength; + } + destSize-=runLength; + } + } else { + /* insert BiDi controls for "inverse BiDi" */ + const DirProp *dirProps=pBiDi->dirProps; + const UChar *src; + UBiDiDirection dir; + + for(run=runCount; --run>=0;) { + /* reverse output */ + dir=ubidi_getVisualRun(pBiDi, run, &logicalStart, &runLength); + src=text+logicalStart; + + if(UBIDI_LTR==dir) { + if(/*run<runCount-1 &&*/ dirProps[logicalStart+runLength-1]!=L) { + if(destSize>0) { + *dest++=LRM_CHAR; + } + --destSize; + } + + runLength=doWriteReverse(src, runLength, + dest, destSize, + (uint16_t)(options&~UBIDI_DO_MIRRORING), pErrorCode); + if(dest!=NULL) { + dest+=runLength; + } + destSize-=runLength; + + if(/*run>0 &&*/ dirProps[logicalStart]!=L) { + if(destSize>0) { + *dest++=LRM_CHAR; + } + --destSize; + } + } else { + if(/*run<runCount-1 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart]))) { + if(destSize>0) { + *dest++=RLM_CHAR; + } + --destSize; + } + + runLength=doWriteForward(src, runLength, + dest, destSize, + options, pErrorCode); + if(dest!=NULL) { + dest+=runLength; + } + destSize-=runLength; + + if(/*run>0 &&*/ !(MASK_R_AL&DIRPROP_FLAG(dirProps[logicalStart+runLength-1]))) { + if(destSize>0) { + *dest++=RLM_CHAR; + } + --destSize; + } + } + } + } + } + + return u_terminateUChars(saveDest, destCapacity, destCapacity-destSize, pErrorCode); +} +#if (defined(_MSC_VER) && (defined(_M_ARM64)) && (_MSC_VER < 1924)) +#pragma optimize( "", on ) +#endif diff --git a/thirdparty/icu4c/common/ubrk.cpp b/thirdparty/icu4c/common/ubrk.cpp new file mode 100644 index 0000000000..f8bdf5a6b6 --- /dev/null +++ b/thirdparty/icu4c/common/ubrk.cpp @@ -0,0 +1,357 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1996-2015, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/ubrk.h" + +#include "unicode/brkiter.h" +#include "unicode/uloc.h" +#include "unicode/ustring.h" +#include "unicode/uchriter.h" +#include "unicode/rbbi.h" +#include "rbbirb.h" +#include "uassert.h" +#include "cmemory.h" + +U_NAMESPACE_USE + +//------------------------------------------------------------------------------ +// +// ubrk_open Create a canned type of break iterator based on type (word, line, etc.) +// and locale. +// +//------------------------------------------------------------------------------ +U_CAPI UBreakIterator* U_EXPORT2 +ubrk_open(UBreakIteratorType type, + const char *locale, + const UChar *text, + int32_t textLength, + UErrorCode *status) +{ + + if(U_FAILURE(*status)) return 0; + + BreakIterator *result = 0; + + switch(type) { + + case UBRK_CHARACTER: + result = BreakIterator::createCharacterInstance(Locale(locale), *status); + break; + + case UBRK_WORD: + result = BreakIterator::createWordInstance(Locale(locale), *status); + break; + + case UBRK_LINE: + result = BreakIterator::createLineInstance(Locale(locale), *status); + break; + + case UBRK_SENTENCE: + result = BreakIterator::createSentenceInstance(Locale(locale), *status); + break; + + case UBRK_TITLE: + result = BreakIterator::createTitleInstance(Locale(locale), *status); + break; + + default: + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + + // check for allocation error + if (U_FAILURE(*status)) { + return 0; + } + if(result == 0) { + *status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + + UBreakIterator *uBI = (UBreakIterator *)result; + if (text != NULL) { + ubrk_setText(uBI, text, textLength, status); + } + return uBI; +} + + + +//------------------------------------------------------------------------------ +// +// ubrk_openRules open a break iterator from a set of break rules. +// Invokes the rule builder. +// +//------------------------------------------------------------------------------ +U_CAPI UBreakIterator* U_EXPORT2 +ubrk_openRules( const UChar *rules, + int32_t rulesLength, + const UChar *text, + int32_t textLength, + UParseError *parseErr, + UErrorCode *status) { + + if (status == NULL || U_FAILURE(*status)){ + return 0; + } + + BreakIterator *result = 0; + UnicodeString ruleString(rules, rulesLength); + result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status); + if(U_FAILURE(*status)) { + return 0; + } + + UBreakIterator *uBI = (UBreakIterator *)result; + if (text != NULL) { + ubrk_setText(uBI, text, textLength, status); + } + return uBI; +} + + +U_CAPI UBreakIterator* U_EXPORT2 +ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, + const UChar * text, int32_t textLength, + UErrorCode * status) +{ + if (U_FAILURE(*status)) { + return NULL; + } + if (rulesLength < 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + LocalPointer<RuleBasedBreakIterator> lpRBBI(new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status); + if (U_FAILURE(*status)) { + return NULL; + } + UBreakIterator *uBI = reinterpret_cast<UBreakIterator *>(lpRBBI.orphan()); + if (text != NULL) { + ubrk_setText(uBI, text, textLength, status); + } + return uBI; +} + + +U_CAPI UBreakIterator * U_EXPORT2 +ubrk_safeClone( + const UBreakIterator *bi, + void * /*stackBuffer*/, + int32_t *pBufferSize, + UErrorCode *status) +{ + if (status == NULL || U_FAILURE(*status)){ + return NULL; + } + if (bi == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + if (pBufferSize != NULL) { + int32_t inputSize = *pBufferSize; + *pBufferSize = 1; + if (inputSize == 0) { + return NULL; // preflighting for deprecated functionality + } + } + BreakIterator *newBI = ((BreakIterator *)bi)->clone(); + if (newBI == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + } else { + *status = U_SAFECLONE_ALLOCATED_WARNING; + } + return (UBreakIterator *)newBI; +} + + + +U_CAPI void U_EXPORT2 +ubrk_close(UBreakIterator *bi) +{ + delete (BreakIterator *)bi; +} + +U_CAPI void U_EXPORT2 +ubrk_setText(UBreakIterator* bi, + const UChar* text, + int32_t textLength, + UErrorCode* status) +{ + UText ut = UTEXT_INITIALIZER; + utext_openUChars(&ut, text, textLength, status); + ((BreakIterator*)bi)->setText(&ut, *status); + // A stack allocated UText wrapping a UChar * string + // can be dumped without explicitly closing it. +} + + + +U_CAPI void U_EXPORT2 +ubrk_setUText(UBreakIterator *bi, + UText *text, + UErrorCode *status) +{ + ((BreakIterator*)bi)->setText(text, *status); +} + + + + + +U_CAPI int32_t U_EXPORT2 +ubrk_current(const UBreakIterator *bi) +{ + + return ((BreakIterator*)bi)->current(); +} + +U_CAPI int32_t U_EXPORT2 +ubrk_next(UBreakIterator *bi) +{ + + return ((BreakIterator*)bi)->next(); +} + +U_CAPI int32_t U_EXPORT2 +ubrk_previous(UBreakIterator *bi) +{ + + return ((BreakIterator*)bi)->previous(); +} + +U_CAPI int32_t U_EXPORT2 +ubrk_first(UBreakIterator *bi) +{ + + return ((BreakIterator*)bi)->first(); +} + +U_CAPI int32_t U_EXPORT2 +ubrk_last(UBreakIterator *bi) +{ + + return ((BreakIterator*)bi)->last(); +} + +U_CAPI int32_t U_EXPORT2 +ubrk_preceding(UBreakIterator *bi, + int32_t offset) +{ + + return ((BreakIterator*)bi)->preceding(offset); +} + +U_CAPI int32_t U_EXPORT2 +ubrk_following(UBreakIterator *bi, + int32_t offset) +{ + + return ((BreakIterator*)bi)->following(offset); +} + +U_CAPI const char* U_EXPORT2 +ubrk_getAvailable(int32_t index) +{ + + return uloc_getAvailable(index); +} + +U_CAPI int32_t U_EXPORT2 +ubrk_countAvailable() +{ + + return uloc_countAvailable(); +} + + +U_CAPI UBool U_EXPORT2 +ubrk_isBoundary(UBreakIterator *bi, int32_t offset) +{ + return ((BreakIterator*)bi)->isBoundary(offset); +} + + +U_CAPI int32_t U_EXPORT2 +ubrk_getRuleStatus(UBreakIterator *bi) +{ + return ((BreakIterator*)bi)->getRuleStatus(); +} + +U_CAPI int32_t U_EXPORT2 +ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status) +{ + return ((BreakIterator*)bi)->getRuleStatusVec(fillInVec, capacity, *status); +} + + +U_CAPI const char* U_EXPORT2 +ubrk_getLocaleByType(const UBreakIterator *bi, + ULocDataLocaleType type, + UErrorCode* status) +{ + if (bi == NULL) { + if (U_SUCCESS(*status)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return NULL; + } + return ((BreakIterator*)bi)->getLocaleID(type, *status); +} + + +U_CAPI void U_EXPORT2 +ubrk_refreshUText(UBreakIterator *bi, + UText *text, + UErrorCode *status) +{ + BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi); + bii->refreshInputText(text, *status); +} + +U_CAPI int32_t U_EXPORT2 +ubrk_getBinaryRules(UBreakIterator *bi, + uint8_t * binaryRules, int32_t rulesCapacity, + UErrorCode * status) +{ + if (U_FAILURE(*status)) { + return 0; + } + if ((binaryRules == NULL && rulesCapacity > 0) || rulesCapacity < 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + RuleBasedBreakIterator* rbbi; + if ((rbbi = dynamic_cast<RuleBasedBreakIterator*>(reinterpret_cast<BreakIterator*>(bi))) == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + uint32_t rulesLength; + const uint8_t * returnedRules = rbbi->getBinaryRules(rulesLength); + if (rulesLength > INT32_MAX) { + *status = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + if (binaryRules != NULL) { // if not preflighting + // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely + if ((int32_t)rulesLength > rulesCapacity) { + *status = U_BUFFER_OVERFLOW_ERROR; + } else { + uprv_memcpy(binaryRules, returnedRules, rulesLength); + } + } + return (int32_t)rulesLength; +} + + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ diff --git a/thirdparty/icu4c/common/ubrkimpl.h b/thirdparty/icu4c/common/ubrkimpl.h new file mode 100644 index 0000000000..8197f66339 --- /dev/null +++ b/thirdparty/icu4c/common/ubrkimpl.h @@ -0,0 +1,15 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2006, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#ifndef UBRKIMPL_H +#define UBRKIMPL_H + +#define U_ICUDATA_BRKITR U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "brkitr" + +#endif /*UBRKIMPL_H*/ diff --git a/thirdparty/icu4c/common/ucase.cpp b/thirdparty/icu4c/common/ucase.cpp new file mode 100644 index 0000000000..2b142f5bc2 --- /dev/null +++ b/thirdparty/icu4c/common/ucase.cpp @@ -0,0 +1,1608 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2004-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ucase.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004aug30 +* created by: Markus W. Scherer +* +* Low-level Unicode character/string case mapping code. +* Much code moved here (and modified) from uchar.c. +*/ + +#include "unicode/utypes.h" +#include "unicode/unistr.h" +#include "unicode/uset.h" +#include "unicode/udata.h" /* UDataInfo */ +#include "unicode/utf16.h" +#include "ucmndata.h" /* DataHeader */ +#include "udatamem.h" +#include "umutex.h" +#include "uassert.h" +#include "cmemory.h" +#include "utrie2.h" +#include "ucase.h" + +struct UCaseProps { + UDataMemory *mem; + const int32_t *indexes; + const uint16_t *exceptions; + const uint16_t *unfold; + + UTrie2 trie; + uint8_t formatVersion[4]; +}; + +/* ucase_props_data.h is machine-generated by gencase --csource */ +#define INCLUDED_FROM_UCASE_CPP +#include "ucase_props_data.h" + +/* set of property starts for UnicodeSet ------------------------------------ */ + +static UBool U_CALLCONV +_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) { + /* add the start code point to the USet */ + const USetAdder *sa=(const USetAdder *)context; + sa->add(sa->set, start); + return TRUE; +} + +U_CFUNC void U_EXPORT2 +ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* add the start code point of each same-value range of the trie */ + utrie2_enum(&ucase_props_singleton.trie, NULL, _enumPropertyStartsRange, sa); + + /* add code points with hardcoded properties, plus the ones following them */ + + /* (none right now, see comment below) */ + + /* + * Omit code points with hardcoded specialcasing properties + * because we do not build property UnicodeSets for them right now. + */ +} + +/* data access primitives --------------------------------------------------- */ + +U_CFUNC const UTrie2 * U_EXPORT2 +ucase_getTrie() { + return &ucase_props_singleton.trie; +} + +#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT)) + +/* number of bits in an 8-bit integer value */ +static const uint8_t flagsOffset[256]={ + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 +}; + +#define HAS_SLOT(flags, idx) ((flags)&(1<<(idx))) +#define SLOT_OFFSET(flags, idx) flagsOffset[(flags)&((1<<(idx))-1)] + +/* + * Get the value of an optional-value slot where HAS_SLOT(excWord, idx). + * + * @param excWord (in) initial exceptions word + * @param idx (in) desired slot index + * @param pExc16 (in/out) const uint16_t * after excWord=*pExc16++; + * moved to the last uint16_t of the value, use +1 for beginning of next slot + * @param value (out) int32_t or uint32_t output if hasSlot, otherwise not modified + */ +#define GET_SLOT_VALUE(excWord, idx, pExc16, value) UPRV_BLOCK_MACRO_BEGIN { \ + if(((excWord)&UCASE_EXC_DOUBLE_SLOTS)==0) { \ + (pExc16)+=SLOT_OFFSET(excWord, idx); \ + (value)=*pExc16; \ + } else { \ + (pExc16)+=2*SLOT_OFFSET(excWord, idx); \ + (value)=*pExc16++; \ + (value)=((value)<<16)|*pExc16; \ + } \ +} UPRV_BLOCK_MACRO_END + +/* simple case mappings ----------------------------------------------------- */ + +U_CAPI UChar32 U_EXPORT2 +ucase_tolower(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { + c+=UCASE_GET_DELTA(props); + } + } else { + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); + uint16_t excWord=*pe++; + if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) { + int32_t delta; + GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); + return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; + } + if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { + GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c); + } + } + return c; +} + +U_CAPI UChar32 U_EXPORT2 +ucase_toupper(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_GET_TYPE(props)==UCASE_LOWER) { + c+=UCASE_GET_DELTA(props); + } + } else { + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); + uint16_t excWord=*pe++; + if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) { + int32_t delta; + GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); + return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; + } + if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) { + GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c); + } + } + return c; +} + +U_CAPI UChar32 U_EXPORT2 +ucase_totitle(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_GET_TYPE(props)==UCASE_LOWER) { + c+=UCASE_GET_DELTA(props); + } + } else { + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); + uint16_t excWord=*pe++; + if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) { + int32_t delta; + GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); + return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; + } + int32_t idx; + if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) { + idx=UCASE_EXC_TITLE; + } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) { + idx=UCASE_EXC_UPPER; + } else { + return c; + } + GET_SLOT_VALUE(excWord, idx, pe, c); + } + return c; +} + +static const UChar iDot[2] = { 0x69, 0x307 }; +static const UChar jDot[2] = { 0x6a, 0x307 }; +static const UChar iOgonekDot[3] = { 0x12f, 0x307 }; +static const UChar iDotGrave[3] = { 0x69, 0x307, 0x300 }; +static const UChar iDotAcute[3] = { 0x69, 0x307, 0x301 }; +static const UChar iDotTilde[3] = { 0x69, 0x307, 0x303 }; + + +U_CFUNC void U_EXPORT2 +ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { + uint16_t props; + + /* + * Hardcode the case closure of i and its relatives and ignore the + * data file data for these characters. + * The Turkic dotless i and dotted I with their case mapping conditions + * and case folding option make the related characters behave specially. + * This code matches their closure behavior to their case folding behavior. + */ + + switch(c) { + case 0x49: + /* regular i and I are in one equivalence class */ + sa->add(sa->set, 0x69); + return; + case 0x69: + sa->add(sa->set, 0x49); + return; + case 0x130: + /* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */ + sa->addString(sa->set, iDot, 2); + return; + case 0x131: + /* dotless i is in a class by itself */ + return; + default: + /* otherwise use the data file data */ + break; + } + + props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_GET_TYPE(props)!=UCASE_NONE) { + /* add the one simple case mapping, no matter what type it is */ + int32_t delta=UCASE_GET_DELTA(props); + if(delta!=0) { + sa->add(sa->set, c+delta); + } + } + } else { + /* + * c has exceptions, so there may be multiple simple and/or + * full case mappings. Add them all. + */ + const uint16_t *pe0, *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); + const UChar *closure; + uint16_t excWord=*pe++; + int32_t idx, closureLength, fullLength, length; + + pe0=pe; + + /* add all simple case mappings */ + for(idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) { + if(HAS_SLOT(excWord, idx)) { + pe=pe0; + GET_SLOT_VALUE(excWord, idx, pe, c); + sa->add(sa->set, c); + } + } + if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) { + pe=pe0; + int32_t delta; + GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); + sa->add(sa->set, (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta); + } + + /* get the closure string pointer & length */ + if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) { + pe=pe0; + GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength); + closureLength&=UCASE_CLOSURE_MAX_LENGTH; /* higher bits are reserved */ + closure=(const UChar *)pe+1; /* behind this slot, unless there are full case mappings */ + } else { + closureLength=0; + closure=NULL; + } + + /* add the full case folding */ + if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { + pe=pe0; + GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength); + + /* start of full case mapping strings */ + ++pe; + + fullLength&=0xffff; /* bits 16 and higher are reserved */ + + /* skip the lowercase result string */ + pe+=fullLength&UCASE_FULL_LOWER; + fullLength>>=4; + + /* add the full case folding string */ + length=fullLength&0xf; + if(length!=0) { + sa->addString(sa->set, (const UChar *)pe, length); + pe+=length; + } + + /* skip the uppercase and titlecase strings */ + fullLength>>=4; + pe+=fullLength&0xf; + fullLength>>=4; + pe+=fullLength; + + closure=(const UChar *)pe; /* behind full case mappings */ + } + + /* add each code point in the closure string */ + for(idx=0; idx<closureLength;) { + U16_NEXT_UNSAFE(closure, idx, c); + sa->add(sa->set, c); + } + } +} + +/* + * compare s, which has a length, with t, which has a maximum length or is NUL-terminated + * must be length>0 and max>0 and length<=max + */ +static inline int32_t +strcmpMax(const UChar *s, int32_t length, const UChar *t, int32_t max) { + int32_t c1, c2; + + max-=length; /* we require length<=max, so no need to decrement max in the loop */ + do { + c1=*s++; + c2=*t++; + if(c2==0) { + return 1; /* reached the end of t but not of s */ + } + c1-=c2; + if(c1!=0) { + return c1; /* return difference result */ + } + } while(--length>0); + /* ends with length==0 */ + + if(max==0 || *t==0) { + return 0; /* equal to length of both strings */ + } else { + return -max; /* return lengh difference */ + } +} + +U_CFUNC UBool U_EXPORT2 +ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa) { + int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth; + + if(ucase_props_singleton.unfold==NULL || s==NULL) { + return FALSE; /* no reverse case folding data, or no string */ + } + if(length<=1) { + /* the string is too short to find any match */ + /* + * more precise would be: + * if(!u_strHasMoreChar32Than(s, length, 1)) + * but this does not make much practical difference because + * a single supplementary code point would just not be found + */ + return FALSE; + } + + const uint16_t *unfold=ucase_props_singleton.unfold; + unfoldRows=unfold[UCASE_UNFOLD_ROWS]; + unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH]; + unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH]; + unfold+=unfoldRowWidth; + + if(length>unfoldStringWidth) { + /* the string is too long to find any match */ + return FALSE; + } + + /* do a binary search for the string */ + start=0; + limit=unfoldRows; + while(start<limit) { + i=(start+limit)/2; + const UChar *p=reinterpret_cast<const UChar *>(unfold+(i*unfoldRowWidth)); + result=strcmpMax(s, length, p, unfoldStringWidth); + + if(result==0) { + /* found the string: add each code point, and its case closure */ + UChar32 c; + + for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=0;) { + U16_NEXT_UNSAFE(p, i, c); + sa->add(sa->set, c); + ucase_addCaseClosure(c, sa); + } + return TRUE; + } else if(result<0) { + limit=i; + } else /* result>0 */ { + start=i+1; + } + } + + return FALSE; /* string not found */ +} + +U_NAMESPACE_BEGIN + +FullCaseFoldingIterator::FullCaseFoldingIterator() + : unfold(reinterpret_cast<const UChar *>(ucase_props_singleton.unfold)), + unfoldRows(unfold[UCASE_UNFOLD_ROWS]), + unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]), + unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]), + currentRow(0), + rowCpIndex(unfoldStringWidth) { + unfold+=unfoldRowWidth; +} + +UChar32 +FullCaseFoldingIterator::next(UnicodeString &full) { + // Advance past the last-delivered code point. + const UChar *p=unfold+(currentRow*unfoldRowWidth); + if(rowCpIndex>=unfoldRowWidth || p[rowCpIndex]==0) { + ++currentRow; + p+=unfoldRowWidth; + rowCpIndex=unfoldStringWidth; + } + if(currentRow>=unfoldRows) { return U_SENTINEL; } + // Set "full" to the NUL-terminated string in the first unfold column. + int32_t length=unfoldStringWidth; + while(length>0 && p[length-1]==0) { --length; } + full.setTo(FALSE, p, length); + // Return the code point. + UChar32 c; + U16_NEXT_UNSAFE(p, rowCpIndex, c); + return c; +} + +namespace LatinCase { + +const int8_t TO_LOWER_NORMAL[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, + + 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC +}; + +const int8_t TO_LOWER_TR_LT[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, EXC, EXC, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 0, 32, 32, 32, 32, 32, 32, 32, EXC, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, EXC, 0, 1, 0, 1, 0, EXC, 0, + EXC, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, + + 0, 1, 0, 1, 0, 1, 0, 1, 0, EXC, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, + 1, 0, 1, 0, 1, 0, 1, 0, -121, 1, 0, 1, 0, 1, 0, EXC +}; + +const int8_t TO_UPPER_NORMAL[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121, + + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, + + -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC +}; + +const int8_t TO_UPPER_TR[LIMIT] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, -32, -32, -32, -32, -32, -32, -32, -32, EXC, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, EXC, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, EXC, + -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, -32, + -32, -32, -32, -32, -32, -32, -32, 0, -32, -32, -32, -32, -32, -32, -32, 121, + + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, EXC, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, + + -1, 0, -1, 0, -1, 0, -1, 0, -1, EXC, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, + 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, EXC +}; + +} // namespace LatinCase + +U_NAMESPACE_END + +/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ +U_CAPI int32_t U_EXPORT2 +ucase_getType(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + return UCASE_GET_TYPE(props); +} + +/** @return same as ucase_getType() and set bit 2 if c is case-ignorable */ +U_CAPI int32_t U_EXPORT2 +ucase_getTypeOrIgnorable(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + return UCASE_GET_TYPE_AND_IGNORABLE(props); +} + +/** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */ +static inline int32_t +getDotType(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + if(!UCASE_HAS_EXCEPTION(props)) { + return props&UCASE_DOT_MASK; + } else { + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); + return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK; + } +} + +U_CAPI UBool U_EXPORT2 +ucase_isSoftDotted(UChar32 c) { + return (UBool)(getDotType(c)==UCASE_SOFT_DOTTED); +} + +U_CAPI UBool U_EXPORT2 +ucase_isCaseSensitive(UChar32 c) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + if(!UCASE_HAS_EXCEPTION(props)) { + return (UBool)((props&UCASE_SENSITIVE)!=0); + } else { + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); + return (UBool)((*pe&UCASE_EXC_SENSITIVE)!=0); + } +} + +/* string casing ------------------------------------------------------------ */ + +/* + * These internal functions form the core of string case mappings. + * They map single code points to result code points or strings and take + * all necessary conditions (context, locale ID, options) into account. + * + * They do not iterate over the source or write to the destination + * so that the same functions are useful for non-standard string storage, + * such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc. + * For the same reason, the "surrounding text" context is passed in as a + * UCaseContextIterator which does not make any assumptions about + * the underlying storage. + * + * This section contains helper functions that check for conditions + * in the input text surrounding the current code point + * according to SpecialCasing.txt. + * + * Each helper function gets the index + * - after the current code point if it looks at following text + * - before the current code point if it looks at preceding text + * + * Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows: + * + * Final_Sigma + * C is preceded by a sequence consisting of + * a cased letter and a case-ignorable sequence, + * and C is not followed by a sequence consisting of + * an ignorable sequence and then a cased letter. + * + * More_Above + * C is followed by one or more characters of combining class 230 (ABOVE) + * in the combining character sequence. + * + * After_Soft_Dotted + * The last preceding character with combining class of zero before C + * was Soft_Dotted, + * and there is no intervening combining character class 230 (ABOVE). + * + * Before_Dot + * C is followed by combining dot above (U+0307). + * Any sequence of characters with a combining class that is neither 0 nor 230 + * may intervene between the current character and the combining dot above. + * + * The erratum from 2002-10-31 adds the condition + * + * After_I + * The last preceding base character was an uppercase I, and there is no + * intervening combining character class 230 (ABOVE). + * + * (See Jitterbug 2344 and the comments on After_I below.) + * + * Helper definitions in Unicode 3.2 UAX 21: + * + * D1. A character C is defined to be cased + * if it meets any of the following criteria: + * + * - The general category of C is Titlecase Letter (Lt) + * - In [CoreProps], C has one of the properties Uppercase, or Lowercase + * - Given D = NFD(C), then it is not the case that: + * D = UCD_lower(D) = UCD_upper(D) = UCD_title(D) + * (This third criterium does not add any characters to the list + * for Unicode 3.2. Ignored.) + * + * D2. A character C is defined to be case-ignorable + * if it meets either of the following criteria: + * + * - The general category of C is + * Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or + * Letter Modifier (Lm), or Symbol Modifier (Sk) + * - C is one of the following characters + * U+0027 APOSTROPHE + * U+00AD SOFT HYPHEN (SHY) + * U+2019 RIGHT SINGLE QUOTATION MARK + * (the preferred character for apostrophe) + * + * D3. A case-ignorable sequence is a sequence of + * zero or more case-ignorable characters. + */ + +#define is_d(c) ((c)=='d' || (c)=='D') +#define is_e(c) ((c)=='e' || (c)=='E') +#define is_i(c) ((c)=='i' || (c)=='I') +#define is_l(c) ((c)=='l' || (c)=='L') +#define is_r(c) ((c)=='r' || (c)=='R') +#define is_t(c) ((c)=='t' || (c)=='T') +#define is_u(c) ((c)=='u' || (c)=='U') +#define is_y(c) ((c)=='y' || (c)=='Y') +#define is_z(c) ((c)=='z' || (c)=='Z') + +/* separator? */ +#define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0) + +/** + * Requires non-NULL locale ID but otherwise does the equivalent of + * checking for language codes as if uloc_getLanguage() were called: + * Accepts both 2- and 3-letter codes and accepts case variants. + */ +U_CFUNC int32_t +ucase_getCaseLocale(const char *locale) { + /* + * This function used to use uloc_getLanguage(), but the current code + * removes the dependency of this low-level code on uloc implementation code + * and is faster because not the whole locale ID has to be + * examined and copied/transformed. + * + * Because this code does not want to depend on uloc, the caller must + * pass in a non-NULL locale, i.e., may need to call uloc_getDefault(). + */ + char c=*locale++; + // Fastpath for English "en" which is often used for default (=root locale) case mappings, + // and for Chinese "zh": Very common but no special case mapping behavior. + // Then check lowercase vs. uppercase to reduce the number of comparisons + // for other locales without special behavior. + if(c=='e') { + /* el or ell? */ + c=*locale++; + if(is_l(c)) { + c=*locale++; + if(is_l(c)) { + c=*locale; + } + if(is_sep(c)) { + return UCASE_LOC_GREEK; + } + } + // en, es, ... -> root + } else if(c=='z') { + return UCASE_LOC_ROOT; +#if U_CHARSET_FAMILY==U_ASCII_FAMILY + } else if(c>='a') { // ASCII a-z = 0x61..0x7a, after A-Z +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY + } else if(c<='z') { // EBCDIC a-z = 0x81..0xa9 with two gaps, before A-Z +#else +# error Unknown charset family! +#endif + // lowercase c + if(c=='t') { + /* tr or tur? */ + c=*locale++; + if(is_u(c)) { + c=*locale++; + } + if(is_r(c)) { + c=*locale; + if(is_sep(c)) { + return UCASE_LOC_TURKISH; + } + } + } else if(c=='a') { + /* az or aze? */ + c=*locale++; + if(is_z(c)) { + c=*locale++; + if(is_e(c)) { + c=*locale; + } + if(is_sep(c)) { + return UCASE_LOC_TURKISH; + } + } + } else if(c=='l') { + /* lt or lit? */ + c=*locale++; + if(is_i(c)) { + c=*locale++; + } + if(is_t(c)) { + c=*locale; + if(is_sep(c)) { + return UCASE_LOC_LITHUANIAN; + } + } + } else if(c=='n') { + /* nl or nld? */ + c=*locale++; + if(is_l(c)) { + c=*locale++; + if(is_d(c)) { + c=*locale; + } + if(is_sep(c)) { + return UCASE_LOC_DUTCH; + } + } + } else if(c=='h') { + /* hy or hye? *not* hyw */ + c=*locale++; + if(is_y(c)) { + c=*locale++; + if(is_e(c)) { + c=*locale; + } + if(is_sep(c)) { + return UCASE_LOC_ARMENIAN; + } + } + } + } else { + // uppercase c + // Same code as for lowercase c but also check for 'E'. + if(c=='T') { + /* tr or tur? */ + c=*locale++; + if(is_u(c)) { + c=*locale++; + } + if(is_r(c)) { + c=*locale; + if(is_sep(c)) { + return UCASE_LOC_TURKISH; + } + } + } else if(c=='A') { + /* az or aze? */ + c=*locale++; + if(is_z(c)) { + c=*locale++; + if(is_e(c)) { + c=*locale; + } + if(is_sep(c)) { + return UCASE_LOC_TURKISH; + } + } + } else if(c=='L') { + /* lt or lit? */ + c=*locale++; + if(is_i(c)) { + c=*locale++; + } + if(is_t(c)) { + c=*locale; + if(is_sep(c)) { + return UCASE_LOC_LITHUANIAN; + } + } + } else if(c=='E') { + /* el or ell? */ + c=*locale++; + if(is_l(c)) { + c=*locale++; + if(is_l(c)) { + c=*locale; + } + if(is_sep(c)) { + return UCASE_LOC_GREEK; + } + } + } else if(c=='N') { + /* nl or nld? */ + c=*locale++; + if(is_l(c)) { + c=*locale++; + if(is_d(c)) { + c=*locale; + } + if(is_sep(c)) { + return UCASE_LOC_DUTCH; + } + } + } else if(c=='H') { + /* hy or hye? *not* hyw */ + c=*locale++; + if(is_y(c)) { + c=*locale++; + if(is_e(c)) { + c=*locale; + } + if(is_sep(c)) { + return UCASE_LOC_ARMENIAN; + } + } + } + } + return UCASE_LOC_ROOT; +} + +/* + * Is followed by + * {case-ignorable}* cased + * ? + * (dir determines looking forward/backward) + * If a character is case-ignorable, it is skipped regardless of whether + * it is also cased or not. + */ +static UBool +isFollowedByCasedLetter(UCaseContextIterator *iter, void *context, int8_t dir) { + UChar32 c; + + if(iter==NULL) { + return FALSE; + } + + for(/* dir!=0 sets direction */; (c=iter(context, dir))>=0; dir=0) { + int32_t type=ucase_getTypeOrIgnorable(c); + if(type&4) { + /* case-ignorable, continue with the loop */ + } else if(type!=UCASE_NONE) { + return TRUE; /* followed by cased letter */ + } else { + return FALSE; /* uncased and not case-ignorable */ + } + } + + return FALSE; /* not followed by cased letter */ +} + +/* Is preceded by Soft_Dotted character with no intervening cc=230 ? */ +static UBool +isPrecededBySoftDotted(UCaseContextIterator *iter, void *context) { + UChar32 c; + int32_t dotType; + int8_t dir; + + if(iter==NULL) { + return FALSE; + } + + for(dir=-1; (c=iter(context, dir))>=0; dir=0) { + dotType=getDotType(c); + if(dotType==UCASE_SOFT_DOTTED) { + return TRUE; /* preceded by TYPE_i */ + } else if(dotType!=UCASE_OTHER_ACCENT) { + return FALSE; /* preceded by different base character (not TYPE_i), or intervening cc==230 */ + } + } + + return FALSE; /* not preceded by TYPE_i */ +} + +/* + * See Jitterbug 2344: + * The condition After_I for Turkic-lowercasing of U+0307 combining dot above + * is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because + * we made those releases compatible with Unicode 3.2 which had not fixed + * a related bug in SpecialCasing.txt. + * + * From the Jitterbug 2344 text: + * ... this bug is listed as a Unicode erratum + * from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html + * <quote> + * There are two errors in SpecialCasing.txt. + * 1. Missing semicolons on two lines. ... [irrelevant for ICU] + * 2. An incorrect context definition. Correct as follows: + * < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE + * < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE + * --- + * > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE + * > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE + * where the context After_I is defined as: + * The last preceding base character was an uppercase I, and there is no + * intervening combining character class 230 (ABOVE). + * </quote> + * + * Note that SpecialCasing.txt even in Unicode 3.2 described the condition as: + * + * # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. + * # This matches the behavior of the canonically equivalent I-dot_above + * + * See also the description in this place in older versions of uchar.c (revision 1.100). + * + * Markus W. Scherer 2003-feb-15 + */ + +/* Is preceded by base character 'I' with no intervening cc=230 ? */ +static UBool +isPrecededBy_I(UCaseContextIterator *iter, void *context) { + UChar32 c; + int32_t dotType; + int8_t dir; + + if(iter==NULL) { + return FALSE; + } + + for(dir=-1; (c=iter(context, dir))>=0; dir=0) { + if(c==0x49) { + return TRUE; /* preceded by I */ + } + dotType=getDotType(c); + if(dotType!=UCASE_OTHER_ACCENT) { + return FALSE; /* preceded by different base character (not I), or intervening cc==230 */ + } + } + + return FALSE; /* not preceded by I */ +} + +/* Is followed by one or more cc==230 ? */ +static UBool +isFollowedByMoreAbove(UCaseContextIterator *iter, void *context) { + UChar32 c; + int32_t dotType; + int8_t dir; + + if(iter==NULL) { + return FALSE; + } + + for(dir=1; (c=iter(context, dir))>=0; dir=0) { + dotType=getDotType(c); + if(dotType==UCASE_ABOVE) { + return TRUE; /* at least one cc==230 following */ + } else if(dotType!=UCASE_OTHER_ACCENT) { + return FALSE; /* next base character, no more cc==230 following */ + } + } + + return FALSE; /* no more cc==230 following */ +} + +/* Is followed by a dot above (without cc==230 in between) ? */ +static UBool +isFollowedByDotAbove(UCaseContextIterator *iter, void *context) { + UChar32 c; + int32_t dotType; + int8_t dir; + + if(iter==NULL) { + return FALSE; + } + + for(dir=1; (c=iter(context, dir))>=0; dir=0) { + if(c==0x307) { + return TRUE; + } + dotType=getDotType(c); + if(dotType!=UCASE_OTHER_ACCENT) { + return FALSE; /* next base character or cc==230 in between */ + } + } + + return FALSE; /* no dot above following */ +} + +U_CAPI int32_t U_EXPORT2 +ucase_toFullLower(UChar32 c, + UCaseContextIterator *iter, void *context, + const UChar **pString, + int32_t loc) { + // The sign of the result has meaning, input must be non-negative so that it can be returned as is. + U_ASSERT(c >= 0); + UChar32 result=c; + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { + result=c+UCASE_GET_DELTA(props); + } + } else { + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2; + uint16_t excWord=*pe++; + int32_t full; + + pe2=pe; + + if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) { + /* use hardcoded conditions and mappings */ + + /* + * Test for conditional mappings first + * (otherwise the unconditional default mappings are always taken), + * then test for characters that have unconditional mappings in SpecialCasing.txt, + * then get the UnicodeData.txt mappings. + */ + if( loc==UCASE_LOC_LITHUANIAN && + /* base characters, find accents above */ + (((c==0x49 || c==0x4a || c==0x12e) && + isFollowedByMoreAbove(iter, context)) || + /* precomposed with accent above, no need to find one */ + (c==0xcc || c==0xcd || c==0x128)) + ) { + /* + # Lithuanian + + # Lithuanian retains the dot in a lowercase i when followed by accents. + + # Introduce an explicit dot above when lowercasing capital I's and J's + # whenever there are more accents above. + # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) + + 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I + 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J + 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK + 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE + 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE + 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE + */ + switch(c) { + case 0x49: /* LATIN CAPITAL LETTER I */ + *pString=iDot; + return 2; + case 0x4a: /* LATIN CAPITAL LETTER J */ + *pString=jDot; + return 2; + case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */ + *pString=iOgonekDot; + return 2; + case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */ + *pString=iDotGrave; + return 3; + case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */ + *pString=iDotAcute; + return 3; + case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */ + *pString=iDotTilde; + return 3; + default: + return 0; /* will not occur */ + } + /* # Turkish and Azeri */ + } else if(loc==UCASE_LOC_TURKISH && c==0x130) { + /* + # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri + # The following rules handle those cases. + + 0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE + 0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE + */ + return 0x69; + } else if(loc==UCASE_LOC_TURKISH && c==0x307 && isPrecededBy_I(iter, context)) { + /* + # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. + # This matches the behavior of the canonically equivalent I-dot_above + + 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE + 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE + */ + *pString=nullptr; + return 0; /* remove the dot (continue without output) */ + } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) { + /* + # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. + + 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I + 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I + */ + return 0x131; + } else if(c==0x130) { + /* + # Preserve canonical equivalence for I with dot. Turkic is handled below. + + 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE + */ + *pString=iDot; + return 2; + } else if( c==0x3a3 && + !isFollowedByCasedLetter(iter, context, 1) && + isFollowedByCasedLetter(iter, context, -1) /* -1=preceded */ + ) { + /* greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) */ + /* + # Special case for final form of sigma + + 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA + */ + return 0x3c2; /* greek small final sigma */ + } else { + /* no known conditional special case mapping, use a normal mapping */ + } + } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { + GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full); + full&=UCASE_FULL_LOWER; + if(full!=0) { + /* set the output pointer to the lowercase mapping */ + *pString=reinterpret_cast<const UChar *>(pe+1); + + /* return the string length */ + return full; + } + } + + if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) { + int32_t delta; + GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta); + return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; + } + if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { + GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe2, result); + } + } + + return (result==c) ? ~result : result; +} + +/* internal */ +static int32_t +toUpperOrTitle(UChar32 c, + UCaseContextIterator *iter, void *context, + const UChar **pString, + int32_t loc, + UBool upperNotTitle) { + // The sign of the result has meaning, input must be non-negative so that it can be returned as is. + U_ASSERT(c >= 0); + UChar32 result=c; + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_GET_TYPE(props)==UCASE_LOWER) { + result=c+UCASE_GET_DELTA(props); + } + } else { + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2; + uint16_t excWord=*pe++; + int32_t full, idx; + + pe2=pe; + + if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) { + /* use hardcoded conditions and mappings */ + if(loc==UCASE_LOC_TURKISH && c==0x69) { + /* + # Turkish and Azeri + + # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri + # The following rules handle those cases. + + # When uppercasing, i turns into a dotted capital I + + 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I + 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I + */ + return 0x130; + } else if(loc==UCASE_LOC_LITHUANIAN && c==0x307 && isPrecededBySoftDotted(iter, context)) { + /* + # Lithuanian + + # Lithuanian retains the dot in a lowercase i when followed by accents. + + # Remove DOT ABOVE after "i" with upper or titlecase + + 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE + */ + *pString=nullptr; + return 0; /* remove the dot (continue without output) */ + } else if(c==0x0587) { + // See ICU-13416: + // և ligature ech-yiwn + // uppercases to ԵՒ=ech+yiwn by default and in Western Armenian, + // but to ԵՎ=ech+vew in Eastern Armenian. + if(loc==UCASE_LOC_ARMENIAN) { + *pString=upperNotTitle ? u"ԵՎ" : u"Եվ"; + } else { + *pString=upperNotTitle ? u"ԵՒ" : u"Եւ"; + } + return 2; + } else { + /* no known conditional special case mapping, use a normal mapping */ + } + } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { + GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full); + + /* start of full case mapping strings */ + ++pe; + + /* skip the lowercase and case-folding result strings */ + pe+=full&UCASE_FULL_LOWER; + full>>=4; + pe+=full&0xf; + full>>=4; + + if(upperNotTitle) { + full&=0xf; + } else { + /* skip the uppercase result string */ + pe+=full&0xf; + full=(full>>4)&0xf; + } + + if(full!=0) { + /* set the output pointer to the result string */ + *pString=reinterpret_cast<const UChar *>(pe); + + /* return the string length */ + return full; + } + } + + if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) { + int32_t delta; + GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta); + return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; + } + if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) { + idx=UCASE_EXC_TITLE; + } else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) { + /* here, titlecase is same as uppercase */ + idx=UCASE_EXC_UPPER; + } else { + return ~c; + } + GET_SLOT_VALUE(excWord, idx, pe2, result); + } + + return (result==c) ? ~result : result; +} + +U_CAPI int32_t U_EXPORT2 +ucase_toFullUpper(UChar32 c, + UCaseContextIterator *iter, void *context, + const UChar **pString, + int32_t caseLocale) { + return toUpperOrTitle(c, iter, context, pString, caseLocale, TRUE); +} + +U_CAPI int32_t U_EXPORT2 +ucase_toFullTitle(UChar32 c, + UCaseContextIterator *iter, void *context, + const UChar **pString, + int32_t caseLocale) { + return toUpperOrTitle(c, iter, context, pString, caseLocale, FALSE); +} + +/* case folding ------------------------------------------------------------- */ + +/* + * Case folding is similar to lowercasing. + * The result may be a simple mapping, i.e., a single code point, or + * a full mapping, i.e., a string. + * If the case folding for a code point is the same as its simple (1:1) lowercase mapping, + * then only the lowercase mapping is stored. + * + * Some special cases are hardcoded because their conditions cannot be + * parsed and processed from CaseFolding.txt. + * + * Unicode 3.2 CaseFolding.txt specifies for its status field: + +# C: common case folding, common mappings shared by both simple and full mappings. +# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. +# S: simple case folding, mappings to single characters where different from F. +# T: special case for uppercase I and dotted uppercase I +# - For non-Turkic languages, this mapping is normally not used. +# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. +# +# Usage: +# A. To do a simple case folding, use the mappings with status C + S. +# B. To do a full case folding, use the mappings with status C + F. +# +# The mappings with status T can be used or omitted depending on the desired case-folding +# behavior. (The default option is to exclude them.) + + * Unicode 3.2 has 'T' mappings as follows: + +0049; T; 0131; # LATIN CAPITAL LETTER I +0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE + + * while the default mappings for these code points are: + +0049; C; 0069; # LATIN CAPITAL LETTER I +0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE + + * U+0130 has no simple case folding (simple-case-folds to itself). + */ + +/* return the simple case folding mapping for c */ +U_CAPI UChar32 U_EXPORT2 +ucase_fold(UChar32 c, uint32_t options) { + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { + c+=UCASE_GET_DELTA(props); + } + } else { + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props); + uint16_t excWord=*pe++; + int32_t idx; + if(excWord&UCASE_EXC_CONDITIONAL_FOLD) { + /* special case folding mappings, hardcoded */ + if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) { + /* default mappings */ + if(c==0x49) { + /* 0049; C; 0069; # LATIN CAPITAL LETTER I */ + return 0x69; + } else if(c==0x130) { + /* no simple case folding for U+0130 */ + return c; + } + } else { + /* Turkic mappings */ + if(c==0x49) { + /* 0049; T; 0131; # LATIN CAPITAL LETTER I */ + return 0x131; + } else if(c==0x130) { + /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ + return 0x69; + } + } + } + if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) { + return c; + } + if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) { + int32_t delta; + GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); + return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; + } + if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) { + idx=UCASE_EXC_FOLD; + } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { + idx=UCASE_EXC_LOWER; + } else { + return c; + } + GET_SLOT_VALUE(excWord, idx, pe, c); + } + return c; +} + +/* + * Issue for canonical caseless match (UAX #21): + * Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve + * canonical equivalence, unlike default-option casefolding. + * For example, I-grave and I + grave fold to strings that are not canonically + * equivalent. + * For more details, see the comment in unorm_compare() in unorm.cpp + * and the intermediate prototype changes for Jitterbug 2021. + * (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.) + * + * This did not get fixed because it appears that it is not possible to fix + * it for uppercase and lowercase characters (I-grave vs. i-grave) + * together in a way that they still fold to common result strings. + */ + +U_CAPI int32_t U_EXPORT2 +ucase_toFullFolding(UChar32 c, + const UChar **pString, + uint32_t options) { + // The sign of the result has meaning, input must be non-negative so that it can be returned as is. + U_ASSERT(c >= 0); + UChar32 result=c; + uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); + if(!UCASE_HAS_EXCEPTION(props)) { + if(UCASE_IS_UPPER_OR_TITLE(props)) { + result=c+UCASE_GET_DELTA(props); + } + } else { + const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props), *pe2; + uint16_t excWord=*pe++; + int32_t full, idx; + + pe2=pe; + + if(excWord&UCASE_EXC_CONDITIONAL_FOLD) { + /* use hardcoded conditions and mappings */ + if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) { + /* default mappings */ + if(c==0x49) { + /* 0049; C; 0069; # LATIN CAPITAL LETTER I */ + return 0x69; + } else if(c==0x130) { + /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ + *pString=iDot; + return 2; + } + } else { + /* Turkic mappings */ + if(c==0x49) { + /* 0049; T; 0131; # LATIN CAPITAL LETTER I */ + return 0x131; + } else if(c==0x130) { + /* 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE */ + return 0x69; + } + } + } else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) { + GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full); + + /* start of full case mapping strings */ + ++pe; + + /* skip the lowercase result string */ + pe+=full&UCASE_FULL_LOWER; + full=(full>>4)&0xf; + + if(full!=0) { + /* set the output pointer to the result string */ + *pString=reinterpret_cast<const UChar *>(pe); + + /* return the string length */ + return full; + } + } + + if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=0) { + return ~c; + } + if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) { + int32_t delta; + GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta); + return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; + } + if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) { + idx=UCASE_EXC_FOLD; + } else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { + idx=UCASE_EXC_LOWER; + } else { + return ~c; + } + GET_SLOT_VALUE(excWord, idx, pe2, result); + } + + return (result==c) ? ~result : result; +} + +/* case mapping properties API ---------------------------------------------- */ + +/* public API (see uchar.h) */ + +U_CAPI UBool U_EXPORT2 +u_isULowercase(UChar32 c) { + return (UBool)(UCASE_LOWER==ucase_getType(c)); +} + +U_CAPI UBool U_EXPORT2 +u_isUUppercase(UChar32 c) { + return (UBool)(UCASE_UPPER==ucase_getType(c)); +} + +/* Transforms the Unicode character to its lower case equivalent.*/ +U_CAPI UChar32 U_EXPORT2 +u_tolower(UChar32 c) { + return ucase_tolower(c); +} + +/* Transforms the Unicode character to its upper case equivalent.*/ +U_CAPI UChar32 U_EXPORT2 +u_toupper(UChar32 c) { + return ucase_toupper(c); +} + +/* Transforms the Unicode character to its title case equivalent.*/ +U_CAPI UChar32 U_EXPORT2 +u_totitle(UChar32 c) { + return ucase_totitle(c); +} + +/* return the simple case folding mapping for c */ +U_CAPI UChar32 U_EXPORT2 +u_foldCase(UChar32 c, uint32_t options) { + return ucase_fold(c, options); +} + +U_CFUNC int32_t U_EXPORT2 +ucase_hasBinaryProperty(UChar32 c, UProperty which) { + /* case mapping properties */ + const UChar *resultString; + switch(which) { + case UCHAR_LOWERCASE: + return (UBool)(UCASE_LOWER==ucase_getType(c)); + case UCHAR_UPPERCASE: + return (UBool)(UCASE_UPPER==ucase_getType(c)); + case UCHAR_SOFT_DOTTED: + return ucase_isSoftDotted(c); + case UCHAR_CASE_SENSITIVE: + return ucase_isCaseSensitive(c); + case UCHAR_CASED: + return (UBool)(UCASE_NONE!=ucase_getType(c)); + case UCHAR_CASE_IGNORABLE: + return (UBool)(ucase_getTypeOrIgnorable(c)>>2); + /* + * Note: The following Changes_When_Xyz are defined as testing whether + * the NFD form of the input changes when Xyz-case-mapped. + * However, this simpler implementation of these properties, + * ignoring NFD, passes the tests. + * The implementation needs to be changed if the tests start failing. + * When that happens, optimizations should be used to work with the + * per-single-code point ucase_toFullXyz() functions unless + * the NFD form has more than one code point, + * and the property starts set needs to be the union of the + * start sets for normalization and case mappings. + */ + case UCHAR_CHANGES_WHEN_LOWERCASED: + return (UBool)(ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); + case UCHAR_CHANGES_WHEN_UPPERCASED: + return (UBool)(ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); + case UCHAR_CHANGES_WHEN_TITLECASED: + return (UBool)(ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); + /* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */ + case UCHAR_CHANGES_WHEN_CASEMAPPED: + return (UBool)( + ucase_toFullLower(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 || + ucase_toFullUpper(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0 || + ucase_toFullTitle(c, NULL, NULL, &resultString, UCASE_LOC_ROOT)>=0); + default: + return FALSE; + } +} diff --git a/thirdparty/icu4c/common/ucase.h b/thirdparty/icu4c/common/ucase.h new file mode 100644 index 0000000000..a018f82b81 --- /dev/null +++ b/thirdparty/icu4c/common/ucase.h @@ -0,0 +1,445 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2004-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ucase.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004aug30 +* created by: Markus W. Scherer +* +* Low-level Unicode character/string case mapping code. +*/ + +#ifndef __UCASE_H__ +#define __UCASE_H__ + +#include "unicode/utypes.h" +#include "unicode/uset.h" +#include "putilimp.h" +#include "uset_imp.h" +#include "udataswp.h" +#include "utrie2.h" + +#ifdef __cplusplus +U_NAMESPACE_BEGIN + +class UnicodeString; + +U_NAMESPACE_END +#endif + +/* library API -------------------------------------------------------------- */ + +U_CFUNC void U_EXPORT2 +ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); + +/** + * Requires non-NULL locale ID but otherwise does the equivalent of + * checking for language codes as if uloc_getLanguage() were called: + * Accepts both 2- and 3-letter codes and accepts case variants. + */ +U_CFUNC int32_t +ucase_getCaseLocale(const char *locale); + +/* Casing locale types for ucase_getCaseLocale */ +enum { + UCASE_LOC_UNKNOWN, + UCASE_LOC_ROOT, + UCASE_LOC_TURKISH, + UCASE_LOC_LITHUANIAN, + UCASE_LOC_GREEK, + UCASE_LOC_DUTCH, + UCASE_LOC_ARMENIAN +}; + +/** + * Bit mask for getting just the options from a string compare options word + * that are relevant for case-insensitive string comparison. + * See stringoptions.h. Also include _STRNCMP_STYLE and U_COMPARE_CODE_POINT_ORDER. + * @internal + */ +#define _STRCASECMP_OPTIONS_MASK 0xffff + +/** + * Bit mask for getting just the options from a string compare options word + * that are relevant for case folding (of a single string or code point). + * + * Currently only bit 0 for U_FOLD_CASE_EXCLUDE_SPECIAL_I. + * It is conceivable that at some point we might use one more bit for using uppercase sharp s. + * It is conceivable that at some point we might want the option to use only simple case foldings + * when operating on strings. + * + * See stringoptions.h. + * @internal + */ +#define _FOLD_CASE_OPTIONS_MASK 7 + +/* single-code point functions */ + +U_CAPI UChar32 U_EXPORT2 +ucase_tolower(UChar32 c); + +U_CAPI UChar32 U_EXPORT2 +ucase_toupper(UChar32 c); + +U_CAPI UChar32 U_EXPORT2 +ucase_totitle(UChar32 c); + +U_CAPI UChar32 U_EXPORT2 +ucase_fold(UChar32 c, uint32_t options); + +/** + * Adds all simple case mappings and the full case folding for c to sa, + * and also adds special case closure mappings. + * c itself is not added. + * For example, the mappings + * - for s include long s + * - for sharp s include ss + * - for k include the Kelvin sign + */ +U_CFUNC void U_EXPORT2 +ucase_addCaseClosure(UChar32 c, const USetAdder *sa); + +/** + * Maps the string to single code points and adds the associated case closure + * mappings. + * The string is mapped to code points if it is their full case folding string. + * In other words, this performs a reverse full case folding and then + * adds the case closure items of the resulting code points. + * If the string is found and its closure applied, then + * the string itself is added as well as part of its code points' closure. + * It must be length>=0. + * + * @return true if the string was found + */ +U_CFUNC UBool U_EXPORT2 +ucase_addStringCaseClosure(const UChar *s, int32_t length, const USetAdder *sa); + +#ifdef __cplusplus +U_NAMESPACE_BEGIN + +/** + * Iterator over characters with more than one code point in the full default Case_Folding. + */ +class U_COMMON_API FullCaseFoldingIterator { +public: + /** Constructor. */ + FullCaseFoldingIterator(); + /** + * Returns the next (cp, full) pair where "full" is cp's full default Case_Folding. + * Returns a negative cp value at the end of the iteration. + */ + UChar32 next(UnicodeString &full); +private: + FullCaseFoldingIterator(const FullCaseFoldingIterator &); // no copy + FullCaseFoldingIterator &operator=(const FullCaseFoldingIterator &); // no assignment + + const UChar *unfold; + int32_t unfoldRows; + int32_t unfoldRowWidth; + int32_t unfoldStringWidth; + int32_t currentRow; + int32_t rowCpIndex; +}; + +/** + * Fast case mapping data for ASCII/Latin. + * Linear arrays of delta bytes: 0=no mapping; EXC=exception. + * Deltas must not cross the ASCII boundary, or else they cannot be easily used + * in simple UTF-8 code. + */ +namespace LatinCase { + +/** Case mapping/folding data for code points up to U+017F. */ +constexpr UChar LIMIT = 0x180; +/** U+017F case-folds and uppercases crossing the ASCII boundary. */ +constexpr UChar LONG_S = 0x17f; +/** Exception: Complex mapping, or too-large delta. */ +constexpr int8_t EXC = -0x80; + +/** Deltas for lowercasing for most locales, and default case folding. */ +extern const int8_t TO_LOWER_NORMAL[LIMIT]; +/** Deltas for lowercasing for tr/az/lt, and Turkic case folding. */ +extern const int8_t TO_LOWER_TR_LT[LIMIT]; + +/** Deltas for uppercasing for most locales. */ +extern const int8_t TO_UPPER_NORMAL[LIMIT]; +/** Deltas for uppercasing for tr/az. */ +extern const int8_t TO_UPPER_TR[LIMIT]; + +} // namespace LatinCase + +U_NAMESPACE_END +#endif + +/** @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE */ +U_CAPI int32_t U_EXPORT2 +ucase_getType(UChar32 c); + +/** @return like ucase_getType() but also sets UCASE_IGNORABLE if c is case-ignorable */ +U_CAPI int32_t U_EXPORT2 +ucase_getTypeOrIgnorable(UChar32 c); + +U_CAPI UBool U_EXPORT2 +ucase_isSoftDotted(UChar32 c); + +U_CAPI UBool U_EXPORT2 +ucase_isCaseSensitive(UChar32 c); + +/* string case mapping functions */ + +U_CDECL_BEGIN + +/** + * Iterator function for string case mappings, which need to look at the + * context (surrounding text) of a given character for conditional mappings. + * + * The iterator only needs to go backward or forward away from the + * character in question. It does not use any indexes on this interface. + * It does not support random access or an arbitrary change of + * iteration direction. + * + * The code point being case-mapped itself is never returned by + * this iterator. + * + * @param context A pointer to the iterator's working data. + * @param dir If <0 then start iterating backward from the character; + * if >0 then start iterating forward from the character; + * if 0 then continue iterating in the current direction. + * @return Next code point, or <0 when the iteration is done. + */ +typedef UChar32 U_CALLCONV +UCaseContextIterator(void *context, int8_t dir); + +/** + * Sample struct which may be used by some implementations of + * UCaseContextIterator. + */ +struct UCaseContext { + void *p; + int32_t start, index, limit; + int32_t cpStart, cpLimit; + int8_t dir; + int8_t b1, b2, b3; +}; +typedef struct UCaseContext UCaseContext; + +U_CDECL_END + +#define UCASECONTEXT_INITIALIZER { NULL, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + +enum { + /** + * For string case mappings, a single character (a code point) is mapped + * either to itself (in which case in-place mapping functions do nothing), + * or to another single code point, or to a string. + * Aside from the string contents, these are indicated with a single int32_t + * value as follows: + * + * Mapping to self: Negative values (~self instead of -self to support U+0000) + * + * Mapping to another code point: Positive values >UCASE_MAX_STRING_LENGTH + * + * Mapping to a string: The string length (0..UCASE_MAX_STRING_LENGTH) is + * returned. Note that the string result may indeed have zero length. + */ + UCASE_MAX_STRING_LENGTH=0x1f +}; + +/** + * Get the full lowercase mapping for c. + * + * @param csp Case mapping properties. + * @param c Character to be mapped. + * @param iter Character iterator, used for context-sensitive mappings. + * See UCaseContextIterator for details. + * If iter==NULL then a context-independent result is returned. + * @param context Pointer to be passed into iter. + * @param pString If the mapping result is a string, then the pointer is + * written to *pString. + * @param caseLocale Case locale value from ucase_getCaseLocale(). + * @return Output code point or string length, see UCASE_MAX_STRING_LENGTH. + * + * @see UCaseContextIterator + * @see UCASE_MAX_STRING_LENGTH + * @internal + */ +U_CAPI int32_t U_EXPORT2 +ucase_toFullLower(UChar32 c, + UCaseContextIterator *iter, void *context, + const UChar **pString, + int32_t caseLocale); + +U_CAPI int32_t U_EXPORT2 +ucase_toFullUpper(UChar32 c, + UCaseContextIterator *iter, void *context, + const UChar **pString, + int32_t caseLocale); + +U_CAPI int32_t U_EXPORT2 +ucase_toFullTitle(UChar32 c, + UCaseContextIterator *iter, void *context, + const UChar **pString, + int32_t caseLocale); + +U_CAPI int32_t U_EXPORT2 +ucase_toFullFolding(UChar32 c, + const UChar **pString, + uint32_t options); + +U_CFUNC int32_t U_EXPORT2 +ucase_hasBinaryProperty(UChar32 c, UProperty which); + + +U_CDECL_BEGIN + +/** + * @internal + */ +typedef int32_t U_CALLCONV +UCaseMapFull(UChar32 c, + UCaseContextIterator *iter, void *context, + const UChar **pString, + int32_t caseLocale); + +U_CDECL_END + +/* file definitions --------------------------------------------------------- */ + +#define UCASE_DATA_NAME "ucase" +#define UCASE_DATA_TYPE "icu" + +/* format "cAsE" */ +#define UCASE_FMT_0 0x63 +#define UCASE_FMT_1 0x41 +#define UCASE_FMT_2 0x53 +#define UCASE_FMT_3 0x45 + +/* indexes into indexes[] */ +enum { + UCASE_IX_INDEX_TOP, + UCASE_IX_LENGTH, + UCASE_IX_TRIE_SIZE, + UCASE_IX_EXC_LENGTH, + UCASE_IX_UNFOLD_LENGTH, + + UCASE_IX_MAX_FULL_LENGTH=15, + UCASE_IX_TOP=16 +}; + +/* definitions for 16-bit case properties word ------------------------------ */ + +U_CFUNC const UTrie2 * U_EXPORT2 +ucase_getTrie(); + +/* 2-bit constants for types of cased characters */ +#define UCASE_TYPE_MASK 3 +enum { + UCASE_NONE, + UCASE_LOWER, + UCASE_UPPER, + UCASE_TITLE +}; + +#define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK) +#define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7) + +#define UCASE_IS_UPPER_OR_TITLE(props) ((props)&2) + +#define UCASE_IGNORABLE 4 +#define UCASE_EXCEPTION 8 +#define UCASE_SENSITIVE 0x10 + +#define UCASE_HAS_EXCEPTION(props) ((props)&UCASE_EXCEPTION) + +#define UCASE_DOT_MASK 0x60 +enum { + UCASE_NO_DOT=0, /* normal characters with cc=0 */ + UCASE_SOFT_DOTTED=0x20, /* soft-dotted characters with cc=0 */ + UCASE_ABOVE=0x40, /* "above" accents with cc=230 */ + UCASE_OTHER_ACCENT=0x60 /* other accent character (0<cc!=230) */ +}; + +/* no exception: bits 15..7 are a 9-bit signed case mapping delta */ +#define UCASE_DELTA_SHIFT 7 +#define UCASE_DELTA_MASK 0xff80 +#define UCASE_MAX_DELTA 0xff +#define UCASE_MIN_DELTA (-UCASE_MAX_DELTA-1) + +#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC +# define UCASE_GET_DELTA(props) ((int16_t)(props)>>UCASE_DELTA_SHIFT) +#else +# define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfe00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT)) +#endif + +/* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */ +#define UCASE_EXC_SHIFT 4 +#define UCASE_EXC_MASK 0xfff0 +#define UCASE_MAX_EXCEPTIONS ((UCASE_EXC_MASK>>UCASE_EXC_SHIFT)+1) + +/* definitions for 16-bit main exceptions word ------------------------------ */ + +/* first 8 bits indicate values in optional slots */ +enum { + UCASE_EXC_LOWER, + UCASE_EXC_FOLD, + UCASE_EXC_UPPER, + UCASE_EXC_TITLE, + UCASE_EXC_DELTA, + UCASE_EXC_5, /* reserved */ + UCASE_EXC_CLOSURE, + UCASE_EXC_FULL_MAPPINGS, + UCASE_EXC_ALL_SLOTS /* one past the last slot */ +}; + +/* each slot is 2 uint16_t instead of 1 */ +#define UCASE_EXC_DOUBLE_SLOTS 0x100 + +enum { + UCASE_EXC_NO_SIMPLE_CASE_FOLDING=0x200, + UCASE_EXC_DELTA_IS_NEGATIVE=0x400, + UCASE_EXC_SENSITIVE=0x800 +}; + +/* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<<UCASE_EXC_DOT_SHIFT */ +#define UCASE_EXC_DOT_SHIFT 7 + +/* normally stored in the main word, but pushed out for larger exception indexes */ +#define UCASE_EXC_DOT_MASK 0x3000 +enum { + UCASE_EXC_NO_DOT=0, + UCASE_EXC_SOFT_DOTTED=0x1000, + UCASE_EXC_ABOVE=0x2000, /* "above" accents with cc=230 */ + UCASE_EXC_OTHER_ACCENT=0x3000 /* other character (0<cc!=230) */ +}; + +/* complex/conditional mappings */ +#define UCASE_EXC_CONDITIONAL_SPECIAL 0x4000 +#define UCASE_EXC_CONDITIONAL_FOLD 0x8000 + +/* definitions for lengths word for full case mappings */ +#define UCASE_FULL_LOWER 0xf +#define UCASE_FULL_FOLDING 0xf0 +#define UCASE_FULL_UPPER 0xf00 +#define UCASE_FULL_TITLE 0xf000 + +/* maximum lengths */ +#define UCASE_FULL_MAPPINGS_MAX_LENGTH (4*0xf) +#define UCASE_CLOSURE_MAX_LENGTH 0xf + +/* constants for reverse case folding ("unfold") data */ +enum { + UCASE_UNFOLD_ROWS, + UCASE_UNFOLD_ROW_WIDTH, + UCASE_UNFOLD_STRING_WIDTH +}; + +#endif diff --git a/thirdparty/icu4c/common/ucase_props_data.h b/thirdparty/icu4c/common/ucase_props_data.h new file mode 100644 index 0000000000..aead6d58d1 --- /dev/null +++ b/thirdparty/icu4c/common/ucase_props_data.h @@ -0,0 +1,951 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// Copyright (C) 1999-2016, International Business Machines +// Corporation and others. All Rights Reserved. +// +// file name: ucase_props_data.h +// +// machine-generated by: icu/tools/unicode/c/genprops/casepropsbuilder.cpp + + +#ifdef INCLUDED_FROM_UCASE_CPP + +static const UVersionInfo ucase_props_dataVersion={0xd,0,0,0}; + +static const int32_t ucase_props_indexes[UCASE_IX_TOP]={0x10,0x70c2,0x6098,0x683,0x172,0,0,0,0,0,0,0,0,0,0,3}; + +static const uint16_t ucase_props_trieIndex[12356]={ +0x336,0x33e,0x346,0x34e,0x35c,0x364,0x36c,0x374,0x37c,0x384,0x38b,0x393,0x39b,0x3a3,0x3ab,0x3b3, +0x3b9,0x3c1,0x3c9,0x3d1,0x3d9,0x3e1,0x3e9,0x3f1,0x3f9,0x401,0x409,0x411,0x419,0x421,0x429,0x431, +0x439,0x441,0x449,0x451,0x459,0x461,0x469,0x471,0x46d,0x475,0x47a,0x482,0x489,0x491,0x499,0x4a1, +0x4a9,0x4b1,0x4b9,0x4c1,0x355,0x35d,0x4c6,0x4ce,0x4d3,0x4db,0x4e3,0x4eb,0x4ea,0x4f2,0x4f7,0x4ff, +0x507,0x50e,0x512,0x355,0x355,0x355,0x519,0x521,0x529,0x52b,0x533,0x53b,0x53f,0x540,0x548,0x550, +0x558,0x540,0x560,0x565,0x558,0x540,0x56d,0x575,0x53f,0x57d,0x585,0x58d,0x595,0x355,0x59d,0x355, +0x5a5,0x4ec,0x5ad,0x58d,0x53f,0x57d,0x5b4,0x58d,0x5bc,0x5be,0x548,0x58d,0x53f,0x355,0x5c6,0x355, +0x355,0x5cc,0x5d3,0x355,0x355,0x5d7,0x5df,0x355,0x5e3,0x5ea,0x355,0x5f1,0x5f9,0x600,0x608,0x355, +0x355,0x60d,0x615,0x61d,0x625,0x62d,0x634,0x63c,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x644,0x355,0x355,0x654,0x654,0x64c, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x65c,0x65c,0x54c,0x54c,0x355,0x662,0x66a,0x355, +0x672,0x355,0x67a,0x355,0x681,0x687,0x355,0x355,0x355,0x68f,0x355,0x355,0x355,0x355,0x355,0x355, +0x696,0x355,0x69d,0x6a5,0x355,0x6ad,0x6b5,0x355,0x57c,0x6b8,0x6c0,0x6c6,0x5bc,0x6ce,0x355,0x6d5, +0x355,0x6da,0x355,0x6e0,0x6e8,0x6ec,0x6f4,0x6fc,0x704,0x709,0x70c,0x714,0x724,0x71c,0x734,0x72c, +0x37c,0x73c,0x37c,0x744,0x747,0x37c,0x74f,0x37c,0x757,0x75f,0x767,0x76f,0x777,0x77f,0x787,0x78f, +0x797,0x79e,0x355,0x7a6,0x7ae,0x355,0x7b6,0x7be,0x7c6,0x7ce,0x7d6,0x7de,0x7e6,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x7e9,0x7ef,0x7f5,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x7fd,0x802,0x806,0x80e,0x37c,0x37c,0x37c,0x816,0x81e,0x825,0x355,0x82a,0x355,0x355,0x355,0x832, +0x355,0x677,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x53e,0x83a,0x355,0x355,0x841,0x355,0x355,0x849,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x851,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x6e0,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x857,0x355,0x85f,0x864,0x86c,0x355,0x355,0x874,0x87c,0x884,0x37c,0x889,0x891,0x897,0x89f,0x8a2, +0x8aa,0x8b1,0x355,0x355,0x355,0x355,0x8b8,0x8c0,0x355,0x8c8,0x8cf,0x355,0x529,0x8d4,0x8dc,0x681, +0x355,0x8e2,0x8ea,0x8ee,0x355,0x8f6,0x8fe,0x906,0x355,0x90c,0x910,0x918,0x928,0x920,0x355,0x930, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x938,0x355,0x355,0x355,0x355,0x940,0x5bc,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x945,0x94d,0x951,0x355,0x355,0x355,0x355,0x338,0x33e,0x959,0x961,0x968,0x4ec,0x355,0x355,0x970, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0xd58,0xd58,0xd70,0xdb0,0xdf0,0xe2c,0xe6c,0xeac,0xee4,0xf24,0xf64,0xfa4,0xfe4,0x1024,0x1064,0x10a4, +0x10e4,0x1124,0x1164,0x11a4,0x11b4,0x11e8,0x1224,0x1264,0x12a4,0x12e4,0xd54,0x1318,0x134c,0x138c,0x13a8,0x13dc, +0x9e1,0xa11,0xa51,0xa90,0x188,0x188,0xac8,0x188,0x188,0x188,0x188,0x188,0x188,0xaf1,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xb31,0x188,0x188,0xb66,0xba5,0xbe5,0xc1f,0xc56,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, +0xc96,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x977,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x6b5,0x355,0x355,0x355,0x97f,0x355,0x355,0x355, +0x355,0x987,0x98d,0x991,0x355,0x355,0x995,0x999,0x99f,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x9a7,0x9ab,0x355,0x355,0x355,0x355,0x355,0x9b3,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x9bb,0x9bf,0x9c7,0x9cb,0x355,0x9d2,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x9d8,0x355,0x355,0x355,0x355,0x9df,0x355,0x355,0x355,0x355, +0x355,0x53f,0x9e4,0x9eb,0x5bd,0x5bc,0x9ef,0x53c,0x355,0x9f7,0x9fe,0x355,0xa04,0x5bc,0xa09,0xa11, +0x355,0x355,0xa16,0x355,0x355,0x355,0x355,0x338,0xa1e,0x5bc,0x5be,0xa26,0xa2d,0x355,0x355,0x355, +0x355,0x355,0x9e4,0xa35,0x355,0x355,0xa3d,0xa45,0x355,0x355,0x355,0x355,0x355,0x355,0xa49,0xa51, +0x355,0x355,0xa59,0x4b0,0x355,0x355,0xa61,0x355,0x355,0xa67,0xa6f,0x355,0x355,0x355,0x355,0x355, +0x355,0xa74,0x355,0x355,0x355,0xa7c,0xa84,0x355,0x355,0xa8c,0xa94,0x355,0x355,0x355,0xa97,0x6b5, +0xa9f,0xaa3,0xaab,0x355,0xab2,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0xab9,0x355,0x355,0x940,0xac1,0x355,0x355,0x355,0xac7,0xacf,0x355,0xad3,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xad9,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xadf,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xae6,0x355,0xaec,0x57c,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0xa7c,0xa84,0x355,0x355,0x355,0x355,0x355,0x355,0x677,0x355,0xaf2,0x355,0x355, +0xafa,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0xaff,0x57c,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0xb07,0xb0f,0xb15,0x355,0x355,0x355,0x355,0xb1d,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0xb25,0xb2d,0xb32,0xb38,0xb40,0xb48,0xb50,0xb29,0xb58,0xb60, +0xb68,0xb6f,0xb2a,0xb25,0xb2d,0xb28,0xb38,0xb2b,0xb26,0xb77,0xb29,0xb7f,0xb87,0xb8f,0xb96,0xb82, +0xb8a,0xb92,0xb99,0xb85,0xba1,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x87c,0xba9,0x87c,0xbb0,0xbb7,0xbbf,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0xbc7,0xbcf,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xbd3,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x9d0,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0xbdb,0x355,0xbe3,0xbeb,0xbf2,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0xb21, +0xbfa,0xbfa,0xc00,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x9f9,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x53f,0x87c,0x87c,0x87c,0x355,0x355,0x355,0x355,0x87c,0x87c, +0x87c,0x87c,0x87c,0x87c,0x87c,0xc08,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355,0x355, +0x355,0x355,0x355,0x355,0x355,0x355,0x335,0x335,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4, +0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0, +0,0,4,0,0,0,0,0,0,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012, +0x1012,0xa,0x5a,0x7a,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0xba,0x1012,0x1012,0x1012,0x1012, +0x1012,0x1012,0x1012,0,0,0,4,0,4,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011, +0xf011,0xf9,0xf031,0x149,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0x189,0xf011,0xf011,0xf011,0xf011, +0xf011,0xf011,0xf011,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,4,0,1,0,0,4,0,4, +0,0,0,0,4,0x1c9,0,4,4,0,1,0,0,0,0,0, +0x1012,0x1012,0x1012,0x1012,0x1012,0x1fa,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x5a,0x5a,0x1012,0x1012, +0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x239, +0xf011,0xf011,0xf011,0xf011,0xf011,0x2d9,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011, +0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0x3c91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x31a,0xff91,0x92,0xff91,0x92,0xff91,0x31a,0xffb1, +0x33a,0x389,0x92,0xff91,0x92,0xff91,0x92,0xff91,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92, +0xff91,0x92,0xff91,0x92,0xff91,0x3d9,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0xc392,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x459,0x6191,0x6912,0x92,0xff91, +0x92,0xff91,0x6712,0x92,0xff91,0x6692,0x6692,0x92,0xff91,1,0x2792,0x6512,0x6592,0x92,0xff91,0x6692, +0x6792,0x3091,0x6992,0x6892,0x92,0xff91,0x5191,1,0x6992,0x6a92,0x4111,0x6b12,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x6d12,0x92,0xff91,0x6d12,1,1,0x92,0xff91,0x6d12,0x92,0xff91,0x6c92,0x6c92,0x92, +0xff91,0x92,0xff91,0x6d92,0x92,0xff91,1,0,0x92,0xff91,1,0x1c11,0,0,0,0, +0x48a,0x4bb,0x4f9,0x52a,0x55b,0x599,0x5ca,0x5fb,0x639,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92, +0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0xd891,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x669,0x6ea,0x71b,0x759, +0x92,0xff91,0xcf92,0xe412,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0xbf12,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,1,1,1,1,1,1,0x78a,0x92, +0xff91,0xae92,0x7aa,0x7c9,0x7c9,0x92,0xff91,0x9e92,0x2292,0x2392,0x92,0xff91,0x92,0xffb1,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x7e9,0x809,0x829,0x9711,0x9911,1,0x9991,0x9991,1,0x9b11,1,0x9a91, +0x849,1,1,1,0x9991,0x869,1,0x9891,1,0x889,0x8a9,1,0x97b1,0x9691,0x8a9,0x8c9, +0x8e9,1,1,0x9691,1,0x909,0x9591,1,1,0x9511,1,1,1,1,1,1, +1,0x929,1,1,0x9311,1,0x949,0x9311,1,1,1,0x969,0x9311,0xdd91,0x9391,0x9391, +0xdc91,1,1,1,1,1,0x9291,1,0,1,1,1,1,1,1,1, +1,0x989,0x9a9,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,5,5,0x25,5,5,5,5,5,5,4,4,4, +0x14,4,0x14,4,5,5,4,4,4,4,4,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,4,4,5,5,5,5,5,4,4,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,4,4,0x54,0x54,0x44,0x44,0x44,0x44,0x44,0x9cc,0x54,0x44,0x54,0x44, +0x54,0x44,0x44,0x44,0x44,0x44,0x44,0x54,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x64, +0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64, +0x64,0x64,0x64,0x64,0x64,0x74,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64, +0x64,0x44,0x44,0x44,0x44,0x44,0x54,0x44,0x44,0x9dd,0x44,0x64,0x64,0x64,0x44,0x44, +0x44,0x64,0x64,4,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x44,0x64,0x64,0x64,0x44, +0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44, +0x44,0x44,0x44,0x44,0x92,0xff91,0x92,0xff91,4,4,0x92,0xff91,0,0,5,0x4111, +0x4111,0x4111,0,0x3a12,0,0,0,0,4,4,0x1312,4,0x1292,0x1292,0x1292,0, +0x2012,0,0x1f92,0x1f92,0xa29,0x1012,0xafa,0x1012,0x1012,0xb3a,0x1012,0x1012,0xb7a,0xbca,0xc1a,0x1012, +0xc5a,0x1012,0x1012,0x1012,0xc9a,0xcda,0,0xd1a,0x1012,0x1012,0xd5a,0x1012,0x1012,0xd9a,0x1012,0x1012, +0xed11,0xed91,0xed91,0xed91,0xdd9,0xf011,0xea9,0xf011,0xf011,0xee9,0xf011,0xf011,0xf29,0xf79,0xfc9,0xf011, +0x1009,0xf011,0xf011,0xf011,0x1049,0x1089,0x10c9,0x10f9,0xf011,0xf011,0x1139,0xf011,0xf011,0x1179,0xf011,0xf011, +0xe011,0xe091,0xe091,0x412,0x11b9,0x11e9,2,2,2,0x1239,0x1269,0xfc11,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x1299,0x12c9,0x391,0xc631,0x12fa,0x1349,0,0x92,0xff91,0xfc92,0x92,0xff91, +1,0xbf12,0xbf12,0xbf12,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812,0x2812, +0x2812,0x2812,0x2812,0x2812,0x1012,0x1012,0x137a,0x1012,0x13ba,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012, +0x1012,0x1012,0x13fa,0x1012,0x1012,0x143a,0x147a,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x14ca,0x1012, +0x1012,0x1012,0x1012,0x1012,0xf011,0xf011,0x1509,0xf011,0x1549,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011, +0xf011,0xf011,0x1589,0xf011,0xf011,0x15c9,0x1609,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0x1659,0xf011, +0xf011,0xf011,0xf011,0xf011,0xd811,0xd811,0xd811,0xd811,0xd811,0xd811,0xd831,0xd811,0xd831,0xd811,0xd811,0xd811, +0xd811,0xd811,0xd811,0xd811,0x92,0xff91,0x169a,0x16d9,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0,0x44,0x44,0x44,0x44,0x44,4,4,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x792,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92, +0xff91,0x92,0xff91,0xf891,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812, +0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0, +0,4,0,0,0,0,0,4,1,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811, +0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811, +0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0x1719,1,0,0,0, +0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,0x64,0x44,0x44,0x44,0x64,0x64, +0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x64,0x44, +0x44,0x64,0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64, +0x64,0x64,0,0x64,0,0x64,0x64,0,0x44,0x64,0,0x64,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0, +0,0,0,0,4,4,4,4,4,4,0,0,0,0,0,0, +0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0, +4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0x64, +0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x64,0x64,0x44,0x44,0x44,0x44,0x44, +0x64,0x44,0x44,0x64,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44, +0x44,0x44,0x44,0x44,0x44,4,0,0x44,0x44,0x44,0x44,0x64,0x44,4,4,0x44, +0x44,0,0x64,0x44,0x44,0x64,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,4,0,0x64,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x64,0x44,0x44, +0x64,0x44,0x44,0x64,0x64,0x64,0x44,0x64,0x64,0x44,0x64,0x44,0x44,0x44,0x64,0x44, +0x64,0x44,0x64,0x44,0x64,0x44,0x44,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4, +4,4,4,4,4,4,4,4,4,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44, +4,4,0,0,0,0,4,0,0,0x64,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0x44,0x44,0x44,0x44,4,0x44,0x44,0x44,0x44,0x44,4,0x44,0x44,0x44, +4,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44, +0x44,0x44,0x44,0x44,0x44,0x44,4,0x64,0x44,0x44,0x64,0x44,0x44,0x64,0x44,0x44, +0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x44,0x64,0x44,0x44,0x64,0x64,0x44, +0x44,0x44,0x44,0x44,4,4,4,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,4,0,0x64,0,0,0,0,4,4,4, +4,4,4,4,4,0,0,0,0,0x64,0,0,0,0x44,0x64,0x44, +0x44,4,4,4,0,0,0,0,0,0,0,0,0,0,4,4, +0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x64,0,0,0, +0,4,4,4,4,0,0,0,0,0,0,0,0,0x64,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,4,4,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0, +0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,4,4,0,0,0,0,4,4,0,0,4,4,0x64,0,0, +0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,4,4,0,0,0,4,0,0,0,0,0,0, +0,0,0,0,0,4,4,4,4,4,0,4,4,0,0,0, +0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4, +4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x64,0,0,4,0,4,4,4,4,0,0,0,0,0,0,0, +0,0x64,0,0,0,0,0,0,0,4,4,0,0,0,0,0, +0,0,0,0,0,0,4,4,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0, +0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,4,0,0,0,4,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,4,0,0,0,0,0,4,4,4,0,4,4, +4,0x64,0,0,0,0,0,0,0,0x64,0x64,0,0,0,0,0, +0,0,0,0,0,0,4,0,0,0,0,0,4,0x64,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0, +0,0,0x64,0,0,0,0,0,0,0,4,4,4,0,4,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,4,0,0,4,4,4,4,0x64,0x64,0x64,0,0,0,0,0, +0,0,4,4,0x64,0x64,0x64,0x64,4,4,4,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0, +4,4,4,4,0x64,0x64,0x64,4,4,0,0,0,0,0,0,0, +0,0,4,0,0x64,0x64,0x64,0x64,4,4,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x64,0x64,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0x64,0,0x64, +0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0x64,0x64,4,0x64,4,4,4,4,4,0x64,0x64, +0x64,0x64,4,0,0x64,4,0x44,0x44,0x64,0,0x44,0x44,0,0,0,0, +0,4,4,4,4,4,4,4,4,4,4,4,0,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,0, +0,0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,4,4,4,4,0,4,4,4,4,4,0x64,0,0x64,0x64,0, +0,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0, +0,0,4,4,4,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,4,4,4,4,0,0,0,0,0,0,0, +0,0,0,0,0,0,4,0,0,4,4,0,0,0,0,0, +0,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,4,0,0,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a, +0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0x175a, +0x175a,0x175a,0x175a,0x175a,0x175a,0x175a,0,0x175a,0,0,0,0,0,0x175a,0,0, +0x1779,0x17a9,0x17d9,0x1809,0x1839,0x1869,0x1899,0x18c9,0x18f9,0x1929,0x1959,0x1989,0x19b9,0x19e9,0x1a19,0x1a49, +0x1a79,0x1aa9,0x1ad9,0x1b09,0x1b39,0x1b69,0x1b99,0x1bc9,0x1bf9,0x1c29,0x1c59,0x1c89,0x1cb9,0x1ce9,0x1d19,0x1d49, +0x1d79,0x1da9,0x1dd9,0x1e09,0x1e39,0x1e69,0x1e99,0x1ec9,0x1ef9,0x1f29,0x1f59,0,4,0x1f89,0x1fb9,0x1fe9, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44, +0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a, +0x203a,0x203a,0x203a,0x203a,0x203a,0x203a,0,0,0x2059,0x2089,0x20b9,0x20e9,0x2119,0x2149,0,0, +0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a, +0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a,0x201a, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,4,4,0x64,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,4, +4,4,4,4,4,4,0,0,0,0,0,0,0,0,4,0, +0,4,4,4,4,4,4,4,4,4,0x64,4,0,0,0,4, +0,0,0,0,0,0x44,0,0,0,0,0,0,0,0,0,0, +0,0,0,4,4,4,4,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,4,4,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0x64,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,0, +0,0,0,4,4,0,0,0,0,0,0,0,0,0,4,0, +0,0,0,0,0,0x64,0x44,0x64,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44, +0x64,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,4,0,4,4,4,4, +4,4,4,0,0x64,0,4,0,0,4,4,4,4,4,4,4, +4,0,0,0,0,0,0,4,4,0x44,0x44,0x44,0x44,0x44,0x44,0x44, +0x44,0,0,0x64,0,0,0,0,0,0,0,4,0,0,0,0, +0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44, +0x44,0x64,4,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x64,0,4,4,4,4,4,0,4,0,0,0, +0,0,4,0,0x60,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,4,4,4,4,0,0, +4,4,0x60,0x64,4,4,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0x64,0,4,4,0,0, +0,4,0,4,4,4,0x60,0x60,0,0,0,0,0,0,0,0, +0,0,0,0,4,4,4,4,4,4,4,4,0,0,4,0x64, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,4,4,4,4,4,4,0,0, +0x2179,0x21a9,0x21d9,0x2209,0x2239,0x2289,0x22d9,0x2309,0x2339,0,0,0,0,0,0,0, +0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a, +0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0x236a,0,0,0x236a,0x236a,0x236a, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x44,0x44,0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0x44,0x64,0x64,0x64,0x64, +0x44,0,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0,0,0,0,0x64,0,0, +0,0,0,0,0x44,0,0,0,0x44,0x44,0,0,0,0,0,0, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,0x25,5,5,5,5,5,5,5,5,1,1,1,1,1, +1,1,1,1,1,1,1,1,5,0x2389,1,1,1,0x23a9,1,1, +5,5,5,5,0x25,5,5,5,0x25,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x23c9,1, +1,1,1,1,1,1,0x21,1,1,1,1,5,5,5,5,5, +0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44, +0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0,0x44,0x64,0x64,0x44,0x64, +0x44,0x44,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x44,0x44,0x64,0x64,0x64, +0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xffb1,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x23ea,0x2429,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x2469,0x24e9,0x2569,0x25e9,0x2669,0x26e9,1,1,0x271a,1,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xffb1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x411,0x411,0x411,0x411, +0x411,0x411,0x411,0x411,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0x411,0x411,0x411,0x411, +0x411,0x411,0,0,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0,0,0x411,0x411,0x411,0x411, +0x411,0x411,0x411,0x411,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0x411,0x411,0x411,0x411, +0x411,0x411,0x411,0x411,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0x411,0x411,0x411,0x411, +0x411,0x411,0,0,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0,0,0x2769,0x411,0x27e9,0x411, +0x2899,0x411,0x2949,0x411,0,0xfc12,0,0xfc12,0,0xfc12,0,0xfc12,0x411,0x411,0x411,0x411, +0x411,0x411,0x411,0x411,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0xfc12,0x2511,0x2511,0x2b11,0x2b11, +0x2b11,0x2b11,0x3211,0x3211,0x4011,0x4011,0x3811,0x3811,0x3f11,0x3f11,0,0,0x29f9,0x2a69,0x2ad9,0x2b49, +0x2bb9,0x2c29,0x2c99,0x2d09,0x2d7b,0x2deb,0x2e5b,0x2ecb,0x2f3b,0x2fab,0x301b,0x308b,0x30f9,0x3169,0x31d9,0x3249, +0x32b9,0x3329,0x3399,0x3409,0x347b,0x34eb,0x355b,0x35cb,0x363b,0x36ab,0x371b,0x378b,0x37f9,0x3869,0x38d9,0x3949, +0x39b9,0x3a29,0x3a99,0x3b09,0x3b7b,0x3beb,0x3c5b,0x3ccb,0x3d3b,0x3dab,0x3e1b,0x3e8b,0x411,0x411,0x3ef9,0x3f79, +0x3fe9,0,0x4069,0x40e9,0xfc12,0xfc12,0xdb12,0xdb12,0x419b,4,0x4209,4,4,4,0x4259,0x42d9, +0x4349,0,0x43c9,0x4449,0xd512,0xd512,0xd512,0xd512,0x44fb,4,4,4,0x411,0x411,0x4569,0x4619, +0,0,0x46e9,0x4769,0xfc12,0xfc12,0xce12,0xce12,0,4,4,4,0x411,0x411,0x4819,0x48c9, +0x4999,0x391,0x4a19,0x4a99,0xfc12,0xfc12,0xc812,0xc812,0xfc92,4,4,4,0,0,0x4b49,0x4bc9, +0x4c39,0,0x4cb9,0x4d39,0xc012,0xc012,0xc112,0xc112,0x4deb,4,4,0,0,0,0,0, +0,0,0,0,0,0,0,4,4,4,4,4,0,0,0,0, +0,0,0,0,4,4,0,0,0,0,0,0,4,0,0,4, +0,0,4,4,4,4,4,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,4,4,4,4,4,0,4,4, +4,4,4,4,4,4,4,4,0,0x25,0,0,0,0,0,0, +0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x44,0x44,0x64,0x64,0x44,0x44,0x44,0x44, +0x64,0x64,0x64,0x44,0x44,4,4,4,4,0x44,4,4,4,0x64,0x64,0x44, +0x64,0x44,0x64,0x64,0x64,0x64,0x64,0x64,0x44,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2, +0,0,1,2,2,2,1,1,2,2,2,1,0,2,0,0, +0,2,2,2,2,2,0,0,0,0,0,0,2,0,0x4e5a,0, +2,0,0x4e9a,0x4eda,2,2,0,1,2,2,0xe12,2,1,0,0,0, +0,1,0,0,1,1,2,2,0,0,0,0,0,2,1,1, +0x21,0x21,0,0,0,0,0xf211,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x812,0x812,0x812,0x812,0x812,0x812,0x812,0x812, +0x812,0x812,0x812,0x812,0x812,0x812,0x812,0x812,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811, +0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0xf811,0,0,0,0x92,0xff91,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12, +0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xd12,0xf311,0xf311,0xf311,0xf311, +0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0xf311,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812, +0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812, +0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0x1812,0,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811, +0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811, +0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0xe811,0,0x92,0xff91,0x4f1a,0x4f3a,0x4f5a,0x4f79,0x4f99,0x92, +0xff91,0x92,0xff91,0x92,0xff91,0x4fba,0x4fda,0x4ffa,0x501a,1,0x92,0xff91,1,0x92,0xff91,1, +1,1,1,1,0x25,5,0x503a,0x503a,0x92,0xff91,0x92,0xff91,1,0,0,0, +0,0,0,0x92,0xff91,0x92,0xff91,0x44,0x44,0x44,0x92,0xff91,0,0,0,0, +0,0,0,0,0,0,0,0,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059, +0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059, +0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0x5059,0,0x5059,0,0,0,0, +0,0x5059,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0x64,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44, +0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44, +0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0, +0,0,0x64,0x64,0x64,0x64,0x60,0x60,0,4,4,4,4,4,0,0, +0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0x64,0x64,4, +4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +4,4,4,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x507a,0x50b9,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0,0x44, +4,4,4,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,4, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,5,5,0x44,0x44, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +1,1,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,5,1,1,1,1,1,1,1,1,0x92,0xff91,0x92, +0xff91,0x50fa,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,4,4,4,0x92, +0xff91,0x511a,1,0,0x92,0xff91,0x92,0xff91,0x1811,1,0x92,0xff91,0x92,0xff91,0x92,0xff91, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x513a,0x515a,0x517a,0x519a,0x513a,1,0x51ba,0x51da,0x51fa,0x521a, +0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0x92,0xff91,0,0,0x92,0xff91, +0xe812,0x523a,0x525a,0x92,0xff91,0x92,0xff91,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0x92,0xff91,0, +5,5,1,0,0,0,0,0,0,0,4,0,0,0,0x64,0, +0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,4,4,0,0,0,0,0, +0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x64,4,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44, +0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,4, +0,0,0,0,0,0,4,4,4,4,4,0x64,0x64,0x64,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,4,4,4,4,4,4,4,4,4,4,4,0,0x60, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0x64,0,0,4,4,4,4,0,0,4,4,0,0, +0x60,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,4,4,4,4,4,4,0,0,4,4,0,0,4,4,0, +0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0, +0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0, +0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x44,0,0x44,0x44,0x64,0,0,0x44, +0x44,0,0,0,0,0,0x44,0x44,0,0x44,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,4,4,0,0,0,0,0,4,4,0,0x64,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0x5279,1,1,1,1,1,1,1,4,5,5,5,5, +1,1,1,1,1,1,1,1,1,4,4,4,0,0,0,0, +0x5299,0x52c9,0x52f9,0x5329,0x5359,0x5389,0x53b9,0x53e9,0x5419,0x5449,0x5479,0x54a9,0x54d9,0x5509,0x5539,0x5569, +0x5b99,0x5bc9,0x5bf9,0x5c29,0x5c59,0x5c89,0x5cb9,0x5ce9,0x5d19,0x5d49,0x5d79,0x5da9,0x5dd9,0x5e09,0x5e39,0x5e69, +0x5e99,0x5ec9,0x5ef9,0x5f29,0x5f59,0x5f89,0x5fb9,0x5fe9,0x6019,0x6049,0x6079,0x60a9,0x60d9,0x6109,0x6139,0x6169, +0x5599,0x55c9,0x55f9,0x5629,0x5659,0x5689,0x56b9,0x56e9,0x5719,0x5749,0x5779,0x57a9,0x57d9,0x5809,0x5839,0x5869, +0x5899,0x58c9,0x58f9,0x5929,0x5959,0x5989,0x59b9,0x59e9,0x5a19,0x5a49,0x5a79,0x5aa9,0x5ad9,0x5b09,0x5b39,0x5b69, +0,0,0,0,0,4,0,0,4,0,0,0,0,0x64,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x6199,0x6219,0x6299,0x6319,0x63c9,0x6479,0x6519,0,0,0,0,0,0,0,0,0, +0,0,0,0x65b9,0x6639,0x66b9,0x6739,0x67b9,0,0,0,0,0,0,0x64,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,4,4,0,0,0,4,0,0,0,0,0,0,0,0, +0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0x64,0x64,0x64, +0x64,0x64,0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,4,0,0,4,0,0,0,0,0,0, +0,0,0,0,0,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012, +0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0, +0,0,4,0,4,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011, +0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,4,4,4,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0x64,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0x1412,0x1412,0x1412,0x1412, +0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412, +0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0xec11,0xec11,0xec11,0xec11, +0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11, +0xec11,0xec11,0xec11,0xec11,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412, +0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0x1412,0,0,0,0,0xec11,0xec11,0xec11,0xec11, +0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11, +0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0xec11,0,0,0,0,0,4,4,4, +0,4,4,0,0,0,0,0,4,0x64,4,0x44,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x44,0x64,0x64,0,0,0,0,0x64,0,0,0,0, +0,0x44,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x2012,0x2012,0x2012,0x2012, +0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012, +0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0x2012,0, +0,0,0,0,0,0,0,0,0,0,0,0,0xe011,0xe011,0xe011,0xe011, +0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011, +0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0xe011,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0x64,0x64,0x44,0x44,0x44,0x64,0x44,0x64,0x64,0x64,0x64,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4, +4,4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4, +4,4,4,0,0,0x64,0x64,0,0,4,0,0,0x44,0x44,0x44,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4, +4,4,4,4,0,4,4,4,4,4,4,0x64,0x64,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,4,4,4,4,4,4, +4,4,4,0,0x60,0,0,0,0,0,0,0,0,4,0x64,4, +4,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,4,4,4,0,0,4,0x60,0x64,4, +0,0,0,0,0,0,4,0,0,0,0,4,4,4,4,4, +4,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0, +0,0,0,0,0,0x60,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44,0x44,0x44, +0x44,0,0,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0, +0,0,0,0,0,0,0x64,4,4,0,0x64,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,4,4,4,4,4,4,0,4,0, +0,0,0,4,4,0,0x64,0x64,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,4,4,4,4,0,0,0,0,0,0, +4,4,0,0x64,0x64,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,0, +0,4,0,0x64,0,0,0,0,0,0,0,0,0,0,0,4, +0,4,0,0,4,4,4,4,4,4,0x60,0x64,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,4,4,4,0,0,4,4, +4,4,0,4,4,4,4,0x64,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4, +4,4,4,4,4,4,4,4,0,0x64,0x64,0,0,0,0,0, +0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012, +0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012,0x1012, +0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011, +0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011,0xf011, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,4,4,0x60,0x64,0, +0,0,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +4,4,4,4,0,0,4,4,0,0,0,0,0,4,4,4, +4,4,4,4,4,4,4,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4, +0x64,4,4,4,4,0,0,4,4,4,4,0,0,0,0,0, +0,0,0,0x64,0,0,0,0,0,0,0,0,0,4,4,4, +4,4,4,0,0,4,4,4,0,0,0,0,0,0,0,0, +0,0,4,4,4,4,4,4,4,4,4,4,4,4,4,0, +4,0x64,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,4,4,4,4,4,4,4,0,4,4,4,4, +4,4,0,0x64,4,4,4,4,4,4,4,4,0,0,4,4, +4,4,4,4,4,0,4,4,0,4,4,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,4, +4,4,4,0,0,0,4,0,4,4,0,4,4,4,0x64,4, +0x64,0x64,0,4,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,4,4,0,0, +0,4,0,0x64,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,4,4,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,4,4,4,4, +4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64,0x64,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,4,4,4,4,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,0,4,4,0,0,0, +0,0,0,0,0,0,0,0,0x60,0x60,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,4,0x64,0,0,0,0,0, +0,0x60,0x60,0x64,0x64,0x64,0,0,0,0x60,0x60,0x60,0x60,0x60,0x60,4, +4,4,4,4,4,4,4,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0x64,0, +0,0x44,0x44,0x44,0x44,0x44,0x64,0x64,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44, +0x44,0x44,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0x44,0x44,0x44,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1, +1,1,1,1,1,1,0x21,0x21,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,1,1,1,1,1,1,1,0,0x21,0x21, +1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,2,0,2,2, +0,0,2,0,0,2,2,0,0,2,2,2,2,0,2,2, +2,2,2,2,2,2,1,1,1,1,0,1,0,1,0x21,0x21, +1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +1,1,1,1,2,2,0,2,2,2,2,0,0,2,2,2, +2,2,2,2,2,0,2,2,2,2,2,2,2,0,1,1, +1,1,1,1,1,1,0x21,0x21,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,2,2,0,2,2,2,2,0, +2,2,2,2,2,0,2,0,0,0,2,2,2,2,2,2, +2,0,1,1,1,1,1,1,1,1,0x21,0x21,1,1,1,1, +1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1, +1,1,0,0,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,0,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,0,1,1,1,1,1,1,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,0,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1, +1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,1,1,1,0,1,1,1,1,1,1,2,1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,0,0,0,0,4, +4,4,4,4,4,4,4,4,4,4,4,4,4,0,0,0, +0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0, +4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,4,4,4,4,4,0,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,4,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44, +0x44,0x44,0x44,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0x44, +0x44,0x44,0x44,0x44,0x44,0,0,0x44,0x44,0x44,0x44,0x44,0x44,0x44,0,0x44, +0x44,0,0x44,0x44,0x44,0x44,0x44,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x44,0x44,0x44,0x44, +0x44,0x44,0x44,4,4,4,4,4,4,4,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x64,0x64,0x64,0x64, +0x64,0x64,0x64,0,0,0,0,0,0,0,0,0,0x1112,0x1112,0x1112,0x1112, +0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112, +0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0x1112,0xef11,0xef11, +0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11, +0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0xef11,0x44,0x44,0x44,0x44, +0x44,0x44,0x64,4,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2, +2,2,0,0,0,0,0,0,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0 +}; + +static const uint16_t ucase_props_exceptions[1667]={ +0xc850,0x20,2,0x130,0x131,0x4810,0x20,0x841,0x6b,1,0x212a,0x841,0x73,1,0x17f,0x5c50, +0x20,2,0x130,0x131,0x844,0x4b,1,0x212a,0x844,0x53,1,0x17f,0x806,0x3bc,0x39c,0x841, +0xe5,1,0x212b,0x8c0,1,0x2220,0x73,0x73,0x53,0x53,0x53,0x73,0x1e9e,0x844,0xc5,1, +0x212b,0x4810,1,0xce50,0xc7,2,0x49,0x131,0x844,0x49,2,0x69,0x130,0x880,0x2220,0x2bc, +0x6e,0x2bc,0x4e,0x2bc,0x4e,0x806,0x73,0x53,0x809,0x1c6,0x1c5,0x80d,0x1c6,0x1c4,0x1c5,0x80c, +0x1c4,0x1c5,0x809,0x1c9,0x1c8,0x80d,0x1c9,0x1c7,0x1c8,0x80c,0x1c7,0x1c8,0x809,0x1cc,0x1cb,0x80d, +0x1cc,0x1ca,0x1cb,0x80c,0x1ca,0x1cb,0x880,0x2220,0x6a,0x30c,0x4a,0x30c,0x4a,0x30c,0x809,0x1f3, +0x1f2,0x80d,0x1f3,0x1f1,0x1f2,0x80c,0x1f1,0x1f2,0x810,0x2a2b,0x810,0x2a28,0x810,0x2a3f,0x810,0x2a1f, +0x810,0x2a1c,0x810,0x2a1e,0x810,0xa54f,0x810,0xa54b,0x810,0xa528,0x810,0xa544,0x810,0x29f7,0x810,0xa541, +0x810,0x29fd,0x810,0x29e7,0x810,0xa543,0x810,0xa52a,0x1810,0xa515,0x810,0xa512,0x6800,0x3846,0x3b9,0x399, +1,0x1fbe,0x8c0,1,0x3330,0x3b9,0x308,0x301,0x399,0x308,0x301,0x399,0x308,0x301,0x1fd3,0x841, +0x3b2,1,0x3d0,0x841,0x3b5,1,0x3f5,0x841,0x3b8,2,0x3d1,0x3f4,0x841,0x3b9,2,0x345, +0x1fbe,0x841,0x3ba,1,0x3f0,0x841,0x3bc,1,0xb5,0x841,0x3c0,1,0x3d6,0x841,0x3c1,1, +0x3f1,0x4850,0x20,1,0x3c2,0x841,0x3c6,1,0x3d5,0x841,0x3c9,1,0x2126,0x8c0,1,0x3330, +0x3c5,0x308,0x301,0x3a5,0x308,0x301,0x3a5,0x308,0x301,0x1fe3,0x844,0x392,1,0x3d0,0x844,0x395, +1,0x3f5,0x844,0x398,2,0x3d1,0x3f4,0x844,0x399,2,0x345,0x1fbe,0x844,0x39a,1,0x3f0, +0x844,0x39c,1,0xb5,0x844,0x3a0,1,0x3d6,0x844,0x3a1,1,0x3f1,0x806,0x3c3,0x3a3,0x844, +0x3a3,1,0x3c2,0x844,0x3a6,1,0x3d5,0x844,0x3a9,1,0x2126,0x806,0x3b2,0x392,0x846,0x3b8, +0x398,1,0x3f4,0x806,0x3c6,0x3a6,0x806,0x3c0,0x3a0,0x806,0x3ba,0x39a,0x806,0x3c1,0x3a1,0x841, +0x3b8,2,0x398,0x3d1,0x806,0x3b5,0x395,0x841,0x432,1,0x1c80,0x841,0x434,1,0x1c81,0x841, +0x43e,1,0x1c82,0x841,0x441,1,0x1c83,0x841,0x442,2,0x1c84,0x1c85,0x841,0x44a,1,0x1c86, +0x844,0x412,1,0x1c80,0x844,0x414,1,0x1c81,0x844,0x41e,1,0x1c82,0x844,0x421,1,0x1c83, +0x844,0x422,2,0x1c84,0x1c85,0x844,0x42a,1,0x1c86,0x841,0x463,1,0x1c87,0x844,0x462,1, +0x1c87,0x4880,0x20,0x565,0x582,0x810,0x1c60,0x80c,0x1c90,0x10d0,0x80c,0x1c91,0x10d1,0x80c,0x1c92,0x10d2, +0x80c,0x1c93,0x10d3,0x80c,0x1c94,0x10d4,0x80c,0x1c95,0x10d5,0x80c,0x1c96,0x10d6,0x80c,0x1c97,0x10d7,0x80c, +0x1c98,0x10d8,0x80c,0x1c99,0x10d9,0x80c,0x1c9a,0x10da,0x80c,0x1c9b,0x10db,0x80c,0x1c9c,0x10dc,0x80c,0x1c9d, +0x10dd,0x80c,0x1c9e,0x10de,0x80c,0x1c9f,0x10df,0x80c,0x1ca0,0x10e0,0x80c,0x1ca1,0x10e1,0x80c,0x1ca2,0x10e2, +0x80c,0x1ca3,0x10e3,0x80c,0x1ca4,0x10e4,0x80c,0x1ca5,0x10e5,0x80c,0x1ca6,0x10e6,0x80c,0x1ca7,0x10e7,0x80c, +0x1ca8,0x10e8,0x80c,0x1ca9,0x10e9,0x80c,0x1caa,0x10ea,0x80c,0x1cab,0x10eb,0x80c,0x1cac,0x10ec,0x80c,0x1cad, +0x10ed,0x80c,0x1cae,0x10ee,0x80c,0x1caf,0x10ef,0x80c,0x1cb0,0x10f0,0x80c,0x1cb1,0x10f1,0x80c,0x1cb2,0x10f2, +0x80c,0x1cb3,0x10f3,0x80c,0x1cb4,0x10f4,0x80c,0x1cb5,0x10f5,0x80c,0x1cb6,0x10f6,0x80c,0x1cb7,0x10f7,0x80c, +0x1cb8,0x10f8,0x80c,0x1cb9,0x10f9,0x80c,0x1cba,0x10fa,0x80c,0x1cbd,0x10fd,0x80c,0x1cbe,0x10fe,0x80c,0x1cbf, +0x10ff,0xa10,0x97d0,0xa10,8,0x806,0x13f0,0x13f0,0x806,0x13f1,0x13f1,0x806,0x13f2,0x13f2,0x806,0x13f3, +0x13f3,0x806,0x13f4,0x13f4,0x806,0x13f5,0x13f5,0x806,0x432,0x412,0x806,0x434,0x414,0x806,0x43e,0x41e, +0x806,0x441,0x421,0x846,0x442,0x422,1,0x1c85,0x846,0x442,0x422,1,0x1c84,0x806,0x44a,0x42a, +0x806,0x463,0x462,0x806,0xa64b,0xa64a,0xc10,0xbc0,0x810,0x8a04,0x810,0xee6,0x810,0x8a38,0x841,0x1e61, +1,0x1e9b,0x844,0x1e60,1,0x1e9b,0x880,0x2220,0x68,0x331,0x48,0x331,0x48,0x331,0x880,0x2220, +0x74,0x308,0x54,0x308,0x54,0x308,0x880,0x2220,0x77,0x30a,0x57,0x30a,0x57,0x30a,0x880,0x2220, +0x79,0x30a,0x59,0x30a,0x59,0x30a,0x880,0x2220,0x61,0x2be,0x41,0x2be,0x41,0x2be,0x806,0x1e61, +0x1e60,0xc90,0x1dbf,0x20,0x73,0x73,0x880,0x2220,0x3c5,0x313,0x3a5,0x313,0x3a5,0x313,0x880,0x3330, +0x3c5,0x313,0x300,0x3a5,0x313,0x300,0x3a5,0x313,0x300,0x880,0x3330,0x3c5,0x313,0x301,0x3a5,0x313, +0x301,0x3a5,0x313,0x301,0x880,0x3330,0x3c5,0x313,0x342,0x3a5,0x313,0x342,0x3a5,0x313,0x342,0x890, +8,0x220,0x1f00,0x3b9,0x1f08,0x399,0x890,8,0x220,0x1f01,0x3b9,0x1f09,0x399,0x890,8,0x220, +0x1f02,0x3b9,0x1f0a,0x399,0x890,8,0x220,0x1f03,0x3b9,0x1f0b,0x399,0x890,8,0x220,0x1f04,0x3b9, +0x1f0c,0x399,0x890,8,0x220,0x1f05,0x3b9,0x1f0d,0x399,0x890,8,0x220,0x1f06,0x3b9,0x1f0e,0x399, +0x890,8,0x220,0x1f07,0x3b9,0x1f0f,0x399,0xc90,8,0x220,0x1f00,0x3b9,0x1f08,0x399,0xc90,8, +0x220,0x1f01,0x3b9,0x1f09,0x399,0xc90,8,0x220,0x1f02,0x3b9,0x1f0a,0x399,0xc90,8,0x220,0x1f03, +0x3b9,0x1f0b,0x399,0xc90,8,0x220,0x1f04,0x3b9,0x1f0c,0x399,0xc90,8,0x220,0x1f05,0x3b9,0x1f0d, +0x399,0xc90,8,0x220,0x1f06,0x3b9,0x1f0e,0x399,0xc90,8,0x220,0x1f07,0x3b9,0x1f0f,0x399,0x890, +8,0x220,0x1f20,0x3b9,0x1f28,0x399,0x890,8,0x220,0x1f21,0x3b9,0x1f29,0x399,0x890,8,0x220, +0x1f22,0x3b9,0x1f2a,0x399,0x890,8,0x220,0x1f23,0x3b9,0x1f2b,0x399,0x890,8,0x220,0x1f24,0x3b9, +0x1f2c,0x399,0x890,8,0x220,0x1f25,0x3b9,0x1f2d,0x399,0x890,8,0x220,0x1f26,0x3b9,0x1f2e,0x399, +0x890,8,0x220,0x1f27,0x3b9,0x1f2f,0x399,0xc90,8,0x220,0x1f20,0x3b9,0x1f28,0x399,0xc90,8, +0x220,0x1f21,0x3b9,0x1f29,0x399,0xc90,8,0x220,0x1f22,0x3b9,0x1f2a,0x399,0xc90,8,0x220,0x1f23, +0x3b9,0x1f2b,0x399,0xc90,8,0x220,0x1f24,0x3b9,0x1f2c,0x399,0xc90,8,0x220,0x1f25,0x3b9,0x1f2d, +0x399,0xc90,8,0x220,0x1f26,0x3b9,0x1f2e,0x399,0xc90,8,0x220,0x1f27,0x3b9,0x1f2f,0x399,0x890, +8,0x220,0x1f60,0x3b9,0x1f68,0x399,0x890,8,0x220,0x1f61,0x3b9,0x1f69,0x399,0x890,8,0x220, +0x1f62,0x3b9,0x1f6a,0x399,0x890,8,0x220,0x1f63,0x3b9,0x1f6b,0x399,0x890,8,0x220,0x1f64,0x3b9, +0x1f6c,0x399,0x890,8,0x220,0x1f65,0x3b9,0x1f6d,0x399,0x890,8,0x220,0x1f66,0x3b9,0x1f6e,0x399, +0x890,8,0x220,0x1f67,0x3b9,0x1f6f,0x399,0xc90,8,0x220,0x1f60,0x3b9,0x1f68,0x399,0xc90,8, +0x220,0x1f61,0x3b9,0x1f69,0x399,0xc90,8,0x220,0x1f62,0x3b9,0x1f6a,0x399,0xc90,8,0x220,0x1f63, +0x3b9,0x1f6b,0x399,0xc90,8,0x220,0x1f64,0x3b9,0x1f6c,0x399,0xc90,8,0x220,0x1f65,0x3b9,0x1f6d, +0x399,0xc90,8,0x220,0x1f66,0x3b9,0x1f6e,0x399,0xc90,8,0x220,0x1f67,0x3b9,0x1f6f,0x399,0x880, +0x2220,0x1f70,0x3b9,0x1fba,0x399,0x1fba,0x345,0x890,9,0x220,0x3b1,0x3b9,0x391,0x399,0x880,0x2220, +0x3ac,0x3b9,0x386,0x399,0x386,0x345,0x880,0x2220,0x3b1,0x342,0x391,0x342,0x391,0x342,0x880,0x3330, +0x3b1,0x342,0x3b9,0x391,0x342,0x399,0x391,0x342,0x345,0xc90,9,0x220,0x3b1,0x3b9,0x391,0x399, +0x846,0x3b9,0x399,1,0x345,0x880,0x2220,0x1f74,0x3b9,0x1fca,0x399,0x1fca,0x345,0x890,9,0x220, +0x3b7,0x3b9,0x397,0x399,0x880,0x2220,0x3ae,0x3b9,0x389,0x399,0x389,0x345,0x880,0x2220,0x3b7,0x342, +0x397,0x342,0x397,0x342,0x880,0x3330,0x3b7,0x342,0x3b9,0x397,0x342,0x399,0x397,0x342,0x345,0xc90, +9,0x220,0x3b7,0x3b9,0x397,0x399,0x880,0x3330,0x3b9,0x308,0x300,0x399,0x308,0x300,0x399,0x308, +0x300,0x8c0,1,0x3330,0x3b9,0x308,0x301,0x399,0x308,0x301,0x399,0x308,0x301,0x390,0x880,0x2220, +0x3b9,0x342,0x399,0x342,0x399,0x342,0x880,0x3330,0x3b9,0x308,0x342,0x399,0x308,0x342,0x399,0x308, +0x342,0x880,0x3330,0x3c5,0x308,0x300,0x3a5,0x308,0x300,0x3a5,0x308,0x300,0x8c0,1,0x3330,0x3c5, +0x308,0x301,0x3a5,0x308,0x301,0x3a5,0x308,0x301,0x3b0,0x880,0x2220,0x3c1,0x313,0x3a1,0x313,0x3a1, +0x313,0x880,0x2220,0x3c5,0x342,0x3a5,0x342,0x3a5,0x342,0x880,0x3330,0x3c5,0x308,0x342,0x3a5,0x308, +0x342,0x3a5,0x308,0x342,0x880,0x2220,0x1f7c,0x3b9,0x1ffa,0x399,0x1ffa,0x345,0x890,9,0x220,0x3c9, +0x3b9,0x3a9,0x399,0x880,0x2220,0x3ce,0x3b9,0x38f,0x399,0x38f,0x345,0x880,0x2220,0x3c9,0x342,0x3a9, +0x342,0x3a9,0x342,0x880,0x3330,0x3c9,0x342,0x3b9,0x3a9,0x342,0x399,0x3a9,0x342,0x345,0xc90,9, +0x220,0x3c9,0x3b9,0x3a9,0x399,0xc50,0x1d5d,1,0x3a9,0xc50,0x20bf,1,0x4b,0xc50,0x2046,1, +0xc5,0xc10,0x29f7,0xc10,0xee6,0xc10,0x29e7,0xc10,0x2a2b,0xc10,0x2a28,0xc10,0x2a1c,0xc10,0x29fd,0xc10, +0x2a1f,0xc10,0x2a1e,0xc10,0x2a3f,0xc10,0x1c60,0x841,0xa64b,1,0x1c88,0x844,0xa64a,1,0x1c88,0xc10, +0x8a04,0xc10,0xa528,0xc10,0xa544,0xc10,0xa54f,0xc10,0xa54b,0xc10,0xa541,0xc10,0xa512,0xc10,0xa52a,0xc10, +0xa515,0x810,0x3a0,0xc10,0xa543,0xc10,0x8a38,0xc10,0x3a0,0x806,0x13a0,0x13a0,0x806,0x13a1,0x13a1,0x806, +0x13a2,0x13a2,0x806,0x13a3,0x13a3,0x806,0x13a4,0x13a4,0x806,0x13a5,0x13a5,0x806,0x13a6,0x13a6,0x806,0x13a7, +0x13a7,0x806,0x13a8,0x13a8,0x806,0x13a9,0x13a9,0x806,0x13aa,0x13aa,0x806,0x13ab,0x13ab,0x806,0x13ac,0x13ac, +0x806,0x13ad,0x13ad,0x806,0x13ae,0x13ae,0x806,0x13af,0x13af,0x806,0x13b0,0x13b0,0x806,0x13b1,0x13b1,0x806, +0x13b2,0x13b2,0x806,0x13b3,0x13b3,0x806,0x13b4,0x13b4,0x806,0x13b5,0x13b5,0x806,0x13b6,0x13b6,0x806,0x13b7, +0x13b7,0x806,0x13b8,0x13b8,0x806,0x13b9,0x13b9,0x806,0x13ba,0x13ba,0x806,0x13bb,0x13bb,0x806,0x13bc,0x13bc, +0x806,0x13bd,0x13bd,0x806,0x13be,0x13be,0x806,0x13bf,0x13bf,0x806,0x13c0,0x13c0,0x806,0x13c1,0x13c1,0x806, +0x13c2,0x13c2,0x806,0x13c3,0x13c3,0x806,0x13c4,0x13c4,0x806,0x13c5,0x13c5,0x806,0x13c6,0x13c6,0x806,0x13c7, +0x13c7,0x806,0x13c8,0x13c8,0x806,0x13c9,0x13c9,0x806,0x13ca,0x13ca,0x806,0x13cb,0x13cb,0x806,0x13cc,0x13cc, +0x806,0x13cd,0x13cd,0x806,0x13ce,0x13ce,0x806,0x13cf,0x13cf,0x806,0x13d0,0x13d0,0x806,0x13d1,0x13d1,0x806, +0x13d2,0x13d2,0x806,0x13d3,0x13d3,0x806,0x13d4,0x13d4,0x806,0x13d5,0x13d5,0x806,0x13d6,0x13d6,0x806,0x13d7, +0x13d7,0x806,0x13d8,0x13d8,0x806,0x13d9,0x13d9,0x806,0x13da,0x13da,0x806,0x13db,0x13db,0x806,0x13dc,0x13dc, +0x806,0x13dd,0x13dd,0x806,0x13de,0x13de,0x806,0x13df,0x13df,0x806,0x13e0,0x13e0,0x806,0x13e1,0x13e1,0x806, +0x13e2,0x13e2,0x806,0x13e3,0x13e3,0x806,0x13e4,0x13e4,0x806,0x13e5,0x13e5,0x806,0x13e6,0x13e6,0x806,0x13e7, +0x13e7,0x806,0x13e8,0x13e8,0x806,0x13e9,0x13e9,0x806,0x13ea,0x13ea,0x806,0x13eb,0x13eb,0x806,0x13ec,0x13ec, +0x806,0x13ed,0x13ed,0x806,0x13ee,0x13ee,0x806,0x13ef,0x13ef,0x880,0x2220,0x66,0x66,0x46,0x46,0x46, +0x66,0x880,0x2220,0x66,0x69,0x46,0x49,0x46,0x69,0x880,0x2220,0x66,0x6c,0x46,0x4c,0x46, +0x6c,0x880,0x3330,0x66,0x66,0x69,0x46,0x46,0x49,0x46,0x66,0x69,0x880,0x3330,0x66,0x66, +0x6c,0x46,0x46,0x4c,0x46,0x66,0x6c,0x8c0,1,0x2220,0x73,0x74,0x53,0x54,0x53,0x74, +0xfb06,0x8c0,1,0x2220,0x73,0x74,0x53,0x54,0x53,0x74,0xfb05,0x880,0x2220,0x574,0x576,0x544, +0x546,0x544,0x576,0x880,0x2220,0x574,0x565,0x544,0x535,0x544,0x565,0x880,0x2220,0x574,0x56b,0x544, +0x53b,0x544,0x56b,0x880,0x2220,0x57e,0x576,0x54e,0x546,0x54e,0x576,0x880,0x2220,0x574,0x56d,0x544, +0x53d,0x544,0x56d +}; + +static const uint16_t ucase_props_unfold[370]={ +0x49,5,3,0,0,0x61,0x2be,0,0x1e9a,0,0x66,0x66,0,0xfb00,0,0x66, +0x66,0x69,0xfb03,0,0x66,0x66,0x6c,0xfb04,0,0x66,0x69,0,0xfb01,0,0x66,0x6c, +0,0xfb02,0,0x68,0x331,0,0x1e96,0,0x69,0x307,0,0x130,0,0x6a,0x30c,0, +0x1f0,0,0x73,0x73,0,0xdf,0x1e9e,0x73,0x74,0,0xfb05,0xfb06,0x74,0x308,0,0x1e97, +0,0x77,0x30a,0,0x1e98,0,0x79,0x30a,0,0x1e99,0,0x2bc,0x6e,0,0x149,0, +0x3ac,0x3b9,0,0x1fb4,0,0x3ae,0x3b9,0,0x1fc4,0,0x3b1,0x342,0,0x1fb6,0,0x3b1, +0x342,0x3b9,0x1fb7,0,0x3b1,0x3b9,0,0x1fb3,0x1fbc,0x3b7,0x342,0,0x1fc6,0,0x3b7,0x342, +0x3b9,0x1fc7,0,0x3b7,0x3b9,0,0x1fc3,0x1fcc,0x3b9,0x308,0x300,0x1fd2,0,0x3b9,0x308,0x301, +0x390,0x1fd3,0x3b9,0x308,0x342,0x1fd7,0,0x3b9,0x342,0,0x1fd6,0,0x3c1,0x313,0,0x1fe4, +0,0x3c5,0x308,0x300,0x1fe2,0,0x3c5,0x308,0x301,0x3b0,0x1fe3,0x3c5,0x308,0x342,0x1fe7,0, +0x3c5,0x313,0,0x1f50,0,0x3c5,0x313,0x300,0x1f52,0,0x3c5,0x313,0x301,0x1f54,0,0x3c5, +0x313,0x342,0x1f56,0,0x3c5,0x342,0,0x1fe6,0,0x3c9,0x342,0,0x1ff6,0,0x3c9,0x342, +0x3b9,0x1ff7,0,0x3c9,0x3b9,0,0x1ff3,0x1ffc,0x3ce,0x3b9,0,0x1ff4,0,0x565,0x582,0, +0x587,0,0x574,0x565,0,0xfb14,0,0x574,0x56b,0,0xfb15,0,0x574,0x56d,0,0xfb17, +0,0x574,0x576,0,0xfb13,0,0x57e,0x576,0,0xfb16,0,0x1f00,0x3b9,0,0x1f80,0x1f88, +0x1f01,0x3b9,0,0x1f81,0x1f89,0x1f02,0x3b9,0,0x1f82,0x1f8a,0x1f03,0x3b9,0,0x1f83,0x1f8b,0x1f04, +0x3b9,0,0x1f84,0x1f8c,0x1f05,0x3b9,0,0x1f85,0x1f8d,0x1f06,0x3b9,0,0x1f86,0x1f8e,0x1f07,0x3b9, +0,0x1f87,0x1f8f,0x1f20,0x3b9,0,0x1f90,0x1f98,0x1f21,0x3b9,0,0x1f91,0x1f99,0x1f22,0x3b9,0, +0x1f92,0x1f9a,0x1f23,0x3b9,0,0x1f93,0x1f9b,0x1f24,0x3b9,0,0x1f94,0x1f9c,0x1f25,0x3b9,0,0x1f95, +0x1f9d,0x1f26,0x3b9,0,0x1f96,0x1f9e,0x1f27,0x3b9,0,0x1f97,0x1f9f,0x1f60,0x3b9,0,0x1fa0,0x1fa8, +0x1f61,0x3b9,0,0x1fa1,0x1fa9,0x1f62,0x3b9,0,0x1fa2,0x1faa,0x1f63,0x3b9,0,0x1fa3,0x1fab,0x1f64, +0x3b9,0,0x1fa4,0x1fac,0x1f65,0x3b9,0,0x1fa5,0x1fad,0x1f66,0x3b9,0,0x1fa6,0x1fae,0x1f67,0x3b9, +0,0x1fa7,0x1faf,0x1f70,0x3b9,0,0x1fb2,0,0x1f74,0x3b9,0,0x1fc2,0,0x1f7c,0x3b9,0, +0x1ff2,0 +}; + +static const UCaseProps ucase_props_singleton={ + NULL, + ucase_props_indexes, + ucase_props_exceptions, + ucase_props_unfold, + { + ucase_props_trieIndex, + ucase_props_trieIndex+3288, + NULL, + 3288, + 9068, + 0x188, + 0xd54, + 0x0, + 0x0, + 0xe0800, + 0x3040, + NULL, 0, FALSE, FALSE, 0, NULL + }, + { 4,0,0,0 } +}; + +#endif // INCLUDED_FROM_UCASE_CPP diff --git a/thirdparty/icu4c/common/ucasemap.cpp b/thirdparty/icu4c/common/ucasemap.cpp new file mode 100644 index 0000000000..ed72bda828 --- /dev/null +++ b/thirdparty/icu4c/common/ucasemap.cpp @@ -0,0 +1,953 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2005-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ucasemap.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2005may06 +* created by: Markus W. Scherer +* +* Case mapping service object and functions using it. +*/ + +#include "unicode/utypes.h" +#include "unicode/brkiter.h" +#include "unicode/bytestream.h" +#include "unicode/casemap.h" +#include "unicode/edits.h" +#include "unicode/stringoptions.h" +#include "unicode/stringpiece.h" +#include "unicode/ubrk.h" +#include "unicode/uloc.h" +#include "unicode/ustring.h" +#include "unicode/ucasemap.h" +#if !UCONFIG_NO_BREAK_ITERATION +#include "unicode/utext.h" +#endif +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "bytesinkutil.h" +#include "cmemory.h" +#include "cstring.h" +#include "uassert.h" +#include "ucase.h" +#include "ucasemap_imp.h" +#include "ustr_imp.h" + +U_NAMESPACE_USE + +/* UCaseMap service object -------------------------------------------------- */ + +UCaseMap::UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode) : +#if !UCONFIG_NO_BREAK_ITERATION + iter(NULL), +#endif + caseLocale(UCASE_LOC_UNKNOWN), options(opts) { + ucasemap_setLocale(this, localeID, pErrorCode); +} + +UCaseMap::~UCaseMap() { +#if !UCONFIG_NO_BREAK_ITERATION + delete iter; +#endif +} + +U_CAPI UCaseMap * U_EXPORT2 +ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + UCaseMap *csm = new UCaseMap(locale, options, pErrorCode); + if(csm==NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } else if (U_FAILURE(*pErrorCode)) { + delete csm; + return NULL; + } + return csm; +} + +U_CAPI void U_EXPORT2 +ucasemap_close(UCaseMap *csm) { + delete csm; +} + +U_CAPI const char * U_EXPORT2 +ucasemap_getLocale(const UCaseMap *csm) { + return csm->locale; +} + +U_CAPI uint32_t U_EXPORT2 +ucasemap_getOptions(const UCaseMap *csm) { + return csm->options; +} + +U_CAPI void U_EXPORT2 +ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + if (locale != NULL && *locale == 0) { + csm->locale[0] = 0; + csm->caseLocale = UCASE_LOC_ROOT; + return; + } + + int32_t length=uloc_getName(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || length==sizeof(csm->locale)) { + *pErrorCode=U_ZERO_ERROR; + /* we only really need the language code for case mappings */ + length=uloc_getLanguage(locale, csm->locale, (int32_t)sizeof(csm->locale), pErrorCode); + } + if(length==sizeof(csm->locale)) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + if(U_SUCCESS(*pErrorCode)) { + csm->caseLocale=UCASE_LOC_UNKNOWN; + csm->caseLocale = ucase_getCaseLocale(csm->locale); + } else { + csm->locale[0]=0; + csm->caseLocale = UCASE_LOC_ROOT; + } +} + +U_CAPI void U_EXPORT2 +ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + csm->options=options; +} + +/* UTF-8 string case mappings ----------------------------------------------- */ + +/* TODO(markus): Move to a new, separate utf8case.cpp file. */ + +namespace { + +/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */ +inline UBool +appendResult(int32_t cpLength, int32_t result, const UChar *s, + ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) { + U_ASSERT(U_SUCCESS(errorCode)); + + /* decode the result */ + if(result<0) { + /* (not) original code point */ + if(edits!=NULL) { + edits->addUnchanged(cpLength); + } + if((options & U_OMIT_UNCHANGED_TEXT) == 0) { + ByteSinkUtil::appendCodePoint(cpLength, ~result, sink); + } + } else { + if(result<=UCASE_MAX_STRING_LENGTH) { + // string: "result" is the UTF-16 length + return ByteSinkUtil::appendChange(cpLength, s, result, sink, edits, errorCode); + } else { + ByteSinkUtil::appendCodePoint(cpLength, result, sink, edits); + } + } + return TRUE; +} + +// See unicode/utf8.h U8_APPEND_UNSAFE(). +inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); } +inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); } + +UChar32 U_CALLCONV +utf8_caseContextIterator(void *context, int8_t dir) { + UCaseContext *csc=(UCaseContext *)context; + UChar32 c; + + if(dir<0) { + /* reset for backward iteration */ + csc->index=csc->cpStart; + csc->dir=dir; + } else if(dir>0) { + /* reset for forward iteration */ + csc->index=csc->cpLimit; + csc->dir=dir; + } else { + /* continue current iteration direction */ + dir=csc->dir; + } + + if(dir<0) { + if(csc->start<csc->index) { + U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c); + return c; + } + } else { + if(csc->index<csc->limit) { + U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c); + return c; + } + } + return U_SENTINEL; +} + +/** + * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account. + * caseLocale < 0: Case-folds [srcStart..srcLimit[. + */ +void toLower(int32_t caseLocale, uint32_t options, + const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, + icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToLower; + if (caseLocale == UCASE_LOC_ROOT || + (caseLocale >= 0 ? + !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) : + (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) { + latinToLower = LatinCase::TO_LOWER_NORMAL; + } else { + latinToLower = LatinCase::TO_LOWER_TR_LT; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t prev = srcStart; + int32_t srcIndex = srcStart; + for (;;) { + // fast path for simple cases + int32_t cpStart; + UChar32 c; + for (;;) { + if (U_FAILURE(errorCode) || srcIndex >= srcLimit) { + c = U_SENTINEL; + break; + } + uint8_t lead = src[srcIndex++]; + if (lead <= 0x7f) { + int8_t d = latinToLower[lead]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 1; + c = lead; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev, + sink, options, edits, errorCode); + char ascii = (char)(lead + d); + sink.Append(&ascii, 1); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + prev = srcIndex; + continue; + } else if (lead < 0xe3) { + uint8_t t; + if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLimit && + (t = src[srcIndex] - 0x80) <= 0x3f) { + // U+0080..U+017F + ++srcIndex; + c = ((lead - 0xc0) << 6) | t; + int8_t d = latinToLower[c]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 2; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev, + sink, options, edits, errorCode); + ByteSinkUtil::appendTwoBytes(c + d, sink); + if (edits != nullptr) { + edits->addReplace(2, 2); + } + prev = srcIndex; + continue; + } + } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) && + (srcIndex + 2) <= srcLimit && + U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) { + // most of CJK: no case mappings + srcIndex += 2; + continue; + } + cpStart = --srcIndex; + U8_NEXT(src, srcIndex, srcLimit, c); + if (c < 0) { + // ill-formed UTF-8 + continue; + } + uint16_t props = UTRIE2_GET16(trie, c); + if (UCASE_HAS_EXCEPTION(props)) { break; } + int32_t delta; + if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, + sink, options, edits, errorCode); + ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits); + prev = srcIndex; + } + if (c < 0) { + break; + } + // slow path + const UChar *s; + if (caseLocale >= 0) { + csc->cpStart = cpStart; + csc->cpLimit = srcIndex; + c = ucase_toFullLower(c, utf8_caseContextIterator, csc, &s, caseLocale); + } else { + c = ucase_toFullFolding(c, &s, options); + } + if (c >= 0) { + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, + sink, options, edits, errorCode); + appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); + prev = srcIndex; + } + } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev, + sink, options, edits, errorCode); +} + +void toUpper(int32_t caseLocale, uint32_t options, + const uint8_t *src, UCaseContext *csc, int32_t srcLength, + icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToUpper; + if (caseLocale == UCASE_LOC_TURKISH) { + latinToUpper = LatinCase::TO_UPPER_TR; + } else { + latinToUpper = LatinCase::TO_UPPER_NORMAL; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t prev = 0; + int32_t srcIndex = 0; + for (;;) { + // fast path for simple cases + int32_t cpStart; + UChar32 c; + for (;;) { + if (U_FAILURE(errorCode) || srcIndex >= srcLength) { + c = U_SENTINEL; + break; + } + uint8_t lead = src[srcIndex++]; + if (lead <= 0x7f) { + int8_t d = latinToUpper[lead]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 1; + c = lead; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev, + sink, options, edits, errorCode); + char ascii = (char)(lead + d); + sink.Append(&ascii, 1); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + prev = srcIndex; + continue; + } else if (lead < 0xe3) { + uint8_t t; + if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLength && + (t = src[srcIndex] - 0x80) <= 0x3f) { + // U+0080..U+017F + ++srcIndex; + c = ((lead - 0xc0) << 6) | t; + int8_t d = latinToUpper[c]; + if (d == LatinCase::EXC) { + cpStart = srcIndex - 2; + break; + } + if (d == 0) { continue; } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev, + sink, options, edits, errorCode); + ByteSinkUtil::appendTwoBytes(c + d, sink); + if (edits != nullptr) { + edits->addReplace(2, 2); + } + prev = srcIndex; + continue; + } + } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) && + (srcIndex + 2) <= srcLength && + U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) { + // most of CJK: no case mappings + srcIndex += 2; + continue; + } + cpStart = --srcIndex; + U8_NEXT(src, srcIndex, srcLength, c); + if (c < 0) { + // ill-formed UTF-8 + continue; + } + uint16_t props = UTRIE2_GET16(trie, c); + if (UCASE_HAS_EXCEPTION(props)) { break; } + int32_t delta; + if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, + sink, options, edits, errorCode); + ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits); + prev = srcIndex; + } + if (c < 0) { + break; + } + // slow path + csc->cpStart = cpStart; + csc->cpLimit = srcIndex; + const UChar *s; + c = ucase_toFullUpper(c, utf8_caseContextIterator, csc, &s, caseLocale); + if (c >= 0) { + ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev, + sink, options, edits, errorCode); + appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode); + prev = srcIndex; + } + } + ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev, + sink, options, edits, errorCode); +} + +} // namespace + +#if !UCONFIG_NO_BREAK_ITERATION + +U_CFUNC void U_CALLCONV +ucasemap_internalUTF8ToTitle( + int32_t caseLocale, uint32_t options, BreakIterator *iter, + const uint8_t *src, int32_t srcLength, + ByteSink &sink, icu::Edits *edits, + UErrorCode &errorCode) { + if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) { + return; + } + + /* set up local variables */ + UCaseContext csc=UCASECONTEXT_INITIALIZER; + csc.p=(void *)src; + csc.limit=srcLength; + int32_t prev=0; + UBool isFirstIndex=TRUE; + + /* titlecasing loop */ + while(prev<srcLength) { + /* find next index where to titlecase */ + int32_t index; + if(isFirstIndex) { + isFirstIndex=FALSE; + index=iter->first(); + } else { + index=iter->next(); + } + if(index==UBRK_DONE || index>srcLength) { + index=srcLength; + } + + /* + * Segment [prev..index[ into 3 parts: + * a) skipped characters (copy as-is) [prev..titleStart[ + * b) first letter (titlecase) [titleStart..titleLimit[ + * c) subsequent characters (lowercase) [titleLimit..index[ + */ + if(prev<index) { + /* find and copy skipped characters [prev..titleStart[ */ + int32_t titleStart=prev; + int32_t titleLimit=prev; + UChar32 c; + U8_NEXT(src, titleLimit, index, c); + if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) { + // Adjust the titlecasing index to the next cased character, + // or to the next letter/number/symbol/private use. + // Stop with titleStart<titleLimit<=index + // if there is a character to be titlecased, + // or else stop with titleStart==titleLimit==index. + UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0; + while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) { + titleStart=titleLimit; + if(titleLimit==index) { + break; + } + U8_NEXT(src, titleLimit, index, c); + } + if (prev < titleStart) { + if (!ByteSinkUtil::appendUnchanged(src+prev, titleStart-prev, + sink, options, edits, errorCode)) { + return; + } + } + } + + if(titleStart<titleLimit) { + /* titlecase c which is from [titleStart..titleLimit[ */ + if(c>=0) { + csc.cpStart=titleStart; + csc.cpLimit=titleLimit; + const UChar *s; + c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale); + if (!appendResult(titleLimit-titleStart, c, s, sink, options, edits, errorCode)) { + return; + } + } else { + // Malformed UTF-8. + if (!ByteSinkUtil::appendUnchanged(src+titleStart, titleLimit-titleStart, + sink, options, edits, errorCode)) { + return; + } + } + + /* Special case Dutch IJ titlecasing */ + if (titleStart+1 < index && + caseLocale == UCASE_LOC_DUTCH && + (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) { + if (src[titleStart+1] == 0x006A) { + ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits); + titleLimit++; + } else if (src[titleStart+1] == 0x004A) { + // Keep the capital J from getting lowercased. + if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1, + sink, options, edits, errorCode)) { + return; + } + titleLimit++; + } + } + + /* lowercase [titleLimit..index[ */ + if(titleLimit<index) { + if((options&U_TITLECASE_NO_LOWERCASE)==0) { + /* Normal operation: Lowercase the rest of the word. */ + toLower(caseLocale, options, + src, &csc, titleLimit, index, + sink, edits, errorCode); + if(U_FAILURE(errorCode)) { + return; + } + } else { + /* Optionally just copy the rest of the word unchanged. */ + if (!ByteSinkUtil::appendUnchanged(src+titleLimit, index-titleLimit, + sink, options, edits, errorCode)) { + return; + } + } + } + } + } + + prev=index; + } +} + +#endif + +U_NAMESPACE_BEGIN +namespace GreekUpper { + +UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) { + while (i < length) { + UChar32 c; + U8_NEXT(s, i, length, c); + int32_t type = ucase_getTypeOrIgnorable(c); + if ((type & UCASE_IGNORABLE) != 0) { + // Case-ignorable, continue with the loop. + } else if (type != UCASE_NONE) { + return TRUE; // Followed by cased letter. + } else { + return FALSE; // Uncased and not case-ignorable. + } + } + return FALSE; // Not followed by cased letter. +} + +// Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java. +void toUpper(uint32_t options, + const uint8_t *src, int32_t srcLength, + ByteSink &sink, Edits *edits, + UErrorCode &errorCode) { + uint32_t state = 0; + for (int32_t i = 0; i < srcLength;) { + int32_t nextIndex = i; + UChar32 c; + U8_NEXT(src, nextIndex, srcLength, c); + uint32_t nextState = 0; + int32_t type = ucase_getTypeOrIgnorable(c); + if ((type & UCASE_IGNORABLE) != 0) { + // c is case-ignorable + nextState |= (state & AFTER_CASED); + } else if (type != UCASE_NONE) { + // c is cased + nextState |= AFTER_CASED; + } + uint32_t data = getLetterData(c); + if (data > 0) { + uint32_t upper = data & UPPER_MASK; + // Add a dialytika to this iota or ypsilon vowel + // if we removed a tonos from the previous vowel, + // and that previous vowel did not also have (or gain) a dialytika. + // Adding one only to the final vowel in a longer sequence + // (which does not occur in normal writing) would require lookahead. + // Set the same flag as for preserving an existing dialytika. + if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 && + (upper == 0x399 || upper == 0x3A5)) { + data |= HAS_DIALYTIKA; + } + int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota. + if ((data & HAS_YPOGEGRAMMENI) != 0) { + numYpogegrammeni = 1; + } + // Skip combining diacritics after this Greek letter. + int32_t nextNextIndex = nextIndex; + while (nextIndex < srcLength) { + UChar32 c2; + U8_NEXT(src, nextNextIndex, srcLength, c2); + uint32_t diacriticData = getDiacriticData(c2); + if (diacriticData != 0) { + data |= diacriticData; + if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) { + ++numYpogegrammeni; + } + nextIndex = nextNextIndex; + } else { + break; // not a Greek diacritic + } + } + if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) { + nextState |= AFTER_VOWEL_WITH_ACCENT; + } + // Map according to Greek rules. + UBool addTonos = FALSE; + if (upper == 0x397 && + (data & HAS_ACCENT) != 0 && + numYpogegrammeni == 0 && + (state & AFTER_CASED) == 0 && + !isFollowedByCasedLetter(src, nextIndex, srcLength)) { + // Keep disjunctive "or" with (only) a tonos. + // We use the same "word boundary" conditions as for the Final_Sigma test. + if (i == nextIndex) { + upper = 0x389; // Preserve the precomposed form. + } else { + addTonos = TRUE; + } + } else if ((data & HAS_DIALYTIKA) != 0) { + // Preserve a vowel with dialytika in precomposed form if it exists. + if (upper == 0x399) { + upper = 0x3AA; + data &= ~HAS_EITHER_DIALYTIKA; + } else if (upper == 0x3A5) { + upper = 0x3AB; + data &= ~HAS_EITHER_DIALYTIKA; + } + } + + UBool change; + if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) { + change = TRUE; // common, simple usage + } else { + // Find out first whether we are changing the text. + U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block + change = (i + 2) > nextIndex || + src[i] != getTwoByteLead(upper) || src[i + 1] != getTwoByteTrail(upper) || + numYpogegrammeni > 0; + int32_t i2 = i + 2; + if ((data & HAS_EITHER_DIALYTIKA) != 0) { + change |= (i2 + 2) > nextIndex || + src[i2] != (uint8_t)u8"\u0308"[0] || + src[i2 + 1] != (uint8_t)u8"\u0308"[1]; + i2 += 2; + } + if (addTonos) { + change |= (i2 + 2) > nextIndex || + src[i2] != (uint8_t)u8"\u0301"[0] || + src[i2 + 1] != (uint8_t)u8"\u0301"[1]; + i2 += 2; + } + int32_t oldLength = nextIndex - i; + int32_t newLength = (i2 - i) + numYpogegrammeni * 2; // 2 bytes per U+0399 + change |= oldLength != newLength; + if (change) { + if (edits != NULL) { + edits->addReplace(oldLength, newLength); + } + } else { + if (edits != NULL) { + edits->addUnchanged(oldLength); + } + // Write unchanged text? + change = (options & U_OMIT_UNCHANGED_TEXT) == 0; + } + } + + if (change) { + ByteSinkUtil::appendTwoBytes(upper, sink); + if ((data & HAS_EITHER_DIALYTIKA) != 0) { + sink.AppendU8(u8"\u0308", 2); // restore or add a dialytika + } + if (addTonos) { + sink.AppendU8(u8"\u0301", 2); + } + while (numYpogegrammeni > 0) { + sink.AppendU8(u8"\u0399", 2); + --numYpogegrammeni; + } + } + } else if(c>=0) { + const UChar *s; + c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK); + if (!appendResult(nextIndex - i, c, s, sink, options, edits, errorCode)) { + return; + } + } else { + // Malformed UTF-8. + if (!ByteSinkUtil::appendUnchanged(src+i, nextIndex-i, + sink, options, edits, errorCode)) { + return; + } + } + i = nextIndex; + state = nextState; + } +} + +} // namespace GreekUpper +U_NAMESPACE_END + +static void U_CALLCONV +ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED + const uint8_t *src, int32_t srcLength, + icu::ByteSink &sink, icu::Edits *edits, + UErrorCode &errorCode) { + UCaseContext csc=UCASECONTEXT_INITIALIZER; + csc.p=(void *)src; + csc.limit=srcLength; + toLower( + caseLocale, options, + src, &csc, 0, srcLength, + sink, edits, errorCode); +} + +static void U_CALLCONV +ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED + const uint8_t *src, int32_t srcLength, + icu::ByteSink &sink, icu::Edits *edits, + UErrorCode &errorCode) { + if (caseLocale == UCASE_LOC_GREEK) { + GreekUpper::toUpper(options, src, srcLength, sink, edits, errorCode); + } else { + UCaseContext csc=UCASECONTEXT_INITIALIZER; + csc.p=(void *)src; + csc.limit=srcLength; + toUpper( + caseLocale, options, + src, &csc, srcLength, + sink, edits, errorCode); + } +} + +static void U_CALLCONV +ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED + const uint8_t *src, int32_t srcLength, + icu::ByteSink &sink, icu::Edits *edits, + UErrorCode &errorCode) { + toLower( + -1, options, + src, nullptr, 0, srcLength, + sink, edits, errorCode); +} + +void +ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + const char *src, int32_t srcLength, + UTF8CaseMapper *stringCaseMapper, + icu::ByteSink &sink, icu::Edits *edits, + UErrorCode &errorCode) { + /* check argument values */ + if (U_FAILURE(errorCode)) { + return; + } + if ((src == nullptr && srcLength != 0) || srcLength < -1) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + // Get the string length. + if (srcLength == -1) { + srcLength = (int32_t)uprv_strlen((const char *)src); + } + + if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { + edits->reset(); + } + stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR + (const uint8_t *)src, srcLength, sink, edits, errorCode); + sink.Flush(); + if (U_SUCCESS(errorCode)) { + if (edits != nullptr) { + edits->copyErrorTo(errorCode); + } + } +} + +int32_t +ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + char *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UTF8CaseMapper *stringCaseMapper, + icu::Edits *edits, + UErrorCode &errorCode) { + /* check argument values */ + if(U_FAILURE(errorCode)) { + return 0; + } + if( destCapacity<0 || + (dest==NULL && destCapacity>0) || + (src==NULL && srcLength!=0) || srcLength<-1 + ) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* get the string length */ + if(srcLength==-1) { + srcLength=(int32_t)uprv_strlen((const char *)src); + } + + /* check for overlapping source and destination */ + if( dest!=NULL && + ((src>=dest && src<(dest+destCapacity)) || + (dest>=src && dest<(src+srcLength))) + ) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + CheckedArrayByteSink sink(dest, destCapacity); + if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { + edits->reset(); + } + stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR + (const uint8_t *)src, srcLength, sink, edits, errorCode); + sink.Flush(); + if (U_SUCCESS(errorCode)) { + if (sink.Overflowed()) { + errorCode = U_BUFFER_OVERFLOW_ERROR; + } else if (edits != nullptr) { + edits->copyErrorTo(errorCode); + } + } + return u_terminateChars(dest, destCapacity, sink.NumberOfBytesAppended(), &errorCode); +} + +/* public API functions */ + +U_CAPI int32_t U_EXPORT2 +ucasemap_utf8ToLower(const UCaseMap *csm, + char *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode) { + return ucasemap_mapUTF8( + csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ucasemap_internalUTF8ToLower, NULL, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +ucasemap_utf8ToUpper(const UCaseMap *csm, + char *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode) { + return ucasemap_mapUTF8( + csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ucasemap_internalUTF8ToUpper, NULL, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +ucasemap_utf8FoldCase(const UCaseMap *csm, + char *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode) { + return ucasemap_mapUTF8( + UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ucasemap_internalUTF8Fold, NULL, *pErrorCode); +} + +U_NAMESPACE_BEGIN + +void CaseMap::utf8ToLower( + const char *locale, uint32_t options, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode) { + ucasemap_mapUTF8( + ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL + src.data(), src.length(), + ucasemap_internalUTF8ToLower, sink, edits, errorCode); +} + +void CaseMap::utf8ToUpper( + const char *locale, uint32_t options, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode) { + ucasemap_mapUTF8( + ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL + src.data(), src.length(), + ucasemap_internalUTF8ToUpper, sink, edits, errorCode); +} + +void CaseMap::utf8Fold( + uint32_t options, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode) { + ucasemap_mapUTF8( + UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL + src.data(), src.length(), + ucasemap_internalUTF8Fold, sink, edits, errorCode); +} + +int32_t CaseMap::utf8ToLower( + const char *locale, uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + return ucasemap_mapUTF8( + ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ucasemap_internalUTF8ToLower, edits, errorCode); +} + +int32_t CaseMap::utf8ToUpper( + const char *locale, uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + return ucasemap_mapUTF8( + ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ucasemap_internalUTF8ToUpper, edits, errorCode); +} + +int32_t CaseMap::utf8Fold( + uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + return ucasemap_mapUTF8( + UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ucasemap_internalUTF8Fold, edits, errorCode); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/ucasemap_imp.h b/thirdparty/icu4c/common/ucasemap_imp.h new file mode 100644 index 0000000000..e17a0ae5a3 --- /dev/null +++ b/thirdparty/icu4c/common/ucasemap_imp.h @@ -0,0 +1,282 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucasemap_imp.h +// created: 2017feb08 Markus W. Scherer + +#ifndef __UCASEMAP_IMP_H__ +#define __UCASEMAP_IMP_H__ + +#include "unicode/utypes.h" +#include "unicode/ucasemap.h" +#include "unicode/uchar.h" +#include "ucase.h" + +/** + * Bit mask for the titlecasing iterator options bit field. + * Currently only 3 out of 8 values are used: + * 0 (words), U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. + * See stringoptions.h. + * @internal + */ +#define U_TITLECASE_ITERATOR_MASK 0xe0 + +/** + * Bit mask for the titlecasing index adjustment options bit set. + * Currently two bits are defined: + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED. + * See stringoptions.h. + * @internal + */ +#define U_TITLECASE_ADJUSTMENT_MASK 0x600 + +/** + * Internal API, used by u_strcasecmp() etc. + * Compare strings case-insensitively, + * in code point order or code unit order. + */ +U_CFUNC int32_t +u_strcmpFold(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode); + +/** + * Internal API, used for detecting length of + * shared prefix case-insensitively. + * @param s1 input string 1 + * @param length1 length of string 1, or -1 (NULL terminated) + * @param s2 input string 2 + * @param length2 length of string 2, or -1 (NULL terminated) + * @param options compare options + * @param matchLen1 (output) length of partial prefix match in s1 + * @param matchLen2 (output) length of partial prefix match in s2 + * @param pErrorCode receives error status + */ +U_CAPI void +u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + int32_t *matchLen1, int32_t *matchLen2, + UErrorCode *pErrorCode); + +#ifdef __cplusplus + +U_NAMESPACE_BEGIN + +class BreakIterator; // unicode/brkiter.h +class ByteSink; +class Locale; // unicode/locid.h + +/** Returns true if the options are valid. Otherwise false, and sets an error. */ +inline UBool ustrcase_checkTitleAdjustmentOptions(uint32_t options, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return false; } + if ((options & U_TITLECASE_ADJUSTMENT_MASK) == U_TITLECASE_ADJUSTMENT_MASK) { + // Both options together. + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return false; + } + return true; +} + +inline UBool ustrcase_isLNS(UChar32 c) { + // Letter, number, symbol, + // or a private use code point because those are typically used as letters or numbers. + // Consider modifier letters only if they are cased. + const uint32_t LNS = (U_GC_L_MASK|U_GC_N_MASK|U_GC_S_MASK|U_GC_CO_MASK) & ~U_GC_LM_MASK; + int gc = u_charType(c); + return (U_MASK(gc) & LNS) != 0 || (gc == U_MODIFIER_LETTER && ucase_getType(c) != UCASE_NONE); +} + +#if !UCONFIG_NO_BREAK_ITERATION + +/** Returns nullptr if error. Pass in either locale or locID, not both. */ +U_CFUNC +BreakIterator *ustrcase_getTitleBreakIterator( + const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter, + LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode); + +#endif + +U_NAMESPACE_END + +#include "unicode/unistr.h" // for UStringCaseMapper + +/* + * Internal string casing functions implementing + * ustring.h/ustrcase.cpp and UnicodeString case mapping functions. + */ + +struct UCaseMap : public icu::UMemory { + /** Implements most of ucasemap_open(). */ + UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode); + ~UCaseMap(); + +#if !UCONFIG_NO_BREAK_ITERATION + icu::BreakIterator *iter; /* We adopt the iterator, so we own it. */ +#endif + char locale[32]; + int32_t caseLocale; + uint32_t options; +}; + +#if UCONFIG_NO_BREAK_ITERATION +# define UCASEMAP_BREAK_ITERATOR_PARAM +# define UCASEMAP_BREAK_ITERATOR_UNUSED +# define UCASEMAP_BREAK_ITERATOR +# define UCASEMAP_BREAK_ITERATOR_NULL +#else +# define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter, +# define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *, +# define UCASEMAP_BREAK_ITERATOR iter, +# define UCASEMAP_BREAK_ITERATOR_NULL NULL, +#endif + +U_CFUNC int32_t +ustrcase_getCaseLocale(const char *locale); + +// TODO: swap src / dest if approved for new public api +/** Implements UStringCaseMapper. */ +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +/** Implements UStringCaseMapper. */ +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + +/** Implements UStringCaseMapper. */ +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, + icu::BreakIterator *iter, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +#endif + +/** Implements UStringCaseMapper. */ +U_CFUNC int32_t U_CALLCONV +ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +/** + * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz(). + * Implements argument checking. + */ +U_CFUNC int32_t +ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UStringCaseMapper *stringCaseMapper, + icu::Edits *edits, + UErrorCode &errorCode); + +/** + * Common string case mapping implementation for old-fashioned u_strToXyz() functions + * that allow the source string to overlap the destination buffer. + * Implements argument checking and internally works with an intermediate buffer if necessary. + */ +U_CFUNC int32_t +ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UStringCaseMapper *stringCaseMapper, + UErrorCode &errorCode); + +/** + * UTF-8 string case mapping function type, used by ucasemap_mapUTF8(). + * UTF-8 version of UStringCaseMapper. + * All error checking must be done. + * The UCaseMap must be fully initialized, with locale and/or iter set as needed. + */ +typedef void U_CALLCONV +UTF8CaseMapper(int32_t caseLocale, uint32_t options, +#if !UCONFIG_NO_BREAK_ITERATION + icu::BreakIterator *iter, +#endif + const uint8_t *src, int32_t srcLength, + icu::ByteSink &sink, icu::Edits *edits, + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + +/** Implements UTF8CaseMapper. */ +U_CFUNC void U_CALLCONV +ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options, + icu::BreakIterator *iter, + const uint8_t *src, int32_t srcLength, + icu::ByteSink &sink, icu::Edits *edits, + UErrorCode &errorCode); + +#endif + +void +ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + const char *src, int32_t srcLength, + UTF8CaseMapper *stringCaseMapper, + icu::ByteSink &sink, icu::Edits *edits, + UErrorCode &errorCode); + +/** + * Implements argument checking and buffer handling + * for UTF-8 string case mapping as a common function. + */ +int32_t +ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + char *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UTF8CaseMapper *stringCaseMapper, + icu::Edits *edits, + UErrorCode &errorCode); + +U_NAMESPACE_BEGIN +namespace GreekUpper { + +// Data bits. +static const uint32_t UPPER_MASK = 0x3ff; +static const uint32_t HAS_VOWEL = 0x1000; +static const uint32_t HAS_YPOGEGRAMMENI = 0x2000; +static const uint32_t HAS_ACCENT = 0x4000; +static const uint32_t HAS_DIALYTIKA = 0x8000; +// Further bits during data building and processing, not stored in the data map. +static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000; +static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000; + +static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT; +static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA = + HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA; +static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA; + +// State bits. +static const uint32_t AFTER_CASED = 1; +static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2; + +uint32_t getLetterData(UChar32 c); + +/** + * Returns a non-zero value for each of the Greek combining diacritics + * listed in The Unicode Standard, version 8, chapter 7.2 Greek, + * plus some perispomeni look-alikes. + */ +uint32_t getDiacriticData(UChar32 c); + +} // namespace GreekUpper +U_NAMESPACE_END + +#endif // __cplusplus + +#endif // __UCASEMAP_IMP_H__ diff --git a/thirdparty/icu4c/common/ucasemap_titlecase_brkiter.cpp b/thirdparty/icu4c/common/ucasemap_titlecase_brkiter.cpp new file mode 100644 index 0000000000..c21dfb7698 --- /dev/null +++ b/thirdparty/icu4c/common/ucasemap_titlecase_brkiter.cpp @@ -0,0 +1,134 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ucasemap_titlecase_brkiter.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011jun02 +* created by: Markus W. Scherer +* +* Titlecasing functions that are based on BreakIterator +* were moved here to break dependency cycles among parts of the common library. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/brkiter.h" +#include "unicode/ubrk.h" +#include "unicode/casemap.h" +#include "unicode/ucasemap.h" +#include "cmemory.h" +#include "ucase.h" +#include "ucasemap_imp.h" + +U_NAMESPACE_BEGIN + +void CaseMap::utf8ToTitle( + const char *locale, uint32_t options, BreakIterator *iter, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + UText utext = UTEXT_INITIALIZER; + utext_openUTF8(&utext, src.data(), src.length(), &errorCode); + LocalPointer<BreakIterator> ownedIter; + iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode); + if (iter == nullptr) { + utext_close(&utext); + return; + } + iter->setText(&utext, errorCode); + ucasemap_mapUTF8( + ustrcase_getCaseLocale(locale), options, iter, + src.data(), src.length(), + ucasemap_internalUTF8ToTitle, sink, edits, errorCode); + utext_close(&utext); +} + +int32_t CaseMap::utf8ToTitle( + const char *locale, uint32_t options, BreakIterator *iter, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return 0; + } + UText utext=UTEXT_INITIALIZER; + utext_openUTF8(&utext, src, srcLength, &errorCode); + LocalPointer<BreakIterator> ownedIter; + iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode); + if(iter==NULL) { + utext_close(&utext); + return 0; + } + iter->setText(&utext, errorCode); + int32_t length=ucasemap_mapUTF8( + ustrcase_getCaseLocale(locale), options, iter, + dest, destCapacity, + src, srcLength, + ucasemap_internalUTF8ToTitle, edits, errorCode); + utext_close(&utext); + return length; +} + +U_NAMESPACE_END + +U_NAMESPACE_USE + +U_CAPI const UBreakIterator * U_EXPORT2 +ucasemap_getBreakIterator(const UCaseMap *csm) { + return reinterpret_cast<UBreakIterator *>(csm->iter); +} + +U_CAPI void U_EXPORT2 +ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + delete csm->iter; + csm->iter=reinterpret_cast<BreakIterator *>(iterToAdopt); +} + +U_CAPI int32_t U_EXPORT2 +ucasemap_utf8ToTitle(UCaseMap *csm, + char *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + UText utext=UTEXT_INITIALIZER; + utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if(csm->iter==NULL) { + LocalPointer<BreakIterator> ownedIter; + BreakIterator *iter = ustrcase_getTitleBreakIterator( + nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode); + if (iter == nullptr) { + utext_close(&utext); + return 0; + } + csm->iter = ownedIter.orphan(); + } + csm->iter->setText(&utext, *pErrorCode); + int32_t length=ucasemap_mapUTF8( + csm->caseLocale, csm->options, csm->iter, + dest, destCapacity, + src, srcLength, + ucasemap_internalUTF8ToTitle, NULL, *pErrorCode); + utext_close(&utext); + return length; +} + +#endif // !UCONFIG_NO_BREAK_ITERATION diff --git a/thirdparty/icu4c/common/ucat.cpp b/thirdparty/icu4c/common/ucat.cpp new file mode 100644 index 0000000000..dac56eeb5c --- /dev/null +++ b/thirdparty/icu4c/common/ucat.cpp @@ -0,0 +1,78 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2003, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: March 19 2003 +* Since: ICU 2.6 +********************************************************************** +*/ +#include "unicode/ucat.h" +#include "unicode/ustring.h" +#include "cstring.h" +#include "uassert.h" + +/* Separator between set_num and msg_num */ +static const char SEPARATOR = '%'; + +/* Maximum length of a set_num/msg_num key, incl. terminating zero. + * Longest possible key is "-2147483648%-2147483648" */ +#define MAX_KEY_LEN (24) + +/** + * Fill in buffer with a set_num/msg_num key string, given the numeric + * values. Numeric values must be >= 0. Buffer must be of length + * MAX_KEY_LEN or more. + */ +static char* +_catkey(char* buffer, int32_t set_num, int32_t msg_num) { + int32_t i = 0; + i = T_CString_integerToString(buffer, set_num, 10); + buffer[i++] = SEPARATOR; + T_CString_integerToString(buffer+i, msg_num, 10); + return buffer; +} + +U_CAPI u_nl_catd U_EXPORT2 +u_catopen(const char* name, const char* locale, UErrorCode* ec) { + return (u_nl_catd) ures_open(name, locale, ec); +} + +U_CAPI void U_EXPORT2 +u_catclose(u_nl_catd catd) { + ures_close((UResourceBundle*) catd); /* may be NULL */ +} + +U_CAPI const UChar* U_EXPORT2 +u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num, + const UChar* s, + int32_t* len, UErrorCode* ec) { + + char key[MAX_KEY_LEN]; + const UChar* result; + + if (ec == NULL || U_FAILURE(*ec)) { + goto ERROR; + } + + result = ures_getStringByKey((const UResourceBundle*) catd, + _catkey(key, set_num, msg_num), + len, ec); + if (U_FAILURE(*ec)) { + goto ERROR; + } + + return result; + + ERROR: + /* In case of any failure, return s */ + if (len != NULL) { + *len = u_strlen(s); + } + return s; +} + +/*eof*/ diff --git a/thirdparty/icu4c/common/uchar.cpp b/thirdparty/icu4c/common/uchar.cpp new file mode 100644 index 0000000000..eb14e4c75d --- /dev/null +++ b/thirdparty/icu4c/common/uchar.cpp @@ -0,0 +1,730 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1996-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File UCHAR.C +* +* Modification History: +* +* Date Name Description +* 04/02/97 aliu Creation. +* 4/15/99 Madhu Updated all the function definitions for C Implementation +* 5/20/99 Madhu Added the function u_getVersion() +* 8/19/1999 srl Upgraded scripts to Unicode3.0 +* 11/11/1999 weiv added u_isalnum(), cleaned comments +* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion. +* 06/20/2000 helena OS/400 port changes; mostly typecast. +****************************************************************************** +*/ + +#include "unicode/utypes.h" +#include "unicode/uchar.h" +#include "unicode/uscript.h" +#include "unicode/udata.h" +#include "uassert.h" +#include "cmemory.h" +#include "ucln_cmn.h" +#include "utrie2.h" +#include "udataswp.h" +#include "uprops.h" +#include "ustr_imp.h" + +/* uchar_props_data.h is machine-generated by genprops --csource */ +#define INCLUDED_FROM_UCHAR_C +#include "uchar_props_data.h" + +/* constants and macros for access to the data ------------------------------ */ + +/* getting a uint32_t properties word from the data */ +#define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c)) + +/* API functions ------------------------------------------------------------ */ + +/* Gets the Unicode character's general category.*/ +U_CAPI int8_t U_EXPORT2 +u_charType(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (int8_t)GET_CATEGORY(props); +} + +/* Enumerate all code points with their general categories. */ +struct _EnumTypeCallback { + UCharEnumTypeRange *enumRange; + const void *context; +}; + +static uint32_t U_CALLCONV +_enumTypeValue(const void *context, uint32_t value) { + (void)context; + return GET_CATEGORY(value); +} + +static UBool U_CALLCONV +_enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { + /* just cast the value to UCharCategory */ + return ((struct _EnumTypeCallback *)context)-> + enumRange(((struct _EnumTypeCallback *)context)->context, + start, end+1, (UCharCategory)value); +} + +U_CAPI void U_EXPORT2 +u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) { + struct _EnumTypeCallback callback; + + if(enumRange==NULL) { + return; + } + + callback.enumRange=enumRange; + callback.context=context; + utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback); +} + +/* Checks if ch is a lower case letter.*/ +U_CAPI UBool U_EXPORT2 +u_islower(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER); +} + +/* Checks if ch is an upper case letter.*/ +U_CAPI UBool U_EXPORT2 +u_isupper(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER); +} + +/* Checks if ch is a title case letter; usually upper case letters.*/ +U_CAPI UBool U_EXPORT2 +u_istitle(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER); +} + +/* Checks if ch is a decimal digit. */ +U_CAPI UBool U_EXPORT2 +u_isdigit(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); +} + +U_CAPI UBool U_EXPORT2 +u_isxdigit(UChar32 c) { + uint32_t props; + + /* check ASCII and Fullwidth ASCII a-fA-F */ + if( + (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) || + (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41)) + ) { + return TRUE; + } + + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER); +} + +/* Checks if the Unicode character is a letter.*/ +U_CAPI UBool U_EXPORT2 +u_isalpha(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0); +} + +U_CAPI UBool U_EXPORT2 +u_isUAlphabetic(UChar32 c) { + return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0; +} + +/* Checks if c is a letter or a decimal digit */ +U_CAPI UBool U_EXPORT2 +u_isalnum(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0); +} + +/** + * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM. + * @internal + */ +U_CFUNC UBool +u_isalnumPOSIX(UChar32 c) { + return (UBool)(u_isUAlphabetic(c) || u_isdigit(c)); +} + +/* Checks if ch is a unicode character with assigned character type.*/ +U_CAPI UBool U_EXPORT2 +u_isdefined(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)!=0); +} + +/* Checks if the Unicode character is a base form character that can take a diacritic.*/ +U_CAPI UBool U_EXPORT2 +u_isbase(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0); +} + +/* Checks if the Unicode character is a control character.*/ +U_CAPI UBool U_EXPORT2 +u_iscntrl(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0); +} + +U_CAPI UBool U_EXPORT2 +u_isISOControl(UChar32 c) { + return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f); +} + +/* Some control characters that are used as space. */ +#define IS_THAT_CONTROL_SPACE(c) \ + (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL)) + +/* Java has decided that U+0085 New Line is not whitespace any more. */ +#define IS_THAT_ASCII_CONTROL_SPACE(c) \ + (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c)) + +/* Checks if the Unicode character is a space character.*/ +U_CAPI UBool U_EXPORT2 +u_isspace(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c)); +} + +U_CAPI UBool U_EXPORT2 +u_isJavaSpaceChar(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0); +} + +/* Checks if the Unicode character is a whitespace character.*/ +U_CAPI UBool U_EXPORT2 +u_isWhitespace(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)( + ((CAT_MASK(props)&U_GC_Z_MASK)!=0 && + c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */ + IS_THAT_ASCII_CONTROL_SPACE(c) + ); +} + +U_CAPI UBool U_EXPORT2 +u_isblank(UChar32 c) { + if((uint32_t)c<=0x9f) { + return c==9 || c==0x20; /* TAB or SPACE */ + } else { + /* Zs */ + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR); + } +} + +U_CAPI UBool U_EXPORT2 +u_isUWhiteSpace(UChar32 c) { + return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0; +} + +/* Checks if the Unicode character is printable.*/ +U_CAPI UBool U_EXPORT2 +u_isprint(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + /* comparing ==0 returns FALSE for the categories mentioned */ + return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0); +} + +/** + * Checks if c is in \p{graph}\p{blank} - \p{cntrl}. + * Implements UCHAR_POSIX_PRINT. + * @internal + */ +U_CFUNC UBool +u_isprintPOSIX(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + /* + * The only cntrl character in graph+blank is TAB (in blank). + * Here we implement (blank-TAB)=Zs instead of calling u_isblank(). + */ + return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c)); +} + +U_CAPI UBool U_EXPORT2 +u_isgraph(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + /* comparing ==0 returns FALSE for the categories mentioned */ + return (UBool)((CAT_MASK(props)& + (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) + ==0); +} + +/** + * Checks if c is in + * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}] + * with space=\p{Whitespace} and Control=Cc. + * Implements UCHAR_POSIX_GRAPH. + * @internal + */ +U_CFUNC UBool +u_isgraphPOSIX(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ + /* comparing ==0 returns FALSE for the categories mentioned */ + return (UBool)((CAT_MASK(props)& + (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK)) + ==0); +} + +U_CAPI UBool U_EXPORT2 +u_ispunct(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0); +} + +/* Checks if the Unicode character can start a Unicode identifier.*/ +U_CAPI UBool U_EXPORT2 +u_isIDStart(UChar32 c) { + /* same as u_isalpha() */ + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0); +} + +/* Checks if the Unicode character can be a Unicode identifier part other than starting the + identifier.*/ +U_CAPI UBool U_EXPORT2 +u_isIDPart(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)( + (CAT_MASK(props)& + (U_GC_ND_MASK|U_GC_NL_MASK| + U_GC_L_MASK| + U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK) + )!=0 || + u_isIDIgnorable(c)); +} + +/*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/ +U_CAPI UBool U_EXPORT2 +u_isIDIgnorable(UChar32 c) { + if(c<=0x9f) { + return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c); + } else { + uint32_t props; + GET_PROPS(c, props); + return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR); + } +} + +/*Checks if the Unicode character can start a Java identifier.*/ +U_CAPI UBool U_EXPORT2 +u_isJavaIDStart(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0); +} + +/*Checks if the Unicode character can be a Java identifier part other than starting the + * identifier. + */ +U_CAPI UBool U_EXPORT2 +u_isJavaIDPart(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return (UBool)( + (CAT_MASK(props)& + (U_GC_ND_MASK|U_GC_NL_MASK| + U_GC_L_MASK| + U_GC_SC_MASK|U_GC_PC_MASK| + U_GC_MC_MASK|U_GC_MN_MASK) + )!=0 || + u_isIDIgnorable(c)); +} + +U_CAPI int32_t U_EXPORT2 +u_charDigitValue(UChar32 c) { + uint32_t props; + int32_t value; + GET_PROPS(c, props); + value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START; + if(value<=9) { + return value; + } else { + return -1; + } +} + +U_CAPI double U_EXPORT2 +u_getNumericValue(UChar32 c) { + uint32_t props; + int32_t ntv; + GET_PROPS(c, props); + ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props); + + if(ntv==UPROPS_NTV_NONE) { + return U_NO_NUMERIC_VALUE; + } else if(ntv<UPROPS_NTV_DIGIT_START) { + /* decimal digit */ + return ntv-UPROPS_NTV_DECIMAL_START; + } else if(ntv<UPROPS_NTV_NUMERIC_START) { + /* other digit */ + return ntv-UPROPS_NTV_DIGIT_START; + } else if(ntv<UPROPS_NTV_FRACTION_START) { + /* small integer */ + return ntv-UPROPS_NTV_NUMERIC_START; + } else if(ntv<UPROPS_NTV_LARGE_START) { + /* fraction */ + int32_t numerator=(ntv>>4)-12; + int32_t denominator=(ntv&0xf)+1; + return (double)numerator/denominator; + } else if(ntv<UPROPS_NTV_BASE60_START) { + /* large, single-significant-digit integer */ + double numValue; + int32_t mant=(ntv>>5)-14; + int32_t exp=(ntv&0x1f)+2; + numValue=mant; + + /* multiply by 10^exp without math.h */ + while(exp>=4) { + numValue*=10000.; + exp-=4; + } + switch(exp) { + case 3: + numValue*=1000.; + break; + case 2: + numValue*=100.; + break; + case 1: + numValue*=10.; + break; + case 0: + default: + break; + } + + return numValue; + } else if(ntv<UPROPS_NTV_FRACTION20_START) { + /* sexagesimal (base 60) integer */ + int32_t numValue=(ntv>>2)-0xbf; + int32_t exp=(ntv&3)+1; + + switch(exp) { + case 4: + numValue*=60*60*60*60; + break; + case 3: + numValue*=60*60*60; + break; + case 2: + numValue*=60*60; + break; + case 1: + numValue*=60; + break; + case 0: + default: + break; + } + + return numValue; + } else if(ntv<UPROPS_NTV_FRACTION32_START) { + // fraction-20 e.g. 3/80 + int32_t frac20=ntv-UPROPS_NTV_FRACTION20_START; // 0..0x17 + int32_t numerator=2*(frac20&3)+1; + int32_t denominator=20<<(frac20>>2); + return (double)numerator/denominator; + } else if(ntv<UPROPS_NTV_RESERVED_START) { + // fraction-32 e.g. 3/64 + int32_t frac32=ntv-UPROPS_NTV_FRACTION32_START; // 0..15 + int32_t numerator=2*(frac32&3)+1; + int32_t denominator=32<<(frac32>>2); + return (double)numerator/denominator; + } else { + /* reserved */ + return U_NO_NUMERIC_VALUE; + } +} + +U_CAPI int32_t U_EXPORT2 +u_digit(UChar32 ch, int8_t radix) { + int8_t value; + if((uint8_t)(radix-2)<=(36-2)) { + value=(int8_t)u_charDigitValue(ch); + if(value<0) { + /* ch is not a decimal digit, try latin letters */ + if(ch>=0x61 && ch<=0x7A) { + value=(int8_t)(ch-0x57); /* ch - 'a' + 10 */ + } else if(ch>=0x41 && ch<=0x5A) { + value=(int8_t)(ch-0x37); /* ch - 'A' + 10 */ + } else if(ch>=0xFF41 && ch<=0xFF5A) { + value=(int8_t)(ch-0xFF37); /* fullwidth ASCII a-z */ + } else if(ch>=0xFF21 && ch<=0xFF3A) { + value=(int8_t)(ch-0xFF17); /* fullwidth ASCII A-Z */ + } + } + } else { + value=-1; /* invalid radix */ + } + return (int8_t)((value<radix) ? value : -1); +} + +U_CAPI UChar32 U_EXPORT2 +u_forDigit(int32_t digit, int8_t radix) { + if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) { + return 0; + } else if(digit<10) { + return (UChar32)(0x30+digit); + } else { + return (UChar32)((0x61-10)+digit); + } +} + +/* miscellaneous, and support for uprops.cpp -------------------------------- */ + +U_CAPI void U_EXPORT2 +u_getUnicodeVersion(UVersionInfo versionArray) { + if(versionArray!=NULL) { + uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH); + } +} + +U_CFUNC uint32_t +u_getMainProperties(UChar32 c) { + uint32_t props; + GET_PROPS(c, props); + return props; +} + +U_CFUNC uint32_t +u_getUnicodeProperties(UChar32 c, int32_t column) { + U_ASSERT(column>=0); + if(column>=propsVectorsColumns) { + return 0; + } else { + uint16_t vecIndex=UTRIE2_GET16(&propsVectorsTrie, c); + return propsVectors[vecIndex+column]; + } +} + +U_CFUNC int32_t +uprv_getMaxValues(int32_t column) { + switch(column) { + case 0: + return indexes[UPROPS_MAX_VALUES_INDEX]; + case 2: + return indexes[UPROPS_MAX_VALUES_2_INDEX]; + default: + return 0; + } +} + +U_CAPI void U_EXPORT2 +u_charAge(UChar32 c, UVersionInfo versionArray) { + if(versionArray!=NULL) { + uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT; + versionArray[0]=(uint8_t)(version>>4); + versionArray[1]=(uint8_t)(version&0xf); + versionArray[2]=versionArray[3]=0; + } +} + +U_CAPI UScriptCode U_EXPORT2 +uscript_getScript(UChar32 c, UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return USCRIPT_INVALID_CODE; + } + if((uint32_t)c>0x10ffff) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return USCRIPT_INVALID_CODE; + } + uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; + uint32_t codeOrIndex=uprops_mergeScriptCodeOrIndex(scriptX); + if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { + return (UScriptCode)codeOrIndex; + } else if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) { + return USCRIPT_COMMON; + } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) { + return USCRIPT_INHERITED; + } else { + return (UScriptCode)scriptExtensions[codeOrIndex]; + } +} + +U_CAPI UBool U_EXPORT2 +uscript_hasScript(UChar32 c, UScriptCode sc) { + uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; + uint32_t codeOrIndex=uprops_mergeScriptCodeOrIndex(scriptX); + if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { + return sc==(UScriptCode)codeOrIndex; + } + + const uint16_t *scx=scriptExtensions+codeOrIndex; + if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) { + scx=scriptExtensions+scx[1]; + } + uint32_t sc32=sc; + if(sc32>0x7fff) { + /* Guard against bogus input that would make us go past the Script_Extensions terminator. */ + return FALSE; + } + while(sc32>*scx) { + ++scx; + } + return sc32==(*scx&0x7fff); +} + +U_CAPI int32_t U_EXPORT2 +uscript_getScriptExtensions(UChar32 c, + UScriptCode *scripts, int32_t capacity, + UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(capacity<0 || (capacity>0 && scripts==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK; + uint32_t codeOrIndex=uprops_mergeScriptCodeOrIndex(scriptX); + if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) { + if(capacity==0) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } else { + scripts[0]=(UScriptCode)codeOrIndex; + } + return 1; + } + + const uint16_t *scx=scriptExtensions+codeOrIndex; + if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) { + scx=scriptExtensions+scx[1]; + } + int32_t length=0; + uint16_t sx; + do { + sx=*scx++; + if(length<capacity) { + scripts[length]=(UScriptCode)(sx&0x7fff); + } + ++length; + } while(sx<0x8000); + if(length>capacity) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + return length; +} + +U_CAPI UBlockCode U_EXPORT2 +ublock_getCode(UChar32 c) { + return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT); +} + +/* property starts for UnicodeSet ------------------------------------------- */ + +static UBool U_CALLCONV +_enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { + /* add the start code point to the USet */ + const USetAdder *sa=(const USetAdder *)context; + sa->add(sa->set, start); + (void)end; + (void)value; + return TRUE; +} + +#define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1) + +U_CFUNC void U_EXPORT2 +uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* add the start code point of each same-value range of the main trie */ + utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa); + + /* add code points with hardcoded properties, plus the ones following them */ + + /* add for u_isblank() */ + USET_ADD_CP_AND_NEXT(sa, TAB); + + /* add for IS_THAT_CONTROL_SPACE() */ + sa->add(sa->set, CR+1); /* range TAB..CR */ + sa->add(sa->set, 0x1c); + sa->add(sa->set, 0x1f+1); + USET_ADD_CP_AND_NEXT(sa, NL); + + /* add for u_isIDIgnorable() what was not added above */ + sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */ + sa->add(sa->set, HAIRSP); + sa->add(sa->set, RLM+1); + sa->add(sa->set, INHSWAP); + sa->add(sa->set, NOMDIG+1); + USET_ADD_CP_AND_NEXT(sa, ZWNBSP); + + /* add no-break spaces for u_isWhitespace() what was not added above */ + USET_ADD_CP_AND_NEXT(sa, NBSP); + USET_ADD_CP_AND_NEXT(sa, FIGURESP); + USET_ADD_CP_AND_NEXT(sa, NNBSP); + + /* add for u_digit() */ + sa->add(sa->set, U_a); + sa->add(sa->set, U_z+1); + sa->add(sa->set, U_A); + sa->add(sa->set, U_Z+1); + sa->add(sa->set, U_FW_a); + sa->add(sa->set, U_FW_z+1); + sa->add(sa->set, U_FW_A); + sa->add(sa->set, U_FW_Z+1); + + /* add for u_isxdigit() */ + sa->add(sa->set, U_f+1); + sa->add(sa->set, U_F+1); + sa->add(sa->set, U_FW_f+1); + sa->add(sa->set, U_FW_F+1); + + /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ + sa->add(sa->set, WJ); /* range WJ..NOMDIG */ + sa->add(sa->set, 0xfff0); + sa->add(sa->set, 0xfffb+1); + sa->add(sa->set, 0xe0000); + sa->add(sa->set, 0xe0fff+1); + + /* add for UCHAR_GRAPHEME_BASE and others */ + USET_ADD_CP_AND_NEXT(sa, CGJ); +} + +U_CFUNC void U_EXPORT2 +upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + + /* add the start code point of each same-value range of the properties vectors trie */ + utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa); +} diff --git a/thirdparty/icu4c/common/uchar_props_data.h b/thirdparty/icu4c/common/uchar_props_data.h new file mode 100644 index 0000000000..9a78918204 --- /dev/null +++ b/thirdparty/icu4c/common/uchar_props_data.h @@ -0,0 +1,3860 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// Copyright (C) 1999-2016, International Business Machines +// Corporation and others. All Rights Reserved. +// +// file name: uchar_props_data.h +// +// machine-generated by: icu/tools/unicode/c/genprops/corepropsbuilder.cpp + + +#ifdef INCLUDED_FROM_UCHAR_C + +static const UVersionInfo dataVersion={0xd,0,0,0}; + +static const uint16_t propsTrie_index[22276]={ +0x46d,0x475,0x47d,0x485,0x49d,0x4a5,0x4ad,0x4b5,0x4bd,0x4c5,0x4cb,0x4d3,0x4db,0x4e3,0x4eb,0x4f3, +0x4f9,0x501,0x509,0x511,0x514,0x51c,0x524,0x52c,0x534,0x53c,0x538,0x540,0x548,0x550,0x555,0x55d, +0x565,0x56d,0x571,0x579,0x581,0x589,0x591,0x599,0x595,0x59d,0x5a2,0x5aa,0x5b0,0x5b8,0x5c0,0x5c8, +0x5d0,0x5d8,0x5e0,0x5e8,0x5ed,0x5f5,0x5f8,0x600,0x608,0x610,0x616,0x61e,0x61d,0x625,0x62d,0x635, +0x645,0x63d,0x64d,0x655,0x48d,0x665,0x66b,0x65d,0x67b,0x67d,0x685,0x673,0x695,0x69b,0x6a3,0x68d, +0x6b3,0x6b9,0x6c1,0x6ab,0x6d1,0x6d7,0x6df,0x6c9,0x6ef,0x6f5,0x6fd,0x6e7,0x70d,0x715,0x71d,0x705, +0x72d,0x733,0x73b,0x725,0x74b,0x751,0x759,0x743,0x769,0x76e,0x776,0x761,0x786,0x78d,0x795,0x77e, +0x619,0x79d,0x7a5,0x48d,0x7ad,0x7b4,0x7bc,0x48d,0x7c4,0x7cc,0x7d4,0x7d9,0x7e1,0x7e8,0x7f0,0x48d, +0x5d8,0x7f8,0x800,0x808,0x810,0x565,0x820,0x818,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x828,0x5d8,0x830,0x834,0x83c,0x5d8,0x842,0x5d8,0x848,0x850,0x858,0x565,0x565,0x860, +0x868,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x86d,0x875,0x5d8,0x5d8,0x87d,0x885,0x88d,0x895,0x89d,0x5d8,0x8a5,0x8ad,0x8b5, +0x8c5,0x5d8,0x8cd,0x8cf,0x8d7,0x8bd,0x5d8,0x8da,0x8ee,0x8e2,0x8ea,0x8f6,0x5d8,0x8fe,0x904,0x90c, +0x914,0x5d8,0x924,0x92c,0x934,0x91c,0x944,0x48d,0x94c,0x94f,0x957,0x93c,0x967,0x95f,0x5d8,0x96e, +0x5d8,0x97d,0x976,0x985,0x98d,0x991,0x999,0x9a1,0x50d,0x9a9,0x9ac,0x9b2,0x9b9,0x9ac,0x534,0x9c1, +0x4bd,0x4bd,0x4bd,0x4bd,0x9c9,0x4bd,0x4bd,0x4bd,0x9d9,0x9e1,0x9e9,0x9f1,0x9f9,0x9fd,0xa05,0x9d1, +0xa1d,0xa25,0xa0d,0xa15,0xa2d,0xa35,0xa3d,0xa45,0xa5d,0xa4d,0xa55,0xa65,0xa6d,0xa7c,0xa81,0xa74, +0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa91,0xa99,0x90c,0xa9c,0xaa4,0xaab,0xab0,0xab8, +0x90c,0xabf,0xabe,0xacf,0xad2,0x90c,0x90c,0xac7,0x90c,0x90c,0x90c,0x90c,0x90c,0xae1,0xae9,0xad9, +0x90c,0x90c,0x90c,0xaee,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xaf4,0xafc,0x90c,0xb04,0xb0b, +0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xa89,0xa89,0xa89,0xa89,0xb13,0xa89,0xb1a,0xb21, +0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0x90c,0xb29,0xb30,0xb34,0xb3a,0x90c,0x90c,0x90c, +0x565,0xb4a,0xb42,0xb52,0x4bd,0x4bd,0x4bd,0xb5a,0x50d,0xb62,0x5d8,0xb68,0xb78,0xb70,0xb70,0x534, +0xb80,0xb88,0xb90,0x48d,0xb98,0x90c,0x90c,0xb9f,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xba7,0xbad, +0xbbd,0xbb5,0x619,0x5d8,0xbc5,0x868,0x5d8,0xbcd,0xbd5,0xbd9,0x5d8,0x5d8,0xbde,0x5d8,0x90c,0xbe5, +0xab9,0xbed,0xbf3,0x90c,0xbed,0xbfb,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c, +0xc03,0x5d8,0x5d8,0x5d8,0xc0b,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0xc11,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc16,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x90c,0x90c, +0xc1e,0x5d8,0xc21,0x5d8,0xc29,0xc2f,0xc37,0xc3f,0xc44,0x5d8,0x5d8,0xc48,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc4f,0x5d8,0xc56,0xc5c,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc64,0x5d8,0x5d8,0x5d8,0xc6c,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc6e,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc75,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0xc7c,0x5d8,0x5d8,0x5d8,0xc83,0xc8b,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc90,0x5d8,0x5d8,0xc98,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc9c,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xc9f,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xca2,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0xca8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0xcb0,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0xcb5,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xcba,0x5d8,0x5d8,0x5d8,0xcbf,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0xcc7,0xcce,0xcd2,0x5d8,0x5d8,0x5d8,0xcd9,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x8ce, +0xce7,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0xcdf,0x90c,0xcef,0x985,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0xcf4,0xcfc,0x4bd,0xd0c,0xd04,0x5d8,0x5d8,0xd14,0xd1c,0xd2c,0x4bd,0xd31,0xd39,0xd3f,0xd47,0xd24, +0xd4f,0xd57,0x5d8,0xd5f,0xd6f,0xd72,0xd67,0xd7a,0x62d,0xd82,0xd89,0x8ce,0x67b,0xd99,0xd91,0xda1, +0x5d8,0xda9,0xdb1,0xdb9,0x5d8,0xdc1,0xdc9,0xdd1,0xdd9,0xde1,0xde5,0xded,0x50d,0x50d,0x5d8,0xdf5, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xdfd,0xe09,0xe01, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11, +0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0x5d8,0x5d8,0x5d8,0xe21,0x5d8,0xcda,0xe28,0xe2d, +0x5d8,0x5d8,0x5d8,0xe35,0x5d8,0x5d8,0x8d9,0x48d,0xe4b,0xe3b,0xe43,0x5d8,0x5d8,0xe53,0xe5b,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xe60,0xe68,0x5d8,0xe6c,0x5d8,0xe72,0xe76, +0xe7e,0xe86,0xe8d,0xe95,0x5d8,0x5d8,0x5d8,0xe9b,0xeb3,0x47d,0xebb,0xec3,0xec8,0x8ee,0xea3,0xeab, +0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11, +0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11,0xe11, +0x1234,0x1234,0x1274,0x12b4,0x12f4,0x132c,0x136c,0x13ac,0x13e4,0x1424,0x1450,0x1490,0x14d0,0x14e0,0x1520,0x1554, +0x1594,0x15c4,0x1604,0x1644,0x1654,0x1688,0x16c0,0x1700,0x1740,0x1780,0x17b4,0x17e0,0x1820,0x1858,0x1874,0x18b4, +0xa80,0xac0,0xb00,0xb40,0xb80,0xa40,0xbc0,0xa40,0xbe2,0xa40,0xa40,0xa40,0xa40,0xc22,0x1db,0x1db, +0xc62,0xca2,0xa40,0xa40,0xa40,0xa40,0xce2,0xd02,0xa40,0xa40,0xd42,0xd82,0xdc2,0xe02,0xe42,0xe82, +0xec2,0xef9,0x1db,0x1db,0xf1d,0xf51,0x1db,0xf79,0x1db,0x1db,0x1db,0x1db,0xfa6,0x1db,0x1db,0x1db, +0x1db,0x1db,0x1db,0x1db,0xfba,0x1db,0xff2,0x1032,0x1db,0x103d,0x1db,0x1db,0x1db,0x1073,0xa40,0x10b3, +0x1db,0x1db,0x10f3,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0x1133,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700, +0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x1173, +0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700, +0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x700,0x1173, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0xed0,0xed7,0xedf,0x48d,0x5d8,0x5d8,0x5d8,0xee7,0xef7,0xeef,0xf0e,0xeff,0xf06,0xf16,0xf1a,0xf1e, +0x48d,0x48d,0x48d,0x48d,0x8ce,0x5d8,0xf26,0xf2e,0x5d8,0xf36,0xf3e,0xf42,0xf4a,0x5d8,0xf52,0x48d, +0x565,0x56f,0xf5a,0x5d8,0xf5e,0xf66,0xf76,0xf6e,0x5d8,0xf7e,0x5d8,0xf85,0x48d,0x48d,0x48d,0x48d, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xb78,0x8da,0xe72,0x48d,0x48d,0x48d,0x48d, +0xf95,0xf8d,0xf98,0xfa0,0x8ee,0xfa8,0x48d,0xfb0,0xfb8,0xfc0,0x48d,0x48d,0x5d8,0xfd0,0xfd8,0xfc8, +0xfe8,0xfef,0xfe0,0xff7,0xfff,0x48d,0x100f,0x1007,0x5d8,0x1012,0x101a,0x1022,0x102a,0x1032,0x48d,0x48d, +0x5d8,0x5d8,0x103a,0x48d,0x565,0x1042,0x50d,0x104a,0x5d8,0x1052,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x105a,0x5d8,0x1062,0x48d,0x48d,0x106a,0x1072,0x1079,0x48d,0x48d,0xe68,0x1081,0xb78, +0x1091,0x60e,0x1099,0x1089,0x967,0x10a1,0x10a9,0x10af,0x10c7,0x10b7,0x10bf,0x10cb,0x967,0x10db,0x10d3,0x10e3, +0x10f3,0x10eb,0x48d,0x48d,0x10fa,0x1102,0x630,0x110a,0x111a,0x1120,0x1128,0x1112,0x48d,0x48d,0x48d,0x48d, +0x5d8,0x1130,0x1138,0x1140,0x5d8,0x1148,0x1150,0x48d,0x48d,0x48d,0x48d,0x48d,0x5d8,0x1158,0x1160,0x48d, +0x5d8,0x1168,0x1170,0x1178,0x5d8,0x1188,0x1180,0x48d,0x848,0x1190,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x5d8,0x1198,0x48d,0x48d,0x48d,0x565,0x50d,0x11a0,0x11b0,0x11b6,0x11a8,0x48d,0x48d,0x11c6,0x11ca,0x11be, +0x11e2,0x11d2,0x11da,0x5d8,0x11f2,0x11ea,0x5d8,0x8cf,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x1208,0x120d,0x11fa,0x1202,0x121d,0x1215,0x48d,0x48d,0x122c,0x1230,0x1224,0x1240,0x1238,0x1180,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x1244,0x48d,0x48d,0x48d,0x48d,0x48d,0x124b,0x125b,0x1253, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x8d9,0x48d,0x48d,0x48d, +0x126b,0x1273,0x127b,0x1263,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x1283,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x128b,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x1293,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x8cf,0x8ee,0x129b,0x48d,0x48d,0xe68,0x12a3,0x5d8,0x12b3,0x12bb,0x12c3,0x12ab,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x565,0x50d,0x12cb,0x48d,0x48d,0x48d,0x5d8,0x5d8,0x12d3,0x12d8,0x12de,0x48d, +0x48d,0x12e6,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x12ee,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x8da,0x48d,0x103a,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x8ee,0x48d,0x12f4,0x12fb,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xe01,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x1301,0x1306,0x130e,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0xba7,0x90c,0x1316,0x90c,0x131d,0x1325,0x132b, +0x90c,0x1331,0x90c,0x90c,0x1339,0x48d,0x48d,0x48d,0x48d,0x1341,0x90c,0x90c,0xabb,0x1349,0x48d,0x48d, +0x48d,0x48d,0x1359,0x1360,0x1365,0x136b,0x1373,0x137b,0x1383,0x135d,0x138b,0x1393,0x139b,0x13a0,0x1372,0x1359, +0x1360,0x135c,0x136b,0x13a8,0x135a,0x13ab,0x135d,0x13b3,0x13bb,0x13c3,0x13ca,0x13b6,0x13be,0x13c6,0x13cd,0x13b9, +0x13d5,0x1351,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c, +0x90c,0x90c,0x534,0x13e5,0x534,0x13ec,0x13f3,0x13dd,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x13fa,0x1402,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x5d8,0x1412,0x140a,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x5d8,0x141a,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x1422,0x48d,0x565,0x1432,0x142a,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x143a,0x144a,0x1442,0x48d,0x48d,0x145a,0x1452,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x146a,0x1472,0x147a,0x1482,0x148a,0x1492,0x48d,0x1462,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x90c,0x149a,0x90c,0x90c,0xb9f,0x149f,0x14a3,0xba7,0x14ab,0x90c,0x90c,0x90c,0x90c,0xba9, +0x48d,0x14b3,0x14bb,0x14bf,0x14c7,0x14cf,0x48d,0x48d,0x48d,0x48d,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c, +0x90c,0x14d7,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c, +0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x90c,0x14df,0x14e7,0x90c,0x90c,0x90c,0xb9f,0x90c,0x90c, +0x14ef,0x14f7,0x149a,0x90c,0x14ff,0x90c,0x1507,0x150c,0x48d,0x48d,0x90c,0x90c,0x90c,0x1514,0x90c,0x90c, +0x151b,0x90c,0x90c,0x90c,0xb9f,0x1520,0x1528,0x152e,0x1533,0x48d,0x90c,0x90c,0x90c,0x90c,0x153b,0x90c, +0xabe,0x117c,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x1543,0x5d8,0x5d8,0x154a,0x5d8,0x5d8,0x5d8,0x1552,0x5d8,0x155a,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0xc80,0x5d8,0x5d8,0x1562,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x156a,0x1572,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0xcbf,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x1579,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x1580,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x1587,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0xf5e,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x158b,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0xf5e,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x1066,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x1590,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x1598,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0xf5e,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8, +0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x5d8,0x655,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x15a8,0x15a0,0x15a0,0x15a0,0x48d,0x48d,0x48d,0x48d,0x534,0x534,0x534,0x534,0x534, +0x534,0x534,0x15b0,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d,0x48d, +0x48d,0x48d,0x48d,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19,0xe19, +0xe19,0xe19,0x15b8,0x46c,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, +0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, +0xf,0xf,0xf,0xf,0xc,0x17,0x17,0x17,0x19,0x17,0x17,0x17,0x14,0x15,0x17,0x18, +0x17,0x13,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17, +0x18,0x18,0x18,0x17,0x17,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x14, +0x17,0x15,0x1a,0x16,0x1a,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0x14, +0x18,0x15,0x18,0xf,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, +0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, +0xf,0xf,0xf,0xf,0xc,0x17,0x19,0x19,0x19,0x19,0x1b,0x17,0x1a,0x1b,5,0x1c, +0x18,0x10,0x1b,0x1a,0x1b,0x18,0x34b,0x38b,0x1a,2,0x17,0x17,0x1a,0x30b,5,0x1d, +0x34cb,0x344b,0x3ccb,0x17,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0x18,1,1,1,1, +1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,0x18,2,2,2,2, +2,2,2,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,2,1,2,1, +2,1,2,1,2,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,1,2,1,2,1,2,2,2,1,1,2, +1,2,1,1,2,1,1,1,2,2,1,1,1,1,2,1, +1,2,1,1,1,2,2,2,1,1,2,1,1,2,1,2, +1,2,1,1,2,1,2,2,1,2,1,1,2,1,1,1, +2,1,2,1,1,2,2,5,1,2,2,2,5,5,5,5, +1,3,2,1,3,2,1,3,2,1,2,1,2,1,2,1, +2,1,2,1,2,1,2,1,2,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,2,1,3,2, +1,2,1,1,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,2,2,2,2,2,2,1,1, +2,1,1,2,2,1,2,1,1,1,1,2,1,2,1,2, +1,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,5,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,0x1a,0x1a,0x1a,0x1a,4,4,4,4,4,4,4,4,4,4, +4,4,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a, +4,4,4,4,4,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,4,0x1a,4,0x1a, +0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +1,2,1,2,4,0x1a,1,2,0,0,4,2,2,2,0x17,1, +0,0,0,0,0x1a,0x1a,1,0x17,1,1,1,0,1,0,1,1, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0,1,1,1,1,1,1,1,1,1,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,1,2,2,1,1,1,2,2,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,2,2,2,2,1,2,0x18,1,2,1,1,2, +2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,0x1b,6,6,6,6,6,7,7,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,1,2,1,2,1,2,1,2,1,2,1, +2,1,2,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,0,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0, +0,4,0x17,0x17,0x17,0x17,0x17,0x17,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,0x17,0x13,0,0,0x1b,0x1b,0x19, +0,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,0x13,6, +0x17,6,6,0x17,6,6,0x17,6,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,5, +5,5,5,0x17,0x17,0,0,0,0,0,0,0,0,0,0,0, +0x10,0x10,0x10,0x10,0x10,0x10,0x18,0x18,0x18,0x17,0x17,0x19,0x17,0x17,0x1b,0x1b, +6,6,6,6,6,6,6,6,6,6,6,0x17,0x10,0,0x17,0x17, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +4,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,5,5, +6,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0x17,5,6,6,6,6,6,6,6,0x10,0x1b,6, +6,6,6,6,6,4,4,6,6,0x1b,6,6,6,6,5,5, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,5,0x1b,0x1b,5, +0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0x10, +5,6,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,0,0,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6, +6,6,6,6,6,5,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6, +6,6,6,6,6,6,6,6,4,4,0x1b,0x17,0x17,0x17,4,0, +0,6,0x19,0x19,6,6,6,6,4,6,6,6,4,6,6,6, +6,6,0,0,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, +0x17,0x17,0x17,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,6,6,6,6,4,6, +6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,6,6,6, +0,0,0x17,0,5,5,5,5,5,5,5,5,5,5,5,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,6,6,0x10,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5, +5,5,5,5,0,0,0,0,0,0,0,0,0,0,0,6, +6,6,6,6,6,6,6,6,6,6,6,6,5,5,6,6, +0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,4,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,8, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,8, +6,5,8,8,8,6,6,6,6,6,6,6,6,8,8,8, +8,6,8,8,5,6,6,6,6,6,6,6,5,5,5,5, +5,5,5,5,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189, +0x1c9,0x209,0x249,0x289,5,5,0x19,0x19,0x37cb,0x35cb,0x3fcb,0x34cb,0x3ccb,0x94b,0x1b,0x19, +5,0x17,6,0,5,6,8,8,0,5,5,5,5,5,5,5, +5,0,0,5,5,0,0,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0,5,5,5,5,5,5,5,0,5,0, +0,0,5,5,5,5,0,0,6,5,8,8,8,6,6,6, +6,0,0,8,8,0,0,8,8,6,5,0,0,0,0,0, +0,0,0,8,0,0,0,0,5,5,0,5,0,0,0,0, +0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,6,6,5,5, +5,6,0x17,0,0,0,0,0,0,0,0,0,0,6,6,8, +0,5,5,5,5,5,5,0,0,0,0,5,5,0,0,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5, +5,5,5,5,5,0,5,5,0,5,5,0,5,5,0,0, +6,0,8,8,8,6,6,0,0,0,0,6,6,0,0,6, +6,6,0,0,0,6,0,0,0,0,0,0,0,5,5,5, +5,0,5,0,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189, +0x1c9,0x209,0x249,0x289,0x17,0x19,0,0,0,0,0,0,0,5,6,6, +6,6,6,6,0,6,6,8,0,5,5,5,5,5,5,5, +5,5,0,5,5,5,0,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0,5,5,5,5,5,5,5,0,5,5, +0,5,5,5,5,5,0,0,6,5,8,8,8,6,6,6, +6,6,0,6,6,8,0,8,8,6,0,0,5,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,5,6,6, +0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x1b,5,0x34cb,0x344b, +0x3ccb,0x37cb,0x35cb,0x3fcb,0,0,0,0,0,0,0,0,0,6,8,8, +0,5,5,5,5,5,5,5,5,0,0,5,5,0,0,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5, +5,5,5,5,5,0,5,5,0,5,5,5,5,5,0,0, +6,5,8,6,8,6,6,6,6,0,0,8,8,0,0,8, +8,6,0,0,0,0,0,0,0,6,6,8,0,0,0,0, +5,5,0,5,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189, +0x1c9,0x209,0x249,0x289,0x7cb,0x1e4b,0x784b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x19,0x1b,0, +0,0,0,0,0,0,6,5,0,5,5,5,5,5,5,0, +0,0,5,5,5,0,5,5,5,5,0,0,0,5,5,0, +5,0,5,5,0,0,0,5,5,0,0,0,5,5,5,0, +0,0,5,5,5,5,5,5,5,5,5,5,5,5,0,0, +0,0,8,8,6,8,8,0,0,0,8,8,8,0,8,8, +8,6,0,0,5,0,0,0,0,0,0,8,0,0,0,0, +0,0,0,0,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189, +0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,0,0x17,0x54b,0x58b,0x5cb,0x60b, +0x58b,0x5cb,0x60b,0x1b,6,8,8,8,6,5,5,5,5,5,5,5, +5,0,5,5,5,0,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,0,0,0,5,6,6,6,8,8,8, +8,0,6,6,6,0,6,6,6,6,0,0,0,0,0,0, +0,6,6,0,5,5,5,0,0,0,0,0,5,5,6,6, +0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,5,5,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,6,8,8, +0x17,5,5,5,5,5,5,5,5,0,5,5,5,0,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5, +5,5,5,5,5,5,5,5,0,5,5,5,5,5,0,0, +6,5,8,6,8,8,8,8,8,0,6,8,8,0,8,8, +6,6,0,0,0,0,0,0,0,8,8,0,0,0,0,0, +0,0,5,0,5,5,6,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189, +0x1c9,0x209,0x249,0x289,0x7cb,0x1e4b,0x784b,0x34cb,0x344b,0x3ccb,0x37cb,0x35cb,0x3fcb,0x1b,5,5, +5,5,5,5,6,6,8,8,5,5,5,5,5,5,5,5, +5,0,5,5,5,0,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,6,6,5,8,8,8,6,6,6,6,0,8,8, +8,0,8,8,8,6,5,0x1b,0,0,0,0,5,5,5,8, +0xcc0b,0xca0b,0xcb4b,0xc90b,0x364b,0xc94b,0x350b,5,0,0,0,0,0,0,0x49,0x89, +0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,8,8,0x17,0,0,0, +0,0,0,0,0,0,0,0,0,6,8,8,0,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, +0,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,0,5,5,5,5,5,5,5,5,5, +0,5,0,0,5,5,5,5,5,5,5,0,0,0,6,0, +0,0,0,8,8,8,6,6,6,0,6,0,8,8,8,8, +8,8,8,8,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,6,5,5,6,6,6,6,6,6,6,0, +0,0,0,0x19,5,5,5,5,5,5,4,6,6,6,6,6, +6,6,6,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17, +0,0,0,0,0,5,5,0,5,0,5,5,5,5,5,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0,5,0,5,5,5,5,5,5,5,5,5, +5,6,5,5,6,6,6,6,6,6,6,6,6,5,0,0, +5,5,5,5,5,0,4,0,6,6,6,6,6,6,0,0, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,5,5,5,5, +5,0x1b,0x1b,0x1b,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, +0x17,0x17,0x17,0x1b,0x17,0x1b,0x1b,0x1b,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x344b,0x3c4b,0x444b,0x4c4b,0x544b,0x5c4b, +0x644b,0x6c4b,0x744b,0x2c4b,0x1b,6,0x1b,6,0x1b,6,0x14,0x15,0x14,0x15,8,8, +5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0,0,0,0,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,8,6,6,6,6,6,0x17,6,6,5,5,5,5, +5,6,6,6,6,6,6,6,6,6,6,6,0,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,0,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b, +0x17,0x17,0x17,0x17,0x17,0x1b,0x1b,0x1b,0x1b,0x17,0x17,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,8,8,6,6,6, +6,8,6,6,6,6,6,6,8,6,6,8,8,6,6,5, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,0x17,0x17, +5,5,5,5,5,5,8,8,6,6,5,5,5,5,6,6, +6,5,8,8,8,5,5,8,8,8,8,8,8,8,5,5, +5,6,6,6,6,5,5,5,5,5,5,5,5,5,5,5, +5,5,6,8,8,6,6,8,8,8,8,8,8,6,5,8, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,8,8,8,6,0x1b,0x1b, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,0x17,4,2,2,2, +1,1,1,1,1,1,0,1,0,0,0,0,0,1,0,0, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +5,5,5,5,5,5,5,5,5,0,5,5,5,5,0,0, +5,5,5,5,5,5,5,0,5,0,5,5,5,5,0,0, +5,5,5,5,5,5,5,5,5,0,5,5,5,5,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0,5,5,5,5,0,0,5,5,5,5,5,5,5,0, +5,0,5,5,5,5,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0,5,5,5,5,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0,0,6,6,6, +0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b, +0x4cb,0x50b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x788b,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0,0,2,2,2,2,2,2,0,0, +0x13,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0x1b,0x17,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0xc,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x14, +0x15,0,0,0,5,5,5,5,5,5,5,5,5,5,5,0x17, +0x17,0x17,0x98a,0x9ca,0xa0a,5,5,5,5,5,5,5,5,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,0,5,5,5,5,6,6,6,0,0,0,0,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,6,6,6,0x17,0x17,0,0,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,6,6,0,0,0,0,0,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,0,5,5,5,0,6,6,0,0,0,0,0,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,6,6,8,6,6,6,6,6, +6,6,8,8,8,8,8,8,8,8,6,8,8,6,6,6, +6,6,6,6,6,6,6,6,0x17,0x17,0x17,4,0x17,0x17,0x17,0x19, +5,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, +0,0,0,0,0x54b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,6,5,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0x17,0x17,0x17,0x17,0x17,0x17,0x13,0x17,0x17,0x17,0x17,6, +6,6,0x10,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, +0,0,0,0,5,5,5,4,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0,0,0,0,0,0,0,5,5,5,5, +5,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0, +0,0,0,0,0,0,0,0,6,6,6,8,8,8,8,6, +6,8,8,8,0,0,0,0,8,8,6,8,8,8,8,8, +8,6,6,6,0,0,0,0,0x1b,0,0,0,0x17,0x17,0x49,0x89, +0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,0,0,5,5,5,5,5,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0,0,0,0,0,0, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x30b,0,0,0,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,6,6,8,8,6,0,0,0x17,0x17, +0x17,0x17,0x17,0x17,0x17,0x17,0x17,4,0x17,0x17,0x17,0x17,0x17,0x17,0,0, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,7,6, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,8,6,8,6,6,6,6,6,6,6,0, +6,8,6,8,8,6,6,6,6,6,6,6,6,8,8,8, +8,8,8,6,6,6,6,6,6,6,6,6,6,0,0,6, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, +0x17,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,6,6,6,6, +6,6,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0, +6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +6,6,6,6,8,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +6,8,6,6,6,6,6,8,6,8,8,8,8,8,6,8, +8,5,5,5,5,5,5,5,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17,0x17,0x17,5,8,6,6, +6,6,8,8,6,6,8,6,6,6,5,5,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,5,5,5,5,6,6,8,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,8, +6,6,8,8,8,6,8,6,6,6,8,8,0,0,0,0, +0,0,0,0,0x17,0x17,0x17,0x17,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, +0x249,0x289,0,0,0,5,5,5,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, +0x249,0x289,5,5,5,5,5,5,8,8,8,8,8,8,8,8, +6,6,6,6,6,6,6,6,8,8,6,6,0,0,0,0x17, +0x17,0x17,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,4,4,4,4, +4,4,0x17,0x17,2,2,2,2,2,2,2,2,2,0,0,0, +0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0, +0,1,1,1,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0, +0,0,0,0,6,6,6,0x17,6,6,6,6,6,6,6,6, +6,6,6,6,6,8,6,6,6,6,6,6,6,5,5,5, +5,6,5,5,5,5,5,5,6,5,5,8,6,6,5,0, +0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,4,2,2,2,2,2,2,2,2,2,2,2,2,2, +4,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,4, +4,4,4,4,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,0,6, +6,6,6,6,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,2,2,2,2,2,2, +2,2,1,2,2,2,2,2,2,2,2,2,1,1,1,1, +1,0x1a,0x1a,0x1a,0,0,2,2,2,0,2,2,1,1,1,1, +3,0x1a,0x1a,0,2,2,2,2,2,2,2,2,1,1,1,1, +1,1,1,1,2,2,2,2,2,2,0,0,1,1,1,1, +1,1,0,0,2,2,2,2,2,2,2,2,1,1,1,1, +1,1,1,1,2,2,2,2,2,2,2,2,1,1,1,1, +1,1,1,1,2,2,2,2,2,2,0,0,1,1,1,1, +1,1,0,0,2,2,2,2,2,2,2,2,0,1,0,1, +0,1,0,1,2,2,2,2,2,2,2,2,1,1,1,1, +1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,0,0,2,2,2,2,2,2,2,2,3,3,3,3, +3,3,3,3,2,2,2,2,2,2,2,2,3,3,3,3, +3,3,3,3,2,2,2,2,2,0,2,2,1,1,1,1, +3,0x1a,2,0x1a,0x1a,0x1a,2,2,2,0,2,2,1,1,1,1, +3,0x1a,0x1a,0x1a,2,2,2,2,0,0,2,2,1,1,1,1, +0,0x1a,0x1a,0x1a,0x16,0x17,0x17,0x17,0x18,0x14,0x15,0x17,0x17,0x17,0x17,0x17, +0x17,0x17,0x17,0x17,0x17,0x17,0x18,0x17,0x16,0x17,0x17,0x17,0x17,0x17,0x17,0x17, +0x17,0x17,0x17,0xc,0x10,0x10,0x10,0x10,0x10,0,0x10,0x10,0x10,0x10,0x10,0x10, +0x10,0x10,0x10,0x10,0x2cb,4,0,0,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x18,0x18, +0x18,0x14,0x15,4,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0x10, +0x10,0x10,0x10,0x10,0x13,0x13,0x13,0x13,0x13,0x13,0x17,0x17,0x1c,0x1d,0x14,0x1c, +0x1c,0x1d,0x14,0x1c,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0xd,0xe,0x10,0x10, +0x10,0x10,0x10,0xc,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x1c,0x1d,0x17, +0x17,0x17,0x17,0x16,0x2cb,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x18,0x18, +0x18,0x14,0x15,0,4,4,4,4,4,4,4,4,4,4,4,4, +4,0,0,0,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19, +0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19, +0x19,0x19,0x19,0x19,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,6,6,6,6,6,6,6,6,6,6,6,6, +6,7,7,7,7,6,7,7,7,6,6,6,6,6,6,6, +6,6,6,6,6,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x1b,0x1b,0x1b,0x1b,1,0x1b,1,0x1b,1,0x1b,1,1, +1,1,0x1b,2,1,1,1,1,2,5,5,5,5,2,0x1b,0x1b, +2,2,1,1,0x18,0x18,0x18,0x18,0x18,1,2,2,2,2,0x1b,0x18, +0x1b,0x1b,2,0x1b,0x358b,0x360b,0x364b,0x348b,0x388b,0x350b,0x390b,0x3d0b,0x410b,0x354b,0x454b,0x35cb, +0x3dcb,0x45cb,0x4dcb,0x58b,0x1b,0x1b,1,0x1b,0x1b,0x1b,0x1b,1,0x1b,0x1b,2,1, +1,1,2,2,1,1,1,2,0x1b,1,0x1b,0x1b,0x18,1,1,1, +1,1,0x1b,0x1b,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x7ca,0x80a,0x84a, +0x11ca,0x1e4a,0x980a,0x784a,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x7ca,0x80a,0x84a, +0x11ca,0x1e4a,0x980a,0x784a,0x784a,0x984a,0x788a,1,2,0x6ca,0x11ca,0x988a,0x78ca,0x54b,0x1b,0x1b, +0,0,0,0,0x18,0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0x18,0x1b,0x1b,0x18,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x18,0x18,0x1b,0x1b,0x18,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x14,0x15,0x14,0x15, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x14,0x15,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18, +0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x2cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b, +0xa4b,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x2cb,0x30b,0x34b,0x38b,0x3cb, +0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,0xa4b, +0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b, +0x98b,0x9cb,0xa0b,0xa4b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15, +0x14,0x15,0x14,0x15,0x14,0x15,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb, +0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b, +0x48b,0x4cb,0x50b,0x7cb,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x18,0x18,0x18,0x18,0x18,0x14,0x15,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x14, +0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14, +0x15,0x14,0x15,0x14,0x15,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0x14,0x15,0x14,0x15,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0x14,0x15,0x18,0x18,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0x18,0x1b,0x1b,0x18,0x18,0x18,0x18,0x18,0x18,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,0,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,0,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,1,2,1,1,1,2,2,1, +2,1,2,1,2,1,1,1,1,2,1,2,2,1,2,2, +2,2,2,2,4,4,1,1,1,2,1,2,2,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,1,2,1,2,6,6,6,1,2,0,0,0,0, +0,0x17,0x17,0x17,0x17,0x344b,0x17,0x17,2,2,2,2,2,2,0,2, +0,0,0,0,0,2,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,4, +0x17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6, +5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,0, +5,5,5,5,5,5,5,0,5,5,5,5,5,5,5,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,0,0,0,0,0,0,0,0,0, +0x17,0x17,0x1c,0x1d,0x1c,0x1d,0x17,0x17,0x17,0x1c,0x1d,0x17,0x1c,0x1d,0x17,0x17, +0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x13,0x17,0x17,0x13,0x17,0x1c,0x1d,0x17,0x17, +0x1c,0x1d,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x17,0x17,0x17,0x17,0x17,4, +0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x13,0x13,0x17,0x17,0x17,0x17, +0x13,0x17,0x14,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, +0x1b,0x1b,0x17,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0,0,0,0,0x1b,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,6,6, +6,6,8,8,0x13,4,4,4,4,4,0x1b,0x1b,0x7ca,0xa4a,0xcca,4, +5,0x17,0x1b,0x1b,0xc,0x17,0x17,0x17,0x1b,4,5,0x54a,0x14,0x15,0x14,0x15, +0x14,0x15,0x14,0x15,0x14,0x15,0x1b,0x1b,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15, +0x13,0x14,0x15,0x15,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0,0,6,6,0x1a, +0x1a,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x17, +4,4,4,5,0,0,0,0,0,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,0,0x1b,0x1b,0x58b,0x5cb,0x60b,0x64b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1b,0xa8b,0xacb,0xb0b, +0xb4b,0xb8b,0xbcb,0xc0b,0xc4b,0xc8b,0xccb,0xd0b,0xd4b,0xd8b,0xdcb,0xe0b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0xe4b,0xe8b,0xecb, +0xf0b,0xf4b,0xf8b,0xfcb,0x100b,0x104b,0x108b,0x10cb,0x110b,0x114b,0x118b,0x11cb,5,5,5,5, +5,0x685,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x5c5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x685,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0x705,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0x585,5,5,0x705,5,5,5,0x7885, +5,0x605,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0x785,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0x5c5,5,5,5,5,5,5,5,0x685,5,0x645,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0x7985,0x7c5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0x7845,5,5,5,5, +5,5,5,5,0x605,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,0x685,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0x1e45,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0x7985,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x7a85,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0x5c5,5,0x745,5,0x6c5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0x7c5,5,0x7845,0xa45,0xcc5,5,5,5,5,5,5,0xf45,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0x605,0x605,0x605,0x605,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,0x645,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0x585,5,5,5,5,5,5,5,0x585,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0x585,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0x785,0xa45,5,5,5,5, +5,5,5,5,5,5,5,5,0x585,0x5c5,0x605,5,0x5c5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x7c5,5, +5,5,5,5,5,5,5,5,5,5,5,5,0x745,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,0x705,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x785,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x1e45,5, +5,5,5,5,5,5,0x645,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0x7885,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,0x5c5,5,5,5,5,0x5c5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,0x5c5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,0x7845,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x6c5,5, +5,5,5,5,0x1e45,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0x6c5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0x545,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,4,5,5,5,5,5,5,5,5,5,5,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,4,0x17,0x17,0x17, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,2,1,2,1,2,4,4,6,6, +1,2,1,2,1,2,1,2,1,2,1,2,1,2,5,6, +7,7,7,0x17,6,6,6,6,6,6,6,6,6,6,0x17,4, +5,5,5,5,5,5,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x54a, +6,6,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0, +0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a, +0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,4,4,4,4,4,4,4,4,4, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,1,2,5,4,4,2,5,5,5,5,5, +0x1a,0x1a,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +2,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2, +1,2,1,2,4,2,2,2,2,2,2,2,2,1,2,1, +2,1,1,2,1,2,1,2,1,2,1,2,4,0x1a,0x1a,1, +2,1,2,5,1,2,1,2,2,2,1,2,1,2,1,2, +1,2,1,2,1,2,1,1,1,1,1,2,1,1,1,1, +1,2,1,2,1,2,1,2,1,2,1,2,0,0,1,2, +1,1,1,1,2,1,2,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,5,6,5, +5,5,6,5,5,5,5,6,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,8, +8,6,6,8,0x1b,0x1b,0x1b,0x1b,6,0,0,0,0x34cb,0x344b,0x3ccb,0x37cb, +0x35cb,0x3fcb,0x1b,0x1b,0x19,0x1b,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,8,8,8,8, +6,6,0,0,0,0,0,0,0,0,0x17,0x17,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,8,8,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,8,8,8,8, +8,8,8,8,8,8,8,8,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,5,5,5,5,5,5, +0x17,0x17,0x17,5,0x17,5,5,6,5,5,5,5,5,5,6,6, +6,6,6,6,6,6,0x17,0x17,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6, +6,6,6,6,6,6,8,8,0,0,0,0,0,0,0,0, +0,0,0,0x17,8,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, +0x17,0x17,0,4,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, +0,0,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,6,8,8,6,6,6,6,8,8, +6,6,8,8,5,5,5,5,5,6,4,5,5,5,5,5, +5,5,5,5,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,5,5, +5,5,5,0,5,5,5,5,5,5,5,5,5,6,6,6, +6,6,6,8,8,6,6,8,8,6,6,0,0,0,0,0, +0,0,0,0,5,5,5,6,5,5,5,5,5,5,5,5, +6,8,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, +0x17,0x17,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,4,5,5,5,5,5,5,0x1b,0x1b,0x1b,5,8, +6,8,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,6,5,6,6,6,5,5,6,6,5,5,5, +5,5,6,6,5,6,5,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5, +5,4,0x17,0x17,5,5,5,5,5,5,5,5,5,5,5,8, +6,6,8,8,0x17,0x17,5,4,4,8,6,0,0,0,0,0, +0,0,0,0,0,5,5,5,5,5,5,0,0,5,5,5, +5,5,5,0,0,5,5,5,5,5,5,0,0,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,0,5,5,5,5, +5,5,5,0,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0x1a, +4,4,4,4,2,2,2,2,2,2,2,2,2,4,0x1a,0x1a, +0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,5,5,5,8,8,6,8,8,6,8,8,0x17, +8,6,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0, +0,0,0,0,5,5,5,5,0,0,0,0,0,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0,0,0,0,5,5,5,5,5,5,5,0,0,0,0,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, +0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, +0x12,0x12,0x12,0x12,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, +0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, +0x11,0x11,0x11,0x11,5,5,5,5,5,5,5,5,5,5,5,0x605, +5,5,5,5,5,5,5,0x7c5,5,5,5,5,0x5c5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0x6c5,5,0x6c5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0x7c5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0x18,5,5,5,5,5,5,5,5,5,5, +5,5,5,0,5,5,5,5,5,0,5,0,5,5,0,5, +5,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,2,2,2,2, +2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,2, +2,2,2,2,0,0,0,0,0,5,6,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x1a,0x1a, +0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x15,0x14, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,0x19,0x1b,0,0,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x14, +0x15,0x17,0,0,0,0,0,0,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,0x17,0x13,0x13,0x16,0x16,0x14,0x15,0x14, +0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x14,0x15,0x17,0x17,0x14,0x15,0x17,0x17,0x17, +0x17,0x16,0x16,0x16,0x17,0x17,0x17,0,0x17,0x17,0x17,0x17,0x13,0x14,0x15,0x14, +0x15,0x14,0x15,0x17,0x17,0x17,0x18,0x13,0x18,0x18,0x18,0,0x17,0x19,0x17,0x17, +0,0,0,0,5,5,5,5,5,0,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0,0,0x10,0,0,5,5, +5,5,5,5,0,0,5,5,5,5,5,5,0,0,5,5, +5,5,5,5,0,0,5,5,5,0,0,0,0x19,0x19,0x18,0x1a, +0x1b,0x19,0x19,0,0x1b,0x18,0x18,0x18,0x18,0x1b,0x1b,0,0,0,0,0, +0,0,0,0,0,0x10,0x10,0x10,0x1b,0x1b,0,0,0,0x17,0x17,0x17, +0x19,0x17,0x17,0x17,0x14,0x15,0x17,0x18,0x17,0x13,0x17,0x17,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x18,0x18,0x18,0x17,0x1a,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,0x14,0x18,0x15,0x18,0x14,0x15,0x17,0x14,0x15, +0x17,0x17,5,5,5,5,5,5,5,5,5,5,4,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,4,4, +5,5,5,5,5,5,5,5,5,5,5,5,0,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,0,5,5,0,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,0,0,0,0,0,0xb00b,0xb80b,0x784b,0x804b, +0x884b,0x904b,0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b, +0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x17,0x17,0x17,0, +0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b, +0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0x7ca,0x7ca,0x7ca,0x7ca, +0x7ca,0xcca,0x11ca,0x11ca,0x11ca,0x11ca,0x1e4a,0x880a,0x980a,0x980a,0x980a,0x980a,0x980a,0x784a,0x984a,0x68a, +0x11ca,0x344b,0x344b,0x388b,0x3ccb,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x54b,0x34cb,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x34ca,0x344a,0x58a,0x68a,0x11ca,0x980a,0x984a,0x988a, +0x68a,0x7ca,0x11ca,0x1e4a,0x980a,0x784a,0x984a,0x68a,0x7ca,0x11ca,0x1e4a,0x980a,0x784a,0x788a,0x988a,0x7ca, +0x58a,0x58a,0x58a,0x5ca,0x5ca,0x5ca,0x5ca,0x68a,0x1b,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,6,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,6,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b, +0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b, +0xa00b,0xa80b,0xb00b,0xb80b,0,0,0,0,0x58b,0x68b,0x7cb,0x11cb,0,0,0,0, +0,0,0,0,0,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0x1bca,5,5,5,5,5,5, +5,5,0xb80a,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,6,6, +6,6,6,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,0,0x17,5,5,5,5,0,0,0,0, +5,5,5,5,5,5,5,5,0x17,0x58a,0x5ca,0x7ca,0xa4a,0x1e4a,0,0, +0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, +0x249,0x289,0,0,0,0,0,0,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,0,0,0,0,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0, +2,2,2,2,2,2,2,2,5,5,5,5,5,5,5,5, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0,0,0,0,0,0,0,0, +0,0,0,0x17,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0,5,5,0,0,0, +5,0,0,5,5,5,5,5,5,5,0,0,5,0,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,0,0x17,0x58b,0x5cb,0x60b,0x7cb,0xa4b,0x1e4b,0x784b,0x788b, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,0x1b,0x1b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x7cb,0xa4b, +0,0,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x64b,0x68b,0x7cb,0xa4b,0x1e4b, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,0,5,5,0,0,0,0,0,0x58b,0x68b,0x7cb,0xa4b,0x1e4b, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,0x58b,0x7cb,0xa4b,0x1e4b,0x5cb,0x60b,0,0,0,0x17, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0,0,0,0,0,0x17, +0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b,0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x78cb,0x80cb,0x88cb, +0x90cb,0x98cb,0xa0cb,0xa8cb,0xb0cb,0xb8cb,0x36cb,0x354b,0x34cb,0x348b,0x46cb,0x344b,0x4ecb,0x388b,0x3ccb,0x454b, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0,0,0,0,0x5ecb,0x344b,5,5, +0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb, +0,0,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0x984b, +0x30b,0x34b,0x38b,0x3cb,0x7cb,0xa4b,0x1e4b,0x784b,0x344b,0,0,0,0,0,0,0, +0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0, +5,6,6,6,0,6,6,0,0,0,0,0,6,6,6,6, +5,5,5,5,0,5,5,5,0,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,0,0,6,6,6,0,0,0,0,6,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0x58b,0x11cb,0x17,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0x58b,0x7cb,0xa4b,5,5,5,5, +5,6,6,0,0,0,0,0x58b,0x68b,0x7cb,0xa4b,0x1e4b,0x17,0x17,0x17,0x17, +0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,0x1b,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0, +0,0x17,0x17,0x17,0x17,0x17,0x17,0x17,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0, +0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0x784b,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0,0,0,0,0, +0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0x784b,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0,0,0,0,0,0, +0,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,0,0, +0,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0, +0,0,0x58b,0x68b,0x7cb,0x11cb,0x1e4b,0x784b,5,5,5,5,6,6,6,6, +0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, +0x249,0x289,0,0,0,0,0,0,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb, +0x50b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b,0xa00b, +0xa80b,0xb00b,0xb80b,0x344b,0x34cb,0x348b,0x388b,0,5,5,5,5,5,5,5,5, +5,5,0,6,6,0x13,0,0,5,5,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0x58b,0x5cb,0x60b,0x64b,0x68b,0x7cb,0xa4b,0xccb,0x1e4b,0x344b,5, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,6,6,6,6,6,6, +6,6,6,6,6,0x58b,0x7cb,0xa4b,0x1e4b,0x17,0x17,0x17,0x17,0x17,0,0, +0,0,0,0,5,5,5,5,5,0x58b,0x5cb,0x60b,0x64b,0x7cb,0xa4b,0x1e4b, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x784b,0x49,0x89,0xc9,0x109,0x149,0x189, +0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,6,8,6,8,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,6,6,6,6,6,6,6,0x17,0x17,0x17,0x17,0x17, +0x17,0x17,0,0,0,0,0x30b,0x34b,0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x7cb, +0xa4b,0xccb,0xf4b,0x11cb,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,8,8,8,6,6,6,6,8,8,6,6,0x17, +0x17,0x10,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0,0, +0,0x10,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0,5,5,5,5, +5,5,5,6,6,6,6,6,8,6,6,6,6,6,6,6, +6,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0x17,0x17, +5,8,8,5,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,6, +0x17,0x17,5,0,0,0,0,0,0,0,0,0,8,5,5,5, +5,0x17,0x17,0x17,0x17,6,6,6,6,0x17,8,6,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,5,0x17,5,0x17,0x17,0x17,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,8, +8,8,6,6,6,6,6,6,6,6,6,8,0,0x58b,0x5cb,0x60b, +0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b, +0x784b,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,8,8,8,6,6,6,8,8, +6,8,6,6,0x17,0x17,0x17,0x17,0x17,0x17,6,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, +5,0,5,5,5,5,0,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,0,5,5,5,5,5,5,5,5,5, +5,0x17,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,8,8,8,6,6,6,6,6, +6,6,6,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, +0x249,0x289,0,0,0,0,0,0,5,5,8,8,0,0,6,6, +6,6,6,6,6,0,0,0,6,6,6,6,6,0,0,0, +0,0,0,0,0,0,0,0,6,6,8,8,0,5,5,5, +5,5,5,5,5,0,0,5,5,0,0,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5, +5,0,5,5,0,5,5,5,5,5,0,6,6,5,8,8, +6,8,8,8,8,0,0,8,8,0,0,8,8,8,0,0, +5,0,0,0,0,0,0,8,0,0,0,0,0,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,8,8,8,6,6,6,6,6,6,6,6, +8,8,6,6,6,8,6,5,5,5,5,0x17,0x17,0x17,0x17,0x17, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x17,0x17,0,0x17,6,5, +5,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +8,8,8,6,6,6,6,6,6,8,6,8,8,8,8,6, +6,8,6,6,5,5,0x17,5,0,0,0,0,0,0,0,0, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,8, +8,8,6,6,6,6,0,0,8,8,8,8,6,6,8,6, +6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17, +0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,5,5,5,5,6,6,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +8,8,8,6,6,6,6,6,6,6,6,8,8,6,8,6, +6,0x17,0x17,0x17,5,0,0,0,0,0,0,0,0,0,0,0, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, +0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,6,8,6,8,8, +6,6,6,6,6,6,8,6,5,0,0,0,0,0,0,0, +8,8,6,6,6,6,8,6,6,6,6,6,0,0,0,0, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x7cb,0xa4b,0x17,0x17,0x17,0x1b, +5,5,5,5,5,5,5,5,5,5,5,5,8,8,8,6, +6,6,6,6,6,6,6,6,8,6,6,0x17,0,0,0,0, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b, +0x16cb,0x194b,0x1bcb,0,0,0,0,0,0,0,0,0,0,0,0,5, +8,5,8,6,0x17,0x17,0x17,0,0,0,0,0,0,0,0,0, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, +5,5,5,5,5,5,5,0,0,5,0,0,5,5,5,5, +5,5,5,5,0,5,5,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,8,8,8,8,8,8,0,8, +8,0,0,6,6,8,6,5,6,5,0x17,5,8,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +0,0,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,8,8,8,6,6,6,6, +0,0,6,6,8,8,8,8,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,6,6,6,6,6, +6,8,5,6,6,6,6,0x17,0x17,0x17,0x17,0x17,0x17,0x17,0x17,6, +0,0,0,0,0,0,0,0,5,6,6,6,6,6,6,8, +8,6,6,6,5,5,5,5,5,6,6,6,6,6,6,6, +6,6,6,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0x17,0x17,0x17,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,8, +6,6,0x17,0x17,0x17,5,0x17,0x17,5,0x17,0x17,0x17,0x17,0x17,0,0, +0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, +0x249,0x289,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb, +0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0,0,0,0x17,0x17,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,8,6,6,6,6,6,6,6,0,6,6,6,6, +6,6,8,6,6,6,6,6,6,6,6,6,0,8,6,6, +6,6,6,6,6,8,6,6,8,6,6,0,0,0,0,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0,0,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,5,6,0,0,0,0,0,0,0,0, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0,0, +5,5,5,5,5,5,5,0,5,5,0,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,6,6,6,6,6,6,0,0,0,6,0,6,6,0,6, +5,5,5,5,5,5,5,5,5,5,8,8,8,8,8,0, +6,6,0,8,8,6,8,6,5,0,0,0,0,0,0,0, +5,5,5,5,5,5,0,5,5,0,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,6,6,8,8,0x17,0x17,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x19,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0, +0,0,0,0,0,0,0,0,0,0,0,0x17,0xcd0b,0xcc0b,0xcb0b,0xd00b, +0xca0b,0xcf0b,0xcb4b,0xd04b,0xc90b,0x37cb,0x37cb,0x364b,0x35cb,0xc94b,0x3fcb,0x350b,0x34cb,0x344b,0x344b,0x3ccb, +0xcd0b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x19,0x19,0x19,0x34ca,0x354a,0x34ca,0x34ca, +0x344a,0x348a,0x388a,0xf4a,0x11ca,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0,0x17,0x17,0x17,0x17, +0x17,0,0,0,0,0,0,0,0,0,0,0,0x5ca,0x60a,0x64a,0x68a, +0x6ca,0x70a,0x74a,0x78a,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x64a,0x68a,0x6ca,0x70a,0x74a, +0x78a,0x58a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca,0x60a,0x64a,0x68a,0x5ca, +0x60a,0x60a,0x64a,0x68a,0x6ca,0x70a,0x74a,0x78a,0x58a,0x5ca,0x60a,0x60a,0x64a,0x68a,0xc08a,0xc18a, +0x58a,0x5ca,0x60a,0x60a,0x64a,0x68a,0x60a,0x60a,0x64a,0x64a,0x64a,0x64a,0x6ca,0x70a,0x70a,0x70a, +0x74a,0x74a,0x78a,0x78a,0x78a,0x78a,0x5ca,0x60a,0x64a,0x68a,0x6ca,0x58a,0x5ca,0x60a,0x64a,0x64a, +0x68a,0x68a,0x5ca,0x60a,0x58a,0x5ca,0x348a,0x388a,0x454a,0x348a,0x388a,0x35ca,5,5,5,5, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0,0x10,0x10,0x10,0x10, +0x10,0x10,0x10,0x10,0x10,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0,0,0,0x17,0x17,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0,0,6,6,6,6, +6,0x17,0,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,6,6,6,6, +6,6,6,0x17,0x17,0x17,0x17,0x17,0x1b,0x1b,0x1b,0x1b,4,4,4,4, +0x17,0x1b,0,0,0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0x249,0x289,0,0x7cb,0x1e4b,0x788b,0x790b,0x798b,0x7a0b,0x7a8b,0,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0,0,0,0,0,5,5,5,0x54b,0x58b,0x5cb,0x60b, +0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x80b,0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b, +0x58b,0x5cb,0x60b,0x17,0x17,0x17,0x17,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,0,0,0,0,6,5,8,8,8, +8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, +0,0,0,0,0,0,0,6,6,6,6,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,0x17,4,6,0,0,0, +0,0,0,0,0,0,0,0,8,8,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,0,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,0,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, +0,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,0,0,0x1b,6,6,0x17,0x10,0x10,0x10,0x10,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0, +0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,8,8,6,6,6,0x1b,0x1b, +0x1b,8,8,8,8,8,8,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,6, +6,6,6,6,6,6,6,0x1b,0x1b,6,6,6,6,6,6,6, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,6,6,6,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x1b,0x1b,6,6,6,0x1b,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x54b,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0x80b, +0x84b,0x88b,0x8cb,0x90b,0x94b,0x98b,0x9cb,0xa0b,0,0,0,0,0,0,0,0, +0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb,0xa4b,0xccb, +0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x58b,0x5cb,0x60b,0x64b,0x68b,0x58b,0x68b,0,0,0, +0,0,0,0,0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289, +0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x49,0x89,0xc9,0x109,0x149,0x189, +0x1c9,0x209,0x249,0x289,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,2,2,2,2,2,2,2,0,2,2,2,2,2,2, +2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,1,0,1,1,0,0,1,0, +0,1,1,0,0,1,1,1,1,0,1,1,1,1,1,1, +1,1,2,2,2,2,0,2,0,2,2,2,2,2,2,2, +0,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2, +1,1,0,1,1,1,1,0,0,1,1,1,1,1,1,1, +1,0,1,1,1,1,1,1,1,0,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,1,1,0,1,1,1,1,0,1,1,1,1, +1,0,1,0,0,0,1,1,1,1,1,1,1,0,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2, +2,2,0,0,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,0x18,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,0x18,2,2,2,2,2,2,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,0x18,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,0x18,2,2, +2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,2,2,2,0x18,2,2,2,2,2,2,1,2, +0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209,0x249,0x289,0x49,0x89,0xc9,0x109, +0x149,0x189,0x1c9,0x209,0,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,0x1b,0x1b,0x1b,0x1b,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,6,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +6,0x1b,0x1b,0x17,0x17,0x17,0x17,0x17,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,6,6,6,6,6,6,6,6,0, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,0,0,6,6,6,6,6,6,6,0,6,6,0,6,6, +6,6,6,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, +0x249,0x289,0,0,0,0,5,0x1b,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,5,0,0,0,6,6,6,6,6,6,6,4, +4,4,4,4,4,4,0,0,5,5,5,5,5,5,5,5, +5,5,5,5,6,6,6,6,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, +0x249,0x289,0,0,0,0,0,0x19,5,5,5,5,5,0,0,0x58b, +0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,6,6,6,6,6,6,6,0, +0,0,0,0,0,0,0,0,2,2,2,2,6,6,6,6, +6,6,6,4,0,0,0,0,0x49,0x89,0xc9,0x109,0x149,0x189,0x1c9,0x209, +0x249,0x289,0,0,0,0,0x17,0x17,1,1,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b, +0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x78cb,0x794b,0x814b,0x58b,0x5cb,0x60b,0x64b,0x68b, +0x6cb,0x70b,0x74b,0x78b,0x1b,0x34cb,0x344b,0x3ccb,0x19,0x58b,0x5cb,0x788b,0x78cb,0,0,0, +0,0,0,0,0,0,0,0,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b, +0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b, +0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x78cb,0x80cb,0x984b,0xa04b,0xa84b,0xb04b,0xb84b,0x788b,0x808b,0x888b, +0x908b,0x988b,0xa08b,0xa88b,0xb08b,0xb88b,0x1b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b,0x74b,0x78b,0x7cb, +0x900b,0xa00b,0x804b,0x788b,0x344b,0x354b,0,0,0,0x58b,0x5cb,0x60b,0x64b,0x68b,0x6cb,0x70b, +0x74b,0x78b,0x7cb,0xa4b,0xccb,0xf4b,0x11cb,0x144b,0x16cb,0x194b,0x1bcb,0x1e4b,0x800b,0x880b,0x900b,0x980b, +0xa00b,0xa80b,0xb00b,0xb80b,0x784b,0x804b,0x884b,0x904b,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x18,0x18,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,5,5,5,0,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0,5,5,0,5,0,0,5, +0,5,5,5,5,5,5,5,5,5,5,0,5,5,5,5, +0,5,0,5,0,0,0,0,0,0,5,0,0,0,0,5, +0,5,0,5,0,5,5,5,0,5,5,0,5,0,0,5, +0,5,0,5,0,5,0,5,0,5,5,0,5,0,0,5, +5,5,5,0,5,5,5,5,5,5,5,0,5,5,5,5, +0,5,5,5,5,0,5,0,5,5,5,5,5,5,5,5, +5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0,0,0,0,0,5,5,5,0,5,5,5, +5,5,0,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x2cb,0x2cb,0x30b,0x34b, +0x38b,0x3cb,0x40b,0x44b,0x48b,0x4cb,0x50b,0x54b,0x54b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0, +0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0x1b,0x1b,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x1a,0x1a,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0, +0x1b,0x1b,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0, +0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0x1b,0x1b,0x1b,0,0,0,0,0, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0,0,0,0,0,0,0,0,0,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0, +0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,0x1b,5,0x705,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,0x645,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0x645,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,0x685,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0xcc5,5,5,5,5, +5,5,5,5,0xf45,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,0xf45,5,5,5,5,5,5,5,5,5,5,5, +5,5,0x6c5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0x605,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0x605,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,0x605,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0x605,5,5,5,5, +5,5,5,5,5,5,5,5,5,0x645,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0x785,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, +0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, +0,0x10,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11, +0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0x11,0,0, +0,0,0,0 +}; + +static const UTrie2 propsTrie={ + propsTrie_index, + propsTrie_index+4532, + NULL, + 4532, + 17744, + 0xa40, + 0x1234, + 0x0, + 0x0, + 0x110000, + 0x5700, + NULL, 0, FALSE, FALSE, 0, NULL +}; + +static const uint16_t propsVectorsTrie_index[31228]={ +0x4e8,0x4f0,0x4f8,0x500,0x518,0x520,0x528,0x530,0x538,0x540,0x548,0x550,0x558,0x560,0x568,0x570, +0x577,0x57f,0x587,0x58f,0x592,0x59a,0x5a2,0x5aa,0x5b2,0x5ba,0x5c2,0x5ca,0x5d2,0x5da,0x5e2,0x5ea, +0x5f2,0x5fa,0x601,0x609,0x611,0x619,0x621,0x629,0x631,0x639,0x63e,0x646,0x64d,0x655,0x65d,0x665, +0x66d,0x675,0x67d,0x685,0x68c,0x694,0x69c,0x6a4,0x6ac,0x6b4,0x6bc,0x6c4,0x6cc,0x6d4,0x6dc,0x6e4, +0x1a38,0xd5e,0xe35,0x6ec,0x508,0xe9c,0xea4,0x1bf2,0x1300,0x1310,0x12f8,0x1308,0x7c5,0x7cb,0x7d3,0x7db, +0x7e3,0x7e9,0x7f1,0x7f9,0x801,0x807,0x80f,0x817,0x81f,0x825,0x82d,0x835,0x83d,0x845,0x84d,0x854, +0x85c,0x862,0x86a,0x872,0x87a,0x880,0x888,0x890,0x898,0x1318,0x8a0,0x8a8,0x8b0,0x8b7,0x8bf,0x8c7, +0x8cf,0x8d3,0x8db,0x8e2,0x8ea,0x8f2,0x8fa,0x902,0x162c,0x1634,0x90a,0x912,0x91a,0x922,0x92a,0x931, +0x1692,0x1682,0x168a,0x1973,0x197b,0x1328,0x939,0x1320,0x1572,0x1572,0x1574,0x133c,0x133d,0x1330,0x1332,0x1334, +0x169a,0x169c,0x941,0x169c,0x949,0x94e,0x956,0x16a1,0x95c,0x169c,0x962,0x96a,0xc39,0x16a9,0x16a9,0x972, +0x16b9,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba,0x16ba, +0x16ba,0x16ba,0x16ba,0x16b1,0x97a,0x16c2,0x16c2,0x982,0xb59,0xb61,0xb69,0xb71,0x16d2,0x16ca,0x98a,0x992, +0x99a,0x16dc,0x16e4,0x9a2,0x16da,0x9aa,0x1a40,0xd66,0xb79,0xb81,0xb89,0xb8e,0x18e1,0xc6c,0xc73,0x1849, +0xc09,0x1a48,0xd6e,0xd76,0xd7e,0xd86,0xf47,0xf48,0x1939,0x193e,0xca8,0xcb0,0x19af,0x19b7,0x1b11,0xe3d, +0x19bf,0xcf2,0xcfa,0x19c7,0x10f6,0x1196,0xf27,0xd8e,0x1869,0x1851,0x1861,0x1859,0x18f9,0x18f1,0x18b9,0xc19, +0x1345,0x1345,0x1345,0x1345,0x1348,0x1345,0x1345,0x1350,0x9b2,0x1358,0x9b6,0x9be,0x1358,0x9c6,0x9ce,0x9d6, +0x1368,0x1360,0x1370,0x9de,0x9e6,0x1378,0x9ee,0x9f6,0x1380,0x1388,0x1390,0x1398,0x9fe,0x13a0,0x13a7,0x13af, +0x13b7,0x13bf,0x13c7,0x13cf,0x13d7,0x13de,0x13e6,0x13ee,0x13f6,0x13fe,0x1401,0x1403,0x16ec,0x17dc,0x17e2,0x1929, +0x140b,0xa06,0xa0e,0x1525,0x152a,0x152d,0x1535,0x1413,0x153d,0x153d,0x1423,0x141b,0x142b,0x1433,0x143b,0x1443, +0x144b,0x1453,0x145b,0x1463,0x17ea,0x1841,0x1983,0x1ad9,0x1473,0x147a,0x1482,0x148a,0x146b,0x1492,0x17f2,0x17f9, +0x16f4,0x16f4,0x16f4,0x16f4,0x16f4,0x16f4,0x16f4,0x16f4,0x1801,0x1804,0x1801,0x1801,0x180c,0x1813,0x1815,0x181c, +0x1824,0x1828,0x1828,0x182b,0x1828,0x1828,0x1831,0x1828,0x1871,0x1931,0x198b,0xb96,0xb9c,0x1c36,0x1c3e,0x1d15, +0x18d1,0xc49,0xc4d,0x1946,0x18c1,0x18c1,0x18c1,0xc21,0x18c9,0xc41,0x1911,0xc98,0xc29,0xc31,0xc31,0x19cf, +0x1901,0x1993,0xc83,0xc88,0xa16,0x16fc,0x16fc,0xa1e,0x1704,0x1704,0x1704,0x1704,0x1704,0x1704,0xa26,0x6f0, +0x155a,0x157c,0xa2e,0x1584,0xa36,0x158c,0x1594,0x159c,0xa3e,0xa43,0x15a4,0x15ab,0xa48,0x170c,0x1921,0xc11, +0xa50,0x1606,0x160d,0x15b3,0x1615,0x161c,0x15bb,0x15bf,0x15d8,0x15d8,0x15da,0x15c7,0x15cf,0x15cf,0x15d0,0x1624, +0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714, +0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714, +0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714, +0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714, +0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714, +0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714, +0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714, +0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714, +0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714, +0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714, +0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714, +0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714, +0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1714,0x1717,0x1879,0x1879, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2, +0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e2,0x15e9,0x1a30,0x12b5, +0x171f,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725, +0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725,0x1725, +0x1725,0x1725,0x1725,0x1725,0xa58,0x172d,0xa60,0x1a50,0x19db,0x19db,0x19db,0x19db,0x19db,0x19db,0x19db,0x19db, +0x19d7,0xd02,0x19eb,0x19e3,0x19ed,0x1a58,0x1a58,0xd96,0x18d9,0x194e,0x19a3,0x19a7,0x199b,0x1b09,0xcb8,0xcbb, +0x1909,0xc90,0x1956,0xcc3,0x19f5,0x19f8,0xd0a,0x1a60,0x1a08,0x1a00,0xd12,0xd9e,0x1a68,0x1a6c,0xda6,0xff0, +0x1a10,0xd1a,0xd22,0x1a74,0x1a84,0x1a7c,0xdae,0xef7,0xe45,0xe4d,0x1c85,0xfa8,0x1d32,0x1d32,0x1a8c,0xdb6, +0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675, +0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677, +0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679, +0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674, +0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676, +0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678, +0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a, +0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675, +0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677, +0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679, +0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674, +0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676, +0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678, +0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a, +0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675, +0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677, +0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679, +0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674, +0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676, +0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678, +0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a, +0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0x167a,0x1674,0x1675,0x1676,0x1677,0x1678,0x1679,0xa68,0xdbe,0xdc1, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, +0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c,0x164c, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545, +0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x1545,0x15f1,0x15f1,0x15f1,0x15f1,0x15f1,0x15f1,0x15f1,0x15f1, +0x15f6,0x15fe,0x1839,0x12bd,0x1919,0x1919,0x12c1,0x12c8,0xa70,0xa78,0xa80,0x14b2,0x14b9,0x14c1,0xa88,0x14c9, +0x14fa,0x14fa,0x14a2,0x14aa,0x14d1,0x14f1,0x14f2,0x1502,0x14d9,0x149a,0xa90,0x14e1,0xa98,0x14e9,0xaa0,0xaa4, +0xca0,0x150a,0xaac,0xab4,0x1512,0x1518,0x151d,0xabc,0xacc,0x1562,0x156a,0x154d,0x1552,0xad4,0xadc,0xac4, +0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c, +0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x163c,0x1644,0x1644,0x1644,0x1644, +0x1420,0x1420,0x1460,0x14a0,0x14e0,0x1520,0x1560,0x15a0,0x15dc,0x161c,0x1648,0x1688,0x16c8,0x1708,0x1748,0x1788, +0x17c8,0x1804,0x1844,0x1884,0x18c4,0x18f8,0x1934,0x1974,0x19b4,0x19f4,0x1a30,0x1a70,0x1ab0,0x1af0,0x1b30,0x1b70, +0xa80,0xac0,0xb00,0xb40,0xb80,0xa40,0xe75,0xa40,0xe97,0xa40,0xa40,0xa40,0xa40,0xbc0,0x12dd,0x12dd, +0xed7,0xc00,0xa40,0xa40,0xa40,0xa40,0xf17,0xc2d,0xa40,0xa40,0xc6d,0xcad,0xced,0xd2d,0xe35,0xda5, +0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d,0x121d, +0x121d,0x121d,0x121d,0x121d,0xf57,0x125d,0x1092,0x10d2,0x129d,0x10dd,0x131d,0x131d,0x131d,0xf97,0xfb7,0xff7, +0x135d,0x135d,0x1037,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7, +0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0xfb7,0x1052, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65, +0xde5,0xdf5,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40, +0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xa40,0xd65, +0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d, +0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x119d,0x111d, +0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd, +0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x11dd,0x115d, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0xba4,0xbab,0xbb3,0xbbb,0x1881,0x1881,0x1881,0xbc3,0xbcb,0xbce,0x18b1,0x18a9,0xc01,0xd2a,0xd2e,0xd32, +0x508,0x508,0x508,0x508,0xd3a,0x1a18,0xd42,0xf3f,0x1735,0xae4,0xaea,0x1000,0xbd6,0x18e9,0xc7b,0x508, +0x174a,0x173d,0x1742,0x1889,0xbde,0xbe6,0x1134,0x113a,0x1c6d,0xf5d,0x1c5d,0x6f8,0x508,0x508,0x508,0x508, +0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0x1c8d,0xfb0,0xfb8,0xfc0,0x508,0x508,0x508,0x508, +0xbee,0xbf1,0xdc9,0x1cd5,0xff8,0x700,0x508,0x1092,0xccb,0xd4a,0x508,0x508,0x1c02,0xeff,0xf07,0x1d1d, +0xc55,0xc5c,0xc64,0x1a94,0x1cb5,0x508,0x1c95,0xfd0,0x1a9c,0xdd1,0xdd9,0xde1,0x1020,0x708,0x508,0x508, +0x1aa4,0x1aa4,0x710,0x508,0x1d4a,0x10aa,0x1d42,0x10b2,0x1e0e,0x11ac,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0xde9,0x1e66,0x1291,0x508,0x508,0x1e2e,0x11d4,0x11db,0x718,0x508,0x71c,0x1248,0x11e3, +0x1b19,0x1b1b,0xe55,0xe5c,0x1aac,0x1ab4,0xdf1,0xf1f,0x1bfa,0xee7,0xeef,0xfc8,0x1c1a,0x1c1e,0x1c26,0x1040, +0xf93,0xf98,0x724,0x508,0x109a,0x10a2,0x1c7d,0xfa0,0xf75,0xf7b,0xf83,0xf8b,0x508,0x508,0x508,0x508, +0x1daa,0x1da2,0x1124,0x112c,0x1cfd,0x1cf5,0x1068,0x508,0x508,0x508,0x508,0x508,0x1ce5,0x1028,0x1030,0x1038, +0x1cad,0x1ca5,0xfe0,0x111c,0x1c2e,0xf2f,0x72c,0x508,0x1078,0x1080,0x508,0x508,0x508,0x508,0x508,0x508, +0x1e06,0x118e,0x734,0x508,0x508,0x1d0d,0x1d05,0x1070,0x1250,0x1256,0x125e,0x508,0x508,0x11eb,0x11ef,0x11f7, +0x1dde,0x1dd6,0x1176,0x1dce,0x1dc6,0x73c,0x1cdd,0x1018,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x10da,0x10df,0x10e7,0x10ee,0x110e,0x1114,0x508,0x508,0x115a,0x115e,0x1166,0x119e,0x11a4,0x744,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x11bc,0x508,0x508,0x508,0x508,0x508,0x748,0x1e4e,0x1238, +0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e, +0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x195e,0x1963,0xcd3,0xcda,0xcda,0xcda, +0x196b,0x196b,0x196b,0xce2,0x1d3a,0x1d3a,0x1d3a,0x1d3a,0x1d3a,0x1d3a,0x750,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23,0x1b23, +0x1b23,0xe64,0xfe8,0x758,0x508,0x508,0x75c,0xf37,0x1ccd,0x1cc5,0x1008,0x1010,0x764,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x1e26,0x1e1e,0x11cc,0x508,0x508,0x508,0x1c12,0x1c12,0xf0f,0x1c0a,0xf17,0x508,0x508,0x1106, +0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba, +0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dba,0x1dbe,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76, +0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1e76,0x1266,0x126c,0x1286,0x1289,0x1289,0x1289,0x76c,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1c65,0x1c65,0x1c65, +0xf50,0xf55,0x774,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1752,0x1752,0x1752, +0x1752,0x1752,0x1752,0x1752,0xaf2,0x1762,0xafa,0x1763,0x175a,0x176b,0x1771,0x1779,0xb02,0x18a1,0x18a1,0x77c, +0x508,0x508,0x508,0x508,0x11c4,0x1891,0x1891,0xbf9,0xcea,0x508,0x508,0x508,0x508,0x17aa,0x17b1,0xb0a, +0x17b4,0xb12,0xb1a,0xb22,0x17ae,0xb2a,0xb32,0xb3a,0x17b3,0x17bb,0x17aa,0x17b1,0x17ad,0x17b4,0x17bc,0x17ab, +0x17b2,0x17ae,0xb41,0x1781,0x1789,0x1790,0x1797,0x1784,0x178c,0x1793,0x179a,0xb49,0x17a2,0x1d62,0x1d62,0x1d62, +0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d62,0x1d52,0x1d55,0x1d52, +0x1d5c,0x10ca,0x784,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x10fe,0x78c,0x508, +0x508,0x508,0x508,0x508,0x508,0x1e46,0x11ff,0x794,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x1e56,0x1240,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1c9d,0x1c9d,0x1c9d, +0x1c9d,0x1c9d,0x1c9d,0xfd8,0x508,0x1d9a,0x1d92,0x10d2,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x798,0x1e16,0x11b4,0x508,0x508,0x1207,0x1208,0x7a0,0x508,0x508,0x508,0x508,0x508,0xeac,0xeb4,0xebc, +0xec4,0xecc,0xed4,0xedb,0xedf,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x7a4,0x1048,0x1ced,0x104e,0x1ced,0x1056,0x105b,0x1060,0x1060,0x1d72,0x1d82,0x1d8a, +0x10ba,0x1d7a,0x1e36,0x10c2,0x1dee,0x1e3e,0x1e3e,0x117e,0x1186,0x121f,0x1225,0x122a,0x1230,0x1e5e,0x1e5e,0x1e5e, +0x1e5e,0x1274,0x1e5e,0x127a,0x127e,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac, +0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac,0x7ac, +0x7ac,0x7ac,0x7ac,0x7ac,0x7ad,0xb51,0x17c4,0x17c4,0x17c4,0x7b5,0x7b5,0x7b5,0x7b5,0x1899,0x1899,0x1899, +0x1899,0x1899,0x1899,0x1899,0x7bd,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5, +0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5, +0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5, +0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x7b5, +0x7b5,0x7b5,0x7b5,0x7b5,0x7b5,0x1a20,0xd52,0x1a28,0x1a28,0xd56,0xe6c,0xe74,0xe7c,0x1ae9,0x1ad1,0x1af1, +0x1af9,0x1ae1,0xe01,0xe05,0xe0c,0xe14,0xe1b,0xe23,0xe2b,0xe2d,0xe2d,0xe2d,0xe2d,0x1b5a,0x1b62,0x1b5a, +0x1b68,0x1b70,0x1b3b,0x1b78,0x1b80,0x1b5a,0x1b88,0x1b90,0x1b97,0x1b9f,0x1b43,0x1b5a,0x1ba4,0x1b4b,0x1b52,0x1bac, +0x1bb2,0x1c4e,0x1c55,0x1c46,0x1bba,0x1bc2,0x1bca,0x1bd2,0x1cbd,0x1bda,0x1be2,0xe84,0xe8c,0x1b2b,0x1b2b,0x1b2b, +0xe94,0x1c75,0x1c75,0xf65,0xf6d,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc, +0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abe,0x1abc,0x1ac6,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc, +0x1ac9,0x1abc,0x1abc,0x1abc,0x1abc,0x1abc,0xdf9,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d25, +0x1d25,0x1d25,0x1d25,0x1d25,0x1d25,0x1d2a,0x1d25,0x1d25,0x1d25,0x1088,0x108a,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2, +0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2, +0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2, +0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2, +0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1142,0x1b33,0x1de6,0x1de6,0x1de6,0x1de6,0x1de6,0x1de6,0x1de6,0x114a, +0x1152,0x1210,0x1217,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x1dfe,0x116e,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x508, +0x508,0x508,0x508,0x508,0x508,0x508,0x508,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc, +0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc, +0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc, +0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x12d0,0x1299,0x1b01, +0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6, +0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6, +0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x12a1,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299, +0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299, +0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299, +0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299, +0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4, +0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x17d4,0x12d8,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299, +0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299, +0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299, +0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x12a5,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e, +0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e, +0x1e6e,0x12ad,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299, +0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299, +0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299, +0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299,0x1299, +0x1299,0x12a5,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01, +0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01, +0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01, +0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x12e0,0x1bea,0x1bea,0x1bea,0x1bea, +0x1bea,0x1bea,0x12e8,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a, +0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a, +0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a, +0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a, +0x1d6a,0x1d6a,0x12f0,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1664,0x1664,0x1664, +0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664, +0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664, +0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664, +0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1654,0x166c,0x166c,0x166c, +0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c, +0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c, +0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c, +0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x165c,0x1664,0x1664,0x1664, +0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664, +0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664, +0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664, +0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x1664,0x166c,0x166c,0x166c, +0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c, +0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c, +0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c, +0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x166c,0x17cc,0x17cc,0x17cc, +0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc, +0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc, +0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc, +0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x17cc,0x1b01,0x1b01,0x1b01, +0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01, +0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01, +0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01, +0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1b01,0x1d6a,0x1d6a,0x1d6a, +0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a, +0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a, +0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a, +0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1d6a,0x1db2,0x1db2,0x1db2, +0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2, +0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2, +0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2, +0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1db2,0x1df6,0x1df6,0x1df6, +0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6, +0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6, +0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6, +0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1df6,0x1e6e,0x1e6e,0x1e6e, +0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e, +0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e, +0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e, +0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x1e6e,0x4e7,0x4e7,0x4e7, +0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2ca,0x2d3,0x2cd,0x2cd,0x2d0,0x2c7,0x2c7, +0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7,0x2c7, +0x7fb,0x7f5,0x7da,0x7d1,0x7c8,0x7c5,0x7bc,0x7d7,0x7c2,0x7ce,0x7d1,0x7ec,0x7e3,0x7d4,0x7f8,0x7cb, +0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7b9,0x7e0,0x7dd,0x7e6,0x7e6,0x7e6,0x7f5, +0x7bc,0x807,0x807,0x807,0x807,0x807,0x807,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801, +0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x801,0x7c2,0x7c8,0x7ce,0x7f2,0x7b6, +0x7ef,0x804,0x804,0x804,0x804,0x804,0x804,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe, +0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7fe,0x7c2,0x7e9,0x7bf,0x7e6,0x2c7, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2e5,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6, +0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6,0x2d6, +0x2d9,0x651,0x810,0x813,0x657,0x813,0x80d,0x64e,0x645,0x2df,0x663,0x2e2,0x816,0x63c,0x65a,0x80a, +0x654,0x660,0x642,0x642,0x648,0x2dc,0x64e,0x64b,0x645,0x642,0x663,0x2e2,0x63f,0x63f,0x63f,0x651, +0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x66c,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb, +0x66c,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x65d,0x66c,0x2eb,0x2eb,0x2eb,0x2eb,0x2eb,0x66c,0x666, +0x669,0x669,0x2e8,0x2e8,0x2e8,0x2e8,0x666,0x2e8,0x669,0x669,0x669,0x2e8,0x669,0x669,0x2e8,0x2e8, +0x666,0x2e8,0x669,0x669,0x2e8,0x2e8,0x2e8,0x65d,0x666,0x669,0x669,0x2e8,0x669,0x2e8,0x666,0x2e8, +0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee, +0x2f4,0x66f,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee, +0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x678,0x66f,0x2f7,0x2ee,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee, +0x2f7,0x66f,0x67b,0x675,0x2f7,0x2ee,0x2f7,0x2ee,0x66f,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x67b, +0x675,0x678,0x66f,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x672,0x67e,0x678,0x66f,0x2f7,0x672,0x2f7,0x2ee, +0x2f7,0x2ee,0x678,0x66f,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee, +0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x678,0x66f,0x2f7,0x2ee,0x2f7,0x672,0x2f7,0x2ee,0x2f7,0x2ee, +0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2f7,0x2ee,0x2f7,0x2ee,0x2f7,0x2ee,0x2f1, +0x2fa,0x306,0x306,0x2fa,0x306,0x2fa,0x306,0x306,0x2fa,0x306,0x306,0x306,0x2fa,0x2fa,0x306,0x306, +0x306,0x306,0x2fa,0x306,0x306,0x2fa,0x306,0x306,0x306,0x2fa,0x2fa,0x2fa,0x306,0x306,0x2fa,0x306, +0x309,0x2fd,0x306,0x2fa,0x306,0x2fa,0x306,0x306,0x2fa,0x306,0x2fa,0x2fa,0x306,0x2fa,0x306,0x309, +0x2fd,0x306,0x306,0x306,0x2fa,0x306,0x2fa,0x306,0x306,0x2fa,0x2fa,0x303,0x306,0x2fa,0x2fa,0x2fa, +0x303,0x303,0x303,0x303,0x30c,0x30c,0x300,0x30c,0x30c,0x300,0x30c,0x30c,0x300,0x309,0x681,0x309, +0x681,0x309,0x681,0x309,0x681,0x309,0x681,0x309,0x681,0x309,0x681,0x309,0x681,0x2fa,0x309,0x2fd, +0x309,0x2fd,0x309,0x2fd,0x306,0x2fa,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd, +0x2fd,0x30c,0x30c,0x300,0x309,0x2fd,0x9ea,0x9ea,0x9ed,0x9e7,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd, +0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd,0x309,0x2fd, +0x309,0x2fd,0x309,0x2fd,0x9ed,0x9e7,0x9ed,0x9e7,0x9ea,0x9e4,0x9ed,0x9e7,0xbaf,0xcb7,0x9ea,0x9e4, +0x9ea,0x9e4,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7,0x9ed,0x9e7, +0xcb7,0xcb7,0xcb7,0xdb6,0xdb6,0xdb6,0xdb9,0xdb9,0xdb6,0xdb9,0xdb9,0xdb6,0xdb6,0xdb9,0xefa,0xefd, +0xefd,0xefd,0xefd,0xefa,0xefd,0xefa,0xefd,0xefa,0xefd,0xefa,0xefd,0xefa,0x30f,0x684,0x30f,0x30f, +0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x684,0x30f,0x30f, +0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f, +0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x312,0x30f,0x30f,0x30f, +0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f,0x30f, +0x30f,0x9f0,0x9f0,0x9f0,0x9f0,0x9f0,0xcba,0xcba,0x327,0x327,0x327,0x327,0x327,0x327,0x327,0x327, +0x327,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31e,0x31b,0x31b,0x318,0x318,0x68a,0x318,0x31e,0x68d, +0x321,0x68d,0x68d,0x68d,0x321,0x68d,0x31e,0x31e,0x690,0x324,0x318,0x318,0x318,0x318,0x318,0x318, +0x687,0x687,0x687,0x687,0x315,0x687,0x318,0xb25,0x327,0x327,0x327,0x327,0x327,0x318,0x318,0x318, +0x318,0x318,0x9f9,0x9f9,0x9f6,0x9f3,0x9f6,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd, +0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0xcbd,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693, +0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693, +0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693, +0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693, +0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x693,0x696,0x696,0x94b,0x696,0x696,0x94e,0xb28,0xb28, +0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xb28,0xc6c,0xd83,0xd83,0xd83,0xd83,0xd83,0xd83,0xd83,0xd83, +0xebe,0xebe,0xebe,0xebe,0xec1,0xd86,0xd86,0xd86,0x699,0x699,0xb2b,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4, +0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,0xcb4,0xfa8,0xfa5,0xfa8,0xfa5,0x333,0x33c,0xfa8,0xfa5, +9,9,0x342,0xf00,0xf00,0xf00,0x32a,0x14fd,9,9,9,9,0x33f,0x32d,0x351,0x330, +0x351,0x351,0x351,9,0x351,9,0x351,0x351,0x348,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f, +0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,0x69f,9,0x69f,0x69f,0x69f,0x69f,0x69f, +0x69f,0x69f,0x351,0x351,0x348,0x348,0x348,0x348,0x348,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c, +0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x69c,0x345,0x69c,0x69c,0x69c,0x69c,0x69c, +0x69c,0x69c,0x348,0x348,0x348,0x348,0x348,0xfa8,0x354,0x354,0x357,0x351,0x351,0x354,0x34b,0x9fc, +0xbb8,0xbb5,0x34e,0x9fc,0x34e,0x9fc,0x34e,0x9fc,0x34e,0x9fc,0x339,0x336,0x339,0x336,0x339,0x336, +0x339,0x336,0x339,0x336,0x339,0x336,0x339,0x336,0x354,0x354,0x34b,0x345,0xb67,0xb64,0xbb2,0xcc3, +0xcc0,0xcc6,0xcc3,0xcc0,0xdbc,0xdbf,0xdbf,0xdbf,0xa0b,0x6ab,0x363,0x366,0x363,0x363,0x363,0x366, +0x363,0x363,0x363,0x363,0x366,0xa0b,0x366,0x363,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8, +0x6a8,0x6ab,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8, +0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a8,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2, +0x6a2,0x6a5,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2,0x6a2, +0x6a2,0x6a2,0x6a2,0x6a2,0xa05,0x6a5,0x35d,0x360,0x35d,0x35d,0x35d,0x360,0x35d,0x35d,0x35d,0x35d, +0x360,0xa05,0x360,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d, +0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x366,0x360,0x363,0x35d,0x363,0x35d, +0x363,0x35d,0x363,0x35d,0x363,0x35d,0x35a,0x957,0x95a,0x93c,0x93c,0x114f,0x9ff,0x9ff,0xbbe,0xbbb, +0xa08,0xa02,0xa08,0xa02,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d, +0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d, +0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d,0x363,0x35d, +0x363,0x35d,0x363,0x35d,0x363,0x366,0x360,0x363,0x35d,0xbbe,0xbbb,0x363,0x35d,0xbbe,0xbbb,0x363, +0x35d,0xbbe,0xbbb,0xf03,0x366,0x360,0x366,0x360,0x363,0x35d,0x366,0x360,0x363,0x35d,0x366,0x360, +0x366,0x360,0x366,0x360,0x363,0x35d,0x366,0x360,0x366,0x360,0x366,0x360,0x363,0x35d,0x366,0x360, +0xa0b,0xa05,0x366,0x360,0x366,0x360,0x366,0x360,0x366,0x360,0xdc5,0xdc2,0x366,0x360,0xf06,0xf03, +0xf06,0xf03,0xf06,0xf03,0xc2d,0xc2a,0xc2d,0xc2a,0xc2d,0xc2a,0xc2d,0xc2a,0xc2d,0xc2a,0xc2d,0xc2a, +0xc2d,0xc2a,0xc2d,0xc2a,0xf33,0xf30,0xf33,0xf30,0x1023,0x1020,0x1023,0x1020,0x1023,0x1020,0x1023,0x1020, +0x1023,0x1020,0x1023,0x1020,0x1023,0x1020,0x1023,0x1020,0x1188,0x1185,0x1371,0x136e,0x1536,0x1533,0x1536,0x1533, +0x1536,0x1533,0x1536,0x1533,0xc,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378, +0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0x378,0xc, +0xc,0x37b,0x369,0x369,0x369,0x36f,0x369,0x36c,0x1941,0x372,0x372,0x372,0x372,0x372,0x372,0x372, +0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372, +0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x372,0x375,0x1941,0x37e,0xa0e,0xc, +0xc,0x1500,0x1500,0x141c,0xf,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e, +0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0xdc8,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e,0x97e, +0x97e,0x97e,0x97e,0x97e,0x381,0x381,0x381,0x381,0x381,0x381,0x381,0x381,0x381,0x381,0xf09,0x381, +0x381,0x381,0x38d,0x381,0x384,0x381,0x381,0x390,0x981,0xdcb,0xdce,0xdcb,0xf,0xf,0xf,0xf, +0xf,0xf,0xf,0xf,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393, +0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0x393,0xf, +0xf,0xf,0xf,0x1944,0x393,0x393,0x393,0x38a,0x387,0xf,0xf,0xf,0xf,0xf,0xf,0xf, +0xf,0xf,0xf,0xf,0xcdb,0xcdb,0xcdb,0xcdb,0x141f,0x1503,0xfb1,0xfb1,0xfb1,0xfae,0xfae,0xdd4, +0x8c7,0xcd5,0xcd2,0xcd2,0xcc9,0xcc9,0xcc9,0xcc9,0xcc9,0xcc9,0xfab,0xfab,0xfab,0xfab,0xfab,0x8c4, +0x14fa,0x12,0xdd7,0x8ca,0x1338,0x3ae,0x3b1,0x3b1,0x3b1,0x3b1,0x3b1,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae, +0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0xfb4, +0xfb4,0xfb4,0xfb4,0xfb4,0x8cd,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x942, +0x942,0x942,0x942,0x942,0x942,0x942,0x942,0xb5e,0xb5e,0xb5e,0xcc9,0xccf,0xccc,0xdd1,0xdd1,0xdd1, +0xdd1,0xdd1,0xdd1,0x1335,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x960,0x3a8,0x3a5, +0x3a2,0x39f,0xbc1,0xbc1,0x93f,0x3ae,0x3ae,0x3ba,0x3ae,0x3b4,0x3b4,0x3b4,0x3b4,0x3ae,0x3ae,0x3ae, +0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae, +0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae, +0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae, +0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0xa14,0xa14,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0xa14, +0x3b1,0x3ae,0x3b1,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0x3ae,0xa14, +0x3ae,0x3ae,0x3ae,0x3b1,0x95d,0x3ae,0x399,0x399,0x399,0x399,0x399,0x399,0x399,0x396,0x39f,0x39c, +0x39c,0x399,0x399,0x399,0x399,0x3b7,0x3b7,0x399,0x399,0x39f,0x39c,0x39c,0x39c,0x399,0xcd8,0xcd8, +0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0x3ab,0xa14,0xa14,0xa14,0xa11,0xa11,0xcd8, +0xa29,0xa29,0xa29,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa23,0xa20,0xa23,0xa20,0x15,0xa2c, +0xa26,0xa17,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26, +0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xa26,0xcde,0xcde,0xcde, +0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d,0xa1d, +0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0xa1a,0x15,0x15,0xcde,0xcde,0xcde, +0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37, +0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0xe37,0x1035,0x1035, +0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035,0x1035, +0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32, +0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa32, +0xa32,0xa32,0xa32,0xa32,0xa32,0xa32,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f,0xa2f, +0xa2f,0xbc4,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4b,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e, +0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e, +0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf4e,0xf42,0xf42,0xf42,0xf42,0xf42, +0xf42,0xf42,0xf42,0xf42,0xf51,0xf51,0xf45,0xf45,0xf48,0xf57,0xf54,0x10b,0x10b,0x1968,0x196b,0x196b, +0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x18f9,0x252,0x252,0x252,0x252,0x252, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xb37,0xb37,0xb3a,0xb3a,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0xb37,0x72,0x72,0x72,0x72, +0x15ba,0x15ba,0x15ba,0x15ba,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x1bc,0x15b7, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1ef,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674,0x1674, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad,0x16ad, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x174,0x174,0x174,0x174,0x174,0x174,0x174, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276,0x276, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af7, +0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1,0x1d1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x14d6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b,0x25b, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x18e7,0x18ea,0x18ea,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f,0x24f, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x19b6,0x261,0x261,0x261,0x261,0x261,0x261, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x1b1b,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d,0x29d, +0x17a9,0x17a9,0x17a9,0x17a9,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216,0x216, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x165f,0x1e9,0x1e9,0x1e9,0x1e9,0x1665,0x1665, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1, +0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x1713,0x1713,0x1713,0x1713,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe,0x1fe, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xe2e,0xe2e,0xe2b,0xe2b,0xe2b,0xe2e,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8,0xd8, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x225,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x1845,0x1845,0x231,0x1845,0x1845,0x231,0x1845,0x1845,0x1845,0x1845,0x1845,0x231,0x231,0x231,0x231,0x231, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x1a6d,0x282,0x282,0x282,0x282,0x1a70,0x1a6a, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x267,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb, +0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285,0x285, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x97b,0x97b, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,0x97b,0x97b,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c, +0xd8c,0xd8c,0xd8c,0xd8c,6,6,6,6,6,6,6,6,6,6,6,6, +6,6,6,6,0x1509,0x3d5,0x3e4,0x3e4,0x1b,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea, +0x3ea,0x1b,0x1b,0x3ea,0x3ea,0x1b,0x1b,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea, +0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x1b,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x3ea,0x1b,0x3ea,0x1b, +0x1b,0x1b,0x3ea,0x3ea,0x3ea,0x3ea,0x1b,0x1b,0x3d8,0xce4,0x3d5,0x3e4,0x3e4,0x3d5,0x3d5,0x3d5, +0x3d5,0x1b,0x1b,0x3e4,0x3e4,0x1b,0x1b,0x3e7,0x3e7,0x3db,0xddd,0x1b,0x1b,0x1b,0x1b,0x1b, +0x1b,0x1b,0x1b,0x3d5,0x1b,0x1b,0x1b,0x1b,0x3ed,0x3ed,0x1b,0x3ed,0x3ea,0x3ea,0x3d5,0x3d5, +0x1b,0x1b,0x966,0x966,0x966,0x966,0x966,0x966,0x966,0x966,0x966,0x966,0x3ea,0x3ea,0x3e1,0x3e1, +0x3de,0x3de,0x3de,0x3de,0x3de,0x3e1,0x3de,0x115e,0x18a2,0x189f,0x1947,0x1b,0x1e,0xce7,0x3f0,0xcea, +0x1e,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x1e,0x1e,0x1e,0x1e,0x3fc,0x3fc,0x1e,0x1e,0x3fc, +0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x1e,0x3fc,0x3fc, +0x3fc,0x3fc,0x3fc,0x3fc,0x3fc,0x1e,0x3fc,0x3ff,0x1e,0x3fc,0x3ff,0x1e,0x3fc,0x3fc,0x1e,0x1e, +0x3f3,0x1e,0x3f9,0x3f9,0x3f9,0x3f0,0x3f0,0x1e,0x1e,0x1e,0x1e,0x3f0,0x3f0,0x1e,0x1e,0x3f0, +0x3f0,0x3f6,0x1e,0x1e,0x1e,0xfbd,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x3ff,0x3ff,0x3ff, +0x3fc,0x1e,0x3ff,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x969,0x969,0x969,0x969,0x969,0x969, +0x969,0x969,0x969,0x969,0x3f0,0x3f0,0x3fc,0x3fc,0x3fc,0xfbd,0x194a,0x1e,0x1e,0x1e,0x1e,0x1e, +0x1e,0x1e,0x1e,0x1e,0x21,0x402,0x402,0x40b,0x21,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e, +0xcf3,0x40e,0x21,0x40e,0x40e,0x40e,0x21,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e, +0x40e,0x40e,0x40e,0x40e,0x40e,0x21,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x40e,0x21,0x40e,0x40e, +0x21,0x40e,0x40e,0x40e,0x40e,0x40e,0x21,0x21,0x405,0x40e,0x40b,0x40b,0x40b,0x402,0x402,0x402, +0x402,0x402,0x21,0x402,0x402,0x40b,0x21,0x40b,0x40b,0x408,0x21,0x21,0x40e,0x21,0x21,0x21, +0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x40e,0xcf3,0xced,0xced, +0x21,0x21,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x96c,0x1422,0xcf0,0x21,0x21, +0x21,0x21,0x21,0x21,0x21,0x1725,0x18a5,0x18a5,0x18a5,0x18a8,0x18a8,0x18a8,0x24,0x411,0x420,0x420, +0x24,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x24,0x24,0x426,0x426,0x24,0x24,0x426, +0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x426,0x24,0x426,0x426, +0x426,0x426,0x426,0x426,0x426,0x24,0x426,0x426,0x24,0xcf6,0x426,0x426,0x426,0x426,0x24,0x24, +0x414,0x426,0x411,0x411,0x420,0x411,0x411,0x411,0xfc0,0x24,0x24,0x420,0x423,0x24,0x24,0x423, +0x423,0x417,0x24,0x24,0x24,0x24,0x24,0x24,0x24,0x1ab5,0x411,0x411,0x24,0x24,0x24,0x24, +0x429,0x429,0x24,0x426,0x426,0x426,0xfc0,0xfc0,0x24,0x24,0x41d,0x41d,0x41d,0x41d,0x41d,0x41d, +0x41d,0x41d,0x41d,0x41d,0x41a,0xcf6,0x1344,0x1344,0x1344,0x1344,0x1344,0x1344,0x24,0x24,0x24,0x24, +0x24,0x24,0x24,0x24,0x27,0x27,0x42c,0x438,0x27,0x438,0x438,0x438,0x438,0x438,0x438,0x27, +0x27,0x27,0x438,0x438,0x438,0x27,0x438,0x438,0x43b,0x438,0x27,0x27,0x27,0x438,0x438,0x27, +0x438,0x27,0x438,0x438,0x27,0x27,0x27,0x438,0x438,0x27,0x27,0x27,0x438,0x438,0x438,0x27, +0x27,0x27,0x438,0x438,0x438,0x438,0x438,0x438,0x438,0x438,0xde0,0x438,0x438,0x438,0x27,0x27, +0x27,0x27,0x42c,0x432,0x42c,0x432,0x432,0x27,0x27,0x27,0x432,0x432,0x432,0x27,0x435,0x435, +0x435,0x42f,0x27,0x27,0xfc3,0x27,0x27,0x27,0x27,0x27,0x27,0x42c,0x27,0x27,0x27,0x27, +0x27,0x27,0x27,0x27,0x27,0x27,0xef7,0x972,0x972,0x972,0x972,0x972,0x972,0x972,0x972,0x972, +0x96f,0x96f,0x96f,0xdb0,0xcf9,0xcf9,0xcf9,0xcf9,0xcf9,0xcfc,0xcf9,0x27,0x27,0x27,0x27,0x27, +0x150c,0x44a,0x44a,0x44a,0x194d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x2a,0x44d,0x44d, +0x44d,0x2a,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d, +0x44d,0x2a,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x44d,0x150f,0x44d,0x44d,0x44d, +0x44d,0x44d,0x2a,0x2a,0x2a,0xfcc,0x43e,0x43e,0x43e,0x44a,0x44a,0x44a,0x44a,0x2a,0x43e,0x43e, +0x441,0x2a,0x43e,0x43e,0x43e,0x444,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x43e,0x43e,0x2a, +0xfcc,0xfcc,0x1728,0x2a,0x2a,0x2a,0x2a,0x2a,0x44d,0x44d,0xfc6,0xfc6,0x2a,0x2a,0x447,0x447, +0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x447,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x2a,0x1a19, +0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0xfc9,0x17e5,0x1512,0x456,0x456,0x1950,0x45c,0x45c,0x45c, +0x45c,0x45c,0x45c,0x45c,0x45c,0x2d,0x45c,0x45c,0x45c,0x2d,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c, +0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c,0x2d,0x45c,0x45c,0x45c,0x45c,0x45c,0x45c, +0x45c,0x45c,0x45c,0x45c,0x2d,0x45c,0x45c,0x45c,0x45c,0x45c,0x2d,0x2d,0xcff,0xd02,0x456,0x450, +0x459,0x456,0x450,0x456,0x456,0x2d,0x450,0x459,0x459,0x2d,0x459,0x459,0x450,0x453,0x2d,0x2d, +0x2d,0x2d,0x2d,0x2d,0x2d,0x450,0x450,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x45c,0x2d, +0x45c,0x45c,0xf0f,0xf0f,0x2d,0x2d,0x975,0x975,0x975,0x975,0x975,0x975,0x975,0x975,0x975,0x975, +0x2d,0xf12,0xf12,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d,0x2d, +0x18ab,0x1515,0x468,0x468,0x1ab8,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x30,0x46e,0x46e, +0x46e,0x30,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e, +0x468,0x45f,0x45f,0x45f,0xfcf,0x30,0x468,0x468,0x468,0x30,0x46b,0x46b,0x46b,0x462,0x134a,0x17e8, +0x30,0x30,0x30,0x30,0x17eb,0x17eb,0x17eb,0x45f,0x17e8,0x17e8,0x17e8,0x17e8,0x17e8,0x17e8,0x17e8,0x172b, +0x46e,0x46e,0xfcf,0xfcf,0x30,0x30,0x465,0x465,0x465,0x465,0x465,0x465,0x465,0x465,0x465,0x465, +0xfd2,0xfd2,0xfd2,0xfd2,0xfd2,0xfd2,0x17e8,0x17e8,0x17e8,0xfd5,0xfd8,0xfd8,0xfd8,0xfd8,0xfd8,0xfd8, +0x33,0x1abb,0xa3e,0xa3e,0x33,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44, +0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0x33,0x33,0x33,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44, +0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0x33,0xa44, +0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0xa44,0x33,0xa44,0x33,0x33,0xa44,0xa44,0xa44,0xa44, +0xa44,0xa44,0xa44,0x33,0x33,0x33,0xa38,0x33,0x33,0x33,0x33,0xa35,0xa3e,0xa3e,0xa35,0xa35, +0xa35,0x33,0xa35,0x33,0xa3e,0xa3e,0xa41,0xa3e,0xa41,0xa41,0xa41,0xa35,0x33,0x33,0x33,0x33, +0x33,0x33,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x1518,0x33,0x33,0xa3e,0xa3e, +0xa3b,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x36,0x489,0x489,0x489, +0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489, +0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x489,0x474,0x489,0x486, +0x474,0x474,0x474,0x474,0x474,0x474,0x47a,0x36,0x36,0x36,0x36,0x471,0x48f,0x48f,0x48f,0x48f, +0x48f,0x489,0x48c,0x477,0x477,0x477,0x477,0x477,0x477,0x474,0x477,0x47d,0x483,0x483,0x483,0x483, +0x483,0x483,0x483,0x483,0x483,0x483,0x480,0x480,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36, +0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36, +0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x36,0x39,0x49e,0x49e,0x39,0x49e,0x39,0x1a1f,0x49e, +0x49e,0x1a1f,0x49e,0x39,0x1a1f,0x49e,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x1a1f,0x49e,0x49e,0x49e,0x49e, +0x1a1f,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x49e,0x1a1f,0x49e,0x49e,0x49e,0x39,0x49e,0x39,0x49e, +0x1a1f,0x1a1f,0x49e,0x49e,0x1a1f,0x49e,0x49e,0x49e,0x49e,0x492,0x49e,0x49b,0x492,0x492,0x492,0x492, +0x492,0x492,0x1a1c,0x492,0x492,0x49e,0x39,0x39,0x4a7,0x4a7,0x4a7,0x4a7,0x4a7,0x39,0x4a4,0x39, +0x495,0x495,0x495,0x495,0x495,0x492,0x39,0x39,0x498,0x498,0x498,0x498,0x498,0x498,0x498,0x498, +0x498,0x498,0x39,0x39,0x4a1,0x4a1,0x1425,0x1425,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39, +0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39, +0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x39,0x9b7,0x9b7,0x9b7,0x9ba,0x9b7,0x9b7,0x9b7,0x9b7, +0x3c,0x9b7,0x9b7,0x9b7,0x9b7,0x9ba,0x9b7,0x9b7,0x9b7,0x9b7,0x9ba,0x9b7,0x9b7,0x9b7,0x9b7,0x9ba, +0x9b7,0x9b7,0x9b7,0x9b7,0x9ba,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7,0x9b7, +0x9b7,0x9ba,0xa53,0xfe4,0xfe4,0x3c,0x3c,0x3c,0x3c,0x984,0x984,0x987,0x984,0x987,0x987,0x990, +0x987,0x990,0x984,0x984,0x984,0x984,0x984,0x9b1,0x984,0x987,0x98a,0x98a,0x98d,0x996,0x98a,0x98a, +0x9b7,0x9b7,0x9b7,0x9b7,0x1353,0x134d,0x134d,0x134d,0x984,0x984,0x984,0x987,0x984,0x984,0xa47,0x984, +0x3c,0x984,0x984,0x984,0x984,0x987,0x984,0x984,0x984,0x984,0x987,0x984,0x984,0x984,0x984,0x987, +0x984,0x984,0x984,0x984,0x987,0x984,0xa47,0xa47,0xa47,0x984,0x984,0x984,0x984,0x984,0x984,0x984, +0xa47,0x987,0xa47,0xa47,0xa47,0x3c,0xa50,0xa50,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0xa4a,0xa4d, +0xa4d,0xa4d,0xa4d,0xa4d,0xa4d,0x3c,0xfdb,0xa4d,0xde3,0xde3,0xfde,0xfe1,0xfdb,0x1161,0x1161,0x1161, +0x1161,0x1350,0x1350,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c, +0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c,0x3c, +0x3c,0x3c,0x3c,0x3c,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x3f,0x142b,0x3f,0x3f,0x3f,0x3f, +0x3f,0x142b,0x3f,0x3f,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa, +0x4aa,0x4aa,0x4aa,0x4aa,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0x42,0xa7d,0xa7d, +0xa7d,0xa7d,0x42,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0xa7d,0x42,0xa7d,0xa7d, +0xa7d,0xa7d,0x42,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0x42,0xa7d,0xa7d, +0xa7d,0xa7d,0x42,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d, +0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0x42, +0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0xa7d,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0x42, +0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0x42, +0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2, +0xa7d,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0x42,0x42,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2, +0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d, +0xa7d,0xa7d,0xa7d,0x42,0x42,0x1356,0x1356,0xdec,0xdef,0xa77,0xa80,0xa74,0xa74,0xa74,0xa74,0xa80, +0xa80,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa7a,0xa71,0xa71,0xa71,0xa71,0xa71,0xa71, +0xa71,0xa71,0xa71,0xa71,0xa71,0x42,0x42,0x42,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83, +0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0x1731,0x45,0x45, +0x172e,0x172e,0x172e,0x172e,0x172e,0x172e,0x45,0x45,0xa95,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98, +0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98,0xa98, +0xa98,0xa98,0xa98,0xa92,0xa8f,0x48,0x48,0x48,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e, +0xa9e,0xa9e,0xa9e,0xa9b,0xa9b,0xa9b,0xa9e,0xa9e,0xa9e,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b,0x151b, +0x151b,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0x4b,0xabf,0xabf,0xabf,0xabf,0xabf,0xabf,0xaa1,0xabf, +0xabf,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa4,0xaa7,0xaa4,0xab6,0xab6,0xab9,0xac2, +0xab0,0xaad,0xab6,0xab3,0xac2,0xd05,0x4e,0x4e,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc,0xabc, +0xabc,0xabc,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08,0xd08, +0xd08,0xd08,0x4e,0x4e,0x4e,0x4e,0x4e,0x4e,0xad1,0xad1,0xb52,0xb55,0xad7,0xb4f,0xad4,0xad1, +0xada,0xae9,0xadd,0xaec,0xaec,0xaec,0xac8,0x51,0xae0,0xae0,0xae0,0xae0,0xae0,0xae0,0xae0,0xae0, +0xae0,0xae0,0x51,0x51,0x51,0x51,0x51,0x51,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3, +0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3, +0x1953,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3, +0xae3,0xacb,0x1002,0x51,0x51,0x51,0x51,0x51,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8, +0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, +0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x54,0x54, +0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x54,0x54,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, +0x54,0x4ce,0x54,0x4ce,0x54,0x4ce,0x54,0x4ce,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, +0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, +0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x54,0x54,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, +0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x54,0x4cb,0x4cb, +0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4c5,0x4cb,0x4c5,0x4c5,0x4c2,0x4cb,0x4cb,0x4cb,0x54,0x4cb,0x4cb, +0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4c2,0x4c2,0x4c2,0x4cb,0x4cb,0x4cb,0x4cb,0x54,0x54,0x4cb,0x4cb, +0x4ce,0x4ce,0x4ce,0x4ce,0x54,0x4c2,0x4c2,0x4c2,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb, +0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4c2,0x4c2,0x4c2,0x54,0x54,0x4cb,0x4cb,0x4cb,0x54,0x4cb,0x4cb, +0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4c8,0x4c5,0x54,0xbca,0xbcd,0xbcd,0xbcd,0x100b,0x57,0x14f7,0x14f7, +0x14f7,0x14f7,0x4d7,0x4d7,0x4d7,0x4d7,0x4d7,0x4d7,0x522,0xbdf,0x5a,0x5a,0x6e1,0x522,0x522,0x522, +0x522,0x522,0x528,0x53a,0x528,0x534,0x52e,0x6e4,0x51f,0x6de,0x6de,0x6de,0x6de,0x51f,0x51f,0x51f, +0x51f,0x51f,0x525,0x537,0x525,0x531,0x52b,0x5a,0xdfb,0xdfb,0xdfb,0xdfb,0xdfb,0x1359,0x1359,0x1359, +0x1359,0x1359,0x1359,0x1359,0x1359,0x5a,0x5a,0x5a,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d, +0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x5d,0x549,0x549,0x549,0x549,0x549,0x549,0x549,0x549, +0x549,0x549,0x549,0x549,0x549,0x546,0x546,0x546,0x546,0x549,0xafb,0xafe,0xbe5,0xbeb,0xbeb,0xbe8, +0xbe8,0xbe8,0xbe8,0xe01,0xf15,0xf15,0xf15,0xf15,0x114c,0x60,0x60,0x60,0x60,0x60,0x60,0x60, +0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x60,0x579,0x579,0x579,0xb07,0xf1e,0x1011,0x1011,0x1011, +0x1011,0x12ab,0x1737,0x1737,0x63,0x63,0x63,0x63,0x70b,0x70b,0x70b,0x70b,0x70e,0x70e,0x70e,0x70e, +0x70e,0x70e,0x585,0x585,0x582,0x582,0x582,0x582,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0xb13,0xb13,0x66, +0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66, +0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x66,0x5af,0x5af,0x5af,0x5af,0x5af,0x5af,0x5af,0x5af, +0x5af,0x5af,0x5af,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69, +0x69,0x69,0x69,0x69,0x69,0x69,0x69,0x69,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e, +0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e, +0xb2e,0xb2e,0x6c,0xb2e,0xb2e,0xb2e,0xb2e,0xb31,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e, +0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb31,0x6c,0x6c,0x6c,0x6c, +0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0x6c,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34, +0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0x6f,0x6f, +0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x6f,0x75,0x843,0x83d,0x843,0x83d,0x843,0x83d,0x843, +0x83d,0x843,0x83d,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d, +0x840,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x83d,0x83d,0x83d,0x843,0x83d,0x843,0x83d,0x843, +0x83d,0x83d,0x83d,0x83d,0x83d,0x83d,0x843,0x83d,0x83d,0x83d,0x83d,0x83d,0x840,0xc93,0xc93,0x75, +0x75,0x954,0x954,0x91e,0x91e,0x846,0x849,0xc90,0x78,0x78,0x78,0x78,0x78,0x85b,0x85b,0x85b, +0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b, +0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x85b,0x113a,0x191a,0x1a01,0x7b,0x85e,0x85e,0x85e, +0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x7b, +0x927,0x927,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a,0x92a, +0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867, +0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0x867,0xd98,0xd98,0x7e, +0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0x81,0x81,0x81, +0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c, +0xb4c,0xc9c,0xb4c,0xb4c,0xb4c,0xc9c,0xb4c,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84,0x84, +0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df, +0x9db,0x9db,0x9db,0x9db,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87,0x87, +0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254, +0x60c,0x60c,0x60c,0x60c,0x60c,0x60c,0x60c,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a,0x8a, +0x8a,0x8a,0x8a,0x5fa,0x5fa,0x5fa,0x5fa,0x5fa,0x8a,0x8a,0x8a,0x8a,0x8a,0xb1f,0x5fd,0x603, +0x609,0x609,0x609,0x609,0x609,0x609,0x609,0x609,0x609,0x600,0x603,0x603,0x603,0x603,0x603,0x603, +0x603,0x603,0x603,0x603,0x603,0x603,0x603,0x8a,0x603,0x603,0x603,0x603,0x603,0x8a,0x603,0x8a, +0x603,0x603,0x8a,0x603,0x603,0x8a,0x603,0x603,0x603,0x603,0x603,0x603,0x603,0x603,0x603,0x606, +0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618, +0x136b,0x136b,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d, +0x8d,0x8d,0x8d,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x61e,0x618, +0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d, +0x61b,0x618,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x618,0x61b,0x618,0x618,0x61b,0x61b,0x618,0x618, +0x618,0x618,0x618,0x61b,0x618,0x618,0x61b,0x618,0x61b,0x61b,0x61b,0x618,0x61b,0x61b,0x61b,0x61b, +0x8d,0x8d,0x61b,0x61b,0x61b,0x61b,0x618,0x618,0x61b,0x618,0x618,0x618,0x618,0x61b,0x618,0x618, +0x618,0x618,0x618,0x61b,0x61b,0x61b,0x618,0x618,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d,0x8d, +0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a,0xb6a, +0x61e,0x61e,0x978,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x615,0x615,0xc24,0xdb3,0x8d,0x8d, +0x87f,0x891,0x88e,0x891,0x88e,0xcb1,0xcb1,0xda4,0xda1,0x882,0x882,0x882,0x882,0x894,0x894,0x894, +0x8ac,0x8af,0x8be,0x90,0x8b2,0x8b5,0x8c1,0x8c1,0x8a9,0x8a0,0x89a,0x8a0,0x89a,0x8a0,0x89a,0x89d, +0x89d,0x8b8,0x8b8,0x8bb,0x8b8,0x8b8,0x8b8,0x90,0x8b8,0x8a6,0x8a3,0x89d,0x90,0x90,0x90,0x90, +0x62a,0x636,0x62a,0xc27,0x62a,0x93,0x62a,0x636,0x62a,0x636,0x62a,0x636,0x62a,0x636,0x62a,0x636, +0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633, +0x62d,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,0x62d,0x633,0x62d,0x633,0x62d,0x93,0x93,0x627, +0x77d,0x780,0x795,0x798,0x777,0x780,0x780,0x99,0x75f,0x762,0x762,0x762,0x762,0x75f,0x75f,0x99, +0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0x96,0xb22,0xb22,0xb22,0x9de,0x759,0x639,0x639, +0x99,0x7a7,0x786,0x777,0x780,0x77d,0x777,0x789,0x77a,0x774,0x777,0x795,0x78c,0x783,0x7a4,0x777, +0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x7a1,0x792,0x78f,0x795,0x795,0x795,0x7a7, +0x768,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765, +0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x765,0x99, +0x99,0x99,0x765,0x765,0x765,0x765,0x765,0x765,0x99,0x99,0x765,0x765,0x765,0x765,0x765,0x765, +0x99,0x99,0x765,0x765,0x765,0x765,0x765,0x765,0x99,0x99,0x765,0x765,0x765,0x99,0x99,0x99, +0xb6d,0xb6d,0xb6d,0xb6d,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x9c,0x18b7,0x18b7,0x18b7, +0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73,0xb73, +0xb73,0xb73,0xb73,0x9f,0x9f,0x9f,0x9f,0x9f,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683, +0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xa2,0xa2, +0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xa2,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xa5, +0xa5,0x101d,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88, +0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d,0x173d, +0x173d,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5, +0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xa5,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0, +0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xa8,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xa8,0xba0,0xba0,0xa8,0xa8,0xba0,0xa8,0xa8,0xba0,0xba0,0xa8, +0xa8,0xba0,0xba0,0xba0,0xba0,0xa8,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xb9d,0xb9d, +0xb9d,0xb9d,0xa8,0xb9d,0xa8,0xb9d,0xb9d,0xb9d,0xb9d,0xd29,0xb9d,0xb9d,0xa8,0xb9d,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0, +0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xa8,0xba0, +0xba0,0xba0,0xba0,0xa8,0xa8,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xa8,0xba0,0xba0, +0xba0,0xba0,0xba0,0xba0,0xba0,0xa8,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, +0xba0,0xba0,0xa8,0xba0,0xba0,0xba0,0xba0,0xa8,0xba0,0xba0,0xba0,0xba0,0xba0,0xa8,0xba0,0xa8, +0xa8,0xa8,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xa8,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xe16,0xe16,0xa8,0xa8,0xba0,0xba0,0xba0,0xba0, +0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0, +0xba0,0xba0,0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb97,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xf2d,0xf2a, +0xa8,0xa8,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a, +0xb9a,0xb9a,0xb9a,0xb9a,0xab,0xba6,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab, +0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab,0xab, +0xab,0xab,0xab,0xab,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36,0xc36, +0xc36,0xae,0xc36,0xc36,0xc36,0xc36,0xc30,0xc30,0xc33,0xae,0xae,0xae,0xae,0xae,0xae,0xae, +0xae,0xae,0xae,0xae,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f, +0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc3f,0xc39,0xc39,0xc3c,0xca5,0xca5,0xb1,0xb1,0xb1,0xb1,0xb1, +0xb1,0xb1,0xb1,0xb1,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc45, +0xc45,0xc45,0xc45,0xc45,0xc45,0xc45,0xc42,0xc42,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4,0xb4, +0xb4,0xb4,0xb4,0xb4,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b,0xc4b, +0xc4b,0xb7,0xc4b,0xc4b,0xc4b,0xb7,0xc48,0xc48,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7,0xb7, +0xb7,0xb7,0xb7,0xb7,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b, +0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b,0xd3b, +0xd3b,0x1539,0x1539,0xba,0xd2c,0xd2c,0xd2c,0xd38,0xd38,0xd38,0xd38,0xd2c,0xd2c,0xd38,0xd38,0xd38, +0xba,0xba,0xba,0xba,0xd38,0xd38,0xd2c,0xd38,0xd38,0xd38,0xd38,0xd38,0xd38,0xd2f,0xd2f,0xd2f, +0xba,0xba,0xba,0xba,0xd32,0xba,0xba,0xba,0xd3e,0xd3e,0xd35,0xd35,0xd35,0xd35,0xd35,0xd35, +0xd35,0xd35,0xd35,0xd35,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xd41, +0xd41,0xd41,0xd41,0xd41,0xd41,0xd41,0xbd,0xbd,0xd41,0xd41,0xd41,0xd41,0xd41,0xbd,0xbd,0xbd, +0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0xbd,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, +0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0xc0,0xc0,0x153c,0x153c, +0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, +0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0xc0,0x1abe,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, +0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xc3,0xd68,0xd68,0xd68, +0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68, +0xd68,0xd68,0xd68,0xc3,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68, +0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xc3,0xd68,0xd68,0xc3,0xd68,0xd68,0xd68,0xd68,0xd68, +0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xc3,0xc3,0xd68,0xd68,0xd68,0xd68, +0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xd68,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3, +0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3, +0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xc3,0xd6b,0xd6b,0xd6b,0xd6b, +0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b, +0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xc6,0xc6,0xc6,0xc6,0xc6,0xdad,0xdad,0xdad,0xc9, +0xc9,0xc9,0xc9,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7, +0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xda7,0xc9,0xc9,0xc9,0xdaa, +0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0xdaa,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71, +0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xd71, +0xd71,0xd71,0xd71,0xd71,0xd71,0xd71,0xcc,0xd6e,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a, +0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a, +0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xd7a,0xcf,0xcf,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77,0xd77, +0xd77,0xd77,0xcf,0xcf,0xcf,0xcf,0xcf,0xcf,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b, +0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd2,0xd2, +0xd7d,0xd2,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d, +0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd7d,0xd2,0xd7d,0xd7d,0xd2,0xd2,0xd2, +0xd7d,0xd2,0xd2,0xd7d,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80, +0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd5,0xd5,0xd5,0xd5,0xd5, +0xd5,0xd5,0xd5,0xd5,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0x153f, +0x153f,0x17f1,0x17f1,0xdb,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119,0x1119, +0x1acd,0x132,0x132,0x132,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43, +0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe43,0xe3a,0xe3a,0xe40,0xe40,0xe3a, +0xde,0xde,0xe3d,0xe3d,0x1149,0x1149,0x1149,0x1149,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1,0xe1, +0xe1,0xe1,0xe1,0xe1,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2,0xca2, +0xca2,0xca2,0xca2,0xca2,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1542,0x1542,0x1542,0x1542,0x1542, +0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1542,0x1545,0x18bd,0x18bd,0x193e,0x18bd,0xe4,0x17f4, +0x1377,0x118b,0xf3c,0xf3c,0xe55,0xe52,0xe55,0xe52,0xe52,0xe49,0xe49,0xe49,0xe49,0xe49,0xe49,0x1194, +0x1191,0x1194,0x1191,0x118e,0x118e,0x118e,0x1434,0x1431,0xe7,0xe7,0xe7,0xe7,0xe7,0xe4f,0xe4c,0xe4c, +0xe4c,0xe49,0xe4f,0xe4c,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58, +0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xea,0xea,0xea,0xea,0xea, +0xea,0xea,0xea,0xea,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xea,0xe58,0xe58,0xe58,0xe58, +0xe58,0xe58,0xe58,0xea,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xe58,0xea,0xe58,0xe58,0xe58,0xe58, +0xe58,0xe58,0xe58,0xea,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e,0xe5e, +0xe5e,0xe5e,0xe5e,0xe5e,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xe5b,0xed,0xed, +0xed,0xed,0xed,0xed,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xf0,0x1437,0xf0,0xf0,0xf0,0xf0, +0xf0,0x1437,0xf0,0xf0,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8, +0xeb8,0xeb8,0xeb8,0xeb8,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67, +0xe67,0xe67,0xe67,0xf3,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64, +0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64,0xe64, +0xe64,0xe64,0xe64,0xf3,0xe79,0xe6d,0xe6d,0xe6d,0xf6,0xe6d,0xe6d,0xf6,0xf6,0xf6,0xf6,0xf6, +0xe6d,0xe6d,0xe6d,0xe6d,0xe79,0xe79,0xe79,0xe79,0xf6,0xe79,0xe79,0xe79,0xf6,0xe79,0xe79,0xe79, +0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79,0xe79, +0xe79,0xe79,0xe79,0xe79,0x195c,0x195c,0xf6,0xf6,0xe6a,0xe6a,0xe6a,0xf6,0xf6,0xf6,0xf6,0xe70, +0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0xe73,0x1959,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6, +0xe76,0xe76,0xe76,0xe76,0xe76,0xe76,0xe7c,0xe7c,0xe73,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6,0xf6, +0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0x119a,0x119a,0xf9,0xf9,0xf9,0xf9, +0xe88,0xe88,0xe88,0xe88,0xe88,0xe8b,0xe8b,0xe8b,0xe88,0xe88,0xe8b,0xe88,0xe88,0xe88,0xe88,0xe88, +0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xf9,0xf9,0xf9,0xf9,0xf9,0xf9,0xe85,0xe85,0xe85,0xe85, +0xe85,0xe85,0xe85,0xe85,0xe85,0xe85,0x1197,0xf9,0xf9,0xf9,0xe82,0xe82,0xe91,0xe91,0xe91,0xe91, +0xfc,0xfc,0xfc,0xfc,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe8e,0xe91,0xe91,0xe91, +0xe91,0xe91,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0xfc,0x154e,0x1554,0x1551,0x189c, +0x17f7,0x18c0,0x18c0,0x18c0,0x18c0,0x18c0,0x1962,0x195f,0x1965,0x195f,0x1965,0x1a25,0x1ac1,0x1ac1,0x1ac1,0xff, +0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, +0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, +0xeb5,0xeb5,0xeb5,0xeb2,0xeb2,0xea9,0xea9,0xeb2,0xeaf,0xeaf,0xeaf,0xeaf,0x1ac4,0x102,0x102,0x102, +0x1314,0x1314,0x1314,0x1317,0x1317,0x1317,0x130e,0x130e,0x1311,0x130e,0x156,0x156,0x156,0x156,0x156,0x156, +0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0x1443,0x1443,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0xebb, +0x137d,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x105,0x137a, +0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc72,0xc75, +0xee8,0xed9,0xed3,0xee5,0xee2,0xedc,0xedc,0xeeb,0xed6,0xedf,0x108,0x108,0x108,0x108,0x108,0x108, +0xf6f,0xf6f,0xf5a,0xf6f,0xf72,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0x10e,0x10e,0x10e,0x10e, +0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf69,0xf7b,0xf7b,0xf60,0xf66,0xf7b,0xf7b, +0xf63,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf5d,0xf5d,0xf5d,0xf5d,0xf5d, +0xf5d,0xf5d,0xf5d,0xf5d,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0xf60,0x10e,0x10e,0x10e, +0x111,0x111,0x1a2b,0x1a28,0x1a2b,0x1a2b,0x1a2b,0x1aca,0x1ac7,0x1aca,0x1ac7,0x111,0x111,0x111,0x111,0x111, +0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111,0x111, +0x111,0x1aca,0x1ac7,0x155d,0x144c,0x144c,0x1380,0x1074,0x1074,0x1074,0x1074,0x1074,0xf8a,0xf8a,0xf8a,0xf8a, +0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a, +0xf87,0xf87,0xf8d,0xf8d,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0x114,0xf96,0xf96,0xf96,0xf96, +0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96,0xf96, +0xf96,0xf96,0xf90,0xf90,0xf90,0xf90,0x11a3,0x11a3,0x117,0x117,0x117,0xf93,0x1563,0x1563,0x1563,0x1563, +0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563, +0x1563,0x1563,0x1563,0x1563,0x1563,0x174c,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a, +0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a, +0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0x11a,0xf9f,0xf9f,0xf9f,0x1569,0x1569,0x1569,0x1569,0x1569, +0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x1569,0x11d,0xf9c,0xf9c,0xf9c,0xf9c,0x1566,0x11d,0x11d,0x11d, +0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0x11d,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2, +0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0xfa2,0x1974,0x1974,0x1974,0x1974,0x1974,0x1974, +0x1974,0x120,0x120,0x120,0x120,0x120,0x120,0x120,0x109b,0x109b,0x109b,0x109b,0x1098,0x1098,0x1098,0x1098, +0x1098,0x1098,0x1098,0x1098,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1089,0x1098,0x1098,0x108f,0x108c, +0x123,0x123,0x123,0x109e,0x109e,0x1092,0x1092,0x1092,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095,0x1095, +0x1095,0x1095,0x123,0x123,0x123,0x109b,0x109b,0x109b,0x10a1,0x10a1,0x10a1,0x10a1,0x10a1,0x10a1,0x10a1,0x10a1, +0x10a1,0x10a1,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6,0x10b6, +0x10b6,0x10b6,0x10b9,0x10b9,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126, +0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x126,0x10e0,0x10e0,0x10e0,0x10e0,0x10da,0x17fd,0x129,0x129, +0x129,0x129,0x129,0x129,0x129,0x129,0x10e6,0x10e6,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd,0x10dd, +0x10dd,0x10dd,0x129,0x129,0x129,0x129,0x129,0x129,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x10f8, +0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10f8,0x10fe,0x1101,0x12c,0x12c,0x12c,0x12c, +0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x12c,0x10fb,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113, +0x1113,0x1107,0x1107,0x1107,0x1107,0x1107,0x1107,0x1110,0x1110,0x1107,0x1107,0x1110,0x1110,0x1107,0x1107,0x12f, +0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x12f,0x1113,0x1113,0x1113,0x1107,0x1113,0x1113,0x1113,0x1113, +0x1113,0x1113,0x1113,0x1113,0x1107,0x1110,0x12f,0x12f,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d,0x110d, +0x110d,0x110d,0x12f,0x12f,0x110a,0x1116,0x1116,0x1116,0x1575,0x132,0x132,0x132,0x132,0x132,0x132,0x132, +0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132, +0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x132,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c, +0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c,0x111c, +0x111c,0x111c,0x111c,0x111c,0x111c,0x111f,0x135,0x135,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122, +0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122,0x1122, +0x1122,0x1122,0x1122,0x1122,0x1122,0x138,0x138,0x138,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125, +0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b, +0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x13b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b, +0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b,0x112b, +0x112b,0x112b,0x13e,0x13e,0x13e,0x13e,0x13e,0x1128,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e, +0x112e,0x112e,0x112e,0x112e,0x141,0x141,0x141,0x141,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131, +0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x144,0x144,0x144,0x144, +0x144,0x144,0x144,0x144,0x144,0x144,0x144,0x144,0x11a9,0x11a9,0x11a9,0x11a9,0x11b2,0x11a9,0x11a9,0x11a9, +0x11b2,0x11a9,0x11a9,0x11a9,0x11a9,0x11a6,0x147,0x147,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11af,0x11b5, +0x11af,0x11b5,0x11af,0x11af,0x11af,0x11b5,0x11b5,0x147,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8, +0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x14a,0x14a, +0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x14a,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3, +0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d0,0x11bb,0x11d0, +0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x14d,0x11c4,0x11cd,0x11bb,0x11cd,0x11cd,0x11bb,0x11bb,0x11bb, +0x11bb,0x11bb,0x11bb,0x11bb,0x11bb,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11d0,0x11bb,0x11bb,0x11c1,0x11c1,0x11c1, +0x11c1,0x11c1,0x11c1,0x11c1,0x11c1,0x14d,0x14d,0x11be,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca, +0x11ca,0x11ca,0x14d,0x14d,0x14d,0x14d,0x14d,0x14d,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca,0x11ca, +0x11ca,0x11ca,0x14d,0x14d,0x14d,0x14d,0x14d,0x14d,0x11c7,0x11c7,0x11c7,0x11c7,0x11c7,0x11c7,0x11c7,0x11d6, +0x11d9,0x11d9,0x11d9,0x11d9,0x11c7,0x11c7,0x14d,0x14d,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0, +0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15c0,0x15bd,0x1adf,0x1329,0x1302,0x1320,0x1320,0x1320,0x1320,0x1320,0x1320, +0x1320,0x1308,0x1305,0x12fc,0x12fc,0x1326,0x12fc,0x12fc,0x12fc,0x12fc,0x130b,0x14eb,0x14f1,0x14ee,0x14ee,0x193b, +0x1716,0x1716,0x1aac,0x150,0x150,0x150,0x150,0x150,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee, +0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11e5,0x11e5,0x11e8,0x11f1,0x11eb,0x11eb,0x11eb,0x11f1, +0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x153,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea, +0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x12ea, +0x12ea,0x12ea,0x12ea,0x12ea,0x12ea,0x159,0x159,0x159,0x120f,0x1203,0x1203,0x1203,0x1203,0x1203,0x1203,0x1206, +0x1215,0x1215,0x1203,0x1203,0x1203,0x1203,0x15c,0x131a,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209,0x1209, +0x1209,0x1209,0x15c,0x15c,0x15c,0x15c,0x1203,0x1203,0x1233,0x1227,0x1233,0x15f,0x15f,0x15f,0x15f,0x15f, +0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f,0x15f, +0x15f,0x15f,0x15f,0x1230,0x1230,0x1236,0x122a,0x122d,0x124b,0x124b,0x124b,0x1245,0x1245,0x123c,0x1245,0x1245, +0x123c,0x1245,0x1245,0x124e,0x1248,0x123f,0x162,0x162,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242,0x1242, +0x1242,0x1242,0x162,0x162,0x162,0x162,0x162,0x162,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x1254,0x165, +0x165,0x165,0x165,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251, +0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251,0x1251, +0x165,0x165,0x165,0x165,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d, +0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x125d,0x168,0x125a,0x1257,0x1257,0x1257,0x1257, +0x1257,0x1257,0x1257,0x1257,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c, +0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x16b,0x16b,0x16b,0x1266,0x1269,0x1269, +0x1269,0x1269,0x1269,0x1269,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272, +0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x1272,0x16e,0x16e,0x126f,0x126f,0x126f,0x126f, +0x126f,0x126f,0x126f,0x126f,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278, +0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x1278,0x171,0x171,0x171,0x171,0x171,0x1275,0x1275,0x1275,0x1275, +0x1275,0x1275,0x1275,0x1275,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e, +0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e,0x127e, +0x127e,0x127e,0x127e,0x177,0x1299,0x1299,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a,0x17a, +0x17a,0x197d,0x17a,0x17a,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x14ca, +0x14ca,0x14ca,0x14ca,0x14ca,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f, +0x129f,0x129f,0x129f,0x17d,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a4f,0x1a52,0x1a4c,0x279,0x279,0x279, +0x279,0x279,0x279,0x279,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e,0x187e, +0x187e,0x1ad0,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180, +0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180,0x180, +0x180,0x180,0x180,0x180,0x180,0x180,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c,0x138c, +0x12f6,0x13f5,0x13f2,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183, +0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f3,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0, +0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f0,0x12f3,0x12f0,0x12f0,0x13f5,0x13f5, +0x13f5,0x13f5,0x13f5,0x13f2,0x13f5,0x13f5,0x13f5,0x1881,0x183,0x183,0x183,0x183,0x12ed,0x12ed,0x12ed,0x12ed, +0x12ed,0x12ed,0x12ed,0x12ed,0x12ed,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x1419,0x1419,0x183,0x183, +0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x1920,0x1920,0x1920,0x1920, +0x1920,0x1920,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183, +0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183,0x183, +0x183,0x183,0x183,0x183,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395, +0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x1395,0x138f,0x138f,0x138f, +0x186,0x186,0x1392,0x186,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x1398,0x13a1,0x139b,0x139b,0x13a1,0x13a1, +0x13a1,0x139b,0x13a1,0x139b,0x139b,0x139b,0x13a4,0x13a4,0x189,0x189,0x189,0x189,0x189,0x189,0x189,0x189, +0x139e,0x139e,0x139e,0x139e,0x18c,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x18c,0x18c,0x13aa,0x13aa,0x13aa, +0x13aa,0x13aa,0x13aa,0x18c,0x18c,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x18c,0x18c,0x18c,0x18c,0x18c, +0x18c,0x18c,0x18c,0x18c,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x13aa,0x18c,0x13aa,0x13aa,0x13aa,0x13aa, +0x13aa,0x13aa,0x13aa,0x18c,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d, +0x161d,0x161d,0x161d,0x161d,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13b0,0x13c2,0x13c2,0x13b6,0x13b6,0x13b6, +0x13b6,0x13b6,0x18f,0x18f,0x18f,0x18f,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3, +0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b3,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9,0x13b9, +0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x18f,0x1584, +0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5, +0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x192,0x192,0x192,0x192,0x192,0x192,0x192, +0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x195, +0x195,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x1587, +0x195,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13fe, +0x195,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8,0x13c8, +0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x1587, +0x1587,0x1587,0x1587,0x1587,0x1587,0x1587,0x195,0x195,0x195,0x195,0x195,0x195,0x195,0x195,0x195,0x195, +0x1413,0x1410,0x1410,0x1410,0x1410,0x1410,0x159c,0x159c,0x159c,0x159c,0x159c,0x159f,0x170d,0x159f,0x159f,0x159f, +0x17d9,0x188a,0x188a,0x18c3,0x18c3,0x1a8e,0x1b39,0x1b39,0x198,0x198,0x198,0x198,0x198,0x198,0x198,0x198, +0x159f,0x159f,0x159f,0x159f,0x159f,0x159f,0x159c,0x159c,0x159c,0x159f,0x159c,0x170a,0x170a,0x198,0x198,0x198, +0x159f,0x159c,0x159c,0x159f,0x188a,0x188a,0x188a,0x1926,0x1926,0x1a07,0x1a8e,0x1b39,0x1b39,0x198,0x198,0x198, +0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb, +0x13cb,0x13cb,0x13cb,0x13cb,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b,0x19b, +0x1467,0x15a5,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x1467,0x15a5,0x15a5,0x15a5, +0x15a5,0x15a5,0x15a5,0x175e,0x175e,0x19e,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1809,0x1ad3,0x1ad3, +0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x1ad3,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e,0x19e, +0x19e,0x19e,0x19e,0x1983,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806,0x1806, +0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d, +0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d, +0x1a1,0x146d,0x146d,0x1a1,0x146d,0x1a1,0x1a1,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d, +0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x1a1,0x1a1,0x1a1, +0x1a1,0x1a1,0x146d,0x1a1,0x1a1,0x1a1,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x146d,0x146d, +0x1a1,0x146d,0x146d,0x1a1,0x146d,0x1a1,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d,0x1a1,0x146d, +0x1a1,0x146d,0x146d,0x1a1,0x146d,0x1a1,0x1a1,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d, +0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x1a1, +0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d, +0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x1a1,0x1a1,0x1a1,0x1a1, +0x1a1,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d,0x1a1,0x146d,0x146d,0x146d,0x146d,0x146d, +0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x146d,0x1a1,0x1a1,0x1a1,0x1a1, +0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1, +0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x146a,0x146a,0x1a1,0x1a1, +0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1a1,0x1482,0x1482,0x1482,0x1482, +0x1482,0x1482,0x1482,0x1470,0x1470,0x1470,0x1470,0x1470,0x147f,0x1470,0x1473,0x1473,0x1470,0x1470,0x1470,0x1476, +0x1476,0x1a4,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x147c,0x1479,0x1485,0x1485,0x1485, +0x1989,0x1986,0x1986,0x1ad6,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x1a4,0x162f,0x162f,0x162f,0x162f, +0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x1491,0x1491,0x1491,0x1491, +0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x1491,0x148e,0x1488,0x1488,0x148e,0x148e,0x1497,0x1497,0x1491,0x1494, +0x1494,0x148e,0x148b,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x1a7,0x149a,0x149a,0x149a,0x149a, +0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a,0x149a, +0x149a,0x149a,0x149a,0x149a,0x1aa,0x1aa,0x1aa,0x1aa,0x1761,0x1761,0x149a,0x149a,0x1761,0x1761,0x1761,0x1761, +0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1aa,0x1aa,0x1761,0x1761, +0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x14a6,0x14a6,0x14a6,0x14a6, +0x14a6,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37,0x1a37,0x1ad,0x1ad,0x1ad,0x1ad,0x1a31,0x14a6,0x14a3,0x14a3,0x14a3, +0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x1a34,0x1a34,0x1a34,0x1a34, +0x1a34,0x1a34,0x1a34,0x1a34,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x1ad,0x14a0,0x14a0,0x14a0,0x14a0,0x14a9, +0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14a9,0x14ca,0x14ca,0x14ca,0x14ca, +0x14ca,0x14ca,0x14ca,0x14ca,0x14ca,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x14c7,0x14c7,0x14c7,0x14c7, +0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x14c7,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x1b0,0x14cd,0x14cd,0x14cd,0x14cd, +0x14cd,0x14cd,0x14cd,0x14cd,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1b3,0x1323,0x1320,0x1323,0x12ff, +0x1320,0x1326,0x1326,0x1329,0x1326,0x1329,0x132c,0x1320,0x1329,0x1329,0x1320,0x1320,0x14df,0x14df,0x14df,0x14df, +0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14d0,0x14d9,0x14d0,0x14d9,0x14d9,0x14d0,0x14d0,0x14d0,0x14d0, +0x14d0,0x14d0,0x14dc,0x14d3,0x1a3a,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x1b6,0x15b1,0x15b1,0x15b1,0x15b1, +0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x15b1,0x1b9,0x1b9,0x15ae,0x15ae,0x15ae,0x15ae, +0x15ae,0x15b4,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1b9,0x1719,0x1710,0x1710,0x1710, +0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710, +0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1710,0x1bf,0x1bf,0x1bf,0x1bf,0x1adf,0x1c2,0x1c2,0x1c2, +0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2, +0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2,0x1c2, +0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x1c5,0x1c5,0x1c5,0x1c5,0x1c5, +0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x1c5,0x1c5,0x1c5, +0x1c5,0x1c5,0x1c5,0x1c5,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x1c5,0x1c5, +0x15c9,0x15c3,0x15c6,0x15cf,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x1c8,0x1c8,0x1c8,0x1c8, +0x1c8,0x1c8,0x1c8,0x1c8,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba, +0x15ba,0x15ba,0x15ba,0x15ba,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5, +0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x198c,0x198c,0x198c,0x198c,0x1cb,0x1cb,0x1cb, +0x1cb,0x1cb,0x1cb,0x1cb,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91,0x1a91, +0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb,0x1cb, +0x1cb,0x1cb,0x1cb,0x1cb,0x1776,0x171c,0x15de,0x1722,0x1ce,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7, +0x15e7,0x1ce,0x1ce,0x15e7,0x15e7,0x1ce,0x1ce,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7, +0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x1ce,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x1ce,0x15e7,0x15e7, +0x1ce,0x15e7,0x15e7,0x15e7,0x15e7,0x15e7,0x1ce,0x1a16,0x171f,0x15e7,0x15d8,0x15de,0x15d8,0x15de,0x15de,0x15de, +0x15de,0x1ce,0x1ce,0x15de,0x15de,0x1ce,0x1ce,0x15e1,0x15e1,0x15e4,0x1ce,0x1ce,0x1779,0x1ce,0x1ce,0x1ce, +0x1ce,0x1ce,0x1ce,0x15d8,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x15ea,0x15e7,0x15e7,0x15e7,0x15e7,0x15de,0x15de, +0x1ce,0x1ce,0x15db,0x15db,0x15db,0x15db,0x15db,0x15db,0x15db,0x1ce,0x1ce,0x1ce,0x15db,0x15db,0x15db,0x15db, +0x15db,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x1ce,0x15ff,0x15ff,0x15ff,0x15ff, +0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x1d1,0x15ff, +0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15ff,0x15f9,0x15f9,0x15f9,0x15ed, +0x15ed,0x15ed,0x15f9,0x15f9,0x15ed,0x15fc,0x15f0,0x15ed,0x1602,0x1602,0x15f6,0x1602,0x1602,0x15f3,0x180c,0x1d1, +0x1611,0x1611,0x1611,0x1605,0x1605,0x1605,0x1605,0x1605,0x1605,0x1608,0x160b,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4, +0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x160e,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4,0x1d4, +0x177c,0x177c,0x177c,0x177c,0x161d,0x161a,0x1a3d,0x1a3d,0x1ae5,0x1ae8,0x1ae2,0x1ae2,0x1d7,0x1d7,0x1d7,0x1d7, +0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6, +0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623, +0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da, +0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623, +0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da, +0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da, +0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da,0x1da, +0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f,0x162f, +0x162f,0x162f,0x162f,0x1626,0x1629,0x162c,0x162f,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd,0x1dd, +0x163e,0x163e,0x163e,0x163e,0x163e,0x1632,0x1632,0x1e0,0x1e0,0x1e0,0x1e0,0x1635,0x1635,0x1635,0x1635,0x1635, +0x163b,0x163b,0x163b,0x163b,0x163b,0x163b,0x1638,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0,0x1e0, +0x1647,0x1647,0x1647,0x1647,0x1647,0x1e3,0x1e3,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644,0x1644, +0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1641,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3,0x1e3, +0x164a,0x165c,0x165c,0x1650,0x1659,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6, +0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1653,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6,0x1e6, +0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662, +0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1662,0x1e9, +0x166e,0x166e,0x166e,0x166e,0x166e,0x1668,0x1671,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e,0x166e, +0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166b,0x166e,0x166e,0x166e,0x166e,0x166e,0x1ec, +0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677, +0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1677,0x1ef, +0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1683, +0x1683,0x1683,0x1683,0x1683,0x1683,0x1683,0x1680,0x1680,0x1680,0x1680,0x1680,0x1f2,0x1f2,0x1f2,0x1f2,0x1f2, +0x169b,0x169b,0x169e,0x169e,0x16a1,0x1692,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5, +0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1698,0x1f5,0x1692,0x1692,0x1692,0x1692,0x1692, +0x1692,0x1692,0x1f5,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b, +0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x1f5,0x1f5,0x1f5,0x1f5,0x1f5,0x169b,0x169b,0x169b, +0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa, +0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8,0x1f8, +0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3,0x16b3, +0x16b3,0x16b3,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x1fb,0x16b0,0x16b0,0x16b0,0x16b0,0x1fb,0x1fb,0x1fb, +0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16b6, +0x16c8,0x16c8,0x16b6,0x16b6,0x16b6,0x16b6,0x201,0x201,0x16c8,0x16c8,0x16cb,0x16cb,0x16b6,0x16b6,0x16c8,0x16bc, +0x16b9,0x16bf,0x16d1,0x16d1,0x16c2,0x16c2,0x16c5,0x16c5,0x16c5,0x16d1,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785, +0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1785,0x1782,0x1782,0x1782,0x1782,0x177f,0x177f,0x201,0x201, +0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201, +0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201,0x201, +0x204,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x16d4, +0x16d4,0x16d4,0x16d4,0x16d4,0x16d4,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204,0x204, +0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x207,0x207,0x207,0x207, +0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7, +0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7, +0x16d7,0x16d7,0x207,0x207,0x207,0x207,0x207,0x207,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7, +0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7, +0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x207,0x207,0x1aeb,0x1aeb,0x207,0x207, +0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207, +0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207,0x207, +0x16da,0x16e9,0x16e0,0x16dd,0x16ef,0x16ef,0x16e3,0x16ef,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a, +0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x16e6,0x20a,0x20a,0x20a,0x20a,0x20a,0x20a, +0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f5,0x16f2,0x16f2,0x16f2,0x16f2,0x16f2,0x16f2, +0x16f2,0x16f2,0x16f2,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x20d,0x16fb, +0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797, +0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x1797,0x198f,0x210,0x210,0x1788,0x1788,0x1788, +0x1794,0x1794,0x1788,0x1788,0x1788,0x1788,0x1794,0x1788,0x1788,0x1788,0x1788,0x178b,0x210,0x210,0x210,0x210, +0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x1791,0x178e,0x178e,0x179a,0x179a,0x179a,0x178e, +0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213, +0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213, +0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x213,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af, +0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x17af,0x219,0x17af,0x17af,0x219,0x219, +0x219,0x219,0x219,0x17ac,0x17ac,0x17ac,0x17ac,0x17ac,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x21c, +0x17b2,0x21c,0x17b2,0x17b2,0x17b2,0x17b2,0x21c,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2, +0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x21c,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2,0x17b2, +0x17b2,0x17b5,0x21c,0x21c,0x21c,0x21c,0x21c,0x21c,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614, +0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be, +0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x21f,0x21f,0x21f,0x21f,0x21f, +0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x21f,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb, +0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x21f,0x21f,0x21f,0x21f,0x21f, +0x21f,0x21f,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8,0x17b8,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929, +0x1929,0x1929,0x1929,0x1929,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a94,0x1b3c,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1b3f, +0x1b3c,0x222,0x1a0a,0x1a94,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x17df,0x1a0a,0x1a0a,0x1a94,0x1a94,0x1a94,0x1a94,0x1a94, +0x1a94,0x1a94,0x1a94,0x1b3c,0x222,0x1a97,0x1a97,0x1a97,0x1929,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x192c, +0x192c,0x192c,0x192c,0x192c,0x192c,0x192c,0x1929,0x1929,0x17c4,0x17c4,0x17c4,0x17c4,0x17c1,0x17c4,0x17c4,0x17c7, +0x17ca,0x17c7,0x17c7,0x17c4,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225,0x225, +0x225,0x225,0x225,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x1821,0x1821,0x1821,0x1821,0x1818,0x1818,0x1818,0x1812, +0x1815,0x1815,0x1815,0x1a40,0x228,0x228,0x228,0x228,0x181e,0x181e,0x181e,0x181e,0x181e,0x181e,0x181e,0x181e, +0x181e,0x181e,0x228,0x228,0x228,0x228,0x181b,0x181b,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c, +0x183c,0x22b,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c, +0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x183c,0x1839,0x1827,0x1827,0x1827,0x1827, +0x1827,0x1827,0x1827,0x22b,0x1827,0x1827,0x1827,0x1827,0x1827,0x1827,0x1839,0x182a,0x183c,0x183f,0x183f,0x1833, +0x1830,0x1830,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x22b,0x1836,0x1836,0x1836,0x1836, +0x1836,0x1836,0x1836,0x1836,0x1836,0x1836,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d,0x182d, +0x182d,0x182d,0x182d,0x182d,0x182d,0x22b,0x22b,0x22b,0x184b,0x184e,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854, +0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842,0x1842, +0x1842,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x22e,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad, +0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x231, +0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845,0x1845, +0x1845,0x231,0x231,0x1845,0x1845,0x1845,0x1845,0x1845,0x1893,0x192f,0x1a9a,0x1a9d,0x1b45,0x234,0x234,0x234, +0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x1b42,0x1b42,0x234,0x234,0x234,0x234,0x234,0x234, +0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x234,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854, +0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x1854,0x237,0x237,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848, +0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848,0x237,0x1851,0x1848,0x1848,0x1848,0x1848,0x1848,0x1848, +0x1848,0x1851,0x1848,0x1848,0x1851,0x1848,0x1848,0x237,0x237,0x237,0x237,0x237,0x237,0x237,0x237,0x237, +0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x1857,0x23a,0x23a,0x23a, +0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a,0x23a, +0x186f,0x186f,0x1860,0x185a,0x185a,0x186f,0x185d,0x1872,0x1872,0x1872,0x1872,0x1875,0x1875,0x1869,0x1866,0x1863, +0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x186c,0x1aee,0x1869,0x23d,0x1863,0x1992,0x1a43, +0x1af1,0x1af1,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d, +0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d,0x23d, +0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b,0x187b, +0x187b,0x187b,0x187b,0x187b,0x240,0x240,0x240,0x240,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878, +0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878,0x1878, +0x1878,0x1878,0x1878,0x1878,0x240,0x240,0x240,0x240,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896, +0x1896,0x1896,0x1896,0x1896,0x1896,0x1a13,0x1a13,0x1a13,0x1a13,0x1a13,0x1aa0,0x1aa0,0x1aa0,0x1aa0,0x1aa0,0x1aa0, +0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x243,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935, +0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935, +0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x1935,0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x246, +0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x246,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288, +0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x249, +0x18d5,0x18d5,0x249,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5, +0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18d5,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x18c9,0x249, +0x249,0x249,0x18c9,0x249,0x18c9,0x18c9,0x249,0x18c9,0x18c9,0x18c9,0x18cc,0x18c9,0x18cf,0x18cf,0x18d8,0x18c9, +0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x249,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2,0x18d2, +0x18d2,0x18d2,0x249,0x249,0x249,0x249,0x249,0x249,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938, +0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938, +0x1938,0x1938,0x1938,0x1938,0x24c,0x24c,0x24c,0x24c,0x1905,0x1908,0x1917,0x1917,0x1908,0x190b,0x1905,0x1902, +0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x255,0x18f0,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18ed, +0x18ed,0x18db,0x18db,0x18db,0x18f0,0x18f0,0x18f0,0x18f0,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49, +0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x258,0x258,0x258,0x258, +0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x258,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x1998, +0x1998,0x1998,0x1998,0x1998,0x1998,0x1998,0x258,0x258,0x1aa9,0x1aa9,0x1aa9,0x1aa9,0x1b4b,0x28b,0x28b,0x28b, +0x1aa9,0x1aa9,0x1aa9,0x28b,0x28b,0x28b,0x28b,0x28b,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa, +0x19aa,0x19aa,0x19aa,0x19aa,0x19a7,0x19a7,0x19a7,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b,0x199b, +0x19a7,0x19a1,0x199e,0x19a4,0x25b,0x25b,0x25b,0x25b,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad, +0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad,0x19ad, +0x19ad,0x19ad,0x19ad,0x25e,0x25e,0x19ad,0x19ad,0x19ad,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x261,0x19bc, +0x19bc,0x261,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc, +0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19bc,0x19b9,0x19b9,0x19b9,0x19b9,0x19b9,0x261, +0x19b0,0x19b0,0x261,0x19b9,0x19b9,0x19b0,0x19b9,0x19b3,0x19bc,0x261,0x261,0x261,0x261,0x261,0x261,0x261, +0x19c5,0x19c5,0x19c8,0x19c8,0x19bf,0x19bf,0x19bf,0x19bf,0x264,0x264,0x264,0x264,0x264,0x264,0x264,0x264, +0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x19c2,0x264,0x264,0x264,0x264,0x264,0x264, +0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19ce,0x19cb,0x19cb,0x19cb, +0x19ce,0x19cb,0x19cb,0x19cb,0x19cb,0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x267,0x267, +0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7,0x19d7, +0x19d7,0x19d7,0x19d7,0x19d1,0x19d1,0x19d4,0x19d4,0x19da,0x19da,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a,0x26a, +0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd,0x19dd, +0x19dd,0x19dd,0x19dd,0x19dd,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d,0x26d, +0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0, +0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e0,0x19e3,0x19ec,0x19e0,0x19e0,0x270,0x270,0x270,0x270,0x270, +0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19ef,0x19f2,0x273,0x273,0x273,0x273,0x273,0x273,0x273,0x273, +0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb,0x19fb, +0x19fb,0x19fb,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f5,0x19f8,0x19f8,0x19f8, +0x19f8,0x19fe,0x19fe,0x19fe,0x19fe,0x19fe,0x276,0x276,0x276,0x276,0x276,0x276,0x1a55,0x1a55,0x1a55,0x1a55, +0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55,0x1a55, +0x1a55,0x1a55,0x1a55,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x27c,0x1a64,0x1a64,0x1a64,0x1a64, +0x1a64,0x1a64,0x1a64,0x1a64,0x27f,0x27f,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64, +0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a64,0x1a61,0x1a61,0x1a61, +0x1a58,0x1a58,0x1a58,0x1a58,0x27f,0x27f,0x1a58,0x1a58,0x1a61,0x1a61,0x1a61,0x1a61,0x1a5b,0x1a64,0x1a5e,0x1a64, +0x1a61,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f, +0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x27f,0x1a70,0x1a70,0x1a70,0x1a70, +0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x282,0x282,0x282,0x1a67,0x1a67,0x1a67,0x1a67, +0x1a67,0x1a67,0x1a67,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a73,0x1a73,0x282,0x282,0x285,0x1a76,0x1a76,0x1a76, +0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76, +0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x1a76,0x285,0x285, +0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288, +0x1aa3,0x1aa3,0x1aa3,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288, +0x1aa6,0x1aa6,0x1aa6,0x1aa6,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x288,0x1938,0x1938,0x1938,0x1938, +0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1aa9,0x1aa9,0x1aa9,0x1b4b, +0x1b4b,0x1b4b,0x1b4b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x1aa9,0x1aa9,0x1aa9,0x1aa9, +0x1aa9,0x1aa9,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x28b,0x28b,0x28b, +0x28b,0x28b,0x28b,0x28b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x28b,0x28b,0x28b,0x28b,0x28b, +0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x1b4b,0x28b, +0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b, +0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b,0x28b, +0x1a7f,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79, +0x1a79,0x1a79,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x28e,0x1a7c, +0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a82,0x1a82,0x1a82,0x1a82, +0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x1a88,0x291,0x291,0x291,0x291,0x291,0x1a85, +0x1af7,0x1af7,0x1af7,0x1af7,0x1af7,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x1af4,0x294,0x294,0x294,0x294, +0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294,0x294, +0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x297,0x297,0x1b12,0x297,0x297,0x1b12,0x1b12,0x1b12,0x1b12, +0x1b12,0x1b12,0x1b12,0x1b12,0x297,0x1b12,0x1b12,0x297,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12, +0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1b12,0x1afa,0x1b09,0x1b09,0x1b09,0x1b09,0x1b09,0x297,0x1b09, +0x1b0c,0x297,0x297,0x1afa,0x1afa,0x1b0f,0x1b00,0x1b15,0x1b09,0x1b15,0x1b09,0x1afd,0x1b18,0x1b03,0x1b18,0x297, +0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x297,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06,0x1b06, +0x1b06,0x1b06,0x297,0x297,0x297,0x297,0x297,0x297,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51, +0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x29a,0x29a, +0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a, +0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a,0x29a, +0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e, +0x1b1e,0x1b1e,0x1b1e,0x2a0,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e, +0x1b1e,0x1b1e,0x1b1e,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0, +0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21,0x1b21, +0x1b21,0x1b21,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x2a0,0x1b54,0x1b54,0x1b54,0x1b54,0x1b54,0x1b54,0x1b54,0x1b54, +0x1b54,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3, +0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3,0x2a3, +0x2a3,0x2a3,0x2a3,0x2a3,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x2a6,0x1b24, +0x1b24,0x1b27,0x2a6,0x2a6,0x1b2a,0x1b2a,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6,0x2a6, +0x2a6,0x2a6,0x2a6,0x2a6,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9, +0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9, +0x2a9,0x2a9,0x2a9,0x2a9,0x1932,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1,0x2c1, +0x2c1,0x2c1,0x2c1,0x2c1,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9, +0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9, +0x2a9,0x2a9,0x97b,0x97b,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x2c4, +0x2c4,0x2c4,0x2c4,0x2c4,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9,0x2a9, +0x2a9,0x2a9,0x2a9,0x2a9,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x1a04, +0x1a04,0x1a04,0x1a04,0x1a04,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33,0x1b33, +0x1b33,0x2ac,0x2ac,0x2ac,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0x12e4, +0x12e4,0x12e4,0x2af,0x2af,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0, +0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0x2af,0x2af, +0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af, +0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af,0x2af, +0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9, +0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0x1b36,0x1b36,0x1b36,0x1b36,0x1b36,0x1b36,0x1b36,0x2b2,0x2b2, +0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac, +0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0x2b5,0x2b5, +0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9, +0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8,0x2b8, +0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416, +0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x2bb,0x2bb, +0x17dc,0x17dc,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be,0x2be, +0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932, +0x3cc,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3c0,0x3cc,0x3cc,0x3cc,0x3cc,0x3c6,0x1158,0x133e, +0x3cf,0x945,0x948,0x3bd,0x3bd,0x1155,0x133b,0x133b,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2,0x3d2, +0x1155,0x3c0,0x3c0,0x3cc,0xce1,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf, +0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf, +0x3cf,0x3cf,0x3c0,0x3c0,0x8d0,0x8d3,0x963,0x963,0x963,0x963,0x963,0x963,0x963,0x963,0x963,0x963, +0x3c9,0xfba,0xfb7,0x1341,0x1341,0x1341,0x1341,0x1341,0x1506,0x115b,0x115b,0xf0c,0xf0c,0xdda,0xf0c,0xf0c, +0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3d2,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf, +0x3cf,0x3d2,0x3cf,0x3cf,0x3d2,0x3cf,0x3cf,0x3cf,0x3cf,0x3cf,0x133b,0x133e,0x3c3,0x3cf,0x3cc,0x3cc, +0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x1347,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e, +0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x46e,0x1347,0x18ae,0x18ae,0xfd8,0x45f,0x468, +0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa, +0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0x4aa,0xbc7,0xbc7,0xde6,0xde6,0x8d6,0xde9,0x1428,0x1428,0x1428, +0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad, +0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad,0x4ad, +0x4b3,0x4b3,0x4b3,0x1170,0x1170,0x1170,0x1170,0x1170,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0, +0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0, +0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x4b0,0x116d,0x116d,0x116d,0x116d,0x116d,0x116d, +0x4b6,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3, +0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3,0x4b3, +0x4b3,0x4b3,0x4b3,0x4b3,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9, +0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9, +0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4b9,0x4b9,0x4b9,0x4b9,0x4bc,0x9bd,0x1005,0x1005,0x1008,0x1005, +0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9, +0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x4bf,0x4b9,0x1008,0x1005,0x1008,0x1005,0x1008,0x1005, +0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce, +0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4cb,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce,0x4ce, +0x6ae,0x6ae,0x6b1,0x4e9,0x6bd,0x6ba,0x6ba,0x6b7,0x513,0x513,0x4d1,0x4d1,0x4d1,0x4d1,0x4d1,0xb58, +0x6c0,0x4f5,0x6d8,0x6db,0x50a,0x6c0,0x4f8,0x4f8,0x4e9,0x504,0x504,0x6ae,0x510,0x50d,0x6b4,0x4e3, +0x4da,0x4da,0x4dd,0x4dd,0x4dd,0x4dd,0x4dd,0x4e0,0x4dd,0x4dd,0x4dd,0x4d4,0x51c,0x519,0x516,0x516, +0x6cc,0x4fe,0x4fb,0x6c9,0x6c6,0x6c3,0x6d5,0x4ec,0x6d2,0x6d2,0x501,0x504,0x6cf,0x6cf,0x501,0x504, +0x4e6,0x4e9,0x4e9,0x4e9,0x507,0x4f2,0x4ef,0xbdc,0xaf2,0xaf5,0xaef,0xaef,0xaef,0xaef,0xbd3,0xbd3, +0xbd3,0xbd3,0xbd9,0xd0e,0xd0b,0xdf5,0xdf8,0xbd6,0xdf8,0xdf8,0xdf8,0xdf8,0xdf5,0xdf8,0xdf8,0xbd0, +0x540,0x540,0x540,0x540,0x540,0x540,0x540,0x53d,0x543,0x75c,0x540,0x9c0,0x9e1,0xaf8,0xaf8,0xaf8, +0xbe2,0xbe2,0xdfe,0xdfe,0xdfe,0xdfe,0x1179,0x117c,0x117c,0x135c,0x14f4,0x151e,0x1521,0x1521,0x1734,0x18b1, +0x54f,0x54f,0x567,0x6ea,0x54c,0x6e7,0x54f,0x564,0x54c,0x6ea,0x55e,0x567,0x567,0x567,0x55e,0x55e, +0x567,0x567,0x567,0x6f3,0x54c,0x567,0x6ed,0x54c,0x55b,0x567,0x567,0x567,0x567,0x567,0x54c,0x54c, +0x552,0x6e7,0x6f0,0x54c,0x567,0x54c,0x6f6,0x54c,0x567,0x555,0x56d,0x6f9,0x567,0x567,0x558,0x55e, +0x567,0x567,0x56a,0x567,0x55e,0x561,0x561,0x561,0x561,0xb04,0xb01,0xd11,0xe07,0xbf7,0xbfa,0xbfa, +0xbf4,0xbf1,0xbf1,0xbf1,0xbf1,0xbfa,0xbf7,0xbf7,0xbf7,0xbf7,0xbee,0xbf1,0xe04,0xf18,0xf1b,0x100e, +0x117f,0x117f,0x117f,0x6ff,0x6fc,0x570,0x573,0x573,0x573,0x573,0x573,0x6fc,0x6ff,0x6ff,0x6fc,0x573, +0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x705,0x57c,0x57c,0x57c,0x57c, +0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x702,0x576,0x576,0x576,0x576,0x576,0x576, +0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x57f,0x588,0x588,0x582,0x582,0x582,0x585,0x57f, +0x582,0x582,0x57f,0x57f,0x57f,0x57f,0x582,0x582,0x708,0x708,0x57f,0x57f,0x582,0x582,0x582,0x582, +0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x585,0x585,0x585,0x582,0x582,0x70b,0x582, +0x70b,0x582,0x582,0x582,0x582,0x582,0x582,0x582,0x57f,0x582,0x57f,0x57f,0x57f,0x57f,0x57f,0x57f, +0x582,0x582,0x57f,0x708,0x57f,0x57f,0x57f,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a,0xb0a, +0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0xbfd,0x711,0x58b,0x711,0x711, +0x58e,0x58b,0x58b,0x711,0x711,0x58e,0x58b,0x711,0x58e,0x58b,0x58b,0x711,0x58b,0x711,0x59a,0x597, +0x58b,0x711,0x58b,0x58b,0x58b,0x58b,0x711,0x58b,0x58b,0x711,0x711,0x711,0x711,0x58b,0x58b,0x711, +0x58e,0x711,0x58e,0x711,0x711,0x711,0x711,0x711,0x717,0x591,0x711,0x591,0x591,0x58b,0x58b,0x58b, +0x711,0x711,0x711,0x711,0x58b,0x58b,0x58b,0x58b,0x711,0x711,0x58b,0x58b,0x58b,0x58e,0x58b,0x58b, +0x58e,0x58b,0x58b,0x58e,0x711,0x58e,0x58b,0x58b,0x711,0x58b,0x58b,0x58b,0x58b,0x58b,0x711,0x58b, +0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x714,0x711,0x58e,0x58b, +0x711,0x711,0x711,0x711,0x58b,0x58b,0x711,0x711,0x58b,0x58e,0x714,0x714,0x58e,0x58e,0x58b,0x58b, +0x58e,0x58e,0x58b,0x58b,0x58e,0x58e,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58e,0x58e,0x711,0x711, +0x58e,0x58e,0x711,0x711,0x58e,0x58e,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b, +0x58b,0x711,0x58b,0x58b,0x58b,0x711,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x711,0x58b,0x58b, +0x58b,0x58b,0x58b,0x58b,0x58e,0x58e,0x58e,0x58e,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b, +0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x711,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b, +0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b, +0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58b,0x58e,0x58e,0x58e,0x58e,0x58b,0x58b,0x58b,0x58b, +0x58b,0x58b,0x58e,0x58e,0x58e,0x58e,0x58b,0x594,0x58b,0x58b,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00, +0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0xc00,0x59d,0xb0d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d, +0x5a9,0x5a6,0x5a9,0x5a6,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x71a,0x59d,0x59d,0x59d,0x59d,0x59d, +0x59d,0x59d,0x81f,0x81f,0x59d,0x59d,0x59d,0x59d,0x5a3,0x5a3,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d, +0x5a0,0x825,0x822,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d, +0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d, +0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0x59d,0xb0d,0xc06,0xb0d,0xb0d,0xb0d,0x5ac,0x5ac,0x5ac,0x5ac, +0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac, +0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x5ac,0x723,0x723,0x723,0x723, +0x723,0x723,0x723,0x723,0x723,0x723,0x5b2,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f, +0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xc6f,0xd89,0x72c,0x72c,0x72c,0x72c, +0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c, +0x5b5,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x5b8,0x72c,0x72c,0x72c,0x72c, +0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x5b8,0x5b8,0x5b8,0x5b8,0x72c,0x72c,0x72c,0x72c, +0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72f,0x72f,0x72f,0x72f, +0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x72f,0x5bb,0x5bb,0x72f,0x72f, +0x72f,0x72f,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0xc09,0x735,0x735,0x5be,0x732, +0x732,0x732,0x732,0x732,0x732,0x732,0x5c1,0x5c1,0x5be,0x5be,0x5c4,0x5c4,0x5c4,0x5c4,0x735,0x735, +0x5c4,0x5c4,0x738,0x735,0x5be,0x5be,0x5be,0x5be,0x735,0x735,0x5c4,0x5c4,0x738,0x735,0x5be,0x5be, +0x5be,0x5be,0x735,0x735,0x732,0x5be,0x5c4,0x735,0x5be,0x5be,0x732,0x735,0x735,0x735,0x5c4,0x5c4, +0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x5be,0x735,0x732, +0x735,0x732,0x5be,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5c4,0x5be,0x5be,0x732,0xb16,0xb16,0xb16,0xb16, +0xb16,0xb16,0xb16,0xb16,0xc0c,0xc0c,0xc0c,0xc0f,0xc0f,0xc8a,0xc8a,0xc0c,0x5d3,0x5d3,0x5d3,0x5d3, +0x5d0,0x74a,0x747,0x5ca,0x5ca,0x73b,0x5ca,0x5ca,0x5ca,0x5ca,0x741,0x73b,0x5ca,0x5d0,0x5ca,0x5c7, +0xd92,0xd92,0xc15,0xc15,0xe13,0xb19,0x5cd,0x5cd,0x73e,0x5d6,0x73e,0x5cd,0x5d0,0x5ca,0x5d0,0x5d0, +0x5ca,0x5ca,0x5d0,0x5ca,0x5ca,0x5ca,0x5d0,0x5ca,0x5ca,0x5ca,0x5d0,0x5d0,0x5ca,0x5ca,0x5ca,0x5ca, +0x5ca,0x5ca,0x5ca,0x5ca,0x5d0,0x5d3,0x5d3,0x5cd,0x5ca,0x5ca,0x5ca,0x5ca,0x74d,0x5ca,0x74d,0x5ca, +0x5ca,0x5ca,0x5ca,0x5ca,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828,0x828, +0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5ca,0x5d0,0x74d,0x74a,0x5d9,0x74d, +0x73b,0x741,0x5d0,0x73b,0x744,0x73b,0x73b,0x5ca,0x73b,0x74a,0x5d9,0x74a,0xb19,0xb19,0xc18,0xc18, +0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc1b,0xc18,0xc18,0xe10,0xec7,0x5dc,0x5dc,0x5dc,0x5dc, +0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc,0x5dc, +0x5df,0x13e6,0x13e6,0x13e6,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x152a,0x5eb,0x5f4,0x5eb, +0x5eb,0x13e6,0x5df,0x5df,0x5f4,0x5f4,0x13e9,0x13e9,0x5f7,0x5f7,0x5e8,0x5ee,0x5e8,0x5e8,0x5ee,0x5df, +0x5ee,0x5df,0x5ee,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5ee,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df, +0x13e6,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5ee,0x5ee,0x5df,0x5df,0x5df, +0x5df,0x5df,0x5df,0x5df,0x5df,0x753,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5ee,0x5df,0x5df,0x5ee, +0x5df,0x5df,0x5df,0x5df,0x13e6,0x5df,0x13e6,0x5df,0x5df,0x5df,0x5df,0x13e6,0x13e6,0x13e6,0x5df,0x12de, +0x5df,0x5df,0x5df,0x5e5,0x5e5,0x5e5,0x5e5,0x1368,0x1368,0x5df,0x5e2,0x5f1,0x5f4,0x5e8,0x5e8,0x5e8, +0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0xc21,0xc1e,0x750,0x750, +0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x750,0x5df,0x5ee,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df, +0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x13e6,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df, +0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x5df,0x13e6,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618, +0x618,0x618,0x618,0x618,0x618,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x621,0x621,0x621,0x621, +0x621,0x621,0x621,0x621,0x618,0x61e,0x60f,0x612,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e, +0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e, +0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x615,0x615,0x615,0x615,0x615,0x615,0x618,0x618,0x618,0x618, +0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618, +0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618, +0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618, +0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618, +0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61b,0x621,0x61e,0x618, +0x61b,0x621,0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61e,0x618, +0x61b,0x621,0x61e,0x618,0x61e,0x618,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b,0x136b, +0x136b,0x136b,0x136b,0x136b,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61b,0x621,0x61b,0x621,0x61e,0x618, +0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61e,0x618,0x61b,0x61e,0x618,0x61b, +0x61e,0x618,0x61b,0x621,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618, +0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x61b,0x61b,0x61b,0x61b,0x61b, +0x61b,0x61b,0x61b,0x61b,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e, +0x61e,0x61e,0x61e,0x61e,0x61e,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618, +0x618,0x618,0x618,0x618,0x61b,0x61b,0x618,0x61b,0x618,0x61b,0x618,0x618,0x61b,0x618,0x618,0x61b, +0x618,0x61b,0x618,0x618,0x61b,0x618,0x61b,0x61b,0x618,0x618,0x618,0x61b,0x618,0x618,0x618,0x618, +0x618,0x61b,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618, +0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x618,0x61b,0x61b,0x618,0x618,0x61b,0x618,0x61b,0x618, +0x618,0x618,0x618,0x618,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b, +0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b, +0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x61b,0x621,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e, +0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e, +0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621, +0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x621,0x61e,0x61e,0x61e, +0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x61e,0x624,0x624,0x624,0x624,0x101a,0x101a,0x101a,0x152d, +0x152d,0x152d,0x152d,0x152d,0x152d,0x152d,0x173a,0x173a,0x885,0x88b,0x88b,0x897,0x897,0x888,0x87f,0x888, +0x87f,0x888,0x87f,0x888,0x87f,0x888,0x87f,0x888,0x633,0x633,0x62d,0x633,0x62d,0x633,0x62d,0x633, +0x62d,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,0x62d,0x630, +0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x633,0x62d,0x633,0x62d,0x633, +0x62d,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630, +0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630,0x636,0x633,0x62d,0x630, +0x636,0x633,0x62d,0x630,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720, +0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x720,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d, +0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d, +0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x71d,0x726,0x726,0x726,0x726,0x726,0x726, +0x726,0x726,0x726,0x726,0x726,0x726,0x729,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726,0x726, +0x726,0x726,0x726,0x726,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723,0x723, +0x723,0x723,0x723,0x723,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c, +0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c,0x72c, +0x72c,0x72c,0x72c,0x72c,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756, +0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756,0x756, +0x756,0x756,0x756,0x756,0xc78,0x8e8,0x8e2,0x8df,0x8e5,0x8dc,0x76b,0x76e,0x76e,0x76e,0x76e,0x76e, +0x76e,0x76e,0x76e,0x76e,0x8ee,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b, +0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x76b, +0x76b,0x76b,0x76b,0x76b,0x76b,0x76b,0x8eb,0x8eb,0x771,0x8fd,0x900,0x906,0x82b,0x837,0x91b,0x834, +0x8f4,0x8f1,0x8f4,0x8f1,0x8fa,0x8f7,0x8fa,0x8f7,0x8f4,0x8f1,0x831,0x906,0x8f4,0x8f1,0x8f4,0x8f1, +0x8f4,0x8f1,0x8f4,0x8f1,0x909,0x912,0x90f,0x90f,0x777,0x7b3,0x7b3,0x7b3,0x7b3,0x7b3,0x7b3,0x7ad, +0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad,0x7ad, +0x7ad,0x7ad,0x7ad,0x77a,0x795,0x774,0x79b,0x79e,0x798,0x7b0,0x7b0,0x7b0,0x7b0,0x7b0,0x7b0,0x7aa, +0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa,0x7aa, +0x7aa,0x7aa,0x7aa,0x77a,0x795,0x774,0x795,0xc7b,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819, +0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819, +0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x819,0x12d8,0x12d8,0x12d8,0x12d8,0x12d8,0x81c, +0x831,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x834,0x951,0x951,0x951,0x951,0x83a,0x83a, +0x90c,0x918,0x918,0x918,0x918,0x915,0x82e,0x903,0xb3d,0xb3d,0xb3d,0xc8d,0xcab,0xca8,0xb5b,0x8d9, +0x840,0x83d,0x840,0x843,0x83d,0x840,0x83d,0x840,0x83d,0x840,0x83d,0x83d,0x83d,0x83d,0x83d,0x83d, +0x840,0x840,0x83d,0x840,0x840,0x83d,0x840,0x840,0x83d,0x840,0x840,0x83d,0x840,0x840,0x83d,0x83d, +0xcae,0x852,0x84c,0x852,0x84c,0x852,0x84c,0x852,0x84c,0x852,0x84c,0x84c,0x84f,0x84c,0x84f,0x84c, +0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c, +0x84f,0x84c,0x84f,0x852,0x84c,0x84f,0x84c,0x84f,0x84c,0x84f,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c, +0x84f,0x84f,0x84c,0x84f,0x84f,0x84c,0x84f,0x84f,0x84c,0x84f,0x84f,0x84c,0x84f,0x84f,0x84c,0x84c, +0x84c,0x84c,0x84c,0x852,0x84c,0x852,0x84c,0x852,0x84c,0x84c,0x84c,0x84c,0x84c,0x84c,0x852,0x84c, +0x84c,0x84c,0x84c,0x84c,0x84f,0x852,0x852,0x84f,0x84f,0x84f,0x84f,0x921,0x924,0x855,0x858,0xc96, +0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e, +0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e, +0x861,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e, +0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x85e,0x86a,0x86a,0x86a,0x86a, +0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a, +0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0x86a,0xd9b,0xd9b,0xeca,0x864,0x92d,0x92d,0x92d,0x92d, +0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0xd95,0xd95,0xd95,0xd95,0x86d,0x86d,0x86d,0x86d, +0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d, +0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x86d,0x1ab2,0x936,0x936,0x936,0x936, +0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x870,0x870,0x870, +0x870,0x870,0x870,0xd9e,0xd9e,0xd9e,0xd9e,0x939,0x939,0x939,0x939,0x939,0x870,0x870,0x870,0x870, +0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870, +0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0x870,0xd9e,0xd9e, +0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873, +0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873,0x873, +0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876, +0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876, +0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0x876,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd, +0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd,0xecd, +0x113d,0x113d,0x113d,0x113d,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879, +0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x879, +0x879,0x879,0x879,0x879,0x879,0x879,0x87c,0x87c,0x879,0x87c,0x879,0x87c,0x87c,0x879,0x879,0x879, +0x879,0x879,0x879,0x879,0x879,0x879,0x879,0x87c,0x879,0x87c,0x879,0x87c,0x87c,0x879,0x879,0x87c, +0x87c,0x87c,0x879,0x879,0x879,0x879,0x14e5,0x14e5,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f, +0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d, +0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d, +0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x92d,0x131d,0x131d,0x131d,0x131d,0x12bd,0x12bd,0x12bd,0x12bd, +0x12bd,0x12bd,0x12bd,0x12bd,0xd95,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99, +0xc99,0xc99,0xc99,0xc99,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930, +0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x933,0x930,0x933,0x930,0x930, +0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930,0x930, +0x930,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99,0xc99, +0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936, +0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0x936,0xd9e, +0x9b7,0x999,0x999,0x999,0x999,0x993,0x999,0x999,0x9ab,0x999,0x999,0x996,0x9a2,0x9a8,0x9a8,0x9a8, +0x9a8,0x9a8,0x9ab,0x993,0x99f,0x993,0x993,0x993,0x98a,0x98a,0x993,0x993,0x993,0x993,0x993,0x993, +0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x9ae,0x993,0x993,0x993,0x993,0x993,0x993, +0x993,0x993,0x993,0x993,0x996,0x98a,0x993,0x98a,0x993,0x98a,0x9a5,0x99c,0x9a5,0x99c,0x9b4,0x9b4, +0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3, +0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3,0x9c3, +0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6, +0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6,0x9c6, +0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9, +0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9,0x9c9, +0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2, +0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9cc,0x9cc, +0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5, +0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9cf,0x9cf, +0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2, +0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2,0x9d2, +0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5, +0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5,0x9d5, +0x9d8,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db, +0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9d8,0x9db,0x9db,0x9db, +0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db, +0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0x9db,0xa68,0xa68,0xfff,0xa68,0xa68,0xa68,0xa6b,0xa68, +0xfff,0xa68,0xa68,0xff6,0xa62,0xa56,0xa56,0xa56,0xa56,0xa65,0xa56,0xfe7,0xfe7,0xfe7,0xa56,0xa59, +0xa62,0xa5c,0xfed,0xff9,0xff9,0xfe7,0xfe7,0xfff,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61,0xb61, +0xb61,0xb61,0xa6e,0xa6e,0xa5f,0xa5f,0xa5f,0xa5f,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa65,0xa65, +0xa56,0xa56,0xfff,0xfff,0xfff,0xfff,0xfe7,0xfe7,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68, +0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68, +0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa68,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xdf2, +0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d, +0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d, +0xa7d,0xa7d,0xa7d,0xdf2,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d,0xa7d, +0xa7d,0xa7d,0xa7d,0xa7d,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83, +0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83,0xa83, +0xa83,0xa83,0xa83,0xa83,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89, +0xa89,0xa86,0xa8c,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0x1176,0x1176,0x1176,0x1176,0x1176, +0x1176,0x1176,0x1176,0x1176,0x1173,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89, +0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89, +0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa89,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e, +0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e, +0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xa9e,0xac2,0xac2,0xac2,0xac5,0xac5,0xac2,0xac2,0xac2, +0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xaaa,0xaaa,0xabf,0xaa1, +0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0xaa1,0xabf,0xabf,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2, +0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2, +0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xac2,0xae3,0xae3,0xae3,0xae3,0xae3,0xace,0xace,0xae3, +0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3, +0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3, +0xae3,0xae3,0xae3,0xae6,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3, +0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3,0xae3, +0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb10,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d, +0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xb0d,0xc06,0xc06,0xc06,0xc06,0xc06, +0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c, +0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c,0xb1c, +0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e, +0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e,0xb2e, +0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34, +0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34,0xb34, +0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40, +0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0xb40,0x13ec,0x13ec,0x13ec,0x1b2d,0x1b2d,0x1b2d,0x1b2d,0x1b2d, +0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43, +0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43,0xb43, +0xb43,0xb43,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0x1b30,0xb46,0xb46,0xb46,0xb46, +0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46, +0xb46,0xb49,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46, +0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46,0xb46, +0xb46,0xb46,0xb46,0xb46,0xb4c,0xb4c,0xc9c,0xc9c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c, +0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xc9c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c,0xb4c, +0xb4c,0xb4c,0xb4c,0xb4c,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70, +0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70,0xb70, +0xb70,0xb70,0xb70,0x1530,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xd26,0xd26,0xb76,0xb76,0xb76,0xb76, +0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xb76, +0xb76,0xb76,0xb76,0xb76,0xb76,0xb76,0xd23,0xd23,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74, +0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, +0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79, +0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb79,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c, +0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb7c,0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb82,0xb8e,0xb94, +0xb94,0xb94,0xb88,0xb88,0xb88,0xb91,0xb85,0xb85,0xb85,0xb85,0xb85,0xb7f,0xb7f,0xb7f,0xb7f,0xb7f, +0xb7f,0xb7f,0xb7f,0xb94,0xb94,0xb94,0xb94,0xb94,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88, +0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88, +0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb8b,0xb8b,0xb94,0xb94,0xb94,0xb88, +0xb88,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb94,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88, +0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb94,0xb94, +0xb94,0xb94,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb8b, +0xb8b,0xb8b,0xb8b,0xb8b,0xb8b,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88, +0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88,0xb88, +0xb88,0xb88,0x173d,0x173d,0xba0,0xb97,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb97, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0, +0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xb97, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb97,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0, +0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0, +0xba0,0xb97,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a, +0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a, +0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xb9a,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0, +0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0, +0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0, +0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0,0xba0, +0xba0,0xba0,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d, +0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xb9d,0xba0,0xba0,0xba0,0xba0, +0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3, +0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3,0xba3, +0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9, +0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9,0xba9, +0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac, +0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac,0xbac, +0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc06, +0xc06,0xc06,0xc06,0xc06,0xc06,0xc06,0xc03,0xc06,0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,0xc03, +0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,0xc03,0xd14,0xd17,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a,0xe0a, +0xe0a,0xe0a,0xe0a,0xe0a,0xf24,0xf24,0xf24,0xf24,0xc18,0xc18,0xc18,0xc18,0xc18,0xc18,0xc12,0xc12, +0xc12,0xc12,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1a,0xd1d,0xd1d,0xe10,0xec4,0xe10,0xe10,0xe10,0xe10, +0xe0d,0xe10,0xe0d,0xe10,0xe10,0x1014,0x12ae,0x12ae,0xe19,0xe19,0xe19,0xe19,0xe19,0xe1f,0xe1c,0xf36, +0xf36,0xf36,0xf36,0x142e,0x1026,0x142e,0x1374,0x1374,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e, +0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc4e,0xc81,0xc7e,0xc81,0xc7e,0xc81,0xc7e, +0x1137,0x1134,0x102c,0x1029,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51,0xc51, +0xc51,0xc51,0xc51,0xc51,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54, +0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54, +0xc54,0xc54,0xc54,0xc54,0xc57,0xc57,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54,0xc54, +0xc5a,0xc5a,0xc5a,0xc60,0xc5d,0xc87,0xc84,0xc60,0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc60, +0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc60,0xc5d,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a, +0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a, +0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc60,0xc5d,0xc60,0xc5d, +0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a, +0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc5a,0xc60,0xc5d,0xc5a,0xc5a, +0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc69,0xc63,0xc63,0xc63, +0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63, +0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63, +0xc69,0xc69,0xc69,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63, +0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63,0xc63, +0xc66,0xc63,0xc63,0xc63,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f, +0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f,0xc9f, +0xc9f,0xc9f,0xc9f,0xc9f,0xd20,0xd8f,0xe0d,0xe0d,0xe0d,0xe0d,0xe0d,0xe10,0xe0d,0xe0d,0xec4,0xec4, +0xe0d,0xe0d,0xe0d,0xe0d,0xe10,0xe10,0xf27,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014,0x1014, +0x1014,0x12db,0x12db,0x12b1,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44, +0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44,0xd44, +0xd44,0xd44,0xd44,0xd44,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd4a,0xd4a,0xd4a,0xd4a,0xd4a,0xd47, +0xd5c,0xd5c,0xd5c,0xd56,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd56, +0xd5c,0xd5c,0xd5c,0xd5c,0xd50,0xd50,0xd59,0xd59,0xd59,0xd59,0xd4d,0xd4d,0xd4d,0xd4d,0xd4d,0xd53, +0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe25,0xe22,0xe25,0xe25,0xe25, +0xe25,0xe25,0xe25,0xe25,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c, +0xd5c,0xd5c,0xd56,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c,0xd5c, +0xd5c,0xd50,0xd50,0xd50,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53, +0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53,0xd53, +0xd53,0xd53,0xd53,0xd53,0xd5f,0xd5f,0xd5f,0xd5f,0xd5f,0xd62,0xd62,0xd62,0xd5f,0xd5f,0xd5f,0xd5f, +0xd5f,0xd5f,0xe28,0xe28,0xe28,0xe28,0xe28,0xe28,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0xf39,0x1140, +0x1140,0x102f,0x102f,0x102f,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65, +0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65,0xd65, +0xd65,0xd65,0xd65,0xd65,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b, +0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b,0xd6b, +0xd6b,0xd6b,0xd6b,0xd6b,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74, +0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74,0xd74, +0xd74,0xd74,0xd74,0xd74,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80, +0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80,0xd80, +0xd80,0xd80,0xd80,0xd80,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c, +0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c,0xd8c, +0xd8c,0xd8c,0xd8c,0xd8c,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e, +0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e,0xe2e, +0xe2e,0xe2e,0xe2e,0xe2e,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34, +0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31,0xe31, +0xe31,0xe31,0xe31,0xe31,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34, +0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34,0xe34, +0xe34,0xe34,0xe34,0xe34,0xef4,0xef4,0xe46,0xe46,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0xf3c,0x103b, +0x103b,0x103b,0x103b,0x103b,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038,0x1038, +0x1038,0x1038,0x1038,0x1038,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52, +0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52,0xe55,0xe52, +0xe55,0xe52,0xe55,0xe52,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61, +0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61,0xe61, +0xe61,0xe61,0xe61,0xe61,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67, +0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67,0xe67, +0xe67,0xe67,0xe67,0xe67,0xeee,0xeee,0xeee,0xeee,0xeee,0xeee,0xeee,0xeee,0xe7f,0xe7f,0xe7f,0xe7f, +0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xe7f,0xf3f,0xf3f,0xf3f,0xf3f,0x103e, +0x103e,0x103e,0x103e,0x103e,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88, +0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88,0xe88, +0xe88,0xe88,0xe88,0xe88,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91, +0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91,0xe91, +0xe91,0xe91,0xe91,0xe91,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a, +0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a,0xe9a, +0xe9a,0xe9a,0xe9a,0xe94,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97, +0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe97,0xe9a, +0xe9a,0xe9a,0xe9a,0xe9a,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3,0xea3, +0xea3,0xea3,0xea0,0xea0,0xea0,0xea0,0xea0,0xea0,0xea0,0xea0,0xe9d,0xea6,0x104a,0x1044,0x1053,0x1041, +0xea3,0xea3,0x1041,0x1041,0xeb5,0xeb5,0xea9,0xeb5,0xeb5,0xeb5,0xeac,0xeb5,0xeb5,0xeb5,0xeb5,0xea9, +0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5,0xeb5, +0xeb5,0xeb5,0xeb5,0xeb5,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8, +0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8,0xeb8, +0xeb8,0xeb8,0xeb8,0xeb8,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0, +0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0,0xed0, +0xed0,0xed0,0xed0,0xed0,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1,0xef1, +0xef1,0xef1,0xef1,0xef1,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149,0x1149, +0x1149,0x1149,0x1149,0x1149,0xf24,0xf24,0xf24,0xf21,0xf21,0xf21,0xf21,0xf21,0x1182,0x13dd,0x13dd,0x13dd, +0x13dd,0x135f,0x135f,0x135f,0x13e0,0x1362,0x1362,0x13e0,0x1524,0x1524,0x1524,0x1524,0x1527,0x1527,0x1527,0x17ee, +0x17ee,0x17ee,0x17ee,0x18b4,0xf39,0xf39,0xf39,0xf39,0x102f,0x102f,0x102f,0x102f,0x102f,0x102f,0x102f,0x102f, +0x102f,0x102f,0x102f,0x102f,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032, +0x1032,0x1032,0x1032,0x1032,0xf5a,0xf5a,0xf5a,0xf5a,0xf6c,0xf75,0xf78,0xf75,0xf78,0xf75,0xf78,0xf75, +0xf78,0xf75,0xf78,0xf75,0xf75,0xf75,0xf78,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75, +0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf75,0xf5d,0xf5a,0xf5a,0xf5a, +0xf5a,0xf5a,0xf5a,0xf6f,0xf5a,0xf6f,0xf6c,0xf6c,0xf81,0xf7e,0xf81,0xf81,0xf81,0xf7e,0xf7e,0xf81, +0xf7e,0xf81,0xf7e,0xf81,0xf7e,0x1065,0x1065,0x1065,0x11a0,0x105c,0x1065,0x105c,0xf7e,0xf81,0xf7e,0xf7e, +0x105c,0x105c,0x105c,0x105c,0x105f,0x1062,0x11a0,0x11a0,0xf84,0xf84,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e, +0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x106e,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e, +0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a, +0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a, +0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf8a,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99, +0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99, +0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0xf99,0x1563,0x1563,0x1563,0x1563,0x1563, +0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0x1563,0xf9f,0xf9f,0xf9f,0xf9f, +0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f, +0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xf9f,0xfe7,0xfff,0xff6,0xffc, +0xffc,0xfff,0xfff,0xff6,0xff6,0xffc,0xffc,0xffc,0xffc,0xffc,0xfff,0xfff,0xfff,0xfe7,0xfe7,0xfe7, +0xfe7,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfff,0xfe7,0xff6, +0xff9,0xfe7,0xfe7,0xffc,0xffc,0xffc,0xffc,0xffc,0xffc,0xfea,0xfff,0xffc,0xff3,0xff3,0xff3,0xff3, +0xff3,0xff3,0xff3,0xff3,0xff3,0xff3,0x116a,0x116a,0x1167,0x1164,0xff0,0xff0,0x1017,0x1017,0x1017,0x1017, +0x12db,0x12db,0x12b1,0x12b1,0x12b7,0x12ae,0x12ae,0x12ae,0x12ae,0x12b1,0x13e3,0x12b7,0x12b1,0x12b7,0x12ae,0x12b7, +0x12db,0x12ae,0x12ae,0x12ae,0x12b1,0x12b1,0x12ae,0x12ae,0x12b1,0x12ae,0x12ae,0x12b1,0x1032,0x1032,0x1032,0x1032, +0x1032,0x102f,0x102f,0x1032,0x1032,0x1032,0x1032,0x1032,0x1032,0x153c,0x153c,0x153c,0x1140,0x102f,0x102f,0x102f, +0x102f,0x12e7,0x12c0,0x12c0,0x12c0,0x12c0,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x1050,0x1050,0x104d,0x1047, +0x104d,0x1047,0x104d,0x1047,0x104d,0x1047,0x1044,0x1044,0x1044,0x1044,0x1059,0x1056,0x1044,0x119d,0x143a,0x143d, +0x143d,0x143a,0x143a,0x143a,0x143a,0x143a,0x1440,0x1440,0x1557,0x154b,0x154b,0x1548,0x1077,0x106e,0x1077,0x106e, +0x1077,0x106e,0x1077,0x106e,0x106b,0x1068,0x1068,0x1077,0x106e,0x1383,0x1380,0x1746,0x1383,0x1380,0x1449,0x1446, +0x155a,0x155a,0x1560,0x155a,0x1560,0x155a,0x1560,0x155a,0x1560,0x155a,0x1560,0x155a,0x1077,0x106e,0x1077,0x106e, +0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e, +0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x106e,0x1071,0x106e,0x106e,0x106e, +0x106e,0x106e,0x106e,0x106e,0x106e,0x1077,0x106e,0x1077,0x106e,0x1077,0x1077,0x106e,0x107a,0x107a,0x1080,0x1086, +0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086, +0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1086,0x1080,0x107a,0x107a, +0x107a,0x107a,0x1080,0x1080,0x107a,0x107a,0x1083,0x1452,0x144f,0x144f,0x1086,0x1086,0x107d,0x107d,0x107d,0x107d, +0x107d,0x107d,0x107d,0x107d,0x107d,0x107d,0x1455,0x1455,0x1455,0x1455,0x1455,0x1455,0x109b,0x109b,0x109b,0x109b, +0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b, +0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x109b,0x10a4,0x10a4,0x10a4,0x10a4, +0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4,0x10a4, +0x10a4,0x10a4,0x10a4,0x10a4,0x10a7,0x10a7,0x10a7,0x10aa,0x10a7,0x10a7,0x10ad,0x10ad,0x10b0,0x10b0,0x10b0,0x10b0, +0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0, +0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b0,0x10b9,0x10b9,0x10b9,0x10b9, +0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10bc,0x10b3,0x10c2,0x10bf,0x10b9,0x10b9,0x10b9,0x10b9, +0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9, +0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x10b9,0x1389,0x1386,0x10d4,0x10ce, +0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d1,0x1152,0x10c5,0x10c5,0x10c5,0x10cb, +0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x1458,0x10c8,0x10c8,0x10cb,0x10d7,0x10d4,0x10ce,0x10d4,0x10ce, +0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce, +0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x10d4,0x10ce,0x156f,0x156c,0x156f,0x156c, +0x1572,0x1572,0x174f,0x1458,0x10e0,0x10e0,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3, +0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3,0x10e3, +0x10e3,0x10e3,0x10e3,0x10e3,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0,0x10e0, +0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10e9,0x10e9,0x10e9,0x10e9,0x10e9,0x10ec,0x10ec,0x10ec,0x1146,0x10f5, +0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104,0x1104, +0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10ef,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2, +0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2,0x10f2, +0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113, +0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113,0x1113, +0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125, +0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125,0x1125, +0x112e,0x112e,0x112e,0x112e,0x1143,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e, +0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e,0x112e, +0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131, +0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131,0x1131, +0x113d,0x113d,0x113d,0x113d,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x12e1,0x14e2,0x17cd,0x17cd,0x17cd, +0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x17cd,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d,0x191d, +0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2, +0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11b2,0x11a9,0x11a9,0x11ac,0x11ac,0x11b2,0x11a9,0x11a9,0x11a9,0x11a9,0x11a9, +0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8, +0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8,0x11b8, +0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3, +0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3,0x11d3, +0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df, +0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11df,0x11dc,0x11e2, +0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee, +0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee,0x11ee, +0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4,0x11f4, +0x11f4,0x132f,0x11fa,0x1332,0x11fa,0x11fa,0x11fa,0x11fa,0x11f7,0x11f7,0x11f7,0x11fa,0x1752,0x1755,0x197a,0x1977, +0x11fd,0x11fd,0x11fd,0x120c,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212, +0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212,0x1212, +0x1212,0x1212,0x1212,0x1200,0x120c,0x120c,0x11fd,0x11fd,0x11fd,0x11fd,0x120c,0x120c,0x11fd,0x11fd,0x120c,0x120c, +0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e, +0x1221,0x121e,0x121e,0x121e,0x121e,0x121e,0x121e,0x1218,0x1218,0x1218,0x121e,0x121b,0x1578,0x157b,0x157e,0x157e, +0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230, +0x1224,0x1230,0x1224,0x1224,0x1224,0x1239,0x1239,0x1224,0x1224,0x1239,0x1230,0x1239,0x1239,0x1230,0x1224,0x1227, +0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230, +0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230,0x1230, +0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b, +0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b,0x124b, +0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263, +0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1263,0x1260,0x1260,0x1260, +0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c, +0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c,0x126c, +0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b, +0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b,0x127b, +0x1281,0x1281,0x1290,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293, +0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1296,0x1293,0x1296,0x1293,0x1293,0x1293, +0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1293,0x1296,0x1293,0x1293,0x1293,0x1293, +0x1290,0x1290,0x1290,0x1284,0x1284,0x1284,0x1284,0x1290,0x1290,0x128a,0x1287,0x128d,0x128d,0x129c,0x1299,0x1299, +0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f, +0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f, +0x12a5,0x12a5,0x12a5,0x12a2,0x12a2,0x12a2,0x129f,0x129f,0x129f,0x129f,0x12a2,0x129f,0x129f,0x129f,0x12a5,0x12a2, +0x12a5,0x12a2,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f, +0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x129f,0x12a5,0x12a2,0x12a2, +0x129f,0x129f,0x129f,0x129f,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c9, +0x12c9,0x12c9,0x12a8,0x1980,0x13d7,0x12d2,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7,0x13d7, +0x13d7,0x12d2,0x13d7,0x12d2,0x12b1,0x12b1,0x1365,0x12ae,0x1365,0x1365,0x1365,0x1365,0x12ae,0x12b4,0x12db,0x12ae, +0x12ae,0x12ae,0x12ae,0x12ae,0x12b4,0x12b7,0x12db,0x12db,0x12b7,0x12db,0x12ae,0x12b7,0x12b7,0x12ba,0x12db,0x12ae, +0x12ae,0x12db,0x12b1,0x12b1,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x12c3,0x12c3, +0x12c3,0x12c3,0x13ef,0x13ce,0x12cc,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x13ef,0x187e, +0x187e,0x187e,0x187e,0x187e,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x1581, +0x1581,0x1ad0,0x1ad0,0x1ad0,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6,0x12c6, +0x12c6,0x12c6,0x12c6,0x12c6,0x13d7,0x13d7,0x12d2,0x13d7,0x13d7,0x13d7,0x12d2,0x13d7,0x13d7,0x13d7,0x12cc,0x12cc, +0x12cc,0x12cc,0x12cc,0x13d1,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x12cf,0x13d4,0x13d4,0x13d4,0x13d4, +0x13d4,0x13d4,0x13d4,0x12cf,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x145b,0x145b, +0x1a2e,0x1ad0,0x1ad0,0x1ad0,0x13da,0x13da,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x13d4,0x12cf,0x13d4,0x12cf, +0x12cf,0x13d4,0x13da,0x12d5,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9, +0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9,0x12f9, +0x12f9,0x12f9,0x12f9,0x12f9,0x1383,0x1380,0x1383,0x1380,0x1383,0x1380,0x1383,0x1380,0x1383,0x1380,0x1449,0x1560, +0x1560,0x1560,0x17fa,0x196e,0x1560,0x1560,0x1749,0x1749,0x1749,0x1743,0x1749,0x1743,0x1971,0x196e,0x1a2b,0x1a28, +0x1a2b,0x1a28,0x1a2b,0x1a28,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7, +0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7,0x13a7, +0x13a7,0x13a7,0x13a7,0x13a7,0x13bc,0x13ad,0x13bc,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf,0x13bf, +0x13bf,0x13bf,0x13bf,0x13bf,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13ad,0x13c5,0x13c5,0x13c5,0x13c5, +0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5, +0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13c5,0x13cb,0x13cb,0x13cb,0x13cb, +0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb, +0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13cb,0x13fb,0x13f8,0x1923,0x1923, +0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923, +0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1404,0x1404,0x1404,0x1404, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404, +0x1404,0x1401,0x1401,0x1404,0x1404,0x1404,0x1404,0x1404,0x1401,0x1404,0x1404,0x1404,0x1401,0x1404,0x1401,0x1404, +0x1401,0x1404,0x1404,0x1404,0x1404,0x1404,0x1407,0x1404,0x1404,0x1404,0x1404,0x1401,0x1404,0x1401,0x1401,0x1404, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1401,0x1401,0x1401,0x1401, +0x1401,0x1401,0x1401,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404, +0x1404,0x1404,0x1404,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x1404,0x1404,0x1404, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1401,0x1401,0x1401,0x1401,0x1401,0x1401, +0x1401,0x1401,0x1401,0x1401,0x1401,0x1401,0x158a,0x158a,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1596,0x1590,0x1590,0x1596,0x1596,0x1596,0x1596, +0x1596,0x1596,0x1596,0x1596,0x1596,0x17d0,0x17d0,0x17d0,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1596,0x1404, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1596,0x17d0,0x17d0, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1407,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404, +0x1404,0x1404,0x1404,0x1404,0x1590,0x1590,0x1596,0x1596,0x1590,0x1596,0x1596,0x1596,0x158d,0x158d,0x1596,0x1596, +0x1404,0x1404,0x1407,0x1407,0x1407,0x1701,0x1404,0x1407,0x1404,0x1404,0x1407,0x1599,0x1599,0x1596,0x1596,0x17d0, +0x17d0,0x17d0,0x17d0,0x17d0,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596,0x1596, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404, +0x1404,0x1590,0x1590,0x1596,0x1701,0x1596,0x1590,0x1596,0x17d0,0x17d0,0x17d0,0x17d3,0x17d3,0x17d3,0x17d3,0x17d3, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1596, +0x1404,0x1596,0x1407,0x1407,0x1404,0x1404,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407, +0x1407,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404, +0x1404,0x1404,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407,0x1407, +0x1407,0x1407,0x1407,0x1407,0x1407,0x1404,0x1404,0x1404,0x1407,0x1404,0x1404,0x1404,0x1404,0x1407,0x1407,0x1407, +0x1404,0x1407,0x1407,0x1407,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1407,0x1404,0x1407,0x1404,0x1404, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1701,0x1404,0x1404,0x1404,0x1404,0x1596,0x1590,0x17d0, +0x145e,0x145e,0x145e,0x145e,0x158a,0x158a,0x158d,0x158d,0x158d,0x1593,0x1596,0x17d0,0x17d0,0x17d0,0x17d0,0x1758, +0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404,0x1404, +0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1596,0x1590,0x1590,0x1596,0x1599,0x1599,0x1596,0x1596, +0x1596,0x1596,0x1887,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1590,0x1596,0x1590,0x1590,0x1590,0x1590, +0x1596,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1590,0x1590,0x1590,0x1596,0x158d,0x158d,0x158d,0x158d, +0x158d,0x158d,0x1596,0x1404,0x1404,0x1404,0x1404,0x1404,0x14e8,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a, +0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x14e8,0x140a,0x140a,0x140a,0x14e8,0x140a,0x14e8, +0x140a,0x14e8,0x140a,0x14e8,0x140a,0x140a,0x140a,0x14e8,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x14e8,0x14e8, +0x140a,0x140a,0x140a,0x140a,0x14e8,0x140a,0x14e8,0x14e8,0x140a,0x140a,0x140a,0x140a,0x14e8,0x140a,0x140a,0x140a, +0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x140a,0x1707,0x1707,0x17d6,0x17d6,0x140d,0x140d,0x140d, +0x140a,0x140a,0x140a,0x140d,0x140d,0x140d,0x140d,0x140d,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, +0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410, +0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410, +0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1413,0x1410,0x1410,0x1410,0x1410, +0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1413,0x1413,0x1413,0x1410, +0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1410,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416, +0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416, +0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1416,0x1803,0x1803,0x1800,0x175b,0x1464,0x1464,0x1464,0x1464, +0x1464,0x1464,0x1461,0x1461,0x1461,0x1461,0x1461,0x1461,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464, +0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x1464,0x15a2,0x1470,0x1470,0x1470,0x1482,0x1482,0x1482,0x1482,0x1482, +0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482, +0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x1482,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d, +0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d, +0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x149d,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3, +0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3, +0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x14a3,0x1a34,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6, +0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6, +0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14a6,0x14ac,0x14ac,0x14b8,0x14be,0x14be,0x14be,0x14be,0x14be, +0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be, +0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14be,0x14b8,0x14b8,0x14b8,0x14ac,0x14ac, +0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14ac,0x14b8,0x14bb,0x14be,0x14c1,0x14c1,0x14be,0x14c4,0x14c4,0x14af, +0x14b2,0x1764,0x1767,0x1767,0x1767,0x15ab,0x1adc,0x1ad9,0x14b5,0x14b5,0x14b5,0x14b5,0x14b5,0x14b5,0x14b5,0x14b5, +0x14b5,0x14b5,0x15a8,0x176d,0x1770,0x176a,0x1773,0x1773,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df, +0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df, +0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x14df,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, +0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, +0x153c,0x153c,0x1956,0x1956,0x1956,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c, +0x153c,0x1a22,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x153c,0x18ba,0x1956,0x1956,0x1956,0x1956,0x1956, +0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1590,0x1590,0x1596,0x1596,0x1596,0x1590,0x1590,0x1590, +0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1596,0x1596,0x158d,0x158d,0x158d,0x158d, +0x158d,0x158d,0x158d,0x158d,0x1596,0x1596,0x1596,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1596, +0x1590,0x1590,0x1596,0x1596,0x1596,0x1596,0x1590,0x1590,0x1599,0x1590,0x1590,0x1590,0x1590,0x1704,0x1704,0x1590, +0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1884,0x1596,0x1590,0x1590,0x1596,0x1590,0x1590,0x1590, +0x1590,0x1590,0x1590,0x1590,0x1590,0x1596,0x1596,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590,0x1590, +0x1596,0x1590,0x1590,0x1590,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba, +0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba,0x15ba, +0x15ba,0x15ba,0x15ba,0x15ba,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc, +0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc,0x15cc, +0x15cc,0x15cc,0x15cc,0x15cc,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2, +0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2,0x15d2, +0x15d2,0x15d2,0x15d2,0x15d2,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5, +0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5,0x15d5, +0x15d5,0x15d5,0x15d5,0x15d5,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614, +0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614,0x1614, +0x1614,0x1614,0x1614,0x1605,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d, +0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x161d,0x1617, +0x1620,0x1620,0x1620,0x1620,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623, +0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623,0x1623, +0x1623,0x1623,0x1623,0x1623,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x1635,0x163e,0x163e,0x163e, +0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e,0x163e, +0x163e,0x163e,0x163e,0x163e,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647, +0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647,0x1647, +0x1647,0x1647,0x1647,0x1647,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659, +0x1659,0x1659,0x1659,0x1659,0x1656,0x1656,0x1656,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x164a,0x1656, +0x1656,0x164a,0x1656,0x164d,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659, +0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659,0x1659, +0x1659,0x1659,0x1659,0x1659,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d, +0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d,0x167d, +0x167d,0x167a,0x167a,0x167a,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686, +0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x1686,0x168c,0x168c,0x168c,0x1689,0x1689,0x1689, +0x1686,0x1686,0x1686,0x1686,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b, +0x169b,0x169b,0x169b,0x169b,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x168f,0x16a1,0x16a1,0x1695,0x1692,0x1692, +0x1692,0x1692,0x1692,0x1692,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b, +0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b,0x169b, +0x169b,0x169b,0x169b,0x169b,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7, +0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a7,0x16a4,0x16a4,0x16a4,0x16a4,0x16a4, +0x16a4,0x16a4,0x16a4,0x16a4,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa, +0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa,0x16aa, +0x16aa,0x16aa,0x16aa,0x16aa,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce, +0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce,0x16ce, +0x16ce,0x16ce,0x16ce,0x16ce,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7, +0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7,0x16d7, +0x16d7,0x16d7,0x16d7,0x16d7,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef, +0x16ef,0x16ef,0x16ef,0x16ef,0x16da,0x16e9,0x16e9,0x16da,0x16da,0x16da,0x16da,0x16da,0x16da,0x16e9,0x16da,0x16ec, +0x16ec,0x16da,0x16ec,0x16da,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef, +0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef,0x16ef, +0x16ef,0x16ef,0x16ef,0x16ef,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8, +0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8,0x16f8, +0x16f8,0x16f8,0x16f8,0x16f8,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe, +0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe,0x16fe, +0x16fe,0x16fe,0x16fe,0x16fe,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956, +0x1740,0x1740,0x1740,0x1740,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956,0x1956, +0x1956,0x1956,0x1956,0x1a22,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761, +0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761,0x1761, +0x1761,0x1761,0x1761,0x1761,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d, +0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d, +0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x17a3,0x17a0,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d, +0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x179d,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6, +0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6, +0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a6,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9, +0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9, +0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17a9,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb, +0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb, +0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17bb,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be, +0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be, +0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17be,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1, +0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1, +0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c4,0x17c4,0x17c4,0x17c4,0x17c1, +0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c1,0x17c4,0x17c4,0x17c4, +0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c1,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4, +0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4, +0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17c4,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc, +0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc, +0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x17dc,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6,0x18c6, +0x18c6,0x18c6,0x18c6,0x18c6,0x1b3f,0x1a94,0x1a94,0x1a97,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df,0x17df, +0x17e2,0x1890,0x1890,0x1890,0x1890,0x1890,0x1890,0x192c,0x17df,0x17df,0x17df,0x17df,0x17df,0x188d,0x188d,0x188d, +0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929, +0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x1890,0x188d, +0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1890,0x192c,0x192c,0x1890,0x1890,0x1890,0x1890,0x1890, +0x1890,0x1890,0x188d,0x180f,0x1890,0x1890,0x1890,0x1a94,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x180f,0x188d, +0x188d,0x188d,0x188d,0x188d,0x1929,0x1a0a,0x1a0a,0x1a0a,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d, +0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x188d,0x1929,0x1824,0x1824,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821, +0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821, +0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1821,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824, +0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824, +0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1824,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872, +0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x186f,0x186f,0x186f, +0x185a,0x185a,0x185a,0x185a,0x185a,0x185a,0x185a,0x185a,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872, +0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872, +0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1872,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896, +0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896, +0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1896,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899, +0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899, +0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1899,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48, +0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,0x1b48,0x18f0,0x18f0,0x18f0,0x18f0,0x1a46,0x1a46,0x18f3,0x18f3, +0x18f3,0x18f3,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18db,0x18ed, +0x18de,0x18e1,0x18e4,0x18f6,0x18f6,0x1995,0x18e7,0x18e7,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0, +0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0, +0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x18f0,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911, +0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x18fc,0x1902,0x18ff,0x18ff,0x18ff, +0x18ff,0x190e,0x1914,0x18ff,0x18ff,0x18ff,0x18ff,0x190b,0x1911,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff,0x18ff, +0x18ff,0x18ff,0x18ff,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911, +0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1911,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923, +0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923, +0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1923,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1929,0x1a0a, +0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a, +0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1a0a,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932, +0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932, +0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1932,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938, +0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938, +0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x1938,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa, +0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa, +0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19aa,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5, +0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5, +0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19c5,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb, +0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb, +0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19cb,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6, +0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6, +0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e6,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9, +0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9, +0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19e9,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2, +0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19f2, +0x19f2,0x19f2,0x19f2,0x19f2,0x19f2,0x19ef,0x19ef,0x19ef,0x1a0a,0x1a0a,0x1a0a,0x1b3c,0x1b3c,0x1a94,0x1a94,0x1a94, +0x1a94,0x1a94,0x1a94,0x1b3c,0x1b3c,0x1b3c,0x1a94,0x1a94,0x1a0d,0x1a0d,0x1a0d,0x1a0d,0x1a0a,0x1a10,0x1a10,0x1a0a, +0x1a10,0x1a10,0x1a94,0x1a97,0x1a94,0x1a94,0x1a94,0x1a94,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49, +0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49, +0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a49,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70, +0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70, +0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a70,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79, +0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1aaf,0x1aaf,0x1a79,0x1aaf,0x1a79,0x1a79,0x1a79,0x1a79, +0x1a79,0x1a79,0x1a79,0x1a79,0x1a79,0x1a7f,0x1a7f,0x1a7f,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b, +0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b, +0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1a8b,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e, +0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e, +0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b1e,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a, +0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a, +0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b2a,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e, +0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e, +0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b4e,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51, +0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51, +0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0x1b51,0,0,0,0 +}; + +static const UTrie2 propsVectorsTrie={ + propsVectorsTrie_index, + propsVectorsTrie_index+5024, + NULL, + 5024, + 26204, + 0xa40, + 0x1420, + 0x0, + 0x0, + 0x110000, + 0x79f8, + NULL, 0, FALSE, FALSE, 0, NULL +}; + +static const uint32_t propsVectors[6999]={ +0x67,0,0,0x67,0,0x4e00000,0x67,0x80000,0x20,0x867,0,0,0xa67,0,0,0xb67, +0,0,0xc67,0,0,0xd67,0,0,0xe67,0,0,0x1067,0,0,0x1167,0, +0,0x1267,0,0,0x1367,0,0,0x1467,0,0,0x1567,0,0,0x1667,0,0, +0x1767,0,0,0x1867,0,0,0x1967,0,0,0x1a67,0,0,0x1b67,0,0,0x1d67, +0,0,0x1f67,0,0,0x2067,0,0,0x2267,0,0,0x2367,0,0,0x2467,0, +0,0x2567,0,0,0x2767,0,0,0x2867,0x80000,0x20,0x2967,0,0,0x2a67,0,0x1600000, +0x2b67,0,0,0x2d67,0,0,0x3167,0x20000000,0,0x3267,0x20000000,0,0x3a67,0,0,0x3b67, +0,0,0x3c67,0,0,0x3e67,0,0,0x4067,0,0,0x4167,0,0,0x4467,0, +0,0x4867,0,0,0x4967,0,0,0x4a67,0,0,0x5067,0,0,0x5167,0,0, +0x5467,0,0,0x5567,0,0,0x5667,0x80000,0x20,0x5767,0,0,0x5867,0,0,0x5967, +0,0,0x5b67,0,0,0x5c67,0,0,0x5d67,0,0,0x6067,0x80000,0x20,0x6267,0, +0,0x6367,0,0,0x6467,0,0,0x6567,0,0,0x6f67,0,0,0x7067,0,0, +0x7367,0x20000000,0,0x7567,0,0,0x7667,0,0,0x7767,0,0,0x7867,0,0,0x7a67, +0,0,0x7b67,0,0,0x7c67,0,0,0x7e67,0,0,0x7f67,0,0,0x8167,0, +0,0x8267,0,0,0x8367,0,0,0x8467,0,0,0x8567,0,0,0x8667,0,0, +0x8767,0,0,0x8867,0,0,0x8967,0,0,0x8b67,0,0,0x8c67,0,0,0x8e67, +0x20000000,0,0x8f67,0,0,0x9067,0,0,0x9167,0,0,0x9267,0,0,0x9367,0, +0,0x9567,0,0,0x9667,0,0,0x9767,0,0,0x9867,0,0,0x9967,0,0, +0x9a67,0,0,0x9c67,0,0,0x9f67,0,0,0xa167,0,0,0xa367,0,0,0xa467, +0,0,0xa567,0,0,0xa667,0,0,0xa767,0,0,0xa867,0,0,0xa967,0, +0,0xaa67,0,0x4e00000,0xab67,0,0x4e00000,0xac67,0,0,0xad67,0,0,0xae67,0,0, +0xaf67,0,0,0xb167,0,0,0xb267,0,0,0xb467,0,0,0xb567,0,0,0xb767, +0,0,0xb867,0,0,0xb967,0,0,0xba67,0,0,0xbc67,0,0,0xbd67,0, +0,0xbe67,0,0,0xbf67,0,0,0xc067,0,0,0xc167,0,0,0xc267,0,0, +0xc367,0,0x4e00000,0xc467,0,0x4e00000,0xc667,0,0,0xc767,0,0,0xc867,0,0,0xc967, +0,0,0xca67,0,0,0xcc67,0,0x4e00000,0xcf67,0,0x4e00000,0xd067,0,0x4e00000,0xd267,0, +0,0xd367,0,0,0xd467,0,0,0xd567,0,0,0xd667,0,0,0xd867,0,0, +0xda67,0,0,0xdb67,0,0,0xdc67,0,0,0xdd67,0,0,0xde67,0,0,0xdf67, +0,0,0xe067,0,0,0xe167,0,0,0xe267,0,0,0xe367,0,0x4e00000,0xe467,0, +0,0xe567,0,0,0xe667,0,0,0xe767,0,0,0xe867,0,0,0xe967,0,0, +0xea67,0,0,0xeb67,0,0,0xec67,0,0,0xed67,0,0,0xee67,0,0,0xef67, +0,0,0xf167,0,0,0xf367,0,0,0xf567,0,0,0xf667,0,0,0xf767,0, +0,0xf867,0,0,0xf967,0,0,0xfa67,0,0x4e00000,0xfb67,0,0,0xfc67,0,0, +0xfd67,0,0,0xfe67,0,0,0x10167,0,0,0x10267,0,0,0x10367,0,0,0x10467, +0,0,0x10567,0,0x4e00000,0x10667,0,0,0x10767,0,0,0x10867,0,0,0x10967,0, +0,0x10a67,0,0,0x10b67,0,0,0x10c67,0,0,0x10d67,0,0,0x10e67,0,0, +0x10f67,0,0,0x11067,0,0,0x11367,0,0,0x11467,0,0,0x11567,0,0,0x11667, +0,0,0x11767,0,0,0x11867,0,0,0x11967,0,0x4e00000,0x11a67,0,0,0x11b67,0, +0,0x11c67,0,0,0x11d67,0,0,0x11e67,0,0,0x11f67,0,0,0x12067,0,0, +0x12167,0,0,0x12267,0,0,0x12367,0,0,0x12467,0,0,0x12567,0,0,0x12667, +0,0,0x12767,0,0,0x12867,0,0,0x12967,0,0,0x12a67,0,0x4e00000,0x12b67,0, +0,0x12c67,0,0,0x12d67,0,0,0x12f67,0,0,0x13067,0,0,0x13167,0,0, +0x13267,0,0,0x13367,0,0,0x13467,0,0,0xa0067,0,0xe00000,0xa4767,0,0xe00000,0xa4f67, +0,0xe00000,0xa5e67,0,0xe00000,0xa5f67,0,0xe00000,0xac567,0,0xe00000,0xad167,0,0xe00000,0xb0067,0, +0xe00000,0xb1267,0,0xe00000,0xb2e67,0,0xe00000,0x11000100,0,0x900020,0x11000100,0x40000001,0x440020,0x11000100,0x40000001,0x643020, +0x11000100,0x40000001,0xa5a040,0x11000100,0x40000001,0x116a8a0,0x11000200,0,0x900020,0x11000200,0x4000001,0xc4000b,0x11000200,0x7c00100,0x220402,0x11000200, +0x24000000,0x14200000,0x11000200,0x24000008,0x1710000,0x11000200,0x40000001,0x1d3b020,0x11000219,0x7c00100,0x220401,0x11000219,0x7c00100,0x250401,0x11000319,0x7c00100, +0x220401,0x11000319,0x7c00100,0x220402,0x11000319,0x7c00100,0x250400,0x11000319,0x7c00100,0x250401,0x11000419,0x7c00100,0x220400,0x11000419,0x7c00100,0x220401, +0x11000419,0x7c00100,0x220402,0x11000419,0x7c00100,0x230400,0x11000419,0x7c00100,0x250400,0x11000419,0x7c00100,0x250401,0x11000419,0x7c00100,0x250402,0x11000519, +0x7c00100,0x220400,0x11000519,0x7c00100,0x230400,0x11000600,0x4000400,0x200002,0x11000600,0x4000400,0x200400,0x11000600,0x7c00500,0x220400,0x11000600,0x7c00500, +0x230400,0x11000600,0x7c00500,0x530400,0x11000600,0x7c00d00,0x230400,0x11000619,0x7c00500,0x22040f,0x11000800,0x4000010,0x1001401,0x11000800,0x4000400,0x200001, +0x11000800,0x6800010,0x201001,0x11000800,0x7c00500,0x230401,0x11000807,0x7c00100,0x220400,0x11000807,0x7c00100,0x250400,0x1100080e,0x4000400,0x200000,0x1100080e, +0x4000400,0x200002,0x1100080e,0x7000500,0x220402,0x1100080e,0x7c00100,0x220400,0x1100080e,0x7c00100,0x220401,0x1100080e,0x7c00100,0x220402,0x1100080e,0x7c00100, +0x250400,0x1100080e,0x7c00100,0x250401,0x1100080e,0x7c00120,0x220402,0x1100080e,0x7c00120,0x250402,0x11000908,0x4000000,0x200000,0x11000908,0x7c00100,0x220400, +0x11000908,0x7c00100,0x220401,0x11000908,0x7c00100,0x250400,0x11000908,0x7c00100,0x250401,0x11000a03,0x4000000,0x200400,0x11000a03,0x4000000,0x201000,0x11000a03, +0x4000000,0x270000,0x11000a03,0x7c00100,0x220400,0x11000a03,0x7c00100,0x220402,0x11000a03,0x7c00100,0x250400,0x11000a03,0x7c00500,0x230400,0x11000a03,0xc000010, +0x1049400,0x11000b13,0x2802500,0x962460,0x11000b13,0x4000000,0x200000,0x11000b13,0x4000000,0x201000,0x11000b13,0x4000000,0x230400,0x11000b13,0x4000002,0x400000, +0x11000b13,0x4000010,0x200000,0x11000b13,0x7c00100,0x2633800,0x11000c00,0x80000000,0x218960,0x11000c02,0x2802100,0x962460,0x11000c02,0x2802400,0x962460,0x11000c02, +0x4000000,0x200000,0x11000c02,0x4000000,0x1329400,0x11000c02,0x4000000,0x1329800,0x11000c02,0x4000000,0x1500000,0x11000c02,0x6800000,0x1329800,0x11000c02,0x7c00100, +0x230400,0x11000c02,0x7c00100,0x230401,0x11000c02,0x7c00100,0x230402,0x11000c02,0x7c00500,0x230400,0x11000c02,0x7d00100,0x230400,0x11000f01,0x2802400,0x962460, +0x11000f0a,0x2802100,0x962460,0x11000f0a,0x2802400,0x962460,0x11000f0a,0x2806400,0x962460,0x11000f0a,0x4000000,0x200000,0x11000f0a,0x6800100,0x962540,0x11000f0a, +0x7c00100,0x230400,0x11000f0a,0x7c00100,0x230401,0x11001004,0x2802100,0x962460,0x11001004,0x2802400,0x962460,0x11001004,0x2806400,0x962460,0x11001004,0x4000000, +0x200000,0x11001004,0x4000000,0x1500000,0x11001004,0x6800100,0x962540,0x11001004,0x6800100,0x962541,0x11001004,0x7c00100,0x230400,0x11001004,0x7c00100,0x230401, +0x11001110,0x2802100,0x962460,0x11001110,0x2802400,0x962460,0x11001110,0x2806400,0x962460,0x11001110,0x6800100,0x962540,0x11001110,0x7c00100,0x230400,0x11001110, +0x7c00100,0x230401,0x1100120f,0x2802100,0x962460,0x1100120f,0x2802400,0x962460,0x1100120f,0x2806400,0x962460,0x1100120f,0x6800100,0x962540,0x1100120f,0x7c00100, +0x230400,0x1100131f,0x2802100,0x962460,0x1100131f,0x2802400,0x962460,0x1100131f,0x2806400,0x962460,0x1100131f,0x4000000,0x200000,0x1100131f,0x6800000,0x1329800, +0x1100131f,0x6800100,0x962540,0x1100131f,0x6800100,0x962541,0x1100131f,0x7c00100,0x230400,0x1100131f,0x7c00100,0x230401,0x11001423,0x2802100,0x962460,0x11001423, +0x2806400,0x962460,0x11001423,0x6800100,0x962540,0x11001423,0x6800100,0x962541,0x11001423,0x7c00100,0x230400,0x11001423,0x7c00100,0x230401,0x11001524,0x2802100, +0x962460,0x11001524,0x2802100,0x962461,0x11001524,0x2806400,0x962460,0x11001524,0x6800000,0x1329800,0x11001524,0x6800100,0x962540,0x11001524,0x7c00100,0x230400, +0x11001615,0x2802100,0x962460,0x11001615,0x2806400,0x962460,0x11001615,0x6800100,0x962540,0x11001615,0x6800100,0x962541,0x11001615,0x7c00100,0x230400,0x1100171a, +0x2802100,0x962460,0x1100171a,0x2806400,0x962460,0x1100171a,0x6800000,0x1329800,0x1100171a,0x6800100,0x962540,0x1100171a,0x6800100,0x962541,0x1100171a,0x7c00100, +0x230400,0x11001900,0x4000000,0x1600000,0x11001926,0x2802100,0x1862460,0x11001926,0x2802400,0x1862460,0x11001926,0x2806100,0x1862460,0x11001926,0x4000000,0x200000, +0x11001926,0x4000010,0x400000,0x11001926,0x6800000,0x1329800,0x11001926,0x7800100,0x1830142,0x11001926,0x7c00100,0x1830000,0x11001926,0x7c00900,0x1830000,0x11001926, +0x7e00100,0x1830000,0x11001a18,0x2802100,0x1862460,0x11001a18,0x2802400,0x1862460,0x11001a18,0x6800000,0x1329800,0x11001a18,0x7800100,0x1830142,0x11001a18,0x7c00100, +0x1830000,0x11001a18,0x7c00100,0x1830002,0x11001a18,0x7c00900,0x1830000,0x11001a18,0x7e00100,0x1830000,0x11001d0c,0x7c00100,0x230400,0x11001d0c,0x7c00100,0x250400, +0x11001e12,0x7c00100,0x2230500,0x11001e12,0x7c00100,0x2330520,0x11001e12,0x7c80100,0x2330520,0x11002619,0x7c00100,0x220401,0x11002619,0x7c00100,0x220402,0x11002619, +0x7c00100,0x250401,0x1100270e,0x4000400,0x200001,0x1100270e,0x4000400,0x200002,0x1100270e,0x4000400,0x500001,0x1100270e,0x7c00100,0x220401,0x1100270e,0x7c00100, +0x250401,0x11002800,0x80000,0x918820,0x11002800,0x80000,0x1c18020,0x11002800,0x180000,0x918820,0x11002800,0x4000001,0x445801,0x11002800,0x4000001,0x445802, +0x11002800,0x4000001,0xc4000b,0x11002800,0x6800000,0x201c00,0x11002800,0x6800020,0x201c00,0x11002800,0x24000000,0x200000,0x11002800,0x24000000,0x200002,0x11002800, +0x24000000,0x810000,0x11002800,0x24000000,0x1410000,0x11002800,0x24000000,0x1500000,0x11002800,0x24000000,0x1500002,0x11002800,0x24000002,0x400000,0x11002800,0x24000006, +0xc0000b,0x11002800,0x24000008,0x1410000,0x11002800,0x24000008,0x1710000,0x11002800,0x24000020,0x1001400,0x11002800,0x24000020,0x1500002,0x11002800,0x2c000010,0x1248000, +0x11002800,0x2c000010,0x15248002,0x11002800,0x40000001,0x63b020,0x11002800,0x40080000,0x918820,0x11002801,0x80000,0xaa65620,0x11002801,0x82000,0x962460,0x11002900, +0x4000000,0x20000e,0x11002900,0x4000000,0x20000f,0x11002900,0x4000020,0x20000e,0x11002900,0x4000020,0x20000f,0x11002900,0x4000020,0x81000e,0x11002900,0x4000020, +0x81000f,0x11002900,0x4000020,0x141000e,0x11002900,0x4000020,0x141000f,0x11002900,0x4000022,0x20000e,0x11002900,0x4000022,0x20000f,0x11002a00,0x4000000,0x1500000, +0x11002a00,0x4000000,0x1600000,0x11002a00,0x4000000,0x1600002,0x11002b01,0x2000,0x962460,0x11002b01,0x2802020,0x962460,0x11002c00,0x4000000,0x200000,0x11002c00, +0x4000000,0x200002,0x11002c00,0x4000000,0x20000f,0x11002c00,0x4000020,0x200000,0x11002c00,0x7c00000,0x200000,0x11002c00,0x7c00020,0x200000,0x11002c00,0x7c00120, +0x220405,0x11002c00,0x7c00120,0x230402,0x11002c00,0x7c00120,0x250402,0x11002c00,0x7c00120,0x250405,0x11002c19,0x7c00100,0x250400,0x11002c19,0x7c00100,0x250401, +0x11002d00,0x4000000,0x100006,0x11002d00,0x4000000,0x200006,0x11002d19,0x7c00100,0x220402,0x11002d19,0x7c00100,0x230400,0x11002d19,0x7c00100,0x250402,0x11002e00, +0x24000000,0x200000,0x11002e00,0x24000020,0x200000,0x11002e00,0x24000020,0x200001,0x11002e00,0x24000020,0x14200000,0x11002f00,0x24000020,0x200000,0x11002f00,0x24000020, +0x200001,0x11002f00,0x24000020,0x200002,0x11002f00,0x24000020,0xf00000,0x11002f00,0x24000020,0x1600000,0x11002f00,0x24000022,0x1600000,0x11003000,0x24000000,0x200000, +0x11003000,0x24000000,0x14200000,0x11003000,0x24000020,0x200000,0x11003000,0x24000020,0x810000,0x11003000,0x24000020,0x1410000,0x11003100,0x24000000,0x200000,0x11003200, +0x24000000,0x200000,0x11003300,0x4000000,0x100003,0x11003400,0x24000000,0x100000,0x11003400,0x24000000,0x200000,0x11003500,0x24000000,0x200000,0x11003600,0x24000000, +0x200000,0x11003600,0x24000000,0x14200000,0x11003600,0x24000020,0x200000,0x11003700,0x24000000,0x200000,0x11003700,0x24000000,0x4200000,0x11003700,0x24000000,0x4e00000, +0x11003700,0x24000000,0x14200000,0x11003700,0x24000000,0x14e00000,0x11003700,0x24000000,0x96800000,0x11003700,0x24000020,0x4200000,0x11003800,0x4000000,0x100000,0x11003800, +0x24000000,0x200000,0x11003800,0x24000000,0xb00000,0x11003800,0x24000000,0x1710000,0x11003800,0x24000000,0x4200000,0x11003800,0x24000000,0x4e00000,0x11003800,0x24000000, +0x14200000,0x11003800,0x24000000,0x14b00000,0x11003800,0x24000000,0x14e00000,0x11003800,0x24000000,0x96800000,0x11005003,0x7c00100,0x220402,0x11005013,0x2802500,0x962460, +0x11005013,0x4000020,0x200005,0x11005013,0x7c00100,0x2633801,0x11005013,0x7c00100,0x2633802,0x11005013,0x7c00100,0x2633805,0x11005019,0x7c00100,0x220402,0x11005100, +0x24000000,0x810000,0x11005100,0x24000000,0x1410000,0x11005102,0x7000100,0x230408,0x11005102,0x7c00100,0x230404,0x11005102,0x7c00100,0x230407,0x11005102,0x7c00100, +0x230408,0x11005102,0x7c00100,0x230409,0x11005201,0x2802400,0x962460,0x11005500,0x80000,0x1e18820,0x11005502,0x7000100,0x230408,0x11005502,0x7c00100,0x230404, +0x11005502,0x7c00100,0x230407,0x11005502,0x7c00100,0x230408,0x11005502,0x7c00100,0x230409,0x11005667,0x1000,0,0x11020200,0x80004,0x418820,0x11020200, +0x4000000,0x100006,0x11020200,0x4000000,0x10000f,0x11020200,0x4000400,0x100002,0x11020200,0x4000400,0x500002,0x11020200,0x6800c00,0x101000,0x11020200,0x24000000, +0x100000,0x11020200,0x24000000,0x1400000,0x11020200,0x24000000,0x1500000,0x11020200,0x24000000,0x1600000,0x11020200,0x24000000,0x14200000,0x11020200,0x24000020,0x100000, +0x11020200,0x24000020,0x1600000,0x11020219,0x7c00100,0x12040f,0x11020219,0x7c00100,0x220400,0x11020219,0x7c00100,0x220401,0x11020219,0x7c00100,0x250400,0x11020319, +0x7c00100,0x220400,0x11020319,0x7c00100,0x220401,0x11020319,0x7c00100,0x220402,0x11020319,0x7c00100,0x250400,0x11020319,0x7c00100,0x250402,0x11020319,0x7d00100, +0x220402,0x11020419,0x7c00100,0x220401,0x11020519,0x7c00100,0x220400,0x11020600,0x4000400,0x100002,0x11020600,0x4000400,0x200400,0x11020600,0x7c00500,0x130400, +0x11020600,0x7c00d00,0x130400,0x11020701,0x2802400,0x962460,0x11020701,0x2802400,0x962461,0x11020701,0x2802400,0xc62460,0x1102080e,0x7c00100,0x220400,0x1102080e, +0x7c00100,0x250400,0x11020908,0x7c00100,0x220400,0x11020908,0x7c00100,0x220401,0x11020908,0x7c00100,0x250400,0x11020908,0x7c00100,0x250401,0x11022800,0x24000000, +0x100000,0x11022800,0x24000000,0x200000,0x11022800,0x24000000,0x200002,0x11022800,0x24000000,0x401000,0x11022800,0x24000000,0xf00002,0x11022800,0x24000000,0xf0ac02, +0x11022800,0x24000000,0x1500000,0x11022800,0x24000002,0x100000,0x11022800,0x24000002,0x370000,0x11022800,0x24000002,0x470000,0x11022800,0x24000006,0x400000,0x11022800, +0x24000008,0x1710000,0x11022800,0x24000008,0x1712c00,0x11022800,0x24000020,0x100000,0x11022800,0x24000020,0x1500000,0x11022800,0x24000020,0x1500002,0x11022900,0x4000000, +0x10000e,0x11022900,0x4000000,0x10000f,0x11022919,0x7c00100,0x12040f,0x11022c00,0x4000000,0x100002,0x11022c00,0x4000000,0x1500002,0x11022c00,0x4000000,0x1600002, +0x11022c00,0x4000000,0x1410000f,0x11022c00,0x7c00120,0x120405,0x11022c0e,0x7c00100,0x250401,0x11022c19,0x7c00100,0x150401,0x11022d00,0x4000000,0x100006,0x11022d00, +0x4000000,0x200006,0x11022d19,0x7c00100,0x120402,0x11022d19,0x7c00100,0x150402,0x11022e00,0x24000000,0x200000,0x11022e00,0x24000020,0x100000,0x11022e00,0x24000020, +0x14100000,0x11022f00,0x24000020,0x100000,0x11022f00,0x24000020,0x100001,0x11022f00,0x24000020,0x100002,0x11023000,0x24000000,0x100000,0x11023300,0x4000000,0x100002, +0x11023300,0x4000000,0x100003,0x11023300,0x4000100,0x120403,0x11023300,0x4000100,0x150403,0x11023300,0x4000100,0x14150403,0x11023400,0x24000000,0x100000,0x11023500, +0x24000000,0x100000,0x11023600,0x24000000,0x100000,0x11023600,0x24000020,0x100000,0x11023600,0x24000020,0x14100000,0x11023700,0x24000000,0x4100000,0x11023700,0x24000000, +0x4e00000,0x11023700,0x24000000,0x14100000,0x11023700,0x24000000,0x14e00000,0x11023700,0x24000020,0x100000,0x11023700,0x24000020,0x4100000,0x11023700,0x24000020,0x14100000, +0x11023800,0x4000000,0x100000,0x11023800,0x24000000,0x200000,0x11024e67,0,0,0x11025600,0x4000000,0x100000,0x11042a00,0x4000000,0x1600000,0x11045700, +0x4000000,0x20000a,0x11045700,0x4000020,0x20000a,0x11045712,0x7c00100,0xe3040a,0x11045712,0x7c80100,0xe3040a,0x11045716,0x7c00100,0xe30c0a,0x11045716,0x7c00100, +0x2530c0a,0x11063d00,0x4000001,0x445811,0x11065700,0x4000000,0x810011,0x11065700,0x4000000,0xe00011,0x11065700,0x4000000,0x1410011,0x11065700,0x4000000,0x1500011, +0x11065700,0x4000000,0x1600011,0x11065700,0x4000006,0xe70011,0x11065700,0x4000008,0xe00011,0x11065700,0x4000008,0xe02c11,0x11065700,0x4000010,0x871411,0x11065700, +0x4000010,0x1201411,0x11065700,0x4000010,0x1271011,0x11065700,0x4000020,0xe00011,0x11065700,0x4000400,0xe00011,0x11065700,0x4000420,0xe00011,0x11065700,0x6800000, +0xe01c11,0x11065700,0x6800040,0xe29811,0x11065700,0xc000010,0x80ac11,0x11065700,0xc000010,0xb48011,0x11065719,0x7c00100,0xe20411,0x11065719,0x7c00100,0xe50411, +0x11065719,0x7c00140,0xe20411,0x11065719,0x7c00140,0xe50411,0x11080100,0x6800000,0x201c00,0x11080100,0x68000c0,0x19329800,0x11080100,0x24000000,0x200000,0x11080100, +0x24000000,0x810000,0x11080100,0x24000000,0x1410000,0x11080100,0x24000000,0x1500000,0x11080100,0x24000000,0x1600000,0x11080100,0x24000000,0x1b00000,0x11080100,0x24000000, +0x2410000,0x11080100,0x24000000,0x18200000,0x11080100,0x24000006,0xd70000,0x11080100,0x24000008,0x1713c00,0x11080100,0x24000008,0x1714000,0x11080100,0x24000010,0x1001400, +0x11080100,0x24000010,0x1071000,0x11080100,0x24000010,0x1071400,0x11080100,0x24000020,0x200000,0x11080100,0x24000020,0x400000,0x11080100,0x24000020,0x1600000,0x11080100, +0x24000400,0x200000,0x11080100,0x24000420,0x200000,0x11080100,0x2c000010,0xb48000,0x11080100,0x2c000010,0x100ac00,0x11080100,0x44000001,0x1a45800,0x11080119,0x7c00100, +0x220400,0x11080119,0x7c00100,0x250400,0x11080119,0x7c001c0,0x220400,0x11080119,0x7c001c0,0x250400,0x11080200,0x4000400,0x200002,0x11080200,0x24000000,0x200000, +0x11080200,0x24000000,0x1500000,0x11080200,0x24000000,0x1600000,0x11080200,0x24000020,0x200000,0x110a1e12,0x7c00100,0x2130480,0x110a1e12,0x7c80100,0x2130480,0x110a3000, +0x24000000,0x34e00000,0x110a3000,0x24100000,0x810001,0x110a3000,0x24100000,0x1410001,0x110a3700,0x24000000,0x34200000,0x110a3d00,0x4000000,0xe00000,0x110a3d00,0x4000000, +0xe00002,0x110a3d00,0x24000000,0xe00000,0x110a3d11,0x7c00300,0xe30000,0x110a3d11,0x7c00900,0x1230400,0x110a3d12,0x2802400,0x962460,0x110a3e14,0x7c00100,0xe30000, +0x110a3e14,0x7c00100,0xe30001,0x110a3e14,0x7c00100,0x2530000,0x110a3e14,0x7c00900,0x1230000,0x110a3e14,0x7c00900,0x1230001,0x110a3f16,0x7c00100,0xe30c00,0x110a3f16, +0x7c00100,0xe30c01,0x110a3f16,0x7c00100,0x2530c00,0x110a3f16,0x7c00900,0x1230c00,0x110a3f16,0x7c00900,0x1230c01,0x110a4005,0x7c00100,0xe30400,0x110a4112,0x7c00100, +0xe30402,0x110a4112,0x7c80100,0xe30402,0x110a4400,0x4000000,0xe00000,0x110a4412,0x4000000,0xe00002,0x110a4412,0x4000000,0xe00003,0x110a4416,0x4000000,0xe00c03, +0x110a4500,0x4000000,0xe0000d,0x110a4516,0x4000000,0xe00c0d,0x110a4711,0x7c40300,0xe30000,0x110a4f11,0x7c00300,0xe30001,0x110a4f11,0x7c40300,0xe30000,0x110a5300, +0x4000000,0x810010,0x110a5300,0x4000000,0xe00002,0x110a5300,0x4000000,0xe00010,0x110a5300,0x4000000,0x1410010,0x110a5300,0x4000002,0xe70010,0x110a5300,0x4000008, +0x810010,0x110a5300,0x4000008,0x1410010,0x110a5300,0x6800000,0xe01c02,0x110a5300,0x6800000,0xe01c10,0x110a5400,0x4000000,0x81000c,0x110a5400,0x4000000,0xe0000c, +0x110a5400,0x4000000,0x141000c,0x110a5400,0x4000000,0x150000c,0x110a5400,0x4000000,0x160000c,0x110a5400,0x4000002,0xe7000c,0x110a5400,0x4000010,0x87140c,0x110a5400, +0x4000010,0xe7000c,0x110a5400,0x4000010,0x120140c,0x110a5400,0x4000010,0x127100c,0x110a5400,0x4000020,0xe0000c,0x110a5400,0x4000026,0xe7000c,0x110a5400,0xc000010, +0x80ac0c,0x110a5400,0xc000010,0xb4800c,0x11400c0c,0x4000010,0xb00000,0x11400c0c,0x4000010,0x1071400,0x11400c0c,0xc000010,0xb48000,0x11400c16,0x7c00900,0x230400, +0x11400f40,0xc000010,0x448000,0x11400f54,0xc000010,0x448000,0x11401d89,0x4000000,0x200000,0x11403dbf,0x4000000,0xe00000,0x114457b4,0x4000004,0x120000a,0x114457b4, +0x4000008,0x81000a,0x114457b4,0x4000008,0x141000a,0x114457b4,0x4000010,0x87000a,0x114457b4,0xc000010,0x84800a,0x114457bd,0x3802500,0x126246a,0x114457bd,0x7c00d00, +0x2530c0a,0x114a3db4,0x24000000,0x810000,0x114a3db4,0x24000000,0x1410000,0x114a3db4,0x24000008,0x810000,0x114a3db4,0x24000008,0x1410000,0x114a3db4,0x24000010,0x870000, +0x114a3db4,0x2c000010,0x848000,0x114a3dba,0x4000000,0xe00000,0x114a3dba,0x24000000,0xe00000,0x114a3dba,0x24000002,0x1200000,0x114a3dba,0x24000002,0x14e00000,0x114a3dba, +0x24000008,0x810000,0x114a3dba,0x24000008,0x1410000,0x114a3dbd,0x7c00900,0x930c00,0x114a3dbd,0x7c00900,0xe30c00,0x114a3dbf,0x7c00300,0xe30000,0x114a3ebd,0x7000400, +0x1200c02,0x114a3fb4,0x4000004,0x1200000,0x114a3fbd,0x7c00d00,0x2530c00,0x114a42bf,0x4000000,0xe00000,0x114a42bf,0x4000000,0xe0000f,0x114a44bf,0x4000000,0xe00002, +0x114a44bf,0x4000000,0xe00003,0x114a44bf,0x4000000,0x14e00003,0x114a45bf,0x4000000,0xe00002,0x114a45bf,0x4000000,0xe0000d,0x1180090a,0x2802400,0x962460,0x11800c1e, +0x2802100,0x962460,0x11800c1e,0x2802500,0x962460,0x11800f27,0x2802400,0x962460,0x11800f34,0x2802400,0x962460,0x11820700,0x2802400,0x962460,0x11820700,0x2802500, +0x962460,0x118a3dc0,0x2802400,0x962460,0x118a3ebd,0x2802400,0x962460,0x11c00904,0x2802400,0x962460,0x11c00908,0x2802400,0x962460,0x11c00c20,0xc000010,0xb48000, +0x11c00c23,0x6800000,0x1329800,0x11c00f6d,0x6800000,0x1329800,0x11c01072,0x6800000,0x1329800,0x11c01176,0x6800000,0x1329800,0x11c0127a,0x6800000,0x1329800,0x11c0147e, +0x4000000,0x200000,0x11c0147e,0x6800000,0x1329800,0x11c01682,0x6800000,0x1329800,0x11c051fa,0x7c00100,0x230408,0x20000067,0x1000,0,0x20000b13,0x2802400, +0x962460,0x20000b13,0x2802500,0x962460,0x20001b27,0x2802100,0x962460,0x20001b27,0x2802100,0x962461,0x20001b27,0x2802400,0x962460,0x20001b27,0x2806400,0x962460, +0x20001b27,0x2902100,0x962462,0x20001b27,0x4000000,0x200000,0x20001b27,0x4000000,0x400000,0x20001b27,0x4000000,0x500000,0x20001b27,0x4000000,0x810000,0x20001b27, +0x4000000,0xb00000,0x20001b27,0x4000000,0xc0000b,0x20001b27,0x4000000,0x1410000,0x20001b27,0x4000010,0xb00000,0x20001b27,0x4000010,0xc00000,0x20001b27,0x6800000, +0x1329800,0x20001b27,0x6800100,0x462540,0x20001b27,0x6800400,0x962540,0x20001b27,0x7c00100,0x230400,0x20001b27,0x7c00100,0x230401,0x20002619,0x7c00100,0x220401, +0x20002a00,0x4000000,0x1600000,0x20004b67,0,0x1900000,0x20004c67,0,0x1900000,0x20004d67,0,0x1900000,0x20006d67,0x1000,0,0x20006e67, +0x1000,0,0x20026d67,0,0,0x20026e67,0,0,0x200a4a12,0x7c00100,0x1f304c1,0x200a4a12,0x7c00100,0x20304e1,0x21005600,0x4000000, +0x700000,0x21022a00,0x4000000,0x1600000,0x30000419,0x7c00100,0x220400,0x30000419,0x7c00100,0x220401,0x30000419,0x7c00100,0x250400,0x30000419,0x7c00100,0x250401, +0x30000519,0x7c00100,0x220400,0x30000600,0x4000400,0x200400,0x30000600,0x7c00500,0x230400,0x30000605,0x4000400,0x200400,0x3000080e,0x7c00100,0x220400,0x30000908, +0x2000,0x962460,0x30000908,0x7c00100,0x220400,0x30000908,0x7c00100,0x220401,0x30000908,0x7c00100,0x250400,0x30000908,0x7c00100,0x250401,0x30000a03,0x4000006, +0x400400,0x30000c02,0x4000000,0x200000,0x30000c02,0x7c00100,0x230400,0x30000d22,0x2802100,0x962460,0x30000d22,0x2802400,0x962460,0x30000d22,0x2802500,0x962460, +0x30000d22,0x4000000,0x200000,0x30000d22,0x4000010,0x200000,0x30000d22,0x7c00100,0x230400,0x30000d22,0xc000010,0x248000,0x30000d22,0x80000000,0x218960,0x30000e25, +0x2802500,0x962460,0x30000e25,0x7c00100,0x230400,0x30001821,0x2802100,0x962460,0x30001821,0x2806400,0x962460,0x30001821,0x4000000,0x200000,0x30001821,0x6800100, +0x962540,0x30001821,0x6800100,0x962541,0x30001821,0x7c00100,0x230400,0x30001b27,0x2802100,0x962460,0x30001b27,0x2802400,0x962460,0x30001b27,0x4000000,0x200000, +0x30001b27,0x4000000,0x400000,0x30001b27,0x7c00100,0x230400,0x30001c1c,0x2802100,0x1862460,0x30001c1c,0x2802400,0x1862460,0x30001c1c,0x2806400,0x1862460,0x30001c1c, +0x4000000,0x200000,0x30001c1c,0x6800100,0x1862400,0x30001c1c,0x6800100,0x1862540,0x30001c1c,0x7c00100,0x1830000,0x30001c1c,0x7c00100,0x1830001,0x30001c1c,0xc000010, +0x448000,0x30001f0b,0x4000000,0x200000,0x30001f0b,0x4000010,0x200000,0x30001f0b,0x4000010,0x400000,0x30001f0b,0x6800000,0x200000,0x30001f0b,0x7c00100,0x230400, +0x30001f0b,0xc000010,0x248000,0x30002006,0x7c00100,0x250400,0x30002128,0x4000000,0x200000,0x30002128,0x7c00100,0x230400,0x30002128,0xc000010,0x248000,0x3000221d, +0x4000000,0x810000,0x3000221d,0x4000000,0x1410000,0x3000221d,0x4000001,0x445800,0x3000221d,0x7c00100,0x230400,0x30002300,0x4000010,0x400000,0x30002320,0x7c00100, +0x230400,0x30002417,0x2802100,0x1862460,0x30002417,0x2802400,0x1862460,0x30002417,0x2806400,0x1862460,0x30002417,0x2882000,0x1862460,0x30002417,0x4000000,0x200000, +0x30002417,0x4000000,0x400000,0x30002417,0x4000000,0x1600000,0x30002417,0x4000010,0x400000,0x30002417,0x4000010,0x1200000,0x30002417,0x6800000,0x1329800,0x30002417, +0x6800100,0x1862540,0x30002417,0x7c00100,0x1830000,0x30002417,0x7d00100,0x1830000,0x3000251b,0x80000,0xc18820,0x3000251b,0x2802100,0x962460,0x3000251b,0x3c02100, +0x962460,0x3000251b,0x4000000,0x200000,0x3000251b,0x4000006,0x500000,0x3000251b,0x4000010,0x400000,0x3000251b,0x4000010,0xb70000,0x3000251b,0x4000800,0x200000, +0x3000251b,0x6800000,0x1329800,0x3000251b,0x7c00100,0x230400,0x3000251b,0x7c00900,0x230400,0x3000251b,0xc000010,0xb48000,0x3000251b,0x12882000,0x962460,0x30002800, +0x24000000,0x200000,0x30002800,0x2c000010,0x1248002,0x30002800,0x2c000010,0x15248002,0x30002a00,0x4000000,0x1600000,0x30002b01,0x2000,0x962460,0x30002b01,0x2000, +0x8962460,0x30002c00,0x4000000,0x200000,0x30002c00,0x7c00100,0x14220405,0x30002d19,0x7c00100,0x250400,0x30002e00,0x24000000,0x200000,0x30003000,0x24000000,0x200000, +0x30003000,0x24000000,0x4200000,0x30003100,0x24000000,0x200000,0x30003600,0x24000000,0x200000,0x30003700,0x24000000,0x4200000,0x3000392e,0x24000000,0x200000,0x30005013, +0x7c00100,0x2633801,0x30005600,0,0x918820,0x30020600,0x4000400,0x500400,0x30020701,0x2802400,0x962460,0x30020701,0x2802400,0xc62460,0x300a3a11,0x4020000, +0xe00000,0x300a3a11,0x4020000,0xe00002,0x300a3b11,0x4020000,0xe00002,0x300a3c00,0x4008000,0xe00000,0x300a3c00,0x4010000,0xe00000,0x300a3d11,0x7c00300,0xe30002, +0x300a4305,0x7c00100,0xe30400,0x300a4611,0x7c40300,0xe30000,0x300a4829,0x7c00100,0xe30400,0x300a4829,0x7c00900,0x1230400,0x300a4929,0x4000000,0xe00000,0x3040258f, +0x4000010,0x400000,0x3040258f,0x4000010,0xb70000,0x3040258f,0xc000010,0xb48000,0x304028af,0x4000001,0xc41c0b,0x304a3dbf,0x4000000,0xe00000,0x30800c1e,0x2802100, +0x962460,0x30c01c87,0x6800000,0x1329800,0x3100080e,0x7c00120,0x220402,0x3100080e,0x7c00120,0x250402,0x31005167,0x1000,0,0x3100581e,0x4000000,0x200000, +0x3100581e,0x7c00100,0x230400,0x3100590d,0x7c00100,0x230400,0x31005a09,0x7c00100,0x220400,0x31005a09,0x7c00100,0x250400,0x31005b00,0x4000000,0x200000,0x31005c00, +0x80000,0x918820,0x31005c00,0x2802000,0x962460,0x31005c00,0x2802400,0x962460,0x31005c00,0x4000000,0x200000,0x31005c00,0x4000000,0x200001,0x31005c00,0x6800000, +0x962540,0x31005c00,0x6800400,0x962540,0x31005c01,0x2802400,0x962460,0x31005d00,0x4000020,0x200005,0x31005d00,0x6800020,0x1329805,0x31005d00,0x7c00120,0x220405, +0x31005d00,0x7c00120,0x250405,0x31006000,0x82000,0x8962460,0x31006000,0x180000,0x918820,0x310a5e11,0x7c40300,0xe30000,0x310a5f11,0x7c00300,0xe30001,0x32000419, +0x7c00100,0x250400,0x3200080e,0x4000020,0x200000,0x3200080e,0x7c00100,0x220400,0x3200080e,0x7c00100,0x250400,0x32000908,0x7c00100,0x220400,0x32000908,0x7c00100, +0x250400,0x32000c02,0x7c00100,0x230400,0x32000e25,0x7c00100,0x230400,0x32001d0c,0x7c00100,0x230400,0x32002800,0x80000,0x1e18820,0x32002800,0x80020,0x218820, +0x32002800,0x4000001,0x445802,0x32002800,0x24000000,0x200000,0x32002800,0x24000000,0x200002,0x32002800,0x24000020,0x200000,0x32002800,0x2c000010,0x1248002,0x32002919, +0x7c00100,0x22040f,0x32002a00,0x4000000,0x1600000,0x32002b01,0x2000,0x962460,0x32002b01,0x2802000,0x962460,0x32002b01,0x2802020,0x962460,0x32002c00,0x4000000, +0x200000,0x32002c00,0x4000020,0x200000,0x32002c00,0x4000020,0x200005,0x32002c00,0x7c00120,0x220405,0x32002c00,0x7c00120,0x250405,0x32002e00,0x24000020,0x200000, +0x32002f00,0x24000020,0x200000,0x32003000,0x24000000,0x200000,0x32003000,0x24000020,0x200000,0x32003500,0x24000000,0x200000,0x32003600,0x24000020,0x200000,0x32003600, +0x24000020,0x14200000,0x32003700,0x24000000,0x200000,0x32003700,0x24000000,0x4100000,0x32003700,0x24000000,0x4200000,0x32003700,0x24000000,0x14200000,0x32003800,0x24000000, +0x810000,0x32003800,0x24000000,0x1410000,0x32005102,0x4000000,0x1500008,0x32005502,0x7c00100,0x230400,0x32006108,0x7c00100,0x220400,0x32006108,0x7c00100,0x250400, +0x3200622a,0x2802100,0x962460,0x3200622a,0x2806000,0x962460,0x3200622a,0x7c00100,0x230400,0x3200632b,0x2802100,0x962460,0x3200632b,0x2806000,0x962460,0x3200632b, +0x7c00100,0x230400,0x3200642c,0x2802100,0x962460,0x3200642c,0x7c00100,0x230400,0x3200652d,0x2802100,0x962460,0x3200652d,0x7c00100,0x230400,0x32006600,0x24000020, +0x200000,0x32006700,0x24000020,0x200000,0x32006800,0x24000020,0x200000,0x32006800,0x24000020,0x14200000,0x32006900,0x24000020,0x200000,0x32006900,0x24000020,0x810000, +0x32006900,0x24000020,0x1410000,0x32006a00,0x24000020,0x200000,0x32006a00,0x24000020,0x200001,0x32006a00,0x24000020,0x200002,0x32020701,0x2882000,0xc62460,0x32023300, +0x4000000,0x100000,0x32026c01,0x12882000,0x962460,0x32026c01,0x12882000,0x8962460,0x32065700,0x4000000,0x810011,0x32065700,0x4000000,0x1410011,0x32086600,0x24000020, +0x810000,0x32086600,0x24000020,0x1410000,0x32086900,0x24000020,0x810000,0x32086900,0x24000020,0x1410000,0x320a3600,0x24000020,0x34200000,0x320a3d11,0x7c00100,0x1230400, +0x320a3e14,0x7c00100,0xe30010,0x320a3e14,0x7c00100,0x2530000,0x320a3f16,0x7c00100,0xe30c10,0x320a4400,0x4000000,0xe00003,0x320a4929,0x4000000,0xe00000,0x320a4f11, +0x7c00300,0xe30001,0x320a6b16,0x7c00100,0x2530c00,0x3240638b,0xc000010,0x448000,0x324a3dc2,0x4000000,0x14e00000,0x324a3dc2,0x7c00100,0x1230400,0x324a3fbd,0x4000002, +0x1200c00,0x324a53ba,0x24000000,0xe00000,0x32820701,0x2802000,0x962460,0x40000419,0x7c00100,0x220400,0x40000519,0x7c00100,0x220400,0x40000600,0x4000400,0x200400, +0x4000080e,0x7c00100,0x220400,0x4000080e,0x7c00100,0x250400,0x4000080e,0x7c00100,0x250402,0x40000c02,0x2802100,0x962460,0x40000c02,0x2802400,0x962460,0x40000c02, +0x2802500,0x962460,0x40000c02,0x4000000,0x200000,0x40000c02,0x4000000,0x1071400,0x40000c02,0x7c00100,0x230400,0x40000c02,0x80000000,0x218960,0x40000d22,0x7c00100, +0x230400,0x40000f0a,0x7c00100,0x230400,0x40001004,0x7c00100,0x230400,0x40001110,0x2802100,0x962460,0x40001110,0x6800100,0x962540,0x4000120f,0x2802100,0x962460, +0x4000120f,0x4000000,0x1600000,0x4000120f,0x7c00100,0x230400,0x4000131f,0x7c00100,0x230400,0x40001423,0x4000000,0x200000,0x40001423,0x4000000,0x1600000,0x40001615, +0x2802400,0x962460,0x40001615,0x7c00100,0x230400,0x40002417,0x2802400,0x1862460,0x40002417,0x4000000,0x200000,0x40002800,0x6800000,0x201c00,0x40002800,0x24000002, +0x200000,0x40002c00,0x4000000,0x200002,0x40003000,0x24000000,0x14200000,0x40003000,0x24000020,0x200000,0x40003700,0x24000000,0x200000,0x40003700,0x24000000,0x4200000, +0x40003700,0x24000000,0x14200000,0x40005a09,0x7c00100,0x220400,0x40005a09,0x7c00100,0x250400,0x40005d00,0x7c00120,0x220405,0x40006f30,0x2802100,0x962460,0x40006f30, +0x2802400,0x962460,0x40006f30,0x4000000,0x200000,0x40006f30,0x6800000,0x1329800,0x40006f30,0x6800100,0x962540,0x40006f30,0x7c00100,0x230400,0x40006f30,0xc000010, +0xb48000,0x40007034,0x7c00100,0x1830000,0x40007117,0x4000000,0x200000,0x40007208,0x7c00100,0x220400,0x4000720e,0x7c00100,0x220400,0x4000720e,0x7c00500,0x22040e, +0x4000720e,0x7c00500,0x22040f,0x40007219,0x7c00100,0x220400,0x40007219,0x7c00500,0x220400,0x40007219,0x7c00500,0x22040e,0x40007219,0x7c00500,0x22040f,0x40007300, +0x24000000,0x200000,0x40007300,0x24000000,0x14200000,0x40007400,0x4000000,0x200000,0x40007531,0x7c00100,0x230400,0x40007631,0x7c00100,0x230400,0x40007835,0x4000010, +0x400000,0x40007835,0x7c00100,0x230400,0x40007933,0x7c00100,0x230400,0x40007a32,0x6800000,0x1329800,0x40007a32,0x7c00100,0x230400,0x40007b2f,0x7c00100,0x230400, +0x40007c00,0x4000000,0x200000,0x40020701,0x2802400,0x962460,0x40020701,0x2802400,0xc62460,0x40023300,0x4000000,0x200000,0x40027d01,0x12882000,0x962460,0x400a3700, +0x24000000,0x34200000,0x400a3700,0x24000000,0x34e00000,0x400a4400,0x4000000,0xe0000d,0x400a4412,0x4000000,0xe00002,0x400a4412,0x4000000,0xe00003,0x400a4500,0x4000000, +0xe0000d,0x400a5300,0x4000000,0x810010,0x400a5300,0x4000000,0x1410010,0x404077fc,0x4000000,0x200000,0x404077ff,0x4000000,0x200000,0x404077ff,0x4000000,0x400000, +0x40c0147e,0x4000000,0x200000,0x40c051fa,0x4000000,0x200000,0x41000419,0x7c00100,0x220400,0x41000419,0x7c00100,0x250400,0x4100080e,0x7c00100,0x220400,0x4100080e, +0x7c00100,0x250400,0x41000908,0x7c00100,0x220400,0x41000908,0x7c00100,0x250400,0x41000b13,0x2802000,0x962460,0x41000b13,0x2802100,0x962460,0x41000b13,0x4000000, +0xb00000,0x41000c02,0x2802100,0x962460,0x41000c02,0x4000000,0x1500000,0x41000c02,0xc000010,0xb48000,0x41000f0a,0x7c00100,0x230400,0x41001004,0x7c00100,0x230400, +0x41001423,0x7c00100,0x230400,0x41001b27,0x4000000,0x500000,0x41001d0c,0x7c00100,0x230400,0x41001d0c,0x7c00100,0x23040f,0x41001f0b,0x2802400,0x962460,0x41001f0b, +0x4000000,0x200000,0x41001f0b,0x7c00100,0x230400,0x41002800,0x24000000,0x200000,0x41002800,0x24000000,0x400000,0x41002919,0x7c00100,0x22040e,0x41002a00,0x4000000, +0x1600000,0x41002b01,0x2802020,0x962460,0x41002c00,0x4000000,0x200000,0x41002c00,0x7c00120,0x220405,0x41003000,0x24000000,0x200000,0x41003700,0x24000000,0x4200000, +0x41003700,0x24000000,0x14200000,0x41003700,0x24000000,0x14e00000,0x41005d00,0x7c00120,0x220405,0x41006600,0x24000020,0x200000,0x41006600,0x24000020,0x810000,0x41006600, +0x24000020,0x1410000,0x41007208,0x7c00100,0x22040f,0x41007219,0x7c00100,0x220400,0x41007300,0x24000000,0x200000,0x41007e0e,0x2802000,0x962460,0x41007e0e,0x4000000, +0x200000,0x41007f0e,0x4000000,0x200000,0x41007f0e,0x7c00100,0x230400,0x41008002,0x7c00100,0x230400,0x41008137,0x2802100,0x962460,0x41008137,0x4000000,0x200000, +0x41008137,0x6800100,0x962540,0x41008137,0x7c00100,0x230400,0x41008301,0x2802000,0x962460,0x41008407,0x4000000,0x200000,0x41008407,0x4000000,0x400000,0x41008407, +0x4000000,0xb00000,0x41008407,0x7c00100,0x220400,0x41008407,0x7c00100,0x250400,0x4100850b,0x7c00100,0x230400,0x4100860b,0x4000000,0x200000,0x4100860b,0x7c00100, +0x230400,0x4100870c,0x7c00100,0x220400,0x41008838,0x7c00100,0x220400,0x41008838,0x7c00100,0x250400,0x41008939,0x2802000,0x962460,0x41008939,0x2802100,0x962460, +0x41008939,0x2806000,0x962460,0x41008939,0x4000000,0x200000,0x41008939,0x4000000,0x400000,0x41008939,0x7c00100,0x230400,0x41008939,0xc000010,0x448000,0x41008a00, +0x4000400,0x200400,0x41008b3b,0x4000000,0x1800000,0x41008b3b,0x6800000,0x1329800,0x41008b3b,0x7c00100,0x1830000,0x41008b3b,0x7e00100,0x1830000,0x41008c3d,0x4000010, +0x400000,0x41008c3d,0x7c00100,0x230400,0x41008d0e,0x7c00100,0x22040f,0x41008d19,0x7c00100,0x220400,0x41008d19,0x7c00100,0x22040f,0x41008e00,0x24000000,0x200000, +0x41008e00,0x24000000,0x400000,0x41008e00,0x24000000,0x1710000,0x41008e00,0x24000006,0x400000,0x41008f3a,0x2802100,0x962460,0x41008f3a,0x2806000,0x962460,0x41008f3a, +0x4000000,0x200000,0x41008f3a,0x6800100,0x962540,0x41008f3a,0x7c00100,0x230400,0x4100903c,0x7c00100,0x230400,0x4100903c,0x7c00100,0x23040f,0x41020701,0x2802000, +0x962460,0x41020701,0x2802000,0xc62460,0x410a3700,0x24000000,0x34200000,0x410a3700,0x24000000,0x34e00000,0x410a4412,0x4000000,0xe00003,0x410a4711,0x7c40300,0xe30000, +0x410a4f11,0x7c00300,0xe30001,0x410a9100,0x4000000,0x800010,0x410a9100,0x4000000,0x810010,0x410a9100,0x4000000,0x870010,0x410a9100,0x4000000,0xb00010,0x410a9100, +0x4000000,0xf00010,0x410a9100,0x4000000,0x1001410,0x410a9100,0x4000000,0x1071010,0x410a9100,0x4000000,0x1071410,0x410a9100,0x4000000,0x1410010,0x41408ac5,0x4000400, +0x200000,0x414a82bf,0x4000000,0xe00000,0x41808300,0x2802000,0x962460,0x41c0147e,0x6800000,0x1329800,0x50000419,0x7c00100,0x220400,0x50000419,0x7c00100,0x250400, +0x5000080e,0x7c00100,0x220400,0x50000908,0x7c00100,0x220400,0x50000908,0x7c00100,0x250400,0x50000b13,0x2802500,0x962460,0x50000f0a,0x7c00100,0x230400,0x50001615, +0x2802100,0x962460,0x50001615,0x7c00100,0x230400,0x50002b01,0x2802020,0x962460,0x50002c00,0x4000000,0x200000,0x50002c19,0x7c00100,0x220400,0x50002d19,0x7c00100, +0x220400,0x50003000,0x24000000,0x200000,0x50003000,0x24000020,0x200000,0x50003700,0x24000000,0x4200000,0x50005d00,0x7c00120,0x220405,0x50005d00,0x7c00120,0x250405, +0x50006108,0x7c00100,0x220400,0x50006108,0x7c00100,0x250400,0x50006600,0x24000020,0x200000,0x50007300,0x24000000,0x200000,0x50008301,0x2802400,0x962460,0x50008a00, +0x7c00500,0x230400,0x50009257,0x2802400,0x962460,0x50009257,0x4000000,0x200000,0x50009257,0x4000010,0x1071400,0x50009257,0x6800000,0x1329800,0x50009257,0x7c00100, +0x230400,0x50009257,0x7c00500,0x230400,0x50009257,0x7c00900,0x230400,0x50009257,0xc000010,0xb48000,0x5000933e,0x2802100,0x962460,0x5000933e,0x2802400,0x962460, +0x5000933e,0x4000000,0x200000,0x5000933e,0x4000000,0x400000,0x5000933e,0x4000010,0x400000,0x5000933e,0x6800000,0x1329800,0x5000933e,0x6800100,0x962540,0x5000933e, +0x6800100,0x962541,0x5000933e,0x6804400,0x962540,0x5000933e,0x7c00100,0x230400,0x5000933e,0x7c00100,0x230401,0x5000933e,0xc000010,0x448000,0x50009419,0x7c00100, +0x220400,0x50009419,0x7c00100,0x250400,0x50009500,0x4000400,0x200400,0x5000965a,0x4000000,0x500000,0x5000965a,0x7c00100,0x230400,0x5000965a,0xc000010,0xb48000, +0x5000975b,0x4000000,0x200000,0x5000975b,0x4000010,0x400000,0x5000975b,0x7c00100,0x230400,0x50009865,0x7c00100,0x230400,0x50009965,0x4000010,0x400000,0x50009965, +0x7c00100,0x230400,0x50409abf,0x4000000,0x200000,0x5100080e,0x7c00100,0x220400,0x5100080e,0x7c00100,0x250400,0x51000c02,0x2802100,0x962460,0x51000c02,0x4000000, +0x1500000,0x51000c02,0x4000020,0x200000,0x51000c02,0x7c00100,0x230400,0x51000f0a,0x7c00100,0x230400,0x51000f0a,0x7c00500,0x230400,0x51001110,0x2802100,0x962460, +0x5100131f,0x2802100,0x962460,0x51001423,0x7c00100,0x230400,0x51001524,0x2802100,0x962460,0x51001524,0x4000000,0x200000,0x51001524,0x7c00100,0x230400,0x5100171a, +0x2802100,0x962460,0x5100171a,0x4000000,0x200000,0x5100171a,0x4000000,0x1500000,0x5100171a,0x7c00100,0x230400,0x51001b27,0x4000000,0x200000,0x51001b27,0x4000000, +0x400000,0x51001b27,0x4000000,0x500000,0x51001b27,0x7c00100,0x230400,0x51001c1c,0x2802100,0x1862460,0x51001c1c,0x2802500,0x1862460,0x51001c1c,0x2806400,0x1862460, +0x51001c1c,0x4000000,0x1800000,0x51001c1c,0x6800000,0x1329800,0x51001c1c,0x6800100,0x1862400,0x51001c1c,0x6800100,0x1862540,0x51001c1c,0x6800500,0x1862400,0x51001c1c, +0x7c00100,0x1830000,0x5100251b,0x7c00100,0x230400,0x51002619,0x7c00100,0x220400,0x51002619,0x7c00100,0x250400,0x51002800,0x80020,0x218820,0x51002c00,0x4000000, +0x200000,0x51002d19,0x7c00100,0x230400,0x51003700,0x24000000,0x4200000,0x51003700,0x24000000,0x4e00000,0x51005201,0x2802400,0x962460,0x51005c00,0x4000000,0x200000, +0x51006108,0x7c00100,0x220400,0x51006108,0x7c00100,0x250400,0x51006600,0x24000020,0x200000,0x51006600,0x24000020,0x810000,0x51006600,0x24000020,0x1410000,0x51007300, +0x24000000,0x200000,0x51007300,0x24000020,0x200000,0x51008002,0x7c00100,0x230400,0x51008301,0x2802000,0x962460,0x51008301,0x2802400,0x962460,0x51008a00,0x7c00500, +0x230400,0x51008e00,0x24000000,0x200000,0x51008e00,0x24000000,0x400000,0x51008e00,0x24000000,0x810000,0x51008e00,0x24000000,0x1400000,0x51008e00,0x24000000,0x1410000, +0x51008e00,0x24000000,0x1710000,0x51008e00,0x24000002,0x200000,0x51008e00,0x24000500,0x230400,0x51008e00,0x2c000010,0xb48000,0x51009419,0x7c00100,0x220400,0x51009419, +0x7c00100,0x22040e,0x51009419,0x7c00100,0x22040f,0x51009419,0x7c00100,0x250400,0x51009500,0x4000400,0x200400,0x51009500,0x7c00500,0x230400,0x51009519,0x7c00100, +0x220400,0x51009519,0x7c00100,0x22040f,0x51009519,0x7c00100,0x230400,0x51009519,0x7c00100,0x250400,0x51009b71,0x2802100,0x962460,0x51009b71,0x6800000,0x1329800, +0x51009b71,0x6800100,0x962540,0x51009b71,0x6804400,0x962540,0x51009b71,0x7c00100,0x230400,0x51009c52,0x2802100,0x962460,0x51009c52,0x2802400,0x962460,0x51009c52, +0x2802d00,0x962460,0x51009c52,0x4000010,0x400000,0x51009c52,0x6800000,0x1329800,0x51009c52,0x6800100,0x962540,0x51009c52,0x7c00100,0x230400,0x51009c52,0xc000010, +0x448000,0x51009d6d,0x6800000,0x1329800,0x51009d6d,0x7c00100,0x230400,0x51009d6d,0x7c00500,0x230400,0x51009d6d,0x7c00d00,0x230400,0x51009d6d,0xc000010,0x448000, +0x51009e08,0x2802100,0x962460,0x51009f63,0x4000010,0x400000,0x51009f63,0x6800000,0x1329800,0x51009f63,0x7c00100,0x230400,0x51009f63,0x7c00900,0x230400,0x51009f63, +0xc000010,0x448000,0x51009f63,0xc000010,0xb48000,0x5100a008,0x2000,0x962460,0x5100a008,0x2802400,0x962460,0x5100a008,0x4000000,0x200000,0x5100a008,0x7c00100, +0x220400,0x5100a008,0x7c00100,0x230400,0x5100a008,0x7c00100,0x250400,0x5100a008,0x7c00500,0x230400,0x5100a16f,0x2806400,0x962460,0x5100a16f,0x6800000,0x1329800, +0x5100a16f,0x6800100,0x962540,0x5100a16f,0x7c00100,0x230400,0x5100a16f,0xc000010,0x448000,0x5100a24f,0x2802100,0x962460,0x5100a24f,0x2802400,0x962460,0x5100a24f, +0x6800000,0x1329800,0x5100a24f,0x7c00100,0x230400,0x5100a24f,0xc000010,0x448000,0x5100a36e,0x2802100,0x962460,0x5100a36e,0x4000000,0x200000,0x5100a36e,0x6800100, +0x962540,0x5100a36e,0x6804400,0x962540,0x5100a36e,0x7c00100,0x230400,0x5100a442,0x2802100,0x962460,0x5100a442,0x4000000,0x200000,0x5100a442,0x6800000,0x1329800, +0x5100a442,0x6800100,0x962540,0x5100a442,0x7c00100,0x230400,0x5100a442,0xc000010,0x448000,0x5100a500,0x4000000,0x200000,0x5100a600,0x4000000,0x200000,0x5100a601, +0x2802000,0x962460,0x5100a76b,0x7c00100,0x230400,0x5100a868,0x7c00100,0x230400,0x5100a96c,0x4000000,0x200000,0x5100a96c,0x7c00100,0x230400,0x5100aa00,0x4000000, +0x4e00000,0x5100ab00,0x4000000,0x4e00000,0x51086600,0x24000020,0x810000,0x51086600,0x24000020,0x1410000,0x510a4005,0x7c00100,0xe30400,0x510a4711,0x7c40300,0xe30000, +0x510a7300,0x24000000,0x34200000,0x510aaa00,0x4000000,0x34e00000,0x5140a2f3,0x4000400,0x400000,0x514a82bf,0x4000000,0xe00000,0x51802bb1,0x2802000,0x962460,0x51c00908, +0x2802400,0x962460,0x51c0a008,0x2802400,0x962460,0x52000f0a,0x2802100,0x962460,0x52000f0a,0x6800100,0x962540,0x52000f0a,0x7c00100,0x230400,0x52001004,0x4000000, +0x1600000,0x52001b00,0x4000000,0x200000,0x52001c1c,0x2802100,0x1862460,0x52001c1c,0x6800100,0x1862400,0x52001c1c,0x6800500,0x1862400,0x52001e12,0x7c00100,0x2230500, +0x52001e12,0x7c00100,0x2330520,0x52002128,0x4000002,0x400000,0x52002128,0x7c00100,0x230400,0x52002a00,0x4000000,0x1500000,0x52002a00,0x4000000,0x1600000,0x52002d00, +0x4000000,0x200006,0x52003000,0x24000000,0x200000,0x52006108,0x7c00100,0x220400,0x52006108,0x7c00100,0x250400,0x52008301,0x2802400,0x962460,0x52008407,0x2802400, +0x962460,0x52008407,0x7c00100,0x220400,0x52008407,0x7c00100,0x250400,0x52008b3b,0x6800000,0x1800000,0x52008b3b,0x7c00100,0x1830000,0x52008e00,0x24000000,0x400000, +0x52009419,0x7c00100,0x250400,0x5200975b,0x4000000,0x200000,0x5200ac7e,0x2802000,0x962460,0x5200ac7e,0x2802100,0x962460,0x5200ac7e,0x2802400,0x962460,0x5200ac7e, +0x4000010,0x200000,0x5200ac7e,0x7c00100,0x230400,0x5200ac7e,0xc000010,0x248000,0x5200ad28,0x7c00100,0x230400,0x5200ae6a,0x2802100,0x1862460,0x5200ae6a,0x2802400, +0x962460,0x5200ae6a,0x2802400,0x1862460,0x5200ae6a,0x2806000,0x1862460,0x5200ae6a,0x4000000,0x1800000,0x5200ae6a,0x6800000,0x1329800,0x5200ae6a,0x6800100,0x1862400, +0x5200ae6a,0x6800100,0x1862540,0x5200ae6a,0x7c00100,0x1830000,0x5200ae6a,0x7c00900,0x1830000,0x5200ae6a,0xc000010,0x1848000,0x5200b083,0x4000010,0x400000,0x5200b083, +0x7c00100,0x230400,0x5200b083,0xc000010,0x448000,0x5200b182,0x2802400,0x962460,0x5200b182,0x4000000,0x200000,0x5200b182,0x4000010,0x400000,0x5200b182,0x7c00100, +0x230400,0x5200b182,0xc000010,0x448000,0x5200b30a,0x2802400,0x962460,0x5200b30a,0x4000000,0x200000,0x5200b30a,0x7c00100,0x230400,0x5200b54e,0x2802100,0x962460, +0x5200b54e,0x2802400,0x962460,0x5200b54e,0x4000000,0x200000,0x5200b54e,0x4000010,0x400000,0x5200b54e,0x6800000,0x1329800,0x5200b54e,0x6800100,0x962540,0x5200b54e, +0x6804400,0x962540,0x5200b54e,0x7c00100,0x230400,0x5200b54e,0xc000010,0x448000,0x5200b61c,0x4000000,0x1800000,0x5200b61c,0x6800500,0x1862400,0x5200b61c,0x7c00100, +0x1830000,0x5200b61c,0x7c00900,0x1830000,0x5200b77f,0x2802100,0x1862460,0x5200b77f,0x2802400,0x1862460,0x5200b77f,0x4000000,0x1800000,0x5200b77f,0x4000010,0x1800000, +0x5200b77f,0x7c00100,0x1830000,0x5200b77f,0x7c00500,0x1830000,0x5200b77f,0x7c00900,0x1830000,0x5200b77f,0x7e00100,0x1830000,0x5200b873,0x2802100,0x962460,0x5200b873, +0x2806400,0x962460,0x5200b873,0x6800000,0x1329800,0x5200b873,0x6800100,0x962540,0x5200b873,0x6800400,0x962540,0x5200b873,0x7c00100,0x230400,0x5200b873,0xc000010, +0x448000,0x5200b912,0x7c00100,0x2230500,0x5200b912,0x7c00100,0x2330520,0x5200ba74,0x4000000,0x200000,0x5200ba74,0x4000010,0x400000,0x5200ba74,0x7c00100,0x230400, +0x5200bb85,0x4000000,0x200000,0x5200bb85,0x7c00100,0x230400,0x5200bc75,0x4000000,0x400000,0x5200bc75,0x4000010,0x400000,0x5200bc75,0x7c00100,0x230400,0x5200bd7d, +0x4000000,0x200000,0x5200bd7d,0x7c00100,0x230400,0x5200be7a,0x4000000,0x200000,0x5200be7a,0x7c00100,0x230400,0x5200bf58,0x7c00100,0x230400,0x5200c002,0x4000000, +0x200000,0x5200c178,0x2802000,0x962460,0x5200c178,0x2802100,0x962460,0x5200c178,0x2802400,0x962460,0x5200c178,0x2806400,0x962460,0x5200c178,0x4000000,0x200000, +0x5200c178,0x6800100,0x962540,0x5200c178,0x7c00100,0x230400,0x5200c178,0x7c00100,0x230401,0x5200c178,0xc000010,0x448000,0x5200c178,0x80000000,0x218960,0x5200c247, +0x7c00100,0x230400,0x5200c247,0x7c00100,0x830400,0x5200c247,0x7c00100,0x1430400,0x5200c300,0x4000000,0x200003,0x52022d00,0x4000000,0x100006,0x52023700,0x24000000, +0x4100000,0x52023700,0x24000000,0x4e00000,0x52023700,0x24000000,0x14100000,0x52023700,0x24000000,0x14e00000,0x52023700,0x24000000,0x96800000,0x52024400,0x4000000,0x100000, +0x52027300,0x24000000,0x100000,0x5202c300,0x4000000,0x100000,0x5202c300,0x4000000,0x100002,0x5202c300,0x4000000,0x100003,0x5202c300,0x4000000,0x10000d,0x5202c300, +0x4000100,0x150400,0x5202c300,0x4000100,0x15040d,0x5202c300,0x4000100,0x14150400,0x520a1e12,0x7c00100,0x2130480,0x520a3700,0x24000000,0x34e00000,0x520a3800,0x24000000, +0x34100000,0x520a4711,0x7c40300,0xe30000,0x520a4f11,0x7c00300,0xe30001,0x520a7300,0x24000000,0x34100000,0x520ab412,0x7c00100,0x2130480,0x520ac400,0x4000000,0xe00002, +0x520ac400,0x4000000,0xe0000d,0x520ac400,0x4000000,0x34e0000d,0x520ac414,0x4000000,0xe0000d,0x520ac511,0x7c40300,0xe30000,0x5240af91,0x7c00100,0x230400,0x5240af96, +0x4000400,0x200000,0x5240af98,0x6800400,0x962540,0x5240af98,0x7c00100,0x230400,0x5240afa2,0x7c00100,0x230400,0x5240afa4,0x7c00100,0x230400,0x5240b2c7,0x4000000, +0x200000,0x5240b2c7,0x4000000,0x1500000,0x5240b2d2,0x4000000,0x200000,0x5240b2e0,0x4000000,0x200000,0x5240b5f6,0x7c00900,0x230400,0x524a44bf,0x4000000,0xe00003, +0x5280af91,0x2802400,0x962460,0x5280af92,0x2802400,0x962460,0x5280af98,0x2802400,0x962460,0x5280af9a,0x2802400,0x962460,0x5280af9c,0x2802400,0x962460,0x52c0b3ed, +0x2802400,0x962460,0x52c0b3f1,0x7c00100,0x230400,0x60000c02,0x2802100,0x962460,0x60000c02,0x7c00100,0x230400,0x60000f0a,0x2802100,0x962460,0x60000f0a,0x6800100, +0x962540,0x60000f0a,0x7c00100,0x230400,0x6000131f,0x4000000,0x200000,0x6000171a,0x7c00100,0x230400,0x6000171a,0x7c00100,0x230560,0x60001b27,0x2802100,0x962460, +0x60001b27,0x4000000,0xc00000,0x60001b27,0x7c00100,0x230400,0x60001f0b,0x2802400,0x962460,0x60002919,0x7c00100,0x22040e,0x60002a00,0x4000000,0x1600000,0x60003000, +0x24000000,0x14200000,0x60003000,0x24000000,0x14e00000,0x60003700,0x24000000,0x4200000,0x60003800,0x24000000,0x1710000,0x60005102,0x4000000,0x200000,0x60006108,0x7c00100, +0x220400,0x60006108,0x7c00100,0x250400,0x60006600,0x24000020,0x200000,0x60008301,0x2802000,0x962460,0x6000903c,0x2806000,0x962460,0x6000903c,0x4000000,0x400000, +0x60009519,0x7c00100,0x220400,0x60009519,0x7c00100,0x250400,0x6000a008,0x7c00100,0x220400,0x6000a008,0x7c00100,0x250400,0x6000c300,0x4000000,0x3a703580,0x6000c654, +0x2802000,0x962460,0x6000c654,0x4000010,0x200000,0x6000c654,0x7c00100,0x230400,0x6000c73f,0x2802000,0x962460,0x6000c73f,0x2802100,0x962460,0x6000c73f,0x4000000, +0x200000,0x6000c73f,0x6800100,0x962540,0x6000c73f,0x6804000,0x962540,0x6000c73f,0x7c00100,0x230400,0x6000c80b,0x7c00100,0x230400,0x6000c941,0x2802100,0x962460, +0x6000c941,0x2806000,0x962460,0x6000c941,0x4000000,0x200000,0x6000c941,0x4000010,0x200000,0x6000c941,0x6800000,0x1329800,0x6000c941,0x6800100,0x962540,0x6000c941, +0x7c00100,0x230400,0x6000c941,0xc000010,0x448000,0x6000ca82,0x7c00100,0x230400,0x6000cc00,0x4000000,0x4e00000,0x6000d000,0x4000000,0x200000,0x6002c300,0x4000000, +0x100000,0x6002c300,0x4000000,0x10000d,0x6002c300,0x4000100,0x150400,0x6002c300,0x4000100,0x15040d,0x6002c300,0x4000100,0x14150400,0x600a3000,0x24000000,0x34200000, +0x600a3000,0x24000000,0x34e00000,0x600a3700,0x24000000,0x34200000,0x600a3800,0x24000000,0x34200000,0x600a3800,0x24000000,0xb6800000,0x600a4305,0x7c00100,0xe30400,0x600ac300, +0x4000000,0x34100000,0x600ac400,0x4000000,0x14e0000d,0x600ac400,0x4000000,0x34e0000d,0x600acb14,0x7c00100,0xe30000,0x600acb16,0x7c00100,0xe30c00,0x600acc00,0x4000000, +0x34e00000,0x600acd00,0x4000000,0x34200000,0x600acd00,0x4000000,0x34e00000,0x600acd00,0x4000000,0xb6800000,0x600ace00,0x4000000,0x34e00000,0x600ace00,0x4000000,0xb6800000, +0x600acf00,0x4000000,0x34e00000,0x600acf00,0x4000000,0xb6800000,0x600ad111,0x7c40300,0xe30000,0x604ac4bf,0x4000000,0x34e00003,0x61000a03,0x4000000,0x1600000,0x61000c02, +0x80000000,0x218960,0x6100120f,0x4000000,0x200000,0x61001a18,0x7c00100,0x1830000,0x61001d0c,0x7c00100,0x230400,0x61001d0c,0x7c00100,0x250400,0x61006600,0x24000020, +0x200000,0x61008407,0x7c00100,0x220400,0x61008407,0x7c00100,0x250400,0x6100870c,0x7c00100,0x220400,0x61008e00,0x24000000,0x200000,0x61008e00,0x24000000,0x400000, +0x61008e00,0x24000002,0x300000,0x6100903c,0x7c00100,0x230400,0x61009519,0x7c00100,0x220400,0x61009519,0x7c00100,0x250400,0x61009519,0x7c00500,0x22040f,0x61009b71, +0x2802100,0x962460,0x61009b71,0x2806400,0x962460,0x61009b71,0x7c00100,0x230400,0x6100a008,0x2802100,0x962460,0x6100c300,0x4000000,0x20000f,0x6100cd00,0x4000000, +0x200000,0x6100d202,0x2802400,0x962460,0x6100d202,0x2802500,0x962460,0x6100d202,0x7c00100,0x230400,0x6100d302,0x4000020,0x200000,0x6100d302,0x7c00120,0x230405, +0x6100d476,0x2802100,0x962460,0x6100d476,0x2802100,0x962461,0x6100d476,0x2806400,0x962460,0x6100d476,0x4000000,0x400000,0x6100d476,0x6800000,0x1329800,0x6100d476, +0x6800100,0x962540,0x6100d476,0x7c00100,0x230400,0x6100d476,0xc000010,0x448000,0x6100d573,0x2802100,0x962460,0x6100d573,0x2806400,0x962460,0x6100d573,0x6800100, +0x962540,0x6100d573,0x7c00100,0x230400,0x6100d573,0x7c00900,0x230400,0x6100d573,0xc000010,0x448000,0x6100d68d,0x7c00100,0x230400,0x6100d756,0x7c00100,0x230400, +0x6100d85c,0x2802500,0x962460,0x6100d85c,0x6800100,0x962540,0x6100d85c,0x7c00100,0x230400,0x6100d85c,0x7c00500,0x230400,0x6100d997,0x2802100,0x962460,0x6100d997, +0x4000000,0x200000,0x6100d997,0x4000000,0x400000,0x6100d997,0x6800000,0x1329800,0x6100d997,0x6800100,0x962540,0x6100d997,0x6804400,0x962540,0x6100d997,0x7c00100, +0x230400,0x6100d997,0x7c00100,0x230560,0x6100d997,0xc000010,0x448000,0x6100da98,0x6800000,0x1329800,0x6100da98,0x7c00100,0x230400,0x6100db71,0x4000000,0x200000, +0x6100dc99,0x2802100,0x962460,0x6100dc99,0x2802400,0x962460,0x6100dc99,0x6800000,0x1329800,0x6100dc99,0x6800100,0x962540,0x6100dc99,0x6804400,0x962540,0x6100dc99, +0x7c00100,0x230400,0x610a4711,0x7c40300,0xe30000,0x610a4f11,0x7c00300,0xe30001,0x610ace00,0x4000000,0x34e00000,0x6140af96,0x7c00100,0x230400,0x6140af98,0x7c00100, +0x230400,0x6180af93,0x2802400,0x962460,0x62002a00,0x4000000,0x1600000,0x63002800,0x80000,0x918820,0x63c00c14,0x80000,0x918820,0x7000080e,0x7c00100,0x250400, +0x70000a03,0x4000000,0x200000,0x70000c00,0x80000000,0x218960,0x70000f0a,0x7c00100,0x230400,0x70001004,0x7c00100,0x230400,0x70001524,0x2802100,0x962460,0x70001524, +0x7c00100,0x230400,0x70001615,0x2802100,0x962460,0x7000171a,0x2802100,0x962460,0x70001821,0x6800000,0x1329800,0x70002320,0x7c00100,0x230400,0x70002a00,0x4000000, +0x1500000,0x70002a00,0x4000000,0x1600000,0x70003000,0x24000000,0x200000,0x70003000,0x24000000,0x14200000,0x70003800,0x24000000,0x4e00000,0x70005201,0x2802400,0x962460, +0x7000581e,0x7c00100,0x230400,0x70006108,0x7c00100,0x220400,0x70006108,0x7c00100,0x250400,0x70006f30,0x7c00100,0x230400,0x70007300,0x24000000,0x200000,0x70007f0e, +0x4000000,0x200000,0x70008301,0x2802100,0x962460,0x70008301,0x2802400,0x962460,0x70008e00,0x24000000,0x200000,0x70008e00,0x24000000,0x400000,0x70008e00,0x24000002, +0x400000,0x70008e00,0x24000008,0x1410000,0x70008e00,0x24000010,0x400000,0x70008e00,0x2c000010,0x448000,0x70009519,0x7c00100,0x220400,0x70009519,0x7c00100,0x230400, +0x70009519,0x7c00100,0x250400,0x70009865,0x7c00100,0x230400,0x70009965,0x4000010,0x400000,0x70009965,0x7c00100,0x230400,0x7000a008,0x7c00100,0x220400,0x7000a008, +0x7c00100,0x250400,0x7000a008,0x7c00500,0x22040f,0x7000a50e,0x4000000,0x200000,0x7000b61c,0x2802500,0x1862460,0x7000b61c,0x6800500,0x1862400,0x7000b61c,0x7c00100, +0x1830000,0x7000c300,0x4000000,0x100000,0x7000c941,0x2806000,0x962460,0x7000cc00,0x4000000,0x4e00000,0x7000cd00,0x4000000,0x200000,0x7000cd00,0x4000000,0x4200000, +0x7000cd00,0x4000000,0x4e00000,0x7000cd00,0x4000000,0x14200000,0x7000cd00,0x4000000,0x14e00000,0x7000cd00,0x4000000,0x96800000,0x7000cf00,0x4000000,0x4e00000,0x7000cf00, +0x4000000,0x14e00000,0x7000d202,0x2802100,0x962460,0x7000d202,0x7c00100,0x230400,0x7000d997,0x7c00100,0x230400,0x7000d997,0xc000010,0x248000,0x7000dd86,0x2802400, +0x962460,0x7000dd86,0x7c00100,0x230400,0x7000dd86,0xc000010,0x448000,0x7000de9f,0x4000000,0x200000,0x7000de9f,0x7c00100,0x230400,0x7000e001,0x2000,0x962460, +0x7000e001,0x2802400,0x962460,0x7000e187,0x2802000,0x962460,0x7000e187,0x2802100,0x962460,0x7000e187,0x4000000,0x200000,0x7000e187,0x7c00100,0x230400,0x7000e187, +0xc000010,0x448000,0x7000e288,0x7c00100,0x230400,0x7000e300,0x4000000,0x200000,0x7000e489,0x2802100,0x962460,0x7000e489,0x2802400,0x962460,0x7000e489,0x6800100, +0x962540,0x7000e489,0x6800100,0x962541,0x7000e489,0x6804400,0x962540,0x7000e489,0x7c00100,0x230400,0x7000e489,0x7c00900,0x230400,0x7000e59d,0x2802100,0x962460, +0x7000e59d,0x2802400,0x962460,0x7000e59d,0x4000000,0x200000,0x7000e59d,0x4000010,0x200000,0x7000e59d,0x6800100,0x962540,0x7000e59d,0x6804400,0x962540,0x7000e59d, +0x7c00100,0x230400,0x7000e59d,0xc000010,0x448000,0x7000e691,0x2802100,0x962460,0x7000e691,0x2802400,0x962460,0x7000e691,0x2806400,0x962460,0x7000e691,0x6800000, +0x1329800,0x7000e691,0x6800100,0x962540,0x7000e691,0x7c00100,0x230400,0x7000e700,0x4000400,0x200400,0x7000e70e,0x7c00100,0x220400,0x7000e719,0x7c00100,0x220400, +0x7000e719,0x7c00500,0x22040f,0x7000e853,0x7c00100,0x230400,0x7000e9a0,0x2802400,0x962460,0x7000e9a0,0x4000000,0x200000,0x7000e9a0,0x4000000,0x500000,0x7000e9a0, +0x7c00100,0x230400,0x7000ea79,0x2802400,0x962460,0x7000ea79,0x4000000,0x200000,0x7000ea79,0x4000000,0xf00000,0x7000ea79,0x4000010,0x400000,0x7000ea79,0x7c00100, +0x230400,0x7000eb8c,0x2802400,0x962460,0x7000eb8c,0x4000000,0x200000,0x7000eb8c,0x7c00100,0x230400,0x7000eca3,0x2802100,0x962460,0x7000eca3,0x2806400,0x962460, +0x7000eca3,0x4000000,0x200000,0x7000eca3,0x6800000,0x1329800,0x7000eca3,0x6800100,0x962540,0x7000eca3,0x7c00100,0x230400,0x7000eca3,0xc000010,0x448000,0x7000ed95, +0x6800000,0x1329800,0x7000ed95,0x7c00100,0x230400,0x7000ed95,0xc000010,0x448000,0x7000ee1c,0x2802500,0x1862460,0x7000ee1c,0x6800000,0x1329800,0x7000ee1c,0x7c00100, +0x1830000,0x7000ee1c,0x7c00900,0x1830000,0x7000ef8f,0x4000000,0x200000,0x7000ef8f,0x7c00100,0x230400,0x7000f08e,0x4000000,0x200000,0x7000f08e,0x7c00100,0x230400, +0x7000f159,0x2802100,0x962460,0x7000f159,0x7c00100,0x230400,0x7000f200,0x4000000,0x200000,0x7000f200,0x4000000,0x1200000,0x7000f200,0x4000000,0x1710000,0x7000f34b, +0x2802400,0x962460,0x7000f34b,0x4000000,0x200000,0x7000f34b,0x4000010,0x400000,0x7000f34b,0x6800000,0x1329800,0x7000f34b,0x7c00100,0x230400,0x7000f34b,0x7c00900, +0x230400,0x7000f34b,0xc000010,0x448000,0x7000f490,0x4000000,0x200000,0x7000f490,0x7c00100,0x230400,0x7000f5a5,0x7c00100,0x230400,0x7000f67b,0x4000000,0x200000, +0x7000f67b,0x4000010,0x200000,0x7000f67b,0x7c00100,0x230400,0x7000f8a6,0x2802100,0x962460,0x7000f8a6,0x2802400,0x962460,0x7000f8a6,0x2806400,0x962460,0x7000f8a6, +0x4000000,0x500000,0x7000f8a6,0x4000010,0xb00000,0x7000f8a6,0x4000800,0x200000,0x7000f8a6,0x6800100,0x962540,0x7000f8a6,0x6800100,0x962541,0x7000f8a6,0x7c00100, +0x230400,0x7000f8a6,0xc000010,0x448000,0x7000f921,0x4000000,0x200000,0x7000fa00,0x4000000,0x200000,0x7000fb9e,0x2802100,0x962460,0x7000fb9e,0x2802400,0x962460, +0x7000fb9e,0x2806400,0x962460,0x7000fb9e,0x4000000,0x200000,0x7000fb9e,0x6800000,0x1329800,0x7000fb9e,0x6800100,0x962540,0x7000fb9e,0x6800100,0x962541,0x7000fb9e, +0x7c00100,0x230400,0x7000fc92,0x4000000,0x200000,0x7000fc92,0x6800000,0x1329800,0x7000fc92,0x7c00100,0x220400,0x7000fc92,0x7c00100,0x230400,0x7000fc92,0x7c00100, +0x250400,0x700acd00,0x4000000,0x34e00000,0x700acd00,0x4000000,0xb6800000,0x700ace00,0x4000000,0x34e00000,0x700acf00,0x4000000,0x34e00000,0x700acf00,0x4000000,0xb6800000, +0x7050df01,0x4000000,0x200000,0x7050f705,0x80000,0x918820,0x7080af96,0x2802400,0x962460,0x7090df01,0x2802400,0x962460,0x70d0e403,0x2802100,0x962460,0x70d0e403, +0x2802400,0x962460,0x70d0e403,0x6800100,0x962540,0x8000120f,0x7c00100,0x230400,0x80001524,0x7c00100,0x230400,0x8000171a,0x7c00100,0x230400,0x80002006,0x7c00100, +0x220400,0x80002006,0x7c00100,0x250400,0x80002a00,0x4000000,0x1500000,0x80002d00,0x4000000,0x200000,0x80005208,0x2802400,0x962460,0x80005c00,0x4000000,0x200000, +0x80007300,0x24000000,0x200000,0x80009519,0x7c00100,0x220400,0x80009519,0x7c00100,0x230400,0x80009519,0x7c00100,0x250400,0x80009865,0x7c00100,0x230400,0x8000a008, +0x2802100,0x962460,0x8000b30a,0x4000000,0x500000,0x8000b30a,0x7c00100,0x230400,0x8000cd00,0x4000000,0x4e00000,0x8000d202,0x2802500,0x962460,0x8000d202,0x7c00100, +0x230400,0x8000d68d,0x4000000,0x200000,0x8000d997,0x2802000,0x962460,0x8000d997,0x2802400,0x962460,0x8000d997,0x4000000,0x400000,0x8000d997,0x4000000,0x500000, +0x8000d997,0x7c00100,0x230400,0x8000d997,0xc000010,0x448000,0x8000e489,0x2802100,0x962460,0x8000e489,0x7c00100,0x230400,0x8000e719,0x7c00100,0x220400,0x8000f8a6, +0x2802100,0x962460,0x8000f8a6,0x7c00100,0x230400,0x8000f8a6,0xc000010,0x448000,0x8000fda1,0x2802100,0x1862460,0x8000fda1,0x2806400,0x1862460,0x8000fda1,0x4000000, +0x1800000,0x8000fda1,0x6800000,0x1329800,0x8000fda1,0x6800100,0x1862540,0x8000fda1,0x7c00100,0x1830000,0x8000fda1,0xc000010,0x448000,0x8000fe9c,0x7c00100,0x230400, +0x8000fe9c,0x7c00100,0x830400,0x8000fe9c,0x7c00100,0x1430400,0x8000ff06,0x7c00100,0x220400,0x80010165,0x7c00100,0x230400,0x800102a2,0x4000000,0x200000,0x800102a2, +0x7c00100,0x230400,0x800103a4,0x7c00100,0x230400,0x800103a4,0xc000010,0x448000,0x8001044c,0x4000000,0x200000,0x8001044c,0x7c00100,0x220400,0x8001044c,0x7c00100, +0x250400,0x80010670,0x2802000,0x962460,0x80010670,0x4000000,0x200000,0x80010670,0x4000010,0x400000,0x80010670,0xc000010,0x448000,0x800a4711,0x7c40300,0xe30000, +0x800acd00,0x4000000,0x34e00000,0x800acd00,0x4000000,0x7a902460,0x800ace00,0x4000000,0x34e00000,0x800acf00,0x4000000,0x34e00000,0x800b0011,0x7c40300,0xe30000,0x800b0500, +0x4000000,0x34e00000,0x800b0500,0x4000000,0xb6800000,0x90001615,0x7c00100,0x230400,0x9000171a,0x4000000,0x200000,0x9000171a,0x7c00100,0x230400,0x90003000,0x24000000, +0x200000,0x90007f0e,0x4000000,0x200000,0x90008301,0x2802000,0x962460,0x90008e00,0x24000000,0x400000,0x90009519,0x7c00100,0x250400,0x9000a16f,0x2802100,0x962460, +0x9000d200,0x80000000,0x218960,0x9000d202,0x2802000,0x962460,0x9000d202,0x2802100,0x962460,0x9000d202,0x7c00100,0x230400,0x9000e59d,0x2802100,0x962460,0x90010500, +0x4000000,0xe00000,0x900107a7,0x2802100,0x962460,0x900107a7,0x2802400,0x962460,0x900107a7,0x2802c00,0x962460,0x900107a7,0x4000000,0x1400000,0x900107a7,0x6800000, +0x1329800,0x900107a7,0x7c00100,0x220400,0x900107a7,0x7c00100,0x250400,0x900108a8,0x2802100,0x962460,0x900108a8,0x2806400,0x962460,0x900108a8,0x4000000,0x200000, +0x900108a8,0x4000000,0x400000,0x900108a8,0x4000010,0x400000,0x900108a8,0x6800000,0x1329800,0x900108a8,0x6800100,0x962540,0x900108a8,0x7c00100,0x230400,0x900108a8, +0xc000010,0x448000,0x90010908,0x7c00100,0x220400,0x90010a38,0x2802100,0x962460,0x90010ca9,0x2802100,0x962460,0x90010ca9,0x4000000,0x500000,0x90010ca9,0x4000010, +0xb00000,0x90010ca9,0x6800100,0x962540,0x90010ca9,0x7c00100,0x230400,0x90010d1b,0x4000000,0x500000,0x90010eaa,0x2802100,0x962460,0x90010eaa,0x2802400,0x962460, +0x90010eaa,0x2806400,0x962460,0x90010eaa,0x4000000,0x200000,0x90010eaa,0x4000000,0x400000,0x90010eaa,0x4000010,0x400000,0x90010eaa,0x6800000,0x1329800,0x90010eaa, +0x6800100,0x962540,0x90010eaa,0x7c00100,0x230400,0x90010eaa,0xc000010,0x448000,0x90010fab,0x7c00100,0x220400,0x90010fab,0x7c00100,0x250400,0x9002c300,0x4000000, +0x100000,0x900ac400,0x4000000,0xe0000d,0x900acd00,0x4000000,0x34e00000,0x900acd00,0x4000000,0xb6800000,0x900acf00,0x4000000,0x34e00000,0x900b0500,0x4000000,0x34e00000, +0x900b0500,0x4000000,0xb6800000,0x900b0b9a,0x7c00900,0x1230400,0x900b109a,0x7c00300,0xe30000,0x900b119a,0x7c00300,0xe30000,0x90408e06,0x24000000,0x400000,0xa0001004, +0x4000000,0x200000,0xa0001004,0x7c00100,0x230400,0xa000120f,0x2802100,0x962460,0xa000120f,0x2802400,0x962460,0xa000171a,0x2802100,0x962460,0xa000171a,0x2806400, +0x962460,0xa0002a00,0x4000000,0x1600000,0xa0003000,0x24000000,0x200000,0xa000581e,0x7c00100,0x230400,0xa0007300,0x24000000,0x200000,0xa0008301,0x2802400,0x962460, +0xa0008e00,0x24000000,0x400000,0xa000cf00,0x4000000,0x4e00000,0xa0010500,0x4000000,0x200000,0xa00114af,0x2802100,0x962460,0xa00114af,0x2802400,0x962460,0xa00114af, +0x2806400,0x962460,0xa00114af,0x6800000,0x1329800,0xa00114af,0x7c00100,0x230400,0xa00114af,0x7c00100,0x230560,0xa00116b0,0x2802100,0x962460,0xa00116b0,0x2802800, +0x962460,0xa00116b0,0x2806400,0x962460,0xa00116b0,0x4000000,0x400000,0xa00116b0,0x4000000,0x500000,0xa00116b0,0x4000010,0x400000,0xa00116b0,0x6800100,0x962540, +0xa00116b0,0x7c00100,0x230400,0xa00116b0,0x7c00100,0x230560,0xa00116b0,0xc000010,0x448000,0xa0011722,0x7c00100,0x230400,0xa00118b1,0x2802000,0x962460,0xa00118b1, +0x2802100,0x962460,0xa00118b1,0x2806400,0x962460,0xa00118b1,0x4000000,0x200000,0xa00118b1,0x4000000,0x400000,0xa00118b1,0x4000000,0x500000,0xa00118b1,0x6800100, +0x962540,0xa00118b1,0x7c00100,0x230400,0xa00118b1,0x7c00100,0x230560,0xa00118b1,0xc000010,0x448000,0xa00a4005,0x7c00100,0xe30400,0xa00a4711,0x7c40300,0xe30000, +0xa00ac400,0x4000000,0x4e00000,0xa00acb14,0x7c00100,0xe30000,0xa00acf00,0x4000000,0x34e00000,0xa00b0500,0x4000000,0x34e00000,0xa00b0500,0x4000000,0xb6800000,0xa00b0b96, +0x7c00900,0x1230400,0xa00b1211,0x7c40300,0xe30000,0xa00b1314,0x7c00100,0xe30000,0xa00b1596,0x7c00300,0xe30000,0xa040afac,0x6800400,0x962540,0xa08083ad,0x2802400, +0x962460,0xb0000a03,0x7c00100,0x220400,0xb0000b13,0x7c00100,0x2633800,0xb0001004,0x2802000,0x962460,0xb0001110,0x4000000,0x200000,0xb0001524,0x2802000,0x962460, +0xb0001615,0x4000000,0x500000,0xb000251b,0x7c00100,0x230400,0xb0007300,0x24000000,0x200000,0xb0008939,0x4000000,0x200000,0xb0008939,0x7c00100,0x230400,0xb0008e00, +0x24000000,0x200000,0xb0008e00,0x24000000,0x400000,0xb0008e00,0x24000010,0x400000,0xb0009257,0x2802000,0x962460,0xb0009257,0x4000000,0x1600000,0xb0009519,0x7c00100, +0x220400,0xb0009519,0x7c00100,0x250400,0xb0009a00,0x4000000,0x200000,0xb000b30a,0x2802100,0x962460,0xb000b30a,0x7c00100,0x230400,0xb000c178,0x80000000,0x218960, +0xb000c300,0x4000000,0x4200000,0xb000d202,0x2802000,0x962460,0xb000d476,0x6800100,0x962540,0xb000d476,0x7c00100,0x230400,0xb000e300,0x4000000,0x4e00000,0xb000fda1, +0x7c00100,0x1830000,0xb0010eaa,0x2802000,0x962460,0xb00116b0,0x7c00100,0x230400,0xb0011900,0x4000000,0x4e00000,0xb0011ab2,0x2802100,0x962460,0xb0011ab2,0x2802400, +0x962460,0xb0011ab2,0x2806400,0x962460,0xb0011ab2,0x4000000,0x200000,0xb0011ab2,0x6800100,0x962540,0xb0011ab2,0x7c00100,0x230400,0xb0011b0c,0x7c00100,0x230400, +0xb0011cb3,0x2802100,0x962460,0xb0011cb3,0x2806400,0x962460,0xb0011cb3,0x6800000,0x1329800,0xb0011cb3,0x6800100,0x962540,0xb0011cb3,0x7c00100,0x230400,0xb0011db6, +0x2802500,0x962460,0xb0011db6,0x6800000,0x1329800,0xb0011db6,0x7c00100,0x230400,0xb0011db6,0x7c00500,0x230400,0xb0011e00,0x4000000,0x200000,0xb0011e00,0x4000000, +0x1500000,0xb0011fb4,0x2802100,0x962460,0xb0011fb4,0x6800100,0x962540,0xb0011fb4,0x7c00100,0x230400,0xb0011fb4,0xc000010,0x248000,0xb0012000,0x4000000,0x200000, +0xb00121b5,0x4000000,0x200000,0xb00121b5,0x4000010,0x400000,0xb00121b5,0x7c00100,0x220400,0xb00121b5,0x7c00100,0x250400,0xb00121b5,0xc000010,0x448000,0xb00122b8, +0x4000000,0x200000,0xb00122b8,0x7c00100,0x230400,0xb00123b7,0x2802400,0x962460,0xb00123b7,0x4000000,0x200000,0xb00123b7,0x7c00100,0x230400,0xb00123b7,0xc000010, +0x248000,0xb00a4005,0x7c00100,0xe30400,0xb00a4711,0x7c40300,0xe30000,0xb00acf00,0x4000000,0x34e00000,0xb00b0500,0x4000000,0x34e00000,0xb00b0500,0x4000000,0x3ce00000, +0xb00b0500,0x4000000,0xb6800000,0xb00b109a,0x7c00300,0xe30000,0xb080e47c,0x2802000,0x962460,0xc0001524,0x4000000,0x500000,0xc0001a18,0x2806400,0x1862460,0xc0001a18, +0x7c00100,0x1830000,0xc0007300,0x24000000,0x200000,0xc0008e00,0x24000010,0x400000,0xc0009519,0x7c00100,0x220400,0xc0009519,0x7c00100,0x250400,0xc000c300,0x4000000, +0x420000f,0xc000d85c,0x2802100,0x962460,0xc000d85c,0x6800100,0x962540,0xc000d85c,0x7c00100,0x230400,0xc000dc99,0x7c00100,0x230400,0xc000e719,0x7c00100,0x220400, +0xc00107a7,0x7c00100,0x230400,0xc0010eaa,0x7c00100,0x230400,0xc00116b0,0x7c00100,0x230560,0xc0011900,0x4000000,0x4200000,0xc0012447,0,0x818820,0xc0012447, +0,0xc18820,0xc0012447,0,0x1418820,0xc00125b9,0x7c00100,0x230400,0xc00126bb,0x2802100,0x962460,0xc00126bb,0x2806400,0x962460,0xc00126bb,0x4000000, +0x500000,0xc00126bb,0x6800100,0x962540,0xc00126bb,0x7c00100,0x230400,0xc00127ba,0x2802400,0x962460,0xc00127ba,0x4000000,0x200000,0xc00127ba,0x6800000,0x1329800, +0xc00127ba,0x7c00100,0x230400,0xc00127ba,0x7c00900,0x230400,0xc0012800,0x4000000,0x200000,0xc0012b23,0x4000000,0x200000,0xc0012b23,0x4000000,0x400000,0xc0012b23, +0x4000000,0x1500000,0xc0012cbc,0x2802400,0x962460,0xc0012cbc,0x4000000,0x1600000,0xc0012cbc,0x6800000,0x1329800,0xc0012cbc,0x7c00100,0x230400,0xc00acf00,0x4000000, +0x34e00000,0xc00ae300,0x4000000,0x34e00000,0xc00b0500,0x4000000,0x34e00000,0xc00b0500,0x4000000,0xb6800000,0xc00b0b00,0x4000000,0x1200000,0xc00b0b00,0x7c00900,0x1230400, +0xc00b109a,0x7c00300,0xe30000,0xc00b2914,0x7c00100,0x2530000,0xc00b2916,0x7c00100,0x2530c00,0xc00b2a00,0x4000000,0x34e00000,0xc040af53,0x7c00100,0x230400,0xc0c12b7e, +0x4000000,0x200000,0xc14a44bf,0x4000000,0xe0000d,0xd000131f,0x2802c00,0x962460,0xd000171a,0x7c00100,0x230400,0xd0001821,0x2802100,0x962460,0xd0007300,0x24000000, +0x200000,0xd0008e00,0x24000000,0x200000,0xd0008f3a,0x2806000,0x962460,0xd0009519,0x7c00100,0x220400,0xd0009519,0x7c00100,0x250400,0xd000a500,0x4000000,0x200000, +0xd000c300,0x4000000,0x4e00000,0xd000d202,0x7c00100,0x230400,0xd000d476,0x7c00100,0x230400,0xd000d997,0x2802100,0x962460,0xd000d997,0x6800100,0x962540,0xd000e001, +0x2802100,0x962460,0xd000e700,0x4000400,0x200000,0xd000e719,0x7c00100,0x220400,0xd000e719,0x7c00500,0x23040f,0xd000fa00,0x4000000,0x4e00000,0xd0010eaa,0x4000010, +0x400000,0xd0010eaa,0x7c00100,0x230400,0xd0012dbd,0x4000000,0x200000,0xd0012dbd,0x7c00100,0x230400,0xd0012fbe,0x2802100,0x962460,0xd0012fbe,0x2802400,0x962460, +0xd0012fbe,0x2806400,0x962460,0xd0012fbe,0x4000000,0x400000,0xd0012fbe,0x6800000,0x1329800,0xd0012fbe,0x6800100,0x962540,0xd0012fbe,0x6800100,0x962541,0xd0012fbe, +0x6804400,0x962540,0xd0012fbe,0x7c00100,0x230400,0xd0012fbe,0x7c00100,0x230560,0xd0012fbe,0xc000010,0x448000,0xd0013183,0x7c00100,0x230400,0xd0013200,0x4000000, +0x200000,0xd0013200,0x6800000,0x1329805,0xd00134c0,0x2802100,0x962460,0xd00134c0,0x4000002,0x400000,0xd00134c0,0x7c00100,0x230400,0xd00a4305,0x7c00100,0xe30400, +0xd00a4611,0x7c40300,0xe30000,0xd00a4711,0x7c40300,0xe30000,0xd00a5e11,0x7c40300,0xe30000,0xd00acf00,0x4000000,0x34e00000,0xd00b0500,0x4000000,0x34e00000,0xd00b0500, +0x4000000,0xb6800000,0xd00b0b11,0x6800500,0x962540,0xd00b0bbf,0x2802200,0xc62460,0xd00b119a,0x7c00300,0xe30000,0xd00b2a00,0x4000000,0x34e00000,0xd00b2e11,0x7c40300, +0xe30000,0xd00b30bf,0x7c00300,0x230000,0xd00b339a,0x7c00300,0xe30000}; + +static const int32_t countPropsVectors=6999; +static const int32_t propsVectorsColumns=3; +static const uint16_t scriptExtensions[262]={ +0x800e,0x8019,8,0x8059,8,2,8,0x8038,8,6,8,0x8019,2,0x22,0x25,0xb6, +0x80c0,2,0x22,0x8025,2,0x11,2,0x22,0x54,0x79,0x7b,0xa7,0xb6,0x80b7,2,0x8022, +2,0x25,0x80c0,2,0x20,2,0x80b6,4,0xa,0xf,0x10,0x15,0x19,0x1a,0x1f,0x23, +0x24,0x89,0x97,0x809e,4,0xa,0xf,0x10,0x15,0x19,0x1a,0x1f,0x23,0x24,0x89,0x809e, +4,0xa,0xf,0x10,0x15,0x1a,0x1f,0x21,0x23,0x24,0x3a,0x89,0x91,0x99,0x9e,0xa0, +0xaf,0xb2,0xb3,0x80bb,4,0xa,0xf,0x10,0x15,0x1a,0x1f,0x21,0x23,0x24,0x30,0x3a, +0x89,0x91,0x99,0x9e,0xa0,0xaf,0xb2,0xb3,0x80bb,0xa,0x78,0xa0,0x80b2,0xa,0x69,4, +0x3a,0x8076,4,0x6f,0x10,0x80a4,0x10,0x74,0xf,0x809d,0xf,0x78,0x23,0x8089,0x23,0x7c, +0x15,0x80bb,0x15,0x80,0x1c,0x34,0x8076,0x1c,0x84,0xc,0x8019,0x2a,0x2b,0x2c,0x802d,0x1b, +0x805a,0x800a,4,0xa,0x15,0x8089,0xa,0x8089,4,0x800a,0xa,0x8097,0xa,0x15,0x1a,0x1f, +0x23,0x8024,0xa,0x80bb,4,0xa,0x15,0x1f,0x24,0x89,0x9e,0x80bb,0x8004,8,0x8022,0x19, +0x801b,0xa,0x19,0x8089,5,0x11,0x12,0x14,0x16,0x8029,5,0x11,0x12,0x14,0x8016,0x8011, +5,0x8011,0x11,0x14,0x8016,0x11,0x8019,0xa,0xf,0x10,0x78,0x91,0x99,0x9d,0x9e,0xa0, +0xa3,0x80b2,0xa,0xf,0x10,0x15,0x1a,0x78,0x91,0x99,0x9d,0x9e,0xa0,0xa3,0xb2,0x80bb, +0xa,0xf,0x10,0x15,0x78,0x91,0x99,0x9d,0x9e,0xa0,0xa3,0xb2,0x80bb,0xa,0x98,0xa, +0x8023,0xa,0xef,0x19,0x1c,0x804f,0x37,0x804e,2,0x8025,2,0xf8,0x2f,0x31,0x8053,0x2f, +0x8031,2,0x8007,0x89,0x7c,0x8087}; + +static const int32_t indexes[UPROPS_INDEX_COUNT]={0x2b96,0x2b96,0x2b96,0x2b96,0x6898,3,0x83ef,0x8472,0x8472,0x8472,0xb34c0,0x2a75a31,0,0,0,0}; + +#endif // INCLUDED_FROM_UCHAR_C diff --git a/thirdparty/icu4c/common/ucharstrie.cpp b/thirdparty/icu4c/common/ucharstrie.cpp new file mode 100644 index 0000000000..e0b33af519 --- /dev/null +++ b/thirdparty/icu4c/common/ucharstrie.cpp @@ -0,0 +1,414 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ucharstrie.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010nov14 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/appendable.h" +#include "unicode/ucharstrie.h" +#include "unicode/uobject.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + +UCharsTrie::~UCharsTrie() { + uprv_free(ownedArray_); +} + +UStringTrieResult +UCharsTrie::current() const { + const UChar *pos=pos_; + if(pos==NULL) { + return USTRINGTRIE_NO_MATCH; + } else { + int32_t node; + return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ? + valueResult(node) : USTRINGTRIE_NO_VALUE; + } +} + +UStringTrieResult +UCharsTrie::firstForCodePoint(UChar32 cp) { + return cp<=0xffff ? + first(cp) : + (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ? + next(U16_TRAIL(cp)) : + USTRINGTRIE_NO_MATCH); +} + +UStringTrieResult +UCharsTrie::nextForCodePoint(UChar32 cp) { + return cp<=0xffff ? + next(cp) : + (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ? + next(U16_TRAIL(cp)) : + USTRINGTRIE_NO_MATCH); +} + +UStringTrieResult +UCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) { + // Branch according to the current unit. + if(length==0) { + length=*pos++; + } + ++length; + // The length of the branch is the number of units to select from. + // The data structure encodes a binary search. + while(length>kMaxBranchLinearSubNodeLength) { + if(uchar<*pos++) { + length>>=1; + pos=jumpByDelta(pos); + } else { + length=length-(length>>1); + pos=skipDelta(pos); + } + } + // Drop down to linear search for the last few units. + // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3 + // and divides length by 2. + do { + if(uchar==*pos++) { + UStringTrieResult result; + int32_t node=*pos; + if(node&kValueIsFinal) { + // Leave the final value for getValue() to read. + result=USTRINGTRIE_FINAL_VALUE; + } else { + // Use the non-final value as the jump delta. + ++pos; + // int32_t delta=readValue(pos, node); + int32_t delta; + if(node<kMinTwoUnitValueLead) { + delta=node; + } else if(node<kThreeUnitValueLead) { + delta=((node-kMinTwoUnitValueLead)<<16)|*pos++; + } else { + delta=(pos[0]<<16)|pos[1]; + pos+=2; + } + // end readValue() + pos+=delta; + node=*pos; + result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; + } + pos_=pos; + return result; + } + --length; + pos=skipValue(pos); + } while(length>1); + if(uchar==*pos++) { + pos_=pos; + int32_t node=*pos; + return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; + } else { + stop(); + return USTRINGTRIE_NO_MATCH; + } +} + +UStringTrieResult +UCharsTrie::nextImpl(const UChar *pos, int32_t uchar) { + int32_t node=*pos++; + for(;;) { + if(node<kMinLinearMatch) { + return branchNext(pos, node, uchar); + } else if(node<kMinValueLead) { + // Match the first of length+1 units. + int32_t length=node-kMinLinearMatch; // Actual match length minus 1. + if(uchar==*pos++) { + remainingMatchLength_=--length; + pos_=pos; + return (length<0 && (node=*pos)>=kMinValueLead) ? + valueResult(node) : USTRINGTRIE_NO_VALUE; + } else { + // No match. + break; + } + } else if(node&kValueIsFinal) { + // No further matching units. + break; + } else { + // Skip intermediate value. + pos=skipNodeValue(pos, node); + node&=kNodeTypeMask; + } + } + stop(); + return USTRINGTRIE_NO_MATCH; +} + +UStringTrieResult +UCharsTrie::next(int32_t uchar) { + const UChar *pos=pos_; + if(pos==NULL) { + return USTRINGTRIE_NO_MATCH; + } + int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. + if(length>=0) { + // Remaining part of a linear-match node. + if(uchar==*pos++) { + remainingMatchLength_=--length; + pos_=pos; + int32_t node; + return (length<0 && (node=*pos)>=kMinValueLead) ? + valueResult(node) : USTRINGTRIE_NO_VALUE; + } else { + stop(); + return USTRINGTRIE_NO_MATCH; + } + } + return nextImpl(pos, uchar); +} + +UStringTrieResult +UCharsTrie::next(ConstChar16Ptr ptr, int32_t sLength) { + const UChar *s=ptr; + if(sLength<0 ? *s==0 : sLength==0) { + // Empty input. + return current(); + } + const UChar *pos=pos_; + if(pos==NULL) { + return USTRINGTRIE_NO_MATCH; + } + int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. + for(;;) { + // Fetch the next input unit, if there is one. + // Continue a linear-match node without rechecking sLength<0. + int32_t uchar; + if(sLength<0) { + for(;;) { + if((uchar=*s++)==0) { + remainingMatchLength_=length; + pos_=pos; + int32_t node; + return (length<0 && (node=*pos)>=kMinValueLead) ? + valueResult(node) : USTRINGTRIE_NO_VALUE; + } + if(length<0) { + remainingMatchLength_=length; + break; + } + if(uchar!=*pos) { + stop(); + return USTRINGTRIE_NO_MATCH; + } + ++pos; + --length; + } + } else { + for(;;) { + if(sLength==0) { + remainingMatchLength_=length; + pos_=pos; + int32_t node; + return (length<0 && (node=*pos)>=kMinValueLead) ? + valueResult(node) : USTRINGTRIE_NO_VALUE; + } + uchar=*s++; + --sLength; + if(length<0) { + remainingMatchLength_=length; + break; + } + if(uchar!=*pos) { + stop(); + return USTRINGTRIE_NO_MATCH; + } + ++pos; + --length; + } + } + int32_t node=*pos++; + for(;;) { + if(node<kMinLinearMatch) { + UStringTrieResult result=branchNext(pos, node, uchar); + if(result==USTRINGTRIE_NO_MATCH) { + return USTRINGTRIE_NO_MATCH; + } + // Fetch the next input unit, if there is one. + if(sLength<0) { + if((uchar=*s++)==0) { + return result; + } + } else { + if(sLength==0) { + return result; + } + uchar=*s++; + --sLength; + } + if(result==USTRINGTRIE_FINAL_VALUE) { + // No further matching units. + stop(); + return USTRINGTRIE_NO_MATCH; + } + pos=pos_; // branchNext() advanced pos and wrote it to pos_ . + node=*pos++; + } else if(node<kMinValueLead) { + // Match length+1 units. + length=node-kMinLinearMatch; // Actual match length minus 1. + if(uchar!=*pos) { + stop(); + return USTRINGTRIE_NO_MATCH; + } + ++pos; + --length; + break; + } else if(node&kValueIsFinal) { + // No further matching units. + stop(); + return USTRINGTRIE_NO_MATCH; + } else { + // Skip intermediate value. + pos=skipNodeValue(pos, node); + node&=kNodeTypeMask; + } + } + } +} + +const UChar * +UCharsTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length, + UBool haveUniqueValue, int32_t &uniqueValue) { + while(length>kMaxBranchLinearSubNodeLength) { + ++pos; // ignore the comparison unit + if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) { + return NULL; + } + length=length-(length>>1); + pos=skipDelta(pos); + } + do { + ++pos; // ignore a comparison unit + // handle its value + int32_t node=*pos++; + UBool isFinal=(UBool)(node>>15); + node&=0x7fff; + int32_t value=readValue(pos, node); + pos=skipValue(pos, node); + if(isFinal) { + if(haveUniqueValue) { + if(value!=uniqueValue) { + return NULL; + } + } else { + uniqueValue=value; + haveUniqueValue=TRUE; + } + } else { + if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) { + return NULL; + } + haveUniqueValue=TRUE; + } + } while(--length>1); + return pos+1; // ignore the last comparison unit +} + +UBool +UCharsTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) { + int32_t node=*pos++; + for(;;) { + if(node<kMinLinearMatch) { + if(node==0) { + node=*pos++; + } + pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue); + if(pos==NULL) { + return FALSE; + } + haveUniqueValue=TRUE; + node=*pos++; + } else if(node<kMinValueLead) { + // linear-match node + pos+=node-kMinLinearMatch+1; // Ignore the match units. + node=*pos++; + } else { + UBool isFinal=(UBool)(node>>15); + int32_t value; + if(isFinal) { + value=readValue(pos, node&0x7fff); + } else { + value=readNodeValue(pos, node); + } + if(haveUniqueValue) { + if(value!=uniqueValue) { + return FALSE; + } + } else { + uniqueValue=value; + haveUniqueValue=TRUE; + } + if(isFinal) { + return TRUE; + } + pos=skipNodeValue(pos, node); + node&=kNodeTypeMask; + } + } +} + +int32_t +UCharsTrie::getNextUChars(Appendable &out) const { + const UChar *pos=pos_; + if(pos==NULL) { + return 0; + } + if(remainingMatchLength_>=0) { + out.appendCodeUnit(*pos); // Next unit of a pending linear-match node. + return 1; + } + int32_t node=*pos++; + if(node>=kMinValueLead) { + if(node&kValueIsFinal) { + return 0; + } else { + pos=skipNodeValue(pos, node); + node&=kNodeTypeMask; + } + } + if(node<kMinLinearMatch) { + if(node==0) { + node=*pos++; + } + out.reserveAppendCapacity(++node); + getNextBranchUChars(pos, node, out); + return node; + } else { + // First unit of the linear-match node. + out.appendCodeUnit(*pos); + return 1; + } +} + +void +UCharsTrie::getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out) { + while(length>kMaxBranchLinearSubNodeLength) { + ++pos; // ignore the comparison unit + getNextBranchUChars(jumpByDelta(pos), length>>1, out); + length=length-(length>>1); + pos=skipDelta(pos); + } + do { + out.appendCodeUnit(*pos++); + pos=skipValue(pos); + } while(--length>1); + out.appendCodeUnit(*pos); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/ucharstriebuilder.cpp b/thirdparty/icu4c/common/ucharstriebuilder.cpp new file mode 100644 index 0000000000..049997a275 --- /dev/null +++ b/thirdparty/icu4c/common/ucharstriebuilder.cpp @@ -0,0 +1,443 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2012, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ucharstriebuilder.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010nov14 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/ucharstrie.h" +#include "unicode/ucharstriebuilder.h" +#include "unicode/unistr.h" +#include "unicode/ustring.h" +#include "cmemory.h" +#include "uarrsort.h" +#include "uassert.h" +#include "uhash.h" +#include "ustr_imp.h" + +U_NAMESPACE_BEGIN + +/* + * Note: This builder implementation stores (string, value) pairs with full copies + * of the 16-bit-unit sequences, until the UCharsTrie is built. + * It might(!) take less memory if we collected the data in a temporary, dynamic trie. + */ + +class UCharsTrieElement : public UMemory { +public: + // Use compiler's default constructor, initializes nothing. + + void setTo(const UnicodeString &s, int32_t val, UnicodeString &strings, UErrorCode &errorCode); + + UnicodeString getString(const UnicodeString &strings) const { + int32_t length=strings[stringOffset]; + return strings.tempSubString(stringOffset+1, length); + } + int32_t getStringLength(const UnicodeString &strings) const { + return strings[stringOffset]; + } + + UChar charAt(int32_t index, const UnicodeString &strings) const { + return strings[stringOffset+1+index]; + } + + int32_t getValue() const { return value; } + + int32_t compareStringTo(const UCharsTrieElement &o, const UnicodeString &strings) const; + +private: + // The first strings unit contains the string length. + // (Compared with a stringLength field here, this saves 2 bytes per string.) + int32_t stringOffset; + int32_t value; +}; + +void +UCharsTrieElement::setTo(const UnicodeString &s, int32_t val, + UnicodeString &strings, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + int32_t length=s.length(); + if(length>0xffff) { + // Too long: We store the length in 1 unit. + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + stringOffset=strings.length(); + strings.append((UChar)length); + value=val; + strings.append(s); +} + +int32_t +UCharsTrieElement::compareStringTo(const UCharsTrieElement &other, const UnicodeString &strings) const { + return getString(strings).compare(other.getString(strings)); +} + +UCharsTrieBuilder::UCharsTrieBuilder(UErrorCode & /*errorCode*/) + : elements(NULL), elementsCapacity(0), elementsLength(0), + uchars(NULL), ucharsCapacity(0), ucharsLength(0) {} + +UCharsTrieBuilder::~UCharsTrieBuilder() { + delete[] elements; + uprv_free(uchars); +} + +UCharsTrieBuilder & +UCharsTrieBuilder::add(const UnicodeString &s, int32_t value, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return *this; + } + if(ucharsLength>0) { + // Cannot add elements after building. + errorCode=U_NO_WRITE_PERMISSION; + return *this; + } + if(elementsLength==elementsCapacity) { + int32_t newCapacity; + if(elementsCapacity==0) { + newCapacity=1024; + } else { + newCapacity=4*elementsCapacity; + } + UCharsTrieElement *newElements=new UCharsTrieElement[newCapacity]; + if(newElements==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return *this; + } + if(elementsLength>0) { + uprv_memcpy(newElements, elements, (size_t)elementsLength*sizeof(UCharsTrieElement)); + } + delete[] elements; + elements=newElements; + elementsCapacity=newCapacity; + } + elements[elementsLength++].setTo(s, value, strings, errorCode); + if(U_SUCCESS(errorCode) && strings.isBogus()) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } + return *this; +} + +U_CDECL_BEGIN + +static int32_t U_CALLCONV +compareElementStrings(const void *context, const void *left, const void *right) { + const UnicodeString *strings=static_cast<const UnicodeString *>(context); + const UCharsTrieElement *leftElement=static_cast<const UCharsTrieElement *>(left); + const UCharsTrieElement *rightElement=static_cast<const UCharsTrieElement *>(right); + return leftElement->compareStringTo(*rightElement, *strings); +} + +U_CDECL_END + +UCharsTrie * +UCharsTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { + buildUChars(buildOption, errorCode); + UCharsTrie *newTrie=NULL; + if(U_SUCCESS(errorCode)) { + newTrie=new UCharsTrie(uchars, uchars+(ucharsCapacity-ucharsLength)); + if(newTrie==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } else { + uchars=NULL; // The new trie now owns the array. + ucharsCapacity=0; + } + } + return newTrie; +} + +UnicodeString & +UCharsTrieBuilder::buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result, + UErrorCode &errorCode) { + buildUChars(buildOption, errorCode); + if(U_SUCCESS(errorCode)) { + result.setTo(FALSE, uchars+(ucharsCapacity-ucharsLength), ucharsLength); + } + return result; +} + +void +UCharsTrieBuilder::buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + if(uchars!=NULL && ucharsLength>0) { + // Already built. + return; + } + if(ucharsLength==0) { + if(elementsLength==0) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + if(strings.isBogus()) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_sortArray(elements, elementsLength, (int32_t)sizeof(UCharsTrieElement), + compareElementStrings, &strings, + FALSE, // need not be a stable sort + &errorCode); + if(U_FAILURE(errorCode)) { + return; + } + // Duplicate strings are not allowed. + UnicodeString prev=elements[0].getString(strings); + for(int32_t i=1; i<elementsLength; ++i) { + UnicodeString current=elements[i].getString(strings); + if(prev==current) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + prev.fastCopyFrom(current); + } + } + // Create and UChar-serialize the trie for the elements. + ucharsLength=0; + int32_t capacity=strings.length(); + if(capacity<1024) { + capacity=1024; + } + if(ucharsCapacity<capacity) { + uprv_free(uchars); + uchars=static_cast<UChar *>(uprv_malloc(capacity*2)); + if(uchars==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + ucharsCapacity=0; + return; + } + ucharsCapacity=capacity; + } + StringTrieBuilder::build(buildOption, elementsLength, errorCode); + if(uchars==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } +} + +int32_t +UCharsTrieBuilder::getElementStringLength(int32_t i) const { + return elements[i].getStringLength(strings); +} + +UChar +UCharsTrieBuilder::getElementUnit(int32_t i, int32_t unitIndex) const { + return elements[i].charAt(unitIndex, strings); +} + +int32_t +UCharsTrieBuilder::getElementValue(int32_t i) const { + return elements[i].getValue(); +} + +int32_t +UCharsTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const { + const UCharsTrieElement &firstElement=elements[first]; + const UCharsTrieElement &lastElement=elements[last]; + int32_t minStringLength=firstElement.getStringLength(strings); + while(++unitIndex<minStringLength && + firstElement.charAt(unitIndex, strings)== + lastElement.charAt(unitIndex, strings)) {} + return unitIndex; +} + +int32_t +UCharsTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const { + int32_t length=0; // Number of different units at unitIndex. + int32_t i=start; + do { + UChar unit=elements[i++].charAt(unitIndex, strings); + while(i<limit && unit==elements[i].charAt(unitIndex, strings)) { + ++i; + } + ++length; + } while(i<limit); + return length; +} + +int32_t +UCharsTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const { + do { + UChar unit=elements[i++].charAt(unitIndex, strings); + while(unit==elements[i].charAt(unitIndex, strings)) { + ++i; + } + } while(--count>0); + return i; +} + +int32_t +UCharsTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const { + while(unit==elements[i].charAt(unitIndex, strings)) { + ++i; + } + return i; +} + +UCharsTrieBuilder::UCTLinearMatchNode::UCTLinearMatchNode(const UChar *units, int32_t len, Node *nextNode) + : LinearMatchNode(len, nextNode), s(units) { + hash=hash*37u+ustr_hashUCharsN(units, len); +} + +UBool +UCharsTrieBuilder::UCTLinearMatchNode::operator==(const Node &other) const { + if(this==&other) { + return TRUE; + } + if(!LinearMatchNode::operator==(other)) { + return FALSE; + } + const UCTLinearMatchNode &o=(const UCTLinearMatchNode &)other; + return 0==u_memcmp(s, o.s, length); +} + +void +UCharsTrieBuilder::UCTLinearMatchNode::write(StringTrieBuilder &builder) { + UCharsTrieBuilder &b=(UCharsTrieBuilder &)builder; + next->write(builder); + b.write(s, length); + offset=b.writeValueAndType(hasValue, value, b.getMinLinearMatch()+length-1); +} + +StringTrieBuilder::Node * +UCharsTrieBuilder::createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, + Node *nextNode) const { + return new UCTLinearMatchNode( + elements[i].getString(strings).getBuffer()+unitIndex, + length, + nextNode); +} + +UBool +UCharsTrieBuilder::ensureCapacity(int32_t length) { + if(uchars==NULL) { + return FALSE; // previous memory allocation had failed + } + if(length>ucharsCapacity) { + int32_t newCapacity=ucharsCapacity; + do { + newCapacity*=2; + } while(newCapacity<=length); + UChar *newUChars=static_cast<UChar *>(uprv_malloc(newCapacity*2)); + if(newUChars==NULL) { + // unable to allocate memory + uprv_free(uchars); + uchars=NULL; + ucharsCapacity=0; + return FALSE; + } + u_memcpy(newUChars+(newCapacity-ucharsLength), + uchars+(ucharsCapacity-ucharsLength), ucharsLength); + uprv_free(uchars); + uchars=newUChars; + ucharsCapacity=newCapacity; + } + return TRUE; +} + +int32_t +UCharsTrieBuilder::write(int32_t unit) { + int32_t newLength=ucharsLength+1; + if(ensureCapacity(newLength)) { + ucharsLength=newLength; + uchars[ucharsCapacity-ucharsLength]=(UChar)unit; + } + return ucharsLength; +} + +int32_t +UCharsTrieBuilder::write(const UChar *s, int32_t length) { + int32_t newLength=ucharsLength+length; + if(ensureCapacity(newLength)) { + ucharsLength=newLength; + u_memcpy(uchars+(ucharsCapacity-ucharsLength), s, length); + } + return ucharsLength; +} + +int32_t +UCharsTrieBuilder::writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) { + return write(elements[i].getString(strings).getBuffer()+unitIndex, length); +} + +int32_t +UCharsTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) { + if(0<=i && i<=UCharsTrie::kMaxOneUnitValue) { + return write(i|(isFinal<<15)); + } + UChar intUnits[3]; + int32_t length; + if(i<0 || i>UCharsTrie::kMaxTwoUnitValue) { + intUnits[0]=(UChar)(UCharsTrie::kThreeUnitValueLead); + intUnits[1]=(UChar)((uint32_t)i>>16); + intUnits[2]=(UChar)i; + length=3; + // } else if(i<=UCharsTrie::kMaxOneUnitValue) { + // intUnits[0]=(UChar)(i); + // length=1; + } else { + intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitValueLead+(i>>16)); + intUnits[1]=(UChar)i; + length=2; + } + intUnits[0]=(UChar)(intUnits[0]|(isFinal<<15)); + return write(intUnits, length); +} + +int32_t +UCharsTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) { + if(!hasValue) { + return write(node); + } + UChar intUnits[3]; + int32_t length; + if(value<0 || value>UCharsTrie::kMaxTwoUnitNodeValue) { + intUnits[0]=(UChar)(UCharsTrie::kThreeUnitNodeValueLead); + intUnits[1]=(UChar)((uint32_t)value>>16); + intUnits[2]=(UChar)value; + length=3; + } else if(value<=UCharsTrie::kMaxOneUnitNodeValue) { + intUnits[0]=(UChar)((value+1)<<6); + length=1; + } else { + intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitNodeValueLead+((value>>10)&0x7fc0)); + intUnits[1]=(UChar)value; + length=2; + } + intUnits[0]|=(UChar)node; + return write(intUnits, length); +} + +int32_t +UCharsTrieBuilder::writeDeltaTo(int32_t jumpTarget) { + int32_t i=ucharsLength-jumpTarget; + U_ASSERT(i>=0); + if(i<=UCharsTrie::kMaxOneUnitDelta) { + return write(i); + } + UChar intUnits[3]; + int32_t length; + if(i<=UCharsTrie::kMaxTwoUnitDelta) { + intUnits[0]=(UChar)(UCharsTrie::kMinTwoUnitDeltaLead+(i>>16)); + length=1; + } else { + intUnits[0]=(UChar)(UCharsTrie::kThreeUnitDeltaLead); + intUnits[1]=(UChar)(i>>16); + length=2; + } + intUnits[length++]=(UChar)i; + return write(intUnits, length); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/ucharstrieiterator.cpp b/thirdparty/icu4c/common/ucharstrieiterator.cpp new file mode 100644 index 0000000000..b3132241fe --- /dev/null +++ b/thirdparty/icu4c/common/ucharstrieiterator.cpp @@ -0,0 +1,215 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ucharstrieiterator.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010nov15 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/ucharstrie.h" +#include "unicode/unistr.h" +#include "uvectr32.h" + +U_NAMESPACE_BEGIN + +UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, + UErrorCode &errorCode) + : uchars_(trieUChars), + pos_(uchars_), initialPos_(uchars_), + remainingMatchLength_(-1), initialRemainingMatchLength_(-1), + skipValue_(FALSE), + maxLength_(maxStringLength), value_(0), stack_(NULL) { + if(U_FAILURE(errorCode)) { + return; + } + // stack_ is a pointer so that it's easy to turn ucharstrie.h into + // a public API header for which we would want it to depend only on + // other public headers. + // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway + // via the UnicodeString and UVector32 implementations, so this additional + // cost is minimal. + stack_=new UVector32(errorCode); + if(stack_==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } +} + +UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength, + UErrorCode &errorCode) + : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_), + remainingMatchLength_(trie.remainingMatchLength_), + initialRemainingMatchLength_(trie.remainingMatchLength_), + skipValue_(FALSE), + maxLength_(maxStringLength), value_(0), stack_(NULL) { + if(U_FAILURE(errorCode)) { + return; + } + stack_=new UVector32(errorCode); + if(U_FAILURE(errorCode)) { + return; + } + if(stack_==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. + if(length>=0) { + // Pending linear-match node, append remaining UChars to str_. + ++length; + if(maxLength_>0 && length>maxLength_) { + length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. + } + str_.append(pos_, length); + pos_+=length; + remainingMatchLength_-=length; + } +} + +UCharsTrie::Iterator::~Iterator() { + delete stack_; +} + +UCharsTrie::Iterator & +UCharsTrie::Iterator::reset() { + pos_=initialPos_; + remainingMatchLength_=initialRemainingMatchLength_; + skipValue_=FALSE; + int32_t length=remainingMatchLength_+1; // Remaining match length. + if(maxLength_>0 && length>maxLength_) { + length=maxLength_; + } + str_.truncate(length); + pos_+=length; + remainingMatchLength_-=length; + stack_->setSize(0); + return *this; +} + +UBool +UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } + +UBool +UCharsTrie::Iterator::next(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return FALSE; + } + const UChar *pos=pos_; + if(pos==NULL) { + if(stack_->isEmpty()) { + return FALSE; + } + // Pop the state off the stack and continue with the next outbound edge of + // the branch node. + int32_t stackSize=stack_->size(); + int32_t length=stack_->elementAti(stackSize-1); + pos=uchars_+stack_->elementAti(stackSize-2); + stack_->setSize(stackSize-2); + str_.truncate(length&0xffff); + length=(int32_t)((uint32_t)length>>16); + if(length>1) { + pos=branchNext(pos, length, errorCode); + if(pos==NULL) { + return TRUE; // Reached a final value. + } + } else { + str_.append(*pos++); + } + } + if(remainingMatchLength_>=0) { + // We only get here if we started in a pending linear-match node + // with more than maxLength remaining units. + return truncateAndStop(); + } + for(;;) { + int32_t node=*pos++; + if(node>=kMinValueLead) { + if(skipValue_) { + pos=skipNodeValue(pos, node); + node&=kNodeTypeMask; + skipValue_=FALSE; + } else { + // Deliver value for the string so far. + UBool isFinal=(UBool)(node>>15); + if(isFinal) { + value_=readValue(pos, node&0x7fff); + } else { + value_=readNodeValue(pos, node); + } + if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) { + pos_=NULL; + } else { + // We cannot skip the value right here because it shares its + // lead unit with a match node which we have to evaluate + // next time. + // Instead, keep pos_ on the node lead unit itself. + pos_=pos-1; + skipValue_=TRUE; + } + return TRUE; + } + } + if(maxLength_>0 && str_.length()==maxLength_) { + return truncateAndStop(); + } + if(node<kMinLinearMatch) { + if(node==0) { + node=*pos++; + } + pos=branchNext(pos, node+1, errorCode); + if(pos==NULL) { + return TRUE; // Reached a final value. + } + } else { + // Linear-match node, append length units to str_. + int32_t length=node-kMinLinearMatch+1; + if(maxLength_>0 && str_.length()+length>maxLength_) { + str_.append(pos, maxLength_-str_.length()); + return truncateAndStop(); + } + str_.append(pos, length); + pos+=length; + } + } +} + +// Branch node, needs to take the first outbound edge and push state for the rest. +const UChar * +UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) { + while(length>kMaxBranchLinearSubNodeLength) { + ++pos; // ignore the comparison unit + // Push state for the greater-or-equal edge. + stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode); + stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode); + // Follow the less-than edge. + length>>=1; + pos=jumpByDelta(pos); + } + // List of key-value pairs where values are either final values or jump deltas. + // Read the first (key, value) pair. + UChar trieUnit=*pos++; + int32_t node=*pos++; + UBool isFinal=(UBool)(node>>15); + int32_t value=readValue(pos, node&=0x7fff); + pos=skipValue(pos, node); + stack_->addElement((int32_t)(pos-uchars_), errorCode); + stack_->addElement(((length-1)<<16)|str_.length(), errorCode); + str_.append(trieUnit); + if(isFinal) { + pos_=NULL; + value_=value; + return NULL; + } else { + return pos+value; + } +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/uchriter.cpp b/thirdparty/icu4c/common/uchriter.cpp new file mode 100644 index 0000000000..bedbabc74c --- /dev/null +++ b/thirdparty/icu4c/common/uchriter.cpp @@ -0,0 +1,367 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1998-2012, International Business Machines Corporation and +* others. All Rights Reserved. +****************************************************************************** +*/ + +#include "utypeinfo.h" // for 'typeid' to work + +#include "unicode/uchriter.h" +#include "unicode/ustring.h" +#include "unicode/utf16.h" +#include "ustr_imp.h" + +U_NAMESPACE_BEGIN + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UCharCharacterIterator) + +UCharCharacterIterator::UCharCharacterIterator() + : CharacterIterator(), + text(0) +{ + // never default construct! +} + +UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, + int32_t length) + : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0), + text(textPtr) +{ +} + +UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, + int32_t length, + int32_t position) + : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0, position), + text(textPtr) +{ +} + +UCharCharacterIterator::UCharCharacterIterator(ConstChar16Ptr textPtr, + int32_t length, + int32_t textBegin, + int32_t textEnd, + int32_t position) + : CharacterIterator(textPtr != 0 ? (length>=0 ? length : u_strlen(textPtr)) : 0, textBegin, textEnd, position), + text(textPtr) +{ +} + +UCharCharacterIterator::UCharCharacterIterator(const UCharCharacterIterator& that) +: CharacterIterator(that), + text(that.text) +{ +} + +UCharCharacterIterator& +UCharCharacterIterator::operator=(const UCharCharacterIterator& that) { + CharacterIterator::operator=(that); + text = that.text; + return *this; +} + +UCharCharacterIterator::~UCharCharacterIterator() { +} + +UBool +UCharCharacterIterator::operator==(const ForwardCharacterIterator& that) const { + if (this == &that) { + return TRUE; + } + if (typeid(*this) != typeid(that)) { + return FALSE; + } + + UCharCharacterIterator& realThat = (UCharCharacterIterator&)that; + + return text == realThat.text + && textLength == realThat.textLength + && pos == realThat.pos + && begin == realThat.begin + && end == realThat.end; +} + +int32_t +UCharCharacterIterator::hashCode() const { + return ustr_hashUCharsN(text, textLength) ^ pos ^ begin ^ end; +} + +UCharCharacterIterator* +UCharCharacterIterator::clone() const { + return new UCharCharacterIterator(*this); +} + +UChar +UCharCharacterIterator::first() { + pos = begin; + if(pos < end) { + return text[pos]; + } else { + return DONE; + } +} + +UChar +UCharCharacterIterator::firstPostInc() { + pos = begin; + if(pos < end) { + return text[pos++]; + } else { + return DONE; + } +} + +UChar +UCharCharacterIterator::last() { + pos = end; + if(pos > begin) { + return text[--pos]; + } else { + return DONE; + } +} + +UChar +UCharCharacterIterator::setIndex(int32_t position) { + if(position < begin) { + pos = begin; + } else if(position > end) { + pos = end; + } else { + pos = position; + } + if(pos < end) { + return text[pos]; + } else { + return DONE; + } +} + +UChar +UCharCharacterIterator::current() const { + if (pos >= begin && pos < end) { + return text[pos]; + } else { + return DONE; + } +} + +UChar +UCharCharacterIterator::next() { + if (pos + 1 < end) { + return text[++pos]; + } else { + /* make current() return DONE */ + pos = end; + return DONE; + } +} + +UChar +UCharCharacterIterator::nextPostInc() { + if (pos < end) { + return text[pos++]; + } else { + return DONE; + } +} + +UBool +UCharCharacterIterator::hasNext() { + return (UBool)(pos < end ? TRUE : FALSE); +} + +UChar +UCharCharacterIterator::previous() { + if (pos > begin) { + return text[--pos]; + } else { + return DONE; + } +} + +UBool +UCharCharacterIterator::hasPrevious() { + return (UBool)(pos > begin ? TRUE : FALSE); +} + +UChar32 +UCharCharacterIterator::first32() { + pos = begin; + if(pos < end) { + int32_t i = pos; + UChar32 c; + U16_NEXT(text, i, end, c); + return c; + } else { + return DONE; + } +} + +UChar32 +UCharCharacterIterator::first32PostInc() { + pos = begin; + if(pos < end) { + UChar32 c; + U16_NEXT(text, pos, end, c); + return c; + } else { + return DONE; + } +} + +UChar32 +UCharCharacterIterator::last32() { + pos = end; + if(pos > begin) { + UChar32 c; + U16_PREV(text, begin, pos, c); + return c; + } else { + return DONE; + } +} + +UChar32 +UCharCharacterIterator::setIndex32(int32_t position) { + if(position < begin) { + position = begin; + } else if(position > end) { + position = end; + } + if(position < end) { + U16_SET_CP_START(text, begin, position); + int32_t i = this->pos = position; + UChar32 c; + U16_NEXT(text, i, end, c); + return c; + } else { + this->pos = position; + return DONE; + } +} + +UChar32 +UCharCharacterIterator::current32() const { + if (pos >= begin && pos < end) { + UChar32 c; + U16_GET(text, begin, pos, end, c); + return c; + } else { + return DONE; + } +} + +UChar32 +UCharCharacterIterator::next32() { + if (pos < end) { + U16_FWD_1(text, pos, end); + if(pos < end) { + int32_t i = pos; + UChar32 c; + U16_NEXT(text, i, end, c); + return c; + } + } + /* make current() return DONE */ + pos = end; + return DONE; +} + +UChar32 +UCharCharacterIterator::next32PostInc() { + if (pos < end) { + UChar32 c; + U16_NEXT(text, pos, end, c); + return c; + } else { + return DONE; + } +} + +UChar32 +UCharCharacterIterator::previous32() { + if (pos > begin) { + UChar32 c; + U16_PREV(text, begin, pos, c); + return c; + } else { + return DONE; + } +} + +int32_t +UCharCharacterIterator::move(int32_t delta, CharacterIterator::EOrigin origin) { + switch(origin) { + case kStart: + pos = begin + delta; + break; + case kCurrent: + pos += delta; + break; + case kEnd: + pos = end + delta; + break; + default: + break; + } + + if(pos < begin) { + pos = begin; + } else if(pos > end) { + pos = end; + } + + return pos; +} + +int32_t +UCharCharacterIterator::move32(int32_t delta, CharacterIterator::EOrigin origin) { + // this implementation relies on the "safe" version of the UTF macros + // (or the trustworthiness of the caller) + switch(origin) { + case kStart: + pos = begin; + if(delta > 0) { + U16_FWD_N(text, pos, end, delta); + } + break; + case kCurrent: + if(delta > 0) { + U16_FWD_N(text, pos, end, delta); + } else { + U16_BACK_N(text, begin, pos, -delta); + } + break; + case kEnd: + pos = end; + if(delta < 0) { + U16_BACK_N(text, begin, pos, -delta); + } + break; + default: + break; + } + + return pos; +} + +void UCharCharacterIterator::setText(ConstChar16Ptr newText, + int32_t newTextLength) { + text = newText; + if(newText == 0 || newTextLength < 0) { + newTextLength = 0; + } + end = textLength = newTextLength; + pos = begin = 0; +} + +void +UCharCharacterIterator::getText(UnicodeString& result) { + result = UnicodeString(text, textLength); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/ucln.h b/thirdparty/icu4c/common/ucln.h new file mode 100644 index 0000000000..fe6666efed --- /dev/null +++ b/thirdparty/icu4c/common/ucln.h @@ -0,0 +1,91 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ucln.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001July05 +* created by: George Rhoten +*/ + +#ifndef __UCLN_H__ +#define __UCLN_H__ + +#include "unicode/utypes.h" + +/** These are the functions used to register a library's memory cleanup + * functions. Each library should define a single library register function + * to call this API. In the i18n library, it is ucln_i18n_registerCleanup(). + * + * None of the cleanup functions should use a mutex to clean up an API's + * allocated memory because a cleanup function is not meant to be thread safe, + * and plenty of data cannot be reference counted in order to make sure that + * no one else needs the allocated data. + * + * In order to make a cleanup function get called when u_cleanup is called, + * You should add your function to the library specific cleanup function. + * If the cleanup function is not in the common library, the code that + * allocates the memory should call the library specific cleanup function. + * For instance, in the i18n library, any memory allocated statically must + * call ucln_i18n_registerCleanup() from the ucln_in.h header. These library + * cleanup functions are needed in order to prevent a circular dependency + * between the common library and any other library. + * + * The order of the cleanup is very important. In general, an API that + * depends on a second API should be cleaned up before the second API. + * For instance, the default converter in ustring depends upon the converter + * API. So the default converter should be closed before the converter API + * has its cache flushed. This will prevent any memory leaks due to + * reference counting. + * + * Please see common/ucln_cmn.{h,c} and i18n/ucln_in.{h,c} for examples. + */ + +/** + * Data Type for cleanup function selector. These roughly correspond to libraries. + */ +typedef enum ECleanupLibraryType { + UCLN_START = -1, + UCLN_UPLUG, /* ICU plugins */ + UCLN_CUSTOM, /* Custom is for anyone else. */ + UCLN_CTESTFW, + UCLN_TOOLUTIL, + UCLN_LAYOUTEX, + UCLN_LAYOUT, + UCLN_IO, + UCLN_I18N, + UCLN_COMMON /* This must be the last one to cleanup. */ +} ECleanupLibraryType; + +/** + * Data type for cleanup function pointer + */ +U_CDECL_BEGIN +typedef UBool U_CALLCONV cleanupFunc(void); +typedef void U_CALLCONV initFunc(UErrorCode *); +U_CDECL_END + +/** + * Register a cleanup function + * @param type which library to register for. + * @param func the function pointer + */ +U_CAPI void U_EXPORT2 ucln_registerCleanup(ECleanupLibraryType type, + cleanupFunc *func); + +/** + * Request cleanup for one specific library. + * Not thread safe. + * @param type which library to cleanup + */ +U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType type); + +#endif diff --git a/thirdparty/icu4c/common/ucln_cmn.cpp b/thirdparty/icu4c/common/ucln_cmn.cpp new file mode 100644 index 0000000000..f3e07c6b89 --- /dev/null +++ b/thirdparty/icu4c/common/ucln_cmn.cpp @@ -0,0 +1,124 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2001-2014, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* file name: ucln_cmn.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001July05 +* created by: George Rhoten +*/ + +#include "unicode/utypes.h" +#include "unicode/uclean.h" +#include "cmemory.h" +#include "mutex.h" +#include "uassert.h" +#include "ucln.h" +#include "ucln_cmn.h" +#include "utracimp.h" +#include "umutex.h" + +/** Auto-client for UCLN_COMMON **/ +#define UCLN_TYPE_IS_COMMON +#include "ucln_imp.h" + +static cleanupFunc *gCommonCleanupFunctions[UCLN_COMMON_COUNT]; +static cleanupFunc *gLibCleanupFunctions[UCLN_COMMON]; + + +/************************************************ + The cleanup order is important in this function. + Please be sure that you have read ucln.h + ************************************************/ +U_CAPI void U_EXPORT2 +u_cleanup(void) +{ + UTRACE_ENTRY_OC(UTRACE_U_CLEANUP); + icu::umtx_lock(NULL); /* Force a memory barrier, so that we are sure to see */ + icu::umtx_unlock(NULL); /* all state left around by any other threads. */ + + ucln_lib_cleanup(); + + cmemory_cleanup(); /* undo any heap functions set by u_setMemoryFunctions(). */ + UTRACE_EXIT(); /* Must be before utrace_cleanup(), which turns off tracing. */ +/*#if U_ENABLE_TRACING*/ + utrace_cleanup(); +/*#endif*/ +} + +U_CAPI void U_EXPORT2 ucln_cleanupOne(ECleanupLibraryType libType) +{ + if (gLibCleanupFunctions[libType]) + { + gLibCleanupFunctions[libType](); + gLibCleanupFunctions[libType] = NULL; + } +} + +U_CFUNC void +ucln_common_registerCleanup(ECleanupCommonType type, + cleanupFunc *func) +{ + // Thread safety messiness: From ticket 10295, calls to registerCleanup() may occur + // concurrently. Although such cases should be storing the same value, they raise errors + // from the thread sanity checker. Doing the store within a mutex avoids those. + // BUT that can trigger a recursive entry into std::call_once() in umutex.cpp when this code, + // running from the call_once function, tries to grab the ICU global mutex, which + // re-enters the mutex init path. So, work-around by special casing UCLN_COMMON_MUTEX, not + // using the ICU global mutex for it. + // + // No other point in ICU uses std::call_once(). + + U_ASSERT(UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT); + if (type == UCLN_COMMON_MUTEX) { + gCommonCleanupFunctions[type] = func; + } else if (UCLN_COMMON_START < type && type < UCLN_COMMON_COUNT) { + icu::Mutex m; // See ticket 10295 for discussion. + gCommonCleanupFunctions[type] = func; + } +#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL)) + ucln_registerAutomaticCleanup(); +#endif +} + +// Note: ucln_registerCleanup() is called with the ICU global mutex locked. +// Be aware if adding anything to the function. +// See ticket 10295 for discussion. + +U_CAPI void U_EXPORT2 +ucln_registerCleanup(ECleanupLibraryType type, + cleanupFunc *func) +{ + U_ASSERT(UCLN_START < type && type < UCLN_COMMON); + if (UCLN_START < type && type < UCLN_COMMON) + { + gLibCleanupFunctions[type] = func; + } +} + +U_CFUNC UBool ucln_lib_cleanup(void) { + int32_t libType = UCLN_START; + int32_t commonFunc = UCLN_COMMON_START; + + for (libType++; libType<UCLN_COMMON; libType++) { + ucln_cleanupOne(static_cast<ECleanupLibraryType>(libType)); + } + + for (commonFunc++; commonFunc<UCLN_COMMON_COUNT; commonFunc++) { + if (gCommonCleanupFunctions[commonFunc]) + { + gCommonCleanupFunctions[commonFunc](); + gCommonCleanupFunctions[commonFunc] = NULL; + } + } +#if !UCLN_NO_AUTO_CLEANUP && (defined(UCLN_AUTO_ATEXIT) || defined(UCLN_AUTO_LOCAL)) + ucln_unRegisterAutomaticCleanup(); +#endif + return TRUE; +} diff --git a/thirdparty/icu4c/common/ucln_cmn.h b/thirdparty/icu4c/common/ucln_cmn.h new file mode 100644 index 0000000000..44b73e94da --- /dev/null +++ b/thirdparty/icu4c/common/ucln_cmn.h @@ -0,0 +1,77 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2001-2016, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* file name: ucln_cmn.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001July05 +* created by: George Rhoten +*/ + +#ifndef __UCLN_CMN_H__ +#define __UCLN_CMN_H__ + +#include "unicode/utypes.h" +#include "ucln.h" + +/* These are the cleanup functions for various APIs. */ +/* @return true if cleanup complete successfully.*/ +U_CFUNC UBool utrace_cleanup(void); + +U_CFUNC UBool ucln_lib_cleanup(void); + +/* +Please keep the order of enums declared in same order +as the cleanup functions are suppose to be called. */ +typedef enum ECleanupCommonType { + UCLN_COMMON_START = -1, + UCLN_COMMON_NUMPARSE_UNISETS, + UCLN_COMMON_USPREP, + UCLN_COMMON_BREAKITERATOR, + UCLN_COMMON_RBBI, + UCLN_COMMON_SERVICE, + UCLN_COMMON_LOCALE_KEY_TYPE, + UCLN_COMMON_LOCALE, + UCLN_COMMON_LOCALE_ALIAS, + UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED, + UCLN_COMMON_LOCALE_AVAILABLE, + UCLN_COMMON_LIKELY_SUBTAGS, + UCLN_COMMON_LOCALE_DISTANCE, + UCLN_COMMON_ULOC, + UCLN_COMMON_CURRENCY, + UCLN_COMMON_LOADED_NORMALIZER2, + UCLN_COMMON_NORMALIZER2, + UCLN_COMMON_CHARACTERPROPERTIES, + UCLN_COMMON_USET, + UCLN_COMMON_UNAMES, + UCLN_COMMON_UPROPS, + UCLN_COMMON_UCNV, + UCLN_COMMON_UCNV_IO, + UCLN_COMMON_UDATA, + UCLN_COMMON_PUTIL, + UCLN_COMMON_UINIT, + + /* + Unified caches caches collation stuff. Collation data structures + contain resource bundles which means that unified cache cleanup + must happen before resource bundle clean up. + */ + UCLN_COMMON_UNIFIED_CACHE, + UCLN_COMMON_URES, + UCLN_COMMON_MUTEX, // Mutexes should be the last to be cleaned up. + UCLN_COMMON_COUNT /* This must be last */ +} ECleanupCommonType; + +/* Main library cleanup registration function. */ +/* See common/ucln.h for details on adding a cleanup function. */ +/* Note: the global mutex must not be held when calling this function. */ +U_CFUNC void U_EXPORT2 ucln_common_registerCleanup(ECleanupCommonType type, + cleanupFunc *func); + +#endif diff --git a/thirdparty/icu4c/common/ucln_imp.h b/thirdparty/icu4c/common/ucln_imp.h new file mode 100644 index 0000000000..63a54c86f6 --- /dev/null +++ b/thirdparty/icu4c/common/ucln_imp.h @@ -0,0 +1,182 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2009-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ucln_imp.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* This file contains the platform specific implementation of per-library cleanup. +* +*/ + + +#ifndef __UCLN_IMP_H__ +#define __UCLN_IMP_H__ + +#include "ucln.h" +#include <stdlib.h> + +/** + * Auto cleanup of ICU libraries + * There are several methods in per library cleanup of icu libraries: + * 1) Compiler/Platform based cleanup: + * a) Windows MSVC uses DllMain() + * b) GCC uses destructor function attribute + * c) Sun Studio, AIX VA, and HP-UX aCC uses a linker option to set the exit function + * 2) Using atexit() + * 3) Implementing own automatic cleanup functions + * + * For option 1, ensure that UCLN_NO_AUTO_CLEANUP is set to 0 by using --enable-auto-cleanup + * configure option or by otherwise setting UCLN_NO_AUTO_CLEANUP to 0 + * For option 2, follow option 1 and also define UCLN_AUTO_ATEXIT + * For option 3, follow option 1 and also define UCLN_AUTO_LOCAL (see below for more information) + */ + +#if !UCLN_NO_AUTO_CLEANUP + +/* + * The following declarations are for when UCLN_AUTO_LOCAL or UCLN_AUTO_ATEXIT + * are defined. They are commented out because they are static and will be defined + * later. The information is still here to provide some guidance for the developer + * who chooses to use UCLN_AUTO_LOCAL. + */ +/** + * Give the library an opportunity to register an automatic cleanup. + * This may be called more than once. + */ +/*static void ucln_registerAutomaticCleanup();*/ +/** + * Unregister an automatic cleanup, if possible. Called from cleanup. + */ +/*static void ucln_unRegisterAutomaticCleanup();*/ + +#ifdef UCLN_TYPE_IS_COMMON +# define UCLN_CLEAN_ME_UP u_cleanup() +#else +# define UCLN_CLEAN_ME_UP ucln_cleanupOne(UCLN_TYPE) +#endif + +/* ------------ automatic cleanup: registration. Choose ONE ------- */ +#if defined(UCLN_AUTO_LOCAL) +/* To use: + * 1. define UCLN_AUTO_LOCAL, + * 2. create ucln_local_hook.c containing implementations of + * static void ucln_registerAutomaticCleanup() + * static void ucln_unRegisterAutomaticCleanup() + */ +#include "ucln_local_hook.c" + +#elif defined(UCLN_AUTO_ATEXIT) +/* + * Use the ANSI C 'atexit' function. Note that this mechanism does not + * guarantee the order of cleanup relative to other users of ICU! + */ +static UBool gAutoCleanRegistered = false; + +static void ucln_atexit_handler() +{ + UCLN_CLEAN_ME_UP; +} + +static void ucln_registerAutomaticCleanup() +{ + if(!gAutoCleanRegistered) { + gAutoCleanRegistered = true; + atexit(&ucln_atexit_handler); + } +} + +static void ucln_unRegisterAutomaticCleanup () { +} +/* ------------end of automatic cleanup: registration. ------- */ + +#elif defined (UCLN_FINI) +/** + * If UCLN_FINI is defined, it is the (versioned, etc) name of a cleanup + * entrypoint. Add a stub to call ucln_cleanupOne + * Used on AIX, Solaris, and HP-UX + */ +U_CAPI void U_EXPORT2 UCLN_FINI (void); + +U_CAPI void U_EXPORT2 UCLN_FINI () +{ + /* This function must be defined, if UCLN_FINI is defined, else link error. */ + UCLN_CLEAN_ME_UP; +} + +/* Windows: DllMain */ +#elif U_PLATFORM_HAS_WIN32_API +/* + * ICU's own DllMain. + */ + +/* these are from putil.c */ +/* READ READ READ READ! Are you getting compilation errors from windows.h? + Any source file which includes this (ucln_imp.h) header MUST + be defined with language extensions ON. */ +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +# define VC_EXTRALEAN +# define NOUSER +# define NOSERVICE +# define NOIME +# define NOMCX +# include <windows.h> +/* + * This is a stub DllMain function with icu specific process handling code. + */ +BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) +{ + BOOL status = true; + + switch(fdwReason) { + case DLL_PROCESS_ATTACH: + /* ICU does not trap process attach, but must pass these through properly. */ + /* ICU specific process attach could go here */ + break; + + case DLL_PROCESS_DETACH: + /* Here is the one we actually care about. */ + + UCLN_CLEAN_ME_UP; + + break; + + case DLL_THREAD_ATTACH: + /* ICU does not trap thread attach, but must pass these through properly. */ + /* ICU specific thread attach could go here */ + break; + + case DLL_THREAD_DETACH: + /* ICU does not trap thread detach, but must pass these through properly. */ + /* ICU specific thread detach could go here */ + break; + + } + return status; +} + +#elif defined(__GNUC__) +/* GCC - use __attribute((destructor)) */ +static void ucln_destructor() __attribute__((destructor)) ; + +static void ucln_destructor() +{ + UCLN_CLEAN_ME_UP; +} + +#endif + +#endif /* UCLN_NO_AUTO_CLEANUP */ + +#else +#error This file can only be included once. +#endif diff --git a/thirdparty/icu4c/common/ucmndata.cpp b/thirdparty/icu4c/common/ucmndata.cpp new file mode 100644 index 0000000000..ba2310bb7a --- /dev/null +++ b/thirdparty/icu4c/common/ucmndata.cpp @@ -0,0 +1,393 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************/ + + +/*------------------------------------------------------------------------------ + * + * UCommonData An abstract interface for dealing with ICU Common Data Files. + * ICU Common Data Files are a grouping of a number of individual + * data items (resources, converters, tables, anything) into a + * single file or dll. The combined format includes a table of + * contents for locating the individual items by name. + * + * Two formats for the table of contents are supported, which is + * why there is an abstract inteface involved. + * + */ + +#include "unicode/utypes.h" +#include "unicode/udata.h" +#include "cstring.h" +#include "ucmndata.h" +#include "udatamem.h" + +#if defined(UDATA_DEBUG) || defined(UDATA_DEBUG_DUMP) +# include <stdio.h> +#endif + +U_CFUNC uint16_t +udata_getHeaderSize(const DataHeader *udh) { + if(udh==NULL) { + return 0; + } else if(udh->info.isBigEndian==U_IS_BIG_ENDIAN) { + /* same endianness */ + return udh->dataHeader.headerSize; + } else { + /* opposite endianness */ + uint16_t x=udh->dataHeader.headerSize; + return (uint16_t)((x<<8)|(x>>8)); + } +} + +U_CFUNC uint16_t +udata_getInfoSize(const UDataInfo *info) { + if(info==NULL) { + return 0; + } else if(info->isBigEndian==U_IS_BIG_ENDIAN) { + /* same endianness */ + return info->size; + } else { + /* opposite endianness */ + uint16_t x=info->size; + return (uint16_t)((x<<8)|(x>>8)); + } +} + +/*-----------------------------------------------------------------------------* + * * + * Pointer TOCs. TODO: This form of table-of-contents should be removed * + * because DLLs must be relocated on loading to correct the * + * pointer values and this operation makes shared memory * + * mapping of the data much less likely to work. * + * * + *-----------------------------------------------------------------------------*/ +typedef struct { + const char *entryName; + const DataHeader *pHeader; +} PointerTOCEntry; + + +typedef struct { + uint32_t count; + uint32_t reserved; + /** + * Variable-length array declared with length 1 to disable bounds checkers. + * The actual array length is in the count field. + */ + PointerTOCEntry entry[1]; +} PointerTOC; + + +/* definition of OffsetTOC struct types moved to ucmndata.h */ + +/*-----------------------------------------------------------------------------* + * * + * entry point lookup implementations * + * * + *-----------------------------------------------------------------------------*/ + +#ifndef MIN +#define MIN(a,b) (((a)<(b)) ? (a) : (b)) +#endif + +/** + * Compare strings where we know the shared prefix length, + * and advance the prefix length as we find that the strings share even more characters. + */ +static int32_t +strcmpAfterPrefix(const char *s1, const char *s2, int32_t *pPrefixLength) { + int32_t pl=*pPrefixLength; + int32_t cmp=0; + s1+=pl; + s2+=pl; + for(;;) { + int32_t c1=(uint8_t)*s1++; + int32_t c2=(uint8_t)*s2++; + cmp=c1-c2; + if(cmp!=0 || c1==0) { /* different or done */ + break; + } + ++pl; /* increment shared same-prefix length */ + } + *pPrefixLength=pl; + return cmp; +} + +static int32_t +offsetTOCPrefixBinarySearch(const char *s, const char *names, + const UDataOffsetTOCEntry *toc, int32_t count) { + int32_t start=0; + int32_t limit=count; + /* + * Remember the shared prefix between s, start and limit, + * and don't compare that shared prefix again. + * The shared prefix should get longer as we narrow the [start, limit[ range. + */ + int32_t startPrefixLength=0; + int32_t limitPrefixLength=0; + if(count==0) { + return -1; + } + /* + * Prime the prefix lengths so that we don't keep prefixLength at 0 until + * both the start and limit indexes have moved. + * At the same time, we find if s is one of the start and (limit-1) names, + * and if not, exclude them from the actual binary search. + */ + if(0==strcmpAfterPrefix(s, names+toc[0].nameOffset, &startPrefixLength)) { + return 0; + } + ++start; + --limit; + if(0==strcmpAfterPrefix(s, names+toc[limit].nameOffset, &limitPrefixLength)) { + return limit; + } + while(start<limit) { + int32_t i=(start+limit)/2; + int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength); + int32_t cmp=strcmpAfterPrefix(s, names+toc[i].nameOffset, &prefixLength); + if(cmp<0) { + limit=i; + limitPrefixLength=prefixLength; + } else if(cmp==0) { + return i; + } else { + start=i+1; + startPrefixLength=prefixLength; + } + } + return -1; +} + +static int32_t +pointerTOCPrefixBinarySearch(const char *s, const PointerTOCEntry *toc, int32_t count) { + int32_t start=0; + int32_t limit=count; + /* + * Remember the shared prefix between s, start and limit, + * and don't compare that shared prefix again. + * The shared prefix should get longer as we narrow the [start, limit[ range. + */ + int32_t startPrefixLength=0; + int32_t limitPrefixLength=0; + if(count==0) { + return -1; + } + /* + * Prime the prefix lengths so that we don't keep prefixLength at 0 until + * both the start and limit indexes have moved. + * At the same time, we find if s is one of the start and (limit-1) names, + * and if not, exclude them from the actual binary search. + */ + if(0==strcmpAfterPrefix(s, toc[0].entryName, &startPrefixLength)) { + return 0; + } + ++start; + --limit; + if(0==strcmpAfterPrefix(s, toc[limit].entryName, &limitPrefixLength)) { + return limit; + } + while(start<limit) { + int32_t i=(start+limit)/2; + int32_t prefixLength=MIN(startPrefixLength, limitPrefixLength); + int32_t cmp=strcmpAfterPrefix(s, toc[i].entryName, &prefixLength); + if(cmp<0) { + limit=i; + limitPrefixLength=prefixLength; + } else if(cmp==0) { + return i; + } else { + start=i+1; + startPrefixLength=prefixLength; + } + } + return -1; +} + +U_CDECL_BEGIN +static uint32_t U_CALLCONV +offsetTOCEntryCount(const UDataMemory *pData) { + int32_t retVal=0; + const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc; + if (toc != NULL) { + retVal = toc->count; + } + return retVal; +} + +static const DataHeader * U_CALLCONV +offsetTOCLookupFn(const UDataMemory *pData, + const char *tocEntryName, + int32_t *pLength, + UErrorCode *pErrorCode) { + (void)pErrorCode; + const UDataOffsetTOC *toc = (UDataOffsetTOC *)pData->toc; + if(toc!=NULL) { + const char *base=(const char *)toc; + int32_t number, count=(int32_t)toc->count; + + /* perform a binary search for the data in the common data's table of contents */ +#if defined (UDATA_DEBUG_DUMP) + /* list the contents of the TOC each time .. not recommended */ + for(number=0; number<count; ++number) { + fprintf(stderr, "\tx%d: %s\n", number, &base[toc->entry[number].nameOffset]); + } +#endif + number=offsetTOCPrefixBinarySearch(tocEntryName, base, toc->entry, count); + if(number>=0) { + /* found it */ + const UDataOffsetTOCEntry *entry=toc->entry+number; +#ifdef UDATA_DEBUG + fprintf(stderr, "%s: Found.\n", tocEntryName); +#endif + if((number+1) < count) { + *pLength = (int32_t)(entry[1].dataOffset - entry->dataOffset); + } else { + *pLength = -1; + } + return (const DataHeader *)(base+entry->dataOffset); + } else { +#ifdef UDATA_DEBUG + fprintf(stderr, "%s: Not found.\n", tocEntryName); +#endif + return NULL; + } + } else { +#ifdef UDATA_DEBUG + fprintf(stderr, "returning header\n"); +#endif + + return pData->pHeader; + } +} + + +static uint32_t U_CALLCONV pointerTOCEntryCount(const UDataMemory *pData) { + const PointerTOC *toc = (PointerTOC *)pData->toc; + return (uint32_t)((toc != NULL) ? (toc->count) : 0); +} + +static const DataHeader * U_CALLCONV pointerTOCLookupFn(const UDataMemory *pData, + const char *name, + int32_t *pLength, + UErrorCode *pErrorCode) { + (void)pErrorCode; + if(pData->toc!=NULL) { + const PointerTOC *toc = (PointerTOC *)pData->toc; + int32_t number, count=(int32_t)toc->count; + +#if defined (UDATA_DEBUG_DUMP) + /* list the contents of the TOC each time .. not recommended */ + for(number=0; number<count; ++number) { + fprintf(stderr, "\tx%d: %s\n", number, toc->entry[number].entryName); + } +#endif + number=pointerTOCPrefixBinarySearch(name, toc->entry, count); + if(number>=0) { + /* found it */ +#ifdef UDATA_DEBUG + fprintf(stderr, "%s: Found.\n", toc->entry[number].entryName); +#endif + *pLength=-1; + return UDataMemory_normalizeDataPointer(toc->entry[number].pHeader); + } else { +#ifdef UDATA_DEBUG + fprintf(stderr, "%s: Not found.\n", name); +#endif + return NULL; + } + } else { + return pData->pHeader; + } +} +U_CDECL_END + + +static const commonDataFuncs CmnDFuncs = {offsetTOCLookupFn, offsetTOCEntryCount}; +static const commonDataFuncs ToCPFuncs = {pointerTOCLookupFn, pointerTOCEntryCount}; + + + +/*----------------------------------------------------------------------* + * * + * checkCommonData Validate the format of a common data file. * + * Fill in the virtual function ptr based on TOC type * + * If the data is invalid, close the UDataMemory * + * and set the appropriate error code. * + * * + *----------------------------------------------------------------------*/ +U_CFUNC void udata_checkCommonData(UDataMemory *udm, UErrorCode *err) { + if (U_FAILURE(*err)) { + return; + } + + if(udm==NULL || udm->pHeader==NULL) { + *err=U_INVALID_FORMAT_ERROR; + } else if(!(udm->pHeader->dataHeader.magic1==0xda && + udm->pHeader->dataHeader.magic2==0x27 && + udm->pHeader->info.isBigEndian==U_IS_BIG_ENDIAN && + udm->pHeader->info.charsetFamily==U_CHARSET_FAMILY) + ) { + /* header not valid */ + *err=U_INVALID_FORMAT_ERROR; + } + else if (udm->pHeader->info.dataFormat[0]==0x43 && + udm->pHeader->info.dataFormat[1]==0x6d && + udm->pHeader->info.dataFormat[2]==0x6e && + udm->pHeader->info.dataFormat[3]==0x44 && + udm->pHeader->info.formatVersion[0]==1 + ) { + /* dataFormat="CmnD" */ + udm->vFuncs = &CmnDFuncs; + udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); + } + else if(udm->pHeader->info.dataFormat[0]==0x54 && + udm->pHeader->info.dataFormat[1]==0x6f && + udm->pHeader->info.dataFormat[2]==0x43 && + udm->pHeader->info.dataFormat[3]==0x50 && + udm->pHeader->info.formatVersion[0]==1 + ) { + /* dataFormat="ToCP" */ + udm->vFuncs = &ToCPFuncs; + udm->toc=(const char *)udm->pHeader+udata_getHeaderSize(udm->pHeader); + } + else { + /* dataFormat not recognized */ + *err=U_INVALID_FORMAT_ERROR; + } + + if (U_FAILURE(*err)) { + /* If the data is no good and we memory-mapped it ourselves, + * close the memory mapping so it doesn't leak. Note that this has + * no effect on non-memory mapped data, other than clearing fields in udm. + */ + udata_close(udm); + } +} + +/* + * TODO: Add a udata_swapPackageHeader() function that swaps an ICU .dat package + * header but not its sub-items. + * This function will be needed for automatic runtime swapping. + * Sub-items should not be swapped to limit the swapping to the parts of the + * package that are actually used. + * + * Since lengths of items are implicit in the order and offsets of their + * ToC entries, and since offsets are relative to the start of the ToC, + * a swapped version may need to generate a different data structure + * with pointers to the original data items and with their lengths + * (-1 for the last one if it is not known), and maybe even pointers to the + * swapped versions of the items. + * These pointers to swapped versions would establish a cache; + * instead, each open data item could simply own the storage for its swapped + * data. This fits better with the current design. + * + * markus 2003sep18 Jitterbug 2235 + */ diff --git a/thirdparty/icu4c/common/ucmndata.h b/thirdparty/icu4c/common/ucmndata.h new file mode 100644 index 0000000000..c3eba9f4d0 --- /dev/null +++ b/thirdparty/icu4c/common/ucmndata.h @@ -0,0 +1,117 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************/ + + +/*---------------------------------------------------------------------------------- + * + * UCommonData An abstract interface for dealing with ICU Common Data Files. + * ICU Common Data Files are a grouping of a number of individual + * data items (resources, converters, tables, anything) into a + * single file or dll. The combined format includes a table of + * contents for locating the individual items by name. + * + * Two formats for the table of contents are supported, which is + * why there is an abstract inteface involved. + * + * These functions are part of the ICU internal implementation, and + * are not inteded to be used directly by applications. + */ + +#ifndef __UCMNDATA_H__ +#define __UCMNDATA_H__ + +#include "unicode/udata.h" +#include "umapfile.h" + + +#define COMMON_DATA_NAME U_ICUDATA_NAME + +typedef struct { + uint16_t headerSize; + uint8_t magic1; + uint8_t magic2; +} MappedData; + + +typedef struct { + MappedData dataHeader; + UDataInfo info; +} DataHeader; + +typedef struct { + uint32_t nameOffset; + uint32_t dataOffset; +} UDataOffsetTOCEntry; + +typedef struct { + uint32_t count; + /** + * Variable-length array declared with length 1 to disable bounds checkers. + * The actual array length is in the count field. + */ + UDataOffsetTOCEntry entry[1]; +} UDataOffsetTOC; + +/** + * Get the header size from a const DataHeader *udh. + * Handles opposite-endian data. + * + * @internal + */ +U_CFUNC uint16_t +udata_getHeaderSize(const DataHeader *udh); + +/** + * Get the UDataInfo.size from a const UDataInfo *info. + * Handles opposite-endian data. + * + * @internal + */ +U_CFUNC uint16_t +udata_getInfoSize(const UDataInfo *info); + +U_CDECL_BEGIN +/* + * "Virtual" functions for data lookup. + * To call one, given a UDataMemory *p, the code looks like this: + * p->vFuncs.Lookup(p, tocEntryName, pErrorCode); + * (I sure do wish this was written in C++, not C) + */ + +typedef const DataHeader * +(U_CALLCONV * LookupFn)(const UDataMemory *pData, + const char *tocEntryName, + int32_t *pLength, + UErrorCode *pErrorCode); + +typedef uint32_t +(U_CALLCONV * NumEntriesFn)(const UDataMemory *pData); + +U_CDECL_END + +typedef struct { + LookupFn Lookup; + NumEntriesFn NumEntries; +} commonDataFuncs; + + +/* + * Functions to check whether a UDataMemory refers to memory containing + * a recognizable header and table of contents a Common Data Format + * + * If a valid header and TOC are found, + * set the CommonDataFuncs function dispatch vector in the UDataMemory + * to point to the right functions for the TOC type. + * otherwise + * set an errorcode. + */ +U_CFUNC void udata_checkCommonData(UDataMemory *pData, UErrorCode *pErrorCode); + +#endif diff --git a/thirdparty/icu4c/common/ucnv.cpp b/thirdparty/icu4c/common/ucnv.cpp new file mode 100644 index 0000000000..5dcf35e043 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv.cpp @@ -0,0 +1,2910 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* ucnv.c: +* Implements APIs for the ICU's codeset conversion library; +* mostly calls through internal functions; +* created by Bertrand A. Damiba +* +* Modification History: +* +* Date Name Description +* 04/04/99 helena Fixed internal header inclusion. +* 05/09/00 helena Added implementation to handle fallback mappings. +* 06/20/2000 helena OS/400 port changes; mostly typecast. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include <memory> + +#include "unicode/ustring.h" +#include "unicode/ucnv.h" +#include "unicode/ucnv_err.h" +#include "unicode/uset.h" +#include "unicode/utf.h" +#include "unicode/utf16.h" +#include "putilimp.h" +#include "cmemory.h" +#include "cstring.h" +#include "uassert.h" +#include "utracimp.h" +#include "ustr_imp.h" +#include "ucnv_imp.h" +#include "ucnv_cnv.h" +#include "ucnv_bld.h" + +/* size of intermediate and preflighting buffers in ucnv_convert() */ +#define CHUNK_SIZE 1024 + +typedef struct UAmbiguousConverter { + const char *name; + const UChar variant5c; +} UAmbiguousConverter; + +static const UAmbiguousConverter ambiguousConverters[]={ + { "ibm-897_P100-1995", 0xa5 }, + { "ibm-942_P120-1999", 0xa5 }, + { "ibm-943_P130-1999", 0xa5 }, + { "ibm-946_P100-1995", 0xa5 }, + { "ibm-33722_P120-1999", 0xa5 }, + { "ibm-1041_P100-1995", 0xa5 }, + /*{ "ibm-54191_P100-2006", 0xa5 },*/ + /*{ "ibm-62383_P100-2007", 0xa5 },*/ + /*{ "ibm-891_P100-1995", 0x20a9 },*/ + { "ibm-944_P100-1995", 0x20a9 }, + { "ibm-949_P110-1999", 0x20a9 }, + { "ibm-1363_P110-1997", 0x20a9 }, + { "ISO_2022,locale=ko,version=0", 0x20a9 }, + { "ibm-1088_P100-1995", 0x20a9 } +}; + +/*Calls through createConverter */ +U_CAPI UConverter* U_EXPORT2 +ucnv_open (const char *name, + UErrorCode * err) +{ + UConverter *r; + + if (err == NULL || U_FAILURE (*err)) { + return NULL; + } + + r = ucnv_createConverter(NULL, name, err); + return r; +} + +U_CAPI UConverter* U_EXPORT2 +ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) +{ + return ucnv_createConverterFromPackage(packageName, converterName, err); +} + +/*Extracts the UChar* to a char* and calls through createConverter */ +U_CAPI UConverter* U_EXPORT2 +ucnv_openU (const UChar * name, + UErrorCode * err) +{ + char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; + + if (err == NULL || U_FAILURE(*err)) + return NULL; + if (name == NULL) + return ucnv_open (NULL, err); + if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + return ucnv_open(u_austrcpy(asciiName, name), err); +} + +/* Copy the string that is represented by the UConverterPlatform enum + * @param platformString An output buffer + * @param platform An enum representing a platform + * @return the length of the copied string. + */ +static int32_t +ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) +{ + switch (pltfrm) + { + case UCNV_IBM: + uprv_strcpy(platformString, "ibm-"); + return 4; + case UCNV_UNKNOWN: + break; + } + + /* default to empty string */ + *platformString = 0; + return 0; +} + +/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls + *through createConverter*/ +U_CAPI UConverter* U_EXPORT2 +ucnv_openCCSID (int32_t codepage, + UConverterPlatform platform, + UErrorCode * err) +{ + char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; + int32_t myNameLen; + + if (err == NULL || U_FAILURE (*err)) + return NULL; + + /* ucnv_copyPlatformString could return "ibm-" or "cp" */ + myNameLen = ucnv_copyPlatformString(myName, platform); + T_CString_integerToString(myName + myNameLen, codepage, 10); + + return ucnv_createConverter(NULL, myName, err); +} + +/* Creating a temporary stack-based object that can be used in one thread, +and created from a converter that is shared across threads. +*/ + +U_CAPI UConverter* U_EXPORT2 +ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) +{ + UConverter *localConverter, *allocatedConverter; + int32_t stackBufferSize; + int32_t bufferSizeNeeded; + UErrorCode cbErr; + UConverterToUnicodeArgs toUArgs = { + sizeof(UConverterToUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + UConverterFromUnicodeArgs fromUArgs = { + sizeof(UConverterFromUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + + UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); + + if (status == NULL || U_FAILURE(*status)){ + UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR); + return NULL; + } + + if (cnv == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + UTRACE_EXIT_STATUS(*status); + return NULL; + } + + UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", + ucnv_getName(cnv, status), cnv, stackBuffer); + + if (cnv->sharedData->impl->safeClone != NULL) { + /* call the custom safeClone function for sizing */ + bufferSizeNeeded = 0; + cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); + if (U_FAILURE(*status)) { + UTRACE_EXIT_STATUS(*status); + return NULL; + } + } + else + { + /* inherent sizing */ + bufferSizeNeeded = sizeof(UConverter); + } + + if (pBufferSize == NULL) { + stackBufferSize = 1; + pBufferSize = &stackBufferSize; + } else { + stackBufferSize = *pBufferSize; + if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ + *pBufferSize = bufferSizeNeeded; + UTRACE_EXIT_VALUE(bufferSizeNeeded); + return NULL; + } + } + + /* Adjust (if necessary) the stackBuffer pointer to be aligned correctly for a UConverter. + * TODO(Jira ICU-20736) Redo this using std::align() once g++4.9 compatibility is no longer needed. + */ + if (stackBuffer) { + uintptr_t p = reinterpret_cast<uintptr_t>(stackBuffer); + uintptr_t aligned_p = (p + alignof(UConverter) - 1) & ~(alignof(UConverter) - 1); + ptrdiff_t pointerAdjustment = aligned_p - p; + if (bufferSizeNeeded + pointerAdjustment <= stackBufferSize) { + stackBuffer = reinterpret_cast<void *>(aligned_p); + stackBufferSize -= static_cast<int32_t>(pointerAdjustment); + } else { + /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ + stackBufferSize = 1; + } + } + + /* Now, see if we must allocate any memory */ + if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL) + { + /* allocate one here...*/ + localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); + + if(localConverter == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + UTRACE_EXIT_STATUS(*status); + return NULL; + } + *status = U_SAFECLONE_ALLOCATED_WARNING; + + /* record the fact that memory was allocated */ + *pBufferSize = bufferSizeNeeded; + } else { + /* just use the stack buffer */ + localConverter = (UConverter*) stackBuffer; + allocatedConverter = NULL; + } + + uprv_memset(localConverter, 0, bufferSizeNeeded); + + /* Copy initial state */ + uprv_memcpy(localConverter, cnv, sizeof(UConverter)); + localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; + + /* copy the substitution string */ + if (cnv->subChars == (uint8_t *)cnv->subUChars) { + localConverter->subChars = (uint8_t *)localConverter->subUChars; + } else { + localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + if (localConverter->subChars == NULL) { + uprv_free(allocatedConverter); + UTRACE_EXIT_STATUS(*status); + return NULL; + } + uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + } + + /* now either call the safeclone fcn or not */ + if (cnv->sharedData->impl->safeClone != NULL) { + /* call the custom safeClone function */ + localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); + } + + if(localConverter==NULL || U_FAILURE(*status)) { + if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { + uprv_free(allocatedConverter->subChars); + } + uprv_free(allocatedConverter); + UTRACE_EXIT_STATUS(*status); + return NULL; + } + + /* increment refcount of shared data if needed */ + if (cnv->sharedData->isReferenceCounted) { + ucnv_incrementRefCount(cnv->sharedData); + } + + if(localConverter == (UConverter*)stackBuffer) { + /* we're using user provided data - set to not destroy */ + localConverter->isCopyLocal = TRUE; + } + + /* allow callback functions to handle any memory allocation */ + toUArgs.converter = fromUArgs.converter = localConverter; + cbErr = U_ZERO_ERROR; + cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); + cbErr = U_ZERO_ERROR; + cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); + + UTRACE_EXIT_PTR_STATUS(localConverter, *status); + return localConverter; +} + + + +/*Decreases the reference counter in the shared immutable section of the object + *and frees the mutable part*/ + +U_CAPI void U_EXPORT2 +ucnv_close (UConverter * converter) +{ + UErrorCode errorCode = U_ZERO_ERROR; + + UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); + + if (converter == NULL) + { + UTRACE_EXIT(); + return; + } + + UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", + ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); + + /* In order to speed up the close, only call the callbacks when they have been changed. + This performance check will only work when the callbacks are set within a shared library + or from user code that statically links this code. */ + /* first, notify the callback functions that the converter is closed */ + if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { + UConverterToUnicodeArgs toUArgs = { + sizeof(UConverterToUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + + toUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); + } + if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { + UConverterFromUnicodeArgs fromUArgs = { + sizeof(UConverterFromUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + fromUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); + } + + if (converter->sharedData->impl->close != NULL) { + converter->sharedData->impl->close(converter); + } + + if (converter->subChars != (uint8_t *)converter->subUChars) { + uprv_free(converter->subChars); + } + + if (converter->sharedData->isReferenceCounted) { + ucnv_unloadSharedDataIfReady(converter->sharedData); + } + + if(!converter->isCopyLocal){ + uprv_free(converter); + } + + UTRACE_EXIT(); +} + +/*returns a single Name from the list, will return NULL if out of bounds + */ +U_CAPI const char* U_EXPORT2 +ucnv_getAvailableName (int32_t n) +{ + if (0 <= n && n <= 0xffff) { + UErrorCode err = U_ZERO_ERROR; + const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); + if (U_SUCCESS(err)) { + return name; + } + } + return NULL; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_countAvailable () +{ + UErrorCode err = U_ZERO_ERROR; + return ucnv_bld_countAvailableConverters(&err); +} + +U_CAPI void U_EXPORT2 +ucnv_getSubstChars (const UConverter * converter, + char *mySubChar, + int8_t * len, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return; + + if (converter->subCharLen <= 0) { + /* Unicode string or empty string from ucnv_setSubstString(). */ + *len = 0; + return; + } + + if (*len < converter->subCharLen) /*not enough space in subChars */ + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + + uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ + *len = converter->subCharLen; /*store # of bytes copied to buffer */ +} + +U_CAPI void U_EXPORT2 +ucnv_setSubstChars (UConverter * converter, + const char *mySubChar, + int8_t len, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return; + + /*Makes sure that the subChar is within the codepages char length boundaries */ + if ((len > converter->sharedData->staticData->maxBytesPerChar) + || (len < converter->sharedData->staticData->minBytesPerChar)) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ + converter->subCharLen = len; /*sets the new len */ + + /* + * There is currently (2001Feb) no separate API to set/get subChar1. + * In order to always have subChar written after it is explicitly set, + * we set subChar1 to 0. + */ + converter->subChar1 = 0; + + return; +} + +U_CAPI void U_EXPORT2 +ucnv_setSubstString(UConverter *cnv, + const UChar *s, + int32_t length, + UErrorCode *err) { + alignas(UConverter) char cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE]; + char chars[UCNV_ERROR_BUFFER_LENGTH]; + + UConverter *clone; + uint8_t *subChars; + int32_t cloneSize, length8; + + /* Let the following functions check all arguments. */ + cloneSize = sizeof(cloneBuffer); + clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); + ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); + length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); + ucnv_close(clone); + if (U_FAILURE(*err)) { + return; + } + + if (cnv->sharedData->impl->writeSub == NULL +#if !UCONFIG_NO_LEGACY_CONVERSION + || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && + ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) +#endif + ) { + /* The converter is not stateful. Store the charset bytes as a fixed string. */ + subChars = (uint8_t *)chars; + } else { + /* + * The converter has a non-default writeSub() function, indicating + * that it is stateful. + * Store the Unicode string for on-the-fly conversion for correct + * state handling. + */ + if (length > UCNV_ERROR_BUFFER_LENGTH) { + /* + * Should not occur. The converter should output at least one byte + * per UChar, which means that ucnv_fromUChars() should catch all + * overflows. + */ + *err = U_BUFFER_OVERFLOW_ERROR; + return; + } + subChars = (uint8_t *)s; + if (length < 0) { + length = u_strlen(s); + } + length8 = length * U_SIZEOF_UCHAR; + } + + /* + * For storing the substitution string, select either the small buffer inside + * UConverter or allocate a subChars buffer. + */ + if (length8 > UCNV_MAX_SUBCHAR_LEN) { + /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ + if (cnv->subChars == (uint8_t *)cnv->subUChars) { + /* Allocate a new buffer for the string. */ + cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + if (cnv->subChars == NULL) { + cnv->subChars = (uint8_t *)cnv->subUChars; + *err = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + } + } + + /* Copy the substitution string into the UConverter or its subChars buffer. */ + if (length8 == 0) { + cnv->subCharLen = 0; + } else { + uprv_memcpy(cnv->subChars, subChars, length8); + if (subChars == (uint8_t *)chars) { + cnv->subCharLen = (int8_t)length8; + } else /* subChars == s */ { + cnv->subCharLen = (int8_t)-length; + } + } + + /* See comment in ucnv_setSubstChars(). */ + cnv->subChar1 = 0; +} + +/*resets the internal states of a converter + *goal : have the same behaviour than a freshly created converter + */ +static void _reset(UConverter *converter, UConverterResetChoice choice, + UBool callCallback) { + if(converter == NULL) { + return; + } + + if(callCallback) { + /* first, notify the callback functions that the converter is reset */ + UErrorCode errorCode; + + if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { + UConverterToUnicodeArgs toUArgs = { + sizeof(UConverterToUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + toUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); + } + if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { + UConverterFromUnicodeArgs fromUArgs = { + sizeof(UConverterFromUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + fromUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); + } + } + + /* now reset the converter itself */ + if(choice<=UCNV_RESET_TO_UNICODE) { + converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; + converter->mode = 0; + converter->toULength = 0; + converter->invalidCharLength = converter->UCharErrorBufferLength = 0; + converter->preToULength = 0; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + converter->fromUnicodeStatus = 0; + converter->fromUChar32 = 0; + converter->invalidUCharLength = converter->charErrorBufferLength = 0; + converter->preFromUFirstCP = U_SENTINEL; + converter->preFromULength = 0; + } + + if (converter->sharedData->impl->reset != NULL) { + /* call the custom reset function */ + converter->sharedData->impl->reset(converter, choice); + } +} + +U_CAPI void U_EXPORT2 +ucnv_reset(UConverter *converter) +{ + _reset(converter, UCNV_RESET_BOTH, TRUE); +} + +U_CAPI void U_EXPORT2 +ucnv_resetToUnicode(UConverter *converter) +{ + _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); +} + +U_CAPI void U_EXPORT2 +ucnv_resetFromUnicode(UConverter *converter) +{ + _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); +} + +U_CAPI int8_t U_EXPORT2 +ucnv_getMaxCharSize (const UConverter * converter) +{ + return converter->maxBytesPerUChar; +} + + +U_CAPI int8_t U_EXPORT2 +ucnv_getMinCharSize (const UConverter * converter) +{ + return converter->sharedData->staticData->minBytesPerChar; +} + +U_CAPI const char* U_EXPORT2 +ucnv_getName (const UConverter * converter, UErrorCode * err) + +{ + if (U_FAILURE (*err)) + return NULL; + if(converter->sharedData->impl->getName){ + const char* temp= converter->sharedData->impl->getName(converter); + if(temp) + return temp; + } + return converter->sharedData->staticData->name; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_getCCSID(const UConverter * converter, + UErrorCode * err) +{ + int32_t ccsid; + if (U_FAILURE (*err)) + return -1; + + ccsid = converter->sharedData->staticData->codepage; + if (ccsid == 0) { + /* Rare case. This is for cases like gb18030, + which doesn't have an IBM canonical name, but does have an IBM alias. */ + const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); + if (U_SUCCESS(*err) && standardName) { + const char *ccsidStr = uprv_strchr(standardName, '-'); + if (ccsidStr) { + ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ + } + } + } + return ccsid; +} + + +U_CAPI UConverterPlatform U_EXPORT2 +ucnv_getPlatform (const UConverter * converter, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return UCNV_UNKNOWN; + + return (UConverterPlatform)converter->sharedData->staticData->platform; +} + +U_CAPI void U_EXPORT2 + ucnv_getToUCallBack (const UConverter * converter, + UConverterToUCallback *action, + const void **context) +{ + *action = converter->fromCharErrorBehaviour; + *context = converter->toUContext; +} + +U_CAPI void U_EXPORT2 + ucnv_getFromUCallBack (const UConverter * converter, + UConverterFromUCallback *action, + const void **context) +{ + *action = converter->fromUCharErrorBehaviour; + *context = converter->fromUContext; +} + +U_CAPI void U_EXPORT2 +ucnv_setToUCallBack (UConverter * converter, + UConverterToUCallback newAction, + const void* newContext, + UConverterToUCallback *oldAction, + const void** oldContext, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return; + if (oldAction) *oldAction = converter->fromCharErrorBehaviour; + converter->fromCharErrorBehaviour = newAction; + if (oldContext) *oldContext = converter->toUContext; + converter->toUContext = newContext; +} + +U_CAPI void U_EXPORT2 +ucnv_setFromUCallBack (UConverter * converter, + UConverterFromUCallback newAction, + const void* newContext, + UConverterFromUCallback *oldAction, + const void** oldContext, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return; + if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; + converter->fromUCharErrorBehaviour = newAction; + if (oldContext) *oldContext = converter->fromUContext; + converter->fromUContext = newContext; +} + +static void +_updateOffsets(int32_t *offsets, int32_t length, + int32_t sourceIndex, int32_t errorInputLength) { + int32_t *limit; + int32_t delta, offset; + + if(sourceIndex>=0) { + /* + * adjust each offset by adding the previous sourceIndex + * minus the length of the input sequence that caused an + * error, if any + */ + delta=sourceIndex-errorInputLength; + } else { + /* + * set each offset to -1 because this conversion function + * does not handle offsets + */ + delta=-1; + } + + limit=offsets+length; + if(delta==0) { + /* most common case, nothing to do */ + } else if(delta>0) { + /* add the delta to each offset (but not if the offset is <0) */ + while(offsets<limit) { + offset=*offsets; + if(offset>=0) { + *offsets=offset+delta; + } + ++offsets; + } + } else /* delta<0 */ { + /* + * set each offset to -1 because this conversion function + * does not handle offsets + * or the error input sequence started in a previous buffer + */ + while(offsets<limit) { + *offsets++=-1; + } + } +} + +/* ucnv_fromUnicode --------------------------------------------------------- */ + +/* + * Implementation note for m:n conversions + * + * While collecting source units to find the longest match for m:n conversion, + * some source units may need to be stored for a partial match. + * When a second buffer does not yield a match on all of the previously stored + * source units, then they must be "replayed", i.e., fed back into the converter. + * + * The code relies on the fact that replaying will not nest - + * converting a replay buffer will not result in a replay. + * This is because a replay is necessary only after the _continuation_ of a + * partial match failed, but a replay buffer is converted as a whole. + * It may result in some of its units being stored again for a partial match, + * but there will not be a continuation _during_ the replay which could fail. + * + * It is conceivable that a callback function could call the converter + * recursively in a way that causes another replay to be stored, but that + * would be an error in the callback function. + * Such violations will cause assertion failures in a debug build, + * and wrong output, but they will not cause a crash. + */ + +static void +_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) { + UConverterFromUnicode fromUnicode; + UConverter *cnv; + const UChar *s; + char *t; + int32_t *offsets; + int32_t sourceIndex; + int32_t errorInputLength; + UBool converterSawEndOfInput, calledCallback; + + /* variables for m:n conversion */ + UChar replay[UCNV_EXT_MAX_UCHARS]; + const UChar *realSource, *realSourceLimit; + int32_t realSourceIndex; + UBool realFlush; + + cnv=pArgs->converter; + s=pArgs->source; + t=pArgs->target; + offsets=pArgs->offsets; + + /* get the converter implementation function */ + sourceIndex=0; + if(offsets==NULL) { + fromUnicode=cnv->sharedData->impl->fromUnicode; + } else { + fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets; + if(fromUnicode==NULL) { + /* there is no WithOffsets implementation */ + fromUnicode=cnv->sharedData->impl->fromUnicode; + /* we will write -1 for each offset */ + sourceIndex=-1; + } + } + + if(cnv->preFromULength>=0) { + /* normal mode */ + realSource=NULL; + + /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ + realSourceLimit=NULL; + realFlush=FALSE; + realSourceIndex=0; + } else { + /* + * Previous m:n conversion stored source units from a partial match + * and failed to consume all of them. + * We need to "replay" them from a temporary buffer and convert them first. + */ + realSource=pArgs->source; + realSourceLimit=pArgs->sourceLimit; + realFlush=pArgs->flush; + realSourceIndex=sourceIndex; + + uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); + pArgs->source=replay; + pArgs->sourceLimit=replay-cnv->preFromULength; + pArgs->flush=FALSE; + sourceIndex=-1; + + cnv->preFromULength=0; + } + + /* + * loop for conversion and error handling + * + * loop { + * convert + * loop { + * update offsets + * handle end of input + * handle errors/call callback + * } + * } + */ + for(;;) { + if(U_SUCCESS(*err)) { + /* convert */ + fromUnicode(pArgs, err); + + /* + * set a flag for whether the converter + * successfully processed the end of the input + * + * need not check cnv->preFromULength==0 because a replay (<0) will cause + * s<sourceLimit before converterSawEndOfInput is checked + */ + converterSawEndOfInput= + (UBool)(U_SUCCESS(*err) && + pArgs->flush && pArgs->source==pArgs->sourceLimit && + cnv->fromUChar32==0); + } else { + /* handle error from ucnv_convertEx() */ + converterSawEndOfInput=FALSE; + } + + /* no callback called yet for this iteration */ + calledCallback=FALSE; + + /* no sourceIndex adjustment for conversion, only for callback output */ + errorInputLength=0; + + /* + * loop for offsets and error handling + * + * iterates at most 3 times: + * 1. to clean up after the conversion function + * 2. after the callback + * 3. after the callback again if there was truncated input + */ + for(;;) { + /* update offsets if we write any */ + if(offsets!=NULL) { + int32_t length=(int32_t)(pArgs->target-t); + if(length>0) { + _updateOffsets(offsets, length, sourceIndex, errorInputLength); + + /* + * if a converter handles offsets and updates the offsets + * pointer at the end, then pArgs->offset should not change + * here; + * however, some converters do not handle offsets at all + * (sourceIndex<0) or may not update the offsets pointer + */ + pArgs->offsets=offsets+=length; + } + + if(sourceIndex>=0) { + sourceIndex+=(int32_t)(pArgs->source-s); + } + } + + if(cnv->preFromULength<0) { + /* + * switch the source to new replay units (cannot occur while replaying) + * after offset handling and before end-of-input and callback handling + */ + if(realSource==NULL) { + realSource=pArgs->source; + realSourceLimit=pArgs->sourceLimit; + realFlush=pArgs->flush; + realSourceIndex=sourceIndex; + + uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); + pArgs->source=replay; + pArgs->sourceLimit=replay-cnv->preFromULength; + pArgs->flush=FALSE; + if((sourceIndex+=cnv->preFromULength)<0) { + sourceIndex=-1; + } + + cnv->preFromULength=0; + } else { + /* see implementation note before _fromUnicodeWithCallback() */ + U_ASSERT(realSource==NULL); + *err=U_INTERNAL_PROGRAM_ERROR; + } + } + + /* update pointers */ + s=pArgs->source; + t=pArgs->target; + + if(U_SUCCESS(*err)) { + if(s<pArgs->sourceLimit) { + /* + * continue with the conversion loop while there is still input left + * (continue converting by breaking out of only the inner loop) + */ + break; + } else if(realSource!=NULL) { + /* switch back from replaying to the real source and continue */ + pArgs->source=realSource; + pArgs->sourceLimit=realSourceLimit; + pArgs->flush=realFlush; + sourceIndex=realSourceIndex; + + realSource=NULL; + break; + } else if(pArgs->flush && cnv->fromUChar32!=0) { + /* + * the entire input stream is consumed + * and there is a partial, truncated input sequence left + */ + + /* inject an error and continue with callback handling */ + *err=U_TRUNCATED_CHAR_FOUND; + calledCallback=FALSE; /* new error condition */ + } else { + /* input consumed */ + if(pArgs->flush) { + /* + * return to the conversion loop once more if the flush + * flag is set and the conversion function has not + * successfully processed the end of the input yet + * + * (continue converting by breaking out of only the inner loop) + */ + if(!converterSawEndOfInput) { + break; + } + + /* reset the converter without calling the callback function */ + _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE); + } + + /* done successfully */ + return; + } + } + + /* U_FAILURE(*err) */ + { + UErrorCode e; + + if( calledCallback || + (e=*err)==U_BUFFER_OVERFLOW_ERROR || + (e!=U_INVALID_CHAR_FOUND && + e!=U_ILLEGAL_CHAR_FOUND && + e!=U_TRUNCATED_CHAR_FOUND) + ) { + /* + * the callback did not or cannot resolve the error: + * set output pointers and return + * + * the check for buffer overflow is redundant but it is + * a high-runner case and hopefully documents the intent + * well + * + * if we were replaying, then the replay buffer must be + * copied back into the UConverter + * and the real arguments must be restored + */ + if(realSource!=NULL) { + int32_t length; + + U_ASSERT(cnv->preFromULength==0); + + length=(int32_t)(pArgs->sourceLimit-pArgs->source); + if(length>0) { + u_memcpy(cnv->preFromU, pArgs->source, length); + cnv->preFromULength=(int8_t)-length; + } + + pArgs->source=realSource; + pArgs->sourceLimit=realSourceLimit; + pArgs->flush=realFlush; + } + + return; + } + } + + /* callback handling */ + { + UChar32 codePoint; + + /* get and write the code point */ + codePoint=cnv->fromUChar32; + errorInputLength=0; + U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint); + cnv->invalidUCharLength=(int8_t)errorInputLength; + + /* set the converter state to deal with the next character */ + cnv->fromUChar32=0; + + /* call the callback function */ + cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, + cnv->invalidUCharBuffer, errorInputLength, codePoint, + *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL, + err); + } + + /* + * loop back to the offset handling + * + * this flag will indicate after offset handling + * that a callback was called; + * if the callback did not resolve the error, then we return + */ + calledCallback=TRUE; + } + } +} + +/* + * Output the fromUnicode overflow buffer. + * Call this function if(cnv->charErrorBufferLength>0). + * @return TRUE if overflow + */ +static UBool +ucnv_outputOverflowFromUnicode(UConverter *cnv, + char **target, const char *targetLimit, + int32_t **pOffsets, + UErrorCode *err) { + int32_t *offsets; + char *overflow, *t; + int32_t i, length; + + t=*target; + if(pOffsets!=NULL) { + offsets=*pOffsets; + } else { + offsets=NULL; + } + + overflow=(char *)cnv->charErrorBuffer; + length=cnv->charErrorBufferLength; + i=0; + while(i<length) { + if(t==targetLimit) { + /* the overflow buffer contains too much, keep the rest */ + int32_t j=0; + + do { + overflow[j++]=overflow[i++]; + } while(i<length); + + cnv->charErrorBufferLength=(int8_t)j; + *target=t; + if(offsets!=NULL) { + *pOffsets=offsets; + } + *err=U_BUFFER_OVERFLOW_ERROR; + return TRUE; + } + + /* copy the overflow contents to the target */ + *t++=overflow[i++]; + if(offsets!=NULL) { + *offsets++=-1; /* no source index available for old output */ + } + } + + /* the overflow buffer is completely copied to the target */ + cnv->charErrorBufferLength=0; + *target=t; + if(offsets!=NULL) { + *pOffsets=offsets; + } + return FALSE; +} + +U_CAPI void U_EXPORT2 +ucnv_fromUnicode(UConverter *cnv, + char **target, const char *targetLimit, + const UChar **source, const UChar *sourceLimit, + int32_t *offsets, + UBool flush, + UErrorCode *err) { + UConverterFromUnicodeArgs args; + const UChar *s; + char *t; + + /* check parameters */ + if(err==NULL || U_FAILURE(*err)) { + return; + } + + if(cnv==NULL || target==NULL || source==NULL) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + s=*source; + t=*target; + + if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { + /* + Prevent code from going into an infinite loop in case we do hit this + limit. The limit pointer is expected to be on a UChar * boundary. + This also prevents the next argument check from failing. + */ + sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1); + } + + /* + * All these conditions should never happen. + * + * 1) Make sure that the limits are >= to the address source or target + * + * 2) Make sure that the buffer sizes do not exceed the number range for + * int32_t because some functions use the size (in units or bytes) + * rather than comparing pointers, and because offsets are int32_t values. + * + * size_t is guaranteed to be unsigned and large enough for the job. + * + * Return with an error instead of adjusting the limits because we would + * not be able to maintain the semantics that either the source must be + * consumed or the target filled (unless an error occurs). + * An adjustment would be targetLimit=t+0x7fffffff; for example. + * + * 3) Make sure that the user didn't incorrectly cast a UChar * pointer + * to a char * pointer and provide an incomplete UChar code unit. + */ + if (sourceLimit<s || targetLimit<t || + ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) || + ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || + (((const char *)sourceLimit-(const char *)s) & 1) != 0) + { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* output the target overflow buffer */ + if( cnv->charErrorBufferLength>0 && + ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) + ) { + /* U_BUFFER_OVERFLOW_ERROR */ + return; + } + /* *target may have moved, therefore stop using t */ + + if(!flush && s==sourceLimit && cnv->preFromULength>=0) { + /* the overflow buffer is emptied and there is no new input: we are done */ + return; + } + + /* + * Do not simply return with a buffer overflow error if + * !flush && t==targetLimit + * because it is possible that the source will not generate any output. + * For example, the skip callback may be called; + * it does not output anything. + */ + + /* prepare the converter arguments */ + args.converter=cnv; + args.flush=flush; + args.offsets=offsets; + args.source=s; + args.sourceLimit=sourceLimit; + args.target=*target; + args.targetLimit=targetLimit; + args.size=sizeof(args); + + _fromUnicodeWithCallback(&args, err); + + *source=args.source; + *target=args.target; +} + +/* ucnv_toUnicode() --------------------------------------------------------- */ + +static void +_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { + UConverterToUnicode toUnicode; + UConverter *cnv; + const char *s; + UChar *t; + int32_t *offsets; + int32_t sourceIndex; + int32_t errorInputLength; + UBool converterSawEndOfInput, calledCallback; + + /* variables for m:n conversion */ + char replay[UCNV_EXT_MAX_BYTES]; + const char *realSource, *realSourceLimit; + int32_t realSourceIndex; + UBool realFlush; + + cnv=pArgs->converter; + s=pArgs->source; + t=pArgs->target; + offsets=pArgs->offsets; + + /* get the converter implementation function */ + sourceIndex=0; + if(offsets==NULL) { + toUnicode=cnv->sharedData->impl->toUnicode; + } else { + toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; + if(toUnicode==NULL) { + /* there is no WithOffsets implementation */ + toUnicode=cnv->sharedData->impl->toUnicode; + /* we will write -1 for each offset */ + sourceIndex=-1; + } + } + + if(cnv->preToULength>=0) { + /* normal mode */ + realSource=NULL; + + /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ + realSourceLimit=NULL; + realFlush=FALSE; + realSourceIndex=0; + } else { + /* + * Previous m:n conversion stored source units from a partial match + * and failed to consume all of them. + * We need to "replay" them from a temporary buffer and convert them first. + */ + realSource=pArgs->source; + realSourceLimit=pArgs->sourceLimit; + realFlush=pArgs->flush; + realSourceIndex=sourceIndex; + + uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); + pArgs->source=replay; + pArgs->sourceLimit=replay-cnv->preToULength; + pArgs->flush=FALSE; + sourceIndex=-1; + + cnv->preToULength=0; + } + + /* + * loop for conversion and error handling + * + * loop { + * convert + * loop { + * update offsets + * handle end of input + * handle errors/call callback + * } + * } + */ + for(;;) { + if(U_SUCCESS(*err)) { + /* convert */ + toUnicode(pArgs, err); + + /* + * set a flag for whether the converter + * successfully processed the end of the input + * + * need not check cnv->preToULength==0 because a replay (<0) will cause + * s<sourceLimit before converterSawEndOfInput is checked + */ + converterSawEndOfInput= + (UBool)(U_SUCCESS(*err) && + pArgs->flush && pArgs->source==pArgs->sourceLimit && + cnv->toULength==0); + } else { + /* handle error from getNextUChar() or ucnv_convertEx() */ + converterSawEndOfInput=FALSE; + } + + /* no callback called yet for this iteration */ + calledCallback=FALSE; + + /* no sourceIndex adjustment for conversion, only for callback output */ + errorInputLength=0; + + /* + * loop for offsets and error handling + * + * iterates at most 3 times: + * 1. to clean up after the conversion function + * 2. after the callback + * 3. after the callback again if there was truncated input + */ + for(;;) { + /* update offsets if we write any */ + if(offsets!=NULL) { + int32_t length=(int32_t)(pArgs->target-t); + if(length>0) { + _updateOffsets(offsets, length, sourceIndex, errorInputLength); + + /* + * if a converter handles offsets and updates the offsets + * pointer at the end, then pArgs->offset should not change + * here; + * however, some converters do not handle offsets at all + * (sourceIndex<0) or may not update the offsets pointer + */ + pArgs->offsets=offsets+=length; + } + + if(sourceIndex>=0) { + sourceIndex+=(int32_t)(pArgs->source-s); + } + } + + if(cnv->preToULength<0) { + /* + * switch the source to new replay units (cannot occur while replaying) + * after offset handling and before end-of-input and callback handling + */ + if(realSource==NULL) { + realSource=pArgs->source; + realSourceLimit=pArgs->sourceLimit; + realFlush=pArgs->flush; + realSourceIndex=sourceIndex; + + uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); + pArgs->source=replay; + pArgs->sourceLimit=replay-cnv->preToULength; + pArgs->flush=FALSE; + if((sourceIndex+=cnv->preToULength)<0) { + sourceIndex=-1; + } + + cnv->preToULength=0; + } else { + /* see implementation note before _fromUnicodeWithCallback() */ + U_ASSERT(realSource==NULL); + *err=U_INTERNAL_PROGRAM_ERROR; + } + } + + /* update pointers */ + s=pArgs->source; + t=pArgs->target; + + if(U_SUCCESS(*err)) { + if(s<pArgs->sourceLimit) { + /* + * continue with the conversion loop while there is still input left + * (continue converting by breaking out of only the inner loop) + */ + break; + } else if(realSource!=NULL) { + /* switch back from replaying to the real source and continue */ + pArgs->source=realSource; + pArgs->sourceLimit=realSourceLimit; + pArgs->flush=realFlush; + sourceIndex=realSourceIndex; + + realSource=NULL; + break; + } else if(pArgs->flush && cnv->toULength>0) { + /* + * the entire input stream is consumed + * and there is a partial, truncated input sequence left + */ + + /* inject an error and continue with callback handling */ + *err=U_TRUNCATED_CHAR_FOUND; + calledCallback=FALSE; /* new error condition */ + } else { + /* input consumed */ + if(pArgs->flush) { + /* + * return to the conversion loop once more if the flush + * flag is set and the conversion function has not + * successfully processed the end of the input yet + * + * (continue converting by breaking out of only the inner loop) + */ + if(!converterSawEndOfInput) { + break; + } + + /* reset the converter without calling the callback function */ + _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); + } + + /* done successfully */ + return; + } + } + + /* U_FAILURE(*err) */ + { + UErrorCode e; + + if( calledCallback || + (e=*err)==U_BUFFER_OVERFLOW_ERROR || + (e!=U_INVALID_CHAR_FOUND && + e!=U_ILLEGAL_CHAR_FOUND && + e!=U_TRUNCATED_CHAR_FOUND && + e!=U_ILLEGAL_ESCAPE_SEQUENCE && + e!=U_UNSUPPORTED_ESCAPE_SEQUENCE) + ) { + /* + * the callback did not or cannot resolve the error: + * set output pointers and return + * + * the check for buffer overflow is redundant but it is + * a high-runner case and hopefully documents the intent + * well + * + * if we were replaying, then the replay buffer must be + * copied back into the UConverter + * and the real arguments must be restored + */ + if(realSource!=NULL) { + int32_t length; + + U_ASSERT(cnv->preToULength==0); + + length=(int32_t)(pArgs->sourceLimit-pArgs->source); + if(length>0) { + uprv_memcpy(cnv->preToU, pArgs->source, length); + cnv->preToULength=(int8_t)-length; + } + + pArgs->source=realSource; + pArgs->sourceLimit=realSourceLimit; + pArgs->flush=realFlush; + } + + return; + } + } + + /* copy toUBytes[] to invalidCharBuffer[] */ + errorInputLength=cnv->invalidCharLength=cnv->toULength; + if(errorInputLength>0) { + uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); + } + + /* set the converter state to deal with the next character */ + cnv->toULength=0; + + /* call the callback function */ + if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { + cnv->toUCallbackReason = UCNV_UNASSIGNED; + } + cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, + cnv->invalidCharBuffer, errorInputLength, + cnv->toUCallbackReason, + err); + cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ + + /* + * loop back to the offset handling + * + * this flag will indicate after offset handling + * that a callback was called; + * if the callback did not resolve the error, then we return + */ + calledCallback=TRUE; + } + } +} + +/* + * Output the toUnicode overflow buffer. + * Call this function if(cnv->UCharErrorBufferLength>0). + * @return TRUE if overflow + */ +static UBool +ucnv_outputOverflowToUnicode(UConverter *cnv, + UChar **target, const UChar *targetLimit, + int32_t **pOffsets, + UErrorCode *err) { + int32_t *offsets; + UChar *overflow, *t; + int32_t i, length; + + t=*target; + if(pOffsets!=NULL) { + offsets=*pOffsets; + } else { + offsets=NULL; + } + + overflow=cnv->UCharErrorBuffer; + length=cnv->UCharErrorBufferLength; + i=0; + while(i<length) { + if(t==targetLimit) { + /* the overflow buffer contains too much, keep the rest */ + int32_t j=0; + + do { + overflow[j++]=overflow[i++]; + } while(i<length); + + cnv->UCharErrorBufferLength=(int8_t)j; + *target=t; + if(offsets!=NULL) { + *pOffsets=offsets; + } + *err=U_BUFFER_OVERFLOW_ERROR; + return TRUE; + } + + /* copy the overflow contents to the target */ + *t++=overflow[i++]; + if(offsets!=NULL) { + *offsets++=-1; /* no source index available for old output */ + } + } + + /* the overflow buffer is completely copied to the target */ + cnv->UCharErrorBufferLength=0; + *target=t; + if(offsets!=NULL) { + *pOffsets=offsets; + } + return FALSE; +} + +U_CAPI void U_EXPORT2 +ucnv_toUnicode(UConverter *cnv, + UChar **target, const UChar *targetLimit, + const char **source, const char *sourceLimit, + int32_t *offsets, + UBool flush, + UErrorCode *err) { + UConverterToUnicodeArgs args; + const char *s; + UChar *t; + + /* check parameters */ + if(err==NULL || U_FAILURE(*err)) { + return; + } + + if(cnv==NULL || target==NULL || source==NULL) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + s=*source; + t=*target; + + if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { + /* + Prevent code from going into an infinite loop in case we do hit this + limit. The limit pointer is expected to be on a UChar * boundary. + This also prevents the next argument check from failing. + */ + targetLimit = (const UChar *)(((const char *)targetLimit) - 1); + } + + /* + * All these conditions should never happen. + * + * 1) Make sure that the limits are >= to the address source or target + * + * 2) Make sure that the buffer sizes do not exceed the number range for + * int32_t because some functions use the size (in units or bytes) + * rather than comparing pointers, and because offsets are int32_t values. + * + * size_t is guaranteed to be unsigned and large enough for the job. + * + * Return with an error instead of adjusting the limits because we would + * not be able to maintain the semantics that either the source must be + * consumed or the target filled (unless an error occurs). + * An adjustment would be sourceLimit=t+0x7fffffff; for example. + * + * 3) Make sure that the user didn't incorrectly cast a UChar * pointer + * to a char * pointer and provide an incomplete UChar code unit. + */ + if (sourceLimit<s || targetLimit<t || + ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) || + ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || + (((const char *)targetLimit-(const char *)t) & 1) != 0 + ) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* output the target overflow buffer */ + if( cnv->UCharErrorBufferLength>0 && + ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) + ) { + /* U_BUFFER_OVERFLOW_ERROR */ + return; + } + /* *target may have moved, therefore stop using t */ + + if(!flush && s==sourceLimit && cnv->preToULength>=0) { + /* the overflow buffer is emptied and there is no new input: we are done */ + return; + } + + /* + * Do not simply return with a buffer overflow error if + * !flush && t==targetLimit + * because it is possible that the source will not generate any output. + * For example, the skip callback may be called; + * it does not output anything. + */ + + /* prepare the converter arguments */ + args.converter=cnv; + args.flush=flush; + args.offsets=offsets; + args.source=s; + args.sourceLimit=sourceLimit; + args.target=*target; + args.targetLimit=targetLimit; + args.size=sizeof(args); + + _toUnicodeWithCallback(&args, err); + + *source=args.source; + *target=args.target; +} + +/* ucnv_to/fromUChars() ----------------------------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +ucnv_fromUChars(UConverter *cnv, + char *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode) { + const UChar *srcLimit; + char *originalDest, *destLimit; + int32_t destLength; + + /* check arguments */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if( cnv==NULL || + destCapacity<0 || (destCapacity>0 && dest==NULL) || + srcLength<-1 || (srcLength!=0 && src==NULL) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* initialize */ + ucnv_resetFromUnicode(cnv); + originalDest=dest; + if(srcLength==-1) { + srcLength=u_strlen(src); + } + if(srcLength>0) { + srcLimit=src+srcLength; + destCapacity=pinCapacity(dest, destCapacity); + destLimit=dest+destCapacity; + + /* perform the conversion */ + ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); + destLength=(int32_t)(dest-originalDest); + + /* if an overflow occurs, then get the preflighting length */ + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { + char buffer[1024]; + + destLimit=buffer+sizeof(buffer); + do { + dest=buffer; + *pErrorCode=U_ZERO_ERROR; + ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); + destLength+=(int32_t)(dest-buffer); + } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); + } + } else { + destLength=0; + } + + return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +ucnv_toUChars(UConverter *cnv, + UChar *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode) { + const char *srcLimit; + UChar *originalDest, *destLimit; + int32_t destLength; + + /* check arguments */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if( cnv==NULL || + destCapacity<0 || (destCapacity>0 && dest==NULL) || + srcLength<-1 || (srcLength!=0 && src==NULL)) + { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* initialize */ + ucnv_resetToUnicode(cnv); + originalDest=dest; + if(srcLength==-1) { + srcLength=(int32_t)uprv_strlen(src); + } + if(srcLength>0) { + srcLimit=src+srcLength; + destCapacity=pinCapacity(dest, destCapacity); + destLimit=dest+destCapacity; + + /* perform the conversion */ + ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); + destLength=(int32_t)(dest-originalDest); + + /* if an overflow occurs, then get the preflighting length */ + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) + { + UChar buffer[1024]; + + destLimit=buffer+UPRV_LENGTHOF(buffer); + do { + dest=buffer; + *pErrorCode=U_ZERO_ERROR; + ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); + destLength+=(int32_t)(dest-buffer); + } + while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); + } + } else { + destLength=0; + } + + return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode); +} + +/* ucnv_getNextUChar() ------------------------------------------------------ */ + +U_CAPI UChar32 U_EXPORT2 +ucnv_getNextUChar(UConverter *cnv, + const char **source, const char *sourceLimit, + UErrorCode *err) { + UConverterToUnicodeArgs args; + UChar buffer[U16_MAX_LENGTH]; + const char *s; + UChar32 c; + int32_t i, length; + + /* check parameters */ + if(err==NULL || U_FAILURE(*err)) { + return 0xffff; + } + + if(cnv==NULL || source==NULL) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return 0xffff; + } + + s=*source; + if(sourceLimit<s) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return 0xffff; + } + + /* + * Make sure that the buffer sizes do not exceed the number range for + * int32_t because some functions use the size (in units or bytes) + * rather than comparing pointers, and because offsets are int32_t values. + * + * size_t is guaranteed to be unsigned and large enough for the job. + * + * Return with an error instead of adjusting the limits because we would + * not be able to maintain the semantics that either the source must be + * consumed or the target filled (unless an error occurs). + * An adjustment would be sourceLimit=t+0x7fffffff; for example. + */ + if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return 0xffff; + } + + c=U_SENTINEL; + + /* flush the target overflow buffer */ + if(cnv->UCharErrorBufferLength>0) { + UChar *overflow; + + overflow=cnv->UCharErrorBuffer; + i=0; + length=cnv->UCharErrorBufferLength; + U16_NEXT(overflow, i, length, c); + + /* move the remaining overflow contents up to the beginning */ + if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { + uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, + cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); + } + + if(!U16_IS_LEAD(c) || i<length) { + return c; + } + /* + * Continue if the overflow buffer contained only a lead surrogate, + * in case the converter outputs single surrogates from complete + * input sequences. + */ + } + + /* + * flush==TRUE is implied for ucnv_getNextUChar() + * + * do not simply return even if s==sourceLimit because the converter may + * not have seen flush==TRUE before + */ + + /* prepare the converter arguments */ + args.converter=cnv; + args.flush=TRUE; + args.offsets=NULL; + args.source=s; + args.sourceLimit=sourceLimit; + args.target=buffer; + args.targetLimit=buffer+1; + args.size=sizeof(args); + + if(c<0) { + /* + * call the native getNextUChar() implementation if we are + * at a character boundary (toULength==0) + * + * unlike with _toUnicode(), getNextUChar() implementations must set + * U_TRUNCATED_CHAR_FOUND for truncated input, + * in addition to setting toULength/toUBytes[] + */ + if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { + c=cnv->sharedData->impl->getNextUChar(&args, err); + *source=s=args.source; + if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { + /* reset the converter without calling the callback function */ + _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); + return 0xffff; /* no output */ + } else if(U_SUCCESS(*err) && c>=0) { + return c; + /* + * else fall through to use _toUnicode() because + * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all + * U_FAILURE: call _toUnicode() for callback handling (do not output c) + */ + } + } + + /* convert to one UChar in buffer[0], or handle getNextUChar() errors */ + _toUnicodeWithCallback(&args, err); + + if(*err==U_BUFFER_OVERFLOW_ERROR) { + *err=U_ZERO_ERROR; + } + + i=0; + length=(int32_t)(args.target-buffer); + } else { + /* write the lead surrogate from the overflow buffer */ + buffer[0]=(UChar)c; + args.target=buffer+1; + i=0; + length=1; + } + + /* buffer contents starts at i and ends before length */ + + if(U_FAILURE(*err)) { + c=0xffff; /* no output */ + } else if(length==0) { + /* no input or only state changes */ + *err=U_INDEX_OUTOFBOUNDS_ERROR; + /* no need to reset explicitly because _toUnicodeWithCallback() did it */ + c=0xffff; /* no output */ + } else { + c=buffer[0]; + i=1; + if(!U16_IS_LEAD(c)) { + /* consume c=buffer[0], done */ + } else { + /* got a lead surrogate, see if a trail surrogate follows */ + UChar c2; + + if(cnv->UCharErrorBufferLength>0) { + /* got overflow output from the conversion */ + if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { + /* got a trail surrogate, too */ + c=U16_GET_SUPPLEMENTARY(c, c2); + + /* move the remaining overflow contents up to the beginning */ + if((--cnv->UCharErrorBufferLength)>0) { + uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, + cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); + } + } else { + /* c is an unpaired lead surrogate, just return it */ + } + } else if(args.source<sourceLimit) { + /* convert once more, to buffer[1] */ + args.targetLimit=buffer+2; + _toUnicodeWithCallback(&args, err); + if(*err==U_BUFFER_OVERFLOW_ERROR) { + *err=U_ZERO_ERROR; + } + + length=(int32_t)(args.target-buffer); + if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) { + /* got a trail surrogate, too */ + c=U16_GET_SUPPLEMENTARY(c, c2); + i=2; + } + } + } + } + + /* + * move leftover output from buffer[i..length[ + * into the beginning of the overflow buffer + */ + if(i<length) { + /* move further overflow back */ + int32_t delta=length-i; + if((length=cnv->UCharErrorBufferLength)>0) { + uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, + length*U_SIZEOF_UCHAR); + } + cnv->UCharErrorBufferLength=(int8_t)(length+delta); + + cnv->UCharErrorBuffer[0]=buffer[i++]; + if(delta>1) { + cnv->UCharErrorBuffer[1]=buffer[i]; + } + } + + *source=args.source; + return c; +} + +/* ucnv_convert() and siblings ---------------------------------------------- */ + +U_CAPI void U_EXPORT2 +ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, + char **target, const char *targetLimit, + const char **source, const char *sourceLimit, + UChar *pivotStart, UChar **pivotSource, + UChar **pivotTarget, const UChar *pivotLimit, + UBool reset, UBool flush, + UErrorCode *pErrorCode) { + UChar pivotBuffer[CHUNK_SIZE]; + const UChar *myPivotSource; + UChar *myPivotTarget; + const char *s; + char *t; + + UConverterToUnicodeArgs toUArgs; + UConverterFromUnicodeArgs fromUArgs; + UConverterConvert convert; + + /* error checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return; + } + + if( targetCnv==NULL || sourceCnv==NULL || + source==NULL || *source==NULL || + target==NULL || *target==NULL || targetLimit==NULL + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + s=*source; + t=*target; + if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* + * Make sure that the buffer sizes do not exceed the number range for + * int32_t. See ucnv_toUnicode() for a more detailed comment. + */ + if( + (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) || + ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if(pivotStart==NULL) { + if(!flush) { + /* streaming conversion requires an explicit pivot buffer */ + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* use the stack pivot buffer */ + myPivotSource=myPivotTarget=pivotStart=pivotBuffer; + pivotSource=(UChar **)&myPivotSource; + pivotTarget=&myPivotTarget; + pivotLimit=pivotBuffer+CHUNK_SIZE; + } else if( pivotStart>=pivotLimit || + pivotSource==NULL || *pivotSource==NULL || + pivotTarget==NULL || *pivotTarget==NULL || + pivotLimit==NULL + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if(sourceLimit==NULL) { + /* get limit of single-byte-NUL-terminated source string */ + sourceLimit=uprv_strchr(*source, 0); + } + + if(reset) { + ucnv_resetToUnicode(sourceCnv); + ucnv_resetFromUnicode(targetCnv); + *pivotSource=*pivotTarget=pivotStart; + } else if(targetCnv->charErrorBufferLength>0) { + /* output the targetCnv overflow buffer */ + if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) { + /* U_BUFFER_OVERFLOW_ERROR */ + return; + } + /* *target has moved, therefore stop using t */ + + if( !flush && + targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget && + sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit + ) { + /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */ + return; + } + } + + /* Is direct-UTF-8 conversion available? */ + if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && + targetCnv->sharedData->impl->fromUTF8!=NULL + ) { + convert=targetCnv->sharedData->impl->fromUTF8; + } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && + sourceCnv->sharedData->impl->toUTF8!=NULL + ) { + convert=sourceCnv->sharedData->impl->toUTF8; + } else { + convert=NULL; + } + + /* + * If direct-UTF-8 conversion is available, then we use a smaller + * pivot buffer for error handling and partial matches + * so that we quickly return to direct conversion. + * + * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. + * + * We could reduce the pivot buffer size further, at the cost of + * buffer overflows from callbacks. + * The pivot buffer should not be smaller than the maximum number of + * fromUnicode extension table input UChars + * (for m:n conversion, see + * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) + * or 2 for surrogate pairs. + * + * Too small a buffer can cause thrashing between pivoting and direct + * conversion, with function call overhead outweighing the benefits + * of direct conversion. + */ + if(convert!=NULL && (pivotLimit-pivotStart)>32) { + pivotLimit=pivotStart+32; + } + + /* prepare the converter arguments */ + fromUArgs.converter=targetCnv; + fromUArgs.flush=FALSE; + fromUArgs.offsets=NULL; + fromUArgs.target=*target; + fromUArgs.targetLimit=targetLimit; + fromUArgs.size=sizeof(fromUArgs); + + toUArgs.converter=sourceCnv; + toUArgs.flush=flush; + toUArgs.offsets=NULL; + toUArgs.source=s; + toUArgs.sourceLimit=sourceLimit; + toUArgs.targetLimit=pivotLimit; + toUArgs.size=sizeof(toUArgs); + + /* + * TODO: Consider separating this function into two functions, + * extracting exactly the conversion loop, + * for readability and to reduce the set of visible variables. + * + * Otherwise stop using s and t from here on. + */ + s=t=NULL; + + /* + * conversion loop + * + * The sequence of steps in the loop may appear backward, + * but the principle is simple: + * In the chain of + * source - sourceCnv overflow - pivot - targetCnv overflow - target + * empty out later buffers before refilling them from earlier ones. + * + * The targetCnv overflow buffer is flushed out only once before the loop. + */ + for(;;) { + /* + * if(pivot not empty or error or replay or flush fromUnicode) { + * fromUnicode(pivot -> target); + * } + * + * For pivoting conversion; and for direct conversion for + * error callback handling and flushing the replay buffer. + */ + if( *pivotSource<*pivotTarget || + U_FAILURE(*pErrorCode) || + targetCnv->preFromULength<0 || + fromUArgs.flush + ) { + fromUArgs.source=*pivotSource; + fromUArgs.sourceLimit=*pivotTarget; + _fromUnicodeWithCallback(&fromUArgs, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + /* target overflow, or conversion error */ + *pivotSource=(UChar *)fromUArgs.source; + break; + } + + /* + * _fromUnicodeWithCallback() must have consumed the pivot contents + * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() + */ + } + + /* The pivot buffer is empty; reset it so we start at pivotStart. */ + *pivotSource=*pivotTarget=pivotStart; + + /* + * if(sourceCnv overflow buffer not empty) { + * move(sourceCnv overflow buffer -> pivot); + * continue; + * } + */ + /* output the sourceCnv overflow buffer */ + if(sourceCnv->UCharErrorBufferLength>0) { + if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) { + /* U_BUFFER_OVERFLOW_ERROR */ + *pErrorCode=U_ZERO_ERROR; + } + continue; + } + + /* + * check for end of input and break if done + * + * Checking both flush and fromUArgs.flush ensures that the converters + * have been called with the flush flag set if the ucnv_convertEx() + * caller set it. + */ + if( toUArgs.source==sourceLimit && + sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && + (!flush || fromUArgs.flush) + ) { + /* done successfully */ + break; + } + + /* + * use direct conversion if available + * but not if continuing a partial match + * or flushing the toUnicode replay buffer + */ + if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { + if(*pErrorCode==U_USING_DEFAULT_WARNING) { + /* remove a warning that may be set by this function */ + *pErrorCode=U_ZERO_ERROR; + } + convert(&fromUArgs, &toUArgs, pErrorCode); + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { + break; + } else if(U_FAILURE(*pErrorCode)) { + if(sourceCnv->toULength>0) { + /* + * Fall through to calling _toUnicodeWithCallback() + * for callback handling. + * + * The pivot buffer will be reset with + * *pivotSource=*pivotTarget=pivotStart; + * which indicates a toUnicode error to the caller + * (*pivotSource==pivotStart shows no pivot UChars consumed). + */ + } else { + /* + * Indicate a fromUnicode error to the caller + * (*pivotSource>pivotStart shows some pivot UChars consumed). + */ + *pivotSource=*pivotTarget=pivotStart+1; + /* + * Loop around to calling _fromUnicodeWithCallbacks() + * for callback handling. + */ + continue; + } + } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { + /* + * No error, but the implementation requested to temporarily + * fall back to pivoting. + */ + *pErrorCode=U_ZERO_ERROR; + /* + * The following else branches are almost identical to the end-of-input + * handling in _toUnicodeWithCallback(). + * Avoid calling it just for the end of input. + */ + } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ + /* + * the entire input stream is consumed + * and there is a partial, truncated input sequence left + */ + + /* inject an error and continue with callback handling */ + *pErrorCode=U_TRUNCATED_CHAR_FOUND; + } else { + /* input consumed */ + if(flush) { + /* reset the converters without calling the callback functions */ + _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE); + _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE); + } + + /* done successfully */ + break; + } + } + + /* + * toUnicode(source -> pivot); + * + * For pivoting conversion; and for direct conversion for + * error callback handling, continuing partial matches + * and flushing the replay buffer. + * + * The pivot buffer is empty and reset. + */ + toUArgs.target=pivotStart; /* ==*pivotTarget */ + /* toUArgs.targetLimit=pivotLimit; already set before the loop */ + _toUnicodeWithCallback(&toUArgs, pErrorCode); + *pivotTarget=toUArgs.target; + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { + /* pivot overflow: continue with the conversion loop */ + *pErrorCode=U_ZERO_ERROR; + } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { + /* conversion error, or there was nothing left to convert */ + break; + } + /* + * else: + * _toUnicodeWithCallback() wrote into the pivot buffer, + * continue with fromUnicode conversion. + * + * Set the fromUnicode flush flag if we flush and if toUnicode has + * processed the end of the input. + */ + if( flush && toUArgs.source==sourceLimit && + sourceCnv->preToULength>=0 && + sourceCnv->UCharErrorBufferLength==0 + ) { + fromUArgs.flush=TRUE; + } + } + + /* + * The conversion loop is exited when one of the following is true: + * - the entire source text has been converted successfully to the target buffer + * - a target buffer overflow occurred + * - a conversion error occurred + */ + + *source=toUArgs.source; + *target=fromUArgs.target; + + /* terminate the target buffer if possible */ + if(flush && U_SUCCESS(*pErrorCode)) { + if(*target!=targetLimit) { + **target=0; + if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { + *pErrorCode=U_ZERO_ERROR; + } + } else { + *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; + } + } +} + +/* internal implementation of ucnv_convert() etc. with preflighting */ +static int32_t +ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode) { + UChar pivotBuffer[CHUNK_SIZE]; + UChar *pivot, *pivot2; + + char *myTarget; + const char *sourceLimit; + const char *targetLimit; + int32_t targetLength=0; + + /* set up */ + if(sourceLength<0) { + sourceLimit=uprv_strchr(source, 0); + } else { + sourceLimit=source+sourceLength; + } + + /* if there is no input data, we're done */ + if(source==sourceLimit) { + return u_terminateChars(target, targetCapacity, 0, pErrorCode); + } + + pivot=pivot2=pivotBuffer; + myTarget=target; + targetLength=0; + + if(targetCapacity>0) { + /* perform real conversion */ + targetLimit=target+targetCapacity; + ucnv_convertEx(outConverter, inConverter, + &myTarget, targetLimit, + &source, sourceLimit, + pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, + FALSE, + TRUE, + pErrorCode); + targetLength=(int32_t)(myTarget-target); + } + + /* + * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing + * to it but continue the conversion in order to store in targetCapacity + * the number of bytes that was required. + */ + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) + { + char targetBuffer[CHUNK_SIZE]; + + targetLimit=targetBuffer+CHUNK_SIZE; + do { + *pErrorCode=U_ZERO_ERROR; + myTarget=targetBuffer; + ucnv_convertEx(outConverter, inConverter, + &myTarget, targetLimit, + &source, sourceLimit, + pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, + FALSE, + TRUE, + pErrorCode); + targetLength+=(int32_t)(myTarget-targetBuffer); + } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); + + /* done with preflighting, set warnings and errors as appropriate */ + return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); + } + + /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ + return targetLength; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_convert(const char *toConverterName, const char *fromConverterName, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode) { + UConverter in, out; /* stack-allocated */ + UConverter *inConverter, *outConverter; + int32_t targetLength; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if( source==NULL || sourceLength<-1 || + targetCapacity<0 || (targetCapacity>0 && target==NULL) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* if there is no input data, we're done */ + if(sourceLength==0 || (sourceLength<0 && *source==0)) { + return u_terminateChars(target, targetCapacity, 0, pErrorCode); + } + + /* create the converters */ + inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + ucnv_close(inConverter); + return 0; + } + + targetLength=ucnv_internalConvert(outConverter, inConverter, + target, targetCapacity, + source, sourceLength, + pErrorCode); + + ucnv_close(inConverter); + ucnv_close(outConverter); + + return targetLength; +} + +/* @internal */ +static int32_t +ucnv_convertAlgorithmic(UBool convertToAlgorithmic, + UConverterType algorithmicType, + UConverter *cnv, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode) { + UConverter algoConverterStatic; /* stack-allocated */ + UConverter *algoConverter, *to, *from; + int32_t targetLength; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if( cnv==NULL || source==NULL || sourceLength<-1 || + targetCapacity<0 || (targetCapacity>0 && target==NULL) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* if there is no input data, we're done */ + if(sourceLength==0 || (sourceLength<0 && *source==0)) { + return u_terminateChars(target, targetCapacity, 0, pErrorCode); + } + + /* create the algorithmic converter */ + algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, + "", 0, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + /* reset the other converter */ + if(convertToAlgorithmic) { + /* cnv->Unicode->algo */ + ucnv_resetToUnicode(cnv); + to=algoConverter; + from=cnv; + } else { + /* algo->Unicode->cnv */ + ucnv_resetFromUnicode(cnv); + from=algoConverter; + to=cnv; + } + + targetLength=ucnv_internalConvert(to, from, + target, targetCapacity, + source, sourceLength, + pErrorCode); + + ucnv_close(algoConverter); + + return targetLength; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_toAlgorithmic(UConverterType algorithmicType, + UConverter *cnv, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode) { + return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, + target, targetCapacity, + source, sourceLength, + pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +ucnv_fromAlgorithmic(UConverter *cnv, + UConverterType algorithmicType, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode) { + return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, + target, targetCapacity, + source, sourceLength, + pErrorCode); +} + +U_CAPI UConverterType U_EXPORT2 +ucnv_getType(const UConverter* converter) +{ + int8_t type = converter->sharedData->staticData->conversionType; +#if !UCONFIG_NO_LEGACY_CONVERSION + if(type == UCNV_MBCS) { + return ucnv_MBCSGetType(converter); + } +#endif + return (UConverterType)type; +} + +U_CAPI void U_EXPORT2 +ucnv_getStarters(const UConverter* converter, + UBool starters[256], + UErrorCode* err) +{ + if (err == NULL || U_FAILURE(*err)) { + return; + } + + if(converter->sharedData->impl->getStarters != NULL) { + converter->sharedData->impl->getStarters(converter, starters, err); + } else { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } +} + +static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) +{ + UErrorCode errorCode; + const char *name; + int32_t i; + + if(cnv==NULL) { + return NULL; + } + + errorCode=U_ZERO_ERROR; + name=ucnv_getName(cnv, &errorCode); + if(U_FAILURE(errorCode)) { + return NULL; + } + + for(i=0; i<UPRV_LENGTHOF(ambiguousConverters); ++i) + { + if(0==uprv_strcmp(name, ambiguousConverters[i].name)) + { + return ambiguousConverters+i; + } + } + + return NULL; +} + +U_CAPI void U_EXPORT2 +ucnv_fixFileSeparator(const UConverter *cnv, + UChar* source, + int32_t sourceLength) { + const UAmbiguousConverter *a; + int32_t i; + UChar variant5c; + + if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL) + { + return; + } + + variant5c=a->variant5c; + for(i=0; i<sourceLength; ++i) { + if(source[i]==variant5c) { + source[i]=0x5c; + } + } +} + +U_CAPI UBool U_EXPORT2 +ucnv_isAmbiguous(const UConverter *cnv) { + return (UBool)(ucnv_getAmbiguous(cnv)!=NULL); +} + +U_CAPI void U_EXPORT2 +ucnv_setFallback(UConverter *cnv, UBool usesFallback) +{ + cnv->useFallback = usesFallback; +} + +U_CAPI UBool U_EXPORT2 +ucnv_usesFallback(const UConverter *cnv) +{ + return cnv->useFallback; +} + +U_CAPI void U_EXPORT2 +ucnv_getInvalidChars (const UConverter * converter, + char *errBytes, + int8_t * len, + UErrorCode * err) +{ + if (err == NULL || U_FAILURE(*err)) + { + return; + } + if (len == NULL || errBytes == NULL || converter == NULL) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if (*len < converter->invalidCharLength) + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + if ((*len = converter->invalidCharLength) > 0) + { + uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); + } +} + +U_CAPI void U_EXPORT2 +ucnv_getInvalidUChars (const UConverter * converter, + UChar *errChars, + int8_t * len, + UErrorCode * err) +{ + if (err == NULL || U_FAILURE(*err)) + { + return; + } + if (len == NULL || errChars == NULL || converter == NULL) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if (*len < converter->invalidUCharLength) + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + if ((*len = converter->invalidUCharLength) > 0) + { + u_memcpy (errChars, converter->invalidUCharBuffer, *len); + } +} + +#define SIG_MAX_LEN 5 + +U_CAPI const char* U_EXPORT2 +ucnv_detectUnicodeSignature( const char* source, + int32_t sourceLength, + int32_t* signatureLength, + UErrorCode* pErrorCode) { + int32_t dummy; + + /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN + * bytes we don't misdetect something + */ + char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' }; + int i = 0; + + if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){ + return NULL; + } + + if(source == NULL || sourceLength < -1){ + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + if(signatureLength == NULL) { + signatureLength = &dummy; + } + + if(sourceLength==-1){ + sourceLength=(int32_t)uprv_strlen(source); + } + + + while(i<sourceLength&& i<SIG_MAX_LEN){ + start[i]=source[i]; + i++; + } + + if(start[0] == '\xFE' && start[1] == '\xFF') { + *signatureLength=2; + return "UTF-16BE"; + } else if(start[0] == '\xFF' && start[1] == '\xFE') { + if(start[2] == '\x00' && start[3] =='\x00') { + *signatureLength=4; + return "UTF-32LE"; + } else { + *signatureLength=2; + return "UTF-16LE"; + } + } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') { + *signatureLength=3; + return "UTF-8"; + } else if(start[0] == '\x00' && start[1] == '\x00' && + start[2] == '\xFE' && start[3]=='\xFF') { + *signatureLength=4; + return "UTF-32BE"; + } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') { + *signatureLength=3; + return "SCSU"; + } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') { + *signatureLength=3; + return "BOCU-1"; + } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') { + /* + * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/ + * depending on the second UTF-16 code unit. + * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF + * if it occurs. + * + * So far we have +/v + */ + if(start[3] == '\x38' && start[4] == '\x2D') { + /* 5 bytes +/v8- */ + *signatureLength=5; + return "UTF-7"; + } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') { + /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */ + *signatureLength=4; + return "UTF-7"; + } + }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){ + *signatureLength=4; + return "UTF-EBCDIC"; + } + + + /* no known Unicode signature byte sequence recognized */ + *signatureLength=0; + return NULL; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status) +{ + if(status == NULL || U_FAILURE(*status)){ + return -1; + } + if(cnv == NULL){ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + + if(cnv->preFromUFirstCP >= 0){ + return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; + }else if(cnv->preFromULength < 0){ + return -cnv->preFromULength ; + }else if(cnv->fromUChar32 > 0){ + return 1; + } + return 0; + +} + +U_CAPI int32_t U_EXPORT2 +ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ + + if(status == NULL || U_FAILURE(*status)){ + return -1; + } + if(cnv == NULL){ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + + if(cnv->preToULength > 0){ + return cnv->preToULength ; + }else if(cnv->preToULength < 0){ + return -cnv->preToULength; + }else if(cnv->toULength > 0){ + return cnv->toULength; + } + return 0; +} + +U_CAPI UBool U_EXPORT2 +ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){ + if (U_FAILURE(*status)) { + return FALSE; + } + + if (cnv == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + + switch (ucnv_getType(cnv)) { + case UCNV_SBCS: + case UCNV_DBCS: + case UCNV_UTF32_BigEndian: + case UCNV_UTF32_LittleEndian: + case UCNV_UTF32: + case UCNV_US_ASCII: + return TRUE; + default: + return FALSE; + } +} +#endif + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/thirdparty/icu4c/common/ucnv2022.cpp b/thirdparty/icu4c/common/ucnv2022.cpp new file mode 100644 index 0000000000..169ad4c526 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv2022.cpp @@ -0,0 +1,3973 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv2022.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000feb03 +* created by: Markus W. Scherer +* +* Change history: +* +* 06/29/2000 helena Major rewrite of the callback APIs. +* 08/08/2000 Ram Included support for ISO-2022-JP-2 +* Changed implementation of toUnicode +* function +* 08/21/2000 Ram Added support for ISO-2022-KR +* 08/29/2000 Ram Seperated implementation of EBCDIC to +* ucnvebdc.c +* 09/20/2000 Ram Added support for ISO-2022-CN +* Added implementations for getNextUChar() +* for specific 2022 country variants. +* 10/31/2000 Ram Implemented offsets logic functions +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/uset.h" +#include "unicode/ucnv_err.h" +#include "unicode/ucnv_cb.h" +#include "unicode/utf16.h" +#include "ucnv_imp.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "ucnvmbcs.h" +#include "cstring.h" +#include "cmemory.h" +#include "uassert.h" + +#ifdef U_ENABLE_GENERIC_ISO_2022 +/* + * I am disabling the generic ISO-2022 converter after proposing to do so on + * the icu mailing list two days ago. + * + * Reasons: + * 1. It does not fully support the ISO-2022/ECMA-35 specification with all of + * its designation sequences, single shifts with return to the previous state, + * switch-with-no-return to UTF-16BE or similar, etc. + * This is unlike the language-specific variants like ISO-2022-JP which + * require a much smaller repertoire of ISO-2022 features. + * These variants continue to be supported. + * 2. I believe that no one is really using the generic ISO-2022 converter + * but rather always one of the language-specific variants. + * Note that ICU's generic ISO-2022 converter has always output one escape + * sequence followed by UTF-8 for the whole stream. + * 3. Switching between subcharsets is extremely slow, because each time + * the previous converter is closed and a new one opened, + * without any kind of caching, least-recently-used list, etc. + * 4. The code is currently buggy, and given the above it does not seem + * reasonable to spend the time on maintenance. + * 5. ISO-2022 subcharsets should normally be used with 7-bit byte encodings. + * This means, for example, that when ISO-8859-7 is designated, the following + * ISO-2022 bytes 00..7f should be interpreted as ISO-8859-7 bytes 80..ff. + * The ICU ISO-2022 converter does not handle this - and has no information + * about which subconverter would have to be shifted vs. which is designed + * for 7-bit ISO-2022. + * + * Markus Scherer 2003-dec-03 + */ +#endif + +#if !UCONFIG_ONLY_HTML_CONVERSION +static const char SHIFT_IN_STR[] = "\x0F"; +// static const char SHIFT_OUT_STR[] = "\x0E"; +#endif + +#define CR 0x0D +#define LF 0x0A +#define H_TAB 0x09 +#define V_TAB 0x0B +#define SPACE 0x20 + +enum { + HWKANA_START=0xff61, + HWKANA_END=0xff9f +}; + +/* + * 94-character sets with native byte values A1..FE are encoded in ISO 2022 + * as bytes 21..7E. (Subtract 0x80.) + * 96-character sets with native byte values A0..FF are encoded in ISO 2022 + * as bytes 20..7F. (Subtract 0x80.) + * Do not encode C1 control codes with native bytes 80..9F + * as bytes 00..1F (C0 control codes). + */ +enum { + GR94_START=0xa1, + GR94_END=0xfe, + GR96_START=0xa0, + GR96_END=0xff +}; + +/* + * ISO 2022 control codes must not be converted from Unicode + * because they would mess up the byte stream. + * The bit mask 0x0800c000 has bits set at bit positions 0xe, 0xf, 0x1b + * corresponding to SO, SI, and ESC. + */ +#define IS_2022_CONTROL(c) (((c)<0x20) && (((uint32_t)1<<(c))&0x0800c000)!=0) + +/* for ISO-2022-JP and -CN implementations */ +typedef enum { + /* shared values */ + INVALID_STATE=-1, + ASCII = 0, + + SS2_STATE=0x10, + SS3_STATE, + + /* JP */ + ISO8859_1 = 1 , + ISO8859_7 = 2 , + JISX201 = 3, + JISX208 = 4, + JISX212 = 5, + GB2312 =6, + KSC5601 =7, + HWKANA_7BIT=8, /* Halfwidth Katakana 7 bit */ + + /* CN */ + /* the first few enum constants must keep their values because they correspond to myConverterArray[] */ + GB2312_1=1, + ISO_IR_165=2, + CNS_11643=3, + + /* + * these are used in StateEnum and ISO2022State variables, + * but CNS_11643 must be used to index into myConverterArray[] + */ + CNS_11643_0=0x20, + CNS_11643_1, + CNS_11643_2, + CNS_11643_3, + CNS_11643_4, + CNS_11643_5, + CNS_11643_6, + CNS_11643_7 +} StateEnum; + +/* is the StateEnum charset value for a DBCS charset? */ +#if UCONFIG_ONLY_HTML_CONVERSION +#define IS_JP_DBCS(cs) (JISX208==(cs)) +#else +#define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) +#endif + +#define CSM(cs) ((uint16_t)1<<(cs)) + +/* + * Each of these charset masks (with index x) contains a bit for a charset in exact correspondence + * to whether that charset is used in the corresponding version x of ISO_2022,locale=ja,version=x + * + * Note: The converter uses some leniency: + * - The escape sequence ESC ( I for half-width 7-bit Katakana is recognized in + * all versions, not just JIS7 and JIS8. + * - ICU does not distinguish between different versions of JIS X 0208. + */ +#if UCONFIG_ONLY_HTML_CONVERSION +enum { MAX_JA_VERSION=0 }; +#else +enum { MAX_JA_VERSION=4 }; +#endif +static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ + CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), +#if !UCONFIG_ONLY_HTML_CONVERSION + CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), + CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), + CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), + CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) +#endif +}; + +typedef enum { + ASCII1=0, + LATIN1, + SBCS, + DBCS, + MBCS, + HWKANA +}Cnv2022Type; + +typedef struct ISO2022State { + int8_t cs[4]; /* charset number for SI (G0)/SO (G1)/SS2 (G2)/SS3 (G3) */ + int8_t g; /* 0..3 for G0..G3 (SI/SO/SS2/SS3) */ + int8_t prevG; /* g before single shift (SS2 or SS3) */ +} ISO2022State; + +#define UCNV_OPTIONS_VERSION_MASK 0xf +#define UCNV_2022_MAX_CONVERTERS 10 + +typedef struct{ + UConverterSharedData *myConverterArray[UCNV_2022_MAX_CONVERTERS]; + UConverter *currentConverter; + Cnv2022Type currentType; + ISO2022State toU2022State, fromU2022State; + uint32_t key; + uint32_t version; +#ifdef U_ENABLE_GENERIC_ISO_2022 + UBool isFirstBuffer; +#endif + UBool isEmptySegment; + char name[30]; + char locale[3]; +}UConverterDataISO2022; + +/* Protos */ +/* ISO-2022 ----------------------------------------------------------------- */ + +/*Forward declaration */ +U_CFUNC void U_CALLCONV +ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs * args, + UErrorCode * err); +U_CFUNC void U_CALLCONV +ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs * args, + UErrorCode * err); + +#define ESC_2022 0x1B /*ESC*/ + +typedef enum +{ + INVALID_2022 = -1, /*Doesn't correspond to a valid iso 2022 escape sequence*/ + VALID_NON_TERMINAL_2022 = 0, /*so far corresponds to a valid iso 2022 escape sequence*/ + VALID_TERMINAL_2022 = 1, /*corresponds to a valid iso 2022 escape sequence*/ + VALID_MAYBE_TERMINAL_2022 = 2 /*so far matches one iso 2022 escape sequence, but by adding more characters might match another escape sequence*/ +} UCNV_TableStates_2022; + +/* +* The way these state transition arrays work is: +* ex : ESC$B is the sequence for JISX208 +* a) First Iteration: char is ESC +* i) Get the value of ESC from normalize_esq_chars_2022[] with int value of ESC as index +* int x = normalize_esq_chars_2022[27] which is equal to 1 +* ii) Search for this value in escSeqStateTable_Key_2022[] +* value of x is stored at escSeqStateTable_Key_2022[0] +* iii) Save this index as offset +* iv) Get state of this sequence from escSeqStateTable_Value_2022[] +* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022 +* b) Switch on this state and continue to next char +* i) Get the value of $ from normalize_esq_chars_2022[] with int value of $ as index +* which is normalize_esq_chars_2022[36] == 4 +* ii) x is currently 1(from above) +* x<<=5 -- x is now 32 +* x+=normalize_esq_chars_2022[36] +* now x is 36 +* iii) Search for this value in escSeqStateTable_Key_2022[] +* value of x is stored at escSeqStateTable_Key_2022[2], so offset is 2 +* iv) Get state of this sequence from escSeqStateTable_Value_2022[] +* escSeqStateTable_Value_2022[offset], which is VALID_NON_TERMINAL_2022 +* c) Switch on this state and continue to next char +* i) Get the value of B from normalize_esq_chars_2022[] with int value of B as index +* ii) x is currently 36 (from above) +* x<<=5 -- x is now 1152 +* x+=normalize_esq_chars_2022[66] +* now x is 1161 +* iii) Search for this value in escSeqStateTable_Key_2022[] +* value of x is stored at escSeqStateTable_Key_2022[21], so offset is 21 +* iv) Get state of this sequence from escSeqStateTable_Value_2022[21] +* escSeqStateTable_Value_2022[offset], which is VALID_TERMINAL_2022 +* v) Get the converter name form escSeqStateTable_Result_2022[21] which is JISX208 +*/ + + +/*Below are the 3 arrays depicting a state transition table*/ +static const int8_t normalize_esq_chars_2022[256] = { +/* 0 1 2 3 4 5 6 7 8 9 */ + + 0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,1 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,4 ,7 ,29 ,0 + ,2 ,24 ,26 ,27 ,0 ,3 ,23 ,6 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,5 ,8 ,9 ,10 ,11 ,12 + ,13 ,14 ,15 ,16 ,17 ,18 ,19 ,20 ,25 ,28 + ,0 ,0 ,21 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,22 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 + ,0 ,0 ,0 ,0 ,0 ,0 +}; + +#ifdef U_ENABLE_GENERIC_ISO_2022 +/* + * When the generic ISO-2022 converter is completely removed, not just disabled + * per #ifdef, then the following state table and the associated tables that are + * dimensioned with MAX_STATES_2022 should be trimmed. + * + * Especially, VALID_MAYBE_TERMINAL_2022 will not be used any more, and all of + * the associated escape sequences starting with ESC ( B should be removed. + * This includes the ones with key values 1097 and all of the ones above 1000000. + * + * For the latter, the tables can simply be truncated. + * For the former, since the tables must be kept parallel, it is probably best + * to simply duplicate an adjacent table cell, parallel in all tables. + * + * It may make sense to restructure the tables, especially by using small search + * tables for the variants instead of indexing them parallel to the table here. + */ +#endif + +#define MAX_STATES_2022 74 +static const int32_t escSeqStateTable_Key_2022[MAX_STATES_2022] = { +/* 0 1 2 3 4 5 6 7 8 9 */ + + 1 ,34 ,36 ,39 ,55 ,57 ,60 ,61 ,1093 ,1096 + ,1097 ,1098 ,1099 ,1100 ,1101 ,1102 ,1103 ,1104 ,1105 ,1106 + ,1109 ,1154 ,1157 ,1160 ,1161 ,1176 ,1178 ,1179 ,1254 ,1257 + ,1768 ,1773 ,1957 ,35105 ,36933 ,36936 ,36937 ,36938 ,36939 ,36940 + ,36942 ,36943 ,36944 ,36945 ,36946 ,36947 ,36948 ,37640 ,37642 ,37644 + ,37646 ,37711 ,37744 ,37745 ,37746 ,37747 ,37748 ,40133 ,40136 ,40138 + ,40139 ,40140 ,40141 ,1123363 ,35947624 ,35947625 ,35947626 ,35947627 ,35947629 ,35947630 + ,35947631 ,35947635 ,35947636 ,35947638 +}; + +#ifdef U_ENABLE_GENERIC_ISO_2022 + +static const char* const escSeqStateTable_Result_2022[MAX_STATES_2022] = { + /* 0 1 2 3 4 5 6 7 8 9 */ + + NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,NULL ,"latin1" ,"latin1" + ,"latin1" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"ibm-865" ,"JISX0201" ,"JISX0201" ,"latin1" + ,"latin1" ,NULL ,"JISX-208" ,"ibm-5478" ,"JISX-208" ,NULL ,NULL ,NULL ,NULL ,"UTF8" + ,"ISO-8859-1" ,"ISO-8859-7" ,"JIS-X-208" ,NULL ,"ibm-955" ,"ibm-367" ,"ibm-952" ,"ibm-949" ,"JISX-212" ,"ibm-1383" + ,"ibm-952" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-964" ,"ibm-5478" ,"ibm-949" ,"ISO-IR-165" + ,"CNS-11643-1992,1" ,"CNS-11643-1992,2" ,"CNS-11643-1992,3" ,"CNS-11643-1992,4" ,"CNS-11643-1992,5" ,"CNS-11643-1992,6" ,"CNS-11643-1992,7" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" + ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,"UTF16_PlatformEndian" ,NULL ,"latin1" ,"ibm-912" ,"ibm-913" ,"ibm-914" ,"ibm-813" ,"ibm-1089" + ,"ibm-920" ,"ibm-915" ,"ibm-915" ,"latin1" +}; + +#endif + +static const int8_t escSeqStateTable_Value_2022[MAX_STATES_2022] = { +/* 0 1 2 3 4 5 6 7 8 9 */ + VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 + ,VALID_MAYBE_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 + ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 + ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 + ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 + ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 + ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_NON_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 + ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 +}; + +/* Type def for refactoring changeState_2022 code*/ +typedef enum{ +#ifdef U_ENABLE_GENERIC_ISO_2022 + ISO_2022=0, +#endif + ISO_2022_JP=1, +#if !UCONFIG_ONLY_HTML_CONVERSION + ISO_2022_KR=2, + ISO_2022_CN=3 +#endif +} Variant2022; + +/*********** ISO 2022 Converter Protos ***********/ +static void U_CALLCONV +_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); + +static void U_CALLCONV + _ISO2022Close(UConverter *converter); + +static void U_CALLCONV +_ISO2022Reset(UConverter *converter, UConverterResetChoice choice); + +U_CDECL_BEGIN +static const char * U_CALLCONV +_ISO2022getName(const UConverter* cnv); +U_CDECL_END + +static void U_CALLCONV +_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err); + +U_CDECL_BEGIN +static UConverter * U_CALLCONV +_ISO_2022_SafeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status); + +U_CDECL_END + +#ifdef U_ENABLE_GENERIC_ISO_2022 +static void U_CALLCONV +T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, UErrorCode* err); +#endif + +namespace { + +/*const UConverterSharedData _ISO2022Data;*/ +extern const UConverterSharedData _ISO2022JPData; + +#if !UCONFIG_ONLY_HTML_CONVERSION +extern const UConverterSharedData _ISO2022KRData; +extern const UConverterSharedData _ISO2022CNData; +#endif + +} // namespace + +/*************** Converter implementations ******************/ + +/* The purpose of this function is to get around gcc compiler warnings. */ +static inline void +fromUWriteUInt8(UConverter *cnv, + const char *bytes, int32_t length, + uint8_t **target, const char *targetLimit, + int32_t **offsets, + int32_t sourceIndex, + UErrorCode *pErrorCode) +{ + char *targetChars = (char *)*target; + ucnv_fromUWriteBytes(cnv, bytes, length, &targetChars, targetLimit, + offsets, sourceIndex, pErrorCode); + *target = (uint8_t*)targetChars; + +} + +static inline void +setInitialStateToUnicodeKR(UConverter* /*converter*/, UConverterDataISO2022 *myConverterData){ + if(myConverterData->version == 1) { + UConverter *cnv = myConverterData->currentConverter; + + cnv->toUnicodeStatus=0; /* offset */ + cnv->mode=0; /* state */ + cnv->toULength=0; /* byteIndex */ + } +} + +static inline void +setInitialStateFromUnicodeKR(UConverter* converter,UConverterDataISO2022 *myConverterData){ + /* in ISO-2022-KR the designator sequence appears only once + * in a file so we append it only once + */ + if( converter->charErrorBufferLength==0){ + + converter->charErrorBufferLength = 4; + converter->charErrorBuffer[0] = 0x1b; + converter->charErrorBuffer[1] = 0x24; + converter->charErrorBuffer[2] = 0x29; + converter->charErrorBuffer[3] = 0x43; + } + if(myConverterData->version == 1) { + UConverter *cnv = myConverterData->currentConverter; + + cnv->fromUChar32=0; + cnv->fromUnicodeStatus=1; /* prevLength */ + } +} + +static void U_CALLCONV +_ISO2022Open(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ + + char myLocale[7]={' ',' ',' ',' ',' ',' ', '\0'}; + + cnv->extraInfo = uprv_malloc (sizeof (UConverterDataISO2022)); + if(cnv->extraInfo != NULL) { + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; + UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo; + uint32_t version; + + stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable; + + uprv_memset(myConverterData, 0, sizeof(UConverterDataISO2022)); + myConverterData->currentType = ASCII1; + cnv->fromUnicodeStatus =FALSE; + if(pArgs->locale){ + uprv_strncpy(myLocale, pArgs->locale, sizeof(myLocale)-1); + } + version = pArgs->options & UCNV_OPTIONS_VERSION_MASK; + myConverterData->version = version; + if(myLocale[0]=='j' && (myLocale[1]=='a'|| myLocale[1]=='p') && + (myLocale[2]=='_' || myLocale[2]=='\0')) + { + /* open the required converters and cache them */ + if(version>MAX_JA_VERSION) { + // ICU 55 fails to open a converter for an unsupported version. + // Previously, it fell back to version 0, but that would yield + // unexpected behavior. + *errorCode = U_MISSING_RESOURCE_ERROR; + return; + } + if(jpCharsetMasks[version]&CSM(ISO8859_7)) { + myConverterData->myConverterArray[ISO8859_7] = + ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode); + } + myConverterData->myConverterArray[JISX208] = + ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode); + if(jpCharsetMasks[version]&CSM(JISX212)) { + myConverterData->myConverterArray[JISX212] = + ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode); + } + if(jpCharsetMasks[version]&CSM(GB2312)) { + myConverterData->myConverterArray[GB2312] = + ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */ + } + if(jpCharsetMasks[version]&CSM(KSC5601)) { + myConverterData->myConverterArray[KSC5601] = + ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode); + } + + /* set the function pointers to appropriate funtions */ + cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); + uprv_strcpy(myConverterData->locale,"ja"); + + (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ja,version="); + size_t len = uprv_strlen(myConverterData->name); + myConverterData->name[len]=(char)(myConverterData->version+(int)'0'); + myConverterData->name[len+1]='\0'; + } +#if !UCONFIG_ONLY_HTML_CONVERSION + else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && + (myLocale[2]=='_' || myLocale[2]=='\0')) + { + if(version>1) { + // ICU 55 fails to open a converter for an unsupported version. + // Previously, it fell back to version 0, but that would yield + // unexpected behavior. + *errorCode = U_MISSING_RESOURCE_ERROR; + return; + } + const char *cnvName; + if(version==1) { + cnvName="icu-internal-25546"; + } else { + cnvName="ibm-949"; + myConverterData->version=version=0; + } + if(pArgs->onlyTestIsLoadable) { + ucnv_canCreateConverter(cnvName, errorCode); /* errorCode carries result */ + uprv_free(cnv->extraInfo); + cnv->extraInfo=NULL; + return; + } else { + myConverterData->currentConverter=ucnv_open(cnvName, errorCode); + if (U_FAILURE(*errorCode)) { + _ISO2022Close(cnv); + return; + } + + if(version==1) { + (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=1"); + uprv_memcpy(cnv->subChars, myConverterData->currentConverter->subChars, 4); + cnv->subCharLen = myConverterData->currentConverter->subCharLen; + }else{ + (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=ko,version=0"); + } + + /* initialize the state variables */ + setInitialStateToUnicodeKR(cnv, myConverterData); + setInitialStateFromUnicodeKR(cnv, myConverterData); + + /* set the function pointers to appropriate funtions */ + cnv->sharedData=(UConverterSharedData*)&_ISO2022KRData; + uprv_strcpy(myConverterData->locale,"ko"); + } + } + else if(((myLocale[0]=='z' && myLocale[1]=='h') || (myLocale[0]=='c'&& myLocale[1]=='n'))&& + (myLocale[2]=='_' || myLocale[2]=='\0')) + { + if(version>2) { + // ICU 55 fails to open a converter for an unsupported version. + // Previously, it fell back to version 0, but that would yield + // unexpected behavior. + *errorCode = U_MISSING_RESOURCE_ERROR; + return; + } + + /* open the required converters and cache them */ + myConverterData->myConverterArray[GB2312_1] = + ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); + if(version==1) { + myConverterData->myConverterArray[ISO_IR_165] = + ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode); + } + myConverterData->myConverterArray[CNS_11643] = + ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode); + + + /* set the function pointers to appropriate funtions */ + cnv->sharedData=(UConverterSharedData*)&_ISO2022CNData; + uprv_strcpy(myConverterData->locale,"cn"); + + if (version==0){ + myConverterData->version = 0; + (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=0"); + }else if (version==1){ + myConverterData->version = 1; + (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=1"); + }else { + myConverterData->version = 2; + (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2"); + } + } +#endif // !UCONFIG_ONLY_HTML_CONVERSION + else{ +#ifdef U_ENABLE_GENERIC_ISO_2022 + myConverterData->isFirstBuffer = TRUE; + + /* append the UTF-8 escape sequence */ + cnv->charErrorBufferLength = 3; + cnv->charErrorBuffer[0] = 0x1b; + cnv->charErrorBuffer[1] = 0x25; + cnv->charErrorBuffer[2] = 0x42; + + cnv->sharedData=(UConverterSharedData*)&_ISO2022Data; + /* initialize the state variables */ + uprv_strcpy(myConverterData->name,"ISO_2022"); +#else + *errorCode = U_MISSING_RESOURCE_ERROR; + // Was U_UNSUPPORTED_ERROR but changed in ICU 55 to a more standard + // data loading error code. + return; +#endif + } + + cnv->maxBytesPerUChar=cnv->sharedData->staticData->maxBytesPerChar; + + if(U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) { + _ISO2022Close(cnv); + } + } else { + *errorCode = U_MEMORY_ALLOCATION_ERROR; + } +} + + +static void U_CALLCONV +_ISO2022Close(UConverter *converter) { + UConverterDataISO2022* myData =(UConverterDataISO2022 *) (converter->extraInfo); + UConverterSharedData **array = myData->myConverterArray; + int32_t i; + + if (converter->extraInfo != NULL) { + /*close the array of converter pointers and free the memory*/ + for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { + if(array[i]!=NULL) { + ucnv_unloadSharedDataIfReady(array[i]); + } + } + + ucnv_close(myData->currentConverter); + + if(!converter->isExtraLocal){ + uprv_free (converter->extraInfo); + converter->extraInfo = NULL; + } + } +} + +static void U_CALLCONV +_ISO2022Reset(UConverter *converter, UConverterResetChoice choice) { + UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) (converter->extraInfo); + if(choice<=UCNV_RESET_TO_UNICODE) { + uprv_memset(&myConverterData->toU2022State, 0, sizeof(ISO2022State)); + myConverterData->key = 0; + myConverterData->isEmptySegment = FALSE; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + uprv_memset(&myConverterData->fromU2022State, 0, sizeof(ISO2022State)); + } +#ifdef U_ENABLE_GENERIC_ISO_2022 + if(myConverterData->locale[0] == 0){ + if(choice<=UCNV_RESET_TO_UNICODE) { + myConverterData->isFirstBuffer = TRUE; + myConverterData->key = 0; + if (converter->mode == UCNV_SO){ + ucnv_close (myConverterData->currentConverter); + myConverterData->currentConverter=NULL; + } + converter->mode = UCNV_SI; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + /* re-append UTF-8 escape sequence */ + converter->charErrorBufferLength = 3; + converter->charErrorBuffer[0] = 0x1b; + converter->charErrorBuffer[1] = 0x28; + converter->charErrorBuffer[2] = 0x42; + } + } + else +#endif + { + /* reset the state variables */ + if(myConverterData->locale[0] == 'k'){ + if(choice<=UCNV_RESET_TO_UNICODE) { + setInitialStateToUnicodeKR(converter, myConverterData); + } + if(choice!=UCNV_RESET_TO_UNICODE) { + setInitialStateFromUnicodeKR(converter, myConverterData); + } + } + } +} + +U_CDECL_BEGIN + +static const char * U_CALLCONV +_ISO2022getName(const UConverter* cnv){ + if(cnv->extraInfo){ + UConverterDataISO2022* myData= (UConverterDataISO2022*)cnv->extraInfo; + return myData->name; + } + return NULL; +} + +U_CDECL_END + + +/*************** to unicode *******************/ +/**************************************************************************** + * Recognized escape sequences are + * <ESC>(B ASCII + * <ESC>.A ISO-8859-1 + * <ESC>.F ISO-8859-7 + * <ESC>(J JISX-201 + * <ESC>(I JISX-201 + * <ESC>$B JISX-208 + * <ESC>$@ JISX-208 + * <ESC>$(D JISX-212 + * <ESC>$A GB2312 + * <ESC>$(C KSC5601 + */ +static const int8_t nextStateToUnicodeJP[MAX_STATES_2022]= { +/* 0 1 2 3 4 5 6 7 8 9 */ + INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE + ,ASCII ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,JISX201 ,HWKANA_7BIT ,JISX201 ,INVALID_STATE + ,INVALID_STATE ,INVALID_STATE ,JISX208 ,GB2312 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE + ,ISO8859_1 ,ISO8859_7 ,JISX208 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,KSC5601 ,JISX212 ,INVALID_STATE + ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE + ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE + ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE + ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE +}; + +#if !UCONFIG_ONLY_HTML_CONVERSION +/*************** to unicode *******************/ +static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { +/* 0 1 2 3 4 5 6 7 8 9 */ + INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,SS2_STATE ,SS3_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE + ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE + ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE + ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE + ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,GB2312_1 ,INVALID_STATE ,ISO_IR_165 + ,CNS_11643_1 ,CNS_11643_2 ,CNS_11643_3 ,CNS_11643_4 ,CNS_11643_5 ,CNS_11643_6 ,CNS_11643_7 ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE + ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE + ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE +}; +#endif + + +static UCNV_TableStates_2022 +getKey_2022(char c,int32_t* key,int32_t* offset){ + int32_t togo; + int32_t low = 0; + int32_t hi = MAX_STATES_2022; + int32_t oldmid=0; + + togo = normalize_esq_chars_2022[(uint8_t)c]; + if(togo == 0) { + /* not a valid character anywhere in an escape sequence */ + *key = 0; + *offset = 0; + return INVALID_2022; + } + togo = (*key << 5) + togo; + + while (hi != low) /*binary search*/{ + + int32_t mid = (hi+low) >> 1; /*Finds median*/ + + if (mid == oldmid) + break; + + if (escSeqStateTable_Key_2022[mid] > togo){ + hi = mid; + } + else if (escSeqStateTable_Key_2022[mid] < togo){ + low = mid; + } + else /*we found it*/{ + *key = togo; + *offset = mid; + return (UCNV_TableStates_2022)escSeqStateTable_Value_2022[mid]; + } + oldmid = mid; + + } + + *key = 0; + *offset = 0; + return INVALID_2022; +} + +/*runs through a state machine to determine the escape sequence - codepage correspondance + */ +static void +changeState_2022(UConverter* _this, + const char** source, + const char* sourceLimit, + Variant2022 var, + UErrorCode* err){ + UCNV_TableStates_2022 value; + UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo); + uint32_t key = myData2022->key; + int32_t offset = 0; + int8_t initialToULength = _this->toULength; + char c; + + value = VALID_NON_TERMINAL_2022; + while (*source < sourceLimit) { + c = *(*source)++; + _this->toUBytes[_this->toULength++]=(uint8_t)c; + value = getKey_2022(c,(int32_t *) &key, &offset); + + switch (value){ + + case VALID_NON_TERMINAL_2022 : + /* continue with the loop */ + break; + + case VALID_TERMINAL_2022: + key = 0; + goto DONE; + + case INVALID_2022: + goto DONE; + + case VALID_MAYBE_TERMINAL_2022: +#ifdef U_ENABLE_GENERIC_ISO_2022 + /* ESC ( B is ambiguous only for ISO_2022 itself */ + if(var == ISO_2022) { + /* discard toUBytes[] for ESC ( B because this sequence is correct and complete */ + _this->toULength = 0; + + /* TODO need to indicate that ESC ( B was seen; if failure, then need to replay from source or from MBCS-style replay */ + + /* continue with the loop */ + value = VALID_NON_TERMINAL_2022; + break; + } else +#endif + { + /* not ISO_2022 itself, finish here */ + value = VALID_TERMINAL_2022; + key = 0; + goto DONE; + } + } + } + +DONE: + myData2022->key = key; + + if (value == VALID_NON_TERMINAL_2022) { + /* indicate that the escape sequence is incomplete: key!=0 */ + return; + } else if (value == INVALID_2022 ) { + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + } else /* value == VALID_TERMINAL_2022 */ { + switch(var){ +#ifdef U_ENABLE_GENERIC_ISO_2022 + case ISO_2022: + { + const char *chosenConverterName = escSeqStateTable_Result_2022[offset]; + if(chosenConverterName == NULL) { + /* SS2 or SS3 */ + *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; + _this->toUCallbackReason = UCNV_UNASSIGNED; + return; + } + + _this->mode = UCNV_SI; + ucnv_close(myData2022->currentConverter); + myData2022->currentConverter = myUConverter = ucnv_open(chosenConverterName, err); + if(U_SUCCESS(*err)) { + myUConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP; + _this->mode = UCNV_SO; + } + break; + } +#endif + case ISO_2022_JP: + { + StateEnum tempState=(StateEnum)nextStateToUnicodeJP[offset]; + switch(tempState) { + case INVALID_STATE: + *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; + break; + case SS2_STATE: + if(myData2022->toU2022State.cs[2]!=0) { + if(myData2022->toU2022State.g<2) { + myData2022->toU2022State.prevG=myData2022->toU2022State.g; + } + myData2022->toU2022State.g=2; + } else { + /* illegal to have SS2 before a matching designator */ + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + } + break; + /* case SS3_STATE: not used in ISO-2022-JP-x */ + case ISO8859_1: + case ISO8859_7: + if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { + *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; + } else { + /* G2 charset for SS2 */ + myData2022->toU2022State.cs[2]=(int8_t)tempState; + } + break; + default: + if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { + *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; + } else { + /* G0 charset */ + myData2022->toU2022State.cs[0]=(int8_t)tempState; + } + break; + } + } + break; +#if !UCONFIG_ONLY_HTML_CONVERSION + case ISO_2022_CN: + { + StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; + switch(tempState) { + case INVALID_STATE: + *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; + break; + case SS2_STATE: + if(myData2022->toU2022State.cs[2]!=0) { + if(myData2022->toU2022State.g<2) { + myData2022->toU2022State.prevG=myData2022->toU2022State.g; + } + myData2022->toU2022State.g=2; + } else { + /* illegal to have SS2 before a matching designator */ + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + } + break; + case SS3_STATE: + if(myData2022->toU2022State.cs[3]!=0) { + if(myData2022->toU2022State.g<2) { + myData2022->toU2022State.prevG=myData2022->toU2022State.g; + } + myData2022->toU2022State.g=3; + } else { + /* illegal to have SS3 before a matching designator */ + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + } + break; + case ISO_IR_165: + if(myData2022->version==0) { + *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; + break; + } + U_FALLTHROUGH; + case GB2312_1: + U_FALLTHROUGH; + case CNS_11643_1: + myData2022->toU2022State.cs[1]=(int8_t)tempState; + break; + case CNS_11643_2: + myData2022->toU2022State.cs[2]=(int8_t)tempState; + break; + default: + /* other CNS 11643 planes */ + if(myData2022->version==0) { + *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; + } else { + myData2022->toU2022State.cs[3]=(int8_t)tempState; + } + break; + } + } + break; + case ISO_2022_KR: + if(offset==0x30){ + /* nothing to be done, just accept this one escape sequence */ + } else { + *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; + } + break; +#endif // !UCONFIG_ONLY_HTML_CONVERSION + + default: + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + break; + } + } + if(U_SUCCESS(*err)) { + _this->toULength = 0; + } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { + if(_this->toULength>1) { + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte (ESC) in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * In escape sequences, all following bytes are "printable", that is, + * unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS), + * they are valid single/lead bytes. + * For simplicity, we always only report the initial ESC byte as the + * illegal sequence and back out all other bytes we looked at. + */ + /* Back out some bytes. */ + int8_t backOutDistance=_this->toULength-1; + int8_t bytesFromThisBuffer=_this->toULength-initialToULength; + if(backOutDistance<=bytesFromThisBuffer) { + /* same as initialToULength<=1 */ + *source-=backOutDistance; + } else { + /* Back out bytes from the previous buffer: Need to replay them. */ + _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance); + /* same as -(initialToULength-1) */ + /* preToULength is negative! */ + uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength); + *source-=bytesFromThisBuffer; + } + _this->toULength=1; + } + } else if(*err==U_UNSUPPORTED_ESCAPE_SEQUENCE) { + _this->toUCallbackReason = UCNV_UNASSIGNED; + } +} + +#if !UCONFIG_ONLY_HTML_CONVERSION +/*Checks the characters of the buffer against valid 2022 escape sequences +*if the match we return a pointer to the initial start of the sequence otherwise +*we return sourceLimit +*/ +/*for 2022 looks ahead in the stream + *to determine the longest possible convertible + *data stream + */ +static inline const char* +getEndOfBuffer_2022(const char** source, + const char* sourceLimit, + UBool /*flush*/){ + + const char* mySource = *source; + +#ifdef U_ENABLE_GENERIC_ISO_2022 + if (*source >= sourceLimit) + return sourceLimit; + + do{ + + if (*mySource == ESC_2022){ + int8_t i; + int32_t key = 0; + int32_t offset; + UCNV_TableStates_2022 value = VALID_NON_TERMINAL_2022; + + /* Kludge: I could not + * figure out the reason for validating an escape sequence + * twice - once here and once in changeState_2022(). + * is it possible to have an ESC character in a ISO2022 + * byte stream which is valid in a code page? Is it legal? + */ + for (i=0; + (mySource+i < sourceLimit)&&(value == VALID_NON_TERMINAL_2022); + i++) { + value = getKey_2022(*(mySource+i), &key, &offset); + } + if (value > 0 || *mySource==ESC_2022) + return mySource; + + if ((value == VALID_NON_TERMINAL_2022)&&(!flush) ) + return sourceLimit; + } + }while (++mySource < sourceLimit); + + return sourceLimit; +#else + while(mySource < sourceLimit && *mySource != ESC_2022) { + ++mySource; + } + return mySource; +#endif +} +#endif + +/* This inline function replicates code in _MBCSFromUChar32() function in ucnvmbcs.c + * any future change in _MBCSFromUChar32() function should be reflected here. + * @return number of bytes in *value; negative number if fallback; 0 if no mapping + */ +static inline int32_t +MBCS_FROM_UCHAR32_ISO2022(UConverterSharedData* sharedData, + UChar32 c, + uint32_t* value, + UBool useFallback, + int outputType) +{ + const int32_t *cx; + const uint16_t *table; + uint32_t stage2Entry; + uint32_t myValue; + int32_t length; + const uint8_t *p; + /* + * TODO(markus): Use and require new, faster MBCS conversion table structures. + * Use internal version of ucnv_open() that verifies that the new structures are available, + * else U_INTERNAL_PROGRAM_ERROR. + */ + /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ + if(c<0x10000 || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { + table=sharedData->mbcs.fromUnicodeTable; + stage2Entry=MBCS_STAGE_2_FROM_U(table, c); + /* get the bytes and the length for the output */ + if(outputType==MBCS_OUTPUT_2){ + myValue=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); + if(myValue<=0xff) { + length=1; + } else { + length=2; + } + } else /* outputType==MBCS_OUTPUT_3 */ { + p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); + myValue=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; + if(myValue<=0xff) { + length=1; + } else if(myValue<=0xffff) { + length=2; + } else { + length=3; + } + } + /* is this code point assigned, or do we use fallbacks? */ + if((stage2Entry&(1<<(16+(c&0xf))))!=0) { + /* assigned */ + *value=myValue; + return length; + } else if(FROM_U_USE_FALLBACK(useFallback, c) && myValue!=0) { + /* + * We allow a 0 byte output if the "assigned" bit is set for this entry. + * There is no way with this data structure for fallback output + * to be a zero byte. + */ + *value=myValue; + return -length; + } + } + + cx=sharedData->mbcs.extIndexes; + if(cx!=NULL) { + return ucnv_extSimpleMatchFromU(cx, c, value, useFallback); + } + + /* unassigned */ + return 0; +} + +/* This inline function replicates code in _MBCSSingleFromUChar32() function in ucnvmbcs.c + * any future change in _MBCSSingleFromUChar32() function should be reflected here. + * @param retval pointer to output byte + * @return 1 roundtrip byte 0 no mapping -1 fallback byte + */ +static inline int32_t +MBCS_SINGLE_FROM_UCHAR32(UConverterSharedData* sharedData, + UChar32 c, + uint32_t* retval, + UBool useFallback) +{ + const uint16_t *table; + int32_t value; + /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ + if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { + return 0; + } + /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */ + table=sharedData->mbcs.fromUnicodeTable; + /* get the byte for the output */ + value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c); + /* is this code point assigned, or do we use fallbacks? */ + *retval=(uint32_t)(value&0xff); + if(value>=0xf00) { + return 1; /* roundtrip */ + } else if(useFallback ? value>=0x800 : value>=0xc00) { + return -1; /* fallback taken */ + } else { + return 0; /* no mapping */ + } +} + +/* + * Check that the result is a 2-byte value with each byte in the range A1..FE + * (strict EUC DBCS) before accepting it and subtracting 0x80 from each byte + * to move it to the ISO 2022 range 21..7E. + * Return 0 if out of range. + */ +static inline uint32_t +_2022FromGR94DBCS(uint32_t value) { + if( (uint16_t)(value - 0xa1a1) <= (0xfefe - 0xa1a1) && + (uint8_t)(value - 0xa1) <= (0xfe - 0xa1) + ) { + return value - 0x8080; /* shift down to 21..7e byte range */ + } else { + return 0; /* not valid for ISO 2022 */ + } +} + +#if 0 /* 5691: Call sites now check for validity. They can just += 0x8080 after that. */ +/* + * This method does the reverse of _2022FromGR94DBCS(). Given the 2022 code point, it returns the + * 2 byte value that is in the range A1..FE for each byte. Otherwise it returns the 2022 code point + * unchanged. + */ +static inline uint32_t +_2022ToGR94DBCS(uint32_t value) { + uint32_t returnValue = value + 0x8080; + if( (uint16_t)(returnValue - 0xa1a1) <= (0xfefe - 0xa1a1) && + (uint8_t)(returnValue - 0xa1) <= (0xfe - 0xa1)) { + return returnValue; + } else { + return value; + } +} +#endif + +#ifdef U_ENABLE_GENERIC_ISO_2022 + +/********************************************************************************** +* ISO-2022 Converter +* +* +*/ + +static void U_CALLCONV +T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC(UConverterToUnicodeArgs* args, + UErrorCode* err){ + const char* mySourceLimit, *realSourceLimit; + const char* sourceStart; + const UChar* myTargetStart; + UConverter* saveThis; + UConverterDataISO2022* myData; + int8_t length; + + saveThis = args->converter; + myData=((UConverterDataISO2022*)(saveThis->extraInfo)); + + realSourceLimit = args->sourceLimit; + while (args->source < realSourceLimit) { + if(myData->key == 0) { /* are we in the middle of an escape sequence? */ + /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ + mySourceLimit = getEndOfBuffer_2022(&(args->source), realSourceLimit, args->flush); + + if(args->source < mySourceLimit) { + if(myData->currentConverter==NULL) { + myData->currentConverter = ucnv_open("ASCII",err); + if(U_FAILURE(*err)){ + return; + } + + myData->currentConverter->fromCharErrorBehaviour = UCNV_TO_U_CALLBACK_STOP; + saveThis->mode = UCNV_SO; + } + + /* convert to before the ESC or until the end of the buffer */ + myData->isFirstBuffer=FALSE; + sourceStart = args->source; + myTargetStart = args->target; + args->converter = myData->currentConverter; + ucnv_toUnicode(args->converter, + &args->target, + args->targetLimit, + &args->source, + mySourceLimit, + args->offsets, + (UBool)(args->flush && mySourceLimit == realSourceLimit), + err); + args->converter = saveThis; + + if (*err == U_BUFFER_OVERFLOW_ERROR) { + /* move the overflow buffer */ + length = saveThis->UCharErrorBufferLength = myData->currentConverter->UCharErrorBufferLength; + myData->currentConverter->UCharErrorBufferLength = 0; + if(length > 0) { + uprv_memcpy(saveThis->UCharErrorBuffer, + myData->currentConverter->UCharErrorBuffer, + length*U_SIZEOF_UCHAR); + } + return; + } + + /* + * At least one of: + * -Error while converting + * -Done with entire buffer + * -Need to write offsets or update the current offset + * (leave that up to the code in ucnv.c) + * + * or else we just stopped at an ESC byte and continue with changeState_2022() + */ + if (U_FAILURE(*err) || + (args->source == realSourceLimit) || + (args->offsets != NULL && (args->target != myTargetStart || args->source != sourceStart) || + (mySourceLimit < realSourceLimit && myData->currentConverter->toULength > 0)) + ) { + /* copy partial or error input for truncated detection and error handling */ + if(U_FAILURE(*err)) { + length = saveThis->invalidCharLength = myData->currentConverter->invalidCharLength; + if(length > 0) { + uprv_memcpy(saveThis->invalidCharBuffer, myData->currentConverter->invalidCharBuffer, length); + } + } else { + length = saveThis->toULength = myData->currentConverter->toULength; + if(length > 0) { + uprv_memcpy(saveThis->toUBytes, myData->currentConverter->toUBytes, length); + if(args->source < mySourceLimit) { + *err = U_TRUNCATED_CHAR_FOUND; /* truncated input before ESC */ + } + } + } + return; + } + } + } + + sourceStart = args->source; + changeState_2022(args->converter, + &(args->source), + realSourceLimit, + ISO_2022, + err); + if (U_FAILURE(*err) || (args->source != sourceStart && args->offsets != NULL)) { + /* let the ucnv.c code update its current offset */ + return; + } + } +} + +#endif + +/* + * To Unicode Callback helper function + */ +static void +toUnicodeCallback(UConverter *cnv, + const uint32_t sourceChar, const uint32_t targetUniChar, + UErrorCode* err){ + if(sourceChar>0xff){ + cnv->toUBytes[0] = (uint8_t)(sourceChar>>8); + cnv->toUBytes[1] = (uint8_t)sourceChar; + cnv->toULength = 2; + } + else{ + cnv->toUBytes[0] =(char) sourceChar; + cnv->toULength = 1; + } + + if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){ + *err = U_INVALID_CHAR_FOUND; + } + else{ + *err = U_ILLEGAL_CHAR_FOUND; + } +} + +/**************************************ISO-2022-JP*************************************************/ + +/************************************** IMPORTANT ************************************************** +* The UConverter_fromUnicode_ISO2022_JP converter does not use ucnv_fromUnicode() functions for SBCS,DBCS and +* MBCS; instead, the values are obtained directly by calling _MBCSFromUChar32(). +* The converter iterates over each Unicode codepoint +* to obtain the equivalent codepoints from the codepages supported. Since the source buffer is +* processed one char at a time it would make sense to reduce the extra processing a canned converter +* would do as far as possible. +* +* If the implementation of these macros or structure of sharedData struct change in the future, make +* sure that ISO-2022 is also changed. +*************************************************************************************************** +*/ + +/*************************************************************************************************** +* Rules for ISO-2022-jp encoding +* (i) Escape sequences must be fully contained within a line they should not +* span new lines or CRs +* (ii) If the last character on a line is represented by two bytes then an ASCII or +* JIS-Roman character escape sequence should follow before the line terminates +* (iii) If the first character on the line is represented by two bytes then a two +* byte character escape sequence should precede it +* (iv) If no escape sequence is encountered then the characters are ASCII +* (v) Latin(ISO-8859-1) and Greek(ISO-8859-7) characters must be designated to G2, +* and invoked with SS2 (ESC N). +* (vi) If there is any G0 designation in text, there must be a switch to +* ASCII or to JIS X 0201-Roman before a space character (but not +* necessarily before "ESC 4/14 2/0" or "ESC N ' '") or control +* characters such as tab or CRLF. +* (vi) Supported encodings: +* ASCII, JISX201, JISX208, JISX212, GB2312, KSC5601, ISO-8859-1,ISO-8859-7 +* +* source : RFC-1554 +* +* JISX201, JISX208,JISX212 : new .cnv data files created +* KSC5601 : alias to ibm-949 mapping table +* GB2312 : alias to ibm-1386 mapping table +* ISO-8859-1 : Algorithmic implemented as LATIN1 case +* ISO-8859-7 : alisas to ibm-9409 mapping table +*/ + +/* preference order of JP charsets */ +static const StateEnum jpCharsetPref[]={ + ASCII, + JISX201, + ISO8859_1, + JISX208, + ISO8859_7, + JISX212, + GB2312, + KSC5601, + HWKANA_7BIT +}; + +/* + * The escape sequences must be in order of the enum constants like JISX201 = 3, + * not in order of jpCharsetPref[]! + */ +static const char escSeqChars[][6] ={ + "\x1B\x28\x42", /* <ESC>(B ASCII */ + "\x1B\x2E\x41", /* <ESC>.A ISO-8859-1 */ + "\x1B\x2E\x46", /* <ESC>.F ISO-8859-7 */ + "\x1B\x28\x4A", /* <ESC>(J JISX-201 */ + "\x1B\x24\x42", /* <ESC>$B JISX-208 */ + "\x1B\x24\x28\x44", /* <ESC>$(D JISX-212 */ + "\x1B\x24\x41", /* <ESC>$A GB2312 */ + "\x1B\x24\x28\x43", /* <ESC>$(C KSC5601 */ + "\x1B\x28\x49" /* <ESC>(I HWKANA_7BIT */ + +}; +static const int8_t escSeqCharsLen[] ={ + 3, /* length of <ESC>(B ASCII */ + 3, /* length of <ESC>.A ISO-8859-1 */ + 3, /* length of <ESC>.F ISO-8859-7 */ + 3, /* length of <ESC>(J JISX-201 */ + 3, /* length of <ESC>$B JISX-208 */ + 4, /* length of <ESC>$(D JISX-212 */ + 3, /* length of <ESC>$A GB2312 */ + 4, /* length of <ESC>$(C KSC5601 */ + 3 /* length of <ESC>(I HWKANA_7BIT */ +}; + +/* +* The iteration over various code pages works this way: +* i) Get the currentState from myConverterData->currentState +* ii) Check if the character is mapped to a valid character in the currentState +* Yes -> a) set the initIterState to currentState +* b) remain in this state until an invalid character is found +* No -> a) go to the next code page and find the character +* iii) Before changing the state increment the current state check if the current state +* is equal to the intitIteration state +* Yes -> A character that cannot be represented in any of the supported encodings +* break and return a U_INVALID_CHARACTER error +* No -> Continue and find the character in next code page +* +* +* TODO: Implement a priority technique where the users are allowed to set the priority of code pages +*/ + +/* Map 00..7F to Unicode according to JIS X 0201. */ +static inline uint32_t +jisx201ToU(uint32_t value) { + if(value < 0x5c) { + return value; + } else if(value == 0x5c) { + return 0xa5; + } else if(value == 0x7e) { + return 0x203e; + } else /* value <= 0x7f */ { + return value; + } +} + +/* Map Unicode to 00..7F according to JIS X 0201. Return U+FFFE if unmappable. */ +static inline uint32_t +jisx201FromU(uint32_t value) { + if(value<=0x7f) { + if(value!=0x5c && value!=0x7e) { + return value; + } + } else if(value==0xa5) { + return 0x5c; + } else if(value==0x203e) { + return 0x7e; + } + return 0xfffe; +} + +/* + * Take a valid Shift-JIS byte pair, check that it is in the range corresponding + * to JIS X 0208, and convert it to a pair of 21..7E bytes. + * Return 0 if the byte pair is out of range. + */ +static inline uint32_t +_2022FromSJIS(uint32_t value) { + uint8_t trail; + + if(value > 0xEFFC) { + return 0; /* beyond JIS X 0208 */ + } + + trail = (uint8_t)value; + + value &= 0xff00; /* lead byte */ + if(value <= 0x9f00) { + value -= 0x7000; + } else /* 0xe000 <= value <= 0xef00 */ { + value -= 0xb000; + } + value <<= 1; + + if(trail <= 0x9e) { + value -= 0x100; + if(trail <= 0x7e) { + value |= trail - 0x1f; + } else { + value |= trail - 0x20; + } + } else /* trail <= 0xfc */ { + value |= trail - 0x7e; + } + return value; +} + +/* + * Convert a pair of JIS X 0208 21..7E bytes to Shift-JIS. + * If either byte is outside 21..7E make sure that the result is not valid + * for Shift-JIS so that the converter catches it. + * Some invalid byte values already turn into equally invalid Shift-JIS + * byte values and need not be tested explicitly. + */ +static inline void +_2022ToSJIS(uint8_t c1, uint8_t c2, char bytes[2]) { + if(c1&1) { + ++c1; + if(c2 <= 0x5f) { + c2 += 0x1f; + } else if(c2 <= 0x7e) { + c2 += 0x20; + } else { + c2 = 0; /* invalid */ + } + } else { + if((uint8_t)(c2-0x21) <= ((0x7e)-0x21)) { + c2 += 0x7e; + } else { + c2 = 0; /* invalid */ + } + } + c1 >>= 1; + if(c1 <= 0x2f) { + c1 += 0x70; + } else if(c1 <= 0x3f) { + c1 += 0xb0; + } else { + c1 = 0; /* invalid */ + } + bytes[0] = (char)c1; + bytes[1] = (char)c2; +} + +/* + * JIS X 0208 has fallbacks from Unicode half-width Katakana to full-width (DBCS) + * Katakana. + * Now that we use a Shift-JIS table for JIS X 0208 we need to hardcode these fallbacks + * because Shift-JIS roundtrips half-width Katakana to single bytes. + * These were the only fallbacks in ICU's jisx-208.ucm file. + */ +static const uint16_t hwkana_fb[HWKANA_END - HWKANA_START + 1] = { + 0x2123, /* U+FF61 */ + 0x2156, + 0x2157, + 0x2122, + 0x2126, + 0x2572, + 0x2521, + 0x2523, + 0x2525, + 0x2527, + 0x2529, + 0x2563, + 0x2565, + 0x2567, + 0x2543, + 0x213C, /* U+FF70 */ + 0x2522, + 0x2524, + 0x2526, + 0x2528, + 0x252A, + 0x252B, + 0x252D, + 0x252F, + 0x2531, + 0x2533, + 0x2535, + 0x2537, + 0x2539, + 0x253B, + 0x253D, + 0x253F, /* U+FF80 */ + 0x2541, + 0x2544, + 0x2546, + 0x2548, + 0x254A, + 0x254B, + 0x254C, + 0x254D, + 0x254E, + 0x254F, + 0x2552, + 0x2555, + 0x2558, + 0x255B, + 0x255E, + 0x255F, /* U+FF90 */ + 0x2560, + 0x2561, + 0x2562, + 0x2564, + 0x2566, + 0x2568, + 0x2569, + 0x256A, + 0x256B, + 0x256C, + 0x256D, + 0x256F, + 0x2573, + 0x212B, + 0x212C /* U+FF9F */ +}; + +static void U_CALLCONV +UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err) { + UConverter *cnv = args->converter; + UConverterDataISO2022 *converterData; + ISO2022State *pFromU2022State; + uint8_t *target = (uint8_t *) args->target; + const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; + const UChar* source = args->source; + const UChar* sourceLimit = args->sourceLimit; + int32_t* offsets = args->offsets; + UChar32 sourceChar; + char buffer[8]; + int32_t len, outLen; + int8_t choices[10]; + int32_t choiceCount; + uint32_t targetValue = 0; + UBool useFallback; + + int32_t i; + int8_t cs, g; + + /* set up the state */ + converterData = (UConverterDataISO2022*)cnv->extraInfo; + pFromU2022State = &converterData->fromU2022State; + + choiceCount = 0; + + /* check if the last codepoint of previous buffer was a lead surrogate*/ + if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { + goto getTrail; + } + + while(source < sourceLimit) { + if(target < targetLimit) { + + sourceChar = *(source++); + /*check if the char is a First surrogate*/ + if(U16_IS_SURROGATE(sourceChar)) { + if(U16_IS_SURROGATE_LEAD(sourceChar)) { +getTrail: + /*look ahead to find the trail surrogate*/ + if(source < sourceLimit) { + /* test the following code unit */ + UChar trail=(UChar) *source; + if(U16_IS_TRAIL(trail)) { + source++; + sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); + cnv->fromUChar32=0x00; + /* convert this supplementary code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + cnv->fromUChar32=sourceChar; + break; + } + } else { + /* no more input */ + cnv->fromUChar32=sourceChar; + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + cnv->fromUChar32=sourceChar; + break; + } + } + + /* do not convert SO/SI/ESC */ + if(IS_2022_CONTROL(sourceChar)) { + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + cnv->fromUChar32=sourceChar; + break; + } + + /* do the conversion */ + + if(choiceCount == 0) { + uint16_t csm; + + /* + * The csm variable keeps track of which charsets are allowed + * and not used yet while building the choices[]. + */ + csm = jpCharsetMasks[converterData->version]; + choiceCount = 0; + + /* JIS7/8: try single-byte half-width Katakana before JISX208 */ + if(converterData->version == 3 || converterData->version == 4) { + choices[choiceCount++] = (int8_t)HWKANA_7BIT; + } + /* Do not try single-byte half-width Katakana for other versions. */ + csm &= ~CSM(HWKANA_7BIT); + + /* try the current G0 charset */ + choices[choiceCount++] = cs = pFromU2022State->cs[0]; + csm &= ~CSM(cs); + + /* try the current G2 charset */ + if((cs = pFromU2022State->cs[2]) != 0) { + choices[choiceCount++] = cs; + csm &= ~CSM(cs); + } + + /* try all the other possible charsets */ + for(i = 0; i < UPRV_LENGTHOF(jpCharsetPref); ++i) { + cs = (int8_t)jpCharsetPref[i]; + if(CSM(cs) & csm) { + choices[choiceCount++] = cs; + csm &= ~CSM(cs); + } + } + } + + cs = g = 0; + /* + * len==0: no mapping found yet + * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks + * len>0: found a roundtrip result, done + */ + len = 0; + /* + * We will turn off useFallback after finding a fallback, + * but we still get fallbacks from PUA code points as usual. + * Therefore, we will also need to check that we don't overwrite + * an early fallback with a later one. + */ + useFallback = cnv->useFallback; + + for(i = 0; i < choiceCount && len <= 0; ++i) { + uint32_t value; + int32_t len2; + int8_t cs0 = choices[i]; + switch(cs0) { + case ASCII: + if(sourceChar <= 0x7f) { + targetValue = (uint32_t)sourceChar; + len = 1; + cs = cs0; + g = 0; + } + break; + case ISO8859_1: + if(GR96_START <= sourceChar && sourceChar <= GR96_END) { + targetValue = (uint32_t)sourceChar - 0x80; + len = 1; + cs = cs0; + g = 2; + } + break; + case HWKANA_7BIT: + if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) { + if(converterData->version==3) { + /* JIS7: use G1 (SO) */ + /* Shift U+FF61..U+FF9F to bytes 21..5F. */ + targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0x21)); + len = 1; + pFromU2022State->cs[1] = cs = cs0; /* do not output an escape sequence */ + g = 1; + } else if(converterData->version==4) { + /* JIS8: use 8-bit bytes with any single-byte charset, see escape sequence output below */ + /* Shift U+FF61..U+FF9F to bytes A1..DF. */ + targetValue = (uint32_t)(sourceChar - (HWKANA_START - 0xa1)); + len = 1; + + cs = pFromU2022State->cs[0]; + if(IS_JP_DBCS(cs)) { + /* switch from a DBCS charset to JISX201 */ + cs = (int8_t)JISX201; + } + /* else stay in the current G0 charset */ + g = 0; + } + /* else do not use HWKANA_7BIT with other versions */ + } + break; + case JISX201: + /* G0 SBCS */ + value = jisx201FromU(sourceChar); + if(value <= 0x7f) { + targetValue = value; + len = 1; + cs = cs0; + g = 0; + useFallback = FALSE; + } + break; + case JISX208: + /* G0 DBCS from Shift-JIS table */ + len2 = MBCS_FROM_UCHAR32_ISO2022( + converterData->myConverterArray[cs0], + sourceChar, &value, + useFallback, MBCS_OUTPUT_2); + if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ + value = _2022FromSJIS(value); + if(value != 0) { + targetValue = value; + len = len2; + cs = cs0; + g = 0; + useFallback = FALSE; + } + } else if(len == 0 && useFallback && + (uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) { + targetValue = hwkana_fb[sourceChar - HWKANA_START]; + len = -2; + cs = cs0; + g = 0; + useFallback = FALSE; + } + break; + case ISO8859_7: + /* G0 SBCS forced to 7-bit output */ + len2 = MBCS_SINGLE_FROM_UCHAR32( + converterData->myConverterArray[cs0], + sourceChar, &value, + useFallback); + if(len2 != 0 && !(len2 < 0 && len != 0) && GR96_START <= value && value <= GR96_END) { + targetValue = value - 0x80; + len = len2; + cs = cs0; + g = 2; + useFallback = FALSE; + } + break; + default: + /* G0 DBCS */ + len2 = MBCS_FROM_UCHAR32_ISO2022( + converterData->myConverterArray[cs0], + sourceChar, &value, + useFallback, MBCS_OUTPUT_2); + if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ + if(cs0 == KSC5601) { + /* + * Check for valid bytes for the encoding scheme. + * This is necessary because the sub-converter (windows-949) + * has a broader encoding scheme than is valid for 2022. + */ + value = _2022FromGR94DBCS(value); + if(value == 0) { + break; + } + } + targetValue = value; + len = len2; + cs = cs0; + g = 0; + useFallback = FALSE; + } + break; + } + } + + if(len != 0) { + if(len < 0) { + len = -len; /* fallback */ + } + outLen = 0; /* count output bytes */ + + /* write SI if necessary (only for JIS7) */ + if(pFromU2022State->g == 1 && g == 0) { + buffer[outLen++] = UCNV_SI; + pFromU2022State->g = 0; + } + + /* write the designation sequence if necessary */ + if(cs != pFromU2022State->cs[g]) { + int32_t escLen = escSeqCharsLen[cs]; + uprv_memcpy(buffer + outLen, escSeqChars[cs], escLen); + outLen += escLen; + pFromU2022State->cs[g] = cs; + + /* invalidate the choices[] */ + choiceCount = 0; + } + + /* write the shift sequence if necessary */ + if(g != pFromU2022State->g) { + switch(g) { + /* case 0 handled before writing escapes */ + case 1: + buffer[outLen++] = UCNV_SO; + pFromU2022State->g = 1; + break; + default: /* case 2 */ + buffer[outLen++] = 0x1b; + buffer[outLen++] = 0x4e; + break; + /* no case 3: no SS3 in ISO-2022-JP-x */ + } + } + + /* write the output bytes */ + if(len == 1) { + buffer[outLen++] = (char)targetValue; + } else /* len == 2 */ { + buffer[outLen++] = (char)(targetValue >> 8); + buffer[outLen++] = (char)targetValue; + } + } else { + /* + * if we cannot find the character after checking all codepages + * then this is an error + */ + *err = U_INVALID_CHAR_FOUND; + cnv->fromUChar32=sourceChar; + break; + } + + if(sourceChar == CR || sourceChar == LF) { + /* reset the G2 state at the end of a line (conversion got us into ASCII or JISX201 already) */ + pFromU2022State->cs[2] = 0; + choiceCount = 0; + } + + /* output outLen>0 bytes in buffer[] */ + if(outLen == 1) { + *target++ = buffer[0]; + if(offsets) { + *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */ + } + } else if(outLen == 2 && (target + 2) <= targetLimit) { + *target++ = buffer[0]; + *target++ = buffer[1]; + if(offsets) { + int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar)); + *offsets++ = sourceIndex; + *offsets++ = sourceIndex; + } + } else { + fromUWriteUInt8( + cnv, + buffer, outLen, + &target, (const char *)targetLimit, + &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)), + err); + if(U_FAILURE(*err)) { + break; + } + } + } /* end if(myTargetIndex<myTargetLength) */ + else{ + *err =U_BUFFER_OVERFLOW_ERROR; + break; + } + + }/* end while(mySourceIndex<mySourceLength) */ + + /* + * the end of the input stream and detection of truncated input + * are handled by the framework, but for ISO-2022-JP conversion + * we need to be in ASCII mode at the very end + * + * conditions: + * successful + * in SO mode or not in ASCII mode + * end of input and no truncated input + */ + if( U_SUCCESS(*err) && + (pFromU2022State->g!=0 || pFromU2022State->cs[0]!=ASCII) && + args->flush && source>=sourceLimit && cnv->fromUChar32==0 + ) { + int32_t sourceIndex; + + outLen = 0; + + if(pFromU2022State->g != 0) { + buffer[outLen++] = UCNV_SI; + pFromU2022State->g = 0; + } + + if(pFromU2022State->cs[0] != ASCII) { + int32_t escLen = escSeqCharsLen[ASCII]; + uprv_memcpy(buffer + outLen, escSeqChars[ASCII], escLen); + outLen += escLen; + pFromU2022State->cs[0] = (int8_t)ASCII; + } + + /* get the source index of the last input character */ + /* + * TODO this would be simpler and more reliable if we used a pair + * of sourceIndex/prevSourceIndex like in ucnvmbcs.c + * so that we could simply use the prevSourceIndex here; + * this code gives an incorrect result for the rare case of an unmatched + * trail surrogate that is alone in the last buffer of the text stream + */ + sourceIndex=(int32_t)(source-args->source); + if(sourceIndex>0) { + --sourceIndex; + if( U16_IS_TRAIL(args->source[sourceIndex]) && + (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1])) + ) { + --sourceIndex; + } + } else { + sourceIndex=-1; + } + + fromUWriteUInt8( + cnv, + buffer, outLen, + &target, (const char *)targetLimit, + &offsets, sourceIndex, + err); + } + + /*save the state and return */ + args->source = source; + args->target = (char*)target; +} + +/*************** to unicode *******************/ + +static void U_CALLCONV +UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, + UErrorCode* err){ + char tempBuf[2]; + const char *mySource = (char *) args->source; + UChar *myTarget = args->target; + const char *mySourceLimit = args->sourceLimit; + uint32_t targetUniChar = 0x0000; + uint32_t mySourceChar = 0x0000; + uint32_t tmpSourceChar = 0x0000; + UConverterDataISO2022* myData; + ISO2022State *pToU2022State; + StateEnum cs; + + myData=(UConverterDataISO2022*)(args->converter->extraInfo); + pToU2022State = &myData->toU2022State; + + if(myData->key != 0) { + /* continue with a partial escape sequence */ + goto escape; + } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) { + /* continue with a partial double-byte character */ + mySourceChar = args->converter->toUBytes[0]; + args->converter->toULength = 0; + cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; + targetUniChar = missingCharMarker; + goto getTrailByte; + } + + while(mySource < mySourceLimit){ + + targetUniChar =missingCharMarker; + + if(myTarget < args->targetLimit){ + + mySourceChar= (unsigned char) *mySource++; + + switch(mySourceChar) { + case UCNV_SI: + if(myData->version==3) { + pToU2022State->g=0; + continue; + } else { + /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ + myData->isEmptySegment = FALSE; /* reset this, we have a different error */ + break; + } + + case UCNV_SO: + if(myData->version==3) { + /* JIS7: switch to G1 half-width Katakana */ + pToU2022State->cs[1] = (int8_t)HWKANA_7BIT; + pToU2022State->g=1; + continue; + } else { + /* only JIS7 uses SI/SO, not ISO-2022-JP-x */ + myData->isEmptySegment = FALSE; /* reset this, we have a different error */ + break; + } + + case ESC_2022: + mySource--; +escape: + { + const char * mySourceBefore = mySource; + int8_t toULengthBefore = args->converter->toULength; + + changeState_2022(args->converter,&(mySource), + mySourceLimit, ISO_2022_JP,err); + + /* If in ISO-2022-JP only and we successully completed an escape sequence, but previous segment was empty, create an error */ + if(myData->version==0 && myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) { + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + args->converter->toUCallbackReason = UCNV_IRREGULAR; + args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore)); + } + } + + /* invalid or illegal escape sequence */ + if(U_FAILURE(*err)){ + args->target = myTarget; + args->source = mySource; + myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */ + return; + } + /* If we successfully completed an escape sequence, we begin a new segment, empty so far */ + if(myData->key==0) { + myData->isEmptySegment = TRUE; + } + continue; + + /* ISO-2022-JP does not use single-byte (C1) SS2 and SS3 */ + + case CR: + case LF: + /* automatically reset to single-byte mode */ + if((StateEnum)pToU2022State->cs[0] != ASCII && (StateEnum)pToU2022State->cs[0] != JISX201) { + pToU2022State->cs[0] = (int8_t)ASCII; + } + pToU2022State->cs[2] = 0; + pToU2022State->g = 0; + U_FALLTHROUGH; + default: + /* convert one or two bytes */ + myData->isEmptySegment = FALSE; + cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; + if( (uint8_t)(mySourceChar - 0xa1) <= (0xdf - 0xa1) && myData->version==4 && + !IS_JP_DBCS(cs) + ) { + /* 8-bit halfwidth katakana in any single-byte mode for JIS8 */ + targetUniChar = mySourceChar + (HWKANA_START - 0xa1); + + /* return from a single-shift state to the previous one */ + if(pToU2022State->g >= 2) { + pToU2022State->g=pToU2022State->prevG; + } + } else switch(cs) { + case ASCII: + if(mySourceChar <= 0x7f) { + targetUniChar = mySourceChar; + } + break; + case ISO8859_1: + if(mySourceChar <= 0x7f) { + targetUniChar = mySourceChar + 0x80; + } + /* return from a single-shift state to the previous one */ + pToU2022State->g=pToU2022State->prevG; + break; + case ISO8859_7: + if(mySourceChar <= 0x7f) { + /* convert mySourceChar+0x80 to use a normal 8-bit table */ + targetUniChar = + _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP( + myData->myConverterArray[cs], + mySourceChar + 0x80); + } + /* return from a single-shift state to the previous one */ + pToU2022State->g=pToU2022State->prevG; + break; + case JISX201: + if(mySourceChar <= 0x7f) { + targetUniChar = jisx201ToU(mySourceChar); + } + break; + case HWKANA_7BIT: + if((uint8_t)(mySourceChar - 0x21) <= (0x5f - 0x21)) { + /* 7-bit halfwidth Katakana */ + targetUniChar = mySourceChar + (HWKANA_START - 0x21); + } + break; + default: + /* G0 DBCS */ + if(mySource < mySourceLimit) { + int leadIsOk, trailIsOk; + uint8_t trailByte; +getTrailByte: + trailByte = (uint8_t)*mySource; + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * + * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is + * an ESC/SO/SI, we report only the first byte as the illegal sequence. + * Otherwise we convert or report the pair of bytes. + */ + leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); + trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + ++mySource; + tmpSourceChar = (mySourceChar << 8) | trailByte; + if(cs == JISX208) { + _2022ToSJIS((uint8_t)mySourceChar, trailByte, tempBuf); + mySourceChar = tmpSourceChar; + } else { + /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */ + mySourceChar = tmpSourceChar; + if (cs == KSC5601) { + tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */ + } + tempBuf[0] = (char)(tmpSourceChar >> 8); + tempBuf[1] = (char)(tmpSourceChar); + } + targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE); + } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + ++mySource; + /* add another bit so that the code below writes 2 bytes in case of error */ + mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; + } + } else { + args->converter->toUBytes[0] = (uint8_t)mySourceChar; + args->converter->toULength = 1; + goto endloop; + } + } /* End of inner switch */ + break; + } /* End of outer switch */ + if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){ + if(args->offsets){ + args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); + } + *(myTarget++)=(UChar)targetUniChar; + } + else if(targetUniChar > missingCharMarker){ + /* disassemble the surrogate pair and write to output*/ + targetUniChar-=0x0010000; + *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10)); + if(args->offsets){ + args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); + } + ++myTarget; + if(myTarget< args->targetLimit){ + *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); + if(args->offsets){ + args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); + } + ++myTarget; + }else{ + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= + (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); + } + + } + else{ + /* Call the callback function*/ + toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err); + break; + } + } + else{ /* goes with "if(myTarget < args->targetLimit)" way up near top of function */ + *err =U_BUFFER_OVERFLOW_ERROR; + break; + } + } +endloop: + args->target = myTarget; + args->source = mySource; +} + + +#if !UCONFIG_ONLY_HTML_CONVERSION +/*************************************************************** +* Rules for ISO-2022-KR encoding +* i) The KSC5601 designator sequence should appear only once in a file, +* at the begining of a line before any KSC5601 characters. This usually +* means that it appears by itself on the first line of the file +* ii) There are only 2 shifting sequences SO to shift into double byte mode +* and SI to shift into single byte mode +*/ +static void U_CALLCONV +UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterFromUnicodeArgs* args, UErrorCode* err){ + + UConverter* saveConv = args->converter; + UConverterDataISO2022 *myConverterData=(UConverterDataISO2022*)saveConv->extraInfo; + args->converter=myConverterData->currentConverter; + + myConverterData->currentConverter->fromUChar32 = saveConv->fromUChar32; + ucnv_MBCSFromUnicodeWithOffsets(args,err); + saveConv->fromUChar32 = myConverterData->currentConverter->fromUChar32; + + if(*err == U_BUFFER_OVERFLOW_ERROR) { + if(myConverterData->currentConverter->charErrorBufferLength > 0) { + uprv_memcpy( + saveConv->charErrorBuffer, + myConverterData->currentConverter->charErrorBuffer, + myConverterData->currentConverter->charErrorBufferLength); + } + saveConv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength; + myConverterData->currentConverter->charErrorBufferLength = 0; + } + args->converter=saveConv; +} + +static void U_CALLCONV +UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){ + + const UChar *source = args->source; + const UChar *sourceLimit = args->sourceLimit; + unsigned char *target = (unsigned char *) args->target; + unsigned char *targetLimit = (unsigned char *) args->targetLimit; + int32_t* offsets = args->offsets; + uint32_t targetByteUnit = 0x0000; + UChar32 sourceChar = 0x0000; + UBool isTargetByteDBCS; + UBool oldIsTargetByteDBCS; + UConverterDataISO2022 *converterData; + UConverterSharedData* sharedData; + UBool useFallback; + int32_t length =0; + + converterData=(UConverterDataISO2022*)args->converter->extraInfo; + /* if the version is 1 then the user is requesting + * conversion with ibm-25546 pass the arguments to + * MBCS converter and return + */ + if(converterData->version==1){ + UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err); + return; + } + + /* initialize data */ + sharedData = converterData->currentConverter->sharedData; + useFallback = args->converter->useFallback; + isTargetByteDBCS=(UBool)args->converter->fromUnicodeStatus; + oldIsTargetByteDBCS = isTargetByteDBCS; + + isTargetByteDBCS = (UBool) args->converter->fromUnicodeStatus; + if((sourceChar = args->converter->fromUChar32)!=0 && target <targetLimit) { + goto getTrail; + } + while(source < sourceLimit){ + + targetByteUnit = missingCharMarker; + + if(target < (unsigned char*) args->targetLimit){ + sourceChar = *source++; + + /* do not convert SO/SI/ESC */ + if(IS_2022_CONTROL(sourceChar)) { + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + args->converter->fromUChar32=sourceChar; + break; + } + + length = MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,MBCS_OUTPUT_2); + if(length < 0) { + length = -length; /* fallback */ + } + /* only DBCS or SBCS characters are expected*/ + /* DB characters with high bit set to 1 are expected */ + if( length > 2 || length==0 || + (length == 1 && targetByteUnit > 0x7f) || + (length == 2 && + ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) || + (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1))) + ) { + targetByteUnit=missingCharMarker; + } + if (targetByteUnit != missingCharMarker){ + + oldIsTargetByteDBCS = isTargetByteDBCS; + isTargetByteDBCS = (UBool)(targetByteUnit>0x00FF); + /* append the shift sequence */ + if (oldIsTargetByteDBCS != isTargetByteDBCS ){ + + if (isTargetByteDBCS) + *target++ = UCNV_SO; + else + *target++ = UCNV_SI; + if(offsets) + *(offsets++) = (int32_t)(source - args->source-1); + } + /* write the targetUniChar to target */ + if(targetByteUnit <= 0x00FF){ + if( target < targetLimit){ + *(target++) = (unsigned char) targetByteUnit; + if(offsets){ + *(offsets++) = (int32_t)(source - args->source-1); + } + + }else{ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit); + *err = U_BUFFER_OVERFLOW_ERROR; + } + }else{ + if(target < targetLimit){ + *(target++) =(unsigned char) ((targetByteUnit>>8) -0x80); + if(offsets){ + *(offsets++) = (int32_t)(source - args->source-1); + } + if(target < targetLimit){ + *(target++) =(unsigned char) (targetByteUnit -0x80); + if(offsets){ + *(offsets++) = (int32_t)(source - args->source-1); + } + }else{ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit -0x80); + *err = U_BUFFER_OVERFLOW_ERROR; + } + }else{ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) ((targetByteUnit>>8) -0x80); + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (unsigned char) (targetByteUnit-0x80); + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + + } + else{ + /* oops.. the code point is unassingned + * set the error and reason + */ + + /*check if the char is a First surrogate*/ + if(U16_IS_SURROGATE(sourceChar)) { + if(U16_IS_SURROGATE_LEAD(sourceChar)) { +getTrail: + /*look ahead to find the trail surrogate*/ + if(source < sourceLimit) { + /* test the following code unit */ + UChar trail=(UChar) *source; + if(U16_IS_TRAIL(trail)) { + source++; + sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); + *err = U_INVALID_CHAR_FOUND; + /* convert this surrogate code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* no more input */ + *err = U_ZERO_ERROR; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* callback(unassigned) for a BMP code point */ + *err = U_INVALID_CHAR_FOUND; + } + + args->converter->fromUChar32=sourceChar; + break; + } + } /* end if(myTargetIndex<myTargetLength) */ + else{ + *err =U_BUFFER_OVERFLOW_ERROR; + break; + } + + }/* end while(mySourceIndex<mySourceLength) */ + + /* + * the end of the input stream and detection of truncated input + * are handled by the framework, but for ISO-2022-KR conversion + * we need to be in ASCII mode at the very end + * + * conditions: + * successful + * not in ASCII mode + * end of input and no truncated input + */ + if( U_SUCCESS(*err) && + isTargetByteDBCS && + args->flush && source>=sourceLimit && args->converter->fromUChar32==0 + ) { + int32_t sourceIndex; + + /* we are switching to ASCII */ + isTargetByteDBCS=FALSE; + + /* get the source index of the last input character */ + /* + * TODO this would be simpler and more reliable if we used a pair + * of sourceIndex/prevSourceIndex like in ucnvmbcs.c + * so that we could simply use the prevSourceIndex here; + * this code gives an incorrect result for the rare case of an unmatched + * trail surrogate that is alone in the last buffer of the text stream + */ + sourceIndex=(int32_t)(source-args->source); + if(sourceIndex>0) { + --sourceIndex; + if( U16_IS_TRAIL(args->source[sourceIndex]) && + (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1])) + ) { + --sourceIndex; + } + } else { + sourceIndex=-1; + } + + fromUWriteUInt8( + args->converter, + SHIFT_IN_STR, 1, + &target, (const char *)targetLimit, + &offsets, sourceIndex, + err); + } + + /*save the state and return */ + args->source = source; + args->target = (char*)target; + args->converter->fromUnicodeStatus = (uint32_t)isTargetByteDBCS; +} + +/************************ To Unicode ***************************************/ + +static void U_CALLCONV +UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(UConverterToUnicodeArgs *args, + UErrorCode* err){ + char const* sourceStart; + UConverterDataISO2022* myData=(UConverterDataISO2022*)(args->converter->extraInfo); + + UConverterToUnicodeArgs subArgs; + int32_t minArgsSize; + + /* set up the subconverter arguments */ + if(args->size<sizeof(UConverterToUnicodeArgs)) { + minArgsSize = args->size; + } else { + minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs); + } + + uprv_memcpy(&subArgs, args, minArgsSize); + subArgs.size = (uint16_t)minArgsSize; + subArgs.converter = myData->currentConverter; + + /* remember the original start of the input for offsets */ + sourceStart = args->source; + + if(myData->key != 0) { + /* continue with a partial escape sequence */ + goto escape; + } + + while(U_SUCCESS(*err) && args->source < args->sourceLimit) { + /*Find the end of the buffer e.g : Next Escape Seq | end of Buffer*/ + subArgs.source = args->source; + subArgs.sourceLimit = getEndOfBuffer_2022(&(args->source), args->sourceLimit, args->flush); + if(subArgs.source != subArgs.sourceLimit) { + /* + * get the current partial byte sequence + * + * it needs to be moved between the public and the subconverter + * so that the conversion framework, which only sees the public + * converter, can handle truncated and illegal input etc. + */ + if(args->converter->toULength > 0) { + uprv_memcpy(subArgs.converter->toUBytes, args->converter->toUBytes, args->converter->toULength); + } + subArgs.converter->toULength = args->converter->toULength; + + /* + * Convert up to the end of the input, or to before the next escape character. + * Does not handle conversion extensions because the preToU[] state etc. + * is not copied. + */ + ucnv_MBCSToUnicodeWithOffsets(&subArgs, err); + + if(args->offsets != NULL && sourceStart != args->source) { + /* update offsets to base them on the actual start of the input */ + int32_t *offsets = args->offsets; + UChar *target = args->target; + int32_t delta = (int32_t)(args->source - sourceStart); + while(target < subArgs.target) { + if(*offsets >= 0) { + *offsets += delta; + } + ++offsets; + ++target; + } + } + args->source = subArgs.source; + args->target = subArgs.target; + args->offsets = subArgs.offsets; + + /* copy input/error/overflow buffers */ + if(subArgs.converter->toULength > 0) { + uprv_memcpy(args->converter->toUBytes, subArgs.converter->toUBytes, subArgs.converter->toULength); + } + args->converter->toULength = subArgs.converter->toULength; + + if(*err == U_BUFFER_OVERFLOW_ERROR) { + if(subArgs.converter->UCharErrorBufferLength > 0) { + uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer, + subArgs.converter->UCharErrorBufferLength); + } + args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength; + subArgs.converter->UCharErrorBufferLength = 0; + } + } + + if (U_FAILURE(*err) || (args->source == args->sourceLimit)) { + return; + } + +escape: + changeState_2022(args->converter, + &(args->source), + args->sourceLimit, + ISO_2022_KR, + err); + } +} + +static void U_CALLCONV +UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, + UErrorCode* err){ + char tempBuf[2]; + const char *mySource = ( char *) args->source; + UChar *myTarget = args->target; + const char *mySourceLimit = args->sourceLimit; + UChar32 targetUniChar = 0x0000; + UChar mySourceChar = 0x0000; + UConverterDataISO2022* myData; + UConverterSharedData* sharedData ; + UBool useFallback; + + myData=(UConverterDataISO2022*)(args->converter->extraInfo); + if(myData->version==1){ + UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC_IBM(args,err); + return; + } + + /* initialize state */ + sharedData = myData->currentConverter->sharedData; + useFallback = args->converter->useFallback; + + if(myData->key != 0) { + /* continue with a partial escape sequence */ + goto escape; + } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) { + /* continue with a partial double-byte character */ + mySourceChar = args->converter->toUBytes[0]; + args->converter->toULength = 0; + goto getTrailByte; + } + + while(mySource< mySourceLimit){ + + if(myTarget < args->targetLimit){ + + mySourceChar= (unsigned char) *mySource++; + + if(mySourceChar==UCNV_SI){ + myData->toU2022State.g = 0; + if (myData->isEmptySegment) { + myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + args->converter->toUCallbackReason = UCNV_IRREGULAR; + args->converter->toUBytes[0] = (uint8_t)mySourceChar; + args->converter->toULength = 1; + args->target = myTarget; + args->source = mySource; + return; + } + /*consume the source */ + continue; + }else if(mySourceChar==UCNV_SO){ + myData->toU2022State.g = 1; + myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */ + /*consume the source */ + continue; + }else if(mySourceChar==ESC_2022){ + mySource--; +escape: + myData->isEmptySegment = FALSE; /* Any invalid ESC sequences will be detected separately, so just reset this */ + changeState_2022(args->converter,&(mySource), + mySourceLimit, ISO_2022_KR, err); + if(U_FAILURE(*err)){ + args->target = myTarget; + args->source = mySource; + return; + } + continue; + } + + myData->isEmptySegment = FALSE; /* Any invalid char errors will be detected separately, so just reset this */ + if(myData->toU2022State.g == 1) { + if(mySource < mySourceLimit) { + int leadIsOk, trailIsOk; + uint8_t trailByte; +getTrailByte: + targetUniChar = missingCharMarker; + trailByte = (uint8_t)*mySource; + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * + * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is + * an ESC/SO/SI, we report only the first byte as the illegal sequence. + * Otherwise we convert or report the pair of bytes. + */ + leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); + trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + ++mySource; + tempBuf[0] = (char)(mySourceChar + 0x80); + tempBuf[1] = (char)(trailByte + 0x80); + targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback); + mySourceChar = (mySourceChar << 8) | trailByte; + } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + ++mySource; + /* add another bit so that the code below writes 2 bytes in case of error */ + mySourceChar = static_cast<UChar>(0x10000 | (mySourceChar << 8) | trailByte); + } + } else { + args->converter->toUBytes[0] = (uint8_t)mySourceChar; + args->converter->toULength = 1; + break; + } + } + else if(mySourceChar <= 0x7f) { + targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback); + } else { + targetUniChar = 0xffff; + } + if(targetUniChar < 0xfffe){ + if(args->offsets) { + args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); + } + *(myTarget++)=(UChar)targetUniChar; + } + else { + /* Call the callback function*/ + toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err); + break; + } + } + else{ + *err =U_BUFFER_OVERFLOW_ERROR; + break; + } + } + args->target = myTarget; + args->source = mySource; +} + +/*************************** END ISO2022-KR *********************************/ + +/*************************** ISO-2022-CN ********************************* +* +* Rules for ISO-2022-CN Encoding: +* i) The designator sequence must appear once on a line before any instance +* of character set it designates. +* ii) If two lines contain characters from the same character set, both lines +* must include the designator sequence. +* iii) Once the designator sequence is known, a shifting sequence has to be found +* to invoke the shifting +* iv) All lines start in ASCII and end in ASCII. +* v) Four shifting sequences are employed for this purpose: +* +* Sequcence ASCII Eq Charsets +* ---------- ------- --------- +* SI <SI> US-ASCII +* SO <SO> CNS-11643-1992 Plane 1, GB2312, ISO-IR-165 +* SS2 <ESC>N CNS-11643-1992 Plane 2 +* SS3 <ESC>O CNS-11643-1992 Planes 3-7 +* +* vi) +* SOdesignator : ESC "$" ")" finalchar_for_SO +* SS2designator : ESC "$" "*" finalchar_for_SS2 +* SS3designator : ESC "$" "+" finalchar_for_SS3 +* +* ESC $ ) A Indicates the bytes following SO are Chinese +* characters as defined in GB 2312-80, until +* another SOdesignation appears +* +* +* ESC $ ) E Indicates the bytes following SO are as defined +* in ISO-IR-165 (for details, see section 2.1), +* until another SOdesignation appears +* +* ESC $ ) G Indicates the bytes following SO are as defined +* in CNS 11643-plane-1, until another +* SOdesignation appears +* +* ESC $ * H Indicates the two bytes immediately following +* SS2 is a Chinese character as defined in CNS +* 11643-plane-2, until another SS2designation +* appears +* (Meaning <ESC>N must preceed every 2 byte +* sequence.) +* +* ESC $ + I Indicates the immediate two bytes following SS3 +* is a Chinese character as defined in CNS +* 11643-plane-3, until another SS3designation +* appears +* (Meaning <ESC>O must preceed every 2 byte +* sequence.) +* +* ESC $ + J Indicates the immediate two bytes following SS3 +* is a Chinese character as defined in CNS +* 11643-plane-4, until another SS3designation +* appears +* (In English: <ESC>O must preceed every 2 byte +* sequence.) +* +* ESC $ + K Indicates the immediate two bytes following SS3 +* is a Chinese character as defined in CNS +* 11643-plane-5, until another SS3designation +* appears +* +* ESC $ + L Indicates the immediate two bytes following SS3 +* is a Chinese character as defined in CNS +* 11643-plane-6, until another SS3designation +* appears +* +* ESC $ + M Indicates the immediate two bytes following SS3 +* is a Chinese character as defined in CNS +* 11643-plane-7, until another SS3designation +* appears +* +* As in ISO-2022-CN, each line starts in ASCII, and ends in ASCII, and +* has its own designation information before any Chinese characters +* appear +* +*/ + +/* The following are defined this way to make the strings truly readonly */ +static const char GB_2312_80_STR[] = "\x1B\x24\x29\x41"; +static const char ISO_IR_165_STR[] = "\x1B\x24\x29\x45"; +static const char CNS_11643_1992_Plane_1_STR[] = "\x1B\x24\x29\x47"; +static const char CNS_11643_1992_Plane_2_STR[] = "\x1B\x24\x2A\x48"; +static const char CNS_11643_1992_Plane_3_STR[] = "\x1B\x24\x2B\x49"; +static const char CNS_11643_1992_Plane_4_STR[] = "\x1B\x24\x2B\x4A"; +static const char CNS_11643_1992_Plane_5_STR[] = "\x1B\x24\x2B\x4B"; +static const char CNS_11643_1992_Plane_6_STR[] = "\x1B\x24\x2B\x4C"; +static const char CNS_11643_1992_Plane_7_STR[] = "\x1B\x24\x2B\x4D"; + +/********************** ISO2022-CN Data **************************/ +static const char* const escSeqCharsCN[10] ={ + SHIFT_IN_STR, /* 0 ASCII */ + GB_2312_80_STR, /* 1 GB2312_1 */ + ISO_IR_165_STR, /* 2 ISO_IR_165 */ + CNS_11643_1992_Plane_1_STR, + CNS_11643_1992_Plane_2_STR, + CNS_11643_1992_Plane_3_STR, + CNS_11643_1992_Plane_4_STR, + CNS_11643_1992_Plane_5_STR, + CNS_11643_1992_Plane_6_STR, + CNS_11643_1992_Plane_7_STR +}; + +static void U_CALLCONV +UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args, UErrorCode* err){ + UConverter *cnv = args->converter; + UConverterDataISO2022 *converterData; + ISO2022State *pFromU2022State; + uint8_t *target = (uint8_t *) args->target; + const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; + const UChar* source = args->source; + const UChar* sourceLimit = args->sourceLimit; + int32_t* offsets = args->offsets; + UChar32 sourceChar; + char buffer[8]; + int32_t len; + int8_t choices[3]; + int32_t choiceCount; + uint32_t targetValue = 0; + UBool useFallback; + + /* set up the state */ + converterData = (UConverterDataISO2022*)cnv->extraInfo; + pFromU2022State = &converterData->fromU2022State; + + choiceCount = 0; + + /* check if the last codepoint of previous buffer was a lead surrogate*/ + if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { + goto getTrail; + } + + while( source < sourceLimit){ + if(target < targetLimit){ + + sourceChar = *(source++); + /*check if the char is a First surrogate*/ + if(U16_IS_SURROGATE(sourceChar)) { + if(U16_IS_SURROGATE_LEAD(sourceChar)) { +getTrail: + /*look ahead to find the trail surrogate*/ + if(source < sourceLimit) { + /* test the following code unit */ + UChar trail=(UChar) *source; + if(U16_IS_TRAIL(trail)) { + source++; + sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); + cnv->fromUChar32=0x00; + /* convert this supplementary code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + cnv->fromUChar32=sourceChar; + break; + } + } else { + /* no more input */ + cnv->fromUChar32=sourceChar; + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + cnv->fromUChar32=sourceChar; + break; + } + } + + /* do the conversion */ + if(sourceChar <= 0x007f ){ + /* do not convert SO/SI/ESC */ + if(IS_2022_CONTROL(sourceChar)) { + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + cnv->fromUChar32=sourceChar; + break; + } + + /* US-ASCII */ + if(pFromU2022State->g == 0) { + buffer[0] = (char)sourceChar; + len = 1; + } else { + buffer[0] = UCNV_SI; + buffer[1] = (char)sourceChar; + len = 2; + pFromU2022State->g = 0; + choiceCount = 0; + } + if(sourceChar == CR || sourceChar == LF) { + /* reset the state at the end of a line */ + uprv_memset(pFromU2022State, 0, sizeof(ISO2022State)); + choiceCount = 0; + } + } + else{ + /* convert U+0080..U+10ffff */ + int32_t i; + int8_t cs, g; + + if(choiceCount == 0) { + /* try the current SO/G1 converter first */ + choices[0] = pFromU2022State->cs[1]; + + /* default to GB2312_1 if none is designated yet */ + if(choices[0] == 0) { + choices[0] = GB2312_1; + } + + if(converterData->version == 0) { + /* ISO-2022-CN */ + + /* try the other SO/G1 converter; a CNS_11643_1 lookup may result in any plane */ + if(choices[0] == GB2312_1) { + choices[1] = (int8_t)CNS_11643_1; + } else { + choices[1] = (int8_t)GB2312_1; + } + + choiceCount = 2; + } else if (converterData->version == 1) { + /* ISO-2022-CN-EXT */ + + /* try one of the other converters */ + switch(choices[0]) { + case GB2312_1: + choices[1] = (int8_t)CNS_11643_1; + choices[2] = (int8_t)ISO_IR_165; + break; + case ISO_IR_165: + choices[1] = (int8_t)GB2312_1; + choices[2] = (int8_t)CNS_11643_1; + break; + default: /* CNS_11643_x */ + choices[1] = (int8_t)GB2312_1; + choices[2] = (int8_t)ISO_IR_165; + break; + } + + choiceCount = 3; + } else { + choices[0] = (int8_t)CNS_11643_1; + choices[1] = (int8_t)GB2312_1; + } + } + + cs = g = 0; + /* + * len==0: no mapping found yet + * len<0: found a fallback result: continue looking for a roundtrip but no further fallbacks + * len>0: found a roundtrip result, done + */ + len = 0; + /* + * We will turn off useFallback after finding a fallback, + * but we still get fallbacks from PUA code points as usual. + * Therefore, we will also need to check that we don't overwrite + * an early fallback with a later one. + */ + useFallback = cnv->useFallback; + + for(i = 0; i < choiceCount && len <= 0; ++i) { + int8_t cs0 = choices[i]; + if(cs0 > 0) { + uint32_t value; + int32_t len2; + if(cs0 >= CNS_11643_0) { + len2 = MBCS_FROM_UCHAR32_ISO2022( + converterData->myConverterArray[CNS_11643], + sourceChar, + &value, + useFallback, + MBCS_OUTPUT_3); + if(len2 == 3 || (len2 == -3 && len == 0)) { + targetValue = value; + cs = (int8_t)(CNS_11643_0 + (value >> 16) - 0x80); + if(len2 >= 0) { + len = 2; + } else { + len = -2; + useFallback = FALSE; + } + if(cs == CNS_11643_1) { + g = 1; + } else if(cs == CNS_11643_2) { + g = 2; + } else /* plane 3..7 */ if(converterData->version == 1) { + g = 3; + } else { + /* ISO-2022-CN (without -EXT) does not support plane 3..7 */ + len = 0; + } + } + } else { + /* GB2312_1 or ISO-IR-165 */ + U_ASSERT(cs0<UCNV_2022_MAX_CONVERTERS); + len2 = MBCS_FROM_UCHAR32_ISO2022( + converterData->myConverterArray[cs0], + sourceChar, + &value, + useFallback, + MBCS_OUTPUT_2); + if(len2 == 2 || (len2 == -2 && len == 0)) { + targetValue = value; + len = len2; + cs = cs0; + g = 1; + useFallback = FALSE; + } + } + } + } + + if(len != 0) { + len = 0; /* count output bytes; it must have been abs(len) == 2 */ + + /* write the designation sequence if necessary */ + if(cs != pFromU2022State->cs[g]) { + if(cs < CNS_11643) { + uprv_memcpy(buffer, escSeqCharsCN[cs], 4); + } else { + U_ASSERT(cs >= CNS_11643_1); + uprv_memcpy(buffer, escSeqCharsCN[CNS_11643 + (cs - CNS_11643_1)], 4); + } + len = 4; + pFromU2022State->cs[g] = cs; + if(g == 1) { + /* changing the SO/G1 charset invalidates the choices[] */ + choiceCount = 0; + } + } + + /* write the shift sequence if necessary */ + if(g != pFromU2022State->g) { + switch(g) { + case 1: + buffer[len++] = UCNV_SO; + + /* set the new state only if it is the locking shift SO/G1, not for SS2 or SS3 */ + pFromU2022State->g = 1; + break; + case 2: + buffer[len++] = 0x1b; + buffer[len++] = 0x4e; + break; + default: /* case 3 */ + buffer[len++] = 0x1b; + buffer[len++] = 0x4f; + break; + } + } + + /* write the two output bytes */ + buffer[len++] = (char)(targetValue >> 8); + buffer[len++] = (char)targetValue; + } else { + /* if we cannot find the character after checking all codepages + * then this is an error + */ + *err = U_INVALID_CHAR_FOUND; + cnv->fromUChar32=sourceChar; + break; + } + } + + /* output len>0 bytes in buffer[] */ + if(len == 1) { + *target++ = buffer[0]; + if(offsets) { + *offsets++ = (int32_t)(source - args->source - 1); /* -1: known to be ASCII */ + } + } else if(len == 2 && (target + 2) <= targetLimit) { + *target++ = buffer[0]; + *target++ = buffer[1]; + if(offsets) { + int32_t sourceIndex = (int32_t)(source - args->source - U16_LENGTH(sourceChar)); + *offsets++ = sourceIndex; + *offsets++ = sourceIndex; + } + } else { + fromUWriteUInt8( + cnv, + buffer, len, + &target, (const char *)targetLimit, + &offsets, (int32_t)(source - args->source - U16_LENGTH(sourceChar)), + err); + if(U_FAILURE(*err)) { + break; + } + } + } /* end if(myTargetIndex<myTargetLength) */ + else{ + *err =U_BUFFER_OVERFLOW_ERROR; + break; + } + + }/* end while(mySourceIndex<mySourceLength) */ + + /* + * the end of the input stream and detection of truncated input + * are handled by the framework, but for ISO-2022-CN conversion + * we need to be in ASCII mode at the very end + * + * conditions: + * successful + * not in ASCII mode + * end of input and no truncated input + */ + if( U_SUCCESS(*err) && + pFromU2022State->g!=0 && + args->flush && source>=sourceLimit && cnv->fromUChar32==0 + ) { + int32_t sourceIndex; + + /* we are switching to ASCII */ + pFromU2022State->g=0; + + /* get the source index of the last input character */ + /* + * TODO this would be simpler and more reliable if we used a pair + * of sourceIndex/prevSourceIndex like in ucnvmbcs.c + * so that we could simply use the prevSourceIndex here; + * this code gives an incorrect result for the rare case of an unmatched + * trail surrogate that is alone in the last buffer of the text stream + */ + sourceIndex=(int32_t)(source-args->source); + if(sourceIndex>0) { + --sourceIndex; + if( U16_IS_TRAIL(args->source[sourceIndex]) && + (sourceIndex==0 || U16_IS_LEAD(args->source[sourceIndex-1])) + ) { + --sourceIndex; + } + } else { + sourceIndex=-1; + } + + fromUWriteUInt8( + cnv, + SHIFT_IN_STR, 1, + &target, (const char *)targetLimit, + &offsets, sourceIndex, + err); + } + + /*save the state and return */ + args->source = source; + args->target = (char*)target; +} + + +static void U_CALLCONV +UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, + UErrorCode* err){ + char tempBuf[3]; + const char *mySource = (char *) args->source; + UChar *myTarget = args->target; + const char *mySourceLimit = args->sourceLimit; + uint32_t targetUniChar = 0x0000; + uint32_t mySourceChar = 0x0000; + UConverterDataISO2022* myData; + ISO2022State *pToU2022State; + + myData=(UConverterDataISO2022*)(args->converter->extraInfo); + pToU2022State = &myData->toU2022State; + + if(myData->key != 0) { + /* continue with a partial escape sequence */ + goto escape; + } else if(args->converter->toULength == 1 && mySource < mySourceLimit && myTarget < args->targetLimit) { + /* continue with a partial double-byte character */ + mySourceChar = args->converter->toUBytes[0]; + args->converter->toULength = 0; + targetUniChar = missingCharMarker; + goto getTrailByte; + } + + while(mySource < mySourceLimit){ + + targetUniChar =missingCharMarker; + + if(myTarget < args->targetLimit){ + + mySourceChar= (unsigned char) *mySource++; + + switch(mySourceChar){ + case UCNV_SI: + pToU2022State->g=0; + if (myData->isEmptySegment) { + myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + args->converter->toUCallbackReason = UCNV_IRREGULAR; + args->converter->toUBytes[0] = static_cast<uint8_t>(mySourceChar); + args->converter->toULength = 1; + args->target = myTarget; + args->source = mySource; + return; + } + continue; + + case UCNV_SO: + if(pToU2022State->cs[1] != 0) { + pToU2022State->g=1; + myData->isEmptySegment = TRUE; /* Begin a new segment, empty so far */ + continue; + } else { + /* illegal to have SO before a matching designator */ + myData->isEmptySegment = FALSE; /* Handling a different error, reset this to avoid future spurious errs */ + break; + } + + case ESC_2022: + mySource--; +escape: + { + const char * mySourceBefore = mySource; + int8_t toULengthBefore = args->converter->toULength; + + changeState_2022(args->converter,&(mySource), + mySourceLimit, ISO_2022_CN,err); + + /* After SO there must be at least one character before a designator (designator error handled separately) */ + if(myData->key==0 && U_SUCCESS(*err) && myData->isEmptySegment) { + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + args->converter->toUCallbackReason = UCNV_IRREGULAR; + args->converter->toULength = (int8_t)(toULengthBefore + (mySource - mySourceBefore)); + } + } + + /* invalid or illegal escape sequence */ + if(U_FAILURE(*err)){ + args->target = myTarget; + args->source = mySource; + myData->isEmptySegment = FALSE; /* Reset to avoid future spurious errors */ + return; + } + continue; + + /* ISO-2022-CN does not use single-byte (C1) SS2 and SS3 */ + + case CR: + case LF: + uprv_memset(pToU2022State, 0, sizeof(ISO2022State)); + U_FALLTHROUGH; + default: + /* convert one or two bytes */ + myData->isEmptySegment = FALSE; + if(pToU2022State->g != 0) { + if(mySource < mySourceLimit) { + UConverterSharedData *cnv; + StateEnum tempState; + int32_t tempBufLen; + int leadIsOk, trailIsOk; + uint8_t trailByte; +getTrailByte: + trailByte = (uint8_t)*mySource; + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * + * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is + * an ESC/SO/SI, we report only the first byte as the illegal sequence. + * Otherwise we convert or report the pair of bytes. + */ + leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); + trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + ++mySource; + tempState = (StateEnum)pToU2022State->cs[pToU2022State->g]; + if(tempState >= CNS_11643_0) { + cnv = myData->myConverterArray[CNS_11643]; + tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0)); + tempBuf[1] = (char) (mySourceChar); + tempBuf[2] = (char) trailByte; + tempBufLen = 3; + + }else{ + U_ASSERT(tempState<UCNV_2022_MAX_CONVERTERS); + cnv = myData->myConverterArray[tempState]; + tempBuf[0] = (char) (mySourceChar); + tempBuf[1] = (char) trailByte; + tempBufLen = 2; + } + targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE); + mySourceChar = (mySourceChar << 8) | trailByte; + } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + ++mySource; + /* add another bit so that the code below writes 2 bytes in case of error */ + mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; + } + if(pToU2022State->g>=2) { + /* return from a single-shift state to the previous one */ + pToU2022State->g=pToU2022State->prevG; + } + } else { + args->converter->toUBytes[0] = (uint8_t)mySourceChar; + args->converter->toULength = 1; + goto endloop; + } + } + else{ + if(mySourceChar <= 0x7f) { + targetUniChar = (UChar) mySourceChar; + } + } + break; + } + if(targetUniChar < (missingCharMarker-1/*0xfffe*/)){ + if(args->offsets){ + args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); + } + *(myTarget++)=(UChar)targetUniChar; + } + else if(targetUniChar > missingCharMarker){ + /* disassemble the surrogate pair and write to output*/ + targetUniChar-=0x0010000; + *myTarget = (UChar)(0xd800+(UChar)(targetUniChar>>10)); + if(args->offsets){ + args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); + } + ++myTarget; + if(myTarget< args->targetLimit){ + *myTarget = (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); + if(args->offsets){ + args->offsets[myTarget - args->target] = (int32_t)(mySource - args->source - (mySourceChar <= 0xff ? 1 : 2)); + } + ++myTarget; + }else{ + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= + (UChar)(0xdc00+(UChar)(targetUniChar&0x3ff)); + } + + } + else{ + /* Call the callback function*/ + toUnicodeCallback(args->converter,mySourceChar,targetUniChar,err); + break; + } + } + else{ + *err =U_BUFFER_OVERFLOW_ERROR; + break; + } + } +endloop: + args->target = myTarget; + args->source = mySource; +} +#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ + +static void U_CALLCONV +_ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { + UConverter *cnv = args->converter; + UConverterDataISO2022 *myConverterData=(UConverterDataISO2022 *) cnv->extraInfo; + ISO2022State *pFromU2022State=&myConverterData->fromU2022State; + char *p, *subchar; + char buffer[8]; + int32_t length; + + subchar=(char *)cnv->subChars; + length=cnv->subCharLen; /* assume length==1 for most variants */ + + p = buffer; + switch(myConverterData->locale[0]){ + case 'j': + { + int8_t cs; + + if(pFromU2022State->g == 1) { + /* JIS7: switch from G1 to G0 */ + pFromU2022State->g = 0; + *p++ = UCNV_SI; + } + + cs = pFromU2022State->cs[0]; + if(cs != ASCII && cs != JISX201) { + /* not in ASCII or JIS X 0201: switch to ASCII */ + pFromU2022State->cs[0] = (int8_t)ASCII; + *p++ = '\x1b'; + *p++ = '\x28'; + *p++ = '\x42'; + } + + *p++ = subchar[0]; + break; + } + case 'c': + if(pFromU2022State->g != 0) { + /* not in ASCII mode: switch to ASCII */ + pFromU2022State->g = 0; + *p++ = UCNV_SI; + } + *p++ = subchar[0]; + break; + case 'k': + if(myConverterData->version == 0) { + if(length == 1) { + if(args->converter->fromUnicodeStatus) { + /* in DBCS mode: switch to SBCS */ + args->converter->fromUnicodeStatus = 0; + *p++ = UCNV_SI; + } + *p++ = subchar[0]; + } else /* length == 2*/ { + if(!args->converter->fromUnicodeStatus) { + /* in SBCS mode: switch to DBCS */ + args->converter->fromUnicodeStatus = 1; + *p++ = UCNV_SO; + } + *p++ = subchar[0]; + *p++ = subchar[1]; + } + break; + } else { + /* save the subconverter's substitution string */ + uint8_t *currentSubChars = myConverterData->currentConverter->subChars; + int8_t currentSubCharLen = myConverterData->currentConverter->subCharLen; + + /* set our substitution string into the subconverter */ + myConverterData->currentConverter->subChars = (uint8_t *)subchar; + myConverterData->currentConverter->subCharLen = (int8_t)length; + + /* let the subconverter write the subchar, set/retrieve fromUChar32 state */ + args->converter = myConverterData->currentConverter; + myConverterData->currentConverter->fromUChar32 = cnv->fromUChar32; + ucnv_cbFromUWriteSub(args, 0, err); + cnv->fromUChar32 = myConverterData->currentConverter->fromUChar32; + args->converter = cnv; + + /* restore the subconverter's substitution string */ + myConverterData->currentConverter->subChars = currentSubChars; + myConverterData->currentConverter->subCharLen = currentSubCharLen; + + if(*err == U_BUFFER_OVERFLOW_ERROR) { + if(myConverterData->currentConverter->charErrorBufferLength > 0) { + uprv_memcpy( + cnv->charErrorBuffer, + myConverterData->currentConverter->charErrorBuffer, + myConverterData->currentConverter->charErrorBufferLength); + } + cnv->charErrorBufferLength = myConverterData->currentConverter->charErrorBufferLength; + myConverterData->currentConverter->charErrorBufferLength = 0; + } + return; + } + default: + /* not expected */ + break; + } + ucnv_cbFromUWriteBytes(args, + buffer, (int32_t)(p - buffer), + offsetIndex, err); +} + +/* + * Structure for cloning an ISO 2022 converter into a single memory block. + */ +struct cloneStruct +{ + UConverter cnv; + UConverter currentConverter; + UConverterDataISO2022 mydata; +}; + + +U_CDECL_BEGIN + +static UConverter * U_CALLCONV +_ISO_2022_SafeClone( + const UConverter *cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status) +{ + struct cloneStruct * localClone; + UConverterDataISO2022 *cnvData; + int32_t i, size; + + if (U_FAILURE(*status)){ + return nullptr; + } + + if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */ + *pBufferSize = (int32_t)sizeof(struct cloneStruct); + return NULL; + } + + cnvData = (UConverterDataISO2022 *)cnv->extraInfo; + localClone = (struct cloneStruct *)stackBuffer; + + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ + + uprv_memcpy(&localClone->mydata, cnvData, sizeof(UConverterDataISO2022)); + localClone->cnv.extraInfo = &localClone->mydata; /* set pointer to extra data */ + localClone->cnv.isExtraLocal = TRUE; + + /* share the subconverters */ + + if(cnvData->currentConverter != NULL) { + size = (int32_t)sizeof(UConverter); + localClone->mydata.currentConverter = + ucnv_safeClone(cnvData->currentConverter, + &localClone->currentConverter, + &size, status); + if(U_FAILURE(*status)) { + return NULL; + } + } + + for(i=0; i<UCNV_2022_MAX_CONVERTERS; ++i) { + if(cnvData->myConverterArray[i] != NULL) { + ucnv_incrementRefCount(cnvData->myConverterArray[i]); + } + } + + return &localClone->cnv; +} + +U_CDECL_END + +static void U_CALLCONV +_ISO_2022_GetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) +{ + int32_t i; + UConverterDataISO2022* cnvData; + + if (U_FAILURE(*pErrorCode)) { + return; + } +#ifdef U_ENABLE_GENERIC_ISO_2022 + if (cnv->sharedData == &_ISO2022Data) { + /* We use UTF-8 in this case */ + sa->addRange(sa->set, 0, 0xd7FF); + sa->addRange(sa->set, 0xE000, 0x10FFFF); + return; + } +#endif + + cnvData = (UConverterDataISO2022*)cnv->extraInfo; + + /* open a set and initialize it with code points that are algorithmically round-tripped */ + switch(cnvData->locale[0]){ + case 'j': + /* include JIS X 0201 which is hardcoded */ + sa->add(sa->set, 0xa5); + sa->add(sa->set, 0x203e); + if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { + /* include Latin-1 for some variants of JP */ + sa->addRange(sa->set, 0, 0xff); + } else { + /* include ASCII for JP */ + sa->addRange(sa->set, 0, 0x7f); + } + if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) { + /* + * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0 + * because the bit is on for all JP versions although only versions 3 & 4 (JIS7 & JIS8) + * use half-width Katakana. + * This is because all ISO-2022-JP variants are lenient in that they accept (in toUnicode) + * half-width Katakana via the ESC ( I sequence. + * However, we only emit (fromUnicode) half-width Katakana according to the + * definition of each variant. + * + * When including fallbacks, + * we need to include half-width Katakana Unicode code points for all JP variants because + * JIS X 0208 has hardcoded fallbacks for them (which map to full-width Katakana). + */ + /* include half-width Katakana for JP */ + sa->addRange(sa->set, HWKANA_START, HWKANA_END); + } + break; +#if !UCONFIG_ONLY_HTML_CONVERSION + case 'c': + case 'z': + /* include ASCII for CN */ + sa->addRange(sa->set, 0, 0x7f); + break; + case 'k': + /* there is only one converter for KR, and it is not in the myConverterArray[] */ + cnvData->currentConverter->sharedData->impl->getUnicodeSet( + cnvData->currentConverter, sa, which, pErrorCode); + /* the loop over myConverterArray[] will simply not find another converter */ + break; +#endif + default: + break; + } + +#if 0 /* Replaced by ucnv_MBCSGetFilteredUnicodeSetForUnicode() until we implement ucnv_getUnicodeSet() with reverse fallbacks. */ + if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && + cnvData->version==0 && i==CNS_11643 + ) { + /* special handling for non-EXT ISO-2022-CN: add only code points for CNS planes 1 and 2 */ + ucnv_MBCSGetUnicodeSetForBytes( + cnvData->myConverterArray[i], + sa, UCNV_ROUNDTRIP_SET, + 0, 0x81, 0x82, + pErrorCode); + } +#endif + + for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { + UConverterSetFilter filter; + if(cnvData->myConverterArray[i]!=NULL) { + if(cnvData->locale[0]=='j' && i==JISX208) { + /* + * Only add code points that map to Shift-JIS codes + * corresponding to JIS X 0208. + */ + filter=UCNV_SET_FILTER_SJIS; +#if !UCONFIG_ONLY_HTML_CONVERSION + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && + cnvData->version==0 && i==CNS_11643) { + /* + * Version-specific for CN: + * CN version 0 does not map CNS planes 3..7 although + * they are all available in the CNS conversion table; + * CN version 1 (-EXT) does map them all. + * The two versions create different Unicode sets. + */ + filter=UCNV_SET_FILTER_2022_CN; + } else if(i==KSC5601) { + /* + * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables) + * are broader than GR94. + */ + filter=UCNV_SET_FILTER_GR94DBCS; +#endif + } else { + filter=UCNV_SET_FILTER_NONE; + } + ucnv_MBCSGetFilteredUnicodeSetForUnicode(cnvData->myConverterArray[i], sa, which, filter, pErrorCode); + } + } + + /* + * ISO 2022 converters must not convert SO/SI/ESC despite what + * sub-converters do by themselves. + * Remove these characters from the set. + */ + sa->remove(sa->set, 0x0e); + sa->remove(sa->set, 0x0f); + sa->remove(sa->set, 0x1b); + + /* ISO 2022 converters do not convert C1 controls either */ + sa->removeRange(sa->set, 0x80, 0x9f); +} + +static const UConverterImpl _ISO2022Impl={ + UCNV_ISO_2022, + + NULL, + NULL, + + _ISO2022Open, + _ISO2022Close, + _ISO2022Reset, + +#ifdef U_ENABLE_GENERIC_ISO_2022 + T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, + T_UConverter_toUnicode_ISO_2022_OFFSETS_LOGIC, + ucnv_fromUnicode_UTF8, + ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, +#else + NULL, + NULL, + NULL, + NULL, +#endif + NULL, + + NULL, + _ISO2022getName, + _ISO_2022_WriteSub, + _ISO_2022_SafeClone, + _ISO_2022_GetUnicodeSet, + + NULL, + NULL +}; +static const UConverterStaticData _ISO2022StaticData={ + sizeof(UConverterStaticData), + "ISO_2022", + 2022, + UCNV_IBM, + UCNV_ISO_2022, + 1, + 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */ + { 0x1a, 0, 0, 0 }, + 1, + FALSE, + FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; +const UConverterSharedData _ISO2022Data= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022StaticData, &_ISO2022Impl); + +/*************JP****************/ +static const UConverterImpl _ISO2022JPImpl={ + UCNV_ISO_2022, + + NULL, + NULL, + + _ISO2022Open, + _ISO2022Close, + _ISO2022Reset, + + UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC, + UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC, + UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, + UConverter_fromUnicode_ISO_2022_JP_OFFSETS_LOGIC, + NULL, + + NULL, + _ISO2022getName, + _ISO_2022_WriteSub, + _ISO_2022_SafeClone, + _ISO_2022_GetUnicodeSet, + + NULL, + NULL +}; +static const UConverterStaticData _ISO2022JPStaticData={ + sizeof(UConverterStaticData), + "ISO_2022_JP", + 0, + UCNV_IBM, + UCNV_ISO_2022, + 1, + 6, /* max 6 bytes per UChar: 4-byte escape sequence + DBCS */ + { 0x1a, 0, 0, 0 }, + 1, + FALSE, + FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +namespace { + +const UConverterSharedData _ISO2022JPData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022JPStaticData, &_ISO2022JPImpl); + +} // namespace + +#if !UCONFIG_ONLY_HTML_CONVERSION +/************* KR ***************/ +static const UConverterImpl _ISO2022KRImpl={ + UCNV_ISO_2022, + + NULL, + NULL, + + _ISO2022Open, + _ISO2022Close, + _ISO2022Reset, + + UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC, + UConverter_toUnicode_ISO_2022_KR_OFFSETS_LOGIC, + UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, + UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC, + NULL, + + NULL, + _ISO2022getName, + _ISO_2022_WriteSub, + _ISO_2022_SafeClone, + _ISO_2022_GetUnicodeSet, + + NULL, + NULL +}; +static const UConverterStaticData _ISO2022KRStaticData={ + sizeof(UConverterStaticData), + "ISO_2022_KR", + 0, + UCNV_IBM, + UCNV_ISO_2022, + 1, + 8, /* max 8 bytes per UChar */ + { 0x1a, 0, 0, 0 }, + 1, + FALSE, + FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +namespace { + +const UConverterSharedData _ISO2022KRData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022KRStaticData, &_ISO2022KRImpl); + +} // namespace + +/*************** CN ***************/ +static const UConverterImpl _ISO2022CNImpl={ + + UCNV_ISO_2022, + + NULL, + NULL, + + _ISO2022Open, + _ISO2022Close, + _ISO2022Reset, + + UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC, + UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC, + UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, + UConverter_fromUnicode_ISO_2022_CN_OFFSETS_LOGIC, + NULL, + + NULL, + _ISO2022getName, + _ISO_2022_WriteSub, + _ISO_2022_SafeClone, + _ISO_2022_GetUnicodeSet, + + NULL, + NULL +}; +static const UConverterStaticData _ISO2022CNStaticData={ + sizeof(UConverterStaticData), + "ISO_2022_CN", + 0, + UCNV_IBM, + UCNV_ISO_2022, + 1, + 8, /* max 8 bytes per UChar: 4-byte CNS designator + 2 bytes for SS2/SS3 + DBCS */ + { 0x1a, 0, 0, 0 }, + 1, + FALSE, + FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +namespace { + +const UConverterSharedData _ISO2022CNData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISO2022CNStaticData, &_ISO2022CNImpl); + +} // namespace +#endif /* #if !UCONFIG_ONLY_HTML_CONVERSION */ + +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/thirdparty/icu4c/common/ucnv_bld.cpp b/thirdparty/icu4c/common/ucnv_bld.cpp new file mode 100644 index 0000000000..0e198892f1 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_bld.cpp @@ -0,0 +1,1689 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************** + * COPYRIGHT: + * Copyright (c) 1996-2016, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************** + * + * ucnv_bld.cpp: + * + * Defines functions that are used in the creation/initialization/deletion + * of converters and related structures. + * uses uconv_io.h routines to access disk information + * is used by ucnv.h to implement public API create/delete/flushCache routines + * Modification History: + * + * Date Name Description + * + * 06/20/2000 helena OS/400 port changes; mostly typecast. + * 06/29/2000 helena Major rewrite of the callback interface. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/putil.h" +#include "unicode/udata.h" +#include "unicode/ucnv.h" +#include "unicode/uloc.h" +#include "mutex.h" +#include "putilimp.h" +#include "uassert.h" +#include "utracimp.h" +#include "ucnv_io.h" +#include "ucnv_bld.h" +#include "ucnvmbcs.h" +#include "ucnv_ext.h" +#include "ucnv_cnv.h" +#include "ucnv_imp.h" +#include "uhash.h" +#include "umutex.h" +#include "cstring.h" +#include "cmemory.h" +#include "ucln_cmn.h" +#include "ustr_cnv.h" + + +#if 0 +#include <stdio.h> +extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l); +#define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__) +#else +# define UCNV_DEBUG_LOG(x,y,z) +#endif + +static const UConverterSharedData * const +converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ + NULL, NULL, + +#if UCONFIG_NO_LEGACY_CONVERSION + NULL, +#else + &_MBCSData, +#endif + + &_Latin1Data, + &_UTF8Data, &_UTF16BEData, &_UTF16LEData, +#if UCONFIG_ONLY_HTML_CONVERSION + NULL, NULL, +#else + &_UTF32BEData, &_UTF32LEData, +#endif + NULL, + +#if UCONFIG_NO_LEGACY_CONVERSION + NULL, +#else + &_ISO2022Data, +#endif + +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION + NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, + NULL, +#else + &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6, + &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19, + &_HZData, +#endif + +#if UCONFIG_ONLY_HTML_CONVERSION + NULL, +#else + &_SCSUData, +#endif + + +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION + NULL, +#else + &_ISCIIData, +#endif + + &_ASCIIData, +#if UCONFIG_ONLY_HTML_CONVERSION + NULL, NULL, &_UTF16Data, NULL, NULL, NULL, +#else + &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, +#endif + +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION + NULL, +#else + &_CompoundTextData +#endif +}; + +/* Please keep this in binary sorted order for getAlgorithmicTypeFromName. + Also the name should be in lower case and all spaces, dashes and underscores + removed +*/ +static struct { + const char *name; + const UConverterType type; +} const cnvNameType[] = { +#if !UCONFIG_ONLY_HTML_CONVERSION + { "bocu1", UCNV_BOCU1 }, + { "cesu8", UCNV_CESU8 }, +#endif +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + { "hz",UCNV_HZ }, +#endif +#if !UCONFIG_ONLY_HTML_CONVERSION + { "imapmailboxname", UCNV_IMAP_MAILBOX }, +#endif +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + { "iscii", UCNV_ISCII }, +#endif +#if !UCONFIG_NO_LEGACY_CONVERSION + { "iso2022", UCNV_ISO_2022 }, +#endif + { "iso88591", UCNV_LATIN_1 }, +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + { "lmbcs1", UCNV_LMBCS_1 }, + { "lmbcs11",UCNV_LMBCS_11 }, + { "lmbcs16",UCNV_LMBCS_16 }, + { "lmbcs17",UCNV_LMBCS_17 }, + { "lmbcs18",UCNV_LMBCS_18 }, + { "lmbcs19",UCNV_LMBCS_19 }, + { "lmbcs2", UCNV_LMBCS_2 }, + { "lmbcs3", UCNV_LMBCS_3 }, + { "lmbcs4", UCNV_LMBCS_4 }, + { "lmbcs5", UCNV_LMBCS_5 }, + { "lmbcs6", UCNV_LMBCS_6 }, + { "lmbcs8", UCNV_LMBCS_8 }, +#endif +#if !UCONFIG_ONLY_HTML_CONVERSION + { "scsu", UCNV_SCSU }, +#endif + { "usascii", UCNV_US_ASCII }, + { "utf16", UCNV_UTF16 }, + { "utf16be", UCNV_UTF16_BigEndian }, + { "utf16le", UCNV_UTF16_LittleEndian }, +#if U_IS_BIG_ENDIAN + { "utf16oppositeendian", UCNV_UTF16_LittleEndian }, + { "utf16platformendian", UCNV_UTF16_BigEndian }, +#else + { "utf16oppositeendian", UCNV_UTF16_BigEndian}, + { "utf16platformendian", UCNV_UTF16_LittleEndian }, +#endif +#if !UCONFIG_ONLY_HTML_CONVERSION + { "utf32", UCNV_UTF32 }, + { "utf32be", UCNV_UTF32_BigEndian }, + { "utf32le", UCNV_UTF32_LittleEndian }, +#if U_IS_BIG_ENDIAN + { "utf32oppositeendian", UCNV_UTF32_LittleEndian }, + { "utf32platformendian", UCNV_UTF32_BigEndian }, +#else + { "utf32oppositeendian", UCNV_UTF32_BigEndian }, + { "utf32platformendian", UCNV_UTF32_LittleEndian }, +#endif +#endif +#if !UCONFIG_ONLY_HTML_CONVERSION + { "utf7", UCNV_UTF7 }, +#endif + { "utf8", UCNV_UTF8 }, +#if !UCONFIG_ONLY_HTML_CONVERSION + { "x11compoundtext", UCNV_COMPOUND_TEXT} +#endif +}; + + +/*initializes some global variables */ +static UHashtable *SHARED_DATA_HASHTABLE = NULL; +static icu::UMutex cnvCacheMutex; +/* Note: the global mutex is used for */ +/* reference count updates. */ + +static const char **gAvailableConverters = NULL; +static uint16_t gAvailableConverterCount = 0; +static icu::UInitOnce gAvailableConvertersInitOnce = U_INITONCE_INITIALIZER; + +#if !U_CHARSET_IS_UTF8 + +/* This contains the resolved converter name. So no further alias lookup is needed again. */ +static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for NULL */ +static const char *gDefaultConverterName = NULL; + +/* +If the default converter is an algorithmic converter, this is the cached value. +We don't cache a full UConverter and clone it because ucnv_clone doesn't have +less overhead than an algorithmic open. We don't cache non-algorithmic converters +because ucnv_flushCache must be able to unload the default converter and its table. +*/ +static const UConverterSharedData *gDefaultAlgorithmicSharedData = NULL; + +/* Does gDefaultConverterName have a converter option and require extra parsing? */ +static UBool gDefaultConverterContainsOption; + +#endif /* !U_CHARSET_IS_UTF8 */ + +static const char DATA_TYPE[] = "cnv"; + +/* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup(). + * If it is ever to be called from elsewhere, synchronization + * will need to be considered. + */ +static void +ucnv_flushAvailableConverterCache() { + gAvailableConverterCount = 0; + if (gAvailableConverters) { + uprv_free((char **)gAvailableConverters); + gAvailableConverters = NULL; + } + gAvailableConvertersInitOnce.reset(); +} + +/* ucnv_cleanup - delete all storage held by the converter cache, except any */ +/* in use by open converters. */ +/* Not thread safe. */ +/* Not supported API. */ +static UBool U_CALLCONV ucnv_cleanup(void) { + ucnv_flushCache(); + if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { + uhash_close(SHARED_DATA_HASHTABLE); + SHARED_DATA_HASHTABLE = NULL; + } + + /* Isn't called from flushCache because other threads may have preexisting references to the table. */ + ucnv_flushAvailableConverterCache(); + +#if !U_CHARSET_IS_UTF8 + gDefaultConverterName = NULL; + gDefaultConverterNameBuffer[0] = 0; + gDefaultConverterContainsOption = FALSE; + gDefaultAlgorithmicSharedData = NULL; +#endif + + return (SHARED_DATA_HASHTABLE == NULL); +} + +U_CAPI void U_EXPORT2 +ucnv_enableCleanup(void) { + ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); +} + +static UBool U_CALLCONV +isCnvAcceptable(void * /*context*/, + const char * /*type*/, const char * /*name*/, + const UDataInfo *pInfo) { + return (UBool)( + pInfo->size>=20 && + pInfo->isBigEndian==U_IS_BIG_ENDIAN && + pInfo->charsetFamily==U_CHARSET_FAMILY && + pInfo->sizeofUChar==U_SIZEOF_UCHAR && + pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ + pInfo->dataFormat[1]==0x6e && + pInfo->dataFormat[2]==0x76 && + pInfo->dataFormat[3]==0x74 && + pInfo->formatVersion[0]==6); /* Everything will be version 6 */ +} + +/** + * Un flatten shared data from a UDATA.. + */ +static UConverterSharedData* +ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) +{ + /* UDataInfo info; -- necessary only if some converters have different formatVersion */ + const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); + const UConverterStaticData *source = (const UConverterStaticData *) raw; + UConverterSharedData *data; + UConverterType type = (UConverterType)source->conversionType; + + if(U_FAILURE(*status)) + return NULL; + + if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || + converterData[type] == NULL || + !converterData[type]->isReferenceCounted || + converterData[type]->referenceCounter != 1 || + source->structSize != sizeof(UConverterStaticData)) + { + *status = U_INVALID_TABLE_FORMAT; + return NULL; + } + + data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); + if(data == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + /* copy initial values from the static structure for this type */ + uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); + + data->staticData = source; + + data->sharedDataCached = FALSE; + + /* fill in fields from the loaded data */ + data->dataMemory = (void*)pData; /* for future use */ + + if(data->impl->load != NULL) { + data->impl->load(data, pArgs, raw + source->structSize, status); + if(U_FAILURE(*status)) { + uprv_free(data); + return NULL; + } + } + return data; +} + +/*Takes an alias name gets an actual converter file name + *goes to disk and opens it. + *allocates the memory and returns a new UConverter object + */ +static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) +{ + UDataMemory *data; + UConverterSharedData *sharedData; + + UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); + + if (U_FAILURE (*err)) { + UTRACE_EXIT_STATUS(*err); + return NULL; + } + + UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); + + data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, NULL, err); + if(U_FAILURE(*err)) + { + UTRACE_EXIT_STATUS(*err); + return NULL; + } + + sharedData = ucnv_data_unFlattenClone(pArgs, data, err); + if(U_FAILURE(*err)) + { + udata_close(data); + UTRACE_EXIT_STATUS(*err); + return NULL; + } + + /* + * TODO Store pkg in a field in the shared data so that delta-only converters + * can load base converters from the same package. + * If the pkg name is longer than the field, then either do not load the converter + * in the first place, or just set the pkg field to "". + */ + + UTRACE_EXIT_PTR_STATUS(sharedData, *err); + return sharedData; +} + +/*returns a converter type from a string + */ +static const UConverterSharedData * +getAlgorithmicTypeFromName(const char *realName) +{ + uint32_t mid, start, limit; + uint32_t lastMid; + int result; + char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; + + /* Lower case and remove ignoreable characters. */ + ucnv_io_stripForCompare(strippedName, realName); + + /* do a binary search for the alias */ + start = 0; + limit = UPRV_LENGTHOF(cnvNameType); + mid = limit; + lastMid = UINT32_MAX; + + for (;;) { + mid = (uint32_t)((start + limit) / 2); + if (lastMid == mid) { /* Have we moved? */ + break; /* We haven't moved, and it wasn't found. */ + } + lastMid = mid; + result = uprv_strcmp(strippedName, cnvNameType[mid].name); + + if (result < 0) { + limit = mid; + } else if (result > 0) { + start = mid; + } else { + return converterData[cnvNameType[mid].type]; + } + } + + return NULL; +} + +/* +* Based on the number of known converters, this determines how many times larger +* the shared data hash table should be. When on small platforms, or just a couple +* of converters are used, this number should be 2. When memory is plentiful, or +* when ucnv_countAvailable is ever used with a lot of available converters, +* this should be 4. +* Larger numbers reduce the number of hash collisions, but use more memory. +*/ +#define UCNV_CACHE_LOAD_FACTOR 2 + +/* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ +/* Will always be called with the cnvCacheMutex alrady being held */ +/* by the calling function. */ +/* Stores the shared data in the SHARED_DATA_HASHTABLE + * @param data The shared data + */ +static void +ucnv_shareConverterData(UConverterSharedData * data) +{ + UErrorCode err = U_ZERO_ERROR; + /*Lazy evaluates the Hashtable itself */ + /*void *sanity = NULL;*/ + + if (SHARED_DATA_HASHTABLE == NULL) + { + SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, NULL, + ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR, + &err); + ucnv_enableCleanup(); + + if (U_FAILURE(err)) + return; + } + + /* ### check to see if the element is not already there! */ + + /* + sanity = ucnv_getSharedConverterData (data->staticData->name); + if(sanity != NULL) + { + UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity); + } + UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity); + */ + + /* Mark it shared */ + data->sharedDataCached = TRUE; + + uhash_put(SHARED_DATA_HASHTABLE, + (void*) data->staticData->name, /* Okay to cast away const as long as + keyDeleter == NULL */ + data, + &err); + UCNV_DEBUG_LOG("put", data->staticData->name,data); + +} + +/* Look up a converter name in the shared data cache. */ +/* cnvCacheMutex must be held by the caller to protect the hash table. */ +/* gets the shared data from the SHARED_DATA_HASHTABLE (might return NULL if it isn't there) + * @param name The name of the shared data + * @return the shared data from the SHARED_DATA_HASHTABLE + */ +static UConverterSharedData * +ucnv_getSharedConverterData(const char *name) +{ + /*special case when no Table has yet been created we return NULL */ + if (SHARED_DATA_HASHTABLE == NULL) + { + return NULL; + } + else + { + UConverterSharedData *rc; + + rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); + UCNV_DEBUG_LOG("get",name,rc); + return rc; + } +} + +/*frees the string of memory blocks associates with a sharedConverter + *if and only if the referenceCounter == 0 + */ +/* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to + * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and + * returns TRUE, + * otherwise returns FALSE + * @param sharedConverterData The shared data + * @return if not it frees all the memory stemming from sharedConverterData and + * returns TRUE, otherwise returns FALSE + */ +static UBool +ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) +{ + UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD); + UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData); + + if (deadSharedData->referenceCounter > 0) { + UTRACE_EXIT_VALUE((int32_t)FALSE); + return FALSE; + } + + if (deadSharedData->impl->unload != NULL) { + deadSharedData->impl->unload(deadSharedData); + } + + if(deadSharedData->dataMemory != NULL) + { + UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory; + udata_close(data); + } + + uprv_free(deadSharedData); + + UTRACE_EXIT_VALUE((int32_t)TRUE); + return TRUE; +} + +/** + * Load a non-algorithmic converter. + * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). + */ +UConverterSharedData * +ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { + UConverterSharedData *mySharedConverterData; + + if(err == NULL || U_FAILURE(*err)) { + return NULL; + } + + if(pArgs->pkg != NULL && *pArgs->pkg != 0) { + /* application-provided converters are not currently cached */ + return createConverterFromFile(pArgs, err); + } + + mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); + if (mySharedConverterData == NULL) + { + /*Not cached, we need to stream it in from file */ + mySharedConverterData = createConverterFromFile(pArgs, err); + if (U_FAILURE (*err) || (mySharedConverterData == NULL)) + { + return NULL; + } + else if (!pArgs->onlyTestIsLoadable) + { + /* share it with other library clients */ + ucnv_shareConverterData(mySharedConverterData); + } + } + else + { + /* The data for this converter was already in the cache. */ + /* Update the reference counter on the shared data: one more client */ + mySharedConverterData->referenceCounter++; + } + + return mySharedConverterData; +} + +/** + * Unload a non-algorithmic converter. + * It must be sharedData->isReferenceCounted + * and this function must be called inside umtx_lock(&cnvCacheMutex). + */ +U_CAPI void +ucnv_unload(UConverterSharedData *sharedData) { + if(sharedData != NULL) { + if (sharedData->referenceCounter > 0) { + sharedData->referenceCounter--; + } + + if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == FALSE)) { + ucnv_deleteSharedConverterData(sharedData); + } + } +} + +U_CFUNC void +ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) +{ + if(sharedData != NULL && sharedData->isReferenceCounted) { + umtx_lock(&cnvCacheMutex); + ucnv_unload(sharedData); + umtx_unlock(&cnvCacheMutex); + } +} + +U_CFUNC void +ucnv_incrementRefCount(UConverterSharedData *sharedData) +{ + if(sharedData != NULL && sharedData->isReferenceCounted) { + umtx_lock(&cnvCacheMutex); + sharedData->referenceCounter++; + umtx_unlock(&cnvCacheMutex); + } +} + +/* + * *pPieces must be initialized. + * The name without options will be copied to pPieces->cnvName. + * The locale and options will be copied to pPieces only if present in inName, + * otherwise the existing values in pPieces remain. + * *pArgs will be set to the pPieces values. + */ +static void +parseConverterOptions(const char *inName, + UConverterNamePieces *pPieces, + UConverterLoadArgs *pArgs, + UErrorCode *err) +{ + char *cnvName = pPieces->cnvName; + char c; + int32_t len = 0; + + pArgs->name=inName; + pArgs->locale=pPieces->locale; + pArgs->options=pPieces->options; + + /* copy the converter name itself to cnvName */ + while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { + if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { + *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ + pPieces->cnvName[0]=0; + return; + } + *cnvName++=c; + inName++; + } + *cnvName=0; + pArgs->name=pPieces->cnvName; + + /* parse options. No more name copying should occur. */ + while((c=*inName)!=0) { + if(c==UCNV_OPTION_SEP_CHAR) { + ++inName; + } + + /* inName is behind an option separator */ + if(uprv_strncmp(inName, "locale=", 7)==0) { + /* do not modify locale itself in case we have multiple locale options */ + char *dest=pPieces->locale; + + /* copy the locale option value */ + inName+=7; + len=0; + while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { + ++inName; + + if(++len>=ULOC_FULLNAME_CAPACITY) { + *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ + pPieces->locale[0]=0; + return; + } + + *dest++=c; + } + *dest=0; + } else if(uprv_strncmp(inName, "version=", 8)==0) { + /* copy the version option value into bits 3..0 of pPieces->options */ + inName+=8; + c=*inName; + if(c==0) { + pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION); + return; + } else if((uint8_t)(c-'0')<10) { + pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0'); + ++inName; + } + } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) { + inName+=8; + pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL); + /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */ + } else { + /* ignore any other options until we define some */ + while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) { + } + if(c==0) { + return; + } + } + } +} + +/*Logic determines if the converter is Algorithmic AND/OR cached + *depending on that: + * -we either go to get data from disk and cache it (Data=TRUE, Cached=False) + * -Get it from a Hashtable (Data=X, Cached=TRUE) + * -Call dataConverter initializer (Data=TRUE, Cached=TRUE) + * -Call AlgorithmicConverter initializer (Data=FALSE, Cached=TRUE) + */ +U_CFUNC UConverterSharedData * +ucnv_loadSharedData(const char *converterName, + UConverterNamePieces *pPieces, + UConverterLoadArgs *pArgs, + UErrorCode * err) { + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs; + UConverterSharedData *mySharedConverterData = NULL; + UErrorCode internalErrorCode = U_ZERO_ERROR; + UBool mayContainOption = TRUE; + UBool checkForAlgorithmic = TRUE; + + if (U_FAILURE (*err)) { + return NULL; + } + + if(pPieces == NULL) { + if(pArgs != NULL) { + /* + * Bad: We may set pArgs pointers to stackPieces fields + * which will be invalid after this function returns. + */ + *err = U_INTERNAL_PROGRAM_ERROR; + return NULL; + } + pPieces = &stackPieces; + } + if(pArgs == NULL) { + uprv_memset(&stackArgs, 0, sizeof(stackArgs)); + stackArgs.size = (int32_t)sizeof(stackArgs); + pArgs = &stackArgs; + } + + pPieces->cnvName[0] = 0; + pPieces->locale[0] = 0; + pPieces->options = 0; + + pArgs->name = converterName; + pArgs->locale = pPieces->locale; + pArgs->options = pPieces->options; + + /* In case "name" is NULL we want to open the default converter. */ + if (converterName == NULL) { +#if U_CHARSET_IS_UTF8 + pArgs->name = "UTF-8"; + return (UConverterSharedData *)converterData[UCNV_UTF8]; +#else + /* Call ucnv_getDefaultName first to query the name from the OS. */ + pArgs->name = ucnv_getDefaultName(); + if (pArgs->name == NULL) { + *err = U_MISSING_RESOURCE_ERROR; + return NULL; + } + mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; + checkForAlgorithmic = FALSE; + mayContainOption = gDefaultConverterContainsOption; + /* the default converter name is already canonical */ +#endif + } + else if(UCNV_FAST_IS_UTF8(converterName)) { + /* fastpath for UTF-8 */ + pArgs->name = "UTF-8"; + return (UConverterSharedData *)converterData[UCNV_UTF8]; + } + else { + /* separate the converter name from the options */ + parseConverterOptions(converterName, pPieces, pArgs, err); + if (U_FAILURE(*err)) { + /* Very bad name used. */ + return NULL; + } + + /* get the canonical converter name */ + pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); + if (U_FAILURE(internalErrorCode) || pArgs->name == NULL) { + /* + * set the input name in case the converter was added + * without updating the alias table, or when there is no alias table + */ + pArgs->name = pPieces->cnvName; + } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) { + *err = U_AMBIGUOUS_ALIAS_WARNING; + } + } + + /* separate the converter name from the options */ + if(mayContainOption && pArgs->name != pPieces->cnvName) { + parseConverterOptions(pArgs->name, pPieces, pArgs, err); + } + + /* get the shared data for an algorithmic converter, if it is one */ + if (checkForAlgorithmic) { + mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); + } + if (mySharedConverterData == NULL) + { + /* it is a data-based converter, get its shared data. */ + /* Hold the cnvCacheMutex through the whole process of checking the */ + /* converter data cache, and adding new entries to the cache */ + /* to prevent other threads from modifying the cache during the */ + /* process. */ + pArgs->nestedLoads=1; + pArgs->pkg=NULL; + + umtx_lock(&cnvCacheMutex); + mySharedConverterData = ucnv_load(pArgs, err); + umtx_unlock(&cnvCacheMutex); + if (U_FAILURE (*err) || (mySharedConverterData == NULL)) + { + return NULL; + } + } + + return mySharedConverterData; +} + +U_CAPI UConverter * +ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) +{ + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; + UConverterSharedData *mySharedConverterData; + + UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); + + if(U_SUCCESS(*err)) { + UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); + + mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); + + myUConverter = ucnv_createConverterFromSharedData( + myUConverter, mySharedConverterData, + &stackArgs, + err); + + if(U_SUCCESS(*err)) { + UTRACE_EXIT_PTR_STATUS(myUConverter, *err); + return myUConverter; + } + } + + /* exit with error */ + UTRACE_EXIT_STATUS(*err); + return NULL; +} + +U_CFUNC UBool +ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { + UConverter myUConverter; + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; + UConverterSharedData *mySharedConverterData; + + UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); + + if(U_SUCCESS(*err)) { + UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); + + stackArgs.onlyTestIsLoadable=TRUE; + mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); + ucnv_createConverterFromSharedData( + &myUConverter, mySharedConverterData, + &stackArgs, + err); + ucnv_unloadSharedDataIfReady(mySharedConverterData); + } + + UTRACE_EXIT_STATUS(*err); + return U_SUCCESS(*err); +} + +UConverter * +ucnv_createAlgorithmicConverter(UConverter *myUConverter, + UConverterType type, + const char *locale, uint32_t options, + UErrorCode *err) { + UConverter *cnv; + const UConverterSharedData *sharedData; + UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; + + UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); + UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); + + if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); + return NULL; + } + + sharedData = converterData[type]; + if(sharedData == NULL || sharedData->isReferenceCounted) { + /* not a valid type, or not an algorithmic converter */ + *err = U_ILLEGAL_ARGUMENT_ERROR; + UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); + return NULL; + } + + stackArgs.name = ""; + stackArgs.options = options; + stackArgs.locale=locale; + cnv = ucnv_createConverterFromSharedData( + myUConverter, (UConverterSharedData *)sharedData, + &stackArgs, err); + + UTRACE_EXIT_PTR_STATUS(cnv, *err); + return cnv; +} + +U_CFUNC UConverter* +ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) +{ + UConverter *myUConverter; + UConverterSharedData *mySharedConverterData; + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; + + UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); + + if(U_FAILURE(*err)) { + UTRACE_EXIT_STATUS(*err); + return NULL; + } + + UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); + + /* first, get the options out of the converterName string */ + stackPieces.cnvName[0] = 0; + stackPieces.locale[0] = 0; + stackPieces.options = 0; + parseConverterOptions(converterName, &stackPieces, &stackArgs, err); + if (U_FAILURE(*err)) { + /* Very bad name used. */ + UTRACE_EXIT_STATUS(*err); + return NULL; + } + stackArgs.nestedLoads=1; + stackArgs.pkg=packageName; + + /* open the data, unflatten the shared structure */ + mySharedConverterData = createConverterFromFile(&stackArgs, err); + + if (U_FAILURE(*err)) { + UTRACE_EXIT_STATUS(*err); + return NULL; + } + + /* create the actual converter */ + myUConverter = ucnv_createConverterFromSharedData(NULL, mySharedConverterData, &stackArgs, err); + + if (U_FAILURE(*err)) { + ucnv_close(myUConverter); + UTRACE_EXIT_STATUS(*err); + return NULL; + } + + UTRACE_EXIT_PTR_STATUS(myUConverter, *err); + return myUConverter; +} + + +U_CFUNC UConverter* +ucnv_createConverterFromSharedData(UConverter *myUConverter, + UConverterSharedData *mySharedConverterData, + UConverterLoadArgs *pArgs, + UErrorCode *err) +{ + UBool isCopyLocal; + + if(U_FAILURE(*err)) { + ucnv_unloadSharedDataIfReady(mySharedConverterData); + return myUConverter; + } + if(myUConverter == NULL) + { + myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); + if(myUConverter == NULL) + { + *err = U_MEMORY_ALLOCATION_ERROR; + ucnv_unloadSharedDataIfReady(mySharedConverterData); + return NULL; + } + isCopyLocal = FALSE; + } else { + isCopyLocal = TRUE; + } + + /* initialize the converter */ + uprv_memset(myUConverter, 0, sizeof(UConverter)); + myUConverter->isCopyLocal = isCopyLocal; + /*myUConverter->isExtraLocal = FALSE;*/ /* Set by the memset call */ + myUConverter->sharedData = mySharedConverterData; + myUConverter->options = pArgs->options; + if(!pArgs->onlyTestIsLoadable) { + myUConverter->preFromUFirstCP = U_SENTINEL; + myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; + myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; + myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; + myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar; + myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; + myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; + myUConverter->subChars = (uint8_t *)myUConverter->subUChars; + uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); + myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ + } + + if(mySharedConverterData->impl->open != NULL) { + mySharedConverterData->impl->open(myUConverter, pArgs, err); + if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { + /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ + ucnv_close(myUConverter); + return NULL; + } + } + + return myUConverter; +} + +/*Frees all shared immutable objects that aren't referred to (reference count = 0) + */ +U_CAPI int32_t U_EXPORT2 +ucnv_flushCache () +{ + UConverterSharedData *mySharedData = NULL; + int32_t pos; + int32_t tableDeletedNum = 0; + const UHashElement *e; + /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ + int32_t i, remaining; + + UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); + + /* Close the default converter without creating a new one so that everything will be flushed. */ + u_flushDefaultConverter(); + + /*if shared data hasn't even been lazy evaluated yet + * return 0 + */ + if (SHARED_DATA_HASHTABLE == NULL) { + UTRACE_EXIT_VALUE((int32_t)0); + return 0; + } + + /*creates an enumeration to iterate through every element in the + * table + * + * Synchronization: holding cnvCacheMutex will prevent any other thread from + * accessing or modifying the hash table during the iteration. + * The reference count of an entry may be decremented by + * ucnv_close while the iteration is in process, but this is + * benign. It can't be incremented (in ucnv_createConverter()) + * because the sequence of looking up in the cache + incrementing + * is protected by cnvCacheMutex. + */ + umtx_lock(&cnvCacheMutex); + /* + * double loop: A delta/extension-only converter has a pointer to its base table's + * shared data; the first iteration of the outer loop may see the delta converter + * before the base converter, and unloading the delta converter may get the base + * converter's reference counter down to 0. + */ + i = 0; + do { + remaining = 0; + pos = UHASH_FIRST; + while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != NULL) + { + mySharedData = (UConverterSharedData *) e->value.pointer; + /*deletes only if reference counter == 0 */ + if (mySharedData->referenceCounter == 0) + { + tableDeletedNum++; + + UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); + + uhash_removeElement(SHARED_DATA_HASHTABLE, e); + mySharedData->sharedDataCached = FALSE; + ucnv_deleteSharedConverterData (mySharedData); + } else { + ++remaining; + } + } + } while(++i == 1 && remaining > 0); + umtx_unlock(&cnvCacheMutex); + + UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); + + UTRACE_EXIT_VALUE(tableDeletedNum); + return tableDeletedNum; +} + +/* available converters list --------------------------------------------------- */ + +static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) { + U_ASSERT(gAvailableConverterCount == 0); + U_ASSERT(gAvailableConverters == NULL); + + ucnv_enableCleanup(); + UEnumeration *allConvEnum = ucnv_openAllNames(&errCode); + int32_t allConverterCount = uenum_count(allConvEnum, &errCode); + if (U_FAILURE(errCode)) { + return; + } + + /* We can't have more than "*converterTable" converters to open */ + gAvailableConverters = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); + if (!gAvailableConverters) { + errCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + + /* Open the default converter to make sure that it has first dibs in the hash table. */ + UErrorCode localStatus = U_ZERO_ERROR; + UConverter tempConverter; + ucnv_close(ucnv_createConverter(&tempConverter, NULL, &localStatus)); + + gAvailableConverterCount = 0; + + for (int32_t idx = 0; idx < allConverterCount; idx++) { + localStatus = U_ZERO_ERROR; + const char *converterName = uenum_next(allConvEnum, NULL, &localStatus); + if (ucnv_canCreateConverter(converterName, &localStatus)) { + gAvailableConverters[gAvailableConverterCount++] = converterName; + } + } + + uenum_close(allConvEnum); +} + + +static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { + umtx_initOnce(gAvailableConvertersInitOnce, &initAvailableConvertersList, *pErrorCode); + return U_SUCCESS(*pErrorCode); +} + +U_CFUNC uint16_t +ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { + if (haveAvailableConverterList(pErrorCode)) { + return gAvailableConverterCount; + } + return 0; +} + +U_CFUNC const char * +ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { + if (haveAvailableConverterList(pErrorCode)) { + if (n < gAvailableConverterCount) { + return gAvailableConverters[n]; + } + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + } + return NULL; +} + +/* default converter name --------------------------------------------------- */ + +#if !U_CHARSET_IS_UTF8 +/* +Copy the canonical converter name. +ucnv_getDefaultName must be thread safe, which can call this function. + +ucnv_setDefaultName calls this function and it doesn't have to be +thread safe because there is no reliable/safe way to reset the +converter in use in all threads. If you did reset the converter, you +would not be sure that retrieving a default converter for one string +would be the same type of default converter for a successive string. +Since the name is a returned via ucnv_getDefaultName without copying, +you shouldn't be modifying or deleting the string from a separate thread. +*/ +static inline void +internalSetName(const char *name, UErrorCode *status) { + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; + int32_t length=(int32_t)(uprv_strlen(name)); + UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != NULL); + const UConverterSharedData *algorithmicSharedData; + + stackArgs.name = name; + if(containsOption) { + stackPieces.cnvName[0] = 0; + stackPieces.locale[0] = 0; + stackPieces.options = 0; + parseConverterOptions(name, &stackPieces, &stackArgs, status); + if(U_FAILURE(*status)) { + return; + } + } + algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); + + umtx_lock(&cnvCacheMutex); + + gDefaultAlgorithmicSharedData = algorithmicSharedData; + gDefaultConverterContainsOption = containsOption; + uprv_memcpy(gDefaultConverterNameBuffer, name, length); + gDefaultConverterNameBuffer[length]=0; + + /* gDefaultConverterName MUST be the last global var set by this function. */ + /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ + // But there is nothing here preventing that from being reordered, either by the compiler + // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough. + // -- Andy + gDefaultConverterName = gDefaultConverterNameBuffer; + + ucnv_enableCleanup(); + + umtx_unlock(&cnvCacheMutex); +} +#endif + +/* + * In order to be really thread-safe, the get function would have to take + * a buffer parameter and copy the current string inside a mutex block. + * This implementation only tries to be really thread-safe while + * setting the name. + * It assumes that setting a pointer is atomic. + */ + +U_CAPI const char* U_EXPORT2 +ucnv_getDefaultName() { +#if U_CHARSET_IS_UTF8 + return "UTF-8"; +#else + /* local variable to be thread-safe */ + const char *name; + + /* + Concurrent calls to ucnv_getDefaultName must be thread safe, + but ucnv_setDefaultName is not thread safe. + */ + { + icu::Mutex lock(&cnvCacheMutex); + name = gDefaultConverterName; + } + if(name==NULL) { + UErrorCode errorCode = U_ZERO_ERROR; + UConverter *cnv = NULL; + + name = uprv_getDefaultCodepage(); + + /* if the name is there, test it out and get the canonical name with options */ + if(name != NULL) { + cnv = ucnv_open(name, &errorCode); + if(U_SUCCESS(errorCode) && cnv != NULL) { + name = ucnv_getName(cnv, &errorCode); + } + } + + if(name == NULL || name[0] == 0 + || U_FAILURE(errorCode) || cnv == NULL + || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) + { + /* Panic time, let's use a fallback. */ +#if (U_CHARSET_FAMILY == U_ASCII_FAMILY) + name = "US-ASCII"; + /* there is no 'algorithmic' converter for EBCDIC */ +#elif U_PLATFORM == U_PF_OS390 + name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; +#else + name = "ibm-37_P100-1995"; +#endif + } + + internalSetName(name, &errorCode); + + /* The close may make the current name go away. */ + ucnv_close(cnv); + } + + return name; +#endif +} + +#if U_CHARSET_IS_UTF8 +U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {} +#else +/* +This function is not thread safe, and it can't be thread safe. +See internalSetName or the API reference for details. +*/ +U_CAPI void U_EXPORT2 +ucnv_setDefaultName(const char *converterName) { + if(converterName==NULL) { + /* reset to the default codepage */ + gDefaultConverterName=NULL; + } else { + UErrorCode errorCode = U_ZERO_ERROR; + UConverter *cnv = NULL; + const char *name = NULL; + + /* if the name is there, test it out and get the canonical name with options */ + cnv = ucnv_open(converterName, &errorCode); + if(U_SUCCESS(errorCode) && cnv != NULL) { + name = ucnv_getName(cnv, &errorCode); + } + + if(U_SUCCESS(errorCode) && name!=NULL) { + internalSetName(name, &errorCode); + } + /* else this converter is bad to use. Don't change it to a bad value. */ + + /* The close may make the current name go away. */ + ucnv_close(cnv); + + /* reset the converter cache */ + u_flushDefaultConverter(); + } +} +#endif + +/* data swapping ------------------------------------------------------------ */ + +/* most of this might belong more properly into ucnvmbcs.c, but that is so large */ + +#if !UCONFIG_NO_LEGACY_CONVERSION + +U_CAPI int32_t U_EXPORT2 +ucnv_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UDataInfo *pInfo; + int32_t headerSize; + + const uint8_t *inBytes; + uint8_t *outBytes; + + uint32_t offset, count, staticDataSize; + int32_t size; + + const UConverterStaticData *inStaticData; + UConverterStaticData *outStaticData; + + const _MBCSHeader *inMBCSHeader; + _MBCSHeader *outMBCSHeader; + _MBCSHeader mbcsHeader; + uint32_t mbcsHeaderLength; + UBool noFromU=FALSE; + + uint8_t outputType; + + int32_t maxFastUChar, mbcsIndexLength; + + const int32_t *inExtIndexes; + int32_t extOffset; + + /* udata_swapDataHeader checks the arguments */ + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* check data format and format version */ + pInfo=(const UDataInfo *)((const char *)inData+4); + if(!( + pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ + pInfo->dataFormat[1]==0x6e && + pInfo->dataFormat[2]==0x76 && + pInfo->dataFormat[3]==0x74 && + pInfo->formatVersion[0]==6 && + pInfo->formatVersion[1]>=2 + )) { + udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0], pInfo->formatVersion[1]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + inBytes=(const uint8_t *)inData+headerSize; + outBytes=(uint8_t *)outData+headerSize; + + /* read the initial UConverterStaticData structure after the UDataInfo header */ + inStaticData=(const UConverterStaticData *)inBytes; + outStaticData=(UConverterStaticData *)outBytes; + + if(length<0) { + staticDataSize=ds->readUInt32(inStaticData->structSize); + } else { + length-=headerSize; + if( length<(int32_t)sizeof(UConverterStaticData) || + (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) + ) { + udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + + if(length>=0) { + /* swap the static data */ + if(inStaticData!=outStaticData) { + uprv_memcpy(outStaticData, inStaticData, staticDataSize); + } + + ds->swapArray32(ds, &inStaticData->structSize, 4, + &outStaticData->structSize, pErrorCode); + ds->swapArray32(ds, &inStaticData->codepage, 4, + &outStaticData->codepage, pErrorCode); + + ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name), + outStaticData->name, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "ucnv_swap(): error swapping converter name\n"); + return 0; + } + } + + inBytes+=staticDataSize; + outBytes+=staticDataSize; + if(length>=0) { + length-=(int32_t)staticDataSize; + } + + /* check for supported conversionType values */ + if(inStaticData->conversionType==UCNV_MBCS) { + /* swap MBCS data */ + inMBCSHeader=(const _MBCSHeader *)inBytes; + outMBCSHeader=(_MBCSHeader *)outBytes; + + if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) { + udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { + mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; + } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && + ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))& + MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 + ) { + mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK; + noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0); + } else { + udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", + inMBCSHeader->version[0], inMBCSHeader->version[1]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); + mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); + mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); + mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); + mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); + mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); + mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); + mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); + /* mbcsHeader.options have been read above */ + + extOffset=(int32_t)(mbcsHeader.flags>>8); + outputType=(uint8_t)mbcsHeader.flags; + if(noFromU && outputType==MBCS_OUTPUT_1) { + udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n"); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + /* make sure that the output type is known */ + switch(outputType) { + case MBCS_OUTPUT_1: + case MBCS_OUTPUT_2: + case MBCS_OUTPUT_3: + case MBCS_OUTPUT_4: + case MBCS_OUTPUT_3_EUC: + case MBCS_OUTPUT_4_EUC: + case MBCS_OUTPUT_2_SISO: + case MBCS_OUTPUT_EXT_ONLY: + /* OK */ + break; + default: + udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", + outputType); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + /* calculate the length of the MBCS data */ + + /* + * utf8Friendly MBCS files (mbcsHeader.version 4.3) + * contain an additional mbcsIndex table: + * uint16_t[(maxFastUChar+1)>>6]; + * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff). + */ + maxFastUChar=0; + mbcsIndexLength=0; + if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 && + mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0 + ) { + maxFastUChar=(maxFastUChar<<8)|0xff; + mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */ + } + + if(extOffset==0) { + size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength); + if(!noFromU) { + size+=(int32_t)mbcsHeader.fromUBytesLength; + } + + /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ + inExtIndexes=NULL; + } else { + /* there is extension data after the base data, see ucnv_ext.h */ + if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { + udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inExtIndexes=(const int32_t *)(inBytes+extOffset); + size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); + } + + if(length>=0) { + if(length<size) { + udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + /* copy the data for inaccessible bytes */ + if(inBytes!=outBytes) { + uprv_memcpy(outBytes, inBytes, size); + } + + /* swap the MBCSHeader, except for the version field */ + count=mbcsHeaderLength*4; + ds->swapArray32(ds, &inMBCSHeader->countStates, count-4, + &outMBCSHeader->countStates, pErrorCode); + + if(outputType==MBCS_OUTPUT_EXT_ONLY) { + /* + * extension-only file, + * contains a base name instead of normal base table data + */ + + /* swap the base name, between the header and the extension data */ + const char *inBaseName=(const char *)inBytes+count; + char *outBaseName=(char *)outBytes+count; + ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName), + outBaseName, pErrorCode); + } else { + /* normal file with base table data */ + + /* swap the state table, 1kB per state */ + offset=count; + count=mbcsHeader.countStates*1024; + ds->swapArray32(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + + /* swap the toUFallbacks[] */ + offset+=count; + count=mbcsHeader.countToUFallbacks*8; + ds->swapArray32(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + + /* swap the unicodeCodeUnits[] */ + offset=mbcsHeader.offsetToUCodeUnits; + count=mbcsHeader.offsetFromUTable-offset; + ds->swapArray16(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + + /* offset to the stage 1 table, independent of the outputType */ + offset=mbcsHeader.offsetFromUTable; + + if(outputType==MBCS_OUTPUT_1) { + /* SBCS: swap the fromU tables, all 16 bits wide */ + count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; + ds->swapArray16(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + } else { + /* otherwise: swap the stage tables separately */ + + /* stage 1 table: uint16_t[0x440 or 0x40] */ + if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { + count=0x440*2; /* for all of Unicode */ + } else { + count=0x40*2; /* only BMP */ + } + ds->swapArray16(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + + /* stage 2 table: uint32_t[] */ + offset+=count; + count=mbcsHeader.offsetFromUBytes-offset; + ds->swapArray32(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + + /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ + offset=mbcsHeader.offsetFromUBytes; + count= noFromU ? 0 : mbcsHeader.fromUBytesLength; + switch(outputType) { + case MBCS_OUTPUT_2: + case MBCS_OUTPUT_3_EUC: + case MBCS_OUTPUT_2_SISO: + ds->swapArray16(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + break; + case MBCS_OUTPUT_4: + ds->swapArray32(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + break; + default: + /* just uint8_t[], nothing to swap */ + break; + } + + if(mbcsIndexLength!=0) { + offset+=count; + count=mbcsIndexLength; + ds->swapArray16(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + } + } + } + + if(extOffset!=0) { + /* swap the extension data */ + inBytes+=extOffset; + outBytes+=extOffset; + + /* swap toUTable[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); + ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); + + /* swap toUUChars[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); + ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); + + /* swap fromUTableUChars[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); + ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); + + /* swap fromUTableValues[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); + /* same length as for fromUTableUChars[] */ + ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); + + /* no need to swap fromUBytes[] */ + + /* swap fromUStage12[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); + ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); + + /* swap fromUStage3[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); + ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); + + /* swap fromUStage3b[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); + ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); + + /* swap indexes[] */ + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); + ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); + } + } + } else { + udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n", + inStaticData->conversionType); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + return headerSize+(int32_t)staticDataSize+size; +} + +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ + +#endif diff --git a/thirdparty/icu4c/common/ucnv_bld.h b/thirdparty/icu4c/common/ucnv_bld.h new file mode 100644 index 0000000000..43e6c09ac0 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_bld.h @@ -0,0 +1,296 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2015 International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* +* ucnv_bld.h: +* Contains internal data structure definitions +* Created by Bertrand A. Damiba +* +* Change history: +* +* 06/29/2000 helena Major rewrite of the callback APIs. +*/ + +#ifndef UCNV_BLD_H +#define UCNV_BLD_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/ucnv_err.h" +#include "unicode/utf16.h" +#include "ucnv_cnv.h" +#include "ucnvmbcs.h" +#include "ucnv_ext.h" +#include "udataswp.h" + +/* size of the overflow buffers in UConverter, enough for escaping callbacks */ +#define UCNV_ERROR_BUFFER_LENGTH 32 + +/* at most 4 bytes per substitution character (part of .cnv file format! see UConverterStaticData) */ +#define UCNV_MAX_SUBCHAR_LEN 4 + +/* at most 8 bytes per character in toUBytes[] (UTF-8 uses up to 6) */ +#define UCNV_MAX_CHAR_LEN 8 + +/* converter options bits */ +#define UCNV_OPTION_VERSION 0xf +#define UCNV_OPTION_SWAP_LFNL 0x10 + +#define UCNV_GET_VERSION(cnv) ((cnv)->options&UCNV_OPTION_VERSION) + +U_CDECL_BEGIN /* We must declare the following as 'extern "C"' so that if ucnv + itself is compiled under C++, the linkage of the funcptrs will + work. + */ + +union UConverterTable { + UConverterMBCSTable mbcs; +}; + +typedef union UConverterTable UConverterTable; + +struct UConverterImpl; +typedef struct UConverterImpl UConverterImpl; + +/** values for the unicodeMask */ +#define UCNV_HAS_SUPPLEMENTARY 1 +#define UCNV_HAS_SURROGATES 2 + +typedef struct UConverterStaticData { /* +offset: size */ + uint32_t structSize; /* +0: 4 Size of this structure */ + + char name + [UCNV_MAX_CONVERTER_NAME_LENGTH]; /* +4: 60 internal name of the converter- invariant chars */ + + int32_t codepage; /* +64: 4 codepage # (now IBM-$codepage) */ + + int8_t platform; /* +68: 1 platform of the converter (only IBM now) */ + int8_t conversionType; /* +69: 1 conversion type */ + + int8_t minBytesPerChar; /* +70: 1 Minimum # bytes per char in this codepage */ + int8_t maxBytesPerChar; /* +71: 1 Maximum # bytes output per UChar in this codepage */ + + uint8_t subChar[UCNV_MAX_SUBCHAR_LEN]; /* +72: 4 [note: 4 and 8 byte boundary] */ + int8_t subCharLen; /* +76: 1 */ + + uint8_t hasToUnicodeFallback; /* +77: 1 UBool needs to be changed to UBool to be consistent across platform */ + uint8_t hasFromUnicodeFallback; /* +78: 1 */ + uint8_t unicodeMask; /* +79: 1 bit 0: has supplementary bit 1: has single surrogates */ + uint8_t subChar1; /* +80: 1 single-byte substitution character for IBM MBCS (0 if none) */ + uint8_t reserved[19]; /* +81: 19 to round out the structure */ + /* total size: 100 */ +} UConverterStaticData; + +/* + * Defines the UConverterSharedData struct, + * the immutable, shared part of UConverter. + */ +struct UConverterSharedData { + uint32_t structSize; /* Size of this structure */ + uint32_t referenceCounter; /* used to count number of clients, unused for static/immutable SharedData */ + + const void *dataMemory; /* from udata_openChoice() - for cleanup */ + + const UConverterStaticData *staticData; /* pointer to the static (non changing) data. */ + + UBool sharedDataCached; /* true: shared data is in cache, don't destroy on ucnv_close() if 0 ref. false: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */ + /** If false, then referenceCounter is not used. Must not change after initialization. */ + UBool isReferenceCounted; + + const UConverterImpl *impl; /* vtable-style struct of mostly function pointers */ + + /*initial values of some members of the mutable part of object */ + uint32_t toUnicodeStatus; + + /* + * Shared data structures currently come in two flavors: + * - readonly for built-in algorithmic converters + * - allocated for MBCS, with a pointer to an allocated UConverterTable + * which always has a UConverterMBCSTable + * + * To eliminate one allocation, I am making the UConverterMBCSTable + * a member of the shared data. + * + * markus 2003-nov-07 + */ + UConverterMBCSTable mbcs; +}; + +/** UConverterSharedData initializer for static, non-reference-counted converters. */ +#define UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(pStaticData, pImpl) \ + { \ + sizeof(UConverterSharedData), ~((uint32_t)0), \ + NULL, pStaticData, false, false, pImpl, \ + 0, UCNV_MBCS_TABLE_INITIALIZER \ + } + +/* Defines a UConverter, the lightweight mutable part the user sees */ + +struct UConverter { + /* + * Error function pointer called when conversion issues + * occur during a ucnv_fromUnicode call + */ + void (U_EXPORT2 *fromUCharErrorBehaviour) (const void *context, + UConverterFromUnicodeArgs *args, + const UChar *codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode *); + /* + * Error function pointer called when conversion issues + * occur during a ucnv_toUnicode call + */ + void (U_EXPORT2 *fromCharErrorBehaviour) (const void *context, + UConverterToUnicodeArgs *args, + const char *codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode *); + + /* + * Pointer to additional data that depends on the converter type. + * Used by ISO 2022, SCSU, GB 18030 converters, possibly more. + */ + void *extraInfo; + + const void *fromUContext; + const void *toUContext; + + /* + * Pointer to charset bytes for substitution string if subCharLen>0, + * or pointer to Unicode string (UChar *) if subCharLen<0. + * subCharLen==0 is equivalent to using a skip callback. + * If the pointer is !=subUChars then it is allocated with + * UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR bytes. + * The subUChars field is declared as UChar[] not uint8_t[] to + * guarantee alignment for UChars. + */ + uint8_t *subChars; + + UConverterSharedData *sharedData; /* Pointer to the shared immutable part of the converter object */ + + uint32_t options; /* options flags from UConverterOpen, may contain additional bits */ + + UBool sharedDataIsCached; /* true: shared data is in cache, don't destroy on ucnv_close() if 0 ref. false: shared data isn't in the cache, do attempt to clean it up if the ref is 0 */ + UBool isCopyLocal; /* true if UConverter is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */ + UBool isExtraLocal; /* true if extraInfo is not owned and not released in ucnv_close() (stack-allocated, safeClone(), etc.) */ + + UBool useFallback; + int8_t toULength; /* number of bytes in toUBytes */ + uint8_t toUBytes[UCNV_MAX_CHAR_LEN-1];/* more "toU status"; keeps the bytes of the current character */ + uint32_t toUnicodeStatus; /* Used to internalize stream status information */ + int32_t mode; + uint32_t fromUnicodeStatus; + + /* + * More fromUnicode() status. Serves 3 purposes: + * - keeps a lead surrogate between buffers (similar to toUBytes[]) + * - keeps a lead surrogate at the end of the stream, + * which the framework handles as truncated input + * - if the fromUnicode() implementation returns to the framework + * (ucnv.c ucnv_fromUnicode()), then the framework calls the callback + * for this code point + */ + UChar32 fromUChar32; + + /* + * value for ucnv_getMaxCharSize() + * + * usually simply copied from the static data, but ucnvmbcs.c modifies + * the value depending on the converter type and options + */ + int8_t maxBytesPerUChar; + + int8_t subCharLen; /* length of the codepage specific character sequence */ + int8_t invalidCharLength; + int8_t charErrorBufferLength; /* number of valid bytes in charErrorBuffer */ + + int8_t invalidUCharLength; + int8_t UCharErrorBufferLength; /* number of valid UChars in charErrorBuffer */ + + uint8_t subChar1; /* single-byte substitution character if different from subChar */ + UBool useSubChar1; + char invalidCharBuffer[UCNV_MAX_CHAR_LEN]; /* bytes from last error/callback situation */ + uint8_t charErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* codepage output from Error functions */ + UChar subUChars[UCNV_MAX_SUBCHAR_LEN/U_SIZEOF_UCHAR]; /* see subChars documentation */ + + UChar invalidUCharBuffer[U16_MAX_LENGTH]; /* UChars from last error/callback situation */ + UChar UCharErrorBuffer[UCNV_ERROR_BUFFER_LENGTH]; /* unicode output from Error functions */ + + /* fields for conversion extension */ + + /* store previous UChars/chars to continue partial matches */ + UChar32 preFromUFirstCP; /* >=0: partial match */ + UChar preFromU[UCNV_EXT_MAX_UCHARS]; + char preToU[UCNV_EXT_MAX_BYTES]; + int8_t preFromULength, preToULength; /* negative: replay */ + int8_t preToUFirstLength; /* length of first character */ + + /* new fields for ICU 4.0 */ + UConverterCallbackReason toUCallbackReason; /* (*fromCharErrorBehaviour) reason, set when error is detected */ +}; + +U_CDECL_END /* end of UConverter */ + +#define CONVERTER_FILE_EXTENSION ".cnv" + + +/** + * Return the number of all converter names. + * @param pErrorCode The error code + * @return the number of all converter names + */ +U_CFUNC uint16_t +ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode); + +/** + * Return the (n)th converter name in mixed case, or NULL + * if there is none (typically, if the data cannot be loaded). + * 0<=index<ucnv_io_countAvailableConverters(). + * @param n The number specifies which converter name to get + * @param pErrorCode The error code + * @return the (n)th converter name in mixed case, or NULL if there is none. + */ +U_CFUNC const char * +ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode); + +/** + * Load a non-algorithmic converter. + * If pkg==NULL, then this function must be called inside umtx_lock(&cnvCacheMutex). + */ +U_CAPI UConverterSharedData * +ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err); + +/** + * Unload a non-algorithmic converter. + * It must be sharedData->isReferenceCounted + * and this function must be called inside umtx_lock(&cnvCacheMutex). + */ +U_CAPI void +ucnv_unload(UConverterSharedData *sharedData); + +/** + * Swap ICU .cnv conversion tables. See udataswp.h. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +ucnv_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +U_CAPI void U_EXPORT2 +ucnv_enableCleanup(void); + +#endif + +#endif /* _UCNV_BLD */ diff --git a/thirdparty/icu4c/common/ucnv_cb.cpp b/thirdparty/icu4c/common/ucnv_cb.cpp new file mode 100644 index 0000000000..1bb0012014 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_cb.cpp @@ -0,0 +1,261 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2006, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** + * ucnv_cb.c: + * External APIs for the ICU's codeset conversion library + * Helena Shih + * + * Modification History: + * + * Date Name Description + * 7/28/2000 srl Implementation + */ + +/** + * @name Character Conversion C API + * + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv_cb.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "cmemory.h" + +/* need to update the offsets when the target moves. */ +/* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly +if you don't use ucnv_cbXXX functions. Make sure you don't use the same callback within +the same call stack if the complexity arises. */ +U_CAPI void U_EXPORT2 +ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args, + const char* source, + int32_t length, + int32_t offsetIndex, + UErrorCode * err) +{ + if(U_FAILURE(*err)) { + return; + } + + ucnv_fromUWriteBytes( + args->converter, + source, length, + &args->target, args->targetLimit, + &args->offsets, offsetIndex, + err); +} + +U_CAPI void U_EXPORT2 +ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args, + const UChar** source, + const UChar* sourceLimit, + int32_t offsetIndex, + UErrorCode * err) +{ + /* + This is a fun one. Recursion can occur - we're basically going to + just retry shoving data through the same converter. Note, if you got + here through some kind of invalid sequence, you maybe should emit a + reset sequence of some kind and/or call ucnv_reset(). Since this + IS an actual conversion, take care that you've changed the callback + or the data, or you'll get an infinite loop. + + Please set the err value to something reasonable before calling + into this. + */ + + char *oldTarget; + + if(U_FAILURE(*err)) + { + return; + } + + oldTarget = args->target; + + ucnv_fromUnicode(args->converter, + &args->target, + args->targetLimit, + source, + sourceLimit, + NULL, /* no offsets */ + FALSE, /* no flush */ + err); + + if(args->offsets) + { + while (args->target != oldTarget) /* if it moved at all.. */ + { + *(args->offsets)++ = offsetIndex; + oldTarget++; + } + } + + /* + Note, if you did something like used a Stop subcallback, things would get interesting. + In fact, here's where we want to return the partially consumed in-source! + */ + if(*err == U_BUFFER_OVERFLOW_ERROR) + /* && (*source < sourceLimit && args->target >= args->targetLimit) + -- S. Hrcek */ + { + /* Overflowed the target. Now, we'll write into the charErrorBuffer. + It's a fixed size. If we overflow it... Hmm */ + char *newTarget; + const char *newTargetLimit; + UErrorCode err2 = U_ZERO_ERROR; + + int8_t errBuffLen; + + errBuffLen = args->converter->charErrorBufferLength; + + /* start the new target at the first free slot in the errbuff.. */ + newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen); + + newTargetLimit = (char *)(args->converter->charErrorBuffer + + sizeof(args->converter->charErrorBuffer)); + + if(newTarget >= newTargetLimit) + { + *err = U_INTERNAL_PROGRAM_ERROR; + return; + } + + /* We're going to tell the converter that the errbuff len is empty. + This prevents the existing errbuff from being 'flushed' out onto + itself. If the errbuff is needed by the converter this time, + we're hosed - we're out of space! */ + + args->converter->charErrorBufferLength = 0; + + ucnv_fromUnicode(args->converter, + &newTarget, + newTargetLimit, + source, + sourceLimit, + NULL, + FALSE, + &err2); + + /* We can go ahead and overwrite the length here. We know just how + to recalculate it. */ + + args->converter->charErrorBufferLength = (int8_t)( + newTarget - (char*)args->converter->charErrorBuffer); + + if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR)) + { + /* now we're REALLY in trouble. + Internal program error - callback shouldn't have written this much + data! + */ + *err = U_INTERNAL_PROGRAM_ERROR; + return; + } + /*else {*/ + /* sub errs could be invalid/truncated/illegal chars or w/e. + These might want to be passed on up.. But the problem is, we already + need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these + other errs.. */ + + /* + if(U_FAILURE(err2)) + ?? + */ + /*}*/ + } +} + +U_CAPI void U_EXPORT2 +ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args, + int32_t offsetIndex, + UErrorCode * err) +{ + UConverter *converter; + int32_t length; + + if(U_FAILURE(*err)) { + return; + } + converter = args->converter; + length = converter->subCharLen; + + if(length == 0) { + return; + } + + if(length < 0) { + /* + * Write/convert the substitution string. Its real length is -length. + * Unlike the escape callback, we need not change the converter's + * callback function because ucnv_setSubstString() verified that + * the string can be converted, so we will not get a conversion error + * and will not recurse. + * At worst we should get a U_BUFFER_OVERFLOW_ERROR. + */ + const UChar *source = (const UChar *)converter->subChars; + ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err); + return; + } + + if(converter->sharedData->impl->writeSub!=NULL) { + converter->sharedData->impl->writeSub(args, offsetIndex, err); + } + else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) { + /* + TODO: Is this untestable because the MBCS converter has a writeSub function to call + and the other converters don't use subChar1? + */ + ucnv_cbFromUWriteBytes(args, + (const char *)&converter->subChar1, 1, + offsetIndex, err); + } + else { + ucnv_cbFromUWriteBytes(args, + (const char *)converter->subChars, length, + offsetIndex, err); + } +} + +U_CAPI void U_EXPORT2 +ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args, + const UChar* source, + int32_t length, + int32_t offsetIndex, + UErrorCode * err) +{ + if(U_FAILURE(*err)) { + return; + } + + ucnv_toUWriteUChars( + args->converter, + source, length, + &args->target, args->targetLimit, + &args->offsets, offsetIndex, + err); +} + +U_CAPI void U_EXPORT2 +ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args, + int32_t offsetIndex, + UErrorCode * err) +{ + static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD; + + /* could optimize this case, just one uchar */ + if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) { + ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err); + } else { + ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err); + } +} + +#endif diff --git a/thirdparty/icu4c/common/ucnv_cnv.cpp b/thirdparty/icu4c/common/ucnv_cnv.cpp new file mode 100644 index 0000000000..ea71acf92c --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_cnv.cpp @@ -0,0 +1,182 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2004, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* uconv_cnv.c: +* Implements all the low level conversion functions +* T_UnicodeConverter_{to,from}Unicode_$ConversionType +* +* Change history: +* +* 06/29/2000 helena Major rewrite of the callback APIs. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv_err.h" +#include "unicode/ucnv.h" +#include "unicode/uset.h" +#include "ucnv_cnv.h" +#include "ucnv_bld.h" +#include "cmemory.h" + +U_CFUNC void +ucnv_getCompleteUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + (void)cnv; + (void)which; + (void)pErrorCode; + sa->addRange(sa->set, 0, 0x10ffff); +} + +U_CFUNC void +ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + (void)cnv; + (void)which; + (void)pErrorCode; + sa->addRange(sa->set, 0, 0xd7ff); + sa->addRange(sa->set, 0xe000, 0x10ffff); +} + +U_CFUNC void +ucnv_fromUWriteBytes(UConverter *cnv, + const char *bytes, int32_t length, + char **target, const char *targetLimit, + int32_t **offsets, + int32_t sourceIndex, + UErrorCode *pErrorCode) { + char *t=*target; + int32_t *o; + + /* write bytes */ + if(offsets==NULL || (o=*offsets)==NULL) { + while(length>0 && t<targetLimit) { + *t++=*bytes++; + --length; + } + } else { + /* output with offsets */ + while(length>0 && t<targetLimit) { + *t++=*bytes++; + *o++=sourceIndex; + --length; + } + *offsets=o; + } + *target=t; + + /* write overflow */ + if(length>0) { + if(cnv!=NULL) { + t=(char *)cnv->charErrorBuffer; + cnv->charErrorBufferLength=(int8_t)length; + do { + *t++=(uint8_t)*bytes++; + } while(--length>0); + } + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } +} + +U_CFUNC void +ucnv_toUWriteUChars(UConverter *cnv, + const UChar *uchars, int32_t length, + UChar **target, const UChar *targetLimit, + int32_t **offsets, + int32_t sourceIndex, + UErrorCode *pErrorCode) { + UChar *t=*target; + int32_t *o; + + /* write UChars */ + if(offsets==NULL || (o=*offsets)==NULL) { + while(length>0 && t<targetLimit) { + *t++=*uchars++; + --length; + } + } else { + /* output with offsets */ + while(length>0 && t<targetLimit) { + *t++=*uchars++; + *o++=sourceIndex; + --length; + } + *offsets=o; + } + *target=t; + + /* write overflow */ + if(length>0) { + if(cnv!=NULL) { + t=cnv->UCharErrorBuffer; + cnv->UCharErrorBufferLength=(int8_t)length; + do { + *t++=*uchars++; + } while(--length>0); + } + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } +} + +U_CFUNC void +ucnv_toUWriteCodePoint(UConverter *cnv, + UChar32 c, + UChar **target, const UChar *targetLimit, + int32_t **offsets, + int32_t sourceIndex, + UErrorCode *pErrorCode) { + UChar *t; + int32_t *o; + + t=*target; + + if(t<targetLimit) { + if(c<=0xffff) { + *t++=(UChar)c; + c=U_SENTINEL; + } else /* c is a supplementary code point */ { + *t++=U16_LEAD(c); + c=U16_TRAIL(c); + if(t<targetLimit) { + *t++=(UChar)c; + c=U_SENTINEL; + } + } + + /* write offsets */ + if(offsets!=NULL && (o=*offsets)!=NULL) { + *o++=sourceIndex; + if((*target+1)<t) { + *o++=sourceIndex; + } + *offsets=o; + } + } + + *target=t; + + /* write overflow from c */ + if(c>=0) { + if(cnv!=NULL) { + int8_t i=0; + U16_APPEND_UNSAFE(cnv->UCharErrorBuffer, i, c); + cnv->UCharErrorBufferLength=i; + } + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } +} + +#endif diff --git a/thirdparty/icu4c/common/ucnv_cnv.h b/thirdparty/icu4c/common/ucnv_cnv.h new file mode 100644 index 0000000000..59be8bdb37 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_cnv.h @@ -0,0 +1,323 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* ucnv_cnv.h: +* Definitions for converter implementations. +* +* Modification History: +* +* Date Name Description +* 05/09/00 helena Added implementation to handle fallback mappings. +* 06/29/2000 helena Major rewrite of the callback APIs. +*/ + +#ifndef UCNV_CNV_H +#define UCNV_CNV_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/ucnv_err.h" +#include "unicode/uset.h" +#include "uset_imp.h" + +U_CDECL_BEGIN + +/* this is used in fromUnicode DBCS tables as an "unassigned" marker */ +#define missingCharMarker 0xFFFF + +/* + * #define missingUCharMarker 0xfffe + * + * commented out because there are actually two values used in toUnicode tables: + * U+fffe "unassigned" + * U+ffff "illegal" + */ + +/** Forward declaration, see ucnv_bld.h */ +struct UConverterSharedData; +typedef struct UConverterSharedData UConverterSharedData; + +/* function types for UConverterImpl ---------------------------------------- */ + +/* struct with arguments for UConverterLoad and ucnv_load() */ +typedef struct { + int32_t size; /* sizeof(UConverterLoadArgs) */ + int32_t nestedLoads; /* count nested ucnv_load() calls */ + UBool onlyTestIsLoadable; /* input: don't actually load */ + UBool reserved0; /* reserved - for good alignment of the pointers */ + int16_t reserved; /* reserved - for good alignment of the pointers */ + uint32_t options; + const char *pkg, *name, *locale; +} UConverterLoadArgs; + +#define UCNV_LOAD_ARGS_INITIALIZER \ + { (int32_t)sizeof(UConverterLoadArgs), 0, false, false, 0, 0, NULL, NULL, NULL } + +typedef void (*UConverterLoad) (UConverterSharedData *sharedData, + UConverterLoadArgs *pArgs, + const uint8_t *raw, UErrorCode *pErrorCode); +typedef void (*UConverterUnload) (UConverterSharedData *sharedData); + +typedef void (*UConverterOpen) (UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *pErrorCode); +typedef void (*UConverterClose) (UConverter *cnv); + +typedef enum UConverterResetChoice { + UCNV_RESET_BOTH, + UCNV_RESET_TO_UNICODE, + UCNV_RESET_FROM_UNICODE +} UConverterResetChoice; + +typedef void (*UConverterReset) (UConverter *cnv, UConverterResetChoice choice); + +/* + * Converter implementation function(s) for ucnv_toUnicode(). + * If the toUnicodeWithOffsets function pointer is NULL, + * then the toUnicode function will be used and the offsets will be set to -1. + * + * Must maintain state across buffers. Use toUBytes[toULength] for partial input + * sequences; it will be checked in ucnv.c at the end of the input stream + * to detect truncated input. + * Some converters may need additional detection and may then set U_TRUNCATED_CHAR_FOUND. + * + * The toUnicodeWithOffsets must write exactly as many offset values as target + * units. Write offset values of -1 for when the source index corresponding to + * the output unit is not known (e.g., the character started in an earlier buffer). + * The pArgs->offsets pointer need not be moved forward. + * + * At function return, either one of the following conditions must be true: + * - U_BUFFER_OVERFLOW_ERROR and the target is full: target==targetLimit + * - another error code with toUBytes[toULength] set to the offending input + * - no error, and the source is consumed: source==sourceLimit + * + * The ucnv.c code will handle the end of the input (reset) + * (reset, and truncation detection) and callbacks. + */ +typedef void (*UConverterToUnicode) (UConverterToUnicodeArgs *, UErrorCode *); + +/* + * Same rules as for UConverterToUnicode. + * A lead surrogate is kept in fromUChar32 across buffers, and if an error + * occurs, then the offending input code point must be put into fromUChar32 + * as well. + */ +typedef void (*UConverterFromUnicode) (UConverterFromUnicodeArgs *, UErrorCode *); + +/* + * Converter implementation function for ucnv_convertEx(), for direct conversion + * between two charsets without pivoting through UTF-16. + * The rules are the same as for UConverterToUnicode and UConverterFromUnicode. + * In addition, + * - The toUnicode side must behave and keep state exactly like the + * UConverterToUnicode implementation for the same source charset. + * - A U_USING_DEFAULT_WARNING can be set to request to temporarily fall back + * to pivoting. When this function is called, the conversion framework makes + * sure that this warning is not set on input. + * - Continuing a partial match and flushing the toUnicode replay buffer + * are handled by pivoting, using the toUnicode and fromUnicode functions. + */ +typedef void (*UConverterConvert) (UConverterFromUnicodeArgs *pFromUArgs, + UConverterToUnicodeArgs *pToUArgs, + UErrorCode *pErrorCode); + +/* + * Converter implementation function for ucnv_getNextUChar(). + * If the function pointer is NULL, then the toUnicode function will be used. + * + * Will be called at a character boundary (toULength==0). + * May return with + * - U_INDEX_OUTOFBOUNDS_ERROR if there was no output for the input + * (the return value will be ignored) + * - U_TRUNCATED_CHAR_FOUND or another error code (never U_BUFFER_OVERFLOW_ERROR!) + * with toUBytes[toULength] set to the offending input + * (the return value will be ignored) + * - return UCNV_GET_NEXT_UCHAR_USE_TO_U, without moving the source pointer, + * to indicate that the ucnv.c code shall call the toUnicode function instead + * - return a real code point result + * + * Unless UCNV_GET_NEXT_UCHAR_USE_TO_U is returned, the source bytes must be consumed. + * + * The ucnv.c code will handle the end of the input (reset) + * (except for truncation detection!) and callbacks. + */ +typedef UChar32 (*UConverterGetNextUChar) (UConverterToUnicodeArgs *, UErrorCode *); + +typedef void (*UConverterGetStarters)(const UConverter* converter, + UBool starters[256], + UErrorCode *pErrorCode); + +/* If this function pointer is null or if the function returns null + * the name field in static data struct should be returned by + * ucnv_getName() API function + */ +typedef const char * (*UConverterGetName) (const UConverter *cnv); + +/** + * Write the codepage substitution character. + * If this function is not set, then ucnv_cbFromUWriteSub() writes + * the substitution character from UConverter. + * For stateful converters, it is typically necessary to handle this + * specificially for the converter in order to properly maintain the state. + */ +typedef void (*UConverterWriteSub) (UConverterFromUnicodeArgs *pArgs, int32_t offsetIndex, UErrorCode *pErrorCode); + +/** + * For converter-specific safeClone processing + * If this function is not set, then ucnv_safeClone assumes that the converter has no private data that changes + * after the converter is done opening. + * If this function is set, then it is called just after a memcpy() of + * converter data to the new, empty converter, and is expected to set up + * the initial state of the converter. It is not expected to increment the + * reference counts of the standard data types such as the shared data. + */ +typedef UConverter * (*UConverterSafeClone) (const UConverter *cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status); + +/** + * Filters for some ucnv_getUnicodeSet() implementation code. + */ +typedef enum UConverterSetFilter { + UCNV_SET_FILTER_NONE, + UCNV_SET_FILTER_DBCS_ONLY, + UCNV_SET_FILTER_2022_CN, + UCNV_SET_FILTER_SJIS, + UCNV_SET_FILTER_GR94DBCS, + UCNV_SET_FILTER_HZ, + UCNV_SET_FILTER_COUNT +} UConverterSetFilter; + +/** + * Fills the set of Unicode code points that can be converted by an ICU converter. + * The API function ucnv_getUnicodeSet() clears the USet before calling + * the converter's getUnicodeSet() implementation; the converter should only + * add the appropriate code points to allow recursive use. + * For example, the ISO-2022-JP converter will call each subconverter's + * getUnicodeSet() implementation to consecutively add code points to + * the same USet, which will result in a union of the sets of all subconverters. + * + * For more documentation, see ucnv_getUnicodeSet() in ucnv.h. + */ +typedef void (*UConverterGetUnicodeSet) (const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode); + +UBool CONVERSION_U_SUCCESS (UErrorCode err); + +/** + * UConverterImpl contains all the data and functions for a converter type. + * Its function pointers work much like a C++ vtable. + * Many converter types need to define only a subset of the functions; + * when a function pointer is NULL, then a default action will be performed. + * + * Every converter type must implement toUnicode, fromUnicode, and getNextUChar, + * otherwise the converter may crash. + * Every converter type that has variable-length codepage sequences should + * also implement toUnicodeWithOffsets and fromUnicodeWithOffsets for + * correct offset handling. + * All other functions may or may not be implemented - it depends only on + * whether the converter type needs them. + * + * When open() fails, then close() will be called, if present. + */ +struct UConverterImpl { + UConverterType type; + + UConverterLoad load; + UConverterUnload unload; + + UConverterOpen open; + UConverterClose close; + UConverterReset reset; + + UConverterToUnicode toUnicode; + UConverterToUnicode toUnicodeWithOffsets; + UConverterFromUnicode fromUnicode; + UConverterFromUnicode fromUnicodeWithOffsets; + UConverterGetNextUChar getNextUChar; + + UConverterGetStarters getStarters; + UConverterGetName getName; + UConverterWriteSub writeSub; + UConverterSafeClone safeClone; + UConverterGetUnicodeSet getUnicodeSet; + + UConverterConvert toUTF8; + UConverterConvert fromUTF8; +}; + +extern const UConverterSharedData + _MBCSData, _Latin1Data, + _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData, + _ISO2022Data, + _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6, + _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19, + _HZData,_ISCIIData, _SCSUData, _ASCIIData, + _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData; + +U_CDECL_END + +/** Always use fallbacks from codepage to Unicode */ +#define TO_U_USE_FALLBACK(useFallback) true +#define UCNV_TO_U_USE_FALLBACK(cnv) true + +/** Use fallbacks from Unicode to codepage when cnv->useFallback or for private-use code points */ +#define IS_PRIVATE_USE(c) ((uint32_t)((c)-0xe000)<0x1900 || (uint32_t)((c)-0xf0000)<0x20000) +#define FROM_U_USE_FALLBACK(useFallback, c) ((useFallback) || IS_PRIVATE_USE(c)) +#define UCNV_FROM_U_USE_FALLBACK(cnv, c) FROM_U_USE_FALLBACK((cnv)->useFallback, c) + +/** + * Magic number for ucnv_getNextUChar(), returned by a + * getNextUChar() implementation to indicate to use the converter's toUnicode() + * instead of the native function. + * @internal + */ +#define UCNV_GET_NEXT_UCHAR_USE_TO_U -9 + +U_CFUNC void +ucnv_getCompleteUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode); + +U_CFUNC void +ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode); + +U_CFUNC void +ucnv_fromUWriteBytes(UConverter *cnv, + const char *bytes, int32_t length, + char **target, const char *targetLimit, + int32_t **offsets, + int32_t sourceIndex, + UErrorCode *pErrorCode); +U_CFUNC void +ucnv_toUWriteUChars(UConverter *cnv, + const UChar *uchars, int32_t length, + UChar **target, const UChar *targetLimit, + int32_t **offsets, + int32_t sourceIndex, + UErrorCode *pErrorCode); + +U_CFUNC void +ucnv_toUWriteCodePoint(UConverter *cnv, + UChar32 c, + UChar **target, const UChar *targetLimit, + int32_t **offsets, + int32_t sourceIndex, + UErrorCode *pErrorCode); + +#endif + +#endif /* UCNV_CNV */ diff --git a/thirdparty/icu4c/common/ucnv_ct.cpp b/thirdparty/icu4c/common/ucnv_ct.cpp new file mode 100644 index 0000000000..b40e1b2c97 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_ct.cpp @@ -0,0 +1,646 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2010-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_ct.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010Dec09 +* created by: Michael Ow +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/uset.h" +#include "unicode/ucnv_err.h" +#include "unicode/ucnv_cb.h" +#include "unicode/utf16.h" +#include "ucnv_imp.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "ucnvmbcs.h" +#include "cstring.h" +#include "cmemory.h" + +typedef enum { + INVALID = -2, + DO_SEARCH = -1, + + COMPOUND_TEXT_SINGLE_0 = 0, + COMPOUND_TEXT_SINGLE_1 = 1, + COMPOUND_TEXT_SINGLE_2 = 2, + COMPOUND_TEXT_SINGLE_3 = 3, + + COMPOUND_TEXT_DOUBLE_1 = 4, + COMPOUND_TEXT_DOUBLE_2 = 5, + COMPOUND_TEXT_DOUBLE_3 = 6, + COMPOUND_TEXT_DOUBLE_4 = 7, + COMPOUND_TEXT_DOUBLE_5 = 8, + COMPOUND_TEXT_DOUBLE_6 = 9, + COMPOUND_TEXT_DOUBLE_7 = 10, + + COMPOUND_TEXT_TRIPLE_DOUBLE = 11, + + IBM_915 = 12, + IBM_916 = 13, + IBM_914 = 14, + IBM_874 = 15, + IBM_912 = 16, + IBM_913 = 17, + ISO_8859_14 = 18, + IBM_923 = 19, + NUM_OF_CONVERTERS = 20 +} COMPOUND_TEXT_CONVERTERS; + +#define SEARCH_LENGTH 12 + +static const uint8_t escSeqCompoundText[NUM_OF_CONVERTERS][5] = { + /* Single */ + { 0x1B, 0x2D, 0x41, 0, 0 }, + { 0x1B, 0x2D, 0x4D, 0, 0 }, + { 0x1B, 0x2D, 0x46, 0, 0 }, + { 0x1B, 0x2D, 0x47, 0, 0 }, + + /* Double */ + { 0x1B, 0x24, 0x29, 0x41, 0 }, + { 0x1B, 0x24, 0x29, 0x42, 0 }, + { 0x1B, 0x24, 0x29, 0x43, 0 }, + { 0x1B, 0x24, 0x29, 0x44, 0 }, + { 0x1B, 0x24, 0x29, 0x47, 0 }, + { 0x1B, 0x24, 0x29, 0x48, 0 }, + { 0x1B, 0x24, 0x29, 0x49, 0 }, + + /* Triple/Double */ + { 0x1B, 0x25, 0x47, 0, 0 }, + + /*IBM-915*/ + { 0x1B, 0x2D, 0x4C, 0, 0 }, + /*IBM-916*/ + { 0x1B, 0x2D, 0x48, 0, 0 }, + /*IBM-914*/ + { 0x1B, 0x2D, 0x44, 0, 0 }, + /*IBM-874*/ + { 0x1B, 0x2D, 0x54, 0, 0 }, + /*IBM-912*/ + { 0x1B, 0x2D, 0x42, 0, 0 }, + /* IBM-913 */ + { 0x1B, 0x2D, 0x43, 0, 0 }, + /* ISO-8859_14 */ + { 0x1B, 0x2D, 0x5F, 0, 0 }, + /* IBM-923 */ + { 0x1B, 0x2D, 0x62, 0, 0 }, +}; + +#define ESC_START 0x1B + +#define isASCIIRange(codepoint) \ + ((codepoint == 0x0000) || (codepoint == 0x0009) || (codepoint == 0x000A) || \ + (codepoint >= 0x0020 && codepoint <= 0x007f) || (codepoint >= 0x00A0 && codepoint <= 0x00FF)) + +#define isIBM915(codepoint) \ + ((codepoint >= 0x0401 && codepoint <= 0x045F) || (codepoint == 0x2116)) + +#define isIBM916(codepoint) \ + ((codepoint >= 0x05D0 && codepoint <= 0x05EA) || (codepoint == 0x2017) || (codepoint == 0x203E)) + +#define isCompoundS3(codepoint) \ + ((codepoint == 0x060C) || (codepoint == 0x061B) || (codepoint == 0x061F) || (codepoint >= 0x0621 && codepoint <= 0x063A) || \ + (codepoint >= 0x0640 && codepoint <= 0x0652) || (codepoint >= 0x0660 && codepoint <= 0x066D) || (codepoint == 0x200B) || \ + (codepoint >= 0x0FE70 && codepoint <= 0x0FE72) || (codepoint == 0x0FE74) || (codepoint >= 0x0FE76 && codepoint <= 0x0FEBE)) + +#define isCompoundS2(codepoint) \ + ((codepoint == 0x02BC) || (codepoint == 0x02BD) || (codepoint >= 0x0384 && codepoint <= 0x03CE) || (codepoint == 0x2015)) + +#define isIBM914(codepoint) \ + ((codepoint == 0x0100) || (codepoint == 0x0101) || (codepoint == 0x0112) || (codepoint == 0x0113) || (codepoint == 0x0116) || (codepoint == 0x0117) || \ + (codepoint == 0x0122) || (codepoint == 0x0123) || (codepoint >= 0x0128 && codepoint <= 0x012B) || (codepoint == 0x012E) || (codepoint == 0x012F) || \ + (codepoint >= 0x0136 && codepoint <= 0x0138) || (codepoint == 0x013B) || (codepoint == 0x013C) || (codepoint == 0x0145) || (codepoint == 0x0146) || \ + (codepoint >= 0x014A && codepoint <= 0x014D) || (codepoint == 0x0156) || (codepoint == 0x0157) || (codepoint >= 0x0166 && codepoint <= 0x016B) || \ + (codepoint == 0x0172) || (codepoint == 0x0173)) + +#define isIBM874(codepoint) \ + ((codepoint >= 0x0E01 && codepoint <= 0x0E3A) || (codepoint >= 0x0E3F && codepoint <= 0x0E5B)) + +#define isIBM912(codepoint) \ + ((codepoint >= 0x0102 && codepoint <= 0x0107) || (codepoint >= 0x010C && codepoint <= 0x0111) || (codepoint >= 0x0118 && codepoint <= 0x011B) || \ + (codepoint == 0x0139) || (codepoint == 0x013A) || (codepoint == 0x013D) || (codepoint == 0x013E) || (codepoint >= 0x0141 && codepoint <= 0x0144) || \ + (codepoint == 0x0147) || (codepoint == 0x0147) || (codepoint == 0x0150) || (codepoint == 0x0151) || (codepoint == 0x0154) || (codepoint == 0x0155) || \ + (codepoint >= 0x0158 && codepoint <= 0x015B) || (codepoint == 0x015E) || (codepoint == 0x015F) || (codepoint >= 0x0160 && codepoint <= 0x0165) || \ + (codepoint == 0x016E) || (codepoint == 0x016F) || (codepoint == 0x0170) || (codepoint == 0x0171) || (codepoint >= 0x0179 && codepoint <= 0x017E) || \ + (codepoint == 0x02C7) || (codepoint == 0x02D8) || (codepoint == 0x02D9) || (codepoint == 0x02DB) || (codepoint == 0x02DD)) + +#define isIBM913(codepoint) \ + ((codepoint >= 0x0108 && codepoint <= 0x010B) || (codepoint == 0x011C) || \ + (codepoint == 0x011D) || (codepoint == 0x0120) || (codepoint == 0x0121) || \ + (codepoint >= 0x0124 && codepoint <= 0x0127) || (codepoint == 0x0134) || (codepoint == 0x0135) || \ + (codepoint == 0x015C) || (codepoint == 0x015D) || (codepoint == 0x016C) || (codepoint == 0x016D)) + +#define isCompoundS1(codepoint) \ + ((codepoint == 0x011E) || (codepoint == 0x011F) || (codepoint == 0x0130) || \ + (codepoint == 0x0131) || (codepoint >= 0x0218 && codepoint <= 0x021B)) + +#define isISO8859_14(codepoint) \ + ((codepoint >= 0x0174 && codepoint <= 0x0177) || (codepoint == 0x1E0A) || \ + (codepoint == 0x1E0B) || (codepoint == 0x1E1E) || (codepoint == 0x1E1F) || \ + (codepoint == 0x1E40) || (codepoint == 0x1E41) || (codepoint == 0x1E56) || \ + (codepoint == 0x1E57) || (codepoint == 0x1E60) || (codepoint == 0x1E61) || \ + (codepoint == 0x1E6A) || (codepoint == 0x1E6B) || (codepoint == 0x1EF2) || \ + (codepoint == 0x1EF3) || (codepoint >= 0x1E80 && codepoint <= 0x1E85)) + +#define isIBM923(codepoint) \ + ((codepoint == 0x0152) || (codepoint == 0x0153) || (codepoint == 0x0178) || (codepoint == 0x20AC)) + + +typedef struct{ + UConverterSharedData *myConverterArray[NUM_OF_CONVERTERS]; + COMPOUND_TEXT_CONVERTERS state; +} UConverterDataCompoundText; + +/*********** Compound Text Converter Protos ***********/ +U_CDECL_BEGIN +static void U_CALLCONV +_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode); + +static void U_CALLCONV + _CompoundTextClose(UConverter *converter); + +static void U_CALLCONV +_CompoundTextReset(UConverter *converter, UConverterResetChoice choice); + +static const char* U_CALLCONV +_CompoundTextgetName(const UConverter* cnv); + + +static int32_t findNextEsc(const char *source, const char *sourceLimit) { + int32_t length = static_cast<int32_t>(sourceLimit - source); + int32_t i; + for (i = 1; i < length; i++) { + if (*(source + i) == 0x1B) { + return i; + } + } + + return length; +} + +static COMPOUND_TEXT_CONVERTERS getState(int codepoint) { + COMPOUND_TEXT_CONVERTERS state = DO_SEARCH; + + if (isASCIIRange(codepoint)) { + state = COMPOUND_TEXT_SINGLE_0; + } else if (isIBM912(codepoint)) { + state = IBM_912; + }else if (isIBM913(codepoint)) { + state = IBM_913; + } else if (isISO8859_14(codepoint)) { + state = ISO_8859_14; + } else if (isIBM923(codepoint)) { + state = IBM_923; + } else if (isIBM874(codepoint)) { + state = IBM_874; + } else if (isIBM914(codepoint)) { + state = IBM_914; + } else if (isCompoundS2(codepoint)) { + state = COMPOUND_TEXT_SINGLE_2; + } else if (isCompoundS3(codepoint)) { + state = COMPOUND_TEXT_SINGLE_3; + } else if (isIBM916(codepoint)) { + state = IBM_916; + } else if (isIBM915(codepoint)) { + state = IBM_915; + } else if (isCompoundS1(codepoint)) { + state = COMPOUND_TEXT_SINGLE_1; + } + + return state; +} + +static COMPOUND_TEXT_CONVERTERS findStateFromEscSeq(const char* source, const char* sourceLimit, const uint8_t* toUBytesBuffer, int32_t toUBytesBufferLength, UErrorCode *err) { + COMPOUND_TEXT_CONVERTERS state = INVALID; + UBool matchFound = FALSE; + int32_t i, n, offset = toUBytesBufferLength; + + for (i = 0; i < NUM_OF_CONVERTERS; i++) { + matchFound = TRUE; + for (n = 0; escSeqCompoundText[i][n] != 0; n++) { + if (n < toUBytesBufferLength) { + if (toUBytesBuffer[n] != escSeqCompoundText[i][n]) { + matchFound = FALSE; + break; + } + } else if ((source + (n - offset)) >= sourceLimit) { + *err = U_TRUNCATED_CHAR_FOUND; + matchFound = FALSE; + break; + } else if (*(source + (n - offset)) != escSeqCompoundText[i][n]) { + matchFound = FALSE; + break; + } + } + + if (matchFound) { + break; + } + } + + if (matchFound) { + state = (COMPOUND_TEXT_CONVERTERS)i; + } + + return state; +} + +static void U_CALLCONV +_CompoundTextOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ + cnv->extraInfo = uprv_malloc (sizeof (UConverterDataCompoundText)); + if (cnv->extraInfo != NULL) { + UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo; + + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; + + myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_0] = NULL; + myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_1] = ucnv_loadSharedData("icu-internal-compound-s1", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_2] = ucnv_loadSharedData("icu-internal-compound-s2", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_SINGLE_3] = ucnv_loadSharedData("icu-internal-compound-s3", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_1] = ucnv_loadSharedData("icu-internal-compound-d1", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_2] = ucnv_loadSharedData("icu-internal-compound-d2", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_3] = ucnv_loadSharedData("icu-internal-compound-d3", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_4] = ucnv_loadSharedData("icu-internal-compound-d4", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_5] = ucnv_loadSharedData("icu-internal-compound-d5", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_6] = ucnv_loadSharedData("icu-internal-compound-d6", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_DOUBLE_7] = ucnv_loadSharedData("icu-internal-compound-d7", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[COMPOUND_TEXT_TRIPLE_DOUBLE] = ucnv_loadSharedData("icu-internal-compound-t", &stackPieces, &stackArgs, errorCode); + + myConverterData->myConverterArray[IBM_915] = ucnv_loadSharedData("ibm-915_P100-1995", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[IBM_916] = ucnv_loadSharedData("ibm-916_P100-1995", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[IBM_914] = ucnv_loadSharedData("ibm-914_P100-1995", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[IBM_874] = ucnv_loadSharedData("ibm-874_P100-1995", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[IBM_912] = ucnv_loadSharedData("ibm-912_P100-1995", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[IBM_913] = ucnv_loadSharedData("ibm-913_P100-2000", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[ISO_8859_14] = ucnv_loadSharedData("iso-8859_14-1998", &stackPieces, &stackArgs, errorCode); + myConverterData->myConverterArray[IBM_923] = ucnv_loadSharedData("ibm-923_P100-1998", &stackPieces, &stackArgs, errorCode); + + if (U_FAILURE(*errorCode) || pArgs->onlyTestIsLoadable) { + _CompoundTextClose(cnv); + return; + } + + myConverterData->state = (COMPOUND_TEXT_CONVERTERS)0; + } else { + *errorCode = U_MEMORY_ALLOCATION_ERROR; + } +} + + +static void U_CALLCONV +_CompoundTextClose(UConverter *converter) { + UConverterDataCompoundText* myConverterData = (UConverterDataCompoundText*)(converter->extraInfo); + int32_t i; + + if (converter->extraInfo != NULL) { + /*close the array of converter pointers and free the memory*/ + for (i = 0; i < NUM_OF_CONVERTERS; i++) { + if (myConverterData->myConverterArray[i] != NULL) { + ucnv_unloadSharedDataIfReady(myConverterData->myConverterArray[i]); + } + } + + uprv_free(converter->extraInfo); + converter->extraInfo = NULL; + } +} + +static void U_CALLCONV +_CompoundTextReset(UConverter *converter, UConverterResetChoice choice) { + (void)converter; + (void)choice; +} + +static const char* U_CALLCONV +_CompoundTextgetName(const UConverter* cnv){ + (void)cnv; + return "x11-compound-text"; +} + +static void U_CALLCONV +UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs* args, UErrorCode* err){ + UConverter *cnv = args->converter; + uint8_t *target = (uint8_t *) args->target; + const uint8_t *targetLimit = (const uint8_t *) args->targetLimit; + const UChar* source = args->source; + const UChar* sourceLimit = args->sourceLimit; + /* int32_t* offsets = args->offsets; */ + UChar32 sourceChar; + UBool useFallback = cnv->useFallback; + uint8_t tmpTargetBuffer[7]; + int32_t tmpTargetBufferLength = 0; + COMPOUND_TEXT_CONVERTERS currentState, tmpState; + uint32_t pValue; + int32_t pValueLength = 0; + int32_t i, n, j; + + UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) cnv->extraInfo; + + currentState = myConverterData->state; + + /* check if the last codepoint of previous buffer was a lead surrogate*/ + if((sourceChar = cnv->fromUChar32)!=0 && target< targetLimit) { + goto getTrail; + } + + while( source < sourceLimit){ + if(target < targetLimit){ + + sourceChar = *(source++); + /*check if the char is a First surrogate*/ + if(U16_IS_SURROGATE(sourceChar)) { + if(U16_IS_SURROGATE_LEAD(sourceChar)) { +getTrail: + /*look ahead to find the trail surrogate*/ + if(source < sourceLimit) { + /* test the following code unit */ + UChar trail=(UChar) *source; + if(U16_IS_TRAIL(trail)) { + source++; + sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); + cnv->fromUChar32=0x00; + /* convert this supplementary code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + cnv->fromUChar32=sourceChar; + break; + } + } else { + /* no more input */ + cnv->fromUChar32=sourceChar; + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + cnv->fromUChar32=sourceChar; + break; + } + } + + tmpTargetBufferLength = 0; + tmpState = getState(sourceChar); + + if (tmpState != DO_SEARCH && currentState != tmpState) { + /* Get escape sequence if necessary */ + currentState = tmpState; + for (i = 0; escSeqCompoundText[currentState][i] != 0; i++) { + tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][i]; + } + } + + if (tmpState == DO_SEARCH) { + /* Test all available converters */ + for (i = 1; i < SEARCH_LENGTH; i++) { + pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[i], sourceChar, &pValue, useFallback); + if (pValueLength > 0) { + tmpState = (COMPOUND_TEXT_CONVERTERS)i; + if (currentState != tmpState) { + currentState = tmpState; + for (j = 0; escSeqCompoundText[currentState][j] != 0; j++) { + tmpTargetBuffer[tmpTargetBufferLength++] = escSeqCompoundText[currentState][j]; + } + } + for (n = (pValueLength - 1); n >= 0; n--) { + tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8)); + } + break; + } + } + } else if (tmpState == COMPOUND_TEXT_SINGLE_0) { + tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)sourceChar; + } else { + pValueLength = ucnv_MBCSFromUChar32(myConverterData->myConverterArray[currentState], sourceChar, &pValue, useFallback); + if (pValueLength > 0) { + for (n = (pValueLength - 1); n >= 0; n--) { + tmpTargetBuffer[tmpTargetBufferLength++] = (uint8_t)(pValue >> (n * 8)); + } + } + } + + for (i = 0; i < tmpTargetBufferLength; i++) { + if (target < targetLimit) { + *target++ = tmpTargetBuffer[i]; + } else { + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + if (*err == U_BUFFER_OVERFLOW_ERROR) { + for (; i < tmpTargetBufferLength; i++) { + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = tmpTargetBuffer[i]; + } + } + } else { + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + /*save the state and return */ + myConverterData->state = currentState; + args->source = source; + args->target = (char*)target; +} + + +static void U_CALLCONV +UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs *args, + UErrorCode* err){ + const char *mySource = (char *) args->source; + UChar *myTarget = args->target; + const char *mySourceLimit = args->sourceLimit; + const char *tmpSourceLimit = mySourceLimit; + uint32_t mySourceChar = 0x0000; + COMPOUND_TEXT_CONVERTERS currentState, tmpState; + int32_t sourceOffset = 0; + UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *) args->converter->extraInfo; + UConverterSharedData* savedSharedData = NULL; + + UConverterToUnicodeArgs subArgs; + int32_t minArgsSize; + + /* set up the subconverter arguments */ + if(args->size<sizeof(UConverterToUnicodeArgs)) { + minArgsSize = args->size; + } else { + minArgsSize = (int32_t)sizeof(UConverterToUnicodeArgs); + } + + uprv_memcpy(&subArgs, args, minArgsSize); + subArgs.size = (uint16_t)minArgsSize; + + currentState = tmpState = myConverterData->state; + + while(mySource < mySourceLimit){ + if(myTarget < args->targetLimit){ + if (args->converter->toULength > 0) { + mySourceChar = args->converter->toUBytes[0]; + } else { + mySourceChar = (uint8_t)*mySource; + } + + if (mySourceChar == ESC_START) { + tmpState = findStateFromEscSeq(mySource, mySourceLimit, args->converter->toUBytes, args->converter->toULength, err); + + if (*err == U_TRUNCATED_CHAR_FOUND) { + for (; mySource < mySourceLimit;) { + args->converter->toUBytes[args->converter->toULength++] = *mySource++; + } + *err = U_ZERO_ERROR; + break; + } else if (tmpState == INVALID) { + if (args->converter->toULength == 0) { + mySource++; /* skip over the 0x1b byte */ + } + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + + if (tmpState != currentState) { + currentState = tmpState; + } + + sourceOffset = static_cast<int32_t>(uprv_strlen((char*)escSeqCompoundText[currentState]) - args->converter->toULength); + + mySource += sourceOffset; + + args->converter->toULength = 0; + } + + if (currentState == COMPOUND_TEXT_SINGLE_0) { + while (mySource < mySourceLimit) { + if (*mySource == ESC_START) { + break; + } + if (myTarget < args->targetLimit) { + *myTarget++ = 0x00ff&(*mySource++); + } else { + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } else if (mySource < mySourceLimit){ + sourceOffset = findNextEsc(mySource, mySourceLimit); + + tmpSourceLimit = mySource + sourceOffset; + + subArgs.source = mySource; + subArgs.sourceLimit = tmpSourceLimit; + subArgs.target = myTarget; + savedSharedData = subArgs.converter->sharedData; + subArgs.converter->sharedData = myConverterData->myConverterArray[currentState]; + + ucnv_MBCSToUnicodeWithOffsets(&subArgs, err); + + subArgs.converter->sharedData = savedSharedData; + + mySource = subArgs.source; + myTarget = subArgs.target; + + if (U_FAILURE(*err)) { + if(*err == U_BUFFER_OVERFLOW_ERROR) { + if(subArgs.converter->UCharErrorBufferLength > 0) { + uprv_memcpy(args->converter->UCharErrorBuffer, subArgs.converter->UCharErrorBuffer, + subArgs.converter->UCharErrorBufferLength); + } + args->converter->UCharErrorBufferLength=subArgs.converter->UCharErrorBufferLength; + subArgs.converter->UCharErrorBufferLength = 0; + } + break; + } + } + } else { + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + myConverterData->state = currentState; + args->target = myTarget; + args->source = mySource; +} + +static void U_CALLCONV +_CompoundText_GetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + UConverterDataCompoundText *myConverterData = (UConverterDataCompoundText *)cnv->extraInfo; + int32_t i; + + for (i = 1; i < NUM_OF_CONVERTERS; i++) { + ucnv_MBCSGetUnicodeSetForUnicode(myConverterData->myConverterArray[i], sa, which, pErrorCode); + } + sa->add(sa->set, 0x0000); + sa->add(sa->set, 0x0009); + sa->add(sa->set, 0x000A); + sa->addRange(sa->set, 0x0020, 0x007F); + sa->addRange(sa->set, 0x00A0, 0x00FF); +} +U_CDECL_END + +static const UConverterImpl _CompoundTextImpl = { + + UCNV_COMPOUND_TEXT, + + NULL, + NULL, + + _CompoundTextOpen, + _CompoundTextClose, + _CompoundTextReset, + + UConverter_toUnicode_CompoundText_OFFSETS, + UConverter_toUnicode_CompoundText_OFFSETS, + UConverter_fromUnicode_CompoundText_OFFSETS, + UConverter_fromUnicode_CompoundText_OFFSETS, + NULL, + + NULL, + _CompoundTextgetName, + NULL, + NULL, + _CompoundText_GetUnicodeSet, + NULL, + NULL +}; + +static const UConverterStaticData _CompoundTextStaticData = { + sizeof(UConverterStaticData), + "COMPOUND_TEXT", + 0, + UCNV_IBM, + UCNV_COMPOUND_TEXT, + 1, + 6, + { 0xef, 0, 0, 0 }, + 1, + FALSE, + FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; +const UConverterSharedData _CompoundTextData = + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CompoundTextStaticData, &_CompoundTextImpl); + +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/thirdparty/icu4c/common/ucnv_err.cpp b/thirdparty/icu4c/common/ucnv_err.cpp new file mode 100644 index 0000000000..6b738face5 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_err.cpp @@ -0,0 +1,486 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ***************************************************************************** + * + * Copyright (C) 1998-2016, International Business Machines + * Corporation and others. All Rights Reserved. + * + ***************************************************************************** + * + * ucnv_err.c + * Implements error behaviour functions called by T_UConverter_{from,to}Unicode + * + * +* Change history: +* +* 06/29/2000 helena Major rewrite of the callback APIs. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv_err.h" +#include "unicode/ucnv_cb.h" +#include "ucnv_cnv.h" +#include "cmemory.h" +#include "unicode/ucnv.h" +#include "ustrfmt.h" + +#define VALUE_STRING_LENGTH 48 +/*Magic # 32 = 4(number of char in value string) * 8(max number of bytes per char for any converter) */ +#define UNICODE_PERCENT_SIGN_CODEPOINT 0x0025 +#define UNICODE_U_CODEPOINT 0x0055 +#define UNICODE_X_CODEPOINT 0x0058 +#define UNICODE_RS_CODEPOINT 0x005C +#define UNICODE_U_LOW_CODEPOINT 0x0075 +#define UNICODE_X_LOW_CODEPOINT 0x0078 +#define UNICODE_AMP_CODEPOINT 0x0026 +#define UNICODE_HASH_CODEPOINT 0x0023 +#define UNICODE_SEMICOLON_CODEPOINT 0x003B +#define UNICODE_PLUS_CODEPOINT 0x002B +#define UNICODE_LEFT_CURLY_CODEPOINT 0x007B +#define UNICODE_RIGHT_CURLY_CODEPOINT 0x007D +#define UNICODE_SPACE_CODEPOINT 0x0020 +#define UCNV_PRV_ESCAPE_ICU 0 +#define UCNV_PRV_ESCAPE_C 'C' +#define UCNV_PRV_ESCAPE_XML_DEC 'D' +#define UCNV_PRV_ESCAPE_XML_HEX 'X' +#define UCNV_PRV_ESCAPE_JAVA 'J' +#define UCNV_PRV_ESCAPE_UNICODE 'U' +#define UCNV_PRV_ESCAPE_CSS2 'S' +#define UCNV_PRV_STOP_ON_ILLEGAL 'i' + +/* + * IS_DEFAULT_IGNORABLE_CODE_POINT + * This is to check if a code point has the default ignorable unicode property. + * As such, this list needs to be updated if the ignorable code point list ever + * changes. + * To avoid dependency on other code, this list is hard coded here. + * When an ignorable code point is found and is unmappable, the default callbacks + * will ignore them. + * For a list of the default ignorable code points, use this link: + * https://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i= + * + * This list should be sync with the one in CharsetCallback.java + */ +#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \ + (c == 0x00AD) || \ + (c == 0x034F) || \ + (c == 0x061C) || \ + (c == 0x115F) || \ + (c == 0x1160) || \ + (0x17B4 <= c && c <= 0x17B5) || \ + (0x180B <= c && c <= 0x180E) || \ + (0x200B <= c && c <= 0x200F) || \ + (0x202A <= c && c <= 0x202E) || \ + (0x2060 <= c && c <= 0x206F) || \ + (c == 0x3164) || \ + (0xFE00 <= c && c <= 0xFE0F) || \ + (c == 0xFEFF) || \ + (c == 0xFFA0) || \ + (0xFFF0 <= c && c <= 0xFFF8) || \ + (0x1BCA0 <= c && c <= 0x1BCA3) || \ + (0x1D173 <= c && c <= 0x1D17A) || \ + (0xE0000 <= c && c <= 0xE0FFF)) + + +/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ +U_CAPI void U_EXPORT2 +UCNV_FROM_U_CALLBACK_STOP ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err) +{ + (void)context; + (void)fromUArgs; + (void)codeUnits; + (void)length; + if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) + { + /* + * Skip if the codepoint has unicode property of default ignorable. + */ + *err = U_ZERO_ERROR; + } + /* the caller must have set the error code accordingly */ + return; +} + + +/*Function Pointer STOPS at the ILLEGAL_SEQUENCE */ +U_CAPI void U_EXPORT2 +UCNV_TO_U_CALLBACK_STOP ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codePoints, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err) +{ + /* the caller must have set the error code accordingly */ + (void)context; (void)toUArgs; (void)codePoints; (void)length; (void)reason; (void)err; + return; +} + +U_CAPI void U_EXPORT2 +UCNV_FROM_U_CALLBACK_SKIP ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err) +{ + (void)fromUArgs; + (void)codeUnits; + (void)length; + if (reason <= UCNV_IRREGULAR) + { + if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) + { + /* + * Skip if the codepoint has unicode property of default ignorable. + */ + *err = U_ZERO_ERROR; + } + else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) + { + *err = U_ZERO_ERROR; + } + /* else the caller must have set the error code accordingly. */ + } + /* else ignore the reset, close and clone calls. */ +} + +U_CAPI void U_EXPORT2 +UCNV_FROM_U_CALLBACK_SUBSTITUTE ( + const void *context, + UConverterFromUnicodeArgs *fromArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err) +{ + (void)codeUnits; + (void)length; + if (reason <= UCNV_IRREGULAR) + { + if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) + { + /* + * Skip if the codepoint has unicode property of default ignorable. + */ + *err = U_ZERO_ERROR; + } + else if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) + { + *err = U_ZERO_ERROR; + ucnv_cbFromUWriteSub(fromArgs, 0, err); + } + /* else the caller must have set the error code accordingly. */ + } + /* else ignore the reset, close and clone calls. */ +} + +/*uses uprv_itou to get a unicode escape sequence of the offensive sequence, + *uses a clean copy (resetted) of the converter, to convert that unicode + *escape sequence to the target codepage (if conversion failure happens then + *we revert to substituting with subchar) + */ +U_CAPI void U_EXPORT2 +UCNV_FROM_U_CALLBACK_ESCAPE ( + const void *context, + UConverterFromUnicodeArgs *fromArgs, + const UChar *codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err) +{ + + UChar valueString[VALUE_STRING_LENGTH]; + int32_t valueStringLength = 0; + int32_t i = 0; + + const UChar *myValueSource = NULL; + UErrorCode err2 = U_ZERO_ERROR; + UConverterFromUCallback original = NULL; + const void *originalContext; + + UConverterFromUCallback ignoredCallback = NULL; + const void *ignoredContext; + + if (reason > UCNV_IRREGULAR) + { + return; + } + else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint)) + { + /* + * Skip if the codepoint has unicode property of default ignorable. + */ + *err = U_ZERO_ERROR; + return; + } + + ucnv_setFromUCallBack (fromArgs->converter, + (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE, + NULL, + &original, + &originalContext, + &err2); + + if (U_FAILURE (err2)) + { + *err = err2; + return; + } + if(context==NULL) + { + while (i < length) + { + valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ + valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); + } + } + else + { + switch(*((char*)context)) + { + case UCNV_PRV_ESCAPE_JAVA: + while (i < length) + { + valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ + valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); + } + break; + + case UCNV_PRV_ESCAPE_C: + valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ + + if(length==2){ + valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8); + + } + else{ + valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); + } + break; + + case UCNV_PRV_ESCAPE_XML_DEC: + + valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ + valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ + if(length==2){ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0); + } + else{ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0); + } + valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ + break; + + case UCNV_PRV_ESCAPE_XML_HEX: + + valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ + valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ + valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ + if(length==2){ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); + } + else{ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0); + } + valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ + break; + + case UCNV_PRV_ESCAPE_UNICODE: + valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */ + valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ + valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */ + if (length == 2) { + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4); + } else { + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4); + } + valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */ + break; + + case UCNV_PRV_ESCAPE_CSS2: + valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0); + /* Always add space character, becase the next character might be whitespace, + which would erroneously be considered the termination of the escape sequence. */ + valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT; + break; + + default: + while (i < length) + { + valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ + valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */ + valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4); + } + } + } + myValueSource = valueString; + + /* reset the error */ + *err = U_ZERO_ERROR; + + ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err); + + ucnv_setFromUCallBack (fromArgs->converter, + original, + originalContext, + &ignoredCallback, + &ignoredContext, + &err2); + if (U_FAILURE (err2)) + { + *err = err2; + return; + } + + return; +} + + + +U_CAPI void U_EXPORT2 +UCNV_TO_U_CALLBACK_SKIP ( + const void *context, + UConverterToUnicodeArgs *toArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err) +{ + (void)toArgs; + (void)codeUnits; + (void)length; + if (reason <= UCNV_IRREGULAR) + { + if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) + { + *err = U_ZERO_ERROR; + } + /* else the caller must have set the error code accordingly. */ + } + /* else ignore the reset, close and clone calls. */ +} + +U_CAPI void U_EXPORT2 +UCNV_TO_U_CALLBACK_SUBSTITUTE ( + const void *context, + UConverterToUnicodeArgs *toArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err) +{ + (void)codeUnits; + (void)length; + if (reason <= UCNV_IRREGULAR) + { + if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED)) + { + *err = U_ZERO_ERROR; + ucnv_cbToUWriteSub(toArgs,0,err); + } + /* else the caller must have set the error code accordingly. */ + } + /* else ignore the reset, close and clone calls. */ +} + +/*uses uprv_itou to get a unicode escape sequence of the offensive sequence, + *and uses that as the substitution sequence + */ +U_CAPI void U_EXPORT2 +UCNV_TO_U_CALLBACK_ESCAPE ( + const void *context, + UConverterToUnicodeArgs *toArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err) +{ + UChar uniValueString[VALUE_STRING_LENGTH]; + int32_t valueStringLength = 0; + int32_t i = 0; + + if (reason > UCNV_IRREGULAR) + { + return; + } + + if(context==NULL) + { + while (i < length) + { + uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ + uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ + valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); + } + } + else + { + switch(*((char*)context)) + { + case UCNV_PRV_ESCAPE_XML_DEC: + while (i < length) + { + uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ + uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ + valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0); + uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ + } + break; + + case UCNV_PRV_ESCAPE_XML_HEX: + while (i < length) + { + uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */ + uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */ + uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ + valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0); + uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */ + } + break; + case UCNV_PRV_ESCAPE_C: + while (i < length) + { + uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */ + uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */ + valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2); + } + break; + default: + while (i < length) + { + uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */ + uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */ + uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2); + valueStringLength += 2; + } + } + } + /* reset the error */ + *err = U_ZERO_ERROR; + + ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err); +} + +#endif diff --git a/thirdparty/icu4c/common/ucnv_ext.cpp b/thirdparty/icu4c/common/ucnv_ext.cpp new file mode 100644 index 0000000000..7dea4eef41 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_ext.cpp @@ -0,0 +1,1143 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2003-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ucnv_ext.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003jun13 +* created by: Markus W. Scherer +* +* Conversion extensions +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION + +#include "unicode/uset.h" +#include "unicode/ustring.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "ucnv_ext.h" +#include "cmemory.h" +#include "uassert.h" + +/* to Unicode --------------------------------------------------------------- */ + +/* + * @return lookup value for the byte, if found; else 0 + */ +static inline uint32_t +ucnv_extFindToU(const uint32_t *toUSection, int32_t length, uint8_t byte) { + uint32_t word0, word; + int32_t i, start, limit; + + /* check the input byte against the lowest and highest section bytes */ + start=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[0]); + limit=(int32_t)UCNV_EXT_TO_U_GET_BYTE(toUSection[length-1]); + if(byte<start || limit<byte) { + return 0; /* the byte is out of range */ + } + + if(length==((limit-start)+1)) { + /* direct access on a linear array */ + return UCNV_EXT_TO_U_GET_VALUE(toUSection[byte-start]); /* could be 0 */ + } + + /* word0 is suitable for <=toUSection[] comparison, word for <toUSection[] */ + word0=UCNV_EXT_TO_U_MAKE_WORD(byte, 0); + + /* + * Shift byte once instead of each section word and add 0xffffff. + * We will compare the shifted/added byte (bbffffff) against + * section words which have byte values in the same bit position. + * If and only if byte bb < section byte ss then bbffffff<ssvvvvvv + * for all v=0..f + * so we need not mask off the lower 24 bits of each section word. + */ + word=word0|UCNV_EXT_TO_U_VALUE_MASK; + + /* binary search */ + start=0; + limit=length; + for(;;) { + i=limit-start; + if(i<=1) { + break; /* done */ + } + /* start<limit-1 */ + + if(i<=4) { + /* linear search for the last part */ + if(word0<=toUSection[start]) { + break; + } + if(++start<limit && word0<=toUSection[start]) { + break; + } + if(++start<limit && word0<=toUSection[start]) { + break; + } + /* always break at start==limit-1 */ + ++start; + break; + } + + i=(start+limit)/2; + if(word<toUSection[i]) { + limit=i; + } else { + start=i; + } + } + + /* did we really find it? */ + if(start<limit && byte==UCNV_EXT_TO_U_GET_BYTE(word=toUSection[start])) { + return UCNV_EXT_TO_U_GET_VALUE(word); /* never 0 */ + } else { + return 0; /* not found */ + } +} + +/* + * TRUE if not an SI/SO stateful converter, + * or if the match length fits with the current converter state + */ +#define UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, match) \ + ((sisoState)<0 || ((sisoState)==0) == (match==1)) + +/* + * this works like ucnv_extMatchFromU() except + * - the first character is in pre + * - no trie is used + * - the returned matchLength is not offset by 2 + */ +static int32_t +ucnv_extMatchToU(const int32_t *cx, int8_t sisoState, + const char *pre, int32_t preLength, + const char *src, int32_t srcLength, + uint32_t *pMatchValue, + UBool /*useFallback*/, UBool flush) { + const uint32_t *toUTable, *toUSection; + + uint32_t value, matchValue; + int32_t i, j, idx, length, matchLength; + uint8_t b; + + if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) { + return 0; /* no extension data, no match */ + } + + /* initialize */ + toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t); + idx=0; + + matchValue=0; + i=j=matchLength=0; + + if(sisoState==0) { + /* SBCS state of an SI/SO stateful converter, look at only exactly 1 byte */ + if(preLength>1) { + return 0; /* no match of a DBCS sequence in SBCS mode */ + } else if(preLength==1) { + srcLength=0; + } else /* preLength==0 */ { + if(srcLength>1) { + srcLength=1; + } + } + flush=TRUE; + } + + /* we must not remember fallback matches when not using fallbacks */ + + /* match input units until there is a full match or the input is consumed */ + for(;;) { + /* go to the next section */ + toUSection=toUTable+idx; + + /* read first pair of the section */ + value=*toUSection++; + length=UCNV_EXT_TO_U_GET_BYTE(value); + value=UCNV_EXT_TO_U_GET_VALUE(value); + if( value!=0 && + (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || + TO_U_USE_FALLBACK(useFallback)) && + UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) + ) { + /* remember longest match so far */ + matchValue=value; + matchLength=i+j; + } + + /* match pre[] then src[] */ + if(i<preLength) { + b=(uint8_t)pre[i++]; + } else if(j<srcLength) { + b=(uint8_t)src[j++]; + } else { + /* all input consumed, partial match */ + if(flush || (length=(i+j))>UCNV_EXT_MAX_BYTES) { + /* + * end of the entire input stream, stop with the longest match so far + * or: partial match must not be longer than UCNV_EXT_MAX_BYTES + * because it must fit into state buffers + */ + break; + } else { + /* continue with more input next time */ + return -length; + } + } + + /* search for the current UChar */ + value=ucnv_extFindToU(toUSection, length, b); + if(value==0) { + /* no match here, stop with the longest match so far */ + break; + } else { + if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { + /* partial match, continue */ + idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value); + } else { + if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) || + TO_U_USE_FALLBACK(useFallback)) && + UCNV_EXT_TO_U_VERIFY_SISO_MATCH(sisoState, i+j) + ) { + /* full match, stop with result */ + matchValue=value; + matchLength=i+j; + } else { + /* full match on fallback not taken, stop with the longest match so far */ + } + break; + } + } + } + + if(matchLength==0) { + /* no match at all */ + return 0; + } + + /* return result */ + *pMatchValue=UCNV_EXT_TO_U_MASK_ROUNDTRIP(matchValue); + return matchLength; +} + +static inline void +ucnv_extWriteToU(UConverter *cnv, const int32_t *cx, + uint32_t value, + UChar **target, const UChar *targetLimit, + int32_t **offsets, int32_t srcIndex, + UErrorCode *pErrorCode) { + /* output the result */ + if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { + /* output a single code point */ + ucnv_toUWriteCodePoint( + cnv, UCNV_EXT_TO_U_GET_CODE_POINT(value), + target, targetLimit, + offsets, srcIndex, + pErrorCode); + } else { + /* output a string - with correct data we have resultLength>0 */ + ucnv_toUWriteUChars( + cnv, + UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_UCHARS_INDEX, UChar)+ + UCNV_EXT_TO_U_GET_INDEX(value), + UCNV_EXT_TO_U_GET_LENGTH(value), + target, targetLimit, + offsets, srcIndex, + pErrorCode); + } +} + +/* + * get the SI/SO toU state (state 0 is for SBCS, 1 for DBCS), + * or 1 for DBCS-only, + * or -1 if the converter is not SI/SO stateful + * + * Note: For SI/SO stateful converters getting here, + * cnv->mode==0 is equivalent to firstLength==1. + */ +#define UCNV_SISO_STATE(cnv) \ + ((cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO ? (int8_t)(cnv)->mode : \ + (cnv)->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? 1 : -1) + +/* + * target<targetLimit; set error code for overflow + */ +U_CFUNC UBool +ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx, + int32_t firstLength, + const char **src, const char *srcLimit, + UChar **target, const UChar *targetLimit, + int32_t **offsets, int32_t srcIndex, + UBool flush, + UErrorCode *pErrorCode) { + uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ + int32_t match; + + /* try to match */ + match=ucnv_extMatchToU(cx, (int8_t)UCNV_SISO_STATE(cnv), + (const char *)cnv->toUBytes, firstLength, + *src, (int32_t)(srcLimit-*src), + &value, + cnv->useFallback, flush); + if(match>0) { + /* advance src pointer for the consumed input */ + *src+=match-firstLength; + + /* write result to target */ + ucnv_extWriteToU(cnv, cx, + value, + target, targetLimit, + offsets, srcIndex, + pErrorCode); + return TRUE; + } else if(match<0) { + /* save state for partial match */ + const char *s; + int32_t j; + + /* copy the first code point */ + s=(const char *)cnv->toUBytes; + cnv->preToUFirstLength=(int8_t)firstLength; + for(j=0; j<firstLength; ++j) { + cnv->preToU[j]=*s++; + } + + /* now copy the newly consumed input */ + s=*src; + match=-match; + for(; j<match; ++j) { + cnv->preToU[j]=*s++; + } + *src=s; /* same as *src=srcLimit; because we reached the end of input */ + cnv->preToULength=(int8_t)match; + return TRUE; + } else /* match==0 no match */ { + return FALSE; + } +} + +U_CFUNC UChar32 +ucnv_extSimpleMatchToU(const int32_t *cx, + const char *source, int32_t length, + UBool useFallback) { + uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ + int32_t match; + + if(length<=0) { + return 0xffff; + } + + /* try to match */ + match=ucnv_extMatchToU(cx, -1, + source, length, + NULL, 0, + &value, + useFallback, TRUE); + if(match==length) { + /* write result for simple, single-character conversion */ + if(UCNV_EXT_TO_U_IS_CODE_POINT(value)) { + return UCNV_EXT_TO_U_GET_CODE_POINT(value); + } + } + + /* + * return no match because + * - match>0 && value points to string: simple conversion cannot handle multiple code points + * - match>0 && match!=length: not all input consumed, forbidden for this function + * - match==0: no match found in the first place + * - match<0: partial match, not supported for simple conversion (and flush==TRUE) + */ + return 0xfffe; +} + +/* + * continue partial match with new input + * never called for simple, single-character conversion + */ +U_CFUNC void +ucnv_extContinueMatchToU(UConverter *cnv, + UConverterToUnicodeArgs *pArgs, int32_t srcIndex, + UErrorCode *pErrorCode) { + uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ + int32_t match, length; + + match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv), + cnv->preToU, cnv->preToULength, + pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), + &value, + cnv->useFallback, pArgs->flush); + if(match>0) { + if(match>=cnv->preToULength) { + /* advance src pointer for the consumed input */ + pArgs->source+=match-cnv->preToULength; + cnv->preToULength=0; + } else { + /* the match did not use all of preToU[] - keep the rest for replay */ + length=cnv->preToULength-match; + uprv_memmove(cnv->preToU, cnv->preToU+match, length); + cnv->preToULength=(int8_t)-length; + } + + /* write result */ + ucnv_extWriteToU(cnv, cnv->sharedData->mbcs.extIndexes, + value, + &pArgs->target, pArgs->targetLimit, + &pArgs->offsets, srcIndex, + pErrorCode); + } else if(match<0) { + /* save state for partial match */ + const char *s; + int32_t j; + + /* just _append_ the newly consumed input to preToU[] */ + s=pArgs->source; + match=-match; + for(j=cnv->preToULength; j<match; ++j) { + cnv->preToU[j]=*s++; + } + pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ + cnv->preToULength=(int8_t)match; + } else /* match==0 */ { + /* + * no match + * + * We need to split the previous input into two parts: + * + * 1. The first codepage character is unmappable - that's how we got into + * trying the extension data in the first place. + * We need to move it from the preToU buffer + * to the error buffer, set an error code, + * and prepare the rest of the previous input for 2. + * + * 2. The rest of the previous input must be converted once we + * come back from the callback for the first character. + * At that time, we have to try again from scratch to convert + * these input characters. + * The replay will be handled by the ucnv.c conversion code. + */ + + /* move the first codepage character to the error field */ + uprv_memcpy(cnv->toUBytes, cnv->preToU, cnv->preToUFirstLength); + cnv->toULength=cnv->preToUFirstLength; + + /* move the rest up inside the buffer */ + length=cnv->preToULength-cnv->preToUFirstLength; + if(length>0) { + uprv_memmove(cnv->preToU, cnv->preToU+cnv->preToUFirstLength, length); + } + + /* mark preToU for replay */ + cnv->preToULength=(int8_t)-length; + + /* set the error code for unassigned */ + *pErrorCode=U_INVALID_CHAR_FOUND; + } +} + +/* from Unicode ------------------------------------------------------------- */ + +// Use roundtrips, "good one-way" mappings, and some normal fallbacks. +static inline UBool +extFromUUseMapping(UBool useFallback, uint32_t value, UChar32 firstCP) { + return + ((value&UCNV_EXT_FROM_U_STATUS_MASK)!=0 || + FROM_U_USE_FALLBACK(useFallback, firstCP)) && + (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0; +} + +/* + * @return index of the UChar, if found; else <0 + */ +static inline int32_t +ucnv_extFindFromU(const UChar *fromUSection, int32_t length, UChar u) { + int32_t i, start, limit; + + /* binary search */ + start=0; + limit=length; + for(;;) { + i=limit-start; + if(i<=1) { + break; /* done */ + } + /* start<limit-1 */ + + if(i<=4) { + /* linear search for the last part */ + if(u<=fromUSection[start]) { + break; + } + if(++start<limit && u<=fromUSection[start]) { + break; + } + if(++start<limit && u<=fromUSection[start]) { + break; + } + /* always break at start==limit-1 */ + ++start; + break; + } + + i=(start+limit)/2; + if(u<fromUSection[i]) { + limit=i; + } else { + start=i; + } + } + + /* did we really find it? */ + if(start<limit && u==fromUSection[start]) { + return start; + } else { + return -1; /* not found */ + } +} + +/* + * @param cx pointer to extension data; if NULL, returns 0 + * @param firstCP the first code point before all the other UChars + * @param pre UChars that must match; !initialMatch: partial match with them + * @param preLength length of pre, >=0 + * @param src UChars that can be used to complete a match + * @param srcLength length of src, >=0 + * @param pMatchValue [out] output result value for the match from the data structure + * @param useFallback "use fallback" flag, usually from cnv->useFallback + * @param flush TRUE if the end of the input stream is reached + * @return >1: matched, return value=total match length (number of input units matched) + * 1: matched, no mapping but request for <subchar1> + * (only for the first code point) + * 0: no match + * <0: partial match, return value=negative total match length + * (partial matches are never returned for flush==TRUE) + * (partial matches are never returned as being longer than UCNV_EXT_MAX_UCHARS) + * the matchLength is 2 if only firstCP matched, and >2 if firstCP and + * further code units matched + */ +static int32_t +ucnv_extMatchFromU(const int32_t *cx, + UChar32 firstCP, + const UChar *pre, int32_t preLength, + const UChar *src, int32_t srcLength, + uint32_t *pMatchValue, + UBool useFallback, UBool flush) { + const uint16_t *stage12, *stage3; + const uint32_t *stage3b; + + const UChar *fromUTableUChars, *fromUSectionUChars; + const uint32_t *fromUTableValues, *fromUSectionValues; + + uint32_t value, matchValue; + int32_t i, j, idx, length, matchLength; + UChar c; + + if(cx==NULL) { + return 0; /* no extension data, no match */ + } + + /* trie lookup of firstCP */ + idx=firstCP>>10; /* stage 1 index */ + if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) { + return 0; /* the first code point is outside the trie */ + } + + stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); + stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); + idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP); + + stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); + value=stage3b[idx]; + if(value==0) { + return 0; + } + + /* + * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0: + * Do not interpret values with reserved bits used, for forward compatibility, + * and do not even remember intermediate results with reserved bits used. + */ + + if(UCNV_EXT_TO_U_IS_PARTIAL(value)) { + /* partial match, enter the loop below */ + idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); + + /* initialize */ + fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar); + fromUTableValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t); + + matchValue=0; + i=j=matchLength=0; + + /* we must not remember fallback matches when not using fallbacks */ + + /* match input units until there is a full match or the input is consumed */ + for(;;) { + /* go to the next section */ + fromUSectionUChars=fromUTableUChars+idx; + fromUSectionValues=fromUTableValues+idx; + + /* read first pair of the section */ + length=*fromUSectionUChars++; + value=*fromUSectionValues++; + if(value!=0 && extFromUUseMapping(useFallback, value, firstCP)) { + /* remember longest match so far */ + matchValue=value; + matchLength=2+i+j; + } + + /* match pre[] then src[] */ + if(i<preLength) { + c=pre[i++]; + } else if(j<srcLength) { + c=src[j++]; + } else { + /* all input consumed, partial match */ + if(flush || (length=(i+j))>UCNV_EXT_MAX_UCHARS) { + /* + * end of the entire input stream, stop with the longest match so far + * or: partial match must not be longer than UCNV_EXT_MAX_UCHARS + * because it must fit into state buffers + */ + break; + } else { + /* continue with more input next time */ + return -(2+length); + } + } + + /* search for the current UChar */ + idx=ucnv_extFindFromU(fromUSectionUChars, length, c); + if(idx<0) { + /* no match here, stop with the longest match so far */ + break; + } else { + value=fromUSectionValues[idx]; + if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { + /* partial match, continue */ + idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value); + } else { + if(extFromUUseMapping(useFallback, value, firstCP)) { + /* full match, stop with result */ + matchValue=value; + matchLength=2+i+j; + } else { + /* full match on fallback not taken, stop with the longest match so far */ + } + break; + } + } + } + + if(matchLength==0) { + /* no match at all */ + return 0; + } + } else /* result from firstCP trie lookup */ { + if(extFromUUseMapping(useFallback, value, firstCP)) { + /* full match, stop with result */ + matchValue=value; + matchLength=2; + } else { + /* fallback not taken */ + return 0; + } + } + + /* return result */ + if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) { + return 1; /* assert matchLength==2 */ + } + + *pMatchValue=matchValue; + return matchLength; +} + +/* + * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits + */ +static inline void +ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx, + uint32_t value, + char **target, const char *targetLimit, + int32_t **offsets, int32_t srcIndex, + UErrorCode *pErrorCode) { + uint8_t buffer[1+UCNV_EXT_MAX_BYTES]; + const uint8_t *result; + int32_t length, prevLength; + + length=UCNV_EXT_FROM_U_GET_LENGTH(value); + value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); + + /* output the result */ + if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { + /* + * Generate a byte array and then write it below. + * This is not the fastest possible way, but it should be ok for + * extension mappings, and it is much simpler. + * Offset and overflow handling are only done once this way. + */ + uint8_t *p=buffer+1; /* reserve buffer[0] for shiftByte below */ + switch(length) { + case 3: + *p++=(uint8_t)(value>>16); + U_FALLTHROUGH; + case 2: + *p++=(uint8_t)(value>>8); + U_FALLTHROUGH; + case 1: + *p++=(uint8_t)value; + U_FALLTHROUGH; + default: + break; /* will never occur */ + } + result=buffer+1; + } else { + result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; + } + + /* with correct data we have length>0 */ + + if((prevLength=cnv->fromUnicodeStatus)!=0) { + /* handle SI/SO stateful output */ + uint8_t shiftByte; + + if(prevLength>1 && length==1) { + /* change from double-byte mode to single-byte */ + shiftByte=(uint8_t)UCNV_SI; + cnv->fromUnicodeStatus=1; + } else if(prevLength==1 && length>1) { + /* change from single-byte mode to double-byte */ + shiftByte=(uint8_t)UCNV_SO; + cnv->fromUnicodeStatus=2; + } else { + shiftByte=0; + } + + if(shiftByte!=0) { + /* prepend the shift byte to the result bytes */ + buffer[0]=shiftByte; + if(result!=buffer+1) { + uprv_memcpy(buffer+1, result, length); + } + result=buffer; + ++length; + } + } + + ucnv_fromUWriteBytes(cnv, (const char *)result, length, + target, targetLimit, + offsets, srcIndex, + pErrorCode); +} + +/* + * target<targetLimit; set error code for overflow + */ +U_CFUNC UBool +ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx, + UChar32 cp, + const UChar **src, const UChar *srcLimit, + char **target, const char *targetLimit, + int32_t **offsets, int32_t srcIndex, + UBool flush, + UErrorCode *pErrorCode) { + uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ + int32_t match; + + /* try to match */ + match=ucnv_extMatchFromU(cx, cp, + NULL, 0, + *src, (int32_t)(srcLimit-*src), + &value, + cnv->useFallback, flush); + + /* reject a match if the result is a single byte for DBCS-only */ + if( match>=2 && + !(UCNV_EXT_FROM_U_GET_LENGTH(value)==1 && + cnv->sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) + ) { + /* advance src pointer for the consumed input */ + *src+=match-2; /* remove 2 for the initial code point */ + + /* write result to target */ + ucnv_extWriteFromU(cnv, cx, + value, + target, targetLimit, + offsets, srcIndex, + pErrorCode); + return TRUE; + } else if(match<0) { + /* save state for partial match */ + const UChar *s; + int32_t j; + + /* copy the first code point */ + cnv->preFromUFirstCP=cp; + + /* now copy the newly consumed input */ + s=*src; + match=-match-2; /* remove 2 for the initial code point */ + for(j=0; j<match; ++j) { + cnv->preFromU[j]=*s++; + } + *src=s; /* same as *src=srcLimit; because we reached the end of input */ + cnv->preFromULength=(int8_t)match; + return TRUE; + } else if(match==1) { + /* matched, no mapping but request for <subchar1> */ + cnv->useSubChar1=TRUE; + return FALSE; + } else /* match==0 no match */ { + return FALSE; + } +} + +/* + * Used by ISO 2022 implementation. + * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping + */ +U_CFUNC int32_t +ucnv_extSimpleMatchFromU(const int32_t *cx, + UChar32 cp, uint32_t *pValue, + UBool useFallback) { + uint32_t value; + int32_t match; + + /* try to match */ + match=ucnv_extMatchFromU(cx, + cp, + NULL, 0, + NULL, 0, + &value, + useFallback, TRUE); + if(match>=2) { + /* write result for simple, single-character conversion */ + int32_t length; + int isRoundtrip; + + isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value); + length=UCNV_EXT_FROM_U_GET_LENGTH(value); + value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value); + + if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) { + *pValue=value; + return isRoundtrip ? length : -length; +#if 0 /* not currently used */ + } else if(length==4) { + /* de-serialize a 4-byte result */ + const uint8_t *result=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_BYTES_INDEX, uint8_t)+value; + *pValue= + ((uint32_t)result[0]<<24)| + ((uint32_t)result[1]<<16)| + ((uint32_t)result[2]<<8)| + result[3]; + return isRoundtrip ? 4 : -4; +#endif + } + } + + /* + * return no match because + * - match>1 && resultLength>4: result too long for simple conversion + * - match==1: no match found, <subchar1> preferred + * - match==0: no match found in the first place + * - match<0: partial match, not supported for simple conversion (and flush==TRUE) + */ + return 0; +} + +/* + * continue partial match with new input, requires cnv->preFromUFirstCP>=0 + * never called for simple, single-character conversion + */ +U_CFUNC void +ucnv_extContinueMatchFromU(UConverter *cnv, + UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, + UErrorCode *pErrorCode) { + uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */ + int32_t match; + + match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes, + cnv->preFromUFirstCP, + cnv->preFromU, cnv->preFromULength, + pArgs->source, (int32_t)(pArgs->sourceLimit-pArgs->source), + &value, + cnv->useFallback, pArgs->flush); + if(match>=2) { + match-=2; /* remove 2 for the initial code point */ + + if(match>=cnv->preFromULength) { + /* advance src pointer for the consumed input */ + pArgs->source+=match-cnv->preFromULength; + cnv->preFromULength=0; + } else { + /* the match did not use all of preFromU[] - keep the rest for replay */ + int32_t length=cnv->preFromULength-match; + u_memmove(cnv->preFromU, cnv->preFromU+match, length); + cnv->preFromULength=(int8_t)-length; + } + + /* finish the partial match */ + cnv->preFromUFirstCP=U_SENTINEL; + + /* write result */ + ucnv_extWriteFromU(cnv, cnv->sharedData->mbcs.extIndexes, + value, + &pArgs->target, pArgs->targetLimit, + &pArgs->offsets, srcIndex, + pErrorCode); + } else if(match<0) { + /* save state for partial match */ + const UChar *s; + int32_t j; + + /* just _append_ the newly consumed input to preFromU[] */ + s=pArgs->source; + match=-match-2; /* remove 2 for the initial code point */ + for(j=cnv->preFromULength; j<match; ++j) { + U_ASSERT(j>=0); + cnv->preFromU[j]=*s++; + } + pArgs->source=s; /* same as *src=srcLimit; because we reached the end of input */ + cnv->preFromULength=(int8_t)match; + } else /* match==0 or 1 */ { + /* + * no match + * + * We need to split the previous input into two parts: + * + * 1. The first code point is unmappable - that's how we got into + * trying the extension data in the first place. + * We need to move it from the preFromU buffer + * to the error buffer, set an error code, + * and prepare the rest of the previous input for 2. + * + * 2. The rest of the previous input must be converted once we + * come back from the callback for the first code point. + * At that time, we have to try again from scratch to convert + * these input characters. + * The replay will be handled by the ucnv.c conversion code. + */ + + if(match==1) { + /* matched, no mapping but request for <subchar1> */ + cnv->useSubChar1=TRUE; + } + + /* move the first code point to the error field */ + cnv->fromUChar32=cnv->preFromUFirstCP; + cnv->preFromUFirstCP=U_SENTINEL; + + /* mark preFromU for replay */ + cnv->preFromULength=-cnv->preFromULength; + + /* set the error code for unassigned */ + *pErrorCode=U_INVALID_CHAR_FOUND; + } +} + +static UBool +extSetUseMapping(UConverterUnicodeSet which, int32_t minLength, uint32_t value) { + if(which==UCNV_ROUNDTRIP_SET) { + // Add only code points for which the roundtrip flag is set. + // Do not add any fallbacks, even if ucnv_fromUnicode() would use them + // (fallbacks from PUA). See the API docs for ucnv_getUnicodeSet(). + // + // By analogy, also do not add "good one-way" mappings. + // + // Do not add entries with reserved bits set. + if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))!= + UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) { + return FALSE; + } + } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ { + // Do not add entries with reserved bits set. + if((value&UCNV_EXT_FROM_U_RESERVED_MASK)!=0) { + return FALSE; + } + } + // Do not add <subchar1> entries or other (future?) pseudo-entries + // with an output length of 0. + return UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength; +} + +static void +ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData, + const int32_t *cx, + const USetAdder *sa, + UConverterUnicodeSet which, + int32_t minLength, + UChar32 firstCP, + UChar s[UCNV_EXT_MAX_UCHARS], int32_t length, + int32_t sectionIndex, + UErrorCode *pErrorCode) { + const UChar *fromUSectionUChars; + const uint32_t *fromUSectionValues; + + uint32_t value; + int32_t i, count; + + fromUSectionUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar)+sectionIndex; + fromUSectionValues=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_VALUES_INDEX, uint32_t)+sectionIndex; + + /* read first pair of the section */ + count=*fromUSectionUChars++; + value=*fromUSectionValues++; + + if(extSetUseMapping(which, minLength, value)) { + if(length==U16_LENGTH(firstCP)) { + /* add the initial code point */ + sa->add(sa->set, firstCP); + } else { + /* add the string so far */ + sa->addString(sa->set, s, length); + } + } + + for(i=0; i<count; ++i) { + /* append this code unit and recurse or add the string */ + s[length]=fromUSectionUChars[i]; + value=fromUSectionValues[i]; + + if(value==0) { + /* no mapping, do nothing */ + } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { + ucnv_extGetUnicodeSetString( + sharedData, cx, sa, which, minLength, + firstCP, s, length+1, + (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), + pErrorCode); + } else if(extSetUseMapping(which, minLength, value)) { + sa->addString(sa->set, s, length+1); + } + } +} + +U_CFUNC void +ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, + const USetAdder *sa, + UConverterUnicodeSet which, + UConverterSetFilter filter, + UErrorCode *pErrorCode) { + const int32_t *cx; + const uint16_t *stage12, *stage3, *ps2, *ps3; + const uint32_t *stage3b; + + uint32_t value; + int32_t st1, stage1Length, st2, st3, minLength; + + UChar s[UCNV_EXT_MAX_UCHARS]; + UChar32 c; + int32_t length; + + cx=sharedData->mbcs.extIndexes; + if(cx==NULL) { + return; + } + + stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t); + stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t); + stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t); + + stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]; + + /* enumerate the from-Unicode trie table */ + c=0; /* keep track of the current code point while enumerating */ + + if(filter==UCNV_SET_FILTER_2022_CN) { + minLength=3; + } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY || + filter!=UCNV_SET_FILTER_NONE + ) { + /* DBCS-only, ignore single-byte results */ + minLength=2; + } else { + minLength=1; + } + + /* + * the trie enumeration is almost the same as + * in MBCSGetUnicodeSet() for MBCS_OUTPUT_1 + */ + for(st1=0; st1<stage1Length; ++st1) { + st2=stage12[st1]; + if(st2>stage1Length) { + ps2=stage12+st2; + for(st2=0; st2<64; ++st2) { + if((st3=(int32_t)ps2[st2]<<UCNV_EXT_STAGE_2_LEFT_SHIFT)!=0) { + /* read the stage 3 block */ + ps3=stage3+st3; + + do { + value=stage3b[*ps3++]; + if(value==0) { + /* no mapping, do nothing */ + } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) { + // Recurse for partial results. + length=0; + U16_APPEND_UNSAFE(s, length, c); + ucnv_extGetUnicodeSetString( + sharedData, cx, sa, which, minLength, + c, s, length, + (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value), + pErrorCode); + } else if(extSetUseMapping(which, minLength, value)) { + switch(filter) { + case UCNV_SET_FILTER_2022_CN: + if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) { + continue; + } + break; + case UCNV_SET_FILTER_SJIS: + if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) { + continue; + } + break; + case UCNV_SET_FILTER_GR94DBCS: + if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && + (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) && + (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { + continue; + } + break; + case UCNV_SET_FILTER_HZ: + if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && + (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) && + (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) { + continue; + } + break; + default: + /* + * UCNV_SET_FILTER_NONE, + * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength + */ + break; + } + sa->add(sa->set, c); + } + } while((++c&0xf)!=0); + } else { + c+=16; /* empty stage 3 block */ + } + } + } else { + c+=1024; /* empty stage 2 block */ + } + } +} + +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/thirdparty/icu4c/common/ucnv_ext.h b/thirdparty/icu4c/common/ucnv_ext.h new file mode 100644 index 0000000000..dceea7ef12 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_ext.h @@ -0,0 +1,481 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2003-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ucnv_ext.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003jun13 +* created by: Markus W. Scherer +* +* Conversion extensions +*/ + +#ifndef __UCNV_EXT_H__ +#define __UCNV_EXT_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv.h" +#include "ucnv_cnv.h" + +/* + * See icuhtml/design/conversion/conversion_extensions.html + * + * Conversion extensions serve three purposes: + * 1. They support m:n mappings. + * 2. They support extension-only conversion files that are used together + * with the regular conversion data in base files. + * 3. They support mappings with more complicated meta data, + * for example "good one-way" mappings (|4). + * + * A base file may contain an extension table (explicitly requested or + * implicitly generated for m:n mappings), but its extension table is not + * used when an extension-only file is used. + * + * It is an error if a base file contains any regular (not extension) mapping + * from the same sequence as a mapping in the extension file + * because the base mapping would hide the extension mapping. + * + * + * Data for conversion extensions: + * + * One set of data structures per conversion direction (to/from Unicode). + * The data structures are sorted by input units to allow for binary search. + * Input sequences of more than one unit are handled like contraction tables + * in collation: + * The lookup value of a unit points to another table that is to be searched + * for the next unit, recursively. + * + * For conversion from Unicode, the initial code point is looked up in + * a 3-stage trie for speed, + * with an additional table of unique results to save space. + * + * Long output strings are stored in separate arrays, with length and index + * in the lookup tables. + * Output results also include a flag distinguishing roundtrip from + * (reverse) fallback mappings. + * + * Input Unicode strings must not begin or end with unpaired surrogates + * to avoid problems with matches on parts of surrogate pairs. + * + * Mappings from multiple characters (code points or codepage state + * table sequences) must be searched preferring the longest match. + * For this to work and be efficient, the variable-width table must contain + * all mappings that contain prefixes of the multiple characters. + * If an extension table is built on top of a base table in another file + * and a base table entry is a prefix of a multi-character mapping, then + * this is an error. + * + * + * Implementation note: + * + * Currently, the parser and several checks in the code limit the number + * of UChars or bytes in a mapping to + * UCNV_EXT_MAX_UCHARS and UCNV_EXT_MAX_BYTES, respectively, + * which are output value limits in the data structure. + * + * For input, this is not strictly necessary - it is a hard limit only for the + * buffers in UConverter that are used to store partial matches. + * + * Input sequences could otherwise be arbitrarily long if partial matches + * need not be stored (i.e., if a sequence does not span several buffers with too + * many units before the last buffer), although then results would differ + * depending on whether partial matches exceed the limits or not, + * which depends on the pattern of buffer sizes. + * + * + * Data structure: + * + * int32_t indexes[>=32]; + * + * Array of indexes and lengths etc. The length of the array is at least 32. + * The actual length is stored in indexes[0] to be forward compatible. + * + * Each index to another array is the number of bytes from indexes[]. + * Each length of an array is the number of array base units in that array. + * + * Some of the structures may not be present, in which case their indexes + * and lengths are 0. + * + * Usage of indexes[i]: + * [0] length of indexes[] + * + * // to Unicode table + * [1] index of toUTable[] (array of uint32_t) + * [2] length of toUTable[] + * [3] index of toUUChars[] (array of UChar) + * [4] length of toUUChars[] + * + * // from Unicode table, not for the initial code point + * [5] index of fromUTableUChars[] (array of UChar) + * [6] index of fromUTableValues[] (array of uint32_t) + * [7] length of fromUTableUChars[] and fromUTableValues[] + * [8] index of fromUBytes[] (array of char) + * [9] length of fromUBytes[] + * + * // from Unicode trie for initial-code point lookup + * [10] index of fromUStage12[] (combined array of uint16_t for stages 1 & 2) + * [11] length of stage 1 portion of fromUStage12[] + * [12] length of fromUStage12[] + * [13] index of fromUStage3[] (array of uint16_t indexes into fromUStage3b[]) + * [14] length of fromUStage3[] + * [15] index of fromUStage3b[] (array of uint32_t like fromUTableValues[]) + * [16] length of fromUStage3b[] + * + * [17] Bit field containing numbers of bytes: + * 31..24 reserved, 0 + * 23..16 maximum input bytes + * 15.. 8 maximum output bytes + * 7.. 0 maximum bytes per UChar + * + * [18] Bit field containing numbers of UChars: + * 31..24 reserved, 0 + * 23..16 maximum input UChars + * 15.. 8 maximum output UChars + * 7.. 0 maximum UChars per byte + * + * [19] Bit field containing flags: + * (extension table unicodeMask) + * 1 UCNV_HAS_SURROGATES flag for the extension table + * 0 UCNV_HAS_SUPPLEMENTARY flag for the extension table + * + * [20]..[30] reserved, 0 + * [31] number of bytes for the entire extension structure + * [>31] reserved; there are indexes[0] indexes + * + * + * uint32_t toUTable[]; + * + * Array of byte/value pairs for lookups for toUnicode conversion. + * The array is partitioned into sections like collation contraction tables. + * Each section contains one word with the number of following words and + * a default value for when the lookup in this section yields no match. + * + * A section is sorted in ascending order of input bytes, + * allowing for fast linear or binary searches. + * The builder may store entries for a contiguous range of byte values + * (compare difference between the first and last one with count), + * which then allows for direct array access. + * The builder should always do this for the initial table section. + * + * Entries may have 0 values, see below. + * No two entries in a section have the same byte values. + * + * Each uint32_t contains an input byte value in bits 31..24 and the + * corresponding lookup value in bits 23..0. + * Interpret the value as follows: + * if(value==0) { + * no match, see below + * } else if(value<0x1f0000) { + * partial match - use value as index to the next toUTable section + * and match the next unit; (value indexes toUTable[value]) + * } else { + * if(bit 23 set) { + * roundtrip; + * } else { + * fallback; + * } + * unset value bit 23; + * if(value<=0x2fffff) { + * (value-0x1f0000) is a code point; (BMP: value<=0x1fffff) + * } else { + * bits 17..0 (value&0x3ffff) is an index to + * the result UChars in toUUChars[]; (0 indexes toUUChars[0]) + * length of the result=((value>>18)-12); (length=0..19) + * } + * } + * + * The first word in a section contains the number of following words in the + * input byte position (bits 31..24, number=1..0xff). + * The value of the initial word is used when the current byte is not found + * in this section. + * If the value is not 0, then it represents a result as above. + * If the value is 0, then the search has to return a shorter match with an + * earlier default value as the result, or result in "unmappable" even for the + * initial bytes. + * If the value is 0 for the initial toUTable entry, then the initial byte + * does not start any mapping input. + * + * + * UChar toUUChars[]; + * + * Contains toUnicode mapping results, stored as sequences of UChars. + * Indexes and lengths stored in the toUTable[]. + * + * + * UChar fromUTableUChars[]; + * uint32_t fromUTableValues[]; + * + * The fromUTable is split into two arrays, but works otherwise much like + * the toUTable. The array is partitioned into sections like collation + * contraction tables and toUTable. + * A row in the table consists of same-index entries in fromUTableUChars[] + * and fromUTableValues[]. + * + * Interpret a value as follows: + * if(value==0) { + * no match, see below + * } else if(value<=0xffffff) { (bits 31..24 are 0) + * partial match - use value as index to the next fromUTable section + * and match the next unit; (value indexes fromUTable[value]) + * } else { + * if(value==0x80000001) { + * return no mapping, but request for <subchar1>; + * } + * if(bit 31 set) { + * roundtrip (|0); + * } else if(bit 30 set) { + * "good one-way" mapping (|4); -- new in ICU4C 51, _MBCSHeader.version 5.4/4.4 + * } else { + * normal fallback (|1); + * } + * // bit 29 reserved, 0 + * length=(value>>24)&0x1f; (bits 28..24) + * if(length==1..3) { + * bits 23..0 contain 1..3 bytes, padded with 00s on the left; + * } else { + * bits 23..0 (value&0xffffff) is an index to + * the result bytes in fromUBytes[]; (0 indexes fromUBytes[0]) + * } + * } + * + * The first pair in a section contains the number of following pairs in the + * UChar position (16 bits, number=1..0xffff). + * The value of the initial pair is used when the current UChar is not found + * in this section. + * If the value is not 0, then it represents a result as above. + * If the value is 0, then the search has to return a shorter match with an + * earlier default value as the result, or result in "unmappable" even for the + * initial UChars. + * + * If the from Unicode trie is present, then the from Unicode search tables + * are not used for initial code points. + * In this case, the first entries (index 0) in the tables are not used + * (reserved, set to 0) because a value of 0 is used in trie results + * to indicate no mapping. + * + * + * uint16_t fromUStage12[]; + * + * Stages 1 & 2 of a trie that maps an initial code point. + * Indexes in stage 1 are all offset by the length of stage 1 so that the + * same array pointer can be used for both stages. + * If (c>>10)>=(length of stage 1) then c does not start any mapping. + * Same bit distribution as for regular conversion tries. + * + * + * uint16_t fromUStage3[]; + * uint32_t fromUStage3b[]; + * + * Stage 3 of the trie. The first array simply contains indexes to the second, + * which contains words in the same format as fromUTableValues[]. + * Use a stage 3 granularity of 4, which allows for 256k stage 3 entries, + * and 16-bit entries in stage 3 allow for 64k stage 3b entries. + * The stage 3 granularity means that the stage 2 entry needs to be left-shifted. + * + * Two arrays are used because it is expected that more than half of the stage 3 + * entries will be zero. The 16-bit index stage 3 array saves space even + * considering storing a total of 6 bytes per non-zero entry in both arrays + * together. + * Using a stage 3 granularity of >1 diminishes the compactability in that stage + * but provides a larger effective addressing space in stage 2. + * All but the final result stage use 16-bit entries to save space. + * + * fromUStage3b[] contains a zero for "no mapping" at its index 0, + * and may contain UCNV_EXT_FROM_U_SUBCHAR1 at index 1 for "<subchar1> SUB mapping" + * (i.e., "no mapping" with preference for <subchar1> rather than <subchar>), + * and all other items are unique non-zero results. + * + * The default value of a fromUTableValues[] section that is referenced + * _directly_ from a fromUStage3b[] item may also be UCNV_EXT_FROM_U_SUBCHAR1, + * but this value must not occur anywhere else in fromUTableValues[] + * because "no mapping" is always a property of a single code point, + * never of multiple. + * + * + * char fromUBytes[]; + * + * Contains fromUnicode mapping results, stored as sequences of chars. + * Indexes and lengths stored in the fromUTableValues[]. + */ +enum { + UCNV_EXT_INDEXES_LENGTH, /* 0 */ + + UCNV_EXT_TO_U_INDEX, /* 1 */ + UCNV_EXT_TO_U_LENGTH, + UCNV_EXT_TO_U_UCHARS_INDEX, + UCNV_EXT_TO_U_UCHARS_LENGTH, + + UCNV_EXT_FROM_U_UCHARS_INDEX, /* 5 */ + UCNV_EXT_FROM_U_VALUES_INDEX, + UCNV_EXT_FROM_U_LENGTH, + UCNV_EXT_FROM_U_BYTES_INDEX, + UCNV_EXT_FROM_U_BYTES_LENGTH, + + UCNV_EXT_FROM_U_STAGE_12_INDEX, /* 10 */ + UCNV_EXT_FROM_U_STAGE_1_LENGTH, + UCNV_EXT_FROM_U_STAGE_12_LENGTH, + UCNV_EXT_FROM_U_STAGE_3_INDEX, + UCNV_EXT_FROM_U_STAGE_3_LENGTH, + UCNV_EXT_FROM_U_STAGE_3B_INDEX, + UCNV_EXT_FROM_U_STAGE_3B_LENGTH, + + UCNV_EXT_COUNT_BYTES, /* 17 */ + UCNV_EXT_COUNT_UCHARS, + UCNV_EXT_FLAGS, + + UCNV_EXT_RESERVED_INDEX, /* 20, moves with additional indexes */ + + UCNV_EXT_SIZE=31, + UCNV_EXT_INDEXES_MIN_LENGTH=32 +}; + +/* get the pointer to an extension array from indexes[index] */ +#define UCNV_EXT_ARRAY(indexes, index, itemType) \ + ((const itemType *)((const char *)(indexes)+(indexes)[index])) + +#define UCNV_GET_MAX_BYTES_PER_UCHAR(indexes) \ + ((indexes)[UCNV_EXT_COUNT_BYTES]&0xff) + +/* internal API ------------------------------------------------------------- */ + +U_CFUNC UBool +ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx, + int32_t firstLength, + const char **src, const char *srcLimit, + UChar **target, const UChar *targetLimit, + int32_t **offsets, int32_t srcIndex, + UBool flush, + UErrorCode *pErrorCode); + +U_CFUNC UChar32 +ucnv_extSimpleMatchToU(const int32_t *cx, + const char *source, int32_t length, + UBool useFallback); + +U_CFUNC void +ucnv_extContinueMatchToU(UConverter *cnv, + UConverterToUnicodeArgs *pArgs, int32_t srcIndex, + UErrorCode *pErrorCode); + + +U_CFUNC UBool +ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx, + UChar32 cp, + const UChar **src, const UChar *srcLimit, + char **target, const char *targetLimit, + int32_t **offsets, int32_t srcIndex, + UBool flush, + UErrorCode *pErrorCode); + +U_CFUNC int32_t +ucnv_extSimpleMatchFromU(const int32_t *cx, + UChar32 cp, uint32_t *pValue, + UBool useFallback); + +U_CFUNC void +ucnv_extContinueMatchFromU(UConverter *cnv, + UConverterFromUnicodeArgs *pArgs, int32_t srcIndex, + UErrorCode *pErrorCode); + +/* + * Add code points and strings to the set according to the extension mappings. + * Limitation on the UConverterSetFilter: + * The filters currently assume that they are used with 1:1 mappings. + * They only apply to single input code points, and then they pass through + * only mappings with single-charset-code results. + * For example, the Shift-JIS filter only works for 2-byte results and tests + * that those 2 bytes are in the JIS X 0208 range of Shift-JIS. + */ +U_CFUNC void +ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData, + const USetAdder *sa, + UConverterUnicodeSet which, + UConverterSetFilter filter, + UErrorCode *pErrorCode); + +/* toUnicode helpers -------------------------------------------------------- */ + +#define UCNV_EXT_TO_U_BYTE_SHIFT 24 +#define UCNV_EXT_TO_U_VALUE_MASK 0xffffff +#define UCNV_EXT_TO_U_MIN_CODE_POINT 0x1f0000 +#define UCNV_EXT_TO_U_MAX_CODE_POINT 0x2fffff +#define UCNV_EXT_TO_U_ROUNDTRIP_FLAG ((uint32_t)1<<23) +#define UCNV_EXT_TO_U_INDEX_MASK 0x3ffff +#define UCNV_EXT_TO_U_LENGTH_SHIFT 18 +#define UCNV_EXT_TO_U_LENGTH_OFFSET 12 + +/* maximum number of indexed UChars */ +#define UCNV_EXT_MAX_UCHARS 19 + +#define UCNV_EXT_TO_U_MAKE_WORD(byte, value) (((uint32_t)(byte)<<UCNV_EXT_TO_U_BYTE_SHIFT)|(value)) + +#define UCNV_EXT_TO_U_GET_BYTE(word) ((word)>>UCNV_EXT_TO_U_BYTE_SHIFT) +#define UCNV_EXT_TO_U_GET_VALUE(word) ((word)&UCNV_EXT_TO_U_VALUE_MASK) + +#define UCNV_EXT_TO_U_IS_PARTIAL(value) ((value)<UCNV_EXT_TO_U_MIN_CODE_POINT) +#define UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value) (value) + +#define UCNV_EXT_TO_U_IS_ROUNDTRIP(value) (((value)&UCNV_EXT_TO_U_ROUNDTRIP_FLAG)!=0) +#define UCNV_EXT_TO_U_MASK_ROUNDTRIP(value) ((value)&~UCNV_EXT_TO_U_ROUNDTRIP_FLAG) + +/* use after masking off the roundtrip flag */ +#define UCNV_EXT_TO_U_IS_CODE_POINT(value) ((value)<=UCNV_EXT_TO_U_MAX_CODE_POINT) +#define UCNV_EXT_TO_U_GET_CODE_POINT(value) ((value)-UCNV_EXT_TO_U_MIN_CODE_POINT) + +#define UCNV_EXT_TO_U_GET_INDEX(value) ((value)&UCNV_EXT_TO_U_INDEX_MASK) +#define UCNV_EXT_TO_U_GET_LENGTH(value) (((value)>>UCNV_EXT_TO_U_LENGTH_SHIFT)-UCNV_EXT_TO_U_LENGTH_OFFSET) + +/* fromUnicode helpers ------------------------------------------------------ */ + +/* most trie constants are shared with ucnvmbcs.h */ + +/* see similar utrie.h UTRIE_INDEX_SHIFT and UTRIE_DATA_GRANULARITY */ +#define UCNV_EXT_STAGE_2_LEFT_SHIFT 2 +#define UCNV_EXT_STAGE_3_GRANULARITY 4 + +/* trie access, returns the stage 3 value=index to stage 3b; s1Index=c>>10 */ +#define UCNV_EXT_FROM_U(stage12, stage3, s1Index, c) \ + (stage3)[ ((int32_t)(stage12)[ (stage12)[s1Index] +(((c)>>4)&0x3f) ]<<UCNV_EXT_STAGE_2_LEFT_SHIFT) +((c)&0xf) ] + +#define UCNV_EXT_FROM_U_LENGTH_SHIFT 24 +#define UCNV_EXT_FROM_U_ROUNDTRIP_FLAG ((uint32_t)1<<31) +#define UCNV_EXT_FROM_U_GOOD_ONE_WAY_FLAG 0x40000000 +#define UCNV_EXT_FROM_U_STATUS_MASK 0xc0000000 +#define UCNV_EXT_FROM_U_RESERVED_MASK 0x20000000 +#define UCNV_EXT_FROM_U_DATA_MASK 0xffffff + +/* special value for "no mapping" to <subchar1> (impossible roundtrip to 0 bytes, value 01) */ +#define UCNV_EXT_FROM_U_SUBCHAR1 0x80000001 + +/* at most 3 bytes in the lower part of the value */ +#define UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH 3 + +/* maximum number of indexed bytes */ +#define UCNV_EXT_MAX_BYTES 0x1f + +#define UCNV_EXT_FROM_U_IS_PARTIAL(value) (((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)==0) +#define UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value) (value) + +#define UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) (((value)&UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)!=0) +#define UCNV_EXT_FROM_U_MASK_ROUNDTRIP(value) ((value)&~UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) + +/* get length; masks away all other bits */ +#define UCNV_EXT_FROM_U_GET_LENGTH(value) (int32_t)(((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)&UCNV_EXT_MAX_BYTES) + +/* get bytes or bytes index */ +#define UCNV_EXT_FROM_U_GET_DATA(value) ((value)&UCNV_EXT_FROM_U_DATA_MASK) + +#endif + +#endif diff --git a/thirdparty/icu4c/common/ucnv_imp.h b/thirdparty/icu4c/common/ucnv_imp.h new file mode 100644 index 0000000000..c5e6aeb47e --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_imp.h @@ -0,0 +1,139 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* +* ucnv_imp.h: +* Contains all internal and external data structure definitions +* Created & Maitained by Bertrand A. Damiba +* +* +* +* ATTENTION: +* --------- +* Although the data structures in this file are open and stack allocatable +* we reserve the right to hide them in further releases. +*/ + +#ifndef UCNV_IMP_H +#define UCNV_IMP_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/uloc.h" +#include "ucnv_bld.h" + +/* + * Fast check for whether a charset name is "UTF-8". + * This does not recognize all of the variations that ucnv_open() + * and other functions recognize, but it covers most cases. + * @param name const char * charset name + * @return + */ +#define UCNV_FAST_IS_UTF8(name) \ + (((name[0]=='U' ? \ + ( name[1]=='T' && name[2]=='F') : \ + (name[0]=='u' && name[1]=='t' && name[2]=='f'))) \ + && (name[3]=='-' ? \ + (name[4]=='8' && name[5]==0) : \ + (name[3]=='8' && name[4]==0))) + +typedef struct { + char cnvName[UCNV_MAX_CONVERTER_NAME_LENGTH]; + char locale[ULOC_FULLNAME_CAPACITY]; + uint32_t options; +} UConverterNamePieces; + +U_CFUNC UBool +ucnv_canCreateConverter(const char *converterName, UErrorCode *err); + +/* figures out if we need to go to file to read in the data tables. + * @param converterName The name of the converter + * @param err The error code + * @return the newly created converter + */ +U_CAPI UConverter * +ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err); + +/* + * Open a purely algorithmic converter, specified by a type constant. + * @param myUConverter NULL, or pre-allocated UConverter structure to avoid + * a memory allocation + * @param type requested converter type + * @param locale locale parameter, or "" + * @param options converter options bit set (default 0) + * @param err ICU error code, not tested for U_FAILURE on input + * because this is an internal function + * @internal + */ +U_CFUNC UConverter * +ucnv_createAlgorithmicConverter(UConverter *myUConverter, + UConverterType type, + const char *locale, uint32_t options, + UErrorCode *err); + +/* + * Creates a converter from shared data. + * Adopts mySharedConverterData: No matter what happens, the caller must not + * unload mySharedConverterData, except via ucnv_close(return value) + * if this function is successful. + */ +U_CFUNC UConverter * +ucnv_createConverterFromSharedData(UConverter *myUConverter, + UConverterSharedData *mySharedConverterData, + UConverterLoadArgs *pArgs, + UErrorCode *err); + +U_CFUNC UConverter * +ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode *err); + +/** + * Load a converter but do not create a UConverter object. + * Simply return the UConverterSharedData. + * Performs alias lookup etc. + * The UConverterNamePieces need not be initialized + * before calling this function. + * The UConverterLoadArgs must be initialized + * before calling this function. + * If the args are passed in, then the pieces must be passed in too. + * In other words, the following combinations are allowed: + * - pieces==NULL && args==NULL + * - pieces!=NULL && args==NULL + * - pieces!=NULL && args!=NULL + * @internal + */ +U_CFUNC UConverterSharedData * +ucnv_loadSharedData(const char *converterName, + UConverterNamePieces *pieces, + UConverterLoadArgs *pArgs, + UErrorCode * err); + +/** + * This may unload the shared data in a thread safe manner. + * This will only unload the data if no other converters are sharing it. + */ +U_CFUNC void +ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData); + +/** + * This is a thread safe way to increment the reference count. + */ +U_CFUNC void +ucnv_incrementRefCount(UConverterSharedData *sharedData); + +/** + * These are the default error handling callbacks for the charset conversion framework. + * For performance reasons, they are only called to handle an error (not normally called for a reset or close). + */ +#define UCNV_TO_U_DEFAULT_CALLBACK ((UConverterToUCallback) UCNV_TO_U_CALLBACK_SUBSTITUTE) +#define UCNV_FROM_U_DEFAULT_CALLBACK ((UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE) + +#endif + +#endif /* _UCNV_IMP */ diff --git a/thirdparty/icu4c/common/ucnv_io.cpp b/thirdparty/icu4c/common/ucnv_io.cpp new file mode 100644 index 0000000000..7a95a3f1e6 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_io.cpp @@ -0,0 +1,1360 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* +* ucnv_io.cpp: +* initializes global variables and defines functions pertaining to converter +* name resolution aspect of the conversion code. +* +* new implementation: +* +* created on: 1999nov22 +* created by: Markus W. Scherer +* +* Use the binary cnvalias.icu (created from convrtrs.txt) to work +* with aliases for converter names. +* +* Date Name Description +* 11/22/1999 markus Created +* 06/28/2002 grhoten Major overhaul of the converter alias design. +* Now an alias can map to different converters +* depending on the specified standard. +******************************************************************************* +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/udata.h" + +#include "umutex.h" +#include "uarrsort.h" +#include "uassert.h" +#include "udataswp.h" +#include "cstring.h" +#include "cmemory.h" +#include "ucnv_io.h" +#include "uenumimp.h" +#include "ucln_cmn.h" + +/* Format of cnvalias.icu ----------------------------------------------------- + * + * cnvalias.icu is a binary, memory-mappable form of convrtrs.txt. + * This binary form contains several tables. All indexes are to uint16_t + * units, and not to the bytes (uint8_t units). Addressing everything on + * 16-bit boundaries allows us to store more information with small index + * numbers, which are also 16-bit in size. The majority of the table (except + * the string table) are 16-bit numbers. + * + * First there is the size of the Table of Contents (TOC). The TOC + * entries contain the size of each section. In order to find the offset + * you just need to sum up the previous offsets. + * The TOC length and entries are an array of uint32_t values. + * The first section after the TOC starts immediately after the TOC. + * + * 1) This section contains a list of converters. This list contains indexes + * into the string table for the converter name. The index of this list is + * also used by other sections, which are mentioned later on. + * This list is not sorted. + * + * 2) This section contains a list of tags. This list contains indexes + * into the string table for the tag name. The index of this list is + * also used by other sections, which are mentioned later on. + * This list is in priority order of standards. + * + * 3) This section contains a list of sorted unique aliases. This + * list contains indexes into the string table for the alias name. The + * index of this list is also used by other sections, like the 4th section. + * The index for the 3rd and 4th section is used to get the + * alias -> converter name mapping. Section 3 and 4 form a two column table. + * Some of the most significant bits of each index may contain other + * information (see findConverter for details). + * + * 4) This section contains a list of mapped converter names. Consider this + * as a table that maps the 3rd section to the 1st section. This list contains + * indexes into the 1st section. The index of this list is the same index in + * the 3rd section. There is also some extra information in the high bits of + * each converter index in this table. Currently it's only used to say that + * an alias mapped to this converter is ambiguous. See UCNV_CONVERTER_INDEX_MASK + * and UCNV_AMBIGUOUS_ALIAS_MAP_BIT for more information. This section is + * the predigested form of the 5th section so that an alias lookup can be fast. + * + * 5) This section contains a 2D array with indexes to the 6th section. This + * section is the full form of all alias mappings. The column index is the + * index into the converter list (column header). The row index is the index + * to tag list (row header). This 2D array is the top part a 3D array. The + * third dimension is in the 6th section. + * + * 6) This is blob of variable length arrays. Each array starts with a size, + * and is followed by indexes to alias names in the string table. This is + * the third dimension to the section 5. No other section should be referencing + * this section. + * + * 7) Starting in ICU 3.6, this can be a UConverterAliasOptions struct. Its + * presence indicates that a section 9 exists. UConverterAliasOptions specifies + * what type of string normalization is used among other potential things in the + * future. + * + * 8) This is the string table. All strings are indexed on an even address. + * There are two reasons for this. First many chip architectures locate strings + * faster on even address boundaries. Second, since all indexes are 16-bit + * numbers, this string table can be 128KB in size instead of 64KB when we + * only have strings starting on an even address. + * + * 9) When present this is a set of prenormalized strings from section 8. This + * table contains normalized strings with the dashes and spaces stripped out, + * and all strings lowercased. In the future, the options in section 7 may state + * other types of normalization. + * + * Here is the concept of section 5 and 6. It's a 3D cube. Each tag + * has a unique alias among all converters. That same alias can + * be mentioned in other standards on different converters, + * but only one alias per tag can be unique. + * + * + * Converter Names (Usually in TR22 form) + * -------------------------------------------. + * T / /| + * a / / | + * g / / | + * s / / | + * / / | + * ------------------------------------------/ | + * A | | | + * l | | | + * i | | / + * a | | / + * s | | / + * e | | / + * s | |/ + * ------------------------------------------- + * + * + * + * Here is what it really looks like. It's like swiss cheese. + * There are holes. Some converters aren't recognized by + * a standard, or they are really old converters that the + * standard doesn't recognize anymore. + * + * Converter Names (Usually in TR22 form) + * -------------------------------------------. + * T /##########################################/| + * a / # # /# + * g / # ## ## ### # ### ### ### #/ + * s / # ##### #### ## ## #/# + * / ### # # ## # # # ### # # #/## + * ------------------------------------------/# # + * A |### # # ## # # # ### # # #|# # + * l |# # # # # ## # #|# # + * i |# # # # # # #|# + * a |# #|# + * s | #|# + * e + * s + * + */ + +/** + * Used by the UEnumeration API + */ +typedef struct UAliasContext { + uint32_t listOffset; + uint32_t listIdx; +} UAliasContext; + +static const char DATA_NAME[] = "cnvalias"; +static const char DATA_TYPE[] = "icu"; + +static UDataMemory *gAliasData=NULL; +static icu::UInitOnce gAliasDataInitOnce = U_INITONCE_INITIALIZER; + +enum { + tocLengthIndex=0, + converterListIndex=1, + tagListIndex=2, + aliasListIndex=3, + untaggedConvArrayIndex=4, + taggedAliasArrayIndex=5, + taggedAliasListsIndex=6, + tableOptionsIndex=7, + stringTableIndex=8, + normalizedStringTableIndex=9, + offsetsCount, /* length of the swapper's temporary offsets[] */ + minTocLength=8 /* min. tocLength in the file, does not count the tocLengthIndex! */ +}; + +static const UConverterAliasOptions defaultTableOptions = { + UCNV_IO_UNNORMALIZED, + 0 /* containsCnvOptionInfo */ +}; +static UConverterAlias gMainTable; + +#define GET_STRING(idx) (const char *)(gMainTable.stringTable + (idx)) +#define GET_NORMALIZED_STRING(idx) (const char *)(gMainTable.normalizedStringTable + (idx)) + +static UBool U_CALLCONV +isAcceptable(void * /*context*/, + const char * /*type*/, const char * /*name*/, + const UDataInfo *pInfo) { + return (UBool)( + pInfo->size>=20 && + pInfo->isBigEndian==U_IS_BIG_ENDIAN && + pInfo->charsetFamily==U_CHARSET_FAMILY && + pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ + pInfo->dataFormat[1]==0x76 && + pInfo->dataFormat[2]==0x41 && + pInfo->dataFormat[3]==0x6c && + pInfo->formatVersion[0]==3); +} + +static UBool U_CALLCONV ucnv_io_cleanup(void) +{ + if (gAliasData) { + udata_close(gAliasData); + gAliasData = NULL; + } + gAliasDataInitOnce.reset(); + + uprv_memset(&gMainTable, 0, sizeof(gMainTable)); + + return TRUE; /* Everything was cleaned up */ +} + +static void U_CALLCONV initAliasData(UErrorCode &errCode) { + UDataMemory *data; + const uint16_t *table; + const uint32_t *sectionSizes; + uint32_t tableStart; + uint32_t currOffset; + + ucln_common_registerCleanup(UCLN_COMMON_UCNV_IO, ucnv_io_cleanup); + + U_ASSERT(gAliasData == NULL); + data = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &errCode); + if(U_FAILURE(errCode)) { + return; + } + + sectionSizes = (const uint32_t *)udata_getMemory(data); + table = (const uint16_t *)sectionSizes; + + tableStart = sectionSizes[0]; + if (tableStart < minTocLength) { + errCode = U_INVALID_FORMAT_ERROR; + udata_close(data); + return; + } + gAliasData = data; + + gMainTable.converterListSize = sectionSizes[1]; + gMainTable.tagListSize = sectionSizes[2]; + gMainTable.aliasListSize = sectionSizes[3]; + gMainTable.untaggedConvArraySize = sectionSizes[4]; + gMainTable.taggedAliasArraySize = sectionSizes[5]; + gMainTable.taggedAliasListsSize = sectionSizes[6]; + gMainTable.optionTableSize = sectionSizes[7]; + gMainTable.stringTableSize = sectionSizes[8]; + + if (tableStart > 8) { + gMainTable.normalizedStringTableSize = sectionSizes[9]; + } + + currOffset = tableStart * (sizeof(uint32_t)/sizeof(uint16_t)) + (sizeof(uint32_t)/sizeof(uint16_t)); + gMainTable.converterList = table + currOffset; + + currOffset += gMainTable.converterListSize; + gMainTable.tagList = table + currOffset; + + currOffset += gMainTable.tagListSize; + gMainTable.aliasList = table + currOffset; + + currOffset += gMainTable.aliasListSize; + gMainTable.untaggedConvArray = table + currOffset; + + currOffset += gMainTable.untaggedConvArraySize; + gMainTable.taggedAliasArray = table + currOffset; + + /* aliasLists is a 1's based array, but it has a padding character */ + currOffset += gMainTable.taggedAliasArraySize; + gMainTable.taggedAliasLists = table + currOffset; + + currOffset += gMainTable.taggedAliasListsSize; + if (gMainTable.optionTableSize > 0 + && ((const UConverterAliasOptions *)(table + currOffset))->stringNormalizationType < UCNV_IO_NORM_TYPE_COUNT) + { + /* Faster table */ + gMainTable.optionTable = (const UConverterAliasOptions *)(table + currOffset); + } + else { + /* Smaller table, or I can't handle this normalization mode! + Use the original slower table lookup. */ + gMainTable.optionTable = &defaultTableOptions; + } + + currOffset += gMainTable.optionTableSize; + gMainTable.stringTable = table + currOffset; + + currOffset += gMainTable.stringTableSize; + gMainTable.normalizedStringTable = ((gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED) + ? gMainTable.stringTable : (table + currOffset)); +} + + +static UBool +haveAliasData(UErrorCode *pErrorCode) { + umtx_initOnce(gAliasDataInitOnce, &initAliasData, *pErrorCode); + return U_SUCCESS(*pErrorCode); +} + +static inline UBool +isAlias(const char *alias, UErrorCode *pErrorCode) { + if(alias==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + return (UBool)(*alias!=0); +} + +static uint32_t getTagNumber(const char *tagname) { + if (gMainTable.tagList) { + uint32_t tagNum; + for (tagNum = 0; tagNum < gMainTable.tagListSize; tagNum++) { + if (!uprv_stricmp(GET_STRING(gMainTable.tagList[tagNum]), tagname)) { + return tagNum; + } + } + } + + return UINT32_MAX; +} + +/* character types relevant for ucnv_compareNames() */ +enum { + UIGNORE, + ZERO, + NONZERO, + MINLETTER /* any values from here on are lowercase letter mappings */ +}; + +/* character types for ASCII 00..7F */ +static const uint8_t asciiTypes[128] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0, + 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0, + 0, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0, 0, 0, 0, 0 +}; + +#define GET_ASCII_TYPE(c) ((int8_t)(c) >= 0 ? asciiTypes[(uint8_t)c] : (uint8_t)UIGNORE) + +/* character types for EBCDIC 80..FF */ +static const uint8_t ebcdicTypes[128] = { + 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, + 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, + 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0, 0, 0, 0, 0, 0, + 0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0, 0, 0, 0, 0, 0, + 0, 0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0, 0, 0, 0, 0, 0, + ZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, NONZERO, 0, 0, 0, 0, 0, 0 +}; + +#define GET_EBCDIC_TYPE(c) ((int8_t)(c) < 0 ? ebcdicTypes[(c)&0x7f] : (uint8_t)UIGNORE) + +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define GET_CHAR_TYPE(c) GET_ASCII_TYPE(c) +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define GET_CHAR_TYPE(c) GET_EBCDIC_TYPE(c) +#else +# error U_CHARSET_FAMILY is not valid +#endif + + +/* @see ucnv_compareNames */ +U_CAPI char * U_CALLCONV +ucnv_io_stripASCIIForCompare(char *dst, const char *name) { + char *dstItr = dst; + uint8_t type, nextType; + char c1; + UBool afterDigit = FALSE; + + while ((c1 = *name++) != 0) { + type = GET_ASCII_TYPE(c1); + switch (type) { + case UIGNORE: + afterDigit = FALSE; + continue; /* ignore all but letters and digits */ + case ZERO: + if (!afterDigit) { + nextType = GET_ASCII_TYPE(*name); + if (nextType == ZERO || nextType == NONZERO) { + continue; /* ignore leading zero before another digit */ + } + } + break; + case NONZERO: + afterDigit = TRUE; + break; + default: + c1 = (char)type; /* lowercased letter */ + afterDigit = FALSE; + break; + } + *dstItr++ = c1; + } + *dstItr = 0; + return dst; +} + +U_CAPI char * U_CALLCONV +ucnv_io_stripEBCDICForCompare(char *dst, const char *name) { + char *dstItr = dst; + uint8_t type, nextType; + char c1; + UBool afterDigit = FALSE; + + while ((c1 = *name++) != 0) { + type = GET_EBCDIC_TYPE(c1); + switch (type) { + case UIGNORE: + afterDigit = FALSE; + continue; /* ignore all but letters and digits */ + case ZERO: + if (!afterDigit) { + nextType = GET_EBCDIC_TYPE(*name); + if (nextType == ZERO || nextType == NONZERO) { + continue; /* ignore leading zero before another digit */ + } + } + break; + case NONZERO: + afterDigit = TRUE; + break; + default: + c1 = (char)type; /* lowercased letter */ + afterDigit = FALSE; + break; + } + *dstItr++ = c1; + } + *dstItr = 0; + return dst; +} + +/** + * Do a fuzzy compare of two converter/alias names. + * The comparison is case-insensitive, ignores leading zeroes if they are not + * followed by further digits, and ignores all but letters and digits. + * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent. + * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22 + * at http://www.unicode.org/reports/tr22/ + * + * This is a symmetrical (commutative) operation; order of arguments + * is insignificant. This is an important property for sorting the + * list (when the list is preprocessed into binary form) and for + * performing binary searches on it at run time. + * + * @param name1 a converter name or alias, zero-terminated + * @param name2 a converter name or alias, zero-terminated + * @return 0 if the names match, or a negative value if the name1 + * lexically precedes name2, or a positive value if the name1 + * lexically follows name2. + * + * @see ucnv_io_stripForCompare + */ +U_CAPI int U_EXPORT2 +ucnv_compareNames(const char *name1, const char *name2) { + int rc; + uint8_t type, nextType; + char c1, c2; + UBool afterDigit1 = FALSE, afterDigit2 = FALSE; + + for (;;) { + while ((c1 = *name1++) != 0) { + type = GET_CHAR_TYPE(c1); + switch (type) { + case UIGNORE: + afterDigit1 = FALSE; + continue; /* ignore all but letters and digits */ + case ZERO: + if (!afterDigit1) { + nextType = GET_CHAR_TYPE(*name1); + if (nextType == ZERO || nextType == NONZERO) { + continue; /* ignore leading zero before another digit */ + } + } + break; + case NONZERO: + afterDigit1 = TRUE; + break; + default: + c1 = (char)type; /* lowercased letter */ + afterDigit1 = FALSE; + break; + } + break; /* deliver c1 */ + } + while ((c2 = *name2++) != 0) { + type = GET_CHAR_TYPE(c2); + switch (type) { + case UIGNORE: + afterDigit2 = FALSE; + continue; /* ignore all but letters and digits */ + case ZERO: + if (!afterDigit2) { + nextType = GET_CHAR_TYPE(*name2); + if (nextType == ZERO || nextType == NONZERO) { + continue; /* ignore leading zero before another digit */ + } + } + break; + case NONZERO: + afterDigit2 = TRUE; + break; + default: + c2 = (char)type; /* lowercased letter */ + afterDigit2 = FALSE; + break; + } + break; /* deliver c2 */ + } + + /* If we reach the ends of both strings then they match */ + if ((c1|c2)==0) { + return 0; + } + + /* Case-insensitive comparison */ + rc = (int)(unsigned char)c1 - (int)(unsigned char)c2; + if (rc != 0) { + return rc; + } + } +} + +/* + * search for an alias + * return the converter number index for gConverterList + */ +static inline uint32_t +findConverter(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) { + uint32_t mid, start, limit; + uint32_t lastMid; + int result; + int isUnnormalized = (gMainTable.optionTable->stringNormalizationType == UCNV_IO_UNNORMALIZED); + char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; + + if (!isUnnormalized) { + if (uprv_strlen(alias) >= UCNV_MAX_CONVERTER_NAME_LENGTH) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + return UINT32_MAX; + } + + /* Lower case and remove ignoreable characters. */ + ucnv_io_stripForCompare(strippedName, alias); + alias = strippedName; + } + + /* do a binary search for the alias */ + start = 0; + limit = gMainTable.untaggedConvArraySize; + mid = limit; + lastMid = UINT32_MAX; + + for (;;) { + mid = (uint32_t)((start + limit) / 2); + if (lastMid == mid) { /* Have we moved? */ + break; /* We haven't moved, and it wasn't found. */ + } + lastMid = mid; + if (isUnnormalized) { + result = ucnv_compareNames(alias, GET_STRING(gMainTable.aliasList[mid])); + } + else { + result = uprv_strcmp(alias, GET_NORMALIZED_STRING(gMainTable.aliasList[mid])); + } + + if (result < 0) { + limit = mid; + } else if (result > 0) { + start = mid; + } else { + /* Since the gencnval tool folds duplicates into one entry, + * this alias in gAliasList is unique, but different standards + * may map an alias to different converters. + */ + if (gMainTable.untaggedConvArray[mid] & UCNV_AMBIGUOUS_ALIAS_MAP_BIT) { + *pErrorCode = U_AMBIGUOUS_ALIAS_WARNING; + } + /* State whether the canonical converter name contains an option. + This information is contained in this list in order to maintain backward & forward compatibility. */ + if (containsOption) { + UBool containsCnvOptionInfo = (UBool)gMainTable.optionTable->containsCnvOptionInfo; + *containsOption = (UBool)((containsCnvOptionInfo + && ((gMainTable.untaggedConvArray[mid] & UCNV_CONTAINS_OPTION_BIT) != 0)) + || !containsCnvOptionInfo); + } + return gMainTable.untaggedConvArray[mid] & UCNV_CONVERTER_INDEX_MASK; + } + } + + return UINT32_MAX; +} + +/* + * Is this alias in this list? + * alias and listOffset should be non-NULL. + */ +static inline UBool +isAliasInList(const char *alias, uint32_t listOffset) { + if (listOffset) { + uint32_t currAlias; + uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; + /* +1 to skip listCount */ + const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; + for (currAlias = 0; currAlias < listCount; currAlias++) { + if (currList[currAlias] + && ucnv_compareNames(alias, GET_STRING(currList[currAlias]))==0) + { + return TRUE; + } + } + } + return FALSE; +} + +/* + * Search for an standard name of an alias (what is the default name + * that this standard uses?) + * return the listOffset for gTaggedAliasLists. If it's 0, + * the it couldn't be found, but the parameters are valid. + */ +static uint32_t +findTaggedAliasListsOffset(const char *alias, const char *standard, UErrorCode *pErrorCode) { + uint32_t idx; + uint32_t listOffset; + uint32_t convNum; + UErrorCode myErr = U_ZERO_ERROR; + uint32_t tagNum = getTagNumber(standard); + + /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ + convNum = findConverter(alias, NULL, &myErr); + if (myErr != U_ZERO_ERROR) { + *pErrorCode = myErr; + } + + if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { + listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; + if (listOffset && gMainTable.taggedAliasLists[listOffset + 1]) { + return listOffset; + } + if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { + /* Uh Oh! They used an ambiguous alias. + We have to search the whole swiss cheese starting + at the highest standard affinity. + This may take a while. + */ + for (idx = 0; idx < gMainTable.taggedAliasArraySize; idx++) { + listOffset = gMainTable.taggedAliasArray[idx]; + if (listOffset && isAliasInList(alias, listOffset)) { + uint32_t currTagNum = idx/gMainTable.converterListSize; + uint32_t currConvNum = (idx - currTagNum*gMainTable.converterListSize); + uint32_t tempListOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + currConvNum]; + if (tempListOffset && gMainTable.taggedAliasLists[tempListOffset + 1]) { + return tempListOffset; + } + /* else keep on looking */ + /* We could speed this up by starting on the next row + because an alias is unique per row, right now. + This would change if alias versioning appears. */ + } + } + /* The standard doesn't know about the alias */ + } + /* else no default name */ + return 0; + } + /* else converter or tag not found */ + + return UINT32_MAX; +} + +/* Return the canonical name */ +static uint32_t +findTaggedConverterNum(const char *alias, const char *standard, UErrorCode *pErrorCode) { + uint32_t idx; + uint32_t listOffset; + uint32_t convNum; + UErrorCode myErr = U_ZERO_ERROR; + uint32_t tagNum = getTagNumber(standard); + + /* Make a quick guess. Hopefully they used a TR22 canonical alias. */ + convNum = findConverter(alias, NULL, &myErr); + if (myErr != U_ZERO_ERROR) { + *pErrorCode = myErr; + } + + if (tagNum < (gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) && convNum < gMainTable.converterListSize) { + listOffset = gMainTable.taggedAliasArray[tagNum*gMainTable.converterListSize + convNum]; + if (listOffset && isAliasInList(alias, listOffset)) { + return convNum; + } + if (myErr == U_AMBIGUOUS_ALIAS_WARNING) { + /* Uh Oh! They used an ambiguous alias. + We have to search one slice of the swiss cheese. + We search only in the requested tag, not the whole thing. + This may take a while. + */ + uint32_t convStart = (tagNum)*gMainTable.converterListSize; + uint32_t convLimit = (tagNum+1)*gMainTable.converterListSize; + for (idx = convStart; idx < convLimit; idx++) { + listOffset = gMainTable.taggedAliasArray[idx]; + if (listOffset && isAliasInList(alias, listOffset)) { + return idx-convStart; + } + } + /* The standard doesn't know about the alias */ + } + /* else no canonical name */ + } + /* else converter or tag not found */ + + return UINT32_MAX; +} + +U_CAPI const char * +ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode) { + const char *aliasTmp = alias; + int32_t i = 0; + for (i = 0; i < 2; i++) { + if (i == 1) { + /* + * After the first unsuccess converter lookup, check to see if + * the name begins with 'x-'. If it does, strip it off and try + * again. This behaviour is similar to how ICU4J does it. + */ + if (aliasTmp[0] == 'x' && aliasTmp[1] == '-') { + aliasTmp = aliasTmp+2; + } else { + break; + } + } + if(haveAliasData(pErrorCode) && isAlias(aliasTmp, pErrorCode)) { + uint32_t convNum = findConverter(aliasTmp, containsOption, pErrorCode); + if (convNum < gMainTable.converterListSize) { + return GET_STRING(gMainTable.converterList[convNum]); + } + /* else converter not found */ + } else { + break; + } + } + + return NULL; +} + +U_CDECL_BEGIN + + +static int32_t U_CALLCONV +ucnv_io_countStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { + int32_t value = 0; + UAliasContext *myContext = (UAliasContext *)(enumerator->context); + uint32_t listOffset = myContext->listOffset; + + if (listOffset) { + value = gMainTable.taggedAliasLists[listOffset]; + } + return value; +} + +static const char * U_CALLCONV +ucnv_io_nextStandardAliases(UEnumeration *enumerator, + int32_t* resultLength, + UErrorCode * /*pErrorCode*/) +{ + UAliasContext *myContext = (UAliasContext *)(enumerator->context); + uint32_t listOffset = myContext->listOffset; + + if (listOffset) { + uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; + const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; + + if (myContext->listIdx < listCount) { + const char *myStr = GET_STRING(currList[myContext->listIdx++]); + if (resultLength) { + *resultLength = (int32_t)uprv_strlen(myStr); + } + return myStr; + } + } + /* Either we accessed a zero length list, or we enumerated too far. */ + if (resultLength) { + *resultLength = 0; + } + return NULL; +} + +static void U_CALLCONV +ucnv_io_resetStandardAliases(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { + ((UAliasContext *)(enumerator->context))->listIdx = 0; +} + +static void U_CALLCONV +ucnv_io_closeUEnumeration(UEnumeration *enumerator) { + uprv_free(enumerator->context); + uprv_free(enumerator); +} + +U_CDECL_END + +/* Enumerate the aliases for the specified converter and standard tag */ +static const UEnumeration gEnumAliases = { + NULL, + NULL, + ucnv_io_closeUEnumeration, + ucnv_io_countStandardAliases, + uenum_unextDefault, + ucnv_io_nextStandardAliases, + ucnv_io_resetStandardAliases +}; + +U_CAPI UEnumeration * U_EXPORT2 +ucnv_openStandardNames(const char *convName, + const char *standard, + UErrorCode *pErrorCode) +{ + UEnumeration *myEnum = NULL; + if (haveAliasData(pErrorCode) && isAlias(convName, pErrorCode)) { + uint32_t listOffset = findTaggedAliasListsOffset(convName, standard, pErrorCode); + + /* When listOffset == 0, we want to acknowledge that the + converter name and standard are okay, but there + is nothing to enumerate. */ + if (listOffset < gMainTable.taggedAliasListsSize) { + UAliasContext *myContext; + + myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))); + if (myEnum == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memcpy(myEnum, &gEnumAliases, sizeof(UEnumeration)); + myContext = static_cast<UAliasContext *>(uprv_malloc(sizeof(UAliasContext))); + if (myContext == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + uprv_free(myEnum); + return NULL; + } + myContext->listOffset = listOffset; + myContext->listIdx = 0; + myEnum->context = myContext; + } + /* else converter or tag not found */ + } + return myEnum; +} + +static uint16_t +ucnv_io_countAliases(const char *alias, UErrorCode *pErrorCode) { + if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { + uint32_t convNum = findConverter(alias, NULL, pErrorCode); + if (convNum < gMainTable.converterListSize) { + /* tagListNum - 1 is the ALL tag */ + int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; + + if (listOffset) { + return gMainTable.taggedAliasLists[listOffset]; + } + /* else this shouldn't happen. internal program error */ + } + /* else converter not found */ + } + return 0; +} + +static uint16_t +ucnv_io_getAliases(const char *alias, uint16_t start, const char **aliases, UErrorCode *pErrorCode) { + if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { + uint32_t currAlias; + uint32_t convNum = findConverter(alias, NULL, pErrorCode); + if (convNum < gMainTable.converterListSize) { + /* tagListNum - 1 is the ALL tag */ + int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; + + if (listOffset) { + uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; + /* +1 to skip listCount */ + const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; + + for (currAlias = start; currAlias < listCount; currAlias++) { + aliases[currAlias] = GET_STRING(currList[currAlias]); + } + } + /* else this shouldn't happen. internal program error */ + } + /* else converter not found */ + } + return 0; +} + +static const char * +ucnv_io_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) { + if(haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { + uint32_t convNum = findConverter(alias, NULL, pErrorCode); + if (convNum < gMainTable.converterListSize) { + /* tagListNum - 1 is the ALL tag */ + int32_t listOffset = gMainTable.taggedAliasArray[(gMainTable.tagListSize - 1)*gMainTable.converterListSize + convNum]; + + if (listOffset) { + uint32_t listCount = gMainTable.taggedAliasLists[listOffset]; + /* +1 to skip listCount */ + const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; + + if (n < listCount) { + return GET_STRING(currList[n]); + } + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + } + /* else this shouldn't happen. internal program error */ + } + /* else converter not found */ + } + return NULL; +} + +static uint16_t +ucnv_io_countStandards(UErrorCode *pErrorCode) { + if (haveAliasData(pErrorCode)) { + /* Don't include the empty list */ + return (uint16_t)(gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS); + } + + return 0; +} + +U_CAPI const char * U_EXPORT2 +ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode) { + if (haveAliasData(pErrorCode)) { + if (n < gMainTable.tagListSize - UCNV_NUM_HIDDEN_TAGS) { + return GET_STRING(gMainTable.tagList[n]); + } + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + } + + return NULL; +} + +U_CAPI const char * U_EXPORT2 +ucnv_getStandardName(const char *alias, const char *standard, UErrorCode *pErrorCode) { + if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { + uint32_t listOffset = findTaggedAliasListsOffset(alias, standard, pErrorCode); + + if (0 < listOffset && listOffset < gMainTable.taggedAliasListsSize) { + const uint16_t *currList = gMainTable.taggedAliasLists + listOffset + 1; + + /* Get the preferred name from this list */ + if (currList[0]) { + return GET_STRING(currList[0]); + } + /* else someone screwed up the alias table. */ + /* *pErrorCode = U_INVALID_FORMAT_ERROR */ + } + } + + return NULL; +} + +U_CAPI uint16_t U_EXPORT2 +ucnv_countAliases(const char *alias, UErrorCode *pErrorCode) +{ + return ucnv_io_countAliases(alias, pErrorCode); +} + + +U_CAPI const char* U_EXPORT2 +ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode) +{ + return ucnv_io_getAlias(alias, n, pErrorCode); +} + +U_CAPI void U_EXPORT2 +ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode) +{ + ucnv_io_getAliases(alias, 0, aliases, pErrorCode); +} + +U_CAPI uint16_t U_EXPORT2 +ucnv_countStandards(void) +{ + UErrorCode err = U_ZERO_ERROR; + return ucnv_io_countStandards(&err); +} + +U_CAPI const char * U_EXPORT2 +ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode) { + if (haveAliasData(pErrorCode) && isAlias(alias, pErrorCode)) { + uint32_t convNum = findTaggedConverterNum(alias, standard, pErrorCode); + + if (convNum < gMainTable.converterListSize) { + return GET_STRING(gMainTable.converterList[convNum]); + } + } + + return NULL; +} + +U_CDECL_BEGIN + + +static int32_t U_CALLCONV +ucnv_io_countAllConverters(UEnumeration * /*enumerator*/, UErrorCode * /*pErrorCode*/) { + return gMainTable.converterListSize; +} + +static const char * U_CALLCONV +ucnv_io_nextAllConverters(UEnumeration *enumerator, + int32_t* resultLength, + UErrorCode * /*pErrorCode*/) +{ + uint16_t *myContext = (uint16_t *)(enumerator->context); + + if (*myContext < gMainTable.converterListSize) { + const char *myStr = GET_STRING(gMainTable.converterList[(*myContext)++]); + if (resultLength) { + *resultLength = (int32_t)uprv_strlen(myStr); + } + return myStr; + } + /* Either we accessed a zero length list, or we enumerated too far. */ + if (resultLength) { + *resultLength = 0; + } + return NULL; +} + +static void U_CALLCONV +ucnv_io_resetAllConverters(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { + *((uint16_t *)(enumerator->context)) = 0; +} +U_CDECL_END +static const UEnumeration gEnumAllConverters = { + NULL, + NULL, + ucnv_io_closeUEnumeration, + ucnv_io_countAllConverters, + uenum_unextDefault, + ucnv_io_nextAllConverters, + ucnv_io_resetAllConverters +}; + +U_CAPI UEnumeration * U_EXPORT2 +ucnv_openAllNames(UErrorCode *pErrorCode) { + UEnumeration *myEnum = NULL; + if (haveAliasData(pErrorCode)) { + uint16_t *myContext; + + myEnum = static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))); + if (myEnum == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memcpy(myEnum, &gEnumAllConverters, sizeof(UEnumeration)); + myContext = static_cast<uint16_t *>(uprv_malloc(sizeof(uint16_t))); + if (myContext == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + uprv_free(myEnum); + return NULL; + } + *myContext = 0; + myEnum->context = myContext; + } + return myEnum; +} + +U_CAPI uint16_t +ucnv_io_countKnownConverters(UErrorCode *pErrorCode) { + if (haveAliasData(pErrorCode)) { + return (uint16_t)gMainTable.converterListSize; + } + return 0; +} + +/* alias table swapping ----------------------------------------------------- */ + +U_CDECL_BEGIN + +typedef char * U_CALLCONV StripForCompareFn(char *dst, const char *name); +U_CDECL_END + + +/* + * row of a temporary array + * + * gets platform-endian charset string indexes and sorting indexes; + * after sorting this array by strings, the actual arrays are permutated + * according to the sorting indexes + */ +typedef struct TempRow { + uint16_t strIndex, sortIndex; +} TempRow; + +typedef struct TempAliasTable { + const char *chars; + TempRow *rows; + uint16_t *resort; + StripForCompareFn *stripForCompare; +} TempAliasTable; + +enum { + STACK_ROW_CAPACITY=500 +}; + +static int32_t U_CALLCONV +io_compareRows(const void *context, const void *left, const void *right) { + char strippedLeft[UCNV_MAX_CONVERTER_NAME_LENGTH], + strippedRight[UCNV_MAX_CONVERTER_NAME_LENGTH]; + + TempAliasTable *tempTable=(TempAliasTable *)context; + const char *chars=tempTable->chars; + + return (int32_t)uprv_strcmp(tempTable->stripForCompare(strippedLeft, chars+2*((const TempRow *)left)->strIndex), + tempTable->stripForCompare(strippedRight, chars+2*((const TempRow *)right)->strIndex)); +} + +U_CAPI int32_t U_EXPORT2 +ucnv_swapAliases(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UDataInfo *pInfo; + int32_t headerSize; + + const uint16_t *inTable; + const uint32_t *inSectionSizes; + uint32_t toc[offsetsCount]; + uint32_t offsets[offsetsCount]; /* 16-bit-addressed offsets from inTable/outTable */ + uint32_t i, count, tocLength, topOffset; + + TempRow rows[STACK_ROW_CAPACITY]; + uint16_t resort[STACK_ROW_CAPACITY]; + TempAliasTable tempTable; + + /* udata_swapDataHeader checks the arguments */ + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* check data format and format version */ + pInfo=(const UDataInfo *)((const char *)inData+4); + if(!( + pInfo->dataFormat[0]==0x43 && /* dataFormat="CvAl" */ + pInfo->dataFormat[1]==0x76 && + pInfo->dataFormat[2]==0x41 && + pInfo->dataFormat[3]==0x6c && + pInfo->formatVersion[0]==3 + )) { + udata_printError(ds, "ucnv_swapAliases(): data format %02x.%02x.%02x.%02x (format version %02x) is not an alias table\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + /* an alias table must contain at least the table of contents array */ + if(length>=0 && (length-headerSize)<4*(1+minTocLength)) { + udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", + length-headerSize); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inSectionSizes=(const uint32_t *)((const char *)inData+headerSize); + inTable=(const uint16_t *)inSectionSizes; + uprv_memset(toc, 0, sizeof(toc)); + toc[tocLengthIndex]=tocLength=ds->readUInt32(inSectionSizes[tocLengthIndex]); + if(tocLength<minTocLength || offsetsCount<=tocLength) { + udata_printError(ds, "ucnv_swapAliases(): table of contents contains unsupported number of sections (%u sections)\n", tocLength); + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + + /* read the known part of the table of contents */ + for(i=converterListIndex; i<=tocLength; ++i) { + toc[i]=ds->readUInt32(inSectionSizes[i]); + } + + /* compute offsets */ + uprv_memset(offsets, 0, sizeof(offsets)); + offsets[converterListIndex]=2*(1+tocLength); /* count two 16-bit units per toc entry */ + for(i=tagListIndex; i<=tocLength; ++i) { + offsets[i]=offsets[i-1]+toc[i-1]; + } + + /* compute the overall size of the after-header data, in numbers of 16-bit units */ + topOffset=offsets[i-1]+toc[i-1]; + + if(length>=0) { + uint16_t *outTable; + const uint16_t *p, *p2; + uint16_t *q, *q2; + uint16_t oldIndex; + + if((length-headerSize)<(2*(int32_t)topOffset)) { + udata_printError(ds, "ucnv_swapAliases(): too few bytes (%d after header) for an alias table\n", + length-headerSize); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + outTable=(uint16_t *)((char *)outData+headerSize); + + /* swap the entire table of contents */ + ds->swapArray32(ds, inTable, 4*(1+tocLength), outTable, pErrorCode); + + /* swap unormalized strings & normalized strings */ + ds->swapInvChars(ds, inTable+offsets[stringTableIndex], 2*(int32_t)(toc[stringTableIndex]+toc[normalizedStringTableIndex]), + outTable+offsets[stringTableIndex], pErrorCode); + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "ucnv_swapAliases().swapInvChars(charset names) failed\n"); + return 0; + } + + if(ds->inCharset==ds->outCharset) { + /* no need to sort, just swap all 16-bit values together */ + ds->swapArray16(ds, + inTable+offsets[converterListIndex], + 2*(int32_t)(offsets[stringTableIndex]-offsets[converterListIndex]), + outTable+offsets[converterListIndex], + pErrorCode); + } else { + /* allocate the temporary table for sorting */ + count=toc[aliasListIndex]; + + tempTable.chars=(const char *)(outTable+offsets[stringTableIndex]); /* sort by outCharset */ + + if(count<=STACK_ROW_CAPACITY) { + tempTable.rows=rows; + tempTable.resort=resort; + } else { + tempTable.rows=(TempRow *)uprv_malloc(count*sizeof(TempRow)+count*2); + if(tempTable.rows==NULL) { + udata_printError(ds, "ucnv_swapAliases(): unable to allocate memory for sorting tables (max length: %u)\n", + count); + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return 0; + } + tempTable.resort=(uint16_t *)(tempTable.rows+count); + } + + if(ds->outCharset==U_ASCII_FAMILY) { + tempTable.stripForCompare=ucnv_io_stripASCIIForCompare; + } else /* U_EBCDIC_FAMILY */ { + tempTable.stripForCompare=ucnv_io_stripEBCDICForCompare; + } + + /* + * Sort unique aliases+mapped names. + * + * We need to sort the list again by outCharset strings because they + * sort differently for different charset families. + * First we set up a temporary table with the string indexes and + * sorting indexes and sort that. + * Then we permutate and copy/swap the actual values. + */ + p=inTable+offsets[aliasListIndex]; + q=outTable+offsets[aliasListIndex]; + + p2=inTable+offsets[untaggedConvArrayIndex]; + q2=outTable+offsets[untaggedConvArrayIndex]; + + for(i=0; i<count; ++i) { + tempTable.rows[i].strIndex=ds->readUInt16(p[i]); + tempTable.rows[i].sortIndex=(uint16_t)i; + } + + uprv_sortArray(tempTable.rows, (int32_t)count, sizeof(TempRow), + io_compareRows, &tempTable, + FALSE, pErrorCode); + + if(U_SUCCESS(*pErrorCode)) { + /* copy/swap/permutate items */ + if(p!=q) { + for(i=0; i<count; ++i) { + oldIndex=tempTable.rows[i].sortIndex; + ds->swapArray16(ds, p+oldIndex, 2, q+i, pErrorCode); + ds->swapArray16(ds, p2+oldIndex, 2, q2+i, pErrorCode); + } + } else { + /* + * If we swap in-place, then the permutation must use another + * temporary array (tempTable.resort) + * before the results are copied to the outBundle. + */ + uint16_t *r=tempTable.resort; + + for(i=0; i<count; ++i) { + oldIndex=tempTable.rows[i].sortIndex; + ds->swapArray16(ds, p+oldIndex, 2, r+i, pErrorCode); + } + uprv_memcpy(q, r, 2*(size_t)count); + + for(i=0; i<count; ++i) { + oldIndex=tempTable.rows[i].sortIndex; + ds->swapArray16(ds, p2+oldIndex, 2, r+i, pErrorCode); + } + uprv_memcpy(q2, r, 2*(size_t)count); + } + } + + if(tempTable.rows!=rows) { + uprv_free(tempTable.rows); + } + + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "ucnv_swapAliases().uprv_sortArray(%u items) failed\n", + count); + return 0; + } + + /* swap remaining 16-bit values */ + ds->swapArray16(ds, + inTable+offsets[converterListIndex], + 2*(int32_t)(offsets[aliasListIndex]-offsets[converterListIndex]), + outTable+offsets[converterListIndex], + pErrorCode); + ds->swapArray16(ds, + inTable+offsets[taggedAliasArrayIndex], + 2*(int32_t)(offsets[stringTableIndex]-offsets[taggedAliasArrayIndex]), + outTable+offsets[taggedAliasArrayIndex], + pErrorCode); + } + } + + return headerSize+2*(int32_t)topOffset; +} + +#endif + + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/thirdparty/icu4c/common/ucnv_io.h b/thirdparty/icu4c/common/ucnv_io.h new file mode 100644 index 0000000000..8f2d7b5a02 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_io.h @@ -0,0 +1,127 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ********************************************************************** + * Copyright (C) 1999-2006, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** + * + * + * ucnv_io.h: + * defines variables and functions pertaining to converter name resolution + * aspect of the conversion code + */ + +#ifndef UCNV_IO_H +#define UCNV_IO_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "udataswp.h" + +#define UCNV_AMBIGUOUS_ALIAS_MAP_BIT 0x8000 +#define UCNV_CONTAINS_OPTION_BIT 0x4000 +#define UCNV_CONVERTER_INDEX_MASK 0xFFF +#define UCNV_NUM_RESERVED_TAGS 2 +#define UCNV_NUM_HIDDEN_TAGS 1 + +enum { + UCNV_IO_UNNORMALIZED, + UCNV_IO_STD_NORMALIZED, + UCNV_IO_NORM_TYPE_COUNT +}; + +typedef struct { + uint16_t stringNormalizationType; + uint16_t containsCnvOptionInfo; +} UConverterAliasOptions; + +typedef struct UConverterAlias { + const uint16_t *converterList; + const uint16_t *tagList; + const uint16_t *aliasList; + const uint16_t *untaggedConvArray; + const uint16_t *taggedAliasArray; + const uint16_t *taggedAliasLists; + const UConverterAliasOptions *optionTable; + const uint16_t *stringTable; + const uint16_t *normalizedStringTable; + + uint32_t converterListSize; + uint32_t tagListSize; + uint32_t aliasListSize; + uint32_t untaggedConvArraySize; + uint32_t taggedAliasArraySize; + uint32_t taggedAliasListsSize; + uint32_t optionTableSize; + uint32_t stringTableSize; + uint32_t normalizedStringTableSize; +} UConverterAlias; + +/** + * \var ucnv_io_stripForCompare + * Remove the underscores, dashes and spaces from the name, and convert + * the name to lower case. + * @param dst The destination buffer, which is <= the buffer of name. + * @param dst The destination buffer, which is <= the buffer of name. + * @see ucnv_compareNames + * @return the destination buffer. + */ +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define ucnv_io_stripForCompare ucnv_io_stripASCIIForCompare +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define ucnv_io_stripForCompare ucnv_io_stripEBCDICForCompare +#else +# error U_CHARSET_FAMILY is not valid +#endif + +U_CAPI char * U_CALLCONV +ucnv_io_stripASCIIForCompare(char *dst, const char *name); + +U_CAPI char * U_CALLCONV +ucnv_io_stripEBCDICForCompare(char *dst, const char *name); + +/** + * Map a converter alias name to a canonical converter name. + * The alias is searched for case-insensitively, the converter name + * is returned in mixed-case. + * Returns NULL if the alias is not found. + * @param alias The alias name to be searched. + * @param containsOption A return value stating whether the returned converter name contains an option (a comma) + * @param pErrorCode The error code + * @return the converter name in mixed-case, return NULL if the alias is not found. + */ +U_CAPI const char * +ucnv_io_getConverterName(const char *alias, UBool *containsOption, UErrorCode *pErrorCode); + +/** + * Return the number of all known converter names (no aliases). + * @param pErrorCode The error code + * @return the number of all aliases + */ +U_CAPI uint16_t +ucnv_io_countKnownConverters(UErrorCode *pErrorCode); + +/** + * Swap an ICU converter alias table. See implementation for details. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +ucnv_swapAliases(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +#endif + +#endif /* _UCNV_IO */ + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/thirdparty/icu4c/common/ucnv_lmb.cpp b/thirdparty/icu4c/common/ucnv_lmb.cpp new file mode 100644 index 0000000000..168392837b --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_lmb.cpp @@ -0,0 +1,1388 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_lmb.cpp +* encoding: UTF-8 +* tab size: 4 (not used) +* indentation:4 +* +* created on: 2000feb09 +* created by: Brendan Murray +* extensively hacked up by: Jim Snyder-Grant +* +* Modification History: +* +* Date Name Description +* +* 06/20/2000 helena OS/400 port changes; mostly typecast. +* 06/27/2000 Jim Snyder-Grant Deal with partial characters and small buffers. +* Add comments to document LMBCS format and implementation +* restructured order & breakdown of functions +* 06/28/2000 helena Major rewrite for the callback API changes. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "unicode/ucnv_err.h" +#include "unicode/ucnv.h" +#include "unicode/uset.h" +#include "cmemory.h" +#include "cstring.h" +#include "uassert.h" +#include "ucnv_imp.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" + +#ifdef EBCDIC_RTL + #include "ascii_a.h" +#endif + +/* + LMBCS + + (Lotus Multi-Byte Character Set) + + LMBCS was invented in the late 1980's and is primarily used in Lotus Notes + databases and in Lotus 1-2-3 files. Programmers who work with the APIs + into these products will sometimes need to deal with strings in this format. + + The code in this file provides an implementation for an ICU converter of + LMBCS to and from Unicode. + + Since the LMBCS character set is only sparsely documented in existing + printed or online material, we have added extensive annotation to this + file to serve as a guide to understanding LMBCS. + + LMBCS was originally designed with these four sometimes-competing design goals: + + -Provide encodings for the characters in 12 existing national standards + (plus a few other characters) + -Minimal memory footprint + -Maximal speed of conversion into the existing national character sets + -No need to track a changing state as you interpret a string. + + + All of the national character sets LMBCS was trying to encode are 'ANSI' + based, in that the bytes from 0x20 - 0x7F are almost exactly the + same common Latin unaccented characters and symbols in all character sets. + + So, in order to help meet the speed & memory design goals, the common ANSI + bytes from 0x20-0x7F are represented by the same single-byte values in LMBCS. + + The general LMBCS code unit is from 1-3 bytes. We can describe the 3 bytes as + follows: + + [G] D1 [D2] + + That is, a sometimes-optional 'group' byte, followed by 1 and sometimes 2 + data bytes. The maximum size of a LMBCS chjaracter is 3 bytes: +*/ +#define ULMBCS_CHARSIZE_MAX 3 +/* + The single-byte values from 0x20 to 0x7F are examples of single D1 bytes. + We often have to figure out if byte values are below or above this, so we + use the ANSI nomenclature 'C0' and 'C1' to refer to the range of control + characters just above & below the common lower-ANSI range */ +#define ULMBCS_C0END 0x1F +#define ULMBCS_C1START 0x80 +/* + Since LMBCS is always dealing in byte units. we create a local type here for + dealing with these units of LMBCS code units: + +*/ +typedef uint8_t ulmbcs_byte_t; + +/* + Most of the values less than 0x20 are reserved in LMBCS to announce + which national character standard is being used for the 'D' bytes. + In the comments we show the common name and the IBM character-set ID + for these character-set announcers: +*/ + +#define ULMBCS_GRP_L1 0x01 /* Latin-1 :ibm-850 */ +#define ULMBCS_GRP_GR 0x02 /* Greek :ibm-851 */ +#define ULMBCS_GRP_HE 0x03 /* Hebrew :ibm-1255 */ +#define ULMBCS_GRP_AR 0x04 /* Arabic :ibm-1256 */ +#define ULMBCS_GRP_RU 0x05 /* Cyrillic :ibm-1251 */ +#define ULMBCS_GRP_L2 0x06 /* Latin-2 :ibm-852 */ +#define ULMBCS_GRP_TR 0x08 /* Turkish :ibm-1254 */ +#define ULMBCS_GRP_TH 0x0B /* Thai :ibm-874 */ +#define ULMBCS_GRP_JA 0x10 /* Japanese :ibm-943 */ +#define ULMBCS_GRP_KO 0x11 /* Korean :ibm-1261 */ +#define ULMBCS_GRP_TW 0x12 /* Chinese SC :ibm-950 */ +#define ULMBCS_GRP_CN 0x13 /* Chinese TC :ibm-1386 */ + +/* + So, the beginning of understanding LMBCS is that IF the first byte of a LMBCS + character is one of those 12 values, you can interpret the remaining bytes of + that character as coming from one of those character sets. Since the lower + ANSI bytes already are represented in single bytes, using one of the character + set announcers is used to announce a character that starts with a byte of + 0x80 or greater. + + The character sets are arranged so that the single byte sets all appear + before the multi-byte character sets. When we need to tell whether a + group byte is for a single byte char set or not we use this define: */ + +#define ULMBCS_DOUBLEOPTGROUP_START 0x10 + +/* +However, to fully understand LMBCS, you must also understand a series of +exceptions & optimizations made in service of the design goals. + +First, those of you who are character set mavens may have noticed that +the 'double-byte' character sets are actually multi-byte character sets +that can have 1 or two bytes, even in the upper-ascii range. To force +each group byte to introduce a fixed-width encoding (to make it faster to +count characters), we use a convention of doubling up on the group byte +to introduce any single-byte character > 0x80 in an otherwise double-byte +character set. So, for example, the LMBCS sequence x10 x10 xAE is the +same as '0xAE' in the Japanese code page 943. + +Next, you will notice that the list of group bytes has some gaps. +These are used in various ways. + +We reserve a few special single byte values for common control +characters. These are in the same place as their ANSI eqivalents for speed. +*/ + +#define ULMBCS_HT 0x09 /* Fixed control char - Horizontal Tab */ +#define ULMBCS_LF 0x0A /* Fixed control char - Line Feed */ +#define ULMBCS_CR 0x0D /* Fixed control char - Carriage Return */ + +/* Then, 1-2-3 reserved a special single-byte character to put at the +beginning of internal 'system' range names: */ + +#define ULMBCS_123SYSTEMRANGE 0x19 + +/* Then we needed a place to put all the other ansi control characters +that must be moved to different values because LMBCS reserves those +values for other purposes. To represent the control characters, we start +with a first byte of 0xF & add the control chaarcter value as the +second byte */ +#define ULMBCS_GRP_CTRL 0x0F + +/* For the C0 controls (less than 0x20), we add 0x20 to preserve the +useful doctrine that any byte less than 0x20 in a LMBCS char must be +the first byte of a character:*/ +#define ULMBCS_CTRLOFFSET 0x20 + +/* +Where to put the characters that aren't part of any of the 12 national +character sets? The first thing that was done, in the earlier years of +LMBCS, was to use up the spaces of the form + + [G] D1, + + where 'G' was one of the single-byte character groups, and + D1 was less than 0x80. These sequences are gathered together + into a Lotus-invented doublebyte character set to represent a + lot of stray values. Internally, in this implementation, we track this + as group '0', as a place to tuck this exceptions list.*/ + +#define ULMBCS_GRP_EXCEPT 0x00 +/* + Finally, as the durability and usefulness of UNICODE became clear, + LOTUS added a new group 0x14 to hold Unicode values not otherwise + represented in LMBCS: */ +#define ULMBCS_GRP_UNICODE 0x14 +/* The two bytes appearing after a 0x14 are intrepreted as UFT-16 BE +(Big-Endian) characters. The exception comes when the UTF16 +representation would have a zero as the second byte. In that case, +'F6' is used in its place, and the bytes are swapped. (This prevents +LMBCS from encoding any Unicode values of the form U+F6xx, but that's OK: +0xF6xx is in the middle of the Private Use Area.)*/ +#define ULMBCS_UNICOMPATZERO 0xF6 + +/* It is also useful in our code to have a constant for the size of +a LMBCS char that holds a literal Unicode value */ +#define ULMBCS_UNICODE_SIZE 3 + +/* +To squish the LMBCS representations down even further, and to make +translations even faster,sometimes the optimization group byte can be dropped +from a LMBCS character. This is decided on a process-by-process basis. The +group byte that is dropped is called the 'optimization group'. + +For Notes, the optimzation group is always 0x1.*/ +#define ULMBCS_DEFAULTOPTGROUP 0x1 +/* For 1-2-3 files, the optimzation group is stored in the header of the 1-2-3 +file. + + In any case, when using ICU, you either pass in the +optimization group as part of the name of the converter (LMBCS-1, LMBCS-2, +etc.). Using plain 'LMBCS' as the name of the converter will give you +LMBCS-1. + + +*** Implementation strategy *** + + +Because of the extensive use of other character sets, the LMBCS converter +keeps a mapping between optimization groups and IBM character sets, so that +ICU converters can be created and used as needed. */ + +/* As you can see, even though any byte below 0x20 could be an optimization +byte, only those at 0x13 or below can map to an actual converter. To limit +some loops and searches, we define a value for that last group converter:*/ + +#define ULMBCS_GRP_LAST 0x13 /* last LMBCS group that has a converter */ + +static const char * const OptGroupByteToCPName[ULMBCS_GRP_LAST + 1] = { + /* 0x0000 */ "lmb-excp", /* internal home for the LOTUS exceptions list */ + /* 0x0001 */ "ibm-850", + /* 0x0002 */ "ibm-851", + /* 0x0003 */ "windows-1255", + /* 0x0004 */ "windows-1256", + /* 0x0005 */ "windows-1251", + /* 0x0006 */ "ibm-852", + /* 0x0007 */ NULL, /* Unused */ + /* 0x0008 */ "windows-1254", + /* 0x0009 */ NULL, /* Control char HT */ + /* 0x000A */ NULL, /* Control char LF */ + /* 0x000B */ "windows-874", + /* 0x000C */ NULL, /* Unused */ + /* 0x000D */ NULL, /* Control char CR */ + /* 0x000E */ NULL, /* Unused */ + /* 0x000F */ NULL, /* Control chars: 0x0F20 + C0/C1 character: algorithmic */ + /* 0x0010 */ "windows-932", + /* 0x0011 */ "windows-949", + /* 0x0012 */ "windows-950", + /* 0x0013 */ "windows-936" + + /* The rest are null, including the 0x0014 Unicode compatibility region + and 0x0019, the 1-2-3 system range control char */ +}; + + +/* That's approximately all the data that's needed for translating + LMBCS to Unicode. + + +However, to translate Unicode to LMBCS, we need some more support. + +That's because there are often more than one possible mappings from a Unicode +code point back into LMBCS. The first thing we do is look up into a table +to figure out if there are more than one possible mappings. This table, +arranged by Unicode values (including ranges) either lists which group +to use, or says that it could go into one or more of the SBCS sets, or +into one or more of the DBCS sets. (If the character exists in both DBCS & +SBCS, the table will place it in the SBCS sets, to make the LMBCS code point +length as small as possible. Here's the two special markers we use to indicate +ambiguous mappings: */ + +#define ULMBCS_AMBIGUOUS_SBCS 0x80 /* could fit in more than one + LMBCS sbcs native encoding + (example: most accented latin) */ +#define ULMBCS_AMBIGUOUS_MBCS 0x81 /* could fit in more than one + LMBCS mbcs native encoding + (example: Unihan) */ +#define ULMBCS_AMBIGUOUS_ALL 0x82 +/* And here's a simple way to see if a group falls in an appropriate range */ +#define ULMBCS_AMBIGUOUS_MATCH(agroup, xgroup) \ + ((((agroup) == ULMBCS_AMBIGUOUS_SBCS) && \ + (xgroup) < ULMBCS_DOUBLEOPTGROUP_START) || \ + (((agroup) == ULMBCS_AMBIGUOUS_MBCS) && \ + (xgroup) >= ULMBCS_DOUBLEOPTGROUP_START)) || \ + ((agroup) == ULMBCS_AMBIGUOUS_ALL) + + +/* The table & some code to use it: */ + + +static const struct _UniLMBCSGrpMap +{ + const UChar uniStartRange; + const UChar uniEndRange; + const ulmbcs_byte_t GrpType; +} UniLMBCSGrpMap[] += +{ + + {0x0001, 0x001F, ULMBCS_GRP_CTRL}, + {0x0080, 0x009F, ULMBCS_GRP_CTRL}, + {0x00A0, 0x00A6, ULMBCS_AMBIGUOUS_SBCS}, + {0x00A7, 0x00A8, ULMBCS_AMBIGUOUS_ALL}, + {0x00A9, 0x00AF, ULMBCS_AMBIGUOUS_SBCS}, + {0x00B0, 0x00B1, ULMBCS_AMBIGUOUS_ALL}, + {0x00B2, 0x00B3, ULMBCS_AMBIGUOUS_SBCS}, + {0x00B4, 0x00B4, ULMBCS_AMBIGUOUS_ALL}, + {0x00B5, 0x00B5, ULMBCS_AMBIGUOUS_SBCS}, + {0x00B6, 0x00B6, ULMBCS_AMBIGUOUS_ALL}, + {0x00B7, 0x00D6, ULMBCS_AMBIGUOUS_SBCS}, + {0x00D7, 0x00D7, ULMBCS_AMBIGUOUS_ALL}, + {0x00D8, 0x00F6, ULMBCS_AMBIGUOUS_SBCS}, + {0x00F7, 0x00F7, ULMBCS_AMBIGUOUS_ALL}, + {0x00F8, 0x01CD, ULMBCS_AMBIGUOUS_SBCS}, + {0x01CE, 0x01CE, ULMBCS_GRP_TW }, + {0x01CF, 0x02B9, ULMBCS_AMBIGUOUS_SBCS}, + {0x02BA, 0x02BA, ULMBCS_GRP_CN}, + {0x02BC, 0x02C8, ULMBCS_AMBIGUOUS_SBCS}, + {0x02C9, 0x02D0, ULMBCS_AMBIGUOUS_MBCS}, + {0x02D8, 0x02DD, ULMBCS_AMBIGUOUS_SBCS}, + {0x0384, 0x0390, ULMBCS_AMBIGUOUS_SBCS}, + {0x0391, 0x03A9, ULMBCS_AMBIGUOUS_ALL}, + {0x03AA, 0x03B0, ULMBCS_AMBIGUOUS_SBCS}, + {0x03B1, 0x03C9, ULMBCS_AMBIGUOUS_ALL}, + {0x03CA, 0x03CE, ULMBCS_AMBIGUOUS_SBCS}, + {0x0400, 0x0400, ULMBCS_GRP_RU}, + {0x0401, 0x0401, ULMBCS_AMBIGUOUS_ALL}, + {0x0402, 0x040F, ULMBCS_GRP_RU}, + {0x0410, 0x0431, ULMBCS_AMBIGUOUS_ALL}, + {0x0432, 0x044E, ULMBCS_GRP_RU}, + {0x044F, 0x044F, ULMBCS_AMBIGUOUS_ALL}, + {0x0450, 0x0491, ULMBCS_GRP_RU}, + {0x05B0, 0x05F2, ULMBCS_GRP_HE}, + {0x060C, 0x06AF, ULMBCS_GRP_AR}, + {0x0E01, 0x0E5B, ULMBCS_GRP_TH}, + {0x200C, 0x200F, ULMBCS_AMBIGUOUS_SBCS}, + {0x2010, 0x2010, ULMBCS_AMBIGUOUS_MBCS}, + {0x2013, 0x2014, ULMBCS_AMBIGUOUS_SBCS}, + {0x2015, 0x2015, ULMBCS_AMBIGUOUS_MBCS}, + {0x2016, 0x2016, ULMBCS_AMBIGUOUS_MBCS}, + {0x2017, 0x2017, ULMBCS_AMBIGUOUS_SBCS}, + {0x2018, 0x2019, ULMBCS_AMBIGUOUS_ALL}, + {0x201A, 0x201B, ULMBCS_AMBIGUOUS_SBCS}, + {0x201C, 0x201D, ULMBCS_AMBIGUOUS_ALL}, + {0x201E, 0x201F, ULMBCS_AMBIGUOUS_SBCS}, + {0x2020, 0x2021, ULMBCS_AMBIGUOUS_ALL}, + {0x2022, 0x2024, ULMBCS_AMBIGUOUS_SBCS}, + {0x2025, 0x2025, ULMBCS_AMBIGUOUS_MBCS}, + {0x2026, 0x2026, ULMBCS_AMBIGUOUS_ALL}, + {0x2027, 0x2027, ULMBCS_GRP_TW}, + {0x2030, 0x2030, ULMBCS_AMBIGUOUS_ALL}, + {0x2031, 0x2031, ULMBCS_AMBIGUOUS_SBCS}, + {0x2032, 0x2033, ULMBCS_AMBIGUOUS_MBCS}, + {0x2035, 0x2035, ULMBCS_AMBIGUOUS_MBCS}, + {0x2039, 0x203A, ULMBCS_AMBIGUOUS_SBCS}, + {0x203B, 0x203B, ULMBCS_AMBIGUOUS_MBCS}, + {0x203C, 0x203C, ULMBCS_GRP_EXCEPT}, + {0x2074, 0x2074, ULMBCS_GRP_KO}, + {0x207F, 0x207F, ULMBCS_GRP_EXCEPT}, + {0x2081, 0x2084, ULMBCS_GRP_KO}, + {0x20A4, 0x20AC, ULMBCS_AMBIGUOUS_SBCS}, + {0x2103, 0x2109, ULMBCS_AMBIGUOUS_MBCS}, + {0x2111, 0x2120, ULMBCS_AMBIGUOUS_SBCS}, + /*zhujin: upgrade, for regressiont test, spr HKIA4YHTSU*/ + {0x2121, 0x2121, ULMBCS_AMBIGUOUS_MBCS}, + {0x2122, 0x2126, ULMBCS_AMBIGUOUS_SBCS}, + {0x212B, 0x212B, ULMBCS_AMBIGUOUS_MBCS}, + {0x2135, 0x2135, ULMBCS_AMBIGUOUS_SBCS}, + {0x2153, 0x2154, ULMBCS_GRP_KO}, + {0x215B, 0x215E, ULMBCS_GRP_EXCEPT}, + {0x2160, 0x2179, ULMBCS_AMBIGUOUS_MBCS}, + {0x2190, 0x2193, ULMBCS_AMBIGUOUS_ALL}, + {0x2194, 0x2195, ULMBCS_GRP_EXCEPT}, + {0x2196, 0x2199, ULMBCS_AMBIGUOUS_MBCS}, + {0x21A8, 0x21A8, ULMBCS_GRP_EXCEPT}, + {0x21B8, 0x21B9, ULMBCS_GRP_CN}, + {0x21D0, 0x21D1, ULMBCS_GRP_EXCEPT}, + {0x21D2, 0x21D2, ULMBCS_AMBIGUOUS_MBCS}, + {0x21D3, 0x21D3, ULMBCS_GRP_EXCEPT}, + {0x21D4, 0x21D4, ULMBCS_AMBIGUOUS_MBCS}, + {0x21D5, 0x21D5, ULMBCS_GRP_EXCEPT}, + {0x21E7, 0x21E7, ULMBCS_GRP_CN}, + {0x2200, 0x2200, ULMBCS_AMBIGUOUS_MBCS}, + {0x2201, 0x2201, ULMBCS_GRP_EXCEPT}, + {0x2202, 0x2202, ULMBCS_AMBIGUOUS_MBCS}, + {0x2203, 0x2203, ULMBCS_AMBIGUOUS_MBCS}, + {0x2204, 0x2206, ULMBCS_GRP_EXCEPT}, + {0x2207, 0x2208, ULMBCS_AMBIGUOUS_MBCS}, + {0x2209, 0x220A, ULMBCS_GRP_EXCEPT}, + {0x220B, 0x220B, ULMBCS_AMBIGUOUS_MBCS}, + {0x220F, 0x2215, ULMBCS_AMBIGUOUS_MBCS}, + {0x2219, 0x2219, ULMBCS_GRP_EXCEPT}, + {0x221A, 0x221A, ULMBCS_AMBIGUOUS_MBCS}, + {0x221B, 0x221C, ULMBCS_GRP_EXCEPT}, + {0x221D, 0x221E, ULMBCS_AMBIGUOUS_MBCS}, + {0x221F, 0x221F, ULMBCS_GRP_EXCEPT}, + {0x2220, 0x2220, ULMBCS_AMBIGUOUS_MBCS}, + {0x2223, 0x222A, ULMBCS_AMBIGUOUS_MBCS}, + {0x222B, 0x223D, ULMBCS_AMBIGUOUS_MBCS}, + {0x2245, 0x2248, ULMBCS_GRP_EXCEPT}, + {0x224C, 0x224C, ULMBCS_GRP_TW}, + {0x2252, 0x2252, ULMBCS_AMBIGUOUS_MBCS}, + {0x2260, 0x2261, ULMBCS_AMBIGUOUS_MBCS}, + {0x2262, 0x2265, ULMBCS_GRP_EXCEPT}, + {0x2266, 0x226F, ULMBCS_AMBIGUOUS_MBCS}, + {0x2282, 0x2283, ULMBCS_AMBIGUOUS_MBCS}, + {0x2284, 0x2285, ULMBCS_GRP_EXCEPT}, + {0x2286, 0x2287, ULMBCS_AMBIGUOUS_MBCS}, + {0x2288, 0x2297, ULMBCS_GRP_EXCEPT}, + {0x2299, 0x22BF, ULMBCS_AMBIGUOUS_MBCS}, + {0x22C0, 0x22C0, ULMBCS_GRP_EXCEPT}, + {0x2310, 0x2310, ULMBCS_GRP_EXCEPT}, + {0x2312, 0x2312, ULMBCS_AMBIGUOUS_MBCS}, + {0x2318, 0x2321, ULMBCS_GRP_EXCEPT}, + {0x2318, 0x2321, ULMBCS_GRP_CN}, + {0x2460, 0x24E9, ULMBCS_AMBIGUOUS_MBCS}, + {0x2500, 0x2500, ULMBCS_AMBIGUOUS_SBCS}, + {0x2501, 0x2501, ULMBCS_AMBIGUOUS_MBCS}, + {0x2502, 0x2502, ULMBCS_AMBIGUOUS_ALL}, + {0x2503, 0x2503, ULMBCS_AMBIGUOUS_MBCS}, + {0x2504, 0x2505, ULMBCS_GRP_TW}, + {0x2506, 0x2665, ULMBCS_AMBIGUOUS_ALL}, + {0x2666, 0x2666, ULMBCS_GRP_EXCEPT}, + {0x2667, 0x2669, ULMBCS_AMBIGUOUS_SBCS}, + {0x266A, 0x266A, ULMBCS_AMBIGUOUS_ALL}, + {0x266B, 0x266C, ULMBCS_AMBIGUOUS_SBCS}, + {0x266D, 0x266D, ULMBCS_AMBIGUOUS_MBCS}, + {0x266E, 0x266E, ULMBCS_AMBIGUOUS_SBCS}, + {0x266F, 0x266F, ULMBCS_GRP_JA}, + {0x2670, 0x2E7F, ULMBCS_AMBIGUOUS_SBCS}, + {0x2E80, 0xF861, ULMBCS_AMBIGUOUS_MBCS}, + {0xF862, 0xF8FF, ULMBCS_GRP_EXCEPT}, + {0xF900, 0xFA2D, ULMBCS_AMBIGUOUS_MBCS}, + {0xFB00, 0xFEFF, ULMBCS_AMBIGUOUS_SBCS}, + {0xFF01, 0xFFEE, ULMBCS_AMBIGUOUS_MBCS}, + {0xFFFF, 0xFFFF, ULMBCS_GRP_UNICODE} +}; + +static ulmbcs_byte_t +FindLMBCSUniRange(UChar uniChar) +{ + const struct _UniLMBCSGrpMap * pTable = UniLMBCSGrpMap; + + while (uniChar > pTable->uniEndRange) + { + pTable++; + } + + if (uniChar >= pTable->uniStartRange) + { + return pTable->GrpType; + } + return ULMBCS_GRP_UNICODE; +} + +/* +We also ask the creator of a converter to send in a preferred locale +that we can use in resolving ambiguous mappings. They send the locale +in as a string, and we map it, if possible, to one of the +LMBCS groups. We use this table, and the associated code, to +do the lookup: */ + +/************************************************** + This table maps locale ID's to LMBCS opt groups. + The default return is group 0x01. Note that for + performance reasons, the table is sorted in + increasing alphabetic order, with the notable + exception of zhTW. This is to force the check + for Traditonal Chinese before dropping back to + Simplified. + + Note too that the Latin-1 groups have been + commented out because it's the default, and + this shortens the table, allowing a serial + search to go quickly. + *************************************************/ + +static const struct _LocaleLMBCSGrpMap +{ + const char *LocaleID; + const ulmbcs_byte_t OptGroup; +} LocaleLMBCSGrpMap[] = +{ + {"ar", ULMBCS_GRP_AR}, + {"be", ULMBCS_GRP_RU}, + {"bg", ULMBCS_GRP_L2}, + /* {"ca", ULMBCS_GRP_L1}, */ + {"cs", ULMBCS_GRP_L2}, + /* {"da", ULMBCS_GRP_L1}, */ + /* {"de", ULMBCS_GRP_L1}, */ + {"el", ULMBCS_GRP_GR}, + /* {"en", ULMBCS_GRP_L1}, */ + /* {"es", ULMBCS_GRP_L1}, */ + /* {"et", ULMBCS_GRP_L1}, */ + /* {"fi", ULMBCS_GRP_L1}, */ + /* {"fr", ULMBCS_GRP_L1}, */ + {"he", ULMBCS_GRP_HE}, + {"hu", ULMBCS_GRP_L2}, + /* {"is", ULMBCS_GRP_L1}, */ + /* {"it", ULMBCS_GRP_L1}, */ + {"iw", ULMBCS_GRP_HE}, + {"ja", ULMBCS_GRP_JA}, + {"ko", ULMBCS_GRP_KO}, + /* {"lt", ULMBCS_GRP_L1}, */ + /* {"lv", ULMBCS_GRP_L1}, */ + {"mk", ULMBCS_GRP_RU}, + /* {"nl", ULMBCS_GRP_L1}, */ + /* {"no", ULMBCS_GRP_L1}, */ + {"pl", ULMBCS_GRP_L2}, + /* {"pt", ULMBCS_GRP_L1}, */ + {"ro", ULMBCS_GRP_L2}, + {"ru", ULMBCS_GRP_RU}, + {"sh", ULMBCS_GRP_L2}, + {"sk", ULMBCS_GRP_L2}, + {"sl", ULMBCS_GRP_L2}, + {"sq", ULMBCS_GRP_L2}, + {"sr", ULMBCS_GRP_RU}, + /* {"sv", ULMBCS_GRP_L1}, */ + {"th", ULMBCS_GRP_TH}, + {"tr", ULMBCS_GRP_TR}, + {"uk", ULMBCS_GRP_RU}, + /* {"vi", ULMBCS_GRP_L1}, */ + {"zhTW", ULMBCS_GRP_TW}, + {"zh", ULMBCS_GRP_CN}, + {NULL, ULMBCS_GRP_L1} +}; + + +static ulmbcs_byte_t +FindLMBCSLocale(const char *LocaleID) +{ + const struct _LocaleLMBCSGrpMap *pTable = LocaleLMBCSGrpMap; + + if ((!LocaleID) || (!*LocaleID)) + { + return 0; + } + + while (pTable->LocaleID) + { + if (*pTable->LocaleID == *LocaleID) /* Check only first char for speed */ + { + /* First char matches - check whole name, for entry-length */ + if (uprv_strncmp(pTable->LocaleID, LocaleID, strlen(pTable->LocaleID)) == 0) + return pTable->OptGroup; + } + else + if (*pTable->LocaleID > *LocaleID) /* Sorted alphabetically - exit */ + break; + pTable++; + } + return ULMBCS_GRP_L1; +} + + +/* + Before we get to the main body of code, here's how we hook up to the rest + of ICU. ICU converters are required to define a structure that includes + some function pointers, and some common data, in the style of a C++ + vtable. There is also room in there for converter-specific data. LMBCS + uses that converter-specific data to keep track of the 12 subconverters + we use, the optimization group, and the group (if any) that matches the + locale. We have one structure instantiated for each of the 12 possible + optimization groups. To avoid typos & to avoid boring the reader, we + put the declarations of these structures and functions into macros. To see + the definitions of these structures, see unicode\ucnv_bld.h +*/ + +typedef struct + { + UConverterSharedData *OptGrpConverter[ULMBCS_GRP_LAST+1]; /* Converter per Opt. grp. */ + uint8_t OptGroup; /* default Opt. grp. for this LMBCS session */ + uint8_t localeConverterIndex; /* reasonable locale match for index */ + } +UConverterDataLMBCS; + +U_CDECL_BEGIN +static void U_CALLCONV _LMBCSClose(UConverter * _this); +U_CDECL_END + +#define DECLARE_LMBCS_DATA(n) \ +static const UConverterImpl _LMBCSImpl##n={\ + UCNV_LMBCS_##n,\ + NULL,NULL,\ + _LMBCSOpen##n,\ + _LMBCSClose,\ + NULL,\ + _LMBCSToUnicodeWithOffsets,\ + _LMBCSToUnicodeWithOffsets,\ + _LMBCSFromUnicode,\ + _LMBCSFromUnicode,\ + NULL,\ + NULL,\ + NULL,\ + NULL,\ + _LMBCSSafeClone,\ + ucnv_getCompleteUnicodeSet,\ + NULL,\ + NULL\ +};\ +static const UConverterStaticData _LMBCSStaticData##n={\ + sizeof(UConverterStaticData),\ + "LMBCS-" #n,\ + 0, UCNV_IBM, UCNV_LMBCS_##n, 1, 3,\ + { 0x3f, 0, 0, 0 },1,FALSE,FALSE,0,0,{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} \ +};\ +const UConverterSharedData _LMBCSData##n= \ + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_LMBCSStaticData##n, &_LMBCSImpl##n); + + /* The only function we needed to duplicate 12 times was the 'open' +function, which will do basically the same thing except set a different +optimization group. So, we put the common stuff into a worker function, +and set up another macro to stamp out the 12 open functions:*/ +#define DEFINE_LMBCS_OPEN(n) \ +static void U_CALLCONV \ + _LMBCSOpen##n(UConverter* _this, UConverterLoadArgs* pArgs, UErrorCode* err) \ +{ _LMBCSOpenWorker(_this, pArgs, err, n); } + + + +/* Here's the open worker & the common close function */ +static void +_LMBCSOpenWorker(UConverter* _this, + UConverterLoadArgs *pArgs, + UErrorCode* err, + ulmbcs_byte_t OptGroup) +{ + UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS*)uprv_malloc (sizeof (UConverterDataLMBCS)); + _this->extraInfo = extraInfo; + if(extraInfo != NULL) + { + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs= UCNV_LOAD_ARGS_INITIALIZER; + ulmbcs_byte_t i; + + uprv_memset(extraInfo, 0, sizeof(UConverterDataLMBCS)); + + stackArgs.onlyTestIsLoadable = pArgs->onlyTestIsLoadable; + + for (i=0; i <= ULMBCS_GRP_LAST && U_SUCCESS(*err); i++) + { + if(OptGroupByteToCPName[i] != NULL) { + extraInfo->OptGrpConverter[i] = ucnv_loadSharedData(OptGroupByteToCPName[i], &stackPieces, &stackArgs, err); + } + } + + if(U_FAILURE(*err) || pArgs->onlyTestIsLoadable) { + _LMBCSClose(_this); + return; + } + extraInfo->OptGroup = OptGroup; + extraInfo->localeConverterIndex = FindLMBCSLocale(pArgs->locale); + } + else + { + *err = U_MEMORY_ALLOCATION_ERROR; + } +} + +U_CDECL_BEGIN +static void U_CALLCONV +_LMBCSClose(UConverter * _this) +{ + if (_this->extraInfo != NULL) + { + ulmbcs_byte_t Ix; + UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) _this->extraInfo; + + for (Ix=0; Ix <= ULMBCS_GRP_LAST; Ix++) + { + if (extraInfo->OptGrpConverter[Ix] != NULL) + ucnv_unloadSharedDataIfReady(extraInfo->OptGrpConverter[Ix]); + } + if (!_this->isExtraLocal) { + uprv_free (_this->extraInfo); + _this->extraInfo = NULL; + } + } +} + +typedef struct LMBCSClone { + UConverter cnv; + UConverterDataLMBCS lmbcs; +} LMBCSClone; + +static UConverter * U_CALLCONV +_LMBCSSafeClone(const UConverter *cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status) { + (void)status; + LMBCSClone *newLMBCS; + UConverterDataLMBCS *extraInfo; + int32_t i; + + if(*pBufferSize<=0) { + *pBufferSize=(int32_t)sizeof(LMBCSClone); + return NULL; + } + + extraInfo=(UConverterDataLMBCS *)cnv->extraInfo; + newLMBCS=(LMBCSClone *)stackBuffer; + + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ + + uprv_memcpy(&newLMBCS->lmbcs, extraInfo, sizeof(UConverterDataLMBCS)); + + /* share the subconverters */ + for(i = 0; i <= ULMBCS_GRP_LAST; ++i) { + if(extraInfo->OptGrpConverter[i] != NULL) { + ucnv_incrementRefCount(extraInfo->OptGrpConverter[i]); + } + } + + newLMBCS->cnv.extraInfo = &newLMBCS->lmbcs; + newLMBCS->cnv.isExtraLocal = TRUE; + return &newLMBCS->cnv; +} + +/* + * There used to be a _LMBCSGetUnicodeSet() function here (up to svn revision 20117) + * which added all code points except for U+F6xx + * because those cannot be represented in the Unicode group. + * However, it turns out that windows-950 has roundtrips for all of U+F6xx + * which means that LMBCS can convert all Unicode code points after all. + * We now simply use ucnv_getCompleteUnicodeSet(). + * + * This may need to be looked at again as Lotus uses _LMBCSGetUnicodeSet(). (091216) + */ + +/* + Here's the basic helper function that we use when converting from + Unicode to LMBCS, and we suspect that a Unicode character will fit into + one of the 12 groups. The return value is the number of bytes written + starting at pStartLMBCS (if any). +*/ + +static size_t +LMBCSConversionWorker ( + UConverterDataLMBCS * extraInfo, /* subconverters, opt & locale groups */ + ulmbcs_byte_t group, /* The group to try */ + ulmbcs_byte_t * pStartLMBCS, /* where to put the results */ + UChar * pUniChar, /* The input unicode character */ + ulmbcs_byte_t * lastConverterIndex, /* output: track last successful group used */ + UBool * groups_tried /* output: track any unsuccessful groups */ +) +{ + ulmbcs_byte_t * pLMBCS = pStartLMBCS; + UConverterSharedData * xcnv = extraInfo->OptGrpConverter[group]; + + int bytesConverted; + uint32_t value; + ulmbcs_byte_t firstByte; + + U_ASSERT(xcnv); + U_ASSERT(group<ULMBCS_GRP_UNICODE); + + bytesConverted = ucnv_MBCSFromUChar32(xcnv, *pUniChar, &value, FALSE); + + /* get the first result byte */ + if(bytesConverted > 0) { + firstByte = (ulmbcs_byte_t)(value >> ((bytesConverted - 1) * 8)); + } else { + /* most common failure mode is an unassigned character */ + groups_tried[group] = TRUE; + return 0; + } + + *lastConverterIndex = group; + + /* All initial byte values in lower ascii range should have been caught by now, + except with the exception group. + */ + U_ASSERT((firstByte <= ULMBCS_C0END) || (firstByte >= ULMBCS_C1START) || (group == ULMBCS_GRP_EXCEPT)); + + /* use converted data: first write 0, 1 or two group bytes */ + if (group != ULMBCS_GRP_EXCEPT && extraInfo->OptGroup != group) + { + *pLMBCS++ = group; + if (bytesConverted == 1 && group >= ULMBCS_DOUBLEOPTGROUP_START) + { + *pLMBCS++ = group; + } + } + + /* don't emit control chars */ + if ( bytesConverted == 1 && firstByte < 0x20 ) + return 0; + + + /* then move over the converted data */ + switch(bytesConverted) + { + case 4: + *pLMBCS++ = (ulmbcs_byte_t)(value >> 24); + U_FALLTHROUGH; + case 3: + *pLMBCS++ = (ulmbcs_byte_t)(value >> 16); + U_FALLTHROUGH; + case 2: + *pLMBCS++ = (ulmbcs_byte_t)(value >> 8); + U_FALLTHROUGH; + case 1: + *pLMBCS++ = (ulmbcs_byte_t)value; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + + return (pLMBCS - pStartLMBCS); +} + + +/* This is a much simpler version of above, when we +know we are writing LMBCS using the Unicode group +*/ +static size_t +LMBCSConvertUni(ulmbcs_byte_t * pLMBCS, UChar uniChar) +{ + /* encode into LMBCS Unicode range */ + uint8_t LowCh = (uint8_t)(uniChar & 0x00FF); + uint8_t HighCh = (uint8_t)(uniChar >> 8); + + *pLMBCS++ = ULMBCS_GRP_UNICODE; + + if (LowCh == 0) + { + *pLMBCS++ = ULMBCS_UNICOMPATZERO; + *pLMBCS++ = HighCh; + } + else + { + *pLMBCS++ = HighCh; + *pLMBCS++ = LowCh; + } + return ULMBCS_UNICODE_SIZE; +} + + + +/* The main Unicode to LMBCS conversion function */ +static void U_CALLCONV +_LMBCSFromUnicode(UConverterFromUnicodeArgs* args, + UErrorCode* err) +{ + ulmbcs_byte_t lastConverterIndex = 0; + UChar uniChar; + ulmbcs_byte_t LMBCS[ULMBCS_CHARSIZE_MAX]; + ulmbcs_byte_t * pLMBCS; + int32_t bytes_written; + UBool groups_tried[ULMBCS_GRP_LAST+1]; + UConverterDataLMBCS * extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; + int sourceIndex = 0; + + /* Basic strategy: attempt to fill in local LMBCS 1-char buffer.(LMBCS) + If that succeeds, see if it will all fit into the target & copy it over + if it does. + + We try conversions in the following order: + + 1. Single-byte ascii & special fixed control chars (&null) + 2. Look up group in table & try that (could be + A) Unicode group + B) control group, + C) national encoding, + or ambiguous SBCS or MBCS group (on to step 4...) + + 3. If its ambiguous, try this order: + A) The optimization group + B) The locale group + C) The last group that succeeded with this string. + D) every other group that's relevent (single or double) + E) If its single-byte ambiguous, try the exceptions group + + 4. And as a grand fallback: Unicode + */ + + /*Fix for SPR#DJOE66JFN3 (Lotus)*/ + ulmbcs_byte_t OldConverterIndex = 0; + + while (args->source < args->sourceLimit && !U_FAILURE(*err)) + { + /*Fix for SPR#DJOE66JFN3 (Lotus)*/ + OldConverterIndex = extraInfo->localeConverterIndex; + + if (args->target >= args->targetLimit) + { + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + uniChar = *(args->source); + bytes_written = 0; + pLMBCS = LMBCS; + + /* check cases in rough order of how common they are, for speed */ + + /* single byte matches: strategy 1 */ + /*Fix for SPR#DJOE66JFN3 (Lotus)*/ + if((uniChar>=0x80) && (uniChar<=0xff) + /*Fix for SPR#JUYA6XAERU and TSAO7GL5NK (Lotus)*/ &&(uniChar!=0xB1) &&(uniChar!=0xD7) &&(uniChar!=0xF7) + &&(uniChar!=0xB0) &&(uniChar!=0xB4) &&(uniChar!=0xB6) &&(uniChar!=0xA7) &&(uniChar!=0xA8)) + { + extraInfo->localeConverterIndex = ULMBCS_GRP_L1; + } + if (((uniChar > ULMBCS_C0END) && (uniChar < ULMBCS_C1START)) || + uniChar == 0 || uniChar == ULMBCS_HT || uniChar == ULMBCS_CR || + uniChar == ULMBCS_LF || uniChar == ULMBCS_123SYSTEMRANGE + ) + { + *pLMBCS++ = (ulmbcs_byte_t ) uniChar; + bytes_written = 1; + } + + + if (!bytes_written) + { + /* Check by UNICODE range (Strategy 2) */ + ulmbcs_byte_t group = FindLMBCSUniRange(uniChar); + + if (group == ULMBCS_GRP_UNICODE) /* (Strategy 2A) */ + { + pLMBCS += LMBCSConvertUni(pLMBCS,uniChar); + + bytes_written = (int32_t)(pLMBCS - LMBCS); + } + else if (group == ULMBCS_GRP_CTRL) /* (Strategy 2B) */ + { + /* Handle control characters here */ + if (uniChar <= ULMBCS_C0END) + { + *pLMBCS++ = ULMBCS_GRP_CTRL; + *pLMBCS++ = (ulmbcs_byte_t)(ULMBCS_CTRLOFFSET + uniChar); + } + else if (uniChar >= ULMBCS_C1START && uniChar <= ULMBCS_C1START + ULMBCS_CTRLOFFSET) + { + *pLMBCS++ = ULMBCS_GRP_CTRL; + *pLMBCS++ = (ulmbcs_byte_t ) (uniChar & 0x00FF); + } + bytes_written = (int32_t)(pLMBCS - LMBCS); + } + else if (group < ULMBCS_GRP_UNICODE) /* (Strategy 2C) */ + { + /* a specific converter has been identified - use it */ + bytes_written = (int32_t)LMBCSConversionWorker ( + extraInfo, group, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + } + if (!bytes_written) /* the ambiguous group cases (Strategy 3) */ + { + uprv_memset(groups_tried, 0, sizeof(groups_tried)); + + /* check for non-default optimization group (Strategy 3A )*/ + if ((extraInfo->OptGroup != 1) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->OptGroup))) + { + /*zhujin: upgrade, merge #39299 here (Lotus) */ + /*To make R5 compatible translation, look for exceptional group first for non-DBCS*/ + + if(extraInfo->localeConverterIndex < ULMBCS_DOUBLEOPTGROUP_START) + { + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, + ULMBCS_GRP_L1, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + + if(!bytes_written) + { + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, + ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + } + if(!bytes_written) + { + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, + extraInfo->localeConverterIndex, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + } + } + else + { + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, + extraInfo->localeConverterIndex, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + } + } + /* check for locale optimization group (Strategy 3B) */ + if (!bytes_written && (extraInfo->localeConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, extraInfo->localeConverterIndex))) + { + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, + extraInfo->localeConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried); + } + /* check for last optimization group used for this string (Strategy 3C) */ + if (!bytes_written && (lastConverterIndex) && (ULMBCS_AMBIGUOUS_MATCH(group, lastConverterIndex))) + { + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, + lastConverterIndex, pLMBCS, &uniChar, &lastConverterIndex, groups_tried); + } + if (!bytes_written) + { + /* just check every possible matching converter (Strategy 3D) */ + ulmbcs_byte_t grp_start; + ulmbcs_byte_t grp_end; + ulmbcs_byte_t grp_ix; + grp_start = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS) + ? ULMBCS_DOUBLEOPTGROUP_START + : ULMBCS_GRP_L1); + grp_end = (ulmbcs_byte_t)((group == ULMBCS_AMBIGUOUS_MBCS) + ? ULMBCS_GRP_LAST + : ULMBCS_GRP_TH); + if(group == ULMBCS_AMBIGUOUS_ALL) + { + grp_start = ULMBCS_GRP_L1; + grp_end = ULMBCS_GRP_LAST; + } + for (grp_ix = grp_start; + grp_ix <= grp_end && !bytes_written; + grp_ix++) + { + if (extraInfo->OptGrpConverter [grp_ix] && !groups_tried [grp_ix]) + { + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, + grp_ix, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + } + } + /* a final conversion fallback to the exceptions group if its likely + to be single byte (Strategy 3E) */ + if (!bytes_written && grp_start == ULMBCS_GRP_L1) + { + bytes_written = (int32_t)LMBCSConversionWorker (extraInfo, + ULMBCS_GRP_EXCEPT, pLMBCS, &uniChar, + &lastConverterIndex, groups_tried); + } + } + /* all of our other strategies failed. Fallback to Unicode. (Strategy 4)*/ + if (!bytes_written) + { + + pLMBCS += LMBCSConvertUni(pLMBCS, uniChar); + bytes_written = (int32_t)(pLMBCS - LMBCS); + } + } + } + + /* we have a translation. increment source and write as much as posible to target */ + args->source++; + pLMBCS = LMBCS; + while (args->target < args->targetLimit && bytes_written--) + { + *(args->target)++ = *pLMBCS++; + if (args->offsets) + { + *(args->offsets)++ = sourceIndex; + } + } + sourceIndex++; + if (bytes_written > 0) + { + /* write any bytes that didn't fit in target to the error buffer, + common code will move this to target if we get called back with + enough target room + */ + uint8_t * pErrorBuffer = args->converter->charErrorBuffer; + *err = U_BUFFER_OVERFLOW_ERROR; + args->converter->charErrorBufferLength = (int8_t)bytes_written; + while (bytes_written--) + { + *pErrorBuffer++ = *pLMBCS++; + } + } + /*Fix for SPR#DJOE66JFN3 (Lotus)*/ + extraInfo->localeConverterIndex = OldConverterIndex; + } +} + + +/* Now, the Unicode from LMBCS section */ + + +/* A function to call when we are looking at the Unicode group byte in LMBCS */ +static UChar +GetUniFromLMBCSUni(char const ** ppLMBCSin) /* Called with LMBCS-style Unicode byte stream */ +{ + uint8_t HighCh = *(*ppLMBCSin)++; /* Big-endian Unicode in LMBCS compatibility group*/ + uint8_t LowCh = *(*ppLMBCSin)++; + + if (HighCh == ULMBCS_UNICOMPATZERO ) + { + HighCh = LowCh; + LowCh = 0; /* zero-byte in LSB special character */ + } + return (UChar)((HighCh << 8) | LowCh); +} + + + +/* CHECK_SOURCE_LIMIT: Helper macro to verify that there are at least'index' + bytes left in source up to sourceLimit.Errors appropriately if not. + If we reach the limit, then update the source pointer to there to consume + all input as required by ICU converter semantics. +*/ + +#define CHECK_SOURCE_LIMIT(index) UPRV_BLOCK_MACRO_BEGIN { \ + if (args->source+index > args->sourceLimit) { \ + *err = U_TRUNCATED_CHAR_FOUND; \ + args->source = args->sourceLimit; \ + return 0xffff; \ + } \ +} UPRV_BLOCK_MACRO_END + +/* Return the Unicode representation for the current LMBCS character */ + +static UChar32 U_CALLCONV +_LMBCSGetNextUCharWorker(UConverterToUnicodeArgs* args, + UErrorCode* err) +{ + UChar32 uniChar = 0; /* an output UNICODE char */ + ulmbcs_byte_t CurByte; /* A byte from the input stream */ + + /* error check */ + if (args->source >= args->sourceLimit) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return 0xffff; + } + /* Grab first byte & save address for error recovery */ + CurByte = *((ulmbcs_byte_t *) (args->source++)); + + /* + * at entry of each if clause: + * 1. 'CurByte' points at the first byte of a LMBCS character + * 2. '*source'points to the next byte of the source stream after 'CurByte' + * + * the job of each if clause is: + * 1. set '*source' to point at the beginning of next char (nop if LMBCS char is only 1 byte) + * 2. set 'uniChar' up with the right Unicode value, or set 'err' appropriately + */ + + /* First lets check the simple fixed values. */ + + if(((CurByte > ULMBCS_C0END) && (CurByte < ULMBCS_C1START)) /* ascii range */ + || (CurByte == 0) + || CurByte == ULMBCS_HT || CurByte == ULMBCS_CR + || CurByte == ULMBCS_LF || CurByte == ULMBCS_123SYSTEMRANGE) + { + uniChar = CurByte; + } + else + { + UConverterDataLMBCS * extraInfo; + ulmbcs_byte_t group; + UConverterSharedData *cnv; + + if (CurByte == ULMBCS_GRP_CTRL) /* Control character group - no opt group update */ + { + ulmbcs_byte_t C0C1byte; + CHECK_SOURCE_LIMIT(1); + C0C1byte = *(args->source)++; + uniChar = (C0C1byte < ULMBCS_C1START) ? C0C1byte - ULMBCS_CTRLOFFSET : C0C1byte; + } + else + if (CurByte == ULMBCS_GRP_UNICODE) /* Unicode compatibility group: BigEndian UTF16 */ + { + CHECK_SOURCE_LIMIT(2); + + /* don't check for error indicators fffe/ffff below */ + return GetUniFromLMBCSUni(&(args->source)); + } + else if (CurByte <= ULMBCS_CTRLOFFSET) + { + group = CurByte; /* group byte is in the source */ + extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; + if (group > ULMBCS_GRP_LAST || (cnv = extraInfo->OptGrpConverter[group]) == NULL) + { + /* this is not a valid group byte - no converter*/ + *err = U_INVALID_CHAR_FOUND; + } + else if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */ + { + + CHECK_SOURCE_LIMIT(2); + + /* check for LMBCS doubled-group-byte case */ + if (*args->source == group) { + /* single byte */ + ++args->source; + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 1, FALSE); + ++args->source; + } else { + /* double byte */ + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source, 2, FALSE); + args->source += 2; + } + } + else { /* single byte conversion */ + CHECK_SOURCE_LIMIT(1); + CurByte = *(args->source)++; + + if (CurByte >= ULMBCS_C1START) + { + uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte); + } + else + { + /* The non-optimizable oddballs where there is an explicit byte + * AND the second byte is not in the upper ascii range + */ + char bytes[2]; + + extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; + cnv = extraInfo->OptGrpConverter [ULMBCS_GRP_EXCEPT]; + + /* Lookup value must include opt group */ + bytes[0] = group; + bytes[1] = CurByte; + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, bytes, 2, FALSE); + } + } + } + else if (CurByte >= ULMBCS_C1START) /* group byte is implicit */ + { + extraInfo = (UConverterDataLMBCS *) args->converter->extraInfo; + group = extraInfo->OptGroup; + cnv = extraInfo->OptGrpConverter[group]; + if (group >= ULMBCS_DOUBLEOPTGROUP_START) /* double byte conversion */ + { + if (!ucnv_MBCSIsLeadByte(cnv, CurByte)) + { + CHECK_SOURCE_LIMIT(0); + + /* let the MBCS conversion consume CurByte again */ + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 1, FALSE); + } + else + { + CHECK_SOURCE_LIMIT(1); + /* let the MBCS conversion consume CurByte again */ + uniChar = ucnv_MBCSSimpleGetNextUChar(cnv, args->source - 1, 2, FALSE); + ++args->source; + } + } + else /* single byte conversion */ + { + uniChar = _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(cnv, CurByte); + } + } + } + return uniChar; +} + + +/* The exported function that converts lmbcs to one or more + UChars - currently UTF-16 +*/ +static void U_CALLCONV +_LMBCSToUnicodeWithOffsets(UConverterToUnicodeArgs* args, + UErrorCode* err) +{ + char LMBCS [ULMBCS_CHARSIZE_MAX]; + UChar uniChar; /* one output UNICODE char */ + const char * saveSource; /* beginning of current code point */ + const char * pStartLMBCS = args->source; /* beginning of whole string */ + const char * errSource = NULL; /* pointer to actual input in case an error occurs */ + int8_t savebytes = 0; + + /* Process from source to limit, or until error */ + while (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit > args->target) + { + saveSource = args->source; /* beginning of current code point */ + + if (args->converter->toULength) /* reassemble char from previous call */ + { + const char *saveSourceLimit; + size_t size_old = args->converter->toULength; + + /* limit from source is either remainder of temp buffer, or user limit on source */ + size_t size_new_maybe_1 = sizeof(LMBCS) - size_old; + size_t size_new_maybe_2 = args->sourceLimit - args->source; + size_t size_new = (size_new_maybe_1 < size_new_maybe_2) ? size_new_maybe_1 : size_new_maybe_2; + + + uprv_memcpy(LMBCS, args->converter->toUBytes, size_old); + uprv_memcpy(LMBCS + size_old, args->source, size_new); + saveSourceLimit = args->sourceLimit; + args->source = errSource = LMBCS; + args->sourceLimit = LMBCS+size_old+size_new; + savebytes = (int8_t)(size_old+size_new); + uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err); + args->source = saveSource + ((args->source - LMBCS) - size_old); + args->sourceLimit = saveSourceLimit; + + if (*err == U_TRUNCATED_CHAR_FOUND) + { + /* evil special case: source buffers so small a char spans more than 2 buffers */ + args->converter->toULength = savebytes; + uprv_memcpy(args->converter->toUBytes, LMBCS, savebytes); + args->source = args->sourceLimit; + *err = U_ZERO_ERROR; + return; + } + else + { + /* clear the partial-char marker */ + args->converter->toULength = 0; + } + } + else + { + errSource = saveSource; + uniChar = (UChar) _LMBCSGetNextUCharWorker(args, err); + savebytes = (int8_t)(args->source - saveSource); + } + if (U_SUCCESS(*err)) + { + if (uniChar < 0xfffe) + { + *(args->target)++ = uniChar; + if(args->offsets) + { + *(args->offsets)++ = (int32_t)(saveSource - pStartLMBCS); + } + } + else if (uniChar == 0xfffe) + { + *err = U_INVALID_CHAR_FOUND; + } + else /* if (uniChar == 0xffff) */ + { + *err = U_ILLEGAL_CHAR_FOUND; + } + } + } + /* if target ran out before source, return U_BUFFER_OVERFLOW_ERROR */ + if (U_SUCCESS(*err) && args->sourceLimit > args->source && args->targetLimit <= args->target) + { + *err = U_BUFFER_OVERFLOW_ERROR; + } + else if (U_FAILURE(*err)) + { + /* If character incomplete or unmappable/illegal, store it in toUBytes[] */ + args->converter->toULength = savebytes; + if (savebytes > 0) { + uprv_memcpy(args->converter->toUBytes, errSource, savebytes); + } + if (*err == U_TRUNCATED_CHAR_FOUND) { + *err = U_ZERO_ERROR; + } + } +} + +/* And now, the macroized declarations of data & functions: */ +DEFINE_LMBCS_OPEN(1) +DEFINE_LMBCS_OPEN(2) +DEFINE_LMBCS_OPEN(3) +DEFINE_LMBCS_OPEN(4) +DEFINE_LMBCS_OPEN(5) +DEFINE_LMBCS_OPEN(6) +DEFINE_LMBCS_OPEN(8) +DEFINE_LMBCS_OPEN(11) +DEFINE_LMBCS_OPEN(16) +DEFINE_LMBCS_OPEN(17) +DEFINE_LMBCS_OPEN(18) +DEFINE_LMBCS_OPEN(19) + + +DECLARE_LMBCS_DATA(1) +DECLARE_LMBCS_DATA(2) +DECLARE_LMBCS_DATA(3) +DECLARE_LMBCS_DATA(4) +DECLARE_LMBCS_DATA(5) +DECLARE_LMBCS_DATA(6) +DECLARE_LMBCS_DATA(8) +DECLARE_LMBCS_DATA(11) +DECLARE_LMBCS_DATA(16) +DECLARE_LMBCS_DATA(17) +DECLARE_LMBCS_DATA(18) +DECLARE_LMBCS_DATA(19) + +U_CDECL_END + +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/thirdparty/icu4c/common/ucnv_set.cpp b/thirdparty/icu4c/common/ucnv_set.cpp new file mode 100644 index 0000000000..926cee0de8 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_set.cpp @@ -0,0 +1,70 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2003-2007, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ucnv_set.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004sep07 +* created by: Markus W. Scherer +* +* Conversion API functions using USet (ucnv_getUnicodeSet()) +* moved here from ucnv.c for removing the dependency of other ucnv_ +* implementation functions on the USet implementation. +*/ + +#include "unicode/utypes.h" +#include "unicode/uset.h" +#include "unicode/ucnv.h" +#include "ucnv_bld.h" +#include "uset_imp.h" + +#if !UCONFIG_NO_CONVERSION + +U_CAPI void U_EXPORT2 +ucnv_getUnicodeSet(const UConverter *cnv, + USet *setFillIn, + UConverterUnicodeSet whichSet, + UErrorCode *pErrorCode) { + /* argument checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return; + } + if(cnv==NULL || setFillIn==NULL || whichSet<UCNV_ROUNDTRIP_SET || UCNV_SET_COUNT<=whichSet) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* does this converter support this function? */ + if(cnv->sharedData->impl->getUnicodeSet==NULL) { + *pErrorCode=U_UNSUPPORTED_ERROR; + return; + } + + { + USetAdder sa={ + NULL, + uset_add, + uset_addRange, + uset_addString, + uset_remove, + uset_removeRange + }; + sa.set=setFillIn; + + /* empty the set */ + uset_clear(setFillIn); + + /* call the converter to add the code points it supports */ + cnv->sharedData->impl->getUnicodeSet(cnv, &sa, whichSet, pErrorCode); + } +} + +#endif diff --git a/thirdparty/icu4c/common/ucnv_u16.cpp b/thirdparty/icu4c/common/ucnv_u16.cpp new file mode 100644 index 0000000000..a5e8367400 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_u16.cpp @@ -0,0 +1,1579 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2002-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_u16.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jul01 +* created by: Markus W. Scherer +* +* UTF-16 converter implementation. Used to be in ucnv_utf.c. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/uversion.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "cmemory.h" + +enum { + UCNV_NEED_TO_WRITE_BOM=1 +}; + +U_CDECL_BEGIN +/* + * The UTF-16 toUnicode implementation is also used for the Java-specific + * "with BOM" variants of UTF-16BE and UTF-16LE. + */ +static void U_CALLCONV +_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode); + +/* UTF-16BE ----------------------------------------------------------------- */ + +#if U_IS_BIG_ENDIAN +# define _UTF16PEFromUnicodeWithOffsets _UTF16BEFromUnicodeWithOffsets +#else +# define _UTF16PEFromUnicodeWithOffsets _UTF16LEFromUnicodeWithOffsets +#endif + + +static void U_CALLCONV +_UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source; + char *target; + int32_t *offsets; + + uint32_t targetCapacity, length, sourceIndex; + UChar c, trail; + char overflow[4]; + + source=pArgs->source; + length=(int32_t)(pArgs->sourceLimit-source); + if(length<=0) { + /* no input, nothing to do */ + return; + } + + cnv=pArgs->converter; + + /* write the BOM if necessary */ + if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { + static const char bom[]={ (char)0xfeu, (char)0xffu }; + ucnv_fromUWriteBytes(cnv, + bom, 2, + &pArgs->target, pArgs->targetLimit, + &pArgs->offsets, -1, + pErrorCode); + cnv->fromUnicodeStatus=0; + } + + target=pArgs->target; + if(target >= pArgs->targetLimit) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return; + } + + targetCapacity=(uint32_t)(pArgs->targetLimit-target); + offsets=pArgs->offsets; + sourceIndex=0; + + /* c!=0 indicates in several places outside the main loops that a surrogate was found */ + + if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { + /* the last buffer ended with a lead surrogate, output the surrogate pair */ + ++source; + --length; + target[0]=(uint8_t)(c>>8); + target[1]=(uint8_t)c; + target[2]=(uint8_t)(trail>>8); + target[3]=(uint8_t)trail; + target+=4; + targetCapacity-=4; + if(offsets!=NULL) { + *offsets++=-1; + *offsets++=-1; + *offsets++=-1; + *offsets++=-1; + } + sourceIndex=1; + cnv->fromUChar32=c=0; + } + + if(c==0) { + /* copy an even number of bytes for complete UChars */ + uint32_t count=2*length; + if(count>targetCapacity) { + count=targetCapacity&~1; + } + /* count is even */ + targetCapacity-=count; + count>>=1; + length-=count; + + if(offsets==NULL) { + while(count>0) { + c=*source++; + if(U16_IS_SINGLE(c)) { + target[0]=(uint8_t)(c>>8); + target[1]=(uint8_t)c; + target+=2; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { + ++source; + --count; + target[0]=(uint8_t)(c>>8); + target[1]=(uint8_t)c; + target[2]=(uint8_t)(trail>>8); + target[3]=(uint8_t)trail; + target+=4; + } else { + break; + } + --count; + } + } else { + while(count>0) { + c=*source++; + if(U16_IS_SINGLE(c)) { + target[0]=(uint8_t)(c>>8); + target[1]=(uint8_t)c; + target+=2; + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { + ++source; + --count; + target[0]=(uint8_t)(c>>8); + target[1]=(uint8_t)c; + target[2]=(uint8_t)(trail>>8); + target[3]=(uint8_t)trail; + target+=4; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + sourceIndex+=2; + } else { + break; + } + --count; + } + } + + if(count==0) { + /* done with the loop for complete UChars */ + if(length>0 && targetCapacity>0) { + /* + * there is more input and some target capacity - + * it must be targetCapacity==1 because otherwise + * the above would have copied more; + * prepare for overflow output + */ + if(U16_IS_SINGLE(c=*source++)) { + overflow[0]=(char)(c>>8); + overflow[1]=(char)c; + length=2; /* 2 bytes to output */ + c=0; + /* } else { keep c for surrogate handling, length will be set there */ + } + } else { + length=0; + c=0; + } + } else { + /* keep c for surrogate handling, length will be set there */ + targetCapacity+=2*count; + } + } else { + length=0; /* from here on, length counts the bytes in overflow[] */ + } + + if(c!=0) { + /* + * c is a surrogate, and + * - source or target too short + * - or the surrogate is unmatched + */ + length=0; + if(U16_IS_SURROGATE_LEAD(c)) { + if(source<pArgs->sourceLimit) { + if(U16_IS_TRAIL(trail=*source)) { + /* output the surrogate pair, will overflow (see conditions comment above) */ + ++source; + overflow[0]=(char)(c>>8); + overflow[1]=(char)c; + overflow[2]=(char)(trail>>8); + overflow[3]=(char)trail; + length=4; /* 4 bytes to output */ + c=0; + } else { + /* unmatched lead surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* see if the trail surrogate is in the next buffer */ + } + } else { + /* unmatched trail surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + cnv->fromUChar32=c; + } + + if(length>0) { + /* output length bytes with overflow (length>targetCapacity>0) */ + ucnv_fromUWriteBytes(cnv, + overflow, length, + (char **)&target, pArgs->targetLimit, + &offsets, sourceIndex, + pErrorCode); + targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); + } + + if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; +} + +static void U_CALLCONV +_UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source; + UChar *target; + int32_t *offsets; + + uint32_t targetCapacity, length, count, sourceIndex; + UChar c, trail; + + if(pArgs->converter->mode<8) { + _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); + return; + } + + cnv=pArgs->converter; + source=(const uint8_t *)pArgs->source; + length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); + if(length<=0 && cnv->toUnicodeStatus==0) { + /* no input, nothing to do */ + return; + } + + target=pArgs->target; + if(target >= pArgs->targetLimit) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return; + } + + targetCapacity=(uint32_t)(pArgs->targetLimit-target); + offsets=pArgs->offsets; + sourceIndex=0; + c=0; + + /* complete a partial UChar or pair from the last call */ + if(cnv->toUnicodeStatus!=0) { + /* + * special case: single byte from a previous buffer, + * where the byte turned out not to belong to a trail surrogate + * and the preceding, unmatched lead surrogate was put into toUBytes[] + * for error handling + */ + cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; + cnv->toULength=1; + cnv->toUnicodeStatus=0; + } + if((count=cnv->toULength)!=0) { + uint8_t *p=cnv->toUBytes; + do { + p[count++]=*source++; + ++sourceIndex; + --length; + if(count==2) { + c=((UChar)p[0]<<8)|p[1]; + if(U16_IS_SINGLE(c)) { + /* output the BMP code point */ + *target++=c; + if(offsets!=NULL) { + *offsets++=-1; + } + --targetCapacity; + count=0; + c=0; + break; + } else if(U16_IS_SURROGATE_LEAD(c)) { + /* continue collecting bytes for the trail surrogate */ + c=0; /* avoid unnecessary surrogate handling below */ + } else { + /* fall through to error handling for an unmatched trail surrogate */ + break; + } + } else if(count==4) { + c=((UChar)p[0]<<8)|p[1]; + trail=((UChar)p[2]<<8)|p[3]; + if(U16_IS_TRAIL(trail)) { + /* output the surrogate pair */ + *target++=c; + if(targetCapacity>=2) { + *target++=trail; + if(offsets!=NULL) { + *offsets++=-1; + *offsets++=-1; + } + targetCapacity-=2; + } else /* targetCapacity==1 */ { + targetCapacity=0; + cnv->UCharErrorBuffer[0]=trail; + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + count=0; + c=0; + break; + } else { + /* unmatched lead surrogate, handle here for consistent toUBytes[] */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + + /* back out reading the code unit after it */ + if(((const uint8_t *)pArgs->source-source)>=2) { + source-=2; + } else { + /* + * if the trail unit's first byte was in a previous buffer, then + * we need to put it into a special place because toUBytes[] will be + * used for the lead unit's bytes + */ + cnv->toUnicodeStatus=0x100|p[2]; + --source; + } + cnv->toULength=2; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; + return; + } + } + } while(length>0); + cnv->toULength=(int8_t)count; + } + + /* copy an even number of bytes for complete UChars */ + count=2*targetCapacity; + if(count>length) { + count=length&~1; + } + if(c==0 && count>0) { + length-=count; + count>>=1; + targetCapacity-=count; + if(offsets==NULL) { + do { + c=((UChar)source[0]<<8)|source[1]; + source+=2; + if(U16_IS_SINGLE(c)) { + *target++=c; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && + U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) + ) { + source+=2; + --count; + *target++=c; + *target++=trail; + } else { + break; + } + } while(--count>0); + } else { + do { + c=((UChar)source[0]<<8)|source[1]; + source+=2; + if(U16_IS_SINGLE(c)) { + *target++=c; + *offsets++=sourceIndex; + sourceIndex+=2; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && + U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) + ) { + source+=2; + --count; + *target++=c; + *target++=trail; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + sourceIndex+=4; + } else { + break; + } + } while(--count>0); + } + + if(count==0) { + /* done with the loop for complete UChars */ + c=0; + } else { + /* keep c for surrogate handling, trail will be set there */ + length+=2*(count-1); /* one more byte pair was consumed than count decremented */ + targetCapacity+=count; + } + } + + if(c!=0) { + /* + * c is a surrogate, and + * - source or target too short + * - or the surrogate is unmatched + */ + cnv->toUBytes[0]=(uint8_t)(c>>8); + cnv->toUBytes[1]=(uint8_t)c; + cnv->toULength=2; + + if(U16_IS_SURROGATE_LEAD(c)) { + if(length>=2) { + if(U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])) { + /* output the surrogate pair, will overflow (see conditions comment above) */ + source+=2; + length-=2; + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + cnv->UCharErrorBuffer[0]=trail; + cnv->UCharErrorBufferLength=1; + cnv->toULength=0; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } else { + /* unmatched lead surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* see if the trail surrogate is in the next buffer */ + } + } else { + /* unmatched trail surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } + + if(U_SUCCESS(*pErrorCode)) { + /* check for a remaining source byte */ + if(length>0) { + if(targetCapacity==0) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } else { + /* it must be length==1 because otherwise the above would have copied more */ + cnv->toUBytes[cnv->toULength++]=*source++; + } + } + } + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; +} + +static UChar32 U_CALLCONV +_UTF16BEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { + const uint8_t *s, *sourceLimit; + UChar32 c; + + if(pArgs->converter->mode<8) { + return UCNV_GET_NEXT_UCHAR_USE_TO_U; + } + + s=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + + if(s>=sourceLimit) { + /* no input */ + *err=U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; + } + + if(s+2>sourceLimit) { + /* only one byte: truncated UChar */ + pArgs->converter->toUBytes[0]=*s++; + pArgs->converter->toULength=1; + pArgs->source=(const char *)s; + *err = U_TRUNCATED_CHAR_FOUND; + return 0xffff; + } + + /* get one UChar */ + c=((UChar32)*s<<8)|s[1]; + s+=2; + + /* check for a surrogate pair */ + if(U_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c)) { + if(s+2<=sourceLimit) { + UChar trail; + + /* get a second UChar and see if it is a trail surrogate */ + trail=((UChar)*s<<8)|s[1]; + if(U16_IS_TRAIL(trail)) { + c=U16_GET_SUPPLEMENTARY(c, trail); + s+=2; + } else { + /* unmatched lead surrogate */ + c=-2; + } + } else { + /* too few (2 or 3) bytes for a surrogate pair: truncated code point */ + uint8_t *bytes=pArgs->converter->toUBytes; + s-=2; + pArgs->converter->toULength=(int8_t)(sourceLimit-s); + do { + *bytes++=*s++; + } while(s<sourceLimit); + + c=0xffff; + *err=U_TRUNCATED_CHAR_FOUND; + } + } else { + /* unmatched trail surrogate */ + c=-2; + } + + if(c<0) { + /* write the unmatched surrogate */ + uint8_t *bytes=pArgs->converter->toUBytes; + pArgs->converter->toULength=2; + *bytes=*(s-2); + bytes[1]=*(s-1); + + c=0xffff; + *err=U_ILLEGAL_CHAR_FOUND; + } + } + + pArgs->source=(const char *)s; + return c; +} + +static void U_CALLCONV +_UTF16BEReset(UConverter *cnv, UConverterResetChoice choice) { + if(choice<=UCNV_RESET_TO_UNICODE) { + /* reset toUnicode state */ + if(UCNV_GET_VERSION(cnv)==0) { + cnv->mode=8; /* no BOM handling */ + } else { + cnv->mode=0; /* Java-specific "UnicodeBig" requires BE BOM or no BOM */ + } + } + if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { + /* reset fromUnicode for "UnicodeBig": prepare to output the UTF-16BE BOM */ + cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; + } +} + +static void U_CALLCONV +_UTF16BEOpen(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode) { + (void)pArgs; + if(UCNV_GET_VERSION(cnv)<=1) { + _UTF16BEReset(cnv, UCNV_RESET_BOTH); + } else { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } +} + +static const char * U_CALLCONV +_UTF16BEGetName(const UConverter *cnv) { + if(UCNV_GET_VERSION(cnv)==0) { + return "UTF-16BE"; + } else { + return "UTF-16BE,version=1"; + } +} +U_CDECL_END + +static const UConverterImpl _UTF16BEImpl={ + UCNV_UTF16_BigEndian, + + NULL, + NULL, + + _UTF16BEOpen, + NULL, + _UTF16BEReset, + + _UTF16BEToUnicodeWithOffsets, + _UTF16BEToUnicodeWithOffsets, + _UTF16BEFromUnicodeWithOffsets, + _UTF16BEFromUnicodeWithOffsets, + _UTF16BEGetNextUChar, + + NULL, + _UTF16BEGetName, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + +static const UConverterStaticData _UTF16BEStaticData={ + sizeof(UConverterStaticData), + "UTF-16BE", + 1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2, + { 0xff, 0xfd, 0, 0 },2,FALSE,FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + + +const UConverterSharedData _UTF16BEData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16BEStaticData, &_UTF16BEImpl); + +/* UTF-16LE ----------------------------------------------------------------- */ +U_CDECL_BEGIN +static void U_CALLCONV +_UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source; + char *target; + int32_t *offsets; + + uint32_t targetCapacity, length, sourceIndex; + UChar c, trail; + char overflow[4]; + + source=pArgs->source; + length=(int32_t)(pArgs->sourceLimit-source); + if(length<=0) { + /* no input, nothing to do */ + return; + } + + cnv=pArgs->converter; + + /* write the BOM if necessary */ + if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { + static const char bom[]={ (char)0xffu, (char)0xfeu }; + ucnv_fromUWriteBytes(cnv, + bom, 2, + &pArgs->target, pArgs->targetLimit, + &pArgs->offsets, -1, + pErrorCode); + cnv->fromUnicodeStatus=0; + } + + target=pArgs->target; + if(target >= pArgs->targetLimit) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return; + } + + targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + sourceIndex=0; + + /* c!=0 indicates in several places outside the main loops that a surrogate was found */ + + if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { + /* the last buffer ended with a lead surrogate, output the surrogate pair */ + ++source; + --length; + target[0]=(uint8_t)c; + target[1]=(uint8_t)(c>>8); + target[2]=(uint8_t)trail; + target[3]=(uint8_t)(trail>>8); + target+=4; + targetCapacity-=4; + if(offsets!=NULL) { + *offsets++=-1; + *offsets++=-1; + *offsets++=-1; + *offsets++=-1; + } + sourceIndex=1; + cnv->fromUChar32=c=0; + } + + if(c==0) { + /* copy an even number of bytes for complete UChars */ + uint32_t count=2*length; + if(count>targetCapacity) { + count=targetCapacity&~1; + } + /* count is even */ + targetCapacity-=count; + count>>=1; + length-=count; + + if(offsets==NULL) { + while(count>0) { + c=*source++; + if(U16_IS_SINGLE(c)) { + target[0]=(uint8_t)c; + target[1]=(uint8_t)(c>>8); + target+=2; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { + ++source; + --count; + target[0]=(uint8_t)c; + target[1]=(uint8_t)(c>>8); + target[2]=(uint8_t)trail; + target[3]=(uint8_t)(trail>>8); + target+=4; + } else { + break; + } + --count; + } + } else { + while(count>0) { + c=*source++; + if(U16_IS_SINGLE(c)) { + target[0]=(uint8_t)c; + target[1]=(uint8_t)(c>>8); + target+=2; + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { + ++source; + --count; + target[0]=(uint8_t)c; + target[1]=(uint8_t)(c>>8); + target[2]=(uint8_t)trail; + target[3]=(uint8_t)(trail>>8); + target+=4; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + sourceIndex+=2; + } else { + break; + } + --count; + } + } + + if(count==0) { + /* done with the loop for complete UChars */ + if(length>0 && targetCapacity>0) { + /* + * there is more input and some target capacity - + * it must be targetCapacity==1 because otherwise + * the above would have copied more; + * prepare for overflow output + */ + if(U16_IS_SINGLE(c=*source++)) { + overflow[0]=(char)c; + overflow[1]=(char)(c>>8); + length=2; /* 2 bytes to output */ + c=0; + /* } else { keep c for surrogate handling, length will be set there */ + } + } else { + length=0; + c=0; + } + } else { + /* keep c for surrogate handling, length will be set there */ + targetCapacity+=2*count; + } + } else { + length=0; /* from here on, length counts the bytes in overflow[] */ + } + + if(c!=0) { + /* + * c is a surrogate, and + * - source or target too short + * - or the surrogate is unmatched + */ + length=0; + if(U16_IS_SURROGATE_LEAD(c)) { + if(source<pArgs->sourceLimit) { + if(U16_IS_TRAIL(trail=*source)) { + /* output the surrogate pair, will overflow (see conditions comment above) */ + ++source; + overflow[0]=(char)c; + overflow[1]=(char)(c>>8); + overflow[2]=(char)trail; + overflow[3]=(char)(trail>>8); + length=4; /* 4 bytes to output */ + c=0; + } else { + /* unmatched lead surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* see if the trail surrogate is in the next buffer */ + } + } else { + /* unmatched trail surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + cnv->fromUChar32=c; + } + + if(length>0) { + /* output length bytes with overflow (length>targetCapacity>0) */ + ucnv_fromUWriteBytes(cnv, + overflow, length, + &target, pArgs->targetLimit, + &offsets, sourceIndex, + pErrorCode); + targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); + } + + if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=target; + pArgs->offsets=offsets; +} + +static void U_CALLCONV +_UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source; + UChar *target; + int32_t *offsets; + + uint32_t targetCapacity, length, count, sourceIndex; + UChar c, trail; + + if(pArgs->converter->mode<8) { + _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); + return; + } + + cnv=pArgs->converter; + source=(const uint8_t *)pArgs->source; + length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); + if(length<=0 && cnv->toUnicodeStatus==0) { + /* no input, nothing to do */ + return; + } + + target=pArgs->target; + if(target >= pArgs->targetLimit) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return; + } + + targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + sourceIndex=0; + c=0; + + /* complete a partial UChar or pair from the last call */ + if(cnv->toUnicodeStatus!=0) { + /* + * special case: single byte from a previous buffer, + * where the byte turned out not to belong to a trail surrogate + * and the preceding, unmatched lead surrogate was put into toUBytes[] + * for error handling + */ + cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; + cnv->toULength=1; + cnv->toUnicodeStatus=0; + } + if((count=cnv->toULength)!=0) { + uint8_t *p=cnv->toUBytes; + do { + p[count++]=*source++; + ++sourceIndex; + --length; + if(count==2) { + c=((UChar)p[1]<<8)|p[0]; + if(U16_IS_SINGLE(c)) { + /* output the BMP code point */ + *target++=c; + if(offsets!=NULL) { + *offsets++=-1; + } + --targetCapacity; + count=0; + c=0; + break; + } else if(U16_IS_SURROGATE_LEAD(c)) { + /* continue collecting bytes for the trail surrogate */ + c=0; /* avoid unnecessary surrogate handling below */ + } else { + /* fall through to error handling for an unmatched trail surrogate */ + break; + } + } else if(count==4) { + c=((UChar)p[1]<<8)|p[0]; + trail=((UChar)p[3]<<8)|p[2]; + if(U16_IS_TRAIL(trail)) { + /* output the surrogate pair */ + *target++=c; + if(targetCapacity>=2) { + *target++=trail; + if(offsets!=NULL) { + *offsets++=-1; + *offsets++=-1; + } + targetCapacity-=2; + } else /* targetCapacity==1 */ { + targetCapacity=0; + cnv->UCharErrorBuffer[0]=trail; + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + count=0; + c=0; + break; + } else { + /* unmatched lead surrogate, handle here for consistent toUBytes[] */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + + /* back out reading the code unit after it */ + if(((const uint8_t *)pArgs->source-source)>=2) { + source-=2; + } else { + /* + * if the trail unit's first byte was in a previous buffer, then + * we need to put it into a special place because toUBytes[] will be + * used for the lead unit's bytes + */ + cnv->toUnicodeStatus=0x100|p[2]; + --source; + } + cnv->toULength=2; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; + return; + } + } + } while(length>0); + cnv->toULength=(int8_t)count; + } + + /* copy an even number of bytes for complete UChars */ + count=2*targetCapacity; + if(count>length) { + count=length&~1; + } + if(c==0 && count>0) { + length-=count; + count>>=1; + targetCapacity-=count; + if(offsets==NULL) { + do { + c=((UChar)source[1]<<8)|source[0]; + source+=2; + if(U16_IS_SINGLE(c)) { + *target++=c; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && + U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) + ) { + source+=2; + --count; + *target++=c; + *target++=trail; + } else { + break; + } + } while(--count>0); + } else { + do { + c=((UChar)source[1]<<8)|source[0]; + source+=2; + if(U16_IS_SINGLE(c)) { + *target++=c; + *offsets++=sourceIndex; + sourceIndex+=2; + } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && + U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) + ) { + source+=2; + --count; + *target++=c; + *target++=trail; + *offsets++=sourceIndex; + *offsets++=sourceIndex; + sourceIndex+=4; + } else { + break; + } + } while(--count>0); + } + + if(count==0) { + /* done with the loop for complete UChars */ + c=0; + } else { + /* keep c for surrogate handling, trail will be set there */ + length+=2*(count-1); /* one more byte pair was consumed than count decremented */ + targetCapacity+=count; + } + } + + if(c!=0) { + /* + * c is a surrogate, and + * - source or target too short + * - or the surrogate is unmatched + */ + cnv->toUBytes[0]=(uint8_t)c; + cnv->toUBytes[1]=(uint8_t)(c>>8); + cnv->toULength=2; + + if(U16_IS_SURROGATE_LEAD(c)) { + if(length>=2) { + if(U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])) { + /* output the surrogate pair, will overflow (see conditions comment above) */ + source+=2; + length-=2; + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + cnv->UCharErrorBuffer[0]=trail; + cnv->UCharErrorBufferLength=1; + cnv->toULength=0; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } else { + /* unmatched lead surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* see if the trail surrogate is in the next buffer */ + } + } else { + /* unmatched trail surrogate */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } + + if(U_SUCCESS(*pErrorCode)) { + /* check for a remaining source byte */ + if(length>0) { + if(targetCapacity==0) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } else { + /* it must be length==1 because otherwise the above would have copied more */ + cnv->toUBytes[cnv->toULength++]=*source++; + } + } + } + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; +} + +static UChar32 U_CALLCONV +_UTF16LEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { + const uint8_t *s, *sourceLimit; + UChar32 c; + + if(pArgs->converter->mode<8) { + return UCNV_GET_NEXT_UCHAR_USE_TO_U; + } + + s=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + + if(s>=sourceLimit) { + /* no input */ + *err=U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; + } + + if(s+2>sourceLimit) { + /* only one byte: truncated UChar */ + pArgs->converter->toUBytes[0]=*s++; + pArgs->converter->toULength=1; + pArgs->source=(const char *)s; + *err = U_TRUNCATED_CHAR_FOUND; + return 0xffff; + } + + /* get one UChar */ + c=((UChar32)s[1]<<8)|*s; + s+=2; + + /* check for a surrogate pair */ + if(U_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c)) { + if(s+2<=sourceLimit) { + UChar trail; + + /* get a second UChar and see if it is a trail surrogate */ + trail=((UChar)s[1]<<8)|*s; + if(U16_IS_TRAIL(trail)) { + c=U16_GET_SUPPLEMENTARY(c, trail); + s+=2; + } else { + /* unmatched lead surrogate */ + c=-2; + } + } else { + /* too few (2 or 3) bytes for a surrogate pair: truncated code point */ + uint8_t *bytes=pArgs->converter->toUBytes; + s-=2; + pArgs->converter->toULength=(int8_t)(sourceLimit-s); + do { + *bytes++=*s++; + } while(s<sourceLimit); + + c=0xffff; + *err=U_TRUNCATED_CHAR_FOUND; + } + } else { + /* unmatched trail surrogate */ + c=-2; + } + + if(c<0) { + /* write the unmatched surrogate */ + uint8_t *bytes=pArgs->converter->toUBytes; + pArgs->converter->toULength=2; + *bytes=*(s-2); + bytes[1]=*(s-1); + + c=0xffff; + *err=U_ILLEGAL_CHAR_FOUND; + } + } + + pArgs->source=(const char *)s; + return c; +} + +static void U_CALLCONV +_UTF16LEReset(UConverter *cnv, UConverterResetChoice choice) { + if(choice<=UCNV_RESET_TO_UNICODE) { + /* reset toUnicode state */ + if(UCNV_GET_VERSION(cnv)==0) { + cnv->mode=8; /* no BOM handling */ + } else { + cnv->mode=0; /* Java-specific "UnicodeLittle" requires LE BOM or no BOM */ + } + } + if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { + /* reset fromUnicode for "UnicodeLittle": prepare to output the UTF-16LE BOM */ + cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; + } +} + +static void U_CALLCONV +_UTF16LEOpen(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode) { + (void)pArgs; + if(UCNV_GET_VERSION(cnv)<=1) { + _UTF16LEReset(cnv, UCNV_RESET_BOTH); + } else { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } +} + +static const char * U_CALLCONV +_UTF16LEGetName(const UConverter *cnv) { + if(UCNV_GET_VERSION(cnv)==0) { + return "UTF-16LE"; + } else { + return "UTF-16LE,version=1"; + } +} +U_CDECL_END + +static const UConverterImpl _UTF16LEImpl={ + UCNV_UTF16_LittleEndian, + + NULL, + NULL, + + _UTF16LEOpen, + NULL, + _UTF16LEReset, + + _UTF16LEToUnicodeWithOffsets, + _UTF16LEToUnicodeWithOffsets, + _UTF16LEFromUnicodeWithOffsets, + _UTF16LEFromUnicodeWithOffsets, + _UTF16LEGetNextUChar, + + NULL, + _UTF16LEGetName, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + + +static const UConverterStaticData _UTF16LEStaticData={ + sizeof(UConverterStaticData), + "UTF-16LE", + 1202, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2, + { 0xfd, 0xff, 0, 0 },2,FALSE,FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + + +const UConverterSharedData _UTF16LEData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16LEStaticData, &_UTF16LEImpl); + +/* UTF-16 (Detect BOM) ------------------------------------------------------ */ + +/* + * Detect a BOM at the beginning of the stream and select UTF-16BE or UTF-16LE + * accordingly. + * This is a simpler version of the UTF-32 converter, with + * fewer states for shorter BOMs. + * + * State values: + * 0 initial state + * 1 saw first byte + * 2..5 - + * 6..7 see _UTF16ToUnicodeWithOffsets() comments in state 1 + * 8 UTF-16BE mode + * 9 UTF-16LE mode + * + * During detection: state==number of initial bytes seen so far. + * + * On output, emit U+FEFF as the first code point. + * + * Variants: + * - UTF-16,version=1 (Java "Unicode" encoding) treats a missing BOM as an error. + * - UTF-16BE,version=1 (Java "UnicodeBig" encoding) and + * UTF-16LE,version=1 (Java "UnicodeLittle" encoding) treat a reverse BOM as an error. + */ +U_CDECL_BEGIN +static void U_CALLCONV +_UTF16Reset(UConverter *cnv, UConverterResetChoice choice) { + if(choice<=UCNV_RESET_TO_UNICODE) { + /* reset toUnicode: state=0 */ + cnv->mode=0; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + /* reset fromUnicode: prepare to output the UTF-16PE BOM */ + cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; + } +} +U_CDECL_END +extern const UConverterSharedData _UTF16v2Data; +U_CDECL_BEGIN +static void U_CALLCONV +_UTF16Open(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode) { + if(UCNV_GET_VERSION(cnv)<=2) { + if(UCNV_GET_VERSION(cnv)==2 && !pArgs->onlyTestIsLoadable) { + /* + * Switch implementation, and switch the staticData that's different + * and was copied into the UConverter. + * (See ucnv_createConverterFromSharedData() in ucnv_bld.c.) + * UTF-16,version=2 fromUnicode() always writes a big-endian byte stream. + */ + cnv->sharedData=(UConverterSharedData*)&_UTF16v2Data; + uprv_memcpy(cnv->subChars, _UTF16v2Data.staticData->subChar, UCNV_MAX_SUBCHAR_LEN); + } + _UTF16Reset(cnv, UCNV_RESET_BOTH); + } else { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } +} + +static const char * U_CALLCONV +_UTF16GetName(const UConverter *cnv) { + if(UCNV_GET_VERSION(cnv)==0) { + return "UTF-16"; + } else if(UCNV_GET_VERSION(cnv)==1) { + return "UTF-16,version=1"; + } else { + return "UTF-16,version=2"; + } +} +U_CDECL_END +extern const UConverterSharedData _UTF16Data; + +static inline bool IS_UTF16BE(const UConverter *cnv) { + return ((cnv)->sharedData == &_UTF16BEData); +} + +static inline bool IS_UTF16LE(const UConverter *cnv) { + return ((cnv)->sharedData == &_UTF16LEData); +} + +static inline bool IS_UTF16(const UConverter *cnv) { + return ((cnv)->sharedData==&_UTF16Data) || ((cnv)->sharedData == &_UTF16v2Data); +} + +U_CDECL_BEGIN +static void U_CALLCONV +_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv=pArgs->converter; + const char *source=pArgs->source; + const char *sourceLimit=pArgs->sourceLimit; + int32_t *offsets=pArgs->offsets; + + int32_t state, offsetDelta; + uint8_t b; + + state=cnv->mode; + + /* + * If we detect a BOM in this buffer, then we must add the BOM size to the + * offsets because the actual converter function will not see and count the BOM. + * offsetDelta will have the number of the BOM bytes that are in the current buffer. + */ + offsetDelta=0; + + while(source<sourceLimit && U_SUCCESS(*pErrorCode)) { + switch(state) { + case 0: + cnv->toUBytes[0]=(uint8_t)*source++; + cnv->toULength=1; + state=1; + break; + case 1: + /* + * Only inside this switch case can the state variable + * temporarily take two additional values: + * 6: BOM error, continue with BE + * 7: BOM error, continue with LE + */ + b=*source; + if(cnv->toUBytes[0]==0xfe && b==0xff) { + if(IS_UTF16LE(cnv)) { + state=7; /* illegal reverse BOM for Java "UnicodeLittle" */ + } else { + state=8; /* detect UTF-16BE */ + } + } else if(cnv->toUBytes[0]==0xff && b==0xfe) { + if(IS_UTF16BE(cnv)) { + state=6; /* illegal reverse BOM for Java "UnicodeBig" */ + } else { + state=9; /* detect UTF-16LE */ + } + } else if((IS_UTF16(cnv) && UCNV_GET_VERSION(cnv)==1)) { + state=6; /* illegal missing BOM for Java "Unicode" */ + } + if(state>=8) { + /* BOM detected, consume it */ + ++source; + cnv->toULength=0; + offsetDelta=(int32_t)(source-pArgs->source); + } else if(state<6) { + /* ok: no BOM, and not a reverse BOM */ + if(source!=pArgs->source) { + /* reset the source for a correct first offset */ + source=pArgs->source; + cnv->toULength=0; + } + if(IS_UTF16LE(cnv)) { + /* Make Java "UnicodeLittle" default to LE. */ + state=9; + } else { + /* Make standard UTF-16 and Java "UnicodeBig" default to BE. */ + state=8; + } + } else { + /* + * error: missing BOM, or reverse BOM + * UTF-16,version=1: Java-specific "Unicode" requires a BOM. + * UTF-16BE,version=1: Java-specific "UnicodeBig" requires a BE BOM or no BOM. + * UTF-16LE,version=1: Java-specific "UnicodeLittle" requires an LE BOM or no BOM. + */ + /* report the non-BOM or reverse BOM as an illegal sequence */ + cnv->toUBytes[1]=b; + cnv->toULength=2; + pArgs->source=source+1; + /* continue with conversion if the callback resets the error */ + /* + * Make Java "Unicode" default to BE like standard UTF-16. + * Make Java "UnicodeBig" and "UnicodeLittle" default + * to their normal endiannesses. + */ + cnv->mode=state+2; + *pErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE; + return; + } + /* convert the rest of the stream */ + cnv->mode=state; + continue; + case 8: + /* call UTF-16BE */ + pArgs->source=source; + _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); + source=pArgs->source; + break; + case 9: + /* call UTF-16LE */ + pArgs->source=source; + _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); + source=pArgs->source; + break; + default: + break; /* does not occur */ + } + } + + /* add BOM size to offsets - see comment at offsetDelta declaration */ + if(offsets!=NULL && offsetDelta!=0) { + int32_t *offsetsLimit=pArgs->offsets; + while(offsets<offsetsLimit) { + *offsets++ += offsetDelta; + } + } + + pArgs->source=source; + + if(source==sourceLimit && pArgs->flush) { + /* handle truncated input */ + switch(state) { + case 0: + break; /* no input at all, nothing to do */ + case 8: + _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); + break; + case 9: + _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); + break; + default: + /* 0<state<8: framework will report truncation, nothing to do here */ + break; + } + } + + cnv->mode=state; +} + +static UChar32 U_CALLCONV +_UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + switch(pArgs->converter->mode) { + case 8: + return _UTF16BEGetNextUChar(pArgs, pErrorCode); + case 9: + return _UTF16LEGetNextUChar(pArgs, pErrorCode); + default: + return UCNV_GET_NEXT_UCHAR_USE_TO_U; + } +} +U_CDECL_END + +static const UConverterImpl _UTF16Impl = { + UCNV_UTF16, + + NULL, + NULL, + + _UTF16Open, + NULL, + _UTF16Reset, + + _UTF16ToUnicodeWithOffsets, + _UTF16ToUnicodeWithOffsets, + _UTF16PEFromUnicodeWithOffsets, + _UTF16PEFromUnicodeWithOffsets, + _UTF16GetNextUChar, + + NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ + _UTF16GetName, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + +static const UConverterStaticData _UTF16StaticData = { + sizeof(UConverterStaticData), + "UTF-16", + 1204, /* CCSID for BOM sensitive UTF-16 */ + UCNV_IBM, UCNV_UTF16, 2, 2, +#if U_IS_BIG_ENDIAN + { 0xff, 0xfd, 0, 0 }, 2, +#else + { 0xfd, 0xff, 0, 0 }, 2, +#endif + FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _UTF16Data = + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16StaticData, &_UTF16Impl); + +static const UConverterImpl _UTF16v2Impl = { + UCNV_UTF16, + + NULL, + NULL, + + _UTF16Open, + NULL, + _UTF16Reset, + + _UTF16ToUnicodeWithOffsets, + _UTF16ToUnicodeWithOffsets, + _UTF16BEFromUnicodeWithOffsets, + _UTF16BEFromUnicodeWithOffsets, + _UTF16GetNextUChar, + + NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ + _UTF16GetName, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + +static const UConverterStaticData _UTF16v2StaticData = { + sizeof(UConverterStaticData), + "UTF-16,version=2", + 1204, /* CCSID for BOM sensitive UTF-16 */ + UCNV_IBM, UCNV_UTF16, 2, 2, + { 0xff, 0xfd, 0, 0 }, 2, + FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _UTF16v2Data = + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16v2StaticData, &_UTF16v2Impl); + +#endif diff --git a/thirdparty/icu4c/common/ucnv_u32.cpp b/thirdparty/icu4c/common/ucnv_u32.cpp new file mode 100644 index 0000000000..9f98914b9d --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_u32.cpp @@ -0,0 +1,1253 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2002-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_u32.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jul01 +* created by: Markus W. Scherer +* +* UTF-32 converter implementation. Used to be in ucnv_utf.c. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/utf.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "cmemory.h" + +#define MAXIMUM_UCS2 0x0000FFFF +#define MAXIMUM_UTF 0x0010FFFF +#define HALF_SHIFT 10 +#define HALF_BASE 0x0010000 +#define HALF_MASK 0x3FF +#define SURROGATE_HIGH_START 0xD800 +#define SURROGATE_LOW_START 0xDC00 + +/* -SURROGATE_LOW_START + HALF_BASE */ +#define SURROGATE_LOW_BASE 9216 + +enum { + UCNV_NEED_TO_WRITE_BOM=1 +}; + +/* UTF-32BE ----------------------------------------------------------------- */ +U_CDECL_BEGIN +static void U_CALLCONV +T_UConverter_toUnicode_UTF32_BE(UConverterToUnicodeArgs * args, + UErrorCode * err) +{ + const unsigned char *mySource = (unsigned char *) args->source; + UChar *myTarget = args->target; + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; + const UChar *targetLimit = args->targetLimit; + unsigned char *toUBytes = args->converter->toUBytes; + uint32_t ch, i; + + /* Restore state of current sequence */ + if (args->converter->toULength > 0 && myTarget < targetLimit) { + i = args->converter->toULength; /* restore # of bytes consumed */ + args->converter->toULength = 0; + + ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ + args->converter->toUnicodeStatus = 0; + goto morebytes; + } + + while (mySource < sourceLimit && myTarget < targetLimit) { + i = 0; + ch = 0; +morebytes: + while (i < sizeof(uint32_t)) { + if (mySource < sourceLimit) { + ch = (ch << 8) | (uint8_t)(*mySource); + toUBytes[i++] = (char) *(mySource++); + } + else { + /* stores a partially calculated target*/ + /* + 1 to make 0 a valid character */ + args->converter->toUnicodeStatus = ch + 1; + args->converter->toULength = (int8_t) i; + goto donefornow; + } + } + + if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ + if (ch <= MAXIMUM_UCS2) + { + /* fits in 16 bits */ + *(myTarget++) = (UChar) ch; + } + else { + /* write out the surrogates */ + *(myTarget++) = U16_LEAD(ch); + ch = U16_TRAIL(ch); + if (myTarget < targetLimit) { + *(myTarget++) = (UChar)ch; + } + else { + /* Put in overflow buffer (not handled here) */ + args->converter->UCharErrorBuffer[0] = (UChar) ch; + args->converter->UCharErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + else { + args->converter->toULength = (int8_t)i; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + +donefornow: + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { + /* End of target buffer */ + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = myTarget; + args->source = (const char *) mySource; +} + +static void U_CALLCONV +T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, + UErrorCode * err) +{ + const unsigned char *mySource = (unsigned char *) args->source; + UChar *myTarget = args->target; + int32_t *myOffsets = args->offsets; + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; + const UChar *targetLimit = args->targetLimit; + unsigned char *toUBytes = args->converter->toUBytes; + uint32_t ch, i; + int32_t offsetNum = 0; + + /* Restore state of current sequence */ + if (args->converter->toULength > 0 && myTarget < targetLimit) { + i = args->converter->toULength; /* restore # of bytes consumed */ + args->converter->toULength = 0; + + ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/ + args->converter->toUnicodeStatus = 0; + goto morebytes; + } + + while (mySource < sourceLimit && myTarget < targetLimit) { + i = 0; + ch = 0; +morebytes: + while (i < sizeof(uint32_t)) { + if (mySource < sourceLimit) { + ch = (ch << 8) | (uint8_t)(*mySource); + toUBytes[i++] = (char) *(mySource++); + } + else { + /* stores a partially calculated target*/ + /* + 1 to make 0 a valid character */ + args->converter->toUnicodeStatus = ch + 1; + args->converter->toULength = (int8_t) i; + goto donefornow; + } + } + + if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ + if (ch <= MAXIMUM_UCS2) { + /* fits in 16 bits */ + *(myTarget++) = (UChar) ch; + *(myOffsets++) = offsetNum; + } + else { + /* write out the surrogates */ + *(myTarget++) = U16_LEAD(ch); + *myOffsets++ = offsetNum; + ch = U16_TRAIL(ch); + if (myTarget < targetLimit) + { + *(myTarget++) = (UChar)ch; + *(myOffsets++) = offsetNum; + } + else { + /* Put in overflow buffer (not handled here) */ + args->converter->UCharErrorBuffer[0] = (UChar) ch; + args->converter->UCharErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + else { + args->converter->toULength = (int8_t)i; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + offsetNum += i; + } + +donefornow: + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + /* End of target buffer */ + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = myTarget; + args->source = (const char *) mySource; + args->offsets = myOffsets; +} + +static void U_CALLCONV +T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args, + UErrorCode * err) +{ + const UChar *mySource = args->source; + unsigned char *myTarget; + const UChar *sourceLimit = args->sourceLimit; + const unsigned char *targetLimit = (unsigned char *) args->targetLimit; + UChar32 ch, ch2; + unsigned int indexToWrite; + unsigned char temp[sizeof(uint32_t)]; + + if(mySource >= sourceLimit) { + /* no input, nothing to do */ + return; + } + + /* write the BOM if necessary */ + if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { + static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu }; + ucnv_fromUWriteBytes(args->converter, + bom, 4, + &args->target, args->targetLimit, + &args->offsets, -1, + err); + args->converter->fromUnicodeStatus=0; + } + + myTarget = (unsigned char *) args->target; + temp[0] = 0; + + if (args->converter->fromUChar32) { + ch = args->converter->fromUChar32; + args->converter->fromUChar32 = 0; + goto lowsurogate; + } + + while (mySource < sourceLimit && myTarget < targetLimit) { + ch = *(mySource++); + + if (U_IS_SURROGATE(ch)) { + if (U_IS_LEAD(ch)) { +lowsurogate: + if (mySource < sourceLimit) { + ch2 = *mySource; + if (U_IS_TRAIL(ch2)) { + ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; + mySource++; + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + else { + /* ran out of source */ + args->converter->fromUChar32 = ch; + if (args->flush) { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err = U_ILLEGAL_CHAR_FOUND; + } + break; + } + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + + /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ + temp[1] = (uint8_t) (ch >> 16 & 0x1F); + temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ + temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ + + for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { + if (myTarget < targetLimit) { + *(myTarget++) = temp[indexToWrite]; + } + else { + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + } + + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = (char *) myTarget; + args->source = mySource; +} + +static void U_CALLCONV +T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, + UErrorCode * err) +{ + const UChar *mySource = args->source; + unsigned char *myTarget; + int32_t *myOffsets; + const UChar *sourceLimit = args->sourceLimit; + const unsigned char *targetLimit = (unsigned char *) args->targetLimit; + UChar32 ch, ch2; + int32_t offsetNum = 0; + unsigned int indexToWrite; + unsigned char temp[sizeof(uint32_t)]; + + if(mySource >= sourceLimit) { + /* no input, nothing to do */ + return; + } + + /* write the BOM if necessary */ + if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { + static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu }; + ucnv_fromUWriteBytes(args->converter, + bom, 4, + &args->target, args->targetLimit, + &args->offsets, -1, + err); + args->converter->fromUnicodeStatus=0; + } + + myTarget = (unsigned char *) args->target; + myOffsets = args->offsets; + temp[0] = 0; + + if (args->converter->fromUChar32) { + ch = args->converter->fromUChar32; + args->converter->fromUChar32 = 0; + goto lowsurogate; + } + + while (mySource < sourceLimit && myTarget < targetLimit) { + ch = *(mySource++); + + if (U_IS_SURROGATE(ch)) { + if (U_IS_LEAD(ch)) { +lowsurogate: + if (mySource < sourceLimit) { + ch2 = *mySource; + if (U_IS_TRAIL(ch2)) { + ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; + mySource++; + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + else { + /* ran out of source */ + args->converter->fromUChar32 = ch; + if (args->flush) { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err = U_ILLEGAL_CHAR_FOUND; + } + break; + } + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + + /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ + temp[1] = (uint8_t) (ch >> 16 & 0x1F); + temp[2] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ + temp[3] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ + + for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) { + if (myTarget < targetLimit) { + *(myTarget++) = temp[indexToWrite]; + *(myOffsets++) = offsetNum; + } + else { + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + offsetNum = offsetNum + 1 + (temp[1] != 0); + } + + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) { + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = (char *) myTarget; + args->source = mySource; + args->offsets = myOffsets; +} + +static UChar32 U_CALLCONV +T_UConverter_getNextUChar_UTF32_BE(UConverterToUnicodeArgs* args, + UErrorCode* err) +{ + const uint8_t *mySource; + UChar32 myUChar; + int32_t length; + + mySource = (const uint8_t *)args->source; + if (mySource >= (const uint8_t *)args->sourceLimit) + { + /* no input */ + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; + } + + length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); + if (length < 4) + { + /* got a partial character */ + uprv_memcpy(args->converter->toUBytes, mySource, length); + args->converter->toULength = (int8_t)length; + args->source = (const char *)(mySource + length); + *err = U_TRUNCATED_CHAR_FOUND; + return 0xffff; + } + + /* Don't even try to do a direct cast because the value may be on an odd address. */ + myUChar = ((UChar32)mySource[0] << 24) + | ((UChar32)mySource[1] << 16) + | ((UChar32)mySource[2] << 8) + | ((UChar32)mySource[3]); + + args->source = (const char *)(mySource + 4); + if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { + return myUChar; + } + + uprv_memcpy(args->converter->toUBytes, mySource, 4); + args->converter->toULength = 4; + + *err = U_ILLEGAL_CHAR_FOUND; + return 0xffff; +} +U_CDECL_END +static const UConverterImpl _UTF32BEImpl = { + UCNV_UTF32_BigEndian, + + NULL, + NULL, + + NULL, + NULL, + NULL, + + T_UConverter_toUnicode_UTF32_BE, + T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC, + T_UConverter_fromUnicode_UTF32_BE, + T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, + T_UConverter_getNextUChar_UTF32_BE, + + NULL, + NULL, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + +/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ +static const UConverterStaticData _UTF32BEStaticData = { + sizeof(UConverterStaticData), + "UTF-32BE", + 1232, + UCNV_IBM, UCNV_UTF32_BigEndian, 4, 4, + { 0, 0, 0xff, 0xfd }, 4, FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _UTF32BEData = + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32BEStaticData, &_UTF32BEImpl); + +/* UTF-32LE ---------------------------------------------------------- */ +U_CDECL_BEGIN +static void U_CALLCONV +T_UConverter_toUnicode_UTF32_LE(UConverterToUnicodeArgs * args, + UErrorCode * err) +{ + const unsigned char *mySource = (unsigned char *) args->source; + UChar *myTarget = args->target; + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; + const UChar *targetLimit = args->targetLimit; + unsigned char *toUBytes = args->converter->toUBytes; + uint32_t ch, i; + + /* Restore state of current sequence */ + if (args->converter->toULength > 0 && myTarget < targetLimit) + { + i = args->converter->toULength; /* restore # of bytes consumed */ + args->converter->toULength = 0; + + /* Stores the previously calculated ch from a previous call*/ + ch = args->converter->toUnicodeStatus - 1; + args->converter->toUnicodeStatus = 0; + goto morebytes; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + i = 0; + ch = 0; +morebytes: + while (i < sizeof(uint32_t)) + { + if (mySource < sourceLimit) + { + ch |= ((uint8_t)(*mySource)) << (i * 8); + toUBytes[i++] = (char) *(mySource++); + } + else + { + /* stores a partially calculated target*/ + /* + 1 to make 0 a valid character */ + args->converter->toUnicodeStatus = ch + 1; + args->converter->toULength = (int8_t) i; + goto donefornow; + } + } + + if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) { + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ + if (ch <= MAXIMUM_UCS2) { + /* fits in 16 bits */ + *(myTarget++) = (UChar) ch; + } + else { + /* write out the surrogates */ + *(myTarget++) = U16_LEAD(ch); + ch = U16_TRAIL(ch); + if (myTarget < targetLimit) { + *(myTarget++) = (UChar)ch; + } + else { + /* Put in overflow buffer (not handled here) */ + args->converter->UCharErrorBuffer[0] = (UChar) ch; + args->converter->UCharErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + else { + args->converter->toULength = (int8_t)i; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + +donefornow: + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + /* End of target buffer */ + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = myTarget; + args->source = (const char *) mySource; +} + +static void U_CALLCONV +T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(UConverterToUnicodeArgs * args, + UErrorCode * err) +{ + const unsigned char *mySource = (unsigned char *) args->source; + UChar *myTarget = args->target; + int32_t *myOffsets = args->offsets; + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; + const UChar *targetLimit = args->targetLimit; + unsigned char *toUBytes = args->converter->toUBytes; + uint32_t ch, i; + int32_t offsetNum = 0; + + /* Restore state of current sequence */ + if (args->converter->toULength > 0 && myTarget < targetLimit) + { + i = args->converter->toULength; /* restore # of bytes consumed */ + args->converter->toULength = 0; + + /* Stores the previously calculated ch from a previous call*/ + ch = args->converter->toUnicodeStatus - 1; + args->converter->toUnicodeStatus = 0; + goto morebytes; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + i = 0; + ch = 0; +morebytes: + while (i < sizeof(uint32_t)) + { + if (mySource < sourceLimit) + { + ch |= ((uint8_t)(*mySource)) << (i * 8); + toUBytes[i++] = (char) *(mySource++); + } + else + { + /* stores a partially calculated target*/ + /* + 1 to make 0 a valid character */ + args->converter->toUnicodeStatus = ch + 1; + args->converter->toULength = (int8_t) i; + goto donefornow; + } + } + + if (ch <= MAXIMUM_UTF && !U_IS_SURROGATE(ch)) + { + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ + if (ch <= MAXIMUM_UCS2) + { + /* fits in 16 bits */ + *(myTarget++) = (UChar) ch; + *(myOffsets++) = offsetNum; + } + else { + /* write out the surrogates */ + *(myTarget++) = U16_LEAD(ch); + *(myOffsets++) = offsetNum; + ch = U16_TRAIL(ch); + if (myTarget < targetLimit) + { + *(myTarget++) = (UChar)ch; + *(myOffsets++) = offsetNum; + } + else + { + /* Put in overflow buffer (not handled here) */ + args->converter->UCharErrorBuffer[0] = (UChar) ch; + args->converter->UCharErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + else + { + args->converter->toULength = (int8_t)i; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + offsetNum += i; + } + +donefornow: + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + /* End of target buffer */ + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = myTarget; + args->source = (const char *) mySource; + args->offsets = myOffsets; +} + +static void U_CALLCONV +T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args, + UErrorCode * err) +{ + const UChar *mySource = args->source; + unsigned char *myTarget; + const UChar *sourceLimit = args->sourceLimit; + const unsigned char *targetLimit = (unsigned char *) args->targetLimit; + UChar32 ch, ch2; + unsigned int indexToWrite; + unsigned char temp[sizeof(uint32_t)]; + + if(mySource >= sourceLimit) { + /* no input, nothing to do */ + return; + } + + /* write the BOM if necessary */ + if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { + static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 }; + ucnv_fromUWriteBytes(args->converter, + bom, 4, + &args->target, args->targetLimit, + &args->offsets, -1, + err); + args->converter->fromUnicodeStatus=0; + } + + myTarget = (unsigned char *) args->target; + temp[3] = 0; + + if (args->converter->fromUChar32) + { + ch = args->converter->fromUChar32; + args->converter->fromUChar32 = 0; + goto lowsurogate; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + ch = *(mySource++); + + if (U16_IS_SURROGATE(ch)) { + if (U16_IS_LEAD(ch)) + { +lowsurogate: + if (mySource < sourceLimit) + { + ch2 = *mySource; + if (U16_IS_TRAIL(ch2)) { + ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; + mySource++; + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + else { + /* ran out of source */ + args->converter->fromUChar32 = ch; + if (args->flush) { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err = U_ILLEGAL_CHAR_FOUND; + } + break; + } + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + + /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ + temp[2] = (uint8_t) (ch >> 16 & 0x1F); + temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ + temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ + + for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) + { + if (myTarget < targetLimit) + { + *(myTarget++) = temp[indexToWrite]; + } + else + { + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + } + + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = (char *) myTarget; + args->source = mySource; +} + +static void U_CALLCONV +T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, + UErrorCode * err) +{ + const UChar *mySource = args->source; + unsigned char *myTarget; + int32_t *myOffsets; + const UChar *sourceLimit = args->sourceLimit; + const unsigned char *targetLimit = (unsigned char *) args->targetLimit; + UChar32 ch, ch2; + unsigned int indexToWrite; + unsigned char temp[sizeof(uint32_t)]; + int32_t offsetNum = 0; + + if(mySource >= sourceLimit) { + /* no input, nothing to do */ + return; + } + + /* write the BOM if necessary */ + if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { + static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 }; + ucnv_fromUWriteBytes(args->converter, + bom, 4, + &args->target, args->targetLimit, + &args->offsets, -1, + err); + args->converter->fromUnicodeStatus=0; + } + + myTarget = (unsigned char *) args->target; + myOffsets = args->offsets; + temp[3] = 0; + + if (args->converter->fromUChar32) + { + ch = args->converter->fromUChar32; + args->converter->fromUChar32 = 0; + goto lowsurogate; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + ch = *(mySource++); + + if (U16_IS_SURROGATE(ch)) { + if (U16_IS_LEAD(ch)) + { +lowsurogate: + if (mySource < sourceLimit) + { + ch2 = *mySource; + if (U16_IS_TRAIL(ch2)) + { + ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE; + mySource++; + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + else { + /* ran out of source */ + args->converter->fromUChar32 = ch; + if (args->flush) { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err = U_ILLEGAL_CHAR_FOUND; + } + break; + } + } + else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + args->converter->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + + /* We cannot get any larger than 10FFFF because we are coming from UTF-16 */ + temp[2] = (uint8_t) (ch >> 16 & 0x1F); + temp[1] = (uint8_t) (ch >> 8); /* unsigned cast implicitly does (ch & FF) */ + temp[0] = (uint8_t) (ch); /* unsigned cast implicitly does (ch & FF) */ + + for (indexToWrite = 0; indexToWrite <= sizeof(uint32_t) - 1; indexToWrite++) + { + if (myTarget < targetLimit) + { + *(myTarget++) = temp[indexToWrite]; + *(myOffsets++) = offsetNum; + } + else + { + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = temp[indexToWrite]; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + offsetNum = offsetNum + 1 + (temp[2] != 0); + } + + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = (char *) myTarget; + args->source = mySource; + args->offsets = myOffsets; +} + +static UChar32 U_CALLCONV +T_UConverter_getNextUChar_UTF32_LE(UConverterToUnicodeArgs* args, + UErrorCode* err) +{ + const uint8_t *mySource; + UChar32 myUChar; + int32_t length; + + mySource = (const uint8_t *)args->source; + if (mySource >= (const uint8_t *)args->sourceLimit) + { + /* no input */ + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; + } + + length = (int32_t)((const uint8_t *)args->sourceLimit - mySource); + if (length < 4) + { + /* got a partial character */ + uprv_memcpy(args->converter->toUBytes, mySource, length); + args->converter->toULength = (int8_t)length; + args->source = (const char *)(mySource + length); + *err = U_TRUNCATED_CHAR_FOUND; + return 0xffff; + } + + /* Don't even try to do a direct cast because the value may be on an odd address. */ + myUChar = ((UChar32)mySource[3] << 24) + | ((UChar32)mySource[2] << 16) + | ((UChar32)mySource[1] << 8) + | ((UChar32)mySource[0]); + + args->source = (const char *)(mySource + 4); + if ((uint32_t)myUChar <= MAXIMUM_UTF && !U_IS_SURROGATE(myUChar)) { + return myUChar; + } + + uprv_memcpy(args->converter->toUBytes, mySource, 4); + args->converter->toULength = 4; + + *err = U_ILLEGAL_CHAR_FOUND; + return 0xffff; +} +U_CDECL_END +static const UConverterImpl _UTF32LEImpl = { + UCNV_UTF32_LittleEndian, + + NULL, + NULL, + + NULL, + NULL, + NULL, + + T_UConverter_toUnicode_UTF32_LE, + T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC, + T_UConverter_fromUnicode_UTF32_LE, + T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, + T_UConverter_getNextUChar_UTF32_LE, + + NULL, + NULL, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + +/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */ +static const UConverterStaticData _UTF32LEStaticData = { + sizeof(UConverterStaticData), + "UTF-32LE", + 1234, + UCNV_IBM, UCNV_UTF32_LittleEndian, 4, 4, + { 0xfd, 0xff, 0, 0 }, 4, FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + + +const UConverterSharedData _UTF32LEData = + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32LEStaticData, &_UTF32LEImpl); + +/* UTF-32 (Detect BOM) ------------------------------------------------------ */ + +/* + * Detect a BOM at the beginning of the stream and select UTF-32BE or UTF-32LE + * accordingly. + * + * State values: + * 0 initial state + * 1 saw 00 + * 2 saw 00 00 + * 3 saw 00 00 FE + * 4 - + * 5 saw FF + * 6 saw FF FE + * 7 saw FF FE 00 + * 8 UTF-32BE mode + * 9 UTF-32LE mode + * + * During detection: state&3==number of matching bytes so far. + * + * On output, emit U+FEFF as the first code point. + */ +U_CDECL_BEGIN +static void U_CALLCONV +_UTF32Reset(UConverter *cnv, UConverterResetChoice choice) { + if(choice<=UCNV_RESET_TO_UNICODE) { + /* reset toUnicode: state=0 */ + cnv->mode=0; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + /* reset fromUnicode: prepare to output the UTF-32PE BOM */ + cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; + } +} + +static void U_CALLCONV +_UTF32Open(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode) { + (void)pArgs; + (void)pErrorCode; + _UTF32Reset(cnv, UCNV_RESET_BOTH); +} + +static const char utf32BOM[8]={ 0, 0, (char)0xfeu, (char)0xffu, (char)0xffu, (char)0xfeu, 0, 0 }; + +static void U_CALLCONV +_UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv=pArgs->converter; + const char *source=pArgs->source; + const char *sourceLimit=pArgs->sourceLimit; + int32_t *offsets=pArgs->offsets; + + int32_t state, offsetDelta; + char b; + + state=cnv->mode; + + /* + * If we detect a BOM in this buffer, then we must add the BOM size to the + * offsets because the actual converter function will not see and count the BOM. + * offsetDelta will have the number of the BOM bytes that are in the current buffer. + */ + offsetDelta=0; + + while(source<sourceLimit && U_SUCCESS(*pErrorCode)) { + switch(state) { + case 0: + b=*source; + if(b==0) { + state=1; /* could be 00 00 FE FF */ + } else if(b==(char)0xffu) { + state=5; /* could be FF FE 00 00 */ + } else { + state=8; /* default to UTF-32BE */ + continue; + } + ++source; + break; + case 1: + case 2: + case 3: + case 5: + case 6: + case 7: + if(*source==utf32BOM[state]) { + ++state; + ++source; + if(state==4) { + state=8; /* detect UTF-32BE */ + offsetDelta=(int32_t)(source-pArgs->source); + } else if(state==8) { + state=9; /* detect UTF-32LE */ + offsetDelta=(int32_t)(source-pArgs->source); + } + } else { + /* switch to UTF-32BE and pass the previous bytes */ + int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */ + + /* reset the source */ + source=pArgs->source; + + if(count==(state&3)) { + /* simple: all in the same buffer, just reset source */ + } else { + UBool oldFlush=pArgs->flush; + + /* some of the bytes are from a previous buffer, replay those first */ + pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ + pArgs->sourceLimit=pArgs->source+((state&3)-count); /* replay previous bytes */ + pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */ + + /* no offsets: bytes from previous buffer, and not enough for output */ + T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); + + /* restore real pointers; pArgs->source will be set in case 8/9 */ + pArgs->sourceLimit=sourceLimit; + pArgs->flush=oldFlush; + } + state=8; + continue; + } + break; + case 8: + /* call UTF-32BE */ + pArgs->source=source; + if(offsets==NULL) { + T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); + } else { + T_UConverter_toUnicode_UTF32_BE_OFFSET_LOGIC(pArgs, pErrorCode); + } + source=pArgs->source; + break; + case 9: + /* call UTF-32LE */ + pArgs->source=source; + if(offsets==NULL) { + T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); + } else { + T_UConverter_toUnicode_UTF32_LE_OFFSET_LOGIC(pArgs, pErrorCode); + } + source=pArgs->source; + break; + default: + break; /* does not occur */ + } + } + + /* add BOM size to offsets - see comment at offsetDelta declaration */ + if(offsets!=NULL && offsetDelta!=0) { + int32_t *offsetsLimit=pArgs->offsets; + while(offsets<offsetsLimit) { + *offsets++ += offsetDelta; + } + } + + pArgs->source=source; + + if(source==sourceLimit && pArgs->flush) { + /* handle truncated input */ + switch(state) { + case 0: + break; /* no input at all, nothing to do */ + case 8: + T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); + break; + case 9: + T_UConverter_toUnicode_UTF32_LE(pArgs, pErrorCode); + break; + default: + /* handle 0<state<8: call UTF-32BE with too-short input */ + pArgs->source=utf32BOM+(state&4); /* select the correct BOM */ + pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */ + + /* no offsets: not enough for output */ + T_UConverter_toUnicode_UTF32_BE(pArgs, pErrorCode); + pArgs->source=source; + pArgs->sourceLimit=sourceLimit; + state=8; + break; + } + } + + cnv->mode=state; +} + +static UChar32 U_CALLCONV +_UTF32GetNextUChar(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + switch(pArgs->converter->mode) { + case 8: + return T_UConverter_getNextUChar_UTF32_BE(pArgs, pErrorCode); + case 9: + return T_UConverter_getNextUChar_UTF32_LE(pArgs, pErrorCode); + default: + return UCNV_GET_NEXT_UCHAR_USE_TO_U; + } +} +U_CDECL_END +static const UConverterImpl _UTF32Impl = { + UCNV_UTF32, + + NULL, + NULL, + + _UTF32Open, + NULL, + _UTF32Reset, + + _UTF32ToUnicodeWithOffsets, + _UTF32ToUnicodeWithOffsets, +#if U_IS_BIG_ENDIAN + T_UConverter_fromUnicode_UTF32_BE, + T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC, +#else + T_UConverter_fromUnicode_UTF32_LE, + T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC, +#endif + _UTF32GetNextUChar, + + NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ + NULL, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + NULL, + NULL +}; + +/* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianess of UTF-32 */ +static const UConverterStaticData _UTF32StaticData = { + sizeof(UConverterStaticData), + "UTF-32", + 1236, + UCNV_IBM, UCNV_UTF32, 4, 4, +#if U_IS_BIG_ENDIAN + { 0, 0, 0xff, 0xfd }, 4, +#else + { 0xfd, 0xff, 0, 0 }, 4, +#endif + FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _UTF32Data = + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32StaticData, &_UTF32Impl); + +#endif diff --git a/thirdparty/icu4c/common/ucnv_u7.cpp b/thirdparty/icu4c/common/ucnv_u7.cpp new file mode 100644 index 0000000000..87ba8cf37e --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_u7.cpp @@ -0,0 +1,1491 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_u7.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jul01 +* created by: Markus W. Scherer +* +* UTF-7 converter implementation. Used to be in ucnv_utf.c. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "cmemory.h" +#include "unicode/ucnv.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "uassert.h" + +/* UTF-7 -------------------------------------------------------------------- */ + +/* + * UTF-7 is a stateful encoding of Unicode. + * It is defined in RFC 2152. (http://www.ietf.org/rfc/rfc2152.txt) + * It was intended for use in Internet email systems, using in its bytewise + * encoding only a subset of 7-bit US-ASCII. + * UTF-7 is deprecated in favor of UTF-8/16/32 and SCSU, but still + * occasionally used. + * + * For converting Unicode to UTF-7, the RFC allows to encode some US-ASCII + * characters directly or in base64. Especially, the characters in set O + * as defined in the RFC (see below) may be encoded directly but are not + * allowed in, e.g., email headers. + * By default, the ICU UTF-7 converter encodes set O directly. + * By choosing the option "version=1", set O will be escaped instead. + * For example: + * utf7Converter=ucnv_open("UTF-7,version=1"); + * + * For details about email headers see RFC 2047. + */ + +/* + * Tests for US-ASCII characters belonging to character classes + * defined in UTF-7. + * + * Set D (directly encoded characters) consists of the following + * characters: the upper and lower case letters A through Z + * and a through z, the 10 digits 0-9, and the following nine special + * characters (note that "+" and "=" are omitted): + * '(),-./:? + * + * Set O (optional direct characters) consists of the following + * characters (note that "\" and "~" are omitted): + * !"#$%&*;<=>@[]^_`{|} + * + * According to the rules in RFC 2152, the byte values for the following + * US-ASCII characters are not used in UTF-7 and are therefore illegal: + * - all C0 control codes except for CR LF TAB + * - BACKSLASH + * - TILDE + * - DEL + * - all codes beyond US-ASCII, i.e. all >127 + */ +#define inSetD(c) \ + ((uint8_t)((c)-97)<26 || (uint8_t)((c)-65)<26 || /* letters */ \ + (uint8_t)((c)-48)<10 || /* digits */ \ + (uint8_t)((c)-39)<3 || /* '() */ \ + (uint8_t)((c)-44)<4 || /* ,-./ */ \ + (c)==58 || (c)==63 /* :? */ \ + ) + +#define inSetO(c) \ + ((uint8_t)((c)-33)<6 || /* !"#$%& */ \ + (uint8_t)((c)-59)<4 || /* ;<=> */ \ + (uint8_t)((c)-93)<4 || /* ]^_` */ \ + (uint8_t)((c)-123)<3 || /* {|} */ \ + (c)==42 || (c)==64 || (c)==91 /* *@[ */ \ + ) + +#define isCRLFTAB(c) ((c)==13 || (c)==10 || (c)==9) +#define isCRLFSPTAB(c) ((c)==32 || (c)==13 || (c)==10 || (c)==9) + +#define PLUS 43 +#define MINUS 45 +#define BACKSLASH 92 +#define TILDE 126 + +/* legal byte values: all US-ASCII graphic characters from space to before tilde, and CR LF TAB */ +#define isLegalUTF7(c) (((uint8_t)((c)-32)<94 && (c)!=BACKSLASH) || isCRLFTAB(c)) + +/* encode directly sets D and O and CR LF SP TAB */ +static const UBool encodeDirectlyMaximum[128]={ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 +}; + +/* encode directly set D and CR LF SP TAB but not set O */ +static const UBool encodeDirectlyRestricted[128]={ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, + + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 +}; + +static const uint8_t +toBase64[64]={ + /* A-Z */ + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, + 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, + /* a-z */ + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, + /* 0-9 */ + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + /* +/ */ + 43, 47 +}; + +static const int8_t +fromBase64[128]={ + /* C0 controls, -1 for legal ones (CR LF TAB), -3 for illegal ones */ + -3, -3, -3, -3, -3, -3, -3, -3, -3, -1, -1, -3, -3, -1, -3, -3, + -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + + /* general punctuation with + and / and a special value (-2) for - */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -2, -1, 63, + /* digits */ + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, + + /* A-Z */ + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -3, -1, -1, -1, + + /* a-z */ + -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -3, -3 +}; + +/* + * converter status values: + * + * toUnicodeStatus: + * 24 inDirectMode (boolean) + * 23..16 base64Counter (-1..7) + * 15..0 bits (up to 14 bits incoming base64) + * + * fromUnicodeStatus: + * 31..28 version (0: set O direct 1: set O escaped) + * 24 inDirectMode (boolean) + * 23..16 base64Counter (0..2) + * 7..0 bits (6 bits outgoing base64) + * + */ + +U_CDECL_BEGIN +static void U_CALLCONV +_UTF7Reset(UConverter *cnv, UConverterResetChoice choice) { + if(choice<=UCNV_RESET_TO_UNICODE) { + /* reset toUnicode */ + cnv->toUnicodeStatus=0x1000000; /* inDirectMode=TRUE */ + cnv->toULength=0; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + /* reset fromUnicode */ + cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ + } +} + +static void U_CALLCONV +_UTF7Open(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode) { + (void)pArgs; + if(UCNV_GET_VERSION(cnv)<=1) { + /* TODO(markus): Should just use cnv->options rather than copying the version number. */ + cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28; + _UTF7Reset(cnv, UCNV_RESET_BOTH); + } else { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } +} + +static void U_CALLCONV +_UTF7ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source, *sourceLimit; + UChar *target; + const UChar *targetLimit; + int32_t *offsets; + + uint8_t *bytes; + uint8_t byteIndex; + + int32_t length, targetCapacity; + + /* UTF-7 state */ + uint16_t bits; + int8_t base64Counter; + UBool inDirectMode; + + int8_t base64Value; + + int32_t sourceIndex, nextSourceIndex; + + uint8_t b; + /* set up the local pointers */ + cnv=pArgs->converter; + + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=pArgs->target; + targetLimit=pArgs->targetLimit; + offsets=pArgs->offsets; + /* get the state machine state */ + { + uint32_t status=cnv->toUnicodeStatus; + inDirectMode=(UBool)((status>>24)&1); + base64Counter=(int8_t)(status>>16); + bits=(uint16_t)status; + } + bytes=cnv->toUBytes; + byteIndex=cnv->toULength; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex=byteIndex==0 ? 0 : -1; + nextSourceIndex=0; + + if(inDirectMode) { +directMode: + /* + * In Direct Mode, most US-ASCII characters are encoded directly, i.e., + * with their US-ASCII byte values. + * Backslash and Tilde and most control characters are not allowed in UTF-7. + * A plus sign starts Unicode (or "escape") Mode. + * + * In Direct Mode, only the sourceIndex is used. + */ + byteIndex=0; + length=(int32_t)(sourceLimit-source); + targetCapacity=(int32_t)(targetLimit-target); + if(length>targetCapacity) { + length=targetCapacity; + } + while(length>0) { + b=*source++; + if(!isLegalUTF7(b)) { + /* illegal */ + bytes[0]=b; + byteIndex=1; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } else if(b!=PLUS) { + /* write directly encoded character */ + *target++=b; + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + } else /* PLUS */ { + /* switch to Unicode mode */ + nextSourceIndex=++sourceIndex; + inDirectMode=FALSE; + byteIndex=0; + bits=0; + base64Counter=-1; + goto unicodeMode; + } + --length; + } + if(source<sourceLimit && target>=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { +unicodeMode: + /* + * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded. + * The base64 sequence ends with any character that is not in the base64 alphabet. + * A terminating minus sign is consumed. + * + * In Unicode Mode, the sourceIndex has the index to the start of the current + * base64 bytes, while nextSourceIndex is precisely parallel to source, + * keeping the index to the following byte. + * Note that in 2 out of 3 cases, UChars overlap within a base64 byte. + */ + while(source<sourceLimit) { + if(target<targetLimit) { + bytes[byteIndex++]=b=*source++; + ++nextSourceIndex; + base64Value = -3; /* initialize as illegal */ + if(b>=126 || (base64Value=fromBase64[b])==-3 || base64Value==-1) { + /* either + * base64Value==-1 for any legal character except base64 and minus sign, or + * base64Value==-3 for illegal characters: + * 1. In either case, leave Unicode mode. + * 2.1. If we ended with an incomplete UChar or none after the +, then + * generate an error for the preceding erroneous sequence and deal with + * the current (possibly illegal) character next time through. + * 2.2. Else the current char comes after a complete UChar, which was already + * pushed to the output buf, so: + * 2.2.1. If the current char is legal, just save it for processing next time. + * It may be for example, a plus which we need to deal with in direct mode. + * 2.2.2. Else if the current char is illegal, we might as well deal with it here. + */ + inDirectMode=TRUE; + if(base64Counter==-1) { + /* illegal: + immediately followed by something other than base64 or minus sign */ + /* include the plus sign in the reported sequence, but not the subsequent char */ + --source; + bytes[0]=PLUS; + byteIndex=1; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } else if(bits!=0) { + /* bits are illegally left over, a UChar is incomplete */ + /* don't include current char (legal or illegal) in error seq */ + --source; + --byteIndex; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } else { + /* previous UChar was complete */ + if(base64Value==-3) { + /* current character is illegal, deal with it here */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } else { + /* un-read the current character in case it is a plus sign */ + --source; + sourceIndex=nextSourceIndex-1; + goto directMode; + } + } + } else if(base64Value>=0) { + /* collect base64 bytes into UChars */ + switch(base64Counter) { + case -1: /* -1 is immediately after the + */ + case 0: + bits=base64Value; + base64Counter=1; + break; + case 1: + case 3: + case 4: + case 6: + bits=(uint16_t)((bits<<6)|base64Value); + ++base64Counter; + break; + case 2: + *target++=(UChar)((bits<<4)|(base64Value>>2)); + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex-1; + } + bytes[0]=b; /* keep this byte in case an error occurs */ + byteIndex=1; + bits=(uint16_t)(base64Value&3); + base64Counter=3; + break; + case 5: + *target++=(UChar)((bits<<2)|(base64Value>>4)); + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex-1; + } + bytes[0]=b; /* keep this byte in case an error occurs */ + byteIndex=1; + bits=(uint16_t)(base64Value&15); + base64Counter=6; + break; + case 7: + *target++=(UChar)((bits<<6)|base64Value); + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex; + } + byteIndex=0; + bits=0; + base64Counter=0; + break; + default: + /* will never occur */ + break; + } + } else /*base64Value==-2*/ { + /* minus sign terminates the base64 sequence */ + inDirectMode=TRUE; + if(base64Counter==-1) { + /* +- i.e. a minus immediately following a plus */ + *target++=PLUS; + if(offsets!=NULL) { + *offsets++=sourceIndex-1; + } + } else { + /* absorb the minus and leave the Unicode Mode */ + if(bits!=0) { + /* bits are illegally left over, a UChar is incomplete */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } + sourceIndex=nextSourceIndex; + goto directMode; + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + + if(U_SUCCESS(*pErrorCode) && pArgs->flush && source==sourceLimit && bits==0) { + /* + * if we are in Unicode mode, then the byteIndex might not be 0, + * but that is ok if bits==0 + * -> we set byteIndex=0 at the end of the stream to avoid a truncated error + * (not true for IMAP-mailbox-name where we must end in direct mode) + */ + byteIndex=0; + } + + /* set the converter state back into UConverter */ + cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits; + cnv->toULength=byteIndex; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; + return; +} + +static void U_CALLCONV +_UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source, *sourceLimit; + uint8_t *target, *targetLimit; + int32_t *offsets; + + int32_t length, targetCapacity, sourceIndex; + UChar c; + + /* UTF-7 state */ + const UBool *encodeDirectly; + uint8_t bits; + int8_t base64Counter; + UBool inDirectMode; + + /* set up the local pointers */ + cnv=pArgs->converter; + + /* set up the local pointers */ + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetLimit=(uint8_t *)pArgs->targetLimit; + offsets=pArgs->offsets; + + /* get the state machine state */ + { + uint32_t status=cnv->fromUnicodeStatus; + encodeDirectly= status<0x10000000 ? encodeDirectlyMaximum : encodeDirectlyRestricted; + inDirectMode=(UBool)((status>>24)&1); + base64Counter=(int8_t)(status>>16); + bits=(uint8_t)status; + U_ASSERT(bits<=UPRV_LENGTHOF(toBase64)); + } + + /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */ + sourceIndex=0; + + if(inDirectMode) { +directMode: + length=(int32_t)(sourceLimit-source); + targetCapacity=(int32_t)(targetLimit-target); + if(length>targetCapacity) { + length=targetCapacity; + } + while(length>0) { + c=*source++; + /* currently always encode CR LF SP TAB directly */ + if(c<=127 && encodeDirectly[c]) { + /* encode directly */ + *target++=(uint8_t)c; + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + } else if(c==PLUS) { + /* output +- for + */ + *target++=PLUS; + if(target<targetLimit) { + *target++=MINUS; + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + /* realign length and targetCapacity */ + goto directMode; + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + cnv->charErrorBuffer[0]=MINUS; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } else { + /* un-read this character and switch to Unicode Mode */ + --source; + *target++=PLUS; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + inDirectMode=FALSE; + base64Counter=0; + goto unicodeMode; + } + --length; + } + if(source<sourceLimit && target>=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { +unicodeMode: + while(source<sourceLimit) { + if(target<targetLimit) { + c=*source++; + if(c<=127 && encodeDirectly[c]) { + /* encode directly */ + inDirectMode=TRUE; + + /* trick: back out this character to make this easier */ + --source; + + /* terminate the base64 sequence */ + if(base64Counter!=0) { + /* write remaining bits for the previous character */ + *target++=toBase64[bits]; + if(offsets!=NULL) { + *offsets++=sourceIndex-1; + } + } + if(fromBase64[c]!=-1) { + /* need to terminate with a minus */ + if(target<targetLimit) { + *target++=MINUS; + if(offsets!=NULL) { + *offsets++=sourceIndex-1; + } + } else { + cnv->charErrorBuffer[0]=MINUS; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + goto directMode; + } else { + /* + * base64 this character: + * Output 2 or 3 base64 bytes for the remaining bits of the previous character + * and the bits of this character, each implicitly in UTF-16BE. + * + * Here, bits is an 8-bit variable because only 6 bits need to be kept from one + * character to the next. The actual 2 or 4 bits are shifted to the left edge + * of the 6-bits field 5..0 to make the termination of the base64 sequence easier. + */ + switch(base64Counter) { + case 0: + *target++=toBase64[c>>10]; + if(target<targetLimit) { + *target++=toBase64[(c>>4)&0x3f]; + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + cnv->charErrorBuffer[0]=toBase64[(c>>4)&0x3f]; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + bits=(uint8_t)((c&15)<<2); + base64Counter=1; + break; + case 1: + *target++=toBase64[bits|(c>>14)]; + if(target<targetLimit) { + *target++=toBase64[(c>>8)&0x3f]; + if(target<targetLimit) { + *target++=toBase64[(c>>2)&0x3f]; + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + cnv->charErrorBuffer[0]=toBase64[(c>>2)&0x3f]; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + cnv->charErrorBuffer[0]=toBase64[(c>>8)&0x3f]; + cnv->charErrorBuffer[1]=toBase64[(c>>2)&0x3f]; + cnv->charErrorBufferLength=2; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + bits=(uint8_t)((c&3)<<4); + base64Counter=2; + break; + case 2: + *target++=toBase64[bits|(c>>12)]; + if(target<targetLimit) { + *target++=toBase64[(c>>6)&0x3f]; + if(target<targetLimit) { + *target++=toBase64[c&0x3f]; + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + cnv->charErrorBuffer[0]=toBase64[c&0x3f]; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + cnv->charErrorBuffer[0]=toBase64[(c>>6)&0x3f]; + cnv->charErrorBuffer[1]=toBase64[c&0x3f]; + cnv->charErrorBufferLength=2; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + bits=0; + base64Counter=0; + break; + default: + /* will never occur */ + break; + } + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + + if(pArgs->flush && source>=sourceLimit) { + /* flush remaining bits to the target */ + if(!inDirectMode) { + if (base64Counter!=0) { + if(target<targetLimit) { + *target++=toBase64[bits]; + if(offsets!=NULL) { + *offsets++=sourceIndex-1; + } + } else { + cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits]; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } + /* Add final MINUS to terminate unicodeMode */ + if(target<targetLimit) { + *target++=MINUS; + if(offsets!=NULL) { + *offsets++=sourceIndex-1; + } + } else { + cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } + /* reset the state for the next conversion */ + cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ + } else { + /* set the converter state back into UConverter */ + cnv->fromUnicodeStatus= + (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/ + ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits; + } + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; + return; +} + +static const char * U_CALLCONV +_UTF7GetName(const UConverter *cnv) { + switch(cnv->fromUnicodeStatus>>28) { + case 1: + return "UTF-7,version=1"; + default: + return "UTF-7"; + } +} +U_CDECL_END + +static const UConverterImpl _UTF7Impl={ + UCNV_UTF7, + + NULL, + NULL, + + _UTF7Open, + NULL, + _UTF7Reset, + + _UTF7ToUnicodeWithOffsets, + _UTF7ToUnicodeWithOffsets, + _UTF7FromUnicodeWithOffsets, + _UTF7FromUnicodeWithOffsets, + NULL, + + NULL, + _UTF7GetName, + NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */ + NULL, + ucnv_getCompleteUnicodeSet, + + NULL, + NULL +}; + +static const UConverterStaticData _UTF7StaticData={ + sizeof(UConverterStaticData), + "UTF-7", + 0, /* TODO CCSID for UTF-7 */ + UCNV_IBM, UCNV_UTF7, + 1, 4, + { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */ + FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _UTF7Data= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF7StaticData, &_UTF7Impl); + +/* IMAP mailbox name encoding ----------------------------------------------- */ + +/* + * RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1 + * http://www.ietf.org/rfc/rfc2060.txt + * + * 5.1.3. Mailbox International Naming Convention + * + * By convention, international mailbox names are specified using a + * modified version of the UTF-7 encoding described in [UTF-7]. The + * purpose of these modifications is to correct the following problems + * with UTF-7: + * + * 1) UTF-7 uses the "+" character for shifting; this conflicts with + * the common use of "+" in mailbox names, in particular USENET + * newsgroup names. + * + * 2) UTF-7's encoding is BASE64 which uses the "/" character; this + * conflicts with the use of "/" as a popular hierarchy delimiter. + * + * 3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with + * the use of "\" as a popular hierarchy delimiter. + * + * 4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with + * the use of "~" in some servers as a home directory indicator. + * + * 5) UTF-7 permits multiple alternate forms to represent the same + * string; in particular, printable US-ASCII chararacters can be + * represented in encoded form. + * + * In modified UTF-7, printable US-ASCII characters except for "&" + * represent themselves; that is, characters with octet values 0x20-0x25 + * and 0x27-0x7e. The character "&" (0x26) is represented by the two- + * octet sequence "&-". + * + * All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all + * Unicode 16-bit octets) are represented in modified BASE64, with a + * further modification from [UTF-7] that "," is used instead of "/". + * Modified BASE64 MUST NOT be used to represent any printing US-ASCII + * character which can represent itself. + * + * "&" is used to shift to modified BASE64 and "-" to shift back to US- + * ASCII. All names start in US-ASCII, and MUST end in US-ASCII (that + * is, a name that ends with a Unicode 16-bit octet MUST end with a "- + * "). + * + * For example, here is a mailbox name which mixes English, Japanese, + * and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw- + */ + +/* + * Tests for US-ASCII characters belonging to character classes + * defined in UTF-7. + * + * Set D (directly encoded characters) consists of the following + * characters: the upper and lower case letters A through Z + * and a through z, the 10 digits 0-9, and the following nine special + * characters (note that "+" and "=" are omitted): + * '(),-./:? + * + * Set O (optional direct characters) consists of the following + * characters (note that "\" and "~" are omitted): + * !"#$%&*;<=>@[]^_`{|} + * + * According to the rules in RFC 2152, the byte values for the following + * US-ASCII characters are not used in UTF-7 and are therefore illegal: + * - all C0 control codes except for CR LF TAB + * - BACKSLASH + * - TILDE + * - DEL + * - all codes beyond US-ASCII, i.e. all >127 + */ + +/* uses '&' not '+' to start a base64 sequence */ +#define AMPERSAND 0x26 +#define COMMA 0x2c +#define SLASH 0x2f + +/* legal byte values: all US-ASCII graphic characters 0x20..0x7e */ +#define isLegalIMAP(c) (0x20<=(c) && (c)<=0x7e) + +/* direct-encode all of printable ASCII 0x20..0x7e except '&' 0x26 */ +#define inSetDIMAP(c) (isLegalIMAP(c) && c!=AMPERSAND) + +#define TO_BASE64_IMAP(n) ((n)<63 ? toBase64[n] : COMMA) +#define FROM_BASE64_IMAP(c) ((c)==COMMA ? 63 : (c)==SLASH ? -1 : fromBase64[c]) + +/* + * converter status values: + * + * toUnicodeStatus: + * 24 inDirectMode (boolean) + * 23..16 base64Counter (-1..7) + * 15..0 bits (up to 14 bits incoming base64) + * + * fromUnicodeStatus: + * 24 inDirectMode (boolean) + * 23..16 base64Counter (0..2) + * 7..0 bits (6 bits outgoing base64) + * + * ignore bits 31..25 + */ + +U_CDECL_BEGIN +static void U_CALLCONV +_IMAPToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source, *sourceLimit; + UChar *target; + const UChar *targetLimit; + int32_t *offsets; + + uint8_t *bytes; + uint8_t byteIndex; + + int32_t length, targetCapacity; + + /* UTF-7 state */ + uint16_t bits; + int8_t base64Counter; + UBool inDirectMode; + + int8_t base64Value; + + int32_t sourceIndex, nextSourceIndex; + + UChar c; + uint8_t b; + + /* set up the local pointers */ + cnv=pArgs->converter; + + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=pArgs->target; + targetLimit=pArgs->targetLimit; + offsets=pArgs->offsets; + /* get the state machine state */ + { + uint32_t status=cnv->toUnicodeStatus; + inDirectMode=(UBool)((status>>24)&1); + base64Counter=(int8_t)(status>>16); + bits=(uint16_t)status; + } + bytes=cnv->toUBytes; + byteIndex=cnv->toULength; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex=byteIndex==0 ? 0 : -1; + nextSourceIndex=0; + + if(inDirectMode) { +directMode: + /* + * In Direct Mode, US-ASCII characters are encoded directly, i.e., + * with their US-ASCII byte values. + * An ampersand starts Unicode (or "escape") Mode. + * + * In Direct Mode, only the sourceIndex is used. + */ + byteIndex=0; + length=(int32_t)(sourceLimit-source); + targetCapacity=(int32_t)(targetLimit-target); + if(length>targetCapacity) { + length=targetCapacity; + } + while(length>0) { + b=*source++; + if(!isLegalIMAP(b)) { + /* illegal */ + bytes[0]=b; + byteIndex=1; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } else if(b!=AMPERSAND) { + /* write directly encoded character */ + *target++=b; + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + } else /* AMPERSAND */ { + /* switch to Unicode mode */ + nextSourceIndex=++sourceIndex; + inDirectMode=FALSE; + byteIndex=0; + bits=0; + base64Counter=-1; + goto unicodeMode; + } + --length; + } + if(source<sourceLimit && target>=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { +unicodeMode: + /* + * In Unicode (or "escape") Mode, UTF-16BE is base64-encoded. + * The base64 sequence ends with any character that is not in the base64 alphabet. + * A terminating minus sign is consumed. + * US-ASCII must not be base64-ed. + * + * In Unicode Mode, the sourceIndex has the index to the start of the current + * base64 bytes, while nextSourceIndex is precisely parallel to source, + * keeping the index to the following byte. + * Note that in 2 out of 3 cases, UChars overlap within a base64 byte. + */ + while(source<sourceLimit) { + if(target<targetLimit) { + bytes[byteIndex++]=b=*source++; + ++nextSourceIndex; + if(b>0x7e) { + /* illegal - test other illegal US-ASCII values by base64Value==-3 */ + inDirectMode=TRUE; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } else if((base64Value=FROM_BASE64_IMAP(b))>=0) { + /* collect base64 bytes into UChars */ + switch(base64Counter) { + case -1: /* -1 is immediately after the & */ + case 0: + bits=base64Value; + base64Counter=1; + break; + case 1: + case 3: + case 4: + case 6: + bits=(uint16_t)((bits<<6)|base64Value); + ++base64Counter; + break; + case 2: + c=(UChar)((bits<<4)|(base64Value>>2)); + if(isLegalIMAP(c)) { + /* illegal */ + inDirectMode=TRUE; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex-1; + } + bytes[0]=b; /* keep this byte in case an error occurs */ + byteIndex=1; + bits=(uint16_t)(base64Value&3); + base64Counter=3; + break; + case 5: + c=(UChar)((bits<<2)|(base64Value>>4)); + if(isLegalIMAP(c)) { + /* illegal */ + inDirectMode=TRUE; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex-1; + } + bytes[0]=b; /* keep this byte in case an error occurs */ + byteIndex=1; + bits=(uint16_t)(base64Value&15); + base64Counter=6; + break; + case 7: + c=(UChar)((bits<<6)|base64Value); + if(isLegalIMAP(c)) { + /* illegal */ + inDirectMode=TRUE; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex; + } + byteIndex=0; + bits=0; + base64Counter=0; + break; + default: + /* will never occur */ + break; + } + } else if(base64Value==-2) { + /* minus sign terminates the base64 sequence */ + inDirectMode=TRUE; + if(base64Counter==-1) { + /* &- i.e. a minus immediately following an ampersand */ + *target++=AMPERSAND; + if(offsets!=NULL) { + *offsets++=sourceIndex-1; + } + } else { + /* absorb the minus and leave the Unicode Mode */ + if(bits!=0 || (base64Counter!=0 && base64Counter!=3 && base64Counter!=6)) { + /* bits are illegally left over, a UChar is incomplete */ + /* base64Counter other than 0, 3, 6 means non-minimal zero-padding, also illegal */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } + sourceIndex=nextSourceIndex; + goto directMode; + } else { + if(base64Counter==-1) { + /* illegal: & immediately followed by something other than base64 or minus sign */ + /* include the ampersand in the reported sequence */ + --sourceIndex; + bytes[0]=AMPERSAND; + bytes[1]=b; + byteIndex=2; + } + /* base64Value==-1 for characters that are illegal only in Unicode mode */ + /* base64Value==-3 for illegal characters */ + /* illegal */ + inDirectMode=TRUE; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } +endloop: + + /* + * the end of the input stream and detection of truncated input + * are handled by the framework, but here we must check if we are in Unicode + * mode and byteIndex==0 because we must end in direct mode + * + * conditions: + * successful + * in Unicode mode and byteIndex==0 + * end of input and no truncated input + */ + if( U_SUCCESS(*pErrorCode) && + !inDirectMode && byteIndex==0 && + pArgs->flush && source>=sourceLimit + ) { + if(base64Counter==-1) { + /* & at the very end of the input */ + /* make the ampersand the reported sequence */ + bytes[0]=AMPERSAND; + byteIndex=1; + } + /* else if(base64Counter!=-1) byteIndex remains 0 because there is no particular byte sequence */ + + inDirectMode=TRUE; /* avoid looping */ + *pErrorCode=U_TRUNCATED_CHAR_FOUND; + } + + /* set the converter state back into UConverter */ + cnv->toUnicodeStatus=((uint32_t)inDirectMode<<24)|((uint32_t)((uint8_t)base64Counter)<<16)|(uint32_t)bits; + cnv->toULength=byteIndex; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; + return; +} + +static void U_CALLCONV +_IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source, *sourceLimit; + uint8_t *target, *targetLimit; + int32_t *offsets; + + int32_t length, targetCapacity, sourceIndex; + UChar c; + uint8_t b; + + /* UTF-7 state */ + uint8_t bits; + int8_t base64Counter; + UBool inDirectMode; + + /* set up the local pointers */ + cnv=pArgs->converter; + + /* set up the local pointers */ + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetLimit=(uint8_t *)pArgs->targetLimit; + offsets=pArgs->offsets; + + /* get the state machine state */ + { + uint32_t status=cnv->fromUnicodeStatus; + inDirectMode=(UBool)((status>>24)&1); + base64Counter=(int8_t)(status>>16); + bits=(uint8_t)status; + } + + /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */ + sourceIndex=0; + + if(inDirectMode) { +directMode: + length=(int32_t)(sourceLimit-source); + targetCapacity=(int32_t)(targetLimit-target); + if(length>targetCapacity) { + length=targetCapacity; + } + while(length>0) { + c=*source++; + /* encode 0x20..0x7e except '&' directly */ + if(inSetDIMAP(c)) { + /* encode directly */ + *target++=(uint8_t)c; + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + } else if(c==AMPERSAND) { + /* output &- for & */ + *target++=AMPERSAND; + if(target<targetLimit) { + *target++=MINUS; + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + /* realign length and targetCapacity */ + goto directMode; + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + cnv->charErrorBuffer[0]=MINUS; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } else { + /* un-read this character and switch to Unicode Mode */ + --source; + *target++=AMPERSAND; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + inDirectMode=FALSE; + base64Counter=0; + goto unicodeMode; + } + --length; + } + if(source<sourceLimit && target>=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { +unicodeMode: + while(source<sourceLimit) { + if(target<targetLimit) { + c=*source++; + if(isLegalIMAP(c)) { + /* encode directly */ + inDirectMode=TRUE; + + /* trick: back out this character to make this easier */ + --source; + + /* terminate the base64 sequence */ + if(base64Counter!=0) { + /* write remaining bits for the previous character */ + *target++=TO_BASE64_IMAP(bits); + if(offsets!=NULL) { + *offsets++=sourceIndex-1; + } + } + /* need to terminate with a minus */ + if(target<targetLimit) { + *target++=MINUS; + if(offsets!=NULL) { + *offsets++=sourceIndex-1; + } + } else { + cnv->charErrorBuffer[0]=MINUS; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + goto directMode; + } else { + /* + * base64 this character: + * Output 2 or 3 base64 bytes for the remaining bits of the previous character + * and the bits of this character, each implicitly in UTF-16BE. + * + * Here, bits is an 8-bit variable because only 6 bits need to be kept from one + * character to the next. The actual 2 or 4 bits are shifted to the left edge + * of the 6-bits field 5..0 to make the termination of the base64 sequence easier. + */ + switch(base64Counter) { + case 0: + b=(uint8_t)(c>>10); + *target++=TO_BASE64_IMAP(b); + if(target<targetLimit) { + b=(uint8_t)((c>>4)&0x3f); + *target++=TO_BASE64_IMAP(b); + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + b=(uint8_t)((c>>4)&0x3f); + cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + bits=(uint8_t)((c&15)<<2); + base64Counter=1; + break; + case 1: + b=(uint8_t)(bits|(c>>14)); + *target++=TO_BASE64_IMAP(b); + if(target<targetLimit) { + b=(uint8_t)((c>>8)&0x3f); + *target++=TO_BASE64_IMAP(b); + if(target<targetLimit) { + b=(uint8_t)((c>>2)&0x3f); + *target++=TO_BASE64_IMAP(b); + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + b=(uint8_t)((c>>2)&0x3f); + cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + b=(uint8_t)((c>>8)&0x3f); + cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); + b=(uint8_t)((c>>2)&0x3f); + cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b); + cnv->charErrorBufferLength=2; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + bits=(uint8_t)((c&3)<<4); + base64Counter=2; + break; + case 2: + b=(uint8_t)(bits|(c>>12)); + *target++=TO_BASE64_IMAP(b); + if(target<targetLimit) { + b=(uint8_t)((c>>6)&0x3f); + *target++=TO_BASE64_IMAP(b); + if(target<targetLimit) { + b=(uint8_t)(c&0x3f); + *target++=TO_BASE64_IMAP(b); + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex++; + } + b=(uint8_t)(c&0x3f); + cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex++; + } + b=(uint8_t)((c>>6)&0x3f); + cnv->charErrorBuffer[0]=TO_BASE64_IMAP(b); + b=(uint8_t)(c&0x3f); + cnv->charErrorBuffer[1]=TO_BASE64_IMAP(b); + cnv->charErrorBufferLength=2; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + bits=0; + base64Counter=0; + break; + default: + /* will never occur */ + break; + } + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + + if(pArgs->flush && source>=sourceLimit) { + /* flush remaining bits to the target */ + if(!inDirectMode) { + if(base64Counter!=0) { + if(target<targetLimit) { + *target++=TO_BASE64_IMAP(bits); + if(offsets!=NULL) { + *offsets++=sourceIndex-1; + } + } else { + cnv->charErrorBuffer[cnv->charErrorBufferLength++]=TO_BASE64_IMAP(bits); + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } + /* need to terminate with a minus */ + if(target<targetLimit) { + *target++=MINUS; + if(offsets!=NULL) { + *offsets++=sourceIndex-1; + } + } else { + cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + } + /* reset the state for the next conversion */ + cnv->fromUnicodeStatus=(cnv->fromUnicodeStatus&0xf0000000)|0x1000000; /* keep version, inDirectMode=TRUE */ + } else { + /* set the converter state back into UConverter */ + cnv->fromUnicodeStatus= + (cnv->fromUnicodeStatus&0xf0000000)| /* keep version*/ + ((uint32_t)inDirectMode<<24)|((uint32_t)base64Counter<<16)|(uint32_t)bits; + } + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; + return; +} +U_CDECL_END + +static const UConverterImpl _IMAPImpl={ + UCNV_IMAP_MAILBOX, + + NULL, + NULL, + + _UTF7Open, + NULL, + _UTF7Reset, + + _IMAPToUnicodeWithOffsets, + _IMAPToUnicodeWithOffsets, + _IMAPFromUnicodeWithOffsets, + _IMAPFromUnicodeWithOffsets, + NULL, + + NULL, + NULL, + NULL, /* we don't need writeSub() because we never call a callback at fromUnicode() */ + NULL, + ucnv_getCompleteUnicodeSet, + NULL, + NULL +}; + +static const UConverterStaticData _IMAPStaticData={ + sizeof(UConverterStaticData), + "IMAP-mailbox-name", + 0, /* TODO CCSID for IMAP-mailbox-name */ + UCNV_IBM, UCNV_IMAP_MAILBOX, + 1, 4, + { 0x3f, 0, 0, 0 }, 1, /* the subchar is not used */ + FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _IMAPData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_IMAPStaticData, &_IMAPImpl); + +#endif diff --git a/thirdparty/icu4c/common/ucnv_u8.cpp b/thirdparty/icu4c/common/ucnv_u8.cpp new file mode 100644 index 0000000000..1ef7fa2f02 --- /dev/null +++ b/thirdparty/icu4c/common/ucnv_u8.cpp @@ -0,0 +1,944 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnv_u8.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jul01 +* created by: Markus W. Scherer +* +* UTF-8 converter implementation. Used to be in ucnv_utf.c. +* +* Also, CESU-8 implementation, see UTR 26. +* The CESU-8 converter uses all the same functions as the +* UTF-8 converter, with a branch for converting supplementary code points. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "uassert.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "cmemory.h" +#include "ustr_imp.h" + +/* Prototypes --------------------------------------------------------------- */ + +/* Keep these here to make finicky compilers happy */ + +U_CFUNC void ucnv_fromUnicode_UTF8(UConverterFromUnicodeArgs *args, + UErrorCode *err); +U_CFUNC void ucnv_fromUnicode_UTF8_OFFSETS_LOGIC(UConverterFromUnicodeArgs *args, + UErrorCode *err); + + +/* UTF-8 -------------------------------------------------------------------- */ + +#define MAXIMUM_UCS2 0x0000FFFF + +static const uint32_t offsetsFromUTF8[5] = {0, + (uint32_t) 0x00000000, (uint32_t) 0x00003080, (uint32_t) 0x000E2080, + (uint32_t) 0x03C82080 +}; + +static UBool hasCESU8Data(const UConverter *cnv) +{ +#if UCONFIG_ONLY_HTML_CONVERSION + return FALSE; +#else + return (UBool)(cnv->sharedData == &_CESU8Data); +#endif +} +U_CDECL_BEGIN +static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, + UErrorCode * err) +{ + UConverter *cnv = args->converter; + const unsigned char *mySource = (unsigned char *) args->source; + UChar *myTarget = args->target; + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; + const UChar *targetLimit = args->targetLimit; + unsigned char *toUBytes = cnv->toUBytes; + UBool isCESU8 = hasCESU8Data(cnv); + uint32_t ch, ch2 = 0; + int32_t i, inBytes; + + /* Restore size of current sequence */ + if (cnv->toULength > 0 && myTarget < targetLimit) + { + inBytes = cnv->mode; /* restore # of bytes to consume */ + i = cnv->toULength; /* restore # of bytes consumed */ + cnv->toULength = 0; + + ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ + cnv->toUnicodeStatus = 0; + goto morebytes; + } + + + while (mySource < sourceLimit && myTarget < targetLimit) + { + ch = *(mySource++); + if (U8_IS_SINGLE(ch)) /* Simple case */ + { + *(myTarget++) = (UChar) ch; + } + else + { + /* store the first char */ + toUBytes[0] = (char)ch; + inBytes = U8_COUNT_BYTES_NON_ASCII(ch); /* lookup current sequence length */ + i = 1; + +morebytes: + while (i < inBytes) + { + if (mySource < sourceLimit) + { + toUBytes[i] = (char) (ch2 = *mySource); + if (!icu::UTF8::isValidTrail(ch, static_cast<uint8_t>(ch2), i, inBytes) && + !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2))) + { + break; /* i < inBytes */ + } + ch = (ch << 6) + ch2; + ++mySource; + i++; + } + else + { + /* stores a partially calculated target*/ + cnv->toUnicodeStatus = ch; + cnv->mode = inBytes; + cnv->toULength = (int8_t) i; + goto donefornow; + } + } + + // In CESU-8, only surrogates, not supplementary code points, are encoded directly. + if (i == inBytes && (!isCESU8 || i <= 3)) + { + /* Remove the accumulated high bits */ + ch -= offsetsFromUTF8[inBytes]; + + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ + if (ch <= MAXIMUM_UCS2) + { + /* fits in 16 bits */ + *(myTarget++) = (UChar) ch; + } + else + { + /* write out the surrogates */ + *(myTarget++) = U16_LEAD(ch); + ch = U16_TRAIL(ch); + if (myTarget < targetLimit) + { + *(myTarget++) = (UChar)ch; + } + else + { + /* Put in overflow buffer (not handled here) */ + cnv->UCharErrorBuffer[0] = (UChar) ch; + cnv->UCharErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } + else + { + cnv->toULength = (int8_t)i; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + } + +donefornow: + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + /* End of target buffer */ + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = myTarget; + args->source = (const char *) mySource; +} + +static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeArgs * args, + UErrorCode * err) +{ + UConverter *cnv = args->converter; + const unsigned char *mySource = (unsigned char *) args->source; + UChar *myTarget = args->target; + int32_t *myOffsets = args->offsets; + int32_t offsetNum = 0; + const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; + const UChar *targetLimit = args->targetLimit; + unsigned char *toUBytes = cnv->toUBytes; + UBool isCESU8 = hasCESU8Data(cnv); + uint32_t ch, ch2 = 0; + int32_t i, inBytes; + + /* Restore size of current sequence */ + if (cnv->toULength > 0 && myTarget < targetLimit) + { + inBytes = cnv->mode; /* restore # of bytes to consume */ + i = cnv->toULength; /* restore # of bytes consumed */ + cnv->toULength = 0; + + ch = cnv->toUnicodeStatus;/*Stores the previously calculated ch from a previous call*/ + cnv->toUnicodeStatus = 0; + goto morebytes; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + ch = *(mySource++); + if (U8_IS_SINGLE(ch)) /* Simple case */ + { + *(myTarget++) = (UChar) ch; + *(myOffsets++) = offsetNum++; + } + else + { + toUBytes[0] = (char)ch; + inBytes = U8_COUNT_BYTES_NON_ASCII(ch); + i = 1; + +morebytes: + while (i < inBytes) + { + if (mySource < sourceLimit) + { + toUBytes[i] = (char) (ch2 = *mySource); + if (!icu::UTF8::isValidTrail(ch, static_cast<uint8_t>(ch2), i, inBytes) && + !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2))) + { + break; /* i < inBytes */ + } + ch = (ch << 6) + ch2; + ++mySource; + i++; + } + else + { + cnv->toUnicodeStatus = ch; + cnv->mode = inBytes; + cnv->toULength = (int8_t)i; + goto donefornow; + } + } + + // In CESU-8, only surrogates, not supplementary code points, are encoded directly. + if (i == inBytes && (!isCESU8 || i <= 3)) + { + /* Remove the accumulated high bits */ + ch -= offsetsFromUTF8[inBytes]; + + /* Normal valid byte when the loop has not prematurely terminated (i < inBytes) */ + if (ch <= MAXIMUM_UCS2) + { + /* fits in 16 bits */ + *(myTarget++) = (UChar) ch; + *(myOffsets++) = offsetNum; + } + else + { + /* write out the surrogates */ + *(myTarget++) = U16_LEAD(ch); + *(myOffsets++) = offsetNum; + ch = U16_TRAIL(ch); + if (myTarget < targetLimit) + { + *(myTarget++) = (UChar)ch; + *(myOffsets++) = offsetNum; + } + else + { + cnv->UCharErrorBuffer[0] = (UChar) ch; + cnv->UCharErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + offsetNum += i; + } + else + { + cnv->toULength = (int8_t)i; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + } + +donefornow: + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { /* End of target buffer */ + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = myTarget; + args->source = (const char *) mySource; + args->offsets = myOffsets; +} +U_CDECL_END + +U_CFUNC void U_CALLCONV ucnv_fromUnicode_UTF8 (UConverterFromUnicodeArgs * args, + UErrorCode * err) +{ + UConverter *cnv = args->converter; + const UChar *mySource = args->source; + const UChar *sourceLimit = args->sourceLimit; + uint8_t *myTarget = (uint8_t *) args->target; + const uint8_t *targetLimit = (uint8_t *) args->targetLimit; + uint8_t *tempPtr; + UChar32 ch; + uint8_t tempBuf[4]; + int32_t indexToWrite; + UBool isNotCESU8 = !hasCESU8Data(cnv); + + if (cnv->fromUChar32 && myTarget < targetLimit) + { + ch = cnv->fromUChar32; + cnv->fromUChar32 = 0; + goto lowsurrogate; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + ch = *(mySource++); + + if (ch < 0x80) /* Single byte */ + { + *(myTarget++) = (uint8_t) ch; + } + else if (ch < 0x800) /* Double byte */ + { + *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0); + if (myTarget < targetLimit) + { + *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80); + } + else + { + cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); + cnv->charErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + else { + /* Check for surrogates */ + if(U16_IS_SURROGATE(ch) && isNotCESU8) { +lowsurrogate: + if (mySource < sourceLimit) { + /* test both code units */ + if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) { + /* convert and consume this supplementary code point */ + ch=U16_GET_SUPPLEMENTARY(ch, *mySource); + ++mySource; + /* exit this condition tree */ + } + else { + /* this is an unpaired trail or lead code unit */ + /* callback(illegal) */ + cnv->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + else { + /* no more input */ + cnv->fromUChar32 = ch; + break; + } + } + + /* Do we write the buffer directly for speed, + or do we have to be careful about target buffer space? */ + tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf); + + if (ch <= MAXIMUM_UCS2) { + indexToWrite = 2; + tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0); + } + else { + indexToWrite = 3; + tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0); + tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80); + } + tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80); + tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80); + + if (tempPtr == myTarget) { + /* There was enough space to write the codepoint directly. */ + myTarget += (indexToWrite + 1); + } + else { + /* We might run out of room soon. Write it slowly. */ + for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) { + if (myTarget < targetLimit) { + *(myTarget++) = *tempPtr; + } + else { + cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + } + } + } + + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = (char *) myTarget; + args->source = mySource; +} + +U_CFUNC void U_CALLCONV ucnv_fromUnicode_UTF8_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, + UErrorCode * err) +{ + UConverter *cnv = args->converter; + const UChar *mySource = args->source; + int32_t *myOffsets = args->offsets; + const UChar *sourceLimit = args->sourceLimit; + uint8_t *myTarget = (uint8_t *) args->target; + const uint8_t *targetLimit = (uint8_t *) args->targetLimit; + uint8_t *tempPtr; + UChar32 ch; + int32_t offsetNum, nextSourceIndex; + int32_t indexToWrite; + uint8_t tempBuf[4]; + UBool isNotCESU8 = !hasCESU8Data(cnv); + + if (cnv->fromUChar32 && myTarget < targetLimit) + { + ch = cnv->fromUChar32; + cnv->fromUChar32 = 0; + offsetNum = -1; + nextSourceIndex = 0; + goto lowsurrogate; + } else { + offsetNum = 0; + } + + while (mySource < sourceLimit && myTarget < targetLimit) + { + ch = *(mySource++); + + if (ch < 0x80) /* Single byte */ + { + *(myOffsets++) = offsetNum++; + *(myTarget++) = (char) ch; + } + else if (ch < 0x800) /* Double byte */ + { + *(myOffsets++) = offsetNum; + *(myTarget++) = (uint8_t) ((ch >> 6) | 0xc0); + if (myTarget < targetLimit) + { + *(myOffsets++) = offsetNum++; + *(myTarget++) = (uint8_t) ((ch & 0x3f) | 0x80); + } + else + { + cnv->charErrorBuffer[0] = (uint8_t) ((ch & 0x3f) | 0x80); + cnv->charErrorBufferLength = 1; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + else + /* Check for surrogates */ + { + nextSourceIndex = offsetNum + 1; + + if(U16_IS_SURROGATE(ch) && isNotCESU8) { +lowsurrogate: + if (mySource < sourceLimit) { + /* test both code units */ + if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(*mySource)) { + /* convert and consume this supplementary code point */ + ch=U16_GET_SUPPLEMENTARY(ch, *mySource); + ++mySource; + ++nextSourceIndex; + /* exit this condition tree */ + } + else { + /* this is an unpaired trail or lead code unit */ + /* callback(illegal) */ + cnv->fromUChar32 = ch; + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + else { + /* no more input */ + cnv->fromUChar32 = ch; + break; + } + } + + /* Do we write the buffer directly for speed, + or do we have to be careful about target buffer space? */ + tempPtr = (((targetLimit - myTarget) >= 4) ? myTarget : tempBuf); + + if (ch <= MAXIMUM_UCS2) { + indexToWrite = 2; + tempPtr[0] = (uint8_t) ((ch >> 12) | 0xe0); + } + else { + indexToWrite = 3; + tempPtr[0] = (uint8_t) ((ch >> 18) | 0xf0); + tempPtr[1] = (uint8_t) (((ch >> 12) & 0x3f) | 0x80); + } + tempPtr[indexToWrite-1] = (uint8_t) (((ch >> 6) & 0x3f) | 0x80); + tempPtr[indexToWrite] = (uint8_t) ((ch & 0x3f) | 0x80); + + if (tempPtr == myTarget) { + /* There was enough space to write the codepoint directly. */ + myTarget += (indexToWrite + 1); + myOffsets[0] = offsetNum; + myOffsets[1] = offsetNum; + myOffsets[2] = offsetNum; + if (indexToWrite >= 3) { + myOffsets[3] = offsetNum; + } + myOffsets += (indexToWrite + 1); + } + else { + /* We might run out of room soon. Write it slowly. */ + for (; tempPtr <= (tempBuf + indexToWrite); tempPtr++) { + if (myTarget < targetLimit) + { + *(myOffsets++) = offsetNum; + *(myTarget++) = *tempPtr; + } + else + { + cnv->charErrorBuffer[cnv->charErrorBufferLength++] = *tempPtr; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + } + offsetNum = nextSourceIndex; + } + } + + if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) + { + *err = U_BUFFER_OVERFLOW_ERROR; + } + + args->target = (char *) myTarget; + args->source = mySource; + args->offsets = myOffsets; +} + +U_CDECL_BEGIN +static UChar32 U_CALLCONV ucnv_getNextUChar_UTF8(UConverterToUnicodeArgs *args, + UErrorCode *err) { + UConverter *cnv; + const uint8_t *sourceInitial; + const uint8_t *source; + uint8_t myByte; + UChar32 ch; + int8_t i; + + /* UTF-8 only here, the framework handles CESU-8 to combine surrogate pairs */ + + cnv = args->converter; + sourceInitial = source = (const uint8_t *)args->source; + if (source >= (const uint8_t *)args->sourceLimit) + { + /* no input */ + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; + } + + myByte = (uint8_t)*(source++); + if (U8_IS_SINGLE(myByte)) + { + args->source = (const char *)source; + return (UChar32)myByte; + } + + uint16_t countTrailBytes = U8_COUNT_TRAIL_BYTES(myByte); + if (countTrailBytes == 0) { + cnv->toUBytes[0] = myByte; + cnv->toULength = 1; + *err = U_ILLEGAL_CHAR_FOUND; + args->source = (const char *)source; + return 0xffff; + } + + /*The byte sequence is longer than the buffer area passed*/ + if (((const char *)source + countTrailBytes) > args->sourceLimit) + { + /* check if all of the remaining bytes are trail bytes */ + uint16_t extraBytesToWrite = countTrailBytes + 1; + cnv->toUBytes[0] = myByte; + i = 1; + *err = U_TRUNCATED_CHAR_FOUND; + while(source < (const uint8_t *)args->sourceLimit) { + uint8_t b = *source; + if(icu::UTF8::isValidTrail(myByte, b, i, extraBytesToWrite)) { + cnv->toUBytes[i++] = b; + ++source; + } else { + /* error even before we run out of input */ + *err = U_ILLEGAL_CHAR_FOUND; + break; + } + } + cnv->toULength = i; + args->source = (const char *)source; + return 0xffff; + } + + ch = myByte << 6; + if(countTrailBytes == 2) { + uint8_t t1 = *source, t2; + if(U8_IS_VALID_LEAD3_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source)) { + args->source = (const char *)(source + 1); + return (((ch + t1) << 6) + t2) - offsetsFromUTF8[3]; + } + } else if(countTrailBytes == 1) { + uint8_t t1 = *source; + if(U8_IS_TRAIL(t1)) { + args->source = (const char *)(source + 1); + return (ch + t1) - offsetsFromUTF8[2]; + } + } else { // countTrailBytes == 3 + uint8_t t1 = *source, t2, t3; + if(U8_IS_VALID_LEAD4_AND_T1(myByte, t1) && U8_IS_TRAIL(t2 = *++source) && + U8_IS_TRAIL(t3 = *++source)) { + args->source = (const char *)(source + 1); + return (((((ch + t1) << 6) + t2) << 6) + t3) - offsetsFromUTF8[4]; + } + } + args->source = (const char *)source; + + for(i = 0; sourceInitial < source; ++i) { + cnv->toUBytes[i] = *sourceInitial++; + } + cnv->toULength = i; + *err = U_ILLEGAL_CHAR_FOUND; + return 0xffff; +} +U_CDECL_END + +/* UTF-8-from-UTF-8 conversion functions ------------------------------------ */ + +U_CDECL_BEGIN +/* "Convert" UTF-8 to UTF-8: Validate and copy. Modified from ucnv_DBCSFromUTF8(). */ +static void U_CALLCONV +ucnv_UTF8FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, + UConverterToUnicodeArgs *pToUArgs, + UErrorCode *pErrorCode) { + UConverter *utf8; + const uint8_t *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + int32_t count; + + int8_t oldToULength, toULength, toULimit; + + UChar32 c; + uint8_t b, t1, t2; + + /* set up the local pointers */ + utf8=pToUArgs->converter; + source=(uint8_t *)pToUArgs->source; + sourceLimit=(uint8_t *)pToUArgs->sourceLimit; + target=(uint8_t *)pFromUArgs->target; + targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); + + /* get the converter state from the UTF-8 UConverter */ + if(utf8->toULength > 0) { + toULength=oldToULength=utf8->toULength; + toULimit=(int8_t)utf8->mode; + c=(UChar32)utf8->toUnicodeStatus; + } else { + toULength=oldToULength=toULimit=0; + c = 0; + } + + count=(int32_t)(sourceLimit-source)+oldToULength; + if(count<toULimit) { + /* + * Not enough input to complete the partial character. + * Jump to moreBytes below - it will not output to target. + */ + } else if(targetCapacity<toULimit) { + /* + * Not enough target capacity to output the partial character. + * Let the standard converter handle this. + */ + *pErrorCode=U_USING_DEFAULT_WARNING; + return; + } else { + // Use a single counter for source and target, counting the minimum of + // the source length and the target capacity. + // Let the standard converter handle edge cases. + if(count>targetCapacity) { + count=targetCapacity; + } + + // The conversion loop checks count>0 only once per character. + // If the buffer ends with a truncated sequence, + // then we reduce the count to stop before that, + // and collect the remaining bytes after the conversion loop. + + // Do not go back into the bytes that will be read for finishing a partial + // sequence from the previous buffer. + int32_t length=count-toULength; + U8_TRUNCATE_IF_INCOMPLETE(source, 0, length); + count=toULength+length; + } + + if(c!=0) { + utf8->toUnicodeStatus=0; + utf8->toULength=0; + goto moreBytes; + /* See note in ucnv_SBCSFromUTF8() about this goto. */ + } + + /* conversion loop */ + while(count>0) { + b=*source++; + if(U8_IS_SINGLE(b)) { + /* convert ASCII */ + *target++=b; + --count; + continue; + } else { + if(b>=0xe0) { + if( /* handle U+0800..U+FFFF inline */ + b<0xf0 && + U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) && + U8_IS_TRAIL(t2=source[1]) + ) { + source+=2; + *target++=b; + *target++=t1; + *target++=t2; + count-=3; + continue; + } + } else { + if( /* handle U+0080..U+07FF inline */ + b>=0xc2 && + U8_IS_TRAIL(t1=*source) + ) { + ++source; + *target++=b; + *target++=t1; + count-=2; + continue; + } + } + + /* handle "complicated" and error cases, and continuing partial characters */ + oldToULength=0; + toULength=1; + toULimit=U8_COUNT_BYTES_NON_ASCII(b); + c=b; +moreBytes: + while(toULength<toULimit) { + if(source<sourceLimit) { + b=*source; + if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) { + ++source; + ++toULength; + c=(c<<6)+b; + } else { + break; /* sequence too short, stop with toULength<toULimit */ + } + } else { + /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */ + source-=(toULength-oldToULength); + while(oldToULength<toULength) { + utf8->toUBytes[oldToULength++]=*source++; + } + utf8->toUnicodeStatus=c; + utf8->toULength=toULength; + utf8->mode=toULimit; + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; + return; + } + } + + if(toULength!=toULimit) { + /* error handling: illegal UTF-8 byte sequence */ + source-=(toULength-oldToULength); + while(oldToULength<toULength) { + utf8->toUBytes[oldToULength++]=*source++; + } + utf8->toULength=toULength; + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + return; + } + + /* copy the legal byte sequence to the target */ + { + int8_t i; + + for(i=0; i<oldToULength; ++i) { + *target++=utf8->toUBytes[i]; + } + source-=(toULength-oldToULength); + for(; i<toULength; ++i) { + *target++=*source++; + } + count-=toULength; + } + } + } + U_ASSERT(count>=0); + + if(U_SUCCESS(*pErrorCode) && source<sourceLimit) { + if(target==(const uint8_t *)pFromUArgs->targetLimit) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } else { + b=*source; + toULimit=U8_COUNT_BYTES(b); + if(toULimit>(sourceLimit-source)) { + /* collect a truncated byte sequence */ + toULength=0; + c=b; + for(;;) { + utf8->toUBytes[toULength++]=b; + if(++source==sourceLimit) { + /* partial byte sequence at end of source */ + utf8->toUnicodeStatus=c; + utf8->toULength=toULength; + utf8->mode=toULimit; + break; + } else if(!icu::UTF8::isValidTrail(c, b=*source, toULength, toULimit)) { + utf8->toULength=toULength; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + c=(c<<6)+b; + } + } else { + /* partial-sequence target overflow: fall back to the pivoting implementation */ + *pErrorCode=U_USING_DEFAULT_WARNING; + } + } + } + + /* write back the updated pointers */ + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; +} + +U_CDECL_END + +/* UTF-8 converter data ----------------------------------------------------- */ + +static const UConverterImpl _UTF8Impl={ + UCNV_UTF8, + + NULL, + NULL, + + NULL, + NULL, + NULL, + + ucnv_toUnicode_UTF8, + ucnv_toUnicode_UTF8_OFFSETS_LOGIC, + ucnv_fromUnicode_UTF8, + ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, + ucnv_getNextUChar_UTF8, + + NULL, + NULL, + NULL, + NULL, + ucnv_getNonSurrogateUnicodeSet, + + ucnv_UTF8FromUTF8, + ucnv_UTF8FromUTF8 +}; + +/* The 1208 CCSID refers to any version of Unicode of UTF-8 */ +static const UConverterStaticData _UTF8StaticData={ + sizeof(UConverterStaticData), + "UTF-8", + 1208, UCNV_IBM, UCNV_UTF8, + 1, 3, /* max 3 bytes per UChar from UTF-8 (4 bytes from surrogate _pair_) */ + { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + + +const UConverterSharedData _UTF8Data= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF8StaticData, &_UTF8Impl); + +/* CESU-8 converter data ---------------------------------------------------- */ + +static const UConverterImpl _CESU8Impl={ + UCNV_CESU8, + + NULL, + NULL, + + NULL, + NULL, + NULL, + + ucnv_toUnicode_UTF8, + ucnv_toUnicode_UTF8_OFFSETS_LOGIC, + ucnv_fromUnicode_UTF8, + ucnv_fromUnicode_UTF8_OFFSETS_LOGIC, + NULL, + + NULL, + NULL, + NULL, + NULL, + ucnv_getCompleteUnicodeSet, + + NULL, + NULL +}; + +static const UConverterStaticData _CESU8StaticData={ + sizeof(UConverterStaticData), + "CESU-8", + 9400, /* CCSID for CESU-8 */ + UCNV_UNKNOWN, UCNV_CESU8, 1, 3, + { 0xef, 0xbf, 0xbd, 0 },3,FALSE,FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + + +const UConverterSharedData _CESU8Data= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CESU8StaticData, &_CESU8Impl); + +#endif diff --git a/thirdparty/icu4c/common/ucnvbocu.cpp b/thirdparty/icu4c/common/ucnvbocu.cpp new file mode 100644 index 0000000000..7c2aab5655 --- /dev/null +++ b/thirdparty/icu4c/common/ucnvbocu.cpp @@ -0,0 +1,1413 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ucnvbocu.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002mar27 +* created by: Markus W. Scherer +* +* This is an implementation of the Binary Ordered Compression for Unicode, +* in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/ +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/ucnv_cb.h" +#include "unicode/utf16.h" +#include "putilimp.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "uassert.h" + +/* BOCU-1 constants and macros ---------------------------------------------- */ + +/* + * BOCU-1 encodes the code points of a Unicode string as + * a sequence of byte-encoded differences (slope detection), + * preserving lexical order. + * + * Optimize the difference-taking for runs of Unicode text within + * small scripts: + * + * Most small scripts are allocated within aligned 128-blocks of Unicode + * code points. Lexical order is preserved if the "previous code point" state + * is always moved into the middle of such a block. + * + * Additionally, "prev" is moved from anywhere in the Unihan and Hangul + * areas into the middle of those areas. + * + * C0 control codes and space are encoded with their US-ASCII bytes. + * "prev" is reset for C0 controls but not for space. + */ + +/* initial value for "prev": middle of the ASCII range */ +#define BOCU1_ASCII_PREV 0x40 + +/* bounding byte values for differences */ +#define BOCU1_MIN 0x21 +#define BOCU1_MIDDLE 0x90 +#define BOCU1_MAX_LEAD 0xfe +#define BOCU1_MAX_TRAIL 0xff +#define BOCU1_RESET 0xff + +/* number of lead bytes */ +#define BOCU1_COUNT (BOCU1_MAX_LEAD-BOCU1_MIN+1) + +/* adjust trail byte counts for the use of some C0 control byte values */ +#define BOCU1_TRAIL_CONTROLS_COUNT 20 +#define BOCU1_TRAIL_BYTE_OFFSET (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT) + +/* number of trail bytes */ +#define BOCU1_TRAIL_COUNT ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT) + +/* + * number of positive and negative single-byte codes + * (counting 0==BOCU1_MIDDLE among the positive ones) + */ +#define BOCU1_SINGLE 64 + +/* number of lead bytes for positive and negative 2/3/4-byte sequences */ +#define BOCU1_LEAD_2 43 +#define BOCU1_LEAD_3 3 +#define BOCU1_LEAD_4 1 + +/* The difference value range for single-byters. */ +#define BOCU1_REACH_POS_1 (BOCU1_SINGLE-1) +#define BOCU1_REACH_NEG_1 (-BOCU1_SINGLE) + +/* The difference value range for double-byters. */ +#define BOCU1_REACH_POS_2 (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT) +#define BOCU1_REACH_NEG_2 (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT) + +/* The difference value range for 3-byters. */ +#define BOCU1_REACH_POS_3 \ + (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT) + +#define BOCU1_REACH_NEG_3 (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT) + +/* The lead byte start values. */ +#define BOCU1_START_POS_2 (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1) +#define BOCU1_START_POS_3 (BOCU1_START_POS_2+BOCU1_LEAD_2) +#define BOCU1_START_POS_4 (BOCU1_START_POS_3+BOCU1_LEAD_3) + /* ==BOCU1_MAX_LEAD */ + +#define BOCU1_START_NEG_2 (BOCU1_MIDDLE+BOCU1_REACH_NEG_1) +#define BOCU1_START_NEG_3 (BOCU1_START_NEG_2-BOCU1_LEAD_2) +#define BOCU1_START_NEG_4 (BOCU1_START_NEG_3-BOCU1_LEAD_3) + /* ==BOCU1_MIN+1 */ + +/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */ +#define BOCU1_LENGTH_FROM_LEAD(lead) \ + ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \ + (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \ + (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4) + +/* The length of a byte sequence, according to its packed form. */ +#define BOCU1_LENGTH_FROM_PACKED(packed) \ + ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4) + +/* + * 12 commonly used C0 control codes (and space) are only used to encode + * themselves directly, + * which makes BOCU-1 MIME-usable and reasonably safe for + * ASCII-oriented software. + * + * These controls are + * 0 NUL + * + * 7 BEL + * 8 BS + * + * 9 TAB + * a LF + * b VT + * c FF + * d CR + * + * e SO + * f SI + * + * 1a SUB + * 1b ESC + * + * The other 20 C0 controls are also encoded directly (to preserve order) + * but are also used as trail bytes in difference encoding + * (for better compression). + */ +#define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t]) + +/* + * Byte value map for control codes, + * from external byte values 0x00..0x20 + * to trail byte values 0..19 (0..0x13) as used in the difference calculation. + * External byte values that are illegal as trail bytes are mapped to -1. + */ +static const int8_t +bocu1ByteToTrail[BOCU1_MIN]={ +/* 0 1 2 3 4 5 6 7 */ + -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1, + +/* 8 9 a b c d e f */ + -1, -1, -1, -1, -1, -1, -1, -1, + +/* 10 11 12 13 14 15 16 17 */ + 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, + +/* 18 19 1a 1b 1c 1d 1e 1f */ + 0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13, + +/* 20 */ + -1 +}; + +/* + * Byte value map for control codes, + * from trail byte values 0..19 (0..0x13) as used in the difference calculation + * to external byte values 0x00..0x20. + */ +static const int8_t +bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={ +/* 0 1 2 3 4 5 6 7 */ + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11, + +/* 8 9 a b c d e f */ + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + +/* 10 11 12 13 */ + 0x1c, 0x1d, 0x1e, 0x1f +}; + +/** + * Integer division and modulo with negative numerators + * yields negative modulo results and quotients that are one more than + * what we need here. + * This macro adjust the results so that the modulo-value m is always >=0. + * + * For positive n, the if() condition is always FALSE. + * + * @param n Number to be split into quotient and rest. + * Will be modified to contain the quotient. + * @param d Divisor. + * @param m Output variable for the rest (modulo result). + */ +#define NEGDIVMOD(n, d, m) UPRV_BLOCK_MACRO_BEGIN { \ + (m)=(n)%(d); \ + (n)/=(d); \ + if((m)<0) { \ + --(n); \ + (m)+=(d); \ + } \ +} UPRV_BLOCK_MACRO_END + +/* Faster versions of packDiff() for single-byte-encoded diff values. */ + +/** Is a diff value encodable in a single byte? */ +#define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1) + +/** Encode a diff value in a single byte. */ +#define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff)) + +/** Is a diff value encodable in two bytes? */ +#define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2) + +/* BOCU-1 implementation functions ------------------------------------------ */ + +#define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV) + +/** + * Compute the next "previous" value for differencing + * from the current code point. + * + * @param c current code point, 0x3040..0xd7a3 (rest handled by macro below) + * @return "previous code point" state value + */ +static inline int32_t +bocu1Prev(int32_t c) { + /* compute new prev */ + if(/* 0x3040<=c && */ c<=0x309f) { + /* Hiragana is not 128-aligned */ + return 0x3070; + } else if(0x4e00<=c && c<=0x9fa5) { + /* CJK Unihan */ + return 0x4e00-BOCU1_REACH_NEG_2; + } else if(0xac00<=c /* && c<=0xd7a3 */) { + /* Korean Hangul */ + return (0xd7a3+0xac00)/2; + } else { + /* mostly small scripts */ + return BOCU1_SIMPLE_PREV(c); + } +} + +/** Fast version of bocu1Prev() for most scripts. */ +#define BOCU1_PREV(c) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c)) + +/* + * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c. + * The UConverter fields are used as follows: + * + * fromUnicodeStatus encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV) + * + * toUnicodeStatus decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV) + * mode decoder's incomplete (diff<<2)|count (ignored when toULength==0) + */ + +/* BOCU-1-from-Unicode conversion functions --------------------------------- */ + +/** + * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes + * and return a packed integer with them. + * + * The encoding favors small absolute differences with short encodings + * to compress runs of same-script characters. + * + * Optimized version with unrolled loops and fewer floating-point operations + * than the standard packDiff(). + * + * @param diff difference value -0x10ffff..0x10ffff + * @return + * 0x010000zz for 1-byte sequence zz + * 0x0200yyzz for 2-byte sequence yy zz + * 0x03xxyyzz for 3-byte sequence xx yy zz + * 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03) + */ +static int32_t +packDiff(int32_t diff) { + int32_t result, m; + + U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */ + if(diff>=BOCU1_REACH_NEG_1) { + /* mostly positive differences, and single-byte negative ones */ +#if 0 /* single-byte case handled in macros, see below */ + if(diff<=BOCU1_REACH_POS_1) { + /* single byte */ + return 0x01000000|(BOCU1_MIDDLE+diff); + } else +#endif + if(diff<=BOCU1_REACH_POS_2) { + /* two bytes */ + diff-=BOCU1_REACH_POS_1+1; + result=0x02000000; + + m=diff%BOCU1_TRAIL_COUNT; + diff/=BOCU1_TRAIL_COUNT; + result|=BOCU1_TRAIL_TO_BYTE(m); + + result|=(BOCU1_START_POS_2+diff)<<8; + } else if(diff<=BOCU1_REACH_POS_3) { + /* three bytes */ + diff-=BOCU1_REACH_POS_2+1; + result=0x03000000; + + m=diff%BOCU1_TRAIL_COUNT; + diff/=BOCU1_TRAIL_COUNT; + result|=BOCU1_TRAIL_TO_BYTE(m); + + m=diff%BOCU1_TRAIL_COUNT; + diff/=BOCU1_TRAIL_COUNT; + result|=BOCU1_TRAIL_TO_BYTE(m)<<8; + + result|=(BOCU1_START_POS_3+diff)<<16; + } else { + /* four bytes */ + diff-=BOCU1_REACH_POS_3+1; + + m=diff%BOCU1_TRAIL_COUNT; + diff/=BOCU1_TRAIL_COUNT; + result=BOCU1_TRAIL_TO_BYTE(m); + + m=diff%BOCU1_TRAIL_COUNT; + diff/=BOCU1_TRAIL_COUNT; + result|=BOCU1_TRAIL_TO_BYTE(m)<<8; + + /* + * We know that / and % would deliver quotient 0 and rest=diff. + * Avoid division and modulo for performance. + */ + result|=BOCU1_TRAIL_TO_BYTE(diff)<<16; + + result|=((uint32_t)BOCU1_START_POS_4)<<24; + } + } else { + /* two- to four-byte negative differences */ + if(diff>=BOCU1_REACH_NEG_2) { + /* two bytes */ + diff-=BOCU1_REACH_NEG_1; + result=0x02000000; + + NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); + result|=BOCU1_TRAIL_TO_BYTE(m); + + result|=(BOCU1_START_NEG_2+diff)<<8; + } else if(diff>=BOCU1_REACH_NEG_3) { + /* three bytes */ + diff-=BOCU1_REACH_NEG_2; + result=0x03000000; + + NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); + result|=BOCU1_TRAIL_TO_BYTE(m); + + NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); + result|=BOCU1_TRAIL_TO_BYTE(m)<<8; + + result|=(BOCU1_START_NEG_3+diff)<<16; + } else { + /* four bytes */ + diff-=BOCU1_REACH_NEG_3; + + NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); + result=BOCU1_TRAIL_TO_BYTE(m); + + NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); + result|=BOCU1_TRAIL_TO_BYTE(m)<<8; + + /* + * We know that NEGDIVMOD would deliver + * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT. + * Avoid division and modulo for performance. + */ + m=diff+BOCU1_TRAIL_COUNT; + result|=BOCU1_TRAIL_TO_BYTE(m)<<16; + + result|=BOCU1_MIN<<24; + } + } + return result; +} + + +static void U_CALLCONV +_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + int32_t *offsets; + + int32_t prev, c, diff; + + int32_t sourceIndex, nextSourceIndex; + + /* set up the local pointers */ + cnv=pArgs->converter; + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + /* get the converter state from UConverter */ + c=cnv->fromUChar32; + prev=(int32_t)cnv->fromUnicodeStatus; + if(prev==0) { + prev=BOCU1_ASCII_PREV; + } + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex= c==0 ? 0 : -1; + nextSourceIndex=0; + + /* conversion loop */ + if(c!=0 && targetCapacity>0) { + goto getTrail; + } + +fastSingle: + /* fast loop for single-byte differences */ + /* use only one loop counter variable, targetCapacity, not also source */ + diff=(int32_t)(sourceLimit-source); + if(targetCapacity>diff) { + targetCapacity=diff; + } + while(targetCapacity>0 && (c=*source)<0x3000) { + if(c<=0x20) { + if(c!=0x20) { + prev=BOCU1_ASCII_PREV; + } + *target++=(uint8_t)c; + *offsets++=nextSourceIndex++; + ++source; + --targetCapacity; + } else { + diff=c-prev; + if(DIFF_IS_SINGLE(diff)) { + prev=BOCU1_SIMPLE_PREV(c); + *target++=(uint8_t)PACK_SINGLE_DIFF(diff); + *offsets++=nextSourceIndex++; + ++source; + --targetCapacity; + } else { + break; + } + } + } + /* restore real values */ + targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target); + sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */ + + /* regular loop for all cases */ + while(source<sourceLimit) { + if(targetCapacity>0) { + c=*source++; + ++nextSourceIndex; + + if(c<=0x20) { + /* + * ISO C0 control & space: + * Encode directly for MIME compatibility, + * and reset state except for space, to not disrupt compression. + */ + if(c!=0x20) { + prev=BOCU1_ASCII_PREV; + } + *target++=(uint8_t)c; + *offsets++=sourceIndex; + --targetCapacity; + + sourceIndex=nextSourceIndex; + continue; + } + + if(U16_IS_LEAD(c)) { +getTrail: + if(source<sourceLimit) { + /* test the following code unit */ + UChar trail=*source; + if(U16_IS_TRAIL(trail)) { + ++source; + ++nextSourceIndex; + c=U16_GET_SUPPLEMENTARY(c, trail); + } + } else { + /* no more input */ + c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */ + break; + } + } + + /* + * all other Unicode code points c==U+0021..U+10ffff + * are encoded with the difference c-prev + * + * a new prev is computed from c, + * placed in the middle of a 0x80-block (for most small scripts) or + * in the middle of the Unihan and Hangul blocks + * to statistically minimize the following difference + */ + diff=c-prev; + prev=BOCU1_PREV(c); + if(DIFF_IS_SINGLE(diff)) { + *target++=(uint8_t)PACK_SINGLE_DIFF(diff); + *offsets++=sourceIndex; + --targetCapacity; + sourceIndex=nextSourceIndex; + if(c<0x3000) { + goto fastSingle; + } + } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) { + /* optimize 2-byte case */ + int32_t m; + + if(diff>=0) { + diff-=BOCU1_REACH_POS_1+1; + m=diff%BOCU1_TRAIL_COUNT; + diff/=BOCU1_TRAIL_COUNT; + diff+=BOCU1_START_POS_2; + } else { + diff-=BOCU1_REACH_NEG_1; + NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); + diff+=BOCU1_START_NEG_2; + } + *target++=(uint8_t)diff; + *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m); + *offsets++=sourceIndex; + *offsets++=sourceIndex; + targetCapacity-=2; + sourceIndex=nextSourceIndex; + } else { + int32_t length; /* will be 2..4 */ + + diff=packDiff(diff); + length=BOCU1_LENGTH_FROM_PACKED(diff); + + /* write the output character bytes from diff and length */ + /* from the first if in the loop we know that targetCapacity>0 */ + if(length<=targetCapacity) { + switch(length) { + /* each branch falls through to the next one */ + case 4: + *target++=(uint8_t)(diff>>24); + *offsets++=sourceIndex; + U_FALLTHROUGH; + case 3: + *target++=(uint8_t)(diff>>16); + *offsets++=sourceIndex; + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(diff>>8); + *offsets++=sourceIndex; + /* case 1: handled above */ + *target++=(uint8_t)diff; + *offsets++=sourceIndex; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + targetCapacity-=length; + sourceIndex=nextSourceIndex; + } else { + uint8_t *charErrorBuffer; + + /* + * We actually do this backwards here: + * In order to save an intermediate variable, we output + * first to the overflow buffer what does not fit into the + * regular target. + */ + /* we know that 1<=targetCapacity<length<=4 */ + length-=targetCapacity; + charErrorBuffer=(uint8_t *)cnv->charErrorBuffer; + switch(length) { + /* each branch falls through to the next one */ + case 3: + *charErrorBuffer++=(uint8_t)(diff>>16); + U_FALLTHROUGH; + case 2: + *charErrorBuffer++=(uint8_t)(diff>>8); + U_FALLTHROUGH; + case 1: + *charErrorBuffer=(uint8_t)diff; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + cnv->charErrorBufferLength=(int8_t)length; + + /* now output what fits into the regular target */ + diff>>=8*length; /* length was reduced by targetCapacity */ + switch(targetCapacity) { + /* each branch falls through to the next one */ + case 3: + *target++=(uint8_t)(diff>>16); + *offsets++=sourceIndex; + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(diff>>8); + *offsets++=sourceIndex; + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)diff; + *offsets++=sourceIndex; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + + /* target overflow */ + targetCapacity=0; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + /* set the converter state back into UConverter */ + cnv->fromUChar32= c<0 ? -c : 0; + cnv->fromUnicodeStatus=(uint32_t)prev; + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; +} + +/* + * Identical to _Bocu1FromUnicodeWithOffsets but without offset handling. + * If a change is made in the original function, then either + * change this function the same way or + * re-copy the original function and remove the variables + * offsets, sourceIndex, and nextSourceIndex. + */ +static void U_CALLCONV +_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + + int32_t prev, c, diff; + + /* set up the local pointers */ + cnv=pArgs->converter; + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + + /* get the converter state from UConverter */ + c=cnv->fromUChar32; + prev=(int32_t)cnv->fromUnicodeStatus; + if(prev==0) { + prev=BOCU1_ASCII_PREV; + } + + /* conversion loop */ + if(c!=0 && targetCapacity>0) { + goto getTrail; + } + +fastSingle: + /* fast loop for single-byte differences */ + /* use only one loop counter variable, targetCapacity, not also source */ + diff=(int32_t)(sourceLimit-source); + if(targetCapacity>diff) { + targetCapacity=diff; + } + while(targetCapacity>0 && (c=*source)<0x3000) { + if(c<=0x20) { + if(c!=0x20) { + prev=BOCU1_ASCII_PREV; + } + *target++=(uint8_t)c; + } else { + diff=c-prev; + if(DIFF_IS_SINGLE(diff)) { + prev=BOCU1_SIMPLE_PREV(c); + *target++=(uint8_t)PACK_SINGLE_DIFF(diff); + } else { + break; + } + } + ++source; + --targetCapacity; + } + /* restore real values */ + targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target); + + /* regular loop for all cases */ + while(source<sourceLimit) { + if(targetCapacity>0) { + c=*source++; + + if(c<=0x20) { + /* + * ISO C0 control & space: + * Encode directly for MIME compatibility, + * and reset state except for space, to not disrupt compression. + */ + if(c!=0x20) { + prev=BOCU1_ASCII_PREV; + } + *target++=(uint8_t)c; + --targetCapacity; + continue; + } + + if(U16_IS_LEAD(c)) { +getTrail: + if(source<sourceLimit) { + /* test the following code unit */ + UChar trail=*source; + if(U16_IS_TRAIL(trail)) { + ++source; + c=U16_GET_SUPPLEMENTARY(c, trail); + } + } else { + /* no more input */ + c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */ + break; + } + } + + /* + * all other Unicode code points c==U+0021..U+10ffff + * are encoded with the difference c-prev + * + * a new prev is computed from c, + * placed in the middle of a 0x80-block (for most small scripts) or + * in the middle of the Unihan and Hangul blocks + * to statistically minimize the following difference + */ + diff=c-prev; + prev=BOCU1_PREV(c); + if(DIFF_IS_SINGLE(diff)) { + *target++=(uint8_t)PACK_SINGLE_DIFF(diff); + --targetCapacity; + if(c<0x3000) { + goto fastSingle; + } + } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) { + /* optimize 2-byte case */ + int32_t m; + + if(diff>=0) { + diff-=BOCU1_REACH_POS_1+1; + m=diff%BOCU1_TRAIL_COUNT; + diff/=BOCU1_TRAIL_COUNT; + diff+=BOCU1_START_POS_2; + } else { + diff-=BOCU1_REACH_NEG_1; + NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m); + diff+=BOCU1_START_NEG_2; + } + *target++=(uint8_t)diff; + *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m); + targetCapacity-=2; + } else { + int32_t length; /* will be 2..4 */ + + diff=packDiff(diff); + length=BOCU1_LENGTH_FROM_PACKED(diff); + + /* write the output character bytes from diff and length */ + /* from the first if in the loop we know that targetCapacity>0 */ + if(length<=targetCapacity) { + switch(length) { + /* each branch falls through to the next one */ + case 4: + *target++=(uint8_t)(diff>>24); + U_FALLTHROUGH; + case 3: + *target++=(uint8_t)(diff>>16); + /* case 2: handled above */ + *target++=(uint8_t)(diff>>8); + /* case 1: handled above */ + *target++=(uint8_t)diff; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + targetCapacity-=length; + } else { + uint8_t *charErrorBuffer; + + /* + * We actually do this backwards here: + * In order to save an intermediate variable, we output + * first to the overflow buffer what does not fit into the + * regular target. + */ + /* we know that 1<=targetCapacity<length<=4 */ + length-=targetCapacity; + charErrorBuffer=(uint8_t *)cnv->charErrorBuffer; + switch(length) { + /* each branch falls through to the next one */ + case 3: + *charErrorBuffer++=(uint8_t)(diff>>16); + U_FALLTHROUGH; + case 2: + *charErrorBuffer++=(uint8_t)(diff>>8); + U_FALLTHROUGH; + case 1: + *charErrorBuffer=(uint8_t)diff; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + cnv->charErrorBufferLength=(int8_t)length; + + /* now output what fits into the regular target */ + diff>>=8*length; /* length was reduced by targetCapacity */ + switch(targetCapacity) { + /* each branch falls through to the next one */ + case 3: + *target++=(uint8_t)(diff>>16); + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(diff>>8); + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)diff; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + + /* target overflow */ + targetCapacity=0; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + /* set the converter state back into UConverter */ + cnv->fromUChar32= c<0 ? -c : 0; + cnv->fromUnicodeStatus=(uint32_t)prev; + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; +} + +/* BOCU-1-to-Unicode conversion functions ----------------------------------- */ + +/** + * Function for BOCU-1 decoder; handles multi-byte lead bytes. + * + * @param b lead byte; + * BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<BOCU1_MAX_LEAD + * @return (diff<<2)|count + */ +static inline int32_t +decodeBocu1LeadByte(int32_t b) { + int32_t diff, count; + + if(b>=BOCU1_START_NEG_2) { + /* positive difference */ + if(b<BOCU1_START_POS_3) { + /* two bytes */ + diff=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1; + count=1; + } else if(b<BOCU1_START_POS_4) { + /* three bytes */ + diff=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1; + count=2; + } else { + /* four bytes */ + diff=BOCU1_REACH_POS_3+1; + count=3; + } + } else { + /* negative difference */ + if(b>=BOCU1_START_NEG_3) { + /* two bytes */ + diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; + count=1; + } else if(b>BOCU1_MIN) { + /* three bytes */ + diff=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2; + count=2; + } else { + /* four bytes */ + diff=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3; + count=3; + } + } + + /* return the state for decoding the trail byte(s) */ + return (diff<<2)|count; +} + +/** + * Function for BOCU-1 decoder; handles multi-byte trail bytes. + * + * @param count number of remaining trail bytes including this one + * @param b trail byte + * @return new delta for diff including b - <0 indicates an error + * + * @see decodeBocu1 + */ +static inline int32_t +decodeBocu1TrailByte(int32_t count, int32_t b) { + if(b<=0x20) { + /* skip some C0 controls and make the trail byte range contiguous */ + b=bocu1ByteToTrail[b]; + /* b<0 for an illegal trail byte value will result in return<0 below */ +#if BOCU1_MAX_TRAIL<0xff + } else if(b>BOCU1_MAX_TRAIL) { + return -99; +#endif + } else { + b-=BOCU1_TRAIL_BYTE_OFFSET; + } + + /* add trail byte into difference and decrement count */ + if(count==1) { + return b; + } else if(count==2) { + return b*BOCU1_TRAIL_COUNT; + } else /* count==3 */ { + return b*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT); + } +} + +static void U_CALLCONV +_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source, *sourceLimit; + UChar *target; + const UChar *targetLimit; + int32_t *offsets; + + int32_t prev, count, diff, c; + + int8_t byteIndex; + uint8_t *bytes; + + int32_t sourceIndex, nextSourceIndex; + + /* set up the local pointers */ + cnv=pArgs->converter; + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=pArgs->target; + targetLimit=pArgs->targetLimit; + offsets=pArgs->offsets; + + /* get the converter state from UConverter */ + prev=(int32_t)cnv->toUnicodeStatus; + if(prev==0) { + prev=BOCU1_ASCII_PREV; + } + diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */ + count=diff&3; + diff>>=2; + + byteIndex=cnv->toULength; + bytes=cnv->toUBytes; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex=byteIndex==0 ? 0 : -1; + nextSourceIndex=0; + + /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */ + if(count>0 && byteIndex>0 && target<targetLimit) { + goto getTrail; + } + +fastSingle: + /* fast loop for single-byte differences */ + /* use count as the only loop counter variable */ + diff=(int32_t)(sourceLimit-source); + count=(int32_t)(pArgs->targetLimit-target); + if(count>diff) { + count=diff; + } + while(count>0) { + if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) { + c=prev+(c-BOCU1_MIDDLE); + if(c<0x3000) { + *target++=(UChar)c; + *offsets++=nextSourceIndex++; + prev=BOCU1_SIMPLE_PREV(c); + } else { + break; + } + } else if(c<=0x20) { + if(c!=0x20) { + prev=BOCU1_ASCII_PREV; + } + *target++=(UChar)c; + *offsets++=nextSourceIndex++; + } else { + break; + } + ++source; + --count; + } + sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */ + + /* decode a sequence of single and lead bytes */ + while(source<sourceLimit) { + if(target>=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + + ++nextSourceIndex; + c=*source++; + if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) { + /* Write a code point directly from a single-byte difference. */ + c=prev+(c-BOCU1_MIDDLE); + if(c<0x3000) { + *target++=(UChar)c; + *offsets++=sourceIndex; + prev=BOCU1_SIMPLE_PREV(c); + sourceIndex=nextSourceIndex; + goto fastSingle; + } + } else if(c<=0x20) { + /* + * Direct-encoded C0 control code or space. + * Reset prev for C0 control codes but not for space. + */ + if(c!=0x20) { + prev=BOCU1_ASCII_PREV; + } + *target++=(UChar)c; + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex; + continue; + } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) { + /* Optimize two-byte case. */ + if(c>=BOCU1_MIDDLE) { + diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1; + } else { + diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; + } + + /* trail byte */ + ++nextSourceIndex; + c=decodeBocu1TrailByte(1, *source++); + if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) { + bytes[0]=source[-2]; + bytes[1]=source[-1]; + byteIndex=2; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } else if(c==BOCU1_RESET) { + /* only reset the state, no code point */ + prev=BOCU1_ASCII_PREV; + sourceIndex=nextSourceIndex; + continue; + } else { + /* + * For multi-byte difference lead bytes, set the decoder state + * with the partial difference value from the lead byte and + * with the number of trail bytes. + */ + bytes[0]=(uint8_t)c; + byteIndex=1; + + diff=decodeBocu1LeadByte(c); + count=diff&3; + diff>>=2; +getTrail: + for(;;) { + if(source>=sourceLimit) { + goto endloop; + } + ++nextSourceIndex; + c=bytes[byteIndex++]=*source++; + + /* trail byte in any position */ + c=decodeBocu1TrailByte(count, c); + if(c<0) { + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + + diff+=c; + if(--count==0) { + /* final trail byte, deliver a code point */ + byteIndex=0; + c=prev+diff; + if((uint32_t)c>0x10ffff) { + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + break; + } + } + } + + /* calculate the next prev and output c */ + prev=BOCU1_PREV(c); + if(c<=0xffff) { + *target++=(UChar)c; + *offsets++=sourceIndex; + } else { + /* output surrogate pair */ + *target++=U16_LEAD(c); + if(target<targetLimit) { + *target++=U16_TRAIL(c); + *offsets++=sourceIndex; + *offsets++=sourceIndex; + } else { + /* target overflow */ + *offsets++=sourceIndex; + cnv->UCharErrorBuffer[0]=U16_TRAIL(c); + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + sourceIndex=nextSourceIndex; + } +endloop: + + if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) { + /* set the converter state in UConverter to deal with the next character */ + cnv->toUnicodeStatus=BOCU1_ASCII_PREV; + cnv->mode=0; + } else { + /* set the converter state back into UConverter */ + cnv->toUnicodeStatus=(uint32_t)prev; + cnv->mode=(diff<<2)|count; + } + cnv->toULength=byteIndex; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; + return; +} + +/* + * Identical to _Bocu1ToUnicodeWithOffsets but without offset handling. + * If a change is made in the original function, then either + * change this function the same way or + * re-copy the original function and remove the variables + * offsets, sourceIndex, and nextSourceIndex. + */ +static void U_CALLCONV +_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source, *sourceLimit; + UChar *target; + const UChar *targetLimit; + + int32_t prev, count, diff, c; + + int8_t byteIndex; + uint8_t *bytes; + + /* set up the local pointers */ + cnv=pArgs->converter; + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=pArgs->target; + targetLimit=pArgs->targetLimit; + + /* get the converter state from UConverter */ + prev=(int32_t)cnv->toUnicodeStatus; + if(prev==0) { + prev=BOCU1_ASCII_PREV; + } + diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */ + count=diff&3; + diff>>=2; + + byteIndex=cnv->toULength; + bytes=cnv->toUBytes; + + /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */ + if(count>0 && byteIndex>0 && target<targetLimit) { + goto getTrail; + } + +fastSingle: + /* fast loop for single-byte differences */ + /* use count as the only loop counter variable */ + diff=(int32_t)(sourceLimit-source); + count=(int32_t)(pArgs->targetLimit-target); + if(count>diff) { + count=diff; + } + while(count>0) { + if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) { + c=prev+(c-BOCU1_MIDDLE); + if(c<0x3000) { + *target++=(UChar)c; + prev=BOCU1_SIMPLE_PREV(c); + } else { + break; + } + } else if(c<=0x20) { + if(c!=0x20) { + prev=BOCU1_ASCII_PREV; + } + *target++=(UChar)c; + } else { + break; + } + ++source; + --count; + } + + /* decode a sequence of single and lead bytes */ + while(source<sourceLimit) { + if(target>=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + + c=*source++; + if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) { + /* Write a code point directly from a single-byte difference. */ + c=prev+(c-BOCU1_MIDDLE); + if(c<0x3000) { + *target++=(UChar)c; + prev=BOCU1_SIMPLE_PREV(c); + goto fastSingle; + } + } else if(c<=0x20) { + /* + * Direct-encoded C0 control code or space. + * Reset prev for C0 control codes but not for space. + */ + if(c!=0x20) { + prev=BOCU1_ASCII_PREV; + } + *target++=(UChar)c; + continue; + } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) { + /* Optimize two-byte case. */ + if(c>=BOCU1_MIDDLE) { + diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1; + } else { + diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1; + } + + /* trail byte */ + c=decodeBocu1TrailByte(1, *source++); + if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) { + bytes[0]=source[-2]; + bytes[1]=source[-1]; + byteIndex=2; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } else if(c==BOCU1_RESET) { + /* only reset the state, no code point */ + prev=BOCU1_ASCII_PREV; + continue; + } else { + /* + * For multi-byte difference lead bytes, set the decoder state + * with the partial difference value from the lead byte and + * with the number of trail bytes. + */ + bytes[0]=(uint8_t)c; + byteIndex=1; + + diff=decodeBocu1LeadByte(c); + count=diff&3; + diff>>=2; +getTrail: + for(;;) { + if(source>=sourceLimit) { + goto endloop; + } + c=bytes[byteIndex++]=*source++; + + /* trail byte in any position */ + c=decodeBocu1TrailByte(count, c); + if(c<0) { + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + + diff+=c; + if(--count==0) { + /* final trail byte, deliver a code point */ + byteIndex=0; + c=prev+diff; + if((uint32_t)c>0x10ffff) { + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + break; + } + } + } + + /* calculate the next prev and output c */ + prev=BOCU1_PREV(c); + if(c<=0xffff) { + *target++=(UChar)c; + } else { + /* output surrogate pair */ + *target++=U16_LEAD(c); + if(target<targetLimit) { + *target++=U16_TRAIL(c); + } else { + /* target overflow */ + cnv->UCharErrorBuffer[0]=U16_TRAIL(c); + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + } +endloop: + + if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) { + /* set the converter state in UConverter to deal with the next character */ + cnv->toUnicodeStatus=BOCU1_ASCII_PREV; + cnv->mode=0; + } else { + /* set the converter state back into UConverter */ + cnv->toUnicodeStatus=(uint32_t)prev; + cnv->mode=(diff<<2)|count; + } + cnv->toULength=byteIndex; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + return; +} + +/* miscellaneous ------------------------------------------------------------ */ + +static const UConverterImpl _Bocu1Impl={ + UCNV_BOCU1, + + NULL, + NULL, + + NULL, + NULL, + NULL, + + _Bocu1ToUnicode, + _Bocu1ToUnicodeWithOffsets, + _Bocu1FromUnicode, + _Bocu1FromUnicodeWithOffsets, + NULL, + + NULL, + NULL, + NULL, + NULL, + ucnv_getCompleteUnicodeSet, + + NULL, + NULL +}; + +static const UConverterStaticData _Bocu1StaticData={ + sizeof(UConverterStaticData), + "BOCU-1", + 1214, /* CCSID for BOCU-1 */ + UCNV_IBM, UCNV_BOCU1, + 1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */ + { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */ + FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _Bocu1Data= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Bocu1StaticData, &_Bocu1Impl); + +#endif diff --git a/thirdparty/icu4c/common/ucnvdisp.cpp b/thirdparty/icu4c/common/ucnvdisp.cpp new file mode 100644 index 0000000000..ac86b98597 --- /dev/null +++ b/thirdparty/icu4c/common/ucnvdisp.cpp @@ -0,0 +1,88 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2004, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* ucnvdisp.c: +* Implements APIs for the ICU's codeset conversion library display names. +* +* Modification History: +* +* Date Name Description +* 04/04/99 helena Fixed internal header inclusion. +* 05/09/00 helena Added implementation to handle fallback mappings. +* 06/20/2000 helena OS/400 port changes; mostly typecast. +* 09/08/2004 grhoten split from ucnv.c +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ustring.h" +#include "unicode/ures.h" +#include "unicode/ucnv.h" +#include "cstring.h" +#include "ustr_imp.h" +#include "ucnv_imp.h" +#include "putilimp.h" + +U_CAPI int32_t U_EXPORT2 +ucnv_getDisplayName(const UConverter *cnv, + const char *displayLocale, + UChar *displayName, int32_t displayNameCapacity, + UErrorCode *pErrorCode) { + UResourceBundle *rb; + const UChar *name; + int32_t length; + UErrorCode localStatus = U_ZERO_ERROR; + + /* check arguments */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if(cnv==NULL || displayNameCapacity<0 || (displayNameCapacity>0 && displayName==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* open the resource bundle and get the display name string */ + rb=ures_open(NULL, displayLocale, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + /* use the internal name as the key */ + name=ures_getStringByKey(rb, cnv->sharedData->staticData->name, &length, &localStatus); + ures_close(rb); + + if(U_SUCCESS(localStatus)) { + /* copy the string */ + if (*pErrorCode == U_ZERO_ERROR) { + *pErrorCode = localStatus; + } + u_memcpy(displayName, name, uprv_min(length, displayNameCapacity)*U_SIZEOF_UCHAR); + } else { + /* convert the internal name into a Unicode string */ + length=(int32_t)uprv_strlen(cnv->sharedData->staticData->name); + u_charsToUChars(cnv->sharedData->staticData->name, displayName, uprv_min(length, displayNameCapacity)); + } + return u_terminateUChars(displayName, displayNameCapacity, length, pErrorCode); +} + +#endif + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/thirdparty/icu4c/common/ucnvhz.cpp b/thirdparty/icu4c/common/ucnvhz.cpp new file mode 100644 index 0000000000..6b2f5faaf0 --- /dev/null +++ b/thirdparty/icu4c/common/ucnvhz.cpp @@ -0,0 +1,625 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnvhz.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000oct16 +* created by: Ram Viswanadha +* 10/31/2000 Ram Implemented offsets logic function +* +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "cmemory.h" +#include "unicode/ucnv.h" +#include "unicode/ucnv_cb.h" +#include "unicode/uset.h" +#include "unicode/utf16.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "ucnv_imp.h" + +#define UCNV_TILDE 0x7E /* ~ */ +#define UCNV_OPEN_BRACE 0x7B /* { */ +#define UCNV_CLOSE_BRACE 0x7D /* } */ +#define SB_ESCAPE "\x7E\x7D" +#define DB_ESCAPE "\x7E\x7B" +#define TILDE_ESCAPE "\x7E\x7E" +#define ESC_LEN 2 + + +#define CONCAT_ESCAPE_MACRO(args, targetIndex,targetLength,strToAppend, err, len,sourceIndex) UPRV_BLOCK_MACRO_BEGIN { \ + while(len-->0){ \ + if(targetIndex < targetLength){ \ + args->target[targetIndex] = (unsigned char) *strToAppend; \ + if(args->offsets!=NULL){ \ + *(offsets++) = sourceIndex-1; \ + } \ + targetIndex++; \ + } \ + else{ \ + args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \ + *err =U_BUFFER_OVERFLOW_ERROR; \ + } \ + strToAppend++; \ + } \ +} UPRV_BLOCK_MACRO_END + + +typedef struct{ + UConverter* gbConverter; + int32_t targetIndex; + int32_t sourceIndex; + UBool isEscapeAppended; + UBool isStateDBCS; + UBool isTargetUCharDBCS; + UBool isEmptySegment; +}UConverterDataHZ; + + +U_CDECL_BEGIN +static void U_CALLCONV +_HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ + UConverter *gbConverter; + if(pArgs->onlyTestIsLoadable) { + ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result */ + return; + } + gbConverter = ucnv_open("GBK", errorCode); + if(U_FAILURE(*errorCode)) { + return; + } + cnv->toUnicodeStatus = 0; + cnv->fromUnicodeStatus= 0; + cnv->mode=0; + cnv->fromUChar32=0x0000; + cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ)); + if(cnv->extraInfo != NULL){ + ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter; + } + else { + ucnv_close(gbConverter); + *errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } +} + +static void U_CALLCONV +_HZClose(UConverter *cnv){ + if(cnv->extraInfo != NULL) { + ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter); + if(!cnv->isExtraLocal) { + uprv_free(cnv->extraInfo); + } + cnv->extraInfo = NULL; + } +} + +static void U_CALLCONV +_HZReset(UConverter *cnv, UConverterResetChoice choice){ + if(choice<=UCNV_RESET_TO_UNICODE) { + cnv->toUnicodeStatus = 0; + cnv->mode=0; + if(cnv->extraInfo != NULL){ + ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; + ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE; + } + } + if(choice!=UCNV_RESET_TO_UNICODE) { + cnv->fromUnicodeStatus= 0; + cnv->fromUChar32=0x0000; + if(cnv->extraInfo != NULL){ + ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; + ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; + ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; + ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; + } + } +} + +/**************************************HZ Encoding************************************************* +* Rules for HZ encoding +* +* In ASCII mode, a byte is interpreted as an ASCII character, unless a +* '~' is encountered. The character '~' is an escape character. By +* convention, it must be immediately followed ONLY by '~', '{' or '\n' +* (<LF>), with the following special meaning. + +* 1. The escape sequence '~~' is interpreted as a '~'. +* 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB. +* 3. The escape sequence '~\n' is a line-continuation marker to be +* consumed with no output produced. +* In GB mode, characters are interpreted two bytes at a time as (pure) +* GB codes until the escape-from-GB code '~}' is read. This code +* switches the mode from GB back to ASCII. (Note that the escape- +* from-GB code '~}' ($7E7D) is outside the defined GB range.) +* +* Source: RFC 1842 +* +* Note that the formal syntax in RFC 1842 is invalid. I assume that the +* intended definition of single-byte-segment is as follows (pedberg): +* single-byte-segment = single-byte-seq 1*single-byte-char +*/ + + +static void U_CALLCONV +UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, + UErrorCode* err){ + char tempBuf[2]; + const char *mySource = ( char *) args->source; + UChar *myTarget = args->target; + const char *mySourceLimit = args->sourceLimit; + UChar32 targetUniChar = 0x0000; + int32_t mySourceChar = 0x0000; + UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); + tempBuf[0]=0; + tempBuf[1]=0; + + /* Calling code already handles this situation. */ + /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){ + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + }*/ + + while(mySource< mySourceLimit){ + + if(myTarget < args->targetLimit){ + + mySourceChar= (unsigned char) *mySource++; + + if(args->converter->mode == UCNV_TILDE) { + /* second byte after ~ */ + args->converter->mode=0; + switch(mySourceChar) { + case 0x0A: + /* no output for ~\n (line-continuation marker) */ + continue; + case UCNV_TILDE: + if(args->offsets) { + args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); + } + *(myTarget++)=(UChar)mySourceChar; + myData->isEmptySegment = FALSE; + continue; + case UCNV_OPEN_BRACE: + case UCNV_CLOSE_BRACE: + myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); + if (myData->isEmptySegment) { + myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + args->converter->toUCallbackReason = UCNV_IRREGULAR; + args->converter->toUBytes[0] = UCNV_TILDE; + args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar); + args->converter->toULength = 2; + args->target = myTarget; + args->source = mySource; + return; + } + myData->isEmptySegment = TRUE; + continue; + default: + /* if the first byte is equal to TILDE and the trail byte + * is not a valid byte then it is an error condition + */ + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + */ + myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + args->converter->toUBytes[0] = UCNV_TILDE; + if( myData->isStateDBCS ? + (0x21 <= mySourceChar && mySourceChar <= 0x7e) : + mySourceChar <= 0x7f + ) { + /* The current byte could be the start of a character: Back it out. */ + args->converter->toULength = 1; + --mySource; + } else { + /* Include the current byte in the illegal sequence. */ + args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar); + args->converter->toULength = 2; + } + args->target = myTarget; + args->source = mySource; + return; + } + } else if(myData->isStateDBCS) { + if(args->converter->toUnicodeStatus == 0x00){ + /* lead byte */ + if(mySourceChar == UCNV_TILDE) { + args->converter->mode = UCNV_TILDE; + } else { + /* add another bit to distinguish a 0 byte from not having seen a lead byte */ + args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); + myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */ + } + continue; + } + else{ + /* trail byte */ + int leadIsOk, trailIsOk; + uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; + targetUniChar = 0xffff; + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * + * In HZ DBCS, if the second byte is in the 21..7e range, + * we report only the first byte as the illegal sequence. + * Otherwise we convert or report the pair of bytes. + */ + leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); + trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + tempBuf[0] = (char) (leadByte+0x80) ; + tempBuf[1] = (char) (mySourceChar+0x80); + targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, + tempBuf, 2, args->converter->useFallback); + mySourceChar= (leadByte << 8) | mySourceChar; + } else if (trailIsOk) { + /* report a single illegal byte and continue with the following DBCS starter byte */ + --mySource; + mySourceChar = (int32_t)leadByte; + } else { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + /* add another bit so that the code below writes 2 bytes in case of error */ + mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; + } + args->converter->toUnicodeStatus =0x00; + } + } + else{ + if(mySourceChar == UCNV_TILDE) { + args->converter->mode = UCNV_TILDE; + continue; + } else if(mySourceChar <= 0x7f) { + targetUniChar = (UChar)mySourceChar; /* ASCII */ + myData->isEmptySegment = FALSE; /* the segment has something valid */ + } else { + targetUniChar = 0xffff; + myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ + } + } + if(targetUniChar < 0xfffe){ + if(args->offsets) { + args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS)); + } + + *(myTarget++)=(UChar)targetUniChar; + } + else /* targetUniChar>=0xfffe */ { + if(targetUniChar == 0xfffe){ + *err = U_INVALID_CHAR_FOUND; + } + else{ + *err = U_ILLEGAL_CHAR_FOUND; + } + if(mySourceChar > 0xff){ + args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8); + args->converter->toUBytes[1] = (uint8_t)mySourceChar; + args->converter->toULength=2; + } + else{ + args->converter->toUBytes[0] = (uint8_t)mySourceChar; + args->converter->toULength=1; + } + break; + } + } + else{ + *err =U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + args->target = myTarget; + args->source = mySource; +} + + +static void U_CALLCONV +UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, + UErrorCode * err){ + const UChar *mySource = args->source; + char *myTarget = args->target; + int32_t* offsets = args->offsets; + int32_t mySourceIndex = 0; + int32_t myTargetIndex = 0; + int32_t targetLength = (int32_t)(args->targetLimit - myTarget); + int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source); + uint32_t targetUniChar = 0x0000; + UChar32 mySourceChar = 0x0000; + UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo; + UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS; + UBool oldIsTargetUCharDBCS; + int len =0; + const char* escSeq=NULL; + + /* Calling code already handles this situation. */ + /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){ + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + }*/ + if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) { + goto getTrail; + } + /*writing the char to the output stream */ + while (mySourceIndex < mySourceLength){ + targetUniChar = missingCharMarker; + if (myTargetIndex < targetLength){ + + mySourceChar = (UChar) mySource[mySourceIndex++]; + + + oldIsTargetUCharDBCS = isTargetUCharDBCS; + if(mySourceChar ==UCNV_TILDE){ + /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/ + len = ESC_LEN; + escSeq = TILDE_ESCAPE; + CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); + continue; + } else if(mySourceChar <= 0x7f) { + targetUniChar = mySourceChar; + } else { + int32_t length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData, + mySourceChar,&targetUniChar,args->converter->useFallback); + /* we can only use lead bytes 21..7D and trail bytes 21..7E */ + if( length == 2 && + (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) && + (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1) + ) { + targetUniChar -= 0x8080; + } else { + targetUniChar = missingCharMarker; + } + } + if (targetUniChar != missingCharMarker){ + myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); + if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){ + /*Shifting from a double byte to single byte mode*/ + if(!isTargetUCharDBCS){ + len =ESC_LEN; + escSeq = SB_ESCAPE; + CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); + myConverterData->isEscapeAppended = TRUE; + } + else{ /* Shifting from a single byte to double byte mode*/ + len =ESC_LEN; + escSeq = DB_ESCAPE; + CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); + myConverterData->isEscapeAppended = TRUE; + + } + } + + if(isTargetUCharDBCS){ + if( myTargetIndex <targetLength){ + myTarget[myTargetIndex++] =(char) (targetUniChar >> 8); + if(offsets){ + *(offsets++) = mySourceIndex-1; + } + if(myTargetIndex < targetLength){ + myTarget[myTargetIndex++] =(char) targetUniChar; + if(offsets){ + *(offsets++) = mySourceIndex-1; + } + }else{ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; + *err = U_BUFFER_OVERFLOW_ERROR; + } + }else{ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8); + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; + *err = U_BUFFER_OVERFLOW_ERROR; + } + + }else{ + if( myTargetIndex <targetLength){ + myTarget[myTargetIndex++] = (char) (targetUniChar ); + if(offsets){ + *(offsets++) = mySourceIndex-1; + } + + }else{ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; + *err = U_BUFFER_OVERFLOW_ERROR; + } + } + + } + else{ + /* oops.. the code point is unassigned */ + /*Handle surrogates */ + /*check if the char is a First surrogate*/ + if(U16_IS_SURROGATE(mySourceChar)) { + if(U16_IS_SURROGATE_LEAD(mySourceChar)) { + args->converter->fromUChar32=mySourceChar; +getTrail: + /*look ahead to find the trail surrogate*/ + if(mySourceIndex < mySourceLength) { + /* test the following code unit */ + UChar trail=(UChar) args->source[mySourceIndex]; + if(U16_IS_TRAIL(trail)) { + ++mySourceIndex; + mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail); + args->converter->fromUChar32=0x00; + /* there are no surrogates in GB2312*/ + *err = U_INVALID_CHAR_FOUND; + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* no more input */ + *err = U_ZERO_ERROR; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* callback(unassigned) for a BMP code point */ + *err = U_INVALID_CHAR_FOUND; + } + + args->converter->fromUChar32=mySourceChar; + break; + } + } + else{ + *err = U_BUFFER_OVERFLOW_ERROR; + break; + } + targetUniChar=missingCharMarker; + } + + args->target += myTargetIndex; + args->source += mySourceIndex; + myConverterData->isTargetUCharDBCS = isTargetUCharDBCS; +} + +static void U_CALLCONV +_HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { + UConverter *cnv = args->converter; + UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo; + char *p; + char buffer[4]; + p = buffer; + + if( convData->isTargetUCharDBCS){ + *p++= UCNV_TILDE; + *p++= UCNV_CLOSE_BRACE; + convData->isTargetUCharDBCS=FALSE; + } + *p++= (char)cnv->subChars[0]; + + ucnv_cbFromUWriteBytes(args, + buffer, (int32_t)(p - buffer), + offsetIndex, err); +} + +/* + * Structure for cloning an HZ converter into a single memory block. + */ +struct cloneHZStruct +{ + UConverter cnv; + UConverter subCnv; + UConverterDataHZ mydata; +}; + + +static UConverter * U_CALLCONV +_HZ_SafeClone(const UConverter *cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status) +{ + struct cloneHZStruct * localClone; + int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct); + + if (U_FAILURE(*status)){ + return nullptr; + } + + if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ + *pBufferSize = bufferSizeNeeded; + return nullptr; + } + + localClone = (struct cloneHZStruct *)stackBuffer; + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ + + uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ)); + localClone->cnv.extraInfo = &localClone->mydata; + localClone->cnv.isExtraLocal = TRUE; + + /* deep-clone the sub-converter */ + size = (int32_t)sizeof(UConverter); + ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter = + ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status); + + return &localClone->cnv; +} + +static void U_CALLCONV +_HZ_GetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + /* HZ converts all of ASCII */ + sa->addRange(sa->set, 0, 0x7f); + + /* add all of the code points that the sub-converter handles */ + ucnv_MBCSGetFilteredUnicodeSetForUnicode( + ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, + sa, which, UCNV_SET_FILTER_HZ, + pErrorCode); +} +U_CDECL_END +static const UConverterImpl _HZImpl={ + + UCNV_HZ, + + NULL, + NULL, + + _HZOpen, + _HZClose, + _HZReset, + + UConverter_toUnicode_HZ_OFFSETS_LOGIC, + UConverter_toUnicode_HZ_OFFSETS_LOGIC, + UConverter_fromUnicode_HZ_OFFSETS_LOGIC, + UConverter_fromUnicode_HZ_OFFSETS_LOGIC, + NULL, + + NULL, + NULL, + _HZ_WriteSub, + _HZ_SafeClone, + _HZ_GetUnicodeSet, + NULL, + NULL +}; + +static const UConverterStaticData _HZStaticData={ + sizeof(UConverterStaticData), + "HZ", + 0, + UCNV_IBM, + UCNV_HZ, + 1, + 4, + { 0x1a, 0, 0, 0 }, + 1, + FALSE, + FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ + +}; + +const UConverterSharedData _HZData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_HZStaticData, &_HZImpl); + +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */ diff --git a/thirdparty/icu4c/common/ucnvisci.cpp b/thirdparty/icu4c/common/ucnvisci.cpp new file mode 100644 index 0000000000..44a7c05a3c --- /dev/null +++ b/thirdparty/icu4c/common/ucnvisci.cpp @@ -0,0 +1,1635 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnvisci.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001JUN26 +* created by: Ram Viswanadha +* +* Date Name Description +* 24/7/2001 Ram Added support for EXT character handling +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/ucnv_cb.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "cstring.h" +#include "uassert.h" + +#define UCNV_OPTIONS_VERSION_MASK 0xf +#define NUKTA 0x093c +#define HALANT 0x094d +#define ZWNJ 0x200c /* Zero Width Non Joiner */ +#define ZWJ 0x200d /* Zero width Joiner */ +#define INVALID_CHAR 0xffff +#define ATR 0xEF /* Attribute code */ +#define EXT 0xF0 /* Extension code */ +#define DANDA 0x0964 +#define DOUBLE_DANDA 0x0965 +#define ISCII_NUKTA 0xE9 +#define ISCII_HALANT 0xE8 +#define ISCII_DANDA 0xEA +#define ISCII_INV 0xD9 +#define ISCII_VOWEL_SIGN_E 0xE0 +#define INDIC_BLOCK_BEGIN 0x0900 +#define INDIC_BLOCK_END 0x0D7F +#define INDIC_RANGE (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN) +#define VOCALLIC_RR 0x0931 +#define LF 0x0A +#define ASCII_END 0xA0 +#define NO_CHAR_MARKER 0xFFFE +#define TELUGU_DELTA DELTA * TELUGU +#define DEV_ABBR_SIGN 0x0970 +#define DEV_ANUDATTA 0x0952 +#define EXT_RANGE_BEGIN 0xA1 +#define EXT_RANGE_END 0xEE + +#define PNJ_DELTA 0x0100 +#define PNJ_BINDI 0x0A02 +#define PNJ_TIPPI 0x0A70 +#define PNJ_SIGN_VIRAMA 0x0A4D +#define PNJ_ADHAK 0x0A71 +#define PNJ_HA 0x0A39 +#define PNJ_RRA 0x0A5C + +typedef enum { + DEVANAGARI =0, + BENGALI, + GURMUKHI, + GUJARATI, + ORIYA, + TAMIL, + TELUGU, + KANNADA, + MALAYALAM, + DELTA=0x80 +}UniLang; + +/** + * Enumeration for switching code pages if <ATR>+<one of below values> + * is encountered + */ +typedef enum { + DEF = 0x40, + RMN = 0x41, + DEV = 0x42, + BNG = 0x43, + TML = 0x44, + TLG = 0x45, + ASM = 0x46, + ORI = 0x47, + KND = 0x48, + MLM = 0x49, + GJR = 0x4A, + PNJ = 0x4B, + ARB = 0x71, + PES = 0x72, + URD = 0x73, + SND = 0x74, + KSM = 0x75, + PST = 0x76 +}ISCIILang; + +typedef enum { + DEV_MASK =0x80, + PNJ_MASK =0x40, + GJR_MASK =0x20, + ORI_MASK =0x10, + BNG_MASK =0x08, + KND_MASK =0x04, + MLM_MASK =0x02, + TML_MASK =0x01, + ZERO =0x00 +}MaskEnum; + +#define ISCII_CNV_PREFIX "ISCII,version=" + +typedef struct { + UChar contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */ + UChar contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */ + uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */ + uint16_t currentDeltaFromUnicode; /* current delta in Indic block */ + uint16_t currentDeltaToUnicode; /* current delta in Indic block */ + MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */ + MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */ + MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */ + UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */ + UBool resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered*/ + char name[sizeof(ISCII_CNV_PREFIX) + 1]; + UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */ +} UConverterDataISCII; + +typedef struct LookupDataStruct { + UniLang uniLang; + MaskEnum maskEnum; + ISCIILang isciiLang; +} LookupDataStruct; + +static const LookupDataStruct lookupInitialData[]={ + { DEVANAGARI, DEV_MASK, DEV }, + { BENGALI, BNG_MASK, BNG }, + { GURMUKHI, PNJ_MASK, PNJ }, + { GUJARATI, GJR_MASK, GJR }, + { ORIYA, ORI_MASK, ORI }, + { TAMIL, TML_MASK, TML }, + { TELUGU, KND_MASK, TLG }, + { KANNADA, KND_MASK, KND }, + { MALAYALAM, MLM_MASK, MLM } +}; + +/* + * For special handling of certain Gurmukhi characters. + * Bit 0 (value 1): PNJ consonant + * Bit 1 (value 2): PNJ Bindi Tippi + */ +static const uint8_t pnjMap[80] = { + /* 0A00..0A0F */ + 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0A10..0A1F */ + 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* 0A20..0A2F */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, + /* 0A30..0A3F */ + 3, 0, 0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 2, + /* 0A40..0A4F */ + 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static UBool +isPNJConsonant(UChar32 c) { + if (c < 0xa00 || 0xa50 <= c) { + return FALSE; + } else { + return (UBool)(pnjMap[c - 0xa00] & 1); + } +} + +static UBool +isPNJBindiTippi(UChar32 c) { + if (c < 0xa00 || 0xa50 <= c) { + return FALSE; + } else { + return (UBool)(pnjMap[c - 0xa00] >> 1); + } +} +U_CDECL_BEGIN +static void U_CALLCONV +_ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) { + if(pArgs->onlyTestIsLoadable) { + return; + } + + cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII)); + + if (cnv->extraInfo != NULL) { + int32_t len=0; + UConverterDataISCII *converterData= + (UConverterDataISCII *) cnv->extraInfo; + converterData->contextCharToUnicode=NO_CHAR_MARKER; + cnv->toUnicodeStatus = missingCharMarker; + converterData->contextCharFromUnicode=0x0000; + converterData->resetToDefaultToUnicode=FALSE; + /* check if the version requested is supported */ + if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) { + /* initialize state variables */ + converterData->currentDeltaFromUnicode + = converterData->currentDeltaToUnicode + = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA); + + converterData->currentMaskFromUnicode + = converterData->currentMaskToUnicode + = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum; + + converterData->isFirstBuffer=TRUE; + (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX); + len = (int32_t)uprv_strlen(converterData->name); + converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0'); + converterData->name[len+1]=0; + + converterData->prevToUnicodeStatus = 0x0000; + } else { + uprv_free(cnv->extraInfo); + cnv->extraInfo = NULL; + *errorCode = U_ILLEGAL_ARGUMENT_ERROR; + } + + } else { + *errorCode =U_MEMORY_ALLOCATION_ERROR; + } +} + +static void U_CALLCONV +_ISCIIClose(UConverter *cnv) { + if (cnv->extraInfo!=NULL) { + if (!cnv->isExtraLocal) { + uprv_free(cnv->extraInfo); + } + cnv->extraInfo=NULL; + } +} + +static const char* U_CALLCONV +_ISCIIgetName(const UConverter* cnv) { + if (cnv->extraInfo) { + UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo; + return myData->name; + } + return NULL; +} + +static void U_CALLCONV +_ISCIIReset(UConverter *cnv, UConverterResetChoice choice) { + UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo); + if (choice<=UCNV_RESET_TO_UNICODE) { + cnv->toUnicodeStatus = missingCharMarker; + cnv->mode=0; + data->currentDeltaToUnicode=data->defDeltaToUnicode; + data->currentMaskToUnicode = data->defMaskToUnicode; + data->contextCharToUnicode=NO_CHAR_MARKER; + data->prevToUnicodeStatus = 0x0000; + } + if (choice!=UCNV_RESET_TO_UNICODE) { + cnv->fromUChar32=0x0000; + data->contextCharFromUnicode=0x00; + data->currentMaskFromUnicode=data->defMaskToUnicode; + data->currentDeltaFromUnicode=data->defDeltaToUnicode; + data->isFirstBuffer=TRUE; + data->resetToDefaultToUnicode=FALSE; + } +} + +/** + * The values in validity table are indexed by the lower bits of Unicode + * range 0x0900 - 0x09ff. The values have a structure like: + * --------------------------------------------------------------- + * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML | + * | | | | | ASM | KND | | | + * --------------------------------------------------------------- + * If a code point is valid in a particular script + * then that bit is turned on + * + * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for + * to represent these languages + * + * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case + * and combine and use 1 bit to represent these languages. + * + * TODO: It is probably easier to understand and maintain to change this + * to use uint16_t and give each of the 9 Unicode/script blocks its own bit. + */ + +static const uint8_t validityTable[128] = { +/* This state table is tool generated please do not edit unless you know exactly what you are doing */ +/* Note: This table was edited to mirror the Windows XP implementation */ +/*ISCII:Valid:Unicode */ +/*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , +/*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK , +/*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , +/*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , +/*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO , +/*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO , +/*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO , +/*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , +/*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/* + * The length of the array is 128 to provide values for 0x900..0x97f. + * The last 15 entries for 0x971..0x97f of the validity table are all zero + * because no Indic script uses such Unicode code points. + */ +/*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO +}; + +static const uint16_t fromUnicodeTable[128]={ + 0x00a0 ,/* 0x0900 */ + 0x00a1 ,/* 0x0901 */ + 0x00a2 ,/* 0x0902 */ + 0x00a3 ,/* 0x0903 */ + 0xa4e0 ,/* 0x0904 */ + 0x00a4 ,/* 0x0905 */ + 0x00a5 ,/* 0x0906 */ + 0x00a6 ,/* 0x0907 */ + 0x00a7 ,/* 0x0908 */ + 0x00a8 ,/* 0x0909 */ + 0x00a9 ,/* 0x090a */ + 0x00aa ,/* 0x090b */ + 0xA6E9 ,/* 0x090c */ + 0x00ae ,/* 0x090d */ + 0x00ab ,/* 0x090e */ + 0x00ac ,/* 0x090f */ + 0x00ad ,/* 0x0910 */ + 0x00b2 ,/* 0x0911 */ + 0x00af ,/* 0x0912 */ + 0x00b0 ,/* 0x0913 */ + 0x00b1 ,/* 0x0914 */ + 0x00b3 ,/* 0x0915 */ + 0x00b4 ,/* 0x0916 */ + 0x00b5 ,/* 0x0917 */ + 0x00b6 ,/* 0x0918 */ + 0x00b7 ,/* 0x0919 */ + 0x00b8 ,/* 0x091a */ + 0x00b9 ,/* 0x091b */ + 0x00ba ,/* 0x091c */ + 0x00bb ,/* 0x091d */ + 0x00bc ,/* 0x091e */ + 0x00bd ,/* 0x091f */ + 0x00be ,/* 0x0920 */ + 0x00bf ,/* 0x0921 */ + 0x00c0 ,/* 0x0922 */ + 0x00c1 ,/* 0x0923 */ + 0x00c2 ,/* 0x0924 */ + 0x00c3 ,/* 0x0925 */ + 0x00c4 ,/* 0x0926 */ + 0x00c5 ,/* 0x0927 */ + 0x00c6 ,/* 0x0928 */ + 0x00c7 ,/* 0x0929 */ + 0x00c8 ,/* 0x092a */ + 0x00c9 ,/* 0x092b */ + 0x00ca ,/* 0x092c */ + 0x00cb ,/* 0x092d */ + 0x00cc ,/* 0x092e */ + 0x00cd ,/* 0x092f */ + 0x00cf ,/* 0x0930 */ + 0x00d0 ,/* 0x0931 */ + 0x00d1 ,/* 0x0932 */ + 0x00d2 ,/* 0x0933 */ + 0x00d3 ,/* 0x0934 */ + 0x00d4 ,/* 0x0935 */ + 0x00d5 ,/* 0x0936 */ + 0x00d6 ,/* 0x0937 */ + 0x00d7 ,/* 0x0938 */ + 0x00d8 ,/* 0x0939 */ + 0xFFFF ,/* 0x093A */ + 0xFFFF ,/* 0x093B */ + 0x00e9 ,/* 0x093c */ + 0xEAE9 ,/* 0x093d */ + 0x00da ,/* 0x093e */ + 0x00db ,/* 0x093f */ + 0x00dc ,/* 0x0940 */ + 0x00dd ,/* 0x0941 */ + 0x00de ,/* 0x0942 */ + 0x00df ,/* 0x0943 */ + 0xDFE9 ,/* 0x0944 */ + 0x00e3 ,/* 0x0945 */ + 0x00e0 ,/* 0x0946 */ + 0x00e1 ,/* 0x0947 */ + 0x00e2 ,/* 0x0948 */ + 0x00e7 ,/* 0x0949 */ + 0x00e4 ,/* 0x094a */ + 0x00e5 ,/* 0x094b */ + 0x00e6 ,/* 0x094c */ + 0x00e8 ,/* 0x094d */ + 0x00ec ,/* 0x094e */ + 0x00ed ,/* 0x094f */ + 0xA1E9 ,/* 0x0950 */ /* OM Symbol */ + 0xFFFF ,/* 0x0951 */ + 0xF0B8 ,/* 0x0952 */ + 0xFFFF ,/* 0x0953 */ + 0xFFFF ,/* 0x0954 */ + 0xFFFF ,/* 0x0955 */ + 0xFFFF ,/* 0x0956 */ + 0xFFFF ,/* 0x0957 */ + 0xb3e9 ,/* 0x0958 */ + 0xb4e9 ,/* 0x0959 */ + 0xb5e9 ,/* 0x095a */ + 0xbae9 ,/* 0x095b */ + 0xbfe9 ,/* 0x095c */ + 0xC0E9 ,/* 0x095d */ + 0xc9e9 ,/* 0x095e */ + 0x00ce ,/* 0x095f */ + 0xAAe9 ,/* 0x0960 */ + 0xA7E9 ,/* 0x0961 */ + 0xDBE9 ,/* 0x0962 */ + 0xDCE9 ,/* 0x0963 */ + 0x00ea ,/* 0x0964 */ + 0xeaea ,/* 0x0965 */ + 0x00f1 ,/* 0x0966 */ + 0x00f2 ,/* 0x0967 */ + 0x00f3 ,/* 0x0968 */ + 0x00f4 ,/* 0x0969 */ + 0x00f5 ,/* 0x096a */ + 0x00f6 ,/* 0x096b */ + 0x00f7 ,/* 0x096c */ + 0x00f8 ,/* 0x096d */ + 0x00f9 ,/* 0x096e */ + 0x00fa ,/* 0x096f */ + 0xF0BF ,/* 0x0970 */ + 0xFFFF ,/* 0x0971 */ + 0xFFFF ,/* 0x0972 */ + 0xFFFF ,/* 0x0973 */ + 0xFFFF ,/* 0x0974 */ + 0xFFFF ,/* 0x0975 */ + 0xFFFF ,/* 0x0976 */ + 0xFFFF ,/* 0x0977 */ + 0xFFFF ,/* 0x0978 */ + 0xFFFF ,/* 0x0979 */ + 0xFFFF ,/* 0x097a */ + 0xFFFF ,/* 0x097b */ + 0xFFFF ,/* 0x097c */ + 0xFFFF ,/* 0x097d */ + 0xFFFF ,/* 0x097e */ + 0xFFFF ,/* 0x097f */ +}; +static const uint16_t toUnicodeTable[256]={ + 0x0000,/* 0x00 */ + 0x0001,/* 0x01 */ + 0x0002,/* 0x02 */ + 0x0003,/* 0x03 */ + 0x0004,/* 0x04 */ + 0x0005,/* 0x05 */ + 0x0006,/* 0x06 */ + 0x0007,/* 0x07 */ + 0x0008,/* 0x08 */ + 0x0009,/* 0x09 */ + 0x000a,/* 0x0a */ + 0x000b,/* 0x0b */ + 0x000c,/* 0x0c */ + 0x000d,/* 0x0d */ + 0x000e,/* 0x0e */ + 0x000f,/* 0x0f */ + 0x0010,/* 0x10 */ + 0x0011,/* 0x11 */ + 0x0012,/* 0x12 */ + 0x0013,/* 0x13 */ + 0x0014,/* 0x14 */ + 0x0015,/* 0x15 */ + 0x0016,/* 0x16 */ + 0x0017,/* 0x17 */ + 0x0018,/* 0x18 */ + 0x0019,/* 0x19 */ + 0x001a,/* 0x1a */ + 0x001b,/* 0x1b */ + 0x001c,/* 0x1c */ + 0x001d,/* 0x1d */ + 0x001e,/* 0x1e */ + 0x001f,/* 0x1f */ + 0x0020,/* 0x20 */ + 0x0021,/* 0x21 */ + 0x0022,/* 0x22 */ + 0x0023,/* 0x23 */ + 0x0024,/* 0x24 */ + 0x0025,/* 0x25 */ + 0x0026,/* 0x26 */ + 0x0027,/* 0x27 */ + 0x0028,/* 0x28 */ + 0x0029,/* 0x29 */ + 0x002a,/* 0x2a */ + 0x002b,/* 0x2b */ + 0x002c,/* 0x2c */ + 0x002d,/* 0x2d */ + 0x002e,/* 0x2e */ + 0x002f,/* 0x2f */ + 0x0030,/* 0x30 */ + 0x0031,/* 0x31 */ + 0x0032,/* 0x32 */ + 0x0033,/* 0x33 */ + 0x0034,/* 0x34 */ + 0x0035,/* 0x35 */ + 0x0036,/* 0x36 */ + 0x0037,/* 0x37 */ + 0x0038,/* 0x38 */ + 0x0039,/* 0x39 */ + 0x003A,/* 0x3A */ + 0x003B,/* 0x3B */ + 0x003c,/* 0x3c */ + 0x003d,/* 0x3d */ + 0x003e,/* 0x3e */ + 0x003f,/* 0x3f */ + 0x0040,/* 0x40 */ + 0x0041,/* 0x41 */ + 0x0042,/* 0x42 */ + 0x0043,/* 0x43 */ + 0x0044,/* 0x44 */ + 0x0045,/* 0x45 */ + 0x0046,/* 0x46 */ + 0x0047,/* 0x47 */ + 0x0048,/* 0x48 */ + 0x0049,/* 0x49 */ + 0x004a,/* 0x4a */ + 0x004b,/* 0x4b */ + 0x004c,/* 0x4c */ + 0x004d,/* 0x4d */ + 0x004e,/* 0x4e */ + 0x004f,/* 0x4f */ + 0x0050,/* 0x50 */ + 0x0051,/* 0x51 */ + 0x0052,/* 0x52 */ + 0x0053,/* 0x53 */ + 0x0054,/* 0x54 */ + 0x0055,/* 0x55 */ + 0x0056,/* 0x56 */ + 0x0057,/* 0x57 */ + 0x0058,/* 0x58 */ + 0x0059,/* 0x59 */ + 0x005a,/* 0x5a */ + 0x005b,/* 0x5b */ + 0x005c,/* 0x5c */ + 0x005d,/* 0x5d */ + 0x005e,/* 0x5e */ + 0x005f,/* 0x5f */ + 0x0060,/* 0x60 */ + 0x0061,/* 0x61 */ + 0x0062,/* 0x62 */ + 0x0063,/* 0x63 */ + 0x0064,/* 0x64 */ + 0x0065,/* 0x65 */ + 0x0066,/* 0x66 */ + 0x0067,/* 0x67 */ + 0x0068,/* 0x68 */ + 0x0069,/* 0x69 */ + 0x006a,/* 0x6a */ + 0x006b,/* 0x6b */ + 0x006c,/* 0x6c */ + 0x006d,/* 0x6d */ + 0x006e,/* 0x6e */ + 0x006f,/* 0x6f */ + 0x0070,/* 0x70 */ + 0x0071,/* 0x71 */ + 0x0072,/* 0x72 */ + 0x0073,/* 0x73 */ + 0x0074,/* 0x74 */ + 0x0075,/* 0x75 */ + 0x0076,/* 0x76 */ + 0x0077,/* 0x77 */ + 0x0078,/* 0x78 */ + 0x0079,/* 0x79 */ + 0x007a,/* 0x7a */ + 0x007b,/* 0x7b */ + 0x007c,/* 0x7c */ + 0x007d,/* 0x7d */ + 0x007e,/* 0x7e */ + 0x007f,/* 0x7f */ + 0x0080,/* 0x80 */ + 0x0081,/* 0x81 */ + 0x0082,/* 0x82 */ + 0x0083,/* 0x83 */ + 0x0084,/* 0x84 */ + 0x0085,/* 0x85 */ + 0x0086,/* 0x86 */ + 0x0087,/* 0x87 */ + 0x0088,/* 0x88 */ + 0x0089,/* 0x89 */ + 0x008a,/* 0x8a */ + 0x008b,/* 0x8b */ + 0x008c,/* 0x8c */ + 0x008d,/* 0x8d */ + 0x008e,/* 0x8e */ + 0x008f,/* 0x8f */ + 0x0090,/* 0x90 */ + 0x0091,/* 0x91 */ + 0x0092,/* 0x92 */ + 0x0093,/* 0x93 */ + 0x0094,/* 0x94 */ + 0x0095,/* 0x95 */ + 0x0096,/* 0x96 */ + 0x0097,/* 0x97 */ + 0x0098,/* 0x98 */ + 0x0099,/* 0x99 */ + 0x009a,/* 0x9a */ + 0x009b,/* 0x9b */ + 0x009c,/* 0x9c */ + 0x009d,/* 0x9d */ + 0x009e,/* 0x9e */ + 0x009f,/* 0x9f */ + 0x00A0,/* 0xa0 */ + 0x0901,/* 0xa1 */ + 0x0902,/* 0xa2 */ + 0x0903,/* 0xa3 */ + 0x0905,/* 0xa4 */ + 0x0906,/* 0xa5 */ + 0x0907,/* 0xa6 */ + 0x0908,/* 0xa7 */ + 0x0909,/* 0xa8 */ + 0x090a,/* 0xa9 */ + 0x090b,/* 0xaa */ + 0x090e,/* 0xab */ + 0x090f,/* 0xac */ + 0x0910,/* 0xad */ + 0x090d,/* 0xae */ + 0x0912,/* 0xaf */ + 0x0913,/* 0xb0 */ + 0x0914,/* 0xb1 */ + 0x0911,/* 0xb2 */ + 0x0915,/* 0xb3 */ + 0x0916,/* 0xb4 */ + 0x0917,/* 0xb5 */ + 0x0918,/* 0xb6 */ + 0x0919,/* 0xb7 */ + 0x091a,/* 0xb8 */ + 0x091b,/* 0xb9 */ + 0x091c,/* 0xba */ + 0x091d,/* 0xbb */ + 0x091e,/* 0xbc */ + 0x091f,/* 0xbd */ + 0x0920,/* 0xbe */ + 0x0921,/* 0xbf */ + 0x0922,/* 0xc0 */ + 0x0923,/* 0xc1 */ + 0x0924,/* 0xc2 */ + 0x0925,/* 0xc3 */ + 0x0926,/* 0xc4 */ + 0x0927,/* 0xc5 */ + 0x0928,/* 0xc6 */ + 0x0929,/* 0xc7 */ + 0x092a,/* 0xc8 */ + 0x092b,/* 0xc9 */ + 0x092c,/* 0xca */ + 0x092d,/* 0xcb */ + 0x092e,/* 0xcc */ + 0x092f,/* 0xcd */ + 0x095f,/* 0xce */ + 0x0930,/* 0xcf */ + 0x0931,/* 0xd0 */ + 0x0932,/* 0xd1 */ + 0x0933,/* 0xd2 */ + 0x0934,/* 0xd3 */ + 0x0935,/* 0xd4 */ + 0x0936,/* 0xd5 */ + 0x0937,/* 0xd6 */ + 0x0938,/* 0xd7 */ + 0x0939,/* 0xd8 */ + 0x200D,/* 0xd9 */ + 0x093e,/* 0xda */ + 0x093f,/* 0xdb */ + 0x0940,/* 0xdc */ + 0x0941,/* 0xdd */ + 0x0942,/* 0xde */ + 0x0943,/* 0xdf */ + 0x0946,/* 0xe0 */ + 0x0947,/* 0xe1 */ + 0x0948,/* 0xe2 */ + 0x0945,/* 0xe3 */ + 0x094a,/* 0xe4 */ + 0x094b,/* 0xe5 */ + 0x094c,/* 0xe6 */ + 0x0949,/* 0xe7 */ + 0x094d,/* 0xe8 */ + 0x093c,/* 0xe9 */ + 0x0964,/* 0xea */ + 0xFFFF,/* 0xeb */ + 0xFFFF,/* 0xec */ + 0xFFFF,/* 0xed */ + 0xFFFF,/* 0xee */ + 0xFFFF,/* 0xef */ + 0xFFFF,/* 0xf0 */ + 0x0966,/* 0xf1 */ + 0x0967,/* 0xf2 */ + 0x0968,/* 0xf3 */ + 0x0969,/* 0xf4 */ + 0x096a,/* 0xf5 */ + 0x096b,/* 0xf6 */ + 0x096c,/* 0xf7 */ + 0x096d,/* 0xf8 */ + 0x096e,/* 0xf9 */ + 0x096f,/* 0xfa */ + 0xFFFF,/* 0xfb */ + 0xFFFF,/* 0xfc */ + 0xFFFF,/* 0xfd */ + 0xFFFF,/* 0xfe */ + 0xFFFF /* 0xff */ +}; + +static const uint16_t vowelSignESpecialCases[][2]={ + { 2 /*length of array*/ , 0 }, + { 0xA4 , 0x0904 }, +}; + +static const uint16_t nuktaSpecialCases[][2]={ + { 16 /*length of array*/ , 0 }, + { 0xA6 , 0x090c }, + { 0xEA , 0x093D }, + { 0xDF , 0x0944 }, + { 0xA1 , 0x0950 }, + { 0xb3 , 0x0958 }, + { 0xb4 , 0x0959 }, + { 0xb5 , 0x095a }, + { 0xba , 0x095b }, + { 0xbf , 0x095c }, + { 0xC0 , 0x095d }, + { 0xc9 , 0x095e }, + { 0xAA , 0x0960 }, + { 0xA7 , 0x0961 }, + { 0xDB , 0x0962 }, + { 0xDC , 0x0963 }, +}; + + +#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t offset = (int32_t)(source - args->source-1); \ + /* write the targetUniChar to target */ \ + if(target < targetLimit){ \ + if(targetByteUnit <= 0xFF){ \ + *(target)++ = (uint8_t)(targetByteUnit); \ + if(offsets){ \ + *(offsets++) = offset; \ + } \ + }else{ \ + if (targetByteUnit > 0xFFFF) { \ + *(target)++ = (uint8_t)(targetByteUnit>>16); \ + if (offsets) { \ + --offset; \ + *(offsets++) = offset; \ + } \ + } \ + if (!(target < targetLimit)) { \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t)(targetByteUnit >> 8); \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t)targetByteUnit; \ + *err = U_BUFFER_OVERFLOW_ERROR; \ + } else { \ + *(target)++ = (uint8_t)(targetByteUnit>>8); \ + if(offsets){ \ + *(offsets++) = offset; \ + } \ + if(target < targetLimit){ \ + *(target)++ = (uint8_t) targetByteUnit; \ + if(offsets){ \ + *(offsets++) = offset ; \ + } \ + }else{ \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\ + (uint8_t) (targetByteUnit); \ + *err = U_BUFFER_OVERFLOW_ERROR; \ + } \ + } \ + } \ + }else{ \ + if (targetByteUnit & 0xFF0000) { \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t) (targetByteUnit >>16); \ + } \ + if(targetByteUnit & 0xFF00){ \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t) (targetByteUnit >>8); \ + } \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t) (targetByteUnit); \ + *err = U_BUFFER_OVERFLOW_ERROR; \ + } \ +} UPRV_BLOCK_MACRO_END + +/* Rules: + * Explicit Halant : + * <HALANT> + <ZWNJ> + * Soft Halant : + * <HALANT> + <ZWJ> + */ +static void U_CALLCONV +UConverter_fromUnicode_ISCII_OFFSETS_LOGIC( + UConverterFromUnicodeArgs * args, UErrorCode * err) { + const UChar *source = args->source; + const UChar *sourceLimit = args->sourceLimit; + unsigned char *target = (unsigned char *) args->target; + unsigned char *targetLimit = (unsigned char *) args->targetLimit; + int32_t* offsets = args->offsets; + uint32_t targetByteUnit = 0x0000; + UChar32 sourceChar = 0x0000; + UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */ + UConverterDataISCII *converterData; + uint16_t newDelta=0; + uint16_t range = 0; + UBool deltaChanged = FALSE; + + if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + /* initialize data */ + converterData=(UConverterDataISCII*)args->converter->extraInfo; + newDelta=converterData->currentDeltaFromUnicode; + range = (uint16_t)(newDelta/DELTA); + + if ((sourceChar = args->converter->fromUChar32)!=0) { + goto getTrail; + } + + /*writing the char to the output stream */ + while (source < sourceLimit) { + /* Write the language code following LF only if LF is not the last character. */ + if (args->converter->fromUnicodeStatus == LF) { + targetByteUnit = ATR<<8; + targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang; + args->converter->fromUnicodeStatus = 0x0000; + /* now append ATR and language code */ + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); + if (U_FAILURE(*err)) { + break; + } + } + + sourceChar = *source++; + tempContextFromUnicode = converterData->contextCharFromUnicode; + + targetByteUnit = missingCharMarker; + + /*check if input is in ASCII and C0 control codes range*/ + if (sourceChar <= ASCII_END) { + args->converter->fromUnicodeStatus = sourceChar; + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err); + if (U_FAILURE(*err)) { + break; + } + continue; + } + switch (sourceChar) { + case ZWNJ: + /* contextChar has HALANT */ + if (converterData->contextCharFromUnicode) { + converterData->contextCharFromUnicode = 0x00; + targetByteUnit = ISCII_HALANT; + } else { + /* consume ZWNJ and continue */ + converterData->contextCharFromUnicode = 0x00; + continue; + } + break; + case ZWJ: + /* contextChar has HALANT */ + if (converterData->contextCharFromUnicode) { + targetByteUnit = ISCII_NUKTA; + } else { + targetByteUnit =ISCII_INV; + } + converterData->contextCharFromUnicode = 0x00; + break; + default: + /* is the sourceChar in the INDIC_RANGE? */ + if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) { + /* Danda and Double Danda are valid in Northern scripts.. since Unicode + * does not include these codepoints in all Northern scrips we need to + * filter them out + */ + if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) { + /* find out to which block the souceChar belongs*/ + range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA); + newDelta =(uint16_t)(range*DELTA); + + /* Now are we in the same block as the previous? */ + if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) { + converterData->currentDeltaFromUnicode = newDelta; + converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum; + deltaChanged =TRUE; + converterData->isFirstBuffer=FALSE; + } + + if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { + if (sourceChar == PNJ_TIPPI) { + /* Make sure Tippi is converterd to Bindi. */ + sourceChar = PNJ_BINDI; + } else if (sourceChar == PNJ_ADHAK) { + /* This is for consonant cluster handling. */ + converterData->contextCharFromUnicode = PNJ_ADHAK; + } + + } + /* Normalize all Indic codepoints to Devanagari and map them to ISCII */ + /* now subtract the new delta from sourceChar*/ + sourceChar -= converterData->currentDeltaFromUnicode; + } + + /* get the target byte unit */ + targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar]; + + /* is the code point valid in current script? */ + if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) { + /* Vocallic RR is assigned in ISCII Telugu and Unicode */ + if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) { + targetByteUnit=missingCharMarker; + } + } + + if (deltaChanged) { + /* we are in a script block which is different than + * previous sourceChar's script block write ATR and language codes + */ + uint32_t temp=0; + temp =(uint16_t)(ATR<<8); + temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang); + /* reset */ + deltaChanged=FALSE; + /* now append ATR and language code */ + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err); + if (U_FAILURE(*err)) { + break; + } + } + + if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) { + continue; + } + } + /* reset context char */ + converterData->contextCharFromUnicode = 0x00; + break; + } + if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) { + /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */ + /* reset context char */ + converterData->contextCharFromUnicode = 0x0000; + targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit; + /* write targetByteUnit to target */ + WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err); + if (U_FAILURE(*err)) { + break; + } + } else if (targetByteUnit != missingCharMarker) { + if (targetByteUnit==ISCII_HALANT) { + converterData->contextCharFromUnicode = (UChar)targetByteUnit; + } + /* write targetByteUnit to target*/ + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); + if (U_FAILURE(*err)) { + break; + } + } else { + /* oops.. the code point is unassigned */ + /*check if the char is a First surrogate*/ + if (U16_IS_SURROGATE(sourceChar)) { + if (U16_IS_SURROGATE_LEAD(sourceChar)) { +getTrail: + /*look ahead to find the trail surrogate*/ + if (source < sourceLimit) { + /* test the following code unit */ + UChar trail= (*source); + if (U16_IS_TRAIL(trail)) { + source++; + sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); + *err =U_INVALID_CHAR_FOUND; + /* convert this surrogate code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* no more input */ + *err = U_ZERO_ERROR; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* callback(unassigned) for a BMP code point */ + *err = U_INVALID_CHAR_FOUND; + } + + args->converter->fromUChar32=sourceChar; + break; + } + }/* end while(mySourceIndex<mySourceLength) */ + + /*save the state and return */ + args->source = source; + args->target = (char*)target; +} + +static const uint16_t lookupTable[][2]={ + { ZERO, ZERO }, /*DEFALT*/ + { ZERO, ZERO }, /*ROMAN*/ + { DEVANAGARI, DEV_MASK }, + { BENGALI, BNG_MASK }, + { TAMIL, TML_MASK }, + { TELUGU, KND_MASK }, + { BENGALI, BNG_MASK }, + { ORIYA, ORI_MASK }, + { KANNADA, KND_MASK }, + { MALAYALAM, MLM_MASK }, + { GUJARATI, GJR_MASK }, + { GURMUKHI, PNJ_MASK } +}; + +#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err) UPRV_BLOCK_MACRO_BEGIN { \ + /* add offset to current Indic Block */ \ + if(targetUniChar>ASCII_END && \ + targetUniChar != ZWJ && \ + targetUniChar != ZWNJ && \ + targetUniChar != DANDA && \ + targetUniChar != DOUBLE_DANDA){ \ + \ + targetUniChar+=(uint16_t)(delta); \ + } \ + /* now write the targetUniChar */ \ + if(target<args->targetLimit){ \ + *(target)++ = (UChar)targetUniChar; \ + if(offsets){ \ + *(offsets)++ = (int32_t)(offset); \ + } \ + }else{ \ + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \ + (UChar)targetUniChar; \ + *err = U_BUFFER_OVERFLOW_ERROR; \ + } \ +} UPRV_BLOCK_MACRO_END + +#define GET_MAPPING(sourceChar,targetUniChar,data) UPRV_BLOCK_MACRO_BEGIN { \ + targetUniChar = toUnicodeTable[(sourceChar)] ; \ + /* is the code point valid in current script? */ \ + if(sourceChar> ASCII_END && \ + (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \ + /* Vocallic RR is assigne in ISCII Telugu and Unicode */ \ + if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \ + targetUniChar!=VOCALLIC_RR){ \ + targetUniChar=missingCharMarker; \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/*********** + * Rules for ISCII to Unicode converter + * ISCII is stateful encoding. To convert ISCII bytes to Unicode, + * which has both precomposed and decomposed forms characters + * pre-context and post-context need to be considered. + * + * Post context + * i) ATR : Attribute code is used to declare the font and script switching. + * Currently we only switch scripts and font codes consumed without generating an error + * ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure, + * obsolete characters + * Pre context + * i) Halant: if preceeded by a halant then it is a explicit halant + * ii) Nukta : + * a) if preceeded by a halant then it is a soft halant + * b) if preceeded by specific consonants and the ligatures have pre-composed + * characters in Unicode then convert to pre-composed characters + * iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda + * + */ + +static void U_CALLCONV +UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) { + const char *source = ( char *) args->source; + UChar *target = args->target; + const char *sourceLimit = args->sourceLimit; + const UChar* targetLimit = args->targetLimit; + uint32_t targetUniChar = 0x0000; + uint8_t sourceChar = 0x0000; + UConverterDataISCII* data; + UChar32* toUnicodeStatus=NULL; + UChar32 tempTargetUniChar = 0x0000; + UChar* contextCharToUnicode= NULL; + UBool found; + int i; + int offset = 0; + + if ((args->converter == NULL) || (target < args->target) || (source < args->source)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + data = (UConverterDataISCII*)(args->converter->extraInfo); + contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */ + toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/ + + while (U_SUCCESS(*err) && source<sourceLimit) { + + targetUniChar = missingCharMarker; + + if (target < targetLimit) { + sourceChar = (unsigned char)*(source)++; + + /* look at the post-context preform special processing */ + if (*contextCharToUnicode==ATR) { + + /* If we have ATR in *contextCharToUnicode then we need to change our + * state to the Indic Script specified by sourceChar + */ + + /* check if the sourceChar is supported script range*/ + if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) { + data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA); + data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1]; + } else if (sourceChar==DEF) { + /* switch back to default */ + data->currentDeltaToUnicode = data->defDeltaToUnicode; + data->currentMaskToUnicode = data->defMaskToUnicode; + } else { + if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) { + /* these are display codes consume and continue */ + } else { + *err =U_ILLEGAL_CHAR_FOUND; + /* reset */ + *contextCharToUnicode=NO_CHAR_MARKER; + goto CALLBACK; + } + } + + /* reset */ + *contextCharToUnicode=NO_CHAR_MARKER; + + continue; + + } else if (*contextCharToUnicode==EXT) { + /* check if sourceChar is in 0xA1-0xEE range */ + if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) { + /* We currently support only Anudatta and Devanagari abbreviation sign */ + if (sourceChar==0xBF || sourceChar == 0xB8) { + targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA; + + /* find out if the mapping is valid in this state */ + if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { + *contextCharToUnicode= NO_CHAR_MARKER; + + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + /* write to target */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); + + continue; + } + } + /* byte unit is unassigned */ + targetUniChar = missingCharMarker; + *err= U_INVALID_CHAR_FOUND; + } else { + /* only 0xA1 - 0xEE are legal after EXT char */ + *contextCharToUnicode= NO_CHAR_MARKER; + *err = U_ILLEGAL_CHAR_FOUND; + } + goto CALLBACK; + } else if (*contextCharToUnicode==ISCII_INV) { + if (sourceChar==ISCII_HALANT) { + targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */ + } else { + targetUniChar = ZWJ; + } + + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + /* write to target */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); + /* reset */ + *contextCharToUnicode=NO_CHAR_MARKER; + } + + /* look at the pre-context and perform special processing */ + switch (sourceChar) { + case ISCII_INV: + case EXT: + case ATR: + *contextCharToUnicode = (UChar)sourceChar; + + if (*toUnicodeStatus != missingCharMarker) { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); + *toUnicodeStatus = missingCharMarker; + } + continue; + case ISCII_DANDA: + /* handle double danda*/ + if (*contextCharToUnicode== ISCII_DANDA) { + targetUniChar = DOUBLE_DANDA; + /* clear the context */ + *contextCharToUnicode = NO_CHAR_MARKER; + *toUnicodeStatus = missingCharMarker; + } else { + GET_MAPPING(sourceChar,targetUniChar,data); + *contextCharToUnicode = sourceChar; + } + break; + case ISCII_HALANT: + /* handle explicit halant */ + if (*contextCharToUnicode == ISCII_HALANT) { + targetUniChar = ZWNJ; + /* clear the context */ + *contextCharToUnicode = NO_CHAR_MARKER; + } else { + GET_MAPPING(sourceChar,targetUniChar,data); + *contextCharToUnicode = sourceChar; + } + break; + case 0x0A: + case 0x0D: + data->resetToDefaultToUnicode = TRUE; + GET_MAPPING(sourceChar,targetUniChar,data) + ; + *contextCharToUnicode = sourceChar; + break; + + case ISCII_VOWEL_SIGN_E: + i=1; + found=FALSE; + for (; i<vowelSignESpecialCases[0][0]; i++) { + U_ASSERT(i<UPRV_LENGTHOF(vowelSignESpecialCases)); + if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) { + targetUniChar=vowelSignESpecialCases[i][1]; + found=TRUE; + break; + } + } + if (found) { + /* find out if the mapping is valid in this state */ + if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { + /*targetUniChar += data->currentDeltaToUnicode ;*/ + *contextCharToUnicode= NO_CHAR_MARKER; + *toUnicodeStatus = missingCharMarker; + break; + } + } + GET_MAPPING(sourceChar,targetUniChar,data); + *contextCharToUnicode = sourceChar; + break; + + case ISCII_NUKTA: + /* handle soft halant */ + if (*contextCharToUnicode == ISCII_HALANT) { + targetUniChar = ZWJ; + /* clear the context */ + *contextCharToUnicode = NO_CHAR_MARKER; + break; + } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi. + * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39). + */ + targetUniChar = PNJ_RRA; + WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); + if (U_SUCCESS(*err)) { + targetUniChar = PNJ_SIGN_VIRAMA; + WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); + if (U_SUCCESS(*err)) { + targetUniChar = PNJ_HA; + WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); + } else { + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; + } + } else { + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA; + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; + } + *toUnicodeStatus = missingCharMarker; + data->contextCharToUnicode = NO_CHAR_MARKER; + continue; + } else { + /* try to handle <CHAR> + ISCII_NUKTA special mappings */ + i=1; + found =FALSE; + for (; i<nuktaSpecialCases[0][0]; i++) { + if (nuktaSpecialCases[i][0]==(uint8_t) + *contextCharToUnicode) { + targetUniChar=nuktaSpecialCases[i][1]; + found =TRUE; + break; + } + } + if (found) { + /* find out if the mapping is valid in this state */ + if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { + /*targetUniChar += data->currentDeltaToUnicode ;*/ + *contextCharToUnicode= NO_CHAR_MARKER; + *toUnicodeStatus = missingCharMarker; + if (data->currentDeltaToUnicode == PNJ_DELTA) { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); + continue; + } + break; + } + /* else fall through to default */ + } + /* else fall through to default */ + U_FALLTHROUGH; + } + default:GET_MAPPING(sourceChar,targetUniChar,data) + ; + *contextCharToUnicode = sourceChar; + break; + } + + if (*toUnicodeStatus != missingCharMarker) { + /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */ + if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) && + (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && ((UChar32)(targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus)) { + /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */ + offset = (int)(source-args->source - 3); + tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err); + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */ + *toUnicodeStatus = missingCharMarker; + continue; + } else { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. + * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi. + */ + if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) { + targetUniChar = PNJ_TIPPI - PNJ_DELTA; + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err); + } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) { + /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */ + data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA; + } else { + /* write the previously mapped codepoint */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); + } + } + *toUnicodeStatus = missingCharMarker; + } + + if (targetUniChar != missingCharMarker) { + /* now save the targetUniChar for delayed write */ + *toUnicodeStatus = (UChar) targetUniChar; + if (data->resetToDefaultToUnicode==TRUE) { + data->currentDeltaToUnicode = data->defDeltaToUnicode; + data->currentMaskToUnicode = data->defMaskToUnicode; + data->resetToDefaultToUnicode=FALSE; + } + } else { + + /* we reach here only if targetUniChar == missingCharMarker + * so assign codes to reason and err + */ + *err = U_INVALID_CHAR_FOUND; +CALLBACK: + args->converter->toUBytes[0] = (uint8_t) sourceChar; + args->converter->toULength = 1; + break; + } + + } else { + *err =U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + if (U_SUCCESS(*err) && args->flush && source == sourceLimit) { + /* end of the input stream */ + UConverter *cnv = args->converter; + + if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) { + /* set toUBytes[] */ + cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode; + cnv->toULength = 1; + + /* avoid looping on truncated sequences */ + *contextCharToUnicode = NO_CHAR_MARKER; + } else { + cnv->toULength = 0; + } + + if (*toUnicodeStatus != missingCharMarker) { + /* output a remaining target character */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err); + *toUnicodeStatus = missingCharMarker; + } + } + + args->target = target; + args->source = source; +} + +/* structure for SafeClone calculations */ +struct cloneISCIIStruct { + UConverter cnv; + UConverterDataISCII mydata; +}; + +static UConverter * U_CALLCONV +_ISCII_SafeClone(const UConverter *cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status) +{ + struct cloneISCIIStruct * localClone; + int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct); + + if (U_FAILURE(*status)) { + return 0; + } + + if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */ + *pBufferSize = bufferSizeNeeded; + return 0; + } + + localClone = (struct cloneISCIIStruct *)stackBuffer; + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ + + uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII)); + localClone->cnv.extraInfo = &localClone->mydata; + localClone->cnv.isExtraLocal = TRUE; + + return &localClone->cnv; +} + +static void U_CALLCONV +_ISCIIGetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) +{ + (void)cnv; + (void)which; + (void)pErrorCode; + int32_t idx, script; + uint8_t mask; + + /* Since all ISCII versions allow switching to other ISCII + scripts, we add all roundtrippable characters to this set. */ + sa->addRange(sa->set, 0, ASCII_END); + for (script = DEVANAGARI; script <= MALAYALAM; script++) { + mask = (uint8_t)(lookupInitialData[script].maskEnum); + for (idx = 0; idx < DELTA; idx++) { + /* added check for TELUGU character */ + if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) { + sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN); + } + } + } + sa->add(sa->set, DANDA); + sa->add(sa->set, DOUBLE_DANDA); + sa->add(sa->set, ZWNJ); + sa->add(sa->set, ZWJ); +} +U_CDECL_END +static const UConverterImpl _ISCIIImpl={ + + UCNV_ISCII, + + NULL, + NULL, + + _ISCIIOpen, + _ISCIIClose, + _ISCIIReset, + + UConverter_toUnicode_ISCII_OFFSETS_LOGIC, + UConverter_toUnicode_ISCII_OFFSETS_LOGIC, + UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, + UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, + NULL, + + NULL, + _ISCIIgetName, + NULL, + _ISCII_SafeClone, + _ISCIIGetUnicodeSet, + NULL, + NULL +}; + +static const UConverterStaticData _ISCIIStaticData={ + sizeof(UConverterStaticData), + "ISCII", + 0, + UCNV_IBM, + UCNV_ISCII, + 1, + 4, + { 0x1a, 0, 0, 0 }, + 0x1, + FALSE, + FALSE, + 0x0, + 0x0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ + +}; + +const UConverterSharedData _ISCIIData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl); + +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/thirdparty/icu4c/common/ucnvlat1.cpp b/thirdparty/icu4c/common/ucnvlat1.cpp new file mode 100644 index 0000000000..358bc0caa2 --- /dev/null +++ b/thirdparty/icu4c/common/ucnvlat1.cpp @@ -0,0 +1,756 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnvlat1.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000feb07 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/uset.h" +#include "unicode/utf8.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "ustr_imp.h" + +/* control optimizations according to the platform */ +#define LATIN1_UNROLL_FROM_UNICODE 1 + +/* ISO 8859-1 --------------------------------------------------------------- */ + +/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */ +U_CDECL_BEGIN +static void U_CALLCONV +_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + const uint8_t *source; + UChar *target; + int32_t targetCapacity, length; + int32_t *offsets; + + int32_t sourceIndex; + + /* set up the local pointers */ + source=(const uint8_t *)pArgs->source; + target=pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + sourceIndex=0; + + /* + * since the conversion here is 1:1 UChar:uint8_t, we need only one counter + * for the minimum of the sourceLength and targetCapacity + */ + length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); + if(length<=targetCapacity) { + targetCapacity=length; + } else { + /* target will be full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + length=targetCapacity; + } + + if(targetCapacity>=8) { + /* This loop is unrolled for speed and improved pipelining. */ + int32_t count, loops; + + loops=count=targetCapacity>>3; + length=targetCapacity&=0x7; + do { + target[0]=source[0]; + target[1]=source[1]; + target[2]=source[2]; + target[3]=source[3]; + target[4]=source[4]; + target[5]=source[5]; + target[6]=source[6]; + target[7]=source[7]; + target+=8; + source+=8; + } while(--count>0); + + if(offsets!=NULL) { + do { + offsets[0]=sourceIndex++; + offsets[1]=sourceIndex++; + offsets[2]=sourceIndex++; + offsets[3]=sourceIndex++; + offsets[4]=sourceIndex++; + offsets[5]=sourceIndex++; + offsets[6]=sourceIndex++; + offsets[7]=sourceIndex++; + offsets+=8; + } while(--loops>0); + } + } + + /* conversion loop */ + while(targetCapacity>0) { + *target++=*source++; + --targetCapacity; + } + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + + /* set offsets */ + if(offsets!=NULL) { + while(length>0) { + *offsets++=sourceIndex++; + --length; + } + pArgs->offsets=offsets; + } +} + +/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */ +static UChar32 U_CALLCONV +_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + const uint8_t *source=(const uint8_t *)pArgs->source; + if(source<(const uint8_t *)pArgs->sourceLimit) { + pArgs->source=(const char *)(source+1); + return *source; + } + + /* no output because of empty input */ + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; +} + +/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */ +static void U_CALLCONV +_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source, *sourceLimit; + uint8_t *target, *oldTarget; + int32_t targetCapacity, length; + int32_t *offsets; + + UChar32 cp; + UChar c, max; + + int32_t sourceIndex; + + /* set up the local pointers */ + cnv=pArgs->converter; + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=oldTarget=(uint8_t *)pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + if(cnv->sharedData==&_Latin1Data) { + max=0xff; /* Latin-1 */ + } else { + max=0x7f; /* US-ASCII */ + } + + /* get the converter state from UConverter */ + cp=cnv->fromUChar32; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex= cp==0 ? 0 : -1; + + /* + * since the conversion here is 1:1 UChar:uint8_t, we need only one counter + * for the minimum of the sourceLength and targetCapacity + */ + length=(int32_t)(sourceLimit-source); + if(length<targetCapacity) { + targetCapacity=length; + } + + /* conversion loop */ + if(cp!=0 && targetCapacity>0) { + goto getTrail; + } + +#if LATIN1_UNROLL_FROM_UNICODE + /* unroll the loop with the most common case */ + if(targetCapacity>=16) { + int32_t count, loops; + UChar u, oredChars; + + loops=count=targetCapacity>>4; + do { + oredChars=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + oredChars|=u=*source++; + *target++=(uint8_t)u; + + /* were all 16 entries really valid? */ + if(oredChars>max) { + /* no, return to the first of these 16 */ + source-=16; + target-=16; + break; + } + } while(--count>0); + count=loops-count; + targetCapacity-=16*count; + + if(offsets!=NULL) { + oldTarget+=16*count; + while(count>0) { + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + --count; + } + } + } +#endif + + /* conversion loop */ + c=0; + while(targetCapacity>0 && (c=*source++)<=max) { + /* convert the Unicode code point */ + *target++=(uint8_t)c; + --targetCapacity; + } + + if(c>max) { + cp=c; + if(!U_IS_SURROGATE(cp)) { + /* callback(unassigned) */ + } else if(U_IS_SURROGATE_LEAD(cp)) { +getTrail: + if(source<sourceLimit) { + /* test the following code unit */ + UChar trail=*source; + if(U16_IS_TRAIL(trail)) { + ++source; + cp=U16_GET_SUPPLEMENTARY(cp, trail); + /* this codepage does not map supplementary code points */ + /* callback(unassigned) */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + } + } else { + /* no more input */ + cnv->fromUChar32=cp; + goto noMoreInput; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + } + + *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND; + cnv->fromUChar32=cp; + } +noMoreInput: + + /* set offsets since the start */ + if(offsets!=NULL) { + size_t count=target-oldTarget; + while(count>0) { + *offsets++=sourceIndex++; + --count; + } + } + + if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; +} + +/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */ +static void U_CALLCONV +ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs, + UConverterToUnicodeArgs *pToUArgs, + UErrorCode *pErrorCode) { + UConverter *utf8; + const uint8_t *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + + UChar32 c; + uint8_t b, t1; + + /* set up the local pointers */ + utf8=pToUArgs->converter; + source=(uint8_t *)pToUArgs->source; + sourceLimit=(uint8_t *)pToUArgs->sourceLimit; + target=(uint8_t *)pFromUArgs->target; + targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); + + /* get the converter state from the UTF-8 UConverter */ + if (utf8->toULength > 0) { + c=(UChar32)utf8->toUnicodeStatus; + } else { + c = 0; + } + if(c!=0 && source<sourceLimit) { + if(targetCapacity==0) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return; + } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) { + ++source; + *target++=(uint8_t)(((c&3)<<6)|t1); + --targetCapacity; + + utf8->toUnicodeStatus=0; + utf8->toULength=0; + } else { + /* complicated, illegal or unmappable input: fall back to the pivoting implementation */ + *pErrorCode=U_USING_DEFAULT_WARNING; + return; + } + } + + /* + * Make sure that the last byte sequence before sourceLimit is complete + * or runs into a lead byte. + * In the conversion loop compare source with sourceLimit only once + * per multi-byte character. + * For Latin-1, adjust sourceLimit only for 1 trail byte because + * the conversion loop handles at most 2-byte sequences. + */ + if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) { + --sourceLimit; + } + + /* conversion loop */ + while(source<sourceLimit) { + if(targetCapacity>0) { + b=*source++; + if(U8_IS_SINGLE(b)) { + /* convert ASCII */ + *target++=(uint8_t)b; + --targetCapacity; + } else if( /* handle U+0080..U+00FF inline */ + b>=0xc2 && b<=0xc3 && + (t1=(uint8_t)(*source-0x80)) <= 0x3f + ) { + ++source; + *target++=(uint8_t)(((b&3)<<6)|t1); + --targetCapacity; + } else { + /* complicated, illegal or unmappable input: fall back to the pivoting implementation */ + pToUArgs->source=(char *)(source-1); + pFromUArgs->target=(char *)target; + *pErrorCode=U_USING_DEFAULT_WARNING; + return; + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + /* + * The sourceLimit may have been adjusted before the conversion loop + * to stop before a truncated sequence. + * If so, then collect the truncated sequence now. + * For Latin-1, there is at most exactly one lead byte because of the + * smaller sourceLimit adjustment logic. + */ + if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { + utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++; + utf8->toULength=1; + utf8->mode=U8_COUNT_BYTES(b); + } + + /* write back the updated pointers */ + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; +} + +static void U_CALLCONV +_Latin1GetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + (void)cnv; + (void)which; + (void)pErrorCode; + sa->addRange(sa->set, 0, 0xff); +} +U_CDECL_END + + +static const UConverterImpl _Latin1Impl={ + UCNV_LATIN_1, + + NULL, + NULL, + + NULL, + NULL, + NULL, + + _Latin1ToUnicodeWithOffsets, + _Latin1ToUnicodeWithOffsets, + _Latin1FromUnicodeWithOffsets, + _Latin1FromUnicodeWithOffsets, + _Latin1GetNextUChar, + + NULL, + NULL, + NULL, + NULL, + _Latin1GetUnicodeSet, + + NULL, + ucnv_Latin1FromUTF8 +}; + +static const UConverterStaticData _Latin1StaticData={ + sizeof(UConverterStaticData), + "ISO-8859-1", + 819, UCNV_IBM, UCNV_LATIN_1, 1, 1, + { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _Latin1Data= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl); + +/* US-ASCII ----------------------------------------------------------------- */ + +U_CDECL_BEGIN +/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */ +static void U_CALLCONV +_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + const uint8_t *source, *sourceLimit; + UChar *target, *oldTarget; + int32_t targetCapacity, length; + int32_t *offsets; + + int32_t sourceIndex; + + uint8_t c; + + /* set up the local pointers */ + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=oldTarget=pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex=0; + + /* + * since the conversion here is 1:1 UChar:uint8_t, we need only one counter + * for the minimum of the sourceLength and targetCapacity + */ + length=(int32_t)(sourceLimit-source); + if(length<targetCapacity) { + targetCapacity=length; + } + + if(targetCapacity>=8) { + /* This loop is unrolled for speed and improved pipelining. */ + int32_t count, loops; + UChar oredChars; + + loops=count=targetCapacity>>3; + do { + oredChars=target[0]=source[0]; + oredChars|=target[1]=source[1]; + oredChars|=target[2]=source[2]; + oredChars|=target[3]=source[3]; + oredChars|=target[4]=source[4]; + oredChars|=target[5]=source[5]; + oredChars|=target[6]=source[6]; + oredChars|=target[7]=source[7]; + + /* were all 16 entries really valid? */ + if(oredChars>0x7f) { + /* no, return to the first of these 16 */ + break; + } + source+=8; + target+=8; + } while(--count>0); + count=loops-count; + targetCapacity-=count*8; + + if(offsets!=NULL) { + oldTarget+=count*8; + while(count>0) { + offsets[0]=sourceIndex++; + offsets[1]=sourceIndex++; + offsets[2]=sourceIndex++; + offsets[3]=sourceIndex++; + offsets[4]=sourceIndex++; + offsets[5]=sourceIndex++; + offsets[6]=sourceIndex++; + offsets[7]=sourceIndex++; + offsets+=8; + --count; + } + } + } + + /* conversion loop */ + c=0; + while(targetCapacity>0 && (c=*source++)<=0x7f) { + *target++=c; + --targetCapacity; + } + + if(c>0x7f) { + /* callback(illegal); copy the current bytes to toUBytes[] */ + UConverter *cnv=pArgs->converter; + cnv->toUBytes[0]=c; + cnv->toULength=1; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } else if(source<sourceLimit && target>=pArgs->targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + + /* set offsets since the start */ + if(offsets!=NULL) { + size_t count=target-oldTarget; + while(count>0) { + *offsets++=sourceIndex++; + --count; + } + } + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; +} + +/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */ +static UChar32 U_CALLCONV +_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + const uint8_t *source; + uint8_t b; + + source=(const uint8_t *)pArgs->source; + if(source<(const uint8_t *)pArgs->sourceLimit) { + b=*source++; + pArgs->source=(const char *)source; + if(b<=0x7f) { + return b; + } else { + UConverter *cnv=pArgs->converter; + cnv->toUBytes[0]=b; + cnv->toULength=1; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + return 0xffff; + } + } + + /* no output because of empty input */ + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; +} + +/* "Convert" UTF-8 to US-ASCII: Validate and copy. */ +static void U_CALLCONV +ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, + UConverterToUnicodeArgs *pToUArgs, + UErrorCode *pErrorCode) { + const uint8_t *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity, length; + + uint8_t c; + + if(pToUArgs->converter->toULength > 0) { + /* no handling of partial UTF-8 characters here, fall back to pivoting */ + *pErrorCode=U_USING_DEFAULT_WARNING; + return; + } + + /* set up the local pointers */ + source=(const uint8_t *)pToUArgs->source; + sourceLimit=(const uint8_t *)pToUArgs->sourceLimit; + target=(uint8_t *)pFromUArgs->target; + targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); + + /* + * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter + * for the minimum of the sourceLength and targetCapacity + */ + length=(int32_t)(sourceLimit-source); + if(length<targetCapacity) { + targetCapacity=length; + } + + /* unroll the loop with the most common case */ + if(targetCapacity>=16) { + int32_t count, loops; + uint8_t oredChars; + + loops=count=targetCapacity>>4; + do { + oredChars=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + oredChars|=*target++=*source++; + + /* were all 16 entries really valid? */ + if(oredChars>0x7f) { + /* no, return to the first of these 16 */ + source-=16; + target-=16; + break; + } + } while(--count>0); + count=loops-count; + targetCapacity-=16*count; + } + + /* conversion loop */ + c=0; + while(targetCapacity>0 && (c=*source)<=0x7f) { + ++source; + *target++=c; + --targetCapacity; + } + + if(c>0x7f) { + /* non-ASCII character, handle in standard converter */ + *pErrorCode=U_USING_DEFAULT_WARNING; + } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + + /* write back the updated pointers */ + pToUArgs->source=(const char *)source; + pFromUArgs->target=(char *)target; +} + +static void U_CALLCONV +_ASCIIGetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + (void)cnv; + (void)which; + (void)pErrorCode; + sa->addRange(sa->set, 0, 0x7f); +} +U_CDECL_END + +static const UConverterImpl _ASCIIImpl={ + UCNV_US_ASCII, + + NULL, + NULL, + + NULL, + NULL, + NULL, + + _ASCIIToUnicodeWithOffsets, + _ASCIIToUnicodeWithOffsets, + _Latin1FromUnicodeWithOffsets, + _Latin1FromUnicodeWithOffsets, + _ASCIIGetNextUChar, + + NULL, + NULL, + NULL, + NULL, + _ASCIIGetUnicodeSet, + + NULL, + ucnv_ASCIIFromUTF8 +}; + +static const UConverterStaticData _ASCIIStaticData={ + sizeof(UConverterStaticData), + "US-ASCII", + 367, UCNV_IBM, UCNV_US_ASCII, 1, 1, + { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _ASCIIData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl); + +#endif diff --git a/thirdparty/icu4c/common/ucnvmbcs.cpp b/thirdparty/icu4c/common/ucnvmbcs.cpp new file mode 100644 index 0000000000..ca9b0a335a --- /dev/null +++ b/thirdparty/icu4c/common/ucnvmbcs.cpp @@ -0,0 +1,5723 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2000-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ucnvmbcs.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000jul03 +* created by: Markus W. Scherer +* +* The current code in this file replaces the previous implementation +* of conversion code from multi-byte codepages to Unicode and back. +* This implementation supports the following: +* - legacy variable-length codepages with up to 4 bytes per character +* - all Unicode code points (up to 0x10ffff) +* - efficient distinction of unassigned vs. illegal byte sequences +* - it is possible in fromUnicode() to directly deal with simple +* stateful encodings (used for EBCDIC_STATEFUL) +* - it is possible to convert Unicode code points +* to a single zero byte (but not as a fallback except for SBCS) +* +* Remaining limitations in fromUnicode: +* - byte sequences must not have leading zero bytes +* - except for SBCS codepages: no fallback mapping from Unicode to a zero byte +* - limitation to up to 4 bytes per character +* +* ICU 2.8 (late 2003) adds a secondary data structure which lifts some of these +* limitations and adds m:n character mappings and other features. +* See ucnv_ext.h for details. +* +* Change history: +* +* 5/6/2001 Ram Moved MBCS_SINGLE_RESULT_FROM_U,MBCS_STAGE_2_FROM_U, +* MBCS_VALUE_2_FROM_STAGE_2, MBCS_VALUE_4_FROM_STAGE_2 +* macros to ucnvmbcs.h file +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/ucnv_cb.h" +#include "unicode/udata.h" +#include "unicode/uset.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "ucnv_bld.h" +#include "ucnvmbcs.h" +#include "ucnv_ext.h" +#include "ucnv_cnv.h" +#include "cmemory.h" +#include "cstring.h" +#include "umutex.h" +#include "ustr_imp.h" + +/* control optimizations according to the platform */ +#define MBCS_UNROLL_SINGLE_TO_BMP 1 +#define MBCS_UNROLL_SINGLE_FROM_BMP 0 + +/* + * _MBCSHeader versions 5.3 & 4.3 + * (Note that the _MBCSHeader version is in addition to the converter formatVersion.) + * + * This version is optional. Version 5 is used for incompatible data format changes. + * makeconv will continue to generate version 4 files if possible. + * + * Changes from version 4: + * + * The main difference is an additional _MBCSHeader field with + * - the length (number of uint32_t) of the _MBCSHeader + * - flags for further incompatible data format changes + * - flags for further, backward compatible data format changes + * + * The MBCS_OPT_FROM_U flag indicates that most of the fromUnicode data is omitted from + * the file and needs to be reconstituted at load time. + * This requires a utf8Friendly format with an additional mbcsIndex table for fast + * (and UTF-8-friendly) fromUnicode conversion for Unicode code points up to maxFastUChar. + * (For details about these structures see below, and see ucnvmbcs.h.) + * + * utf8Friendly also implies that the fromUnicode mappings are stored in ascending order + * of the Unicode code points. (This requires that the .ucm file has the |0 etc. + * precision markers for all mappings.) + * + * All fallbacks have been moved to the extension table, leaving only roundtrips in the + * omitted data that can be reconstituted from the toUnicode data. + * + * Of the stage 2 table, the part corresponding to maxFastUChar and below is omitted. + * With only roundtrip mappings in the base fromUnicode data, this part is fully + * redundant with the mbcsIndex and will be reconstituted from that (also using the + * stage 1 table which contains the information about how stage 2 was compacted). + * + * The rest of the stage 2 table, the part for code points above maxFastUChar, + * is stored in the file and will be appended to the reconstituted part. + * + * The entire fromUBytes array is omitted from the file and will be reconstitued. + * This is done by enumerating all toUnicode roundtrip mappings, performing + * each mapping (using the stage 1 and reconstituted stage 2 tables) and + * writing instead of reading the byte values. + * + * _MBCSHeader version 4.3 + * + * Change from version 4.2: + * - Optional utf8Friendly data structures, with 64-entry stage 3 block + * allocation for parts of the BMP, and an additional mbcsIndex in non-SBCS + * files which can be used instead of stages 1 & 2. + * Faster lookups for roundtrips from most commonly used characters, + * and lookups from UTF-8 byte sequences with a natural bit distribution. + * See ucnvmbcs.h for more details. + * + * Change from version 4.1: + * - Added an optional extension table structure at the end of the .cnv file. + * It is present if the upper bits of the header flags field contains a non-zero + * byte offset to it. + * Files that contain only a conversion table and no base table + * use the special outputType MBCS_OUTPUT_EXT_ONLY. + * These contain the base table name between the MBCS header and the extension + * data. + * + * Change from version 4.0: + * - Replace header.reserved with header.fromUBytesLength so that all + * fields in the data have length. + * + * Changes from version 3 (for performance improvements): + * - new bit distribution for state table entries + * - reordered action codes + * - new data structure for single-byte fromUnicode + * + stage 2 only contains indexes + * + stage 3 stores 16 bits per character with classification bits 15..8 + * - no multiplier for stage 1 entries + * - stage 2 for non-single-byte codepages contains the index and the flags in + * one 32-bit value + * - 2-byte and 4-byte fromUnicode results are stored directly as 16/32-bit integers + * + * For more details about old versions of the MBCS data structure, see + * the corresponding versions of this file. + * + * Converting stateless codepage data ---------------------------------------*** + * (or codepage data with simple states) to Unicode. + * + * Data structure and algorithm for converting from complex legacy codepages + * to Unicode. (Designed before 2000-may-22.) + * + * The basic idea is that the structure of legacy codepages can be described + * with state tables. + * When reading a byte stream, each input byte causes a state transition. + * Some transitions result in the output of a code point, some result in + * "unassigned" or "illegal" output. + * This is used here for character conversion. + * + * The data structure begins with a state table consisting of a row + * per state, with 256 entries (columns) per row for each possible input + * byte value. + * Each entry is 32 bits wide, with two formats distinguished by + * the sign bit (bit 31): + * + * One format for transitional entries (bit 31 not set) for non-final bytes, and + * one format for final entries (bit 31 set). + * Both formats contain the number of the next state in the same bit + * positions. + * State 0 is the initial state. + * + * Most of the time, the offset values of subsequent states are added + * up to a scalar value. This value will eventually be the index of + * the Unicode code point in a table that follows the state table. + * The effect is that the code points for final state table rows + * are contiguous. The code points of final state rows follow each other + * in the order of the references to those final states by previous + * states, etc. + * + * For some terminal states, the offset is itself the output Unicode + * code point (16 bits for a BMP code point or 20 bits for a supplementary + * code point (stored as code point minus 0x10000 so that 20 bits are enough). + * For others, the code point in the Unicode table is stored with either + * one or two code units: one for BMP code points, two for a pair of + * surrogates. + * All code points for a final state entry take up the same number of code + * units, regardless of whether they all actually _use_ the same number + * of code units. This is necessary for simple array access. + * + * An additional feature comes in with what in ICU is called "fallback" + * mappings: + * + * In addition to round-trippable, precise, 1:1 mappings, there are often + * mappings defined between similar, though not the same, characters. + * Typically, such mappings occur only in fromUnicode mapping tables because + * Unicode has a superset repertoire of most other codepages. However, it + * is possible to provide such mappings in the toUnicode tables, too. + * In this case, the fallback mappings are partly integrated into the + * general state tables because the structure of the encoding includes their + * byte sequences. + * For final entries in an initial state, fallback mappings are stored in + * the entry itself like with roundtrip mappings. + * For other final entries, they are stored in the code units table if + * the entry is for a pair of code units. + * For single-unit results in the code units table, there is no space to + * alternatively hold a fallback mapping; in this case, the code unit + * is stored as U+fffe (unassigned), and the fallback mapping needs to + * be looked up by the scalar offset value in a separate table. + * + * "Unassigned" state entries really mean "structurally unassigned", + * i.e., such a byte sequence will never have a mapping result. + * + * The interpretation of the bits in each entry is as follows: + * + * Bit 31 not set, not a terminal entry ("transitional"): + * 30..24 next state + * 23..0 offset delta, to be added up + * + * Bit 31 set, terminal ("final") entry: + * 30..24 next state (regardless of action code) + * 23..20 action code: + * action codes 0 and 1 result in precise-mapping Unicode code points + * 0 valid byte sequence + * 19..16 not used, 0 + * 15..0 16-bit Unicode BMP code point + * never U+fffe or U+ffff + * 1 valid byte sequence + * 19..0 20-bit Unicode supplementary code point + * never U+fffe or U+ffff + * + * action codes 2 and 3 result in fallback (unidirectional-mapping) Unicode code points + * 2 valid byte sequence (fallback) + * 19..16 not used, 0 + * 15..0 16-bit Unicode BMP code point as fallback result + * 3 valid byte sequence (fallback) + * 19..0 20-bit Unicode supplementary code point as fallback result + * + * action codes 4 and 5 may result in roundtrip/fallback/unassigned/illegal results + * depending on the code units they result in + * 4 valid byte sequence + * 19..9 not used, 0 + * 8..0 final offset delta + * pointing to one 16-bit code unit which may be + * fffe unassigned -- look for a fallback for this offset + * ffff illegal + * 5 valid byte sequence + * 19..9 not used, 0 + * 8..0 final offset delta + * pointing to two 16-bit code units + * (typically UTF-16 surrogates) + * the result depends on the first code unit as follows: + * 0000..d7ff roundtrip BMP code point (1st alone) + * d800..dbff roundtrip surrogate pair (1st, 2nd) + * dc00..dfff fallback surrogate pair (1st-400, 2nd) + * e000 roundtrip BMP code point (2nd alone) + * e001 fallback BMP code point (2nd alone) + * fffe unassigned + * ffff illegal + * (the final offset deltas are at most 255 * 2, + * times 2 because of storing code unit pairs) + * + * 6 unassigned byte sequence + * 19..16 not used, 0 + * 15..0 16-bit Unicode BMP code point U+fffe (new with version 2) + * this does not contain a final offset delta because the main + * purpose of this action code is to save scalar offset values; + * therefore, fallback values cannot be assigned to byte + * sequences that result in this action code + * 7 illegal byte sequence + * 19..16 not used, 0 + * 15..0 16-bit Unicode BMP code point U+ffff (new with version 2) + * 8 state change only + * 19..0 not used, 0 + * useful for state changes in simple stateful encodings, + * at Shift-In/Shift-Out codes + * + * + * 9..15 reserved for future use + * current implementations will only perform a state change + * and ignore bits 19..0 + * + * An encoding with contiguous ranges of unassigned byte sequences, like + * Shift-JIS and especially EUC-TW, can be stored efficiently by having + * at least two states for the trail bytes: + * One trail byte state that results in code points, and one that only + * has "unassigned" and "illegal" terminal states. + * + * Note: partly by accident, this data structure supports simple stateful + * encodings without any additional logic. + * Currently, only simple Shift-In/Shift-Out schemes are handled with + * appropriate state tables (especially EBCDIC_STATEFUL!). + * + * MBCS version 2 added: + * unassigned and illegal action codes have U+fffe and U+ffff + * instead of unused bits; this is useful for _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP() + * + * Converting from Unicode to codepage bytes --------------------------------*** + * + * The conversion data structure for fromUnicode is designed for the known + * structure of Unicode. It maps from 21-bit code points (0..0x10ffff) to + * a sequence of 1..4 bytes, in addition to a flag that indicates if there is + * a roundtrip mapping. + * + * The lookup is done with a 3-stage trie, using 11/6/4 bits for stage 1/2/3 + * like in the character properties table. + * The beginning of the trie is at offsetFromUTable, the beginning of stage 3 + * with the resulting bytes is at offsetFromUBytes. + * + * Beginning with version 4, single-byte codepages have a significantly different + * trie compared to other codepages. + * In all cases, the entry in stage 1 is directly the index of the block of + * 64 entries in stage 2. + * + * Single-byte lookup: + * + * Stage 2 only contains 16-bit indexes directly to the 16-blocks in stage 3. + * Stage 3 contains one 16-bit word per result: + * Bits 15..8 indicate the kind of result: + * f roundtrip result + * c fallback result from private-use code point + * 8 fallback result from other code points + * 0 unassigned + * Bits 7..0 contain the codepage byte. A zero byte is always possible. + * + * In version 4.3, the runtime code can build an sbcsIndex for a utf8Friendly + * file. For 2-byte UTF-8 byte sequences and some 3-byte sequences the lookup + * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3. + * ASCII code points can be looked up with a linear array access into stage 3. + * See maxFastUChar and other details in ucnvmbcs.h. + * + * Multi-byte lookup: + * + * Stage 2 contains a 32-bit word for each 16-block in stage 3: + * Bits 31..16 contain flags for which stage 3 entries contain roundtrip results + * test: MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) + * If this test is false, then a non-zero result will be interpreted as + * a fallback mapping. + * Bits 15..0 contain the index to stage 3, which must be multiplied by 16*(bytes per char) + * + * Stage 3 contains 2, 3, or 4 bytes per result. + * 2 or 4 bytes are stored as uint16_t/uint32_t in platform endianness, + * while 3 bytes are stored as bytes in big-endian order. + * Leading zero bytes are ignored, and the number of bytes is counted. + * A zero byte mapping result is possible as a roundtrip result. + * For some output types, the actual result is processed from this; + * see ucnv_MBCSFromUnicodeWithOffsets(). + * + * Note that stage 1 always contains 0x440=1088 entries (0x440==0x110000>>10), + * or (version 3 and up) for BMP-only codepages, it contains 64 entries. + * + * In version 4.3, a utf8Friendly file contains an mbcsIndex table. + * For 2-byte UTF-8 byte sequences and most 3-byte sequences the lookup + * becomes a 2-stage (single-index) trie lookup with 6 bits for stage 3. + * ASCII code points can be looked up with a linear array access into stage 3. + * See maxFastUChar, mbcsIndex and other details in ucnvmbcs.h. + * + * In version 3, stage 2 blocks may overlap by multiples of the multiplier + * for compaction. + * In version 4, stage 2 blocks (and for single-byte codepages, stage 3 blocks) + * may overlap by any number of entries. + * + * MBCS version 2 added: + * the converter checks for known output types, which allows + * adding new ones without crashing an unaware converter + */ + +/** + * Callback from ucnv_MBCSEnumToUnicode(), takes 32 mappings from + * consecutive sequences of bytes, starting from the one encoded in value, + * to Unicode code points. (Multiple mappings to reduce per-function call overhead.) + * Does not currently support m:n mappings or reverse fallbacks. + * This function will not be called for sequences of bytes with leading zeros. + * + * @param context an opaque pointer, as passed into ucnv_MBCSEnumToUnicode() + * @param value contains 1..4 bytes of the first byte sequence, right-aligned + * @param codePoints resulting Unicode code points, or negative if a byte sequence does + * not map to anything + * @return TRUE to continue enumeration, FALSE to stop + */ +typedef UBool U_CALLCONV +UConverterEnumToUCallback(const void *context, uint32_t value, UChar32 codePoints[32]); + +static void U_CALLCONV +ucnv_MBCSLoad(UConverterSharedData *sharedData, + UConverterLoadArgs *pArgs, + const uint8_t *raw, + UErrorCode *pErrorCode); + +static void U_CALLCONV +ucnv_MBCSUnload(UConverterSharedData *sharedData); + +static void U_CALLCONV +ucnv_MBCSOpen(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode); + +static UChar32 U_CALLCONV +ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode); + +static void U_CALLCONV +ucnv_MBCSGetStarters(const UConverter* cnv, + UBool starters[256], + UErrorCode *pErrorCode); + +U_CDECL_BEGIN +static const char* U_CALLCONV +ucnv_MBCSGetName(const UConverter *cnv); +U_CDECL_END + +static void U_CALLCONV +ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs, + int32_t offsetIndex, + UErrorCode *pErrorCode); + +static UChar32 U_CALLCONV +ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode); + +static void U_CALLCONV +ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, + UConverterToUnicodeArgs *pToUArgs, + UErrorCode *pErrorCode); + +static void U_CALLCONV +ucnv_MBCSGetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode); + +static void U_CALLCONV +ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, + UConverterToUnicodeArgs *pToUArgs, + UErrorCode *pErrorCode); + +static const UConverterImpl _SBCSUTF8Impl={ + UCNV_MBCS, + + ucnv_MBCSLoad, + ucnv_MBCSUnload, + + ucnv_MBCSOpen, + NULL, + NULL, + + ucnv_MBCSToUnicodeWithOffsets, + ucnv_MBCSToUnicodeWithOffsets, + ucnv_MBCSFromUnicodeWithOffsets, + ucnv_MBCSFromUnicodeWithOffsets, + ucnv_MBCSGetNextUChar, + + ucnv_MBCSGetStarters, + ucnv_MBCSGetName, + ucnv_MBCSWriteSub, + NULL, + ucnv_MBCSGetUnicodeSet, + + NULL, + ucnv_SBCSFromUTF8 +}; + +static const UConverterImpl _DBCSUTF8Impl={ + UCNV_MBCS, + + ucnv_MBCSLoad, + ucnv_MBCSUnload, + + ucnv_MBCSOpen, + NULL, + NULL, + + ucnv_MBCSToUnicodeWithOffsets, + ucnv_MBCSToUnicodeWithOffsets, + ucnv_MBCSFromUnicodeWithOffsets, + ucnv_MBCSFromUnicodeWithOffsets, + ucnv_MBCSGetNextUChar, + + ucnv_MBCSGetStarters, + ucnv_MBCSGetName, + ucnv_MBCSWriteSub, + NULL, + ucnv_MBCSGetUnicodeSet, + + NULL, + ucnv_DBCSFromUTF8 +}; + +static const UConverterImpl _MBCSImpl={ + UCNV_MBCS, + + ucnv_MBCSLoad, + ucnv_MBCSUnload, + + ucnv_MBCSOpen, + NULL, + NULL, + + ucnv_MBCSToUnicodeWithOffsets, + ucnv_MBCSToUnicodeWithOffsets, + ucnv_MBCSFromUnicodeWithOffsets, + ucnv_MBCSFromUnicodeWithOffsets, + ucnv_MBCSGetNextUChar, + + ucnv_MBCSGetStarters, + ucnv_MBCSGetName, + ucnv_MBCSWriteSub, + NULL, + ucnv_MBCSGetUnicodeSet, + NULL, + NULL +}; + +/* Static data is in tools/makeconv/ucnvstat.c for data-based + * converters. Be sure to update it as well. + */ + +const UConverterSharedData _MBCSData={ + sizeof(UConverterSharedData), 1, + NULL, NULL, FALSE, TRUE, &_MBCSImpl, + 0, UCNV_MBCS_TABLE_INITIALIZER +}; + + +/* GB 18030 data ------------------------------------------------------------ */ + +/* helper macros for linear values for GB 18030 four-byte sequences */ +#define LINEAR_18030(a, b, c, d) ((((a)*10+(b))*126L+(c))*10L+(d)) + +#define LINEAR_18030_BASE LINEAR_18030(0x81, 0x30, 0x81, 0x30) + +#define LINEAR(x) LINEAR_18030(x>>24, (x>>16)&0xff, (x>>8)&0xff, x&0xff) + +/* + * Some ranges of GB 18030 where both the Unicode code points and the + * GB four-byte sequences are contiguous and are handled algorithmically by + * the special callback functions below. + * The values are start & end of Unicode & GB codes. + * + * Note that single surrogates are not mapped by GB 18030 + * as of the re-released mapping tables from 2000-nov-30. + */ +static const uint32_t +gb18030Ranges[14][4]={ + {0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35)}, + {0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738)}, + {0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436)}, + {0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531)}, + {0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534)}, + {0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38)}, + {0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537)}, + {0x3CE1, 0x4055, LINEAR(0x8231D438), LINEAR(0x8232AF32)}, + {0x361B, 0x3917, LINEAR(0x8230A633), LINEAR(0x8230F237)}, + {0x49B8, 0x4C76, LINEAR(0x8234A131), LINEAR(0x8234E733)}, + {0x4160, 0x4336, LINEAR(0x8232C937), LINEAR(0x8232F837)}, + {0x478E, 0x4946, LINEAR(0x8233E838), LINEAR(0x82349638)}, + {0x44D7, 0x464B, LINEAR(0x8233A339), LINEAR(0x8233C931)}, + {0xFFE6, 0xFFFF, LINEAR(0x8431A234), LINEAR(0x8431A439)} +}; + +/* bit flag for UConverter.options indicating GB 18030 special handling */ +#define _MBCS_OPTION_GB18030 0x8000 + +/* bit flag for UConverter.options indicating KEIS,JEF,JIF special handling */ +#define _MBCS_OPTION_KEIS 0x01000 +#define _MBCS_OPTION_JEF 0x02000 +#define _MBCS_OPTION_JIPS 0x04000 + +#define KEIS_SO_CHAR_1 0x0A +#define KEIS_SO_CHAR_2 0x42 +#define KEIS_SI_CHAR_1 0x0A +#define KEIS_SI_CHAR_2 0x41 + +#define JEF_SO_CHAR 0x28 +#define JEF_SI_CHAR 0x29 + +#define JIPS_SO_CHAR_1 0x1A +#define JIPS_SO_CHAR_2 0x70 +#define JIPS_SI_CHAR_1 0x1A +#define JIPS_SI_CHAR_2 0x71 + +enum SISO_Option { + SI, + SO +}; +typedef enum SISO_Option SISO_Option; + +static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) { + int32_t SISOLength = 0; + + switch (option) { + case SI: + if ((cnvOption&_MBCS_OPTION_KEIS)!=0) { + value[0] = KEIS_SI_CHAR_1; + value[1] = KEIS_SI_CHAR_2; + SISOLength = 2; + } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) { + value[0] = JEF_SI_CHAR; + SISOLength = 1; + } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) { + value[0] = JIPS_SI_CHAR_1; + value[1] = JIPS_SI_CHAR_2; + SISOLength = 2; + } else { + value[0] = UCNV_SI; + SISOLength = 1; + } + break; + case SO: + if ((cnvOption&_MBCS_OPTION_KEIS)!=0) { + value[0] = KEIS_SO_CHAR_1; + value[1] = KEIS_SO_CHAR_2; + SISOLength = 2; + } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) { + value[0] = JEF_SO_CHAR; + SISOLength = 1; + } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) { + value[0] = JIPS_SO_CHAR_1; + value[1] = JIPS_SO_CHAR_2; + SISOLength = 2; + } else { + value[0] = UCNV_SO; + SISOLength = 1; + } + break; + default: + /* Should never happen. */ + break; + } + + return SISOLength; +} + +/* Miscellaneous ------------------------------------------------------------ */ + +/* similar to ucnv_MBCSGetNextUChar() but recursive */ +static UBool +enumToU(UConverterMBCSTable *mbcsTable, int8_t stateProps[], + int32_t state, uint32_t offset, + uint32_t value, + UConverterEnumToUCallback *callback, const void *context, + UErrorCode *pErrorCode) { + UChar32 codePoints[32]; + const int32_t *row; + const uint16_t *unicodeCodeUnits; + UChar32 anyCodePoints; + int32_t b, limit; + + row=mbcsTable->stateTable[state]; + unicodeCodeUnits=mbcsTable->unicodeCodeUnits; + + value<<=8; + anyCodePoints=-1; /* becomes non-negative if there is a mapping */ + + b=(stateProps[state]&0x38)<<2; + if(b==0 && stateProps[state]>=0x40) { + /* skip byte sequences with leading zeros because they are not stored in the fromUnicode table */ + codePoints[0]=U_SENTINEL; + b=1; + } + limit=((stateProps[state]&7)+1)<<5; + while(b<limit) { + int32_t entry=row[b]; + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + int32_t nextState=MBCS_ENTRY_TRANSITION_STATE(entry); + if(stateProps[nextState]>=0) { + /* recurse to a state with non-ignorable actions */ + if(!enumToU( + mbcsTable, stateProps, nextState, + offset+MBCS_ENTRY_TRANSITION_OFFSET(entry), + value|(uint32_t)b, + callback, context, + pErrorCode)) { + return FALSE; + } + } + codePoints[b&0x1f]=U_SENTINEL; + } else { + UChar32 c; + int32_t action; + + /* + * An if-else-if chain provides more reliable performance for + * the most common cases compared to a switch. + */ + action=MBCS_ENTRY_FINAL_ACTION(entry); + if(action==MBCS_STATE_VALID_DIRECT_16) { + /* output BMP code point */ + c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + } else if(action==MBCS_STATE_VALID_16) { + int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); + c=unicodeCodeUnits[finalOffset]; + if(c<0xfffe) { + /* output BMP code point */ + } else { + c=U_SENTINEL; + } + } else if(action==MBCS_STATE_VALID_16_PAIR) { + int32_t finalOffset=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); + c=unicodeCodeUnits[finalOffset++]; + if(c<0xd800) { + /* output BMP code point below 0xd800 */ + } else if(c<=0xdbff) { + /* output roundtrip or fallback supplementary code point */ + c=((c&0x3ff)<<10)+unicodeCodeUnits[finalOffset]+(0x10000-0xdc00); + } else if(c==0xe000) { + /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ + c=unicodeCodeUnits[finalOffset]; + } else { + c=U_SENTINEL; + } + } else if(action==MBCS_STATE_VALID_DIRECT_20) { + /* output supplementary code point */ + c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000); + } else { + c=U_SENTINEL; + } + + codePoints[b&0x1f]=c; + anyCodePoints&=c; + } + if(((++b)&0x1f)==0) { + if(anyCodePoints>=0) { + if(!callback(context, value|(uint32_t)(b-0x20), codePoints)) { + return FALSE; + } + anyCodePoints=-1; + } + } + } + return TRUE; +} + +/* + * Only called if stateProps[state]==-1. + * A recursive call may do stateProps[state]|=0x40 if this state is the target of an + * MBCS_STATE_CHANGE_ONLY. + */ +static int8_t +getStateProp(const int32_t (*stateTable)[256], int8_t stateProps[], int state) { + const int32_t *row; + int32_t min, max, entry, nextState; + + row=stateTable[state]; + stateProps[state]=0; + + /* find first non-ignorable state */ + for(min=0;; ++min) { + entry=row[min]; + nextState=MBCS_ENTRY_STATE(entry); + if(stateProps[nextState]==-1) { + getStateProp(stateTable, stateProps, nextState); + } + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + if(stateProps[nextState]>=0) { + break; + } + } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) { + break; + } + if(min==0xff) { + stateProps[state]=-0x40; /* (int8_t)0xc0 */ + return stateProps[state]; + } + } + stateProps[state]|=(int8_t)((min>>5)<<3); + + /* find last non-ignorable state */ + for(max=0xff; min<max; --max) { + entry=row[max]; + nextState=MBCS_ENTRY_STATE(entry); + if(stateProps[nextState]==-1) { + getStateProp(stateTable, stateProps, nextState); + } + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + if(stateProps[nextState]>=0) { + break; + } + } else if(MBCS_ENTRY_FINAL_ACTION(entry)<MBCS_STATE_UNASSIGNED) { + break; + } + } + stateProps[state]|=(int8_t)(max>>5); + + /* recurse further and collect direct-state information */ + while(min<=max) { + entry=row[min]; + nextState=MBCS_ENTRY_STATE(entry); + if(stateProps[nextState]==-1) { + getStateProp(stateTable, stateProps, nextState); + } + if(MBCS_ENTRY_IS_FINAL(entry)) { + stateProps[nextState]|=0x40; + if(MBCS_ENTRY_FINAL_ACTION(entry)<=MBCS_STATE_FALLBACK_DIRECT_20) { + stateProps[state]|=0x40; + } + } + ++min; + } + return stateProps[state]; +} + +/* + * Internal function enumerating the toUnicode data of an MBCS converter. + * Currently only used for reconstituting data for a MBCS_OPT_NO_FROM_U + * table, but could also be used for a future ucnv_getUnicodeSet() option + * that includes reverse fallbacks (after updating this function's implementation). + * Currently only handles roundtrip mappings. + * Does not currently handle extensions. + */ +static void +ucnv_MBCSEnumToUnicode(UConverterMBCSTable *mbcsTable, + UConverterEnumToUCallback *callback, const void *context, + UErrorCode *pErrorCode) { + /* + * Properties for each state, to speed up the enumeration. + * Ignorable actions are unassigned/illegal/state-change-only: + * They do not lead to mappings. + * + * Bits 7..6: + * 1 direct/initial state (stateful converters have multiple) + * 0 non-initial state with transitions or with non-ignorable result actions + * -1 final state with only ignorable actions + * + * Bits 5..3: + * The lowest byte value with non-ignorable actions is + * value<<5 (rounded down). + * + * Bits 2..0: + * The highest byte value with non-ignorable actions is + * (value<<5)&0x1f (rounded up). + */ + int8_t stateProps[MBCS_MAX_STATE_COUNT]; + int32_t state; + + uprv_memset(stateProps, -1, sizeof(stateProps)); + + /* recurse from state 0 and set all stateProps */ + getStateProp(mbcsTable->stateTable, stateProps, 0); + + for(state=0; state<mbcsTable->countStates; ++state) { + /*if(stateProps[state]==-1) { + printf("unused/unreachable <icu:state> %d\n", state); + }*/ + if(stateProps[state]>=0x40) { + /* start from each direct state */ + enumToU( + mbcsTable, stateProps, state, 0, 0, + callback, context, + pErrorCode); + } + } +} + +U_CFUNC void +ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData, + const USetAdder *sa, + UConverterUnicodeSet which, + UConverterSetFilter filter, + UErrorCode *pErrorCode) { + const UConverterMBCSTable *mbcsTable; + const uint16_t *table; + + uint32_t st3; + uint16_t st1, maxStage1, st2; + + UChar32 c; + + /* enumerate the from-Unicode trie table */ + mbcsTable=&sharedData->mbcs; + table=mbcsTable->fromUnicodeTable; + if(mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { + maxStage1=0x440; + } else { + maxStage1=0x40; + } + + c=0; /* keep track of the current code point while enumerating */ + + if(mbcsTable->outputType==MBCS_OUTPUT_1) { + const uint16_t *stage2, *stage3, *results; + uint16_t minValue; + + results=(const uint16_t *)mbcsTable->fromUnicodeBytes; + + /* + * Set a threshold variable for selecting which mappings to use. + * See ucnv_MBCSSingleFromBMPWithOffsets() and + * MBCS_SINGLE_RESULT_FROM_U() for details. + */ + if(which==UCNV_ROUNDTRIP_SET) { + /* use only roundtrips */ + minValue=0xf00; + } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ { + /* use all roundtrip and fallback results */ + minValue=0x800; + } + + for(st1=0; st1<maxStage1; ++st1) { + st2=table[st1]; + if(st2>maxStage1) { + stage2=table+st2; + for(st2=0; st2<64; ++st2) { + if((st3=stage2[st2])!=0) { + /* read the stage 3 block */ + stage3=results+st3; + + do { + if(*stage3++>=minValue) { + sa->add(sa->set, c); + } + } while((++c&0xf)!=0); + } else { + c+=16; /* empty stage 3 block */ + } + } + } else { + c+=1024; /* empty stage 2 block */ + } + } + } else { + const uint32_t *stage2; + const uint8_t *stage3, *bytes; + uint32_t st3Multiplier; + uint32_t value; + UBool useFallback; + + bytes=mbcsTable->fromUnicodeBytes; + + useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET); + + switch(mbcsTable->outputType) { + case MBCS_OUTPUT_3: + case MBCS_OUTPUT_4_EUC: + st3Multiplier=3; + break; + case MBCS_OUTPUT_4: + st3Multiplier=4; + break; + default: + st3Multiplier=2; + break; + } + + for(st1=0; st1<maxStage1; ++st1) { + st2=table[st1]; + if(st2>(maxStage1>>1)) { + stage2=(const uint32_t *)table+st2; + for(st2=0; st2<64; ++st2) { + if((st3=stage2[st2])!=0) { + /* read the stage 3 block */ + stage3=bytes+st3Multiplier*16*(uint32_t)(uint16_t)st3; + + /* get the roundtrip flags for the stage 3 block */ + st3>>=16; + + /* + * Add code points for which the roundtrip flag is set, + * or which map to non-zero bytes if we use fallbacks. + * See ucnv_MBCSFromUnicodeWithOffsets() for details. + */ + switch(filter) { + case UCNV_SET_FILTER_NONE: + do { + if(st3&1) { + sa->add(sa->set, c); + stage3+=st3Multiplier; + } else if(useFallback) { + uint8_t b=0; + switch(st3Multiplier) { + case 4: + b|=*stage3++; + U_FALLTHROUGH; + case 3: + b|=*stage3++; + U_FALLTHROUGH; + case 2: + b|=stage3[0]|stage3[1]; + stage3+=2; + U_FALLTHROUGH; + default: + break; + } + if(b!=0) { + sa->add(sa->set, c); + } + } + st3>>=1; + } while((++c&0xf)!=0); + break; + case UCNV_SET_FILTER_DBCS_ONLY: + /* Ignore single-byte results (<0x100). */ + do { + if(((st3&1)!=0 || useFallback) && *((const uint16_t *)stage3)>=0x100) { + sa->add(sa->set, c); + } + st3>>=1; + stage3+=2; /* +=st3Multiplier */ + } while((++c&0xf)!=0); + break; + case UCNV_SET_FILTER_2022_CN: + /* Only add code points that map to CNS 11643 planes 1 & 2 for non-EXT ISO-2022-CN. */ + do { + if(((st3&1)!=0 || useFallback) && ((value=*stage3)==0x81 || value==0x82)) { + sa->add(sa->set, c); + } + st3>>=1; + stage3+=3; /* +=st3Multiplier */ + } while((++c&0xf)!=0); + break; + case UCNV_SET_FILTER_SJIS: + /* Only add code points that map to Shift-JIS codes corresponding to JIS X 0208. */ + do { + if(((st3&1)!=0 || useFallback) && (value=*((const uint16_t *)stage3))>=0x8140 && value<=0xeffc) { + sa->add(sa->set, c); + } + st3>>=1; + stage3+=2; /* +=st3Multiplier */ + } while((++c&0xf)!=0); + break; + case UCNV_SET_FILTER_GR94DBCS: + /* Only add code points that map to ISO 2022 GR 94 DBCS codes (each byte A1..FE). */ + do { + if( ((st3&1)!=0 || useFallback) && + (uint16_t)((value=*((const uint16_t *)stage3)) - 0xa1a1)<=(0xfefe - 0xa1a1) && + (uint8_t)(value-0xa1)<=(0xfe - 0xa1) + ) { + sa->add(sa->set, c); + } + st3>>=1; + stage3+=2; /* +=st3Multiplier */ + } while((++c&0xf)!=0); + break; + case UCNV_SET_FILTER_HZ: + /* Only add code points that are suitable for HZ DBCS (lead byte A1..FD). */ + do { + if( ((st3&1)!=0 || useFallback) && + (uint16_t)((value=*((const uint16_t *)stage3))-0xa1a1)<=(0xfdfe - 0xa1a1) && + (uint8_t)(value-0xa1)<=(0xfe - 0xa1) + ) { + sa->add(sa->set, c); + } + st3>>=1; + stage3+=2; /* +=st3Multiplier */ + } while((++c&0xf)!=0); + break; + default: + *pErrorCode=U_INTERNAL_PROGRAM_ERROR; + return; + } + } else { + c+=16; /* empty stage 3 block */ + } + } + } else { + c+=1024; /* empty stage 2 block */ + } + } + } + + ucnv_extGetUnicodeSet(sharedData, sa, which, filter, pErrorCode); +} + +U_CFUNC void +ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + ucnv_MBCSGetFilteredUnicodeSetForUnicode( + sharedData, sa, which, + sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ? + UCNV_SET_FILTER_DBCS_ONLY : + UCNV_SET_FILTER_NONE, + pErrorCode); +} + +static void U_CALLCONV +ucnv_MBCSGetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) { + if(cnv->options&_MBCS_OPTION_GB18030) { + sa->addRange(sa->set, 0, 0xd7ff); + sa->addRange(sa->set, 0xe000, 0x10ffff); + } else { + ucnv_MBCSGetUnicodeSetForUnicode(cnv->sharedData, sa, which, pErrorCode); + } +} + +/* conversion extensions for input not in the main table -------------------- */ + +/* + * Hardcoded extension handling for GB 18030. + * Definition of LINEAR macros and gb18030Ranges see near the beginning of the file. + * + * In the future, conversion extensions may handle m:n mappings and delta tables, + * see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/conversion/conversion_extensions.html + * + * If an input character cannot be mapped, then these functions set an error + * code. The framework will then call the callback function. + */ + +/* + * @return if(U_FAILURE) return the code point for cnv->fromUChar32 + * else return 0 after output has been written to the target + */ +static UChar32 +_extFromU(UConverter *cnv, const UConverterSharedData *sharedData, + UChar32 cp, + const UChar **source, const UChar *sourceLimit, + uint8_t **target, const uint8_t *targetLimit, + int32_t **offsets, int32_t sourceIndex, + UBool flush, + UErrorCode *pErrorCode) { + const int32_t *cx; + + cnv->useSubChar1=FALSE; + + if( (cx=sharedData->mbcs.extIndexes)!=NULL && + ucnv_extInitialMatchFromU( + cnv, cx, + cp, source, sourceLimit, + (char **)target, (char *)targetLimit, + offsets, sourceIndex, + flush, + pErrorCode) + ) { + return 0; /* an extension mapping handled the input */ + } + + /* GB 18030 */ + if((cnv->options&_MBCS_OPTION_GB18030)!=0) { + const uint32_t *range; + int32_t i; + + range=gb18030Ranges[0]; + for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) { + if(range[0]<=(uint32_t)cp && (uint32_t)cp<=range[1]) { + /* found the Unicode code point, output the four-byte sequence for it */ + uint32_t linear; + char bytes[4]; + + /* get the linear value of the first GB 18030 code in this range */ + linear=range[2]-LINEAR_18030_BASE; + + /* add the offset from the beginning of the range */ + linear+=((uint32_t)cp-range[0]); + + /* turn this into a four-byte sequence */ + bytes[3]=(char)(0x30+linear%10); linear/=10; + bytes[2]=(char)(0x81+linear%126); linear/=126; + bytes[1]=(char)(0x30+linear%10); linear/=10; + bytes[0]=(char)(0x81+linear); + + /* output this sequence */ + ucnv_fromUWriteBytes(cnv, + bytes, 4, (char **)target, (char *)targetLimit, + offsets, sourceIndex, pErrorCode); + return 0; + } + } + } + + /* no mapping */ + *pErrorCode=U_INVALID_CHAR_FOUND; + return cp; +} + +/* + * Input sequence: cnv->toUBytes[0..length[ + * @return if(U_FAILURE) return the length (toULength, byteIndex) for the input + * else return 0 after output has been written to the target + */ +static int8_t +_extToU(UConverter *cnv, const UConverterSharedData *sharedData, + int8_t length, + const uint8_t **source, const uint8_t *sourceLimit, + UChar **target, const UChar *targetLimit, + int32_t **offsets, int32_t sourceIndex, + UBool flush, + UErrorCode *pErrorCode) { + const int32_t *cx; + + if( (cx=sharedData->mbcs.extIndexes)!=NULL && + ucnv_extInitialMatchToU( + cnv, cx, + length, (const char **)source, (const char *)sourceLimit, + target, targetLimit, + offsets, sourceIndex, + flush, + pErrorCode) + ) { + return 0; /* an extension mapping handled the input */ + } + + /* GB 18030 */ + if(length==4 && (cnv->options&_MBCS_OPTION_GB18030)!=0) { + const uint32_t *range; + uint32_t linear; + int32_t i; + + linear=LINEAR_18030(cnv->toUBytes[0], cnv->toUBytes[1], cnv->toUBytes[2], cnv->toUBytes[3]); + range=gb18030Ranges[0]; + for(i=0; i<UPRV_LENGTHOF(gb18030Ranges); range+=4, ++i) { + if(range[2]<=linear && linear<=range[3]) { + /* found the sequence, output the Unicode code point for it */ + *pErrorCode=U_ZERO_ERROR; + + /* add the linear difference between the input and start sequences to the start code point */ + linear=range[0]+(linear-range[2]); + + /* output this code point */ + ucnv_toUWriteCodePoint(cnv, linear, target, targetLimit, offsets, sourceIndex, pErrorCode); + + return 0; + } + } + } + + /* no mapping */ + *pErrorCode=U_INVALID_CHAR_FOUND; + return length; +} + +/* EBCDIC swap LF<->NL ------------------------------------------------------ */ + +/* + * This code modifies a standard EBCDIC<->Unicode mapping table for + * OS/390 (z/OS) Unix System Services (Open Edition). + * The difference is in the mapping of Line Feed and New Line control codes: + * Standard EBCDIC maps + * + * <U000A> \x25 |0 + * <U0085> \x15 |0 + * + * but OS/390 USS EBCDIC swaps the control codes for LF and NL, + * mapping + * + * <U000A> \x15 |0 + * <U0085> \x25 |0 + * + * This code modifies a loaded standard EBCDIC<->Unicode mapping table + * by copying it into allocated memory and swapping the LF and NL values. + * It allows to support the same EBCDIC charset in both versions without + * duplicating the entire installed table. + */ + +/* standard EBCDIC codes */ +#define EBCDIC_LF 0x25 +#define EBCDIC_NL 0x15 + +/* standard EBCDIC codes with roundtrip flag as stored in Unicode-to-single-byte tables */ +#define EBCDIC_RT_LF 0xf25 +#define EBCDIC_RT_NL 0xf15 + +/* Unicode code points */ +#define U_LF 0x0a +#define U_NL 0x85 + +static UBool +_EBCDICSwapLFNL(UConverterSharedData *sharedData, UErrorCode *pErrorCode) { + UConverterMBCSTable *mbcsTable; + + const uint16_t *table, *results; + const uint8_t *bytes; + + int32_t (*newStateTable)[256]; + uint16_t *newResults; + uint8_t *p; + char *name; + + uint32_t stage2Entry; + uint32_t size, sizeofFromUBytes; + + mbcsTable=&sharedData->mbcs; + + table=mbcsTable->fromUnicodeTable; + bytes=mbcsTable->fromUnicodeBytes; + results=(const uint16_t *)bytes; + + /* + * Check that this is an EBCDIC table with SBCS portion - + * SBCS or EBCDIC_STATEFUL with standard EBCDIC LF and NL mappings. + * + * If not, ignore the option. Options are always ignored if they do not apply. + */ + if(!( + (mbcsTable->outputType==MBCS_OUTPUT_1 || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) && + mbcsTable->stateTable[0][EBCDIC_LF]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF) && + mbcsTable->stateTable[0][EBCDIC_NL]==MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL) + )) { + return FALSE; + } + + if(mbcsTable->outputType==MBCS_OUTPUT_1) { + if(!( + EBCDIC_RT_LF==MBCS_SINGLE_RESULT_FROM_U(table, results, U_LF) && + EBCDIC_RT_NL==MBCS_SINGLE_RESULT_FROM_U(table, results, U_NL) + )) { + return FALSE; + } + } else /* MBCS_OUTPUT_2_SISO */ { + stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF); + if(!( + MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_LF)!=0 && + EBCDIC_LF==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_LF) + )) { + return FALSE; + } + + stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL); + if(!( + MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, U_NL)!=0 && + EBCDIC_NL==MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, U_NL) + )) { + return FALSE; + } + } + + if(mbcsTable->fromUBytesLength>0) { + /* + * We _know_ the number of bytes in the fromUnicodeBytes array + * starting with header.version 4.1. + */ + sizeofFromUBytes=mbcsTable->fromUBytesLength; + } else { + /* + * Otherwise: + * There used to be code to enumerate the fromUnicode + * trie and find the highest entry, but it was removed in ICU 3.2 + * because it was not tested and caused a low code coverage number. + * See Jitterbug 3674. + * This affects only some .cnv file formats with a header.version + * below 4.1, and only when swaplfnl is requested. + * + * ucnvmbcs.c revision 1.99 is the last one with the + * ucnv_MBCSSizeofFromUBytes() function. + */ + *pErrorCode=U_INVALID_FORMAT_ERROR; + return FALSE; + } + + /* + * The table has an appropriate format. + * Allocate and build + * - a modified to-Unicode state table + * - a modified from-Unicode output array + * - a converter name string with the swap option appended + */ + size= + mbcsTable->countStates*1024+ + sizeofFromUBytes+ + UCNV_MAX_CONVERTER_NAME_LENGTH+20; + p=(uint8_t *)uprv_malloc(size); + if(p==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + + /* copy and modify the to-Unicode state table */ + newStateTable=(int32_t (*)[256])p; + uprv_memcpy(newStateTable, mbcsTable->stateTable, mbcsTable->countStates*1024); + + newStateTable[0][EBCDIC_LF]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_NL); + newStateTable[0][EBCDIC_NL]=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, U_LF); + + /* copy and modify the from-Unicode result table */ + newResults=(uint16_t *)newStateTable[mbcsTable->countStates]; + uprv_memcpy(newResults, bytes, sizeofFromUBytes); + + /* conveniently, the table access macros work on the left side of expressions */ + if(mbcsTable->outputType==MBCS_OUTPUT_1) { + MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_LF)=EBCDIC_RT_NL; + MBCS_SINGLE_RESULT_FROM_U(table, newResults, U_NL)=EBCDIC_RT_LF; + } else /* MBCS_OUTPUT_2_SISO */ { + stage2Entry=MBCS_STAGE_2_FROM_U(table, U_LF); + MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_LF)=EBCDIC_NL; + + stage2Entry=MBCS_STAGE_2_FROM_U(table, U_NL); + MBCS_VALUE_2_FROM_STAGE_2(newResults, stage2Entry, U_NL)=EBCDIC_LF; + } + + /* set the canonical converter name */ + name=(char *)newResults+sizeofFromUBytes; + uprv_strcpy(name, sharedData->staticData->name); + uprv_strcat(name, UCNV_SWAP_LFNL_OPTION_STRING); + + /* set the pointers */ + icu::umtx_lock(NULL); + if(mbcsTable->swapLFNLStateTable==NULL) { + mbcsTable->swapLFNLStateTable=newStateTable; + mbcsTable->swapLFNLFromUnicodeBytes=(uint8_t *)newResults; + mbcsTable->swapLFNLName=name; + + newStateTable=NULL; + } + icu::umtx_unlock(NULL); + + /* release the allocated memory if another thread beat us to it */ + if(newStateTable!=NULL) { + uprv_free(newStateTable); + } + return TRUE; +} + +/* reconstitute omitted fromUnicode data ------------------------------------ */ + +/* for details, compare with genmbcs.c MBCSAddFromUnicode() and transformEUC() */ +static UBool U_CALLCONV +writeStage3Roundtrip(const void *context, uint32_t value, UChar32 codePoints[32]) { + UConverterMBCSTable *mbcsTable=(UConverterMBCSTable *)context; + const uint16_t *table; + uint32_t *stage2; + uint8_t *bytes, *p; + UChar32 c; + int32_t i, st3; + + table=mbcsTable->fromUnicodeTable; + bytes=(uint8_t *)mbcsTable->fromUnicodeBytes; + + /* for EUC outputTypes, modify the value like genmbcs.c's transformEUC() */ + switch(mbcsTable->outputType) { + case MBCS_OUTPUT_3_EUC: + if(value<=0xffff) { + /* short sequences are stored directly */ + /* code set 0 or 1 */ + } else if(value<=0x8effff) { + /* code set 2 */ + value&=0x7fff; + } else /* first byte is 0x8f */ { + /* code set 3 */ + value&=0xff7f; + } + break; + case MBCS_OUTPUT_4_EUC: + if(value<=0xffffff) { + /* short sequences are stored directly */ + /* code set 0 or 1 */ + } else if(value<=0x8effffff) { + /* code set 2 */ + value&=0x7fffff; + } else /* first byte is 0x8f */ { + /* code set 3 */ + value&=0xff7fff; + } + break; + default: + break; + } + + for(i=0; i<=0x1f; ++value, ++i) { + c=codePoints[i]; + if(c<0) { + continue; + } + + /* locate the stage 2 & 3 data */ + stage2=((uint32_t *)table)+table[c>>10]+((c>>4)&0x3f); + p=bytes; + st3=(int32_t)(uint16_t)*stage2*16+(c&0xf); + + /* write the codepage bytes into stage 3 */ + switch(mbcsTable->outputType) { + case MBCS_OUTPUT_3: + case MBCS_OUTPUT_4_EUC: + p+=st3*3; + p[0]=(uint8_t)(value>>16); + p[1]=(uint8_t)(value>>8); + p[2]=(uint8_t)value; + break; + case MBCS_OUTPUT_4: + ((uint32_t *)p)[st3]=value; + break; + default: + /* 2 bytes per character */ + ((uint16_t *)p)[st3]=(uint16_t)value; + break; + } + + /* set the roundtrip flag */ + *stage2|=(1UL<<(16+(c&0xf))); + } + return TRUE; + } + +static void +reconstituteData(UConverterMBCSTable *mbcsTable, + uint32_t stage1Length, uint32_t stage2Length, + uint32_t fullStage2Length, /* lengths are numbers of units, not bytes */ + UErrorCode *pErrorCode) { + uint16_t *stage1; + uint32_t *stage2; + uint32_t dataLength=stage1Length*2+fullStage2Length*4+mbcsTable->fromUBytesLength; + mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength); + if(mbcsTable->reconstitutedData==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memset(mbcsTable->reconstitutedData, 0, dataLength); + + /* copy existing data and reroute the pointers */ + stage1=(uint16_t *)mbcsTable->reconstitutedData; + uprv_memcpy(stage1, mbcsTable->fromUnicodeTable, stage1Length*2); + + stage2=(uint32_t *)(stage1+stage1Length); + uprv_memcpy(stage2+(fullStage2Length-stage2Length), + mbcsTable->fromUnicodeTable+stage1Length, + stage2Length*4); + + mbcsTable->fromUnicodeTable=stage1; + mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length); + + /* indexes into stage 2 count from the bottom of the fromUnicodeTable */ + stage2=(uint32_t *)stage1; + + /* reconstitute the initial part of stage 2 from the mbcsIndex */ + { + int32_t stageUTF8Length=((int32_t)mbcsTable->maxFastUChar+1)>>6; + int32_t stageUTF8Index=0; + int32_t st1, st2, st3, i; + + for(st1=0; stageUTF8Index<stageUTF8Length; ++st1) { + st2=stage1[st1]; + if(st2!=(int32_t)stage1Length/2) { + /* each stage 2 block has 64 entries corresponding to 16 entries in the mbcsIndex */ + for(i=0; i<16; ++i) { + st3=mbcsTable->mbcsIndex[stageUTF8Index++]; + if(st3!=0) { + /* an stage 2 entry's index is per stage 3 16-block, not per stage 3 entry */ + st3>>=4; + /* + * 4 stage 2 entries point to 4 consecutive stage 3 16-blocks which are + * allocated together as a single 64-block for access from the mbcsIndex + */ + stage2[st2++]=st3++; + stage2[st2++]=st3++; + stage2[st2++]=st3++; + stage2[st2++]=st3; + } else { + /* no stage 3 block, skip */ + st2+=4; + } + } + } else { + /* no stage 2 block, skip */ + stageUTF8Index+=16; + } + } + } + + /* reconstitute fromUnicodeBytes with roundtrips from toUnicode data */ + ucnv_MBCSEnumToUnicode(mbcsTable, writeStage3Roundtrip, mbcsTable, pErrorCode); +} + +/* MBCS setup functions ----------------------------------------------------- */ + +static void U_CALLCONV +ucnv_MBCSLoad(UConverterSharedData *sharedData, + UConverterLoadArgs *pArgs, + const uint8_t *raw, + UErrorCode *pErrorCode) { + UDataInfo info; + UConverterMBCSTable *mbcsTable=&sharedData->mbcs; + _MBCSHeader *header=(_MBCSHeader *)raw; + uint32_t offset; + uint32_t headerLength; + UBool noFromU=FALSE; + + if(header->version[0]==4) { + headerLength=MBCS_HEADER_V4_LENGTH; + } else if(header->version[0]==5 && header->version[1]>=3 && + (header->options&MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0) { + headerLength=header->options&MBCS_OPT_LENGTH_MASK; + noFromU=(UBool)((header->options&MBCS_OPT_NO_FROM_U)!=0); + } else { + *pErrorCode=U_INVALID_TABLE_FORMAT; + return; + } + + mbcsTable->outputType=(uint8_t)header->flags; + if(noFromU && mbcsTable->outputType==MBCS_OUTPUT_1) { + *pErrorCode=U_INVALID_TABLE_FORMAT; + return; + } + + /* extension data, header version 4.2 and higher */ + offset=header->flags>>8; + if(offset!=0) { + mbcsTable->extIndexes=(const int32_t *)(raw+offset); + } + + if(mbcsTable->outputType==MBCS_OUTPUT_EXT_ONLY) { + UConverterLoadArgs args=UCNV_LOAD_ARGS_INITIALIZER; + UConverterSharedData *baseSharedData; + const int32_t *extIndexes; + const char *baseName; + + /* extension-only file, load the base table and set values appropriately */ + if((extIndexes=mbcsTable->extIndexes)==NULL) { + /* extension-only file without extension */ + *pErrorCode=U_INVALID_TABLE_FORMAT; + return; + } + + if(pArgs->nestedLoads!=1) { + /* an extension table must not be loaded as a base table */ + *pErrorCode=U_INVALID_TABLE_FILE; + return; + } + + /* load the base table */ + baseName=(const char *)header+headerLength*4; + if(0==uprv_strcmp(baseName, sharedData->staticData->name)) { + /* forbid loading this same extension-only file */ + *pErrorCode=U_INVALID_TABLE_FORMAT; + return; + } + + /* TODO parse package name out of the prefix of the base name in the extension .cnv file? */ + args.size=sizeof(UConverterLoadArgs); + args.nestedLoads=2; + args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable; + args.reserved=pArgs->reserved; + args.options=pArgs->options; + args.pkg=pArgs->pkg; + args.name=baseName; + baseSharedData=ucnv_load(&args, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + if( baseSharedData->staticData->conversionType!=UCNV_MBCS || + baseSharedData->mbcs.baseSharedData!=NULL + ) { + ucnv_unload(baseSharedData); + *pErrorCode=U_INVALID_TABLE_FORMAT; + return; + } + if(pArgs->onlyTestIsLoadable) { + /* + * Exit as soon as we know that we can load the converter + * and the format is valid and supported. + * The worst that can happen in the following code is a memory + * allocation error. + */ + ucnv_unload(baseSharedData); + return; + } + + /* copy the base table data */ + uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable)); + + /* overwrite values with relevant ones for the extension converter */ + mbcsTable->baseSharedData=baseSharedData; + mbcsTable->extIndexes=extIndexes; + + /* + * It would be possible to share the swapLFNL data with a base converter, + * but the generated name would have to be different, and the memory + * would have to be free'd only once. + * It is easier to just create the data for the extension converter + * separately when it is requested. + */ + mbcsTable->swapLFNLStateTable=NULL; + mbcsTable->swapLFNLFromUnicodeBytes=NULL; + mbcsTable->swapLFNLName=NULL; + + /* + * The reconstitutedData must be deleted only when the base converter + * is unloaded. + */ + mbcsTable->reconstitutedData=NULL; + + /* + * Set a special, runtime-only outputType if the extension converter + * is a DBCS version of a base converter that also maps single bytes. + */ + if( sharedData->staticData->conversionType==UCNV_DBCS || + (sharedData->staticData->conversionType==UCNV_MBCS && + sharedData->staticData->minBytesPerChar>=2) + ) { + if(baseSharedData->mbcs.outputType==MBCS_OUTPUT_2_SISO) { + /* the base converter is SI/SO-stateful */ + int32_t entry; + + /* get the dbcs state from the state table entry for SO=0x0e */ + entry=mbcsTable->stateTable[0][0xe]; + if( MBCS_ENTRY_IS_FINAL(entry) && + MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_CHANGE_ONLY && + MBCS_ENTRY_FINAL_STATE(entry)!=0 + ) { + mbcsTable->dbcsOnlyState=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); + + mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY; + } + } else if( + baseSharedData->staticData->conversionType==UCNV_MBCS && + baseSharedData->staticData->minBytesPerChar==1 && + baseSharedData->staticData->maxBytesPerChar==2 && + mbcsTable->countStates<=127 + ) { + /* non-stateful base converter, need to modify the state table */ + int32_t (*newStateTable)[256]; + int32_t *state; + int32_t i, count; + + /* allocate a new state table and copy the base state table contents */ + count=mbcsTable->countStates; + newStateTable=(int32_t (*)[256])uprv_malloc((count+1)*1024); + if(newStateTable==NULL) { + ucnv_unload(baseSharedData); + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + + uprv_memcpy(newStateTable, mbcsTable->stateTable, count*1024); + + /* change all final single-byte entries to go to a new all-illegal state */ + state=newStateTable[0]; + for(i=0; i<256; ++i) { + if(MBCS_ENTRY_IS_FINAL(state[i])) { + state[i]=MBCS_ENTRY_TRANSITION(count, 0); + } + } + + /* build the new all-illegal state */ + state=newStateTable[count]; + for(i=0; i<256; ++i) { + state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0); + } + mbcsTable->stateTable=(const int32_t (*)[256])newStateTable; + mbcsTable->countStates=(uint8_t)(count+1); + mbcsTable->stateTableOwned=TRUE; + + mbcsTable->outputType=MBCS_OUTPUT_DBCS_ONLY; + } + } + + /* + * unlike below for files with base tables, do not get the unicodeMask + * from the sharedData; instead, use the base table's unicodeMask, + * which we copied in the memcpy above; + * this is necessary because the static data unicodeMask, especially + * the UCNV_HAS_SUPPLEMENTARY flag, is part of the base table data + */ + } else { + /* conversion file with a base table; an additional extension table is optional */ + /* make sure that the output type is known */ + switch(mbcsTable->outputType) { + case MBCS_OUTPUT_1: + case MBCS_OUTPUT_2: + case MBCS_OUTPUT_3: + case MBCS_OUTPUT_4: + case MBCS_OUTPUT_3_EUC: + case MBCS_OUTPUT_4_EUC: + case MBCS_OUTPUT_2_SISO: + /* OK */ + break; + default: + *pErrorCode=U_INVALID_TABLE_FORMAT; + return; + } + if(pArgs->onlyTestIsLoadable) { + /* + * Exit as soon as we know that we can load the converter + * and the format is valid and supported. + * The worst that can happen in the following code is a memory + * allocation error. + */ + return; + } + + mbcsTable->countStates=(uint8_t)header->countStates; + mbcsTable->countToUFallbacks=header->countToUFallbacks; + mbcsTable->stateTable=(const int32_t (*)[256])(raw+headerLength*4); + mbcsTable->toUFallbacks=(const _MBCSToUFallback *)(mbcsTable->stateTable+header->countStates); + mbcsTable->unicodeCodeUnits=(const uint16_t *)(raw+header->offsetToUCodeUnits); + + mbcsTable->fromUnicodeTable=(const uint16_t *)(raw+header->offsetFromUTable); + mbcsTable->fromUnicodeBytes=(const uint8_t *)(raw+header->offsetFromUBytes); + mbcsTable->fromUBytesLength=header->fromUBytesLength; + + /* + * converter versions 6.1 and up contain a unicodeMask that is + * used here to select the most efficient function implementations + */ + info.size=sizeof(UDataInfo); + udata_getInfo((UDataMemory *)sharedData->dataMemory, &info); + if(info.formatVersion[0]>6 || (info.formatVersion[0]==6 && info.formatVersion[1]>=1)) { + /* mask off possible future extensions to be safe */ + mbcsTable->unicodeMask=(uint8_t)(sharedData->staticData->unicodeMask&3); + } else { + /* for older versions, assume worst case: contains anything possible (prevent over-optimizations) */ + mbcsTable->unicodeMask=UCNV_HAS_SUPPLEMENTARY|UCNV_HAS_SURROGATES; + } + + /* + * _MBCSHeader.version 4.3 adds utf8Friendly data structures. + * Check for the header version, SBCS vs. MBCS, and for whether the + * data structures are optimized for code points as high as what the + * runtime code is designed for. + * The implementation does not handle mapping tables with entries for + * unpaired surrogates. + */ + if( header->version[1]>=3 && + (mbcsTable->unicodeMask&UCNV_HAS_SURROGATES)==0 && + (mbcsTable->countStates==1 ? + (header->version[2]>=(SBCS_FAST_MAX>>8)) : + (header->version[2]>=(MBCS_FAST_MAX>>8)) + ) + ) { + mbcsTable->utf8Friendly=TRUE; + + if(mbcsTable->countStates==1) { + /* + * SBCS: Stage 3 is allocated in 64-entry blocks for U+0000..SBCS_FAST_MAX or higher. + * Build a table with indexes to each block, to be used instead of + * the regular stage 1/2 table. + */ + int32_t i; + for(i=0; i<(SBCS_FAST_LIMIT>>6); ++i) { + mbcsTable->sbcsIndex[i]=mbcsTable->fromUnicodeTable[mbcsTable->fromUnicodeTable[i>>4]+((i<<2)&0x3c)]; + } + /* set SBCS_FAST_MAX to reflect the reach of sbcsIndex[] even if header->version[2]>(SBCS_FAST_MAX>>8) */ + mbcsTable->maxFastUChar=SBCS_FAST_MAX; + } else { + /* + * MBCS: Stage 3 is allocated in 64-entry blocks for U+0000..MBCS_FAST_MAX or higher. + * The .cnv file is prebuilt with an additional stage table with indexes + * to each block. + */ + mbcsTable->mbcsIndex=(const uint16_t *) + (mbcsTable->fromUnicodeBytes+ + (noFromU ? 0 : mbcsTable->fromUBytesLength)); + mbcsTable->maxFastUChar=(((UChar)header->version[2])<<8)|0xff; + } + } + + /* calculate a bit set of 4 ASCII characters per bit that round-trip to ASCII bytes */ + { + uint32_t asciiRoundtrips=0xffffffff; + int32_t i; + + for(i=0; i<0x80; ++i) { + if(mbcsTable->stateTable[0][i]!=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, i)) { + asciiRoundtrips&=~((uint32_t)1<<(i>>2)); + } + } + mbcsTable->asciiRoundtrips=asciiRoundtrips; + } + + if(noFromU) { + uint32_t stage1Length= + mbcsTable->unicodeMask&UCNV_HAS_SUPPLEMENTARY ? + 0x440 : 0x40; + uint32_t stage2Length= + (header->offsetFromUBytes-header->offsetFromUTable)/4- + stage1Length/2; + reconstituteData(mbcsTable, stage1Length, stage2Length, header->fullStage2Length, pErrorCode); + } + } + + /* Set the impl pointer here so that it is set for both extension-only and base tables. */ + if(mbcsTable->utf8Friendly) { + if(mbcsTable->countStates==1) { + sharedData->impl=&_SBCSUTF8Impl; + } else { + if(mbcsTable->outputType==MBCS_OUTPUT_2) { + sharedData->impl=&_DBCSUTF8Impl; + } + } + } + + if(mbcsTable->outputType==MBCS_OUTPUT_DBCS_ONLY || mbcsTable->outputType==MBCS_OUTPUT_2_SISO) { + /* + * MBCS_OUTPUT_DBCS_ONLY: No SBCS mappings, therefore ASCII does not roundtrip. + * MBCS_OUTPUT_2_SISO: Bypass the ASCII fastpath to handle prevLength correctly. + */ + mbcsTable->asciiRoundtrips=0; + } +} + +static void U_CALLCONV +ucnv_MBCSUnload(UConverterSharedData *sharedData) { + UConverterMBCSTable *mbcsTable=&sharedData->mbcs; + + if(mbcsTable->swapLFNLStateTable!=NULL) { + uprv_free(mbcsTable->swapLFNLStateTable); + } + if(mbcsTable->stateTableOwned) { + uprv_free((void *)mbcsTable->stateTable); + } + if(mbcsTable->baseSharedData!=NULL) { + ucnv_unload(mbcsTable->baseSharedData); + } + if(mbcsTable->reconstitutedData!=NULL) { + uprv_free(mbcsTable->reconstitutedData); + } +} + +static void U_CALLCONV +ucnv_MBCSOpen(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode) { + UConverterMBCSTable *mbcsTable; + const int32_t *extIndexes; + uint8_t outputType; + int8_t maxBytesPerUChar; + + if(pArgs->onlyTestIsLoadable) { + return; + } + + mbcsTable=&cnv->sharedData->mbcs; + outputType=mbcsTable->outputType; + + if(outputType==MBCS_OUTPUT_DBCS_ONLY) { + /* the swaplfnl option does not apply, remove it */ + cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL; + } + + if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) { + /* do this because double-checked locking is broken */ + UBool isCached; + + icu::umtx_lock(NULL); + isCached=mbcsTable->swapLFNLStateTable!=NULL; + icu::umtx_unlock(NULL); + + if(!isCached) { + if(!_EBCDICSwapLFNL(cnv->sharedData, pErrorCode)) { + if(U_FAILURE(*pErrorCode)) { + return; /* something went wrong */ + } + + /* the option does not apply, remove it */ + cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL; + } + } + } + + if(uprv_strstr(pArgs->name, "18030")!=NULL) { + if(uprv_strstr(pArgs->name, "gb18030")!=NULL || uprv_strstr(pArgs->name, "GB18030")!=NULL) { + /* set a flag for GB 18030 mode, which changes the callback behavior */ + cnv->options|=_MBCS_OPTION_GB18030; + } + } else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) || (uprv_strstr(pArgs->name, "keis")!=NULL)) { + /* set a flag for KEIS converter, which changes the SI/SO character sequence */ + cnv->options|=_MBCS_OPTION_KEIS; + } else if((uprv_strstr(pArgs->name, "JEF")!=NULL) || (uprv_strstr(pArgs->name, "jef")!=NULL)) { + /* set a flag for JEF converter, which changes the SI/SO character sequence */ + cnv->options|=_MBCS_OPTION_JEF; + } else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) || (uprv_strstr(pArgs->name, "jips")!=NULL)) { + /* set a flag for JIPS converter, which changes the SI/SO character sequence */ + cnv->options|=_MBCS_OPTION_JIPS; + } + + /* fix maxBytesPerUChar depending on outputType and options etc. */ + if(outputType==MBCS_OUTPUT_2_SISO) { + cnv->maxBytesPerUChar=3; /* SO+DBCS */ + } + + extIndexes=mbcsTable->extIndexes; + if(extIndexes!=NULL) { + maxBytesPerUChar=(int8_t)UCNV_GET_MAX_BYTES_PER_UCHAR(extIndexes); + if(outputType==MBCS_OUTPUT_2_SISO) { + ++maxBytesPerUChar; /* SO + multiple DBCS */ + } + + if(maxBytesPerUChar>cnv->maxBytesPerUChar) { + cnv->maxBytesPerUChar=maxBytesPerUChar; + } + } + +#if 0 + /* + * documentation of UConverter fields used for status + * all of these fields are (re)set to 0 by ucnv_bld.c and ucnv_reset() + */ + + /* toUnicode */ + cnv->toUnicodeStatus=0; /* offset */ + cnv->mode=0; /* state */ + cnv->toULength=0; /* byteIndex */ + + /* fromUnicode */ + cnv->fromUChar32=0; + cnv->fromUnicodeStatus=1; /* prevLength */ +#endif +} + +U_CDECL_BEGIN + +static const char* U_CALLCONV +ucnv_MBCSGetName(const UConverter *cnv) { + if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0 && cnv->sharedData->mbcs.swapLFNLName!=NULL) { + return cnv->sharedData->mbcs.swapLFNLName; + } else { + return cnv->sharedData->staticData->name; + } +} +U_CDECL_END + + +/* MBCS-to-Unicode conversion functions ------------------------------------- */ + +static UChar32 U_CALLCONV +ucnv_MBCSGetFallback(UConverterMBCSTable *mbcsTable, uint32_t offset) { + const _MBCSToUFallback *toUFallbacks; + uint32_t i, start, limit; + + limit=mbcsTable->countToUFallbacks; + if(limit>0) { + /* do a binary search for the fallback mapping */ + toUFallbacks=mbcsTable->toUFallbacks; + start=0; + while(start<limit-1) { + i=(start+limit)/2; + if(offset<toUFallbacks[i].offset) { + limit=i; + } else { + start=i; + } + } + + /* did we really find it? */ + if(offset==toUFallbacks[start].offset) { + return toUFallbacks[start].codePoint; + } + } + + return 0xfffe; +} + +/* This version of ucnv_MBCSToUnicodeWithOffsets() is optimized for single-byte, single-state codepages. */ +static void +ucnv_MBCSSingleToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source, *sourceLimit; + UChar *target; + const UChar *targetLimit; + int32_t *offsets; + + const int32_t (*stateTable)[256]; + + int32_t sourceIndex; + + int32_t entry; + UChar c; + uint8_t action; + + /* set up the local pointers */ + cnv=pArgs->converter; + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=pArgs->target; + targetLimit=pArgs->targetLimit; + offsets=pArgs->offsets; + + if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { + stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; + } else { + stateTable=cnv->sharedData->mbcs.stateTable; + } + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex=0; + + /* conversion loop */ + while(source<sourceLimit) { + /* + * This following test is to see if available input would overflow the output. + * It does not catch output of more than one code unit that + * overflows as a result of a surrogate pair or callback output + * from the last source byte. + * Therefore, those situations also test for overflows and will + * then break the loop, too. + */ + if(target>=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + + entry=stateTable[0][*source++]; + /* MBCS_ENTRY_IS_FINAL(entry) */ + + /* test the most common case first */ + if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { + /* output BMP code point */ + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + + /* normal end of action codes: prepare for a new character */ + ++sourceIndex; + continue; + } + + /* + * An if-else-if chain provides more reliable performance for + * the most common cases compared to a switch. + */ + action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); + if(action==MBCS_STATE_VALID_DIRECT_20 || + (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) + ) { + entry=MBCS_ENTRY_FINAL_VALUE(entry); + /* output surrogate pair */ + *target++=(UChar)(0xd800|(UChar)(entry>>10)); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + c=(UChar)(0xdc00|(UChar)(entry&0x3ff)); + if(target<targetLimit) { + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + } else { + /* target overflow */ + cnv->UCharErrorBuffer[0]=c; + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + + ++sourceIndex; + continue; + } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { + if(UCNV_TO_U_USE_FALLBACK(cnv)) { + /* output BMP code point */ + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + + ++sourceIndex; + continue; + } + } else if(action==MBCS_STATE_UNASSIGNED) { + /* just fall through */ + } else if(action==MBCS_STATE_ILLEGAL) { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } else { + /* reserved, must never occur */ + ++sourceIndex; + continue; + } + + if(U_FAILURE(*pErrorCode)) { + /* callback(illegal) */ + break; + } else /* unassigned sequences indicated with byteIndex>0 */ { + /* try an extension mapping */ + pArgs->source=(const char *)source; + cnv->toUBytes[0]=*(source-1); + cnv->toULength=_extToU(cnv, cnv->sharedData, + 1, &source, sourceLimit, + &target, targetLimit, + &offsets, sourceIndex, + pArgs->flush, + pErrorCode); + sourceIndex+=1+(int32_t)(source-(const uint8_t *)pArgs->source); + + if(U_FAILURE(*pErrorCode)) { + /* not mappable or buffer overflow */ + break; + } + } + } + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; +} + +/* + * This version of ucnv_MBCSSingleToUnicodeWithOffsets() is optimized for single-byte, single-state codepages + * that only map to and from the BMP. + * In addition to single-byte optimizations, the offset calculations + * become much easier. + */ +static void +ucnv_MBCSSingleToBMPWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source, *sourceLimit, *lastSource; + UChar *target; + int32_t targetCapacity, length; + int32_t *offsets; + + const int32_t (*stateTable)[256]; + + int32_t sourceIndex; + + int32_t entry; + uint8_t action; + + /* set up the local pointers */ + cnv=pArgs->converter; + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { + stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; + } else { + stateTable=cnv->sharedData->mbcs.stateTable; + } + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex=0; + lastSource=source; + + /* + * since the conversion here is 1:1 UChar:uint8_t, we need only one counter + * for the minimum of the sourceLength and targetCapacity + */ + length=(int32_t)(sourceLimit-source); + if(length<targetCapacity) { + targetCapacity=length; + } + +#if MBCS_UNROLL_SINGLE_TO_BMP + /* unrolling makes it faster on Pentium III/Windows 2000 */ + /* unroll the loop with the most common case */ +unrolled: + if(targetCapacity>=16) { + int32_t count, loops, oredEntries; + + loops=count=targetCapacity>>4; + do { + oredEntries=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + oredEntries|=entry=stateTable[0][*source++]; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + + /* were all 16 entries really valid? */ + if(!MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(oredEntries)) { + /* no, return to the first of these 16 */ + source-=16; + target-=16; + break; + } + } while(--count>0); + count=loops-count; + targetCapacity-=16*count; + + if(offsets!=NULL) { + lastSource+=16*count; + while(count>0) { + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + --count; + } + } + } +#endif + + /* conversion loop */ + while(targetCapacity > 0 && source < sourceLimit) { + entry=stateTable[0][*source++]; + /* MBCS_ENTRY_IS_FINAL(entry) */ + + /* test the most common case first */ + if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { + /* output BMP code point */ + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + --targetCapacity; + continue; + } + + /* + * An if-else-if chain provides more reliable performance for + * the most common cases compared to a switch. + */ + action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); + if(action==MBCS_STATE_FALLBACK_DIRECT_16) { + if(UCNV_TO_U_USE_FALLBACK(cnv)) { + /* output BMP code point */ + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + --targetCapacity; + continue; + } + } else if(action==MBCS_STATE_UNASSIGNED) { + /* just fall through */ + } else if(action==MBCS_STATE_ILLEGAL) { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } else { + /* reserved, must never occur */ + continue; + } + + /* set offsets since the start or the last extension */ + if(offsets!=NULL) { + int32_t count=(int32_t)(source-lastSource); + + /* predecrement: do not set the offset for the callback-causing character */ + while(--count>0) { + *offsets++=sourceIndex++; + } + /* offset and sourceIndex are now set for the current character */ + } + + if(U_FAILURE(*pErrorCode)) { + /* callback(illegal) */ + break; + } else /* unassigned sequences indicated with byteIndex>0 */ { + /* try an extension mapping */ + lastSource=source; + cnv->toUBytes[0]=*(source-1); + cnv->toULength=_extToU(cnv, cnv->sharedData, + 1, &source, sourceLimit, + &target, pArgs->targetLimit, + &offsets, sourceIndex, + pArgs->flush, + pErrorCode); + sourceIndex+=1+(int32_t)(source-lastSource); + + if(U_FAILURE(*pErrorCode)) { + /* not mappable or buffer overflow */ + break; + } + + /* recalculate the targetCapacity after an extension mapping */ + targetCapacity=(int32_t)(pArgs->targetLimit-target); + length=(int32_t)(sourceLimit-source); + if(length<targetCapacity) { + targetCapacity=length; + } + } + +#if MBCS_UNROLL_SINGLE_TO_BMP + /* unrolling makes it faster on Pentium III/Windows 2000 */ + goto unrolled; +#endif + } + + if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + + /* set offsets since the start or the last callback */ + if(offsets!=NULL) { + size_t count=source-lastSource; + while(count>0) { + *offsets++=sourceIndex++; + --count; + } + } + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; +} + +static UBool +hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) { + const int32_t *row=stateTable[state]; + int32_t b, entry; + /* First test for final entries in this state for some commonly valid byte values. */ + entry=row[0xa1]; + if( !MBCS_ENTRY_IS_TRANSITION(entry) && + MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL + ) { + return TRUE; + } + entry=row[0x41]; + if( !MBCS_ENTRY_IS_TRANSITION(entry) && + MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL + ) { + return TRUE; + } + /* Then test for final entries in this state. */ + for(b=0; b<=0xff; ++b) { + entry=row[b]; + if( !MBCS_ENTRY_IS_TRANSITION(entry) && + MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL + ) { + return TRUE; + } + } + /* Then recurse for transition entries. */ + for(b=0; b<=0xff; ++b) { + entry=row[b]; + if( MBCS_ENTRY_IS_TRANSITION(entry) && + hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)) + ) { + return TRUE; + } + } + return FALSE; +} + +/* + * Is byte b a single/lead byte in this state? + * Recurse for transition states, because here we don't want to say that + * b is a lead byte if all byte sequences that start with b are illegal. + */ +static UBool +isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) { + const int32_t *row=stateTable[state]; + int32_t entry=row[b]; + if(MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */ + return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)); + } else { + uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); + if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) { + return FALSE; /* SI/SO are illegal for DBCS-only conversion */ + } else { + return action!=MBCS_STATE_ILLEGAL; + } + } +} + +U_CFUNC void +ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source, *sourceLimit; + UChar *target; + const UChar *targetLimit; + int32_t *offsets; + + const int32_t (*stateTable)[256]; + const uint16_t *unicodeCodeUnits; + + uint32_t offset; + uint8_t state; + int8_t byteIndex; + uint8_t *bytes; + + int32_t sourceIndex, nextSourceIndex; + + int32_t entry; + UChar c; + uint8_t action; + + /* use optimized function if possible */ + cnv=pArgs->converter; + + if(cnv->preToULength>0) { + /* + * pass sourceIndex=-1 because we continue from an earlier buffer + * in the future, this may change with continuous offsets + */ + ucnv_extContinueMatchToU(cnv, pArgs, -1, pErrorCode); + + if(U_FAILURE(*pErrorCode) || cnv->preToULength<0) { + return; + } + } + + if(cnv->sharedData->mbcs.countStates==1) { + if(!(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { + ucnv_MBCSSingleToBMPWithOffsets(pArgs, pErrorCode); + } else { + ucnv_MBCSSingleToUnicodeWithOffsets(pArgs, pErrorCode); + } + return; + } + + /* set up the local pointers */ + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=pArgs->target; + targetLimit=pArgs->targetLimit; + offsets=pArgs->offsets; + + if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { + stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; + } else { + stateTable=cnv->sharedData->mbcs.stateTable; + } + unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits; + + /* get the converter state from UConverter */ + offset=cnv->toUnicodeStatus; + byteIndex=cnv->toULength; + bytes=cnv->toUBytes; + + /* + * if we are in the SBCS state for a DBCS-only converter, + * then load the DBCS state from the MBCS data + * (dbcsOnlyState==0 if it is not a DBCS-only converter) + */ + if((state=(uint8_t)(cnv->mode))==0) { + state=cnv->sharedData->mbcs.dbcsOnlyState; + } + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex=byteIndex==0 ? 0 : -1; + nextSourceIndex=0; + + /* conversion loop */ + while(source<sourceLimit) { + /* + * This following test is to see if available input would overflow the output. + * It does not catch output of more than one code unit that + * overflows as a result of a surrogate pair or callback output + * from the last source byte. + * Therefore, those situations also test for overflows and will + * then break the loop, too. + */ + if(target>=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + + if(byteIndex==0) { + /* optimized loop for 1/2-byte input and BMP output */ + if(offsets==NULL) { + do { + entry=stateTable[state][*source]; + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); + offset=MBCS_ENTRY_TRANSITION_OFFSET(entry); + + ++source; + if( source<sourceLimit && + MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) && + MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 && + (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe + ) { + ++source; + *target++=c; + state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ + offset=0; + } else { + /* set the state and leave the optimized loop */ + bytes[0]=*(source-1); + byteIndex=1; + break; + } + } else { + if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { + /* output BMP code point */ + ++source; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ + } else { + /* leave the optimized loop */ + break; + } + } + } while(source<sourceLimit && target<targetLimit); + } else /* offsets!=NULL */ { + do { + entry=stateTable[state][*source]; + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); + offset=MBCS_ENTRY_TRANSITION_OFFSET(entry); + + ++source; + if( source<sourceLimit && + MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) && + MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 && + (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe + ) { + ++source; + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=(nextSourceIndex+=2); + } + state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ + offset=0; + } else { + /* set the state and leave the optimized loop */ + ++nextSourceIndex; + bytes[0]=*(source-1); + byteIndex=1; + break; + } + } else { + if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { + /* output BMP code point */ + ++source; + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=++nextSourceIndex; + } + state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ + } else { + /* leave the optimized loop */ + break; + } + } + } while(source<sourceLimit && target<targetLimit); + } + + /* + * these tests and break statements could be put inside the loop + * if C had "break outerLoop" like Java + */ + if(source>=sourceLimit) { + break; + } + if(target>=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + + ++nextSourceIndex; + bytes[byteIndex++]=*source++; + } else /* byteIndex>0 */ { + ++nextSourceIndex; + entry=stateTable[state][bytes[byteIndex++]=*source++]; + } + + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); + offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); + continue; + } + + /* save the previous state for proper extension mapping with SI/SO-stateful converters */ + cnv->mode=state; + + /* set the next state early so that we can reuse the entry variable */ + state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ + + /* + * An if-else-if chain provides more reliable performance for + * the most common cases compared to a switch. + */ + action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); + if(action==MBCS_STATE_VALID_16) { + offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); + c=unicodeCodeUnits[offset]; + if(c<0xfffe) { + /* output BMP code point */ + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + byteIndex=0; + } else if(c==0xfffe) { + if(UCNV_TO_U_USE_FALLBACK(cnv) && (entry=(int32_t)ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) { + /* output fallback BMP code point */ + *target++=(UChar)entry; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + byteIndex=0; + } + } else { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else if(action==MBCS_STATE_VALID_DIRECT_16) { + /* output BMP code point */ + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + byteIndex=0; + } else if(action==MBCS_STATE_VALID_16_PAIR) { + offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); + c=unicodeCodeUnits[offset++]; + if(c<0xd800) { + /* output BMP code point below 0xd800 */ + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + byteIndex=0; + } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) { + /* output roundtrip or fallback surrogate pair */ + *target++=(UChar)(c&0xdbff); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + byteIndex=0; + if(target<targetLimit) { + *target++=unicodeCodeUnits[offset]; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + } else { + /* target overflow */ + cnv->UCharErrorBuffer[0]=unicodeCodeUnits[offset]; + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + + offset=0; + break; + } + } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) { + /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ + *target++=unicodeCodeUnits[offset]; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + byteIndex=0; + } else if(c==0xffff) { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else if(action==MBCS_STATE_VALID_DIRECT_20 || + (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) + ) { + entry=MBCS_ENTRY_FINAL_VALUE(entry); + /* output surrogate pair */ + *target++=(UChar)(0xd800|(UChar)(entry>>10)); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + byteIndex=0; + c=(UChar)(0xdc00|(UChar)(entry&0x3ff)); + if(target<targetLimit) { + *target++=c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + } else { + /* target overflow */ + cnv->UCharErrorBuffer[0]=c; + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + + offset=0; + break; + } + } else if(action==MBCS_STATE_CHANGE_ONLY) { + /* + * This serves as a state change without any output. + * It is useful for reading simple stateful encodings, + * for example using just Shift-In/Shift-Out codes. + * The 21 unused bits may later be used for more sophisticated + * state transitions. + */ + if(cnv->sharedData->mbcs.dbcsOnlyState==0) { + byteIndex=0; + } else { + /* SI/SO are illegal for DBCS-only conversion */ + state=(uint8_t)(cnv->mode); /* restore the previous state */ + + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { + if(UCNV_TO_U_USE_FALLBACK(cnv)) { + /* output BMP code point */ + *target++=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + byteIndex=0; + } + } else if(action==MBCS_STATE_UNASSIGNED) { + /* just fall through */ + } else if(action==MBCS_STATE_ILLEGAL) { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } else { + /* reserved, must never occur */ + byteIndex=0; + } + + /* end of action codes: prepare for a new character */ + offset=0; + + if(byteIndex==0) { + sourceIndex=nextSourceIndex; + } else if(U_FAILURE(*pErrorCode)) { + /* callback(illegal) */ + if(byteIndex>1) { + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + */ + UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0); + int8_t i; + for(i=1; + i<byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, bytes[i]); + ++i) {} + if(i<byteIndex) { + /* Back out some bytes. */ + int8_t backOutDistance=byteIndex-i; + int32_t bytesFromThisBuffer=(int32_t)(source-(const uint8_t *)pArgs->source); + byteIndex=i; /* length of reported illegal byte sequence */ + if(backOutDistance<=bytesFromThisBuffer) { + source-=backOutDistance; + } else { + /* Back out bytes from the previous buffer: Need to replay them. */ + cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance); + /* preToULength is negative! */ + uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength); + source=(const uint8_t *)pArgs->source; + } + } + } + break; + } else /* unassigned sequences indicated with byteIndex>0 */ { + /* try an extension mapping */ + pArgs->source=(const char *)source; + byteIndex=_extToU(cnv, cnv->sharedData, + byteIndex, &source, sourceLimit, + &target, targetLimit, + &offsets, sourceIndex, + pArgs->flush, + pErrorCode); + sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source); + + if(U_FAILURE(*pErrorCode)) { + /* not mappable or buffer overflow */ + break; + } + } + } + + /* set the converter state back into UConverter */ + cnv->toUnicodeStatus=offset; + cnv->mode=state; + cnv->toULength=byteIndex; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; +} + +/* + * This version of ucnv_MBCSGetNextUChar() is optimized for single-byte, single-state codepages. + * We still need a conversion loop in case we find reserved action codes, which are to be ignored. + */ +static UChar32 +ucnv_MBCSSingleGetNextUChar(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const int32_t (*stateTable)[256]; + const uint8_t *source, *sourceLimit; + + int32_t entry; + uint8_t action; + + /* set up the local pointers */ + cnv=pArgs->converter; + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { + stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; + } else { + stateTable=cnv->sharedData->mbcs.stateTable; + } + + /* conversion loop */ + while(source<sourceLimit) { + entry=stateTable[0][*source++]; + /* MBCS_ENTRY_IS_FINAL(entry) */ + + /* write back the updated pointer early so that we can return directly */ + pArgs->source=(const char *)source; + + if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { + /* output BMP code point */ + return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + } + + /* + * An if-else-if chain provides more reliable performance for + * the most common cases compared to a switch. + */ + action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); + if( action==MBCS_STATE_VALID_DIRECT_20 || + (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) + ) { + /* output supplementary code point */ + return (UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000); + } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { + if(UCNV_TO_U_USE_FALLBACK(cnv)) { + /* output BMP code point */ + return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + } + } else if(action==MBCS_STATE_UNASSIGNED) { + /* just fall through */ + } else if(action==MBCS_STATE_ILLEGAL) { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } else { + /* reserved, must never occur */ + continue; + } + + if(U_FAILURE(*pErrorCode)) { + /* callback(illegal) */ + break; + } else /* unassigned sequence */ { + /* defer to the generic implementation */ + pArgs->source=(const char *)source-1; + return UCNV_GET_NEXT_UCHAR_USE_TO_U; + } + } + + /* no output because of empty input or only state changes */ + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0xffff; +} + +/* + * Version of _MBCSToUnicodeWithOffsets() optimized for single-character + * conversion without offset handling. + * + * When a character does not have a mapping to Unicode, then we return to the + * generic ucnv_getNextUChar() code for extension/GB 18030 and error/callback + * handling. + * We also defer to the generic code in other complicated cases and have them + * ultimately handled by _MBCSToUnicodeWithOffsets() itself. + * + * All normal mappings and errors are handled here. + */ +static UChar32 U_CALLCONV +ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const uint8_t *source, *sourceLimit, *lastSource; + + const int32_t (*stateTable)[256]; + const uint16_t *unicodeCodeUnits; + + uint32_t offset; + uint8_t state; + + int32_t entry; + UChar32 c; + uint8_t action; + + /* use optimized function if possible */ + cnv=pArgs->converter; + + if(cnv->preToULength>0) { + /* use the generic code in ucnv_getNextUChar() to continue with a partial match */ + return UCNV_GET_NEXT_UCHAR_USE_TO_U; + } + + if(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SURROGATES) { + /* + * Using the generic ucnv_getNextUChar() code lets us deal correctly + * with the rare case of a codepage that maps single surrogates + * without adding the complexity to this already complicated function here. + */ + return UCNV_GET_NEXT_UCHAR_USE_TO_U; + } else if(cnv->sharedData->mbcs.countStates==1) { + return ucnv_MBCSSingleGetNextUChar(pArgs, pErrorCode); + } + + /* set up the local pointers */ + source=lastSource=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + + if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { + stateTable=(const int32_t (*)[256])cnv->sharedData->mbcs.swapLFNLStateTable; + } else { + stateTable=cnv->sharedData->mbcs.stateTable; + } + unicodeCodeUnits=cnv->sharedData->mbcs.unicodeCodeUnits; + + /* get the converter state from UConverter */ + offset=cnv->toUnicodeStatus; + + /* + * if we are in the SBCS state for a DBCS-only converter, + * then load the DBCS state from the MBCS data + * (dbcsOnlyState==0 if it is not a DBCS-only converter) + */ + if((state=(uint8_t)(cnv->mode))==0) { + state=cnv->sharedData->mbcs.dbcsOnlyState; + } + + /* conversion loop */ + c=U_SENTINEL; + while(source<sourceLimit) { + entry=stateTable[state][*source++]; + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); + offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); + + /* optimization for 1/2-byte input and BMP output */ + if( source<sourceLimit && + MBCS_ENTRY_IS_FINAL(entry=stateTable[state][*source]) && + MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16 && + (c=unicodeCodeUnits[offset+MBCS_ENTRY_FINAL_VALUE_16(entry)])<0xfffe + ) { + ++source; + state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ + /* output BMP code point */ + break; + } + } else { + /* save the previous state for proper extension mapping with SI/SO-stateful converters */ + cnv->mode=state; + + /* set the next state early so that we can reuse the entry variable */ + state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry); /* typically 0 */ + + /* + * An if-else-if chain provides more reliable performance for + * the most common cases compared to a switch. + */ + action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); + if(action==MBCS_STATE_VALID_DIRECT_16) { + /* output BMP code point */ + c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + break; + } else if(action==MBCS_STATE_VALID_16) { + offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); + c=unicodeCodeUnits[offset]; + if(c<0xfffe) { + /* output BMP code point */ + break; + } else if(c==0xfffe) { + if(UCNV_TO_U_USE_FALLBACK(cnv) && (c=ucnv_MBCSGetFallback(&cnv->sharedData->mbcs, offset))!=0xfffe) { + break; + } + } else { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else if(action==MBCS_STATE_VALID_16_PAIR) { + offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); + c=unicodeCodeUnits[offset++]; + if(c<0xd800) { + /* output BMP code point below 0xd800 */ + break; + } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) { + /* output roundtrip or fallback supplementary code point */ + c=((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00); + break; + } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) { + /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ + c=unicodeCodeUnits[offset]; + break; + } else if(c==0xffff) { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else if(action==MBCS_STATE_VALID_DIRECT_20 || + (action==MBCS_STATE_FALLBACK_DIRECT_20 && UCNV_TO_U_USE_FALLBACK(cnv)) + ) { + /* output supplementary code point */ + c=(UChar32)(MBCS_ENTRY_FINAL_VALUE(entry)+0x10000); + break; + } else if(action==MBCS_STATE_CHANGE_ONLY) { + /* + * This serves as a state change without any output. + * It is useful for reading simple stateful encodings, + * for example using just Shift-In/Shift-Out codes. + * The 21 unused bits may later be used for more sophisticated + * state transitions. + */ + if(cnv->sharedData->mbcs.dbcsOnlyState!=0) { + /* SI/SO are illegal for DBCS-only conversion */ + state=(uint8_t)(cnv->mode); /* restore the previous state */ + + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } + } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { + if(UCNV_TO_U_USE_FALLBACK(cnv)) { + /* output BMP code point */ + c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + break; + } + } else if(action==MBCS_STATE_UNASSIGNED) { + /* just fall through */ + } else if(action==MBCS_STATE_ILLEGAL) { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } else { + /* reserved (must never occur), or only state change */ + offset=0; + lastSource=source; + continue; + } + + /* end of action codes: prepare for a new character */ + offset=0; + + if(U_FAILURE(*pErrorCode)) { + /* callback(illegal) */ + break; + } else /* unassigned sequence */ { + /* defer to the generic implementation */ + cnv->toUnicodeStatus=0; + cnv->mode=state; + pArgs->source=(const char *)lastSource; + return UCNV_GET_NEXT_UCHAR_USE_TO_U; + } + } + } + + if(c<0) { + if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) { + /* incomplete character byte sequence */ + uint8_t *bytes=cnv->toUBytes; + cnv->toULength=(int8_t)(source-lastSource); + do { + *bytes++=*lastSource++; + } while(lastSource<source); + *pErrorCode=U_TRUNCATED_CHAR_FOUND; + } else if(U_FAILURE(*pErrorCode)) { + /* callback(illegal) */ + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + */ + UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0); + uint8_t *bytes=cnv->toUBytes; + *bytes++=*lastSource++; /* first byte */ + if(lastSource==source) { + cnv->toULength=1; + } else /* lastSource<source: multi-byte character */ { + int8_t i; + for(i=1; + lastSource<source && !isSingleOrLead(stateTable, state, isDBCSOnly, *lastSource); + ++i + ) { + *bytes++=*lastSource++; + } + cnv->toULength=i; + source=lastSource; + } + } else { + /* no output because of empty input or only state changes */ + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + } + c=0xffff; + } + + /* set the converter state back into UConverter, ready for a new character */ + cnv->toUnicodeStatus=0; + cnv->mode=state; + + /* write back the updated pointer */ + pArgs->source=(const char *)source; + return c; +} + +#if 0 +/* + * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus + * Removal improves code coverage. + */ +/** + * This version of ucnv_MBCSSimpleGetNextUChar() is optimized for single-byte, single-state codepages. + * It does not handle the EBCDIC swaplfnl option (set in UConverter). + * It does not handle conversion extensions (_extToU()). + */ +U_CFUNC UChar32 +ucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData, + uint8_t b, UBool useFallback) { + int32_t entry; + uint8_t action; + + entry=sharedData->mbcs.stateTable[0][b]; + /* MBCS_ENTRY_IS_FINAL(entry) */ + + if(MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry)) { + /* output BMP code point */ + return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + } + + /* + * An if-else-if chain provides more reliable performance for + * the most common cases compared to a switch. + */ + action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); + if(action==MBCS_STATE_VALID_DIRECT_20) { + /* output supplementary code point */ + return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry); + } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { + if(!TO_U_USE_FALLBACK(useFallback)) { + return 0xfffe; + } + /* output BMP code point */ + return (UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + } else if(action==MBCS_STATE_FALLBACK_DIRECT_20) { + if(!TO_U_USE_FALLBACK(useFallback)) { + return 0xfffe; + } + /* output supplementary code point */ + return 0x10000+MBCS_ENTRY_FINAL_VALUE(entry); + } else if(action==MBCS_STATE_UNASSIGNED) { + return 0xfffe; + } else if(action==MBCS_STATE_ILLEGAL) { + return 0xffff; + } else { + /* reserved, must never occur */ + return 0xffff; + } +} +#endif + +/* + * This is a simple version of _MBCSGetNextUChar() that is used + * by other converter implementations. + * It only returns an "assigned" result if it consumes the entire input. + * It does not use state from the converter, nor error codes. + * It does not handle the EBCDIC swaplfnl option (set in UConverter). + * It handles conversion extensions but not GB 18030. + * + * Return value: + * U+fffe unassigned + * U+ffff illegal + * otherwise the Unicode code point + */ +U_CFUNC UChar32 +ucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData, + const char *source, int32_t length, + UBool useFallback) { + const int32_t (*stateTable)[256]; + const uint16_t *unicodeCodeUnits; + + uint32_t offset; + uint8_t state, action; + + UChar32 c; + int32_t i, entry; + + if(length<=0) { + /* no input at all: "illegal" */ + return 0xffff; + } + +#if 0 +/* + * Code disabled 2002dec09 (ICU 2.4) because it is not currently used in ICU. markus + * TODO In future releases, verify that this function is never called for SBCS + * conversions, i.e., that sharedData->mbcs.countStates==1 is still true. + * Removal improves code coverage. + */ + /* use optimized function if possible */ + if(sharedData->mbcs.countStates==1) { + if(length==1) { + return ucnv_MBCSSingleSimpleGetNextUChar(sharedData, (uint8_t)*source, useFallback); + } else { + return 0xffff; /* illegal: more than a single byte for an SBCS converter */ + } + } +#endif + + /* set up the local pointers */ + stateTable=sharedData->mbcs.stateTable; + unicodeCodeUnits=sharedData->mbcs.unicodeCodeUnits; + + /* converter state */ + offset=0; + state=sharedData->mbcs.dbcsOnlyState; + + /* conversion loop */ + for(i=0;;) { + entry=stateTable[state][(uint8_t)source[i++]]; + if(MBCS_ENTRY_IS_TRANSITION(entry)) { + state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry); + offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry); + + if(i==length) { + return 0xffff; /* truncated character */ + } + } else { + /* + * An if-else-if chain provides more reliable performance for + * the most common cases compared to a switch. + */ + action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); + if(action==MBCS_STATE_VALID_16) { + offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); + c=unicodeCodeUnits[offset]; + if(c!=0xfffe) { + /* done */ + } else if(UCNV_TO_U_USE_FALLBACK(cnv)) { + c=ucnv_MBCSGetFallback(&sharedData->mbcs, offset); + /* else done with 0xfffe */ + } + break; + } else if(action==MBCS_STATE_VALID_DIRECT_16) { + /* output BMP code point */ + c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + break; + } else if(action==MBCS_STATE_VALID_16_PAIR) { + offset+=MBCS_ENTRY_FINAL_VALUE_16(entry); + c=unicodeCodeUnits[offset++]; + if(c<0xd800) { + /* output BMP code point below 0xd800 */ + } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? c<=0xdfff : c<=0xdbff) { + /* output roundtrip or fallback supplementary code point */ + c=(UChar32)(((c&0x3ff)<<10)+unicodeCodeUnits[offset]+(0x10000-0xdc00)); + } else if(UCNV_TO_U_USE_FALLBACK(cnv) ? (c&0xfffe)==0xe000 : c==0xe000) { + /* output roundtrip BMP code point above 0xd800 or fallback BMP code point */ + c=unicodeCodeUnits[offset]; + } else if(c==0xffff) { + return 0xffff; + } else { + c=0xfffe; + } + break; + } else if(action==MBCS_STATE_VALID_DIRECT_20) { + /* output supplementary code point */ + c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry); + break; + } else if(action==MBCS_STATE_FALLBACK_DIRECT_16) { + if(!TO_U_USE_FALLBACK(useFallback)) { + c=0xfffe; + break; + } + /* output BMP code point */ + c=(UChar)MBCS_ENTRY_FINAL_VALUE_16(entry); + break; + } else if(action==MBCS_STATE_FALLBACK_DIRECT_20) { + if(!TO_U_USE_FALLBACK(useFallback)) { + c=0xfffe; + break; + } + /* output supplementary code point */ + c=0x10000+MBCS_ENTRY_FINAL_VALUE(entry); + break; + } else if(action==MBCS_STATE_UNASSIGNED) { + c=0xfffe; + break; + } + + /* + * forbid MBCS_STATE_CHANGE_ONLY for this function, + * and MBCS_STATE_ILLEGAL and reserved action codes + */ + return 0xffff; + } + } + + if(i!=length) { + /* illegal for this function: not all input consumed */ + return 0xffff; + } + + if(c==0xfffe) { + /* try an extension mapping */ + const int32_t *cx=sharedData->mbcs.extIndexes; + if(cx!=NULL) { + return ucnv_extSimpleMatchToU(cx, source, length, useFallback); + } + } + + return c; +} + +/* MBCS-from-Unicode conversion functions ----------------------------------- */ + +/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for double-byte codepages. */ +static void +ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + int32_t *offsets; + + const uint16_t *table; + const uint16_t *mbcsIndex; + const uint8_t *bytes; + + UChar32 c; + + int32_t sourceIndex, nextSourceIndex; + + uint32_t stage2Entry; + uint32_t asciiRoundtrips; + uint32_t value; + uint8_t unicodeMask; + + /* use optimized function if possible */ + cnv=pArgs->converter; + unicodeMask=cnv->sharedData->mbcs.unicodeMask; + + /* set up the local pointers */ + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + table=cnv->sharedData->mbcs.fromUnicodeTable; + mbcsIndex=cnv->sharedData->mbcs.mbcsIndex; + if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { + bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; + } else { + bytes=cnv->sharedData->mbcs.fromUnicodeBytes; + } + asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; + + /* get the converter state from UConverter */ + c=cnv->fromUChar32; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex= c==0 ? 0 : -1; + nextSourceIndex=0; + + /* conversion loop */ + if(c!=0 && targetCapacity>0) { + goto getTrail; + } + + while(source<sourceLimit) { + /* + * This following test is to see if available input would overflow the output. + * It does not catch output of more than one byte that + * overflows as a result of a multi-byte character or callback output + * from the last source character. + * Therefore, those situations also test for overflows and will + * then break the loop, too. + */ + if(targetCapacity>0) { + /* + * Get a correct Unicode code point: + * a single UChar for a BMP code point or + * a matched surrogate pair for a "supplementary code point". + */ + c=*source++; + ++nextSourceIndex; + if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) { + *target++=(uint8_t)c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + sourceIndex=nextSourceIndex; + } + --targetCapacity; + c=0; + continue; + } + /* + * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX + * to avoid dealing with surrogates. + * MBCS_FAST_MAX must be >=0xd7ff. + */ + if(c<=0xd7ff) { + value=DBCS_RESULT_FROM_MOST_BMP(mbcsIndex, (const uint16_t *)bytes, c); + /* There are only roundtrips (!=0) and no-mapping (==0) entries. */ + if(value==0) { + goto unassigned; + } + /* output the value */ + } else { + /* + * This also tests if the codepage maps single surrogates. + * If it does, then surrogates are not paired but mapped separately. + * Note that in this case unmatched surrogates are not detected. + */ + if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) { + if(U16_IS_SURROGATE_LEAD(c)) { +getTrail: + if(source<sourceLimit) { + /* test the following code unit */ + UChar trail=*source; + if(U16_IS_TRAIL(trail)) { + ++source; + ++nextSourceIndex; + c=U16_GET_SUPPLEMENTARY(c, trail); + if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { + /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ + /* callback(unassigned) */ + goto unassigned; + } + /* convert this supplementary code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } else { + /* no more input */ + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } + + /* convert the Unicode code point in c into codepage bytes */ + stage2Entry=MBCS_STAGE_2_FROM_U(table, c); + + /* get the bytes and the length for the output */ + /* MBCS_OUTPUT_2 */ + value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); + + /* is this code point assigned, or do we use fallbacks? */ + if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || + (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0)) + ) { + /* + * We allow a 0 byte output if the "assigned" bit is set for this entry. + * There is no way with this data structure for fallback output + * to be a zero byte. + */ + +unassigned: + /* try an extension mapping */ + pArgs->source=source; + c=_extFromU(cnv, cnv->sharedData, + c, &source, sourceLimit, + &target, target+targetCapacity, + &offsets, sourceIndex, + pArgs->flush, + pErrorCode); + nextSourceIndex+=(int32_t)(source-pArgs->source); + + if(U_FAILURE(*pErrorCode)) { + /* not mappable or buffer overflow */ + break; + } else { + /* a mapping was written to the target, continue */ + + /* recalculate the targetCapacity after an extension mapping */ + targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); + + /* normal end of conversion: prepare for a new character */ + sourceIndex=nextSourceIndex; + continue; + } + } + } + + /* write the output character bytes from value and length */ + /* from the first if in the loop we know that targetCapacity>0 */ + if(value<=0xff) { + /* this is easy because we know that there is enough space */ + *target++=(uint8_t)value; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + --targetCapacity; + } else /* length==2 */ { + *target++=(uint8_t)(value>>8); + if(2<=targetCapacity) { + *target++=(uint8_t)value; + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex; + } + targetCapacity-=2; + } else { + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + cnv->charErrorBuffer[0]=(char)value; + cnv->charErrorBufferLength=1; + + /* target overflow */ + targetCapacity=0; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + c=0; + break; + } + } + + /* normal end of conversion: prepare for a new character */ + c=0; + sourceIndex=nextSourceIndex; + continue; + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + /* set the converter state back into UConverter */ + cnv->fromUChar32=c; + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; +} + +/* This version of ucnv_MBCSFromUnicodeWithOffsets() is optimized for single-byte codepages. */ +static void +ucnv_MBCSSingleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + int32_t *offsets; + + const uint16_t *table; + const uint16_t *results; + + UChar32 c; + + int32_t sourceIndex, nextSourceIndex; + + uint16_t value, minValue; + UBool hasSupplementary; + + /* set up the local pointers */ + cnv=pArgs->converter; + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + table=cnv->sharedData->mbcs.fromUnicodeTable; + if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { + results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; + } else { + results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes; + } + + if(cnv->useFallback) { + /* use all roundtrip and fallback results */ + minValue=0x800; + } else { + /* use only roundtrips and fallbacks from private-use characters */ + minValue=0xc00; + } + hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); + + /* get the converter state from UConverter */ + c=cnv->fromUChar32; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex= c==0 ? 0 : -1; + nextSourceIndex=0; + + /* conversion loop */ + if(c!=0 && targetCapacity>0) { + goto getTrail; + } + + while(source<sourceLimit) { + /* + * This following test is to see if available input would overflow the output. + * It does not catch output of more than one byte that + * overflows as a result of a multi-byte character or callback output + * from the last source character. + * Therefore, those situations also test for overflows and will + * then break the loop, too. + */ + if(targetCapacity>0) { + /* + * Get a correct Unicode code point: + * a single UChar for a BMP code point or + * a matched surrogate pair for a "supplementary code point". + */ + c=*source++; + ++nextSourceIndex; + if(U16_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c)) { +getTrail: + if(source<sourceLimit) { + /* test the following code unit */ + UChar trail=*source; + if(U16_IS_TRAIL(trail)) { + ++source; + ++nextSourceIndex; + c=U16_GET_SUPPLEMENTARY(c, trail); + if(!hasSupplementary) { + /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ + /* callback(unassigned) */ + goto unassigned; + } + /* convert this supplementary code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } else { + /* no more input */ + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } + + /* convert the Unicode code point in c into codepage bytes */ + value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); + + /* is this code point assigned, or do we use fallbacks? */ + if(value>=minValue) { + /* assigned, write the output character bytes from value and length */ + /* length==1 */ + /* this is easy because we know that there is enough space */ + *target++=(uint8_t)value; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + --targetCapacity; + + /* normal end of conversion: prepare for a new character */ + c=0; + sourceIndex=nextSourceIndex; + } else { /* unassigned */ +unassigned: + /* try an extension mapping */ + pArgs->source=source; + c=_extFromU(cnv, cnv->sharedData, + c, &source, sourceLimit, + &target, target+targetCapacity, + &offsets, sourceIndex, + pArgs->flush, + pErrorCode); + nextSourceIndex+=(int32_t)(source-pArgs->source); + + if(U_FAILURE(*pErrorCode)) { + /* not mappable or buffer overflow */ + break; + } else { + /* a mapping was written to the target, continue */ + + /* recalculate the targetCapacity after an extension mapping */ + targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); + + /* normal end of conversion: prepare for a new character */ + sourceIndex=nextSourceIndex; + } + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + /* set the converter state back into UConverter */ + cnv->fromUChar32=c; + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; +} + +/* + * This version of ucnv_MBCSFromUnicode() is optimized for single-byte codepages + * that map only to and from the BMP. + * In addition to single-byte/state optimizations, the offset calculations + * become much easier. + * It would be possible to use the sbcsIndex for UTF-8-friendly tables, + * but measurements have shown that this diminishes performance + * in more cases than it improves it. + * See SVN revision 21013 (2007-feb-06) for the last version with #if switches + * for various MBCS and SBCS optimizations. + */ +static void +ucnv_MBCSSingleFromBMPWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source, *sourceLimit, *lastSource; + uint8_t *target; + int32_t targetCapacity, length; + int32_t *offsets; + + const uint16_t *table; + const uint16_t *results; + + UChar32 c; + + int32_t sourceIndex; + + uint32_t asciiRoundtrips; + uint16_t value, minValue; + + /* set up the local pointers */ + cnv=pArgs->converter; + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + table=cnv->sharedData->mbcs.fromUnicodeTable; + if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { + results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; + } else { + results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes; + } + asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; + + if(cnv->useFallback) { + /* use all roundtrip and fallback results */ + minValue=0x800; + } else { + /* use only roundtrips and fallbacks from private-use characters */ + minValue=0xc00; + } + + /* get the converter state from UConverter */ + c=cnv->fromUChar32; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex= c==0 ? 0 : -1; + lastSource=source; + + /* + * since the conversion here is 1:1 UChar:uint8_t, we need only one counter + * for the minimum of the sourceLength and targetCapacity + */ + length=(int32_t)(sourceLimit-source); + if(length<targetCapacity) { + targetCapacity=length; + } + + /* conversion loop */ + if(c!=0 && targetCapacity>0) { + goto getTrail; + } + +#if MBCS_UNROLL_SINGLE_FROM_BMP + /* unrolling makes it slower on Pentium III/Windows 2000?! */ + /* unroll the loop with the most common case */ +unrolled: + if(targetCapacity>=4) { + int32_t count, loops; + uint16_t andedValues; + + loops=count=targetCapacity>>2; + do { + c=*source++; + andedValues=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); + *target++=(uint8_t)value; + c=*source++; + andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); + *target++=(uint8_t)value; + c=*source++; + andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); + *target++=(uint8_t)value; + c=*source++; + andedValues&=value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); + *target++=(uint8_t)value; + + /* were all 4 entries really valid? */ + if(andedValues<minValue) { + /* no, return to the first of these 4 */ + source-=4; + target-=4; + break; + } + } while(--count>0); + count=loops-count; + targetCapacity-=4*count; + + if(offsets!=NULL) { + lastSource+=4*count; + while(count>0) { + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + *offsets++=sourceIndex++; + --count; + } + } + + c=0; + } +#endif + + while(targetCapacity>0) { + /* + * Get a correct Unicode code point: + * a single UChar for a BMP code point or + * a matched surrogate pair for a "supplementary code point". + */ + c=*source++; + /* + * Do not immediately check for single surrogates: + * Assume that they are unassigned and check for them in that case. + * This speeds up the conversion of assigned characters. + */ + /* convert the Unicode code point in c into codepage bytes */ + if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) { + *target++=(uint8_t)c; + --targetCapacity; + c=0; + continue; + } + value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); + /* is this code point assigned, or do we use fallbacks? */ + if(value>=minValue) { + /* assigned, write the output character bytes from value and length */ + /* length==1 */ + /* this is easy because we know that there is enough space */ + *target++=(uint8_t)value; + --targetCapacity; + + /* normal end of conversion: prepare for a new character */ + c=0; + continue; + } else if(!U16_IS_SURROGATE(c)) { + /* normal, unassigned BMP character */ + } else if(U16_IS_SURROGATE_LEAD(c)) { +getTrail: + if(source<sourceLimit) { + /* test the following code unit */ + UChar trail=*source; + if(U16_IS_TRAIL(trail)) { + ++source; + c=U16_GET_SUPPLEMENTARY(c, trail); + /* this codepage does not map supplementary code points */ + /* callback(unassigned) */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } else { + /* no more input */ + if (pArgs->flush) { + *pErrorCode=U_TRUNCATED_CHAR_FOUND; + } + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + + /* c does not have a mapping */ + + /* get the number of code units for c to correctly advance sourceIndex */ + length=U16_LENGTH(c); + + /* set offsets since the start or the last extension */ + if(offsets!=NULL) { + int32_t count=(int32_t)(source-lastSource); + + /* do not set the offset for this character */ + count-=length; + + while(count>0) { + *offsets++=sourceIndex++; + --count; + } + /* offsets and sourceIndex are now set for the current character */ + } + + /* try an extension mapping */ + lastSource=source; + c=_extFromU(cnv, cnv->sharedData, + c, &source, sourceLimit, + &target, (const uint8_t *)(pArgs->targetLimit), + &offsets, sourceIndex, + pArgs->flush, + pErrorCode); + sourceIndex+=length+(int32_t)(source-lastSource); + lastSource=source; + + if(U_FAILURE(*pErrorCode)) { + /* not mappable or buffer overflow */ + break; + } else { + /* a mapping was written to the target, continue */ + + /* recalculate the targetCapacity after an extension mapping */ + targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); + length=(int32_t)(sourceLimit-source); + if(length<targetCapacity) { + targetCapacity=length; + } + } + +#if MBCS_UNROLL_SINGLE_FROM_BMP + /* unrolling makes it slower on Pentium III/Windows 2000?! */ + goto unrolled; +#endif + } + + if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + + /* set offsets since the start or the last callback */ + if(offsets!=NULL) { + size_t count=source-lastSource; + if (count > 0 && *pErrorCode == U_TRUNCATED_CHAR_FOUND) { + /* + Caller gave us a partial supplementary character, + which this function couldn't convert in any case. + The callback will handle the offset. + */ + count--; + } + while(count>0) { + *offsets++=sourceIndex++; + --count; + } + } + + /* set the converter state back into UConverter */ + cnv->fromUChar32=c; + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; +} + +U_CFUNC void +ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + const UChar *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + int32_t *offsets; + + const uint16_t *table; + const uint16_t *mbcsIndex; + const uint8_t *p, *bytes; + uint8_t outputType; + + UChar32 c; + + int32_t prevSourceIndex, sourceIndex, nextSourceIndex; + + uint32_t stage2Entry; + uint32_t asciiRoundtrips; + uint32_t value; + /* Shift-In and Shift-Out byte sequences differ by encoding scheme. */ + uint8_t siBytes[2] = {0, 0}; + uint8_t soBytes[2] = {0, 0}; + uint8_t siLength, soLength; + int32_t length = 0, prevLength; + uint8_t unicodeMask; + + cnv=pArgs->converter; + + if(cnv->preFromUFirstCP>=0) { + /* + * pass sourceIndex=-1 because we continue from an earlier buffer + * in the future, this may change with continuous offsets + */ + ucnv_extContinueMatchFromU(cnv, pArgs, -1, pErrorCode); + + if(U_FAILURE(*pErrorCode) || cnv->preFromULength<0) { + return; + } + } + + /* use optimized function if possible */ + outputType=cnv->sharedData->mbcs.outputType; + unicodeMask=cnv->sharedData->mbcs.unicodeMask; + if(outputType==MBCS_OUTPUT_1 && !(unicodeMask&UCNV_HAS_SURROGATES)) { + if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { + ucnv_MBCSSingleFromBMPWithOffsets(pArgs, pErrorCode); + } else { + ucnv_MBCSSingleFromUnicodeWithOffsets(pArgs, pErrorCode); + } + return; + } else if(outputType==MBCS_OUTPUT_2 && cnv->sharedData->mbcs.utf8Friendly) { + ucnv_MBCSDoubleFromUnicodeWithOffsets(pArgs, pErrorCode); + return; + } + + /* set up the local pointers */ + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + table=cnv->sharedData->mbcs.fromUnicodeTable; + if(cnv->sharedData->mbcs.utf8Friendly) { + mbcsIndex=cnv->sharedData->mbcs.mbcsIndex; + } else { + mbcsIndex=NULL; + } + if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { + bytes=cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; + } else { + bytes=cnv->sharedData->mbcs.fromUnicodeBytes; + } + asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; + + /* get the converter state from UConverter */ + c=cnv->fromUChar32; + + if(outputType==MBCS_OUTPUT_2_SISO) { + prevLength=cnv->fromUnicodeStatus; + if(prevLength==0) { + /* set the real value */ + prevLength=1; + } + } else { + /* prevent fromUnicodeStatus from being set to something non-0 */ + prevLength=0; + } + + /* sourceIndex=-1 if the current character began in the previous buffer */ + prevSourceIndex=-1; + sourceIndex= c==0 ? 0 : -1; + nextSourceIndex=0; + + /* Get the SI/SO character for the converter */ + siLength = static_cast<uint8_t>(getSISOBytes(SI, cnv->options, siBytes)); + soLength = static_cast<uint8_t>(getSISOBytes(SO, cnv->options, soBytes)); + + /* conversion loop */ + /* + * This is another piece of ugly code: + * A goto into the loop if the converter state contains a first surrogate + * from the previous function call. + * It saves me to check in each loop iteration a check of if(c==0) + * and duplicating the trail-surrogate-handling code in the else + * branch of that check. + * I could not find any other way to get around this other than + * using a function call for the conversion and callback, which would + * be even more inefficient. + * + * Markus Scherer 2000-jul-19 + */ + if(c!=0 && targetCapacity>0) { + goto getTrail; + } + + while(source<sourceLimit) { + /* + * This following test is to see if available input would overflow the output. + * It does not catch output of more than one byte that + * overflows as a result of a multi-byte character or callback output + * from the last source character. + * Therefore, those situations also test for overflows and will + * then break the loop, too. + */ + if(targetCapacity>0) { + /* + * Get a correct Unicode code point: + * a single UChar for a BMP code point or + * a matched surrogate pair for a "supplementary code point". + */ + c=*source++; + ++nextSourceIndex; + if(c<=0x7f && IS_ASCII_ROUNDTRIP(c, asciiRoundtrips)) { + *target++=(uint8_t)c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + prevSourceIndex=sourceIndex; + sourceIndex=nextSourceIndex; + } + --targetCapacity; + c=0; + continue; + } + /* + * utf8Friendly table: Test for <=0xd7ff rather than <=MBCS_FAST_MAX + * to avoid dealing with surrogates. + * MBCS_FAST_MAX must be >=0xd7ff. + */ + if(c<=0xd7ff && mbcsIndex!=NULL) { + value=mbcsIndex[c>>6]; + + /* get the bytes and the length for the output (copied from below and adapted for utf8Friendly data) */ + /* There are only roundtrips (!=0) and no-mapping (==0) entries. */ + switch(outputType) { + case MBCS_OUTPUT_2: + value=((const uint16_t *)bytes)[value +(c&0x3f)]; + if(value<=0xff) { + if(value==0) { + goto unassigned; + } else { + length=1; + } + } else { + length=2; + } + break; + case MBCS_OUTPUT_2_SISO: + /* 1/2-byte stateful with Shift-In/Shift-Out */ + /* + * Save the old state in the converter object + * right here, then change the local prevLength state variable if necessary. + * Then, if this character turns out to be unassigned or a fallback that + * is not taken, the callback code must not save the new state in the converter + * because the new state is for a character that is not output. + * However, the callback must still restore the state from the converter + * in case the callback function changed it for its output. + */ + cnv->fromUnicodeStatus=prevLength; /* save the old state */ + value=((const uint16_t *)bytes)[value +(c&0x3f)]; + if(value<=0xff) { + if(value==0) { + goto unassigned; + } else if(prevLength<=1) { + length=1; + } else { + /* change from double-byte mode to single-byte */ + if (siLength == 1) { + value|=(uint32_t)siBytes[0]<<8; + length = 2; + } else if (siLength == 2) { + value|=(uint32_t)siBytes[1]<<8; + value|=(uint32_t)siBytes[0]<<16; + length = 3; + } + prevLength=1; + } + } else { + if(prevLength==2) { + length=2; + } else { + /* change from single-byte mode to double-byte */ + if (soLength == 1) { + value|=(uint32_t)soBytes[0]<<16; + length = 3; + } else if (soLength == 2) { + value|=(uint32_t)soBytes[1]<<16; + value|=(uint32_t)soBytes[0]<<24; + length = 4; + } + prevLength=2; + } + } + break; + case MBCS_OUTPUT_DBCS_ONLY: + /* table with single-byte results, but only DBCS mappings used */ + value=((const uint16_t *)bytes)[value +(c&0x3f)]; + if(value<=0xff) { + /* no mapping or SBCS result, not taken for DBCS-only */ + goto unassigned; + } else { + length=2; + } + break; + case MBCS_OUTPUT_3: + p=bytes+(value+(c&0x3f))*3; + value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; + if(value<=0xff) { + if(value==0) { + goto unassigned; + } else { + length=1; + } + } else if(value<=0xffff) { + length=2; + } else { + length=3; + } + break; + case MBCS_OUTPUT_4: + value=((const uint32_t *)bytes)[value +(c&0x3f)]; + if(value<=0xff) { + if(value==0) { + goto unassigned; + } else { + length=1; + } + } else if(value<=0xffff) { + length=2; + } else if(value<=0xffffff) { + length=3; + } else { + length=4; + } + break; + case MBCS_OUTPUT_3_EUC: + value=((const uint16_t *)bytes)[value +(c&0x3f)]; + /* EUC 16-bit fixed-length representation */ + if(value<=0xff) { + if(value==0) { + goto unassigned; + } else { + length=1; + } + } else if((value&0x8000)==0) { + value|=0x8e8000; + length=3; + } else if((value&0x80)==0) { + value|=0x8f0080; + length=3; + } else { + length=2; + } + break; + case MBCS_OUTPUT_4_EUC: + p=bytes+(value+(c&0x3f))*3; + value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; + /* EUC 16-bit fixed-length representation applied to the first two bytes */ + if(value<=0xff) { + if(value==0) { + goto unassigned; + } else { + length=1; + } + } else if(value<=0xffff) { + length=2; + } else if((value&0x800000)==0) { + value|=0x8e800000; + length=4; + } else if((value&0x8000)==0) { + value|=0x8f008000; + length=4; + } else { + length=3; + } + break; + default: + /* must not occur */ + /* + * To avoid compiler warnings that value & length may be + * used without having been initialized, we set them here. + * In reality, this is unreachable code. + * Not having a default branch also causes warnings with + * some compilers. + */ + value=0; + length=0; + break; + } + /* output the value */ + } else { + /* + * This also tests if the codepage maps single surrogates. + * If it does, then surrogates are not paired but mapped separately. + * Note that in this case unmatched surrogates are not detected. + */ + if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) { + if(U16_IS_SURROGATE_LEAD(c)) { +getTrail: + if(source<sourceLimit) { + /* test the following code unit */ + UChar trail=*source; + if(U16_IS_TRAIL(trail)) { + ++source; + ++nextSourceIndex; + c=U16_GET_SUPPLEMENTARY(c, trail); + if(!(unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { + /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ + cnv->fromUnicodeStatus=prevLength; /* save the old state */ + /* callback(unassigned) */ + goto unassigned; + } + /* convert this supplementary code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } else { + /* no more input */ + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + break; + } + } + + /* convert the Unicode code point in c into codepage bytes */ + + /* + * The basic lookup is a triple-stage compact array (trie) lookup. + * For details see the beginning of this file. + * + * Single-byte codepages are handled with a different data structure + * by _MBCSSingle... functions. + * + * The result consists of a 32-bit value from stage 2 and + * a pointer to as many bytes as are stored per character. + * The pointer points to the character's bytes in stage 3. + * Bits 15..0 of the stage 2 entry contain the stage 3 index + * for that pointer, while bits 31..16 are flags for which of + * the 16 characters in the block are roundtrip-assigned. + * + * For 2-byte and 4-byte codepages, the bytes are stored as uint16_t + * respectively as uint32_t, in the platform encoding. + * For 3-byte codepages, the bytes are always stored in big-endian order. + * + * For EUC encodings that use only either 0x8e or 0x8f as the first + * byte of their longest byte sequences, the first two bytes in + * this third stage indicate with their 7th bits whether these bytes + * are to be written directly or actually need to be preceeded by + * one of the two Single-Shift codes. With this, the third stage + * stores one byte fewer per character than the actual maximum length of + * EUC byte sequences. + * + * Other than that, leading zero bytes are removed and the other + * bytes output. A single zero byte may be output if the "assigned" + * bit in stage 2 was on. + * The data structure does not support zero byte output as a fallback, + * and also does not allow output of leading zeros. + */ + stage2Entry=MBCS_STAGE_2_FROM_U(table, c); + + /* get the bytes and the length for the output */ + switch(outputType) { + case MBCS_OUTPUT_2: + value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); + if(value<=0xff) { + length=1; + } else { + length=2; + } + break; + case MBCS_OUTPUT_2_SISO: + /* 1/2-byte stateful with Shift-In/Shift-Out */ + /* + * Save the old state in the converter object + * right here, then change the local prevLength state variable if necessary. + * Then, if this character turns out to be unassigned or a fallback that + * is not taken, the callback code must not save the new state in the converter + * because the new state is for a character that is not output. + * However, the callback must still restore the state from the converter + * in case the callback function changed it for its output. + */ + cnv->fromUnicodeStatus=prevLength; /* save the old state */ + value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); + if(value<=0xff) { + if(value==0 && MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)==0) { + /* no mapping, leave value==0 */ + length=0; + } else if(prevLength<=1) { + length=1; + } else { + /* change from double-byte mode to single-byte */ + if (siLength == 1) { + value|=(uint32_t)siBytes[0]<<8; + length = 2; + } else if (siLength == 2) { + value|=(uint32_t)siBytes[1]<<8; + value|=(uint32_t)siBytes[0]<<16; + length = 3; + } + prevLength=1; + } + } else { + if(prevLength==2) { + length=2; + } else { + /* change from single-byte mode to double-byte */ + if (soLength == 1) { + value|=(uint32_t)soBytes[0]<<16; + length = 3; + } else if (soLength == 2) { + value|=(uint32_t)soBytes[1]<<16; + value|=(uint32_t)soBytes[0]<<24; + length = 4; + } + prevLength=2; + } + } + break; + case MBCS_OUTPUT_DBCS_ONLY: + /* table with single-byte results, but only DBCS mappings used */ + value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); + if(value<=0xff) { + /* no mapping or SBCS result, not taken for DBCS-only */ + value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */ + length=0; + } else { + length=2; + } + break; + case MBCS_OUTPUT_3: + p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c); + value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; + if(value<=0xff) { + length=1; + } else if(value<=0xffff) { + length=2; + } else { + length=3; + } + break; + case MBCS_OUTPUT_4: + value=MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c); + if(value<=0xff) { + length=1; + } else if(value<=0xffff) { + length=2; + } else if(value<=0xffffff) { + length=3; + } else { + length=4; + } + break; + case MBCS_OUTPUT_3_EUC: + value=MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c); + /* EUC 16-bit fixed-length representation */ + if(value<=0xff) { + length=1; + } else if((value&0x8000)==0) { + value|=0x8e8000; + length=3; + } else if((value&0x80)==0) { + value|=0x8f0080; + length=3; + } else { + length=2; + } + break; + case MBCS_OUTPUT_4_EUC: + p=MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c); + value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; + /* EUC 16-bit fixed-length representation applied to the first two bytes */ + if(value<=0xff) { + length=1; + } else if(value<=0xffff) { + length=2; + } else if((value&0x800000)==0) { + value|=0x8e800000; + length=4; + } else if((value&0x8000)==0) { + value|=0x8f008000; + length=4; + } else { + length=3; + } + break; + default: + /* must not occur */ + /* + * To avoid compiler warnings that value & length may be + * used without having been initialized, we set them here. + * In reality, this is unreachable code. + * Not having a default branch also causes warnings with + * some compilers. + */ + value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */ + length=0; + break; + } + + /* is this code point assigned, or do we use fallbacks? */ + if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c)!=0 || + (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0)) + ) { + /* + * We allow a 0 byte output if the "assigned" bit is set for this entry. + * There is no way with this data structure for fallback output + * to be a zero byte. + */ + +unassigned: + /* try an extension mapping */ + pArgs->source=source; + c=_extFromU(cnv, cnv->sharedData, + c, &source, sourceLimit, + &target, target+targetCapacity, + &offsets, sourceIndex, + pArgs->flush, + pErrorCode); + nextSourceIndex+=(int32_t)(source-pArgs->source); + prevLength=cnv->fromUnicodeStatus; /* restore SISO state */ + + if(U_FAILURE(*pErrorCode)) { + /* not mappable or buffer overflow */ + break; + } else { + /* a mapping was written to the target, continue */ + + /* recalculate the targetCapacity after an extension mapping */ + targetCapacity=(int32_t)(pArgs->targetLimit-(char *)target); + + /* normal end of conversion: prepare for a new character */ + if(offsets!=NULL) { + prevSourceIndex=sourceIndex; + sourceIndex=nextSourceIndex; + } + continue; + } + } + } + + /* write the output character bytes from value and length */ + /* from the first if in the loop we know that targetCapacity>0 */ + if(length<=targetCapacity) { + if(offsets==NULL) { + switch(length) { + /* each branch falls through to the next one */ + case 4: + *target++=(uint8_t)(value>>24); + U_FALLTHROUGH; + case 3: + *target++=(uint8_t)(value>>16); + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(value>>8); + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)value; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + } else { + switch(length) { + /* each branch falls through to the next one */ + case 4: + *target++=(uint8_t)(value>>24); + *offsets++=sourceIndex; + U_FALLTHROUGH; + case 3: + *target++=(uint8_t)(value>>16); + *offsets++=sourceIndex; + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(value>>8); + *offsets++=sourceIndex; + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)value; + *offsets++=sourceIndex; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + } + targetCapacity-=length; + } else { + uint8_t *charErrorBuffer; + + /* + * We actually do this backwards here: + * In order to save an intermediate variable, we output + * first to the overflow buffer what does not fit into the + * regular target. + */ + /* we know that 1<=targetCapacity<length<=4 */ + length-=targetCapacity; + charErrorBuffer=(uint8_t *)cnv->charErrorBuffer; + switch(length) { + /* each branch falls through to the next one */ + case 3: + *charErrorBuffer++=(uint8_t)(value>>16); + U_FALLTHROUGH; + case 2: + *charErrorBuffer++=(uint8_t)(value>>8); + U_FALLTHROUGH; + case 1: + *charErrorBuffer=(uint8_t)value; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + cnv->charErrorBufferLength=(int8_t)length; + + /* now output what fits into the regular target */ + value>>=8*length; /* length was reduced by targetCapacity */ + switch(targetCapacity) { + /* each branch falls through to the next one */ + case 3: + *target++=(uint8_t)(value>>16); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(value>>8); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)value; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + + /* target overflow */ + targetCapacity=0; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + c=0; + break; + } + + /* normal end of conversion: prepare for a new character */ + c=0; + if(offsets!=NULL) { + prevSourceIndex=sourceIndex; + sourceIndex=nextSourceIndex; + } + continue; + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + /* + * the end of the input stream and detection of truncated input + * are handled by the framework, but for EBCDIC_STATEFUL conversion + * we need to emit an SI at the very end + * + * conditions: + * successful + * EBCDIC_STATEFUL in DBCS mode + * end of input and no truncated input + */ + if( U_SUCCESS(*pErrorCode) && + outputType==MBCS_OUTPUT_2_SISO && prevLength==2 && + pArgs->flush && source>=sourceLimit && c==0 + ) { + /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */ + if(targetCapacity>0) { + *target++=(uint8_t)siBytes[0]; + if (siLength == 2) { + if (targetCapacity<2) { + cnv->charErrorBuffer[0]=(uint8_t)siBytes[1]; + cnv->charErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } else { + *target++=(uint8_t)siBytes[1]; + } + } + if(offsets!=NULL) { + /* set the last source character's index (sourceIndex points at sourceLimit now) */ + *offsets++=prevSourceIndex; + } + } else { + /* target is full */ + cnv->charErrorBuffer[0]=(uint8_t)siBytes[0]; + if (siLength == 2) { + cnv->charErrorBuffer[1]=(uint8_t)siBytes[1]; + } + cnv->charErrorBufferLength=siLength; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + prevLength=1; /* we switched into SBCS */ + } + + /* set the converter state back into UConverter */ + cnv->fromUChar32=c; + cnv->fromUnicodeStatus=prevLength; + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; +} + +/* + * This is another simple conversion function for internal use by other + * conversion implementations. + * It does not use the converter state nor call callbacks. + * It does not handle the EBCDIC swaplfnl option (set in UConverter). + * It handles conversion extensions but not GB 18030. + * + * It converts one single Unicode code point into codepage bytes, encoded + * as one 32-bit value. The function returns the number of bytes in *pValue: + * 1..4 the number of bytes in *pValue + * 0 unassigned (*pValue undefined) + * -1 illegal (currently not used, *pValue undefined) + * + * *pValue will contain the resulting bytes with the last byte in bits 7..0, + * the second to last byte in bits 15..8, etc. + * Currently, the function assumes but does not check that 0<=c<=0x10ffff. + */ +U_CFUNC int32_t +ucnv_MBCSFromUChar32(UConverterSharedData *sharedData, + UChar32 c, uint32_t *pValue, + UBool useFallback) { + const int32_t *cx; + const uint16_t *table; +#if 0 +/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */ + const uint8_t *p; +#endif + uint32_t stage2Entry; + uint32_t value; + int32_t length; + + /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ + if(c<=0xffff || (sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { + table=sharedData->mbcs.fromUnicodeTable; + + /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */ + if(sharedData->mbcs.outputType==MBCS_OUTPUT_1) { + value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c); + /* is this code point assigned, or do we use fallbacks? */ + if(useFallback ? value>=0x800 : value>=0xc00) { + *pValue=value&0xff; + return 1; + } + } else /* outputType!=MBCS_OUTPUT_1 */ { + stage2Entry=MBCS_STAGE_2_FROM_U(table, c); + + /* get the bytes and the length for the output */ + switch(sharedData->mbcs.outputType) { + case MBCS_OUTPUT_2: + value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); + if(value<=0xff) { + length=1; + } else { + length=2; + } + break; +#if 0 +/* #if 0 because this is not currently used in ICU - reduce code, increase code coverage */ + case MBCS_OUTPUT_DBCS_ONLY: + /* table with single-byte results, but only DBCS mappings used */ + value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); + if(value<=0xff) { + /* no mapping or SBCS result, not taken for DBCS-only */ + value=stage2Entry=0; /* stage2Entry=0 to reset roundtrip flags */ + length=0; + } else { + length=2; + } + break; + case MBCS_OUTPUT_3: + p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); + value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; + if(value<=0xff) { + length=1; + } else if(value<=0xffff) { + length=2; + } else { + length=3; + } + break; + case MBCS_OUTPUT_4: + value=MBCS_VALUE_4_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); + if(value<=0xff) { + length=1; + } else if(value<=0xffff) { + length=2; + } else if(value<=0xffffff) { + length=3; + } else { + length=4; + } + break; + case MBCS_OUTPUT_3_EUC: + value=MBCS_VALUE_2_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); + /* EUC 16-bit fixed-length representation */ + if(value<=0xff) { + length=1; + } else if((value&0x8000)==0) { + value|=0x8e8000; + length=3; + } else if((value&0x80)==0) { + value|=0x8f0080; + length=3; + } else { + length=2; + } + break; + case MBCS_OUTPUT_4_EUC: + p=MBCS_POINTER_3_FROM_STAGE_2(sharedData->mbcs.fromUnicodeBytes, stage2Entry, c); + value=((uint32_t)*p<<16)|((uint32_t)p[1]<<8)|p[2]; + /* EUC 16-bit fixed-length representation applied to the first two bytes */ + if(value<=0xff) { + length=1; + } else if(value<=0xffff) { + length=2; + } else if((value&0x800000)==0) { + value|=0x8e800000; + length=4; + } else if((value&0x8000)==0) { + value|=0x8f008000; + length=4; + } else { + length=3; + } + break; +#endif + default: + /* must not occur */ + return -1; + } + + /* is this code point assigned, or do we use fallbacks? */ + if( MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || + (FROM_U_USE_FALLBACK(useFallback, c) && value!=0) + ) { + /* + * We allow a 0 byte output if the "assigned" bit is set for this entry. + * There is no way with this data structure for fallback output + * to be a zero byte. + */ + /* assigned */ + *pValue=value; + return length; + } + } + } + + cx=sharedData->mbcs.extIndexes; + if(cx!=NULL) { + length=ucnv_extSimpleMatchFromU(cx, c, pValue, useFallback); + return length>=0 ? length : -length; /* return abs(length); */ + } + + /* unassigned */ + return 0; +} + + +#if 0 +/* + * This function has been moved to ucnv2022.c for inlining. + * This implementation is here only for documentation purposes + */ + +/** + * This version of ucnv_MBCSFromUChar32() is optimized for single-byte codepages. + * It does not handle the EBCDIC swaplfnl option (set in UConverter). + * It does not handle conversion extensions (_extFromU()). + * + * It returns the codepage byte for the code point, or -1 if it is unassigned. + */ +U_CFUNC int32_t +ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData, + UChar32 c, + UBool useFallback) { + const uint16_t *table; + int32_t value; + + /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ + if(c>=0x10000 && !(sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY)) { + return -1; + } + + /* convert the Unicode code point in c into codepage bytes (same as in _MBCSFromUnicodeWithOffsets) */ + table=sharedData->mbcs.fromUnicodeTable; + + /* get the byte for the output */ + value=MBCS_SINGLE_RESULT_FROM_U(table, (uint16_t *)sharedData->mbcs.fromUnicodeBytes, c); + /* is this code point assigned, or do we use fallbacks? */ + if(useFallback ? value>=0x800 : value>=0xc00) { + return value&0xff; + } else { + return -1; + } +} +#endif + +/* MBCS-from-UTF-8 conversion functions ------------------------------------- */ + +/* offsets for n-byte UTF-8 sequences that were calculated with ((lead<<6)+trail)<<6+trail... */ +static const UChar32 +utf8_offsets[5]={ 0, 0, 0x3080, 0xE2080, 0x3C82080 }; + +static void U_CALLCONV +ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, + UConverterToUnicodeArgs *pToUArgs, + UErrorCode *pErrorCode) { + UConverter *utf8, *cnv; + const uint8_t *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + + const uint16_t *table, *sbcsIndex; + const uint16_t *results; + + int8_t oldToULength, toULength, toULimit; + + UChar32 c; + uint8_t b, t1, t2; + + uint32_t asciiRoundtrips; + uint16_t value, minValue = 0; + UBool hasSupplementary; + + /* set up the local pointers */ + utf8=pToUArgs->converter; + cnv=pFromUArgs->converter; + source=(uint8_t *)pToUArgs->source; + sourceLimit=(uint8_t *)pToUArgs->sourceLimit; + target=(uint8_t *)pFromUArgs->target; + targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); + + table=cnv->sharedData->mbcs.fromUnicodeTable; + sbcsIndex=cnv->sharedData->mbcs.sbcsIndex; + if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { + results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; + } else { + results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes; + } + asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; + + if(cnv->useFallback) { + /* use all roundtrip and fallback results */ + minValue=0x800; + } else { + /* use only roundtrips and fallbacks from private-use characters */ + minValue=0xc00; + } + hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); + + /* get the converter state from the UTF-8 UConverter */ + if(utf8->toULength > 0) { + toULength=oldToULength=utf8->toULength; + toULimit=(int8_t)utf8->mode; + c=(UChar32)utf8->toUnicodeStatus; + } else { + toULength=oldToULength=toULimit=0; + c = 0; + } + + // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character. + // If the buffer ends with a truncated 2- or 3-byte sequence, + // then we reduce the sourceLimit to before that, + // and collect the remaining bytes after the conversion loop. + { + // Do not go back into the bytes that will be read for finishing a partial + // sequence from the previous buffer. + int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength); + if(length>0) { + uint8_t b1=*(sourceLimit-1); + if(U8_IS_SINGLE(b1)) { + // common ASCII character + } else if(U8_IS_TRAIL(b1) && length>=2) { + uint8_t b2=*(sourceLimit-2); + if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { + // truncated 3-byte sequence + sourceLimit-=2; + } + } else if(0xc2<=b1 && b1<0xf0) { + // truncated 2- or 3-byte sequence + --sourceLimit; + } + } + } + + if(c!=0 && targetCapacity>0) { + utf8->toUnicodeStatus=0; + utf8->toULength=0; + goto moreBytes; + /* + * Note: We could avoid the goto by duplicating some of the moreBytes + * code, but only up to the point of collecting a complete UTF-8 + * sequence; then recurse for the toUBytes[toULength] + * and then continue with normal conversion. + * + * If so, move this code to just after initializing the minimum + * set of local variables for reading the UTF-8 input + * (utf8, source, target, limits but not cnv, table, minValue, etc.). + * + * Potential advantages: + * - avoid the goto + * - oldToULength could become a local variable in just those code blocks + * that deal with buffer boundaries + * - possibly faster if the goto prevents some compiler optimizations + * (this would need measuring to confirm) + * Disadvantage: + * - code duplication + */ + } + + /* conversion loop */ + while(source<sourceLimit) { + if(targetCapacity>0) { + b=*source++; + if(U8_IS_SINGLE(b)) { + /* convert ASCII */ + if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) { + *target++=(uint8_t)b; + --targetCapacity; + continue; + } else { + c=b; + value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, 0, c); + } + } else { + if(b<0xe0) { + if( /* handle U+0080..U+07FF inline */ + b>=0xc2 && + (t1=(uint8_t)(*source-0x80)) <= 0x3f + ) { + c=b&0x1f; + ++source; + value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t1); + if(value>=minValue) { + *target++=(uint8_t)value; + --targetCapacity; + continue; + } else { + c=(c<<6)|t1; + } + } else { + c=-1; + } + } else if(b==0xe0) { + if( /* handle U+0800..U+0FFF inline */ + (t1=(uint8_t)(source[0]-0x80)) <= 0x3f && t1 >= 0x20 && + (t2=(uint8_t)(source[1]-0x80)) <= 0x3f + ) { + c=t1; + source+=2; + value=SBCS_RESULT_FROM_UTF8(sbcsIndex, results, c, t2); + if(value>=minValue) { + *target++=(uint8_t)value; + --targetCapacity; + continue; + } else { + c=(c<<6)|t2; + } + } else { + c=-1; + } + } else { + c=-1; + } + + if(c<0) { + /* handle "complicated" and error cases, and continuing partial characters */ + oldToULength=0; + toULength=1; + toULimit=U8_COUNT_BYTES_NON_ASCII(b); + c=b; +moreBytes: + while(toULength<toULimit) { + /* + * The sourceLimit may have been adjusted before the conversion loop + * to stop before a truncated sequence. + * Here we need to use the real limit in case we have two truncated + * sequences at the end. + * See ticket #7492. + */ + if(source<(uint8_t *)pToUArgs->sourceLimit) { + b=*source; + if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) { + ++source; + ++toULength; + c=(c<<6)+b; + } else { + break; /* sequence too short, stop with toULength<toULimit */ + } + } else { + /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */ + source-=(toULength-oldToULength); + while(oldToULength<toULength) { + utf8->toUBytes[oldToULength++]=*source++; + } + utf8->toUnicodeStatus=c; + utf8->toULength=toULength; + utf8->mode=toULimit; + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; + return; + } + } + + if(toULength==toULimit) { + c-=utf8_offsets[toULength]; + if(toULength<=3) { /* BMP */ + value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); + } else { + /* supplementary code point */ + if(!hasSupplementary) { + /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ + value=0; + } else { + value=MBCS_SINGLE_RESULT_FROM_U(table, results, c); + } + } + } else { + /* error handling: illegal UTF-8 byte sequence */ + source-=(toULength-oldToULength); + while(oldToULength<toULength) { + utf8->toUBytes[oldToULength++]=*source++; + } + utf8->toULength=toULength; + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + return; + } + } + } + + if(value>=minValue) { + /* output the mapping for c */ + *target++=(uint8_t)value; + --targetCapacity; + } else { + /* value<minValue means c is unassigned (unmappable) */ + /* + * Try an extension mapping. + * Pass in no source because we don't have UTF-16 input. + * If we have a partial match on c, we will return and revert + * to UTF-8->UTF-16->charset conversion. + */ + static const UChar nul=0; + const UChar *noSource=&nul; + c=_extFromU(cnv, cnv->sharedData, + c, &noSource, noSource, + &target, target+targetCapacity, + NULL, -1, + pFromUArgs->flush, + pErrorCode); + + if(U_FAILURE(*pErrorCode)) { + /* not mappable or buffer overflow */ + cnv->fromUChar32=c; + break; + } else if(cnv->preFromUFirstCP>=0) { + /* + * Partial match, return and revert to pivoting. + * In normal from-UTF-16 conversion, we would just continue + * but then exit the loop because the extension match would + * have consumed the source. + */ + *pErrorCode=U_USING_DEFAULT_WARNING; + break; + } else { + /* a mapping was written to the target, continue */ + + /* recalculate the targetCapacity after an extension mapping */ + targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target); + } + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + /* + * The sourceLimit may have been adjusted before the conversion loop + * to stop before a truncated sequence. + * If so, then collect the truncated sequence now. + */ + if(U_SUCCESS(*pErrorCode) && + cnv->preFromUFirstCP<0 && + source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { + c=utf8->toUBytes[0]=b=*source++; + toULength=1; + toULimit=U8_COUNT_BYTES(b); + while(source<sourceLimit) { + utf8->toUBytes[toULength++]=b=*source++; + c=(c<<6)+b; + } + utf8->toUnicodeStatus=c; + utf8->toULength=toULength; + utf8->mode=toULimit; + } + + /* write back the updated pointers */ + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; +} + +static void U_CALLCONV +ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, + UConverterToUnicodeArgs *pToUArgs, + UErrorCode *pErrorCode) { + UConverter *utf8, *cnv; + const uint8_t *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + + const uint16_t *table, *mbcsIndex; + const uint16_t *results; + + int8_t oldToULength, toULength, toULimit; + + UChar32 c; + uint8_t b, t1, t2; + + uint32_t stage2Entry; + uint32_t asciiRoundtrips; + uint16_t value = 0; + UBool hasSupplementary; + + /* set up the local pointers */ + utf8=pToUArgs->converter; + cnv=pFromUArgs->converter; + source=(uint8_t *)pToUArgs->source; + sourceLimit=(uint8_t *)pToUArgs->sourceLimit; + target=(uint8_t *)pFromUArgs->target; + targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target); + + table=cnv->sharedData->mbcs.fromUnicodeTable; + mbcsIndex=cnv->sharedData->mbcs.mbcsIndex; + if((cnv->options&UCNV_OPTION_SWAP_LFNL)!=0) { + results=(uint16_t *)cnv->sharedData->mbcs.swapLFNLFromUnicodeBytes; + } else { + results=(uint16_t *)cnv->sharedData->mbcs.fromUnicodeBytes; + } + asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; + + hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); + + /* get the converter state from the UTF-8 UConverter */ + if(utf8->toULength > 0) { + toULength=oldToULength=utf8->toULength; + toULimit=(int8_t)utf8->mode; + c=(UChar32)utf8->toUnicodeStatus; + } else { + toULength=oldToULength=toULimit=0; + c = 0; + } + + // The conversion loop checks source<sourceLimit only once per 1/2/3-byte character. + // If the buffer ends with a truncated 2- or 3-byte sequence, + // then we reduce the sourceLimit to before that, + // and collect the remaining bytes after the conversion loop. + { + // Do not go back into the bytes that will be read for finishing a partial + // sequence from the previous buffer. + int32_t length=(int32_t)(sourceLimit-source) - (toULimit-oldToULength); + if(length>0) { + uint8_t b1=*(sourceLimit-1); + if(U8_IS_SINGLE(b1)) { + // common ASCII character + } else if(U8_IS_TRAIL(b1) && length>=2) { + uint8_t b2=*(sourceLimit-2); + if(0xe0<=b2 && b2<0xf0 && U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { + // truncated 3-byte sequence + sourceLimit-=2; + } + } else if(0xc2<=b1 && b1<0xf0) { + // truncated 2- or 3-byte sequence + --sourceLimit; + } + } + } + + if(c!=0 && targetCapacity>0) { + utf8->toUnicodeStatus=0; + utf8->toULength=0; + goto moreBytes; + /* See note in ucnv_SBCSFromUTF8() about this goto. */ + } + + /* conversion loop */ + while(source<sourceLimit) { + if(targetCapacity>0) { + b=*source++; + if(U8_IS_SINGLE(b)) { + /* convert ASCII */ + if(IS_ASCII_ROUNDTRIP(b, asciiRoundtrips)) { + *target++=b; + --targetCapacity; + continue; + } else { + value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, 0, b); + if(value==0) { + c=b; + goto unassigned; + } + } + } else { + if(b>=0xe0) { + if( /* handle U+0800..U+D7FF inline */ + b<=0xed && // do not assume maxFastUChar>0xd7ff + U8_IS_VALID_LEAD3_AND_T1(b, t1=source[0]) && + (t2=(uint8_t)(source[1]-0x80)) <= 0x3f + ) { + c=((b&0xf)<<6)|(t1&0x3f); + source+=2; + value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t2); + if(value==0) { + c=(c<<6)|t2; + goto unassigned; + } + } else { + c=-1; + } + } else { + if( /* handle U+0080..U+07FF inline */ + b>=0xc2 && + (t1=(uint8_t)(*source-0x80)) <= 0x3f + ) { + c=b&0x1f; + ++source; + value=DBCS_RESULT_FROM_UTF8(mbcsIndex, results, c, t1); + if(value==0) { + c=(c<<6)|t1; + goto unassigned; + } + } else { + c=-1; + } + } + + if(c<0) { + /* handle "complicated" and error cases, and continuing partial characters */ + oldToULength=0; + toULength=1; + toULimit=U8_COUNT_BYTES_NON_ASCII(b); + c=b; +moreBytes: + while(toULength<toULimit) { + /* + * The sourceLimit may have been adjusted before the conversion loop + * to stop before a truncated sequence. + * Here we need to use the real limit in case we have two truncated + * sequences at the end. + * See ticket #7492. + */ + if(source<(uint8_t *)pToUArgs->sourceLimit) { + b=*source; + if(icu::UTF8::isValidTrail(c, b, toULength, toULimit)) { + ++source; + ++toULength; + c=(c<<6)+b; + } else { + break; /* sequence too short, stop with toULength<toULimit */ + } + } else { + /* store the partial UTF-8 character, compatible with the regular UTF-8 converter */ + source-=(toULength-oldToULength); + while(oldToULength<toULength) { + utf8->toUBytes[oldToULength++]=*source++; + } + utf8->toUnicodeStatus=c; + utf8->toULength=toULength; + utf8->mode=toULimit; + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; + return; + } + } + + if(toULength==toULimit) { + c-=utf8_offsets[toULength]; + if(toULength<=3) { /* BMP */ + stage2Entry=MBCS_STAGE_2_FROM_U(table, c); + } else { + /* supplementary code point */ + if(!hasSupplementary) { + /* BMP-only codepages are stored without stage 1 entries for supplementary code points */ + stage2Entry=0; + } else { + stage2Entry=MBCS_STAGE_2_FROM_U(table, c); + } + } + } else { + /* error handling: illegal UTF-8 byte sequence */ + source-=(toULength-oldToULength); + while(oldToULength<toULength) { + utf8->toUBytes[oldToULength++]=*source++; + } + utf8->toULength=toULength; + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + return; + } + + /* get the bytes and the length for the output */ + /* MBCS_OUTPUT_2 */ + value=MBCS_VALUE_2_FROM_STAGE_2(results, stage2Entry, c); + + /* is this code point assigned, or do we use fallbacks? */ + if(!(MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) || + (UCNV_FROM_U_USE_FALLBACK(cnv, c) && value!=0)) + ) { + goto unassigned; + } + } + } + + /* write the output character bytes from value and length */ + /* from the first if in the loop we know that targetCapacity>0 */ + if(value<=0xff) { + /* this is easy because we know that there is enough space */ + *target++=(uint8_t)value; + --targetCapacity; + } else /* length==2 */ { + *target++=(uint8_t)(value>>8); + if(2<=targetCapacity) { + *target++=(uint8_t)value; + targetCapacity-=2; + } else { + cnv->charErrorBuffer[0]=(char)value; + cnv->charErrorBufferLength=1; + + /* target overflow */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + continue; + +unassigned: + { + /* + * Try an extension mapping. + * Pass in no source because we don't have UTF-16 input. + * If we have a partial match on c, we will return and revert + * to UTF-8->UTF-16->charset conversion. + */ + static const UChar nul=0; + const UChar *noSource=&nul; + c=_extFromU(cnv, cnv->sharedData, + c, &noSource, noSource, + &target, target+targetCapacity, + NULL, -1, + pFromUArgs->flush, + pErrorCode); + + if(U_FAILURE(*pErrorCode)) { + /* not mappable or buffer overflow */ + cnv->fromUChar32=c; + break; + } else if(cnv->preFromUFirstCP>=0) { + /* + * Partial match, return and revert to pivoting. + * In normal from-UTF-16 conversion, we would just continue + * but then exit the loop because the extension match would + * have consumed the source. + */ + *pErrorCode=U_USING_DEFAULT_WARNING; + break; + } else { + /* a mapping was written to the target, continue */ + + /* recalculate the targetCapacity after an extension mapping */ + targetCapacity=(int32_t)(pFromUArgs->targetLimit-(char *)target); + continue; + } + } + } else { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + /* + * The sourceLimit may have been adjusted before the conversion loop + * to stop before a truncated sequence. + * If so, then collect the truncated sequence now. + */ + if(U_SUCCESS(*pErrorCode) && + cnv->preFromUFirstCP<0 && + source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { + c=utf8->toUBytes[0]=b=*source++; + toULength=1; + toULimit=U8_COUNT_BYTES(b); + while(source<sourceLimit) { + utf8->toUBytes[toULength++]=b=*source++; + c=(c<<6)+b; + } + utf8->toUnicodeStatus=c; + utf8->toULength=toULength; + utf8->mode=toULimit; + } + + /* write back the updated pointers */ + pToUArgs->source=(char *)source; + pFromUArgs->target=(char *)target; +} + +/* miscellaneous ------------------------------------------------------------ */ + +static void U_CALLCONV +ucnv_MBCSGetStarters(const UConverter* cnv, + UBool starters[256], + UErrorCode *) { + const int32_t *state0; + int i; + + state0=cnv->sharedData->mbcs.stateTable[cnv->sharedData->mbcs.dbcsOnlyState]; + for(i=0; i<256; ++i) { + /* all bytes that cause a state transition from state 0 are lead bytes */ + starters[i]= (UBool)MBCS_ENTRY_IS_TRANSITION(state0[i]); + } +} + +/* + * This is an internal function that allows other converter implementations + * to check whether a byte is a lead byte. + */ +U_CFUNC UBool +ucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte) { + return (UBool)MBCS_ENTRY_IS_TRANSITION(sharedData->mbcs.stateTable[0][(uint8_t)byte]); +} + +static void U_CALLCONV +ucnv_MBCSWriteSub(UConverterFromUnicodeArgs *pArgs, + int32_t offsetIndex, + UErrorCode *pErrorCode) { + UConverter *cnv=pArgs->converter; + char *p, *subchar; + char buffer[4]; + int32_t length; + + /* first, select between subChar and subChar1 */ + if( cnv->subChar1!=0 && + (cnv->sharedData->mbcs.extIndexes!=NULL ? + cnv->useSubChar1 : + (cnv->invalidUCharBuffer[0]<=0xff)) + ) { + /* select subChar1 if it is set (not 0) and the unmappable Unicode code point is up to U+00ff (IBM MBCS behavior) */ + subchar=(char *)&cnv->subChar1; + length=1; + } else { + /* select subChar in all other cases */ + subchar=(char *)cnv->subChars; + length=cnv->subCharLen; + } + + /* reset the selector for the next code point */ + cnv->useSubChar1=FALSE; + + if (cnv->sharedData->mbcs.outputType == MBCS_OUTPUT_2_SISO) { + p=buffer; + + /* fromUnicodeStatus contains prevLength */ + switch(length) { + case 1: + if(cnv->fromUnicodeStatus==2) { + /* DBCS mode and SBCS sub char: change to SBCS */ + cnv->fromUnicodeStatus=1; + *p++=UCNV_SI; + } + *p++=subchar[0]; + break; + case 2: + if(cnv->fromUnicodeStatus<=1) { + /* SBCS mode and DBCS sub char: change to DBCS */ + cnv->fromUnicodeStatus=2; + *p++=UCNV_SO; + } + *p++=subchar[0]; + *p++=subchar[1]; + break; + default: + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + subchar=buffer; + length=(int32_t)(p-buffer); + } + + ucnv_cbFromUWriteBytes(pArgs, subchar, length, offsetIndex, pErrorCode); +} + +U_CFUNC UConverterType +ucnv_MBCSGetType(const UConverter* converter) { + /* SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but here we cheat a little */ + if(converter->sharedData->mbcs.countStates==1) { + return (UConverterType)UCNV_SBCS; + } else if((converter->sharedData->mbcs.outputType&0xff)==MBCS_OUTPUT_2_SISO) { + return (UConverterType)UCNV_EBCDIC_STATEFUL; + } else if(converter->sharedData->staticData->minBytesPerChar==2 && converter->sharedData->staticData->maxBytesPerChar==2) { + return (UConverterType)UCNV_DBCS; + } + return (UConverterType)UCNV_MBCS; +} + +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ diff --git a/thirdparty/icu4c/common/ucnvmbcs.h b/thirdparty/icu4c/common/ucnvmbcs.h new file mode 100644 index 0000000000..c8f3b89a5e --- /dev/null +++ b/thirdparty/icu4c/common/ucnvmbcs.h @@ -0,0 +1,605 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2000-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ucnvmbcs.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000jul07 +* created by: Markus W. Scherer +*/ + +#ifndef __UCNVMBCS_H__ +#define __UCNVMBCS_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv.h" +#include "ucnv_cnv.h" +#include "ucnv_ext.h" + +/** + * ICU conversion (.cnv) data file structure, following the usual UDataInfo + * header. + * + * Format version: 6.2 + * + * struct UConverterStaticData -- struct containing the converter name, IBM CCSID, + * min/max bytes per character, etc. + * see ucnv_bld.h + * + * -------------------- + * + * The static data is followed by conversionType-specific data structures. + * At the moment, there are only variations of MBCS converters. They all have + * the same toUnicode structures, while the fromUnicode structures for SBCS + * differ from those for other MBCS-style converters. + * + * _MBCSHeader.version 5 is optional and not backward-compatible + * (as usual for changes in the major version field). + * + * Versions 5.m work like versions 4.m except: + * - The _MBCSHeader has variable length (and is always longer than in version 4). + * See the struct _MBCSHeader further description below. + * - There is a set of flags which indicate further incompatible changes. + * (Reader code must reject the file if it does not recognize them all.) + * - In particular, one of these flags indicates that most of the fromUnicode + * data is missing and must be reconstituted from the toUnicode data + * and from the utf8Friendly mbcsIndex at load time. + * (This only works with a utf8Friendly table.) + * In this case, makeconv may increase maxFastUChar automatically to U+FFFF. + * + * The first of these versions is 5.3, which is like 4.3 except for the differences above. + * + * When possible, makeconv continues to generate version 4.m files. + * + * _MBCSHeader.version 5.4/4.4 supports "good one-way" mappings (|4) + * in the extension tables (fromUTableValues bit 30). See ucnv_ext.h for details. + * + * _MBCSHeader.version 4.3 optionally modifies the fromUnicode data structures + * slightly and optionally adds a table for conversion to MBCS (non-SBCS) + * charsets. + * + * The modifications are to make the data utf8Friendly. Not every 4.3 file + * file contains utf8Friendly data. + * It is utf8Friendly if _MBCSHeader.version[2]!=0. + * In this case, the data structures are utf8Friendly up to the code point + * maxFastUChar=((_MBCSHeader.version[2]<<8)|0xff) + * + * A utf8Friendly file has fromUnicode stage 3 entries for code points up to + * maxFastUChar allocated in blocks of 64 for indexing with the 6 bits from + * a UTF-8 trail byte. ASCII is allocated linearly with 128 contiguous entries. + * + * In addition, a utf8Friendly MBCS file contains an additional + * uint16_t mbcsIndex[(maxFastUChar+1)>>6]; + * which replaces the stage 1 and 2 tables for indexing with bits from the + * UTF-8 lead byte and middle trail byte. Unlike the older MBCS stage 2 table, + * the mbcsIndex does not contain roundtrip flags. Therefore, all fallbacks + * from code points up to maxFastUChar (and roundtrips to 0x00) are moved to + * the extension data structure. This also allows for faster roundtrip + * conversion from UTF-16. + * + * SBCS files do not contain an additional sbcsIndex[] array because the + * proportional size increase would be noticeable, but the runtime + * code builds one for the code point range for which the runtime conversion + * code is optimized. + * + * For SBCS, maxFastUChar should be at least U+0FFF. The initial makeconv + * implementation sets it to U+1FFF. Because the sbcsIndex is not stored in + * the file, a larger maxFastUChar only affects stage 3 block allocation size + * and is free in empty blocks. (Larger blocks with sparse contents cause larger + * files.) U+1FFF includes almost all of the small scripts. + * U+0FFF covers UTF-8 two-byte sequences and three-byte sequences starting with + * 0xe0. This includes most scripts with legacy SBCS charsets. + * The initial runtime implementation using 4.3 files only builds an sbcsIndex + * for code points up to U+0FFF. + * + * For MBCS, maxFastUChar should be at least U+D7FF (=initial value). + * This boundary is convenient because practically all of the commonly used + * characters are below it, and because it is the boundary to surrogate + * code points, above which special handling is necessary anyway. + * (Surrogate pair assembly for UTF-16, validity checking for UTF-8.) + * + * maxFastUChar could be up to U+FFFF to cover the whole BMP, which could be + * useful especially for conversion from UTF-8 when the input can be assumed + * to be valid, because the surrogate range would then not have to be + * checked. + * (With maxFastUChar=0xffff, makeconv would have to check for mbcsIndex value + * overflow because with the all-unassigned block 0 and nearly full mappings + * from the BMP it is theoretically possible that an index into stage 3 + * exceeds 16 bits.) + * + * _MBCSHeader.version 4.2 adds an optional conversion extension data structure. + * If it is present, then an ICU version reading header versions 4.0 or 4.1 + * will be able to use the base table and ignore the extension. + * + * The unicodeMask in the static data is part of the base table data structure. + * Especially, the UCNV_HAS_SUPPLEMENTARY flag determines the length of the + * fromUnicode stage 1 array. + * The static data unicodeMask refers only to the base table's properties if + * a base table is included. + * In an extension-only file, the static data unicodeMask is 0. + * The extension data indexes have a separate field with the unicodeMask flags. + * + * MBCS-style data structure following the static data. + * Offsets are counted in bytes from the beginning of the MBCS header structure. + * Details about usage in comments in ucnvmbcs.c. + * + * struct _MBCSHeader (see the definition in this header file below) + * contains 32-bit fields as follows: + * 8 values: + * 0 uint8_t[4] MBCS version in UVersionInfo format (currently 4.3.x.0) + * 1 uint32_t countStates + * 2 uint32_t countToUFallbacks + * 3 uint32_t offsetToUCodeUnits + * 4 uint32_t offsetFromUTable + * 5 uint32_t offsetFromUBytes + * 6 uint32_t flags, bits: + * 31.. 8 offsetExtension -- _MBCSHeader.version 4.2 (ICU 2.8) and higher + * 0 for older versions and if + * there is not extension structure + * 7.. 0 outputType + * 7 uint32_t fromUBytesLength -- _MBCSHeader.version 4.1 (ICU 2.4) and higher + * counts bytes in fromUBytes[] + * + * New and required in version 5: + * 8 uint32_t options, bits: + * 31..16 reserved for flags that can be added without breaking + * backward compatibility + * 15.. 6 reserved for flags whose addition will break + * backward compatibility + * 6 MBCS_OPT_FROM_U -- if set, + * then most of the fromUnicode data is omitted; + * fullStage2Length is present and the missing + * bottom part of stage 2 must be reconstituted from + * the toUnicode data; + * stage 3 is missing completely as well; + * not used for SBCS tables + * 5.. 0 length of the _MBCSHeader (number of uint32_t) + * + * New and optional in version 5: + * 9 uint32_t fullStage2Length: used if MBCS_OPT_FROM_U is set + * specifies the full length of stage 2 + * including the omitted part + * + * if(outputType==MBCS_OUTPUT_EXT_ONLY) { + * -- base table name for extension-only table + * char baseTableName[variable]; -- with NUL plus padding for 4-alignment + * + * -- all _MBCSHeader fields except for version and flags are 0 + * } else { + * -- normal base table with optional extension + * + * int32_t stateTable[countStates][256]; + * + * struct _MBCSToUFallback { (fallbacks are sorted by offset) + * uint32_t offset; + * UChar32 codePoint; + * } toUFallbacks[countToUFallbacks]; + * + * uint16_t unicodeCodeUnits[(offsetFromUTable-offsetToUCodeUnits)/2]; + * (padded to an even number of units) + * + * -- stage 1 tables + * if(staticData.unicodeMask&UCNV_HAS_SUPPLEMENTARY) { + * -- stage 1 table for all of Unicode + * uint16_t fromUTable[0x440]; (32-bit-aligned) + * } else { + * -- BMP-only tables have a smaller stage 1 table + * uint16_t fromUTable[0x40]; (32-bit-aligned) + * } + * + * -- stage 2 tables + * length determined by top of stage 1 and bottom of stage 3 tables + * if(outputType==MBCS_OUTPUT_1) { + * -- SBCS: pure indexes + * uint16_t stage 2 indexes[?]; + * } else { + * -- DBCS, MBCS, EBCDIC_STATEFUL, ...: roundtrip flags and indexes + * uint32_t stage 2 flags and indexes[?]; + * if(options&MBCS_OPT_NO_FROM_U) { + * stage 2 really has length fullStage2Length + * and the omitted lower part must be reconstituted from + * the toUnicode data + * } + * } + * + * -- stage 3 tables with byte results + * if(outputType==MBCS_OUTPUT_1) { + * -- SBCS: each 16-bit result contains flags and the result byte, see ucnvmbcs.c + * uint16_t fromUBytes[fromUBytesLength/2]; + * } else if(!(options&MBCS_OPT_NO_FROM_U)) { + * -- DBCS, MBCS, EBCDIC_STATEFUL, ... 2/3/4 bytes result, see ucnvmbcs.c + * uint8_t fromUBytes[fromUBytesLength]; or + * uint16_t fromUBytes[fromUBytesLength/2]; or + * uint32_t fromUBytes[fromUBytesLength/4]; + * } else { + * fromUBytes[] must be reconstituted from the toUnicode data + * } + * + * -- optional utf8Friendly mbcsIndex -- _MBCSHeader.version 4.3 (ICU 3.8) and higher + * if(outputType!=MBCS_OUTPUT_1 && + * _MBCSHeader.version[1]>=3 && + * (maxFastUChar=_MBCSHeader.version[2])!=0 + * ) { + * maxFastUChar=(maxFastUChar<<8)|0xff; + * uint16_t mbcsIndex[(maxFastUChar+1)>>6]; + * } + * } + * + * -- extension table, details see ucnv_ext.h + * int32_t indexes[>=32]; ... + */ + +/* MBCS converter data and state -------------------------------------------- */ + +enum { + MBCS_MAX_STATE_COUNT=128 +}; + +/** + * MBCS action codes for conversions to Unicode. + * These values are in bits 23..20 of the state table entries. + */ +enum { + MBCS_STATE_VALID_DIRECT_16, + MBCS_STATE_VALID_DIRECT_20, + + MBCS_STATE_FALLBACK_DIRECT_16, + MBCS_STATE_FALLBACK_DIRECT_20, + + MBCS_STATE_VALID_16, + MBCS_STATE_VALID_16_PAIR, + + MBCS_STATE_UNASSIGNED, + MBCS_STATE_ILLEGAL, + + MBCS_STATE_CHANGE_ONLY +}; + +/* Macros for state table entries */ +#define MBCS_ENTRY_TRANSITION(state, offset) (int32_t)(((int32_t)(state)<<24L)|(offset)) +#define MBCS_ENTRY_TRANSITION_SET_OFFSET(entry, offset) (int32_t)(((entry)&0xff000000)|(offset)) +#define MBCS_ENTRY_TRANSITION_ADD_OFFSET(entry, offset) (int32_t)((entry)+(offset)) + +#define MBCS_ENTRY_FINAL(state, action, value) (int32_t)(0x80000000|((int32_t)(state)<<24L)|((action)<<20L)|(value)) +#define MBCS_ENTRY_SET_FINAL(entry) (int32_t)((entry)|0x80000000) +#define MBCS_ENTRY_FINAL_SET_ACTION(entry, action) (int32_t)(((entry)&0xff0fffff)|((int32_t)(action)<<20L)) +#define MBCS_ENTRY_FINAL_SET_VALUE(entry, value) (int32_t)(((entry)&0xfff00000)|(value)) +#define MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, action, value) (int32_t)(((entry)&0xff000000)|((int32_t)(action)<<20L)|(value)) + +#define MBCS_ENTRY_SET_STATE(entry, state) (int32_t)(((entry)&0x80ffffff)|((int32_t)(state)<<24L)) + +#define MBCS_ENTRY_STATE(entry) ((((uint32_t)entry)>>24)&0x7f) + +#define MBCS_ENTRY_IS_TRANSITION(entry) ((entry)>=0) +#define MBCS_ENTRY_IS_FINAL(entry) ((entry)<0) + +#define MBCS_ENTRY_TRANSITION_STATE(entry) (((uint32_t)entry)>>24) +#define MBCS_ENTRY_TRANSITION_OFFSET(entry) ((entry)&0xffffff) + +#define MBCS_ENTRY_FINAL_STATE(entry) ((((uint32_t)entry)>>24)&0x7f) +#define MBCS_ENTRY_FINAL_IS_VALID_DIRECT_16(entry) ((entry)<(int32_t)0x80100000) +#define MBCS_ENTRY_FINAL_ACTION(entry) ((((uint32_t)entry)>>20)&0xf) +#define MBCS_ENTRY_FINAL_VALUE(entry) ((entry)&0xfffff) +#define MBCS_ENTRY_FINAL_VALUE_16(entry) (uint16_t)(entry) + +#define IS_ASCII_ROUNDTRIP(b, asciiRoundtrips) (((asciiRoundtrips) & (1<<((b)>>2)))!=0) + +/* single-byte fromUnicode: get the 16-bit result word */ +#define MBCS_SINGLE_RESULT_FROM_U(table, results, c) (results)[ (table)[ (table)[(c)>>10] +(((c)>>4)&0x3f) ] +((c)&0xf) ] + +/* single-byte fromUnicode using the sbcsIndex */ +#define SBCS_RESULT_FROM_LOW_BMP(table, results, c) (results)[ (table)[(c)>>6] +((c)&0x3f) ] + +/* single-byte fromUTF8 using the sbcsIndex; l and t must be masked externally; can be l=0 and t<=0x7f */ +#define SBCS_RESULT_FROM_UTF8(table, results, l, t) (results)[ (table)[l] +(t) ] + +/* multi-byte fromUnicode: get the 32-bit stage 2 entry */ +#define MBCS_STAGE_2_FROM_U(table, c) ((const uint32_t *)(table))[ (table)[(c)>>10] +(((c)>>4)&0x3f) ] +#define MBCS_FROM_U_IS_ROUNDTRIP(stage2Entry, c) ( ((stage2Entry) & ((uint32_t)1<< (16+((c)&0xf)) )) !=0) + +#define MBCS_VALUE_2_FROM_STAGE_2(bytes, stage2Entry, c) ((uint16_t *)(bytes))[16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)] +#define MBCS_VALUE_4_FROM_STAGE_2(bytes, stage2Entry, c) ((uint32_t *)(bytes))[16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf)] + +#define MBCS_POINTER_3_FROM_STAGE_2(bytes, stage2Entry, c) ((bytes)+(16*(uint32_t)(uint16_t)(stage2Entry)+((c)&0xf))*3) + +/* double-byte fromUnicode using the mbcsIndex */ +#define DBCS_RESULT_FROM_MOST_BMP(table, results, c) (results)[ (table)[(c)>>6] +((c)&0x3f) ] + +/* double-byte fromUTF8 using the mbcsIndex; l and t1 combined into lt1; lt1 and t2 must be masked externally */ +#define DBCS_RESULT_FROM_UTF8(table, results, lt1, t2) (results)[ (table)[lt1] +(t2) ] + + +/** + * MBCS output types for conversions from Unicode. + * These per-converter types determine the storage method in stage 3 of the lookup table, + * mostly how many bytes are stored per entry. + */ +enum { + MBCS_OUTPUT_1, /* 0 */ + MBCS_OUTPUT_2, /* 1 */ + MBCS_OUTPUT_3, /* 2 */ + MBCS_OUTPUT_4, /* 3 */ + + MBCS_OUTPUT_3_EUC=8, /* 8 */ + MBCS_OUTPUT_4_EUC, /* 9 */ + + MBCS_OUTPUT_2_SISO=12, /* c */ + MBCS_OUTPUT_2_HZ, /* d */ + + MBCS_OUTPUT_EXT_ONLY, /* e */ + + MBCS_OUTPUT_COUNT, + + MBCS_OUTPUT_DBCS_ONLY=0xdb /* runtime-only type for DBCS-only handling of SISO tables */ +}; + +/** + * Fallbacks to Unicode are stored outside the normal state table and code point structures + * in a vector of items of this type. They are sorted by offset. + */ +typedef struct { + uint32_t offset; + UChar32 codePoint; +} _MBCSToUFallback; + +/** Constants for fast and UTF-8-friendly conversion. */ +enum { + SBCS_FAST_MAX=0x0fff, /* maximum code point with UTF-8-friendly SBCS runtime code, see makeconv SBCS_UTF8_MAX */ + SBCS_FAST_LIMIT=SBCS_FAST_MAX+1, /* =0x1000 */ + MBCS_FAST_MAX=0xd7ff, /* maximum code point with UTF-8-friendly MBCS runtime code, see makeconv MBCS_UTF8_MAX */ + MBCS_FAST_LIMIT=MBCS_FAST_MAX+1 /* =0xd800 */ +}; + +/** + * This is the MBCS part of the UConverterTable union (a runtime data structure). + * It keeps all the per-converter data and points into the loaded mapping tables. + * + * utf8Friendly data structures added with _MBCSHeader.version 4.3 + */ +typedef struct UConverterMBCSTable { + /* toUnicode */ + uint8_t countStates, dbcsOnlyState, stateTableOwned; + uint32_t countToUFallbacks; + + const int32_t (*stateTable)/*[countStates]*/[256]; + int32_t (*swapLFNLStateTable)/*[countStates]*/[256]; /* for swaplfnl */ + const uint16_t *unicodeCodeUnits/*[countUnicodeResults]*/; + const _MBCSToUFallback *toUFallbacks; + + /* fromUnicode */ + const uint16_t *fromUnicodeTable; + const uint16_t *mbcsIndex; /* for fast conversion from most of BMP to MBCS (utf8Friendly data) */ + uint16_t sbcsIndex[SBCS_FAST_LIMIT>>6]; /* for fast conversion from low BMP to SBCS (utf8Friendly data) */ + const uint8_t *fromUnicodeBytes; + uint8_t *swapLFNLFromUnicodeBytes; /* for swaplfnl */ + uint32_t fromUBytesLength; + uint8_t outputType, unicodeMask; + UBool utf8Friendly; /* for utf8Friendly data */ + UChar maxFastUChar; /* for utf8Friendly data */ + + /* roundtrips */ + uint32_t asciiRoundtrips; + + /* reconstituted data that was omitted from the .cnv file */ + uint8_t *reconstitutedData; + + /* converter name for swaplfnl */ + char *swapLFNLName; + + /* extension data */ + struct UConverterSharedData *baseSharedData; + const int32_t *extIndexes; +} UConverterMBCSTable; + +#define UCNV_MBCS_TABLE_INITIALIZER { \ + /* toUnicode */ \ + 0, 0, 0, \ + 0, \ + \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + \ + /* fromUnicode */ \ + NULL, \ + NULL, \ + { 0 }, \ + NULL, \ + NULL, \ + 0, \ + 0, 0, \ + false, \ + 0, \ + \ + /* roundtrips */ \ + 0, \ + \ + /* reconstituted data that was omitted from the .cnv file */ \ + NULL, \ + \ + /* converter name for swaplfnl */ \ + NULL, \ + \ + /* extension data */ \ + NULL, \ + NULL \ +} + +enum { + MBCS_OPT_LENGTH_MASK=0x3f, + MBCS_OPT_NO_FROM_U=0x40, + /* + * If any of the following options bits are set, + * then the file must be rejected. + */ + MBCS_OPT_INCOMPATIBLE_MASK=0xffc0, + /* + * Remove bits from this mask as more options are recognized + * by all implementations that use this constant. + */ + MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK=0xff80 +}; + +enum { + MBCS_HEADER_V4_LENGTH=8, + MBCS_HEADER_V5_MIN_LENGTH=9 +}; + +/** + * MBCS data header. See data format description above. + */ +typedef struct { + UVersionInfo version; + uint32_t countStates, + countToUFallbacks, + offsetToUCodeUnits, + offsetFromUTable, + offsetFromUBytes, + flags, + fromUBytesLength; + + /* new and required in version 5 */ + uint32_t options; + + /* new and optional in version 5; used if options&MBCS_OPT_NO_FROM_U */ + uint32_t fullStage2Length; /* number of 32-bit units */ +} _MBCSHeader; + +#define UCNV_MBCS_HEADER_INITIALIZER { { 0 }, 0, 0, 0, 0, 0, 0, 0, 0, 0 } + +/* + * This is a simple version of _MBCSGetNextUChar() that is used + * by other converter implementations. + * It only returns an "assigned" result if it consumes the entire input. + * It does not use state from the converter, nor error codes. + * It does not handle the EBCDIC swaplfnl option (set in UConverter). + * It handles conversion extensions but not GB 18030. + * + * Return value: + * U+fffe unassigned + * U+ffff illegal + * otherwise the Unicode code point + */ +U_CFUNC UChar32 +ucnv_MBCSSimpleGetNextUChar(UConverterSharedData *sharedData, + const char *source, int32_t length, + UBool useFallback); + +/** + * This version of _MBCSSimpleGetNextUChar() is optimized for single-byte, single-state codepages. + * It does not handle the EBCDIC swaplfnl option (set in UConverter). + * It does not handle conversion extensions (_extToU()). + */ +U_CFUNC UChar32 +ucnv_MBCSSingleSimpleGetNextUChar(UConverterSharedData *sharedData, + uint8_t b, UBool useFallback); + +/** + * This macro version of _MBCSSingleSimpleGetNextUChar() gets a code point from a byte. + * It works for single-byte, single-state codepages that only map + * to and from BMP code points, and it always + * returns fallback values. + */ +#define _MBCS_SINGLE_SIMPLE_GET_NEXT_BMP(sharedData, b) \ + (UChar)MBCS_ENTRY_FINAL_VALUE_16((sharedData)->mbcs.stateTable[0][(uint8_t)(b)]) + +/** + * This is an internal function that allows other converter implementations + * to check whether a byte is a lead byte. + */ +U_CFUNC UBool +ucnv_MBCSIsLeadByte(UConverterSharedData *sharedData, char byte); + +/** This is a macro version of _MBCSIsLeadByte(). */ +#define _MBCS_IS_LEAD_BYTE(sharedData, byte) \ + (UBool)MBCS_ENTRY_IS_TRANSITION((sharedData)->mbcs.stateTable[0][(uint8_t)(byte)]) + +/* + * This is another simple conversion function for internal use by other + * conversion implementations. + * It does not use the converter state nor call callbacks. + * It does not handle the EBCDIC swaplfnl option (set in UConverter). + * It handles conversion extensions but not GB 18030. + * + * It converts one single Unicode code point into codepage bytes, encoded + * as one 32-bit value. The function returns the number of bytes in *pValue: + * 1..4 the number of bytes in *pValue + * 0 unassigned (*pValue undefined) + * -1 illegal (currently not used, *pValue undefined) + * + * *pValue will contain the resulting bytes with the last byte in bits 7..0, + * the second to last byte in bits 15..8, etc. + * Currently, the function assumes but does not check that 0<=c<=0x10ffff. + */ +U_CFUNC int32_t +ucnv_MBCSFromUChar32(UConverterSharedData *sharedData, + UChar32 c, uint32_t *pValue, + UBool useFallback); + +/** + * This version of _MBCSFromUChar32() is optimized for single-byte codepages. + * It does not handle the EBCDIC swaplfnl option (set in UConverter). + * + * It returns the codepage byte for the code point, or -1 if it is unassigned. + */ +U_CFUNC int32_t +ucnv_MBCSSingleFromUChar32(UConverterSharedData *sharedData, + UChar32 c, + UBool useFallback); + +/** + * SBCS, DBCS, and EBCDIC_STATEFUL are replaced by MBCS, but + * we cheat a little about the type, returning the old types if appropriate. + */ +U_CFUNC UConverterType +ucnv_MBCSGetType(const UConverter* converter); + +U_CFUNC void +ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode); +U_CFUNC void +ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode); + +/* + * Internal function returning a UnicodeSet for toUnicode() conversion. + * Currently only used for ISO-2022-CN, and only handles roundtrip mappings. + * In the future, if we add support for fallback sets, this function + * needs to be updated. + * Handles extensions. + * Does not empty the set first. + */ +U_CFUNC void +ucnv_MBCSGetUnicodeSetForUnicode(const UConverterSharedData *sharedData, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode); + +/* + * Same as ucnv_MBCSGetUnicodeSetForUnicode() but + * the set can be filtered by encoding scheme. + * Used by stateful converters which share regular conversion tables + * but only use a subset of their mappings. + */ +U_CFUNC void +ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData, + const USetAdder *sa, + UConverterUnicodeSet which, + UConverterSetFilter filter, + UErrorCode *pErrorCode); + +#endif + +#endif diff --git a/thirdparty/icu4c/common/ucnvscsu.cpp b/thirdparty/icu4c/common/ucnvscsu.cpp new file mode 100644 index 0000000000..74b5722b97 --- /dev/null +++ b/thirdparty/icu4c/common/ucnvscsu.cpp @@ -0,0 +1,2045 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2000-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ucnvscsu.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000nov18 +* created by: Markus W. Scherer +* +* This is an implementation of the Standard Compression Scheme for Unicode +* as defined in http://www.unicode.org/unicode/reports/tr6/ . +* Reserved commands and window settings are treated as illegal sequences and +* will result in callback calls. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/ucnv_cb.h" +#include "unicode/utf16.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "cmemory.h" + +/* SCSU definitions --------------------------------------------------------- */ + +/* SCSU command byte values */ +enum { + SQ0=0x01, /* Quote from window pair 0 */ + SQ7=0x08, /* Quote from window pair 7 */ + SDX=0x0B, /* Define a window as extended */ + Srs=0x0C, /* reserved */ + SQU=0x0E, /* Quote a single Unicode character */ + SCU=0x0F, /* Change to Unicode mode */ + SC0=0x10, /* Select window 0 */ + SC7=0x17, /* Select window 7 */ + SD0=0x18, /* Define and select window 0 */ + SD7=0x1F, /* Define and select window 7 */ + + UC0=0xE0, /* Select window 0 */ + UC7=0xE7, /* Select window 7 */ + UD0=0xE8, /* Define and select window 0 */ + UD7=0xEF, /* Define and select window 7 */ + UQU=0xF0, /* Quote a single Unicode character */ + UDX=0xF1, /* Define a Window as extended */ + Urs=0xF2 /* reserved */ +}; + +enum { + /* + * Unicode code points from 3400 to E000 are not adressible by + * dynamic window, since in these areas no short run alphabets are + * found. Therefore add gapOffset to all values from gapThreshold. + */ + gapThreshold=0x68, + gapOffset=0xAC00, + + /* values between reservedStart and fixedThreshold are reserved */ + reservedStart=0xA8, + + /* use table of predefined fixed offsets for values from fixedThreshold */ + fixedThreshold=0xF9 +}; + +/* constant offsets for the 8 static windows */ +static const uint32_t staticOffsets[8]={ + 0x0000, /* ASCII for quoted tags */ + 0x0080, /* Latin - 1 Supplement (for access to punctuation) */ + 0x0100, /* Latin Extended-A */ + 0x0300, /* Combining Diacritical Marks */ + 0x2000, /* General Punctuation */ + 0x2080, /* Currency Symbols */ + 0x2100, /* Letterlike Symbols and Number Forms */ + 0x3000 /* CJK Symbols and punctuation */ +}; + +/* initial offsets for the 8 dynamic (sliding) windows */ +static const uint32_t initialDynamicOffsets[8]={ + 0x0080, /* Latin-1 */ + 0x00C0, /* Latin Extended A */ + 0x0400, /* Cyrillic */ + 0x0600, /* Arabic */ + 0x0900, /* Devanagari */ + 0x3040, /* Hiragana */ + 0x30A0, /* Katakana */ + 0xFF00 /* Fullwidth ASCII */ +}; + +/* Table of fixed predefined Offsets */ +static const uint32_t fixedOffsets[]={ + /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */ + /* 0xFA */ 0x0250, /* IPA extensions */ + /* 0xFB */ 0x0370, /* Greek */ + /* 0xFC */ 0x0530, /* Armenian */ + /* 0xFD */ 0x3040, /* Hiragana */ + /* 0xFE */ 0x30A0, /* Katakana */ + /* 0xFF */ 0xFF60 /* Halfwidth Katakana */ +}; + +/* state values */ +enum { + readCommand, + quotePairOne, + quotePairTwo, + quoteOne, + definePairOne, + definePairTwo, + defineOne +}; + +typedef struct SCSUData { + /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */ + uint32_t toUDynamicOffsets[8]; + uint32_t fromUDynamicOffsets[8]; + + /* state machine state - toUnicode */ + UBool toUIsSingleByteMode; + uint8_t toUState; + int8_t toUQuoteWindow, toUDynamicWindow; + uint8_t toUByteOne; + uint8_t toUPadding[3]; + + /* state machine state - fromUnicode */ + UBool fromUIsSingleByteMode; + int8_t fromUDynamicWindow; + + /* + * windowUse[] keeps track of the use of the dynamic windows: + * At nextWindowUseIndex there is the least recently used window, + * and the following windows (in a wrapping manner) are more and more + * recently used. + * At nextWindowUseIndex-1 there is the most recently used window. + */ + uint8_t locale; + int8_t nextWindowUseIndex; + int8_t windowUse[8]; +} SCSUData; + +static const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 }; +static const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 }; + +enum { + lGeneric, l_ja +}; + +/* SCSU setup functions ----------------------------------------------------- */ +U_CDECL_BEGIN +static void U_CALLCONV +_SCSUReset(UConverter *cnv, UConverterResetChoice choice) { + SCSUData *scsu=(SCSUData *)cnv->extraInfo; + + if(choice<=UCNV_RESET_TO_UNICODE) { + /* reset toUnicode */ + uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32); + + scsu->toUIsSingleByteMode=TRUE; + scsu->toUState=readCommand; + scsu->toUQuoteWindow=scsu->toUDynamicWindow=0; + scsu->toUByteOne=0; + + cnv->toULength=0; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + /* reset fromUnicode */ + uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32); + + scsu->fromUIsSingleByteMode=TRUE; + scsu->fromUDynamicWindow=0; + + scsu->nextWindowUseIndex=0; + switch(scsu->locale) { + case l_ja: + uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8); + break; + default: + uprv_memcpy(scsu->windowUse, initialWindowUse, 8); + break; + } + + cnv->fromUChar32=0; + } +} + +static void U_CALLCONV +_SCSUOpen(UConverter *cnv, + UConverterLoadArgs *pArgs, + UErrorCode *pErrorCode) { + const char *locale=pArgs->locale; + if(pArgs->onlyTestIsLoadable) { + return; + } + cnv->extraInfo=uprv_malloc(sizeof(SCSUData)); + if(cnv->extraInfo!=NULL) { + if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) { + ((SCSUData *)cnv->extraInfo)->locale=l_ja; + } else { + ((SCSUData *)cnv->extraInfo)->locale=lGeneric; + } + _SCSUReset(cnv, UCNV_RESET_BOTH); + } else { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + } + + /* Set the substitution character U+fffd as a Unicode string. */ + cnv->subUChars[0]=0xfffd; + cnv->subCharLen=-1; +} + +static void U_CALLCONV +_SCSUClose(UConverter *cnv) { + if(cnv->extraInfo!=NULL) { + if(!cnv->isExtraLocal) { + uprv_free(cnv->extraInfo); + } + cnv->extraInfo=NULL; + } +} + +/* SCSU-to-Unicode conversion functions ------------------------------------- */ + +static void U_CALLCONV +_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + SCSUData *scsu; + const uint8_t *source, *sourceLimit; + UChar *target; + const UChar *targetLimit; + int32_t *offsets; + UBool isSingleByteMode; + uint8_t state, byteOne; + int8_t quoteWindow, dynamicWindow; + + int32_t sourceIndex, nextSourceIndex; + + uint8_t b; + + /* set up the local pointers */ + cnv=pArgs->converter; + scsu=(SCSUData *)cnv->extraInfo; + + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=pArgs->target; + targetLimit=pArgs->targetLimit; + offsets=pArgs->offsets; + + /* get the state machine state */ + isSingleByteMode=scsu->toUIsSingleByteMode; + state=scsu->toUState; + quoteWindow=scsu->toUQuoteWindow; + dynamicWindow=scsu->toUDynamicWindow; + byteOne=scsu->toUByteOne; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex=state==readCommand ? 0 : -1; + nextSourceIndex=0; + + /* + * conversion "loop" + * + * For performance, this is not a normal C loop. + * Instead, there are two code blocks for the two SCSU modes. + * The function branches to either one, and a change of the mode is done with a goto to + * the other branch. + * + * Each branch has two conventional loops: + * - a fast-path loop for the most common codes in the mode + * - a loop for all other codes in the mode + * When the fast-path runs into a code that it cannot handle, its loop ends and it + * runs into the following loop to handle the other codes. + * The end of the input or output buffer is also handled by the slower loop. + * The slow loop jumps (goto) to the fast-path loop again as soon as possible. + * + * The callback handling is done by returning with an error code. + * The conversion framework actually calls the callback function. + */ + if(isSingleByteMode) { + /* fast path for single-byte mode */ + if(state==readCommand) { +fastSingle: + while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) { + ++source; + ++nextSourceIndex; + if(b<=0x7f) { + /* write US-ASCII graphic character or DEL */ + *target++=(UChar)b; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + } else { + /* write from dynamic window */ + uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); + if(c<=0xffff) { + *target++=(UChar)c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + } else { + /* output surrogate pair */ + *target++=(UChar)(0xd7c0+(c>>10)); + if(target<targetLimit) { + *target++=(UChar)(0xdc00|(c&0x3ff)); + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex; + } + } else { + /* target overflow */ + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + goto endloop; + } + } + } + sourceIndex=nextSourceIndex; + } + } + + /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ +singleByteMode: + while(source<sourceLimit) { + if(target>=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + b=*source++; + ++nextSourceIndex; + switch(state) { + case readCommand: + /* redundant conditions are commented out */ + /* here: b<0x20 because otherwise we would be in fastSingle */ + if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { + /* CR/LF/TAB/NUL */ + *target++=(UChar)b; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + sourceIndex=nextSourceIndex; + goto fastSingle; + } else if(SC0<=b) { + if(b<=SC7) { + dynamicWindow=(int8_t)(b-SC0); + sourceIndex=nextSourceIndex; + goto fastSingle; + } else /* if(SD0<=b && b<=SD7) */ { + dynamicWindow=(int8_t)(b-SD0); + state=defineOne; + } + } else if(/* SQ0<=b && */ b<=SQ7) { + quoteWindow=(int8_t)(b-SQ0); + state=quoteOne; + } else if(b==SDX) { + state=definePairOne; + } else if(b==SQU) { + state=quotePairOne; + } else if(b==SCU) { + sourceIndex=nextSourceIndex; + isSingleByteMode=FALSE; + goto fastUnicode; + } else /* Srs */ { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + cnv->toUBytes[0]=b; + cnv->toULength=1; + goto endloop; + } + + /* store the first byte of a multibyte sequence in toUBytes[] */ + cnv->toUBytes[0]=b; + cnv->toULength=1; + break; + case quotePairOne: + byteOne=b; + cnv->toUBytes[1]=b; + cnv->toULength=2; + state=quotePairTwo; + break; + case quotePairTwo: + *target++=(UChar)((byteOne<<8)|b); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + sourceIndex=nextSourceIndex; + state=readCommand; + goto fastSingle; + case quoteOne: + if(b<0x80) { + /* all static offsets are in the BMP */ + *target++=(UChar)(staticOffsets[quoteWindow]+b); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + } else { + /* write from dynamic window */ + uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); + if(c<=0xffff) { + *target++=(UChar)c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + } else { + /* output surrogate pair */ + *target++=(UChar)(0xd7c0+(c>>10)); + if(target<targetLimit) { + *target++=(UChar)(0xdc00|(c&0x3ff)); + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex; + } + } else { + /* target overflow */ + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + goto endloop; + } + } + } + sourceIndex=nextSourceIndex; + state=readCommand; + goto fastSingle; + case definePairOne: + dynamicWindow=(int8_t)((b>>5)&7); + byteOne=(uint8_t)(b&0x1f); + cnv->toUBytes[1]=b; + cnv->toULength=2; + state=definePairTwo; + break; + case definePairTwo: + scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); + sourceIndex=nextSourceIndex; + state=readCommand; + goto fastSingle; + case defineOne: + if(b==0) { + /* callback(illegal): Reserved window offset value 0 */ + cnv->toUBytes[1]=b; + cnv->toULength=2; + goto endloop; + } else if(b<gapThreshold) { + scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL; + } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { + scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; + } else if(b>=fixedThreshold) { + scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; + } else { + /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ + cnv->toUBytes[1]=b; + cnv->toULength=2; + goto endloop; + } + sourceIndex=nextSourceIndex; + state=readCommand; + goto fastSingle; + } + } + } else { + /* fast path for Unicode mode */ + if(state==readCommand) { +fastUnicode: + while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) { + *target++=(UChar)((b<<8)|source[1]); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + sourceIndex=nextSourceIndex; + nextSourceIndex+=2; + source+=2; + } + } + + /* normal state machine for Unicode mode */ +/* unicodeByteMode: */ + while(source<sourceLimit) { + if(target>=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + b=*source++; + ++nextSourceIndex; + switch(state) { + case readCommand: + if((uint8_t)(b-UC0)>(Urs-UC0)) { + byteOne=b; + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=quotePairTwo; + } else if(/* UC0<=b && */ b<=UC7) { + dynamicWindow=(int8_t)(b-UC0); + sourceIndex=nextSourceIndex; + isSingleByteMode=TRUE; + goto fastSingle; + } else if(/* UD0<=b && */ b<=UD7) { + dynamicWindow=(int8_t)(b-UD0); + isSingleByteMode=TRUE; + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=defineOne; + goto singleByteMode; + } else if(b==UDX) { + isSingleByteMode=TRUE; + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=definePairOne; + goto singleByteMode; + } else if(b==UQU) { + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=quotePairOne; + } else /* Urs */ { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + cnv->toUBytes[0]=b; + cnv->toULength=1; + goto endloop; + } + break; + case quotePairOne: + byteOne=b; + cnv->toUBytes[1]=b; + cnv->toULength=2; + state=quotePairTwo; + break; + case quotePairTwo: + *target++=(UChar)((byteOne<<8)|b); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + sourceIndex=nextSourceIndex; + state=readCommand; + goto fastUnicode; + } + } + } +endloop: + + /* set the converter state back into UConverter */ + if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { + /* reset to deal with the next character */ + state=readCommand; + } else if(state==readCommand) { + /* not in a multi-byte sequence, reset toULength */ + cnv->toULength=0; + } + scsu->toUIsSingleByteMode=isSingleByteMode; + scsu->toUState=state; + scsu->toUQuoteWindow=quoteWindow; + scsu->toUDynamicWindow=dynamicWindow; + scsu->toUByteOne=byteOne; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + pArgs->offsets=offsets; + return; +} + +/* + * Identical to _SCSUToUnicodeWithOffsets but without offset handling. + * If a change is made in the original function, then either + * change this function the same way or + * re-copy the original function and remove the variables + * offsets, sourceIndex, and nextSourceIndex. + */ +static void U_CALLCONV +_SCSUToUnicode(UConverterToUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + SCSUData *scsu; + const uint8_t *source, *sourceLimit; + UChar *target; + const UChar *targetLimit; + UBool isSingleByteMode; + uint8_t state, byteOne; + int8_t quoteWindow, dynamicWindow; + + uint8_t b; + + /* set up the local pointers */ + cnv=pArgs->converter; + scsu=(SCSUData *)cnv->extraInfo; + + source=(const uint8_t *)pArgs->source; + sourceLimit=(const uint8_t *)pArgs->sourceLimit; + target=pArgs->target; + targetLimit=pArgs->targetLimit; + + /* get the state machine state */ + isSingleByteMode=scsu->toUIsSingleByteMode; + state=scsu->toUState; + quoteWindow=scsu->toUQuoteWindow; + dynamicWindow=scsu->toUDynamicWindow; + byteOne=scsu->toUByteOne; + + /* + * conversion "loop" + * + * For performance, this is not a normal C loop. + * Instead, there are two code blocks for the two SCSU modes. + * The function branches to either one, and a change of the mode is done with a goto to + * the other branch. + * + * Each branch has two conventional loops: + * - a fast-path loop for the most common codes in the mode + * - a loop for all other codes in the mode + * When the fast-path runs into a code that it cannot handle, its loop ends and it + * runs into the following loop to handle the other codes. + * The end of the input or output buffer is also handled by the slower loop. + * The slow loop jumps (goto) to the fast-path loop again as soon as possible. + * + * The callback handling is done by returning with an error code. + * The conversion framework actually calls the callback function. + */ + if(isSingleByteMode) { + /* fast path for single-byte mode */ + if(state==readCommand) { +fastSingle: + while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) { + ++source; + if(b<=0x7f) { + /* write US-ASCII graphic character or DEL */ + *target++=(UChar)b; + } else { + /* write from dynamic window */ + uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f); + if(c<=0xffff) { + *target++=(UChar)c; + } else { + /* output surrogate pair */ + *target++=(UChar)(0xd7c0+(c>>10)); + if(target<targetLimit) { + *target++=(UChar)(0xdc00|(c&0x3ff)); + } else { + /* target overflow */ + cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + goto endloop; + } + } + } + } + } + + /* normal state machine for single-byte mode, minus handling for what fastSingle covers */ +singleByteMode: + while(source<sourceLimit) { + if(target>=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + b=*source++; + switch(state) { + case readCommand: + /* redundant conditions are commented out */ + /* here: b<0x20 because otherwise we would be in fastSingle */ + if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { + /* CR/LF/TAB/NUL */ + *target++=(UChar)b; + goto fastSingle; + } else if(SC0<=b) { + if(b<=SC7) { + dynamicWindow=(int8_t)(b-SC0); + goto fastSingle; + } else /* if(SD0<=b && b<=SD7) */ { + dynamicWindow=(int8_t)(b-SD0); + state=defineOne; + } + } else if(/* SQ0<=b && */ b<=SQ7) { + quoteWindow=(int8_t)(b-SQ0); + state=quoteOne; + } else if(b==SDX) { + state=definePairOne; + } else if(b==SQU) { + state=quotePairOne; + } else if(b==SCU) { + isSingleByteMode=FALSE; + goto fastUnicode; + } else /* Srs */ { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + cnv->toUBytes[0]=b; + cnv->toULength=1; + goto endloop; + } + + /* store the first byte of a multibyte sequence in toUBytes[] */ + cnv->toUBytes[0]=b; + cnv->toULength=1; + break; + case quotePairOne: + byteOne=b; + cnv->toUBytes[1]=b; + cnv->toULength=2; + state=quotePairTwo; + break; + case quotePairTwo: + *target++=(UChar)((byteOne<<8)|b); + state=readCommand; + goto fastSingle; + case quoteOne: + if(b<0x80) { + /* all static offsets are in the BMP */ + *target++=(UChar)(staticOffsets[quoteWindow]+b); + } else { + /* write from dynamic window */ + uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f); + if(c<=0xffff) { + *target++=(UChar)c; + } else { + /* output surrogate pair */ + *target++=(UChar)(0xd7c0+(c>>10)); + if(target<targetLimit) { + *target++=(UChar)(0xdc00|(c&0x3ff)); + } else { + /* target overflow */ + cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff)); + cnv->UCharErrorBufferLength=1; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + goto endloop; + } + } + } + state=readCommand; + goto fastSingle; + case definePairOne: + dynamicWindow=(int8_t)((b>>5)&7); + byteOne=(uint8_t)(b&0x1f); + cnv->toUBytes[1]=b; + cnv->toULength=2; + state=definePairTwo; + break; + case definePairTwo: + scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL); + state=readCommand; + goto fastSingle; + case defineOne: + if(b==0) { + /* callback(illegal): Reserved window offset value 0 */ + cnv->toUBytes[1]=b; + cnv->toULength=2; + goto endloop; + } else if(b<gapThreshold) { + scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL; + } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { + scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset; + } else if(b>=fixedThreshold) { + scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; + } else { + /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ + cnv->toUBytes[1]=b; + cnv->toULength=2; + goto endloop; + } + state=readCommand; + goto fastSingle; + } + } + } else { + /* fast path for Unicode mode */ + if(state==readCommand) { +fastUnicode: + while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) { + *target++=(UChar)((b<<8)|source[1]); + source+=2; + } + } + + /* normal state machine for Unicode mode */ +/* unicodeByteMode: */ + while(source<sourceLimit) { + if(target>=targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + b=*source++; + switch(state) { + case readCommand: + if((uint8_t)(b-UC0)>(Urs-UC0)) { + byteOne=b; + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=quotePairTwo; + } else if(/* UC0<=b && */ b<=UC7) { + dynamicWindow=(int8_t)(b-UC0); + isSingleByteMode=TRUE; + goto fastSingle; + } else if(/* UD0<=b && */ b<=UD7) { + dynamicWindow=(int8_t)(b-UD0); + isSingleByteMode=TRUE; + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=defineOne; + goto singleByteMode; + } else if(b==UDX) { + isSingleByteMode=TRUE; + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=definePairOne; + goto singleByteMode; + } else if(b==UQU) { + cnv->toUBytes[0]=b; + cnv->toULength=1; + state=quotePairOne; + } else /* Urs */ { + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + cnv->toUBytes[0]=b; + cnv->toULength=1; + goto endloop; + } + break; + case quotePairOne: + byteOne=b; + cnv->toUBytes[1]=b; + cnv->toULength=2; + state=quotePairTwo; + break; + case quotePairTwo: + *target++=(UChar)((byteOne<<8)|b); + state=readCommand; + goto fastUnicode; + } + } + } +endloop: + + /* set the converter state back into UConverter */ + if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { + /* reset to deal with the next character */ + state=readCommand; + } else if(state==readCommand) { + /* not in a multi-byte sequence, reset toULength */ + cnv->toULength=0; + } + scsu->toUIsSingleByteMode=isSingleByteMode; + scsu->toUState=state; + scsu->toUQuoteWindow=quoteWindow; + scsu->toUDynamicWindow=dynamicWindow; + scsu->toUByteOne=byteOne; + + /* write back the updated pointers */ + pArgs->source=(const char *)source; + pArgs->target=target; + return; +} +U_CDECL_END +/* SCSU-from-Unicode conversion functions ----------------------------------- */ + +/* + * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve + * reasonable results. The lookahead is minimal. + * Many cases are simple: + * A character fits directly into the current mode, a dynamic or static window, + * or is not compressible. These cases are tested first. + * Real compression heuristics are applied to the rest, in code branches for + * single/Unicode mode and BMP/supplementary code points. + * The heuristics used here are extremely simple. + */ + +/* get the number of the window that this character is in, or -1 */ +static int8_t +getWindow(const uint32_t offsets[8], uint32_t c) { + int i; + for(i=0; i<8; ++i) { + if((uint32_t)(c-offsets[i])<=0x7f) { + return (int8_t)(i); + } + } + return -1; +} + +/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */ +static UBool +isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) { + return (UBool)(c<=offset+0x7f && + (c>=offset || (c<=0x7f && + (c>=0x20 || (1UL<<c)&0x2601)))); + /* binary 0010 0110 0000 0001, + check for b==0xd || b==0xa || b==9 || b==0 */ +} + +/* + * getNextDynamicWindow returns the next dynamic window to be redefined + */ +static int8_t +getNextDynamicWindow(SCSUData *scsu) { + int8_t window=scsu->windowUse[scsu->nextWindowUseIndex]; + if(++scsu->nextWindowUseIndex==8) { + scsu->nextWindowUseIndex=0; + } + return window; +} + +/* + * useDynamicWindow() adjusts + * windowUse[] and nextWindowUseIndex for the algorithm to choose + * the next dynamic window to be defined; + * a subclass may override it and provide its own algorithm. + */ +static void +useDynamicWindow(SCSUData *scsu, int8_t window) { + /* + * move the existing window, which just became the most recently used one, + * up in windowUse[] to nextWindowUseIndex-1 + */ + + /* first, find the index of the window - backwards to favor the more recently used windows */ + int i, j; + + i=scsu->nextWindowUseIndex; + do { + if(--i<0) { + i=7; + } + } while(scsu->windowUse[i]!=window); + + /* now copy each windowUse[i+1] to [i] */ + j=i+1; + if(j==8) { + j=0; + } + while(j!=scsu->nextWindowUseIndex) { + scsu->windowUse[i]=scsu->windowUse[j]; + i=j; + if(++j==8) { j=0; } + } + + /* finally, set the window into the most recently used index */ + scsu->windowUse[i]=window; +} + +/* + * calculate the offset and the code for a dynamic window that contains the character + * takes fixed offsets into account + * the offset of the window is stored in the offset variable, + * the code is returned + * + * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code + */ +static int +getDynamicOffset(uint32_t c, uint32_t *pOffset) { + int i; + + for(i=0; i<7; ++i) { + if((uint32_t)(c-fixedOffsets[i])<=0x7f) { + *pOffset=fixedOffsets[i]; + return 0xf9+i; + } + } + + if(c<0x80) { + /* No dynamic window for US-ASCII. */ + return -1; + } else if(c<0x3400 || + (uint32_t)(c-0x10000)<(0x14000-0x10000) || + (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000) + ) { + /* This character is in a code range for a "small", i.e., reasonably windowable, script. */ + *pOffset=c&0x7fffff80; + return (int)(c>>7); + } else if(0xe000<=c && c!=0xfeff && c<0xfff0) { + /* For these characters we need to take the gapOffset into account. */ + *pOffset=c&0x7fffff80; + return (int)((c-gapOffset)>>7); + } else { + return -1; + } +} +U_CDECL_BEGIN +/* + * Idea for compression: + * - save SCSUData and other state before really starting work + * - at endloop, see if compression could be better with just unicode mode + * - don't do this if a callback has been called + * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning + * - different buffer handling! + * + * Drawback or need for corrective handling: + * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and + * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible + * not only for compression but also for HTML/XML documents with following charset/encoding announcers. + * + * How to achieve both? + * - Only replace the result after an SDX or SCU? + */ + +static void U_CALLCONV +_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + SCSUData *scsu; + const UChar *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + int32_t *offsets; + + UBool isSingleByteMode; + uint8_t dynamicWindow; + uint32_t currentOffset; + + uint32_t c, delta; + + int32_t sourceIndex, nextSourceIndex; + + int32_t length; + + /* variables for compression heuristics */ + uint32_t offset; + UChar lead, trail; + int code; + int8_t window; + + /* set up the local pointers */ + cnv=pArgs->converter; + scsu=(SCSUData *)cnv->extraInfo; + + /* set up the local pointers */ + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + offsets=pArgs->offsets; + + /* get the state machine state */ + isSingleByteMode=scsu->fromUIsSingleByteMode; + dynamicWindow=scsu->fromUDynamicWindow; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + + c=cnv->fromUChar32; + + /* sourceIndex=-1 if the current character began in the previous buffer */ + sourceIndex= c==0 ? 0 : -1; + nextSourceIndex=0; + + /* similar conversion "loop" as in toUnicode */ +loop: + if(isSingleByteMode) { + if(c!=0 && targetCapacity>0) { + goto getTrailSingle; + } + + /* state machine for single-byte mode */ +/* singleByteMode: */ + while(source<sourceLimit) { + if(targetCapacity<=0) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + c=*source++; + ++nextSourceIndex; + + if((c-0x20)<=0x5f) { + /* pass US-ASCII graphic character through */ + *target++=(uint8_t)c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + --targetCapacity; + } else if(c<0x20) { + if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { + /* CR/LF/TAB/NUL */ + *target++=(uint8_t)c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + --targetCapacity; + } else { + /* quote C0 control character */ + c|=SQ0<<8; + length=2; + goto outputBytes; + } + } else if((delta=c-currentOffset)<=0x7f) { + /* use the current dynamic window */ + *target++=(uint8_t)(delta|0x80); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + --targetCapacity; + } else if(U16_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c)) { +getTrailSingle: + lead=(UChar)c; + if(source<sourceLimit) { + /* test the following code unit */ + trail=*source; + if(U16_IS_TRAIL(trail)) { + ++source; + ++nextSourceIndex; + c=U16_GET_SUPPLEMENTARY(c, trail); + /* convert this surrogate code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + } else { + /* no more input */ + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + + /* compress supplementary character U+10000..U+10ffff */ + if((delta=c-currentOffset)<=0x7f) { + /* use the current dynamic window */ + *target++=(uint8_t)(delta|0x80); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + --targetCapacity; + } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { + /* there is a dynamic window that contains this character, change to it */ + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else if((code=getDynamicOffset(c, &offset))>=0) { + /* might check if there are more characters in this window to come */ + /* define an extended window with this character */ + code-=0x200; + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=4; + goto outputBytes; + } else { + /* change to Unicode mode and output this (lead, trail) pair */ + isSingleByteMode=FALSE; + *target++=(uint8_t)SCU; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + --targetCapacity; + c=((uint32_t)lead<<16)|trail; + length=4; + goto outputBytes; + } + } else if(c<0xa0) { + /* quote C1 control character */ + c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ + length=2; + goto outputBytes; + } else if(c==0xfeff || c>=0xfff0) { + /* quote signature character=byte order mark and specials */ + c|=SQU<<16; + length=3; + goto outputBytes; + } else { + /* compress all other BMP characters */ + if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { + /* there is a window defined that contains this character - switch to it or quote from it? */ + if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { + /* change to dynamic window */ + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else { + /* quote from dynamic window */ + c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; + length=2; + goto outputBytes; + } + } else if((window=getWindow(staticOffsets, c))>=0) { + /* quote from static window */ + c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); + length=2; + goto outputBytes; + } else if((code=getDynamicOffset(c, &offset))>=0) { + /* define a dynamic window with this character */ + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=3; + goto outputBytes; + } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && + (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) + ) { + /* + * this character is not compressible (a BMP ideograph or similar); + * switch to Unicode mode if this is the last character in the block + * or there is at least one more ideograph following immediately + */ + isSingleByteMode=FALSE; + c|=SCU<<16; + length=3; + goto outputBytes; + } else { + /* quote Unicode */ + c|=SQU<<16; + length=3; + goto outputBytes; + } + } + + /* normal end of conversion: prepare for a new character */ + c=0; + sourceIndex=nextSourceIndex; + } + } else { + if(c!=0 && targetCapacity>0) { + goto getTrailUnicode; + } + + /* state machine for Unicode mode */ +/* unicodeByteMode: */ + while(source<sourceLimit) { + if(targetCapacity<=0) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + c=*source++; + ++nextSourceIndex; + + if((uint32_t)(c-0x3400)<(0xd800-0x3400)) { + /* not compressible, write character directly */ + if(targetCapacity>=2) { + *target++=(uint8_t)(c>>8); + *target++=(uint8_t)c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + *offsets++=sourceIndex; + } + targetCapacity-=2; + } else { + length=2; + goto outputBytes; + } + } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { + /* compress BMP character if the following one is not an uncompressible ideograph */ + if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) { + if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) { + /* ASCII digit or letter */ + isSingleByteMode=TRUE; + c|=((uint32_t)(UC0+dynamicWindow)<<8)|c; + length=2; + goto outputBytes; + } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { + /* there is a dynamic window that contains this character, change to it */ + isSingleByteMode=TRUE; + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else if((code=getDynamicOffset(c, &offset))>=0) { + /* define a dynamic window with this character */ + isSingleByteMode=TRUE; + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=3; + goto outputBytes; + } + } + + /* don't know how to compress this character, just write it directly */ + length=2; + goto outputBytes; + } else if(c<0xe000) { + /* c is a surrogate */ + if(U16_IS_SURROGATE_LEAD(c)) { +getTrailUnicode: + lead=(UChar)c; + if(source<sourceLimit) { + /* test the following code unit */ + trail=*source; + if(U16_IS_TRAIL(trail)) { + ++source; + ++nextSourceIndex; + c=U16_GET_SUPPLEMENTARY(c, trail); + /* convert this surrogate code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + } else { + /* no more input */ + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + + /* compress supplementary character */ + if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 && + !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400)) + ) { + /* + * there is a dynamic window that contains this character and + * the following character is not uncompressible, + * change to the window + */ + isSingleByteMode=TRUE; + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */ + (code=getDynamicOffset(c, &offset))>=0 + ) { + /* two supplementary characters in (probably) the same window - define an extended one */ + isSingleByteMode=TRUE; + code-=0x200; + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=4; + goto outputBytes; + } else { + /* don't know how to compress this character, just write it directly */ + c=((uint32_t)lead<<16)|trail; + length=4; + goto outputBytes; + } + } else /* 0xe000<=c<0xf300 */ { + /* quote to avoid SCSU tags */ + c|=UQU<<16; + length=3; + goto outputBytes; + } + + /* normal end of conversion: prepare for a new character */ + c=0; + sourceIndex=nextSourceIndex; + } + } +endloop: + + /* set the converter state back into UConverter */ + scsu->fromUIsSingleByteMode=isSingleByteMode; + scsu->fromUDynamicWindow=dynamicWindow; + + cnv->fromUChar32=c; + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + pArgs->offsets=offsets; + return; + +outputBytes: + /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ + /* from the first if in the loop we know that targetCapacity>0 */ + if(length<=targetCapacity) { + if(offsets==NULL) { + switch(length) { + /* each branch falls through to the next one */ + case 4: + *target++=(uint8_t)(c>>24); + U_FALLTHROUGH; + case 3: + *target++=(uint8_t)(c>>16); + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(c>>8); + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)c; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + } else { + switch(length) { + /* each branch falls through to the next one */ + case 4: + *target++=(uint8_t)(c>>24); + *offsets++=sourceIndex; + U_FALLTHROUGH; + case 3: + *target++=(uint8_t)(c>>16); + *offsets++=sourceIndex; + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(c>>8); + *offsets++=sourceIndex; + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)c; + *offsets++=sourceIndex; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + } + targetCapacity-=length; + + /* normal end of conversion: prepare for a new character */ + c=0; + sourceIndex=nextSourceIndex; + goto loop; + } else { + uint8_t *p; + + /* + * We actually do this backwards here: + * In order to save an intermediate variable, we output + * first to the overflow buffer what does not fit into the + * regular target. + */ + /* we know that 0<=targetCapacity<length<=4 */ + /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */ + length-=targetCapacity; + p=(uint8_t *)cnv->charErrorBuffer; + switch(length) { + /* each branch falls through to the next one */ + case 4: + *p++=(uint8_t)(c>>24); + U_FALLTHROUGH; + case 3: + *p++=(uint8_t)(c>>16); + U_FALLTHROUGH; + case 2: + *p++=(uint8_t)(c>>8); + U_FALLTHROUGH; + case 1: + *p=(uint8_t)c; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + cnv->charErrorBufferLength=(int8_t)length; + + /* now output what fits into the regular target */ + c>>=8*length; /* length was reduced by targetCapacity */ + switch(targetCapacity) { + /* each branch falls through to the next one */ + case 3: + *target++=(uint8_t)(c>>16); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(c>>8); + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)c; + if(offsets!=NULL) { + *offsets++=sourceIndex; + } + U_FALLTHROUGH; + default: + break; + } + + /* target overflow */ + targetCapacity=0; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + c=0; + goto endloop; + } +} + +/* + * Identical to _SCSUFromUnicodeWithOffsets but without offset handling. + * If a change is made in the original function, then either + * change this function the same way or + * re-copy the original function and remove the variables + * offsets, sourceIndex, and nextSourceIndex. + */ +static void U_CALLCONV +_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs, + UErrorCode *pErrorCode) { + UConverter *cnv; + SCSUData *scsu; + const UChar *source, *sourceLimit; + uint8_t *target; + int32_t targetCapacity; + + UBool isSingleByteMode; + uint8_t dynamicWindow; + uint32_t currentOffset; + + uint32_t c, delta; + + int32_t length; + + /* variables for compression heuristics */ + uint32_t offset; + UChar lead, trail; + int code; + int8_t window; + + /* set up the local pointers */ + cnv=pArgs->converter; + scsu=(SCSUData *)cnv->extraInfo; + + /* set up the local pointers */ + source=pArgs->source; + sourceLimit=pArgs->sourceLimit; + target=(uint8_t *)pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); + + /* get the state machine state */ + isSingleByteMode=scsu->fromUIsSingleByteMode; + dynamicWindow=scsu->fromUDynamicWindow; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + + c=cnv->fromUChar32; + + /* similar conversion "loop" as in toUnicode */ +loop: + if(isSingleByteMode) { + if(c!=0 && targetCapacity>0) { + goto getTrailSingle; + } + + /* state machine for single-byte mode */ +/* singleByteMode: */ + while(source<sourceLimit) { + if(targetCapacity<=0) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + c=*source++; + + if((c-0x20)<=0x5f) { + /* pass US-ASCII graphic character through */ + *target++=(uint8_t)c; + --targetCapacity; + } else if(c<0x20) { + if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) { + /* CR/LF/TAB/NUL */ + *target++=(uint8_t)c; + --targetCapacity; + } else { + /* quote C0 control character */ + c|=SQ0<<8; + length=2; + goto outputBytes; + } + } else if((delta=c-currentOffset)<=0x7f) { + /* use the current dynamic window */ + *target++=(uint8_t)(delta|0x80); + --targetCapacity; + } else if(U16_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c)) { +getTrailSingle: + lead=(UChar)c; + if(source<sourceLimit) { + /* test the following code unit */ + trail=*source; + if(U16_IS_TRAIL(trail)) { + ++source; + c=U16_GET_SUPPLEMENTARY(c, trail); + /* convert this surrogate code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + } else { + /* no more input */ + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + + /* compress supplementary character U+10000..U+10ffff */ + if((delta=c-currentOffset)<=0x7f) { + /* use the current dynamic window */ + *target++=(uint8_t)(delta|0x80); + --targetCapacity; + } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { + /* there is a dynamic window that contains this character, change to it */ + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else if((code=getDynamicOffset(c, &offset))>=0) { + /* might check if there are more characters in this window to come */ + /* define an extended window with this character */ + code-=0x200; + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=4; + goto outputBytes; + } else { + /* change to Unicode mode and output this (lead, trail) pair */ + isSingleByteMode=FALSE; + *target++=(uint8_t)SCU; + --targetCapacity; + c=((uint32_t)lead<<16)|trail; + length=4; + goto outputBytes; + } + } else if(c<0xa0) { + /* quote C1 control character */ + c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */ + length=2; + goto outputBytes; + } else if(c==0xfeff || c>=0xfff0) { + /* quote signature character=byte order mark and specials */ + c|=SQU<<16; + length=3; + goto outputBytes; + } else { + /* compress all other BMP characters */ + if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { + /* there is a window defined that contains this character - switch to it or quote from it? */ + if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) { + /* change to dynamic window */ + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else { + /* quote from dynamic window */ + c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80; + length=2; + goto outputBytes; + } + } else if((window=getWindow(staticOffsets, c))>=0) { + /* quote from static window */ + c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]); + length=2; + goto outputBytes; + } else if((code=getDynamicOffset(c, &offset))>=0) { + /* define a dynamic window with this character */ + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=3; + goto outputBytes; + } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) && + (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400)) + ) { + /* + * this character is not compressible (a BMP ideograph or similar); + * switch to Unicode mode if this is the last character in the block + * or there is at least one more ideograph following immediately + */ + isSingleByteMode=FALSE; + c|=SCU<<16; + length=3; + goto outputBytes; + } else { + /* quote Unicode */ + c|=SQU<<16; + length=3; + goto outputBytes; + } + } + + /* normal end of conversion: prepare for a new character */ + c=0; + } + } else { + if(c!=0 && targetCapacity>0) { + goto getTrailUnicode; + } + + /* state machine for Unicode mode */ +/* unicodeByteMode: */ + while(source<sourceLimit) { + if(targetCapacity<=0) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + break; + } + c=*source++; + + if((uint32_t)(c-0x3400)<(0xd800-0x3400)) { + /* not compressible, write character directly */ + if(targetCapacity>=2) { + *target++=(uint8_t)(c>>8); + *target++=(uint8_t)c; + targetCapacity-=2; + } else { + length=2; + goto outputBytes; + } + } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) { + /* compress BMP character if the following one is not an uncompressible ideograph */ + if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) { + if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) { + /* ASCII digit or letter */ + isSingleByteMode=TRUE; + c|=((uint32_t)(UC0+dynamicWindow)<<8)|c; + length=2; + goto outputBytes; + } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) { + /* there is a dynamic window that contains this character, change to it */ + isSingleByteMode=TRUE; + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else if((code=getDynamicOffset(c, &offset))>=0) { + /* define a dynamic window with this character */ + isSingleByteMode=TRUE; + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=3; + goto outputBytes; + } + } + + /* don't know how to compress this character, just write it directly */ + length=2; + goto outputBytes; + } else if(c<0xe000) { + /* c is a surrogate */ + if(U16_IS_SURROGATE_LEAD(c)) { +getTrailUnicode: + lead=(UChar)c; + if(source<sourceLimit) { + /* test the following code unit */ + trail=*source; + if(U16_IS_TRAIL(trail)) { + ++source; + c=U16_GET_SUPPLEMENTARY(c, trail); + /* convert this surrogate code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + } else { + /* no more input */ + break; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + goto endloop; + } + + /* compress supplementary character */ + if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 && + !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400)) + ) { + /* + * there is a dynamic window that contains this character and + * the following character is not uncompressible, + * change to the window + */ + isSingleByteMode=TRUE; + dynamicWindow=window; + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80; + length=2; + goto outputBytes; + } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */ + (code=getDynamicOffset(c, &offset))>=0 + ) { + /* two supplementary characters in (probably) the same window - define an extended one */ + isSingleByteMode=TRUE; + code-=0x200; + dynamicWindow=getNextDynamicWindow(scsu); + currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset; + useDynamicWindow(scsu, dynamicWindow); + c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80; + length=4; + goto outputBytes; + } else { + /* don't know how to compress this character, just write it directly */ + c=((uint32_t)lead<<16)|trail; + length=4; + goto outputBytes; + } + } else /* 0xe000<=c<0xf300 */ { + /* quote to avoid SCSU tags */ + c|=UQU<<16; + length=3; + goto outputBytes; + } + + /* normal end of conversion: prepare for a new character */ + c=0; + } + } +endloop: + + /* set the converter state back into UConverter */ + scsu->fromUIsSingleByteMode=isSingleByteMode; + scsu->fromUDynamicWindow=dynamicWindow; + + cnv->fromUChar32=c; + + /* write back the updated pointers */ + pArgs->source=source; + pArgs->target=(char *)target; + return; + +outputBytes: + /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */ + /* from the first if in the loop we know that targetCapacity>0 */ + if(length<=targetCapacity) { + switch(length) { + /* each branch falls through to the next one */ + case 4: + *target++=(uint8_t)(c>>24); + U_FALLTHROUGH; + case 3: + *target++=(uint8_t)(c>>16); + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(c>>8); + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)c; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + targetCapacity-=length; + + /* normal end of conversion: prepare for a new character */ + c=0; + goto loop; + } else { + uint8_t *p; + + /* + * We actually do this backwards here: + * In order to save an intermediate variable, we output + * first to the overflow buffer what does not fit into the + * regular target. + */ + /* we know that 0<=targetCapacity<length<=4 */ + /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */ + length-=targetCapacity; + p=(uint8_t *)cnv->charErrorBuffer; + switch(length) { + /* each branch falls through to the next one */ + case 4: + *p++=(uint8_t)(c>>24); + U_FALLTHROUGH; + case 3: + *p++=(uint8_t)(c>>16); + U_FALLTHROUGH; + case 2: + *p++=(uint8_t)(c>>8); + U_FALLTHROUGH; + case 1: + *p=(uint8_t)c; + U_FALLTHROUGH; + default: + /* will never occur */ + break; + } + cnv->charErrorBufferLength=(int8_t)length; + + /* now output what fits into the regular target */ + c>>=8*length; /* length was reduced by targetCapacity */ + switch(targetCapacity) { + /* each branch falls through to the next one */ + case 3: + *target++=(uint8_t)(c>>16); + U_FALLTHROUGH; + case 2: + *target++=(uint8_t)(c>>8); + U_FALLTHROUGH; + case 1: + *target++=(uint8_t)c; + U_FALLTHROUGH; + default: + break; + } + + /* target overflow */ + targetCapacity=0; + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + c=0; + goto endloop; + } +} + +/* miscellaneous ------------------------------------------------------------ */ + +static const char * U_CALLCONV +_SCSUGetName(const UConverter *cnv) { + SCSUData *scsu=(SCSUData *)cnv->extraInfo; + + switch(scsu->locale) { + case l_ja: + return "SCSU,locale=ja"; + default: + return "SCSU"; + } +} + +/* structure for SafeClone calculations */ +struct cloneSCSUStruct +{ + UConverter cnv; + SCSUData mydata; +}; + +static UConverter * U_CALLCONV +_SCSUSafeClone(const UConverter *cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status) +{ + struct cloneSCSUStruct * localClone; + int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct); + + if (U_FAILURE(*status)){ + return 0; + } + + if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */ + *pBufferSize = bufferSizeNeeded; + return 0; + } + + localClone = (struct cloneSCSUStruct *)stackBuffer; + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ + + uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData)); + localClone->cnv.extraInfo = &localClone->mydata; + localClone->cnv.isExtraLocal = TRUE; + + return &localClone->cnv; +} +U_CDECL_END + +static const UConverterImpl _SCSUImpl={ + UCNV_SCSU, + + NULL, + NULL, + + _SCSUOpen, + _SCSUClose, + _SCSUReset, + + _SCSUToUnicode, + _SCSUToUnicodeWithOffsets, + _SCSUFromUnicode, + _SCSUFromUnicodeWithOffsets, + NULL, + + NULL, + _SCSUGetName, + NULL, + _SCSUSafeClone, + ucnv_getCompleteUnicodeSet, + NULL, + NULL +}; + +static const UConverterStaticData _SCSUStaticData={ + sizeof(UConverterStaticData), + "SCSU", + 1212, /* CCSID for SCSU */ + UCNV_IBM, UCNV_SCSU, + 1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */ + /* + * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode + * substitution string. + */ + { 0x0e, 0xff, 0xfd, 0 }, 3, + FALSE, FALSE, + 0, + 0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ +}; + +const UConverterSharedData _SCSUData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl); + +#endif diff --git a/thirdparty/icu4c/common/ucnvsel.cpp b/thirdparty/icu4c/common/ucnvsel.cpp new file mode 100644 index 0000000000..2dff5ac1bc --- /dev/null +++ b/thirdparty/icu4c/common/ucnvsel.cpp @@ -0,0 +1,823 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2008-2011, International Business Machines +* Corporation, Google and others. All Rights Reserved. +* +******************************************************************************* +*/ +// Author : eldawy@google.com (Mohamed Eldawy) +// ucnvsel.cpp +// +// Purpose: To generate a list of encodings capable of handling +// a given Unicode text +// +// Started 09-April-2008 + +/** + * \file + * + * This is an implementation of an encoding selector. + * The goal is, given a unicode string, find the encodings + * this string can be mapped to. To make processing faster + * a trie is built when you call ucnvsel_open() that + * stores all encodings a codepoint can map to + */ + +#include "unicode/ucnvsel.h" + +#if !UCONFIG_NO_CONVERSION + +#include <string.h> + +#include "unicode/uchar.h" +#include "unicode/uniset.h" +#include "unicode/ucnv.h" +#include "unicode/ustring.h" +#include "unicode/uchriter.h" +#include "utrie2.h" +#include "propsvec.h" +#include "uassert.h" +#include "ucmndata.h" +#include "udataswp.h" +#include "uenumimp.h" +#include "cmemory.h" +#include "cstring.h" + +U_NAMESPACE_USE + +struct UConverterSelector { + UTrie2 *trie; // 16 bit trie containing offsets into pv + uint32_t* pv; // table of bits! + int32_t pvCount; + char** encodings; // which encodings did user ask to use? + int32_t encodingsCount; + int32_t encodingStrLength; + uint8_t* swapped; + UBool ownPv, ownEncodingStrings; +}; + +static void generateSelectorData(UConverterSelector* result, + UPropsVectors *upvec, + const USet* excludedCodePoints, + const UConverterUnicodeSet whichSet, + UErrorCode* status) { + if (U_FAILURE(*status)) { + return; + } + + int32_t columns = (result->encodingsCount+31)/32; + + // set errorValue to all-ones + for (int32_t col = 0; col < columns; col++) { + upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP, + col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), status); + } + + for (int32_t i = 0; i < result->encodingsCount; ++i) { + uint32_t mask; + uint32_t column; + int32_t item_count; + int32_t j; + UConverter* test_converter = ucnv_open(result->encodings[i], status); + if (U_FAILURE(*status)) { + return; + } + USet* unicode_point_set; + unicode_point_set = uset_open(1, 0); // empty set + + ucnv_getUnicodeSet(test_converter, unicode_point_set, + whichSet, status); + if (U_FAILURE(*status)) { + ucnv_close(test_converter); + return; + } + + column = i / 32; + mask = 1 << (i%32); + // now iterate over intervals on set i! + item_count = uset_getItemCount(unicode_point_set); + + for (j = 0; j < item_count; ++j) { + UChar32 start_char; + UChar32 end_char; + UErrorCode smallStatus = U_ZERO_ERROR; + uset_getItem(unicode_point_set, j, &start_char, &end_char, NULL, 0, + &smallStatus); + if (U_FAILURE(smallStatus)) { + // this will be reached for the converters that fill the set with + // strings. Those should be ignored by our system + } else { + upvec_setValue(upvec, start_char, end_char, column, static_cast<uint32_t>(~0), mask, + status); + } + } + ucnv_close(test_converter); + uset_close(unicode_point_set); + if (U_FAILURE(*status)) { + return; + } + } + + // handle excluded encodings! Simply set their values to all 1's in the upvec + if (excludedCodePoints) { + int32_t item_count = uset_getItemCount(excludedCodePoints); + for (int32_t j = 0; j < item_count; ++j) { + UChar32 start_char; + UChar32 end_char; + + uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0, + status); + for (int32_t col = 0; col < columns; col++) { + upvec_setValue(upvec, start_char, end_char, col, static_cast<uint32_t>(~0), static_cast<uint32_t>(~0), + status); + } + } + } + + // alright. Now, let's put things in the same exact form you'd get when you + // unserialize things. + result->trie = upvec_compactToUTrie2WithRowIndexes(upvec, status); + result->pv = upvec_cloneArray(upvec, &result->pvCount, NULL, status); + result->pvCount *= columns; // number of uint32_t = rows * columns + result->ownPv = TRUE; +} + +/* open a selector. If converterListSize is 0, build for all converters. + If excludedCodePoints is NULL, don't exclude any codepoints */ +U_CAPI UConverterSelector* U_EXPORT2 +ucnvsel_open(const char* const* converterList, int32_t converterListSize, + const USet* excludedCodePoints, + const UConverterUnicodeSet whichSet, UErrorCode* status) { + // check if already failed + if (U_FAILURE(*status)) { + return NULL; + } + // ensure args make sense! + if (converterListSize < 0 || (converterList == NULL && converterListSize != 0)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + // allocate a new converter + LocalUConverterSelectorPointer newSelector( + (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector))); + if (newSelector.isNull()) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memset(newSelector.getAlias(), 0, sizeof(UConverterSelector)); + + if (converterListSize == 0) { + converterList = NULL; + converterListSize = ucnv_countAvailable(); + } + newSelector->encodings = + (char**)uprv_malloc(converterListSize * sizeof(char*)); + if (!newSelector->encodings) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + newSelector->encodings[0] = NULL; // now we can call ucnvsel_close() + + // make a backup copy of the list of converters + int32_t totalSize = 0; + int32_t i; + for (i = 0; i < converterListSize; i++) { + totalSize += + (int32_t)uprv_strlen(converterList != NULL ? converterList[i] : ucnv_getAvailableName(i)) + 1; + } + // 4-align the totalSize to 4-align the size of the serialized form + int32_t encodingStrPadding = totalSize & 3; + if (encodingStrPadding != 0) { + encodingStrPadding = 4 - encodingStrPadding; + } + newSelector->encodingStrLength = totalSize += encodingStrPadding; + char* allStrings = (char*) uprv_malloc(totalSize); + if (!allStrings) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + for (i = 0; i < converterListSize; i++) { + newSelector->encodings[i] = allStrings; + uprv_strcpy(newSelector->encodings[i], + converterList != NULL ? converterList[i] : ucnv_getAvailableName(i)); + allStrings += uprv_strlen(newSelector->encodings[i]) + 1; + } + while (encodingStrPadding > 0) { + *allStrings++ = 0; + --encodingStrPadding; + } + + newSelector->ownEncodingStrings = TRUE; + newSelector->encodingsCount = converterListSize; + UPropsVectors *upvec = upvec_open((converterListSize+31)/32, status); + generateSelectorData(newSelector.getAlias(), upvec, excludedCodePoints, whichSet, status); + upvec_close(upvec); + + if (U_FAILURE(*status)) { + return NULL; + } + + return newSelector.orphan(); +} + +/* close opened selector */ +U_CAPI void U_EXPORT2 +ucnvsel_close(UConverterSelector *sel) { + if (!sel) { + return; + } + if (sel->ownEncodingStrings) { + uprv_free(sel->encodings[0]); + } + uprv_free(sel->encodings); + if (sel->ownPv) { + uprv_free(sel->pv); + } + utrie2_close(sel->trie); + uprv_free(sel->swapped); + uprv_free(sel); +} + +static const UDataInfo dataInfo = { + sizeof(UDataInfo), + 0, + + U_IS_BIG_ENDIAN, + U_CHARSET_FAMILY, + U_SIZEOF_UCHAR, + 0, + + { 0x43, 0x53, 0x65, 0x6c }, /* dataFormat="CSel" */ + { 1, 0, 0, 0 }, /* formatVersion */ + { 0, 0, 0, 0 } /* dataVersion */ +}; + +enum { + UCNVSEL_INDEX_TRIE_SIZE, // trie size in bytes + UCNVSEL_INDEX_PV_COUNT, // number of uint32_t in the bit vectors + UCNVSEL_INDEX_NAMES_COUNT, // number of encoding names + UCNVSEL_INDEX_NAMES_LENGTH, // number of encoding name bytes including padding + UCNVSEL_INDEX_SIZE = 15, // bytes following the DataHeader + UCNVSEL_INDEX_COUNT = 16 +}; + +/* + * Serialized form of a UConverterSelector, formatVersion 1: + * + * The serialized form begins with a standard ICU DataHeader with a UDataInfo + * as the template above. + * This is followed by: + * int32_t indexes[UCNVSEL_INDEX_COUNT]; // see index entry constants above + * serialized UTrie2; // indexes[UCNVSEL_INDEX_TRIE_SIZE] bytes + * uint32_t pv[indexes[UCNVSEL_INDEX_PV_COUNT]]; // bit vectors + * char* encodingNames[indexes[UCNVSEL_INDEX_NAMES_LENGTH]]; // NUL-terminated strings + padding + */ + +/* serialize a selector */ +U_CAPI int32_t U_EXPORT2 +ucnvsel_serialize(const UConverterSelector* sel, + void* buffer, int32_t bufferCapacity, UErrorCode* status) { + // check if already failed + if (U_FAILURE(*status)) { + return 0; + } + // ensure args make sense! + uint8_t *p = (uint8_t *)buffer; + if (bufferCapacity < 0 || + (bufferCapacity > 0 && (p == NULL || (U_POINTER_MASK_LSB(p, 3) != 0))) + ) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + // add up the size of the serialized form + int32_t serializedTrieSize = utrie2_serialize(sel->trie, NULL, 0, status); + if (*status != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(*status)) { + return 0; + } + *status = U_ZERO_ERROR; + + DataHeader header; + uprv_memset(&header, 0, sizeof(header)); + header.dataHeader.headerSize = (uint16_t)((sizeof(header) + 15) & ~15); + header.dataHeader.magic1 = 0xda; + header.dataHeader.magic2 = 0x27; + uprv_memcpy(&header.info, &dataInfo, sizeof(dataInfo)); + + int32_t indexes[UCNVSEL_INDEX_COUNT] = { + serializedTrieSize, + sel->pvCount, + sel->encodingsCount, + sel->encodingStrLength + }; + + int32_t totalSize = + header.dataHeader.headerSize + + (int32_t)sizeof(indexes) + + serializedTrieSize + + sel->pvCount * 4 + + sel->encodingStrLength; + indexes[UCNVSEL_INDEX_SIZE] = totalSize - header.dataHeader.headerSize; + if (totalSize > bufferCapacity) { + *status = U_BUFFER_OVERFLOW_ERROR; + return totalSize; + } + // ok, save! + int32_t length = header.dataHeader.headerSize; + uprv_memcpy(p, &header, sizeof(header)); + uprv_memset(p + sizeof(header), 0, length - sizeof(header)); + p += length; + + length = (int32_t)sizeof(indexes); + uprv_memcpy(p, indexes, length); + p += length; + + utrie2_serialize(sel->trie, p, serializedTrieSize, status); + p += serializedTrieSize; + + length = sel->pvCount * 4; + uprv_memcpy(p, sel->pv, length); + p += length; + + uprv_memcpy(p, sel->encodings[0], sel->encodingStrLength); + p += sel->encodingStrLength; + + return totalSize; +} + +/** + * swap a selector into the desired Endianness and Asciiness of + * the system. Just as FYI, selectors are always saved in the format + * of the system that created them. They are only converted if used + * on another system. In other words, selectors created on different + * system can be different even if the params are identical (endianness + * and Asciiness differences only) + * + * @param ds pointer to data swapper containing swapping info + * @param inData pointer to incoming data + * @param length length of inData in bytes + * @param outData pointer to output data. Capacity should + * be at least equal to capacity of inData + * @param status an in/out ICU UErrorCode + * @return 0 on failure, number of bytes swapped on success + * number of bytes swapped can be smaller than length + */ +static int32_t +ucnvsel_swap(const UDataSwapper *ds, + const void *inData, int32_t length, + void *outData, UErrorCode *status) { + /* udata_swapDataHeader checks the arguments */ + int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, status); + if(U_FAILURE(*status)) { + return 0; + } + + /* check data format and format version */ + const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4); + if(!( + pInfo->dataFormat[0] == 0x43 && /* dataFormat="CSel" */ + pInfo->dataFormat[1] == 0x53 && + pInfo->dataFormat[2] == 0x65 && + pInfo->dataFormat[3] == 0x6c + )) { + udata_printError(ds, "ucnvsel_swap(): data format %02x.%02x.%02x.%02x is not recognized as UConverterSelector data\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3]); + *status = U_INVALID_FORMAT_ERROR; + return 0; + } + if(pInfo->formatVersion[0] != 1) { + udata_printError(ds, "ucnvsel_swap(): format version %02x is not supported\n", + pInfo->formatVersion[0]); + *status = U_UNSUPPORTED_ERROR; + return 0; + } + + if(length >= 0) { + length -= headerSize; + if(length < 16*4) { + udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for UConverterSelector data\n", + length); + *status = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + + const uint8_t *inBytes = (const uint8_t *)inData + headerSize; + uint8_t *outBytes = (uint8_t *)outData + headerSize; + + /* read the indexes */ + const int32_t *inIndexes = (const int32_t *)inBytes; + int32_t indexes[16]; + int32_t i; + for(i = 0; i < 16; ++i) { + indexes[i] = udata_readInt32(ds, inIndexes[i]); + } + + /* get the total length of the data */ + int32_t size = indexes[UCNVSEL_INDEX_SIZE]; + if(length >= 0) { + if(length < size) { + udata_printError(ds, "ucnvsel_swap(): too few bytes (%d after header) for all of UConverterSelector data\n", + length); + *status = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + /* copy the data for inaccessible bytes */ + if(inBytes != outBytes) { + uprv_memcpy(outBytes, inBytes, size); + } + + int32_t offset = 0, count; + + /* swap the int32_t indexes[] */ + count = UCNVSEL_INDEX_COUNT*4; + ds->swapArray32(ds, inBytes, count, outBytes, status); + offset += count; + + /* swap the UTrie2 */ + count = indexes[UCNVSEL_INDEX_TRIE_SIZE]; + utrie2_swap(ds, inBytes + offset, count, outBytes + offset, status); + offset += count; + + /* swap the uint32_t pv[] */ + count = indexes[UCNVSEL_INDEX_PV_COUNT]*4; + ds->swapArray32(ds, inBytes + offset, count, outBytes + offset, status); + offset += count; + + /* swap the encoding names */ + count = indexes[UCNVSEL_INDEX_NAMES_LENGTH]; + ds->swapInvChars(ds, inBytes + offset, count, outBytes + offset, status); + offset += count; + + U_ASSERT(offset == size); + } + + return headerSize + size; +} + +/* unserialize a selector */ +U_CAPI UConverterSelector* U_EXPORT2 +ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status) { + // check if already failed + if (U_FAILURE(*status)) { + return NULL; + } + // ensure args make sense! + const uint8_t *p = (const uint8_t *)buffer; + if (length <= 0 || + (length > 0 && (p == NULL || (U_POINTER_MASK_LSB(p, 3) != 0))) + ) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + // header + if (length < 32) { + // not even enough space for a minimal header + *status = U_INDEX_OUTOFBOUNDS_ERROR; + return NULL; + } + const DataHeader *pHeader = (const DataHeader *)p; + if (!( + pHeader->dataHeader.magic1==0xda && + pHeader->dataHeader.magic2==0x27 && + pHeader->info.dataFormat[0] == 0x43 && + pHeader->info.dataFormat[1] == 0x53 && + pHeader->info.dataFormat[2] == 0x65 && + pHeader->info.dataFormat[3] == 0x6c + )) { + /* header not valid or dataFormat not recognized */ + *status = U_INVALID_FORMAT_ERROR; + return NULL; + } + if (pHeader->info.formatVersion[0] != 1) { + *status = U_UNSUPPORTED_ERROR; + return NULL; + } + uint8_t* swapped = NULL; + if (pHeader->info.isBigEndian != U_IS_BIG_ENDIAN || + pHeader->info.charsetFamily != U_CHARSET_FAMILY + ) { + // swap the data + UDataSwapper *ds = + udata_openSwapperForInputData(p, length, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, status); + int32_t totalSize = ucnvsel_swap(ds, p, -1, NULL, status); + if (U_FAILURE(*status)) { + udata_closeSwapper(ds); + return NULL; + } + if (length < totalSize) { + udata_closeSwapper(ds); + *status = U_INDEX_OUTOFBOUNDS_ERROR; + return NULL; + } + swapped = (uint8_t*)uprv_malloc(totalSize); + if (swapped == NULL) { + udata_closeSwapper(ds); + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + ucnvsel_swap(ds, p, length, swapped, status); + udata_closeSwapper(ds); + if (U_FAILURE(*status)) { + uprv_free(swapped); + return NULL; + } + p = swapped; + pHeader = (const DataHeader *)p; + } + if (length < (pHeader->dataHeader.headerSize + 16 * 4)) { + // not even enough space for the header and the indexes + uprv_free(swapped); + *status = U_INDEX_OUTOFBOUNDS_ERROR; + return NULL; + } + p += pHeader->dataHeader.headerSize; + length -= pHeader->dataHeader.headerSize; + // indexes + const int32_t *indexes = (const int32_t *)p; + if (length < indexes[UCNVSEL_INDEX_SIZE]) { + uprv_free(swapped); + *status = U_INDEX_OUTOFBOUNDS_ERROR; + return NULL; + } + p += UCNVSEL_INDEX_COUNT * 4; + // create and populate the selector object + UConverterSelector* sel = (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector)); + char **encodings = + (char **)uprv_malloc( + indexes[UCNVSEL_INDEX_NAMES_COUNT] * sizeof(char *)); + if (sel == NULL || encodings == NULL) { + uprv_free(swapped); + uprv_free(sel); + uprv_free(encodings); + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memset(sel, 0, sizeof(UConverterSelector)); + sel->pvCount = indexes[UCNVSEL_INDEX_PV_COUNT]; + sel->encodings = encodings; + sel->encodingsCount = indexes[UCNVSEL_INDEX_NAMES_COUNT]; + sel->encodingStrLength = indexes[UCNVSEL_INDEX_NAMES_LENGTH]; + sel->swapped = swapped; + // trie + sel->trie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, + p, indexes[UCNVSEL_INDEX_TRIE_SIZE], NULL, + status); + p += indexes[UCNVSEL_INDEX_TRIE_SIZE]; + if (U_FAILURE(*status)) { + ucnvsel_close(sel); + return NULL; + } + // bit vectors + sel->pv = (uint32_t *)p; + p += sel->pvCount * 4; + // encoding names + char* s = (char*)p; + for (int32_t i = 0; i < sel->encodingsCount; ++i) { + sel->encodings[i] = s; + s += uprv_strlen(s) + 1; + } + p += sel->encodingStrLength; + + return sel; +} + +// a bunch of functions for the enumeration thingie! Nothing fancy here. Just +// iterate over the selected encodings +struct Enumerator { + int16_t* index; + int16_t length; + int16_t cur; + const UConverterSelector* sel; +}; + +U_CDECL_BEGIN + +static void U_CALLCONV +ucnvsel_close_selector_iterator(UEnumeration *enumerator) { + uprv_free(((Enumerator*)(enumerator->context))->index); + uprv_free(enumerator->context); + uprv_free(enumerator); +} + + +static int32_t U_CALLCONV +ucnvsel_count_encodings(UEnumeration *enumerator, UErrorCode *status) { + // check if already failed + if (U_FAILURE(*status)) { + return 0; + } + return ((Enumerator*)(enumerator->context))->length; +} + + +static const char* U_CALLCONV ucnvsel_next_encoding(UEnumeration* enumerator, + int32_t* resultLength, + UErrorCode* status) { + // check if already failed + if (U_FAILURE(*status)) { + return NULL; + } + + int16_t cur = ((Enumerator*)(enumerator->context))->cur; + const UConverterSelector* sel; + const char* result; + if (cur >= ((Enumerator*)(enumerator->context))->length) { + return NULL; + } + sel = ((Enumerator*)(enumerator->context))->sel; + result = sel->encodings[((Enumerator*)(enumerator->context))->index[cur] ]; + ((Enumerator*)(enumerator->context))->cur++; + if (resultLength) { + *resultLength = (int32_t)uprv_strlen(result); + } + return result; +} + +static void U_CALLCONV ucnvsel_reset_iterator(UEnumeration* enumerator, + UErrorCode* status) { + // check if already failed + if (U_FAILURE(*status)) { + return ; + } + ((Enumerator*)(enumerator->context))->cur = 0; +} + +U_CDECL_END + + +static const UEnumeration defaultEncodings = { + NULL, + NULL, + ucnvsel_close_selector_iterator, + ucnvsel_count_encodings, + uenum_unextDefault, + ucnvsel_next_encoding, + ucnvsel_reset_iterator +}; + + +// internal fn to intersect two sets of masks +// returns whether the mask has reduced to all zeros +static UBool intersectMasks(uint32_t* dest, const uint32_t* source1, int32_t len) { + int32_t i; + uint32_t oredDest = 0; + for (i = 0 ; i < len ; ++i) { + oredDest |= (dest[i] &= source1[i]); + } + return oredDest == 0; +} + +// internal fn to count how many 1's are there in a mask +// algorithm taken from http://graphics.stanford.edu/~seander/bithacks.html +static int16_t countOnes(uint32_t* mask, int32_t len) { + int32_t i, totalOnes = 0; + for (i = 0 ; i < len ; ++i) { + uint32_t ent = mask[i]; + for (; ent; totalOnes++) + { + ent &= ent - 1; // clear the least significant bit set + } + } + return static_cast<int16_t>(totalOnes); +} + + +/* internal function! */ +static UEnumeration *selectForMask(const UConverterSelector* sel, + uint32_t *theMask, UErrorCode *status) { + LocalMemory<uint32_t> mask(theMask); + // this is the context we will use. Store a table of indices to which + // encodings are legit. + LocalMemory<Enumerator> result(static_cast<Enumerator *>(uprv_malloc(sizeof(Enumerator)))); + if (result.isNull()) { + *status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + result->index = nullptr; // this will be allocated later! + result->length = result->cur = 0; + result->sel = sel; + + LocalMemory<UEnumeration> en(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)))); + if (en.isNull()) { + // TODO(markus): Combine Enumerator and UEnumeration into one struct. + *status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + memcpy(en.getAlias(), &defaultEncodings, sizeof(UEnumeration)); + + int32_t columns = (sel->encodingsCount+31)/32; + int16_t numOnes = countOnes(mask.getAlias(), columns); + // now, we know the exact space we need for index + if (numOnes > 0) { + result->index = static_cast<int16_t*>(uprv_malloc(numOnes * sizeof(int16_t))); + if (result->index == nullptr) { + *status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + int32_t i, j; + int16_t k = 0; + for (j = 0 ; j < columns; j++) { + uint32_t v = mask[j]; + for (i = 0 ; i < 32 && k < sel->encodingsCount; i++, k++) { + if ((v & 1) != 0) { + result->index[result->length++] = k; + } + v >>= 1; + } + } + } //otherwise, index will remain NULL (and will never be touched by + //the enumerator code anyway) + en->context = result.orphan(); + return en.orphan(); +} + +/* check a string against the selector - UTF16 version */ +U_CAPI UEnumeration * U_EXPORT2 +ucnvsel_selectForString(const UConverterSelector* sel, + const UChar *s, int32_t length, UErrorCode *status) { + // check if already failed + if (U_FAILURE(*status)) { + return NULL; + } + // ensure args make sense! + if (sel == NULL || (s == NULL && length != 0)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + int32_t columns = (sel->encodingsCount+31)/32; + uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4); + if (mask == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memset(mask, ~0, columns *4); + + if(s!=NULL) { + const UChar *limit; + if (length >= 0) { + limit = s + length; + } else { + limit = NULL; + } + + while (limit == NULL ? *s != 0 : s != limit) { + UChar32 c; + uint16_t pvIndex; + UTRIE2_U16_NEXT16(sel->trie, s, limit, c, pvIndex); + if (intersectMasks(mask, sel->pv+pvIndex, columns)) { + break; + } + } + } + return selectForMask(sel, mask, status); +} + +/* check a string against the selector - UTF8 version */ +U_CAPI UEnumeration * U_EXPORT2 +ucnvsel_selectForUTF8(const UConverterSelector* sel, + const char *s, int32_t length, UErrorCode *status) { + // check if already failed + if (U_FAILURE(*status)) { + return NULL; + } + // ensure args make sense! + if (sel == NULL || (s == NULL && length != 0)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + int32_t columns = (sel->encodingsCount+31)/32; + uint32_t* mask = (uint32_t*) uprv_malloc(columns * 4); + if (mask == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memset(mask, ~0, columns *4); + + if (length < 0) { + length = (int32_t)uprv_strlen(s); + } + + if(s!=NULL) { + const char *limit = s + length; + + while (s != limit) { + uint16_t pvIndex; + UTRIE2_U8_NEXT16(sel->trie, s, limit, pvIndex); + if (intersectMasks(mask, sel->pv+pvIndex, columns)) { + break; + } + } + } + return selectForMask(sel, mask, status); +} + +#endif // !UCONFIG_NO_CONVERSION diff --git a/thirdparty/icu4c/common/ucol_data.h b/thirdparty/icu4c/common/ucol_data.h new file mode 100644 index 0000000000..83f54abba1 --- /dev/null +++ b/thirdparty/icu4c/common/ucol_data.h @@ -0,0 +1,89 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2000-2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ucol_data.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011jul02 +* created by: Markus Scherer +* +* Private implementation header for C/C++ collation. +* Some file data structure definitions were moved here from i18n/ucol_imp.h +* so that the common library (via ucol_swp.cpp) need not depend on the i18n library at all. +* +* We do not want to move the collation swapper to the i18n library because +* a) the resource bundle swapper depends on it and would have to move too, and +* b) we might want to eventually implement runtime data swapping, +* which might (or might not) be easier if all swappers are in the common library. +*/ + +#ifndef __UCOL_DATA_H__ +#define __UCOL_DATA_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION + +/* let us know whether reserved fields are reset to zero or junked */ +#define UCOL_HEADER_MAGIC 0x20030618 + +typedef struct { + int32_t size; + /* all the offsets are in bytes */ + /* to get the address add to the header address and cast properly */ + uint32_t options; /* these are the default options for the collator */ + uint32_t UCAConsts; /* structure which holds values for indirect positioning and implicit ranges */ + uint32_t contractionUCACombos; /* this one is needed only for UCA, to copy the appropriate contractions */ + uint32_t magic; /* magic number - lets us know whether reserved data is reset or junked */ + uint32_t mappingPosition; /* const uint8_t *mappingPosition; */ + uint32_t expansion; /* uint32_t *expansion; */ + uint32_t contractionIndex; /* UChar *contractionIndex; */ + uint32_t contractionCEs; /* uint32_t *contractionCEs; */ + uint32_t contractionSize; /* needed for various closures */ + /*int32_t latinOneMapping;*/ /* this is now handled in the trie itself *//* fast track to latin1 chars */ + + uint32_t endExpansionCE; /* array of last collation element in + expansion */ + uint32_t expansionCESize; /* array of maximum expansion size + corresponding to the expansion + collation elements with last element + in endExpansionCE*/ + int32_t endExpansionCECount; /* size of endExpansionCE */ + uint32_t unsafeCP; /* hash table of unsafe code points */ + uint32_t contrEndCP; /* hash table of final code points */ + /* in contractions. */ + + int32_t contractionUCACombosSize; /* number of UCA contraction items. */ + /*Length is contractionUCACombosSize*contractionUCACombosWidth*sizeof(UChar) */ + UBool jamoSpecial; /* is jamoSpecial */ + UBool isBigEndian; /* is this data big endian? from the UDataInfo header*/ + uint8_t charSetFamily; /* what is the charset family of this data from the UDataInfo header*/ + uint8_t contractionUCACombosWidth; /* width of UCA combos field */ + UVersionInfo version; + UVersionInfo UCAVersion; /* version of the UCA, read from file */ + UVersionInfo UCDVersion; /* UCD version, obtained by u_getUnicodeVersion */ + UVersionInfo formatVersion; /* format version from the UDataInfo header */ + uint32_t scriptToLeadByte; /* offset to script to lead collation byte mapping data */ + uint32_t leadByteToScript; /* offset to lead collation byte to script mapping data */ + uint8_t reserved[76]; /* for future use */ +} UCATableHeader; + +typedef struct { + uint32_t byteSize; + uint32_t tableSize; + uint32_t contsSize; + uint32_t table; + uint32_t conts; + UVersionInfo UCAVersion; /* version of the UCA, read from file */ + uint8_t padding[8]; +} InverseUCATableHeader; + +#endif /* !UCONFIG_NO_COLLATION */ + +#endif /* __UCOL_DATA_H__ */ diff --git a/thirdparty/icu4c/common/ucol_swp.cpp b/thirdparty/icu4c/common/ucol_swp.cpp new file mode 100644 index 0000000000..1af19863fa --- /dev/null +++ b/thirdparty/icu4c/common/ucol_swp.cpp @@ -0,0 +1,615 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2003-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ucol_swp.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003sep10 +* created by: Markus W. Scherer +* +* Swap collation binaries. +*/ + +#include "unicode/udata.h" /* UDataInfo */ +#include "utrie.h" +#include "utrie2.h" +#include "udataswp.h" +#include "cmemory.h" +#include "ucol_data.h" +#include "ucol_swp.h" + +/* swapping ----------------------------------------------------------------- */ + +#if !UCONFIG_NO_COLLATION + +U_CAPI UBool U_EXPORT2 +ucol_looksLikeCollationBinary(const UDataSwapper *ds, + const void *inData, int32_t length) { + if(ds==NULL || inData==NULL || length<-1) { + return FALSE; + } + + // First check for format version 4+ which has a standard data header. + UErrorCode errorCode=U_ZERO_ERROR; + (void)udata_swapDataHeader(ds, inData, -1, NULL, &errorCode); + if(U_SUCCESS(errorCode)) { + const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4); + if(info.dataFormat[0]==0x55 && // dataFormat="UCol" + info.dataFormat[1]==0x43 && + info.dataFormat[2]==0x6f && + info.dataFormat[3]==0x6c) { + return TRUE; + } + } + + // Else check for format version 3. + const UCATableHeader *inHeader=(const UCATableHeader *)inData; + + /* + * The collation binary must contain at least the UCATableHeader, + * starting with its size field. + * sizeof(UCATableHeader)==42*4 in ICU 2.8 + * check the length against the header size before reading the size field + */ + UCATableHeader header; + uprv_memset(&header, 0, sizeof(header)); + if(length<0) { + header.size=udata_readInt32(ds, inHeader->size); + } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) { + return FALSE; + } + + header.magic=ds->readUInt32(inHeader->magic); + if(!( + header.magic==UCOL_HEADER_MAGIC && + inHeader->formatVersion[0]==3 /*&& + inHeader->formatVersion[1]>=0*/ + )) { + return FALSE; + } + + if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) { + return FALSE; + } + + return TRUE; +} + +namespace { + +/* swap a header-less collation formatVersion=3 binary, inside a resource bundle or ucadata.icu */ +int32_t +swapFormatVersion3(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint8_t *inBytes; + uint8_t *outBytes; + + const UCATableHeader *inHeader; + UCATableHeader *outHeader; + UCATableHeader header; + + uint32_t count; + + /* argument checking in case we were not called from ucol_swap() */ + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + inBytes=(const uint8_t *)inData; + outBytes=(uint8_t *)outData; + + inHeader=(const UCATableHeader *)inData; + outHeader=(UCATableHeader *)outData; + + /* + * The collation binary must contain at least the UCATableHeader, + * starting with its size field. + * sizeof(UCATableHeader)==42*4 in ICU 2.8 + * check the length against the header size before reading the size field + */ + uprv_memset(&header, 0, sizeof(header)); + if(length<0) { + header.size=udata_readInt32(ds, inHeader->size); + } else if((length<(42*4) || length<(header.size=udata_readInt32(ds, inHeader->size)))) { + udata_printError(ds, "ucol_swap(formatVersion=3): too few bytes (%d after header) for collation data\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + header.magic=ds->readUInt32(inHeader->magic); + if(!( + header.magic==UCOL_HEADER_MAGIC && + inHeader->formatVersion[0]==3 /*&& + inHeader->formatVersion[1]>=0*/ + )) { + udata_printError(ds, "ucol_swap(formatVersion=3): magic 0x%08x or format version %02x.%02x is not a collation binary\n", + header.magic, + inHeader->formatVersion[0], inHeader->formatVersion[1]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + if(inHeader->isBigEndian!=ds->inIsBigEndian || inHeader->charSetFamily!=ds->inCharset) { + udata_printError(ds, "ucol_swap(formatVersion=3): endianness %d or charset %d does not match the swapper\n", + inHeader->isBigEndian, inHeader->charSetFamily); + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + + if(length>=0) { + /* copy everything, takes care of data that needs no swapping */ + if(inBytes!=outBytes) { + uprv_memcpy(outBytes, inBytes, header.size); + } + + /* swap the necessary pieces in the order of their occurrence in the data */ + + /* read more of the UCATableHeader (the size field was read above) */ + header.options= ds->readUInt32(inHeader->options); + header.UCAConsts= ds->readUInt32(inHeader->UCAConsts); + header.contractionUCACombos= ds->readUInt32(inHeader->contractionUCACombos); + header.mappingPosition= ds->readUInt32(inHeader->mappingPosition); + header.expansion= ds->readUInt32(inHeader->expansion); + header.contractionIndex= ds->readUInt32(inHeader->contractionIndex); + header.contractionCEs= ds->readUInt32(inHeader->contractionCEs); + header.contractionSize= ds->readUInt32(inHeader->contractionSize); + header.endExpansionCE= ds->readUInt32(inHeader->endExpansionCE); + header.expansionCESize= ds->readUInt32(inHeader->expansionCESize); + header.endExpansionCECount= udata_readInt32(ds, inHeader->endExpansionCECount); + header.contractionUCACombosSize=udata_readInt32(ds, inHeader->contractionUCACombosSize); + header.scriptToLeadByte= ds->readUInt32(inHeader->scriptToLeadByte); + header.leadByteToScript= ds->readUInt32(inHeader->leadByteToScript); + + /* swap the 32-bit integers in the header */ + ds->swapArray32(ds, inHeader, (int32_t)((const char *)&inHeader->jamoSpecial-(const char *)inHeader), + outHeader, pErrorCode); + ds->swapArray32(ds, &(inHeader->scriptToLeadByte), sizeof(header.scriptToLeadByte) + sizeof(header.leadByteToScript), + &(outHeader->scriptToLeadByte), pErrorCode); + /* set the output platform properties */ + outHeader->isBigEndian=ds->outIsBigEndian; + outHeader->charSetFamily=ds->outCharset; + + /* swap the options */ + if(header.options!=0) { + ds->swapArray32(ds, inBytes+header.options, header.expansion-header.options, + outBytes+header.options, pErrorCode); + } + + /* swap the expansions */ + if(header.mappingPosition!=0 && header.expansion!=0) { + if(header.contractionIndex!=0) { + /* expansions bounded by contractions */ + count=header.contractionIndex-header.expansion; + } else { + /* no contractions: expansions bounded by the main trie */ + count=header.mappingPosition-header.expansion; + } + ds->swapArray32(ds, inBytes+header.expansion, (int32_t)count, + outBytes+header.expansion, pErrorCode); + } + + /* swap the contractions */ + if(header.contractionSize!=0) { + /* contractionIndex: UChar[] */ + ds->swapArray16(ds, inBytes+header.contractionIndex, header.contractionSize*2, + outBytes+header.contractionIndex, pErrorCode); + + /* contractionCEs: CEs[] */ + ds->swapArray32(ds, inBytes+header.contractionCEs, header.contractionSize*4, + outBytes+header.contractionCEs, pErrorCode); + } + + /* swap the main trie */ + if(header.mappingPosition!=0) { + count=header.endExpansionCE-header.mappingPosition; + utrie_swap(ds, inBytes+header.mappingPosition, (int32_t)count, + outBytes+header.mappingPosition, pErrorCode); + } + + /* swap the max expansion table */ + if(header.endExpansionCECount!=0) { + ds->swapArray32(ds, inBytes+header.endExpansionCE, header.endExpansionCECount*4, + outBytes+header.endExpansionCE, pErrorCode); + } + + /* expansionCESize, unsafeCP, contrEndCP: uint8_t[], no need to swap */ + + /* swap UCA constants */ + if(header.UCAConsts!=0) { + /* + * if UCAConsts!=0 then contractionUCACombos because we are swapping + * the UCA data file, and we know that the UCA contains contractions + */ + ds->swapArray32(ds, inBytes+header.UCAConsts, header.contractionUCACombos-header.UCAConsts, + outBytes+header.UCAConsts, pErrorCode); + } + + /* swap UCA contractions */ + if(header.contractionUCACombosSize!=0) { + count=header.contractionUCACombosSize*inHeader->contractionUCACombosWidth*U_SIZEOF_UCHAR; + ds->swapArray16(ds, inBytes+header.contractionUCACombos, (int32_t)count, + outBytes+header.contractionUCACombos, pErrorCode); + } + + /* swap the script to lead bytes */ + if(header.scriptToLeadByte!=0) { + int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte))); // each entry = 2 * uint16 + int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.scriptToLeadByte + 2))); // each entry = uint16 + ds->swapArray16(ds, inBytes+header.scriptToLeadByte, + 4 + (4 * indexCount) + (2 * dataCount), + outBytes+header.scriptToLeadByte, pErrorCode); + } + + /* swap the lead byte to scripts */ + if(header.leadByteToScript!=0) { + int indexCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript))); // each entry = uint16 + int dataCount = ds->readUInt16(*((uint16_t*)(inBytes+header.leadByteToScript + 2))); // each entry = uint16 + ds->swapArray16(ds, inBytes+header.leadByteToScript, + 4 + (2 * indexCount) + (2 * dataCount), + outBytes+header.leadByteToScript, pErrorCode); + } + } + + return header.size; +} + +// swap formatVersion 4 or 5 ----------------------------------------------- *** + +// The following are copied from CollationDataReader, trading an awkward copy of constants +// for an awkward relocation of the i18n collationdatareader.h file into the common library. +// Keep them in sync! + +enum { + IX_INDEXES_LENGTH, // 0 + IX_OPTIONS, + IX_RESERVED2, + IX_RESERVED3, + + IX_JAMO_CE32S_START, // 4 + IX_REORDER_CODES_OFFSET, + IX_REORDER_TABLE_OFFSET, + IX_TRIE_OFFSET, + + IX_RESERVED8_OFFSET, // 8 + IX_CES_OFFSET, + IX_RESERVED10_OFFSET, + IX_CE32S_OFFSET, + + IX_ROOT_ELEMENTS_OFFSET, // 12 + IX_CONTEXTS_OFFSET, + IX_UNSAFE_BWD_OFFSET, + IX_FAST_LATIN_TABLE_OFFSET, + + IX_SCRIPTS_OFFSET, // 16 + IX_COMPRESSIBLE_BYTES_OFFSET, + IX_RESERVED18_OFFSET, + IX_TOTAL_SIZE +}; + +int32_t +swapFormatVersion4(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return 0; } + + const uint8_t *inBytes=(const uint8_t *)inData; + uint8_t *outBytes=(uint8_t *)outData; + + const int32_t *inIndexes=(const int32_t *)inBytes; + int32_t indexes[IX_TOTAL_SIZE+1]; + + // Need at least IX_INDEXES_LENGTH and IX_OPTIONS. + if(0<=length && length<8) { + udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes " + "(%d after header) for collation data\n", + length); + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + int32_t indexesLength=indexes[0]=udata_readInt32(ds, inIndexes[0]); + if(0<=length && length<(indexesLength*4)) { + udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes " + "(%d after header) for collation data\n", + length); + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + for(int32_t i=1; i<=IX_TOTAL_SIZE && i<indexesLength; ++i) { + indexes[i]=udata_readInt32(ds, inIndexes[i]); + } + for(int32_t i=indexesLength; i<=IX_TOTAL_SIZE; ++i) { + indexes[i]=-1; + } + inIndexes=NULL; // Make sure we do not accidentally use these instead of indexes[]. + + // Get the total length of the data. + int32_t size; + if(indexesLength>IX_TOTAL_SIZE) { + size=indexes[IX_TOTAL_SIZE]; + } else if(indexesLength>IX_REORDER_CODES_OFFSET) { + size=indexes[indexesLength-1]; + } else { + size=indexesLength*4; + } + if(length<0) { return size; } + + if(length<size) { + udata_printError(ds, "ucol_swap(formatVersion=4): too few bytes " + "(%d after header) for collation data\n", + length); + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + // Copy the data for inaccessible bytes and arrays of bytes. + if(inBytes!=outBytes) { + uprv_memcpy(outBytes, inBytes, size); + } + + // Swap the int32_t indexes[]. + ds->swapArray32(ds, inBytes, indexesLength * 4, outBytes, &errorCode); + + // The following is a modified version of CollationDataReader::read(). + // Here we use indexes[] not inIndexes[] because + // the inIndexes[] may not be in this machine's endianness. + int32_t index; // one of the indexes[] slots + int32_t offset; // byte offset for the index part + // int32_t length; // number of bytes in the index part + + index = IX_REORDER_CODES_OFFSET; + offset = indexes[index]; + length = indexes[index + 1] - offset; + if(length > 0) { + ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode); + } + + // Skip the IX_REORDER_TABLE_OFFSET byte array. + + index = IX_TRIE_OFFSET; + offset = indexes[index]; + length = indexes[index + 1] - offset; + if(length > 0) { + utrie2_swap(ds, inBytes + offset, length, outBytes + offset, &errorCode); + } + + index = IX_RESERVED8_OFFSET; + offset = indexes[index]; + length = indexes[index + 1] - offset; + if(length > 0) { + udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED8_OFFSET\n", length); + errorCode = U_UNSUPPORTED_ERROR; + return 0; + } + + index = IX_CES_OFFSET; + offset = indexes[index]; + length = indexes[index + 1] - offset; + if(length > 0) { + ds->swapArray64(ds, inBytes + offset, length, outBytes + offset, &errorCode); + } + + index = IX_RESERVED10_OFFSET; + offset = indexes[index]; + length = indexes[index + 1] - offset; + if(length > 0) { + udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED10_OFFSET\n", length); + errorCode = U_UNSUPPORTED_ERROR; + return 0; + } + + index = IX_CE32S_OFFSET; + offset = indexes[index]; + length = indexes[index + 1] - offset; + if(length > 0) { + ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode); + } + + index = IX_ROOT_ELEMENTS_OFFSET; + offset = indexes[index]; + length = indexes[index + 1] - offset; + if(length > 0) { + ds->swapArray32(ds, inBytes + offset, length, outBytes + offset, &errorCode); + } + + index = IX_CONTEXTS_OFFSET; + offset = indexes[index]; + length = indexes[index + 1] - offset; + if(length > 0) { + ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode); + } + + index = IX_UNSAFE_BWD_OFFSET; + offset = indexes[index]; + length = indexes[index + 1] - offset; + if(length > 0) { + ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode); + } + + index = IX_FAST_LATIN_TABLE_OFFSET; + offset = indexes[index]; + length = indexes[index + 1] - offset; + if(length > 0) { + ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode); + } + + index = IX_SCRIPTS_OFFSET; + offset = indexes[index]; + length = indexes[index + 1] - offset; + if(length > 0) { + ds->swapArray16(ds, inBytes + offset, length, outBytes + offset, &errorCode); + } + + // Skip the IX_COMPRESSIBLE_BYTES_OFFSET byte array. + + index = IX_RESERVED18_OFFSET; + offset = indexes[index]; + length = indexes[index + 1] - offset; + if(length > 0) { + udata_printError(ds, "ucol_swap(formatVersion=4): unknown data at IX_RESERVED18_OFFSET\n", length); + errorCode = U_UNSUPPORTED_ERROR; + return 0; + } + + return size; +} + +} // namespace + +/* swap ICU collation data like ucadata.icu */ +U_CAPI int32_t U_EXPORT2 +ucol_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { return 0; } + + /* udata_swapDataHeader checks the arguments */ + int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + // Try to swap the old format version which did not have a standard data header. + *pErrorCode=U_ZERO_ERROR; + return swapFormatVersion3(ds, inData, length, outData, pErrorCode); + } + + /* check data format and format version */ + const UDataInfo &info=*(const UDataInfo *)((const char *)inData+4); + if(!( + info.dataFormat[0]==0x55 && // dataFormat="UCol" + info.dataFormat[1]==0x43 && + info.dataFormat[2]==0x6f && + info.dataFormat[3]==0x6c && + (3<=info.formatVersion[0] && info.formatVersion[0]<=5) + )) { + udata_printError(ds, "ucol_swap(): data format %02x.%02x.%02x.%02x " + "(format version %02x.%02x) is not recognized as collation data\n", + info.dataFormat[0], info.dataFormat[1], + info.dataFormat[2], info.dataFormat[3], + info.formatVersion[0], info.formatVersion[1]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + inData=(const char *)inData+headerSize; + if(length>=0) { length-=headerSize; } + outData=(char *)outData+headerSize; + int32_t collationSize; + if(info.formatVersion[0]>=4) { + collationSize=swapFormatVersion4(ds, inData, length, outData, *pErrorCode); + } else { + collationSize=swapFormatVersion3(ds, inData, length, outData, pErrorCode); + } + if(U_SUCCESS(*pErrorCode)) { + return headerSize+collationSize; + } else { + return 0; + } +} + +/* swap inverse UCA collation data (invuca.icu) */ +U_CAPI int32_t U_EXPORT2 +ucol_swapInverseUCA(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UDataInfo *pInfo; + int32_t headerSize; + + const uint8_t *inBytes; + uint8_t *outBytes; + + const InverseUCATableHeader *inHeader; + InverseUCATableHeader *outHeader; + InverseUCATableHeader header={ 0,0,0,0,0,{0,0,0,0},{0,0,0,0,0,0,0,0} }; + + /* udata_swapDataHeader checks the arguments */ + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* check data format and format version */ + pInfo=(const UDataInfo *)((const char *)inData+4); + if(!( + pInfo->dataFormat[0]==0x49 && /* dataFormat="InvC" */ + pInfo->dataFormat[1]==0x6e && + pInfo->dataFormat[2]==0x76 && + pInfo->dataFormat[3]==0x43 && + pInfo->formatVersion[0]==2 && + pInfo->formatVersion[1]>=1 + )) { + udata_printError(ds, "ucol_swapInverseUCA(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not an inverse UCA collation file\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0], pInfo->formatVersion[1]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + inBytes=(const uint8_t *)inData+headerSize; + outBytes=(uint8_t *)outData+headerSize; + + inHeader=(const InverseUCATableHeader *)inBytes; + outHeader=(InverseUCATableHeader *)outBytes; + + /* + * The inverse UCA collation binary must contain at least the InverseUCATableHeader, + * starting with its size field. + * sizeof(UCATableHeader)==8*4 in ICU 2.8 + * check the length against the header size before reading the size field + */ + if(length<0) { + header.byteSize=udata_readInt32(ds, inHeader->byteSize); + } else if( + ((length-headerSize)<(8*4) || + (uint32_t)(length-headerSize)<(header.byteSize=udata_readInt32(ds, inHeader->byteSize))) + ) { + udata_printError(ds, "ucol_swapInverseUCA(): too few bytes (%d after header) for inverse UCA collation data\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + if(length>=0) { + /* copy everything, takes care of data that needs no swapping */ + if(inBytes!=outBytes) { + uprv_memcpy(outBytes, inBytes, header.byteSize); + } + + /* swap the necessary pieces in the order of their occurrence in the data */ + + /* read more of the InverseUCATableHeader (the byteSize field was read above) */ + header.tableSize= ds->readUInt32(inHeader->tableSize); + header.contsSize= ds->readUInt32(inHeader->contsSize); + header.table= ds->readUInt32(inHeader->table); + header.conts= ds->readUInt32(inHeader->conts); + + /* swap the 32-bit integers in the header */ + ds->swapArray32(ds, inHeader, 5*4, outHeader, pErrorCode); + + /* swap the inverse table; tableSize counts uint32_t[3] rows */ + ds->swapArray32(ds, inBytes+header.table, header.tableSize*3*4, + outBytes+header.table, pErrorCode); + + /* swap the continuation table; contsSize counts UChars */ + ds->swapArray16(ds, inBytes+header.conts, header.contsSize*U_SIZEOF_UCHAR, + outBytes+header.conts, pErrorCode); + } + + return headerSize+header.byteSize; +} + +#endif /* #if !UCONFIG_NO_COLLATION */ diff --git a/thirdparty/icu4c/common/ucol_swp.h b/thirdparty/icu4c/common/ucol_swp.h new file mode 100644 index 0000000000..0c2990a85e --- /dev/null +++ b/thirdparty/icu4c/common/ucol_swp.h @@ -0,0 +1,58 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2003-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ucol_swp.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003sep10 +* created by: Markus W. Scherer +* +* Swap collation binaries. +*/ + +#ifndef __UCOL_SWP_H__ +#define __UCOL_SWP_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION + +#include "udataswp.h" + +/* + * Does the data look like a collation binary? + * @internal + */ +U_CAPI UBool U_EXPORT2 +ucol_looksLikeCollationBinary(const UDataSwapper *ds, + const void *inData, int32_t length); + +/** + * Swap ICU collation data like ucadata.icu. See udataswp.h. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +ucol_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Swap inverse UCA collation data (invuca.icu). See udataswp.h. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +ucol_swapInverseUCA(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +#endif /* #if !UCONFIG_NO_COLLATION */ + +#endif diff --git a/thirdparty/icu4c/common/ucptrie.cpp b/thirdparty/icu4c/common/ucptrie.cpp new file mode 100644 index 0000000000..0004160a23 --- /dev/null +++ b/thirdparty/icu4c/common/ucptrie.cpp @@ -0,0 +1,601 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucptrie.cpp (modified from utrie2.cpp) +// created: 2017dec29 Markus W. Scherer + +// #define UCPTRIE_DEBUG +#ifdef UCPTRIE_DEBUG +# include <stdio.h> +#endif + +#include "unicode/utypes.h" +#include "unicode/ucptrie.h" +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "uassert.h" +#include "ucptrie_impl.h" + +U_CAPI UCPTrie * U_EXPORT2 +ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, + const void *data, int32_t length, int32_t *pActualLength, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + + if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) || + type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type || + valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + + // Enough data for a trie header? + if (length < (int32_t)sizeof(UCPTrieHeader)) { + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + + // Check the signature. + const UCPTrieHeader *header = (const UCPTrieHeader *)data; + if (header->signature != UCPTRIE_SIG) { + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + + int32_t options = header->options; + int32_t typeInt = (options >> 6) & 3; + int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK; + if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 || + (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) { + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + UCPTrieType actualType = (UCPTrieType)typeInt; + UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt; + if (type < 0) { + type = actualType; + } + if (valueWidth < 0) { + valueWidth = actualValueWidth; + } + if (type != actualType || valueWidth != actualValueWidth) { + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + + // Get the length values and offsets. + UCPTrie tempTrie; + uprv_memset(&tempTrie, 0, sizeof(tempTrie)); + tempTrie.indexLength = header->indexLength; + tempTrie.dataLength = + ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength; + tempTrie.index3NullOffset = header->index3NullOffset; + tempTrie.dataNullOffset = + ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset; + + tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2; + tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12; + tempTrie.type = type; + tempTrie.valueWidth = valueWidth; + + // Calculate the actual length. + int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2; + if (valueWidth == UCPTRIE_VALUE_BITS_16) { + actualLength += tempTrie.dataLength * 2; + } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { + actualLength += tempTrie.dataLength * 4; + } else { + actualLength += tempTrie.dataLength; + } + if (length < actualLength) { + *pErrorCode = U_INVALID_FORMAT_ERROR; // Not enough bytes. + return nullptr; + } + + // Allocate the trie. + UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie)); + if (trie == nullptr) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); +#ifdef UCPTRIE_DEBUG + trie->name = "fromSerialized"; +#endif + + // Set the pointers to its index and data arrays. + const uint16_t *p16 = (const uint16_t *)(header + 1); + trie->index = p16; + p16 += trie->indexLength; + + // Get the data. + int32_t nullValueOffset = trie->dataNullOffset; + if (nullValueOffset >= trie->dataLength) { + nullValueOffset = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; + } + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + trie->data.ptr16 = p16; + trie->nullValue = trie->data.ptr16[nullValueOffset]; + break; + case UCPTRIE_VALUE_BITS_32: + trie->data.ptr32 = (const uint32_t *)p16; + trie->nullValue = trie->data.ptr32[nullValueOffset]; + break; + case UCPTRIE_VALUE_BITS_8: + trie->data.ptr8 = (const uint8_t *)p16; + trie->nullValue = trie->data.ptr8[nullValueOffset]; + break; + default: + // Unreachable because valueWidth was checked above. + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + + if (pActualLength != nullptr) { + *pActualLength = actualLength; + } + return trie; +} + +U_CAPI void U_EXPORT2 +ucptrie_close(UCPTrie *trie) { + uprv_free(trie); +} + +U_CAPI UCPTrieType U_EXPORT2 +ucptrie_getType(const UCPTrie *trie) { + return (UCPTrieType)trie->type; +} + +U_CAPI UCPTrieValueWidth U_EXPORT2 +ucptrie_getValueWidth(const UCPTrie *trie) { + return (UCPTrieValueWidth)trie->valueWidth; +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) { + int32_t i1 = c >> UCPTRIE_SHIFT_1; + if (trie->type == UCPTRIE_TYPE_FAST) { + U_ASSERT(0xffff < c && c < trie->highStart); + i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH; + } else { + U_ASSERT((uint32_t)c < (uint32_t)trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT); + i1 += UCPTRIE_SMALL_INDEX_LENGTH; + } + int32_t i3Block = trie->index[ + (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)]; + int32_t i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK; + int32_t dataBlock; + if ((i3Block & 0x8000) == 0) { + // 16-bit indexes + dataBlock = trie->index[i3Block + i3]; + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. + i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); + i3 &= 7; + dataBlock = ((int32_t)trie->index[i3Block++] << (2 + (2 * i3))) & 0x30000; + dataBlock |= trie->index[i3Block + i3]; + } + return dataBlock + (c & UCPTRIE_SMALL_DATA_MASK); +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3) { + UChar32 c = (lt1 << 12) | (t2 << 6) | t3; + if (c >= trie->highStart) { + // Possible because the UTF-8 macro compares with shifted12HighStart which may be higher. + return trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; + } + return ucptrie_internalSmallIndex(trie, c); +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c, + const uint8_t *start, const uint8_t *src) { + int32_t i, length; + // Support 64-bit pointers by avoiding cast of arbitrary difference. + if ((src - start) <= 7) { + i = length = (int32_t)(src - start); + } else { + i = length = 7; + start = src - 7; + } + c = utf8_prevCharSafeBody(start, 0, &i, c, -1); + i = length - i; // Number of bytes read backward from src. + int32_t idx = _UCPTRIE_CP_INDEX(trie, 0xffff, c); + return (idx << 3) | i; +} + +namespace { + +inline uint32_t getValue(UCPTrieData data, UCPTrieValueWidth valueWidth, int32_t dataIndex) { + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + return data.ptr16[dataIndex]; + case UCPTRIE_VALUE_BITS_32: + return data.ptr32[dataIndex]; + case UCPTRIE_VALUE_BITS_8: + return data.ptr8[dataIndex]; + default: + // Unreachable if the trie is properly initialized. + return 0xffffffff; + } +} + +} // namespace + +U_CAPI uint32_t U_EXPORT2 +ucptrie_get(const UCPTrie *trie, UChar32 c) { + int32_t dataIndex; + if ((uint32_t)c <= 0x7f) { + // linear ASCII + dataIndex = c; + } else { + UChar32 fastMax = trie->type == UCPTRIE_TYPE_FAST ? 0xffff : UCPTRIE_SMALL_MAX; + dataIndex = _UCPTRIE_CP_INDEX(trie, fastMax, c); + } + return getValue(trie->data, (UCPTrieValueWidth)trie->valueWidth, dataIndex); +} + +namespace { + +constexpr int32_t MAX_UNICODE = 0x10ffff; + +inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue, + UCPMapValueFilter *filter, const void *context) { + if (value == trieNullValue) { + value = nullValue; + } else if (filter != nullptr) { + value = filter(context, value); + } + return value; +} + +UChar32 getRange(const void *t, UChar32 start, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { + if ((uint32_t)start > MAX_UNICODE) { + return U_SENTINEL; + } + const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(t); + UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth; + if (start >= trie->highStart) { + if (pValue != nullptr) { + int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; + uint32_t value = getValue(trie->data, valueWidth, di); + if (filter != nullptr) { value = filter(context, value); } + *pValue = value; + } + return MAX_UNICODE; + } + + uint32_t nullValue = trie->nullValue; + if (filter != nullptr) { nullValue = filter(context, nullValue); } + const uint16_t *index = trie->index; + + int32_t prevI3Block = -1; + int32_t prevBlock = -1; + UChar32 c = start; + uint32_t trieValue, value = nullValue; + bool haveValue = false; + do { + int32_t i3Block; + int32_t i3; + int32_t i3BlockLength; + int32_t dataBlockLength; + if (c <= 0xffff && (trie->type == UCPTRIE_TYPE_FAST || c <= UCPTRIE_SMALL_MAX)) { + i3Block = 0; + i3 = c >> UCPTRIE_FAST_SHIFT; + i3BlockLength = trie->type == UCPTRIE_TYPE_FAST ? + UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; + dataBlockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; + } else { + // Use the multi-stage index. + int32_t i1 = c >> UCPTRIE_SHIFT_1; + if (trie->type == UCPTRIE_TYPE_FAST) { + U_ASSERT(0xffff < c && c < trie->highStart); + i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH; + } else { + U_ASSERT(c < trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT); + i1 += UCPTRIE_SMALL_INDEX_LENGTH; + } + i3Block = trie->index[ + (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)]; + if (i3Block == prevI3Block && (c - start) >= UCPTRIE_CP_PER_INDEX_2_ENTRY) { + // The index-3 block is the same as the previous one, and filled with value. + U_ASSERT((c & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0); + c += UCPTRIE_CP_PER_INDEX_2_ENTRY; + continue; + } + prevI3Block = i3Block; + if (i3Block == trie->index3NullOffset) { + // This is the index-3 null block. + if (haveValue) { + if (nullValue != value) { + return c - 1; + } + } else { + trieValue = trie->nullValue; + value = nullValue; + if (pValue != nullptr) { *pValue = nullValue; } + haveValue = true; + } + prevBlock = trie->dataNullOffset; + c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); + continue; + } + i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK; + i3BlockLength = UCPTRIE_INDEX_3_BLOCK_LENGTH; + dataBlockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + } + // Enumerate data blocks for one index-3 block. + do { + int32_t block; + if ((i3Block & 0x8000) == 0) { + block = index[i3Block + i3]; + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. + int32_t group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); + int32_t gi = i3 & 7; + block = ((int32_t)index[group++] << (2 + (2 * gi))) & 0x30000; + block |= index[group + gi]; + } + if (block == prevBlock && (c - start) >= dataBlockLength) { + // The block is the same as the previous one, and filled with value. + U_ASSERT((c & (dataBlockLength - 1)) == 0); + c += dataBlockLength; + } else { + int32_t dataMask = dataBlockLength - 1; + prevBlock = block; + if (block == trie->dataNullOffset) { + // This is the data null block. + if (haveValue) { + if (nullValue != value) { + return c - 1; + } + } else { + trieValue = trie->nullValue; + value = nullValue; + if (pValue != nullptr) { *pValue = nullValue; } + haveValue = true; + } + c = (c + dataBlockLength) & ~dataMask; + } else { + int32_t di = block + (c & dataMask); + uint32_t trieValue2 = getValue(trie->data, valueWidth, di); + if (haveValue) { + if (trieValue2 != trieValue) { + if (filter == nullptr || + maybeFilterValue(trieValue2, trie->nullValue, nullValue, + filter, context) != value) { + return c - 1; + } + trieValue = trieValue2; // may or may not help + } + } else { + trieValue = trieValue2; + value = maybeFilterValue(trieValue2, trie->nullValue, nullValue, + filter, context); + if (pValue != nullptr) { *pValue = value; } + haveValue = true; + } + while ((++c & dataMask) != 0) { + trieValue2 = getValue(trie->data, valueWidth, ++di); + if (trieValue2 != trieValue) { + if (filter == nullptr || + maybeFilterValue(trieValue2, trie->nullValue, nullValue, + filter, context) != value) { + return c - 1; + } + trieValue = trieValue2; // may or may not help + } + } + } + } + } while (++i3 < i3BlockLength); + } while (c < trie->highStart); + U_ASSERT(haveValue); + int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; + uint32_t highValue = getValue(trie->data, valueWidth, di); + if (maybeFilterValue(highValue, trie->nullValue, nullValue, + filter, context) != value) { + return c - 1; + } else { + return MAX_UNICODE; + } +} + +} // namespace + +U_CFUNC UChar32 +ucptrie_internalGetRange(UCPTrieGetRange *getRange, + const void *trie, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { + if (option == UCPMAP_RANGE_NORMAL) { + return getRange(trie, start, filter, context, pValue); + } + uint32_t value; + if (pValue == nullptr) { + // We need to examine the range value even if the caller does not want it. + pValue = &value; + } + UChar32 surrEnd = option == UCPMAP_RANGE_FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff; + UChar32 end = getRange(trie, start, filter, context, pValue); + if (end < 0xd7ff || start > surrEnd) { + return end; + } + // The range overlaps with surrogates, or ends just before the first one. + if (*pValue == surrogateValue) { + if (end >= surrEnd) { + // Surrogates followed by a non-surrogateValue range, + // or surrogates are part of a larger surrogateValue range. + return end; + } + } else { + if (start <= 0xd7ff) { + return 0xd7ff; // Non-surrogateValue range ends before surrogateValue surrogates. + } + // Start is a surrogate with a non-surrogateValue code *unit* value. + // Return a surrogateValue code *point* range. + *pValue = surrogateValue; + if (end > surrEnd) { + return surrEnd; // Surrogate range ends before non-surrogateValue rest of range. + } + } + // See if the surrogateValue surrogate range can be merged with + // an immediately following range. + uint32_t value2; + UChar32 end2 = getRange(trie, surrEnd + 1, filter, context, &value2); + if (value2 == surrogateValue) { + return end2; + } + return surrEnd; +} + +U_CAPI UChar32 U_EXPORT2 +ucptrie_getRange(const UCPTrie *trie, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { + return ucptrie_internalGetRange(getRange, trie, start, + option, surrogateValue, + filter, context, pValue); +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_toBinary(const UCPTrie *trie, + void *data, int32_t capacity, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + + UCPTrieType type = (UCPTrieType)trie->type; + UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth; + if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type || + valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth || + capacity < 0 || + (capacity > 0 && (data == nullptr || (U_POINTER_MASK_LSB(data, 3) != 0)))) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + int32_t length = (int32_t)sizeof(UCPTrieHeader) + trie->indexLength * 2; + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + length += trie->dataLength * 2; + break; + case UCPTRIE_VALUE_BITS_32: + length += trie->dataLength * 4; + break; + case UCPTRIE_VALUE_BITS_8: + length += trie->dataLength; + break; + default: + // unreachable + break; + } + if (capacity < length) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + return length; + } + + char *bytes = (char *)data; + UCPTrieHeader *header = (UCPTrieHeader *)bytes; + header->signature = UCPTRIE_SIG; // "Tri3" + header->options = (uint16_t)( + ((trie->dataLength & 0xf0000) >> 4) | + ((trie->dataNullOffset & 0xf0000) >> 8) | + (trie->type << 6) | + valueWidth); + header->indexLength = (uint16_t)trie->indexLength; + header->dataLength = (uint16_t)trie->dataLength; + header->index3NullOffset = trie->index3NullOffset; + header->dataNullOffset = (uint16_t)trie->dataNullOffset; + header->shiftedHighStart = trie->highStart >> UCPTRIE_SHIFT_2; + bytes += sizeof(UCPTrieHeader); + + uprv_memcpy(bytes, trie->index, trie->indexLength * 2); + bytes += trie->indexLength * 2; + + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + uprv_memcpy(bytes, trie->data.ptr16, trie->dataLength * 2); + break; + case UCPTRIE_VALUE_BITS_32: + uprv_memcpy(bytes, trie->data.ptr32, trie->dataLength * 4); + break; + case UCPTRIE_VALUE_BITS_8: + uprv_memcpy(bytes, trie->data.ptr8, trie->dataLength); + break; + default: + // unreachable + break; + } + return length; +} + +namespace { + +#ifdef UCPTRIE_DEBUG +long countNull(const UCPTrie *trie) { + uint32_t nullValue=trie->nullValue; + int32_t length=trie->dataLength; + long count=0; + switch (trie->valueWidth) { + case UCPTRIE_VALUE_BITS_16: + for(int32_t i=0; i<length; ++i) { + if(trie->data.ptr16[i]==nullValue) { ++count; } + } + break; + case UCPTRIE_VALUE_BITS_32: + for(int32_t i=0; i<length; ++i) { + if(trie->data.ptr32[i]==nullValue) { ++count; } + } + break; + case UCPTRIE_VALUE_BITS_8: + for(int32_t i=0; i<length; ++i) { + if(trie->data.ptr8[i]==nullValue) { ++count; } + } + break; + default: + // unreachable + break; + } + return count; +} + +U_CFUNC void +ucptrie_printLengths(const UCPTrie *trie, const char *which) { + long indexLength=trie->indexLength; + long dataLength=(long)trie->dataLength; + long totalLength=(long)sizeof(UCPTrieHeader)+indexLength*2+ + dataLength*(trie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 2 : + trie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 4 : 1); + printf("**UCPTrieLengths(%s %s)** index:%6ld data:%6ld countNull:%6ld serialized:%6ld\n", + which, trie->name, indexLength, dataLength, countNull(trie), totalLength); +} +#endif + +} // namespace + +// UCPMap ---- +// Initially, this is the same as UCPTrie. This may well change. + +U_CAPI uint32_t U_EXPORT2 +ucpmap_get(const UCPMap *map, UChar32 c) { + return ucptrie_get(reinterpret_cast<const UCPTrie *>(map), c); +} + +U_CAPI UChar32 U_EXPORT2 +ucpmap_getRange(const UCPMap *map, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { + return ucptrie_getRange(reinterpret_cast<const UCPTrie *>(map), start, + option, surrogateValue, + filter, context, pValue); +} diff --git a/thirdparty/icu4c/common/ucptrie_impl.h b/thirdparty/icu4c/common/ucptrie_impl.h new file mode 100644 index 0000000000..1fe6a18ac5 --- /dev/null +++ b/thirdparty/icu4c/common/ucptrie_impl.h @@ -0,0 +1,289 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucptrie_impl.h (modified from utrie2_impl.h) +// created: 2017dec29 Markus W. Scherer + +#ifndef __UCPTRIE_IMPL_H__ +#define __UCPTRIE_IMPL_H__ + +#include "unicode/ucptrie.h" +#ifdef UCPTRIE_DEBUG +#include "unicode/umutablecptrie.h" +#endif + +// UCPTrie signature values, in platform endianness and opposite endianness. +// The UCPTrie signature ASCII byte values spell "Tri3". +#define UCPTRIE_SIG 0x54726933 +#define UCPTRIE_OE_SIG 0x33697254 + +/** + * Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie. + * @internal + */ +struct UCPTrieHeader { + /** "Tri3" in big-endian US-ASCII (0x54726933) */ + uint32_t signature; + + /** + * Options bit field: + * Bits 15..12: Data length bits 19..16. + * Bits 11..8: Data null block offset bits 19..16. + * Bits 7..6: UCPTrieType + * Bits 5..3: Reserved (0). + * Bits 2..0: UCPTrieValueWidth + */ + uint16_t options; + + /** Total length of the index tables. */ + uint16_t indexLength; + + /** Data length bits 15..0. */ + uint16_t dataLength; + + /** Index-3 null block offset, 0x7fff or 0xffff if none. */ + uint16_t index3NullOffset; + + /** Data null block offset bits 15..0, 0xfffff if none. */ + uint16_t dataNullOffset; + + /** + * First code point of the single-value range ending with U+10ffff, + * rounded up and then shifted right by UCPTRIE_SHIFT_2. + */ + uint16_t shiftedHighStart; +}; + +/** + * Constants for use with UCPTrieHeader.options. + * @internal + */ +enum { + UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000, + UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00, + UCPTRIE_OPTIONS_RESERVED_MASK = 0x38, + UCPTRIE_OPTIONS_VALUE_BITS_MASK = 7, + /** + * Value for index3NullOffset which indicates that there is no index-3 null block. + * Bit 15 is unused for this value because this bit is used if the index-3 contains + * 18-bit indexes. + */ + UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff, + UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff +}; + +// Internal constants. +enum { + /** The length of the BMP index table. 1024=0x400 */ + UCPTRIE_BMP_INDEX_LENGTH = 0x10000 >> UCPTRIE_FAST_SHIFT, + + UCPTRIE_SMALL_LIMIT = 0x1000, + UCPTRIE_SMALL_INDEX_LENGTH = UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT, + + /** Shift size for getting the index-3 table offset. */ + UCPTRIE_SHIFT_3 = 4, + + /** Shift size for getting the index-2 table offset. */ + UCPTRIE_SHIFT_2 = 5 + UCPTRIE_SHIFT_3, + + /** Shift size for getting the index-1 table offset. */ + UCPTRIE_SHIFT_1 = 5 + UCPTRIE_SHIFT_2, + + /** + * Difference between two shift sizes, + * for getting an index-2 offset from an index-3 offset. 5=9-4 + */ + UCPTRIE_SHIFT_2_3 = UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3, + + /** + * Difference between two shift sizes, + * for getting an index-1 offset from an index-2 offset. 5=14-9 + */ + UCPTRIE_SHIFT_1_2 = UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2, + + /** + * Number of index-1 entries for the BMP. (4) + * This part of the index-1 table is omitted from the serialized form. + */ + UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UCPTRIE_SHIFT_1, + + /** Number of entries in an index-2 block. 32=0x20 */ + UCPTRIE_INDEX_2_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_1_2, + + /** Mask for getting the lower bits for the in-index-2-block offset. */ + UCPTRIE_INDEX_2_MASK = UCPTRIE_INDEX_2_BLOCK_LENGTH - 1, + + /** Number of code points per index-2 table entry. 512=0x200 */ + UCPTRIE_CP_PER_INDEX_2_ENTRY = 1 << UCPTRIE_SHIFT_2, + + /** Number of entries in an index-3 block. 32=0x20 */ + UCPTRIE_INDEX_3_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_2_3, + + /** Mask for getting the lower bits for the in-index-3-block offset. */ + UCPTRIE_INDEX_3_MASK = UCPTRIE_INDEX_3_BLOCK_LENGTH - 1, + + /** Number of entries in a small data block. 16=0x10 */ + UCPTRIE_SMALL_DATA_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_3, + + /** Mask for getting the lower bits for the in-small-data-block offset. */ + UCPTRIE_SMALL_DATA_MASK = UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1 +}; + +typedef UChar32 +UCPTrieGetRange(const void *trie, UChar32 start, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue); + +U_CFUNC UChar32 +ucptrie_internalGetRange(UCPTrieGetRange *getRange, + const void *trie, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue); + +#ifdef UCPTRIE_DEBUG +U_CFUNC void +ucptrie_printLengths(const UCPTrie *trie, const char *which); + +U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name); +#endif + +/* + * Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie. + * For overview information see http://site.icu-project.org/design/struct/utrie + * + * The binary trie data should be 32-bit-aligned. + * The overall layout is: + * + * UCPTrieHeader header; -- 16 bytes, see struct definition above + * uint16_t index[header.indexLength]; + * uintXY_t data[header.dataLength]; + * + * The trie data array is an array of uint16_t, uint32_t, or uint8_t, + * specified via the UCPTrieValueWidth when building the trie. + * The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned. + * The overall length of the trie data is a multiple of 4 bytes. + * (Padding is added at the end of the index array and/or near the end of the data array as needed.) + * + * The length of the data array (dataLength) is stored as an integer split across two fields + * of the header struct (high bits in header.options). + * + * The trie type can be "fast" or "small" which determines the index structure, + * specified via the UCPTrieType when building the trie. + * + * The type and valueWidth are stored in the header.options. + * There are reserved type and valueWidth values, and reserved header.options bits. + * They could be used in future format extensions. + * Code reading the trie structure must fail with an error when unknown values or options are set. + * + * Values for ASCII character (U+0000..U+007F) can always be found at the start of the data array. + * + * Values for code points below a type-specific fast-indexing limit are found via two-stage lookup. + * For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000. + * For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000. + * + * All code points in the range highStart..U+10FFFF map to a single highValue + * which is stored at the second-to-last position of the data array. + * (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.) + * The highStart value is header.shiftedHighStart<<UCPTRIE_SHIFT_2. + * (UCPTRIE_SHIFT_2=9) + * + * Values for code points fast_limit..highStart-1 are found via four-stage lookup. + * The data block size is smaller for this range than for the fast range. + * This together with more index stages with small blocks makes this range + * more easily compactable. + * + * There is also a trie error value stored at the last position of the data array. + * (See UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET.) + * It is intended to be returned for inputs that are not Unicode code points + * (outside U+0000..U+10FFFF), or in string processing for ill-formed input + * (unpaired surrogate in UTF-16, ill-formed UTF-8 subsequence). + * + * For a "fast" trie: + * + * The index array starts with the BMP index table for BMP code point lookup. + * Its length is 1024=0x400. + * + * The supplementary index-1 table follows the BMP index table. + * Variable length, for code points up to highStart-1. + * Maximum length 64=0x40=0x100000>>UCPTRIE_SHIFT_1. + * (For 0x100000 supplementary code points U+10000..U+10ffff.) + * + * After this index-1 table follow the variable-length index-3 and index-2 tables. + * + * The supplementary index tables are omitted completely + * if there is only BMP data (highStart<=U+10000). + * + * For a "small" trie: + * + * The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF. + * + * The "supplementary" index tables are always stored. + * The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1. + * + * For both trie types: + * + * The last index-2 block may be a partial block, storing indexes only for code points + * below highStart. + * + * Lookup for ASCII code point c: + * + * Linear access from the start of the data array. + * + * value = data[c]; + * + * Lookup for fast-range code point c: + * + * Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits, + * fetch the index array value at that offset, + * add the lower code point bits, index into the data array. + * + * value = data[index[c>>6] + (c&0x3f)]; + * + * (This works for ASCII as well.) + * + * Lookup for small-range code point c below highStart: + * + * Split the code point into four bit fields using several sets of shifts & masks + * to read consecutive values from the index-1, index-2, index-3 and data tables. + * + * If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff), + * then the data block offsets are stored directly as uint16_t. + * + * Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block + * has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by + * an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored + * in the additional word. + * + * See ucptrie_internalSmallIndex() for details. + * + * (In a "small" trie, this works for ASCII and below-fast_limit code points as well.) + * + * Compaction: + * + * Multiple code point ranges ("blocks") that are aligned on certain boundaries + * (determined by the shifting/bit fields of code points) and + * map to the same data values normally share a single subsequence of the data array. + * Data blocks can also overlap partially. + * (Depending on the builder code finding duplicate and overlapping blocks.) + * + * Iteration over same-value ranges: + * + * Range iteration (ucptrie_getRange()) walks the structure from a start code point + * until some code point is found that maps to a different value; + * the end of the returned range is just before that. + * + * The header.dataNullOffset (split across two header fields, high bits in header.options) + * is the offset of a widely shared data block filled with one single value. + * It helps quickly skip over large ranges of data with that value. + * The builder must ensure that if the start of any data block (fast or small) + * matches the dataNullOffset, then the whole block must be filled with the null value. + * Special care must be taken if there is no fast null data block + * but a small one, which is shorter, and it matches the *start* of some fast data block. + * + * Similarly, the header.index3NullOffset is the index-array offset of an index-3 block + * where all index entries point to the dataNullOffset. + * If there is no such data or index-3 block, then these offsets are set to + * values that cannot be reached (data offset out of range/reserved index offset), + * normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively. + */ + +#endif diff --git a/thirdparty/icu4c/common/ucurr.cpp b/thirdparty/icu4c/common/ucurr.cpp new file mode 100644 index 0000000000..0e14cddcff --- /dev/null +++ b/thirdparty/icu4c/common/ucurr.cpp @@ -0,0 +1,2701 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/ucurr.h" +#include "unicode/locid.h" +#include "unicode/ures.h" +#include "unicode/ustring.h" +#include "unicode/parsepos.h" +#include "unicode/uniset.h" +#include "unicode/usetiter.h" +#include "unicode/utf16.h" +#include "ustr_imp.h" +#include "charstr.h" +#include "cmemory.h" +#include "cstring.h" +#include "static_unicode_sets.h" +#include "uassert.h" +#include "umutex.h" +#include "ucln_cmn.h" +#include "uenumimp.h" +#include "uhash.h" +#include "hash.h" +#include "uinvchar.h" +#include "uresimp.h" +#include "ulist.h" +#include "uresimp.h" +#include "ureslocs.h" +#include "ulocimp.h" + +using namespace icu; + +//#define UCURR_DEBUG_EQUIV 1 +#ifdef UCURR_DEBUG_EQUIV +#include "stdio.h" +#endif +//#define UCURR_DEBUG 1 +#ifdef UCURR_DEBUG +#include "stdio.h" +#endif + +typedef struct IsoCodeEntry { + const UChar *isoCode; /* const because it's a reference to a resource bundle string. */ + UDate from; + UDate to; +} IsoCodeEntry; + +//------------------------------------------------------------ +// Constants + +// Default currency meta data of last resort. We try to use the +// defaults encoded in the meta data resource bundle. If there is a +// configuration/build error and these are not available, we use these +// hard-coded defaults (which should be identical). +static const int32_t LAST_RESORT_DATA[] = { 2, 0, 2, 0 }; + +// POW10[i] = 10^i, i=0..MAX_POW10 +static const int32_t POW10[] = { 1, 10, 100, 1000, 10000, 100000, + 1000000, 10000000, 100000000, 1000000000 }; + +static const int32_t MAX_POW10 = UPRV_LENGTHOF(POW10) - 1; + +#define ISO_CURRENCY_CODE_LENGTH 3 + +//------------------------------------------------------------ +// Resource tags +// + +static const char CURRENCY_DATA[] = "supplementalData"; +// Tag for meta-data, in root. +static const char CURRENCY_META[] = "CurrencyMeta"; + +// Tag for map from countries to currencies, in root. +static const char CURRENCY_MAP[] = "CurrencyMap"; + +// Tag for default meta-data, in CURRENCY_META +static const char DEFAULT_META[] = "DEFAULT"; + +// Variant delimiter +static const char VAR_DELIM = '_'; + +// Tag for localized display names (symbols) of currencies +static const char CURRENCIES[] = "Currencies"; +static const char CURRENCIES_NARROW[] = "Currencies%narrow"; +static const char CURRENCIES_FORMAL[] = "Currencies%formal"; +static const char CURRENCIES_VARIANT[] = "Currencies%variant"; +static const char CURRENCYPLURALS[] = "CurrencyPlurals"; + +// ISO codes mapping table +static const UHashtable* gIsoCodes = NULL; +static icu::UInitOnce gIsoCodesInitOnce = U_INITONCE_INITIALIZER; + +// Currency symbol equivalances +static const icu::Hashtable* gCurrSymbolsEquiv = NULL; +static icu::UInitOnce gCurrSymbolsEquivInitOnce = U_INITONCE_INITIALIZER; + +U_NAMESPACE_BEGIN + +// EquivIterator iterates over all strings that are equivalent to a given +// string, s. Note that EquivIterator will never yield s itself. +class EquivIterator : public icu::UMemory { +public: + // Constructor. hash stores the equivalence relationships; s is the string + // for which we find equivalent strings. + inline EquivIterator(const icu::Hashtable& hash, const icu::UnicodeString& s) + : _hash(hash) { + _start = _current = &s; + } + inline ~EquivIterator() { } + + // next returns the next equivalent string or NULL if there are no more. + // If s has no equivalent strings, next returns NULL on the first call. + const icu::UnicodeString *next(); +private: + const icu::Hashtable& _hash; + const icu::UnicodeString* _start; + const icu::UnicodeString* _current; +}; + +const icu::UnicodeString * +EquivIterator::next() { + const icu::UnicodeString* _next = (const icu::UnicodeString*) _hash.get(*_current); + if (_next == NULL) { + U_ASSERT(_current == _start); + return NULL; + } + if (*_next == *_start) { + return NULL; + } + _current = _next; + return _next; +} + +U_NAMESPACE_END + +// makeEquivalent makes lhs and rhs equivalent by updating the equivalence +// relations in hash accordingly. +static void makeEquivalent( + const icu::UnicodeString &lhs, + const icu::UnicodeString &rhs, + icu::Hashtable* hash, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + if (lhs == rhs) { + // already equivalent + return; + } + icu::EquivIterator leftIter(*hash, lhs); + icu::EquivIterator rightIter(*hash, rhs); + const icu::UnicodeString *firstLeft = leftIter.next(); + const icu::UnicodeString *firstRight = rightIter.next(); + const icu::UnicodeString *nextLeft = firstLeft; + const icu::UnicodeString *nextRight = firstRight; + while (nextLeft != NULL && nextRight != NULL) { + if (*nextLeft == rhs || *nextRight == lhs) { + // Already equivalent + return; + } + nextLeft = leftIter.next(); + nextRight = rightIter.next(); + } + // Not equivalent. Must join. + icu::UnicodeString *newFirstLeft; + icu::UnicodeString *newFirstRight; + if (firstRight == NULL && firstLeft == NULL) { + // Neither lhs or rhs belong to an equivalence circle, so we form + // a new equivalnce circle of just lhs and rhs. + newFirstLeft = new icu::UnicodeString(rhs); + newFirstRight = new icu::UnicodeString(lhs); + } else if (firstRight == NULL) { + // lhs belongs to an equivalence circle, but rhs does not, so we link + // rhs into lhs' circle. + newFirstLeft = new icu::UnicodeString(rhs); + newFirstRight = new icu::UnicodeString(*firstLeft); + } else if (firstLeft == NULL) { + // rhs belongs to an equivlance circle, but lhs does not, so we link + // lhs into rhs' circle. + newFirstLeft = new icu::UnicodeString(*firstRight); + newFirstRight = new icu::UnicodeString(lhs); + } else { + // Both lhs and rhs belong to different equivalnce circles. We link + // them together to form one single, larger equivalnce circle. + newFirstLeft = new icu::UnicodeString(*firstRight); + newFirstRight = new icu::UnicodeString(*firstLeft); + } + if (newFirstLeft == NULL || newFirstRight == NULL) { + delete newFirstLeft; + delete newFirstRight; + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + hash->put(lhs, (void *) newFirstLeft, status); + hash->put(rhs, (void *) newFirstRight, status); +} + +// countEquivalent counts how many strings are equivalent to s. +// hash stores all the equivalnce relations. +// countEquivalent does not include s itself in the count. +static int32_t countEquivalent(const icu::Hashtable &hash, const icu::UnicodeString &s) { + int32_t result = 0; + icu::EquivIterator iter(hash, s); + while (iter.next() != NULL) { + ++result; + } +#ifdef UCURR_DEBUG_EQUIV + { + char tmp[200]; + s.extract(0,s.length(),tmp, "UTF-8"); + printf("CountEquivalent('%s') = %d\n", tmp, result); + } +#endif + return result; +} + +static const icu::Hashtable* getCurrSymbolsEquiv(); + +//------------------------------------------------------------ +// Code + +/** + * Cleanup callback func + */ +static UBool U_CALLCONV +isoCodes_cleanup(void) +{ + if (gIsoCodes != NULL) { + uhash_close(const_cast<UHashtable *>(gIsoCodes)); + gIsoCodes = NULL; + } + gIsoCodesInitOnce.reset(); + return TRUE; +} + +/** + * Cleanup callback func + */ +static UBool U_CALLCONV +currSymbolsEquiv_cleanup(void) +{ + delete const_cast<icu::Hashtable *>(gCurrSymbolsEquiv); + gCurrSymbolsEquiv = NULL; + gCurrSymbolsEquivInitOnce.reset(); + return TRUE; +} + +/** + * Deleter for OlsonToMetaMappingEntry + */ +static void U_CALLCONV +deleteIsoCodeEntry(void *obj) { + IsoCodeEntry *entry = (IsoCodeEntry*)obj; + uprv_free(entry); +} + +/** + * Deleter for gCurrSymbolsEquiv. + */ +static void U_CALLCONV +deleteUnicode(void *obj) { + icu::UnicodeString *entry = (icu::UnicodeString*)obj; + delete entry; +} + +/** + * Unfortunately, we have to convert the UChar* currency code to char* + * to use it as a resource key. + */ +static inline char* +myUCharsToChars(char* resultOfLen4, const UChar* currency) { + u_UCharsToChars(currency, resultOfLen4, ISO_CURRENCY_CODE_LENGTH); + resultOfLen4[ISO_CURRENCY_CODE_LENGTH] = 0; + return resultOfLen4; +} + +/** + * Internal function to look up currency data. Result is an array of + * four integers. The first is the fraction digits. The second is the + * rounding increment, or 0 if none. The rounding increment is in + * units of 10^(-fraction_digits). The third and fourth are the same + * except that they are those used in cash transations ( cashDigits + * and cashRounding ). + */ +static const int32_t* +_findMetaData(const UChar* currency, UErrorCode& ec) { + + if (currency == 0 || *currency == 0) { + if (U_SUCCESS(ec)) { + ec = U_ILLEGAL_ARGUMENT_ERROR; + } + return LAST_RESORT_DATA; + } + + // Get CurrencyMeta resource out of root locale file. [This may + // move out of the root locale file later; if it does, update this + // code.] + UResourceBundle* currencyData = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &ec); + UResourceBundle* currencyMeta = ures_getByKey(currencyData, CURRENCY_META, currencyData, &ec); + + if (U_FAILURE(ec)) { + ures_close(currencyMeta); + // Config/build error; return hard-coded defaults + return LAST_RESORT_DATA; + } + + // Look up our currency, or if that's not available, then DEFAULT + char buf[ISO_CURRENCY_CODE_LENGTH+1]; + UErrorCode ec2 = U_ZERO_ERROR; // local error code: soft failure + UResourceBundle* rb = ures_getByKey(currencyMeta, myUCharsToChars(buf, currency), NULL, &ec2); + if (U_FAILURE(ec2)) { + ures_close(rb); + rb = ures_getByKey(currencyMeta,DEFAULT_META, NULL, &ec); + if (U_FAILURE(ec)) { + ures_close(currencyMeta); + ures_close(rb); + // Config/build error; return hard-coded defaults + return LAST_RESORT_DATA; + } + } + + int32_t len; + const int32_t *data = ures_getIntVector(rb, &len, &ec); + if (U_FAILURE(ec) || len != 4) { + // Config/build error; return hard-coded defaults + if (U_SUCCESS(ec)) { + ec = U_INVALID_FORMAT_ERROR; + } + ures_close(currencyMeta); + ures_close(rb); + return LAST_RESORT_DATA; + } + + ures_close(currencyMeta); + ures_close(rb); + return data; +} + +// ------------------------------------- + +static void +idForLocale(const char* locale, char* countryAndVariant, int capacity, UErrorCode* ec) +{ + ulocimp_getRegionForSupplementalData(locale, FALSE, countryAndVariant, capacity, ec); +} + +// ------------------------------------------ +// +// Registration +// +//------------------------------------------- + +// don't use ICUService since we don't need fallback + +U_CDECL_BEGIN +static UBool U_CALLCONV currency_cleanup(void); +U_CDECL_END + +#if !UCONFIG_NO_SERVICE +struct CReg; + +static UMutex gCRegLock; +static CReg* gCRegHead = 0; + +struct CReg : public icu::UMemory { + CReg *next; + UChar iso[ISO_CURRENCY_CODE_LENGTH+1]; + char id[ULOC_FULLNAME_CAPACITY]; + + CReg(const UChar* _iso, const char* _id) + : next(0) + { + int32_t len = (int32_t)uprv_strlen(_id); + if (len > (int32_t)(sizeof(id)-1)) { + len = (sizeof(id)-1); + } + uprv_strncpy(id, _id, len); + id[len] = 0; + u_memcpy(iso, _iso, ISO_CURRENCY_CODE_LENGTH); + iso[ISO_CURRENCY_CODE_LENGTH] = 0; + } + + static UCurrRegistryKey reg(const UChar* _iso, const char* _id, UErrorCode* status) + { + if (status && U_SUCCESS(*status) && _iso && _id) { + CReg* n = new CReg(_iso, _id); + if (n) { + umtx_lock(&gCRegLock); + if (!gCRegHead) { + /* register for the first time */ + ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup); + } + n->next = gCRegHead; + gCRegHead = n; + umtx_unlock(&gCRegLock); + return n; + } + *status = U_MEMORY_ALLOCATION_ERROR; + } + return 0; + } + + static UBool unreg(UCurrRegistryKey key) { + UBool found = FALSE; + umtx_lock(&gCRegLock); + + CReg** p = &gCRegHead; + while (*p) { + if (*p == key) { + *p = ((CReg*)key)->next; + delete (CReg*)key; + found = TRUE; + break; + } + p = &((*p)->next); + } + + umtx_unlock(&gCRegLock); + return found; + } + + static const UChar* get(const char* id) { + const UChar* result = NULL; + umtx_lock(&gCRegLock); + CReg* p = gCRegHead; + + /* register cleanup of the mutex */ + ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup); + while (p) { + if (uprv_strcmp(id, p->id) == 0) { + result = p->iso; + break; + } + p = p->next; + } + umtx_unlock(&gCRegLock); + return result; + } + + /* This doesn't need to be thread safe. It's for u_cleanup only. */ + static void cleanup(void) { + while (gCRegHead) { + CReg* n = gCRegHead; + gCRegHead = gCRegHead->next; + delete n; + } + } +}; + +// ------------------------------------- + +U_CAPI UCurrRegistryKey U_EXPORT2 +ucurr_register(const UChar* isoCode, const char* locale, UErrorCode *status) +{ + if (status && U_SUCCESS(*status)) { + char id[ULOC_FULLNAME_CAPACITY]; + idForLocale(locale, id, sizeof(id), status); + return CReg::reg(isoCode, id, status); + } + return NULL; +} + +// ------------------------------------- + +U_CAPI UBool U_EXPORT2 +ucurr_unregister(UCurrRegistryKey key, UErrorCode* status) +{ + if (status && U_SUCCESS(*status)) { + return CReg::unreg(key); + } + return FALSE; +} +#endif /* UCONFIG_NO_SERVICE */ + +// ------------------------------------- + +/** + * Release all static memory held by currency. + */ +/*The declaration here is needed so currency_cleanup(void) + * can call this function. + */ +static UBool U_CALLCONV +currency_cache_cleanup(void); + +U_CDECL_BEGIN +static UBool U_CALLCONV currency_cleanup(void) { +#if !UCONFIG_NO_SERVICE + CReg::cleanup(); +#endif + /* + * There might be some cached currency data or isoCodes data. + */ + currency_cache_cleanup(); + isoCodes_cleanup(); + currSymbolsEquiv_cleanup(); + + return TRUE; +} +U_CDECL_END + +// ------------------------------------- + +U_CAPI int32_t U_EXPORT2 +ucurr_forLocale(const char* locale, + UChar* buff, + int32_t buffCapacity, + UErrorCode* ec) { + if (U_FAILURE(*ec)) { return 0; } + if (buffCapacity < 0 || (buff == nullptr && buffCapacity > 0)) { + *ec = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + char currency[4]; // ISO currency codes are alpha3 codes. + UErrorCode localStatus = U_ZERO_ERROR; + int32_t resLen = uloc_getKeywordValue(locale, "currency", + currency, UPRV_LENGTHOF(currency), &localStatus); + if (U_SUCCESS(localStatus) && resLen == 3 && uprv_isInvariantString(currency, resLen)) { + if (resLen < buffCapacity) { + T_CString_toUpperCase(currency); + u_charsToUChars(currency, buff, resLen); + } + return u_terminateUChars(buff, buffCapacity, resLen, ec); + } + + // get country or country_variant in `id' + char id[ULOC_FULLNAME_CAPACITY]; + idForLocale(locale, id, UPRV_LENGTHOF(id), ec); + if (U_FAILURE(*ec)) { + return 0; + } + +#if !UCONFIG_NO_SERVICE + const UChar* result = CReg::get(id); + if (result) { + if(buffCapacity > u_strlen(result)) { + u_strcpy(buff, result); + } + resLen = u_strlen(result); + return u_terminateUChars(buff, buffCapacity, resLen, ec); + } +#endif + // Remove variants, which is only needed for registration. + char *idDelim = uprv_strchr(id, VAR_DELIM); + if (idDelim) { + idDelim[0] = 0; + } + + const UChar* s = NULL; // Currency code from data file. + if (id[0] == 0) { + // No point looking in the data for an empty string. + // This is what we would get. + localStatus = U_MISSING_RESOURCE_ERROR; + } else { + // Look up the CurrencyMap element in the root bundle. + localStatus = U_ZERO_ERROR; + UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus); + UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus); + UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus); + UResourceBundle *currencyReq = ures_getByIndex(countryArray, 0, NULL, &localStatus); + s = ures_getStringByKey(currencyReq, "id", &resLen, &localStatus); + ures_close(currencyReq); + ures_close(countryArray); + } + + if ((U_FAILURE(localStatus)) && strchr(id, '_') != 0) { + // We don't know about it. Check to see if we support the variant. + uloc_getParent(locale, id, UPRV_LENGTHOF(id), ec); + *ec = U_USING_FALLBACK_WARNING; + // TODO: Loop over the shortened id rather than recursing and + // looking again for a currency keyword. + return ucurr_forLocale(id, buff, buffCapacity, ec); + } + if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) { + // There is nothing to fallback to. Report the failure/warning if possible. + *ec = localStatus; + } + if (U_SUCCESS(*ec)) { + if(buffCapacity > resLen) { + u_strcpy(buff, s); + } + } + return u_terminateUChars(buff, buffCapacity, resLen, ec); +} + +// end registration + +/** + * Modify the given locale name by removing the rightmost _-delimited + * element. If there is none, empty the string ("" == root). + * NOTE: The string "root" is not recognized; do not use it. + * @return TRUE if the fallback happened; FALSE if locale is already + * root (""). + */ +static UBool fallback(char *loc) { + if (!*loc) { + return FALSE; + } + UErrorCode status = U_ZERO_ERROR; + if (uprv_strcmp(loc, "en_GB") == 0) { + // HACK: See #13368. We need "en_GB" to fall back to "en_001" instead of "en" + // in order to consume the correct data strings. This hack will be removed + // when proper data sink loading is implemented here. + // NOTE: "001" adds 1 char over "GB". However, both call sites allocate + // arrays with length ULOC_FULLNAME_CAPACITY (plenty of room for en_001). + uprv_strcpy(loc + 3, "001"); + } else { + uloc_getParent(loc, loc, (int32_t)uprv_strlen(loc), &status); + } + /* + char *i = uprv_strrchr(loc, '_'); + if (i == NULL) { + i = loc; + } + *i = 0; + */ + return TRUE; +} + + +U_CAPI const UChar* U_EXPORT2 +ucurr_getName(const UChar* currency, + const char* locale, + UCurrNameStyle nameStyle, + UBool* isChoiceFormat, // fillin + int32_t* len, // fillin + UErrorCode* ec) { + + // Look up the Currencies resource for the given locale. The + // Currencies locale data looks like this: + //|en { + //| Currencies { + //| USD { "US$", "US Dollar" } + //| CHF { "Sw F", "Swiss Franc" } + //| INR { "=0#Rs|1#Re|1<Rs", "=0#Rupees|1#Rupee|1<Rupees" } + //| //... + //| } + //|} + + if (U_FAILURE(*ec)) { + return 0; + } + + int32_t choice = (int32_t) nameStyle; + if (choice < 0 || choice > 4) { + *ec = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + // In the future, resource bundles may implement multi-level + // fallback. That is, if a currency is not found in the en_US + // Currencies data, then the en Currencies data will be searched. + // Currently, if a Currencies datum exists in en_US and en, the + // en_US entry hides that in en. + + // We want multi-level fallback for this resource, so we implement + // it manually. + + // Use a separate UErrorCode here that does not propagate out of + // this function. + UErrorCode ec2 = U_ZERO_ERROR; + + char loc[ULOC_FULLNAME_CAPACITY]; + uloc_getName(locale, loc, sizeof(loc), &ec2); + if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) { + *ec = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + char buf[ISO_CURRENCY_CODE_LENGTH+1]; + myUCharsToChars(buf, currency); + + /* Normalize the keyword value to uppercase */ + T_CString_toUpperCase(buf); + + const UChar* s = NULL; + ec2 = U_ZERO_ERROR; + LocalUResourceBundlePointer rb(ures_open(U_ICUDATA_CURR, loc, &ec2)); + + if (nameStyle == UCURR_NARROW_SYMBOL_NAME || nameStyle == UCURR_FORMAL_SYMBOL_NAME || nameStyle == UCURR_VARIANT_SYMBOL_NAME) { + CharString key; + switch (nameStyle) { + case UCURR_NARROW_SYMBOL_NAME: + key.append(CURRENCIES_NARROW, ec2); + break; + case UCURR_FORMAL_SYMBOL_NAME: + key.append(CURRENCIES_FORMAL, ec2); + break; + case UCURR_VARIANT_SYMBOL_NAME: + key.append(CURRENCIES_VARIANT, ec2); + break; + default: + *ec = U_UNSUPPORTED_ERROR; + return 0; + } + key.append("/", ec2); + key.append(buf, ec2); + s = ures_getStringByKeyWithFallback(rb.getAlias(), key.data(), len, &ec2); + if (ec2 == U_MISSING_RESOURCE_ERROR) { + *ec = U_USING_FALLBACK_WARNING; + ec2 = U_ZERO_ERROR; + choice = UCURR_SYMBOL_NAME; + } + } + if (s == NULL) { + ures_getByKey(rb.getAlias(), CURRENCIES, rb.getAlias(), &ec2); + ures_getByKeyWithFallback(rb.getAlias(), buf, rb.getAlias(), &ec2); + s = ures_getStringByIndex(rb.getAlias(), choice, len, &ec2); + } + + // If we've succeeded we're done. Otherwise, try to fallback. + // If that fails (because we are already at root) then exit. + if (U_SUCCESS(ec2)) { + if (ec2 == U_USING_DEFAULT_WARNING + || (ec2 == U_USING_FALLBACK_WARNING && *ec != U_USING_DEFAULT_WARNING)) { + *ec = ec2; + } + } + + // We no longer support choice format data in names. Data should not contain + // choice patterns. + if (isChoiceFormat != NULL) { + *isChoiceFormat = FALSE; + } + if (U_SUCCESS(ec2)) { + U_ASSERT(s != NULL); + return s; + } + + // If we fail to find a match, use the ISO 4217 code + *len = u_strlen(currency); // Should == ISO_CURRENCY_CODE_LENGTH, but maybe not...? + *ec = U_USING_DEFAULT_WARNING; + return currency; +} + +U_CAPI const UChar* U_EXPORT2 +ucurr_getPluralName(const UChar* currency, + const char* locale, + UBool* isChoiceFormat, + const char* pluralCount, + int32_t* len, // fillin + UErrorCode* ec) { + // Look up the Currencies resource for the given locale. The + // Currencies locale data looks like this: + //|en { + //| CurrencyPlurals { + //| USD{ + //| one{"US dollar"} + //| other{"US dollars"} + //| } + //| } + //|} + + if (U_FAILURE(*ec)) { + return 0; + } + + // Use a separate UErrorCode here that does not propagate out of + // this function. + UErrorCode ec2 = U_ZERO_ERROR; + + char loc[ULOC_FULLNAME_CAPACITY]; + uloc_getName(locale, loc, sizeof(loc), &ec2); + if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) { + *ec = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + char buf[ISO_CURRENCY_CODE_LENGTH+1]; + myUCharsToChars(buf, currency); + + const UChar* s = NULL; + ec2 = U_ZERO_ERROR; + UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2); + + rb = ures_getByKey(rb, CURRENCYPLURALS, rb, &ec2); + + // Fetch resource with multi-level resource inheritance fallback + rb = ures_getByKeyWithFallback(rb, buf, rb, &ec2); + + s = ures_getStringByKeyWithFallback(rb, pluralCount, len, &ec2); + if (U_FAILURE(ec2)) { + // fall back to "other" + ec2 = U_ZERO_ERROR; + s = ures_getStringByKeyWithFallback(rb, "other", len, &ec2); + if (U_FAILURE(ec2)) { + ures_close(rb); + // fall back to long name in Currencies + return ucurr_getName(currency, locale, UCURR_LONG_NAME, + isChoiceFormat, len, ec); + } + } + ures_close(rb); + + // If we've succeeded we're done. Otherwise, try to fallback. + // If that fails (because we are already at root) then exit. + if (U_SUCCESS(ec2)) { + if (ec2 == U_USING_DEFAULT_WARNING + || (ec2 == U_USING_FALLBACK_WARNING && *ec != U_USING_DEFAULT_WARNING)) { + *ec = ec2; + } + U_ASSERT(s != NULL); + return s; + } + + // If we fail to find a match, use the ISO 4217 code + *len = u_strlen(currency); // Should == ISO_CURRENCY_CODE_LENGTH, but maybe not...? + *ec = U_USING_DEFAULT_WARNING; + return currency; +} + + +//======================================================================== +// Following are structure and function for parsing currency names + +#define NEED_TO_BE_DELETED 0x1 + +// TODO: a better way to define this? +#define MAX_CURRENCY_NAME_LEN 100 + +typedef struct { + const char* IsoCode; // key + UChar* currencyName; // value + int32_t currencyNameLen; // value length + int32_t flag; // flags +} CurrencyNameStruct; + + +#ifndef MIN +#define MIN(a,b) (((a)<(b)) ? (a) : (b)) +#endif + +#ifndef MAX +#define MAX(a,b) (((a)<(b)) ? (b) : (a)) +#endif + + +// Comparason function used in quick sort. +static int U_CALLCONV currencyNameComparator(const void* a, const void* b) { + const CurrencyNameStruct* currName_1 = (const CurrencyNameStruct*)a; + const CurrencyNameStruct* currName_2 = (const CurrencyNameStruct*)b; + for (int32_t i = 0; + i < MIN(currName_1->currencyNameLen, currName_2->currencyNameLen); + ++i) { + if (currName_1->currencyName[i] < currName_2->currencyName[i]) { + return -1; + } + if (currName_1->currencyName[i] > currName_2->currencyName[i]) { + return 1; + } + } + if (currName_1->currencyNameLen < currName_2->currencyNameLen) { + return -1; + } else if (currName_1->currencyNameLen > currName_2->currencyNameLen) { + return 1; + } + return 0; +} + + +// Give a locale, return the maximum number of currency names associated with +// this locale. +// It gets currency names from resource bundles using fallback. +// It is the maximum number because in the fallback chain, some of the +// currency names are duplicated. +// For example, given locale as "en_US", the currency names get from resource +// bundle in "en_US" and "en" are duplicated. The fallback mechanism will count +// all currency names in "en_US" and "en". +static void +getCurrencyNameCount(const char* loc, int32_t* total_currency_name_count, int32_t* total_currency_symbol_count) { + U_NAMESPACE_USE + *total_currency_name_count = 0; + *total_currency_symbol_count = 0; + const UChar* s = NULL; + char locale[ULOC_FULLNAME_CAPACITY] = ""; + uprv_strcpy(locale, loc); + const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv(); + for (;;) { + UErrorCode ec2 = U_ZERO_ERROR; + // TODO: ures_openDirect? + UResourceBundle* rb = ures_open(U_ICUDATA_CURR, locale, &ec2); + UResourceBundle* curr = ures_getByKey(rb, CURRENCIES, NULL, &ec2); + int32_t n = ures_getSize(curr); + for (int32_t i=0; i<n; ++i) { + UResourceBundle* names = ures_getByIndex(curr, i, NULL, &ec2); + int32_t len; + s = ures_getStringByIndex(names, UCURR_SYMBOL_NAME, &len, &ec2); + ++(*total_currency_symbol_count); // currency symbol + if (currencySymbolsEquiv != NULL) { + *total_currency_symbol_count += countEquivalent(*currencySymbolsEquiv, UnicodeString(TRUE, s, len)); + } + ++(*total_currency_symbol_count); // iso code + ++(*total_currency_name_count); // long name + ures_close(names); + } + + // currency plurals + UErrorCode ec3 = U_ZERO_ERROR; + UResourceBundle* curr_p = ures_getByKey(rb, CURRENCYPLURALS, NULL, &ec3); + n = ures_getSize(curr_p); + for (int32_t i=0; i<n; ++i) { + UResourceBundle* names = ures_getByIndex(curr_p, i, NULL, &ec3); + *total_currency_name_count += ures_getSize(names); + ures_close(names); + } + ures_close(curr_p); + ures_close(curr); + ures_close(rb); + + if (!fallback(locale)) { + break; + } + } +} + +static UChar* +toUpperCase(const UChar* source, int32_t len, const char* locale) { + UChar* dest = NULL; + UErrorCode ec = U_ZERO_ERROR; + int32_t destLen = u_strToUpper(dest, 0, source, len, locale, &ec); + + ec = U_ZERO_ERROR; + dest = (UChar*)uprv_malloc(sizeof(UChar) * MAX(destLen, len)); + u_strToUpper(dest, destLen, source, len, locale, &ec); + if (U_FAILURE(ec)) { + u_memcpy(dest, source, len); + } + return dest; +} + + +// Collect all available currency names associated with the given locale +// (enable fallback chain). +// Read currenc names defined in resource bundle "Currencies" and +// "CurrencyPlural", enable fallback chain. +// return the malloc-ed currency name arrays and the total number of currency +// names in the array. +static void +collectCurrencyNames(const char* locale, + CurrencyNameStruct** currencyNames, + int32_t* total_currency_name_count, + CurrencyNameStruct** currencySymbols, + int32_t* total_currency_symbol_count, + UErrorCode& ec) { + U_NAMESPACE_USE + const icu::Hashtable *currencySymbolsEquiv = getCurrSymbolsEquiv(); + // Look up the Currencies resource for the given locale. + UErrorCode ec2 = U_ZERO_ERROR; + + char loc[ULOC_FULLNAME_CAPACITY] = ""; + uloc_getName(locale, loc, sizeof(loc), &ec2); + if (U_FAILURE(ec2) || ec2 == U_STRING_NOT_TERMINATED_WARNING) { + ec = U_ILLEGAL_ARGUMENT_ERROR; + } + + // Get maximum currency name count first. + getCurrencyNameCount(loc, total_currency_name_count, total_currency_symbol_count); + + *currencyNames = (CurrencyNameStruct*)uprv_malloc + (sizeof(CurrencyNameStruct) * (*total_currency_name_count)); + *currencySymbols = (CurrencyNameStruct*)uprv_malloc + (sizeof(CurrencyNameStruct) * (*total_currency_symbol_count)); + + if(currencyNames == NULL || currencySymbols == NULL) { + ec = U_MEMORY_ALLOCATION_ERROR; + } + + if (U_FAILURE(ec)) return; + + const UChar* s = NULL; // currency name + char* iso = NULL; // currency ISO code + + *total_currency_name_count = 0; + *total_currency_symbol_count = 0; + + UErrorCode ec3 = U_ZERO_ERROR; + UErrorCode ec4 = U_ZERO_ERROR; + + // Using hash to remove duplicates caused by locale fallback + UHashtable* currencyIsoCodes = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &ec3); + UHashtable* currencyPluralIsoCodes = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &ec4); + for (int32_t localeLevel = 0; ; ++localeLevel) { + ec2 = U_ZERO_ERROR; + // TODO: ures_openDirect + UResourceBundle* rb = ures_open(U_ICUDATA_CURR, loc, &ec2); + UResourceBundle* curr = ures_getByKey(rb, CURRENCIES, NULL, &ec2); + int32_t n = ures_getSize(curr); + for (int32_t i=0; i<n; ++i) { + UResourceBundle* names = ures_getByIndex(curr, i, NULL, &ec2); + int32_t len; + s = ures_getStringByIndex(names, UCURR_SYMBOL_NAME, &len, &ec2); + // TODO: uhash_put wont change key/value? + iso = (char*)ures_getKey(names); + if (localeLevel == 0) { + uhash_put(currencyIsoCodes, iso, iso, &ec3); + } else { + if (uhash_get(currencyIsoCodes, iso) != NULL) { + ures_close(names); + continue; + } else { + uhash_put(currencyIsoCodes, iso, iso, &ec3); + } + } + // Add currency symbol. + (*currencySymbols)[*total_currency_symbol_count].IsoCode = iso; + (*currencySymbols)[*total_currency_symbol_count].currencyName = (UChar*)s; + (*currencySymbols)[*total_currency_symbol_count].flag = 0; + (*currencySymbols)[(*total_currency_symbol_count)++].currencyNameLen = len; + // Add equivalent symbols + if (currencySymbolsEquiv != NULL) { + UnicodeString str(TRUE, s, len); + icu::EquivIterator iter(*currencySymbolsEquiv, str); + const UnicodeString *symbol; + while ((symbol = iter.next()) != NULL) { + (*currencySymbols)[*total_currency_symbol_count].IsoCode = iso; + (*currencySymbols)[*total_currency_symbol_count].currencyName = + const_cast<UChar*>(symbol->getBuffer()); + (*currencySymbols)[*total_currency_symbol_count].flag = 0; + (*currencySymbols)[(*total_currency_symbol_count)++].currencyNameLen = symbol->length(); + } + } + + // Add currency long name. + s = ures_getStringByIndex(names, UCURR_LONG_NAME, &len, &ec2); + (*currencyNames)[*total_currency_name_count].IsoCode = iso; + UChar* upperName = toUpperCase(s, len, locale); + (*currencyNames)[*total_currency_name_count].currencyName = upperName; + (*currencyNames)[*total_currency_name_count].flag = NEED_TO_BE_DELETED; + (*currencyNames)[(*total_currency_name_count)++].currencyNameLen = len; + + // put (iso, 3, and iso) in to array + // Add currency ISO code. + (*currencySymbols)[*total_currency_symbol_count].IsoCode = iso; + (*currencySymbols)[*total_currency_symbol_count].currencyName = (UChar*)uprv_malloc(sizeof(UChar)*3); + // Must convert iso[] into Unicode + u_charsToUChars(iso, (*currencySymbols)[*total_currency_symbol_count].currencyName, 3); + (*currencySymbols)[*total_currency_symbol_count].flag = NEED_TO_BE_DELETED; + (*currencySymbols)[(*total_currency_symbol_count)++].currencyNameLen = 3; + + ures_close(names); + } + + // currency plurals + UErrorCode ec5 = U_ZERO_ERROR; + UResourceBundle* curr_p = ures_getByKey(rb, CURRENCYPLURALS, NULL, &ec5); + n = ures_getSize(curr_p); + for (int32_t i=0; i<n; ++i) { + UResourceBundle* names = ures_getByIndex(curr_p, i, NULL, &ec5); + iso = (char*)ures_getKey(names); + // Using hash to remove duplicated ISO codes in fallback chain. + if (localeLevel == 0) { + uhash_put(currencyPluralIsoCodes, iso, iso, &ec4); + } else { + if (uhash_get(currencyPluralIsoCodes, iso) != NULL) { + ures_close(names); + continue; + } else { + uhash_put(currencyPluralIsoCodes, iso, iso, &ec4); + } + } + int32_t num = ures_getSize(names); + int32_t len; + for (int32_t j = 0; j < num; ++j) { + // TODO: remove duplicates between singular name and + // currency long name? + s = ures_getStringByIndex(names, j, &len, &ec5); + (*currencyNames)[*total_currency_name_count].IsoCode = iso; + UChar* upperName = toUpperCase(s, len, locale); + (*currencyNames)[*total_currency_name_count].currencyName = upperName; + (*currencyNames)[*total_currency_name_count].flag = NEED_TO_BE_DELETED; + (*currencyNames)[(*total_currency_name_count)++].currencyNameLen = len; + } + ures_close(names); + } + ures_close(curr_p); + ures_close(curr); + ures_close(rb); + + if (!fallback(loc)) { + break; + } + } + + uhash_close(currencyIsoCodes); + uhash_close(currencyPluralIsoCodes); + + // quick sort the struct + qsort(*currencyNames, *total_currency_name_count, + sizeof(CurrencyNameStruct), currencyNameComparator); + qsort(*currencySymbols, *total_currency_symbol_count, + sizeof(CurrencyNameStruct), currencyNameComparator); + +#ifdef UCURR_DEBUG + printf("currency name count: %d\n", *total_currency_name_count); + for (int32_t index = 0; index < *total_currency_name_count; ++index) { + printf("index: %d\n", index); + printf("iso: %s\n", (*currencyNames)[index].IsoCode); + char curNameBuf[1024]; + memset(curNameBuf, 0, 1024); + u_austrncpy(curNameBuf, (*currencyNames)[index].currencyName, (*currencyNames)[index].currencyNameLen); + printf("currencyName: %s\n", curNameBuf); + printf("len: %d\n", (*currencyNames)[index].currencyNameLen); + } + printf("currency symbol count: %d\n", *total_currency_symbol_count); + for (int32_t index = 0; index < *total_currency_symbol_count; ++index) { + printf("index: %d\n", index); + printf("iso: %s\n", (*currencySymbols)[index].IsoCode); + char curNameBuf[1024]; + memset(curNameBuf, 0, 1024); + u_austrncpy(curNameBuf, (*currencySymbols)[index].currencyName, (*currencySymbols)[index].currencyNameLen); + printf("currencySymbol: %s\n", curNameBuf); + printf("len: %d\n", (*currencySymbols)[index].currencyNameLen); + } +#endif + // fail on hashtable errors + if (U_FAILURE(ec3)) { + ec = ec3; + return; + } + if (U_FAILURE(ec4)) { + ec = ec4; + return; + } +} + +// @param currencyNames: currency names array +// @param indexInCurrencyNames: the index of the character in currency names +// array against which the comparison is done +// @param key: input text char to compare against +// @param begin(IN/OUT): the begin index of matching range in currency names array +// @param end(IN/OUT): the end index of matching range in currency names array. +static int32_t +binarySearch(const CurrencyNameStruct* currencyNames, + int32_t indexInCurrencyNames, + const UChar key, + int32_t* begin, int32_t* end) { +#ifdef UCURR_DEBUG + printf("key = %x\n", key); +#endif + int32_t first = *begin; + int32_t last = *end; + while (first <= last) { + int32_t mid = (first + last) / 2; // compute mid point. + if (indexInCurrencyNames >= currencyNames[mid].currencyNameLen) { + first = mid + 1; + } else { + if (key > currencyNames[mid].currencyName[indexInCurrencyNames]) { + first = mid + 1; + } + else if (key < currencyNames[mid].currencyName[indexInCurrencyNames]) { + last = mid - 1; + } + else { + // Find a match, and looking for ranges + // Now do two more binary searches. First, on the left side for + // the greatest L such that CurrencyNameStruct[L] < key. + int32_t L = *begin; + int32_t R = mid; + +#ifdef UCURR_DEBUG + printf("mid = %d\n", mid); +#endif + while (L < R) { + int32_t M = (L + R) / 2; +#ifdef UCURR_DEBUG + printf("L = %d, R = %d, M = %d\n", L, R, M); +#endif + if (indexInCurrencyNames >= currencyNames[M].currencyNameLen) { + L = M + 1; + } else { + if (currencyNames[M].currencyName[indexInCurrencyNames] < key) { + L = M + 1; + } else { +#ifdef UCURR_DEBUG + U_ASSERT(currencyNames[M].currencyName[indexInCurrencyNames] == key); +#endif + R = M; + } + } + } +#ifdef UCURR_DEBUG + U_ASSERT(L == R); +#endif + *begin = L; +#ifdef UCURR_DEBUG + printf("begin = %d\n", *begin); + U_ASSERT(currencyNames[*begin].currencyName[indexInCurrencyNames] == key); +#endif + + // Now for the second search, finding the least R such that + // key < CurrencyNameStruct[R]. + L = mid; + R = *end; + while (L < R) { + int32_t M = (L + R) / 2; +#ifdef UCURR_DEBUG + printf("L = %d, R = %d, M = %d\n", L, R, M); +#endif + if (currencyNames[M].currencyNameLen < indexInCurrencyNames) { + L = M + 1; + } else { + if (currencyNames[M].currencyName[indexInCurrencyNames] > key) { + R = M; + } else { +#ifdef UCURR_DEBUG + U_ASSERT(currencyNames[M].currencyName[indexInCurrencyNames] == key); +#endif + L = M + 1; + } + } + } +#ifdef UCURR_DEBUG + U_ASSERT(L == R); +#endif + if (currencyNames[R].currencyName[indexInCurrencyNames] > key) { + *end = R - 1; + } else { + *end = R; + } +#ifdef UCURR_DEBUG + printf("end = %d\n", *end); +#endif + + // now, found the range. check whether there is exact match + if (currencyNames[*begin].currencyNameLen == indexInCurrencyNames + 1) { + return *begin; // find range and exact match. + } + return -1; // find range, but no exact match. + } + } + } + *begin = -1; + *end = -1; + return -1; // failed to find range. +} + + +// Linear search "text" in "currencyNames". +// @param begin, end: the begin and end index in currencyNames, within which +// range should the search be performed. +// @param textLen: the length of the text to be compared +// @param maxMatchLen(IN/OUT): passing in the computed max matching length +// pass out the new max matching length +// @param maxMatchIndex: the index in currencyName which has the longest +// match with input text. +static void +linearSearch(const CurrencyNameStruct* currencyNames, + int32_t begin, int32_t end, + const UChar* text, int32_t textLen, + int32_t *partialMatchLen, + int32_t *maxMatchLen, int32_t* maxMatchIndex) { + int32_t initialPartialMatchLen = *partialMatchLen; + for (int32_t index = begin; index <= end; ++index) { + int32_t len = currencyNames[index].currencyNameLen; + if (len > *maxMatchLen && len <= textLen && + uprv_memcmp(currencyNames[index].currencyName, text, len * sizeof(UChar)) == 0) { + *partialMatchLen = MAX(*partialMatchLen, len); + *maxMatchIndex = index; + *maxMatchLen = len; +#ifdef UCURR_DEBUG + printf("maxMatchIndex = %d, maxMatchLen = %d\n", + *maxMatchIndex, *maxMatchLen); +#endif + } else { + // Check for partial matches. + for (int32_t i=initialPartialMatchLen; i<MIN(len, textLen); i++) { + if (currencyNames[index].currencyName[i] != text[i]) { + break; + } + *partialMatchLen = MAX(*partialMatchLen, i + 1); + } + } + } +} + +#define LINEAR_SEARCH_THRESHOLD 10 + +// Find longest match between "text" and currency names in "currencyNames". +// @param total_currency_count: total number of currency names in CurrencyNames. +// @param textLen: the length of the text to be compared +// @param maxMatchLen: passing in the computed max matching length +// pass out the new max matching length +// @param maxMatchIndex: the index in currencyName which has the longest +// match with input text. +static void +searchCurrencyName(const CurrencyNameStruct* currencyNames, + int32_t total_currency_count, + const UChar* text, int32_t textLen, + int32_t *partialMatchLen, + int32_t* maxMatchLen, int32_t* maxMatchIndex) { + *maxMatchIndex = -1; + *maxMatchLen = 0; + int32_t matchIndex = -1; + int32_t binarySearchBegin = 0; + int32_t binarySearchEnd = total_currency_count - 1; + // It is a variant of binary search. + // For example, given the currency names in currencyNames array are: + // A AB ABC AD AZ B BB BBEX BBEXYZ BS C D E.... + // and the input text is BBEXST + // The first round binary search search "B" in the text against + // the first char in currency names, and find the first char matching range + // to be "B BB BBEX BBEXYZ BS" (and the maximum matching "B"). + // The 2nd round binary search search the second "B" in the text against + // the 2nd char in currency names, and narrow the matching range to + // "BB BBEX BBEXYZ" (and the maximum matching "BB"). + // The 3rd round returnes the range as "BBEX BBEXYZ" (without changing + // maximum matching). + // The 4th round returns the same range (the maximum matching is "BBEX"). + // The 5th round returns no matching range. + for (int32_t index = 0; index < textLen; ++index) { + // matchIndex saves the one with exact match till the current point. + // [binarySearchBegin, binarySearchEnd] saves the matching range. + matchIndex = binarySearch(currencyNames, index, + text[index], + &binarySearchBegin, &binarySearchEnd); + if (binarySearchBegin == -1) { // did not find the range + break; + } + *partialMatchLen = MAX(*partialMatchLen, index + 1); + if (matchIndex != -1) { + // find an exact match for text from text[0] to text[index] + // in currencyNames array. + *maxMatchLen = index + 1; + *maxMatchIndex = matchIndex; + } + if (binarySearchEnd - binarySearchBegin < LINEAR_SEARCH_THRESHOLD) { + // linear search if within threshold. + linearSearch(currencyNames, binarySearchBegin, binarySearchEnd, + text, textLen, + partialMatchLen, + maxMatchLen, maxMatchIndex); + break; + } + } + return; +} + +//========================= currency name cache ===================== +typedef struct { + char locale[ULOC_FULLNAME_CAPACITY]; //key + // currency names, case insensitive + CurrencyNameStruct* currencyNames; // value + int32_t totalCurrencyNameCount; // currency name count + // currency symbols and ISO code, case sensitive + CurrencyNameStruct* currencySymbols; // value + int32_t totalCurrencySymbolCount; // count + // reference count. + // reference count is set to 1 when an entry is put to cache. + // it increases by 1 before accessing, and decreased by 1 after accessing. + // The entry is deleted when ref count is zero, which means + // the entry is replaced out of cache and no process is accessing it. + int32_t refCount; +} CurrencyNameCacheEntry; + + +#define CURRENCY_NAME_CACHE_NUM 10 + +// Reserve 10 cache entries. +static CurrencyNameCacheEntry* currCache[CURRENCY_NAME_CACHE_NUM] = {NULL}; +// Using an index to indicate which entry to be replaced when cache is full. +// It is a simple round-robin replacement strategy. +static int8_t currentCacheEntryIndex = 0; + +static UMutex gCurrencyCacheMutex; + +// Cache deletion +static void +deleteCurrencyNames(CurrencyNameStruct* currencyNames, int32_t count) { + for (int32_t index = 0; index < count; ++index) { + if ( (currencyNames[index].flag & NEED_TO_BE_DELETED) ) { + uprv_free(currencyNames[index].currencyName); + } + } + uprv_free(currencyNames); +} + + +static void +deleteCacheEntry(CurrencyNameCacheEntry* entry) { + deleteCurrencyNames(entry->currencyNames, entry->totalCurrencyNameCount); + deleteCurrencyNames(entry->currencySymbols, entry->totalCurrencySymbolCount); + uprv_free(entry); +} + + +// Cache clean up +static UBool U_CALLCONV +currency_cache_cleanup(void) { + for (int32_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) { + if (currCache[i]) { + deleteCacheEntry(currCache[i]); + currCache[i] = 0; + } + } + return TRUE; +} + + +/** + * Loads the currency name data from the cache, or from resource bundles if necessary. + * The refCount is automatically incremented. It is the caller's responsibility + * to decrement it when done! + */ +static CurrencyNameCacheEntry* +getCacheEntry(const char* locale, UErrorCode& ec) { + + int32_t total_currency_name_count = 0; + CurrencyNameStruct* currencyNames = NULL; + int32_t total_currency_symbol_count = 0; + CurrencyNameStruct* currencySymbols = NULL; + CurrencyNameCacheEntry* cacheEntry = NULL; + + umtx_lock(&gCurrencyCacheMutex); + // in order to handle racing correctly, + // not putting 'search' in a separate function. + int8_t found = -1; + for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) { + if (currCache[i]!= NULL && + uprv_strcmp(locale, currCache[i]->locale) == 0) { + found = i; + break; + } + } + if (found != -1) { + cacheEntry = currCache[found]; + ++(cacheEntry->refCount); + } + umtx_unlock(&gCurrencyCacheMutex); + if (found == -1) { + collectCurrencyNames(locale, ¤cyNames, &total_currency_name_count, ¤cySymbols, &total_currency_symbol_count, ec); + if (U_FAILURE(ec)) { + return NULL; + } + umtx_lock(&gCurrencyCacheMutex); + // check again. + for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) { + if (currCache[i]!= NULL && + uprv_strcmp(locale, currCache[i]->locale) == 0) { + found = i; + break; + } + } + if (found == -1) { + // insert new entry to + // currentCacheEntryIndex % CURRENCY_NAME_CACHE_NUM + // and remove the existing entry + // currentCacheEntryIndex % CURRENCY_NAME_CACHE_NUM + // from cache. + cacheEntry = currCache[currentCacheEntryIndex]; + if (cacheEntry) { + --(cacheEntry->refCount); + // delete if the ref count is zero + if (cacheEntry->refCount == 0) { + deleteCacheEntry(cacheEntry); + } + } + cacheEntry = (CurrencyNameCacheEntry*)uprv_malloc(sizeof(CurrencyNameCacheEntry)); + currCache[currentCacheEntryIndex] = cacheEntry; + uprv_strcpy(cacheEntry->locale, locale); + cacheEntry->currencyNames = currencyNames; + cacheEntry->totalCurrencyNameCount = total_currency_name_count; + cacheEntry->currencySymbols = currencySymbols; + cacheEntry->totalCurrencySymbolCount = total_currency_symbol_count; + cacheEntry->refCount = 2; // one for cache, one for reference + currentCacheEntryIndex = (currentCacheEntryIndex + 1) % CURRENCY_NAME_CACHE_NUM; + ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup); + } else { + deleteCurrencyNames(currencyNames, total_currency_name_count); + deleteCurrencyNames(currencySymbols, total_currency_symbol_count); + cacheEntry = currCache[found]; + ++(cacheEntry->refCount); + } + umtx_unlock(&gCurrencyCacheMutex); + } + + return cacheEntry; +} + +static void releaseCacheEntry(CurrencyNameCacheEntry* cacheEntry) { + umtx_lock(&gCurrencyCacheMutex); + --(cacheEntry->refCount); + if (cacheEntry->refCount == 0) { // remove + deleteCacheEntry(cacheEntry); + } + umtx_unlock(&gCurrencyCacheMutex); +} + +U_CAPI void +uprv_parseCurrency(const char* locale, + const icu::UnicodeString& text, + icu::ParsePosition& pos, + int8_t type, + int32_t* partialMatchLen, + UChar* result, + UErrorCode& ec) { + U_NAMESPACE_USE + if (U_FAILURE(ec)) { + return; + } + CurrencyNameCacheEntry* cacheEntry = getCacheEntry(locale, ec); + if (U_FAILURE(ec)) { + return; + } + + int32_t total_currency_name_count = cacheEntry->totalCurrencyNameCount; + CurrencyNameStruct* currencyNames = cacheEntry->currencyNames; + int32_t total_currency_symbol_count = cacheEntry->totalCurrencySymbolCount; + CurrencyNameStruct* currencySymbols = cacheEntry->currencySymbols; + + int32_t start = pos.getIndex(); + + UChar inputText[MAX_CURRENCY_NAME_LEN]; + UChar upperText[MAX_CURRENCY_NAME_LEN]; + int32_t textLen = MIN(MAX_CURRENCY_NAME_LEN, text.length() - start); + text.extract(start, textLen, inputText); + UErrorCode ec1 = U_ZERO_ERROR; + textLen = u_strToUpper(upperText, MAX_CURRENCY_NAME_LEN, inputText, textLen, locale, &ec1); + + // Make sure partialMatchLen is initialized + *partialMatchLen = 0; + + int32_t max = 0; + int32_t matchIndex = -1; + // case in-sensitive comparision against currency names + searchCurrencyName(currencyNames, total_currency_name_count, + upperText, textLen, partialMatchLen, &max, &matchIndex); + +#ifdef UCURR_DEBUG + printf("search in names, max = %d, matchIndex = %d\n", max, matchIndex); +#endif + + int32_t maxInSymbol = 0; + int32_t matchIndexInSymbol = -1; + if (type != UCURR_LONG_NAME) { // not name only + // case sensitive comparison against currency symbols and ISO code. + searchCurrencyName(currencySymbols, total_currency_symbol_count, + inputText, textLen, + partialMatchLen, + &maxInSymbol, &matchIndexInSymbol); + } + +#ifdef UCURR_DEBUG + printf("search in symbols, maxInSymbol = %d, matchIndexInSymbol = %d\n", maxInSymbol, matchIndexInSymbol); + if(matchIndexInSymbol != -1) { + printf("== ISO=%s\n", currencySymbols[matchIndexInSymbol].IsoCode); + } +#endif + + if (max >= maxInSymbol && matchIndex != -1) { + u_charsToUChars(currencyNames[matchIndex].IsoCode, result, 4); + pos.setIndex(start + max); + } else if (maxInSymbol >= max && matchIndexInSymbol != -1) { + u_charsToUChars(currencySymbols[matchIndexInSymbol].IsoCode, result, 4); + pos.setIndex(start + maxInSymbol); + } + + // decrease reference count + releaseCacheEntry(cacheEntry); +} + +void uprv_currencyLeads(const char* locale, icu::UnicodeSet& result, UErrorCode& ec) { + U_NAMESPACE_USE + if (U_FAILURE(ec)) { + return; + } + CurrencyNameCacheEntry* cacheEntry = getCacheEntry(locale, ec); + if (U_FAILURE(ec)) { + return; + } + + for (int32_t i=0; i<cacheEntry->totalCurrencySymbolCount; i++) { + const CurrencyNameStruct& info = cacheEntry->currencySymbols[i]; + UChar32 cp; + U16_GET(info.currencyName, 0, 0, info.currencyNameLen, cp); + result.add(cp); + } + + for (int32_t i=0; i<cacheEntry->totalCurrencyNameCount; i++) { + const CurrencyNameStruct& info = cacheEntry->currencyNames[i]; + UChar32 cp; + U16_GET(info.currencyName, 0, 0, info.currencyNameLen, cp); + result.add(cp); + } + + // decrease reference count + releaseCacheEntry(cacheEntry); +} + + +/** + * Internal method. Given a currency ISO code and a locale, return + * the "static" currency name. This is usually the same as the + * UCURR_SYMBOL_NAME, but if the latter is a choice format, then the + * format is applied to the number 2.0 (to yield the more common + * plural) to return a static name. + * + * This is used for backward compatibility with old currency logic in + * DecimalFormat and DecimalFormatSymbols. + */ +U_CAPI void +uprv_getStaticCurrencyName(const UChar* iso, const char* loc, + icu::UnicodeString& result, UErrorCode& ec) +{ + U_NAMESPACE_USE + + int32_t len; + const UChar* currname = ucurr_getName(iso, loc, UCURR_SYMBOL_NAME, + nullptr /* isChoiceFormat */, &len, &ec); + if (U_SUCCESS(ec)) { + result.setTo(currname, len); + } +} + +U_CAPI int32_t U_EXPORT2 +ucurr_getDefaultFractionDigits(const UChar* currency, UErrorCode* ec) { + return ucurr_getDefaultFractionDigitsForUsage(currency,UCURR_USAGE_STANDARD,ec); +} + +U_CAPI int32_t U_EXPORT2 +ucurr_getDefaultFractionDigitsForUsage(const UChar* currency, const UCurrencyUsage usage, UErrorCode* ec) { + int32_t fracDigits = 0; + if (U_SUCCESS(*ec)) { + switch (usage) { + case UCURR_USAGE_STANDARD: + fracDigits = (_findMetaData(currency, *ec))[0]; + break; + case UCURR_USAGE_CASH: + fracDigits = (_findMetaData(currency, *ec))[2]; + break; + default: + *ec = U_UNSUPPORTED_ERROR; + } + } + return fracDigits; +} + +U_CAPI double U_EXPORT2 +ucurr_getRoundingIncrement(const UChar* currency, UErrorCode* ec) { + return ucurr_getRoundingIncrementForUsage(currency, UCURR_USAGE_STANDARD, ec); +} + +U_CAPI double U_EXPORT2 +ucurr_getRoundingIncrementForUsage(const UChar* currency, const UCurrencyUsage usage, UErrorCode* ec) { + double result = 0.0; + + const int32_t *data = _findMetaData(currency, *ec); + if (U_SUCCESS(*ec)) { + int32_t fracDigits; + int32_t increment; + switch (usage) { + case UCURR_USAGE_STANDARD: + fracDigits = data[0]; + increment = data[1]; + break; + case UCURR_USAGE_CASH: + fracDigits = data[2]; + increment = data[3]; + break; + default: + *ec = U_UNSUPPORTED_ERROR; + return result; + } + + // If the meta data is invalid, return 0.0 + if (fracDigits < 0 || fracDigits > MAX_POW10) { + *ec = U_INVALID_FORMAT_ERROR; + } else { + // A rounding value of 0 or 1 indicates no rounding. + if (increment >= 2) { + // Return (increment) / 10^(fracDigits). The only actual rounding data, + // as of this writing, is CHF { 2, 5 }. + result = double(increment) / POW10[fracDigits]; + } + } + } + + return result; +} + +U_CDECL_BEGIN + +typedef struct UCurrencyContext { + uint32_t currType; /* UCurrCurrencyType */ + uint32_t listIdx; +} UCurrencyContext; + +/* +Please keep this list in alphabetical order. +You can look at the CLDR supplemental data or ISO-4217 for the meaning of some +of these items. +ISO-4217: http://www.iso.org/iso/en/prods-services/popstds/currencycodeslist.html +*/ +static const struct CurrencyList { + const char *currency; + uint32_t currType; +} gCurrencyList[] = { + {"ADP", UCURR_COMMON|UCURR_DEPRECATED}, + {"AED", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"AFA", UCURR_COMMON|UCURR_DEPRECATED}, + {"AFN", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"ALK", UCURR_COMMON|UCURR_DEPRECATED}, + {"ALL", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"AMD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"ANG", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"AOA", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"AOK", UCURR_COMMON|UCURR_DEPRECATED}, + {"AON", UCURR_COMMON|UCURR_DEPRECATED}, + {"AOR", UCURR_COMMON|UCURR_DEPRECATED}, + {"ARA", UCURR_COMMON|UCURR_DEPRECATED}, + {"ARL", UCURR_COMMON|UCURR_DEPRECATED}, + {"ARM", UCURR_COMMON|UCURR_DEPRECATED}, + {"ARP", UCURR_COMMON|UCURR_DEPRECATED}, + {"ARS", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"ATS", UCURR_COMMON|UCURR_DEPRECATED}, + {"AUD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"AWG", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"AZM", UCURR_COMMON|UCURR_DEPRECATED}, + {"AZN", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BAD", UCURR_COMMON|UCURR_DEPRECATED}, + {"BAM", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BAN", UCURR_COMMON|UCURR_DEPRECATED}, + {"BBD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BDT", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BEC", UCURR_UNCOMMON|UCURR_DEPRECATED}, + {"BEF", UCURR_COMMON|UCURR_DEPRECATED}, + {"BEL", UCURR_UNCOMMON|UCURR_DEPRECATED}, + {"BGL", UCURR_COMMON|UCURR_DEPRECATED}, + {"BGM", UCURR_COMMON|UCURR_DEPRECATED}, + {"BGN", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BGO", UCURR_COMMON|UCURR_DEPRECATED}, + {"BHD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BIF", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BMD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BND", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BOB", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BOL", UCURR_COMMON|UCURR_DEPRECATED}, + {"BOP", UCURR_COMMON|UCURR_DEPRECATED}, + {"BOV", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"BRB", UCURR_COMMON|UCURR_DEPRECATED}, + {"BRC", UCURR_COMMON|UCURR_DEPRECATED}, + {"BRE", UCURR_COMMON|UCURR_DEPRECATED}, + {"BRL", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BRN", UCURR_COMMON|UCURR_DEPRECATED}, + {"BRR", UCURR_COMMON|UCURR_DEPRECATED}, + {"BRZ", UCURR_COMMON|UCURR_DEPRECATED}, + {"BSD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BTN", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BUK", UCURR_COMMON|UCURR_DEPRECATED}, + {"BWP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BYB", UCURR_COMMON|UCURR_DEPRECATED}, + {"BYN", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"BYR", UCURR_COMMON|UCURR_DEPRECATED}, + {"BZD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"CAD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"CDF", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"CHE", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"CHF", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"CHW", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"CLE", UCURR_COMMON|UCURR_DEPRECATED}, + {"CLF", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"CLP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"CNH", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"CNX", UCURR_UNCOMMON|UCURR_DEPRECATED}, + {"CNY", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"COP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"COU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"CRC", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"CSD", UCURR_COMMON|UCURR_DEPRECATED}, + {"CSK", UCURR_COMMON|UCURR_DEPRECATED}, + {"CUC", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"CUP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"CVE", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"CYP", UCURR_COMMON|UCURR_DEPRECATED}, + {"CZK", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"DDM", UCURR_COMMON|UCURR_DEPRECATED}, + {"DEM", UCURR_COMMON|UCURR_DEPRECATED}, + {"DJF", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"DKK", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"DOP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"DZD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"ECS", UCURR_COMMON|UCURR_DEPRECATED}, + {"ECV", UCURR_UNCOMMON|UCURR_DEPRECATED}, + {"EEK", UCURR_COMMON|UCURR_DEPRECATED}, + {"EGP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"EQE", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove? + {"ERN", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"ESA", UCURR_UNCOMMON|UCURR_DEPRECATED}, + {"ESB", UCURR_UNCOMMON|UCURR_DEPRECATED}, + {"ESP", UCURR_COMMON|UCURR_DEPRECATED}, + {"ETB", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"EUR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"FIM", UCURR_COMMON|UCURR_DEPRECATED}, + {"FJD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"FKP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"FRF", UCURR_COMMON|UCURR_DEPRECATED}, + {"GBP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"GEK", UCURR_COMMON|UCURR_DEPRECATED}, + {"GEL", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"GHC", UCURR_COMMON|UCURR_DEPRECATED}, + {"GHS", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"GIP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"GMD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"GNF", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"GNS", UCURR_COMMON|UCURR_DEPRECATED}, + {"GQE", UCURR_COMMON|UCURR_DEPRECATED}, + {"GRD", UCURR_COMMON|UCURR_DEPRECATED}, + {"GTQ", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"GWE", UCURR_COMMON|UCURR_DEPRECATED}, + {"GWP", UCURR_COMMON|UCURR_DEPRECATED}, + {"GYD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"HKD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"HNL", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"HRD", UCURR_COMMON|UCURR_DEPRECATED}, + {"HRK", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"HTG", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"HUF", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"IDR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"IEP", UCURR_COMMON|UCURR_DEPRECATED}, + {"ILP", UCURR_COMMON|UCURR_DEPRECATED}, + {"ILR", UCURR_COMMON|UCURR_DEPRECATED}, + {"ILS", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"INR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"IQD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"IRR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"ISJ", UCURR_COMMON|UCURR_DEPRECATED}, + {"ISK", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"ITL", UCURR_COMMON|UCURR_DEPRECATED}, + {"JMD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"JOD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"JPY", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"KES", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"KGS", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"KHR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"KMF", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"KPW", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"KRH", UCURR_COMMON|UCURR_DEPRECATED}, + {"KRO", UCURR_COMMON|UCURR_DEPRECATED}, + {"KRW", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"KWD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"KYD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"KZT", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"LAK", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"LBP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"LKR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"LRD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"LSL", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"LSM", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove? + {"LTL", UCURR_COMMON|UCURR_DEPRECATED}, + {"LTT", UCURR_COMMON|UCURR_DEPRECATED}, + {"LUC", UCURR_UNCOMMON|UCURR_DEPRECATED}, + {"LUF", UCURR_COMMON|UCURR_DEPRECATED}, + {"LUL", UCURR_UNCOMMON|UCURR_DEPRECATED}, + {"LVL", UCURR_COMMON|UCURR_DEPRECATED}, + {"LVR", UCURR_COMMON|UCURR_DEPRECATED}, + {"LYD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MAD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MAF", UCURR_COMMON|UCURR_DEPRECATED}, + {"MCF", UCURR_COMMON|UCURR_DEPRECATED}, + {"MDC", UCURR_COMMON|UCURR_DEPRECATED}, + {"MDL", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MGA", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MGF", UCURR_COMMON|UCURR_DEPRECATED}, + {"MKD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MKN", UCURR_COMMON|UCURR_DEPRECATED}, + {"MLF", UCURR_COMMON|UCURR_DEPRECATED}, + {"MMK", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MNT", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MOP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MRO", UCURR_COMMON|UCURR_DEPRECATED}, + {"MRU", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MTL", UCURR_COMMON|UCURR_DEPRECATED}, + {"MTP", UCURR_COMMON|UCURR_DEPRECATED}, + {"MUR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MVP", UCURR_COMMON|UCURR_DEPRECATED}, // questionable, remove? + {"MVR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MWK", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MXN", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MXP", UCURR_COMMON|UCURR_DEPRECATED}, + {"MXV", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"MYR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"MZE", UCURR_COMMON|UCURR_DEPRECATED}, + {"MZM", UCURR_COMMON|UCURR_DEPRECATED}, + {"MZN", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"NAD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"NGN", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"NIC", UCURR_COMMON|UCURR_DEPRECATED}, + {"NIO", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"NLG", UCURR_COMMON|UCURR_DEPRECATED}, + {"NOK", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"NPR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"NZD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"OMR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"PAB", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"PEI", UCURR_COMMON|UCURR_DEPRECATED}, + {"PEN", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"PES", UCURR_COMMON|UCURR_DEPRECATED}, + {"PGK", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"PHP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"PKR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"PLN", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"PLZ", UCURR_COMMON|UCURR_DEPRECATED}, + {"PTE", UCURR_COMMON|UCURR_DEPRECATED}, + {"PYG", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"QAR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"RHD", UCURR_COMMON|UCURR_DEPRECATED}, + {"ROL", UCURR_COMMON|UCURR_DEPRECATED}, + {"RON", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"RSD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"RUB", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"RUR", UCURR_COMMON|UCURR_DEPRECATED}, + {"RWF", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"SAR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"SBD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"SCR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"SDD", UCURR_COMMON|UCURR_DEPRECATED}, + {"SDG", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"SDP", UCURR_COMMON|UCURR_DEPRECATED}, + {"SEK", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"SGD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"SHP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"SIT", UCURR_COMMON|UCURR_DEPRECATED}, + {"SKK", UCURR_COMMON|UCURR_DEPRECATED}, + {"SLL", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"SOS", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"SRD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"SRG", UCURR_COMMON|UCURR_DEPRECATED}, + {"SSP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"STD", UCURR_COMMON|UCURR_DEPRECATED}, + {"STN", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"SUR", UCURR_COMMON|UCURR_DEPRECATED}, + {"SVC", UCURR_COMMON|UCURR_DEPRECATED}, + {"SYP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"SZL", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"THB", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"TJR", UCURR_COMMON|UCURR_DEPRECATED}, + {"TJS", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"TMM", UCURR_COMMON|UCURR_DEPRECATED}, + {"TMT", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"TND", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"TOP", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"TPE", UCURR_COMMON|UCURR_DEPRECATED}, + {"TRL", UCURR_COMMON|UCURR_DEPRECATED}, + {"TRY", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"TTD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"TWD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"TZS", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"UAH", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"UAK", UCURR_COMMON|UCURR_DEPRECATED}, + {"UGS", UCURR_COMMON|UCURR_DEPRECATED}, + {"UGX", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"USD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"USN", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"USS", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"UYI", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"UYP", UCURR_COMMON|UCURR_DEPRECATED}, + {"UYU", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"UZS", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"VEB", UCURR_COMMON|UCURR_DEPRECATED}, + {"VEF", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"VND", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"VNN", UCURR_COMMON|UCURR_DEPRECATED}, + {"VUV", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"WST", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"XAF", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"XAG", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XAU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XBA", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XBB", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XBC", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XBD", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XCD", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"XDR", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XEU", UCURR_UNCOMMON|UCURR_DEPRECATED}, + {"XFO", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XFU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XOF", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"XPD", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XPF", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"XPT", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XRE", UCURR_UNCOMMON|UCURR_DEPRECATED}, + {"XSU", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XTS", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XUA", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"XXX", UCURR_UNCOMMON|UCURR_NON_DEPRECATED}, + {"YDD", UCURR_COMMON|UCURR_DEPRECATED}, + {"YER", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"YUD", UCURR_COMMON|UCURR_DEPRECATED}, + {"YUM", UCURR_COMMON|UCURR_DEPRECATED}, + {"YUN", UCURR_COMMON|UCURR_DEPRECATED}, + {"YUR", UCURR_COMMON|UCURR_DEPRECATED}, + {"ZAL", UCURR_UNCOMMON|UCURR_DEPRECATED}, + {"ZAR", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"ZMK", UCURR_COMMON|UCURR_DEPRECATED}, + {"ZMW", UCURR_COMMON|UCURR_NON_DEPRECATED}, + {"ZRN", UCURR_COMMON|UCURR_DEPRECATED}, + {"ZRZ", UCURR_COMMON|UCURR_DEPRECATED}, + {"ZWD", UCURR_COMMON|UCURR_DEPRECATED}, + {"ZWL", UCURR_COMMON|UCURR_DEPRECATED}, + {"ZWR", UCURR_COMMON|UCURR_DEPRECATED}, + { NULL, 0 } // Leave here to denote the end of the list. +}; + +#define UCURR_MATCHES_BITMASK(variable, typeToMatch) \ + ((typeToMatch) == UCURR_ALL || ((variable) & (typeToMatch)) == (typeToMatch)) + +static int32_t U_CALLCONV +ucurr_countCurrencyList(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { + UCurrencyContext *myContext = (UCurrencyContext *)(enumerator->context); + uint32_t currType = myContext->currType; + int32_t count = 0; + + /* Count the number of items matching the type we are looking for. */ + for (int32_t idx = 0; gCurrencyList[idx].currency != NULL; idx++) { + if (UCURR_MATCHES_BITMASK(gCurrencyList[idx].currType, currType)) { + count++; + } + } + return count; +} + +static const char* U_CALLCONV +ucurr_nextCurrencyList(UEnumeration *enumerator, + int32_t* resultLength, + UErrorCode * /*pErrorCode*/) +{ + UCurrencyContext *myContext = (UCurrencyContext *)(enumerator->context); + + /* Find the next in the list that matches the type we are looking for. */ + while (myContext->listIdx < UPRV_LENGTHOF(gCurrencyList)-1) { + const struct CurrencyList *currItem = &gCurrencyList[myContext->listIdx++]; + if (UCURR_MATCHES_BITMASK(currItem->currType, myContext->currType)) + { + if (resultLength) { + *resultLength = 3; /* Currency codes are only 3 chars long */ + } + return currItem->currency; + } + } + /* We enumerated too far. */ + if (resultLength) { + *resultLength = 0; + } + return NULL; +} + +static void U_CALLCONV +ucurr_resetCurrencyList(UEnumeration *enumerator, UErrorCode * /*pErrorCode*/) { + ((UCurrencyContext *)(enumerator->context))->listIdx = 0; +} + +static void U_CALLCONV +ucurr_closeCurrencyList(UEnumeration *enumerator) { + uprv_free(enumerator->context); + uprv_free(enumerator); +} + +static void U_CALLCONV +ucurr_createCurrencyList(UHashtable *isoCodes, UErrorCode* status){ + UErrorCode localStatus = U_ZERO_ERROR; + + // Look up the CurrencyMap element in the root bundle. + UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus); + UResourceBundle *currencyMapArray = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus); + + if (U_SUCCESS(localStatus)) { + // process each entry in currency map + for (int32_t i=0; i<ures_getSize(currencyMapArray); i++) { + // get the currency resource + UResourceBundle *currencyArray = ures_getByIndex(currencyMapArray, i, NULL, &localStatus); + // process each currency + if (U_SUCCESS(localStatus)) { + for (int32_t j=0; j<ures_getSize(currencyArray); j++) { + // get the currency resource + UResourceBundle *currencyRes = ures_getByIndex(currencyArray, j, NULL, &localStatus); + IsoCodeEntry *entry = (IsoCodeEntry*)uprv_malloc(sizeof(IsoCodeEntry)); + if (entry == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + + // get the ISO code + int32_t isoLength = 0; + UResourceBundle *idRes = ures_getByKey(currencyRes, "id", NULL, &localStatus); + if (idRes == NULL) { + continue; + } + const UChar *isoCode = ures_getString(idRes, &isoLength, &localStatus); + + // get from date + UDate fromDate = U_DATE_MIN; + UResourceBundle *fromRes = ures_getByKey(currencyRes, "from", NULL, &localStatus); + + if (U_SUCCESS(localStatus)) { + int32_t fromLength = 0; + const int32_t *fromArray = ures_getIntVector(fromRes, &fromLength, &localStatus); + int64_t currDate64 = (int64_t)fromArray[0] << 32; + currDate64 |= ((int64_t)fromArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF)); + fromDate = (UDate)currDate64; + } + ures_close(fromRes); + + // get to date + UDate toDate = U_DATE_MAX; + localStatus = U_ZERO_ERROR; + UResourceBundle *toRes = ures_getByKey(currencyRes, "to", NULL, &localStatus); + + if (U_SUCCESS(localStatus)) { + int32_t toLength = 0; + const int32_t *toArray = ures_getIntVector(toRes, &toLength, &localStatus); + int64_t currDate64 = (int64_t)toArray[0] << 32; + currDate64 |= ((int64_t)toArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF)); + toDate = (UDate)currDate64; + } + ures_close(toRes); + + ures_close(idRes); + ures_close(currencyRes); + + entry->isoCode = isoCode; + entry->from = fromDate; + entry->to = toDate; + + localStatus = U_ZERO_ERROR; + uhash_put(isoCodes, (UChar *)isoCode, entry, &localStatus); + } + } else { + *status = localStatus; + } + ures_close(currencyArray); + } + } else { + *status = localStatus; + } + + ures_close(currencyMapArray); +} + +static const UEnumeration gEnumCurrencyList = { + NULL, + NULL, + ucurr_closeCurrencyList, + ucurr_countCurrencyList, + uenum_unextDefault, + ucurr_nextCurrencyList, + ucurr_resetCurrencyList +}; +U_CDECL_END + + +static void U_CALLCONV initIsoCodes(UErrorCode &status) { + U_ASSERT(gIsoCodes == NULL); + ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup); + + UHashtable *isoCodes = uhash_open(uhash_hashUChars, uhash_compareUChars, NULL, &status); + if (U_FAILURE(status)) { + return; + } + uhash_setValueDeleter(isoCodes, deleteIsoCodeEntry); + + ucurr_createCurrencyList(isoCodes, &status); + if (U_FAILURE(status)) { + uhash_close(isoCodes); + return; + } + gIsoCodes = isoCodes; // Note: gIsoCodes is const. Once set up here it is never altered, + // and read only access is safe without synchronization. +} + +static void populateCurrSymbolsEquiv(icu::Hashtable *hash, UErrorCode &status) { + if (U_FAILURE(status)) { return; } + for (auto& entry : unisets::kCurrencyEntries) { + UnicodeString exemplar(entry.exemplar); + const UnicodeSet* set = unisets::get(entry.key); + if (set == nullptr) { return; } + UnicodeSetIterator it(*set); + while (it.next()) { + UnicodeString value = it.getString(); + if (value == exemplar) { + // No need to mark the exemplar character as an equivalent + continue; + } + makeEquivalent(exemplar, value, hash, status); + if (U_FAILURE(status)) { return; } + } + } +} + +static void U_CALLCONV initCurrSymbolsEquiv() { + U_ASSERT(gCurrSymbolsEquiv == NULL); + UErrorCode status = U_ZERO_ERROR; + ucln_common_registerCleanup(UCLN_COMMON_CURRENCY, currency_cleanup); + icu::Hashtable *temp = new icu::Hashtable(status); + if (temp == NULL) { + return; + } + if (U_FAILURE(status)) { + delete temp; + return; + } + temp->setValueDeleter(deleteUnicode); + populateCurrSymbolsEquiv(temp, status); + if (U_FAILURE(status)) { + delete temp; + return; + } + gCurrSymbolsEquiv = temp; +} + +U_CAPI UBool U_EXPORT2 +ucurr_isAvailable(const UChar* isoCode, UDate from, UDate to, UErrorCode* eErrorCode) { + umtx_initOnce(gIsoCodesInitOnce, &initIsoCodes, *eErrorCode); + if (U_FAILURE(*eErrorCode)) { + return FALSE; + } + + IsoCodeEntry* result = (IsoCodeEntry *) uhash_get(gIsoCodes, isoCode); + if (result == NULL) { + return FALSE; + } else if (from > to) { + *eErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } else if ((from > result->to) || (to < result->from)) { + return FALSE; + } + return TRUE; +} + +static const icu::Hashtable* getCurrSymbolsEquiv() { + umtx_initOnce(gCurrSymbolsEquivInitOnce, &initCurrSymbolsEquiv); + return gCurrSymbolsEquiv; +} + +U_CAPI UEnumeration * U_EXPORT2 +ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode) { + UEnumeration *myEnum = NULL; + UCurrencyContext *myContext; + + myEnum = (UEnumeration*)uprv_malloc(sizeof(UEnumeration)); + if (myEnum == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memcpy(myEnum, &gEnumCurrencyList, sizeof(UEnumeration)); + myContext = (UCurrencyContext*)uprv_malloc(sizeof(UCurrencyContext)); + if (myContext == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + uprv_free(myEnum); + return NULL; + } + myContext->currType = currType; + myContext->listIdx = 0; + myEnum->context = myContext; + return myEnum; +} + +U_CAPI int32_t U_EXPORT2 +ucurr_countCurrencies(const char* locale, + UDate date, + UErrorCode* ec) +{ + int32_t currCount = 0; + + if (ec != NULL && U_SUCCESS(*ec)) + { + // local variables + UErrorCode localStatus = U_ZERO_ERROR; + char id[ULOC_FULLNAME_CAPACITY]; + + // get country or country_variant in `id' + idForLocale(locale, id, sizeof(id), ec); + + if (U_FAILURE(*ec)) + { + return 0; + } + + // Remove variants, which is only needed for registration. + char *idDelim = strchr(id, VAR_DELIM); + if (idDelim) + { + idDelim[0] = 0; + } + + // Look up the CurrencyMap element in the root bundle. + UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus); + UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus); + + // Using the id derived from the local, get the currency data + UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus); + + // process each currency to see which one is valid for the given date + if (U_SUCCESS(localStatus)) + { + for (int32_t i=0; i<ures_getSize(countryArray); i++) + { + // get the currency resource + UResourceBundle *currencyRes = ures_getByIndex(countryArray, i, NULL, &localStatus); + + // get the from date + int32_t fromLength = 0; + UResourceBundle *fromRes = ures_getByKey(currencyRes, "from", NULL, &localStatus); + const int32_t *fromArray = ures_getIntVector(fromRes, &fromLength, &localStatus); + + int64_t currDate64 = (int64_t)fromArray[0] << 32; + currDate64 |= ((int64_t)fromArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF)); + UDate fromDate = (UDate)currDate64; + + if (ures_getSize(currencyRes)> 2) + { + int32_t toLength = 0; + UResourceBundle *toRes = ures_getByKey(currencyRes, "to", NULL, &localStatus); + const int32_t *toArray = ures_getIntVector(toRes, &toLength, &localStatus); + + currDate64 = (int64_t)toArray[0] << 32; + currDate64 |= ((int64_t)toArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF)); + UDate toDate = (UDate)currDate64; + + if ((fromDate <= date) && (date < toDate)) + { + currCount++; + } + + ures_close(toRes); + } + else + { + if (fromDate <= date) + { + currCount++; + } + } + + // close open resources + ures_close(currencyRes); + ures_close(fromRes); + + } // end For loop + } // end if (U_SUCCESS(localStatus)) + + ures_close(countryArray); + + // Check for errors + if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) + { + // There is nothing to fallback to. + // Report the failure/warning if possible. + *ec = localStatus; + } + + if (U_SUCCESS(*ec)) + { + // no errors + return currCount; + } + + } + + // If we got here, either error code is invalid or + // some argument passed is no good. + return 0; +} + +U_CAPI int32_t U_EXPORT2 +ucurr_forLocaleAndDate(const char* locale, + UDate date, + int32_t index, + UChar* buff, + int32_t buffCapacity, + UErrorCode* ec) +{ + int32_t resLen = 0; + int32_t currIndex = 0; + const UChar* s = NULL; + + if (ec != NULL && U_SUCCESS(*ec)) + { + // check the arguments passed + if ((buff && buffCapacity) || !buffCapacity ) + { + // local variables + UErrorCode localStatus = U_ZERO_ERROR; + char id[ULOC_FULLNAME_CAPACITY]; + + // get country or country_variant in `id' + idForLocale(locale, id, sizeof(id), ec); + if (U_FAILURE(*ec)) + { + return 0; + } + + // Remove variants, which is only needed for registration. + char *idDelim = strchr(id, VAR_DELIM); + if (idDelim) + { + idDelim[0] = 0; + } + + // Look up the CurrencyMap element in the root bundle. + UResourceBundle *rb = ures_openDirect(U_ICUDATA_CURR, CURRENCY_DATA, &localStatus); + UResourceBundle *cm = ures_getByKey(rb, CURRENCY_MAP, rb, &localStatus); + + // Using the id derived from the local, get the currency data + UResourceBundle *countryArray = ures_getByKey(rb, id, cm, &localStatus); + + // process each currency to see which one is valid for the given date + bool matchFound = false; + if (U_SUCCESS(localStatus)) + { + if ((index <= 0) || (index> ures_getSize(countryArray))) + { + // requested index is out of bounds + ures_close(countryArray); + return 0; + } + + for (int32_t i=0; i<ures_getSize(countryArray); i++) + { + // get the currency resource + UResourceBundle *currencyRes = ures_getByIndex(countryArray, i, NULL, &localStatus); + s = ures_getStringByKey(currencyRes, "id", &resLen, &localStatus); + + // get the from date + int32_t fromLength = 0; + UResourceBundle *fromRes = ures_getByKey(currencyRes, "from", NULL, &localStatus); + const int32_t *fromArray = ures_getIntVector(fromRes, &fromLength, &localStatus); + + int64_t currDate64 = (int64_t)fromArray[0] << 32; + currDate64 |= ((int64_t)fromArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF)); + UDate fromDate = (UDate)currDate64; + + if (ures_getSize(currencyRes)> 2) + { + int32_t toLength = 0; + UResourceBundle *toRes = ures_getByKey(currencyRes, "to", NULL, &localStatus); + const int32_t *toArray = ures_getIntVector(toRes, &toLength, &localStatus); + + currDate64 = (int64_t)toArray[0] << 32; + currDate64 |= ((int64_t)toArray[1] & (int64_t)INT64_C(0x00000000FFFFFFFF)); + UDate toDate = (UDate)currDate64; + + if ((fromDate <= date) && (date < toDate)) + { + currIndex++; + if (currIndex == index) + { + matchFound = true; + } + } + + ures_close(toRes); + } + else + { + if (fromDate <= date) + { + currIndex++; + if (currIndex == index) + { + matchFound = true; + } + } + } + + // close open resources + ures_close(currencyRes); + ures_close(fromRes); + + // check for loop exit + if (matchFound) + { + break; + } + + } // end For loop + } + + ures_close(countryArray); + + // Check for errors + if (*ec == U_ZERO_ERROR || localStatus != U_ZERO_ERROR) + { + // There is nothing to fallback to. + // Report the failure/warning if possible. + *ec = localStatus; + } + + if (U_SUCCESS(*ec)) + { + // no errors + if((buffCapacity> resLen) && matchFound) + { + // write out the currency value + u_strcpy(buff, s); + } + else + { + return 0; + } + } + + // return null terminated currency string + return u_terminateUChars(buff, buffCapacity, resLen, ec); + } + else + { + // illegal argument encountered + *ec = U_ILLEGAL_ARGUMENT_ERROR; + } + + } + + // If we got here, either error code is invalid or + // some argument passed is no good. + return resLen; +} + +static const UEnumeration defaultKeywordValues = { + NULL, + NULL, + ulist_close_keyword_values_iterator, + ulist_count_keyword_values, + uenum_unextDefault, + ulist_next_keyword_value, + ulist_reset_keyword_values_iterator +}; + +U_CAPI UEnumeration *U_EXPORT2 ucurr_getKeywordValuesForLocale(const char *key, const char *locale, UBool commonlyUsed, UErrorCode* status) { + // Resolve region + char prefRegion[ULOC_COUNTRY_CAPACITY]; + ulocimp_getRegionForSupplementalData(locale, TRUE, prefRegion, sizeof(prefRegion), status); + + // Read value from supplementalData + UList *values = ulist_createEmptyList(status); + UList *otherValues = ulist_createEmptyList(status); + UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); + if (U_FAILURE(*status) || en == NULL) { + if (en == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + } else { + uprv_free(en); + } + ulist_deleteList(values); + ulist_deleteList(otherValues); + return NULL; + } + memcpy(en, &defaultKeywordValues, sizeof(UEnumeration)); + en->context = values; + + UResourceBundle *bundle = ures_openDirect(U_ICUDATA_CURR, "supplementalData", status); + ures_getByKey(bundle, "CurrencyMap", bundle, status); + UResourceBundle bundlekey, regbndl, curbndl, to; + ures_initStackObject(&bundlekey); + ures_initStackObject(®bndl); + ures_initStackObject(&curbndl); + ures_initStackObject(&to); + + while (U_SUCCESS(*status) && ures_hasNext(bundle)) { + ures_getNextResource(bundle, &bundlekey, status); + if (U_FAILURE(*status)) { + break; + } + const char *region = ures_getKey(&bundlekey); + UBool isPrefRegion = uprv_strcmp(region, prefRegion) == 0 ? TRUE : FALSE; + if (!isPrefRegion && commonlyUsed) { + // With commonlyUsed=true, we do not put + // currencies for other regions in the + // result list. + continue; + } + ures_getByKey(bundle, region, ®bndl, status); + if (U_FAILURE(*status)) { + break; + } + while (U_SUCCESS(*status) && ures_hasNext(®bndl)) { + ures_getNextResource(®bndl, &curbndl, status); + if (ures_getType(&curbndl) != URES_TABLE) { + // Currently, an empty ARRAY is mixed in. + continue; + } + char *curID = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY); + int32_t curIDLength = ULOC_KEYWORDS_CAPACITY; + if (curID == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + +#if U_CHARSET_FAMILY==U_ASCII_FAMILY + ures_getUTF8StringByKey(&curbndl, "id", curID, &curIDLength, TRUE, status); + /* optimize - use the utf-8 string */ +#else + { + const UChar* defString = ures_getStringByKey(&curbndl, "id", &curIDLength, status); + if(U_SUCCESS(*status)) { + if(curIDLength+1 > ULOC_KEYWORDS_CAPACITY) { + *status = U_BUFFER_OVERFLOW_ERROR; + } else { + u_UCharsToChars(defString, curID, curIDLength+1); + } + } + } +#endif + + if (U_FAILURE(*status)) { + break; + } + UBool hasTo = FALSE; + ures_getByKey(&curbndl, "to", &to, status); + if (U_FAILURE(*status)) { + // Do nothing here... + *status = U_ZERO_ERROR; + } else { + hasTo = TRUE; + } + if (isPrefRegion && !hasTo && !ulist_containsString(values, curID, (int32_t)uprv_strlen(curID))) { + // Currently active currency for the target country + ulist_addItemEndList(values, curID, TRUE, status); + } else if (!ulist_containsString(otherValues, curID, (int32_t)uprv_strlen(curID)) && !commonlyUsed) { + ulist_addItemEndList(otherValues, curID, TRUE, status); + } else { + uprv_free(curID); + } + } + + } + if (U_SUCCESS(*status)) { + if (commonlyUsed) { + if (ulist_getListSize(values) == 0) { + // This could happen if no valid region is supplied in the input + // locale. In this case, we use the CLDR's default. + uenum_close(en); + en = ucurr_getKeywordValuesForLocale(key, "und", TRUE, status); + } + } else { + // Consolidate the list + char *value = NULL; + ulist_resetList(otherValues); + while ((value = (char *)ulist_getNext(otherValues)) != NULL) { + if (!ulist_containsString(values, value, (int32_t)uprv_strlen(value))) { + char *tmpValue = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY); + uprv_memcpy(tmpValue, value, uprv_strlen(value) + 1); + ulist_addItemEndList(values, tmpValue, TRUE, status); + if (U_FAILURE(*status)) { + break; + } + } + } + } + + ulist_resetList((UList *)(en->context)); + } else { + ulist_deleteList(values); + uprv_free(en); + values = NULL; + en = NULL; + } + ures_close(&to); + ures_close(&curbndl); + ures_close(®bndl); + ures_close(&bundlekey); + ures_close(bundle); + + ulist_deleteList(otherValues); + + return en; +} + + +U_CAPI int32_t U_EXPORT2 +ucurr_getNumericCode(const UChar* currency) { + int32_t code = 0; + if (currency && u_strlen(currency) == ISO_CURRENCY_CODE_LENGTH) { + UErrorCode status = U_ZERO_ERROR; + + UResourceBundle *bundle = ures_openDirect(0, "currencyNumericCodes", &status); + ures_getByKey(bundle, "codeMap", bundle, &status); + if (U_SUCCESS(status)) { + char alphaCode[ISO_CURRENCY_CODE_LENGTH+1]; + myUCharsToChars(alphaCode, currency); + T_CString_toUpperCase(alphaCode); + ures_getByKey(bundle, alphaCode, bundle, &status); + int tmpCode = ures_getInt(bundle, &status); + if (U_SUCCESS(status)) { + code = tmpCode; + } + } + ures_close(bundle); + } + return code; +} +#endif /* #if !UCONFIG_NO_FORMATTING */ + +//eof diff --git a/thirdparty/icu4c/common/ucurrimp.h b/thirdparty/icu4c/common/ucurrimp.h new file mode 100644 index 0000000000..6d9588295d --- /dev/null +++ b/thirdparty/icu4c/common/ucurrimp.h @@ -0,0 +1,78 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#ifndef _UCURR_IMP_H_ +#define _UCURR_IMP_H_ + +#include "unicode/utypes.h" +#include "unicode/unistr.h" +#include "unicode/parsepos.h" +#include "unicode/uniset.h" + +/** + * Internal method. Given a currency ISO code and a locale, return + * the "static" currency name. This is usually the same as the + * UCURR_SYMBOL_NAME, but if the latter is a choice format, then the + * format is applied to the number 2.0 (to yield the more common + * plural) to return a static name. + * + * This is used for backward compatibility with old currency logic in + * DecimalFormat and DecimalFormatSymbols. + */ +U_CAPI void +uprv_getStaticCurrencyName(const UChar* iso, const char* loc, + icu::UnicodeString& result, UErrorCode& ec); + +/** + * Attempt to parse the given string as a currency, either as a + * display name in the given locale, or as a 3-letter ISO 4217 + * code. If multiple display names match, then the longest one is + * selected. If both a display name and a 3-letter ISO code + * match, then the display name is preferred, unless it's length + * is less than 3. + * + * The parameters must not be NULL. + * + * @param locale the locale of the display names to match + * @param text the text to parse + * @param pos input-output position; on input, the position within + * text to match; must have 0 <= pos.getIndex() < text.length(); + * on output, the position after the last matched character. If + * the parse fails, the position in unchanged upon output. + * @param type currency type to parse against, LONG_NAME only or not + * @param partialMatchLen The length of the longest matching prefix; + * this may be nonzero even if no full currency was matched. + * @return the ISO 4217 code, as a string, of the best match, or + * null if there is no match + * + * @internal + */ +U_CAPI void +uprv_parseCurrency(const char* locale, + const icu::UnicodeString& text, + icu::ParsePosition& pos, + int8_t type, + int32_t* partialMatchLen, + UChar* result, + UErrorCode& ec); + +/** + * Puts all possible first-characters of a currency into the + * specified UnicodeSet. + * + * @param locale the locale of the display names of interest + * @param result the UnicodeSet to which to add the starting characters + */ +void uprv_currencyLeads(const char* locale, icu::UnicodeSet& result, UErrorCode& ec); + + + +#endif /* #ifndef _UCURR_IMP_H_ */ + +//eof diff --git a/thirdparty/icu4c/common/udata.cpp b/thirdparty/icu4c/common/udata.cpp new file mode 100644 index 0000000000..ec9c999cea --- /dev/null +++ b/thirdparty/icu4c/common/udata.cpp @@ -0,0 +1,1460 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: udata.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999oct25 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" /* U_PLATFORM etc. */ + +#ifdef __GNUC__ +/* if gcc +#define ATTRIBUTE_WEAK __attribute__ ((weak)) +might have to #include some other header +*/ +#endif + +#include "unicode/putil.h" +#include "unicode/udata.h" +#include "unicode/uversion.h" +#include "charstr.h" +#include "cmemory.h" +#include "cstring.h" +#include "mutex.h" +#include "putilimp.h" +#include "restrace.h" +#include "uassert.h" +#include "ucln_cmn.h" +#include "ucmndata.h" +#include "udatamem.h" +#include "uhash.h" +#include "umapfile.h" +#include "umutex.h" + +/*********************************************************************** +* +* Notes on the organization of the ICU data implementation +* +* All of the public API is defined in udata.h +* +* The implementation is split into several files... +* +* - udata.c (this file) contains higher level code that knows about +* the search paths for locating data, caching opened data, etc. +* +* - umapfile.c contains the low level platform-specific code for actually loading +* (memory mapping, file reading, whatever) data into memory. +* +* - ucmndata.c deals with the tables of contents of ICU data items within +* an ICU common format data file. The implementation includes +* an abstract interface and support for multiple TOC formats. +* All knowledge of any specific TOC format is encapsulated here. +* +* - udatamem.c has code for managing UDataMemory structs. These are little +* descriptor objects for blocks of memory holding ICU data of +* various types. +*/ + +/* configuration ---------------------------------------------------------- */ + +/* If you are excruciatingly bored turn this on .. */ +/* #define UDATA_DEBUG 1 */ + +#if defined(UDATA_DEBUG) +# include <stdio.h> +#endif + +U_NAMESPACE_USE + +/* + * Forward declarations + */ +static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err); + +/*********************************************************************** +* +* static (Global) data +* +************************************************************************/ + +/* + * Pointers to the common ICU data. + * + * We store multiple pointers to ICU data packages and iterate through them + * when looking for a data item. + * + * It is possible to combine this with dependency inversion: + * One or more data package libraries may export + * functions that each return a pointer to their piece of the ICU data, + * and this file would import them as weak functions, without a + * strong linker dependency from the common library on the data library. + * + * Then we can have applications depend on only that part of ICU's data + * that they really need, reducing the size of binaries that take advantage + * of this. + */ +static UDataMemory *gCommonICUDataArray[10] = { NULL }; // Access protected by icu global mutex. + +static u_atomic_int32_t gHaveTriedToLoadCommonData = ATOMIC_INT32_T_INITIALIZER(0); // See extendICUData(). + +static UHashtable *gCommonDataCache = NULL; /* Global hash table of opened ICU data files. */ +static icu::UInitOnce gCommonDataCacheInitOnce = U_INITONCE_INITIALIZER; + +#if !defined(ICU_DATA_DIR_WINDOWS) +static UDataFileAccess gDataFileAccess = UDATA_DEFAULT_ACCESS; // Access not synchronized. + // Modifying is documented as thread-unsafe. +#else +// If we are using the Windows data directory, then look in one spot only. +static UDataFileAccess gDataFileAccess = UDATA_NO_FILES; +#endif + +static UBool U_CALLCONV +udata_cleanup(void) +{ + int32_t i; + + if (gCommonDataCache) { /* Delete the cache of user data mappings. */ + uhash_close(gCommonDataCache); /* Table owns the contents, and will delete them. */ + gCommonDataCache = NULL; /* Cleanup is not thread safe. */ + } + gCommonDataCacheInitOnce.reset(); + + for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray) && gCommonICUDataArray[i] != NULL; ++i) { + udata_close(gCommonICUDataArray[i]); + gCommonICUDataArray[i] = NULL; + } + gHaveTriedToLoadCommonData = 0; + + return TRUE; /* Everything was cleaned up */ +} + +static UBool U_CALLCONV +findCommonICUDataByName(const char *inBasename, UErrorCode &err) +{ + UBool found = FALSE; + int32_t i; + + UDataMemory *pData = udata_findCachedData(inBasename, err); + if (U_FAILURE(err) || pData == NULL) + return FALSE; + + { + Mutex lock; + for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) { + if ((gCommonICUDataArray[i] != NULL) && (gCommonICUDataArray[i]->pHeader == pData->pHeader)) { + /* The data pointer is already in the array. */ + found = TRUE; + break; + } + } + } + return found; +} + + +/* + * setCommonICUData. Set a UDataMemory to be the global ICU Data + */ +static UBool +setCommonICUData(UDataMemory *pData, /* The new common data. Belongs to caller, we copy it. */ + UBool warn, /* If true, set USING_DEFAULT warning if ICUData was */ + /* changed by another thread before we got to it. */ + UErrorCode *pErr) +{ + UDataMemory *newCommonData = UDataMemory_createNewInstance(pErr); + int32_t i; + UBool didUpdate = FALSE; + if (U_FAILURE(*pErr)) { + return FALSE; + } + + /* For the assignment, other threads must cleanly see either the old */ + /* or the new, not some partially initialized new. The old can not be */ + /* deleted - someone may still have a pointer to it lying around in */ + /* their locals. */ + UDatamemory_assign(newCommonData, pData); + umtx_lock(NULL); + for (i = 0; i < UPRV_LENGTHOF(gCommonICUDataArray); ++i) { + if (gCommonICUDataArray[i] == NULL) { + gCommonICUDataArray[i] = newCommonData; + didUpdate = TRUE; + break; + } else if (gCommonICUDataArray[i]->pHeader == pData->pHeader) { + /* The same data pointer is already in the array. */ + break; + } + } + umtx_unlock(NULL); + + if (i == UPRV_LENGTHOF(gCommonICUDataArray) && warn) { + *pErr = U_USING_DEFAULT_WARNING; + } + if (didUpdate) { + ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup); + } else { + uprv_free(newCommonData); + } + return didUpdate; +} + +#if !defined(ICU_DATA_DIR_WINDOWS) + +static UBool +setCommonICUDataPointer(const void *pData, UBool /*warn*/, UErrorCode *pErrorCode) { + UDataMemory tData; + UDataMemory_init(&tData); + UDataMemory_setData(&tData, pData); + udata_checkCommonData(&tData, pErrorCode); + return setCommonICUData(&tData, FALSE, pErrorCode); +} + +#endif + +static const char * +findBasename(const char *path) { + const char *basename=uprv_strrchr(path, U_FILE_SEP_CHAR); + if(basename==NULL) { + return path; + } else { + return basename+1; + } +} + +#ifdef UDATA_DEBUG +static const char * +packageNameFromPath(const char *path) +{ + if((path == NULL) || (*path == 0)) { + return U_ICUDATA_NAME; + } + + path = findBasename(path); + + if((path == NULL) || (*path == 0)) { + return U_ICUDATA_NAME; + } + + return path; +} +#endif + +/*----------------------------------------------------------------------* + * * + * Cache for common data * + * Functions for looking up or adding entries to a cache of * + * data that has been previously opened. Avoids a potentially * + * expensive operation of re-opening the data for subsequent * + * uses. * + * * + * Data remains cached for the duration of the process. * + * * + *----------------------------------------------------------------------*/ + +typedef struct DataCacheElement { + char *name; + UDataMemory *item; +} DataCacheElement; + + + +/* + * Deleter function for DataCacheElements. + * udata cleanup function closes the hash table; hash table in turn calls back to + * here for each entry. + */ +static void U_CALLCONV DataCacheElement_deleter(void *pDCEl) { + DataCacheElement *p = (DataCacheElement *)pDCEl; + udata_close(p->item); /* unmaps storage */ + uprv_free(p->name); /* delete the hash key string. */ + uprv_free(pDCEl); /* delete 'this' */ +} + +static void U_CALLCONV udata_initHashTable(UErrorCode &err) { + U_ASSERT(gCommonDataCache == NULL); + gCommonDataCache = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &err); + if (U_FAILURE(err)) { + return; + } + U_ASSERT(gCommonDataCache != NULL); + uhash_setValueDeleter(gCommonDataCache, DataCacheElement_deleter); + ucln_common_registerCleanup(UCLN_COMMON_UDATA, udata_cleanup); +} + + /* udata_getCacheHashTable() + * Get the hash table used to store the data cache entries. + * Lazy create it if it doesn't yet exist. + */ +static UHashtable *udata_getHashTable(UErrorCode &err) { + umtx_initOnce(gCommonDataCacheInitOnce, &udata_initHashTable, err); + return gCommonDataCache; +} + + + +static UDataMemory *udata_findCachedData(const char *path, UErrorCode &err) +{ + UHashtable *htable; + UDataMemory *retVal = NULL; + DataCacheElement *el; + const char *baseName; + + htable = udata_getHashTable(err); + if (U_FAILURE(err)) { + return NULL; + } + + baseName = findBasename(path); /* Cache remembers only the base name, not the full path. */ + umtx_lock(NULL); + el = (DataCacheElement *)uhash_get(htable, baseName); + umtx_unlock(NULL); + if (el != NULL) { + retVal = el->item; + } +#ifdef UDATA_DEBUG + fprintf(stderr, "Cache: [%s] -> %p\n", baseName, (void*) retVal); +#endif + return retVal; +} + + +static UDataMemory *udata_cacheDataItem(const char *path, UDataMemory *item, UErrorCode *pErr) { + DataCacheElement *newElement; + const char *baseName; + int32_t nameLen; + UHashtable *htable; + DataCacheElement *oldValue = NULL; + UErrorCode subErr = U_ZERO_ERROR; + + htable = udata_getHashTable(*pErr); + if (U_FAILURE(*pErr)) { + return NULL; + } + + /* Create a new DataCacheElement - the thingy we store in the hash table - + * and copy the supplied path and UDataMemoryItems into it. + */ + newElement = (DataCacheElement *)uprv_malloc(sizeof(DataCacheElement)); + if (newElement == NULL) { + *pErr = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + newElement->item = UDataMemory_createNewInstance(pErr); + if (U_FAILURE(*pErr)) { + uprv_free(newElement); + return NULL; + } + UDatamemory_assign(newElement->item, item); + + baseName = findBasename(path); + nameLen = (int32_t)uprv_strlen(baseName); + newElement->name = (char *)uprv_malloc(nameLen+1); + if (newElement->name == NULL) { + *pErr = U_MEMORY_ALLOCATION_ERROR; + uprv_free(newElement->item); + uprv_free(newElement); + return NULL; + } + uprv_strcpy(newElement->name, baseName); + + /* Stick the new DataCacheElement into the hash table. + */ + umtx_lock(NULL); + oldValue = (DataCacheElement *)uhash_get(htable, path); + if (oldValue != NULL) { + subErr = U_USING_DEFAULT_WARNING; + } + else { + uhash_put( + htable, + newElement->name, /* Key */ + newElement, /* Value */ + &subErr); + } + umtx_unlock(NULL); + +#ifdef UDATA_DEBUG + fprintf(stderr, "Cache: [%s] <<< %p : %s. vFunc=%p\n", newElement->name, + (void*) newElement->item, u_errorName(subErr), (void*) newElement->item->vFuncs); +#endif + + if (subErr == U_USING_DEFAULT_WARNING || U_FAILURE(subErr)) { + *pErr = subErr; /* copy sub err unto fillin ONLY if something happens. */ + uprv_free(newElement->name); + uprv_free(newElement->item); + uprv_free(newElement); + return oldValue ? oldValue->item : NULL; + } + + return newElement->item; +} + +/*----------------------------------------------------------------------*============== + * * + * Path management. Could be shared with other tools/etc if need be * + * later on. * + * * + *----------------------------------------------------------------------*/ + +U_NAMESPACE_BEGIN + +class UDataPathIterator +{ +public: + UDataPathIterator(const char *path, const char *pkg, + const char *item, const char *suffix, UBool doCheckLastFour, + UErrorCode *pErrorCode); + const char *next(UErrorCode *pErrorCode); + +private: + const char *path; /* working path (u_icudata_Dir) */ + const char *nextPath; /* path following this one */ + const char *basename; /* item's basename (icudt22e_mt.res)*/ + + StringPiece suffix; /* item suffix (can be null) */ + + uint32_t basenameLen; /* length of basename */ + + CharString itemPath; /* path passed in with item name */ + CharString pathBuffer; /* output path for this it'ion */ + CharString packageStub; /* example: "/icudt28b". Will ignore that leaf in set paths. */ + + UBool checkLastFour; /* if TRUE then allow paths such as '/foo/myapp.dat' + * to match, checks last 4 chars of suffix with + * last 4 of path, then previous chars. */ +}; + +/** + * @param iter The iterator to be initialized. Its current state does not matter. + * @param inPath The full pathname to be iterated over. If NULL, defaults to U_ICUDATA_NAME + * @param pkg Package which is being searched for, ex "icudt28l". Will ignore leaf directories such as /icudt28l + * @param item Item to be searched for. Can include full path, such as /a/b/foo.dat + * @param inSuffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly. + * Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2. + * '/blarg/stuff.dat' would also be found. + * Note: inSuffix may also be the 'item' being searched for as well, (ex: "ibm-5348_P100-1997.cnv"), in which case + * the 'item' parameter is often the same as pkg. (Though sometimes might have a tree part as well, ex: "icudt62l-curr"). + */ +UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg, + const char *item, const char *inSuffix, UBool doCheckLastFour, + UErrorCode *pErrorCode) +{ +#ifdef UDATA_DEBUG + fprintf(stderr, "SUFFIX1=%s PATH=%s\n", inSuffix, inPath); +#endif + /** Path **/ + if(inPath == NULL) { + path = u_getDataDirectory(); + } else { + path = inPath; + } + + /** Package **/ + if(pkg != NULL) { + packageStub.append(U_FILE_SEP_CHAR, *pErrorCode).append(pkg, *pErrorCode); +#ifdef UDATA_DEBUG + fprintf(stderr, "STUB=%s [%d]\n", packageStub.data(), packageStub.length()); +#endif + } + + /** Item **/ + basename = findBasename(item); + basenameLen = (int32_t)uprv_strlen(basename); + + /** Item path **/ + if(basename == item) { + nextPath = path; + } else { + itemPath.append(item, (int32_t)(basename-item), *pErrorCode); + nextPath = itemPath.data(); + } +#ifdef UDATA_DEBUG + fprintf(stderr, "SUFFIX=%s [%p]\n", inSuffix, (void*) inSuffix); +#endif + + /** Suffix **/ + if(inSuffix != NULL) { + suffix = inSuffix; + } else { + suffix = ""; + } + + checkLastFour = doCheckLastFour; + + /* pathBuffer will hold the output path strings returned by this iterator */ + +#ifdef UDATA_DEBUG + fprintf(stderr, "0: init %s -> [path=%s], [base=%s], [suff=%s], [itempath=%s], [nextpath=%s], [checklast4=%s]\n", + item, + path, + basename, + suffix.data(), + itemPath.data(), + nextPath, + checkLastFour?"TRUE":"false"); +#endif +} + +/** + * Get the next path on the list. + * + * @param iter The Iter to be used + * @param len If set, pointer to the length of the returned path, for convenience. + * @return Pointer to the next path segment, or NULL if there are no more. + */ +const char *UDataPathIterator::next(UErrorCode *pErrorCode) +{ + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + + const char *currentPath = NULL; + int32_t pathLen = 0; + const char *pathBasename; + + do + { + if( nextPath == NULL ) { + break; + } + currentPath = nextPath; + + if(nextPath == itemPath.data()) { /* we were processing item's path. */ + nextPath = path; /* start with regular path next tm. */ + pathLen = (int32_t)uprv_strlen(currentPath); + } else { + /* fix up next for next time */ + nextPath = uprv_strchr(currentPath, U_PATH_SEP_CHAR); + if(nextPath == NULL) { + /* segment: entire path */ + pathLen = (int32_t)uprv_strlen(currentPath); + } else { + /* segment: until next segment */ + pathLen = (int32_t)(nextPath - currentPath); + /* skip divider */ + nextPath ++; + } + } + + if(pathLen == 0) { + continue; + } + +#ifdef UDATA_DEBUG + fprintf(stderr, "rest of path (IDD) = %s\n", currentPath); + fprintf(stderr, " "); + { + int32_t qqq; + for(qqq=0;qqq<pathLen;qqq++) + { + fprintf(stderr, " "); + } + + fprintf(stderr, "^\n"); + } +#endif + pathBuffer.clear().append(currentPath, pathLen, *pErrorCode); + + /* check for .dat files */ + pathBasename = findBasename(pathBuffer.data()); + + if(checkLastFour == TRUE && + (pathLen>=4) && + uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix.data(), 4)==0 && /* suffix matches */ + uprv_strncmp(findBasename(pathBuffer.data()), basename, basenameLen)==0 && /* base matches */ + uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */ + +#ifdef UDATA_DEBUG + fprintf(stderr, "Have %s file on the path: %s\n", suffix.data(), pathBuffer.data()); +#endif + /* do nothing */ + } + else + { /* regular dir path */ + if(pathBuffer[pathLen-1] != U_FILE_SEP_CHAR) { + if((pathLen>=4) && + uprv_strncmp(pathBuffer.data()+(pathLen-4), ".dat", 4) == 0) + { +#ifdef UDATA_DEBUG + fprintf(stderr, "skipping non-directory .dat file %s\n", pathBuffer.data()); +#endif + continue; + } + + /* Check if it is a directory with the same name as our package */ + if(!packageStub.isEmpty() && + (pathLen > packageStub.length()) && + !uprv_strcmp(pathBuffer.data() + pathLen - packageStub.length(), packageStub.data())) { +#ifdef UDATA_DEBUG + fprintf(stderr, "Found stub %s (will add package %s of len %d)\n", packageStub.data(), basename, basenameLen); +#endif + pathBuffer.truncate(pathLen - packageStub.length()); + } + pathBuffer.append(U_FILE_SEP_CHAR, *pErrorCode); + } + + /* + basename */ + pathBuffer.append(packageStub.data()+1, packageStub.length()-1, *pErrorCode); + + if (!suffix.empty()) /* tack on suffix */ + { + if (suffix.length() > 4) { + // If the suffix is actually an item ("ibm-5348_P100-1997.cnv") and not an extension (".res") + // then we need to ensure that the path ends with a separator. + pathBuffer.ensureEndsWithFileSeparator(*pErrorCode); + } + pathBuffer.append(suffix, *pErrorCode); + } + } + +#ifdef UDATA_DEBUG + fprintf(stderr, " --> %s\n", pathBuffer.data()); +#endif + + return pathBuffer.data(); + + } while(path); + + /* fell way off the end */ + return NULL; +} + +U_NAMESPACE_END + +/* ==================================================================================*/ + + +/*----------------------------------------------------------------------* + * * + * Add a static reference to the common data library * + * Unless overridden by an explicit udata_setCommonData, this will be * + * our common data. * + * * + *----------------------------------------------------------------------*/ +#if !defined(ICU_DATA_DIR_WINDOWS) +// When using the Windows system data, we expect only a single data file. +extern "C" const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT; +#endif + +/* + * This would be a good place for weak-linkage declarations of + * partial-data-library access functions where each returns a pointer + * to its data package, if it is linked in. + */ +/* +extern const void *uprv_getICUData_collation(void) ATTRIBUTE_WEAK; +extern const void *uprv_getICUData_conversion(void) ATTRIBUTE_WEAK; +*/ + +/*----------------------------------------------------------------------* + * * + * openCommonData Attempt to open a common format (.dat) file * + * Map it into memory (if it's not there already) * + * and return a UDataMemory object for it. * + * * + * If the requested data is already open and cached * + * just return the cached UDataMem object. * + * * + *----------------------------------------------------------------------*/ +static UDataMemory * +openCommonData(const char *path, /* Path from OpenChoice? */ + int32_t commonDataIndex, /* ICU Data (index >= 0) if path == NULL */ + UErrorCode *pErrorCode) +{ + UDataMemory tData; + const char *pathBuffer; + const char *inBasename; + + if (U_FAILURE(*pErrorCode)) { + return NULL; + } + + UDataMemory_init(&tData); + + /* ??????? TODO revisit this */ + if (commonDataIndex >= 0) { + /* "mini-cache" for common ICU data */ + if(commonDataIndex >= UPRV_LENGTHOF(gCommonICUDataArray)) { + return NULL; + } + { + Mutex lock; + if(gCommonICUDataArray[commonDataIndex] != NULL) { + return gCommonICUDataArray[commonDataIndex]; + } +#if !defined(ICU_DATA_DIR_WINDOWS) +// When using the Windows system data, we expect only a single data file. + int32_t i; + for(i = 0; i < commonDataIndex; ++i) { + if(gCommonICUDataArray[i]->pHeader == &U_ICUDATA_ENTRY_POINT) { + /* The linked-in data is already in the list. */ + return NULL; + } + } +#endif + } + + /* Add the linked-in data to the list. */ + /* + * This is where we would check and call weakly linked partial-data-library + * access functions. + */ + /* + if (uprv_getICUData_collation) { + setCommonICUDataPointer(uprv_getICUData_collation(), FALSE, pErrorCode); + } + if (uprv_getICUData_conversion) { + setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode); + } + */ +#if !defined(ICU_DATA_DIR_WINDOWS) +// When using the Windows system data, we expect only a single data file. + setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode); + { + Mutex lock; + return gCommonICUDataArray[commonDataIndex]; + } +#endif + } + + + /* request is NOT for ICU Data. */ + + /* Find the base name portion of the supplied path. */ + /* inBasename will be left pointing somewhere within the original path string. */ + inBasename = findBasename(path); +#ifdef UDATA_DEBUG + fprintf(stderr, "inBasename = %s\n", inBasename); +#endif + + if(*inBasename==0) { + /* no basename. This will happen if the original path was a directory name, */ + /* like "a/b/c/". (Fallback to separate files will still work.) */ +#ifdef UDATA_DEBUG + fprintf(stderr, "ocd: no basename in %s, bailing.\n", path); +#endif + if (U_SUCCESS(*pErrorCode)) { + *pErrorCode=U_FILE_ACCESS_ERROR; + } + return NULL; + } + + /* Is the requested common data file already open and cached? */ + /* Note that the cache is keyed by the base name only. The rest of the path, */ + /* if any, is not considered. */ + UDataMemory *dataToReturn = udata_findCachedData(inBasename, *pErrorCode); + if (dataToReturn != NULL || U_FAILURE(*pErrorCode)) { + return dataToReturn; + } + + /* Requested item is not in the cache. + * Hunt it down, trying all the path locations + */ + + UDataPathIterator iter(u_getDataDirectory(), inBasename, path, ".dat", TRUE, pErrorCode); + + while ((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL) + { +#ifdef UDATA_DEBUG + fprintf(stderr, "ocd: trying path %s - ", pathBuffer); +#endif + uprv_mapFile(&tData, pathBuffer, pErrorCode); +#ifdef UDATA_DEBUG + fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded"); +#endif + } + if (U_FAILURE(*pErrorCode)) { + return NULL; + } + +#if defined(OS390_STUBDATA) && defined(OS390BATCH) + if (!UDataMemory_isLoaded(&tData)) { + char ourPathBuffer[1024]; + /* One more chance, for extendCommonData() */ + uprv_strncpy(ourPathBuffer, path, 1019); + ourPathBuffer[1019]=0; + uprv_strcat(ourPathBuffer, ".dat"); + uprv_mapFile(&tData, ourPathBuffer, pErrorCode); + } +#endif + + if (U_FAILURE(*pErrorCode)) { + return NULL; + } + if (!UDataMemory_isLoaded(&tData)) { + /* no common data */ + *pErrorCode=U_FILE_ACCESS_ERROR; + return NULL; + } + + /* we have mapped a file, check its header */ + udata_checkCommonData(&tData, pErrorCode); + + + /* Cache the UDataMemory struct for this .dat file, + * so we won't need to hunt it down and map it again next time + * something is needed from it. */ + return udata_cacheDataItem(inBasename, &tData, pErrorCode); +} + + +/*----------------------------------------------------------------------* + * * + * extendICUData If the full set of ICU data was not loaded at * + * program startup, load it now. This function will * + * be called when the lookup of an ICU data item in * + * the common ICU data fails. * + * * + * return true if new data is loaded, false otherwise.* + * * + *----------------------------------------------------------------------*/ +static UBool extendICUData(UErrorCode *pErr) +{ + UDataMemory *pData; + UDataMemory copyPData; + UBool didUpdate = FALSE; + + /* + * There is a chance for a race condition here. + * Normally, ICU data is loaded from a DLL or via mmap() and + * setCommonICUData() will detect if the same address is set twice. + * If ICU is built with data loading via fread() then the address will + * be different each time the common data is loaded and we may add + * multiple copies of the data. + * In this case, use a mutex to prevent the race. + * Use a specific mutex to avoid nested locks of the global mutex. + */ +#if MAP_IMPLEMENTATION==MAP_STDIO + static UMutex extendICUDataMutex; + umtx_lock(&extendICUDataMutex); +#endif + if(!umtx_loadAcquire(gHaveTriedToLoadCommonData)) { + /* See if we can explicitly open a .dat file for the ICUData. */ + pData = openCommonData( + U_ICUDATA_NAME, /* "icudt20l" , for example. */ + -1, /* Pretend we're not opening ICUData */ + pErr); + + /* How about if there is no pData, eh... */ + + UDataMemory_init(©PData); + if(pData != NULL) { + UDatamemory_assign(©PData, pData); + copyPData.map = 0; /* The mapping for this data is owned by the hash table */ + copyPData.mapAddr = 0; /* which will unmap it when ICU is shut down. */ + /* CommonICUData is also unmapped when ICU is shut down.*/ + /* To avoid unmapping the data twice, zero out the map */ + /* fields in the UDataMemory that we're assigning */ + /* to CommonICUData. */ + + didUpdate = /* no longer using this result */ + setCommonICUData(©PData,/* The new common data. */ + FALSE, /* No warnings if write didn't happen */ + pErr); /* setCommonICUData honors errors; NOP if error set */ + } + + umtx_storeRelease(gHaveTriedToLoadCommonData, 1); + } + + didUpdate = findCommonICUDataByName(U_ICUDATA_NAME, *pErr); /* Return 'true' when a racing writes out the extended */ + /* data after another thread has failed to see it (in openCommonData), so */ + /* extended data can be examined. */ + /* Also handles a race through here before gHaveTriedToLoadCommonData is set. */ + +#if MAP_IMPLEMENTATION==MAP_STDIO + umtx_unlock(&extendICUDataMutex); +#endif + return didUpdate; /* Return true if ICUData pointer was updated. */ + /* (Could potentially have been done by another thread racing */ + /* us through here, but that's fine, we still return true */ + /* so that current thread will also examine extended data. */ +} + +/*----------------------------------------------------------------------* + * * + * udata_setCommonData * + * * + *----------------------------------------------------------------------*/ +U_CAPI void U_EXPORT2 +udata_setCommonData(const void *data, UErrorCode *pErrorCode) { + UDataMemory dataMemory; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return; + } + + if(data==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* set the data pointer and test for validity */ + UDataMemory_init(&dataMemory); + UDataMemory_setData(&dataMemory, data); + udata_checkCommonData(&dataMemory, pErrorCode); + if (U_FAILURE(*pErrorCode)) {return;} + + /* we have good data */ + /* Set it up as the ICU Common Data. */ + setCommonICUData(&dataMemory, TRUE, pErrorCode); +} + +/*--------------------------------------------------------------------------- + * + * udata_setAppData + * + *---------------------------------------------------------------------------- */ +U_CAPI void U_EXPORT2 +udata_setAppData(const char *path, const void *data, UErrorCode *err) +{ + UDataMemory udm; + + if(err==NULL || U_FAILURE(*err)) { + return; + } + if(data==NULL) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + UDataMemory_init(&udm); + UDataMemory_setData(&udm, data); + udata_checkCommonData(&udm, err); + udata_cacheDataItem(path, &udm, err); +} + +/*----------------------------------------------------------------------------* + * * + * checkDataItem Given a freshly located/loaded data item, either * + * an entry in a common file or a separately loaded file, * + * sanity check its header, and see if the data is * + * acceptable to the app. * + * If the data is good, create and return a UDataMemory * + * object that can be returned to the application. * + * Return NULL on any sort of failure. * + * * + *----------------------------------------------------------------------------*/ +static UDataMemory * +checkDataItem +( + const DataHeader *pHeader, /* The data item to be checked. */ + UDataMemoryIsAcceptable *isAcceptable, /* App's call-back function */ + void *context, /* pass-thru param for above. */ + const char *type, /* pass-thru param for above. */ + const char *name, /* pass-thru param for above. */ + UErrorCode *nonFatalErr, /* Error code if this data was not acceptable */ + /* but openChoice should continue with */ + /* trying to get data from fallback path. */ + UErrorCode *fatalErr /* Bad error, caller should return immediately */ + ) +{ + UDataMemory *rDataMem = NULL; /* the new UDataMemory, to be returned. */ + + if (U_FAILURE(*fatalErr)) { + return NULL; + } + + if(pHeader->dataHeader.magic1==0xda && + pHeader->dataHeader.magic2==0x27 && + (isAcceptable==NULL || isAcceptable(context, type, name, &pHeader->info)) + ) { + rDataMem=UDataMemory_createNewInstance(fatalErr); + if (U_FAILURE(*fatalErr)) { + return NULL; + } + rDataMem->pHeader = pHeader; + } else { + /* the data is not acceptable, look further */ + /* If we eventually find something good, this errorcode will be */ + /* cleared out. */ + *nonFatalErr=U_INVALID_FORMAT_ERROR; + } + return rDataMem; +} + +/** + * @return 0 if not loaded, 1 if loaded or err + */ +static UDataMemory *doLoadFromIndividualFiles(const char *pkgName, + const char *dataPath, const char *tocEntryPathSuffix, + /* following arguments are the same as doOpenChoice itself */ + const char *path, const char *type, const char *name, + UDataMemoryIsAcceptable *isAcceptable, void *context, + UErrorCode *subErrorCode, + UErrorCode *pErrorCode) +{ + const char *pathBuffer; + UDataMemory dataMemory; + UDataMemory *pEntryData; + + /* look in ind. files: package\nam.typ ========================= */ + /* init path iterator for individual files */ + UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode); + + while ((pathBuffer = iter.next(pErrorCode)) != NULL) + { +#ifdef UDATA_DEBUG + fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer); +#endif + if (uprv_mapFile(&dataMemory, pathBuffer, pErrorCode)) + { + pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode); + if (pEntryData != NULL) { + /* Data is good. + * Hand off ownership of the backing memory to the user's UDataMemory. + * and return it. */ + pEntryData->mapAddr = dataMemory.mapAddr; + pEntryData->map = dataMemory.map; + +#ifdef UDATA_DEBUG + fprintf(stderr, "** Mapped file: %s\n", pathBuffer); +#endif + return pEntryData; + } + + /* the data is not acceptable, or some error occurred. Either way, unmap the memory */ + udata_close(&dataMemory); + + /* If we had a nasty error, bail out completely. */ + if (U_FAILURE(*pErrorCode)) { + return NULL; + } + + /* Otherwise remember that we found data but didn't like it for some reason */ + *subErrorCode=U_INVALID_FORMAT_ERROR; + } +#ifdef UDATA_DEBUG + fprintf(stderr, "%s\n", UDataMemory_isLoaded(&dataMemory)?"LOADED":"not loaded"); +#endif + } + return NULL; +} + +/** + * @return 0 if not loaded, 1 if loaded or err + */ +static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName*/, + const char * /*dataPath*/, const char * /*tocEntryPathSuffix*/, const char *tocEntryName, + /* following arguments are the same as doOpenChoice itself */ + const char *path, const char *type, const char *name, + UDataMemoryIsAcceptable *isAcceptable, void *context, + UErrorCode *subErrorCode, + UErrorCode *pErrorCode) +{ + UDataMemory *pEntryData; + const DataHeader *pHeader; + UDataMemory *pCommonData; + int32_t commonDataIndex; + UBool checkedExtendedICUData = FALSE; + /* try to get common data. The loop is for platforms such as the 390 that do + * not initially load the full set of ICU data. If the lookup of an ICU data item + * fails, the full (but slower to load) set is loaded, the and the loop repeats, + * trying the lookup again. Once the full set of ICU data is loaded, the loop wont + * repeat because the full set will be checked the first time through. + * + * The loop also handles the fallback to a .dat file if the application linked + * to the stub data library rather than a real library. + */ + for (commonDataIndex = isICUData ? 0 : -1;;) { + pCommonData=openCommonData(path, commonDataIndex, subErrorCode); /** search for pkg **/ + + if(U_SUCCESS(*subErrorCode) && pCommonData!=NULL) { + int32_t length; + + /* look up the data piece in the common data */ + pHeader=pCommonData->vFuncs->Lookup(pCommonData, tocEntryName, &length, subErrorCode); +#ifdef UDATA_DEBUG + fprintf(stderr, "%s: pHeader=%p - %s\n", tocEntryName, (void*) pHeader, u_errorName(*subErrorCode)); +#endif + + if(pHeader!=NULL) { + pEntryData = checkDataItem(pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode); +#ifdef UDATA_DEBUG + fprintf(stderr, "pEntryData=%p\n", (void*) pEntryData); +#endif + if (U_FAILURE(*pErrorCode)) { + return NULL; + } + if (pEntryData != NULL) { + pEntryData->length = length; + return pEntryData; + } + } + } + // If we failed due to being out-of-memory, then stop early and report the error. + if (*subErrorCode == U_MEMORY_ALLOCATION_ERROR) { + *pErrorCode = *subErrorCode; + return NULL; + } + /* Data wasn't found. If we were looking for an ICUData item and there is + * more data available, load it and try again, + * otherwise break out of this loop. */ + if (!isICUData) { + return NULL; + } else if (pCommonData != NULL) { + ++commonDataIndex; /* try the next data package */ + } else if ((!checkedExtendedICUData) && extendICUData(subErrorCode)) { + checkedExtendedICUData = TRUE; + /* try this data package slot again: it changed from NULL to non-NULL */ + } else { + return NULL; + } + } +} + +/* + * Identify the Time Zone resources that are subject to special override data loading. + */ +static UBool isTimeZoneFile(const char *name, const char *type) { + return ((uprv_strcmp(type, "res") == 0) && + (uprv_strcmp(name, "zoneinfo64") == 0 || + uprv_strcmp(name, "timezoneTypes") == 0 || + uprv_strcmp(name, "windowsZones") == 0 || + uprv_strcmp(name, "metaZones") == 0)); +} + +/* + * A note on the ownership of Mapped Memory + * + * For common format files, ownership resides with the UDataMemory object + * that lives in the cache of opened common data. These UDataMemorys are private + * to the udata implementation, and are never seen directly by users. + * + * The UDataMemory objects returned to users will have the address of some desired + * data within the mapped region, but they wont have the mapping info itself, and thus + * won't cause anything to be removed from memory when they are closed. + * + * For individual data files, the UDataMemory returned to the user holds the + * information necessary to unmap the data on close. If the user independently + * opens the same data file twice, two completely independent mappings will be made. + * (There is no cache of opened data items from individual files, only a cache of + * opened Common Data files, that is, files containing a collection of data items.) + * + * For common data passed in from the user via udata_setAppData() or + * udata_setCommonData(), ownership remains with the user. + * + * UDataMemory objects themselves, as opposed to the memory they describe, + * can be anywhere - heap, stack/local or global. + * They have a flag to indicate when they're heap allocated and thus + * must be deleted when closed. + */ + + +/*----------------------------------------------------------------------------* + * * + * main data loading functions * + * * + *----------------------------------------------------------------------------*/ +static UDataMemory * +doOpenChoice(const char *path, const char *type, const char *name, + UDataMemoryIsAcceptable *isAcceptable, void *context, + UErrorCode *pErrorCode) +{ + UDataMemory *retVal = NULL; + + const char *dataPath; + + int32_t tocEntrySuffixIndex; + const char *tocEntryPathSuffix; + UErrorCode subErrorCode=U_ZERO_ERROR; + const char *treeChar; + + UBool isICUData = FALSE; + + + FileTracer::traceOpen(path, type, name); + + + /* Is this path ICU data? */ + if(path == NULL || + !strcmp(path, U_ICUDATA_ALIAS) || /* "ICUDATA" */ + !uprv_strncmp(path, U_ICUDATA_NAME U_TREE_SEPARATOR_STRING, /* "icudt26e-" */ + uprv_strlen(U_ICUDATA_NAME U_TREE_SEPARATOR_STRING)) || + !uprv_strncmp(path, U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING, /* "ICUDATA-" */ + uprv_strlen(U_ICUDATA_ALIAS U_TREE_SEPARATOR_STRING))) { + isICUData = TRUE; + } + +#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR) /* Windows: try "foo\bar" and "foo/bar" */ + /* remap from alternate path char to the main one */ + CharString altSepPath; + if(path) { + if(uprv_strchr(path,U_FILE_ALT_SEP_CHAR) != NULL) { + altSepPath.append(path, *pErrorCode); + char *p; + while ((p = uprv_strchr(altSepPath.data(), U_FILE_ALT_SEP_CHAR)) != NULL) { + *p = U_FILE_SEP_CHAR; + } +#if defined (UDATA_DEBUG) + fprintf(stderr, "Changed path from [%s] to [%s]\n", path, altSepPath.s); +#endif + path = altSepPath.data(); + } + } +#endif + + CharString tocEntryName; /* entry name in tree format. ex: 'icudt28b/coll/ar.res' */ + CharString tocEntryPath; /* entry name in path format. ex: 'icudt28b\\coll\\ar.res' */ + + CharString pkgName; + CharString treeName; + + /* ======= Set up strings */ + if(path==NULL) { + pkgName.append(U_ICUDATA_NAME, *pErrorCode); + } else { + const char *pkg; + const char *first; + pkg = uprv_strrchr(path, U_FILE_SEP_CHAR); + first = uprv_strchr(path, U_FILE_SEP_CHAR); + if(uprv_pathIsAbsolute(path) || (pkg != first)) { /* more than one slash in the path- not a tree name */ + /* see if this is an /absolute/path/to/package path */ + if(pkg) { + pkgName.append(pkg+1, *pErrorCode); + } else { + pkgName.append(path, *pErrorCode); + } + } else { + treeChar = uprv_strchr(path, U_TREE_SEPARATOR); + if(treeChar) { + treeName.append(treeChar+1, *pErrorCode); /* following '-' */ + if(isICUData) { + pkgName.append(U_ICUDATA_NAME, *pErrorCode); + } else { + pkgName.append(path, (int32_t)(treeChar-path), *pErrorCode); + if (first == NULL) { + /* + This user data has no path, but there is a tree name. + Look up the correct path from the data cache later. + */ + path = pkgName.data(); + } + } + } else { + if(isICUData) { + pkgName.append(U_ICUDATA_NAME, *pErrorCode); + } else { + pkgName.append(path, *pErrorCode); + } + } + } + } + +#ifdef UDATA_DEBUG + fprintf(stderr, " P=%s T=%s\n", pkgName.data(), treeName.data()); +#endif + + /* setting up the entry name and file name + * Make up a full name by appending the type to the supplied + * name, assuming that a type was supplied. + */ + + /* prepend the package */ + tocEntryName.append(pkgName, *pErrorCode); + tocEntryPath.append(pkgName, *pErrorCode); + tocEntrySuffixIndex = tocEntryName.length(); + + if(!treeName.isEmpty()) { + tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode); + tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(treeName, *pErrorCode); + } + + tocEntryName.append(U_TREE_ENTRY_SEP_CHAR, *pErrorCode).append(name, *pErrorCode); + tocEntryPath.append(U_FILE_SEP_CHAR, *pErrorCode).append(name, *pErrorCode); + if(type!=NULL && *type!=0) { + tocEntryName.append(".", *pErrorCode).append(type, *pErrorCode); + tocEntryPath.append(".", *pErrorCode).append(type, *pErrorCode); + } + // The +1 is for the U_FILE_SEP_CHAR that is always appended above. + tocEntryPathSuffix = tocEntryPath.data() + tocEntrySuffixIndex + 1; /* suffix starts here */ + +#ifdef UDATA_DEBUG + fprintf(stderr, " tocEntryName = %s\n", tocEntryName.data()); + fprintf(stderr, " tocEntryPath = %s\n", tocEntryName.data()); +#endif + +#if !defined(ICU_DATA_DIR_WINDOWS) + if(path == NULL) { + path = COMMON_DATA_NAME; /* "icudt26e" */ + } +#else + // When using the Windows system data, we expects only a single data file. + path = COMMON_DATA_NAME; /* "icudt26e" */ +#endif + + /************************ Begin loop looking for ind. files ***************/ +#ifdef UDATA_DEBUG + fprintf(stderr, "IND: inBasename = %s, pkg=%s\n", "(n/a)", packageNameFromPath(path)); +#endif + + /* End of dealing with a null basename */ + dataPath = u_getDataDirectory(); + + /**** Time zone individual files override */ + if (isICUData && isTimeZoneFile(name, type)) { + const char *tzFilesDir = u_getTimeZoneFilesDirectory(pErrorCode); + if (tzFilesDir[0] != 0) { +#ifdef UDATA_DEBUG + fprintf(stderr, "Trying Time Zone Files directory = %s\n", tzFilesDir); +#endif + retVal = doLoadFromIndividualFiles(/* pkgName.data() */ "", tzFilesDir, tocEntryPathSuffix, + /* path */ "", type, name, isAcceptable, context, &subErrorCode, pErrorCode); + if((retVal != NULL) || U_FAILURE(*pErrorCode)) { + return retVal; + } + } + } + + /**** COMMON PACKAGE - only if packages are first. */ + if(gDataFileAccess == UDATA_PACKAGES_FIRST) { +#ifdef UDATA_DEBUG + fprintf(stderr, "Trying packages (UDATA_PACKAGES_FIRST)\n"); +#endif + /* #2 */ + retVal = doLoadFromCommonData(isICUData, + pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(), + path, type, name, isAcceptable, context, &subErrorCode, pErrorCode); + if((retVal != NULL) || U_FAILURE(*pErrorCode)) { + return retVal; + } + } + + /**** INDIVIDUAL FILES */ + if((gDataFileAccess==UDATA_PACKAGES_FIRST) || + (gDataFileAccess==UDATA_FILES_FIRST)) { +#ifdef UDATA_DEBUG + fprintf(stderr, "Trying individual files\n"); +#endif + /* Check to make sure that there is a dataPath to iterate over */ + if ((dataPath && *dataPath) || !isICUData) { + retVal = doLoadFromIndividualFiles(pkgName.data(), dataPath, tocEntryPathSuffix, + path, type, name, isAcceptable, context, &subErrorCode, pErrorCode); + if((retVal != NULL) || U_FAILURE(*pErrorCode)) { + return retVal; + } + } + } + + /**** COMMON PACKAGE */ + if((gDataFileAccess==UDATA_ONLY_PACKAGES) || + (gDataFileAccess==UDATA_FILES_FIRST)) { +#ifdef UDATA_DEBUG + fprintf(stderr, "Trying packages (UDATA_ONLY_PACKAGES || UDATA_FILES_FIRST)\n"); +#endif + retVal = doLoadFromCommonData(isICUData, + pkgName.data(), dataPath, tocEntryPathSuffix, tocEntryName.data(), + path, type, name, isAcceptable, context, &subErrorCode, pErrorCode); + if((retVal != NULL) || U_FAILURE(*pErrorCode)) { + return retVal; + } + } + + /* Load from DLL. If we haven't attempted package load, we also haven't had any chance to + try a DLL (static or setCommonData/etc) load. + If we ever have a "UDATA_ONLY_FILES", add it to the or list here. */ + if(gDataFileAccess==UDATA_NO_FILES) { +#ifdef UDATA_DEBUG + fprintf(stderr, "Trying common data (UDATA_NO_FILES)\n"); +#endif + retVal = doLoadFromCommonData(isICUData, + pkgName.data(), "", tocEntryPathSuffix, tocEntryName.data(), + path, type, name, isAcceptable, context, &subErrorCode, pErrorCode); + if((retVal != NULL) || U_FAILURE(*pErrorCode)) { + return retVal; + } + } + + /* data not found */ + if(U_SUCCESS(*pErrorCode)) { + if(U_SUCCESS(subErrorCode)) { + /* file not found */ + *pErrorCode=U_FILE_ACCESS_ERROR; + } else { + /* entry point not found or rejected */ + *pErrorCode=subErrorCode; + } + } + return retVal; +} + + + +/* API ---------------------------------------------------------------------- */ + +U_CAPI UDataMemory * U_EXPORT2 +udata_open(const char *path, const char *type, const char *name, + UErrorCode *pErrorCode) { +#ifdef UDATA_DEBUG + fprintf(stderr, "udata_open(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type); + fflush(stderr); +#endif + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return NULL; + } else if(name==NULL || *name==0) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } else { + return doOpenChoice(path, type, name, NULL, NULL, pErrorCode); + } +} + + + +U_CAPI UDataMemory * U_EXPORT2 +udata_openChoice(const char *path, const char *type, const char *name, + UDataMemoryIsAcceptable *isAcceptable, void *context, + UErrorCode *pErrorCode) { +#ifdef UDATA_DEBUG + fprintf(stderr, "udata_openChoice(): Opening: %s : %s . %s\n", (path?path:"NULL"), name, type); +#endif + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return NULL; + } else if(name==NULL || *name==0 || isAcceptable==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } else { + return doOpenChoice(path, type, name, isAcceptable, context, pErrorCode); + } +} + + + +U_CAPI void U_EXPORT2 +udata_getInfo(UDataMemory *pData, UDataInfo *pInfo) { + if(pInfo!=NULL) { + if(pData!=NULL && pData->pHeader!=NULL) { + const UDataInfo *info=&pData->pHeader->info; + uint16_t dataInfoSize=udata_getInfoSize(info); + if(pInfo->size>dataInfoSize) { + pInfo->size=dataInfoSize; + } + uprv_memcpy((uint16_t *)pInfo+1, (const uint16_t *)info+1, pInfo->size-2); + if(info->isBigEndian!=U_IS_BIG_ENDIAN) { + /* opposite endianness */ + uint16_t x=info->reservedWord; + pInfo->reservedWord=(uint16_t)((x<<8)|(x>>8)); + } + } else { + pInfo->size=0; + } + } +} + + +U_CAPI void U_EXPORT2 udata_setFileAccess(UDataFileAccess access, UErrorCode * /*status*/) +{ + // Note: this function is documented as not thread safe. + gDataFileAccess = access; +} diff --git a/thirdparty/icu4c/common/udatamem.cpp b/thirdparty/icu4c/common/udatamem.cpp new file mode 100644 index 0000000000..6bf7c01235 --- /dev/null +++ b/thirdparty/icu4c/common/udatamem.cpp @@ -0,0 +1,161 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************/ + + +/*---------------------------------------------------------------------------------- + * + * UDataMemory A class-like struct that serves as a handle to a piece of memory + * that contains some ICU data (resource, converters, whatever.) + * + * When an application opens ICU data (with udata_open, for example, + * a UDataMemory * is returned. + * + *----------------------------------------------------------------------------------*/ + +#include "unicode/utypes.h" +#include "cmemory.h" +#include "unicode/udata.h" + +#include "udatamem.h" + +U_CFUNC void UDataMemory_init(UDataMemory *This) { + uprv_memset(This, 0, sizeof(UDataMemory)); + This->length=-1; +} + + +U_CFUNC void UDatamemory_assign(UDataMemory *dest, UDataMemory *source) { + /* UDataMemory Assignment. Destination UDataMemory must be initialized first. */ + UBool mallocedFlag = dest->heapAllocated; + uprv_memcpy(dest, source, sizeof(UDataMemory)); + dest->heapAllocated = mallocedFlag; +} + +U_CFUNC UDataMemory *UDataMemory_createNewInstance(UErrorCode *pErr) { + UDataMemory *This; + + if (U_FAILURE(*pErr)) { + return NULL; + } + This = (UDataMemory *)uprv_malloc(sizeof(UDataMemory)); + if (This == NULL) { + *pErr = U_MEMORY_ALLOCATION_ERROR; } + else { + UDataMemory_init(This); + This->heapAllocated = TRUE; + } + return This; +} + + +U_CFUNC const DataHeader * +UDataMemory_normalizeDataPointer(const void *p) { + /* allow the data to be optionally prepended with an alignment-forcing double value */ + const DataHeader *pdh = (const DataHeader *)p; + if(pdh==NULL || (pdh->dataHeader.magic1==0xda && pdh->dataHeader.magic2==0x27)) { + return pdh; + } else { +#if U_PLATFORM == U_PF_OS400 + /* + TODO: Fix this once the compiler implements this feature. Keep in sync with genccode.c + + This is here because this platform can't currently put + const data into the read-only pages of an object or + shared library (service program). Only strings are allowed in read-only + pages, so we use char * strings to store the data. + + In order to prevent the beginning of the data from ever matching the + magic numbers we must skip the initial double. + [grhoten 4/24/2003] + */ + return (const DataHeader *)*((const void **)p+1); +#else + return (const DataHeader *)((const double *)p+1); +#endif + } +} + + +U_CFUNC void UDataMemory_setData (UDataMemory *This, const void *dataAddr) { + This->pHeader = UDataMemory_normalizeDataPointer(dataAddr); +} + + +U_CAPI void U_EXPORT2 +udata_close(UDataMemory *pData) { + if(pData!=NULL) { + uprv_unmapFile(pData); + if(pData->heapAllocated ) { + uprv_free(pData); + } else { + UDataMemory_init(pData); + } + } +} + +U_CAPI const void * U_EXPORT2 +udata_getMemory(UDataMemory *pData) { + if(pData!=NULL && pData->pHeader!=NULL) { + return (char *)(pData->pHeader)+udata_getHeaderSize(pData->pHeader); + } else { + return NULL; + } +} + +/** + * Get the length of the data item if possible. + * The length may be up to 15 bytes larger than the actual data. + * + * TODO Consider making this function public. + * It would have to return the actual length in more cases. + * For example, the length of the last item in a .dat package could be + * computed from the size of the whole .dat package minus the offset of the + * last item. + * The size of a file that was directly memory-mapped could be determined + * using some system API. + * + * In order to get perfect values for all data items, we may have to add a + * length field to UDataInfo, but that complicates data generation + * and may be overkill. + * + * @param pData The data item. + * @return the length of the data item, or -1 if not known + * @internal Currently used only in cintltst/udatatst.c + */ +U_CAPI int32_t U_EXPORT2 +udata_getLength(const UDataMemory *pData) { + if(pData!=NULL && pData->pHeader!=NULL && pData->length>=0) { + /* + * subtract the header size, + * return only the size of the actual data starting at udata_getMemory() + */ + return pData->length-udata_getHeaderSize(pData->pHeader); + } else { + return -1; + } +} + +/** + * Get the memory including the data header. + * Used in cintltst/udatatst.c + * @internal + */ +U_CAPI const void * U_EXPORT2 +udata_getRawMemory(const UDataMemory *pData) { + if(pData!=NULL && pData->pHeader!=NULL) { + return pData->pHeader; + } else { + return NULL; + } +} + +U_CFUNC UBool UDataMemory_isLoaded(const UDataMemory *This) { + return This->pHeader != NULL; +} diff --git a/thirdparty/icu4c/common/udatamem.h b/thirdparty/icu4c/common/udatamem.h new file mode 100644 index 0000000000..a05dd69756 --- /dev/null +++ b/thirdparty/icu4c/common/udatamem.h @@ -0,0 +1,61 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2010, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************/ + + +/*---------------------------------------------------------------------------------- + * + * UDataMemory A class-like struct that serves as a handle to a piece of memory + * that contains some ICU data (resource, converters, whatever.) + * + * When an application opens ICU data (with udata_open, for example, + * a UDataMemory * is returned. + * + *----------------------------------------------------------------------------------*/ +#ifndef __UDATAMEM_H__ +#define __UDATAMEM_H__ + +#include "unicode/udata.h" +#include "ucmndata.h" + +struct UDataMemory { + const commonDataFuncs *vFuncs; /* Function Pointers for accessing TOC */ + + const DataHeader *pHeader; /* Header of the memory being described by this */ + /* UDataMemory object. */ + const void *toc; /* For common memory, table of contents for */ + /* the pieces within. */ + UBool heapAllocated; /* True if this UDataMemory Object is on the */ + /* heap and thus needs to be deleted when closed. */ + + void *mapAddr; /* For mapped or allocated memory, the start addr. */ + /* Only non-null if a close operation should unmap */ + /* the associated data. */ + void *map; /* Handle, or other data, OS dependent. */ + /* Only non-null if a close operation should unmap */ + /* the associated data, and additional info */ + /* beyond the mapAddr is needed to do that. */ + int32_t length; /* Length of the data in bytes; -1 if unknown. */ +}; + +U_CFUNC UDataMemory *UDataMemory_createNewInstance(UErrorCode *pErr); +U_CFUNC void UDatamemory_assign (UDataMemory *dest, UDataMemory *source); +U_CFUNC void UDataMemory_init (UDataMemory *This); +U_CFUNC UBool UDataMemory_isLoaded(const UDataMemory *This); +U_CFUNC void UDataMemory_setData (UDataMemory *This, const void *dataAddr); + +U_CFUNC const DataHeader *UDataMemory_normalizeDataPointer(const void *p); + +U_CAPI int32_t U_EXPORT2 +udata_getLength(const UDataMemory *pData); + +U_CAPI const void * U_EXPORT2 +udata_getRawMemory(const UDataMemory *pData); + +#endif diff --git a/thirdparty/icu4c/common/udataswp.cpp b/thirdparty/icu4c/common/udataswp.cpp new file mode 100644 index 0000000000..86f302bd9c --- /dev/null +++ b/thirdparty/icu4c/common/udataswp.cpp @@ -0,0 +1,473 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2003-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: udataswp.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003jun05 +* created by: Markus W. Scherer +* +* Definitions for ICU data transformations for different platforms, +* changing between big- and little-endian data and/or between +* charset families (ASCII<->EBCDIC). +*/ + +#include <stdarg.h> +#include "unicode/utypes.h" +#include "unicode/udata.h" /* UDataInfo */ +#include "ucmndata.h" /* DataHeader */ +#include "cmemory.h" +#include "udataswp.h" + +/* swapping primitives ------------------------------------------------------ */ + +static int32_t U_CALLCONV +uprv_swapArray16(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint16_t *p; + uint16_t *q; + int32_t count; + uint16_t x; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + p=(const uint16_t *)inData; + q=(uint16_t *)outData; + count=length/2; + while(count>0) { + x=*p++; + *q++=(uint16_t)((x<<8)|(x>>8)); + --count; + } + + return length; +} + +static int32_t U_CALLCONV +uprv_copyArray16(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length&1)!=0 || outData==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(length>0 && inData!=outData) { + uprv_memcpy(outData, inData, length); + } + return length; +} + +static int32_t U_CALLCONV +uprv_swapArray32(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint32_t *p; + uint32_t *q; + int32_t count; + uint32_t x; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + p=(const uint32_t *)inData; + q=(uint32_t *)outData; + count=length/4; + while(count>0) { + x=*p++; + *q++=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); + --count; + } + + return length; +} + +static int32_t U_CALLCONV +uprv_copyArray32(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length&3)!=0 || outData==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(length>0 && inData!=outData) { + uprv_memcpy(outData, inData, length); + } + return length; +} + +static int32_t U_CALLCONV +uprv_swapArray64(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint64_t *p; + uint64_t *q; + int32_t count; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + p=(const uint64_t *)inData; + q=(uint64_t *)outData; + count=length/8; + while(count>0) { + uint64_t x=*p++; + x=(x<<56)|((x&0xff00)<<40)|((x&0xff0000)<<24)|((x&0xff000000)<<8)| + ((x>>8)&0xff000000)|((x>>24)&0xff0000)|((x>>40)&0xff00)|(x>>56); + *q++=x; + --count; + } + + return length; +} + +static int32_t U_CALLCONV +uprv_copyArray64(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length&7)!=0 || outData==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(length>0 && inData!=outData) { + uprv_memcpy(outData, inData, length); + } + return length; +} + +static uint16_t U_CALLCONV +uprv_readSwapUInt16(uint16_t x) { + return (uint16_t)((x<<8)|(x>>8)); +} + +static uint16_t U_CALLCONV +uprv_readDirectUInt16(uint16_t x) { + return x; +} + +static uint32_t U_CALLCONV +uprv_readSwapUInt32(uint32_t x) { + return (uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); +} + +static uint32_t U_CALLCONV +uprv_readDirectUInt32(uint32_t x) { + return x; +} + +static void U_CALLCONV +uprv_writeSwapUInt16(uint16_t *p, uint16_t x) { + *p=(uint16_t)((x<<8)|(x>>8)); +} + +static void U_CALLCONV +uprv_writeDirectUInt16(uint16_t *p, uint16_t x) { + *p=x; +} + +static void U_CALLCONV +uprv_writeSwapUInt32(uint32_t *p, uint32_t x) { + *p=(uint32_t)((x<<24)|((x<<8)&0xff0000)|((x>>8)&0xff00)|(x>>24)); +} + +static void U_CALLCONV +uprv_writeDirectUInt32(uint32_t *p, uint32_t x) { + *p=x; +} + +U_CAPI int16_t U_EXPORT2 +udata_readInt16(const UDataSwapper *ds, int16_t x) { + return (int16_t)ds->readUInt16((uint16_t)x); +} + +U_CAPI int32_t U_EXPORT2 +udata_readInt32(const UDataSwapper *ds, int32_t x) { + return (int32_t)ds->readUInt32((uint32_t)x); +} + +/** + * Swap a block of invariant, NUL-terminated strings, but not padding + * bytes after the last string. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +udata_swapInvStringBlock(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const char *inChars; + int32_t stringsLength; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* reduce the strings length to not include bytes after the last NUL */ + inChars=(const char *)inData; + stringsLength=length; + while(stringsLength>0 && inChars[stringsLength-1]!=0) { + --stringsLength; + } + + /* swap up to the last NUL */ + ds->swapInvChars(ds, inData, stringsLength, outData, pErrorCode); + + /* copy the bytes after the last NUL */ + if(inData!=outData && length>stringsLength) { + uprv_memcpy((char *)outData+stringsLength, inChars+stringsLength, length-stringsLength); + } + + /* return the length including padding bytes */ + if(U_SUCCESS(*pErrorCode)) { + return length; + } else { + return 0; + } +} + +U_CAPI void U_EXPORT2 +udata_printError(const UDataSwapper *ds, + const char *fmt, + ...) { + va_list args; + + if(ds->printError!=NULL) { + va_start(args, fmt); + ds->printError(ds->printErrorContext, fmt, args); + va_end(args); + } +} + +/* swap a data header ------------------------------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +udata_swapDataHeader(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const DataHeader *pHeader; + uint16_t headerSize, infoSize; + + /* argument checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* check minimum length and magic bytes */ + pHeader=(const DataHeader *)inData; + if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || + pHeader->dataHeader.magic1!=0xda || + pHeader->dataHeader.magic2!=0x27 || + pHeader->info.sizeofUChar!=2 + ) { + udata_printError(ds, "udata_swapDataHeader(): initial bytes do not look like ICU data\n"); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + headerSize=ds->readUInt16(pHeader->dataHeader.headerSize); + infoSize=ds->readUInt16(pHeader->info.size); + + if( headerSize<sizeof(DataHeader) || + infoSize<sizeof(UDataInfo) || + headerSize<(sizeof(pHeader->dataHeader)+infoSize) || + (length>=0 && length<headerSize) + ) { + udata_printError(ds, "udata_swapDataHeader(): header size mismatch - headerSize %d infoSize %d length %d\n", + headerSize, infoSize, length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + if(length>0) { + DataHeader *outHeader; + const char *s; + int32_t maxLength; + + /* Most of the fields are just bytes and need no swapping. */ + if(inData!=outData) { + uprv_memcpy(outData, inData, headerSize); + } + outHeader=(DataHeader *)outData; + + outHeader->info.isBigEndian = ds->outIsBigEndian; + outHeader->info.charsetFamily = ds->outCharset; + + /* swap headerSize */ + ds->swapArray16(ds, &pHeader->dataHeader.headerSize, 2, &outHeader->dataHeader.headerSize, pErrorCode); + + /* swap UDataInfo size and reservedWord */ + ds->swapArray16(ds, &pHeader->info.size, 4, &outHeader->info.size, pErrorCode); + + /* swap copyright statement after the UDataInfo */ + infoSize+=sizeof(pHeader->dataHeader); + s=(const char *)inData+infoSize; + maxLength=headerSize-infoSize; + /* get the length of the string */ + for(length=0; length<maxLength && s[length]!=0; ++length) {} + /* swap the string contents */ + ds->swapInvChars(ds, s, length, (char *)outData+infoSize, pErrorCode); + } + + return headerSize; +} + +/* API functions ------------------------------------------------------------ */ + +U_CAPI UDataSwapper * U_EXPORT2 +udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset, + UBool outIsBigEndian, uint8_t outCharset, + UErrorCode *pErrorCode) { + UDataSwapper *swapper; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return NULL; + } + if(inCharset>U_EBCDIC_FAMILY || outCharset>U_EBCDIC_FAMILY) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + /* allocate the swapper */ + swapper=(UDataSwapper *)uprv_malloc(sizeof(UDataSwapper)); + if(swapper==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memset(swapper, 0, sizeof(UDataSwapper)); + + /* set values and functions pointers according to in/out parameters */ + swapper->inIsBigEndian=inIsBigEndian; + swapper->inCharset=inCharset; + swapper->outIsBigEndian=outIsBigEndian; + swapper->outCharset=outCharset; + + swapper->readUInt16= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt16 : uprv_readSwapUInt16; + swapper->readUInt32= inIsBigEndian==U_IS_BIG_ENDIAN ? uprv_readDirectUInt32 : uprv_readSwapUInt32; + + swapper->writeUInt16= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt16 : uprv_writeSwapUInt16; + swapper->writeUInt32= outIsBigEndian==U_IS_BIG_ENDIAN ? uprv_writeDirectUInt32 : uprv_writeSwapUInt32; + + swapper->compareInvChars= outCharset==U_ASCII_FAMILY ? uprv_compareInvAscii : uprv_compareInvEbcdic; + + if(inIsBigEndian==outIsBigEndian) { + swapper->swapArray16=uprv_copyArray16; + swapper->swapArray32=uprv_copyArray32; + swapper->swapArray64=uprv_copyArray64; + } else { + swapper->swapArray16=uprv_swapArray16; + swapper->swapArray32=uprv_swapArray32; + swapper->swapArray64=uprv_swapArray64; + } + + if(inCharset==U_ASCII_FAMILY) { + swapper->swapInvChars= outCharset==U_ASCII_FAMILY ? uprv_copyAscii : uprv_ebcdicFromAscii; + } else /* U_EBCDIC_FAMILY */ { + swapper->swapInvChars= outCharset==U_EBCDIC_FAMILY ? uprv_copyEbcdic : uprv_asciiFromEbcdic; + } + + return swapper; +} + +U_CAPI UDataSwapper * U_EXPORT2 +udata_openSwapperForInputData(const void *data, int32_t length, + UBool outIsBigEndian, uint8_t outCharset, + UErrorCode *pErrorCode) { + const DataHeader *pHeader; + uint16_t headerSize, infoSize; + UBool inIsBigEndian; + int8_t inCharset; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return NULL; + } + if( data==NULL || + (length>=0 && length<(int32_t)sizeof(DataHeader)) || + outCharset>U_EBCDIC_FAMILY + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + pHeader=(const DataHeader *)data; + if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || + pHeader->dataHeader.magic1!=0xda || + pHeader->dataHeader.magic2!=0x27 || + pHeader->info.sizeofUChar!=2 + ) { + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + inIsBigEndian=(UBool)pHeader->info.isBigEndian; + inCharset=pHeader->info.charsetFamily; + + if(inIsBigEndian==U_IS_BIG_ENDIAN) { + headerSize=pHeader->dataHeader.headerSize; + infoSize=pHeader->info.size; + } else { + headerSize=uprv_readSwapUInt16(pHeader->dataHeader.headerSize); + infoSize=uprv_readSwapUInt16(pHeader->info.size); + } + + if( headerSize<sizeof(DataHeader) || + infoSize<sizeof(UDataInfo) || + headerSize<(sizeof(pHeader->dataHeader)+infoSize) || + (length>=0 && length<headerSize) + ) { + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + return udata_openSwapper(inIsBigEndian, inCharset, outIsBigEndian, outCharset, pErrorCode); +} + +U_CAPI void U_EXPORT2 +udata_closeSwapper(UDataSwapper *ds) { + uprv_free(ds); +} diff --git a/thirdparty/icu4c/common/udataswp.h b/thirdparty/icu4c/common/udataswp.h new file mode 100644 index 0000000000..5e7b043c4c --- /dev/null +++ b/thirdparty/icu4c/common/udataswp.h @@ -0,0 +1,404 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2003-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: udataswp.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003jun05 +* created by: Markus W. Scherer +* +* Definitions for ICU data transformations for different platforms, +* changing between big- and little-endian data and/or between +* charset families (ASCII<->EBCDIC). +*/ + +#ifndef __UDATASWP_H__ +#define __UDATASWP_H__ + +#include <stdarg.h> +#include "unicode/utypes.h" + +/* forward declaration */ + +U_CDECL_BEGIN + +struct UDataSwapper; +typedef struct UDataSwapper UDataSwapper; + +/** + * Function type for data transformation. + * Transforms data, or just returns the length of the data if + * the input length is -1. + * Swap functions assume that their data pointers are aligned properly. + * + * Quick implementation outline: + * (best to copy and adapt and existing swapper implementation) + * check that the data looks like the expected format + * if(length<0) { + * preflight: + * never dereference outData + * read inData and determine the data size + * assume that inData is long enough for this + * } else { + * outData can be NULL if length==0 + * inData==outData (in-place swapping) possible but not required! + * verify that length>=(actual size) + * if there is a chance that not every byte up to size is reached + * due to padding etc.: + * if(inData!=outData) { + * memcpy(outData, inData, actual size); + * } + * swap contents + * } + * return actual size + * + * Further implementation notes: + * - read integers from inData before swapping them + * because in-place swapping can make them unreadable + * - compareInvChars compares a local Unicode string with already-swapped + * output charset strings + * + * @param ds Pointer to UDataSwapper containing global data about the + * transformation and function pointers for handling primitive + * types. + * @param inData Pointer to the input data to be transformed or examined. + * @param length Length of the data, counting bytes. May be -1 for preflighting. + * If length>=0, then transform the data. + * If length==-1, then only determine the length of the data. + * The length cannot be determined from the data itself for all + * types of data (e.g., not for simple arrays of integers). + * @param outData Pointer to the output data buffer. + * If length>=0 (transformation), then the output buffer must + * have a capacity of at least length. + * If length==-1, then outData will not be used and can be NULL. + * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must + * fulfill U_SUCCESS on input. + * @return The actual length of the data. + * + * @see UDataSwapper + * @internal ICU 2.8 + */ +typedef int32_t U_CALLCONV +UDataSwapFn(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Convert one uint16_t from input to platform endianness. + * @internal ICU 2.8 + */ +typedef uint16_t U_CALLCONV +UDataReadUInt16(uint16_t x); + +/** + * Convert one uint32_t from input to platform endianness. + * @internal ICU 2.8 + */ +typedef uint32_t U_CALLCONV +UDataReadUInt32(uint32_t x); + +/** + * Convert one uint16_t from platform to input endianness. + * @internal ICU 2.8 + */ +typedef void U_CALLCONV +UDataWriteUInt16(uint16_t *p, uint16_t x); + +/** + * Convert one uint32_t from platform to input endianness. + * @internal ICU 2.8 + */ +typedef void U_CALLCONV +UDataWriteUInt32(uint32_t *p, uint32_t x); + +/** + * Compare invariant-character strings, one in the output data and the + * other one caller-provided in Unicode. + * An output data string is compared because strings are usually swapped + * before the rest of the data, to allow for sorting of string tables + * according to the output charset. + * You can use -1 for the length parameters of NUL-terminated strings as usual. + * Returns Unicode code point order for invariant characters. + * @internal ICU 2.8 + */ +typedef int32_t U_CALLCONV +UDataCompareInvChars(const UDataSwapper *ds, + const char *outString, int32_t outLength, + const UChar *localString, int32_t localLength); + +/** + * Function for message output when an error occurs during data swapping. + * A format string and variable number of arguments are passed + * like for vprintf(). + * + * @param context A function-specific context pointer. + * @param fmt The format string. + * @param args The arguments for format string inserts. + * + * @internal ICU 2.8 + */ +typedef void U_CALLCONV +UDataPrintError(void *context, const char *fmt, va_list args); + +struct UDataSwapper { + /** Input endianness. @internal ICU 2.8 */ + UBool inIsBigEndian; + /** Input charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */ + uint8_t inCharset; + /** Output endianness. @internal ICU 2.8 */ + UBool outIsBigEndian; + /** Output charset family. @see U_CHARSET_FAMILY @internal ICU 2.8 */ + uint8_t outCharset; + + /* basic functions for reading data values */ + + /** Convert one uint16_t from input to platform endianness. @internal ICU 2.8 */ + UDataReadUInt16 *readUInt16; + /** Convert one uint32_t from input to platform endianness. @internal ICU 2.8 */ + UDataReadUInt32 *readUInt32; + /** Compare an invariant-character output string with a local one. @internal ICU 2.8 */ + UDataCompareInvChars *compareInvChars; + + /* basic functions for writing data values */ + + /** Convert one uint16_t from platform to input endianness. @internal ICU 2.8 */ + UDataWriteUInt16 *writeUInt16; + /** Convert one uint32_t from platform to input endianness. @internal ICU 2.8 */ + UDataWriteUInt32 *writeUInt32; + + /* basic functions for data transformations */ + + /** Transform an array of 16-bit integers. @internal ICU 2.8 */ + UDataSwapFn *swapArray16; + /** Transform an array of 32-bit integers. @internal ICU 2.8 */ + UDataSwapFn *swapArray32; + /** Transform an array of 64-bit integers. @internal ICU 53 */ + UDataSwapFn *swapArray64; + /** Transform an invariant-character string. @internal ICU 2.8 */ + UDataSwapFn *swapInvChars; + + /** + * Function for message output when an error occurs during data swapping. + * Can be NULL. + * @internal ICU 2.8 + */ + UDataPrintError *printError; + /** Context pointer for printError. @internal ICU 2.8 */ + void *printErrorContext; +}; + +U_CDECL_END + +U_CAPI UDataSwapper * U_EXPORT2 +udata_openSwapper(UBool inIsBigEndian, uint8_t inCharset, + UBool outIsBigEndian, uint8_t outCharset, + UErrorCode *pErrorCode); + +/** + * Open a UDataSwapper for the given input data and the specified output + * characteristics. + * Values of -1 for any of the characteristics mean the local platform's + * characteristics. + * + * @see udata_swap + * @internal ICU 2.8 + */ +U_CAPI UDataSwapper * U_EXPORT2 +udata_openSwapperForInputData(const void *data, int32_t length, + UBool outIsBigEndian, uint8_t outCharset, + UErrorCode *pErrorCode); + +U_CAPI void U_EXPORT2 +udata_closeSwapper(UDataSwapper *ds); + +/** + * Read the beginning of an ICU data piece, recognize magic bytes, + * swap the structure. + * Set a U_UNSUPPORTED_ERROR if it does not look like an ICU data piece. + * + * @return The size of the data header, in bytes. + * + * @internal ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +udata_swapDataHeader(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Convert one int16_t from input to platform endianness. + * @internal ICU 2.8 + */ +U_CAPI int16_t U_EXPORT2 +udata_readInt16(const UDataSwapper *ds, int16_t x); + +/** + * Convert one int32_t from input to platform endianness. + * @internal ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +udata_readInt32(const UDataSwapper *ds, int32_t x); + +/** + * Swap a block of invariant, NUL-terminated strings, but not padding + * bytes after the last string. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +udata_swapInvStringBlock(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +U_CAPI void U_EXPORT2 +udata_printError(const UDataSwapper *ds, + const char *fmt, + ...); + +/* internal exports from putil.c -------------------------------------------- */ + +/* declared here to keep them out of the public putil.h */ + +/** + * Swap invariant char * strings ASCII->EBCDIC. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +uprv_ebcdicFromAscii(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Copy invariant ASCII char * strings and verify they are invariant. + * @internal + */ +U_CFUNC int32_t +uprv_copyAscii(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Swap invariant char * strings EBCDIC->ASCII. + * @internal + */ +U_CFUNC int32_t +uprv_asciiFromEbcdic(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Copy invariant EBCDIC char * strings and verify they are invariant. + * @internal + */ +U_CFUNC int32_t +uprv_copyEbcdic(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Compare ASCII invariant char * with Unicode invariant UChar * + * @internal + */ +U_CFUNC int32_t +uprv_compareInvAscii(const UDataSwapper *ds, + const char *outString, int32_t outLength, + const UChar *localString, int32_t localLength); + +/** + * Compare EBCDIC invariant char * with Unicode invariant UChar * + * @internal + */ +U_CFUNC int32_t +uprv_compareInvEbcdic(const UDataSwapper *ds, + const char *outString, int32_t outLength, + const UChar *localString, int32_t localLength); + +/** + * \def uprv_compareInvWithUChar + * Compare an invariant-character strings with a UChar string + * @internal + */ +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define uprv_compareInvWithUChar uprv_compareInvAscii +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define uprv_compareInvWithUChar uprv_compareInvEbcdic +#else +# error Unknown charset family! +#endif + +// utrie_swap.cpp -----------------------------------------------------------*** + +/** + * Swaps a serialized UTrie. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +utrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Swaps a serialized UTrie2. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +utrie2_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Swaps a serialized UCPTrie. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +ucptrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Swaps a serialized UTrie, UTrie2, or UCPTrie. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +utrie_swapAnyVersion(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/* material... -------------------------------------------------------------- */ + +#if 0 + +/* udata.h */ + +/** + * Public API function in udata.c + * + * Same as udata_openChoice() but automatically swaps the data. + * isAcceptable, if not NULL, may accept data with endianness and charset family + * different from the current platform's properties. + * If the data is acceptable and the platform properties do not match, then + * the swap function is called to swap an allocated version of the data. + * Preflighting may or may not be performed depending on whether the size of + * the loaded data item is known. + * + * @param isAcceptable Same as for udata_openChoice(). May be NULL. + * + * @internal ICU 2.8 + */ +U_CAPI UDataMemory * U_EXPORT2 +udata_openSwap(const char *path, const char *type, const char *name, + UDataMemoryIsAcceptable *isAcceptable, void *isAcceptableContext, + UDataSwapFn *swap, + UDataPrintError *printError, void *printErrorContext, + UErrorCode *pErrorCode); + +#endif + +#endif diff --git a/thirdparty/icu4c/common/uelement.h b/thirdparty/icu4c/common/uelement.h new file mode 100644 index 0000000000..88dd4d66fb --- /dev/null +++ b/thirdparty/icu4c/common/uelement.h @@ -0,0 +1,91 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 1997-2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: uelement.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011jul04 +* created by: Markus W. Scherer +* +* Common definitions for UHashTable and UVector. +* UHashTok moved here from uhash.h and renamed UElement. +* This allows users of UVector to avoid the confusing #include of uhash.h. +* uhash.h aliases UElement to UHashTok, +* so that we need not change all of its code and its users. +*/ + +#ifndef __UELEMENT_H__ +#define __UELEMENT_H__ + +#include "unicode/utypes.h" + +U_CDECL_BEGIN + +/** + * A UVector element, or a key or value within a UHashtable. + * It may be either a 32-bit integral value or an opaque void* pointer. + * The void* pointer may be smaller than 32 bits (e.g. 24 bits) + * or may be larger (e.g. 64 bits). + * + * Because a UElement is the size of a native pointer or a 32-bit + * integer, we pass it around by value. + */ +union UElement { + void* pointer; + int32_t integer; +}; +typedef union UElement UElement; + +/** + * An element-equality (boolean) comparison function. + * @param e1 An element (object or integer) + * @param e2 An element (object or integer) + * @return true if the two elements are equal. + */ +typedef UBool U_CALLCONV UElementsAreEqual(const UElement e1, const UElement e2); + +/** + * An element sorting (three-way) comparison function. + * @param e1 An element (object or integer) + * @param e2 An element (object or integer) + * @return 0 if the two elements are equal, -1 if e1 is < e2, or +1 if e1 is > e2. + */ +typedef int8_t U_CALLCONV UElementComparator(UElement e1, UElement e2); + +/** + * An element assignment function. It may copy an integer, copy + * a pointer, or clone a pointer, as appropriate. + * @param dst The element to be assigned to + * @param src The element to assign from + */ +typedef void U_CALLCONV UElementAssigner(UElement *dst, UElement *src); + +U_CDECL_END + +/** + * Comparator function for UnicodeString* keys. Implements UElementsAreEqual. + * @param key1 The string for comparison + * @param key2 The string for comparison + * @return true if key1 and key2 are equal, return false otherwise. + */ +U_CAPI UBool U_EXPORT2 +uhash_compareUnicodeString(const UElement key1, const UElement key2); + +/** + * Comparator function for UnicodeString* keys (case insensitive). + * Make sure to use together with uhash_hashCaselessUnicodeString. + * Implements UElementsAreEqual. + * @param key1 The string for comparison + * @param key2 The string for comparison + * @return true if key1 and key2 are equal, return false otherwise. + */ +U_CAPI UBool U_EXPORT2 +uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2); + +#endif /* __UELEMENT_H__ */ diff --git a/thirdparty/icu4c/common/uenum.cpp b/thirdparty/icu4c/common/uenum.cpp new file mode 100644 index 0000000000..11d895ebcd --- /dev/null +++ b/thirdparty/icu4c/common/uenum.cpp @@ -0,0 +1,189 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uenum.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2002jul08 +* created by: Vladimir Weinstein +*/ + +#include "unicode/putil.h" +#include "uenumimp.h" +#include "cmemory.h" + +/* Layout of the baseContext buffer. */ +typedef struct { + int32_t len; /* number of bytes available starting at 'data' */ + char data; /* actual data starts here */ +} _UEnumBuffer; + +/* Extra bytes to allocate in the baseContext buffer. */ +static const int32_t PAD = 8; + +/* Return a pointer to the baseContext buffer, possibly allocating + or reallocating it if at least 'capacity' bytes are not available. */ +static void* _getBuffer(UEnumeration* en, int32_t capacity) { + + if (en->baseContext != NULL) { + if (((_UEnumBuffer*) en->baseContext)->len < capacity) { + capacity += PAD; + en->baseContext = uprv_realloc(en->baseContext, + sizeof(int32_t) + capacity); + if (en->baseContext == NULL) { + return NULL; + } + ((_UEnumBuffer*) en->baseContext)->len = capacity; + } + } else { + capacity += PAD; + en->baseContext = uprv_malloc(sizeof(int32_t) + capacity); + if (en->baseContext == NULL) { + return NULL; + } + ((_UEnumBuffer*) en->baseContext)->len = capacity; + } + + return (void*) & ((_UEnumBuffer*) en->baseContext)->data; +} + +U_CAPI void U_EXPORT2 +uenum_close(UEnumeration* en) +{ + if (en) { + if (en->close != NULL) { + if (en->baseContext) { + uprv_free(en->baseContext); + } + en->close(en); + } else { /* this seems dangerous, but we better kill the object */ + uprv_free(en); + } + } +} + +U_CAPI int32_t U_EXPORT2 +uenum_count(UEnumeration* en, UErrorCode* status) +{ + if (!en || U_FAILURE(*status)) { + return -1; + } + if (en->count != NULL) { + return en->count(en, status); + } else { + *status = U_UNSUPPORTED_ERROR; + return -1; + } +} + +/* Don't call this directly. Only uenum_unext should be calling this. */ +U_CAPI const UChar* U_EXPORT2 +uenum_unextDefault(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status) +{ + UChar *ustr = NULL; + int32_t len = 0; + if (en->next != NULL) { + const char *cstr = en->next(en, &len, status); + if (cstr != NULL) { + ustr = (UChar*) _getBuffer(en, (len+1) * sizeof(UChar)); + if (ustr == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + } else { + u_charsToUChars(cstr, ustr, len+1); + } + } + } else { + *status = U_UNSUPPORTED_ERROR; + } + if (resultLength) { + *resultLength = len; + } + return ustr; +} + +/* Don't call this directly. Only uenum_next should be calling this. */ +U_CAPI const char* U_EXPORT2 +uenum_nextDefault(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status) +{ + if (en->uNext != NULL) { + char *tempCharVal; + const UChar *tempUCharVal = en->uNext(en, resultLength, status); + if (tempUCharVal == NULL) { + return NULL; + } + tempCharVal = (char*) + _getBuffer(en, (*resultLength+1) * sizeof(char)); + if (!tempCharVal) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + u_UCharsToChars(tempUCharVal, tempCharVal, *resultLength + 1); + return tempCharVal; + } else { + *status = U_UNSUPPORTED_ERROR; + return NULL; + } +} + +U_CAPI const UChar* U_EXPORT2 +uenum_unext(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status) +{ + if (!en || U_FAILURE(*status)) { + return NULL; + } + if (en->uNext != NULL) { + return en->uNext(en, resultLength, status); + } else { + *status = U_UNSUPPORTED_ERROR; + return NULL; + } +} + +U_CAPI const char* U_EXPORT2 +uenum_next(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status) +{ + if (!en || U_FAILURE(*status)) { + return NULL; + } + if (en->next != NULL) { + if (resultLength != NULL) { + return en->next(en, resultLength, status); + } + else { + int32_t dummyLength=0; + return en->next(en, &dummyLength, status); + } + } else { + *status = U_UNSUPPORTED_ERROR; + return NULL; + } +} + +U_CAPI void U_EXPORT2 +uenum_reset(UEnumeration* en, UErrorCode* status) +{ + if (!en || U_FAILURE(*status)) { + return; + } + if (en->reset != NULL) { + en->reset(en, status); + } else { + *status = U_UNSUPPORTED_ERROR; + } +} diff --git a/thirdparty/icu4c/common/uenumimp.h b/thirdparty/icu4c/common/uenumimp.h new file mode 100644 index 0000000000..9c9df75ae0 --- /dev/null +++ b/thirdparty/icu4c/common/uenumimp.h @@ -0,0 +1,155 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2006, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uenumimp.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2002jul08 +* created by: Vladimir Weinstein +*/ + +#ifndef __UENUMIMP_H +#define __UENUMIMP_H + +#include "unicode/uenum.h" + +U_CDECL_BEGIN + +/** + * following are the type declarations for + * implementations of APIs. If any of these + * functions are NULL, U_UNSUPPORTED_ERROR + * is returned. If close is NULL, the enumeration + * object is going to be released. + * Initial error checking is done in the body + * of API function, so the implementations + * need not to check the initial error condition. + */ + +/** + * Function type declaration for uenum_close(). + * + * This function should cleanup the enumerator object + * + * @param en enumeration to be closed + */ +typedef void U_CALLCONV +UEnumClose(UEnumeration *en); + +/** + * Function type declaration for uenum_count(). + * + * This function should count the number of elements + * in this enumeration + * + * @param en enumeration to be counted + * @param status pointer to UErrorCode variable + * @return number of elements in enumeration + */ +typedef int32_t U_CALLCONV +UEnumCount(UEnumeration *en, UErrorCode *status); + +/** + * Function type declaration for uenum_unext(). + * + * This function returns the next element as a UChar *, + * or NULL after all elements haven been enumerated. + * + * @param en enumeration + * @param resultLength pointer to result length + * @param status pointer to UErrorCode variable + * @return next element as UChar *, + * or NULL after all elements haven been enumerated + */ +typedef const UChar* U_CALLCONV +UEnumUNext(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status); + +/** + * Function type declaration for uenum_next(). + * + * This function returns the next element as a char *, + * or NULL after all elements haven been enumerated. + * + * @param en enumeration + * @param resultLength pointer to result length + * @param status pointer to UErrorCode variable + * @return next element as char *, + * or NULL after all elements haven been enumerated + */ +typedef const char* U_CALLCONV +UEnumNext(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status); + +/** + * Function type declaration for uenum_reset(). + * + * This function should reset the enumeration + * object + * + * @param en enumeration + * @param status pointer to UErrorCode variable + */ +typedef void U_CALLCONV +UEnumReset(UEnumeration* en, + UErrorCode* status); + + +struct UEnumeration { + /* baseContext. For the base class only. Don't touch! */ + void *baseContext; + + /* context. Use it for what you need */ + void *context; + + /** + * these are functions that will + * be used for APIs + */ + /* called from uenum_close */ + UEnumClose *close; + /* called from uenum_count */ + UEnumCount *count; + /* called from uenum_unext */ + UEnumUNext *uNext; + /* called from uenum_next */ + UEnumNext *next; + /* called from uenum_reset */ + UEnumReset *reset; +}; + +U_CDECL_END + +/* This is the default implementation for uenum_unext(). + * It automatically converts the char * string to UChar *. + * Don't call this directly. This is called internally by uenum_unext + * when a UEnumeration is defined with 'uNext' pointing to this + * function. + */ +U_CAPI const UChar* U_EXPORT2 +uenum_unextDefault(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status); + +/* This is the default implementation for uenum_next(). + * It automatically converts the UChar * string to char *. + * Don't call this directly. This is called internally by uenum_next + * when a UEnumeration is defined with 'next' pointing to this + * function. + */ +U_CAPI const char* U_EXPORT2 +uenum_nextDefault(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status); + +#endif diff --git a/thirdparty/icu4c/common/uhash.cpp b/thirdparty/icu4c/common/uhash.cpp new file mode 100644 index 0000000000..86311ceb0b --- /dev/null +++ b/thirdparty/icu4c/common/uhash.cpp @@ -0,0 +1,991 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* Date Name Description +* 03/22/00 aliu Adapted from original C++ ICU Hashtable. +* 07/06/01 aliu Modified to support int32_t keys on +* platforms with sizeof(void*) < 32. +****************************************************************************** +*/ + +#include "uhash.h" +#include "unicode/ustring.h" +#include "cstring.h" +#include "cmemory.h" +#include "uassert.h" +#include "ustr_imp.h" + +/* This hashtable is implemented as a double hash. All elements are + * stored in a single array with no secondary storage for collision + * resolution (no linked list, etc.). When there is a hash collision + * (when two unequal keys have the same hashcode) we resolve this by + * using a secondary hash. The secondary hash is an increment + * computed as a hash function (a different one) of the primary + * hashcode. This increment is added to the initial hash value to + * obtain further slots assigned to the same hash code. For this to + * work, the length of the array and the increment must be relatively + * prime. The easiest way to achieve this is to have the length of + * the array be prime, and the increment be any value from + * 1..length-1. + * + * Hashcodes are 32-bit integers. We make sure all hashcodes are + * non-negative by masking off the top bit. This has two effects: (1) + * modulo arithmetic is simplified. If we allowed negative hashcodes, + * then when we computed hashcode % length, we could get a negative + * result, which we would then have to adjust back into range. It's + * simpler to just make hashcodes non-negative. (2) It makes it easy + * to check for empty vs. occupied slots in the table. We just mark + * empty or deleted slots with a negative hashcode. + * + * The central function is _uhash_find(). This function looks for a + * slot matching the given key and hashcode. If one is found, it + * returns a pointer to that slot. If the table is full, and no match + * is found, it returns NULL -- in theory. This would make the code + * more complicated, since all callers of _uhash_find() would then + * have to check for a NULL result. To keep this from happening, we + * don't allow the table to fill. When there is only one + * empty/deleted slot left, uhash_put() will refuse to increase the + * count, and fail. This simplifies the code. In practice, one will + * seldom encounter this using default UHashtables. However, if a + * hashtable is set to a U_FIXED resize policy, or if memory is + * exhausted, then the table may fill. + * + * High and low water ratios control rehashing. They establish levels + * of fullness (from 0 to 1) outside of which the data array is + * reallocated and repopulated. Setting the low water ratio to zero + * means the table will never shrink. Setting the high water ratio to + * one means the table will never grow. The ratios should be + * coordinated with the ratio between successive elements of the + * PRIMES table, so that when the primeIndex is incremented or + * decremented during rehashing, it brings the ratio of count / length + * back into the desired range (between low and high water ratios). + */ + +/******************************************************************** + * PRIVATE Constants, Macros + ********************************************************************/ + +/* This is a list of non-consecutive primes chosen such that + * PRIMES[i+1] ~ 2*PRIMES[i]. (Currently, the ratio ranges from 1.81 + * to 2.18; the inverse ratio ranges from 0.459 to 0.552.) If this + * ratio is changed, the low and high water ratios should also be + * adjusted to suit. + * + * These prime numbers were also chosen so that they are the largest + * prime number while being less than a power of two. + */ +static const int32_t PRIMES[] = { + 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, 32749, + 65521, 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593, + 16777213, 33554393, 67108859, 134217689, 268435399, 536870909, + 1073741789, 2147483647 /*, 4294967291 */ +}; + +#define PRIMES_LENGTH UPRV_LENGTHOF(PRIMES) +#define DEFAULT_PRIME_INDEX 4 + +/* These ratios are tuned to the PRIMES array such that a resize + * places the table back into the zone of non-resizing. That is, + * after a call to _uhash_rehash(), a subsequent call to + * _uhash_rehash() should do nothing (should not churn). This is only + * a potential problem with U_GROW_AND_SHRINK. + */ +static const float RESIZE_POLICY_RATIO_TABLE[6] = { + /* low, high water ratio */ + 0.0F, 0.5F, /* U_GROW: Grow on demand, do not shrink */ + 0.1F, 0.5F, /* U_GROW_AND_SHRINK: Grow and shrink on demand */ + 0.0F, 1.0F /* U_FIXED: Never change size */ +}; + +/* + Invariants for hashcode values: + + * DELETED < 0 + * EMPTY < 0 + * Real hashes >= 0 + + Hashcodes may not start out this way, but internally they are + adjusted so that they are always positive. We assume 32-bit + hashcodes; adjust these constants for other hashcode sizes. +*/ +#define HASH_DELETED ((int32_t) 0x80000000) +#define HASH_EMPTY ((int32_t) HASH_DELETED + 1) + +#define IS_EMPTY_OR_DELETED(x) ((x) < 0) + +/* This macro expects a UHashTok.pointer as its keypointer and + valuepointer parameters */ +#define HASH_DELETE_KEY_VALUE(hash, keypointer, valuepointer) UPRV_BLOCK_MACRO_BEGIN { \ + if (hash->keyDeleter != NULL && keypointer != NULL) { \ + (*hash->keyDeleter)(keypointer); \ + } \ + if (hash->valueDeleter != NULL && valuepointer != NULL) { \ + (*hash->valueDeleter)(valuepointer); \ + } \ +} UPRV_BLOCK_MACRO_END + +/* + * Constants for hinting whether a key or value is an integer + * or a pointer. If a hint bit is zero, then the associated + * token is assumed to be an integer. + */ +#define HINT_KEY_POINTER (1) +#define HINT_VALUE_POINTER (2) + +/******************************************************************** + * PRIVATE Implementation + ********************************************************************/ + +static UHashTok +_uhash_setElement(UHashtable *hash, UHashElement* e, + int32_t hashcode, + UHashTok key, UHashTok value, int8_t hint) { + + UHashTok oldValue = e->value; + if (hash->keyDeleter != NULL && e->key.pointer != NULL && + e->key.pointer != key.pointer) { /* Avoid double deletion */ + (*hash->keyDeleter)(e->key.pointer); + } + if (hash->valueDeleter != NULL) { + if (oldValue.pointer != NULL && + oldValue.pointer != value.pointer) { /* Avoid double deletion */ + (*hash->valueDeleter)(oldValue.pointer); + } + oldValue.pointer = NULL; + } + /* Compilers should copy the UHashTok union correctly, but even if + * they do, memory heap tools (e.g. BoundsChecker) can get + * confused when a pointer is cloaked in a union and then copied. + * TO ALLEVIATE THIS, we use hints (based on what API the user is + * calling) to copy pointers when we know the user thinks + * something is a pointer. */ + if (hint & HINT_KEY_POINTER) { + e->key.pointer = key.pointer; + } else { + e->key = key; + } + if (hint & HINT_VALUE_POINTER) { + e->value.pointer = value.pointer; + } else { + e->value = value; + } + e->hashcode = hashcode; + return oldValue; +} + +/** + * Assumes that the given element is not empty or deleted. + */ +static UHashTok +_uhash_internalRemoveElement(UHashtable *hash, UHashElement* e) { + UHashTok empty; + U_ASSERT(!IS_EMPTY_OR_DELETED(e->hashcode)); + --hash->count; + empty.pointer = NULL; empty.integer = 0; + return _uhash_setElement(hash, e, HASH_DELETED, empty, empty, 0); +} + +static void +_uhash_internalSetResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { + U_ASSERT(hash != NULL); + U_ASSERT(((int32_t)policy) >= 0); + U_ASSERT(((int32_t)policy) < 3); + hash->lowWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2]; + hash->highWaterRatio = RESIZE_POLICY_RATIO_TABLE[policy * 2 + 1]; +} + +/** + * Allocate internal data array of a size determined by the given + * prime index. If the index is out of range it is pinned into range. + * If the allocation fails the status is set to + * U_MEMORY_ALLOCATION_ERROR and all array storage is freed. In + * either case the previous array pointer is overwritten. + * + * Caller must ensure primeIndex is in range 0..PRIME_LENGTH-1. + */ +static void +_uhash_allocate(UHashtable *hash, + int32_t primeIndex, + UErrorCode *status) { + + UHashElement *p, *limit; + UHashTok emptytok; + + if (U_FAILURE(*status)) return; + + U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH); + + hash->primeIndex = static_cast<int8_t>(primeIndex); + hash->length = PRIMES[primeIndex]; + + p = hash->elements = (UHashElement*) + uprv_malloc(sizeof(UHashElement) * hash->length); + + if (hash->elements == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + + emptytok.pointer = NULL; /* Only one of these two is needed */ + emptytok.integer = 0; /* but we don't know which one. */ + + limit = p + hash->length; + while (p < limit) { + p->key = emptytok; + p->value = emptytok; + p->hashcode = HASH_EMPTY; + ++p; + } + + hash->count = 0; + hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio); + hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio); +} + +static UHashtable* +_uhash_init(UHashtable *result, + UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + int32_t primeIndex, + UErrorCode *status) +{ + if (U_FAILURE(*status)) return NULL; + U_ASSERT(keyHash != NULL); + U_ASSERT(keyComp != NULL); + + result->keyHasher = keyHash; + result->keyComparator = keyComp; + result->valueComparator = valueComp; + result->keyDeleter = NULL; + result->valueDeleter = NULL; + result->allocated = FALSE; + _uhash_internalSetResizePolicy(result, U_GROW); + + _uhash_allocate(result, primeIndex, status); + + if (U_FAILURE(*status)) { + return NULL; + } + + return result; +} + +static UHashtable* +_uhash_create(UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + int32_t primeIndex, + UErrorCode *status) { + UHashtable *result; + + if (U_FAILURE(*status)) return NULL; + + result = (UHashtable*) uprv_malloc(sizeof(UHashtable)); + if (result == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + _uhash_init(result, keyHash, keyComp, valueComp, primeIndex, status); + result->allocated = TRUE; + + if (U_FAILURE(*status)) { + uprv_free(result); + return NULL; + } + + return result; +} + +/** + * Look for a key in the table, or if no such key exists, the first + * empty slot matching the given hashcode. Keys are compared using + * the keyComparator function. + * + * First find the start position, which is the hashcode modulo + * the length. Test it to see if it is: + * + * a. identical: First check the hash values for a quick check, + * then compare keys for equality using keyComparator. + * b. deleted + * c. empty + * + * Stop if it is identical or empty, otherwise continue by adding a + * "jump" value (moduloing by the length again to keep it within + * range) and retesting. For efficiency, there need enough empty + * values so that the searchs stop within a reasonable amount of time. + * This can be changed by changing the high/low water marks. + * + * In theory, this function can return NULL, if it is full (no empty + * or deleted slots) and if no matching key is found. In practice, we + * prevent this elsewhere (in uhash_put) by making sure the last slot + * in the table is never filled. + * + * The size of the table should be prime for this algorithm to work; + * otherwise we are not guaranteed that the jump value (the secondary + * hash) is relatively prime to the table length. + */ +static UHashElement* +_uhash_find(const UHashtable *hash, UHashTok key, + int32_t hashcode) { + + int32_t firstDeleted = -1; /* assume invalid index */ + int32_t theIndex, startIndex; + int32_t jump = 0; /* lazy evaluate */ + int32_t tableHash; + UHashElement *elements = hash->elements; + + hashcode &= 0x7FFFFFFF; /* must be positive */ + startIndex = theIndex = (hashcode ^ 0x4000000) % hash->length; + + do { + tableHash = elements[theIndex].hashcode; + if (tableHash == hashcode) { /* quick check */ + if ((*hash->keyComparator)(key, elements[theIndex].key)) { + return &(elements[theIndex]); + } + } else if (!IS_EMPTY_OR_DELETED(tableHash)) { + /* We have hit a slot which contains a key-value pair, + * but for which the hash code does not match. Keep + * looking. + */ + } else if (tableHash == HASH_EMPTY) { /* empty, end o' the line */ + break; + } else if (firstDeleted < 0) { /* remember first deleted */ + firstDeleted = theIndex; + } + if (jump == 0) { /* lazy compute jump */ + /* The jump value must be relatively prime to the table + * length. As long as the length is prime, then any value + * 1..length-1 will be relatively prime to it. + */ + jump = (hashcode % (hash->length - 1)) + 1; + } + theIndex = (theIndex + jump) % hash->length; + } while (theIndex != startIndex); + + if (firstDeleted >= 0) { + theIndex = firstDeleted; /* reset if had deleted slot */ + } else if (tableHash != HASH_EMPTY) { + /* We get to this point if the hashtable is full (no empty or + * deleted slots), and we've failed to find a match. THIS + * WILL NEVER HAPPEN as long as uhash_put() makes sure that + * count is always < length. + */ + UPRV_UNREACHABLE; + } + return &(elements[theIndex]); +} + +/** + * Attempt to grow or shrink the data arrays in order to make the + * count fit between the high and low water marks. hash_put() and + * hash_remove() call this method when the count exceeds the high or + * low water marks. This method may do nothing, if memory allocation + * fails, or if the count is already in range, or if the length is + * already at the low or high limit. In any case, upon return the + * arrays will be valid. + */ +static void +_uhash_rehash(UHashtable *hash, UErrorCode *status) { + + UHashElement *old = hash->elements; + int32_t oldLength = hash->length; + int32_t newPrimeIndex = hash->primeIndex; + int32_t i; + + if (hash->count > hash->highWaterMark) { + if (++newPrimeIndex >= PRIMES_LENGTH) { + return; + } + } else if (hash->count < hash->lowWaterMark) { + if (--newPrimeIndex < 0) { + return; + } + } else { + return; + } + + _uhash_allocate(hash, newPrimeIndex, status); + + if (U_FAILURE(*status)) { + hash->elements = old; + hash->length = oldLength; + return; + } + + for (i = oldLength - 1; i >= 0; --i) { + if (!IS_EMPTY_OR_DELETED(old[i].hashcode)) { + UHashElement *e = _uhash_find(hash, old[i].key, old[i].hashcode); + U_ASSERT(e != NULL); + U_ASSERT(e->hashcode == HASH_EMPTY); + e->key = old[i].key; + e->value = old[i].value; + e->hashcode = old[i].hashcode; + ++hash->count; + } + } + + uprv_free(old); +} + +static UHashTok +_uhash_remove(UHashtable *hash, + UHashTok key) { + /* First find the position of the key in the table. If the object + * has not been removed already, remove it. If the user wanted + * keys deleted, then delete it also. We have to put a special + * hashcode in that position that means that something has been + * deleted, since when we do a find, we have to continue PAST any + * deleted values. + */ + UHashTok result; + UHashElement* e = _uhash_find(hash, key, hash->keyHasher(key)); + U_ASSERT(e != NULL); + result.pointer = NULL; + result.integer = 0; + if (!IS_EMPTY_OR_DELETED(e->hashcode)) { + result = _uhash_internalRemoveElement(hash, e); + if (hash->count < hash->lowWaterMark) { + UErrorCode status = U_ZERO_ERROR; + _uhash_rehash(hash, &status); + } + } + return result; +} + +static UHashTok +_uhash_put(UHashtable *hash, + UHashTok key, + UHashTok value, + int8_t hint, + UErrorCode *status) { + + /* Put finds the position in the table for the new value. If the + * key is already in the table, it is deleted, if there is a + * non-NULL keyDeleter. Then the key, the hash and the value are + * all put at the position in their respective arrays. + */ + int32_t hashcode; + UHashElement* e; + UHashTok emptytok; + + if (U_FAILURE(*status)) { + goto err; + } + U_ASSERT(hash != NULL); + /* Cannot always check pointer here or iSeries sees NULL every time. */ + if ((hint & HINT_VALUE_POINTER) && value.pointer == NULL) { + /* Disallow storage of NULL values, since NULL is returned by + * get() to indicate an absent key. Storing NULL == removing. + */ + return _uhash_remove(hash, key); + } + if (hash->count > hash->highWaterMark) { + _uhash_rehash(hash, status); + if (U_FAILURE(*status)) { + goto err; + } + } + + hashcode = (*hash->keyHasher)(key); + e = _uhash_find(hash, key, hashcode); + U_ASSERT(e != NULL); + + if (IS_EMPTY_OR_DELETED(e->hashcode)) { + /* Important: We must never actually fill the table up. If we + * do so, then _uhash_find() will return NULL, and we'll have + * to check for NULL after every call to _uhash_find(). To + * avoid this we make sure there is always at least one empty + * or deleted slot in the table. This only is a problem if we + * are out of memory and rehash isn't working. + */ + ++hash->count; + if (hash->count == hash->length) { + /* Don't allow count to reach length */ + --hash->count; + *status = U_MEMORY_ALLOCATION_ERROR; + goto err; + } + } + + /* We must in all cases handle storage properly. If there was an + * old key, then it must be deleted (if the deleter != NULL). + * Make hashcodes stored in table positive. + */ + return _uhash_setElement(hash, e, hashcode & 0x7FFFFFFF, key, value, hint); + + err: + /* If the deleters are non-NULL, this method adopts its key and/or + * value arguments, and we must be sure to delete the key and/or + * value in all cases, even upon failure. + */ + HASH_DELETE_KEY_VALUE(hash, key.pointer, value.pointer); + emptytok.pointer = NULL; emptytok.integer = 0; + return emptytok; +} + + +/******************************************************************** + * PUBLIC API + ********************************************************************/ + +U_CAPI UHashtable* U_EXPORT2 +uhash_open(UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + UErrorCode *status) { + + return _uhash_create(keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status); +} + +U_CAPI UHashtable* U_EXPORT2 +uhash_openSize(UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + int32_t size, + UErrorCode *status) { + + /* Find the smallest index i for which PRIMES[i] >= size. */ + int32_t i = 0; + while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) { + ++i; + } + + return _uhash_create(keyHash, keyComp, valueComp, i, status); +} + +U_CAPI UHashtable* U_EXPORT2 +uhash_init(UHashtable *fillinResult, + UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + UErrorCode *status) { + + return _uhash_init(fillinResult, keyHash, keyComp, valueComp, DEFAULT_PRIME_INDEX, status); +} + +U_CAPI UHashtable* U_EXPORT2 +uhash_initSize(UHashtable *fillinResult, + UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + int32_t size, + UErrorCode *status) { + + // Find the smallest index i for which PRIMES[i] >= size. + int32_t i = 0; + while (i<(PRIMES_LENGTH-1) && PRIMES[i]<size) { + ++i; + } + return _uhash_init(fillinResult, keyHash, keyComp, valueComp, i, status); +} + +U_CAPI void U_EXPORT2 +uhash_close(UHashtable *hash) { + if (hash == NULL) { + return; + } + if (hash->elements != NULL) { + if (hash->keyDeleter != NULL || hash->valueDeleter != NULL) { + int32_t pos=UHASH_FIRST; + UHashElement *e; + while ((e = (UHashElement*) uhash_nextElement(hash, &pos)) != NULL) { + HASH_DELETE_KEY_VALUE(hash, e->key.pointer, e->value.pointer); + } + } + uprv_free(hash->elements); + hash->elements = NULL; + } + if (hash->allocated) { + uprv_free(hash); + } +} + +U_CAPI UHashFunction *U_EXPORT2 +uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn) { + UHashFunction *result = hash->keyHasher; + hash->keyHasher = fn; + return result; +} + +U_CAPI UKeyComparator *U_EXPORT2 +uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn) { + UKeyComparator *result = hash->keyComparator; + hash->keyComparator = fn; + return result; +} +U_CAPI UValueComparator *U_EXPORT2 +uhash_setValueComparator(UHashtable *hash, UValueComparator *fn){ + UValueComparator *result = hash->valueComparator; + hash->valueComparator = fn; + return result; +} + +U_CAPI UObjectDeleter *U_EXPORT2 +uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn) { + UObjectDeleter *result = hash->keyDeleter; + hash->keyDeleter = fn; + return result; +} + +U_CAPI UObjectDeleter *U_EXPORT2 +uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn) { + UObjectDeleter *result = hash->valueDeleter; + hash->valueDeleter = fn; + return result; +} + +U_CAPI void U_EXPORT2 +uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy) { + UErrorCode status = U_ZERO_ERROR; + _uhash_internalSetResizePolicy(hash, policy); + hash->lowWaterMark = (int32_t)(hash->length * hash->lowWaterRatio); + hash->highWaterMark = (int32_t)(hash->length * hash->highWaterRatio); + _uhash_rehash(hash, &status); +} + +U_CAPI int32_t U_EXPORT2 +uhash_count(const UHashtable *hash) { + return hash->count; +} + +U_CAPI void* U_EXPORT2 +uhash_get(const UHashtable *hash, + const void* key) { + UHashTok keyholder; + keyholder.pointer = (void*) key; + return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer; +} + +U_CAPI void* U_EXPORT2 +uhash_iget(const UHashtable *hash, + int32_t key) { + UHashTok keyholder; + keyholder.integer = key; + return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.pointer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_geti(const UHashtable *hash, + const void* key) { + UHashTok keyholder; + keyholder.pointer = (void*) key; + return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_igeti(const UHashtable *hash, + int32_t key) { + UHashTok keyholder; + keyholder.integer = key; + return _uhash_find(hash, keyholder, hash->keyHasher(keyholder))->value.integer; +} + +U_CAPI void* U_EXPORT2 +uhash_put(UHashtable *hash, + void* key, + void* value, + UErrorCode *status) { + UHashTok keyholder, valueholder; + keyholder.pointer = key; + valueholder.pointer = value; + return _uhash_put(hash, keyholder, valueholder, + HINT_KEY_POINTER | HINT_VALUE_POINTER, + status).pointer; +} + +U_CAPI void* U_EXPORT2 +uhash_iput(UHashtable *hash, + int32_t key, + void* value, + UErrorCode *status) { + UHashTok keyholder, valueholder; + keyholder.integer = key; + valueholder.pointer = value; + return _uhash_put(hash, keyholder, valueholder, + HINT_VALUE_POINTER, + status).pointer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_puti(UHashtable *hash, + void* key, + int32_t value, + UErrorCode *status) { + UHashTok keyholder, valueholder; + keyholder.pointer = key; + valueholder.integer = value; + return _uhash_put(hash, keyholder, valueholder, + HINT_KEY_POINTER, + status).integer; +} + + +U_CAPI int32_t U_EXPORT2 +uhash_iputi(UHashtable *hash, + int32_t key, + int32_t value, + UErrorCode *status) { + UHashTok keyholder, valueholder; + keyholder.integer = key; + valueholder.integer = value; + return _uhash_put(hash, keyholder, valueholder, + 0, /* neither is a ptr */ + status).integer; +} + +U_CAPI void* U_EXPORT2 +uhash_remove(UHashtable *hash, + const void* key) { + UHashTok keyholder; + keyholder.pointer = (void*) key; + return _uhash_remove(hash, keyholder).pointer; +} + +U_CAPI void* U_EXPORT2 +uhash_iremove(UHashtable *hash, + int32_t key) { + UHashTok keyholder; + keyholder.integer = key; + return _uhash_remove(hash, keyholder).pointer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_removei(UHashtable *hash, + const void* key) { + UHashTok keyholder; + keyholder.pointer = (void*) key; + return _uhash_remove(hash, keyholder).integer; +} + +U_CAPI int32_t U_EXPORT2 +uhash_iremovei(UHashtable *hash, + int32_t key) { + UHashTok keyholder; + keyholder.integer = key; + return _uhash_remove(hash, keyholder).integer; +} + +U_CAPI void U_EXPORT2 +uhash_removeAll(UHashtable *hash) { + int32_t pos = UHASH_FIRST; + const UHashElement *e; + U_ASSERT(hash != NULL); + if (hash->count != 0) { + while ((e = uhash_nextElement(hash, &pos)) != NULL) { + uhash_removeElement(hash, e); + } + } + U_ASSERT(hash->count == 0); +} + +U_CAPI const UHashElement* U_EXPORT2 +uhash_find(const UHashtable *hash, const void* key) { + UHashTok keyholder; + const UHashElement *e; + keyholder.pointer = (void*) key; + e = _uhash_find(hash, keyholder, hash->keyHasher(keyholder)); + return IS_EMPTY_OR_DELETED(e->hashcode) ? NULL : e; +} + +U_CAPI const UHashElement* U_EXPORT2 +uhash_nextElement(const UHashtable *hash, int32_t *pos) { + /* Walk through the array until we find an element that is not + * EMPTY and not DELETED. + */ + int32_t i; + U_ASSERT(hash != NULL); + for (i = *pos + 1; i < hash->length; ++i) { + if (!IS_EMPTY_OR_DELETED(hash->elements[i].hashcode)) { + *pos = i; + return &(hash->elements[i]); + } + } + + /* No more elements */ + return NULL; +} + +U_CAPI void* U_EXPORT2 +uhash_removeElement(UHashtable *hash, const UHashElement* e) { + U_ASSERT(hash != NULL); + U_ASSERT(e != NULL); + if (!IS_EMPTY_OR_DELETED(e->hashcode)) { + UHashElement *nce = (UHashElement *)e; + return _uhash_internalRemoveElement(hash, nce).pointer; + } + return NULL; +} + +/******************************************************************** + * UHashTok convenience + ********************************************************************/ + +/** + * Return a UHashTok for an integer. + */ +/*U_CAPI UHashTok U_EXPORT2 +uhash_toki(int32_t i) { + UHashTok tok; + tok.integer = i; + return tok; +}*/ + +/** + * Return a UHashTok for a pointer. + */ +/*U_CAPI UHashTok U_EXPORT2 +uhash_tokp(void* p) { + UHashTok tok; + tok.pointer = p; + return tok; +}*/ + +/******************************************************************** + * PUBLIC Key Hash Functions + ********************************************************************/ + +U_CAPI int32_t U_EXPORT2 +uhash_hashUChars(const UHashTok key) { + const UChar *s = (const UChar *)key.pointer; + return s == NULL ? 0 : ustr_hashUCharsN(s, u_strlen(s)); +} + +U_CAPI int32_t U_EXPORT2 +uhash_hashChars(const UHashTok key) { + const char *s = (const char *)key.pointer; + return s == NULL ? 0 : static_cast<int32_t>(ustr_hashCharsN(s, static_cast<int32_t>(uprv_strlen(s)))); +} + +U_CAPI int32_t U_EXPORT2 +uhash_hashIChars(const UHashTok key) { + const char *s = (const char *)key.pointer; + return s == NULL ? 0 : ustr_hashICharsN(s, static_cast<int32_t>(uprv_strlen(s))); +} + +U_CAPI UBool U_EXPORT2 +uhash_equals(const UHashtable* hash1, const UHashtable* hash2){ + int32_t count1, count2, pos, i; + + if(hash1==hash2){ + return TRUE; + } + + /* + * Make sure that we are comparing 2 valid hashes of the same type + * with valid comparison functions. + * Without valid comparison functions, a binary comparison + * of the hash values will yield random results on machines + * with 64-bit pointers and 32-bit integer hashes. + * A valueComparator is normally optional. + */ + if (hash1==NULL || hash2==NULL || + hash1->keyComparator != hash2->keyComparator || + hash1->valueComparator != hash2->valueComparator || + hash1->valueComparator == NULL) + { + /* + Normally we would return an error here about incompatible hash tables, + but we return FALSE instead. + */ + return FALSE; + } + + count1 = uhash_count(hash1); + count2 = uhash_count(hash2); + if(count1!=count2){ + return FALSE; + } + + pos=UHASH_FIRST; + for(i=0; i<count1; i++){ + const UHashElement* elem1 = uhash_nextElement(hash1, &pos); + const UHashTok key1 = elem1->key; + const UHashTok val1 = elem1->value; + /* here the keys are not compared, instead the key form hash1 is used to fetch + * value from hash2. If the hashes are equal then then both hashes should + * contain equal values for the same key! + */ + const UHashElement* elem2 = _uhash_find(hash2, key1, hash2->keyHasher(key1)); + const UHashTok val2 = elem2->value; + if(hash1->valueComparator(val1, val2)==FALSE){ + return FALSE; + } + } + return TRUE; +} + +/******************************************************************** + * PUBLIC Comparator Functions + ********************************************************************/ + +U_CAPI UBool U_EXPORT2 +uhash_compareUChars(const UHashTok key1, const UHashTok key2) { + const UChar *p1 = (const UChar*) key1.pointer; + const UChar *p2 = (const UChar*) key2.pointer; + if (p1 == p2) { + return TRUE; + } + if (p1 == NULL || p2 == NULL) { + return FALSE; + } + while (*p1 != 0 && *p1 == *p2) { + ++p1; + ++p2; + } + return (UBool)(*p1 == *p2); +} + +U_CAPI UBool U_EXPORT2 +uhash_compareChars(const UHashTok key1, const UHashTok key2) { + const char *p1 = (const char*) key1.pointer; + const char *p2 = (const char*) key2.pointer; + if (p1 == p2) { + return TRUE; + } + if (p1 == NULL || p2 == NULL) { + return FALSE; + } + while (*p1 != 0 && *p1 == *p2) { + ++p1; + ++p2; + } + return (UBool)(*p1 == *p2); +} + +U_CAPI UBool U_EXPORT2 +uhash_compareIChars(const UHashTok key1, const UHashTok key2) { + const char *p1 = (const char*) key1.pointer; + const char *p2 = (const char*) key2.pointer; + if (p1 == p2) { + return TRUE; + } + if (p1 == NULL || p2 == NULL) { + return FALSE; + } + while (*p1 != 0 && uprv_tolower(*p1) == uprv_tolower(*p2)) { + ++p1; + ++p2; + } + return (UBool)(*p1 == *p2); +} + +/******************************************************************** + * PUBLIC int32_t Support Functions + ********************************************************************/ + +U_CAPI int32_t U_EXPORT2 +uhash_hashLong(const UHashTok key) { + return key.integer; +} + +U_CAPI UBool U_EXPORT2 +uhash_compareLong(const UHashTok key1, const UHashTok key2) { + return (UBool)(key1.integer == key2.integer); +} diff --git a/thirdparty/icu4c/common/uhash.h b/thirdparty/icu4c/common/uhash.h new file mode 100644 index 0000000000..b59d2711bb --- /dev/null +++ b/thirdparty/icu4c/common/uhash.h @@ -0,0 +1,718 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1997-2015, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* Date Name Description +* 03/22/00 aliu Adapted from original C++ ICU Hashtable. +* 07/06/01 aliu Modified to support int32_t keys on +* platforms with sizeof(void*) < 32. +****************************************************************************** +*/ + +#ifndef UHASH_H +#define UHASH_H + +#include "unicode/utypes.h" +#include "cmemory.h" +#include "uelement.h" +#include "unicode/localpointer.h" + +/** + * UHashtable stores key-value pairs and does moderately fast lookup + * based on keys. It provides a good tradeoff between access time and + * storage space. As elements are added to it, it grows to accomodate + * them. By default, the table never shrinks, even if all elements + * are removed from it. + * + * Keys and values are stored as void* pointers. These void* pointers + * may be actual pointers to strings, objects, or any other structure + * in memory, or they may simply be integral values cast to void*. + * UHashtable doesn't care and manipulates them via user-supplied + * functions. These functions hash keys, compare keys, delete keys, + * and delete values. Some function pointers are optional (may be + * NULL); others must be supplied. Several prebuilt functions exist + * to handle common key types. + * + * UHashtable ownership of keys and values is flexible, and controlled + * by whether or not the key deleter and value deleter functions are + * set. If a void* key is actually a pointer to a deletable object, + * then UHashtable can be made to delete that object by setting the + * key deleter function pointer to a non-NULL value. If this is done, + * then keys passed to uhash_put() are owned by the hashtable and will + * be deleted by it at some point, either as keys are replaced, or + * when uhash_close() is finally called. The same is true of values + * and the value deleter function pointer. Keys passed to methods + * other than uhash_put() are never owned by the hashtable. + * + * NULL values are not allowed. uhash_get() returns NULL to indicate + * a key that is not in the table, and having a NULL value in the + * table would generate an ambiguous result. If a key and a NULL + * value is passed to uhash_put(), this has the effect of doing a + * uhash_remove() on that key. This keeps uhash_get(), uhash_count(), + * and uhash_nextElement() consistent with one another. + * + * To see everything in a hashtable, use uhash_nextElement() to + * iterate through its contents. Each call to this function returns a + * UHashElement pointer. A hash element contains a key, value, and + * hashcode. During iteration an element may be deleted by calling + * uhash_removeElement(); iteration may safely continue thereafter. + * The uhash_remove() function may also be safely called in + * mid-iteration. If uhash_put() is called during iteration, + * the iteration is still guaranteed to terminate reasonably, but + * there is no guarantee that every element will be returned or that + * some won't be returned more than once. + * + * Under no circumstances should the UHashElement returned by + * uhash_nextElement be modified directly. + * + * By default, the hashtable grows when necessary, but never shrinks, + * even if all items are removed. For most applications this is + * optimal. However, in a highly dynamic usage where memory is at a + * premium, the table can be set to both grow and shrink by calling + * uhash_setResizePolicy() with the policy U_GROW_AND_SHRINK. In a + * situation where memory is critical and the client wants a table + * that does not grow at all, the constant U_FIXED can be used. + */ + +/******************************************************************** + * Data Structures + ********************************************************************/ + +U_CDECL_BEGIN + +/** + * A key or value within a UHashtable. + * The hashing and comparison functions take a pointer to a + * UHashTok, but the deleter receives the void* pointer within it. + */ +typedef UElement UHashTok; + +/** + * This is a single hash element. + */ +struct UHashElement { + /* Reorder these elements to pack nicely if necessary */ + int32_t hashcode; + UHashTok value; + UHashTok key; +}; +typedef struct UHashElement UHashElement; + +/** + * A hashing function. + * @param key A key stored in a hashtable + * @return A NON-NEGATIVE hash code for parm. + */ +typedef int32_t U_CALLCONV UHashFunction(const UHashTok key); + +/** + * A key equality (boolean) comparison function. + */ +typedef UElementsAreEqual UKeyComparator; + +/** + * A value equality (boolean) comparison function. + */ +typedef UElementsAreEqual UValueComparator; + +/* see cmemory.h for UObjectDeleter and uprv_deleteUObject() */ + +/** + * This specifies whether or not, and how, the hastable resizes itself. + * See uhash_setResizePolicy(). + */ +enum UHashResizePolicy { + U_GROW, /* Grow on demand, do not shrink */ + U_GROW_AND_SHRINK, /* Grow and shrink on demand */ + U_FIXED /* Never change size */ +}; + +/** + * The UHashtable struct. Clients should treat this as an opaque data + * type and manipulate it only through the uhash_... API. + */ +struct UHashtable { + + /* Main key-value pair storage array */ + + UHashElement *elements; + + /* Function pointers */ + + UHashFunction *keyHasher; /* Computes hash from key. + * Never null. */ + UKeyComparator *keyComparator; /* Compares keys for equality. + * Never null. */ + UValueComparator *valueComparator; /* Compares the values for equality */ + + UObjectDeleter *keyDeleter; /* Deletes keys when required. + * If NULL won't do anything */ + UObjectDeleter *valueDeleter; /* Deletes values when required. + * If NULL won't do anything */ + + /* Size parameters */ + + int32_t count; /* The number of key-value pairs in this table. + * 0 <= count <= length. In practice we + * never let count == length (see code). */ + int32_t length; /* The physical size of the arrays hashes, keys + * and values. Must be prime. */ + + /* Rehashing thresholds */ + + int32_t highWaterMark; /* If count > highWaterMark, rehash */ + int32_t lowWaterMark; /* If count < lowWaterMark, rehash */ + float highWaterRatio; /* 0..1; high water as a fraction of length */ + float lowWaterRatio; /* 0..1; low water as a fraction of length */ + + int8_t primeIndex; /* Index into our prime table for length. + * length == PRIMES[primeIndex] */ + UBool allocated; /* Was this UHashtable allocated? */ +}; +typedef struct UHashtable UHashtable; + +U_CDECL_END + +/******************************************************************** + * API + ********************************************************************/ + +/** + * Initialize a new UHashtable. + * @param keyHash A pointer to the key hashing function. Must not be + * NULL. + * @param keyComp A pointer to the function that compares keys. Must + * not be NULL. + * @param status A pointer to an UErrorCode to receive any errors. + * @return A pointer to a UHashtable, or 0 if an error occurred. + * @see uhash_openSize + */ +U_CAPI UHashtable* U_EXPORT2 +uhash_open(UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + UErrorCode *status); + +/** + * Initialize a new UHashtable with a given initial size. + * @param keyHash A pointer to the key hashing function. Must not be + * NULL. + * @param keyComp A pointer to the function that compares keys. Must + * not be NULL. + * @param size The initial capacity of this hash table. + * @param status A pointer to an UErrorCode to receive any errors. + * @return A pointer to a UHashtable, or 0 if an error occurred. + * @see uhash_open + */ +U_CAPI UHashtable* U_EXPORT2 +uhash_openSize(UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + int32_t size, + UErrorCode *status); + +/** + * Initialize an existing UHashtable. + * @param keyHash A pointer to the key hashing function. Must not be + * NULL. + * @param keyComp A pointer to the function that compares keys. Must + * not be NULL. + * @param status A pointer to an UErrorCode to receive any errors. + * @return A pointer to a UHashtable, or 0 if an error occurred. + * @see uhash_openSize + */ +U_CAPI UHashtable* U_EXPORT2 +uhash_init(UHashtable *hash, + UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + UErrorCode *status); + +/** + * Initialize an existing UHashtable. + * @param keyHash A pointer to the key hashing function. Must not be + * NULL. + * @param keyComp A pointer to the function that compares keys. Must + * not be NULL. + * @param size The initial capacity of this hash table. + * @param status A pointer to an UErrorCode to receive any errors. + * @return A pointer to a UHashtable, or 0 if an error occurred. + * @see uhash_openSize + */ +U_CAPI UHashtable* U_EXPORT2 +uhash_initSize(UHashtable *hash, + UHashFunction *keyHash, + UKeyComparator *keyComp, + UValueComparator *valueComp, + int32_t size, + UErrorCode *status); + +/** + * Close a UHashtable, releasing the memory used. + * @param hash The UHashtable to close. If hash is NULL no operation is performed. + */ +U_CAPI void U_EXPORT2 +uhash_close(UHashtable *hash); + + + +/** + * Set the function used to hash keys. + * @param hash The UHashtable to set + * @param fn the function to be used hash keys; must not be NULL + * @return the previous key hasher; non-NULL + */ +U_CAPI UHashFunction *U_EXPORT2 +uhash_setKeyHasher(UHashtable *hash, UHashFunction *fn); + +/** + * Set the function used to compare keys. The default comparison is a + * void* pointer comparison. + * @param hash The UHashtable to set + * @param fn the function to be used compare keys; must not be NULL + * @return the previous key comparator; non-NULL + */ +U_CAPI UKeyComparator *U_EXPORT2 +uhash_setKeyComparator(UHashtable *hash, UKeyComparator *fn); + +/** + * Set the function used to compare values. The default comparison is a + * void* pointer comparison. + * @param hash The UHashtable to set + * @param fn the function to be used compare keys; must not be NULL + * @return the previous key comparator; non-NULL + */ +U_CAPI UValueComparator *U_EXPORT2 +uhash_setValueComparator(UHashtable *hash, UValueComparator *fn); + +/** + * Set the function used to delete keys. If this function pointer is + * NULL, this hashtable does not delete keys. If it is non-NULL, this + * hashtable does delete keys. This function should be set once + * before any elements are added to the hashtable and should not be + * changed thereafter. + * @param hash The UHashtable to set + * @param fn the function to be used delete keys, or NULL + * @return the previous key deleter; may be NULL + */ +U_CAPI UObjectDeleter *U_EXPORT2 +uhash_setKeyDeleter(UHashtable *hash, UObjectDeleter *fn); + +/** + * Set the function used to delete values. If this function pointer + * is NULL, this hashtable does not delete values. If it is non-NULL, + * this hashtable does delete values. This function should be set + * once before any elements are added to the hashtable and should not + * be changed thereafter. + * @param hash The UHashtable to set + * @param fn the function to be used delete values, or NULL + * @return the previous value deleter; may be NULL + */ +U_CAPI UObjectDeleter *U_EXPORT2 +uhash_setValueDeleter(UHashtable *hash, UObjectDeleter *fn); + +/** + * Specify whether or not, and how, the hastable resizes itself. + * By default, tables grow but do not shrink (policy U_GROW). + * See enum UHashResizePolicy. + * @param hash The UHashtable to set + * @param policy The way the hashtable resizes itself, {U_GROW, U_GROW_AND_SHRINK, U_FIXED} + */ +U_CAPI void U_EXPORT2 +uhash_setResizePolicy(UHashtable *hash, enum UHashResizePolicy policy); + +/** + * Get the number of key-value pairs stored in a UHashtable. + * @param hash The UHashtable to query. + * @return The number of key-value pairs stored in hash. + */ +U_CAPI int32_t U_EXPORT2 +uhash_count(const UHashtable *hash); + +/** + * Put a (key=pointer, value=pointer) item in a UHashtable. If the + * keyDeleter is non-NULL, then the hashtable owns 'key' after this + * call. If the valueDeleter is non-NULL, then the hashtable owns + * 'value' after this call. Storing a NULL value is the same as + * calling uhash_remove(). + * @param hash The target UHashtable. + * @param key The key to store. + * @param value The value to store, may be NULL (see above). + * @param status A pointer to an UErrorCode to receive any errors. + * @return The previous value, or NULL if none. + * @see uhash_get + */ +U_CAPI void* U_EXPORT2 +uhash_put(UHashtable *hash, + void *key, + void *value, + UErrorCode *status); + +/** + * Put a (key=integer, value=pointer) item in a UHashtable. + * keyDeleter must be NULL. If the valueDeleter is non-NULL, then the + * hashtable owns 'value' after this call. Storing a NULL value is + * the same as calling uhash_remove(). + * @param hash The target UHashtable. + * @param key The integer key to store. + * @param value The value to store, may be NULL (see above). + * @param status A pointer to an UErrorCode to receive any errors. + * @return The previous value, or NULL if none. + * @see uhash_get + */ +U_CAPI void* U_EXPORT2 +uhash_iput(UHashtable *hash, + int32_t key, + void* value, + UErrorCode *status); + +/** + * Put a (key=pointer, value=integer) item in a UHashtable. If the + * keyDeleter is non-NULL, then the hashtable owns 'key' after this + * call. valueDeleter must be NULL. Storing a 0 value is the same as + * calling uhash_remove(). + * @param hash The target UHashtable. + * @param key The key to store. + * @param value The integer value to store. + * @param status A pointer to an UErrorCode to receive any errors. + * @return The previous value, or 0 if none. + * @see uhash_get + */ +U_CAPI int32_t U_EXPORT2 +uhash_puti(UHashtable *hash, + void* key, + int32_t value, + UErrorCode *status); + +/** + * Put a (key=integer, value=integer) item in a UHashtable. If the + * keyDeleter is non-NULL, then the hashtable owns 'key' after this + * call. valueDeleter must be NULL. Storing a 0 value is the same as + * calling uhash_remove(). + * @param hash The target UHashtable. + * @param key The key to store. + * @param value The integer value to store. + * @param status A pointer to an UErrorCode to receive any errors. + * @return The previous value, or 0 if none. + * @see uhash_get + */ +U_CAPI int32_t U_EXPORT2 +uhash_iputi(UHashtable *hash, + int32_t key, + int32_t value, + UErrorCode *status); + +/** + * Retrieve a pointer value from a UHashtable using a pointer key, + * as previously stored by uhash_put(). + * @param hash The target UHashtable. + * @param key A pointer key stored in a hashtable + * @return The requested item, or NULL if not found. + */ +U_CAPI void* U_EXPORT2 +uhash_get(const UHashtable *hash, + const void *key); + +/** + * Retrieve a pointer value from a UHashtable using a integer key, + * as previously stored by uhash_iput(). + * @param hash The target UHashtable. + * @param key An integer key stored in a hashtable + * @return The requested item, or NULL if not found. + */ +U_CAPI void* U_EXPORT2 +uhash_iget(const UHashtable *hash, + int32_t key); + +/** + * Retrieve an integer value from a UHashtable using a pointer key, + * as previously stored by uhash_puti(). + * @param hash The target UHashtable. + * @param key A pointer key stored in a hashtable + * @return The requested item, or 0 if not found. + */ +U_CAPI int32_t U_EXPORT2 +uhash_geti(const UHashtable *hash, + const void* key); +/** + * Retrieve an integer value from a UHashtable using an integer key, + * as previously stored by uhash_iputi(). + * @param hash The target UHashtable. + * @param key An integer key stored in a hashtable + * @return The requested item, or 0 if not found. + */ +U_CAPI int32_t U_EXPORT2 +uhash_igeti(const UHashtable *hash, + int32_t key); + +/** + * Remove an item from a UHashtable stored by uhash_put(). + * @param hash The target UHashtable. + * @param key A key stored in a hashtable + * @return The item removed, or NULL if not found. + */ +U_CAPI void* U_EXPORT2 +uhash_remove(UHashtable *hash, + const void *key); + +/** + * Remove an item from a UHashtable stored by uhash_iput(). + * @param hash The target UHashtable. + * @param key An integer key stored in a hashtable + * @return The item removed, or NULL if not found. + */ +U_CAPI void* U_EXPORT2 +uhash_iremove(UHashtable *hash, + int32_t key); + +/** + * Remove an item from a UHashtable stored by uhash_puti(). + * @param hash The target UHashtable. + * @param key An key stored in a hashtable + * @return The item removed, or 0 if not found. + */ +U_CAPI int32_t U_EXPORT2 +uhash_removei(UHashtable *hash, + const void* key); + +/** + * Remove an item from a UHashtable stored by uhash_iputi(). + * @param hash The target UHashtable. + * @param key An integer key stored in a hashtable + * @return The item removed, or 0 if not found. + */ +U_CAPI int32_t U_EXPORT2 +uhash_iremovei(UHashtable *hash, + int32_t key); + +/** + * Remove all items from a UHashtable. + * @param hash The target UHashtable. + */ +U_CAPI void U_EXPORT2 +uhash_removeAll(UHashtable *hash); + +/** + * Locate an element of a UHashtable. The caller must not modify the + * returned object. The primary use of this function is to obtain the + * stored key when it may not be identical to the search key. For + * example, if the compare function is a case-insensitive string + * compare, then the hash key may be desired in order to obtain the + * canonical case corresponding to a search key. + * @param hash The target UHashtable. + * @param key A key stored in a hashtable + * @return a hash element, or NULL if the key is not found. + */ +U_CAPI const UHashElement* U_EXPORT2 +uhash_find(const UHashtable *hash, const void* key); + +/** + * \def UHASH_FIRST + * Constant for use with uhash_nextElement + * @see uhash_nextElement + */ +#define UHASH_FIRST (-1) + +/** + * Iterate through the elements of a UHashtable. The caller must not + * modify the returned object. However, uhash_removeElement() may be + * called during iteration to remove an element from the table. + * Iteration may safely be resumed afterwards. If uhash_put() is + * called during iteration the iteration will then be out of sync and + * should be restarted. + * @param hash The target UHashtable. + * @param pos This should be set to UHASH_FIRST initially, and left untouched + * thereafter. + * @return a hash element, or NULL if no further key-value pairs + * exist in the table. + */ +U_CAPI const UHashElement* U_EXPORT2 +uhash_nextElement(const UHashtable *hash, + int32_t *pos); + +/** + * Remove an element, returned by uhash_nextElement(), from the table. + * Iteration may be safely continued afterwards. + * @param hash The hashtable + * @param e The element, returned by uhash_nextElement(), to remove. + * Must not be NULL. Must not be an empty or deleted element (as long + * as this was returned by uhash_nextElement() it will not be empty or + * deleted). Note: Although this parameter is const, it will be + * modified. + * @return the value that was removed. + */ +U_CAPI void* U_EXPORT2 +uhash_removeElement(UHashtable *hash, const UHashElement* e); + +/******************************************************************** + * UHashTok convenience + ********************************************************************/ + +/** + * Return a UHashTok for an integer. + * @param i The given integer + * @return a UHashTok for an integer. + */ +/*U_CAPI UHashTok U_EXPORT2 +uhash_toki(int32_t i);*/ + +/** + * Return a UHashTok for a pointer. + * @param p The given pointer + * @return a UHashTok for a pointer. + */ +/*U_CAPI UHashTok U_EXPORT2 +uhash_tokp(void* p);*/ + +/******************************************************************** + * UChar* and char* Support Functions + ********************************************************************/ + +/** + * Generate a hash code for a null-terminated UChar* string. If the + * string is not null-terminated do not use this function. Use + * together with uhash_compareUChars. + * @param key The string (const UChar*) to hash. + * @return A hash code for the key. + */ +U_CAPI int32_t U_EXPORT2 +uhash_hashUChars(const UHashTok key); + +/** + * Generate a hash code for a null-terminated char* string. If the + * string is not null-terminated do not use this function. Use + * together with uhash_compareChars. + * @param key The string (const char*) to hash. + * @return A hash code for the key. + */ +U_CAPI int32_t U_EXPORT2 +uhash_hashChars(const UHashTok key); + +/** + * Generate a case-insensitive hash code for a null-terminated char* + * string. If the string is not null-terminated do not use this + * function. Use together with uhash_compareIChars. + * @param key The string (const char*) to hash. + * @return A hash code for the key. + */ +U_CAPI int32_t U_EXPORT2 +uhash_hashIChars(const UHashTok key); + +/** + * Comparator for null-terminated UChar* strings. Use together with + * uhash_hashUChars. + * @param key1 The string for comparison + * @param key2 The string for comparison + * @return true if key1 and key2 are equal, return false otherwise. + */ +U_CAPI UBool U_EXPORT2 +uhash_compareUChars(const UHashTok key1, const UHashTok key2); + +/** + * Comparator for null-terminated char* strings. Use together with + * uhash_hashChars. + * @param key1 The string for comparison + * @param key2 The string for comparison + * @return true if key1 and key2 are equal, return false otherwise. + */ +U_CAPI UBool U_EXPORT2 +uhash_compareChars(const UHashTok key1, const UHashTok key2); + +/** + * Case-insensitive comparator for null-terminated char* strings. Use + * together with uhash_hashIChars. + * @param key1 The string for comparison + * @param key2 The string for comparison + * @return true if key1 and key2 are equal, return false otherwise. + */ +U_CAPI UBool U_EXPORT2 +uhash_compareIChars(const UHashTok key1, const UHashTok key2); + +/******************************************************************** + * UnicodeString Support Functions + ********************************************************************/ + +/** + * Hash function for UnicodeString* keys. + * @param key The string (const char*) to hash. + * @return A hash code for the key. + */ +U_CAPI int32_t U_EXPORT2 +uhash_hashUnicodeString(const UElement key); + +/** + * Hash function for UnicodeString* keys (case insensitive). + * Make sure to use together with uhash_compareCaselessUnicodeString. + * @param key The string (const char*) to hash. + * @return A hash code for the key. + */ +U_CAPI int32_t U_EXPORT2 +uhash_hashCaselessUnicodeString(const UElement key); + +/******************************************************************** + * int32_t Support Functions + ********************************************************************/ + +/** + * Hash function for 32-bit integer keys. + * @param key The string (const char*) to hash. + * @return A hash code for the key. + */ +U_CAPI int32_t U_EXPORT2 +uhash_hashLong(const UHashTok key); + +/** + * Comparator function for 32-bit integer keys. + * @param key1 The integer for comparison + * @param Key2 The integer for comparison + * @return true if key1 and key2 are equal, return false otherwise + */ +U_CAPI UBool U_EXPORT2 +uhash_compareLong(const UHashTok key1, const UHashTok key2); + +/******************************************************************** + * Other Support Functions + ********************************************************************/ + +/** + * Deleter for Hashtable objects. + * @param obj The object to be deleted + */ +U_CAPI void U_EXPORT2 +uhash_deleteHashtable(void *obj); + +/* Use uprv_free() itself as a deleter for any key or value allocated using uprv_malloc. */ + +/** + * Checks if the given hash tables are equal or not. + * @param hash1 + * @param hash2 + * @return true if the hashtables are equal and false if not. + */ +U_CAPI UBool U_EXPORT2 +uhash_equals(const UHashtable* hash1, const UHashtable* hash2); + + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUHashtablePointer + * "Smart pointer" class, closes a UHashtable via uhash_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUHashtablePointer, UHashtable, uhash_close); + +U_NAMESPACE_END + +#endif + +#endif diff --git a/thirdparty/icu4c/common/uhash_us.cpp b/thirdparty/icu4c/common/uhash_us.cpp new file mode 100644 index 0000000000..ef482c2746 --- /dev/null +++ b/thirdparty/icu4c/common/uhash_us.cpp @@ -0,0 +1,26 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1997-2011, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* Date Name Description +* 03/22/00 aliu Creation. +* 07/06/01 aliu Modified to support int32_t keys on +* platforms with sizeof(void*) < 32. +****************************************************************************** +*/ + +#include "hash.h" + +/** + * Deleter for Hashtable objects. + */ +U_CAPI void U_EXPORT2 +uhash_deleteHashtable(void *obj) { + U_NAMESPACE_USE + delete (Hashtable*) obj; +} + +//eof diff --git a/thirdparty/icu4c/common/uidna.cpp b/thirdparty/icu4c/common/uidna.cpp new file mode 100644 index 0000000000..ac2f9c3c8c --- /dev/null +++ b/thirdparty/icu4c/common/uidna.cpp @@ -0,0 +1,921 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * + * Copyright (C) 2003-2014, International Business Machines + * Corporation and others. All Rights Reserved. + * + ******************************************************************************* + * file name: uidna.cpp + * encoding: UTF-8 + * tab size: 8 (not used) + * indentation:4 + * + * created on: 2003feb1 + * created by: Ram Viswanadha + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_IDNA + +#include "unicode/uidna.h" +#include "unicode/ustring.h" +#include "unicode/usprep.h" +#include "punycode.h" +#include "ustr_imp.h" +#include "cmemory.h" +#include "uassert.h" +#include "sprpimpl.h" + +/* it is official IDNA ACE Prefix is "xn--" */ +static const UChar ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; +#define ACE_PREFIX_LENGTH 4 + +#define MAX_LABEL_LENGTH 63 +/* The Max length of the labels should not be more than MAX_LABEL_LENGTH */ +#define MAX_LABEL_BUFFER_SIZE 100 + +#define MAX_DOMAIN_NAME_LENGTH 255 +/* The Max length of the domain names should not be more than MAX_DOMAIN_NAME_LENGTH */ +#define MAX_IDN_BUFFER_SIZE MAX_DOMAIN_NAME_LENGTH+1 + +#define LOWER_CASE_DELTA 0x0020 +#define HYPHEN 0x002D +#define FULL_STOP 0x002E +#define CAPITAL_A 0x0041 +#define CAPITAL_Z 0x005A + +inline static UChar +toASCIILower(UChar ch){ + if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ + return ch + LOWER_CASE_DELTA; + } + return ch; +} + +inline static UBool +startsWithPrefix(const UChar* src , int32_t srcLength){ + if(srcLength < ACE_PREFIX_LENGTH){ + return FALSE; + } + + for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ + if(toASCIILower(src[i]) != ACE_PREFIX[i]){ + return FALSE; + } + } + return TRUE; +} + + +inline static int32_t +compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len, + const UChar* s2, int32_t s2Len){ + + int32_t minLength; + int32_t lengthResult; + + // are we comparing different lengths? + if(s1Len != s2Len) { + if(s1Len < s2Len) { + minLength = s1Len; + lengthResult = -1; + } else { + minLength = s2Len; + lengthResult = 1; + } + } else { + // ok the lengths are equal + minLength = s1Len; + lengthResult = 0; + } + + UChar c1,c2; + int32_t rc; + + for(int32_t i =0;/* no condition */;i++) { + + /* If we reach the ends of both strings then they match */ + if(i == minLength) { + return lengthResult; + } + + c1 = s1[i]; + c2 = s2[i]; + + /* Case-insensitive comparison */ + if(c1!=c2) { + rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); + if(rc!=0) { + lengthResult=rc; + break; + } + } + } + return lengthResult; +} + + +/** + * Ascertain if the given code point is a label separator as + * defined by the IDNA RFC + * + * @param ch The code point to be ascertained + * @return true if the char is a label separator + * @stable ICU 2.8 + */ +static inline UBool isLabelSeparator(UChar ch){ + switch(ch){ + case 0x002e: + case 0x3002: + case 0xFF0E: + case 0xFF61: + return TRUE; + default: + return FALSE; + } +} + +// returns the length of the label excluding the separator +// if *limit == separator then the length returned does not include +// the separtor. +static inline int32_t +getNextSeparator(UChar *src, int32_t srcLength, + UChar **limit, UBool *done){ + if(srcLength == -1){ + int32_t i; + for(i=0 ; ;i++){ + if(src[i] == 0){ + *limit = src + i; // point to null + *done = TRUE; + return i; + } + if(isLabelSeparator(src[i])){ + *limit = src + (i+1); // go past the delimiter + return i; + + } + } + }else{ + int32_t i; + for(i=0;i<srcLength;i++){ + if(isLabelSeparator(src[i])){ + *limit = src + (i+1); // go past the delimiter + return i; + } + } + // we have not found the delimiter + // if(i==srcLength) + *limit = src+srcLength; + *done = TRUE; + + return i; + } +} +static inline UBool isLDHChar(UChar ch){ + // high runner case + if(ch>0x007A){ + return FALSE; + } + //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] + if( (ch==0x002D) || + (0x0030 <= ch && ch <= 0x0039) || + (0x0041 <= ch && ch <= 0x005A) || + (0x0061 <= ch && ch <= 0x007A) + ){ + return TRUE; + } + return FALSE; +} + +static int32_t +_internal_toASCII(const UChar* src, int32_t srcLength, + UChar* dest, int32_t destCapacity, + int32_t options, + UStringPrepProfile* nameprep, + UParseError* parseError, + UErrorCode* status) +{ + + // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. + UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; + //initialize pointers to stack buffers + UChar *b1 = b1Stack, *b2 = b2Stack; + int32_t b1Len=0, b2Len, + b1Capacity = MAX_LABEL_BUFFER_SIZE, + b2Capacity = MAX_LABEL_BUFFER_SIZE , + reqLength=0; + + int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; + UBool* caseFlags = NULL; + + // the source contains all ascii codepoints + UBool srcIsASCII = TRUE; + // assume the source contains all LDH codepoints + UBool srcIsLDH = TRUE; + + int32_t j=0; + + //get the options + UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); + + int32_t failPos = -1; + + if(srcLength == -1){ + srcLength = u_strlen(src); + } + + if(srcLength > b1Capacity){ + b1 = (UChar*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); + if(b1==NULL){ + *status = U_MEMORY_ALLOCATION_ERROR; + goto CLEANUP; + } + b1Capacity = srcLength; + } + + // step 1 + for( j=0;j<srcLength;j++){ + if(src[j] > 0x7F){ + srcIsASCII = FALSE; + } + b1[b1Len++] = src[j]; + } + + // step 2 is performed only if the source contains non ASCII + if(srcIsASCII == FALSE){ + + // step 2 + b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); + + if(*status == U_BUFFER_OVERFLOW_ERROR){ + // redo processing of string + // we do not have enough room so grow the buffer + if(b1 != b1Stack){ + uprv_free(b1); + } + b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); + if(b1==NULL){ + *status = U_MEMORY_ALLOCATION_ERROR; + goto CLEANUP; + } + + *status = U_ZERO_ERROR; // reset error + + b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); + } + } + // error bail out + if(U_FAILURE(*status)){ + goto CLEANUP; + } + if(b1Len == 0){ + *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; + goto CLEANUP; + } + + // for step 3 & 4 + srcIsASCII = TRUE; + for( j=0;j<b1Len;j++){ + // check if output of usprep_prepare is all ASCII + if(b1[j] > 0x7F){ + srcIsASCII = FALSE; + }else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character + srcIsLDH = FALSE; + failPos = j; + } + } + if(useSTD3ASCIIRules == TRUE){ + // verify 3a and 3b + // 3(a) Verify the absence of non-LDH ASCII code points; that is, the + // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. + // 3(b) Verify the absence of leading and trailing hyphen-minus; that + // is, the absence of U+002D at the beginning and end of the + // sequence. + if( srcIsLDH == FALSE /* source at this point should not contain anyLDH characters */ + || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ + *status = U_IDNA_STD3_ASCII_RULES_ERROR; + + /* populate the parseError struct */ + if(srcIsLDH==FALSE){ + // failPos is always set the index of failure + uprv_syntaxError(b1,failPos, b1Len,parseError); + }else if(b1[0] == HYPHEN){ + // fail position is 0 + uprv_syntaxError(b1,0,b1Len,parseError); + }else{ + // the last index in the source is always length-1 + uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError); + } + + goto CLEANUP; + } + } + // Step 4: if the source is ASCII then proceed to step 8 + if(srcIsASCII){ + if(b1Len <= destCapacity){ + u_memmove(dest, b1, b1Len); + reqLength = b1Len; + }else{ + reqLength = b1Len; + goto CLEANUP; + } + }else{ + // step 5 : verify the sequence does not begin with ACE prefix + if(!startsWithPrefix(b1,b1Len)){ + + //step 6: encode the sequence with punycode + + // do not preserve the case flags for now! + // TODO: Preserve the case while implementing the RFE + // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); + // uprv_memset(caseFlags,TRUE,b1Len); + + b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status); + + if(*status == U_BUFFER_OVERFLOW_ERROR){ + // redo processing of string + /* we do not have enough room so grow the buffer*/ + b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); + if(b2 == NULL){ + *status = U_MEMORY_ALLOCATION_ERROR; + goto CLEANUP; + } + + *status = U_ZERO_ERROR; // reset error + + b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status); + } + //error bail out + if(U_FAILURE(*status)){ + goto CLEANUP; + } + // TODO : Reconsider while implementing the case preserve RFE + // convert all codepoints to lower case ASCII + // toASCIILower(b2,b2Len); + reqLength = b2Len+ACE_PREFIX_LENGTH; + + if(reqLength > destCapacity){ + *status = U_BUFFER_OVERFLOW_ERROR; + goto CLEANUP; + } + //Step 7: prepend the ACE prefix + u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH); + //Step 6: copy the contents in b2 into dest + u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len); + + }else{ + *status = U_IDNA_ACE_PREFIX_ERROR; + //position of failure is 0 + uprv_syntaxError(b1,0,b1Len,parseError); + goto CLEANUP; + } + } + // step 8: verify the length of label + if(reqLength > MAX_LABEL_LENGTH){ + *status = U_IDNA_LABEL_TOO_LONG_ERROR; + } + +CLEANUP: + if(b1 != b1Stack){ + uprv_free(b1); + } + if(b2 != b2Stack){ + uprv_free(b2); + } + uprv_free(caseFlags); + + return u_terminateUChars(dest, destCapacity, reqLength, status); +} + +static int32_t +_internal_toUnicode(const UChar* src, int32_t srcLength, + UChar* dest, int32_t destCapacity, + int32_t options, + UStringPrepProfile* nameprep, + UParseError* parseError, + UErrorCode* status) +{ + + //get the options + //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); + int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; + + // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. + UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; + + //initialize pointers to stack buffers + UChar *b1 = b1Stack, *b2 = b2Stack, *b1Prime=NULL, *b3=b3Stack; + int32_t b1Len = 0, b2Len, b1PrimeLen, b3Len, + b1Capacity = MAX_LABEL_BUFFER_SIZE, + b2Capacity = MAX_LABEL_BUFFER_SIZE, + b3Capacity = MAX_LABEL_BUFFER_SIZE, + reqLength=0; + + UBool* caseFlags = NULL; + + UBool srcIsASCII = TRUE; + /*UBool srcIsLDH = TRUE; + int32_t failPos =0;*/ + + // step 1: find out if all the codepoints in src are ASCII + if(srcLength==-1){ + srcLength = 0; + for(;src[srcLength]!=0;){ + if(src[srcLength]> 0x7f){ + srcIsASCII = FALSE; + }/*else if(isLDHChar(src[srcLength])==FALSE){ + // here we do not assemble surrogates + // since we know that LDH code points + // are in the ASCII range only + srcIsLDH = FALSE; + failPos = srcLength; + }*/ + srcLength++; + } + }else if(srcLength > 0){ + for(int32_t j=0; j<srcLength; j++){ + if(src[j]> 0x7f){ + srcIsASCII = FALSE; + break; + }/*else if(isLDHChar(src[j])==FALSE){ + // here we do not assemble surrogates + // since we know that LDH code points + // are in the ASCII range only + srcIsLDH = FALSE; + failPos = j; + }*/ + } + }else{ + return 0; + } + + if(srcIsASCII == FALSE){ + // step 2: process the string + b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); + if(*status == U_BUFFER_OVERFLOW_ERROR){ + // redo processing of string + /* we do not have enough room so grow the buffer*/ + b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); + if(b1==NULL){ + *status = U_MEMORY_ALLOCATION_ERROR; + goto CLEANUP; + } + + *status = U_ZERO_ERROR; // reset error + + b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); + } + //bail out on error + if(U_FAILURE(*status)){ + goto CLEANUP; + } + }else{ + + //just point src to b1 + b1 = (UChar*) src; + b1Len = srcLength; + } + + // The RFC states that + // <quote> + // ToUnicode never fails. If any step fails, then the original input + // is returned immediately in that step. + // </quote> + + //step 3: verify ACE Prefix + if(startsWithPrefix(b1,b1Len)){ + + //step 4: Remove the ACE Prefix + b1Prime = b1 + ACE_PREFIX_LENGTH; + b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; + + //step 5: Decode using punycode + b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status); + + if(*status == U_BUFFER_OVERFLOW_ERROR){ + // redo processing of string + /* we do not have enough room so grow the buffer*/ + b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); + if(b2==NULL){ + *status = U_MEMORY_ALLOCATION_ERROR; + goto CLEANUP; + } + + *status = U_ZERO_ERROR; // reset error + + b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status); + } + + + //step 6:Apply toASCII + b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status); + + if(*status == U_BUFFER_OVERFLOW_ERROR){ + // redo processing of string + /* we do not have enough room so grow the buffer*/ + b3 = (UChar*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); + if(b3==NULL){ + *status = U_MEMORY_ALLOCATION_ERROR; + goto CLEANUP; + } + + *status = U_ZERO_ERROR; // reset error + + b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status); + + } + //bail out on error + if(U_FAILURE(*status)){ + goto CLEANUP; + } + + //step 7: verify + if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ + // Cause the original to be returned. + *status = U_IDNA_VERIFICATION_ERROR; + goto CLEANUP; + } + + //step 8: return output of step 5 + reqLength = b2Len; + if(b2Len <= destCapacity) { + u_memmove(dest, b2, b2Len); + } + } + else{ + // See the start of this if statement for why this is commented out. + // verify that STD3 ASCII rules are satisfied + /*if(useSTD3ASCIIRules == TRUE){ + if( srcIsLDH == FALSE // source contains some non-LDH characters + || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ + *status = U_IDNA_STD3_ASCII_RULES_ERROR; + + // populate the parseError struct + if(srcIsLDH==FALSE){ + // failPos is always set the index of failure + uprv_syntaxError(src,failPos, srcLength,parseError); + }else if(src[0] == HYPHEN){ + // fail position is 0 + uprv_syntaxError(src,0,srcLength,parseError); + }else{ + // the last index in the source is always length-1 + uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); + } + + goto CLEANUP; + } + }*/ + // just return the source + //copy the source to destination + if(srcLength <= destCapacity){ + u_memmove(dest, src, srcLength); + } + reqLength = srcLength; + } + + +CLEANUP: + + if(b1 != b1Stack && b1!=src){ + uprv_free(b1); + } + if(b2 != b2Stack){ + uprv_free(b2); + } + uprv_free(caseFlags); + + // The RFC states that + // <quote> + // ToUnicode never fails. If any step fails, then the original input + // is returned immediately in that step. + // </quote> + // So if any step fails lets copy source to destination + if(U_FAILURE(*status)){ + //copy the source to destination + if(dest && srcLength <= destCapacity){ + // srcLength should have already been set earlier. + U_ASSERT(srcLength >= 0); + u_memmove(dest, src, srcLength); + } + reqLength = srcLength; + *status = U_ZERO_ERROR; + } + + return u_terminateUChars(dest, destCapacity, reqLength, status); +} + +U_CAPI int32_t U_EXPORT2 +uidna_toASCII(const UChar* src, int32_t srcLength, + UChar* dest, int32_t destCapacity, + int32_t options, + UParseError* parseError, + UErrorCode* status){ + + if(status == NULL || U_FAILURE(*status)){ + return 0; + } + if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); + + if(U_FAILURE(*status)){ + return -1; + } + + int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); + + /* close the profile*/ + usprep_close(nameprep); + + return retLen; +} + +U_CAPI int32_t U_EXPORT2 +uidna_toUnicode(const UChar* src, int32_t srcLength, + UChar* dest, int32_t destCapacity, + int32_t options, + UParseError* parseError, + UErrorCode* status){ + + if(status == NULL || U_FAILURE(*status)){ + return 0; + } + if( (src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); + + if(U_FAILURE(*status)){ + return -1; + } + + int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); + + usprep_close(nameprep); + + return retLen; +} + + +U_CAPI int32_t U_EXPORT2 +uidna_IDNToASCII( const UChar *src, int32_t srcLength, + UChar* dest, int32_t destCapacity, + int32_t options, + UParseError *parseError, + UErrorCode *status){ + + if(status == NULL || U_FAILURE(*status)){ + return 0; + } + if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + int32_t reqLength = 0; + + UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); + + if(U_FAILURE(*status)){ + return 0; + } + + //initialize pointers + UChar *delimiter = (UChar*)src; + UChar *labelStart = (UChar*)src; + UChar *currentDest = (UChar*) dest; + int32_t remainingLen = srcLength; + int32_t remainingDestCapacity = destCapacity; + int32_t labelLen = 0, labelReqLength = 0; + UBool done = FALSE; + + + for(;;){ + + labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); + labelReqLength = 0; + if(!(labelLen==0 && done)){// make sure this is not a root label separator. + + labelReqLength = _internal_toASCII( labelStart, labelLen, + currentDest, remainingDestCapacity, + options, nameprep, + parseError, status); + + if(*status == U_BUFFER_OVERFLOW_ERROR){ + + *status = U_ZERO_ERROR; // reset error + remainingDestCapacity = 0; + } + } + + + if(U_FAILURE(*status)){ + break; + } + + reqLength +=labelReqLength; + // adjust the destination pointer + if(labelReqLength < remainingDestCapacity){ + currentDest = currentDest + labelReqLength; + remainingDestCapacity -= labelReqLength; + }else{ + // should never occur + remainingDestCapacity = 0; + } + + if(done == TRUE){ + break; + } + + // add the label separator + if(remainingDestCapacity > 0){ + *currentDest++ = FULL_STOP; + remainingDestCapacity--; + } + reqLength++; + + labelStart = delimiter; + if(remainingLen >0 ){ + remainingLen = (int32_t)(srcLength - (delimiter - src)); + } + + } + + if(reqLength > MAX_DOMAIN_NAME_LENGTH){ + *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; + } + + usprep_close(nameprep); + + return u_terminateUChars(dest, destCapacity, reqLength, status); +} + +U_CAPI int32_t U_EXPORT2 +uidna_IDNToUnicode( const UChar* src, int32_t srcLength, + UChar* dest, int32_t destCapacity, + int32_t options, + UParseError* parseError, + UErrorCode* status){ + + if(status == NULL || U_FAILURE(*status)){ + return 0; + } + if((src==NULL) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + int32_t reqLength = 0; + + UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); + + if(U_FAILURE(*status)){ + return 0; + } + + //initialize pointers + UChar *delimiter = (UChar*)src; + UChar *labelStart = (UChar*)src; + UChar *currentDest = (UChar*) dest; + int32_t remainingLen = srcLength; + int32_t remainingDestCapacity = destCapacity; + int32_t labelLen = 0, labelReqLength = 0; + UBool done = FALSE; + + for(;;){ + + labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); + + // The RFC states that + // <quote> + // ToUnicode never fails. If any step fails, then the original input + // is returned immediately in that step. + // </quote> + // _internal_toUnicode will copy the label. + /*if(labelLen==0 && done==FALSE){ + *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; + break; + }*/ + + labelReqLength = _internal_toUnicode(labelStart, labelLen, + currentDest, remainingDestCapacity, + options, nameprep, + parseError, status); + + if(*status == U_BUFFER_OVERFLOW_ERROR){ + *status = U_ZERO_ERROR; // reset error + remainingDestCapacity = 0; + } + + if(U_FAILURE(*status)){ + break; + } + + reqLength +=labelReqLength; + // adjust the destination pointer + if(labelReqLength < remainingDestCapacity){ + currentDest = currentDest + labelReqLength; + remainingDestCapacity -= labelReqLength; + }else{ + // should never occur + remainingDestCapacity = 0; + } + + if(done == TRUE){ + break; + } + + // add the label separator + // Unlike the ToASCII operation we don't normalize the label separators + if(remainingDestCapacity > 0){ + *currentDest++ = *(labelStart + labelLen); + remainingDestCapacity--; + } + reqLength++; + + labelStart = delimiter; + if(remainingLen >0 ){ + remainingLen = (int32_t)(srcLength - (delimiter - src)); + } + + } + + if(reqLength > MAX_DOMAIN_NAME_LENGTH){ + *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; + } + + usprep_close(nameprep); + + return u_terminateUChars(dest, destCapacity, reqLength, status); +} + +U_CAPI int32_t U_EXPORT2 +uidna_compare( const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + int32_t options, + UErrorCode* status){ + + if(status == NULL || U_FAILURE(*status)){ + return -1; + } + + UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; + UChar *b1 = b1Stack, *b2 = b2Stack; + int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; + int32_t result=-1; + + UParseError parseError; + + b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); + if(*status == U_BUFFER_OVERFLOW_ERROR){ + // redo processing of string + b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); + if(b1==NULL){ + *status = U_MEMORY_ALLOCATION_ERROR; + goto CLEANUP; + } + + *status = U_ZERO_ERROR; // reset error + + b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); + + } + + b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status); + if(*status == U_BUFFER_OVERFLOW_ERROR){ + // redo processing of string + b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); + if(b2==NULL){ + *status = U_MEMORY_ALLOCATION_ERROR; + goto CLEANUP; + } + + *status = U_ZERO_ERROR; // reset error + + b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status); + + } + // when toASCII is applied all label separators are replaced with FULL_STOP + result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); + +CLEANUP: + if(b1 != b1Stack){ + uprv_free(b1); + } + + if(b2 != b2Stack){ + uprv_free(b2); + } + + return result; +} + +#endif /* #if !UCONFIG_NO_IDNA */ diff --git a/thirdparty/icu4c/common/uinit.cpp b/thirdparty/icu4c/common/uinit.cpp new file mode 100644 index 0000000000..624431be02 --- /dev/null +++ b/thirdparty/icu4c/common/uinit.cpp @@ -0,0 +1,74 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2001-2015, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* file name: uinit.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001July05 +* created by: George Rhoten +*/ + +#include "unicode/utypes.h" +#include "unicode/icuplug.h" +#include "unicode/uclean.h" +#include "cmemory.h" +#include "icuplugimp.h" +#include "ucln_cmn.h" +#include "ucnv_io.h" +#include "umutex.h" +#include "utracimp.h" + +U_NAMESPACE_BEGIN + +static UInitOnce gICUInitOnce = U_INITONCE_INITIALIZER; + +static UBool U_CALLCONV uinit_cleanup() { + gICUInitOnce.reset(); + return TRUE; +} + +static void U_CALLCONV +initData(UErrorCode &status) +{ +#if UCONFIG_ENABLE_PLUGINS + /* initialize plugins */ + uplug_init(&status); +#endif + +#if !UCONFIG_NO_CONVERSION + /* + * 2005-may-02 + * + * ICU4C 3.4 (jitterbug 4497) hardcodes the data for Unicode character + * properties for APIs that want to be fast. + * Therefore, we need not load them here nor check for errors. + * Instead, we load the converter alias table to see if any ICU data + * is available. + * Users should really open the service objects they need and check + * for errors there, to make sure that the actual items they need are + * available. + */ + ucnv_io_countKnownConverters(&status); +#endif + ucln_common_registerCleanup(UCLN_COMMON_UINIT, uinit_cleanup); +} + +U_NAMESPACE_END + +U_NAMESPACE_USE + +/* + * ICU Initialization Function. Need not be called. + */ +U_CAPI void U_EXPORT2 +u_init(UErrorCode *status) { + UTRACE_ENTRY_OC(UTRACE_U_INIT); + umtx_initOnce(gICUInitOnce, &initData, *status); + UTRACE_EXIT_STATUS(*status); +} diff --git a/thirdparty/icu4c/common/uinvchar.cpp b/thirdparty/icu4c/common/uinvchar.cpp new file mode 100644 index 0000000000..52b8906568 --- /dev/null +++ b/thirdparty/icu4c/common/uinvchar.cpp @@ -0,0 +1,627 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2010, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uinvchar.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2004sep14 +* created by: Markus W. Scherer +* +* Functions for handling invariant characters, moved here from putil.c +* for better modularization. +*/ + +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "udataswp.h" +#include "cstring.h" +#include "cmemory.h" +#include "uassert.h" +#include "uinvchar.h" + +/* invariant-character handling --------------------------------------------- */ + +/* + * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h) + * appropriately for most EBCDIC codepages. + * + * They currently also map most other ASCII graphic characters, + * appropriately for codepages 37 and 1047. + * Exceptions: The characters for []^ have different codes in 37 & 1047. + * Both versions are mapped to ASCII. + * + * ASCII 37 1047 + * [ 5B BA AD + * ] 5D BB BD + * ^ 5E B0 5F + * + * There are no mappings for variant characters from Unicode to EBCDIC. + * + * Currently, C0 control codes are also included in these maps. + * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other + * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A), + * but there is no mapping for ASCII LF back to EBCDIC. + * + * ASCII EBCDIC S/390-OE + * LF 0A 25 15 + * NEL 85 15 25 + * + * The maps below explicitly exclude the variant + * control and graphical characters that are in ASCII-based + * codepages at 0x80 and above. + * "No mapping" is expressed by mapping to a 00 byte. + * + * These tables do not establish a converter or a codepage. + */ + +static const uint8_t asciiFromEbcdic[256]={ + 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, + 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, + + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, + 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, + 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, + + 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, + 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, + + 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +static const uint8_t ebcdicFromAscii[256]={ + 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, + 0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, + + 0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, + 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d, + 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* Same as asciiFromEbcdic[] except maps all letters to lowercase. */ +static const uint8_t lowercaseAsciiFromEbcdic[256]={ + 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07, + 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a, + + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, + 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, + 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, + + 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00, + 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00, + + 0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* + * Bit sets indicating which characters of the ASCII repertoire + * (by ASCII/Unicode code) are "invariant". + * See utypes.h for more details. + * + * As invariant are considered the characters of the ASCII repertoire except + * for the following: + * 21 '!' <exclamation mark> + * 23 '#' <number sign> + * 24 '$' <dollar sign> + * + * 40 '@' <commercial at> + * + * 5b '[' <left bracket> + * 5c '\' <backslash> + * 5d ']' <right bracket> + * 5e '^' <circumflex> + * + * 60 '`' <grave accent> + * + * 7b '{' <left brace> + * 7c '|' <vertical line> + * 7d '}' <right brace> + * 7e '~' <tilde> + */ +static const uint32_t invariantChars[4]={ + 0xfffffbff, /* 00..1f but not 0a */ + 0xffffffe5, /* 20..3f but not 21 23 24 */ + 0x87fffffe, /* 40..5f but not 40 5b..5e */ + 0x87fffffe /* 60..7f but not 60 7b..7e */ +}; + +/* + * test unsigned types (or values known to be non-negative) for invariant characters, + * tests ASCII-family character values + */ +#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0) + +/* test signed types for invariant characters, adds test for positive values */ +#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c)) + +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +#define CHAR_TO_UCHAR(c) c +#define UCHAR_TO_CHAR(c) c +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +#define CHAR_TO_UCHAR(u) asciiFromEbcdic[u] +#define UCHAR_TO_CHAR(u) ebcdicFromAscii[u] +#else +# error U_CHARSET_FAMILY is not valid +#endif + + +U_CAPI void U_EXPORT2 +u_charsToUChars(const char *cs, UChar *us, int32_t length) { + UChar u; + uint8_t c; + + /* + * Allow the entire ASCII repertoire to be mapped _to_ Unicode. + * For EBCDIC systems, this works for characters with codes from + * codepages 37 and 1047 or compatible. + */ + while(length>0) { + c=(uint8_t)(*cs++); + u=(UChar)CHAR_TO_UCHAR(c); + U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */ + *us++=u; + --length; + } +} + +U_CAPI void U_EXPORT2 +u_UCharsToChars(const UChar *us, char *cs, int32_t length) { + UChar u; + + while(length>0) { + u=*us++; + if(!UCHAR_IS_INVARIANT(u)) { + U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */ + u=0; + } + *cs++=(char)UCHAR_TO_CHAR(u); + --length; + } +} + +U_CAPI UBool U_EXPORT2 +uprv_isInvariantString(const char *s, int32_t length) { + uint8_t c; + + for(;;) { + if(length<0) { + /* NUL-terminated */ + c=(uint8_t)*s++; + if(c==0) { + break; + } + } else { + /* count length */ + if(length==0) { + break; + } + --length; + c=(uint8_t)*s++; + if(c==0) { + continue; /* NUL is invariant */ + } + } + /* c!=0 now, one branch below checks c==0 for variant characters */ + + /* + * no assertions here because these functions are legitimately called + * for strings with variant characters + */ +#if U_CHARSET_FAMILY==U_ASCII_FAMILY + if(!UCHAR_IS_INVARIANT(c)) { + return FALSE; /* found a variant char */ + } +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY + c=CHAR_TO_UCHAR(c); + if(c==0 || !UCHAR_IS_INVARIANT(c)) { + return FALSE; /* found a variant char */ + } +#else +# error U_CHARSET_FAMILY is not valid +#endif + } + return TRUE; +} + +U_CAPI UBool U_EXPORT2 +uprv_isInvariantUString(const UChar *s, int32_t length) { + UChar c; + + for(;;) { + if(length<0) { + /* NUL-terminated */ + c=*s++; + if(c==0) { + break; + } + } else { + /* count length */ + if(length==0) { + break; + } + --length; + c=*s++; + } + + /* + * no assertions here because these functions are legitimately called + * for strings with variant characters + */ + if(!UCHAR_IS_INVARIANT(c)) { + return FALSE; /* found a variant char */ + } + } + return TRUE; +} + +/* UDataSwapFn implementations used in udataswp.c ------- */ + +/* convert ASCII to EBCDIC and verify that all characters are invariant */ +U_CAPI int32_t U_EXPORT2 +uprv_ebcdicFromAscii(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint8_t *s; + uint8_t *t; + uint8_t c; + + int32_t count; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + s=(const uint8_t *)inData; + t=(uint8_t *)outData; + count=length; + while(count>0) { + c=*s++; + if(!UCHAR_IS_INVARIANT(c)) { + udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n", + length, length-count); + *pErrorCode=U_INVALID_CHAR_FOUND; + return 0; + } + *t++=ebcdicFromAscii[c]; + --count; + } + + return length; +} + +/* this function only checks and copies ASCII strings without conversion */ +U_CFUNC int32_t +uprv_copyAscii(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint8_t *s; + uint8_t c; + + int32_t count; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and checking */ + s=(const uint8_t *)inData; + count=length; + while(count>0) { + c=*s++; + if(!UCHAR_IS_INVARIANT(c)) { + udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n", + length, length-count); + *pErrorCode=U_INVALID_CHAR_FOUND; + return 0; + } + --count; + } + + if(length>0 && inData!=outData) { + uprv_memcpy(outData, inData, length); + } + + return length; +} + +/* convert EBCDIC to ASCII and verify that all characters are invariant */ +U_CFUNC int32_t +uprv_asciiFromEbcdic(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint8_t *s; + uint8_t *t; + uint8_t c; + + int32_t count; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + s=(const uint8_t *)inData; + t=(uint8_t *)outData; + count=length; + while(count>0) { + c=*s++; + if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { + udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n", + length, length-count); + *pErrorCode=U_INVALID_CHAR_FOUND; + return 0; + } + *t++=c; + --count; + } + + return length; +} + +/* this function only checks and copies EBCDIC strings without conversion */ +U_CFUNC int32_t +uprv_copyEbcdic(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const uint8_t *s; + uint8_t c; + + int32_t count; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and checking */ + s=(const uint8_t *)inData; + count=length; + while(count>0) { + c=*s++; + if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) { + udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n", + length, length-count); + *pErrorCode=U_INVALID_CHAR_FOUND; + return 0; + } + --count; + } + + if(length>0 && inData!=outData) { + uprv_memcpy(outData, inData, length); + } + + return length; +} + +U_CFUNC UBool +uprv_isEbcdicAtSign(char c) { + static const uint8_t ebcdicAtSigns[] = { + 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; + return c != 0 && uprv_strchr((const char *)ebcdicAtSigns, c) != nullptr; +} + +/* compare invariant strings; variant characters compare less than others and unlike each other */ +U_CFUNC int32_t +uprv_compareInvAscii(const UDataSwapper *ds, + const char *outString, int32_t outLength, + const UChar *localString, int32_t localLength) { + (void)ds; + int32_t minLength; + UChar32 c1, c2; + uint8_t c; + + if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { + return 0; + } + + if(outLength<0) { + outLength=(int32_t)uprv_strlen(outString); + } + if(localLength<0) { + localLength=u_strlen(localString); + } + + minLength= outLength<localLength ? outLength : localLength; + + while(minLength>0) { + c=(uint8_t)*outString++; + if(UCHAR_IS_INVARIANT(c)) { + c1=c; + } else { + c1=-1; + } + + c2=*localString++; + if(!UCHAR_IS_INVARIANT(c2)) { + c2=-2; + } + + if((c1-=c2)!=0) { + return c1; + } + + --minLength; + } + + /* strings start with same prefix, compare lengths */ + return outLength-localLength; +} + +U_CFUNC int32_t +uprv_compareInvEbcdic(const UDataSwapper *ds, + const char *outString, int32_t outLength, + const UChar *localString, int32_t localLength) { + (void)ds; + int32_t minLength; + UChar32 c1, c2; + uint8_t c; + + if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) { + return 0; + } + + if(outLength<0) { + outLength=(int32_t)uprv_strlen(outString); + } + if(localLength<0) { + localLength=u_strlen(localString); + } + + minLength= outLength<localLength ? outLength : localLength; + + while(minLength>0) { + c=(uint8_t)*outString++; + if(c==0) { + c1=0; + } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) { + /* c1 is set */ + } else { + c1=-1; + } + + c2=*localString++; + if(!UCHAR_IS_INVARIANT(c2)) { + c2=-2; + } + + if((c1-=c2)!=0) { + return c1; + } + + --minLength; + } + + /* strings start with same prefix, compare lengths */ + return outLength-localLength; +} + +U_CAPI int32_t U_EXPORT2 +uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) { + int32_t c1, c2; + + for(;; ++s1, ++s2) { + c1=(uint8_t)*s1; + c2=(uint8_t)*s2; + if(c1!=c2) { + if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) { + c1=-(int32_t)(uint8_t)*s1; + } + if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) { + c2=-(int32_t)(uint8_t)*s2; + } + return c1-c2; + } else if(c1==0) { + return 0; + } + } +} + +U_CAPI char U_EXPORT2 +uprv_ebcdicToAscii(char c) { + return (char)asciiFromEbcdic[(uint8_t)c]; +} + +U_CAPI char U_EXPORT2 +uprv_ebcdicToLowercaseAscii(char c) { + return (char)lowercaseAsciiFromEbcdic[(uint8_t)c]; +} + +U_CAPI uint8_t* U_EXPORT2 +uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n) +{ + uint8_t *orig_dst = dst; + + if(n==-1) { + n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */ + } + /* copy non-null */ + while(*src && n>0) { + *(dst++) = asciiFromEbcdic[*(src++)]; + n--; + } + /* pad */ + while(n>0) { + *(dst++) = 0; + n--; + } + return orig_dst; +} + +U_CAPI uint8_t* U_EXPORT2 +uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n) +{ + uint8_t *orig_dst = dst; + + if(n==-1) { + n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */ + } + /* copy non-null */ + while(*src && n>0) { + char ch = ebcdicFromAscii[*(src++)]; + if(ch == 0) { + ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */ + } + *(dst++) = ch; + n--; + } + /* pad */ + while(n>0) { + *(dst++) = 0; + n--; + } + return orig_dst; +} + diff --git a/thirdparty/icu4c/common/uinvchar.h b/thirdparty/icu4c/common/uinvchar.h new file mode 100644 index 0000000000..9b7a9bd114 --- /dev/null +++ b/thirdparty/icu4c/common/uinvchar.h @@ -0,0 +1,219 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uinvchar.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2004sep14 +* created by: Markus W. Scherer +* +* Definitions for handling invariant characters, moved here from putil.c +* for better modularization. +*/ + +#ifndef __UINVCHAR_H__ +#define __UINVCHAR_H__ + +#include "unicode/utypes.h" +#ifdef __cplusplus +#include "unicode/unistr.h" +#endif + +/** + * Check if a char string only contains invariant characters. + * See utypes.h for details. + * + * @param s Input string pointer. + * @param length Length of the string, can be -1 if NUL-terminated. + * @return true if s contains only invariant characters. + * + * @internal (ICU 2.8) + */ +U_CAPI UBool U_EXPORT2 +uprv_isInvariantString(const char *s, int32_t length); + +/** + * Check if a Unicode string only contains invariant characters. + * See utypes.h for details. + * + * @param s Input string pointer. + * @param length Length of the string, can be -1 if NUL-terminated. + * @return true if s contains only invariant characters. + * + * @internal (ICU 2.8) + */ +U_CAPI UBool U_EXPORT2 +uprv_isInvariantUString(const UChar *s, int32_t length); + +/** + * \def U_UPPER_ORDINAL + * Get the ordinal number of an uppercase invariant character + * @internal + */ +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define U_UPPER_ORDINAL(x) ((x)-'A') +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \ + (((x) < 'S') ? ((x)-'J'+9) : \ + ((x)-'S'+18))) +#else +# error Unknown charset family! +#endif + +#ifdef __cplusplus + +U_NAMESPACE_BEGIN + +/** + * Like U_UPPER_ORDINAL(x) but with validation. + * Returns 0..25 for A..Z else a value outside 0..25. + */ +inline int32_t uprv_upperOrdinal(int32_t c) { +#if U_CHARSET_FAMILY==U_ASCII_FAMILY + return c - 'A'; +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY + // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8). + // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout + if (c <= 'I') { return c - 'A'; } // A-I --> 0-8 + if (c < 'J') { return -1; } + if (c <= 'R') { return c - 'J' + 9; } // J-R --> 9..17 + if (c < 'S') { return -1; } + return c - 'S' + 18; // S-Z --> 18..25 +#else +# error Unknown charset family! +#endif +} + +// Like U_UPPER_ORDINAL(x) but for lowercase and with validation. +// Returns 0..25 for a..z else a value outside 0..25. +inline int32_t uprv_lowerOrdinal(int32_t c) { +#if U_CHARSET_FAMILY==U_ASCII_FAMILY + return c - 'a'; +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY + // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8). + // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout + if (c <= 'i') { return c - 'a'; } // a-i --> 0-8 + if (c < 'j') { return -1; } + if (c <= 'r') { return c - 'j' + 9; } // j-r --> 9..17 + if (c < 's') { return -1; } + return c - 's' + 18; // s-z --> 18..25 +#else +# error Unknown charset family! +#endif +} + +U_NAMESPACE_END + +#endif + +/** + * Returns true if c == '@' is possible. + * The @ sign is variant, and the @ sign used on one + * EBCDIC machine won't be compiled the same way on other EBCDIC based machines. + * @internal + */ +U_CFUNC UBool +uprv_isEbcdicAtSign(char c); + +/** + * \def uprv_isAtSign + * Returns true if c == '@' is possible. + * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign(). + * @internal + */ +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define uprv_isAtSign(c) ((c)=='@') +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define uprv_isAtSign(c) uprv_isEbcdicAtSign(c) +#else +# error Unknown charset family! +#endif + +/** + * Compare two EBCDIC invariant-character strings in ASCII order. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2); + +/** + * \def uprv_compareInvCharsAsAscii + * Compare two invariant-character strings in ASCII order. + * @internal + */ +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2) +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2) +#else +# error Unknown charset family! +#endif + +/** + * Converts an EBCDIC invariant character to ASCII. + * @internal + */ +U_CAPI char U_EXPORT2 +uprv_ebcdicToAscii(char c); + +/** + * \def uprv_invCharToAscii + * Converts an invariant character to ASCII. + * @internal + */ +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define uprv_invCharToAscii(c) (c) +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c) +#else +# error Unknown charset family! +#endif + +/** + * Converts an EBCDIC invariant character to lowercase ASCII. + * @internal + */ +U_CAPI char U_EXPORT2 +uprv_ebcdicToLowercaseAscii(char c); + +/** + * \def uprv_invCharToLowercaseAscii + * Converts an invariant character to lowercase ASCII. + * @internal + */ +#if U_CHARSET_FAMILY==U_ASCII_FAMILY +# define uprv_invCharToLowercaseAscii uprv_asciitolower +#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY +# define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii +#else +# error Unknown charset family! +#endif + +/** + * Copy EBCDIC to ASCII + * @internal + * @see uprv_strncpy + */ +U_CAPI uint8_t* U_EXPORT2 +uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n); + + +/** + * Copy ASCII to EBCDIC + * @internal + * @see uprv_strncpy + */ +U_CAPI uint8_t* U_EXPORT2 +uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n); + + + +#endif diff --git a/thirdparty/icu4c/common/uiter.cpp b/thirdparty/icu4c/common/uiter.cpp new file mode 100644 index 0000000000..b9252d81c2 --- /dev/null +++ b/thirdparty/icu4c/common/uiter.cpp @@ -0,0 +1,1108 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uiter.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jan18 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "unicode/chariter.h" +#include "unicode/rep.h" +#include "unicode/uiter.h" +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "cstring.h" + +U_NAMESPACE_USE + +#define IS_EVEN(n) (((n)&1)==0) +#define IS_POINTER_EVEN(p) IS_EVEN((size_t)p) + +U_CDECL_BEGIN + +/* No-Op UCharIterator implementation for illegal input --------------------- */ + +static int32_t U_CALLCONV +noopGetIndex(UCharIterator * /*iter*/, UCharIteratorOrigin /*origin*/) { + return 0; +} + +static int32_t U_CALLCONV +noopMove(UCharIterator * /*iter*/, int32_t /*delta*/, UCharIteratorOrigin /*origin*/) { + return 0; +} + +static UBool U_CALLCONV +noopHasNext(UCharIterator * /*iter*/) { + return FALSE; +} + +static UChar32 U_CALLCONV +noopCurrent(UCharIterator * /*iter*/) { + return U_SENTINEL; +} + +static uint32_t U_CALLCONV +noopGetState(const UCharIterator * /*iter*/) { + return UITER_NO_STATE; +} + +static void U_CALLCONV +noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode *pErrorCode) { + *pErrorCode=U_UNSUPPORTED_ERROR; +} + +static const UCharIterator noopIterator={ + 0, 0, 0, 0, 0, 0, + noopGetIndex, + noopMove, + noopHasNext, + noopHasNext, + noopCurrent, + noopCurrent, + noopCurrent, + NULL, + noopGetState, + noopSetState +}; + +/* UCharIterator implementation for simple strings -------------------------- */ + +/* + * This is an implementation of a code unit (UChar) iterator + * for UChar * strings. + * + * The UCharIterator.context field holds a pointer to the string. + */ + +static int32_t U_CALLCONV +stringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { + switch(origin) { + case UITER_ZERO: + return 0; + case UITER_START: + return iter->start; + case UITER_CURRENT: + return iter->index; + case UITER_LIMIT: + return iter->limit; + case UITER_LENGTH: + return iter->length; + default: + /* not a valid origin */ + /* Should never get here! */ + return -1; + } +} + +static int32_t U_CALLCONV +stringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) { + int32_t pos; + + switch(origin) { + case UITER_ZERO: + pos=delta; + break; + case UITER_START: + pos=iter->start+delta; + break; + case UITER_CURRENT: + pos=iter->index+delta; + break; + case UITER_LIMIT: + pos=iter->limit+delta; + break; + case UITER_LENGTH: + pos=iter->length+delta; + break; + default: + return -1; /* Error */ + } + + if(pos<iter->start) { + pos=iter->start; + } else if(pos>iter->limit) { + pos=iter->limit; + } + + return iter->index=pos; +} + +static UBool U_CALLCONV +stringIteratorHasNext(UCharIterator *iter) { + return iter->index<iter->limit; +} + +static UBool U_CALLCONV +stringIteratorHasPrevious(UCharIterator *iter) { + return iter->index>iter->start; +} + +static UChar32 U_CALLCONV +stringIteratorCurrent(UCharIterator *iter) { + if(iter->index<iter->limit) { + return ((const UChar *)(iter->context))[iter->index]; + } else { + return U_SENTINEL; + } +} + +static UChar32 U_CALLCONV +stringIteratorNext(UCharIterator *iter) { + if(iter->index<iter->limit) { + return ((const UChar *)(iter->context))[iter->index++]; + } else { + return U_SENTINEL; + } +} + +static UChar32 U_CALLCONV +stringIteratorPrevious(UCharIterator *iter) { + if(iter->index>iter->start) { + return ((const UChar *)(iter->context))[--iter->index]; + } else { + return U_SENTINEL; + } +} + +static uint32_t U_CALLCONV +stringIteratorGetState(const UCharIterator *iter) { + return (uint32_t)iter->index; +} + +static void U_CALLCONV +stringIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + /* do nothing */ + } else if(iter==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } else if((int32_t)state<iter->start || iter->limit<(int32_t)state) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + } else { + iter->index=(int32_t)state; + } +} + +static const UCharIterator stringIterator={ + 0, 0, 0, 0, 0, 0, + stringIteratorGetIndex, + stringIteratorMove, + stringIteratorHasNext, + stringIteratorHasPrevious, + stringIteratorCurrent, + stringIteratorNext, + stringIteratorPrevious, + NULL, + stringIteratorGetState, + stringIteratorSetState +}; + +U_CAPI void U_EXPORT2 +uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) { + if(iter!=0) { + if(s!=0 && length>=-1) { + *iter=stringIterator; + iter->context=s; + if(length>=0) { + iter->length=length; + } else { + iter->length=u_strlen(s); + } + iter->limit=iter->length; + } else { + *iter=noopIterator; + } + } +} + +/* UCharIterator implementation for UTF-16BE strings ------------------------ */ + +/* + * This is an implementation of a code unit (UChar) iterator + * for UTF-16BE strings, i.e., strings in byte-vectors where + * each UChar is stored as a big-endian pair of bytes. + * + * The UCharIterator.context field holds a pointer to the string. + * Everything works just like with a normal UChar iterator (uiter_setString), + * except that UChars are assembled from byte pairs. + */ + +/* internal helper function */ +static inline UChar32 +utf16BEIteratorGet(UCharIterator *iter, int32_t index) { + const uint8_t *p=(const uint8_t *)iter->context; + return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1]; +} + +static UChar32 U_CALLCONV +utf16BEIteratorCurrent(UCharIterator *iter) { + int32_t index; + + if((index=iter->index)<iter->limit) { + return utf16BEIteratorGet(iter, index); + } else { + return U_SENTINEL; + } +} + +static UChar32 U_CALLCONV +utf16BEIteratorNext(UCharIterator *iter) { + int32_t index; + + if((index=iter->index)<iter->limit) { + iter->index=index+1; + return utf16BEIteratorGet(iter, index); + } else { + return U_SENTINEL; + } +} + +static UChar32 U_CALLCONV +utf16BEIteratorPrevious(UCharIterator *iter) { + int32_t index; + + if((index=iter->index)>iter->start) { + iter->index=--index; + return utf16BEIteratorGet(iter, index); + } else { + return U_SENTINEL; + } +} + +static const UCharIterator utf16BEIterator={ + 0, 0, 0, 0, 0, 0, + stringIteratorGetIndex, + stringIteratorMove, + stringIteratorHasNext, + stringIteratorHasPrevious, + utf16BEIteratorCurrent, + utf16BEIteratorNext, + utf16BEIteratorPrevious, + NULL, + stringIteratorGetState, + stringIteratorSetState +}; + +/* + * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL, + * i.e., before a pair of 0 bytes where the first 0 byte is at an even + * offset from s. + */ +static int32_t +utf16BE_strlen(const char *s) { + if(IS_POINTER_EVEN(s)) { + /* + * even-aligned, call u_strlen(s) + * we are probably on a little-endian machine, but searching for UChar NUL + * does not care about endianness + */ + return u_strlen((const UChar *)s); + } else { + /* odd-aligned, search for pair of 0 bytes */ + const char *p=s; + + while(!(*p==0 && p[1]==0)) { + p+=2; + } + return (int32_t)((p-s)/2); + } +} + +U_CAPI void U_EXPORT2 +uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length) { + if(iter!=NULL) { + /* allow only even-length strings (the input length counts bytes) */ + if(s!=NULL && (length==-1 || (length>=0 && IS_EVEN(length)))) { + /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */ + length>>=1; + + if(U_IS_BIG_ENDIAN && IS_POINTER_EVEN(s)) { + /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */ + uiter_setString(iter, (const UChar *)s, length); + return; + } + + *iter=utf16BEIterator; + iter->context=s; + if(length>=0) { + iter->length=length; + } else { + iter->length=utf16BE_strlen(s); + } + iter->limit=iter->length; + } else { + *iter=noopIterator; + } + } +} + +/* UCharIterator wrapper around CharacterIterator --------------------------- */ + +/* + * This is wrapper code around a C++ CharacterIterator to + * look like a C UCharIterator. + * + * The UCharIterator.context field holds a pointer to the CharacterIterator. + */ + +static int32_t U_CALLCONV +characterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { + switch(origin) { + case UITER_ZERO: + return 0; + case UITER_START: + return ((CharacterIterator *)(iter->context))->startIndex(); + case UITER_CURRENT: + return ((CharacterIterator *)(iter->context))->getIndex(); + case UITER_LIMIT: + return ((CharacterIterator *)(iter->context))->endIndex(); + case UITER_LENGTH: + return ((CharacterIterator *)(iter->context))->getLength(); + default: + /* not a valid origin */ + /* Should never get here! */ + return -1; + } +} + +static int32_t U_CALLCONV +characterIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) { + switch(origin) { + case UITER_ZERO: + ((CharacterIterator *)(iter->context))->setIndex(delta); + return ((CharacterIterator *)(iter->context))->getIndex(); + case UITER_START: + case UITER_CURRENT: + case UITER_LIMIT: + return ((CharacterIterator *)(iter->context))->move(delta, (CharacterIterator::EOrigin)origin); + case UITER_LENGTH: + ((CharacterIterator *)(iter->context))->setIndex(((CharacterIterator *)(iter->context))->getLength()+delta); + return ((CharacterIterator *)(iter->context))->getIndex(); + default: + /* not a valid origin */ + /* Should never get here! */ + return -1; + } +} + +static UBool U_CALLCONV +characterIteratorHasNext(UCharIterator *iter) { + return ((CharacterIterator *)(iter->context))->hasNext(); +} + +static UBool U_CALLCONV +characterIteratorHasPrevious(UCharIterator *iter) { + return ((CharacterIterator *)(iter->context))->hasPrevious(); +} + +static UChar32 U_CALLCONV +characterIteratorCurrent(UCharIterator *iter) { + UChar32 c; + + c=((CharacterIterator *)(iter->context))->current(); + if(c!=0xffff || ((CharacterIterator *)(iter->context))->hasNext()) { + return c; + } else { + return U_SENTINEL; + } +} + +static UChar32 U_CALLCONV +characterIteratorNext(UCharIterator *iter) { + if(((CharacterIterator *)(iter->context))->hasNext()) { + return ((CharacterIterator *)(iter->context))->nextPostInc(); + } else { + return U_SENTINEL; + } +} + +static UChar32 U_CALLCONV +characterIteratorPrevious(UCharIterator *iter) { + if(((CharacterIterator *)(iter->context))->hasPrevious()) { + return ((CharacterIterator *)(iter->context))->previous(); + } else { + return U_SENTINEL; + } +} + +static uint32_t U_CALLCONV +characterIteratorGetState(const UCharIterator *iter) { + return ((CharacterIterator *)(iter->context))->getIndex(); +} + +static void U_CALLCONV +characterIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + /* do nothing */ + } else if(iter==NULL || iter->context==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } else if((int32_t)state<((CharacterIterator *)(iter->context))->startIndex() || ((CharacterIterator *)(iter->context))->endIndex()<(int32_t)state) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + } else { + ((CharacterIterator *)(iter->context))->setIndex((int32_t)state); + } +} + +static const UCharIterator characterIteratorWrapper={ + 0, 0, 0, 0, 0, 0, + characterIteratorGetIndex, + characterIteratorMove, + characterIteratorHasNext, + characterIteratorHasPrevious, + characterIteratorCurrent, + characterIteratorNext, + characterIteratorPrevious, + NULL, + characterIteratorGetState, + characterIteratorSetState +}; + +U_CAPI void U_EXPORT2 +uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter) { + if(iter!=0) { + if(charIter!=0) { + *iter=characterIteratorWrapper; + iter->context=charIter; + } else { + *iter=noopIterator; + } + } +} + +/* UCharIterator wrapper around Replaceable --------------------------------- */ + +/* + * This is an implementation of a code unit (UChar) iterator + * based on a Replaceable object. + * + * The UCharIterator.context field holds a pointer to the Replaceable. + * UCharIterator.length and UCharIterator.index hold Replaceable.length() + * and the iteration index. + */ + +static UChar32 U_CALLCONV +replaceableIteratorCurrent(UCharIterator *iter) { + if(iter->index<iter->limit) { + return ((Replaceable *)(iter->context))->charAt(iter->index); + } else { + return U_SENTINEL; + } +} + +static UChar32 U_CALLCONV +replaceableIteratorNext(UCharIterator *iter) { + if(iter->index<iter->limit) { + return ((Replaceable *)(iter->context))->charAt(iter->index++); + } else { + return U_SENTINEL; + } +} + +static UChar32 U_CALLCONV +replaceableIteratorPrevious(UCharIterator *iter) { + if(iter->index>iter->start) { + return ((Replaceable *)(iter->context))->charAt(--iter->index); + } else { + return U_SENTINEL; + } +} + +static const UCharIterator replaceableIterator={ + 0, 0, 0, 0, 0, 0, + stringIteratorGetIndex, + stringIteratorMove, + stringIteratorHasNext, + stringIteratorHasPrevious, + replaceableIteratorCurrent, + replaceableIteratorNext, + replaceableIteratorPrevious, + NULL, + stringIteratorGetState, + stringIteratorSetState +}; + +U_CAPI void U_EXPORT2 +uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) { + if(iter!=0) { + if(rep!=0) { + *iter=replaceableIterator; + iter->context=rep; + iter->limit=iter->length=rep->length(); + } else { + *iter=noopIterator; + } + } +} + +/* UCharIterator implementation for UTF-8 strings --------------------------- */ + +/* + * Possible, probably necessary only for an implementation for arbitrary + * converters: + * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text. + * This would require to turn reservedFn into a close function and + * to introduce a uiter_close(iter). + */ + +#define UITER_CNV_CAPACITY 16 + +/* + * Minimal implementation: + * Maintain a single-UChar buffer for an additional surrogate. + * The caller must not modify start and limit because they are used internally. + * + * Use UCharIterator fields as follows: + * context pointer to UTF-8 string + * length UTF-16 length of the string; -1 until lazy evaluation + * start current UTF-8 index + * index current UTF-16 index; may be -1="unknown" after setState() + * limit UTF-8 length of the string + * reservedField supplementary code point + * + * Since UCharIterator delivers 16-bit code units, the iteration can be + * currently in the middle of the byte sequence for a supplementary code point. + * In this case, reservedField will contain that code point and start will + * point to after the corresponding byte sequence. The UTF-16 index will be + * one less than what it would otherwise be corresponding to the UTF-8 index. + * Otherwise, reservedField will be 0. + */ + +/* + * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings: + * Add implementations that do not call strlen() for iteration but check for NUL. + */ + +static int32_t U_CALLCONV +utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { + switch(origin) { + case UITER_ZERO: + case UITER_START: + return 0; + case UITER_CURRENT: + if(iter->index<0) { + /* the current UTF-16 index is unknown after setState(), count from the beginning */ + const uint8_t *s; + UChar32 c; + int32_t i, limit, index; + + s=(const uint8_t *)iter->context; + i=index=0; + limit=iter->start; /* count up to the UTF-8 index */ + while(i<limit) { + U8_NEXT_OR_FFFD(s, i, limit, c); + index+=U16_LENGTH(c); + } + + iter->start=i; /* just in case setState() did not get us to a code point boundary */ + if(i==iter->limit) { + iter->length=index; /* in case it was <0 or wrong */ + } + if(iter->reservedField!=0) { + --index; /* we are in the middle of a supplementary code point */ + } + iter->index=index; + } + return iter->index; + case UITER_LIMIT: + case UITER_LENGTH: + if(iter->length<0) { + const uint8_t *s; + UChar32 c; + int32_t i, limit, length; + + s=(const uint8_t *)iter->context; + if(iter->index<0) { + /* + * the current UTF-16 index is unknown after setState(), + * we must first count from the beginning to here + */ + i=length=0; + limit=iter->start; + + /* count from the beginning to the current index */ + while(i<limit) { + U8_NEXT_OR_FFFD(s, i, limit, c); + length+=U16_LENGTH(c); + } + + /* assume i==limit==iter->start, set the UTF-16 index */ + iter->start=i; /* just in case setState() did not get us to a code point boundary */ + iter->index= iter->reservedField!=0 ? length-1 : length; + } else { + i=iter->start; + length=iter->index; + if(iter->reservedField!=0) { + ++length; + } + } + + /* count from the current index to the end */ + limit=iter->limit; + while(i<limit) { + U8_NEXT_OR_FFFD(s, i, limit, c); + length+=U16_LENGTH(c); + } + iter->length=length; + } + return iter->length; + default: + /* not a valid origin */ + /* Should never get here! */ + return -1; + } +} + +static int32_t U_CALLCONV +utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) { + const uint8_t *s; + UChar32 c; + int32_t pos; /* requested UTF-16 index */ + int32_t i; /* UTF-8 index */ + UBool havePos; + + /* calculate the requested UTF-16 index */ + switch(origin) { + case UITER_ZERO: + case UITER_START: + pos=delta; + havePos=TRUE; + /* iter->index<0 (unknown) is possible */ + break; + case UITER_CURRENT: + if(iter->index>=0) { + pos=iter->index+delta; + havePos=TRUE; + } else { + /* the current UTF-16 index is unknown after setState(), use only delta */ + pos=0; + havePos=FALSE; + } + break; + case UITER_LIMIT: + case UITER_LENGTH: + if(iter->length>=0) { + pos=iter->length+delta; + havePos=TRUE; + } else { + /* pin to the end, avoid counting the length */ + iter->index=-1; + iter->start=iter->limit; + iter->reservedField=0; + if(delta>=0) { + return UITER_UNKNOWN_INDEX; + } else { + /* the current UTF-16 index is unknown, use only delta */ + pos=0; + havePos=FALSE; + } + } + break; + default: + return -1; /* Error */ + } + + if(havePos) { + /* shortcuts: pinning to the edges of the string */ + if(pos<=0) { + iter->index=iter->start=iter->reservedField=0; + return 0; + } else if(iter->length>=0 && pos>=iter->length) { + iter->index=iter->length; + iter->start=iter->limit; + iter->reservedField=0; + return iter->index; + } + + /* minimize the number of U8_NEXT/PREV operations */ + if(iter->index<0 || pos<iter->index/2) { + /* go forward from the start instead of backward from the current index */ + iter->index=iter->start=iter->reservedField=0; + } else if(iter->length>=0 && (iter->length-pos)<(pos-iter->index)) { + /* + * if we have the UTF-16 index and length and the new position is + * closer to the end than the current index, + * then go backward from the end instead of forward from the current index + */ + iter->index=iter->length; + iter->start=iter->limit; + iter->reservedField=0; + } + + delta=pos-iter->index; + if(delta==0) { + return iter->index; /* nothing to do */ + } + } else { + /* move relative to unknown UTF-16 index */ + if(delta==0) { + return UITER_UNKNOWN_INDEX; /* nothing to do */ + } else if(-delta>=iter->start) { + /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */ + iter->index=iter->start=iter->reservedField=0; + return 0; + } else if(delta>=(iter->limit-iter->start)) { + /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */ + iter->index=iter->length; /* may or may not be <0 (unknown) */ + iter->start=iter->limit; + iter->reservedField=0; + return iter->index>=0 ? iter->index : (int32_t)UITER_UNKNOWN_INDEX; + } + } + + /* delta!=0 */ + + /* move towards the requested position, pin to the edges of the string */ + s=(const uint8_t *)iter->context; + pos=iter->index; /* could be <0 (unknown) */ + i=iter->start; + if(delta>0) { + /* go forward */ + int32_t limit=iter->limit; + if(iter->reservedField!=0) { + iter->reservedField=0; + ++pos; + --delta; + } + while(delta>0 && i<limit) { + U8_NEXT_OR_FFFD(s, i, limit, c); + if(c<=0xffff) { + ++pos; + --delta; + } else if(delta>=2) { + pos+=2; + delta-=2; + } else /* delta==1 */ { + /* stop in the middle of a supplementary code point */ + iter->reservedField=c; + ++pos; + break; /* delta=0; */ + } + } + if(i==limit) { + if(iter->length<0 && iter->index>=0) { + iter->length= iter->reservedField==0 ? pos : pos+1; + } else if(iter->index<0 && iter->length>=0) { + iter->index= iter->reservedField==0 ? iter->length : iter->length-1; + } + } + } else /* delta<0 */ { + /* go backward */ + if(iter->reservedField!=0) { + iter->reservedField=0; + i-=4; /* we stayed behind the supplementary code point; go before it now */ + --pos; + ++delta; + } + while(delta<0 && i>0) { + U8_PREV_OR_FFFD(s, 0, i, c); + if(c<=0xffff) { + --pos; + ++delta; + } else if(delta<=-2) { + pos-=2; + delta+=2; + } else /* delta==-1 */ { + /* stop in the middle of a supplementary code point */ + i+=4; /* back to behind this supplementary code point for consistent state */ + iter->reservedField=c; + --pos; + break; /* delta=0; */ + } + } + } + + iter->start=i; + if(iter->index>=0) { + return iter->index=pos; + } else { + /* we started with index<0 (unknown) so pos is bogus */ + if(i<=1) { + return iter->index=i; /* reached the beginning */ + } else { + /* we still don't know the UTF-16 index */ + return UITER_UNKNOWN_INDEX; + } + } +} + +static UBool U_CALLCONV +utf8IteratorHasNext(UCharIterator *iter) { + return iter->start<iter->limit || iter->reservedField!=0; +} + +static UBool U_CALLCONV +utf8IteratorHasPrevious(UCharIterator *iter) { + return iter->start>0; +} + +static UChar32 U_CALLCONV +utf8IteratorCurrent(UCharIterator *iter) { + if(iter->reservedField!=0) { + return U16_TRAIL(iter->reservedField); + } else if(iter->start<iter->limit) { + const uint8_t *s=(const uint8_t *)iter->context; + UChar32 c; + int32_t i=iter->start; + + U8_NEXT_OR_FFFD(s, i, iter->limit, c); + if(c<=0xffff) { + return c; + } else { + return U16_LEAD(c); + } + } else { + return U_SENTINEL; + } +} + +static UChar32 U_CALLCONV +utf8IteratorNext(UCharIterator *iter) { + int32_t index; + + if(iter->reservedField!=0) { + UChar trail=U16_TRAIL(iter->reservedField); + iter->reservedField=0; + if((index=iter->index)>=0) { + iter->index=index+1; + } + return trail; + } else if(iter->start<iter->limit) { + const uint8_t *s=(const uint8_t *)iter->context; + UChar32 c; + + U8_NEXT_OR_FFFD(s, iter->start, iter->limit, c); + if((index=iter->index)>=0) { + iter->index=++index; + if(iter->length<0 && iter->start==iter->limit) { + iter->length= c<=0xffff ? index : index+1; + } + } else if(iter->start==iter->limit && iter->length>=0) { + iter->index= c<=0xffff ? iter->length : iter->length-1; + } + if(c<=0xffff) { + return c; + } else { + iter->reservedField=c; + return U16_LEAD(c); + } + } else { + return U_SENTINEL; + } +} + +static UChar32 U_CALLCONV +utf8IteratorPrevious(UCharIterator *iter) { + int32_t index; + + if(iter->reservedField!=0) { + UChar lead=U16_LEAD(iter->reservedField); + iter->reservedField=0; + iter->start-=4; /* we stayed behind the supplementary code point; go before it now */ + if((index=iter->index)>0) { + iter->index=index-1; + } + return lead; + } else if(iter->start>0) { + const uint8_t *s=(const uint8_t *)iter->context; + UChar32 c; + + U8_PREV_OR_FFFD(s, 0, iter->start, c); + if((index=iter->index)>0) { + iter->index=index-1; + } else if(iter->start<=1) { + iter->index= c<=0xffff ? iter->start : iter->start+1; + } + if(c<=0xffff) { + return c; + } else { + iter->start+=4; /* back to behind this supplementary code point for consistent state */ + iter->reservedField=c; + return U16_TRAIL(c); + } + } else { + return U_SENTINEL; + } +} + +static uint32_t U_CALLCONV +utf8IteratorGetState(const UCharIterator *iter) { + uint32_t state=(uint32_t)(iter->start<<1); + if(iter->reservedField!=0) { + state|=1; + } + return state; +} + +static void U_CALLCONV +utf8IteratorSetState(UCharIterator *iter, + uint32_t state, + UErrorCode *pErrorCode) +{ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + /* do nothing */ + } else if(iter==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } else if(state==utf8IteratorGetState(iter)) { + /* setting to the current state: no-op */ + } else { + int32_t index=(int32_t)(state>>1); /* UTF-8 index */ + state&=1; /* 1 if in surrogate pair, must be index>=4 */ + + if((state==0 ? index<0 : index<4) || iter->limit<index) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + } else { + iter->start=index; /* restore UTF-8 byte index */ + if(index<=1) { + iter->index=index; + } else { + iter->index=-1; /* unknown UTF-16 index */ + } + if(state==0) { + iter->reservedField=0; + } else { + /* verified index>=4 above */ + UChar32 c; + U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c); + if(c<=0xffff) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + } else { + iter->reservedField=c; + } + } + } + } +} + +static const UCharIterator utf8Iterator={ + 0, 0, 0, 0, 0, 0, + utf8IteratorGetIndex, + utf8IteratorMove, + utf8IteratorHasNext, + utf8IteratorHasPrevious, + utf8IteratorCurrent, + utf8IteratorNext, + utf8IteratorPrevious, + NULL, + utf8IteratorGetState, + utf8IteratorSetState +}; + +U_CAPI void U_EXPORT2 +uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length) { + if(iter!=0) { + if(s!=0 && length>=-1) { + *iter=utf8Iterator; + iter->context=s; + if(length>=0) { + iter->limit=length; + } else { + iter->limit=(int32_t)uprv_strlen(s); + } + iter->length= iter->limit<=1 ? iter->limit : -1; + } else { + *iter=noopIterator; + } + } +} + +/* Helper functions --------------------------------------------------------- */ + +U_CAPI UChar32 U_EXPORT2 +uiter_current32(UCharIterator *iter) { + UChar32 c, c2; + + c=iter->current(iter); + if(U16_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c)) { + /* + * go to the next code unit + * we know that we are not at the limit because c!=U_SENTINEL + */ + iter->move(iter, 1, UITER_CURRENT); + if(U16_IS_TRAIL(c2=iter->current(iter))) { + c=U16_GET_SUPPLEMENTARY(c, c2); + } + + /* undo index movement */ + iter->move(iter, -1, UITER_CURRENT); + } else { + if(U16_IS_LEAD(c2=iter->previous(iter))) { + c=U16_GET_SUPPLEMENTARY(c2, c); + } + if(c2>=0) { + /* undo index movement */ + iter->move(iter, 1, UITER_CURRENT); + } + } + } + return c; +} + +U_CAPI UChar32 U_EXPORT2 +uiter_next32(UCharIterator *iter) { + UChar32 c, c2; + + c=iter->next(iter); + if(U16_IS_LEAD(c)) { + if(U16_IS_TRAIL(c2=iter->next(iter))) { + c=U16_GET_SUPPLEMENTARY(c, c2); + } else if(c2>=0) { + /* unmatched first surrogate, undo index movement */ + iter->move(iter, -1, UITER_CURRENT); + } + } + return c; +} + +U_CAPI UChar32 U_EXPORT2 +uiter_previous32(UCharIterator *iter) { + UChar32 c, c2; + + c=iter->previous(iter); + if(U16_IS_TRAIL(c)) { + if(U16_IS_LEAD(c2=iter->previous(iter))) { + c=U16_GET_SUPPLEMENTARY(c2, c); + } else if(c2>=0) { + /* unmatched second surrogate, undo index movement */ + iter->move(iter, 1, UITER_CURRENT); + } + } + return c; +} + +U_CAPI uint32_t U_EXPORT2 +uiter_getState(const UCharIterator *iter) { + if(iter==NULL || iter->getState==NULL) { + return UITER_NO_STATE; + } else { + return iter->getState(iter); + } +} + +U_CAPI void U_EXPORT2 +uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + /* do nothing */ + } else if(iter==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } else if(iter->setState==NULL) { + *pErrorCode=U_UNSUPPORTED_ERROR; + } else { + iter->setState(iter, state, pErrorCode); + } +} + +U_CDECL_END diff --git a/thirdparty/icu4c/common/ulayout_props.h b/thirdparty/icu4c/common/ulayout_props.h new file mode 100644 index 0000000000..c0f028c713 --- /dev/null +++ b/thirdparty/icu4c/common/ulayout_props.h @@ -0,0 +1,46 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ulayout_props.h +// created: 2019feb12 Markus W. Scherer + +#ifndef __ULAYOUT_PROPS_H__ +#define __ULAYOUT_PROPS_H__ + +#include "unicode/utypes.h" + +// file definitions ------------------------------------------------------------ + +#define ULAYOUT_DATA_NAME "ulayout" +#define ULAYOUT_DATA_TYPE "icu" + +// data format "Layo" +#define ULAYOUT_FMT_0 0x4c +#define ULAYOUT_FMT_1 0x61 +#define ULAYOUT_FMT_2 0x79 +#define ULAYOUT_FMT_3 0x6f + +// indexes into indexes[] +enum { + // Element 0 stores the length of the indexes[] array. + ULAYOUT_IX_INDEXES_LENGTH, + // Elements 1..7 store the tops of consecutive code point tries. + // No trie is stored if the difference between two of these is less than 16. + ULAYOUT_IX_INPC_TRIE_TOP, + ULAYOUT_IX_INSC_TRIE_TOP, + ULAYOUT_IX_VO_TRIE_TOP, + ULAYOUT_IX_RESERVED_TOP, + + ULAYOUT_IX_TRIES_TOP = 7, + + ULAYOUT_IX_MAX_VALUES = 9, + + // Length of indexes[]. Multiple of 4 to 16-align the tries. + ULAYOUT_IX_COUNT = 12 +}; + +constexpr int32_t ULAYOUT_MAX_INPC_SHIFT = 24; +constexpr int32_t ULAYOUT_MAX_INSC_SHIFT = 16; +constexpr int32_t ULAYOUT_MAX_VO_SHIFT = 8; + +#endif // __ULAYOUT_PROPS_H__ diff --git a/thirdparty/icu4c/common/ulist.cpp b/thirdparty/icu4c/common/ulist.cpp new file mode 100644 index 0000000000..c5180431c3 --- /dev/null +++ b/thirdparty/icu4c/common/ulist.cpp @@ -0,0 +1,270 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2009-2016, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +*/ + +#include "ulist.h" +#include "cmemory.h" +#include "cstring.h" +#include "uenumimp.h" + +typedef struct UListNode UListNode; +struct UListNode { + void *data; + + UListNode *next; + UListNode *previous; + + /* When data is created with uprv_malloc, needs to be freed during deleteList function. */ + UBool forceDelete; +}; + +struct UList { + UListNode *curr; + UListNode *head; + UListNode *tail; + + int32_t size; +}; + +static void ulist_addFirstItem(UList *list, UListNode *newItem); + +U_CAPI UList *U_EXPORT2 ulist_createEmptyList(UErrorCode *status) { + UList *newList = NULL; + + if (U_FAILURE(*status)) { + return NULL; + } + + newList = (UList *)uprv_malloc(sizeof(UList)); + if (newList == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + newList->curr = NULL; + newList->head = NULL; + newList->tail = NULL; + newList->size = 0; + + return newList; +} + +/* + * Function called by addItemEndList or addItemBeginList when the first item is added to the list. + * This function properly sets the pointers for the first item added. + */ +static void ulist_addFirstItem(UList *list, UListNode *newItem) { + newItem->next = NULL; + newItem->previous = NULL; + list->head = newItem; + list->tail = newItem; +} + +static void ulist_removeItem(UList *list, UListNode *p) { + if (p->previous == NULL) { + // p is the list head. + list->head = p->next; + } else { + p->previous->next = p->next; + } + if (p->next == NULL) { + // p is the list tail. + list->tail = p->previous; + } else { + p->next->previous = p->previous; + } + if (p == list->curr) { + list->curr = p->next; + } + --list->size; + if (p->forceDelete) { + uprv_free(p->data); + } + uprv_free(p); +} + +U_CAPI void U_EXPORT2 ulist_addItemEndList(UList *list, const void *data, UBool forceDelete, UErrorCode *status) { + UListNode *newItem = NULL; + + if (U_FAILURE(*status) || list == NULL || data == NULL) { + if (forceDelete) { + uprv_free((void *)data); + } + return; + } + + newItem = (UListNode *)uprv_malloc(sizeof(UListNode)); + if (newItem == NULL) { + if (forceDelete) { + uprv_free((void *)data); + } + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + newItem->data = (void *)(data); + newItem->forceDelete = forceDelete; + + if (list->size == 0) { + ulist_addFirstItem(list, newItem); + } else { + newItem->next = NULL; + newItem->previous = list->tail; + list->tail->next = newItem; + list->tail = newItem; + } + + list->size++; +} + +U_CAPI void U_EXPORT2 ulist_addItemBeginList(UList *list, const void *data, UBool forceDelete, UErrorCode *status) { + UListNode *newItem = NULL; + + if (U_FAILURE(*status) || list == NULL || data == NULL) { + if (forceDelete) { + uprv_free((void *)data); + } + return; + } + + newItem = (UListNode *)uprv_malloc(sizeof(UListNode)); + if (newItem == NULL) { + if (forceDelete) { + uprv_free((void *)data); + } + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + newItem->data = (void *)(data); + newItem->forceDelete = forceDelete; + + if (list->size == 0) { + ulist_addFirstItem(list, newItem); + } else { + newItem->previous = NULL; + newItem->next = list->head; + list->head->previous = newItem; + list->head = newItem; + } + + list->size++; +} + +U_CAPI UBool U_EXPORT2 ulist_containsString(const UList *list, const char *data, int32_t length) { + if (list != NULL) { + const UListNode *pointer; + for (pointer = list->head; pointer != NULL; pointer = pointer->next) { + if (length == (int32_t)uprv_strlen((const char *)pointer->data)) { + if (uprv_memcmp(data, pointer->data, length) == 0) { + return TRUE; + } + } + } + } + return FALSE; +} + +U_CAPI UBool U_EXPORT2 ulist_removeString(UList *list, const char *data) { + if (list != NULL) { + UListNode *pointer; + for (pointer = list->head; pointer != NULL; pointer = pointer->next) { + if (uprv_strcmp(data, (const char *)pointer->data) == 0) { + ulist_removeItem(list, pointer); + // Remove only the first occurrence, like Java LinkedList.remove(Object). + return TRUE; + } + } + } + return FALSE; +} + +U_CAPI void *U_EXPORT2 ulist_getNext(UList *list) { + UListNode *curr = NULL; + + if (list == NULL || list->curr == NULL) { + return NULL; + } + + curr = list->curr; + list->curr = curr->next; + + return curr->data; +} + +U_CAPI int32_t U_EXPORT2 ulist_getListSize(const UList *list) { + if (list != NULL) { + return list->size; + } + + return -1; +} + +U_CAPI void U_EXPORT2 ulist_resetList(UList *list) { + if (list != NULL) { + list->curr = list->head; + } +} + +U_CAPI void U_EXPORT2 ulist_deleteList(UList *list) { + UListNode *listHead = NULL; + + if (list != NULL) { + listHead = list->head; + while (listHead != NULL) { + UListNode *listPointer = listHead->next; + + if (listHead->forceDelete) { + uprv_free(listHead->data); + } + + uprv_free(listHead); + listHead = listPointer; + } + uprv_free(list); + list = NULL; + } +} + +U_CAPI void U_EXPORT2 ulist_close_keyword_values_iterator(UEnumeration *en) { + if (en != NULL) { + ulist_deleteList((UList *)(en->context)); + uprv_free(en); + } +} + +U_CAPI int32_t U_EXPORT2 ulist_count_keyword_values(UEnumeration *en, UErrorCode *status) { + if (U_FAILURE(*status)) { + return -1; + } + + return ulist_getListSize((UList *)(en->context)); +} + +U_CAPI const char * U_EXPORT2 ulist_next_keyword_value(UEnumeration *en, int32_t *resultLength, UErrorCode *status) { + const char *s; + if (U_FAILURE(*status)) { + return NULL; + } + + s = (const char *)ulist_getNext((UList *)(en->context)); + if (s != NULL && resultLength != NULL) { + *resultLength = static_cast<int32_t>(uprv_strlen(s)); + } + return s; +} + +U_CAPI void U_EXPORT2 ulist_reset_keyword_values_iterator(UEnumeration *en, UErrorCode *status) { + if (U_FAILURE(*status)) { + return ; + } + + ulist_resetList((UList *)(en->context)); +} + +U_CAPI UList * U_EXPORT2 ulist_getListFromEnum(UEnumeration *en) { + return (UList *)(en->context); +} diff --git a/thirdparty/icu4c/common/ulist.h b/thirdparty/icu4c/common/ulist.h new file mode 100644 index 0000000000..de58a4ad02 --- /dev/null +++ b/thirdparty/icu4c/common/ulist.h @@ -0,0 +1,50 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2009-2016, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +*/ + +#ifndef ULIST_H +#define ULIST_H + +#include "unicode/utypes.h" +#include "unicode/uenum.h" + +struct UList; +typedef struct UList UList; + +U_CAPI UList * U_EXPORT2 ulist_createEmptyList(UErrorCode *status); + +U_CAPI void U_EXPORT2 ulist_addItemEndList(UList *list, const void *data, UBool forceDelete, UErrorCode *status); + +U_CAPI void U_EXPORT2 ulist_addItemBeginList(UList *list, const void *data, UBool forceDelete, UErrorCode *status); + +U_CAPI UBool U_EXPORT2 ulist_containsString(const UList *list, const char *data, int32_t length); + +U_CAPI UBool U_EXPORT2 ulist_removeString(UList *list, const char *data); + +U_CAPI void *U_EXPORT2 ulist_getNext(UList *list); + +U_CAPI int32_t U_EXPORT2 ulist_getListSize(const UList *list); + +U_CAPI void U_EXPORT2 ulist_resetList(UList *list); + +U_CAPI void U_EXPORT2 ulist_deleteList(UList *list); + +/* + * The following are for use when creating UEnumeration object backed by UList. + */ +U_CAPI void U_EXPORT2 ulist_close_keyword_values_iterator(UEnumeration *en); + +U_CAPI int32_t U_EXPORT2 ulist_count_keyword_values(UEnumeration *en, UErrorCode *status); + +U_CAPI const char * U_EXPORT2 ulist_next_keyword_value(UEnumeration* en, int32_t *resultLength, UErrorCode* status); + +U_CAPI void U_EXPORT2 ulist_reset_keyword_values_iterator(UEnumeration* en, UErrorCode* status); + +U_CAPI UList * U_EXPORT2 ulist_getListFromEnum(UEnumeration *en); + +#endif diff --git a/thirdparty/icu4c/common/uloc.cpp b/thirdparty/icu4c/common/uloc.cpp new file mode 100644 index 0000000000..522f33dbe2 --- /dev/null +++ b/thirdparty/icu4c/common/uloc.cpp @@ -0,0 +1,2176 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File ULOC.CPP +* +* Modification History: +* +* Date Name Description +* 04/01/97 aliu Creation. +* 08/21/98 stephen JDK 1.2 sync +* 12/08/98 rtg New Locale implementation and C API +* 03/15/99 damiba overhaul. +* 04/06/99 stephen changed setDefault() to realloc and copy +* 06/14/99 stephen Changed calls to ures_open for new params +* 07/21/99 stephen Modified setDefault() to propagate to C++ +* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs, +* brought canonicalization code into line with spec +*****************************************************************************/ + +/* + POSIX's locale format, from putil.c: [no spaces] + + ll [ _CC ] [ . MM ] [ @ VV] + + l = lang, C = ctry, M = charmap, V = variant +*/ + +#include "unicode/bytestream.h" +#include "unicode/errorcode.h" +#include "unicode/stringpiece.h" +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "unicode/uloc.h" + +#include "bytesinkutil.h" +#include "putilimp.h" +#include "ustr_imp.h" +#include "ulocimp.h" +#include "umutex.h" +#include "cstring.h" +#include "cmemory.h" +#include "locmap.h" +#include "uarrsort.h" +#include "uenumimp.h" +#include "uassert.h" +#include "charstr.h" + +U_NAMESPACE_USE + +/* ### Declarations **************************************************/ + +/* Locale stuff from locid.cpp */ +U_CFUNC void locale_set_default(const char *id); +U_CFUNC const char *locale_get_default(void); + +/* ### Data tables **************************************************/ + +/** + * Table of language codes, both 2- and 3-letter, with preference + * given to 2-letter codes where possible. Includes 3-letter codes + * that lack a 2-letter equivalent. + * + * This list must be in sorted order. This list is returned directly + * to the user by some API. + * + * This list must be kept in sync with LANGUAGES_3, with corresponding + * entries matched. + * + * This table should be terminated with a NULL entry, followed by a + * second list, and another NULL entry. The first list is visible to + * user code when this array is returned by API. The second list + * contains codes we support, but do not expose through user API. + * + * Notes + * + * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to + * include the revisions up to 2001/7/27 *CWB* + * + * The 3 character codes are the terminology codes like RFC 3066. This + * is compatible with prior ICU codes + * + * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the + * table but now at the end of the table because 3 character codes are + * duplicates. This avoids bad searches going from 3 to 2 character + * codes. + * + * The range qaa-qtz is reserved for local use + */ +/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ +/* ISO639 table version is 20150505 */ +/* Subsequent hand addition of selected languages */ +static const char * const LANGUAGES[] = { + "aa", "ab", "ace", "ach", "ada", "ady", "ae", "aeb", + "af", "afh", "agq", "ain", "ak", "akk", "akz", "ale", + "aln", "alt", "am", "an", "ang", "anp", "ar", "arc", + "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as", + "asa", "ase", "ast", "av", "avk", "awa", "ay", "az", + "ba", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj", + "be", "bej", "bem", "bew", "bez", "bfd", "bfq", "bg", + "bgn", "bho", "bi", "bik", "bin", "bjn", "bkm", "bla", + "bm", "bn", "bo", "bpy", "bqi", "br", "bra", "brh", + "brx", "bs", "bss", "bua", "bug", "bum", "byn", "byv", + "ca", "cad", "car", "cay", "cch", "ccp", "ce", "ceb", "cgg", + "ch", "chb", "chg", "chk", "chm", "chn", "cho", "chp", + "chr", "chy", "ckb", "co", "cop", "cps", "cr", "crh", + "cs", "csb", "cu", "cv", "cy", + "da", "dak", "dar", "dav", "de", "del", "den", "dgr", + "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv", + "dyo", "dyu", "dz", "dzg", + "ebu", "ee", "efi", "egl", "egy", "eka", "el", "elx", + "en", "enm", "eo", "es", "esu", "et", "eu", "ewo", + "ext", + "fa", "fan", "fat", "ff", "fi", "fil", "fit", "fj", + "fo", "fon", "fr", "frc", "frm", "fro", "frp", "frr", + "frs", "fur", "fy", + "ga", "gaa", "gag", "gan", "gay", "gba", "gbz", "gd", + "gez", "gil", "gl", "glk", "gmh", "gn", "goh", "gom", + "gon", "gor", "got", "grb", "grc", "gsw", "gu", "guc", + "gur", "guz", "gv", "gwi", + "ha", "hai", "hak", "haw", "he", "hi", "hif", "hil", + "hit", "hmn", "ho", "hr", "hsb", "hsn", "ht", "hu", + "hup", "hy", "hz", + "ia", "iba", "ibb", "id", "ie", "ig", "ii", "ik", + "ilo", "inh", "io", "is", "it", "iu", "izh", + "ja", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut", + "jv", + "ka", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd", + "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg", "kgp", + "kha", "kho", "khq", "khw", "ki", "kiu", "kj", "kk", + "kkj", "kl", "kln", "km", "kmb", "kn", "ko", "koi", + "kok", "kos", "kpe", "kr", "krc", "kri", "krj", "krl", + "kru", "ks", "ksb", "ksf", "ksh", "ku", "kum", "kut", + "kv", "kw", "ky", + "la", "lad", "lag", "lah", "lam", "lb", "lez", "lfn", + "lg", "li", "lij", "liv", "lkt", "lmo", "ln", "lo", + "lol", "loz", "lrc", "lt", "ltg", "lu", "lua", "lui", + "lun", "luo", "lus", "luy", "lv", "lzh", "lzz", + "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", + "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga", + "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk", + "ml", "mn", "mnc", "mni", "mo", + "moh", "mos", "mr", "mrj", + "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv", + "my", "mye", "myv", "mzn", + "na", "nan", "nap", "naq", "nb", "nd", "nds", "ne", + "new", "ng", "nia", "niu", "njo", "nl", "nmg", "nn", + "nnh", "no", "nog", "non", "nov", "nqo", "nr", "nso", + "nus", "nv", "nwc", "ny", "nym", "nyn", "nyo", "nzi", + "oc", "oj", "om", "or", "os", "osa", "ota", + "pa", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc", + "pdt", "peo", "pfl", "phn", "pi", "pl", "pms", "pnt", + "pon", "prg", "pro", "ps", "pt", + "qu", "quc", "qug", + "raj", "rap", "rar", "rgn", "rif", "rm", "rn", "ro", + "rof", "rom", "rtm", "ru", "rue", "rug", "rup", + "rw", "rwk", + "sa", "sad", "sah", "sam", "saq", "sas", "sat", "saz", + "sba", "sbp", "sc", "scn", "sco", "sd", "sdc", "sdh", + "se", "see", "seh", "sei", "sel", "ses", "sg", "sga", + "sgs", "shi", "shn", "shu", "si", "sid", "sk", + "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms", + "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr", + "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux", + "sv", "sw", "swb", "swc", "syc", "syr", "szl", + "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg", + "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl", + "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi", + "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt", + "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm", + "udm", "ug", "uga", "uk", "umb", "und", "ur", "uz", + "vai", "ve", "vec", "vep", "vi", "vls", "vmf", "vo", + "vot", "vro", "vun", + "wa", "wae", "wal", "war", "was", "wbp", "wo", "wuu", + "xal", "xh", "xmf", "xog", + "yao", "yap", "yav", "ybb", "yi", "yo", "yrl", "yue", + "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu", + "zun", "zxx", "zza", +NULL, + "in", "iw", "ji", "jw", "sh", /* obsolete language codes */ +NULL +}; + +static const char* const DEPRECATED_LANGUAGES[]={ + "in", "iw", "ji", "jw", NULL, NULL +}; +static const char* const REPLACEMENT_LANGUAGES[]={ + "id", "he", "yi", "jv", NULL, NULL +}; + +/** + * Table of 3-letter language codes. + * + * This is a lookup table used to convert 3-letter language codes to + * their 2-letter equivalent, where possible. It must be kept in sync + * with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the + * same language as LANGUAGES_3[i]. The commented-out lines are + * copied from LANGUAGES to make eyeballing this baby easier. + * + * Where a 3-letter language code has no 2-letter equivalent, the + * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i]. + * + * This table should be terminated with a NULL entry, followed by a + * second list, and another NULL entry. The two lists correspond to + * the two lists in LANGUAGES. + */ +/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */ +/* ISO639 table version is 20150505 */ +/* Subsequent hand addition of selected languages */ +static const char * const LANGUAGES_3[] = { + "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb", + "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale", + "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc", + "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm", + "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze", + "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj", + "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul", + "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla", + "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh", + "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv", + "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg", + "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp", + "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh", + "ces", "csb", "chu", "chv", "cym", + "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr", + "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div", + "dyo", "dyu", "dzo", "dzg", + "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx", + "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo", + "ext", + "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij", + "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr", + "frs", "fur", "fry", + "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla", + "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom", + "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc", + "gur", "guz", "glv", "gwi", + "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil", + "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun", + "hup", "hye", "her", + "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk", + "ilo", "inh", "ido", "isl", "ita", "iku", "izh", + "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut", + "jav", + "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd", + "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp", + "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz", + "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi", + "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl", + "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut", + "kom", "cor", "kir", + "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn", + "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao", + "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui", + "lun", "luo", "lus", "luy", "lav", "lzh", "lzz", + "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", + "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga", + "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd", + "mal", "mon", "mnc", "mni", "mol", + "moh", "mos", "mar", "mrj", + "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv", + "mya", "mye", "myv", "mzn", + "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep", + "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno", + "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso", + "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi", + "oci", "oji", "orm", "ori", "oss", "osa", "ota", + "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc", + "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt", + "pon", "prg", "pro", "pus", "por", + "que", "quc", "qug", + "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron", + "rof", "rom", "rtm", "rus", "rue", "rug", "rup", + "kin", "rwk", + "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz", + "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh", + "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga", + "sgs", "shi", "shn", "shu", "sin", "sid", "slk", + "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms", + "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr", + "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux", + "swe", "swa", "swb", "swc", "syc", "syr", "szl", + "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk", + "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl", + "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi", + "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt", + "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm", + "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb", + "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol", + "vot", "vro", "vun", + "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu", + "xal", "xho", "xmf", "xog", + "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue", + "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul", + "zun", "zxx", "zza", +NULL, +/* "in", "iw", "ji", "jw", "sh", */ + "ind", "heb", "yid", "jaw", "srp", +NULL +}; + +/** + * Table of 2-letter country codes. + * + * This list must be in sorted order. This list is returned directly + * to the user by some API. + * + * This list must be kept in sync with COUNTRIES_3, with corresponding + * entries matched. + * + * This table should be terminated with a NULL entry, followed by a + * second list, and another NULL entry. The first list is visible to + * user code when this array is returned by API. The second list + * contains codes we support, but do not expose through user API. + * + * Notes: + * + * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per + * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added + * new codes keeping the old ones for compatibility updated to include + * 1999/12/03 revisions *CWB* + * + * RO(ROM) is now RO(ROU) according to + * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html + */ +static const char * const COUNTRIES[] = { + "AD", "AE", "AF", "AG", "AI", "AL", "AM", + "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", + "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", + "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", + "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", + "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", + "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", + "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", + "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", + "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", + "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", + "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", + "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", + "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", + "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", + "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", + "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", + "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", + "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", + "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", + "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", + "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", + "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", + "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", + "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", + "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", + "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", + "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", + "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", + "WS", "YE", "YT", "ZA", "ZM", "ZW", +NULL, + "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */ +NULL +}; + +static const char* const DEPRECATED_COUNTRIES[] = { + "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */ +}; +static const char* const REPLACEMENT_COUNTRIES[] = { +/* "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */ + "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL /* replacement country codes */ +}; + +/** + * Table of 3-letter country codes. + * + * This is a lookup table used to convert 3-letter country codes to + * their 2-letter equivalent. It must be kept in sync with COUNTRIES. + * For all valid i, COUNTRIES[i] must refer to the same country as + * COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES + * to make eyeballing this baby easier. + * + * This table should be terminated with a NULL entry, followed by a + * second list, and another NULL entry. The two lists correspond to + * the two lists in COUNTRIES. + */ +static const char * const COUNTRIES_3[] = { +/* "AD", "AE", "AF", "AG", "AI", "AL", "AM", */ + "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM", +/* "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AX", "AZ", */ + "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE", +/* "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", */ + "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI", +/* "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", */ + "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT", +/* "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", */ + "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG", +/* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */ + "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI", +/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */ + "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK", +/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */ + "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI", +/* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */ + "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA", +/* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */ + "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL", +/* "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", */ + "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", +/* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */ + "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN", +/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ + "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", +/* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */ + "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR", +/* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */ + "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO", +/* "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", */ + "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX", +/* "LV", "LY", "MA", "MC", "MD", "ME", "MF", "MG", "MH", "MK", */ + "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD", +/* "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", */ + "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR", +/* "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", */ + "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM", +/* "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", */ + "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL", +/* "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", */ + "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG", +/* "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", */ + "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT", +/* "PW", "PY", "QA", "RE", "RO", "RS", "RU", "RW", "SA", */ + "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU", +/* "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", */ + "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM", +/* "SK", "SL", "SM", "SN", "SO", "SR", "SS", "ST", "SV", */ + "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV", +/* "SX", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", */ + "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK", +/* "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", */ + "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV", +/* "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", */ + "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB", +/* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ + "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", +/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */ + "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE", +NULL, +/* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */ + "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR", +NULL +}; + +typedef struct CanonicalizationMap { + const char *id; /* input ID */ + const char *canonicalID; /* canonicalized output ID */ +} CanonicalizationMap; + +/** + * A map to canonicalize locale IDs. This handles a variety of + * different semantic kinds of transformations. + */ +static const CanonicalizationMap CANONICALIZE_MAP[] = { + { "art__LOJBAN", "jbo" }, /* registered name */ + { "hy__AREVELA", "hy" }, /* Registered IANA variant */ + { "hy__AREVMDA", "hyw" }, /* Registered IANA variant */ + { "zh__GUOYU", "zh" }, /* registered name */ + { "zh__HAKKA", "hak" }, /* registered name */ + { "zh__XIANG", "hsn" }, /* registered name */ + // subtags with 3 chars won't be treated as variants. + { "zh_GAN", "gan" }, /* registered name */ + { "zh_MIN_NAN", "nan" }, /* registered name */ + { "zh_WUU", "wuu" }, /* registered name */ + { "zh_YUE", "yue" }, /* registered name */ +}; + +/* ### BCP47 Conversion *******************************************/ +/* Test if the locale id has BCP47 u extension and does not have '@' */ +#define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1) +/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */ +#define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \ + if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \ + U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \ + finalID=id; \ + if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \ + } else { \ + finalID=buffer; \ + } \ +} UPRV_BLOCK_MACRO_END +/* Gets the size of the shortest subtag in the given localeID. */ +static int32_t getShortestSubtagLength(const char *localeID) { + int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID)); + int32_t length = localeIDLength; + int32_t tmpLength = 0; + int32_t i; + UBool reset = TRUE; + + for (i = 0; i < localeIDLength; i++) { + if (localeID[i] != '_' && localeID[i] != '-') { + if (reset) { + tmpLength = 0; + reset = FALSE; + } + tmpLength++; + } else { + if (tmpLength != 0 && tmpLength < length) { + length = tmpLength; + } + reset = TRUE; + } + } + + return length; +} + +/* ### Keywords **************************************************/ +#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) +#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) +/* Punctuation/symbols allowed in legacy key values */ +#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/') + +#define ULOC_KEYWORD_BUFFER_LEN 25 +#define ULOC_MAX_NO_KEYWORDS 25 + +U_CAPI const char * U_EXPORT2 +locale_getKeywordsStart(const char *localeID) { + const char *result = NULL; + if((result = uprv_strchr(localeID, '@')) != NULL) { + return result; + } +#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) + else { + /* We do this because the @ sign is variant, and the @ sign used on one + EBCDIC machine won't be compiled the same way on other EBCDIC based + machines. */ + static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; + const uint8_t *charToFind = ebcdicSigns; + while(*charToFind) { + if((result = uprv_strchr(localeID, *charToFind)) != NULL) { + return result; + } + charToFind++; + } + } +#endif + return NULL; +} + +/** + * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN] + * @param keywordName incoming name to be canonicalized + * @param status return status (keyword too long) + * @return length of the keyword name + */ +static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status) +{ + int32_t keywordNameLen = 0; + + for (; *keywordName != 0; keywordName++) { + if (!UPRV_ISALPHANUM(*keywordName)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ + return 0; + } + if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) { + buf[keywordNameLen++] = uprv_tolower(*keywordName); + } else { + /* keyword name too long for internal buffer */ + *status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } + } + if (keywordNameLen == 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */ + return 0; + } + buf[keywordNameLen] = 0; /* terminate */ + + return keywordNameLen; +} + +typedef struct { + char keyword[ULOC_KEYWORD_BUFFER_LEN]; + int32_t keywordLen; + const char *valueStart; + int32_t valueLen; +} KeywordStruct; + +static int32_t U_CALLCONV +compareKeywordStructs(const void * /*context*/, const void *left, const void *right) { + const char* leftString = ((const KeywordStruct *)left)->keyword; + const char* rightString = ((const KeywordStruct *)right)->keyword; + return uprv_strcmp(leftString, rightString); +} + +U_CFUNC void +ulocimp_getKeywords(const char *localeID, + char prev, + ByteSink& sink, + UBool valuesToo, + UErrorCode *status) +{ + KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS]; + + int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS; + int32_t numKeywords = 0; + const char* pos = localeID; + const char* equalSign = NULL; + const char* semicolon = NULL; + int32_t i = 0, j, n; + + if(prev == '@') { /* start of keyword definition */ + /* we will grab pairs, trim spaces, lowercase keywords, sort and return */ + do { + UBool duplicate = FALSE; + /* skip leading spaces */ + while(*pos == ' ') { + pos++; + } + if (!*pos) { /* handle trailing "; " */ + break; + } + if(numKeywords == maxKeywords) { + *status = U_INTERNAL_PROGRAM_ERROR; + return; + } + equalSign = uprv_strchr(pos, '='); + semicolon = uprv_strchr(pos, ';'); + /* lack of '=' [foo@currency] is illegal */ + /* ';' before '=' [foo@currency;collation=pinyin] is illegal */ + if(!equalSign || (semicolon && semicolon<equalSign)) { + *status = U_INVALID_FORMAT_ERROR; + return; + } + /* need to normalize both keyword and keyword name */ + if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) { + /* keyword name too long for internal buffer */ + *status = U_INTERNAL_PROGRAM_ERROR; + return; + } + for(i = 0, n = 0; i < equalSign - pos; ++i) { + if (pos[i] != ' ') { + keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]); + } + } + + /* zero-length keyword is an error. */ + if (n == 0) { + *status = U_INVALID_FORMAT_ERROR; + return; + } + + keywordList[numKeywords].keyword[n] = 0; + keywordList[numKeywords].keywordLen = n; + /* now grab the value part. First we skip the '=' */ + equalSign++; + /* then we leading spaces */ + while(*equalSign == ' ') { + equalSign++; + } + + /* Premature end or zero-length value */ + if (!*equalSign || equalSign == semicolon) { + *status = U_INVALID_FORMAT_ERROR; + return; + } + + keywordList[numKeywords].valueStart = equalSign; + + pos = semicolon; + i = 0; + if(pos) { + while(*(pos - i - 1) == ' ') { + i++; + } + keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i); + pos++; + } else { + i = (int32_t)uprv_strlen(equalSign); + while(i && equalSign[i-1] == ' ') { + i--; + } + keywordList[numKeywords].valueLen = i; + } + /* If this is a duplicate keyword, then ignore it */ + for (j=0; j<numKeywords; ++j) { + if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) { + duplicate = TRUE; + break; + } + } + if (!duplicate) { + ++numKeywords; + } + } while(pos); + + /* now we have a list of keywords */ + /* we need to sort it */ + uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status); + + /* Now construct the keyword part */ + for(i = 0; i < numKeywords; i++) { + sink.Append(keywordList[i].keyword, keywordList[i].keywordLen); + if(valuesToo) { + sink.Append("=", 1); + sink.Append(keywordList[i].valueStart, keywordList[i].valueLen); + if(i < numKeywords - 1) { + sink.Append(";", 1); + } + } else { + sink.Append("\0", 1); + } + } + } +} + +U_CAPI int32_t U_EXPORT2 +uloc_getKeywordValue(const char* localeID, + const char* keywordName, + char* buffer, int32_t bufferCapacity, + UErrorCode* status) +{ + if (U_FAILURE(*status)) { + return 0; + } + + CheckedArrayByteSink sink(buffer, bufferCapacity); + ulocimp_getKeywordValue(localeID, keywordName, sink, status); + + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*status)) { + return reslen; + } + + if (sink.Overflowed()) { + *status = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars(buffer, bufferCapacity, reslen, status); + } + + return reslen; +} + +U_CAPI void U_EXPORT2 +ulocimp_getKeywordValue(const char* localeID, + const char* keywordName, + icu::ByteSink& sink, + UErrorCode* status) +{ + const char* startSearchHere = NULL; + const char* nextSeparator = NULL; + char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; + char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; + + if(status && U_SUCCESS(*status) && localeID) { + char tempBuffer[ULOC_FULLNAME_CAPACITY]; + const char* tmpLocaleID; + + if (keywordName == NULL || keywordName[0] == 0) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + locale_canonKeywordName(keywordNameBuffer, keywordName, status); + if(U_FAILURE(*status)) { + return; + } + + if (_hasBCP47Extension(localeID)) { + _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); + } else { + tmpLocaleID=localeID; + } + + startSearchHere = locale_getKeywordsStart(tmpLocaleID); + if(startSearchHere == NULL) { + /* no keywords, return at once */ + return; + } + + /* find the first keyword */ + while(startSearchHere) { + const char* keyValueTail; + int32_t keyValueLen; + + startSearchHere++; /* skip @ or ; */ + nextSeparator = uprv_strchr(startSearchHere, '='); + if(!nextSeparator) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */ + return; + } + /* strip leading & trailing spaces (TC decided to tolerate these) */ + while(*startSearchHere == ' ') { + startSearchHere++; + } + keyValueTail = nextSeparator; + while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') { + keyValueTail--; + } + /* now keyValueTail points to first char after the keyName */ + /* copy & normalize keyName from locale */ + if (startSearchHere == keyValueTail) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */ + return; + } + keyValueLen = 0; + while (startSearchHere < keyValueTail) { + if (!UPRV_ISALPHANUM(*startSearchHere)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ + return; + } + if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) { + localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++); + } else { + /* keyword name too long for internal buffer */ + *status = U_INTERNAL_PROGRAM_ERROR; + return; + } + } + localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */ + + startSearchHere = uprv_strchr(nextSeparator, ';'); + + if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) { + /* current entry matches the keyword. */ + nextSeparator++; /* skip '=' */ + /* First strip leading & trailing spaces (TC decided to tolerate these) */ + while(*nextSeparator == ' ') { + nextSeparator++; + } + keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator); + while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') { + keyValueTail--; + } + /* Now copy the value, but check well-formedness */ + if (nextSeparator == keyValueTail) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */ + return; + } + while (nextSeparator < keyValueTail) { + if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */ + return; + } + /* Should we lowercase value to return here? Tests expect as-is. */ + sink.Append(nextSeparator++, 1); + } + return; + } + } + } +} + +U_CAPI int32_t U_EXPORT2 +uloc_setKeywordValue(const char* keywordName, + const char* keywordValue, + char* buffer, int32_t bufferCapacity, + UErrorCode* status) +{ + /* TODO: sorting. removal. */ + int32_t keywordNameLen; + int32_t keywordValueLen; + int32_t bufLen; + int32_t needLen = 0; + char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; + char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1]; + char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; + int32_t rc; + char* nextSeparator = NULL; + char* nextEqualsign = NULL; + char* startSearchHere = NULL; + char* keywordStart = NULL; + CharString updatedKeysAndValues; + UBool handledInputKeyAndValue = FALSE; + char keyValuePrefix = '@'; + + if(U_FAILURE(*status)) { + return -1; + } + if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + bufLen = (int32_t)uprv_strlen(buffer); + if(bufferCapacity<bufLen) { + /* The capacity is less than the length?! Is this NULL terminated? */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status); + if(U_FAILURE(*status)) { + return 0; + } + + keywordValueLen = 0; + if(keywordValue) { + while (*keywordValue != 0) { + if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */ + return 0; + } + if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) { + /* Should we force lowercase in value to set? */ + keywordValueBuffer[keywordValueLen++] = *keywordValue++; + } else { + /* keywordValue too long for internal buffer */ + *status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } + } + } + keywordValueBuffer[keywordValueLen] = 0; /* terminate */ + + startSearchHere = (char*)locale_getKeywordsStart(buffer); + if(startSearchHere == NULL || (startSearchHere[1]==0)) { + if(keywordValueLen == 0) { /* no keywords = nothing to remove */ + return bufLen; + } + + needLen = bufLen+1+keywordNameLen+1+keywordValueLen; + if(startSearchHere) { /* had a single @ */ + needLen--; /* already had the @ */ + /* startSearchHere points at the @ */ + } else { + startSearchHere=buffer+bufLen; + } + if(needLen >= bufferCapacity) { + *status = U_BUFFER_OVERFLOW_ERROR; + return needLen; /* no change */ + } + *startSearchHere++ = '@'; + uprv_strcpy(startSearchHere, keywordNameBuffer); + startSearchHere += keywordNameLen; + *startSearchHere++ = '='; + uprv_strcpy(startSearchHere, keywordValueBuffer); + return needLen; + } /* end shortcut - no @ */ + + keywordStart = startSearchHere; + /* search for keyword */ + while(keywordStart) { + const char* keyValueTail; + int32_t keyValueLen; + + keywordStart++; /* skip @ or ; */ + nextEqualsign = uprv_strchr(keywordStart, '='); + if (!nextEqualsign) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */ + return 0; + } + /* strip leading & trailing spaces (TC decided to tolerate these) */ + while(*keywordStart == ' ') { + keywordStart++; + } + keyValueTail = nextEqualsign; + while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') { + keyValueTail--; + } + /* now keyValueTail points to first char after the keyName */ + /* copy & normalize keyName from locale */ + if (keywordStart == keyValueTail) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */ + return 0; + } + keyValueLen = 0; + while (keywordStart < keyValueTail) { + if (!UPRV_ISALPHANUM(*keywordStart)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ + return 0; + } + if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) { + localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++); + } else { + /* keyword name too long for internal buffer */ + *status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } + } + localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */ + + nextSeparator = uprv_strchr(nextEqualsign, ';'); + + /* start processing the value part */ + nextEqualsign++; /* skip '=' */ + /* First strip leading & trailing spaces (TC decided to tolerate these) */ + while(*nextEqualsign == ' ') { + nextEqualsign++; + } + keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign); + while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') { + keyValueTail--; + } + if (nextEqualsign == keyValueTail) { + *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */ + return 0; + } + + rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer); + if(rc == 0) { + /* Current entry matches the input keyword. Update the entry */ + if(keywordValueLen > 0) { /* updating a value */ + updatedKeysAndValues.append(keyValuePrefix, *status); + keyValuePrefix = ';'; /* for any subsequent key-value pair */ + updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); + updatedKeysAndValues.append('=', *status); + updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); + } /* else removing this entry, don't emit anything */ + handledInputKeyAndValue = TRUE; + } else { + /* input keyword sorts earlier than current entry, add before current entry */ + if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) { + /* insert new entry at this location */ + updatedKeysAndValues.append(keyValuePrefix, *status); + keyValuePrefix = ';'; /* for any subsequent key-value pair */ + updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); + updatedKeysAndValues.append('=', *status); + updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); + handledInputKeyAndValue = TRUE; + } + /* copy the current entry */ + updatedKeysAndValues.append(keyValuePrefix, *status); + keyValuePrefix = ';'; /* for any subsequent key-value pair */ + updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status); + updatedKeysAndValues.append('=', *status); + updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status); + } + if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) { + /* append new entry at the end, it sorts later than existing entries */ + updatedKeysAndValues.append(keyValuePrefix, *status); + /* skip keyValuePrefix update, no subsequent key-value pair */ + updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status); + updatedKeysAndValues.append('=', *status); + updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status); + handledInputKeyAndValue = TRUE; + } + keywordStart = nextSeparator; + } /* end loop searching */ + + /* Any error from updatedKeysAndValues.append above would be internal and not due to + * problems with the passed-in locale. So if we did encounter problems with the + * passed-in locale above, those errors took precedence and overrode any error + * status from updatedKeysAndValues.append, and also caused a return of 0. If there + * are errors here they are from updatedKeysAndValues.append; they do cause an + * error return but the passed-in locale is unmodified and the original bufLen is + * returned. + */ + if (!handledInputKeyAndValue || U_FAILURE(*status)) { + /* if input key/value specified removal of a keyword not present in locale, or + * there was an error in CharString.append, leave original locale alone. */ + return bufLen; + } + + // needLen = length of the part before '@' + needLen = (int32_t)(startSearchHere - buffer); + return needLen + updatedKeysAndValues.extract( + startSearchHere, bufferCapacity - needLen, *status); +} + +/* ### ID parsing implementation **************************************************/ + +#define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I')) + +/*returns TRUE if one of the special prefixes is here (s=string) + 'x-' or 'i-' */ +#define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1])) + +/* Dot terminates it because of POSIX form where dot precedes the codepage + * except for variant + */ +#define _isTerminator(a) ((a==0)||(a=='.')||(a=='@')) + +/** + * Lookup 'key' in the array 'list'. The array 'list' should contain + * a NULL entry, followed by more entries, and a second NULL entry. + * + * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or + * COUNTRIES_3. + */ +static int16_t _findIndex(const char* const* list, const char* key) +{ + const char* const* anchor = list; + int32_t pass = 0; + + /* Make two passes through two NULL-terminated arrays at 'list' */ + while (pass++ < 2) { + while (*list) { + if (uprv_strcmp(key, *list) == 0) { + return (int16_t)(list - anchor); + } + list++; + } + ++list; /* skip final NULL *CWB*/ + } + return -1; +} + +U_CFUNC const char* +uloc_getCurrentCountryID(const char* oldID){ + int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID); + if (offset >= 0) { + return REPLACEMENT_COUNTRIES[offset]; + } + return oldID; +} +U_CFUNC const char* +uloc_getCurrentLanguageID(const char* oldID){ + int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID); + if (offset >= 0) { + return REPLACEMENT_LANGUAGES[offset]; + } + return oldID; +} +/* + * the internal functions _getLanguage(), _getCountry(), _getVariant() + * avoid duplicating code to handle the earlier locale ID pieces + * in the functions for the later ones by + * setting the *pEnd pointer to where they stopped parsing + * + * TODO try to use this in Locale + */ +CharString U_EXPORT2 +ulocimp_getLanguage(const char *localeID, + const char **pEnd, + UErrorCode &status) { + CharString result; + + if (uprv_stricmp(localeID, "root") == 0) { + localeID += 4; + } else if (uprv_strnicmp(localeID, "und", 3) == 0 && + (localeID[3] == '\0' || + localeID[3] == '-' || + localeID[3] == '_' || + localeID[3] == '@')) { + localeID += 3; + } + + /* if it starts with i- or x- then copy that prefix */ + if(_isIDPrefix(localeID)) { + result.append((char)uprv_tolower(*localeID), status); + result.append('-', status); + localeID+=2; + } + + /* copy the language as far as possible and count its length */ + while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) { + result.append((char)uprv_tolower(*localeID), status); + localeID++; + } + + if(result.length()==3) { + /* convert 3 character code to 2 character code if possible *CWB*/ + int32_t offset = _findIndex(LANGUAGES_3, result.data()); + if(offset>=0) { + result.clear(); + result.append(LANGUAGES[offset], status); + } + } + + if(pEnd!=NULL) { + *pEnd=localeID; + } + + return result; +} + +CharString U_EXPORT2 +ulocimp_getScript(const char *localeID, + const char **pEnd, + UErrorCode &status) { + CharString result; + int32_t idLen = 0; + + if (pEnd != NULL) { + *pEnd = localeID; + } + + /* copy the second item as far as possible and count its length */ + while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen]) + && uprv_isASCIILetter(localeID[idLen])) { + idLen++; + } + + /* If it's exactly 4 characters long, then it's a script and not a country. */ + if (idLen == 4) { + int32_t i; + if (pEnd != NULL) { + *pEnd = localeID+idLen; + } + if (idLen >= 1) { + result.append((char)uprv_toupper(*(localeID++)), status); + } + for (i = 1; i < idLen; i++) { + result.append((char)uprv_tolower(*(localeID++)), status); + } + } + + return result; +} + +CharString U_EXPORT2 +ulocimp_getCountry(const char *localeID, + const char **pEnd, + UErrorCode &status) { + CharString result; + int32_t idLen=0; + + /* copy the country as far as possible and count its length */ + while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) { + result.append((char)uprv_toupper(localeID[idLen]), status); + idLen++; + } + + /* the country should be either length 2 or 3 */ + if (idLen == 2 || idLen == 3) { + /* convert 3 character code to 2 character code if possible *CWB*/ + if(idLen==3) { + int32_t offset = _findIndex(COUNTRIES_3, result.data()); + if(offset>=0) { + result.clear(); + result.append(COUNTRIES[offset], status); + } + } + localeID+=idLen; + } else { + result.clear(); + } + + if(pEnd!=NULL) { + *pEnd=localeID; + } + + return result; +} + +/** + * @param needSeparator if true, then add leading '_' if any variants + * are added to 'variant' + */ +static void +_getVariant(const char *localeID, + char prev, + ByteSink& sink, + UBool needSeparator) { + UBool hasVariant = FALSE; + + /* get one or more variant tags and separate them with '_' */ + if(_isIDSeparator(prev)) { + /* get a variant string after a '-' or '_' */ + while(!_isTerminator(*localeID)) { + if (needSeparator) { + sink.Append("_", 1); + needSeparator = FALSE; + } + char c = (char)uprv_toupper(*localeID); + if (c == '-') c = '_'; + sink.Append(&c, 1); + hasVariant = TRUE; + localeID++; + } + } + + /* if there is no variant tag after a '-' or '_' then look for '@' */ + if(!hasVariant) { + if(prev=='@') { + /* keep localeID */ + } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) { + ++localeID; /* point after the '@' */ + } else { + return; + } + while(!_isTerminator(*localeID)) { + if (needSeparator) { + sink.Append("_", 1); + needSeparator = FALSE; + } + char c = (char)uprv_toupper(*localeID); + if (c == '-' || c == ',') c = '_'; + sink.Append(&c, 1); + localeID++; + } + } +} + +/* Keyword enumeration */ + +typedef struct UKeywordsContext { + char* keywords; + char* current; +} UKeywordsContext; + +U_CDECL_BEGIN + +static void U_CALLCONV +uloc_kw_closeKeywords(UEnumeration *enumerator) { + uprv_free(((UKeywordsContext *)enumerator->context)->keywords); + uprv_free(enumerator->context); + uprv_free(enumerator); +} + +static int32_t U_CALLCONV +uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) { + char *kw = ((UKeywordsContext *)en->context)->keywords; + int32_t result = 0; + while(*kw) { + result++; + kw += uprv_strlen(kw)+1; + } + return result; +} + +static const char * U_CALLCONV +uloc_kw_nextKeyword(UEnumeration* en, + int32_t* resultLength, + UErrorCode* /*status*/) { + const char* result = ((UKeywordsContext *)en->context)->current; + int32_t len = 0; + if(*result) { + len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current); + ((UKeywordsContext *)en->context)->current += len+1; + } else { + result = NULL; + } + if (resultLength) { + *resultLength = len; + } + return result; +} + +static void U_CALLCONV +uloc_kw_resetKeywords(UEnumeration* en, + UErrorCode* /*status*/) { + ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords; +} + +U_CDECL_END + + +static const UEnumeration gKeywordsEnum = { + NULL, + NULL, + uloc_kw_closeKeywords, + uloc_kw_countKeywords, + uenum_unextDefault, + uloc_kw_nextKeyword, + uloc_kw_resetKeywords +}; + +U_CAPI UEnumeration* U_EXPORT2 +uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status) +{ + LocalMemory<UKeywordsContext> myContext; + LocalMemory<UEnumeration> result; + + if (U_FAILURE(*status)) { + return nullptr; + } + myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)))); + result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration)))); + if (myContext.isNull() || result.isNull()) { + *status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration)); + myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1)); + if (myContext->keywords == nullptr) { + *status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + uprv_memcpy(myContext->keywords, keywordList, keywordListSize); + myContext->keywords[keywordListSize] = 0; + myContext->current = myContext->keywords; + result->context = myContext.orphan(); + return result.orphan(); +} + +U_CAPI UEnumeration* U_EXPORT2 +uloc_openKeywords(const char* localeID, + UErrorCode* status) +{ + char tempBuffer[ULOC_FULLNAME_CAPACITY]; + const char* tmpLocaleID; + + if(status==NULL || U_FAILURE(*status)) { + return 0; + } + + if (_hasBCP47Extension(localeID)) { + _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); + } else { + if (localeID==NULL) { + localeID=uloc_getDefault(); + } + tmpLocaleID=localeID; + } + + /* Skip the language */ + ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status); + if (U_FAILURE(*status)) { + return 0; + } + + if(_isIDSeparator(*tmpLocaleID)) { + const char *scriptID; + /* Skip the script if available */ + ulocimp_getScript(tmpLocaleID+1, &scriptID, *status); + if (U_FAILURE(*status)) { + return 0; + } + if(scriptID != tmpLocaleID+1) { + /* Found optional script */ + tmpLocaleID = scriptID; + } + /* Skip the Country */ + if (_isIDSeparator(*tmpLocaleID)) { + ulocimp_getCountry(tmpLocaleID+1, &tmpLocaleID, *status); + if (U_FAILURE(*status)) { + return 0; + } + } + } + + /* keywords are located after '@' */ + if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) { + CharString keywords; + CharStringByteSink sink(&keywords); + ulocimp_getKeywords(tmpLocaleID+1, '@', sink, FALSE, status); + if (U_FAILURE(*status)) { + return NULL; + } + return uloc_openKeywordList(keywords.data(), keywords.length(), status); + } + return NULL; +} + + +/* bit-flags for 'options' parameter of _canonicalize */ +#define _ULOC_STRIP_KEYWORDS 0x2 +#define _ULOC_CANONICALIZE 0x1 + +#define OPTION_SET(options, mask) ((options & mask) != 0) + +static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}; +#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default) + +/** + * Canonicalize the given localeID, to level 1 or to level 2, + * depending on the options. To specify level 1, pass in options=0. + * To specify level 2, pass in options=_ULOC_CANONICALIZE. + * + * This is the code underlying uloc_getName and uloc_canonicalize. + */ +static void +_canonicalize(const char* localeID, + ByteSink& sink, + uint32_t options, + UErrorCode* err) { + int32_t j, fieldCount=0, scriptSize=0, variantSize=0; + char tempBuffer[ULOC_FULLNAME_CAPACITY]; + const char* origLocaleID; + const char* tmpLocaleID; + const char* keywordAssign = NULL; + const char* separatorIndicator = NULL; + + if (U_FAILURE(*err)) { + return; + } + + if (_hasBCP47Extension(localeID)) { + _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); + } else { + if (localeID==NULL) { + localeID=uloc_getDefault(); + } + tmpLocaleID=localeID; + } + + origLocaleID=tmpLocaleID; + + /* get all pieces, one after another, and separate with '_' */ + CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err); + + if (tag.length() == I_DEFAULT_LENGTH && + uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) { + tag.clear(); + tag.append(uloc_getDefault(), *err); + } else if(_isIDSeparator(*tmpLocaleID)) { + const char *scriptID; + + ++fieldCount; + tag.append('_', *err); + + CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err); + tag.append(script, *err); + scriptSize = script.length(); + if(scriptSize > 0) { + /* Found optional script */ + tmpLocaleID = scriptID; + ++fieldCount; + if (_isIDSeparator(*tmpLocaleID)) { + /* If there is something else, then we add the _ */ + tag.append('_', *err); + } + } + + if (_isIDSeparator(*tmpLocaleID)) { + const char *cntryID; + + CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err); + tag.append(country, *err); + if (!country.isEmpty()) { + /* Found optional country */ + tmpLocaleID = cntryID; + } + if(_isIDSeparator(*tmpLocaleID)) { + /* If there is something else, then we add the _ if we found country before. */ + if (!_isIDSeparator(*(tmpLocaleID+1))) { + ++fieldCount; + tag.append('_', *err); + } + + variantSize = -tag.length(); + { + CharStringByteSink s(&tag); + _getVariant(tmpLocaleID+1, *tmpLocaleID, s, FALSE); + } + variantSize += tag.length(); + if (variantSize > 0) { + tmpLocaleID += variantSize + 1; /* skip '_' and variant */ + } + } + } + } + + /* Copy POSIX-style charset specifier, if any [mr.utf8] */ + if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') { + UBool done = FALSE; + do { + char c = *tmpLocaleID; + switch (c) { + case 0: + case '@': + done = TRUE; + break; + default: + tag.append(c, *err); + ++tmpLocaleID; + break; + } + } while (!done); + } + + /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';' + After this, tmpLocaleID either points to '@' or is NULL */ + if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) { + keywordAssign = uprv_strchr(tmpLocaleID, '='); + separatorIndicator = uprv_strchr(tmpLocaleID, ';'); + } + + /* Copy POSIX-style variant, if any [mr@FOO] */ + if (!OPTION_SET(options, _ULOC_CANONICALIZE) && + tmpLocaleID != NULL && keywordAssign == NULL) { + for (;;) { + char c = *tmpLocaleID; + if (c == 0) { + break; + } + tag.append(c, *err); + ++tmpLocaleID; + } + } + + if (OPTION_SET(options, _ULOC_CANONICALIZE)) { + /* Handle @FOO variant if @ is present and not followed by = */ + if (tmpLocaleID!=NULL && keywordAssign==NULL) { + /* Add missing '_' if needed */ + if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) { + do { + tag.append('_', *err); + ++fieldCount; + } while(fieldCount<2); + } + + int32_t posixVariantSize = -tag.length(); + { + CharStringByteSink s(&tag); + _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0)); + } + posixVariantSize += tag.length(); + if (posixVariantSize > 0) { + variantSize += posixVariantSize; + } + } + + /* Look up the ID in the canonicalization map */ + for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) { + StringPiece id(CANONICALIZE_MAP[j].id); + if (tag == id) { + if (id.empty() && tmpLocaleID != NULL) { + break; /* Don't remap "" if keywords present */ + } + tag.clear(); + tag.append(CANONICALIZE_MAP[j].canonicalID, *err); + break; + } + } + } + + sink.Append(tag.data(), tag.length()); + + if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) { + if (tmpLocaleID!=NULL && keywordAssign!=NULL && + (!separatorIndicator || separatorIndicator > keywordAssign)) { + sink.Append("@", 1); + ++fieldCount; + ulocimp_getKeywords(tmpLocaleID+1, '@', sink, TRUE, err); + } + } +} + +/* ### ID parsing API **************************************************/ + +U_CAPI int32_t U_EXPORT2 +uloc_getParent(const char* localeID, + char* parent, + int32_t parentCapacity, + UErrorCode* err) +{ + const char *lastUnderscore; + int32_t i; + + if (U_FAILURE(*err)) + return 0; + + if (localeID == NULL) + localeID = uloc_getDefault(); + + lastUnderscore=uprv_strrchr(localeID, '_'); + if(lastUnderscore!=NULL) { + i=(int32_t)(lastUnderscore-localeID); + } else { + i=0; + } + + if (i > 0) { + if (uprv_strnicmp(localeID, "und_", 4) == 0) { + localeID += 3; + i -= 3; + uprv_memmove(parent, localeID, uprv_min(i, parentCapacity)); + } else if (parent != localeID) { + uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity)); + } + } + + return u_terminateChars(parent, parentCapacity, i, err); +} + +U_CAPI int32_t U_EXPORT2 +uloc_getLanguage(const char* localeID, + char* language, + int32_t languageCapacity, + UErrorCode* err) +{ + /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/ + + if (err==NULL || U_FAILURE(*err)) { + return 0; + } + + if(localeID==NULL) { + localeID=uloc_getDefault(); + } + + return ulocimp_getLanguage(localeID, NULL, *err).extract(language, languageCapacity, *err); +} + +U_CAPI int32_t U_EXPORT2 +uloc_getScript(const char* localeID, + char* script, + int32_t scriptCapacity, + UErrorCode* err) +{ + if(err==NULL || U_FAILURE(*err)) { + return 0; + } + + if(localeID==NULL) { + localeID=uloc_getDefault(); + } + + /* skip the language */ + ulocimp_getLanguage(localeID, &localeID, *err); + if (U_FAILURE(*err)) { + return 0; + } + + if(_isIDSeparator(*localeID)) { + return ulocimp_getScript(localeID+1, NULL, *err).extract(script, scriptCapacity, *err); + } + return u_terminateChars(script, scriptCapacity, 0, err); +} + +U_CAPI int32_t U_EXPORT2 +uloc_getCountry(const char* localeID, + char* country, + int32_t countryCapacity, + UErrorCode* err) +{ + if(err==NULL || U_FAILURE(*err)) { + return 0; + } + + if(localeID==NULL) { + localeID=uloc_getDefault(); + } + + /* Skip the language */ + ulocimp_getLanguage(localeID, &localeID, *err); + if (U_FAILURE(*err)) { + return 0; + } + + if(_isIDSeparator(*localeID)) { + const char *scriptID; + /* Skip the script if available */ + ulocimp_getScript(localeID+1, &scriptID, *err); + if (U_FAILURE(*err)) { + return 0; + } + if(scriptID != localeID+1) { + /* Found optional script */ + localeID = scriptID; + } + if(_isIDSeparator(*localeID)) { + return ulocimp_getCountry(localeID+1, NULL, *err).extract(country, countryCapacity, *err); + } + } + return u_terminateChars(country, countryCapacity, 0, err); +} + +U_CAPI int32_t U_EXPORT2 +uloc_getVariant(const char* localeID, + char* variant, + int32_t variantCapacity, + UErrorCode* err) +{ + char tempBuffer[ULOC_FULLNAME_CAPACITY]; + const char* tmpLocaleID; + int32_t i=0; + + if(err==NULL || U_FAILURE(*err)) { + return 0; + } + + if (_hasBCP47Extension(localeID)) { + _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); + } else { + if (localeID==NULL) { + localeID=uloc_getDefault(); + } + tmpLocaleID=localeID; + } + + /* Skip the language */ + ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err); + if (U_FAILURE(*err)) { + return 0; + } + + if(_isIDSeparator(*tmpLocaleID)) { + const char *scriptID; + /* Skip the script if available */ + ulocimp_getScript(tmpLocaleID+1, &scriptID, *err); + if (U_FAILURE(*err)) { + return 0; + } + if(scriptID != tmpLocaleID+1) { + /* Found optional script */ + tmpLocaleID = scriptID; + } + /* Skip the Country */ + if (_isIDSeparator(*tmpLocaleID)) { + const char *cntryID; + ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err); + if (U_FAILURE(*err)) { + return 0; + } + if (cntryID != tmpLocaleID+1) { + /* Found optional country */ + tmpLocaleID = cntryID; + } + if(_isIDSeparator(*tmpLocaleID)) { + /* If there was no country ID, skip a possible extra IDSeparator */ + if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) { + tmpLocaleID++; + } + + CheckedArrayByteSink sink(variant, variantCapacity); + _getVariant(tmpLocaleID+1, *tmpLocaleID, sink, FALSE); + + i = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*err)) { + return i; + } + + if (sink.Overflowed()) { + *err = U_BUFFER_OVERFLOW_ERROR; + return i; + } + } + } + } + + return u_terminateChars(variant, variantCapacity, i, err); +} + +U_CAPI int32_t U_EXPORT2 +uloc_getName(const char* localeID, + char* name, + int32_t nameCapacity, + UErrorCode* err) +{ + if (U_FAILURE(*err)) { + return 0; + } + + CheckedArrayByteSink sink(name, nameCapacity); + ulocimp_getName(localeID, sink, err); + + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*err)) { + return reslen; + } + + if (sink.Overflowed()) { + *err = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars(name, nameCapacity, reslen, err); + } + + return reslen; +} + +U_CAPI void U_EXPORT2 +ulocimp_getName(const char* localeID, + ByteSink& sink, + UErrorCode* err) +{ + _canonicalize(localeID, sink, 0, err); +} + +U_CAPI int32_t U_EXPORT2 +uloc_getBaseName(const char* localeID, + char* name, + int32_t nameCapacity, + UErrorCode* err) +{ + if (U_FAILURE(*err)) { + return 0; + } + + CheckedArrayByteSink sink(name, nameCapacity); + ulocimp_getBaseName(localeID, sink, err); + + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*err)) { + return reslen; + } + + if (sink.Overflowed()) { + *err = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars(name, nameCapacity, reslen, err); + } + + return reslen; +} + +U_CAPI void U_EXPORT2 +ulocimp_getBaseName(const char* localeID, + ByteSink& sink, + UErrorCode* err) +{ + _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err); +} + +U_CAPI int32_t U_EXPORT2 +uloc_canonicalize(const char* localeID, + char* name, + int32_t nameCapacity, + UErrorCode* err) +{ + if (U_FAILURE(*err)) { + return 0; + } + + CheckedArrayByteSink sink(name, nameCapacity); + ulocimp_canonicalize(localeID, sink, err); + + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*err)) { + return reslen; + } + + if (sink.Overflowed()) { + *err = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars(name, nameCapacity, reslen, err); + } + + return reslen; +} + +U_CAPI void U_EXPORT2 +ulocimp_canonicalize(const char* localeID, + ByteSink& sink, + UErrorCode* err) +{ + _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err); +} + +U_CAPI const char* U_EXPORT2 +uloc_getISO3Language(const char* localeID) +{ + int16_t offset; + char lang[ULOC_LANG_CAPACITY]; + UErrorCode err = U_ZERO_ERROR; + + if (localeID == NULL) + { + localeID = uloc_getDefault(); + } + uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err); + if (U_FAILURE(err)) + return ""; + offset = _findIndex(LANGUAGES, lang); + if (offset < 0) + return ""; + return LANGUAGES_3[offset]; +} + +U_CAPI const char* U_EXPORT2 +uloc_getISO3Country(const char* localeID) +{ + int16_t offset; + char cntry[ULOC_LANG_CAPACITY]; + UErrorCode err = U_ZERO_ERROR; + + if (localeID == NULL) + { + localeID = uloc_getDefault(); + } + uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err); + if (U_FAILURE(err)) + return ""; + offset = _findIndex(COUNTRIES, cntry); + if (offset < 0) + return ""; + + return COUNTRIES_3[offset]; +} + +U_CAPI uint32_t U_EXPORT2 +uloc_getLCID(const char* localeID) +{ + UErrorCode status = U_ZERO_ERROR; + char langID[ULOC_FULLNAME_CAPACITY]; + uint32_t lcid = 0; + + /* Check for incomplete id. */ + if (!localeID || uprv_strlen(localeID) < 2) { + return 0; + } + + // First, attempt Windows platform lookup if available, but fall + // through to catch any special cases (ICU vs Windows name differences). + lcid = uprv_convertToLCIDPlatform(localeID, &status); + if (U_FAILURE(status)) { + return 0; + } + if (lcid > 0) { + // Windows found an LCID, return that + return lcid; + } + + uloc_getLanguage(localeID, langID, sizeof(langID), &status); + if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) { + return 0; + } + + if (uprv_strchr(localeID, '@')) { + // uprv_convertToLCID does not support keywords other than collation. + // Remove all keywords except collation. + int32_t len; + char tmpLocaleID[ULOC_FULLNAME_CAPACITY]; + + CharString collVal; + { + CharStringByteSink sink(&collVal); + ulocimp_getKeywordValue(localeID, "collation", sink, &status); + } + + if (U_SUCCESS(status) && !collVal.isEmpty()) { + len = uloc_getBaseName(localeID, tmpLocaleID, + UPRV_LENGTHOF(tmpLocaleID) - 1, &status); + + if (U_SUCCESS(status) && len > 0) { + tmpLocaleID[len] = 0; + + len = uloc_setKeywordValue("collation", collVal.data(), tmpLocaleID, + UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status); + + if (U_SUCCESS(status) && len > 0) { + tmpLocaleID[len] = 0; + return uprv_convertToLCID(langID, tmpLocaleID, &status); + } + } + } + + // fall through - all keywords are simply ignored + status = U_ZERO_ERROR; + } + + return uprv_convertToLCID(langID, localeID, &status); +} + +U_CAPI int32_t U_EXPORT2 +uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity, + UErrorCode *status) +{ + return uprv_convertToPosix(hostid, locale, localeCapacity, status); +} + +/* ### Default locale **************************************************/ + +U_CAPI const char* U_EXPORT2 +uloc_getDefault() +{ + return locale_get_default(); +} + +U_CAPI void U_EXPORT2 +uloc_setDefault(const char* newDefaultLocale, + UErrorCode* err) +{ + if (U_FAILURE(*err)) + return; + /* the error code isn't currently used for anything by this function*/ + + /* propagate change to C++ */ + locale_set_default(newDefaultLocale); +} + +/** + * Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer + * to an array of pointers to arrays of char. All of these pointers are owned + * by ICU-- do not delete them, and do not write through them. The array is + * terminated with a null pointer. + */ +U_CAPI const char* const* U_EXPORT2 +uloc_getISOLanguages() +{ + return LANGUAGES; +} + +/** + * Returns a list of all 2-letter country codes defined in ISO 639. This is a + * pointer to an array of pointers to arrays of char. All of these pointers are + * owned by ICU-- do not delete them, and do not write through them. The array is + * terminated with a null pointer. + */ +U_CAPI const char* const* U_EXPORT2 +uloc_getISOCountries() +{ + return COUNTRIES; +} + +U_CAPI const char* U_EXPORT2 +uloc_toUnicodeLocaleKey(const char* keyword) +{ + const char* bcpKey = ulocimp_toBcpKey(keyword); + if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) { + // unknown keyword, but syntax is fine.. + return keyword; + } + return bcpKey; +} + +U_CAPI const char* U_EXPORT2 +uloc_toUnicodeLocaleType(const char* keyword, const char* value) +{ + const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL); + if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) { + // unknown keyword, but syntax is fine.. + return value; + } + return bcpType; +} + +static UBool +isWellFormedLegacyKey(const char* legacyKey) +{ + const char* p = legacyKey; + while (*p) { + if (!UPRV_ISALPHANUM(*p)) { + return FALSE; + } + p++; + } + return TRUE; +} + +static UBool +isWellFormedLegacyType(const char* legacyType) +{ + const char* p = legacyType; + int32_t alphaNumLen = 0; + while (*p) { + if (*p == '_' || *p == '/' || *p == '-') { + if (alphaNumLen == 0) { + return FALSE; + } + alphaNumLen = 0; + } else if (UPRV_ISALPHANUM(*p)) { + alphaNumLen++; + } else { + return FALSE; + } + p++; + } + return (alphaNumLen != 0); +} + +U_CAPI const char* U_EXPORT2 +uloc_toLegacyKey(const char* keyword) +{ + const char* legacyKey = ulocimp_toLegacyKey(keyword); + if (legacyKey == NULL) { + // Checks if the specified locale key is well-formed with the legacy locale syntax. + // + // Note: + // LDML/CLDR provides some definition of keyword syntax in + // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and + // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax + // Keys can only consist of [0-9a-zA-Z]. + if (isWellFormedLegacyKey(keyword)) { + return keyword; + } + } + return legacyKey; +} + +U_CAPI const char* U_EXPORT2 +uloc_toLegacyType(const char* keyword, const char* value) +{ + const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL); + if (legacyType == NULL) { + // Checks if the specified locale type is well-formed with the legacy locale syntax. + // + // Note: + // LDML/CLDR provides some definition of keyword syntax in + // * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and + // * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax + // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values + // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv") + if (isWellFormedLegacyType(value)) { + return value; + } + } + return legacyType; +} + +/*eof*/ diff --git a/thirdparty/icu4c/common/uloc_keytype.cpp b/thirdparty/icu4c/common/uloc_keytype.cpp new file mode 100644 index 0000000000..019da058cf --- /dev/null +++ b/thirdparty/icu4c/common/uloc_keytype.cpp @@ -0,0 +1,534 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2014-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ +#include <algorithm> + +#include "unicode/utypes.h" +#include "unicode/unistr.h" +#include "unicode/uobject.h" + +#include "charstr.h" +#include "cmemory.h" +#include "cstring.h" +#include "uassert.h" +#include "ucln_cmn.h" +#include "uhash.h" +#include "umutex.h" +#include "uresimp.h" +#include "uvector.h" +#include "udataswp.h" /* for InvChar functions */ + +static UHashtable* gLocExtKeyMap = NULL; +static icu::UInitOnce gLocExtKeyMapInitOnce = U_INITONCE_INITIALIZER; + +// bit flags for special types +typedef enum { + SPECIALTYPE_NONE = 0, + SPECIALTYPE_CODEPOINTS = 1, + SPECIALTYPE_REORDER_CODE = 2, + SPECIALTYPE_RG_KEY_VALUE = 4 +} SpecialType; + +struct LocExtKeyData : public icu::UMemory { + const char* legacyId; + const char* bcpId; + icu::LocalUHashtablePointer typeMap; + uint32_t specialTypes; +}; + +struct LocExtType : public icu::UMemory { + const char* legacyId; + const char* bcpId; +}; + +static icu::MemoryPool<icu::CharString>* gKeyTypeStringPool = NULL; +static icu::MemoryPool<LocExtKeyData>* gLocExtKeyDataEntries = NULL; +static icu::MemoryPool<LocExtType>* gLocExtTypeEntries = NULL; + +U_CDECL_BEGIN + +static UBool U_CALLCONV +uloc_key_type_cleanup(void) { + if (gLocExtKeyMap != NULL) { + uhash_close(gLocExtKeyMap); + gLocExtKeyMap = NULL; + } + + delete gLocExtKeyDataEntries; + gLocExtKeyDataEntries = NULL; + + delete gLocExtTypeEntries; + gLocExtTypeEntries = NULL; + + delete gKeyTypeStringPool; + gKeyTypeStringPool = NULL; + + gLocExtKeyMapInitOnce.reset(); + return TRUE; +} + +U_CDECL_END + + +static void U_CALLCONV +initFromResourceBundle(UErrorCode& sts) { + U_NAMESPACE_USE + ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup); + + gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts); + + LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(NULL, "keyTypeData", &sts)); + LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", NULL, &sts)); + LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap", NULL, &sts)); + + if (U_FAILURE(sts)) { + return; + } + + UErrorCode tmpSts = U_ZERO_ERROR; + LocalUResourceBundlePointer typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias", NULL, &tmpSts)); + tmpSts = U_ZERO_ERROR; + LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", NULL, &tmpSts)); + + // initialize pools storing dynamically allocated objects + gKeyTypeStringPool = new icu::MemoryPool<icu::CharString>; + if (gKeyTypeStringPool == NULL) { + sts = U_MEMORY_ALLOCATION_ERROR; + return; + } + gLocExtKeyDataEntries = new icu::MemoryPool<LocExtKeyData>; + if (gLocExtKeyDataEntries == NULL) { + sts = U_MEMORY_ALLOCATION_ERROR; + return; + } + gLocExtTypeEntries = new icu::MemoryPool<LocExtType>; + if (gLocExtTypeEntries == NULL) { + sts = U_MEMORY_ALLOCATION_ERROR; + return; + } + + // iterate through keyMap resource + LocalUResourceBundlePointer keyMapEntry; + + while (ures_hasNext(keyMapRes.getAlias())) { + keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts)); + if (U_FAILURE(sts)) { + break; + } + const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias()); + UnicodeString uBcpKeyId = ures_getUnicodeString(keyMapEntry.getAlias(), &sts); + if (U_FAILURE(sts)) { + break; + } + + // empty value indicates that BCP key is same with the legacy key. + const char* bcpKeyId = legacyKeyId; + if (!uBcpKeyId.isEmpty()) { + icu::CharString* bcpKeyIdBuf = gKeyTypeStringPool->create(); + if (bcpKeyIdBuf == NULL) { + sts = U_MEMORY_ALLOCATION_ERROR; + break; + } + bcpKeyIdBuf->appendInvariantChars(uBcpKeyId, sts); + if (U_FAILURE(sts)) { + break; + } + bcpKeyId = bcpKeyIdBuf->data(); + } + + UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0; + + UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, NULL, &sts); + if (U_FAILURE(sts)) { + break; + } + uint32_t specialTypes = SPECIALTYPE_NONE; + + LocalUResourceBundlePointer typeAliasResByKey; + LocalUResourceBundlePointer bcpTypeAliasResByKey; + + if (typeAliasRes.isValid()) { + tmpSts = U_ZERO_ERROR; + typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, NULL, &tmpSts)); + if (U_FAILURE(tmpSts)) { + typeAliasResByKey.orphan(); + } + } + if (bcpTypeAliasRes.isValid()) { + tmpSts = U_ZERO_ERROR; + bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, NULL, &tmpSts)); + if (U_FAILURE(tmpSts)) { + bcpTypeAliasResByKey.orphan(); + } + } + + // look up type map for the key, and walk through the mapping data + tmpSts = U_ZERO_ERROR; + LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, NULL, &tmpSts)); + if (U_FAILURE(tmpSts)) { + // type map for each key must exist + UPRV_UNREACHABLE; + } else { + LocalUResourceBundlePointer typeMapEntry; + + while (ures_hasNext(typeMapResByKey.getAlias())) { + typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts)); + if (U_FAILURE(sts)) { + break; + } + const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias()); + + // special types + if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) { + specialTypes |= SPECIALTYPE_CODEPOINTS; + continue; + } + if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) { + specialTypes |= SPECIALTYPE_REORDER_CODE; + continue; + } + if (uprv_strcmp(legacyTypeId, "RG_KEY_VALUE") == 0) { + specialTypes |= SPECIALTYPE_RG_KEY_VALUE; + continue; + } + + if (isTZ) { + // a timezone key uses a colon instead of a slash in the resource. + // e.g. America:Los_Angeles + if (uprv_strchr(legacyTypeId, ':') != NULL) { + icu::CharString* legacyTypeIdBuf = + gKeyTypeStringPool->create(legacyTypeId, sts); + if (legacyTypeIdBuf == NULL) { + sts = U_MEMORY_ALLOCATION_ERROR; + break; + } + if (U_FAILURE(sts)) { + break; + } + std::replace( + legacyTypeIdBuf->data(), + legacyTypeIdBuf->data() + legacyTypeIdBuf->length(), + ':', '/'); + legacyTypeId = legacyTypeIdBuf->data(); + } + } + + UnicodeString uBcpTypeId = ures_getUnicodeString(typeMapEntry.getAlias(), &sts); + if (U_FAILURE(sts)) { + break; + } + + // empty value indicates that BCP type is same with the legacy type. + const char* bcpTypeId = legacyTypeId; + if (!uBcpTypeId.isEmpty()) { + icu::CharString* bcpTypeIdBuf = gKeyTypeStringPool->create(); + if (bcpTypeIdBuf == NULL) { + sts = U_MEMORY_ALLOCATION_ERROR; + break; + } + bcpTypeIdBuf->appendInvariantChars(uBcpTypeId, sts); + if (U_FAILURE(sts)) { + break; + } + bcpTypeId = bcpTypeIdBuf->data(); + } + + // Note: legacy type value should never be + // equivalent to bcp type value of a different + // type under the same key. So we use a single + // map for lookup. + LocExtType* t = gLocExtTypeEntries->create(); + if (t == NULL) { + sts = U_MEMORY_ALLOCATION_ERROR; + break; + } + t->bcpId = bcpTypeId; + t->legacyId = legacyTypeId; + + uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts); + if (bcpTypeId != legacyTypeId) { + // different type value + uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts); + } + if (U_FAILURE(sts)) { + break; + } + + // also put aliases in the map + if (typeAliasResByKey.isValid()) { + LocalUResourceBundlePointer typeAliasDataEntry; + + ures_resetIterator(typeAliasResByKey.getAlias()); + while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) { + int32_t toLen; + typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts)); + const UChar* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts); + if (U_FAILURE(sts)) { + break; + } + // check if this is an alias of canoncal legacy type + if (uprv_compareInvWithUChar(NULL, legacyTypeId, -1, to, toLen) == 0) { + const char* from = ures_getKey(typeAliasDataEntry.getAlias()); + if (isTZ) { + // replace colon with slash if necessary + if (uprv_strchr(from, ':') != NULL) { + icu::CharString* fromBuf = + gKeyTypeStringPool->create(from, sts); + if (fromBuf == NULL) { + sts = U_MEMORY_ALLOCATION_ERROR; + break; + } + if (U_FAILURE(sts)) { + break; + } + std::replace( + fromBuf->data(), + fromBuf->data() + fromBuf->length(), + ':', '/'); + from = fromBuf->data(); + } + } + uhash_put(typeDataMap, (void*)from, t, &sts); + } + } + if (U_FAILURE(sts)) { + break; + } + } + + if (bcpTypeAliasResByKey.isValid()) { + LocalUResourceBundlePointer bcpTypeAliasDataEntry; + + ures_resetIterator(bcpTypeAliasResByKey.getAlias()); + while (ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) { + int32_t toLen; + bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts)); + const UChar* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts); + if (U_FAILURE(sts)) { + break; + } + // check if this is an alias of bcp type + if (uprv_compareInvWithUChar(NULL, bcpTypeId, -1, to, toLen) == 0) { + const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias()); + uhash_put(typeDataMap, (void*)from, t, &sts); + } + } + if (U_FAILURE(sts)) { + break; + } + } + } + } + if (U_FAILURE(sts)) { + break; + } + + LocExtKeyData* keyData = gLocExtKeyDataEntries->create(); + if (keyData == NULL) { + sts = U_MEMORY_ALLOCATION_ERROR; + break; + } + keyData->bcpId = bcpKeyId; + keyData->legacyId = legacyKeyId; + keyData->specialTypes = specialTypes; + keyData->typeMap.adoptInstead(typeDataMap); + + uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts); + if (legacyKeyId != bcpKeyId) { + // different key value + uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts); + } + if (U_FAILURE(sts)) { + break; + } + } +} + +static UBool +init() { + UErrorCode sts = U_ZERO_ERROR; + umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts); + if (U_FAILURE(sts)) { + return FALSE; + } + return TRUE; +} + +static UBool +isSpecialTypeCodepoints(const char* val) { + int32_t subtagLen = 0; + const char* p = val; + while (*p) { + if (*p == '-') { + if (subtagLen < 4 || subtagLen > 6) { + return FALSE; + } + subtagLen = 0; + } else if ((*p >= '0' && *p <= '9') || + (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous + (*p >= 'a' && *p <= 'f')) { // also in EBCDIC + subtagLen++; + } else { + return FALSE; + } + p++; + } + return (subtagLen >= 4 && subtagLen <= 6); +} + +static UBool +isSpecialTypeReorderCode(const char* val) { + int32_t subtagLen = 0; + const char* p = val; + while (*p) { + if (*p == '-') { + if (subtagLen < 3 || subtagLen > 8) { + return FALSE; + } + subtagLen = 0; + } else if (uprv_isASCIILetter(*p)) { + subtagLen++; + } else { + return FALSE; + } + p++; + } + return (subtagLen >=3 && subtagLen <=8); +} + +static UBool +isSpecialTypeRgKeyValue(const char* val) { + int32_t subtagLen = 0; + const char* p = val; + while (*p) { + if ( (subtagLen < 2 && uprv_isASCIILetter(*p)) || + (subtagLen >= 2 && (*p == 'Z' || *p == 'z')) ) { + subtagLen++; + } else { + return FALSE; + } + p++; + } + return (subtagLen == 6); +} + +U_CFUNC const char* +ulocimp_toBcpKey(const char* key) { + if (!init()) { + return NULL; + } + + LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key); + if (keyData != NULL) { + return keyData->bcpId; + } + return NULL; +} + +U_CFUNC const char* +ulocimp_toLegacyKey(const char* key) { + if (!init()) { + return NULL; + } + + LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key); + if (keyData != NULL) { + return keyData->legacyId; + } + return NULL; +} + +U_CFUNC const char* +ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) { + if (isKnownKey != NULL) { + *isKnownKey = FALSE; + } + if (isSpecialType != NULL) { + *isSpecialType = FALSE; + } + + if (!init()) { + return NULL; + } + + LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key); + if (keyData != NULL) { + if (isKnownKey != NULL) { + *isKnownKey = TRUE; + } + LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type); + if (t != NULL) { + return t->bcpId; + } + if (keyData->specialTypes != SPECIALTYPE_NONE) { + UBool matched = FALSE; + if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) { + matched = isSpecialTypeCodepoints(type); + } + if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) { + matched = isSpecialTypeReorderCode(type); + } + if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) { + matched = isSpecialTypeRgKeyValue(type); + } + if (matched) { + if (isSpecialType != NULL) { + *isSpecialType = TRUE; + } + return type; + } + } + } + return NULL; +} + + +U_CFUNC const char* +ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) { + if (isKnownKey != NULL) { + *isKnownKey = FALSE; + } + if (isSpecialType != NULL) { + *isSpecialType = FALSE; + } + + if (!init()) { + return NULL; + } + + LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key); + if (keyData != NULL) { + if (isKnownKey != NULL) { + *isKnownKey = TRUE; + } + LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type); + if (t != NULL) { + return t->legacyId; + } + if (keyData->specialTypes != SPECIALTYPE_NONE) { + UBool matched = FALSE; + if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) { + matched = isSpecialTypeCodepoints(type); + } + if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) { + matched = isSpecialTypeReorderCode(type); + } + if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) { + matched = isSpecialTypeRgKeyValue(type); + } + if (matched) { + if (isSpecialType != NULL) { + *isSpecialType = TRUE; + } + return type; + } + } + } + return NULL; +} + diff --git a/thirdparty/icu4c/common/uloc_tag.cpp b/thirdparty/icu4c/common/uloc_tag.cpp new file mode 100644 index 0000000000..7f7fd9119e --- /dev/null +++ b/thirdparty/icu4c/common/uloc_tag.cpp @@ -0,0 +1,2844 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2009-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#include "unicode/bytestream.h" +#include "unicode/utypes.h" +#include "unicode/ures.h" +#include "unicode/localpointer.h" +#include "unicode/putil.h" +#include "unicode/uenum.h" +#include "unicode/uloc.h" +#include "ustr_imp.h" +#include "bytesinkutil.h" +#include "charstr.h" +#include "cmemory.h" +#include "cstring.h" +#include "putilimp.h" +#include "uinvchar.h" +#include "ulocimp.h" +#include "uassert.h" + + +/* struct holding a single variant */ +typedef struct VariantListEntry { + const char *variant; + struct VariantListEntry *next; +} VariantListEntry; + +/* struct holding a single attribute value */ +struct AttributeListEntry : public icu::UMemory { + const char *attribute; + struct AttributeListEntry *next; +}; + +/* struct holding a single extension */ +struct ExtensionListEntry : public icu::UMemory { + const char *key; + const char *value; + struct ExtensionListEntry *next; +}; + +#define MAXEXTLANG 3 +typedef struct ULanguageTag { + char *buf; /* holding parsed subtags */ + const char *language; + const char *extlang[MAXEXTLANG]; + const char *script; + const char *region; + VariantListEntry *variants; + ExtensionListEntry *extensions; + const char *privateuse; + const char *legacy; +} ULanguageTag; + +#define MINLEN 2 +#define SEP '-' +#define PRIVATEUSE 'x' +#define LDMLEXT 'u' + +#define LOCALE_SEP '_' +#define LOCALE_EXT_SEP '@' +#define LOCALE_KEYWORD_SEP ';' +#define LOCALE_KEY_TYPE_SEP '=' + +#define ISALPHA(c) uprv_isASCIILetter(c) +#define ISNUMERIC(c) ((c)>='0' && (c)<='9') + +static const char EMPTY[] = ""; +static const char LANG_UND[] = "und"; +static const char PRIVATEUSE_KEY[] = "x"; +static const char _POSIX[] = "_POSIX"; +static const char POSIX_KEY[] = "va"; +static const char POSIX_VALUE[] = "posix"; +static const char LOCALE_ATTRIBUTE_KEY[] = "attribute"; +static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant"; +static const char LOCALE_TYPE_YES[] = "yes"; + +#define LANG_UND_LEN 3 + +/* + Updated on 2018-09-12 from + https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . + + This table has 2 parts. The part for + legacy language tags (marked as “Type: grandfathered” in BCP 47) + is generated by the following scripts from the IANA language tag registry. + + curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\ + egrep -A 7 'Type: grandfathered' | \ + egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \ + awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\ + tr 'A-Z' 'a-z' + + + The 2nd part is made of five ICU-specific entries. They're kept for + the backward compatibility for now, even though there are no preferred + values. They may have to be removed for the strict BCP 47 compliance. + +*/ +static const char* const LEGACY[] = { +/* legacy preferred */ + "art-lojban", "jbo", + "en-gb-oed", "en-gb-oxendict", + "i-ami", "ami", + "i-bnn", "bnn", + "i-hak", "hak", + "i-klingon", "tlh", + "i-lux", "lb", + "i-navajo", "nv", + "i-pwn", "pwn", + "i-tao", "tao", + "i-tay", "tay", + "i-tsu", "tsu", + "no-bok", "nb", + "no-nyn", "nn", + "sgn-be-fr", "sfb", + "sgn-be-nl", "vgt", + "sgn-ch-de", "sgg", + "zh-guoyu", "cmn", + "zh-hakka", "hak", + "zh-min-nan", "nan", + "zh-xiang", "hsn", + + // Legacy tags with no preferred value in the IANA + // registry. Kept for now for the backward compatibility + // because ICU has mapped them this way. + "cel-gaulish", "xtg-x-cel-gaulish", + "i-default", "en-x-i-default", + "i-enochian", "und-x-i-enochian", + "i-mingo", "see-x-i-mingo", + "zh-min", "nan-x-zh-min", +}; + +/* + Updated on 2018-09-12 from + https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . + + The table lists redundant tags with preferred value in the IANA languate tag registry. + It's generated with the following command: + + curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\ + grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \ + awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \ + tr 'A-Z' 'a-z' + + In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because + a variant tag 'hepburn-heploc' has the preferred subtag, 'alaic97'. +*/ + +static const char* const REDUNDANT[] = { +// redundant preferred + "sgn-br", "bzs", + "sgn-co", "csn", + "sgn-de", "gsg", + "sgn-dk", "dsl", + "sgn-es", "ssp", + "sgn-fr", "fsl", + "sgn-gb", "bfi", + "sgn-gr", "gss", + "sgn-ie", "isg", + "sgn-it", "ise", + "sgn-jp", "jsl", + "sgn-mx", "mfs", + "sgn-ni", "ncs", + "sgn-nl", "dse", + "sgn-no", "nsl", + "sgn-pt", "psr", + "sgn-se", "swl", + "sgn-us", "ase", + "sgn-za", "sfs", + "zh-cmn", "cmn", + "zh-cmn-hans", "cmn-hans", + "zh-cmn-hant", "cmn-hant", + "zh-gan", "gan", + "zh-wuu", "wuu", + "zh-yue", "yue", + + // variant tag with preferred value + "ja-latn-hepburn-heploc", "ja-latn-alalc97", +}; + +/* + Updated on 2018-09-12 from + https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . + + grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \ + grep -B1 'Preferred' | grep -v '^--' | \ + awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' + + Make sure that 2-letter language subtags come before 3-letter subtags. +*/ +static const char DEPRECATEDLANGS[][4] = { +/* deprecated new */ + "in", "id", + "iw", "he", + "ji", "yi", + "jw", "jv", + "mo", "ro", + "aam", "aas", + "adp", "dz", + "aue", "ktz", + "ayx", "nun", + "bgm", "bcg", + "bjd", "drl", + "ccq", "rki", + "cjr", "mom", + "cka", "cmr", + "cmk", "xch", + "coy", "pij", + "cqu", "quh", + "drh", "khk", + "drw", "prs", + "gav", "dev", + "gfx", "vaj", + "ggn", "gvr", + "gti", "nyc", + "guv", "duz", + "hrr", "jal", + "ibi", "opa", + "ilw", "gal", + "jeg", "oyb", + "kgc", "tdf", + "kgh", "kml", + "koj", "kwv", + "krm", "bmf", + "ktr", "dtp", + "kvs", "gdj", + "kwq", "yam", + "kxe", "tvd", + "kzj", "dtp", + "kzt", "dtp", + "lii", "raq", + "lmm", "rmx", + "meg", "cir", + "mst", "mry", + "mwj", "vaj", + "myt", "mry", + "nad", "xny", + "ncp", "kdz", + "nnx", "ngv", + "nts", "pij", + "oun", "vaj", + "pcr", "adx", + "pmc", "huw", + "pmu", "phr", + "ppa", "bfy", + "ppr", "lcq", + "pry", "prt", + "puz", "pub", + "sca", "hle", + "skk", "oyb", + "tdu", "dtp", + "thc", "tpo", + "thx", "oyb", + "tie", "ras", + "tkk", "twm", + "tlw", "weo", + "tmp", "tyj", + "tne", "kak", + "tnf", "prs", + "tsf", "taj", + "uok", "ema", + "xba", "cax", + "xia", "acn", + "xkh", "waw", + "xsj", "suj", + "ybd", "rki", + "yma", "lrr", + "ymt", "mtm", + "yos", "zom", + "yuu", "yug", +}; + +/* + Updated on 2018-04-24 from + + curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \ + grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \ + grep -B1 'Preferred' | \ + awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' +*/ +static const char DEPRECATEDREGIONS[][3] = { +/* deprecated new */ + "BU", "MM", + "DD", "DE", + "FX", "FR", + "TP", "TL", + "YD", "YE", + "ZR", "CD", +}; + +/* +* ------------------------------------------------- +* +* These ultag_ functions may be exposed as APIs later +* +* ------------------------------------------------- +*/ + +static ULanguageTag* +ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status); + +static void +ultag_close(ULanguageTag* langtag); + +static const char* +ultag_getLanguage(const ULanguageTag* langtag); + +#if 0 +static const char* +ultag_getJDKLanguage(const ULanguageTag* langtag); +#endif + +static const char* +ultag_getExtlang(const ULanguageTag* langtag, int32_t idx); + +static int32_t +ultag_getExtlangSize(const ULanguageTag* langtag); + +static const char* +ultag_getScript(const ULanguageTag* langtag); + +static const char* +ultag_getRegion(const ULanguageTag* langtag); + +static const char* +ultag_getVariant(const ULanguageTag* langtag, int32_t idx); + +static int32_t +ultag_getVariantsSize(const ULanguageTag* langtag); + +static const char* +ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); + +static const char* +ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); + +static int32_t +ultag_getExtensionsSize(const ULanguageTag* langtag); + +static const char* +ultag_getPrivateUse(const ULanguageTag* langtag); + +#if 0 +static const char* +ultag_getLegacy(const ULanguageTag* langtag); +#endif + +U_NAMESPACE_BEGIN + +/** + * \class LocalULanguageTagPointer + * "Smart pointer" class, closes a ULanguageTag via ultag_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @internal + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalULanguageTagPointer, ULanguageTag, ultag_close); + +U_NAMESPACE_END + +/* +* ------------------------------------------------- +* +* Language subtag syntax validation functions +* +* ------------------------------------------------- +*/ + +static UBool +_isAlphaString(const char* s, int32_t len) { + int32_t i; + for (i = 0; i < len; i++) { + if (!ISALPHA(*(s + i))) { + return FALSE; + } + } + return TRUE; +} + +static UBool +_isNumericString(const char* s, int32_t len) { + int32_t i; + for (i = 0; i < len; i++) { + if (!ISNUMERIC(*(s + i))) { + return FALSE; + } + } + return TRUE; +} + +static UBool +_isAlphaNumericString(const char* s, int32_t len) { + int32_t i; + for (i = 0; i < len; i++) { + if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { + return FALSE; + } + } + return TRUE; +} + +static UBool +_isAlphaNumericStringLimitedLength(const char* s, int32_t len, int32_t min, int32_t max) { + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len >= min && len <= max && _isAlphaNumericString(s, len)) { + return TRUE; + } + return FALSE; +} + +U_CFUNC UBool +ultag_isLanguageSubtag(const char* s, int32_t len) { + /* + * unicode_language_subtag = alpha{2,3} | alpha{5,8}; + * NOTE: Per ICUTC 2019/01/23- accepting alpha 4 + * See ICU-20372 + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { + return TRUE; + } + return FALSE; +} + +static UBool +_isExtlangSubtag(const char* s, int32_t len) { + /* + * extlang = 3ALPHA ; selected ISO 639 codes + * *2("-" 3ALPHA) ; permanently reserved + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len == 3 && _isAlphaString(s, len)) { + return TRUE; + } + return FALSE; +} + +U_CFUNC UBool +ultag_isScriptSubtag(const char* s, int32_t len) { + /* + * script = 4ALPHA ; ISO 15924 code + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len == 4 && _isAlphaString(s, len)) { + return TRUE; + } + return FALSE; +} + +U_CFUNC UBool +ultag_isRegionSubtag(const char* s, int32_t len) { + /* + * region = 2ALPHA ; ISO 3166-1 code + * / 3DIGIT ; UN M.49 code + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len == 2 && _isAlphaString(s, len)) { + return TRUE; + } + if (len == 3 && _isNumericString(s, len)) { + return TRUE; + } + return FALSE; +} + +static UBool +_isVariantSubtag(const char* s, int32_t len) { + /* + * variant = 5*8alphanum ; registered variants + * / (DIGIT 3alphanum) + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (_isAlphaNumericStringLimitedLength(s, len, 5, 8)) { + return TRUE; + } + if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { + return TRUE; + } + return FALSE; +} + +static UBool +_isSepListOf(UBool (*test)(const char*, int32_t), const char* s, int32_t len) { + const char *p = s; + const char *pSubtag = NULL; + + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + + while ((p - s) < len) { + if (*p == SEP) { + if (pSubtag == NULL) { + return FALSE; + } + if (!test(pSubtag, (int32_t)(p - pSubtag))) { + return FALSE; + } + pSubtag = NULL; + } else if (pSubtag == NULL) { + pSubtag = p; + } + p++; + } + if (pSubtag == NULL) { + return FALSE; + } + return test(pSubtag, (int32_t)(p - pSubtag)); +} + +U_CFUNC UBool +ultag_isVariantSubtags(const char* s, int32_t len) { + return _isSepListOf(&_isVariantSubtag, s, len); +} + +// This is for the ICU-specific "lvariant" handling. +static UBool +_isPrivateuseVariantSubtag(const char* s, int32_t len) { + /* + * variant = 1*8alphanum ; registered variants + * / (DIGIT 3alphanum) + */ + return _isAlphaNumericStringLimitedLength(s, len , 1, 8); +} + +static UBool +_isExtensionSingleton(const char* s, int32_t len) { + /* + * extension = singleton 1*("-" (2*8alphanum)) + * + * singleton = DIGIT ; 0 - 9 + * / %x41-57 ; A - W + * / %x59-5A ; Y - Z + * / %x61-77 ; a - w + * / %x79-7A ; y - z + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len == 1 && (ISALPHA(*s) || ISNUMERIC(*s)) && (uprv_tolower(*s) != PRIVATEUSE)) { + return TRUE; + } + return FALSE; +} + +static UBool +_isExtensionSubtag(const char* s, int32_t len) { + /* + * extension = singleton 1*("-" (2*8alphanum)) + */ + return _isAlphaNumericStringLimitedLength(s, len, 2, 8); +} + +U_CFUNC UBool +ultag_isExtensionSubtags(const char* s, int32_t len) { + return _isSepListOf(&_isExtensionSubtag, s, len); +} + +static UBool +_isPrivateuseValueSubtag(const char* s, int32_t len) { + /* + * privateuse = "x" 1*("-" (1*8alphanum)) + */ + return _isAlphaNumericStringLimitedLength(s, len, 1, 8); +} + +U_CFUNC UBool +ultag_isPrivateuseValueSubtags(const char* s, int32_t len) { + return _isSepListOf(&_isPrivateuseValueSubtag, s, len); +} + +U_CFUNC UBool +ultag_isUnicodeLocaleAttribute(const char* s, int32_t len) { + /* + * attribute = alphanum{3,8} ; + */ + return _isAlphaNumericStringLimitedLength(s, len , 3, 8); +} + +U_CFUNC UBool +ultag_isUnicodeLocaleAttributes(const char* s, int32_t len) { + return _isSepListOf(&ultag_isUnicodeLocaleAttribute, s, len); +} + +U_CFUNC UBool +ultag_isUnicodeLocaleKey(const char* s, int32_t len) { + /* + * key = alphanum alpha ; + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len == 2 && (ISALPHA(*s) || ISNUMERIC(*s)) && ISALPHA(s[1])) { + return TRUE; + } + return FALSE; +} + +U_CFUNC UBool +_isUnicodeLocaleTypeSubtag(const char*s, int32_t len) { + /* + * alphanum{3,8} + */ + return _isAlphaNumericStringLimitedLength(s, len , 3, 8); +} + +U_CFUNC UBool +ultag_isUnicodeLocaleType(const char*s, int32_t len) { + /* + * type = alphanum{3,8} (sep alphanum{3,8})* ; + */ + return _isSepListOf(&_isUnicodeLocaleTypeSubtag, s, len); +} + +static UBool +_isTKey(const char* s, int32_t len) +{ + /* + * tkey = alpha digit ; + */ + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + if (len == 2 && ISALPHA(*s) && ISNUMERIC(*(s + 1))) { + return TRUE; + } + return FALSE; +} + +static UBool +_isTValue(const char* s, int32_t len) +{ + /* + * tvalue = (sep alphanum{3,8})+ ; + */ + return _isAlphaNumericStringLimitedLength(s, len , 3, 8); +} + +static UBool +_isTransformedExtensionSubtag(int32_t& state, const char* s, int32_t len) +{ + const int32_t kStart = 0; // Start, wait for unicode_language_subtag, tkey or end + const int32_t kGotLanguage = 1; // Got unicode_language_subtag, wait for unicode_script_subtag, + // unicode_region_subtag, unicode_variant_subtag, tkey or end + const int32_t kGotScript = 2; // Got unicode_script_subtag, wait for unicode_region_subtag, + // unicode_variant_subtag, tkey, or end + const int32_t kGotRegion = 3; // Got unicode_region_subtag, wait for unicode_variant_subtag, + // tkey, or end. + const int32_t kGotVariant = 4; // Got unicode_variant_subtag, wait for unicode_variant_subtag + // tkey or end. + const int32_t kGotTKey = -1; // Got tkey, wait for tvalue. ERROR if stop here. + const int32_t kGotTValue = 6; // Got tvalue, wait for tkey, tvalue or end + + switch (state) { + case kStart: + if (ultag_isLanguageSubtag(s, len)) { + state = kGotLanguage; + return TRUE; + } + if (_isTKey(s, len)) { + state = kGotTKey; + return TRUE; + } + return FALSE; + case kGotLanguage: + if (ultag_isScriptSubtag(s, len)) { + state = kGotScript; + return TRUE; + } + U_FALLTHROUGH; + case kGotScript: + if (ultag_isRegionSubtag(s, len)) { + state = kGotRegion; + return TRUE; + } + U_FALLTHROUGH; + case kGotRegion: + U_FALLTHROUGH; + case kGotVariant: + if (_isVariantSubtag(s, len)) { + state = kGotVariant; + return TRUE; + } + if (_isTKey(s, len)) { + state = kGotTKey; + return TRUE; + } + return FALSE; + case kGotTKey: + if (_isTValue(s, len)) { + state = kGotTValue; + return TRUE; + } + return FALSE; + case kGotTValue: + if (_isTKey(s, len)) { + state = kGotTKey; + return TRUE; + } + if (_isTValue(s, len)) { + return TRUE; + } + return FALSE; + } + return FALSE; +} + +static UBool +_isUnicodeExtensionSubtag(int32_t& state, const char* s, int32_t len) +{ + const int32_t kStart = 0; // Start, wait for a key or attribute or end + const int32_t kGotKey = 1; // Got a key, wait for type or key or end + const int32_t kGotType = 2; // Got a type, wait for key or end + + switch (state) { + case kStart: + if (ultag_isUnicodeLocaleKey(s, len)) { + state = kGotKey; + return TRUE; + } + if (ultag_isUnicodeLocaleAttribute(s, len)) { + return TRUE; + } + return FALSE; + case kGotKey: + if (ultag_isUnicodeLocaleKey(s, len)) { + return TRUE; + } + if (_isUnicodeLocaleTypeSubtag(s, len)) { + state = kGotType; + return TRUE; + } + return FALSE; + case kGotType: + if (ultag_isUnicodeLocaleKey(s, len)) { + state = kGotKey; + return TRUE; + } + if (_isUnicodeLocaleTypeSubtag(s, len)) { + return TRUE; + } + return FALSE; + } + return FALSE; +} + +static UBool +_isStatefulSepListOf(UBool (*test)(int32_t&, const char*, int32_t), const char* s, int32_t len) +{ + int32_t state = 0; + const char* p; + const char* start = s; + int32_t subtagLen = 0; + + if (len < 0) { + len = (int32_t)uprv_strlen(s); + } + + for (p = s; len > 0; p++, len--) { + if (*p == SEP) { + if (!test(state, start, subtagLen)) { + return FALSE; + } + subtagLen = 0; + start = p + 1; + } else { + subtagLen++; + } + } + + if (test(state, start, subtagLen) && state >= 0) { + return TRUE; + } + return FALSE; +} + +U_CFUNC UBool +ultag_isTransformedExtensionSubtags(const char* s, int32_t len) +{ + return _isStatefulSepListOf(&_isTransformedExtensionSubtag, s, len); +} + +U_CFUNC UBool +ultag_isUnicodeExtensionSubtags(const char* s, int32_t len) { + return _isStatefulSepListOf(&_isUnicodeExtensionSubtag, s, len); +} + + +/* +* ------------------------------------------------- +* +* Helper functions +* +* ------------------------------------------------- +*/ + +static UBool +_addVariantToList(VariantListEntry **first, VariantListEntry *var) { + UBool bAdded = TRUE; + + if (*first == NULL) { + var->next = NULL; + *first = var; + } else { + VariantListEntry *prev, *cur; + int32_t cmp; + + /* variants order should be preserved */ + prev = NULL; + cur = *first; + while (TRUE) { + if (cur == NULL) { + prev->next = var; + var->next = NULL; + break; + } + + /* Checking for duplicate variant */ + cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); + if (cmp == 0) { + /* duplicated variant */ + bAdded = FALSE; + break; + } + prev = cur; + cur = cur->next; + } + } + + return bAdded; +} + +static UBool +_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) { + UBool bAdded = TRUE; + + if (*first == NULL) { + attr->next = NULL; + *first = attr; + } else { + AttributeListEntry *prev, *cur; + int32_t cmp; + + /* reorder variants in alphabetical order */ + prev = NULL; + cur = *first; + while (TRUE) { + if (cur == NULL) { + prev->next = attr; + attr->next = NULL; + break; + } + cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute); + if (cmp < 0) { + if (prev == NULL) { + *first = attr; + } else { + prev->next = attr; + } + attr->next = cur; + break; + } + if (cmp == 0) { + /* duplicated variant */ + bAdded = FALSE; + break; + } + prev = cur; + cur = cur->next; + } + } + + return bAdded; +} + + +static UBool +_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { + UBool bAdded = TRUE; + + if (*first == NULL) { + ext->next = NULL; + *first = ext; + } else { + ExtensionListEntry *prev, *cur; + int32_t cmp; + + /* reorder variants in alphabetical order */ + prev = NULL; + cur = *first; + while (TRUE) { + if (cur == NULL) { + prev->next = ext; + ext->next = NULL; + break; + } + if (localeToBCP) { + /* special handling for locale to bcp conversion */ + int32_t len, curlen; + + len = (int32_t)uprv_strlen(ext->key); + curlen = (int32_t)uprv_strlen(cur->key); + + if (len == 1 && curlen == 1) { + if (*(ext->key) == *(cur->key)) { + cmp = 0; + } else if (*(ext->key) == PRIVATEUSE) { + cmp = 1; + } else if (*(cur->key) == PRIVATEUSE) { + cmp = -1; + } else { + cmp = *(ext->key) - *(cur->key); + } + } else if (len == 1) { + cmp = *(ext->key) - LDMLEXT; + } else if (curlen == 1) { + cmp = LDMLEXT - *(cur->key); + } else { + cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); + /* Both are u extension keys - we need special handling for 'attribute' */ + if (cmp != 0) { + if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) { + cmp = 1; + } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { + cmp = -1; + } + } + } + } else { + cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); + } + if (cmp < 0) { + if (prev == NULL) { + *first = ext; + } else { + prev->next = ext; + } + ext->next = cur; + break; + } + if (cmp == 0) { + /* duplicated extension key */ + bAdded = FALSE; + break; + } + prev = cur; + cur = cur->next; + } + } + + return bAdded; +} + +static void +_initializeULanguageTag(ULanguageTag* langtag) { + int32_t i; + + langtag->buf = NULL; + + langtag->language = EMPTY; + for (i = 0; i < MAXEXTLANG; i++) { + langtag->extlang[i] = NULL; + } + + langtag->script = EMPTY; + langtag->region = EMPTY; + + langtag->variants = NULL; + langtag->extensions = NULL; + + langtag->legacy = EMPTY; + langtag->privateuse = EMPTY; +} + +static void +_appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) { + char buf[ULOC_LANG_CAPACITY]; + UErrorCode tmpStatus = U_ZERO_ERROR; + int32_t len, i; + + if (U_FAILURE(*status)) { + return; + } + + len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); + if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + len = 0; + } + + /* Note: returned language code is in lower case letters */ + + if (len == 0) { + sink.Append(LANG_UND, LANG_UND_LEN); + } else if (!ultag_isLanguageSubtag(buf, len)) { + /* invalid language code */ + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + sink.Append(LANG_UND, LANG_UND_LEN); + } else { + /* resolve deprecated */ + for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) { + // 2-letter deprecated subtags are listede before 3-letter + // ones in DEPRECATEDLANGS[]. Get out of loop on coming + // across the 1st 3-letter subtag, if the input is a 2-letter code. + // to avoid continuing to try when there's no match. + if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break; + if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { + uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); + len = (int32_t)uprv_strlen(buf); + break; + } + } + sink.Append(buf, len); + } +} + +static void +_appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) { + char buf[ULOC_SCRIPT_CAPACITY]; + UErrorCode tmpStatus = U_ZERO_ERROR; + int32_t len; + + if (U_FAILURE(*status)) { + return; + } + + len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); + if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return; + } + + if (len > 0) { + if (!ultag_isScriptSubtag(buf, len)) { + /* invalid script code */ + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return; + } else { + sink.Append("-", 1); + sink.Append(buf, len); + } + } +} + +static void +_appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UErrorCode* status) { + char buf[ULOC_COUNTRY_CAPACITY]; + UErrorCode tmpStatus = U_ZERO_ERROR; + int32_t len; + + if (U_FAILURE(*status)) { + return; + } + + len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); + if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return; + } + + if (len > 0) { + if (!ultag_isRegionSubtag(buf, len)) { + /* invalid region code */ + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return; + } else { + sink.Append("-", 1); + /* resolve deprecated */ + for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) { + if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) { + uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]); + len = (int32_t)uprv_strlen(buf); + break; + } + } + sink.Append(buf, len); + } + } +} + +static void _sortVariants(VariantListEntry* first) { + for (VariantListEntry* var1 = first; var1 != NULL; var1 = var1->next) { + for (VariantListEntry* var2 = var1->next; var2 != NULL; var2 = var2->next) { + // Swap var1->variant and var2->variant. + if (uprv_compareInvCharsAsAscii(var1->variant, var2->variant) > 0) { + const char* temp = var1->variant; + var1->variant = var2->variant; + var2->variant = temp; + } + } + } +} + +static void +_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool *hadPosix, UErrorCode* status) { + char buf[ULOC_FULLNAME_CAPACITY]; + UErrorCode tmpStatus = U_ZERO_ERROR; + int32_t len, i; + + if (U_FAILURE(*status)) { + return; + } + + len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); + if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return; + } + + if (len > 0) { + char *p, *pVar; + UBool bNext = TRUE; + VariantListEntry *var; + VariantListEntry *varFirst = NULL; + + pVar = NULL; + p = buf; + while (bNext) { + if (*p == SEP || *p == LOCALE_SEP || *p == 0) { + if (*p == 0) { + bNext = FALSE; + } else { + *p = 0; /* terminate */ + } + if (pVar == NULL) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + /* ignore empty variant */ + } else { + /* ICU uses upper case letters for variants, but + the canonical format is lowercase in BCP47 */ + for (i = 0; *(pVar + i) != 0; i++) { + *(pVar + i) = uprv_tolower(*(pVar + i)); + } + + /* validate */ + if (_isVariantSubtag(pVar, -1)) { + if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) { + /* emit the variant to the list */ + var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); + if (var == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + var->variant = pVar; + if (!_addVariantToList(&varFirst, var)) { + /* duplicated variant */ + uprv_free(var); + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + } + } else { + /* Special handling for POSIX variant, need to remember that we had it and then */ + /* treat it like an extension later. */ + *hadPosix = TRUE; + } + } else if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } else if (_isPrivateuseValueSubtag(pVar, -1)) { + /* Handle private use subtags separately */ + break; + } + } + /* reset variant starting position */ + pVar = NULL; + } else if (pVar == NULL) { + pVar = p; + } + p++; + } + + if (U_SUCCESS(*status)) { + if (varFirst != NULL) { + int32_t varLen; + + /* per UTS35, we should sort the variants */ + _sortVariants(varFirst); + + /* write out validated/normalized variants to the target */ + var = varFirst; + while (var != NULL) { + sink.Append("-", 1); + varLen = (int32_t)uprv_strlen(var->variant); + sink.Append(var->variant, varLen); + var = var->next; + } + } + } + + /* clean up */ + var = varFirst; + while (var != NULL) { + VariantListEntry *tmpVar = var->next; + uprv_free(var); + var = tmpVar; + } + + if (U_FAILURE(*status)) { + return; + } + } +} + +static void +_appendKeywordsToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) { + char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; + int32_t attrBufLength = 0; + + icu::MemoryPool<AttributeListEntry> attrPool; + icu::MemoryPool<ExtensionListEntry> extPool; + icu::MemoryPool<icu::CharString> strPool; + + icu::LocalUEnumerationPointer keywordEnum(uloc_openKeywords(localeID, status)); + if (U_FAILURE(*status) && !hadPosix) { + return; + } + if (keywordEnum.isValid() || hadPosix) { + /* reorder extensions */ + int32_t len; + const char *key; + ExtensionListEntry *firstExt = NULL; + ExtensionListEntry *ext; + AttributeListEntry *firstAttr = NULL; + AttributeListEntry *attr; + icu::MemoryPool<icu::CharString> extBufPool; + const char *bcpKey=nullptr, *bcpValue=nullptr; + UErrorCode tmpStatus = U_ZERO_ERROR; + int32_t keylen; + UBool isBcpUExt; + + while (TRUE) { + key = uenum_next(keywordEnum.getAlias(), NULL, status); + if (key == NULL) { + break; + } + + icu::CharString buf; + { + icu::CharStringByteSink sink(&buf); + ulocimp_getKeywordValue(localeID, key, sink, &tmpStatus); + } + len = buf.length(); + + if (U_FAILURE(tmpStatus)) { + if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + /* ignore this keyword */ + tmpStatus = U_ZERO_ERROR; + continue; + } + + keylen = (int32_t)uprv_strlen(key); + isBcpUExt = (keylen > 1); + + /* special keyword used for representing Unicode locale attributes */ + if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) { + if (len > 0) { + int32_t i = 0; + while (TRUE) { + attrBufLength = 0; + for (; i < len; i++) { + if (buf[i] != '-') { + attrBuf[attrBufLength++] = buf[i]; + } else { + i++; + break; + } + } + if (attrBufLength > 0) { + attrBuf[attrBufLength] = 0; + + } else if (i >= len){ + break; + } + + /* create AttributeListEntry */ + attr = attrPool.create(); + if (attr == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + icu::CharString* attrValue = + strPool.create(attrBuf, attrBufLength, *status); + if (attrValue == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + if (U_FAILURE(*status)) { + break; + } + attr->attribute = attrValue->data(); + + if (!_addAttributeToList(&firstAttr, attr)) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + } + } + /* for a place holder ExtensionListEntry */ + bcpKey = LOCALE_ATTRIBUTE_KEY; + bcpValue = NULL; + } + } else if (isBcpUExt) { + bcpKey = uloc_toUnicodeLocaleKey(key); + if (bcpKey == NULL) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + continue; + } + + /* we've checked buf is null-terminated above */ + bcpValue = uloc_toUnicodeLocaleType(key, buf.data()); + if (bcpValue == NULL) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + continue; + } + if (bcpValue == buf.data()) { + /* + When uloc_toUnicodeLocaleType(key, buf) returns the + input value as is, the value is well-formed, but has + no known mapping. This implementation normalizes the + value to lower case + */ + icu::CharString* extBuf = extBufPool.create(buf, tmpStatus); + + if (extBuf == nullptr) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + break; + } + + T_CString_toLowerCase(extBuf->data()); + bcpValue = extBuf->data(); + } + } else { + if (*key == PRIVATEUSE) { + if (!ultag_isPrivateuseValueSubtags(buf.data(), len)) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + continue; + } + } else { + if (!_isExtensionSingleton(key, keylen) || !ultag_isExtensionSubtags(buf.data(), len)) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + continue; + } + } + bcpKey = key; + icu::CharString* extBuf = + extBufPool.create(buf.data(), len, tmpStatus); + if (extBuf == nullptr) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + break; + } + bcpValue = extBuf->data(); + } + + /* create ExtensionListEntry */ + ext = extPool.create(); + if (ext == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + ext->key = bcpKey; + ext->value = bcpValue; + + if (!_addExtensionToList(&firstExt, ext, TRUE)) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + } + } + + /* Special handling for POSIX variant - add the keywords for POSIX */ + if (hadPosix) { + /* create ExtensionListEntry for POSIX */ + ext = extPool.create(); + if (ext == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + ext->key = POSIX_KEY; + ext->value = POSIX_VALUE; + + if (!_addExtensionToList(&firstExt, ext, TRUE)) { + // Silently ignore errors. + } + } + + if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) { + UBool startLDMLExtension = FALSE; + for (ext = firstExt; ext; ext = ext->next) { + if (!startLDMLExtension && uprv_strlen(ext->key) > 1) { + /* first LDML u singlton extension */ + sink.Append("-u", 2); + startLDMLExtension = TRUE; + } + + /* write out the sorted BCP47 attributes, extensions and private use */ + if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { + /* write the value for the attributes */ + for (attr = firstAttr; attr; attr = attr->next) { + sink.Append("-", 1); + sink.Append( + attr->attribute, static_cast<int32_t>(uprv_strlen(attr->attribute))); + } + } else { + sink.Append("-", 1); + sink.Append(ext->key, static_cast<int32_t>(uprv_strlen(ext->key))); + if (uprv_strcmp(ext->value, "true") != 0 && + uprv_strcmp(ext->value, "yes") != 0) { + sink.Append("-", 1); + sink.Append(ext->value, static_cast<int32_t>(uprv_strlen(ext->value))); + } + } + } + } + } +} + +/** + * Append keywords parsed from LDML extension value + * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} + * Note: char* buf is used for storing keywords + */ +static void +_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, icu::MemoryPool<ExtensionListEntry>& extPool, icu::MemoryPool<icu::CharString>& kwdBuf, UBool *posixVariant, UErrorCode *status) { + const char *pTag; /* beginning of current subtag */ + const char *pKwds; /* beginning of key-type pairs */ + UBool variantExists = *posixVariant; + + ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */ + ExtensionListEntry *kwd, *nextKwd; + + int32_t len; + + /* Reset the posixVariant value */ + *posixVariant = FALSE; + + pTag = ldmlext; + pKwds = NULL; + + { + AttributeListEntry *attrFirst = NULL; /* first attribute */ + AttributeListEntry *attr, *nextAttr; + + char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; + int32_t attrBufIdx = 0; + + icu::MemoryPool<AttributeListEntry> attrPool; + + /* Iterate through u extension attributes */ + while (*pTag) { + /* locate next separator char */ + for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); + + if (ultag_isUnicodeLocaleKey(pTag, len)) { + pKwds = pTag; + break; + } + + /* add this attribute to the list */ + attr = attrPool.create(); + if (attr == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + + if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { + uprv_memcpy(&attrBuf[attrBufIdx], pTag, len); + attrBuf[attrBufIdx + len] = 0; + attr->attribute = &attrBuf[attrBufIdx]; + attrBufIdx += (len + 1); + } else { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + // duplicate attribute is ignored, causes no error. + _addAttributeToList(&attrFirst, attr); + + /* next tag */ + pTag += len; + if (*pTag) { + /* next to the separator */ + pTag++; + } + } + + if (attrFirst) { + /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */ + + kwd = extPool.create(); + if (kwd == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + + icu::CharString* value = kwdBuf.create(); + if (value == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + + /* attribute subtags sorted in alphabetical order as type */ + attr = attrFirst; + while (attr != NULL) { + nextAttr = attr->next; + if (attr != attrFirst) { + value->append('-', *status); + } + value->append(attr->attribute, *status); + attr = nextAttr; + } + if (U_FAILURE(*status)) { + return; + } + + kwd->key = LOCALE_ATTRIBUTE_KEY; + kwd->value = value->data(); + + if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + } + } + + if (pKwds) { + const char *pBcpKey = NULL; /* u extenstion key subtag */ + const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */ + int32_t bcpKeyLen = 0; + int32_t bcpTypeLen = 0; + UBool isDone = FALSE; + + pTag = pKwds; + /* BCP47 representation of LDML key/type pairs */ + while (!isDone) { + const char *pNextBcpKey = NULL; + int32_t nextBcpKeyLen = 0; + UBool emitKeyword = FALSE; + + if (*pTag) { + /* locate next separator char */ + for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); + + if (ultag_isUnicodeLocaleKey(pTag, len)) { + if (pBcpKey) { + emitKeyword = TRUE; + pNextBcpKey = pTag; + nextBcpKeyLen = len; + } else { + pBcpKey = pTag; + bcpKeyLen = len; + } + } else { + U_ASSERT(pBcpKey != NULL); + /* within LDML type subtags */ + if (pBcpType) { + bcpTypeLen += (len + 1); + } else { + pBcpType = pTag; + bcpTypeLen = len; + } + } + + /* next tag */ + pTag += len; + if (*pTag) { + /* next to the separator */ + pTag++; + } + } else { + /* processing last one */ + emitKeyword = TRUE; + isDone = TRUE; + } + + if (emitKeyword) { + const char *pKey = NULL; /* LDML key */ + const char *pType = NULL; /* LDML type */ + + char bcpKeyBuf[3]; /* BCP key length is always 2 for now */ + + U_ASSERT(pBcpKey != NULL); + + if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) { + /* the BCP key is invalid */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + U_ASSERT(bcpKeyLen <= 2); + + uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen); + bcpKeyBuf[bcpKeyLen] = 0; + + /* u extension key to LDML key */ + pKey = uloc_toLegacyKey(bcpKeyBuf); + if (pKey == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if (pKey == bcpKeyBuf) { + /* + The key returned by toLegacyKey points to the input buffer. + We normalize the result key to lower case. + */ + T_CString_toLowerCase(bcpKeyBuf); + icu::CharString* key = kwdBuf.create(bcpKeyBuf, bcpKeyLen, *status); + if (key == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + if (U_FAILURE(*status)) { + return; + } + pKey = key->data(); + } + + if (pBcpType) { + char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */ + if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) { + /* the BCP type is too long */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen); + bcpTypeBuf[bcpTypeLen] = 0; + + /* BCP type to locale type */ + pType = uloc_toLegacyType(pKey, bcpTypeBuf); + if (pType == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if (pType == bcpTypeBuf) { + /* + The type returned by toLegacyType points to the input buffer. + We normalize the result type to lower case. + */ + /* normalize to lower case */ + T_CString_toLowerCase(bcpTypeBuf); + icu::CharString* type = kwdBuf.create(bcpTypeBuf, bcpTypeLen, *status); + if (type == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + if (U_FAILURE(*status)) { + return; + } + pType = type->data(); + } + } else { + /* typeless - default type value is "yes" */ + pType = LOCALE_TYPE_YES; + } + + /* Special handling for u-va-posix, since we want to treat this as a variant, + not as a keyword */ + if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) { + *posixVariant = TRUE; + } else { + /* create an ExtensionListEntry for this keyword */ + kwd = extPool.create(); + if (kwd == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + + kwd->key = pKey; + kwd->value = pType; + + if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { + // duplicate keyword is allowed, Only the first + // is honored. + } + } + + pBcpKey = pNextBcpKey; + bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0; + pBcpType = NULL; + bcpTypeLen = 0; + } + } + } + + kwd = kwdFirst; + while (kwd != NULL) { + nextKwd = kwd->next; + _addExtensionToList(appendTo, kwd, FALSE); + kwd = nextKwd; + } +} + + +static void +_appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode* status) { + int32_t i, n; + int32_t len; + ExtensionListEntry *kwdFirst = NULL; + ExtensionListEntry *kwd; + const char *key, *type; + icu::MemoryPool<ExtensionListEntry> extPool; + icu::MemoryPool<icu::CharString> kwdBuf; + UBool posixVariant = FALSE; + + if (U_FAILURE(*status)) { + return; + } + + /* Determine if variants already exists */ + if (ultag_getVariantsSize(langtag)) { + posixVariant = TRUE; + } + + n = ultag_getExtensionsSize(langtag); + + /* resolve locale keywords and reordering keys */ + for (i = 0; i < n; i++) { + key = ultag_getExtensionKey(langtag, i); + type = ultag_getExtensionValue(langtag, i); + if (*key == LDMLEXT) { + _appendLDMLExtensionAsKeywords(type, &kwdFirst, extPool, kwdBuf, &posixVariant, status); + if (U_FAILURE(*status)) { + break; + } + } else { + kwd = extPool.create(); + if (kwd == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } + kwd->key = key; + kwd->value = type; + if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + } + } + + if (U_SUCCESS(*status)) { + type = ultag_getPrivateUse(langtag); + if ((int32_t)uprv_strlen(type) > 0) { + /* add private use as a keyword */ + kwd = extPool.create(); + if (kwd == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + } else { + kwd->key = PRIVATEUSE_KEY; + kwd->value = type; + if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + } + } + } + + /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ + + if (U_SUCCESS(*status) && posixVariant) { + len = (int32_t) uprv_strlen(_POSIX); + sink.Append(_POSIX, len); + } + + if (U_SUCCESS(*status) && kwdFirst != NULL) { + /* write out the sorted keywords */ + UBool firstValue = TRUE; + kwd = kwdFirst; + do { + if (firstValue) { + sink.Append("@", 1); + firstValue = FALSE; + } else { + sink.Append(";", 1); + } + + /* key */ + len = (int32_t)uprv_strlen(kwd->key); + sink.Append(kwd->key, len); + sink.Append("=", 1); + + /* type */ + len = (int32_t)uprv_strlen(kwd->value); + sink.Append(kwd->value, len); + + kwd = kwd->next; + } while (kwd); + } +} + +static void +_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, UBool strict, UBool hadPosix, UErrorCode* status) { + (void)hadPosix; + char buf[ULOC_FULLNAME_CAPACITY]; + char tmpAppend[ULOC_FULLNAME_CAPACITY]; + UErrorCode tmpStatus = U_ZERO_ERROR; + int32_t len, i; + int32_t reslen = 0; + int32_t capacity = sizeof tmpAppend; + + if (U_FAILURE(*status)) { + return; + } + + len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); + if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } + return; + } + + if (len > 0) { + char *p, *pPriv; + UBool bNext = TRUE; + UBool firstValue = TRUE; + UBool writeValue; + + pPriv = NULL; + p = buf; + while (bNext) { + writeValue = FALSE; + if (*p == SEP || *p == LOCALE_SEP || *p == 0) { + if (*p == 0) { + bNext = FALSE; + } else { + *p = 0; /* terminate */ + } + if (pPriv != NULL) { + /* Private use in the canonical format is lowercase in BCP47 */ + for (i = 0; *(pPriv + i) != 0; i++) { + *(pPriv + i) = uprv_tolower(*(pPriv + i)); + } + + /* validate */ + if (_isPrivateuseValueSubtag(pPriv, -1)) { + if (firstValue) { + if (!_isVariantSubtag(pPriv, -1)) { + writeValue = TRUE; + } + } else { + writeValue = TRUE; + } + } else if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + break; + } else { + break; + } + + if (writeValue) { + if (reslen < capacity) { + tmpAppend[reslen++] = SEP; + } + + if (firstValue) { + if (reslen < capacity) { + tmpAppend[reslen++] = *PRIVATEUSE_KEY; + } + + if (reslen < capacity) { + tmpAppend[reslen++] = SEP; + } + + len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); + if (reslen < capacity) { + uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); + } + reslen += len; + + if (reslen < capacity) { + tmpAppend[reslen++] = SEP; + } + + firstValue = FALSE; + } + + len = (int32_t)uprv_strlen(pPriv); + if (reslen < capacity) { + uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); + } + reslen += len; + } + } + /* reset private use starting position */ + pPriv = NULL; + } else if (pPriv == NULL) { + pPriv = p; + } + p++; + } + + if (U_FAILURE(*status)) { + return; + } + } + + if (U_SUCCESS(*status)) { + len = reslen; + sink.Append(tmpAppend, len); + } +} + +/* +* ------------------------------------------------- +* +* ultag_ functions +* +* ------------------------------------------------- +*/ + +/* Bit flags used by the parser */ +#define LANG 0x0001 +#define EXTL 0x0002 +#define SCRT 0x0004 +#define REGN 0x0008 +#define VART 0x0010 +#define EXTS 0x0020 +#define EXTV 0x0040 +#define PRIV 0x0080 + +/** + * Ticket #12705 - The optimizer in Visual Studio 2015 Update 3 has problems optimizing this function. + * As a work-around, optimization is disabled for this function on VS2015 and VS2017. + * This work-around should be removed once the following versions of Visual Studio are no + * longer supported: All versions of VS2015/VS2017, and versions of VS2019 below 16.4. + */ +#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924) +#pragma optimize( "", off ) +#endif + +static ULanguageTag* +ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { + char *tagBuf; + int16_t next; + char *pSubtag, *pNext, *pLastGoodPosition; + int32_t subtagLen; + int32_t extlangIdx; + ExtensionListEntry *pExtension; + char *pExtValueSubtag, *pExtValueSubtagEnd; + int32_t i; + UBool privateuseVar = FALSE; + int32_t legacyLen = 0; + + if (parsedLen != NULL) { + *parsedLen = 0; + } + + if (U_FAILURE(*status)) { + return NULL; + } + + if (tagLen < 0) { + tagLen = (int32_t)uprv_strlen(tag); + } + + /* copy the entire string */ + tagBuf = (char*)uprv_malloc(tagLen + 1); + if (tagBuf == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memcpy(tagBuf, tag, tagLen); + *(tagBuf + tagLen) = 0; + + /* create a ULanguageTag */ + icu::LocalULanguageTagPointer t( + (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag))); + if (t.isNull()) { + uprv_free(tagBuf); + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + _initializeULanguageTag(t.getAlias()); + t->buf = tagBuf; + + if (tagLen < MINLEN) { + /* the input tag is too short - return empty ULanguageTag */ + return t.orphan(); + } + + size_t parsedLenDelta = 0; + // Legacy tag will be consider together. Legacy tag with intervening + // script and region such as art-DE-lojban or art-Latn-lojban won't be + // matched. + /* check if the tag is legacy */ + for (i = 0; i < UPRV_LENGTHOF(LEGACY); i += 2) { + int32_t checkLegacyLen = static_cast<int32_t>(uprv_strlen(LEGACY[i])); + if (tagLen < checkLegacyLen) { + continue; + } + if (tagLen > checkLegacyLen && tagBuf[checkLegacyLen] != '-') { + // make sure next char is '-'. + continue; + } + if (uprv_strnicmp(LEGACY[i], tagBuf, checkLegacyLen) == 0) { + int32_t newTagLength; + + legacyLen = checkLegacyLen; /* back up for output parsedLen */ + int32_t replacementLen = static_cast<int32_t>(uprv_strlen(LEGACY[i+1])); + newTagLength = replacementLen + tagLen - checkLegacyLen; + if (tagLen < newTagLength) { + uprv_free(tagBuf); + tagBuf = (char*)uprv_malloc(newTagLength + 1); + if (tagBuf == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + t->buf = tagBuf; + tagLen = newTagLength; + } + parsedLenDelta = checkLegacyLen - replacementLen; + uprv_strcpy(t->buf, LEGACY[i + 1]); + if (checkLegacyLen != tagLen) { + uprv_strcpy(t->buf + replacementLen, tag + checkLegacyLen); + } + break; + } + } + + if (legacyLen == 0) { + for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) { + const char* redundantTag = REDUNDANT[i]; + size_t redundantTagLen = uprv_strlen(redundantTag); + // The preferred tag for a redundant tag is always shorter than redundant + // tag. A redundant tag may or may not be followed by other subtags. + // (i.e. "zh-yue" or "zh-yue-u-co-pinyin"). + if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) { + const char* redundantTagEnd = tagBuf + redundantTagLen; + if (*redundantTagEnd == '\0' || *redundantTagEnd == SEP) { + const char* preferredTag = REDUNDANT[i + 1]; + size_t preferredTagLen = uprv_strlen(preferredTag); + uprv_strncpy(t->buf, preferredTag, preferredTagLen); + if (*redundantTagEnd == SEP) { + uprv_memmove(tagBuf + preferredTagLen, + redundantTagEnd, + tagLen - redundantTagLen + 1); + } else { + tagBuf[preferredTagLen] = '\0'; + } + // parsedLen should be the length of the input + // before redundantTag is replaced by preferredTag. + // Save the delta to add it back later. + parsedLenDelta = redundantTagLen - preferredTagLen; + break; + } + } + } + } + + /* + * langtag = language + * ["-" script] + * ["-" region] + * *("-" variant) + * *("-" extension) + * ["-" privateuse] + */ + + next = LANG | PRIV; + pNext = pLastGoodPosition = tagBuf; + extlangIdx = 0; + pExtension = NULL; + pExtValueSubtag = NULL; + pExtValueSubtagEnd = NULL; + + while (pNext) { + char *pSep; + + pSubtag = pNext; + + /* locate next separator char */ + pSep = pSubtag; + while (*pSep) { + if (*pSep == SEP) { + break; + } + pSep++; + } + if (*pSep == 0) { + /* last subtag */ + pNext = NULL; + } else { + pNext = pSep + 1; + } + subtagLen = (int32_t)(pSep - pSubtag); + + if (next & LANG) { + if (ultag_isLanguageSubtag(pSubtag, subtagLen)) { + *pSep = 0; /* terminate */ + // TODO: move deprecated language code handling here. + t->language = T_CString_toLowerCase(pSubtag); + + pLastGoodPosition = pSep; + next = SCRT | REGN | VART | EXTS | PRIV; + if (subtagLen <= 3) + next |= EXTL; + continue; + } + } + if (next & EXTL) { + if (_isExtlangSubtag(pSubtag, subtagLen)) { + *pSep = 0; + t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); + + pLastGoodPosition = pSep; + if (extlangIdx < 3) { + next = EXTL | SCRT | REGN | VART | EXTS | PRIV; + } else { + next = SCRT | REGN | VART | EXTS | PRIV; + } + continue; + } + } + if (next & SCRT) { + if (ultag_isScriptSubtag(pSubtag, subtagLen)) { + char *p = pSubtag; + + *pSep = 0; + + /* to title case */ + *p = uprv_toupper(*p); + p++; + for (; *p; p++) { + *p = uprv_tolower(*p); + } + + t->script = pSubtag; + + pLastGoodPosition = pSep; + next = REGN | VART | EXTS | PRIV; + continue; + } + } + if (next & REGN) { + if (ultag_isRegionSubtag(pSubtag, subtagLen)) { + *pSep = 0; + // TODO: move deprecated region code handling here. + t->region = T_CString_toUpperCase(pSubtag); + + pLastGoodPosition = pSep; + next = VART | EXTS | PRIV; + continue; + } + } + if (next & VART) { + if (_isVariantSubtag(pSubtag, subtagLen) || + (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) { + VariantListEntry *var; + UBool isAdded; + + var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); + if (var == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + *pSep = 0; + var->variant = T_CString_toUpperCase(pSubtag); + isAdded = _addVariantToList(&(t->variants), var); + if (!isAdded) { + /* duplicated variant entry */ + uprv_free(var); + break; + } + pLastGoodPosition = pSep; + next = VART | EXTS | PRIV; + continue; + } + } + if (next & EXTS) { + if (_isExtensionSingleton(pSubtag, subtagLen)) { + if (pExtension != NULL) { + if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { + /* the previous extension is incomplete */ + uprv_free(pExtension); + pExtension = NULL; + break; + } + + /* terminate the previous extension value */ + *pExtValueSubtagEnd = 0; + pExtension->value = T_CString_toLowerCase(pExtValueSubtag); + + /* insert the extension to the list */ + if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { + pLastGoodPosition = pExtValueSubtagEnd; + } else { + /* stop parsing here */ + uprv_free(pExtension); + pExtension = NULL; + break; + } + } + + /* create a new extension */ + pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); + if (pExtension == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + *pSep = 0; + pExtension->key = T_CString_toLowerCase(pSubtag); + pExtension->value = NULL; /* will be set later */ + + /* + * reset the start and the end location of extension value + * subtags for this extension + */ + pExtValueSubtag = NULL; + pExtValueSubtagEnd = NULL; + + next = EXTV; + continue; + } + } + if (next & EXTV) { + if (_isExtensionSubtag(pSubtag, subtagLen)) { + if (pExtValueSubtag == NULL) { + /* if the start postion of this extension's value is not yet, + this one is the first value subtag */ + pExtValueSubtag = pSubtag; + } + + /* Mark the end of this subtag */ + pExtValueSubtagEnd = pSep; + next = EXTS | EXTV | PRIV; + + continue; + } + } + if (next & PRIV) { + if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) { + char *pPrivuseVal; + + if (pExtension != NULL) { + /* Process the last extension */ + if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { + /* the previous extension is incomplete */ + uprv_free(pExtension); + pExtension = NULL; + break; + } else { + /* terminate the previous extension value */ + *pExtValueSubtagEnd = 0; + pExtension->value = T_CString_toLowerCase(pExtValueSubtag); + + /* insert the extension to the list */ + if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { + pLastGoodPosition = pExtValueSubtagEnd; + pExtension = NULL; + } else { + /* stop parsing here */ + uprv_free(pExtension); + pExtension = NULL; + break; + } + } + } + + /* The rest of part will be private use value subtags */ + if (pNext == NULL) { + /* empty private use subtag */ + break; + } + /* back up the private use value start position */ + pPrivuseVal = pNext; + + /* validate private use value subtags */ + while (pNext) { + pSubtag = pNext; + pSep = pSubtag; + while (*pSep) { + if (*pSep == SEP) { + break; + } + pSep++; + } + if (*pSep == 0) { + /* last subtag */ + pNext = NULL; + } else { + pNext = pSep + 1; + } + subtagLen = (int32_t)(pSep - pSubtag); + + if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) { + *pSep = 0; + next = VART; + privateuseVar = TRUE; + break; + } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { + pLastGoodPosition = pSep; + } else { + break; + } + } + + if (next == VART) { + continue; + } + + if (pLastGoodPosition - pPrivuseVal > 0) { + *pLastGoodPosition = 0; + t->privateuse = T_CString_toLowerCase(pPrivuseVal); + } + /* No more subtags, exiting the parse loop */ + break; + } + break; + } + + /* If we fell through here, it means this subtag is illegal - quit parsing */ + break; + } + + if (pExtension != NULL) { + /* Process the last extension */ + if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { + /* the previous extension is incomplete */ + uprv_free(pExtension); + } else { + /* terminate the previous extension value */ + *pExtValueSubtagEnd = 0; + pExtension->value = T_CString_toLowerCase(pExtValueSubtag); + /* insert the extension to the list */ + if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { + pLastGoodPosition = pExtValueSubtagEnd; + } else { + uprv_free(pExtension); + } + } + } + + if (parsedLen != NULL) { + *parsedLen = (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta); + } + + return t.orphan(); +} + +// Ticket #12705 - Turn optimization back on. +#if defined(_MSC_VER) && (_MSC_VER >= 1900) && (_MSC_VER < 1924) +#pragma optimize( "", on ) +#endif + +static void +ultag_close(ULanguageTag* langtag) { + + if (langtag == NULL) { + return; + } + + uprv_free(langtag->buf); + + if (langtag->variants) { + VariantListEntry *curVar = langtag->variants; + while (curVar) { + VariantListEntry *nextVar = curVar->next; + uprv_free(curVar); + curVar = nextVar; + } + } + + if (langtag->extensions) { + ExtensionListEntry *curExt = langtag->extensions; + while (curExt) { + ExtensionListEntry *nextExt = curExt->next; + uprv_free(curExt); + curExt = nextExt; + } + } + + uprv_free(langtag); +} + +static const char* +ultag_getLanguage(const ULanguageTag* langtag) { + return langtag->language; +} + +#if 0 +static const char* +ultag_getJDKLanguage(const ULanguageTag* langtag) { + int32_t i; + for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { + if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) { + return DEPRECATEDLANGS[i + 1]; + } + } + return langtag->language; +} +#endif + +static const char* +ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { + if (idx >= 0 && idx < MAXEXTLANG) { + return langtag->extlang[idx]; + } + return NULL; +} + +static int32_t +ultag_getExtlangSize(const ULanguageTag* langtag) { + int32_t size = 0; + int32_t i; + for (i = 0; i < MAXEXTLANG; i++) { + if (langtag->extlang[i]) { + size++; + } + } + return size; +} + +static const char* +ultag_getScript(const ULanguageTag* langtag) { + return langtag->script; +} + +static const char* +ultag_getRegion(const ULanguageTag* langtag) { + return langtag->region; +} + +static const char* +ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { + const char *var = NULL; + VariantListEntry *cur = langtag->variants; + int32_t i = 0; + while (cur) { + if (i == idx) { + var = cur->variant; + break; + } + cur = cur->next; + i++; + } + return var; +} + +static int32_t +ultag_getVariantsSize(const ULanguageTag* langtag) { + int32_t size = 0; + VariantListEntry *cur = langtag->variants; + while (TRUE) { + if (cur == NULL) { + break; + } + size++; + cur = cur->next; + } + return size; +} + +static const char* +ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { + const char *key = NULL; + ExtensionListEntry *cur = langtag->extensions; + int32_t i = 0; + while (cur) { + if (i == idx) { + key = cur->key; + break; + } + cur = cur->next; + i++; + } + return key; +} + +static const char* +ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { + const char *val = NULL; + ExtensionListEntry *cur = langtag->extensions; + int32_t i = 0; + while (cur) { + if (i == idx) { + val = cur->value; + break; + } + cur = cur->next; + i++; + } + return val; +} + +static int32_t +ultag_getExtensionsSize(const ULanguageTag* langtag) { + int32_t size = 0; + ExtensionListEntry *cur = langtag->extensions; + while (TRUE) { + if (cur == NULL) { + break; + } + size++; + cur = cur->next; + } + return size; +} + +static const char* +ultag_getPrivateUse(const ULanguageTag* langtag) { + return langtag->privateuse; +} + +#if 0 +static const char* +ultag_getLegacy(const ULanguageTag* langtag) { + return langtag->legacy; +} +#endif + + +/* +* ------------------------------------------------- +* +* Locale/BCP47 conversion APIs, exposed as uloc_* +* +* ------------------------------------------------- +*/ +U_CAPI int32_t U_EXPORT2 +uloc_toLanguageTag(const char* localeID, + char* langtag, + int32_t langtagCapacity, + UBool strict, + UErrorCode* status) { + if (U_FAILURE(*status)) { + return 0; + } + + icu::CheckedArrayByteSink sink(langtag, langtagCapacity); + ulocimp_toLanguageTag(localeID, sink, strict, status); + + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*status)) { + return reslen; + } + + if (sink.Overflowed()) { + *status = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars(langtag, langtagCapacity, reslen, status); + } + + return reslen; +} + + +U_CAPI void U_EXPORT2 +ulocimp_toLanguageTag(const char* localeID, + icu::ByteSink& sink, + UBool strict, + UErrorCode* status) { + icu::CharString canonical; + int32_t reslen; + UErrorCode tmpStatus = U_ZERO_ERROR; + UBool hadPosix = FALSE; + const char* pKeywordStart; + + /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ + int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID)); + if (resultCapacity > 0) { + char* buffer; + + for (;;) { + buffer = canonical.getAppendBuffer( + /*minCapacity=*/resultCapacity, + /*desiredCapacityHint=*/resultCapacity, + resultCapacity, + tmpStatus); + + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + return; + } + + reslen = + uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus); + + if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) { + break; + } + + resultCapacity = reslen; + tmpStatus = U_ZERO_ERROR; + } + + if (U_FAILURE(tmpStatus)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + canonical.append(buffer, reslen, tmpStatus); + if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString. + } + + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + return; + } + } + + /* For handling special case - private use only tag */ + pKeywordStart = locale_getKeywordsStart(canonical.data()); + if (pKeywordStart == canonical.data()) { + int kwdCnt = 0; + UBool done = FALSE; + + icu::LocalUEnumerationPointer kwdEnum(uloc_openKeywords(canonical.data(), &tmpStatus)); + if (U_SUCCESS(tmpStatus)) { + kwdCnt = uenum_count(kwdEnum.getAlias(), &tmpStatus); + if (kwdCnt == 1) { + const char *key; + int32_t len = 0; + + key = uenum_next(kwdEnum.getAlias(), &len, &tmpStatus); + if (len == 1 && *key == PRIVATEUSE) { + icu::CharString buf; + { + icu::CharStringByteSink sink(&buf); + ulocimp_getKeywordValue(localeID, key, sink, &tmpStatus); + } + if (U_SUCCESS(tmpStatus)) { + if (ultag_isPrivateuseValueSubtags(buf.data(), buf.length())) { + /* return private use only tag */ + static const char PREFIX[] = { PRIVATEUSE, SEP }; + sink.Append(PREFIX, sizeof(PREFIX)); + sink.Append(buf.data(), buf.length()); + done = TRUE; + } else if (strict) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + done = TRUE; + } + /* if not strict mode, then "und" will be returned */ + } else { + *status = U_ILLEGAL_ARGUMENT_ERROR; + done = TRUE; + } + } + } + if (done) { + return; + } + } + } + + _appendLanguageToLanguageTag(canonical.data(), sink, strict, status); + _appendScriptToLanguageTag(canonical.data(), sink, strict, status); + _appendRegionToLanguageTag(canonical.data(), sink, strict, status); + _appendVariantsToLanguageTag(canonical.data(), sink, strict, &hadPosix, status); + _appendKeywordsToLanguageTag(canonical.data(), sink, strict, hadPosix, status); + _appendPrivateuseToLanguageTag(canonical.data(), sink, strict, hadPosix, status); +} + + +U_CAPI int32_t U_EXPORT2 +uloc_forLanguageTag(const char* langtag, + char* localeID, + int32_t localeIDCapacity, + int32_t* parsedLength, + UErrorCode* status) { + if (U_FAILURE(*status)) { + return 0; + } + + icu::CheckedArrayByteSink sink(localeID, localeIDCapacity); + ulocimp_forLanguageTag(langtag, -1, sink, parsedLength, status); + + int32_t reslen = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*status)) { + return reslen; + } + + if (sink.Overflowed()) { + *status = U_BUFFER_OVERFLOW_ERROR; + } else { + u_terminateChars(localeID, localeIDCapacity, reslen, status); + } + + return reslen; +} + + +U_CAPI void U_EXPORT2 +ulocimp_forLanguageTag(const char* langtag, + int32_t tagLen, + icu::ByteSink& sink, + int32_t* parsedLength, + UErrorCode* status) { + UBool isEmpty = TRUE; + const char *subtag, *p; + int32_t len; + int32_t i, n; + UBool noRegion = TRUE; + + icu::LocalULanguageTagPointer lt(ultag_parse(langtag, tagLen, parsedLength, status)); + if (U_FAILURE(*status)) { + return; + } + + /* language */ + subtag = ultag_getExtlangSize(lt.getAlias()) > 0 ? ultag_getExtlang(lt.getAlias(), 0) : ultag_getLanguage(lt.getAlias()); + if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { + len = (int32_t)uprv_strlen(subtag); + if (len > 0) { + sink.Append(subtag, len); + isEmpty = FALSE; + } + } + + /* script */ + subtag = ultag_getScript(lt.getAlias()); + len = (int32_t)uprv_strlen(subtag); + if (len > 0) { + sink.Append("_", 1); + isEmpty = FALSE; + + /* write out the script in title case */ + char c = uprv_toupper(*subtag); + sink.Append(&c, 1); + sink.Append(subtag + 1, len - 1); + } + + /* region */ + subtag = ultag_getRegion(lt.getAlias()); + len = (int32_t)uprv_strlen(subtag); + if (len > 0) { + sink.Append("_", 1); + isEmpty = FALSE; + + /* write out the region in upper case */ + p = subtag; + while (*p) { + char c = uprv_toupper(*p); + sink.Append(&c, 1); + p++; + } + noRegion = FALSE; + } + + /* variants */ + _sortVariants(lt.getAlias()->variants); + n = ultag_getVariantsSize(lt.getAlias()); + if (n > 0) { + if (noRegion) { + sink.Append("_", 1); + isEmpty = FALSE; + } + + for (i = 0; i < n; i++) { + subtag = ultag_getVariant(lt.getAlias(), i); + sink.Append("_", 1); + + /* write out the variant in upper case */ + p = subtag; + while (*p) { + char c = uprv_toupper(*p); + sink.Append(&c, 1); + p++; + } + } + } + + /* keywords */ + n = ultag_getExtensionsSize(lt.getAlias()); + subtag = ultag_getPrivateUse(lt.getAlias()); + if (n > 0 || uprv_strlen(subtag) > 0) { + if (isEmpty && n > 0) { + /* need a language */ + sink.Append(LANG_UND, LANG_UND_LEN); + } + _appendKeywords(lt.getAlias(), sink, status); + } +} diff --git a/thirdparty/icu4c/common/ulocimp.h b/thirdparty/icu4c/common/ulocimp.h new file mode 100644 index 0000000000..5691fe9a77 --- /dev/null +++ b/thirdparty/icu4c/common/ulocimp.h @@ -0,0 +1,307 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2004-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#ifndef ULOCIMP_H +#define ULOCIMP_H + +#include "unicode/bytestream.h" +#include "unicode/uloc.h" + +#include "charstr.h" + +/** + * Create an iterator over the specified keywords list + * @param keywordList double-null terminated list. Will be copied. + * @param keywordListSize size in bytes of keywordList + * @param status err code + * @return enumeration (owned by caller) of the keyword list. + * @internal ICU 3.0 + */ +U_CAPI UEnumeration* U_EXPORT2 +uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status); + +/** + * Look up a resource bundle table item with fallback on the table level. + * This is accessible so it can be called by C++ code. + */ +U_CAPI const UChar * U_EXPORT2 +uloc_getTableStringWithFallback( + const char *path, + const char *locale, + const char *tableKey, + const char *subTableKey, + const char *itemKey, + int32_t *pLength, + UErrorCode *pErrorCode); + +/*returns true if a is an ID separator false otherwise*/ +#define _isIDSeparator(a) (a == '_' || a == '-') + +U_CFUNC const char* +uloc_getCurrentCountryID(const char* oldID); + +U_CFUNC const char* +uloc_getCurrentLanguageID(const char* oldID); + +U_CFUNC void +ulocimp_getKeywords(const char *localeID, + char prev, + icu::ByteSink& sink, + UBool valuesToo, + UErrorCode *status); + +icu::CharString U_EXPORT2 +ulocimp_getLanguage(const char *localeID, + const char **pEnd, + UErrorCode &status); + +icu::CharString U_EXPORT2 +ulocimp_getScript(const char *localeID, + const char **pEnd, + UErrorCode &status); + +icu::CharString U_EXPORT2 +ulocimp_getCountry(const char *localeID, + const char **pEnd, + UErrorCode &status); + +U_CAPI void U_EXPORT2 +ulocimp_getName(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err); + +U_CAPI void U_EXPORT2 +ulocimp_getBaseName(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err); + +U_CAPI void U_EXPORT2 +ulocimp_canonicalize(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err); + +U_CAPI void U_EXPORT2 +ulocimp_getKeywordValue(const char* localeID, + const char* keywordName, + icu::ByteSink& sink, + UErrorCode* status); + +/** + * Writes a well-formed language tag for this locale ID. + * + * **Note**: When `strict` is false, any locale fields which do not satisfy the + * BCP47 syntax requirement will be omitted from the result. When `strict` is + * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale + * fields do not satisfy the BCP47 syntax requirement. + * + * @param localeID the input locale ID + * @param sink the output sink receiving the BCP47 language + * tag for this Locale. + * @param strict boolean value indicating if the function returns + * an error for an ill-formed input locale ID. + * @param err error information if receiving the language + * tag failed. + * @return The length of the BCP47 language tag. + * + * @internal ICU 64 + */ +U_CAPI void U_EXPORT2 +ulocimp_toLanguageTag(const char* localeID, + icu::ByteSink& sink, + UBool strict, + UErrorCode* err); + +/** + * Returns a locale ID for the specified BCP47 language tag string. + * If the specified language tag contains any ill-formed subtags, + * the first such subtag and all following subtags are ignored. + * <p> + * This implements the 'Language-Tag' production of BCP 47, and so + * supports legacy language tags (marked as “Type: grandfathered” in BCP 47) + * (regular and irregular) as well as private use language tags. + * + * Private use tags are represented as 'x-whatever', + * and legacy tags are converted to their canonical replacements where they exist. + * + * Note that a few legacy tags have no modern replacement; + * these will be converted using the fallback described in + * the first paragraph, so some information might be lost. + * + * @param langtag the input BCP47 language tag. + * @param tagLen the length of langtag, or -1 to call uprv_strlen(). + * @param sink the output sink receiving a locale ID for the + * specified BCP47 language tag. + * @param parsedLength if not NULL, successfully parsed length + * for the input language tag is set. + * @param err error information if receiving the locald ID + * failed. + * @internal ICU 63 + */ +U_CAPI void U_EXPORT2 +ulocimp_forLanguageTag(const char* langtag, + int32_t tagLen, + icu::ByteSink& sink, + int32_t* parsedLength, + UErrorCode* err); + +/** + * Get the region to use for supplemental data lookup. Uses + * (1) any region specified by locale tag "rg"; if none then + * (2) any unicode_region_tag in the locale ID; if none then + * (3) if inferRegion is true, the region suggested by + * getLikelySubtags on the localeID. + * If no region is found, returns length 0. + * + * @param localeID + * The complete locale ID (with keywords) from which + * to get the region to use for supplemental data. + * @param inferRegion + * If true, will try to infer region from localeID if + * no other region is found. + * @param region + * Buffer in which to put the region ID found; should + * have a capacity at least ULOC_COUNTRY_CAPACITY. + * @param regionCapacity + * The actual capacity of the region buffer. + * @param status + * Pointer to in/out UErrorCode value for latest status. + * @return + * The length of any region code found, or 0 if none. + * @internal ICU 57 + */ +U_CAPI int32_t U_EXPORT2 +ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, + char *region, int32_t regionCapacity, UErrorCode* status); + +/** + * Add the likely subtags for a provided locale ID, per the algorithm described + * in the following CLDR technical report: + * + * http://www.unicode.org/reports/tr35/#Likely_Subtags + * + * If localeID is already in the maximal form, or there is no data available + * for maximization, it will be copied to the output buffer. For example, + * "und-Zzzz" cannot be maximized, since there is no reasonable maximization. + * + * Examples: + * + * "en" maximizes to "en_Latn_US" + * + * "de" maximizes to "de_Latn_US" + * + * "sr" maximizes to "sr_Cyrl_RS" + * + * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) + * + * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) + * + * @param localeID The locale to maximize + * @param sink The output sink receiving the maximized locale + * @param err Error information if maximizing the locale failed. If the length + * of the localeID and the null-terminator is greater than the maximum allowed size, + * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. + * @internal ICU 64 + */ +U_CAPI void U_EXPORT2 +ulocimp_addLikelySubtags(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err); + +/** + * Minimize the subtags for a provided locale ID, per the algorithm described + * in the following CLDR technical report: + * + * http://www.unicode.org/reports/tr35/#Likely_Subtags + * + * If localeID is already in the minimal form, or there is no data available + * for minimization, it will be copied to the output buffer. Since the + * minimization algorithm relies on proper maximization, see the comments + * for ulocimp_addLikelySubtags for reasons why there might not be any data. + * + * Examples: + * + * "en_Latn_US" minimizes to "en" + * + * "de_Latn_US" minimizes to "de" + * + * "sr_Cyrl_RS" minimizes to "sr" + * + * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the + * script, and minimizing to "zh" would imply "zh_Hans_CN".) + * + * @param localeID The locale to minimize + * @param sink The output sink receiving the maximized locale + * @param err Error information if minimizing the locale failed. If the length + * of the localeID and the null-terminator is greater than the maximum allowed size, + * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. + * @internal ICU 64 + */ +U_CAPI void U_EXPORT2 +ulocimp_minimizeSubtags(const char* localeID, + icu::ByteSink& sink, + UErrorCode* err); + +U_CAPI const char * U_EXPORT2 +locale_getKeywordsStart(const char *localeID); + +U_CFUNC UBool +ultag_isExtensionSubtags(const char* s, int32_t len); + +U_CFUNC UBool +ultag_isLanguageSubtag(const char* s, int32_t len); + +U_CFUNC UBool +ultag_isPrivateuseValueSubtags(const char* s, int32_t len); + +U_CFUNC UBool +ultag_isRegionSubtag(const char* s, int32_t len); + +U_CFUNC UBool +ultag_isScriptSubtag(const char* s, int32_t len); + +U_CFUNC UBool +ultag_isTransformedExtensionSubtags(const char* s, int32_t len); + +U_CFUNC UBool +ultag_isUnicodeExtensionSubtags(const char* s, int32_t len); + +U_CFUNC UBool +ultag_isUnicodeLocaleAttribute(const char* s, int32_t len); + +U_CFUNC UBool +ultag_isUnicodeLocaleAttributes(const char* s, int32_t len); + +U_CFUNC UBool +ultag_isUnicodeLocaleKey(const char* s, int32_t len); + +U_CFUNC UBool +ultag_isUnicodeLocaleType(const char* s, int32_t len); + +U_CFUNC UBool +ultag_isVariantSubtags(const char* s, int32_t len); + +U_CFUNC const char* +ulocimp_toBcpKey(const char* key); + +U_CFUNC const char* +ulocimp_toLegacyKey(const char* key); + +U_CFUNC const char* +ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType); + +U_CFUNC const char* +ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType); + +/* Function for testing purpose */ +U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length); + +// Return true if the value is already canonicalized. +U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName); + +#endif diff --git a/thirdparty/icu4c/common/umapfile.cpp b/thirdparty/icu4c/common/umapfile.cpp new file mode 100644 index 0000000000..3e714876a4 --- /dev/null +++ b/thirdparty/icu4c/common/umapfile.cpp @@ -0,0 +1,530 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************/ + + +/*---------------------------------------------------------------------------- + * + * Memory mapped file wrappers for use by the ICU Data Implementation + * All of the platform-specific implementation for mapping data files + * is here. The rest of the ICU Data implementation uses only the + * wrapper functions. + * + *----------------------------------------------------------------------------*/ +/* Defines _XOPEN_SOURCE for access to POSIX functions. + * Must be before any other #includes. */ +#include "uposixdefs.h" + +#include "unicode/putil.h" +#include "unicode/ustring.h" +#include "udatamem.h" +#include "umapfile.h" + +/* memory-mapping base definitions ------------------------------------------ */ + +#if MAP_IMPLEMENTATION==MAP_WIN32 +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +# define VC_EXTRALEAN +# define NOUSER +# define NOSERVICE +# define NOIME +# define NOMCX + +# if U_PLATFORM_HAS_WINUWP_API == 1 + // Some previous versions of the Windows 10 SDK don't expose various APIs for UWP applications + // to use, even though UWP apps are allowed to call and use them. Temporarily change the + // WINAPI family partition below to Desktop, so that function declarations are visible for UWP. +# include <winapifamily.h> +# if !(WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_SYSTEM)) +# pragma push_macro("WINAPI_PARTITION_DESKTOP") +# undef WINAPI_PARTITION_DESKTOP +# define WINAPI_PARTITION_DESKTOP 1 +# define CHANGED_WINAPI_PARTITION_DESKTOP_VALUE +# endif +# endif + +# include <windows.h> + +# if U_PLATFORM_HAS_WINUWP_API == 1 && defined(CHANGED_WINAPI_PARTITION_DESKTOP_VALUE) +# pragma pop_macro("WINAPI_PARTITION_DESKTOP") +# endif + +# include "cmemory.h" + +typedef HANDLE MemoryMap; + +# define IS_MAP(map) ((map)!=nullptr) + +#elif MAP_IMPLEMENTATION==MAP_POSIX || MAP_IMPLEMENTATION==MAP_390DLL + typedef size_t MemoryMap; + +# define IS_MAP(map) ((map)!=0) + +# include <unistd.h> +# include <sys/mman.h> +# include <sys/stat.h> +# include <fcntl.h> + +# ifndef MAP_FAILED +# define MAP_FAILED ((void*)-1) +# endif + +# if MAP_IMPLEMENTATION==MAP_390DLL + /* No memory mapping for 390 batch mode. Fake it using dll loading. */ +# include <dll.h> +# include "cstring.h" +# include "cmemory.h" +# include "unicode/udata.h" +# define LIB_PREFIX "lib" +# define LIB_SUFFIX ".dll" + /* This is inconvenient until we figure out what to do with U_ICUDATA_NAME in utypes.h */ +# define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat" +# endif +#elif MAP_IMPLEMENTATION==MAP_STDIO +# include <stdio.h> +# include "cmemory.h" + + typedef void *MemoryMap; + +# define IS_MAP(map) ((map)!=nullptr) +#endif + +/*----------------------------------------------------------------------------* + * * + * Memory Mapped File support. Platform dependent implementation of * + * functions used by the rest of the implementation.* + * * + *----------------------------------------------------------------------------*/ +#if MAP_IMPLEMENTATION==MAP_NONE + U_CFUNC UBool + uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { + if (U_FAILURE(*status)) { + return FALSE; + } + UDataMemory_init(pData); /* Clear the output struct. */ + return FALSE; /* no file access */ + } + + U_CFUNC void uprv_unmapFile(UDataMemory *pData) { + /* nothing to do */ + } +#elif MAP_IMPLEMENTATION==MAP_WIN32 + U_CFUNC UBool + uprv_mapFile( + UDataMemory *pData, /* Fill in with info on the result doing the mapping. */ + /* Output only; any original contents are cleared. */ + const char *path, /* File path to be opened/mapped. */ + UErrorCode *status /* Error status, used to report out-of-memory errors. */ + ) + { + if (U_FAILURE(*status)) { + return FALSE; + } + + HANDLE map = nullptr; + HANDLE file = INVALID_HANDLE_VALUE; + + UDataMemory_init(pData); /* Clear the output struct. */ + + /* open the input file */ +#if U_PLATFORM_HAS_WINUWP_API == 0 + // Note: In the non-UWP code-path (ie: Win32), the value of the path variable might have come from + // the CRT 'getenv' function, and would be therefore be encoded in the default ANSI code page. + // This means that we can't call the *W version of API below, whereas in the UWP code-path + // there is no 'getenv' call, and thus the string will be only UTF-8/Invariant characters. + file=CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, nullptr, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, nullptr); +#else + // Convert from UTF-8 string to UTF-16 string. + wchar_t utf16Path[MAX_PATH]; + int32_t pathUtf16Len = 0; + u_strFromUTF8(reinterpret_cast<UChar*>(utf16Path), static_cast<int32_t>(UPRV_LENGTHOF(utf16Path)), &pathUtf16Len, path, -1, status); + + if (U_FAILURE(*status)) { + return FALSE; + } + if (*status == U_STRING_NOT_TERMINATED_WARNING) { + // Report back an error instead of a warning. + *status = U_BUFFER_OVERFLOW_ERROR; + return FALSE; + } + + file = CreateFileW(utf16Path, GENERIC_READ, FILE_SHARE_READ, nullptr, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, nullptr); +#endif + if (file == INVALID_HANDLE_VALUE) { + // If we failed to open the file due to an out-of-memory error, then we want + // to report that error back to the caller. + if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) { + *status = U_MEMORY_ALLOCATION_ERROR; + } + return FALSE; + } + + // Note: We use NULL/nullptr for lpAttributes parameter below. + // This means our handle cannot be inherited and we will get the default security descriptor. + /* create an unnamed Windows file-mapping object for the specified file */ + map = CreateFileMappingW(file, nullptr, PAGE_READONLY, 0, 0, nullptr); + + CloseHandle(file); + if (map == nullptr) { + // If we failed to create the mapping due to an out-of-memory error, then + // we want to report that error back to the caller. + if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) { + *status = U_MEMORY_ALLOCATION_ERROR; + } + return FALSE; + } + + /* map a view of the file into our address space */ + pData->pHeader = reinterpret_cast<const DataHeader *>(MapViewOfFile(map, FILE_MAP_READ, 0, 0, 0)); + if (pData->pHeader == nullptr) { + CloseHandle(map); + return FALSE; + } + pData->map = map; + return TRUE; + } + + U_CFUNC void + uprv_unmapFile(UDataMemory *pData) { + if (pData != nullptr && pData->map != nullptr) { + UnmapViewOfFile(pData->pHeader); + CloseHandle(pData->map); + pData->pHeader = nullptr; + pData->map = nullptr; + } + } + + + +#elif MAP_IMPLEMENTATION==MAP_POSIX + U_CFUNC UBool + uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { + int fd; + int length; + struct stat mystat; + void *data; + + if (U_FAILURE(*status)) { + return FALSE; + } + + UDataMemory_init(pData); /* Clear the output struct. */ + + /* determine the length of the file */ + if(stat(path, &mystat)!=0 || mystat.st_size<=0) { + return FALSE; + } + length=mystat.st_size; + + /* open the file */ + fd=open(path, O_RDONLY); + if(fd==-1) { + return FALSE; + } + + /* get a view of the mapping */ +#if U_PLATFORM != U_PF_HPUX + data=mmap(0, length, PROT_READ, MAP_SHARED, fd, 0); +#else + data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); +#endif + close(fd); /* no longer needed */ + if(data==MAP_FAILED) { + // Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR? + return FALSE; + } + + pData->map = (char *)data + length; + pData->pHeader=(const DataHeader *)data; + pData->mapAddr = data; +#if U_PLATFORM == U_PF_IPHONE + posix_madvise(data, length, POSIX_MADV_RANDOM); +#endif + return TRUE; + } + + U_CFUNC void + uprv_unmapFile(UDataMemory *pData) { + if(pData!=nullptr && pData->map!=nullptr) { + size_t dataLen = (char *)pData->map - (char *)pData->mapAddr; + if(munmap(pData->mapAddr, dataLen)==-1) { + } + pData->pHeader=nullptr; + pData->map=0; + pData->mapAddr=nullptr; + } + } + + + +#elif MAP_IMPLEMENTATION==MAP_STDIO + /* copy of the filestrm.c/T_FileStream_size() implementation */ + static int32_t + umap_fsize(FILE *f) { + int32_t savedPos = ftell(f); + int32_t size = 0; + + /*Changes by Bertrand A. D. doesn't affect the current position + goes to the end of the file before ftell*/ + fseek(f, 0, SEEK_END); + size = (int32_t)ftell(f); + fseek(f, savedPos, SEEK_SET); + return size; + } + + U_CFUNC UBool + uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { + FILE *file; + int32_t fileLength; + void *p; + + if (U_FAILURE(*status)) { + return FALSE; + } + + UDataMemory_init(pData); /* Clear the output struct. */ + /* open the input file */ + file=fopen(path, "rb"); + if(file==nullptr) { + return FALSE; + } + + /* get the file length */ + fileLength=umap_fsize(file); + if(ferror(file) || fileLength<=20) { + fclose(file); + return FALSE; + } + + /* allocate the memory to hold the file data */ + p=uprv_malloc(fileLength); + if(p==nullptr) { + fclose(file); + *status = U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + + /* read the file */ + if(fileLength!=fread(p, 1, fileLength, file)) { + uprv_free(p); + fclose(file); + return FALSE; + } + + fclose(file); + pData->map=p; + pData->pHeader=(const DataHeader *)p; + pData->mapAddr=p; + return TRUE; + } + + U_CFUNC void + uprv_unmapFile(UDataMemory *pData) { + if(pData!=nullptr && pData->map!=nullptr) { + uprv_free(pData->map); + pData->map = nullptr; + pData->mapAddr = nullptr; + pData->pHeader = nullptr; + } + } + + +#elif MAP_IMPLEMENTATION==MAP_390DLL + /* 390 specific Library Loading. + * This is the only platform left that dynamically loads an ICU Data Library. + * All other platforms use .data files when dynamic loading is required, but + * this turn out to be awkward to support in 390 batch mode. + * + * The idea here is to hide the fact that 390 is using dll loading from the + * rest of ICU, and make it look like there is file loading happening. + * + */ + + static char *strcpy_returnEnd(char *dest, const char *src) + { + while((*dest=*src)!=0) { + ++dest; + ++src; + } + return dest; + } + + /*------------------------------------------------------------------------------ + * + * computeDirPath given a user-supplied path of an item to be opened, + * compute and return + * - the full directory path to be used + * when opening the file. + * - Pointer to null at end of above returned path + * + * Parameters: + * path: input path. Buffer is not altered. + * pathBuffer: Output buffer. Any contents are overwritten. + * + * Returns: + * Pointer to null termination in returned pathBuffer. + * + * TODO: This works the way ICU historically has, but the + * whole data fallback search path is so complicated that + * probably almost no one will ever really understand it, + * the potential for confusion is large. (It's not just + * this one function, but the whole scheme.) + * + *------------------------------------------------------------------------------*/ + static char *uprv_computeDirPath(const char *path, char *pathBuffer) + { + char *finalSlash; /* Ptr to last dir separator in input path, or null if none. */ + int32_t pathLen; /* Length of the returned directory path */ + + finalSlash = 0; + if (path != 0) { + finalSlash = uprv_strrchr(path, U_FILE_SEP_CHAR); + } + + *pathBuffer = 0; + if (finalSlash == 0) { + /* No user-supplied path. + * Copy the ICU_DATA path to the path buffer and return that*/ + const char *icuDataDir; + icuDataDir=u_getDataDirectory(); + if(icuDataDir!=nullptr && *icuDataDir!=0) { + return strcpy_returnEnd(pathBuffer, icuDataDir); + } else { + /* there is no icuDataDir either. Just return the empty pathBuffer. */ + return pathBuffer; + } + } + + /* User supplied path did contain a directory portion. + * Copy it to the output path buffer */ + pathLen = (int32_t)(finalSlash - path + 1); + uprv_memcpy(pathBuffer, path, pathLen); + *(pathBuffer+pathLen) = 0; + return pathBuffer+pathLen; + } + + +# define DATA_TYPE "dat" + + U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { + const char *inBasename; + char *basename; + char pathBuffer[1024]; + const DataHeader *pHeader; + dllhandle *handle; + void *val=0; + + if (U_FAILURE(*status)) { + return FALSE; + } + + inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR); + if(inBasename==nullptr) { + inBasename = path; + } else { + inBasename++; + } + basename=uprv_computeDirPath(path, pathBuffer); + if(uprv_strcmp(inBasename, U_ICUDATA_NAME".dat") != 0) { + /* must mmap file... for build */ + int fd; + int length; + struct stat mystat; + void *data; + UDataMemory_init(pData); /* Clear the output struct. */ + + /* determine the length of the file */ + if(stat(path, &mystat)!=0 || mystat.st_size<=0) { + return FALSE; + } + length=mystat.st_size; + + /* open the file */ + fd=open(path, O_RDONLY); + if(fd==-1) { + return FALSE; + } + + /* get a view of the mapping */ + data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); /* no longer needed */ + if(data==MAP_FAILED) { + // Possibly check the errorno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR? + return FALSE; + } + pData->map = (char *)data + length; + pData->pHeader=(const DataHeader *)data; + pData->mapAddr = data; + return TRUE; + } + +# ifdef OS390BATCH + /* ### hack: we still need to get u_getDataDirectory() fixed + for OS/390 (batch mode - always return "//"? ) + and this here straightened out with LIB_PREFIX and LIB_SUFFIX (both empty?!) + This is probably due to the strange file system on OS/390. It's more like + a database with short entry names than a typical file system. */ + /* U_ICUDATA_NAME should always have the correct name */ + /* BUT FOR BATCH MODE IT IS AN EXCEPTION BECAUSE */ + /* THE FIRST THREE LETTERS ARE PREASSIGNED TO THE */ + /* PROJECT!!!!! */ + uprv_strcpy(pathBuffer, "IXMI" U_ICU_VERSION_SHORT "DA"); +# else + /* set up the library name */ + uprv_strcpy(basename, LIB_PREFIX U_LIBICUDATA_NAME U_ICU_VERSION_SHORT LIB_SUFFIX); +# endif + +# ifdef UDATA_DEBUG + fprintf(stderr, "dllload: %s ", pathBuffer); +# endif + + handle=dllload(pathBuffer); + +# ifdef UDATA_DEBUG + fprintf(stderr, " -> %08X\n", handle ); +# endif + + if(handle != nullptr) { + /* we have a data DLL - what kind of lookup do we need here? */ + /* try to find the Table of Contents */ + UDataMemory_init(pData); /* Clear the output struct. */ + val=dllqueryvar((dllhandle*)handle, U_ICUDATA_ENTRY_NAME); + if(val == 0) { + /* failed... so keep looking */ + return FALSE; + } +# ifdef UDATA_DEBUG + fprintf(stderr, "dllqueryvar(%08X, %s) -> %08X\n", handle, U_ICUDATA_ENTRY_NAME, val); +# endif + + pData->pHeader=(const DataHeader *)val; + return TRUE; + } else { + return FALSE; /* no handle */ + } + } + + U_CFUNC void uprv_unmapFile(UDataMemory *pData) { + if(pData!=nullptr && pData->map!=nullptr) { + uprv_free(pData->map); + pData->map = nullptr; + pData->mapAddr = nullptr; + pData->pHeader = nullptr; + } + } + +#else +# error MAP_IMPLEMENTATION is set incorrectly +#endif diff --git a/thirdparty/icu4c/common/umapfile.h b/thirdparty/icu4c/common/umapfile.h new file mode 100644 index 0000000000..92bd567a2a --- /dev/null +++ b/thirdparty/icu4c/common/umapfile.h @@ -0,0 +1,57 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************/ + +/*---------------------------------------------------------------------------------- + * + * Memory mapped file wrappers for use by the ICU Data Implementation + * + * Porting note: The implementation of these functions is very platform specific. + * Not all platforms can do real memory mapping. Those that can't + * still must implement these functions, getting the data into memory using + * whatever means are available. + * + * These functions are part of the ICU internal implementation, and + * are not inteded to be used directly by applications. + * + *----------------------------------------------------------------------------------*/ + +#ifndef __UMAPFILE_H__ +#define __UMAPFILE_H__ + +#include "unicode/putil.h" +#include "unicode/udata.h" +#include "putilimp.h" + +U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path, UErrorCode *status); +U_CFUNC void uprv_unmapFile(UDataMemory *pData); + +/* MAP_NONE: no memory mapping, no file access at all */ +#define MAP_NONE 0 +#define MAP_WIN32 1 +#define MAP_POSIX 2 +#define MAP_STDIO 3 +#define MAP_390DLL 4 + +#if UCONFIG_NO_FILE_IO +# define MAP_IMPLEMENTATION MAP_NONE +#elif U_PLATFORM_USES_ONLY_WIN32_API +# define MAP_IMPLEMENTATION MAP_WIN32 +#elif U_HAVE_MMAP || U_PLATFORM == U_PF_OS390 +# if U_PLATFORM == U_PF_OS390 && defined (OS390_STUBDATA) + /* No memory mapping for 390 batch mode. Fake it using dll loading. */ +# define MAP_IMPLEMENTATION MAP_390DLL +# else +# define MAP_IMPLEMENTATION MAP_POSIX +# endif +#else /* unknown platform, no memory map implementation: use stdio.h and uprv_malloc() instead */ +# define MAP_IMPLEMENTATION MAP_STDIO +#endif + +#endif diff --git a/thirdparty/icu4c/common/umath.cpp b/thirdparty/icu4c/common/umath.cpp new file mode 100644 index 0000000000..7cf4b31749 --- /dev/null +++ b/thirdparty/icu4c/common/umath.cpp @@ -0,0 +1,26 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2006, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* This file contains platform independent math. +*/ + +#include "putilimp.h" + +U_CAPI int32_t U_EXPORT2 +uprv_max(int32_t x, int32_t y) +{ + return (x > y ? x : y); +} + +U_CAPI int32_t U_EXPORT2 +uprv_min(int32_t x, int32_t y) +{ + return (x > y ? y : x); +} + diff --git a/thirdparty/icu4c/common/umutablecptrie.cpp b/thirdparty/icu4c/common/umutablecptrie.cpp new file mode 100644 index 0000000000..cdbe27080b --- /dev/null +++ b/thirdparty/icu4c/common/umutablecptrie.cpp @@ -0,0 +1,1852 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// umutablecptrie.cpp (inspired by utrie2_builder.cpp) +// created: 2017dec29 Markus W. Scherer + +// #define UCPTRIE_DEBUG +#ifdef UCPTRIE_DEBUG +# include <stdio.h> +#endif + +#include "unicode/utypes.h" +#include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "unicode/uobject.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "uassert.h" +#include "ucptrie_impl.h" + +// ICU-20235 In case Microsoft math.h has defined this, undefine it. +#ifdef OVERFLOW +#undef OVERFLOW +#endif + +U_NAMESPACE_BEGIN + +namespace { + +constexpr int32_t MAX_UNICODE = 0x10ffff; + +constexpr int32_t UNICODE_LIMIT = 0x110000; +constexpr int32_t BMP_LIMIT = 0x10000; +constexpr int32_t ASCII_LIMIT = 0x80; + +constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3; +constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3; +constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3; + +constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3)); + +// Flag values for data blocks. +constexpr uint8_t ALL_SAME = 0; +constexpr uint8_t MIXED = 1; +constexpr uint8_t SAME_AS = 2; + +/** Start with allocation of 16k data entries. */ +constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14); + +/** Grow about 8x each time. */ +constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17); + +/** + * Maximum length of the build-time data array. + * One entry per 0x110000 code points. + */ +constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT; + +// Flag values for index-3 blocks while compacting/building. +constexpr uint8_t I3_NULL = 0; +constexpr uint8_t I3_BMP = 1; +constexpr uint8_t I3_16 = 2; +constexpr uint8_t I3_18 = 3; + +constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8; + +class AllSameBlocks; +class MixedBlocks; + +class MutableCodePointTrie : public UMemory { +public: + MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode); + MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode); + MutableCodePointTrie(const MutableCodePointTrie &other) = delete; + ~MutableCodePointTrie(); + + MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete; + + static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode); + static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode); + + uint32_t get(UChar32 c) const; + int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context, + uint32_t *pValue) const; + + void set(UChar32 c, uint32_t value, UErrorCode &errorCode); + void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode); + + UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode); + +private: + void clear(); + + bool ensureHighStart(UChar32 c); + int32_t allocDataBlock(int32_t blockLength); + int32_t getDataBlock(int32_t i); + + void maskValues(uint32_t mask); + UChar32 findHighStart() const; + int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks); + int32_t compactData( + int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity, + int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode); + int32_t compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, UErrorCode &errorCode); + int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode); + + uint32_t *index = nullptr; + int32_t indexCapacity = 0; + int32_t index3NullOffset = -1; + uint32_t *data = nullptr; + int32_t dataCapacity = 0; + int32_t dataLength = 0; + int32_t dataNullOffset = -1; + + uint32_t origInitialValue; + uint32_t initialValue; + uint32_t errorValue; + UChar32 highStart; + uint32_t highValue; +#ifdef UCPTRIE_DEBUG +public: + const char *name; +#endif +private: + /** Temporary array while building the final data. */ + uint16_t *index16 = nullptr; + uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3]; +}; + +MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) : + origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue), + highStart(0), highValue(initialValue) +#ifdef UCPTRIE_DEBUG + , name("open") +#endif + { + if (U_FAILURE(errorCode)) { return; } + index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4); + data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4); + if (index == nullptr || data == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + indexCapacity = BMP_I_LIMIT; + dataCapacity = INITIAL_DATA_LENGTH; +} + +MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) : + index3NullOffset(other.index3NullOffset), + dataNullOffset(other.dataNullOffset), + origInitialValue(other.origInitialValue), initialValue(other.initialValue), + errorValue(other.errorValue), + highStart(other.highStart), highValue(other.highValue) +#ifdef UCPTRIE_DEBUG + , name("mutable clone") +#endif + { + if (U_FAILURE(errorCode)) { return; } + int32_t iCapacity = highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT; + index = (uint32_t *)uprv_malloc(iCapacity * 4); + data = (uint32_t *)uprv_malloc(other.dataCapacity * 4); + if (index == nullptr || data == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + indexCapacity = iCapacity; + dataCapacity = other.dataCapacity; + + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + uprv_memcpy(flags, other.flags, iLimit); + uprv_memcpy(index, other.index, iLimit * 4); + uprv_memcpy(data, other.data, (size_t)other.dataLength * 4); + dataLength = other.dataLength; + U_ASSERT(other.index16 == nullptr); +} + +MutableCodePointTrie::~MutableCodePointTrie() { + uprv_free(index); + uprv_free(data); + uprv_free(index16); +} + +MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) { + // Use the highValue as the initialValue to reduce the highStart. + uint32_t errorValue = ucpmap_get(map, -1); + uint32_t initialValue = ucpmap_get(map, 0x10ffff); + LocalPointer<MutableCodePointTrie> mutableTrie( + new MutableCodePointTrie(initialValue, errorValue, errorCode), + errorCode); + if (U_FAILURE(errorCode)) { + return nullptr; + } + UChar32 start = 0, end; + uint32_t value; + while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0, + nullptr, nullptr, &value)) >= 0) { + if (value != initialValue) { + if (start == end) { + mutableTrie->set(start, value, errorCode); + } else { + mutableTrie->setRange(start, end, value, errorCode); + } + } + start = end + 1; + } + if (U_SUCCESS(errorCode)) { + return mutableTrie.orphan(); + } else { + return nullptr; + } +} + +MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) { + // Use the highValue as the initialValue to reduce the highStart. + uint32_t errorValue; + uint32_t initialValue; + switch (trie->valueWidth) { + case UCPTRIE_VALUE_BITS_16: + errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; + initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; + break; + case UCPTRIE_VALUE_BITS_32: + errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; + initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; + break; + case UCPTRIE_VALUE_BITS_8: + errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; + initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; + break; + default: + // Unreachable if the trie is properly initialized. + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + LocalPointer<MutableCodePointTrie> mutableTrie( + new MutableCodePointTrie(initialValue, errorValue, errorCode), + errorCode); + if (U_FAILURE(errorCode)) { + return nullptr; + } + UChar32 start = 0, end; + uint32_t value; + while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, + nullptr, nullptr, &value)) >= 0) { + if (value != initialValue) { + if (start == end) { + mutableTrie->set(start, value, errorCode); + } else { + mutableTrie->setRange(start, end, value, errorCode); + } + } + start = end + 1; + } + if (U_SUCCESS(errorCode)) { + return mutableTrie.orphan(); + } else { + return nullptr; + } +} + +void MutableCodePointTrie::clear() { + index3NullOffset = dataNullOffset = -1; + dataLength = 0; + highValue = initialValue = origInitialValue; + highStart = 0; + uprv_free(index16); + index16 = nullptr; +} + +uint32_t MutableCodePointTrie::get(UChar32 c) const { + if ((uint32_t)c > MAX_UNICODE) { + return errorValue; + } + if (c >= highStart) { + return highValue; + } + int32_t i = c >> UCPTRIE_SHIFT_3; + if (flags[i] == ALL_SAME) { + return index[i]; + } else { + return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)]; + } +} + +inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue, + UCPMapValueFilter *filter, const void *context) { + if (value == initialValue) { + value = nullValue; + } else if (filter != nullptr) { + value = filter(context, value); + } + return value; +} + +UChar32 MutableCodePointTrie::getRange( + UChar32 start, UCPMapValueFilter *filter, const void *context, + uint32_t *pValue) const { + if ((uint32_t)start > MAX_UNICODE) { + return U_SENTINEL; + } + if (start >= highStart) { + if (pValue != nullptr) { + uint32_t value = highValue; + if (filter != nullptr) { value = filter(context, value); } + *pValue = value; + } + return MAX_UNICODE; + } + uint32_t nullValue = initialValue; + if (filter != nullptr) { nullValue = filter(context, nullValue); } + UChar32 c = start; + uint32_t trieValue, value; + bool haveValue = false; + int32_t i = c >> UCPTRIE_SHIFT_3; + do { + if (flags[i] == ALL_SAME) { + uint32_t trieValue2 = index[i]; + if (haveValue) { + if (trieValue2 != trieValue) { + if (filter == nullptr || + maybeFilterValue(trieValue2, initialValue, nullValue, + filter, context) != value) { + return c - 1; + } + trieValue = trieValue2; // may or may not help + } + } else { + trieValue = trieValue2; + value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context); + if (pValue != nullptr) { *pValue = value; } + haveValue = true; + } + c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK; + } else /* MIXED */ { + int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK); + uint32_t trieValue2 = data[di]; + if (haveValue) { + if (trieValue2 != trieValue) { + if (filter == nullptr || + maybeFilterValue(trieValue2, initialValue, nullValue, + filter, context) != value) { + return c - 1; + } + trieValue = trieValue2; // may or may not help + } + } else { + trieValue = trieValue2; + value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context); + if (pValue != nullptr) { *pValue = value; } + haveValue = true; + } + while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) { + trieValue2 = data[++di]; + if (trieValue2 != trieValue) { + if (filter == nullptr || + maybeFilterValue(trieValue2, initialValue, nullValue, + filter, context) != value) { + return c - 1; + } + } + trieValue = trieValue2; // may or may not help + } + } + ++i; + } while (c < highStart); + U_ASSERT(haveValue); + if (maybeFilterValue(highValue, initialValue, nullValue, + filter, context) != value) { + return c - 1; + } else { + return MAX_UNICODE; + } +} + +void +writeBlock(uint32_t *block, uint32_t value) { + uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + while (block < limit) { + *block++ = value; + } +} + +bool MutableCodePointTrie::ensureHighStart(UChar32 c) { + if (c >= highStart) { + // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction. + c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); + int32_t i = highStart >> UCPTRIE_SHIFT_3; + int32_t iLimit = c >> UCPTRIE_SHIFT_3; + if (iLimit > indexCapacity) { + uint32_t *newIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4); + if (newIndex == nullptr) { return false; } + uprv_memcpy(newIndex, index, i * 4); + uprv_free(index); + index = newIndex; + indexCapacity = I_LIMIT; + } + do { + flags[i] = ALL_SAME; + index[i] = initialValue; + } while(++i < iLimit); + highStart = c; + } + return true; +} + +int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) { + int32_t newBlock = dataLength; + int32_t newTop = newBlock + blockLength; + if (newTop > dataCapacity) { + int32_t capacity; + if (dataCapacity < MEDIUM_DATA_LENGTH) { + capacity = MEDIUM_DATA_LENGTH; + } else if (dataCapacity < MAX_DATA_LENGTH) { + capacity = MAX_DATA_LENGTH; + } else { + // Should never occur. + // Either MAX_DATA_LENGTH is incorrect, + // or the code writes more values than should be possible. + return -1; + } + uint32_t *newData = (uint32_t *)uprv_malloc(capacity * 4); + if (newData == nullptr) { + return -1; + } + uprv_memcpy(newData, data, (size_t)dataLength * 4); + uprv_free(data); + data = newData; + dataCapacity = capacity; + } + dataLength = newTop; + return newBlock; +} + +/** + * No error checking for illegal arguments. + * + * @return -1 if no new data block available (out of memory in data array) + * @internal + */ +int32_t MutableCodePointTrie::getDataBlock(int32_t i) { + if (flags[i] == MIXED) { + return index[i]; + } + if (i < BMP_I_LIMIT) { + int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH); + if (newBlock < 0) { return newBlock; } + int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1); + int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + do { + U_ASSERT(flags[iStart] == ALL_SAME); + writeBlock(data + newBlock, index[iStart]); + flags[iStart] = MIXED; + index[iStart++] = newBlock; + newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + } while (iStart < iLimit); + return index[i]; + } else { + int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH); + if (newBlock < 0) { return newBlock; } + writeBlock(data + newBlock, index[i]); + flags[i] = MIXED; + index[i] = newBlock; + return newBlock; + } +} + +void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + if ((uint32_t)c > MAX_UNICODE) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + int32_t block; + if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + + data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value; +} + +void +fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) { + uint32_t *pLimit = block + limit; + block += start; + while (block < pLimit) { + *block++ = value; + } +} + +void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if (!ensureHighStart(end)) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + + UChar32 limit = end + 1; + if (start & UCPTRIE_SMALL_DATA_MASK) { + // Set partial block at [start..following block boundary[. + int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3); + if (block < 0) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + + UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK; + if (nextStart <= limit) { + fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, + value); + start = nextStart; + } else { + fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK, + value); + return; + } + } + + // Number of positions in the last, partial block. + int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK; + + // Round down limit to a block boundary. + limit &= ~UCPTRIE_SMALL_DATA_MASK; + + // Iterate over all-value blocks. + while (start < limit) { + int32_t i = start >> UCPTRIE_SHIFT_3; + if (flags[i] == ALL_SAME) { + index[i] = value; + } else /* MIXED */ { + fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value); + } + start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + } + + if (rest > 0) { + // Set partial block at [last block boundary..limit[. + int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3); + if (block < 0) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + + fillBlock(data + block, 0, rest, value); + } +} + +/* compaction --------------------------------------------------------------- */ + +void MutableCodePointTrie::maskValues(uint32_t mask) { + initialValue &= mask; + errorValue &= mask; + highValue &= mask; + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + for (int32_t i = 0; i < iLimit; ++i) { + if (flags[i] == ALL_SAME) { + index[i] &= mask; + } + } + for (int32_t i = 0; i < dataLength; ++i) { + data[i] &= mask; + } +} + +template<typename UIntA, typename UIntB> +bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) { + while (length > 0 && *s == *t) { + ++s; + ++t; + --length; + } + return length == 0; +} + +bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) { + const uint32_t *pLimit = p + length; + while (p < pLimit && *p == value) { ++p; } + return p == pLimit; +} + +/** Search for an identical block. */ +int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length, + const uint16_t *q, int32_t qStart, int32_t blockLength) { + // Ensure that we do not even partially get past length. + length -= blockLength; + + q += qStart; + while (pStart <= length) { + if (equalBlocks(p + pStart, q, blockLength)) { + return pStart; + } + ++pStart; + } + return -1; +} + +int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit, + uint32_t value, int32_t blockLength) { + // Ensure that we do not even partially get past limit. + limit -= blockLength; + + for (int32_t block = start; block <= limit; ++block) { + if (p[block] == value) { + for (int32_t i = 1;; ++i) { + if (i == blockLength) { + return block; + } + if (p[block + i] != value) { + block += i; + break; + } + } + } + } + return -1; +} + +/** + * Look for maximum overlap of the beginning of the other block + * with the previous, adjacent block. + */ +template<typename UIntA, typename UIntB> +int32_t getOverlap(const UIntA *p, int32_t length, + const UIntB *q, int32_t qStart, int32_t blockLength) { + int32_t overlap = blockLength - 1; + U_ASSERT(overlap <= length); + q += qStart; + while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) { + --overlap; + } + return overlap; +} + +int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value, + int32_t blockLength) { + int32_t min = length - (blockLength - 1); + int32_t i = length; + while (min < i && p[i - 1] == value) { --i; } + return length - i; +} + +bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) { + for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) { + if (index[i] == dataOffset) { + return true; + } + } + return false; +} + +/** + * Finds the start of the last range in the trie by enumerating backward. + * Indexes for code points higher than this will be omitted. + */ +UChar32 MutableCodePointTrie::findHighStart() const { + int32_t i = highStart >> UCPTRIE_SHIFT_3; + while (i > 0) { + bool match; + if (flags[--i] == ALL_SAME) { + match = index[i] == highValue; + } else /* MIXED */ { + const uint32_t *p = data + index[i]; + for (int32_t j = 0;; ++j) { + if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) { + match = true; + break; + } + if (p[j] != highValue) { + match = false; + break; + } + } + } + if (!match) { + return (i + 1) << UCPTRIE_SHIFT_3; + } + } + return 0; +} + +class AllSameBlocks { +public: + static constexpr int32_t NEW_UNIQUE = -1; + static constexpr int32_t OVERFLOW = -2; + + AllSameBlocks() : length(0), mostRecent(-1) {} + + int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) { + if (mostRecent >= 0 && values[mostRecent] == value) { + refCounts[mostRecent] += count; + return indexes[mostRecent]; + } + for (int32_t i = 0; i < length; ++i) { + if (values[i] == value) { + mostRecent = i; + refCounts[i] += count; + return indexes[i]; + } + } + if (length == CAPACITY) { + return OVERFLOW; + } + mostRecent = length; + indexes[length] = index; + values[length] = value; + refCounts[length++] = count; + return NEW_UNIQUE; + } + + /** Replaces the block which has the lowest reference count. */ + void add(int32_t index, int32_t count, uint32_t value) { + U_ASSERT(length == CAPACITY); + int32_t least = -1; + int32_t leastCount = I_LIMIT; + for (int32_t i = 0; i < length; ++i) { + U_ASSERT(values[i] != value); + if (refCounts[i] < leastCount) { + least = i; + leastCount = refCounts[i]; + } + } + U_ASSERT(least >= 0); + mostRecent = least; + indexes[least] = index; + values[least] = value; + refCounts[least] = count; + } + + int32_t findMostUsed() const { + if (length == 0) { return -1; } + int32_t max = -1; + int32_t maxCount = 0; + for (int32_t i = 0; i < length; ++i) { + if (refCounts[i] > maxCount) { + max = i; + maxCount = refCounts[i]; + } + } + return indexes[max]; + } + +private: + static constexpr int32_t CAPACITY = 32; + + int32_t length; + int32_t mostRecent; + + int32_t indexes[CAPACITY]; + uint32_t values[CAPACITY]; + int32_t refCounts[CAPACITY]; +}; + +// Custom hash table for mixed-value blocks to be found anywhere in the +// compacted data or index so far. +class MixedBlocks { +public: + MixedBlocks() {} + ~MixedBlocks() { + uprv_free(table); + } + + bool init(int32_t maxLength, int32_t newBlockLength) { + // We store actual data indexes + 1 to reserve 0 for empty entries. + int32_t maxDataIndex = maxLength - newBlockLength + 1; + int32_t newLength; + if (maxDataIndex <= 0xfff) { // 4k + newLength = 6007; + shift = 12; + mask = 0xfff; + } else if (maxDataIndex <= 0x7fff) { // 32k + newLength = 50021; + shift = 15; + mask = 0x7fff; + } else if (maxDataIndex <= 0x1ffff) { // 128k + newLength = 200003; + shift = 17; + mask = 0x1ffff; + } else { + // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M + newLength = 1500007; + shift = 21; + mask = 0x1fffff; + } + if (newLength > capacity) { + uprv_free(table); + table = (uint32_t *)uprv_malloc(newLength * 4); + if (table == nullptr) { + return false; + } + capacity = newLength; + } + length = newLength; + uprv_memset(table, 0, length * 4); + + blockLength = newBlockLength; + return true; + } + + template<typename UInt> + void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) { + int32_t start = prevDataLength - blockLength; + if (start >= minStart) { + ++start; // Skip the last block that we added last time. + } else { + start = minStart; // Begin with the first full block. + } + for (int32_t end = newDataLength - blockLength; start <= end; ++start) { + uint32_t hashCode = makeHashCode(data, start); + addEntry(data, start, hashCode, start); + } + } + + template<typename UIntA, typename UIntB> + int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const { + uint32_t hashCode = makeHashCode(blockData, blockStart); + int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode); + if (entryIndex >= 0) { + return (table[entryIndex] & mask) - 1; + } else { + return -1; + } + } + + int32_t findAllSameBlock(const uint32_t *data, uint32_t blockValue) const { + uint32_t hashCode = makeHashCode(blockValue); + int32_t entryIndex = findEntry(data, blockValue, hashCode); + if (entryIndex >= 0) { + return (table[entryIndex] & mask) - 1; + } else { + return -1; + } + } + +private: + template<typename UInt> + uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const { + int32_t blockLimit = blockStart + blockLength; + uint32_t hashCode = blockData[blockStart++]; + do { + hashCode = 37 * hashCode + blockData[blockStart++]; + } while (blockStart < blockLimit); + return hashCode; + } + + uint32_t makeHashCode(uint32_t blockValue) const { + uint32_t hashCode = blockValue; + for (int32_t i = 1; i < blockLength; ++i) { + hashCode = 37 * hashCode + blockValue; + } + return hashCode; + } + + template<typename UInt> + void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) { + U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask); + int32_t entryIndex = findEntry(data, data, blockStart, hashCode); + if (entryIndex < 0) { + table[~entryIndex] = (hashCode << shift) | (dataIndex + 1); + } + } + + template<typename UIntA, typename UIntB> + int32_t findEntry(const UIntA *data, const UIntB *blockData, int32_t blockStart, + uint32_t hashCode) const { + uint32_t shiftedHashCode = hashCode << shift; + int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1 + for (int32_t entryIndex = initialEntryIndex;;) { + uint32_t entry = table[entryIndex]; + if (entry == 0) { + return ~entryIndex; + } + if ((entry & ~mask) == shiftedHashCode) { + int32_t dataIndex = (entry & mask) - 1; + if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) { + return entryIndex; + } + } + entryIndex = nextIndex(initialEntryIndex, entryIndex); + } + } + + int32_t findEntry(const uint32_t *data, uint32_t blockValue, uint32_t hashCode) const { + uint32_t shiftedHashCode = hashCode << shift; + int32_t initialEntryIndex = (hashCode % (length - 1)) + 1; // 1..length-1 + for (int32_t entryIndex = initialEntryIndex;;) { + uint32_t entry = table[entryIndex]; + if (entry == 0) { + return ~entryIndex; + } + if ((entry & ~mask) == shiftedHashCode) { + int32_t dataIndex = (entry & mask) - 1; + if (allValuesSameAs(data + dataIndex, blockLength, blockValue)) { + return entryIndex; + } + } + entryIndex = nextIndex(initialEntryIndex, entryIndex); + } + } + + inline int32_t nextIndex(int32_t initialEntryIndex, int32_t entryIndex) const { + // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length); + return (entryIndex + initialEntryIndex) % length; + } + + // Hash table. + // The length is a prime number, larger than the maximum data length. + // The "shift" lower bits store a data index + 1. + // The remaining upper bits store a partial hashCode of the block data values. + uint32_t *table = nullptr; + int32_t capacity = 0; + int32_t length = 0; + int32_t shift = 0; + uint32_t mask = 0; + + int32_t blockLength = 0; +}; + +int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) { +#ifdef UCPTRIE_DEBUG + bool overflow = false; +#endif + + // ASCII data will be stored as a linear table, even if the following code + // does not yet count it that way. + int32_t newDataCapacity = ASCII_LIMIT; + // Add room for a small data null block in case it would match the start of + // a fast data block where dataNullOffset must not be set in that case. + newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + // Add room for special values (errorValue, highValue) and padding. + newDataCapacity += 4; + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; + int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + for (int32_t i = 0; i < iLimit; i += inc) { + if (i == fastILimit) { + blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + inc = 1; + } + uint32_t value = index[i]; + if (flags[i] == MIXED) { + // Really mixed? + const uint32_t *p = data + value; + value = *p; + if (allValuesSameAs(p + 1, blockLength - 1, value)) { + flags[i] = ALL_SAME; + index[i] = value; + // Fall through to ALL_SAME handling. + } else { + newDataCapacity += blockLength; + continue; + } + } else { + U_ASSERT(flags[i] == ALL_SAME); + if (inc > 1) { + // Do all of the fast-range data block's ALL_SAME parts have the same value? + bool allSame = true; + int32_t next_i = i + inc; + for (int32_t j = i + 1; j < next_i; ++j) { + U_ASSERT(flags[j] == ALL_SAME); + if (index[j] != value) { + allSame = false; + break; + } + } + if (!allSame) { + // Turn it into a MIXED block. + if (getDataBlock(i) < 0) { + return -1; + } + newDataCapacity += blockLength; + continue; + } + } + } + // Is there another ALL_SAME block with the same value? + int32_t other = allSameBlocks.findOrAdd(i, inc, value); + if (other == AllSameBlocks::OVERFLOW) { + // The fixed-size array overflowed. Slow check for a duplicate block. +#ifdef UCPTRIE_DEBUG + if (!overflow) { + puts("UCPTrie AllSameBlocks overflow"); + overflow = true; + } +#endif + int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + for (int32_t j = 0;; j += jInc) { + if (j == i) { + allSameBlocks.add(i, inc, value); + break; + } + if (j == fastILimit) { + jInc = 1; + } + if (flags[j] == ALL_SAME && index[j] == value) { + allSameBlocks.add(j, jInc + inc, value); + other = j; + break; + // We could keep counting blocks with the same value + // before we add the first one, which may improve compaction in rare cases, + // but it would make it slower. + } + } + } + if (other >= 0) { + flags[i] = SAME_AS; + index[i] = other; + } else { + // New unique same-value block. + newDataCapacity += blockLength; + } + } + return newDataCapacity; +} + +#ifdef UCPTRIE_DEBUG +# define DEBUG_DO(expr) expr +#else +# define DEBUG_DO(expr) +#endif + +#ifdef UCPTRIE_DEBUG +// Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF +int32_t appendValue(char s[], int32_t length, uint32_t value) { + value ^= value >> 16; + value ^= value >> 8; + s[length] = 0xE2; + s[length + 1] = (char)(0xA0 + ((value >> 6) & 3)); + s[length + 2] = (char)(0x80 + (value & 0x3F)); + return length + 3; +} + +void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value, + UChar32 start, int32_t overlap, uint32_t initialValue) { + char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3]; + int32_t length = 0; + int32_t i; + for (i = 0; i < overlap; ++i) { + length = appendValue(s, length, 0); // Braille blank + } + s[length++] = '|'; + for (; i < blockLength; ++i) { + if (block != nullptr) { + value = block[i]; + } + if (value == initialValue) { + value = 0x40; // Braille lower left dot + } + length = appendValue(s, length, value); + } + s[length] = 0; + start += overlap; + if (start <= 0xffff) { + printf(" %04lX %s|\n", (long)start, s); + } else if (start <= 0xfffff) { + printf(" %5lX %s|\n", (long)start, s); + } else { + printf(" %6lX %s|\n", (long)start, s); + } +} +#endif + +/** + * Compacts a build-time trie. + * + * The compaction + * - removes blocks that are identical with earlier ones + * - overlaps each new non-duplicate block as much as possible with the previously-written one + * - works with fast-range data blocks whose length is a multiple of that of + * higher-code-point data blocks + * + * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks. + */ +int32_t MutableCodePointTrie::compactData( + int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity, + int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) { +#ifdef UCPTRIE_DEBUG + int32_t countSame=0, sumOverlaps=0; + bool printData = dataLength == 29088 /* line.brk */ || + // dataLength == 30048 /* CanonIterData */ || + dataLength == 50400 /* zh.txt~stroke */; +#endif + + // The linear ASCII data has been copied into newData already. + int32_t newDataLength = 0; + for (int32_t i = 0; newDataLength < ASCII_LIMIT; + newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) { + index[i] = newDataLength; +#ifdef UCPTRIE_DEBUG + if (printData) { + printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue); + } +#endif + } + + int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; + if (!mixedBlocks.init(newDataCapacity, blockLength)) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + mixedBlocks.extend(newData, 0, 0, newDataLength); + + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + int32_t fastLength = 0; + for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) { + if (i == fastILimit) { + blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + inc = 1; + fastLength = newDataLength; + if (!mixedBlocks.init(newDataCapacity, blockLength)) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + mixedBlocks.extend(newData, 0, 0, newDataLength); + } + if (flags[i] == ALL_SAME) { + uint32_t value = index[i]; + // Find an earlier part of the data array of length blockLength + // that is filled with this value. + int32_t n = mixedBlocks.findAllSameBlock(newData, value); + // If we find a match, and the current block is the data null block, + // and it is not a fast block but matches the start of a fast block, + // then we need to continue looking. + // This is because this small block is shorter than the fast block, + // and not all of the rest of the fast block is filled with this value. + // Otherwise trie.getRange() would detect that the fast block starts at + // dataNullOffset and assume incorrectly that it is filled with the null value. + while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength && + isStartOfSomeFastBlock(n, index, fastILimit)) { + n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength); + } + if (n >= 0) { + DEBUG_DO(++countSame); + index[i] = n; + } else { + n = getAllSameOverlap(newData, newDataLength, value, blockLength); + DEBUG_DO(sumOverlaps += n); +#ifdef UCPTRIE_DEBUG + if (printData) { + printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue); + } +#endif + index[i] = newDataLength - n; + int32_t prevDataLength = newDataLength; + while (n < blockLength) { + newData[newDataLength++] = value; + ++n; + } + mixedBlocks.extend(newData, 0, prevDataLength, newDataLength); + } + } else if (flags[i] == MIXED) { + const uint32_t *block = data + index[i]; + int32_t n = mixedBlocks.findBlock(newData, block, 0); + if (n >= 0) { + DEBUG_DO(++countSame); + index[i] = n; + } else { + n = getOverlap(newData, newDataLength, block, 0, blockLength); + DEBUG_DO(sumOverlaps += n); +#ifdef UCPTRIE_DEBUG + if (printData) { + printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue); + } +#endif + index[i] = newDataLength - n; + int32_t prevDataLength = newDataLength; + while (n < blockLength) { + newData[newDataLength++] = block[n++]; + } + mixedBlocks.extend(newData, 0, prevDataLength, newDataLength); + } + } else /* SAME_AS */ { + uint32_t j = index[i]; + index[i] = index[j]; + } + } + +#ifdef UCPTRIE_DEBUG + /* we saved some space */ + printf("compacting UCPTrie: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n", + (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps); +#endif + return newDataLength; +} + +int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, + UErrorCode &errorCode) { + int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3); + if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) { + // Only the linear fast index, no multi-stage index tables. + index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET; + return fastIndexLength; + } + + // Condense the fast index table. + // Also, does it contain an index-3 block with all dataNullOffset? + uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH]; // fastIndexLength + int32_t i3FirstNull = -1; + for (int32_t i = 0, j = 0; i < fastILimit; ++j) { + uint32_t i3 = index[i]; + fastIndex[j] = (uint16_t)i3; + if (i3 == (uint32_t)dataNullOffset) { + if (i3FirstNull < 0) { + i3FirstNull = j; + } else if (index3NullOffset < 0 && + (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) { + index3NullOffset = i3FirstNull; + } + } else { + i3FirstNull = -1; + } + // Set the index entries that compactData() skipped. + // Needed when the multi-stage index covers the fast index range as well. + int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + while (++i < iNext) { + i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + index[i] = i3; + } + } + + if (!mixedBlocks.init(fastIndexLength, UCPTRIE_INDEX_3_BLOCK_LENGTH)) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength); + + // Examine index-3 blocks. For each determine one of: + // - same as the index-3 null block + // - same as a fast-index block + // - 16-bit indexes + // - 18-bit indexes + // We store this in the first flags entry for the index-3 block. + // + // Also determine an upper limit for the index-3 table length. + int32_t index3Capacity = 0; + i3FirstNull = index3NullOffset; + bool hasLongI3Blocks = false; + // If the fast index covers the whole BMP, then + // the multi-stage index is only for supplementary code points. + // Otherwise, the multi-stage index covers all of Unicode. + int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT; + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + for (int32_t i = iStart; i < iLimit;) { + int32_t j = i; + int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH; + uint32_t oredI3 = 0; + bool isNull = true; + do { + uint32_t i3 = index[j]; + oredI3 |= i3; + if (i3 != (uint32_t)dataNullOffset) { + isNull = false; + } + } while (++j < jLimit); + if (isNull) { + flags[i] = I3_NULL; + if (i3FirstNull < 0) { + if (oredI3 <= 0xffff) { + index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH; + } else { + index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; + hasLongI3Blocks = true; + } + i3FirstNull = 0; + } + } else { + if (oredI3 <= 0xffff) { + int32_t n = mixedBlocks.findBlock(fastIndex, index, i); + if (n >= 0) { + flags[i] = I3_BMP; + index[i] = n; + } else { + flags[i] = I3_16; + index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH; + } + } else { + flags[i] = I3_18; + index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; + hasLongI3Blocks = true; + } + } + i = j; + } + + int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3; + + // Length of the index-1 table, rounded up. + int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2; + + // Index table: Fast index, index-1, index-3, index-2. + // +1 for possible index table padding. + int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1; + index16 = (uint16_t *)uprv_malloc(index16Capacity * 2); + if (index16 == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + uprv_memcpy(index16, fastIndex, fastIndexLength * 2); + + if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + MixedBlocks longI3Blocks; + if (hasLongI3Blocks) { + if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + } + + // Compact the index-3 table and write an uncompacted version of the index-2 table. + uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2]; // index2Capacity + int32_t i2Length = 0; + i3FirstNull = index3NullOffset; + int32_t index3Start = fastIndexLength + index1Length; + int32_t indexLength = index3Start; + for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) { + int32_t i3; + uint8_t f = flags[i]; + if (f == I3_NULL && i3FirstNull < 0) { + // First index-3 null block. Write & overlap it like a normal block, then remember it. + f = dataNullOffset <= 0xffff ? I3_16 : I3_18; + i3FirstNull = 0; + } + if (f == I3_NULL) { + i3 = index3NullOffset; + } else if (f == I3_BMP) { + i3 = index[i]; + } else if (f == I3_16) { + int32_t n = mixedBlocks.findBlock(index16, index, i); + if (n >= 0) { + i3 = n; + } else { + if (indexLength == index3Start) { + // No overlap at the boundary between the index-1 and index-3 tables. + n = 0; + } else { + n = getOverlap(index16, indexLength, + index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH); + } + i3 = indexLength - n; + int32_t prevIndexLength = indexLength; + while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) { + index16[indexLength++] = index[i + n++]; + } + mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength); + if (hasLongI3Blocks) { + longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength); + } + } + } else { + U_ASSERT(f == I3_18); + U_ASSERT(hasLongI3Blocks); + // Encode an index-3 block that contains one or more data indexes exceeding 16 bits. + int32_t j = i; + int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH; + int32_t k = indexLength; + do { + ++k; + uint32_t v = index[j++]; + uint32_t upperBits = (v & 0x30000) >> 2; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 4; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 6; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 8; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 10; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 12; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 14; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 16; + index16[k++] = v; + index16[k - 9] = upperBits; + } while (j < jLimit); + int32_t n = longI3Blocks.findBlock(index16, index16, indexLength); + if (n >= 0) { + i3 = n | 0x8000; + } else { + if (indexLength == index3Start) { + // No overlap at the boundary between the index-1 and index-3 tables. + n = 0; + } else { + n = getOverlap(index16, indexLength, + index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH); + } + i3 = (indexLength - n) | 0x8000; + int32_t prevIndexLength = indexLength; + if (n > 0) { + int32_t start = indexLength; + while (n < INDEX_3_18BIT_BLOCK_LENGTH) { + index16[indexLength++] = index16[start + n++]; + } + } else { + indexLength += INDEX_3_18BIT_BLOCK_LENGTH; + } + mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength); + if (hasLongI3Blocks) { + longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength); + } + } + } + if (index3NullOffset < 0 && i3FirstNull >= 0) { + index3NullOffset = i3; + } + // Set the index-2 table entry. + index2[i2Length++] = i3; + } + U_ASSERT(i2Length == index2Capacity); + U_ASSERT(indexLength <= index3Start + index3Capacity); + + if (index3NullOffset < 0) { + index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET; + } + if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) { + // The index-3 offsets exceed 15 bits, or + // the last one cannot be distinguished from the no-null-block value. + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + // Compact the index-2 table and write the index-1 table. + static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH, + "must re-init mixedBlocks"); + int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH; + int32_t i1 = fastIndexLength; + for (int32_t i = 0; i < i2Length; i += blockLength) { + int32_t n; + if ((i2Length - i) >= blockLength) { + // normal block + U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH); + n = mixedBlocks.findBlock(index16, index2, i); + } else { + // highStart is inside the last index-2 block. Shorten it. + blockLength = i2Length - i; + n = findSameBlock(index16, index3Start, indexLength, + index2, i, blockLength); + } + int32_t i2; + if (n >= 0) { + i2 = n; + } else { + if (indexLength == index3Start) { + // No overlap at the boundary between the index-1 and index-3/2 tables. + n = 0; + } else { + n = getOverlap(index16, indexLength, index2, i, blockLength); + } + i2 = indexLength - n; + int32_t prevIndexLength = indexLength; + while (n < blockLength) { + index16[indexLength++] = index2[i + n++]; + } + mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength); + } + // Set the index-1 table entry. + index16[i1++] = i2; + } + U_ASSERT(i1 == index3Start); + U_ASSERT(indexLength <= index16Capacity); + +#ifdef UCPTRIE_DEBUG + /* we saved some space */ + printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n", + (long)iLimit, (long)indexLength); +#endif + + return indexLength; +} + +int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) { + // Find the real highStart and round it up. + U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0); + highValue = get(MAX_UNICODE); + int32_t realHighStart = findHighStart(); + realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) & + ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); + if (realHighStart == UNICODE_LIMIT) { + highValue = initialValue; + } + +#ifdef UCPTRIE_DEBUG + printf("UCPTrie: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n", + (long)realHighStart, (long)highValue, (long)initialValue); +#endif + + // We always store indexes and data values for the fast range. + // Pin highStart to the top of that range while building. + UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3; + if (realHighStart < fastLimit) { + for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) { + flags[i] = ALL_SAME; + index[i] = highValue; + } + highStart = fastLimit; + } else { + highStart = realHighStart; + } + + uint32_t asciiData[ASCII_LIMIT]; + for (int32_t i = 0; i < ASCII_LIMIT; ++i) { + asciiData[i] = get(i); + } + + // First we look for which data blocks have the same value repeated over the whole block, + // deduplicate such blocks, find a good null data block (for faster enumeration), + // and get an upper bound for the necessary data array length. + AllSameBlocks allSameBlocks; + int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks); + if (newDataCapacity < 0) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4); + if (newData == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + uprv_memcpy(newData, asciiData, sizeof(asciiData)); + + int32_t dataNullIndex = allSameBlocks.findMostUsed(); + + MixedBlocks mixedBlocks; + int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity, + dataNullIndex, mixedBlocks, errorCode); + if (U_FAILURE(errorCode)) { return 0; } + U_ASSERT(newDataLength <= newDataCapacity); + uprv_free(data); + data = newData; + dataCapacity = newDataCapacity; + dataLength = newDataLength; + if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) { + // The offset of the last data block is too high to be stored in the index table. + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + if (dataNullIndex >= 0) { + dataNullOffset = index[dataNullIndex]; +#ifdef UCPTRIE_DEBUG + if (data[dataNullOffset] != initialValue) { + printf("UCPTrie initialValue %lx -> more common nullValue %lx\n", + (long)initialValue, (long)data[dataNullOffset]); + } +#endif + initialValue = data[dataNullOffset]; + } else { + dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET; + } + + int32_t indexLength = compactIndex(fastILimit, mixedBlocks, errorCode); + highStart = realHighStart; + return indexLength; +} + +UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return nullptr; + } + if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type || + valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + + // The mutable trie always stores 32-bit values. + // When we build a UCPTrie for a smaller value width, we first mask off unused bits + // before compacting the data. + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_32: + break; + case UCPTRIE_VALUE_BITS_16: + maskValues(0xffff); + break; + case UCPTRIE_VALUE_BITS_8: + maskValues(0xff); + break; + default: + break; + } + + UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT; + int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode); + if (U_FAILURE(errorCode)) { + clear(); + return nullptr; + } + + // Ensure data table alignment: The index length must be even for uint32_t data. + if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) { + index16[indexLength++] = 0xffee; // arbitrary value + } + + // Make the total trie structure length a multiple of 4 bytes by padding the data table, + // and store special values as the last two data values. + int32_t length = indexLength * 2; + if (valueWidth == UCPTRIE_VALUE_BITS_16) { + if (((indexLength ^ dataLength) & 1) != 0) { + // padding + data[dataLength++] = errorValue; + } + if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { + data[dataLength++] = highValue; + data[dataLength++] = errorValue; + } + length += dataLength * 2; + } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { + // 32-bit data words never need padding to a multiple of 4 bytes. + if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { + if (data[dataLength - 1] != highValue) { + data[dataLength++] = highValue; + } + data[dataLength++] = errorValue; + } + length += dataLength * 4; + } else { + int32_t and3 = (length + dataLength) & 3; + if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) { + // all set + } else if(and3 == 3 && data[dataLength - 1] == highValue) { + data[dataLength++] = errorValue; + } else { + while (and3 != 2) { + data[dataLength++] = highValue; + and3 = (and3 + 1) & 3; + } + data[dataLength++] = highValue; + data[dataLength++] = errorValue; + } + length += dataLength; + } + + // Calculate the total length of the UCPTrie as a single memory block. + length += sizeof(UCPTrie); + U_ASSERT((length & 3) == 0); + + uint8_t *bytes = (uint8_t *)uprv_malloc(length); + if (bytes == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + clear(); + return nullptr; + } + UCPTrie *trie = reinterpret_cast<UCPTrie *>(bytes); + uprv_memset(trie, 0, sizeof(UCPTrie)); + trie->indexLength = indexLength; + trie->dataLength = dataLength; + + trie->highStart = highStart; + // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes. + // Runtime code needs to then test for the real highStart as well. + trie->shifted12HighStart = (highStart + 0xfff) >> 12; + trie->type = type; + trie->valueWidth = valueWidth; + + trie->index3NullOffset = index3NullOffset; + trie->dataNullOffset = dataNullOffset; + trie->nullValue = initialValue; + + bytes += sizeof(UCPTrie); + + // Fill the index and data arrays. + uint16_t *dest16 = (uint16_t *)bytes; + trie->index = dest16; + + if (highStart <= fastLimit) { + // Condense only the fast index from the mutable-trie index. + for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) { + *dest16++ = (uint16_t)index[i]; // dest16[j] + } + } else { + uprv_memcpy(dest16, index16, indexLength * 2); + dest16 += indexLength; + } + bytes += indexLength * 2; + + // Write the data array. + const uint32_t *p = data; + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + // Write 16-bit data values. + trie->data.ptr16 = dest16; + for (int32_t i = dataLength; i > 0; --i) { + *dest16++ = (uint16_t)*p++; + } + break; + case UCPTRIE_VALUE_BITS_32: + // Write 32-bit data values. + trie->data.ptr32 = (uint32_t *)bytes; + uprv_memcpy(bytes, p, (size_t)dataLength * 4); + break; + case UCPTRIE_VALUE_BITS_8: + // Write 8-bit data values. + trie->data.ptr8 = bytes; + for (int32_t i = dataLength; i > 0; --i) { + *bytes++ = (uint8_t)*p++; + } + break; + default: + // Will not occur, valueWidth checked at the beginning. + break; + } + +#ifdef UCPTRIE_DEBUG + trie->name = name; + + ucptrie_printLengths(trie, ""); +#endif + + clear(); + return trie; +} + +} // namespace + +U_NAMESPACE_END + +U_NAMESPACE_USE + +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + LocalPointer<MutableCodePointTrie> trie( + new MutableCodePointTrie(initialValue, errorValue, *pErrorCode), *pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + return reinterpret_cast<UMutableCPTrie *>(trie.orphan()); +} + +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + if (other == nullptr) { + return nullptr; + } + LocalPointer<MutableCodePointTrie> clone( + new MutableCodePointTrie(*reinterpret_cast<const MutableCodePointTrie *>(other), *pErrorCode), *pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + return reinterpret_cast<UMutableCPTrie *>(clone.orphan()); +} + +U_CAPI void U_EXPORT2 +umutablecptrie_close(UMutableCPTrie *trie) { + delete reinterpret_cast<MutableCodePointTrie *>(trie); +} + +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + if (map == nullptr) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPMap(map, *pErrorCode)); +} + +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + if (trie == nullptr) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode)); +} + +U_CAPI uint32_t U_EXPORT2 +umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) { + return reinterpret_cast<const MutableCodePointTrie *>(trie)->get(c); +} + +namespace { + +UChar32 getRange(const void *trie, UChar32 start, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { + return reinterpret_cast<const MutableCodePointTrie *>(trie)-> + getRange(start, filter, context, pValue); +} + +} // namespace + +U_CAPI UChar32 U_EXPORT2 +umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { + return ucptrie_internalGetRange(getRange, trie, start, + option, surrogateValue, + filter, context, pValue); +} + +U_CAPI void U_EXPORT2 +umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return; + } + reinterpret_cast<MutableCodePointTrie *>(trie)->set(c, value, *pErrorCode); +} + +U_CAPI void U_EXPORT2 +umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end, + uint32_t value, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return; + } + reinterpret_cast<MutableCodePointTrie *>(trie)->setRange(start, end, value, *pErrorCode); +} + +/* Compact and internally serialize the trie. */ +U_CAPI UCPTrie * U_EXPORT2 +umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + return reinterpret_cast<MutableCodePointTrie *>(trie)->build(type, valueWidth, *pErrorCode); +} + +#ifdef UCPTRIE_DEBUG +U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) { + reinterpret_cast<MutableCodePointTrie *>(trie)->name = name; +} +#endif diff --git a/thirdparty/icu4c/common/umutex.cpp b/thirdparty/icu4c/common/umutex.cpp new file mode 100644 index 0000000000..ccbee9960a --- /dev/null +++ b/thirdparty/icu4c/common/umutex.cpp @@ -0,0 +1,204 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File umutex.cpp +* +* Modification History: +* +* Date Name Description +* 04/02/97 aliu Creation. +* 04/07/99 srl updated +* 05/13/99 stephen Changed to umutex (from cmutex). +* 11/22/99 aliu Make non-global mutex autoinitialize [j151] +****************************************************************************** +*/ + +#include "umutex.h" + +#include "unicode/utypes.h" +#include "uassert.h" +#include "ucln_cmn.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + + +#if defined(U_USER_MUTEX_CPP) +// Support for including an alternate implementation of mutexes has been withdrawn. +// See issue ICU-20185. +#error U_USER_MUTEX_CPP not supported +#endif + + +/************************************************************************************************* + * + * ICU Mutex wrappers. + * + *************************************************************************************************/ + +namespace { +std::mutex *initMutex; +std::condition_variable *initCondition; + +// The ICU global mutex. +// Used when ICU implementation code passes nullptr for the mutex pointer. +UMutex globalMutex; + +std::once_flag initFlag; +std::once_flag *pInitFlag = &initFlag; + +} // Anonymous namespace + +U_CDECL_BEGIN +static UBool U_CALLCONV umtx_cleanup() { + initMutex->~mutex(); + initCondition->~condition_variable(); + UMutex::cleanup(); + + // Reset the once_flag, by destructing it and creating a fresh one in its place. + // Do not use this trick anywhere else in ICU; use umtx_initOnce, not std::call_once(). + pInitFlag->~once_flag(); + pInitFlag = new(&initFlag) std::once_flag(); + return true; +} + +static void U_CALLCONV umtx_init() { + initMutex = STATIC_NEW(std::mutex); + initCondition = STATIC_NEW(std::condition_variable); + ucln_common_registerCleanup(UCLN_COMMON_MUTEX, umtx_cleanup); +} +U_CDECL_END + + +std::mutex *UMutex::getMutex() { + std::mutex *retPtr = fMutex.load(std::memory_order_acquire); + if (retPtr == nullptr) { + std::call_once(*pInitFlag, umtx_init); + std::lock_guard<std::mutex> guard(*initMutex); + retPtr = fMutex.load(std::memory_order_acquire); + if (retPtr == nullptr) { + fMutex = new(fStorage) std::mutex(); + retPtr = fMutex; + fListLink = gListHead; + gListHead = this; + } + } + U_ASSERT(retPtr != nullptr); + return retPtr; +} + +UMutex *UMutex::gListHead = nullptr; + +void UMutex::cleanup() { + UMutex *next = nullptr; + for (UMutex *m = gListHead; m != nullptr; m = next) { + (*m->fMutex).~mutex(); + m->fMutex = nullptr; + next = m->fListLink; + m->fListLink = nullptr; + } + gListHead = nullptr; +} + + +U_CAPI void U_EXPORT2 +umtx_lock(UMutex *mutex) { + if (mutex == nullptr) { + mutex = &globalMutex; + } + mutex->lock(); +} + + +U_CAPI void U_EXPORT2 +umtx_unlock(UMutex* mutex) +{ + if (mutex == nullptr) { + mutex = &globalMutex; + } + mutex->unlock(); +} + + +/************************************************************************************************* + * + * UInitOnce Implementation + * + *************************************************************************************************/ + +// This function is called when a test of a UInitOnce::fState reveals that +// initialization has not completed, that we either need to call the init +// function on this thread, or wait for some other thread to complete. +// +// The actual call to the init function is made inline by template code +// that knows the C++ types involved. This function returns true if +// the caller needs to call the Init function. +// +U_COMMON_API UBool U_EXPORT2 +umtx_initImplPreInit(UInitOnce &uio) { + std::call_once(*pInitFlag, umtx_init); + std::unique_lock<std::mutex> lock(*initMutex); + if (umtx_loadAcquire(uio.fState) == 0) { + umtx_storeRelease(uio.fState, 1); + return true; // Caller will next call the init function. + } else { + while (umtx_loadAcquire(uio.fState) == 1) { + // Another thread is currently running the initialization. + // Wait until it completes. + initCondition->wait(lock); + } + U_ASSERT(uio.fState == 2); + return false; + } +} + + +// This function is called by the thread that ran an initialization function, +// just after completing the function. +// Some threads may be waiting on the condition, requiring the broadcast wakeup. +// Some threads may be racing to test the fState variable outside of the mutex, +// requiring the use of store/release when changing its value. + +U_COMMON_API void U_EXPORT2 +umtx_initImplPostInit(UInitOnce &uio) { + { + std::unique_lock<std::mutex> lock(*initMutex); + umtx_storeRelease(uio.fState, 2); + } + initCondition->notify_all(); +} + +U_NAMESPACE_END + +/************************************************************************************************* + * + * Deprecated functions for setting user mutexes. + * + *************************************************************************************************/ + +U_DEPRECATED void U_EXPORT2 +u_setMutexFunctions(const void * /*context */, UMtxInitFn *, UMtxFn *, + UMtxFn *, UMtxFn *, UErrorCode *status) { + if (U_SUCCESS(*status)) { + *status = U_UNSUPPORTED_ERROR; + } + return; +} + + + +U_DEPRECATED void U_EXPORT2 +u_setAtomicIncDecFunctions(const void * /*context */, UMtxAtomicFn *, UMtxAtomicFn *, + UErrorCode *status) { + if (U_SUCCESS(*status)) { + *status = U_UNSUPPORTED_ERROR; + } + return; +} diff --git a/thirdparty/icu4c/common/umutex.h b/thirdparty/icu4c/common/umutex.h new file mode 100644 index 0000000000..8d76b3f3e6 --- /dev/null +++ b/thirdparty/icu4c/common/umutex.h @@ -0,0 +1,277 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1997-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File UMUTEX.H +* +* Modification History: +* +* Date Name Description +* 04/02/97 aliu Creation. +* 04/07/99 srl rewrite - C interface, multiple mutices +* 05/13/99 stephen Changed to umutex (from cmutex) +****************************************************************************** +*/ + +#ifndef UMUTEX_H +#define UMUTEX_H + +#include <atomic> +#include <condition_variable> +#include <mutex> +#include <type_traits> + +#include "unicode/utypes.h" +#include "unicode/uclean.h" +#include "unicode/uobject.h" + +#include "putilimp.h" + +#if defined(U_USER_ATOMICS_H) || defined(U_USER_MUTEX_H) +// Support for including an alternate implementation of atomic & mutex operations has been withdrawn. +// See issue ICU-20185. +#error U_USER_ATOMICS and U_USER_MUTEX_H are not supported +#endif + +// Export an explicit template instantiation of std::atomic<int32_t>. +// When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class. +// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples. +// +// Similar story for std::atomic<std::mutex *>, and the exported UMutex class. +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN) +#if defined(__clang__) || defined(_MSC_VER) + #if defined(__clang__) + // Suppress the warning that the explicit instantiation after explicit specialization has no effect. + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Winstantiation-after-specialization" + #endif +template struct U_COMMON_API std::atomic<int32_t>; +template struct U_COMMON_API std::atomic<std::mutex *>; + #if defined(__clang__) + #pragma clang diagnostic pop + #endif +#elif defined(__GNUC__) +// For GCC this class is already exported/visible, so no need for U_COMMON_API. +template struct std::atomic<int32_t>; +template struct std::atomic<std::mutex *>; +#endif +#endif + + +U_NAMESPACE_BEGIN + +/**************************************************************************** + * + * Low Level Atomic Operations, ICU wrappers for. + * + ****************************************************************************/ + +typedef std::atomic<int32_t> u_atomic_int32_t; +#define ATOMIC_INT32_T_INITIALIZER(val) ATOMIC_VAR_INIT(val) + +inline int32_t umtx_loadAcquire(u_atomic_int32_t &var) { + return var.load(std::memory_order_acquire); +} + +inline void umtx_storeRelease(u_atomic_int32_t &var, int32_t val) { + var.store(val, std::memory_order_release); +} + +inline int32_t umtx_atomic_inc(u_atomic_int32_t *var) { + return var->fetch_add(1) + 1; +} + +inline int32_t umtx_atomic_dec(u_atomic_int32_t *var) { + return var->fetch_sub(1) - 1; +} + + +/************************************************************************************************* + * + * UInitOnce Definitions. + * + *************************************************************************************************/ + +struct UInitOnce { + u_atomic_int32_t fState; + UErrorCode fErrCode; + void reset() {fState = 0;} + UBool isReset() {return umtx_loadAcquire(fState) == 0;} +// Note: isReset() is used by service registration code. +// Thread safety of this usage needs review. +}; + +#define U_INITONCE_INITIALIZER {ATOMIC_INT32_T_INITIALIZER(0), U_ZERO_ERROR} + + +U_COMMON_API UBool U_EXPORT2 umtx_initImplPreInit(UInitOnce &); +U_COMMON_API void U_EXPORT2 umtx_initImplPostInit(UInitOnce &); + +template<class T> void umtx_initOnce(UInitOnce &uio, T *obj, void (U_CALLCONV T::*fp)()) { + if (umtx_loadAcquire(uio.fState) == 2) { + return; + } + if (umtx_initImplPreInit(uio)) { + (obj->*fp)(); + umtx_initImplPostInit(uio); + } +} + + +// umtx_initOnce variant for plain functions, or static class functions. +// No context parameter. +inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)()) { + if (umtx_loadAcquire(uio.fState) == 2) { + return; + } + if (umtx_initImplPreInit(uio)) { + (*fp)(); + umtx_initImplPostInit(uio); + } +} + +// umtx_initOnce variant for plain functions, or static class functions. +// With ErrorCode, No context parameter. +inline void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(UErrorCode &), UErrorCode &errCode) { + if (U_FAILURE(errCode)) { + return; + } + if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) { + // We run the initialization. + (*fp)(errCode); + uio.fErrCode = errCode; + umtx_initImplPostInit(uio); + } else { + // Someone else already ran the initialization. + if (U_FAILURE(uio.fErrCode)) { + errCode = uio.fErrCode; + } + } +} + +// umtx_initOnce variant for plain functions, or static class functions, +// with a context parameter. +template<class T> void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T), T context) { + if (umtx_loadAcquire(uio.fState) == 2) { + return; + } + if (umtx_initImplPreInit(uio)) { + (*fp)(context); + umtx_initImplPostInit(uio); + } +} + +// umtx_initOnce variant for plain functions, or static class functions, +// with a context parameter and an error code. +template<class T> void umtx_initOnce(UInitOnce &uio, void (U_CALLCONV *fp)(T, UErrorCode &), T context, UErrorCode &errCode) { + if (U_FAILURE(errCode)) { + return; + } + if (umtx_loadAcquire(uio.fState) != 2 && umtx_initImplPreInit(uio)) { + // We run the initialization. + (*fp)(context, errCode); + uio.fErrCode = errCode; + umtx_initImplPostInit(uio); + } else { + // Someone else already ran the initialization. + if (U_FAILURE(uio.fErrCode)) { + errCode = uio.fErrCode; + } + } +} + +// UMutex should be constexpr-constructible, so that no initialization code +// is run during startup. +// This works on all C++ libraries except MS VS before VS2019. +#if (defined(_CPPLIB_VER) && !defined(_MSVC_STL_VERSION)) || \ + (defined(_MSVC_STL_VERSION) && _MSVC_STL_VERSION < 142) + // (VS std lib older than VS2017) || (VS std lib version < VS2019) +# define UMUTEX_CONSTEXPR +#else +# define UMUTEX_CONSTEXPR constexpr +#endif + +/** + * UMutex - ICU Mutex class. + * + * This is the preferred Mutex class for use within ICU implementation code. + * It is a thin wrapper over C++ std::mutex, with these additions: + * - Static instances are safe, not triggering static construction or destruction, + * and the associated order of construction or destruction issues. + * - Plumbed into u_cleanup() for destructing the underlying std::mutex, + * which frees any OS level resources they may be holding. + * + * Limitations: + * - Static or global instances only. Cannot be heap allocated. Cannot appear as a + * member of another class. + * - No condition variables or other advanced features. If needed, you will need to use + * std::mutex and std::condition_variable directly. For an example, see unifiedcache.cpp + * + * Typical Usage: + * static UMutex myMutex; + * + * { + * Mutex lock(myMutex); + * ... // Do stuff that is protected by myMutex; + * } // myMutex is released when lock goes out of scope. + */ + +class U_COMMON_API UMutex { +public: + UMUTEX_CONSTEXPR UMutex() {} + ~UMutex() = default; + + UMutex(const UMutex &other) = delete; + UMutex &operator =(const UMutex &other) = delete; + void *operator new(size_t) = delete; + + // requirements for C++ BasicLockable, allows UMutex to work with std::lock_guard + void lock() { + std::mutex *m = fMutex.load(std::memory_order_acquire); + if (m == nullptr) { m = getMutex(); } + m->lock(); + } + void unlock() { fMutex.load(std::memory_order_relaxed)->unlock(); } + + static void cleanup(); + +private: + alignas(std::mutex) char fStorage[sizeof(std::mutex)] {}; + std::atomic<std::mutex *> fMutex { nullptr }; + + /** All initialized UMutexes are kept in a linked list, so that they can be found, + * and the underlying std::mutex destructed, by u_cleanup(). + */ + UMutex *fListLink { nullptr }; + static UMutex *gListHead; + + /** Out-of-line function to lazily initialize a UMutex on first use. + * Initial fast check is inline, in lock(). The returned value may never + * be nullptr. + */ + std::mutex *getMutex(); +}; + + +/* Lock a mutex. + * @param mutex The given mutex to be locked. Pass NULL to specify + * the global ICU mutex. Recursive locks are an error + * and may cause a deadlock on some platforms. + */ +U_CAPI void U_EXPORT2 umtx_lock(UMutex* mutex); + +/* Unlock a mutex. + * @param mutex The given mutex to be unlocked. Pass NULL to specify + * the global ICU mutex. + */ +U_CAPI void U_EXPORT2 umtx_unlock (UMutex* mutex); + + +U_NAMESPACE_END + +#endif /* UMUTEX_H */ +/*eof*/ diff --git a/thirdparty/icu4c/common/unames.cpp b/thirdparty/icu4c/common/unames.cpp new file mode 100644 index 0000000000..5776058f95 --- /dev/null +++ b/thirdparty/icu4c/common/unames.cpp @@ -0,0 +1,2108 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: unames.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999oct04 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "unicode/uchar.h" +#include "unicode/udata.h" +#include "unicode/utf.h" +#include "unicode/utf16.h" +#include "uassert.h" +#include "ustr_imp.h" +#include "umutex.h" +#include "cmemory.h" +#include "cstring.h" +#include "ucln_cmn.h" +#include "udataswp.h" +#include "uprops.h" + +U_NAMESPACE_BEGIN + +/* prototypes ------------------------------------------------------------- */ + +static const char DATA_NAME[] = "unames"; +static const char DATA_TYPE[] = "icu"; + +#define GROUP_SHIFT 5 +#define LINES_PER_GROUP (1L<<GROUP_SHIFT) +#define GROUP_MASK (LINES_PER_GROUP-1) + +/* + * This struct was replaced by explicitly accessing equivalent + * fields from triples of uint16_t. + * The Group struct was padded to 8 bytes on compilers for early ARM CPUs, + * which broke the assumption that sizeof(Group)==6 and that the ++ operator + * would advance by 6 bytes (3 uint16_t). + * + * We can't just change the data structure because it's loaded from a data file, + * and we don't want to make it less compact, so we changed the access code. + * + * For details see ICU tickets 6331 and 6008. +typedef struct { + uint16_t groupMSB, + offsetHigh, offsetLow; / * avoid padding * / +} Group; + */ +enum { + GROUP_MSB, + GROUP_OFFSET_HIGH, + GROUP_OFFSET_LOW, + GROUP_LENGTH +}; + +/* + * Get the 32-bit group offset. + * @param group (const uint16_t *) pointer to a Group triple of uint16_t + * @return group offset (int32_t) + */ +#define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW]) + +#define NEXT_GROUP(group) ((group)+GROUP_LENGTH) +#define PREV_GROUP(group) ((group)-GROUP_LENGTH) + +typedef struct { + uint32_t start, end; + uint8_t type, variant; + uint16_t size; +} AlgorithmicRange; + +typedef struct { + uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset; +} UCharNames; + +/* + * Get the groups table from a UCharNames struct. + * The groups table consists of one uint16_t groupCount followed by + * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH + * and the comment for the old struct Group above. + * + * @param names (const UCharNames *) pointer to the UCharNames indexes + * @return (const uint16_t *) pointer to the groups table + */ +#define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset) + +typedef struct { + const char *otherName; + UChar32 code; +} FindName; + +#define DO_FIND_NAME NULL + +static UDataMemory *uCharNamesData=NULL; +static UCharNames *uCharNames=NULL; +static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER; + +/* + * Maximum length of character names (regular & 1.0). + */ +static int32_t gMaxNameLength=0; + +/* + * Set of chars used in character names (regular & 1.0). + * Chars are platform-dependent (can be EBCDIC). + */ +static uint32_t gNameSet[8]={ 0 }; + +#define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT +#define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1 +#define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2 + +#define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3) + +static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = { + "unassigned", + "uppercase letter", + "lowercase letter", + "titlecase letter", + "modifier letter", + "other letter", + "non spacing mark", + "enclosing mark", + "combining spacing mark", + "decimal digit number", + "letter number", + "other number", + "space separator", + "line separator", + "paragraph separator", + "control", + "format", + "private use area", + "surrogate", + "dash punctuation", + "start punctuation", + "end punctuation", + "connector punctuation", + "other punctuation", + "math symbol", + "currency symbol", + "modifier symbol", + "other symbol", + "initial punctuation", + "final punctuation", + "noncharacter", + "lead surrogate", + "trail surrogate" +}; + +/* implementation ----------------------------------------------------------- */ + +static UBool U_CALLCONV unames_cleanup(void) +{ + if(uCharNamesData) { + udata_close(uCharNamesData); + uCharNamesData = NULL; + } + if(uCharNames) { + uCharNames = NULL; + } + gCharNamesInitOnce.reset(); + gMaxNameLength=0; + return TRUE; +} + +static UBool U_CALLCONV +isAcceptable(void * /*context*/, + const char * /*type*/, const char * /*name*/, + const UDataInfo *pInfo) { + return (UBool)( + pInfo->size>=20 && + pInfo->isBigEndian==U_IS_BIG_ENDIAN && + pInfo->charsetFamily==U_CHARSET_FAMILY && + pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */ + pInfo->dataFormat[1]==0x6e && + pInfo->dataFormat[2]==0x61 && + pInfo->dataFormat[3]==0x6d && + pInfo->formatVersion[0]==1); +} + +static void U_CALLCONV +loadCharNames(UErrorCode &status) { + U_ASSERT(uCharNamesData == NULL); + U_ASSERT(uCharNames == NULL); + + uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status); + if(U_FAILURE(status)) { + uCharNamesData = NULL; + } else { + uCharNames = (UCharNames *)udata_getMemory(uCharNamesData); + } + ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup); +} + + +static UBool +isDataLoaded(UErrorCode *pErrorCode) { + umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode); + return U_SUCCESS(*pErrorCode); +} + +#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) UPRV_BLOCK_MACRO_BEGIN { \ + if((bufferLength)>0) { \ + *(buffer)++=c; \ + --(bufferLength); \ + } \ + ++(bufferPos); \ +} UPRV_BLOCK_MACRO_END + +#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT + +/* + * Important: expandName() and compareName() are almost the same - + * apply fixes to both. + * + * UnicodeData.txt uses ';' as a field separator, so no + * field can contain ';' as part of its contents. + * In unames.dat, it is marked as token[';']==-1 only if the + * semicolon is used in the data file - which is iff we + * have Unicode 1.0 names or ISO comments or aliases. + * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases + * although we know that it will never be part of a name. + */ +static uint16_t +expandName(UCharNames *names, + const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, + char *buffer, uint16_t bufferLength) { + uint16_t *tokens=(uint16_t *)names+8; + uint16_t token, tokenCount=*tokens++, bufferPos=0; + uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset; + uint8_t c; + + if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { + /* + * skip the modern name if it is not requested _and_ + * if the semicolon byte value is a character, not a token number + */ + if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { + int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice; + do { + while(nameLength>0) { + --nameLength; + if(*name++==';') { + break; + } + } + } while(--fieldIndex>0); + } else { + /* + * the semicolon byte value is a token number, therefore + * only modern names are stored in unames.dat and there is no + * such requested alternate name here + */ + nameLength=0; + } + } + + /* write each letter directly, and write a token word per token */ + while(nameLength>0) { + --nameLength; + c=*name++; + + if(c>=tokenCount) { + if(c!=';') { + /* implicit letter */ + WRITE_CHAR(buffer, bufferLength, bufferPos, c); + } else { + /* finished */ + break; + } + } else { + token=tokens[c]; + if(token==(uint16_t)(-2)) { + /* this is a lead byte for a double-byte token */ + token=tokens[c<<8|*name++]; + --nameLength; + } + if(token==(uint16_t)(-1)) { + if(c!=';') { + /* explicit letter */ + WRITE_CHAR(buffer, bufferLength, bufferPos, c); + } else { + /* stop, but skip the semicolon if we are seeking + extended names and there was no 2.0 name but there + is a 1.0 name. */ + if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) { + if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { + continue; + } + } + /* finished */ + break; + } + } else { + /* write token word */ + uint8_t *tokenString=tokenStrings+token; + while((c=*tokenString++)!=0) { + WRITE_CHAR(buffer, bufferLength, bufferPos, c); + } + } + } + } + + /* zero-terminate */ + if(bufferLength>0) { + *buffer=0; + } + + return bufferPos; +} + +/* + * compareName() is almost the same as expandName() except that it compares + * the currently expanded name to an input name. + * It returns the match/no match result as soon as possible. + */ +static UBool +compareName(UCharNames *names, + const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, + const char *otherName) { + uint16_t *tokens=(uint16_t *)names+8; + uint16_t token, tokenCount=*tokens++; + uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset; + uint8_t c; + const char *origOtherName = otherName; + + if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { + /* + * skip the modern name if it is not requested _and_ + * if the semicolon byte value is a character, not a token number + */ + if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { + int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice; + do { + while(nameLength>0) { + --nameLength; + if(*name++==';') { + break; + } + } + } while(--fieldIndex>0); + } else { + /* + * the semicolon byte value is a token number, therefore + * only modern names are stored in unames.dat and there is no + * such requested alternate name here + */ + nameLength=0; + } + } + + /* compare each letter directly, and compare a token word per token */ + while(nameLength>0) { + --nameLength; + c=*name++; + + if(c>=tokenCount) { + if(c!=';') { + /* implicit letter */ + if((char)c!=*otherName++) { + return FALSE; + } + } else { + /* finished */ + break; + } + } else { + token=tokens[c]; + if(token==(uint16_t)(-2)) { + /* this is a lead byte for a double-byte token */ + token=tokens[c<<8|*name++]; + --nameLength; + } + if(token==(uint16_t)(-1)) { + if(c!=';') { + /* explicit letter */ + if((char)c!=*otherName++) { + return FALSE; + } + } else { + /* stop, but skip the semicolon if we are seeking + extended names and there was no 2.0 name but there + is a 1.0 name. */ + if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) { + if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { + continue; + } + } + /* finished */ + break; + } + } else { + /* write token word */ + uint8_t *tokenString=tokenStrings+token; + while((c=*tokenString++)!=0) { + if((char)c!=*otherName++) { + return FALSE; + } + } + } + } + } + + /* complete match? */ + return (UBool)(*otherName==0); +} + +static uint8_t getCharCat(UChar32 cp) { + uint8_t cat; + + if (U_IS_UNICODE_NONCHAR(cp)) { + return U_NONCHARACTER_CODE_POINT; + } + + if ((cat = u_charType(cp)) == U_SURROGATE) { + cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE; + } + + return cat; +} + +static const char *getCharCatName(UChar32 cp) { + uint8_t cat = getCharCat(cp); + + /* Return unknown if the table of names above is not up to + date. */ + + if (cat >= UPRV_LENGTHOF(charCatNames)) { + return "unknown"; + } else { + return charCatNames[cat]; + } +} + +static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) { + const char *catname = getCharCatName(code); + uint16_t length = 0; + + UChar32 cp; + int ndigits, i; + + WRITE_CHAR(buffer, bufferLength, length, '<'); + while (catname[length - 1]) { + WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]); + } + WRITE_CHAR(buffer, bufferLength, length, '-'); + for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4) + ; + if (ndigits < 4) + ndigits = 4; + for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) { + uint8_t v = (uint8_t)(cp & 0xf); + buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10); + } + buffer += ndigits; + length += static_cast<uint16_t>(ndigits); + WRITE_CHAR(buffer, bufferLength, length, '>'); + + return length; +} + +/* + * getGroup() does a binary search for the group that contains the + * Unicode code point "code". + * The return value is always a valid Group* that may contain "code" + * or else is the highest group before "code". + * If the lowest group is after "code", then that one is returned. + */ +static const uint16_t * +getGroup(UCharNames *names, uint32_t code) { + const uint16_t *groups=GET_GROUPS(names); + uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT), + start=0, + limit=*groups++, + number; + + /* binary search for the group of names that contains the one for code */ + while(start<limit-1) { + number=(uint16_t)((start+limit)/2); + if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) { + limit=number; + } else { + start=number; + } + } + + /* return this regardless of whether it is an exact match */ + return groups+start*GROUP_LENGTH; +} + +/* + * expandGroupLengths() reads a block of compressed lengths of 32 strings and + * expands them into offsets and lengths for each string. + * Lengths are stored with a variable-width encoding in consecutive nibbles: + * If a nibble<0xc, then it is the length itself (0=empty string). + * If a nibble>=0xc, then it forms a length value with the following nibble. + * Calculation see below. + * The offsets and lengths arrays must be at least 33 (one more) long because + * there is no check here at the end if the last nibble is still used. + */ +static const uint8_t * +expandGroupLengths(const uint8_t *s, + uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) { + /* read the lengths of the 32 strings in this group and get each string's offset */ + uint16_t i=0, offset=0, length=0; + uint8_t lengthByte; + + /* all 32 lengths must be read to get the offset of the first group string */ + while(i<LINES_PER_GROUP) { + lengthByte=*s++; + + /* read even nibble - MSBs of lengthByte */ + if(length>=12) { + /* double-nibble length spread across two bytes */ + length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12); + lengthByte&=0xf; + } else if((lengthByte /* &0xf0 */)>=0xc0) { + /* double-nibble length spread across this one byte */ + length=(uint16_t)((lengthByte&0x3f)+12); + } else { + /* single-nibble length in MSBs */ + length=(uint16_t)(lengthByte>>4); + lengthByte&=0xf; + } + + *offsets++=offset; + *lengths++=length; + + offset+=length; + ++i; + + /* read odd nibble - LSBs of lengthByte */ + if((lengthByte&0xf0)==0) { + /* this nibble was not consumed for a double-nibble length above */ + length=lengthByte; + if(length<12) { + /* single-nibble length in LSBs */ + *offsets++=offset; + *lengths++=length; + + offset+=length; + ++i; + } + } else { + length=0; /* prevent double-nibble detection in the next iteration */ + } + } + + /* now, s is at the first group string */ + return s; +} + +static uint16_t +expandGroupName(UCharNames *names, const uint16_t *group, + uint16_t lineNumber, UCharNameChoice nameChoice, + char *buffer, uint16_t bufferLength) { + uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; + const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group); + s=expandGroupLengths(s, offsets, lengths); + return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice, + buffer, bufferLength); +} + +static uint16_t +getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice, + char *buffer, uint16_t bufferLength) { + const uint16_t *group=getGroup(names, code); + if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) { + return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice, + buffer, bufferLength); + } else { + /* group not found */ + /* zero-terminate */ + if(bufferLength>0) { + *buffer=0; + } + return 0; + } +} + +/* + * enumGroupNames() enumerates all the names in a 32-group + * and either calls the enumerator function or finds a given input name. + */ +static UBool +enumGroupNames(UCharNames *names, const uint16_t *group, + UChar32 start, UChar32 end, + UEnumCharNamesFn *fn, void *context, + UCharNameChoice nameChoice) { + uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; + const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group); + + s=expandGroupLengths(s, offsets, lengths); + if(fn!=DO_FIND_NAME) { + char buffer[200]; + uint16_t length; + + while(start<=end) { + length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer)); + if (!length && nameChoice == U_EXTENDED_CHAR_NAME) { + buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0; + } + /* here, we assume that the buffer is large enough */ + if(length>0) { + if(!fn(context, start, nameChoice, buffer, length)) { + return FALSE; + } + } + ++start; + } + } else { + const char *otherName=((FindName *)context)->otherName; + while(start<=end) { + if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) { + ((FindName *)context)->code=start; + return FALSE; + } + ++start; + } + } + return TRUE; +} + +/* + * enumExtNames enumerate extended names. + * It only needs to do it if it is called with a real function and not + * with the dummy DO_FIND_NAME, because u_charFromName() does a check + * for extended names by itself. + */ +static UBool +enumExtNames(UChar32 start, UChar32 end, + UEnumCharNamesFn *fn, void *context) +{ + if(fn!=DO_FIND_NAME) { + char buffer[200]; + uint16_t length; + + while(start<=end) { + buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0; + /* here, we assume that the buffer is large enough */ + if(length>0) { + if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) { + return FALSE; + } + } + ++start; + } + } + + return TRUE; +} + +static UBool +enumNames(UCharNames *names, + UChar32 start, UChar32 limit, + UEnumCharNamesFn *fn, void *context, + UCharNameChoice nameChoice) { + uint16_t startGroupMSB, endGroupMSB, groupCount; + const uint16_t *group, *groupLimit; + + startGroupMSB=(uint16_t)(start>>GROUP_SHIFT); + endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT); + + /* find the group that contains start, or the highest before it */ + group=getGroup(names, start); + + if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) { + /* enumerate synthetic names between start and the group start */ + UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT); + if(extLimit>limit) { + extLimit=limit; + } + if(!enumExtNames(start, extLimit-1, fn, context)) { + return FALSE; + } + start=extLimit; + } + + if(startGroupMSB==endGroupMSB) { + if(startGroupMSB==group[GROUP_MSB]) { + /* if start and limit-1 are in the same group, then enumerate only in that one */ + return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice); + } + } else { + const uint16_t *groups=GET_GROUPS(names); + groupCount=*groups++; + groupLimit=groups+groupCount*GROUP_LENGTH; + + if(startGroupMSB==group[GROUP_MSB]) { + /* enumerate characters in the partial start group */ + if((start&GROUP_MASK)!=0) { + if(!enumGroupNames(names, group, + start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1, + fn, context, nameChoice)) { + return FALSE; + } + group=NEXT_GROUP(group); /* continue with the next group */ + } + } else if(startGroupMSB>group[GROUP_MSB]) { + /* make sure that we start enumerating with the first group after start */ + const uint16_t *nextGroup=NEXT_GROUP(group); + if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) { + UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT; + if (end > limit) { + end = limit; + } + if (!enumExtNames(start, end - 1, fn, context)) { + return FALSE; + } + } + group=nextGroup; + } + + /* enumerate entire groups between the start- and end-groups */ + while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) { + const uint16_t *nextGroup; + start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT; + if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) { + return FALSE; + } + nextGroup=NEXT_GROUP(group); + if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) { + UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT; + if (end > limit) { + end = limit; + } + if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) { + return FALSE; + } + } + group=nextGroup; + } + + /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */ + if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) { + return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice); + } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) { + UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT; + if (next > start) { + start = next; + } + } else { + return TRUE; + } + } + + /* we have not found a group, which means everything is made of + extended names. */ + if (nameChoice == U_EXTENDED_CHAR_NAME) { + if (limit > UCHAR_MAX_VALUE + 1) { + limit = UCHAR_MAX_VALUE + 1; + } + return enumExtNames(start, limit - 1, fn, context); + } + + return TRUE; +} + +static uint16_t +writeFactorSuffix(const uint16_t *factors, uint16_t count, + const char *s, /* suffix elements */ + uint32_t code, + uint16_t indexes[8], /* output fields from here */ + const char *elementBases[8], const char *elements[8], + char *buffer, uint16_t bufferLength) { + uint16_t i, factor, bufferPos=0; + char c; + + /* write elements according to the factors */ + + /* + * the factorized elements are determined by modulo arithmetic + * with the factors of this algorithm + * + * note that for fewer operations, count is decremented here + */ + --count; + for(i=count; i>0; --i) { + factor=factors[i]; + indexes[i]=(uint16_t)(code%factor); + code/=factor; + } + /* + * we don't need to calculate the last modulus because start<=code<=end + * guarantees here that code<=factors[0] + */ + indexes[0]=(uint16_t)code; + + /* write each element */ + for(;;) { + if(elementBases!=NULL) { + *elementBases++=s; + } + + /* skip indexes[i] strings */ + factor=indexes[i]; + while(factor>0) { + while(*s++!=0) {} + --factor; + } + if(elements!=NULL) { + *elements++=s; + } + + /* write element */ + while((c=*s++)!=0) { + WRITE_CHAR(buffer, bufferLength, bufferPos, c); + } + + /* we do not need to perform the rest of this loop for i==count - break here */ + if(i>=count) { + break; + } + + /* skip the rest of the strings for this factors[i] */ + factor=(uint16_t)(factors[i]-indexes[i]-1); + while(factor>0) { + while(*s++!=0) {} + --factor; + } + + ++i; + } + + /* zero-terminate */ + if(bufferLength>0) { + *buffer=0; + } + + return bufferPos; +} + +/* + * Important: + * Parts of findAlgName() are almost the same as some of getAlgName(). + * Fixes must be applied to both. + */ +static uint16_t +getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice, + char *buffer, uint16_t bufferLength) { + uint16_t bufferPos=0; + + /* Only the normative character name can be algorithmic. */ + if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { + /* zero-terminate */ + if(bufferLength>0) { + *buffer=0; + } + return 0; + } + + switch(range->type) { + case 0: { + /* name = prefix hex-digits */ + const char *s=(const char *)(range+1); + char c; + + uint16_t i, count; + + /* copy prefix */ + while((c=*s++)!=0) { + WRITE_CHAR(buffer, bufferLength, bufferPos, c); + } + + /* write hexadecimal code point value */ + count=range->variant; + + /* zero-terminate */ + if(count<bufferLength) { + buffer[count]=0; + } + + for(i=count; i>0;) { + if(--i<bufferLength) { + c=(char)(code&0xf); + if(c<10) { + c+='0'; + } else { + c+='A'-10; + } + buffer[i]=c; + } + code>>=4; + } + + bufferPos+=count; + break; + } + case 1: { + /* name = prefix factorized-elements */ + uint16_t indexes[8]; + const uint16_t *factors=(const uint16_t *)(range+1); + uint16_t count=range->variant; + const char *s=(const char *)(factors+count); + char c; + + /* copy prefix */ + while((c=*s++)!=0) { + WRITE_CHAR(buffer, bufferLength, bufferPos, c); + } + + bufferPos+=writeFactorSuffix(factors, count, + s, code-range->start, indexes, NULL, NULL, buffer, bufferLength); + break; + } + default: + /* undefined type */ + /* zero-terminate */ + if(bufferLength>0) { + *buffer=0; + } + break; + } + + return bufferPos; +} + +/* + * Important: enumAlgNames() and findAlgName() are almost the same. + * Any fix must be applied to both. + */ +static UBool +enumAlgNames(AlgorithmicRange *range, + UChar32 start, UChar32 limit, + UEnumCharNamesFn *fn, void *context, + UCharNameChoice nameChoice) { + char buffer[200]; + uint16_t length; + + if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { + return TRUE; + } + + switch(range->type) { + case 0: { + char *s, *end; + char c; + + /* get the full name of the start character */ + length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer)); + if(length<=0) { + return TRUE; + } + + /* call the enumerator function with this first character */ + if(!fn(context, start, nameChoice, buffer, length)) { + return FALSE; + } + + /* go to the end of the name; all these names have the same length */ + end=buffer; + while(*end!=0) { + ++end; + } + + /* enumerate the rest of the names */ + while(++start<limit) { + /* increment the hexadecimal number on a character-basis */ + s=end; + for (;;) { + c=*--s; + if(('0'<=c && c<'9') || ('A'<=c && c<'F')) { + *s=(char)(c+1); + break; + } else if(c=='9') { + *s='A'; + break; + } else if(c=='F') { + *s='0'; + } + } + + if(!fn(context, start, nameChoice, buffer, length)) { + return FALSE; + } + } + break; + } + case 1: { + uint16_t indexes[8]; + const char *elementBases[8], *elements[8]; + const uint16_t *factors=(const uint16_t *)(range+1); + uint16_t count=range->variant; + const char *s=(const char *)(factors+count); + char *suffix, *t; + uint16_t prefixLength, i, idx; + + char c; + + /* name = prefix factorized-elements */ + + /* copy prefix */ + suffix=buffer; + prefixLength=0; + while((c=*s++)!=0) { + *suffix++=c; + ++prefixLength; + } + + /* append the suffix of the start character */ + length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count, + s, (uint32_t)start-range->start, + indexes, elementBases, elements, + suffix, (uint16_t)(sizeof(buffer)-prefixLength))); + + /* call the enumerator function with this first character */ + if(!fn(context, start, nameChoice, buffer, length)) { + return FALSE; + } + + /* enumerate the rest of the names */ + while(++start<limit) { + /* increment the indexes in lexical order bound by the factors */ + i=count; + for (;;) { + idx=(uint16_t)(indexes[--i]+1); + if(idx<factors[i]) { + /* skip one index and its element string */ + indexes[i]=idx; + s=elements[i]; + while(*s++!=0) { + } + elements[i]=s; + break; + } else { + /* reset this index to 0 and its element string to the first one */ + indexes[i]=0; + elements[i]=elementBases[i]; + } + } + + /* to make matters a little easier, just append all elements to the suffix */ + t=suffix; + length=prefixLength; + for(i=0; i<count; ++i) { + s=elements[i]; + while((c=*s++)!=0) { + *t++=c; + ++length; + } + } + /* zero-terminate */ + *t=0; + + if(!fn(context, start, nameChoice, buffer, length)) { + return FALSE; + } + } + break; + } + default: + /* undefined type */ + break; + } + + return TRUE; +} + +/* + * findAlgName() is almost the same as enumAlgNames() except that it + * returns the code point for a name if it fits into the range. + * It returns 0xffff otherwise. + */ +static UChar32 +findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) { + UChar32 code; + + if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { + return 0xffff; + } + + switch(range->type) { + case 0: { + /* name = prefix hex-digits */ + const char *s=(const char *)(range+1); + char c; + + uint16_t i, count; + + /* compare prefix */ + while((c=*s++)!=0) { + if((char)c!=*otherName++) { + return 0xffff; + } + } + + /* read hexadecimal code point value */ + count=range->variant; + code=0; + for(i=0; i<count; ++i) { + c=*otherName++; + if('0'<=c && c<='9') { + code=(code<<4)|(c-'0'); + } else if('A'<=c && c<='F') { + code=(code<<4)|(c-'A'+10); + } else { + return 0xffff; + } + } + + /* does it fit into the range? */ + if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) { + return code; + } + break; + } + case 1: { + char buffer[64]; + uint16_t indexes[8]; + const char *elementBases[8], *elements[8]; + const uint16_t *factors=(const uint16_t *)(range+1); + uint16_t count=range->variant; + const char *s=(const char *)(factors+count), *t; + UChar32 start, limit; + uint16_t i, idx; + + char c; + + /* name = prefix factorized-elements */ + + /* compare prefix */ + while((c=*s++)!=0) { + if((char)c!=*otherName++) { + return 0xffff; + } + } + + start=(UChar32)range->start; + limit=(UChar32)(range->end+1); + + /* initialize the suffix elements for enumeration; indexes should all be set to 0 */ + writeFactorSuffix(factors, count, s, 0, + indexes, elementBases, elements, buffer, sizeof(buffer)); + + /* compare the first suffix */ + if(0==uprv_strcmp(otherName, buffer)) { + return start; + } + + /* enumerate and compare the rest of the suffixes */ + while(++start<limit) { + /* increment the indexes in lexical order bound by the factors */ + i=count; + for (;;) { + idx=(uint16_t)(indexes[--i]+1); + if(idx<factors[i]) { + /* skip one index and its element string */ + indexes[i]=idx; + s=elements[i]; + while(*s++!=0) {} + elements[i]=s; + break; + } else { + /* reset this index to 0 and its element string to the first one */ + indexes[i]=0; + elements[i]=elementBases[i]; + } + } + + /* to make matters a little easier, just compare all elements of the suffix */ + t=otherName; + for(i=0; i<count; ++i) { + s=elements[i]; + while((c=*s++)!=0) { + if(c!=*t++) { + s=""; /* does not match */ + i=99; + } + } + } + if(i<99 && *t==0) { + return start; + } + } + break; + } + default: + /* undefined type */ + break; + } + + return 0xffff; +} + +/* sets of name characters, maximum name lengths ---------------------------- */ + +#define SET_ADD(set, c) ((set)[(uint8_t)c>>5]|=((uint32_t)1<<((uint8_t)c&0x1f))) +#define SET_CONTAINS(set, c) (((set)[(uint8_t)c>>5]&((uint32_t)1<<((uint8_t)c&0x1f)))!=0) + +static int32_t +calcStringSetLength(uint32_t set[8], const char *s) { + int32_t length=0; + char c; + + while((c=*s++)!=0) { + SET_ADD(set, c); + ++length; + } + return length; +} + +static int32_t +calcAlgNameSetsLengths(int32_t maxNameLength) { + AlgorithmicRange *range; + uint32_t *p; + uint32_t rangeCount; + int32_t length; + + /* enumerate algorithmic ranges */ + p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); + rangeCount=*p; + range=(AlgorithmicRange *)(p+1); + while(rangeCount>0) { + switch(range->type) { + case 0: + /* name = prefix + (range->variant times) hex-digits */ + /* prefix */ + length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant; + if(length>maxNameLength) { + maxNameLength=length; + } + break; + case 1: { + /* name = prefix factorized-elements */ + const uint16_t *factors=(const uint16_t *)(range+1); + const char *s; + int32_t i, count=range->variant, factor, factorLength, maxFactorLength; + + /* prefix length */ + s=(const char *)(factors+count); + length=calcStringSetLength(gNameSet, s); + s+=length+1; /* start of factor suffixes */ + + /* get the set and maximum factor suffix length for each factor */ + for(i=0; i<count; ++i) { + maxFactorLength=0; + for(factor=factors[i]; factor>0; --factor) { + factorLength=calcStringSetLength(gNameSet, s); + s+=factorLength+1; + if(factorLength>maxFactorLength) { + maxFactorLength=factorLength; + } + } + length+=maxFactorLength; + } + + if(length>maxNameLength) { + maxNameLength=length; + } + break; + } + default: + /* unknown type */ + break; + } + + range=(AlgorithmicRange *)((uint8_t *)range+range->size); + --rangeCount; + } + return maxNameLength; +} + +static int32_t +calcExtNameSetsLengths(int32_t maxNameLength) { + int32_t i, length; + + for(i=0; i<UPRV_LENGTHOF(charCatNames); ++i) { + /* + * for each category, count the length of the category name + * plus 9= + * 2 for <> + * 1 for - + * 6 for most hex digits per code point + */ + length=9+calcStringSetLength(gNameSet, charCatNames[i]); + if(length>maxNameLength) { + maxNameLength=length; + } + } + return maxNameLength; +} + +static int32_t +calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths, + uint32_t set[8], + const uint8_t **pLine, const uint8_t *lineLimit) { + const uint8_t *line=*pLine; + int32_t length=0, tokenLength; + uint16_t c, token; + + while(line!=lineLimit && (c=*line++)!=(uint8_t)';') { + if(c>=tokenCount) { + /* implicit letter */ + SET_ADD(set, c); + ++length; + } else { + token=tokens[c]; + if(token==(uint16_t)(-2)) { + /* this is a lead byte for a double-byte token */ + c=c<<8|*line++; + token=tokens[c]; + } + if(token==(uint16_t)(-1)) { + /* explicit letter */ + SET_ADD(set, c); + ++length; + } else { + /* count token word */ + if(tokenLengths!=NULL) { + /* use cached token length */ + tokenLength=tokenLengths[c]; + if(tokenLength==0) { + tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token); + tokenLengths[c]=(int8_t)tokenLength; + } + } else { + tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token); + } + length+=tokenLength; + } + } + } + + *pLine=line; + return length; +} + +static void +calcGroupNameSetsLengths(int32_t maxNameLength) { + uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; + + uint16_t *tokens=(uint16_t *)uCharNames+8; + uint16_t tokenCount=*tokens++; + uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset; + + int8_t *tokenLengths; + + const uint16_t *group; + const uint8_t *s, *line, *lineLimit; + + int32_t groupCount, lineNumber, length; + + tokenLengths=(int8_t *)uprv_malloc(tokenCount); + if(tokenLengths!=NULL) { + uprv_memset(tokenLengths, 0, tokenCount); + } + + group=GET_GROUPS(uCharNames); + groupCount=*group++; + + /* enumerate all groups */ + while(groupCount>0) { + s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group); + s=expandGroupLengths(s, offsets, lengths); + + /* enumerate all lines in each group */ + for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) { + line=s+offsets[lineNumber]; + length=lengths[lineNumber]; + if(length==0) { + continue; + } + + lineLimit=line+length; + + /* read regular name */ + length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit); + if(length>maxNameLength) { + maxNameLength=length; + } + if(line==lineLimit) { + continue; + } + + /* read Unicode 1.0 name */ + length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit); + if(length>maxNameLength) { + maxNameLength=length; + } + if(line==lineLimit) { + continue; + } + + /* read ISO comment */ + /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/ + } + + group=NEXT_GROUP(group); + --groupCount; + } + + if(tokenLengths!=NULL) { + uprv_free(tokenLengths); + } + + /* set gMax... - name length last for threading */ + gMaxNameLength=maxNameLength; +} + +static UBool +calcNameSetsLengths(UErrorCode *pErrorCode) { + static const char extChars[]="0123456789ABCDEF<>-"; + int32_t i, maxNameLength; + + if(gMaxNameLength!=0) { + return TRUE; + } + + if(!isDataLoaded(pErrorCode)) { + return FALSE; + } + + /* set hex digits, used in various names, and <>-, used in extended names */ + for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) { + SET_ADD(gNameSet, extChars[i]); + } + + /* set sets and lengths from algorithmic names */ + maxNameLength=calcAlgNameSetsLengths(0); + + /* set sets and lengths from extended names */ + maxNameLength=calcExtNameSetsLengths(maxNameLength); + + /* set sets and lengths from group names, set global maximum values */ + calcGroupNameSetsLengths(maxNameLength); + + return TRUE; +} + +U_NAMESPACE_END + +/* public API --------------------------------------------------------------- */ + +U_NAMESPACE_USE + +U_CAPI int32_t U_EXPORT2 +u_charName(UChar32 code, UCharNameChoice nameChoice, + char *buffer, int32_t bufferLength, + UErrorCode *pErrorCode) { + AlgorithmicRange *algRange; + uint32_t *p; + uint32_t i; + int32_t length; + + /* check the argument values */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || + bufferLength<0 || (bufferLength>0 && buffer==NULL) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) { + return u_terminateChars(buffer, bufferLength, 0, pErrorCode); + } + + length=0; + + /* try algorithmic names first */ + p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); + i=*p; + algRange=(AlgorithmicRange *)(p+1); + while(i>0) { + if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) { + length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength); + break; + } + algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); + --i; + } + + if(i==0) { + if (nameChoice == U_EXTENDED_CHAR_NAME) { + length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength); + if (!length) { + /* extended character name */ + length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength); + } + } else { + /* normal character name */ + length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength); + } + } + + return u_terminateChars(buffer, bufferLength, length, pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +u_getISOComment(UChar32 /*c*/, + char *dest, int32_t destCapacity, + UErrorCode *pErrorCode) { + /* check the argument values */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + return u_terminateChars(dest, destCapacity, 0, pErrorCode); +} + +U_CAPI UChar32 U_EXPORT2 +u_charFromName(UCharNameChoice nameChoice, + const char *name, + UErrorCode *pErrorCode) { + char upper[120] = {0}; + char lower[120] = {0}; + FindName findName; + AlgorithmicRange *algRange; + uint32_t *p; + uint32_t i; + UChar32 cp = 0; + char c0; + static constexpr UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */ + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return error; + } + + if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return error; + } + + if(!isDataLoaded(pErrorCode)) { + return error; + } + + /* construct the uppercase and lowercase of the name first */ + for(i=0; i<sizeof(upper); ++i) { + if((c0=*name++)!=0) { + upper[i]=uprv_toupper(c0); + lower[i]=uprv_tolower(c0); + } else { + upper[i]=lower[i]=0; + break; + } + } + if(i==sizeof(upper)) { + /* name too long, there is no such character */ + *pErrorCode = U_ILLEGAL_CHAR_FOUND; + return error; + } + // i==strlen(name)==strlen(lower)==strlen(upper) + + /* try extended names first */ + if (lower[0] == '<') { + if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') { + // Parse a string like "<category-HHHH>" where HHHH is a hex code point. + uint32_t limit = i; + while (i >= 3 && lower[--i] != '-') {} + + // There should be 1 to 8 hex digits. + int32_t hexLength = limit - (i + 1); + if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) { + uint32_t cIdx; + + lower[i] = 0; + + for (++i; i < limit; ++i) { + if (lower[i] >= '0' && lower[i] <= '9') { + cp = (cp << 4) + lower[i] - '0'; + } else if (lower[i] >= 'a' && lower[i] <= 'f') { + cp = (cp << 4) + lower[i] - 'a' + 10; + } else { + *pErrorCode = U_ILLEGAL_CHAR_FOUND; + return error; + } + // Prevent signed-integer overflow and out-of-range code points. + if (cp > UCHAR_MAX_VALUE) { + *pErrorCode = U_ILLEGAL_CHAR_FOUND; + return error; + } + } + + /* Now validate the category name. + We could use a binary search, or a trie, if + we really wanted to. */ + uint8_t cat = getCharCat(cp); + for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) { + + if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) { + if (cat == cIdx) { + return cp; + } + break; + } + } + } + } + + *pErrorCode = U_ILLEGAL_CHAR_FOUND; + return error; + } + + /* try algorithmic names now */ + p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); + i=*p; + algRange=(AlgorithmicRange *)(p+1); + while(i>0) { + if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) { + return cp; + } + algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); + --i; + } + + /* normal character name */ + findName.otherName=upper; + findName.code=error; + enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice); + if (findName.code == error) { + *pErrorCode = U_ILLEGAL_CHAR_FOUND; + } + return findName.code; +} + +U_CAPI void U_EXPORT2 +u_enumCharNames(UChar32 start, UChar32 limit, + UEnumCharNamesFn *fn, + void *context, + UCharNameChoice nameChoice, + UErrorCode *pErrorCode) { + AlgorithmicRange *algRange; + uint32_t *p; + uint32_t i; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return; + } + + if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if((uint32_t) limit > UCHAR_MAX_VALUE + 1) { + limit = UCHAR_MAX_VALUE + 1; + } + if((uint32_t)start>=(uint32_t)limit) { + return; + } + + if(!isDataLoaded(pErrorCode)) { + return; + } + + /* interleave the data-driven ones with the algorithmic ones */ + /* iterate over all algorithmic ranges; assume that they are in ascending order */ + p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); + i=*p; + algRange=(AlgorithmicRange *)(p+1); + while(i>0) { + /* enumerate the character names before the current algorithmic range */ + /* here: start<limit */ + if((uint32_t)start<algRange->start) { + if((uint32_t)limit<=algRange->start) { + enumNames(uCharNames, start, limit, fn, context, nameChoice); + return; + } + if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) { + return; + } + start=(UChar32)algRange->start; + } + /* enumerate the character names in the current algorithmic range */ + /* here: algRange->start<=start<limit */ + if((uint32_t)start<=algRange->end) { + if((uint32_t)limit<=(algRange->end+1)) { + enumAlgNames(algRange, start, limit, fn, context, nameChoice); + return; + } + if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) { + return; + } + start=(UChar32)algRange->end+1; + } + /* continue to the next algorithmic range (here: start<limit) */ + algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); + --i; + } + /* enumerate the character names after the last algorithmic range */ + enumNames(uCharNames, start, limit, fn, context, nameChoice); +} + +U_CAPI int32_t U_EXPORT2 +uprv_getMaxCharNameLength() { + UErrorCode errorCode=U_ZERO_ERROR; + if(calcNameSetsLengths(&errorCode)) { + return gMaxNameLength; + } else { + return 0; + } +} + +/** + * Converts the char set cset into a Unicode set uset. + * @param cset Set of 256 bit flags corresponding to a set of chars. + * @param uset USet to receive characters. Existing contents are deleted. + */ +static void +charSetToUSet(uint32_t cset[8], const USetAdder *sa) { + UChar us[256]; + char cs[256]; + + int32_t i, length; + UErrorCode errorCode; + + errorCode=U_ZERO_ERROR; + + if(!calcNameSetsLengths(&errorCode)) { + return; + } + + /* build a char string with all chars that are used in character names */ + length=0; + for(i=0; i<256; ++i) { + if(SET_CONTAINS(cset, i)) { + cs[length++]=(char)i; + } + } + + /* convert the char string to a UChar string */ + u_charsToUChars(cs, us, length); + + /* add each UChar to the USet */ + for(i=0; i<length; ++i) { + if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */ + sa->add(sa->set, us[i]); + } + } +} + +/** + * Fills set with characters that are used in Unicode character names. + * @param set USet to receive characters. + */ +U_CAPI void U_EXPORT2 +uprv_getCharNameCharacters(const USetAdder *sa) { + charSetToUSet(gNameSet, sa); +} + +/* data swapping ------------------------------------------------------------ */ + +/* + * The token table contains non-negative entries for token bytes, + * and -1 for bytes that represent themselves in the data file's charset. + * -2 entries are used for lead bytes. + * + * Direct bytes (-1 entries) must be translated from the input charset family + * to the output charset family. + * makeTokenMap() writes a permutation mapping for this. + * Use it once for single-/lead-byte tokens and once more for all trail byte + * tokens. (';' is an unused trail byte marked with -1.) + */ +static void +makeTokenMap(const UDataSwapper *ds, + int16_t tokens[], uint16_t tokenCount, + uint8_t map[256], + UErrorCode *pErrorCode) { + UBool usedOutChar[256]; + uint16_t i, j; + uint8_t c1, c2; + + if(U_FAILURE(*pErrorCode)) { + return; + } + + if(ds->inCharset==ds->outCharset) { + /* Same charset family: identity permutation */ + for(i=0; i<256; ++i) { + map[i]=(uint8_t)i; + } + } else { + uprv_memset(map, 0, 256); + uprv_memset(usedOutChar, 0, 256); + + if(tokenCount>256) { + tokenCount=256; + } + + /* set the direct bytes (byte 0 always maps to itself) */ + for(i=1; i<tokenCount; ++i) { + if(tokens[i]==-1) { + /* convert the direct byte character */ + c1=(uint8_t)i; + ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n", + i, ds->inCharset); + return; + } + + /* enter the converted character into the map and mark it used */ + map[c1]=c2; + usedOutChar[c2]=TRUE; + } + } + + /* set the mappings for the rest of the permutation */ + for(i=j=1; i<tokenCount; ++i) { + /* set mappings that were not set for direct bytes */ + if(map[i]==0) { + /* set an output byte value that was not used as an output byte above */ + while(usedOutChar[j]) { + ++j; + } + map[i]=(uint8_t)j++; + } + } + + /* + * leave mappings at tokenCount and above unset if tokenCount<256 + * because they won't be used + */ + } +} + +U_CAPI int32_t U_EXPORT2 +uchar_swapNames(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UDataInfo *pInfo; + int32_t headerSize; + + const uint8_t *inBytes; + uint8_t *outBytes; + + uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset, + offset, i, count, stringsCount; + + const AlgorithmicRange *inRange; + AlgorithmicRange *outRange; + + /* udata_swapDataHeader checks the arguments */ + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* check data format and format version */ + pInfo=(const UDataInfo *)((const char *)inData+4); + if(!( + pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */ + pInfo->dataFormat[1]==0x6e && + pInfo->dataFormat[2]==0x61 && + pInfo->dataFormat[3]==0x6d && + pInfo->formatVersion[0]==1 + )) { + udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + inBytes=(const uint8_t *)inData+headerSize; + outBytes=(uint8_t *)outData+headerSize; + if(length<0) { + algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]); + } else { + length-=headerSize; + if( length<20 || + (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3])) + ) { + udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + + if(length<0) { + /* preflighting: iterate through algorithmic ranges */ + offset=algNamesOffset; + count=ds->readUInt32(*((const uint32_t *)(inBytes+offset))); + offset+=4; + + for(i=0; i<count; ++i) { + inRange=(const AlgorithmicRange *)(inBytes+offset); + offset+=ds->readUInt16(inRange->size); + } + } else { + /* swap data */ + const uint16_t *p; + uint16_t *q, *temp; + + int16_t tokens[512]; + uint16_t tokenCount; + + uint8_t map[256], trailMap[256]; + + /* copy the data for inaccessible bytes */ + if(inBytes!=outBytes) { + uprv_memcpy(outBytes, inBytes, length); + } + + /* the initial 4 offsets first */ + tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]); + groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]); + groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]); + ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode); + + /* + * now the tokens table + * it needs to be permutated along with the compressed name strings + */ + p=(const uint16_t *)(inBytes+16); + q=(uint16_t *)(outBytes+16); + + /* read and swap the tokenCount */ + tokenCount=ds->readUInt16(*p); + ds->swapArray16(ds, p, 2, q, pErrorCode); + ++p; + ++q; + + /* read the first 512 tokens and make the token maps */ + if(tokenCount<=512) { + count=tokenCount; + } else { + count=512; + } + for(i=0; i<count; ++i) { + tokens[i]=udata_readInt16(ds, p[i]); + } + for(; i<512; ++i) { + tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */ + } + makeTokenMap(ds, tokens, tokenCount, map, pErrorCode); + makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + /* + * swap and permutate the tokens + * go through a temporary array to support in-place swapping + */ + temp=(uint16_t *)uprv_malloc(tokenCount*2); + if(temp==NULL) { + udata_printError(ds, "out of memory swapping %u unames.icu tokens\n", + tokenCount); + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + /* swap and permutate single-/lead-byte tokens */ + for(i=0; i<tokenCount && i<256; ++i) { + ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode); + } + + /* swap and permutate trail-byte tokens */ + for(; i<tokenCount; ++i) { + ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode); + } + + /* copy the result into the output and free the temporary array */ + uprv_memcpy(q, temp, tokenCount*2); + uprv_free(temp); + + /* + * swap the token strings but not a possible padding byte after + * the terminating NUL of the last string + */ + udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset), + outBytes+tokenStringOffset, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "uchar_swapNames(token strings) failed\n"); + return 0; + } + + /* swap the group table */ + count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset))); + ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2), + outBytes+groupsOffset, pErrorCode); + + /* + * swap the group strings + * swap the string bytes but not the nibble-encoded string lengths + */ + if(ds->inCharset!=ds->outCharset) { + uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1]; + + const uint8_t *inStrings, *nextInStrings; + uint8_t *outStrings; + + uint8_t c; + + inStrings=inBytes+groupStringOffset; + outStrings=outBytes+groupStringOffset; + + stringsCount=algNamesOffset-groupStringOffset; + + /* iterate through string groups until only a few padding bytes are left */ + while(stringsCount>32) { + nextInStrings=expandGroupLengths(inStrings, offsets, lengths); + + /* move past the length bytes */ + stringsCount-=(uint32_t)(nextInStrings-inStrings); + outStrings+=nextInStrings-inStrings; + inStrings=nextInStrings; + + count=offsets[31]+lengths[31]; /* total number of string bytes in this group */ + stringsCount-=count; + + /* swap the string bytes using map[] and trailMap[] */ + while(count>0) { + c=*inStrings++; + *outStrings++=map[c]; + if(tokens[c]!=-2) { + --count; + } else { + /* token lead byte: swap the trail byte, too */ + *outStrings++=trailMap[*inStrings++]; + count-=2; + } + } + } + } + + /* swap the algorithmic ranges */ + offset=algNamesOffset; + count=ds->readUInt32(*((const uint32_t *)(inBytes+offset))); + ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode); + offset+=4; + + for(i=0; i<count; ++i) { + if(offset>(uint32_t)length) { + udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n", + length, i); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inRange=(const AlgorithmicRange *)(inBytes+offset); + outRange=(AlgorithmicRange *)(outBytes+offset); + offset+=ds->readUInt16(inRange->size); + + ds->swapArray32(ds, inRange, 8, outRange, pErrorCode); + ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode); + switch(inRange->type) { + case 0: + /* swap prefix string */ + ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)), + outRange+1, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n", + i); + return 0; + } + break; + case 1: + { + /* swap factors and the prefix and factor strings */ + uint32_t factorsCount; + + factorsCount=inRange->variant; + p=(const uint16_t *)(inRange+1); + q=(uint16_t *)(outRange+1); + ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode); + + /* swap the strings, up to the last terminating NUL */ + p+=factorsCount; + q+=factorsCount; + stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p); + while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) { + --stringsCount; + } + ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode); + } + break; + default: + udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n", + inRange->type, i); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + } + } + + return headerSize+(int32_t)offset; +} + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/thirdparty/icu4c/common/unicode/appendable.h b/thirdparty/icu4c/common/unicode/appendable.h new file mode 100644 index 0000000000..fc99254de1 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/appendable.h @@ -0,0 +1,239 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011-2012, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: appendable.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010dec07 +* created by: Markus W. Scherer +*/ + +#ifndef __APPENDABLE_H__ +#define __APPENDABLE_H__ + +/** + * \file + * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (char16_ts). + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" + +U_NAMESPACE_BEGIN + +class UnicodeString; + +/** + * Base class for objects to which Unicode characters and strings can be appended. + * Combines elements of Java Appendable and ICU4C ByteSink. + * + * This class can be used in APIs where it does not matter whether the actual destination is + * a UnicodeString, a char16_t[] array, a UnicodeSet, or any other object + * that receives and processes characters and/or strings. + * + * Implementation classes must implement at least appendCodeUnit(char16_t). + * The base class provides default implementations for the other methods. + * + * The methods do not take UErrorCode parameters. + * If an error occurs (e.g., out-of-memory), + * in addition to returning false from failing operations, + * the implementation must prevent unexpected behavior (e.g., crashes) + * from further calls and should make the error condition available separately + * (e.g., store a UErrorCode, make/keep a UnicodeString bogus). + * @stable ICU 4.8 + */ +class U_COMMON_API Appendable : public UObject { +public: + /** + * Destructor. + * @stable ICU 4.8 + */ + ~Appendable(); + + /** + * Appends a 16-bit code unit. + * @param c code unit + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool appendCodeUnit(char16_t c) = 0; + + /** + * Appends a code point. + * The default implementation calls appendCodeUnit(char16_t) once or twice. + * @param c code point 0..0x10ffff + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool appendCodePoint(UChar32 c); + + /** + * Appends a string. + * The default implementation calls appendCodeUnit(char16_t) for each code unit. + * @param s string, must not be NULL if length!=0 + * @param length string length, or -1 if NUL-terminated + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool appendString(const char16_t *s, int32_t length); + + /** + * Tells the object that the caller is going to append roughly + * appendCapacity char16_ts. A subclass might use this to pre-allocate + * a larger buffer if necessary. + * The default implementation does nothing. (It always returns true.) + * @param appendCapacity estimated number of char16_ts that will be appended + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool reserveAppendCapacity(int32_t appendCapacity); + + /** + * Returns a writable buffer for appending and writes the buffer's capacity to + * *resultCapacity. Guarantees *resultCapacity>=minCapacity. + * May return a pointer to the caller-owned scratch buffer which must have + * scratchCapacity>=minCapacity. + * The returned buffer is only valid until the next operation + * on this Appendable. + * + * After writing at most *resultCapacity char16_ts, call appendString() with the + * pointer returned from this function and the number of char16_ts written. + * Many appendString() implementations will avoid copying char16_ts if this function + * returned an internal buffer. + * + * Partial usage example: + * \code + * int32_t capacity; + * char16_t* buffer = app.getAppendBuffer(..., &capacity); + * ... Write n char16_ts into buffer, with n <= capacity. + * app.appendString(buffer, n); + * \endcode + * In many implementations, that call to append will avoid copying char16_ts. + * + * If the Appendable allocates or reallocates an internal buffer, it should use + * the desiredCapacityHint if appropriate. + * If a caller cannot provide a reasonable guess at the desired capacity, + * it should pass desiredCapacityHint=0. + * + * If a non-scratch buffer is returned, the caller may only pass + * a prefix to it to appendString(). + * That is, it is not correct to pass an interior pointer to appendString(). + * + * The default implementation always returns the scratch buffer. + * + * @param minCapacity required minimum capacity of the returned buffer; + * must be non-negative + * @param desiredCapacityHint desired capacity of the returned buffer; + * must be non-negative + * @param scratch default caller-owned buffer + * @param scratchCapacity capacity of the scratch buffer + * @param resultCapacity pointer to an integer which will be set to the + * capacity of the returned buffer + * @return a buffer with *resultCapacity>=minCapacity + * @stable ICU 4.8 + */ + virtual char16_t *getAppendBuffer(int32_t minCapacity, + int32_t desiredCapacityHint, + char16_t *scratch, int32_t scratchCapacity, + int32_t *resultCapacity); +}; + +/** + * An Appendable implementation which writes to a UnicodeString. + * + * This class is not intended for public subclassing. + * @stable ICU 4.8 + */ +class U_COMMON_API UnicodeStringAppendable : public Appendable { +public: + /** + * Aliases the UnicodeString (keeps its reference) for writing. + * @param s The UnicodeString to which this Appendable will write. + * @stable ICU 4.8 + */ + explicit UnicodeStringAppendable(UnicodeString &s) : str(s) {} + + /** + * Destructor. + * @stable ICU 4.8 + */ + ~UnicodeStringAppendable(); + + /** + * Appends a 16-bit code unit to the string. + * @param c code unit + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool appendCodeUnit(char16_t c); + + /** + * Appends a code point to the string. + * @param c code point 0..0x10ffff + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool appendCodePoint(UChar32 c); + + /** + * Appends a string to the UnicodeString. + * @param s string, must not be NULL if length!=0 + * @param length string length, or -1 if NUL-terminated + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool appendString(const char16_t *s, int32_t length); + + /** + * Tells the UnicodeString that the caller is going to append roughly + * appendCapacity char16_ts. + * @param appendCapacity estimated number of char16_ts that will be appended + * @return true if the operation succeeded + * @stable ICU 4.8 + */ + virtual UBool reserveAppendCapacity(int32_t appendCapacity); + + /** + * Returns a writable buffer for appending and writes the buffer's capacity to + * *resultCapacity. Guarantees *resultCapacity>=minCapacity. + * May return a pointer to the caller-owned scratch buffer which must have + * scratchCapacity>=minCapacity. + * The returned buffer is only valid until the next write operation + * on the UnicodeString. + * + * For details see Appendable::getAppendBuffer(). + * + * @param minCapacity required minimum capacity of the returned buffer; + * must be non-negative + * @param desiredCapacityHint desired capacity of the returned buffer; + * must be non-negative + * @param scratch default caller-owned buffer + * @param scratchCapacity capacity of the scratch buffer + * @param resultCapacity pointer to an integer which will be set to the + * capacity of the returned buffer + * @return a buffer with *resultCapacity>=minCapacity + * @stable ICU 4.8 + */ + virtual char16_t *getAppendBuffer(int32_t minCapacity, + int32_t desiredCapacityHint, + char16_t *scratch, int32_t scratchCapacity, + int32_t *resultCapacity); + +private: + UnicodeString &str; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __APPENDABLE_H__ diff --git a/thirdparty/icu4c/common/unicode/brkiter.h b/thirdparty/icu4c/common/unicode/brkiter.h new file mode 100644 index 0000000000..9bba5fcccc --- /dev/null +++ b/thirdparty/icu4c/common/unicode/brkiter.h @@ -0,0 +1,670 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File brkiter.h +* +* Modification History: +* +* Date Name Description +* 02/18/97 aliu Added typedef for TextCount. Made DONE const. +* 05/07/97 aliu Fixed DLL declaration. +* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK +* 08/11/98 helena Sync-up JDK1.2. +* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods. +******************************************************************************** +*/ + +#ifndef BRKITER_H +#define BRKITER_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Break Iterator. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#if UCONFIG_NO_BREAK_ITERATION + +U_NAMESPACE_BEGIN + +/* + * Allow the declaration of APIs with pointers to BreakIterator + * even when break iteration is removed from the build. + */ +class BreakIterator; + +U_NAMESPACE_END + +#else + +#include "unicode/uobject.h" +#include "unicode/unistr.h" +#include "unicode/chariter.h" +#include "unicode/locid.h" +#include "unicode/ubrk.h" +#include "unicode/strenum.h" +#include "unicode/utext.h" +#include "unicode/umisc.h" + +U_NAMESPACE_BEGIN + +/** + * The BreakIterator class implements methods for finding the location + * of boundaries in text. BreakIterator is an abstract base class. + * Instances of BreakIterator maintain a current position and scan over + * text returning the index of characters where boundaries occur. + * <p> + * Line boundary analysis determines where a text string can be broken + * when line-wrapping. The mechanism correctly handles punctuation and + * hyphenated words. + * <p> + * Sentence boundary analysis allows selection with correct + * interpretation of periods within numbers and abbreviations, and + * trailing punctuation marks such as quotation marks and parentheses. + * <p> + * Word boundary analysis is used by search and replace functions, as + * well as within text editing applications that allow the user to + * select words with a double click. Word selection provides correct + * interpretation of punctuation marks within and following + * words. Characters that are not part of a word, such as symbols or + * punctuation marks, have word-breaks on both sides. + * <p> + * Character boundary analysis allows users to interact with + * characters as they expect to, for example, when moving the cursor + * through a text string. Character boundary analysis provides correct + * navigation of through character strings, regardless of how the + * character is stored. For example, an accented character might be + * stored as a base character and a diacritical mark. What users + * consider to be a character can differ between languages. + * <p> + * The text boundary positions are found according to the rules + * described in Unicode Standard Annex #29, Text Boundaries, and + * Unicode Standard Annex #14, Line Breaking Properties. These + * are available at http://www.unicode.org/reports/tr14/ and + * http://www.unicode.org/reports/tr29/. + * <p> + * In addition to the C++ API defined in this header file, a + * plain C API with equivalent functionality is defined in the + * file ubrk.h + * <p> + * Code snippets illustrating the use of the Break Iterator APIs + * are available in the ICU User Guide, + * http://icu-project.org/userguide/boundaryAnalysis.html + * and in the sample program icu/source/samples/break/break.cpp + * + */ +class U_COMMON_API BreakIterator : public UObject { +public: + /** + * destructor + * @stable ICU 2.0 + */ + virtual ~BreakIterator(); + + /** + * Return true if another object is semantically equal to this + * one. The other object should be an instance of the same subclass of + * BreakIterator. Objects of different subclasses are considered + * unequal. + * <P> + * Return true if this BreakIterator is at the same position in the + * same text, and is the same class and type (word, line, etc.) of + * BreakIterator, as the argument. Text is considered the same if + * it contains the same characters, it need not be the same + * object, and styles are not considered. + * @stable ICU 2.0 + */ + virtual UBool operator==(const BreakIterator&) const = 0; + + /** + * Returns the complement of the result of operator== + * @param rhs The BreakIterator to be compared for inequality + * @return the complement of the result of operator== + * @stable ICU 2.0 + */ + UBool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); } + + /** + * Return a polymorphic copy of this object. This is an abstract + * method which subclasses implement. + * @stable ICU 2.0 + */ + virtual BreakIterator* clone() const = 0; + + /** + * Return a polymorphic class ID for this object. Different subclasses + * will return distinct unequal values. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const = 0; + + /** + * Return a CharacterIterator over the text being analyzed. + * @stable ICU 2.0 + */ + virtual CharacterIterator& getText(void) const = 0; + + + /** + * Get a UText for the text being analyzed. + * The returned UText is a shallow clone of the UText used internally + * by the break iterator implementation. It can safely be used to + * access the text without impacting any break iterator operations, + * but the underlying text itself must not be altered. + * + * @param fillIn A UText to be filled in. If NULL, a new UText will be + * allocated to hold the result. + * @param status receives any error codes. + * @return The current UText for this break iterator. If an input + * UText was provided, it will always be returned. + * @stable ICU 3.4 + */ + virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0; + + /** + * Change the text over which this operates. The text boundary is + * reset to the start. + * + * The BreakIterator will retain a reference to the supplied string. + * The caller must not modify or delete the text while the BreakIterator + * retains the reference. + * + * @param text The UnicodeString used to change the text. + * @stable ICU 2.0 + */ + virtual void setText(const UnicodeString &text) = 0; + + /** + * Reset the break iterator to operate over the text represented by + * the UText. The iterator position is reset to the start. + * + * This function makes a shallow clone of the supplied UText. This means + * that the caller is free to immediately close or otherwise reuse the + * Utext that was passed as a parameter, but that the underlying text itself + * must not be altered while being referenced by the break iterator. + * + * All index positions returned by break iterator functions are + * native indices from the UText. For example, when breaking UTF-8 + * encoded text, the break positions returned by next(), previous(), etc. + * will be UTF-8 string indices, not UTF-16 positions. + * + * @param text The UText used to change the text. + * @param status receives any error codes. + * @stable ICU 3.4 + */ + virtual void setText(UText *text, UErrorCode &status) = 0; + + /** + * Change the text over which this operates. The text boundary is + * reset to the start. + * Note that setText(UText *) provides similar functionality to this function, + * and is more efficient. + * @param it The CharacterIterator used to change the text. + * @stable ICU 2.0 + */ + virtual void adoptText(CharacterIterator* it) = 0; + + enum { + /** + * DONE is returned by previous() and next() after all valid + * boundaries have been returned. + * @stable ICU 2.0 + */ + DONE = (int32_t)-1 + }; + + /** + * Sets the current iteration position to the beginning of the text, position zero. + * @return The offset of the beginning of the text, zero. + * @stable ICU 2.0 + */ + virtual int32_t first(void) = 0; + + /** + * Set the iterator position to the index immediately BEYOND the last character in the text being scanned. + * @return The index immediately BEYOND the last character in the text being scanned. + * @stable ICU 2.0 + */ + virtual int32_t last(void) = 0; + + /** + * Set the iterator position to the boundary preceding the current boundary. + * @return The character index of the previous text boundary or DONE if all + * boundaries have been returned. + * @stable ICU 2.0 + */ + virtual int32_t previous(void) = 0; + + /** + * Advance the iterator to the boundary following the current boundary. + * @return The character index of the next text boundary or DONE if all + * boundaries have been returned. + * @stable ICU 2.0 + */ + virtual int32_t next(void) = 0; + + /** + * Return character index of the current iterator position within the text. + * @return The boundary most recently returned. + * @stable ICU 2.0 + */ + virtual int32_t current(void) const = 0; + + /** + * Advance the iterator to the first boundary following the specified offset. + * The value returned is always greater than the offset or + * the value BreakIterator.DONE + * @param offset the offset to begin scanning. + * @return The first boundary after the specified offset. + * @stable ICU 2.0 + */ + virtual int32_t following(int32_t offset) = 0; + + /** + * Set the iterator position to the first boundary preceding the specified offset. + * The value returned is always smaller than the offset or + * the value BreakIterator.DONE + * @param offset the offset to begin scanning. + * @return The first boundary before the specified offset. + * @stable ICU 2.0 + */ + virtual int32_t preceding(int32_t offset) = 0; + + /** + * Return true if the specified position is a boundary position. + * As a side effect, the current position of the iterator is set + * to the first boundary position at or following the specified offset. + * @param offset the offset to check. + * @return True if "offset" is a boundary position. + * @stable ICU 2.0 + */ + virtual UBool isBoundary(int32_t offset) = 0; + + /** + * Set the iterator position to the nth boundary from the current boundary + * @param n the number of boundaries to move by. A value of 0 + * does nothing. Negative values move to previous boundaries + * and positive values move to later boundaries. + * @return The new iterator position, or + * DONE if there are fewer than |n| boundaries in the specified direction. + * @stable ICU 2.0 + */ + virtual int32_t next(int32_t n) = 0; + + /** + * For RuleBasedBreakIterators, return the status tag from the break rule + * that determined the boundary at the current iteration position. + * <p> + * For break iterator types that do not support a rule status, + * a default value of 0 is returned. + * <p> + * @return the status from the break rule that determined the boundary at + * the current iteration position. + * @see RuleBaseBreakIterator::getRuleStatus() + * @see UWordBreak + * @stable ICU 52 + */ + virtual int32_t getRuleStatus() const; + + /** + * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) + * that determined the boundary at the current iteration position. + * <p> + * For break iterator types that do not support rule status, + * no values are returned. + * <p> + * The returned status value(s) are stored into an array provided by the caller. + * The values are stored in sorted (ascending) order. + * If the capacity of the output array is insufficient to hold the data, + * the output will be truncated to the available length, and a + * U_BUFFER_OVERFLOW_ERROR will be signaled. + * <p> + * @see RuleBaseBreakIterator::getRuleStatusVec + * + * @param fillInVec an array to be filled in with the status values. + * @param capacity the length of the supplied vector. A length of zero causes + * the function to return the number of status values, in the + * normal way, without attempting to store any values. + * @param status receives error codes. + * @return The number of rule status values from rules that determined + * the boundary at the current iteration position. + * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value + * is the total number of status values that were available, + * not the reduced number that were actually returned. + * @see getRuleStatus + * @stable ICU 52 + */ + virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); + + /** + * Create BreakIterator for word-breaks using the given locale. + * Returns an instance of a BreakIterator implementing word breaks. + * WordBreak is useful for word selection (ex. double click) + * @param where the locale. + * @param status the error code + * @return A BreakIterator for word-breaks. The UErrorCode& status + * parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check + * the value of U_SUCCESS(err). If you wish more detailed information, you + * can check for informational error results which still indicate success. + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * The caller owns the returned object and is responsible for deleting it. + * @stable ICU 2.0 + */ + static BreakIterator* U_EXPORT2 + createWordInstance(const Locale& where, UErrorCode& status); + + /** + * Create BreakIterator for line-breaks using specified locale. + * Returns an instance of a BreakIterator implementing line breaks. Line + * breaks are logically possible line breaks, actual line breaks are + * usually determined based on display width. + * LineBreak is useful for word wrapping text. + * @param where the locale. + * @param status The error code. + * @return A BreakIterator for line-breaks. The UErrorCode& status + * parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check + * the value of U_SUCCESS(err). If you wish more detailed information, you + * can check for informational error results which still indicate success. + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * The caller owns the returned object and is responsible for deleting it. + * @stable ICU 2.0 + */ + static BreakIterator* U_EXPORT2 + createLineInstance(const Locale& where, UErrorCode& status); + + /** + * Create BreakIterator for character-breaks using specified locale + * Returns an instance of a BreakIterator implementing character breaks. + * Character breaks are boundaries of combining character sequences. + * @param where the locale. + * @param status The error code. + * @return A BreakIterator for character-breaks. The UErrorCode& status + * parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check + * the value of U_SUCCESS(err). If you wish more detailed information, you + * can check for informational error results which still indicate success. + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * The caller owns the returned object and is responsible for deleting it. + * @stable ICU 2.0 + */ + static BreakIterator* U_EXPORT2 + createCharacterInstance(const Locale& where, UErrorCode& status); + + /** + * Create BreakIterator for sentence-breaks using specified locale + * Returns an instance of a BreakIterator implementing sentence breaks. + * @param where the locale. + * @param status The error code. + * @return A BreakIterator for sentence-breaks. The UErrorCode& status + * parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check + * the value of U_SUCCESS(err). If you wish more detailed information, you + * can check for informational error results which still indicate success. + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * The caller owns the returned object and is responsible for deleting it. + * @stable ICU 2.0 + */ + static BreakIterator* U_EXPORT2 + createSentenceInstance(const Locale& where, UErrorCode& status); + +#ifndef U_HIDE_DEPRECATED_API + /** + * Create BreakIterator for title-casing breaks using the specified locale + * Returns an instance of a BreakIterator implementing title breaks. + * The iterator returned locates title boundaries as described for + * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, + * please use a word boundary iterator. See {@link #createWordInstance }. + * + * @param where the locale. + * @param status The error code. + * @return A BreakIterator for title-breaks. The UErrorCode& status + * parameter is used to return status information to the user. + * To check whether the construction succeeded or not, you should check + * the value of U_SUCCESS(err). If you wish more detailed information, you + * can check for informational error results which still indicate success. + * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For + * example, 'de_CH' was requested, but nothing was found there, so 'de' was + * used. U_USING_DEFAULT_WARNING indicates that the default locale data was + * used; neither the requested locale nor any of its fall back locales + * could be found. + * The caller owns the returned object and is responsible for deleting it. + * @deprecated ICU 64 Use createWordInstance instead. + */ + static BreakIterator* U_EXPORT2 + createTitleInstance(const Locale& where, UErrorCode& status); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Get the set of Locales for which TextBoundaries are installed. + * <p><b>Note:</b> this will not return locales added through the register + * call. To see the registered locales too, use the getAvailableLocales + * function that returns a StringEnumeration object </p> + * @param count the output parameter of number of elements in the locale list + * @return available locales + * @stable ICU 2.0 + */ + static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); + + /** + * Get name of the object for the desired Locale, in the desired language. + * @param objectLocale must be from getAvailableLocales. + * @param displayLocale specifies the desired locale for output. + * @param name the fill-in parameter of the return value + * Uses best match. + * @return user-displayable name + * @stable ICU 2.0 + */ + static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, + const Locale& displayLocale, + UnicodeString& name); + + /** + * Get name of the object for the desired Locale, in the language of the + * default locale. + * @param objectLocale must be from getMatchingLocales + * @param name the fill-in parameter of the return value + * @return user-displayable name + * @stable ICU 2.0 + */ + static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale, + UnicodeString& name); + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Deprecated functionality. Use clone() instead. + * + * Thread safe client-buffer-based cloning operation + * Do NOT call delete on a safeclone, since 'new' is not used to create it. + * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated. + * If buffer is not large enough, new memory will be allocated. + * @param BufferSize reference to size of allocated space. + * If BufferSize == 0, a sufficient size for use in cloning will + * be returned ('pre-flighting') + * If BufferSize is not enough for a stack-based safe clone, + * new memory will be allocated. + * @param status to indicate whether the operation went on smoothly or there were errors + * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were + * necessary. + * @return pointer to the new clone + * + * @deprecated ICU 52. Use clone() instead. + */ + virtual BreakIterator * createBufferClone(void *stackBuffer, + int32_t &BufferSize, + UErrorCode &status) = 0; +#endif // U_FORCE_HIDE_DEPRECATED_API + +#ifndef U_HIDE_DEPRECATED_API + + /** + * Determine whether the BreakIterator was created in user memory by + * createBufferClone(), and thus should not be deleted. Such objects + * must be closed by an explicit call to the destructor (not delete). + * @deprecated ICU 52. Always delete the BreakIterator. + */ + inline UBool isBufferClone(void); + +#endif /* U_HIDE_DEPRECATED_API */ + +#if !UCONFIG_NO_SERVICE + /** + * Register a new break iterator of the indicated kind, to use in the given locale. + * The break iterator will be adopted. Clones of the iterator will be returned + * if a request for a break iterator of the given kind matches or falls back to + * this locale. + * Because ICU may choose to cache BreakIterators internally, this must + * be called at application startup, prior to any calls to + * BreakIterator::createXXXInstance to avoid undefined behavior. + * @param toAdopt the BreakIterator instance to be adopted + * @param locale the Locale for which this instance is to be registered + * @param kind the type of iterator for which this instance is to be registered + * @param status the in/out status code, no special meanings are assigned + * @return a registry key that can be used to unregister this instance + * @stable ICU 2.4 + */ + static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt, + const Locale& locale, + UBreakIteratorType kind, + UErrorCode& status); + + /** + * Unregister a previously-registered BreakIterator using the key returned from the + * register call. Key becomes invalid after a successful call and should not be used again. + * The BreakIterator corresponding to the key will be deleted. + * Because ICU may choose to cache BreakIterators internally, this should + * be called during application shutdown, after all calls to + * BreakIterator::createXXXInstance to avoid undefined behavior. + * @param key the registry key returned by a previous call to registerInstance + * @param status the in/out status code, no special meanings are assigned + * @return true if the iterator for the key was successfully unregistered + * @stable ICU 2.4 + */ + static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status); + + /** + * Return a StringEnumeration over the locales available at the time of the call, + * including registered locales. + * @return a StringEnumeration over the locales available at the time of the call + * @stable ICU 2.4 + */ + static StringEnumeration* U_EXPORT2 getAvailableLocales(void); +#endif + + /** + * Returns the locale for this break iterator. Two flavors are available: valid and + * actual locale. + * @stable ICU 2.8 + */ + Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; + +#ifndef U_HIDE_INTERNAL_API + /** Get the locale for this break iterator object. You can choose between valid and actual locale. + * @param type type of the locale we're looking for (valid or actual) + * @param status error code for the operation + * @return the locale + * @internal + */ + const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const; +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Set the subject text string upon which the break iterator is operating + * without changing any other aspect of the matching state. + * The new and previous text strings must have the same content. + * + * This function is intended for use in environments where ICU is operating on + * strings that may move around in memory. It provides a mechanism for notifying + * ICU that the string has been relocated, and providing a new UText to access the + * string in its new position. + * + * Note that the break iterator implementation never copies the underlying text + * of a string being processed, but always operates directly on the original text + * provided by the user. Refreshing simply drops the references to the old text + * and replaces them with references to the new. + * + * Caution: this function is normally used only by very specialized, + * system-level code. One example use case is with garbage collection that moves + * the text in memory. + * + * @param input The new (moved) text string. + * @param status Receives errors detected by this function. + * @return *this + * + * @stable ICU 49 + */ + virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0; + + private: + static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status); + static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status); + static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status); + + friend class ICUBreakIteratorFactory; + friend class ICUBreakIteratorService; + +protected: + // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API + // or else the compiler will create a public ones. + /** @internal */ + BreakIterator(); + /** @internal */ + BreakIterator (const BreakIterator &other); +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + BreakIterator (const Locale& valid, const Locale &actual); + /** @internal. Assignment Operator, used by RuleBasedBreakIterator. */ + BreakIterator &operator = (const BreakIterator &other); +#endif /* U_HIDE_INTERNAL_API */ + +private: + + /** @internal (private) */ + char actualLocale[ULOC_FULLNAME_CAPACITY]; + char validLocale[ULOC_FULLNAME_CAPACITY]; +}; + +#ifndef U_HIDE_DEPRECATED_API + +inline UBool BreakIterator::isBufferClone() +{ + return false; +} + +#endif /* U_HIDE_DEPRECATED_API */ + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // BRKITER_H +//eof diff --git a/thirdparty/icu4c/common/unicode/bytestream.h b/thirdparty/icu4c/common/unicode/bytestream.h new file mode 100644 index 0000000000..044f7a77e7 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/bytestream.h @@ -0,0 +1,309 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// Copyright (C) 2009-2012, International Business Machines +// Corporation and others. All Rights Reserved. +// +// Copyright 2007 Google Inc. All Rights Reserved. +// Author: sanjay@google.com (Sanjay Ghemawat) +// +// Abstract interface that consumes a sequence of bytes (ByteSink). +// +// Used so that we can write a single piece of code that can operate +// on a variety of output string types. +// +// Various implementations of this interface are provided: +// ByteSink: +// CheckedArrayByteSink Write to a flat array, with bounds checking +// StringByteSink Write to an STL string + +// This code is a contribution of Google code, and the style used here is +// a compromise between the original Google code and the ICU coding guidelines. +// For example, data types are ICU-ified (size_t,int->int32_t), +// and API comments doxygen-ified, but function names and behavior are +// as in the original, if possible. +// Assertion-style error handling, not available in ICU, was changed to +// parameter "pinning" similar to UnicodeString. +// +// In addition, this is only a partial port of the original Google code, +// limited to what was needed so far. The (nearly) complete original code +// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib +// (see ICU ticket 6765, r25517). + +#ifndef __BYTESTREAM_H__ +#define __BYTESTREAM_H__ + +/** + * \file + * \brief C++ API: Interface for writing bytes, and implementation classes. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" +#include "unicode/std_string.h" + +U_NAMESPACE_BEGIN + +/** + * A ByteSink can be filled with bytes. + * @stable ICU 4.2 + */ +class U_COMMON_API ByteSink : public UMemory { +public: + /** + * Default constructor. + * @stable ICU 4.2 + */ + ByteSink() { } + /** + * Virtual destructor. + * @stable ICU 4.2 + */ + virtual ~ByteSink(); + + /** + * Append "bytes[0,n-1]" to this. + * @param bytes the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 4.2 + */ + virtual void Append(const char* bytes, int32_t n) = 0; + +#ifndef U_HIDE_DRAFT_API + /** + * Appends n bytes to this. Same as Append(). + * Call AppendU8() with u8"string literals" which are const char * in C++11 + * but const char8_t * in C++20. + * If the compiler does support char8_t as a distinct type, + * then an AppendU8() overload for that is defined and will be chosen. + * + * @param bytes the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @draft ICU 67 + */ + inline void AppendU8(const char* bytes, int32_t n) { + Append(bytes, n); + } + +#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) + /** + * Appends n bytes to this. Same as Append() but for a const char8_t * pointer. + * Call AppendU8() with u8"string literals" which are const char * in C++11 + * but const char8_t * in C++20. + * If the compiler does support char8_t as a distinct type, + * then this AppendU8() overload for that is defined and will be chosen. + * + * @param bytes the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @draft ICU 67 + */ + inline void AppendU8(const char8_t* bytes, int32_t n) { + Append(reinterpret_cast<const char*>(bytes), n); + } +#endif +#endif // U_HIDE_DRAFT_API + + /** + * Returns a writable buffer for appending and writes the buffer's capacity to + * *result_capacity. Guarantees *result_capacity>=min_capacity. + * May return a pointer to the caller-owned scratch buffer which must have + * scratch_capacity>=min_capacity. + * The returned buffer is only valid until the next operation + * on this ByteSink. + * + * After writing at most *result_capacity bytes, call Append() with the + * pointer returned from this function and the number of bytes written. + * Many Append() implementations will avoid copying bytes if this function + * returned an internal buffer. + * + * Partial usage example: + * int32_t capacity; + * char* buffer = sink->GetAppendBuffer(..., &capacity); + * ... Write n bytes into buffer, with n <= capacity. + * sink->Append(buffer, n); + * In many implementations, that call to Append will avoid copying bytes. + * + * If the ByteSink allocates or reallocates an internal buffer, it should use + * the desired_capacity_hint if appropriate. + * If a caller cannot provide a reasonable guess at the desired capacity, + * it should pass desired_capacity_hint=0. + * + * If a non-scratch buffer is returned, the caller may only pass + * a prefix to it to Append(). + * That is, it is not correct to pass an interior pointer to Append(). + * + * The default implementation always returns the scratch buffer. + * + * @param min_capacity required minimum capacity of the returned buffer; + * must be non-negative + * @param desired_capacity_hint desired capacity of the returned buffer; + * must be non-negative + * @param scratch default caller-owned buffer + * @param scratch_capacity capacity of the scratch buffer + * @param result_capacity pointer to an integer which will be set to the + * capacity of the returned buffer + * @return a buffer with *result_capacity>=min_capacity + * @stable ICU 4.2 + */ + virtual char* GetAppendBuffer(int32_t min_capacity, + int32_t desired_capacity_hint, + char* scratch, int32_t scratch_capacity, + int32_t* result_capacity); + + /** + * Flush internal buffers. + * Some byte sinks use internal buffers or provide buffering + * and require calling Flush() at the end of the stream. + * The ByteSink should be ready for further Append() calls after Flush(). + * The default implementation of Flush() does nothing. + * @stable ICU 4.2 + */ + virtual void Flush(); + +private: + ByteSink(const ByteSink &) = delete; + ByteSink &operator=(const ByteSink &) = delete; +}; + +// ------------------------------------------------------------- +// Some standard implementations + +/** + * Implementation of ByteSink that writes to a flat byte array, + * with bounds-checking: + * This sink will not write more than capacity bytes to outbuf. + * If more than capacity bytes are Append()ed, then excess bytes are ignored, + * and Overflowed() will return true. + * Overflow does not cause a runtime error. + * @stable ICU 4.2 + */ +class U_COMMON_API CheckedArrayByteSink : public ByteSink { +public: + /** + * Constructs a ByteSink that will write to outbuf[0..capacity-1]. + * @param outbuf buffer to write to + * @param capacity size of the buffer + * @stable ICU 4.2 + */ + CheckedArrayByteSink(char* outbuf, int32_t capacity); + /** + * Destructor. + * @stable ICU 4.2 + */ + virtual ~CheckedArrayByteSink(); + /** + * Returns the sink to its original state, without modifying the buffer. + * Useful for reusing both the buffer and the sink for multiple streams. + * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0 + * and Overflowed()=false. + * @return *this + * @stable ICU 4.6 + */ + virtual CheckedArrayByteSink& Reset(); + /** + * Append "bytes[0,n-1]" to this. + * @param bytes the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 4.2 + */ + virtual void Append(const char* bytes, int32_t n); + /** + * Returns a writable buffer for appending and writes the buffer's capacity to + * *result_capacity. For details see the base class documentation. + * @param min_capacity required minimum capacity of the returned buffer; + * must be non-negative + * @param desired_capacity_hint desired capacity of the returned buffer; + * must be non-negative + * @param scratch default caller-owned buffer + * @param scratch_capacity capacity of the scratch buffer + * @param result_capacity pointer to an integer which will be set to the + * capacity of the returned buffer + * @return a buffer with *result_capacity>=min_capacity + * @stable ICU 4.2 + */ + virtual char* GetAppendBuffer(int32_t min_capacity, + int32_t desired_capacity_hint, + char* scratch, int32_t scratch_capacity, + int32_t* result_capacity); + /** + * Returns the number of bytes actually written to the sink. + * @return number of bytes written to the buffer + * @stable ICU 4.2 + */ + int32_t NumberOfBytesWritten() const { return size_; } + /** + * Returns true if any bytes were discarded, i.e., if there was an + * attempt to write more than 'capacity' bytes. + * @return true if more than 'capacity' bytes were Append()ed + * @stable ICU 4.2 + */ + UBool Overflowed() const { return overflowed_; } + /** + * Returns the number of bytes appended to the sink. + * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten() + * else they return the same number. + * @return number of bytes written to the buffer + * @stable ICU 4.6 + */ + int32_t NumberOfBytesAppended() const { return appended_; } +private: + char* outbuf_; + const int32_t capacity_; + int32_t size_; + int32_t appended_; + UBool overflowed_; + + CheckedArrayByteSink() = delete; + CheckedArrayByteSink(const CheckedArrayByteSink &) = delete; + CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete; +}; + +/** + * Implementation of ByteSink that writes to a "string". + * The StringClass is usually instantiated with a std::string. + * @stable ICU 4.2 + */ +template<typename StringClass> +class StringByteSink : public ByteSink { + public: + /** + * Constructs a ByteSink that will append bytes to the dest string. + * @param dest pointer to string object to append to + * @stable ICU 4.2 + */ + StringByteSink(StringClass* dest) : dest_(dest) { } + /** + * Constructs a ByteSink that reserves append capacity and will append bytes to the dest string. + * + * @param dest pointer to string object to append to + * @param initialAppendCapacity capacity beyond dest->length() to be reserve()d + * @stable ICU 60 + */ + StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) { + if (initialAppendCapacity > 0 && + (uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) { + dest->reserve(dest->length() + initialAppendCapacity); + } + } + /** + * Append "bytes[0,n-1]" to this. + * @param data the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 4.2 + */ + virtual void Append(const char* data, int32_t n) { dest_->append(data, n); } + private: + StringClass* dest_; + + StringByteSink() = delete; + StringByteSink(const StringByteSink &) = delete; + StringByteSink &operator=(const StringByteSink &) = delete; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __BYTESTREAM_H__ diff --git a/thirdparty/icu4c/common/unicode/bytestrie.h b/thirdparty/icu4c/common/unicode/bytestrie.h new file mode 100644 index 0000000000..85f802df42 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/bytestrie.h @@ -0,0 +1,565 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2012, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: bytestrie.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010sep25 +* created by: Markus W. Scherer +*/ + +#ifndef __BYTESTRIE_H__ +#define __BYTESTRIE_H__ + +/** + * \file + * \brief C++ API: Trie for mapping byte sequences to integer values. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" +#include "unicode/ustringtrie.h" + +U_NAMESPACE_BEGIN + +class ByteSink; +class BytesTrieBuilder; +class CharString; +class UVector32; + +/** + * Light-weight, non-const reader class for a BytesTrie. + * Traverses a byte-serialized data structure with minimal state, + * for mapping byte sequences to non-negative integer values. + * + * This class owns the serialized trie data only if it was constructed by + * the builder's build() method. + * The public constructor and the copy constructor only alias the data (only copy the pointer). + * There is no assignment operator. + * + * This class is not intended for public subclassing. + * @stable ICU 4.8 + */ +class U_COMMON_API BytesTrie : public UMemory { +public: + /** + * Constructs a BytesTrie reader instance. + * + * The trieBytes must contain a copy of a byte sequence from the BytesTrieBuilder, + * starting with the first byte of that sequence. + * The BytesTrie object will not read more bytes than + * the BytesTrieBuilder generated in the corresponding build() call. + * + * The array is not copied/cloned and must not be modified while + * the BytesTrie object is in use. + * + * @param trieBytes The byte array that contains the serialized trie. + * @stable ICU 4.8 + */ + BytesTrie(const void *trieBytes) + : ownedArray_(NULL), bytes_(static_cast<const uint8_t *>(trieBytes)), + pos_(bytes_), remainingMatchLength_(-1) {} + + /** + * Destructor. + * @stable ICU 4.8 + */ + ~BytesTrie(); + + /** + * Copy constructor, copies the other trie reader object and its state, + * but not the byte array which will be shared. (Shallow copy.) + * @param other Another BytesTrie object. + * @stable ICU 4.8 + */ + BytesTrie(const BytesTrie &other) + : ownedArray_(NULL), bytes_(other.bytes_), + pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {} + + /** + * Resets this trie to its initial state. + * @return *this + * @stable ICU 4.8 + */ + BytesTrie &reset() { + pos_=bytes_; + remainingMatchLength_=-1; + return *this; + } + + /** + * Returns the state of this trie as a 64-bit integer. + * The state value is never 0. + * + * @return opaque state value + * @see resetToState64 + * @stable ICU 65 + */ + uint64_t getState64() const { + return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) | + (uint64_t)(pos_ - bytes_); + } + + /** + * Resets this trie to the saved state. + * Unlike resetToState(State), the 64-bit state value + * must be from getState64() from the same trie object or + * from one initialized the exact same way. + * Because of no validation, this method is faster. + * + * @param state The opaque trie state value from getState64(). + * @return *this + * @see getState64 + * @see resetToState + * @see reset + * @stable ICU 65 + */ + BytesTrie &resetToState64(uint64_t state) { + remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2; + pos_ = bytes_ + (state & kState64PosMask); + return *this; + } + + /** + * BytesTrie state object, for saving a trie's current state + * and resetting the trie back to this state later. + * @stable ICU 4.8 + */ + class State : public UMemory { + public: + /** + * Constructs an empty State. + * @stable ICU 4.8 + */ + State() { bytes=NULL; } + private: + friend class BytesTrie; + + const uint8_t *bytes; + const uint8_t *pos; + int32_t remainingMatchLength; + }; + + /** + * Saves the state of this trie. + * @param state The State object to hold the trie's state. + * @return *this + * @see resetToState + * @stable ICU 4.8 + */ + const BytesTrie &saveState(State &state) const { + state.bytes=bytes_; + state.pos=pos_; + state.remainingMatchLength=remainingMatchLength_; + return *this; + } + + /** + * Resets this trie to the saved state. + * If the state object contains no state, or the state of a different trie, + * then this trie remains unchanged. + * @param state The State object which holds a saved trie state. + * @return *this + * @see saveState + * @see reset + * @stable ICU 4.8 + */ + BytesTrie &resetToState(const State &state) { + if(bytes_==state.bytes && bytes_!=NULL) { + pos_=state.pos; + remainingMatchLength_=state.remainingMatchLength; + } + return *this; + } + + /** + * Determines whether the byte sequence so far matches, whether it has a value, + * and whether another input byte can continue a matching byte sequence. + * @return The match/value Result. + * @stable ICU 4.8 + */ + UStringTrieResult current() const; + + /** + * Traverses the trie from the initial state for this input byte. + * Equivalent to reset().next(inByte). + * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff. + * Values below -0x100 and above 0xff will never match. + * @return The match/value Result. + * @stable ICU 4.8 + */ + inline UStringTrieResult first(int32_t inByte) { + remainingMatchLength_=-1; + if(inByte<0) { + inByte+=0x100; + } + return nextImpl(bytes_, inByte); + } + + /** + * Traverses the trie from the current state for this input byte. + * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff. + * Values below -0x100 and above 0xff will never match. + * @return The match/value Result. + * @stable ICU 4.8 + */ + UStringTrieResult next(int32_t inByte); + + /** + * Traverses the trie from the current state for this byte sequence. + * Equivalent to + * \code + * Result result=current(); + * for(each c in s) + * if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH; + * result=next(c); + * return result; + * \endcode + * @param s A string or byte sequence. Can be NULL if length is 0. + * @param length The length of the byte sequence. Can be -1 if NUL-terminated. + * @return The match/value Result. + * @stable ICU 4.8 + */ + UStringTrieResult next(const char *s, int32_t length); + + /** + * Returns a matching byte sequence's value if called immediately after + * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE. + * getValue() can be called multiple times. + * + * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE! + * @return The value for the byte sequence so far. + * @stable ICU 4.8 + */ + inline int32_t getValue() const { + const uint8_t *pos=pos_; + int32_t leadByte=*pos++; + // U_ASSERT(leadByte>=kMinValueLead); + return readValue(pos, leadByte>>1); + } + + /** + * Determines whether all byte sequences reachable from the current state + * map to the same value. + * @param uniqueValue Receives the unique value, if this function returns true. + * (output-only) + * @return true if all byte sequences reachable from the current state + * map to the same value. + * @stable ICU 4.8 + */ + inline UBool hasUniqueValue(int32_t &uniqueValue) const { + const uint8_t *pos=pos_; + // Skip the rest of a pending linear-match node. + return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, false, uniqueValue); + } + + /** + * Finds each byte which continues the byte sequence from the current state. + * That is, each byte b for which it would be next(b)!=USTRINGTRIE_NO_MATCH now. + * @param out Each next byte is appended to this object. + * (Only uses the out.Append(s, length) method.) + * @return the number of bytes which continue the byte sequence from here + * @stable ICU 4.8 + */ + int32_t getNextBytes(ByteSink &out) const; + + /** + * Iterator for all of the (byte sequence, value) pairs in a BytesTrie. + * @stable ICU 4.8 + */ + class U_COMMON_API Iterator : public UMemory { + public: + /** + * Iterates from the root of a byte-serialized BytesTrie. + * @param trieBytes The trie bytes. + * @param maxStringLength If 0, the iterator returns full strings/byte sequences. + * Otherwise, the iterator returns strings with this maximum length. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @stable ICU 4.8 + */ + Iterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode); + + /** + * Iterates from the current state of the specified BytesTrie. + * @param trie The trie whose state will be copied for iteration. + * @param maxStringLength If 0, the iterator returns full strings/byte sequences. + * Otherwise, the iterator returns strings with this maximum length. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @stable ICU 4.8 + */ + Iterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode); + + /** + * Destructor. + * @stable ICU 4.8 + */ + ~Iterator(); + + /** + * Resets this iterator to its initial state. + * @return *this + * @stable ICU 4.8 + */ + Iterator &reset(); + + /** + * @return true if there are more elements. + * @stable ICU 4.8 + */ + UBool hasNext() const; + + /** + * Finds the next (byte sequence, value) pair if there is one. + * + * If the byte sequence is truncated to the maximum length and does not + * have a real value, then the value is set to -1. + * In this case, this "not a real value" is indistinguishable from + * a real value of -1. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return true if there is another element. + * @stable ICU 4.8 + */ + UBool next(UErrorCode &errorCode); + + /** + * @return The NUL-terminated byte sequence for the last successful next(). + * @stable ICU 4.8 + */ + StringPiece getString() const; + /** + * @return The value for the last successful next(). + * @stable ICU 4.8 + */ + int32_t getValue() const { return value_; } + + private: + UBool truncateAndStop(); + + const uint8_t *branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode); + + const uint8_t *bytes_; + const uint8_t *pos_; + const uint8_t *initialPos_; + int32_t remainingMatchLength_; + int32_t initialRemainingMatchLength_; + + CharString *str_; + int32_t maxLength_; + int32_t value_; + + // The stack stores pairs of integers for backtracking to another + // outbound edge of a branch node. + // The first integer is an offset from bytes_. + // The second integer has the str_->length() from before the node in bits 15..0, + // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.) + // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24, + // but the code looks more confusing that way.) + UVector32 *stack_; + }; + +private: + friend class BytesTrieBuilder; + + /** + * Constructs a BytesTrie reader instance. + * Unlike the public constructor which just aliases an array, + * this constructor adopts the builder's array. + * This constructor is only called by the builder. + */ + BytesTrie(void *adoptBytes, const void *trieBytes) + : ownedArray_(static_cast<uint8_t *>(adoptBytes)), + bytes_(static_cast<const uint8_t *>(trieBytes)), + pos_(bytes_), remainingMatchLength_(-1) {} + + // No assignment operator. + BytesTrie &operator=(const BytesTrie &other); + + inline void stop() { + pos_=NULL; + } + + // Reads a compact 32-bit integer. + // pos is already after the leadByte, and the lead byte is already shifted right by 1. + static int32_t readValue(const uint8_t *pos, int32_t leadByte); + static inline const uint8_t *skipValue(const uint8_t *pos, int32_t leadByte) { + // U_ASSERT(leadByte>=kMinValueLead); + if(leadByte>=(kMinTwoByteValueLead<<1)) { + if(leadByte<(kMinThreeByteValueLead<<1)) { + ++pos; + } else if(leadByte<(kFourByteValueLead<<1)) { + pos+=2; + } else { + pos+=3+((leadByte>>1)&1); + } + } + return pos; + } + static inline const uint8_t *skipValue(const uint8_t *pos) { + int32_t leadByte=*pos++; + return skipValue(pos, leadByte); + } + + // Reads a jump delta and jumps. + static const uint8_t *jumpByDelta(const uint8_t *pos); + + static inline const uint8_t *skipDelta(const uint8_t *pos) { + int32_t delta=*pos++; + if(delta>=kMinTwoByteDeltaLead) { + if(delta<kMinThreeByteDeltaLead) { + ++pos; + } else if(delta<kFourByteDeltaLead) { + pos+=2; + } else { + pos+=3+(delta&1); + } + } + return pos; + } + + static inline UStringTrieResult valueResult(int32_t node) { + return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node&kValueIsFinal)); + } + + // Handles a branch node for both next(byte) and next(string). + UStringTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte); + + // Requires remainingLength_<0. + UStringTrieResult nextImpl(const uint8_t *pos, int32_t inByte); + + // Helper functions for hasUniqueValue(). + // Recursively finds a unique value (or whether there is not a unique one) + // from a branch. + static const uint8_t *findUniqueValueFromBranch(const uint8_t *pos, int32_t length, + UBool haveUniqueValue, int32_t &uniqueValue); + // Recursively finds a unique value (or whether there is not a unique one) + // starting from a position on a node lead byte. + static UBool findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue); + + // Helper functions for getNextBytes(). + // getNextBytes() when pos is on a branch node. + static void getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out); + static void append(ByteSink &out, int c); + + // BytesTrie data structure + // + // The trie consists of a series of byte-serialized nodes for incremental + // string/byte sequence matching. The root node is at the beginning of the trie data. + // + // Types of nodes are distinguished by their node lead byte ranges. + // After each node, except a final-value node, another node follows to + // encode match values or continue matching further bytes. + // + // Node types: + // - Value node: Stores a 32-bit integer in a compact, variable-length format. + // The value is for the string/byte sequence so far. + // One node bit indicates whether the value is final or whether + // matching continues with the next node. + // - Linear-match node: Matches a number of bytes. + // - Branch node: Branches to other nodes according to the current input byte. + // The node byte is the length of the branch (number of bytes to select from) + // minus 1. It is followed by a sub-node: + // - If the length is at most kMaxBranchLinearSubNodeLength, then + // there are length-1 (key, value) pairs and then one more comparison byte. + // If one of the key bytes matches, then the value is either a final value for + // the string/byte sequence so far, or a "jump" delta to the next node. + // If the last byte matches, then matching continues with the next node. + // (Values have the same encoding as value nodes.) + // - If the length is greater than kMaxBranchLinearSubNodeLength, then + // there is one byte and one "jump" delta. + // If the input byte is less than the sub-node byte, then "jump" by delta to + // the next sub-node which will have a length of length/2. + // (The delta has its own compact encoding.) + // Otherwise, skip the "jump" delta to the next sub-node + // which will have a length of length-length/2. + + // Node lead byte values. + + // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise + // the length is one more than the next byte. + + // For a branch sub-node with at most this many entries, we drop down + // to a linear search. + static const int32_t kMaxBranchLinearSubNodeLength=5; + + // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node. + static const int32_t kMinLinearMatch=0x10; + static const int32_t kMaxLinearMatchLength=0x10; + + // 20..ff: Variable-length value node. + // If odd, the value is final. (Otherwise, intermediate value or jump delta.) + // Then shift-right by 1 bit. + // The remaining lead byte value indicates the number of following bytes (0..4) + // and contains the value's top bits. + static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x20 + // It is a final value if bit 0 is set. + static const int32_t kValueIsFinal=1; + + // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds. + static const int32_t kMinOneByteValueLead=kMinValueLead/2; // 0x10 + static const int32_t kMaxOneByteValue=0x40; // At least 6 bits in the first byte. + + static const int32_t kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1; // 0x51 + static const int32_t kMaxTwoByteValue=0x1aff; + + static const int32_t kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1; // 0x6c + static const int32_t kFourByteValueLead=0x7e; + + // A little more than Unicode code points. (0x11ffff) + static const int32_t kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1; + + static const int32_t kFiveByteValueLead=0x7f; + + // Compact delta integers. + static const int32_t kMaxOneByteDelta=0xbf; + static const int32_t kMinTwoByteDeltaLead=kMaxOneByteDelta+1; // 0xc0 + static const int32_t kMinThreeByteDeltaLead=0xf0; + static const int32_t kFourByteDeltaLead=0xfe; + static const int32_t kFiveByteDeltaLead=0xff; + + static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff + static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff + + // For getState64(): + // The remainingMatchLength_ is -1..14=(kMaxLinearMatchLength=0x10)-2 + // so we need at least 5 bits for that. + // We add 2 to store it as a positive value 1..16=kMaxLinearMatchLength. + static constexpr int32_t kState64RemainingShift = 59; + static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1; + + uint8_t *ownedArray_; + + // Fixed value referencing the BytesTrie bytes. + const uint8_t *bytes_; + + // Iterator variables. + + // Pointer to next trie byte to read. NULL if no more matches. + const uint8_t *pos_; + // Remaining length of a linear-match node, minus 1. Negative if not in such a node. + int32_t remainingMatchLength_; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __BYTESTRIE_H__ diff --git a/thirdparty/icu4c/common/unicode/bytestriebuilder.h b/thirdparty/icu4c/common/unicode/bytestriebuilder.h new file mode 100644 index 0000000000..cae16e48b4 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/bytestriebuilder.h @@ -0,0 +1,188 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: bytestriebuilder.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010sep25 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C++ API: Builder for icu::BytesTrie + */ + +#ifndef __BYTESTRIEBUILDER_H__ +#define __BYTESTRIEBUILDER_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/bytestrie.h" +#include "unicode/stringpiece.h" +#include "unicode/stringtriebuilder.h" + +U_NAMESPACE_BEGIN + +class BytesTrieElement; +class CharString; +/** + * Builder class for BytesTrie. + * + * This class is not intended for public subclassing. + * @stable ICU 4.8 + */ +class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { +public: + /** + * Constructs an empty builder. + * @param errorCode Standard ICU error code. + * @stable ICU 4.8 + */ + BytesTrieBuilder(UErrorCode &errorCode); + + /** + * Destructor. + * @stable ICU 4.8 + */ + virtual ~BytesTrieBuilder(); + + /** + * Adds a (byte sequence, value) pair. + * The byte sequence must be unique. + * The bytes will be copied; the builder does not keep + * a reference to the input StringPiece or its data(). + * @param s The input byte sequence. + * @param value The value associated with this byte sequence. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return *this + * @stable ICU 4.8 + */ + BytesTrieBuilder &add(StringPiece s, int32_t value, UErrorCode &errorCode); + + /** + * Builds a BytesTrie for the add()ed data. + * Once built, no further data can be add()ed until clear() is called. + * + * A BytesTrie cannot be empty. At least one (byte sequence, value) pair + * must have been add()ed. + * + * This method passes ownership of the builder's internal result array to the new trie object. + * Another call to any build() variant will re-serialize the trie. + * After clear() has been called, a new array will be used as well. + * @param buildOption Build option, see UStringTrieBuildOption. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return A new BytesTrie for the add()ed data. + * @stable ICU 4.8 + */ + BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); + + /** + * Builds a BytesTrie for the add()ed data and byte-serializes it. + * Once built, no further data can be add()ed until clear() is called. + * + * A BytesTrie cannot be empty. At least one (byte sequence, value) pair + * must have been add()ed. + * + * Multiple calls to buildStringPiece() return StringPieces referring to the + * builder's same byte array, without rebuilding. + * If buildStringPiece() is called after build(), the trie will be + * re-serialized into a new array (because build() passes on ownership). + * If build() is called after buildStringPiece(), the trie object returned + * by build() will become the owner of the underlying string for the + * previously returned StringPiece. + * After clear() has been called, a new array will be used as well. + * @param buildOption Build option, see UStringTrieBuildOption. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data. + * @stable ICU 4.8 + */ + StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode); + + /** + * Removes all (byte sequence, value) pairs. + * New data can then be add()ed and a new trie can be built. + * @return *this + * @stable ICU 4.8 + */ + BytesTrieBuilder &clear(); + +private: + BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor + BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator + + void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); + + virtual int32_t getElementStringLength(int32_t i) const; + virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const; + virtual int32_t getElementValue(int32_t i) const; + + virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const; + + virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const; + virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const; + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const; + + virtual UBool matchNodesCanHaveValues() const { return false; } + + virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; } + virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; } + virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; } + + /** + * @internal (private) + */ + class BTLinearMatchNode : public LinearMatchNode { + public: + BTLinearMatchNode(const char *units, int32_t len, Node *nextNode); + virtual UBool operator==(const Node &other) const; + virtual void write(StringTrieBuilder &builder); + private: + const char *s; + }; + + virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, + Node *nextNode) const; + + UBool ensureCapacity(int32_t length); + virtual int32_t write(int32_t byte); + int32_t write(const char *b, int32_t length); + virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length); + virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); + virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); + virtual int32_t writeDeltaTo(int32_t jumpTarget); + + CharString *strings; // Pointer not object so we need not #include internal charstr.h. + BytesTrieElement *elements; + int32_t elementsCapacity; + int32_t elementsLength; + + // Byte serialization of the trie. + // Grows from the back: bytesLength measures from the end of the buffer! + char *bytes; + int32_t bytesCapacity; + int32_t bytesLength; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __BYTESTRIEBUILDER_H__ diff --git a/thirdparty/icu4c/common/unicode/caniter.h b/thirdparty/icu4c/common/unicode/caniter.h new file mode 100644 index 0000000000..4ed2b74b10 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/caniter.h @@ -0,0 +1,214 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 1996-2014, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************************* + */ + +#ifndef CANITER_H +#define CANITER_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/uobject.h" +#include "unicode/unistr.h" + +/** + * \file + * \brief C++ API: Canonical Iterator + */ + +/** Should permutation skip characters with combining class zero + * Should be either true or false. This is a compile time option + * @stable ICU 2.4 + */ +#ifndef CANITER_SKIP_ZEROES +#define CANITER_SKIP_ZEROES true +#endif + +U_NAMESPACE_BEGIN + +class Hashtable; +class Normalizer2; +class Normalizer2Impl; + +/** + * This class allows one to iterate through all the strings that are canonically equivalent to a given + * string. For example, here are some sample results: +Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +1: \\u0041\\u030A\\u0064\\u0307\\u0327 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +2: \\u0041\\u030A\\u0064\\u0327\\u0307 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} +3: \\u0041\\u030A\\u1E0B\\u0327 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} +4: \\u0041\\u030A\\u1E11\\u0307 + = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} +5: \\u00C5\\u0064\\u0307\\u0327 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +6: \\u00C5\\u0064\\u0327\\u0307 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} +7: \\u00C5\\u1E0B\\u0327 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} +8: \\u00C5\\u1E11\\u0307 + = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} +9: \\u212B\\u0064\\u0307\\u0327 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA} +10: \\u212B\\u0064\\u0327\\u0307 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE} +11: \\u212B\\u1E0B\\u0327 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA} +12: \\u212B\\u1E11\\u0307 + = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE} + *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones, + * since it has not been optimized for that situation. + * Note, CanonicalIterator is not intended to be subclassed. + * @author M. Davis + * @author C++ port by V. Weinstein + * @stable ICU 2.4 + */ +class U_COMMON_API CanonicalIterator U_FINAL : public UObject { +public: + /** + * Construct a CanonicalIterator object + * @param source string to get results for + * @param status Fill-in parameter which receives the status of this operation. + * @stable ICU 2.4 + */ + CanonicalIterator(const UnicodeString &source, UErrorCode &status); + + /** Destructor + * Cleans pieces + * @stable ICU 2.4 + */ + virtual ~CanonicalIterator(); + + /** + * Gets the NFD form of the current source we are iterating over. + * @return gets the source: NOTE: it is the NFD form of source + * @stable ICU 2.4 + */ + UnicodeString getSource(); + + /** + * Resets the iterator so that one can start again from the beginning. + * @stable ICU 2.4 + */ + void reset(); + + /** + * Get the next canonically equivalent string. + * <br><b>Warning: The strings are not guaranteed to be in any particular order.</b> + * @return the next string that is canonically equivalent. A bogus string is returned when + * the iteration is done. + * @stable ICU 2.4 + */ + UnicodeString next(); + + /** + * Set a new source for this iterator. Allows object reuse. + * @param newSource the source string to iterate against. This allows the same iterator to be used + * while changing the source string, saving object creation. + * @param status Fill-in parameter which receives the status of this operation. + * @stable ICU 2.4 + */ + void setSource(const UnicodeString &newSource, UErrorCode &status); + +#ifndef U_HIDE_INTERNAL_API + /** + * Dumb recursive implementation of permutation. + * TODO: optimize + * @param source the string to find permutations for + * @param skipZeros determine if skip zeros + * @param result the results in a set. + * @param status Fill-in parameter which receives the status of this operation. + * @internal + */ + static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + +private: + // ===================== PRIVATES ============================== + // private default constructor + CanonicalIterator(); + + + /** + * Copy constructor. Private for now. + * @internal (private) + */ + CanonicalIterator(const CanonicalIterator& other); + + /** + * Assignment operator. Private for now. + * @internal (private) + */ + CanonicalIterator& operator=(const CanonicalIterator& other); + + // fields + UnicodeString source; + UBool done; + + // 2 dimensional array holds the pieces of the string with + // their different canonically equivalent representations + UnicodeString **pieces; + int32_t pieces_length; + int32_t *pieces_lengths; + + // current is used in iterating to combine pieces + int32_t *current; + int32_t current_length; + + // transient fields + UnicodeString buffer; + + const Normalizer2 &nfd; + const Normalizer2Impl &nfcImpl; + + // we have a segment, in NFD. Find all the strings that are canonically equivalent to it. + UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment) + + //Set getEquivalents2(String segment); + Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status); + //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status); + + /** + * See if the decomposition of cp2 is at segment starting at segmentPos + * (with canonical rearrangment!) + * If so, take the remainder, and return the equivalents + */ + //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer); + Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); + //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status); + + void cleanPieces(); + +}; + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/casemap.h b/thirdparty/icu4c/common/unicode/casemap.h new file mode 100644 index 0000000000..53af84fa74 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/casemap.h @@ -0,0 +1,497 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// casemap.h +// created: 2017jan12 Markus W. Scherer + +#ifndef __CASEMAP_H__ +#define __CASEMAP_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: Low-level C++ case mapping functions. + */ + +U_NAMESPACE_BEGIN + +class BreakIterator; +class ByteSink; +class Edits; + +/** + * Low-level C++ case mapping functions. + * + * @stable ICU 59 + */ +class U_COMMON_API CaseMap U_FINAL : public UMemory { +public: + /** + * Lowercases a UTF-16 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see u_strToLower + * @stable ICU 59 + */ + static int32_t toLower( + const char *locale, uint32_t options, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + + /** + * Uppercases a UTF-16 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see u_strToUpper + * @stable ICU 59 + */ + static int32_t toUpper( + const char *locale, uint32_t options, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + + /** + * Titlecases a UTF-16 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with options bits.) + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, + * U_TITLECASE_NO_LOWERCASE, + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, + * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. + * @param iter A break iterator to find the first characters of words that are to be titlecased. + * It is set to the source string (setText()) + * and used one or more times for iteration (first() and next()). + * If NULL, then a word break iterator for the locale is used + * (or something equivalent). + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see u_strToTitle + * @see ucasemap_toTitle + * @stable ICU 59 + */ + static int32_t toTitle( + const char *locale, uint32_t options, BreakIterator *iter, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +#endif // UCONFIG_NO_BREAK_ITERATION + + /** + * Case-folds a UTF-16 string and optionally records edits. + * + * Case folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, + * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see u_strFoldCase + * @stable ICU 59 + */ + static int32_t fold( + uint32_t options, + const char16_t *src, int32_t srcLength, + char16_t *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + + /** + * Lowercases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param sink A ByteSink to which the result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * + * @see ucasemap_utf8ToLower + * @stable ICU 60 + */ + static void utf8ToLower( + const char *locale, uint32_t options, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode); + + /** + * Uppercases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param sink A ByteSink to which the result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * + * @see ucasemap_utf8ToUpper + * @stable ICU 60 + */ + static void utf8ToUpper( + const char *locale, uint32_t options, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + + /** + * Titlecases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with options bits.) + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, + * U_TITLECASE_NO_LOWERCASE, + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, + * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. + * @param iter A break iterator to find the first characters of words that are to be titlecased. + * It is set to the source string (setUText()) + * and used one or more times for iteration (first() and next()). + * If NULL, then a word break iterator for the locale is used + * (or something equivalent). + * @param src The original string. + * @param sink A ByteSink to which the result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * + * @see ucasemap_utf8ToTitle + * @stable ICU 60 + */ + static void utf8ToTitle( + const char *locale, uint32_t options, BreakIterator *iter, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode); + +#endif // UCONFIG_NO_BREAK_ITERATION + + /** + * Case-folds a UTF-8 string and optionally records edits. + * + * Case folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param sink A ByteSink to which the result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * + * @see ucasemap_utf8FoldCase + * @stable ICU 60 + */ + static void utf8Fold( + uint32_t options, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode); + + /** + * Lowercases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucasemap_utf8ToLower + * @stable ICU 59 + */ + static int32_t utf8ToLower( + const char *locale, uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + + /** + * Uppercases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucasemap_utf8ToUpper + * @stable ICU 59 + */ + static int32_t utf8ToUpper( + const char *locale, uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + + /** + * Titlecases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with options bits.) + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, + * U_TITLECASE_NO_LOWERCASE, + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, + * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. + * @param iter A break iterator to find the first characters of words that are to be titlecased. + * It is set to the source string (setUText()) + * and used one or more times for iteration (first() and next()). + * If NULL, then a word break iterator for the locale is used + * (or something equivalent). + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucasemap_utf8ToTitle + * @stable ICU 59 + */ + static int32_t utf8ToTitle( + const char *locale, uint32_t options, BreakIterator *iter, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +#endif // UCONFIG_NO_BREAK_ITERATION + + /** + * Case-folds a UTF-8 string and optionally records edits. + * + * Case folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, + * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful. + * When the result would be longer than destCapacity, + * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucasemap_utf8FoldCase + * @stable ICU 59 + */ + static int32_t utf8Fold( + uint32_t options, + const char *src, int32_t srcLength, + char *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode); + +private: + CaseMap() = delete; + CaseMap(const CaseMap &other) = delete; + CaseMap &operator=(const CaseMap &other) = delete; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __CASEMAP_H__ diff --git a/thirdparty/icu4c/common/unicode/char16ptr.h b/thirdparty/icu4c/common/unicode/char16ptr.h new file mode 100644 index 0000000000..c8a9ae6c35 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/char16ptr.h @@ -0,0 +1,313 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// char16ptr.h +// created: 2017feb28 Markus W. Scherer + +#ifndef __CHAR16PTR_H__ +#define __CHAR16PTR_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include <cstddef> + +/** + * \file + * \brief C++ API: char16_t pointer wrappers with + * implicit conversion from bit-compatible raw pointer types. + * Also conversion functions from char16_t * to UChar * and OldUChar *. + */ + +U_NAMESPACE_BEGIN + +/** + * \def U_ALIASING_BARRIER + * Barrier for pointer anti-aliasing optimizations even across function boundaries. + * @internal + */ +#ifdef U_ALIASING_BARRIER + // Use the predefined value. +#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT +# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory") +#elif defined(U_IN_DOXYGEN) +# define U_ALIASING_BARRIER(ptr) +#endif + +/** + * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. + * @stable ICU 59 + */ +class U_COMMON_API Char16Ptr U_FINAL { +public: + /** + * Copies the pointer. + * @param p pointer + * @stable ICU 59 + */ + inline Char16Ptr(char16_t *p); +#if !U_CHAR16_IS_TYPEDEF + /** + * Converts the pointer to char16_t *. + * @param p pointer to be converted + * @stable ICU 59 + */ + inline Char16Ptr(uint16_t *p); +#endif +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Converts the pointer to char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * @param p pointer to be converted + * @stable ICU 59 + */ + inline Char16Ptr(wchar_t *p); +#endif + /** + * nullptr constructor. + * @param p nullptr + * @stable ICU 59 + */ + inline Char16Ptr(std::nullptr_t p); + /** + * Destructor. + * @stable ICU 59 + */ + inline ~Char16Ptr(); + + /** + * Pointer access. + * @return the wrapped pointer + * @stable ICU 59 + */ + inline char16_t *get() const; + /** + * char16_t pointer access via type conversion (e.g., static_cast). + * @return the wrapped pointer + * @stable ICU 59 + */ + inline operator char16_t *() const { return get(); } + +private: + Char16Ptr() = delete; + +#ifdef U_ALIASING_BARRIER + template<typename T> static char16_t *cast(T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast<char16_t *>(t); + } + + char16_t *p_; +#else + union { + char16_t *cp; + uint16_t *up; + wchar_t *wp; + } u_; +#endif +}; + +/// \cond +#ifdef U_ALIASING_BARRIER + +Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {} +#if !U_CHAR16_IS_TYPEDEF +Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {} +#endif +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {} +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {} +Char16Ptr::~Char16Ptr() { + U_ALIASING_BARRIER(p_); +} + +char16_t *Char16Ptr::get() const { return p_; } + +#else + +Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; } +#if !U_CHAR16_IS_TYPEDEF +Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; } +#endif +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; } +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; } +Char16Ptr::~Char16Ptr() {} + +char16_t *Char16Ptr::get() const { return u_.cp; } + +#endif +/// \endcond + +/** + * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. + * @stable ICU 59 + */ +class U_COMMON_API ConstChar16Ptr U_FINAL { +public: + /** + * Copies the pointer. + * @param p pointer + * @stable ICU 59 + */ + inline ConstChar16Ptr(const char16_t *p); +#if !U_CHAR16_IS_TYPEDEF + /** + * Converts the pointer to char16_t *. + * @param p pointer to be converted + * @stable ICU 59 + */ + inline ConstChar16Ptr(const uint16_t *p); +#endif +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Converts the pointer to char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * @param p pointer to be converted + * @stable ICU 59 + */ + inline ConstChar16Ptr(const wchar_t *p); +#endif + /** + * nullptr constructor. + * @param p nullptr + * @stable ICU 59 + */ + inline ConstChar16Ptr(const std::nullptr_t p); + + /** + * Destructor. + * @stable ICU 59 + */ + inline ~ConstChar16Ptr(); + + /** + * Pointer access. + * @return the wrapped pointer + * @stable ICU 59 + */ + inline const char16_t *get() const; + /** + * char16_t pointer access via type conversion (e.g., static_cast). + * @return the wrapped pointer + * @stable ICU 59 + */ + inline operator const char16_t *() const { return get(); } + +private: + ConstChar16Ptr() = delete; + +#ifdef U_ALIASING_BARRIER + template<typename T> static const char16_t *cast(const T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast<const char16_t *>(t); + } + + const char16_t *p_; +#else + union { + const char16_t *cp; + const uint16_t *up; + const wchar_t *wp; + } u_; +#endif +}; + +/// \cond +#ifdef U_ALIASING_BARRIER + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {} +#if !U_CHAR16_IS_TYPEDEF +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {} +#endif +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {} +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {} +ConstChar16Ptr::~ConstChar16Ptr() { + U_ALIASING_BARRIER(p_); +} + +const char16_t *ConstChar16Ptr::get() const { return p_; } + +#else + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; } +#if !U_CHAR16_IS_TYPEDEF +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; } +#endif +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; } +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; } +ConstChar16Ptr::~ConstChar16Ptr() {} + +const char16_t *ConstChar16Ptr::get() const { return u_.cp; } + +#endif +/// \endcond + +/** + * Converts from const char16_t * to const UChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as const UChar * + * @stable ICU 59 + */ +inline const UChar *toUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<const UChar *>(p); +} + +/** + * Converts from char16_t * to UChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as UChar * + * @stable ICU 59 + */ +inline UChar *toUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<UChar *>(p); +} + +/** + * Converts from const char16_t * to const OldUChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as const OldUChar * + * @stable ICU 59 + */ +inline const OldUChar *toOldUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<const OldUChar *>(p); +} + +/** + * Converts from char16_t * to OldUChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as OldUChar * + * @stable ICU 59 + */ +inline OldUChar *toOldUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<OldUChar *>(p); +} + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __CHAR16PTR_H__ diff --git a/thirdparty/icu4c/common/unicode/chariter.h b/thirdparty/icu4c/common/unicode/chariter.h new file mode 100644 index 0000000000..96dc5db7c9 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/chariter.h @@ -0,0 +1,734 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************** +* +* Copyright (C) 1997-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************** +*/ + +#ifndef CHARITER_H +#define CHARITER_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" +#include "unicode/unistr.h" +/** + * \file + * \brief C++ API: Character Iterator + */ + +U_NAMESPACE_BEGIN +/** + * Abstract class that defines an API for forward-only iteration + * on text objects. + * This is a minimal interface for iteration without random access + * or backwards iteration. It is especially useful for wrapping + * streams with converters into an object for collation or + * normalization. + * + * <p>Characters can be accessed in two ways: as code units or as + * code points. + * Unicode code points are 21-bit integers and are the scalar values + * of Unicode characters. ICU uses the type UChar32 for them. + * Unicode code units are the storage units of a given + * Unicode/UCS Transformation Format (a character encoding scheme). + * With UTF-16, all code points can be represented with either one + * or two code units ("surrogates"). + * String storage is typically based on code units, while properties + * of characters are typically determined using code point values. + * Some processes may be designed to work with sequences of code units, + * or it may be known that all characters that are important to an + * algorithm can be represented with single code units. + * Other processes will need to use the code point access functions.</p> + * + * <p>ForwardCharacterIterator provides nextPostInc() to access + * a code unit and advance an internal position into the text object, + * similar to a <code>return text[position++]</code>.<br> + * It provides next32PostInc() to access a code point and advance an internal + * position.</p> + * + * <p>next32PostInc() assumes that the current position is that of + * the beginning of a code point, i.e., of its first code unit. + * After next32PostInc(), this will be true again. + * In general, access to code units and code points in the same + * iteration loop should not be mixed. In UTF-16, if the current position + * is on a second code unit (Low Surrogate), then only that code unit + * is returned even by next32PostInc().</p> + * + * <p>For iteration with either function, there are two ways to + * check for the end of the iteration. When there are no more + * characters in the text object: + * <ul> + * <li>The hasNext() function returns false.</li> + * <li>nextPostInc() and next32PostInc() return DONE + * when one attempts to read beyond the end of the text object.</li> + * </ul> + * + * Example: + * \code + * void function1(ForwardCharacterIterator &it) { + * UChar32 c; + * while(it.hasNext()) { + * c=it.next32PostInc(); + * // use c + * } + * } + * + * void function1(ForwardCharacterIterator &it) { + * char16_t c; + * while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) { + * // use c + * } + * } + * \endcode + * </p> + * + * @stable ICU 2.0 + */ +class U_COMMON_API ForwardCharacterIterator : public UObject { +public: + /** + * Value returned by most of ForwardCharacterIterator's functions + * when the iterator has reached the limits of its iteration. + * @stable ICU 2.0 + */ + enum { DONE = 0xffff }; + + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~ForwardCharacterIterator(); + + /** + * Returns true when both iterators refer to the same + * character in the same character-storage object. + * @param that The ForwardCharacterIterator to be compared for equality + * @return true when both iterators refer to the same + * character in the same character-storage object + * @stable ICU 2.0 + */ + virtual UBool operator==(const ForwardCharacterIterator& that) const = 0; + + /** + * Returns true when the iterators refer to different + * text-storage objects, or to different characters in the + * same text-storage object. + * @param that The ForwardCharacterIterator to be compared for inequality + * @return true when the iterators refer to different + * text-storage objects, or to different characters in the + * same text-storage object + * @stable ICU 2.0 + */ + inline UBool operator!=(const ForwardCharacterIterator& that) const; + + /** + * Generates a hash code for this iterator. + * @return the hash code. + * @stable ICU 2.0 + */ + virtual int32_t hashCode(void) const = 0; + + /** + * Returns a UClassID for this ForwardCharacterIterator ("poor man's + * RTTI").<P> Despite the fact that this function is public, + * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API! + * @return a UClassID for this ForwardCharacterIterator + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const = 0; + + /** + * Gets the current code unit for returning and advances to the next code unit + * in the iteration range + * (toward endIndex()). If there are + * no more code units to return, returns DONE. + * @return the current code unit. + * @stable ICU 2.0 + */ + virtual char16_t nextPostInc(void) = 0; + + /** + * Gets the current code point for returning and advances to the next code point + * in the iteration range + * (toward endIndex()). If there are + * no more code points to return, returns DONE. + * @return the current code point. + * @stable ICU 2.0 + */ + virtual UChar32 next32PostInc(void) = 0; + + /** + * Returns false if there are no more code units or code points + * at or after the current position in the iteration range. + * This is used with nextPostInc() or next32PostInc() in forward + * iteration. + * @returns false if there are no more code units or code points + * at or after the current position in the iteration range. + * @stable ICU 2.0 + */ + virtual UBool hasNext() = 0; + +protected: + /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/ + ForwardCharacterIterator(); + + /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/ + ForwardCharacterIterator(const ForwardCharacterIterator &other); + + /** + * Assignment operator to be overridden in the implementing class. + * @stable ICU 2.0 + */ + ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; } +}; + +/** + * Abstract class that defines an API for iteration + * on text objects. + * This is an interface for forward and backward iteration + * and random access into a text object. + * + * <p>The API provides backward compatibility to the Java and older ICU + * CharacterIterator classes but extends them significantly: + * <ol> + * <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li> + * <li>While the old API functions provided forward iteration with + * "pre-increment" semantics, the new one also provides functions + * with "post-increment" semantics. They are more efficient and should + * be the preferred iterator functions for new implementations. + * The backward iteration always had "pre-decrement" semantics, which + * are efficient.</li> + * <li>Just like ForwardCharacterIterator, it provides access to + * both code units and code points. Code point access versions are available + * for the old and the new iteration semantics.</li> + * <li>There are new functions for setting and moving the current position + * without returning a character, for efficiency.</li> + * </ol> + * + * See ForwardCharacterIterator for examples for using the new forward iteration + * functions. For backward iteration, there is also a hasPrevious() function + * that can be used analogously to hasNext(). + * The old functions work as before and are shown below.</p> + * + * <p>Examples for some of the new functions:</p> + * + * Forward iteration with hasNext(): + * \code + * void forward1(CharacterIterator &it) { + * UChar32 c; + * for(it.setToStart(); it.hasNext();) { + * c=it.next32PostInc(); + * // use c + * } + * } + * \endcode + * Forward iteration more similar to loops with the old forward iteration, + * showing a way to convert simple for() loops: + * \code + * void forward2(CharacterIterator &it) { + * char16_t c; + * for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) { + * // use c + * } + * } + * \endcode + * Backward iteration with setToEnd() and hasPrevious(): + * \code + * void backward1(CharacterIterator &it) { + * UChar32 c; + * for(it.setToEnd(); it.hasPrevious();) { + * c=it.previous32(); + * // use c + * } + * } + * \endcode + * Backward iteration with a more traditional for() loop: + * \code + * void backward2(CharacterIterator &it) { + * char16_t c; + * for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) { + * // use c + * } + * } + * \endcode + * + * Example for random access: + * \code + * void random(CharacterIterator &it) { + * // set to the third code point from the beginning + * it.move32(3, CharacterIterator::kStart); + * // get a code point from here without moving the position + * UChar32 c=it.current32(); + * // get the position + * int32_t pos=it.getIndex(); + * // get the previous code unit + * char16_t u=it.previous(); + * // move back one more code unit + * it.move(-1, CharacterIterator::kCurrent); + * // set the position back to where it was + * // and read the same code point c and move beyond it + * it.setIndex(pos); + * if(c!=it.next32PostInc()) { + * exit(1); // CharacterIterator inconsistent + * } + * } + * \endcode + * + * <p>Examples, especially for the old API:</p> + * + * Function processing characters, in this example simple output + * <pre> + * \code + * void processChar( char16_t c ) + * { + * cout << " " << c; + * } + * \endcode + * </pre> + * Traverse the text from start to finish + * <pre> + * \code + * void traverseForward(CharacterIterator& iter) + * { + * for(char16_t c = iter.first(); c != CharacterIterator.DONE; c = iter.next()) { + * processChar(c); + * } + * } + * \endcode + * </pre> + * Traverse the text backwards, from end to start + * <pre> + * \code + * void traverseBackward(CharacterIterator& iter) + * { + * for(char16_t c = iter.last(); c != CharacterIterator.DONE; c = iter.previous()) { + * processChar(c); + * } + * } + * \endcode + * </pre> + * Traverse both forward and backward from a given position in the text. + * Calls to notBoundary() in this example represents some additional stopping criteria. + * <pre> + * \code + * void traverseOut(CharacterIterator& iter, int32_t pos) + * { + * char16_t c; + * for (c = iter.setIndex(pos); + * c != CharacterIterator.DONE && (Unicode::isLetter(c) || Unicode::isDigit(c)); + * c = iter.next()) {} + * int32_t end = iter.getIndex(); + * for (c = iter.setIndex(pos); + * c != CharacterIterator.DONE && (Unicode::isLetter(c) || Unicode::isDigit(c)); + * c = iter.previous()) {} + * int32_t start = iter.getIndex() + 1; + * + * cout << "start: " << start << " end: " << end << endl; + * for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) { + * processChar(c); + * } + * } + * \endcode + * </pre> + * Creating a StringCharacterIterator and calling the test functions + * <pre> + * \code + * void CharacterIterator_Example( void ) + * { + * cout << endl << "===== CharacterIterator_Example: =====" << endl; + * UnicodeString text("Ein kleiner Satz."); + * StringCharacterIterator iterator(text); + * cout << "----- traverseForward: -----------" << endl; + * traverseForward( iterator ); + * cout << endl << endl << "----- traverseBackward: ----------" << endl; + * traverseBackward( iterator ); + * cout << endl << endl << "----- traverseOut: ---------------" << endl; + * traverseOut( iterator, 7 ); + * cout << endl << endl << "-----" << endl; + * } + * \endcode + * </pre> + * + * @stable ICU 2.0 + */ +class U_COMMON_API CharacterIterator : public ForwardCharacterIterator { +public: + /** + * Origin enumeration for the move() and move32() functions. + * @stable ICU 2.0 + */ + enum EOrigin { kStart, kCurrent, kEnd }; + + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~CharacterIterator(); + + /** + * Returns a pointer to a new CharacterIterator of the same + * concrete class as this one, and referring to the same + * character in the same text-storage object as this one. The + * caller is responsible for deleting the new clone. + * @return a pointer to a new CharacterIterator + * @stable ICU 2.0 + */ + virtual CharacterIterator* clone() const = 0; + + /** + * Sets the iterator to refer to the first code unit in its + * iteration range, and returns that code unit. + * This can be used to begin an iteration with next(). + * @return the first code unit in its iteration range. + * @stable ICU 2.0 + */ + virtual char16_t first(void) = 0; + + /** + * Sets the iterator to refer to the first code unit in its + * iteration range, returns that code unit, and moves the position + * to the second code unit. This is an alternative to setToStart() + * for forward iteration with nextPostInc(). + * @return the first code unit in its iteration range. + * @stable ICU 2.0 + */ + virtual char16_t firstPostInc(void); + + /** + * Sets the iterator to refer to the first code point in its + * iteration range, and returns that code unit, + * This can be used to begin an iteration with next32(). + * Note that an iteration with next32PostInc(), beginning with, + * e.g., setToStart() or firstPostInc(), is more efficient. + * @return the first code point in its iteration range. + * @stable ICU 2.0 + */ + virtual UChar32 first32(void) = 0; + + /** + * Sets the iterator to refer to the first code point in its + * iteration range, returns that code point, and moves the position + * to the second code point. This is an alternative to setToStart() + * for forward iteration with next32PostInc(). + * @return the first code point in its iteration range. + * @stable ICU 2.0 + */ + virtual UChar32 first32PostInc(void); + + /** + * Sets the iterator to refer to the first code unit or code point in its + * iteration range. This can be used to begin a forward + * iteration with nextPostInc() or next32PostInc(). + * @return the start position of the iteration range + * @stable ICU 2.0 + */ + inline int32_t setToStart(); + + /** + * Sets the iterator to refer to the last code unit in its + * iteration range, and returns that code unit. + * This can be used to begin an iteration with previous(). + * @return the last code unit. + * @stable ICU 2.0 + */ + virtual char16_t last(void) = 0; + + /** + * Sets the iterator to refer to the last code point in its + * iteration range, and returns that code unit. + * This can be used to begin an iteration with previous32(). + * @return the last code point. + * @stable ICU 2.0 + */ + virtual UChar32 last32(void) = 0; + + /** + * Sets the iterator to the end of its iteration range, just behind + * the last code unit or code point. This can be used to begin a backward + * iteration with previous() or previous32(). + * @return the end position of the iteration range + * @stable ICU 2.0 + */ + inline int32_t setToEnd(); + + /** + * Sets the iterator to refer to the "position"-th code unit + * in the text-storage object the iterator refers to, and + * returns that code unit. + * @param position the "position"-th code unit in the text-storage object + * @return the "position"-th code unit. + * @stable ICU 2.0 + */ + virtual char16_t setIndex(int32_t position) = 0; + + /** + * Sets the iterator to refer to the beginning of the code point + * that contains the "position"-th code unit + * in the text-storage object the iterator refers to, and + * returns that code point. + * The current position is adjusted to the beginning of the code point + * (its first code unit). + * @param position the "position"-th code unit in the text-storage object + * @return the "position"-th code point. + * @stable ICU 2.0 + */ + virtual UChar32 setIndex32(int32_t position) = 0; + + /** + * Returns the code unit the iterator currently refers to. + * @return the current code unit. + * @stable ICU 2.0 + */ + virtual char16_t current(void) const = 0; + + /** + * Returns the code point the iterator currently refers to. + * @return the current code point. + * @stable ICU 2.0 + */ + virtual UChar32 current32(void) const = 0; + + /** + * Advances to the next code unit in the iteration range + * (toward endIndex()), and returns that code unit. If there are + * no more code units to return, returns DONE. + * @return the next code unit. + * @stable ICU 2.0 + */ + virtual char16_t next(void) = 0; + + /** + * Advances to the next code point in the iteration range + * (toward endIndex()), and returns that code point. If there are + * no more code points to return, returns DONE. + * Note that iteration with "pre-increment" semantics is less + * efficient than iteration with "post-increment" semantics + * that is provided by next32PostInc(). + * @return the next code point. + * @stable ICU 2.0 + */ + virtual UChar32 next32(void) = 0; + + /** + * Advances to the previous code unit in the iteration range + * (toward startIndex()), and returns that code unit. If there are + * no more code units to return, returns DONE. + * @return the previous code unit. + * @stable ICU 2.0 + */ + virtual char16_t previous(void) = 0; + + /** + * Advances to the previous code point in the iteration range + * (toward startIndex()), and returns that code point. If there are + * no more code points to return, returns DONE. + * @return the previous code point. + * @stable ICU 2.0 + */ + virtual UChar32 previous32(void) = 0; + + /** + * Returns false if there are no more code units or code points + * before the current position in the iteration range. + * This is used with previous() or previous32() in backward + * iteration. + * @return false if there are no more code units or code points + * before the current position in the iteration range, return true otherwise. + * @stable ICU 2.0 + */ + virtual UBool hasPrevious() = 0; + + /** + * Returns the numeric index in the underlying text-storage + * object of the character returned by first(). Since it's + * possible to create an iterator that iterates across only + * part of a text-storage object, this number isn't + * necessarily 0. + * @returns the numeric index in the underlying text-storage + * object of the character returned by first(). + * @stable ICU 2.0 + */ + inline int32_t startIndex(void) const; + + /** + * Returns the numeric index in the underlying text-storage + * object of the position immediately BEYOND the character + * returned by last(). + * @return the numeric index in the underlying text-storage + * object of the position immediately BEYOND the character + * returned by last(). + * @stable ICU 2.0 + */ + inline int32_t endIndex(void) const; + + /** + * Returns the numeric index in the underlying text-storage + * object of the character the iterator currently refers to + * (i.e., the character returned by current()). + * @return the numeric index in the text-storage object of + * the character the iterator currently refers to + * @stable ICU 2.0 + */ + inline int32_t getIndex(void) const; + + /** + * Returns the length of the entire text in the underlying + * text-storage object. + * @return the length of the entire text in the text-storage object + * @stable ICU 2.0 + */ + inline int32_t getLength() const; + + /** + * Moves the current position relative to the start or end of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code units forward + * or backward by specifying a positive or negative delta. + * @param delta the position relative to origin. A positive delta means forward; + * a negative delta means backward. + * @param origin Origin enumeration {kStart, kCurrent, kEnd} + * @return the new position + * @stable ICU 2.0 + */ + virtual int32_t move(int32_t delta, EOrigin origin) = 0; + + /** + * Moves the current position relative to the start or end of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code points forward + * or backward by specifying a positive or negative delta. + * @param delta the position relative to origin. A positive delta means forward; + * a negative delta means backward. + * @param origin Origin enumeration {kStart, kCurrent, kEnd} + * @return the new position + * @stable ICU 2.0 + */ +#ifdef move32 + // One of the system headers right now is sometimes defining a conflicting macro we don't use +#undef move32 +#endif + virtual int32_t move32(int32_t delta, EOrigin origin) = 0; + + /** + * Copies the text under iteration into the UnicodeString + * referred to by "result". + * @param result Receives a copy of the text under iteration. + * @stable ICU 2.0 + */ + virtual void getText(UnicodeString& result) = 0; + +protected: + /** + * Empty constructor. + * @stable ICU 2.0 + */ + CharacterIterator(); + + /** + * Constructor, just setting the length field in this base class. + * @stable ICU 2.0 + */ + CharacterIterator(int32_t length); + + /** + * Constructor, just setting the length and position fields in this base class. + * @stable ICU 2.0 + */ + CharacterIterator(int32_t length, int32_t position); + + /** + * Constructor, just setting the length, start, end, and position fields in this base class. + * @stable ICU 2.0 + */ + CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position); + + /** + * Copy constructor. + * + * @param that The CharacterIterator to be copied + * @stable ICU 2.0 + */ + CharacterIterator(const CharacterIterator &that); + + /** + * Assignment operator. Sets this CharacterIterator to have the same behavior, + * as the one passed in. + * @param that The CharacterIterator passed in. + * @return the newly set CharacterIterator. + * @stable ICU 2.0 + */ + CharacterIterator &operator=(const CharacterIterator &that); + + /** + * Base class text length field. + * Necessary this for correct getText() and hashCode(). + * @stable ICU 2.0 + */ + int32_t textLength; + + /** + * Base class field for the current position. + * @stable ICU 2.0 + */ + int32_t pos; + + /** + * Base class field for the start of the iteration range. + * @stable ICU 2.0 + */ + int32_t begin; + + /** + * Base class field for the end of the iteration range. + * @stable ICU 2.0 + */ + int32_t end; +}; + +inline UBool +ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const { + return !operator==(that); +} + +inline int32_t +CharacterIterator::setToStart() { + return move(0, kStart); +} + +inline int32_t +CharacterIterator::setToEnd() { + return move(0, kEnd); +} + +inline int32_t +CharacterIterator::startIndex(void) const { + return begin; +} + +inline int32_t +CharacterIterator::endIndex(void) const { + return end; +} + +inline int32_t +CharacterIterator::getIndex(void) const { + return pos; +} + +inline int32_t +CharacterIterator::getLength(void) const { + return textLength; +} + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/dbbi.h b/thirdparty/icu4c/common/unicode/dbbi.h new file mode 100644 index 0000000000..3de9cc3814 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/dbbi.h @@ -0,0 +1,48 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2006,2013 IBM Corp. All rights reserved. +********************************************************************** +* Date Name Description +* 12/1/99 rgillam Complete port from Java. +* 01/13/2000 helena Added UErrorCode to ctors. +********************************************************************** +*/ + +#ifndef DBBI_H +#define DBBI_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/rbbi.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +/** + * \file + * \brief C++ API: Dictionary Based Break Iterator + */ + +U_NAMESPACE_BEGIN + +#ifndef U_HIDE_DEPRECATED_API +/** + * An obsolete subclass of RuleBasedBreakIterator. Handling of dictionary- + * based break iteration has been folded into the base class. This class + * is deprecated as of ICU 3.6. + * @deprecated ICU 3.6 + */ +typedef RuleBasedBreakIterator DictionaryBasedBreakIterator; + +#endif /* U_HIDE_DEPRECATED_API */ + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/docmain.h b/thirdparty/icu4c/common/unicode/docmain.h new file mode 100644 index 0000000000..b7984ada03 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/docmain.h @@ -0,0 +1,232 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/******************************************************************** + * COPYRIGHT: + * Copyright (c) 1997-2012, International Business Machines Corporation and + * others. All Rights Reserved. + * + * FILE NAME: DOCMAIN.h + * + * Date Name Description + * 12/11/2000 Ram Creation. + */ + +/** + * \file + * \brief (Non API- contains Doxygen definitions) + * + * This file contains documentation for Doxygen and doesnot have + * any significance with respect to C or C++ API + */ + +/*! \mainpage + * + * \section API API Reference Usage + * + * <h3>C++ Programmers:</h3> + * <p>Use <a href="hierarchy.html">Class Hierarchy</a> or <a href="classes.html"> Alphabetical List </a> + * or <a href="annotated.html"> Compound List</a> + * to find the class you are interested in. For example, to find BreakIterator, + * you can go to the <a href="classes.html"> Alphabetical List</a>, then click on + * "BreakIterator". Once you are at the class, you will find an inheritance + * chart, a list of the public members, a detailed description of the class, + * then detailed member descriptions.</p> + * + * <h3>C Programmers:</h3> + * <p>Use <a href="#Module">Module List</a> or <a href="globals_u.html">File Members</a> + * to find a list of all the functions and constants. + * For example, to find BreakIterator functions you would click on + * <a href="files.html"> File List</a>, + * then find "ubrk.h" and click on it. You will find descriptions of Defines, + * Typedefs, Enumerations, and Functions, with detailed descriptions below. + * If you want to find a specific function, such as ubrk_next(), then click + * first on <a href="globals.html"> File Members</a>, then use your browser + * Find dialog to search for "ubrk_next()".</p> + * + * + * <h3>API References for Previous Releases</h3> + * <p>The API References for each release of ICU are also available as + * a zip file from the ICU + * <a href="http://site.icu-project.org/download">download page</a>.</p> + * + * <hr> + * + * <h2>Architecture (User's Guide)</h2> + * <ul> + * <li><a href="https://unicode-org.github.io/icu/userguide/">Introduction</a></li> + * <li><a href="https://unicode-org.github.io/icu/userguide/i18n">Internationalization</a></li> + * <li><a href="https://unicode-org.github.io/icu/userguide/design">Locale Model, Multithreading, Error Handling, etc.</a></li> + * <li><a href="https://unicode-org.github.io/icu/userguide/conversion">Conversion</a></li> + * </ul> + * + * <hr> + *\htmlonly <h2><a NAME="Module">Module List</a></h2> \endhtmlonly + * <table border="1" cols="3" align="center"> + * <tr> + * <td><strong>Module Name</strong></td> + * <td><strong>C</strong></td> + * <td><strong>C++</strong></td> + * </tr> + * <tr> + * <td>Basic Types and Constants</td> + * <td>utypes.h</td> + * <td>utypes.h</td> + * </tr> + * <tr> + * <td>Strings and Character Iteration</td> + * <td>ustring.h, utf8.h, utf16.h, UText, UCharIterator</td> + * <td>icu::UnicodeString, icu::CharacterIterator, icu::Appendable, icu::StringPiece,icu::ByteSink</td> + * </tr> + * <tr> + * <td>Unicode Character<br/>Properties and Names</td> + * <td>uchar.h, uscript.h</td> + * <td>C API</td> + * </tr> + * <tr> + * <td>Sets of Unicode Code Points and Strings</td> + * <td>uset.h</td> + * <td>icu::UnicodeSet</td> + * </tr> + * <tr> + * <td>Maps from Unicode Code Points to Integer Values</td> + * <td>ucptrie.h, umutablecptrie.h</td> + * <td>C API</td> + * </tr> + * <tr> + * <td>Maps from Strings to Integer Values</td> + * <td>(no C API)</td> + * <td>icu::BytesTrie, icu::UCharsTrie</td> + * </tr> + * <tr> + * <td>Codepage Conversion</td> + * <td>ucnv.h, ucnvsel.h</td> + * <td>C API</td> + * </tr> + * <tr> + * <td>Codepage Detection</td> + * <td>ucsdet.h</td> + * <td>C API</td> + * </tr> + * <tr> + * <td>Unicode Text Compression</td> + * <td>ucnv.h<br/>(encoding name "SCSU" or "BOCU-1")</td> + * <td>C API</td> + * </tr> + * <tr> + * <td>Locales </td> + * <td>uloc.h</a></td> + * <td>icu::Locale, icu::LocaleBuilder, icu::LocaleMatcher</td> + * </tr> + * <tr> + * <td>Resource Bundles</td> + * <td>ures.h</td> + * <td>icu::ResourceBundle</td> + * </tr> + * <tr> + * <td>Normalization</td> + * <td>unorm2.h</td> + * <td>icu::Normalizer2</td> + * </tr> + * <tr> + * <td>Calendars</td> + * <td>ucal.h</td> + * <td>icu::Calendar</td> + * </tr> + * <tr> + * <td>Date and Time Formatting</td> + * <td>udat.h</td> + * <td>icu::DateFormat</td> + * </tr> + * <tr> + * <td>Message Formatting</td> + * <td>umsg.h</td> + * <td>icu::MessageFormat</td> + * </tr> + * <tr> + * <td>Number Formatting<br/>(includes currency and unit formatting)</td> + * <td>unumberformatter.h, unum.h</td> + * <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)</td> + * </tr> + * <tr> + * <td>Number Range Formatting<br />(includes currency and unit ranges)</td> + * <td>unumberrangeformatter.h</td> + * <td>icu::number::NumberRangeFormatter</td> + * </tr> + * <tr> + * <td>Number Spellout<br/>(Rule Based Number Formatting)</td> + * <td>unum.h<br/>(use UNUM_SPELLOUT)</td> + * <td>icu::RuleBasedNumberFormat</td> + * </tr> + * <tr> + * <td>Text Transformation<br/>(Transliteration)</td> + * <td>utrans.h</td> + * <td>icu::Transliterator</td> + * </tr> + * <tr> + * <td>Bidirectional Algorithm</td> + * <td>ubidi.h, ubiditransform.h</td> + * <td>C API</td> + * </tr> + * <tr> + * <td>Arabic Shaping</td> + * <td>ushape.h</td> + * <td>C API</td> + * </tr> + * <tr> + * <td>Collation</td> + * <td>ucol.h</td> + * <td>icu::Collator</td> + * </tr> + * <tr> + * <td>String Searching</td> + * <td>usearch.h</td> + * <td>icu::StringSearch</td> + * </tr> + * <tr> + * <td>Index Characters/<br/>Bucketing for Sorted Lists</td> + * <td>(no C API)</td> + * <td>icu::AlphabeticIndex</td> + * </tr> + * <tr> + * <td>Text Boundary Analysis<br/>(Break Iteration)</td> + * <td>ubrk.h</td> + * <td>icu::BreakIterator</td> + * </tr> + * <tr> + * <td>Regular Expressions</td> + * <td>uregex.h</td> + * <td>icu::RegexPattern, icu::RegexMatcher</td> + * </tr> + * <tr> + * <td>StringPrep</td> + * <td>usprep.h</td> + * <td>C API</td> + * </tr> + * <tr> + * <td>International Domain Names in Applications:<br/> + * UTS #46 in C/C++, IDNA2003 only via C API</td> + * <td>uidna.h</td> + * <td>idna.h</td> + * </tr> + * <tr> + * <td>Identifier Spoofing & Confusability</td> + * <td>uspoof.h</td> + * <td>C API</td> + * <tr> + * <td>Universal Time Scale</td> + * <td>utmscale.h</td> + * <td>C API</td> + * </tr> + * <tr> + * <td>Paragraph Layout / Complex Text Layout</td> + * <td>playout.h</td> + * <td>icu::ParagraphLayout</td> + * </tr> + * <tr> + * <td>ICU I/O</td> + * <td>ustdio.h</td> + * <td>ustream.h</td> + * </tr> + * </table> + * <i>This main page is generated from docmain.h</i> + */ diff --git a/thirdparty/icu4c/common/unicode/dtintrv.h b/thirdparty/icu4c/common/unicode/dtintrv.h new file mode 100644 index 0000000000..4f4b6bf7f4 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/dtintrv.h @@ -0,0 +1,164 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2008-2009, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +* +* File DTINTRV.H +* +******************************************************************************* +*/ + +#ifndef __DTINTRV_H__ +#define __DTINTRV_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: Date Interval data type + */ + +U_NAMESPACE_BEGIN + + +/** + * This class represents a date interval. + * It is a pair of UDate representing from UDate 1 to UDate 2. + * @stable ICU 4.0 +**/ +class U_COMMON_API DateInterval : public UObject { +public: + + /** + * Construct a DateInterval given a from date and a to date. + * @param fromDate The from date in date interval. + * @param toDate The to date in date interval. + * @stable ICU 4.0 + */ + DateInterval(UDate fromDate, UDate toDate); + + /** + * destructor + * @stable ICU 4.0 + */ + virtual ~DateInterval(); + + /** + * Get the from date. + * @return the from date in dateInterval. + * @stable ICU 4.0 + */ + inline UDate getFromDate() const; + + /** + * Get the to date. + * @return the to date in dateInterval. + * @stable ICU 4.0 + */ + inline UDate getToDate() const; + + + /** + * Return the class ID for this class. This is useful only for comparing to + * a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 4.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This + * method is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and clone() + * methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 4.0 + */ + virtual UClassID getDynamicClassID(void) const; + + + /** + * Copy constructor. + * @stable ICU 4.0 + */ + DateInterval(const DateInterval& other); + + /** + * Default assignment operator + * @stable ICU 4.0 + */ + DateInterval& operator=(const DateInterval&); + + /** + * Equality operator. + * @return true if the two DateIntervals are the same + * @stable ICU 4.0 + */ + virtual UBool operator==(const DateInterval& other) const; + + /** + * Non-equality operator + * @return true if the two DateIntervals are not the same + * @stable ICU 4.0 + */ + inline UBool operator!=(const DateInterval& other) const; + + + /** + * clone this object. + * The caller owns the result and should delete it when done. + * @return a cloned DateInterval + * @stable ICU 4.0 + */ + virtual DateInterval* clone() const; + +private: + /** + * Default constructor, not implemented. + */ + DateInterval(); + + UDate fromDate; + UDate toDate; + +} ;// end class DateInterval + + +inline UDate +DateInterval::getFromDate() const { + return fromDate; +} + + +inline UDate +DateInterval::getToDate() const { + return toDate; +} + + +inline UBool +DateInterval::operator!=(const DateInterval& other) const { + return ( !operator==(other) ); +} + + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/edits.h b/thirdparty/icu4c/common/unicode/edits.h new file mode 100644 index 0000000000..bfa07fa676 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/edits.h @@ -0,0 +1,531 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// edits.h +// created: 2016dec30 Markus W. Scherer + +#ifndef __EDITS_H__ +#define __EDITS_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: C++ class Edits for low-level string transformations on styled text. + */ + +U_NAMESPACE_BEGIN + +class UnicodeString; + +/** + * Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions + * in linear progression. Does not support moving/reordering of text. + * + * There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to + * instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and + * {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity, + * whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one + * mapping between code points in the source and destination strings. + * + * After all edits have been added, instances of this class should be considered immutable, and an + * {@link Edits::Iterator} can be used for queries. + * + * There are four flavors of Edits::Iterator: + * + * <ul> + * <li>{@link #getFineIterator()} retains full granularity of change edits. + * <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling + * next() on the iterator, skips over no-change edits (unchanged regions). + * <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change + * edits are automatically merged during the construction phase.) + * <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when + * calling next() on the iterator, skips over no-change edits (unchanged regions). + * </ul> + * + * For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the + * following fine edits: + * <ul> + * <li>abc ⇨ abc (no-change) + * <li>ß ⇨ ss (change) + * <li>D ⇨ d (change) + * <li>e ⇨ e (no-change) + * <li>F ⇨ f (change) + * </ul> + * and the following coarse edits (note how adjacent change edits get merged together): + * <ul> + * <li>abc ⇨ abc (no-change) + * <li>ßD ⇨ ssd (change) + * <li>e ⇨ e (no-change) + * <li>F ⇨ f (change) + * </ul> + * + * The "fine changes" and "coarse changes" iterators will step through only the change edits when their + * `Edits::Iterator::next()` methods are called. They are identical to the non-change iterators when + * their `Edits::Iterator::findSourceIndex()` or `Edits::Iterator::findDestinationIndex()` + * methods are used to walk through the string. + * + * For examples of how to use this class, see the test `TestCaseMapEditsIteratorDocs` in + * UCharacterCaseTest.java. + * + * An Edits object tracks a separate UErrorCode, but ICU string transformation functions + * (e.g., case mapping functions) merge any such errors into their API's UErrorCode. + * + * @stable ICU 59 + */ +class U_COMMON_API Edits U_FINAL : public UMemory { +public: + /** + * Constructs an empty object. + * @stable ICU 59 + */ + Edits() : + array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0), + errorCode_(U_ZERO_ERROR) {} + /** + * Copy constructor. + * @param other source edits + * @stable ICU 60 + */ + Edits(const Edits &other) : + array(stackArray), capacity(STACK_CAPACITY), length(other.length), + delta(other.delta), numChanges(other.numChanges), + errorCode_(other.errorCode_) { + copyArray(other); + } + /** + * Move constructor, might leave src empty. + * This object will have the same contents that the source object had. + * @param src source edits + * @stable ICU 60 + */ + Edits(Edits &&src) U_NOEXCEPT : + array(stackArray), capacity(STACK_CAPACITY), length(src.length), + delta(src.delta), numChanges(src.numChanges), + errorCode_(src.errorCode_) { + moveArray(src); + } + + /** + * Destructor. + * @stable ICU 59 + */ + ~Edits(); + + /** + * Assignment operator. + * @param other source edits + * @return *this + * @stable ICU 60 + */ + Edits &operator=(const Edits &other); + + /** + * Move assignment operator, might leave src empty. + * This object will have the same contents that the source object had. + * The behavior is undefined if *this and src are the same object. + * @param src source edits + * @return *this + * @stable ICU 60 + */ + Edits &operator=(Edits &&src) U_NOEXCEPT; + + /** + * Resets the data but may not release memory. + * @stable ICU 59 + */ + void reset() U_NOEXCEPT; + + /** + * Adds a no-change edit: a record for an unchanged segment of text. + * Normally called from inside ICU string transformation functions, not user code. + * @stable ICU 59 + */ + void addUnchanged(int32_t unchangedLength); + /** + * Adds a change edit: a record for a text replacement/insertion/deletion. + * Normally called from inside ICU string transformation functions, not user code. + * @stable ICU 59 + */ + void addReplace(int32_t oldLength, int32_t newLength); + /** + * Sets the UErrorCode if an error occurred while recording edits. + * Preserves older error codes in the outErrorCode. + * Normally called from inside ICU string transformation functions, not user code. + * @param outErrorCode Set to an error code if it does not contain one already + * and an error occurred while recording edits. + * Otherwise unchanged. + * @return true if U_FAILURE(outErrorCode) + * @stable ICU 59 + */ + UBool copyErrorTo(UErrorCode &outErrorCode) const; + + /** + * How much longer is the new text compared with the old text? + * @return new length minus old length + * @stable ICU 59 + */ + int32_t lengthDelta() const { return delta; } + /** + * @return true if there are any change edits + * @stable ICU 59 + */ + UBool hasChanges() const { return numChanges != 0; } + + /** + * @return the number of change edits + * @stable ICU 60 + */ + int32_t numberOfChanges() const { return numChanges; } + + /** + * Access to the list of edits. + * + * At any moment in time, an instance of this class points to a single edit: a "window" into a span + * of the source string and the corresponding span of the destination string. The source string span + * starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string + * span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars. + * + * The iterator can be moved between edits using the `next()`, `findSourceIndex(int32_t, UErrorCode &)`, + * and `findDestinationIndex(int32_t, UErrorCode &)` methods. + * Calling any of these methods mutates the iterator to make it point to the corresponding edit. + * + * For more information, see the documentation for {@link Edits}. + * + * @see getCoarseIterator + * @see getFineIterator + * @stable ICU 59 + */ + struct U_COMMON_API Iterator U_FINAL : public UMemory { + /** + * Default constructor, empty iterator. + * @stable ICU 60 + */ + Iterator() : + array(nullptr), index(0), length(0), + remaining(0), onlyChanges_(false), coarse(false), + dir(0), changed(false), oldLength_(0), newLength_(0), + srcIndex(0), replIndex(0), destIndex(0) {} + /** + * Copy constructor. + * @stable ICU 59 + */ + Iterator(const Iterator &other) = default; + /** + * Assignment operator. + * @stable ICU 59 + */ + Iterator &operator=(const Iterator &other) = default; + + /** + * Advances the iterator to the next edit. + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return true if there is another edit + * @stable ICU 59 + */ + UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); } + + /** + * Moves the iterator to the edit that contains the source index. + * The source index may be found in a no-change edit + * even if normal iteration would skip no-change edits. + * Normal iteration can continue from a found edit. + * + * The iterator state before this search logically does not matter. + * (It may affect the performance of the search.) + * + * The iterator state after this search is undefined + * if the source index is out of bounds for the source string. + * + * @param i source index + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return true if the edit for the source index was found + * @stable ICU 59 + */ + UBool findSourceIndex(int32_t i, UErrorCode &errorCode) { + return findIndex(i, true, errorCode) == 0; + } + + /** + * Moves the iterator to the edit that contains the destination index. + * The destination index may be found in a no-change edit + * even if normal iteration would skip no-change edits. + * Normal iteration can continue from a found edit. + * + * The iterator state before this search logically does not matter. + * (It may affect the performance of the search.) + * + * The iterator state after this search is undefined + * if the source index is out of bounds for the source string. + * + * @param i destination index + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return true if the edit for the destination index was found + * @stable ICU 60 + */ + UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) { + return findIndex(i, false, errorCode) == 0; + } + + /** + * Computes the destination index corresponding to the given source index. + * If the source index is inside a change edit (not at its start), + * then the destination index at the end of that edit is returned, + * since there is no information about index mapping inside a change edit. + * + * (This means that indexes to the start and middle of an edit, + * for example around a grapheme cluster, are mapped to indexes + * encompassing the entire edit. + * The alternative, mapping an interior index to the start, + * would map such an interval to an empty one.) + * + * This operation will usually but not always modify this object. + * The iterator state after this search is undefined. + * + * @param i source index + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return destination index; undefined if i is not 0..string length + * @stable ICU 60 + */ + int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode); + + /** + * Computes the source index corresponding to the given destination index. + * If the destination index is inside a change edit (not at its start), + * then the source index at the end of that edit is returned, + * since there is no information about index mapping inside a change edit. + * + * (This means that indexes to the start and middle of an edit, + * for example around a grapheme cluster, are mapped to indexes + * encompassing the entire edit. + * The alternative, mapping an interior index to the start, + * would map such an interval to an empty one.) + * + * This operation will usually but not always modify this object. + * The iterator state after this search is undefined. + * + * @param i destination index + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return source index; undefined if i is not 0..string length + * @stable ICU 60 + */ + int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode); + + /** + * Returns whether the edit currently represented by the iterator is a change edit. + * + * @return true if this edit replaces oldLength() units with newLength() different ones. + * false if oldLength units remain unchanged. + * @stable ICU 59 + */ + UBool hasChange() const { return changed; } + + /** + * The length of the current span in the source string, which starts at {@link #sourceIndex}. + * + * @return the number of units in the original string which are replaced or remain unchanged. + * @stable ICU 59 + */ + int32_t oldLength() const { return oldLength_; } + + /** + * The length of the current span in the destination string, which starts at + * {@link #destinationIndex}, or in the replacement string, which starts at + * {@link #replacementIndex}. + * + * @return the number of units in the modified string, if hasChange() is true. + * Same as oldLength if hasChange() is false. + * @stable ICU 59 + */ + int32_t newLength() const { return newLength_; } + + /** + * The start index of the current span in the source string; the span has length + * {@link #oldLength}. + * + * @return the current index into the source string + * @stable ICU 59 + */ + int32_t sourceIndex() const { return srcIndex; } + + /** + * The start index of the current span in the replacement string; the span has length + * {@link #newLength}. Well-defined only if the current edit is a change edit. + * + * The *replacement string* is the concatenation of all substrings of the destination + * string corresponding to change edits. + * + * This method is intended to be used together with operations that write only replacement + * characters (e.g. operations specifying the \ref U_OMIT_UNCHANGED_TEXT option). + * The source string can then be modified in-place. + * + * @return the current index into the replacement-characters-only string, + * not counting unchanged spans + * @stable ICU 59 + */ + int32_t replacementIndex() const { + // TODO: Throw an exception if we aren't in a change edit? + return replIndex; + } + + /** + * The start index of the current span in the destination string; the span has length + * {@link #newLength}. + * + * @return the current index into the full destination string + * @stable ICU 59 + */ + int32_t destinationIndex() const { return destIndex; } + +#ifndef U_HIDE_INTERNAL_API + /** + * A string representation of the current edit represented by the iterator for debugging. You + * should not depend on the contents of the return string. + * @internal + */ + UnicodeString& toString(UnicodeString& appendTo) const; +#endif // U_HIDE_INTERNAL_API + + private: + friend class Edits; + + Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs); + + int32_t readLength(int32_t head); + void updateNextIndexes(); + void updatePreviousIndexes(); + UBool noNext(); + UBool next(UBool onlyChanges, UErrorCode &errorCode); + UBool previous(UErrorCode &errorCode); + /** @return -1: error or i<0; 0: found; 1: i>=string length */ + int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode); + + const uint16_t *array; + int32_t index, length; + // 0 if we are not within compressed equal-length changes. + // Otherwise the number of remaining changes, including the current one. + int32_t remaining; + UBool onlyChanges_, coarse; + + int8_t dir; // iteration direction: back(<0), initial(0), forward(>0) + UBool changed; + int32_t oldLength_, newLength_; + int32_t srcIndex, replIndex, destIndex; + }; + + /** + * Returns an Iterator for coarse-grained change edits + * (adjacent change edits are treated as one). + * Can be used to perform simple string updates. + * Skips no-change edits. + * @return an Iterator that merges adjacent changes. + * @stable ICU 59 + */ + Iterator getCoarseChangesIterator() const { + return Iterator(array, length, true, true); + } + + /** + * Returns an Iterator for coarse-grained change and no-change edits + * (adjacent change edits are treated as one). + * Can be used to perform simple string updates. + * Adjacent change edits are treated as one edit. + * @return an Iterator that merges adjacent changes. + * @stable ICU 59 + */ + Iterator getCoarseIterator() const { + return Iterator(array, length, false, true); + } + + /** + * Returns an Iterator for fine-grained change edits + * (full granularity of change edits is retained). + * Can be used for modifying styled text. + * Skips no-change edits. + * @return an Iterator that separates adjacent changes. + * @stable ICU 59 + */ + Iterator getFineChangesIterator() const { + return Iterator(array, length, true, false); + } + + /** + * Returns an Iterator for fine-grained change and no-change edits + * (full granularity of change edits is retained). + * Can be used for modifying styled text. + * @return an Iterator that separates adjacent changes. + * @stable ICU 59 + */ + Iterator getFineIterator() const { + return Iterator(array, length, false, false); + } + + /** + * Merges the two input Edits and appends the result to this object. + * + * Consider two string transformations (for example, normalization and case mapping) + * where each records Edits in addition to writing an output string.<br> + * Edits ab reflect how substrings of input string a + * map to substrings of intermediate string b.<br> + * Edits bc reflect how substrings of intermediate string b + * map to substrings of output string c.<br> + * This function merges ab and bc such that the additional edits + * recorded in this object reflect how substrings of input string a + * map to substrings of output string c. + * + * If unrelated Edits are passed in where the output string of the first + * has a different length than the input string of the second, + * then a U_ILLEGAL_ARGUMENT_ERROR is reported. + * + * @param ab reflects how substrings of input string a + * map to substrings of intermediate string b. + * @param bc reflects how substrings of intermediate string b + * map to substrings of output string c. + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return *this, with the merged edits appended + * @stable ICU 60 + */ + Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode); + +private: + void releaseArray() U_NOEXCEPT; + Edits ©Array(const Edits &other); + Edits &moveArray(Edits &src) U_NOEXCEPT; + + void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; } + int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; } + + void append(int32_t r); + UBool growArray(); + + static const int32_t STACK_CAPACITY = 100; + uint16_t *array; + int32_t capacity; + int32_t length; + int32_t delta; + int32_t numChanges; + UErrorCode errorCode_; + uint16_t stackArray[STACK_CAPACITY]; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __EDITS_H__ diff --git a/thirdparty/icu4c/common/unicode/enumset.h b/thirdparty/icu4c/common/unicode/enumset.h new file mode 100644 index 0000000000..bde8c455c0 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/enumset.h @@ -0,0 +1,69 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2012,2014 International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +*/ + +/** + * \file + * \brief C++: internal template EnumSet<> + */ + +#ifndef ENUMSET_H +#define ENUMSET_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/* Can't use #ifndef U_HIDE_INTERNAL_API for the entire EnumSet class, needed in .h file declarations */ +/** + * enum bitset for boolean fields. Similar to Java EnumSet<>. + * Needs to range check. Used for private instance variables. + * @internal + * \cond + */ +template<typename T, uint32_t minValue, uint32_t limitValue> +class EnumSet { +public: + inline EnumSet() : fBools(0) {} + inline EnumSet(const EnumSet<T,minValue,limitValue>& other) : fBools(other.fBools) {} + inline ~EnumSet() {} +#ifndef U_HIDE_INTERNAL_API + inline void clear() { fBools=0; } + inline void add(T toAdd) { set(toAdd, 1); } + inline void remove(T toRemove) { set(toRemove, 0); } + inline int32_t contains(T toCheck) const { return get(toCheck); } + inline void set(T toSet, int32_t v) { fBools=(fBools&(~flag(toSet)))|(v?(flag(toSet)):0); } + inline int32_t get(T toCheck) const { return (fBools & flag(toCheck))?1:0; } + inline UBool isValidEnum(T toCheck) const { return (toCheck>=minValue&&toCheck<limitValue); } + inline UBool isValidValue(int32_t v) const { return (v==0||v==1); } + inline const EnumSet<T,minValue,limitValue>& operator=(const EnumSet<T,minValue,limitValue>& other) { + fBools = other.fBools; + return *this; + } + + inline uint32_t getAll() const { + return fBools; + } +#endif /* U_HIDE_INTERNAL_API */ + +private: + inline uint32_t flag(T toCheck) const { return (1<<(toCheck-minValue)); } +private: + uint32_t fBools; +}; + +/** \endcond */ + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ +#endif /* ENUMSET_H */ diff --git a/thirdparty/icu4c/common/unicode/errorcode.h b/thirdparty/icu4c/common/unicode/errorcode.h new file mode 100644 index 0000000000..fe7b518323 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/errorcode.h @@ -0,0 +1,144 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2009-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: errorcode.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009mar10 +* created by: Markus W. Scherer +*/ + +#ifndef __ERRORCODE_H__ +#define __ERRORCODE_H__ + +/** + * \file + * \brief C++ API: ErrorCode class intended to make it easier to use + * ICU C and C++ APIs from C++ user code. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" + +U_NAMESPACE_BEGIN + +/** + * Wrapper class for UErrorCode, with conversion operators for direct use + * in ICU C and C++ APIs. + * Intended to be used as a base class, where a subclass overrides + * the handleFailure() function so that it throws an exception, + * does an assert(), logs an error, etc. + * This is not an abstract base class. This class can be used and instantiated + * by itself, although it will be more useful when subclassed. + * + * Features: + * - The constructor initializes the internal UErrorCode to U_ZERO_ERROR, + * removing one common source of errors. + * - Same use in C APIs taking a UErrorCode * (pointer) + * and C++ taking UErrorCode & (reference) via conversion operators. + * - Possible automatic checking for success when it goes out of scope. + * + * Note: For automatic checking for success in the destructor, a subclass + * must implement such logic in its own destructor because the base class + * destructor cannot call a subclass function (like handleFailure()). + * The ErrorCode base class destructor does nothing. + * + * Note also: While it is possible for a destructor to throw an exception, + * it is generally unsafe to do so. This means that in a subclass the destructor + * and the handleFailure() function may need to take different actions. + * + * Sample code: + * \code + * class IcuErrorCode: public icu::ErrorCode { + * public: + * virtual ~IcuErrorCode() { // should be defined in .cpp as "key function" + * // Safe because our handleFailure() does not throw exceptions. + * if(isFailure()) { handleFailure(); } + * } + * protected: + * virtual void handleFailure() const { + * log_failure(u_errorName(errorCode)); + * exit(errorCode); + * } + * }; + * IcuErrorCode error_code; + * UConverter *cnv = ucnv_open("Shift-JIS", error_code); + * length = ucnv_fromUChars(dest, capacity, src, length, error_code); + * ucnv_close(cnv); + * // IcuErrorCode destructor checks for success. + * \endcode + * + * @stable ICU 4.2 + */ +class U_COMMON_API ErrorCode: public UMemory { +public: + /** + * Default constructor. Initializes its UErrorCode to U_ZERO_ERROR. + * @stable ICU 4.2 + */ + ErrorCode() : errorCode(U_ZERO_ERROR) {} + /** Destructor, does nothing. See class documentation for details. @stable ICU 4.2 */ + virtual ~ErrorCode(); + /** Conversion operator, returns a reference. @stable ICU 4.2 */ + operator UErrorCode & () { return errorCode; } + /** Conversion operator, returns a pointer. @stable ICU 4.2 */ + operator UErrorCode * () { return &errorCode; } + /** Tests for U_SUCCESS(). @stable ICU 4.2 */ + UBool isSuccess() const { return U_SUCCESS(errorCode); } + /** Tests for U_FAILURE(). @stable ICU 4.2 */ + UBool isFailure() const { return U_FAILURE(errorCode); } + /** Returns the UErrorCode value. @stable ICU 4.2 */ + UErrorCode get() const { return errorCode; } + /** Sets the UErrorCode value. @stable ICU 4.2 */ + void set(UErrorCode value) { errorCode=value; } + /** Returns the UErrorCode value and resets it to U_ZERO_ERROR. @stable ICU 4.2 */ + UErrorCode reset(); + /** + * Asserts isSuccess(). + * In other words, this method checks for a failure code, + * and the base class handles it like this: + * \code + * if(isFailure()) { handleFailure(); } + * \endcode + * @stable ICU 4.4 + */ + void assertSuccess() const; + /** + * Return a string for the UErrorCode value. + * The string will be the same as the name of the error code constant + * in the UErrorCode enum. + * @stable ICU 4.4 + */ + const char* errorName() const; + +protected: + /** + * Internal UErrorCode, accessible to subclasses. + * @stable ICU 4.2 + */ + UErrorCode errorCode; + /** + * Called by assertSuccess() if isFailure() is true. + * A subclass should override this function to deal with a failure code: + * Throw an exception, log an error, terminate the program, or similar. + * @stable ICU 4.2 + */ + virtual void handleFailure() const {} +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __ERRORCODE_H__ diff --git a/thirdparty/icu4c/common/unicode/filteredbrk.h b/thirdparty/icu4c/common/unicode/filteredbrk.h new file mode 100644 index 0000000000..8b07e39ae1 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/filteredbrk.h @@ -0,0 +1,152 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 1997-2015, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +*/ + +#ifndef FILTEREDBRK_H +#define FILTEREDBRK_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/brkiter.h" + +#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION + +U_NAMESPACE_BEGIN + +/** + * \file + * \brief C++ API: FilteredBreakIteratorBuilder + */ + +/** + * The BreakIteratorFilter is used to modify the behavior of a BreakIterator + * by constructing a new BreakIterator which suppresses certain segment boundaries. + * See http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions . + * For example, a typical English Sentence Break Iterator would break on the space + * in the string "Mr. Smith" (resulting in two segments), + * but with "Mr." as an exception, a filtered break iterator + * would consider the string "Mr. Smith" to be a single segment. + * + * @stable ICU 56 + */ +class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { + public: + /** + * destructor. + * @stable ICU 56 + */ + virtual ~FilteredBreakIteratorBuilder(); + + /** + * Construct a FilteredBreakIteratorBuilder based on rules in a locale. + * The rules are taken from CLDR exception data for the locale, + * see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions + * This is the equivalent of calling createInstance(UErrorCode&) + * and then repeatedly calling addNoBreakAfter(...) with the contents + * of the CLDR exception data. + * @param where the locale. + * @param status The error code. + * @return the new builder + * @stable ICU 56 + */ + static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status); + +#ifndef U_HIDE_DEPRECATED_API + /** + * This function has been deprecated in favor of createEmptyInstance, which has + * identical behavior. + * @param status The error code. + * @return the new builder + * @deprecated ICU 60 use createEmptyInstance instead + * @see createEmptyInstance() + */ + static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Construct an empty FilteredBreakIteratorBuilder. + * In this state, it will not suppress any segment boundaries. + * @param status The error code. + * @return the new builder + * @stable ICU 60 + */ + static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status); + + /** + * Suppress a certain string from being the end of a segment. + * For example, suppressing "Mr.", then segments ending in "Mr." will not be returned + * by the iterator. + * @param string the string to suppress, such as "Mr." + * @param status error code + * @return returns true if the string was not present and now added, + * false if the call was a no-op because the string was already being suppressed. + * @stable ICU 56 + */ + virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; + + /** + * Stop suppressing a certain string from being the end of the segment. + * This function does not create any new segment boundaries, but only serves to un-do + * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of + * locale data which may be suppressing certain strings. + * @param string the exception to remove + * @param status error code + * @return returns true if the string was present and now removed, + * false if the call was a no-op because the string was not being suppressed. + * @stable ICU 56 + */ + virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * This function has been deprecated in favor of wrapIteratorWithFilter() + * The behavior is identical. + * @param adoptBreakIterator the break iterator to adopt + * @param status error code + * @return the new BreakIterator, owned by the caller. + * @deprecated ICU 60 use wrapIteratorWithFilter() instead + * @see wrapBreakIteratorWithFilter() + */ + virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * Wrap (adopt) an existing break iterator in a new filtered instance. + * The resulting BreakIterator is owned by the caller. + * The BreakIteratorFilter may be destroyed before the BreakIterator is destroyed. + * Note that the adoptBreakIterator is adopted by the new BreakIterator + * and should no longer be used by the caller. + * The FilteredBreakIteratorBuilder may be reused. + * This function is an alias for build() + * @param adoptBreakIterator the break iterator to adopt + * @param status error code + * @return the new BreakIterator, owned by the caller. + * @stable ICU 60 + */ + inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) { + return build(adoptBreakIterator, status); + } + + protected: + /** + * For subclass use + * @stable ICU 56 + */ + FilteredBreakIteratorBuilder(); +}; + + +U_NAMESPACE_END + +#endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // #ifndef FILTEREDBRK_H diff --git a/thirdparty/icu4c/common/unicode/icudataver.h b/thirdparty/icu4c/common/unicode/icudataver.h new file mode 100644 index 0000000000..f218ed8ebc --- /dev/null +++ b/thirdparty/icu4c/common/unicode/icudataver.h @@ -0,0 +1,43 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2009-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +*/ + + +/** + * \file + * \brief C API: access to ICU Data Version number + */ + +#ifndef __ICU_DATA_VER_H__ +#define __ICU_DATA_VER_H__ + +#include "unicode/utypes.h" + +/** + * @stable ICU 49 + */ +#define U_ICU_VERSION_BUNDLE "icuver" + +/** + * @stable ICU 49 + */ +#define U_ICU_DATA_KEY "DataVersion" + +/** + * Retrieves the data version from icuver and stores it in dataVersionFillin. + * + * @param dataVersionFillin icuver data version information to be filled in if not-null + * @param status stores the error code from the calls to resource bundle + * + * @stable ICU 49 + */ +U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status); + +#endif diff --git a/thirdparty/icu4c/common/unicode/icuplug.h b/thirdparty/icu4c/common/unicode/icuplug.h new file mode 100644 index 0000000000..52f810da57 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/icuplug.h @@ -0,0 +1,388 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2009-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : icuplug.h +* +* Date Name Description +* 10/29/2009 sl New. +****************************************************************************** +*/ + +/** + * \file + * \brief C API: ICU Plugin API + * + * <h2>C API: ICU Plugin API</h2> + * + * <p>C API allowing run-time loadable modules that extend or modify ICU functionality.</p> + * + * <h3>Loading and Configuration</h3> + * + * <p>At ICU startup time, the environment variable "ICU_PLUGINS" will be + * queried for a directory name. If it is not set, the preprocessor symbol + * "DEFAULT_ICU_PLUGINS" will be checked for a default value.</p> + * + * <p>Within the above-named directory, the file "icuplugins##.txt" will be + * opened, if present, where ## is the major+minor number of the currently + * running ICU (such as, 44 for ICU 4.4, thus icuplugins44.txt)</p> + * + * <p>The configuration file has this format:</p> + * + * <ul> + * <li>Hash (#) begins a comment line</li> + * + * <li>Non-comment lines have two or three components: + * LIBRARYNAME ENTRYPOINT [ CONFIGURATION .. ]</li> + * + * <li>Tabs or spaces separate the three items.</li> + * + * <li>LIBRARYNAME is the name of a shared library, either a short name if + * it is on the loader path, or a full pathname.</li> + * + * <li>ENTRYPOINT is the short (undecorated) symbol name of the plugin's + * entrypoint, as above.</li> + * + * <li>CONFIGURATION is the entire rest of the line . It's passed as-is to + * the plugin.</li> + * </ul> + * + * <p>An example configuration file is, in its entirety:</p> + * + * \code + * # this is icuplugins44.txt + * testplug.dll myPlugin hello=world + * \endcode + * <p>Plugins are categorized as "high" or "low" level. Low level are those + * which must be run BEFORE high level plugins, and before any operations + * which cause ICU to be 'initialized'. If a plugin is low level but + * causes ICU to allocate memory or become initialized, that plugin is said + * to cause a 'level change'. </p> + * + * <p>At load time, ICU first queries all plugins to determine their level, + * then loads all 'low' plugins first, and then loads all 'high' plugins. + * Plugins are otherwise loaded in the order listed in the configuration file.</p> + * + * <h3>Implementing a Plugin</h3> + * \code + * U_CAPI UPlugTokenReturn U_EXPORT2 + * myPlugin (UPlugData *plug, UPlugReason reason, UErrorCode *status) { + * if(reason==UPLUG_REASON_QUERY) { + * uplug_setPlugName(plug, "Simple Plugin"); + * uplug_setPlugLevel(plug, UPLUG_LEVEL_HIGH); + * } else if(reason==UPLUG_REASON_LOAD) { + * ... Set up some ICU things here.... + * } else if(reason==UPLUG_REASON_UNLOAD) { + * ... unload, clean up ... + * } + * return UPLUG_TOKEN; + * } + * \endcode + * + * <p>The UPlugData* is an opaque pointer to the plugin-specific data, and is + * used in all other API calls.</p> + * + * <p>The API contract is:</p> + * <ol><li>The plugin MUST always return UPLUG_TOKEN as a return value- to + * indicate that it is a valid plugin.</li> + * + * <li>When the 'reason' parameter is set to UPLUG_REASON_QUERY, the + * plugin MUST call uplug_setPlugLevel() to indicate whether it is a high + * level or low level plugin.</li> + * + * <li>When the 'reason' parameter is UPLUG_REASON_QUERY, the plugin + * SHOULD call uplug_setPlugName to indicate a human readable plugin name.</li></ol> + * + * + * \internal ICU 4.4 Technology Preview + */ + + +#ifndef ICUPLUG_H +#define ICUPLUG_H + +#include "unicode/utypes.h" + + +#if UCONFIG_ENABLE_PLUGINS || defined(U_IN_DOXYGEN) + + + +/* === Basic types === */ + +#ifndef U_HIDE_INTERNAL_API +/** + * @{ + * Opaque structure passed to/from a plugin. + * use the APIs to access it. + * @internal ICU 4.4 Technology Preview + */ + +struct UPlugData; +typedef struct UPlugData UPlugData; + +/** @} */ + +/** + * Random Token to identify a valid ICU plugin. Plugins must return this + * from the entrypoint. + * @internal ICU 4.4 Technology Preview + */ +#define UPLUG_TOKEN 0x54762486 + +/** + * Max width of names, symbols, and configuration strings + * @internal ICU 4.4 Technology Preview + */ +#define UPLUG_NAME_MAX 100 + + +/** + * Return value from a plugin entrypoint. + * Must always be set to UPLUG_TOKEN + * @see UPLUG_TOKEN + * @internal ICU 4.4 Technology Preview + */ +typedef uint32_t UPlugTokenReturn; + +/** + * Reason code for the entrypoint's call + * @internal ICU 4.4 Technology Preview + */ +typedef enum { + UPLUG_REASON_QUERY = 0, /**< The plugin is being queried for info. **/ + UPLUG_REASON_LOAD = 1, /**< The plugin is being loaded. **/ + UPLUG_REASON_UNLOAD = 2, /**< The plugin is being unloaded. **/ + /** + * Number of known reasons. + * @internal The numeric value may change over time, see ICU ticket #12420. + */ + UPLUG_REASON_COUNT +} UPlugReason; + + +/** + * Level of plugin loading + * INITIAL: UNKNOWN + * QUERY: INVALID -> { LOW | HIGH } + * ERR -> INVALID + * @internal ICU 4.4 Technology Preview + */ +typedef enum { + UPLUG_LEVEL_INVALID = 0, /**< The plugin is invalid, hasn't called uplug_setLevel, or can't load. **/ + UPLUG_LEVEL_UNKNOWN = 1, /**< The plugin is waiting to be installed. **/ + UPLUG_LEVEL_LOW = 2, /**< The plugin must be called before u_init completes **/ + UPLUG_LEVEL_HIGH = 3, /**< The plugin can run at any time. **/ + /** + * Number of known levels. + * @internal The numeric value may change over time, see ICU ticket #12420. + */ + UPLUG_LEVEL_COUNT +} UPlugLevel; + +/** + * Entrypoint for an ICU plugin. + * @param plug the UPlugData handle. + * @param status the plugin's extended status code. + * @return A valid plugin must return UPLUG_TOKEN + * @internal ICU 4.4 Technology Preview + */ +typedef UPlugTokenReturn (U_EXPORT2 UPlugEntrypoint) ( + UPlugData *plug, + UPlugReason reason, + UErrorCode *status); + +/* === Needed for Implementing === */ + +/** + * Request that this plugin not be unloaded at cleanup time. + * This is appropriate for plugins which cannot be cleaned up. + * @see u_cleanup() + * @param plug plugin + * @param dontUnload set true if this plugin can't be unloaded + * @internal ICU 4.4 Technology Preview + */ +U_CAPI void U_EXPORT2 +uplug_setPlugNoUnload(UPlugData *plug, UBool dontUnload); + +/** + * Set the level of this plugin. + * @param plug plugin data handle + * @param level the level of this plugin + * @internal ICU 4.4 Technology Preview + */ +U_CAPI void U_EXPORT2 +uplug_setPlugLevel(UPlugData *plug, UPlugLevel level); + +/** + * Get the level of this plugin. + * @param plug plugin data handle + * @return the level of this plugin + * @internal ICU 4.4 Technology Preview + */ +U_CAPI UPlugLevel U_EXPORT2 +uplug_getPlugLevel(UPlugData *plug); + +/** + * Get the lowest level of plug which can currently load. + * For example, if UPLUG_LEVEL_LOW is returned, then low level plugins may load + * if UPLUG_LEVEL_HIGH is returned, then only high level plugins may load. + * @return the lowest level of plug which can currently load + * @internal ICU 4.4 Technology Preview + */ +U_CAPI UPlugLevel U_EXPORT2 +uplug_getCurrentLevel(void); + + +/** + * Get plug load status + * @return The error code of this plugin's load attempt. + * @internal ICU 4.4 Technology Preview + */ +U_CAPI UErrorCode U_EXPORT2 +uplug_getPlugLoadStatus(UPlugData *plug); + +/** + * Set the human-readable name of this plugin. + * @param plug plugin data handle + * @param name the name of this plugin. The first UPLUG_NAME_MAX characters willi be copied into a new buffer. + * @internal ICU 4.4 Technology Preview + */ +U_CAPI void U_EXPORT2 +uplug_setPlugName(UPlugData *plug, const char *name); + +/** + * Get the human-readable name of this plugin. + * @param plug plugin data handle + * @return the name of this plugin + * @internal ICU 4.4 Technology Preview + */ +U_CAPI const char * U_EXPORT2 +uplug_getPlugName(UPlugData *plug); + +/** + * Return the symbol name for this plugin, if known. + * @param plug plugin data handle + * @return the symbol name, or NULL + * @internal ICU 4.4 Technology Preview + */ +U_CAPI const char * U_EXPORT2 +uplug_getSymbolName(UPlugData *plug); + +/** + * Return the library name for this plugin, if known. + * @param plug plugin data handle + * @param status error code + * @return the library name, or NULL + * @internal ICU 4.4 Technology Preview + */ +U_CAPI const char * U_EXPORT2 +uplug_getLibraryName(UPlugData *plug, UErrorCode *status); + +/** + * Return the library used for this plugin, if known. + * Plugins could use this to load data out of their + * @param plug plugin data handle + * @return the library, or NULL + * @internal ICU 4.4 Technology Preview + */ +U_CAPI void * U_EXPORT2 +uplug_getLibrary(UPlugData *plug); + +/** + * Return the plugin-specific context data. + * @param plug plugin data handle + * @return the context, or NULL if not set + * @internal ICU 4.4 Technology Preview + */ +U_CAPI void * U_EXPORT2 +uplug_getContext(UPlugData *plug); + +/** + * Set the plugin-specific context data. + * @param plug plugin data handle + * @param context new context to set + * @internal ICU 4.4 Technology Preview + */ +U_CAPI void U_EXPORT2 +uplug_setContext(UPlugData *plug, void *context); + + +/** + * Get the configuration string, if available. + * The string is in the platform default codepage. + * @param plug plugin data handle + * @return configuration string, or else null. + * @internal ICU 4.4 Technology Preview + */ +U_CAPI const char * U_EXPORT2 +uplug_getConfiguration(UPlugData *plug); + +/** + * Return all currently installed plugins, from newest to oldest + * Usage Example: + * \code + * UPlugData *plug = NULL; + * while(plug=uplug_nextPlug(plug)) { + * ... do something with 'plug' ... + * } + * \endcode + * Not thread safe- do not call while plugs are added or removed. + * @param prior pass in 'NULL' to get the first (most recent) plug, + * otherwise pass the value returned on a prior call to uplug_nextPlug + * @return the next oldest plugin, or NULL if no more. + * @internal ICU 4.4 Technology Preview + */ +U_CAPI UPlugData* U_EXPORT2 +uplug_nextPlug(UPlugData *prior); + +/** + * Inject a plugin as if it were loaded from a library. + * This is useful for testing plugins. + * Note that it will have a 'NULL' library pointer associated + * with it, and therefore no llibrary will be closed at cleanup time. + * Low level plugins may not be able to load, as ordering can't be enforced. + * @param entrypoint entrypoint to install + * @param config user specified configuration string, if available, or NULL. + * @param status error result + * @return the new UPlugData associated with this plugin, or NULL if error. + * @internal ICU 4.4 Technology Preview + */ +U_CAPI UPlugData* U_EXPORT2 +uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status); + + +/** + * Inject a plugin from a library, as if the information came from a config file. + * Low level plugins may not be able to load, and ordering can't be enforced. + * @param libName DLL name to load + * @param sym symbol of plugin (UPlugEntrypoint function) + * @param config configuration string, or NULL + * @param status error result + * @return the new UPlugData associated with this plugin, or NULL if error. + * @internal ICU 4.4 Technology Preview + */ +U_CAPI UPlugData* U_EXPORT2 +uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status); + +/** + * Remove a plugin. + * Will request the plugin to be unloaded, and close the library if needed + * @param plug plugin handle to close + * @param status error result + * @internal ICU 4.4 Technology Preview + */ +U_CAPI void U_EXPORT2 +uplug_removePlug(UPlugData *plug, UErrorCode *status); +#endif /* U_HIDE_INTERNAL_API */ + +#endif /* UCONFIG_ENABLE_PLUGINS */ + +#endif /* _ICUPLUG */ + diff --git a/thirdparty/icu4c/common/unicode/idna.h b/thirdparty/icu4c/common/unicode/idna.h new file mode 100644 index 0000000000..1305dc6048 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/idna.h @@ -0,0 +1,330 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2012, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: idna.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010mar05 +* created by: Markus W. Scherer +*/ + +#ifndef __IDNA_H__ +#define __IDNA_H__ + +/** + * \file + * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_IDNA + +#include "unicode/bytestream.h" +#include "unicode/stringpiece.h" +#include "unicode/uidna.h" +#include "unicode/unistr.h" + +U_NAMESPACE_BEGIN + +class IDNAInfo; + +/** + * Abstract base class for IDNA processing. + * See http://www.unicode.org/reports/tr46/ + * and http://www.ietf.org/rfc/rfc3490.txt + * + * The IDNA class is not intended for public subclassing. + * + * This C++ API currently only implements UTS #46. + * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) + * and IDNA2003 (functions that do not use a service object). + * @stable ICU 4.6 + */ +class U_COMMON_API IDNA : public UObject { +public: + /** + * Destructor. + * @stable ICU 4.6 + */ + ~IDNA(); + + /** + * Returns an IDNA instance which implements UTS #46. + * Returns an unmodifiable instance, owned by the caller. + * Cache it for multiple operations, and delete it when done. + * The instance is thread-safe, that is, it can be used concurrently. + * + * UTS #46 defines Unicode IDNA Compatibility Processing, + * updated to the latest version of Unicode and compatible with both + * IDNA2003 and IDNA2008. + * + * The worker functions use transitional processing, including deviation mappings, + * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE + * is used in which case the deviation characters are passed through without change. + * + * Disallowed characters are mapped to U+FFFD. + * + * For available options see the uidna.h header. + * Operations with the UTS #46 instance do not support the + * UIDNA_ALLOW_UNASSIGNED option. + * + * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). + * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than + * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. + * + * @param options Bit set to modify the processing and error checking. + * See option bit set values in uidna.h. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the UTS #46 IDNA instance, if successful + * @stable ICU 4.6 + */ + static IDNA * + createUTS46Instance(uint32_t options, UErrorCode &errorCode); + + /** + * Converts a single domain name label into its ASCII form for DNS lookup. + * If any processing step fails, then info.hasErrors() will be true and + * the result might not be an ASCII string. + * The label might be modified according to the types of errors. + * Labels with severe errors will be left in (or turned into) their Unicode form. + * + * The UErrorCode indicates an error only in exceptional cases, + * such as a U_MEMORY_ALLOCATION_ERROR. + * + * @param label Input domain name label + * @param dest Destination string object + * @param info Output container of IDNA processing details. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return dest + * @stable ICU 4.6 + */ + virtual UnicodeString & + labelToASCII(const UnicodeString &label, UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const = 0; + + /** + * Converts a single domain name label into its Unicode form for human-readable display. + * If any processing step fails, then info.hasErrors() will be true. + * The label might be modified according to the types of errors. + * + * The UErrorCode indicates an error only in exceptional cases, + * such as a U_MEMORY_ALLOCATION_ERROR. + * + * @param label Input domain name label + * @param dest Destination string object + * @param info Output container of IDNA processing details. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return dest + * @stable ICU 4.6 + */ + virtual UnicodeString & + labelToUnicode(const UnicodeString &label, UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const = 0; + + /** + * Converts a whole domain name into its ASCII form for DNS lookup. + * If any processing step fails, then info.hasErrors() will be true and + * the result might not be an ASCII string. + * The domain name might be modified according to the types of errors. + * Labels with severe errors will be left in (or turned into) their Unicode form. + * + * The UErrorCode indicates an error only in exceptional cases, + * such as a U_MEMORY_ALLOCATION_ERROR. + * + * @param name Input domain name + * @param dest Destination string object + * @param info Output container of IDNA processing details. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return dest + * @stable ICU 4.6 + */ + virtual UnicodeString & + nameToASCII(const UnicodeString &name, UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const = 0; + + /** + * Converts a whole domain name into its Unicode form for human-readable display. + * If any processing step fails, then info.hasErrors() will be true. + * The domain name might be modified according to the types of errors. + * + * The UErrorCode indicates an error only in exceptional cases, + * such as a U_MEMORY_ALLOCATION_ERROR. + * + * @param name Input domain name + * @param dest Destination string object + * @param info Output container of IDNA processing details. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return dest + * @stable ICU 4.6 + */ + virtual UnicodeString & + nameToUnicode(const UnicodeString &name, UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const = 0; + + // UTF-8 versions of the processing methods ---------------------------- *** + + /** + * Converts a single domain name label into its ASCII form for DNS lookup. + * UTF-8 version of labelToASCII(), same behavior. + * + * @param label Input domain name label + * @param dest Destination byte sink; Flush()ed if successful + * @param info Output container of IDNA processing details. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return dest + * @stable ICU 4.6 + */ + virtual void + labelToASCII_UTF8(StringPiece label, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + + /** + * Converts a single domain name label into its Unicode form for human-readable display. + * UTF-8 version of labelToUnicode(), same behavior. + * + * @param label Input domain name label + * @param dest Destination byte sink; Flush()ed if successful + * @param info Output container of IDNA processing details. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return dest + * @stable ICU 4.6 + */ + virtual void + labelToUnicodeUTF8(StringPiece label, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + + /** + * Converts a whole domain name into its ASCII form for DNS lookup. + * UTF-8 version of nameToASCII(), same behavior. + * + * @param name Input domain name + * @param dest Destination byte sink; Flush()ed if successful + * @param info Output container of IDNA processing details. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return dest + * @stable ICU 4.6 + */ + virtual void + nameToASCII_UTF8(StringPiece name, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + + /** + * Converts a whole domain name into its Unicode form for human-readable display. + * UTF-8 version of nameToUnicode(), same behavior. + * + * @param name Input domain name + * @param dest Destination byte sink; Flush()ed if successful + * @param info Output container of IDNA processing details. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return dest + * @stable ICU 4.6 + */ + virtual void + nameToUnicodeUTF8(StringPiece name, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const; +}; + +class UTS46; + +/** + * Output container for IDNA processing errors. + * The IDNAInfo class is not suitable for subclassing. + * @stable ICU 4.6 + */ +class U_COMMON_API IDNAInfo : public UMemory { +public: + /** + * Constructor for stack allocation. + * @stable ICU 4.6 + */ + IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {} + /** + * Were there IDNA processing errors? + * @return true if there were processing errors + * @stable ICU 4.6 + */ + UBool hasErrors() const { return errors!=0; } + /** + * Returns a bit set indicating IDNA processing errors. + * See UIDNA_ERROR_... constants in uidna.h. + * @return bit set of processing errors + * @stable ICU 4.6 + */ + uint32_t getErrors() const { return errors; } + /** + * Returns true if transitional and nontransitional processing produce different results. + * This is the case when the input label or domain name contains + * one or more deviation characters outside a Punycode label (see UTS #46). + * <ul> + * <li>With nontransitional processing, such characters are + * copied to the destination string. + * <li>With transitional processing, such characters are + * mapped (sharp s/sigma) or removed (joiner/nonjoiner). + * </ul> + * @return true if transitional and nontransitional processing produce different results + * @stable ICU 4.6 + */ + UBool isTransitionalDifferent() const { return isTransDiff; } + +private: + friend class UTS46; + + IDNAInfo(const IDNAInfo &other); // no copying + IDNAInfo &operator=(const IDNAInfo &other); // no copying + + void reset() { + errors=labelErrors=0; + isTransDiff=false; + isBiDi=false; + isOkBiDi=true; + } + + uint32_t errors, labelErrors; + UBool isTransDiff; + UBool isBiDi; + UBool isOkBiDi; +}; + +U_NAMESPACE_END + +#endif // UCONFIG_NO_IDNA + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __IDNA_H__ diff --git a/thirdparty/icu4c/common/unicode/localebuilder.h b/thirdparty/icu4c/common/unicode/localebuilder.h new file mode 100644 index 0000000000..27a894de10 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/localebuilder.h @@ -0,0 +1,311 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +#ifndef __LOCALEBUILDER_H__ +#define __LOCALEBUILDER_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/locid.h" +#include "unicode/localematcher.h" +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: Builder API for Locale + */ + +U_NAMESPACE_BEGIN +class CharString; + +/** + * <code>LocaleBuilder</code> is used to build instances of <code>Locale</code> + * from values configured by the setters. Unlike the <code>Locale</code> + * constructors, the <code>LocaleBuilder</code> checks if a value configured by a + * setter satisfies the syntax requirements defined by the <code>Locale</code> + * class. A <code>Locale</code> object created by a <code>LocaleBuilder</code> is + * well-formed and can be transformed to a well-formed IETF BCP 47 language tag + * without losing information. + * + * <p>The following example shows how to create a <code>Locale</code> object + * with the <code>LocaleBuilder</code>. + * <blockquote> + * <pre> + * UErrorCode status = U_ZERO_ERROR; + * Locale aLocale = LocaleBuilder() + * .setLanguage("sr") + * .setScript("Latn") + * .setRegion("RS") + * .build(status); + * if (U_SUCCESS(status)) { + * // ... + * } + * </pre> + * </blockquote> + * + * <p>LocaleBuilders can be reused; <code>clear()</code> resets all + * fields to their default values. + * + * <p>LocaleBuilder tracks errors in an internal UErrorCode. For all setters, + * except setLanguageTag and setLocale, LocaleBuilder will return immediately + * if the internal UErrorCode is in error state. + * To reset internal state and error code, call clear method. + * The setLanguageTag and setLocale method will first clear the internal + * UErrorCode, then track the error of the validation of the input parameter + * into the internal UErrorCode. + * + * @stable ICU 64 + */ +class U_COMMON_API LocaleBuilder : public UObject { +public: + /** + * Constructs an empty LocaleBuilder. The default value of all + * fields, extensions, and private use information is the + * empty string. + * + * @stable ICU 64 + */ + LocaleBuilder(); + + /** + * Destructor + * @stable ICU 64 + */ + virtual ~LocaleBuilder(); + + /** + * Resets the <code>LocaleBuilder</code> to match the provided + * <code>locale</code>. Existing state is discarded. + * + * <p>All fields of the locale must be well-formed. + * <p>This method clears the internal UErrorCode. + * + * @param locale the locale + * @return This builder. + * + * @stable ICU 64 + */ + LocaleBuilder& setLocale(const Locale& locale); + + /** + * Resets the LocaleBuilder to match the provided + * [Unicode Locale Identifier](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id) . + * Discards the existing state. + * The empty string causes the builder to be reset, like {@link #clear}. + * Legacy language tags (marked as “Type: grandfathered” in BCP 47) + * are converted to their canonical form before being processed. + * Otherwise, the <code>language tag</code> must be well-formed, + * or else the build() method will later report an U_ILLEGAL_ARGUMENT_ERROR. + * + * <p>This method clears the internal UErrorCode. + * + * @param tag the language tag, defined as + * [unicode_locale_id](http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_id). + * @return This builder. + * @stable ICU 64 + */ + LocaleBuilder& setLanguageTag(StringPiece tag); + + /** + * Sets the language. If <code>language</code> is the empty string, the + * language in this <code>LocaleBuilder</code> is removed. Otherwise, the + * <code>language</code> must be well-formed, or else the build() method will + * later report an U_ILLEGAL_ARGUMENT_ERROR. + * + * <p>The syntax of language value is defined as + * [unicode_language_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag). + * + * @param language the language + * @return This builder. + * @stable ICU 64 + */ + LocaleBuilder& setLanguage(StringPiece language); + + /** + * Sets the script. If <code>script</code> is the empty string, the script in + * this <code>LocaleBuilder</code> is removed. + * Otherwise, the <code>script</code> must be well-formed, or else the build() + * method will later report an U_ILLEGAL_ARGUMENT_ERROR. + * + * <p>The script value is a four-letter script code as + * [unicode_script_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag) + * defined by ISO 15924 + * + * @param script the script + * @return This builder. + * @stable ICU 64 + */ + LocaleBuilder& setScript(StringPiece script); + + /** + * Sets the region. If region is the empty string, the region in this + * <code>LocaleBuilder</code> is removed. Otherwise, the <code>region</code> + * must be well-formed, or else the build() method will later report an + * U_ILLEGAL_ARGUMENT_ERROR. + * + * <p>The region value is defined by + * [unicode_region_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag) + * as a two-letter ISO 3166 code or a three-digit UN M.49 area code. + * + * <p>The region value in the <code>Locale</code> created by the + * <code>LocaleBuilder</code> is always normalized to upper case. + * + * @param region the region + * @return This builder. + * @stable ICU 64 + */ + LocaleBuilder& setRegion(StringPiece region); + + /** + * Sets the variant. If variant is the empty string, the variant in this + * <code>LocaleBuilder</code> is removed. Otherwise, the <code>variant</code> + * must be well-formed, or else the build() method will later report an + * U_ILLEGAL_ARGUMENT_ERROR. + * + * <p><b>Note:</b> This method checks if <code>variant</code> + * satisfies the + * [unicode_variant_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag) + * syntax requirements, and normalizes the value to lowercase letters. However, + * the <code>Locale</code> class does not impose any syntactic + * restriction on variant. To set an ill-formed variant, use a Locale constructor. + * If there are multiple unicode_variant_subtag, the caller must concatenate + * them with '-' as separator (ex: "foobar-fibar"). + * + * @param variant the variant + * @return This builder. + * @stable ICU 64 + */ + LocaleBuilder& setVariant(StringPiece variant); + + /** + * Sets the extension for the given key. If the value is the empty string, + * the extension is removed. Otherwise, the <code>key</code> and + * <code>value</code> must be well-formed, or else the build() method will + * later report an U_ILLEGAL_ARGUMENT_ERROR. + * + * <p><b>Note:</b> The key ('u') is used for the Unicode locale extension. + * Setting a value for this key replaces any existing Unicode locale key/type + * pairs with those defined in the extension. + * + * <p><b>Note:</b> The key ('x') is used for the private use code. To be + * well-formed, the value for this key needs only to have subtags of one to + * eight alphanumeric characters, not two to eight as in the general case. + * + * @param key the extension key + * @param value the extension value + * @return This builder. + * @stable ICU 64 + */ + LocaleBuilder& setExtension(char key, StringPiece value); + + /** + * Sets the Unicode locale keyword type for the given key. If the type + * StringPiece is constructed with a nullptr, the keyword is removed. + * If the type is the empty string, the keyword is set without type subtags. + * Otherwise, the key and type must be well-formed, or else the build() + * method will later report an U_ILLEGAL_ARGUMENT_ERROR. + * + * <p>Keys and types are converted to lower case. + * + * <p><b>Note</b>:Setting the 'u' extension via {@link #setExtension} + * replaces all Unicode locale keywords with those defined in the + * extension. + * + * @param key the Unicode locale key + * @param type the Unicode locale type + * @return This builder. + * @stable ICU 64 + */ + LocaleBuilder& setUnicodeLocaleKeyword( + StringPiece key, StringPiece type); + + /** + * Adds a unicode locale attribute, if not already present, otherwise + * has no effect. The attribute must not be empty string and must be + * well-formed or U_ILLEGAL_ARGUMENT_ERROR will be set to status + * during the build() call. + * + * @param attribute the attribute + * @return This builder. + * @stable ICU 64 + */ + LocaleBuilder& addUnicodeLocaleAttribute(StringPiece attribute); + + /** + * Removes a unicode locale attribute, if present, otherwise has no + * effect. The attribute must not be empty string and must be well-formed + * or U_ILLEGAL_ARGUMENT_ERROR will be set to status during the build() call. + * + * <p>Attribute comparison for removal is case-insensitive. + * + * @param attribute the attribute + * @return This builder. + * @stable ICU 64 + */ + LocaleBuilder& removeUnicodeLocaleAttribute(StringPiece attribute); + + /** + * Resets the builder to its initial, empty state. + * <p>This method clears the internal UErrorCode. + * + * @return this builder + * @stable ICU 64 + */ + LocaleBuilder& clear(); + + /** + * Resets the extensions to their initial, empty state. + * Language, script, region and variant are unchanged. + * + * @return this builder + * @stable ICU 64 + */ + LocaleBuilder& clearExtensions(); + + /** + * Returns an instance of <code>Locale</code> created from the fields set + * on this builder. + * If any set methods or during the build() call require memory allocation + * but fail U_MEMORY_ALLOCATION_ERROR will be set to status. + * If any of the fields set by the setters are not well-formed, the status + * will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will + * not change after the build() call and the caller is free to keep using + * the same builder to build more locales. + * + * @return a new Locale + * @stable ICU 64 + */ + Locale build(UErrorCode& status); + + /** + * Sets the UErrorCode if an error occurred while recording sets. + * Preserves older error codes in the outErrorCode. + * @param outErrorCode Set to an error code that occurred while setting subtags. + * Unchanged if there is no such error or if outErrorCode + * already contained an error. + * @return true if U_FAILURE(outErrorCode) + * @stable ICU 65 + */ + UBool copyErrorTo(UErrorCode &outErrorCode) const; + +private: + friend class LocaleMatcher::Result; + + void copyExtensionsFrom(const Locale& src, UErrorCode& errorCode); + + UErrorCode status_; + char language_[9]; + char script_[5]; + char region_[4]; + CharString *variant_; // Pointer not object so we need not #include internal charstr.h. + icu::Locale *extensions_; // Pointer not object. Storage for all other fields. + +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __LOCALEBUILDER_H__ diff --git a/thirdparty/icu4c/common/unicode/localematcher.h b/thirdparty/icu4c/common/unicode/localematcher.h new file mode 100644 index 0000000000..63a68b0b7f --- /dev/null +++ b/thirdparty/icu4c/common/unicode/localematcher.h @@ -0,0 +1,720 @@ +// © 2019 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// localematcher.h +// created: 2019may08 Markus W. Scherer + +#ifndef __LOCALEMATCHER_H__ +#define __LOCALEMATCHER_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/locid.h" +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales. + */ + +/** + * Builder option for whether the language subtag or the script subtag is most important. + * + * @see LocaleMatcher::Builder#setFavorSubtag(ULocMatchFavorSubtag) + * @stable ICU 65 + */ +enum ULocMatchFavorSubtag { + /** + * Language differences are most important, then script differences, then region differences. + * (This is the default behavior.) + * + * @stable ICU 65 + */ + ULOCMATCH_FAVOR_LANGUAGE, + /** + * Makes script differences matter relatively more than language differences. + * + * @stable ICU 65 + */ + ULOCMATCH_FAVOR_SCRIPT +}; +#ifndef U_IN_DOXYGEN +typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag; +#endif + +/** + * Builder option for whether all desired locales are treated equally or + * earlier ones are preferred. + * + * @see LocaleMatcher::Builder#setDemotionPerDesiredLocale(ULocMatchDemotion) + * @stable ICU 65 + */ +enum ULocMatchDemotion { + /** + * All desired locales are treated equally. + * + * @stable ICU 65 + */ + ULOCMATCH_DEMOTION_NONE, + /** + * Earlier desired locales are preferred. + * + * <p>From each desired locale to the next, + * the distance to any supported locale is increased by an additional amount + * which is at least as large as most region mismatches. + * A later desired locale has to have a better match with some supported locale + * due to more than merely having the same region subtag. + * + * <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code> + * yields <code>Result(en-GB, en)</code> because + * with the demotion of sv its perfect match is no better than + * the region distance between the earlier desired locale en-GB and en=en-US. + * + * <p>Notes: + * <ul> + * <li>In some cases, language and/or script differences can be as small as + * the typical region difference. (Example: sr-Latn vs. sr-Cyrl) + * <li>It is possible for certain region differences to be larger than usual, + * and larger than the demotion. + * (As of CLDR 35 there is no such case, but + * this is possible in future versions of the data.) + * </ul> + * + * @stable ICU 65 + */ + ULOCMATCH_DEMOTION_REGION +}; +#ifndef U_IN_DOXYGEN +typedef enum ULocMatchDemotion ULocMatchDemotion; +#endif + +#ifndef U_FORCE_HIDE_DRAFT_API + +/** + * Builder option for whether to include or ignore one-way (fallback) match data. + * The LocaleMatcher uses CLDR languageMatch data which includes fallback (oneway=true) entries. + * Sometimes it is desirable to ignore those. + * + * <p>For example, consider a web application with the UI in a given language, + * with a link to another, related web app. + * The link should include the UI language, and the target server may also use + * the client’s Accept-Language header data. + * The target server has its own list of supported languages. + * One may want to favor UI language consistency, that is, + * if there is a decent match for the original UI language, we want to use it, + * but not if it is merely a fallback. + * + * @see LocaleMatcher::Builder#setDirection(ULocMatchDirection) + * @draft ICU 67 + */ +enum ULocMatchDirection { + /** + * Locale matching includes one-way matches such as Breton→French. (default) + * + * @draft ICU 67 + */ + ULOCMATCH_DIRECTION_WITH_ONE_WAY, + /** + * Locale matching limited to two-way matches including e.g. Danish↔Norwegian + * but ignoring one-way matches. + * + * @draft ICU 67 + */ + ULOCMATCH_DIRECTION_ONLY_TWO_WAY +}; +#ifndef U_IN_DOXYGEN +typedef enum ULocMatchDirection ULocMatchDirection; +#endif + +#endif // U_FORCE_HIDE_DRAFT_API + +struct UHashtable; + +U_NAMESPACE_BEGIN + +struct LSR; + +class LocaleDistance; +class LocaleLsrIterator; +class UVector; +class XLikelySubtags; + +/** + * Immutable class that picks the best match between a user's desired locales and + * an application's supported locales. + * Movable but not copyable. + * + * <p>Example: + * <pre> + * UErrorCode errorCode = U_ZERO_ERROR; + * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocales("fr, en-GB, en").build(errorCode); + * Locale *bestSupported = matcher.getBestLocale(Locale.US, errorCode); // "en" + * </pre> + * + * <p>A matcher takes into account when languages are close to one another, + * such as Danish and Norwegian, + * and when regional variants are close, like en-GB and en-AU as opposed to en-US. + * + * <p>If there are multiple supported locales with the same (language, script, region) + * likely subtags, then the current implementation returns the first of those locales. + * It ignores variant subtags (except for pseudolocale variants) and extensions. + * This may change in future versions. + * + * <p>For example, the current implementation does not distinguish between + * de, de-DE, de-Latn, de-1901, de-u-co-phonebk. + * + * <p>If you prefer one equivalent locale over another, then provide only the preferred one, + * or place it earlier in the list of supported locales. + * + * <p>Otherwise, the order of supported locales may have no effect on the best-match results. + * The current implementation compares each desired locale with supported locales + * in the following order: + * 1. Default locale, if supported; + * 2. CLDR "paradigm locales" like en-GB and es-419; + * 3. other supported locales. + * This may change in future versions. + * + * <p>Often a product will just need one matcher instance, built with the languages + * that it supports. However, it may want multiple instances with different + * default languages based on additional information, such as the domain. + * + * <p>This class is not intended for public subclassing. + * + * @stable ICU 65 + */ +class U_COMMON_API LocaleMatcher : public UMemory { +public: + /** + * Data for the best-matching pair of a desired and a supported locale. + * Movable but not copyable. + * + * @stable ICU 65 + */ + class U_COMMON_API Result : public UMemory { + public: + /** + * Move constructor; might modify the source. + * This object will have the same contents that the source object had. + * + * @param src Result to move contents from. + * @stable ICU 65 + */ + Result(Result &&src) U_NOEXCEPT; + + /** + * Destructor. + * + * @stable ICU 65 + */ + ~Result(); + + /** + * Move assignment; might modify the source. + * This object will have the same contents that the source object had. + * + * @param src Result to move contents from. + * @stable ICU 65 + */ + Result &operator=(Result &&src) U_NOEXCEPT; + + /** + * Returns the best-matching desired locale. + * nullptr if the list of desired locales is empty or if none matched well enough. + * + * @return the best-matching desired locale, or nullptr. + * @stable ICU 65 + */ + inline const Locale *getDesiredLocale() const { return desiredLocale; } + + /** + * Returns the best-matching supported locale. + * If none matched well enough, this is the default locale. + * The default locale is nullptr if Builder::setNoDefaultLocale() was called, + * or if the list of supported locales is empty and no explicit default locale is set. + * + * @return the best-matching supported locale, or nullptr. + * @stable ICU 65 + */ + inline const Locale *getSupportedLocale() const { return supportedLocale; } + + /** + * Returns the index of the best-matching desired locale in the input Iterable order. + * -1 if the list of desired locales is empty or if none matched well enough. + * + * @return the index of the best-matching desired locale, or -1. + * @stable ICU 65 + */ + inline int32_t getDesiredIndex() const { return desiredIndex; } + + /** + * Returns the index of the best-matching supported locale in the + * constructor’s or builder’s input order (“set” Collection plus “added” locales). + * If the matcher was built from a locale list string, then the iteration order is that + * of a LocalePriorityList built from the same string. + * -1 if the list of supported locales is empty or if none matched well enough. + * + * @return the index of the best-matching supported locale, or -1. + * @stable ICU 65 + */ + inline int32_t getSupportedIndex() const { return supportedIndex; } + + /** + * Takes the best-matching supported locale and adds relevant fields of the + * best-matching desired locale, such as the -t- and -u- extensions. + * May replace some fields of the supported locale. + * The result is the locale that should be used for date and number formatting, collation, etc. + * Returns the root locale if getSupportedLocale() returns nullptr. + * + * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn + * + * @return a locale combining the best-matching desired and supported locales. + * @stable ICU 65 + */ + Locale makeResolvedLocale(UErrorCode &errorCode) const; + + private: + Result(const Locale *desired, const Locale *supported, + int32_t desIndex, int32_t suppIndex, UBool owned) : + desiredLocale(desired), supportedLocale(supported), + desiredIndex(desIndex), supportedIndex(suppIndex), + desiredIsOwned(owned) {} + + Result(const Result &other) = delete; + Result &operator=(const Result &other) = delete; + + const Locale *desiredLocale; + const Locale *supportedLocale; + int32_t desiredIndex; + int32_t supportedIndex; + UBool desiredIsOwned; + + friend class LocaleMatcher; + }; + + /** + * LocaleMatcher builder. + * Movable but not copyable. + * + * @stable ICU 65 + */ + class U_COMMON_API Builder : public UMemory { + public: + /** + * Constructs a builder used in chaining parameters for building a LocaleMatcher. + * + * @return a new Builder object + * @stable ICU 65 + */ + Builder() {} + + /** + * Move constructor; might modify the source. + * This builder will have the same contents that the source builder had. + * + * @param src Builder to move contents from. + * @stable ICU 65 + */ + Builder(Builder &&src) U_NOEXCEPT; + + /** + * Destructor. + * + * @stable ICU 65 + */ + ~Builder(); + + /** + * Move assignment; might modify the source. + * This builder will have the same contents that the source builder had. + * + * @param src Builder to move contents from. + * @stable ICU 65 + */ + Builder &operator=(Builder &&src) U_NOEXCEPT; + + /** + * Parses an Accept-Language string + * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>), + * such as "af, en, fr;q=0.9", and sets the supported locales accordingly. + * Allows whitespace in more places but does not allow "*". + * Clears any previously set/added supported locales first. + * + * @param locales the Accept-Language string of locales to set + * @return this Builder object + * @stable ICU 65 + */ + Builder &setSupportedLocalesFromListString(StringPiece locales); + + /** + * Copies the supported locales, preserving iteration order. + * Clears any previously set/added supported locales first. + * Duplicates are allowed, and are not removed. + * + * @param locales the list of locale + * @return this Builder object + * @stable ICU 65 + */ + Builder &setSupportedLocales(Locale::Iterator &locales); + + /** + * Copies the supported locales from the begin/end range, preserving iteration order. + * Clears any previously set/added supported locales first. + * Duplicates are allowed, and are not removed. + * + * Each of the iterator parameter values must be an + * input iterator whose value is convertible to const Locale &. + * + * @param begin Start of range. + * @param end Exclusive end of range. + * @return this Builder object + * @stable ICU 65 + */ + template<typename Iter> + Builder &setSupportedLocales(Iter begin, Iter end) { + if (U_FAILURE(errorCode_)) { return *this; } + clearSupportedLocales(); + while (begin != end) { + addSupportedLocale(*begin++); + } + return *this; + } + + /** + * Copies the supported locales from the begin/end range, preserving iteration order. + * Calls the converter to convert each *begin to a Locale or const Locale &. + * Clears any previously set/added supported locales first. + * Duplicates are allowed, and are not removed. + * + * Each of the iterator parameter values must be an + * input iterator whose value is convertible to const Locale &. + * + * @param begin Start of range. + * @param end Exclusive end of range. + * @param converter Converter from *begin to const Locale & or compatible. + * @return this Builder object + * @stable ICU 65 + */ + template<typename Iter, typename Conv> + Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) { + if (U_FAILURE(errorCode_)) { return *this; } + clearSupportedLocales(); + while (begin != end) { + addSupportedLocale(converter(*begin++)); + } + return *this; + } + + /** + * Adds another supported locale. + * Duplicates are allowed, and are not removed. + * + * @param locale another locale + * @return this Builder object + * @stable ICU 65 + */ + Builder &addSupportedLocale(const Locale &locale); + +#ifndef U_HIDE_DRAFT_API + /** + * Sets no default locale. + * There will be no explicit or implicit default locale. + * If there is no good match, then the matcher will return nullptr for the + * best supported locale. + * + * @draft ICU 68 + */ + Builder &setNoDefaultLocale(); +#endif // U_HIDE_DRAFT_API + + /** + * Sets the default locale; if nullptr, or if it is not set explicitly, + * then the first supported locale is used as the default locale. + * There is no default locale at all (nullptr will be returned instead) + * if setNoDefaultLocale() is called. + * + * @param defaultLocale the default locale (will be copied) + * @return this Builder object + * @stable ICU 65 + */ + Builder &setDefaultLocale(const Locale *defaultLocale); + + /** + * If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script + * differences. + * This is used in situations (such as maps) where + * it is better to fall back to the same script than a similar language. + * + * @param subtag the subtag to favor + * @return this Builder object + * @stable ICU 65 + */ + Builder &setFavorSubtag(ULocMatchFavorSubtag subtag); + + /** + * Option for whether all desired locales are treated equally or + * earlier ones are preferred (this is the default). + * + * @param demotion the demotion per desired locale to set. + * @return this Builder object + * @stable ICU 65 + */ + Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion); + +#ifndef U_HIDE_DRAFT_API + /** + * Option for whether to include or ignore one-way (fallback) match data. + * By default, they are included. + * + * @param direction the match direction to set. + * @return this Builder object + * @draft ICU 67 + */ + Builder &setDirection(ULocMatchDirection direction) { + if (U_SUCCESS(errorCode_)) { + direction_ = direction; + } + return *this; + } +#endif // U_HIDE_DRAFT_API + +#ifndef U_HIDE_DRAFT_API + /** + * Sets the maximum distance for an acceptable match. + * The matcher will return a match for a pair of locales only if + * they match at least as well as the pair given here. + * + * For example, setMaxDistance(en-US, en-GB) limits matches to ones where the + * (desired, support) locales have a distance no greater than a region subtag difference. + * This is much stricter than the CLDR default. + * + * The details of locale matching are subject to changes in + * CLDR data and in the algorithm. + * Specifying a maximum distance in relative terms via a sample pair of locales + * insulates from changes that affect all distance metrics similarly, + * but some changes will necessarily affect relative distances between + * different pairs of locales. + * + * @param desired the desired locale for distance comparison. + * @param supported the supported locale for distance comparison. + * @return this Builder object + * @draft ICU 68 + */ + Builder &setMaxDistance(const Locale &desired, const Locale &supported); +#endif // U_HIDE_DRAFT_API + + /** + * Sets the UErrorCode if an error occurred while setting parameters. + * Preserves older error codes in the outErrorCode. + * + * @param outErrorCode Set to an error code if it does not contain one already + * and an error occurred while setting parameters. + * Otherwise unchanged. + * @return true if U_FAILURE(outErrorCode) + * @stable ICU 65 + */ + UBool copyErrorTo(UErrorCode &outErrorCode) const; + + /** + * Builds and returns a new locale matcher. + * This builder can continue to be used. + * + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return LocaleMatcher + * @stable ICU 65 + */ + LocaleMatcher build(UErrorCode &errorCode) const; + + private: + friend class LocaleMatcher; + + Builder(const Builder &other) = delete; + Builder &operator=(const Builder &other) = delete; + + void clearSupportedLocales(); + bool ensureSupportedLocaleVector(); + + UErrorCode errorCode_ = U_ZERO_ERROR; + UVector *supportedLocales_ = nullptr; + int32_t thresholdDistance_ = -1; + ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION; + Locale *defaultLocale_ = nullptr; + bool withDefault_ = true; + ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE; + ULocMatchDirection direction_ = ULOCMATCH_DIRECTION_WITH_ONE_WAY; + Locale *maxDistanceDesired_ = nullptr; + Locale *maxDistanceSupported_ = nullptr; + }; + + // FYI No public LocaleMatcher constructors in C++; use the Builder. + + /** + * Move copy constructor; might modify the source. + * This matcher will have the same settings that the source matcher had. + * @param src source matcher + * @stable ICU 65 + */ + LocaleMatcher(LocaleMatcher &&src) U_NOEXCEPT; + + /** + * Destructor. + * @stable ICU 65 + */ + ~LocaleMatcher(); + + /** + * Move assignment operator; might modify the source. + * This matcher will have the same settings that the source matcher had. + * The behavior is undefined if *this and src are the same object. + * @param src source matcher + * @return *this + * @stable ICU 65 + */ + LocaleMatcher &operator=(LocaleMatcher &&src) U_NOEXCEPT; + + /** + * Returns the supported locale which best matches the desired locale. + * + * @param desiredLocale Typically a user's language. + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return the best-matching supported locale. + * @stable ICU 65 + */ + const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const; + + /** + * Returns the supported locale which best matches one of the desired locales. + * + * @param desiredLocales Typically a user's languages, in order of preference (descending). + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return the best-matching supported locale. + * @stable ICU 65 + */ + const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const; + + /** + * Parses an Accept-Language string + * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>), + * such as "af, en, fr;q=0.9", + * and returns the supported locale which best matches one of the desired locales. + * Allows whitespace in more places but does not allow "*". + * + * @param desiredLocaleList Typically a user's languages, as an Accept-Language string. + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return the best-matching supported locale. + * @stable ICU 65 + */ + const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const; + + /** + * Returns the best match between the desired locale and the supported locales. + * If the result's desired locale is not nullptr, then it is the address of the input locale. + * It has not been cloned. + * + * @param desiredLocale Typically a user's language. + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return the best-matching pair of the desired and a supported locale. + * @stable ICU 65 + */ + Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const; + + /** + * Returns the best match between the desired and supported locales. + * If the result's desired locale is not nullptr, then it is a clone of + * the best-matching desired locale. The Result object owns the clone. + * + * @param desiredLocales Typically a user's languages, in order of preference (descending). + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return the best-matching pair of a desired and a supported locale. + * @stable ICU 65 + */ + Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const; + +#ifndef U_HIDE_DRAFT_API + /** + * Returns true if the pair of locales matches acceptably. + * This is influenced by Builder options such as setDirection(), setFavorSubtag(), + * and setMaxDistance(). + * + * @param desired The desired locale. + * @param supported The supported locale. + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return true if the pair of locales matches acceptably. + * @draft ICU 68 + */ + UBool isMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const; +#endif // U_HIDE_DRAFT_API + +#ifndef U_HIDE_INTERNAL_API + /** + * Returns a fraction between 0 and 1, where 1 means that the languages are a + * perfect match, and 0 means that they are completely different. + * + * <p>This is mostly an implementation detail, and the precise values may change over time. + * The implementation may use either the maximized forms or the others ones, or both. + * The implementation may or may not rely on the forms to be consistent with each other. + * + * <p>Callers should construct and use a matcher rather than match pairs of locales directly. + * + * @param desired Desired locale. + * @param supported Supported locale. + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return value between 0 and 1, inclusive. + * @internal (has a known user) + */ + double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const; +#endif // U_HIDE_INTERNAL_API + +private: + LocaleMatcher(const Builder &builder, UErrorCode &errorCode); + LocaleMatcher(const LocaleMatcher &other) = delete; + LocaleMatcher &operator=(const LocaleMatcher &other) = delete; + + int32_t putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, UErrorCode &errorCode); + + int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const; + + const XLikelySubtags &likelySubtags; + const LocaleDistance &localeDistance; + int32_t thresholdDistance; + int32_t demotionPerDesiredLocale; + ULocMatchFavorSubtag favorSubtag; + ULocMatchDirection direction; + + // These are in input order. + const Locale ** supportedLocales; + LSR *lsrs; + int32_t supportedLocalesLength; + // These are in preference order: 1. Default locale 2. paradigm locales 3. others. + UHashtable *supportedLsrToIndex; // Map<LSR, Integer> stores index+1 because 0 is "not found" + // Array versions of the supportedLsrToIndex keys and values. + // The distance lookup loops over the supportedLSRs and returns the index of the best match. + const LSR **supportedLSRs; + int32_t *supportedIndexes; + int32_t supportedLSRsLength; + Locale *ownedDefaultLocale; + const Locale *defaultLocale; +}; + +U_NAMESPACE_END + +#endif // U_SHOW_CPLUSPLUS_API +#endif // __LOCALEMATCHER_H__ diff --git a/thirdparty/icu4c/common/unicode/localpointer.h b/thirdparty/icu4c/common/unicode/localpointer.h new file mode 100644 index 0000000000..2a65f2d382 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/localpointer.h @@ -0,0 +1,595 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2009-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: localpointer.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009nov13 +* created by: Markus W. Scherer +*/ + +#ifndef __LOCALPOINTER_H__ +#define __LOCALPOINTER_H__ + +/** + * \file + * \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code. + * + * These classes are inspired by + * - std::auto_ptr + * - boost::scoped_ptr & boost::scoped_array + * - Taligent Safe Pointers (TOnlyPointerTo) + * + * but none of those provide for all of the goals for ICU smart pointers: + * - Smart pointer owns the object and releases it when it goes out of scope. + * - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust. + * - ICU-compatible: No exceptions. + * - Need to be able to orphan/release the pointer and its ownership. + * - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects. + * + * For details see http://site.icu-project.org/design/cpp/scoped_ptr + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include <memory> + +U_NAMESPACE_BEGIN + +/** + * "Smart pointer" base class; do not use directly: use LocalPointer etc. + * + * Base class for smart pointer classes that do not throw exceptions. + * + * Do not use this base class directly, since it does not delete its pointer. + * A subclass must implement methods that delete the pointer: + * Destructor and adoptInstead(). + * + * There is no operator T *() provided because the programmer must decide + * whether to use getAlias() (without transfer of ownership) or orphan() + * (with transfer of ownership and NULLing of the pointer). + * + * @see LocalPointer + * @see LocalArray + * @see U_DEFINE_LOCAL_OPEN_POINTER + * @stable ICU 4.4 + */ +template<typename T> +class LocalPointerBase { +public: + // No heap allocation. Use only on the stack. + static void* U_EXPORT2 operator new(size_t) = delete; + static void* U_EXPORT2 operator new[](size_t) = delete; +#if U_HAVE_PLACEMENT_NEW + static void* U_EXPORT2 operator new(size_t, void*) = delete; +#endif + + /** + * Constructor takes ownership. + * @param p simple pointer to an object that is adopted + * @stable ICU 4.4 + */ + explicit LocalPointerBase(T *p=NULL) : ptr(p) {} + /** + * Destructor deletes the object it owns. + * Subclass must override: Base class does nothing. + * @stable ICU 4.4 + */ + ~LocalPointerBase() { /* delete ptr; */ } + /** + * NULL check. + * @return true if ==NULL + * @stable ICU 4.4 + */ + UBool isNull() const { return ptr==NULL; } + /** + * NULL check. + * @return true if !=NULL + * @stable ICU 4.4 + */ + UBool isValid() const { return ptr!=NULL; } + /** + * Comparison with a simple pointer, so that existing code + * with ==NULL need not be changed. + * @param other simple pointer for comparison + * @return true if this pointer value equals other + * @stable ICU 4.4 + */ + bool operator==(const T *other) const { return ptr==other; } + /** + * Comparison with a simple pointer, so that existing code + * with !=NULL need not be changed. + * @param other simple pointer for comparison + * @return true if this pointer value differs from other + * @stable ICU 4.4 + */ + bool operator!=(const T *other) const { return ptr!=other; } + /** + * Access without ownership change. + * @return the pointer value + * @stable ICU 4.4 + */ + T *getAlias() const { return ptr; } + /** + * Access without ownership change. + * @return the pointer value as a reference + * @stable ICU 4.4 + */ + T &operator*() const { return *ptr; } + /** + * Access without ownership change. + * @return the pointer value + * @stable ICU 4.4 + */ + T *operator->() const { return ptr; } + /** + * Gives up ownership; the internal pointer becomes NULL. + * @return the pointer value; + * caller becomes responsible for deleting the object + * @stable ICU 4.4 + */ + T *orphan() { + T *p=ptr; + ptr=NULL; + return p; + } + /** + * Deletes the object it owns, + * and adopts (takes ownership of) the one passed in. + * Subclass must override: Base class does not delete the object. + * @param p simple pointer to an object that is adopted + * @stable ICU 4.4 + */ + void adoptInstead(T *p) { + // delete ptr; + ptr=p; + } +protected: + /** + * Actual pointer. + * @internal + */ + T *ptr; +private: + // No comparison operators with other LocalPointerBases. + bool operator==(const LocalPointerBase<T> &other); + bool operator!=(const LocalPointerBase<T> &other); + // No ownership sharing: No copy constructor, no assignment operator. + LocalPointerBase(const LocalPointerBase<T> &other); + void operator=(const LocalPointerBase<T> &other); +}; + +/** + * "Smart pointer" class, deletes objects via the standard C++ delete operator. + * For most methods see the LocalPointerBase base class. + * + * Usage example: + * \code + * LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005)); + * int32_t length=s->length(); // 2 + * char16_t lead=s->charAt(0); // 0xd900 + * if(some condition) { return; } // no need to explicitly delete the pointer + * s.adoptInstead(new UnicodeString((char16_t)0xfffc)); + * length=s->length(); // 1 + * // no need to explicitly delete the pointer + * \endcode + * + * @see LocalPointerBase + * @stable ICU 4.4 + */ +template<typename T> +class LocalPointer : public LocalPointerBase<T> { +public: + using LocalPointerBase<T>::operator*; + using LocalPointerBase<T>::operator->; + /** + * Constructor takes ownership. + * @param p simple pointer to an object that is adopted + * @stable ICU 4.4 + */ + explicit LocalPointer(T *p=NULL) : LocalPointerBase<T>(p) {} + /** + * Constructor takes ownership and reports an error if NULL. + * + * This constructor is intended to be used with other-class constructors + * that may report a failure UErrorCode, + * so that callers need to check only for U_FAILURE(errorCode) + * and not also separately for isNull(). + * + * @param p simple pointer to an object that is adopted + * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR + * if p==NULL and no other failure code had been set + * @stable ICU 55 + */ + LocalPointer(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) { + if(p==NULL && U_SUCCESS(errorCode)) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } + } + /** + * Move constructor, leaves src with isNull(). + * @param src source smart pointer + * @stable ICU 56 + */ + LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) { + src.ptr=NULL; + } + + /** + * Constructs a LocalPointer from a C++11 std::unique_ptr. + * The LocalPointer steals the object owned by the std::unique_ptr. + * + * This constructor works via move semantics. If your std::unique_ptr is + * in a local variable, you must use std::move. + * + * @param p The std::unique_ptr from which the pointer will be stolen. + * @stable ICU 64 + */ + explicit LocalPointer(std::unique_ptr<T> &&p) + : LocalPointerBase<T>(p.release()) {} + + /** + * Destructor deletes the object it owns. + * @stable ICU 4.4 + */ + ~LocalPointer() { + delete LocalPointerBase<T>::ptr; + } + /** + * Move assignment operator, leaves src with isNull(). + * The behavior is undefined if *this and src are the same object. + * @param src source smart pointer + * @return *this + * @stable ICU 56 + */ + LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT { + delete LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=src.ptr; + src.ptr=NULL; + return *this; + } + + /** + * Move-assign from an std::unique_ptr to this LocalPointer. + * Steals the pointer from the std::unique_ptr. + * + * @param p The std::unique_ptr from which the pointer will be stolen. + * @return *this + * @stable ICU 64 + */ + LocalPointer<T> &operator=(std::unique_ptr<T> &&p) U_NOEXCEPT { + adoptInstead(p.release()); + return *this; + } + + /** + * Swap pointers. + * @param other other smart pointer + * @stable ICU 56 + */ + void swap(LocalPointer<T> &other) U_NOEXCEPT { + T *temp=LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=other.ptr; + other.ptr=temp; + } + /** + * Non-member LocalPointer swap function. + * @param p1 will get p2's pointer + * @param p2 will get p1's pointer + * @stable ICU 56 + */ + friend inline void swap(LocalPointer<T> &p1, LocalPointer<T> &p2) U_NOEXCEPT { + p1.swap(p2); + } + /** + * Deletes the object it owns, + * and adopts (takes ownership of) the one passed in. + * @param p simple pointer to an object that is adopted + * @stable ICU 4.4 + */ + void adoptInstead(T *p) { + delete LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=p; + } + /** + * Deletes the object it owns, + * and adopts (takes ownership of) the one passed in. + * + * If U_FAILURE(errorCode), then the current object is retained and the new one deleted. + * + * If U_SUCCESS(errorCode) but the input pointer is NULL, + * then U_MEMORY_ALLOCATION_ERROR is set, + * the current object is deleted, and NULL is set. + * + * @param p simple pointer to an object that is adopted + * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR + * if p==NULL and no other failure code had been set + * @stable ICU 55 + */ + void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) { + if(U_SUCCESS(errorCode)) { + delete LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=p; + if(p==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } + } else { + delete p; + } + } + + /** + * Conversion operator to a C++11 std::unique_ptr. + * Disowns the object and gives it to the returned std::unique_ptr. + * + * This operator works via move semantics. If your LocalPointer is + * in a local variable, you must use std::move. + * + * @return An std::unique_ptr owning the pointer previously owned by this + * icu::LocalPointer. + * @stable ICU 64 + */ + operator std::unique_ptr<T> () && { + return std::unique_ptr<T>(LocalPointerBase<T>::orphan()); + } +}; + +/** + * "Smart pointer" class, deletes objects via the C++ array delete[] operator. + * For most methods see the LocalPointerBase base class. + * Adds operator[] for array item access. + * + * Usage example: + * \code + * LocalArray<UnicodeString> a(new UnicodeString[2]); + * a[0].append((char16_t)0x61); + * if(some condition) { return; } // no need to explicitly delete the array + * a.adoptInstead(new UnicodeString[4]); + * a[3].append((char16_t)0x62).append((char16_t)0x63).reverse(); + * // no need to explicitly delete the array + * \endcode + * + * @see LocalPointerBase + * @stable ICU 4.4 + */ +template<typename T> +class LocalArray : public LocalPointerBase<T> { +public: + using LocalPointerBase<T>::operator*; + using LocalPointerBase<T>::operator->; + /** + * Constructor takes ownership. + * @param p simple pointer to an array of T objects that is adopted + * @stable ICU 4.4 + */ + explicit LocalArray(T *p=NULL) : LocalPointerBase<T>(p) {} + /** + * Constructor takes ownership and reports an error if NULL. + * + * This constructor is intended to be used with other-class constructors + * that may report a failure UErrorCode, + * so that callers need to check only for U_FAILURE(errorCode) + * and not also separately for isNull(). + * + * @param p simple pointer to an array of T objects that is adopted + * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR + * if p==NULL and no other failure code had been set + * @stable ICU 56 + */ + LocalArray(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) { + if(p==NULL && U_SUCCESS(errorCode)) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } + } + /** + * Move constructor, leaves src with isNull(). + * @param src source smart pointer + * @stable ICU 56 + */ + LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) { + src.ptr=NULL; + } + + /** + * Constructs a LocalArray from a C++11 std::unique_ptr of an array type. + * The LocalPointer steals the array owned by the std::unique_ptr. + * + * This constructor works via move semantics. If your std::unique_ptr is + * in a local variable, you must use std::move. + * + * @param p The std::unique_ptr from which the array will be stolen. + * @stable ICU 64 + */ + explicit LocalArray(std::unique_ptr<T[]> &&p) + : LocalPointerBase<T>(p.release()) {} + + /** + * Destructor deletes the array it owns. + * @stable ICU 4.4 + */ + ~LocalArray() { + delete[] LocalPointerBase<T>::ptr; + } + /** + * Move assignment operator, leaves src with isNull(). + * The behavior is undefined if *this and src are the same object. + * @param src source smart pointer + * @return *this + * @stable ICU 56 + */ + LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT { + delete[] LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=src.ptr; + src.ptr=NULL; + return *this; + } + + /** + * Move-assign from an std::unique_ptr to this LocalPointer. + * Steals the array from the std::unique_ptr. + * + * @param p The std::unique_ptr from which the array will be stolen. + * @return *this + * @stable ICU 64 + */ + LocalArray<T> &operator=(std::unique_ptr<T[]> &&p) U_NOEXCEPT { + adoptInstead(p.release()); + return *this; + } + + /** + * Swap pointers. + * @param other other smart pointer + * @stable ICU 56 + */ + void swap(LocalArray<T> &other) U_NOEXCEPT { + T *temp=LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=other.ptr; + other.ptr=temp; + } + /** + * Non-member LocalArray swap function. + * @param p1 will get p2's pointer + * @param p2 will get p1's pointer + * @stable ICU 56 + */ + friend inline void swap(LocalArray<T> &p1, LocalArray<T> &p2) U_NOEXCEPT { + p1.swap(p2); + } + /** + * Deletes the array it owns, + * and adopts (takes ownership of) the one passed in. + * @param p simple pointer to an array of T objects that is adopted + * @stable ICU 4.4 + */ + void adoptInstead(T *p) { + delete[] LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=p; + } + /** + * Deletes the array it owns, + * and adopts (takes ownership of) the one passed in. + * + * If U_FAILURE(errorCode), then the current array is retained and the new one deleted. + * + * If U_SUCCESS(errorCode) but the input pointer is NULL, + * then U_MEMORY_ALLOCATION_ERROR is set, + * the current array is deleted, and NULL is set. + * + * @param p simple pointer to an array of T objects that is adopted + * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR + * if p==NULL and no other failure code had been set + * @stable ICU 56 + */ + void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) { + if(U_SUCCESS(errorCode)) { + delete[] LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=p; + if(p==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } + } else { + delete[] p; + } + } + /** + * Array item access (writable). + * No index bounds check. + * @param i array index + * @return reference to the array item + * @stable ICU 4.4 + */ + T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; } + + /** + * Conversion operator to a C++11 std::unique_ptr. + * Disowns the object and gives it to the returned std::unique_ptr. + * + * This operator works via move semantics. If your LocalPointer is + * in a local variable, you must use std::move. + * + * @return An std::unique_ptr owning the pointer previously owned by this + * icu::LocalPointer. + * @stable ICU 64 + */ + operator std::unique_ptr<T[]> () && { + return std::unique_ptr<T[]>(LocalPointerBase<T>::orphan()); + } +}; + +/** + * \def U_DEFINE_LOCAL_OPEN_POINTER + * "Smart pointer" definition macro, deletes objects via the closeFunction. + * Defines a subclass of LocalPointerBase which works just + * like LocalPointer<Type> except that this subclass will use the closeFunction + * rather than the C++ delete operator. + * + * Usage example: + * \code + * LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode)); + * utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), + * utf8Out, (int32_t)sizeof(utf8Out), + * utf8In, utf8InLength, &errorCode); + * if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap + * \endcode + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \ + class LocalPointerClassName : public LocalPointerBase<Type> { \ + public: \ + using LocalPointerBase<Type>::operator*; \ + using LocalPointerBase<Type>::operator->; \ + explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \ + LocalPointerClassName(LocalPointerClassName &&src) U_NOEXCEPT \ + : LocalPointerBase<Type>(src.ptr) { \ + src.ptr=NULL; \ + } \ + /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \ + explicit LocalPointerClassName(std::unique_ptr<Type, decltype(&closeFunction)> &&p) \ + : LocalPointerBase<Type>(p.release()) {} \ + ~LocalPointerClassName() { if (ptr != NULL) { closeFunction(ptr); } } \ + LocalPointerClassName &operator=(LocalPointerClassName &&src) U_NOEXCEPT { \ + if (ptr != NULL) { closeFunction(ptr); } \ + LocalPointerBase<Type>::ptr=src.ptr; \ + src.ptr=NULL; \ + return *this; \ + } \ + /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \ + LocalPointerClassName &operator=(std::unique_ptr<Type, decltype(&closeFunction)> &&p) { \ + adoptInstead(p.release()); \ + return *this; \ + } \ + void swap(LocalPointerClassName &other) U_NOEXCEPT { \ + Type *temp=LocalPointerBase<Type>::ptr; \ + LocalPointerBase<Type>::ptr=other.ptr; \ + other.ptr=temp; \ + } \ + friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) U_NOEXCEPT { \ + p1.swap(p2); \ + } \ + void adoptInstead(Type *p) { \ + if (ptr != NULL) { closeFunction(ptr); } \ + ptr=p; \ + } \ + operator std::unique_ptr<Type, decltype(&closeFunction)> () && { \ + return std::unique_ptr<Type, decltype(&closeFunction)>(LocalPointerBase<Type>::orphan(), closeFunction); \ + } \ + } + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ +#endif /* __LOCALPOINTER_H__ */ diff --git a/thirdparty/icu4c/common/unicode/locdspnm.h b/thirdparty/icu4c/common/unicode/locdspnm.h new file mode 100644 index 0000000000..4f06f85704 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/locdspnm.h @@ -0,0 +1,211 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2010-2016, International Business Machines Corporation and +* others. All Rights Reserved. +****************************************************************************** +*/ + +#ifndef LOCDSPNM_H +#define LOCDSPNM_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +/** + * \file + * \brief C++ API: Provides display names of Locale and its components. + */ + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/locid.h" +#include "unicode/strenum.h" +#include "unicode/uscript.h" +#include "unicode/uldnames.h" +#include "unicode/udisplaycontext.h" + +U_NAMESPACE_BEGIN + +/** + * Returns display names of Locales and components of Locales. For + * more information on language, script, region, variant, key, and + * values, see Locale. + * @stable ICU 4.4 + */ +class U_COMMON_API LocaleDisplayNames : public UObject { +public: + /** + * Destructor. + * @stable ICU 4.4 + */ + virtual ~LocaleDisplayNames(); + + /** + * Convenience overload of + * {@link #createInstance(const Locale& locale, UDialectHandling dialectHandling)} + * that specifies STANDARD dialect handling. + * @param locale the display locale + * @return a LocaleDisplayNames instance + * @stable ICU 4.4 + */ + inline static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale); + + /** + * Returns an instance of LocaleDisplayNames that returns names + * formatted for the provided locale, using the provided + * dialectHandling. + * + * @param locale the display locale + * @param dialectHandling how to select names for locales + * @return a LocaleDisplayNames instance + * @stable ICU 4.4 + */ + static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale, + UDialectHandling dialectHandling); + + /** + * Returns an instance of LocaleDisplayNames that returns names formatted + * for the provided locale, using the provided UDisplayContext settings. + * + * @param locale the display locale + * @param contexts List of one or more context settings (e.g. for dialect + * handling, capitalization, etc. + * @param length Number of items in the contexts list + * @return a LocaleDisplayNames instance + * @stable ICU 51 + */ + static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale, + UDisplayContext *contexts, int32_t length); + + // getters for state + /** + * Returns the locale used to determine the display names. This is + * not necessarily the same locale passed to {@link #createInstance}. + * @return the display locale + * @stable ICU 4.4 + */ + virtual const Locale& getLocale() const = 0; + + /** + * Returns the dialect handling used in the display names. + * @return the dialect handling enum + * @stable ICU 4.4 + */ + virtual UDialectHandling getDialectHandling() const = 0; + + /** + * Returns the UDisplayContext value for the specified UDisplayContextType. + * @param type the UDisplayContextType whose value to return + * @return the UDisplayContext for the specified type. + * @stable ICU 51 + */ + virtual UDisplayContext getContext(UDisplayContextType type) const = 0; + + // names for entire locales + /** + * Returns the display name of the provided locale. + * @param locale the locale whose display name to return + * @param result receives the locale's display name + * @return the display name of the provided locale + * @stable ICU 4.4 + */ + virtual UnicodeString& localeDisplayName(const Locale& locale, + UnicodeString& result) const = 0; + + /** + * Returns the display name of the provided locale id. + * @param localeId the id of the locale whose display name to return + * @param result receives the locale's display name + * @return the display name of the provided locale + * @stable ICU 4.4 + */ + virtual UnicodeString& localeDisplayName(const char* localeId, + UnicodeString& result) const = 0; + + // names for components of a locale id + /** + * Returns the display name of the provided language code. + * @param lang the language code + * @param result receives the language code's display name + * @return the display name of the provided language code + * @stable ICU 4.4 + */ + virtual UnicodeString& languageDisplayName(const char* lang, + UnicodeString& result) const = 0; + + /** + * Returns the display name of the provided script code. + * @param script the script code + * @param result receives the script code's display name + * @return the display name of the provided script code + * @stable ICU 4.4 + */ + virtual UnicodeString& scriptDisplayName(const char* script, + UnicodeString& result) const = 0; + + /** + * Returns the display name of the provided script code. + * @param scriptCode the script code number + * @param result receives the script code's display name + * @return the display name of the provided script code + * @stable ICU 4.4 + */ + virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode, + UnicodeString& result) const = 0; + + /** + * Returns the display name of the provided region code. + * @param region the region code + * @param result receives the region code's display name + * @return the display name of the provided region code + * @stable ICU 4.4 + */ + virtual UnicodeString& regionDisplayName(const char* region, + UnicodeString& result) const = 0; + + /** + * Returns the display name of the provided variant. + * @param variant the variant string + * @param result receives the variant's display name + * @return the display name of the provided variant + * @stable ICU 4.4 + */ + virtual UnicodeString& variantDisplayName(const char* variant, + UnicodeString& result) const = 0; + + /** + * Returns the display name of the provided locale key. + * @param key the locale key name + * @param result receives the locale key's display name + * @return the display name of the provided locale key + * @stable ICU 4.4 + */ + virtual UnicodeString& keyDisplayName(const char* key, + UnicodeString& result) const = 0; + + /** + * Returns the display name of the provided value (used with the provided key). + * @param key the locale key name + * @param value the locale key's value + * @param result receives the value's display name + * @return the display name of the provided value + * @stable ICU 4.4 + */ + virtual UnicodeString& keyValueDisplayName(const char* key, const char* value, + UnicodeString& result) const = 0; +}; + +inline LocaleDisplayNames* LocaleDisplayNames::createInstance(const Locale& locale) { + return LocaleDisplayNames::createInstance(locale, ULDN_STANDARD_NAMES); +} + +U_NAMESPACE_END + +#endif + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/locid.h b/thirdparty/icu4c/common/unicode/locid.h new file mode 100644 index 0000000000..ba858d702a --- /dev/null +++ b/thirdparty/icu4c/common/unicode/locid.h @@ -0,0 +1,1274 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1996-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File locid.h +* +* Created by: Helena Shih +* +* Modification History: +* +* Date Name Description +* 02/11/97 aliu Changed gLocPath to fgLocPath and added methods to +* get and set it. +* 04/02/97 aliu Made operator!= inline; fixed return value of getName(). +* 04/15/97 aliu Cleanup for AIX/Win32. +* 04/24/97 aliu Numerous changes per code review. +* 08/18/98 stephen Added tokenizeString(),changed getDisplayName() +* 09/08/98 stephen Moved definition of kEmptyString for Mac Port +* 11/09/99 weiv Added const char * getName() const; +* 04/12/00 srl removing unicodestring api's and cached hash code +* 08/10/01 grhoten Change the static Locales to accessor functions +****************************************************************************** +*/ + +#ifndef LOCID_H +#define LOCID_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/bytestream.h" +#include "unicode/localpointer.h" +#include "unicode/strenum.h" +#include "unicode/stringpiece.h" +#include "unicode/uobject.h" +#include "unicode/putil.h" +#include "unicode/uloc.h" + +/** + * \file + * \brief C++ API: Locale ID object. + */ + +U_NAMESPACE_BEGIN + +// Forward Declarations +void U_CALLCONV locale_available_init(); /**< @internal */ + +class StringEnumeration; +class UnicodeString; + +/** + * A <code>Locale</code> object represents a specific geographical, political, + * or cultural region. An operation that requires a <code>Locale</code> to perform + * its task is called <em>locale-sensitive</em> and uses the <code>Locale</code> + * to tailor information for the user. For example, displaying a number + * is a locale-sensitive operation--the number should be formatted + * according to the customs/conventions of the user's native country, + * region, or culture. + * + * The Locale class is not suitable for subclassing. + * + * <P> + * You can create a <code>Locale</code> object using the constructor in + * this class: + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * Locale( const char* language, + * const char* country, + * const char* variant); + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * The first argument to the constructors is a valid <STRONG>ISO + * Language Code.</STRONG> These codes are the lower-case two-letter + * codes as defined by ISO-639. + * You can find a full list of these codes at: + * <BR><a href ="http://www.loc.gov/standards/iso639-2/"> + * http://www.loc.gov/standards/iso639-2/</a> + * + * <P> + * The second argument to the constructors is a valid <STRONG>ISO Country + * Code.</STRONG> These codes are the upper-case two-letter codes + * as defined by ISO-3166. + * You can find a full list of these codes at a number of sites, such as: + * <BR><a href="http://www.iso.org/iso/en/prods-services/iso3166ma/index.html"> + * http://www.iso.org/iso/en/prods-services/iso3166ma/index.html</a> + * + * <P> + * The third constructor requires a third argument--the <STRONG>Variant.</STRONG> + * The Variant codes are vendor and browser-specific. + * For example, use REVISED for a language's revised script orthography, and POSIX for POSIX. + * Where there are two variants, separate them with an underscore, and + * put the most important one first. For + * example, a Traditional Spanish collation might be referenced, with + * "ES", "ES", "Traditional_POSIX". + * + * <P> + * Because a <code>Locale</code> object is just an identifier for a region, + * no validity check is performed when you construct a <code>Locale</code>. + * If you want to see whether particular resources are available for the + * <code>Locale</code> you construct, you must query those resources. For + * example, ask the <code>NumberFormat</code> for the locales it supports + * using its <code>getAvailableLocales</code> method. + * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular + * locale, you get back the best available match, not necessarily + * precisely what you asked for. For more information, look at + * <code>ResourceBundle</code>. + * + * <P> + * The <code>Locale</code> class provides a number of convenient constants + * that you can use to create <code>Locale</code> objects for commonly used + * locales. For example, the following refers to a <code>Locale</code> object + * for the United States: + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * Locale::getUS() + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * + * <P> + * Once you've created a <code>Locale</code> you can query it for information about + * itself. Use <code>getCountry</code> to get the ISO Country Code and + * <code>getLanguage</code> to get the ISO Language Code. You can + * use <code>getDisplayCountry</code> to get the + * name of the country suitable for displaying to the user. Similarly, + * you can use <code>getDisplayLanguage</code> to get the name of + * the language suitable for displaying to the user. Interestingly, + * the <code>getDisplayXXX</code> methods are themselves locale-sensitive + * and have two versions: one that uses the default locale and one + * that takes a locale as an argument and displays the name or country in + * a language appropriate to that locale. + * + * <P> + * ICU provides a number of classes that perform locale-sensitive + * operations. For example, the <code>NumberFormat</code> class formats + * numbers, currency, or percentages in a locale-sensitive manner. Classes + * such as <code>NumberFormat</code> have a number of convenience methods + * for creating a default object of that type. For example, the + * <code>NumberFormat</code> class provides these three convenience methods + * for creating a default <code>NumberFormat</code> object: + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * UErrorCode success = U_ZERO_ERROR; + * Locale myLocale; + * NumberFormat *nf; + * + * nf = NumberFormat::createInstance( success ); delete nf; + * nf = NumberFormat::createCurrencyInstance( success ); delete nf; + * nf = NumberFormat::createPercentInstance( success ); delete nf; + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * Each of these methods has two variants; one with an explicit locale + * and one without; the latter using the default locale. + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * nf = NumberFormat::createInstance( myLocale, success ); delete nf; + * nf = NumberFormat::createCurrencyInstance( myLocale, success ); delete nf; + * nf = NumberFormat::createPercentInstance( myLocale, success ); delete nf; + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * A <code>Locale</code> is the mechanism for identifying the kind of object + * (<code>NumberFormat</code>) that you would like to get. The locale is + * <STRONG>just</STRONG> a mechanism for identifying objects, + * <STRONG>not</STRONG> a container for the objects themselves. + * + * <P> + * Each class that performs locale-sensitive operations allows you + * to get all the available objects of that type. You can sift + * through these objects by language, country, or variant, + * and use the display names to present a menu to the user. + * For example, you can create a menu of all the collation objects + * suitable for a given language. Such classes implement these + * three class methods: + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * static Locale* getAvailableLocales(int32_t& numLocales) + * static UnicodeString& getDisplayName(const Locale& objectLocale, + * const Locale& displayLocale, + * UnicodeString& displayName) + * static UnicodeString& getDisplayName(const Locale& objectLocale, + * UnicodeString& displayName) + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * + * @stable ICU 2.0 + * @see ResourceBundle + */ +class U_COMMON_API Locale : public UObject { +public: + /** Useful constant for the Root locale. @stable ICU 4.4 */ + static const Locale &U_EXPORT2 getRoot(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getEnglish(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getFrench(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getGerman(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getItalian(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getJapanese(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getKorean(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getChinese(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getSimplifiedChinese(void); + /** Useful constant for this language. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getTraditionalChinese(void); + + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getFrance(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getGermany(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getItaly(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getJapan(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getKorea(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getChina(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getPRC(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getTaiwan(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getUK(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getUS(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getCanada(void); + /** Useful constant for this country/region. @stable ICU 2.0 */ + static const Locale &U_EXPORT2 getCanadaFrench(void); + + + /** + * Construct a default locale object, a Locale for the default locale ID. + * + * @see getDefault + * @see uloc_getDefault + * @stable ICU 2.0 + */ + Locale(); + + /** + * Construct a locale from language, country, variant. + * If an error occurs, then the constructed object will be "bogus" + * (isBogus() will return true). + * + * @param language Lowercase two-letter or three-letter ISO-639 code. + * This parameter can instead be an ICU style C locale (e.g. "en_US"), + * but the other parameters must not be used. + * This parameter can be NULL; if so, + * the locale is initialized to match the current default locale. + * (This is the same as using the default constructor.) + * Please note: The Java Locale class does NOT accept the form + * 'new Locale("en_US")' but only 'new Locale("en","US")' + * + * @param country Uppercase two-letter ISO-3166 code. (optional) + * @param variant Uppercase vendor and browser specific code. See class + * description. (optional) + * @param keywordsAndValues A string consisting of keyword/values pairs, such as + * "collation=phonebook;currency=euro" + * + * @see getDefault + * @see uloc_getDefault + * @stable ICU 2.0 + */ + Locale( const char * language, + const char * country = 0, + const char * variant = 0, + const char * keywordsAndValues = 0); + + /** + * Initializes a Locale object from another Locale object. + * + * @param other The Locale object being copied in. + * @stable ICU 2.0 + */ + Locale(const Locale& other); + + /** + * Move constructor; might leave source in bogus state. + * This locale will have the same contents that the source locale had. + * + * @param other The Locale object being moved in. + * @stable ICU 63 + */ + Locale(Locale&& other) U_NOEXCEPT; + + /** + * Destructor + * @stable ICU 2.0 + */ + virtual ~Locale() ; + + /** + * Replaces the entire contents of *this with the specified value. + * + * @param other The Locale object being copied in. + * @return *this + * @stable ICU 2.0 + */ + Locale& operator=(const Locale& other); + + /** + * Move assignment operator; might leave source in bogus state. + * This locale will have the same contents that the source locale had. + * The behavior is undefined if *this and the source are the same object. + * + * @param other The Locale object being moved in. + * @return *this + * @stable ICU 63 + */ + Locale& operator=(Locale&& other) U_NOEXCEPT; + + /** + * Checks if two locale keys are the same. + * + * @param other The locale key object to be compared with this. + * @return True if the two locale keys are the same, false otherwise. + * @stable ICU 2.0 + */ + UBool operator==(const Locale& other) const; + + /** + * Checks if two locale keys are not the same. + * + * @param other The locale key object to be compared with this. + * @return True if the two locale keys are not the same, false + * otherwise. + * @stable ICU 2.0 + */ + inline UBool operator!=(const Locale& other) const; + + /** + * Clone this object. + * Clones can be used concurrently in multiple threads. + * If an error occurs, then NULL is returned. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see getDynamicClassID + * @stable ICU 2.8 + */ + Locale *clone() const; + +#ifndef U_HIDE_SYSTEM_API + /** + * Common methods of getting the current default Locale. Used for the + * presentation: menus, dialogs, etc. Generally set once when your applet or + * application is initialized, then never reset. (If you do reset the + * default locale, you probably want to reload your GUI, so that the change + * is reflected in your interface.) + * + * More advanced programs will allow users to use different locales for + * different fields, e.g. in a spreadsheet. + * + * Note that the initial setting will match the host system. + * @return a reference to the Locale object for the default locale ID + * @system + * @stable ICU 2.0 + */ + static const Locale& U_EXPORT2 getDefault(void); + + /** + * Sets the default. Normally set once at the beginning of a process, + * then never reset. + * setDefault() only changes ICU's default locale ID, <strong>not</strong> + * the default locale ID of the runtime environment. + * + * @param newLocale Locale to set to. If NULL, set to the value obtained + * from the runtime environment. + * @param success The error code. + * @system + * @stable ICU 2.0 + */ + static void U_EXPORT2 setDefault(const Locale& newLocale, + UErrorCode& success); +#endif /* U_HIDE_SYSTEM_API */ + + /** + * Returns a Locale for the specified BCP47 language tag string. + * If the specified language tag contains any ill-formed subtags, + * the first such subtag and all following subtags are ignored. + * <p> + * This implements the 'Language-Tag' production of BCP 47, and so + * supports legacy language tags (marked as “Type: grandfathered” in BCP 47) + * (regular and irregular) as well as private use language tags. + * + * Private use tags are represented as 'x-whatever', + * and legacy tags are converted to their canonical replacements where they exist. + * + * Note that a few legacy tags have no modern replacement; + * these will be converted using the fallback described in + * the first paragraph, so some information might be lost. + * + * @param tag the input BCP47 language tag. + * @param status error information if creating the Locale failed. + * @return the Locale for the specified BCP47 language tag. + * @stable ICU 63 + */ + static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status); + + /** + * Returns a well-formed language tag for this Locale. + * <p> + * <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax + * requirement will be silently omitted from the result. + * + * If this function fails, partial output may have been written to the sink. + * + * @param sink the output sink receiving the BCP47 language + * tag for this Locale. + * @param status error information if creating the language tag failed. + * @stable ICU 63 + */ + void toLanguageTag(ByteSink& sink, UErrorCode& status) const; + + /** + * Returns a well-formed language tag for this Locale. + * <p> + * <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax + * requirement will be silently omitted from the result. + * + * @param status error information if creating the language tag failed. + * @return the BCP47 language tag for this Locale. + * @stable ICU 63 + */ + template<typename StringClass> + inline StringClass toLanguageTag(UErrorCode& status) const; + + /** + * Creates a locale which has had minimal canonicalization + * as per uloc_getName(). + * @param name The name to create from. If name is null, + * the default Locale is used. + * @return new locale object + * @stable ICU 2.0 + * @see uloc_getName + */ + static Locale U_EXPORT2 createFromName(const char *name); + + /** + * Creates a locale from the given string after canonicalizing + * the string according to CLDR by calling uloc_canonicalize(). + * @param name the locale ID to create from. Must not be NULL. + * @return a new locale object corresponding to the given name + * @stable ICU 3.0 + * @see uloc_canonicalize + */ + static Locale U_EXPORT2 createCanonical(const char* name); + + /** + * Returns the locale's ISO-639 language code. + * @return An alias to the code + * @stable ICU 2.0 + */ + inline const char * getLanguage( ) const; + + /** + * Returns the locale's ISO-15924 abbreviation script code. + * @return An alias to the code + * @see uscript_getShortName + * @see uscript_getCode + * @stable ICU 2.8 + */ + inline const char * getScript( ) const; + + /** + * Returns the locale's ISO-3166 country code. + * @return An alias to the code + * @stable ICU 2.0 + */ + inline const char * getCountry( ) const; + + /** + * Returns the locale's variant code. + * @return An alias to the code + * @stable ICU 2.0 + */ + inline const char * getVariant( ) const; + + /** + * Returns the programmatic name of the entire locale, with the language, + * country and variant separated by underbars. If a field is missing, up + * to two leading underbars will occur. Example: "en", "de_DE", "en_US_WIN", + * "de__POSIX", "fr__MAC", "__MAC", "_MT", "_FR_EURO" + * @return A pointer to "name". + * @stable ICU 2.0 + */ + inline const char * getName() const; + + /** + * Returns the programmatic name of the entire locale as getName() would return, + * but without keywords. + * @return A pointer to "name". + * @see getName + * @stable ICU 2.8 + */ + const char * getBaseName() const; + + /** + * Add the likely subtags for this Locale, per the algorithm described + * in the following CLDR technical report: + * + * http://www.unicode.org/reports/tr35/#Likely_Subtags + * + * If this Locale is already in the maximal form, or not valid, or there is + * no data available for maximization, the Locale will be unchanged. + * + * For example, "und-Zzzz" cannot be maximized, since there is no + * reasonable maximization. + * + * Examples: + * + * "en" maximizes to "en_Latn_US" + * + * "de" maximizes to "de_Latn_US" + * + * "sr" maximizes to "sr_Cyrl_RS" + * + * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) + * + * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) + * + * @param status error information if maximizing this Locale failed. + * If this Locale is not well-formed, the error code is + * U_ILLEGAL_ARGUMENT_ERROR. + * @stable ICU 63 + */ + void addLikelySubtags(UErrorCode& status); + + /** + * Minimize the subtags for this Locale, per the algorithm described + * in the following CLDR technical report: + * + * http://www.unicode.org/reports/tr35/#Likely_Subtags + * + * If this Locale is already in the minimal form, or not valid, or there is + * no data available for minimization, the Locale will be unchanged. + * + * Since the minimization algorithm relies on proper maximization, see the + * comments for addLikelySubtags for reasons why there might not be any + * data. + * + * Examples: + * + * "en_Latn_US" minimizes to "en" + * + * "de_Latn_US" minimizes to "de" + * + * "sr_Cyrl_RS" minimizes to "sr" + * + * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the + * script, and minimizing to "zh" would imply "zh_Hans_CN".) + * + * @param status error information if maximizing this Locale failed. + * If this Locale is not well-formed, the error code is + * U_ILLEGAL_ARGUMENT_ERROR. + * @stable ICU 63 + */ + void minimizeSubtags(UErrorCode& status); + +#ifndef U_HIDE_DRAFT_API + /** + * Canonicalize the locale ID of this object according to CLDR. + * @param status the status code + * @draft ICU 67 + * @see createCanonical + */ + void canonicalize(UErrorCode& status); +#endif // U_HIDE_DRAFT_API + + /** + * Gets the list of keywords for the specified locale. + * + * @param status the status code + * @return pointer to StringEnumeration class, or NULL if there are no keywords. + * Client must dispose of it by calling delete. + * @see getKeywords + * @stable ICU 2.8 + */ + StringEnumeration * createKeywords(UErrorCode &status) const; + + /** + * Gets the list of Unicode keywords for the specified locale. + * + * @param status the status code + * @return pointer to StringEnumeration class, or NULL if there are no keywords. + * Client must dispose of it by calling delete. + * @see getUnicodeKeywords + * @stable ICU 63 + */ + StringEnumeration * createUnicodeKeywords(UErrorCode &status) const; + + /** + * Gets the set of keywords for this Locale. + * + * A wrapper to call createKeywords() and write the resulting + * keywords as standard strings (or compatible objects) into any kind of + * container that can be written to by an STL style output iterator. + * + * @param iterator an STL style output iterator to write the keywords to. + * @param status error information if creating set of keywords failed. + * @stable ICU 63 + */ + template<typename StringClass, typename OutputIterator> + inline void getKeywords(OutputIterator iterator, UErrorCode& status) const; + + /** + * Gets the set of Unicode keywords for this Locale. + * + * A wrapper to call createUnicodeKeywords() and write the resulting + * keywords as standard strings (or compatible objects) into any kind of + * container that can be written to by an STL style output iterator. + * + * @param iterator an STL style output iterator to write the keywords to. + * @param status error information if creating set of keywords failed. + * @stable ICU 63 + */ + template<typename StringClass, typename OutputIterator> + inline void getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const; + + /** + * Gets the value for a keyword. + * + * This uses legacy keyword=value pairs, like "collation=phonebook". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword for which we want the value. Case insensitive. + * @param buffer The buffer to receive the keyword value. + * @param bufferCapacity The capacity of receiving buffer + * @param status Returns any error information while performing this operation. + * @return the length of the keyword value + * + * @stable ICU 2.8 + */ + int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufferCapacity, UErrorCode &status) const; + + /** + * Gets the value for a keyword. + * + * This uses legacy keyword=value pairs, like "collation=phonebook". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword for which we want the value. + * @param sink the sink to receive the keyword value. + * @param status error information if getting the value failed. + * @stable ICU 63 + */ + void getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const; + + /** + * Gets the value for a keyword. + * + * This uses legacy keyword=value pairs, like "collation=phonebook". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword for which we want the value. + * @param status error information if getting the value failed. + * @return the keyword value. + * @stable ICU 63 + */ + template<typename StringClass> + inline StringClass getKeywordValue(StringPiece keywordName, UErrorCode& status) const; + + /** + * Gets the Unicode value for a Unicode keyword. + * + * This uses Unicode key-value pairs, like "co-phonebk". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword for which we want the value. + * @param sink the sink to receive the keyword value. + * @param status error information if getting the value failed. + * @stable ICU 63 + */ + void getUnicodeKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const; + + /** + * Gets the Unicode value for a Unicode keyword. + * + * This uses Unicode key-value pairs, like "co-phonebk". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword for which we want the value. + * @param status error information if getting the value failed. + * @return the keyword value. + * @stable ICU 63 + */ + template<typename StringClass> + inline StringClass getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const; + + /** + * Sets or removes the value for a keyword. + * + * For removing all keywords, use getBaseName(), + * and construct a new Locale if it differs from getName(). + * + * This uses legacy keyword=value pairs, like "collation=phonebook". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword to be set. Case insensitive. + * @param keywordValue value of the keyword to be set. If 0-length or + * NULL, will result in the keyword being removed. No error is given if + * that keyword does not exist. + * @param status Returns any error information while performing this operation. + * + * @stable ICU 49 + */ + void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status); + + /** + * Sets or removes the value for a keyword. + * + * For removing all keywords, use getBaseName(), + * and construct a new Locale if it differs from getName(). + * + * This uses legacy keyword=value pairs, like "collation=phonebook". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword to be set. + * @param keywordValue value of the keyword to be set. If 0-length or + * NULL, will result in the keyword being removed. No error is given if + * that keyword does not exist. + * @param status Returns any error information while performing this operation. + * @stable ICU 63 + */ + void setKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status); + + /** + * Sets or removes the Unicode value for a Unicode keyword. + * + * For removing all keywords, use getBaseName(), + * and construct a new Locale if it differs from getName(). + * + * This uses Unicode key-value pairs, like "co-phonebk". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword to be set. + * @param keywordValue value of the keyword to be set. If 0-length or + * NULL, will result in the keyword being removed. No error is given if + * that keyword does not exist. + * @param status Returns any error information while performing this operation. + * @stable ICU 63 + */ + void setUnicodeKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status); + + /** + * returns the locale's three-letter language code, as specified + * in ISO draft standard ISO-639-2. + * @return An alias to the code, or an empty string + * @stable ICU 2.0 + */ + const char * getISO3Language() const; + + /** + * Fills in "name" with the locale's three-letter ISO-3166 country code. + * @return An alias to the code, or an empty string + * @stable ICU 2.0 + */ + const char * getISO3Country() const; + + /** + * Returns the Windows LCID value corresponding to this locale. + * This value is stored in the resource data for the locale as a one-to-four-digit + * hexadecimal number. If the resource is missing, in the wrong format, or + * there is no Windows LCID value that corresponds to this locale, returns 0. + * @stable ICU 2.0 + */ + uint32_t getLCID(void) const; + + /** + * Returns whether this locale's script is written right-to-left. + * If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags(). + * If no likely script is known, then false is returned. + * + * A script is right-to-left according to the CLDR script metadata + * which corresponds to whether the script's letters have Bidi_Class=R or AL. + * + * Returns true for "ar" and "en-Hebr", false for "zh" and "fa-Cyrl". + * + * @return true if the locale's script is written right-to-left + * @stable ICU 54 + */ + UBool isRightToLeft() const; + + /** + * Fills in "dispLang" with the name of this locale's language in a format suitable for + * user display in the default locale. For example, if the locale's language code is + * "fr" and the default locale's language code is "en", this function would set + * dispLang to "French". + * @param dispLang Receives the language's display name. + * @return A reference to "dispLang". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayLanguage(UnicodeString& dispLang) const; + + /** + * Fills in "dispLang" with the name of this locale's language in a format suitable for + * user display in the locale specified by "displayLocale". For example, if the locale's + * language code is "en" and displayLocale's language code is "fr", this function would set + * dispLang to "Anglais". + * @param displayLocale Specifies the locale to be used to display the name. In other words, + * if the locale's language code is "en", passing Locale::getFrench() for + * displayLocale would result in "Anglais", while passing Locale::getGerman() + * for displayLocale would result in "Englisch". + * @param dispLang Receives the language's display name. + * @return A reference to "dispLang". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayLanguage( const Locale& displayLocale, + UnicodeString& dispLang) const; + + /** + * Fills in "dispScript" with the name of this locale's script in a format suitable + * for user display in the default locale. For example, if the locale's script code + * is "LATN" and the default locale's language code is "en", this function would set + * dispScript to "Latin". + * @param dispScript Receives the scripts's display name. + * @return A reference to "dispScript". + * @stable ICU 2.8 + */ + UnicodeString& getDisplayScript( UnicodeString& dispScript) const; + + /** + * Fills in "dispScript" with the name of this locale's country in a format suitable + * for user display in the locale specified by "displayLocale". For example, if the locale's + * script code is "LATN" and displayLocale's language code is "en", this function would set + * dispScript to "Latin". + * @param displayLocale Specifies the locale to be used to display the name. In other + * words, if the locale's script code is "LATN", passing + * Locale::getFrench() for displayLocale would result in "", while + * passing Locale::getGerman() for displayLocale would result in + * "". + * @param dispScript Receives the scripts's display name. + * @return A reference to "dispScript". + * @stable ICU 2.8 + */ + UnicodeString& getDisplayScript( const Locale& displayLocale, + UnicodeString& dispScript) const; + + /** + * Fills in "dispCountry" with the name of this locale's country in a format suitable + * for user display in the default locale. For example, if the locale's country code + * is "FR" and the default locale's language code is "en", this function would set + * dispCountry to "France". + * @param dispCountry Receives the country's display name. + * @return A reference to "dispCountry". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayCountry( UnicodeString& dispCountry) const; + + /** + * Fills in "dispCountry" with the name of this locale's country in a format suitable + * for user display in the locale specified by "displayLocale". For example, if the locale's + * country code is "US" and displayLocale's language code is "fr", this function would set + * dispCountry to "États-Unis". + * @param displayLocale Specifies the locale to be used to display the name. In other + * words, if the locale's country code is "US", passing + * Locale::getFrench() for displayLocale would result in "États-Unis", while + * passing Locale::getGerman() for displayLocale would result in + * "Vereinigte Staaten". + * @param dispCountry Receives the country's display name. + * @return A reference to "dispCountry". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayCountry( const Locale& displayLocale, + UnicodeString& dispCountry) const; + + /** + * Fills in "dispVar" with the name of this locale's variant code in a format suitable + * for user display in the default locale. + * @param dispVar Receives the variant's name. + * @return A reference to "dispVar". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayVariant( UnicodeString& dispVar) const; + + /** + * Fills in "dispVar" with the name of this locale's variant code in a format + * suitable for user display in the locale specified by "displayLocale". + * @param displayLocale Specifies the locale to be used to display the name. + * @param dispVar Receives the variant's display name. + * @return A reference to "dispVar". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayVariant( const Locale& displayLocale, + UnicodeString& dispVar) const; + + /** + * Fills in "name" with the name of this locale in a format suitable for user display + * in the default locale. This function uses getDisplayLanguage(), getDisplayCountry(), + * and getDisplayVariant() to do its work, and outputs the display name in the format + * "language (country[,variant])". For example, if the default locale is en_US, then + * fr_FR's display name would be "French (France)", and es_MX_Traditional's display name + * would be "Spanish (Mexico,Traditional)". + * @param name Receives the locale's display name. + * @return A reference to "name". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayName( UnicodeString& name) const; + + /** + * Fills in "name" with the name of this locale in a format suitable for user display + * in the locale specified by "displayLocale". This function uses getDisplayLanguage(), + * getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display + * name in the format "language (country[,variant])". For example, if displayLocale is + * fr_FR, then en_US's display name would be "Anglais (États-Unis)", and no_NO_NY's + * display name would be "norvégien (Norvège,NY)". + * @param displayLocale Specifies the locale to be used to display the name. + * @param name Receives the locale's display name. + * @return A reference to "name". + * @stable ICU 2.0 + */ + UnicodeString& getDisplayName( const Locale& displayLocale, + UnicodeString& name) const; + + /** + * Generates a hash code for the locale. + * @stable ICU 2.0 + */ + int32_t hashCode(void) const; + + /** + * Sets the locale to bogus + * A bogus locale represents a non-existing locale associated + * with services that can be instantiated from non-locale data + * in addition to locale (for example, collation can be + * instantiated from a locale and from a rule set). + * @stable ICU 2.1 + */ + void setToBogus(); + + /** + * Gets the bogus state. Locale object can be bogus if it doesn't exist + * @return false if it is a real locale, true if it is a bogus locale + * @stable ICU 2.1 + */ + inline UBool isBogus(void) const; + + /** + * Returns a list of all installed locales. + * @param count Receives the number of locales in the list. + * @return A pointer to an array of Locale objects. This array is the list + * of all locales with installed resource files. The called does NOT + * get ownership of this list, and must NOT delete it. + * @stable ICU 2.0 + */ + static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); + + /** + * Gets a list of all available 2-letter country codes defined in ISO 3166. This is a + * pointer to an array of pointers to arrays of char. All of these pointers are + * owned by ICU-- do not delete them, and do not write through them. The array is + * terminated with a null pointer. + * @return a list of all available country codes + * @stable ICU 2.0 + */ + static const char* const* U_EXPORT2 getISOCountries(); + + /** + * Gets a list of all available language codes defined in ISO 639. This is a pointer + * to an array of pointers to arrays of char. All of these pointers are owned + * by ICU-- do not delete them, and do not write through them. The array is + * terminated with a null pointer. + * @return a list of all available language codes + * @stable ICU 2.0 + */ + static const char* const* U_EXPORT2 getISOLanguages(); + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + + /** + * A Locale iterator interface similar to a Java Iterator<Locale>. + * @stable ICU 65 + */ + class U_COMMON_API Iterator /* not : public UObject because this is an interface/mixin class */ { + public: + /** @stable ICU 65 */ + virtual ~Iterator(); + + /** + * @return true if next() can be called again. + * @stable ICU 65 + */ + virtual UBool hasNext() const = 0; + + /** + * @return the next locale. + * @stable ICU 65 + */ + virtual const Locale &next() = 0; + }; + + /** + * A generic Locale iterator implementation over Locale input iterators. + * @stable ICU 65 + */ + template<typename Iter> + class RangeIterator : public Iterator, public UMemory { + public: + /** + * Constructs an iterator from a begin/end range. + * Each of the iterator parameter values must be an + * input iterator whose value is convertible to const Locale &. + * + * @param begin Start of range. + * @param end Exclusive end of range. + * @stable ICU 65 + */ + RangeIterator(Iter begin, Iter end) : it_(begin), end_(end) {} + + /** + * @return true if next() can be called again. + * @stable ICU 65 + */ + UBool hasNext() const override { return it_ != end_; } + + /** + * @return the next locale. + * @stable ICU 65 + */ + const Locale &next() override { return *it_++; } + + private: + Iter it_; + const Iter end_; + }; + + /** + * A generic Locale iterator implementation over Locale input iterators. + * Calls the converter to convert each *begin to a const Locale &. + * @stable ICU 65 + */ + template<typename Iter, typename Conv> + class ConvertingIterator : public Iterator, public UMemory { + public: + /** + * Constructs an iterator from a begin/end range. + * Each of the iterator parameter values must be an + * input iterator whose value the converter converts to const Locale &. + * + * @param begin Start of range. + * @param end Exclusive end of range. + * @param converter Converter from *begin to const Locale & or compatible. + * @stable ICU 65 + */ + ConvertingIterator(Iter begin, Iter end, Conv converter) : + it_(begin), end_(end), converter_(converter) {} + + /** + * @return true if next() can be called again. + * @stable ICU 65 + */ + UBool hasNext() const override { return it_ != end_; } + + /** + * @return the next locale. + * @stable ICU 65 + */ + const Locale &next() override { return converter_(*it_++); } + + private: + Iter it_; + const Iter end_; + Conv converter_; + }; + +protected: /* only protected for testing purposes. DO NOT USE. */ +#ifndef U_HIDE_INTERNAL_API + /** + * Set this from a single POSIX style locale string. + * @internal + */ + void setFromPOSIXID(const char *posixID); +#endif /* U_HIDE_INTERNAL_API */ + +private: + /** + * Initialize the locale object with a new name. + * Was deprecated - used in implementation - moved internal + * + * @param cLocaleID The new locale name. + * @param canonicalize whether to call uloc_canonicalize on cLocaleID + */ + Locale& init(const char* cLocaleID, UBool canonicalize); + + /* + * Internal constructor to allow construction of a locale object with + * NO side effects. (Default constructor tries to get + * the default locale.) + */ + enum ELocaleType { + eBOGUS + }; + Locale(ELocaleType); + + /** + * Initialize the locale cache for commonly used locales + */ + static Locale *getLocaleCache(void); + + char language[ULOC_LANG_CAPACITY]; + char script[ULOC_SCRIPT_CAPACITY]; + char country[ULOC_COUNTRY_CAPACITY]; + int32_t variantBegin; + char* fullName; + char fullNameBuffer[ULOC_FULLNAME_CAPACITY]; + // name without keywords + char* baseName; + void initBaseName(UErrorCode& status); + + UBool fIsBogus; + + static const Locale &getLocale(int locid); + + /** + * A friend to allow the default locale to be set by either the C or C++ API. + * @internal (private) + */ + friend Locale *locale_set_default_internal(const char *, UErrorCode& status); + + /** + * @internal (private) + */ + friend void U_CALLCONV locale_available_init(); +}; + +inline UBool +Locale::operator!=(const Locale& other) const +{ + return !operator==(other); +} + +template<typename StringClass> inline StringClass +Locale::toLanguageTag(UErrorCode& status) const +{ + StringClass result; + StringByteSink<StringClass> sink(&result); + toLanguageTag(sink, status); + return result; +} + +inline const char * +Locale::getCountry() const +{ + return country; +} + +inline const char * +Locale::getLanguage() const +{ + return language; +} + +inline const char * +Locale::getScript() const +{ + return script; +} + +inline const char * +Locale::getVariant() const +{ + return &baseName[variantBegin]; +} + +inline const char * +Locale::getName() const +{ + return fullName; +} + +template<typename StringClass, typename OutputIterator> inline void +Locale::getKeywords(OutputIterator iterator, UErrorCode& status) const +{ + LocalPointer<StringEnumeration> keys(createKeywords(status)); + if (U_FAILURE(status) || keys.isNull()) { + return; + } + for (;;) { + int32_t resultLength; + const char* buffer = keys->next(&resultLength, status); + if (U_FAILURE(status) || buffer == nullptr) { + return; + } + *iterator++ = StringClass(buffer, resultLength); + } +} + +template<typename StringClass, typename OutputIterator> inline void +Locale::getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const +{ + LocalPointer<StringEnumeration> keys(createUnicodeKeywords(status)); + if (U_FAILURE(status) || keys.isNull()) { + return; + } + for (;;) { + int32_t resultLength; + const char* buffer = keys->next(&resultLength, status); + if (U_FAILURE(status) || buffer == nullptr) { + return; + } + *iterator++ = StringClass(buffer, resultLength); + } +} + +template<typename StringClass> inline StringClass +Locale::getKeywordValue(StringPiece keywordName, UErrorCode& status) const +{ + StringClass result; + StringByteSink<StringClass> sink(&result); + getKeywordValue(keywordName, sink, status); + return result; +} + +template<typename StringClass> inline StringClass +Locale::getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const +{ + StringClass result; + StringByteSink<StringClass> sink(&result); + getUnicodeKeywordValue(keywordName, sink, status); + return result; +} + +inline UBool +Locale::isBogus(void) const { + return fIsBogus; +} + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/messagepattern.h b/thirdparty/icu4c/common/unicode/messagepattern.h new file mode 100644 index 0000000000..98e7b70b1f --- /dev/null +++ b/thirdparty/icu4c/common/unicode/messagepattern.h @@ -0,0 +1,949 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011-2013, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: messagepattern.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011mar14 +* created by: Markus W. Scherer +*/ + +#ifndef __MESSAGEPATTERN_H__ +#define __MESSAGEPATTERN_H__ + +/** + * \file + * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/parseerr.h" +#include "unicode/unistr.h" + +/** + * Mode for when an apostrophe starts quoted literal text for MessageFormat output. + * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h + * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE). + * <p> + * A pair of adjacent apostrophes always results in a single apostrophe in the output, + * even when the pair is between two single, text-quoting apostrophes. + * <p> + * The following table shows examples of desired MessageFormat.format() output + * with the pattern strings that yield that output. + * <p> + * <table> + * <tr> + * <th>Desired output</th> + * <th>DOUBLE_OPTIONAL</th> + * <th>DOUBLE_REQUIRED</th> + * </tr> + * <tr> + * <td>I see {many}</td> + * <td>I see '{many}'</td> + * <td>(same)</td> + * </tr> + * <tr> + * <td>I said {'Wow!'}</td> + * <td>I said '{''Wow!''}'</td> + * <td>(same)</td> + * </tr> + * <tr> + * <td>I don't know</td> + * <td>I don't know OR<br> I don''t know</td> + * <td>I don''t know</td> + * </tr> + * </table> + * @stable ICU 4.8 + * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE + */ +enum UMessagePatternApostropheMode { + /** + * A literal apostrophe is represented by + * either a single or a double apostrophe pattern character. + * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text + * if it immediately precedes a curly brace {}, + * or a pipe symbol | if inside a choice format, + * or a pound symbol # if inside a plural format. + * <p> + * This is the default behavior starting with ICU 4.8. + * @stable ICU 4.8 + */ + UMSGPAT_APOS_DOUBLE_OPTIONAL, + /** + * A literal apostrophe must be represented by + * a double apostrophe pattern character. + * A single apostrophe always starts quoted literal text. + * <p> + * This is the behavior of ICU 4.6 and earlier, and of the JDK. + * @stable ICU 4.8 + */ + UMSGPAT_APOS_DOUBLE_REQUIRED +}; +/** + * @stable ICU 4.8 + */ +typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode; + +/** + * MessagePattern::Part type constants. + * @stable ICU 4.8 + */ +enum UMessagePatternPartType { + /** + * Start of a message pattern (main or nested). + * The length is 0 for the top-level message + * and for a choice argument sub-message, otherwise 1 for the '{'. + * The value indicates the nesting level, starting with 0 for the main message. + * <p> + * There is always a later MSG_LIMIT part. + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_MSG_START, + /** + * End of a message pattern (main or nested). + * The length is 0 for the top-level message and + * the last sub-message of a choice argument, + * otherwise 1 for the '}' or (in a choice argument style) the '|'. + * The value indicates the nesting level, starting with 0 for the main message. + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_MSG_LIMIT, + /** + * Indicates a substring of the pattern string which is to be skipped when formatting. + * For example, an apostrophe that begins or ends quoted text + * would be indicated with such a part. + * The value is undefined and currently always 0. + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_SKIP_SYNTAX, + /** + * Indicates that a syntax character needs to be inserted for auto-quoting. + * The length is 0. + * The value is the character code of the insertion character. (U+0027=APOSTROPHE) + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_INSERT_CHAR, + /** + * Indicates a syntactic (non-escaped) # symbol in a plural variant. + * When formatting, replace this part's substring with the + * (value-offset) for the plural argument value. + * The value is undefined and currently always 0. + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_REPLACE_NUMBER, + /** + * Start of an argument. + * The length is 1 for the '{'. + * The value is the ordinal value of the ArgType. Use getArgType(). + * <p> + * This part is followed by either an ARG_NUMBER or ARG_NAME, + * followed by optional argument sub-parts (see UMessagePatternArgType constants) + * and finally an ARG_LIMIT part. + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_ARG_START, + /** + * End of an argument. + * The length is 1 for the '}'. + * The value is the ordinal value of the ArgType. Use getArgType(). + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_ARG_LIMIT, + /** + * The argument number, provided by the value. + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_ARG_NUMBER, + /** + * The argument name. + * The value is undefined and currently always 0. + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_ARG_NAME, + /** + * The argument type. + * The value is undefined and currently always 0. + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_ARG_TYPE, + /** + * The argument style text. + * The value is undefined and currently always 0. + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_ARG_STYLE, + /** + * A selector substring in a "complex" argument style. + * The value is undefined and currently always 0. + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_ARG_SELECTOR, + /** + * An integer value, for example the offset or an explicit selector value + * in a PluralFormat style. + * The part value is the integer value. + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_ARG_INT, + /** + * A numeric value, for example the offset or an explicit selector value + * in a PluralFormat style. + * The part value is an index into an internal array of numeric values; + * use getNumericValue(). + * @stable ICU 4.8 + */ + UMSGPAT_PART_TYPE_ARG_DOUBLE +}; +/** + * @stable ICU 4.8 + */ +typedef enum UMessagePatternPartType UMessagePatternPartType; + +/** + * Argument type constants. + * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts. + * + * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, + * with a nesting level one greater than the surrounding message. + * @stable ICU 4.8 + */ +enum UMessagePatternArgType { + /** + * The argument has no specified type. + * @stable ICU 4.8 + */ + UMSGPAT_ARG_TYPE_NONE, + /** + * The argument has a "simple" type which is provided by the ARG_TYPE part. + * An ARG_STYLE part might follow that. + * @stable ICU 4.8 + */ + UMSGPAT_ARG_TYPE_SIMPLE, + /** + * The argument is a ChoiceFormat with one or more + * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. + * @stable ICU 4.8 + */ + UMSGPAT_ARG_TYPE_CHOICE, + /** + * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset + * (e.g., offset:1) + * and one or more (ARG_SELECTOR [explicit-value] message) tuples. + * If the selector has an explicit value (e.g., =2), then + * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. + * Otherwise the message immediately follows the ARG_SELECTOR. + * @stable ICU 4.8 + */ + UMSGPAT_ARG_TYPE_PLURAL, + /** + * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. + * @stable ICU 4.8 + */ + UMSGPAT_ARG_TYPE_SELECT, + /** + * The argument is an ordinal-number PluralFormat + * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL. + * @stable ICU 50 + */ + UMSGPAT_ARG_TYPE_SELECTORDINAL +}; +/** + * @stable ICU 4.8 + */ +typedef enum UMessagePatternArgType UMessagePatternArgType; + +/** + * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE + * Returns true if the argument type has a plural style part sequence and semantics, + * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL. + * @stable ICU 50 + */ +#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \ + ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL) + +enum { + /** + * Return value from MessagePattern.validateArgumentName() for when + * the string is a valid "pattern identifier" but not a number. + * @stable ICU 4.8 + */ + UMSGPAT_ARG_NAME_NOT_NUMBER=-1, + + /** + * Return value from MessagePattern.validateArgumentName() for when + * the string is invalid. + * It might not be a valid "pattern identifier", + * or it have only ASCII digits but there is a leading zero or the number is too large. + * @stable ICU 4.8 + */ + UMSGPAT_ARG_NAME_NOT_VALID=-2 +}; + +/** + * Special value that is returned by getNumericValue(Part) when no + * numeric value is defined for a part. + * @see MessagePattern.getNumericValue() + * @stable ICU 4.8 + */ +#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789)) + +U_NAMESPACE_BEGIN + +class MessagePatternDoubleList; +class MessagePatternPartsList; + +/** + * Parses and represents ICU MessageFormat patterns. + * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. + * Used in the implementations of those classes as well as in tools + * for message validation, translation and format conversion. + * <p> + * The parser handles all syntax relevant for identifying message arguments. + * This includes "complex" arguments whose style strings contain + * nested MessageFormat pattern substrings. + * For "simple" arguments (with no nested MessageFormat pattern substrings), + * the argument style is not parsed any further. + * <p> + * The parser handles named and numbered message arguments and allows both in one message. + * <p> + * Once a pattern has been parsed successfully, iterate through the parsed data + * with countParts(), getPart() and related methods. + * <p> + * The data logically represents a parse tree, but is stored and accessed + * as a list of "parts" for fast and simple parsing and to minimize object allocations. + * Arguments and nested messages are best handled via recursion. + * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns + * the index of the corresponding _LIMIT "part". + * <p> + * List of "parts": + * <pre> + * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT + * argument = noneArg | simpleArg | complexArg + * complexArg = choiceArg | pluralArg | selectArg + * + * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE + * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE + * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE + * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL + * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT + * + * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ + * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ + * selectStyle = (ARG_SELECTOR message)+ + * </pre> + * <ul> + * <li>Literal output text is not represented directly by "parts" but accessed + * between parts of a message, from one part's getLimit() to the next part's getIndex(). + * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. + * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or + * the less-than-or-equal-to sign (U+2264). + * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. + * The optional numeric Part between each (ARG_SELECTOR, message) pair + * is the value of an explicit-number selector like "=2", + * otherwise the selector is a non-numeric identifier. + * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. + * </ul> + * <p> + * This class is not intended for public subclassing. + * + * @stable ICU 4.8 + */ +class U_COMMON_API MessagePattern : public UObject { +public: + /** + * Constructs an empty MessagePattern with default UMessagePatternApostropheMode. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @stable ICU 4.8 + */ + MessagePattern(UErrorCode &errorCode); + + /** + * Constructs an empty MessagePattern. + * @param mode Explicit UMessagePatternApostropheMode. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @stable ICU 4.8 + */ + MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode); + + /** + * Constructs a MessagePattern with default UMessagePatternApostropheMode and + * parses the MessageFormat pattern string. + * @param pattern a MessageFormat pattern string + * @param parseError Struct to receive information on the position + * of an error within the pattern. + * Can be NULL. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * TODO: turn @throws into UErrorCode specifics? + * @throws IllegalArgumentException for syntax errors in the pattern string + * @throws IndexOutOfBoundsException if certain limits are exceeded + * (e.g., argument number too high, argument name too long, etc.) + * @throws NumberFormatException if a number could not be parsed + * @stable ICU 4.8 + */ + MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); + + /** + * Copy constructor. + * @param other Object to copy. + * @stable ICU 4.8 + */ + MessagePattern(const MessagePattern &other); + + /** + * Assignment operator. + * @param other Object to copy. + * @return *this=other + * @stable ICU 4.8 + */ + MessagePattern &operator=(const MessagePattern &other); + + /** + * Destructor. + * @stable ICU 4.8 + */ + virtual ~MessagePattern(); + + /** + * Parses a MessageFormat pattern string. + * @param pattern a MessageFormat pattern string + * @param parseError Struct to receive information on the position + * of an error within the pattern. + * Can be NULL. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return *this + * @throws IllegalArgumentException for syntax errors in the pattern string + * @throws IndexOutOfBoundsException if certain limits are exceeded + * (e.g., argument number too high, argument name too long, etc.) + * @throws NumberFormatException if a number could not be parsed + * @stable ICU 4.8 + */ + MessagePattern &parse(const UnicodeString &pattern, + UParseError *parseError, UErrorCode &errorCode); + + /** + * Parses a ChoiceFormat pattern string. + * @param pattern a ChoiceFormat pattern string + * @param parseError Struct to receive information on the position + * of an error within the pattern. + * Can be NULL. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return *this + * @throws IllegalArgumentException for syntax errors in the pattern string + * @throws IndexOutOfBoundsException if certain limits are exceeded + * (e.g., argument number too high, argument name too long, etc.) + * @throws NumberFormatException if a number could not be parsed + * @stable ICU 4.8 + */ + MessagePattern &parseChoiceStyle(const UnicodeString &pattern, + UParseError *parseError, UErrorCode &errorCode); + + /** + * Parses a PluralFormat pattern string. + * @param pattern a PluralFormat pattern string + * @param parseError Struct to receive information on the position + * of an error within the pattern. + * Can be NULL. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return *this + * @throws IllegalArgumentException for syntax errors in the pattern string + * @throws IndexOutOfBoundsException if certain limits are exceeded + * (e.g., argument number too high, argument name too long, etc.) + * @throws NumberFormatException if a number could not be parsed + * @stable ICU 4.8 + */ + MessagePattern &parsePluralStyle(const UnicodeString &pattern, + UParseError *parseError, UErrorCode &errorCode); + + /** + * Parses a SelectFormat pattern string. + * @param pattern a SelectFormat pattern string + * @param parseError Struct to receive information on the position + * of an error within the pattern. + * Can be NULL. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return *this + * @throws IllegalArgumentException for syntax errors in the pattern string + * @throws IndexOutOfBoundsException if certain limits are exceeded + * (e.g., argument number too high, argument name too long, etc.) + * @throws NumberFormatException if a number could not be parsed + * @stable ICU 4.8 + */ + MessagePattern &parseSelectStyle(const UnicodeString &pattern, + UParseError *parseError, UErrorCode &errorCode); + + /** + * Clears this MessagePattern. + * countParts() will return 0. + * @stable ICU 4.8 + */ + void clear(); + + /** + * Clears this MessagePattern and sets the UMessagePatternApostropheMode. + * countParts() will return 0. + * @param mode The new UMessagePatternApostropheMode. + * @stable ICU 4.8 + */ + void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) { + clear(); + aposMode=mode; + } + + /** + * @param other another object to compare with. + * @return true if this object is equivalent to the other one. + * @stable ICU 4.8 + */ + UBool operator==(const MessagePattern &other) const; + + /** + * @param other another object to compare with. + * @return false if this object is equivalent to the other one. + * @stable ICU 4.8 + */ + inline UBool operator!=(const MessagePattern &other) const { + return !operator==(other); + } + + /** + * @return A hash code for this object. + * @stable ICU 4.8 + */ + int32_t hashCode() const; + + /** + * @return this instance's UMessagePatternApostropheMode. + * @stable ICU 4.8 + */ + UMessagePatternApostropheMode getApostropheMode() const { + return aposMode; + } + + // Java has package-private jdkAposMode() here. + // In C++, this is declared in the MessageImpl class. + + /** + * @return the parsed pattern string (null if none was parsed). + * @stable ICU 4.8 + */ + const UnicodeString &getPatternString() const { + return msg; + } + + /** + * Does the parsed pattern have named arguments like {first_name}? + * @return true if the parsed pattern has at least one named argument. + * @stable ICU 4.8 + */ + UBool hasNamedArguments() const { + return hasArgNames; + } + + /** + * Does the parsed pattern have numbered arguments like {2}? + * @return true if the parsed pattern has at least one numbered argument. + * @stable ICU 4.8 + */ + UBool hasNumberedArguments() const { + return hasArgNumbers; + } + + /** + * Validates and parses an argument name or argument number string. + * An argument name must be a "pattern identifier", that is, it must contain + * no Unicode Pattern_Syntax or Pattern_White_Space characters. + * If it only contains ASCII digits, then it must be a small integer with no leading zero. + * @param name Input string. + * @return >=0 if the name is a valid number, + * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, + * ARG_NAME_NOT_VALID (-2) if it is neither. + * @stable ICU 4.8 + */ + static int32_t validateArgumentName(const UnicodeString &name); + + /** + * Returns a version of the parsed pattern string where each ASCII apostrophe + * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. + * <p> + * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." + * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." + * @return the deep-auto-quoted version of the parsed pattern string. + * @see MessageFormat.autoQuoteApostrophe() + * @stable ICU 4.8 + */ + UnicodeString autoQuoteApostropheDeep() const; + + class Part; + + /** + * Returns the number of "parts" created by parsing the pattern string. + * Returns 0 if no pattern has been parsed or clear() was called. + * @return the number of pattern parts. + * @stable ICU 4.8 + */ + int32_t countParts() const { + return partsLength; + } + + /** + * Gets the i-th pattern "part". + * @param i The index of the Part data. (0..countParts()-1) + * @return the i-th pattern "part". + * @stable ICU 4.8 + */ + const Part &getPart(int32_t i) const { + return parts[i]; + } + + /** + * Returns the UMessagePatternPartType of the i-th pattern "part". + * Convenience method for getPart(i).getType(). + * @param i The index of the Part data. (0..countParts()-1) + * @return The UMessagePatternPartType of the i-th Part. + * @stable ICU 4.8 + */ + UMessagePatternPartType getPartType(int32_t i) const { + return getPart(i).type; + } + + /** + * Returns the pattern index of the specified pattern "part". + * Convenience method for getPart(partIndex).getIndex(). + * @param partIndex The index of the Part data. (0..countParts()-1) + * @return The pattern index of this Part. + * @stable ICU 4.8 + */ + int32_t getPatternIndex(int32_t partIndex) const { + return getPart(partIndex).index; + } + + /** + * Returns the substring of the pattern string indicated by the Part. + * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). + * @param part a part of this MessagePattern. + * @return the substring associated with part. + * @stable ICU 4.8 + */ + UnicodeString getSubstring(const Part &part) const { + return msg.tempSubString(part.index, part.length); + } + + /** + * Compares the part's substring with the input string s. + * @param part a part of this MessagePattern. + * @param s a string. + * @return true if getSubstring(part).equals(s). + * @stable ICU 4.8 + */ + UBool partSubstringMatches(const Part &part, const UnicodeString &s) const { + return 0==msg.compare(part.index, part.length, s); + } + + /** + * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. + * @param part a part of this MessagePattern. + * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part. + * @stable ICU 4.8 + */ + double getNumericValue(const Part &part) const; + + /** + * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. + * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) + * @return the "offset:" value. + * @stable ICU 4.8 + */ + double getPluralOffset(int32_t pluralStart) const; + + /** + * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. + * @param start The index of some Part data (0..countParts()-1); + * this Part should be of Type ARG_START or MSG_START. + * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, + * or start itself if getPartType(msgStart)!=ARG|MSG_START. + * @stable ICU 4.8 + */ + int32_t getLimitPartIndex(int32_t start) const { + int32_t limit=getPart(start).limitPartIndex; + if(limit<start) { + return start; + } + return limit; + } + + /** + * A message pattern "part", representing a pattern parsing event. + * There is a part for the start and end of a message or argument, + * for quoting and escaping of and with ASCII apostrophes, + * and for syntax elements of "complex" arguments. + * @stable ICU 4.8 + */ + class Part : public UMemory { + public: + /** + * Default constructor, do not use. + * @internal + */ + Part() {} + + /** + * Returns the type of this part. + * @return the part type. + * @stable ICU 4.8 + */ + UMessagePatternPartType getType() const { + return type; + } + + /** + * Returns the pattern string index associated with this Part. + * @return this part's pattern string index. + * @stable ICU 4.8 + */ + int32_t getIndex() const { + return index; + } + + /** + * Returns the length of the pattern substring associated with this Part. + * This is 0 for some parts. + * @return this part's pattern substring length. + * @stable ICU 4.8 + */ + int32_t getLength() const { + return length; + } + + /** + * Returns the pattern string limit (exclusive-end) index associated with this Part. + * Convenience method for getIndex()+getLength(). + * @return this part's pattern string limit index, same as getIndex()+getLength(). + * @stable ICU 4.8 + */ + int32_t getLimit() const { + return index+length; + } + + /** + * Returns a value associated with this part. + * See the documentation of each part type for details. + * @return the part value. + * @stable ICU 4.8 + */ + int32_t getValue() const { + return value; + } + + /** + * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, + * otherwise UMSGPAT_ARG_TYPE_NONE. + * @return the argument type for this part. + * @stable ICU 4.8 + */ + UMessagePatternArgType getArgType() const { + UMessagePatternPartType msgType=getType(); + if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) { + return (UMessagePatternArgType)value; + } else { + return UMSGPAT_ARG_TYPE_NONE; + } + } + + /** + * Indicates whether the Part type has a numeric value. + * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue(). + * @param type The Part type to be tested. + * @return true if the Part type has a numeric value. + * @stable ICU 4.8 + */ + static UBool hasNumericValue(UMessagePatternPartType type) { + return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE; + } + + /** + * @param other another object to compare with. + * @return true if this object is equivalent to the other one. + * @stable ICU 4.8 + */ + UBool operator==(const Part &other) const; + + /** + * @param other another object to compare with. + * @return false if this object is equivalent to the other one. + * @stable ICU 4.8 + */ + inline UBool operator!=(const Part &other) const { + return !operator==(other); + } + + /** + * @return A hash code for this object. + * @stable ICU 4.8 + */ + int32_t hashCode() const { + return ((type*37+index)*37+length)*37+value; + } + + private: + friend class MessagePattern; + + static const int32_t MAX_LENGTH=0xffff; + static const int32_t MAX_VALUE=0x7fff; + + // Some fields are not final because they are modified during pattern parsing. + // After pattern parsing, the parts are effectively immutable. + UMessagePatternPartType type; + int32_t index; + uint16_t length; + int16_t value; + int32_t limitPartIndex; + }; + +private: + void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); + + void postParse(); + + int32_t parseMessage(int32_t index, int32_t msgStartLength, + int32_t nestingLevel, UMessagePatternArgType parentType, + UParseError *parseError, UErrorCode &errorCode); + + int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, + UParseError *parseError, UErrorCode &errorCode); + + int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode); + + int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel, + UParseError *parseError, UErrorCode &errorCode); + + int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel, + UParseError *parseError, UErrorCode &errorCode); + + /** + * Validates and parses an argument name or argument number string. + * This internal method assumes that the input substring is a "pattern identifier". + * @return >=0 if the name is a valid number, + * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, + * ARG_NAME_NOT_VALID (-2) if it is neither. + * @see #validateArgumentName(String) + */ + static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit); + + int32_t parseArgNumber(int32_t start, int32_t limit) { + return parseArgNumber(msg, start, limit); + } + + /** + * Parses a number from the specified message substring. + * @param start start index into the message string + * @param limit limit index into the message string, must be start<limit + * @param allowInfinity true if U+221E is allowed (for ChoiceFormat) + * @param parseError + * @param errorCode + */ + void parseDouble(int32_t start, int32_t limit, UBool allowInfinity, + UParseError *parseError, UErrorCode &errorCode); + + // Java has package-private appendReducedApostrophes() here. + // In C++, this is declared in the MessageImpl class. + + int32_t skipWhiteSpace(int32_t index); + + int32_t skipIdentifier(int32_t index); + + /** + * Skips a sequence of characters that could occur in a double value. + * Does not fully parse or validate the value. + */ + int32_t skipDouble(int32_t index); + + static UBool isArgTypeChar(UChar32 c); + + UBool isChoice(int32_t index); + + UBool isPlural(int32_t index); + + UBool isSelect(int32_t index); + + UBool isOrdinal(int32_t index); + + /** + * @return true if we are inside a MessageFormat (sub-)pattern, + * as opposed to inside a top-level choice/plural/select pattern. + */ + UBool inMessageFormatPattern(int32_t nestingLevel); + + /** + * @return true if we are in a MessageFormat sub-pattern + * of a top-level ChoiceFormat pattern. + */ + UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType); + + void addPart(UMessagePatternPartType type, int32_t index, int32_t length, + int32_t value, UErrorCode &errorCode); + + void addLimitPart(int32_t start, + UMessagePatternPartType type, int32_t index, int32_t length, + int32_t value, UErrorCode &errorCode); + + void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode); + + void setParseError(UParseError *parseError, int32_t index); + + UBool init(UErrorCode &errorCode); + UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode); + + UMessagePatternApostropheMode aposMode; + UnicodeString msg; + // ArrayList<Part> parts=new ArrayList<Part>(); + MessagePatternPartsList *partsList; + Part *parts; + int32_t partsLength; + // ArrayList<Double> numericValues; + MessagePatternDoubleList *numericValuesList; + double *numericValues; + int32_t numericValuesLength; + UBool hasArgNames; + UBool hasArgNumbers; + UBool needsAutoQuoting; +}; + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_FORMATTING + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __MESSAGEPATTERN_H__ diff --git a/thirdparty/icu4c/common/unicode/normalizer2.h b/thirdparty/icu4c/common/unicode/normalizer2.h new file mode 100644 index 0000000000..5eb1d95caf --- /dev/null +++ b/thirdparty/icu4c/common/unicode/normalizer2.h @@ -0,0 +1,779 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2009-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: normalizer2.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009nov22 +* created by: Markus W. Scherer +*/ + +#ifndef __NORMALIZER2_H__ +#define __NORMALIZER2_H__ + +/** + * \file + * \brief C++ API: New API for Unicode Normalization. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/stringpiece.h" +#include "unicode/uniset.h" +#include "unicode/unistr.h" +#include "unicode/unorm2.h" + +U_NAMESPACE_BEGIN + +class ByteSink; + +/** + * Unicode normalization functionality for standard Unicode normalization or + * for using custom mapping tables. + * All instances of this class are unmodifiable/immutable. + * Instances returned by getInstance() are singletons that must not be deleted by the caller. + * The Normalizer2 class is not intended for public subclassing. + * + * The primary functions are to produce a normalized string and to detect whether + * a string is already normalized. + * The most commonly used normalization forms are those defined in + * http://www.unicode.org/unicode/reports/tr15/ + * However, this API supports additional normalization forms for specialized purposes. + * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE) + * and can be used in implementations of UTS #46. + * + * Not only are the standard compose and decompose modes supplied, + * but additional modes are provided as documented in the Mode enum. + * + * Some of the functions in this class identify normalization boundaries. + * At a normalization boundary, the portions of the string + * before it and starting from it do not interact and can be handled independently. + * + * The spanQuickCheckYes() stops at a normalization boundary. + * When the goal is a normalized string, then the text before the boundary + * can be copied, and the remainder can be processed with normalizeSecondAndAppend(). + * + * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether + * a character is guaranteed to be at a normalization boundary, + * regardless of context. + * This is used for moving from one normalization boundary to the next + * or preceding boundary, and for performing iterative normalization. + * + * Iterative normalization is useful when only a small portion of a + * longer string needs to be processed. + * For example, in ICU, iterative normalization is used by the NormalizationTransliterator + * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart() + * (to process only the substring for which sort key bytes are computed). + * + * The set of normalization boundaries returned by these functions may not be + * complete: There may be more boundaries that could be returned. + * Different functions may return different boundaries. + * @stable ICU 4.4 + */ +class U_COMMON_API Normalizer2 : public UObject { +public: + /** + * Destructor. + * @stable ICU 4.4 + */ + ~Normalizer2(); + + /** + * Returns a Normalizer2 instance for Unicode NFC normalization. + * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ + static const Normalizer2 * + getNFCInstance(UErrorCode &errorCode); + + /** + * Returns a Normalizer2 instance for Unicode NFD normalization. + * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ + static const Normalizer2 * + getNFDInstance(UErrorCode &errorCode); + + /** + * Returns a Normalizer2 instance for Unicode NFKC normalization. + * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ + static const Normalizer2 * + getNFKCInstance(UErrorCode &errorCode); + + /** + * Returns a Normalizer2 instance for Unicode NFKD normalization. + * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ + static const Normalizer2 * + getNFKDInstance(UErrorCode &errorCode); + + /** + * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization. + * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ + static const Normalizer2 * + getNFKCCasefoldInstance(UErrorCode &errorCode); + + /** + * Returns a Normalizer2 instance which uses the specified data file + * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) + * and which composes or decomposes text according to the specified mode. + * Returns an unmodifiable singleton instance. Do not delete it. + * + * Use packageName=NULL for data files that are part of ICU's own data. + * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. + * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. + * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. + * + * @param packageName NULL for ICU built-in data, otherwise application data package name + * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file + * @param mode normalization mode (compose or decompose etc.) + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 4.4 + */ + static const Normalizer2 * + getInstance(const char *packageName, + const char *name, + UNormalization2Mode mode, + UErrorCode &errorCode); + + /** + * Returns the normalized form of the source string. + * @param src source string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return normalized src + * @stable ICU 4.4 + */ + UnicodeString + normalize(const UnicodeString &src, UErrorCode &errorCode) const { + UnicodeString result; + normalize(src, result, errorCode); + return result; + } + /** + * Writes the normalized form of the source string to the destination string + * (replacing its contents) and returns the destination string. + * The source and destination strings must be different objects. + * @param src source string + * @param dest destination string; its contents is replaced with normalized src + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return dest + * @stable ICU 4.4 + */ + virtual UnicodeString & + normalize(const UnicodeString &src, + UnicodeString &dest, + UErrorCode &errorCode) const = 0; + + /** + * Normalizes a UTF-8 string and optionally records how source substrings + * relate to changed and unchanged result substrings. + * + * Currently implemented completely only for "compose" modes, + * such as for NFC, NFKC, and NFKC_Casefold + * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). + * Otherwise currently converts to & from UTF-16 and does not support edits. + * + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src Source UTF-8 string. + * @param sink A ByteSink to which the normalized UTF-8 result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @stable ICU 60 + */ + virtual void + normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const; + + /** + * Appends the normalized form of the second string to the first string + * (merging them at the boundary) and returns the first string. + * The result is normalized if the first string was normalized. + * The first and second strings must be different objects. + * @param first string, should be normalized + * @param second string, will be normalized + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return first + * @stable ICU 4.4 + */ + virtual UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const = 0; + /** + * Appends the second string to the first string + * (merging them at the boundary) and returns the first string. + * The result is normalized if both the strings were normalized. + * The first and second strings must be different objects. + * @param first string, should be normalized + * @param second string, should be normalized + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return first + * @stable ICU 4.4 + */ + virtual UnicodeString & + append(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const = 0; + + /** + * Gets the decomposition mapping of c. + * Roughly equivalent to normalizing the String form of c + * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function + * returns false and does not write a string + * if c does not have a decomposition mapping in this instance's data. + * This function is independent of the mode of the Normalizer2. + * @param c code point + * @param decomposition String object which will be set to c's + * decomposition mapping, if there is one. + * @return true if c has a decomposition, otherwise false + * @stable ICU 4.6 + */ + virtual UBool + getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0; + + /** + * Gets the raw decomposition mapping of c. + * + * This is similar to the getDecomposition() method but returns the + * raw decomposition mapping as specified in UnicodeData.txt or + * (for custom data) in the mapping files processed by the gennorm2 tool. + * By contrast, getDecomposition() returns the processed, + * recursively-decomposed version of this mapping. + * + * When used on a standard NFKC Normalizer2 instance, + * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. + * + * When used on a standard NFC Normalizer2 instance, + * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); + * in this case, the result contains either one or two code points (=1..4 char16_ts). + * + * This function is independent of the mode of the Normalizer2. + * The default implementation returns false. + * @param c code point + * @param decomposition String object which will be set to c's + * raw decomposition mapping, if there is one. + * @return true if c has a decomposition, otherwise false + * @stable ICU 49 + */ + virtual UBool + getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; + + /** + * Performs pairwise composition of a & b and returns the composite if there is one. + * + * Returns a composite code point c only if c has a two-way mapping to a+b. + * In standard Unicode normalization, this means that + * c has a canonical decomposition to a+b + * and c does not have the Full_Composition_Exclusion property. + * + * This function is independent of the mode of the Normalizer2. + * The default implementation returns a negative value. + * @param a A (normalization starter) code point. + * @param b Another code point. + * @return The non-negative composite code point if there is one; otherwise a negative value. + * @stable ICU 49 + */ + virtual UChar32 + composePair(UChar32 a, UChar32 b) const; + + /** + * Gets the combining class of c. + * The default implementation returns 0 + * but all standard implementations return the Unicode Canonical_Combining_Class value. + * @param c code point + * @return c's combining class + * @stable ICU 49 + */ + virtual uint8_t + getCombiningClass(UChar32 c) const; + + /** + * Tests if the string is normalized. + * Internally, in cases where the quickCheck() method would return "maybe" + * (which is only possible for the two COMPOSE modes) this method + * resolves to "yes" or "no" to provide a definitive result, + * at the cost of doing more work in those cases. + * @param s input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return true if s is normalized + * @stable ICU 4.4 + */ + virtual UBool + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; + /** + * Tests if the UTF-8 string is normalized. + * Internally, in cases where the quickCheck() method would return "maybe" + * (which is only possible for the two COMPOSE modes) this method + * resolves to "yes" or "no" to provide a definitive result, + * at the cost of doing more work in those cases. + * + * This works for all normalization modes, + * but it is currently optimized for UTF-8 only for "compose" modes, + * such as for NFC, NFKC, and NFKC_Casefold + * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). + * For other modes it currently converts to UTF-16 and calls isNormalized(). + * + * @param s UTF-8 input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return true if s is normalized + * @stable ICU 60 + */ + virtual UBool + isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const; + + + /** + * Tests if the string is normalized. + * For the two COMPOSE modes, the result could be "maybe" in cases that + * would take a little more work to resolve definitively. + * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster + * combination of quick check + normalization, to avoid + * re-checking the "yes" prefix. + * @param s input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return UNormalizationCheckResult + * @stable ICU 4.4 + */ + virtual UNormalizationCheckResult + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0; + + /** + * Returns the end of the normalized substring of the input string. + * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> + * the substring <code>UnicodeString(s, 0, end)</code> + * will pass the quick check with a "yes" result. + * + * The returned end index is usually one or more characters before the + * "no" or "maybe" character: The end index is at a normalization boundary. + * (See the class documentation for more about normalization boundaries.) + * + * When the goal is a normalized string and most input strings are expected + * to be normalized already, then call this method, + * and if it returns a prefix shorter than the input string, + * copy that prefix and use normalizeSecondAndAppend() for the remainder. + * @param s input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return "yes" span end index + * @stable ICU 4.4 + */ + virtual int32_t + spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0; + + /** + * Tests if the character always has a normalization boundary before it, + * regardless of context. + * If true, then the character does not normalization-interact with + * preceding characters. + * In other words, a string containing this character can be normalized + * by processing portions before this character and starting from this + * character independently. + * This is used for iterative normalization. See the class documentation for details. + * @param c character to test + * @return true if c has a normalization boundary before it + * @stable ICU 4.4 + */ + virtual UBool hasBoundaryBefore(UChar32 c) const = 0; + + /** + * Tests if the character always has a normalization boundary after it, + * regardless of context. + * If true, then the character does not normalization-interact with + * following characters. + * In other words, a string containing this character can be normalized + * by processing portions up to this character and after this + * character independently. + * This is used for iterative normalization. See the class documentation for details. + * Note that this operation may be significantly slower than hasBoundaryBefore(). + * @param c character to test + * @return true if c has a normalization boundary after it + * @stable ICU 4.4 + */ + virtual UBool hasBoundaryAfter(UChar32 c) const = 0; + + /** + * Tests if the character is normalization-inert. + * If true, then the character does not change, nor normalization-interact with + * preceding or following characters. + * In other words, a string containing this character can be normalized + * by processing portions before this character and after this + * character independently. + * This is used for iterative normalization. See the class documentation for details. + * Note that this operation may be significantly slower than hasBoundaryBefore(). + * @param c character to test + * @return true if c is normalization-inert + * @stable ICU 4.4 + */ + virtual UBool isInert(UChar32 c) const = 0; +}; + +/** + * Normalization filtered by a UnicodeSet. + * Normalizes portions of the text contained in the filter set and leaves + * portions not contained in the filter set unchanged. + * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE). + * Not-in-the-filter text is treated as "is normalized" and "quick check yes". + * This class implements all of (and only) the Normalizer2 API. + * An instance of this class is unmodifiable/immutable but is constructed and + * must be destructed by the owner. + * @stable ICU 4.4 + */ +class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { +public: + /** + * Constructs a filtered normalizer wrapping any Normalizer2 instance + * and a filter set. + * Both are aliased and must not be modified or deleted while this object + * is used. + * The filter set should be frozen; otherwise the performance will suffer greatly. + * @param n2 wrapped Normalizer2 instance + * @param filterSet UnicodeSet which determines the characters to be normalized + * @stable ICU 4.4 + */ + FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) : + norm2(n2), set(filterSet) {} + + /** + * Destructor. + * @stable ICU 4.4 + */ + ~FilteredNormalizer2(); + + /** + * Writes the normalized form of the source string to the destination string + * (replacing its contents) and returns the destination string. + * The source and destination strings must be different objects. + * @param src source string + * @param dest destination string; its contents is replaced with normalized src + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return dest + * @stable ICU 4.4 + */ + virtual UnicodeString & + normalize(const UnicodeString &src, + UnicodeString &dest, + UErrorCode &errorCode) const U_OVERRIDE; + + /** + * Normalizes a UTF-8 string and optionally records how source substrings + * relate to changed and unchanged result substrings. + * + * Currently implemented completely only for "compose" modes, + * such as for NFC, NFKC, and NFKC_Casefold + * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). + * Otherwise currently converts to & from UTF-16 and does not support edits. + * + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src Source UTF-8 string. + * @param sink A ByteSink to which the normalized UTF-8 result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @stable ICU 60 + */ + virtual void + normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const U_OVERRIDE; + + /** + * Appends the normalized form of the second string to the first string + * (merging them at the boundary) and returns the first string. + * The result is normalized if the first string was normalized. + * The first and second strings must be different objects. + * @param first string, should be normalized + * @param second string, will be normalized + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return first + * @stable ICU 4.4 + */ + virtual UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const U_OVERRIDE; + /** + * Appends the second string to the first string + * (merging them at the boundary) and returns the first string. + * The result is normalized if both the strings were normalized. + * The first and second strings must be different objects. + * @param first string, should be normalized + * @param second string, should be normalized + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return first + * @stable ICU 4.4 + */ + virtual UnicodeString & + append(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const U_OVERRIDE; + + /** + * Gets the decomposition mapping of c. + * For details see the base class documentation. + * + * This function is independent of the mode of the Normalizer2. + * @param c code point + * @param decomposition String object which will be set to c's + * decomposition mapping, if there is one. + * @return true if c has a decomposition, otherwise false + * @stable ICU 4.6 + */ + virtual UBool + getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE; + + /** + * Gets the raw decomposition mapping of c. + * For details see the base class documentation. + * + * This function is independent of the mode of the Normalizer2. + * @param c code point + * @param decomposition String object which will be set to c's + * raw decomposition mapping, if there is one. + * @return true if c has a decomposition, otherwise false + * @stable ICU 49 + */ + virtual UBool + getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE; + + /** + * Performs pairwise composition of a & b and returns the composite if there is one. + * For details see the base class documentation. + * + * This function is independent of the mode of the Normalizer2. + * @param a A (normalization starter) code point. + * @param b Another code point. + * @return The non-negative composite code point if there is one; otherwise a negative value. + * @stable ICU 49 + */ + virtual UChar32 + composePair(UChar32 a, UChar32 b) const U_OVERRIDE; + + /** + * Gets the combining class of c. + * The default implementation returns 0 + * but all standard implementations return the Unicode Canonical_Combining_Class value. + * @param c code point + * @return c's combining class + * @stable ICU 49 + */ + virtual uint8_t + getCombiningClass(UChar32 c) const U_OVERRIDE; + + /** + * Tests if the string is normalized. + * For details see the Normalizer2 base class documentation. + * @param s input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return true if s is normalized + * @stable ICU 4.4 + */ + virtual UBool + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE; + /** + * Tests if the UTF-8 string is normalized. + * Internally, in cases where the quickCheck() method would return "maybe" + * (which is only possible for the two COMPOSE modes) this method + * resolves to "yes" or "no" to provide a definitive result, + * at the cost of doing more work in those cases. + * + * This works for all normalization modes, + * but it is currently optimized for UTF-8 only for "compose" modes, + * such as for NFC, NFKC, and NFKC_Casefold + * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). + * For other modes it currently converts to UTF-16 and calls isNormalized(). + * + * @param s UTF-8 input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return true if s is normalized + * @stable ICU 60 + */ + virtual UBool + isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE; + /** + * Tests if the string is normalized. + * For details see the Normalizer2 base class documentation. + * @param s input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return UNormalizationCheckResult + * @stable ICU 4.4 + */ + virtual UNormalizationCheckResult + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE; + /** + * Returns the end of the normalized substring of the input string. + * For details see the Normalizer2 base class documentation. + * @param s input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return "yes" span end index + * @stable ICU 4.4 + */ + virtual int32_t + spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE; + + /** + * Tests if the character always has a normalization boundary before it, + * regardless of context. + * For details see the Normalizer2 base class documentation. + * @param c character to test + * @return true if c has a normalization boundary before it + * @stable ICU 4.4 + */ + virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE; + + /** + * Tests if the character always has a normalization boundary after it, + * regardless of context. + * For details see the Normalizer2 base class documentation. + * @param c character to test + * @return true if c has a normalization boundary after it + * @stable ICU 4.4 + */ + virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE; + + /** + * Tests if the character is normalization-inert. + * For details see the Normalizer2 base class documentation. + * @param c character to test + * @return true if c is normalization-inert + * @stable ICU 4.4 + */ + virtual UBool isInert(UChar32 c) const U_OVERRIDE; +private: + UnicodeString & + normalize(const UnicodeString &src, + UnicodeString &dest, + USetSpanCondition spanCondition, + UErrorCode &errorCode) const; + + void + normalizeUTF8(uint32_t options, const char *src, int32_t length, + ByteSink &sink, Edits *edits, + USetSpanCondition spanCondition, + UErrorCode &errorCode) const; + + UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UBool doNormalize, + UErrorCode &errorCode) const; + + const Normalizer2 &norm2; + const UnicodeSet &set; +}; + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_NORMALIZATION + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __NORMALIZER2_H__ diff --git a/thirdparty/icu4c/common/unicode/normlzr.h b/thirdparty/icu4c/common/unicode/normlzr.h new file mode 100644 index 0000000000..3352983cdc --- /dev/null +++ b/thirdparty/icu4c/common/unicode/normlzr.h @@ -0,0 +1,816 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************** + * COPYRIGHT: + * Copyright (c) 1996-2015, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************** + */ + +#ifndef NORMLZR_H +#define NORMLZR_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +/** + * \file + * \brief C++ API: Unicode Normalization + */ + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/chariter.h" +#include "unicode/normalizer2.h" +#include "unicode/unistr.h" +#include "unicode/unorm.h" +#include "unicode/uobject.h" + +U_NAMESPACE_BEGIN +/** + * Old Unicode normalization API. + * + * This API has been replaced by the Normalizer2 class and is only available + * for backward compatibility. This class simply delegates to the Normalizer2 class. + * There is one exception: The new API does not provide a replacement for Normalizer::compare(). + * + * The Normalizer class supports the standard normalization forms described in + * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode"> + * Unicode Standard Annex #15: Unicode Normalization Forms</a>. + * + * The Normalizer class consists of two parts: + * - static functions that normalize strings or test if strings are normalized + * - a Normalizer object is an iterator that takes any kind of text and + * provides iteration over its normalized form + * + * The Normalizer class is not suitable for subclassing. + * + * For basic information about normalization forms and details about the C API + * please see the documentation in unorm.h. + * + * The iterator API with the Normalizer constructors and the non-static functions + * use a CharacterIterator as input. It is possible to pass a string which + * is then internally wrapped in a CharacterIterator. + * The input text is not normalized all at once, but incrementally where needed + * (providing efficient random access). + * This allows to pass in a large text but spend only a small amount of time + * normalizing a small part of that text. + * However, if the entire text is normalized, then the iterator will be + * slower than normalizing the entire text at once and iterating over the result. + * A possible use of the Normalizer iterator is also to report an index into the + * original text that is close to where the normalized characters come from. + * + * <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0. + * The earlier implementation reported the getIndex() inconsistently, + * and previous() could not be used after setIndex(), next(), first(), and current(). + * + * Normalizer allows to start normalizing from anywhere in the input text by + * calling setIndexOnly(), first(), or last(). + * Without calling any of these, the iterator will start at the beginning of the text. + * + * At any time, next() returns the next normalized code point (UChar32), + * with post-increment semantics (like CharacterIterator::next32PostInc()). + * previous() returns the previous normalized code point (UChar32), + * with pre-decrement semantics (like CharacterIterator::previous32()). + * + * current() returns the current code point + * (respectively the one at the newly set index) without moving + * the getIndex(). Note that if the text at the current position + * needs to be normalized, then these functions will do that. + * (This is why current() is not const.) + * It is more efficient to call setIndexOnly() instead, which does not + * normalize. + * + * getIndex() always refers to the position in the input text where the normalized + * code points are returned from. It does not always change with each returned + * code point. + * The code point that is returned from any of the functions + * corresponds to text at or after getIndex(), according to the + * function's iteration semantics (post-increment or pre-decrement). + * + * next() returns a code point from at or after the getIndex() + * from before the next() call. After the next() call, the getIndex() + * might have moved to where the next code point will be returned from + * (from a next() or current() call). + * This is semantically equivalent to array access with array[index++] + * (post-increment semantics). + * + * previous() returns a code point from at or after the getIndex() + * from after the previous() call. + * This is semantically equivalent to array access with array[--index] + * (pre-decrement semantics). + * + * Internally, the Normalizer iterator normalizes a small piece of text + * starting at the getIndex() and ending at a following "safe" index. + * The normalized results is stored in an internal string buffer, and + * the code points are iterated from there. + * With multiple iteration calls, this is repeated until the next piece + * of text needs to be normalized, and the getIndex() needs to be moved. + * + * The following "safe" index, the internal buffer, and the secondary + * iteration index into that buffer are not exposed on the API. + * This also means that it is currently not practical to return to + * a particular, arbitrary position in the text because one would need to + * know, and be able to set, in addition to the getIndex(), at least also the + * current index into the internal buffer. + * It is currently only possible to observe when getIndex() changes + * (with careful consideration of the iteration semantics), + * at which time the internal index will be 0. + * For example, if getIndex() is different after next() than before it, + * then the internal index is 0 and one can return to this getIndex() + * later with setIndexOnly(). + * + * Note: While the setIndex() and getIndex() refer to indices in the + * underlying Unicode input text, the next() and previous() methods + * iterate through characters in the normalized output. + * This means that there is not necessarily a one-to-one correspondence + * between characters returned by next() and previous() and the indices + * passed to and returned from setIndex() and getIndex(). + * It is for this reason that Normalizer does not implement the CharacterIterator interface. + * + * @author Laura Werner, Mark Davis, Markus Scherer + * @stable ICU 2.0 + */ +class U_COMMON_API Normalizer : public UObject { +public: +#ifndef U_HIDE_DEPRECATED_API + /** + * If DONE is returned from an iteration function that returns a code point, + * then there are no more normalization results available. + * @deprecated ICU 56 Use Normalizer2 instead. + */ + enum { + DONE=0xffff + }; + + // Constructors + + /** + * Creates a new <code>Normalizer</code> object for iterating over the + * normalized form of a given string. + * <p> + * @param str The string to be normalized. The normalization + * will start at the beginning of the string. + * + * @param mode The normalization mode. + * @deprecated ICU 56 Use Normalizer2 instead. + */ + Normalizer(const UnicodeString& str, UNormalizationMode mode); + + /** + * Creates a new <code>Normalizer</code> object for iterating over the + * normalized form of a given string. + * <p> + * @param str The string to be normalized. The normalization + * will start at the beginning of the string. + * + * @param length Length of the string, or -1 if NUL-terminated. + * @param mode The normalization mode. + * @deprecated ICU 56 Use Normalizer2 instead. + */ + Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode); + + /** + * Creates a new <code>Normalizer</code> object for iterating over the + * normalized form of the given text. + * <p> + * @param iter The input text to be normalized. The normalization + * will start at the beginning of the string. + * + * @param mode The normalization mode. + * @deprecated ICU 56 Use Normalizer2 instead. + */ + Normalizer(const CharacterIterator& iter, UNormalizationMode mode); +#endif /* U_HIDE_DEPRECATED_API */ + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Copy constructor. + * @param copy The object to be copied. + * @deprecated ICU 56 Use Normalizer2 instead. + */ + Normalizer(const Normalizer& copy); + + /** + * Destructor + * @deprecated ICU 56 Use Normalizer2 instead. + */ + virtual ~Normalizer(); +#endif // U_FORCE_HIDE_DEPRECATED_API + + //------------------------------------------------------------------------- + // Static utility methods + //------------------------------------------------------------------------- + +#ifndef U_HIDE_DEPRECATED_API + /** + * Normalizes a <code>UnicodeString</code> according to the specified normalization mode. + * This is a wrapper for unorm_normalize(), using UnicodeString's. + * + * The <code>options</code> parameter specifies which optional + * <code>Normalizer</code> features are to be enabled for this operation. + * + * @param source the input string to be normalized. + * @param mode the normalization mode + * @param options the optional features to be enabled (0 for no options) + * @param result The normalized string (on output). + * @param status The error code. + * @deprecated ICU 56 Use Normalizer2 instead. + */ + static void U_EXPORT2 normalize(const UnicodeString& source, + UNormalizationMode mode, int32_t options, + UnicodeString& result, + UErrorCode &status); + + /** + * Compose a <code>UnicodeString</code>. + * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC. + * This is a wrapper for unorm_normalize(), using UnicodeString's. + * + * The <code>options</code> parameter specifies which optional + * <code>Normalizer</code> features are to be enabled for this operation. + * + * @param source the string to be composed. + * @param compat Perform compatibility decomposition before composition. + * If this argument is <code>false</code>, only canonical + * decomposition will be performed. + * @param options the optional features to be enabled (0 for no options) + * @param result The composed string (on output). + * @param status The error code. + * @deprecated ICU 56 Use Normalizer2 instead. + */ + static void U_EXPORT2 compose(const UnicodeString& source, + UBool compat, int32_t options, + UnicodeString& result, + UErrorCode &status); + + /** + * Static method to decompose a <code>UnicodeString</code>. + * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD. + * This is a wrapper for unorm_normalize(), using UnicodeString's. + * + * The <code>options</code> parameter specifies which optional + * <code>Normalizer</code> features are to be enabled for this operation. + * + * @param source the string to be decomposed. + * @param compat Perform compatibility decomposition. + * If this argument is <code>false</code>, only canonical + * decomposition will be performed. + * @param options the optional features to be enabled (0 for no options) + * @param result The decomposed string (on output). + * @param status The error code. + * @deprecated ICU 56 Use Normalizer2 instead. + */ + static void U_EXPORT2 decompose(const UnicodeString& source, + UBool compat, int32_t options, + UnicodeString& result, + UErrorCode &status); + + /** + * Performing quick check on a string, to quickly determine if the string is + * in a particular normalization format. + * This is a wrapper for unorm_quickCheck(), using a UnicodeString. + * + * Three types of result can be returned UNORM_YES, UNORM_NO or + * UNORM_MAYBE. Result UNORM_YES indicates that the argument + * string is in the desired normalized format, UNORM_NO determines that + * argument string is not in the desired normalized format. A + * UNORM_MAYBE result indicates that a more thorough check is required, + * the user may have to put the string in its normalized form and compare the + * results. + * @param source string for determining if it is in a normalized format + * @param mode normalization format + * @param status A reference to a UErrorCode to receive any errors + * @return UNORM_YES, UNORM_NO or UNORM_MAYBE + * + * @see isNormalized + * @deprecated ICU 56 Use Normalizer2 instead. + */ + static inline UNormalizationCheckResult + quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status); + + /** + * Performing quick check on a string; same as the other version of quickCheck + * but takes an extra options parameter like most normalization functions. + * + * @param source string for determining if it is in a normalized format + * @param mode normalization format + * @param options the optional features to be enabled (0 for no options) + * @param status A reference to a UErrorCode to receive any errors + * @return UNORM_YES, UNORM_NO or UNORM_MAYBE + * + * @see isNormalized + * @deprecated ICU 56 Use Normalizer2 instead. + */ + static UNormalizationCheckResult + quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status); + + /** + * Test if a string is in a given normalization form. + * This is semantically equivalent to source.equals(normalize(source, mode)) . + * + * Unlike unorm_quickCheck(), this function returns a definitive result, + * never a "maybe". + * For NFD, NFKD, and FCD, both functions work exactly the same. + * For NFC and NFKC where quickCheck may return "maybe", this function will + * perform further tests to arrive at a true/false result. + * + * @param src String that is to be tested if it is in a normalization format. + * @param mode Which normalization form to test for. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Boolean value indicating whether the source string is in the + * "mode" normalization form. + * + * @see quickCheck + * @deprecated ICU 56 Use Normalizer2 instead. + */ + static inline UBool + isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode); + + /** + * Test if a string is in a given normalization form; same as the other version of isNormalized + * but takes an extra options parameter like most normalization functions. + * + * @param src String that is to be tested if it is in a normalization format. + * @param mode Which normalization form to test for. + * @param options the optional features to be enabled (0 for no options) + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Boolean value indicating whether the source string is in the + * "mode" normalization form. + * + * @see quickCheck + * @deprecated ICU 56 Use Normalizer2 instead. + */ + static UBool + isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode); + + /** + * Concatenate normalized strings, making sure that the result is normalized as well. + * + * If both the left and the right strings are in + * the normalization form according to "mode/options", + * then the result will be + * + * \code + * dest=normalize(left+right, mode, options) + * \endcode + * + * For details see unorm_concatenate in unorm.h. + * + * @param left Left source string. + * @param right Right source string. + * @param result The output string. + * @param mode The normalization mode. + * @param options A bit set of normalization options. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return result + * + * @see unorm_concatenate + * @see normalize + * @see unorm_next + * @see unorm_previous + * + * @deprecated ICU 56 Use Normalizer2 instead. + */ + static UnicodeString & + U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right, + UnicodeString &result, + UNormalizationMode mode, int32_t options, + UErrorCode &errorCode); +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Compare two strings for canonical equivalence. + * Further options include case-insensitive comparison and + * code point order (as opposed to code unit order). + * + * Canonical equivalence between two strings is defined as their normalized + * forms (NFD or NFC) being identical. + * This function compares strings incrementally instead of normalizing + * (and optionally case-folding) both strings entirely, + * improving performance significantly. + * + * Bulk normalization is only necessary if the strings do not fulfill the FCD + * conditions. Only in this case, and only if the strings are relatively long, + * is memory allocated temporarily. + * For FCD strings and short non-FCD strings there is no memory allocation. + * + * Semantically, this is equivalent to + * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2))) + * where code point order and foldCase are all optional. + * + * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match + * the case folding must be performed first, then the normalization. + * + * @param s1 First source string. + * @param s2 Second source string. + * + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Case-sensitive comparison in code unit order, and the input strings + * are quick-checked for FCD. + * + * - UNORM_INPUT_IS_FCD + * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. + * If not set, the function will quickCheck for FCD + * and normalize if necessary. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_COMPARE_IGNORE_CASE + * Set to compare strings case-insensitively using case folding, + * instead of case-sensitively. + * If set, then the following case folding options are used. + * + * - Options as used with case-insensitive comparisons, currently: + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * (see u_strCaseCompare for details) + * + * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT + * + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return <0 or 0 or >0 as usual for string comparisons + * + * @see unorm_compare + * @see normalize + * @see UNORM_FCD + * @see u_strCompare + * @see u_strCaseCompare + * + * @stable ICU 2.2 + */ + static inline int32_t + compare(const UnicodeString &s1, const UnicodeString &s2, + uint32_t options, + UErrorCode &errorCode); + +#ifndef U_HIDE_DEPRECATED_API + //------------------------------------------------------------------------- + // Iteration API + //------------------------------------------------------------------------- + + /** + * Return the current character in the normalized text. + * current() may need to normalize some text at getIndex(). + * The getIndex() is not changed. + * + * @return the current normalized code point + * @deprecated ICU 56 Use Normalizer2 instead. + */ + UChar32 current(void); + + /** + * Return the first character in the normalized text. + * This is equivalent to setIndexOnly(startIndex()) followed by next(). + * (Post-increment semantics.) + * + * @return the first normalized code point + * @deprecated ICU 56 Use Normalizer2 instead. + */ + UChar32 first(void); + + /** + * Return the last character in the normalized text. + * This is equivalent to setIndexOnly(endIndex()) followed by previous(). + * (Pre-decrement semantics.) + * + * @return the last normalized code point + * @deprecated ICU 56 Use Normalizer2 instead. + */ + UChar32 last(void); + + /** + * Return the next character in the normalized text. + * (Post-increment semantics.) + * If the end of the text has already been reached, DONE is returned. + * The DONE value could be confused with a U+FFFF non-character code point + * in the text. If this is possible, you can test getIndex()<endIndex() + * before calling next(), or (getIndex()<endIndex() || last()!=DONE) + * after calling next(). (Calling last() will change the iterator state!) + * + * The C API unorm_next() is more efficient and does not have this ambiguity. + * + * @return the next normalized code point + * @deprecated ICU 56 Use Normalizer2 instead. + */ + UChar32 next(void); + + /** + * Return the previous character in the normalized text and decrement. + * (Pre-decrement semantics.) + * If the beginning of the text has already been reached, DONE is returned. + * The DONE value could be confused with a U+FFFF non-character code point + * in the text. If this is possible, you can test + * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change + * the iterator state!) + * + * The C API unorm_previous() is more efficient and does not have this ambiguity. + * + * @return the previous normalized code point + * @deprecated ICU 56 Use Normalizer2 instead. + */ + UChar32 previous(void); + + /** + * Set the iteration position in the input text that is being normalized, + * without any immediate normalization. + * After setIndexOnly(), getIndex() will return the same index that is + * specified here. + * + * @param index the desired index in the input text. + * @deprecated ICU 56 Use Normalizer2 instead. + */ + void setIndexOnly(int32_t index); + + /** + * Reset the index to the beginning of the text. + * This is equivalent to setIndexOnly(startIndex)). + * @deprecated ICU 56 Use Normalizer2 instead. + */ + void reset(void); + + /** + * Retrieve the current iteration position in the input text that is + * being normalized. + * + * A following call to next() will return a normalized code point from + * the input text at or after this index. + * + * After a call to previous(), getIndex() will point at or before the + * position in the input text where the normalized code point + * was returned from with previous(). + * + * @return the current index in the input text + * @deprecated ICU 56 Use Normalizer2 instead. + */ + int32_t getIndex(void) const; + + /** + * Retrieve the index of the start of the input text. This is the begin index + * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string + * over which this <code>Normalizer</code> is iterating. + * + * @return the smallest index in the input text where the Normalizer operates + * @deprecated ICU 56 Use Normalizer2 instead. + */ + int32_t startIndex(void) const; + + /** + * Retrieve the index of the end of the input text. This is the end index + * of the <code>CharacterIterator</code> or the length of the string + * over which this <code>Normalizer</code> is iterating. + * This end index is exclusive, i.e., the Normalizer operates only on characters + * before this index. + * + * @return the first index in the input text where the Normalizer does not operate + * @deprecated ICU 56 Use Normalizer2 instead. + */ + int32_t endIndex(void) const; + + /** + * Returns true when both iterators refer to the same character in the same + * input text. + * + * @param that a Normalizer object to compare this one to + * @return comparison result + * @deprecated ICU 56 Use Normalizer2 instead. + */ + UBool operator==(const Normalizer& that) const; + + /** + * Returns false when both iterators refer to the same character in the same + * input text. + * + * @param that a Normalizer object to compare this one to + * @return comparison result + * @deprecated ICU 56 Use Normalizer2 instead. + */ + inline UBool operator!=(const Normalizer& that) const; + + /** + * Returns a pointer to a new Normalizer that is a clone of this one. + * The caller is responsible for deleting the new clone. + * @return a pointer to a new Normalizer + * @deprecated ICU 56 Use Normalizer2 instead. + */ + Normalizer* clone() const; + + /** + * Generates a hash code for this iterator. + * + * @return the hash code + * @deprecated ICU 56 Use Normalizer2 instead. + */ + int32_t hashCode(void) const; + + //------------------------------------------------------------------------- + // Property access methods + //------------------------------------------------------------------------- + + /** + * Set the normalization mode for this object. + * <p> + * <b>Note:</b>If the normalization mode is changed while iterating + * over a string, calls to {@link #next() } and {@link #previous() } may + * return previously buffers characters in the old normalization mode + * until the iteration is able to re-sync at the next base character. + * It is safest to call {@link #setIndexOnly }, {@link #reset() }, + * {@link #setText }, {@link #first() }, + * {@link #last() }, etc. after calling <code>setMode</code>. + * <p> + * @param newMode the new mode for this <code>Normalizer</code>. + * @see #getUMode + * @deprecated ICU 56 Use Normalizer2 instead. + */ + void setMode(UNormalizationMode newMode); + + /** + * Return the normalization mode for this object. + * + * This is an unusual name because there used to be a getMode() that + * returned a different type. + * + * @return the mode for this <code>Normalizer</code> + * @see #setMode + * @deprecated ICU 56 Use Normalizer2 instead. + */ + UNormalizationMode getUMode(void) const; + + /** + * Set options that affect this <code>Normalizer</code>'s operation. + * Options do not change the basic composition or decomposition operation + * that is being performed, but they control whether + * certain optional portions of the operation are done. + * Currently the only available option is obsolete. + * + * It is possible to specify multiple options that are all turned on or off. + * + * @param option the option(s) whose value is/are to be set. + * @param value the new setting for the option. Use <code>true</code> to + * turn the option(s) on and <code>false</code> to turn it/them off. + * + * @see #getOption + * @deprecated ICU 56 Use Normalizer2 instead. + */ + void setOption(int32_t option, + UBool value); + + /** + * Determine whether an option is turned on or off. + * If multiple options are specified, then the result is true if any + * of them are set. + * <p> + * @param option the option(s) that are to be checked + * @return true if any of the option(s) are set + * @see #setOption + * @deprecated ICU 56 Use Normalizer2 instead. + */ + UBool getOption(int32_t option) const; + + /** + * Set the input text over which this <code>Normalizer</code> will iterate. + * The iteration position is set to the beginning. + * + * @param newText a string that replaces the current input text + * @param status a UErrorCode + * @deprecated ICU 56 Use Normalizer2 instead. + */ + void setText(const UnicodeString& newText, + UErrorCode &status); + + /** + * Set the input text over which this <code>Normalizer</code> will iterate. + * The iteration position is set to the beginning. + * + * @param newText a CharacterIterator object that replaces the current input text + * @param status a UErrorCode + * @deprecated ICU 56 Use Normalizer2 instead. + */ + void setText(const CharacterIterator& newText, + UErrorCode &status); + + /** + * Set the input text over which this <code>Normalizer</code> will iterate. + * The iteration position is set to the beginning. + * + * @param newText a string that replaces the current input text + * @param length the length of the string, or -1 if NUL-terminated + * @param status a UErrorCode + * @deprecated ICU 56 Use Normalizer2 instead. + */ + void setText(ConstChar16Ptr newText, + int32_t length, + UErrorCode &status); + /** + * Copies the input text into the UnicodeString argument. + * + * @param result Receives a copy of the text under iteration. + * @deprecated ICU 56 Use Normalizer2 instead. + */ + void getText(UnicodeString& result); + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * @returns a UClassID for this class. + * @deprecated ICU 56 Use Normalizer2 instead. + */ + static UClassID U_EXPORT2 getStaticClassID(); +#endif /* U_HIDE_DEPRECATED_API */ + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * @return a UClassID for the actual class. + * @deprecated ICU 56 Use Normalizer2 instead. + */ + virtual UClassID getDynamicClassID() const; +#endif // U_FORCE_HIDE_DEPRECATED_API + +private: + //------------------------------------------------------------------------- + // Private functions + //------------------------------------------------------------------------- + + Normalizer(); // default constructor not implemented + Normalizer &operator=(const Normalizer &that); // assignment operator not implemented + + // Private utility methods for iteration + // For documentation, see the source code + UBool nextNormalize(); + UBool previousNormalize(); + + void init(); + void clearBuffer(void); + + //------------------------------------------------------------------------- + // Private data + //------------------------------------------------------------------------- + + FilteredNormalizer2*fFilteredNorm2; // owned if not NULL + const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2 + UNormalizationMode fUMode; // deprecated + int32_t fOptions; + + // The input text and our position in it + CharacterIterator *text; + + // The normalization buffer is the result of normalization + // of the source in [currentIndex..nextIndex[ . + int32_t currentIndex, nextIndex; + + // A buffer for holding intermediate results + UnicodeString buffer; + int32_t bufferPos; +}; + +//------------------------------------------------------------------------- +// Inline implementations +//------------------------------------------------------------------------- + +#ifndef U_HIDE_DEPRECATED_API +inline UBool +Normalizer::operator!= (const Normalizer& other) const +{ return ! operator==(other); } + +inline UNormalizationCheckResult +Normalizer::quickCheck(const UnicodeString& source, + UNormalizationMode mode, + UErrorCode &status) { + return quickCheck(source, mode, 0, status); +} + +inline UBool +Normalizer::isNormalized(const UnicodeString& source, + UNormalizationMode mode, + UErrorCode &status) { + return isNormalized(source, mode, 0, status); +} +#endif /* U_HIDE_DEPRECATED_API */ + +inline int32_t +Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, + uint32_t options, + UErrorCode &errorCode) { + // all argument checking is done in unorm_compare + return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(), + toUCharPtr(s2.getBuffer()), s2.length(), + options, + &errorCode); +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + +#endif // NORMLZR_H + +#endif /* U_SHOW_CPLUSPLUS_API */ diff --git a/thirdparty/icu4c/common/unicode/parseerr.h b/thirdparty/icu4c/common/unicode/parseerr.h new file mode 100644 index 0000000000..c23cc273b8 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/parseerr.h @@ -0,0 +1,94 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2005, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 03/14/00 aliu Creation. +* 06/27/00 aliu Change from C++ class to C struct +********************************************************************** +*/ +#ifndef PARSEERR_H +#define PARSEERR_H + +#include "unicode/utypes.h" + + +/** + * \file + * \brief C API: Parse Error Information + */ +/** + * The capacity of the context strings in UParseError. + * @stable ICU 2.0 + */ +enum { U_PARSE_CONTEXT_LEN = 16 }; + +/** + * A UParseError struct is used to returned detailed information about + * parsing errors. It is used by ICU parsing engines that parse long + * rules, patterns, or programs, where the text being parsed is long + * enough that more information than a UErrorCode is needed to + * localize the error. + * + * <p>The line, offset, and context fields are optional; parsing + * engines may choose not to use to use them. + * + * <p>The preContext and postContext strings include some part of the + * context surrounding the error. If the source text is "let for=7" + * and "for" is the error (e.g., because it is a reserved word), then + * some examples of what a parser might produce are the following: + * + * <pre> + * preContext postContext + * "" "" The parser does not support context + * "let " "=7" Pre- and post-context only + * "let " "for=7" Pre- and post-context and error text + * "" "for" Error text only + * </pre> + * + * <p>Examples of engines which use UParseError (or may use it in the + * future) are Transliterator, RuleBasedBreakIterator, and + * RegexPattern. + * + * @stable ICU 2.0 + */ +typedef struct UParseError { + + /** + * The line on which the error occurred. If the parser uses this + * field, it sets it to the line number of the source text line on + * which the error appears, which will be a value >= 1. If the + * parse does not support line numbers, the value will be <= 0. + * @stable ICU 2.0 + */ + int32_t line; + + /** + * The character offset to the error. If the line field is >= 1, + * then this is the offset from the start of the line. Otherwise, + * this is the offset from the start of the text. If the parser + * does not support this field, it will have a value < 0. + * @stable ICU 2.0 + */ + int32_t offset; + + /** + * Textual context before the error. Null-terminated. The empty + * string if not supported by parser. + * @stable ICU 2.0 + */ + UChar preContext[U_PARSE_CONTEXT_LEN]; + + /** + * The error itself and/or textual context after the error. + * Null-terminated. The empty string if not supported by parser. + * @stable ICU 2.0 + */ + UChar postContext[U_PARSE_CONTEXT_LEN]; + +} UParseError; + +#endif diff --git a/thirdparty/icu4c/common/unicode/parsepos.h b/thirdparty/icu4c/common/unicode/parsepos.h new file mode 100644 index 0000000000..260ed4cbb8 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/parsepos.h @@ -0,0 +1,237 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +* Copyright (C) 1997-2005, International Business Machines Corporation and others. All Rights Reserved. +******************************************************************************* +* +* File PARSEPOS.H +* +* Modification History: +* +* Date Name Description +* 07/09/97 helena Converted from java. +* 07/17/98 stephen Added errorIndex support. +* 05/11/99 stephen Cleaned up. +******************************************************************************* +*/ + +#ifndef PARSEPOS_H +#define PARSEPOS_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" + + +U_NAMESPACE_BEGIN + +/** + * \file + * \brief C++ API: Canonical Iterator + */ +/** + * <code>ParsePosition</code> is a simple class used by <code>Format</code> + * and its subclasses to keep track of the current position during parsing. + * The <code>parseObject</code> method in the various <code>Format</code> + * classes requires a <code>ParsePosition</code> object as an argument. + * + * <p> + * By design, as you parse through a string with different formats, + * you can use the same <code>ParsePosition</code>, since the index parameter + * records the current position. + * + * The ParsePosition class is not suitable for subclassing. + * + * @version 1.3 10/30/97 + * @author Mark Davis, Helena Shih + * @see java.text.Format + */ + +class U_COMMON_API ParsePosition : public UObject { +public: + /** + * Default constructor, the index starts with 0 as default. + * @stable ICU 2.0 + */ + ParsePosition() + : UObject(), + index(0), + errorIndex(-1) + {} + + /** + * Create a new ParsePosition with the given initial index. + * @param newIndex the new text offset. + * @stable ICU 2.0 + */ + ParsePosition(int32_t newIndex) + : UObject(), + index(newIndex), + errorIndex(-1) + {} + + /** + * Copy constructor + * @param copy the object to be copied from. + * @stable ICU 2.0 + */ + ParsePosition(const ParsePosition& copy) + : UObject(copy), + index(copy.index), + errorIndex(copy.errorIndex) + {} + + /** + * Destructor + * @stable ICU 2.0 + */ + virtual ~ParsePosition(); + + /** + * Assignment operator + * @stable ICU 2.0 + */ + inline ParsePosition& operator=(const ParsePosition& copy); + + /** + * Equality operator. + * @return true if the two parse positions are equal, false otherwise. + * @stable ICU 2.0 + */ + inline UBool operator==(const ParsePosition& that) const; + + /** + * Equality operator. + * @return true if the two parse positions are not equal, false otherwise. + * @stable ICU 2.0 + */ + inline UBool operator!=(const ParsePosition& that) const; + + /** + * Clone this object. + * Clones can be used concurrently in multiple threads. + * If an error occurs, then NULL is returned. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see getDynamicClassID + * @stable ICU 2.8 + */ + ParsePosition *clone() const; + + /** + * Retrieve the current parse position. On input to a parse method, this + * is the index of the character at which parsing will begin; on output, it + * is the index of the character following the last character parsed. + * @return the current index. + * @stable ICU 2.0 + */ + inline int32_t getIndex(void) const; + + /** + * Set the current parse position. + * @param index the new index. + * @stable ICU 2.0 + */ + inline void setIndex(int32_t index); + + /** + * Set the index at which a parse error occurred. Formatters + * should set this before returning an error code from their + * parseObject method. The default value is -1 if this is not + * set. + * @stable ICU 2.0 + */ + inline void setErrorIndex(int32_t ei); + + /** + * Retrieve the index at which an error occurred, or -1 if the + * error index has not been set. + * @stable ICU 2.0 + */ + inline int32_t getErrorIndex(void) const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + +private: + /** + * Input: the place you start parsing. + * <br>Output: position where the parse stopped. + * This is designed to be used serially, + * with each call setting index up for the next one. + */ + int32_t index; + + /** + * The index at which a parse error occurred. + */ + int32_t errorIndex; + +}; + +inline ParsePosition& +ParsePosition::operator=(const ParsePosition& copy) +{ + index = copy.index; + errorIndex = copy.errorIndex; + return *this; +} + +inline UBool +ParsePosition::operator==(const ParsePosition& copy) const +{ + if(index != copy.index || errorIndex != copy.errorIndex) + return false; + else + return true; +} + +inline UBool +ParsePosition::operator!=(const ParsePosition& copy) const +{ + return !operator==(copy); +} + +inline int32_t +ParsePosition::getIndex() const +{ + return index; +} + +inline void +ParsePosition::setIndex(int32_t offset) +{ + this->index = offset; +} + +inline int32_t +ParsePosition::getErrorIndex() const +{ + return errorIndex; +} + +inline void +ParsePosition::setErrorIndex(int32_t ei) +{ + this->errorIndex = ei; +} +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/platform.h b/thirdparty/icu4c/common/unicode/platform.h new file mode 100644 index 0000000000..2bb2f8b318 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/platform.h @@ -0,0 +1,885 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : platform.h +* +* Date Name Description +* 05/13/98 nos Creation (content moved here from ptypes.h). +* 03/02/99 stephen Added AS400 support. +* 03/30/99 stephen Added Linux support. +* 04/13/99 stephen Reworked for autoconf. +****************************************************************************** +*/ + +#ifndef _PLATFORM_H +#define _PLATFORM_H + +#include "unicode/uconfig.h" +#include "unicode/uvernum.h" + +/** + * \file + * \brief Basic types for the platform. + * + * This file used to be generated by autoconf/configure. + * Starting with ICU 49, platform.h is a normal source file, + * to simplify cross-compiling and working with non-autoconf/make build systems. + * + * When a value in this file does not work on a platform, then please + * try to derive it from the U_PLATFORM value + * (for which we might need a new value constant in rare cases) + * and/or from other macros that are predefined by the compiler + * or defined in standard (POSIX or platform or compiler) headers. + * + * As a temporary workaround, you can add an explicit \#define for some macros + * before it is first tested, or add an equivalent -D macro definition + * to the compiler's command line. + * + * Note: Some compilers provide ways to show the predefined macros. + * For example, with gcc you can compile an empty .c file and have the compiler + * print the predefined macros with + * \code + * gcc -E -dM -x c /dev/null | sort + * \endcode + * (You can provide an actual empty .c file rather than /dev/null. + * <code>-x c++</code> is for C++.) + */ + +/** + * Define some things so that they can be documented. + * @internal + */ +#ifdef U_IN_DOXYGEN +/* + * Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented. + * Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented. + */ + +/* None for now. */ +#endif + +/** + * \def U_PLATFORM + * The U_PLATFORM macro defines the platform we're on. + * + * We used to define one different, value-less macro per platform. + * That made it hard to know the set of relevant platforms and macros, + * and hard to deal with variants of platforms. + * + * Starting with ICU 49, we define platforms as numeric macros, + * with ranges of values for related platforms and their variants. + * The U_PLATFORM macro is set to one of these values. + * + * Historical note from the Solaris Wikipedia article: + * AT&T and Sun collaborated on a project to merge the most popular Unix variants + * on the market at that time: BSD, System V, and Xenix. + * This became Unix System V Release 4 (SVR4). + * + * @internal + */ + +/** Unknown platform. @internal */ +#define U_PF_UNKNOWN 0 +/** Windows @internal */ +#define U_PF_WINDOWS 1000 +/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */ +#define U_PF_MINGW 1800 +/** + * Cygwin. Windows, calls to cygwin1.dll for Posix functions, + * using MSVC or GNU gcc and binutils. + * @internal + */ +#define U_PF_CYGWIN 1900 +/* Reserve 2000 for U_PF_UNIX? */ +/** HP-UX is based on UNIX System V. @internal */ +#define U_PF_HPUX 2100 +/** Solaris is a Unix operating system based on SVR4. @internal */ +#define U_PF_SOLARIS 2600 +/** BSD is a UNIX operating system derivative. @internal */ +#define U_PF_BSD 3000 +/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */ +#define U_PF_AIX 3100 +/** IRIX is based on UNIX System V with BSD extensions. @internal */ +#define U_PF_IRIX 3200 +/** + * Darwin is a POSIX-compliant operating system, composed of code developed by Apple, + * as well as code derived from NeXTSTEP, BSD, and other projects, + * built around the Mach kernel. + * Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based. + * (Original description modified from WikiPedia.) + * @internal + */ +#define U_PF_DARWIN 3500 +/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */ +#define U_PF_IPHONE 3550 +/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */ +#define U_PF_QNX 3700 +/** Linux is a Unix-like operating system. @internal */ +#define U_PF_LINUX 4000 +/** + * Native Client is pretty close to Linux. + * See https://developer.chrome.com/native-client and + * http://www.chromium.org/nativeclient + * @internal + */ +#define U_PF_BROWSER_NATIVE_CLIENT 4020 +/** Android is based on Linux. @internal */ +#define U_PF_ANDROID 4050 +/** Fuchsia is a POSIX-ish platform. @internal */ +#define U_PF_FUCHSIA 4100 +/* Maximum value for Linux-based platform is 4499 */ +/** + * Emscripten is a C++ transpiler for the Web that can target asm.js or + * WebAssembly. It provides some POSIX-compatible wrappers and stubs and + * some Linux-like functionality, but is not fully compatible with + * either. + * @internal + */ +#define U_PF_EMSCRIPTEN 5010 +/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */ +#define U_PF_OS390 9000 +/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */ +#define U_PF_OS400 9400 + +#ifdef U_PLATFORM + /* Use the predefined value. */ +#elif defined(__MINGW32__) +# define U_PLATFORM U_PF_MINGW +#elif defined(__CYGWIN__) +# define U_PLATFORM U_PF_CYGWIN +#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +# define U_PLATFORM U_PF_WINDOWS +#elif defined(__ANDROID__) +# define U_PLATFORM U_PF_ANDROID + /* Android wchar_t support depends on the API level. */ +# include <android/api-level.h> +#elif defined(__pnacl__) || defined(__native_client__) +# define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT +#elif defined(__Fuchsia__) +# define U_PLATFORM U_PF_FUCHSIA +#elif defined(linux) || defined(__linux__) || defined(__linux) +# define U_PLATFORM U_PF_LINUX +#elif defined(__APPLE__) && defined(__MACH__) +# include <TargetConditionals.h> +# if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE /* variant of TARGET_OS_MAC */ +# define U_PLATFORM U_PF_IPHONE +# else +# define U_PLATFORM U_PF_DARWIN +# endif +#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__) +# if defined(__FreeBSD__) +# include <sys/endian.h> +# endif +# define U_PLATFORM U_PF_BSD +#elif defined(sun) || defined(__sun) + /* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */ +# define U_PLATFORM U_PF_SOLARIS +# if defined(__GNUC__) + /* Solaris/GCC needs this header file to get the proper endianness. Normally, this + * header file is included with stddef.h but on Solairs/GCC, the GCC version of stddef.h + * is included which does not include this header file. + */ +# include <sys/isa_defs.h> +# endif +#elif defined(_AIX) || defined(__TOS_AIX__) +# define U_PLATFORM U_PF_AIX +#elif defined(_hpux) || defined(hpux) || defined(__hpux) +# define U_PLATFORM U_PF_HPUX +#elif defined(sgi) || defined(__sgi) +# define U_PLATFORM U_PF_IRIX +#elif defined(__QNX__) || defined(__QNXNTO__) +# define U_PLATFORM U_PF_QNX +#elif defined(__TOS_MVS__) +# define U_PLATFORM U_PF_OS390 +#elif defined(__OS400__) || defined(__TOS_OS400__) +# define U_PLATFORM U_PF_OS400 +#elif defined(__EMSCRIPTEN__) +# define U_PLATFORM U_PF_EMSCRIPTEN +#else +# define U_PLATFORM U_PF_UNKNOWN +#endif + +/** + * \def CYGWINMSVC + * Defined if this is Windows with Cygwin, but using MSVC rather than gcc. + * Otherwise undefined. + * @internal + */ +/* Commented out because this is already set in mh-cygwin-msvc +#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER) +# define CYGWINMSVC +#endif +*/ +#ifdef U_IN_DOXYGEN +# define CYGWINMSVC +#endif + +/** + * \def U_PLATFORM_USES_ONLY_WIN32_API + * Defines whether the platform uses only the Win32 API. + * Set to 1 for Windows/MSVC and MinGW but not Cygwin. + * @internal + */ +#ifdef U_PLATFORM_USES_ONLY_WIN32_API + /* Use the predefined value. */ +#elif (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC) +# define U_PLATFORM_USES_ONLY_WIN32_API 1 +#else + /* Cygwin implements POSIX. */ +# define U_PLATFORM_USES_ONLY_WIN32_API 0 +#endif + +/** + * \def U_PLATFORM_HAS_WIN32_API + * Defines whether the Win32 API is available on the platform. + * Set to 1 for Windows/MSVC, MinGW and Cygwin. + * @internal + */ +#ifdef U_PLATFORM_HAS_WIN32_API + /* Use the predefined value. */ +#elif U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +# define U_PLATFORM_HAS_WIN32_API 1 +#else +# define U_PLATFORM_HAS_WIN32_API 0 +#endif + +/** + * \def U_PLATFORM_HAS_WINUWP_API + * Defines whether target is intended for Universal Windows Platform API + * Set to 1 for Windows10 Release Solution Configuration + * @internal + */ +#ifdef U_PLATFORM_HAS_WINUWP_API + /* Use the predefined value. */ +#else +# define U_PLATFORM_HAS_WINUWP_API 0 +#endif + +/** + * \def U_PLATFORM_IMPLEMENTS_POSIX + * Defines whether the platform implements (most of) the POSIX API. + * Set to 1 for Cygwin and most other platforms. + * @internal + */ +#ifdef U_PLATFORM_IMPLEMENTS_POSIX + /* Use the predefined value. */ +#elif U_PLATFORM_USES_ONLY_WIN32_API +# define U_PLATFORM_IMPLEMENTS_POSIX 0 +#else +# define U_PLATFORM_IMPLEMENTS_POSIX 1 +#endif + +/** + * \def U_PLATFORM_IS_LINUX_BASED + * Defines whether the platform is Linux or one of its derivatives. + * @internal + */ +#ifdef U_PLATFORM_IS_LINUX_BASED + /* Use the predefined value. */ +#elif U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= 4499 +# define U_PLATFORM_IS_LINUX_BASED 1 +#else +# define U_PLATFORM_IS_LINUX_BASED 0 +#endif + +/** + * \def U_PLATFORM_IS_DARWIN_BASED + * Defines whether the platform is Darwin or one of its derivatives. + * @internal + */ +#ifdef U_PLATFORM_IS_DARWIN_BASED + /* Use the predefined value. */ +#elif U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE +# define U_PLATFORM_IS_DARWIN_BASED 1 +#else +# define U_PLATFORM_IS_DARWIN_BASED 0 +#endif + +/** + * \def U_HAVE_STDINT_H + * Defines whether stdint.h is available. It is a C99 standard header. + * We used to include inttypes.h which includes stdint.h but we usually do not need + * the additional definitions from inttypes.h. + * @internal + */ +#ifdef U_HAVE_STDINT_H + /* Use the predefined value. */ +#elif U_PLATFORM_USES_ONLY_WIN32_API +# if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600) + /* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */ +# define U_HAVE_STDINT_H 1 +# else +# define U_HAVE_STDINT_H 0 +# endif +#elif U_PLATFORM == U_PF_SOLARIS + /* Solaris has inttypes.h but not stdint.h. */ +# define U_HAVE_STDINT_H 0 +#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER) + /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */ +# define U_HAVE_STDINT_H 0 +#else +# define U_HAVE_STDINT_H 1 +#endif + +/** + * \def U_HAVE_INTTYPES_H + * Defines whether inttypes.h is available. It is a C99 standard header. + * We include inttypes.h where it is available but stdint.h is not. + * @internal + */ +#ifdef U_HAVE_INTTYPES_H + /* Use the predefined value. */ +#elif U_PLATFORM == U_PF_SOLARIS + /* Solaris has inttypes.h but not stdint.h. */ +# define U_HAVE_INTTYPES_H 1 +#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER) + /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */ +# define U_HAVE_INTTYPES_H 1 +#else + /* Most platforms have both inttypes.h and stdint.h, or neither. */ +# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H +#endif + +/*===========================================================================*/ +/** @{ Compiler and environment features */ +/*===========================================================================*/ + +/** + * \def U_GCC_MAJOR_MINOR + * Indicates whether the compiler is gcc (test for != 0), + * and if so, contains its major (times 100) and minor version numbers. + * If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0. + * + * For example, for testing for whether we have gcc, and whether it's 4.6 or higher, + * use "#if U_GCC_MAJOR_MINOR >= 406". + * @internal + */ +#ifdef __GNUC__ +# define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__) +#else +# define U_GCC_MAJOR_MINOR 0 +#endif + +/** + * \def U_IS_BIG_ENDIAN + * Determines the endianness of the platform. + * @internal + */ +#ifdef U_IS_BIG_ENDIAN + /* Use the predefined value. */ +#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN) +# define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN) +#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) + /* gcc */ +# define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) +# define U_IS_BIG_ENDIAN 1 +#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN) +# define U_IS_BIG_ENDIAN 0 +#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__) + /* These platforms do not appear to predefine any endianness macros. */ +# define U_IS_BIG_ENDIAN 1 +#elif defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0) + /* HPPA do not appear to predefine any endianness macros. */ +# define U_IS_BIG_ENDIAN 1 +#elif defined(sparc) || defined(__sparc) || defined(__sparc__) + /* Some sparc based systems (e.g. Linux) do not predefine any endianness macros. */ +# define U_IS_BIG_ENDIAN 1 +#else +# define U_IS_BIG_ENDIAN 0 +#endif + +/** + * \def U_HAVE_PLACEMENT_NEW + * Determines whether to override placement new and delete for STL. + * @stable ICU 2.6 + */ +#ifdef U_HAVE_PLACEMENT_NEW + /* Use the predefined value. */ +#elif defined(__BORLANDC__) +# define U_HAVE_PLACEMENT_NEW 0 +#else +# define U_HAVE_PLACEMENT_NEW 1 +#endif + +/** + * \def U_HAVE_DEBUG_LOCATION_NEW + * Define this to define the MFC debug version of the operator new. + * + * @stable ICU 3.4 + */ +#ifdef U_HAVE_DEBUG_LOCATION_NEW + /* Use the predefined value. */ +#elif defined(_MSC_VER) +# define U_HAVE_DEBUG_LOCATION_NEW 1 +#else +# define U_HAVE_DEBUG_LOCATION_NEW 0 +#endif + +/* Compatibility with compilers other than clang: http://clang.llvm.org/docs/LanguageExtensions.html */ +#ifdef __has_attribute +# define UPRV_HAS_ATTRIBUTE(x) __has_attribute(x) +#else +# define UPRV_HAS_ATTRIBUTE(x) 0 +#endif +#ifdef __has_cpp_attribute +# define UPRV_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +# define UPRV_HAS_CPP_ATTRIBUTE(x) 0 +#endif +#ifdef __has_declspec_attribute +# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) __has_declspec_attribute(x) +#else +# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) 0 +#endif +#ifdef __has_builtin +# define UPRV_HAS_BUILTIN(x) __has_builtin(x) +#else +# define UPRV_HAS_BUILTIN(x) 0 +#endif +#ifdef __has_feature +# define UPRV_HAS_FEATURE(x) __has_feature(x) +#else +# define UPRV_HAS_FEATURE(x) 0 +#endif +#ifdef __has_extension +# define UPRV_HAS_EXTENSION(x) __has_extension(x) +#else +# define UPRV_HAS_EXTENSION(x) 0 +#endif +#ifdef __has_warning +# define UPRV_HAS_WARNING(x) __has_warning(x) +#else +# define UPRV_HAS_WARNING(x) 0 +#endif + +/** + * \def U_MALLOC_ATTR + * Attribute to mark functions as malloc-like + * @internal + */ +#if defined(__GNUC__) && __GNUC__>=3 +# define U_MALLOC_ATTR __attribute__ ((__malloc__)) +#else +# define U_MALLOC_ATTR +#endif + +/** + * \def U_ALLOC_SIZE_ATTR + * Attribute to specify the size of the allocated buffer for malloc-like functions + * @internal + */ +#if (defined(__GNUC__) && \ + (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || \ + UPRV_HAS_ATTRIBUTE(alloc_size) +# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X))) +# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y))) +#else +# define U_ALLOC_SIZE_ATTR(X) +# define U_ALLOC_SIZE_ATTR2(X,Y) +#endif + +/** + * \def U_CPLUSPLUS_VERSION + * 0 if no C++; 1, 11, 14, ... if C++. + * Support for specific features cannot always be determined by the C++ version alone. + * @internal + */ +#ifdef U_CPLUSPLUS_VERSION +# if U_CPLUSPLUS_VERSION != 0 && !defined(__cplusplus) +# undef U_CPLUSPLUS_VERSION +# define U_CPLUSPLUS_VERSION 0 +# endif + /* Otherwise use the predefined value. */ +#elif !defined(__cplusplus) +# define U_CPLUSPLUS_VERSION 0 +#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) +# define U_CPLUSPLUS_VERSION 14 +#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) +# define U_CPLUSPLUS_VERSION 11 +#else + // C++98 or C++03 +# define U_CPLUSPLUS_VERSION 1 +#endif + +#if (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11) +// add in std::nullptr_t +namespace std { + typedef decltype(nullptr) nullptr_t; +}; +#endif + +/** + * \def U_NOEXCEPT + * "noexcept" if supported, otherwise empty. + * Some code, especially STL containers, uses move semantics of objects only + * if the move constructor and the move operator are declared as not throwing exceptions. + * @internal + */ +#ifdef U_NOEXCEPT + /* Use the predefined value. */ +#else +# define U_NOEXCEPT noexcept +#endif + +/** + * \def U_FALLTHROUGH + * Annotate intentional fall-through between switch labels. + * http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough + * @internal + */ +#ifndef __cplusplus + // Not for C. +#elif defined(U_FALLTHROUGH) + // Use the predefined value. +#elif defined(__clang__) + // Test for compiler vs. feature separately. + // Other compilers might choke on the feature test. +# if UPRV_HAS_CPP_ATTRIBUTE(clang::fallthrough) || \ + (UPRV_HAS_FEATURE(cxx_attributes) && \ + UPRV_HAS_WARNING("-Wimplicit-fallthrough")) +# define U_FALLTHROUGH [[clang::fallthrough]] +# endif +#elif defined(__GNUC__) && (__GNUC__ >= 7) +# define U_FALLTHROUGH __attribute__((fallthrough)) +#endif + +#ifndef U_FALLTHROUGH +# define U_FALLTHROUGH +#endif + +/** @} */ + +/*===========================================================================*/ +/** @{ Character data types */ +/*===========================================================================*/ + +/** + * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform. + * @stable ICU 2.0 + */ +#define U_ASCII_FAMILY 0 + +/** + * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform. + * @stable ICU 2.0 + */ +#define U_EBCDIC_FAMILY 1 + +/** + * \def U_CHARSET_FAMILY + * + * <p>These definitions allow to specify the encoding of text + * in the char data type as defined by the platform and the compiler. + * It is enough to determine the code point values of "invariant characters", + * which are the ones shared by all encodings that are in use + * on a given platform.</p> + * + * <p>Those "invariant characters" should be all the uppercase and lowercase + * latin letters, the digits, the space, and "basic punctuation". + * Also, '\\n', '\\r', '\\t' should be available.</p> + * + * <p>The list of "invariant characters" is:<br> + * \code + * A-Z a-z 0-9 SPACE " % & ' ( ) * + , - . / : ; < = > ? _ + * \endcode + * <br> + * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p> + * + * <p>This matches the IBM Syntactic Character Set (CS 640).</p> + * + * <p>In other words, all the graphic characters in 7-bit ASCII should + * be safely accessible except the following:</p> + * + * \code + * '\' <backslash> + * '[' <left bracket> + * ']' <right bracket> + * '{' <left brace> + * '}' <right brace> + * '^' <circumflex> + * '~' <tilde> + * '!' <exclamation mark> + * '#' <number sign> + * '|' <vertical line> + * '$' <dollar sign> + * '@' <commercial at> + * '`' <grave accent> + * \endcode + * @stable ICU 2.0 + */ +#ifdef U_CHARSET_FAMILY + /* Use the predefined value. */ +#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB) +# define U_CHARSET_FAMILY U_EBCDIC_FAMILY +#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__) +# define U_CHARSET_FAMILY U_EBCDIC_FAMILY +#else +# define U_CHARSET_FAMILY U_ASCII_FAMILY +#endif + +/** + * \def U_CHARSET_IS_UTF8 + * + * Hardcode the default charset to UTF-8. + * + * If this is set to 1, then + * - ICU will assume that all non-invariant char*, StringPiece, std::string etc. + * contain UTF-8 text, regardless of what the system API uses + * - some ICU code will use fast functions like u_strFromUTF8() + * rather than the more general and more heavy-weight conversion API (ucnv.h) + * - ucnv_getDefaultName() always returns "UTF-8" + * - ucnv_setDefaultName() is disabled and will not change the default charset + * - static builds of ICU are smaller + * - more functionality is available with the UCONFIG_NO_CONVERSION build-time + * configuration option (see unicode/uconfig.h) + * - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable + * + * @stable ICU 4.2 + * @see UCONFIG_NO_CONVERSION + */ +#ifdef U_CHARSET_IS_UTF8 + /* Use the predefined value. */ +#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED || \ + U_PLATFORM == U_PF_EMSCRIPTEN +# define U_CHARSET_IS_UTF8 1 +#else +# define U_CHARSET_IS_UTF8 0 +#endif + +/** @} */ + +/*===========================================================================*/ +/** @{ Information about wchar support */ +/*===========================================================================*/ + +/** + * \def U_HAVE_WCHAR_H + * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default. + * + * @stable ICU 2.0 + */ +#ifdef U_HAVE_WCHAR_H + /* Use the predefined value. */ +#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9 + /* + * Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t. + * The type and header existed, but the library functions did not work as expected. + * The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway. + */ +# define U_HAVE_WCHAR_H 0 +#else +# define U_HAVE_WCHAR_H 1 +#endif + +/** + * \def U_SIZEOF_WCHAR_T + * U_SIZEOF_WCHAR_T==sizeof(wchar_t) + * + * @stable ICU 2.0 + */ +#ifdef U_SIZEOF_WCHAR_T + /* Use the predefined value. */ +#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9) + /* + * Classic Mac OS and Mac OS X before 10.3 (Panther) did not support wchar_t or wstring. + * Newer Mac OS X has size 4. + */ +# define U_SIZEOF_WCHAR_T 1 +#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN +# define U_SIZEOF_WCHAR_T 2 +#elif U_PLATFORM == U_PF_AIX + /* + * AIX 6.1 information, section "Wide character data representation": + * "... the wchar_t datatype is 32-bit in the 64-bit environment and + * 16-bit in the 32-bit environment." + * and + * "All locales use Unicode for their wide character code values (process code), + * except the IBM-eucTW codeset." + */ +# ifdef __64BIT__ +# define U_SIZEOF_WCHAR_T 4 +# else +# define U_SIZEOF_WCHAR_T 2 +# endif +#elif U_PLATFORM == U_PF_OS390 + /* + * z/OS V1R11 information center, section "LP64 | ILP32": + * "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes. + * Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes." + */ +# ifdef _LP64 +# define U_SIZEOF_WCHAR_T 4 +# else +# define U_SIZEOF_WCHAR_T 2 +# endif +#elif U_PLATFORM == U_PF_OS400 +# if defined(__UTF32__) + /* + * LOCALETYPE(*LOCALEUTF) is specified. + * Wide-character strings are in UTF-32, + * narrow-character strings are in UTF-8. + */ +# define U_SIZEOF_WCHAR_T 4 +# elif defined(__UCS2__) + /* + * LOCALETYPE(*LOCALEUCS2) is specified. + * Wide-character strings are in UCS-2, + * narrow-character strings are in EBCDIC. + */ +# define U_SIZEOF_WCHAR_T 2 +# else + /* + * LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified. + * Wide-character strings are in 16-bit EBCDIC, + * narrow-character strings are in EBCDIC. + */ +# define U_SIZEOF_WCHAR_T 2 +# endif +#else +# define U_SIZEOF_WCHAR_T 4 +#endif + +#ifndef U_HAVE_WCSCPY +#define U_HAVE_WCSCPY U_HAVE_WCHAR_H +#endif + +/** @} */ + +/** + * \def U_HAVE_CHAR16_T + * Defines whether the char16_t type is available for UTF-16 + * and u"abc" UTF-16 string literals are supported. + * This is a new standard type and standard string literal syntax in C++0x + * but has been available in some compilers before. + * @internal + */ +#ifdef U_HAVE_CHAR16_T + /* Use the predefined value. */ +#else + /* + * Notes: + * Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef + * and does not support u"abc" string literals. + * Visual Studio 2015 (_MSC_VER>=1900) and above adds support for + * both char16_t and u"abc" string literals. + * gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but + * does not support u"abc" string literals. + * C++11 and C11 require support for UTF-16 literals + * TODO: Fix for plain C. Doesn't work on Mac. + */ +# if U_CPLUSPLUS_VERSION >= 11 || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) +# define U_HAVE_CHAR16_T 1 +# else +# define U_HAVE_CHAR16_T 0 +# endif +#endif + +/** + * @{ + * \def U_DECLARE_UTF16 + * Do not use this macro because it is not defined on all platforms. + * Use the UNICODE_STRING or U_STRING_DECL macros instead. + * @internal + */ +#ifdef U_DECLARE_UTF16 + /* Use the predefined value. */ +#elif U_HAVE_CHAR16_T \ + || (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \ + || (defined(__HP_aCC) && __HP_aCC >= 035000) \ + || (defined(__HP_cc) && __HP_cc >= 111106) \ + || (defined(U_IN_DOXYGEN)) +# define U_DECLARE_UTF16(string) u ## string +#elif U_SIZEOF_WCHAR_T == 2 \ + && (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__))) +# define U_DECLARE_UTF16(string) L ## string +#else + /* Leave U_DECLARE_UTF16 undefined. See unistr.h. */ +#endif + +/** @} */ + +/*===========================================================================*/ +/** @{ Symbol import-export control */ +/*===========================================================================*/ + +#ifdef U_EXPORT + /* Use the predefined value. */ +#elif defined(U_STATIC_IMPLEMENTATION) +# define U_EXPORT +#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \ + UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport)) +# define U_EXPORT __declspec(dllexport) +#elif defined(__GNUC__) +# define U_EXPORT __attribute__((visibility("default"))) +#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \ + || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550) +# define U_EXPORT __global +/*#elif defined(__HP_aCC) || defined(__HP_cc) +# define U_EXPORT __declspec(dllexport)*/ +#else +# define U_EXPORT +#endif + +/* U_CALLCONV is related to U_EXPORT2 */ +#ifdef U_EXPORT2 + /* Use the predefined value. */ +#elif defined(_MSC_VER) +# define U_EXPORT2 __cdecl +#else +# define U_EXPORT2 +#endif + +#ifdef U_IMPORT + /* Use the predefined value. */ +#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(dllexport) && \ + UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport)) + /* Windows needs to export/import data. */ +# define U_IMPORT __declspec(dllimport) +#else +# define U_IMPORT +#endif + +/** + * \def U_CALLCONV + * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary + * in callback function typedefs to make sure that the calling convention + * is compatible. + * + * This is only used for non-ICU-API functions. + * When a function is a public ICU API, + * you must use the U_CAPI and U_EXPORT2 qualifiers. + * + * Please note, you need to use U_CALLCONV after the *. + * + * NO : "static const char U_CALLCONV *func( . . . )" + * YES: "static const char* U_CALLCONV func( . . . )" + * + * @stable ICU 2.0 + */ +#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus) +# define U_CALLCONV __cdecl +#else +# define U_CALLCONV U_EXPORT2 +#endif + +/** + * \def U_CALLCONV_FPTR + * Similar to U_CALLCONV, but only used on function pointers. + * @internal + */ +#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus) +# define U_CALLCONV_FPTR U_CALLCONV +#else +# define U_CALLCONV_FPTR +#endif +/* @} */ + +#endif // _PLATFORM_H diff --git a/thirdparty/icu4c/common/unicode/ptypes.h b/thirdparty/icu4c/common/unicode/ptypes.h new file mode 100644 index 0000000000..70324ffee3 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ptypes.h @@ -0,0 +1,130 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : ptypes.h +* +* Date Name Description +* 05/13/98 nos Creation (content moved here from ptypes.h). +* 03/02/99 stephen Added AS400 support. +* 03/30/99 stephen Added Linux support. +* 04/13/99 stephen Reworked for autoconf. +* 09/18/08 srl Moved basic types back to ptypes.h from platform.h +****************************************************************************** +*/ + +/** + * \file + * \brief C API: Definitions of integer types of various widths + */ + +#ifndef _PTYPES_H +#define _PTYPES_H + +/** + * \def __STDC_LIMIT_MACROS + * According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations + * macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested. + * We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code + * that uses such limit macros. + * @internal + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#endif + +/* NULL, size_t, wchar_t */ +#include <stddef.h> + +/* + * If all compilers provided all of the C99 headers and types, + * we would just unconditionally #include <stdint.h> here + * and not need any of the stuff after including platform.h. + */ + +/* Find out if we have stdint.h etc. */ +#include "unicode/platform.h" + +/*===========================================================================*/ +/* Generic data types */ +/*===========================================================================*/ + +/* If your platform does not have the <stdint.h> header, you may + need to edit the typedefs in the #else section below. + Use #if...#else...#endif with predefined compiler macros if possible. */ +#if U_HAVE_STDINT_H + +/* + * We mostly need <stdint.h> (which defines the standard integer types) but not <inttypes.h>. + * <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc. + * which we almost never use, plus stuff like imaxabs() which we never use. + */ +#include <stdint.h> + +#if U_PLATFORM == U_PF_OS390 +/* The features header is needed to get (u)int64_t sometimes. */ +#include <features.h> +/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). */ +#if !defined(__uint8_t) +#define __uint8_t 1 +typedef unsigned char uint8_t; +#endif +#endif /* U_PLATFORM == U_PF_OS390 */ + +#elif U_HAVE_INTTYPES_H + +# include <inttypes.h> + +#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */ + +/// \cond +#if ! U_HAVE_INT8_T +typedef signed char int8_t; +#endif + +#if ! U_HAVE_UINT8_T +typedef unsigned char uint8_t; +#endif + +#if ! U_HAVE_INT16_T +typedef signed short int16_t; +#endif + +#if ! U_HAVE_UINT16_T +typedef unsigned short uint16_t; +#endif + +#if ! U_HAVE_INT32_T +typedef signed int int32_t; +#endif + +#if ! U_HAVE_UINT32_T +typedef unsigned int uint32_t; +#endif + +#if ! U_HAVE_INT64_T +#ifdef _MSC_VER + typedef signed __int64 int64_t; +#else + typedef signed long long int64_t; +#endif +#endif + +#if ! U_HAVE_UINT64_T +#ifdef _MSC_VER + typedef unsigned __int64 uint64_t; +#else + typedef unsigned long long uint64_t; +#endif +#endif +/// \endcond + +#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */ + +#endif /* _PTYPES_H */ diff --git a/thirdparty/icu4c/common/unicode/putil.h b/thirdparty/icu4c/common/unicode/putil.h new file mode 100644 index 0000000000..500c21252f --- /dev/null +++ b/thirdparty/icu4c/common/unicode/putil.h @@ -0,0 +1,183 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : putil.h +* +* Date Name Description +* 05/14/98 nos Creation (content moved here from utypes.h). +* 06/17/99 erm Added IEEE_754 +* 07/22/98 stephen Added IEEEremainder, max, min, trunc +* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity +* 08/24/98 stephen Added longBitsFromDouble +* 03/02/99 stephen Removed openFile(). Added AS400 support. +* 04/15/99 stephen Converted to C +* 11/15/99 helena Integrated S/390 changes for IEEE support. +* 01/11/00 helena Added u_getVersion. +****************************************************************************** +*/ + +#ifndef PUTIL_H +#define PUTIL_H + +#include "unicode/utypes.h" + /** + * \file + * \brief C API: Platform Utilities + */ + +/*==========================================================================*/ +/* Platform utilities */ +/*==========================================================================*/ + +/** + * Platform utilities isolates the platform dependencies of the + * library. For each platform which this code is ported to, these + * functions may have to be re-implemented. + */ + +/** + * Return the ICU data directory. + * The data directory is where common format ICU data files (.dat files) + * are loaded from. Note that normal use of the built-in ICU + * facilities does not require loading of an external data file; + * unless you are adding custom data to ICU, the data directory + * does not need to be set. + * + * The data directory is determined as follows: + * If u_setDataDirectory() has been called, that is it, otherwise + * if the ICU_DATA environment variable is set, use that, otherwise + * If a data directory was specified at ICU build time + * <code> + * \code + * #define ICU_DATA_DIR "path" + * \endcode + * </code> use that, + * otherwise no data directory is available. + * + * @return the data directory, or an empty string ("") if no data directory has + * been specified. + * + * @stable ICU 2.0 + */ +U_CAPI const char* U_EXPORT2 u_getDataDirectory(void); + + +/** + * Set the ICU data directory. + * The data directory is where common format ICU data files (.dat files) + * are loaded from. Note that normal use of the built-in ICU + * facilities does not require loading of an external data file; + * unless you are adding custom data to ICU, the data directory + * does not need to be set. + * + * This function should be called at most once in a process, before the + * first ICU operation (e.g., u_init()) that will require the loading of an + * ICU data file. + * This function is not thread-safe. Use it before calling ICU APIs from + * multiple threads. + * + * @param directory The directory to be set. + * + * @see u_init + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 u_setDataDirectory(const char *directory); + +#ifndef U_HIDE_INTERNAL_API +/** + * Return the time zone files override directory, or an empty string if + * no directory was specified. Certain time zone resources will be preferentially + * loaded from individual files in this directory. + * + * @return the time zone data override directory. + * @internal + */ +U_CAPI const char * U_EXPORT2 u_getTimeZoneFilesDirectory(UErrorCode *status); + +/** + * Set the time zone files override directory. + * This function is not thread safe; it must not be called concurrently with + * u_getTimeZoneFilesDirectory() or any other use of ICU time zone functions. + * This function should only be called before using any ICU service that + * will access the time zone data. + * @internal + */ +U_CAPI void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status); +#endif /* U_HIDE_INTERNAL_API */ + + +/** + * @{ + * Filesystem file and path separator characters. + * Example: '/' and ':' on Unix, '\\' and ';' on Windows. + * @stable ICU 2.0 + */ +#if U_PLATFORM_USES_ONLY_WIN32_API +# define U_FILE_SEP_CHAR '\\' +# define U_FILE_ALT_SEP_CHAR '/' +# define U_PATH_SEP_CHAR ';' +# define U_FILE_SEP_STRING "\\" +# define U_FILE_ALT_SEP_STRING "/" +# define U_PATH_SEP_STRING ";" +#else +# define U_FILE_SEP_CHAR '/' +# define U_FILE_ALT_SEP_CHAR '/' +# define U_PATH_SEP_CHAR ':' +# define U_FILE_SEP_STRING "/" +# define U_FILE_ALT_SEP_STRING "/" +# define U_PATH_SEP_STRING ":" +#endif + +/** @} */ + +/** + * Convert char characters to UChar characters. + * This utility function is useful only for "invariant characters" + * that are encoded in the platform default encoding. + * They are a small, constant subset of the encoding and include + * just the latin letters, digits, and some punctuation. + * For details, see U_CHARSET_FAMILY. + * + * @param cs Input string, points to <code>length</code> + * character bytes from a subset of the platform encoding. + * @param us Output string, points to memory for <code>length</code> + * Unicode characters. + * @param length The number of characters to convert; this may + * include the terminating <code>NUL</code>. + * + * @see U_CHARSET_FAMILY + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +u_charsToUChars(const char *cs, UChar *us, int32_t length); + +/** + * Convert UChar characters to char characters. + * This utility function is useful only for "invariant characters" + * that can be encoded in the platform default encoding. + * They are a small, constant subset of the encoding and include + * just the latin letters, digits, and some punctuation. + * For details, see U_CHARSET_FAMILY. + * + * @param us Input string, points to <code>length</code> + * Unicode characters that can be encoded with the + * codepage-invariant subset of the platform encoding. + * @param cs Output string, points to memory for <code>length</code> + * character bytes. + * @param length The number of characters to convert; this may + * include the terminating <code>NUL</code>. + * + * @see U_CHARSET_FAMILY + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +u_UCharsToChars(const UChar *us, char *cs, int32_t length); + +#endif diff --git a/thirdparty/icu4c/common/unicode/rbbi.h b/thirdparty/icu4c/common/unicode/rbbi.h new file mode 100644 index 0000000000..65117f616c --- /dev/null +++ b/thirdparty/icu4c/common/unicode/rbbi.h @@ -0,0 +1,732 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +*************************************************************************** +* Copyright (C) 1999-2016 International Business Machines Corporation * +* and others. All rights reserved. * +*************************************************************************** + +********************************************************************** +* Date Name Description +* 10/22/99 alan Creation. +* 11/11/99 rgillam Complete port from Java. +********************************************************************** +*/ + +#ifndef RBBI_H +#define RBBI_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +/** + * \file + * \brief C++ API: Rule Based Break Iterator + */ + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/brkiter.h" +#include "unicode/udata.h" +#include "unicode/parseerr.h" +#include "unicode/schriter.h" + +struct UCPTrie; + +U_NAMESPACE_BEGIN + +/** @internal */ +class LanguageBreakEngine; +struct RBBIDataHeader; +class RBBIDataWrapper; +class UnhandledEngine; +class UStack; + +/** + * + * A subclass of BreakIterator whose behavior is specified using a list of rules. + * <p>Instances of this class are most commonly created by the factory methods of + * BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc., + * and then used via the abstract API in class BreakIterator</p> + * + * <p>See the ICU User Guide for information on Break Iterator Rules.</p> + * + * <p>This class is not intended to be subclassed.</p> + */ +class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { + +private: + /** + * The UText through which this BreakIterator accesses the text + * @internal (private) + */ + UText fText; + +#ifndef U_HIDE_INTERNAL_API +public: +#endif /* U_HIDE_INTERNAL_API */ + /** + * The rule data for this BreakIterator instance. + * Not for general use; Public only for testing purposes. + * @internal + */ + RBBIDataWrapper *fData; +private: + + /** + * The current position of the iterator. Pinned, 0 < fPosition <= text.length. + * Never has the value UBRK_DONE (-1). + */ + int32_t fPosition; + + /** + * TODO: + */ + int32_t fRuleStatusIndex; + + /** + * Cache of previously determined boundary positions. + */ + class BreakCache; + BreakCache *fBreakCache; + + /** + * Cache of boundary positions within a region of text that has been + * sub-divided by dictionary based breaking. + */ + class DictionaryCache; + DictionaryCache *fDictionaryCache; + + /** + * + * If present, UStack of LanguageBreakEngine objects that might handle + * dictionary characters. Searched from top to bottom to find an object to + * handle a given character. + * @internal (private) + */ + UStack *fLanguageBreakEngines; + + /** + * + * If present, the special LanguageBreakEngine used for handling + * characters that are in the dictionary set, but not handled by any + * LanguageBreakEngine. + * @internal (private) + */ + UnhandledEngine *fUnhandledBreakEngine; + + /** + * Counter for the number of characters encountered with the "dictionary" + * flag set. + * @internal (private) + */ + uint32_t fDictionaryCharCount; + + /** + * A character iterator that refers to the same text as the UText, above. + * Only included for compatibility with old API, which was based on CharacterIterators. + * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below. + */ + CharacterIterator *fCharIter; + + /** + * When the input text is provided by a UnicodeString, this will point to + * a characterIterator that wraps that data. Needed only for the + * implementation of getText(), a backwards compatibility issue. + */ + StringCharacterIterator fSCharIter; + + /** + * True when iteration has run off the end, and iterator functions should return UBRK_DONE. + */ + UBool fDone; + + /** + * Array of look-ahead tentative results. + */ + int32_t *fLookAheadMatches; + + //======================================================================= + // constructors + //======================================================================= + + /** + * Constructor from a flattened set of RBBI data in malloced memory. + * RulesBasedBreakIterators built from a custom set of rules + * are created via this constructor; the rules are compiled + * into memory, then the break iterator is constructed here. + * + * The break iterator adopts the memory, and will + * free it when done. + * @internal (private) + */ + RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); + + /** @internal */ + friend class RBBIRuleBuilder; + /** @internal */ + friend class BreakIterator; + +public: + + /** Default constructor. Creates an empty shell of an iterator, with no + * rules or text to iterate over. Object can subsequently be assigned to. + * @stable ICU 2.2 + */ + RuleBasedBreakIterator(); + + /** + * Copy constructor. Will produce a break iterator with the same behavior, + * and which iterates over the same text, as the one passed in. + * @param that The RuleBasedBreakIterator passed to be copied + * @stable ICU 2.0 + */ + RuleBasedBreakIterator(const RuleBasedBreakIterator& that); + + /** + * Construct a RuleBasedBreakIterator from a set of rules supplied as a string. + * @param rules The break rules to be used. + * @param parseError In the event of a syntax error in the rules, provides the location + * within the rules of the problem. + * @param status Information on any errors encountered. + * @stable ICU 2.2 + */ + RuleBasedBreakIterator( const UnicodeString &rules, + UParseError &parseError, + UErrorCode &status); + + /** + * Construct a RuleBasedBreakIterator from a set of precompiled binary rules. + * Binary rules are obtained from RulesBasedBreakIterator::getBinaryRules(). + * Construction of a break iterator in this way is substantially faster than + * construction from source rules. + * + * Ownership of the storage containing the compiled rules remains with the + * caller of this function. The compiled rules must not be modified or + * deleted during the life of the break iterator. + * + * The compiled rules are not compatible across different major versions of ICU. + * The compiled rules are compatible only between machines with the same + * byte ordering (little or big endian) and the same base character set family + * (ASCII or EBCDIC). + * + * @see #getBinaryRules + * @param compiledRules A pointer to the compiled break rules to be used. + * @param ruleLength The length of the compiled break rules, in bytes. This + * corresponds to the length value produced by getBinaryRules(). + * @param status Information on any errors encountered, including invalid + * binary rules. + * @stable ICU 4.8 + */ + RuleBasedBreakIterator(const uint8_t *compiledRules, + uint32_t ruleLength, + UErrorCode &status); + + /** + * This constructor uses the udata interface to create a BreakIterator + * whose internal tables live in a memory-mapped file. "image" is an + * ICU UDataMemory handle for the pre-compiled break iterator tables. + * @param image handle to the memory image for the break iterator data. + * Ownership of the UDataMemory handle passes to the Break Iterator, + * which will be responsible for closing it when it is no longer needed. + * @param status Information on any errors encountered. + * @see udata_open + * @see #getBinaryRules + * @stable ICU 2.8 + */ + RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status); + + /** + * Destructor + * @stable ICU 2.0 + */ + virtual ~RuleBasedBreakIterator(); + + /** + * Assignment operator. Sets this iterator to have the same behavior, + * and iterate over the same text, as the one passed in. + * @param that The RuleBasedBreakItertor passed in + * @return the newly created RuleBasedBreakIterator + * @stable ICU 2.0 + */ + RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that); + + /** + * Equality operator. Returns true if both BreakIterators are of the + * same class, have the same behavior, and iterate over the same text. + * @param that The BreakIterator to be compared for equality + * @return true if both BreakIterators are of the + * same class, have the same behavior, and iterate over the same text. + * @stable ICU 2.0 + */ + virtual UBool operator==(const BreakIterator& that) const; + + /** + * Not-equal operator. If operator== returns true, this returns false, + * and vice versa. + * @param that The BreakIterator to be compared for inequality + * @return true if both BreakIterators are not same. + * @stable ICU 2.0 + */ + inline UBool operator!=(const BreakIterator& that) const; + + /** + * Returns a newly-constructed RuleBasedBreakIterator with the same + * behavior, and iterating over the same text, as this one. + * Differs from the copy constructor in that it is polymorphic, and + * will correctly clone (copy) a derived class. + * clone() is thread safe. Multiple threads may simultaneously + * clone the same source break iterator. + * @return a newly-constructed RuleBasedBreakIterator + * @stable ICU 2.0 + */ + virtual RuleBasedBreakIterator* clone() const; + + /** + * Compute a hash code for this BreakIterator + * @return A hash code + * @stable ICU 2.0 + */ + virtual int32_t hashCode(void) const; + + /** + * Returns the description used to create this iterator + * @return the description used to create this iterator + * @stable ICU 2.0 + */ + virtual const UnicodeString& getRules(void) const; + + //======================================================================= + // BreakIterator overrides + //======================================================================= + + /** + * <p> + * Return a CharacterIterator over the text being analyzed. + * The returned character iterator is owned by the break iterator, and must + * not be deleted by the caller. Repeated calls to this function may + * return the same CharacterIterator. + * </p> + * <p> + * The returned character iterator must not be used concurrently with + * the break iterator. If concurrent operation is needed, clone the + * returned character iterator first and operate on the clone. + * </p> + * <p> + * When the break iterator is operating on text supplied via a UText, + * this function will fail. Lacking any way to signal failures, it + * returns an CharacterIterator containing no text. + * The function getUText() provides similar functionality, + * is reliable, and is more efficient. + * </p> + * + * TODO: deprecate this function? + * + * @return An iterator over the text being analyzed. + * @stable ICU 2.0 + */ + virtual CharacterIterator& getText(void) const; + + + /** + * Get a UText for the text being analyzed. + * The returned UText is a shallow clone of the UText used internally + * by the break iterator implementation. It can safely be used to + * access the text without impacting any break iterator operations, + * but the underlying text itself must not be altered. + * + * @param fillIn A UText to be filled in. If NULL, a new UText will be + * allocated to hold the result. + * @param status receives any error codes. + * @return The current UText for this break iterator. If an input + * UText was provided, it will always be returned. + * @stable ICU 3.4 + */ + virtual UText *getUText(UText *fillIn, UErrorCode &status) const; + + /** + * Set the iterator to analyze a new piece of text. This function resets + * the current iteration position to the beginning of the text. + * @param newText An iterator over the text to analyze. The BreakIterator + * takes ownership of the character iterator. The caller MUST NOT delete it! + * @stable ICU 2.0 + */ + virtual void adoptText(CharacterIterator* newText); + + /** + * Set the iterator to analyze a new piece of text. This function resets + * the current iteration position to the beginning of the text. + * + * The BreakIterator will retain a reference to the supplied string. + * The caller must not modify or delete the text while the BreakIterator + * retains the reference. + * + * @param newText The text to analyze. + * @stable ICU 2.0 + */ + virtual void setText(const UnicodeString& newText); + + /** + * Reset the break iterator to operate over the text represented by + * the UText. The iterator position is reset to the start. + * + * This function makes a shallow clone of the supplied UText. This means + * that the caller is free to immediately close or otherwise reuse the + * Utext that was passed as a parameter, but that the underlying text itself + * must not be altered while being referenced by the break iterator. + * + * @param text The UText used to change the text. + * @param status Receives any error codes. + * @stable ICU 3.4 + */ + virtual void setText(UText *text, UErrorCode &status); + + /** + * Sets the current iteration position to the beginning of the text, position zero. + * @return The offset of the beginning of the text, zero. + * @stable ICU 2.0 + */ + virtual int32_t first(void); + + /** + * Sets the current iteration position to the end of the text. + * @return The text's past-the-end offset. + * @stable ICU 2.0 + */ + virtual int32_t last(void); + + /** + * Advances the iterator either forward or backward the specified number of steps. + * Negative values move backward, and positive values move forward. This is + * equivalent to repeatedly calling next() or previous(). + * @param n The number of steps to move. The sign indicates the direction + * (negative is backwards, and positive is forwards). + * @return The character offset of the boundary position n boundaries away from + * the current one. + * @stable ICU 2.0 + */ + virtual int32_t next(int32_t n); + + /** + * Advances the iterator to the next boundary position. + * @return The position of the first boundary after this one. + * @stable ICU 2.0 + */ + virtual int32_t next(void); + + /** + * Moves the iterator backwards, to the last boundary preceding this one. + * @return The position of the last boundary position preceding this one. + * @stable ICU 2.0 + */ + virtual int32_t previous(void); + + /** + * Sets the iterator to refer to the first boundary position following + * the specified position. + * @param offset The position from which to begin searching for a break position. + * @return The position of the first break after the current position. + * @stable ICU 2.0 + */ + virtual int32_t following(int32_t offset); + + /** + * Sets the iterator to refer to the last boundary position before the + * specified position. + * @param offset The position to begin searching for a break from. + * @return The position of the last boundary before the starting position. + * @stable ICU 2.0 + */ + virtual int32_t preceding(int32_t offset); + + /** + * Returns true if the specified position is a boundary position. As a side + * effect, leaves the iterator pointing to the first boundary position at + * or after "offset". + * @param offset the offset to check. + * @return True if "offset" is a boundary position. + * @stable ICU 2.0 + */ + virtual UBool isBoundary(int32_t offset); + + /** + * Returns the current iteration position. Note that UBRK_DONE is never + * returned from this function; if iteration has run to the end of a + * string, current() will return the length of the string while + * next() will return UBRK_DONE). + * @return The current iteration position. + * @stable ICU 2.0 + */ + virtual int32_t current(void) const; + + + /** + * Return the status tag from the break rule that determined the boundary at + * the current iteration position. For break rules that do not specify a + * status, a default value of 0 is returned. If more than one break rule + * would cause a boundary to be located at some position in the text, + * the numerically largest of the applicable status values is returned. + * <p> + * Of the standard types of ICU break iterators, only word break and + * line break provide status values. The values are defined in + * the header file ubrk.h. For Word breaks, the status allows distinguishing between words + * that contain alphabetic letters, "words" that appear to be numbers, + * punctuation and spaces, words containing ideographic characters, and + * more. For Line Break, the status distinguishes between hard (mandatory) breaks + * and soft (potential) break positions. + * <p> + * <code>getRuleStatus()</code> can be called after obtaining a boundary + * position from <code>next()</code>, <code>previous()</code>, or + * any other break iterator functions that returns a boundary position. + * <p> + * Note that <code>getRuleStatus()</code> returns the value corresponding to + * <code>current()</code> index even after <code>next()</code> has returned DONE. + * <p> + * When creating custom break rules, one is free to define whatever + * status values may be convenient for the application. + * <p> + * @return the status from the break rule that determined the boundary + * at the current iteration position. + * + * @see UWordBreak + * @stable ICU 2.2 + */ + virtual int32_t getRuleStatus() const; + + /** + * Get the status (tag) values from the break rule(s) that determined the boundary + * at the current iteration position. + * <p> + * The returned status value(s) are stored into an array provided by the caller. + * The values are stored in sorted (ascending) order. + * If the capacity of the output array is insufficient to hold the data, + * the output will be truncated to the available length, and a + * U_BUFFER_OVERFLOW_ERROR will be signaled. + * + * @param fillInVec an array to be filled in with the status values. + * @param capacity the length of the supplied vector. A length of zero causes + * the function to return the number of status values, in the + * normal way, without attempting to store any values. + * @param status receives error codes. + * @return The number of rule status values from the rules that determined + * the boundary at the current iteration position. + * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value + * is the total number of status values that were available, + * not the reduced number that were actually returned. + * @see getRuleStatus + * @stable ICU 3.0 + */ + virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); + + /** + * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. + * This method is to implement a simple version of RTTI, since not all + * C++ compilers support genuine RTTI. Polymorphic operator==() and + * clone() methods call this method. + * + * @return The class ID for this object. All objects of a + * given class have the same class ID. Objects of + * other classes have different class IDs. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const; + + /** + * Returns the class ID for this class. This is useful only for + * comparing to a return value from getDynamicClassID(). For example: + * + * Base* polymorphic_pointer = createPolymorphicObject(); + * if (polymorphic_pointer->getDynamicClassID() == + * Derived::getStaticClassID()) ... + * + * @return The class ID for all objects of this class. + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + +#ifndef U_FORCE_HIDE_DEPRECATED_API + /** + * Deprecated functionality. Use clone() instead. + * + * Create a clone (copy) of this break iterator in memory provided + * by the caller. The idea is to increase performance by avoiding + * a storage allocation. Use of this function is NOT RECOMMENDED. + * Performance gains are minimal, and correct buffer management is + * tricky. Use clone() instead. + * + * @param stackBuffer The pointer to the memory into which the cloned object + * should be placed. If NULL, allocate heap memory + * for the cloned object. + * @param BufferSize The size of the buffer. If zero, return the required + * buffer size, but do not clone the object. If the + * size was too small (but not zero), allocate heap + * storage for the cloned object. + * + * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be + * returned if the provided buffer was too small, and + * the clone was therefore put on the heap. + * + * @return Pointer to the clone object. This may differ from the stackBuffer + * address if the byte alignment of the stack buffer was not suitable + * or if the stackBuffer was too small to hold the clone. + * @deprecated ICU 52. Use clone() instead. + */ + virtual RuleBasedBreakIterator *createBufferClone(void *stackBuffer, + int32_t &BufferSize, + UErrorCode &status); +#endif // U_FORCE_HIDE_DEPRECATED_API + + /** + * Return the binary form of compiled break rules, + * which can then be used to create a new break iterator at some + * time in the future. Creating a break iterator from pre-compiled rules + * is much faster than building one from the source form of the + * break rules. + * + * The binary data can only be used with the same version of ICU + * and on the same platform type (processor endian-ness) + * + * @param length Returns the length of the binary data. (Out parameter.) + * + * @return A pointer to the binary (compiled) rule data. The storage + * belongs to the RulesBasedBreakIterator object, not the + * caller, and must not be modified or deleted. + * @stable ICU 4.8 + */ + virtual const uint8_t *getBinaryRules(uint32_t &length); + + /** + * Set the subject text string upon which the break iterator is operating + * without changing any other aspect of the matching state. + * The new and previous text strings must have the same content. + * + * This function is intended for use in environments where ICU is operating on + * strings that may move around in memory. It provides a mechanism for notifying + * ICU that the string has been relocated, and providing a new UText to access the + * string in its new position. + * + * Note that the break iterator implementation never copies the underlying text + * of a string being processed, but always operates directly on the original text + * provided by the user. Refreshing simply drops the references to the old text + * and replaces them with references to the new. + * + * Caution: this function is normally used only by very specialized, + * system-level code. One example use case is with garbage collection that moves + * the text in memory. + * + * @param input The new (moved) text string. + * @param status Receives errors detected by this function. + * @return *this + * + * @stable ICU 49 + */ + virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status); + + +private: + //======================================================================= + // implementation + //======================================================================= + /** + * Dumps caches and performs other actions associated with a complete change + * in text or iteration position. + * @internal (private) + */ + void reset(void); + + /** + * Common initialization function, used by constructors and bufferClone. + * @internal (private) + */ + void init(UErrorCode &status); + + /** + * Iterate backwards from an arbitrary position in the input text using the + * synthesized Safe Reverse rules. + * This locates a "Safe Position" from which the forward break rules + * will operate correctly. A Safe Position is not necessarily a boundary itself. + * + * @param fromPosition the position in the input text to begin the iteration. + * @internal (private) + */ + int32_t handleSafePrevious(int32_t fromPosition); + + /** + * Find a rule-based boundary by running the state machine. + * Input + * fPosition, the position in the text to begin from. + * Output + * fPosition: the boundary following the starting position. + * fDictionaryCharCount the number of dictionary characters encountered. + * If > 0, the segment will be further subdivided + * fRuleStatusIndex Info from the state table indicating which rules caused the boundary. + * + * @internal (private) + */ + int32_t handleNext(); + + /* + * Templatized version of handleNext() and handleSafePrevious(). + * + * There will be exactly four instantiations, two each for 8 and 16 bit tables, + * two each for 8 and 16 bit trie. + * Having separate instantiations for the table types keeps conditional tests of + * the table type out of the inner loops, at the expense of replicated code. + * + * The template parameter for the Trie access function is a value, not a type. + * Doing it this way, the compiler will inline the Trie function in the + * expanded functions. (Both the 8 and 16 bit access functions have the same type + * signature) + */ + + typedef uint16_t (*PTrieFunc)(const UCPTrie *, UChar32); + + template<typename RowType, PTrieFunc trieFunc> + int32_t handleSafePrevious(int32_t fromPosition); + + template<typename RowType, PTrieFunc trieFunc> + int32_t handleNext(); + + + /** + * This function returns the appropriate LanguageBreakEngine for a + * given character c. + * @param c A character in the dictionary set + * @internal (private) + */ + const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c); + + public: +#ifndef U_HIDE_INTERNAL_API + /** + * Debugging function only. + * @internal + */ + void dumpCache(); + + /** + * Debugging function only. + * @internal + */ + void dumpTables(); +#endif /* U_HIDE_INTERNAL_API */ +}; + +//------------------------------------------------------------------------------ +// +// Inline Functions Definitions ... +// +//------------------------------------------------------------------------------ + +inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const { + return !operator==(that); +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/rep.h b/thirdparty/icu4c/common/unicode/rep.h new file mode 100644 index 0000000000..6dd4530647 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/rep.h @@ -0,0 +1,266 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +************************************************************************** +* Copyright (C) 1999-2012, International Business Machines Corporation and +* others. All Rights Reserved. +************************************************************************** +* Date Name Description +* 11/17/99 aliu Creation. Ported from java. Modified to +* match current UnicodeString API. Forced +* to use name "handleReplaceBetween" because +* of existing methods in UnicodeString. +************************************************************************** +*/ + +#ifndef REP_H +#define REP_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: Replaceable String + */ + +U_NAMESPACE_BEGIN + +class UnicodeString; + +/** + * <code>Replaceable</code> is an abstract base class representing a + * string of characters that supports the replacement of a range of + * itself with a new string of characters. It is used by APIs that + * change a piece of text while retaining metadata. Metadata is data + * other than the Unicode characters returned by char32At(). One + * example of metadata is style attributes; another is an edit + * history, marking each character with an author and revision number. + * + * <p>An implicit aspect of the <code>Replaceable</code> API is that + * during a replace operation, new characters take on the metadata of + * the old characters. For example, if the string "the <b>bold</b> + * font" has range (4, 8) replaced with "strong", then it becomes "the + * <b>strong</b> font". + * + * <p><code>Replaceable</code> specifies ranges using a start + * offset and a limit offset. The range of characters thus specified + * includes the characters at offset start..limit-1. That is, the + * start offset is inclusive, and the limit offset is exclusive. + * + * <p><code>Replaceable</code> also includes API to access characters + * in the string: <code>length()</code>, <code>charAt()</code>, + * <code>char32At()</code>, and <code>extractBetween()</code>. + * + * <p>For a subclass to support metadata, typical behavior of + * <code>replace()</code> is the following: + * <ul> + * <li>Set the metadata of the new text to the metadata of the first + * character replaced</li> + * <li>If no characters are replaced, use the metadata of the + * previous character</li> + * <li>If there is no previous character (i.e. start == 0), use the + * following character</li> + * <li>If there is no following character (i.e. the replaceable was + * empty), use default metadata.<br> + * <li>If the code point U+FFFF is seen, it should be interpreted as + * a special marker having no metadata<li> + * </li> + * </ul> + * If this is not the behavior, the subclass should document any differences. + * @author Alan Liu + * @stable ICU 2.0 + */ +class U_COMMON_API Replaceable : public UObject { + +public: + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~Replaceable(); + + /** + * Returns the number of 16-bit code units in the text. + * @return number of 16-bit code units in text + * @stable ICU 1.8 + */ + inline int32_t length() const; + + /** + * Returns the 16-bit code unit at the given offset into the text. + * @param offset an integer between 0 and <code>length()</code>-1 + * inclusive + * @return 16-bit code unit of text at given offset + * @stable ICU 1.8 + */ + inline char16_t charAt(int32_t offset) const; + + /** + * Returns the 32-bit code point at the given 16-bit offset into + * the text. This assumes the text is stored as 16-bit code units + * with surrogate pairs intermixed. If the offset of a leading or + * trailing code unit of a surrogate pair is given, return the + * code point of the surrogate pair. + * + * @param offset an integer between 0 and <code>length()</code>-1 + * inclusive + * @return 32-bit code point of text at given offset + * @stable ICU 1.8 + */ + inline UChar32 char32At(int32_t offset) const; + + /** + * Copies characters in the range [<tt>start</tt>, <tt>limit</tt>) + * into the UnicodeString <tt>target</tt>. + * @param start offset of first character which will be copied + * @param limit offset immediately following the last character to + * be copied + * @param target UnicodeString into which to copy characters. + * @return A reference to <TT>target</TT> + * @stable ICU 2.1 + */ + virtual void extractBetween(int32_t start, + int32_t limit, + UnicodeString& target) const = 0; + + /** + * Replaces a substring of this object with the given text. If the + * characters being replaced have metadata, the new characters + * that replace them should be given the same metadata. + * + * <p>Subclasses must ensure that if the text between start and + * limit is equal to the replacement text, that replace has no + * effect. That is, any metadata + * should be unaffected. In addition, subclasses are encouraged to + * check for initial and trailing identical characters, and make a + * smaller replacement if possible. This will preserve as much + * metadata as possible. + * @param start the beginning index, inclusive; <code>0 <= start + * <= limit</code>. + * @param limit the ending index, exclusive; <code>start <= limit + * <= length()</code>. + * @param text the text to replace characters <code>start</code> + * to <code>limit - 1</code> + * @stable ICU 2.0 + */ + virtual void handleReplaceBetween(int32_t start, + int32_t limit, + const UnicodeString& text) = 0; + // Note: All other methods in this class take the names of + // existing UnicodeString methods. This method is the exception. + // It is named differently because all replace methods of + // UnicodeString return a UnicodeString&. The 'between' is + // required in order to conform to the UnicodeString naming + // convention; API taking start/length are named <operation>, and + // those taking start/limit are named <operationBetween>. The + // 'handle' is added because 'replaceBetween' and + // 'doReplaceBetween' are already taken. + + /** + * Copies a substring of this object, retaining metadata. + * This method is used to duplicate or reorder substrings. + * The destination index must not overlap the source range. + * + * @param start the beginning index, inclusive; <code>0 <= start <= + * limit</code>. + * @param limit the ending index, exclusive; <code>start <= limit <= + * length()</code>. + * @param dest the destination index. The characters from + * <code>start..limit-1</code> will be copied to <code>dest</code>. + * Implementations of this method may assume that <code>dest <= start || + * dest >= limit</code>. + * @stable ICU 2.0 + */ + virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0; + + /** + * Returns true if this object contains metadata. If a + * Replaceable object has metadata, calls to the Replaceable API + * must be made so as to preserve metadata. If it does not, calls + * to the Replaceable API may be optimized to improve performance. + * The default implementation returns true. + * @return true if this object contains metadata + * @stable ICU 2.2 + */ + virtual UBool hasMetaData() const; + + /** + * Clone this object, an instance of a subclass of Replaceable. + * Clones can be used concurrently in multiple threads. + * If a subclass does not implement clone(), or if an error occurs, + * then NULL is returned. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see getDynamicClassID + * @stable ICU 2.6 + */ + virtual Replaceable *clone() const; + +protected: + + /** + * Default constructor. + * @stable ICU 2.4 + */ + inline Replaceable(); + + /* + * Assignment operator not declared. The compiler will provide one + * which does nothing since this class does not contain any data members. + * API/code coverage may show the assignment operator as present and + * untested - ignore. + * Subclasses need this assignment operator if they use compiler-provided + * assignment operators of their own. An alternative to not declaring one + * here would be to declare and empty-implement a protected or public one. + Replaceable &Replaceable::operator=(const Replaceable &); + */ + + /** + * Virtual version of length(). + * @stable ICU 2.4 + */ + virtual int32_t getLength() const = 0; + + /** + * Virtual version of charAt(). + * @stable ICU 2.4 + */ + virtual char16_t getCharAt(int32_t offset) const = 0; + + /** + * Virtual version of char32At(). + * @stable ICU 2.4 + */ + virtual UChar32 getChar32At(int32_t offset) const = 0; +}; + +inline Replaceable::Replaceable() {} + +inline int32_t +Replaceable::length() const { + return getLength(); +} + +inline char16_t +Replaceable::charAt(int32_t offset) const { + return getCharAt(offset); +} + +inline UChar32 +Replaceable::char32At(int32_t offset) const { + return getChar32At(offset); +} + +// There is no rep.cpp, see unistr.cpp for Replaceable function implementations. + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/resbund.h b/thirdparty/icu4c/common/unicode/resbund.h new file mode 100644 index 0000000000..37738e277b --- /dev/null +++ b/thirdparty/icu4c/common/unicode/resbund.h @@ -0,0 +1,498 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1996-2013, International Business Machines Corporation +* and others. All Rights Reserved. +* +****************************************************************************** +* +* File resbund.h +* +* CREATED BY +* Richard Gillam +* +* Modification History: +* +* Date Name Description +* 2/5/97 aliu Added scanForLocaleInFile. Added +* constructor which attempts to read resource bundle +* from a specific file, without searching other files. +* 2/11/97 aliu Added UErrorCode return values to constructors. Fixed +* infinite loops in scanForFile and scanForLocale. +* Modified getRawResourceData to not delete storage +* in localeData and resourceData which it doesn't own. +* Added Mac compatibility #ifdefs for tellp() and +* ios::nocreate. +* 2/18/97 helena Updated with 100% documentation coverage. +* 3/13/97 aliu Rewrote to load in entire resource bundle and store +* it as a Hashtable of ResourceBundleData objects. +* Added state table to govern parsing of files. +* Modified to load locale index out of new file +* distinct from default.txt. +* 3/25/97 aliu Modified to support 2-d arrays, needed for timezone +* data. Added support for custom file suffixes. Again, +* needed to support timezone data. +* 4/7/97 aliu Cleaned up. +* 03/02/99 stephen Removed dependency on FILE*. +* 03/29/99 helena Merged Bertrand and Stephen's changes. +* 06/11/99 stephen Removed parsing of .txt files. +* Reworked to use new binary format. +* Cleaned up. +* 06/14/99 stephen Removed methods taking a filename suffix. +* 11/09/99 weiv Added getLocale(), fRealLocale, removed fRealLocaleID +****************************************************************************** +*/ + +#ifndef RESBUND_H +#define RESBUND_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" +#include "unicode/ures.h" +#include "unicode/unistr.h" +#include "unicode/locid.h" + +/** + * \file + * \brief C++ API: Resource Bundle + */ + +U_NAMESPACE_BEGIN + +/** + * A class representing a collection of resource information pertaining to a given + * locale. A resource bundle provides a way of accessing locale- specfic information in + * a data file. You create a resource bundle that manages the resources for a given + * locale and then ask it for individual resources. + * <P> + * Resource bundles in ICU4C are currently defined using text files which conform to the following + * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/bnf_rb.txt">BNF definition</a>. + * More on resource bundle concepts and syntax can be found in the + * <a href="http://icu-project.org/userguide/ResourceManagement.html">Users Guide</a>. + * <P> + * + * The ResourceBundle class is not suitable for subclassing. + * + * @stable ICU 2.0 + */ +class U_COMMON_API ResourceBundle : public UObject { +public: + /** + * Constructor + * + * @param packageName The packageName and locale together point to an ICU udata object, + * as defined by <code> udata_open( packageName, "res", locale, err) </code> + * or equivalent. Typically, packageName will refer to a (.dat) file, or to + * a package registered with udata_setAppData(). Using a full file or directory + * pathname for packageName is deprecated. + * @param locale This is the locale this resource bundle is for. To get resources + * for the French locale, for example, you would create a + * ResourceBundle passing Locale::FRENCH for the "locale" parameter, + * and all subsequent calls to that resource bundle will return + * resources that pertain to the French locale. If the caller doesn't + * pass a locale parameter, the default locale for the system (as + * returned by Locale::getDefault()) will be used. + * @param err The Error Code. + * The UErrorCode& err parameter is used to return status information to the user. To + * check whether the construction succeeded or not, you should check the value of + * U_SUCCESS(err). If you wish more detailed information, you can check for + * informational error results which still indicate success. U_USING_FALLBACK_WARNING + * indicates that a fall back locale was used. For example, 'de_CH' was requested, + * but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that + * the default locale data was used; neither the requested locale nor any of its + * fall back locales could be found. + * @stable ICU 2.0 + */ + ResourceBundle(const UnicodeString& packageName, + const Locale& locale, + UErrorCode& err); + + /** + * Construct a resource bundle for the default bundle in the specified package. + * + * @param packageName The packageName and locale together point to an ICU udata object, + * as defined by <code> udata_open( packageName, "res", locale, err) </code> + * or equivalent. Typically, packageName will refer to a (.dat) file, or to + * a package registered with udata_setAppData(). Using a full file or directory + * pathname for packageName is deprecated. + * @param err A UErrorCode value + * @stable ICU 2.0 + */ + ResourceBundle(const UnicodeString& packageName, + UErrorCode& err); + + /** + * Construct a resource bundle for the ICU default bundle. + * + * @param err A UErrorCode value + * @stable ICU 2.0 + */ + ResourceBundle(UErrorCode &err); + + /** + * Standard constructor, constructs a resource bundle for the locale-specific + * bundle in the specified package. + * + * @param packageName The packageName and locale together point to an ICU udata object, + * as defined by <code> udata_open( packageName, "res", locale, err) </code> + * or equivalent. Typically, packageName will refer to a (.dat) file, or to + * a package registered with udata_setAppData(). Using a full file or directory + * pathname for packageName is deprecated. + * NULL is used to refer to ICU data. + * @param locale The locale for which to open a resource bundle. + * @param err A UErrorCode value + * @stable ICU 2.0 + */ + ResourceBundle(const char* packageName, + const Locale& locale, + UErrorCode& err); + + /** + * Copy constructor. + * + * @param original The resource bundle to copy. + * @stable ICU 2.0 + */ + ResourceBundle(const ResourceBundle &original); + + /** + * Constructor from a C UResourceBundle. The resource bundle is + * copied and not adopted. ures_close will still need to be used on the + * original resource bundle. + * + * @param res A pointer to the C resource bundle. + * @param status A UErrorCode value. + * @stable ICU 2.0 + */ + ResourceBundle(UResourceBundle *res, + UErrorCode &status); + + /** + * Assignment operator. + * + * @param other The resource bundle to copy. + * @stable ICU 2.0 + */ + ResourceBundle& + operator=(const ResourceBundle& other); + + /** Destructor. + * @stable ICU 2.0 + */ + virtual ~ResourceBundle(); + + /** + * Clone this object. + * Clones can be used concurrently in multiple threads. + * If an error occurs, then NULL is returned. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see getDynamicClassID + * @stable ICU 2.8 + */ + ResourceBundle *clone() const; + + /** + * Returns the size of a resource. Size for scalar types is always 1, and for vector/table types is + * the number of child resources. + * @warning Integer array is treated as a scalar type. There are no + * APIs to access individual members of an integer array. It + * is always returned as a whole. + * + * @return number of resources in a given resource. + * @stable ICU 2.0 + */ + int32_t + getSize(void) const; + + /** + * returns a string from a string resource type + * + * @param status fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * could be a warning + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT> + * @return a pointer to a zero-terminated char16_t array which lives in a memory mapped/DLL file. + * @stable ICU 2.0 + */ + UnicodeString + getString(UErrorCode& status) const; + + /** + * returns a binary data from a resource. Can be used at most primitive resource types (binaries, + * strings, ints) + * + * @param len fills in the length of resulting byte chunk + * @param status fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * could be a warning + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT> + * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file. + * @stable ICU 2.0 + */ + const uint8_t* + getBinary(int32_t& len, UErrorCode& status) const; + + + /** + * returns an integer vector from a resource. + * + * @param len fills in the length of resulting integer vector + * @param status fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * could be a warning + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT> + * @return a pointer to a vector of integers that lives in a memory mapped/DLL file. + * @stable ICU 2.0 + */ + const int32_t* + getIntVector(int32_t& len, UErrorCode& status) const; + + /** + * returns an unsigned integer from a resource. + * This integer is originally 28 bits. + * + * @param status fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * could be a warning + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT> + * @return an unsigned integer value + * @stable ICU 2.0 + */ + uint32_t + getUInt(UErrorCode& status) const; + + /** + * returns a signed integer from a resource. + * This integer is originally 28 bit and the sign gets propagated. + * + * @param status fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * could be a warning + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT> + * @return a signed integer value + * @stable ICU 2.0 + */ + int32_t + getInt(UErrorCode& status) const; + + /** + * Checks whether the resource has another element to iterate over. + * + * @return true if there are more elements, false if there is no more elements + * @stable ICU 2.0 + */ + UBool + hasNext(void) const; + + /** + * Resets the internal context of a resource so that iteration starts from the first element. + * + * @stable ICU 2.0 + */ + void + resetIterator(void); + + /** + * Returns the key associated with this resource. Not all the resources have a key - only + * those that are members of a table. + * + * @return a key associated to this resource, or NULL if it doesn't have a key + * @stable ICU 2.0 + */ + const char* + getKey(void) const; + + /** + * Gets the locale ID of the resource bundle as a string. + * Same as getLocale().getName() . + * + * @return the locale ID of the resource bundle as a string + * @stable ICU 2.0 + */ + const char* + getName(void) const; + + + /** + * Returns the type of a resource. Available types are defined in enum UResType + * + * @return type of the given resource. + * @stable ICU 2.0 + */ + UResType + getType(void) const; + + /** + * Returns the next resource in a given resource or NULL if there are no more resources + * + * @param status fills in the outgoing error code + * @return ResourceBundle object. + * @stable ICU 2.0 + */ + ResourceBundle + getNext(UErrorCode& status); + + /** + * Returns the next string in a resource or NULL if there are no more resources + * to iterate over. + * + * @param status fills in the outgoing error code + * @return an UnicodeString object. + * @stable ICU 2.0 + */ + UnicodeString + getNextString(UErrorCode& status); + + /** + * Returns the next string in a resource or NULL if there are no more resources + * to iterate over. + * + * @param key fill in for key associated with this string + * @param status fills in the outgoing error code + * @return an UnicodeString object. + * @stable ICU 2.0 + */ + UnicodeString + getNextString(const char ** key, + UErrorCode& status); + + /** + * Returns the resource in a resource at the specified index. + * + * @param index an index to the wanted resource. + * @param status fills in the outgoing error code + * @return ResourceBundle object. If there is an error, resource is invalid. + * @stable ICU 2.0 + */ + ResourceBundle + get(int32_t index, + UErrorCode& status) const; + + /** + * Returns the string in a given resource at the specified index. + * + * @param index an index to the wanted string. + * @param status fills in the outgoing error code + * @return an UnicodeString object. If there is an error, string is bogus + * @stable ICU 2.0 + */ + UnicodeString + getStringEx(int32_t index, + UErrorCode& status) const; + + /** + * Returns a resource in a resource that has a given key. This procedure works only with table + * resources. + * + * @param key a key associated with the wanted resource + * @param status fills in the outgoing error code. + * @return ResourceBundle object. If there is an error, resource is invalid. + * @stable ICU 2.0 + */ + ResourceBundle + get(const char* key, + UErrorCode& status) const; + + /** + * Returns a string in a resource that has a given key. This procedure works only with table + * resources. + * + * @param key a key associated with the wanted string + * @param status fills in the outgoing error code + * @return an UnicodeString object. If there is an error, string is bogus + * @stable ICU 2.0 + */ + UnicodeString + getStringEx(const char* key, + UErrorCode& status) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Return the version number associated with this ResourceBundle as a string. Please + * use getVersion, as this method is going to be deprecated. + * + * @return A version number string as specified in the resource bundle or its parent. + * The caller does not own this string. + * @see getVersion + * @deprecated ICU 2.8 Use getVersion instead. + */ + const char* + getVersionNumber(void) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Return the version number associated with this ResourceBundle as a UVersionInfo array. + * + * @param versionInfo A UVersionInfo array that is filled with the version number + * as specified in the resource bundle or its parent. + * @stable ICU 2.0 + */ + void + getVersion(UVersionInfo versionInfo) const; + +#ifndef U_HIDE_DEPRECATED_API + /** + * Return the Locale associated with this ResourceBundle. + * + * @return a Locale object + * @deprecated ICU 2.8 Use getLocale(ULocDataLocaleType type, UErrorCode &status) overload instead. + */ + const Locale& + getLocale(void) const; +#endif /* U_HIDE_DEPRECATED_API */ + + /** + * Return the Locale associated with this ResourceBundle. + * @param type You can choose between requested, valid and actual + * locale. For description see the definition of + * ULocDataLocaleType in uloc.h + * @param status just for catching illegal arguments + * + * @return a Locale object + * @stable ICU 2.8 + */ + const Locale + getLocale(ULocDataLocaleType type, UErrorCode &status) const; +#ifndef U_HIDE_INTERNAL_API + /** + * This API implements multilevel fallback + * @internal + */ + ResourceBundle + getWithFallback(const char* key, UErrorCode& status); +#endif /* U_HIDE_INTERNAL_API */ + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + +private: + ResourceBundle(); // default constructor not implemented + + UResourceBundle *fResource; + void constructForLocale(const UnicodeString& path, const Locale& locale, UErrorCode& error); + Locale *fLocale; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/schriter.h b/thirdparty/icu4c/common/unicode/schriter.h new file mode 100644 index 0000000000..1ca5b70fca --- /dev/null +++ b/thirdparty/icu4c/common/unicode/schriter.h @@ -0,0 +1,195 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2005, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File schriter.h +* +* Modification History: +* +* Date Name Description +* 05/05/99 stephen Cleaned up. +****************************************************************************** +*/ + +#ifndef SCHRITER_H +#define SCHRITER_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/chariter.h" +#include "unicode/uchriter.h" + +/** + * \file + * \brief C++ API: String Character Iterator + */ + +U_NAMESPACE_BEGIN +/** + * A concrete subclass of CharacterIterator that iterates over the + * characters (code units or code points) in a UnicodeString. + * It's possible not only to create an + * iterator that iterates over an entire UnicodeString, but also to + * create one that iterates over only a subrange of a UnicodeString + * (iterators over different subranges of the same UnicodeString don't + * compare equal). + * @see CharacterIterator + * @see ForwardCharacterIterator + * @stable ICU 2.0 + */ +class U_COMMON_API StringCharacterIterator : public UCharCharacterIterator { +public: + /** + * Create an iterator over the UnicodeString referred to by "textStr". + * The UnicodeString object is copied. + * The iteration range is the whole string, and the starting position is 0. + * @param textStr The unicode string used to create an iterator + * @stable ICU 2.0 + */ + StringCharacterIterator(const UnicodeString& textStr); + + /** + * Create an iterator over the UnicodeString referred to by "textStr". + * The iteration range is the whole string, and the starting + * position is specified by "textPos". If "textPos" is outside the valid + * iteration range, the behavior of this object is undefined. + * @param textStr The unicode string used to create an iterator + * @param textPos The starting position of the iteration + * @stable ICU 2.0 + */ + StringCharacterIterator(const UnicodeString& textStr, + int32_t textPos); + + /** + * Create an iterator over the UnicodeString referred to by "textStr". + * The UnicodeString object is copied. + * The iteration range begins with the code unit specified by + * "textBegin" and ends with the code unit BEFORE the code unit specified + * by "textEnd". The starting position is specified by "textPos". If + * "textBegin" and "textEnd" don't form a valid range on "text" (i.e., + * textBegin >= textEnd or either is negative or greater than text.size()), + * or "textPos" is outside the range defined by "textBegin" and "textEnd", + * the behavior of this iterator is undefined. + * @param textStr The unicode string used to create the StringCharacterIterator + * @param textBegin The begin position of the iteration range + * @param textEnd The end position of the iteration range + * @param textPos The starting position of the iteration + * @stable ICU 2.0 + */ + StringCharacterIterator(const UnicodeString& textStr, + int32_t textBegin, + int32_t textEnd, + int32_t textPos); + + /** + * Copy constructor. The new iterator iterates over the same range + * of the same string as "that", and its initial position is the + * same as "that"'s current position. + * The UnicodeString object in "that" is copied. + * @param that The StringCharacterIterator to be copied + * @stable ICU 2.0 + */ + StringCharacterIterator(const StringCharacterIterator& that); + + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~StringCharacterIterator(); + + /** + * Assignment operator. *this is altered to iterate over the same + * range of the same string as "that", and refers to the same + * character within that string as "that" does. + * @param that The object to be copied. + * @return the newly created object. + * @stable ICU 2.0 + */ + StringCharacterIterator& + operator=(const StringCharacterIterator& that); + + /** + * Returns true if the iterators iterate over the same range of the + * same string and are pointing at the same character. + * @param that The ForwardCharacterIterator to be compared for equality + * @return true if the iterators iterate over the same range of the + * same string and are pointing at the same character. + * @stable ICU 2.0 + */ + virtual UBool operator==(const ForwardCharacterIterator& that) const; + + /** + * Returns a new StringCharacterIterator referring to the same + * character in the same range of the same string as this one. The + * caller must delete the new iterator. + * @return the newly cloned object. + * @stable ICU 2.0 + */ + virtual StringCharacterIterator* clone() const; + + /** + * Sets the iterator to iterate over the provided string. + * @param newText The string to be iterated over + * @stable ICU 2.0 + */ + void setText(const UnicodeString& newText); + + /** + * Copies the UnicodeString under iteration into the UnicodeString + * referred to by "result". Even if this iterator iterates across + * only a part of this string, the whole string is copied. + * @param result Receives a copy of the text under iteration. + * @stable ICU 2.0 + */ + virtual void getText(UnicodeString& result); + + /** + * Return a class ID for this object (not really public) + * @return a class ID for this object. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const; + + /** + * Return a class ID for this class (not really public) + * @return a class ID for this class + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + +protected: + /** + * Default constructor, iteration over empty string. + * @stable ICU 2.0 + */ + StringCharacterIterator(); + + /** + * Sets the iterator to iterate over the provided string. + * @param newText The string to be iterated over + * @param newTextLength The length of the String + * @stable ICU 2.0 + */ + void setText(const char16_t* newText, int32_t newTextLength); + + /** + * Copy of the iterated string object. + * @stable ICU 2.0 + */ + UnicodeString text; + +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/simpleformatter.h b/thirdparty/icu4c/common/unicode/simpleformatter.h new file mode 100644 index 0000000000..6d9c04ace2 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/simpleformatter.h @@ -0,0 +1,341 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2014-2016, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* simpleformatter.h +*/ + +#ifndef __SIMPLEFORMATTER_H__ +#define __SIMPLEFORMATTER_H__ + +/** + * \file + * \brief C++ API: Simple formatter, minimal subset of MessageFormat. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/unistr.h" + +U_NAMESPACE_BEGIN + +// Forward declaration: +namespace number { +namespace impl { +class SimpleModifier; +} +} + +/** + * Formats simple patterns like "{1} was born in {0}". + * Minimal subset of MessageFormat; fast, simple, minimal dependencies. + * Supports only numbered arguments with no type nor style parameters, + * and formats only string values. + * Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior. + * + * Factory methods set error codes for syntax errors + * and for too few or too many arguments/placeholders. + * + * SimpleFormatter objects are thread-safe except for assignment and applying new patterns. + * + * Example: + * <pre> + * UErrorCode errorCode = U_ZERO_ERROR; + * SimpleFormatter fmt("{1} '{born}' in {0}", errorCode); + * UnicodeString result; + * + * // Output: "paul {born} in england" + * fmt.format("england", "paul", result, errorCode); + * </pre> + * + * This class is not intended for public subclassing. + * + * @see MessageFormat + * @see UMessagePatternApostropheMode + * @stable ICU 57 + */ +class U_COMMON_API SimpleFormatter U_FINAL : public UMemory { +public: + /** + * Default constructor. + * @stable ICU 57 + */ + SimpleFormatter() : compiledPattern((char16_t)0) {} + + /** + * Constructs a formatter from the pattern string. + * + * @param pattern The pattern string. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax. + * @stable ICU 57 + */ + SimpleFormatter(const UnicodeString& pattern, UErrorCode &errorCode) { + applyPattern(pattern, errorCode); + } + + /** + * Constructs a formatter from the pattern string. + * The number of arguments checked against the given limits is the + * highest argument number plus one, not the number of occurrences of arguments. + * + * @param pattern The pattern string. + * @param min The pattern must have at least this many arguments. + * @param max The pattern must have at most this many arguments. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and + * too few or too many arguments. + * @stable ICU 57 + */ + SimpleFormatter(const UnicodeString& pattern, int32_t min, int32_t max, + UErrorCode &errorCode) { + applyPatternMinMaxArguments(pattern, min, max, errorCode); + } + + /** + * Copy constructor. + * @stable ICU 57 + */ + SimpleFormatter(const SimpleFormatter& other) + : compiledPattern(other.compiledPattern) {} + + /** + * Assignment operator. + * @stable ICU 57 + */ + SimpleFormatter &operator=(const SimpleFormatter& other); + + /** + * Destructor. + * @stable ICU 57 + */ + ~SimpleFormatter(); + + /** + * Changes this object according to the new pattern. + * + * @param pattern The pattern string. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax. + * @return true if U_SUCCESS(errorCode). + * @stable ICU 57 + */ + UBool applyPattern(const UnicodeString &pattern, UErrorCode &errorCode) { + return applyPatternMinMaxArguments(pattern, 0, INT32_MAX, errorCode); + } + + /** + * Changes this object according to the new pattern. + * The number of arguments checked against the given limits is the + * highest argument number plus one, not the number of occurrences of arguments. + * + * @param pattern The pattern string. + * @param min The pattern must have at least this many arguments. + * @param max The pattern must have at most this many arguments. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and + * too few or too many arguments. + * @return true if U_SUCCESS(errorCode). + * @stable ICU 57 + */ + UBool applyPatternMinMaxArguments(const UnicodeString &pattern, + int32_t min, int32_t max, UErrorCode &errorCode); + + /** + * @return The max argument number + 1. + * @stable ICU 57 + */ + int32_t getArgumentLimit() const { + return getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length()); + } + + /** + * Formats the given value, appending to the appendTo builder. + * The argument value must not be the same object as appendTo. + * getArgumentLimit() must be at most 1. + * + * @param value0 Value for argument {0}. + * @param appendTo Gets the formatted pattern and value appended. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return appendTo + * @stable ICU 57 + */ + UnicodeString &format( + const UnicodeString &value0, + UnicodeString &appendTo, UErrorCode &errorCode) const; + + /** + * Formats the given values, appending to the appendTo builder. + * An argument value must not be the same object as appendTo. + * getArgumentLimit() must be at most 2. + * + * @param value0 Value for argument {0}. + * @param value1 Value for argument {1}. + * @param appendTo Gets the formatted pattern and values appended. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return appendTo + * @stable ICU 57 + */ + UnicodeString &format( + const UnicodeString &value0, + const UnicodeString &value1, + UnicodeString &appendTo, UErrorCode &errorCode) const; + + /** + * Formats the given values, appending to the appendTo builder. + * An argument value must not be the same object as appendTo. + * getArgumentLimit() must be at most 3. + * + * @param value0 Value for argument {0}. + * @param value1 Value for argument {1}. + * @param value2 Value for argument {2}. + * @param appendTo Gets the formatted pattern and values appended. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return appendTo + * @stable ICU 57 + */ + UnicodeString &format( + const UnicodeString &value0, + const UnicodeString &value1, + const UnicodeString &value2, + UnicodeString &appendTo, UErrorCode &errorCode) const; + + /** + * Formats the given values, appending to the appendTo string. + * + * @param values The argument values. + * An argument value must not be the same object as appendTo. + * Can be NULL if valuesLength==getArgumentLimit()==0. + * @param valuesLength The length of the values array. + * Must be at least getArgumentLimit(). + * @param appendTo Gets the formatted pattern and values appended. + * @param offsets offsets[i] receives the offset of where + * values[i] replaced pattern argument {i}. + * Can be shorter or longer than values. Can be NULL if offsetsLength==0. + * If there is no {i} in the pattern, then offsets[i] is set to -1. + * @param offsetsLength The length of the offsets array. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return appendTo + * @stable ICU 57 + */ + UnicodeString &formatAndAppend( + const UnicodeString *const *values, int32_t valuesLength, + UnicodeString &appendTo, + int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const; + + /** + * Formats the given values, replacing the contents of the result string. + * May optimize by actually appending to the result if it is the same object + * as the value corresponding to the initial argument in the pattern. + * + * @param values The argument values. + * An argument value may be the same object as result. + * Can be NULL if valuesLength==getArgumentLimit()==0. + * @param valuesLength The length of the values array. + * Must be at least getArgumentLimit(). + * @param result Gets its contents replaced by the formatted pattern and values. + * @param offsets offsets[i] receives the offset of where + * values[i] replaced pattern argument {i}. + * Can be shorter or longer than values. Can be NULL if offsetsLength==0. + * If there is no {i} in the pattern, then offsets[i] is set to -1. + * @param offsetsLength The length of the offsets array. + * @param errorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return result + * @stable ICU 57 + */ + UnicodeString &formatAndReplace( + const UnicodeString *const *values, int32_t valuesLength, + UnicodeString &result, + int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const; + + /** + * Returns the pattern text with none of the arguments. + * Like formatting with all-empty string values. + * @stable ICU 57 + */ + UnicodeString getTextWithNoArguments() const { + return getTextWithNoArguments( + compiledPattern.getBuffer(), + compiledPattern.length(), + nullptr, + 0); + } + +#ifndef U_HIDE_INTERNAL_API + /** + * Returns the pattern text with none of the arguments. + * Like formatting with all-empty string values. + * + * TODO(ICU-20406): Replace this with an Iterator interface. + * + * @param offsets offsets[i] receives the offset of where {i} was located + * before it was replaced by an empty string. + * For example, "a{0}b{1}" produces offset 1 for i=0 and 2 for i=1. + * Can be nullptr if offsetsLength==0. + * If there is no {i} in the pattern, then offsets[i] is set to -1. + * @param offsetsLength The length of the offsets array. + * + * @internal + */ + UnicodeString getTextWithNoArguments(int32_t *offsets, int32_t offsetsLength) const { + return getTextWithNoArguments( + compiledPattern.getBuffer(), + compiledPattern.length(), + offsets, + offsetsLength); + } +#endif // U_HIDE_INTERNAL_API + +private: + /** + * Binary representation of the compiled pattern. + * Index 0: One more than the highest argument number. + * Followed by zero or more arguments or literal-text segments. + * + * An argument is stored as its number, less than ARG_NUM_LIMIT. + * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT, + * followed by that many chars. + */ + UnicodeString compiledPattern; + + static inline int32_t getArgumentLimit(const char16_t *compiledPattern, + int32_t compiledPatternLength) { + return compiledPatternLength == 0 ? 0 : compiledPattern[0]; + } + + static UnicodeString getTextWithNoArguments( + const char16_t *compiledPattern, + int32_t compiledPatternLength, + int32_t *offsets, + int32_t offsetsLength); + + static UnicodeString &format( + const char16_t *compiledPattern, int32_t compiledPatternLength, + const UnicodeString *const *values, + UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue, + int32_t *offsets, int32_t offsetsLength, + UErrorCode &errorCode); + + // Give access to internals to SimpleModifier for number formatting + friend class number::impl::SimpleModifier; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __SIMPLEFORMATTER_H__ diff --git a/thirdparty/icu4c/common/unicode/std_string.h b/thirdparty/icu4c/common/unicode/std_string.h new file mode 100644 index 0000000000..bf87230167 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/std_string.h @@ -0,0 +1,41 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2009-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: std_string.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009feb19 +* created by: Markus W. Scherer +*/ + +#ifndef __STD_STRING_H__ +#define __STD_STRING_H__ + +/** + * \file + * \brief C++ API: Central ICU header for including the C++ standard <string> + * header and for related definitions. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +// Workaround for a libstdc++ bug before libstdc++4.6 (2011). +// https://bugs.llvm.org/show_bug.cgi?id=13364 +#if defined(__GLIBCXX__) +namespace std { class type_info; } +#endif +#include <string> + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __STD_STRING_H__ diff --git a/thirdparty/icu4c/common/unicode/strenum.h b/thirdparty/icu4c/common/unicode/strenum.h new file mode 100644 index 0000000000..df72b4b7e8 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/strenum.h @@ -0,0 +1,281 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +*/ + +#ifndef STRENUM_H +#define STRENUM_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" +#include "unicode/unistr.h" + +/** + * \file + * \brief C++ API: String Enumeration + */ + +U_NAMESPACE_BEGIN + +/** + * Base class for 'pure' C++ implementations of uenum api. Adds a + * method that returns the next UnicodeString since in C++ this can + * be a common storage format for strings. + * + * <p>The model is that the enumeration is over strings maintained by + * a 'service.' At any point, the service might change, invalidating + * the enumerator (though this is expected to be rare). The iterator + * returns an error if this has occurred. Lack of the error is no + * guarantee that the service didn't change immediately after the + * call, so the returned string still might not be 'valid' on + * subsequent use.</p> + * + * <p>Strings may take the form of const char*, const char16_t*, or const + * UnicodeString*. The type you get is determine by the variant of + * 'next' that you call. In general the StringEnumeration is + * optimized for one of these types, but all StringEnumerations can + * return all types. Returned strings are each terminated with a NUL. + * Depending on the service data, they might also include embedded NUL + * characters, so API is provided to optionally return the true + * length, counting the embedded NULs but not counting the terminating + * NUL.</p> + * + * <p>The pointers returned by next, unext, and snext become invalid + * upon any subsequent call to the enumeration's destructor, next, + * unext, snext, or reset.</p> + * + * ICU 2.8 adds some default implementations and helper functions + * for subclasses. + * + * @stable ICU 2.4 + */ +class U_COMMON_API StringEnumeration : public UObject { +public: + /** + * Destructor. + * @stable ICU 2.4 + */ + virtual ~StringEnumeration(); + + /** + * Clone this object, an instance of a subclass of StringEnumeration. + * Clones can be used concurrently in multiple threads. + * If a subclass does not implement clone(), or if an error occurs, + * then NULL is returned. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see getDynamicClassID + * @stable ICU 2.8 + */ + virtual StringEnumeration *clone() const; + + /** + * <p>Return the number of elements that the iterator traverses. If + * the iterator is out of sync with its service, status is set to + * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p> + * + * <p>The return value will not change except possibly as a result of + * a subsequent call to reset, or if the iterator becomes out of sync.</p> + * + * <p>This is a convenience function. It can end up being very + * expensive as all the items might have to be pre-fetched + * (depending on the storage format of the data being + * traversed).</p> + * + * @param status the error code. + * @return number of elements in the iterator. + * + * @stable ICU 2.4 */ + virtual int32_t count(UErrorCode& status) const = 0; + + /** + * <p>Returns the next element as a NUL-terminated char*. If there + * are no more elements, returns NULL. If the resultLength pointer + * is not NULL, the length of the string (not counting the + * terminating NUL) is returned at that address. If an error + * status is returned, the value at resultLength is undefined.</p> + * + * <p>The returned pointer is owned by this iterator and must not be + * deleted by the caller. The pointer is valid until the next call + * to next, unext, snext, reset, or the enumerator's destructor.</p> + * + * <p>If the iterator is out of sync with its service, status is set + * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p> + * + * <p>If the native service string is a char16_t* string, it is + * converted to char* with the invariant converter. If the + * conversion fails (because a character cannot be converted) then + * status is set to U_INVARIANT_CONVERSION_ERROR and the return + * value is undefined (though not NULL).</p> + * + * Starting with ICU 2.8, the default implementation calls snext() + * and handles the conversion. + * Either next() or snext() must be implemented differently by a subclass. + * + * @param status the error code. + * @param resultLength a pointer to receive the length, can be NULL. + * @return a pointer to the string, or NULL. + * + * @stable ICU 2.4 + */ + virtual const char* next(int32_t *resultLength, UErrorCode& status); + + /** + * <p>Returns the next element as a NUL-terminated char16_t*. If there + * are no more elements, returns NULL. If the resultLength pointer + * is not NULL, the length of the string (not counting the + * terminating NUL) is returned at that address. If an error + * status is returned, the value at resultLength is undefined.</p> + * + * <p>The returned pointer is owned by this iterator and must not be + * deleted by the caller. The pointer is valid until the next call + * to next, unext, snext, reset, or the enumerator's destructor.</p> + * + * <p>If the iterator is out of sync with its service, status is set + * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p> + * + * Starting with ICU 2.8, the default implementation calls snext() + * and handles the conversion. + * + * @param status the error code. + * @param resultLength a ponter to receive the length, can be NULL. + * @return a pointer to the string, or NULL. + * + * @stable ICU 2.4 + */ + virtual const char16_t* unext(int32_t *resultLength, UErrorCode& status); + + /** + * <p>Returns the next element a UnicodeString*. If there are no + * more elements, returns NULL.</p> + * + * <p>The returned pointer is owned by this iterator and must not be + * deleted by the caller. The pointer is valid until the next call + * to next, unext, snext, reset, or the enumerator's destructor.</p> + * + * <p>If the iterator is out of sync with its service, status is set + * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p> + * + * Starting with ICU 2.8, the default implementation calls next() + * and handles the conversion. + * Either next() or snext() must be implemented differently by a subclass. + * + * @param status the error code. + * @return a pointer to the string, or NULL. + * + * @stable ICU 2.4 + */ + virtual const UnicodeString* snext(UErrorCode& status); + + /** + * <p>Resets the iterator. This re-establishes sync with the + * service and rewinds the iterator to start at the first + * element.</p> + * + * <p>Previous pointers returned by next, unext, or snext become + * invalid, and the value returned by count might change.</p> + * + * @param status the error code. + * + * @stable ICU 2.4 + */ + virtual void reset(UErrorCode& status) = 0; + + /** + * Compares this enumeration to other to check if both are equal + * + * @param that The other string enumeration to compare this object to + * @return true if the enumerations are equal. false if not. + * @stable ICU 3.6 + */ + virtual UBool operator==(const StringEnumeration& that)const; + /** + * Compares this enumeration to other to check if both are not equal + * + * @param that The other string enumeration to compare this object to + * @return true if the enumerations are equal. false if not. + * @stable ICU 3.6 + */ + virtual UBool operator!=(const StringEnumeration& that)const; + +protected: + /** + * UnicodeString field for use with default implementations and subclasses. + * @stable ICU 2.8 + */ + UnicodeString unistr; + /** + * char * default buffer for use with default implementations and subclasses. + * @stable ICU 2.8 + */ + char charsBuffer[32]; + /** + * char * buffer for use with default implementations and subclasses. + * Allocated in constructor and in ensureCharsCapacity(). + * @stable ICU 2.8 + */ + char *chars; + /** + * Capacity of chars, for use with default implementations and subclasses. + * @stable ICU 2.8 + */ + int32_t charsCapacity; + + /** + * Default constructor for use with default implementations and subclasses. + * @stable ICU 2.8 + */ + StringEnumeration(); + + /** + * Ensures that chars is at least as large as the requested capacity. + * For use with default implementations and subclasses. + * + * @param capacity Requested capacity. + * @param status ICU in/out error code. + * @stable ICU 2.8 + */ + void ensureCharsCapacity(int32_t capacity, UErrorCode &status); + + /** + * Converts s to Unicode and sets unistr to the result. + * For use with default implementations and subclasses, + * especially for implementations of snext() in terms of next(). + * This is provided with a helper function instead of a default implementation + * of snext() to avoid potential infinite loops between next() and snext(). + * + * For example: + * \code + * const UnicodeString* snext(UErrorCode& status) { + * int32_t resultLength=0; + * const char *s=next(&resultLength, status); + * return setChars(s, resultLength, status); + * } + * \endcode + * + * @param s String to be converted to Unicode. + * @param length Length of the string. + * @param status ICU in/out error code. + * @return A pointer to unistr. + * @stable ICU 2.8 + */ + UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status); +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +/* STRENUM_H */ +#endif diff --git a/thirdparty/icu4c/common/unicode/stringoptions.h b/thirdparty/icu4c/common/unicode/stringoptions.h new file mode 100644 index 0000000000..7b9f70944f --- /dev/null +++ b/thirdparty/icu4c/common/unicode/stringoptions.h @@ -0,0 +1,190 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// stringoptions.h +// created: 2017jun08 Markus W. Scherer + +#ifndef __STRINGOPTIONS_H__ +#define __STRINGOPTIONS_H__ + +#include "unicode/utypes.h" + +/** + * \file + * \brief C API: Bit set option bit constants for various string and character processing functions. + */ + +/** + * Option value for case folding: Use default mappings defined in CaseFolding.txt. + * + * @stable ICU 2.0 + */ +#define U_FOLD_CASE_DEFAULT 0 + +/** + * Option value for case folding: + * + * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I + * and dotless i appropriately for Turkic languages (tr, az). + * + * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that + * are to be included for default mappings and + * excluded for the Turkic-specific mappings. + * + * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that + * are to be excluded for default mappings and + * included for the Turkic-specific mappings. + * + * @stable ICU 2.0 + */ +#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 + +/** + * Titlecase the string as a whole rather than each word. + * (Titlecase only the character at index 0, possibly adjusted.) + * Option bits value for titlecasing APIs that take an options bit set. + * + * It is an error to specify multiple titlecasing iterator options together, + * including both an options bit and an explicit BreakIterator. + * + * @see U_TITLECASE_ADJUST_TO_CASED + * @stable ICU 60 + */ +#define U_TITLECASE_WHOLE_STRING 0x20 + +/** + * Titlecase sentences rather than words. + * (Titlecase only the first character of each sentence, possibly adjusted.) + * Option bits value for titlecasing APIs that take an options bit set. + * + * It is an error to specify multiple titlecasing iterator options together, + * including both an options bit and an explicit BreakIterator. + * + * @see U_TITLECASE_ADJUST_TO_CASED + * @stable ICU 60 + */ +#define U_TITLECASE_SENTENCES 0x40 + +/** + * Do not lowercase non-initial parts of words when titlecasing. + * Option bit for titlecasing APIs that take an options bit set. + * + * By default, titlecasing will titlecase the character at each + * (possibly adjusted) BreakIterator index and + * lowercase all other characters up to the next iterator index. + * With this option, the other characters will not be modified. + * + * @see U_TITLECASE_ADJUST_TO_CASED + * @see UnicodeString::toTitle + * @see CaseMap::toTitle + * @see ucasemap_setOptions + * @see ucasemap_toTitle + * @see ucasemap_utf8ToTitle + * @stable ICU 3.8 + */ +#define U_TITLECASE_NO_LOWERCASE 0x100 + +/** + * Do not adjust the titlecasing BreakIterator indexes; + * titlecase exactly the characters at breaks from the iterator. + * Option bit for titlecasing APIs that take an options bit set. + * + * By default, titlecasing will take each break iterator index, + * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED), + * and titlecase that one. + * + * Other characters are lowercased. + * + * It is an error to specify multiple titlecasing adjustment options together. + * + * @see U_TITLECASE_ADJUST_TO_CASED + * @see U_TITLECASE_NO_LOWERCASE + * @see UnicodeString::toTitle + * @see CaseMap::toTitle + * @see ucasemap_setOptions + * @see ucasemap_toTitle + * @see ucasemap_utf8ToTitle + * @stable ICU 3.8 + */ +#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 + +/** + * Adjust each titlecasing BreakIterator index to the next cased character. + * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).) + * Option bit for titlecasing APIs that take an options bit set. + * + * This used to be the default index adjustment in ICU. + * Since ICU 60, the default index adjustment is to the next character that is + * a letter, number, symbol, or private use code point. + * (Uncased modifier letters are skipped.) + * The difference in behavior is small for word titlecasing, + * but the new adjustment is much better for whole-string and sentence titlecasing: + * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»". + * + * It is an error to specify multiple titlecasing adjustment options together. + * + * @see U_TITLECASE_NO_BREAK_ADJUSTMENT + * @stable ICU 60 + */ +#define U_TITLECASE_ADJUST_TO_CASED 0x400 + +/** + * Option for string transformation functions to not first reset the Edits object. + * Used for example in some case-mapping and normalization functions. + * + * @see CaseMap + * @see Edits + * @see Normalizer2 + * @stable ICU 60 + */ +#define U_EDITS_NO_RESET 0x2000 + +/** + * Omit unchanged text when recording how source substrings + * relate to changed and unchanged result substrings. + * Used for example in some case-mapping and normalization functions. + * + * @see CaseMap + * @see Edits + * @see Normalizer2 + * @stable ICU 60 + */ +#define U_OMIT_UNCHANGED_TEXT 0x4000 + +/** + * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: + * Compare strings in code point order instead of code unit order. + * @stable ICU 2.2 + */ +#define U_COMPARE_CODE_POINT_ORDER 0x8000 + +/** + * Option bit for unorm_compare: + * Perform case-insensitive comparison. + * @stable ICU 2.2 + */ +#define U_COMPARE_IGNORE_CASE 0x10000 + +/** + * Option bit for unorm_compare: + * Both input strings are assumed to fulfill FCD conditions. + * @stable ICU 2.2 + */ +#define UNORM_INPUT_IS_FCD 0x20000 + +// Related definitions elsewhere. +// Options that are not meaningful in the same functions +// can share the same bits. +// +// Public: +// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 +// +// Internal: (may change or be removed) +// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff +// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7 +// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0 +// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600 +// ustr_imp.h #define _STRNCMP_STYLE 0x1000 +// unormcmp.cpp #define _COMPARE_EQUIV 0x80000 + +#endif // __STRINGOPTIONS_H__ diff --git a/thirdparty/icu4c/common/unicode/stringpiece.h b/thirdparty/icu4c/common/unicode/stringpiece.h new file mode 100644 index 0000000000..7d7d871e1f --- /dev/null +++ b/thirdparty/icu4c/common/unicode/stringpiece.h @@ -0,0 +1,353 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// Copyright (C) 2009-2013, International Business Machines +// Corporation and others. All Rights Reserved. +// +// Copyright 2001 and onwards Google Inc. +// Author: Sanjay Ghemawat + +// This code is a contribution of Google code, and the style used here is +// a compromise between the original Google code and the ICU coding guidelines. +// For example, data types are ICU-ified (size_t,int->int32_t), +// and API comments doxygen-ified, but function names and behavior are +// as in the original, if possible. +// Assertion-style error handling, not available in ICU, was changed to +// parameter "pinning" similar to UnicodeString. +// +// In addition, this is only a partial port of the original Google code, +// limited to what was needed so far. The (nearly) complete original code +// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib +// (see ICU ticket 6765, r25517). + +#ifndef __STRINGPIECE_H__ +#define __STRINGPIECE_H__ + +/** + * \file + * \brief C++ API: StringPiece: Read-only byte string wrapper class. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include <cstddef> +#include <type_traits> + +#include "unicode/uobject.h" +#include "unicode/std_string.h" + +// Arghh! I wish C++ literals were "string". + +U_NAMESPACE_BEGIN + +/** + * A string-like object that points to a sized piece of memory. + * + * We provide non-explicit singleton constructors so users can pass + * in a "const char*" or a "string" wherever a "StringPiece" is + * expected. + * + * Functions or methods may use StringPiece parameters to accept either a + * "const char*" or a "string" value that will be implicitly converted to a + * StringPiece. + * + * Systematic usage of StringPiece is encouraged as it will reduce unnecessary + * conversions from "const char*" to "string" and back again. + * + * @stable ICU 4.2 + */ +class U_COMMON_API StringPiece : public UMemory { + private: + const char* ptr_; + int32_t length_; + + public: + /** + * Default constructor, creates an empty StringPiece. + * @stable ICU 4.2 + */ + StringPiece() : ptr_(nullptr), length_(0) { } + + /** + * Constructs from a NUL-terminated const char * pointer. + * @param str a NUL-terminated const char * pointer + * @stable ICU 4.2 + */ + StringPiece(const char* str); +#ifndef U_HIDE_DRAFT_API +#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) + /** + * Constructs from a NUL-terminated const char8_t * pointer. + * @param str a NUL-terminated const char8_t * pointer + * @draft ICU 67 + */ + StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {} +#endif + /** + * Constructs an empty StringPiece. + * Needed for type disambiguation from multiple other overloads. + * @param p nullptr + * @draft ICU 67 + */ + StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {} +#endif // U_HIDE_DRAFT_API + + /** + * Constructs from a std::string. + * @stable ICU 4.2 + */ + StringPiece(const std::string& str) + : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { } +#ifndef U_HIDE_DRAFT_API +#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN) + /** + * Constructs from a std::u8string. + * @draft ICU 67 + */ + StringPiece(const std::u8string& str) + : ptr_(reinterpret_cast<const char*>(str.data())), + length_(static_cast<int32_t>(str.size())) { } +#endif +#endif // U_HIDE_DRAFT_API + + /** + * Constructs from some other implementation of a string piece class, from any + * C++ record type that has these two methods: + * + * \code{.cpp} + * + * struct OtherStringPieceClass { + * const char* data(); // or const char8_t* + * size_t size(); + * }; + * + * \endcode + * + * The other string piece class will typically be std::string_view from C++17 + * or absl::string_view from Abseil. + * + * Starting with C++20, data() may also return a const char8_t* pointer, + * as from std::u8string_view. + * + * @param str the other string piece + * @stable ICU 65 + */ + template <typename T, + typename = typename std::enable_if< + (std::is_same<decltype(T().data()), const char*>::value +#if defined(__cpp_char8_t) + || std::is_same<decltype(T().data()), const char8_t*>::value +#endif + ) && + std::is_same<decltype(T().size()), size_t>::value>::type> + StringPiece(T str) + : ptr_(reinterpret_cast<const char*>(str.data())), + length_(static_cast<int32_t>(str.size())) {} + + /** + * Constructs from a const char * pointer and a specified length. + * @param offset a const char * pointer (need not be terminated) + * @param len the length of the string; must be non-negative + * @stable ICU 4.2 + */ + StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { } +#ifndef U_HIDE_DRAFT_API +#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) + /** + * Constructs from a const char8_t * pointer and a specified length. + * @param str a const char8_t * pointer (need not be terminated) + * @param len the length of the string; must be non-negative + * @draft ICU 67 + */ + StringPiece(const char8_t* str, int32_t len) : + StringPiece(reinterpret_cast<const char*>(str), len) {} +#endif +#endif // U_HIDE_DRAFT_API + + /** + * Substring of another StringPiece. + * @param x the other StringPiece + * @param pos start position in x; must be non-negative and <= x.length(). + * @stable ICU 4.2 + */ + StringPiece(const StringPiece& x, int32_t pos); + /** + * Substring of another StringPiece. + * @param x the other StringPiece + * @param pos start position in x; must be non-negative and <= x.length(). + * @param len length of the substring; + * must be non-negative and will be pinned to at most x.length() - pos. + * @stable ICU 4.2 + */ + StringPiece(const StringPiece& x, int32_t pos, int32_t len); + + /** + * Returns the string pointer. May be nullptr if it is empty. + * + * data() may return a pointer to a buffer with embedded NULs, and the + * returned buffer may or may not be null terminated. Therefore it is + * typically a mistake to pass data() to a routine that expects a NUL + * terminated string. + * @return the string pointer + * @stable ICU 4.2 + */ + const char* data() const { return ptr_; } + /** + * Returns the string length. Same as length(). + * @return the string length + * @stable ICU 4.2 + */ + int32_t size() const { return length_; } + /** + * Returns the string length. Same as size(). + * @return the string length + * @stable ICU 4.2 + */ + int32_t length() const { return length_; } + /** + * Returns whether the string is empty. + * @return true if the string is empty + * @stable ICU 4.2 + */ + UBool empty() const { return length_ == 0; } + + /** + * Sets to an empty string. + * @stable ICU 4.2 + */ + void clear() { ptr_ = nullptr; length_ = 0; } + + /** + * Reset the stringpiece to refer to new data. + * @param xdata pointer the new string data. Need not be nul terminated. + * @param len the length of the new data + * @stable ICU 4.8 + */ + void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; } + + /** + * Reset the stringpiece to refer to new data. + * @param str a pointer to a NUL-terminated string. + * @stable ICU 4.8 + */ + void set(const char* str); + +#ifndef U_HIDE_DRAFT_API +#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN) + /** + * Resets the stringpiece to refer to new data. + * @param xdata pointer the new string data. Need not be NUL-terminated. + * @param len the length of the new data + * @draft ICU 67 + */ + inline void set(const char8_t* xdata, int32_t len) { + set(reinterpret_cast<const char*>(xdata), len); + } + + /** + * Resets the stringpiece to refer to new data. + * @param str a pointer to a NUL-terminated string. + * @draft ICU 67 + */ + inline void set(const char8_t* str) { + set(reinterpret_cast<const char*>(str)); + } +#endif +#endif // U_HIDE_DRAFT_API + + /** + * Removes the first n string units. + * @param n prefix length, must be non-negative and <=length() + * @stable ICU 4.2 + */ + void remove_prefix(int32_t n) { + if (n >= 0) { + if (n > length_) { + n = length_; + } + ptr_ += n; + length_ -= n; + } + } + + /** + * Removes the last n string units. + * @param n suffix length, must be non-negative and <=length() + * @stable ICU 4.2 + */ + void remove_suffix(int32_t n) { + if (n >= 0) { + if (n <= length_) { + length_ -= n; + } else { + length_ = 0; + } + } + } + +#ifndef U_HIDE_DRAFT_API + /** + * Searches the StringPiece for the given search string (needle); + * @param needle The string for which to search. + * @param offset Where to start searching within this string (haystack). + * @return The offset of needle in haystack, or -1 if not found. + * @draft ICU 67 + */ + int32_t find(StringPiece needle, int32_t offset); + + /** + * Compares this StringPiece with the other StringPiece, with semantics + * similar to std::string::compare(). + * @param other The string to compare to. + * @return below zero if this < other; above zero if this > other; 0 if this == other. + * @draft ICU 67 + */ + int32_t compare(StringPiece other); +#endif // U_HIDE_DRAFT_API + + /** + * Maximum integer, used as a default value for substring methods. + * @stable ICU 4.2 + */ + static const int32_t npos; // = 0x7fffffff; + + /** + * Returns a substring of this StringPiece. + * @param pos start position; must be non-negative and <= length(). + * @param len length of the substring; + * must be non-negative and will be pinned to at most length() - pos. + * @return the substring StringPiece + * @stable ICU 4.2 + */ + StringPiece substr(int32_t pos, int32_t len = npos) const { + return StringPiece(*this, pos, len); + } +}; + +/** + * Global operator == for StringPiece + * @param x The first StringPiece to compare. + * @param y The second StringPiece to compare. + * @return true if the string data is equal + * @stable ICU 4.8 + */ +U_EXPORT UBool U_EXPORT2 +operator==(const StringPiece& x, const StringPiece& y); + +/** + * Global operator != for StringPiece + * @param x The first StringPiece to compare. + * @param y The second StringPiece to compare. + * @return true if the string data is not equal + * @stable ICU 4.8 + */ +inline UBool operator!=(const StringPiece& x, const StringPiece& y) { + return !(x == y); +} + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __STRINGPIECE_H__ diff --git a/thirdparty/icu4c/common/unicode/stringtriebuilder.h b/thirdparty/icu4c/common/unicode/stringtriebuilder.h new file mode 100644 index 0000000000..fe471bbbf9 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/stringtriebuilder.h @@ -0,0 +1,426 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2012,2014, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: stringtriebuilder.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010dec24 +* created by: Markus W. Scherer +*/ + +#ifndef __STRINGTRIEBUILDER_H__ +#define __STRINGTRIEBUILDER_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: Builder API for trie builders + */ + +// Forward declaration. +/// \cond +struct UHashtable; +typedef struct UHashtable UHashtable; +/// \endcond + +/** + * Build options for BytesTrieBuilder and CharsTrieBuilder. + * @stable ICU 4.8 + */ +enum UStringTrieBuildOption { + /** + * Builds a trie quickly. + * @stable ICU 4.8 + */ + USTRINGTRIE_BUILD_FAST, + /** + * Builds a trie more slowly, attempting to generate + * a shorter but equivalent serialization. + * This build option also uses more memory. + * + * This option can be effective when many integer values are the same + * and string/byte sequence suffixes can be shared. + * Runtime speed is not expected to improve. + * @stable ICU 4.8 + */ + USTRINGTRIE_BUILD_SMALL +}; + +U_NAMESPACE_BEGIN + +/** + * Base class for string trie builder classes. + * + * This class is not intended for public subclassing. + * @stable ICU 4.8 + */ +class U_COMMON_API StringTrieBuilder : public UObject { +public: +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + static int32_t hashNode(const void *node); + /** @internal */ + static UBool equalNodes(const void *left, const void *right); +#endif /* U_HIDE_INTERNAL_API */ + +protected: + // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API + // or else the compiler will create a public default constructor. + /** @internal */ + StringTrieBuilder(); + /** @internal */ + virtual ~StringTrieBuilder(); + +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode); + /** @internal */ + void deleteCompactBuilder(); + + /** @internal */ + void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode); + + /** @internal */ + int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex); + /** @internal */ + int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length); +#endif /* U_HIDE_INTERNAL_API */ + + class Node; + +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode); + /** @internal */ + Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, + int32_t length, UErrorCode &errorCode); +#endif /* U_HIDE_INTERNAL_API */ + + /** @internal */ + virtual int32_t getElementStringLength(int32_t i) const = 0; + /** @internal */ + virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0; + /** @internal */ + virtual int32_t getElementValue(int32_t i) const = 0; + + // Finds the first unit index after this one where + // the first and last element have different units again. + /** @internal */ + virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0; + + // Number of different units at unitIndex. + /** @internal */ + virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0; + /** @internal */ + virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0; + /** @internal */ + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0; + + /** @internal */ + virtual UBool matchNodesCanHaveValues() const = 0; + + /** @internal */ + virtual int32_t getMaxBranchLinearSubNodeLength() const = 0; + /** @internal */ + virtual int32_t getMinLinearMatch() const = 0; + /** @internal */ + virtual int32_t getMaxLinearMatchLength() const = 0; + +#ifndef U_HIDE_INTERNAL_API + // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength). + /** @internal */ + static const int32_t kMaxBranchLinearSubNodeLength=5; + + // Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units. + // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up. + /** @internal */ + static const int32_t kMaxSplitBranchLevels=14; + + /** + * Makes sure that there is only one unique node registered that is + * equivalent to newNode. + * @param newNode Input node. The builder takes ownership. + * @param errorCode ICU in/out UErrorCode. + Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL. + * @return newNode if it is the first of its kind, or + * an equivalent node if newNode is a duplicate. + * @internal + */ + Node *registerNode(Node *newNode, UErrorCode &errorCode); + /** + * Makes sure that there is only one unique FinalValueNode registered + * with this value. + * Avoids creating a node if the value is a duplicate. + * @param value A final value. + * @param errorCode ICU in/out UErrorCode. + Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL. + * @return A FinalValueNode with the given value. + * @internal + */ + Node *registerFinalValue(int32_t value, UErrorCode &errorCode); +#endif /* U_HIDE_INTERNAL_API */ + + /* + * C++ note: + * registerNode() and registerFinalValue() take ownership of their input nodes, + * and only return owned nodes. + * If they see a failure UErrorCode, they will delete the input node. + * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR. + * If there is a failure, they return NULL. + * + * NULL Node pointers can be safely passed into other Nodes because + * they call the static Node::hashCode() which checks for a NULL pointer first. + * + * Therefore, as long as builder functions register a new node, + * they need to check for failures only before explicitly dereferencing + * a Node pointer, or before setting a new UErrorCode. + */ + + // Hash set of nodes, maps from nodes to integer 1. + /** @internal */ + UHashtable *nodes; + + // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, + // it is needed for layout of other objects. + /** + * @internal + * \cond + */ + class Node : public UObject { + public: + Node(int32_t initialHash) : hash(initialHash), offset(0) {} + inline int32_t hashCode() const { return hash; } + // Handles node==NULL. + static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); } + // Base class operator==() compares the actual class types. + virtual UBool operator==(const Node &other) const; + inline UBool operator!=(const Node &other) const { return !operator==(other); } + /** + * Traverses the Node graph and numbers branch edges, with rightmost edges first. + * This is to avoid writing a duplicate node twice. + * + * Branch nodes in this trie data structure are not symmetric. + * Most branch edges "jump" to other nodes but the rightmost branch edges + * just continue without a jump. + * Therefore, write() must write the rightmost branch edge last + * (trie units are written backwards), and must write it at that point even if + * it is a duplicate of a node previously written elsewhere. + * + * This function visits and marks right branch edges first. + * Edges are numbered with increasingly negative values because we share the + * offset field which gets positive values when nodes are written. + * A branch edge also remembers the first number for any of its edges. + * + * When a further-left branch edge has a number in the range of the rightmost + * edge's numbers, then it will be written as part of the required right edge + * and we can avoid writing it first. + * + * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative + * edge numbers. + * + * @param edgeNumber The first edge number for this node and its sub-nodes. + * @return An edge number that is at least the maximum-negative + * of the input edge number and the numbers of this node and all of its sub-nodes. + */ + virtual int32_t markRightEdgesFirst(int32_t edgeNumber); + // write() must set the offset to a positive value. + virtual void write(StringTrieBuilder &builder) = 0; + // See markRightEdgesFirst. + inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight, + StringTrieBuilder &builder) { + // Note: Edge numbers are negative, lastRight<=firstRight. + // If offset>0 then this node and its sub-nodes have been written already + // and we need not write them again. + // If this node is part of the unwritten right branch edge, + // then we wait until that is written. + if(offset<0 && (offset<lastRight || firstRight<offset)) { + write(builder); + } + } + inline int32_t getOffset() const { return offset; } + protected: + int32_t hash; + int32_t offset; + }; + +#ifndef U_HIDE_INTERNAL_API + // This class should not be overridden because + // registerFinalValue() compares a stack-allocated FinalValueNode + // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes) + // with the input node, and the + // !Node::operator==(other) used inside FinalValueNode::operator==(other) + // will be false if the typeid's are different. + /** @internal */ + class FinalValueNode : public Node { + public: + FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {} + virtual UBool operator==(const Node &other) const; + virtual void write(StringTrieBuilder &builder); + protected: + int32_t value; + }; +#endif /* U_HIDE_INTERNAL_API */ + + // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, + // it is needed for layout of other objects. + /** + * @internal + */ + class ValueNode : public Node { + public: + ValueNode(int32_t initialHash) : Node(initialHash), hasValue(false), value(0) {} + virtual UBool operator==(const Node &other) const; + void setValue(int32_t v) { + hasValue=true; + value=v; + hash=hash*37u+v; + } + protected: + UBool hasValue; + int32_t value; + }; + +#ifndef U_HIDE_INTERNAL_API + /** + * @internal + */ + class IntermediateValueNode : public ValueNode { + public: + IntermediateValueNode(int32_t v, Node *nextNode) + : ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); } + virtual UBool operator==(const Node &other) const; + virtual int32_t markRightEdgesFirst(int32_t edgeNumber); + virtual void write(StringTrieBuilder &builder); + protected: + Node *next; + }; +#endif /* U_HIDE_INTERNAL_API */ + + // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, + // it is needed for layout of other objects. + /** + * @internal + */ + class LinearMatchNode : public ValueNode { + public: + LinearMatchNode(int32_t len, Node *nextNode) + : ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)), + length(len), next(nextNode) {} + virtual UBool operator==(const Node &other) const; + virtual int32_t markRightEdgesFirst(int32_t edgeNumber); + protected: + int32_t length; + Node *next; + }; + +#ifndef U_HIDE_INTERNAL_API + /** + * @internal + */ + class BranchNode : public Node { + public: + BranchNode(int32_t initialHash) : Node(initialHash) {} + protected: + int32_t firstEdgeNumber; + }; + + /** + * @internal + */ + class ListBranchNode : public BranchNode { + public: + ListBranchNode() : BranchNode(0x444444), length(0) {} + virtual UBool operator==(const Node &other) const; + virtual int32_t markRightEdgesFirst(int32_t edgeNumber); + virtual void write(StringTrieBuilder &builder); + // Adds a unit with a final value. + void add(int32_t c, int32_t value) { + units[length]=(char16_t)c; + equal[length]=NULL; + values[length]=value; + ++length; + hash=(hash*37u+c)*37u+value; + } + // Adds a unit which leads to another match node. + void add(int32_t c, Node *node) { + units[length]=(char16_t)c; + equal[length]=node; + values[length]=0; + ++length; + hash=(hash*37u+c)*37u+hashCode(node); + } + protected: + Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value". + int32_t length; + int32_t values[kMaxBranchLinearSubNodeLength]; + char16_t units[kMaxBranchLinearSubNodeLength]; + }; + + /** + * @internal + */ + class SplitBranchNode : public BranchNode { + public: + SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode) + : BranchNode(((0x555555u*37u+middleUnit)*37u+ + hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)), + unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {} + virtual UBool operator==(const Node &other) const; + virtual int32_t markRightEdgesFirst(int32_t edgeNumber); + virtual void write(StringTrieBuilder &builder); + protected: + char16_t unit; + Node *lessThan; + Node *greaterOrEqual; + }; + + // Branch head node, for writing the actual node lead unit. + /** @internal */ + class BranchHeadNode : public ValueNode { + public: + BranchHeadNode(int32_t len, Node *subNode) + : ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)), + length(len), next(subNode) {} + virtual UBool operator==(const Node &other) const; + virtual int32_t markRightEdgesFirst(int32_t edgeNumber); + virtual void write(StringTrieBuilder &builder); + protected: + int32_t length; + Node *next; // A branch sub-node. + }; + +#endif /* U_HIDE_INTERNAL_API */ + /// \endcond + + /** @internal */ + virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, + Node *nextNode) const = 0; + + /** @internal */ + virtual int32_t write(int32_t unit) = 0; + /** @internal */ + virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0; + /** @internal */ + virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0; + /** @internal */ + virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0; + /** @internal */ + virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __STRINGTRIEBUILDER_H__ diff --git a/thirdparty/icu4c/common/unicode/symtable.h b/thirdparty/icu4c/common/unicode/symtable.h new file mode 100644 index 0000000000..b64d877f97 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/symtable.h @@ -0,0 +1,119 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2000-2005, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 02/04/00 aliu Creation. +********************************************************************** +*/ +#ifndef SYMTABLE_H +#define SYMTABLE_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: An interface that defines both lookup protocol and parsing of + * symbolic names. + */ + +U_NAMESPACE_BEGIN + +class ParsePosition; +class UnicodeFunctor; +class UnicodeSet; +class UnicodeString; + +/** + * An interface that defines both lookup protocol and parsing of + * symbolic names. + * + * <p>A symbol table maintains two kinds of mappings. The first is + * between symbolic names and their values. For example, if the + * variable with the name "start" is set to the value "alpha" + * (perhaps, though not necessarily, through an expression such as + * "$start=alpha"), then the call lookup("start") will return the + * char[] array ['a', 'l', 'p', 'h', 'a']. + * + * <p>The second kind of mapping is between character values and + * UnicodeMatcher objects. This is used by RuleBasedTransliterator, + * which uses characters in the private use area to represent objects + * such as UnicodeSets. If U+E015 is mapped to the UnicodeSet [a-z], + * then lookupMatcher(0xE015) will return the UnicodeSet [a-z]. + * + * <p>Finally, a symbol table defines parsing behavior for symbolic + * names. All symbolic names start with the SYMBOL_REF character. + * When a parser encounters this character, it calls parseReference() + * with the position immediately following the SYMBOL_REF. The symbol + * table parses the name, if there is one, and returns it. + * + * @stable ICU 2.8 + */ +class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ { +public: + + /** + * The character preceding a symbol reference name. + * @stable ICU 2.8 + */ + enum { SYMBOL_REF = 0x0024 /*$*/ }; + + /** + * Destructor. + * @stable ICU 2.8 + */ + virtual ~SymbolTable(); + + /** + * Lookup the characters associated with this string and return it. + * Return <tt>NULL</tt> if no such name exists. The resultant + * string may have length zero. + * @param s the symbolic name to lookup + * @return a string containing the name's value, or <tt>NULL</tt> if + * there is no mapping for s. + * @stable ICU 2.8 + */ + virtual const UnicodeString* lookup(const UnicodeString& s) const = 0; + + /** + * Lookup the UnicodeMatcher associated with the given character, and + * return it. Return <tt>NULL</tt> if not found. + * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive. + * @return the UnicodeMatcher object represented by the given + * character, or NULL if there is no mapping for ch. + * @stable ICU 2.8 + */ + virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0; + + /** + * Parse a symbol reference name from the given string, starting + * at the given position. If no valid symbol reference name is + * found, return the empty string and leave pos unchanged. That is, if the + * character at pos cannot start a name, or if pos is at or after + * text.length(), then return an empty string. This indicates an + * isolated SYMBOL_REF character. + * @param text the text to parse for the name + * @param pos on entry, the index of the first character to parse. + * This is the character following the SYMBOL_REF character. On + * exit, the index after the last parsed character. If the parse + * failed, pos is unchanged on exit. + * @param limit the index after the last character to be parsed. + * @return the parsed name, or an empty string if there is no + * valid symbolic name at the given position. + * @stable ICU 2.8 + */ + virtual UnicodeString parseReference(const UnicodeString& text, + ParsePosition& pos, int32_t limit) const = 0; +}; +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/ubidi.h b/thirdparty/icu4c/common/unicode/ubidi.h new file mode 100644 index 0000000000..63d0e45cb7 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ubidi.h @@ -0,0 +1,2210 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ubidi.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999jul27 +* created by: Markus W. Scherer, updated by Matitiahu Allouche +*/ + +#ifndef UBIDI_H +#define UBIDI_H + +#include "unicode/utypes.h" +#include "unicode/uchar.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +/** + *\file + * \brief C API: Bidi algorithm + * + * <h2>Bidi algorithm for ICU</h2> + * + * This is an implementation of the Unicode Bidirectional Algorithm. + * The algorithm is defined in the + * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>.<p> + * + * Note: Libraries that perform a bidirectional algorithm and + * reorder strings accordingly are sometimes called "Storage Layout Engines". + * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such + * "Storage Layout Engines". + * + * <h3>General remarks about the API:</h3> + * + * In functions with an error code parameter, + * the <code>pErrorCode</code> pointer must be valid + * and the value that it points to must not indicate a failure before + * the function call. Otherwise, the function returns immediately. + * After the function call, the value indicates success or failure.<p> + * + * The "limit" of a sequence of characters is the position just after their + * last character, i.e., one more than that position.<p> + * + * Some of the API functions provide access to "runs". + * Such a "run" is defined as a sequence of characters + * that are at the same embedding level + * after performing the Bidi algorithm.<p> + * + * @author Markus W. Scherer + * @version 1.0 + * + * + * <h4> Sample code for the ICU Bidi API </h4> + * + * <h5>Rendering a paragraph with the ICU Bidi API</h5> + * + * This is (hypothetical) sample code that illustrates + * how the ICU Bidi API could be used to render a paragraph of text. + * Rendering code depends highly on the graphics system, + * therefore this sample code must make a lot of assumptions, + * which may or may not match any existing graphics system's properties. + * + * <p>The basic assumptions are:</p> + * <ul> + * <li>Rendering is done from left to right on a horizontal line.</li> + * <li>A run of single-style, unidirectional text can be rendered at once.</li> + * <li>Such a run of text is passed to the graphics system with + * characters (code units) in logical order.</li> + * <li>The line-breaking algorithm is very complicated + * and Locale-dependent - + * and therefore its implementation omitted from this sample code.</li> + * </ul> + * + * <pre> + * \code + *#include "unicode/ubidi.h" + * + *typedef enum { + * styleNormal=0, styleSelected=1, + * styleBold=2, styleItalics=4, + * styleSuper=8, styleSub=16 + *} Style; + * + *typedef struct { int32_t limit; Style style; } StyleRun; + * + *int getTextWidth(const UChar *text, int32_t start, int32_t limit, + * const StyleRun *styleRuns, int styleRunCount); + * + * // set *pLimit and *pStyleRunLimit for a line + * // from text[start] and from styleRuns[styleRunStart] + * // using ubidi_getLogicalRun(para, ...) + *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit, + * UBiDi *para, + * const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit, + * int *pLineWidth); + * + * // render runs on a line sequentially, always from left to right + * + * // prepare rendering a new line + * void startLine(UBiDiDirection textDirection, int lineWidth); + * + * // render a run of text and advance to the right by the run width + * // the text[start..limit-1] is always in logical order + * void renderRun(const UChar *text, int32_t start, int32_t limit, + * UBiDiDirection textDirection, Style style); + * + * // We could compute a cross-product + * // from the style runs with the directional runs + * // and then reorder it. + * // Instead, here we iterate over each run type + * // and render the intersections - + * // with shortcuts in simple (and common) cases. + * // renderParagraph() is the main function. + * + * // render a directional run with + * // (possibly) multiple style runs intersecting with it + * void renderDirectionalRun(const UChar *text, + * int32_t start, int32_t limit, + * UBiDiDirection direction, + * const StyleRun *styleRuns, int styleRunCount) { + * int i; + * + * // iterate over style runs + * if(direction==UBIDI_LTR) { + * int styleLimit; + * + * for(i=0; i<styleRunCount; ++i) { + * styleLimit=styleRun[i].limit; + * if(start<styleLimit) { + * if(styleLimit>limit) { styleLimit=limit; } + * renderRun(text, start, styleLimit, + * direction, styleRun[i].style); + * if(styleLimit==limit) { break; } + * start=styleLimit; + * } + * } + * } else { + * int styleStart; + * + * for(i=styleRunCount-1; i>=0; --i) { + * if(i>0) { + * styleStart=styleRun[i-1].limit; + * } else { + * styleStart=0; + * } + * if(limit>=styleStart) { + * if(styleStart<start) { styleStart=start; } + * renderRun(text, styleStart, limit, + * direction, styleRun[i].style); + * if(styleStart==start) { break; } + * limit=styleStart; + * } + * } + * } + * } + * + * // the line object represents text[start..limit-1] + * void renderLine(UBiDi *line, const UChar *text, + * int32_t start, int32_t limit, + * const StyleRun *styleRuns, int styleRunCount) { + * UBiDiDirection direction=ubidi_getDirection(line); + * if(direction!=UBIDI_MIXED) { + * // unidirectional + * if(styleRunCount<=1) { + * renderRun(text, start, limit, direction, styleRuns[0].style); + * } else { + * renderDirectionalRun(text, start, limit, + * direction, styleRuns, styleRunCount); + * } + * } else { + * // mixed-directional + * int32_t count, i, length; + * UBiDiLevel level; + * + * count=ubidi_countRuns(para, pErrorCode); + * if(U_SUCCESS(*pErrorCode)) { + * if(styleRunCount<=1) { + * Style style=styleRuns[0].style; + * + * // iterate over directional runs + * for(i=0; i<count; ++i) { + * direction=ubidi_getVisualRun(para, i, &start, &length); + * renderRun(text, start, start+length, direction, style); + * } + * } else { + * int32_t j; + * + * // iterate over both directional and style runs + * for(i=0; i<count; ++i) { + * direction=ubidi_getVisualRun(line, i, &start, &length); + * renderDirectionalRun(text, start, start+length, + * direction, styleRuns, styleRunCount); + * } + * } + * } + * } + * } + * + *void renderParagraph(const UChar *text, int32_t length, + * UBiDiDirection textDirection, + * const StyleRun *styleRuns, int styleRunCount, + * int lineWidth, + * UErrorCode *pErrorCode) { + * UBiDi *para; + * + * if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) { + * return; + * } + * + * para=ubidi_openSized(length, 0, pErrorCode); + * if(para==NULL) { return; } + * + * ubidi_setPara(para, text, length, + * textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR, + * NULL, pErrorCode); + * if(U_SUCCESS(*pErrorCode)) { + * UBiDiLevel paraLevel=1&ubidi_getParaLevel(para); + * StyleRun styleRun={ length, styleNormal }; + * int width; + * + * if(styleRuns==NULL || styleRunCount<=0) { + * styleRunCount=1; + * styleRuns=&styleRun; + * } + * + * // assume styleRuns[styleRunCount-1].limit>=length + * + * width=getTextWidth(text, 0, length, styleRuns, styleRunCount); + * if(width<=lineWidth) { + * // everything fits onto one line + * + * // prepare rendering a new line from either left or right + * startLine(paraLevel, width); + * + * renderLine(para, text, 0, length, + * styleRuns, styleRunCount); + * } else { + * UBiDi *line; + * + * // we need to render several lines + * line=ubidi_openSized(length, 0, pErrorCode); + * if(line!=NULL) { + * int32_t start=0, limit; + * int styleRunStart=0, styleRunLimit; + * + * for(;;) { + * limit=length; + * styleRunLimit=styleRunCount; + * getLineBreak(text, start, &limit, para, + * styleRuns, styleRunStart, &styleRunLimit, + * &width); + * ubidi_setLine(para, start, limit, line, pErrorCode); + * if(U_SUCCESS(*pErrorCode)) { + * // prepare rendering a new line + * // from either left or right + * startLine(paraLevel, width); + * + * renderLine(line, text, start, limit, + * styleRuns+styleRunStart, + * styleRunLimit-styleRunStart); + * } + * if(limit==length) { break; } + * start=limit; + * styleRunStart=styleRunLimit-1; + * if(start>=styleRuns[styleRunStart].limit) { + * ++styleRunStart; + * } + * } + * + * ubidi_close(line); + * } + * } + * } + * + * ubidi_close(para); + *} + *\endcode + * </pre> + */ + +/*DOCXX_TAG*/ +/*@{*/ + +/** + * UBiDiLevel is the type of the level values in this + * Bidi implementation. + * It holds an embedding level and indicates the visual direction + * by its bit 0 (even/odd value).<p> + * + * It can also hold non-level values for the + * <code>paraLevel</code> and <code>embeddingLevels</code> + * arguments of <code>ubidi_setPara()</code>; there: + * <ul> + * <li>bit 7 of an <code>embeddingLevels[]</code> + * value indicates whether the using application is + * specifying the level of a character to <i>override</i> whatever the + * Bidi implementation would resolve it to.</li> + * <li><code>paraLevel</code> can be set to the + * pseudo-level values <code>UBIDI_DEFAULT_LTR</code> + * and <code>UBIDI_DEFAULT_RTL</code>.</li> + * </ul> + * + * @see ubidi_setPara + * + * <p>The related constants are not real, valid level values. + * <code>UBIDI_DEFAULT_XXX</code> can be used to specify + * a default for the paragraph level for + * when the <code>ubidi_setPara()</code> function + * shall determine it but there is no + * strongly typed character in the input.<p> + * + * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even + * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd, + * just like with normal LTR and RTL level values - + * these special values are designed that way. Also, the implementation + * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. + * + * Note: The numeric values of the related constants will not change: + * They are tied to the use of 7-bit byte values (plus the override bit) + * and of the UBiDiLevel=uint8_t data type in this API. + * + * @see UBIDI_DEFAULT_LTR + * @see UBIDI_DEFAULT_RTL + * @see UBIDI_LEVEL_OVERRIDE + * @see UBIDI_MAX_EXPLICIT_LEVEL + * @stable ICU 2.0 + */ +typedef uint8_t UBiDiLevel; + +/** Paragraph level setting.<p> + * + * Constant indicating that the base direction depends on the first strong + * directional character in the text according to the Unicode Bidirectional + * Algorithm. If no strong directional character is present, + * then set the paragraph level to 0 (left-to-right).<p> + * + * If this value is used in conjunction with reordering modes + * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or + * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder + * is assumed to be visual LTR, and the text after reordering is required + * to be the corresponding logical string with appropriate contextual + * direction. The direction of the result string will be RTL if either + * the righmost or leftmost strong character of the source text is RTL + * or Arabic Letter, the direction will be LTR otherwise.<p> + * + * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may + * be added at the beginning of the result string to ensure round trip + * (that the result string, when reordered back to visual, will produce + * the original source text). + * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT + * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL + * @stable ICU 2.0 + */ +#define UBIDI_DEFAULT_LTR 0xfe + +/** Paragraph level setting.<p> + * + * Constant indicating that the base direction depends on the first strong + * directional character in the text according to the Unicode Bidirectional + * Algorithm. If no strong directional character is present, + * then set the paragraph level to 1 (right-to-left).<p> + * + * If this value is used in conjunction with reordering modes + * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or + * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder + * is assumed to be visual LTR, and the text after reordering is required + * to be the corresponding logical string with appropriate contextual + * direction. The direction of the result string will be RTL if either + * the righmost or leftmost strong character of the source text is RTL + * or Arabic Letter, or if the text contains no strong character; + * the direction will be LTR otherwise.<p> + * + * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may + * be added at the beginning of the result string to ensure round trip + * (that the result string, when reordered back to visual, will produce + * the original source text). + * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT + * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL + * @stable ICU 2.0 + */ +#define UBIDI_DEFAULT_RTL 0xff + +/** + * Maximum explicit embedding level. + * Same as the max_depth value in the + * <a href="http://www.unicode.org/reports/tr9/#BD2">Unicode Bidirectional Algorithm</a>. + * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>). + * @stable ICU 2.0 + */ +#define UBIDI_MAX_EXPLICIT_LEVEL 125 + +/** Bit flag for level input. + * Overrides directional properties. + * @stable ICU 2.0 + */ +#define UBIDI_LEVEL_OVERRIDE 0x80 + +/** + * Special value which can be returned by the mapping functions when a logical + * index has no corresponding visual index or vice-versa. This may happen + * for the logical-to-visual mapping of a Bidi control when option + * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is specified. This can also happen + * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted + * by option <code>#UBIDI_OPTION_INSERT_MARKS</code>. + * @see ubidi_getVisualIndex + * @see ubidi_getVisualMap + * @see ubidi_getLogicalIndex + * @see ubidi_getLogicalMap + * @stable ICU 3.6 + */ +#define UBIDI_MAP_NOWHERE (-1) + +/** + * <code>UBiDiDirection</code> values indicate the text direction. + * @stable ICU 2.0 + */ +enum UBiDiDirection { + /** Left-to-right text. This is a 0 value. + * <ul> + * <li>As return value for <code>ubidi_getDirection()</code>, it means + * that the source string contains no right-to-left characters, or + * that the source string is empty and the paragraph level is even. + * <li> As return value for <code>ubidi_getBaseDirection()</code>, it + * means that the first strong character of the source string has + * a left-to-right direction. + * </ul> + * @stable ICU 2.0 + */ + UBIDI_LTR, + /** Right-to-left text. This is a 1 value. + * <ul> + * <li>As return value for <code>ubidi_getDirection()</code>, it means + * that the source string contains no left-to-right characters, or + * that the source string is empty and the paragraph level is odd. + * <li> As return value for <code>ubidi_getBaseDirection()</code>, it + * means that the first strong character of the source string has + * a right-to-left direction. + * </ul> + * @stable ICU 2.0 + */ + UBIDI_RTL, + /** Mixed-directional text. + * <p>As return value for <code>ubidi_getDirection()</code>, it means + * that the source string contains both left-to-right and + * right-to-left characters. + * @stable ICU 2.0 + */ + UBIDI_MIXED, + /** No strongly directional text. + * <p>As return value for <code>ubidi_getBaseDirection()</code>, it means + * that the source string is missing or empty, or contains neither left-to-right + * nor right-to-left characters. + * @stable ICU 4.6 + */ + UBIDI_NEUTRAL +}; + +/** @stable ICU 2.0 */ +typedef enum UBiDiDirection UBiDiDirection; + +/** + * Forward declaration of the <code>UBiDi</code> structure for the declaration of + * the API functions. Its fields are implementation-specific.<p> + * This structure holds information about a paragraph (or multiple paragraphs) + * of text with Bidi-algorithm-related details, or about one line of + * such a paragraph.<p> + * Reordering can be done on a line, or on one or more paragraphs which are + * then interpreted each as one single line. + * @stable ICU 2.0 + */ +struct UBiDi; + +/** @stable ICU 2.0 */ +typedef struct UBiDi UBiDi; + +/** + * Allocate a <code>UBiDi</code> structure. + * Such an object is initially empty. It is assigned + * the Bidi properties of a piece of text containing one or more paragraphs + * by <code>ubidi_setPara()</code> + * or the Bidi properties of a line within a paragraph by + * <code>ubidi_setLine()</code>.<p> + * This object can be reused for as long as it is not deallocated + * by calling <code>ubidi_close()</code>.<p> + * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate + * additional memory for internal structures as necessary. + * + * @return An empty <code>UBiDi</code> object. + * @stable ICU 2.0 + */ +U_CAPI UBiDi * U_EXPORT2 +ubidi_open(void); + +/** + * Allocate a <code>UBiDi</code> structure with preallocated memory + * for internal structures. + * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code> + * with no arguments, but it also preallocates memory for internal structures + * according to the sizings supplied by the caller.<p> + * Subsequent functions will not allocate any more memory, and are thus + * guaranteed not to fail because of lack of memory.<p> + * The preallocation can be limited to some of the internal memory + * by setting some values to 0 here. That means that if, e.g., + * <code>maxRunCount</code> cannot be reasonably predetermined and should not + * be set to <code>maxLength</code> (the only failproof value) to avoid + * wasting memory, then <code>maxRunCount</code> could be set to 0 here + * and the internal structures that are associated with it will be allocated + * on demand, just like with <code>ubidi_open()</code>. + * + * @param maxLength is the maximum text or line length that internal memory + * will be preallocated for. An attempt to associate this object with a + * longer text will fail, unless this value is 0, which leaves the allocation + * up to the implementation. + * + * @param maxRunCount is the maximum anticipated number of same-level runs + * that internal memory will be preallocated for. An attempt to access + * visual runs on an object that was not preallocated for as many runs + * as the text was actually resolved to will fail, + * unless this value is 0, which leaves the allocation up to the implementation.<br><br> + * The number of runs depends on the actual text and maybe anywhere between + * 1 and <code>maxLength</code>. It is typically small. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return An empty <code>UBiDi</code> object with preallocated memory. + * @stable ICU 2.0 + */ +U_CAPI UBiDi * U_EXPORT2 +ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode); + +/** + * <code>ubidi_close()</code> must be called to free the memory + * associated with a UBiDi object.<p> + * + * <strong>Important: </strong> + * A parent <code>UBiDi</code> object must not be destroyed or reused if + * it still has children. + * If a <code>UBiDi</code> object has become the <i>child</i> + * of another one (its <i>parent</i>) by calling + * <code>ubidi_setLine()</code>, then the child object must + * be destroyed (closed) or reused (by calling + * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>) + * before the parent object. + * + * @param pBiDi is a <code>UBiDi</code> object. + * + * @see ubidi_setPara + * @see ubidi_setLine + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ubidi_close(UBiDi *pBiDi); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUBiDiPointer + * "Smart pointer" class, closes a UBiDi via ubidi_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiPointer, UBiDi, ubidi_close); + +U_NAMESPACE_END + +#endif + +/** + * Modify the operation of the Bidi algorithm such that it + * approximates an "inverse Bidi" algorithm. This function + * must be called before <code>ubidi_setPara()</code>. + * + * <p>The normal operation of the Bidi algorithm as described + * in the Unicode Technical Report is to take text stored in logical + * (keyboard, typing) order and to determine the reordering of it for visual + * rendering. + * Some legacy systems store text in visual order, and for operations + * with standard, Unicode-based algorithms, the text needs to be transformed + * to logical order. This is effectively the inverse algorithm of the + * described Bidi algorithm. Note that there is no standard algorithm for + * this "inverse Bidi" and that the current implementation provides only an + * approximation of "inverse Bidi".</p> + * + * <p>With <code>isInverse</code> set to <code>true</code>, + * this function changes the behavior of some of the subsequent functions + * in a way that they can be used for the inverse Bidi algorithm. + * Specifically, runs of text with numeric characters will be treated in a + * special way and may need to be surrounded with LRM characters when they are + * written in reordered sequence.</p> + * + * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>. + * Since the actual input for "inverse Bidi" is visually ordered text and + * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually + * the runs of the logically ordered output.</p> + * + * <p>Calling this function with argument <code>isInverse</code> set to + * <code>true</code> is equivalent to calling + * <code>ubidi_setReorderingMode</code> with argument + * <code>reorderingMode</code> + * set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br> + * Calling this function with argument <code>isInverse</code> set to + * <code>false</code> is equivalent to calling + * <code>ubidi_setReorderingMode</code> with argument + * <code>reorderingMode</code> + * set to <code>#UBIDI_REORDER_DEFAULT</code>. + * + * @param pBiDi is a <code>UBiDi</code> object. + * + * @param isInverse specifies "forward" or "inverse" Bidi operation. + * + * @see ubidi_setPara + * @see ubidi_writeReordered + * @see ubidi_setReorderingMode + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ubidi_setInverse(UBiDi *pBiDi, UBool isInverse); + +/** + * Is this Bidi object set to perform the inverse Bidi algorithm? + * <p>Note: calling this function after setting the reordering mode with + * <code>ubidi_setReorderingMode</code> will return <code>true</code> if the + * reordering mode was set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, + * <code>false</code> for all other values.</p> + * + * @param pBiDi is a <code>UBiDi</code> object. + * @return true if the Bidi object is set to perform the inverse Bidi algorithm + * by handling numbers as L. + * + * @see ubidi_setInverse + * @see ubidi_setReorderingMode + * @stable ICU 2.0 + */ + +U_CAPI UBool U_EXPORT2 +ubidi_isInverse(UBiDi *pBiDi); + +/** + * Specify whether block separators must be allocated level zero, + * so that successive paragraphs will progress from left to right. + * This function must be called before <code>ubidi_setPara()</code>. + * Paragraph separators (B) may appear in the text. Setting them to level zero + * means that all paragraph separators (including one possibly appearing + * in the last text position) are kept in the reordered text after the text + * that they follow in the source text. + * When this feature is not enabled, a paragraph separator at the last + * position of the text before reordering will go to the first position + * of the reordered text when the paragraph level is odd. + * + * @param pBiDi is a <code>UBiDi</code> object. + * + * @param orderParagraphsLTR specifies whether paragraph separators (B) must + * receive level 0, so that successive paragraphs progress from left to right. + * + * @see ubidi_setPara + * @stable ICU 3.4 + */ +U_CAPI void U_EXPORT2 +ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR); + +/** + * Is this Bidi object set to allocate level 0 to block separators so that + * successive paragraphs progress from left to right? + * + * @param pBiDi is a <code>UBiDi</code> object. + * @return true if the Bidi object is set to allocate level 0 to block + * separators. + * + * @see ubidi_orderParagraphsLTR + * @stable ICU 3.4 + */ +U_CAPI UBool U_EXPORT2 +ubidi_isOrderParagraphsLTR(UBiDi *pBiDi); + +/** + * <code>UBiDiReorderingMode</code> values indicate which variant of the Bidi + * algorithm to use. + * + * @see ubidi_setReorderingMode + * @stable ICU 3.6 + */ +typedef enum UBiDiReorderingMode { + /** Regular Logical to Visual Bidi algorithm according to Unicode. + * This is a 0 value. + * @stable ICU 3.6 */ + UBIDI_REORDER_DEFAULT = 0, + /** Logical to Visual algorithm which handles numbers in a way which + * mimics the behavior of Windows XP. + * @stable ICU 3.6 */ + UBIDI_REORDER_NUMBERS_SPECIAL, + /** Logical to Visual algorithm grouping numbers with adjacent R characters + * (reversible algorithm). + * @stable ICU 3.6 */ + UBIDI_REORDER_GROUP_NUMBERS_WITH_R, + /** Reorder runs only to transform a Logical LTR string to the Logical RTL + * string with the same display, or vice-versa.<br> + * If this mode is set together with option + * <code>#UBIDI_OPTION_INSERT_MARKS</code>, some Bidi controls in the source + * text may be removed and other controls may be added to produce the + * minimum combination which has the required display. + * @stable ICU 3.6 */ + UBIDI_REORDER_RUNS_ONLY, + /** Visual to Logical algorithm which handles numbers like L + * (same algorithm as selected by <code>ubidi_setInverse(true)</code>. + * @see ubidi_setInverse + * @stable ICU 3.6 */ + UBIDI_REORDER_INVERSE_NUMBERS_AS_L, + /** Visual to Logical algorithm equivalent to the regular Logical to Visual + * algorithm. + * @stable ICU 3.6 */ + UBIDI_REORDER_INVERSE_LIKE_DIRECT, + /** Inverse Bidi (Visual to Logical) algorithm for the + * <code>UBIDI_REORDER_NUMBERS_SPECIAL</code> Bidi algorithm. + * @stable ICU 3.6 */ + UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, +#ifndef U_HIDE_DEPRECATED_API + /** + * Number of values for reordering mode. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UBIDI_REORDER_COUNT +#endif // U_HIDE_DEPRECATED_API +} UBiDiReorderingMode; + +/** + * Modify the operation of the Bidi algorithm such that it implements some + * variant to the basic Bidi algorithm or approximates an "inverse Bidi" + * algorithm, depending on different values of the "reordering mode". + * This function must be called before <code>ubidi_setPara()</code>, and stays + * in effect until called again with a different argument. + * + * <p>The normal operation of the Bidi algorithm as described + * in the Unicode Standard Annex #9 is to take text stored in logical + * (keyboard, typing) order and to determine how to reorder it for visual + * rendering.</p> + * + * <p>With the reordering mode set to a value other than + * <code>#UBIDI_REORDER_DEFAULT</code>, this function changes the behavior of + * some of the subsequent functions in a way such that they implement an + * inverse Bidi algorithm or some other algorithm variants.</p> + * + * <p>Some legacy systems store text in visual order, and for operations + * with standard, Unicode-based algorithms, the text needs to be transformed + * into logical order. This is effectively the inverse algorithm of the + * described Bidi algorithm. Note that there is no standard algorithm for + * this "inverse Bidi", so a number of variants are implemented here.</p> + * + * <p>In other cases, it may be desirable to emulate some variant of the + * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a + * Logical to Logical transformation.</p> + * + * <ul> + * <li>When the reordering mode is set to <code>#UBIDI_REORDER_DEFAULT</code>, + * the standard Bidi Logical to Visual algorithm is applied.</li> + * + * <li>When the reordering mode is set to + * <code>#UBIDI_REORDER_NUMBERS_SPECIAL</code>, + * the algorithm used to perform Bidi transformations when calling + * <code>ubidi_setPara</code> should approximate the algorithm used in + * Microsoft Windows XP rather than strictly conform to the Unicode Bidi + * algorithm. + * <br> + * The differences between the basic algorithm and the algorithm addressed + * by this option are as follows: + * <ul> + * <li>Within text at an even embedding level, the sequence "123AB" + * (where AB represent R or AL letters) is transformed to "123BA" by the + * Unicode algorithm and to "BA123" by the Windows algorithm.</li> + * <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just + * like regular numbers (EN).</li> + * </ul></li> + * + * <li>When the reordering mode is set to + * <code>#UBIDI_REORDER_GROUP_NUMBERS_WITH_R</code>, + * numbers located between LTR text and RTL text are associated with the RTL + * text. For instance, an LTR paragraph with content "abc 123 DEF" (where + * upper case letters represent RTL characters) will be transformed to + * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed + * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc". + * This makes the algorithm reversible and makes it useful when round trip + * (from visual to logical and back to visual) must be achieved without + * adding LRM characters. However, this is a variation from the standard + * Unicode Bidi algorithm.<br> + * The source text should not contain Bidi control characters other than LRM + * or RLM.</li> + * + * <li>When the reordering mode is set to + * <code>#UBIDI_REORDER_RUNS_ONLY</code>, + * a "Logical to Logical" transformation must be performed: + * <ul> + * <li>If the default text level of the source text (argument <code>paraLevel</code> + * in <code>ubidi_setPara</code>) is even, the source text will be handled as + * LTR logical text and will be transformed to the RTL logical text which has + * the same LTR visual display.</li> + * <li>If the default level of the source text is odd, the source text + * will be handled as RTL logical text and will be transformed to the + * LTR logical text which has the same LTR visual display.</li> + * </ul> + * This mode may be needed when logical text which is basically Arabic or + * Hebrew, with possible included numbers or phrases in English, has to be + * displayed as if it had an even embedding level (this can happen if the + * displaying application treats all text as if it was basically LTR). + * <br> + * This mode may also be needed in the reverse case, when logical text which is + * basically English, with possible included phrases in Arabic or Hebrew, has to + * be displayed as if it had an odd embedding level. + * <br> + * Both cases could be handled by adding LRE or RLE at the head of the text, + * if the display subsystem supports these formatting controls. If it does not, + * the problem may be handled by transforming the source text in this mode + * before displaying it, so that it will be displayed properly.<br> + * The source text should not contain Bidi control characters other than LRM + * or RLM.</li> + * + * <li>When the reordering mode is set to + * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi" algorithm + * is applied. + * Runs of text with numeric characters will be treated like LTR letters and + * may need to be surrounded with LRM characters when they are written in + * reordered sequence (the option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> can + * be used with function <code>ubidi_writeReordered</code> to this end. This + * mode is equivalent to calling <code>ubidi_setInverse()</code> with + * argument <code>isInverse</code> set to <code>true</code>.</li> + * + * <li>When the reordering mode is set to + * <code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to Visual + * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm. + * This mode is similar to mode <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> + * but is closer to the regular Bidi algorithm. + * <br> + * For example, an LTR paragraph with the content "FED 123 456 CBA" (where + * upper case represents RTL characters) will be transformed to + * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC" + * with mode <code>UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br> + * When used in conjunction with option + * <code>#UBIDI_OPTION_INSERT_MARKS</code>, this mode generally + * adds Bidi marks to the output significantly more sparingly than mode + * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> with option + * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to + * <code>ubidi_writeReordered</code>.</li> + * + * <li>When the reordering mode is set to + * <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual + * Bidi algorithm used in Windows XP is used as an approximation of an "inverse Bidi" algorithm. + * <br> + * For example, an LTR paragraph with the content "abc FED123" (where + * upper case represents RTL characters) will be transformed to "abc 123DEF."</li> + * </ul> + * + * <p>In all the reordering modes specifying an "inverse Bidi" algorithm + * (i.e. those with a name starting with <code>UBIDI_REORDER_INVERSE</code>), + * output runs should be retrieved using + * <code>ubidi_getVisualRun()</code>, and the output text with + * <code>ubidi_writeReordered()</code>. The caller should keep in mind that in + * "inverse Bidi" modes the input is actually visually ordered text and + * reordered output returned by <code>ubidi_getVisualRun()</code> or + * <code>ubidi_writeReordered()</code> are actually runs or character string + * of logically ordered output.<br> + * For all the "inverse Bidi" modes, the source text should not contain + * Bidi control characters other than LRM or RLM.</p> + * + * <p>Note that option <code>#UBIDI_OUTPUT_REVERSE</code> of + * <code>ubidi_writeReordered</code> has no useful meaning and should not be + * used in conjunction with any value of the reordering mode specifying + * "inverse Bidi" or with value <code>UBIDI_REORDER_RUNS_ONLY</code>. + * + * @param pBiDi is a <code>UBiDi</code> object. + * @param reorderingMode specifies the required variant of the Bidi algorithm. + * + * @see UBiDiReorderingMode + * @see ubidi_setInverse + * @see ubidi_setPara + * @see ubidi_writeReordered + * @stable ICU 3.6 + */ +U_CAPI void U_EXPORT2 +ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode); + +/** + * What is the requested reordering mode for a given Bidi object? + * + * @param pBiDi is a <code>UBiDi</code> object. + * @return the current reordering mode of the Bidi object + * @see ubidi_setReorderingMode + * @stable ICU 3.6 + */ +U_CAPI UBiDiReorderingMode U_EXPORT2 +ubidi_getReorderingMode(UBiDi *pBiDi); + +/** + * <code>UBiDiReorderingOption</code> values indicate which options are + * specified to affect the Bidi algorithm. + * + * @see ubidi_setReorderingOptions + * @stable ICU 3.6 + */ +typedef enum UBiDiReorderingOption { + /** + * option value for <code>ubidi_setReorderingOptions</code>: + * disable all the options which can be set with this function + * @see ubidi_setReorderingOptions + * @stable ICU 3.6 + */ + UBIDI_OPTION_DEFAULT = 0, + + /** + * option bit for <code>ubidi_setReorderingOptions</code>: + * insert Bidi marks (LRM or RLM) when needed to ensure correct result of + * a reordering to a Logical order + * + * <p>This option must be set or reset before calling + * <code>ubidi_setPara</code>.</p> + * + * <p>This option is significant only with reordering modes which generate + * a result with Logical order, specifically:</p> + * <ul> + * <li><code>#UBIDI_REORDER_RUNS_ONLY</code></li> + * <li><code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code></li> + * <li><code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code></li> + * <li><code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li> + * </ul> + * + * <p>If this option is set in conjunction with reordering mode + * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> or with calling + * <code>ubidi_setInverse(true)</code>, it implies + * option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> + * in calls to function <code>ubidi_writeReordered()</code>.</p> + * + * <p>For other reordering modes, a minimum number of LRM or RLM characters + * will be added to the source text after reordering it so as to ensure + * round trip, i.e. when applying the inverse reordering mode on the + * resulting logical text with removal of Bidi marks + * (option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> set before calling + * <code>ubidi_setPara()</code> or option <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> + * in <code>ubidi_writeReordered</code>), the result will be identical to the + * source text in the first transformation. + * + * <p>This option will be ignored if specified together with option + * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. It inhibits option + * <code>UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to function + * <code>ubidi_writeReordered()</code> and it implies option + * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to function + * <code>ubidi_writeReordered()</code> if the reordering mode is + * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.</p> + * + * @see ubidi_setReorderingMode + * @see ubidi_setReorderingOptions + * @stable ICU 3.6 + */ + UBIDI_OPTION_INSERT_MARKS = 1, + + /** + * option bit for <code>ubidi_setReorderingOptions</code>: + * remove Bidi control characters + * + * <p>This option must be set or reset before calling + * <code>ubidi_setPara</code>.</p> + * + * <p>This option nullifies option <code>#UBIDI_OPTION_INSERT_MARKS</code>. + * It inhibits option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls + * to function <code>ubidi_writeReordered()</code> and it implies option + * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to that function.</p> + * + * @see ubidi_setReorderingMode + * @see ubidi_setReorderingOptions + * @stable ICU 3.6 + */ + UBIDI_OPTION_REMOVE_CONTROLS = 2, + + /** + * option bit for <code>ubidi_setReorderingOptions</code>: + * process the output as part of a stream to be continued + * + * <p>This option must be set or reset before calling + * <code>ubidi_setPara</code>.</p> + * + * <p>This option specifies that the caller is interested in processing large + * text object in parts. + * The results of the successive calls are expected to be concatenated by the + * caller. Only the call for the last part will have this option bit off.</p> + * + * <p>When this option bit is on, <code>ubidi_setPara()</code> may process + * less than the full source text in order to truncate the text at a meaningful + * boundary. The caller should call <code>ubidi_getProcessedLength()</code> + * immediately after calling <code>ubidi_setPara()</code> in order to + * determine how much of the source text has been processed. + * Source text beyond that length should be resubmitted in following calls to + * <code>ubidi_setPara</code>. The processed length may be less than + * the length of the source text if a character preceding the last character of + * the source text constitutes a reasonable boundary (like a block separator) + * for text to be continued.<br> + * If the last character of the source text constitutes a reasonable + * boundary, the whole text will be processed at once.<br> + * If nowhere in the source text there exists + * such a reasonable boundary, the processed length will be zero.<br> + * The caller should check for such an occurrence and do one of the following: + * <ul><li>submit a larger amount of text with a better chance to include + * a reasonable boundary.</li> + * <li>resubmit the same text after turning off option + * <code>UBIDI_OPTION_STREAMING</code>.</li></ul> + * In all cases, this option should be turned off before processing the last + * part of the text.</p> + * + * <p>When the <code>UBIDI_OPTION_STREAMING</code> option is used, + * it is recommended to call <code>ubidi_orderParagraphsLTR()</code> with + * argument <code>orderParagraphsLTR</code> set to <code>true</code> before + * calling <code>ubidi_setPara</code> so that later paragraphs may be + * concatenated to previous paragraphs on the right.</p> + * + * @see ubidi_setReorderingMode + * @see ubidi_setReorderingOptions + * @see ubidi_getProcessedLength + * @see ubidi_orderParagraphsLTR + * @stable ICU 3.6 + */ + UBIDI_OPTION_STREAMING = 4 +} UBiDiReorderingOption; + +/** + * Specify which of the reordering options + * should be applied during Bidi transformations. + * + * @param pBiDi is a <code>UBiDi</code> object. + * @param reorderingOptions is a combination of zero or more of the following + * options: + * <code>#UBIDI_OPTION_DEFAULT</code>, <code>#UBIDI_OPTION_INSERT_MARKS</code>, + * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>, <code>#UBIDI_OPTION_STREAMING</code>. + * + * @see ubidi_getReorderingOptions + * @stable ICU 3.6 + */ +U_CAPI void U_EXPORT2 +ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions); + +/** + * What are the reordering options applied to a given Bidi object? + * + * @param pBiDi is a <code>UBiDi</code> object. + * @return the current reordering options of the Bidi object + * @see ubidi_setReorderingOptions + * @stable ICU 3.6 + */ +U_CAPI uint32_t U_EXPORT2 +ubidi_getReorderingOptions(UBiDi *pBiDi); + +/** + * Set the context before a call to ubidi_setPara().<p> + * + * ubidi_setPara() computes the left-right directionality for a given piece + * of text which is supplied as one of its arguments. Sometimes this piece + * of text (the "main text") should be considered in context, because text + * appearing before ("prologue") and/or after ("epilogue") the main text + * may affect the result of this computation.<p> + * + * This function specifies the prologue and/or the epilogue for the next + * call to ubidi_setPara(). The characters specified as prologue and + * epilogue should not be modified by the calling program until the call + * to ubidi_setPara() has returned. If successive calls to ubidi_setPara() + * all need specification of a context, ubidi_setContext() must be called + * before each call to ubidi_setPara(). In other words, a context is not + * "remembered" after the following successful call to ubidi_setPara().<p> + * + * If a call to ubidi_setPara() specifies UBIDI_DEFAULT_LTR or + * UBIDI_DEFAULT_RTL as paraLevel and is preceded by a call to + * ubidi_setContext() which specifies a prologue, the paragraph level will + * be computed taking in consideration the text in the prologue.<p> + * + * When ubidi_setPara() is called without a previous call to + * ubidi_setContext, the main text is handled as if preceded and followed + * by strong directional characters at the current paragraph level. + * Calling ubidi_setContext() with specification of a prologue will change + * this behavior by handling the main text as if preceded by the last + * strong character appearing in the prologue, if any. + * Calling ubidi_setContext() with specification of an epilogue will change + * the behavior of ubidi_setPara() by handling the main text as if followed + * by the first strong character or digit appearing in the epilogue, if any.<p> + * + * Note 1: if <code>ubidi_setContext</code> is called repeatedly without + * calling <code>ubidi_setPara</code>, the earlier calls have no effect, + * only the last call will be remembered for the next call to + * <code>ubidi_setPara</code>.<p> + * + * Note 2: calling <code>ubidi_setContext(pBiDi, NULL, 0, NULL, 0, &errorCode)</code> + * cancels any previous setting of non-empty prologue or epilogue. + * The next call to <code>ubidi_setPara()</code> will process no + * prologue or epilogue.<p> + * + * Note 3: users must be aware that even after setting the context + * before a call to ubidi_setPara() to perform e.g. a logical to visual + * transformation, the resulting string may not be identical to what it + * would have been if all the text, including prologue and epilogue, had + * been processed together.<br> + * Example (upper case letters represent RTL characters):<br> + * prologue = "<code>abc DE</code>"<br> + * epilogue = none<br> + * main text = "<code>FGH xyz</code>"<br> + * paraLevel = UBIDI_LTR<br> + * display without prologue = "<code>HGF xyz</code>" + * ("HGF" is adjacent to "xyz")<br> + * display with prologue = "<code>abc HGFED xyz</code>" + * ("HGF" is not adjacent to "xyz")<br> + * + * @param pBiDi is a paragraph <code>UBiDi</code> object. + * + * @param prologue is a pointer to the text which precedes the text that + * will be specified in a coming call to ubidi_setPara(). + * If there is no prologue to consider, then <code>proLength</code> + * must be zero and this pointer can be NULL. + * + * @param proLength is the length of the prologue; if <code>proLength==-1</code> + * then the prologue must be zero-terminated. + * Otherwise proLength must be >= 0. If <code>proLength==0</code>, it means + * that there is no prologue to consider. + * + * @param epilogue is a pointer to the text which follows the text that + * will be specified in a coming call to ubidi_setPara(). + * If there is no epilogue to consider, then <code>epiLength</code> + * must be zero and this pointer can be NULL. + * + * @param epiLength is the length of the epilogue; if <code>epiLength==-1</code> + * then the epilogue must be zero-terminated. + * Otherwise epiLength must be >= 0. If <code>epiLength==0</code>, it means + * that there is no epilogue to consider. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @see ubidi_setPara + * @stable ICU 4.8 + */ +U_CAPI void U_EXPORT2 +ubidi_setContext(UBiDi *pBiDi, + const UChar *prologue, int32_t proLength, + const UChar *epilogue, int32_t epiLength, + UErrorCode *pErrorCode); + +/** + * Perform the Unicode Bidi algorithm. It is defined in the + * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, + * version 13, + * also described in The Unicode Standard, Version 4.0 .<p> + * + * This function takes a piece of plain text containing one or more paragraphs, + * with or without externally specified embedding levels from <i>styled</i> + * text and computes the left-right-directionality of each character.<p> + * + * If the entire text is all of the same directionality, then + * the function may not perform all the steps described by the algorithm, + * i.e., some levels may not be the same as if all steps were performed. + * This is not relevant for unidirectional text.<br> + * For example, in pure LTR text with numbers the numbers would get + * a resolved level of 2 higher than the surrounding text according to + * the algorithm. This implementation may set all resolved levels to + * the same value in such a case.<p> + * + * The text can be composed of multiple paragraphs. Occurrence of a block + * separator in the text terminates a paragraph, and whatever comes next starts + * a new paragraph. The exception to this rule is when a Carriage Return (CR) + * is followed by a Line Feed (LF). Both CR and LF are block separators, but + * in that case, the pair of characters is considered as terminating the + * preceding paragraph, and a new paragraph will be started by a character + * coming after the LF. + * + * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code> + * which will be set to contain the reordering information, + * especially the resolved levels for all the characters in <code>text</code>. + * + * @param text is a pointer to the text that the Bidi algorithm will be performed on. + * This pointer is stored in the UBiDi object and can be retrieved + * with <code>ubidi_getText()</code>.<br> + * <strong>Note:</strong> the text must be (at least) <code>length</code> long. + * + * @param length is the length of the text; if <code>length==-1</code> then + * the text must be zero-terminated. + * + * @param paraLevel specifies the default level for the text; + * it is typically 0 (LTR) or 1 (RTL). + * If the function shall determine the paragraph level from the text, + * then <code>paraLevel</code> can be set to + * either <code>#UBIDI_DEFAULT_LTR</code> + * or <code>#UBIDI_DEFAULT_RTL</code>; if the text contains multiple + * paragraphs, the paragraph level shall be determined separately for + * each paragraph; if a paragraph does not include any strongly typed + * character, then the desired default is used (0 for LTR or 1 for RTL). + * Any other value between 0 and <code>#UBIDI_MAX_EXPLICIT_LEVEL</code> + * is also valid, with odd levels indicating RTL. + * + * @param embeddingLevels (in) may be used to preset the embedding and override levels, + * ignoring characters like LRE and PDF in the text. + * A level overrides the directional property of its corresponding + * (same index) character if the level has the + * <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br> + * Aside from that bit, it must be + * <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>, + * except that level 0 is always allowed. + * Level 0 for a paragraph separator prevents reordering of paragraphs; + * this only works reliably if <code>#UBIDI_LEVEL_OVERRIDE</code> + * is also set for paragraph separators. + * Level 0 for other characters is treated as a wildcard + * and is lifted up to the resolved level of the surrounding paragraph.<br><br> + * <strong>Caution: </strong>A copy of this pointer, not of the levels, + * will be stored in the <code>UBiDi</code> object; + * the <code>embeddingLevels</code> array must not be + * deallocated before the <code>UBiDi</code> structure is destroyed or reused, + * and the <code>embeddingLevels</code> + * should not be modified to avoid unexpected results on subsequent Bidi operations. + * However, the <code>ubidi_setPara()</code> and + * <code>ubidi_setLine()</code> functions may modify some or all of the levels.<br><br> + * After the <code>UBiDi</code> object is reused or destroyed, the caller + * must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br> + * <strong>Note:</strong> the <code>embeddingLevels</code> array must be + * at least <code>length</code> long. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pErrorCode must be a valid pointer to an error code value. + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, + UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, + UErrorCode *pErrorCode); + +/** + * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to + * contain the reordering information, especially the resolved levels, + * for all the characters in a line of text. This line of text is + * specified by referring to a <code>UBiDi</code> object representing + * this information for a piece of text containing one or more paragraphs, + * and by specifying a range of indexes in this text.<p> + * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p> + * + * This is used after calling <code>ubidi_setPara()</code> + * for a piece of text, and after line-breaking on that text. + * It is not necessary if each paragraph is treated as a single line.<p> + * + * After line-breaking, rules (L1) and (L2) for the treatment of + * trailing WS and for reordering are performed on + * a <code>UBiDi</code> object that represents a line.<p> + * + * <strong>Important: </strong><code>pLineBiDi</code> shares data with + * <code>pParaBiDi</code>. + * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>. + * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line + * before the object for its parent paragraph.<p> + * + * The text pointer that was stored in <code>pParaBiDi</code> is also copied, + * and <code>start</code> is added to it so that it points to the beginning of the + * line for this object. + * + * @param pParaBiDi is the parent paragraph object. It must have been set + * by a successful call to ubidi_setPara. + * + * @param start is the line's first index into the text. + * + * @param limit is just behind the line's last index into the text + * (its last index +1).<br> + * It must be <code>0<=start<limit<=</code>containing paragraph limit. + * If the specified line crosses a paragraph boundary, the function + * will terminate with error code U_ILLEGAL_ARGUMENT_ERROR. + * + * @param pLineBiDi is the object that will now represent a line of the text. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @see ubidi_setPara + * @see ubidi_getProcessedLength + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ubidi_setLine(const UBiDi *pParaBiDi, + int32_t start, int32_t limit, + UBiDi *pLineBiDi, + UErrorCode *pErrorCode); + +/** + * Get the directionality of the text. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code> + * or <code>UBIDI_MIXED</code> + * that indicates if the entire text + * represented by this object is unidirectional, + * and which direction, or if it is mixed-directional. + * Note - The value <code>UBIDI_NEUTRAL</code> is never returned from this method. + * + * @see UBiDiDirection + * @stable ICU 2.0 + */ +U_CAPI UBiDiDirection U_EXPORT2 +ubidi_getDirection(const UBiDi *pBiDi); + +/** + * Gets the base direction of the text provided according + * to the Unicode Bidirectional Algorithm. The base direction + * is derived from the first character in the string with bidirectional + * character type L, R, or AL. If the first such character has type L, + * <code>UBIDI_LTR</code> is returned. If the first such character has + * type R or AL, <code>UBIDI_RTL</code> is returned. If the string does + * not contain any character of these types, then + * <code>UBIDI_NEUTRAL</code> is returned. + * + * This is a lightweight function for use when only the base direction + * is needed and no further bidi processing of the text is needed. + * + * @param text is a pointer to the text whose base + * direction is needed. + * Note: the text must be (at least) @c length long. + * + * @param length is the length of the text; + * if <code>length==-1</code> then the text + * must be zero-terminated. + * + * @return <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>, + * <code>UBIDI_NEUTRAL</code> + * + * @see UBiDiDirection + * @stable ICU 4.6 + */ +U_CAPI UBiDiDirection U_EXPORT2 +ubidi_getBaseDirection(const UChar *text, int32_t length ); + +/** + * Get the pointer to the text. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @return The pointer to the text that the UBiDi object was created for. + * + * @see ubidi_setPara + * @see ubidi_setLine + * @stable ICU 2.0 + */ +U_CAPI const UChar * U_EXPORT2 +ubidi_getText(const UBiDi *pBiDi); + +/** + * Get the length of the text. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @return The length of the text that the UBiDi object was created for. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ubidi_getLength(const UBiDi *pBiDi); + +/** + * Get the paragraph level of the text. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @return The paragraph level. If there are multiple paragraphs, their + * level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or + * UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph + * is returned. + * + * @see UBiDiLevel + * @see ubidi_getParagraph + * @see ubidi_getParagraphByIndex + * @stable ICU 2.0 + */ +U_CAPI UBiDiLevel U_EXPORT2 +ubidi_getParaLevel(const UBiDi *pBiDi); + +/** + * Get the number of paragraphs. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @return The number of paragraphs. + * @stable ICU 3.4 + */ +U_CAPI int32_t U_EXPORT2 +ubidi_countParagraphs(UBiDi *pBiDi); + +/** + * Get a paragraph, given a position within the text. + * This function returns information about a paragraph.<br> + * Note: if the paragraph index is known, it is more efficient to + * retrieve the paragraph information using ubidi_getParagraphByIndex().<p> + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param charIndex is the index of a character within the text, in the + * range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>. + * + * @param pParaStart will receive the index of the first character of the + * paragraph in the text. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pParaLimit will receive the limit of the paragraph. + * The l-value that you point to here may be the + * same expression (variable) as the one for + * <code>charIndex</code>. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pParaLevel will receive the level of the paragraph. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The index of the paragraph containing the specified position. + * + * @see ubidi_getProcessedLength + * @stable ICU 3.4 + */ +U_CAPI int32_t U_EXPORT2 +ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart, + int32_t *pParaLimit, UBiDiLevel *pParaLevel, + UErrorCode *pErrorCode); + +/** + * Get a paragraph, given the index of this paragraph. + * + * This function returns information about a paragraph.<p> + * + * @param pBiDi is the paragraph <code>UBiDi</code> object. + * + * @param paraIndex is the number of the paragraph, in the + * range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>. + * + * @param pParaStart will receive the index of the first character of the + * paragraph in the text. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pParaLimit will receive the limit of the paragraph. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pParaLevel will receive the level of the paragraph. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @stable ICU 3.4 + */ +U_CAPI void U_EXPORT2 +ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, + int32_t *pParaStart, int32_t *pParaLimit, + UBiDiLevel *pParaLevel, UErrorCode *pErrorCode); + +/** + * Get the level for one character. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param charIndex the index of a character. It must be in the range + * [0..ubidi_getProcessedLength(pBiDi)]. + * + * @return The level for the character at charIndex (0 if charIndex is not + * in the valid range). + * + * @see UBiDiLevel + * @see ubidi_getProcessedLength + * @stable ICU 2.0 + */ +U_CAPI UBiDiLevel U_EXPORT2 +ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex); + +/** + * Get an array of levels for each character.<p> + * + * Note that this function may allocate memory under some + * circumstances, unlike <code>ubidi_getLevelAt()</code>. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose + * text length must be strictly positive. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The levels array for the text, + * or <code>NULL</code> if an error occurs. + * + * @see UBiDiLevel + * @see ubidi_getProcessedLength + * @stable ICU 2.0 + */ +U_CAPI const UBiDiLevel * U_EXPORT2 +ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode); + +/** + * Get a logical run. + * This function returns information about a run and is used + * to retrieve runs in logical order.<p> + * This is especially useful for line-breaking on a paragraph. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param logicalPosition is a logical position within the source text. + * + * @param pLogicalLimit will receive the limit of the corresponding run. + * The l-value that you point to here may be the + * same expression (variable) as the one for + * <code>logicalPosition</code>. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pLevel will receive the level of the corresponding run. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @see ubidi_getProcessedLength + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition, + int32_t *pLogicalLimit, UBiDiLevel *pLevel); + +/** + * Get the number of runs. + * This function may invoke the actual reordering on the + * <code>UBiDi</code> object, after <code>ubidi_setPara()</code> + * may have resolved only the levels of the text. Therefore, + * <code>ubidi_countRuns()</code> may have to allocate memory, + * and may fail doing so. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The number of runs. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode); + +/** + * Get one run's logical start, length, and directionality, + * which can be 0 for LTR or 1 for RTL. + * In an RTL run, the character at the logical start is + * visually on the right of the displayed run. + * The length is the number of characters in the run.<p> + * <code>ubidi_countRuns()</code> should be called + * before the runs are retrieved. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param runIndex is the number of the run in visual order, in the + * range <code>[0..ubidi_countRuns(pBiDi)-1]</code>. + * + * @param pLogicalStart is the first logical character index in the text. + * The pointer may be <code>NULL</code> if this index is not needed. + * + * @param pLength is the number of characters (at least one) in the run. + * The pointer may be <code>NULL</code> if this is not needed. + * + * @return the directionality of the run, + * <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>, + * never <code>UBIDI_MIXED</code>, + * never <code>UBIDI_NEUTRAL</code>. + * + * @see ubidi_countRuns + * + * Example: + * <pre> + * \code + * int32_t i, count=ubidi_countRuns(pBiDi), + * logicalStart, visualIndex=0, length; + * for(i=0; i<count; ++i) { + * if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) { + * do { // LTR + * show_char(text[logicalStart++], visualIndex++); + * } while(--length>0); + * } else { + * logicalStart+=length; // logicalLimit + * do { // RTL + * show_char(text[--logicalStart], visualIndex++); + * } while(--length>0); + * } + * } + *\endcode + * </pre> + * + * Note that in right-to-left runs, code like this places + * second surrogates before first ones (which is generally a bad idea) + * and combining characters before base characters. + * <p> + * Use of <code>ubidi_writeReordered()</code>, optionally with the + * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered in order + * to avoid these issues. + * @stable ICU 2.0 + */ +U_CAPI UBiDiDirection U_EXPORT2 +ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, + int32_t *pLogicalStart, int32_t *pLength); + +/** + * Get the visual position from a logical text position. + * If such a mapping is used many times on the same + * <code>UBiDi</code> object, then calling + * <code>ubidi_getLogicalMap()</code> is more efficient.<p> + * + * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no + * visual position because the corresponding text character is a Bidi control + * removed from output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. + * <p> + * When the visual output is altered by using options of + * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, + * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, + * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual position returned may not + * be correct. It is advised to use, when possible, reordering options + * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. + * <p> + * Note that in right-to-left runs, this mapping places + * second surrogates before first ones (which is generally a bad idea) + * and combining characters before base characters. + * Use of <code>ubidi_writeReordered()</code>, optionally with the + * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead + * of using the mapping, in order to avoid these issues. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param logicalIndex is the index of a character in the text. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The visual position of this character. + * + * @see ubidi_getLogicalMap + * @see ubidi_getLogicalIndex + * @see ubidi_getProcessedLength + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode); + +/** + * Get the logical text position from a visual position. + * If such a mapping is used many times on the same + * <code>UBiDi</code> object, then calling + * <code>ubidi_getVisualMap()</code> is more efficient.<p> + * + * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no + * logical position because the corresponding text character is a Bidi mark + * inserted in the output by option <code>#UBIDI_OPTION_INSERT_MARKS</code>. + * <p> + * This is the inverse function to <code>ubidi_getVisualIndex()</code>. + * <p> + * When the visual output is altered by using options of + * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, + * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, + * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical position returned may not + * be correct. It is advised to use, when possible, reordering options + * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param visualIndex is the visual position of a character. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The index of this character in the text. + * + * @see ubidi_getVisualMap + * @see ubidi_getVisualIndex + * @see ubidi_getResultLength + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode); + +/** + * Get a logical-to-visual index map (array) for the characters in the UBiDi + * (paragraph or line) object. + * <p> + * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the + * corresponding text characters are Bidi controls removed from the visual + * output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. + * <p> + * When the visual output is altered by using options of + * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, + * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, + * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not + * be correct. It is advised to use, when possible, reordering options + * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. + * <p> + * Note that in right-to-left runs, this mapping places + * second surrogates before first ones (which is generally a bad idea) + * and combining characters before base characters. + * Use of <code>ubidi_writeReordered()</code>, optionally with the + * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead + * of using the mapping, in order to avoid these issues. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code> + * indexes which will reflect the reordering of the characters. + * If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number + * of elements allocated in <code>indexMap</code> must be no less than + * <code>ubidi_getResultLength()</code>. + * The array does not need to be initialized.<br><br> + * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @see ubidi_getVisualMap + * @see ubidi_getVisualIndex + * @see ubidi_getProcessedLength + * @see ubidi_getResultLength + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); + +/** + * Get a visual-to-logical index map (array) for the characters in the UBiDi + * (paragraph or line) object. + * <p> + * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the + * corresponding text characters are Bidi marks inserted in the visual output + * by the option <code>#UBIDI_OPTION_INSERT_MARKS</code>. + * <p> + * When the visual output is altered by using options of + * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, + * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, + * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not + * be correct. It is advised to use, when possible, reordering options + * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code> + * indexes which will reflect the reordering of the characters. + * If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number + * of elements allocated in <code>indexMap</code> must be no less than + * <code>ubidi_getProcessedLength()</code>. + * The array does not need to be initialized.<br><br> + * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @see ubidi_getLogicalMap + * @see ubidi_getLogicalIndex + * @see ubidi_getProcessedLength + * @see ubidi_getResultLength + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); + +/** + * This is a convenience function that does not use a UBiDi object. + * It is intended to be used for when an application has determined the levels + * of objects (character sequences) and just needs to have them reordered (L2). + * This is equivalent to using <code>ubidi_getLogicalMap()</code> on a + * <code>UBiDi</code> object. + * + * @param levels is an array with <code>length</code> levels that have been determined by + * the application. + * + * @param length is the number of levels in the array, or, semantically, + * the number of objects to be reordered. + * It must be <code>length>0</code>. + * + * @param indexMap is a pointer to an array of <code>length</code> + * indexes which will reflect the reordering of the characters. + * The array does not need to be initialized.<p> + * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>. + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); + +/** + * This is a convenience function that does not use a UBiDi object. + * It is intended to be used for when an application has determined the levels + * of objects (character sequences) and just needs to have them reordered (L2). + * This is equivalent to using <code>ubidi_getVisualMap()</code> on a + * <code>UBiDi</code> object. + * + * @param levels is an array with <code>length</code> levels that have been determined by + * the application. + * + * @param length is the number of levels in the array, or, semantically, + * the number of objects to be reordered. + * It must be <code>length>0</code>. + * + * @param indexMap is a pointer to an array of <code>length</code> + * indexes which will reflect the reordering of the characters. + * The array does not need to be initialized.<p> + * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>. + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); + +/** + * Invert an index map. + * The index mapping of the first map is inverted and written to + * the second one. + * + * @param srcMap is an array with <code>length</code> elements + * which defines the original mapping from a source array containing + * <code>length</code> elements to a destination array. + * Some elements of the source array may have no mapping in the + * destination array. In that case, their value will be + * the special value <code>UBIDI_MAP_NOWHERE</code>. + * All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>. + * Some elements may have a value >= <code>length</code>, if the + * destination array has more elements than the source array. + * There must be no duplicate indexes (two or more elements with the + * same value except <code>UBIDI_MAP_NOWHERE</code>). + * + * @param destMap is an array with a number of elements equal to 1 + the highest + * value in <code>srcMap</code>. + * <code>destMap</code> will be filled with the inverse mapping. + * If element with index i in <code>srcMap</code> has a value k different + * from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of + * the source array maps to element k in the destination array. + * The inverse map will have value i in its k-th element. + * For all elements of the destination array which do not map to + * an element in the source array, the corresponding element in the + * inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>. + * + * @param length is the length of each array. + * @see UBIDI_MAP_NOWHERE + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length); + +/** option flags for ubidi_writeReordered() */ + +/** + * option bit for ubidi_writeReordered(): + * keep combining characters after their base characters in RTL runs + * + * @see ubidi_writeReordered + * @stable ICU 2.0 + */ +#define UBIDI_KEEP_BASE_COMBINING 1 + +/** + * option bit for ubidi_writeReordered(): + * replace characters with the "mirrored" property in RTL runs + * by their mirror-image mappings + * + * @see ubidi_writeReordered + * @stable ICU 2.0 + */ +#define UBIDI_DO_MIRRORING 2 + +/** + * option bit for ubidi_writeReordered(): + * surround the run with LRMs if necessary; + * this is part of the approximate "inverse Bidi" algorithm + * + * <p>This option does not imply corresponding adjustment of the index + * mappings.</p> + * + * @see ubidi_setInverse + * @see ubidi_writeReordered + * @stable ICU 2.0 + */ +#define UBIDI_INSERT_LRM_FOR_NUMERIC 4 + +/** + * option bit for ubidi_writeReordered(): + * remove Bidi control characters + * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC) + * + * <p>This option does not imply corresponding adjustment of the index + * mappings.</p> + * + * @see ubidi_writeReordered + * @stable ICU 2.0 + */ +#define UBIDI_REMOVE_BIDI_CONTROLS 8 + +/** + * option bit for ubidi_writeReordered(): + * write the output in reverse order + * + * <p>This has the same effect as calling <code>ubidi_writeReordered()</code> + * first without this option, and then calling + * <code>ubidi_writeReverse()</code> without mirroring. + * Doing this in the same step is faster and avoids a temporary buffer. + * An example for using this option is output to a character terminal that + * is designed for RTL scripts and stores text in reverse order.</p> + * + * @see ubidi_writeReordered + * @stable ICU 2.0 + */ +#define UBIDI_OUTPUT_REVERSE 16 + +/** + * Get the length of the source text processed by the last call to + * <code>ubidi_setPara()</code>. This length may be different from the length + * of the source text if option <code>#UBIDI_OPTION_STREAMING</code> + * has been set. + * <br> + * Note that whenever the length of the text affects the execution or the + * result of a function, it is the processed length which must be considered, + * except for <code>ubidi_setPara</code> (which receives unprocessed source + * text) and <code>ubidi_getLength</code> (which returns the original length + * of the source text).<br> + * In particular, the processed length is the one to consider in the following + * cases: + * <ul> + * <li>maximum value of the <code>limit</code> argument of + * <code>ubidi_setLine</code></li> + * <li>maximum value of the <code>charIndex</code> argument of + * <code>ubidi_getParagraph</code></li> + * <li>maximum value of the <code>charIndex</code> argument of + * <code>ubidi_getLevelAt</code></li> + * <li>number of elements in the array returned by <code>ubidi_getLevels</code></li> + * <li>maximum value of the <code>logicalStart</code> argument of + * <code>ubidi_getLogicalRun</code></li> + * <li>maximum value of the <code>logicalIndex</code> argument of + * <code>ubidi_getVisualIndex</code></li> + * <li>number of elements filled in the <code>*indexMap</code> argument of + * <code>ubidi_getLogicalMap</code></li> + * <li>length of text processed by <code>ubidi_writeReordered</code></li> + * </ul> + * + * @param pBiDi is the paragraph <code>UBiDi</code> object. + * + * @return The length of the part of the source text processed by + * the last call to <code>ubidi_setPara</code>. + * @see ubidi_setPara + * @see UBIDI_OPTION_STREAMING + * @stable ICU 3.6 + */ +U_CAPI int32_t U_EXPORT2 +ubidi_getProcessedLength(const UBiDi *pBiDi); + +/** + * Get the length of the reordered text resulting from the last call to + * <code>ubidi_setPara()</code>. This length may be different from the length + * of the source text if option <code>#UBIDI_OPTION_INSERT_MARKS</code> + * or option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> has been set. + * <br> + * This resulting length is the one to consider in the following cases: + * <ul> + * <li>maximum value of the <code>visualIndex</code> argument of + * <code>ubidi_getLogicalIndex</code></li> + * <li>number of elements of the <code>*indexMap</code> argument of + * <code>ubidi_getVisualMap</code></li> + * </ul> + * Note that this length stays identical to the source text length if + * Bidi marks are inserted or removed using option bits of + * <code>ubidi_writeReordered</code>, or if option + * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> has been set. + * + * @param pBiDi is the paragraph <code>UBiDi</code> object. + * + * @return The length of the reordered text resulting from + * the last call to <code>ubidi_setPara</code>. + * @see ubidi_setPara + * @see UBIDI_OPTION_INSERT_MARKS + * @see UBIDI_OPTION_REMOVE_CONTROLS + * @stable ICU 3.6 + */ +U_CAPI int32_t U_EXPORT2 +ubidi_getResultLength(const UBiDi *pBiDi); + +U_CDECL_BEGIN + +#ifndef U_HIDE_DEPRECATED_API +/** + * Value returned by <code>UBiDiClassCallback</code> callbacks when + * there is no need to override the standard Bidi class for a given code point. + * + * This constant is deprecated; use u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1 instead. + * + * @see UBiDiClassCallback + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ +#define U_BIDI_CLASS_DEFAULT U_CHAR_DIRECTION_COUNT +#endif // U_HIDE_DEPRECATED_API + +/** + * Callback type declaration for overriding default Bidi class values with + * custom ones. + * <p>Usually, the function pointer will be propagated to a <code>UBiDi</code> + * object by calling the <code>ubidi_setClassCallback()</code> function; + * then the callback will be invoked by the UBA implementation any time the + * class of a character is to be determined.</p> + * + * @param context is a pointer to the callback private data. + * + * @param c is the code point to get a Bidi class for. + * + * @return The directional property / Bidi class for the given code point + * <code>c</code> if the default class has been overridden, or + * <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code> + * if the standard Bidi class value for <code>c</code> is to be used. + * @see ubidi_setClassCallback + * @see ubidi_getClassCallback + * @stable ICU 3.6 + */ +typedef UCharDirection U_CALLCONV +UBiDiClassCallback(const void *context, UChar32 c); + +U_CDECL_END + +/** + * Retrieve the Bidi class for a given code point. + * <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a + * value other than <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>, + * that value is used; otherwise the default class determination mechanism is invoked.</p> + * + * @param pBiDi is the paragraph <code>UBiDi</code> object. + * + * @param c is the code point whose Bidi class must be retrieved. + * + * @return The Bidi class for character <code>c</code> based + * on the given <code>pBiDi</code> instance. + * @see UBiDiClassCallback + * @stable ICU 3.6 + */ +U_CAPI UCharDirection U_EXPORT2 +ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c); + +/** + * Set the callback function and callback data used by the UBA + * implementation for Bidi class determination. + * <p>This may be useful for assigning Bidi classes to PUA characters, or + * for special application needs. For instance, an application may want to + * handle all spaces like L or R characters (according to the base direction) + * when creating the visual ordering of logical lines which are part of a report + * organized in columns: there should not be interaction between adjacent + * cells.<p> + * + * @param pBiDi is the paragraph <code>UBiDi</code> object. + * + * @param newFn is the new callback function pointer. + * + * @param newContext is the new callback context pointer. This can be NULL. + * + * @param oldFn fillin: Returns the old callback function pointer. This can be + * NULL. + * + * @param oldContext fillin: Returns the old callback's context. This can be + * NULL. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @see ubidi_getClassCallback + * @stable ICU 3.6 + */ +U_CAPI void U_EXPORT2 +ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, + const void *newContext, UBiDiClassCallback **oldFn, + const void **oldContext, UErrorCode *pErrorCode); + +/** + * Get the current callback function used for Bidi class determination. + * + * @param pBiDi is the paragraph <code>UBiDi</code> object. + * + * @param fn fillin: Returns the callback function pointer. + * + * @param context fillin: Returns the callback's private context. + * + * @see ubidi_setClassCallback + * @stable ICU 3.6 + */ +U_CAPI void U_EXPORT2 +ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context); + +/** + * Take a <code>UBiDi</code> object containing the reordering + * information for a piece of text (one or more paragraphs) set by + * <code>ubidi_setPara()</code> or for a line of text set by + * <code>ubidi_setLine()</code> and write a reordered string to the + * destination buffer. + * + * This function preserves the integrity of characters with multiple + * code units and (optionally) combining characters. + * Characters in RTL runs can be replaced by mirror-image characters + * in the destination buffer. Note that "real" mirroring has + * to be done in a rendering engine by glyph selection + * and that for many "mirrored" characters there are no + * Unicode characters as mirror-image equivalents. + * There are also options to insert or remove Bidi control + * characters; see the description of the <code>destSize</code> + * and <code>options</code> parameters and of the option bit flags. + * + * @param pBiDi A pointer to a <code>UBiDi</code> object that + * is set by <code>ubidi_setPara()</code> or + * <code>ubidi_setLine()</code> and contains the reordering + * information for the text that it was defined for, + * as well as a pointer to that text.<br><br> + * The text was aliased (only the pointer was stored + * without copying the contents) and must not have been modified + * since the <code>ubidi_setPara()</code> call. + * + * @param dest A pointer to where the reordered text is to be copied. + * The source text and <code>dest[destSize]</code> + * must not overlap. + * + * @param destSize The size of the <code>dest</code> buffer, + * in number of UChars. + * If the <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code> + * option is set, then the destination length could be + * as large as + * <code>ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>. + * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option + * is set, then the destination length may be less than + * <code>ubidi_getLength(pBiDi)</code>. + * If none of these options is set, then the destination length + * will be exactly <code>ubidi_getProcessedLength(pBiDi)</code>. + * + * @param options A bit set of options for the reordering that control + * how the reordered text is written. + * The options include mirroring the characters on a code + * point basis and inserting LRM characters, which is used + * especially for transforming visually stored text + * to logically stored text (although this is still an + * imperfect implementation of an "inverse Bidi" algorithm + * because it uses the "forward Bidi" algorithm at its core). + * The available options are: + * <code>#UBIDI_DO_MIRRORING</code>, + * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>, + * <code>#UBIDI_KEEP_BASE_COMBINING</code>, + * <code>#UBIDI_OUTPUT_REVERSE</code>, + * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The length of the output string. + * + * @see ubidi_getProcessedLength + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ubidi_writeReordered(UBiDi *pBiDi, + UChar *dest, int32_t destSize, + uint16_t options, + UErrorCode *pErrorCode); + +/** + * Reverse a Right-To-Left run of Unicode text. + * + * This function preserves the integrity of characters with multiple + * code units and (optionally) combining characters. + * Characters can be replaced by mirror-image characters + * in the destination buffer. Note that "real" mirroring has + * to be done in a rendering engine by glyph selection + * and that for many "mirrored" characters there are no + * Unicode characters as mirror-image equivalents. + * There are also options to insert or remove Bidi control + * characters. + * + * This function is the implementation for reversing RTL runs as part + * of <code>ubidi_writeReordered()</code>. For detailed descriptions + * of the parameters, see there. + * Since no Bidi controls are inserted here, the output string length + * will never exceed <code>srcLength</code>. + * + * @see ubidi_writeReordered + * + * @param src A pointer to the RTL run text. + * + * @param srcLength The length of the RTL run. + * + * @param dest A pointer to where the reordered text is to be copied. + * <code>src[srcLength]</code> and <code>dest[destSize]</code> + * must not overlap. + * + * @param destSize The size of the <code>dest</code> buffer, + * in number of UChars. + * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option + * is set, then the destination length may be less than + * <code>srcLength</code>. + * If this option is not set, then the destination length + * will be exactly <code>srcLength</code>. + * + * @param options A bit set of options for the reordering that control + * how the reordered text is written. + * See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The length of the output string. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ubidi_writeReverse(const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + uint16_t options, + UErrorCode *pErrorCode); + +/*#define BIDI_SAMPLE_CODE*/ +/*@}*/ + +#endif diff --git a/thirdparty/icu4c/common/unicode/ubiditransform.h b/thirdparty/icu4c/common/unicode/ubiditransform.h new file mode 100644 index 0000000000..2dd7564010 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ubiditransform.h @@ -0,0 +1,326 @@ +/* +****************************************************************************** +* +* © 2016 and later: Unicode, Inc. and others. +* License & terms of use: http://www.unicode.org/copyright.html +* +****************************************************************************** +* file name: ubiditransform.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2016jul24 +* created by: Lina Kemmel +* +*/ + +#ifndef UBIDITRANSFORM_H +#define UBIDITRANSFORM_H + +#include "unicode/utypes.h" +#include "unicode/ubidi.h" +#include "unicode/uchar.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +/** + * \file + * \brief Bidi Transformations + */ + +/** + * `UBiDiOrder` indicates the order of text. + * + * This bidi transformation engine supports all possible combinations (4 in + * total) of input and output text order: + * + * - <logical input, visual output>: unless the output direction is RTL, this + * corresponds to a normal operation of the Bidi algorithm as described in the + * Unicode Technical Report and implemented by `UBiDi` when the + * reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL + * mode is not supported by `UBiDi` and is accomplished through + * reversing a visual LTR string, + * + * - <visual input, logical output>: unless the input direction is RTL, this + * corresponds to an "inverse bidi algorithm" in `UBiDi` with the + * reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`. + * Visual RTL mode is not not supported by `UBiDi` and is + * accomplished through reversing a visual LTR string, + * + * - <logical input, logical output>: if the input and output base directions + * mismatch, this corresponds to the `UBiDi` implementation with the + * reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the + * input and output base directions are identical, the transformation engine + * will only handle character mirroring and Arabic shaping operations without + * reordering, + * + * - <visual input, visual output>: this reordering mode is not supported by + * the `UBiDi` engine; it implies character mirroring, Arabic + * shaping, and - if the input/output base directions mismatch - string + * reverse operations. + * @see ubidi_setInverse + * @see ubidi_setReorderingMode + * @see UBIDI_REORDER_DEFAULT + * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT + * @see UBIDI_REORDER_RUNS_ONLY + * @stable ICU 58 + */ +typedef enum { + /** 0: Constant indicating a logical order. + * This is the default for input text. + * @stable ICU 58 + */ + UBIDI_LOGICAL = 0, + /** 1: Constant indicating a visual order. + * This is a default for output text. + * @stable ICU 58 + */ + UBIDI_VISUAL +} UBiDiOrder; + +/** + * <code>UBiDiMirroring</code> indicates whether or not characters with the + * "mirrored" property in RTL runs should be replaced with their mirror-image + * counterparts. + * @see UBIDI_DO_MIRRORING + * @see ubidi_setReorderingOptions + * @see ubidi_writeReordered + * @see ubidi_writeReverse + * @stable ICU 58 + */ +typedef enum { + /** 0: Constant indicating that character mirroring should not be + * performed. + * This is the default. + * @stable ICU 58 + */ + UBIDI_MIRRORING_OFF = 0, + /** 1: Constant indicating that character mirroring should be performed. + * This corresponds to calling <code>ubidi_writeReordered</code> or + * <code>ubidi_writeReverse</code> with the + * <code>UBIDI_DO_MIRRORING</code> option bit set. + * @stable ICU 58 + */ + UBIDI_MIRRORING_ON +} UBiDiMirroring; + +/** + * Forward declaration of the <code>UBiDiTransform</code> structure that stores + * information used by the layout transformation engine. + * @stable ICU 58 + */ +typedef struct UBiDiTransform UBiDiTransform; + +/** + * Performs transformation of text from the bidi layout defined by the input + * ordering scheme to the bidi layout defined by the output ordering scheme, + * and applies character mirroring and Arabic shaping operations.<p> + * In terms of <code>UBiDi</code>, such a transformation implies: + * <ul> + * <li>calling <code>ubidi_setReorderingMode</code> as needed (when the + * reordering mode is other than normal),</li> + * <li>calling <code>ubidi_setInverse</code> as needed (when text should be + * transformed from a visual to a logical form),</li> + * <li>resolving embedding levels of each character in the input text by + * calling <code>ubidi_setPara</code>,</li> + * <li>reordering the characters based on the computed embedding levels, also + * performing character mirroring as needed, and streaming the result to the + * output, by calling <code>ubidi_writeReordered</code>,</li> + * <li>performing Arabic digit and letter shaping on the output text by calling + * <code>u_shapeArabic</code>.</li> + * </ul> + * An "ordering scheme" encompasses the base direction and the order of text, + * and these characteristics must be defined by the caller for both input and + * output explicitly .<p> + * There are 36 possible combinations of <input, output> ordering schemes, + * which are partially supported by <code>UBiDi</code> already. Examples of the + * currently supported combinations: + * <ul> + * <li><Logical LTR, Visual LTR>: this is equivalent to calling + * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li> + * <li><Logical RTL, Visual LTR>: this is equivalent to calling + * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li> + * <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to + * calling <code>ubidi_setPara</code> with + * <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li> + * <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to + * calling <code>ubidi_setPara</code> with + * <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li> + * <li><Visual LTR, Logical LTR>: this is equivalent to + * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then + * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li> + * <li><Visual LTR, Logical RTL>: this is equivalent to + * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then + * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li> + * </ul> + * All combinations that involve the Visual RTL scheme are unsupported by + * <code>UBiDi</code>, for instance: + * <ul> + * <li><Logical LTR, Visual RTL>,</li> + * <li><Visual RTL, Logical RTL>.</li> + * </ul> + * <p>Example of usage of the transformation engine:<br> + * <pre> + * \code + * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0}; + * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0}; + * UErrorCode errorCode = U_ZERO_ERROR; + * // Run a transformation. + * ubiditransform_transform(pBidiTransform, + * text1, -1, text2, -1, + * UBIDI_LTR, UBIDI_VISUAL, + * UBIDI_RTL, UBIDI_LOGICAL, + * UBIDI_MIRRORING_OFF, + * U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED, + * &errorCode); + * // Do something with text2. + * text2[4] = '2'; + * // Run a reverse transformation. + * ubiditransform_transform(pBidiTransform, + * text2, -1, text1, -1, + * UBIDI_RTL, UBIDI_LOGICAL, + * UBIDI_LTR, UBIDI_VISUAL, + * UBIDI_MIRRORING_OFF, + * U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED, + * &errorCode); + *\endcode + * </pre> + * </p> + * + * @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object + * allocated with <code>ubiditransform_open()</code> or + * <code>NULL</code>.<p> + * This object serves for one-time setup to amortize initialization + * overheads. Use of this object is not thread-safe. All other threads + * should allocate a new <code>UBiDiTransform</code> object by calling + * <code>ubiditransform_open()</code> before using it. Alternatively, + * a caller can set this parameter to <code>NULL</code>, in which case + * the object will be allocated by the engine on the fly.</p> + * @param src A pointer to the text that the Bidi layout transformations will + * be performed on. + * <p><strong>Note:</strong> the text must be (at least) + * <code>srcLength</code> long.</p> + * @param srcLength The length of the text, in number of UChars. If + * <code>length == -1</code> then the text must be zero-terminated. + * @param dest A pointer to where the processed text is to be copied. + * @param destSize The size of the <code>dest</code> buffer, in number of + * UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set, + * then the destination length could be as large as + * <code>srcLength * 2</code>. Otherwise, the destination length will + * not exceed <code>srcLength</code>. If the caller reserves the last + * position for zero-termination, it should be excluded from + * <code>destSize</code>. + * <p><code>destSize == -1</code> is allowed and makes sense when + * <code>dest</code> was holds some meaningful value, e.g. that of + * <code>src</code>. In this case <code>dest</code> must be + * zero-terminated.</p> + * @param inParaLevel A base embedding level of the input as defined in + * <code>ubidi_setPara</code> documentation for the + * <code>paraLevel</code> parameter. + * @param inOrder An order of the input, which can be one of the + * <code>UBiDiOrder</code> values. + * @param outParaLevel A base embedding level of the output as defined in + * <code>ubidi_setPara</code> documentation for the + * <code>paraLevel</code> parameter. + * @param outOrder An order of the output, which can be one of the + * <code>UBiDiOrder</code> values. + * @param doMirroring Indicates whether or not to perform character mirroring, + * and can accept one of the <code>UBiDiMirroring</code> values. + * @param shapingOptions Arabic digit and letter shaping options defined in the + * ushape.h documentation. + * <p><strong>Note:</strong> Direction indicator options are computed by + * the transformation engine based on the effective ordering schemes, so + * user-defined direction indicators will be ignored.</p> + * @param pErrorCode A pointer to an error code value. + * + * @return The destination length, i.e. the number of UChars written to + * <code>dest</code>. If the transformation fails, the return value + * will be 0 (and the error code will be written to + * <code>pErrorCode</code>). + * + * @see UBiDiLevel + * @see UBiDiOrder + * @see UBiDiMirroring + * @see ubidi_setPara + * @see u_shapeArabic + * @stable ICU 58 + */ +U_CAPI uint32_t U_EXPORT2 +ubiditransform_transform(UBiDiTransform *pBiDiTransform, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + UBiDiLevel inParaLevel, UBiDiOrder inOrder, + UBiDiLevel outParaLevel, UBiDiOrder outOrder, + UBiDiMirroring doMirroring, uint32_t shapingOptions, + UErrorCode *pErrorCode); + +/** + * Allocates a <code>UBiDiTransform</code> object. This object can be reused, + * e.g. with different ordering schemes, mirroring or shaping options.<p> + * <strong>Note:</strong>The object can only be reused in the same thread. + * All other threads should allocate a new <code>UBiDiTransform</code> object + * before using it.<p> + * Example of usage:<p> + * <pre> + * \code + * UErrorCode errorCode = U_ZERO_ERROR; + * // Open a new UBiDiTransform. + * UBiDiTransform* transform = ubiditransform_open(&errorCode); + * // Run a transformation. + * ubiditransform_transform(transform, + * text1, -1, text2, -1, + * UBIDI_RTL, UBIDI_LOGICAL, + * UBIDI_LTR, UBIDI_VISUAL, + * UBIDI_MIRRORING_ON, + * U_SHAPE_DIGITS_EN2AN, + * &errorCode); + * // Do something with the output text and invoke another transformation using + * // that text as input. + * ubiditransform_transform(transform, + * text2, -1, text3, -1, + * UBIDI_LTR, UBIDI_VISUAL, + * UBIDI_RTL, UBIDI_VISUAL, + * UBIDI_MIRRORING_ON, + * 0, &errorCode); + *\endcode + * </pre> + * <p> + * The <code>UBiDiTransform</code> object must be deallocated by calling + * <code>ubiditransform_close()</code>. + * + * @return An empty <code>UBiDiTransform</code> object. + * @stable ICU 58 + */ +U_CAPI UBiDiTransform* U_EXPORT2 +ubiditransform_open(UErrorCode *pErrorCode); + +/** + * Deallocates the given <code>UBiDiTransform</code> object. + * @stable ICU 58 + */ +U_CAPI void U_EXPORT2 +ubiditransform_close(UBiDiTransform *pBidiTransform); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUBiDiTransformPointer + * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 58 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close); + +U_NAMESPACE_END + +#endif + +#endif diff --git a/thirdparty/icu4c/common/unicode/ubrk.h b/thirdparty/icu4c/common/unicode/ubrk.h new file mode 100644 index 0000000000..37189a8598 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ubrk.h @@ -0,0 +1,631 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1996-2015, International Business Machines Corporation and others. +* All Rights Reserved. +****************************************************************************** +*/ + +#ifndef UBRK_H +#define UBRK_H + +#include "unicode/utypes.h" +#include "unicode/uloc.h" +#include "unicode/utext.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +/** + * A text-break iterator. + * For usage in C programs. + */ +#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR +# define UBRK_TYPEDEF_UBREAK_ITERATOR + /** + * Opaque type representing an ICU Break iterator object. + * @stable ICU 2.0 + */ + typedef struct UBreakIterator UBreakIterator; +#endif + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/parseerr.h" + +/** + * \file + * \brief C API: BreakIterator + * + * <h2> BreakIterator C API </h2> + * + * The BreakIterator C API defines methods for finding the location + * of boundaries in text. Pointer to a UBreakIterator maintain a + * current position and scan over text returning the index of characters + * where boundaries occur. + * <p> + * Line boundary analysis determines where a text string can be broken + * when line-wrapping. The mechanism correctly handles punctuation and + * hyphenated words. + * <p> + * Note: The locale keyword "lb" can be used to modify line break + * behavior according to the CSS level 3 line-break options, see + * <http://dev.w3.org/csswg/css-text/#line-breaking>. For example: + * "ja@lb=strict", "zh@lb=loose". + * <p> + * Sentence boundary analysis allows selection with correct + * interpretation of periods within numbers and abbreviations, and + * trailing punctuation marks such as quotation marks and parentheses. + * <p> + * Note: The locale keyword "ss" can be used to enable use of + * segmentation suppression data (preventing breaks in English after + * abbreviations such as "Mr." or "Est.", for example), as follows: + * "en@ss=standard". + * <p> + * Word boundary analysis is used by search and replace functions, as + * well as within text editing applications that allow the user to + * select words with a double click. Word selection provides correct + * interpretation of punctuation marks within and following + * words. Characters that are not part of a word, such as symbols or + * punctuation marks, have word-breaks on both sides. + * <p> + * Character boundary analysis identifies the boundaries of + * "Extended Grapheme Clusters", which are groupings of codepoints + * that should be treated as character-like units for many text operations. + * Please see Unicode Standard Annex #29, Unicode Text Segmentation, + * http://www.unicode.org/reports/tr29/ for additional information + * on grapheme clusters and guidelines on their use. + * <p> + * Title boundary analysis locates all positions, + * typically starts of words, that should be set to Title Case + * when title casing the text. + * <p> + * The text boundary positions are found according to the rules + * described in Unicode Standard Annex #29, Text Boundaries, and + * Unicode Standard Annex #14, Line Breaking Properties. These + * are available at http://www.unicode.org/reports/tr14/ and + * http://www.unicode.org/reports/tr29/. + * <p> + * In addition to the plain C API defined in this header file, an + * object oriented C++ API with equivalent functionality is defined in the + * file brkiter.h. + * <p> + * Code snippets illustrating the use of the Break Iterator APIs + * are available in the ICU User Guide, + * http://icu-project.org/userguide/boundaryAnalysis.html + * and in the sample program icu/source/samples/break/break.cpp + */ + +/** The possible types of text boundaries. @stable ICU 2.0 */ +typedef enum UBreakIteratorType { + /** Character breaks @stable ICU 2.0 */ + UBRK_CHARACTER = 0, + /** Word breaks @stable ICU 2.0 */ + UBRK_WORD = 1, + /** Line breaks @stable ICU 2.0 */ + UBRK_LINE = 2, + /** Sentence breaks @stable ICU 2.0 */ + UBRK_SENTENCE = 3, + +#ifndef U_HIDE_DEPRECATED_API + /** + * Title Case breaks + * The iterator created using this type locates title boundaries as described for + * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, + * please use Word Boundary iterator. + * + * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later. + */ + UBRK_TITLE = 4, + /** + * One more than the highest normal UBreakIteratorType value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UBRK_COUNT = 5 +#endif // U_HIDE_DEPRECATED_API +} UBreakIteratorType; + +/** Value indicating all text boundaries have been returned. + * @stable ICU 2.0 + */ +#define UBRK_DONE ((int32_t) -1) + + +/** + * Enum constants for the word break tags returned by + * getRuleStatus(). A range of values is defined for each category of + * word, to allow for further subdivisions of a category in future releases. + * Applications should check for tag values falling within the range, rather + * than for single individual values. + * + * The numeric values of all of these constants are stable (will not change). + * + * @stable ICU 2.2 +*/ +typedef enum UWordBreak { + /** Tag value for "words" that do not fit into any of other categories. + * Includes spaces and most punctuation. */ + UBRK_WORD_NONE = 0, + /** Upper bound for tags for uncategorized words. */ + UBRK_WORD_NONE_LIMIT = 100, + /** Tag value for words that appear to be numbers, lower limit. */ + UBRK_WORD_NUMBER = 100, + /** Tag value for words that appear to be numbers, upper limit. */ + UBRK_WORD_NUMBER_LIMIT = 200, + /** Tag value for words that contain letters, excluding + * hiragana, katakana or ideographic characters, lower limit. */ + UBRK_WORD_LETTER = 200, + /** Tag value for words containing letters, upper limit */ + UBRK_WORD_LETTER_LIMIT = 300, + /** Tag value for words containing kana characters, lower limit */ + UBRK_WORD_KANA = 300, + /** Tag value for words containing kana characters, upper limit */ + UBRK_WORD_KANA_LIMIT = 400, + /** Tag value for words containing ideographic characters, lower limit */ + UBRK_WORD_IDEO = 400, + /** Tag value for words containing ideographic characters, upper limit */ + UBRK_WORD_IDEO_LIMIT = 500 +} UWordBreak; + +/** + * Enum constants for the line break tags returned by getRuleStatus(). + * A range of values is defined for each category of + * word, to allow for further subdivisions of a category in future releases. + * Applications should check for tag values falling within the range, rather + * than for single individual values. + * + * The numeric values of all of these constants are stable (will not change). + * + * @stable ICU 2.8 +*/ +typedef enum ULineBreakTag { + /** Tag value for soft line breaks, positions at which a line break + * is acceptable but not required */ + UBRK_LINE_SOFT = 0, + /** Upper bound for soft line breaks. */ + UBRK_LINE_SOFT_LIMIT = 100, + /** Tag value for a hard, or mandatory line break */ + UBRK_LINE_HARD = 100, + /** Upper bound for hard line breaks. */ + UBRK_LINE_HARD_LIMIT = 200 +} ULineBreakTag; + + + +/** + * Enum constants for the sentence break tags returned by getRuleStatus(). + * A range of values is defined for each category of + * sentence, to allow for further subdivisions of a category in future releases. + * Applications should check for tag values falling within the range, rather + * than for single individual values. + * + * The numeric values of all of these constants are stable (will not change). + * + * @stable ICU 2.8 +*/ +typedef enum USentenceBreakTag { + /** Tag value for for sentences ending with a sentence terminator + * ('.', '?', '!', etc.) character, possibly followed by a + * hard separator (CR, LF, PS, etc.) + */ + UBRK_SENTENCE_TERM = 0, + /** Upper bound for tags for sentences ended by sentence terminators. */ + UBRK_SENTENCE_TERM_LIMIT = 100, + /** Tag value for for sentences that do not contain an ending + * sentence terminator ('.', '?', '!', etc.) character, but + * are ended only by a hard separator (CR, LF, PS, etc.) or end of input. + */ + UBRK_SENTENCE_SEP = 100, + /** Upper bound for tags for sentences ended by a separator. */ + UBRK_SENTENCE_SEP_LIMIT = 200 + /** Tag value for a hard, or mandatory line break */ +} USentenceBreakTag; + + +/** + * Open a new UBreakIterator for locating text boundaries for a specified locale. + * A UBreakIterator may be used for detecting character, line, word, + * and sentence breaks in text. + * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD, + * UBRK_LINE, UBRK_SENTENCE + * @param locale The locale specifying the text-breaking conventions. Note that + * locale keys such as "lb" and "ss" may be used to modify text break behavior, + * see general discussion of BreakIterator C API. + * @param text The text to be iterated over. May be null, in which case ubrk_setText() is + * used to specify the text to be iterated. + * @param textLength The number of characters in text, or -1 if null-terminated. + * @param status A UErrorCode to receive any errors. + * @return A UBreakIterator for the specified locale. + * @see ubrk_openRules + * @stable ICU 2.0 + */ +U_CAPI UBreakIterator* U_EXPORT2 +ubrk_open(UBreakIteratorType type, + const char *locale, + const UChar *text, + int32_t textLength, + UErrorCode *status); + +/** + * Open a new UBreakIterator for locating text boundaries using specified breaking rules. + * The rule syntax is ... (TBD) + * @param rules A set of rules specifying the text breaking conventions. + * @param rulesLength The number of characters in rules, or -1 if null-terminated. + * @param text The text to be iterated over. May be null, in which case ubrk_setText() is + * used to specify the text to be iterated. + * @param textLength The number of characters in text, or -1 if null-terminated. + * @param parseErr Receives position and context information for any syntax errors + * detected while parsing the rules. + * @param status A UErrorCode to receive any errors. + * @return A UBreakIterator for the specified rules. + * @see ubrk_open + * @stable ICU 2.2 + */ +U_CAPI UBreakIterator* U_EXPORT2 +ubrk_openRules(const UChar *rules, + int32_t rulesLength, + const UChar *text, + int32_t textLength, + UParseError *parseErr, + UErrorCode *status); + +/** + * Open a new UBreakIterator for locating text boundaries using precompiled binary rules. + * Opening a UBreakIterator this way is substantially faster than using ubrk_openRules. + * Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not + * compatible across different major versions of ICU, nor across platforms of different + * endianness or different base character set family (ASCII vs EBCDIC). + * @param binaryRules A set of compiled binary rules specifying the text breaking + * conventions. Ownership of the storage containing the compiled + * rules remains with the caller of this function. The compiled + * rules must not be modified or deleted during the life of the + * break iterator. + * @param rulesLength The length of binaryRules in bytes; must be >= 0. + * @param text The text to be iterated over. May be null, in which case + * ubrk_setText() is used to specify the text to be iterated. + * @param textLength The number of characters in text, or -1 if null-terminated. + * @param status Pointer to UErrorCode to receive any errors. + * @return UBreakIterator for the specified rules. + * @see ubrk_getBinaryRules + * @stable ICU 59 + */ +U_CAPI UBreakIterator* U_EXPORT2 +ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, + const UChar * text, int32_t textLength, + UErrorCode * status); + +/** + * Thread safe cloning operation + * @param bi iterator to be cloned + * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br> + * user allocated space for the new clone. If NULL new memory will be allocated. + * If buffer is not large enough, new memory will be allocated. + * Clients can use the U_BRK_SAFECLONE_BUFFERSIZE. + * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br> + * pointer to size of allocated space. + * If *pBufferSize == 0, a sufficient size for use in cloning will + * be returned ('pre-flighting') + * If *pBufferSize is not enough for a stack-based safe clone, + * new memory will be allocated. + * @param status to indicate whether the operation went on smoothly or there were errors + * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary. + * @return pointer to the new clone + * @stable ICU 2.0 + */ +U_CAPI UBreakIterator * U_EXPORT2 +ubrk_safeClone( + const UBreakIterator *bi, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status); + +#ifndef U_HIDE_DEPRECATED_API + +/** + * A recommended size (in bytes) for the memory buffer to be passed to ubrk_saveClone(). + * @deprecated ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer. + */ +#define U_BRK_SAFECLONE_BUFFERSIZE 1 + +#endif /* U_HIDE_DEPRECATED_API */ + +/** +* Close a UBreakIterator. +* Once closed, a UBreakIterator may no longer be used. +* @param bi The break iterator to close. + * @stable ICU 2.0 +*/ +U_CAPI void U_EXPORT2 +ubrk_close(UBreakIterator *bi); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUBreakIteratorPointer + * "Smart pointer" class, closes a UBreakIterator via ubrk_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close); + +U_NAMESPACE_END + +#endif + +/** + * Sets an existing iterator to point to a new piece of text. + * The break iterator retains a pointer to the supplied text. + * The caller must not modify or delete the text while the BreakIterator + * retains the reference. + * + * @param bi The iterator to use + * @param text The text to be set + * @param textLength The length of the text + * @param status The error code + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ubrk_setText(UBreakIterator* bi, + const UChar* text, + int32_t textLength, + UErrorCode* status); + + +/** + * Sets an existing iterator to point to a new piece of text. + * + * All index positions returned by break iterator functions are + * native indices from the UText. For example, when breaking UTF-8 + * encoded text, the break positions returned by \ref ubrk_next, \ref ubrk_previous, etc. + * will be UTF-8 string indices, not UTF-16 positions. + * + * @param bi The iterator to use + * @param text The text to be set. + * This function makes a shallow clone of the supplied UText. This means + * that the caller is free to immediately close or otherwise reuse the + * UText that was passed as a parameter, but that the underlying text itself + * must not be altered while being referenced by the break iterator. + * @param status The error code + * @stable ICU 3.4 + */ +U_CAPI void U_EXPORT2 +ubrk_setUText(UBreakIterator* bi, + UText* text, + UErrorCode* status); + + + +/** + * Determine the most recently-returned text boundary. + * + * @param bi The break iterator to use. + * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous, + * \ref ubrk_first, or \ref ubrk_last. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ubrk_current(const UBreakIterator *bi); + +/** + * Advance the iterator to the boundary following the current boundary. + * + * @param bi The break iterator to use. + * @return The character index of the next text boundary, or UBRK_DONE + * if all text boundaries have been returned. + * @see ubrk_previous + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ubrk_next(UBreakIterator *bi); + +/** + * Set the iterator position to the boundary preceding the current boundary. + * + * @param bi The break iterator to use. + * @return The character index of the preceding text boundary, or UBRK_DONE + * if all text boundaries have been returned. + * @see ubrk_next + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ubrk_previous(UBreakIterator *bi); + +/** + * Set the iterator position to zero, the start of the text being scanned. + * @param bi The break iterator to use. + * @return The new iterator position (zero). + * @see ubrk_last + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ubrk_first(UBreakIterator *bi); + +/** + * Set the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned. + * This is not the same as the last character. + * @param bi The break iterator to use. + * @return The character offset immediately <EM>beyond</EM> the last character in the + * text being scanned. + * @see ubrk_first + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ubrk_last(UBreakIterator *bi); + +/** + * Set the iterator position to the first boundary preceding the specified offset. + * The new position is always smaller than offset, or UBRK_DONE. + * @param bi The break iterator to use. + * @param offset The offset to begin scanning. + * @return The text boundary preceding offset, or UBRK_DONE. + * @see ubrk_following + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ubrk_preceding(UBreakIterator *bi, + int32_t offset); + +/** + * Advance the iterator to the first boundary following the specified offset. + * The value returned is always greater than offset, or UBRK_DONE. + * @param bi The break iterator to use. + * @param offset The offset to begin scanning. + * @return The text boundary following offset, or UBRK_DONE. + * @see ubrk_preceding + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ubrk_following(UBreakIterator *bi, + int32_t offset); + +/** +* Get a locale for which text breaking information is available. +* A UBreakIterator in a locale returned by this function will perform the correct +* text breaking for the locale. +* @param index The index of the desired locale. +* @return A locale for which number text breaking information is available, or 0 if none. +* @see ubrk_countAvailable +* @stable ICU 2.0 +*/ +U_CAPI const char* U_EXPORT2 +ubrk_getAvailable(int32_t index); + +/** +* Determine how many locales have text breaking information available. +* This function is most useful as determining the loop ending condition for +* calls to \ref ubrk_getAvailable. +* @return The number of locales for which text breaking information is available. +* @see ubrk_getAvailable +* @stable ICU 2.0 +*/ +U_CAPI int32_t U_EXPORT2 +ubrk_countAvailable(void); + + +/** +* Returns true if the specified position is a boundary position. As a side +* effect, leaves the iterator pointing to the first boundary position at +* or after "offset". +* @param bi The break iterator to use. +* @param offset the offset to check. +* @return True if "offset" is a boundary position. +* @stable ICU 2.0 +*/ +U_CAPI UBool U_EXPORT2 +ubrk_isBoundary(UBreakIterator *bi, int32_t offset); + +/** + * Return the status from the break rule that determined the most recently + * returned break position. The values appear in the rule source + * within brackets, {123}, for example. For rules that do not specify a + * status, a default value of 0 is returned. + * <p> + * For word break iterators, the possible values are defined in enum UWordBreak. + * @stable ICU 2.2 + */ +U_CAPI int32_t U_EXPORT2 +ubrk_getRuleStatus(UBreakIterator *bi); + +/** + * Get the statuses from the break rules that determined the most recently + * returned break position. The values appear in the rule source + * within brackets, {123}, for example. The default status value for rules + * that do not explicitly provide one is zero. + * <p> + * For word break iterators, the possible values are defined in enum UWordBreak. + * @param bi The break iterator to use + * @param fillInVec an array to be filled in with the status values. + * @param capacity the length of the supplied vector. A length of zero causes + * the function to return the number of status values, in the + * normal way, without attempting to store any values. + * @param status receives error codes. + * @return The number of rule status values from rules that determined + * the most recent boundary returned by the break iterator. + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status); + +/** + * Return the locale of the break iterator. You can choose between the valid and + * the actual locale. + * @param bi break iterator + * @param type locale type (valid or actual) + * @param status error code + * @return locale string + * @stable ICU 2.8 + */ +U_CAPI const char* U_EXPORT2 +ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status); + +/** + * Set the subject text string upon which the break iterator is operating + * without changing any other aspect of the state. + * The new and previous text strings must have the same content. + * + * This function is intended for use in environments where ICU is operating on + * strings that may move around in memory. It provides a mechanism for notifying + * ICU that the string has been relocated, and providing a new UText to access the + * string in its new position. + * + * Note that the break iterator never copies the underlying text + * of a string being processed, but always operates directly on the original text + * provided by the user. Refreshing simply drops the references to the old text + * and replaces them with references to the new. + * + * Caution: this function is normally used only by very specialized + * system-level code. One example use case is with garbage collection + * that moves the text in memory. + * + * @param bi The break iterator. + * @param text The new (moved) text string. + * @param status Receives errors detected by this function. + * + * @stable ICU 49 + */ +U_CAPI void U_EXPORT2 +ubrk_refreshUText(UBreakIterator *bi, + UText *text, + UErrorCode *status); + + +/** + * Get a compiled binary version of the rules specifying the behavior of a UBreakIterator. + * The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator + * more quickly than using ubrk_openRules. The compiled rules are not compatible across + * different major versions of ICU, nor across platforms of different endianness or + * different base character set family (ASCII vs EBCDIC). Supports preflighting (with + * binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to + * the binaryRules buffer. However, whether preflighting or not, if the actual length + * is greater than INT32_MAX, then the function returns 0 and sets *status to + * U_INDEX_OUTOFBOUNDS_ERROR. + + * @param bi The break iterator to use. + * @param binaryRules Buffer to receive the compiled binary rules; set to NULL for + * preflighting. + * @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for + * preflighting. Must be >= 0. + * @param status Pointer to UErrorCode to receive any errors, such as + * U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or + * U_ILLEGAL_ARGUMENT_ERROR. + * @return The actual byte length of the binary rules, if <= INT32_MAX; + * otherwise 0. If not preflighting and this is larger than + * rulesCapacity, *status will be set to an error. + * @see ubrk_openBinaryRules + * @stable ICU 59 + */ +U_CAPI int32_t U_EXPORT2 +ubrk_getBinaryRules(UBreakIterator *bi, + uint8_t * binaryRules, int32_t rulesCapacity, + UErrorCode * status); + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/ucasemap.h b/thirdparty/icu4c/common/unicode/ucasemap.h new file mode 100644 index 0000000000..d1c1b483ab --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ucasemap.h @@ -0,0 +1,388 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2005-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ucasemap.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2005may06 +* created by: Markus W. Scherer +* +* Case mapping service object and functions using it. +*/ + +#ifndef __UCASEMAP_H__ +#define __UCASEMAP_H__ + +#include "unicode/utypes.h" +#include "unicode/stringoptions.h" +#include "unicode/ustring.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +/** + * \file + * \brief C API: Unicode case mapping functions using a UCaseMap service object. + * + * The service object takes care of memory allocations, data loading, and setup + * for the attributes, as usual. + * + * Currently, the functionality provided here does not overlap with uchar.h + * and ustring.h, except for ucasemap_toTitle(). + * + * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings. + */ + +/** + * UCaseMap is an opaque service object for newer ICU case mapping functions. + * Older functions did not use a service object. + * @stable ICU 3.4 + */ +struct UCaseMap; +typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */ + +/** + * Open a UCaseMap service object for a locale and a set of options. + * The locale ID and options are preprocessed so that functions using the + * service object need not process them in each call. + * + * @param locale ICU locale ID, used for language-dependent + * upper-/lower-/title-casing according to the Unicode standard. + * Usual semantics: ""=root, NULL=default locale, etc. + * @param options Options bit set, used for case folding and string comparisons. + * Same flags as for u_foldCase(), u_strFoldCase(), + * u_strCaseCompare(), etc. + * Use 0 or U_FOLD_CASE_DEFAULT for default behavior. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return Pointer to a UCaseMap service object, if successful. + * + * @see U_FOLD_CASE_DEFAULT + * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @see U_TITLECASE_NO_LOWERCASE + * @see U_TITLECASE_NO_BREAK_ADJUSTMENT + * @stable ICU 3.4 + */ +U_CAPI UCaseMap * U_EXPORT2 +ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode); + +/** + * Close a UCaseMap service object. + * @param csm Object to be closed. + * @stable ICU 3.4 + */ +U_CAPI void U_EXPORT2 +ucasemap_close(UCaseMap *csm); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUCaseMapPointer + * "Smart pointer" class, closes a UCaseMap via ucasemap_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close); + +U_NAMESPACE_END + +#endif + +/** + * Get the locale ID that is used for language-dependent case mappings. + * @param csm UCaseMap service object. + * @return locale ID + * @stable ICU 3.4 + */ +U_CAPI const char * U_EXPORT2 +ucasemap_getLocale(const UCaseMap *csm); + +/** + * Get the options bit set that is used for case folding and string comparisons. + * @param csm UCaseMap service object. + * @return options bit set + * @stable ICU 3.4 + */ +U_CAPI uint32_t U_EXPORT2 +ucasemap_getOptions(const UCaseMap *csm); + +/** + * Set the locale ID that is used for language-dependent case mappings. + * + * @param csm UCaseMap service object. + * @param locale Locale ID, see ucasemap_open(). + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @see ucasemap_open + * @stable ICU 3.4 + */ +U_CAPI void U_EXPORT2 +ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode); + +/** + * Set the options bit set that is used for case folding and string comparisons. + * + * @param csm UCaseMap service object. + * @param options Options bit set, see ucasemap_open(). + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @see ucasemap_open + * @stable ICU 3.4 + */ +U_CAPI void U_EXPORT2 +ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + +/** + * Get the break iterator that is used for titlecasing. + * Do not modify the returned break iterator. + * @param csm UCaseMap service object. + * @return titlecasing break iterator + * @stable ICU 3.8 + */ +U_CAPI const UBreakIterator * U_EXPORT2 +ucasemap_getBreakIterator(const UCaseMap *csm); + +/** + * Set the break iterator that is used for titlecasing. + * The UCaseMap service object releases a previously set break iterator + * and "adopts" this new one, taking ownership of it. + * It will be released in a subsequent call to ucasemap_setBreakIterator() + * or ucasemap_close(). + * + * Break iterator operations are not thread-safe. Therefore, titlecasing + * functions use non-const UCaseMap objects. It is not possible to titlecase + * strings concurrently using the same UCaseMap. + * + * @param csm UCaseMap service object. + * @param iterToAdopt Break iterator to be adopted for titlecasing. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @see ucasemap_toTitle + * @see ucasemap_utf8ToTitle + * @stable ICU 3.8 + */ +U_CAPI void U_EXPORT2 +ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode); + +/** + * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(), + * except that it takes ucasemap_setOptions() into account and has performance + * advantages from being able to use a UCaseMap object for multiple case mapping + * operations, saving setup time. + * + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with ucasemap_setOptions().) + * + * Note: This function takes a non-const UCaseMap pointer because it will + * open a default break iterator if no break iterator was set yet, + * and effectively call ucasemap_setBreakIterator(); + * also because the break iterator is stateful and will be modified during + * the iteration. + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param csm UCaseMap service object. This pointer is non-const! + * See the note above for details. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * + * @see u_strToTitle + * @stable ICU 3.8 + */ +U_CAPI int32_t U_EXPORT2 +ucasemap_toTitle(UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode); + +#endif // UCONFIG_NO_BREAK_ITERATION + +/** + * Lowercase the characters in a UTF-8 string. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param csm UCaseMap service object. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * + * @see u_strToLower + * @stable ICU 3.4 + */ +U_CAPI int32_t U_EXPORT2 +ucasemap_utf8ToLower(const UCaseMap *csm, + char *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Uppercase the characters in a UTF-8 string. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param csm UCaseMap service object. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * + * @see u_strToUpper + * @stable ICU 3.4 + */ +U_CAPI int32_t U_EXPORT2 +ucasemap_utf8ToUpper(const UCaseMap *csm, + char *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + +/** + * Titlecase a UTF-8 string. + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with ucasemap_setOptions().) + * + * Note: This function takes a non-const UCaseMap pointer because it will + * open a default break iterator if no break iterator was set yet, + * and effectively call ucasemap_setBreakIterator(); + * also because the break iterator is stateful and will be modified during + * the iteration. + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setUText(), first(), next() and close() methods of the + * provided break iterator. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param csm UCaseMap service object. This pointer is non-const! + * See the note above for details. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * + * @see u_strToTitle + * @see U_TITLECASE_NO_LOWERCASE + * @see U_TITLECASE_NO_BREAK_ADJUSTMENT + * @stable ICU 3.8 + */ +U_CAPI int32_t U_EXPORT2 +ucasemap_utf8ToTitle(UCaseMap *csm, + char *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode); + +#endif + +/** + * Case-folds the characters in a UTF-8 string. + * + * Case-folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer must not overlap. + * + * @param csm UCaseMap service object. + * @param dest A buffer for the result string. The result will be NUL-terminated if + * the buffer is large enough. + * The contents is undefined in case of failure. + * @param destCapacity The size of the buffer (number of bytes). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string. + * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * + * @see u_strFoldCase + * @see ucasemap_setOptions + * @see U_FOLD_CASE_DEFAULT + * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @stable ICU 3.8 + */ +U_CAPI int32_t U_EXPORT2 +ucasemap_utf8FoldCase(const UCaseMap *csm, + char *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode); + +#endif diff --git a/thirdparty/icu4c/common/unicode/ucat.h b/thirdparty/icu4c/common/unicode/ucat.h new file mode 100644 index 0000000000..93850348ff --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ucat.h @@ -0,0 +1,160 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2003-2004, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: March 19 2003 +* Since: ICU 2.6 +********************************************************************** +*/ +#ifndef UCAT_H +#define UCAT_H + +#include "unicode/utypes.h" +#include "unicode/ures.h" + +/** + * \file + * \brief C API: Message Catalog Wrappers + * + * This C API provides look-alike functions that deliberately resemble + * the POSIX catopen, catclose, and catgets functions. The underlying + * implementation is in terms of ICU resource bundles, rather than + * POSIX message catalogs. + * + * The ICU resource bundles obey standard ICU inheritance policies. + * To facilitate this, sets and messages are flattened into one tier. + * This is done by creating resource bundle keys of the form + * <set_num>%<msg_num> where set_num is the set number and msg_num is + * the message number, formatted as decimal strings. + * + * Example: Consider a message catalog containing two sets: + * + * Set 1: Message 4 = "Good morning." + * Message 5 = "Good afternoon." + * Message 7 = "Good evening." + * Message 8 = "Good night." + * Set 4: Message 14 = "Please " + * Message 19 = "Thank you." + * Message 20 = "Sincerely," + * + * The ICU resource bundle source file would, assuming it is named + * "greet.txt", would look like this: + * + * greet + * { + * 1%4 { "Good morning." } + * 1%5 { "Good afternoon." } + * 1%7 { "Good evening." } + * 1%8 { "Good night." } + * + * 4%14 { "Please " } + * 4%19 { "Thank you." } + * 4%20 { "Sincerely," } + * } + * + * The catgets function is commonly used in combination with functions + * like printf and strftime. ICU components like message format can + * be used instead, although they use a different format syntax. + * There is an ICU package, icuio, that provides some of + * the POSIX-style formatting API. + */ + +U_CDECL_BEGIN + +/** + * An ICU message catalog descriptor, analogous to nl_catd. + * + * @stable ICU 2.6 + */ +typedef UResourceBundle* u_nl_catd; + +/** + * Open and return an ICU message catalog descriptor. The descriptor + * may be passed to u_catgets() to retrieve localized strings. + * + * @param name string containing the full path pointing to the + * directory where the resources reside followed by the package name + * e.g. "/usr/resource/my_app/resources/guimessages" on a Unix system. + * If NULL, ICU default data files will be used. + * + * Unlike POSIX, environment variables are not interpolated within the + * name. + * + * @param locale the locale for which we want to open the resource. If + * NULL, the default ICU locale will be used (see uloc_getDefault). If + * strlen(locale) == 0, the root locale will be used. + * + * @param ec input/output error code. Upon output, + * U_USING_FALLBACK_WARNING indicates that a fallback locale was + * used. For example, 'de_CH' was requested, but nothing was found + * there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that the + * default locale data or root locale data was used; neither the + * requested locale nor any of its fallback locales were found. + * + * @return a message catalog descriptor that may be passed to + * u_catgets(). If the ec parameter indicates success, then the caller + * is responsible for calling u_catclose() to close the message + * catalog. If the ec parameter indicates failure, then NULL will be + * returned. + * + * @stable ICU 2.6 + */ +U_CAPI u_nl_catd U_EXPORT2 +u_catopen(const char* name, const char* locale, UErrorCode* ec); + +/** + * Close an ICU message catalog, given its descriptor. + * + * @param catd a message catalog descriptor to be closed. May be NULL, + * in which case no action is taken. + * + * @stable ICU 2.6 + */ +U_CAPI void U_EXPORT2 +u_catclose(u_nl_catd catd); + +/** + * Retrieve a localized string from an ICU message catalog. + * + * @param catd a message catalog descriptor returned by u_catopen. + * + * @param set_num the message catalog set number. Sets need not be + * numbered consecutively. + * + * @param msg_num the message catalog message number within the + * set. Messages need not be numbered consecutively. + * + * @param s the default string. This is returned if the string + * specified by the set_num and msg_num is not found. It must be + * zero-terminated. + * + * @param len fill-in parameter to receive the length of the result. + * May be NULL, in which case it is ignored. + * + * @param ec input/output error code. May be U_USING_FALLBACK_WARNING + * or U_USING_DEFAULT_WARNING. U_MISSING_RESOURCE_ERROR indicates that + * the set_num/msg_num tuple does not specify a valid message string + * in this catalog. + * + * @return a pointer to a zero-terminated UChar array which lives in + * an internal buffer area, typically a memory mapped/DLL file. The + * caller must NOT delete this pointer. If the call is unsuccessful + * for any reason, then s is returned. This includes the situation in + * which ec indicates a failing error code upon entry to this + * function. + * + * @stable ICU 2.6 + */ +U_CAPI const UChar* U_EXPORT2 +u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num, + const UChar* s, + int32_t* len, UErrorCode* ec); + +U_CDECL_END + +#endif /*UCAT_H*/ +/*eof*/ diff --git a/thirdparty/icu4c/common/unicode/uchar.h b/thirdparty/icu4c/common/unicode/uchar.h new file mode 100644 index 0000000000..1e0f82e706 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uchar.h @@ -0,0 +1,4056 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File UCHAR.H +* +* Modification History: +* +* Date Name Description +* 04/02/97 aliu Creation. +* 03/29/99 helena Updated for C APIs. +* 4/15/99 Madhu Updated for C Implementation and Javadoc +* 5/20/99 Madhu Added the function u_getVersion() +* 8/19/1999 srl Upgraded scripts to Unicode 3.0 +* 8/27/1999 schererm UCharDirection constants: U_... +* 11/11/1999 weiv added u_isalnum(), cleaned comments +* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion(). +****************************************************************************** +*/ + +#ifndef UCHAR_H +#define UCHAR_H + +#include "unicode/utypes.h" +#include "unicode/stringoptions.h" +#include "unicode/ucpmap.h" + +#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN) + +#define USET_DEFINED + +/** + * USet is the C API type corresponding to C++ class UnicodeSet. + * It is forward-declared here to avoid including unicode/uset.h file if related + * APIs are not used. + * + * @see ucnv_getUnicodeSet + * @stable ICU 2.4 + */ +typedef struct USet USet; + +#endif + + +U_CDECL_BEGIN + +/*==========================================================================*/ +/* Unicode version number */ +/*==========================================================================*/ +/** + * Unicode version number, default for the current ICU version. + * The actual Unicode Character Database (UCD) data is stored in uprops.dat + * and may be generated from UCD files from a different Unicode version. + * Call u_getUnicodeVersion to get the actual Unicode version of the data. + * + * @see u_getUnicodeVersion + * @stable ICU 2.0 + */ +#define U_UNICODE_VERSION "13.0" + +/** + * \file + * \brief C API: Unicode Properties + * + * This C API provides low-level access to the Unicode Character Database. + * In addition to raw property values, some convenience functions calculate + * derived properties, for example for Java-style programming. + * + * Unicode assigns each code point (not just assigned character) values for + * many properties. + * Most of them are simple boolean flags, or constants from a small enumerated list. + * For some properties, values are strings or other relatively more complex types. + * + * For more information see + * "About the Unicode Character Database" (http://www.unicode.org/ucd/) + * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html). + * + * Many properties are accessible via generic functions that take a UProperty selector. + * - u_hasBinaryProperty() returns a binary value (true/false) per property and code point. + * - u_getIntPropertyValue() returns an integer value per property and code point. + * For each supported enumerated or catalog property, there is + * an enum type for all of the property's values, and + * u_getIntPropertyValue() returns the numeric values of those constants. + * - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with + * all code points for which the property is true. + * - u_getIntPropertyMap() returns a map for each + * ICU-supported enumerated/catalog/int-valued property which + * maps all Unicode code points to their values for that property. + * + * Many functions are designed to match java.lang.Character functions. + * See the individual function documentation, + * and see the JDK 1.4 java.lang.Character documentation + * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html + * + * There are also functions that provide easy migration from C/POSIX functions + * like isblank(). Their use is generally discouraged because the C/POSIX + * standards do not define their semantics beyond the ASCII range, which means + * that different implementations exhibit very different behavior. + * Instead, Unicode properties should be used directly. + * + * There are also only a few, broad C/POSIX character classes, and they tend + * to be used for conflicting purposes. For example, the "isalpha()" class + * is sometimes used to determine word boundaries, while a more sophisticated + * approach would at least distinguish initial letters from continuation + * characters (the latter including combining marks). + * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) + * Another example: There is no "istitle()" class for titlecase characters. + * + * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. + * ICU implements them according to the Standard Recommendations in + * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions + * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). + * + * API access for C/POSIX character classes is as follows: + * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC) + * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE) + * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE) + * - punct: u_ispunct(c) + * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER + * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT) + * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM) + * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE) + * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK) + * - cntrl: u_charType(c)==U_CONTROL_CHAR + * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH) + * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT) + * + * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match, + * the Standard Recommendations in UTS #18. Instead, they match Java + * functions according to their API documentation. + * + * \htmlonly + * The C/POSIX character classes are also available in UnicodeSet patterns, + * using patterns like [:graph:] or \p{graph}. + * \endhtmlonly + * + * Note: There are several ICU whitespace functions. + * Comparison: + * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; + * most of general categories "Z" (separators) + most whitespace ISO controls + * (including no-break spaces, but excluding IS1..IS4) + * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces + * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces) + * - u_isspace: Z + whitespace ISO controls (including no-break spaces) + * - u_isblank: "horizontal spaces" = TAB + Zs + */ + +/** + * Constants. + */ + +/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */ +#define UCHAR_MIN_VALUE 0 + +/** + * The highest Unicode code point value (scalar value) according to + * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up). + * For a single character, UChar32 is a simple type that can hold any code point value. + * + * @see UChar32 + * @stable ICU 2.0 + */ +#define UCHAR_MAX_VALUE 0x10ffff + +/** + * Get a single-bit bit set (a flag) from a bit number 0..31. + * @stable ICU 2.1 + */ +#define U_MASK(x) ((uint32_t)1<<(x)) + +/** + * Selection constants for Unicode properties. + * These constants are used in functions like u_hasBinaryProperty to select + * one of the Unicode properties. + * + * The properties APIs are intended to reflect Unicode properties as defined + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). + * + * For details about the properties see + * UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/). + * + * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, + * then properties marked with "new in Unicode 3.2" are not or not fully available. + * Check u_getUnicodeVersion to be sure. + * + * @see u_hasBinaryProperty + * @see u_getIntPropertyValue + * @see u_getUnicodeVersion + * @stable ICU 2.1 + */ +typedef enum UProperty { + /* + * Note: UProperty constants are parsed by preparseucd.py. + * It matches lines like + * UCHAR_<Unicode property name>=<integer>, + */ + + /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that + debuggers display UCHAR_ALPHABETIC as the symbolic name for 0, + rather than UCHAR_BINARY_START. Likewise for other *_START + identifiers. */ + + /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha. + Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */ + UCHAR_ALPHABETIC=0, + /** First constant for binary Unicode properties. @stable ICU 2.1 */ + UCHAR_BINARY_START=UCHAR_ALPHABETIC, + /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */ + UCHAR_ASCII_HEX_DIGIT=1, + /** Binary property Bidi_Control. + Format controls which have specific functions + in the Bidi Algorithm. @stable ICU 2.1 */ + UCHAR_BIDI_CONTROL=2, + /** Binary property Bidi_Mirrored. + Characters that may change display in RTL text. + Same as u_isMirrored. + See Bidi Algorithm, UTR 9. @stable ICU 2.1 */ + UCHAR_BIDI_MIRRORED=3, + /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */ + UCHAR_DASH=4, + /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). + Ignorable in most processing. + <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */ + UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5, + /** Binary property Deprecated (new in Unicode 3.2). + The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */ + UCHAR_DEPRECATED=6, + /** Binary property Diacritic. Characters that linguistically modify + the meaning of another character to which they apply. @stable ICU 2.1 */ + UCHAR_DIACRITIC=7, + /** Binary property Extender. + Extend the value or shape of a preceding alphabetic character, + e.g., length and iteration marks. @stable ICU 2.1 */ + UCHAR_EXTENDER=8, + /** Binary property Full_Composition_Exclusion. + CompositionExclusions.txt+Singleton Decompositions+ + Non-Starter Decompositions. @stable ICU 2.1 */ + UCHAR_FULL_COMPOSITION_EXCLUSION=9, + /** Binary property Grapheme_Base (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. + [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */ + UCHAR_GRAPHEME_BASE=10, + /** Binary property Grapheme_Extend (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. + Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */ + UCHAR_GRAPHEME_EXTEND=11, + /** Binary property Grapheme_Link (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */ + UCHAR_GRAPHEME_LINK=12, + /** Binary property Hex_Digit. + Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */ + UCHAR_HEX_DIGIT=13, + /** Binary property Hyphen. Dashes used to mark connections + between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */ + UCHAR_HYPHEN=14, + /** Binary property ID_Continue. + Characters that can continue an identifier. + DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." + ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */ + UCHAR_ID_CONTINUE=15, + /** Binary property ID_Start. + Characters that can start an identifier. + Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */ + UCHAR_ID_START=16, + /** Binary property Ideographic. + CJKV ideographs. @stable ICU 2.1 */ + UCHAR_IDEOGRAPHIC=17, + /** Binary property IDS_Binary_Operator (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_IDS_BINARY_OPERATOR=18, + /** Binary property IDS_Trinary_Operator (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_IDS_TRINARY_OPERATOR=19, + /** Binary property Join_Control. + Format controls for cursive joining and ligation. @stable ICU 2.1 */ + UCHAR_JOIN_CONTROL=20, + /** Binary property Logical_Order_Exception (new in Unicode 3.2). + Characters that do not use logical order and + require special handling in most processing. @stable ICU 2.1 */ + UCHAR_LOGICAL_ORDER_EXCEPTION=21, + /** Binary property Lowercase. Same as u_isULowercase, different from u_islower. + Ll+Other_Lowercase @stable ICU 2.1 */ + UCHAR_LOWERCASE=22, + /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */ + UCHAR_MATH=23, + /** Binary property Noncharacter_Code_Point. + Code points that are explicitly defined as illegal + for the encoding of characters. @stable ICU 2.1 */ + UCHAR_NONCHARACTER_CODE_POINT=24, + /** Binary property Quotation_Mark. @stable ICU 2.1 */ + UCHAR_QUOTATION_MARK=25, + /** Binary property Radical (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_RADICAL=26, + /** Binary property Soft_Dotted (new in Unicode 3.2). + Characters with a "soft dot", like i or j. + An accent placed on these characters causes + the dot to disappear. @stable ICU 2.1 */ + UCHAR_SOFT_DOTTED=27, + /** Binary property Terminal_Punctuation. + Punctuation characters that generally mark + the end of textual units. @stable ICU 2.1 */ + UCHAR_TERMINAL_PUNCTUATION=28, + /** Binary property Unified_Ideograph (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_UNIFIED_IDEOGRAPH=29, + /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper. + Lu+Other_Uppercase @stable ICU 2.1 */ + UCHAR_UPPERCASE=30, + /** Binary property White_Space. + Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. + Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */ + UCHAR_WHITE_SPACE=31, + /** Binary property XID_Continue. + ID_Continue modified to allow closure under + normalization forms NFKC and NFKD. @stable ICU 2.1 */ + UCHAR_XID_CONTINUE=32, + /** Binary property XID_Start. ID_Start modified to allow + closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */ + UCHAR_XID_START=33, + /** Binary property Case_Sensitive. Either the source of a case + mapping or _in_ the target of a case mapping. Not the same as + the general category Cased_Letter. @stable ICU 2.6 */ + UCHAR_CASE_SENSITIVE=34, + /** Binary property STerm (new in Unicode 4.0.1). + Sentence Terminal. Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + @stable ICU 3.0 */ + UCHAR_S_TERM=35, + /** Binary property Variation_Selector (new in Unicode 4.0.1). + Indicates all those characters that qualify as Variation Selectors. + For details on the behavior of these characters, + see StandardizedVariants.html and 15.6 Variation Selectors. + @stable ICU 3.0 */ + UCHAR_VARIATION_SELECTOR=36, + /** Binary property NFD_Inert. + ICU-specific property for characters that are inert under NFD, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCHAR_NFD_INERT=37, + /** Binary property NFKD_Inert. + ICU-specific property for characters that are inert under NFKD, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCHAR_NFKD_INERT=38, + /** Binary property NFC_Inert. + ICU-specific property for characters that are inert under NFC, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCHAR_NFC_INERT=39, + /** Binary property NFKC_Inert. + ICU-specific property for characters that are inert under NFKC, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCHAR_NFKC_INERT=40, + /** Binary Property Segment_Starter. + ICU-specific property for characters that are starters in terms of + Unicode normalization and combining character sequences. + They have ccc=0 and do not occur in non-initial position of the + canonical decomposition of any character + (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). + ICU uses this property for segmenting a string for generating a set of + canonically equivalent strings, e.g. for canonical closure while + processing collation tailoring rules. + @stable ICU 3.0 */ + UCHAR_SEGMENT_STARTER=41, + /** Binary property Pattern_Syntax (new in Unicode 4.1). + See UAX #31 Identifier and Pattern Syntax + (http://www.unicode.org/reports/tr31/) + @stable ICU 3.4 */ + UCHAR_PATTERN_SYNTAX=42, + /** Binary property Pattern_White_Space (new in Unicode 4.1). + See UAX #31 Identifier and Pattern Syntax + (http://www.unicode.org/reports/tr31/) + @stable ICU 3.4 */ + UCHAR_PATTERN_WHITE_SPACE=43, + /** Binary property alnum (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_ALNUM=44, + /** Binary property blank (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_BLANK=45, + /** Binary property graph (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_GRAPH=46, + /** Binary property print (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_PRINT=47, + /** Binary property xdigit (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_XDIGIT=48, + /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */ + UCHAR_CASED=49, + /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */ + UCHAR_CASE_IGNORABLE=50, + /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_LOWERCASED=51, + /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_UPPERCASED=52, + /** Binary property Changes_When_Titlecased. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_TITLECASED=53, + /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_CASEFOLDED=54, + /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_CASEMAPPED=55, + /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56, + /** + * Binary property Emoji. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCHAR_EMOJI=57, + /** + * Binary property Emoji_Presentation. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCHAR_EMOJI_PRESENTATION=58, + /** + * Binary property Emoji_Modifier. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCHAR_EMOJI_MODIFIER=59, + /** + * Binary property Emoji_Modifier_Base. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCHAR_EMOJI_MODIFIER_BASE=60, + /** + * Binary property Emoji_Component. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 60 + */ + UCHAR_EMOJI_COMPONENT=61, + /** + * Binary property Regional_Indicator. + * @stable ICU 60 + */ + UCHAR_REGIONAL_INDICATOR=62, + /** + * Binary property Prepended_Concatenation_Mark. + * @stable ICU 60 + */ + UCHAR_PREPENDED_CONCATENATION_MARK=63, + /** + * Binary property Extended_Pictographic. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 62 + */ + UCHAR_EXTENDED_PICTOGRAPHIC=64, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last constant for binary Unicode properties. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCHAR_BINARY_LIMIT, +#endif // U_HIDE_DEPRECATED_API + + /** Enumerated property Bidi_Class. + Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */ + UCHAR_BIDI_CLASS=0x1000, + /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ + UCHAR_INT_START=UCHAR_BIDI_CLASS, + /** Enumerated property Block. + Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */ + UCHAR_BLOCK=0x1001, + /** Enumerated property Canonical_Combining_Class. + Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */ + UCHAR_CANONICAL_COMBINING_CLASS=0x1002, + /** Enumerated property Decomposition_Type. + Returns UDecompositionType values. @stable ICU 2.2 */ + UCHAR_DECOMPOSITION_TYPE=0x1003, + /** Enumerated property East_Asian_Width. + See http://www.unicode.org/reports/tr11/ + Returns UEastAsianWidth values. @stable ICU 2.2 */ + UCHAR_EAST_ASIAN_WIDTH=0x1004, + /** Enumerated property General_Category. + Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */ + UCHAR_GENERAL_CATEGORY=0x1005, + /** Enumerated property Joining_Group. + Returns UJoiningGroup values. @stable ICU 2.2 */ + UCHAR_JOINING_GROUP=0x1006, + /** Enumerated property Joining_Type. + Returns UJoiningType values. @stable ICU 2.2 */ + UCHAR_JOINING_TYPE=0x1007, + /** Enumerated property Line_Break. + Returns ULineBreak values. @stable ICU 2.2 */ + UCHAR_LINE_BREAK=0x1008, + /** Enumerated property Numeric_Type. + Returns UNumericType values. @stable ICU 2.2 */ + UCHAR_NUMERIC_TYPE=0x1009, + /** Enumerated property Script. + Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */ + UCHAR_SCRIPT=0x100A, + /** Enumerated property Hangul_Syllable_Type, new in Unicode 4. + Returns UHangulSyllableType values. @stable ICU 2.6 */ + UCHAR_HANGUL_SYLLABLE_TYPE=0x100B, + /** Enumerated property NFD_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCHAR_NFD_QUICK_CHECK=0x100C, + /** Enumerated property NFKD_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCHAR_NFKD_QUICK_CHECK=0x100D, + /** Enumerated property NFC_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCHAR_NFC_QUICK_CHECK=0x100E, + /** Enumerated property NFKC_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCHAR_NFKC_QUICK_CHECK=0x100F, + /** Enumerated property Lead_Canonical_Combining_Class. + ICU-specific property for the ccc of the first code point + of the decomposition, or lccc(c)=ccc(NFD(c)[0]). + Useful for checking for canonically ordered text; + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ + UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010, + /** Enumerated property Trail_Canonical_Combining_Class. + ICU-specific property for the ccc of the last code point + of the decomposition, or tccc(c)=ccc(NFD(c)[last]). + Useful for checking for canonically ordered text; + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ + UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011, + /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns UGraphemeClusterBreak values. @stable ICU 3.4 */ + UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012, + /** Enumerated property Sentence_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns USentenceBreak values. @stable ICU 3.4 */ + UCHAR_SENTENCE_BREAK=0x1013, + /** Enumerated property Word_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns UWordBreakValues values. @stable ICU 3.4 */ + UCHAR_WORD_BREAK=0x1014, + /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). + Used in UAX #9: Unicode Bidirectional Algorithm + (http://www.unicode.org/reports/tr9/) + Returns UBidiPairedBracketType values. @stable ICU 52 */ + UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015, + /** + * Enumerated property Indic_Positional_Category. + * New in Unicode 6.0 as provisional property Indic_Matra_Category; + * renamed and changed to informative in Unicode 8.0. + * See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt + * @stable ICU 63 + */ + UCHAR_INDIC_POSITIONAL_CATEGORY=0x1016, + /** + * Enumerated property Indic_Syllabic_Category. + * New in Unicode 6.0 as provisional; informative since Unicode 8.0. + * See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt + * @stable ICU 63 + */ + UCHAR_INDIC_SYLLABIC_CATEGORY=0x1017, + /** + * Enumerated property Vertical_Orientation. + * Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/). + * New as a UCD property in Unicode 10.0. + * @stable ICU 63 + */ + UCHAR_VERTICAL_ORIENTATION=0x1018, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last constant for enumerated/integer Unicode properties. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCHAR_INT_LIMIT=0x1019, +#endif // U_HIDE_DEPRECATED_API + + /** Bitmask property General_Category_Mask. + This is the General_Category property returned as a bit mask. + When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), + returns bit masks for UCharCategory values where exactly one bit is set. + When used with u_getPropertyValueName() and u_getPropertyValueEnum(), + a multi-bit mask is used for sets of categories like "Letters". + Mask values should be cast to uint32_t. + @stable ICU 2.4 */ + UCHAR_GENERAL_CATEGORY_MASK=0x2000, + /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */ + UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last constant for bit-mask Unicode properties. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCHAR_MASK_LIMIT=0x2001, +#endif // U_HIDE_DEPRECATED_API + + /** Double property Numeric_Value. + Corresponds to u_getNumericValue. @stable ICU 2.4 */ + UCHAR_NUMERIC_VALUE=0x3000, + /** First constant for double Unicode properties. @stable ICU 2.4 */ + UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last constant for double Unicode properties. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCHAR_DOUBLE_LIMIT=0x3001, +#endif // U_HIDE_DEPRECATED_API + + /** String property Age. + Corresponds to u_charAge. @stable ICU 2.4 */ + UCHAR_AGE=0x4000, + /** First constant for string Unicode properties. @stable ICU 2.4 */ + UCHAR_STRING_START=UCHAR_AGE, + /** String property Bidi_Mirroring_Glyph. + Corresponds to u_charMirror. @stable ICU 2.4 */ + UCHAR_BIDI_MIRRORING_GLYPH=0x4001, + /** String property Case_Folding. + Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */ + UCHAR_CASE_FOLDING=0x4002, +#ifndef U_HIDE_DEPRECATED_API + /** Deprecated string property ISO_Comment. + Corresponds to u_getISOComment. @deprecated ICU 49 */ + UCHAR_ISO_COMMENT=0x4003, +#endif /* U_HIDE_DEPRECATED_API */ + /** String property Lowercase_Mapping. + Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */ + UCHAR_LOWERCASE_MAPPING=0x4004, + /** String property Name. + Corresponds to u_charName. @stable ICU 2.4 */ + UCHAR_NAME=0x4005, + /** String property Simple_Case_Folding. + Corresponds to u_foldCase. @stable ICU 2.4 */ + UCHAR_SIMPLE_CASE_FOLDING=0x4006, + /** String property Simple_Lowercase_Mapping. + Corresponds to u_tolower. @stable ICU 2.4 */ + UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007, + /** String property Simple_Titlecase_Mapping. + Corresponds to u_totitle. @stable ICU 2.4 */ + UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008, + /** String property Simple_Uppercase_Mapping. + Corresponds to u_toupper. @stable ICU 2.4 */ + UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009, + /** String property Titlecase_Mapping. + Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */ + UCHAR_TITLECASE_MAPPING=0x400A, +#ifndef U_HIDE_DEPRECATED_API + /** String property Unicode_1_Name. + This property is of little practical value. + Beginning with ICU 49, ICU APIs return an empty string for this property. + Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */ + UCHAR_UNICODE_1_NAME=0x400B, +#endif /* U_HIDE_DEPRECATED_API */ + /** String property Uppercase_Mapping. + Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */ + UCHAR_UPPERCASE_MAPPING=0x400C, + /** String property Bidi_Paired_Bracket (new in Unicode 6.3). + Corresponds to u_getBidiPairedBracket. @stable ICU 52 */ + UCHAR_BIDI_PAIRED_BRACKET=0x400D, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last constant for string Unicode properties. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCHAR_STRING_LIMIT=0x400E, +#endif // U_HIDE_DEPRECATED_API + + /** Miscellaneous property Script_Extensions (new in Unicode 6.0). + Some characters are commonly used in multiple scripts. + For more information, see UAX #24: http://www.unicode.org/reports/tr24/. + Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h. + @stable ICU 4.6 */ + UCHAR_SCRIPT_EXTENSIONS=0x7000, + /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */ + UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last constant for Unicode properties with unusual value types. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCHAR_OTHER_PROPERTY_LIMIT=0x7001, +#endif // U_HIDE_DEPRECATED_API + + /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */ + UCHAR_INVALID_CODE = -1 +} UProperty; + +/** + * Data for enumerated Unicode general category types. + * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html . + * @stable ICU 2.0 + */ +typedef enum UCharCategory +{ + /* + * Note: UCharCategory constants and their API comments are parsed by preparseucd.py. + * It matches pairs of lines like + * / ** <Unicode 2-letter General_Category value> comment... * / + * U_<[A-Z_]+> = <integer>, + */ + + /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */ + U_UNASSIGNED = 0, + /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */ + U_GENERAL_OTHER_TYPES = 0, + /** Lu @stable ICU 2.0 */ + U_UPPERCASE_LETTER = 1, + /** Ll @stable ICU 2.0 */ + U_LOWERCASE_LETTER = 2, + /** Lt @stable ICU 2.0 */ + U_TITLECASE_LETTER = 3, + /** Lm @stable ICU 2.0 */ + U_MODIFIER_LETTER = 4, + /** Lo @stable ICU 2.0 */ + U_OTHER_LETTER = 5, + /** Mn @stable ICU 2.0 */ + U_NON_SPACING_MARK = 6, + /** Me @stable ICU 2.0 */ + U_ENCLOSING_MARK = 7, + /** Mc @stable ICU 2.0 */ + U_COMBINING_SPACING_MARK = 8, + /** Nd @stable ICU 2.0 */ + U_DECIMAL_DIGIT_NUMBER = 9, + /** Nl @stable ICU 2.0 */ + U_LETTER_NUMBER = 10, + /** No @stable ICU 2.0 */ + U_OTHER_NUMBER = 11, + /** Zs @stable ICU 2.0 */ + U_SPACE_SEPARATOR = 12, + /** Zl @stable ICU 2.0 */ + U_LINE_SEPARATOR = 13, + /** Zp @stable ICU 2.0 */ + U_PARAGRAPH_SEPARATOR = 14, + /** Cc @stable ICU 2.0 */ + U_CONTROL_CHAR = 15, + /** Cf @stable ICU 2.0 */ + U_FORMAT_CHAR = 16, + /** Co @stable ICU 2.0 */ + U_PRIVATE_USE_CHAR = 17, + /** Cs @stable ICU 2.0 */ + U_SURROGATE = 18, + /** Pd @stable ICU 2.0 */ + U_DASH_PUNCTUATION = 19, + /** Ps @stable ICU 2.0 */ + U_START_PUNCTUATION = 20, + /** Pe @stable ICU 2.0 */ + U_END_PUNCTUATION = 21, + /** Pc @stable ICU 2.0 */ + U_CONNECTOR_PUNCTUATION = 22, + /** Po @stable ICU 2.0 */ + U_OTHER_PUNCTUATION = 23, + /** Sm @stable ICU 2.0 */ + U_MATH_SYMBOL = 24, + /** Sc @stable ICU 2.0 */ + U_CURRENCY_SYMBOL = 25, + /** Sk @stable ICU 2.0 */ + U_MODIFIER_SYMBOL = 26, + /** So @stable ICU 2.0 */ + U_OTHER_SYMBOL = 27, + /** Pi @stable ICU 2.0 */ + U_INITIAL_PUNCTUATION = 28, + /** Pf @stable ICU 2.0 */ + U_FINAL_PUNCTUATION = 29, + /** + * One higher than the last enum UCharCategory constant. + * This numeric value is stable (will not change), see + * http://www.unicode.org/policies/stability_policy.html#Property_Value + * + * @stable ICU 2.0 + */ + U_CHAR_CATEGORY_COUNT +} UCharCategory; + +/** + * U_GC_XX_MASK constants are bit flags corresponding to Unicode + * general category values. + * For each category, the nth bit is set if the numeric value of the + * corresponding UCharCategory constant is n. + * + * There are also some U_GC_Y_MASK constants for groups of general categories + * like L for all letter categories. + * + * @see u_charType + * @see U_GET_GC_MASK + * @see UCharCategory + * @stable ICU 2.1 + */ +#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_CS_MASK U_MASK(U_SURROGATE) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION) + + +/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */ +#define U_GC_L_MASK \ + (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK) + +/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */ +#define U_GC_LC_MASK \ + (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK) + +/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */ +#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK) + +/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */ +#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK) + +/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */ +#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK) + +/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */ +#define U_GC_C_MASK \ + (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK) + +/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */ +#define U_GC_P_MASK \ + (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \ + U_GC_PI_MASK|U_GC_PF_MASK) + +/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */ +#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK) + +/** + * This specifies the language directional property of a character set. + * @stable ICU 2.0 + */ +typedef enum UCharDirection { + /* + * Note: UCharDirection constants and their API comments are parsed by preparseucd.py. + * It matches pairs of lines like + * / ** <Unicode 1..3-letter Bidi_Class value> comment... * / + * U_<[A-Z_]+> = <integer>, + */ + + /** L @stable ICU 2.0 */ + U_LEFT_TO_RIGHT = 0, + /** R @stable ICU 2.0 */ + U_RIGHT_TO_LEFT = 1, + /** EN @stable ICU 2.0 */ + U_EUROPEAN_NUMBER = 2, + /** ES @stable ICU 2.0 */ + U_EUROPEAN_NUMBER_SEPARATOR = 3, + /** ET @stable ICU 2.0 */ + U_EUROPEAN_NUMBER_TERMINATOR = 4, + /** AN @stable ICU 2.0 */ + U_ARABIC_NUMBER = 5, + /** CS @stable ICU 2.0 */ + U_COMMON_NUMBER_SEPARATOR = 6, + /** B @stable ICU 2.0 */ + U_BLOCK_SEPARATOR = 7, + /** S @stable ICU 2.0 */ + U_SEGMENT_SEPARATOR = 8, + /** WS @stable ICU 2.0 */ + U_WHITE_SPACE_NEUTRAL = 9, + /** ON @stable ICU 2.0 */ + U_OTHER_NEUTRAL = 10, + /** LRE @stable ICU 2.0 */ + U_LEFT_TO_RIGHT_EMBEDDING = 11, + /** LRO @stable ICU 2.0 */ + U_LEFT_TO_RIGHT_OVERRIDE = 12, + /** AL @stable ICU 2.0 */ + U_RIGHT_TO_LEFT_ARABIC = 13, + /** RLE @stable ICU 2.0 */ + U_RIGHT_TO_LEFT_EMBEDDING = 14, + /** RLO @stable ICU 2.0 */ + U_RIGHT_TO_LEFT_OVERRIDE = 15, + /** PDF @stable ICU 2.0 */ + U_POP_DIRECTIONAL_FORMAT = 16, + /** NSM @stable ICU 2.0 */ + U_DIR_NON_SPACING_MARK = 17, + /** BN @stable ICU 2.0 */ + U_BOUNDARY_NEUTRAL = 18, + /** FSI @stable ICU 52 */ + U_FIRST_STRONG_ISOLATE = 19, + /** LRI @stable ICU 52 */ + U_LEFT_TO_RIGHT_ISOLATE = 20, + /** RLI @stable ICU 52 */ + U_RIGHT_TO_LEFT_ISOLATE = 21, + /** PDI @stable ICU 52 */ + U_POP_DIRECTIONAL_ISOLATE = 22, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest UCharDirection value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_CHAR_DIRECTION_COUNT +#endif // U_HIDE_DEPRECATED_API +} UCharDirection; + +/** + * Bidi Paired Bracket Type constants. + * + * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE + * @stable ICU 52 + */ +typedef enum UBidiPairedBracketType { + /* + * Note: UBidiPairedBracketType constants are parsed by preparseucd.py. + * It matches lines like + * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name> + */ + + /** Not a paired bracket. @stable ICU 52 */ + U_BPT_NONE, + /** Open paired bracket. @stable ICU 52 */ + U_BPT_OPEN, + /** Close paired bracket. @stable ICU 52 */ + U_BPT_CLOSE, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UBidiPairedBracketType value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_BPT_COUNT /* 3 */ +#endif // U_HIDE_DEPRECATED_API +} UBidiPairedBracketType; + +/** + * Constants for Unicode blocks, see the Unicode Data file Blocks.txt + * @stable ICU 2.0 + */ +enum UBlockCode { + /* + * Note: UBlockCode constants are parsed by preparseucd.py. + * It matches lines like + * UBLOCK_<Unicode Block value name> = <integer>, + */ + + /** New No_Block value in Unicode 4. @stable ICU 2.6 */ + UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */ + + /** @stable ICU 2.0 */ + UBLOCK_BASIC_LATIN = 1, /*[0000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/ + + /** @stable ICU 2.0 */ + UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/ + + /** + * Unicode 3.2 renames this block to "Greek and Coptic". + * @stable ICU 2.0 + */ + UBLOCK_GREEK =8, /*[0370]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CYRILLIC =9, /*[0400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARMENIAN =10, /*[0530]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HEBREW =11, /*[0590]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARABIC =12, /*[0600]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SYRIAC =13, /*[0700]*/ + + /** @stable ICU 2.0 */ + UBLOCK_THAANA =14, /*[0780]*/ + + /** @stable ICU 2.0 */ + UBLOCK_DEVANAGARI =15, /*[0900]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BENGALI =16, /*[0980]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GURMUKHI =17, /*[0A00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GUJARATI =18, /*[0A80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ORIYA =19, /*[0B00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_TAMIL =20, /*[0B80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_TELUGU =21, /*[0C00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KANNADA =22, /*[0C80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MALAYALAM =23, /*[0D00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SINHALA =24, /*[0D80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_THAI =25, /*[0E00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LAO =26, /*[0E80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_TIBETAN =27, /*[0F00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MYANMAR =28, /*[1000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GEORGIAN =29, /*[10A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HANGUL_JAMO =30, /*[1100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ETHIOPIC =31, /*[1200]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CHEROKEE =32, /*[13A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_OGHAM =34, /*[1680]*/ + + /** @stable ICU 2.0 */ + UBLOCK_RUNIC =35, /*[16A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KHMER =36, /*[1780]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MONGOLIAN =37, /*[1800]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/ + + /** + * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols". + * @stable ICU 2.0 + */ + UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_NUMBER_FORMS =45, /*[2150]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARROWS =46, /*[2190]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CONTROL_PICTURES =49, /*[2400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BOX_DRAWING =52, /*[2500]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/ + + /** @stable ICU 2.0 */ + UBLOCK_DINGBATS =56, /*[2700]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HIRAGANA =62, /*[3040]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KATAKANA =63, /*[30A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BOPOMOFO =64, /*[3100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KANBUN =66, /*[3190]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_YI_SYLLABLES =72, /*[A000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_YI_RADICALS =73, /*[A490]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HIGH_SURROGATES =75, /*[D800]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LOW_SURROGATES =77, /*[DC00]*/ + + /** + * Same as UBLOCK_PRIVATE_USE. + * Until Unicode 3.1.1, the corresponding block name was "Private Use", + * and multiple code point ranges had this block. + * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and + * adds separate blocks for the supplementary PUAs. + * + * @stable ICU 2.0 + */ + UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/ + /** + * Same as UBLOCK_PRIVATE_USE_AREA. + * Until Unicode 3.1.1, the corresponding block name was "Private Use", + * and multiple code point ranges had this block. + * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and + * adds separate blocks for the supplementary PUAs. + * + * @stable ICU 2.0 + */ + UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA, + + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/ + + /** @stable ICU 2.0 */ + UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SPECIALS =86, /*[FFF0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/ + + /* New blocks in Unicode 3.1 */ + + /** @stable ICU 2.0 */ + UBLOCK_OLD_ITALIC = 88, /*[10300]*/ + /** @stable ICU 2.0 */ + UBLOCK_GOTHIC = 89, /*[10330]*/ + /** @stable ICU 2.0 */ + UBLOCK_DESERET = 90, /*[10400]*/ + /** @stable ICU 2.0 */ + UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/ + /** @stable ICU 2.0 */ + UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/ + /** @stable ICU 2.0 */ + UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/ + /** @stable ICU 2.0 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/ + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/ + /** @stable ICU 2.0 */ + UBLOCK_TAGS = 96, /*[E0000]*/ + + /* New blocks in Unicode 3.2 */ + + /** @stable ICU 3.0 */ + UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/ + /** + * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". + * @stable ICU 2.2 + */ + UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT, + /** @stable ICU 2.2 */ + UBLOCK_TAGALOG = 98, /*[1700]*/ + /** @stable ICU 2.2 */ + UBLOCK_HANUNOO = 99, /*[1720]*/ + /** @stable ICU 2.2 */ + UBLOCK_BUHID = 100, /*[1740]*/ + /** @stable ICU 2.2 */ + UBLOCK_TAGBANWA = 101, /*[1760]*/ + /** @stable ICU 2.2 */ + UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/ + /** @stable ICU 2.2 */ + UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/ + /** @stable ICU 2.2 */ + UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/ + /** @stable ICU 2.2 */ + UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/ + + /* New blocks in Unicode 4 */ + + /** @stable ICU 2.6 */ + UBLOCK_LIMBU = 111, /*[1900]*/ + /** @stable ICU 2.6 */ + UBLOCK_TAI_LE = 112, /*[1950]*/ + /** @stable ICU 2.6 */ + UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/ + /** @stable ICU 2.6 */ + UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/ + /** @stable ICU 2.6 */ + UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/ + /** @stable ICU 2.6 */ + UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/ + /** @stable ICU 2.6 */ + UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/ + /** @stable ICU 2.6 */ + UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/ + /** @stable ICU 2.6 */ + UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/ + /** @stable ICU 2.6 */ + UBLOCK_UGARITIC = 120, /*[10380]*/ + /** @stable ICU 2.6 */ + UBLOCK_SHAVIAN = 121, /*[10450]*/ + /** @stable ICU 2.6 */ + UBLOCK_OSMANYA = 122, /*[10480]*/ + /** @stable ICU 2.6 */ + UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/ + /** @stable ICU 2.6 */ + UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/ + /** @stable ICU 2.6 */ + UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/ + + /* New blocks in Unicode 4.1 */ + + /** @stable ICU 3.4 */ + UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/ + /** @stable ICU 3.4 */ + UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/ + /** @stable ICU 3.4 */ + UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/ + /** @stable ICU 3.4 */ + UBLOCK_BUGINESE = 129, /*[1A00]*/ + /** @stable ICU 3.4 */ + UBLOCK_CJK_STROKES = 130, /*[31C0]*/ + /** @stable ICU 3.4 */ + UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/ + /** @stable ICU 3.4 */ + UBLOCK_COPTIC = 132, /*[2C80]*/ + /** @stable ICU 3.4 */ + UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/ + /** @stable ICU 3.4 */ + UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/ + /** @stable ICU 3.4 */ + UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/ + /** @stable ICU 3.4 */ + UBLOCK_GLAGOLITIC = 136, /*[2C00]*/ + /** @stable ICU 3.4 */ + UBLOCK_KHAROSHTHI = 137, /*[10A00]*/ + /** @stable ICU 3.4 */ + UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/ + /** @stable ICU 3.4 */ + UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/ + /** @stable ICU 3.4 */ + UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/ + /** @stable ICU 3.4 */ + UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/ + /** @stable ICU 3.4 */ + UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/ + /** @stable ICU 3.4 */ + UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/ + /** @stable ICU 3.4 */ + UBLOCK_TIFINAGH = 144, /*[2D30]*/ + /** @stable ICU 3.4 */ + UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/ + + /* New blocks in Unicode 5.0 */ + + /** @stable ICU 3.6 */ + UBLOCK_NKO = 146, /*[07C0]*/ + /** @stable ICU 3.6 */ + UBLOCK_BALINESE = 147, /*[1B00]*/ + /** @stable ICU 3.6 */ + UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/ + /** @stable ICU 3.6 */ + UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/ + /** @stable ICU 3.6 */ + UBLOCK_PHAGS_PA = 150, /*[A840]*/ + /** @stable ICU 3.6 */ + UBLOCK_PHOENICIAN = 151, /*[10900]*/ + /** @stable ICU 3.6 */ + UBLOCK_CUNEIFORM = 152, /*[12000]*/ + /** @stable ICU 3.6 */ + UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/ + /** @stable ICU 3.6 */ + UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/ + + /* New blocks in Unicode 5.1 */ + + /** @stable ICU 4.0 */ + UBLOCK_SUNDANESE = 155, /*[1B80]*/ + /** @stable ICU 4.0 */ + UBLOCK_LEPCHA = 156, /*[1C00]*/ + /** @stable ICU 4.0 */ + UBLOCK_OL_CHIKI = 157, /*[1C50]*/ + /** @stable ICU 4.0 */ + UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/ + /** @stable ICU 4.0 */ + UBLOCK_VAI = 159, /*[A500]*/ + /** @stable ICU 4.0 */ + UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/ + /** @stable ICU 4.0 */ + UBLOCK_SAURASHTRA = 161, /*[A880]*/ + /** @stable ICU 4.0 */ + UBLOCK_KAYAH_LI = 162, /*[A900]*/ + /** @stable ICU 4.0 */ + UBLOCK_REJANG = 163, /*[A930]*/ + /** @stable ICU 4.0 */ + UBLOCK_CHAM = 164, /*[AA00]*/ + /** @stable ICU 4.0 */ + UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/ + /** @stable ICU 4.0 */ + UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/ + /** @stable ICU 4.0 */ + UBLOCK_LYCIAN = 167, /*[10280]*/ + /** @stable ICU 4.0 */ + UBLOCK_CARIAN = 168, /*[102A0]*/ + /** @stable ICU 4.0 */ + UBLOCK_LYDIAN = 169, /*[10920]*/ + /** @stable ICU 4.0 */ + UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/ + /** @stable ICU 4.0 */ + UBLOCK_DOMINO_TILES = 171, /*[1F030]*/ + + /* New blocks in Unicode 5.2 */ + + /** @stable ICU 4.4 */ + UBLOCK_SAMARITAN = 172, /*[0800]*/ + /** @stable ICU 4.4 */ + UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/ + /** @stable ICU 4.4 */ + UBLOCK_TAI_THAM = 174, /*[1A20]*/ + /** @stable ICU 4.4 */ + UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/ + /** @stable ICU 4.4 */ + UBLOCK_LISU = 176, /*[A4D0]*/ + /** @stable ICU 4.4 */ + UBLOCK_BAMUM = 177, /*[A6A0]*/ + /** @stable ICU 4.4 */ + UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/ + /** @stable ICU 4.4 */ + UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/ + /** @stable ICU 4.4 */ + UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/ + /** @stable ICU 4.4 */ + UBLOCK_JAVANESE = 181, /*[A980]*/ + /** @stable ICU 4.4 */ + UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/ + /** @stable ICU 4.4 */ + UBLOCK_TAI_VIET = 183, /*[AA80]*/ + /** @stable ICU 4.4 */ + UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/ + /** @stable ICU 4.4 */ + UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/ + /** @stable ICU 4.4 */ + UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/ + /** @stable ICU 4.4 */ + UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/ + /** @stable ICU 4.4 */ + UBLOCK_AVESTAN = 188, /*[10B00]*/ + /** @stable ICU 4.4 */ + UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/ + /** @stable ICU 4.4 */ + UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/ + /** @stable ICU 4.4 */ + UBLOCK_OLD_TURKIC = 191, /*[10C00]*/ + /** @stable ICU 4.4 */ + UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/ + /** @stable ICU 4.4 */ + UBLOCK_KAITHI = 193, /*[11080]*/ + /** @stable ICU 4.4 */ + UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/ + /** @stable ICU 4.4 */ + UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/ + /** @stable ICU 4.4 */ + UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/ + /** @stable ICU 4.4 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/ + + /* New blocks in Unicode 6.0 */ + + /** @stable ICU 4.6 */ + UBLOCK_MANDAIC = 198, /*[0840]*/ + /** @stable ICU 4.6 */ + UBLOCK_BATAK = 199, /*[1BC0]*/ + /** @stable ICU 4.6 */ + UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/ + /** @stable ICU 4.6 */ + UBLOCK_BRAHMI = 201, /*[11000]*/ + /** @stable ICU 4.6 */ + UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/ + /** @stable ICU 4.6 */ + UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/ + /** @stable ICU 4.6 */ + UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/ + /** @stable ICU 4.6 */ + UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/ + /** @stable ICU 4.6 */ + UBLOCK_EMOTICONS = 206, /*[1F600]*/ + /** @stable ICU 4.6 */ + UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/ + /** @stable ICU 4.6 */ + UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/ + /** @stable ICU 4.6 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/ + + /* New blocks in Unicode 6.1 */ + + /** @stable ICU 49 */ + UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/ + /** @stable ICU 49 */ + UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/ + /** @stable ICU 49 */ + UBLOCK_CHAKMA = 212, /*[11100]*/ + /** @stable ICU 49 */ + UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/ + /** @stable ICU 49 */ + UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/ + /** @stable ICU 49 */ + UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/ + /** @stable ICU 49 */ + UBLOCK_MIAO = 216, /*[16F00]*/ + /** @stable ICU 49 */ + UBLOCK_SHARADA = 217, /*[11180]*/ + /** @stable ICU 49 */ + UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/ + /** @stable ICU 49 */ + UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/ + /** @stable ICU 49 */ + UBLOCK_TAKRI = 220, /*[11680]*/ + + /* New blocks in Unicode 7.0 */ + + /** @stable ICU 54 */ + UBLOCK_BASSA_VAH = 221, /*[16AD0]*/ + /** @stable ICU 54 */ + UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/ + /** @stable ICU 54 */ + UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/ + /** @stable ICU 54 */ + UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, /*[1AB0]*/ + /** @stable ICU 54 */ + UBLOCK_DUPLOYAN = 225, /*[1BC00]*/ + /** @stable ICU 54 */ + UBLOCK_ELBASAN = 226, /*[10500]*/ + /** @stable ICU 54 */ + UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, /*[1F780]*/ + /** @stable ICU 54 */ + UBLOCK_GRANTHA = 228, /*[11300]*/ + /** @stable ICU 54 */ + UBLOCK_KHOJKI = 229, /*[11200]*/ + /** @stable ICU 54 */ + UBLOCK_KHUDAWADI = 230, /*[112B0]*/ + /** @stable ICU 54 */ + UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/ + /** @stable ICU 54 */ + UBLOCK_LINEAR_A = 232, /*[10600]*/ + /** @stable ICU 54 */ + UBLOCK_MAHAJANI = 233, /*[11150]*/ + /** @stable ICU 54 */ + UBLOCK_MANICHAEAN = 234, /*[10AC0]*/ + /** @stable ICU 54 */ + UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/ + /** @stable ICU 54 */ + UBLOCK_MODI = 236, /*[11600]*/ + /** @stable ICU 54 */ + UBLOCK_MRO = 237, /*[16A40]*/ + /** @stable ICU 54 */ + UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/ + /** @stable ICU 54 */ + UBLOCK_NABATAEAN = 239, /*[10880]*/ + /** @stable ICU 54 */ + UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/ + /** @stable ICU 54 */ + UBLOCK_OLD_PERMIC = 241, /*[10350]*/ + /** @stable ICU 54 */ + UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/ + /** @stable ICU 54 */ + UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/ + /** @stable ICU 54 */ + UBLOCK_PALMYRENE = 244, /*[10860]*/ + /** @stable ICU 54 */ + UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/ + /** @stable ICU 54 */ + UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/ + /** @stable ICU 54 */ + UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, /*[1BCA0]*/ + /** @stable ICU 54 */ + UBLOCK_SIDDHAM = 248, /*[11580]*/ + /** @stable ICU 54 */ + UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, /*[111E0]*/ + /** @stable ICU 54 */ + UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/ + /** @stable ICU 54 */ + UBLOCK_TIRHUTA = 251, /*[11480]*/ + /** @stable ICU 54 */ + UBLOCK_WARANG_CITI = 252, /*[118A0]*/ + + /* New blocks in Unicode 8.0 */ + + /** @stable ICU 56 */ + UBLOCK_AHOM = 253, /*[11700]*/ + /** @stable ICU 56 */ + UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, /*[14400]*/ + /** @stable ICU 56 */ + UBLOCK_CHEROKEE_SUPPLEMENT = 255, /*[AB70]*/ + /** @stable ICU 56 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 256, /*[2B820]*/ + /** @stable ICU 56 */ + UBLOCK_EARLY_DYNASTIC_CUNEIFORM = 257, /*[12480]*/ + /** @stable ICU 56 */ + UBLOCK_HATRAN = 258, /*[108E0]*/ + /** @stable ICU 56 */ + UBLOCK_MULTANI = 259, /*[11280]*/ + /** @stable ICU 56 */ + UBLOCK_OLD_HUNGARIAN = 260, /*[10C80]*/ + /** @stable ICU 56 */ + UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 261, /*[1F900]*/ + /** @stable ICU 56 */ + UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/ + + /* New blocks in Unicode 9.0 */ + + /** @stable ICU 58 */ + UBLOCK_ADLAM = 263, /*[1E900]*/ + /** @stable ICU 58 */ + UBLOCK_BHAIKSUKI = 264, /*[11C00]*/ + /** @stable ICU 58 */ + UBLOCK_CYRILLIC_EXTENDED_C = 265, /*[1C80]*/ + /** @stable ICU 58 */ + UBLOCK_GLAGOLITIC_SUPPLEMENT = 266, /*[1E000]*/ + /** @stable ICU 58 */ + UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 267, /*[16FE0]*/ + /** @stable ICU 58 */ + UBLOCK_MARCHEN = 268, /*[11C70]*/ + /** @stable ICU 58 */ + UBLOCK_MONGOLIAN_SUPPLEMENT = 269, /*[11660]*/ + /** @stable ICU 58 */ + UBLOCK_NEWA = 270, /*[11400]*/ + /** @stable ICU 58 */ + UBLOCK_OSAGE = 271, /*[104B0]*/ + /** @stable ICU 58 */ + UBLOCK_TANGUT = 272, /*[17000]*/ + /** @stable ICU 58 */ + UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/ + + // New blocks in Unicode 10.0 + + /** @stable ICU 60 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274, /*[2CEB0]*/ + /** @stable ICU 60 */ + UBLOCK_KANA_EXTENDED_A = 275, /*[1B100]*/ + /** @stable ICU 60 */ + UBLOCK_MASARAM_GONDI = 276, /*[11D00]*/ + /** @stable ICU 60 */ + UBLOCK_NUSHU = 277, /*[1B170]*/ + /** @stable ICU 60 */ + UBLOCK_SOYOMBO = 278, /*[11A50]*/ + /** @stable ICU 60 */ + UBLOCK_SYRIAC_SUPPLEMENT = 279, /*[0860]*/ + /** @stable ICU 60 */ + UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/ + + // New blocks in Unicode 11.0 + + /** @stable ICU 62 */ + UBLOCK_CHESS_SYMBOLS = 281, /*[1FA00]*/ + /** @stable ICU 62 */ + UBLOCK_DOGRA = 282, /*[11800]*/ + /** @stable ICU 62 */ + UBLOCK_GEORGIAN_EXTENDED = 283, /*[1C90]*/ + /** @stable ICU 62 */ + UBLOCK_GUNJALA_GONDI = 284, /*[11D60]*/ + /** @stable ICU 62 */ + UBLOCK_HANIFI_ROHINGYA = 285, /*[10D00]*/ + /** @stable ICU 62 */ + UBLOCK_INDIC_SIYAQ_NUMBERS = 286, /*[1EC70]*/ + /** @stable ICU 62 */ + UBLOCK_MAKASAR = 287, /*[11EE0]*/ + /** @stable ICU 62 */ + UBLOCK_MAYAN_NUMERALS = 288, /*[1D2E0]*/ + /** @stable ICU 62 */ + UBLOCK_MEDEFAIDRIN = 289, /*[16E40]*/ + /** @stable ICU 62 */ + UBLOCK_OLD_SOGDIAN = 290, /*[10F00]*/ + /** @stable ICU 62 */ + UBLOCK_SOGDIAN = 291, /*[10F30]*/ + + // New blocks in Unicode 12.0 + + /** @stable ICU 64 */ + UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 292, /*[13430]*/ + /** @stable ICU 64 */ + UBLOCK_ELYMAIC = 293, /*[10FE0]*/ + /** @stable ICU 64 */ + UBLOCK_NANDINAGARI = 294, /*[119A0]*/ + /** @stable ICU 64 */ + UBLOCK_NYIAKENG_PUACHUE_HMONG = 295, /*[1E100]*/ + /** @stable ICU 64 */ + UBLOCK_OTTOMAN_SIYAQ_NUMBERS = 296, /*[1ED00]*/ + /** @stable ICU 64 */ + UBLOCK_SMALL_KANA_EXTENSION = 297, /*[1B130]*/ + /** @stable ICU 64 */ + UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 298, /*[1FA70]*/ + /** @stable ICU 64 */ + UBLOCK_TAMIL_SUPPLEMENT = 299, /*[11FC0]*/ + /** @stable ICU 64 */ + UBLOCK_WANCHO = 300, /*[1E2C0]*/ + + // New blocks in Unicode 13.0 + + /** @stable ICU 66 */ + UBLOCK_CHORASMIAN = 301, /*[10FB0]*/ + /** @stable ICU 66 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 302, /*[30000]*/ + /** @stable ICU 66 */ + UBLOCK_DIVES_AKURU = 303, /*[11900]*/ + /** @stable ICU 66 */ + UBLOCK_KHITAN_SMALL_SCRIPT = 304, /*[18B00]*/ + /** @stable ICU 66 */ + UBLOCK_LISU_SUPPLEMENT = 305, /*[11FB0]*/ + /** @stable ICU 66 */ + UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING = 306, /*[1FB00]*/ + /** @stable ICU 66 */ + UBLOCK_TANGUT_SUPPLEMENT = 307, /*[18D00]*/ + /** @stable ICU 66 */ + UBLOCK_YEZIDI = 308, /*[10E80]*/ + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UBlockCode value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BLOCK). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UBLOCK_COUNT = 309, +#endif // U_HIDE_DEPRECATED_API + + /** @stable ICU 2.0 */ + UBLOCK_INVALID_CODE=-1 +}; + +/** @stable ICU 2.0 */ +typedef enum UBlockCode UBlockCode; + +/** + * East Asian Width constants. + * + * @see UCHAR_EAST_ASIAN_WIDTH + * @see u_getIntPropertyValue + * @stable ICU 2.2 + */ +typedef enum UEastAsianWidth { + /* + * Note: UEastAsianWidth constants are parsed by preparseucd.py. + * It matches lines like + * U_EA_<Unicode East_Asian_Width value name> + */ + + U_EA_NEUTRAL, /*[N]*/ + U_EA_AMBIGUOUS, /*[A]*/ + U_EA_HALFWIDTH, /*[H]*/ + U_EA_FULLWIDTH, /*[F]*/ + U_EA_NARROW, /*[Na]*/ + U_EA_WIDE, /*[W]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UEastAsianWidth value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_EA_COUNT +#endif // U_HIDE_DEPRECATED_API +} UEastAsianWidth; + +/** + * Selector constants for u_charName(). + * u_charName() returns the "modern" name of a + * Unicode character; or the name that was defined in + * Unicode version 1.0, before the Unicode standard merged + * with ISO-10646; or an "extended" name that gives each + * Unicode code point a unique name. + * + * @see u_charName + * @stable ICU 2.0 + */ +typedef enum UCharNameChoice { + /** Unicode character name (Name property). @stable ICU 2.0 */ + U_UNICODE_CHAR_NAME, +#ifndef U_HIDE_DEPRECATED_API + /** + * The Unicode_1_Name property value which is of little practical value. + * Beginning with ICU 49, ICU APIs return an empty string for this name choice. + * @deprecated ICU 49 + */ + U_UNICODE_10_CHAR_NAME, +#endif /* U_HIDE_DEPRECATED_API */ + /** Standard or synthetic character name. @stable ICU 2.0 */ + U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2, + /** Corrected name from NameAliases.txt. @stable ICU 4.4 */ + U_CHAR_NAME_ALIAS, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UCharNameChoice value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_CHAR_NAME_CHOICE_COUNT +#endif // U_HIDE_DEPRECATED_API +} UCharNameChoice; + +/** + * Selector constants for u_getPropertyName() and + * u_getPropertyValueName(). These selectors are used to choose which + * name is returned for a given property or value. All properties and + * values have a long name. Most have a short name, but some do not. + * Unicode allows for additional names, beyond the long and short + * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where + * i=1, 2,... + * + * @see u_getPropertyName() + * @see u_getPropertyValueName() + * @stable ICU 2.4 + */ +typedef enum UPropertyNameChoice { + U_SHORT_PROPERTY_NAME, + U_LONG_PROPERTY_NAME, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UPropertyNameChoice value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_PROPERTY_NAME_CHOICE_COUNT +#endif // U_HIDE_DEPRECATED_API +} UPropertyNameChoice; + +/** + * Decomposition Type constants. + * + * @see UCHAR_DECOMPOSITION_TYPE + * @stable ICU 2.2 + */ +typedef enum UDecompositionType { + /* + * Note: UDecompositionType constants are parsed by preparseucd.py. + * It matches lines like + * U_DT_<Unicode Decomposition_Type value name> + */ + + U_DT_NONE, /*[none]*/ + U_DT_CANONICAL, /*[can]*/ + U_DT_COMPAT, /*[com]*/ + U_DT_CIRCLE, /*[enc]*/ + U_DT_FINAL, /*[fin]*/ + U_DT_FONT, /*[font]*/ + U_DT_FRACTION, /*[fra]*/ + U_DT_INITIAL, /*[init]*/ + U_DT_ISOLATED, /*[iso]*/ + U_DT_MEDIAL, /*[med]*/ + U_DT_NARROW, /*[nar]*/ + U_DT_NOBREAK, /*[nb]*/ + U_DT_SMALL, /*[sml]*/ + U_DT_SQUARE, /*[sqr]*/ + U_DT_SUB, /*[sub]*/ + U_DT_SUPER, /*[sup]*/ + U_DT_VERTICAL, /*[vert]*/ + U_DT_WIDE, /*[wide]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UDecompositionType value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_DT_COUNT /* 18 */ +#endif // U_HIDE_DEPRECATED_API +} UDecompositionType; + +/** + * Joining Type constants. + * + * @see UCHAR_JOINING_TYPE + * @stable ICU 2.2 + */ +typedef enum UJoiningType { + /* + * Note: UJoiningType constants are parsed by preparseucd.py. + * It matches lines like + * U_JT_<Unicode Joining_Type value name> + */ + + U_JT_NON_JOINING, /*[U]*/ + U_JT_JOIN_CAUSING, /*[C]*/ + U_JT_DUAL_JOINING, /*[D]*/ + U_JT_LEFT_JOINING, /*[L]*/ + U_JT_RIGHT_JOINING, /*[R]*/ + U_JT_TRANSPARENT, /*[T]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UJoiningType value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_JT_COUNT /* 6 */ +#endif // U_HIDE_DEPRECATED_API +} UJoiningType; + +/** + * Joining Group constants. + * + * @see UCHAR_JOINING_GROUP + * @stable ICU 2.2 + */ +typedef enum UJoiningGroup { + /* + * Note: UJoiningGroup constants are parsed by preparseucd.py. + * It matches lines like + * U_JG_<Unicode Joining_Group value name> + */ + + U_JG_NO_JOINING_GROUP, + U_JG_AIN, + U_JG_ALAPH, + U_JG_ALEF, + U_JG_BEH, + U_JG_BETH, + U_JG_DAL, + U_JG_DALATH_RISH, + U_JG_E, + U_JG_FEH, + U_JG_FINAL_SEMKATH, + U_JG_GAF, + U_JG_GAMAL, + U_JG_HAH, + U_JG_TEH_MARBUTA_GOAL, /**< @stable ICU 4.6 */ + U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL, + U_JG_HE, + U_JG_HEH, + U_JG_HEH_GOAL, + U_JG_HETH, + U_JG_KAF, + U_JG_KAPH, + U_JG_KNOTTED_HEH, + U_JG_LAM, + U_JG_LAMADH, + U_JG_MEEM, + U_JG_MIM, + U_JG_NOON, + U_JG_NUN, + U_JG_PE, + U_JG_QAF, + U_JG_QAPH, + U_JG_REH, + U_JG_REVERSED_PE, + U_JG_SAD, + U_JG_SADHE, + U_JG_SEEN, + U_JG_SEMKATH, + U_JG_SHIN, + U_JG_SWASH_KAF, + U_JG_SYRIAC_WAW, + U_JG_TAH, + U_JG_TAW, + U_JG_TEH_MARBUTA, + U_JG_TETH, + U_JG_WAW, + U_JG_YEH, + U_JG_YEH_BARREE, + U_JG_YEH_WITH_TAIL, + U_JG_YUDH, + U_JG_YUDH_HE, + U_JG_ZAIN, + U_JG_FE, /**< @stable ICU 2.6 */ + U_JG_KHAPH, /**< @stable ICU 2.6 */ + U_JG_ZHAIN, /**< @stable ICU 2.6 */ + U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */ + U_JG_FARSI_YEH, /**< @stable ICU 4.4 */ + U_JG_NYA, /**< @stable ICU 4.4 */ + U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */ + U_JG_MANICHAEAN_ALEPH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_AYIN, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_BETH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_DALETH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_DHAMEDH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_FIVE, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_GIMEL, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_HETH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_HUNDRED, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_KAPH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_LAMEDH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_MEM, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_NUN, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_ONE, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_PE, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_QOPH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_RESH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_SADHE, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_SAMEKH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_TAW, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_TEN, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_TETH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_THAMEDH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_TWENTY, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_WAW, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_YODH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_ZAYIN, /**< @stable ICU 54 */ + U_JG_STRAIGHT_WAW, /**< @stable ICU 54 */ + U_JG_AFRICAN_FEH, /**< @stable ICU 58 */ + U_JG_AFRICAN_NOON, /**< @stable ICU 58 */ + U_JG_AFRICAN_QAF, /**< @stable ICU 58 */ + + U_JG_MALAYALAM_BHA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_JA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_LLA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_LLLA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_NGA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_NNA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_NNNA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_NYA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_RA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_SSA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_TTA, /**< @stable ICU 60 */ + + U_JG_HANIFI_ROHINGYA_KINNA_YA, /**< @stable ICU 62 */ + U_JG_HANIFI_ROHINGYA_PA, /**< @stable ICU 62 */ + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UJoiningGroup value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_JG_COUNT +#endif // U_HIDE_DEPRECATED_API +} UJoiningGroup; + +/** + * Grapheme Cluster Break constants. + * + * @see UCHAR_GRAPHEME_CLUSTER_BREAK + * @stable ICU 3.4 + */ +typedef enum UGraphemeClusterBreak { + /* + * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py. + * It matches lines like + * U_GCB_<Unicode Grapheme_Cluster_Break value name> + */ + + U_GCB_OTHER = 0, /*[XX]*/ + U_GCB_CONTROL = 1, /*[CN]*/ + U_GCB_CR = 2, /*[CR]*/ + U_GCB_EXTEND = 3, /*[EX]*/ + U_GCB_L = 4, /*[L]*/ + U_GCB_LF = 5, /*[LF]*/ + U_GCB_LV = 6, /*[LV]*/ + U_GCB_LVT = 7, /*[LVT]*/ + U_GCB_T = 8, /*[T]*/ + U_GCB_V = 9, /*[V]*/ + /** @stable ICU 4.0 */ + U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ + /** @stable ICU 4.0 */ + U_GCB_PREPEND = 11, /*[PP]*/ + /** @stable ICU 50 */ + U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ + /** @stable ICU 58 */ + U_GCB_E_BASE = 13, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ + /** @stable ICU 58 */ + U_GCB_E_BASE_GAZ = 14, /*[EBG]*/ + /** @stable ICU 58 */ + U_GCB_E_MODIFIER = 15, /*[EM]*/ + /** @stable ICU 58 */ + U_GCB_GLUE_AFTER_ZWJ = 16, /*[GAZ]*/ + /** @stable ICU 58 */ + U_GCB_ZWJ = 17, /*[ZWJ]*/ + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UGraphemeClusterBreak value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_GCB_COUNT = 18 +#endif // U_HIDE_DEPRECATED_API +} UGraphemeClusterBreak; + +/** + * Word Break constants. + * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.) + * + * @see UCHAR_WORD_BREAK + * @stable ICU 3.4 + */ +typedef enum UWordBreakValues { + /* + * Note: UWordBreakValues constants are parsed by preparseucd.py. + * It matches lines like + * U_WB_<Unicode Word_Break value name> + */ + + U_WB_OTHER = 0, /*[XX]*/ + U_WB_ALETTER = 1, /*[LE]*/ + U_WB_FORMAT = 2, /*[FO]*/ + U_WB_KATAKANA = 3, /*[KA]*/ + U_WB_MIDLETTER = 4, /*[ML]*/ + U_WB_MIDNUM = 5, /*[MN]*/ + U_WB_NUMERIC = 6, /*[NU]*/ + U_WB_EXTENDNUMLET = 7, /*[EX]*/ + /** @stable ICU 4.0 */ + U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ + /** @stable ICU 4.0 */ + U_WB_EXTEND = 9, /*[Extend]*/ + /** @stable ICU 4.0 */ + U_WB_LF = 10, /*[LF]*/ + /** @stable ICU 4.0 */ + U_WB_MIDNUMLET =11, /*[MB]*/ + /** @stable ICU 4.0 */ + U_WB_NEWLINE =12, /*[NL]*/ + /** @stable ICU 50 */ + U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ + /** @stable ICU 52 */ + U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ + /** @stable ICU 52 */ + U_WB_SINGLE_QUOTE = 15, /*[SQ]*/ + /** @stable ICU 52 */ + U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/ + /** @stable ICU 58 */ + U_WB_E_BASE = 17, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ + /** @stable ICU 58 */ + U_WB_E_BASE_GAZ = 18, /*[EBG]*/ + /** @stable ICU 58 */ + U_WB_E_MODIFIER = 19, /*[EM]*/ + /** @stable ICU 58 */ + U_WB_GLUE_AFTER_ZWJ = 20, /*[GAZ]*/ + /** @stable ICU 58 */ + U_WB_ZWJ = 21, /*[ZWJ]*/ + /** @stable ICU 62 */ + U_WB_WSEGSPACE = 22, /*[WSEGSPACE]*/ + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UWordBreakValues value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_WORD_BREAK). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_WB_COUNT = 23 +#endif // U_HIDE_DEPRECATED_API +} UWordBreakValues; + +/** + * Sentence Break constants. + * + * @see UCHAR_SENTENCE_BREAK + * @stable ICU 3.4 + */ +typedef enum USentenceBreak { + /* + * Note: USentenceBreak constants are parsed by preparseucd.py. + * It matches lines like + * U_SB_<Unicode Sentence_Break value name> + */ + + U_SB_OTHER = 0, /*[XX]*/ + U_SB_ATERM = 1, /*[AT]*/ + U_SB_CLOSE = 2, /*[CL]*/ + U_SB_FORMAT = 3, /*[FO]*/ + U_SB_LOWER = 4, /*[LO]*/ + U_SB_NUMERIC = 5, /*[NU]*/ + U_SB_OLETTER = 6, /*[LE]*/ + U_SB_SEP = 7, /*[SE]*/ + U_SB_SP = 8, /*[SP]*/ + U_SB_STERM = 9, /*[ST]*/ + U_SB_UPPER = 10, /*[UP]*/ + U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ + U_SB_EXTEND = 12, /*[EX]*/ + U_SB_LF = 13, /*[LF]*/ + U_SB_SCONTINUE = 14, /*[SC]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal USentenceBreak value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_SB_COUNT = 15 +#endif // U_HIDE_DEPRECATED_API +} USentenceBreak; + +/** + * Line Break constants. + * + * @see UCHAR_LINE_BREAK + * @stable ICU 2.2 + */ +typedef enum ULineBreak { + /* + * Note: ULineBreak constants are parsed by preparseucd.py. + * It matches lines like + * U_LB_<Unicode Line_Break value name> + */ + + U_LB_UNKNOWN = 0, /*[XX]*/ + U_LB_AMBIGUOUS = 1, /*[AI]*/ + U_LB_ALPHABETIC = 2, /*[AL]*/ + U_LB_BREAK_BOTH = 3, /*[B2]*/ + U_LB_BREAK_AFTER = 4, /*[BA]*/ + U_LB_BREAK_BEFORE = 5, /*[BB]*/ + U_LB_MANDATORY_BREAK = 6, /*[BK]*/ + U_LB_CONTINGENT_BREAK = 7, /*[CB]*/ + U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/ + U_LB_COMBINING_MARK = 9, /*[CM]*/ + U_LB_CARRIAGE_RETURN = 10, /*[CR]*/ + U_LB_EXCLAMATION = 11, /*[EX]*/ + U_LB_GLUE = 12, /*[GL]*/ + U_LB_HYPHEN = 13, /*[HY]*/ + U_LB_IDEOGRAPHIC = 14, /*[ID]*/ + /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */ + U_LB_INSEPARABLE = 15, /*[IN]*/ + U_LB_INSEPERABLE = U_LB_INSEPARABLE, + U_LB_INFIX_NUMERIC = 16, /*[IS]*/ + U_LB_LINE_FEED = 17, /*[LF]*/ + U_LB_NONSTARTER = 18, /*[NS]*/ + U_LB_NUMERIC = 19, /*[NU]*/ + U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/ + U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/ + U_LB_PREFIX_NUMERIC = 22, /*[PR]*/ + U_LB_QUOTATION = 23, /*[QU]*/ + U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/ + U_LB_SURROGATE = 25, /*[SG]*/ + U_LB_SPACE = 26, /*[SP]*/ + U_LB_BREAK_SYMBOLS = 27, /*[SY]*/ + U_LB_ZWSPACE = 28, /*[ZW]*/ + /** @stable ICU 2.6 */ + U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ + /** @stable ICU 2.6 */ + U_LB_WORD_JOINER = 30, /*[WJ]*/ + /** @stable ICU 3.4 */ + U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */ + /** @stable ICU 3.4 */ + U_LB_H3 = 32, /*[H3]*/ + /** @stable ICU 3.4 */ + U_LB_JL = 33, /*[JL]*/ + /** @stable ICU 3.4 */ + U_LB_JT = 34, /*[JT]*/ + /** @stable ICU 3.4 */ + U_LB_JV = 35, /*[JV]*/ + /** @stable ICU 4.4 */ + U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */ + /** @stable ICU 49 */ + U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ + /** @stable ICU 49 */ + U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ + /** @stable ICU 50 */ + U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */ + /** @stable ICU 58 */ + U_LB_E_BASE = 40, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ + /** @stable ICU 58 */ + U_LB_E_MODIFIER = 41, /*[EM]*/ + /** @stable ICU 58 */ + U_LB_ZWJ = 42, /*[ZWJ]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal ULineBreak value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_LINE_BREAK). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_LB_COUNT = 43 +#endif // U_HIDE_DEPRECATED_API +} ULineBreak; + +/** + * Numeric Type constants. + * + * @see UCHAR_NUMERIC_TYPE + * @stable ICU 2.2 + */ +typedef enum UNumericType { + /* + * Note: UNumericType constants are parsed by preparseucd.py. + * It matches lines like + * U_NT_<Unicode Numeric_Type value name> + */ + + U_NT_NONE, /*[None]*/ + U_NT_DECIMAL, /*[de]*/ + U_NT_DIGIT, /*[di]*/ + U_NT_NUMERIC, /*[nu]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UNumericType value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_NT_COUNT +#endif // U_HIDE_DEPRECATED_API +} UNumericType; + +/** + * Hangul Syllable Type constants. + * + * @see UCHAR_HANGUL_SYLLABLE_TYPE + * @stable ICU 2.6 + */ +typedef enum UHangulSyllableType { + /* + * Note: UHangulSyllableType constants are parsed by preparseucd.py. + * It matches lines like + * U_HST_<Unicode Hangul_Syllable_Type value name> + */ + + U_HST_NOT_APPLICABLE, /*[NA]*/ + U_HST_LEADING_JAMO, /*[L]*/ + U_HST_VOWEL_JAMO, /*[V]*/ + U_HST_TRAILING_JAMO, /*[T]*/ + U_HST_LV_SYLLABLE, /*[LV]*/ + U_HST_LVT_SYLLABLE, /*[LVT]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UHangulSyllableType value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_HST_COUNT +#endif // U_HIDE_DEPRECATED_API +} UHangulSyllableType; + +/** + * Indic Positional Category constants. + * + * @see UCHAR_INDIC_POSITIONAL_CATEGORY + * @stable ICU 63 + */ +typedef enum UIndicPositionalCategory { + /* + * Note: UIndicPositionalCategory constants are parsed by preparseucd.py. + * It matches lines like + * U_INPC_<Unicode Indic_Positional_Category value name> + */ + + /** @stable ICU 63 */ + U_INPC_NA, + /** @stable ICU 63 */ + U_INPC_BOTTOM, + /** @stable ICU 63 */ + U_INPC_BOTTOM_AND_LEFT, + /** @stable ICU 63 */ + U_INPC_BOTTOM_AND_RIGHT, + /** @stable ICU 63 */ + U_INPC_LEFT, + /** @stable ICU 63 */ + U_INPC_LEFT_AND_RIGHT, + /** @stable ICU 63 */ + U_INPC_OVERSTRUCK, + /** @stable ICU 63 */ + U_INPC_RIGHT, + /** @stable ICU 63 */ + U_INPC_TOP, + /** @stable ICU 63 */ + U_INPC_TOP_AND_BOTTOM, + /** @stable ICU 63 */ + U_INPC_TOP_AND_BOTTOM_AND_RIGHT, + /** @stable ICU 63 */ + U_INPC_TOP_AND_LEFT, + /** @stable ICU 63 */ + U_INPC_TOP_AND_LEFT_AND_RIGHT, + /** @stable ICU 63 */ + U_INPC_TOP_AND_RIGHT, + /** @stable ICU 63 */ + U_INPC_VISUAL_ORDER_LEFT, + /** @stable ICU 66 */ + U_INPC_TOP_AND_BOTTOM_AND_LEFT, +} UIndicPositionalCategory; + +/** + * Indic Syllabic Category constants. + * + * @see UCHAR_INDIC_SYLLABIC_CATEGORY + * @stable ICU 63 + */ +typedef enum UIndicSyllabicCategory { + /* + * Note: UIndicSyllabicCategory constants are parsed by preparseucd.py. + * It matches lines like + * U_INSC_<Unicode Indic_Syllabic_Category value name> + */ + + /** @stable ICU 63 */ + U_INSC_OTHER, + /** @stable ICU 63 */ + U_INSC_AVAGRAHA, + /** @stable ICU 63 */ + U_INSC_BINDU, + /** @stable ICU 63 */ + U_INSC_BRAHMI_JOINING_NUMBER, + /** @stable ICU 63 */ + U_INSC_CANTILLATION_MARK, + /** @stable ICU 63 */ + U_INSC_CONSONANT, + /** @stable ICU 63 */ + U_INSC_CONSONANT_DEAD, + /** @stable ICU 63 */ + U_INSC_CONSONANT_FINAL, + /** @stable ICU 63 */ + U_INSC_CONSONANT_HEAD_LETTER, + /** @stable ICU 63 */ + U_INSC_CONSONANT_INITIAL_POSTFIXED, + /** @stable ICU 63 */ + U_INSC_CONSONANT_KILLER, + /** @stable ICU 63 */ + U_INSC_CONSONANT_MEDIAL, + /** @stable ICU 63 */ + U_INSC_CONSONANT_PLACEHOLDER, + /** @stable ICU 63 */ + U_INSC_CONSONANT_PRECEDING_REPHA, + /** @stable ICU 63 */ + U_INSC_CONSONANT_PREFIXED, + /** @stable ICU 63 */ + U_INSC_CONSONANT_SUBJOINED, + /** @stable ICU 63 */ + U_INSC_CONSONANT_SUCCEEDING_REPHA, + /** @stable ICU 63 */ + U_INSC_CONSONANT_WITH_STACKER, + /** @stable ICU 63 */ + U_INSC_GEMINATION_MARK, + /** @stable ICU 63 */ + U_INSC_INVISIBLE_STACKER, + /** @stable ICU 63 */ + U_INSC_JOINER, + /** @stable ICU 63 */ + U_INSC_MODIFYING_LETTER, + /** @stable ICU 63 */ + U_INSC_NON_JOINER, + /** @stable ICU 63 */ + U_INSC_NUKTA, + /** @stable ICU 63 */ + U_INSC_NUMBER, + /** @stable ICU 63 */ + U_INSC_NUMBER_JOINER, + /** @stable ICU 63 */ + U_INSC_PURE_KILLER, + /** @stable ICU 63 */ + U_INSC_REGISTER_SHIFTER, + /** @stable ICU 63 */ + U_INSC_SYLLABLE_MODIFIER, + /** @stable ICU 63 */ + U_INSC_TONE_LETTER, + /** @stable ICU 63 */ + U_INSC_TONE_MARK, + /** @stable ICU 63 */ + U_INSC_VIRAMA, + /** @stable ICU 63 */ + U_INSC_VISARGA, + /** @stable ICU 63 */ + U_INSC_VOWEL, + /** @stable ICU 63 */ + U_INSC_VOWEL_DEPENDENT, + /** @stable ICU 63 */ + U_INSC_VOWEL_INDEPENDENT, +} UIndicSyllabicCategory; + +/** + * Vertical Orientation constants. + * + * @see UCHAR_VERTICAL_ORIENTATION + * @stable ICU 63 + */ +typedef enum UVerticalOrientation { + /* + * Note: UVerticalOrientation constants are parsed by preparseucd.py. + * It matches lines like + * U_VO_<Unicode Vertical_Orientation value name> + */ + + /** @stable ICU 63 */ + U_VO_ROTATED, + /** @stable ICU 63 */ + U_VO_TRANSFORMED_ROTATED, + /** @stable ICU 63 */ + U_VO_TRANSFORMED_UPRIGHT, + /** @stable ICU 63 */ + U_VO_UPRIGHT, +} UVerticalOrientation; + +/** + * Check a binary Unicode property for a code point. + * + * Unicode, especially in version 3.2, defines many more properties than the + * original set in UnicodeData.txt. + * + * The properties APIs are intended to reflect Unicode properties as defined + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). + * For details about the properties see http://www.unicode.org/ucd/ . + * For names of Unicode properties see the UCD file PropertyAliases.txt. + * + * Important: If ICU is built with UCD files from Unicode versions below 3.2, + * then properties marked with "new in Unicode 3.2" are not or not fully available. + * + * @param c Code point to test. + * @param which UProperty selector constant, identifies which binary property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT. + * @return true or false according to the binary Unicode property value for c. + * Also false if 'which' is out of bounds or if the Unicode version + * does not have data for the property at all, or not for this code point. + * + * @see UProperty + * @see u_getBinaryPropertySet + * @see u_getIntPropertyValue + * @see u_getUnicodeVersion + * @stable ICU 2.1 + */ +U_CAPI UBool U_EXPORT2 +u_hasBinaryProperty(UChar32 c, UProperty which); + +/** + * Returns a frozen USet for a binary property. + * The library retains ownership over the returned object. + * Sets an error code if the property number is not one for a binary property. + * + * The returned set contains all code points for which the property is true. + * + * @param property UCHAR_BINARY_START..UCHAR_BINARY_LIMIT-1 + * @param pErrorCode an in/out ICU UErrorCode + * @return the property as a set + * @see UProperty + * @see u_hasBinaryProperty + * @see Unicode::fromUSet + * @stable ICU 63 + */ +U_CAPI const USet * U_EXPORT2 +u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode); + +/** + * Check if a code point has the Alphabetic Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC). + * This is different from u_isalpha! + * @param c Code point to test + * @return true if the code point has the Alphabetic Unicode property, false otherwise + * + * @see UCHAR_ALPHABETIC + * @see u_isalpha + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_CAPI UBool U_EXPORT2 +u_isUAlphabetic(UChar32 c); + +/** + * Check if a code point has the Lowercase Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE). + * This is different from u_islower! + * @param c Code point to test + * @return true if the code point has the Lowercase Unicode property, false otherwise + * + * @see UCHAR_LOWERCASE + * @see u_islower + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_CAPI UBool U_EXPORT2 +u_isULowercase(UChar32 c); + +/** + * Check if a code point has the Uppercase Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE). + * This is different from u_isupper! + * @param c Code point to test + * @return true if the code point has the Uppercase Unicode property, false otherwise + * + * @see UCHAR_UPPERCASE + * @see u_isupper + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_CAPI UBool U_EXPORT2 +u_isUUppercase(UChar32 c); + +/** + * Check if a code point has the White_Space Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE). + * This is different from both u_isspace and u_isWhitespace! + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * @param c Code point to test + * @return true if the code point has the White_Space Unicode property, false otherwise. + * + * @see UCHAR_WHITE_SPACE + * @see u_isWhitespace + * @see u_isspace + * @see u_isJavaSpaceChar + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_CAPI UBool U_EXPORT2 +u_isUWhiteSpace(UChar32 c); + +/** + * Get the property value for an enumerated or integer Unicode property for a code point. + * Also returns binary and mask property values. + * + * Unicode, especially in version 3.2, defines many more properties than the + * original set in UnicodeData.txt. + * + * The properties APIs are intended to reflect Unicode properties as defined + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). + * For details about the properties see http://www.unicode.org/ . + * For names of Unicode properties see the UCD file PropertyAliases.txt. + * + * Sample usage: + * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH); + * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC); + * + * @param c Code point to test. + * @param which UProperty selector constant, identifies which property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT + * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. + * @return Numeric value that is directly the property value or, + * for enumerated properties, corresponds to the numeric value of the enumerated + * constant of the respective property value enumeration type + * (cast to enum type if necessary). + * Returns 0 or 1 (for false/true) for binary Unicode properties. + * Returns a bit-mask for mask properties. + * Returns 0 if 'which' is out of bounds or if the Unicode version + * does not have data for the property at all, or not for this code point. + * + * @see UProperty + * @see u_hasBinaryProperty + * @see u_getIntPropertyMinValue + * @see u_getIntPropertyMaxValue + * @see u_getIntPropertyMap + * @see u_getUnicodeVersion + * @stable ICU 2.2 + */ +U_CAPI int32_t U_EXPORT2 +u_getIntPropertyValue(UChar32 c, UProperty which); + +/** + * Get the minimum value for an enumerated/integer/binary Unicode property. + * Can be used together with u_getIntPropertyMaxValue + * to allocate arrays of UnicodeSet or similar. + * + * @param which UProperty selector constant, identifies which binary property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. + * @return Minimum value returned by u_getIntPropertyValue for a Unicode property. + * 0 if the property selector is out of range. + * + * @see UProperty + * @see u_hasBinaryProperty + * @see u_getUnicodeVersion + * @see u_getIntPropertyMaxValue + * @see u_getIntPropertyValue + * @stable ICU 2.2 + */ +U_CAPI int32_t U_EXPORT2 +u_getIntPropertyMinValue(UProperty which); + +/** + * Get the maximum value for an enumerated/integer/binary Unicode property. + * Can be used together with u_getIntPropertyMinValue + * to allocate arrays of UnicodeSet or similar. + * + * Examples for min/max values (for Unicode 3.2): + * + * - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL) + * - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA) + * - UCHAR_IDEOGRAPHIC: 0/1 (false/true) + * + * For undefined UProperty constant values, min/max values will be 0/-1. + * + * @param which UProperty selector constant, identifies which binary property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. + * @return Maximum value returned by u_getIntPropertyValue for a Unicode property. + * <=0 if the property selector is out of range. + * + * @see UProperty + * @see u_hasBinaryProperty + * @see u_getUnicodeVersion + * @see u_getIntPropertyMaxValue + * @see u_getIntPropertyValue + * @stable ICU 2.2 + */ +U_CAPI int32_t U_EXPORT2 +u_getIntPropertyMaxValue(UProperty which); + +/** + * Returns an immutable UCPMap for an enumerated/catalog/int-valued property. + * The library retains ownership over the returned object. + * Sets an error code if the property number is not one for an "int property". + * + * The returned object maps all Unicode code points to their values for that property. + * For documentation of the integer values see u_getIntPropertyValue(). + * + * @param property UCHAR_INT_START..UCHAR_INT_LIMIT-1 + * @param pErrorCode an in/out ICU UErrorCode + * @return the property as a map + * @see UProperty + * @see u_getIntPropertyValue + * @stable ICU 63 + */ +U_CAPI const UCPMap * U_EXPORT2 +u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode); + +/** + * Get the numeric value for a Unicode code point as defined in the + * Unicode Character Database. + * + * A "double" return type is necessary because + * some numeric values are fractions, negative, or too large for int32_t. + * + * For characters without any numeric values in the Unicode Character Database, + * this function will return U_NO_NUMERIC_VALUE. + * Note: This is different from the Unicode Standard which specifies NaN as the default value. + * (NaN is not available on all platforms.) + * + * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue() + * also supports negative values, large values, and fractions, + * while Java's getNumericValue() returns values 10..35 for ASCII letters. + * + * @param c Code point to get the numeric value for. + * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined. + * + * @see U_NO_NUMERIC_VALUE + * @stable ICU 2.2 + */ +U_CAPI double U_EXPORT2 +u_getNumericValue(UChar32 c); + +/** + * Special value that is returned by u_getNumericValue when + * no numeric value is defined for a code point. + * + * @see u_getNumericValue + * @stable ICU 2.2 + */ +#define U_NO_NUMERIC_VALUE ((double)-123456789.) + +/** + * Determines whether the specified code point has the general category "Ll" + * (lowercase letter). + * + * Same as java.lang.Character.isLowerCase(). + * + * This misses some characters that are also lowercase but + * have a different general category value. + * In order to include those, use UCHAR_LOWERCASE. + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return true if the code point is an Ll lowercase letter + * + * @see UCHAR_LOWERCASE + * @see u_isupper + * @see u_istitle + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_islower(UChar32 c); + +/** + * Determines whether the specified code point has the general category "Lu" + * (uppercase letter). + * + * Same as java.lang.Character.isUpperCase(). + * + * This misses some characters that are also uppercase but + * have a different general category value. + * In order to include those, use UCHAR_UPPERCASE. + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return true if the code point is an Lu uppercase letter + * + * @see UCHAR_UPPERCASE + * @see u_islower + * @see u_istitle + * @see u_tolower + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isupper(UChar32 c); + +/** + * Determines whether the specified code point is a titlecase letter. + * True for general category "Lt" (titlecase letter). + * + * Same as java.lang.Character.isTitleCase(). + * + * @param c the code point to be tested + * @return true if the code point is an Lt titlecase letter + * + * @see u_isupper + * @see u_islower + * @see u_totitle + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_istitle(UChar32 c); + +/** + * Determines whether the specified code point is a digit character according to Java. + * True for characters with general category "Nd" (decimal digit numbers). + * Beginning with Unicode 4, this is the same as + * testing for the Numeric_Type of Decimal. + * + * Same as java.lang.Character.isDigit(). + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return true if the code point is a digit character according to Character.isDigit() + * + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isdigit(UChar32 c); + +/** + * Determines whether the specified code point is a letter character. + * True for general categories "L" (letters). + * + * Same as java.lang.Character.isLetter(). + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return true if the code point is a letter character + * + * @see u_isdigit + * @see u_isalnum + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isalpha(UChar32 c); + +/** + * Determines whether the specified code point is an alphanumeric character + * (letter or digit) according to Java. + * True for characters with general categories + * "L" (letters) and "Nd" (decimal digit numbers). + * + * Same as java.lang.Character.isLetterOrDigit(). + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return true if the code point is an alphanumeric character according to Character.isLetterOrDigit() + * + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isalnum(UChar32 c); + +/** + * Determines whether the specified code point is a hexadecimal digit. + * This is equivalent to u_digit(c, 16)>=0. + * True for characters with general category "Nd" (decimal digit numbers) + * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. + * (That is, for letters with code points + * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.) + * + * In order to narrow the definition of hexadecimal digits to only ASCII + * characters, use (c<=0x7f && u_isxdigit(c)). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return true if the code point is a hexadecimal digit + * + * @stable ICU 2.6 + */ +U_CAPI UBool U_EXPORT2 +u_isxdigit(UChar32 c); + +/** + * Determines whether the specified code point is a punctuation character. + * True for characters with general categories "P" (punctuation). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return true if the code point is a punctuation character + * + * @stable ICU 2.6 + */ +U_CAPI UBool U_EXPORT2 +u_ispunct(UChar32 c); + +/** + * Determines whether the specified code point is a "graphic" character + * (printable, excluding spaces). + * true for all characters except those with general categories + * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates), + * "Cn" (unassigned), and "Z" (separators). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return true if the code point is a "graphic" character + * + * @stable ICU 2.6 + */ +U_CAPI UBool U_EXPORT2 +u_isgraph(UChar32 c); + +/** + * Determines whether the specified code point is a "blank" or "horizontal space", + * a character that visibly separates words on a line. + * The following are equivalent definitions: + * + * true for Unicode White_Space characters except for "vertical space controls" + * where "vertical space controls" are the following characters: + * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS) + * + * same as + * + * true for U+0009 (TAB) and characters with general category "Zs" (space separators). + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return true if the code point is a "blank" + * + * @stable ICU 2.6 + */ +U_CAPI UBool U_EXPORT2 +u_isblank(UChar32 c); + +/** + * Determines whether the specified code point is "defined", + * which usually means that it is assigned a character. + * True for general categories other than "Cn" (other, not assigned), + * i.e., true for all code points mentioned in UnicodeData.txt. + * + * Note that non-character code points (e.g., U+FDD0) are not "defined" + * (they are Cn), but surrogate code points are "defined" (Cs). + * + * Same as java.lang.Character.isDefined(). + * + * @param c the code point to be tested + * @return true if the code point is assigned a character + * + * @see u_isdigit + * @see u_isalpha + * @see u_isalnum + * @see u_isupper + * @see u_islower + * @see u_istitle + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isdefined(UChar32 c); + +/** + * Determines if the specified character is a space character or not. + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the character to be tested + * @return true if the character is a space character; false otherwise. + * + * @see u_isJavaSpaceChar + * @see u_isWhitespace + * @see u_isUWhiteSpace + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isspace(UChar32 c); + +/** + * Determine if the specified code point is a space character according to Java. + * True for characters with general categories "Z" (separators), + * which does not include control codes (e.g., TAB or Line Feed). + * + * Same as java.lang.Character.isSpaceChar(). + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * @param c the code point to be tested + * @return true if the code point is a space character according to Character.isSpaceChar() + * + * @see u_isspace + * @see u_isWhitespace + * @see u_isUWhiteSpace + * @stable ICU 2.6 + */ +U_CAPI UBool U_EXPORT2 +u_isJavaSpaceChar(UChar32 c); + +/** + * Determines if the specified code point is a whitespace character according to Java/ICU. + * A character is considered to be a Java whitespace character if and only + * if it satisfies one of the following criteria: + * + * - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not + * also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP). + * - It is U+0009 HORIZONTAL TABULATION. + * - It is U+000A LINE FEED. + * - It is U+000B VERTICAL TABULATION. + * - It is U+000C FORM FEED. + * - It is U+000D CARRIAGE RETURN. + * - It is U+001C FILE SEPARATOR. + * - It is U+001D GROUP SEPARATOR. + * - It is U+001E RECORD SEPARATOR. + * - It is U+001F UNIT SEPARATOR. + * + * This API tries to sync with the semantics of Java's + * java.lang.Character.isWhitespace(), but it may not return + * the exact same results because of the Unicode version + * difference. + * + * Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) + * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. + * See http://www.unicode.org/versions/Unicode4.0.1/ + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * @param c the code point to be tested + * @return true if the code point is a whitespace character according to Java/ICU + * + * @see u_isspace + * @see u_isJavaSpaceChar + * @see u_isUWhiteSpace + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isWhitespace(UChar32 c); + +/** + * Determines whether the specified code point is a control character + * (as defined by this function). + * A control character is one of the following: + * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f) + * - U_CONTROL_CHAR (Cc) + * - U_FORMAT_CHAR (Cf) + * - U_LINE_SEPARATOR (Zl) + * - U_PARAGRAPH_SEPARATOR (Zp) + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return true if the code point is a control character + * + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT + * @see u_isprint + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_iscntrl(UChar32 c); + +/** + * Determines whether the specified code point is an ISO control code. + * True for U+0000..U+001f and U+007f..U+009f (general category "Cc"). + * + * Same as java.lang.Character.isISOControl(). + * + * @param c the code point to be tested + * @return true if the code point is an ISO control code + * + * @see u_iscntrl + * @stable ICU 2.6 + */ +U_CAPI UBool U_EXPORT2 +u_isISOControl(UChar32 c); + +/** + * Determines whether the specified code point is a printable character. + * True for general categories <em>other</em> than "C" (controls). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return true if the code point is a printable character + * + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT + * @see u_iscntrl + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isprint(UChar32 c); + +/** + * Non-standard: Determines whether the specified code point is a base character. + * True for general categories "L" (letters), "N" (numbers), + * "Mc" (spacing combining marks), and "Me" (enclosing marks). + * + * Note that this is different from the Unicode Standard definition in + * chapter 3.6, conformance clause D51 “Base character”, + * which defines base characters as the code points with general categories + * Letter (L), Number (N), Punctuation (P), Symbol (S), or Space Separator (Zs). + * + * @param c the code point to be tested + * @return true if the code point is a base character according to this function + * + * @see u_isalpha + * @see u_isdigit + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isbase(UChar32 c); + +/** + * Returns the bidirectional category value for the code point, + * which is used in the Unicode bidirectional algorithm + * (UAX #9 http://www.unicode.org/reports/tr9/). + * Note that some <em>unassigned</em> code points have bidi values + * of R or AL because they are in blocks that are reserved + * for Right-To-Left scripts. + * + * Same as java.lang.Character.getDirectionality() + * + * @param c the code point to be tested + * @return the bidirectional category (UCharDirection) value + * + * @see UCharDirection + * @stable ICU 2.0 + */ +U_CAPI UCharDirection U_EXPORT2 +u_charDirection(UChar32 c); + +/** + * Determines whether the code point has the Bidi_Mirrored property. + * This property is set for characters that are commonly used in + * Right-To-Left contexts and need to be displayed with a "mirrored" + * glyph. + * + * Same as java.lang.Character.isMirrored(). + * Same as UCHAR_BIDI_MIRRORED + * + * @param c the code point to be tested + * @return true if the character has the Bidi_Mirrored property + * + * @see UCHAR_BIDI_MIRRORED + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isMirrored(UChar32 c); + +/** + * Maps the specified character to a "mirror-image" character. + * For characters with the Bidi_Mirrored property, implementations + * sometimes need a "poor man's" mapping to another Unicode + * character (code point) such that the default glyph may serve + * as the mirror-image of the default glyph of the specified + * character. This is useful for text conversion to and from + * codepages with visual order, and for displays without glyph + * selection capabilities. + * + * @param c the code point to be mapped + * @return another Unicode code point that may serve as a mirror-image + * substitute, or c itself if there is no such mapping or c + * does not have the Bidi_Mirrored property + * + * @see UCHAR_BIDI_MIRRORED + * @see u_isMirrored + * @stable ICU 2.0 + */ +U_CAPI UChar32 U_EXPORT2 +u_charMirror(UChar32 c); + +/** + * Maps the specified character to its paired bracket character. + * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror(). + * Otherwise c itself is returned. + * See http://www.unicode.org/reports/tr9/ + * + * @param c the code point to be mapped + * @return the paired bracket code point, + * or c itself if there is no such mapping + * (Bidi_Paired_Bracket_Type=None) + * + * @see UCHAR_BIDI_PAIRED_BRACKET + * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE + * @see u_charMirror + * @stable ICU 52 + */ +U_CAPI UChar32 U_EXPORT2 +u_getBidiPairedBracket(UChar32 c); + +/** + * Returns the general category value for the code point. + * + * Same as java.lang.Character.getType(). + * + * @param c the code point to be tested + * @return the general category (UCharCategory) value + * + * @see UCharCategory + * @stable ICU 2.0 + */ +U_CAPI int8_t U_EXPORT2 +u_charType(UChar32 c); + +/** + * Get a single-bit bit set for the general category of a character. + * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc. + * Same as U_MASK(u_charType(c)). + * + * @param c the code point to be tested + * @return a single-bit mask corresponding to the general category (UCharCategory) value + * + * @see u_charType + * @see UCharCategory + * @see U_GC_CN_MASK + * @stable ICU 2.1 + */ +#define U_GET_GC_MASK(c) U_MASK(u_charType(c)) + +/** + * Callback from u_enumCharTypes(), is called for each contiguous range + * of code points c (where start<=c<limit) + * with the same Unicode general category ("character type"). + * + * The callback function can stop the enumeration by returning false. + * + * @param context an opaque pointer, as passed into utrie_enum() + * @param start the first code point in a contiguous range with value + * @param limit one past the last code point in a contiguous range with value + * @param type the general category for all code points in [start..limit[ + * @return false to stop the enumeration + * + * @stable ICU 2.1 + * @see UCharCategory + * @see u_enumCharTypes + */ +typedef UBool U_CALLCONV +UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type); + +/** + * Enumerate efficiently all code points with their Unicode general categories. + * + * This is useful for building data structures (e.g., UnicodeSet's), + * for enumerating all assigned code points (type!=U_UNASSIGNED), etc. + * + * For each contiguous range of code points with a given general category ("character type"), + * the UCharEnumTypeRange function is called. + * Adjacent ranges have different types. + * The Unicode Standard guarantees that the numeric value of the type is 0..31. + * + * @param enumRange a pointer to a function that is called for each contiguous range + * of code points with the same general category + * @param context an opaque pointer that is passed on to the callback function + * + * @stable ICU 2.1 + * @see UCharCategory + * @see UCharEnumTypeRange + */ +U_CAPI void U_EXPORT2 +u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context); + +#if !UCONFIG_NO_NORMALIZATION + +/** + * Returns the combining class of the code point as specified in UnicodeData.txt. + * + * @param c the code point of the character + * @return the combining class of the character + * @stable ICU 2.0 + */ +U_CAPI uint8_t U_EXPORT2 +u_getCombiningClass(UChar32 c); + +#endif + +/** + * Returns the decimal digit value of a decimal digit character. + * Such characters have the general category "Nd" (decimal digit numbers) + * and a Numeric_Type of Decimal. + * + * Unlike ICU releases before 2.6, no digit values are returned for any + * Han characters because Han number characters are often used with a special + * Chinese-style number format (with characters for powers of 10 in between) + * instead of in decimal-positional notation. + * Unicode 4 explicitly assigns Han number characters the Numeric_Type + * Numeric instead of Decimal. + * See Jitterbug 1483 for more details. + * + * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue() + * for complete numeric Unicode properties. + * + * @param c the code point for which to get the decimal digit value + * @return the decimal digit value of c, + * or -1 if c is not a decimal digit character + * + * @see u_getNumericValue + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_charDigitValue(UChar32 c); + +/** + * Returns the Unicode allocation block that contains the character. + * + * @param c the code point to be tested + * @return the block value (UBlockCode) for c + * + * @see UBlockCode + * @stable ICU 2.0 + */ +U_CAPI UBlockCode U_EXPORT2 +ublock_getCode(UChar32 c); + +/** + * Retrieve the name of a Unicode character. + * Depending on <code>nameChoice</code>, the character name written + * into the buffer is the "modern" name or the name that was defined + * in Unicode version 1.0. + * The name contains only "invariant" characters + * like A-Z, 0-9, space, and '-'. + * Unicode 1.0 names are only retrieved if they are different from the modern + * names and if the data file contains the data for them. gennames may or may + * not be called with a command line option to include 1.0 names in unames.dat. + * + * @param code The character (code point) for which to get the name. + * It must be <code>0<=code<=0x10ffff</code>. + * @param nameChoice Selector for which name to get. + * @param buffer Destination address for copying the name. + * The name will always be zero-terminated. + * If there is no name, then the buffer will be set to the empty string. + * @param bufferLength <code>==sizeof(buffer)</code> + * @param pErrorCode Pointer to a UErrorCode variable; + * check for <code>U_SUCCESS()</code> after <code>u_charName()</code> + * returns. + * @return The length of the name, or 0 if there is no name for this character. + * If the bufferLength is less than or equal to the length, then the buffer + * contains the truncated name and the returned length indicates the full + * length of the name. + * The length does not include the zero-termination. + * + * @see UCharNameChoice + * @see u_charFromName + * @see u_enumCharNames + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_charName(UChar32 code, UCharNameChoice nameChoice, + char *buffer, int32_t bufferLength, + UErrorCode *pErrorCode); + +#ifndef U_HIDE_DEPRECATED_API +/** + * Returns an empty string. + * Used to return the ISO 10646 comment for a character. + * The Unicode ISO_Comment property is deprecated and has no values. + * + * @param c The character (code point) for which to get the ISO comment. + * It must be <code>0<=c<=0x10ffff</code>. + * @param dest Destination address for copying the comment. + * The comment will be zero-terminated if possible. + * If there is no comment, then the buffer will be set to the empty string. + * @param destCapacity <code>==sizeof(dest)</code> + * @param pErrorCode Pointer to a UErrorCode variable; + * check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code> + * returns. + * @return 0 + * + * @deprecated ICU 49 + */ +U_DEPRECATED int32_t U_EXPORT2 +u_getISOComment(UChar32 c, + char *dest, int32_t destCapacity, + UErrorCode *pErrorCode); +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Find a Unicode character by its name and return its code point value. + * The name is matched exactly and completely. + * If the name does not correspond to a code point, <i>pErrorCode</i> + * is set to <code>U_INVALID_CHAR_FOUND</code>. + * A Unicode 1.0 name is matched only if it differs from the modern name. + * Unicode names are all uppercase. Extended names are lowercase followed + * by an uppercase hexadecimal number, and within angle brackets. + * + * @param nameChoice Selector for which name to match. + * @param name The name to match. + * @param pErrorCode Pointer to a UErrorCode variable + * @return The Unicode value of the code point with the given name, + * or an undefined value if there is no such code point. + * + * @see UCharNameChoice + * @see u_charName + * @see u_enumCharNames + * @stable ICU 1.7 + */ +U_CAPI UChar32 U_EXPORT2 +u_charFromName(UCharNameChoice nameChoice, + const char *name, + UErrorCode *pErrorCode); + +/** + * Type of a callback function for u_enumCharNames() that gets called + * for each Unicode character with the code point value and + * the character name. + * If such a function returns false, then the enumeration is stopped. + * + * @param context The context pointer that was passed to u_enumCharNames(). + * @param code The Unicode code point for the character with this name. + * @param nameChoice Selector for which kind of names is enumerated. + * @param name The character's name, zero-terminated. + * @param length The length of the name. + * @return true if the enumeration should continue, false to stop it. + * + * @see UCharNameChoice + * @see u_enumCharNames + * @stable ICU 1.7 + */ +typedef UBool U_CALLCONV UEnumCharNamesFn(void *context, + UChar32 code, + UCharNameChoice nameChoice, + const char *name, + int32_t length); + +/** + * Enumerate all assigned Unicode characters between the start and limit + * code points (start inclusive, limit exclusive) and call a function + * for each, passing the code point value and the character name. + * For Unicode 1.0 names, only those are enumerated that differ from the + * modern names. + * + * @param start The first code point in the enumeration range. + * @param limit One more than the last code point in the enumeration range + * (the first one after the range). + * @param fn The function that is to be called for each character name. + * @param context An arbitrary pointer that is passed to the function. + * @param nameChoice Selector for which kind of names to enumerate. + * @param pErrorCode Pointer to a UErrorCode variable + * + * @see UCharNameChoice + * @see UEnumCharNamesFn + * @see u_charName + * @see u_charFromName + * @stable ICU 1.7 + */ +U_CAPI void U_EXPORT2 +u_enumCharNames(UChar32 start, UChar32 limit, + UEnumCharNamesFn *fn, + void *context, + UCharNameChoice nameChoice, + UErrorCode *pErrorCode); + +/** + * Return the Unicode name for a given property, as given in the + * Unicode database file PropertyAliases.txt. + * + * In addition, this function maps the property + * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" / + * "General_Category_Mask". These names are not in + * PropertyAliases.txt. + * + * @param property UProperty selector other than UCHAR_INVALID_CODE. + * If out of range, NULL is returned. + * + * @param nameChoice selector for which name to get. If out of range, + * NULL is returned. All properties have a long name. Most + * have a short name, but some do not. Unicode allows for + * additional names; if present these will be returned by + * U_LONG_PROPERTY_NAME + i, where i=1, 2,... + * + * @return a pointer to the name, or NULL if either the + * property or the nameChoice is out of range. If a given + * nameChoice returns NULL, then all larger values of + * nameChoice will return NULL, with one exception: if NULL is + * returned for U_SHORT_PROPERTY_NAME, then + * U_LONG_PROPERTY_NAME (and higher) may still return a + * non-NULL value. The returned pointer is valid until + * u_cleanup() is called. + * + * @see UProperty + * @see UPropertyNameChoice + * @stable ICU 2.4 + */ +U_CAPI const char* U_EXPORT2 +u_getPropertyName(UProperty property, + UPropertyNameChoice nameChoice); + +/** + * Return the UProperty enum for a given property name, as specified + * in the Unicode database file PropertyAliases.txt. Short, long, and + * any other variants are recognized. + * + * In addition, this function maps the synthetic names "gcm" / + * "General_Category_Mask" to the property + * UCHAR_GENERAL_CATEGORY_MASK. These names are not in + * PropertyAliases.txt. + * + * @param alias the property name to be matched. The name is compared + * using "loose matching" as described in PropertyAliases.txt. + * + * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name + * does not match any property. + * + * @see UProperty + * @stable ICU 2.4 + */ +U_CAPI UProperty U_EXPORT2 +u_getPropertyEnum(const char* alias); + +/** + * Return the Unicode name for a given property value, as given in the + * Unicode database file PropertyValueAliases.txt. + * + * Note: Some of the names in PropertyValueAliases.txt can only be + * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not + * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / + * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" + * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". + * + * @param property UProperty selector constant. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT + * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. + * If out of range, NULL is returned. + * + * @param value selector for a value for the given property. If out + * of range, NULL is returned. In general, valid values range + * from 0 up to some maximum. There are a few exceptions: + * (1.) UCHAR_BLOCK values begin at the non-zero value + * UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS + * values are not contiguous and range from 0..240. (3.) + * UCHAR_GENERAL_CATEGORY_MASK values are not values of + * UCharCategory, but rather mask values produced by + * U_GET_GC_MASK(). This allows grouped categories such as + * [:L:] to be represented. Mask values range + * non-contiguously from 1..U_GC_P_MASK. + * + * @param nameChoice selector for which name to get. If out of range, + * NULL is returned. All values have a long name. Most have + * a short name, but some do not. Unicode allows for + * additional names; if present these will be returned by + * U_LONG_PROPERTY_NAME + i, where i=1, 2,... + + * @return a pointer to the name, or NULL if either the + * property or the nameChoice is out of range. If a given + * nameChoice returns NULL, then all larger values of + * nameChoice will return NULL, with one exception: if NULL is + * returned for U_SHORT_PROPERTY_NAME, then + * U_LONG_PROPERTY_NAME (and higher) may still return a + * non-NULL value. The returned pointer is valid until + * u_cleanup() is called. + * + * @see UProperty + * @see UPropertyNameChoice + * @stable ICU 2.4 + */ +U_CAPI const char* U_EXPORT2 +u_getPropertyValueName(UProperty property, + int32_t value, + UPropertyNameChoice nameChoice); + +/** + * Return the property value integer for a given value name, as + * specified in the Unicode database file PropertyValueAliases.txt. + * Short, long, and any other variants are recognized. + * + * Note: Some of the names in PropertyValueAliases.txt will only be + * recognized with UCHAR_GENERAL_CATEGORY_MASK, not + * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / + * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" + * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". + * + * @param property UProperty selector constant. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT + * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. + * If out of range, UCHAR_INVALID_CODE is returned. + * + * @param alias the value name to be matched. The name is compared + * using "loose matching" as described in + * PropertyValueAliases.txt. + * + * @return a value integer or UCHAR_INVALID_CODE if the given name + * does not match any value of the given property, or if the + * property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values + * are not values of UCharCategory, but rather mask values + * produced by U_GET_GC_MASK(). This allows grouped + * categories such as [:L:] to be represented. + * + * @see UProperty + * @stable ICU 2.4 + */ +U_CAPI int32_t U_EXPORT2 +u_getPropertyValueEnum(UProperty property, + const char* alias); + +/** + * Determines if the specified character is permissible as the + * first character in an identifier according to Unicode + * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers). + * True for characters with general categories "L" (letters) and "Nl" (letter numbers). + * + * Same as java.lang.Character.isUnicodeIdentifierStart(). + * Same as UCHAR_ID_START + * + * @param c the code point to be tested + * @return true if the code point may start an identifier + * + * @see UCHAR_ID_START + * @see u_isalpha + * @see u_isIDPart + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isIDStart(UChar32 c); + +/** + * Determines if the specified character is permissible + * in an identifier according to Java. + * True for characters with general categories "L" (letters), + * "Nl" (letter numbers), "Nd" (decimal digits), + * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and + * u_isIDIgnorable(c). + * + * Same as java.lang.Character.isUnicodeIdentifierPart(). + * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE) + * except that Unicode recommends to ignore Cf which is less than + * u_isIDIgnorable(c). + * + * @param c the code point to be tested + * @return true if the code point may occur in an identifier according to Java + * + * @see UCHAR_ID_CONTINUE + * @see u_isIDStart + * @see u_isIDIgnorable + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isIDPart(UChar32 c); + +/** + * Determines if the specified character should be regarded + * as an ignorable character in an identifier, + * according to Java. + * True for characters with general category "Cf" (format controls) as well as + * non-whitespace ISO controls + * (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F). + * + * Same as java.lang.Character.isIdentifierIgnorable(). + * + * Note that Unicode just recommends to ignore Cf (format controls). + * + * @param c the code point to be tested + * @return true if the code point is ignorable in identifiers according to Java + * + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT + * @see u_isIDStart + * @see u_isIDPart + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isIDIgnorable(UChar32 c); + +/** + * Determines if the specified character is permissible as the + * first character in a Java identifier. + * In addition to u_isIDStart(c), true for characters with + * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation). + * + * Same as java.lang.Character.isJavaIdentifierStart(). + * + * @param c the code point to be tested + * @return true if the code point may start a Java identifier + * + * @see u_isJavaIDPart + * @see u_isalpha + * @see u_isIDStart + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isJavaIDStart(UChar32 c); + +/** + * Determines if the specified character is permissible + * in a Java identifier. + * In addition to u_isIDPart(c), true for characters with + * general category "Sc" (currency symbols). + * + * Same as java.lang.Character.isJavaIdentifierPart(). + * + * @param c the code point to be tested + * @return true if the code point may occur in a Java identifier + * + * @see u_isIDIgnorable + * @see u_isJavaIDStart + * @see u_isalpha + * @see u_isdigit + * @see u_isIDPart + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +u_isJavaIDPart(UChar32 c); + +/** + * The given character is mapped to its lowercase equivalent according to + * UnicodeData.txt; if the character has no lowercase equivalent, the character + * itself is returned. + * + * Same as java.lang.Character.toLowerCase(). + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * See also the User Guide chapter on C/POSIX migration: + * http://icu-project.org/userguide/posix.html#case_mappings + * + * @param c the code point to be mapped + * @return the Simple_Lowercase_Mapping of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_CAPI UChar32 U_EXPORT2 +u_tolower(UChar32 c); + +/** + * The given character is mapped to its uppercase equivalent according to UnicodeData.txt; + * if the character has no uppercase equivalent, the character itself is + * returned. + * + * Same as java.lang.Character.toUpperCase(). + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * See also the User Guide chapter on C/POSIX migration: + * http://icu-project.org/userguide/posix.html#case_mappings + * + * @param c the code point to be mapped + * @return the Simple_Uppercase_Mapping of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_CAPI UChar32 U_EXPORT2 +u_toupper(UChar32 c); + +/** + * The given character is mapped to its titlecase equivalent + * according to UnicodeData.txt; + * if none is defined, the character itself is returned. + * + * Same as java.lang.Character.toTitleCase(). + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * See also the User Guide chapter on C/POSIX migration: + * http://icu-project.org/userguide/posix.html#case_mappings + * + * @param c the code point to be mapped + * @return the Simple_Titlecase_Mapping of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_CAPI UChar32 U_EXPORT2 +u_totitle(UChar32 c); + +/** + * The given character is mapped to its case folding equivalent according to + * UnicodeData.txt and CaseFolding.txt; + * if the character has no case folding equivalent, the character + * itself is returned. + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * See also the User Guide chapter on C/POSIX migration: + * http://icu-project.org/userguide/posix.html#case_mappings + * + * @param c the code point to be mapped + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @return the Simple_Case_Folding of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_CAPI UChar32 U_EXPORT2 +u_foldCase(UChar32 c, uint32_t options); + +/** + * Returns the decimal digit value of the code point in the + * specified radix. + * + * If the radix is not in the range <code>2<=radix<=36</code> or if the + * value of <code>c</code> is not a valid digit in the specified + * radix, <code>-1</code> is returned. A character is a valid digit + * if at least one of the following is true: + * <ul> + * <li>The character has a decimal digit value. + * Such characters have the general category "Nd" (decimal digit numbers) + * and a Numeric_Type of Decimal. + * In this case the value is the character's decimal digit value.</li> + * <li>The character is one of the uppercase Latin letters + * <code>'A'</code> through <code>'Z'</code>. + * In this case the value is <code>c-'A'+10</code>.</li> + * <li>The character is one of the lowercase Latin letters + * <code>'a'</code> through <code>'z'</code>. + * In this case the value is <code>ch-'a'+10</code>.</li> + * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A) + * as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A) + * are recognized.</li> + * </ul> + * + * Same as java.lang.Character.digit(). + * + * @param ch the code point to be tested. + * @param radix the radix. + * @return the numeric value represented by the character in the + * specified radix, + * or -1 if there is no value or if the value exceeds the radix. + * + * @see UCHAR_NUMERIC_TYPE + * @see u_forDigit + * @see u_charDigitValue + * @see u_isdigit + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_digit(UChar32 ch, int8_t radix); + +/** + * Determines the character representation for a specific digit in + * the specified radix. If the value of <code>radix</code> is not a + * valid radix, or the value of <code>digit</code> is not a valid + * digit in the specified radix, the null character + * (<code>U+0000</code>) is returned. + * <p> + * The <code>radix</code> argument is valid if it is greater than or + * equal to 2 and less than or equal to 36. + * The <code>digit</code> argument is valid if + * <code>0 <= digit < radix</code>. + * <p> + * If the digit is less than 10, then + * <code>'0' + digit</code> is returned. Otherwise, the value + * <code>'a' + digit - 10</code> is returned. + * + * Same as java.lang.Character.forDigit(). + * + * @param digit the number to convert to a character. + * @param radix the radix. + * @return the <code>char</code> representation of the specified digit + * in the specified radix. + * + * @see u_digit + * @see u_charDigitValue + * @see u_isdigit + * @stable ICU 2.0 + */ +U_CAPI UChar32 U_EXPORT2 +u_forDigit(int32_t digit, int8_t radix); + +/** + * Get the "age" of the code point. + * The "age" is the Unicode version when the code point was first + * designated (as a non-character or for Private Use) + * or assigned a character. + * This can be useful to avoid emitting code points to receiving + * processes that do not accept newer characters. + * The data is from the UCD file DerivedAge.txt. + * + * @param c The code point. + * @param versionArray The Unicode version number array, to be filled in. + * + * @stable ICU 2.1 + */ +U_CAPI void U_EXPORT2 +u_charAge(UChar32 c, UVersionInfo versionArray); + +/** + * Gets the Unicode version information. + * The version array is filled in with the version information + * for the Unicode standard that is currently used by ICU. + * For example, Unicode version 3.1.1 is represented as an array with + * the values { 3, 1, 1, 0 }. + * + * @param versionArray an output array that will be filled in with + * the Unicode version number + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +u_getUnicodeVersion(UVersionInfo versionArray); + +#if !UCONFIG_NO_NORMALIZATION +/** + * Get the FC_NFKC_Closure property string for a character. + * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure" + * or for "FNC": http://www.unicode.org/reports/tr15/ + * + * @param c The character (code point) for which to get the FC_NFKC_Closure string. + * It must be <code>0<=c<=0x10ffff</code>. + * @param dest Destination address for copying the string. + * The string will be zero-terminated if possible. + * If there is no FC_NFKC_Closure string, + * then the buffer will be set to the empty string. + * @param destCapacity <code>==sizeof(dest)</code> + * @param pErrorCode Pointer to a UErrorCode variable. + * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character. + * If the destCapacity is less than or equal to the length, then the buffer + * contains the truncated name and the returned length indicates the full + * length of the name. + * The length does not include the zero-termination. + * + * @stable ICU 2.2 + */ +U_CAPI int32_t U_EXPORT2 +u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode); + +#endif + + +U_CDECL_END + +#endif /*_UCHAR*/ +/*eof*/ diff --git a/thirdparty/icu4c/common/unicode/ucharstrie.h b/thirdparty/icu4c/common/unicode/ucharstrie.h new file mode 100644 index 0000000000..b6f9e3e075 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ucharstrie.h @@ -0,0 +1,623 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2012, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ucharstrie.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010nov14 +* created by: Markus W. Scherer +*/ + +#ifndef __UCHARSTRIE_H__ +#define __UCHARSTRIE_H__ + +/** + * \file + * \brief C++ API: Trie for mapping Unicode strings (or 16-bit-unit sequences) + * to integer values. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/unistr.h" +#include "unicode/uobject.h" +#include "unicode/ustringtrie.h" + +U_NAMESPACE_BEGIN + +class Appendable; +class UCharsTrieBuilder; +class UVector32; + +/** + * Light-weight, non-const reader class for a UCharsTrie. + * Traverses a char16_t-serialized data structure with minimal state, + * for mapping strings (16-bit-unit sequences) to non-negative integer values. + * + * This class owns the serialized trie data only if it was constructed by + * the builder's build() method. + * The public constructor and the copy constructor only alias the data (only copy the pointer). + * There is no assignment operator. + * + * This class is not intended for public subclassing. + * @stable ICU 4.8 + */ +class U_COMMON_API UCharsTrie : public UMemory { +public: + /** + * Constructs a UCharsTrie reader instance. + * + * The trieUChars must contain a copy of a char16_t sequence from the UCharsTrieBuilder, + * starting with the first char16_t of that sequence. + * The UCharsTrie object will not read more char16_ts than + * the UCharsTrieBuilder generated in the corresponding build() call. + * + * The array is not copied/cloned and must not be modified while + * the UCharsTrie object is in use. + * + * @param trieUChars The char16_t array that contains the serialized trie. + * @stable ICU 4.8 + */ + UCharsTrie(ConstChar16Ptr trieUChars) + : ownedArray_(NULL), uchars_(trieUChars), + pos_(uchars_), remainingMatchLength_(-1) {} + + /** + * Destructor. + * @stable ICU 4.8 + */ + ~UCharsTrie(); + + /** + * Copy constructor, copies the other trie reader object and its state, + * but not the char16_t array which will be shared. (Shallow copy.) + * @param other Another UCharsTrie object. + * @stable ICU 4.8 + */ + UCharsTrie(const UCharsTrie &other) + : ownedArray_(NULL), uchars_(other.uchars_), + pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {} + + /** + * Resets this trie to its initial state. + * @return *this + * @stable ICU 4.8 + */ + UCharsTrie &reset() { + pos_=uchars_; + remainingMatchLength_=-1; + return *this; + } + + /** + * Returns the state of this trie as a 64-bit integer. + * The state value is never 0. + * + * @return opaque state value + * @see resetToState64 + * @stable ICU 65 + */ + uint64_t getState64() const { + return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) | + (uint64_t)(pos_ - uchars_); + } + + /** + * Resets this trie to the saved state. + * Unlike resetToState(State), the 64-bit state value + * must be from getState64() from the same trie object or + * from one initialized the exact same way. + * Because of no validation, this method is faster. + * + * @param state The opaque trie state value from getState64(). + * @return *this + * @see getState64 + * @see resetToState + * @see reset + * @stable ICU 65 + */ + UCharsTrie &resetToState64(uint64_t state) { + remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2; + pos_ = uchars_ + (state & kState64PosMask); + return *this; + } + + /** + * UCharsTrie state object, for saving a trie's current state + * and resetting the trie back to this state later. + * @stable ICU 4.8 + */ + class State : public UMemory { + public: + /** + * Constructs an empty State. + * @stable ICU 4.8 + */ + State() { uchars=NULL; } + private: + friend class UCharsTrie; + + const char16_t *uchars; + const char16_t *pos; + int32_t remainingMatchLength; + }; + + /** + * Saves the state of this trie. + * @param state The State object to hold the trie's state. + * @return *this + * @see resetToState + * @stable ICU 4.8 + */ + const UCharsTrie &saveState(State &state) const { + state.uchars=uchars_; + state.pos=pos_; + state.remainingMatchLength=remainingMatchLength_; + return *this; + } + + /** + * Resets this trie to the saved state. + * If the state object contains no state, or the state of a different trie, + * then this trie remains unchanged. + * @param state The State object which holds a saved trie state. + * @return *this + * @see saveState + * @see reset + * @stable ICU 4.8 + */ + UCharsTrie &resetToState(const State &state) { + if(uchars_==state.uchars && uchars_!=NULL) { + pos_=state.pos; + remainingMatchLength_=state.remainingMatchLength; + } + return *this; + } + + /** + * Determines whether the string so far matches, whether it has a value, + * and whether another input char16_t can continue a matching string. + * @return The match/value Result. + * @stable ICU 4.8 + */ + UStringTrieResult current() const; + + /** + * Traverses the trie from the initial state for this input char16_t. + * Equivalent to reset().next(uchar). + * @param uchar Input char value. Values below 0 and above 0xffff will never match. + * @return The match/value Result. + * @stable ICU 4.8 + */ + inline UStringTrieResult first(int32_t uchar) { + remainingMatchLength_=-1; + return nextImpl(uchars_, uchar); + } + + /** + * Traverses the trie from the initial state for the + * one or two UTF-16 code units for this input code point. + * Equivalent to reset().nextForCodePoint(cp). + * @param cp A Unicode code point 0..0x10ffff. + * @return The match/value Result. + * @stable ICU 4.8 + */ + UStringTrieResult firstForCodePoint(UChar32 cp); + + /** + * Traverses the trie from the current state for this input char16_t. + * @param uchar Input char value. Values below 0 and above 0xffff will never match. + * @return The match/value Result. + * @stable ICU 4.8 + */ + UStringTrieResult next(int32_t uchar); + + /** + * Traverses the trie from the current state for the + * one or two UTF-16 code units for this input code point. + * @param cp A Unicode code point 0..0x10ffff. + * @return The match/value Result. + * @stable ICU 4.8 + */ + UStringTrieResult nextForCodePoint(UChar32 cp); + + /** + * Traverses the trie from the current state for this string. + * Equivalent to + * \code + * Result result=current(); + * for(each c in s) + * if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH; + * result=next(c); + * return result; + * \endcode + * @param s A string. Can be NULL if length is 0. + * @param length The length of the string. Can be -1 if NUL-terminated. + * @return The match/value Result. + * @stable ICU 4.8 + */ + UStringTrieResult next(ConstChar16Ptr s, int32_t length); + + /** + * Returns a matching string's value if called immediately after + * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE. + * getValue() can be called multiple times. + * + * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE! + * @return The value for the string so far. + * @stable ICU 4.8 + */ + inline int32_t getValue() const { + const char16_t *pos=pos_; + int32_t leadUnit=*pos++; + // U_ASSERT(leadUnit>=kMinValueLead); + return leadUnit&kValueIsFinal ? + readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit); + } + + /** + * Determines whether all strings reachable from the current state + * map to the same value. + * @param uniqueValue Receives the unique value, if this function returns true. + * (output-only) + * @return true if all strings reachable from the current state + * map to the same value. + * @stable ICU 4.8 + */ + inline UBool hasUniqueValue(int32_t &uniqueValue) const { + const char16_t *pos=pos_; + // Skip the rest of a pending linear-match node. + return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, false, uniqueValue); + } + + /** + * Finds each char16_t which continues the string from the current state. + * That is, each char16_t c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now. + * @param out Each next char16_t is appended to this object. + * @return the number of char16_ts which continue the string from here + * @stable ICU 4.8 + */ + int32_t getNextUChars(Appendable &out) const; + + /** + * Iterator for all of the (string, value) pairs in a UCharsTrie. + * @stable ICU 4.8 + */ + class U_COMMON_API Iterator : public UMemory { + public: + /** + * Iterates from the root of a char16_t-serialized UCharsTrie. + * @param trieUChars The trie char16_ts. + * @param maxStringLength If 0, the iterator returns full strings. + * Otherwise, the iterator returns strings with this maximum length. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @stable ICU 4.8 + */ + Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode); + + /** + * Iterates from the current state of the specified UCharsTrie. + * @param trie The trie whose state will be copied for iteration. + * @param maxStringLength If 0, the iterator returns full strings. + * Otherwise, the iterator returns strings with this maximum length. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @stable ICU 4.8 + */ + Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode); + + /** + * Destructor. + * @stable ICU 4.8 + */ + ~Iterator(); + + /** + * Resets this iterator to its initial state. + * @return *this + * @stable ICU 4.8 + */ + Iterator &reset(); + + /** + * @return true if there are more elements. + * @stable ICU 4.8 + */ + UBool hasNext() const; + + /** + * Finds the next (string, value) pair if there is one. + * + * If the string is truncated to the maximum length and does not + * have a real value, then the value is set to -1. + * In this case, this "not a real value" is indistinguishable from + * a real value of -1. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return true if there is another element. + * @stable ICU 4.8 + */ + UBool next(UErrorCode &errorCode); + + /** + * @return The string for the last successful next(). + * @stable ICU 4.8 + */ + const UnicodeString &getString() const { return str_; } + /** + * @return The value for the last successful next(). + * @stable ICU 4.8 + */ + int32_t getValue() const { return value_; } + + private: + UBool truncateAndStop() { + pos_=NULL; + value_=-1; // no real value for str + return true; + } + + const char16_t *branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode); + + const char16_t *uchars_; + const char16_t *pos_; + const char16_t *initialPos_; + int32_t remainingMatchLength_; + int32_t initialRemainingMatchLength_; + UBool skipValue_; // Skip intermediate value which was already delivered. + + UnicodeString str_; + int32_t maxLength_; + int32_t value_; + + // The stack stores pairs of integers for backtracking to another + // outbound edge of a branch node. + // The first integer is an offset from uchars_. + // The second integer has the str_.length() from before the node in bits 15..0, + // and the remaining branch length in bits 31..16. + // (We could store the remaining branch length minus 1 in bits 30..16 and not use the sign bit, + // but the code looks more confusing that way.) + UVector32 *stack_; + }; + +private: + friend class UCharsTrieBuilder; + + /** + * Constructs a UCharsTrie reader instance. + * Unlike the public constructor which just aliases an array, + * this constructor adopts the builder's array. + * This constructor is only called by the builder. + */ + UCharsTrie(char16_t *adoptUChars, const char16_t *trieUChars) + : ownedArray_(adoptUChars), uchars_(trieUChars), + pos_(uchars_), remainingMatchLength_(-1) {} + + // No assignment operator. + UCharsTrie &operator=(const UCharsTrie &other); + + inline void stop() { + pos_=NULL; + } + + // Reads a compact 32-bit integer. + // pos is already after the leadUnit, and the lead unit has bit 15 reset. + static inline int32_t readValue(const char16_t *pos, int32_t leadUnit) { + int32_t value; + if(leadUnit<kMinTwoUnitValueLead) { + value=leadUnit; + } else if(leadUnit<kThreeUnitValueLead) { + value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos; + } else { + value=(pos[0]<<16)|pos[1]; + } + return value; + } + static inline const char16_t *skipValue(const char16_t *pos, int32_t leadUnit) { + if(leadUnit>=kMinTwoUnitValueLead) { + if(leadUnit<kThreeUnitValueLead) { + ++pos; + } else { + pos+=2; + } + } + return pos; + } + static inline const char16_t *skipValue(const char16_t *pos) { + int32_t leadUnit=*pos++; + return skipValue(pos, leadUnit&0x7fff); + } + + static inline int32_t readNodeValue(const char16_t *pos, int32_t leadUnit) { + // U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal); + int32_t value; + if(leadUnit<kMinTwoUnitNodeValueLead) { + value=(leadUnit>>6)-1; + } else if(leadUnit<kThreeUnitNodeValueLead) { + value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos; + } else { + value=(pos[0]<<16)|pos[1]; + } + return value; + } + static inline const char16_t *skipNodeValue(const char16_t *pos, int32_t leadUnit) { + // U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal); + if(leadUnit>=kMinTwoUnitNodeValueLead) { + if(leadUnit<kThreeUnitNodeValueLead) { + ++pos; + } else { + pos+=2; + } + } + return pos; + } + + static inline const char16_t *jumpByDelta(const char16_t *pos) { + int32_t delta=*pos++; + if(delta>=kMinTwoUnitDeltaLead) { + if(delta==kThreeUnitDeltaLead) { + delta=(pos[0]<<16)|pos[1]; + pos+=2; + } else { + delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++; + } + } + return pos+delta; + } + + static const char16_t *skipDelta(const char16_t *pos) { + int32_t delta=*pos++; + if(delta>=kMinTwoUnitDeltaLead) { + if(delta==kThreeUnitDeltaLead) { + pos+=2; + } else { + ++pos; + } + } + return pos; + } + + static inline UStringTrieResult valueResult(int32_t node) { + return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15)); + } + + // Handles a branch node for both next(uchar) and next(string). + UStringTrieResult branchNext(const char16_t *pos, int32_t length, int32_t uchar); + + // Requires remainingLength_<0. + UStringTrieResult nextImpl(const char16_t *pos, int32_t uchar); + + // Helper functions for hasUniqueValue(). + // Recursively finds a unique value (or whether there is not a unique one) + // from a branch. + static const char16_t *findUniqueValueFromBranch(const char16_t *pos, int32_t length, + UBool haveUniqueValue, int32_t &uniqueValue); + // Recursively finds a unique value (or whether there is not a unique one) + // starting from a position on a node lead unit. + static UBool findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue); + + // Helper functions for getNextUChars(). + // getNextUChars() when pos is on a branch node. + static void getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out); + + // UCharsTrie data structure + // + // The trie consists of a series of char16_t-serialized nodes for incremental + // Unicode string/char16_t sequence matching. (char16_t=16-bit unsigned integer) + // The root node is at the beginning of the trie data. + // + // Types of nodes are distinguished by their node lead unit ranges. + // After each node, except a final-value node, another node follows to + // encode match values or continue matching further units. + // + // Node types: + // - Final-value node: Stores a 32-bit integer in a compact, variable-length format. + // The value is for the string/char16_t sequence so far. + // - Match node, optionally with an intermediate value in a different compact format. + // The value, if present, is for the string/char16_t sequence so far. + // + // Aside from the value, which uses the node lead unit's high bits: + // + // - Linear-match node: Matches a number of units. + // - Branch node: Branches to other nodes according to the current input unit. + // The node unit is the length of the branch (number of units to select from) + // minus 1. It is followed by a sub-node: + // - If the length is at most kMaxBranchLinearSubNodeLength, then + // there are length-1 (key, value) pairs and then one more comparison unit. + // If one of the key units matches, then the value is either a final value for + // the string so far, or a "jump" delta to the next node. + // If the last unit matches, then matching continues with the next node. + // (Values have the same encoding as final-value nodes.) + // - If the length is greater than kMaxBranchLinearSubNodeLength, then + // there is one unit and one "jump" delta. + // If the input unit is less than the sub-node unit, then "jump" by delta to + // the next sub-node which will have a length of length/2. + // (The delta has its own compact encoding.) + // Otherwise, skip the "jump" delta to the next sub-node + // which will have a length of length-length/2. + + // Match-node lead unit values, after masking off intermediate-value bits: + + // 0000..002f: Branch node. If node!=0 then the length is node+1, otherwise + // the length is one more than the next unit. + + // For a branch sub-node with at most this many entries, we drop down + // to a linear search. + static const int32_t kMaxBranchLinearSubNodeLength=5; + + // 0030..003f: Linear-match node, match 1..16 units and continue reading the next node. + static const int32_t kMinLinearMatch=0x30; + static const int32_t kMaxLinearMatchLength=0x10; + + // Match-node lead unit bits 14..6 for the optional intermediate value. + // If these bits are 0, then there is no intermediate value. + // Otherwise, see the *NodeValue* constants below. + static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x0040 + static const int32_t kNodeTypeMask=kMinValueLead-1; // 0x003f + + // A final-value node has bit 15 set. + static const int32_t kValueIsFinal=0x8000; + + // Compact value: After testing and masking off bit 15, use the following thresholds. + static const int32_t kMaxOneUnitValue=0x3fff; + + static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1; // 0x4000 + static const int32_t kThreeUnitValueLead=0x7fff; + + static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1; // 0x3ffeffff + + // Compact intermediate-value integer, lead unit shared with a branch or linear-match node. + static const int32_t kMaxOneUnitNodeValue=0xff; + static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6); // 0x4040 + static const int32_t kThreeUnitNodeValueLead=0x7fc0; + + static const int32_t kMaxTwoUnitNodeValue= + ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1; // 0xfdffff + + // Compact delta integers. + static const int32_t kMaxOneUnitDelta=0xfbff; + static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1; // 0xfc00 + static const int32_t kThreeUnitDeltaLead=0xffff; + + static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff + + // For getState64(): + // The remainingMatchLength_ is -1..14=(kMaxLinearMatchLength=0x10)-2 + // so we need at least 5 bits for that. + // We add 2 to store it as a positive value 1..16=kMaxLinearMatchLength. + static constexpr int32_t kState64RemainingShift = 59; + static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1; + + char16_t *ownedArray_; + + // Fixed value referencing the UCharsTrie words. + const char16_t *uchars_; + + // Iterator variables. + + // Pointer to next trie unit to read. NULL if no more matches. + const char16_t *pos_; + // Remaining length of a linear-match node, minus 1. Negative if not in such a node. + int32_t remainingMatchLength_; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __UCHARSTRIE_H__ diff --git a/thirdparty/icu4c/common/unicode/ucharstriebuilder.h b/thirdparty/icu4c/common/unicode/ucharstriebuilder.h new file mode 100644 index 0000000000..15657702f9 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ucharstriebuilder.h @@ -0,0 +1,193 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ucharstriebuilder.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010nov14 +* created by: Markus W. Scherer +*/ + +#ifndef __UCHARSTRIEBUILDER_H__ +#define __UCHARSTRIEBUILDER_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/stringtriebuilder.h" +#include "unicode/ucharstrie.h" +#include "unicode/unistr.h" + +/** + * \file + * \brief C++ API: Builder for icu::UCharsTrie + */ + +U_NAMESPACE_BEGIN + +class UCharsTrieElement; + +/** + * Builder class for UCharsTrie. + * + * This class is not intended for public subclassing. + * @stable ICU 4.8 + */ +class U_COMMON_API UCharsTrieBuilder : public StringTrieBuilder { +public: + /** + * Constructs an empty builder. + * @param errorCode Standard ICU error code. + * @stable ICU 4.8 + */ + UCharsTrieBuilder(UErrorCode &errorCode); + + /** + * Destructor. + * @stable ICU 4.8 + */ + virtual ~UCharsTrieBuilder(); + + /** + * Adds a (string, value) pair. + * The string must be unique. + * The string contents will be copied; the builder does not keep + * a reference to the input UnicodeString or its buffer. + * @param s The input string. + * @param value The value associated with this string. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return *this + * @stable ICU 4.8 + */ + UCharsTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode); + + /** + * Builds a UCharsTrie for the add()ed data. + * Once built, no further data can be add()ed until clear() is called. + * + * A UCharsTrie cannot be empty. At least one (string, value) pair + * must have been add()ed. + * + * This method passes ownership of the builder's internal result array to the new trie object. + * Another call to any build() variant will re-serialize the trie. + * After clear() has been called, a new array will be used as well. + * @param buildOption Build option, see UStringTrieBuildOption. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return A new UCharsTrie for the add()ed data. + * @stable ICU 4.8 + */ + UCharsTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); + + /** + * Builds a UCharsTrie for the add()ed data and char16_t-serializes it. + * Once built, no further data can be add()ed until clear() is called. + * + * A UCharsTrie cannot be empty. At least one (string, value) pair + * must have been add()ed. + * + * Multiple calls to buildUnicodeString() set the UnicodeStrings to the + * builder's same char16_t array, without rebuilding. + * If buildUnicodeString() is called after build(), the trie will be + * re-serialized into a new array (because build() passes on ownership). + * If build() is called after buildUnicodeString(), the trie object returned + * by build() will become the owner of the underlying data for the + * previously returned UnicodeString. + * After clear() has been called, a new array will be used as well. + * @param buildOption Build option, see UStringTrieBuildOption. + * @param result A UnicodeString which will be set to the char16_t-serialized + * UCharsTrie for the add()ed data. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return result + * @stable ICU 4.8 + */ + UnicodeString &buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result, + UErrorCode &errorCode); + + /** + * Removes all (string, value) pairs. + * New data can then be add()ed and a new trie can be built. + * @return *this + * @stable ICU 4.8 + */ + UCharsTrieBuilder &clear() { + strings.remove(); + elementsLength=0; + ucharsLength=0; + return *this; + } + +private: + UCharsTrieBuilder(const UCharsTrieBuilder &other); // no copy constructor + UCharsTrieBuilder &operator=(const UCharsTrieBuilder &other); // no assignment operator + + void buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode); + + virtual int32_t getElementStringLength(int32_t i) const; + virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const; + virtual int32_t getElementValue(int32_t i) const; + + virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const; + + virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const; + virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const; + virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const; + + virtual UBool matchNodesCanHaveValues() const { return true; } + + virtual int32_t getMaxBranchLinearSubNodeLength() const { return UCharsTrie::kMaxBranchLinearSubNodeLength; } + virtual int32_t getMinLinearMatch() const { return UCharsTrie::kMinLinearMatch; } + virtual int32_t getMaxLinearMatchLength() const { return UCharsTrie::kMaxLinearMatchLength; } + + class UCTLinearMatchNode : public LinearMatchNode { + public: + UCTLinearMatchNode(const char16_t *units, int32_t len, Node *nextNode); + virtual UBool operator==(const Node &other) const; + virtual void write(StringTrieBuilder &builder); + private: + const char16_t *s; + }; + + virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, + Node *nextNode) const; + + UBool ensureCapacity(int32_t length); + virtual int32_t write(int32_t unit); + int32_t write(const char16_t *s, int32_t length); + virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length); + virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); + virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); + virtual int32_t writeDeltaTo(int32_t jumpTarget); + + UnicodeString strings; + UCharsTrieElement *elements; + int32_t elementsCapacity; + int32_t elementsLength; + + // char16_t serialization of the trie. + // Grows from the back: ucharsLength measures from the end of the buffer! + char16_t *uchars; + int32_t ucharsCapacity; + int32_t ucharsLength; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __UCHARSTRIEBUILDER_H__ diff --git a/thirdparty/icu4c/common/unicode/uchriter.h b/thirdparty/icu4c/common/unicode/uchriter.h new file mode 100644 index 0000000000..f5083561a8 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uchriter.h @@ -0,0 +1,393 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1998-2005, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#ifndef UCHRITER_H +#define UCHRITER_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/chariter.h" + +/** + * \file + * \brief C++ API: char16_t Character Iterator + */ + +U_NAMESPACE_BEGIN + +/** + * A concrete subclass of CharacterIterator that iterates over the + * characters (code units or code points) in a char16_t array. + * It's possible not only to create an + * iterator that iterates over an entire char16_t array, but also to + * create one that iterates over only a subrange of a char16_t array + * (iterators over different subranges of the same char16_t array don't + * compare equal). + * @see CharacterIterator + * @see ForwardCharacterIterator + * @stable ICU 2.0 + */ +class U_COMMON_API UCharCharacterIterator : public CharacterIterator { +public: + /** + * Create an iterator over the char16_t array referred to by "textPtr". + * The iteration range is 0 to <code>length-1</code>. + * text is only aliased, not adopted (the + * destructor will not delete it). + * @param textPtr The char16_t array to be iterated over + * @param length The length of the char16_t array + * @stable ICU 2.0 + */ + UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length); + + /** + * Create an iterator over the char16_t array referred to by "textPtr". + * The iteration range is 0 to <code>length-1</code>. + * text is only aliased, not adopted (the + * destructor will not delete it). + * The starting + * position is specified by "position". If "position" is outside the valid + * iteration range, the behavior of this object is undefined. + * @param textPtr The char16_t array to be iteratd over + * @param length The length of the char16_t array + * @param position The starting position of the iteration + * @stable ICU 2.0 + */ + UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, + int32_t position); + + /** + * Create an iterator over the char16_t array referred to by "textPtr". + * The iteration range is 0 to <code>end-1</code>. + * text is only aliased, not adopted (the + * destructor will not delete it). + * The starting + * position is specified by "position". If begin and end do not + * form a valid iteration range or "position" is outside the valid + * iteration range, the behavior of this object is undefined. + * @param textPtr The char16_t array to be iterated over + * @param length The length of the char16_t array + * @param textBegin The begin position of the iteration range + * @param textEnd The end position of the iteration range + * @param position The starting position of the iteration + * @stable ICU 2.0 + */ + UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length, + int32_t textBegin, + int32_t textEnd, + int32_t position); + + /** + * Copy constructor. The new iterator iterates over the same range + * of the same string as "that", and its initial position is the + * same as "that"'s current position. + * @param that The UCharCharacterIterator to be copied + * @stable ICU 2.0 + */ + UCharCharacterIterator(const UCharCharacterIterator& that); + + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~UCharCharacterIterator(); + + /** + * Assignment operator. *this is altered to iterate over the sane + * range of the same string as "that", and refers to the same + * character within that string as "that" does. + * @param that The object to be copied + * @return the newly created object + * @stable ICU 2.0 + */ + UCharCharacterIterator& + operator=(const UCharCharacterIterator& that); + + /** + * Returns true if the iterators iterate over the same range of the + * same string and are pointing at the same character. + * @param that The ForwardCharacterIterator used to be compared for equality + * @return true if the iterators iterate over the same range of the + * same string and are pointing at the same character. + * @stable ICU 2.0 + */ + virtual UBool operator==(const ForwardCharacterIterator& that) const; + + /** + * Generates a hash code for this iterator. + * @return the hash code. + * @stable ICU 2.0 + */ + virtual int32_t hashCode(void) const; + + /** + * Returns a new UCharCharacterIterator referring to the same + * character in the same range of the same string as this one. The + * caller must delete the new iterator. + * @return the CharacterIterator newly created + * @stable ICU 2.0 + */ + virtual UCharCharacterIterator* clone() const; + + /** + * Sets the iterator to refer to the first code unit in its + * iteration range, and returns that code unit. + * This can be used to begin an iteration with next(). + * @return the first code unit in its iteration range. + * @stable ICU 2.0 + */ + virtual char16_t first(void); + + /** + * Sets the iterator to refer to the first code unit in its + * iteration range, returns that code unit, and moves the position + * to the second code unit. This is an alternative to setToStart() + * for forward iteration with nextPostInc(). + * @return the first code unit in its iteration range + * @stable ICU 2.0 + */ + virtual char16_t firstPostInc(void); + + /** + * Sets the iterator to refer to the first code point in its + * iteration range, and returns that code unit, + * This can be used to begin an iteration with next32(). + * Note that an iteration with next32PostInc(), beginning with, + * e.g., setToStart() or firstPostInc(), is more efficient. + * @return the first code point in its iteration range + * @stable ICU 2.0 + */ + virtual UChar32 first32(void); + + /** + * Sets the iterator to refer to the first code point in its + * iteration range, returns that code point, and moves the position + * to the second code point. This is an alternative to setToStart() + * for forward iteration with next32PostInc(). + * @return the first code point in its iteration range. + * @stable ICU 2.0 + */ + virtual UChar32 first32PostInc(void); + + /** + * Sets the iterator to refer to the last code unit in its + * iteration range, and returns that code unit. + * This can be used to begin an iteration with previous(). + * @return the last code unit in its iteration range. + * @stable ICU 2.0 + */ + virtual char16_t last(void); + + /** + * Sets the iterator to refer to the last code point in its + * iteration range, and returns that code unit. + * This can be used to begin an iteration with previous32(). + * @return the last code point in its iteration range. + * @stable ICU 2.0 + */ + virtual UChar32 last32(void); + + /** + * Sets the iterator to refer to the "position"-th code unit + * in the text-storage object the iterator refers to, and + * returns that code unit. + * @param position the position within the text-storage object + * @return the code unit + * @stable ICU 2.0 + */ + virtual char16_t setIndex(int32_t position); + + /** + * Sets the iterator to refer to the beginning of the code point + * that contains the "position"-th code unit + * in the text-storage object the iterator refers to, and + * returns that code point. + * The current position is adjusted to the beginning of the code point + * (its first code unit). + * @param position the position within the text-storage object + * @return the code unit + * @stable ICU 2.0 + */ + virtual UChar32 setIndex32(int32_t position); + + /** + * Returns the code unit the iterator currently refers to. + * @return the code unit the iterator currently refers to. + * @stable ICU 2.0 + */ + virtual char16_t current(void) const; + + /** + * Returns the code point the iterator currently refers to. + * @return the code point the iterator currently refers to. + * @stable ICU 2.0 + */ + virtual UChar32 current32(void) const; + + /** + * Advances to the next code unit in the iteration range (toward + * endIndex()), and returns that code unit. If there are no more + * code units to return, returns DONE. + * @return the next code unit in the iteration range. + * @stable ICU 2.0 + */ + virtual char16_t next(void); + + /** + * Gets the current code unit for returning and advances to the next code unit + * in the iteration range + * (toward endIndex()). If there are + * no more code units to return, returns DONE. + * @return the current code unit. + * @stable ICU 2.0 + */ + virtual char16_t nextPostInc(void); + + /** + * Advances to the next code point in the iteration range (toward + * endIndex()), and returns that code point. If there are no more + * code points to return, returns DONE. + * Note that iteration with "pre-increment" semantics is less + * efficient than iteration with "post-increment" semantics + * that is provided by next32PostInc(). + * @return the next code point in the iteration range. + * @stable ICU 2.0 + */ + virtual UChar32 next32(void); + + /** + * Gets the current code point for returning and advances to the next code point + * in the iteration range + * (toward endIndex()). If there are + * no more code points to return, returns DONE. + * @return the current point. + * @stable ICU 2.0 + */ + virtual UChar32 next32PostInc(void); + + /** + * Returns false if there are no more code units or code points + * at or after the current position in the iteration range. + * This is used with nextPostInc() or next32PostInc() in forward + * iteration. + * @return false if there are no more code units or code points + * at or after the current position in the iteration range. + * @stable ICU 2.0 + */ + virtual UBool hasNext(); + + /** + * Advances to the previous code unit in the iteration range (toward + * startIndex()), and returns that code unit. If there are no more + * code units to return, returns DONE. + * @return the previous code unit in the iteration range. + * @stable ICU 2.0 + */ + virtual char16_t previous(void); + + /** + * Advances to the previous code point in the iteration range (toward + * startIndex()), and returns that code point. If there are no more + * code points to return, returns DONE. + * @return the previous code point in the iteration range. + * @stable ICU 2.0 + */ + virtual UChar32 previous32(void); + + /** + * Returns false if there are no more code units or code points + * before the current position in the iteration range. + * This is used with previous() or previous32() in backward + * iteration. + * @return false if there are no more code units or code points + * before the current position in the iteration range. + * @stable ICU 2.0 + */ + virtual UBool hasPrevious(); + + /** + * Moves the current position relative to the start or end of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code units forward + * or backward by specifying a positive or negative delta. + * @param delta the position relative to origin. A positive delta means forward; + * a negative delta means backward. + * @param origin Origin enumeration {kStart, kCurrent, kEnd} + * @return the new position + * @stable ICU 2.0 + */ + virtual int32_t move(int32_t delta, EOrigin origin); + + /** + * Moves the current position relative to the start or end of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code points forward + * or backward by specifying a positive or negative delta. + * @param delta the position relative to origin. A positive delta means forward; + * a negative delta means backward. + * @param origin Origin enumeration {kStart, kCurrent, kEnd} + * @return the new position + * @stable ICU 2.0 + */ +#ifdef move32 + // One of the system headers right now is sometimes defining a conflicting macro we don't use +#undef move32 +#endif + virtual int32_t move32(int32_t delta, EOrigin origin); + + /** + * Sets the iterator to iterate over a new range of text + * @stable ICU 2.0 + */ + void setText(ConstChar16Ptr newText, int32_t newTextLength); + + /** + * Copies the char16_t array under iteration into the UnicodeString + * referred to by "result". Even if this iterator iterates across + * only a part of this string, the whole string is copied. + * @param result Receives a copy of the text under iteration. + * @stable ICU 2.0 + */ + virtual void getText(UnicodeString& result); + + /** + * Return a class ID for this class (not really public) + * @return a class ID for this class + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Return a class ID for this object (not really public) + * @return a class ID for this object. + * @stable ICU 2.0 + */ + virtual UClassID getDynamicClassID(void) const; + +protected: + /** + * Protected constructor + * @stable ICU 2.0 + */ + UCharCharacterIterator(); + /** + * Protected member text + * @stable ICU 2.0 + */ + const char16_t* text; + +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/uclean.h b/thirdparty/icu4c/common/unicode/uclean.h new file mode 100644 index 0000000000..c2d920a16e --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uclean.h @@ -0,0 +1,262 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2001-2014, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* file name: uclean.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001July05 +* created by: George Rhoten +*/ + +#ifndef __UCLEAN_H__ +#define __UCLEAN_H__ + +#include "unicode/utypes.h" +/** + * \file + * \brief C API: Initialize and clean up ICU + */ + +/** + * Initialize ICU. + * + * Use of this function is optional. It is OK to simply use ICU + * services and functions without first having initialized + * ICU by calling u_init(). + * + * u_init() will attempt to load some part of ICU's data, and is + * useful as a test for configuration or installation problems that + * leave the ICU data inaccessible. A successful invocation of u_init() + * does not, however, guarantee that all ICU data is accessible. + * + * Multiple calls to u_init() cause no harm, aside from the small amount + * of time required. + * + * In old versions of ICU, u_init() was required in multi-threaded applications + * to ensure the thread safety of ICU. u_init() is no longer needed for this purpose. + * + * @param status An ICU UErrorCode parameter. It must not be <code>NULL</code>. + * An Error will be returned if some required part of ICU data can not + * be loaded or initialized. + * The function returns immediately if the input error code indicates a + * failure, as usual. + * + * @stable ICU 2.6 + */ +U_CAPI void U_EXPORT2 +u_init(UErrorCode *status); + +#ifndef U_HIDE_SYSTEM_API +/** + * Clean up the system resources, such as allocated memory or open files, + * used in all ICU libraries. This will free/delete all memory owned by the + * ICU libraries, and return them to their original load state. All open ICU + * items (collators, resource bundles, converters, etc.) must be closed before + * calling this function, otherwise ICU may not free its allocated memory + * (e.g. close your converters and resource bundles before calling this + * function). Generally, this function should be called once just before + * an application exits. For applications that dynamically load and unload + * the ICU libraries (relatively uncommon), u_cleanup() should be called + * just before the library unload. + * <p> + * u_cleanup() also clears any ICU heap functions, mutex functions or + * trace functions that may have been set for the process. + * This has the effect of restoring ICU to its initial condition, before + * any of these override functions were installed. Refer to + * u_setMemoryFunctions(), u_setMutexFunctions and + * utrace_setFunctions(). If ICU is to be reinitialized after + * calling u_cleanup(), these runtime override functions will need to + * be set up again if they are still required. + * <p> + * u_cleanup() is not thread safe. All other threads should stop using ICU + * before calling this function. + * <p> + * Any open ICU items will be left in an undefined state by u_cleanup(), + * and any subsequent attempt to use such an item will give unpredictable + * results. + * <p> + * After calling u_cleanup(), an application may continue to use ICU by + * calling u_init(). An application must invoke u_init() first from one single + * thread before allowing other threads call u_init(). All threads existing + * at the time of the first thread's call to u_init() must also call + * u_init() themselves before continuing with other ICU operations. + * <p> + * The use of u_cleanup() just before an application terminates is optional, + * but it should be called only once for performance reasons. The primary + * benefit is to eliminate reports of memory or resource leaks originating + * in ICU code from the results generated by heap analysis tools. + * <p> + * <strong>Use this function with great care!</strong> + * </p> + * + * @stable ICU 2.0 + * @system + */ +U_CAPI void U_EXPORT2 +u_cleanup(void); + +U_CDECL_BEGIN +/** + * Pointer type for a user supplied memory allocation function. + * @param context user supplied value, obtained from u_setMemoryFunctions(). + * @param size The number of bytes to be allocated + * @return Pointer to the newly allocated memory, or NULL if the allocation failed. + * @stable ICU 2.8 + * @system + */ +typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size); +/** + * Pointer type for a user supplied memory re-allocation function. + * @param context user supplied value, obtained from u_setMemoryFunctions(). + * @param size The number of bytes to be allocated + * @return Pointer to the newly allocated memory, or NULL if the allocation failed. + * @stable ICU 2.8 + * @system + */ +typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size); +/** + * Pointer type for a user supplied memory free function. Behavior should be + * similar the standard C library free(). + * @param context user supplied value, obtained from u_setMemoryFunctions(). + * @param mem Pointer to the memory block to be resized + * @param size The new size for the block + * @return Pointer to the resized memory block, or NULL if the resizing failed. + * @stable ICU 2.8 + * @system + */ +typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem); + +/** + * Set the functions that ICU will use for memory allocation. + * Use of this function is optional; by default (without this function), ICU will + * use the standard C library malloc() and free() functions. + * This function can only be used when ICU is in an initial, unused state, before + * u_init() has been called. + * @param context This pointer value will be saved, and then (later) passed as + * a parameter to the memory functions each time they + * are called. + * @param a Pointer to a user-supplied malloc function. + * @param r Pointer to a user-supplied realloc function. + * @param f Pointer to a user-supplied free function. + * @param status Receives error values. + * @stable ICU 2.8 + * @system + */ +U_CAPI void U_EXPORT2 +u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV_FPTR a, UMemReallocFn * U_CALLCONV_FPTR r, UMemFreeFn * U_CALLCONV_FPTR f, + UErrorCode *status); + +U_CDECL_END + +#ifndef U_HIDE_DEPRECATED_API +/********************************************************************************* + * + * Deprecated Functions + * + * The following functions for user supplied mutexes are no longer supported. + * Any attempt to use them will return a U_UNSUPPORTED_ERROR. + * + **********************************************************************************/ + +/** + * An opaque pointer type that represents an ICU mutex. + * For user-implemented mutexes, the value will typically point to a + * struct or object that implements the mutex. + * @deprecated ICU 52. This type is no longer supported. + * @system + */ +typedef void *UMTX; + +U_CDECL_BEGIN +/** + * Function Pointer type for a user supplied mutex initialization function. + * The user-supplied function will be called by ICU whenever ICU needs to create a + * new mutex. The function implementation should create a mutex, and store a pointer + * to something that uniquely identifies the mutex into the UMTX that is supplied + * as a parameter. + * @param context user supplied value, obtained from u_setMutexFunctions(). + * @param mutex Receives a pointer that identifies the new mutex. + * The mutex init function must set the UMTX to a non-null value. + * Subsequent calls by ICU to lock, unlock, or destroy a mutex will + * identify the mutex by the UMTX value. + * @param status Error status. Report errors back to ICU by setting this variable + * with an error code. + * @deprecated ICU 52. This function is no longer supported. + * @system + */ +typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCode* status); + + +/** + * Function Pointer type for a user supplied mutex functions. + * One of the user-supplied functions with this signature will be called by ICU + * whenever ICU needs to lock, unlock, or destroy a mutex. + * @param context user supplied value, obtained from u_setMutexFunctions(). + * @param mutex specify the mutex on which to operate. + * @deprecated ICU 52. This function is no longer supported. + * @system + */ +typedef void U_CALLCONV UMtxFn (const void *context, UMTX *mutex); +U_CDECL_END + +/** + * Set the functions that ICU will use for mutex operations + * Use of this function is optional; by default (without this function), ICU will + * directly access system functions for mutex operations + * This function can only be used when ICU is in an initial, unused state, before + * u_init() has been called. + * @param context This pointer value will be saved, and then (later) passed as + * a parameter to the user-supplied mutex functions each time they + * are called. + * @param init Pointer to a mutex initialization function. Must be non-null. + * @param destroy Pointer to the mutex destroy function. Must be non-null. + * @param lock pointer to the mutex lock function. Must be non-null. + * @param unlock Pointer to the mutex unlock function. Must be non-null. + * @param status Receives error values. + * @deprecated ICU 52. This function is no longer supported. + * @system + */ +U_DEPRECATED void U_EXPORT2 +u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtxFn *lock, UMtxFn *unlock, + UErrorCode *status); + + +/** + * Pointer type for a user supplied atomic increment or decrement function. + * @param context user supplied value, obtained from u_setAtomicIncDecFunctions(). + * @param p Pointer to a 32 bit int to be incremented or decremented + * @return The value of the variable after the inc or dec operation. + * @deprecated ICU 52. This function is no longer supported. + * @system + */ +typedef int32_t U_CALLCONV UMtxAtomicFn(const void *context, int32_t *p); + +/** + * Set the functions that ICU will use for atomic increment and decrement of int32_t values. + * Use of this function is optional; by default (without this function), ICU will + * use its own internal implementation of atomic increment/decrement. + * This function can only be used when ICU is in an initial, unused state, before + * u_init() has been called. + * @param context This pointer value will be saved, and then (later) passed as + * a parameter to the increment and decrement functions each time they + * are called. This function can only be called + * @param inc Pointer to a function to do an atomic increment operation. Must be non-null. + * @param dec Pointer to a function to do an atomic decrement operation. Must be non-null. + * @param status Receives error values. + * @deprecated ICU 52. This function is no longer supported. + * @system + */ +U_DEPRECATED void U_EXPORT2 +u_setAtomicIncDecFunctions(const void *context, UMtxAtomicFn *inc, UMtxAtomicFn *dec, + UErrorCode *status); + +#endif /* U_HIDE_DEPRECATED_API */ +#endif /* U_HIDE_SYSTEM_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/ucnv.h b/thirdparty/icu4c/common/unicode/ucnv.h new file mode 100644 index 0000000000..58f271cfb5 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ucnv.h @@ -0,0 +1,2045 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** + * ucnv.h: + * External APIs for the ICU's codeset conversion library + * Bertrand A. Damiba + * + * Modification History: + * + * Date Name Description + * 04/04/99 helena Fixed internal header inclusion. + * 05/11/00 helena Added setFallback and usesFallback APIs. + * 06/29/2000 helena Major rewrite of the callback APIs. + * 12/07/2000 srl Update of documentation + */ + +/** + * \file + * \brief C API: Character conversion + * + * <h2>Character Conversion C API</h2> + * + * <p>This API is used to convert codepage or character encoded data to and + * from UTF-16. You can open a converter with {@link ucnv_open() }. With that + * converter, you can get its properties, set options, convert your data and + * close the converter.</p> + * + * <p>Since many software programs recognize different converter names for + * different types of converters, there are other functions in this API to + * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() }, + * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the + * more frequently used alias functions to get this information.</p> + * + * <p>When a converter encounters an illegal, irregular, invalid or unmappable character + * its default behavior is to use a substitution character to replace the + * bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() } + * or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines + * many other callback actions that can be used instead of a character substitution.</p> + * + * <p>More information about this API can be found in our + * <a href="http://icu-project.org/userguide/conversion.html">User's + * Guide</a>.</p> + */ + +#ifndef UCNV_H +#define UCNV_H + +#include "unicode/ucnv_err.h" +#include "unicode/uenum.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN) + +#define USET_DEFINED + +/** + * USet is the C API type corresponding to C++ class UnicodeSet. + * It is forward-declared here to avoid including unicode/uset.h file if related + * conversion APIs are not used. + * + * @see ucnv_getUnicodeSet + * @stable ICU 2.4 + */ +typedef struct USet USet; + +#endif + +#if !UCONFIG_NO_CONVERSION + +U_CDECL_BEGIN + +/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */ +#define UCNV_MAX_CONVERTER_NAME_LENGTH 60 +/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */ +#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH) + +/** Shift in for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */ +#define UCNV_SI 0x0F +/** Shift out for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */ +#define UCNV_SO 0x0E + +/** + * Enum for specifying basic types of converters + * @see ucnv_getType + * @stable ICU 2.0 + */ +typedef enum { + /** @stable ICU 2.0 */ + UCNV_UNSUPPORTED_CONVERTER = -1, + /** @stable ICU 2.0 */ + UCNV_SBCS = 0, + /** @stable ICU 2.0 */ + UCNV_DBCS = 1, + /** @stable ICU 2.0 */ + UCNV_MBCS = 2, + /** @stable ICU 2.0 */ + UCNV_LATIN_1 = 3, + /** @stable ICU 2.0 */ + UCNV_UTF8 = 4, + /** @stable ICU 2.0 */ + UCNV_UTF16_BigEndian = 5, + /** @stable ICU 2.0 */ + UCNV_UTF16_LittleEndian = 6, + /** @stable ICU 2.0 */ + UCNV_UTF32_BigEndian = 7, + /** @stable ICU 2.0 */ + UCNV_UTF32_LittleEndian = 8, + /** @stable ICU 2.0 */ + UCNV_EBCDIC_STATEFUL = 9, + /** @stable ICU 2.0 */ + UCNV_ISO_2022 = 10, + + /** @stable ICU 2.0 */ + UCNV_LMBCS_1 = 11, + /** @stable ICU 2.0 */ + UCNV_LMBCS_2, + /** @stable ICU 2.0 */ + UCNV_LMBCS_3, + /** @stable ICU 2.0 */ + UCNV_LMBCS_4, + /** @stable ICU 2.0 */ + UCNV_LMBCS_5, + /** @stable ICU 2.0 */ + UCNV_LMBCS_6, + /** @stable ICU 2.0 */ + UCNV_LMBCS_8, + /** @stable ICU 2.0 */ + UCNV_LMBCS_11, + /** @stable ICU 2.0 */ + UCNV_LMBCS_16, + /** @stable ICU 2.0 */ + UCNV_LMBCS_17, + /** @stable ICU 2.0 */ + UCNV_LMBCS_18, + /** @stable ICU 2.0 */ + UCNV_LMBCS_19, + /** @stable ICU 2.0 */ + UCNV_LMBCS_LAST = UCNV_LMBCS_19, + /** @stable ICU 2.0 */ + UCNV_HZ, + /** @stable ICU 2.0 */ + UCNV_SCSU, + /** @stable ICU 2.0 */ + UCNV_ISCII, + /** @stable ICU 2.0 */ + UCNV_US_ASCII, + /** @stable ICU 2.0 */ + UCNV_UTF7, + /** @stable ICU 2.2 */ + UCNV_BOCU1, + /** @stable ICU 2.2 */ + UCNV_UTF16, + /** @stable ICU 2.2 */ + UCNV_UTF32, + /** @stable ICU 2.2 */ + UCNV_CESU8, + /** @stable ICU 2.4 */ + UCNV_IMAP_MAILBOX, + /** @stable ICU 4.8 */ + UCNV_COMPOUND_TEXT, + + /* Number of converter types for which we have conversion routines. */ + UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES +} UConverterType; + +/** + * Enum for specifying which platform a converter ID refers to. + * The use of platform/CCSID is not recommended. See ucnv_openCCSID(). + * + * @see ucnv_getPlatform + * @see ucnv_openCCSID + * @see ucnv_getCCSID + * @stable ICU 2.0 + */ +typedef enum { + UCNV_UNKNOWN = -1, + UCNV_IBM = 0 +} UConverterPlatform; + +/** + * Function pointer for error callback in the codepage to unicode direction. + * Called when an error has occurred in conversion to unicode, or on open/close of the callback (see reason). + * @param context Pointer to the callback's private data + * @param args Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param pErrorCode ICU error code in/out parameter. + * For converter callback functions, set to a conversion error + * before the call, and the callback may reset it to U_ZERO_ERROR. + * @see ucnv_setToUCallBack + * @see UConverterToUnicodeArgs + * @stable ICU 2.0 + */ +typedef void (U_EXPORT2 *UConverterToUCallback) ( + const void* context, + UConverterToUnicodeArgs *args, + const char *codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode *pErrorCode); + +/** + * Function pointer for error callback in the unicode to codepage direction. + * Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason). + * @param context Pointer to the callback's private data + * @param args Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param pErrorCode ICU error code in/out parameter. + * For converter callback functions, set to a conversion error + * before the call, and the callback may reset it to U_ZERO_ERROR. + * @see ucnv_setFromUCallBack + * @stable ICU 2.0 + */ +typedef void (U_EXPORT2 *UConverterFromUCallback) ( + const void* context, + UConverterFromUnicodeArgs *args, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode *pErrorCode); + +U_CDECL_END + +/** + * Character that separates converter names from options and options from each other. + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_OPTION_SEP_CHAR ',' + +/** + * String version of UCNV_OPTION_SEP_CHAR. + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_OPTION_SEP_STRING "," + +/** + * Character that separates a converter option from its value. + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_VALUE_SEP_CHAR '=' + +/** + * String version of UCNV_VALUE_SEP_CHAR. + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_VALUE_SEP_STRING "=" + +/** + * Converter option for specifying a locale. + * For example, ucnv_open("SCSU,locale=ja", &errorCode); + * See convrtrs.txt. + * + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_LOCALE_OPTION_STRING ",locale=" + +/** + * Converter option for specifying a version selector (0..9) for some converters. + * For example, + * \code + * ucnv_open("UTF-7,version=1", &errorCode); + * \endcode + * See convrtrs.txt. + * + * @see ucnv_open + * @stable ICU 2.4 + */ +#define UCNV_VERSION_OPTION_STRING ",version=" + +/** + * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages. + * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on + * S/390 (z/OS) Unix System Services (Open Edition). + * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode); + * See convrtrs.txt. + * + * @see ucnv_open + * @stable ICU 2.4 + */ +#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl" + +/** + * Do a fuzzy compare of two converter/alias names. + * The comparison is case-insensitive, ignores leading zeroes if they are not + * followed by further digits, and ignores all but letters and digits. + * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent. + * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22 + * at http://www.unicode.org/reports/tr22/ + * + * @param name1 a converter name or alias, zero-terminated + * @param name2 a converter name or alias, zero-terminated + * @return 0 if the names match, or a negative value if the name1 + * lexically precedes name2, or a positive value if the name1 + * lexically follows name2. + * @stable ICU 2.0 + */ +U_CAPI int U_EXPORT2 +ucnv_compareNames(const char *name1, const char *name2); + + +/** + * Creates a UConverter object with the name of a coded character set specified as a C string. + * The actual name will be resolved with the alias file + * using a case-insensitive string comparison that ignores + * leading zeroes and all non-alphanumeric characters. + * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent. + * (See also ucnv_compareNames().) + * If <code>NULL</code> is passed for the converter name, it will create one with the + * getDefaultName return value. + * + * <p>A converter name for ICU 1.5 and above may contain options + * like a locale specification to control the specific behavior of + * the newly instantiated converter. + * The meaning of the options depends on the particular converter. + * If an option is not defined for or recognized by a given converter, then it is ignored.</p> + * + * <p>Options are appended to the converter name string, with a + * <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and + * also between adjacent options.</p> + * + * <p>If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p> + * + * <p>The conversion behavior and names can vary between platforms. ICU may + * convert some characters differently from other platforms. Details on this topic + * are in the <a href="http://icu-project.org/userguide/conversion.html">User's + * Guide</a>. Aliases starting with a "cp" prefix have no specific meaning + * other than its an alias starting with the letters "cp". Please do not + * associate any meaning to these aliases.</p> + * + * \snippet samples/ucnv/convsamp.cpp ucnv_open + * + * @param converterName Name of the coded character set table. + * This may have options appended to the string. + * IANA alias character set names, IBM CCSIDs starting with "ibm-", + * Windows codepage numbers starting with "windows-" are frequently + * used for this parameter. See ucnv_getAvailableName and + * ucnv_getAlias for a complete list that is available. + * If this parameter is NULL, the default converter will be used. + * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> + * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred + * @see ucnv_openU + * @see ucnv_openCCSID + * @see ucnv_getAvailableName + * @see ucnv_getAlias + * @see ucnv_getDefaultName + * @see ucnv_close + * @see ucnv_compareNames + * @stable ICU 2.0 + */ +U_CAPI UConverter* U_EXPORT2 +ucnv_open(const char *converterName, UErrorCode *err); + + +/** + * Creates a Unicode converter with the names specified as unicode string. + * The name should be limited to the ASCII-7 alphanumerics range. + * The actual name will be resolved with the alias file + * using a case-insensitive string comparison that ignores + * leading zeroes and all non-alphanumeric characters. + * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent. + * (See also ucnv_compareNames().) + * If <TT>NULL</TT> is passed for the converter name, it will create + * one with the ucnv_getDefaultName() return value. + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * + * <p>See ucnv_open for the complete details</p> + * @param name Name of the UConverter table in a zero terminated + * Unicode string + * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, + * U_FILE_ACCESS_ERROR</TT> + * @return the created Unicode converter object, or <TT>NULL</TT> if an + * error occurred + * @see ucnv_open + * @see ucnv_openCCSID + * @see ucnv_close + * @see ucnv_compareNames + * @stable ICU 2.0 + */ +U_CAPI UConverter* U_EXPORT2 +ucnv_openU(const UChar *name, + UErrorCode *err); + +/** + * Creates a UConverter object from a CCSID number and platform pair. + * Note that the usefulness of this function is limited to platforms with numeric + * encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for + * encodings. + * + * In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related. + * For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and + * for some Unicode conversion tables there are multiple CCSIDs. + * Some "alternate" Unicode conversion tables are provided by the + * IBM CDRA conversion table registry. + * The most prominent example of a systematic modification of conversion tables that is + * not provided in the form of conversion table files in the repository is + * that S/390 Unix System Services swaps the codes for Line Feed and New Line in all + * EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well. + * + * Only IBM default conversion tables are accessible with ucnv_openCCSID(). + * ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated + * with that CCSID. + * + * Currently, the only "platform" supported in the ICU converter API is UCNV_IBM. + * + * In summary, the use of CCSIDs and the associated API functions is not recommended. + * + * In order to open a converter with the default IBM CDRA Unicode conversion table, + * you can use this function or use the prefix "ibm-": + * \code + * char name[20]; + * sprintf(name, "ibm-%hu", ccsid); + * cnv=ucnv_open(name, &errorCode); + * \endcode + * + * In order to open a converter with the IBM S/390 Unix System Services variant + * of a Unicode/EBCDIC conversion table, + * you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING: + * \code + * char name[20]; + * sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid); + * cnv=ucnv_open(name, &errorCode); + * \endcode + * + * In order to open a converter from a Microsoft codepage number, use the prefix "cp": + * \code + * char name[20]; + * sprintf(name, "cp%hu", codepageID); + * cnv=ucnv_open(name, &errorCode); + * \endcode + * + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * + * @param codepage codepage number to create + * @param platform the platform in which the codepage number exists + * @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> + * @return the created Unicode converter object, or <TT>NULL</TT> if an error + * occurred. + * @see ucnv_open + * @see ucnv_openU + * @see ucnv_close + * @see ucnv_getCCSID + * @see ucnv_getPlatform + * @see UConverterPlatform + * @stable ICU 2.0 + */ +U_CAPI UConverter* U_EXPORT2 +ucnv_openCCSID(int32_t codepage, + UConverterPlatform platform, + UErrorCode * err); + +/** + * <p>Creates a UConverter object specified from a packageName and a converterName.</p> + * + * <p>The packageName and converterName must point to an ICU udata object, as defined by + * <code> udata_open( packageName, "cnv", converterName, err) </code> or equivalent. + * Typically, packageName will refer to a (.dat) file, or to a package registered with + * udata_setAppData(). Using a full file or directory pathname for packageName is deprecated.</p> + * + * <p>The name will NOT be looked up in the alias mechanism, nor will the converter be + * stored in the converter cache or the alias table. The only way to open further converters + * is call this function multiple times, or use the ucnv_safeClone() function to clone a + * 'primary' converter.</p> + * + * <p>A future version of ICU may add alias table lookups and/or caching + * to this function.</p> + * + * <p>Example Use: + * <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code> + * </p> + * + * @param packageName name of the package (equivalent to 'path' in udata_open() call) + * @param converterName name of the data item to be used, without suffix. + * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> + * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred + * @see udata_open + * @see ucnv_open + * @see ucnv_safeClone + * @see ucnv_close + * @stable ICU 2.2 + */ +U_CAPI UConverter* U_EXPORT2 +ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err); + +/** + * Thread safe converter cloning operation. + * For most efficient operation, pass in a stackBuffer (and a *pBufferSize) + * with at least U_CNV_SAFECLONE_BUFFERSIZE bytes of space. + * If the buffer size is sufficient, then the clone will use the stack buffer; + * otherwise, it will be allocated, and *pBufferSize will indicate + * the actual size. (This should not occur with U_CNV_SAFECLONE_BUFFERSIZE.) + * + * You must ucnv_close() the clone in any case. + * + * If *pBufferSize==0, (regardless of whether stackBuffer==NULL or not) + * then *pBufferSize will be changed to a sufficient size + * for cloning this converter, + * without actually cloning the converter ("pure pre-flighting"). + * + * If *pBufferSize is greater than zero but not large enough for a stack-based + * clone, then the converter is cloned using newly allocated memory + * and *pBufferSize is changed to the necessary size. + * + * If the converter clone fits into the stack buffer but the stack buffer is not + * sufficiently aligned for the clone, then the clone will use an + * adjusted pointer and use an accordingly smaller buffer size. + * + * @param cnv converter to be cloned + * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br> + * user allocated space for the new clone. If NULL new memory will be allocated. + * If buffer is not large enough, new memory will be allocated. + * Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations. + * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br> + * pointer to size of allocated space. + * @param status to indicate whether the operation went on smoothly or there were errors + * An informational status value, U_SAFECLONE_ALLOCATED_WARNING, + * is used if any allocations were necessary. + * However, it is better to check if *pBufferSize grew for checking for + * allocations because warning codes can be overridden by subsequent + * function calls. + * @return pointer to the new clone + * @stable ICU 2.0 + */ +U_CAPI UConverter * U_EXPORT2 +ucnv_safeClone(const UConverter *cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status); + +#ifndef U_HIDE_DEPRECATED_API + +/** + * \def U_CNV_SAFECLONE_BUFFERSIZE + * Definition of a buffer size that is designed to be large enough for + * converters to be cloned with ucnv_safeClone(). + * @deprecated ICU 52. Do not rely on ucnv_safeClone() cloning into any provided buffer. + */ +#define U_CNV_SAFECLONE_BUFFERSIZE 1024 + +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Deletes the unicode converter and releases resources associated + * with just this instance. + * Does not free up shared converter tables. + * + * @param converter the converter object to be deleted + * @see ucnv_open + * @see ucnv_openU + * @see ucnv_openCCSID + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_close(UConverter * converter); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUConverterPointer + * "Smart pointer" class, closes a UConverter via ucnv_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterPointer, UConverter, ucnv_close); + +U_NAMESPACE_END + +#endif + +/** + * Fills in the output parameter, subChars, with the substitution characters + * as multiple bytes. + * If ucnv_setSubstString() set a Unicode string because the converter is + * stateful, then subChars will be an empty string. + * + * @param converter the Unicode converter + * @param subChars the substitution characters + * @param len on input the capacity of subChars, on output the number + * of bytes copied to it + * @param err the outgoing error status code. + * If the substitution character array is too small, an + * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned. + * @see ucnv_setSubstString + * @see ucnv_setSubstChars + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_getSubstChars(const UConverter *converter, + char *subChars, + int8_t *len, + UErrorCode *err); + +/** + * Sets the substitution chars when converting from unicode to a codepage. The + * substitution is specified as a string of 1-4 bytes, and may contain + * <TT>NULL</TT> bytes. + * The subChars must represent a single character. The caller needs to know the + * byte sequence of a valid character in the converter's charset. + * For some converters, for example some ISO 2022 variants, only single-byte + * substitution characters may be supported. + * The newer ucnv_setSubstString() function relaxes these limitations. + * + * @param converter the Unicode converter + * @param subChars the substitution character byte sequence we want set + * @param len the number of bytes in subChars + * @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if + * len is bigger than the maximum number of bytes allowed in subchars + * @see ucnv_setSubstString + * @see ucnv_getSubstChars + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_setSubstChars(UConverter *converter, + const char *subChars, + int8_t len, + UErrorCode *err); + +/** + * Set a substitution string for converting from Unicode to a charset. + * The caller need not know the charset byte sequence for each charset. + * + * Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence + * for a single character, this function takes a Unicode string with + * zero, one or more characters, and immediately verifies that the string can be + * converted to the charset. + * If not, or if the result is too long (more than 32 bytes as of ICU 3.6), + * then the function returns with an error accordingly. + * + * Also unlike ucnv_setSubstChars(), this function works for stateful charsets + * by converting on the fly at the point of substitution rather than setting + * a fixed byte sequence. + * + * @param cnv The UConverter object. + * @param s The Unicode string. + * @param length The number of UChars in s, or -1 for a NUL-terminated string. + * @param err Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * + * @see ucnv_setSubstChars + * @see ucnv_getSubstChars + * @stable ICU 3.6 + */ +U_CAPI void U_EXPORT2 +ucnv_setSubstString(UConverter *cnv, + const UChar *s, + int32_t length, + UErrorCode *err); + +/** + * Fills in the output parameter, errBytes, with the error characters from the + * last failing conversion. + * + * @param converter the Unicode converter + * @param errBytes the codepage bytes which were in error + * @param len on input the capacity of errBytes, on output the number of + * bytes which were copied to it + * @param err the error status code. + * If the substitution character array is too small, an + * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned. + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_getInvalidChars(const UConverter *converter, + char *errBytes, + int8_t *len, + UErrorCode *err); + +/** + * Fills in the output parameter, errChars, with the error characters from the + * last failing conversion. + * + * @param converter the Unicode converter + * @param errUChars the UChars which were in error + * @param len on input the capacity of errUChars, on output the number of + * UChars which were copied to it + * @param err the error status code. + * If the substitution character array is too small, an + * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned. + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_getInvalidUChars(const UConverter *converter, + UChar *errUChars, + int8_t *len, + UErrorCode *err); + +/** + * Resets the state of a converter to the default state. This is used + * in the case of an error, to restart a conversion from a known default state. + * It will also empty the internal output buffers. + * @param converter the Unicode converter + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_reset(UConverter *converter); + +/** + * Resets the to-Unicode part of a converter state to the default state. + * This is used in the case of an error to restart a conversion to + * Unicode to a known default state. It will also empty the internal + * output buffers used for the conversion to Unicode codepoints. + * @param converter the Unicode converter + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_resetToUnicode(UConverter *converter); + +/** + * Resets the from-Unicode part of a converter state to the default state. + * This is used in the case of an error to restart a conversion from + * Unicode to a known default state. It will also empty the internal output + * buffers used for the conversion from Unicode codepoints. + * @param converter the Unicode converter + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_resetFromUnicode(UConverter *converter); + +/** + * Returns the maximum number of bytes that are output per UChar in conversion + * from Unicode using this converter. + * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING + * to calculate the size of a target buffer for conversion from Unicode. + * + * Note: Before ICU 2.8, this function did not return reliable numbers for + * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS. + * + * This number may not be the same as the maximum number of bytes per + * "conversion unit". In other words, it may not be the intuitively expected + * number of bytes per character that would be published for a charset, + * and may not fulfill any other purpose than the allocation of an output + * buffer of guaranteed sufficient size for a given input length and converter. + * + * Examples for special cases that are taken into account: + * - Supplementary code points may convert to more bytes than BMP code points. + * This function returns bytes per UChar (UTF-16 code unit), not per + * Unicode code point, for efficient buffer allocation. + * - State-shifting output (SI/SO, escapes, etc.) from stateful converters. + * - When m input UChars are converted to n output bytes, then the maximum m/n + * is taken into account. + * + * The number returned here does not take into account + * (see UCNV_GET_MAX_BYTES_FOR_STRING): + * - callbacks which output more than one charset character sequence per call, + * like escape callbacks + * - initial and final non-character bytes that are output by some converters + * (automatic BOMs, initial escape sequence, final SI, etc.) + * + * Examples for returned values: + * - SBCS charsets: 1 + * - Shift-JIS: 2 + * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted) + * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_) + * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS) + * - ISO-2022: 3 (always outputs UTF-8) + * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS) + * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS) + * + * @param converter The Unicode converter. + * @return The maximum number of bytes per UChar (16 bit code unit) + * that are output by ucnv_fromUnicode(), + * to be used together with UCNV_GET_MAX_BYTES_FOR_STRING + * for buffer allocation. + * + * @see UCNV_GET_MAX_BYTES_FOR_STRING + * @see ucnv_getMinCharSize + * @stable ICU 2.0 + */ +U_CAPI int8_t U_EXPORT2 +ucnv_getMaxCharSize(const UConverter *converter); + +/** + * Calculates the size of a buffer for conversion from Unicode to a charset. + * The calculated size is guaranteed to be sufficient for this conversion. + * + * It takes into account initial and final non-character bytes that are output + * by some converters. + * It does not take into account callbacks which output more than one charset + * character sequence per call, like escape callbacks. + * The default (substitution) callback only outputs one charset character sequence. + * + * @param length Number of UChars to be converted. + * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter + * that will be used. + * @return Size of a buffer that will be large enough to hold the output bytes of + * converting length UChars with the converter that returned the maxCharSize. + * + * @see ucnv_getMaxCharSize + * @stable ICU 2.8 + */ +#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \ + (((int32_t)(length)+10)*(int32_t)(maxCharSize)) + +/** + * Returns the minimum byte length (per codepoint) for characters in this codepage. + * This is usually either 1 or 2. + * @param converter the Unicode converter + * @return the minimum number of bytes per codepoint allowed by this particular converter + * @see ucnv_getMaxCharSize + * @stable ICU 2.0 + */ +U_CAPI int8_t U_EXPORT2 +ucnv_getMinCharSize(const UConverter *converter); + +/** + * Returns the display name of the converter passed in based on the Locale + * passed in. If the locale contains no display name, the internal ASCII + * name will be filled in. + * + * @param converter the Unicode converter. + * @param displayLocale is the specific Locale we want to localized for + * @param displayName user provided buffer to be filled in + * @param displayNameCapacity size of displayName Buffer + * @param err error status code + * @return displayNameLength number of UChar needed in displayName + * @see ucnv_getName + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ucnv_getDisplayName(const UConverter *converter, + const char *displayLocale, + UChar *displayName, + int32_t displayNameCapacity, + UErrorCode *err); + +/** + * Gets the internal, canonical name of the converter (zero-terminated). + * The lifetime of the returned string will be that of the converter + * passed to this function. + * @param converter the Unicode converter + * @param err UErrorCode status + * @return the internal name of the converter + * @see ucnv_getDisplayName + * @stable ICU 2.0 + */ +U_CAPI const char * U_EXPORT2 +ucnv_getName(const UConverter *converter, UErrorCode *err); + +/** + * Gets a codepage number associated with the converter. This is not guaranteed + * to be the one used to create the converter. Some converters do not represent + * platform registered codepages and return zero for the codepage number. + * The error code fill-in parameter indicates if the codepage number + * is available. + * Does not check if the converter is <TT>NULL</TT> or if converter's data + * table is <TT>NULL</TT>. + * + * Important: The use of CCSIDs is not recommended because it is limited + * to only two platforms in principle and only one (UCNV_IBM) in the current + * ICU converter API. + * Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely. + * For more details see ucnv_openCCSID(). + * + * @param converter the Unicode converter + * @param err the error status code. + * @return If any error occurs, -1 will be returned otherwise, the codepage number + * will be returned + * @see ucnv_openCCSID + * @see ucnv_getPlatform + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ucnv_getCCSID(const UConverter *converter, + UErrorCode *err); + +/** + * Gets a codepage platform associated with the converter. Currently, + * only <TT>UCNV_IBM</TT> will be returned. + * Does not test if the converter is <TT>NULL</TT> or if converter's data + * table is <TT>NULL</TT>. + * @param converter the Unicode converter + * @param err the error status code. + * @return The codepage platform + * @stable ICU 2.0 + */ +U_CAPI UConverterPlatform U_EXPORT2 +ucnv_getPlatform(const UConverter *converter, + UErrorCode *err); + +/** + * Gets the type of the converter + * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, + * EBCDIC_STATEFUL, LATIN_1 + * @param converter a valid, opened converter + * @return the type of the converter + * @stable ICU 2.0 + */ +U_CAPI UConverterType U_EXPORT2 +ucnv_getType(const UConverter * converter); + +/** + * Gets the "starter" (lead) bytes for converters of type MBCS. + * Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in + * is not MBCS. Fills in an array of type UBool, with the value of the byte + * as offset to the array. For example, if (starters[0x20] == true) at return, + * it means that the byte 0x20 is a starter byte in this converter. + * Context pointers are always owned by the caller. + * + * @param converter a valid, opened converter of type MBCS + * @param starters an array of size 256 to be filled in + * @param err error status, <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the + * converter is not a type which can return starters. + * @see ucnv_getType + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_getStarters(const UConverter* converter, + UBool starters[256], + UErrorCode* err); + + +/** + * Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet(). + * @see ucnv_getUnicodeSet + * @stable ICU 2.6 + */ +typedef enum UConverterUnicodeSet { + /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */ + UCNV_ROUNDTRIP_SET, + /** Select the set of Unicode code points with roundtrip or fallback mappings. @stable ICU 4.0 */ + UCNV_ROUNDTRIP_AND_FALLBACK_SET, +#ifndef U_HIDE_DEPRECATED_API + /** + * Number of UConverterUnicodeSet selectors. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCNV_SET_COUNT +#endif // U_HIDE_DEPRECATED_API +} UConverterUnicodeSet; + + +/** + * Returns the set of Unicode code points that can be converted by an ICU converter. + * + * Returns one of several kinds of set: + * + * 1. UCNV_ROUNDTRIP_SET + * + * The set of all Unicode code points that can be roundtrip-converted + * (converted without any data loss) with the converter (ucnv_fromUnicode()). + * This set will not include code points that have fallback mappings + * or are only the result of reverse fallback mappings. + * This set will also not include PUA code points with fallbacks, although + * ucnv_fromUnicode() will always uses those mappings despite ucnv_setFallback(). + * See UTR #22 "Character Mapping Markup Language" + * at http://www.unicode.org/reports/tr22/ + * + * This is useful for example for + * - checking that a string or document can be roundtrip-converted with a converter, + * without/before actually performing the conversion + * - testing if a converter can be used for text for typical text for a certain locale, + * by comparing its roundtrip set with the set of ExemplarCharacters from + * ICU's locale data or other sources + * + * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET + * + * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode()) + * when fallbacks are turned on (see ucnv_setFallback()). + * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks). + * + * In the future, there may be more UConverterUnicodeSet choices to select + * sets with different properties. + * + * @param cnv The converter for which a set is requested. + * @param setFillIn A valid USet *. It will be cleared by this function before + * the converter's specific set is filled into the USet. + * @param whichSet A UConverterUnicodeSet selector; + * currently UCNV_ROUNDTRIP_SET is the only supported value. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * + * @see UConverterUnicodeSet + * @see uset_open + * @see uset_close + * @stable ICU 2.6 + */ +U_CAPI void U_EXPORT2 +ucnv_getUnicodeSet(const UConverter *cnv, + USet *setFillIn, + UConverterUnicodeSet whichSet, + UErrorCode *pErrorCode); + +/** + * Gets the current calback function used by the converter when an illegal + * or invalid codepage sequence is found. + * Context pointers are always owned by the caller. + * + * @param converter the unicode converter + * @param action fillin: returns the callback function pointer + * @param context fillin: returns the callback's private void* context + * @see ucnv_setToUCallBack + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_getToUCallBack (const UConverter * converter, + UConverterToUCallback *action, + const void **context); + +/** + * Gets the current callback function used by the converter when illegal + * or invalid Unicode sequence is found. + * Context pointers are always owned by the caller. + * + * @param converter the unicode converter + * @param action fillin: returns the callback function pointer + * @param context fillin: returns the callback's private void* context + * @see ucnv_setFromUCallBack + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_getFromUCallBack (const UConverter * converter, + UConverterFromUCallback *action, + const void **context); + +/** + * Changes the callback function used by the converter when + * an illegal or invalid sequence is found. + * Context pointers are always owned by the caller. + * Predefined actions and contexts can be found in the ucnv_err.h header. + * + * @param converter the unicode converter + * @param newAction the new callback function + * @param newContext the new toUnicode callback context pointer. This can be NULL. + * @param oldAction fillin: returns the old callback function pointer. This can be NULL. + * @param oldContext fillin: returns the old callback's private void* context. This can be NULL. + * @param err The error code status + * @see ucnv_getToUCallBack + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_setToUCallBack (UConverter * converter, + UConverterToUCallback newAction, + const void* newContext, + UConverterToUCallback *oldAction, + const void** oldContext, + UErrorCode * err); + +/** + * Changes the current callback function used by the converter when + * an illegal or invalid sequence is found. + * Context pointers are always owned by the caller. + * Predefined actions and contexts can be found in the ucnv_err.h header. + * + * @param converter the unicode converter + * @param newAction the new callback function + * @param newContext the new fromUnicode callback context pointer. This can be NULL. + * @param oldAction fillin: returns the old callback function pointer. This can be NULL. + * @param oldContext fillin: returns the old callback's private void* context. This can be NULL. + * @param err The error code status + * @see ucnv_getFromUCallBack + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_setFromUCallBack (UConverter * converter, + UConverterFromUCallback newAction, + const void *newContext, + UConverterFromUCallback *oldAction, + const void **oldContext, + UErrorCode * err); + +/** + * Converts an array of unicode characters to an array of codepage + * characters. This function is optimized for converting a continuous + * stream of data in buffer-sized chunks, where the entire source and + * target does not fit in available buffers. + * + * The source pointer is an in/out parameter. It starts out pointing where the + * conversion is to begin, and ends up pointing after the last UChar consumed. + * + * Target similarly starts out pointer at the first available byte in the output + * buffer, and ends up pointing after the last byte written to the output. + * + * The converter always attempts to consume the entire source buffer, unless + * (1.) the target buffer is full, or (2.) a failing error is returned from the + * current callback function. When a successful error status has been + * returned, it means that all of the source buffer has been + * consumed. At that point, the caller should reset the source and + * sourceLimit pointers to point to the next chunk. + * + * At the end of the stream (flush==true), the input is completely consumed + * when *source==sourceLimit and no error code is set. + * The converter object is then automatically reset by this function. + * (This means that a converter need not be reset explicitly between data + * streams if it finishes the previous stream without errors.) + * + * This is a <I>stateful</I> conversion. Additionally, even when all source data has + * been consumed, some data may be in the converters' internal state. + * Call this function repeatedly, updating the target pointers with + * the next empty chunk of target in case of a + * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers + * with the next chunk of source when a successful error status is + * returned, until there are no more chunks of source data. + * @param converter the Unicode converter + * @param target I/O parameter. Input : Points to the beginning of the buffer to copy + * codepage characters to. Output : points to after the last codepage character copied + * to <TT>target</TT>. + * @param targetLimit the pointer just after last of the <TT>target</TT> buffer + * @param source I/O parameter, pointer to pointer to the source Unicode character buffer. + * @param sourceLimit the pointer just after the last of the source buffer + * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number + * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer + * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT> + * For output data carried across calls, and other data without a specific source character + * (such as from escape sequences or callbacks) -1 will be placed for offsets. + * @param flush set to <TT>true</TT> if the current source buffer is the last available + * chunk of the source, <TT>false</TT> otherwise. Note that if a failing status is returned, + * this function may have to be called multiple times with flush set to <TT>true</TT> until + * the source buffer is consumed. + * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the + * converter is <TT>NULL</TT>. + * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is + * still data to be written to the target. + * @see ucnv_fromUChars + * @see ucnv_convert + * @see ucnv_getMinCharSize + * @see ucnv_setToUCallBack + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_fromUnicode (UConverter * converter, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t* offsets, + UBool flush, + UErrorCode * err); + +/** + * Converts a buffer of codepage bytes into an array of unicode UChars + * characters. This function is optimized for converting a continuous + * stream of data in buffer-sized chunks, where the entire source and + * target does not fit in available buffers. + * + * The source pointer is an in/out parameter. It starts out pointing where the + * conversion is to begin, and ends up pointing after the last byte of source consumed. + * + * Target similarly starts out pointer at the first available UChar in the output + * buffer, and ends up pointing after the last UChar written to the output. + * It does NOT necessarily keep UChar sequences together. + * + * The converter always attempts to consume the entire source buffer, unless + * (1.) the target buffer is full, or (2.) a failing error is returned from the + * current callback function. When a successful error status has been + * returned, it means that all of the source buffer has been + * consumed. At that point, the caller should reset the source and + * sourceLimit pointers to point to the next chunk. + * + * At the end of the stream (flush==true), the input is completely consumed + * when *source==sourceLimit and no error code is set + * The converter object is then automatically reset by this function. + * (This means that a converter need not be reset explicitly between data + * streams if it finishes the previous stream without errors.) + * + * This is a <I>stateful</I> conversion. Additionally, even when all source data has + * been consumed, some data may be in the converters' internal state. + * Call this function repeatedly, updating the target pointers with + * the next empty chunk of target in case of a + * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers + * with the next chunk of source when a successful error status is + * returned, until there are no more chunks of source data. + * @param converter the Unicode converter + * @param target I/O parameter. Input : Points to the beginning of the buffer to copy + * UChars into. Output : points to after the last UChar copied. + * @param targetLimit the pointer just after the end of the <TT>target</TT> buffer + * @param source I/O parameter, pointer to pointer to the source codepage buffer. + * @param sourceLimit the pointer to the byte after the end of the source buffer + * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number + * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer + * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT> + * For output data carried across calls, and other data without a specific source character + * (such as from escape sequences or callbacks) -1 will be placed for offsets. + * @param flush set to <TT>true</TT> if the current source buffer is the last available + * chunk of the source, <TT>false</TT> otherwise. Note that if a failing status is returned, + * this function may have to be called multiple times with flush set to <TT>true</TT> until + * the source buffer is consumed. + * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the + * converter is <TT>NULL</TT>. + * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is + * still data to be written to the target. + * @see ucnv_fromUChars + * @see ucnv_convert + * @see ucnv_getMinCharSize + * @see ucnv_setFromUCallBack + * @see ucnv_getNextUChar + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_toUnicode(UConverter *converter, + UChar **target, + const UChar *targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + UBool flush, + UErrorCode *err); + +/** + * Convert the Unicode string into a codepage string using an existing UConverter. + * The output string is NUL-terminated if possible. + * + * This function is a more convenient but less powerful version of ucnv_fromUnicode(). + * It is only useful for whole strings, not for streaming conversion. + * + * The maximum output buffer capacity required (barring output from callbacks) will be + * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)). + * + * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called) + * @param src the input Unicode string + * @param srcLength the input string length, or -1 if NUL-terminated + * @param dest destination string buffer, can be NULL if destCapacity==0 + * @param destCapacity the number of chars available at dest + * @param pErrorCode normal ICU error code; + * common error codes that may be set by this function include + * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING, + * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors + * @return the length of the output string, not counting the terminating NUL; + * if the length is greater than destCapacity, then the string will not fit + * and a buffer of the indicated length would need to be passed in + * @see ucnv_fromUnicode + * @see ucnv_convert + * @see UCNV_GET_MAX_BYTES_FOR_STRING + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ucnv_fromUChars(UConverter *cnv, + char *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert the codepage string into a Unicode string using an existing UConverter. + * The output string is NUL-terminated if possible. + * + * This function is a more convenient but less powerful version of ucnv_toUnicode(). + * It is only useful for whole strings, not for streaming conversion. + * + * The maximum output buffer capacity required (barring output from callbacks) will be + * 2*srcLength (each char may be converted into a surrogate pair). + * + * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called) + * @param src the input codepage string + * @param srcLength the input string length, or -1 if NUL-terminated + * @param dest destination string buffer, can be NULL if destCapacity==0 + * @param destCapacity the number of UChars available at dest + * @param pErrorCode normal ICU error code; + * common error codes that may be set by this function include + * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING, + * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors + * @return the length of the output string, not counting the terminating NUL; + * if the length is greater than destCapacity, then the string will not fit + * and a buffer of the indicated length would need to be passed in + * @see ucnv_toUnicode + * @see ucnv_convert + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ucnv_toUChars(UConverter *cnv, + UChar *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a codepage buffer into Unicode one character at a time. + * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set. + * + * Advantage compared to ucnv_toUnicode() or ucnv_toUChars(): + * - Faster for small amounts of data, for most converters, e.g., + * US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets. + * (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants, + * it uses ucnv_toUnicode() internally.) + * - Convenient. + * + * Limitations compared to ucnv_toUnicode(): + * - Always assumes flush=true. + * This makes ucnv_getNextUChar() unsuitable for "streaming" conversion, + * that is, for where the input is supplied in multiple buffers, + * because ucnv_getNextUChar() will assume the end of the input at the end + * of the first buffer. + * - Does not provide offset output. + * + * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because + * ucnv_getNextUChar() uses the current state of the converter + * (unlike ucnv_toUChars() which always resets first). + * However, if ucnv_getNextUChar() is called after ucnv_toUnicode() + * stopped in the middle of a character sequence (with flush=false), + * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode() + * internally until the next character boundary. + * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to + * start at a character boundary.) + * + * Instead of using ucnv_getNextUChar(), it is recommended + * to convert using ucnv_toUnicode() or ucnv_toUChars() + * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h) + * or a C++ CharacterIterator or similar. + * This allows streaming conversion and offset output, for example. + * + * <p>Handling of surrogate pairs and supplementary-plane code points:<br> + * There are two different kinds of codepages that provide mappings for surrogate characters: + * <ul> + * <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode + * code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff. + * Each valid sequence will result in exactly one returned code point. + * If a sequence results in a single surrogate, then that will be returned + * by itself, even if a neighboring sequence encodes the matching surrogate.</li> + * <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points + * including surrogates. Code points in supplementary planes are represented with + * two sequences, each encoding a surrogate. + * For these codepages, matching pairs of surrogates will be combined into single + * code points for returning from this function. + * (Note that SCSU is actually a mix of these codepage types.)</li> + * </ul></p> + * + * @param converter an open UConverter + * @param source the address of a pointer to the codepage buffer, will be + * updated to point after the bytes consumed in the conversion call. + * @param sourceLimit points to the end of the input buffer + * @param err fills in error status (see ucnv_toUnicode) + * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input + * is empty or does not convert to any output (e.g.: pure state-change + * codes SI/SO, escape sequences for ISO 2022, + * or if the callback did not output anything, ...). + * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because + * the "buffer" is the return code. However, there might be subsequent output + * stored in the converter object + * that will be returned in following calls to this function. + * @return a UChar32 resulting from the partial conversion of source + * @see ucnv_toUnicode + * @see ucnv_toUChars + * @see ucnv_convert + * @stable ICU 2.0 + */ +U_CAPI UChar32 U_EXPORT2 +ucnv_getNextUChar(UConverter * converter, + const char **source, + const char * sourceLimit, + UErrorCode * err); + +/** + * Convert from one external charset to another using two existing UConverters. + * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() - + * are used, "pivoting" through 16-bit Unicode. + * + * Important: For streaming conversion (multiple function calls for successive + * parts of a text stream), the caller must provide a pivot buffer explicitly, + * and must preserve the pivot buffer and associated pointers from one + * call to another. (The buffer may be moved if its contents and the relative + * pointer positions are preserved.) + * + * There is a similar function, ucnv_convert(), + * which has the following limitations: + * - it takes charset names, not converter objects, so that + * - two converters are opened for each call + * - only single-string conversion is possible, not streaming operation + * - it does not provide enough information to find out, + * in case of failure, whether the toUnicode or + * the fromUnicode conversion failed + * + * By contrast, ucnv_convertEx() + * - takes UConverter parameters instead of charset names + * - fully exposes the pivot buffer for streaming conversion and complete error handling + * + * ucnv_convertEx() also provides further convenience: + * - an option to reset the converters at the beginning + * (if reset==true, see parameters; + * also sets *pivotTarget=*pivotSource=pivotStart) + * - allow NUL-terminated input + * (only a single NUL byte, will not work for charsets with multi-byte NULs) + * (if sourceLimit==NULL, see parameters) + * - terminate with a NUL on output + * (only a single NUL byte, not useful for charsets with multi-byte NULs), + * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills + * the target buffer + * - the pivot buffer can be provided internally; + * possible only for whole-string conversion, not streaming conversion; + * in this case, the caller will not be able to get details about where an + * error occurred + * (if pivotStart==NULL, see below) + * + * The function returns when one of the following is true: + * - the entire source text has been converted successfully to the target buffer + * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR) + * - a conversion error occurred + * (other U_FAILURE(), see description of pErrorCode) + * + * Limitation compared to the direct use of + * ucnv_fromUnicode() and ucnv_toUnicode(): + * ucnv_convertEx() does not provide offset information. + * + * Limitation compared to ucnv_fromUChars() and ucnv_toUChars(): + * ucnv_convertEx() does not support preflighting directly. + * + * Sample code for converting a single string from + * one external charset to UTF-8, ignoring the location of errors: + * + * \code + * int32_t + * myToUTF8(UConverter *cnv, + * const char *s, int32_t length, + * char *u8, int32_t capacity, + * UErrorCode *pErrorCode) { + * UConverter *utf8Cnv; + * char *target; + * + * if(U_FAILURE(*pErrorCode)) { + * return 0; + * } + * + * utf8Cnv=myGetCachedUTF8Converter(pErrorCode); + * if(U_FAILURE(*pErrorCode)) { + * return 0; + * } + * + * if(length<0) { + * length=strlen(s); + * } + * target=u8; + * ucnv_convertEx(utf8Cnv, cnv, + * &target, u8+capacity, + * &s, s+length, + * NULL, NULL, NULL, NULL, + * true, true, + * pErrorCode); + * + * myReleaseCachedUTF8Converter(utf8Cnv); + * + * // return the output string length, but without preflighting + * return (int32_t)(target-u8); + * } + * \endcode + * + * @param targetCnv Output converter, used to convert from the UTF-16 pivot + * to the target using ucnv_fromUnicode(). + * @param sourceCnv Input converter, used to convert from the source to + * the UTF-16 pivot using ucnv_toUnicode(). + * @param target I/O parameter, same as for ucnv_fromUChars(). + * Input: *target points to the beginning of the target buffer. + * Output: *target points to the first unit after the last char written. + * @param targetLimit Pointer to the first unit after the target buffer. + * @param source I/O parameter, same as for ucnv_toUChars(). + * Input: *source points to the beginning of the source buffer. + * Output: *source points to the first unit after the last char read. + * @param sourceLimit Pointer to the first unit after the source buffer. + * @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL, + * then an internal buffer is used and the other pivot + * arguments are ignored and can be NULL as well. + * @param pivotSource I/O parameter, same as source in ucnv_fromUChars() for + * conversion from the pivot buffer to the target buffer. + * @param pivotTarget I/O parameter, same as target in ucnv_toUChars() for + * conversion from the source buffer to the pivot buffer. + * It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit + * and pivotStart<pivotLimit (unless pivotStart==NULL). + * @param pivotLimit Pointer to the first unit after the pivot buffer. + * @param reset If true, then ucnv_resetToUnicode(sourceCnv) and + * ucnv_resetFromUnicode(targetCnv) are called, and the + * pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart). + * @param flush If true, indicates the end of the input. + * Passed directly to ucnv_toUnicode(), and carried over to + * ucnv_fromUnicode() when the source is empty as well. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * U_BUFFER_OVERFLOW_ERROR always refers to the target buffer + * because overflows into the pivot buffer are handled internally. + * Other conversion errors are from the source-to-pivot + * conversion if *pivotSource==pivotStart, otherwise from + * the pivot-to-target conversion. + * + * @see ucnv_convert + * @see ucnv_fromAlgorithmic + * @see ucnv_toAlgorithmic + * @see ucnv_fromUnicode + * @see ucnv_toUnicode + * @see ucnv_fromUChars + * @see ucnv_toUChars + * @stable ICU 2.6 + */ +U_CAPI void U_EXPORT2 +ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, + char **target, const char *targetLimit, + const char **source, const char *sourceLimit, + UChar *pivotStart, UChar **pivotSource, + UChar **pivotTarget, const UChar *pivotLimit, + UBool reset, UBool flush, + UErrorCode *pErrorCode); + +/** + * Convert from one external charset to another. + * Internally, two converters are opened according to the name arguments, + * then the text is converted to and from the 16-bit Unicode "pivot" + * using ucnv_convertEx(), then the converters are closed again. + * + * This is a convenience function, not an efficient way to convert a lot of text: + * ucnv_convert() + * - takes charset names, not converter objects, so that + * - two converters are opened for each call + * - only single-string conversion is possible, not streaming operation + * - does not provide enough information to find out, + * in case of failure, whether the toUnicode or + * the fromUnicode conversion failed + * - allows NUL-terminated input + * (only a single NUL byte, will not work for charsets with multi-byte NULs) + * (if sourceLength==-1, see parameters) + * - terminate with a NUL on output + * (only a single NUL byte, not useful for charsets with multi-byte NULs), + * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills + * the target buffer + * - a pivot buffer is provided internally + * + * The function returns when one of the following is true: + * - the entire source text has been converted successfully to the target buffer + * and either the target buffer is terminated with a single NUL byte + * or the error code is set to U_STRING_NOT_TERMINATED_WARNING + * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR) + * and the full output string length is returned ("preflighting") + * - a conversion error occurred + * (other U_FAILURE(), see description of pErrorCode) + * + * @param toConverterName The name of the converter that is used to convert + * from the UTF-16 pivot buffer to the target. + * @param fromConverterName The name of the converter that is used to convert + * from the source to the UTF-16 pivot buffer. + * @param target Pointer to the output buffer. + * @param targetCapacity Capacity of the target, in bytes. + * @param source Pointer to the input buffer. + * @param sourceLength Length of the input text, in bytes, or -1 for NUL-terminated input. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity + * and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucnv_convertEx + * @see ucnv_fromAlgorithmic + * @see ucnv_toAlgorithmic + * @see ucnv_fromUnicode + * @see ucnv_toUnicode + * @see ucnv_fromUChars + * @see ucnv_toUChars + * @see ucnv_getNextUChar + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ucnv_convert(const char *toConverterName, + const char *fromConverterName, + char *target, + int32_t targetCapacity, + const char *source, + int32_t sourceLength, + UErrorCode *pErrorCode); + +/** + * Convert from one external charset to another. + * Internally, the text is converted to and from the 16-bit Unicode "pivot" + * using ucnv_convertEx(). ucnv_toAlgorithmic() works exactly like ucnv_convert() + * except that the two converters need not be looked up and opened completely. + * + * The source-to-pivot conversion uses the cnv converter parameter. + * The pivot-to-target conversion uses a purely algorithmic converter + * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter. + * + * Internally, the algorithmic converter is opened and closed for each + * function call, which is more efficient than using the public ucnv_open() + * but somewhat less efficient than only resetting an existing converter + * and using ucnv_convertEx(). + * + * This function is more convenient than ucnv_convertEx() for single-string + * conversions, especially when "preflighting" is desired (returning the length + * of the complete output even if it does not fit into the target buffer; + * see the User Guide Strings chapter). See ucnv_convert() for details. + * + * @param algorithmicType UConverterType constant identifying the desired target + * charset as a purely algorithmic converter. + * Those are converters for Unicode charsets like + * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc., + * as well as US-ASCII and ISO-8859-1. + * @param cnv The converter that is used to convert + * from the source to the UTF-16 pivot buffer. + * @param target Pointer to the output buffer. + * @param targetCapacity Capacity of the target, in bytes. + * @param source Pointer to the input buffer. + * @param sourceLength Length of the input text, in bytes + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity + * and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucnv_fromAlgorithmic + * @see ucnv_convert + * @see ucnv_convertEx + * @see ucnv_fromUnicode + * @see ucnv_toUnicode + * @see ucnv_fromUChars + * @see ucnv_toUChars + * @stable ICU 2.6 + */ +U_CAPI int32_t U_EXPORT2 +ucnv_toAlgorithmic(UConverterType algorithmicType, + UConverter *cnv, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode); + +/** + * Convert from one external charset to another. + * Internally, the text is converted to and from the 16-bit Unicode "pivot" + * using ucnv_convertEx(). ucnv_fromAlgorithmic() works exactly like ucnv_convert() + * except that the two converters need not be looked up and opened completely. + * + * The source-to-pivot conversion uses a purely algorithmic converter + * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter. + * The pivot-to-target conversion uses the cnv converter parameter. + * + * Internally, the algorithmic converter is opened and closed for each + * function call, which is more efficient than using the public ucnv_open() + * but somewhat less efficient than only resetting an existing converter + * and using ucnv_convertEx(). + * + * This function is more convenient than ucnv_convertEx() for single-string + * conversions, especially when "preflighting" is desired (returning the length + * of the complete output even if it does not fit into the target buffer; + * see the User Guide Strings chapter). See ucnv_convert() for details. + * + * @param cnv The converter that is used to convert + * from the UTF-16 pivot buffer to the target. + * @param algorithmicType UConverterType constant identifying the desired source + * charset as a purely algorithmic converter. + * Those are converters for Unicode charsets like + * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc., + * as well as US-ASCII and ISO-8859-1. + * @param target Pointer to the output buffer. + * @param targetCapacity Capacity of the target, in bytes. + * @param source Pointer to the input buffer. + * @param sourceLength Length of the input text, in bytes + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity + * and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucnv_fromAlgorithmic + * @see ucnv_convert + * @see ucnv_convertEx + * @see ucnv_fromUnicode + * @see ucnv_toUnicode + * @see ucnv_fromUChars + * @see ucnv_toUChars + * @stable ICU 2.6 + */ +U_CAPI int32_t U_EXPORT2 +ucnv_fromAlgorithmic(UConverter *cnv, + UConverterType algorithmicType, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode); + +/** + * Frees up memory occupied by unused, cached converter shared data. + * + * @return the number of cached converters successfully deleted + * @see ucnv_close + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ucnv_flushCache(void); + +/** + * Returns the number of available converters, as per the alias file. + * + * @return the number of available converters + * @see ucnv_getAvailableName + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ucnv_countAvailable(void); + +/** + * Gets the canonical converter name of the specified converter from a list of + * all available converters contaied in the alias file. All converters + * in this list can be opened. + * + * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvaiable()]</TT>) + * @return a pointer a string (library owned), or <TT>NULL</TT> if the index is out of bounds. + * @see ucnv_countAvailable + * @stable ICU 2.0 + */ +U_CAPI const char* U_EXPORT2 +ucnv_getAvailableName(int32_t n); + +/** + * Returns a UEnumeration to enumerate all of the canonical converter + * names, as per the alias file, regardless of the ability to open each + * converter. + * + * @return A UEnumeration object for getting all the recognized canonical + * converter names. + * @see ucnv_getAvailableName + * @see uenum_close + * @see uenum_next + * @stable ICU 2.4 + */ +U_CAPI UEnumeration * U_EXPORT2 +ucnv_openAllNames(UErrorCode *pErrorCode); + +/** + * Gives the number of aliases for a given converter or alias name. + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * This method only enumerates the listed entries in the alias file. + * @param alias alias name + * @param pErrorCode error status + * @return number of names on alias list for given alias + * @stable ICU 2.0 + */ +U_CAPI uint16_t U_EXPORT2 +ucnv_countAliases(const char *alias, UErrorCode *pErrorCode); + +/** + * Gives the name of the alias at given index of alias list. + * This method only enumerates the listed entries in the alias file. + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * @param alias alias name + * @param n index in alias list + * @param pErrorCode result of operation + * @return returns the name of the alias at given index + * @see ucnv_countAliases + * @stable ICU 2.0 + */ +U_CAPI const char * U_EXPORT2 +ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode); + +/** + * Fill-up the list of alias names for the given alias. + * This method only enumerates the listed entries in the alias file. + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * @param alias alias name + * @param aliases fill-in list, aliases is a pointer to an array of + * <code>ucnv_countAliases()</code> string-pointers + * (<code>const char *</code>) that will be filled in. + * The strings themselves are owned by the library. + * @param pErrorCode result of operation + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode); + +/** + * Return a new UEnumeration object for enumerating all the + * alias names for a given converter that are recognized by a standard. + * This method only enumerates the listed entries in the alias file. + * The convrtrs.txt file can be modified to change the results of + * this function. + * The first result in this list is the same result given by + * <code>ucnv_getStandardName</code>, which is the default alias for + * the specified standard name. The returned object must be closed with + * <code>uenum_close</code> when you are done with the object. + * + * @param convName original converter name + * @param standard name of the standard governing the names; MIME and IANA + * are such standards + * @param pErrorCode The error code + * @return A UEnumeration object for getting all aliases that are recognized + * by a standard. If any of the parameters are invalid, NULL + * is returned. + * @see ucnv_getStandardName + * @see uenum_close + * @see uenum_next + * @stable ICU 2.2 + */ +U_CAPI UEnumeration * U_EXPORT2 +ucnv_openStandardNames(const char *convName, + const char *standard, + UErrorCode *pErrorCode); + +/** + * Gives the number of standards associated to converter names. + * @return number of standards + * @stable ICU 2.0 + */ +U_CAPI uint16_t U_EXPORT2 +ucnv_countStandards(void); + +/** + * Gives the name of the standard at given index of standard list. + * @param n index in standard list + * @param pErrorCode result of operation + * @return returns the name of the standard at given index. Owned by the library. + * @stable ICU 2.0 + */ +U_CAPI const char * U_EXPORT2 +ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode); + +/** + * Returns a standard name for a given converter name. + * <p> + * Example alias table:<br> + * conv alias1 { STANDARD1 } alias2 { STANDARD1* } + * <p> + * Result of ucnv_getStandardName("conv", "STANDARD1") from example + * alias table:<br> + * <b>"alias2"</b> + * + * @param name original converter name + * @param standard name of the standard governing the names; MIME and IANA + * are such standards + * @param pErrorCode result of operation + * @return returns the standard converter name; + * if a standard converter name cannot be determined, + * then <code>NULL</code> is returned. Owned by the library. + * @stable ICU 2.0 + */ +U_CAPI const char * U_EXPORT2 +ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode); + +/** + * This function will return the internal canonical converter name of the + * tagged alias. This is the opposite of ucnv_openStandardNames, which + * returns the tagged alias given the canonical name. + * <p> + * Example alias table:<br> + * conv alias1 { STANDARD1 } alias2 { STANDARD1* } + * <p> + * Result of ucnv_getStandardName("alias1", "STANDARD1") from example + * alias table:<br> + * <b>"conv"</b> + * + * @return returns the canonical converter name; + * if a standard or alias name cannot be determined, + * then <code>NULL</code> is returned. The returned string is + * owned by the library. + * @see ucnv_getStandardName + * @stable ICU 2.4 + */ +U_CAPI const char * U_EXPORT2 +ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode); + +/** + * Returns the current default converter name. If you want to open + * a default converter, you do not need to use this function. + * It is faster if you pass a NULL argument to ucnv_open the + * default converter. + * + * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function + * always returns "UTF-8". + * + * @return returns the current default converter name. + * Storage owned by the library + * @see ucnv_setDefaultName + * @stable ICU 2.0 + */ +U_CAPI const char * U_EXPORT2 +ucnv_getDefaultName(void); + +#ifndef U_HIDE_SYSTEM_API +/** + * This function is not thread safe. DO NOT call this function when ANY ICU + * function is being used from more than one thread! This function sets the + * current default converter name. If this function needs to be called, it + * should be called during application initialization. Most of the time, the + * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument + * is sufficient for your application. + * + * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function + * does nothing. + * + * @param name the converter name to be the default (must be known by ICU). + * @see ucnv_getDefaultName + * @system + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_setDefaultName(const char *name); +#endif /* U_HIDE_SYSTEM_API */ + +/** + * Fixes the backslash character mismapping. For example, in SJIS, the backslash + * character in the ASCII portion is also used to represent the yen currency sign. + * When mapping from Unicode character 0x005C, it's unclear whether to map the + * character back to yen or backslash in SJIS. This function will take the input + * buffer and replace all the yen sign characters with backslash. This is necessary + * when the user tries to open a file with the input buffer on Windows. + * This function will test the converter to see whether such mapping is + * required. You can sometimes avoid using this function by using the correct version + * of Shift-JIS. + * + * @param cnv The converter representing the target codepage. + * @param source the input buffer to be fixed + * @param sourceLen the length of the input buffer + * @see ucnv_isAmbiguous + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen); + +/** + * Determines if the converter contains ambiguous mappings of the same + * character or not. + * @param cnv the converter to be tested + * @return true if the converter contains ambiguous mapping of the same + * character, false otherwise. + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +ucnv_isAmbiguous(const UConverter *cnv); + +/** + * Sets the converter to use fallback mappings or not. + * Regardless of this flag, the converter will always use + * fallbacks from Unicode Private Use code points, as well as + * reverse fallbacks (to Unicode). + * For details see ".ucm File Format" + * in the Conversion Data chapter of the ICU User Guide: + * http://www.icu-project.org/userguide/conversion-data.html#ucmformat + * + * @param cnv The converter to set the fallback mapping usage on. + * @param usesFallback true if the user wants the converter to take advantage of the fallback + * mapping, false otherwise. + * @stable ICU 2.0 + * @see ucnv_usesFallback + */ +U_CAPI void U_EXPORT2 +ucnv_setFallback(UConverter *cnv, UBool usesFallback); + +/** + * Determines if the converter uses fallback mappings or not. + * This flag has restrictions, see ucnv_setFallback(). + * + * @param cnv The converter to be tested + * @return true if the converter uses fallback, false otherwise. + * @stable ICU 2.0 + * @see ucnv_setFallback + */ +U_CAPI UBool U_EXPORT2 +ucnv_usesFallback(const UConverter *cnv); + +/** + * Detects Unicode signature byte sequences at the start of the byte stream + * and returns the charset name of the indicated Unicode charset. + * NULL is returned when no Unicode signature is recognized. + * The number of bytes in the signature is output as well. + * + * The caller can ucnv_open() a converter using the charset name. + * The first code unit (UChar) from the start of the stream will be U+FEFF + * (the Unicode BOM/signature character) and can usually be ignored. + * + * For most Unicode charsets it is also possible to ignore the indicated + * number of initial stream bytes and start converting after them. + * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which + * this will not work. Therefore, it is best to ignore the first output UChar + * instead of the input signature bytes. + * <p> + * Usage: + * \snippet samples/ucnv/convsamp.cpp ucnv_detectUnicodeSignature + * + * @param source The source string in which the signature should be detected. + * @param sourceLength Length of the input string, or -1 if terminated with a NUL byte. + * @param signatureLength A pointer to int32_t to receive the number of bytes that make up the signature + * of the detected UTF. 0 if not detected. + * Can be a NULL pointer. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return The name of the encoding detected. NULL if encoding is not detected. + * @stable ICU 2.4 + */ +U_CAPI const char* U_EXPORT2 +ucnv_detectUnicodeSignature(const char* source, + int32_t sourceLength, + int32_t *signatureLength, + UErrorCode *pErrorCode); + +/** + * Returns the number of UChars held in the converter's internal state + * because more input is needed for completing the conversion. This function is + * useful for mapping semantics of ICU's converter interface to those of iconv, + * and this information is not needed for normal conversion. + * @param cnv The converter in which the input is held + * @param status ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return The number of UChars in the state. -1 if an error is encountered. + * @stable ICU 3.4 + */ +U_CAPI int32_t U_EXPORT2 +ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status); + +/** + * Returns the number of chars held in the converter's internal state + * because more input is needed for completing the conversion. This function is + * useful for mapping semantics of ICU's converter interface to those of iconv, + * and this information is not needed for normal conversion. + * @param cnv The converter in which the input is held as internal state + * @param status ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return The number of chars in the state. -1 if an error is encountered. + * @stable ICU 3.4 + */ +U_CAPI int32_t U_EXPORT2 +ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status); + +/** + * Returns whether or not the charset of the converter has a fixed number of bytes + * per charset character. + * An example of this are converters that are of the type UCNV_SBCS or UCNV_DBCS. + * Another example is UTF-32 which is always 4 bytes per character. + * A Unicode code point may be represented by more than one UTF-8 or UTF-16 code unit + * but a UTF-32 converter encodes each code point with 4 bytes. + * Note: This method is not intended to be used to determine whether the charset has a + * fixed ratio of bytes to Unicode codes <i>units</i> for any particular Unicode encoding form. + * false is returned with the UErrorCode if error occurs or cnv is NULL. + * @param cnv The converter to be tested + * @param status ICU error code in/out paramter + * @return true if the converter is fixed-width + * @stable ICU 4.8 + */ +U_CAPI UBool U_EXPORT2 +ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status); + +#endif + +#endif +/*_UCNV*/ diff --git a/thirdparty/icu4c/common/unicode/ucnv_cb.h b/thirdparty/icu4c/common/unicode/ucnv_cb.h new file mode 100644 index 0000000000..41845d1bca --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ucnv_cb.h @@ -0,0 +1,164 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2004, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** + * ucnv_cb.h: + * External APIs for the ICU's codeset conversion library + * Helena Shih + * + * Modification History: + * + * Date Name Description + */ + +/** + * \file + * \brief C UConverter functions to aid the writers of callbacks + * + * <h2> Callback API for UConverter </h2> + * + * These functions are provided here for the convenience of the callback + * writer. If you are just looking for callback functions to use, please + * see ucnv_err.h. DO NOT call these functions directly when you are + * working with converters, unless your code has been called as a callback + * via ucnv_setFromUCallback or ucnv_setToUCallback !! + * + * A note about error codes and overflow. Unlike other ICU functions, + * these functions do not expect the error status to be U_ZERO_ERROR. + * Callbacks must be much more careful about their error codes. + * The error codes used here are in/out parameters, which should be passed + * back in the callback's error parameter. + * + * For example, if you call ucnv_cbfromUWriteBytes to write data out + * to the output codepage, it may return U_BUFFER_OVERFLOW_ERROR if + * the data did not fit in the target. But this isn't a failing error, + * in fact, ucnv_cbfromUWriteBytes may be called AGAIN with the error + * status still U_BUFFER_OVERFLOW_ERROR to attempt to write further bytes, + * which will also go into the internal overflow buffers. + * + * Concerning offsets, the 'offset' parameters here are relative to the start + * of SOURCE. For example, Suppose the string "ABCD" was being converted + * from Unicode into a codepage which doesn't have a mapping for 'B'. + * 'A' will be written out correctly, but + * The FromU Callback will be called on an unassigned character for 'B'. + * At this point, this is the state of the world: + * Target: A [..] [points after A] + * Source: A B [C] D [points to C - B has been consumed] + * 0 1 2 3 + * codePoint = "B" [the unassigned codepoint] + * + * Now, suppose a callback wants to write the substitution character '?' to + * the target. It calls ucnv_cbFromUWriteBytes() to write the ?. + * It should pass ZERO as the offset, because the offset as far as the + * callback is concerned is relative to the SOURCE pointer [which points + * before 'C'.] If the callback goes into the args and consumes 'C' also, + * it would call FromUWriteBytes with an offset of 1 (and advance the source + * pointer). + * + */ + +#ifndef UCNV_CB_H +#define UCNV_CB_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/ucnv_err.h" + +/** + * ONLY used by FromU callback functions. + * Writes out the specified byte output bytes to the target byte buffer or to converter internal buffers. + * + * @param args callback fromUnicode arguments + * @param source source bytes to write + * @param length length of bytes to write + * @param offsetIndex the relative offset index from callback. + * @param err error status. If <TT>U_BUFFER_OVERFLOW</TT> is returned, then U_BUFFER_OVERFLOW <STRONG>must</STRONG> + * be returned to the user, because it means that not all data could be written into the target buffer, and some is + * in the converter error buffer. + * @see ucnv_cbFromUWriteSub + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args, + const char* source, + int32_t length, + int32_t offsetIndex, + UErrorCode * err); + +/** + * ONLY used by FromU callback functions. + * This function will write out the correct substitution character sequence + * to the target. + * + * @param args callback fromUnicode arguments + * @param offsetIndex the relative offset index from the current source pointer to be used + * @param err error status. If <TT>U_BUFFER_OVERFLOW</TT> is returned, then U_BUFFER_OVERFLOW <STRONG>must</STRONG> + * be returned to the user, because it means that not all data could be written into the target buffer, and some is + * in the converter error buffer. + * @see ucnv_cbFromUWriteBytes + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args, + int32_t offsetIndex, + UErrorCode * err); + +/** + * ONLY used by fromU callback functions. + * This function will write out the error character(s) to the target UChar buffer. + * + * @param args callback fromUnicode arguments + * @param source pointer to pointer to first UChar to write [on exit: 1 after last UChar processed] + * @param sourceLimit pointer after last UChar to write + * @param offsetIndex the relative offset index from callback which will be set + * @param err error status <TT>U_BUFFER_OVERFLOW</TT> + * @see ucnv_cbToUWriteSub + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args, + const UChar** source, + const UChar* sourceLimit, + int32_t offsetIndex, + UErrorCode * err); + +/** + * ONLY used by ToU callback functions. + * This function will write out the specified characters to the target + * UChar buffer. + * + * @param args callback toUnicode arguments + * @param source source string to write + * @param length the length of source string + * @param offsetIndex the relative offset index which will be written. + * @param err error status <TT>U_BUFFER_OVERFLOW</TT> + * @see ucnv_cbToUWriteSub + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args, + const UChar* source, + int32_t length, + int32_t offsetIndex, + UErrorCode * err); + +/** + * ONLY used by ToU callback functions. + * This function will write out the Unicode substitution character (U+FFFD). + * + * @param args callback fromUnicode arguments + * @param offsetIndex the relative offset index from callback. + * @param err error status <TT>U_BUFFER_OVERFLOW</TT> + * @see ucnv_cbToUWriteUChars + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args, + int32_t offsetIndex, + UErrorCode * err); +#endif + +#endif diff --git a/thirdparty/icu4c/common/unicode/ucnv_err.h b/thirdparty/icu4c/common/unicode/ucnv_err.h new file mode 100644 index 0000000000..7209ba5f7b --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ucnv_err.h @@ -0,0 +1,465 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2009, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** + * + * + * ucnv_err.h: + */ + +/** + * \file + * \brief C UConverter predefined error callbacks + * + * <h2>Error Behaviour Functions</h2> + * Defines some error behaviour functions called by ucnv_{from,to}Unicode + * These are provided as part of ICU and many are stable, but they + * can also be considered only as an example of what can be done with + * callbacks. You may of course write your own. + * + * If you want to write your own, you may also find the functions from + * ucnv_cb.h useful when writing your own callbacks. + * + * These functions, although public, should NEVER be called directly. + * They should be used as parameters to the ucnv_setFromUCallback + * and ucnv_setToUCallback functions, to set the behaviour of a converter + * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. + * + * usage example: 'STOP' doesn't need any context, but newContext + * could be set to something other than 'NULL' if needed. The available + * contexts in this header can modify the default behavior of the callback. + * + * \code + * UErrorCode err = U_ZERO_ERROR; + * UConverter *myConverter = ucnv_open("ibm-949", &err); + * const void *oldContext; + * UConverterFromUCallback oldAction; + * + * + * if (U_SUCCESS(err)) + * { + * ucnv_setFromUCallBack(myConverter, + * UCNV_FROM_U_CALLBACK_STOP, + * NULL, + * &oldAction, + * &oldContext, + * &status); + * } + * \endcode + * + * The code above tells "myConverter" to stop when it encounters an + * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from + * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed, + * and ucnv_setToUCallBack would need to be called in order to change + * that behavior too. + * + * Here is an example with a context: + * + * \code + * UErrorCode err = U_ZERO_ERROR; + * UConverter *myConverter = ucnv_open("ibm-949", &err); + * const void *oldContext; + * UConverterFromUCallback oldAction; + * + * + * if (U_SUCCESS(err)) + * { + * ucnv_setToUCallBack(myConverter, + * UCNV_TO_U_CALLBACK_SUBSTITUTE, + * UCNV_SUB_STOP_ON_ILLEGAL, + * &oldAction, + * &oldContext, + * &status); + * } + * \endcode + * + * The code above tells "myConverter" to stop when it encounters an + * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from + * Codepage -> Unicode. Any unmapped and legal characters will be + * substituted to be the default substitution character. + */ + +#ifndef UCNV_ERR_H +#define UCNV_ERR_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +/** Forward declaring the UConverter structure. @stable ICU 2.0 */ +struct UConverter; + +/** @stable ICU 2.0 */ +typedef struct UConverter UConverter; + +/** + * FROM_U, TO_U context options for sub callback + * @stable ICU 2.0 + */ +#define UCNV_SUB_STOP_ON_ILLEGAL "i" + +/** + * FROM_U, TO_U context options for skip callback + * @stable ICU 2.0 + */ +#define UCNV_SKIP_STOP_ON_ILLEGAL "i" + +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_ICU NULL +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_JAVA "J" +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) + * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_C "C" +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly + * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_XML_DEC "D" +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly + * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_XML_HEX "X" +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_UNICODE "U" + +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is, + * a backslash, 1..6 hex digits, and a space) + * @stable ICU 4.0 + */ +#define UCNV_ESCAPE_CSS2 "S" + +/** + * The process condition code to be used with the callbacks. + * Codes which are greater than UCNV_IRREGULAR should be + * passed on to any chained callbacks. + * @stable ICU 2.0 + */ +typedef enum { + UCNV_UNASSIGNED = 0, /**< The code point is unassigned. + The error code U_INVALID_CHAR_FOUND will be set. */ + UCNV_ILLEGAL = 1, /**< The code point is illegal. For example, + \\x81\\x2E is illegal in SJIS because \\x2E + is not a valid trail byte for the \\x81 + lead byte. + Also, starting with Unicode 3.0.1, non-shortest byte sequences + in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061) + are also illegal, not just irregular. + The error code U_ILLEGAL_CHAR_FOUND will be set. */ + UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in + the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF + are irregular UTF-8 byte sequences for single surrogate + code points. + The error code U_INVALID_CHAR_FOUND will be set. */ + UCNV_RESET = 3, /**< The callback is called with this reason when a + 'reset' has occurred. Callback should reset all + state. */ + UCNV_CLOSE = 4, /**< Called when the converter is closed. The + callback should release any allocated memory.*/ + UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the + converter. the pointer available as the + 'context' is an alias to the original converters' + context pointer. If the context must be owned + by the new converter, the callback must clone + the data and call ucnv_setFromUCallback + (or setToUCallback) with the correct pointer. + @stable ICU 2.2 + */ +} UConverterCallbackReason; + + +/** + * The structure for the fromUnicode callback function parameter. + * @stable ICU 2.0 + */ +typedef struct { + uint16_t size; /**< The size of this struct. @stable ICU 2.0 */ + UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */ + UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ + const UChar *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ + const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ + char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ + const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ + int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ +} UConverterFromUnicodeArgs; + + +/** + * The structure for the toUnicode callback function parameter. + * @stable ICU 2.0 + */ +typedef struct { + uint16_t size; /**< The size of this struct @stable ICU 2.0 */ + UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */ + UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ + const char *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ + const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ + UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ + const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ + int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ +} UConverterToUnicodeArgs; + + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * + * @param context Pointer to the callback's private data + * @param fromUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param err This should always be set to a failure status prior to calling. + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err); + + + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * + * @param context Pointer to the callback's private data + * @param toUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param err This should always be set to a failure status prior to calling. + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This From Unicode callback skips any ILLEGAL_SEQUENCE, or + * skips only UNASSINGED_SEQUENCE depending on the context parameter + * simply ignoring those characters. + * + * @param context The function currently recognizes the callback options: + * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * NULL: Skips any ILLEGAL_SEQUENCE + * @param fromUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or + * UNASSIGNED_SEQUENCE depending on context parameter, with the + * current substitution string for the converter. This is the default + * callback. + * + * @param context The function currently recognizes the callback options: + * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * NULL: Substitutes any ILLEGAL_SEQUENCE + * @param fromUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @see ucnv_setSubstChars + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the + * hexadecimal representation of the illegal codepoints + * + * @param context The function currently recognizes the callback options: + * <ul> + * <li>UCNV_ESCAPE_ICU: Substitues the ILLEGAL SEQUENCE with the hexadecimal + * representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). + * In the Event the converter doesn't support the characters {%,U}[A-F][0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * %UD84D%UDC56</li> + * <li>UCNV_ESCAPE_JAVA: Substitues the ILLEGAL SEQUENCE with the hexadecimal + * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). + * In the Event the converter doesn't support the characters {\,u}[A-F][0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * \\uD84D\\uDC56</li> + * <li>UCNV_ESCAPE_C: Substitues the ILLEGAL SEQUENCE with the hexadecimal + * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). + * In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * \\U00023456</li> + * <li>UCNV_ESCAPE_XML_DEC: Substitues the ILLEGAL SEQUENCE with the decimal + * representation in the format \htmlonly&#DDDDDDDD;, e.g. "&#65534;&#172;&#51454;")\endhtmlonly. + * In the Event the converter doesn't support the characters {&,#}[0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * &#144470; and Zero padding is ignored.</li> + * <li>UCNV_ESCAPE_XML_HEX:Substitues the ILLEGAL SEQUENCE with the decimal + * representation in the format \htmlonly&#xXXXX; e.g. "&#xFFFE;&#x00AC;&#xC8FE;")\endhtmlonly. + * In the Event the converter doesn't support the characters {&,#,x}[0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * \htmlonly&#x23456;\endhtmlonly</li> + * </ul> + * @param fromUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err); + + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This To Unicode callback skips any ILLEGAL_SEQUENCE, or + * skips only UNASSINGED_SEQUENCE depending on the context parameter + * simply ignoring those characters. + * + * @param context The function currently recognizes the callback options: + * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * NULL: Skips any ILLEGAL_SEQUENCE + * @param toUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or + * UNASSIGNED_SEQUENCE depending on context parameter, with the + * Unicode substitution character, U+FFFD. + * + * @param context The function currently recognizes the callback options: + * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * NULL: Substitutes any ILLEGAL_SEQUENCE + * @param toUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the + * hexadecimal representation of the illegal bytes + * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03"). + * + * @param context This function currently recognizes the callback options: + * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC, + * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE. + * @param toUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ + +U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err); + +#endif + +#endif + +/*UCNV_ERR_H*/ diff --git a/thirdparty/icu4c/common/unicode/ucnvsel.h b/thirdparty/icu4c/common/unicode/ucnvsel.h new file mode 100644 index 0000000000..5e0a71cf35 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ucnvsel.h @@ -0,0 +1,192 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2008-2011, International Business Machines +* Corporation, Google and others. All Rights Reserved. +* +******************************************************************************* +*/ +/* + * Author : eldawy@google.com (Mohamed Eldawy) + * ucnvsel.h + * + * Purpose: To generate a list of encodings capable of handling + * a given Unicode text + * + * Started 09-April-2008 + */ + +#ifndef __ICU_UCNV_SEL_H__ +#define __ICU_UCNV_SEL_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/uset.h" +#include "unicode/utf16.h" +#include "unicode/uenum.h" +#include "unicode/ucnv.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +/** + * \file + * + * A converter selector is built with a set of encoding/charset names + * and given an input string returns the set of names of the + * corresponding converters which can convert the string. + * + * A converter selector can be serialized into a buffer and reopened + * from the serialized form. + */ + +/** + * @{ + * The selector data structure + */ +struct UConverterSelector; +typedef struct UConverterSelector UConverterSelector; +/** @} */ + +/** + * Open a selector. + * If converterListSize is 0, build for all available converters. + * If excludedCodePoints is NULL, don't exclude any code points. + * + * @param converterList a pointer to encoding names needed to be involved. + * Can be NULL if converterListSize==0. + * The list and the names will be cloned, and the caller + * retains ownership of the original. + * @param converterListSize number of encodings in above list. + * If 0, builds a selector for all available converters. + * @param excludedCodePoints a set of code points to be excluded from consideration. + * That is, excluded code points in a string do not change + * the selection result. (They might be handled by a callback.) + * Use NULL to exclude nothing. + * @param whichSet what converter set to use? Use this to determine whether + * to consider only roundtrip mappings or also fallbacks. + * @param status an in/out ICU UErrorCode + * @return the new selector + * + * @stable ICU 4.2 + */ +U_CAPI UConverterSelector* U_EXPORT2 +ucnvsel_open(const char* const* converterList, int32_t converterListSize, + const USet* excludedCodePoints, + const UConverterUnicodeSet whichSet, UErrorCode* status); + +/** + * Closes a selector. + * If any Enumerations were returned by ucnv_select*, they become invalid. + * They can be closed before or after calling ucnv_closeSelector, + * but should never be used after the selector is closed. + * + * @see ucnv_selectForString + * @see ucnv_selectForUTF8 + * + * @param sel selector to close + * + * @stable ICU 4.2 + */ +U_CAPI void U_EXPORT2 +ucnvsel_close(UConverterSelector *sel); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUConverterSelectorPointer + * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close); + +U_NAMESPACE_END + +#endif + +/** + * Open a selector from its serialized form. + * The buffer must remain valid and unchanged for the lifetime of the selector. + * This is much faster than creating a selector from scratch. + * Using a serialized form from a different machine (endianness/charset) is supported. + * + * @param buffer pointer to the serialized form of a converter selector; + * must be 32-bit-aligned + * @param length the capacity of this buffer (can be equal to or larger than + * the actual data length) + * @param status an in/out ICU UErrorCode + * @return the new selector + * + * @stable ICU 4.2 + */ +U_CAPI UConverterSelector* U_EXPORT2 +ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status); + +/** + * Serialize a selector into a linear buffer. + * The serialized form is portable to different machines. + * + * @param sel selector to consider + * @param buffer pointer to 32-bit-aligned memory to be filled with the + * serialized form of this converter selector + * @param bufferCapacity the capacity of this buffer + * @param status an in/out ICU UErrorCode + * @return the required buffer capacity to hold serialize data (even if the call fails + * with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity) + * + * @stable ICU 4.2 + */ +U_CAPI int32_t U_EXPORT2 +ucnvsel_serialize(const UConverterSelector* sel, + void* buffer, int32_t bufferCapacity, UErrorCode* status); + +/** + * Select converters that can map all characters in a UTF-16 string, + * ignoring the excluded code points. + * + * @param sel a selector + * @param s UTF-16 string + * @param length length of the string, or -1 if NUL-terminated + * @param status an in/out ICU UErrorCode + * @return an enumeration containing encoding names. + * The returned encoding names and their order will be the same as + * supplied when building the selector. + * + * @stable ICU 4.2 + */ +U_CAPI UEnumeration * U_EXPORT2 +ucnvsel_selectForString(const UConverterSelector* sel, + const UChar *s, int32_t length, UErrorCode *status); + +/** + * Select converters that can map all characters in a UTF-8 string, + * ignoring the excluded code points. + * + * @param sel a selector + * @param s UTF-8 string + * @param length length of the string, or -1 if NUL-terminated + * @param status an in/out ICU UErrorCode + * @return an enumeration containing encoding names. + * The returned encoding names and their order will be the same as + * supplied when building the selector. + * + * @stable ICU 4.2 + */ +U_CAPI UEnumeration * U_EXPORT2 +ucnvsel_selectForUTF8(const UConverterSelector* sel, + const char *s, int32_t length, UErrorCode *status); + +#endif /* !UCONFIG_NO_CONVERSION */ + +#endif /* __ICU_UCNV_SEL_H__ */ diff --git a/thirdparty/icu4c/common/unicode/uconfig.h b/thirdparty/icu4c/common/unicode/uconfig.h new file mode 100644 index 0000000000..bbc232d1ed --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uconfig.h @@ -0,0 +1,456 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: uconfig.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002sep19 +* created by: Markus W. Scherer +*/ + +#ifndef __UCONFIG_H__ +#define __UCONFIG_H__ + + +/*! + * \file + * \brief User-configurable settings + * + * Miscellaneous switches: + * + * A number of macros affect a variety of minor aspects of ICU. + * Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h) + * and moved here to make them easier to find. + * + * Switches for excluding parts of ICU library code modules: + * + * Changing these macros allows building partial, smaller libraries for special purposes. + * By default, all modules are built. + * The switches are fairly coarse, controlling large modules. + * Basic services cannot be turned off. + * + * Building with any of these options does not guarantee that the + * ICU build process will completely work. It is recommended that + * the ICU libraries and data be built using the normal build. + * At that time you should remove the data used by those services. + * After building the ICU data library, you should rebuild the ICU + * libraries with these switches customized to your needs. + * + * @stable ICU 2.4 + */ + +/** + * If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h" + * prior to determining default settings for uconfig variables. + * + * @internal ICU 4.0 + */ +#if defined(UCONFIG_USE_LOCAL) +#include "uconfig_local.h" +#endif + +/** + * \def U_DEBUG + * Determines whether to include debugging code. + * Automatically set on Windows, but most compilers do not have + * related predefined macros. + * @internal + */ +#ifdef U_DEBUG + /* Use the predefined value. */ +#elif defined(_DEBUG) + /* + * _DEBUG is defined by Visual Studio debug compilation. + * Do *not* test for its NDEBUG macro: It is an orthogonal macro + * which disables assert(). + */ +# define U_DEBUG 1 +# else +# define U_DEBUG 0 +#endif + +/** + * Determines whether to enable auto cleanup of libraries. + * @internal + */ +#ifndef UCLN_NO_AUTO_CLEANUP +#define UCLN_NO_AUTO_CLEANUP 1 +#endif + +/** + * \def U_DISABLE_RENAMING + * Determines whether to disable renaming or not. + * @internal + */ +#ifndef U_DISABLE_RENAMING +#define U_DISABLE_RENAMING 0 +#endif + +/** + * \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS + * Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h. + * utypes.h includes those headers if this macro is defined to 0. + * Otherwise, each those headers must be included explicitly when using one of their macros. + * Defaults to 0 for backward compatibility, except inside ICU. + * @stable ICU 49 + */ +#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS + /* Use the predefined value. */ +#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \ + defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \ + defined(U_TOOLUTIL_IMPLEMENTATION) +# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1 +#else +# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0 +#endif + +/** + * \def U_OVERRIDE_CXX_ALLOCATION + * Determines whether to override new and delete. + * ICU is normally built such that all of its C++ classes, via their UMemory base, + * override operators new and delete to use its internal, customizable, + * non-exception-throwing memory allocation functions. (Default value 1 for this macro.) + * + * This is especially important when the application and its libraries use multiple heaps. + * For example, on Windows, this allows the ICU DLL to be used by + * applications that statically link the C Runtime library. + * + * @stable ICU 2.2 + */ +#ifndef U_OVERRIDE_CXX_ALLOCATION +#define U_OVERRIDE_CXX_ALLOCATION 1 +#endif + +/** + * \def U_ENABLE_TRACING + * Determines whether to enable tracing. + * @internal + */ +#ifndef U_ENABLE_TRACING +#define U_ENABLE_TRACING 0 +#endif + +/** + * \def UCONFIG_ENABLE_PLUGINS + * Determines whether to enable ICU plugins. + * @internal + */ +#ifndef UCONFIG_ENABLE_PLUGINS +#define UCONFIG_ENABLE_PLUGINS 0 +#endif + +/** + * \def U_ENABLE_DYLOAD + * Whether to enable Dynamic loading in ICU. + * @internal + */ +#ifndef U_ENABLE_DYLOAD +#define U_ENABLE_DYLOAD 1 +#endif + +/** + * \def U_CHECK_DYLOAD + * Whether to test Dynamic loading as an OS capability. + * @internal + */ +#ifndef U_CHECK_DYLOAD +#define U_CHECK_DYLOAD 1 +#endif + +/** + * \def U_DEFAULT_SHOW_DRAFT + * Do we allow ICU users to use the draft APIs by default? + * @internal + */ +#ifndef U_DEFAULT_SHOW_DRAFT +#define U_DEFAULT_SHOW_DRAFT 1 +#endif + +/*===========================================================================*/ +/* Custom icu entry point renaming */ +/*===========================================================================*/ + +/** + * \def U_HAVE_LIB_SUFFIX + * 1 if a custom library suffix is set. + * @internal + */ +#ifdef U_HAVE_LIB_SUFFIX + /* Use the predefined value. */ +#elif defined(U_LIB_SUFFIX_C_NAME) || defined(U_IN_DOXYGEN) +# define U_HAVE_LIB_SUFFIX 1 +#endif + +/** + * \def U_LIB_SUFFIX_C_NAME_STRING + * Defines the library suffix as a string with C syntax. + * @internal + */ +#ifdef U_LIB_SUFFIX_C_NAME_STRING + /* Use the predefined value. */ +#elif defined(U_LIB_SUFFIX_C_NAME) +# define CONVERT_TO_STRING(s) #s +# define U_LIB_SUFFIX_C_NAME_STRING CONVERT_TO_STRING(U_LIB_SUFFIX_C_NAME) +#else +# define U_LIB_SUFFIX_C_NAME_STRING "" +#endif + +/* common/i18n library switches --------------------------------------------- */ + +/** + * \def UCONFIG_ONLY_COLLATION + * This switch turns off modules that are not needed for collation. + * + * It does not turn off legacy conversion because that is necessary + * for ICU to work on EBCDIC platforms (for the default converter). + * If you want "only collation" and do not build for EBCDIC, + * then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_ONLY_COLLATION +# define UCONFIG_ONLY_COLLATION 0 +#endif + +#if UCONFIG_ONLY_COLLATION + /* common library */ +# define UCONFIG_NO_BREAK_ITERATION 1 +# define UCONFIG_NO_IDNA 1 + + /* i18n library */ +# if UCONFIG_NO_COLLATION +# error Contradictory collation switches in uconfig.h. +# endif +# define UCONFIG_NO_FORMATTING 1 +# define UCONFIG_NO_TRANSLITERATION 1 +# define UCONFIG_NO_REGULAR_EXPRESSIONS 1 +#endif + +/* common library switches -------------------------------------------------- */ + +/** + * \def UCONFIG_NO_FILE_IO + * This switch turns off all file access in the common library + * where file access is only used for data loading. + * ICU data must then be provided in the form of a data DLL (or with an + * equivalent way to link to the data residing in an executable, + * as in building a combined library with both the common library's code and + * the data), or via udata_setCommonData(). + * Application data must be provided via udata_setAppData() or by using + * "open" functions that take pointers to data, for example ucol_openBinary(). + * + * File access is not used at all in the i18n library. + * + * File access cannot be turned off for the icuio library or for the ICU + * test suites and ICU tools. + * + * @stable ICU 3.6 + */ +#ifndef UCONFIG_NO_FILE_IO +# define UCONFIG_NO_FILE_IO 0 +#endif + +#if UCONFIG_NO_FILE_IO && defined(U_TIMEZONE_FILES_DIR) +# error Contradictory file io switches in uconfig.h. +#endif + +/** + * \def UCONFIG_NO_CONVERSION + * ICU will not completely build (compiling the tools fails) with this + * switch turned on. + * This switch turns off all converters. + * + * You may want to use this together with U_CHARSET_IS_UTF8 defined to 1 + * in utypes.h if char* strings in your environment are always in UTF-8. + * + * @stable ICU 3.2 + * @see U_CHARSET_IS_UTF8 + */ +#ifndef UCONFIG_NO_CONVERSION +# define UCONFIG_NO_CONVERSION 0 +#endif + +#if UCONFIG_NO_CONVERSION +# define UCONFIG_NO_LEGACY_CONVERSION 1 +#endif + +/** + * \def UCONFIG_ONLY_HTML_CONVERSION + * This switch turns off all of the converters NOT listed in + * the HTML encoding standard: + * http://www.w3.org/TR/encoding/#names-and-labels + * + * This is not possible on EBCDIC platforms + * because they need ibm-37 or ibm-1047 default converters. + * + * @stable ICU 55 + */ +#ifndef UCONFIG_ONLY_HTML_CONVERSION +# define UCONFIG_ONLY_HTML_CONVERSION 0 +#endif + +/** + * \def UCONFIG_NO_LEGACY_CONVERSION + * This switch turns off all converters except for + * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) + * - US-ASCII + * - ISO-8859-1 + * + * Turning off legacy conversion is not possible on EBCDIC platforms + * because they need ibm-37 or ibm-1047 default converters. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_LEGACY_CONVERSION +# define UCONFIG_NO_LEGACY_CONVERSION 0 +#endif + +/** + * \def UCONFIG_NO_NORMALIZATION + * This switch turns off normalization. + * It implies turning off several other services as well, for example + * collation and IDNA. + * + * @stable ICU 2.6 + */ +#ifndef UCONFIG_NO_NORMALIZATION +# define UCONFIG_NO_NORMALIZATION 0 +#endif + +#if UCONFIG_NO_NORMALIZATION + /* common library */ + /* ICU 50 CJK dictionary BreakIterator uses normalization */ +# define UCONFIG_NO_BREAK_ITERATION 1 + /* IDNA (UTS #46) is implemented via normalization */ +# define UCONFIG_NO_IDNA 1 + + /* i18n library */ +# if UCONFIG_ONLY_COLLATION +# error Contradictory collation switches in uconfig.h. +# endif +# define UCONFIG_NO_COLLATION 1 +# define UCONFIG_NO_TRANSLITERATION 1 +#endif + +/** + * \def UCONFIG_NO_BREAK_ITERATION + * This switch turns off break iteration. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_BREAK_ITERATION +# define UCONFIG_NO_BREAK_ITERATION 0 +#endif + +/** + * \def UCONFIG_NO_IDNA + * This switch turns off IDNA. + * + * @stable ICU 2.6 + */ +#ifndef UCONFIG_NO_IDNA +# define UCONFIG_NO_IDNA 0 +#endif + +/** + * \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE + * Determines the default UMessagePatternApostropheMode. + * See the documentation for that enum. + * + * @stable ICU 4.8 + */ +#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE +# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL +#endif + +/** + * \def UCONFIG_USE_WINDOWS_LCID_MAPPING_API + * On platforms where U_PLATFORM_HAS_WIN32_API is true, this switch determines + * if the Windows platform APIs are used for LCID<->Locale Name conversions. + * Otherwise, only the built-in ICU tables are used. + * + * @internal ICU 64 + */ +#ifndef UCONFIG_USE_WINDOWS_LCID_MAPPING_API +# define UCONFIG_USE_WINDOWS_LCID_MAPPING_API 1 +#endif + +/* i18n library switches ---------------------------------------------------- */ + +/** + * \def UCONFIG_NO_COLLATION + * This switch turns off collation and collation-based string search. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_COLLATION +# define UCONFIG_NO_COLLATION 0 +#endif + +/** + * \def UCONFIG_NO_FORMATTING + * This switch turns off formatting and calendar/timezone services. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_FORMATTING +# define UCONFIG_NO_FORMATTING 0 +#endif + +/** + * \def UCONFIG_NO_TRANSLITERATION + * This switch turns off transliteration. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_TRANSLITERATION +# define UCONFIG_NO_TRANSLITERATION 0 +#endif + +/** + * \def UCONFIG_NO_REGULAR_EXPRESSIONS + * This switch turns off regular expressions. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS +# define UCONFIG_NO_REGULAR_EXPRESSIONS 0 +#endif + +/** + * \def UCONFIG_NO_SERVICE + * This switch turns off service registration. + * + * @stable ICU 3.2 + */ +#ifndef UCONFIG_NO_SERVICE +# define UCONFIG_NO_SERVICE 0 +#endif + +/** + * \def UCONFIG_HAVE_PARSEALLINPUT + * This switch turns on the "parse all input" attribute. Binary incompatible. + * + * @internal + */ +#ifndef UCONFIG_HAVE_PARSEALLINPUT +# define UCONFIG_HAVE_PARSEALLINPUT 1 +#endif + +/** + * \def UCONFIG_NO_FILTERED_BREAK_ITERATION + * This switch turns off filtered break iteration code. + * + * @internal + */ +#ifndef UCONFIG_NO_FILTERED_BREAK_ITERATION +# define UCONFIG_NO_FILTERED_BREAK_ITERATION 0 +#endif + +#endif // __UCONFIG_H__ diff --git a/thirdparty/icu4c/common/unicode/ucpmap.h b/thirdparty/icu4c/common/unicode/ucpmap.h new file mode 100644 index 0000000000..31e1365cac --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ucpmap.h @@ -0,0 +1,159 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucpmap.h +// created: 2018sep03 Markus W. Scherer + +#ifndef __UCPMAP_H__ +#define __UCPMAP_H__ + +#include "unicode/utypes.h" + +U_CDECL_BEGIN + +/** + * \file + * + * This file defines an abstract map from Unicode code points to integer values. + * + * @see UCPMap + * @see UCPTrie + * @see UMutableCPTrie + */ + +/** + * Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values. + * + * @see UCPTrie + * @see UMutableCPTrie + * @stable ICU 63 + */ +typedef struct UCPMap UCPMap; + +/** + * Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates. + * Most users should use UCPMAP_RANGE_NORMAL. + * + * @see ucpmap_getRange + * @see ucptrie_getRange + * @see umutablecptrie_getRange + * @stable ICU 63 + */ +enum UCPMapRangeOption { + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map. + * Most users should use this option. + * @stable ICU 63 + */ + UCPMAP_RANGE_NORMAL, + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map, + * except that lead surrogates (U+D800..U+DBFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. + * The surrogateValue is not transformed via filter(). + * See U_IS_LEAD(c). + * + * Most users should use UCPMAP_RANGE_NORMAL instead. + * + * This option is useful for maps that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the lead surrogate code *points*. + * @stable ICU 63 + */ + UCPMAP_RANGE_FIXED_LEAD_SURROGATES, + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map, + * except that all surrogates (U+D800..U+DFFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. + * The surrogateValue is not transformed via filter(). + * See U_IS_SURROGATE(c). + * + * Most users should use UCPMAP_RANGE_NORMAL instead. + * + * This option is useful for maps that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the lead surrogate code *points*. + * @stable ICU 63 + */ + UCPMAP_RANGE_FIXED_ALL_SURROGATES +}; +#ifndef U_IN_DOXYGEN +typedef enum UCPMapRangeOption UCPMapRangeOption; +#endif + +/** + * Returns the value for a code point as stored in the map, with range checking. + * Returns an implementation-defined error value if c is not in the range 0..U+10FFFF. + * + * @param map the map + * @param c the code point + * @return the map value, + * or an implementation-defined error value if the code point is not in the range 0..U+10FFFF + * @stable ICU 63 + */ +U_CAPI uint32_t U_EXPORT2 +ucpmap_get(const UCPMap *map, UChar32 c); + +/** + * Callback function type: Modifies a map value. + * Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange(). + * The modified value will be returned by the getRange function. + * + * Can be used to ignore some of the value bits, + * make a filter for one of several values, + * return a value index computed from the map value, etc. + * + * @param context an opaque pointer, as passed into the getRange function + * @param value a value from the map + * @return the modified value + * @stable ICU 63 + */ +typedef uint32_t U_CALLCONV +UCPMapValueFilter(const void *context, uint32_t value); + +/** + * Returns the last code point such that all those from start to there have the same value. + * Can be used to efficiently iterate over all same-value ranges in a map. + * (This is normally faster than iterating over code points and get()ting each value, + * but much slower than a data structure that stores ranges directly.) + * + * If the UCPMapValueFilter function pointer is not NULL, then + * the value to be delivered is passed through that function, and the return value is the end + * of the range where all values are modified to the same actual value. + * The value is unchanged if that function pointer is NULL. + * + * Example: + * \code + * UChar32 start = 0, end; + * uint32_t value; + * while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0, + * NULL, NULL, &value)) >= 0) { + * // Work with the range start..end and its value. + * start = end + 1; + * } + * \endcode + * + * @param map the map + * @param start range start + * @param option defines whether surrogates are treated normally, + * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL + * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL + * @param filter a pointer to a function that may modify the map data value, + * or NULL if the values from the map are to be used unmodified + * @param context an opaque pointer that is passed on to the filter function + * @param pValue if not NULL, receives the value that every code point start..end has; + * may have been modified by filter(context, map value) + * if that function pointer is not NULL + * @return the range end code point, or -1 if start is not a valid code point + * @stable ICU 63 + */ +U_CAPI UChar32 U_EXPORT2 +ucpmap_getRange(const UCPMap *map, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue); + +U_CDECL_END + +#endif diff --git a/thirdparty/icu4c/common/unicode/ucptrie.h b/thirdparty/icu4c/common/unicode/ucptrie.h new file mode 100644 index 0000000000..b95491b183 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ucptrie.h @@ -0,0 +1,646 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucptrie.h (modified from utrie2.h) +// created: 2017dec29 Markus W. Scherer + +#ifndef __UCPTRIE_H__ +#define __UCPTRIE_H__ + +#include "unicode/utypes.h" +#include "unicode/ucpmap.h" +#include "unicode/utf8.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +U_CDECL_BEGIN + +/** + * \file + * + * This file defines an immutable Unicode code point trie. + * + * @see UCPTrie + * @see UMutableCPTrie + */ + +#ifndef U_IN_DOXYGEN +/** @internal */ +typedef union UCPTrieData { + /** @internal */ + const void *ptr0; + /** @internal */ + const uint16_t *ptr16; + /** @internal */ + const uint32_t *ptr32; + /** @internal */ + const uint8_t *ptr8; +} UCPTrieData; +#endif + +/** + * Immutable Unicode code point trie structure. + * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values. + * For details see http://site.icu-project.org/design/struct/utrie + * + * Do not access UCPTrie fields directly; use public functions and macros. + * Functions are easy to use: They support all trie types and value widths. + * + * When performance is really important, macros provide faster access. + * Most macros are specific to either "fast" or "small" tries, see UCPTrieType. + * There are "fast" macros for special optimized use cases. + * + * The macros will return bogus values, or may crash, if used on the wrong type or value width. + * + * @see UMutableCPTrie + * @stable ICU 63 + */ +struct UCPTrie { +#ifndef U_IN_DOXYGEN + /** @internal */ + const uint16_t *index; + /** @internal */ + UCPTrieData data; + + /** @internal */ + int32_t indexLength; + /** @internal */ + int32_t dataLength; + /** Start of the last range which ends at U+10FFFF. @internal */ + UChar32 highStart; + /** highStart>>12 @internal */ + uint16_t shifted12HighStart; + + /** @internal */ + int8_t type; // UCPTrieType + /** @internal */ + int8_t valueWidth; // UCPTrieValueWidth + + /** padding/reserved @internal */ + uint32_t reserved32; + /** padding/reserved @internal */ + uint16_t reserved16; + + /** + * Internal index-3 null block offset. + * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block. + * @internal + */ + uint16_t index3NullOffset; + /** + * Internal data null block offset, not shifted. + * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block. + * @internal + */ + int32_t dataNullOffset; + /** @internal */ + uint32_t nullValue; + +#ifdef UCPTRIE_DEBUG + /** @internal */ + const char *name; +#endif +#endif +}; +#ifndef U_IN_DOXYGEN +typedef struct UCPTrie UCPTrie; +#endif + +/** + * Selectors for the type of a UCPTrie. + * Different trade-offs for size vs. speed. + * + * @see umutablecptrie_buildImmutable + * @see ucptrie_openFromBinary + * @see ucptrie_getType + * @stable ICU 63 + */ +enum UCPTrieType { + /** + * For ucptrie_openFromBinary() to accept any type. + * ucptrie_getType() will return the actual type. + * @stable ICU 63 + */ + UCPTRIE_TYPE_ANY = -1, + /** + * Fast/simple/larger BMP data structure. Use functions and "fast" macros. + * @stable ICU 63 + */ + UCPTRIE_TYPE_FAST, + /** + * Small/slower BMP data structure. Use functions and "small" macros. + * @stable ICU 63 + */ + UCPTRIE_TYPE_SMALL +}; +#ifndef U_IN_DOXYGEN +typedef enum UCPTrieType UCPTrieType; +#endif + +/** + * Selectors for the number of bits in a UCPTrie data value. + * + * @see umutablecptrie_buildImmutable + * @see ucptrie_openFromBinary + * @see ucptrie_getValueWidth + * @stable ICU 63 + */ +enum UCPTrieValueWidth { + /** + * For ucptrie_openFromBinary() to accept any data value width. + * ucptrie_getValueWidth() will return the actual data value width. + * @stable ICU 63 + */ + UCPTRIE_VALUE_BITS_ANY = -1, + /** + * The trie stores 16 bits per data value. + * It returns them as unsigned values 0..0xffff=65535. + * @stable ICU 63 + */ + UCPTRIE_VALUE_BITS_16, + /** + * The trie stores 32 bits per data value. + * @stable ICU 63 + */ + UCPTRIE_VALUE_BITS_32, + /** + * The trie stores 8 bits per data value. + * It returns them as unsigned values 0..0xff=255. + * @stable ICU 63 + */ + UCPTRIE_VALUE_BITS_8 +}; +#ifndef U_IN_DOXYGEN +typedef enum UCPTrieValueWidth UCPTrieValueWidth; +#endif + +/** + * Opens a trie from its binary form, stored in 32-bit-aligned memory. + * Inverse of ucptrie_toBinary(). + * + * The memory must remain valid and unchanged as long as the trie is used. + * You must ucptrie_close() the trie once you are done using it. + * + * @param type selects the trie type; results in an + * U_INVALID_FORMAT_ERROR if it does not match the binary data; + * use UCPTRIE_TYPE_ANY to accept any type + * @param valueWidth selects the number of bits in a data value; results in an + * U_INVALID_FORMAT_ERROR if it does not match the binary data; + * use UCPTRIE_VALUE_BITS_ANY to accept any data value width + * @param data a pointer to 32-bit-aligned memory containing the binary data of a UCPTrie + * @param length the number of bytes available at data; + * can be more than necessary + * @param pActualLength receives the actual number of bytes at data taken up by the trie data; + * can be NULL + * @param pErrorCode an in/out ICU UErrorCode + * @return the trie + * + * @see umutablecptrie_open + * @see umutablecptrie_buildImmutable + * @see ucptrie_toBinary + * @stable ICU 63 + */ +U_CAPI UCPTrie * U_EXPORT2 +ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, + const void *data, int32_t length, int32_t *pActualLength, + UErrorCode *pErrorCode); + +/** + * Closes a trie and releases associated memory. + * + * @param trie the trie + * @stable ICU 63 + */ +U_CAPI void U_EXPORT2 +ucptrie_close(UCPTrie *trie); + +/** + * Returns the trie type. + * + * @param trie the trie + * @return the trie type + * @see ucptrie_openFromBinary + * @see UCPTRIE_TYPE_ANY + * @stable ICU 63 + */ +U_CAPI UCPTrieType U_EXPORT2 +ucptrie_getType(const UCPTrie *trie); + +/** + * Returns the number of bits in a trie data value. + * + * @param trie the trie + * @return the number of bits in a trie data value + * @see ucptrie_openFromBinary + * @see UCPTRIE_VALUE_BITS_ANY + * @stable ICU 63 + */ +U_CAPI UCPTrieValueWidth U_EXPORT2 +ucptrie_getValueWidth(const UCPTrie *trie); + +/** + * Returns the value for a code point as stored in the trie, with range checking. + * Returns the trie error value if c is not in the range 0..U+10FFFF. + * + * Easier to use than UCPTRIE_FAST_GET() and similar macros but slower. + * Easier to use because, unlike the macros, this function works on all UCPTrie + * objects, for all types and value widths. + * + * @param trie the trie + * @param c the code point + * @return the trie value, + * or the trie error value if the code point is not in the range 0..U+10FFFF + * @stable ICU 63 + */ +U_CAPI uint32_t U_EXPORT2 +ucptrie_get(const UCPTrie *trie, UChar32 c); + +/** + * Returns the last code point such that all those from start to there have the same value. + * Can be used to efficiently iterate over all same-value ranges in a trie. + * (This is normally faster than iterating over code points and get()ting each value, + * but much slower than a data structure that stores ranges directly.) + * + * If the UCPMapValueFilter function pointer is not NULL, then + * the value to be delivered is passed through that function, and the return value is the end + * of the range where all values are modified to the same actual value. + * The value is unchanged if that function pointer is NULL. + * + * Example: + * \code + * UChar32 start = 0, end; + * uint32_t value; + * while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, + * NULL, NULL, &value)) >= 0) { + * // Work with the range start..end and its value. + * start = end + 1; + * } + * \endcode + * + * @param trie the trie + * @param start range start + * @param option defines whether surrogates are treated normally, + * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL + * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL + * @param filter a pointer to a function that may modify the trie data value, + * or NULL if the values from the trie are to be used unmodified + * @param context an opaque pointer that is passed on to the filter function + * @param pValue if not NULL, receives the value that every code point start..end has; + * may have been modified by filter(context, trie value) + * if that function pointer is not NULL + * @return the range end code point, or -1 if start is not a valid code point + * @stable ICU 63 + */ +U_CAPI UChar32 U_EXPORT2 +ucptrie_getRange(const UCPTrie *trie, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue); + +/** + * Writes a memory-mappable form of the trie into 32-bit aligned memory. + * Inverse of ucptrie_openFromBinary(). + * + * @param trie the trie + * @param data a pointer to 32-bit-aligned memory to be filled with the trie data; + * can be NULL if capacity==0 + * @param capacity the number of bytes available at data, or 0 for pure preflighting + * @param pErrorCode an in/out ICU UErrorCode; + * U_BUFFER_OVERFLOW_ERROR if the capacity is too small + * @return the number of bytes written or (if buffer overflow) needed for the trie + * + * @see ucptrie_openFromBinary() + * @stable ICU 63 + */ +U_CAPI int32_t U_EXPORT2 +ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode); + +/** + * Macro parameter value for a trie with 16-bit data values. + * Use the name of this macro as a "dataAccess" parameter in other macros. + * Do not use this macro in any other way. + * + * @see UCPTRIE_VALUE_BITS_16 + * @stable ICU 63 + */ +#define UCPTRIE_16(trie, i) ((trie)->data.ptr16[i]) + +/** + * Macro parameter value for a trie with 32-bit data values. + * Use the name of this macro as a "dataAccess" parameter in other macros. + * Do not use this macro in any other way. + * + * @see UCPTRIE_VALUE_BITS_32 + * @stable ICU 63 + */ +#define UCPTRIE_32(trie, i) ((trie)->data.ptr32[i]) + +/** + * Macro parameter value for a trie with 8-bit data values. + * Use the name of this macro as a "dataAccess" parameter in other macros. + * Do not use this macro in any other way. + * + * @see UCPTRIE_VALUE_BITS_8 + * @stable ICU 63 + */ +#define UCPTRIE_8(trie, i) ((trie)->data.ptr8[i]) + +/** + * Returns a trie value for a code point, with range checking. + * Returns the trie error value if c is not in the range 0..U+10FFFF. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point + * @return The code point's trie value. + * @stable ICU 63 + */ +#define UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c)) + +/** + * Returns a 16-bit trie value for a code point, with range checking. + * Returns the trie error value if c is not in the range U+0000..U+10FFFF. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_SMALL + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point + * @return The code point's trie value. + * @stable ICU 63 + */ +#define UCPTRIE_SMALL_GET(trie, dataAccess, c) \ + dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c)) + +/** + * UTF-16: Reads the next code point (UChar32 c, out), post-increments src, + * and gets a value from the trie. + * Sets the trie error value if c is an unpaired surrogate. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param src (const UChar *, in/out) the source text pointer + * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated + * @param c (UChar32, out) variable for the code point + * @param result (out) variable for the trie lookup result + * @stable ICU 63 + */ +#define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \ + (c) = *(src)++; \ + int32_t __index; \ + if (!U16_IS_SURROGATE(c)) { \ + __index = _UCPTRIE_FAST_INDEX(trie, c); \ + } else { \ + uint16_t __c2; \ + if (U16_IS_SURROGATE_LEAD(c) && (src) != (limit) && U16_IS_TRAIL(__c2 = *(src))) { \ + ++(src); \ + (c) = U16_GET_SUPPLEMENTARY((c), __c2); \ + __index = _UCPTRIE_SMALL_INDEX(trie, c); \ + } else { \ + __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \ + } \ + } \ + (result) = dataAccess(trie, __index); \ +} UPRV_BLOCK_MACRO_END + +/** + * UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src, + * and gets a value from the trie. + * Sets the trie error value if c is an unpaired surrogate. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param start (const UChar *, in) the start pointer for the text + * @param src (const UChar *, in/out) the source text pointer + * @param c (UChar32, out) variable for the code point + * @param result (out) variable for the trie lookup result + * @stable ICU 63 + */ +#define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \ + (c) = *--(src); \ + int32_t __index; \ + if (!U16_IS_SURROGATE(c)) { \ + __index = _UCPTRIE_FAST_INDEX(trie, c); \ + } else { \ + uint16_t __c2; \ + if (U16_IS_SURROGATE_TRAIL(c) && (src) != (start) && U16_IS_LEAD(__c2 = *((src) - 1))) { \ + --(src); \ + (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \ + __index = _UCPTRIE_SMALL_INDEX(trie, c); \ + } else { \ + __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \ + } \ + } \ + (result) = dataAccess(trie, __index); \ +} UPRV_BLOCK_MACRO_END + +/** + * UTF-8: Post-increments src and gets a value from the trie. + * Sets the trie error value for an ill-formed byte sequence. + * + * Unlike UCPTRIE_FAST_U16_NEXT() this UTF-8 macro does not provide the code point + * because it would be more work to do so and is often not needed. + * If the trie value differs from the error value, then the byte sequence is well-formed, + * and the code point can be assembled without revalidation. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param src (const char *, in/out) the source text pointer + * @param limit (const char *, in) the limit pointer for the text (must not be NULL) + * @param result (out) variable for the trie lookup result + * @stable ICU 63 + */ +#define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __lead = (uint8_t)*(src)++; \ + if (!U8_IS_SINGLE(__lead)) { \ + uint8_t __t1, __t2, __t3; \ + if ((src) != (limit) && \ + (__lead >= 0xe0 ? \ + __lead < 0xf0 ? /* U+0800..U+FFFF except surrogates */ \ + U8_LEAD3_T1_BITS[__lead &= 0xf] & (1 << ((__t1 = *(src)) >> 5)) && \ + ++(src) != (limit) && (__t2 = *(src) - 0x80) <= 0x3f && \ + (__lead = ((int32_t)(trie)->index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) \ + : /* U+10000..U+10FFFF */ \ + (__lead -= 0xf0) <= 4 && \ + U8_LEAD4_T1_BITS[(__t1 = *(src)) >> 4] & (1 << __lead) && \ + (__lead = (__lead << 6) | (__t1 & 0x3f), ++(src) != (limit)) && \ + (__t2 = *(src) - 0x80) <= 0x3f && \ + ++(src) != (limit) && (__t3 = *(src) - 0x80) <= 0x3f && \ + (__lead = __lead >= (trie)->shifted12HighStart ? \ + (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \ + ucptrie_internalSmallU8Index((trie), __lead, __t2, __t3), 1) \ + : /* U+0080..U+07FF */ \ + __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && \ + (__lead = (int32_t)(trie)->index[__lead & 0x1f] + __t1, 1))) { \ + ++(src); \ + } else { \ + __lead = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; /* ill-formed*/ \ + } \ + } \ + (result) = dataAccess(trie, __lead); \ +} UPRV_BLOCK_MACRO_END + +/** + * UTF-8: Pre-decrements src and gets a value from the trie. + * Sets the trie error value for an ill-formed byte sequence. + * + * Unlike UCPTRIE_FAST_U16_PREV() this UTF-8 macro does not provide the code point + * because it would be more work to do so and is often not needed. + * If the trie value differs from the error value, then the byte sequence is well-formed, + * and the code point can be assembled without revalidation. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param start (const char *, in) the start pointer for the text + * @param src (const char *, in/out) the source text pointer + * @param result (out) variable for the trie lookup result + * @stable ICU 63 + */ +#define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __index = (uint8_t)*--(src); \ + if (!U8_IS_SINGLE(__index)) { \ + __index = ucptrie_internalU8PrevIndex((trie), __index, (const uint8_t *)(start), \ + (const uint8_t *)(src)); \ + (src) -= __index & 7; \ + __index >>= 3; \ + } \ + (result) = dataAccess(trie, __index); \ +} UPRV_BLOCK_MACRO_END + +/** + * Returns a trie value for an ASCII code point, without range checking. + * + * @param trie (const UCPTrie *, in) the trie (of either fast or small type) + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point; must be U+0000..U+007F + * @return The ASCII code point's trie value. + * @stable ICU 63 + */ +#define UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c) + +/** + * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking. + * Can be used to look up a value for a UTF-16 code unit if other parts of + * the string processing check for surrogates. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point, must be U+0000..U+FFFF + * @return The BMP code point's trie value. + * @stable ICU 63 + */ +#define UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c)) + +/** + * Returns a trie value for a supplementary code point (U+10000..U+10FFFF), + * without range checking. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point, must be U+10000..U+10FFFF + * @return The supplementary code point's trie value. + * @stable ICU 63 + */ +#define UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c)) + +/* Internal definitions ----------------------------------------------------- */ + +#ifndef U_IN_DOXYGEN + +/** + * Internal implementation constants. + * These are needed for the API macros, but users should not use these directly. + * @internal + */ +enum { + /** @internal */ + UCPTRIE_FAST_SHIFT = 6, + + /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */ + UCPTRIE_FAST_DATA_BLOCK_LENGTH = 1 << UCPTRIE_FAST_SHIFT, + + /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */ + UCPTRIE_FAST_DATA_MASK = UCPTRIE_FAST_DATA_BLOCK_LENGTH - 1, + + /** @internal */ + UCPTRIE_SMALL_MAX = 0xfff, + + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for out-of-range code points and ill-formed UTF-8/16. + * @internal + */ + UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET = 1, + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for code points highStart..U+10FFFF. + * @internal + */ + UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET = 2 +}; + +/* Internal functions and macros -------------------------------------------- */ +// Do not conditionalize with #ifndef U_HIDE_INTERNAL_API, needed for public API + +/** @internal */ +U_CAPI int32_t U_EXPORT2 +ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c); + +/** @internal */ +U_CAPI int32_t U_EXPORT2 +ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3); + +/** + * Internal function for part of the UCPTRIE_FAST_U8_PREVxx() macro implementations. + * Do not call directly. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c, + const uint8_t *start, const uint8_t *src); + +/** Internal trie getter for a code point below the fast limit. Returns the data index. @internal */ +#define _UCPTRIE_FAST_INDEX(trie, c) \ + ((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)) + +/** Internal trie getter for a code point at or above the fast limit. Returns the data index. @internal */ +#define _UCPTRIE_SMALL_INDEX(trie, c) \ + ((c) >= (trie)->highStart ? \ + (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \ + ucptrie_internalSmallIndex(trie, c)) + +/** + * Internal trie getter for a code point, with checking that c is in U+0000..10FFFF. + * Returns the data index. + * @internal + */ +#define _UCPTRIE_CP_INDEX(trie, fastMax, c) \ + ((uint32_t)(c) <= (uint32_t)(fastMax) ? \ + _UCPTRIE_FAST_INDEX(trie, c) : \ + (uint32_t)(c) <= 0x10ffff ? \ + _UCPTRIE_SMALL_INDEX(trie, c) : \ + (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET) + +U_CDECL_END + +#endif // U_IN_DOXYGEN + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUCPTriePointer + * "Smart pointer" class, closes a UCPTrie via ucptrie_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 63 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close); + +U_NAMESPACE_END + +#endif // U_SHOW_CPLUSPLUS_API + +#endif diff --git a/thirdparty/icu4c/common/unicode/ucurr.h b/thirdparty/icu4c/common/unicode/ucurr.h new file mode 100644 index 0000000000..35c2a39389 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ucurr.h @@ -0,0 +1,468 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ +#ifndef _UCURR_H_ +#define _UCURR_H_ + +#include "unicode/utypes.h" +#include "unicode/uenum.h" + +/** + * \file + * \brief C API: Encapsulates information about a currency. + * + * The ucurr API encapsulates information about a currency, as defined by + * ISO 4217. A currency is represented by a 3-character string + * containing its ISO 4217 code. This API can return various data + * necessary the proper display of a currency: + * + * <ul><li>A display symbol, for a specific locale + * <li>The number of fraction digits to display + * <li>A rounding increment + * </ul> + * + * The <tt>DecimalFormat</tt> class uses these data to display + * currencies. + * @author Alan Liu + * @since ICU 2.2 + */ + +#if !UCONFIG_NO_FORMATTING + +/** + * Currency Usage used for Decimal Format + * @stable ICU 54 + */ +enum UCurrencyUsage { + /** + * a setting to specify currency usage which determines currency digit + * and rounding for standard usage, for example: "50.00 NT$" + * used as DEFAULT value + * @stable ICU 54 + */ + UCURR_USAGE_STANDARD=0, + /** + * a setting to specify currency usage which determines currency digit + * and rounding for cash usage, for example: "50 NT$" + * @stable ICU 54 + */ + UCURR_USAGE_CASH=1, +#ifndef U_HIDE_DEPRECATED_API + /** + * One higher than the last enum UCurrencyUsage constant. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCURR_USAGE_COUNT=2 +#endif // U_HIDE_DEPRECATED_API +}; +/** Currency Usage used for Decimal Format */ +typedef enum UCurrencyUsage UCurrencyUsage; + +/** + * Finds a currency code for the given locale. + * @param locale the locale for which to retrieve a currency code. + * Currency can be specified by the "currency" keyword + * in which case it overrides the default currency code + * @param buff fill in buffer. Can be NULL for preflighting. + * @param buffCapacity capacity of the fill in buffer. Can be 0 for + * preflighting. If it is non-zero, the buff parameter + * must not be NULL. + * @param ec error code + * @return length of the currency string. It should always be 3. If 0, + * currency couldn't be found or the input values are + * invalid. + * @stable ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +ucurr_forLocale(const char* locale, + UChar* buff, + int32_t buffCapacity, + UErrorCode* ec); + +/** + * Selector constants for ucurr_getName(). + * + * @see ucurr_getName + * @stable ICU 2.6 + */ +typedef enum UCurrNameStyle { + /** + * Selector for ucurr_getName indicating a symbolic name for a + * currency, such as "$" for USD. + * @stable ICU 2.6 + */ + UCURR_SYMBOL_NAME, + + /** + * Selector for ucurr_getName indicating the long name for a + * currency, such as "US Dollar" for USD. + * @stable ICU 2.6 + */ + UCURR_LONG_NAME, + + /** + * Selector for getName() indicating the narrow currency symbol. + * The narrow currency symbol is similar to the regular currency + * symbol, but it always takes the shortest form: for example, + * "$" instead of "US$" for USD in en-CA. + * + * @stable ICU 61 + */ + UCURR_NARROW_SYMBOL_NAME, + +#ifndef U_HIDE_DRAFT_API + /** + * Selector for getName() indicating the formal currency symbol. + * The formal currency symbol is similar to the regular currency + * symbol, but it always takes the form used in formal settings + * such as banking; for example, "NT$" instead of "$" for TWD in zh-TW. + * + * @draft ICU 68 + */ + UCURR_FORMAL_SYMBOL_NAME, + + /** + * Selector for getName() indicating the variant currency symbol. + * The variant symbol for a currency is an alternative symbol + * that is not necessarily as widely used as the regular symbol. + * + * @draft ICU 68 + */ + UCURR_VARIANT_SYMBOL_NAME +#endif // U_HIDE_DRAFT_API + +} UCurrNameStyle; + +#if !UCONFIG_NO_SERVICE +/** + * @stable ICU 2.6 + */ +typedef const void* UCurrRegistryKey; + +/** + * Register an (existing) ISO 4217 currency code for the given locale. + * Only the country code and the two variants EURO and PRE_EURO are + * recognized. + * @param isoCode the three-letter ISO 4217 currency code + * @param locale the locale for which to register this currency code + * @param status the in/out status code + * @return a registry key that can be used to unregister this currency code, or NULL + * if there was an error. + * @stable ICU 2.6 + */ +U_CAPI UCurrRegistryKey U_EXPORT2 +ucurr_register(const UChar* isoCode, + const char* locale, + UErrorCode* status); +/** + * Unregister the previously-registered currency definitions using the + * URegistryKey returned from ucurr_register. Key becomes invalid after + * a successful call and should not be used again. Any currency + * that might have been hidden by the original ucurr_register call is + * restored. + * @param key the registry key returned by a previous call to ucurr_register + * @param status the in/out status code, no special meanings are assigned + * @return true if the currency for this key was successfully unregistered + * @stable ICU 2.6 + */ +U_CAPI UBool U_EXPORT2 +ucurr_unregister(UCurrRegistryKey key, UErrorCode* status); +#endif /* UCONFIG_NO_SERVICE */ + +/** + * Returns the display name for the given currency in the + * given locale. For example, the display name for the USD + * currency object in the en_US locale is "$". + * @param currency null-terminated 3-letter ISO 4217 code + * @param locale locale in which to display currency + * @param nameStyle selector for which kind of name to return + * @param isChoiceFormat always set to false, or can be NULL; + * display names are static strings; + * since ICU 4.4, ChoiceFormat patterns are no longer supported + * @param len fill-in parameter to receive length of result + * @param ec error code + * @return pointer to display string of 'len' UChars. If the resource + * data contains no entry for 'currency', then 'currency' itself is + * returned. + * @stable ICU 2.6 + */ +U_CAPI const UChar* U_EXPORT2 +ucurr_getName(const UChar* currency, + const char* locale, + UCurrNameStyle nameStyle, + UBool* isChoiceFormat, + int32_t* len, + UErrorCode* ec); + +/** + * Returns the plural name for the given currency in the + * given locale. For example, the plural name for the USD + * currency object in the en_US locale is "US dollar" or "US dollars". + * @param currency null-terminated 3-letter ISO 4217 code + * @param locale locale in which to display currency + * @param isChoiceFormat always set to false, or can be NULL; + * display names are static strings; + * since ICU 4.4, ChoiceFormat patterns are no longer supported + * @param pluralCount plural count + * @param len fill-in parameter to receive length of result + * @param ec error code + * @return pointer to display string of 'len' UChars. If the resource + * data contains no entry for 'currency', then 'currency' itself is + * returned. + * @stable ICU 4.2 + */ +U_CAPI const UChar* U_EXPORT2 +ucurr_getPluralName(const UChar* currency, + const char* locale, + UBool* isChoiceFormat, + const char* pluralCount, + int32_t* len, + UErrorCode* ec); + +/** + * Returns the number of the number of fraction digits that should + * be displayed for the given currency. + * This is equivalent to ucurr_getDefaultFractionDigitsForUsage(currency,UCURR_USAGE_STANDARD,ec); + * + * Important: The number of fraction digits for a given currency is NOT + * guaranteed to be constant across versions of ICU or CLDR. For example, + * do NOT use this value as a mechanism for deciding the magnitude used + * to store currency values in a database. You should use this value for + * display purposes only. + * + * @param currency null-terminated 3-letter ISO 4217 code + * @param ec input-output error code + * @return a non-negative number of fraction digits to be + * displayed, or 0 if there is an error + * @stable ICU 3.0 + */ +U_CAPI int32_t U_EXPORT2 +ucurr_getDefaultFractionDigits(const UChar* currency, + UErrorCode* ec); + +/** + * Returns the number of the number of fraction digits that should + * be displayed for the given currency with usage. + * + * Important: The number of fraction digits for a given currency is NOT + * guaranteed to be constant across versions of ICU or CLDR. For example, + * do NOT use this value as a mechanism for deciding the magnitude used + * to store currency values in a database. You should use this value for + * display purposes only. + * + * @param currency null-terminated 3-letter ISO 4217 code + * @param usage enum usage for the currency + * @param ec input-output error code + * @return a non-negative number of fraction digits to be + * displayed, or 0 if there is an error + * @stable ICU 54 + */ +U_CAPI int32_t U_EXPORT2 +ucurr_getDefaultFractionDigitsForUsage(const UChar* currency, + const UCurrencyUsage usage, + UErrorCode* ec); + +/** + * Returns the rounding increment for the given currency, or 0.0 if no + * rounding is done by the currency. + * This is equivalent to ucurr_getRoundingIncrementForUsage(currency,UCURR_USAGE_STANDARD,ec); + * @param currency null-terminated 3-letter ISO 4217 code + * @param ec input-output error code + * @return the non-negative rounding increment, or 0.0 if none, + * or 0.0 if there is an error + * @stable ICU 3.0 + */ +U_CAPI double U_EXPORT2 +ucurr_getRoundingIncrement(const UChar* currency, + UErrorCode* ec); + +/** + * Returns the rounding increment for the given currency, or 0.0 if no + * rounding is done by the currency given usage. + * @param currency null-terminated 3-letter ISO 4217 code + * @param usage enum usage for the currency + * @param ec input-output error code + * @return the non-negative rounding increment, or 0.0 if none, + * or 0.0 if there is an error + * @stable ICU 54 + */ +U_CAPI double U_EXPORT2 +ucurr_getRoundingIncrementForUsage(const UChar* currency, + const UCurrencyUsage usage, + UErrorCode* ec); + +/** + * Selector constants for ucurr_openCurrencies(). + * + * @see ucurr_openCurrencies + * @stable ICU 3.2 + */ +typedef enum UCurrCurrencyType { + /** + * Select all ISO-4217 currency codes. + * @stable ICU 3.2 + */ + UCURR_ALL = INT32_MAX, + /** + * Select only ISO-4217 commonly used currency codes. + * These currencies can be found in common use, and they usually have + * bank notes or coins associated with the currency code. + * This does not include fund codes, precious metals and other + * various ISO-4217 codes limited to special financial products. + * @stable ICU 3.2 + */ + UCURR_COMMON = 1, + /** + * Select ISO-4217 uncommon currency codes. + * These codes respresent fund codes, precious metals and other + * various ISO-4217 codes limited to special financial products. + * A fund code is a monetary resource associated with a currency. + * @stable ICU 3.2 + */ + UCURR_UNCOMMON = 2, + /** + * Select only deprecated ISO-4217 codes. + * These codes are no longer in general public use. + * @stable ICU 3.2 + */ + UCURR_DEPRECATED = 4, + /** + * Select only non-deprecated ISO-4217 codes. + * These codes are in general public use. + * @stable ICU 3.2 + */ + UCURR_NON_DEPRECATED = 8 +} UCurrCurrencyType; + +/** + * Provides a UEnumeration object for listing ISO-4217 codes. + * @param currType You can use one of several UCurrCurrencyType values for this + * variable. You can also | (or) them together to get a specific list of + * currencies. Most people will want to use the (UCURR_COMMON|UCURR_NON_DEPRECATED) value to + * get a list of current currencies. + * @param pErrorCode Error code + * @stable ICU 3.2 + */ +U_CAPI UEnumeration * U_EXPORT2 +ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode); + +/** + * Queries if the given ISO 4217 3-letter code is available on the specified date range. + * + * Note: For checking availability of a currency on a specific date, specify the date on both 'from' and 'to' + * + * When 'from' is U_DATE_MIN and 'to' is U_DATE_MAX, this method checks if the specified currency is available any time. + * If 'from' and 'to' are same UDate value, this method checks if the specified currency is available on that date. + * + * @param isoCode + * The ISO 4217 3-letter code. + * + * @param from + * The lower bound of the date range, inclusive. When 'from' is U_DATE_MIN, check the availability + * of the currency any date before 'to' + * + * @param to + * The upper bound of the date range, inclusive. When 'to' is U_DATE_MAX, check the availability of + * the currency any date after 'from' + * + * @param errorCode + * ICU error code + * + * @return true if the given ISO 4217 3-letter code is supported on the specified date range. + * + * @stable ICU 4.8 + */ +U_CAPI UBool U_EXPORT2 +ucurr_isAvailable(const UChar* isoCode, + UDate from, + UDate to, + UErrorCode* errorCode); + +/** + * Finds the number of valid currency codes for the + * given locale and date. + * @param locale the locale for which to retrieve the + * currency count. + * @param date the date for which to retrieve the + * currency count for the given locale. + * @param ec error code + * @return the number of currency codes for the + * given locale and date. If 0, currency + * codes couldn't be found for the input + * values are invalid. + * @stable ICU 4.0 + */ +U_CAPI int32_t U_EXPORT2 +ucurr_countCurrencies(const char* locale, + UDate date, + UErrorCode* ec); + +/** + * Finds a currency code for the given locale and date + * @param locale the locale for which to retrieve a currency code. + * Currency can be specified by the "currency" keyword + * in which case it overrides the default currency code + * @param date the date for which to retrieve a currency code for + * the given locale. + * @param index the index within the available list of currency codes + * for the given locale on the given date. + * @param buff fill in buffer. Can be NULL for preflighting. + * @param buffCapacity capacity of the fill in buffer. Can be 0 for + * preflighting. If it is non-zero, the buff parameter + * must not be NULL. + * @param ec error code + * @return length of the currency string. It should always be 3. + * If 0, currency couldn't be found or the input values are + * invalid. + * @stable ICU 4.0 + */ +U_CAPI int32_t U_EXPORT2 +ucurr_forLocaleAndDate(const char* locale, + UDate date, + int32_t index, + UChar* buff, + int32_t buffCapacity, + UErrorCode* ec); + +/** + * Given a key and a locale, returns an array of string values in a preferred + * order that would make a difference. These are all and only those values where + * the open (creation) of the service with the locale formed from the input locale + * plus input keyword and that value has different behavior than creation with the + * input locale alone. + * @param key one of the keys supported by this service. For now, only + * "currency" is supported. + * @param locale the locale + * @param commonlyUsed if set to true it will return only commonly used values + * with the given locale in preferred order. Otherwise, + * it will return all the available values for the locale. + * @param status error status + * @return a string enumeration over keyword values for the given key and the locale. + * @stable ICU 4.2 + */ +U_CAPI UEnumeration* U_EXPORT2 +ucurr_getKeywordValuesForLocale(const char* key, + const char* locale, + UBool commonlyUsed, + UErrorCode* status); + +/** + * Returns the ISO 4217 numeric code for the currency. + * <p>Note: If the ISO 4217 numeric code is not assigned for the currency or + * the currency is unknown, this function returns 0. + * + * @param currency null-terminated 3-letter ISO 4217 code + * @return The ISO 4217 numeric code of the currency + * @stable ICU 49 + */ +U_CAPI int32_t U_EXPORT2 +ucurr_getNumericCode(const UChar* currency); + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/udata.h b/thirdparty/icu4c/common/unicode/udata.h new file mode 100644 index 0000000000..6caa849c42 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/udata.h @@ -0,0 +1,440 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: udata.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999oct25 +* created by: Markus W. Scherer +*/ + +#ifndef __UDATA_H__ +#define __UDATA_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +U_CDECL_BEGIN + +/** + * \file + * \brief C API: Data loading interface + * + * <h2>Information about data loading interface</h2> + * + * This API is used to find and efficiently load data for ICU and applications + * using ICU. It provides an abstract interface that specifies a data type and + * name to find and load the data. Normally this API is used by other ICU APIs + * to load required data out of the ICU data library, but it can be used to + * load data out of other places. + * + * See the User Guide Data Management chapter. + */ + +#ifndef U_HIDE_INTERNAL_API +/** + * Character used to separate package names from tree names + * @internal ICU 3.0 + */ +#define U_TREE_SEPARATOR '-' + +/** + * String used to separate package names from tree names + * @internal ICU 3.0 + */ +#define U_TREE_SEPARATOR_STRING "-" + +/** + * Character used to separate parts of entry names + * @internal ICU 3.0 + */ +#define U_TREE_ENTRY_SEP_CHAR '/' + +/** + * String used to separate parts of entry names + * @internal ICU 3.0 + */ +#define U_TREE_ENTRY_SEP_STRING "/" + +/** + * Alias for standard ICU data + * @internal ICU 3.0 + */ +#define U_ICUDATA_ALIAS "ICUDATA" + +#endif /* U_HIDE_INTERNAL_API */ + +/** + * UDataInfo contains the properties about the requested data. + * This is meta data. + * + * <p>This structure may grow in the future, indicated by the + * <code>size</code> field.</p> + * + * <p>ICU data must be at least 8-aligned, and should be 16-aligned. + * The UDataInfo struct begins 4 bytes after the start of the data item, + * so it is 4-aligned. + * + * <p>The platform data property fields help determine if a data + * file can be efficiently used on a given machine. + * The particular fields are of importance only if the data + * is affected by the properties - if there is integer data + * with word sizes > 1 byte, char* text, or UChar* text.</p> + * + * <p>The implementation for the <code>udata_open[Choice]()</code> + * functions may reject data based on the value in <code>isBigEndian</code>. + * No other field is used by the <code>udata</code> API implementation.</p> + * + * <p>The <code>dataFormat</code> may be used to identify + * the kind of data, e.g. a converter table.</p> + * + * <p>The <code>formatVersion</code> field should be used to + * make sure that the format can be interpreted. + * It may be a good idea to check only for the one or two highest + * of the version elements to allow the data memory to + * get more or somewhat rearranged contents, for as long + * as the using code can still interpret the older contents.</p> + * + * <p>The <code>dataVersion</code> field is intended to be a + * common place to store the source version of the data; + * for data from the Unicode character database, this could + * reflect the Unicode version.</p> + * + * @stable ICU 2.0 + */ +typedef struct { + /** sizeof(UDataInfo) + * @stable ICU 2.0 */ + uint16_t size; + + /** unused, set to 0 + * @stable ICU 2.0*/ + uint16_t reservedWord; + + /* platform data properties */ + /** 0 for little-endian machine, 1 for big-endian + * @stable ICU 2.0 */ + uint8_t isBigEndian; + + /** see U_CHARSET_FAMILY values in utypes.h + * @stable ICU 2.0*/ + uint8_t charsetFamily; + + /** sizeof(UChar), one of { 1, 2, 4 } + * @stable ICU 2.0*/ + uint8_t sizeofUChar; + + /** unused, set to 0 + * @stable ICU 2.0*/ + uint8_t reservedByte; + + /** data format identifier + * @stable ICU 2.0*/ + uint8_t dataFormat[4]; + + /** versions: [0] major [1] minor [2] milli [3] micro + * @stable ICU 2.0*/ + uint8_t formatVersion[4]; + + /** versions: [0] major [1] minor [2] milli [3] micro + * @stable ICU 2.0*/ + uint8_t dataVersion[4]; +} UDataInfo; + +/* API for reading data -----------------------------------------------------*/ + +/** + * Forward declaration of the data memory type. + * @stable ICU 2.0 + */ +typedef struct UDataMemory UDataMemory; + +/** + * Callback function for udata_openChoice(). + * @param context parameter passed into <code>udata_openChoice()</code>. + * @param type The type of the data as passed into <code>udata_openChoice()</code>. + * It may be <code>NULL</code>. + * @param name The name of the data as passed into <code>udata_openChoice()</code>. + * @param pInfo A pointer to the <code>UDataInfo</code> structure + * of data that has been loaded and will be returned + * by <code>udata_openChoice()</code> if this function + * returns <code>true</code>. + * @return true if the current data memory is acceptable + * @stable ICU 2.0 + */ +typedef UBool U_CALLCONV +UDataMemoryIsAcceptable(void *context, + const char *type, const char *name, + const UDataInfo *pInfo); + + +/** + * Convenience function. + * This function works the same as <code>udata_openChoice</code> + * except that any data that matches the type and name + * is assumed to be acceptable. + * @param path Specifies an absolute path and/or a basename for the + * finding of the data in the file system. + * <code>NULL</code> for ICU data. + * @param type A string that specifies the type of data to be loaded. + * For example, resource bundles are loaded with type "res", + * conversion tables with type "cnv". + * This may be <code>NULL</code> or empty. + * @param name A string that specifies the name of the data. + * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>. + * @return A pointer (handle) to a data memory object, or <code>NULL</code> + * if an error occurs. Call <code>udata_getMemory()</code> + * to get a pointer to the actual data. + * + * @see udata_openChoice + * @stable ICU 2.0 + */ +U_CAPI UDataMemory * U_EXPORT2 +udata_open(const char *path, const char *type, const char *name, + UErrorCode *pErrorCode); + +/** + * Data loading function. + * This function is used to find and load efficiently data for + * ICU and applications using ICU. + * It provides an abstract interface that allows to specify a data + * type and name to find and load the data. + * + * <p>The implementation depends on platform properties and user preferences + * and may involve loading shared libraries (DLLs), mapping + * files into memory, or fopen()/fread() files. + * It may also involve using static memory or database queries etc. + * Several or all data items may be combined into one entity + * (DLL, memory-mappable file).</p> + * + * <p>The data is always preceded by a header that includes + * a <code>UDataInfo</code> structure. + * The caller's <code>isAcceptable()</code> function is called to make + * sure that the data is useful. It may be called several times if it + * rejects the data and there is more than one location with data + * matching the type and name.</p> + * + * <p>If <code>path==NULL</code>, then ICU data is loaded. + * Otherwise, it is separated into a basename and a basename-less directory string. + * The basename is used as the data package name, and the directory is + * logically prepended to the ICU data directory string.</p> + * + * <p>For details about ICU data loading see the User Guide + * Data Management chapter. (http://icu-project.org/userguide/icudata.html)</p> + * + * @param path Specifies an absolute path and/or a basename for the + * finding of the data in the file system. + * <code>NULL</code> for ICU data. + * @param type A string that specifies the type of data to be loaded. + * For example, resource bundles are loaded with type "res", + * conversion tables with type "cnv". + * This may be <code>NULL</code> or empty. + * @param name A string that specifies the name of the data. + * @param isAcceptable This function is called to verify that loaded data + * is useful for the client code. If it returns false + * for all data items, then <code>udata_openChoice()</code> + * will return with an error. + * @param context Arbitrary parameter to be passed into isAcceptable. + * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>. + * @return A pointer (handle) to a data memory object, or <code>NULL</code> + * if an error occurs. Call <code>udata_getMemory()</code> + * to get a pointer to the actual data. + * @stable ICU 2.0 + */ +U_CAPI UDataMemory * U_EXPORT2 +udata_openChoice(const char *path, const char *type, const char *name, + UDataMemoryIsAcceptable *isAcceptable, void *context, + UErrorCode *pErrorCode); + +/** + * Close the data memory. + * This function must be called to allow the system to + * release resources associated with this data memory. + * @param pData The pointer to data memory object + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +udata_close(UDataMemory *pData); + +/** + * Get the pointer to the actual data inside the data memory. + * The data is read-only. + * + * ICU data must be at least 8-aligned, and should be 16-aligned. + * + * @param pData The pointer to data memory object + * @stable ICU 2.0 + */ +U_CAPI const void * U_EXPORT2 +udata_getMemory(UDataMemory *pData); + +/** + * Get the information from the data memory header. + * This allows to get access to the header containing + * platform data properties etc. which is not part of + * the data itself and can therefore not be accessed + * via the pointer that <code>udata_getMemory()</code> returns. + * + * @param pData pointer to the data memory object + * @param pInfo pointer to a UDataInfo object; + * its <code>size</code> field must be set correctly, + * typically to <code>sizeof(UDataInfo)</code>. + * + * <code>*pInfo</code> will be filled with the UDataInfo structure + * in the data memory object. If this structure is smaller than + * <code>pInfo->size</code>, then the <code>size</code> will be + * adjusted and only part of the structure will be filled. + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +udata_getInfo(UDataMemory *pData, UDataInfo *pInfo); + +/** + * This function bypasses the normal ICU data loading process and + * allows you to force ICU's system data to come out of a user-specified + * area in memory. + * + * ICU data must be at least 8-aligned, and should be 16-aligned. + * See https://unicode-org.github.io/icu/userguide/icudata + * + * The format of this data is that of the icu common data file, as is + * generated by the pkgdata tool with mode=common or mode=dll. + * You can read in a whole common mode file and pass the address to the start of the + * data, or (with the appropriate link options) pass in the pointer to + * the data that has been loaded from a dll by the operating system, + * as shown in this code: + * + * extern const char U_IMPORT U_ICUDATA_ENTRY_POINT []; + * // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool + * UErrorCode status = U_ZERO_ERROR; + * + * udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status); + * + * It is important that the declaration be as above. The entry point + * must not be declared as an extern void*. + * + * Starting with ICU 4.4, it is possible to set several data packages, + * one per call to this function. + * udata_open() will look for data in the multiple data packages in the order + * in which they were set. + * The position of the linked-in or default-name ICU .data package in the + * search list depends on when the first data item is loaded that is not contained + * in the already explicitly set packages. + * If data was loaded implicitly before the first call to this function + * (for example, via opening a converter, constructing a UnicodeString + * from default-codepage data, using formatting or collation APIs, etc.), + * then the default data will be first in the list. + * + * This function has no effect on application (non ICU) data. See udata_setAppData() + * for similar functionality for application data. + * + * @param data pointer to ICU common data + * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code> + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +udata_setCommonData(const void *data, UErrorCode *err); + + +/** + * This function bypasses the normal ICU data loading process for application-specific + * data and allows you to force the it to come out of a user-specified + * pointer. + * + * ICU data must be at least 8-aligned, and should be 16-aligned. + * See https://unicode-org.github.io/icu/userguide/icudata + * + * The format of this data is that of the icu common data file, like 'icudt26l.dat' + * or the corresponding shared library (DLL) file. + * The application must read in or otherwise construct an image of the data and then + * pass the address of it to this function. + * + * + * Warning: setAppData will set a U_USING_DEFAULT_WARNING code if + * data with the specifed path that has already been opened, or + * if setAppData with the same path has already been called. + * Any such calls to setAppData will have no effect. + * + * + * @param packageName the package name by which the application will refer + * to (open) this data + * @param data pointer to the data + * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code> + * @see udata_setCommonData + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +udata_setAppData(const char *packageName, const void *data, UErrorCode *err); + +/** + * Possible settings for udata_setFileAccess() + * @see udata_setFileAccess + * @stable ICU 3.4 + */ +typedef enum UDataFileAccess { + /** ICU looks for data in single files first, then in packages. (default) @stable ICU 3.4 */ + UDATA_FILES_FIRST, + /** An alias for the default access mode. @stable ICU 3.4 */ + UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST, + /** ICU only loads data from packages, not from single files. @stable ICU 3.4 */ + UDATA_ONLY_PACKAGES, + /** ICU loads data from packages first, and only from single files + if the data cannot be found in a package. @stable ICU 3.4 */ + UDATA_PACKAGES_FIRST, + /** ICU does not access the file system for data loading. @stable ICU 3.4 */ + UDATA_NO_FILES, +#ifndef U_HIDE_DEPRECATED_API + /** + * Number of real UDataFileAccess values. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UDATA_FILE_ACCESS_COUNT +#endif // U_HIDE_DEPRECATED_API +} UDataFileAccess; + +/** + * This function may be called to control how ICU loads data. It must be called + * before any ICU data is loaded, including application data loaded with + * ures/ResourceBundle or udata APIs. This function is not multithread safe. + * The results of calling it while other threads are loading data are undefined. + * @param access The type of file access to be used + * @param status Error code. + * @see UDataFileAccess + * @stable ICU 3.4 + */ +U_CAPI void U_EXPORT2 +udata_setFileAccess(UDataFileAccess access, UErrorCode *status); + +U_CDECL_END + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUDataMemoryPointer + * "Smart pointer" class, closes a UDataMemory via udata_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close); + +U_NAMESPACE_END + +#endif // U_SHOW_CPLUSPLUS_API + +#endif diff --git a/thirdparty/icu4c/common/unicode/udisplaycontext.h b/thirdparty/icu4c/common/unicode/udisplaycontext.h new file mode 100644 index 0000000000..6e14217980 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/udisplaycontext.h @@ -0,0 +1,173 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +***************************************************************************************** +* Copyright (C) 2014-2016, International Business Machines +* Corporation and others. All Rights Reserved. +***************************************************************************************** +*/ + +#ifndef UDISPLAYCONTEXT_H +#define UDISPLAYCONTEXT_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +/** + * \file + * \brief C API: Display context types (enum values) + */ + +/** + * Display context types, for getting values of a particular setting. + * Note, the specific numeric values are internal and may change. + * @stable ICU 51 + */ +enum UDisplayContextType { + /** + * Type to retrieve the dialect handling setting, e.g. + * UDISPCTX_STANDARD_NAMES or UDISPCTX_DIALECT_NAMES. + * @stable ICU 51 + */ + UDISPCTX_TYPE_DIALECT_HANDLING = 0, + /** + * Type to retrieve the capitalization context setting, e.g. + * UDISPCTX_CAPITALIZATION_NONE, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, + * UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, etc. + * @stable ICU 51 + */ + UDISPCTX_TYPE_CAPITALIZATION = 1, + /** + * Type to retrieve the display length setting, e.g. + * UDISPCTX_LENGTH_FULL, UDISPCTX_LENGTH_SHORT. + * @stable ICU 54 + */ + UDISPCTX_TYPE_DISPLAY_LENGTH = 2, + /** + * Type to retrieve the substitute handling setting, e.g. + * UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE. + * @stable ICU 58 + */ + UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3 +}; +/** +* @stable ICU 51 +*/ +typedef enum UDisplayContextType UDisplayContextType; + +/** + * Display context settings. + * Note, the specific numeric values are internal and may change. + * @stable ICU 51 + */ +enum UDisplayContext { + /** + * ================================ + * DIALECT_HANDLING can be set to one of UDISPCTX_STANDARD_NAMES or + * UDISPCTX_DIALECT_NAMES. Use UDisplayContextType UDISPCTX_TYPE_DIALECT_HANDLING + * to get the value. + */ + /** + * A possible setting for DIALECT_HANDLING: + * use standard names when generating a locale name, + * e.g. en_GB displays as 'English (United Kingdom)'. + * @stable ICU 51 + */ + UDISPCTX_STANDARD_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 0, + /** + * A possible setting for DIALECT_HANDLING: + * use dialect names, when generating a locale name, + * e.g. en_GB displays as 'British English'. + * @stable ICU 51 + */ + UDISPCTX_DIALECT_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 1, + /** + * ================================ + * CAPITALIZATION can be set to one of UDISPCTX_CAPITALIZATION_NONE, + * UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE, + * UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, + * UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, or + * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. + * Use UDisplayContextType UDISPCTX_TYPE_CAPITALIZATION to get the value. + */ + /** + * The capitalization context to be used is unknown (this is the default value). + * @stable ICU 51 + */ + UDISPCTX_CAPITALIZATION_NONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 0, + /** + * The capitalization context if a date, date symbol or display name is to be + * formatted with capitalization appropriate for the middle of a sentence. + * @stable ICU 51 + */ + UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 1, + /** + * The capitalization context if a date, date symbol or display name is to be + * formatted with capitalization appropriate for the beginning of a sentence. + * @stable ICU 51 + */ + UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 2, + /** + * The capitalization context if a date, date symbol or display name is to be + * formatted with capitalization appropriate for a user-interface list or menu item. + * @stable ICU 51 + */ + UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 3, + /** + * The capitalization context if a date, date symbol or display name is to be + * formatted with capitalization appropriate for stand-alone usage such as an + * isolated name on a calendar page. + * @stable ICU 51 + */ + UDISPCTX_CAPITALIZATION_FOR_STANDALONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 4, + /** + * ================================ + * DISPLAY_LENGTH can be set to one of UDISPCTX_LENGTH_FULL or + * UDISPCTX_LENGTH_SHORT. Use UDisplayContextType UDISPCTX_TYPE_DISPLAY_LENGTH + * to get the value. + */ + /** + * A possible setting for DISPLAY_LENGTH: + * use full names when generating a locale name, + * e.g. "United States" for US. + * @stable ICU 54 + */ + UDISPCTX_LENGTH_FULL = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 0, + /** + * A possible setting for DISPLAY_LENGTH: + * use short names when generating a locale name, + * e.g. "U.S." for US. + * @stable ICU 54 + */ + UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1, + /** + * ================================ + * SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or + * UDISPCTX_NO_SUBSTITUTE. Use UDisplayContextType UDISPCTX_TYPE_SUBSTITUTE_HANDLING + * to get the value. + */ + /** + * A possible setting for SUBSTITUTE_HANDLING: + * Returns a fallback value (e.g., the input code) when no data is available. + * This is the default value. + * @stable ICU 58 + */ + UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0, + /** + * A possible setting for SUBSTITUTE_HANDLING: + * Returns a null value with error code set to U_ILLEGAL_ARGUMENT_ERROR when no + * data is available. + * @stable ICU 58 + */ + UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1 + +}; +/** +* @stable ICU 51 +*/ +typedef enum UDisplayContext UDisplayContext; + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/uenum.h b/thirdparty/icu4c/common/unicode/uenum.h new file mode 100644 index 0000000000..d9c893e06d --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uenum.h @@ -0,0 +1,209 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uenum.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2002jul08 +* created by: Vladimir Weinstein +*/ + +#ifndef __UENUM_H +#define __UENUM_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" + +U_NAMESPACE_BEGIN +class StringEnumeration; +U_NAMESPACE_END +#endif // U_SHOW_CPLUSPLUS_API + +/** + * \file + * \brief C API: String Enumeration + */ + +/** + * An enumeration object. + * For usage in C programs. + * @stable ICU 2.2 + */ +struct UEnumeration; +/** structure representing an enumeration object instance @stable ICU 2.2 */ +typedef struct UEnumeration UEnumeration; + +/** + * Disposes of resources in use by the iterator. If en is NULL, + * does nothing. After this call, any char* or UChar* pointer + * returned by uenum_unext() or uenum_next() is invalid. + * @param en UEnumeration structure pointer + * @stable ICU 2.2 + */ +U_CAPI void U_EXPORT2 +uenum_close(UEnumeration* en); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUEnumerationPointer + * "Smart pointer" class, closes a UEnumeration via uenum_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUEnumerationPointer, UEnumeration, uenum_close); + +U_NAMESPACE_END + +#endif + +/** + * Returns the number of elements that the iterator traverses. If + * the iterator is out-of-sync with its service, status is set to + * U_ENUM_OUT_OF_SYNC_ERROR. + * This is a convenience function. It can end up being very + * expensive as all the items might have to be pre-fetched (depending + * on the type of data being traversed). Use with caution and only + * when necessary. + * @param en UEnumeration structure pointer + * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the + * iterator is out of sync. + * @return number of elements in the iterator + * @stable ICU 2.2 + */ +U_CAPI int32_t U_EXPORT2 +uenum_count(UEnumeration* en, UErrorCode* status); + +/** + * Returns the next element in the iterator's list. If there are + * no more elements, returns NULL. If the iterator is out-of-sync + * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and + * NULL is returned. If the native service string is a char* string, + * it is converted to UChar* with the invariant converter. + * The result is terminated by (UChar)0. + * @param en the iterator object + * @param resultLength pointer to receive the length of the result + * (not including the terminating \\0). + * If the pointer is NULL it is ignored. + * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if + * the iterator is out of sync with its service. + * @return a pointer to the string. The string will be + * zero-terminated. The return pointer is owned by this iterator + * and must not be deleted by the caller. The pointer is valid + * until the next call to any uenum_... method, including + * uenum_next() or uenum_unext(). When all strings have been + * traversed, returns NULL. + * @stable ICU 2.2 + */ +U_CAPI const UChar* U_EXPORT2 +uenum_unext(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status); + +/** + * Returns the next element in the iterator's list. If there are + * no more elements, returns NULL. If the iterator is out-of-sync + * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and + * NULL is returned. If the native service string is a UChar* + * string, it is converted to char* with the invariant converter. + * The result is terminated by (char)0. If the conversion fails + * (because a character cannot be converted) then status is set to + * U_INVARIANT_CONVERSION_ERROR and the return value is undefined + * (but non-NULL). + * @param en the iterator object + * @param resultLength pointer to receive the length of the result + * (not including the terminating \\0). + * If the pointer is NULL it is ignored. + * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if + * the iterator is out of sync with its service. Set to + * U_INVARIANT_CONVERSION_ERROR if the underlying native string is + * UChar* and conversion to char* with the invariant converter + * fails. This error pertains only to current string, so iteration + * might be able to continue successfully. + * @return a pointer to the string. The string will be + * zero-terminated. The return pointer is owned by this iterator + * and must not be deleted by the caller. The pointer is valid + * until the next call to any uenum_... method, including + * uenum_next() or uenum_unext(). When all strings have been + * traversed, returns NULL. + * @stable ICU 2.2 + */ +U_CAPI const char* U_EXPORT2 +uenum_next(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status); + +/** + * Resets the iterator to the current list of service IDs. This + * re-establishes sync with the service and rewinds the iterator + * to start at the first element. + * @param en the iterator object + * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if + * the iterator is out of sync with its service. + * @stable ICU 2.2 + */ +U_CAPI void U_EXPORT2 +uenum_reset(UEnumeration* en, UErrorCode* status); + +#if U_SHOW_CPLUSPLUS_API + +/** + * Given a StringEnumeration, wrap it in a UEnumeration. The + * StringEnumeration is adopted; after this call, the caller must not + * delete it (regardless of error status). + * @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration. + * @param ec the error code. + * @return a UEnumeration wrapping the adopted StringEnumeration. + * @stable ICU 4.2 + */ +U_CAPI UEnumeration* U_EXPORT2 +uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec); + +#endif + +/** + * Given an array of const UChar* strings, return a UEnumeration. String pointers from 0..count-1 must not be null. + * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close. + * \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration + * @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller. + * @param count length of the array + * @param ec error code + * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory. + * @see uenum_close + * @stable ICU 50 + */ +U_CAPI UEnumeration* U_EXPORT2 +uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count, + UErrorCode* ec); + +/** + * Given an array of const char* strings (invariant chars only), return a UEnumeration. String pointers from 0..count-1 must not be null. + * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close. + * \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration + * @param strings array of char* strings (each null terminated). All storage is owned by the caller. + * @param count length of the array + * @param ec error code + * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory + * @see uenum_close + * @stable ICU 50 + */ +U_CAPI UEnumeration* U_EXPORT2 +uenum_openCharStringsEnumeration(const char* const strings[], int32_t count, + UErrorCode* ec); + +#endif diff --git a/thirdparty/icu4c/common/unicode/uidna.h b/thirdparty/icu4c/common/unicode/uidna.h new file mode 100644 index 0000000000..24a81ceadd --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uidna.h @@ -0,0 +1,776 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * + * Copyright (C) 2003-2014, International Business Machines + * Corporation and others. All Rights Reserved. + * + ******************************************************************************* + * file name: uidna.h + * encoding: UTF-8 + * tab size: 8 (not used) + * indentation:4 + * + * created on: 2003feb1 + * created by: Ram Viswanadha + */ + +#ifndef __UIDNA_H__ +#define __UIDNA_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_IDNA + +#include <stdbool.h> +#include "unicode/parseerr.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +/** + * \file + * \brief C API: Internationalizing Domain Names in Applications (IDNA) + * + * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h. + * + * The C API functions which do take a UIDNA * service object pointer + * implement UTS #46 and IDNA2008. + * + * IDNA2003 is obsolete. + * The C API functions which do not take a service object pointer + * implement IDNA2003. They are all deprecated. + */ + +/* + * IDNA option bit set values. + */ +enum { + /** + * Default options value: None of the other options are set. + * For use in static worker and factory methods. + * @stable ICU 2.6 + */ + UIDNA_DEFAULT=0, +#ifndef U_HIDE_DEPRECATED_API + /** + * Option to allow unassigned code points in domain names and labels. + * For use in static worker and factory methods. + * <p>This option is ignored by the UTS46 implementation. + * (UTS #46 disallows unassigned code points.) + * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA. + */ + UIDNA_ALLOW_UNASSIGNED=1, +#endif /* U_HIDE_DEPRECATED_API */ + /** + * Option to check whether the input conforms to the STD3 ASCII rules, + * for example the restriction of labels to LDH characters + * (ASCII Letters, Digits and Hyphen-Minus). + * For use in static worker and factory methods. + * @stable ICU 2.6 + */ + UIDNA_USE_STD3_RULES=2, + /** + * IDNA option to check for whether the input conforms to the BiDi rules. + * For use in static worker and factory methods. + * <p>This option is ignored by the IDNA2003 implementation. + * (IDNA2003 always performs a BiDi check.) + * @stable ICU 4.6 + */ + UIDNA_CHECK_BIDI=4, + /** + * IDNA option to check for whether the input conforms to the CONTEXTJ rules. + * For use in static worker and factory methods. + * <p>This option is ignored by the IDNA2003 implementation. + * (The CONTEXTJ check is new in IDNA2008.) + * @stable ICU 4.6 + */ + UIDNA_CHECK_CONTEXTJ=8, + /** + * IDNA option for nontransitional processing in ToASCII(). + * For use in static worker and factory methods. + * <p>By default, ToASCII() uses transitional processing. + * <p>This option is ignored by the IDNA2003 implementation. + * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) + * @stable ICU 4.6 + */ + UIDNA_NONTRANSITIONAL_TO_ASCII=0x10, + /** + * IDNA option for nontransitional processing in ToUnicode(). + * For use in static worker and factory methods. + * <p>By default, ToUnicode() uses transitional processing. + * <p>This option is ignored by the IDNA2003 implementation. + * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) + * @stable ICU 4.6 + */ + UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20, + /** + * IDNA option to check for whether the input conforms to the CONTEXTO rules. + * For use in static worker and factory methods. + * <p>This option is ignored by the IDNA2003 implementation. + * (The CONTEXTO check is new in IDNA2008.) + * <p>This is for use by registries for IDNA2008 conformance. + * UTS #46 does not require the CONTEXTO check. + * @stable ICU 49 + */ + UIDNA_CHECK_CONTEXTO=0x40 +}; + +/** + * Opaque C service object type for the new IDNA API. + * @stable ICU 4.6 + */ +struct UIDNA; +typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */ + +/** + * Returns a UIDNA instance which implements UTS #46. + * Returns an unmodifiable instance, owned by the caller. + * Cache it for multiple operations, and uidna_close() it when done. + * The instance is thread-safe, that is, it can be used concurrently. + * + * For details about the UTS #46 implementation see the IDNA C++ class in idna.h. + * + * @param options Bit set to modify the processing and error checking. + * See option bit set values in uidna.h. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the UTS #46 UIDNA instance, if successful + * @stable ICU 4.6 + */ +U_CAPI UIDNA * U_EXPORT2 +uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode); + +/** + * Closes a UIDNA instance. + * @param idna UIDNA instance to be closed + * @stable ICU 4.6 + */ +U_CAPI void U_EXPORT2 +uidna_close(UIDNA *idna); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUIDNAPointer + * "Smart pointer" class, closes a UIDNA via uidna_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.6 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close); + +U_NAMESPACE_END + +#endif + +/** + * Output container for IDNA processing errors. + * Initialize with UIDNA_INFO_INITIALIZER: + * \code + * UIDNAInfo info = UIDNA_INFO_INITIALIZER; + * int32_t length = uidna_nameToASCII(..., &info, &errorCode); + * if(U_SUCCESS(errorCode) && info.errors!=0) { ... } + * \endcode + * @stable ICU 4.6 + */ +typedef struct UIDNAInfo { + /** sizeof(UIDNAInfo) @stable ICU 4.6 */ + int16_t size; + /** + * Set to true if transitional and nontransitional processing produce different results. + * For details see C++ IDNAInfo::isTransitionalDifferent(). + * @stable ICU 4.6 + */ + UBool isTransitionalDifferent; + UBool reservedB3; /**< Reserved field, do not use. @internal */ + /** + * Bit set indicating IDNA processing errors. 0 if no errors. + * See UIDNA_ERROR_... constants. + * @stable ICU 4.6 + */ + uint32_t errors; + int32_t reservedI2; /**< Reserved field, do not use. @internal */ + int32_t reservedI3; /**< Reserved field, do not use. @internal */ +} UIDNAInfo; + +/** + * Static initializer for a UIDNAInfo struct. + * @stable ICU 4.6 + */ +#define UIDNA_INFO_INITIALIZER { \ + (int16_t)sizeof(UIDNAInfo), \ + false, false, \ + 0, 0, 0 } + +/** + * Converts a single domain name label into its ASCII form for DNS lookup. + * If any processing step fails, then pInfo->errors will be non-zero and + * the result might not be an ASCII string. + * The label might be modified according to the types of errors. + * Labels with severe errors will be left in (or turned into) their Unicode form. + * + * The UErrorCode indicates an error only in exceptional cases, + * such as a U_MEMORY_ALLOCATION_ERROR. + * + * @param idna UIDNA instance + * @param label Input domain name label + * @param length Label length, or -1 if NUL-terminated + * @param dest Destination string buffer + * @param capacity Destination buffer capacity + * @param pInfo Output container of IDNA processing details. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return destination string length + * @stable ICU 4.6 + */ +U_CAPI int32_t U_EXPORT2 +uidna_labelToASCII(const UIDNA *idna, + const UChar *label, int32_t length, + UChar *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode); + +/** + * Converts a single domain name label into its Unicode form for human-readable display. + * If any processing step fails, then pInfo->errors will be non-zero. + * The label might be modified according to the types of errors. + * + * The UErrorCode indicates an error only in exceptional cases, + * such as a U_MEMORY_ALLOCATION_ERROR. + * + * @param idna UIDNA instance + * @param label Input domain name label + * @param length Label length, or -1 if NUL-terminated + * @param dest Destination string buffer + * @param capacity Destination buffer capacity + * @param pInfo Output container of IDNA processing details. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return destination string length + * @stable ICU 4.6 + */ +U_CAPI int32_t U_EXPORT2 +uidna_labelToUnicode(const UIDNA *idna, + const UChar *label, int32_t length, + UChar *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode); + +/** + * Converts a whole domain name into its ASCII form for DNS lookup. + * If any processing step fails, then pInfo->errors will be non-zero and + * the result might not be an ASCII string. + * The domain name might be modified according to the types of errors. + * Labels with severe errors will be left in (or turned into) their Unicode form. + * + * The UErrorCode indicates an error only in exceptional cases, + * such as a U_MEMORY_ALLOCATION_ERROR. + * + * @param idna UIDNA instance + * @param name Input domain name + * @param length Domain name length, or -1 if NUL-terminated + * @param dest Destination string buffer + * @param capacity Destination buffer capacity + * @param pInfo Output container of IDNA processing details. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return destination string length + * @stable ICU 4.6 + */ +U_CAPI int32_t U_EXPORT2 +uidna_nameToASCII(const UIDNA *idna, + const UChar *name, int32_t length, + UChar *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode); + +/** + * Converts a whole domain name into its Unicode form for human-readable display. + * If any processing step fails, then pInfo->errors will be non-zero. + * The domain name might be modified according to the types of errors. + * + * The UErrorCode indicates an error only in exceptional cases, + * such as a U_MEMORY_ALLOCATION_ERROR. + * + * @param idna UIDNA instance + * @param name Input domain name + * @param length Domain name length, or -1 if NUL-terminated + * @param dest Destination string buffer + * @param capacity Destination buffer capacity + * @param pInfo Output container of IDNA processing details. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return destination string length + * @stable ICU 4.6 + */ +U_CAPI int32_t U_EXPORT2 +uidna_nameToUnicode(const UIDNA *idna, + const UChar *name, int32_t length, + UChar *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode); + +/* UTF-8 versions of the processing methods --------------------------------- */ + +/** + * Converts a single domain name label into its ASCII form for DNS lookup. + * UTF-8 version of uidna_labelToASCII(), same behavior. + * + * @param idna UIDNA instance + * @param label Input domain name label + * @param length Label length, or -1 if NUL-terminated + * @param dest Destination string buffer + * @param capacity Destination buffer capacity + * @param pInfo Output container of IDNA processing details. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return destination string length + * @stable ICU 4.6 + */ +U_CAPI int32_t U_EXPORT2 +uidna_labelToASCII_UTF8(const UIDNA *idna, + const char *label, int32_t length, + char *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode); + +/** + * Converts a single domain name label into its Unicode form for human-readable display. + * UTF-8 version of uidna_labelToUnicode(), same behavior. + * + * @param idna UIDNA instance + * @param label Input domain name label + * @param length Label length, or -1 if NUL-terminated + * @param dest Destination string buffer + * @param capacity Destination buffer capacity + * @param pInfo Output container of IDNA processing details. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return destination string length + * @stable ICU 4.6 + */ +U_CAPI int32_t U_EXPORT2 +uidna_labelToUnicodeUTF8(const UIDNA *idna, + const char *label, int32_t length, + char *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode); + +/** + * Converts a whole domain name into its ASCII form for DNS lookup. + * UTF-8 version of uidna_nameToASCII(), same behavior. + * + * @param idna UIDNA instance + * @param name Input domain name + * @param length Domain name length, or -1 if NUL-terminated + * @param dest Destination string buffer + * @param capacity Destination buffer capacity + * @param pInfo Output container of IDNA processing details. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return destination string length + * @stable ICU 4.6 + */ +U_CAPI int32_t U_EXPORT2 +uidna_nameToASCII_UTF8(const UIDNA *idna, + const char *name, int32_t length, + char *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode); + +/** + * Converts a whole domain name into its Unicode form for human-readable display. + * UTF-8 version of uidna_nameToUnicode(), same behavior. + * + * @param idna UIDNA instance + * @param name Input domain name + * @param length Domain name length, or -1 if NUL-terminated + * @param dest Destination string buffer + * @param capacity Destination buffer capacity + * @param pInfo Output container of IDNA processing details. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return destination string length + * @stable ICU 4.6 + */ +U_CAPI int32_t U_EXPORT2 +uidna_nameToUnicodeUTF8(const UIDNA *idna, + const char *name, int32_t length, + char *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode); + +/* + * IDNA error bit set values. + * When a domain name or label fails a processing step or does not meet the + * validity criteria, then one or more of these error bits are set. + */ +enum { + /** + * A non-final domain name label (or the whole domain name) is empty. + * @stable ICU 4.6 + */ + UIDNA_ERROR_EMPTY_LABEL=1, + /** + * A domain name label is longer than 63 bytes. + * (See STD13/RFC1034 3.1. Name space specifications and terminology.) + * This is only checked in ToASCII operations, and only if the output label is all-ASCII. + * @stable ICU 4.6 + */ + UIDNA_ERROR_LABEL_TOO_LONG=2, + /** + * A domain name is longer than 255 bytes in its storage form. + * (See STD13/RFC1034 3.1. Name space specifications and terminology.) + * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII. + * @stable ICU 4.6 + */ + UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4, + /** + * A label starts with a hyphen-minus ('-'). + * @stable ICU 4.6 + */ + UIDNA_ERROR_LEADING_HYPHEN=8, + /** + * A label ends with a hyphen-minus ('-'). + * @stable ICU 4.6 + */ + UIDNA_ERROR_TRAILING_HYPHEN=0x10, + /** + * A label contains hyphen-minus ('-') in the third and fourth positions. + * @stable ICU 4.6 + */ + UIDNA_ERROR_HYPHEN_3_4=0x20, + /** + * A label starts with a combining mark. + * @stable ICU 4.6 + */ + UIDNA_ERROR_LEADING_COMBINING_MARK=0x40, + /** + * A label or domain name contains disallowed characters. + * @stable ICU 4.6 + */ + UIDNA_ERROR_DISALLOWED=0x80, + /** + * A label starts with "xn--" but does not contain valid Punycode. + * That is, an xn-- label failed Punycode decoding. + * @stable ICU 4.6 + */ + UIDNA_ERROR_PUNYCODE=0x100, + /** + * A label contains a dot=full stop. + * This can occur in an input string for a single-label function. + * @stable ICU 4.6 + */ + UIDNA_ERROR_LABEL_HAS_DOT=0x200, + /** + * An ACE label does not contain a valid label string. + * The label was successfully ACE (Punycode) decoded but the resulting + * string had severe validation errors. For example, + * it might contain characters that are not allowed in ACE labels, + * or it might not be normalized. + * @stable ICU 4.6 + */ + UIDNA_ERROR_INVALID_ACE_LABEL=0x400, + /** + * A label does not meet the IDNA BiDi requirements (for right-to-left characters). + * @stable ICU 4.6 + */ + UIDNA_ERROR_BIDI=0x800, + /** + * A label does not meet the IDNA CONTEXTJ requirements. + * @stable ICU 4.6 + */ + UIDNA_ERROR_CONTEXTJ=0x1000, + /** + * A label does not meet the IDNA CONTEXTO requirements for punctuation characters. + * Some punctuation characters "Would otherwise have been DISALLOWED" + * but are allowed in certain contexts. (RFC 5892) + * @stable ICU 49 + */ + UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000, + /** + * A label does not meet the IDNA CONTEXTO requirements for digits. + * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx). + * @stable ICU 49 + */ + UIDNA_ERROR_CONTEXTO_DIGITS=0x4000 +}; + +#ifndef U_HIDE_DEPRECATED_API + +/* IDNA2003 API ------------------------------------------------------------- */ + +/** + * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC. + * This operation is done on <b>single labels</b> before sending it to something that expects + * ASCII names. A label is an individual part of a domain name. Labels are usually + * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com". + * + * IDNA2003 API Overview: + * + * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC + * (http://www.ietf.org/rfc/rfc3490.txt). + * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels + * containing non-ASCII code points are processed by the + * ToASCII operation before passing it to resolver libraries. Domain names + * that are obtained from resolver libraries are processed by the + * ToUnicode operation before displaying the domain name to the user. + * IDNA requires that implementations process input strings with Nameprep + * (http://www.ietf.org/rfc/rfc3491.txt), + * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt), + * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt). + * Implementations of IDNA MUST fully implement Nameprep and Punycode; + * neither Nameprep nor Punycode are optional. + * The input and output of ToASCII and ToUnicode operations are Unicode + * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations + * multiple times to an input string will yield the same result as applying the operation + * once. + * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string) + * ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string). + * + * @param src Input UChar array containing label in Unicode. + * @param srcLength Number of UChars in src, or -1 if NUL-terminated. + * @param dest Output UChar array with ASCII (ACE encoded) label. + * @param destCapacity Size of dest. + * @param options A bit set of options: + * + * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points + * and do not use STD3 ASCII rules + * If unassigned code points are found the operation fails with + * U_UNASSIGNED_ERROR error code. + * + * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations + * If this option is set, the unassigned code points are in the input + * are treated as normal Unicode code points. + * + * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions + * If this option is set and the input does not satisfy STD3 rules, + * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR + * + * @param parseError Pointer to UParseError struct to receive information on position + * of error if an error is encountered. Can be NULL. + * @param status ICU in/out error code parameter. + * U_INVALID_CHAR_FOUND if src contains + * unmatched single surrogates. + * U_INDEX_OUTOFBOUNDS_ERROR if src contains + * too many code points. + * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough + * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA. + */ +U_DEPRECATED int32_t U_EXPORT2 +uidna_toASCII(const UChar* src, int32_t srcLength, + UChar* dest, int32_t destCapacity, + int32_t options, + UParseError* parseError, + UErrorCode* status); + + +/** + * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC. + * This operation is done on <b>single labels</b> before sending it to something that expects + * Unicode names. A label is an individual part of a domain name. Labels are usually + * separated by dots; for e.g. "www.example.com" is composed of 3 labels "www","example", and "com". + * + * @param src Input UChar array containing ASCII (ACE encoded) label. + * @param srcLength Number of UChars in src, or -1 if NUL-terminated. + * @param dest Output Converted UChar array containing Unicode equivalent of label. + * @param destCapacity Size of dest. + * @param options A bit set of options: + * + * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points + * and do not use STD3 ASCII rules + * If unassigned code points are found the operation fails with + * U_UNASSIGNED_ERROR error code. + * + * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations + * If this option is set, the unassigned code points are in the input + * are treated as normal Unicode code points. <b> Note: </b> This option is + * required on toUnicode operation because the RFC mandates + * verification of decoded ACE input by applying toASCII and comparing + * its output with source + * + * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions + * If this option is set and the input does not satisfy STD3 rules, + * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR + * + * @param parseError Pointer to UParseError struct to receive information on position + * of error if an error is encountered. Can be NULL. + * @param status ICU in/out error code parameter. + * U_INVALID_CHAR_FOUND if src contains + * unmatched single surrogates. + * U_INDEX_OUTOFBOUNDS_ERROR if src contains + * too many code points. + * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough + * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA. + */ +U_DEPRECATED int32_t U_EXPORT2 +uidna_toUnicode(const UChar* src, int32_t srcLength, + UChar* dest, int32_t destCapacity, + int32_t options, + UParseError* parseError, + UErrorCode* status); + + +/** + * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC. + * This operation is done on complete domain names, e.g: "www.example.com". + * It is important to note that this operation can fail. If it fails, then the input + * domain name cannot be used as an Internationalized Domain Name and the application + * should have methods defined to deal with the failure. + * + * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name + * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, + * and then convert. This function does not offer that level of granularity. The options once + * set will apply to all labels in the domain name + * + * @param src Input UChar array containing IDN in Unicode. + * @param srcLength Number of UChars in src, or -1 if NUL-terminated. + * @param dest Output UChar array with ASCII (ACE encoded) IDN. + * @param destCapacity Size of dest. + * @param options A bit set of options: + * + * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points + * and do not use STD3 ASCII rules + * If unassigned code points are found the operation fails with + * U_UNASSIGNED_CODE_POINT_FOUND error code. + * + * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations + * If this option is set, the unassigned code points are in the input + * are treated as normal Unicode code points. + * + * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions + * If this option is set and the input does not satisfy STD3 rules, + * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR + * + * @param parseError Pointer to UParseError struct to receive information on position + * of error if an error is encountered. Can be NULL. + * @param status ICU in/out error code parameter. + * U_INVALID_CHAR_FOUND if src contains + * unmatched single surrogates. + * U_INDEX_OUTOFBOUNDS_ERROR if src contains + * too many code points. + * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough + * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA. + */ +U_DEPRECATED int32_t U_EXPORT2 +uidna_IDNToASCII( const UChar* src, int32_t srcLength, + UChar* dest, int32_t destCapacity, + int32_t options, + UParseError* parseError, + UErrorCode* status); + +/** + * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC. + * This operation is done on complete domain names, e.g: "www.example.com". + * + * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name + * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, + * and then convert. This function does not offer that level of granularity. The options once + * set will apply to all labels in the domain name + * + * @param src Input UChar array containing IDN in ASCII (ACE encoded) form. + * @param srcLength Number of UChars in src, or -1 if NUL-terminated. + * @param dest Output UChar array containing Unicode equivalent of source IDN. + * @param destCapacity Size of dest. + * @param options A bit set of options: + * + * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points + * and do not use STD3 ASCII rules + * If unassigned code points are found the operation fails with + * U_UNASSIGNED_CODE_POINT_FOUND error code. + * + * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations + * If this option is set, the unassigned code points are in the input + * are treated as normal Unicode code points. + * + * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions + * If this option is set and the input does not satisfy STD3 rules, + * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR + * + * @param parseError Pointer to UParseError struct to receive information on position + * of error if an error is encountered. Can be NULL. + * @param status ICU in/out error code parameter. + * U_INVALID_CHAR_FOUND if src contains + * unmatched single surrogates. + * U_INDEX_OUTOFBOUNDS_ERROR if src contains + * too many code points. + * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough + * @return The length of the result string, if successful - or in case of a buffer overflow, + * in which case it will be greater than destCapacity. + * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA. + */ +U_DEPRECATED int32_t U_EXPORT2 +uidna_IDNToUnicode( const UChar* src, int32_t srcLength, + UChar* dest, int32_t destCapacity, + int32_t options, + UParseError* parseError, + UErrorCode* status); + +/** + * IDNA2003: Compare two IDN strings for equivalence. + * This function splits the domain names into labels and compares them. + * According to IDN RFC, whenever two labels are compared, they are + * considered equal if and only if their ASCII forms (obtained by + * applying toASCII) match using an case-insensitive ASCII comparison. + * Two domain names are considered a match if and only if all labels + * match regardless of whether label separators match. + * + * @param s1 First source string. + * @param length1 Length of first source string, or -1 if NUL-terminated. + * + * @param s2 Second source string. + * @param length2 Length of second source string, or -1 if NUL-terminated. + * @param options A bit set of options: + * + * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points + * and do not use STD3 ASCII rules + * If unassigned code points are found the operation fails with + * U_UNASSIGNED_CODE_POINT_FOUND error code. + * + * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations + * If this option is set, the unassigned code points are in the input + * are treated as normal Unicode code points. + * + * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions + * If this option is set and the input does not satisfy STD3 rules, + * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR + * + * @param status ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return <0 or 0 or >0 as usual for string comparisons + * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA. + */ +U_DEPRECATED int32_t U_EXPORT2 +uidna_compare( const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + int32_t options, + UErrorCode* status); + +#endif /* U_HIDE_DEPRECATED_API */ + +#endif /* #if !UCONFIG_NO_IDNA */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/uiter.h b/thirdparty/icu4c/common/unicode/uiter.h new file mode 100644 index 0000000000..be232c774d --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uiter.h @@ -0,0 +1,709 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2011 International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uiter.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jan18 +* created by: Markus W. Scherer +*/ + +#ifndef __UITER_H__ +#define __UITER_H__ + +/** + * \file + * \brief C API: Unicode Character Iteration + * + * @see UCharIterator + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + U_NAMESPACE_BEGIN + + class CharacterIterator; + class Replaceable; + + U_NAMESPACE_END +#endif + +U_CDECL_BEGIN + +struct UCharIterator; +typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ + +/** + * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). + * @see UCharIteratorMove + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef enum UCharIteratorOrigin { + UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH +} UCharIteratorOrigin; + +/** Constants for UCharIterator. @stable ICU 2.6 */ +enum { + /** + * Constant value that may be returned by UCharIteratorMove + * indicating that the final UTF-16 index is not known, but that the move succeeded. + * This can occur when moving relative to limit or length, or + * when moving relative to the current index after a setState() + * when the current UTF-16 index is not known. + * + * It would be very inefficient to have to count from the beginning of the text + * just to get the current/limit/length index after moving relative to it. + * The actual index can be determined with getIndex(UITER_CURRENT) + * which will count the UChars if necessary. + * + * @stable ICU 2.6 + */ + UITER_UNKNOWN_INDEX=-2 +}; + + +/** + * Constant for UCharIterator getState() indicating an error or + * an unknown state. + * Returned by uiter_getState()/UCharIteratorGetState + * when an error occurs. + * Also, some UCharIterator implementations may not be able to return + * a valid state for each position. This will be clearly documented + * for each such iterator (none of the public ones here). + * + * @stable ICU 2.6 + */ +#define UITER_NO_STATE ((uint32_t)0xffffffff) + +/** + * Function type declaration for UCharIterator.getIndex(). + * + * Gets the current position, or the start or limit of the + * iteration range. + * + * This function may perform slowly for UITER_CURRENT after setState() was called, + * or for UITER_LENGTH, because an iterator implementation may have to count + * UChars if the underlying storage is not UTF-16. + * + * @param iter the UCharIterator structure ("this pointer") + * @param origin get the 0, start, limit, length, or current index + * @return the requested index, or U_SENTINEL in an error condition + * + * @see UCharIteratorOrigin + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef int32_t U_CALLCONV +UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); + +/** + * Function type declaration for UCharIterator.move(). + * + * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). + * + * Moves the current position relative to the start or limit of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code units forward + * or backward by specifying a positive or negative delta. + * Out of bounds movement will be pinned to the start or limit. + * + * This function may perform slowly for moving relative to UITER_LENGTH + * because an iterator implementation may have to count the rest of the + * UChars if the native storage is not UTF-16. + * + * When moving relative to the limit or length, or + * relative to the current position after setState() was called, + * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient + * determination of the actual UTF-16 index. + * The actual index can be determined with getIndex(UITER_CURRENT) + * which will count the UChars if necessary. + * See UITER_UNKNOWN_INDEX for details. + * + * @param iter the UCharIterator structure ("this pointer") + * @param delta can be positive, zero, or negative + * @param origin move relative to the 0, start, limit, length, or current index + * @return the new index, or U_SENTINEL on an error condition, + * or UITER_UNKNOWN_INDEX when the index is not known. + * + * @see UCharIteratorOrigin + * @see UCharIterator + * @see UITER_UNKNOWN_INDEX + * @stable ICU 2.1 + */ +typedef int32_t U_CALLCONV +UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); + +/** + * Function type declaration for UCharIterator.hasNext(). + * + * Check if current() and next() can still + * return another code unit. + * + * @param iter the UCharIterator structure ("this pointer") + * @return boolean value for whether current() and next() can still return another code unit + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UBool U_CALLCONV +UCharIteratorHasNext(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.hasPrevious(). + * + * Check if previous() can still return another code unit. + * + * @param iter the UCharIterator structure ("this pointer") + * @return boolean value for whether previous() can still return another code unit + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UBool U_CALLCONV +UCharIteratorHasPrevious(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.current(). + * + * Return the code unit at the current position, + * or U_SENTINEL if there is none (index is at the limit). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code unit + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UChar32 U_CALLCONV +UCharIteratorCurrent(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.next(). + * + * Return the code unit at the current index and increment + * the index (post-increment, like s[i++]), + * or return U_SENTINEL if there is none (index is at the limit). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code unit (and post-increment the current index) + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UChar32 U_CALLCONV +UCharIteratorNext(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.previous(). + * + * Decrement the index and return the code unit from there + * (pre-decrement, like s[--i]), + * or return U_SENTINEL if there is none (index is at the start). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the previous code unit (after pre-decrementing the current index) + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UChar32 U_CALLCONV +UCharIteratorPrevious(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.reservedFn(). + * Reserved for future use. + * + * @param iter the UCharIterator structure ("this pointer") + * @param something some integer argument + * @return some integer + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef int32_t U_CALLCONV +UCharIteratorReserved(UCharIterator *iter, int32_t something); + +/** + * Function type declaration for UCharIterator.getState(). + * + * Get the "state" of the iterator in the form of a single 32-bit word. + * It is recommended that the state value be calculated to be as small as + * is feasible. For strings with limited lengths, fewer than 32 bits may + * be sufficient. + * + * This is used together with setState()/UCharIteratorSetState + * to save and restore the iterator position more efficiently than with + * getIndex()/move(). + * + * The iterator state is defined as a uint32_t value because it is designed + * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state + * of the character iterator. + * + * With some UCharIterator implementations (e.g., UTF-8), + * getting and setting the UTF-16 index with existing functions + * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but + * relatively slow because the iterator has to "walk" from a known index + * to the requested one. + * This takes more time the farther it needs to go. + * + * An opaque state value allows an iterator implementation to provide + * an internal index (UTF-8: the source byte array index) for + * fast, constant-time restoration. + * + * After calling setState(), a getIndex(UITER_CURRENT) may be slow because + * the UTF-16 index may not be restored as well, but the iterator can deliver + * the correct text contents and move relative to the current position + * without performance degradation. + * + * Some UCharIterator implementations may not be able to return + * a valid state for each position, in which case they return UITER_NO_STATE instead. + * This will be clearly documented for each such iterator (none of the public ones here). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the state word + * + * @see UCharIterator + * @see UCharIteratorSetState + * @see UITER_NO_STATE + * @stable ICU 2.6 + */ +typedef uint32_t U_CALLCONV +UCharIteratorGetState(const UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.setState(). + * + * Restore the "state" of the iterator using a state word from a getState() call. + * The iterator object need not be the same one as for which getState() was called, + * but it must be of the same type (set up using the same uiter_setXYZ function) + * and it must iterate over the same string + * (binary identical regardless of memory address). + * For more about the state word see UCharIteratorGetState. + * + * After calling setState(), a getIndex(UITER_CURRENT) may be slow because + * the UTF-16 index may not be restored as well, but the iterator can deliver + * the correct text contents and move relative to the current position + * without performance degradation. + * + * @param iter the UCharIterator structure ("this pointer") + * @param state the state word from a getState() call + * on a same-type, same-string iterator + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @see UCharIterator + * @see UCharIteratorGetState + * @stable ICU 2.6 + */ +typedef void U_CALLCONV +UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); + + +/** + * C API for code unit iteration. + * This can be used as a C wrapper around + * CharacterIterator, Replaceable, or implemented using simple strings, etc. + * + * There are two roles for using UCharIterator: + * + * A "provider" sets the necessary function pointers and controls the "protected" + * fields of the UCharIterator structure. A "provider" passes a UCharIterator + * into C APIs that need a UCharIterator as an abstract, flexible string interface. + * + * Implementations of such C APIs are "callers" of UCharIterator functions; + * they only use the "public" function pointers and never access the "protected" + * fields directly. + * + * The current() and next() functions only check the current index against the + * limit, and previous() only checks the current index against the start, + * to see if the iterator already reached the end of the iteration range. + * + * The assumption - in all iterators - is that the index is moved via the API, + * which means it won't go out of bounds, or the index is modified by + * user code that knows enough about the iterator implementation to set valid + * index values. + * + * UCharIterator functions return code unit values 0..0xffff, + * or U_SENTINEL if the iteration bounds are reached. + * + * @stable ICU 2.1 + */ +struct UCharIterator { + /** + * (protected) Pointer to string or wrapped object or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + const void *context; + + /** + * (protected) Length of string or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t length; + + /** + * (protected) Start index or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t start; + + /** + * (protected) Current index or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t index; + + /** + * (protected) Limit index or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t limit; + + /** + * (protected) Used by UTF-8 iterators and possibly others. + * @stable ICU 2.1 + */ + int32_t reservedField; + + /** + * (public) Returns the current position or the + * start or limit index of the iteration range. + * + * @see UCharIteratorGetIndex + * @stable ICU 2.1 + */ + UCharIteratorGetIndex *getIndex; + + /** + * (public) Moves the current position relative to the start or limit of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code units forward + * or backward by specifying a positive or negative delta. + * + * @see UCharIteratorMove + * @stable ICU 2.1 + */ + UCharIteratorMove *move; + + /** + * (public) Check if current() and next() can still + * return another code unit. + * + * @see UCharIteratorHasNext + * @stable ICU 2.1 + */ + UCharIteratorHasNext *hasNext; + + /** + * (public) Check if previous() can still return another code unit. + * + * @see UCharIteratorHasPrevious + * @stable ICU 2.1 + */ + UCharIteratorHasPrevious *hasPrevious; + + /** + * (public) Return the code unit at the current position, + * or U_SENTINEL if there is none (index is at the limit). + * + * @see UCharIteratorCurrent + * @stable ICU 2.1 + */ + UCharIteratorCurrent *current; + + /** + * (public) Return the code unit at the current index and increment + * the index (post-increment, like s[i++]), + * or return U_SENTINEL if there is none (index is at the limit). + * + * @see UCharIteratorNext + * @stable ICU 2.1 + */ + UCharIteratorNext *next; + + /** + * (public) Decrement the index and return the code unit from there + * (pre-decrement, like s[--i]), + * or return U_SENTINEL if there is none (index is at the start). + * + * @see UCharIteratorPrevious + * @stable ICU 2.1 + */ + UCharIteratorPrevious *previous; + + /** + * (public) Reserved for future use. Currently NULL. + * + * @see UCharIteratorReserved + * @stable ICU 2.1 + */ + UCharIteratorReserved *reservedFn; + + /** + * (public) Return the state of the iterator, to be restored later with setState(). + * This function pointer is NULL if the iterator does not implement it. + * + * @see UCharIteratorGet + * @stable ICU 2.6 + */ + UCharIteratorGetState *getState; + + /** + * (public) Restore the iterator state from the state word from a call + * to getState(). + * This function pointer is NULL if the iterator does not implement it. + * + * @see UCharIteratorSet + * @stable ICU 2.6 + */ + UCharIteratorSetState *setState; +}; + +/** + * Helper function for UCharIterator to get the code point + * at the current index. + * + * Return the code point that includes the code unit at the current position, + * or U_SENTINEL if there is none (index is at the limit). + * If the current code unit is a lead or trail surrogate, + * then the following or preceding surrogate is used to form + * the code point value. + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code point + * + * @see UCharIterator + * @see U16_GET + * @see UnicodeString::char32At() + * @stable ICU 2.1 + */ +U_CAPI UChar32 U_EXPORT2 +uiter_current32(UCharIterator *iter); + +/** + * Helper function for UCharIterator to get the next code point. + * + * Return the code point at the current index and increment + * the index (post-increment, like s[i++]), + * or return U_SENTINEL if there is none (index is at the limit). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code point (and post-increment the current index) + * + * @see UCharIterator + * @see U16_NEXT + * @stable ICU 2.1 + */ +U_CAPI UChar32 U_EXPORT2 +uiter_next32(UCharIterator *iter); + +/** + * Helper function for UCharIterator to get the previous code point. + * + * Decrement the index and return the code point from there + * (pre-decrement, like s[--i]), + * or return U_SENTINEL if there is none (index is at the start). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the previous code point (after pre-decrementing the current index) + * + * @see UCharIterator + * @see U16_PREV + * @stable ICU 2.1 + */ +U_CAPI UChar32 U_EXPORT2 +uiter_previous32(UCharIterator *iter); + +/** + * Get the "state" of the iterator in the form of a single 32-bit word. + * This is a convenience function that calls iter->getState(iter) + * if iter->getState is not NULL; + * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. + * + * Some UCharIterator implementations may not be able to return + * a valid state for each position, in which case they return UITER_NO_STATE instead. + * This will be clearly documented for each such iterator (none of the public ones here). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the state word + * + * @see UCharIterator + * @see UCharIteratorGetState + * @see UITER_NO_STATE + * @stable ICU 2.6 + */ +U_CAPI uint32_t U_EXPORT2 +uiter_getState(const UCharIterator *iter); + +/** + * Restore the "state" of the iterator using a state word from a getState() call. + * This is a convenience function that calls iter->setState(iter, state, pErrorCode) + * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. + * + * @param iter the UCharIterator structure ("this pointer") + * @param state the state word from a getState() call + * on a same-type, same-string iterator + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @see UCharIterator + * @see UCharIteratorSetState + * @stable ICU 2.6 + */ +U_CAPI void U_EXPORT2 +uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); + +/** + * Set up a UCharIterator to iterate over a string. + * + * Sets the UCharIterator function pointers for iteration over the string s + * with iteration boundaries start=index=0 and length=limit=string length. + * The "provider" may set the start, index, and limit values at any time + * within the range 0..length. + * The length field will be ignored. + * + * The string pointer s is set into UCharIterator.context without copying + * or reallocating the string contents. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param s String to iterate over + * @param length Length of s, or -1 if NUL-terminated + * + * @see UCharIterator + * @stable ICU 2.1 + */ +U_CAPI void U_EXPORT2 +uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); + +/** + * Set up a UCharIterator to iterate over a UTF-16BE string + * (byte vector with a big-endian pair of bytes per UChar). + * + * Everything works just like with a normal UChar iterator (uiter_setString), + * except that UChars are assembled from byte pairs, + * and that the length argument here indicates an even number of bytes. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param s UTF-16BE string to iterate over + * @param length Length of s as an even number of bytes, or -1 if NUL-terminated + * (NUL means pair of 0 bytes at even index from s) + * + * @see UCharIterator + * @see uiter_setString + * @stable ICU 2.6 + */ +U_CAPI void U_EXPORT2 +uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); + +/** + * Set up a UCharIterator to iterate over a UTF-8 string. + * + * Sets the UCharIterator function pointers for iteration over the UTF-8 string s + * with UTF-8 iteration boundaries 0 and length. + * The implementation counts the UTF-16 index on the fly and + * lazily evaluates the UTF-16 length of the text. + * + * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. + * When the reservedField is not 0, then it contains a supplementary code point + * and the UTF-16 index is between the two corresponding surrogates. + * At that point, the UTF-8 index is behind that code point. + * + * The UTF-8 string pointer s is set into UCharIterator.context without copying + * or reallocating the string contents. + * + * getState() returns a state value consisting of + * - the current UTF-8 source byte index (bits 31..1) + * - a flag (bit 0) that indicates whether the UChar position is in the middle + * of a surrogate pair + * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) + * + * getState() cannot also encode the UTF-16 index in the state value. + * move(relative to limit or length), or + * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. + * + * @param iter UCharIterator structure to be set for iteration + * @param s UTF-8 string to iterate over + * @param length Length of s in bytes, or -1 if NUL-terminated + * + * @see UCharIterator + * @stable ICU 2.6 + */ +U_CAPI void U_EXPORT2 +uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); + +#if U_SHOW_CPLUSPLUS_API + +/** + * Set up a UCharIterator to wrap around a C++ CharacterIterator. + * + * Sets the UCharIterator function pointers for iteration using the + * CharacterIterator charIter. + * + * The CharacterIterator pointer charIter is set into UCharIterator.context + * without copying or cloning the CharacterIterator object. + * The other "protected" UCharIterator fields are set to 0 and will be ignored. + * The iteration index and boundaries are controlled by the CharacterIterator. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param charIter CharacterIterator to wrap + * + * @see UCharIterator + * @stable ICU 2.1 + */ +U_CAPI void U_EXPORT2 +uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); + +/** + * Set up a UCharIterator to iterate over a C++ Replaceable. + * + * Sets the UCharIterator function pointers for iteration over the + * Replaceable rep with iteration boundaries start=index=0 and + * length=limit=rep->length(). + * The "provider" may set the start, index, and limit values at any time + * within the range 0..length=rep->length(). + * The length field will be ignored. + * + * The Replaceable pointer rep is set into UCharIterator.context without copying + * or cloning/reallocating the Replaceable object. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param rep Replaceable to iterate over + * + * @see UCharIterator + * @stable ICU 2.1 + */ +U_CAPI void U_EXPORT2 +uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); + +#endif + +U_CDECL_END + +#endif diff --git a/thirdparty/icu4c/common/unicode/uldnames.h b/thirdparty/icu4c/common/unicode/uldnames.h new file mode 100644 index 0000000000..47b047ece9 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uldnames.h @@ -0,0 +1,307 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2016, International Business Machines Corporation and +* others. All Rights Reserved. +******************************************************************************* +*/ + +#ifndef __ULDNAMES_H__ +#define __ULDNAMES_H__ + +/** + * \file + * \brief C API: Provides display names of Locale ids and their components. + */ + +#include "unicode/utypes.h" +#include "unicode/uscript.h" +#include "unicode/udisplaycontext.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +/** + * Enum used in LocaleDisplayNames::createInstance. + * @stable ICU 4.4 + */ +typedef enum { + /** + * Use standard names when generating a locale name, + * e.g. en_GB displays as 'English (United Kingdom)'. + * @stable ICU 4.4 + */ + ULDN_STANDARD_NAMES = 0, + /** + * Use dialect names, when generating a locale name, + * e.g. en_GB displays as 'British English'. + * @stable ICU 4.4 + */ + ULDN_DIALECT_NAMES +} UDialectHandling; + +/** + * Opaque C service object type for the locale display names API + * @stable ICU 4.4 + */ +struct ULocaleDisplayNames; + +/** + * C typedef for struct ULocaleDisplayNames. + * @stable ICU 4.4 + */ +typedef struct ULocaleDisplayNames ULocaleDisplayNames; + +#if !UCONFIG_NO_FORMATTING + +/** + * Returns an instance of LocaleDisplayNames that returns names + * formatted for the provided locale, using the provided + * dialectHandling. The usual value for dialectHandling is + * ULOC_STANDARD_NAMES. + * + * @param locale the display locale + * @param dialectHandling how to select names for locales + * @return a ULocaleDisplayNames instance + * @param pErrorCode the status code + * @stable ICU 4.4 + */ +U_CAPI ULocaleDisplayNames * U_EXPORT2 +uldn_open(const char * locale, + UDialectHandling dialectHandling, + UErrorCode *pErrorCode); + +/** + * Closes a ULocaleDisplayNames instance obtained from uldn_open(). + * @param ldn the ULocaleDisplayNames instance to be closed + * @stable ICU 4.4 + */ +U_CAPI void U_EXPORT2 +uldn_close(ULocaleDisplayNames *ldn); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalULocaleDisplayNamesPointer + * "Smart pointer" class, closes a ULocaleDisplayNames via uldn_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalULocaleDisplayNamesPointer, ULocaleDisplayNames, uldn_close); + +U_NAMESPACE_END + +#endif + +/* getters for state */ + +/** + * Returns the locale used to determine the display names. This is + * not necessarily the same locale passed to {@link #uldn_open}. + * @param ldn the LocaleDisplayNames instance + * @return the display locale + * @stable ICU 4.4 + */ +U_CAPI const char * U_EXPORT2 +uldn_getLocale(const ULocaleDisplayNames *ldn); + +/** + * Returns the dialect handling used in the display names. + * @param ldn the LocaleDisplayNames instance + * @return the dialect handling enum + * @stable ICU 4.4 + */ +U_CAPI UDialectHandling U_EXPORT2 +uldn_getDialectHandling(const ULocaleDisplayNames *ldn); + +/* names for entire locales */ + +/** + * Returns the display name of the provided locale. + * @param ldn the LocaleDisplayNames instance + * @param locale the locale whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_localeDisplayName(const ULocaleDisplayNames *ldn, + const char *locale, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/* names for components of a locale */ + +/** + * Returns the display name of the provided language code. + * @param ldn the LocaleDisplayNames instance + * @param lang the language code whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_languageDisplayName(const ULocaleDisplayNames *ldn, + const char *lang, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** + * Returns the display name of the provided script. + * @param ldn the LocaleDisplayNames instance + * @param script the script whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_scriptDisplayName(const ULocaleDisplayNames *ldn, + const char *script, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** + * Returns the display name of the provided script code. + * @param ldn the LocaleDisplayNames instance + * @param scriptCode the script code whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn, + UScriptCode scriptCode, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** + * Returns the display name of the provided region code. + * @param ldn the LocaleDisplayNames instance + * @param region the region code whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_regionDisplayName(const ULocaleDisplayNames *ldn, + const char *region, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** + * Returns the display name of the provided variant + * @param ldn the LocaleDisplayNames instance + * @param variant the variant whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_variantDisplayName(const ULocaleDisplayNames *ldn, + const char *variant, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** + * Returns the display name of the provided locale key + * @param ldn the LocaleDisplayNames instance + * @param key the locale key whose display name to return + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_keyDisplayName(const ULocaleDisplayNames *ldn, + const char *key, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** + * Returns the display name of the provided value (used with the provided key). + * @param ldn the LocaleDisplayNames instance + * @param key the locale key + * @param value the locale key's value + * @param result receives the display name + * @param maxResultSize the size of the result buffer + * @param pErrorCode the status code + * @return the actual buffer size needed for the display name. If it's + * greater than maxResultSize, the returned name will be truncated. + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn, + const char *key, + const char *value, + UChar *result, + int32_t maxResultSize, + UErrorCode *pErrorCode); + +/** +* Returns an instance of LocaleDisplayNames that returns names formatted +* for the provided locale, using the provided UDisplayContext settings. +* +* @param locale The display locale +* @param contexts List of one or more context settings (e.g. for dialect +* handling, capitalization, etc. +* @param length Number of items in the contexts list +* @param pErrorCode Pointer to UErrorCode input/output status. If at entry this indicates +* a failure status, the function will do nothing; otherwise this will be +* updated with any new status from the function. +* @return a ULocaleDisplayNames instance +* @stable ICU 51 +*/ +U_CAPI ULocaleDisplayNames * U_EXPORT2 +uldn_openForContext(const char * locale, UDisplayContext *contexts, + int32_t length, UErrorCode *pErrorCode); + +/** +* Returns the UDisplayContext value for the specified UDisplayContextType. +* @param ldn the ULocaleDisplayNames instance +* @param type the UDisplayContextType whose value to return +* @param pErrorCode Pointer to UErrorCode input/output status. If at entry this indicates +* a failure status, the function will do nothing; otherwise this will be +* updated with any new status from the function. +* @return the UDisplayContextValue for the specified type. +* @stable ICU 51 +*/ +U_CAPI UDisplayContext U_EXPORT2 +uldn_getContext(const ULocaleDisplayNames *ldn, UDisplayContextType type, + UErrorCode *pErrorCode); + +#endif /* !UCONFIG_NO_FORMATTING */ +#endif /* __ULDNAMES_H__ */ diff --git a/thirdparty/icu4c/common/unicode/uloc.h b/thirdparty/icu4c/common/unicode/uloc.h new file mode 100644 index 0000000000..3addb847e7 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uloc.h @@ -0,0 +1,1393 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File ULOC.H +* +* Modification History: +* +* Date Name Description +* 04/01/97 aliu Creation. +* 08/22/98 stephen JDK 1.2 sync. +* 12/08/98 rtg New C API for Locale +* 03/30/99 damiba overhaul +* 03/31/99 helena Javadoc for uloc functions. +* 04/15/99 Madhu Updated Javadoc +******************************************************************************** +*/ + +#ifndef ULOC_H +#define ULOC_H + +#include "unicode/utypes.h" +#include "unicode/uenum.h" + +/** + * \file + * \brief C API: Locale + * + * <h2> ULoc C API for Locale </h2> + * A <code>Locale</code> represents a specific geographical, political, + * or cultural region. An operation that requires a <code>Locale</code> to perform + * its task is called <em>locale-sensitive</em> and uses the <code>Locale</code> + * to tailor information for the user. For example, displaying a number + * is a locale-sensitive operation--the number should be formatted + * according to the customs/conventions of the user's native country, + * region, or culture. In the C APIs, a locales is simply a const char string. + * + * <P> + * You create a <code>Locale</code> with one of the three options listed below. + * Each of the component is separated by '_' in the locale string. + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * \code + * newLanguage + * + * newLanguage + newCountry + * + * newLanguage + newCountry + newVariant + * \endcode + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * The first option is a valid <STRONG>ISO + * Language Code.</STRONG> These codes are the lower-case two-letter + * codes as defined by ISO-639. + * You can find a full list of these codes at a number of sites, such as: + * <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt"> + * http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</a> + * + * <P> + * The second option includes an additional <STRONG>ISO Country + * Code.</STRONG> These codes are the upper-case two-letter codes + * as defined by ISO-3166. + * You can find a full list of these codes at a number of sites, such as: + * <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html"> + * http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</a> + * + * <P> + * The third option requires another additional information--the + * <STRONG>Variant.</STRONG> + * The Variant codes are vendor and browser-specific. + * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX. + * Where there are two variants, separate them with an underscore, and + * put the most important one first. For + * example, a Traditional Spanish collation might be referenced, with + * "ES", "ES", "Traditional_WIN". + * + * <P> + * Because a <code>Locale</code> is just an identifier for a region, + * no validity check is performed when you specify a <code>Locale</code>. + * If you want to see whether particular resources are available for the + * <code>Locale</code> you asked for, you must query those resources. For + * example, ask the <code>UNumberFormat</code> for the locales it supports + * using its <code>getAvailable</code> method. + * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular + * locale, you get back the best available match, not necessarily + * precisely what you asked for. For more information, look at + * <code>UResourceBundle</code>. + * + * <P> + * The <code>Locale</code> provides a number of convenient constants + * that you can use to specify the commonly used + * locales. For example, the following refers to a locale + * for the United States: + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * \code + * ULOC_US + * \endcode + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * + * <P> + * Once you've specified a locale you can query it for information about + * itself. Use <code>uloc_getCountry</code> to get the ISO Country Code and + * <code>uloc_getLanguage</code> to get the ISO Language Code. You can + * use <code>uloc_getDisplayCountry</code> to get the + * name of the country suitable for displaying to the user. Similarly, + * you can use <code>uloc_getDisplayLanguage</code> to get the name of + * the language suitable for displaying to the user. Interestingly, + * the <code>uloc_getDisplayXXX</code> methods are themselves locale-sensitive + * and have two versions: one that uses the default locale and one + * that takes a locale as an argument and displays the name or country in + * a language appropriate to that locale. + * + * <P> + * The ICU provides a number of services that perform locale-sensitive + * operations. For example, the <code>unum_xxx</code> functions format + * numbers, currency, or percentages in a locale-sensitive manner. + * </P> + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * \code + * UErrorCode success = U_ZERO_ERROR; + * UNumberFormat *nf; + * const char* myLocale = "fr_FR"; + * + * nf = unum_open( UNUM_DEFAULT, NULL, success ); + * unum_close(nf); + * nf = unum_open( UNUM_CURRENCY, NULL, success ); + * unum_close(nf); + * nf = unum_open( UNUM_PERCENT, NULL, success ); + * unum_close(nf); + * \endcode + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * Each of these methods has two variants; one with an explicit locale + * and one without; the latter using the default locale. + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * \code + * + * nf = unum_open( UNUM_DEFAULT, myLocale, success ); + * unum_close(nf); + * nf = unum_open( UNUM_CURRENCY, myLocale, success ); + * unum_close(nf); + * nf = unum_open( UNUM_PERCENT, myLocale, success ); + * unum_close(nf); + * \endcode + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * A <code>Locale</code> is the mechanism for identifying the kind of services + * (<code>UNumberFormat</code>) that you would like to get. The locale is + * <STRONG>just</STRONG> a mechanism for identifying these services. + * + * <P> + * Each international service that performs locale-sensitive operations + * allows you + * to get all the available objects of that type. You can sift + * through these objects by language, country, or variant, + * and use the display names to present a menu to the user. + * For example, you can create a menu of all the collation objects + * suitable for a given language. Such classes implement these + * three class methods: + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * \code + * const char* uloc_getAvailable(int32_t index); + * int32_t uloc_countAvailable(); + * int32_t + * uloc_getDisplayName(const char* localeID, + * const char* inLocaleID, + * UChar* result, + * int32_t maxResultSize, + * UErrorCode* err); + * + * \endcode + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * <P> + * Concerning POSIX/RFC1766 Locale IDs, + * the getLanguage/getCountry/getVariant/getName functions do understand + * the POSIX type form of language_COUNTRY.ENCODING\@VARIANT + * and if there is not an ICU-stype variant, uloc_getVariant() for example + * will return the one listed after the \@at sign. As well, the hyphen + * "-" is recognized as a country/variant separator similarly to RFC1766. + * So for example, "en-us" will be interpreted as en_US. + * As a result, uloc_getName() is far from a no-op, and will have the + * effect of converting POSIX/RFC1766 IDs into ICU form, although it does + * NOT map any of the actual codes (i.e. russian->ru) in any way. + * Applications should call uloc_getName() at the point where a locale ID + * is coming from an external source (user entry, OS, web browser) + * and pass the resulting string to other ICU functions. For example, + * don't use de-de\@EURO as an argument to resourcebundle. + * + * @see UResourceBundle + */ + +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_CHINESE "zh" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_ENGLISH "en" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_FRENCH "fr" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_GERMAN "de" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_ITALIAN "it" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_JAPANESE "ja" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_KOREAN "ko" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_SIMPLIFIED_CHINESE "zh_CN" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_TRADITIONAL_CHINESE "zh_TW" + +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_CANADA "en_CA" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_CANADA_FRENCH "fr_CA" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_CHINA "zh_CN" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_PRC "zh_CN" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_FRANCE "fr_FR" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_GERMANY "de_DE" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_ITALY "it_IT" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_JAPAN "ja_JP" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_KOREA "ko_KR" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_TAIWAN "zh_TW" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_UK "en_GB" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_US "en_US" + +/** + * Useful constant for the maximum size of the language part of a locale ID. + * (including the terminating NULL). + * @stable ICU 2.0 + */ +#define ULOC_LANG_CAPACITY 12 + +/** + * Useful constant for the maximum size of the country part of a locale ID + * (including the terminating NULL). + * @stable ICU 2.0 + */ +#define ULOC_COUNTRY_CAPACITY 4 +/** + * Useful constant for the maximum size of the whole locale ID + * (including the terminating NULL and all keywords). + * @stable ICU 2.0 + */ +#define ULOC_FULLNAME_CAPACITY 157 + +/** + * Useful constant for the maximum size of the script part of a locale ID + * (including the terminating NULL). + * @stable ICU 2.8 + */ +#define ULOC_SCRIPT_CAPACITY 6 + +/** + * Useful constant for the maximum size of keywords in a locale + * @stable ICU 2.8 + */ +#define ULOC_KEYWORDS_CAPACITY 96 + +/** + * Useful constant for the maximum total size of keywords and their values in a locale + * @stable ICU 2.8 + */ +#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100 + +/** + * Invariant character separating keywords from the locale string + * @stable ICU 2.8 + */ +#define ULOC_KEYWORD_SEPARATOR '@' + +/** + * Unicode code point for '@' separating keywords from the locale string. + * @see ULOC_KEYWORD_SEPARATOR + * @stable ICU 4.6 + */ +#define ULOC_KEYWORD_SEPARATOR_UNICODE 0x40 + +/** + * Invariant character for assigning value to a keyword + * @stable ICU 2.8 + */ +#define ULOC_KEYWORD_ASSIGN '=' + +/** + * Unicode code point for '=' for assigning value to a keyword. + * @see ULOC_KEYWORD_ASSIGN + * @stable ICU 4.6 + */ +#define ULOC_KEYWORD_ASSIGN_UNICODE 0x3D + +/** + * Invariant character separating keywords + * @stable ICU 2.8 + */ +#define ULOC_KEYWORD_ITEM_SEPARATOR ';' + +/** + * Unicode code point for ';' separating keywords + * @see ULOC_KEYWORD_ITEM_SEPARATOR + * @stable ICU 4.6 + */ +#define ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE 0x3B + +/** + * Constants for *_getLocale() + * Allow user to select whether she wants information on + * requested, valid or actual locale. + * For example, a collator for "en_US_CALIFORNIA" was + * requested. In the current state of ICU (2.0), + * the requested locale is "en_US_CALIFORNIA", + * the valid locale is "en_US" (most specific locale supported by ICU) + * and the actual locale is "root" (the collation data comes unmodified + * from the UCA) + * The locale is considered supported by ICU if there is a core ICU bundle + * for that locale (although it may be empty). + * @stable ICU 2.1 + */ +typedef enum { + /** This is locale the data actually comes from + * @stable ICU 2.1 + */ + ULOC_ACTUAL_LOCALE = 0, + /** This is the most specific locale supported by ICU + * @stable ICU 2.1 + */ + ULOC_VALID_LOCALE = 1, + +#ifndef U_HIDE_DEPRECATED_API + /** This is the requested locale + * @deprecated ICU 2.8 + */ + ULOC_REQUESTED_LOCALE = 2, + + /** + * One more than the highest normal ULocDataLocaleType value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + ULOC_DATA_LOCALE_TYPE_LIMIT = 3 +#endif // U_HIDE_DEPRECATED_API +} ULocDataLocaleType; + +#ifndef U_HIDE_SYSTEM_API +/** + * Gets ICU's default locale. + * The returned string is a snapshot in time, and will remain valid + * and unchanged even when uloc_setDefault() is called. + * The returned storage is owned by ICU, and must not be altered or deleted + * by the caller. + * + * @return the ICU default locale + * @system + * @stable ICU 2.0 + */ +U_CAPI const char* U_EXPORT2 +uloc_getDefault(void); + +/** + * Sets ICU's default locale. + * By default (without calling this function), ICU's default locale will be based + * on information obtained from the underlying system environment. + * <p> + * Changes to ICU's default locale do not propagate back to the + * system environment. + * <p> + * Changes to ICU's default locale to not affect any ICU services that + * may already be open based on the previous default locale value. + * + * @param localeID the new ICU default locale. A value of NULL will try to get + * the system's default locale. + * @param status the error information if the setting of default locale fails + * @system + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +uloc_setDefault(const char* localeID, + UErrorCode* status); +#endif /* U_HIDE_SYSTEM_API */ + +/** + * Gets the language code for the specified locale. + * + * @param localeID the locale to get the ISO language code with + * @param language the language code for localeID + * @param languageCapacity the size of the language buffer to store the + * language code with + * @param err error information if retrieving the language code failed + * @return the actual buffer size needed for the language code. If it's greater + * than languageCapacity, the returned language code will be truncated. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getLanguage(const char* localeID, + char* language, + int32_t languageCapacity, + UErrorCode* err); + +/** + * Gets the script code for the specified locale. + * + * @param localeID the locale to get the ISO language code with + * @param script the language code for localeID + * @param scriptCapacity the size of the language buffer to store the + * language code with + * @param err error information if retrieving the language code failed + * @return the actual buffer size needed for the language code. If it's greater + * than scriptCapacity, the returned language code will be truncated. + * @stable ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getScript(const char* localeID, + char* script, + int32_t scriptCapacity, + UErrorCode* err); + +/** + * Gets the country code for the specified locale. + * + * @param localeID the locale to get the country code with + * @param country the country code for localeID + * @param countryCapacity the size of the country buffer to store the + * country code with + * @param err error information if retrieving the country code failed + * @return the actual buffer size needed for the country code. If it's greater + * than countryCapacity, the returned country code will be truncated. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getCountry(const char* localeID, + char* country, + int32_t countryCapacity, + UErrorCode* err); + +/** + * Gets the variant code for the specified locale. + * + * @param localeID the locale to get the variant code with + * @param variant the variant code for localeID + * @param variantCapacity the size of the variant buffer to store the + * variant code with + * @param err error information if retrieving the variant code failed + * @return the actual buffer size needed for the variant code. If it's greater + * than variantCapacity, the returned variant code will be truncated. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getVariant(const char* localeID, + char* variant, + int32_t variantCapacity, + UErrorCode* err); + + +/** + * Gets the full name for the specified locale. + * Note: This has the effect of 'canonicalizing' the ICU locale ID to + * a certain extent. Upper and lower case are set as needed. + * It does NOT map aliased names in any way. + * See the top of this header file. + * This API supports preflighting. + * + * @param localeID the locale to get the full name with + * @param name fill in buffer for the name without keywords. + * @param nameCapacity capacity of the fill in buffer. + * @param err error information if retrieving the full name failed + * @return the actual buffer size needed for the full name. If it's greater + * than nameCapacity, the returned full name will be truncated. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getName(const char* localeID, + char* name, + int32_t nameCapacity, + UErrorCode* err); + +/** + * Gets the full name for the specified locale. + * Note: This has the effect of 'canonicalizing' the string to + * a certain extent. Upper and lower case are set as needed, + * and if the components were in 'POSIX' format they are changed to + * ICU format. It does NOT map aliased names in any way. + * See the top of this header file. + * + * @param localeID the locale to get the full name with + * @param name the full name for localeID + * @param nameCapacity the size of the name buffer to store the + * full name with + * @param err error information if retrieving the full name failed + * @return the actual buffer size needed for the full name. If it's greater + * than nameCapacity, the returned full name will be truncated. + * @stable ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +uloc_canonicalize(const char* localeID, + char* name, + int32_t nameCapacity, + UErrorCode* err); + +/** + * Gets the ISO language code for the specified locale. + * + * @param localeID the locale to get the ISO language code with + * @return language the ISO language code for localeID + * @stable ICU 2.0 + */ +U_CAPI const char* U_EXPORT2 +uloc_getISO3Language(const char* localeID); + + +/** + * Gets the ISO country code for the specified locale. + * + * @param localeID the locale to get the ISO country code with + * @return country the ISO country code for localeID + * @stable ICU 2.0 + */ +U_CAPI const char* U_EXPORT2 +uloc_getISO3Country(const char* localeID); + +/** + * Gets the Win32 LCID value for the specified locale. + * If the ICU locale is not recognized by Windows, 0 will be returned. + * + * LCIDs were deprecated with Windows Vista and Microsoft recommends + * that developers use BCP47 style tags instead (uloc_toLanguageTag). + * + * @param localeID the locale to get the Win32 LCID value with + * @return country the Win32 LCID for localeID + * @stable ICU 2.0 + */ +U_CAPI uint32_t U_EXPORT2 +uloc_getLCID(const char* localeID); + +/** + * Gets the language name suitable for display for the specified locale. + * + * @param locale the locale to get the ISO language code with + * @param displayLocale Specifies the locale to be used to display the name. In + * other words, if the locale's language code is "en", passing + * Locale::getFrench() for inLocale would result in "Anglais", + * while passing Locale::getGerman() for inLocale would result + * in "Englisch". + * @param language the displayable language code for localeID + * @param languageCapacity the size of the language buffer to store the + * displayable language code with. + * @param status error information if retrieving the displayable language code + * failed. U_USING_DEFAULT_WARNING indicates that no data was + * found from the locale resources and a case canonicalized + * language code is placed into language as fallback. + * @return the actual buffer size needed for the displayable language code. If + * it's greater than languageCapacity, the returned language + * code will be truncated. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayLanguage(const char* locale, + const char* displayLocale, + UChar* language, + int32_t languageCapacity, + UErrorCode* status); + +/** + * Gets the script name suitable for display for the specified locale. + * + * @param locale the locale to get the displayable script code with. NULL may be + * used to specify the default. + * @param displayLocale Specifies the locale to be used to display the name. In + * other words, if the locale's language code is "en", passing + * Locale::getFrench() for inLocale would result in "", while + * passing Locale::getGerman() for inLocale would result in "". + * NULL may be used to specify the default. + * @param script the displayable script for the localeID. + * @param scriptCapacity the size of the script buffer to store the displayable + * script code with. + * @param status error information if retrieving the displayable script code + * failed. U_USING_DEFAULT_WARNING indicates that no data was + * found from the locale resources and a case canonicalized + * script code is placed into script as fallback. + * @return the actual buffer size needed for the displayable script code. If + * it's greater than scriptCapacity, the returned displayable + * script code will be truncated. + * @stable ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayScript(const char* locale, + const char* displayLocale, + UChar* script, + int32_t scriptCapacity, + UErrorCode* status); + +/** + * Gets the country name suitable for display for the specified locale. + * Warning: this is for the region part of a valid locale ID; it cannot just be + * the region code (like "FR"). To get the display name for a region alone, or + * for other options, use ULocaleDisplayNames instead. + * + * @param locale the locale to get the displayable country code with. NULL may + * be used to specify the default. + * @param displayLocale Specifies the locale to be used to display the name. In + * other words, if the locale's language code is "en", passing + * Locale::getFrench() for inLocale would result in "Anglais", + * while passing Locale::getGerman() for inLocale would result + * in "Englisch". NULL may be used to specify the default. + * @param country the displayable country code for localeID. + * @param countryCapacity the size of the country buffer to store the + * displayable country code with. + * @param status error information if retrieving the displayable country code + * failed. U_USING_DEFAULT_WARNING indicates that no data was + * found from the locale resources and a case canonicalized + * country code is placed into country as fallback. + * @return the actual buffer size needed for the displayable country code. If + * it's greater than countryCapacity, the returned displayable + * country code will be truncated. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayCountry(const char* locale, + const char* displayLocale, + UChar* country, + int32_t countryCapacity, + UErrorCode* status); + + +/** + * Gets the variant name suitable for display for the specified locale. + * + * @param locale the locale to get the displayable variant code with. NULL may + * be used to specify the default. + * @param displayLocale Specifies the locale to be used to display the name. In + * other words, if the locale's language code is "en", passing + * Locale::getFrench() for inLocale would result in "Anglais", + * while passing Locale::getGerman() for inLocale would result + * in "Englisch". NULL may be used to specify the default. + * @param variant the displayable variant code for localeID. + * @param variantCapacity the size of the variant buffer to store the + * displayable variant code with. + * @param status error information if retrieving the displayable variant code + * failed. U_USING_DEFAULT_WARNING indicates that no data was + * found from the locale resources and a case canonicalized + * variant code is placed into variant as fallback. + * @return the actual buffer size needed for the displayable variant code. If + * it's greater than variantCapacity, the returned displayable + * variant code will be truncated. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayVariant(const char* locale, + const char* displayLocale, + UChar* variant, + int32_t variantCapacity, + UErrorCode* status); + +/** + * Gets the keyword name suitable for display for the specified locale. E.g: + * for the locale string de_DE\@collation=PHONEBOOK, this API gets the display + * string for the keyword collation. + * Usage: + * <code> + * UErrorCode status = U_ZERO_ERROR; + * const char* keyword =NULL; + * int32_t keywordLen = 0; + * int32_t keywordCount = 0; + * UChar displayKeyword[256]; + * int32_t displayKeywordLen = 0; + * UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status); + * for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){ + * if(U_FAILURE(status)){ + * ...something went wrong so handle the error... + * break; + * } + * // the uenum_next returns NUL terminated string + * keyword = uenum_next(keywordEnum, &keywordLen, &status); + * displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256); + * ... do something interesting ..... + * } + * uenum_close(keywordEnum); + * </code> + * @param keyword The keyword whose display string needs to be returned. + * @param displayLocale Specifies the locale to be used to display the name. In other words, + * if the locale's language code is "en", passing Locale::getFrench() for + * inLocale would result in "Anglais", while passing Locale::getGerman() + * for inLocale would result in "Englisch". NULL may be used to specify the default. + * @param dest the buffer to which the displayable keyword should be written. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param status error information if retrieving the displayable string failed. + * Should not be NULL and should not indicate failure on entry. + * U_USING_DEFAULT_WARNING indicates that no data was found from the locale + * resources and the keyword is placed into dest as fallback. + * @return the actual buffer size needed for the displayable variant code. + * @see #uloc_openKeywords + * @stable ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayKeyword(const char* keyword, + const char* displayLocale, + UChar* dest, + int32_t destCapacity, + UErrorCode* status); +/** + * Gets the value of the keyword suitable for display for the specified locale. + * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display + * string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword. + * + * @param locale The locale to get the displayable variant code with. NULL may be used to specify the default. + * @param keyword The keyword for whose value should be used. + * @param displayLocale Specifies the locale to be used to display the name. In other words, + * if the locale's language code is "en", passing Locale::getFrench() for + * inLocale would result in "Anglais", while passing Locale::getGerman() + * for inLocale would result in "Englisch". NULL may be used to specify the default. + * @param dest the buffer to which the displayable keyword should be written. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param status error information if retrieving the displayable string failed. + * Should not be NULL and must not indicate failure on entry. + * U_USING_DEFAULT_WARNING indicates that no data was found from the locale + * resources and the value of the keyword is placed into dest as fallback. + * @return the actual buffer size needed for the displayable variant code. + * @stable ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayKeywordValue( const char* locale, + const char* keyword, + const char* displayLocale, + UChar* dest, + int32_t destCapacity, + UErrorCode* status); +/** + * Gets the full name suitable for display for the specified locale. + * + * @param localeID the locale to get the displayable name with. NULL may be used to specify the default. + * @param inLocaleID Specifies the locale to be used to display the name. In other words, + * if the locale's language code is "en", passing Locale::getFrench() for + * inLocale would result in "Anglais", while passing Locale::getGerman() + * for inLocale would result in "Englisch". NULL may be used to specify the default. + * @param result the displayable name for localeID + * @param maxResultSize the size of the name buffer to store the + * displayable full name with + * @param err error information if retrieving the displayable name failed + * @return the actual buffer size needed for the displayable name. If it's greater + * than maxResultSize, the returned displayable name will be truncated. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getDisplayName(const char* localeID, + const char* inLocaleID, + UChar* result, + int32_t maxResultSize, + UErrorCode* err); + + +/** + * Gets the specified locale from a list of available locales. + * + * This method corresponds to uloc_openAvailableByType called with the + * ULOC_AVAILABLE_DEFAULT type argument. + * + * The return value is a pointer to an item of a locale name array. Both this + * array and the pointers it contains are owned by ICU and should not be + * deleted or written through by the caller. The locale name is terminated by + * a null pointer. + * + * @param n the specific locale name index of the available locale list; + * should not exceed the number returned by uloc_countAvailable. + * @return a specified locale name of all available locales + * @stable ICU 2.0 + */ +U_CAPI const char* U_EXPORT2 +uloc_getAvailable(int32_t n); + +/** + * Gets the size of the all available locale list. + * + * @return the size of the locale list + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 uloc_countAvailable(void); + +/** + * Types for uloc_getAvailableByType and uloc_countAvailableByType. + * + * @stable ICU 65 + */ +typedef enum ULocAvailableType { + /** + * Locales that return data when passed to ICU APIs, + * but not including legacy or alias locales. + * + * @stable ICU 65 + */ + ULOC_AVAILABLE_DEFAULT, + + /** + * Legacy or alias locales that return data when passed to ICU APIs. + * Examples of supported legacy or alias locales: + * + * - iw (alias to he) + * - mo (alias to ro) + * - zh_CN (alias to zh_Hans_CN) + * - sr_BA (alias to sr_Cyrl_BA) + * - ars (alias to ar_SA) + * + * The locales in this set are disjoint from the ones in + * ULOC_AVAILABLE_DEFAULT. To get both sets at the same time, use + * ULOC_AVAILABLE_WITH_LEGACY_ALIASES. + * + * @stable ICU 65 + */ + ULOC_AVAILABLE_ONLY_LEGACY_ALIASES, + + /** + * The union of the locales in ULOC_AVAILABLE_DEFAULT and + * ULOC_AVAILABLE_ONLY_LEGACY_ALIAS. + * + * @stable ICU 65 + */ + ULOC_AVAILABLE_WITH_LEGACY_ALIASES, + +#ifndef U_HIDE_INTERNAL_API + /** + * @internal + */ + ULOC_AVAILABLE_COUNT +#endif +} ULocAvailableType; + +/** + * Gets a list of available locales according to the type argument, allowing + * the user to access different sets of supported locales in ICU. + * + * The returned UEnumeration must be closed by the caller. + * + * @param type Type choice from ULocAvailableType. + * @param status Set if an error occurred. + * @return a UEnumeration owned by the caller, or nullptr on failure. + * @stable ICU 65 + */ +U_CAPI UEnumeration* U_EXPORT2 +uloc_openAvailableByType(ULocAvailableType type, UErrorCode* status); + +/** + * + * Gets a list of all available 2-letter language codes defined in ISO 639, + * plus additional 3-letter codes determined to be useful for locale generation as + * defined by Unicode CLDR. This is a pointer + * to an array of pointers to arrays of char. All of these pointers are owned + * by ICU-- do not delete them, and do not write through them. The array is + * terminated with a null pointer. + * @return a list of all available language codes + * @stable ICU 2.0 + */ +U_CAPI const char* const* U_EXPORT2 +uloc_getISOLanguages(void); + +/** + * + * Gets a list of all available 2-letter country codes defined in ISO 639. This is a + * pointer to an array of pointers to arrays of char. All of these pointers are + * owned by ICU-- do not delete them, and do not write through them. The array is + * terminated with a null pointer. + * @return a list of all available country codes + * @stable ICU 2.0 + */ +U_CAPI const char* const* U_EXPORT2 +uloc_getISOCountries(void); + +/** + * Truncate the locale ID string to get the parent locale ID. + * Copies the part of the string before the last underscore. + * The parent locale ID will be an empty string if there is no + * underscore, or if there is only one underscore at localeID[0]. + * + * @param localeID Input locale ID string. + * @param parent Output string buffer for the parent locale ID. + * @param parentCapacity Size of the output buffer. + * @param err A UErrorCode value. + * @return The length of the parent locale ID. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getParent(const char* localeID, + char* parent, + int32_t parentCapacity, + UErrorCode* err); + + + + +/** + * Gets the full name for the specified locale, like uloc_getName(), + * but without keywords. + * + * Note: This has the effect of 'canonicalizing' the string to + * a certain extent. Upper and lower case are set as needed, + * and if the components were in 'POSIX' format they are changed to + * ICU format. It does NOT map aliased names in any way. + * See the top of this header file. + * + * This API strips off the keyword part, so "de_DE\@collation=phonebook" + * will become "de_DE". + * This API supports preflighting. + * + * @param localeID the locale to get the full name with + * @param name fill in buffer for the name without keywords. + * @param nameCapacity capacity of the fill in buffer. + * @param err error information if retrieving the full name failed + * @return the actual buffer size needed for the full name. If it's greater + * than nameCapacity, the returned full name will be truncated. + * @stable ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getBaseName(const char* localeID, + char* name, + int32_t nameCapacity, + UErrorCode* err); + +/** + * Gets an enumeration of keywords for the specified locale. Enumeration + * must get disposed of by the client using uenum_close function. + * + * @param localeID the locale to get the variant code with + * @param status error information if retrieving the keywords failed + * @return enumeration of keywords or NULL if there are no keywords. + * @stable ICU 2.8 + */ +U_CAPI UEnumeration* U_EXPORT2 +uloc_openKeywords(const char* localeID, + UErrorCode* status); + +/** + * Get the value for a keyword. Locale name does not need to be normalized. + * + * @param localeID locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK") + * @param keywordName name of the keyword for which we want the value; must not be + * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive. + * @param buffer receiving buffer + * @param bufferCapacity capacity of receiving buffer + * @param status containing error code: e.g. buffer not big enough or ill-formed localeID + * or keywordName parameters. + * @return the length of keyword value + * @stable ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getKeywordValue(const char* localeID, + const char* keywordName, + char* buffer, int32_t bufferCapacity, + UErrorCode* status); + + +/** + * Sets or removes the value of the specified keyword. + * + * For removing all keywords, use uloc_getBaseName(). + * + * NOTE: Unlike almost every other ICU function which takes a + * buffer, this function will NOT truncate the output text, and will + * not update the buffer with unterminated text setting a status of + * U_STRING_NOT_TERMINATED_WARNING. If a BUFFER_OVERFLOW_ERROR is received, + * it means a terminated version of the updated locale ID would not fit + * in the buffer, and the original buffer is untouched. This is done to + * prevent incorrect or possibly even malformed locales from being generated + * and used. + * + * @param keywordName name of the keyword to be set; must not be + * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive. + * @param keywordValue value of the keyword to be set. If 0-length or + * NULL, will result in the keyword being removed; no error is given if + * that keyword does not exist. Otherwise, must consist only of + * [A-Za-z0-9] and [/_+-]. + * @param buffer input buffer containing well-formed locale ID to be + * modified. + * @param bufferCapacity capacity of receiving buffer + * @param status containing error code: e.g. buffer not big enough + * or ill-formed keywordName or keywordValue parameters, or ill-formed + * locale ID in buffer on input. + * @return the length needed for the buffer + * @see uloc_getKeywordValue + * @stable ICU 3.2 + */ +U_CAPI int32_t U_EXPORT2 +uloc_setKeywordValue(const char* keywordName, + const char* keywordValue, + char* buffer, int32_t bufferCapacity, + UErrorCode* status); + +/** + * Returns whether the locale's script is written right-to-left. + * If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags(). + * If no likely script is known, then false is returned. + * + * A script is right-to-left according to the CLDR script metadata + * which corresponds to whether the script's letters have Bidi_Class=R or AL. + * + * Returns true for "ar" and "en-Hebr", false for "zh" and "fa-Cyrl". + * + * @param locale input locale ID + * @return true if the locale's script is written right-to-left + * @stable ICU 54 + */ +U_CAPI UBool U_EXPORT2 +uloc_isRightToLeft(const char *locale); + +/** + * enums for the return value for the character and line orientation + * functions. + * @stable ICU 4.0 + */ +typedef enum { + ULOC_LAYOUT_LTR = 0, /* left-to-right. */ + ULOC_LAYOUT_RTL = 1, /* right-to-left. */ + ULOC_LAYOUT_TTB = 2, /* top-to-bottom. */ + ULOC_LAYOUT_BTT = 3, /* bottom-to-top. */ + ULOC_LAYOUT_UNKNOWN +} ULayoutType; + +/** + * Get the layout character orientation for the specified locale. + * + * @param localeId locale name + * @param status Error status + * @return an enum indicating the layout orientation for characters. + * @stable ICU 4.0 + */ +U_CAPI ULayoutType U_EXPORT2 +uloc_getCharacterOrientation(const char* localeId, + UErrorCode *status); + +/** + * Get the layout line orientation for the specified locale. + * + * @param localeId locale name + * @param status Error status + * @return an enum indicating the layout orientation for lines. + * @stable ICU 4.0 + */ +U_CAPI ULayoutType U_EXPORT2 +uloc_getLineOrientation(const char* localeId, + UErrorCode *status); + +/** + * Output values which uloc_acceptLanguage() writes to the 'outResult' parameter. + * + * @see uloc_acceptLanguageFromHTTP + * @see uloc_acceptLanguage + * @stable ICU 3.2 + */ +typedef enum { + /** + * No exact match was found. + * @stable ICU 3.2 + */ + ULOC_ACCEPT_FAILED = 0, + /** + * An exact match was found. + * @stable ICU 3.2 + */ + ULOC_ACCEPT_VALID = 1, + /** + * A fallback was found. For example, the Accept-Language list includes 'ja_JP' + * and is matched with available locale 'ja'. + * @stable ICU 3.2 + */ + ULOC_ACCEPT_FALLBACK = 2 /* */ +} UAcceptResult; + +/** + * Based on a HTTP header from a web browser and a list of available locales, + * determine an acceptable locale for the user. + * + * This is a thin wrapper over C++ class LocaleMatcher. + * + * @param result - buffer to accept the result locale + * @param resultAvailable the size of the result buffer. + * @param outResult - An out parameter that contains the fallback status + * @param httpAcceptLanguage - "Accept-Language:" header as per HTTP. + * @param availableLocales - list of available locales to match + * @param status ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return length needed for the locale. + * @stable ICU 3.2 + */ +U_CAPI int32_t U_EXPORT2 +uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, + UAcceptResult *outResult, + const char *httpAcceptLanguage, + UEnumeration* availableLocales, + UErrorCode *status); + +/** + * Based on a list of available locales, + * determine an acceptable locale for the user. + * + * This is a thin wrapper over C++ class LocaleMatcher. + * + * @param result - buffer to accept the result locale + * @param resultAvailable the size of the result buffer. + * @param outResult - An out parameter that contains the fallback status + * @param acceptList - list of acceptable languages + * @param acceptListCount - count of acceptList items + * @param availableLocales - list of available locales to match + * @param status ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return length needed for the locale. + * @stable ICU 3.2 + */ +U_CAPI int32_t U_EXPORT2 +uloc_acceptLanguage(char *result, int32_t resultAvailable, + UAcceptResult *outResult, const char **acceptList, + int32_t acceptListCount, + UEnumeration* availableLocales, + UErrorCode *status); + + +/** + * Gets the ICU locale ID for the specified Win32 LCID value. + * + * @param hostID the Win32 LCID to translate + * @param locale the output buffer for the ICU locale ID, which will be NUL-terminated + * if there is room. + * @param localeCapacity the size of the output buffer + * @param status an error is returned if the LCID is unrecognized or the output buffer + * is too small + * @return actual the actual size of the locale ID, not including NUL-termination + * @stable ICU 3.8 + */ +U_CAPI int32_t U_EXPORT2 +uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity, + UErrorCode *status); + + +/** + * Add the likely subtags for a provided locale ID, per the algorithm described + * in the following CLDR technical report: + * + * http://www.unicode.org/reports/tr35/#Likely_Subtags + * + * If localeID is already in the maximal form, or there is no data available + * for maximization, it will be copied to the output buffer. For example, + * "und-Zzzz" cannot be maximized, since there is no reasonable maximization. + * + * Examples: + * + * "en" maximizes to "en_Latn_US" + * + * "de" maximizes to "de_Latn_US" + * + * "sr" maximizes to "sr_Cyrl_RS" + * + * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) + * + * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) + * + * @param localeID The locale to maximize + * @param maximizedLocaleID The maximized locale + * @param maximizedLocaleIDCapacity The capacity of the maximizedLocaleID buffer + * @param err Error information if maximizing the locale failed. If the length + * of the localeID and the null-terminator is greater than the maximum allowed size, + * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. + * @return The actual buffer size needed for the maximized locale. If it's + * greater than maximizedLocaleIDCapacity, the returned ID will be truncated. + * On error, the return value is -1. + * @stable ICU 4.0 + */ +U_CAPI int32_t U_EXPORT2 +uloc_addLikelySubtags(const char* localeID, + char* maximizedLocaleID, + int32_t maximizedLocaleIDCapacity, + UErrorCode* err); + + +/** + * Minimize the subtags for a provided locale ID, per the algorithm described + * in the following CLDR technical report: + * + * http://www.unicode.org/reports/tr35/#Likely_Subtags + * + * If localeID is already in the minimal form, or there is no data available + * for minimization, it will be copied to the output buffer. Since the + * minimization algorithm relies on proper maximization, see the comments + * for uloc_addLikelySubtags for reasons why there might not be any data. + * + * Examples: + * + * "en_Latn_US" minimizes to "en" + * + * "de_Latn_US" minimizes to "de" + * + * "sr_Cyrl_RS" minimizes to "sr" + * + * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the + * script, and minimizing to "zh" would imply "zh_Hans_CN".) + * + * @param localeID The locale to minimize + * @param minimizedLocaleID The minimized locale + * @param minimizedLocaleIDCapacity The capacity of the minimizedLocaleID buffer + * @param err Error information if minimizing the locale failed. If the length + * of the localeID and the null-terminator is greater than the maximum allowed size, + * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. + * @return The actual buffer size needed for the minimized locale. If it's + * greater than minimizedLocaleIDCapacity, the returned ID will be truncated. + * On error, the return value is -1. + * @stable ICU 4.0 + */ +U_CAPI int32_t U_EXPORT2 +uloc_minimizeSubtags(const char* localeID, + char* minimizedLocaleID, + int32_t minimizedLocaleIDCapacity, + UErrorCode* err); + +/** + * Returns a locale ID for the specified BCP47 language tag string. + * If the specified language tag contains any ill-formed subtags, + * the first such subtag and all following subtags are ignored. + * <p> + * This implements the 'Language-Tag' production of BCP 47, and so + * supports legacy language tags (marked as “Type: grandfathered” in BCP 47) + * (regular and irregular) as well as private use language tags. + * + * Private use tags are represented as 'x-whatever', + * and legacy tags are converted to their canonical replacements where they exist. + * + * Note that a few legacy tags have no modern replacement; + * these will be converted using the fallback described in + * the first paragraph, so some information might be lost. + * + * @param langtag the input BCP47 language tag. + * @param localeID the output buffer receiving a locale ID for the + * specified BCP47 language tag. + * @param localeIDCapacity the size of the locale ID output buffer. + * @param parsedLength if not NULL, successfully parsed length + * for the input language tag is set. + * @param err error information if receiving the locald ID + * failed. + * @return the length of the locale ID. + * @stable ICU 4.2 + */ +U_CAPI int32_t U_EXPORT2 +uloc_forLanguageTag(const char* langtag, + char* localeID, + int32_t localeIDCapacity, + int32_t* parsedLength, + UErrorCode* err); + +/** + * Returns a well-formed language tag for this locale ID. + * <p> + * <b>Note</b>: When <code>strict</code> is false, any locale + * fields which do not satisfy the BCP47 syntax requirement will + * be omitted from the result. When <code>strict</code> is + * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the + * <code>err</code> if any locale fields do not satisfy the + * BCP47 syntax requirement. + * @param localeID the input locale ID + * @param langtag the output buffer receiving BCP47 language + * tag for the locale ID. + * @param langtagCapacity the size of the BCP47 language tag + * output buffer. + * @param strict boolean value indicating if the function returns + * an error for an ill-formed input locale ID. + * @param err error information if receiving the language + * tag failed. + * @return The length of the BCP47 language tag. + * @stable ICU 4.2 + */ +U_CAPI int32_t U_EXPORT2 +uloc_toLanguageTag(const char* localeID, + char* langtag, + int32_t langtagCapacity, + UBool strict, + UErrorCode* err); + +/** + * Converts the specified keyword (legacy key, or BCP 47 Unicode locale + * extension key) to the equivalent BCP 47 Unicode locale extension key. + * For example, BCP 47 Unicode locale extension key "co" is returned for + * the input keyword "collation". + * <p> + * When the specified keyword is unknown, but satisfies the BCP syntax, + * then the pointer to the input keyword itself will be returned. + * For example, + * <code>uloc_toUnicodeLocaleKey("ZZ")</code> returns "ZZ". + * + * @param keyword the input locale keyword (either legacy key + * such as "collation" or BCP 47 Unicode locale extension + * key such as "co"). + * @return the well-formed BCP 47 Unicode locale extension key, + * or NULL if the specified locale keyword cannot be + * mapped to a well-formed BCP 47 Unicode locale extension + * key. + * @see uloc_toLegacyKey + * @stable ICU 54 + */ +U_CAPI const char* U_EXPORT2 +uloc_toUnicodeLocaleKey(const char* keyword); + +/** + * Converts the specified keyword value (legacy type, or BCP 47 + * Unicode locale extension type) to the well-formed BCP 47 Unicode locale + * extension type for the specified keyword (category). For example, BCP 47 + * Unicode locale extension type "phonebk" is returned for the input + * keyword value "phonebook", with the keyword "collation" (or "co"). + * <p> + * When the specified keyword is not recognized, but the specified value + * satisfies the syntax of the BCP 47 Unicode locale extension type, + * or when the specified keyword allows 'variable' type and the specified + * value satisfies the syntax, then the pointer to the input type value itself + * will be returned. + * For example, + * <code>uloc_toUnicodeLocaleType("Foo", "Bar")</code> returns "Bar", + * <code>uloc_toUnicodeLocaleType("variableTop", "00A4")</code> returns "00A4". + * + * @param keyword the locale keyword (either legacy key such as + * "collation" or BCP 47 Unicode locale extension + * key such as "co"). + * @param value the locale keyword value (either legacy type + * such as "phonebook" or BCP 47 Unicode locale extension + * type such as "phonebk"). + * @return the well-formed BCP47 Unicode locale extension type, + * or NULL if the locale keyword value cannot be mapped to + * a well-formed BCP 47 Unicode locale extension type. + * @see uloc_toLegacyType + * @stable ICU 54 + */ +U_CAPI const char* U_EXPORT2 +uloc_toUnicodeLocaleType(const char* keyword, const char* value); + +/** + * Converts the specified keyword (BCP 47 Unicode locale extension key, or + * legacy key) to the legacy key. For example, legacy key "collation" is + * returned for the input BCP 47 Unicode locale extension key "co". + * + * @param keyword the input locale keyword (either BCP 47 Unicode locale + * extension key or legacy key). + * @return the well-formed legacy key, or NULL if the specified + * keyword cannot be mapped to a well-formed legacy key. + * @see toUnicodeLocaleKey + * @stable ICU 54 + */ +U_CAPI const char* U_EXPORT2 +uloc_toLegacyKey(const char* keyword); + +/** + * Converts the specified keyword value (BCP 47 Unicode locale extension type, + * or legacy type or type alias) to the canonical legacy type. For example, + * the legacy type "phonebook" is returned for the input BCP 47 Unicode + * locale extension type "phonebk" with the keyword "collation" (or "co"). + * <p> + * When the specified keyword is not recognized, but the specified value + * satisfies the syntax of legacy key, or when the specified keyword + * allows 'variable' type and the specified value satisfies the syntax, + * then the pointer to the input type value itself will be returned. + * For example, + * <code>uloc_toLegacyType("Foo", "Bar")</code> returns "Bar", + * <code>uloc_toLegacyType("vt", "00A4")</code> returns "00A4". + * + * @param keyword the locale keyword (either legacy keyword such as + * "collation" or BCP 47 Unicode locale extension + * key such as "co"). + * @param value the locale keyword value (either BCP 47 Unicode locale + * extension type such as "phonebk" or legacy keyword value + * such as "phonebook"). + * @return the well-formed legacy type, or NULL if the specified + * keyword value cannot be mapped to a well-formed legacy + * type. + * @see toUnicodeLocaleType + * @stable ICU 54 + */ +U_CAPI const char* U_EXPORT2 +uloc_toLegacyType(const char* keyword, const char* value); + +#endif /*_ULOC*/ diff --git a/thirdparty/icu4c/common/unicode/umachine.h b/thirdparty/icu4c/common/unicode/umachine.h new file mode 100644 index 0000000000..09c887c80e --- /dev/null +++ b/thirdparty/icu4c/common/unicode/umachine.h @@ -0,0 +1,491 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: umachine.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep13 +* created by: Markus W. Scherer +* +* This file defines basic types and constants for ICU to be +* platform-independent. umachine.h and utf.h are included into +* utypes.h to provide all the general definitions for ICU. +* All of these definitions used to be in utypes.h before +* the UTF-handling macros made this unmaintainable. +*/ + +#ifndef __UMACHINE_H__ +#define __UMACHINE_H__ + + +/** + * \file + * \brief Basic types and constants for UTF + * + * <h2> Basic types and constants for UTF </h2> + * This file defines basic types and constants for utf.h to be + * platform-independent. umachine.h and utf.h are included into + * utypes.h to provide all the general definitions for ICU. + * All of these definitions used to be in utypes.h before + * the UTF-handling macros made this unmaintainable. + * + */ +/*==========================================================================*/ +/* Include platform-dependent definitions */ +/* which are contained in the platform-specific file platform.h */ +/*==========================================================================*/ + +#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */ + +/* + * ANSI C headers: + * stddef.h defines wchar_t + */ +#include <stdbool.h> +#include <stddef.h> + +/*==========================================================================*/ +/* For C wrappers, we use the symbol U_CAPI. */ +/* This works properly if the includer is C or C++. */ +/* Functions are declared U_CAPI return-type U_EXPORT2 function-name()... */ +/*==========================================================================*/ + +/** + * \def U_CFUNC + * This is used in a declaration of a library private ICU C function. + * @stable ICU 2.4 + */ + +/** + * \def U_CDECL_BEGIN + * This is used to begin a declaration of a library private ICU C API. + * @stable ICU 2.4 + */ + +/** + * \def U_CDECL_END + * This is used to end a declaration of a library private ICU C API + * @stable ICU 2.4 + */ + +#ifdef __cplusplus +# define U_CFUNC extern "C" +# define U_CDECL_BEGIN extern "C" { +# define U_CDECL_END } +#else +# define U_CFUNC extern +# define U_CDECL_BEGIN +# define U_CDECL_END +#endif + +#ifndef U_ATTRIBUTE_DEPRECATED +/** + * \def U_ATTRIBUTE_DEPRECATED + * This is used for GCC specific attributes + * @internal + */ +#if U_GCC_MAJOR_MINOR >= 302 +# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated)) +/** + * \def U_ATTRIBUTE_DEPRECATED + * This is used for Visual C++ specific attributes + * @internal + */ +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated) +#else +# define U_ATTRIBUTE_DEPRECATED +#endif +#endif + +/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/ +#define U_CAPI U_CFUNC U_EXPORT +/** Obsolete/same as U_CAPI; was used to declare a function as a stable public ICU C API*/ +#define U_STABLE U_CAPI +/** Obsolete/same as U_CAPI; was used to declare a function as a draft public ICU C API */ +#define U_DRAFT U_CAPI +/** This is used to declare a function as a deprecated public ICU C API */ +#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED +/** Obsolete/same as U_CAPI; was used to declare a function as an obsolete public ICU C API */ +#define U_OBSOLETE U_CAPI +/** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API */ +#define U_INTERNAL U_CAPI + +/** + * \def U_OVERRIDE + * Defined to the C++11 "override" keyword if available. + * Denotes a class or member which is an override of the base class. + * May result in an error if it applied to something not an override. + * @internal + */ +#ifndef U_OVERRIDE +#define U_OVERRIDE override +#endif + +/** + * \def U_FINAL + * Defined to the C++11 "final" keyword if available. + * Denotes a class or member which may not be overridden in subclasses. + * May result in an error if subclasses attempt to override. + * @internal + */ +#if !defined(U_FINAL) || defined(U_IN_DOXYGEN) +#define U_FINAL final +#endif + +// Before ICU 65, function-like, multi-statement ICU macros were just defined as +// series of statements wrapped in { } blocks and the caller could choose to +// either treat them as if they were actual functions and end the invocation +// with a trailing ; creating an empty statement after the block or else omit +// this trailing ; using the knowledge that the macro would expand to { }. +// +// But doing so doesn't work well with macros that look like functions and +// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore +// switches to the standard solution of wrapping such macros in do { } while. +// +// This will however break existing code that depends on being able to invoke +// these macros without a trailing ; so to be able to remain compatible with +// such code the wrapper is itself defined as macros so that it's possible to +// build ICU 65 and later with the old macro behaviour, like this: +// +// export CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""' +// runConfigureICU ... +// + +/** + * \def UPRV_BLOCK_MACRO_BEGIN + * Defined as the "do" keyword by default. + * @internal + */ +#ifndef UPRV_BLOCK_MACRO_BEGIN +#define UPRV_BLOCK_MACRO_BEGIN do +#endif + +/** + * \def UPRV_BLOCK_MACRO_END + * Defined as "while (false)" by default. + * @internal + */ +#ifndef UPRV_BLOCK_MACRO_END +#define UPRV_BLOCK_MACRO_END while (false) +#endif + +/*==========================================================================*/ +/* limits for int32_t etc., like in POSIX inttypes.h */ +/*==========================================================================*/ + +#ifndef INT8_MIN +/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */ +# define INT8_MIN ((int8_t)(-128)) +#endif +#ifndef INT16_MIN +/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */ +# define INT16_MIN ((int16_t)(-32767-1)) +#endif +#ifndef INT32_MIN +/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */ +# define INT32_MIN ((int32_t)(-2147483647-1)) +#endif + +#ifndef INT8_MAX +/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */ +# define INT8_MAX ((int8_t)(127)) +#endif +#ifndef INT16_MAX +/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */ +# define INT16_MAX ((int16_t)(32767)) +#endif +#ifndef INT32_MAX +/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */ +# define INT32_MAX ((int32_t)(2147483647)) +#endif + +#ifndef UINT8_MAX +/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */ +# define UINT8_MAX ((uint8_t)(255U)) +#endif +#ifndef UINT16_MAX +/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */ +# define UINT16_MAX ((uint16_t)(65535U)) +#endif +#ifndef UINT32_MAX +/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */ +# define UINT32_MAX ((uint32_t)(4294967295U)) +#endif + +#if defined(U_INT64_T_UNAVAILABLE) +# error int64_t is required for decimal format and rule-based number format. +#else +# ifndef INT64_C +/** + * Provides a platform independent way to specify a signed 64-bit integer constant. + * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C + * @stable ICU 2.8 + */ +# define INT64_C(c) c ## LL +# endif +# ifndef UINT64_C +/** + * Provides a platform independent way to specify an unsigned 64-bit integer constant. + * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C + * @stable ICU 2.8 + */ +# define UINT64_C(c) c ## ULL +# endif +# ifndef U_INT64_MIN +/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */ +# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1)) +# endif +# ifndef U_INT64_MAX +/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */ +# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807))) +# endif +# ifndef U_UINT64_MAX +/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */ +# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615))) +# endif +#endif + +/*==========================================================================*/ +/* Boolean data type */ +/*==========================================================================*/ + +/** + * The ICU boolean type, a signed-byte integer. + * ICU-specific for historical reasons: The C and C++ standards used to not define type bool. + * Also provides a fixed type definition, as opposed to + * type bool whose details (e.g., sizeof) may vary by compiler and between C and C++. + * + * @stable ICU 2.0 + */ +typedef int8_t UBool; + +/** + * \def U_DEFINE_FALSE_AND_TRUE + * Normally turns off defining macros FALSE=0 & TRUE=1 in public ICU headers. + * These obsolete macros sometimes break compilation of other code that + * defines enum constants or similar with these names. + * C++ has long defined bool/false/true. + * C99 also added definitions for these, although as macros; see stdbool.h. + * + * You may transitionally define U_DEFINE_FALSE_AND_TRUE=1 if you need time to migrate code. + * + * @internal ICU 68 + */ +#ifdef U_DEFINE_FALSE_AND_TRUE + // Use the predefined value. +#elif defined(U_COMBINED_IMPLEMENTATION) || \ + defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \ + defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \ + defined(U_TOOLUTIL_IMPLEMENTATION) + // Inside ICU: Keep FALSE & TRUE available. +# define U_DEFINE_FALSE_AND_TRUE 1 +#else + // Outside ICU: Avoid collision with non-macro definitions of FALSE & TRUE. +# define U_DEFINE_FALSE_AND_TRUE 0 +#endif + +#if U_DEFINE_FALSE_AND_TRUE || defined(U_IN_DOXYGEN) +#ifndef TRUE +/** + * The TRUE value of a UBool. + * + * @deprecated ICU 68 Use standard "true" instead. + */ +# define TRUE 1 +#endif +#ifndef FALSE +/** + * The FALSE value of a UBool. + * + * @deprecated ICU 68 Use standard "false" instead. + */ +# define FALSE 0 +#endif +#endif // U_DEFINE_FALSE_AND_TRUE + +/*==========================================================================*/ +/* Unicode data types */ +/*==========================================================================*/ + +/* wchar_t-related definitions -------------------------------------------- */ + +/* + * \def U_WCHAR_IS_UTF16 + * Defined if wchar_t uses UTF-16. + * + * @stable ICU 2.0 + */ +/* + * \def U_WCHAR_IS_UTF32 + * Defined if wchar_t uses UTF-32. + * + * @stable ICU 2.0 + */ +#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) +# ifdef __STDC_ISO_10646__ +# if (U_SIZEOF_WCHAR_T==2) +# define U_WCHAR_IS_UTF16 +# elif (U_SIZEOF_WCHAR_T==4) +# define U_WCHAR_IS_UTF32 +# endif +# elif defined __UCS2__ +# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2) +# define U_WCHAR_IS_UTF16 +# endif +# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__)) +# if (U_SIZEOF_WCHAR_T==4) +# define U_WCHAR_IS_UTF32 +# endif +# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED) +# define U_WCHAR_IS_UTF32 +# elif U_PLATFORM_HAS_WIN32_API +# define U_WCHAR_IS_UTF16 +# endif +#endif + +/* UChar and UChar32 definitions -------------------------------------------- */ + +/** Number of bytes in a UChar. @stable ICU 2.0 */ +#define U_SIZEOF_UCHAR 2 + +/** + * \def U_CHAR16_IS_TYPEDEF + * If 1, then char16_t is a typedef and not a real type (yet) + * @internal + */ +#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11) +// for AIX, uchar.h needs to be included +# include <uchar.h> +# define U_CHAR16_IS_TYPEDEF 1 +#elif defined(_MSC_VER) && (_MSC_VER < 1900) +// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type, +// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx +# define U_CHAR16_IS_TYPEDEF 1 +#else +# define U_CHAR16_IS_TYPEDEF 0 +#endif + + +/** + * \var UChar + * + * The base type for UTF-16 code units and pointers. + * Unsigned 16-bit integer. + * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar. + * + * UChar is configurable by defining the macro UCHAR_TYPE + * on the preprocessor or compiler command line: + * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc. + * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.) + * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16. + * + * The default is UChar=char16_t. + * + * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type. + * + * In C, char16_t is a simple typedef of uint_least16_t. + * ICU requires uint_least16_t=uint16_t for data memory mapping. + * On macOS, char16_t is not available because the uchar.h standard header is missing. + * + * @stable ICU 4.4 + */ + +#if 1 + // #if 1 is normal. UChar defaults to char16_t in C++. + // For configuration testing of UChar=uint16_t temporarily change this to #if 0. + // The intltest Makefile #defines UCHAR_TYPE=char16_t, + // so we only #define it to uint16_t if it is undefined so far. +#elif !defined(UCHAR_TYPE) +# define UCHAR_TYPE uint16_t +#endif + +#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ + defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) + // Inside the ICU library code, never configurable. + typedef char16_t UChar; +#elif defined(UCHAR_TYPE) + typedef UCHAR_TYPE UChar; +#elif (U_CPLUSPLUS_VERSION >= 11) + typedef char16_t UChar; +#else + typedef uint16_t UChar; +#endif + +/** + * \var OldUChar + * Default ICU 58 definition of UChar. + * A base type for UTF-16 code units and pointers. + * Unsigned 16-bit integer. + * + * Define OldUChar to be wchar_t if that is 16 bits wide. + * If wchar_t is not 16 bits wide, then define UChar to be uint16_t. + * + * This makes the definition of OldUChar platform-dependent + * but allows direct string type compatibility with platforms with + * 16-bit wchar_t types. + * + * This is how UChar was defined in ICU 58, for transition convenience. + * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. + * The current UChar responds to UCHAR_TYPE but OldUChar does not. + * + * @stable ICU 59 + */ +#if U_SIZEOF_WCHAR_T==2 + typedef wchar_t OldUChar; +#elif defined(__CHAR16_TYPE__) + typedef __CHAR16_TYPE__ OldUChar; +#else + typedef uint16_t OldUChar; +#endif + +/** + * Define UChar32 as a type for single Unicode code points. + * UChar32 is a signed 32-bit integer (same as int32_t). + * + * The Unicode code point range is 0..0x10ffff. + * All other values (negative or >=0x110000) are illegal as Unicode code points. + * They may be used as sentinel values to indicate "done", "error" + * or similar non-code point conditions. + * + * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined + * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned) + * or else to be uint32_t. + * That is, the definition of UChar32 was platform-dependent. + * + * @see U_SENTINEL + * @stable ICU 2.4 + */ +typedef int32_t UChar32; + +/** + * This value is intended for sentinel values for APIs that + * (take or) return single code points (UChar32). + * It is outside of the Unicode code point range 0..0x10ffff. + * + * For example, a "done" or "error" value in a new API + * could be indicated with U_SENTINEL. + * + * ICU APIs designed before ICU 2.4 usually define service-specific "done" + * values, mostly 0xffff. + * Those may need to be distinguished from + * actual U+ffff text contents by calling functions like + * CharacterIterator::hasNext() or UnicodeString::length(). + * + * @return -1 + * @see UChar32 + * @stable ICU 2.4 + */ +#define U_SENTINEL (-1) + +#include "unicode/urename.h" + +#endif diff --git a/thirdparty/icu4c/common/unicode/umisc.h b/thirdparty/icu4c/common/unicode/umisc.h new file mode 100644 index 0000000000..213290b9af --- /dev/null +++ b/thirdparty/icu4c/common/unicode/umisc.h @@ -0,0 +1,62 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2006, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: umisc.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999oct15 +* created by: Markus W. Scherer +*/ + +#ifndef UMISC_H +#define UMISC_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C API:misc definitions + * + * This file contains miscellaneous definitions for the C APIs. + */ + +U_CDECL_BEGIN + +/** A struct representing a range of text containing a specific field + * @stable ICU 2.0 + */ +typedef struct UFieldPosition { + /** + * The field + * @stable ICU 2.0 + */ + int32_t field; + /** + * The start of the text range containing field + * @stable ICU 2.0 + */ + int32_t beginIndex; + /** + * The limit of the text range containing field + * @stable ICU 2.0 + */ + int32_t endIndex; +} UFieldPosition; + +#if !UCONFIG_NO_SERVICE +/** + * Opaque type returned by registerInstance, registerFactory and unregister for service registration. + * @stable ICU 2.6 + */ +typedef const void* URegistryKey; +#endif + +U_CDECL_END + +#endif diff --git a/thirdparty/icu4c/common/unicode/umutablecptrie.h b/thirdparty/icu4c/common/unicode/umutablecptrie.h new file mode 100644 index 0000000000..5325d58147 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/umutablecptrie.h @@ -0,0 +1,241 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// umutablecptrie.h (split out of ucptrie.h) +// created: 2018jan24 Markus W. Scherer + +#ifndef __UMUTABLECPTRIE_H__ +#define __UMUTABLECPTRIE_H__ + +#include "unicode/utypes.h" + +#include "unicode/ucpmap.h" +#include "unicode/ucptrie.h" +#include "unicode/utf8.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +U_CDECL_BEGIN + +/** + * \file + * + * This file defines a mutable Unicode code point trie. + * + * @see UCPTrie + * @see UMutableCPTrie + */ + +/** + * Mutable Unicode code point trie. + * Fast map from Unicode code points (U+0000..U+10FFFF) to 32-bit integer values. + * For details see http://site.icu-project.org/design/struct/utrie + * + * Setting values (especially ranges) and lookup is fast. + * The mutable trie is only somewhat space-efficient. + * It builds a compacted, immutable UCPTrie. + * + * This trie can be modified while iterating over its contents. + * For example, it is possible to merge its values with those from another + * set of ranges (e.g., another mutable or immutable trie): + * Iterate over those source ranges; for each of them iterate over this trie; + * add the source value into the value of each trie range. + * + * @see UCPTrie + * @see umutablecptrie_buildImmutable + * @stable ICU 63 + */ +typedef struct UMutableCPTrie UMutableCPTrie; + +/** + * Creates a mutable trie that initially maps each Unicode code point to the same value. + * It uses 32-bit data values until umutablecptrie_buildImmutable() is called. + * umutablecptrie_buildImmutable() takes a valueWidth parameter which + * determines the number of bits in the data value in the resulting UCPTrie. + * You must umutablecptrie_close() the trie once you are done using it. + * + * @param initialValue the initial value that is set for all code points + * @param errorValue the value for out-of-range code points and ill-formed UTF-8/16 + * @param pErrorCode an in/out ICU UErrorCode + * @return the trie + * @stable ICU 63 + */ +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode); + +/** + * Clones a mutable trie. + * You must umutablecptrie_close() the clone once you are done using it. + * + * @param other the trie to clone + * @param pErrorCode an in/out ICU UErrorCode + * @return the trie clone + * @stable ICU 63 + */ +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode); + +/** + * Closes a mutable trie and releases associated memory. + * + * @param trie the trie + * @stable ICU 63 + */ +U_CAPI void U_EXPORT2 +umutablecptrie_close(UMutableCPTrie *trie); + +/** + * Creates a mutable trie with the same contents as the UCPMap. + * You must umutablecptrie_close() the mutable trie once you are done using it. + * + * @param map the source map + * @param pErrorCode an in/out ICU UErrorCode + * @return the mutable trie + * @stable ICU 63 + */ +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode); + +/** + * Creates a mutable trie with the same contents as the immutable one. + * You must umutablecptrie_close() the mutable trie once you are done using it. + * + * @param trie the immutable trie + * @param pErrorCode an in/out ICU UErrorCode + * @return the mutable trie + * @stable ICU 63 + */ +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode); + +/** + * Returns the value for a code point as stored in the trie. + * + * @param trie the trie + * @param c the code point + * @return the value + * @stable ICU 63 + */ +U_CAPI uint32_t U_EXPORT2 +umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c); + +/** + * Returns the last code point such that all those from start to there have the same value. + * Can be used to efficiently iterate over all same-value ranges in a trie. + * (This is normally faster than iterating over code points and get()ting each value, + * but much slower than a data structure that stores ranges directly.) + * + * The trie can be modified between calls to this function. + * + * If the UCPMapValueFilter function pointer is not NULL, then + * the value to be delivered is passed through that function, and the return value is the end + * of the range where all values are modified to the same actual value. + * The value is unchanged if that function pointer is NULL. + * + * See the same-signature ucptrie_getRange() for a code sample. + * + * @param trie the trie + * @param start range start + * @param option defines whether surrogates are treated normally, + * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL + * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL + * @param filter a pointer to a function that may modify the trie data value, + * or NULL if the values from the trie are to be used unmodified + * @param context an opaque pointer that is passed on to the filter function + * @param pValue if not NULL, receives the value that every code point start..end has; + * may have been modified by filter(context, trie value) + * if that function pointer is not NULL + * @return the range end code point, or -1 if start is not a valid code point + * @stable ICU 63 + */ +U_CAPI UChar32 U_EXPORT2 +umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue); + +/** + * Sets a value for a code point. + * + * @param trie the trie + * @param c the code point + * @param value the value + * @param pErrorCode an in/out ICU UErrorCode + * @stable ICU 63 + */ +U_CAPI void U_EXPORT2 +umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode); + +/** + * Sets a value for each code point [start..end]. + * Faster and more space-efficient than setting the value for each code point separately. + * + * @param trie the trie + * @param start the first code point to get the value + * @param end the last code point to get the value (inclusive) + * @param value the value + * @param pErrorCode an in/out ICU UErrorCode + * @stable ICU 63 + */ +U_CAPI void U_EXPORT2 +umutablecptrie_setRange(UMutableCPTrie *trie, + UChar32 start, UChar32 end, + uint32_t value, UErrorCode *pErrorCode); + +/** + * Compacts the data and builds an immutable UCPTrie according to the parameters. + * After this, the mutable trie will be empty. + * + * The mutable trie stores 32-bit values until buildImmutable() is called. + * If values shorter than 32 bits are to be stored in the immutable trie, + * then the upper bits are discarded. + * For example, when the mutable trie contains values 0x81, -0x7f, and 0xa581, + * and the value width is 8 bits, then each of these is stored as 0x81 + * and the immutable trie will return that as an unsigned value. + * (Some implementations may want to make productive temporary use of the upper bits + * until buildImmutable() discards them.) + * + * Not every possible set of mappings can be built into a UCPTrie, + * because of limitations resulting from speed and space optimizations. + * Every Unicode assigned character can be mapped to a unique value. + * Typical data yields data structures far smaller than the limitations. + * + * It is possible to construct extremely unusual mappings that exceed the data structure limits. + * In such a case this function will fail with a U_INDEX_OUTOFBOUNDS_ERROR. + * + * @param trie the trie trie + * @param type selects the trie type + * @param valueWidth selects the number of bits in a trie data value; if smaller than 32 bits, + * then the values stored in the trie will be truncated first + * @param pErrorCode an in/out ICU UErrorCode + * + * @see umutablecptrie_fromUCPTrie + * @stable ICU 63 + */ +U_CAPI UCPTrie * U_EXPORT2 +umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth, + UErrorCode *pErrorCode); + +U_CDECL_END + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUMutableCPTriePointer + * "Smart pointer" class, closes a UMutableCPTrie via umutablecptrie_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 63 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUMutableCPTriePointer, UMutableCPTrie, umutablecptrie_close); + +U_NAMESPACE_END + +#endif + +#endif diff --git a/thirdparty/icu4c/common/unicode/unifilt.h b/thirdparty/icu4c/common/unicode/unifilt.h new file mode 100644 index 0000000000..420e1a1905 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/unifilt.h @@ -0,0 +1,136 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2010, International Business Machines Corporation and others. +* All Rights Reserved. +********************************************************************** +* Date Name Description +* 11/17/99 aliu Creation. +********************************************************************** +*/ +#ifndef UNIFILT_H +#define UNIFILT_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/unifunct.h" +#include "unicode/unimatch.h" + +/** + * \file + * \brief C++ API: Unicode Filter + */ + +U_NAMESPACE_BEGIN + +/** + * U_ETHER is used to represent character values for positions outside + * a range. For example, transliterator uses this to represent + * characters outside the range contextStart..contextLimit-1. This + * allows explicit matching by rules and UnicodeSets of text outside a + * defined range. + * @stable ICU 3.0 + */ +#define U_ETHER ((char16_t)0xFFFF) + +/** + * + * <code>UnicodeFilter</code> defines a protocol for selecting a + * subset of the full range (U+0000 to U+10FFFF) of Unicode characters. + * Currently, filters are used in conjunction with classes like {@link + * Transliterator} to only process selected characters through a + * transformation. + * + * <p>Note: UnicodeFilter currently stubs out two pure virtual methods + * of its base class, UnicodeMatcher. These methods are toPattern() + * and matchesIndexValue(). This is done so that filter classes that + * are not actually used as matchers -- specifically, those in the + * UnicodeFilterLogic component, and those in tests -- can continue to + * work without defining these methods. As long as a filter is not + * used in an RBT during real transliteration, these methods will not + * be called. However, this breaks the UnicodeMatcher base class + * protocol, and it is not a correct solution. + * + * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter + * hierarchy and either redesign it, or simply remove the stubs in + * UnicodeFilter and force subclasses to implement the full + * UnicodeMatcher protocol. + * + * @see UnicodeFilterLogic + * @stable ICU 2.0 + */ +class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { + +public: + /** + * Destructor + * @stable ICU 2.0 + */ + virtual ~UnicodeFilter(); + + /** + * Clones this object polymorphically. + * The caller owns the result and should delete it when done. + * @return clone, or nullptr if an error occurred + * @stable ICU 2.4 + */ + virtual UnicodeFilter* clone() const = 0; + + /** + * Returns <tt>true</tt> for characters that are in the selected + * subset. In other words, if a character is <b>to be + * filtered</b>, then <tt>contains()</tt> returns + * <b><tt>false</tt></b>. + * @stable ICU 2.0 + */ + virtual UBool contains(UChar32 c) const = 0; + + /** + * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer + * and return the pointer. + * @stable ICU 2.4 + */ + virtual UnicodeMatcher* toMatcher() const; + + /** + * Implement UnicodeMatcher API. + * @stable ICU 2.4 + */ + virtual UMatchDegree matches(const Replaceable& text, + int32_t& offset, + int32_t limit, + UBool incremental); + + /** + * UnicodeFunctor API. Nothing to do. + * @stable ICU 2.4 + */ + virtual void setData(const TransliterationRuleData*); + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + +protected: + + /* + * Since this class has pure virtual functions, + * a constructor can't be used. + * @stable ICU 2.0 + */ +/* UnicodeFilter();*/ +}; + +/*inline UnicodeFilter::UnicodeFilter() {}*/ + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/unifunct.h b/thirdparty/icu4c/common/unicode/unifunct.h new file mode 100644 index 0000000000..7d31af7daf --- /dev/null +++ b/thirdparty/icu4c/common/unicode/unifunct.h @@ -0,0 +1,132 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2002-2005, International Business Machines Corporation +* and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 01/14/2002 aliu Creation. +********************************************************************** +*/ +#ifndef UNIFUNCT_H +#define UNIFUNCT_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: Unicode Functor + */ + +U_NAMESPACE_BEGIN + +class UnicodeMatcher; +class UnicodeReplacer; +class TransliterationRuleData; + +/** + * <code>UnicodeFunctor</code> is an abstract base class for objects + * that perform match and/or replace operations on Unicode strings. + * @author Alan Liu + * @stable ICU 2.4 + */ +class U_COMMON_API UnicodeFunctor : public UObject { + +public: + + /** + * Destructor + * @stable ICU 2.4 + */ + virtual ~UnicodeFunctor(); + + /** + * Return a copy of this object. All UnicodeFunctor objects + * have to support cloning in order to allow classes using + * UnicodeFunctor to implement cloning. + * @stable ICU 2.4 + */ + virtual UnicodeFunctor* clone() const = 0; + + /** + * Cast 'this' to a UnicodeMatcher* pointer and return the + * pointer, or null if this is not a UnicodeMatcher*. Subclasses + * that mix in UnicodeMatcher as a base class must override this. + * This protocol is required because a pointer to a UnicodeFunctor + * cannot be cast to a pointer to a UnicodeMatcher, since + * UnicodeMatcher is a mixin that does not derive from + * UnicodeFunctor. + * @stable ICU 2.4 + */ + virtual UnicodeMatcher* toMatcher() const; + + /** + * Cast 'this' to a UnicodeReplacer* pointer and return the + * pointer, or null if this is not a UnicodeReplacer*. Subclasses + * that mix in UnicodeReplacer as a base class must override this. + * This protocol is required because a pointer to a UnicodeFunctor + * cannot be cast to a pointer to a UnicodeReplacer, since + * UnicodeReplacer is a mixin that does not derive from + * UnicodeFunctor. + * @stable ICU 2.4 + */ + virtual UnicodeReplacer* toReplacer() const; + + /** + * Return the class ID for this class. This is useful only for + * comparing to a return value from getDynamicClassID(). + * @return The class ID for all objects of this class. + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Returns a unique class ID <b>polymorphically</b>. This method + * is to implement a simple version of RTTI, since not all C++ + * compilers support genuine RTTI. Polymorphic operator==() and + * clone() methods call this method. + * + * <p>Concrete subclasses of UnicodeFunctor should use the macro + * UOBJECT_DEFINE_RTTI_IMPLEMENTATION from uobject.h to + * provide definitios getStaticClassID and getDynamicClassID. + * + * @return The class ID for this object. All objects of a given + * class have the same class ID. Objects of other classes have + * different class IDs. + * @stable ICU 2.4 + */ + virtual UClassID getDynamicClassID(void) const = 0; + + /** + * Set the data object associated with this functor. The data + * object provides context for functor-to-standin mapping. This + * method is required when assigning a functor to a different data + * object. This function MAY GO AWAY later if the architecture is + * changed to pass data object pointers through the API. + * @internal ICU 2.1 + */ + virtual void setData(const TransliterationRuleData*) = 0; + +protected: + + /** + * Since this class has pure virtual functions, + * a constructor can't be used. + * @stable ICU 2.0 + */ + /*UnicodeFunctor();*/ + +}; + +/*inline UnicodeFunctor::UnicodeFunctor() {}*/ + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/unimatch.h b/thirdparty/icu4c/common/unicode/unimatch.h new file mode 100644 index 0000000000..302332f455 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/unimatch.h @@ -0,0 +1,168 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +* Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 07/18/01 aliu Creation. +********************************************************************** +*/ +#ifndef UNIMATCH_H +#define UNIMATCH_H + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Unicode Matcher + */ + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +class Replaceable; +class UnicodeString; +class UnicodeSet; + +/** + * Constants returned by <code>UnicodeMatcher::matches()</code> + * indicating the degree of match. + * @stable ICU 2.4 + */ +enum UMatchDegree { + /** + * Constant returned by <code>matches()</code> indicating a + * mismatch between the text and this matcher. The text contains + * a character which does not match, or the text does not contain + * all desired characters for a non-incremental match. + * @stable ICU 2.4 + */ + U_MISMATCH, + + /** + * Constant returned by <code>matches()</code> indicating a + * partial match between the text and this matcher. This value is + * only returned for incremental match operations. All characters + * of the text match, but more characters are required for a + * complete match. Alternatively, for variable-length matchers, + * all characters of the text match, and if more characters were + * supplied at limit, they might also match. + * @stable ICU 2.4 + */ + U_PARTIAL_MATCH, + + /** + * Constant returned by <code>matches()</code> indicating a + * complete match between the text and this matcher. For an + * incremental variable-length match, this value is returned if + * the given text matches, and it is known that additional + * characters would not alter the extent of the match. + * @stable ICU 2.4 + */ + U_MATCH +}; + +/** + * <code>UnicodeMatcher</code> defines a protocol for objects that can + * match a range of characters in a Replaceable string. + * @stable ICU 2.4 + */ +class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ { + +public: + /** + * Destructor. + * @stable ICU 2.4 + */ + virtual ~UnicodeMatcher(); + + /** + * Return a UMatchDegree value indicating the degree of match for + * the given text at the given offset. Zero, one, or more + * characters may be matched. + * + * Matching in the forward direction is indicated by limit > + * offset. Characters from offset forwards to limit-1 will be + * considered for matching. + * + * Matching in the reverse direction is indicated by limit < + * offset. Characters from offset backwards to limit+1 will be + * considered for matching. + * + * If limit == offset then the only match possible is a zero + * character match (which subclasses may implement if desired). + * + * As a side effect, advance the offset parameter to the limit of + * the matched substring. In the forward direction, this will be + * the index of the last matched character plus one. In the + * reverse direction, this will be the index of the last matched + * character minus one. + * + * <p>Note: This method is not const because some classes may + * modify their state as the result of a match. + * + * @param text the text to be matched + * @param offset on input, the index into text at which to begin + * matching. On output, the limit of the matched text. The + * number of matched characters is the output value of offset + * minus the input value. Offset should always point to the + * HIGH SURROGATE (leading code unit) of a pair of surrogates, + * both on entry and upon return. + * @param limit the limit index of text to be matched. Greater + * than offset for a forward direction match, less than offset for + * a backward direction match. The last character to be + * considered for matching will be text.charAt(limit-1) in the + * forward direction or text.charAt(limit+1) in the backward + * direction. + * @param incremental if true, then assume further characters may + * be inserted at limit and check for partial matching. Otherwise + * assume the text as given is complete. + * @return a match degree value indicating a full match, a partial + * match, or a mismatch. If incremental is false then + * U_PARTIAL_MATCH should never be returned. + * @stable ICU 2.4 + */ + virtual UMatchDegree matches(const Replaceable& text, + int32_t& offset, + int32_t limit, + UBool incremental) = 0; + + /** + * Returns a string representation of this matcher. If the result of + * calling this function is passed to the appropriate parser, it + * will produce another matcher that is equal to this one. + * @param result the string to receive the pattern. Previous + * contents will be deleted. + * @param escapeUnprintable if true then convert unprintable + * character to their hex escape representations, \\uxxxx or + * \\Uxxxxxxxx. Unprintable characters are those other than + * U+000A, U+0020..U+007E. + * @stable ICU 2.4 + */ + virtual UnicodeString& toPattern(UnicodeString& result, + UBool escapeUnprintable = false) const = 0; + + /** + * Returns true if this matcher will match a character c, where c + * & 0xFF == v, at offset, in the forward direction (with limit > + * offset). This is used by <tt>RuleBasedTransliterator</tt> for + * indexing. + * @stable ICU 2.4 + */ + virtual UBool matchesIndexValue(uint8_t v) const = 0; + + /** + * Union the set of all characters that may be matched by this object + * into the given set. + * @param toUnionTo the set into which to union the source characters + * @stable ICU 2.4 + */ + virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0; +}; + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/uniset.h b/thirdparty/icu4c/common/unicode/uniset.h new file mode 100644 index 0000000000..50b6360f3a --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uniset.h @@ -0,0 +1,1744 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +*************************************************************************** +* Copyright (C) 1999-2016, International Business Machines Corporation +* and others. All Rights Reserved. +*************************************************************************** +* Date Name Description +* 10/20/99 alan Creation. +*************************************************************************** +*/ + +#ifndef UNICODESET_H +#define UNICODESET_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/ucpmap.h" +#include "unicode/unifilt.h" +#include "unicode/unistr.h" +#include "unicode/uset.h" + +/** + * \file + * \brief C++ API: Unicode Set + */ + +U_NAMESPACE_BEGIN + +// Forward Declarations. +class BMPSet; +class ParsePosition; +class RBBIRuleScanner; +class SymbolTable; +class UnicodeSetStringSpan; +class UVector; +class RuleCharacterIterator; + +/** + * A mutable set of Unicode characters and multicharacter strings. Objects of this class + * represent <em>character classes</em> used in regular expressions. + * A character specifies a subset of Unicode code points. Legal + * code points are U+0000 to U+10FFFF, inclusive. + * + * <p>The UnicodeSet class is not designed to be subclassed. + * + * <p><code>UnicodeSet</code> supports two APIs. The first is the + * <em>operand</em> API that allows the caller to modify the value of + * a <code>UnicodeSet</code> object. It conforms to Java 2's + * <code>java.util.Set</code> interface, although + * <code>UnicodeSet</code> does not actually implement that + * interface. All methods of <code>Set</code> are supported, with the + * modification that they take a character range or single character + * instead of an <code>Object</code>, and they take a + * <code>UnicodeSet</code> instead of a <code>Collection</code>. The + * operand API may be thought of in terms of boolean logic: a boolean + * OR is implemented by <code>add</code>, a boolean AND is implemented + * by <code>retain</code>, a boolean XOR is implemented by + * <code>complement</code> taking an argument, and a boolean NOT is + * implemented by <code>complement</code> with no argument. In terms + * of traditional set theory function names, <code>add</code> is a + * union, <code>retain</code> is an intersection, <code>remove</code> + * is an asymmetric difference, and <code>complement</code> with no + * argument is a set complement with respect to the superset range + * <code>MIN_VALUE-MAX_VALUE</code> + * + * <p>The second API is the + * <code>applyPattern()</code>/<code>toPattern()</code> API from the + * <code>java.text.Format</code>-derived classes. Unlike the + * methods that add characters, add categories, and control the logic + * of the set, the method <code>applyPattern()</code> sets all + * attributes of a <code>UnicodeSet</code> at once, based on a + * string pattern. + * + * <p><b>Pattern syntax</b></p> + * + * Patterns are accepted by the constructors and the + * <code>applyPattern()</code> methods and returned by the + * <code>toPattern()</code> method. These patterns follow a syntax + * similar to that employed by version 8 regular expression character + * classes. Here are some simple examples: + * + * \htmlonly<blockquote>\endhtmlonly + * <table> + * <tr align="top"> + * <td nowrap valign="top" align="left"><code>[]</code></td> + * <td valign="top">No characters</td> + * </tr><tr align="top"> + * <td nowrap valign="top" align="left"><code>[a]</code></td> + * <td valign="top">The character 'a'</td> + * </tr><tr align="top"> + * <td nowrap valign="top" align="left"><code>[ae]</code></td> + * <td valign="top">The characters 'a' and 'e'</td> + * </tr> + * <tr> + * <td nowrap valign="top" align="left"><code>[a-e]</code></td> + * <td valign="top">The characters 'a' through 'e' inclusive, in Unicode code + * point order</td> + * </tr> + * <tr> + * <td nowrap valign="top" align="left"><code>[\\u4E01]</code></td> + * <td valign="top">The character U+4E01</td> + * </tr> + * <tr> + * <td nowrap valign="top" align="left"><code>[a{ab}{ac}]</code></td> + * <td valign="top">The character 'a' and the multicharacter strings "ab" and + * "ac"</td> + * </tr> + * <tr> + * <td nowrap valign="top" align="left"><code>[\\p{Lu}]</code></td> + * <td valign="top">All characters in the general category Uppercase Letter</td> + * </tr> + * </table> + * \htmlonly</blockquote>\endhtmlonly + * + * Any character may be preceded by a backslash in order to remove any special + * meaning. White space characters, as defined by UCharacter.isWhitespace(), are + * ignored, unless they are escaped. + * + * <p>Property patterns specify a set of characters having a certain + * property as defined by the Unicode standard. Both the POSIX-like + * "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized. For a + * complete list of supported property patterns, see the User's Guide + * for UnicodeSet at + * <a href="http://icu-project.org/userguide/unicodeSet.html"> + * http://icu-project.org/userguide/unicodeSet.html</a>. + * Actual determination of property data is defined by the underlying + * Unicode database as implemented by UCharacter. + * + * <p>Patterns specify individual characters, ranges of characters, and + * Unicode property sets. When elements are concatenated, they + * specify their union. To complement a set, place a '^' immediately + * after the opening '['. Property patterns are inverted by modifying + * their delimiters; "[:^foo]" and "\\P{foo}". In any other location, + * '^' has no special meaning. + * + * <p>Ranges are indicated by placing two a '-' between two + * characters, as in "a-z". This specifies the range of all + * characters from the left to the right, in Unicode order. If the + * left character is greater than or equal to the + * right character it is a syntax error. If a '-' occurs as the first + * character after the opening '[' or '[^', or if it occurs as the + * last character before the closing ']', then it is taken as a + * literal. Thus "[a\-b]", "[-ab]", and "[ab-]" all indicate the same + * set of three characters, 'a', 'b', and '-'. + * + * <p>Sets may be intersected using the '&' operator or the asymmetric + * set difference may be taken using the '-' operator, for example, + * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters + * with values less than 4096. Operators ('&' and '|') have equal + * precedence and bind left-to-right. Thus + * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to + * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]". This only really matters for + * difference; intersection is commutative. + * + * <table> + * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a' + * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a' + * through 'z' and all letters in between, in Unicode order + * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing + * all characters but 'a' through 'z', + * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF + * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code> + * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em> + * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code> + * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em> + * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code> + * <td>The asymmetric difference of sets specified by <em>pat1</em> and + * <em>pat2</em> + * <tr valign=top><td nowrap><code>[:Lu:] or \\p{Lu}</code> + * <td>The set of characters having the specified + * Unicode property; in + * this case, Unicode uppercase letters + * <tr valign=top><td nowrap><code>[:^Lu:] or \\P{Lu}</code> + * <td>The set of characters <em>not</em> having the given + * Unicode property + * </table> + * + * <p><b>Warning</b>: you cannot add an empty string ("") to a UnicodeSet.</p> + * + * <p><b>Formal syntax</b></p> + * + * \htmlonly<blockquote>\endhtmlonly + * <table> + * <tr align="top"> + * <td nowrap valign="top" align="right"><code>pattern := </code></td> + * <td valign="top"><code>('[' '^'? item* ']') | + * property</code></td> + * </tr> + * <tr align="top"> + * <td nowrap valign="top" align="right"><code>item := </code></td> + * <td valign="top"><code>char | (char '-' char) | pattern-expr<br> + * </code></td> + * </tr> + * <tr align="top"> + * <td nowrap valign="top" align="right"><code>pattern-expr := </code></td> + * <td valign="top"><code>pattern | pattern-expr pattern | + * pattern-expr op pattern<br> + * </code></td> + * </tr> + * <tr align="top"> + * <td nowrap valign="top" align="right"><code>op := </code></td> + * <td valign="top"><code>'&' | '-'<br> + * </code></td> + * </tr> + * <tr align="top"> + * <td nowrap valign="top" align="right"><code>special := </code></td> + * <td valign="top"><code>'[' | ']' | '-'<br> + * </code></td> + * </tr> + * <tr align="top"> + * <td nowrap valign="top" align="right"><code>char := </code></td> + * <td valign="top"><em>any character that is not</em><code> special<br> + * | ('\' </code><em>any character</em><code>)<br> + * | ('\\u' hex hex hex hex)<br> + * </code></td> + * </tr> + * <tr align="top"> + * <td nowrap valign="top" align="right"><code>hex := </code></td> + * <td valign="top"><em>any character for which + * </em><code>Character.digit(c, 16)</code><em> + * returns a non-negative result</em></td> + * </tr> + * <tr> + * <td nowrap valign="top" align="right"><code>property := </code></td> + * <td valign="top"><em>a Unicode property set pattern</em></td> + * </tr> + * </table> + * <br> + * <table border="1"> + * <tr> + * <td>Legend: <table> + * <tr> + * <td nowrap valign="top"><code>a := b</code></td> + * <td width="20" valign="top"> </td> + * <td valign="top"><code>a</code> may be replaced by <code>b</code> </td> + * </tr> + * <tr> + * <td nowrap valign="top"><code>a?</code></td> + * <td valign="top"></td> + * <td valign="top">zero or one instance of <code>a</code><br> + * </td> + * </tr> + * <tr> + * <td nowrap valign="top"><code>a*</code></td> + * <td valign="top"></td> + * <td valign="top">one or more instances of <code>a</code><br> + * </td> + * </tr> + * <tr> + * <td nowrap valign="top"><code>a | b</code></td> + * <td valign="top"></td> + * <td valign="top">either <code>a</code> or <code>b</code><br> + * </td> + * </tr> + * <tr> + * <td nowrap valign="top"><code>'a'</code></td> + * <td valign="top"></td> + * <td valign="top">the literal string between the quotes </td> + * </tr> + * </table> + * </td> + * </tr> + * </table> + * \htmlonly</blockquote>\endhtmlonly + * + * <p>Note: + * - Most UnicodeSet methods do not take a UErrorCode parameter because + * there are usually very few opportunities for failure other than a shortage + * of memory, error codes in low-level C++ string methods would be inconvenient, + * and the error code as the last parameter (ICU convention) would prevent + * the use of default parameter values. + * Instead, such methods set the UnicodeSet into a "bogus" state + * (see isBogus()) if an error occurs. + * + * @author Alan Liu + * @stable ICU 2.0 + */ +class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { +private: + /** + * Enough for sets with few ranges. + * For example, White_Space has 10 ranges, list length 21. + */ + static constexpr int32_t INITIAL_CAPACITY = 25; + // fFlags constant + static constexpr uint8_t kIsBogus = 1; // This set is bogus (i.e. not valid) + + UChar32* list = stackList; // MUST be terminated with HIGH + int32_t capacity = INITIAL_CAPACITY; // capacity of list + int32_t len = 1; // length of list used; 1 <= len <= capacity + uint8_t fFlags = 0; // Bit flag (see constants above) + + BMPSet *bmpSet = nullptr; // The set is frozen iff either bmpSet or stringSpan is not NULL. + UChar32* buffer = nullptr; // internal buffer, may be NULL + int32_t bufferCapacity = 0; // capacity of buffer + + /** + * The pattern representation of this set. This may not be the + * most economical pattern. It is the pattern supplied to + * applyPattern(), with variables substituted and whitespace + * removed. For sets constructed without applyPattern(), or + * modified using the non-pattern API, this string will be empty, + * indicating that toPattern() must generate a pattern + * representation from the inversion list. + */ + char16_t *pat = nullptr; + int32_t patLen = 0; + + UVector* strings = nullptr; // maintained in sorted order + UnicodeSetStringSpan *stringSpan = nullptr; + + /** + * Initial list array. + * Avoids some heap allocations, and list is never nullptr. + * Increases the object size a bit. + */ + UChar32 stackList[INITIAL_CAPACITY]; + +public: + /** + * Determine if this object contains a valid set. + * A bogus set has no value. It is different from an empty set. + * It can be used to indicate that no set value is available. + * + * @return true if the set is bogus/invalid, false otherwise + * @see setToBogus() + * @stable ICU 4.0 + */ + inline UBool isBogus(void) const; + + /** + * Make this UnicodeSet object invalid. + * The string will test true with isBogus(). + * + * A bogus set has no value. It is different from an empty set. + * It can be used to indicate that no set value is available. + * + * This utility function is used throughout the UnicodeSet + * implementation to indicate that a UnicodeSet operation failed, + * and may be used in other functions, + * especially but not exclusively when such functions do not + * take a UErrorCode for simplicity. + * + * @see isBogus() + * @stable ICU 4.0 + */ + void setToBogus(); + +public: + + enum { + /** + * Minimum value that can be stored in a UnicodeSet. + * @stable ICU 2.4 + */ + MIN_VALUE = 0, + + /** + * Maximum value that can be stored in a UnicodeSet. + * @stable ICU 2.4 + */ + MAX_VALUE = 0x10ffff + }; + + //---------------------------------------------------------------- + // Constructors &c + //---------------------------------------------------------------- + +public: + + /** + * Constructs an empty set. + * @stable ICU 2.0 + */ + UnicodeSet(); + + /** + * Constructs a set containing the given range. If <code>end < + * start</code> then an empty set is created. + * + * @param start first character, inclusive, of range + * @param end last character, inclusive, of range + * @stable ICU 2.4 + */ + UnicodeSet(UChar32 start, UChar32 end); + +#ifndef U_HIDE_INTERNAL_API + /** + * @internal + */ + enum ESerialization { + kSerialized /* result of serialize() */ + }; + + /** + * Constructs a set from the output of serialize(). + * + * @param buffer the 16 bit array + * @param bufferLen the original length returned from serialize() + * @param serialization the value 'kSerialized' + * @param status error code + * + * @internal + */ + UnicodeSet(const uint16_t buffer[], int32_t bufferLen, + ESerialization serialization, UErrorCode &status); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Constructs a set from the given pattern. See the class + * description for the syntax of the pattern language. + * @param pattern a string specifying what characters are in the set + * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern + * contains a syntax error. + * @stable ICU 2.0 + */ + UnicodeSet(const UnicodeString& pattern, + UErrorCode& status); + +#ifndef U_HIDE_INTERNAL_API + /** + * Constructs a set from the given pattern. See the class + * description for the syntax of the pattern language. + * @param pattern a string specifying what characters are in the set + * @param options bitmask for options to apply to the pattern. + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. + * @param symbols a symbol table mapping variable names to values + * and stand-in characters to UnicodeSets; may be NULL + * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern + * contains a syntax error. + * @internal + */ + UnicodeSet(const UnicodeString& pattern, + uint32_t options, + const SymbolTable* symbols, + UErrorCode& status); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Constructs a set from the given pattern. See the class description + * for the syntax of the pattern language. + * @param pattern a string specifying what characters are in the set + * @param pos on input, the position in pattern at which to start parsing. + * On output, the position after the last character parsed. + * @param options bitmask for options to apply to the pattern. + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. + * @param symbols a symbol table mapping variable names to values + * and stand-in characters to UnicodeSets; may be NULL + * @param status input-output error code + * @stable ICU 2.8 + */ + UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, + uint32_t options, + const SymbolTable* symbols, + UErrorCode& status); + + /** + * Constructs a set that is identical to the given UnicodeSet. + * @stable ICU 2.0 + */ + UnicodeSet(const UnicodeSet& o); + + /** + * Destructs the set. + * @stable ICU 2.0 + */ + virtual ~UnicodeSet(); + + /** + * Assigns this object to be a copy of another. + * A frozen set will not be modified. + * @stable ICU 2.0 + */ + UnicodeSet& operator=(const UnicodeSet& o); + + /** + * Compares the specified object with this set for equality. Returns + * <tt>true</tt> if the two sets + * have the same size, and every member of the specified set is + * contained in this set (or equivalently, every member of this set is + * contained in the specified set). + * + * @param o set to be compared for equality with this set. + * @return <tt>true</tt> if the specified set is equal to this set. + * @stable ICU 2.0 + */ + virtual UBool operator==(const UnicodeSet& o) const; + + /** + * Compares the specified object with this set for equality. Returns + * <tt>true</tt> if the specified set is not equal to this set. + * @stable ICU 2.0 + */ + inline UBool operator!=(const UnicodeSet& o) const; + + /** + * Returns a copy of this object. All UnicodeFunctor objects have + * to support cloning in order to allow classes using + * UnicodeFunctors, such as Transliterator, to implement cloning. + * If this set is frozen, then the clone will be frozen as well. + * Use cloneAsThawed() for a mutable clone of a frozen set. + * @see cloneAsThawed + * @stable ICU 2.0 + */ + virtual UnicodeSet* clone() const; + + /** + * Returns the hash code value for this set. + * + * @return the hash code value for this set. + * @see Object#hashCode() + * @stable ICU 2.0 + */ + virtual int32_t hashCode(void) const; + + /** + * Get a UnicodeSet pointer from a USet + * + * @param uset a USet (the ICU plain C type for UnicodeSet) + * @return the corresponding UnicodeSet pointer. + * + * @stable ICU 4.2 + */ + inline static UnicodeSet *fromUSet(USet *uset); + + /** + * Get a UnicodeSet pointer from a const USet + * + * @param uset a const USet (the ICU plain C type for UnicodeSet) + * @return the corresponding UnicodeSet pointer. + * + * @stable ICU 4.2 + */ + inline static const UnicodeSet *fromUSet(const USet *uset); + + /** + * Produce a USet * pointer for this UnicodeSet. + * USet is the plain C type for UnicodeSet + * + * @return a USet pointer for this UnicodeSet + * @stable ICU 4.2 + */ + inline USet *toUSet(); + + + /** + * Produce a const USet * pointer for this UnicodeSet. + * USet is the plain C type for UnicodeSet + * + * @return a const USet pointer for this UnicodeSet + * @stable ICU 4.2 + */ + inline const USet * toUSet() const; + + + //---------------------------------------------------------------- + // Freezable API + //---------------------------------------------------------------- + + /** + * Determines whether the set has been frozen (made immutable) or not. + * See the ICU4J Freezable interface for details. + * @return true/false for whether the set has been frozen + * @see freeze + * @see cloneAsThawed + * @stable ICU 3.8 + */ + inline UBool isFrozen() const; + + /** + * Freeze the set (make it immutable). + * Once frozen, it cannot be unfrozen and is therefore thread-safe + * until it is deleted. + * See the ICU4J Freezable interface for details. + * Freezing the set may also make some operations faster, for example + * contains() and span(). + * A frozen set will not be modified. (It remains frozen.) + * @return this set. + * @see isFrozen + * @see cloneAsThawed + * @stable ICU 3.8 + */ + UnicodeSet *freeze(); + + /** + * Clone the set and make the clone mutable. + * See the ICU4J Freezable interface for details. + * @return the mutable clone + * @see freeze + * @see isFrozen + * @stable ICU 3.8 + */ + UnicodeSet *cloneAsThawed() const; + + //---------------------------------------------------------------- + // Public API + //---------------------------------------------------------------- + + /** + * Make this object represent the range `start - end`. + * If `end > start` then this object is set to an empty range. + * A frozen set will not be modified. + * + * @param start first character in the set, inclusive + * @param end last character in the set, inclusive + * @stable ICU 2.4 + */ + UnicodeSet& set(UChar32 start, UChar32 end); + + /** + * Return true if the given position, in the given pattern, appears + * to be the start of a UnicodeSet pattern. + * @stable ICU 2.4 + */ + static UBool resemblesPattern(const UnicodeString& pattern, + int32_t pos); + + /** + * Modifies this set to represent the set specified by the given + * pattern, ignoring Unicode Pattern_White_Space characters. + * See the class description for the syntax of the pattern language. + * A frozen set will not be modified. + * @param pattern a string specifying what characters are in the set + * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern + * contains a syntax error. + * <em> Empties the set passed before applying the pattern.</em> + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeSet& applyPattern(const UnicodeString& pattern, + UErrorCode& status); + +#ifndef U_HIDE_INTERNAL_API + /** + * Modifies this set to represent the set specified by the given + * pattern, optionally ignoring Unicode Pattern_White_Space characters. + * See the class description for the syntax of the pattern language. + * A frozen set will not be modified. + * @param pattern a string specifying what characters are in the set + * @param options bitmask for options to apply to the pattern. + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. + * @param symbols a symbol table mapping variable names to + * values and stand-ins to UnicodeSets; may be NULL + * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern + * contains a syntax error. + *<em> Empties the set passed before applying the pattern.</em> + * @return a reference to this + * @internal + */ + UnicodeSet& applyPattern(const UnicodeString& pattern, + uint32_t options, + const SymbolTable* symbols, + UErrorCode& status); +#endif /* U_HIDE_INTERNAL_API */ + + /** + * Parses the given pattern, starting at the given position. The + * character at pattern.charAt(pos.getIndex()) must be '[', or the + * parse fails. Parsing continues until the corresponding closing + * ']'. If a syntax error is encountered between the opening and + * closing brace, the parse fails. Upon return from a successful + * parse, the ParsePosition is updated to point to the character + * following the closing ']', and a StringBuffer containing a + * pairs list for the parsed pattern is returned. This method calls + * itself recursively to parse embedded subpatterns. + *<em> Empties the set passed before applying the pattern.</em> + * A frozen set will not be modified. + * + * @param pattern the string containing the pattern to be parsed. + * The portion of the string from pos.getIndex(), which must be a + * '[', to the corresponding closing ']', is parsed. + * @param pos upon entry, the position at which to being parsing. + * The character at pattern.charAt(pos.getIndex()) must be a '['. + * Upon return from a successful parse, pos.getIndex() is either + * the character after the closing ']' of the parsed pattern, or + * pattern.length() if the closing ']' is the last character of + * the pattern string. + * @param options bitmask for options to apply to the pattern. + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. + * @param symbols a symbol table mapping variable names to + * values and stand-ins to UnicodeSets; may be NULL + * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern + * contains a syntax error. + * @return a reference to this + * @stable ICU 2.8 + */ + UnicodeSet& applyPattern(const UnicodeString& pattern, + ParsePosition& pos, + uint32_t options, + const SymbolTable* symbols, + UErrorCode& status); + + /** + * Returns a string representation of this set. If the result of + * calling this function is passed to a UnicodeSet constructor, it + * will produce another set that is equal to this one. + * A frozen set will not be modified. + * @param result the string to receive the rules. Previous + * contents will be deleted. + * @param escapeUnprintable if true then convert unprintable + * character to their hex escape representations, \\uxxxx or + * \\Uxxxxxxxx. Unprintable characters are those other than + * U+000A, U+0020..U+007E. + * @stable ICU 2.0 + */ + virtual UnicodeString& toPattern(UnicodeString& result, + UBool escapeUnprintable = false) const; + + /** + * Modifies this set to contain those code points which have the given value + * for the given binary or enumerated property, as returned by + * u_getIntPropertyValue. Prior contents of this set are lost. + * A frozen set will not be modified. + * + * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1 + * or UCHAR_INT_START..UCHAR_INT_LIMIT-1 + * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1. + * + * @param value a value in the range u_getIntPropertyMinValue(prop).. + * u_getIntPropertyMaxValue(prop), with one exception. If prop is + * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but + * rather a mask value produced by U_GET_GC_MASK(). This allows grouped + * categories such as [:L:] to be represented. + * + * @param ec error code input/output parameter + * + * @return a reference to this set + * + * @stable ICU 2.4 + */ + UnicodeSet& applyIntPropertyValue(UProperty prop, + int32_t value, + UErrorCode& ec); + + /** + * Modifies this set to contain those code points which have the + * given value for the given property. Prior contents of this + * set are lost. + * A frozen set will not be modified. + * + * @param prop a property alias, either short or long. The name is matched + * loosely. See PropertyAliases.txt for names and a description of loose + * matching. If the value string is empty, then this string is interpreted + * as either a General_Category value alias, a Script value alias, a binary + * property alias, or a special ID. Special IDs are matched loosely and + * correspond to the following sets: + * + * "ANY" = [\\u0000-\\U0010FFFF], + * "ASCII" = [\\u0000-\\u007F], + * "Assigned" = [:^Cn:]. + * + * @param value a value alias, either short or long. The name is matched + * loosely. See PropertyValueAliases.txt for names and a description of + * loose matching. In addition to aliases listed, numeric values and + * canonical combining classes may be expressed numerically, e.g., ("nv", + * "0.5") or ("ccc", "220"). The value string may also be empty. + * + * @param ec error code input/output parameter + * + * @return a reference to this set + * + * @stable ICU 2.4 + */ + UnicodeSet& applyPropertyAlias(const UnicodeString& prop, + const UnicodeString& value, + UErrorCode& ec); + + /** + * Returns the number of elements in this set (its cardinality). + * Note than the elements of a set may include both individual + * codepoints and strings. + * + * @return the number of elements in this set (its cardinality). + * @stable ICU 2.0 + */ + virtual int32_t size(void) const; + + /** + * Returns <tt>true</tt> if this set contains no elements. + * + * @return <tt>true</tt> if this set contains no elements. + * @stable ICU 2.0 + */ + virtual UBool isEmpty(void) const; + + /** + * Returns true if this set contains the given character. + * This function works faster with a frozen set. + * @param c character to be checked for containment + * @return true if the test condition is met + * @stable ICU 2.0 + */ + virtual UBool contains(UChar32 c) const; + + /** + * Returns true if this set contains every character + * of the given range. + * @param start first character, inclusive, of the range + * @param end last character, inclusive, of the range + * @return true if the test condition is met + * @stable ICU 2.0 + */ + virtual UBool contains(UChar32 start, UChar32 end) const; + + /** + * Returns <tt>true</tt> if this set contains the given + * multicharacter string. + * @param s string to be checked for containment + * @return <tt>true</tt> if this set contains the specified string + * @stable ICU 2.4 + */ + UBool contains(const UnicodeString& s) const; + + /** + * Returns true if this set contains all the characters and strings + * of the given set. + * @param c set to be checked for containment + * @return true if the test condition is met + * @stable ICU 2.4 + */ + virtual UBool containsAll(const UnicodeSet& c) const; + + /** + * Returns true if this set contains all the characters + * of the given string. + * @param s string containing characters to be checked for containment + * @return true if the test condition is met + * @stable ICU 2.4 + */ + UBool containsAll(const UnicodeString& s) const; + + /** + * Returns true if this set contains none of the characters + * of the given range. + * @param start first character, inclusive, of the range + * @param end last character, inclusive, of the range + * @return true if the test condition is met + * @stable ICU 2.4 + */ + UBool containsNone(UChar32 start, UChar32 end) const; + + /** + * Returns true if this set contains none of the characters and strings + * of the given set. + * @param c set to be checked for containment + * @return true if the test condition is met + * @stable ICU 2.4 + */ + UBool containsNone(const UnicodeSet& c) const; + + /** + * Returns true if this set contains none of the characters + * of the given string. + * @param s string containing characters to be checked for containment + * @return true if the test condition is met + * @stable ICU 2.4 + */ + UBool containsNone(const UnicodeString& s) const; + + /** + * Returns true if this set contains one or more of the characters + * in the given range. + * @param start first character, inclusive, of the range + * @param end last character, inclusive, of the range + * @return true if the condition is met + * @stable ICU 2.4 + */ + inline UBool containsSome(UChar32 start, UChar32 end) const; + + /** + * Returns true if this set contains one or more of the characters + * and strings of the given set. + * @param s The set to be checked for containment + * @return true if the condition is met + * @stable ICU 2.4 + */ + inline UBool containsSome(const UnicodeSet& s) const; + + /** + * Returns true if this set contains one or more of the characters + * of the given string. + * @param s string containing characters to be checked for containment + * @return true if the condition is met + * @stable ICU 2.4 + */ + inline UBool containsSome(const UnicodeString& s) const; + + /** + * Returns the length of the initial substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Similar to the strspn() C library function. + * Unpaired surrogates are treated according to contains() of their surrogate code points. + * This function works faster with a frozen set and with a non-negative string length argument. + * @param s start of the string + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the length of the initial substring according to the spanCondition; + * 0 if the start of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ + int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const; + + /** + * Returns the end of the substring of the input string according to the USetSpanCondition. + * Same as <code>start+span(s.getBuffer()+start, s.length()-start, spanCondition)</code> + * after pinning start to 0<=start<=s.length(). + * @param s the string + * @param start the start index in the string for the span operation + * @param spanCondition specifies the containment condition + * @return the exclusive end of the substring according to the spanCondition; + * the substring s.tempSubStringBetween(start, end) fulfills the spanCondition + * @stable ICU 4.4 + * @see USetSpanCondition + */ + inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const; + + /** + * Returns the start of the trailing substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Unpaired surrogates are treated according to contains() of their surrogate code points. + * This function works faster with a frozen set and with a non-negative string length argument. + * @param s start of the string + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the start of the trailing substring according to the spanCondition; + * the string length if the end of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ + int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const; + + /** + * Returns the start of the substring of the input string according to the USetSpanCondition. + * Same as <code>spanBack(s.getBuffer(), limit, spanCondition)</code> + * after pinning limit to 0<=end<=s.length(). + * @param s the string + * @param limit the exclusive-end index in the string for the span operation + * (use s.length() or INT32_MAX for spanning back from the end of the string) + * @param spanCondition specifies the containment condition + * @return the start of the substring according to the spanCondition; + * the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition + * @stable ICU 4.4 + * @see USetSpanCondition + */ + inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const; + + /** + * Returns the length of the initial substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Similar to the strspn() C library function. + * Malformed byte sequences are treated according to contains(0xfffd). + * This function works faster with a frozen set and with a non-negative string length argument. + * @param s start of the string (UTF-8) + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the length of the initial substring according to the spanCondition; + * 0 if the start of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ + int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const; + + /** + * Returns the start of the trailing substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Malformed byte sequences are treated according to contains(0xfffd). + * This function works faster with a frozen set and with a non-negative string length argument. + * @param s start of the string (UTF-8) + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the start of the trailing substring according to the spanCondition; + * the string length if the end of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ + int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const; + + /** + * Implement UnicodeMatcher::matches() + * @stable ICU 2.4 + */ + virtual UMatchDegree matches(const Replaceable& text, + int32_t& offset, + int32_t limit, + UBool incremental); + +private: + /** + * Returns the longest match for s in text at the given position. + * If limit > start then match forward from start+1 to limit + * matching all characters except s.charAt(0). If limit < start, + * go backward starting from start-1 matching all characters + * except s.charAt(s.length()-1). This method assumes that the + * first character, text.charAt(start), matches s, so it does not + * check it. + * @param text the text to match + * @param start the first character to match. In the forward + * direction, text.charAt(start) is matched against s.charAt(0). + * In the reverse direction, it is matched against + * s.charAt(s.length()-1). + * @param limit the limit offset for matching, either last+1 in + * the forward direction, or last-1 in the reverse direction, + * where last is the index of the last character to match. + * @param s + * @return If part of s matches up to the limit, return |limit - + * start|. If all of s matches before reaching the limit, return + * s.length(). If there is a mismatch between s and text, return + * 0 + */ + static int32_t matchRest(const Replaceable& text, + int32_t start, int32_t limit, + const UnicodeString& s); + + /** + * Returns the smallest value i such that c < list[i]. Caller + * must ensure that c is a legal value or this method will enter + * an infinite loop. This method performs a binary search. + * @param c a character in the range MIN_VALUE..MAX_VALUE + * inclusive + * @return the smallest integer i in the range 0..len-1, + * inclusive, such that c < list[i] + */ + int32_t findCodePoint(UChar32 c) const; + +public: + + /** + * Implementation of UnicodeMatcher API. Union the set of all + * characters that may be matched by this object into the given + * set. + * @param toUnionTo the set into which to union the source characters + * @stable ICU 2.4 + */ + virtual void addMatchSetTo(UnicodeSet& toUnionTo) const; + + /** + * Returns the index of the given character within this set, where + * the set is ordered by ascending code point. If the character + * is not in this set, return -1. The inverse of this method is + * <code>charAt()</code>. + * @return an index from 0..size()-1, or -1 + * @stable ICU 2.4 + */ + int32_t indexOf(UChar32 c) const; + + /** + * Returns the character at the given index within this set, where + * the set is ordered by ascending code point. If the index is + * out of range, return (UChar32)-1. The inverse of this method is + * <code>indexOf()</code>. + * @param index an index from 0..size()-1 + * @return the character at the given index, or (UChar32)-1. + * @stable ICU 2.4 + */ + UChar32 charAt(int32_t index) const; + + /** + * Adds the specified range to this set if it is not already + * present. If this set already contains the specified range, + * the call leaves this set unchanged. If <code>end > start</code> + * then an empty range is added, leaving the set unchanged. + * This is equivalent to a boolean logic OR, or a set UNION. + * A frozen set will not be modified. + * + * @param start first character, inclusive, of range to be added + * to this set. + * @param end last character, inclusive, of range to be added + * to this set. + * @stable ICU 2.0 + */ + virtual UnicodeSet& add(UChar32 start, UChar32 end); + + /** + * Adds the specified character to this set if it is not already + * present. If this set already contains the specified character, + * the call leaves this set unchanged. + * A frozen set will not be modified. + * @stable ICU 2.0 + */ + UnicodeSet& add(UChar32 c); + + /** + * Adds the specified multicharacter to this set if it is not already + * present. If this set already contains the multicharacter, + * the call leaves this set unchanged. + * Thus "ch" => {"ch"} + * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> + * A frozen set will not be modified. + * @param s the source string + * @return this object, for chaining + * @stable ICU 2.4 + */ + UnicodeSet& add(const UnicodeString& s); + + private: + /** + * @return a code point IF the string consists of a single one. + * otherwise returns -1. + * @param s string to test + */ + static int32_t getSingleCP(const UnicodeString& s); + + void _add(const UnicodeString& s); + + public: + /** + * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} + * If this set already any particular character, it has no effect on that character. + * A frozen set will not be modified. + * @param s the source string + * @return this object, for chaining + * @stable ICU 2.4 + */ + UnicodeSet& addAll(const UnicodeString& s); + + /** + * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} + * If this set already any particular character, it has no effect on that character. + * A frozen set will not be modified. + * @param s the source string + * @return this object, for chaining + * @stable ICU 2.4 + */ + UnicodeSet& retainAll(const UnicodeString& s); + + /** + * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"} + * If this set already any particular character, it has no effect on that character. + * A frozen set will not be modified. + * @param s the source string + * @return this object, for chaining + * @stable ICU 2.4 + */ + UnicodeSet& complementAll(const UnicodeString& s); + + /** + * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"} + * If this set already any particular character, it has no effect on that character. + * A frozen set will not be modified. + * @param s the source string + * @return this object, for chaining + * @stable ICU 2.4 + */ + UnicodeSet& removeAll(const UnicodeString& s); + + /** + * Makes a set from a multicharacter string. Thus "ch" => {"ch"} + * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> + * @param s the source string + * @return a newly created set containing the given string. + * The caller owns the return object and is responsible for deleting it. + * @stable ICU 2.4 + */ + static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s); + + + /** + * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"} + * @param s the source string + * @return a newly created set containing the given characters + * The caller owns the return object and is responsible for deleting it. + * @stable ICU 2.4 + */ + static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s); + + /** + * Retain only the elements in this set that are contained in the + * specified range. If <code>end > start</code> then an empty range is + * retained, leaving the set empty. This is equivalent to + * a boolean logic AND, or a set INTERSECTION. + * A frozen set will not be modified. + * + * @param start first character, inclusive, of range to be retained + * to this set. + * @param end last character, inclusive, of range to be retained + * to this set. + * @stable ICU 2.0 + */ + virtual UnicodeSet& retain(UChar32 start, UChar32 end); + + + /** + * Retain the specified character from this set if it is present. + * A frozen set will not be modified. + * @stable ICU 2.0 + */ + UnicodeSet& retain(UChar32 c); + + /** + * Removes the specified range from this set if it is present. + * The set will not contain the specified range once the call + * returns. If <code>end > start</code> then an empty range is + * removed, leaving the set unchanged. + * A frozen set will not be modified. + * + * @param start first character, inclusive, of range to be removed + * from this set. + * @param end last character, inclusive, of range to be removed + * from this set. + * @stable ICU 2.0 + */ + virtual UnicodeSet& remove(UChar32 start, UChar32 end); + + /** + * Removes the specified character from this set if it is present. + * The set will not contain the specified range once the call + * returns. + * A frozen set will not be modified. + * @stable ICU 2.0 + */ + UnicodeSet& remove(UChar32 c); + + /** + * Removes the specified string from this set if it is present. + * The set will not contain the specified character once the call + * returns. + * A frozen set will not be modified. + * @param s the source string + * @return this object, for chaining + * @stable ICU 2.4 + */ + UnicodeSet& remove(const UnicodeString& s); + + /** + * Inverts this set. This operation modifies this set so that + * its value is its complement. This is equivalent to + * <code>complement(MIN_VALUE, MAX_VALUE)</code>. + * A frozen set will not be modified. + * @stable ICU 2.0 + */ + virtual UnicodeSet& complement(void); + + /** + * Complements the specified range in this set. Any character in + * the range will be removed if it is in this set, or will be + * added if it is not in this set. If <code>end > start</code> + * then an empty range is complemented, leaving the set unchanged. + * This is equivalent to a boolean logic XOR. + * A frozen set will not be modified. + * + * @param start first character, inclusive, of range to be removed + * from this set. + * @param end last character, inclusive, of range to be removed + * from this set. + * @stable ICU 2.0 + */ + virtual UnicodeSet& complement(UChar32 start, UChar32 end); + + /** + * Complements the specified character in this set. The character + * will be removed if it is in this set, or will be added if it is + * not in this set. + * A frozen set will not be modified. + * @stable ICU 2.0 + */ + UnicodeSet& complement(UChar32 c); + + /** + * Complement the specified string in this set. + * The set will not contain the specified string once the call + * returns. + * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> + * A frozen set will not be modified. + * @param s the string to complement + * @return this object, for chaining + * @stable ICU 2.4 + */ + UnicodeSet& complement(const UnicodeString& s); + + /** + * Adds all of the elements in the specified set to this set if + * they're not already present. This operation effectively + * modifies this set so that its value is the <i>union</i> of the two + * sets. The behavior of this operation is unspecified if the specified + * collection is modified while the operation is in progress. + * A frozen set will not be modified. + * + * @param c set whose elements are to be added to this set. + * @see #add(UChar32, UChar32) + * @stable ICU 2.0 + */ + virtual UnicodeSet& addAll(const UnicodeSet& c); + + /** + * Retains only the elements in this set that are contained in the + * specified set. In other words, removes from this set all of + * its elements that are not contained in the specified set. This + * operation effectively modifies this set so that its value is + * the <i>intersection</i> of the two sets. + * A frozen set will not be modified. + * + * @param c set that defines which elements this set will retain. + * @stable ICU 2.0 + */ + virtual UnicodeSet& retainAll(const UnicodeSet& c); + + /** + * Removes from this set all of its elements that are contained in the + * specified set. This operation effectively modifies this + * set so that its value is the <i>asymmetric set difference</i> of + * the two sets. + * A frozen set will not be modified. + * + * @param c set that defines which elements will be removed from + * this set. + * @stable ICU 2.0 + */ + virtual UnicodeSet& removeAll(const UnicodeSet& c); + + /** + * Complements in this set all elements contained in the specified + * set. Any character in the other set will be removed if it is + * in this set, or will be added if it is not in this set. + * A frozen set will not be modified. + * + * @param c set that defines which elements will be xor'ed from + * this set. + * @stable ICU 2.4 + */ + virtual UnicodeSet& complementAll(const UnicodeSet& c); + + /** + * Removes all of the elements from this set. This set will be + * empty after this call returns. + * A frozen set will not be modified. + * @stable ICU 2.0 + */ + virtual UnicodeSet& clear(void); + + /** + * Close this set over the given attribute. For the attribute + * USET_CASE, the result is to modify this set so that: + * + * 1. For each character or string 'a' in this set, all strings or + * characters 'b' such that foldCase(a) == foldCase(b) are added + * to this set. + * + * 2. For each string 'e' in the resulting set, if e != + * foldCase(e), 'e' will be removed. + * + * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}] + * + * (Here foldCase(x) refers to the operation u_strFoldCase, and a + * == b denotes that the contents are the same, not pointer + * comparison.) + * + * A frozen set will not be modified. + * + * @param attribute bitmask for attributes to close over. + * Currently only the USET_CASE bit is supported. Any undefined bits + * are ignored. + * @return a reference to this set. + * @stable ICU 4.2 + */ + UnicodeSet& closeOver(int32_t attribute); + + /** + * Remove all strings from this set. + * + * @return a reference to this set. + * @stable ICU 4.2 + */ + virtual UnicodeSet &removeAllStrings(); + + /** + * Iteration method that returns the number of ranges contained in + * this set. + * @see #getRangeStart + * @see #getRangeEnd + * @stable ICU 2.4 + */ + virtual int32_t getRangeCount(void) const; + + /** + * Iteration method that returns the first character in the + * specified range of this set. + * @see #getRangeCount + * @see #getRangeEnd + * @stable ICU 2.4 + */ + virtual UChar32 getRangeStart(int32_t index) const; + + /** + * Iteration method that returns the last character in the + * specified range of this set. + * @see #getRangeStart + * @see #getRangeEnd + * @stable ICU 2.4 + */ + virtual UChar32 getRangeEnd(int32_t index) const; + + /** + * Serializes this set into an array of 16-bit integers. Serialization + * (currently) only records the characters in the set; multicharacter + * strings are ignored. + * + * The array has following format (each line is one 16-bit + * integer): + * + * length = (n+2*m) | (m!=0?0x8000:0) + * bmpLength = n; present if m!=0 + * bmp[0] + * bmp[1] + * ... + * bmp[n-1] + * supp-high[0] + * supp-low[0] + * supp-high[1] + * supp-low[1] + * ... + * supp-high[m-1] + * supp-low[m-1] + * + * The array starts with a header. After the header are n bmp + * code points, then m supplementary code points. Either n or m + * or both may be zero. n+2*m is always <= 0x7FFF. + * + * If there are no supplementary characters (if m==0) then the + * header is one 16-bit integer, 'length', with value n. + * + * If there are supplementary characters (if m!=0) then the header + * is two 16-bit integers. The first, 'length', has value + * (n+2*m)|0x8000. The second, 'bmpLength', has value n. + * + * After the header the code points are stored in ascending order. + * Supplementary code points are stored as most significant 16 + * bits followed by least significant 16 bits. + * + * @param dest pointer to buffer of destCapacity 16-bit integers. + * May be NULL only if destCapacity is zero. + * @param destCapacity size of dest, or zero. Must not be negative. + * @param ec error code. Will be set to U_INDEX_OUTOFBOUNDS_ERROR + * if n+2*m > 0x7FFF. Will be set to U_BUFFER_OVERFLOW_ERROR if + * n+2*m+(m!=0?2:1) > destCapacity. + * @return the total length of the serialized format, including + * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other + * than U_BUFFER_OVERFLOW_ERROR. + * @stable ICU 2.4 + */ + int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const; + + /** + * Reallocate this objects internal structures to take up the least + * possible space, without changing this object's value. + * A frozen set will not be modified. + * @stable ICU 2.4 + */ + virtual UnicodeSet& compact(); + + /** + * Return the class ID for this class. This is useful only for + * comparing to a return value from getDynamicClassID(). For example: + * <pre> + * . Base* polymorphic_pointer = createPolymorphicObject(); + * . if (polymorphic_pointer->getDynamicClassID() == + * . Derived::getStaticClassID()) ... + * </pre> + * @return The class ID for all objects of this class. + * @stable ICU 2.0 + */ + static UClassID U_EXPORT2 getStaticClassID(void); + + /** + * Implement UnicodeFunctor API. + * + * @return The class ID for this object. All objects of a given + * class have the same class ID. Objects of other classes have + * different class IDs. + * @stable ICU 2.4 + */ + virtual UClassID getDynamicClassID(void) const; + +private: + + // Private API for the USet API + + friend class USetAccess; + + const UnicodeString* getString(int32_t index) const; + + //---------------------------------------------------------------- + // RuleBasedTransliterator support + //---------------------------------------------------------------- + +private: + + /** + * Returns <tt>true</tt> if this set contains any character whose low byte + * is the given value. This is used by <tt>RuleBasedTransliterator</tt> for + * indexing. + */ + virtual UBool matchesIndexValue(uint8_t v) const; + +private: + friend class RBBIRuleScanner; + + //---------------------------------------------------------------- + // Implementation: Clone as thawed (see ICU4J Freezable) + //---------------------------------------------------------------- + + UnicodeSet(const UnicodeSet& o, UBool /* asThawed */); + UnicodeSet& copyFrom(const UnicodeSet& o, UBool asThawed); + + //---------------------------------------------------------------- + // Implementation: Pattern parsing + //---------------------------------------------------------------- + + void applyPatternIgnoreSpace(const UnicodeString& pattern, + ParsePosition& pos, + const SymbolTable* symbols, + UErrorCode& status); + + void applyPattern(RuleCharacterIterator& chars, + const SymbolTable* symbols, + UnicodeString& rebuiltPat, + uint32_t options, + UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), + int32_t depth, + UErrorCode& ec); + + //---------------------------------------------------------------- + // Implementation: Utility methods + //---------------------------------------------------------------- + + static int32_t nextCapacity(int32_t minCapacity); + + bool ensureCapacity(int32_t newLen); + + bool ensureBufferCapacity(int32_t newLen); + + void swapBuffers(void); + + UBool allocateStrings(UErrorCode &status); + UBool hasStrings() const; + int32_t stringsSize() const; + UBool stringsContains(const UnicodeString &s) const; + + UnicodeString& _toPattern(UnicodeString& result, + UBool escapeUnprintable) const; + + UnicodeString& _generatePattern(UnicodeString& result, + UBool escapeUnprintable) const; + + static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable); + + static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable); + + //---------------------------------------------------------------- + // Implementation: Fundamental operators + //---------------------------------------------------------------- + + void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity); + + void add(const UChar32* other, int32_t otherLen, int8_t polarity); + + void retain(const UChar32* other, int32_t otherLen, int8_t polarity); + + /** + * Return true if the given position, in the given pattern, appears + * to be the start of a property set pattern [:foo:], \\p{foo}, or + * \\P{foo}, or \\N{name}. + */ + static UBool resemblesPropertyPattern(const UnicodeString& pattern, + int32_t pos); + + static UBool resemblesPropertyPattern(RuleCharacterIterator& chars, + int32_t iterOpts); + + /** + * Parse the given property pattern at the given parse position + * and set this UnicodeSet to the result. + * + * The original design document is out of date, but still useful. + * Ignore the property and value names: + * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/unicodeset_properties.html + * + * Recognized syntax: + * + * [:foo:] [:^foo:] - white space not allowed within "[:" or ":]" + * \\p{foo} \\P{foo} - white space not allowed within "\\p" or "\\P" + * \\N{name} - white space not allowed within "\\N" + * + * Other than the above restrictions, Unicode Pattern_White_Space characters are ignored. + * Case is ignored except in "\\p" and "\\P" and "\\N". In 'name' leading + * and trailing space is deleted, and internal runs of whitespace + * are collapsed to a single space. + * + * We support binary properties, enumerated properties, and the + * following non-enumerated properties: + * + * Numeric_Value + * Name + * Unicode_1_Name + * + * @param pattern the pattern string + * @param ppos on entry, the position at which to begin parsing. + * This should be one of the locations marked '^': + * + * [:blah:] \\p{blah} \\P{blah} \\N{name} + * ^ % ^ % ^ % ^ % + * + * On return, the position after the last character parsed, that is, + * the locations marked '%'. If the parse fails, ppos is returned + * unchanged. + * @param ec status + * @return a reference to this. + */ + UnicodeSet& applyPropertyPattern(const UnicodeString& pattern, + ParsePosition& ppos, + UErrorCode &ec); + + void applyPropertyPattern(RuleCharacterIterator& chars, + UnicodeString& rebuiltPat, + UErrorCode& ec); + + static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status); + + /** + * A filter that returns true if the given code point should be + * included in the UnicodeSet being constructed. + */ + typedef UBool (*Filter)(UChar32 codePoint, void* context); + + /** + * Given a filter, set this UnicodeSet to the code points + * contained by that filter. The filter MUST be + * property-conformant. That is, if it returns value v for one + * code point, then it must return v for all affiliated code + * points, as defined by the inclusions list. See + * getInclusions(). + * src is a UPropertySource value. + */ + void applyFilter(Filter filter, + void* context, + const UnicodeSet* inclusions, + UErrorCode &status); + + // UCPMap is now stable ICU 63 + void applyIntPropertyValue(const UCPMap *map, + UCPMapValueFilter *filter, const void *context, + UErrorCode &errorCode); + + /** + * Set the new pattern to cache. + */ + void setPattern(const UnicodeString& newPat) { + setPattern(newPat.getBuffer(), newPat.length()); + } + void setPattern(const char16_t *newPat, int32_t newPatLen); + /** + * Release existing cached pattern. + */ + void releasePattern(); + + friend class UnicodeSetIterator; +}; + + + +inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const { + return !operator==(o); +} + +inline UBool UnicodeSet::isFrozen() const { + return (UBool)(bmpSet!=NULL || stringSpan!=NULL); +} + +inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const { + return !containsNone(start, end); +} + +inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const { + return !containsNone(s); +} + +inline UBool UnicodeSet::containsSome(const UnicodeString& s) const { + return !containsNone(s); +} + +inline UBool UnicodeSet::isBogus() const { + return (UBool)(fFlags & kIsBogus); +} + +inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) { + return reinterpret_cast<UnicodeSet *>(uset); +} + +inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) { + return reinterpret_cast<const UnicodeSet *>(uset); +} + +inline USet *UnicodeSet::toUSet() { + return reinterpret_cast<USet *>(this); +} + +inline const USet *UnicodeSet::toUSet() const { + return reinterpret_cast<const USet *>(this); +} + +inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const { + int32_t sLength=s.length(); + if(start<0) { + start=0; + } else if(start>sLength) { + start=sLength; + } + return start+span(s.getBuffer()+start, sLength-start, spanCondition); +} + +inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const { + int32_t sLength=s.length(); + if(limit<0) { + limit=0; + } else if(limit>sLength) { + limit=sLength; + } + return spanBack(s.getBuffer(), limit, spanCondition); +} + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/unistr.h b/thirdparty/icu4c/common/unicode/unistr.h new file mode 100644 index 0000000000..456389f265 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/unistr.h @@ -0,0 +1,4757 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1998-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File unistr.h +* +* Modification History: +* +* Date Name Description +* 09/25/98 stephen Creation. +* 11/11/98 stephen Changed per 11/9 code review. +* 04/20/99 stephen Overhauled per 4/16 code review. +* 11/18/99 aliu Made to inherit from Replaceable. Added method +* handleReplaceBetween(); other methods unchanged. +* 06/25/01 grhoten Remove dependency on iostream. +****************************************************************************** +*/ + +#ifndef UNISTR_H +#define UNISTR_H + +/** + * \file + * \brief C++ API: Unicode String + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include <cstddef> +#include "unicode/char16ptr.h" +#include "unicode/rep.h" +#include "unicode/std_string.h" +#include "unicode/stringpiece.h" +#include "unicode/bytestream.h" + +struct UConverter; // unicode/ucnv.h + +#ifndef USTRING_H +/** + * \ingroup ustring_ustrlen + */ +U_CAPI int32_t U_EXPORT2 +u_strlen(const UChar *s); +#endif + +U_NAMESPACE_BEGIN + +#if !UCONFIG_NO_BREAK_ITERATION +class BreakIterator; // unicode/brkiter.h +#endif +class Edits; + +U_NAMESPACE_END + +// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper. +/** + * Internal string case mapping function type. + * All error checking must be done. + * src and dest must not overlap. + * @internal + */ +typedef int32_t U_CALLCONV +UStringCaseMapper(int32_t caseLocale, uint32_t options, +#if !UCONFIG_NO_BREAK_ITERATION + icu::BreakIterator *iter, +#endif + char16_t *dest, int32_t destCapacity, + const char16_t *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +U_NAMESPACE_BEGIN + +class Locale; // unicode/locid.h +class StringCharacterIterator; +class UnicodeStringAppendable; // unicode/appendable.h + +/* The <iostream> include has been moved to unicode/ustream.h */ + +/** + * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor + * which constructs a Unicode string from an invariant-character char * string. + * About invariant characters see utypes.h. + * This constructor has no runtime dependency on conversion code and is + * therefore recommended over ones taking a charset name string + * (where the empty string "" indicates invariant-character conversion). + * + * @stable ICU 3.2 + */ +#define US_INV icu::UnicodeString::kInvariant + +/** + * Unicode String literals in C++. + * + * Note: these macros are not recommended for new code. + * Prior to the availability of C++11 and u"unicode string literals", + * these macros were provided for portability and efficiency when + * initializing UnicodeStrings from literals. + * + * They work only for strings that contain "invariant characters", i.e., + * only latin letters, digits, and some punctuation. + * See utypes.h for details. + * + * The string parameter must be a C string literal. + * The length of the string, not including the terminating + * `NUL`, must be specified as a constant. + * @stable ICU 2.0 + */ +#if !U_CHAR16_IS_TYPEDEF +# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length) +#else +# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length) +#endif + +/** + * Unicode String literals in C++. + * Dependent on the platform properties, different UnicodeString + * constructors should be used to create a UnicodeString object from + * a string literal. + * The macros are defined for improved performance. + * They work only for strings that contain "invariant characters", i.e., + * only latin letters, digits, and some punctuation. + * See utypes.h for details. + * + * The string parameter must be a C string literal. + * @stable ICU 2.0 + */ +#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) + +/** + * \def UNISTR_FROM_CHAR_EXPLICIT + * This can be defined to be empty or "explicit". + * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32) + * constructors are marked as explicit, preventing their inadvertent use. + * @stable ICU 49 + */ +#ifndef UNISTR_FROM_CHAR_EXPLICIT +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) + // Auto-"explicit" in ICU library code. +# define UNISTR_FROM_CHAR_EXPLICIT explicit +# else + // Empty by default for source code compatibility. +# define UNISTR_FROM_CHAR_EXPLICIT +# endif +#endif + +/** + * \def UNISTR_FROM_STRING_EXPLICIT + * This can be defined to be empty or "explicit". + * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *) + * constructors are marked as explicit, preventing their inadvertent use. + * + * In particular, this helps prevent accidentally depending on ICU conversion code + * by passing a string literal into an API with a const UnicodeString & parameter. + * @stable ICU 49 + */ +#ifndef UNISTR_FROM_STRING_EXPLICIT +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) + // Auto-"explicit" in ICU library code. +# define UNISTR_FROM_STRING_EXPLICIT explicit +# else + // Empty by default for source code compatibility. +# define UNISTR_FROM_STRING_EXPLICIT +# endif +#endif + +/** + * \def UNISTR_OBJECT_SIZE + * Desired sizeof(UnicodeString) in bytes. + * It should be a multiple of sizeof(pointer) to avoid unusable space for padding. + * The object size may want to be a multiple of 16 bytes, + * which is a common granularity for heap allocation. + * + * Any space inside the object beyond sizeof(vtable pointer) + 2 + * is available for storing short strings inside the object. + * The bigger the object, the longer a string that can be stored inside the object, + * without additional heap allocation. + * + * Depending on a platform's pointer size, pointer alignment requirements, + * and struct padding, the compiler will usually round up sizeof(UnicodeString) + * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models), + * to hold the fields for heap-allocated strings. + * Such a minimum size also ensures that the object is easily large enough + * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH). + * + * sizeof(UnicodeString) >= 48 should work for all known platforms. + * + * For example, on a 64-bit machine where sizeof(vtable pointer) is 8, + * sizeof(UnicodeString) = 64 would leave space for + * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27 + * char16_ts stored inside the object. + * + * The minimum object size on a 64-bit machine would be + * 4 * sizeof(pointer) = 4 * 8 = 32 bytes, + * and the internal buffer would hold up to 11 char16_ts in that case. + * + * @see U16_MAX_LENGTH + * @stable ICU 56 + */ +#ifndef UNISTR_OBJECT_SIZE +# define UNISTR_OBJECT_SIZE 64 +#endif + +/** + * UnicodeString is a string class that stores Unicode characters directly and provides + * similar functionality as the Java String and StringBuffer/StringBuilder classes. + * It is a concrete implementation of the abstract class Replaceable (for transliteration). + * + * The UnicodeString equivalent of std::string’s clear() is remove(). + * + * A UnicodeString may "alias" an external array of characters + * (that is, point to it, rather than own the array) + * whose lifetime must then at least match the lifetime of the aliasing object. + * This aliasing may be preserved when returning a UnicodeString by value, + * depending on the compiler and the function implementation, + * via Return Value Optimization (RVO) or the move assignment operator. + * (However, the copy assignment operator does not preserve aliasing.) + * For details see the description of storage models at the end of the class API docs + * and in the User Guide chapter linked from there. + * + * The UnicodeString class is not suitable for subclassing. + * + * For an overview of Unicode strings in C and C++ see the + * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#strings-in-cc). + * + * In ICU, a Unicode string consists of 16-bit Unicode *code units*. + * A Unicode character may be stored with either one code unit + * (the most common case) or with a matched pair of special code units + * ("surrogates"). The data type for code units is char16_t. + * For single-character handling, a Unicode character code *point* is a value + * in the range 0..0x10ffff. ICU uses the UChar32 type for code points. + * + * Indexes and offsets into and lengths of strings always count code units, not code points. + * This is the same as with multi-byte char* strings in traditional string handling. + * Operations on partial strings typically do not test for code point boundaries. + * If necessary, the user needs to take care of such boundaries by testing for the code unit + * values or by using functions like + * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() + * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h). + * + * UnicodeString methods are more lenient with regard to input parameter values + * than other ICU APIs. In particular: + * - If indexes are out of bounds for a UnicodeString object + * (< 0 or > length()) then they are "pinned" to the nearest boundary. + * - If the buffer passed to an insert/append/replace operation is owned by the + * target object, e.g., calling str.append(str), an extra copy may take place + * to ensure safety. + * - If primitive string pointer values (e.g., const char16_t * or char *) + * for input strings are NULL, then those input string parameters are treated + * as if they pointed to an empty string. + * However, this is *not* the case for char * parameters for charset names + * or other IDs. + * - Most UnicodeString methods do not take a UErrorCode parameter because + * there are usually very few opportunities for failure other than a shortage + * of memory, error codes in low-level C++ string methods would be inconvenient, + * and the error code as the last parameter (ICU convention) would prevent + * the use of default parameter values. + * Instead, such methods set the UnicodeString into a "bogus" state + * (see isBogus()) if an error occurs. + * + * In string comparisons, two UnicodeString objects that are both "bogus" + * compare equal (to be transitive and prevent endless loops in sorting), + * and a "bogus" string compares less than any non-"bogus" one. + * + * Const UnicodeString methods are thread-safe. Multiple threads can use + * const methods on the same UnicodeString object simultaneously, + * but non-const methods must not be called concurrently (in multiple threads) + * with any other (const or non-const) methods. + * + * Similarly, const UnicodeString & parameters are thread-safe. + * One object may be passed in as such a parameter concurrently in multiple threads. + * This includes the const UnicodeString & parameters for + * copy construction, assignment, and cloning. + * + * UnicodeString uses several storage methods. + * String contents can be stored inside the UnicodeString object itself, + * in an allocated and shared buffer, or in an outside buffer that is "aliased". + * Most of this is done transparently, but careful aliasing in particular provides + * significant performance improvements. + * Also, the internal buffer is accessible via special functions. + * For details see the + * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#maximizing-performance-with-the-unicodestring-storage-model). + * + * @see utf.h + * @see CharacterIterator + * @stable ICU 2.0 + */ +class U_COMMON_API UnicodeString : public Replaceable +{ +public: + + /** + * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor + * which constructs a Unicode string from an invariant-character char * string. + * Use the macro US_INV instead of the full qualification for this value. + * + * @see US_INV + * @stable ICU 3.2 + */ + enum EInvariant { + /** + * @see EInvariant + * @stable ICU 3.2 + */ + kInvariant + }; + + //======================================== + // Read-only operations + //======================================== + + /* Comparison - bitwise only - for international comparison use collation */ + + /** + * Equality operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return true if `text` contains the same characters as this one, + * false otherwise. + * @stable ICU 2.0 + */ + inline UBool operator== (const UnicodeString& text) const; + + /** + * Inequality operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return false if `text` contains the same characters as this one, + * true otherwise. + * @stable ICU 2.0 + */ + inline UBool operator!= (const UnicodeString& text) const; + + /** + * Greater than operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return true if the characters in this are bitwise + * greater than the characters in `text`, false otherwise + * @stable ICU 2.0 + */ + inline UBool operator> (const UnicodeString& text) const; + + /** + * Less than operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return true if the characters in this are bitwise + * less than the characters in `text`, false otherwise + * @stable ICU 2.0 + */ + inline UBool operator< (const UnicodeString& text) const; + + /** + * Greater than or equal operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return true if the characters in this are bitwise + * greater than or equal to the characters in `text`, false otherwise + * @stable ICU 2.0 + */ + inline UBool operator>= (const UnicodeString& text) const; + + /** + * Less than or equal operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return true if the characters in this are bitwise + * less than or equal to the characters in `text`, false otherwise + * @stable ICU 2.0 + */ + inline UBool operator<= (const UnicodeString& text) const; + + /** + * Compare the characters bitwise in this UnicodeString to + * the characters in `text`. + * @param text The UnicodeString to compare to this one. + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as `text`, -1 if the characters in + * this are bitwise less than the characters in `text`, +1 if the + * characters in this are bitwise greater than the characters + * in `text`. + * @stable ICU 2.0 + */ + inline int8_t compare(const UnicodeString& text) const; + + /** + * Compare the characters bitwise in the range + * [`start`, `start + length`) with the characters + * in the **entire string** `text`. + * (The parameters "start" and "length" are not applied to the other text "text".) + * @param start the offset at which the compare operation begins + * @param length the number of characters of text to compare. + * @param text the other text to be compared against this string. + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as `text`, -1 if the characters in + * this are bitwise less than the characters in `text`, +1 if the + * characters in this are bitwise greater than the characters + * in `text`. + * @stable ICU 2.0 + */ + inline int8_t compare(int32_t start, + int32_t length, + const UnicodeString& text) const; + + /** + * Compare the characters bitwise in the range + * [`start`, `start + length`) with the characters + * in `srcText` in the range + * [`srcStart`, `srcStart + srcLength`). + * @param start the offset at which the compare operation begins + * @param length the number of characters in this to compare. + * @param srcText the text to be compared + * @param srcStart the offset into `srcText` to start comparison + * @param srcLength the number of characters in `src` to compare + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as `srcText`, -1 if the characters in + * this are bitwise less than the characters in `srcText`, +1 if the + * characters in this are bitwise greater than the characters + * in `srcText`. + * @stable ICU 2.0 + */ + inline int8_t compare(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Compare the characters bitwise in this UnicodeString with the first + * `srcLength` characters in `srcChars`. + * @param srcChars The characters to compare to this UnicodeString. + * @param srcLength the number of characters in `srcChars` to compare + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as `srcChars`, -1 if the characters in + * this are bitwise less than the characters in `srcChars`, +1 if the + * characters in this are bitwise greater than the characters + * in `srcChars`. + * @stable ICU 2.0 + */ + inline int8_t compare(ConstChar16Ptr srcChars, + int32_t srcLength) const; + + /** + * Compare the characters bitwise in the range + * [`start`, `start + length`) with the first + * `length` characters in `srcChars` + * @param start the offset at which the compare operation begins + * @param length the number of characters to compare. + * @param srcChars the characters to be compared + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as `srcChars`, -1 if the characters in + * this are bitwise less than the characters in `srcChars`, +1 if the + * characters in this are bitwise greater than the characters + * in `srcChars`. + * @stable ICU 2.0 + */ + inline int8_t compare(int32_t start, + int32_t length, + const char16_t *srcChars) const; + + /** + * Compare the characters bitwise in the range + * [`start`, `start + length`) with the characters + * in `srcChars` in the range + * [`srcStart`, `srcStart + srcLength`). + * @param start the offset at which the compare operation begins + * @param length the number of characters in this to compare + * @param srcChars the characters to be compared + * @param srcStart the offset into `srcChars` to start comparison + * @param srcLength the number of characters in `srcChars` to compare + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as `srcChars`, -1 if the characters in + * this are bitwise less than the characters in `srcChars`, +1 if the + * characters in this are bitwise greater than the characters + * in `srcChars`. + * @stable ICU 2.0 + */ + inline int8_t compare(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Compare the characters bitwise in the range + * [`start`, `limit`) with the characters + * in `srcText` in the range + * [`srcStart`, `srcLimit`). + * @param start the offset at which the compare operation begins + * @param limit the offset immediately following the compare operation + * @param srcText the text to be compared + * @param srcStart the offset into `srcText` to start comparison + * @param srcLimit the offset into `srcText` to limit comparison + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as `srcText`, -1 if the characters in + * this are bitwise less than the characters in `srcText`, +1 if the + * characters in this are bitwise greater than the characters + * in `srcText`. + * @stable ICU 2.0 + */ + inline int8_t compareBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param text Another string to compare this one to. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(const UnicodeString& text) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcText Another string to compare this one to. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(int32_t start, + int32_t length, + const UnicodeString& srcText) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcText Another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLength The number of code units from that string to compare. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param srcChars A pointer to another string to compare this one to. + * @param srcLength The number of code units from that string to compare. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars, + int32_t srcLength) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcChars A pointer to another string to compare this one to. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(int32_t start, + int32_t length, + const char16_t *srcChars) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcChars A pointer to another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLength The number of code units from that string to compare. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param limit The offset after the last code unit from this string to compare. + * @param srcText Another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLimit The offset after the last code unit from that string to compare. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrderBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). + * + * @param text Another string to compare this one to. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcText Another string to compare this one to. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(int32_t start, + int32_t length, + const UnicodeString& srcText, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcText Another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLength The number of code units from that string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). + * + * @param srcChars A pointer to another string to compare this one to. + * @param srcLength The number of code units from that string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(ConstChar16Ptr srcChars, + int32_t srcLength, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcChars A pointer to another string to compare this one to. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(int32_t start, + int32_t length, + const char16_t *srcChars, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcChars A pointer to another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLength The number of code units from that string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param limit The offset after the last code unit from this string to compare. + * @param srcText Another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLimit The offset after the last code unit from that string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompareBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit, + uint32_t options) const; + + /** + * Determine if this starts with the characters in `text` + * @param text The text to match. + * @return true if this starts with the characters in `text`, + * false otherwise + * @stable ICU 2.0 + */ + inline UBool startsWith(const UnicodeString& text) const; + + /** + * Determine if this starts with the characters in `srcText` + * in the range [`srcStart`, `srcStart + srcLength`). + * @param srcText The text to match. + * @param srcStart the offset into `srcText` to start matching + * @param srcLength the number of characters in `srcText` to match + * @return true if this starts with the characters in `text`, + * false otherwise + * @stable ICU 2.0 + */ + inline UBool startsWith(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Determine if this starts with the characters in `srcChars` + * @param srcChars The characters to match. + * @param srcLength the number of characters in `srcChars` + * @return true if this starts with the characters in `srcChars`, + * false otherwise + * @stable ICU 2.0 + */ + inline UBool startsWith(ConstChar16Ptr srcChars, + int32_t srcLength) const; + + /** + * Determine if this ends with the characters in `srcChars` + * in the range [`srcStart`, `srcStart + srcLength`). + * @param srcChars The characters to match. + * @param srcStart the offset into `srcText` to start matching + * @param srcLength the number of characters in `srcChars` to match + * @return true if this ends with the characters in `srcChars`, false otherwise + * @stable ICU 2.0 + */ + inline UBool startsWith(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Determine if this ends with the characters in `text` + * @param text The text to match. + * @return true if this ends with the characters in `text`, + * false otherwise + * @stable ICU 2.0 + */ + inline UBool endsWith(const UnicodeString& text) const; + + /** + * Determine if this ends with the characters in `srcText` + * in the range [`srcStart`, `srcStart + srcLength`). + * @param srcText The text to match. + * @param srcStart the offset into `srcText` to start matching + * @param srcLength the number of characters in `srcText` to match + * @return true if this ends with the characters in `text`, + * false otherwise + * @stable ICU 2.0 + */ + inline UBool endsWith(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Determine if this ends with the characters in `srcChars` + * @param srcChars The characters to match. + * @param srcLength the number of characters in `srcChars` + * @return true if this ends with the characters in `srcChars`, + * false otherwise + * @stable ICU 2.0 + */ + inline UBool endsWith(ConstChar16Ptr srcChars, + int32_t srcLength) const; + + /** + * Determine if this ends with the characters in `srcChars` + * in the range [`srcStart`, `srcStart + srcLength`). + * @param srcChars The characters to match. + * @param srcStart the offset into `srcText` to start matching + * @param srcLength the number of characters in `srcChars` to match + * @return true if this ends with the characters in `srcChars`, + * false otherwise + * @stable ICU 2.0 + */ + inline UBool endsWith(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + + /* Searching - bitwise only */ + + /** + * Locate in this the first occurrence of the characters in `text`, + * using bitwise comparison. + * @param text The text to search for. + * @return The offset into this of the start of `text`, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UnicodeString& text) const; + + /** + * Locate in this the first occurrence of the characters in `text` + * starting at offset `start`, using bitwise comparison. + * @param text The text to search for. + * @param start The offset at which searching will start. + * @return The offset into this of the start of `text`, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UnicodeString& text, + int32_t start) const; + + /** + * Locate in this the first occurrence in the range + * [`start`, `start + length`) of the characters + * in `text`, using bitwise comparison. + * @param text The text to search for. + * @param start The offset at which searching will start. + * @param length The number of characters to search + * @return The offset into this of the start of `text`, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UnicodeString& text, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence in the range + * [`start`, `start + length`) of the characters + * in `srcText` in the range + * [`srcStart`, `srcStart + srcLength`), + * using bitwise comparison. + * @param srcText The text to search for. + * @param srcStart the offset into `srcText` at which + * to start matching + * @param srcLength the number of characters in `srcText` to match + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of the start of `text`, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence of the characters in + * `srcChars` + * starting at offset `start`, using bitwise comparison. + * @param srcChars The text to search for. + * @param srcLength the number of characters in `srcChars` to match + * @param start the offset into this at which to start matching + * @return The offset into this of the start of `text`, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const char16_t *srcChars, + int32_t srcLength, + int32_t start) const; + + /** + * Locate in this the first occurrence in the range + * [`start`, `start + length`) of the characters + * in `srcChars`, using bitwise comparison. + * @param srcChars The text to search for. + * @param srcLength the number of characters in `srcChars` + * @param start The offset at which searching will start. + * @param length The number of characters to search + * @return The offset into this of the start of `srcChars`, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(ConstChar16Ptr srcChars, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence in the range + * [`start`, `start + length`) of the characters + * in `srcChars` in the range + * [`srcStart`, `srcStart + srcLength`), + * using bitwise comparison. + * @param srcChars The text to search for. + * @param srcStart the offset into `srcChars` at which + * to start matching + * @param srcLength the number of characters in `srcChars` to match + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of the start of `text`, + * or -1 if not found. + * @stable ICU 2.0 + */ + int32_t indexOf(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence of the BMP code point `c`, + * using bitwise comparison. + * @param c The code unit to search for. + * @return The offset into this of `c`, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(char16_t c) const; + + /** + * Locate in this the first occurrence of the code point `c`, + * using bitwise comparison. + * + * @param c The code point to search for. + * @return The offset into this of `c`, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(UChar32 c) const; + + /** + * Locate in this the first occurrence of the BMP code point `c`, + * starting at offset `start`, using bitwise comparison. + * @param c The code unit to search for. + * @param start The offset at which searching will start. + * @return The offset into this of `c`, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(char16_t c, + int32_t start) const; + + /** + * Locate in this the first occurrence of the code point `c` + * starting at offset `start`, using bitwise comparison. + * + * @param c The code point to search for. + * @param start The offset at which searching will start. + * @return The offset into this of `c`, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(UChar32 c, + int32_t start) const; + + /** + * Locate in this the first occurrence of the BMP code point `c` + * in the range [`start`, `start + length`), + * using bitwise comparison. + * @param c The code unit to search for. + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of `c`, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(char16_t c, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence of the code point `c` + * in the range [`start`, `start + length`), + * using bitwise comparison. + * + * @param c The code point to search for. + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of `c`, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(UChar32 c, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence of the characters in `text`, + * using bitwise comparison. + * @param text The text to search for. + * @return The offset into this of the start of `text`, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UnicodeString& text) const; + + /** + * Locate in this the last occurrence of the characters in `text` + * starting at offset `start`, using bitwise comparison. + * @param text The text to search for. + * @param start The offset at which searching will start. + * @return The offset into this of the start of `text`, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UnicodeString& text, + int32_t start) const; + + /** + * Locate in this the last occurrence in the range + * [`start`, `start + length`) of the characters + * in `text`, using bitwise comparison. + * @param text The text to search for. + * @param start The offset at which searching will start. + * @param length The number of characters to search + * @return The offset into this of the start of `text`, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UnicodeString& text, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence in the range + * [`start`, `start + length`) of the characters + * in `srcText` in the range + * [`srcStart`, `srcStart + srcLength`), + * using bitwise comparison. + * @param srcText The text to search for. + * @param srcStart the offset into `srcText` at which + * to start matching + * @param srcLength the number of characters in `srcText` to match + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of the start of `text`, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence of the characters in `srcChars` + * starting at offset `start`, using bitwise comparison. + * @param srcChars The text to search for. + * @param srcLength the number of characters in `srcChars` to match + * @param start the offset into this at which to start matching + * @return The offset into this of the start of `text`, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const char16_t *srcChars, + int32_t srcLength, + int32_t start) const; + + /** + * Locate in this the last occurrence in the range + * [`start`, `start + length`) of the characters + * in `srcChars`, using bitwise comparison. + * @param srcChars The text to search for. + * @param srcLength the number of characters in `srcChars` + * @param start The offset at which searching will start. + * @param length The number of characters to search + * @return The offset into this of the start of `srcChars`, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(ConstChar16Ptr srcChars, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence in the range + * [`start`, `start + length`) of the characters + * in `srcChars` in the range + * [`srcStart`, `srcStart + srcLength`), + * using bitwise comparison. + * @param srcChars The text to search for. + * @param srcStart the offset into `srcChars` at which + * to start matching + * @param srcLength the number of characters in `srcChars` to match + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of the start of `text`, + * or -1 if not found. + * @stable ICU 2.0 + */ + int32_t lastIndexOf(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence of the BMP code point `c`, + * using bitwise comparison. + * @param c The code unit to search for. + * @return The offset into this of `c`, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(char16_t c) const; + + /** + * Locate in this the last occurrence of the code point `c`, + * using bitwise comparison. + * + * @param c The code point to search for. + * @return The offset into this of `c`, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(UChar32 c) const; + + /** + * Locate in this the last occurrence of the BMP code point `c` + * starting at offset `start`, using bitwise comparison. + * @param c The code unit to search for. + * @param start The offset at which searching will start. + * @return The offset into this of `c`, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(char16_t c, + int32_t start) const; + + /** + * Locate in this the last occurrence of the code point `c` + * starting at offset `start`, using bitwise comparison. + * + * @param c The code point to search for. + * @param start The offset at which searching will start. + * @return The offset into this of `c`, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(UChar32 c, + int32_t start) const; + + /** + * Locate in this the last occurrence of the BMP code point `c` + * in the range [`start`, `start + length`), + * using bitwise comparison. + * @param c The code unit to search for. + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of `c`, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(char16_t c, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence of the code point `c` + * in the range [`start`, `start + length`), + * using bitwise comparison. + * + * @param c The code point to search for. + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of `c`, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(UChar32 c, + int32_t start, + int32_t length) const; + + + /* Character access */ + + /** + * Return the code unit at offset `offset`. + * If the offset is not valid (0..length()-1) then U+ffff is returned. + * @param offset a valid offset into the text + * @return the code unit at offset `offset` + * or 0xffff if the offset is not valid for this string + * @stable ICU 2.0 + */ + inline char16_t charAt(int32_t offset) const; + + /** + * Return the code unit at offset `offset`. + * If the offset is not valid (0..length()-1) then U+ffff is returned. + * @param offset a valid offset into the text + * @return the code unit at offset `offset` + * @stable ICU 2.0 + */ + inline char16_t operator[] (int32_t offset) const; + + /** + * Return the code point that contains the code unit + * at offset `offset`. + * If the offset is not valid (0..length()-1) then U+ffff is returned. + * @param offset a valid offset into the text + * that indicates the text offset of any of the code units + * that will be assembled into a code point (21-bit value) and returned + * @return the code point of text at `offset` + * or 0xffff if the offset is not valid for this string + * @stable ICU 2.0 + */ + UChar32 char32At(int32_t offset) const; + + /** + * Adjust a random-access offset so that + * it points to the beginning of a Unicode character. + * The offset that is passed in points to + * any code unit of a code point, + * while the returned offset will point to the first code unit + * of the same code point. + * In UTF-16, if the input offset points to a second surrogate + * of a surrogate pair, then the returned offset will point + * to the first surrogate. + * @param offset a valid offset into one code point of the text + * @return offset of the first code unit of the same code point + * @see U16_SET_CP_START + * @stable ICU 2.0 + */ + int32_t getChar32Start(int32_t offset) const; + + /** + * Adjust a random-access offset so that + * it points behind a Unicode character. + * The offset that is passed in points behind + * any code unit of a code point, + * while the returned offset will point behind the last code unit + * of the same code point. + * In UTF-16, if the input offset points behind the first surrogate + * (i.e., to the second surrogate) + * of a surrogate pair, then the returned offset will point + * behind the second surrogate (i.e., to the first surrogate). + * @param offset a valid offset after any code unit of a code point of the text + * @return offset of the first code unit after the same code point + * @see U16_SET_CP_LIMIT + * @stable ICU 2.0 + */ + int32_t getChar32Limit(int32_t offset) const; + + /** + * Move the code unit index along the string by delta code points. + * Interpret the input index as a code unit-based offset into the string, + * move the index forward or backward by delta code points, and + * return the resulting index. + * The input index should point to the first code unit of a code point, + * if there is more than one. + * + * Both input and output indexes are code unit-based as for all + * string indexes/offsets in ICU (and other libraries, like MBCS char*). + * If delta<0 then the index is moved backward (toward the start of the string). + * If delta>0 then the index is moved forward (toward the end of the string). + * + * This behaves like CharacterIterator::move32(delta, kCurrent). + * + * Behavior for out-of-bounds indexes: + * `moveIndex32` pins the input index to 0..length(), i.e., + * if the input index<0 then it is pinned to 0; + * if it is index>length() then it is pinned to length(). + * Afterwards, the index is moved by `delta` code points + * forward or backward, + * but no further backward than to 0 and no further forward than to length(). + * The resulting index return value will be in between 0 and length(), inclusively. + * + * Examples: + * \code + * // s has code points 'a' U+10000 'b' U+10ffff U+2029 + * UnicodeString s(u"a\U00010000b\U0010ffff\u2029"); + * + * // initial index: position of U+10000 + * int32_t index=1; + * + * // the following examples will all result in index==4, position of U+10ffff + * + * // skip 2 code points from some position in the string + * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' + * + * // go to the 3rd code point from the start of s (0-based) + * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' + * + * // go to the next-to-last code point of s + * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff + * \endcode + * + * @param index input code unit index + * @param delta (signed) code point count to move the index forward or backward + * in the string + * @return the resulting code unit index + * @stable ICU 2.0 + */ + int32_t moveIndex32(int32_t index, int32_t delta) const; + + /* Substring extraction */ + + /** + * Copy the characters in the range + * [`start`, `start + length`) into the array `dst`, + * beginning at `dstStart`. + * If the string aliases to `dst` itself as an external buffer, + * then extract() will not copy the contents. + * + * @param start offset of first character which will be copied into the array + * @param length the number of characters to extract + * @param dst array in which to copy characters. The length of `dst` + * must be at least (`dstStart + length`). + * @param dstStart the offset in `dst` where the first character + * will be extracted + * @stable ICU 2.0 + */ + inline void extract(int32_t start, + int32_t length, + Char16Ptr dst, + int32_t dstStart = 0) const; + + /** + * Copy the contents of the string into dest. + * This is a convenience function that + * checks if there is enough space in dest, + * extracts the entire string if possible, + * and NUL-terminates dest if possible. + * + * If the string fits into dest but cannot be NUL-terminated + * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. + * If the string itself does not fit into dest + * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. + * + * If the string aliases to `dest` itself as an external buffer, + * then extract() will not copy the contents. + * + * @param dest Destination string buffer. + * @param destCapacity Number of char16_ts available at dest. + * @param errorCode ICU error code. + * @return length() + * @stable ICU 2.0 + */ + int32_t + extract(Char16Ptr dest, int32_t destCapacity, + UErrorCode &errorCode) const; + + /** + * Copy the characters in the range + * [`start`, `start + length`) into the UnicodeString + * `target`. + * @param start offset of first character which will be copied + * @param length the number of characters to extract + * @param target UnicodeString into which to copy characters. + * @stable ICU 2.0 + */ + inline void extract(int32_t start, + int32_t length, + UnicodeString& target) const; + + /** + * Copy the characters in the range [`start`, `limit`) + * into the array `dst`, beginning at `dstStart`. + * @param start offset of first character which will be copied into the array + * @param limit offset immediately following the last character to be copied + * @param dst array in which to copy characters. The length of `dst` + * must be at least (`dstStart + (limit - start)`). + * @param dstStart the offset in `dst` where the first character + * will be extracted + * @stable ICU 2.0 + */ + inline void extractBetween(int32_t start, + int32_t limit, + char16_t *dst, + int32_t dstStart = 0) const; + + /** + * Copy the characters in the range [`start`, `limit`) + * into the UnicodeString `target`. Replaceable API. + * @param start offset of first character which will be copied + * @param limit offset immediately following the last character to be copied + * @param target UnicodeString into which to copy characters. + * @stable ICU 2.0 + */ + virtual void extractBetween(int32_t start, + int32_t limit, + UnicodeString& target) const; + + /** + * Copy the characters in the range + * [`start`, `start + startLength`) into an array of characters. + * All characters must be invariant (see utypes.h). + * Use US_INV as the last, signature-distinguishing parameter. + * + * This function does not write any more than `targetCapacity` + * characters but returns the length of the entire output string + * so that one can allocate a larger buffer and call the function again + * if necessary. + * The output string is NUL-terminated if possible. + * + * @param start offset of first character which will be copied + * @param startLength the number of characters to extract + * @param target the target buffer for extraction, can be NULL + * if targetLength is 0 + * @param targetCapacity the length of the target buffer + * @param inv Signature-distinguishing paramater, use US_INV. + * @return the output string length, not including the terminating NUL + * @stable ICU 3.2 + */ + int32_t extract(int32_t start, + int32_t startLength, + char *target, + int32_t targetCapacity, + enum EInvariant inv) const; + +#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION + + /** + * Copy the characters in the range + * [`start`, `start + length`) into an array of characters + * in the platform's default codepage. + * This function does not write any more than `targetLength` + * characters but returns the length of the entire output string + * so that one can allocate a larger buffer and call the function again + * if necessary. + * The output string is NUL-terminated if possible. + * + * @param start offset of first character which will be copied + * @param startLength the number of characters to extract + * @param target the target buffer for extraction + * @param targetLength the length of the target buffer + * If `target` is NULL, then the number of bytes required for + * `target` is returned. + * @return the output string length, not including the terminating NUL + * @stable ICU 2.0 + */ + int32_t extract(int32_t start, + int32_t startLength, + char *target, + uint32_t targetLength) const; + +#endif + +#if !UCONFIG_NO_CONVERSION + + /** + * Copy the characters in the range + * [`start`, `start + length`) into an array of characters + * in a specified codepage. + * The output string is NUL-terminated. + * + * Recommendation: For invariant-character strings use + * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const + * because it avoids object code dependencies of UnicodeString on + * the conversion code. + * + * @param start offset of first character which will be copied + * @param startLength the number of characters to extract + * @param target the target buffer for extraction + * @param codepage the desired codepage for the characters. 0 has + * the special meaning of the default codepage + * If `codepage` is an empty string (`""`), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. + * If `target` is NULL, then the number of bytes required for + * `target` is returned. It is assumed that the target is big enough + * to fit all of the characters. + * @return the output string length, not including the terminating NUL + * @stable ICU 2.0 + */ + inline int32_t extract(int32_t start, + int32_t startLength, + char *target, + const char *codepage = 0) const; + + /** + * Copy the characters in the range + * [`start`, `start + length`) into an array of characters + * in a specified codepage. + * This function does not write any more than `targetLength` + * characters but returns the length of the entire output string + * so that one can allocate a larger buffer and call the function again + * if necessary. + * The output string is NUL-terminated if possible. + * + * Recommendation: For invariant-character strings use + * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const + * because it avoids object code dependencies of UnicodeString on + * the conversion code. + * + * @param start offset of first character which will be copied + * @param startLength the number of characters to extract + * @param target the target buffer for extraction + * @param targetLength the length of the target buffer + * @param codepage the desired codepage for the characters. 0 has + * the special meaning of the default codepage + * If `codepage` is an empty string (`""`), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. + * If `target` is NULL, then the number of bytes required for + * `target` is returned. + * @return the output string length, not including the terminating NUL + * @stable ICU 2.0 + */ + int32_t extract(int32_t start, + int32_t startLength, + char *target, + uint32_t targetLength, + const char *codepage) const; + + /** + * Convert the UnicodeString into a codepage string using an existing UConverter. + * The output string is NUL-terminated if possible. + * + * This function avoids the overhead of opening and closing a converter if + * multiple strings are extracted. + * + * @param dest destination string buffer, can be NULL if destCapacity==0 + * @param destCapacity the number of chars available at dest + * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), + * or NULL for the default converter + * @param errorCode normal ICU error code + * @return the length of the output string, not counting the terminating NUL; + * if the length is greater than destCapacity, then the string will not fit + * and a buffer of the indicated length would need to be passed in + * @stable ICU 2.0 + */ + int32_t extract(char *dest, int32_t destCapacity, + UConverter *cnv, + UErrorCode &errorCode) const; + +#endif + + /** + * Create a temporary substring for the specified range. + * Unlike the substring constructor and setTo() functions, + * the object returned here will be a read-only alias (using getBuffer()) + * rather than copying the text. + * As a result, this substring operation is much faster but requires + * that the original string not be modified or deleted during the lifetime + * of the returned substring object. + * @param start offset of the first character visible in the substring + * @param length length of the substring + * @return a read-only alias UnicodeString object for the substring + * @stable ICU 4.4 + */ + UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; + + /** + * Create a temporary substring for the specified range. + * Same as tempSubString(start, length) except that the substring range + * is specified as a (start, limit) pair (with an exclusive limit index) + * rather than a (start, length) pair. + * @param start offset of the first character visible in the substring + * @param limit offset immediately following the last character visible in the substring + * @return a read-only alias UnicodeString object for the substring + * @stable ICU 4.4 + */ + inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; + + /** + * Convert the UnicodeString to UTF-8 and write the result + * to a ByteSink. This is called by toUTF8String(). + * Unpaired surrogates are replaced with U+FFFD. + * Calls u_strToUTF8WithSub(). + * + * @param sink A ByteSink to which the UTF-8 version of the string is written. + * sink.Flush() is called at the end. + * @stable ICU 4.2 + * @see toUTF8String + */ + void toUTF8(ByteSink &sink) const; + + /** + * Convert the UnicodeString to UTF-8 and append the result + * to a standard string. + * Unpaired surrogates are replaced with U+FFFD. + * Calls toUTF8(). + * + * @param result A standard string (or a compatible object) + * to which the UTF-8 version of the string is appended. + * @return The string object. + * @stable ICU 4.2 + * @see toUTF8 + */ + template<typename StringClass> + StringClass &toUTF8String(StringClass &result) const { + StringByteSink<StringClass> sbs(&result, length()); + toUTF8(sbs); + return result; + } + + /** + * Convert the UnicodeString to UTF-32. + * Unpaired surrogates are replaced with U+FFFD. + * Calls u_strToUTF32WithSub(). + * + * @param utf32 destination string buffer, can be NULL if capacity==0 + * @param capacity the number of UChar32s available at utf32 + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The length of the UTF-32 string. + * @see fromUTF32 + * @stable ICU 4.2 + */ + int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; + + /* Length operations */ + + /** + * Return the length of the UnicodeString object. + * The length is the number of char16_t code units are in the UnicodeString. + * If you want the number of code points, please use countChar32(). + * @return the length of the UnicodeString object + * @see countChar32 + * @stable ICU 2.0 + */ + inline int32_t length(void) const; + + /** + * Count Unicode code points in the length char16_t code units of the string. + * A code point may occupy either one or two char16_t code units. + * Counting code points involves reading all code units. + * + * This functions is basically the inverse of moveIndex32(). + * + * @param start the index of the first code unit to check + * @param length the number of char16_t code units to check + * @return the number of code points in the specified code units + * @see length + * @stable ICU 2.0 + */ + int32_t + countChar32(int32_t start=0, int32_t length=INT32_MAX) const; + + /** + * Check if the length char16_t code units of the string + * contain more Unicode code points than a certain number. + * This is more efficient than counting all code points in this part of the string + * and comparing that number with a threshold. + * This function may not need to scan the string at all if the length + * falls within a certain range, and + * never needs to count more than 'number+1' code points. + * Logically equivalent to (countChar32(start, length)>number). + * A Unicode code point may occupy either one or two char16_t code units. + * + * @param start the index of the first code unit to check (0 for the entire string) + * @param length the number of char16_t code units to check + * (use INT32_MAX for the entire string; remember that start/length + * values are pinned) + * @param number The number of code points in the (sub)string is compared against + * the 'number' parameter. + * @return Boolean value for whether the string contains more Unicode code points + * than 'number'. Same as (u_countChar32(s, length)>number). + * @see countChar32 + * @see u_strHasMoreChar32Than + * @stable ICU 2.4 + */ + UBool + hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; + + /** + * Determine if this string is empty. + * @return true if this string contains 0 characters, false otherwise. + * @stable ICU 2.0 + */ + inline UBool isEmpty(void) const; + + /** + * Return the capacity of the internal buffer of the UnicodeString object. + * This is useful together with the getBuffer functions. + * See there for details. + * + * @return the number of char16_ts available in the internal buffer + * @see getBuffer + * @stable ICU 2.0 + */ + inline int32_t getCapacity(void) const; + + /* Other operations */ + + /** + * Generate a hash code for this object. + * @return The hash code of this UnicodeString. + * @stable ICU 2.0 + */ + inline int32_t hashCode(void) const; + + /** + * Determine if this object contains a valid string. + * A bogus string has no value. It is different from an empty string, + * although in both cases isEmpty() returns true and length() returns 0. + * setToBogus() and isBogus() can be used to indicate that no string value is available. + * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and + * length() returns 0. + * + * @return true if the string is bogus/invalid, false otherwise + * @see setToBogus() + * @stable ICU 2.0 + */ + inline UBool isBogus(void) const; + + + //======================================== + // Write operations + //======================================== + + /* Assignment operations */ + + /** + * Assignment operator. Replace the characters in this UnicodeString + * with the characters from `srcText`. + * + * Starting with ICU 2.4, the assignment operator and the copy constructor + * allocate a new buffer and copy the buffer contents even for readonly aliases. + * By contrast, the fastCopyFrom() function implements the old, + * more efficient but less safe behavior + * of making this string also a readonly alias to the same buffer. + * + * If the source object has an "open" buffer from getBuffer(minCapacity), + * then the copy is an empty string. + * + * @param srcText The text containing the characters to replace + * @return a reference to this + * @stable ICU 2.0 + * @see fastCopyFrom + */ + UnicodeString &operator=(const UnicodeString &srcText); + + /** + * Almost the same as the assignment operator. + * Replace the characters in this UnicodeString + * with the characters from `srcText`. + * + * This function works the same as the assignment operator + * for all strings except for ones that are readonly aliases. + * + * Starting with ICU 2.4, the assignment operator and the copy constructor + * allocate a new buffer and copy the buffer contents even for readonly aliases. + * This function implements the old, more efficient but less safe behavior + * of making this string also a readonly alias to the same buffer. + * + * The fastCopyFrom function must be used only if it is known that the lifetime of + * this UnicodeString does not exceed the lifetime of the aliased buffer + * including its contents, for example for strings from resource bundles + * or aliases to string constants. + * + * If the source object has an "open" buffer from getBuffer(minCapacity), + * then the copy is an empty string. + * + * @param src The text containing the characters to replace. + * @return a reference to this + * @stable ICU 2.4 + */ + UnicodeString &fastCopyFrom(const UnicodeString &src); + + /** + * Move assignment operator; might leave src in bogus state. + * This string will have the same contents and state that the source string had. + * The behavior is undefined if *this and src are the same object. + * @param src source string + * @return *this + * @stable ICU 56 + */ + UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT; + + /** + * Swap strings. + * @param other other string + * @stable ICU 56 + */ + void swap(UnicodeString &other) U_NOEXCEPT; + + /** + * Non-member UnicodeString swap function. + * @param s1 will get s2's contents and state + * @param s2 will get s1's contents and state + * @stable ICU 56 + */ + friend inline void U_EXPORT2 + swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT { + s1.swap(s2); + } + + /** + * Assignment operator. Replace the characters in this UnicodeString + * with the code unit `ch`. + * @param ch the code unit to replace + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator= (char16_t ch); + + /** + * Assignment operator. Replace the characters in this UnicodeString + * with the code point `ch`. + * @param ch the code point to replace + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator= (UChar32 ch); + + /** + * Set the text in the UnicodeString object to the characters + * in `srcText` in the range + * [`srcStart`, `srcText.length()`). + * `srcText` is not modified. + * @param srcText the source for the new characters + * @param srcStart the offset into `srcText` where new characters + * will be obtained + * @return a reference to this + * @stable ICU 2.2 + */ + inline UnicodeString& setTo(const UnicodeString& srcText, + int32_t srcStart); + + /** + * Set the text in the UnicodeString object to the characters + * in `srcText` in the range + * [`srcStart`, `srcStart + srcLength`). + * `srcText` is not modified. + * @param srcText the source for the new characters + * @param srcStart the offset into `srcText` where new characters + * will be obtained + * @param srcLength the number of characters in `srcText` in the + * replace string. + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& setTo(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + /** + * Set the text in the UnicodeString object to the characters in + * `srcText`. + * `srcText` is not modified. + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& setTo(const UnicodeString& srcText); + + /** + * Set the characters in the UnicodeString object to the characters + * in `srcChars`. `srcChars` is not modified. + * @param srcChars the source for the new characters + * @param srcLength the number of Unicode characters in srcChars. + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& setTo(const char16_t *srcChars, + int32_t srcLength); + + /** + * Set the characters in the UnicodeString object to the code unit + * `srcChar`. + * @param srcChar the code unit which becomes the UnicodeString's character + * content + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& setTo(char16_t srcChar); + + /** + * Set the characters in the UnicodeString object to the code point + * `srcChar`. + * @param srcChar the code point which becomes the UnicodeString's character + * content + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& setTo(UChar32 srcChar); + + /** + * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor. + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has copy-on-write semantics: + * When the string is modified, then the buffer is first copied into + * newly allocated memory. + * The aliased buffer is never modified. + * + * In an assignment to another UnicodeString, when using the copy constructor + * or the assignment operator, the text will be copied. + * When using fastCopyFrom(), the text will be aliased again, + * so that both strings then alias the same readonly-text. + * + * @param isTerminated specifies if `text` is `NUL`-terminated. + * This must be true if `textLength==-1`. + * @param text The characters to alias for the UnicodeString. + * @param textLength The number of Unicode characters in `text` to alias. + * If -1, then this constructor will determine the length + * by calling `u_strlen()`. + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString &setTo(UBool isTerminated, + ConstChar16Ptr text, + int32_t textLength); + + /** + * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor. + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has write-through semantics: + * For as long as the capacity of the buffer is sufficient, write operations + * will directly affect the buffer. When more capacity is necessary, then + * a new buffer will be allocated and the contents copied as with regularly + * constructed strings. + * In an assignment to another UnicodeString, the buffer will be copied. + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same + * as the string buffer itself and will in this case not copy the contents. + * + * @param buffer The characters to alias for the UnicodeString. + * @param buffLength The number of Unicode characters in `buffer` to alias. + * @param buffCapacity The size of `buffer` in char16_ts. + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString &setTo(char16_t *buffer, + int32_t buffLength, + int32_t buffCapacity); + + /** + * Make this UnicodeString object invalid. + * The string will test true with isBogus(). + * + * A bogus string has no value. It is different from an empty string. + * It can be used to indicate that no string value is available. + * getBuffer() and getTerminatedBuffer() return NULL, and + * length() returns 0. + * + * This utility function is used throughout the UnicodeString + * implementation to indicate that a UnicodeString operation failed, + * and may be used in other functions, + * especially but not exclusively when such functions do not + * take a UErrorCode for simplicity. + * + * The following methods, and no others, will clear a string object's bogus flag: + * - remove() + * - remove(0, INT32_MAX) + * - truncate(0) + * - operator=() (assignment operator) + * - setTo(...) + * + * The simplest ways to turn a bogus string into an empty one + * is to use the remove() function. + * Examples for other functions that are equivalent to "set to empty string": + * \code + * if(s.isBogus()) { + * s.remove(); // set to an empty string (remove all), or + * s.remove(0, INT32_MAX); // set to an empty string (remove all), or + * s.truncate(0); // set to an empty string (complete truncation), or + * s=UnicodeString(); // assign an empty string, or + * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or + * s.setTo(u"", 0); // set to an empty C Unicode string + * } + * \endcode + * + * @see isBogus() + * @stable ICU 2.0 + */ + void setToBogus(); + + /** + * Set the character at the specified offset to the specified character. + * @param offset A valid offset into the text of the character to set + * @param ch The new character + * @return A reference to this + * @stable ICU 2.0 + */ + UnicodeString& setCharAt(int32_t offset, + char16_t ch); + + + /* Append operations */ + + /** + * Append operator. Append the code unit `ch` to the UnicodeString + * object. + * @param ch the code unit to be appended + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator+= (char16_t ch); + + /** + * Append operator. Append the code point `ch` to the UnicodeString + * object. + * @param ch the code point to be appended + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator+= (UChar32 ch); + + /** + * Append operator. Append the characters in `srcText` to the + * UnicodeString object. `srcText` is not modified. + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator+= (const UnicodeString& srcText); + + /** + * Append the characters + * in `srcText` in the range + * [`srcStart`, `srcStart + srcLength`) to the + * UnicodeString object at offset `start`. `srcText` + * is not modified. + * @param srcText the source for the new characters + * @param srcStart the offset into `srcText` where new characters + * will be obtained + * @param srcLength the number of characters in `srcText` in + * the append string + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + /** + * Append the characters in `srcText` to the UnicodeString object. + * `srcText` is not modified. + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(const UnicodeString& srcText); + + /** + * Append the characters in `srcChars` in the range + * [`srcStart`, `srcStart + srcLength`) to the UnicodeString + * object at offset + * `start`. `srcChars` is not modified. + * @param srcChars the source for the new characters + * @param srcStart the offset into `srcChars` where new characters + * will be obtained + * @param srcLength the number of characters in `srcChars` in + * the append string; can be -1 if `srcChars` is NUL-terminated + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength); + + /** + * Append the characters in `srcChars` to the UnicodeString object + * at offset `start`. `srcChars` is not modified. + * @param srcChars the source for the new characters + * @param srcLength the number of Unicode characters in `srcChars`; + * can be -1 if `srcChars` is NUL-terminated + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(ConstChar16Ptr srcChars, + int32_t srcLength); + + /** + * Append the code unit `srcChar` to the UnicodeString object. + * @param srcChar the code unit to append + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(char16_t srcChar); + + /** + * Append the code point `srcChar` to the UnicodeString object. + * @param srcChar the code point to append + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& append(UChar32 srcChar); + + + /* Insert operations */ + + /** + * Insert the characters in `srcText` in the range + * [`srcStart`, `srcStart + srcLength`) into the UnicodeString + * object at offset `start`. `srcText` is not modified. + * @param start the offset where the insertion begins + * @param srcText the source for the new characters + * @param srcStart the offset into `srcText` where new characters + * will be obtained + * @param srcLength the number of characters in `srcText` in + * the insert string + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + /** + * Insert the characters in `srcText` into the UnicodeString object + * at offset `start`. `srcText` is not modified. + * @param start the offset where the insertion begins + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + const UnicodeString& srcText); + + /** + * Insert the characters in `srcChars` in the range + * [`srcStart`, `srcStart + srcLength`) into the UnicodeString + * object at offset `start`. `srcChars` is not modified. + * @param start the offset at which the insertion begins + * @param srcChars the source for the new characters + * @param srcStart the offset into `srcChars` where new characters + * will be obtained + * @param srcLength the number of characters in `srcChars` + * in the insert string + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength); + + /** + * Insert the characters in `srcChars` into the UnicodeString object + * at offset `start`. `srcChars` is not modified. + * @param start the offset where the insertion begins + * @param srcChars the source for the new characters + * @param srcLength the number of Unicode characters in srcChars. + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + ConstChar16Ptr srcChars, + int32_t srcLength); + + /** + * Insert the code unit `srcChar` into the UnicodeString object at + * offset `start`. + * @param start the offset at which the insertion occurs + * @param srcChar the code unit to insert + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + char16_t srcChar); + + /** + * Insert the code point `srcChar` into the UnicodeString object at + * offset `start`. + * @param start the offset at which the insertion occurs + * @param srcChar the code point to insert + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + UChar32 srcChar); + + + /* Replace operations */ + + /** + * Replace the characters in the range + * [`start`, `start + length`) with the characters in + * `srcText` in the range + * [`srcStart`, `srcStart + srcLength`). + * `srcText` is not modified. + * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * `start + length` is not modified. + * @param srcText the source for the new characters + * @param srcStart the offset into `srcText` where new characters + * will be obtained + * @param srcLength the number of characters in `srcText` in + * the replace string + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replace(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + /** + * Replace the characters in the range + * [`start`, `start + length`) + * with the characters in `srcText`. `srcText` is + * not modified. + * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * `start + length` is not modified. + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replace(int32_t start, + int32_t length, + const UnicodeString& srcText); + + /** + * Replace the characters in the range + * [`start`, `start + length`) with the characters in + * `srcChars` in the range + * [`srcStart`, `srcStart + srcLength`). `srcChars` + * is not modified. + * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * `start + length` is not modified. + * @param srcChars the source for the new characters + * @param srcStart the offset into `srcChars` where new characters + * will be obtained + * @param srcLength the number of characters in `srcChars` + * in the replace string + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replace(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength); + + /** + * Replace the characters in the range + * [`start`, `start + length`) with the characters in + * `srcChars`. `srcChars` is not modified. + * @param start the offset at which the replace operation begins + * @param length number of characters to replace. The character at + * `start + length` is not modified. + * @param srcChars the source for the new characters + * @param srcLength the number of Unicode characters in srcChars + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replace(int32_t start, + int32_t length, + ConstChar16Ptr srcChars, + int32_t srcLength); + + /** + * Replace the characters in the range + * [`start`, `start + length`) with the code unit + * `srcChar`. + * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * `start + length` is not modified. + * @param srcChar the new code unit + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replace(int32_t start, + int32_t length, + char16_t srcChar); + + /** + * Replace the characters in the range + * [`start`, `start + length`) with the code point + * `srcChar`. + * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * `start + length` is not modified. + * @param srcChar the new code point + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); + + /** + * Replace the characters in the range [`start`, `limit`) + * with the characters in `srcText`. `srcText` is not modified. + * @param start the offset at which the replace operation begins + * @param limit the offset immediately following the replace range + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replaceBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText); + + /** + * Replace the characters in the range [`start`, `limit`) + * with the characters in `srcText` in the range + * [`srcStart`, `srcLimit`). `srcText` is not modified. + * @param start the offset at which the replace operation begins + * @param limit the offset immediately following the replace range + * @param srcText the source for the new characters + * @param srcStart the offset into `srcChars` where new characters + * will be obtained + * @param srcLimit the offset immediately following the range to copy + * in `srcText` + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replaceBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit); + + /** + * Replace a substring of this object with the given text. + * @param start the beginning index, inclusive; `0 <= start <= limit`. + * @param limit the ending index, exclusive; `start <= limit <= length()`. + * @param text the text to replace characters `start` to `limit - 1` + * @stable ICU 2.0 + */ + virtual void handleReplaceBetween(int32_t start, + int32_t limit, + const UnicodeString& text); + + /** + * Replaceable API + * @return true if it has MetaData + * @stable ICU 2.4 + */ + virtual UBool hasMetaData() const; + + /** + * Copy a substring of this object, retaining attribute (out-of-band) + * information. This method is used to duplicate or reorder substrings. + * The destination index must not overlap the source range. + * + * @param start the beginning index, inclusive; `0 <= start <= limit`. + * @param limit the ending index, exclusive; `start <= limit <= length()`. + * @param dest the destination index. The characters from + * `start..limit-1` will be copied to `dest`. + * Implementations of this method may assume that `dest <= start || + * dest >= limit`. + * @stable ICU 2.0 + */ + virtual void copy(int32_t start, int32_t limit, int32_t dest); + + /* Search and replace operations */ + + /** + * Replace all occurrences of characters in oldText with the characters + * in newText + * @param oldText the text containing the search text + * @param newText the text containing the replacement text + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& findAndReplace(const UnicodeString& oldText, + const UnicodeString& newText); + + /** + * Replace all occurrences of characters in oldText with characters + * in newText + * in the range [`start`, `start + length`). + * @param start the start of the range in which replace will performed + * @param length the length of the range in which replace will be performed + * @param oldText the text containing the search text + * @param newText the text containing the replacement text + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& findAndReplace(int32_t start, + int32_t length, + const UnicodeString& oldText, + const UnicodeString& newText); + + /** + * Replace all occurrences of characters in oldText in the range + * [`oldStart`, `oldStart + oldLength`) with the characters + * in newText in the range + * [`newStart`, `newStart + newLength`) + * in the range [`start`, `start + length`). + * @param start the start of the range in which replace will performed + * @param length the length of the range in which replace will be performed + * @param oldText the text containing the search text + * @param oldStart the start of the search range in `oldText` + * @param oldLength the length of the search range in `oldText` + * @param newText the text containing the replacement text + * @param newStart the start of the replacement range in `newText` + * @param newLength the length of the replacement range in `newText` + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& findAndReplace(int32_t start, + int32_t length, + const UnicodeString& oldText, + int32_t oldStart, + int32_t oldLength, + const UnicodeString& newText, + int32_t newStart, + int32_t newLength); + + + /* Remove operations */ + + /** + * Removes all characters from the UnicodeString object and clears the bogus flag. + * This is the UnicodeString equivalent of std::string’s clear(). + * + * @return a reference to this + * @see setToBogus + * @stable ICU 2.0 + */ + inline UnicodeString& remove(); + + /** + * Remove the characters in the range + * [`start`, `start + length`) from the UnicodeString object. + * @param start the offset of the first character to remove + * @param length the number of characters to remove + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& remove(int32_t start, + int32_t length = (int32_t)INT32_MAX); + + /** + * Remove the characters in the range + * [`start`, `limit`) from the UnicodeString object. + * @param start the offset of the first character to remove + * @param limit the offset immediately following the range to remove + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& removeBetween(int32_t start, + int32_t limit = (int32_t)INT32_MAX); + + /** + * Retain only the characters in the range + * [`start`, `limit`) from the UnicodeString object. + * Removes characters before `start` and at and after `limit`. + * @param start the offset of the first character to retain + * @param limit the offset immediately following the range to retain + * @return a reference to this + * @stable ICU 4.4 + */ + inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); + + /* Length operations */ + + /** + * Pad the start of this UnicodeString with the character `padChar`. + * If the length of this UnicodeString is less than targetLength, + * length() - targetLength copies of padChar will be added to the + * beginning of this UnicodeString. + * @param targetLength the desired length of the string + * @param padChar the character to use for padding. Defaults to + * space (U+0020) + * @return true if the text was padded, false otherwise. + * @stable ICU 2.0 + */ + UBool padLeading(int32_t targetLength, + char16_t padChar = 0x0020); + + /** + * Pad the end of this UnicodeString with the character `padChar`. + * If the length of this UnicodeString is less than targetLength, + * length() - targetLength copies of padChar will be added to the + * end of this UnicodeString. + * @param targetLength the desired length of the string + * @param padChar the character to use for padding. Defaults to + * space (U+0020) + * @return true if the text was padded, false otherwise. + * @stable ICU 2.0 + */ + UBool padTrailing(int32_t targetLength, + char16_t padChar = 0x0020); + + /** + * Truncate this UnicodeString to the `targetLength`. + * @param targetLength the desired length of this UnicodeString. + * @return true if the text was truncated, false otherwise + * @stable ICU 2.0 + */ + inline UBool truncate(int32_t targetLength); + + /** + * Trims leading and trailing whitespace from this UnicodeString. + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& trim(void); + + + /* Miscellaneous operations */ + + /** + * Reverse this UnicodeString in place. + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& reverse(void); + + /** + * Reverse the range [`start`, `start + length`) in + * this UnicodeString. + * @param start the start of the range to reverse + * @param length the number of characters to to reverse + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& reverse(int32_t start, + int32_t length); + + /** + * Convert the characters in this to UPPER CASE following the conventions of + * the default locale. + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString& toUpper(void); + + /** + * Convert the characters in this to UPPER CASE following the conventions of + * a specific locale. + * @param locale The locale containing the conventions to use. + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString& toUpper(const Locale& locale); + + /** + * Convert the characters in this to lower case following the conventions of + * the default locale. + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString& toLower(void); + + /** + * Convert the characters in this to lower case following the conventions of + * a specific locale. + * @param locale The locale containing the conventions to use. + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString& toLower(const Locale& locale); + +#if !UCONFIG_NO_BREAK_ITERATION + + /** + * Titlecase this string, convenience function using the default locale. + * + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * It may be more efficient to always provide an iterator to avoid + * opening and closing one for each string. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * @param titleIter A break iterator to find the first characters of words + * that are to be titlecased. + * If none is provided (0), then a standard titlecase + * break iterator is opened. + * Otherwise the provided iterator is set to the string's text. + * @return A reference to this. + * @stable ICU 2.1 + */ + UnicodeString &toTitle(BreakIterator *titleIter); + + /** + * Titlecase this string. + * + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * It may be more efficient to always provide an iterator to avoid + * opening and closing one for each string. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * @param titleIter A break iterator to find the first characters of words + * that are to be titlecased. + * If none is provided (0), then a standard titlecase + * break iterator is opened. + * Otherwise the provided iterator is set to the string's text. + * @param locale The locale to consider. + * @return A reference to this. + * @stable ICU 2.1 + */ + UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); + + /** + * Titlecase this string, with options. + * + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with options.) + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * It may be more efficient to always provide an iterator to avoid + * opening and closing one for each string. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * @param titleIter A break iterator to find the first characters of words + * that are to be titlecased. + * If none is provided (0), then a standard titlecase + * break iterator is opened. + * Otherwise the provided iterator is set to the string's text. + * @param locale The locale to consider. + * @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE, + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, + * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. + * @param options Options bit set, see ucasemap_open(). + * @return A reference to this. + * @stable ICU 3.8 + */ + UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); + +#endif + + /** + * Case-folds the characters in this string. + * + * Case-folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); + + //======================================== + // Access to the internal buffer + //======================================== + + /** + * Get a read/write pointer to the internal buffer. + * The buffer is guaranteed to be large enough for at least minCapacity char16_ts, + * writable, and is still owned by the UnicodeString object. + * Calls to getBuffer(minCapacity) must not be nested, and + * must be matched with calls to releaseBuffer(newLength). + * If the string buffer was read-only or shared, + * then it will be reallocated and copied. + * + * An attempted nested call will return 0, and will not further modify the + * state of the UnicodeString object. + * It also returns 0 if the string is bogus. + * + * The actual capacity of the string buffer may be larger than minCapacity. + * getCapacity() returns the actual capacity. + * For many operations, the full capacity should be used to avoid reallocations. + * + * While the buffer is "open" between getBuffer(minCapacity) + * and releaseBuffer(newLength), the following applies: + * - The string length is set to 0. + * - Any read API call on the UnicodeString object will behave like on a 0-length string. + * - Any write API call on the UnicodeString object is disallowed and will have no effect. + * - You can read from and write to the returned buffer. + * - The previous string contents will still be in the buffer; + * if you want to use it, then you need to call length() before getBuffer(minCapacity). + * If the length() was greater than minCapacity, then any contents after minCapacity + * may be lost. + * The buffer contents is not NUL-terminated by getBuffer(). + * If length() < getCapacity() then you can terminate it by writing a NUL + * at index length(). + * - You must call releaseBuffer(newLength) before and in order to + * return to normal UnicodeString operation. + * + * @param minCapacity the minimum number of char16_ts that are to be available + * in the buffer, starting at the returned pointer; + * default to the current string capacity if minCapacity==-1 + * @return a writable pointer to the internal string buffer, + * or nullptr if an error occurs (nested calls, out of memory) + * + * @see releaseBuffer + * @see getTerminatedBuffer() + * @stable ICU 2.0 + */ + char16_t *getBuffer(int32_t minCapacity); + + /** + * Release a read/write buffer on a UnicodeString object with an + * "open" getBuffer(minCapacity). + * This function must be called in a matched pair with getBuffer(minCapacity). + * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". + * + * It will set the string length to newLength, at most to the current capacity. + * If newLength==-1 then it will set the length according to the + * first NUL in the buffer, or to the capacity if there is no NUL. + * + * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation. + * + * @param newLength the new length of the UnicodeString object; + * defaults to the current capacity if newLength is greater than that; + * if newLength==-1, it defaults to u_strlen(buffer) but not more than + * the current capacity of the string + * + * @see getBuffer(int32_t minCapacity) + * @stable ICU 2.0 + */ + void releaseBuffer(int32_t newLength=-1); + + /** + * Get a read-only pointer to the internal buffer. + * This can be called at any time on a valid UnicodeString. + * + * It returns 0 if the string is bogus, or + * during an "open" getBuffer(minCapacity). + * + * It can be called as many times as desired. + * The pointer that it returns will remain valid until the UnicodeString object is modified, + * at which time the pointer is semantically invalidated and must not be used any more. + * + * The capacity of the buffer can be determined with getCapacity(). + * The part after length() may or may not be initialized and valid, + * depending on the history of the UnicodeString object. + * + * The buffer contents is (probably) not NUL-terminated. + * You can check if it is with + * `(s.length() < s.getCapacity() && buffer[s.length()]==0)`. + * (See getTerminatedBuffer().) + * + * The buffer may reside in read-only memory. Its contents must not + * be modified. + * + * @return a read-only pointer to the internal string buffer, + * or nullptr if the string is empty or bogus + * + * @see getBuffer(int32_t minCapacity) + * @see getTerminatedBuffer() + * @stable ICU 2.0 + */ + inline const char16_t *getBuffer() const; + + /** + * Get a read-only pointer to the internal buffer, + * making sure that it is NUL-terminated. + * This can be called at any time on a valid UnicodeString. + * + * It returns 0 if the string is bogus, or + * during an "open" getBuffer(minCapacity), or if the buffer cannot + * be NUL-terminated (because memory allocation failed). + * + * It can be called as many times as desired. + * The pointer that it returns will remain valid until the UnicodeString object is modified, + * at which time the pointer is semantically invalidated and must not be used any more. + * + * The capacity of the buffer can be determined with getCapacity(). + * The part after length()+1 may or may not be initialized and valid, + * depending on the history of the UnicodeString object. + * + * The buffer contents is guaranteed to be NUL-terminated. + * getTerminatedBuffer() may reallocate the buffer if a terminating NUL + * is written. + * For this reason, this function is not const, unlike getBuffer(). + * Note that a UnicodeString may also contain NUL characters as part of its contents. + * + * The buffer may reside in read-only memory. Its contents must not + * be modified. + * + * @return a read-only pointer to the internal string buffer, + * or 0 if the string is empty or bogus + * + * @see getBuffer(int32_t minCapacity) + * @see getBuffer() + * @stable ICU 2.2 + */ + const char16_t *getTerminatedBuffer(); + + //======================================== + // Constructors + //======================================== + + /** Construct an empty UnicodeString. + * @stable ICU 2.0 + */ + inline UnicodeString(); + + /** + * Construct a UnicodeString with capacity to hold `capacity` char16_ts + * @param capacity the number of char16_ts this UnicodeString should hold + * before a resize is necessary; if count is greater than 0 and count + * code points c take up more space than capacity, then capacity is adjusted + * accordingly. + * @param c is used to initially fill the string + * @param count specifies how many code points c are to be written in the + * string + * @stable ICU 2.0 + */ + UnicodeString(int32_t capacity, UChar32 c, int32_t count); + + /** + * Single char16_t (code unit) constructor. + * + * It is recommended to mark this constructor "explicit" by + * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit` + * on the compiler command line or similar. + * @param ch the character to place in the UnicodeString + * @stable ICU 2.0 + */ + UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch); + + /** + * Single UChar32 (code point) constructor. + * + * It is recommended to mark this constructor "explicit" by + * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit` + * on the compiler command line or similar. + * @param ch the character to place in the UnicodeString + * @stable ICU 2.0 + */ + UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); + + /** + * char16_t* constructor. + * + * It is recommended to mark this constructor "explicit" by + * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` + * on the compiler command line or similar. + * @param text The characters to place in the UnicodeString. `text` + * must be NULL (U+0000) terminated. + * @stable ICU 2.0 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text); + +#if !U_CHAR16_IS_TYPEDEF + /** + * uint16_t * constructor. + * Delegates to UnicodeString(const char16_t *). + * + * It is recommended to mark this constructor "explicit" by + * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @stable ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : + UnicodeString(ConstChar16Ptr(text)) {} +#endif + +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(const char16_t *). + * + * It is recommended to mark this constructor "explicit" by + * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @stable ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : + UnicodeString(ConstChar16Ptr(text)) {} +#endif + + /** + * nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * + * It is recommended to mark this constructor "explicit" by + * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` + * on the compiler command line or similar. + * @param text nullptr + * @stable ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); + + /** + * char16_t* constructor. + * @param text The characters to place in the UnicodeString. + * @param textLength The number of Unicode characters in `text` + * to copy. + * @stable ICU 2.0 + */ + UnicodeString(const char16_t *text, + int32_t textLength); + +#if !U_CHAR16_IS_TYPEDEF + /** + * uint16_t * constructor. + * Delegates to UnicodeString(const char16_t *, int32_t). + * @param text UTF-16 string + * @param textLength string length + * @stable ICU 59 + */ + UnicodeString(const uint16_t *text, int32_t textLength) : + UnicodeString(ConstChar16Ptr(text), textLength) {} +#endif + +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(const char16_t *, int32_t). + * @param text NUL-terminated UTF-16 string + * @param textLength string length + * @stable ICU 59 + */ + UnicodeString(const wchar_t *text, int32_t textLength) : + UnicodeString(ConstChar16Ptr(text), textLength) {} +#endif + + /** + * nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * @param text nullptr + * @param textLength ignored + * @stable ICU 59 + */ + inline UnicodeString(const std::nullptr_t text, int32_t textLength); + + /** + * Readonly-aliasing char16_t* constructor. + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has copy-on-write semantics: + * When the string is modified, then the buffer is first copied into + * newly allocated memory. + * The aliased buffer is never modified. + * + * In an assignment to another UnicodeString, when using the copy constructor + * or the assignment operator, the text will be copied. + * When using fastCopyFrom(), the text will be aliased again, + * so that both strings then alias the same readonly-text. + * + * @param isTerminated specifies if `text` is `NUL`-terminated. + * This must be true if `textLength==-1`. + * @param text The characters to alias for the UnicodeString. + * @param textLength The number of Unicode characters in `text` to alias. + * If -1, then this constructor will determine the length + * by calling `u_strlen()`. + * @stable ICU 2.0 + */ + UnicodeString(UBool isTerminated, + ConstChar16Ptr text, + int32_t textLength); + + /** + * Writable-aliasing char16_t* constructor. + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has write-through semantics: + * For as long as the capacity of the buffer is sufficient, write operations + * will directly affect the buffer. When more capacity is necessary, then + * a new buffer will be allocated and the contents copied as with regularly + * constructed strings. + * In an assignment to another UnicodeString, the buffer will be copied. + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same + * as the string buffer itself and will in this case not copy the contents. + * + * @param buffer The characters to alias for the UnicodeString. + * @param buffLength The number of Unicode characters in `buffer` to alias. + * @param buffCapacity The size of `buffer` in char16_ts. + * @stable ICU 2.0 + */ + UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); + +#if !U_CHAR16_IS_TYPEDEF + /** + * Writable-aliasing uint16_t * constructor. + * Delegates to UnicodeString(const char16_t *, int32_t, int32_t). + * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @stable ICU 59 + */ + UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} +#endif + +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Writable-aliasing wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(const char16_t *, int32_t, int32_t). + * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @stable ICU 59 + */ + UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} +#endif + + /** + * Writable-aliasing nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * @param buffer nullptr + * @param buffLength ignored + * @param buffCapacity ignored + * @stable ICU 59 + */ + inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); + +#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION + + /** + * char* constructor. + * Uses the default converter (and thus depends on the ICU conversion code) + * unless U_CHARSET_IS_UTF8 is set to 1. + * + * For ASCII (really "invariant character") strings it is more efficient to use + * the constructor that takes a US_INV (for its enum EInvariant). + * For ASCII (invariant-character) string literals, see UNICODE_STRING and + * UNICODE_STRING_SIMPLE. + * + * It is recommended to mark this constructor "explicit" by + * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` + * on the compiler command line or similar. + * @param codepageData an array of bytes, null-terminated, + * in the platform's default codepage. + * @stable ICU 2.0 + * @see UNICODE_STRING + * @see UNICODE_STRING_SIMPLE + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); + + /** + * char* constructor. + * Uses the default converter (and thus depends on the ICU conversion code) + * unless U_CHARSET_IS_UTF8 is set to 1. + * @param codepageData an array of bytes in the platform's default codepage. + * @param dataLength The number of bytes in `codepageData`. + * @stable ICU 2.0 + */ + UnicodeString(const char *codepageData, int32_t dataLength); + +#endif + +#if !UCONFIG_NO_CONVERSION + + /** + * char* constructor. + * @param codepageData an array of bytes, null-terminated + * @param codepage the encoding of `codepageData`. The special + * value 0 for `codepage` indicates that the text is in the + * platform's default codepage. + * + * If `codepage` is an empty string (`""`), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. + * Recommendation: For invariant-character strings use the constructor + * UnicodeString(const char *src, int32_t length, enum EInvariant inv) + * because it avoids object code dependencies of UnicodeString on + * the conversion code. + * + * @stable ICU 2.0 + */ + UnicodeString(const char *codepageData, const char *codepage); + + /** + * char* constructor. + * @param codepageData an array of bytes. + * @param dataLength The number of bytes in `codepageData`. + * @param codepage the encoding of `codepageData`. The special + * value 0 for `codepage` indicates that the text is in the + * platform's default codepage. + * If `codepage` is an empty string (`""`), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. + * Recommendation: For invariant-character strings use the constructor + * UnicodeString(const char *src, int32_t length, enum EInvariant inv) + * because it avoids object code dependencies of UnicodeString on + * the conversion code. + * + * @stable ICU 2.0 + */ + UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); + + /** + * char * / UConverter constructor. + * This constructor uses an existing UConverter object to + * convert the codepage string to Unicode and construct a UnicodeString + * from that. + * + * The converter is reset at first. + * If the error code indicates a failure before this constructor is called, + * or if an error occurs during conversion or construction, + * then the string will be bogus. + * + * This function avoids the overhead of opening and closing a converter if + * multiple strings are constructed. + * + * @param src input codepage string + * @param srcLength length of the input string, can be -1 for NUL-terminated strings + * @param cnv converter object (ucnv_resetToUnicode() will be called), + * can be NULL for the default converter + * @param errorCode normal ICU error code + * @stable ICU 2.0 + */ + UnicodeString( + const char *src, int32_t srcLength, + UConverter *cnv, + UErrorCode &errorCode); + +#endif + + /** + * Constructs a Unicode string from an invariant-character char * string. + * About invariant characters see utypes.h. + * This constructor has no runtime dependency on conversion code and is + * therefore recommended over ones taking a charset name string + * (where the empty string "" indicates invariant-character conversion). + * + * Use the macro US_INV as the third, signature-distinguishing parameter. + * + * For example: + * \code + * void fn(const char *s) { + * UnicodeString ustr(s, -1, US_INV); + * // use ustr ... + * } + * \endcode + * @param src String using only invariant characters. + * @param textLength Length of src, or -1 if NUL-terminated. + * @param inv Signature-distinguishing paramater, use US_INV. + * + * @see US_INV + * @stable ICU 3.2 + */ + UnicodeString(const char *src, int32_t textLength, enum EInvariant inv); + + + /** + * Copy constructor. + * + * Starting with ICU 2.4, the assignment operator and the copy constructor + * allocate a new buffer and copy the buffer contents even for readonly aliases. + * By contrast, the fastCopyFrom() function implements the old, + * more efficient but less safe behavior + * of making this string also a readonly alias to the same buffer. + * + * If the source object has an "open" buffer from getBuffer(minCapacity), + * then the copy is an empty string. + * + * @param that The UnicodeString object to copy. + * @stable ICU 2.0 + * @see fastCopyFrom + */ + UnicodeString(const UnicodeString& that); + + /** + * Move constructor; might leave src in bogus state. + * This string will have the same contents and state that the source string had. + * @param src source string + * @stable ICU 56 + */ + UnicodeString(UnicodeString &&src) U_NOEXCEPT; + + /** + * 'Substring' constructor from tail of source string. + * @param src The UnicodeString object to copy. + * @param srcStart The offset into `src` at which to start copying. + * @stable ICU 2.2 + */ + UnicodeString(const UnicodeString& src, int32_t srcStart); + + /** + * 'Substring' constructor from subrange of source string. + * @param src The UnicodeString object to copy. + * @param srcStart The offset into `src` at which to start copying. + * @param srcLength The number of characters from `src` to copy. + * @stable ICU 2.2 + */ + UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); + + /** + * Clone this object, an instance of a subclass of Replaceable. + * Clones can be used concurrently in multiple threads. + * If a subclass does not implement clone(), or if an error occurs, + * then NULL is returned. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see Replaceable::clone + * @see getDynamicClassID + * @stable ICU 2.6 + */ + virtual UnicodeString *clone() const; + + /** Destructor. + * @stable ICU 2.0 + */ + virtual ~UnicodeString(); + + /** + * Create a UnicodeString from a UTF-8 string. + * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. + * Calls u_strFromUTF8WithSub(). + * + * @param utf8 UTF-8 input string. + * Note that a StringPiece can be implicitly constructed + * from a std::string or a NUL-terminated const char * string. + * @return A UnicodeString with equivalent UTF-16 contents. + * @see toUTF8 + * @see toUTF8String + * @stable ICU 4.2 + */ + static UnicodeString fromUTF8(StringPiece utf8); + + /** + * Create a UnicodeString from a UTF-32 string. + * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. + * Calls u_strFromUTF32WithSub(). + * + * @param utf32 UTF-32 input string. Must not be NULL. + * @param length Length of the input string, or -1 if NUL-terminated. + * @return A UnicodeString with equivalent UTF-16 contents. + * @see toUTF32 + * @stable ICU 4.2 + */ + static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); + + /* Miscellaneous operations */ + + /** + * Unescape a string of characters and return a string containing + * the result. The following escape sequences are recognized: + * + * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] + * \\Uhhhhhhhh 8 hex digits + * \\xhh 1-2 hex digits + * \\ooo 1-3 octal digits; o in [0-7] + * \\cX control-X; X is masked with 0x1F + * + * as well as the standard ANSI C escapes: + * + * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, + * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, + * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C + * + * Anything else following a backslash is generically escaped. For + * example, "[a\\-z]" returns "[a-z]". + * + * If an escape sequence is ill-formed, this method returns an empty + * string. An example of an ill-formed sequence is "\\u" followed by + * fewer than 4 hex digits. + * + * This function is similar to u_unescape() but not identical to it. + * The latter takes a source char*, so it does escape recognition + * and also invariant conversion. + * + * @return a string with backslash escapes interpreted, or an + * empty string on error. + * @see UnicodeString#unescapeAt() + * @see u_unescape() + * @see u_unescapeAt() + * @stable ICU 2.0 + */ + UnicodeString unescape() const; + + /** + * Unescape a single escape sequence and return the represented + * character. See unescape() for a listing of the recognized escape + * sequences. The character at offset-1 is assumed (without + * checking) to be a backslash. If the escape sequence is + * ill-formed, or the offset is out of range, U_SENTINEL=-1 is + * returned. + * + * @param offset an input output parameter. On input, it is the + * offset into this string where the escape sequence is located, + * after the initial backslash. On output, it is advanced after the + * last character parsed. On error, it is not advanced at all. + * @return the character represented by the escape sequence at + * offset, or U_SENTINEL=-1 on error. + * @see UnicodeString#unescape() + * @see u_unescape() + * @see u_unescapeAt() + * @stable ICU 2.0 + */ + UChar32 unescapeAt(int32_t &offset) const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + + //======================================== + // Implementation methods + //======================================== + +protected: + /** + * Implement Replaceable::getLength() (see jitterbug 1027). + * @stable ICU 2.4 + */ + virtual int32_t getLength() const; + + /** + * The change in Replaceable to use virtual getCharAt() allows + * UnicodeString::charAt() to be inline again (see jitterbug 709). + * @stable ICU 2.4 + */ + virtual char16_t getCharAt(int32_t offset) const; + + /** + * The change in Replaceable to use virtual getChar32At() allows + * UnicodeString::char32At() to be inline again (see jitterbug 709). + * @stable ICU 2.4 + */ + virtual UChar32 getChar32At(int32_t offset) const; + +private: + // For char* constructors. Could be made public. + UnicodeString &setToUTF8(StringPiece utf8); + // For extract(char*). + // We could make a toUTF8(target, capacity, errorCode) public but not + // this version: New API will be cleaner if we make callers create substrings + // rather than having start+length on every method, + // and it should take a UErrorCode&. + int32_t + toUTF8(int32_t start, int32_t len, + char *target, int32_t capacity) const; + + /** + * Internal string contents comparison, called by operator==. + * Requires: this & text not bogus and have same lengths. + */ + UBool doEquals(const UnicodeString &text, int32_t len) const; + + inline int8_t + doCompare(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + int8_t doCompare(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + inline int8_t + doCompareCodePointOrder(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + int8_t doCompareCodePointOrder(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + inline int8_t + doCaseCompare(int32_t start, + int32_t length, + const UnicodeString &srcText, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const; + + int8_t + doCaseCompare(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const; + + int32_t doIndexOf(char16_t c, + int32_t start, + int32_t length) const; + + int32_t doIndexOf(UChar32 c, + int32_t start, + int32_t length) const; + + int32_t doLastIndexOf(char16_t c, + int32_t start, + int32_t length) const; + + int32_t doLastIndexOf(UChar32 c, + int32_t start, + int32_t length) const; + + void doExtract(int32_t start, + int32_t length, + char16_t *dst, + int32_t dstStart) const; + + inline void doExtract(int32_t start, + int32_t length, + UnicodeString& target) const; + + inline char16_t doCharAt(int32_t offset) const; + + UnicodeString& doReplace(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + UnicodeString& doReplace(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength); + + UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength); + UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); + + UnicodeString& doReverse(int32_t start, + int32_t length); + + // calculate hash code + int32_t doHashCode(void) const; + + // get pointer to start of array + // these do not check for kOpenGetBuffer, unlike the public getBuffer() function + inline char16_t* getArrayStart(void); + inline const char16_t* getArrayStart(void) const; + + inline UBool hasShortLength() const; + inline int32_t getShortLength() const; + + // A UnicodeString object (not necessarily its current buffer) + // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). + inline UBool isWritable() const; + + // Is the current buffer writable? + inline UBool isBufferWritable() const; + + // None of the following does releaseArray(). + inline void setZeroLength(); + inline void setShortLength(int32_t len); + inline void setLength(int32_t len); + inline void setToEmpty(); + inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags + + // allocate the array; result may be the stack buffer + // sets refCount to 1 if appropriate + // sets fArray, fCapacity, and flags + // sets length to 0 + // returns boolean for success or failure + UBool allocate(int32_t capacity); + + // release the array if owned + void releaseArray(void); + + // turn a bogus string into an empty one + void unBogus(); + + // implements assigment operator, copy constructor, and fastCopyFrom() + UnicodeString ©From(const UnicodeString &src, UBool fastCopy=false); + + // Copies just the fields without memory management. + void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT; + + // Pin start and limit to acceptable values. + inline void pinIndex(int32_t& start) const; + inline void pinIndices(int32_t& start, + int32_t& length) const; + +#if !UCONFIG_NO_CONVERSION + + /* Internal extract() using UConverter. */ + int32_t doExtract(int32_t start, int32_t length, + char *dest, int32_t destCapacity, + UConverter *cnv, + UErrorCode &errorCode) const; + + /* + * Real constructor for converting from codepage data. + * It assumes that it is called with !fRefCounted. + * + * If `codepage==0`, then the default converter + * is used for the platform encoding. + * If `codepage` is an empty string (`""`), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. + */ + void doCodepageCreate(const char *codepageData, + int32_t dataLength, + const char *codepage); + + /* + * Worker function for creating a UnicodeString from + * a codepage string using a UConverter. + */ + void + doCodepageCreate(const char *codepageData, + int32_t dataLength, + UConverter *converter, + UErrorCode &status); + +#endif + + /* + * This function is called when write access to the array + * is necessary. + * + * We need to make a copy of the array if + * the buffer is read-only, or + * the buffer is refCounted (shared), and refCount>1, or + * the buffer is too small. + * + * Return false if memory could not be allocated. + */ + UBool cloneArrayIfNeeded(int32_t newCapacity = -1, + int32_t growCapacity = -1, + UBool doCopyArray = true, + int32_t **pBufferToDelete = 0, + UBool forceClone = false); + + /** + * Common function for UnicodeString case mappings. + * The stringCaseMapper has the same type UStringCaseMapper + * as in ustr_imp.h for ustrcase_map(). + */ + UnicodeString & + caseMap(int32_t caseLocale, uint32_t options, +#if !UCONFIG_NO_BREAK_ITERATION + BreakIterator *iter, +#endif + UStringCaseMapper *stringCaseMapper); + + // ref counting + void addRef(void); + int32_t removeRef(void); + int32_t refCount(void) const; + + // constants + enum { + /** + * Size of stack buffer for short strings. + * Must be at least U16_MAX_LENGTH for the single-code point constructor to work. + * @see UNISTR_OBJECT_SIZE + */ + US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR, + kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index) + kInvalidHashCode=0, // invalid hash code + kEmptyHashCode=1, // hash code for empty string + + // bit flag values for fLengthAndFlags + kIsBogus=1, // this string is bogus, i.e., not valid or NULL + kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields + kRefCounted=4, // there is a refCount field before the characters in fArray + kBufferIsReadonly=8,// do not write to this buffer + kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), + // and releaseBuffer(newLength) must be called + kAllStorageFlags=0x1f, + + kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long + kLength1=1<<kLengthShift, + kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0) + kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength + + // combined values for convenience + kShortString=kUsingStackBuffer, + kLongString=kRefCounted, + kReadonlyAlias=kBufferIsReadonly, + kWritableAlias=0 + }; + + friend class UnicodeStringAppendable; + + union StackBufferOrFields; // forward declaration necessary before friend declaration + friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion + + /* + * The following are all the class fields that are stored + * in each UnicodeString object. + * Note that UnicodeString has virtual functions, + * therefore there is an implicit vtable pointer + * as the first real field. + * The fields should be aligned such that no padding is necessary. + * On 32-bit machines, the size should be 32 bytes, + * on 64-bit machines (8-byte pointers), it should be 40 bytes. + * + * We use a hack to achieve this. + * + * With at least some compilers, each of the following is forced to + * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], + * rounded up with additional padding if the fields do not already fit that requirement: + * - sizeof(class UnicodeString) + * - offsetof(UnicodeString, fUnion) + * - sizeof(fUnion) + * - sizeof(fStackFields) + * + * We optimize for the longest possible internal buffer for short strings. + * fUnion.fStackFields begins with 2 bytes for storage flags + * and the length of relatively short strings, + * followed by the buffer for short string contents. + * There is no padding inside fStackFields. + * + * Heap-allocated and aliased strings use fUnion.fFields. + * Both fStackFields and fFields must begin with the same fields for flags and short length, + * that is, those must have the same memory offsets inside the object, + * because the flags must be inspected in order to decide which half of fUnion is being used. + * We assume that the compiler does not reorder the fields. + * + * (Padding at the end of fFields is ok: + * As long as it is no larger than fStackFields, it is not wasted space.) + * + * For some of the history of the UnicodeString class fields layout, see + * - ICU ticket #11551 "longer UnicodeString contents in stack buffer" + * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays" + * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?" + */ + // (implicit) *vtable; + union StackBufferOrFields { + // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used. + // Each struct of the union must begin with fLengthAndFlags. + struct { + int16_t fLengthAndFlags; // bit fields: see constants above + char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings + } fStackFields; + struct { + int16_t fLengthAndFlags; // bit fields: see constants above + int32_t fLength; // number of characters in fArray if >127; else undefined + int32_t fCapacity; // capacity of fArray (in char16_ts) + // array pointer last to minimize padding for machines with P128 data model + // or pointer sizes that are not a power of 2 + char16_t *fArray; // the Unicode data + } fFields; + } fUnion; +}; + +/** + * Create a new UnicodeString with the concatenation of two others. + * + * @param s1 The first string to be copied to the new one. + * @param s2 The second string to be copied to the new one, after s1. + * @return UnicodeString(s1).append(s2) + * @stable ICU 2.8 + */ +U_COMMON_API UnicodeString U_EXPORT2 +operator+ (const UnicodeString &s1, const UnicodeString &s2); + +//======================================== +// Inline members +//======================================== + +//======================================== +// Privates +//======================================== + +inline void +UnicodeString::pinIndex(int32_t& start) const +{ + // pin index + if(start < 0) { + start = 0; + } else if(start > length()) { + start = length(); + } +} + +inline void +UnicodeString::pinIndices(int32_t& start, + int32_t& _length) const +{ + // pin indices + int32_t len = length(); + if(start < 0) { + start = 0; + } else if(start > len) { + start = len; + } + if(_length < 0) { + _length = 0; + } else if(_length > (len - start)) { + _length = (len - start); + } +} + +inline char16_t* +UnicodeString::getArrayStart() { + return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? + fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; +} + +inline const char16_t* +UnicodeString::getArrayStart() const { + return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? + fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; +} + +//======================================== +// Default constructor +//======================================== + +inline +UnicodeString::UnicodeString() { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +//======================================== +// Read-only implementation methods +//======================================== +inline UBool +UnicodeString::hasShortLength() const { + return fUnion.fFields.fLengthAndFlags>=0; +} + +inline int32_t +UnicodeString::getShortLength() const { + // fLengthAndFlags must be non-negative -> short length >= 0 + // and arithmetic or logical shift does not matter. + return fUnion.fFields.fLengthAndFlags>>kLengthShift; +} + +inline int32_t +UnicodeString::length() const { + return hasShortLength() ? getShortLength() : fUnion.fFields.fLength; +} + +inline int32_t +UnicodeString::getCapacity() const { + return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? + US_STACKBUF_SIZE : fUnion.fFields.fCapacity; +} + +inline int32_t +UnicodeString::hashCode() const +{ return doHashCode(); } + +inline UBool +UnicodeString::isBogus() const +{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); } + +inline UBool +UnicodeString::isWritable() const +{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); } + +inline UBool +UnicodeString::isBufferWritable() const +{ + return (UBool)( + !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && + (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1)); +} + +inline const char16_t * +UnicodeString::getBuffer() const { + if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { + return nullptr; + } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { + return fUnion.fStackFields.fBuffer; + } else { + return fUnion.fFields.fArray; + } +} + +//======================================== +// Read-only alias methods +//======================================== +inline int8_t +UnicodeString::doCompare(int32_t start, + int32_t thisLength, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ + if(srcText.isBogus()) { + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise + } else { + srcText.pinIndices(srcStart, srcLength); + return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); + } +} + +inline UBool +UnicodeString::operator== (const UnicodeString& text) const +{ + if(isBogus()) { + return text.isBogus(); + } else { + int32_t len = length(), textLength = text.length(); + return !text.isBogus() && len == textLength && doEquals(text, len); + } +} + +inline UBool +UnicodeString::operator!= (const UnicodeString& text) const +{ return (! operator==(text)); } + +inline UBool +UnicodeString::operator> (const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()) == 1; } + +inline UBool +UnicodeString::operator< (const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()) == -1; } + +inline UBool +UnicodeString::operator>= (const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()) != -1; } + +inline UBool +UnicodeString::operator<= (const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()) != 1; } + +inline int8_t +UnicodeString::compare(const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()); } + +inline int8_t +UnicodeString::compare(int32_t start, + int32_t _length, + const UnicodeString& srcText) const +{ return doCompare(start, _length, srcText, 0, srcText.length()); } + +inline int8_t +UnicodeString::compare(ConstChar16Ptr srcChars, + int32_t srcLength) const +{ return doCompare(0, length(), srcChars, 0, srcLength); } + +inline int8_t +UnicodeString::compare(int32_t start, + int32_t _length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ return doCompare(start, _length, srcText, srcStart, srcLength); } + +inline int8_t +UnicodeString::compare(int32_t start, + int32_t _length, + const char16_t *srcChars) const +{ return doCompare(start, _length, srcChars, 0, _length); } + +inline int8_t +UnicodeString::compare(int32_t start, + int32_t _length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const +{ return doCompare(start, _length, srcChars, srcStart, srcLength); } + +inline int8_t +UnicodeString::compareBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) const +{ return doCompare(start, limit - start, + srcText, srcStart, srcLimit - srcStart); } + +inline int8_t +UnicodeString::doCompareCodePointOrder(int32_t start, + int32_t thisLength, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ + if(srcText.isBogus()) { + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise + } else { + srcText.pinIndices(srcStart, srcLength); + return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); + } +} + +inline int8_t +UnicodeString::compareCodePointOrder(const UnicodeString& text) const +{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); } + +inline int8_t +UnicodeString::compareCodePointOrder(int32_t start, + int32_t _length, + const UnicodeString& srcText) const +{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } + +inline int8_t +UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars, + int32_t srcLength) const +{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } + +inline int8_t +UnicodeString::compareCodePointOrder(int32_t start, + int32_t _length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } + +inline int8_t +UnicodeString::compareCodePointOrder(int32_t start, + int32_t _length, + const char16_t *srcChars) const +{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } + +inline int8_t +UnicodeString::compareCodePointOrder(int32_t start, + int32_t _length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const +{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } + +inline int8_t +UnicodeString::compareCodePointOrderBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) const +{ return doCompareCodePointOrder(start, limit - start, + srcText, srcStart, srcLimit - srcStart); } + +inline int8_t +UnicodeString::doCaseCompare(int32_t start, + int32_t thisLength, + const UnicodeString &srcText, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const +{ + if(srcText.isBogus()) { + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise + } else { + srcText.pinIndices(srcStart, srcLength); + return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); + } +} + +inline int8_t +UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { + return doCaseCompare(0, length(), text, 0, text.length(), options); +} + +inline int8_t +UnicodeString::caseCompare(int32_t start, + int32_t _length, + const UnicodeString &srcText, + uint32_t options) const { + return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); +} + +inline int8_t +UnicodeString::caseCompare(ConstChar16Ptr srcChars, + int32_t srcLength, + uint32_t options) const { + return doCaseCompare(0, length(), srcChars, 0, srcLength, options); +} + +inline int8_t +UnicodeString::caseCompare(int32_t start, + int32_t _length, + const UnicodeString &srcText, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const { + return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); +} + +inline int8_t +UnicodeString::caseCompare(int32_t start, + int32_t _length, + const char16_t *srcChars, + uint32_t options) const { + return doCaseCompare(start, _length, srcChars, 0, _length, options); +} + +inline int8_t +UnicodeString::caseCompare(int32_t start, + int32_t _length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const { + return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); +} + +inline int8_t +UnicodeString::caseCompareBetween(int32_t start, + int32_t limit, + const UnicodeString &srcText, + int32_t srcStart, + int32_t srcLimit, + uint32_t options) const { + return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); +} + +inline int32_t +UnicodeString::indexOf(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t _length) const +{ + if(!srcText.isBogus()) { + srcText.pinIndices(srcStart, srcLength); + if(srcLength > 0) { + return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); + } + } + return -1; +} + +inline int32_t +UnicodeString::indexOf(const UnicodeString& text) const +{ return indexOf(text, 0, text.length(), 0, length()); } + +inline int32_t +UnicodeString::indexOf(const UnicodeString& text, + int32_t start) const { + pinIndex(start); + return indexOf(text, 0, text.length(), start, length() - start); +} + +inline int32_t +UnicodeString::indexOf(const UnicodeString& text, + int32_t start, + int32_t _length) const +{ return indexOf(text, 0, text.length(), start, _length); } + +inline int32_t +UnicodeString::indexOf(const char16_t *srcChars, + int32_t srcLength, + int32_t start) const { + pinIndex(start); + return indexOf(srcChars, 0, srcLength, start, length() - start); +} + +inline int32_t +UnicodeString::indexOf(ConstChar16Ptr srcChars, + int32_t srcLength, + int32_t start, + int32_t _length) const +{ return indexOf(srcChars, 0, srcLength, start, _length); } + +inline int32_t +UnicodeString::indexOf(char16_t c, + int32_t start, + int32_t _length) const +{ return doIndexOf(c, start, _length); } + +inline int32_t +UnicodeString::indexOf(UChar32 c, + int32_t start, + int32_t _length) const +{ return doIndexOf(c, start, _length); } + +inline int32_t +UnicodeString::indexOf(char16_t c) const +{ return doIndexOf(c, 0, length()); } + +inline int32_t +UnicodeString::indexOf(UChar32 c) const +{ return indexOf(c, 0, length()); } + +inline int32_t +UnicodeString::indexOf(char16_t c, + int32_t start) const { + pinIndex(start); + return doIndexOf(c, start, length() - start); +} + +inline int32_t +UnicodeString::indexOf(UChar32 c, + int32_t start) const { + pinIndex(start); + return indexOf(c, start, length() - start); +} + +inline int32_t +UnicodeString::lastIndexOf(ConstChar16Ptr srcChars, + int32_t srcLength, + int32_t start, + int32_t _length) const +{ return lastIndexOf(srcChars, 0, srcLength, start, _length); } + +inline int32_t +UnicodeString::lastIndexOf(const char16_t *srcChars, + int32_t srcLength, + int32_t start) const { + pinIndex(start); + return lastIndexOf(srcChars, 0, srcLength, start, length() - start); +} + +inline int32_t +UnicodeString::lastIndexOf(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t _length) const +{ + if(!srcText.isBogus()) { + srcText.pinIndices(srcStart, srcLength); + if(srcLength > 0) { + return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); + } + } + return -1; +} + +inline int32_t +UnicodeString::lastIndexOf(const UnicodeString& text, + int32_t start, + int32_t _length) const +{ return lastIndexOf(text, 0, text.length(), start, _length); } + +inline int32_t +UnicodeString::lastIndexOf(const UnicodeString& text, + int32_t start) const { + pinIndex(start); + return lastIndexOf(text, 0, text.length(), start, length() - start); +} + +inline int32_t +UnicodeString::lastIndexOf(const UnicodeString& text) const +{ return lastIndexOf(text, 0, text.length(), 0, length()); } + +inline int32_t +UnicodeString::lastIndexOf(char16_t c, + int32_t start, + int32_t _length) const +{ return doLastIndexOf(c, start, _length); } + +inline int32_t +UnicodeString::lastIndexOf(UChar32 c, + int32_t start, + int32_t _length) const { + return doLastIndexOf(c, start, _length); +} + +inline int32_t +UnicodeString::lastIndexOf(char16_t c) const +{ return doLastIndexOf(c, 0, length()); } + +inline int32_t +UnicodeString::lastIndexOf(UChar32 c) const { + return lastIndexOf(c, 0, length()); +} + +inline int32_t +UnicodeString::lastIndexOf(char16_t c, + int32_t start) const { + pinIndex(start); + return doLastIndexOf(c, start, length() - start); +} + +inline int32_t +UnicodeString::lastIndexOf(UChar32 c, + int32_t start) const { + pinIndex(start); + return lastIndexOf(c, start, length() - start); +} + +inline UBool +UnicodeString::startsWith(const UnicodeString& text) const +{ return compare(0, text.length(), text, 0, text.length()) == 0; } + +inline UBool +UnicodeString::startsWith(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } + +inline UBool +UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { + if(srcLength < 0) { + srcLength = u_strlen(toUCharPtr(srcChars)); + } + return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; +} + +inline UBool +UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { + if(srcLength < 0) { + srcLength = u_strlen(toUCharPtr(srcChars)); + } + return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; +} + +inline UBool +UnicodeString::endsWith(const UnicodeString& text) const +{ return doCompare(length() - text.length(), text.length(), + text, 0, text.length()) == 0; } + +inline UBool +UnicodeString::endsWith(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const { + srcText.pinIndices(srcStart, srcLength); + return doCompare(length() - srcLength, srcLength, + srcText, srcStart, srcLength) == 0; +} + +inline UBool +UnicodeString::endsWith(ConstChar16Ptr srcChars, + int32_t srcLength) const { + if(srcLength < 0) { + srcLength = u_strlen(toUCharPtr(srcChars)); + } + return doCompare(length() - srcLength, srcLength, + srcChars, 0, srcLength) == 0; +} + +inline UBool +UnicodeString::endsWith(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const { + if(srcLength < 0) { + srcLength = u_strlen(toUCharPtr(srcChars + srcStart)); + } + return doCompare(length() - srcLength, srcLength, + srcChars, srcStart, srcLength) == 0; +} + +//======================================== +// replace +//======================================== +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + const UnicodeString& srcText) +{ return doReplace(start, _length, srcText, 0, srcText.length()); } + +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(start, _length, srcText, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + ConstChar16Ptr srcChars, + int32_t srcLength) +{ return doReplace(start, _length, srcChars, 0, srcLength); } + +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(start, _length, srcChars, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + char16_t srcChar) +{ return doReplace(start, _length, &srcChar, 0, 1); } + +inline UnicodeString& +UnicodeString::replaceBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText) +{ return doReplace(start, limit - start, srcText, 0, srcText.length()); } + +inline UnicodeString& +UnicodeString::replaceBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) +{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } + +inline UnicodeString& +UnicodeString::findAndReplace(const UnicodeString& oldText, + const UnicodeString& newText) +{ return findAndReplace(0, length(), oldText, 0, oldText.length(), + newText, 0, newText.length()); } + +inline UnicodeString& +UnicodeString::findAndReplace(int32_t start, + int32_t _length, + const UnicodeString& oldText, + const UnicodeString& newText) +{ return findAndReplace(start, _length, oldText, 0, oldText.length(), + newText, 0, newText.length()); } + +// ============================ +// extract +// ============================ +inline void +UnicodeString::doExtract(int32_t start, + int32_t _length, + UnicodeString& target) const +{ target.replace(0, target.length(), *this, start, _length); } + +inline void +UnicodeString::extract(int32_t start, + int32_t _length, + Char16Ptr target, + int32_t targetStart) const +{ doExtract(start, _length, target, targetStart); } + +inline void +UnicodeString::extract(int32_t start, + int32_t _length, + UnicodeString& target) const +{ doExtract(start, _length, target); } + +#if !UCONFIG_NO_CONVERSION + +inline int32_t +UnicodeString::extract(int32_t start, + int32_t _length, + char *dst, + const char *codepage) const + +{ + // This dstSize value will be checked explicitly + return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); +} + +#endif + +inline void +UnicodeString::extractBetween(int32_t start, + int32_t limit, + char16_t *dst, + int32_t dstStart) const { + pinIndex(start); + pinIndex(limit); + doExtract(start, limit - start, dst, dstStart); +} + +inline UnicodeString +UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { + return tempSubString(start, limit - start); +} + +inline char16_t +UnicodeString::doCharAt(int32_t offset) const +{ + if((uint32_t)offset < (uint32_t)length()) { + return getArrayStart()[offset]; + } else { + return kInvalidUChar; + } +} + +inline char16_t +UnicodeString::charAt(int32_t offset) const +{ return doCharAt(offset); } + +inline char16_t +UnicodeString::operator[] (int32_t offset) const +{ return doCharAt(offset); } + +inline UBool +UnicodeString::isEmpty() const { + // Arithmetic or logical right shift does not matter: only testing for 0. + return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0; +} + +//======================================== +// Write implementation methods +//======================================== +inline void +UnicodeString::setZeroLength() { + fUnion.fFields.fLengthAndFlags &= kAllStorageFlags; +} + +inline void +UnicodeString::setShortLength(int32_t len) { + // requires 0 <= len <= kMaxShortLength + fUnion.fFields.fLengthAndFlags = + (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift)); +} + +inline void +UnicodeString::setLength(int32_t len) { + if(len <= kMaxShortLength) { + setShortLength(len); + } else { + fUnion.fFields.fLengthAndFlags |= kLengthIsLarge; + fUnion.fFields.fLength = len; + } +} + +inline void +UnicodeString::setToEmpty() { + fUnion.fFields.fLengthAndFlags = kShortString; +} + +inline void +UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) { + setLength(len); + fUnion.fFields.fArray = array; + fUnion.fFields.fCapacity = capacity; +} + +inline UnicodeString& +UnicodeString::operator= (char16_t ch) +{ return doReplace(0, length(), &ch, 0, 1); } + +inline UnicodeString& +UnicodeString::operator= (UChar32 ch) +{ return replace(0, length(), ch); } + +inline UnicodeString& +UnicodeString::setTo(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) +{ + unBogus(); + return doReplace(0, length(), srcText, srcStart, srcLength); +} + +inline UnicodeString& +UnicodeString::setTo(const UnicodeString& srcText, + int32_t srcStart) +{ + unBogus(); + srcText.pinIndex(srcStart); + return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); +} + +inline UnicodeString& +UnicodeString::setTo(const UnicodeString& srcText) +{ + return copyFrom(srcText); +} + +inline UnicodeString& +UnicodeString::setTo(const char16_t *srcChars, + int32_t srcLength) +{ + unBogus(); + return doReplace(0, length(), srcChars, 0, srcLength); +} + +inline UnicodeString& +UnicodeString::setTo(char16_t srcChar) +{ + unBogus(); + return doReplace(0, length(), &srcChar, 0, 1); +} + +inline UnicodeString& +UnicodeString::setTo(UChar32 srcChar) +{ + unBogus(); + return replace(0, length(), srcChar); +} + +inline UnicodeString& +UnicodeString::append(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) +{ return doAppend(srcText, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::append(const UnicodeString& srcText) +{ return doAppend(srcText, 0, srcText.length()); } + +inline UnicodeString& +UnicodeString::append(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) +{ return doAppend(srcChars, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::append(ConstChar16Ptr srcChars, + int32_t srcLength) +{ return doAppend(srcChars, 0, srcLength); } + +inline UnicodeString& +UnicodeString::append(char16_t srcChar) +{ return doAppend(&srcChar, 0, 1); } + +inline UnicodeString& +UnicodeString::operator+= (char16_t ch) +{ return doAppend(&ch, 0, 1); } + +inline UnicodeString& +UnicodeString::operator+= (UChar32 ch) { + return append(ch); +} + +inline UnicodeString& +UnicodeString::operator+= (const UnicodeString& srcText) +{ return doAppend(srcText, 0, srcText.length()); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(start, 0, srcText, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + const UnicodeString& srcText) +{ return doReplace(start, 0, srcText, 0, srcText.length()); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(start, 0, srcChars, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + ConstChar16Ptr srcChars, + int32_t srcLength) +{ return doReplace(start, 0, srcChars, 0, srcLength); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + char16_t srcChar) +{ return doReplace(start, 0, &srcChar, 0, 1); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + UChar32 srcChar) +{ return replace(start, 0, srcChar); } + + +inline UnicodeString& +UnicodeString::remove() +{ + // remove() of a bogus string makes the string empty and non-bogus + if(isBogus()) { + setToEmpty(); + } else { + setZeroLength(); + } + return *this; +} + +inline UnicodeString& +UnicodeString::remove(int32_t start, + int32_t _length) +{ + if(start <= 0 && _length == INT32_MAX) { + // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus + return remove(); + } + return doReplace(start, _length, NULL, 0, 0); +} + +inline UnicodeString& +UnicodeString::removeBetween(int32_t start, + int32_t limit) +{ return doReplace(start, limit - start, NULL, 0, 0); } + +inline UnicodeString & +UnicodeString::retainBetween(int32_t start, int32_t limit) { + truncate(limit); + return doReplace(0, start, NULL, 0, 0); +} + +inline UBool +UnicodeString::truncate(int32_t targetLength) +{ + if(isBogus() && targetLength == 0) { + // truncate(0) of a bogus string makes the string empty and non-bogus + unBogus(); + return false; + } else if((uint32_t)targetLength < (uint32_t)length()) { + setLength(targetLength); + return true; + } else { + return false; + } +} + +inline UnicodeString& +UnicodeString::reverse() +{ return doReverse(0, length()); } + +inline UnicodeString& +UnicodeString::reverse(int32_t start, + int32_t _length) +{ return doReverse(start, _length); } + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/unorm.h b/thirdparty/icu4c/common/unicode/unorm.h new file mode 100644 index 0000000000..c3c57582d4 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/unorm.h @@ -0,0 +1,476 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (c) 1996-2016, International Business Machines Corporation +* and others. All Rights Reserved. +******************************************************************************* +* File unorm.h +* +* Created by: Vladimir Weinstein 12052000 +* +* Modification history : +* +* Date Name Description +* 02/01/01 synwee Added normalization quickcheck enum and method. +*/ +#ifndef UNORM_H +#define UNORM_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/uiter.h" +#include "unicode/unorm2.h" + +/** + * \file + * \brief C API: Unicode Normalization + * + * Old Unicode normalization API. + * + * This API has been replaced by the unorm2.h API and is only available + * for backward compatibility. The functions here simply delegate to the + * unorm2.h functions, for example unorm2_getInstance() and unorm2_normalize(). + * There is one exception: The new API does not provide a replacement for unorm_compare(). + * Its declaration has been moved to unorm2.h. + * + * <code>unorm_normalize</code> transforms Unicode text into an equivalent composed or + * decomposed form, allowing for easier sorting and searching of text. + * <code>unorm_normalize</code> supports the standard normalization forms described in + * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode"> + * Unicode Standard Annex #15: Unicode Normalization Forms</a>. + * + * Characters with accents or other adornments can be encoded in + * several different ways in Unicode. For example, take the character A-acute. + * In Unicode, this can be encoded as a single character (the + * "composed" form): + * + * \code + * 00C1 LATIN CAPITAL LETTER A WITH ACUTE + * \endcode + * + * or as two separate characters (the "decomposed" form): + * + * \code + * 0041 LATIN CAPITAL LETTER A + * 0301 COMBINING ACUTE ACCENT + * \endcode + * + * To a user of your program, however, both of these sequences should be + * treated as the same "user-level" character "A with acute accent". When you are searching or + * comparing text, you must ensure that these two sequences are treated + * equivalently. In addition, you must handle characters with more than one + * accent. Sometimes the order of a character's combining accents is + * significant, while in other cases accent sequences in different orders are + * really equivalent. + * + * Similarly, the string "ffi" can be encoded as three separate letters: + * + * \code + * 0066 LATIN SMALL LETTER F + * 0066 LATIN SMALL LETTER F + * 0069 LATIN SMALL LETTER I + * \endcode + * + * or as the single character + * + * \code + * FB03 LATIN SMALL LIGATURE FFI + * \endcode + * + * The ffi ligature is not a distinct semantic character, and strictly speaking + * it shouldn't be in Unicode at all, but it was included for compatibility + * with existing character sets that already provided it. The Unicode standard + * identifies such characters by giving them "compatibility" decompositions + * into the corresponding semantic characters. When sorting and searching, you + * will often want to use these mappings. + * + * <code>unorm_normalize</code> helps solve these problems by transforming text into the + * canonical composed and decomposed forms as shown in the first example above. + * In addition, you can have it perform compatibility decompositions so that + * you can treat compatibility characters the same as their equivalents. + * Finally, <code>unorm_normalize</code> rearranges accents into the proper canonical + * order, so that you do not have to worry about accent rearrangement on your + * own. + * + * Form FCD, "Fast C or D", is also designed for collation. + * It allows to work on strings that are not necessarily normalized + * with an algorithm (like in collation) that works under "canonical closure", i.e., it treats precomposed + * characters and their decomposed equivalents the same. + * + * It is not a normalization form because it does not provide for uniqueness of representation. Multiple strings + * may be canonically equivalent (their NFDs are identical) and may all conform to FCD without being identical + * themselves. + * + * The form is defined such that the "raw decomposition", the recursive canonical decomposition of each character, + * results in a string that is canonically ordered. This means that precomposed characters are allowed for as long + * as their decompositions do not need canonical reordering. + * + * Its advantage for a process like collation is that all NFD and most NFC texts - and many unnormalized texts - + * already conform to FCD and do not need to be normalized (NFD) for such a process. The FCD quick check will + * return UNORM_YES for most strings in practice. + * + * unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD. + * + * For more details on FCD see the collation design document: + * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm + * + * ICU collation performs either NFD or FCD normalization automatically if normalization + * is turned on for the collator object. + * Beyond collation and string search, normalized strings may be useful for string equivalence comparisons, + * transliteration/transcription, unique representations, etc. + * + * The W3C generally recommends to exchange texts in NFC. + * Note also that most legacy character encodings use only precomposed forms and often do not + * encode any combining marks by themselves. For conversion to such character encodings the + * Unicode text needs to be normalized to NFC. + * For more usage examples, see the Unicode Standard Annex. + */ + +// Do not conditionalize the following enum with #ifndef U_HIDE_DEPRECATED_API, +// it is needed for layout of Normalizer object. +#ifndef U_FORCE_HIDE_DEPRECATED_API + +/** + * Constants for normalization modes. + * @deprecated ICU 56 Use unorm2.h instead. + */ +typedef enum { + /** No decomposition/composition. @deprecated ICU 56 Use unorm2.h instead. */ + UNORM_NONE = 1, + /** Canonical decomposition. @deprecated ICU 56 Use unorm2.h instead. */ + UNORM_NFD = 2, + /** Compatibility decomposition. @deprecated ICU 56 Use unorm2.h instead. */ + UNORM_NFKD = 3, + /** Canonical decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */ + UNORM_NFC = 4, + /** Default normalization. @deprecated ICU 56 Use unorm2.h instead. */ + UNORM_DEFAULT = UNORM_NFC, + /** Compatibility decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */ + UNORM_NFKC =5, + /** "Fast C or D" form. @deprecated ICU 56 Use unorm2.h instead. */ + UNORM_FCD = 6, + + /** One more than the highest normalization mode constant. @deprecated ICU 56 Use unorm2.h instead. */ + UNORM_MODE_COUNT +} UNormalizationMode; + +#endif // U_FORCE_HIDE_DEPRECATED_API + +#ifndef U_HIDE_DEPRECATED_API + +/** + * Constants for options flags for normalization. + * Use 0 for default options, + * including normalization according to the Unicode version + * that is currently supported by ICU (see u_getUnicodeVersion). + * @deprecated ICU 56 Use unorm2.h instead. + */ +enum { + /** + * Options bit set value to select Unicode 3.2 normalization + * (except NormalizationCorrections). + * At most one Unicode version can be selected at a time. + * @deprecated ICU 56 Use unorm2.h instead. + */ + UNORM_UNICODE_3_2=0x20 +}; + +/** + * Lowest-order bit number of unorm_compare() options bits corresponding to + * normalization options bits. + * + * The options parameter for unorm_compare() uses most bits for + * itself and for various comparison and folding flags. + * The most significant bits, however, are shifted down and passed on + * to the normalization implementation. + * (That is, from unorm_compare(..., options, ...), + * options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the + * internal normalization functions.) + * + * @see unorm_compare + * @deprecated ICU 56 Use unorm2.h instead. + */ +#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 + +/** + * Normalize a string. + * The string will be normalized according the specified normalization mode + * and options. + * The source and result buffers must not be the same, nor overlap. + * + * @param source The string to normalize. + * @param sourceLength The length of source, or -1 if NUL-terminated. + * @param mode The normalization mode; one of UNORM_NONE, + * UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT. + * @param options The normalization options, ORed together (0 for no options). + * @param result A pointer to a buffer to receive the result string. + * The result string is NUL-terminated if possible. + * @param resultLength The maximum size of result. + * @param status A pointer to a UErrorCode to receive any errors. + * @return The total buffer size needed; if greater than resultLength, + * the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR. + * @deprecated ICU 56 Use unorm2.h instead. + */ +U_DEPRECATED int32_t U_EXPORT2 +unorm_normalize(const UChar *source, int32_t sourceLength, + UNormalizationMode mode, int32_t options, + UChar *result, int32_t resultLength, + UErrorCode *status); + +/** + * Performing quick check on a string, to quickly determine if the string is + * in a particular normalization format. + * Three types of result can be returned UNORM_YES, UNORM_NO or + * UNORM_MAYBE. Result UNORM_YES indicates that the argument + * string is in the desired normalized format, UNORM_NO determines that + * argument string is not in the desired normalized format. A + * UNORM_MAYBE result indicates that a more thorough check is required, + * the user may have to put the string in its normalized form and compare the + * results. + * + * @param source string for determining if it is in a normalized format + * @param sourcelength length of source to test, or -1 if NUL-terminated + * @param mode which normalization form to test for + * @param status a pointer to a UErrorCode to receive any errors + * @return UNORM_YES, UNORM_NO or UNORM_MAYBE + * + * @see unorm_isNormalized + * @deprecated ICU 56 Use unorm2.h instead. + */ +U_DEPRECATED UNormalizationCheckResult U_EXPORT2 +unorm_quickCheck(const UChar *source, int32_t sourcelength, + UNormalizationMode mode, + UErrorCode *status); + +/** + * Performing quick check on a string; same as unorm_quickCheck but + * takes an extra options parameter like most normalization functions. + * + * @param src String that is to be tested if it is in a normalization format. + * @param srcLength Length of source to test, or -1 if NUL-terminated. + * @param mode Which normalization form to test for. + * @param options The normalization options, ORed together (0 for no options). + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return UNORM_YES, UNORM_NO or UNORM_MAYBE + * + * @see unorm_quickCheck + * @see unorm_isNormalized + * @deprecated ICU 56 Use unorm2.h instead. + */ +U_DEPRECATED UNormalizationCheckResult U_EXPORT2 +unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, + UNormalizationMode mode, int32_t options, + UErrorCode *pErrorCode); + +/** + * Test if a string is in a given normalization form. + * This is semantically equivalent to source.equals(normalize(source, mode)) . + * + * Unlike unorm_quickCheck(), this function returns a definitive result, + * never a "maybe". + * For NFD, NFKD, and FCD, both functions work exactly the same. + * For NFC and NFKC where quickCheck may return "maybe", this function will + * perform further tests to arrive at a true/false result. + * + * @param src String that is to be tested if it is in a normalization format. + * @param srcLength Length of source to test, or -1 if NUL-terminated. + * @param mode Which normalization form to test for. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Boolean value indicating whether the source string is in the + * "mode" normalization form. + * + * @see unorm_quickCheck + * @deprecated ICU 56 Use unorm2.h instead. + */ +U_DEPRECATED UBool U_EXPORT2 +unorm_isNormalized(const UChar *src, int32_t srcLength, + UNormalizationMode mode, + UErrorCode *pErrorCode); + +/** + * Test if a string is in a given normalization form; same as unorm_isNormalized but + * takes an extra options parameter like most normalization functions. + * + * @param src String that is to be tested if it is in a normalization format. + * @param srcLength Length of source to test, or -1 if NUL-terminated. + * @param mode Which normalization form to test for. + * @param options The normalization options, ORed together (0 for no options). + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Boolean value indicating whether the source string is in the + * "mode/options" normalization form. + * + * @see unorm_quickCheck + * @see unorm_isNormalized + * @deprecated ICU 56 Use unorm2.h instead. + */ +U_DEPRECATED UBool U_EXPORT2 +unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, + UNormalizationMode mode, int32_t options, + UErrorCode *pErrorCode); + +/** + * Iterative normalization forward. + * This function (together with unorm_previous) is somewhat + * similar to the C++ Normalizer class (see its non-static functions). + * + * Iterative normalization is useful when only a small portion of a longer + * string/text needs to be processed. + * + * For example, the likelihood may be high that processing the first 10% of some + * text will be sufficient to find certain data. + * Another example: When one wants to concatenate two normalized strings and get a + * normalized result, it is much more efficient to normalize just a small part of + * the result around the concatenation place instead of re-normalizing everything. + * + * The input text is an instance of the C character iteration API UCharIterator. + * It may wrap around a simple string, a CharacterIterator, a Replaceable, or any + * other kind of text object. + * + * If a buffer overflow occurs, then the caller needs to reset the iterator to the + * old index and call the function again with a larger buffer - if the caller cares + * for the actual output. + * Regardless of the output buffer, the iterator will always be moved to the next + * normalization boundary. + * + * This function (like unorm_previous) serves two purposes: + * + * 1) To find the next boundary so that the normalization of the part of the text + * from the current position to that boundary does not affect and is not affected + * by the part of the text beyond that boundary. + * + * 2) To normalize the text up to the boundary. + * + * The second step is optional, per the doNormalize parameter. + * It is omitted for operations like string concatenation, where the two adjacent + * string ends need to be normalized together. + * In such a case, the output buffer will just contain a copy of the text up to the + * boundary. + * + * pNeededToNormalize is an output-only parameter. Its output value is only defined + * if normalization was requested (doNormalize) and successful (especially, no + * buffer overflow). + * It is useful for operations like a normalizing transliterator, where one would + * not want to replace a piece of text if it is not modified. + * + * If doNormalize==true and pNeededToNormalize!=NULL then *pNeeded... is set true + * if the normalization was necessary. + * + * If doNormalize==false then *pNeededToNormalize will be set to false. + * + * If the buffer overflows, then *pNeededToNormalize will be undefined; + * essentially, whenever U_FAILURE is true (like in buffer overflows), this result + * will be undefined. + * + * @param src The input text in the form of a C character iterator. + * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. + * @param destCapacity The number of UChars that fit into dest. + * @param mode The normalization mode. + * @param options The normalization options, ORed together (0 for no options). + * @param doNormalize Indicates if the source text up to the next boundary + * is to be normalized (true) or just copied (false). + * @param pNeededToNormalize Output flag indicating if the normalization resulted in + * different text from the input. + * Not defined if an error occurs including buffer overflow. + * Always false if !doNormalize. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of output (number of UChars) when successful or buffer overflow. + * + * @see unorm_previous + * @see unorm_normalize + * + * @deprecated ICU 56 Use unorm2.h instead. + */ +U_DEPRECATED int32_t U_EXPORT2 +unorm_next(UCharIterator *src, + UChar *dest, int32_t destCapacity, + UNormalizationMode mode, int32_t options, + UBool doNormalize, UBool *pNeededToNormalize, + UErrorCode *pErrorCode); + +/** + * Iterative normalization backward. + * This function (together with unorm_next) is somewhat + * similar to the C++ Normalizer class (see its non-static functions). + * For all details see unorm_next. + * + * @param src The input text in the form of a C character iterator. + * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. + * @param destCapacity The number of UChars that fit into dest. + * @param mode The normalization mode. + * @param options The normalization options, ORed together (0 for no options). + * @param doNormalize Indicates if the source text up to the next boundary + * is to be normalized (true) or just copied (false). + * @param pNeededToNormalize Output flag indicating if the normalization resulted in + * different text from the input. + * Not defined if an error occurs including buffer overflow. + * Always false if !doNormalize. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of output (number of UChars) when successful or buffer overflow. + * + * @see unorm_next + * @see unorm_normalize + * + * @deprecated ICU 56 Use unorm2.h instead. + */ +U_DEPRECATED int32_t U_EXPORT2 +unorm_previous(UCharIterator *src, + UChar *dest, int32_t destCapacity, + UNormalizationMode mode, int32_t options, + UBool doNormalize, UBool *pNeededToNormalize, + UErrorCode *pErrorCode); + +/** + * Concatenate normalized strings, making sure that the result is normalized as well. + * + * If both the left and the right strings are in + * the normalization form according to "mode/options", + * then the result will be + * + * \code + * dest=normalize(left+right, mode, options) + * \endcode + * + * With the input strings already being normalized, + * this function will use unorm_next() and unorm_previous() + * to find the adjacent end pieces of the input strings. + * Only the concatenation of these end pieces will be normalized and + * then concatenated with the remaining parts of the input strings. + * + * It is allowed to have dest==left to avoid copying the entire left string. + * + * @param left Left source string, may be same as dest. + * @param leftLength Length of left source string, or -1 if NUL-terminated. + * @param right Right source string. Must not be the same as dest, nor overlap. + * @param rightLength Length of right source string, or -1 if NUL-terminated. + * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. + * @param destCapacity The number of UChars that fit into dest. + * @param mode The normalization mode. + * @param options The normalization options, ORed together (0 for no options). + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of output (number of UChars) when successful or buffer overflow. + * + * @see unorm_normalize + * @see unorm_next + * @see unorm_previous + * + * @deprecated ICU 56 Use unorm2.h instead. + */ +U_DEPRECATED int32_t U_EXPORT2 +unorm_concatenate(const UChar *left, int32_t leftLength, + const UChar *right, int32_t rightLength, + UChar *dest, int32_t destCapacity, + UNormalizationMode mode, int32_t options, + UErrorCode *pErrorCode); + +#endif /* U_HIDE_DEPRECATED_API */ +#endif /* #if !UCONFIG_NO_NORMALIZATION */ +#endif diff --git a/thirdparty/icu4c/common/unicode/unorm2.h b/thirdparty/icu4c/common/unicode/unorm2.h new file mode 100644 index 0000000000..24417b7103 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/unorm2.h @@ -0,0 +1,606 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2009-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: unorm2.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009dec15 +* created by: Markus W. Scherer +*/ + +#ifndef __UNORM2_H__ +#define __UNORM2_H__ + +/** + * \file + * \brief C API: New API for Unicode Normalization. + * + * Unicode normalization functionality for standard Unicode normalization or + * for using custom mapping tables. + * All instances of UNormalizer2 are unmodifiable/immutable. + * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller. + * For more details see the Normalizer2 C++ class. + */ + +#include "unicode/utypes.h" +#include "unicode/stringoptions.h" +#include "unicode/uset.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +/** + * Constants for normalization modes. + * For details about standard Unicode normalization forms + * and about the algorithms which are also used with custom mapping tables + * see http://www.unicode.org/unicode/reports/tr15/ + * @stable ICU 4.4 + */ +typedef enum { + /** + * Decomposition followed by composition. + * Same as standard NFC when using an "nfc" instance. + * Same as standard NFKC when using an "nfkc" instance. + * For details about standard Unicode normalization forms + * see http://www.unicode.org/unicode/reports/tr15/ + * @stable ICU 4.4 + */ + UNORM2_COMPOSE, + /** + * Map, and reorder canonically. + * Same as standard NFD when using an "nfc" instance. + * Same as standard NFKD when using an "nfkc" instance. + * For details about standard Unicode normalization forms + * see http://www.unicode.org/unicode/reports/tr15/ + * @stable ICU 4.4 + */ + UNORM2_DECOMPOSE, + /** + * "Fast C or D" form. + * If a string is in this form, then further decomposition <i>without reordering</i> + * would yield the same form as DECOMPOSE. + * Text in "Fast C or D" form can be processed efficiently with data tables + * that are "canonically closed", that is, that provide equivalent data for + * equivalent text, without having to be fully normalized. + * Not a standard Unicode normalization form. + * Not a unique form: Different FCD strings can be canonically equivalent. + * For details see http://www.unicode.org/notes/tn5/#FCD + * @stable ICU 4.4 + */ + UNORM2_FCD, + /** + * Compose only contiguously. + * Also known as "FCC" or "Fast C Contiguous". + * The result will often but not always be in NFC. + * The result will conform to FCD which is useful for processing. + * Not a standard Unicode normalization form. + * For details see http://www.unicode.org/notes/tn5/#FCC + * @stable ICU 4.4 + */ + UNORM2_COMPOSE_CONTIGUOUS +} UNormalization2Mode; + +/** + * Result values for normalization quick check functions. + * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms + * @stable ICU 2.0 + */ +typedef enum UNormalizationCheckResult { + /** + * The input string is not in the normalization form. + * @stable ICU 2.0 + */ + UNORM_NO, + /** + * The input string is in the normalization form. + * @stable ICU 2.0 + */ + UNORM_YES, + /** + * The input string may or may not be in the normalization form. + * This value is only returned for composition forms like NFC and FCC, + * when a backward-combining character is found for which the surrounding text + * would have to be analyzed further. + * @stable ICU 2.0 + */ + UNORM_MAYBE +} UNormalizationCheckResult; + +/** + * Opaque C service object type for the new normalization API. + * @stable ICU 4.4 + */ +struct UNormalizer2; +typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */ + +#if !UCONFIG_NO_NORMALIZATION + +/** + * Returns a UNormalizer2 instance for Unicode NFC normalization. + * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFCInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance for Unicode NFD normalization. + * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFDInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance for Unicode NFKC normalization. + * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance for Unicode NFKD normalization. + * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFKDInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization. + * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance which uses the specified data file + * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) + * and which composes or decomposes text according to the specified mode. + * Returns an unmodifiable singleton instance. Do not delete it. + * + * Use packageName=NULL for data files that are part of ICU's own data. + * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. + * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. + * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. + * + * @param packageName NULL for ICU built-in data, otherwise application data package name + * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file + * @param mode normalization mode (compose or decompose etc.) + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested UNormalizer2, if successful + * @stable ICU 4.4 + */ +U_CAPI const UNormalizer2 * U_EXPORT2 +unorm2_getInstance(const char *packageName, + const char *name, + UNormalization2Mode mode, + UErrorCode *pErrorCode); + +/** + * Constructs a filtered normalizer wrapping any UNormalizer2 instance + * and a filter set. + * Both are aliased and must not be modified or deleted while this object + * is used. + * The filter set should be frozen; otherwise the performance will suffer greatly. + * @param norm2 wrapped UNormalizer2 instance + * @param filterSet USet which determines the characters to be normalized + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested UNormalizer2, if successful + * @stable ICU 4.4 + */ +U_CAPI UNormalizer2 * U_EXPORT2 +unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode); + +/** + * Closes a UNormalizer2 instance from unorm2_openFiltered(). + * Do not close instances from unorm2_getInstance()! + * @param norm2 UNormalizer2 instance to be closed + * @stable ICU 4.4 + */ +U_CAPI void U_EXPORT2 +unorm2_close(UNormalizer2 *norm2); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUNormalizer2Pointer + * "Smart pointer" class, closes a UNormalizer2 via unorm2_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close); + +U_NAMESPACE_END + +#endif + +/** + * Writes the normalized form of the source string to the destination string + * (replacing its contents) and returns the length of the destination string. + * The source and destination strings must be different buffers. + * @param norm2 UNormalizer2 instance + * @param src source string + * @param length length of the source string, or -1 if NUL-terminated + * @param dest destination string; its contents is replaced with normalized src + * @param capacity number of UChars that can be written to dest + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return dest + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +unorm2_normalize(const UNormalizer2 *norm2, + const UChar *src, int32_t length, + UChar *dest, int32_t capacity, + UErrorCode *pErrorCode); +/** + * Appends the normalized form of the second string to the first string + * (merging them at the boundary) and returns the length of the first string. + * The result is normalized if the first string was normalized. + * The first and second strings must be different buffers. + * @param norm2 UNormalizer2 instance + * @param first string, should be normalized + * @param firstLength length of the first string, or -1 if NUL-terminated + * @param firstCapacity number of UChars that can be written to first + * @param second string, will be normalized + * @param secondLength length of the source string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return first + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, + UChar *first, int32_t firstLength, int32_t firstCapacity, + const UChar *second, int32_t secondLength, + UErrorCode *pErrorCode); +/** + * Appends the second string to the first string + * (merging them at the boundary) and returns the length of the first string. + * The result is normalized if both the strings were normalized. + * The first and second strings must be different buffers. + * @param norm2 UNormalizer2 instance + * @param first string, should be normalized + * @param firstLength length of the first string, or -1 if NUL-terminated + * @param firstCapacity number of UChars that can be written to first + * @param second string, should be normalized + * @param secondLength length of the source string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return first + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +unorm2_append(const UNormalizer2 *norm2, + UChar *first, int32_t firstLength, int32_t firstCapacity, + const UChar *second, int32_t secondLength, + UErrorCode *pErrorCode); + +/** + * Gets the decomposition mapping of c. + * Roughly equivalent to normalizing the String form of c + * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function + * returns a negative value and does not write a string + * if c does not have a decomposition mapping in this instance's data. + * This function is independent of the mode of the UNormalizer2. + * @param norm2 UNormalizer2 instance + * @param c code point + * @param decomposition String buffer which will be set to c's + * decomposition mapping, if there is one. + * @param capacity number of UChars that can be written to decomposition + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value + * @stable ICU 4.6 + */ +U_CAPI int32_t U_EXPORT2 +unorm2_getDecomposition(const UNormalizer2 *norm2, + UChar32 c, UChar *decomposition, int32_t capacity, + UErrorCode *pErrorCode); + +/** + * Gets the raw decomposition mapping of c. + * + * This is similar to the unorm2_getDecomposition() function but returns the + * raw decomposition mapping as specified in UnicodeData.txt or + * (for custom data) in the mapping files processed by the gennorm2 tool. + * By contrast, unorm2_getDecomposition() returns the processed, + * recursively-decomposed version of this mapping. + * + * When used on a standard NFKC Normalizer2 instance, + * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. + * + * When used on a standard NFC Normalizer2 instance, + * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); + * in this case, the result contains either one or two code points (=1..4 UChars). + * + * This function is independent of the mode of the UNormalizer2. + * @param norm2 UNormalizer2 instance + * @param c code point + * @param decomposition String buffer which will be set to c's + * raw decomposition mapping, if there is one. + * @param capacity number of UChars that can be written to decomposition + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value + * @stable ICU 49 + */ +U_CAPI int32_t U_EXPORT2 +unorm2_getRawDecomposition(const UNormalizer2 *norm2, + UChar32 c, UChar *decomposition, int32_t capacity, + UErrorCode *pErrorCode); + +/** + * Performs pairwise composition of a & b and returns the composite if there is one. + * + * Returns a composite code point c only if c has a two-way mapping to a+b. + * In standard Unicode normalization, this means that + * c has a canonical decomposition to a+b + * and c does not have the Full_Composition_Exclusion property. + * + * This function is independent of the mode of the UNormalizer2. + * @param norm2 UNormalizer2 instance + * @param a A (normalization starter) code point. + * @param b Another code point. + * @return The non-negative composite code point if there is one; otherwise a negative value. + * @stable ICU 49 + */ +U_CAPI UChar32 U_EXPORT2 +unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b); + +/** + * Gets the combining class of c. + * The default implementation returns 0 + * but all standard implementations return the Unicode Canonical_Combining_Class value. + * @param norm2 UNormalizer2 instance + * @param c code point + * @return c's combining class + * @stable ICU 49 + */ +U_CAPI uint8_t U_EXPORT2 +unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c); + +/** + * Tests if the string is normalized. + * Internally, in cases where the quickCheck() method would return "maybe" + * (which is only possible for the two COMPOSE modes) this method + * resolves to "yes" or "no" to provide a definitive result, + * at the cost of doing more work in those cases. + * @param norm2 UNormalizer2 instance + * @param s input string + * @param length length of the string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return true if s is normalized + * @stable ICU 4.4 + */ +U_CAPI UBool U_EXPORT2 +unorm2_isNormalized(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode); + +/** + * Tests if the string is normalized. + * For the two COMPOSE modes, the result could be "maybe" in cases that + * would take a little more work to resolve definitively. + * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster + * combination of quick check + normalization, to avoid + * re-checking the "yes" prefix. + * @param norm2 UNormalizer2 instance + * @param s input string + * @param length length of the string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return UNormalizationCheckResult + * @stable ICU 4.4 + */ +U_CAPI UNormalizationCheckResult U_EXPORT2 +unorm2_quickCheck(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode); + +/** + * Returns the end of the normalized substring of the input string. + * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> + * the substring <code>UnicodeString(s, 0, end)</code> + * will pass the quick check with a "yes" result. + * + * The returned end index is usually one or more characters before the + * "no" or "maybe" character: The end index is at a normalization boundary. + * (See the class documentation for more about normalization boundaries.) + * + * When the goal is a normalized string and most input strings are expected + * to be normalized already, then call this method, + * and if it returns a prefix shorter than the input string, + * copy that prefix and use normalizeSecondAndAppend() for the remainder. + * @param norm2 UNormalizer2 instance + * @param s input string + * @param length length of the string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return "yes" span end index + * @stable ICU 4.4 + */ +U_CAPI int32_t U_EXPORT2 +unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode); + +/** + * Tests if the character always has a normalization boundary before it, + * regardless of context. + * For details see the Normalizer2 base class documentation. + * @param norm2 UNormalizer2 instance + * @param c character to test + * @return true if c has a normalization boundary before it + * @stable ICU 4.4 + */ +U_CAPI UBool U_EXPORT2 +unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c); + +/** + * Tests if the character always has a normalization boundary after it, + * regardless of context. + * For details see the Normalizer2 base class documentation. + * @param norm2 UNormalizer2 instance + * @param c character to test + * @return true if c has a normalization boundary after it + * @stable ICU 4.4 + */ +U_CAPI UBool U_EXPORT2 +unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c); + +/** + * Tests if the character is normalization-inert. + * For details see the Normalizer2 base class documentation. + * @param norm2 UNormalizer2 instance + * @param c character to test + * @return true if c is normalization-inert + * @stable ICU 4.4 + */ +U_CAPI UBool U_EXPORT2 +unorm2_isInert(const UNormalizer2 *norm2, UChar32 c); + +/** + * Compares two strings for canonical equivalence. + * Further options include case-insensitive comparison and + * code point order (as opposed to code unit order). + * + * Canonical equivalence between two strings is defined as their normalized + * forms (NFD or NFC) being identical. + * This function compares strings incrementally instead of normalizing + * (and optionally case-folding) both strings entirely, + * improving performance significantly. + * + * Bulk normalization is only necessary if the strings do not fulfill the FCD + * conditions. Only in this case, and only if the strings are relatively long, + * is memory allocated temporarily. + * For FCD strings and short non-FCD strings there is no memory allocation. + * + * Semantically, this is equivalent to + * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2)))) + * where code point order and foldCase are all optional. + * + * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match + * the case folding must be performed first, then the normalization. + * + * @param s1 First source string. + * @param length1 Length of first source string, or -1 if NUL-terminated. + * + * @param s2 Second source string. + * @param length2 Length of second source string, or -1 if NUL-terminated. + * + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Case-sensitive comparison in code unit order, and the input strings + * are quick-checked for FCD. + * + * - UNORM_INPUT_IS_FCD + * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. + * If not set, the function will quickCheck for FCD + * and normalize if necessary. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_COMPARE_IGNORE_CASE + * Set to compare strings case-insensitively using case folding, + * instead of case-sensitively. + * If set, then the following case folding options are used. + * + * - Options as used with case-insensitive comparisons, currently: + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * (see u_strCaseCompare for details) + * + * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT + * + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return <0 or 0 or >0 as usual for string comparisons + * + * @see unorm_normalize + * @see UNORM_FCD + * @see u_strCompare + * @see u_strCaseCompare + * + * @stable ICU 2.2 + */ +U_CAPI int32_t U_EXPORT2 +unorm_compare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode); + +#endif /* !UCONFIG_NO_NORMALIZATION */ +#endif /* __UNORM2_H__ */ diff --git a/thirdparty/icu4c/common/unicode/uobject.h b/thirdparty/icu4c/common/unicode/uobject.h new file mode 100644 index 0000000000..eeb331ce97 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uobject.h @@ -0,0 +1,324 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2002-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: uobject.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jun26 +* created by: Markus W. Scherer +*/ + +#ifndef __UOBJECT_H__ +#define __UOBJECT_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/platform.h" + +/** + * \file + * \brief C++ API: Common ICU base class UObject. + */ + +/** + * \def U_NO_THROW + * Since ICU 64, use U_NOEXCEPT instead. + * + * Previously, define this to define the throw() specification so + * certain functions do not throw any exceptions + * + * UMemory operator new methods should have the throw() specification + * appended to them, so that the compiler adds the additional NULL check + * before calling constructors. Without, if <code>operator new</code> returns NULL the + * constructor is still called, and if the constructor references member + * data, (which it typically does), the result is a segmentation violation. + * + * @stable ICU 4.2. Since ICU 64, Use U_NOEXCEPT instead. See ICU-20422. + */ +#ifndef U_NO_THROW +#define U_NO_THROW U_NOEXCEPT +#endif + +/*===========================================================================*/ +/* UClassID-based RTTI */ +/*===========================================================================*/ + +/** + * UClassID is used to identify classes without using the compiler's RTTI. + * This was used before C++ compilers consistently supported RTTI. + * ICU 4.6 requires compiler RTTI to be turned on. + * + * Each class hierarchy which needs + * to implement polymorphic clone() or operator==() defines two methods, + * described in detail below. UClassID values can be compared using + * operator==(). Nothing else should be done with them. + * + * \par + * In class hierarchies that implement "poor man's RTTI", + * each concrete subclass implements getDynamicClassID() in the same way: + * + * \code + * class Derived { + * public: + * virtual UClassID getDynamicClassID() const + * { return Derived::getStaticClassID(); } + * } + * \endcode + * + * Each concrete class implements getStaticClassID() as well, which allows + * clients to test for a specific type. + * + * \code + * class Derived { + * public: + * static UClassID U_EXPORT2 getStaticClassID(); + * private: + * static char fgClassID; + * } + * + * // In Derived.cpp: + * UClassID Derived::getStaticClassID() + * { return (UClassID)&Derived::fgClassID; } + * char Derived::fgClassID = 0; // Value is irrelevant + * \endcode + * @stable ICU 2.0 + */ +typedef void* UClassID; + +U_NAMESPACE_BEGIN + +/** + * UMemory is the common ICU base class. + * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4). + * + * This is primarily to make it possible and simple to override the + * C++ memory management by adding new/delete operators to this base class. + * + * To override ALL ICU memory management, including that from plain C code, + * replace the allocation functions declared in cmemory.h + * + * UMemory does not contain any virtual functions. + * Common "boilerplate" functions are defined in UObject. + * + * @stable ICU 2.4 + */ +class U_COMMON_API UMemory { +public: + +/* test versions for debugging shaper heap memory problems */ +#ifdef SHAPER_MEMORY_DEBUG + static void * NewArray(int size, int count); + static void * GrowArray(void * array, int newSize ); + static void FreeArray(void * array ); +#endif + +#if U_OVERRIDE_CXX_ALLOCATION + /** + * Override for ICU4C C++ memory management. + * simple, non-class types are allocated using the macros in common/cmemory.h + * (uprv_malloc(), uprv_free(), uprv_realloc()); + * they or something else could be used here to implement C++ new/delete + * for ICU4C C++ classes + * @stable ICU 2.4 + */ + static void * U_EXPORT2 operator new(size_t size) U_NOEXCEPT; + + /** + * Override for ICU4C C++ memory management. + * See new(). + * @stable ICU 2.4 + */ + static void * U_EXPORT2 operator new[](size_t size) U_NOEXCEPT; + + /** + * Override for ICU4C C++ memory management. + * simple, non-class types are allocated using the macros in common/cmemory.h + * (uprv_malloc(), uprv_free(), uprv_realloc()); + * they or something else could be used here to implement C++ new/delete + * for ICU4C C++ classes + * @stable ICU 2.4 + */ + static void U_EXPORT2 operator delete(void *p) U_NOEXCEPT; + + /** + * Override for ICU4C C++ memory management. + * See delete(). + * @stable ICU 2.4 + */ + static void U_EXPORT2 operator delete[](void *p) U_NOEXCEPT; + +#if U_HAVE_PLACEMENT_NEW + /** + * Override for ICU4C C++ memory management for STL. + * See new(). + * @stable ICU 2.6 + */ + static inline void * U_EXPORT2 operator new(size_t, void *ptr) U_NOEXCEPT { return ptr; } + + /** + * Override for ICU4C C++ memory management for STL. + * See delete(). + * @stable ICU 2.6 + */ + static inline void U_EXPORT2 operator delete(void *, void *) U_NOEXCEPT {} +#endif /* U_HAVE_PLACEMENT_NEW */ +#if U_HAVE_DEBUG_LOCATION_NEW + /** + * This method overrides the MFC debug version of the operator new + * + * @param size The requested memory size + * @param file The file where the allocation was requested + * @param line The line where the allocation was requested + */ + static void * U_EXPORT2 operator new(size_t size, const char* file, int line) U_NOEXCEPT; + /** + * This method provides a matching delete for the MFC debug new + * + * @param p The pointer to the allocated memory + * @param file The file where the allocation was requested + * @param line The line where the allocation was requested + */ + static void U_EXPORT2 operator delete(void* p, const char* file, int line) U_NOEXCEPT; +#endif /* U_HAVE_DEBUG_LOCATION_NEW */ +#endif /* U_OVERRIDE_CXX_ALLOCATION */ + + /* + * Assignment operator not declared. The compiler will provide one + * which does nothing since this class does not contain any data members. + * API/code coverage may show the assignment operator as present and + * untested - ignore. + * Subclasses need this assignment operator if they use compiler-provided + * assignment operators of their own. An alternative to not declaring one + * here would be to declare and empty-implement a protected or public one. + UMemory &UMemory::operator=(const UMemory &); + */ +}; + +/** + * UObject is the common ICU "boilerplate" class. + * UObject inherits UMemory (starting with ICU 2.4), + * and all other public ICU C++ classes + * are derived from UObject (starting with ICU 2.2). + * + * UObject contains common virtual functions, in particular a virtual destructor. + * + * The clone() function is not available in UObject because it is not + * implemented by all ICU classes. + * Many ICU services provide a clone() function for their class trees, + * defined on the service's C++ base class + * (which itself is a subclass of UObject). + * + * @stable ICU 2.2 + */ +class U_COMMON_API UObject : public UMemory { +public: + /** + * Destructor. + * + * @stable ICU 2.2 + */ + virtual ~UObject(); + + /** + * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class. + * The base class implementation returns a dummy value. + * + * Use compiler RTTI rather than ICU's "poor man's RTTI". + * Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI". + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + +protected: + // the following functions are protected to prevent instantiation and + // direct use of UObject itself + + // default constructor + // inline UObject() {} + + // copy constructor + // inline UObject(const UObject &other) {} + +#if 0 + // TODO Sometime in the future. Implement operator==(). + // (This comment inserted in 2.2) + // some or all of the following "boilerplate" functions may be made public + // in a future ICU4C release when all subclasses implement them + + // assignment operator + // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74) + // commented out because the implementation is the same as a compiler's default + // UObject &operator=(const UObject &other) { return *this; } + + // comparison operators + virtual inline UBool operator==(const UObject &other) const { return this==&other; } + inline UBool operator!=(const UObject &other) const { return !operator==(other); } + + // clone() commented out from the base class: + // some compilers do not support co-variant return types + // (i.e., subclasses would have to return UObject * as well, instead of SubClass *) + // see also UObject class documentation. + // virtual UObject *clone() const; +#endif + + /* + * Assignment operator not declared. The compiler will provide one + * which does nothing since this class does not contain any data members. + * API/code coverage may show the assignment operator as present and + * untested - ignore. + * Subclasses need this assignment operator if they use compiler-provided + * assignment operators of their own. An alternative to not declaring one + * here would be to declare and empty-implement a protected or public one. + UObject &UObject::operator=(const UObject &); + */ +}; + +#ifndef U_HIDE_INTERNAL_API +/** + * This is a simple macro to add ICU RTTI to an ICU object implementation. + * This does not go into the header. This should only be used in *.cpp files. + * + * @param myClass The name of the class that needs RTTI defined. + * @internal + */ +#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \ + UClassID U_EXPORT2 myClass::getStaticClassID() { \ + static char classID = 0; \ + return (UClassID)&classID; \ + } \ + UClassID myClass::getDynamicClassID() const \ + { return myClass::getStaticClassID(); } + + +/** + * This macro adds ICU RTTI to an ICU abstract class implementation. + * This macro should be invoked in *.cpp files. The corresponding + * header should declare getStaticClassID. + * + * @param myClass The name of the class that needs RTTI defined. + * @internal + */ +#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \ + UClassID U_EXPORT2 myClass::getStaticClassID() { \ + static char classID = 0; \ + return (UClassID)&classID; \ + } + +#endif /* U_HIDE_INTERNAL_API */ + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/urename.h b/thirdparty/icu4c/common/unicode/urename.h new file mode 100644 index 0000000000..20232cd209 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/urename.h @@ -0,0 +1,1922 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* file name: urename.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* Created by: Perl script tools/genren.pl written by Vladimir Weinstein +* +* Contains data for renaming ICU exports. +* Gets included by umachine.h +* +* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT +* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN! +*/ + +#ifndef URENAME_H +#define URENAME_H + +/* U_DISABLE_RENAMING can be defined in the following ways: + * - when running configure, e.g. + * runConfigureICU Linux --disable-renaming + * - by changing the default setting of U_DISABLE_RENAMING in uconfig.h + */ + +#include "unicode/uconfig.h" + +#if !U_DISABLE_RENAMING + +// Disable Renaming for Visual Studio's IntelliSense feature, so that 'Go-to-Definition' (F12) will work. +#if !(defined(_MSC_VER) && defined(__INTELLISENSE__)) + +/* We need the U_ICU_ENTRY_POINT_RENAME definition. There's a default one in unicode/uvernum.h we can use, but we will give + the platform a chance to define it first. + Normally (if utypes.h or umachine.h was included first) this will not be necessary as it will already be defined. + */ + +#ifndef U_ICU_ENTRY_POINT_RENAME +#include "unicode/umachine.h" +#endif + +/* If we still don't have U_ICU_ENTRY_POINT_RENAME use the default. */ +#ifndef U_ICU_ENTRY_POINT_RENAME +#include "unicode/uvernum.h" +#endif + +/* Error out before the following defines cause very strange and unexpected code breakage */ +#ifndef U_ICU_ENTRY_POINT_RENAME +#error U_ICU_ENTRY_POINT_RENAME is not defined - cannot continue. Consider defining U_DISABLE_RENAMING if renaming should not be used. +#endif + + +/* C exports renaming data */ + +#define T_CString_int64ToString U_ICU_ENTRY_POINT_RENAME(T_CString_int64ToString) +#define T_CString_integerToString U_ICU_ENTRY_POINT_RENAME(T_CString_integerToString) +#define T_CString_stringToInteger U_ICU_ENTRY_POINT_RENAME(T_CString_stringToInteger) +#define T_CString_toLowerCase U_ICU_ENTRY_POINT_RENAME(T_CString_toLowerCase) +#define T_CString_toUpperCase U_ICU_ENTRY_POINT_RENAME(T_CString_toUpperCase) +#define UCNV_FROM_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_ESCAPE) +#define UCNV_FROM_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SKIP) +#define UCNV_FROM_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_STOP) +#define UCNV_FROM_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SUBSTITUTE) +#define UCNV_TO_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_ESCAPE) +#define UCNV_TO_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SKIP) +#define UCNV_TO_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_STOP) +#define UCNV_TO_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SUBSTITUTE) +#define UDataMemory_createNewInstance U_ICU_ENTRY_POINT_RENAME(UDataMemory_createNewInstance) +#define UDataMemory_init U_ICU_ENTRY_POINT_RENAME(UDataMemory_init) +#define UDataMemory_isLoaded U_ICU_ENTRY_POINT_RENAME(UDataMemory_isLoaded) +#define UDataMemory_normalizeDataPointer U_ICU_ENTRY_POINT_RENAME(UDataMemory_normalizeDataPointer) +#define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) +#define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) +#define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) +#define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) +#define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) +#define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) +#define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) +#define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) +#define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData) +#define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data) +#define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1) +#define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11) +#define _LMBCSData16 U_ICU_ENTRY_POINT_RENAME(_LMBCSData16) +#define _LMBCSData17 U_ICU_ENTRY_POINT_RENAME(_LMBCSData17) +#define _LMBCSData18 U_ICU_ENTRY_POINT_RENAME(_LMBCSData18) +#define _LMBCSData19 U_ICU_ENTRY_POINT_RENAME(_LMBCSData19) +#define _LMBCSData2 U_ICU_ENTRY_POINT_RENAME(_LMBCSData2) +#define _LMBCSData3 U_ICU_ENTRY_POINT_RENAME(_LMBCSData3) +#define _LMBCSData4 U_ICU_ENTRY_POINT_RENAME(_LMBCSData4) +#define _LMBCSData5 U_ICU_ENTRY_POINT_RENAME(_LMBCSData5) +#define _LMBCSData6 U_ICU_ENTRY_POINT_RENAME(_LMBCSData6) +#define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8) +#define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data) +#define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData) +#define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData) +#define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData) +#define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data) +#define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) +#define _UTF16v2Data U_ICU_ENTRY_POINT_RENAME(_UTF16v2Data) +#define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) +#define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) +#define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) +#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) +#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) +#define _isUnicodeLocaleTypeSubtag U_ICU_ENTRY_POINT_RENAME(_isUnicodeLocaleTypeSubtag) +#define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup) +#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) +#define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup) +#define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats) +#define gTimeZoneFilesInitOnce U_ICU_ENTRY_POINT_RENAME(gTimeZoneFilesInitOnce) +#define initNumsysNames U_ICU_ENTRY_POINT_RENAME(initNumsysNames) +#define izrule_clone U_ICU_ENTRY_POINT_RENAME(izrule_clone) +#define izrule_close U_ICU_ENTRY_POINT_RENAME(izrule_close) +#define izrule_equals U_ICU_ENTRY_POINT_RENAME(izrule_equals) +#define izrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(izrule_getDSTSavings) +#define izrule_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(izrule_getDynamicClassID) +#define izrule_getFinalStart U_ICU_ENTRY_POINT_RENAME(izrule_getFinalStart) +#define izrule_getFirstStart U_ICU_ENTRY_POINT_RENAME(izrule_getFirstStart) +#define izrule_getName U_ICU_ENTRY_POINT_RENAME(izrule_getName) +#define izrule_getNextStart U_ICU_ENTRY_POINT_RENAME(izrule_getNextStart) +#define izrule_getPreviousStart U_ICU_ENTRY_POINT_RENAME(izrule_getPreviousStart) +#define izrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(izrule_getRawOffset) +#define izrule_getStaticClassID U_ICU_ENTRY_POINT_RENAME(izrule_getStaticClassID) +#define izrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(izrule_isEquivalentTo) +#define izrule_open U_ICU_ENTRY_POINT_RENAME(izrule_open) +#define locale_getKeywordsStart U_ICU_ENTRY_POINT_RENAME(locale_getKeywordsStart) +#define locale_get_default U_ICU_ENTRY_POINT_RENAME(locale_get_default) +#define locale_set_default U_ICU_ENTRY_POINT_RENAME(locale_set_default) +#define numSysCleanup U_ICU_ENTRY_POINT_RENAME(numSysCleanup) +#define rbbi_cleanup U_ICU_ENTRY_POINT_RENAME(rbbi_cleanup) +#define pl_addFontRun U_ICU_ENTRY_POINT_RENAME(pl_addFontRun) +#define pl_addLocaleRun U_ICU_ENTRY_POINT_RENAME(pl_addLocaleRun) +#define pl_addValueRun U_ICU_ENTRY_POINT_RENAME(pl_addValueRun) +#define pl_close U_ICU_ENTRY_POINT_RENAME(pl_close) +#define pl_closeFontRuns U_ICU_ENTRY_POINT_RENAME(pl_closeFontRuns) +#define pl_closeLine U_ICU_ENTRY_POINT_RENAME(pl_closeLine) +#define pl_closeLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_closeLocaleRuns) +#define pl_closeValueRuns U_ICU_ENTRY_POINT_RENAME(pl_closeValueRuns) +#define pl_countLineRuns U_ICU_ENTRY_POINT_RENAME(pl_countLineRuns) +#define pl_create U_ICU_ENTRY_POINT_RENAME(pl_create) +#define pl_getAscent U_ICU_ENTRY_POINT_RENAME(pl_getAscent) +#define pl_getDescent U_ICU_ENTRY_POINT_RENAME(pl_getDescent) +#define pl_getFontRunCount U_ICU_ENTRY_POINT_RENAME(pl_getFontRunCount) +#define pl_getFontRunFont U_ICU_ENTRY_POINT_RENAME(pl_getFontRunFont) +#define pl_getFontRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLastLimit) +#define pl_getFontRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLimit) +#define pl_getLeading U_ICU_ENTRY_POINT_RENAME(pl_getLeading) +#define pl_getLineAscent U_ICU_ENTRY_POINT_RENAME(pl_getLineAscent) +#define pl_getLineDescent U_ICU_ENTRY_POINT_RENAME(pl_getLineDescent) +#define pl_getLineLeading U_ICU_ENTRY_POINT_RENAME(pl_getLineLeading) +#define pl_getLineVisualRun U_ICU_ENTRY_POINT_RENAME(pl_getLineVisualRun) +#define pl_getLineWidth U_ICU_ENTRY_POINT_RENAME(pl_getLineWidth) +#define pl_getLocaleRunCount U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunCount) +#define pl_getLocaleRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLastLimit) +#define pl_getLocaleRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLimit) +#define pl_getLocaleRunLocale U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLocale) +#define pl_getParagraphLevel U_ICU_ENTRY_POINT_RENAME(pl_getParagraphLevel) +#define pl_getTextDirection U_ICU_ENTRY_POINT_RENAME(pl_getTextDirection) +#define pl_getValueRunCount U_ICU_ENTRY_POINT_RENAME(pl_getValueRunCount) +#define pl_getValueRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLastLimit) +#define pl_getValueRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLimit) +#define pl_getValueRunValue U_ICU_ENTRY_POINT_RENAME(pl_getValueRunValue) +#define pl_getVisualRunAscent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunAscent) +#define pl_getVisualRunDescent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDescent) +#define pl_getVisualRunDirection U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDirection) +#define pl_getVisualRunFont U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunFont) +#define pl_getVisualRunGlyphCount U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphCount) +#define pl_getVisualRunGlyphToCharMap U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphToCharMap) +#define pl_getVisualRunGlyphs U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphs) +#define pl_getVisualRunLeading U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunLeading) +#define pl_getVisualRunPositions U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunPositions) +#define pl_isComplex U_ICU_ENTRY_POINT_RENAME(pl_isComplex) +#define pl_nextLine U_ICU_ENTRY_POINT_RENAME(pl_nextLine) +#define pl_openEmptyFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyFontRuns) +#define pl_openEmptyLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyLocaleRuns) +#define pl_openEmptyValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyValueRuns) +#define pl_openFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openFontRuns) +#define pl_openLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openLocaleRuns) +#define pl_openValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openValueRuns) +#define pl_reflow U_ICU_ENTRY_POINT_RENAME(pl_reflow) +#define pl_resetFontRuns U_ICU_ENTRY_POINT_RENAME(pl_resetFontRuns) +#define pl_resetLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_resetLocaleRuns) +#define pl_resetValueRuns U_ICU_ENTRY_POINT_RENAME(pl_resetValueRuns) +#define res_countArrayItems U_ICU_ENTRY_POINT_RENAME(res_countArrayItems) +#define res_findResource U_ICU_ENTRY_POINT_RENAME(res_findResource) +#define res_getAlias U_ICU_ENTRY_POINT_RENAME(res_getAlias) +#define res_getArrayItem U_ICU_ENTRY_POINT_RENAME(res_getArrayItem) +#define res_getBinaryNoTrace U_ICU_ENTRY_POINT_RENAME(res_getBinaryNoTrace) +#define res_getIntVectorNoTrace U_ICU_ENTRY_POINT_RENAME(res_getIntVectorNoTrace) +#define res_getPublicType U_ICU_ENTRY_POINT_RENAME(res_getPublicType) +#define res_getResource U_ICU_ENTRY_POINT_RENAME(res_getResource) +#define res_getStringNoTrace U_ICU_ENTRY_POINT_RENAME(res_getStringNoTrace) +#define res_getTableItemByIndex U_ICU_ENTRY_POINT_RENAME(res_getTableItemByIndex) +#define res_getTableItemByKey U_ICU_ENTRY_POINT_RENAME(res_getTableItemByKey) +#define res_load U_ICU_ENTRY_POINT_RENAME(res_load) +#define res_read U_ICU_ENTRY_POINT_RENAME(res_read) +#define res_unload U_ICU_ENTRY_POINT_RENAME(res_unload) +#define u_UCharsToChars U_ICU_ENTRY_POINT_RENAME(u_UCharsToChars) +#define u_asciiToUpper U_ICU_ENTRY_POINT_RENAME(u_asciiToUpper) +#define u_austrcpy U_ICU_ENTRY_POINT_RENAME(u_austrcpy) +#define u_austrncpy U_ICU_ENTRY_POINT_RENAME(u_austrncpy) +#define u_caseInsensitivePrefixMatch U_ICU_ENTRY_POINT_RENAME(u_caseInsensitivePrefixMatch) +#define u_catclose U_ICU_ENTRY_POINT_RENAME(u_catclose) +#define u_catgets U_ICU_ENTRY_POINT_RENAME(u_catgets) +#define u_catopen U_ICU_ENTRY_POINT_RENAME(u_catopen) +#define u_charAge U_ICU_ENTRY_POINT_RENAME(u_charAge) +#define u_charDigitValue U_ICU_ENTRY_POINT_RENAME(u_charDigitValue) +#define u_charDirection U_ICU_ENTRY_POINT_RENAME(u_charDirection) +#define u_charFromName U_ICU_ENTRY_POINT_RENAME(u_charFromName) +#define u_charMirror U_ICU_ENTRY_POINT_RENAME(u_charMirror) +#define u_charName U_ICU_ENTRY_POINT_RENAME(u_charName) +#define u_charType U_ICU_ENTRY_POINT_RENAME(u_charType) +#define u_charsToUChars U_ICU_ENTRY_POINT_RENAME(u_charsToUChars) +#define u_cleanup U_ICU_ENTRY_POINT_RENAME(u_cleanup) +#define u_countChar32 U_ICU_ENTRY_POINT_RENAME(u_countChar32) +#define u_digit U_ICU_ENTRY_POINT_RENAME(u_digit) +#define u_enumCharNames U_ICU_ENTRY_POINT_RENAME(u_enumCharNames) +#define u_enumCharTypes U_ICU_ENTRY_POINT_RENAME(u_enumCharTypes) +#define u_errorName U_ICU_ENTRY_POINT_RENAME(u_errorName) +#define u_fadopt U_ICU_ENTRY_POINT_RENAME(u_fadopt) +#define u_fclose U_ICU_ENTRY_POINT_RENAME(u_fclose) +#define u_feof U_ICU_ENTRY_POINT_RENAME(u_feof) +#define u_fflush U_ICU_ENTRY_POINT_RENAME(u_fflush) +#define u_fgetConverter U_ICU_ENTRY_POINT_RENAME(u_fgetConverter) +#define u_fgetNumberFormat U_ICU_ENTRY_POINT_RENAME(u_fgetNumberFormat) +#define u_fgetc U_ICU_ENTRY_POINT_RENAME(u_fgetc) +#define u_fgetcodepage U_ICU_ENTRY_POINT_RENAME(u_fgetcodepage) +#define u_fgetcx U_ICU_ENTRY_POINT_RENAME(u_fgetcx) +#define u_fgetfile U_ICU_ENTRY_POINT_RENAME(u_fgetfile) +#define u_fgetlocale U_ICU_ENTRY_POINT_RENAME(u_fgetlocale) +#define u_fgets U_ICU_ENTRY_POINT_RENAME(u_fgets) +#define u_file_read U_ICU_ENTRY_POINT_RENAME(u_file_read) +#define u_file_write U_ICU_ENTRY_POINT_RENAME(u_file_write) +#define u_file_write_flush U_ICU_ENTRY_POINT_RENAME(u_file_write_flush) +#define u_finit U_ICU_ENTRY_POINT_RENAME(u_finit) +#define u_flushDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_flushDefaultConverter) +#define u_foldCase U_ICU_ENTRY_POINT_RENAME(u_foldCase) +#define u_fopen U_ICU_ENTRY_POINT_RENAME(u_fopen) +#define u_fopen_u U_ICU_ENTRY_POINT_RENAME(u_fopen_u) +#define u_forDigit U_ICU_ENTRY_POINT_RENAME(u_forDigit) +#define u_formatMessage U_ICU_ENTRY_POINT_RENAME(u_formatMessage) +#define u_formatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_formatMessageWithError) +#define u_fprintf U_ICU_ENTRY_POINT_RENAME(u_fprintf) +#define u_fprintf_u U_ICU_ENTRY_POINT_RENAME(u_fprintf_u) +#define u_fputc U_ICU_ENTRY_POINT_RENAME(u_fputc) +#define u_fputs U_ICU_ENTRY_POINT_RENAME(u_fputs) +#define u_frewind U_ICU_ENTRY_POINT_RENAME(u_frewind) +#define u_fscanf U_ICU_ENTRY_POINT_RENAME(u_fscanf) +#define u_fscanf_u U_ICU_ENTRY_POINT_RENAME(u_fscanf_u) +#define u_fsetcodepage U_ICU_ENTRY_POINT_RENAME(u_fsetcodepage) +#define u_fsetlocale U_ICU_ENTRY_POINT_RENAME(u_fsetlocale) +#define u_fsettransliterator U_ICU_ENTRY_POINT_RENAME(u_fsettransliterator) +#define u_fstropen U_ICU_ENTRY_POINT_RENAME(u_fstropen) +#define u_fungetc U_ICU_ENTRY_POINT_RENAME(u_fungetc) +#define u_getBidiPairedBracket U_ICU_ENTRY_POINT_RENAME(u_getBidiPairedBracket) +#define u_getBinaryPropertySet U_ICU_ENTRY_POINT_RENAME(u_getBinaryPropertySet) +#define u_getCombiningClass U_ICU_ENTRY_POINT_RENAME(u_getCombiningClass) +#define u_getDataDirectory U_ICU_ENTRY_POINT_RENAME(u_getDataDirectory) +#define u_getDataVersion U_ICU_ENTRY_POINT_RENAME(u_getDataVersion) +#define u_getDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_getDefaultConverter) +#define u_getFC_NFKC_Closure U_ICU_ENTRY_POINT_RENAME(u_getFC_NFKC_Closure) +#define u_getISOComment U_ICU_ENTRY_POINT_RENAME(u_getISOComment) +#define u_getIntPropertyMap U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMap) +#define u_getIntPropertyMaxValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMaxValue) +#define u_getIntPropertyMinValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMinValue) +#define u_getIntPropertyValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyValue) +#define u_getMainProperties U_ICU_ENTRY_POINT_RENAME(u_getMainProperties) +#define u_getNumericValue U_ICU_ENTRY_POINT_RENAME(u_getNumericValue) +#define u_getPropertyEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyEnum) +#define u_getPropertyName U_ICU_ENTRY_POINT_RENAME(u_getPropertyName) +#define u_getPropertyValueEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueEnum) +#define u_getPropertyValueName U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueName) +#define u_getTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_getTimeZoneFilesDirectory) +#define u_getUnicodeProperties U_ICU_ENTRY_POINT_RENAME(u_getUnicodeProperties) +#define u_getUnicodeVersion U_ICU_ENTRY_POINT_RENAME(u_getUnicodeVersion) +#define u_getVersion U_ICU_ENTRY_POINT_RENAME(u_getVersion) +#define u_get_stdout U_ICU_ENTRY_POINT_RENAME(u_get_stdout) +#define u_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(u_hasBinaryProperty) +#define u_init U_ICU_ENTRY_POINT_RENAME(u_init) +#define u_isIDIgnorable U_ICU_ENTRY_POINT_RENAME(u_isIDIgnorable) +#define u_isIDPart U_ICU_ENTRY_POINT_RENAME(u_isIDPart) +#define u_isIDStart U_ICU_ENTRY_POINT_RENAME(u_isIDStart) +#define u_isISOControl U_ICU_ENTRY_POINT_RENAME(u_isISOControl) +#define u_isJavaIDPart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDPart) +#define u_isJavaIDStart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDStart) +#define u_isJavaSpaceChar U_ICU_ENTRY_POINT_RENAME(u_isJavaSpaceChar) +#define u_isMirrored U_ICU_ENTRY_POINT_RENAME(u_isMirrored) +#define u_isUAlphabetic U_ICU_ENTRY_POINT_RENAME(u_isUAlphabetic) +#define u_isULowercase U_ICU_ENTRY_POINT_RENAME(u_isULowercase) +#define u_isUUppercase U_ICU_ENTRY_POINT_RENAME(u_isUUppercase) +#define u_isUWhiteSpace U_ICU_ENTRY_POINT_RENAME(u_isUWhiteSpace) +#define u_isWhitespace U_ICU_ENTRY_POINT_RENAME(u_isWhitespace) +#define u_isalnum U_ICU_ENTRY_POINT_RENAME(u_isalnum) +#define u_isalnumPOSIX U_ICU_ENTRY_POINT_RENAME(u_isalnumPOSIX) +#define u_isalpha U_ICU_ENTRY_POINT_RENAME(u_isalpha) +#define u_isbase U_ICU_ENTRY_POINT_RENAME(u_isbase) +#define u_isblank U_ICU_ENTRY_POINT_RENAME(u_isblank) +#define u_iscntrl U_ICU_ENTRY_POINT_RENAME(u_iscntrl) +#define u_isdefined U_ICU_ENTRY_POINT_RENAME(u_isdefined) +#define u_isdigit U_ICU_ENTRY_POINT_RENAME(u_isdigit) +#define u_isgraph U_ICU_ENTRY_POINT_RENAME(u_isgraph) +#define u_isgraphPOSIX U_ICU_ENTRY_POINT_RENAME(u_isgraphPOSIX) +#define u_islower U_ICU_ENTRY_POINT_RENAME(u_islower) +#define u_isprint U_ICU_ENTRY_POINT_RENAME(u_isprint) +#define u_isprintPOSIX U_ICU_ENTRY_POINT_RENAME(u_isprintPOSIX) +#define u_ispunct U_ICU_ENTRY_POINT_RENAME(u_ispunct) +#define u_isspace U_ICU_ENTRY_POINT_RENAME(u_isspace) +#define u_istitle U_ICU_ENTRY_POINT_RENAME(u_istitle) +#define u_isupper U_ICU_ENTRY_POINT_RENAME(u_isupper) +#define u_isxdigit U_ICU_ENTRY_POINT_RENAME(u_isxdigit) +#define u_locbund_close U_ICU_ENTRY_POINT_RENAME(u_locbund_close) +#define u_locbund_getNumberFormat U_ICU_ENTRY_POINT_RENAME(u_locbund_getNumberFormat) +#define u_locbund_init U_ICU_ENTRY_POINT_RENAME(u_locbund_init) +#define u_memcasecmp U_ICU_ENTRY_POINT_RENAME(u_memcasecmp) +#define u_memchr U_ICU_ENTRY_POINT_RENAME(u_memchr) +#define u_memchr32 U_ICU_ENTRY_POINT_RENAME(u_memchr32) +#define u_memcmp U_ICU_ENTRY_POINT_RENAME(u_memcmp) +#define u_memcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_memcmpCodePointOrder) +#define u_memcpy U_ICU_ENTRY_POINT_RENAME(u_memcpy) +#define u_memmove U_ICU_ENTRY_POINT_RENAME(u_memmove) +#define u_memrchr U_ICU_ENTRY_POINT_RENAME(u_memrchr) +#define u_memrchr32 U_ICU_ENTRY_POINT_RENAME(u_memrchr32) +#define u_memset U_ICU_ENTRY_POINT_RENAME(u_memset) +#define u_parseMessage U_ICU_ENTRY_POINT_RENAME(u_parseMessage) +#define u_parseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_parseMessageWithError) +#define u_printf U_ICU_ENTRY_POINT_RENAME(u_printf) +#define u_printf_parse U_ICU_ENTRY_POINT_RENAME(u_printf_parse) +#define u_printf_u U_ICU_ENTRY_POINT_RENAME(u_printf_u) +#define u_releaseDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_releaseDefaultConverter) +#define u_scanf_parse U_ICU_ENTRY_POINT_RENAME(u_scanf_parse) +#define u_setAtomicIncDecFunctions U_ICU_ENTRY_POINT_RENAME(u_setAtomicIncDecFunctions) +#define u_setDataDirectory U_ICU_ENTRY_POINT_RENAME(u_setDataDirectory) +#define u_setMemoryFunctions U_ICU_ENTRY_POINT_RENAME(u_setMemoryFunctions) +#define u_setMutexFunctions U_ICU_ENTRY_POINT_RENAME(u_setMutexFunctions) +#define u_setTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_setTimeZoneFilesDirectory) +#define u_shapeArabic U_ICU_ENTRY_POINT_RENAME(u_shapeArabic) +#define u_snprintf U_ICU_ENTRY_POINT_RENAME(u_snprintf) +#define u_snprintf_u U_ICU_ENTRY_POINT_RENAME(u_snprintf_u) +#define u_sprintf U_ICU_ENTRY_POINT_RENAME(u_sprintf) +#define u_sprintf_u U_ICU_ENTRY_POINT_RENAME(u_sprintf_u) +#define u_sscanf U_ICU_ENTRY_POINT_RENAME(u_sscanf) +#define u_sscanf_u U_ICU_ENTRY_POINT_RENAME(u_sscanf_u) +#define u_strCaseCompare U_ICU_ENTRY_POINT_RENAME(u_strCaseCompare) +#define u_strCompare U_ICU_ENTRY_POINT_RENAME(u_strCompare) +#define u_strCompareIter U_ICU_ENTRY_POINT_RENAME(u_strCompareIter) +#define u_strFindFirst U_ICU_ENTRY_POINT_RENAME(u_strFindFirst) +#define u_strFindLast U_ICU_ENTRY_POINT_RENAME(u_strFindLast) +#define u_strFoldCase U_ICU_ENTRY_POINT_RENAME(u_strFoldCase) +#define u_strFromJavaModifiedUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromJavaModifiedUTF8WithSub) +#define u_strFromPunycode U_ICU_ENTRY_POINT_RENAME(u_strFromPunycode) +#define u_strFromUTF32 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32) +#define u_strFromUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32WithSub) +#define u_strFromUTF8 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8) +#define u_strFromUTF8Lenient U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8Lenient) +#define u_strFromUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8WithSub) +#define u_strFromWCS U_ICU_ENTRY_POINT_RENAME(u_strFromWCS) +#define u_strHasMoreChar32Than U_ICU_ENTRY_POINT_RENAME(u_strHasMoreChar32Than) +#define u_strToJavaModifiedUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToJavaModifiedUTF8) +#define u_strToLower U_ICU_ENTRY_POINT_RENAME(u_strToLower) +#define u_strToPunycode U_ICU_ENTRY_POINT_RENAME(u_strToPunycode) +#define u_strToTitle U_ICU_ENTRY_POINT_RENAME(u_strToTitle) +#define u_strToUTF32 U_ICU_ENTRY_POINT_RENAME(u_strToUTF32) +#define u_strToUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF32WithSub) +#define u_strToUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToUTF8) +#define u_strToUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF8WithSub) +#define u_strToUpper U_ICU_ENTRY_POINT_RENAME(u_strToUpper) +#define u_strToWCS U_ICU_ENTRY_POINT_RENAME(u_strToWCS) +#define u_strcasecmp U_ICU_ENTRY_POINT_RENAME(u_strcasecmp) +#define u_strcat U_ICU_ENTRY_POINT_RENAME(u_strcat) +#define u_strchr U_ICU_ENTRY_POINT_RENAME(u_strchr) +#define u_strchr32 U_ICU_ENTRY_POINT_RENAME(u_strchr32) +#define u_strcmp U_ICU_ENTRY_POINT_RENAME(u_strcmp) +#define u_strcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strcmpCodePointOrder) +#define u_strcmpFold U_ICU_ENTRY_POINT_RENAME(u_strcmpFold) +#define u_strcpy U_ICU_ENTRY_POINT_RENAME(u_strcpy) +#define u_strcspn U_ICU_ENTRY_POINT_RENAME(u_strcspn) +#define u_strlen U_ICU_ENTRY_POINT_RENAME(u_strlen) +#define u_strncasecmp U_ICU_ENTRY_POINT_RENAME(u_strncasecmp) +#define u_strncat U_ICU_ENTRY_POINT_RENAME(u_strncat) +#define u_strncmp U_ICU_ENTRY_POINT_RENAME(u_strncmp) +#define u_strncmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strncmpCodePointOrder) +#define u_strncpy U_ICU_ENTRY_POINT_RENAME(u_strncpy) +#define u_strpbrk U_ICU_ENTRY_POINT_RENAME(u_strpbrk) +#define u_strrchr U_ICU_ENTRY_POINT_RENAME(u_strrchr) +#define u_strrchr32 U_ICU_ENTRY_POINT_RENAME(u_strrchr32) +#define u_strrstr U_ICU_ENTRY_POINT_RENAME(u_strrstr) +#define u_strspn U_ICU_ENTRY_POINT_RENAME(u_strspn) +#define u_strstr U_ICU_ENTRY_POINT_RENAME(u_strstr) +#define u_strtok_r U_ICU_ENTRY_POINT_RENAME(u_strtok_r) +#define u_terminateChars U_ICU_ENTRY_POINT_RENAME(u_terminateChars) +#define u_terminateUChar32s U_ICU_ENTRY_POINT_RENAME(u_terminateUChar32s) +#define u_terminateUChars U_ICU_ENTRY_POINT_RENAME(u_terminateUChars) +#define u_terminateWChars U_ICU_ENTRY_POINT_RENAME(u_terminateWChars) +#define u_tolower U_ICU_ENTRY_POINT_RENAME(u_tolower) +#define u_totitle U_ICU_ENTRY_POINT_RENAME(u_totitle) +#define u_toupper U_ICU_ENTRY_POINT_RENAME(u_toupper) +#define u_uastrcpy U_ICU_ENTRY_POINT_RENAME(u_uastrcpy) +#define u_uastrncpy U_ICU_ENTRY_POINT_RENAME(u_uastrncpy) +#define u_unescape U_ICU_ENTRY_POINT_RENAME(u_unescape) +#define u_unescapeAt U_ICU_ENTRY_POINT_RENAME(u_unescapeAt) +#define u_versionFromString U_ICU_ENTRY_POINT_RENAME(u_versionFromString) +#define u_versionFromUString U_ICU_ENTRY_POINT_RENAME(u_versionFromUString) +#define u_versionToString U_ICU_ENTRY_POINT_RENAME(u_versionToString) +#define u_vformatMessage U_ICU_ENTRY_POINT_RENAME(u_vformatMessage) +#define u_vformatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vformatMessageWithError) +#define u_vfprintf U_ICU_ENTRY_POINT_RENAME(u_vfprintf) +#define u_vfprintf_u U_ICU_ENTRY_POINT_RENAME(u_vfprintf_u) +#define u_vfscanf U_ICU_ENTRY_POINT_RENAME(u_vfscanf) +#define u_vfscanf_u U_ICU_ENTRY_POINT_RENAME(u_vfscanf_u) +#define u_vparseMessage U_ICU_ENTRY_POINT_RENAME(u_vparseMessage) +#define u_vparseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vparseMessageWithError) +#define u_vsnprintf U_ICU_ENTRY_POINT_RENAME(u_vsnprintf) +#define u_vsnprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsnprintf_u) +#define u_vsprintf U_ICU_ENTRY_POINT_RENAME(u_vsprintf) +#define u_vsprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsprintf_u) +#define u_vsscanf U_ICU_ENTRY_POINT_RENAME(u_vsscanf) +#define u_vsscanf_u U_ICU_ENTRY_POINT_RENAME(u_vsscanf_u) +#define u_writeIdenticalLevelRun U_ICU_ENTRY_POINT_RENAME(u_writeIdenticalLevelRun) +#define ubidi_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ubidi_addPropertyStarts) +#define ubidi_close U_ICU_ENTRY_POINT_RENAME(ubidi_close) +#define ubidi_countParagraphs U_ICU_ENTRY_POINT_RENAME(ubidi_countParagraphs) +#define ubidi_countRuns U_ICU_ENTRY_POINT_RENAME(ubidi_countRuns) +#define ubidi_getBaseDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getBaseDirection) +#define ubidi_getClass U_ICU_ENTRY_POINT_RENAME(ubidi_getClass) +#define ubidi_getClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_getClassCallback) +#define ubidi_getCustomizedClass U_ICU_ENTRY_POINT_RENAME(ubidi_getCustomizedClass) +#define ubidi_getDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getDirection) +#define ubidi_getJoiningGroup U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningGroup) +#define ubidi_getJoiningType U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningType) +#define ubidi_getLength U_ICU_ENTRY_POINT_RENAME(ubidi_getLength) +#define ubidi_getLevelAt U_ICU_ENTRY_POINT_RENAME(ubidi_getLevelAt) +#define ubidi_getLevels U_ICU_ENTRY_POINT_RENAME(ubidi_getLevels) +#define ubidi_getLogicalIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalIndex) +#define ubidi_getLogicalMap U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalMap) +#define ubidi_getLogicalRun U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalRun) +#define ubidi_getMaxValue U_ICU_ENTRY_POINT_RENAME(ubidi_getMaxValue) +#define ubidi_getMemory U_ICU_ENTRY_POINT_RENAME(ubidi_getMemory) +#define ubidi_getMirror U_ICU_ENTRY_POINT_RENAME(ubidi_getMirror) +#define ubidi_getPairedBracket U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracket) +#define ubidi_getPairedBracketType U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracketType) +#define ubidi_getParaLevel U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevel) +#define ubidi_getParaLevelAtIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevelAtIndex) +#define ubidi_getParagraph U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraph) +#define ubidi_getParagraphByIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraphByIndex) +#define ubidi_getProcessedLength U_ICU_ENTRY_POINT_RENAME(ubidi_getProcessedLength) +#define ubidi_getReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingMode) +#define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions) +#define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength) +#define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns) +#define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText) +#define ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex) +#define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap) +#define ubidi_getVisualRun U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualRun) +#define ubidi_invertMap U_ICU_ENTRY_POINT_RENAME(ubidi_invertMap) +#define ubidi_isBidiControl U_ICU_ENTRY_POINT_RENAME(ubidi_isBidiControl) +#define ubidi_isInverse U_ICU_ENTRY_POINT_RENAME(ubidi_isInverse) +#define ubidi_isJoinControl U_ICU_ENTRY_POINT_RENAME(ubidi_isJoinControl) +#define ubidi_isMirrored U_ICU_ENTRY_POINT_RENAME(ubidi_isMirrored) +#define ubidi_isOrderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_isOrderParagraphsLTR) +#define ubidi_open U_ICU_ENTRY_POINT_RENAME(ubidi_open) +#define ubidi_openSized U_ICU_ENTRY_POINT_RENAME(ubidi_openSized) +#define ubidi_orderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_orderParagraphsLTR) +#define ubidi_reorderLogical U_ICU_ENTRY_POINT_RENAME(ubidi_reorderLogical) +#define ubidi_reorderVisual U_ICU_ENTRY_POINT_RENAME(ubidi_reorderVisual) +#define ubidi_setClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_setClassCallback) +#define ubidi_setContext U_ICU_ENTRY_POINT_RENAME(ubidi_setContext) +#define ubidi_setInverse U_ICU_ENTRY_POINT_RENAME(ubidi_setInverse) +#define ubidi_setLine U_ICU_ENTRY_POINT_RENAME(ubidi_setLine) +#define ubidi_setPara U_ICU_ENTRY_POINT_RENAME(ubidi_setPara) +#define ubidi_setReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingMode) +#define ubidi_setReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingOptions) +#define ubidi_writeReordered U_ICU_ENTRY_POINT_RENAME(ubidi_writeReordered) +#define ubidi_writeReverse U_ICU_ENTRY_POINT_RENAME(ubidi_writeReverse) +#define ubiditransform_close U_ICU_ENTRY_POINT_RENAME(ubiditransform_close) +#define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open) +#define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform) +#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode) +#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close) +#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable) +#define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current) +#define ubrk_first U_ICU_ENTRY_POINT_RENAME(ubrk_first) +#define ubrk_following U_ICU_ENTRY_POINT_RENAME(ubrk_following) +#define ubrk_getAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_getAvailable) +#define ubrk_getBinaryRules U_ICU_ENTRY_POINT_RENAME(ubrk_getBinaryRules) +#define ubrk_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ubrk_getLocaleByType) +#define ubrk_getRuleStatus U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatus) +#define ubrk_getRuleStatusVec U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatusVec) +#define ubrk_isBoundary U_ICU_ENTRY_POINT_RENAME(ubrk_isBoundary) +#define ubrk_last U_ICU_ENTRY_POINT_RENAME(ubrk_last) +#define ubrk_next U_ICU_ENTRY_POINT_RENAME(ubrk_next) +#define ubrk_open U_ICU_ENTRY_POINT_RENAME(ubrk_open) +#define ubrk_openBinaryRules U_ICU_ENTRY_POINT_RENAME(ubrk_openBinaryRules) +#define ubrk_openRules U_ICU_ENTRY_POINT_RENAME(ubrk_openRules) +#define ubrk_preceding U_ICU_ENTRY_POINT_RENAME(ubrk_preceding) +#define ubrk_previous U_ICU_ENTRY_POINT_RENAME(ubrk_previous) +#define ubrk_refreshUText U_ICU_ENTRY_POINT_RENAME(ubrk_refreshUText) +#define ubrk_safeClone U_ICU_ENTRY_POINT_RENAME(ubrk_safeClone) +#define ubrk_setText U_ICU_ENTRY_POINT_RENAME(ubrk_setText) +#define ubrk_setUText U_ICU_ENTRY_POINT_RENAME(ubrk_setUText) +#define ubrk_swap U_ICU_ENTRY_POINT_RENAME(ubrk_swap) +#define ucache_compareKeys U_ICU_ENTRY_POINT_RENAME(ucache_compareKeys) +#define ucache_deleteKey U_ICU_ENTRY_POINT_RENAME(ucache_deleteKey) +#define ucache_hashKeys U_ICU_ENTRY_POINT_RENAME(ucache_hashKeys) +#define ucal_add U_ICU_ENTRY_POINT_RENAME(ucal_add) +#define ucal_clear U_ICU_ENTRY_POINT_RENAME(ucal_clear) +#define ucal_clearField U_ICU_ENTRY_POINT_RENAME(ucal_clearField) +#define ucal_clone U_ICU_ENTRY_POINT_RENAME(ucal_clone) +#define ucal_close U_ICU_ENTRY_POINT_RENAME(ucal_close) +#define ucal_countAvailable U_ICU_ENTRY_POINT_RENAME(ucal_countAvailable) +#define ucal_equivalentTo U_ICU_ENTRY_POINT_RENAME(ucal_equivalentTo) +#define ucal_get U_ICU_ENTRY_POINT_RENAME(ucal_get) +#define ucal_getAttribute U_ICU_ENTRY_POINT_RENAME(ucal_getAttribute) +#define ucal_getAvailable U_ICU_ENTRY_POINT_RENAME(ucal_getAvailable) +#define ucal_getCanonicalTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getCanonicalTimeZoneID) +#define ucal_getDSTSavings U_ICU_ENTRY_POINT_RENAME(ucal_getDSTSavings) +#define ucal_getDayOfWeekType U_ICU_ENTRY_POINT_RENAME(ucal_getDayOfWeekType) +#define ucal_getDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getDefaultTimeZone) +#define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference) +#define ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange) +#define ucal_getHostTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getHostTimeZone) +#define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale) +#define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit) +#define ucal_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucal_getLocaleByType) +#define ucal_getMillis U_ICU_ENTRY_POINT_RENAME(ucal_getMillis) +#define ucal_getNow U_ICU_ENTRY_POINT_RENAME(ucal_getNow) +#define ucal_getTZDataVersion U_ICU_ENTRY_POINT_RENAME(ucal_getTZDataVersion) +#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName) +#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID) +#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID) +#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate) +#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType) +#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition) +#define ucal_getWindowsTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getWindowsTimeZoneID) +#define ucal_inDaylightTime U_ICU_ENTRY_POINT_RENAME(ucal_inDaylightTime) +#define ucal_isSet U_ICU_ENTRY_POINT_RENAME(ucal_isSet) +#define ucal_isWeekend U_ICU_ENTRY_POINT_RENAME(ucal_isWeekend) +#define ucal_open U_ICU_ENTRY_POINT_RENAME(ucal_open) +#define ucal_openCountryTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openCountryTimeZones) +#define ucal_openTimeZoneIDEnumeration U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZoneIDEnumeration) +#define ucal_openTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZones) +#define ucal_roll U_ICU_ENTRY_POINT_RENAME(ucal_roll) +#define ucal_set U_ICU_ENTRY_POINT_RENAME(ucal_set) +#define ucal_setAttribute U_ICU_ENTRY_POINT_RENAME(ucal_setAttribute) +#define ucal_setDate U_ICU_ENTRY_POINT_RENAME(ucal_setDate) +#define ucal_setDateTime U_ICU_ENTRY_POINT_RENAME(ucal_setDateTime) +#define ucal_setDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setDefaultTimeZone) +#define ucal_setGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_setGregorianChange) +#define ucal_setMillis U_ICU_ENTRY_POINT_RENAME(ucal_setMillis) +#define ucal_setTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setTimeZone) +#define ucase_addCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addCaseClosure) +#define ucase_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ucase_addPropertyStarts) +#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure) +#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold) +#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale) +#define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie) +#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType) +#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable) +#define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty) +#define ucase_isCaseSensitive U_ICU_ENTRY_POINT_RENAME(ucase_isCaseSensitive) +#define ucase_isSoftDotted U_ICU_ENTRY_POINT_RENAME(ucase_isSoftDotted) +#define ucase_toFullFolding U_ICU_ENTRY_POINT_RENAME(ucase_toFullFolding) +#define ucase_toFullLower U_ICU_ENTRY_POINT_RENAME(ucase_toFullLower) +#define ucase_toFullTitle U_ICU_ENTRY_POINT_RENAME(ucase_toFullTitle) +#define ucase_toFullUpper U_ICU_ENTRY_POINT_RENAME(ucase_toFullUpper) +#define ucase_tolower U_ICU_ENTRY_POINT_RENAME(ucase_tolower) +#define ucase_totitle U_ICU_ENTRY_POINT_RENAME(ucase_totitle) +#define ucase_toupper U_ICU_ENTRY_POINT_RENAME(ucase_toupper) +#define ucasemap_close U_ICU_ENTRY_POINT_RENAME(ucasemap_close) +#define ucasemap_getBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_getBreakIterator) +#define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale) +#define ucasemap_getOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions) +#define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle) +#define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open) +#define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator) +#define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale) +#define ucasemap_setOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_setOptions) +#define ucasemap_toTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_toTitle) +#define ucasemap_utf8FoldCase U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8FoldCase) +#define ucasemap_utf8ToLower U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToLower) +#define ucasemap_utf8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToTitle) +#define ucasemap_utf8ToUpper U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToUpper) +#define ucfpos_close U_ICU_ENTRY_POINT_RENAME(ucfpos_close) +#define ucfpos_constrainCategory U_ICU_ENTRY_POINT_RENAME(ucfpos_constrainCategory) +#define ucfpos_constrainField U_ICU_ENTRY_POINT_RENAME(ucfpos_constrainField) +#define ucfpos_getCategory U_ICU_ENTRY_POINT_RENAME(ucfpos_getCategory) +#define ucfpos_getField U_ICU_ENTRY_POINT_RENAME(ucfpos_getField) +#define ucfpos_getIndexes U_ICU_ENTRY_POINT_RENAME(ucfpos_getIndexes) +#define ucfpos_getInt64IterationContext U_ICU_ENTRY_POINT_RENAME(ucfpos_getInt64IterationContext) +#define ucfpos_matchesField U_ICU_ENTRY_POINT_RENAME(ucfpos_matchesField) +#define ucfpos_open U_ICU_ENTRY_POINT_RENAME(ucfpos_open) +#define ucfpos_reset U_ICU_ENTRY_POINT_RENAME(ucfpos_reset) +#define ucfpos_setInt64IterationContext U_ICU_ENTRY_POINT_RENAME(ucfpos_setInt64IterationContext) +#define ucfpos_setState U_ICU_ENTRY_POINT_RENAME(ucfpos_setState) +#define uchar_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uchar_addPropertyStarts) +#define uchar_swapNames U_ICU_ENTRY_POINT_RENAME(uchar_swapNames) +#define ucln_cleanupOne U_ICU_ENTRY_POINT_RENAME(ucln_cleanupOne) +#define ucln_common_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_common_registerCleanup) +#define ucln_i18n_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_i18n_registerCleanup) +#define ucln_io_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_io_registerCleanup) +#define ucln_lib_cleanup U_ICU_ENTRY_POINT_RENAME(ucln_lib_cleanup) +#define ucln_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_registerCleanup) +#define ucnv_MBCSFromUChar32 U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUChar32) +#define ucnv_MBCSFromUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUnicodeWithOffsets) +#define ucnv_MBCSGetFilteredUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetFilteredUnicodeSetForUnicode) +#define ucnv_MBCSGetType U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetType) +#define ucnv_MBCSGetUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetUnicodeSetForUnicode) +#define ucnv_MBCSIsLeadByte U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSIsLeadByte) +#define ucnv_MBCSSimpleGetNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSSimpleGetNextUChar) +#define ucnv_MBCSToUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSToUnicodeWithOffsets) +#define ucnv_bld_countAvailableConverters U_ICU_ENTRY_POINT_RENAME(ucnv_bld_countAvailableConverters) +#define ucnv_bld_getAvailableConverter U_ICU_ENTRY_POINT_RENAME(ucnv_bld_getAvailableConverter) +#define ucnv_canCreateConverter U_ICU_ENTRY_POINT_RENAME(ucnv_canCreateConverter) +#define ucnv_cbFromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteBytes) +#define ucnv_cbFromUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteSub) +#define ucnv_cbFromUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteUChars) +#define ucnv_cbToUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteSub) +#define ucnv_cbToUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteUChars) +#define ucnv_close U_ICU_ENTRY_POINT_RENAME(ucnv_close) +#define ucnv_compareNames U_ICU_ENTRY_POINT_RENAME(ucnv_compareNames) +#define ucnv_convert U_ICU_ENTRY_POINT_RENAME(ucnv_convert) +#define ucnv_convertEx U_ICU_ENTRY_POINT_RENAME(ucnv_convertEx) +#define ucnv_countAliases U_ICU_ENTRY_POINT_RENAME(ucnv_countAliases) +#define ucnv_countAvailable U_ICU_ENTRY_POINT_RENAME(ucnv_countAvailable) +#define ucnv_countStandards U_ICU_ENTRY_POINT_RENAME(ucnv_countStandards) +#define ucnv_createAlgorithmicConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createAlgorithmicConverter) +#define ucnv_createConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createConverter) +#define ucnv_createConverterFromPackage U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromPackage) +#define ucnv_createConverterFromSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromSharedData) +#define ucnv_detectUnicodeSignature U_ICU_ENTRY_POINT_RENAME(ucnv_detectUnicodeSignature) +#define ucnv_enableCleanup U_ICU_ENTRY_POINT_RENAME(ucnv_enableCleanup) +#define ucnv_extContinueMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchFromU) +#define ucnv_extContinueMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchToU) +#define ucnv_extGetUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_extGetUnicodeSet) +#define ucnv_extInitialMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchFromU) +#define ucnv_extInitialMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchToU) +#define ucnv_extSimpleMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchFromU) +#define ucnv_extSimpleMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchToU) +#define ucnv_fixFileSeparator U_ICU_ENTRY_POINT_RENAME(ucnv_fixFileSeparator) +#define ucnv_flushCache U_ICU_ENTRY_POINT_RENAME(ucnv_flushCache) +#define ucnv_fromAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_fromAlgorithmic) +#define ucnv_fromUChars U_ICU_ENTRY_POINT_RENAME(ucnv_fromUChars) +#define ucnv_fromUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_fromUCountPending) +#define ucnv_fromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_fromUWriteBytes) +#define ucnv_fromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode) +#define ucnv_fromUnicode_UTF8 U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8) +#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8_OFFSETS_LOGIC) +#define ucnv_getAlias U_ICU_ENTRY_POINT_RENAME(ucnv_getAlias) +#define ucnv_getAliases U_ICU_ENTRY_POINT_RENAME(ucnv_getAliases) +#define ucnv_getAvailableName U_ICU_ENTRY_POINT_RENAME(ucnv_getAvailableName) +#define ucnv_getCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_getCCSID) +#define ucnv_getCanonicalName U_ICU_ENTRY_POINT_RENAME(ucnv_getCanonicalName) +#define ucnv_getCompleteUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getCompleteUnicodeSet) +#define ucnv_getDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_getDefaultName) +#define ucnv_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucnv_getDisplayName) +#define ucnv_getFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getFromUCallBack) +#define ucnv_getInvalidChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidChars) +#define ucnv_getInvalidUChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidUChars) +#define ucnv_getMaxCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMaxCharSize) +#define ucnv_getMinCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMinCharSize) +#define ucnv_getName U_ICU_ENTRY_POINT_RENAME(ucnv_getName) +#define ucnv_getNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_getNextUChar) +#define ucnv_getNonSurrogateUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getNonSurrogateUnicodeSet) +#define ucnv_getPlatform U_ICU_ENTRY_POINT_RENAME(ucnv_getPlatform) +#define ucnv_getStandard U_ICU_ENTRY_POINT_RENAME(ucnv_getStandard) +#define ucnv_getStandardName U_ICU_ENTRY_POINT_RENAME(ucnv_getStandardName) +#define ucnv_getStarters U_ICU_ENTRY_POINT_RENAME(ucnv_getStarters) +#define ucnv_getSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_getSubstChars) +#define ucnv_getToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getToUCallBack) +#define ucnv_getType U_ICU_ENTRY_POINT_RENAME(ucnv_getType) +#define ucnv_getUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getUnicodeSet) +#define ucnv_incrementRefCount U_ICU_ENTRY_POINT_RENAME(ucnv_incrementRefCount) +#define ucnv_io_countKnownConverters U_ICU_ENTRY_POINT_RENAME(ucnv_io_countKnownConverters) +#define ucnv_io_getConverterName U_ICU_ENTRY_POINT_RENAME(ucnv_io_getConverterName) +#define ucnv_io_stripASCIIForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripASCIIForCompare) +#define ucnv_io_stripEBCDICForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripEBCDICForCompare) +#define ucnv_isAmbiguous U_ICU_ENTRY_POINT_RENAME(ucnv_isAmbiguous) +#define ucnv_isFixedWidth U_ICU_ENTRY_POINT_RENAME(ucnv_isFixedWidth) +#define ucnv_load U_ICU_ENTRY_POINT_RENAME(ucnv_load) +#define ucnv_loadSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_loadSharedData) +#define ucnv_open U_ICU_ENTRY_POINT_RENAME(ucnv_open) +#define ucnv_openAllNames U_ICU_ENTRY_POINT_RENAME(ucnv_openAllNames) +#define ucnv_openCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_openCCSID) +#define ucnv_openPackage U_ICU_ENTRY_POINT_RENAME(ucnv_openPackage) +#define ucnv_openStandardNames U_ICU_ENTRY_POINT_RENAME(ucnv_openStandardNames) +#define ucnv_openU U_ICU_ENTRY_POINT_RENAME(ucnv_openU) +#define ucnv_reset U_ICU_ENTRY_POINT_RENAME(ucnv_reset) +#define ucnv_resetFromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetFromUnicode) +#define ucnv_resetToUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetToUnicode) +#define ucnv_safeClone U_ICU_ENTRY_POINT_RENAME(ucnv_safeClone) +#define ucnv_setDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_setDefaultName) +#define ucnv_setFallback U_ICU_ENTRY_POINT_RENAME(ucnv_setFallback) +#define ucnv_setFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setFromUCallBack) +#define ucnv_setSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstChars) +#define ucnv_setSubstString U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstString) +#define ucnv_setToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setToUCallBack) +#define ucnv_swap U_ICU_ENTRY_POINT_RENAME(ucnv_swap) +#define ucnv_swapAliases U_ICU_ENTRY_POINT_RENAME(ucnv_swapAliases) +#define ucnv_toAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_toAlgorithmic) +#define ucnv_toUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUChars) +#define ucnv_toUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_toUCountPending) +#define ucnv_toUWriteCodePoint U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteCodePoint) +#define ucnv_toUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteUChars) +#define ucnv_toUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_toUnicode) +#define ucnv_unload U_ICU_ENTRY_POINT_RENAME(ucnv_unload) +#define ucnv_unloadSharedDataIfReady U_ICU_ENTRY_POINT_RENAME(ucnv_unloadSharedDataIfReady) +#define ucnv_usesFallback U_ICU_ENTRY_POINT_RENAME(ucnv_usesFallback) +#define ucnvsel_close U_ICU_ENTRY_POINT_RENAME(ucnvsel_close) +#define ucnvsel_open U_ICU_ENTRY_POINT_RENAME(ucnvsel_open) +#define ucnvsel_openFromSerialized U_ICU_ENTRY_POINT_RENAME(ucnvsel_openFromSerialized) +#define ucnvsel_selectForString U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForString) +#define ucnvsel_selectForUTF8 U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForUTF8) +#define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize) +#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary) +#define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close) +#define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements) +#define ucol_countAvailable U_ICU_ENTRY_POINT_RENAME(ucol_countAvailable) +#define ucol_equal U_ICU_ENTRY_POINT_RENAME(ucol_equal) +#define ucol_equals U_ICU_ENTRY_POINT_RENAME(ucol_equals) +#define ucol_getAttribute U_ICU_ENTRY_POINT_RENAME(ucol_getAttribute) +#define ucol_getAvailable U_ICU_ENTRY_POINT_RENAME(ucol_getAvailable) +#define ucol_getBound U_ICU_ENTRY_POINT_RENAME(ucol_getBound) +#define ucol_getContractions U_ICU_ENTRY_POINT_RENAME(ucol_getContractions) +#define ucol_getContractionsAndExpansions U_ICU_ENTRY_POINT_RENAME(ucol_getContractionsAndExpansions) +#define ucol_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucol_getDisplayName) +#define ucol_getEquivalentReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getEquivalentReorderCodes) +#define ucol_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ucol_getFunctionalEquivalent) +#define ucol_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValues) +#define ucol_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValuesForLocale) +#define ucol_getKeywords U_ICU_ENTRY_POINT_RENAME(ucol_getKeywords) +#define ucol_getLocale U_ICU_ENTRY_POINT_RENAME(ucol_getLocale) +#define ucol_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucol_getLocaleByType) +#define ucol_getMaxExpansion U_ICU_ENTRY_POINT_RENAME(ucol_getMaxExpansion) +#define ucol_getMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_getMaxVariable) +#define ucol_getOffset U_ICU_ENTRY_POINT_RENAME(ucol_getOffset) +#define ucol_getReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getReorderCodes) +#define ucol_getRules U_ICU_ENTRY_POINT_RENAME(ucol_getRules) +#define ucol_getRulesEx U_ICU_ENTRY_POINT_RENAME(ucol_getRulesEx) +#define ucol_getShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_getShortDefinitionString) +#define ucol_getSortKey U_ICU_ENTRY_POINT_RENAME(ucol_getSortKey) +#define ucol_getStrength U_ICU_ENTRY_POINT_RENAME(ucol_getStrength) +#define ucol_getTailoredSet U_ICU_ENTRY_POINT_RENAME(ucol_getTailoredSet) +#define ucol_getUCAVersion U_ICU_ENTRY_POINT_RENAME(ucol_getUCAVersion) +#define ucol_getUnsafeSet U_ICU_ENTRY_POINT_RENAME(ucol_getUnsafeSet) +#define ucol_getVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_getVariableTop) +#define ucol_getVersion U_ICU_ENTRY_POINT_RENAME(ucol_getVersion) +#define ucol_greater U_ICU_ENTRY_POINT_RENAME(ucol_greater) +#define ucol_greaterOrEqual U_ICU_ENTRY_POINT_RENAME(ucol_greaterOrEqual) +#define ucol_keyHashCode U_ICU_ENTRY_POINT_RENAME(ucol_keyHashCode) +#define ucol_looksLikeCollationBinary U_ICU_ENTRY_POINT_RENAME(ucol_looksLikeCollationBinary) +#define ucol_mergeSortkeys U_ICU_ENTRY_POINT_RENAME(ucol_mergeSortkeys) +#define ucol_next U_ICU_ENTRY_POINT_RENAME(ucol_next) +#define ucol_nextSortKeyPart U_ICU_ENTRY_POINT_RENAME(ucol_nextSortKeyPart) +#define ucol_normalizeShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_normalizeShortDefinitionString) +#define ucol_open U_ICU_ENTRY_POINT_RENAME(ucol_open) +#define ucol_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ucol_openAvailableLocales) +#define ucol_openBinary U_ICU_ENTRY_POINT_RENAME(ucol_openBinary) +#define ucol_openElements U_ICU_ENTRY_POINT_RENAME(ucol_openElements) +#define ucol_openFromShortString U_ICU_ENTRY_POINT_RENAME(ucol_openFromShortString) +#define ucol_openRules U_ICU_ENTRY_POINT_RENAME(ucol_openRules) +#define ucol_prepareShortStringOpen U_ICU_ENTRY_POINT_RENAME(ucol_prepareShortStringOpen) +#define ucol_previous U_ICU_ENTRY_POINT_RENAME(ucol_previous) +#define ucol_primaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_primaryOrder) +#define ucol_reset U_ICU_ENTRY_POINT_RENAME(ucol_reset) +#define ucol_restoreVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_restoreVariableTop) +#define ucol_safeClone U_ICU_ENTRY_POINT_RENAME(ucol_safeClone) +#define ucol_secondaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_secondaryOrder) +#define ucol_setAttribute U_ICU_ENTRY_POINT_RENAME(ucol_setAttribute) +#define ucol_setMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_setMaxVariable) +#define ucol_setOffset U_ICU_ENTRY_POINT_RENAME(ucol_setOffset) +#define ucol_setReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_setReorderCodes) +#define ucol_setStrength U_ICU_ENTRY_POINT_RENAME(ucol_setStrength) +#define ucol_setText U_ICU_ENTRY_POINT_RENAME(ucol_setText) +#define ucol_setVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_setVariableTop) +#define ucol_strcoll U_ICU_ENTRY_POINT_RENAME(ucol_strcoll) +#define ucol_strcollIter U_ICU_ENTRY_POINT_RENAME(ucol_strcollIter) +#define ucol_strcollUTF8 U_ICU_ENTRY_POINT_RENAME(ucol_strcollUTF8) +#define ucol_swap U_ICU_ENTRY_POINT_RENAME(ucol_swap) +#define ucol_swapInverseUCA U_ICU_ENTRY_POINT_RENAME(ucol_swapInverseUCA) +#define ucol_tertiaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_tertiaryOrder) +#define ucpmap_get U_ICU_ENTRY_POINT_RENAME(ucpmap_get) +#define ucpmap_getRange U_ICU_ENTRY_POINT_RENAME(ucpmap_getRange) +#define ucptrie_close U_ICU_ENTRY_POINT_RENAME(ucptrie_close) +#define ucptrie_get U_ICU_ENTRY_POINT_RENAME(ucptrie_get) +#define ucptrie_getRange U_ICU_ENTRY_POINT_RENAME(ucptrie_getRange) +#define ucptrie_getType U_ICU_ENTRY_POINT_RENAME(ucptrie_getType) +#define ucptrie_getValueWidth U_ICU_ENTRY_POINT_RENAME(ucptrie_getValueWidth) +#define ucptrie_internalGetRange U_ICU_ENTRY_POINT_RENAME(ucptrie_internalGetRange) +#define ucptrie_internalSmallIndex U_ICU_ENTRY_POINT_RENAME(ucptrie_internalSmallIndex) +#define ucptrie_internalSmallU8Index U_ICU_ENTRY_POINT_RENAME(ucptrie_internalSmallU8Index) +#define ucptrie_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(ucptrie_internalU8PrevIndex) +#define ucptrie_openFromBinary U_ICU_ENTRY_POINT_RENAME(ucptrie_openFromBinary) +#define ucptrie_swap U_ICU_ENTRY_POINT_RENAME(ucptrie_swap) +#define ucptrie_toBinary U_ICU_ENTRY_POINT_RENAME(ucptrie_toBinary) +#define ucsdet_close U_ICU_ENTRY_POINT_RENAME(ucsdet_close) +#define ucsdet_detect U_ICU_ENTRY_POINT_RENAME(ucsdet_detect) +#define ucsdet_detectAll U_ICU_ENTRY_POINT_RENAME(ucsdet_detectAll) +#define ucsdet_enableInputFilter U_ICU_ENTRY_POINT_RENAME(ucsdet_enableInputFilter) +#define ucsdet_getAllDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getAllDetectableCharsets) +#define ucsdet_getConfidence U_ICU_ENTRY_POINT_RENAME(ucsdet_getConfidence) +#define ucsdet_getDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getDetectableCharsets) +#define ucsdet_getLanguage U_ICU_ENTRY_POINT_RENAME(ucsdet_getLanguage) +#define ucsdet_getName U_ICU_ENTRY_POINT_RENAME(ucsdet_getName) +#define ucsdet_getUChars U_ICU_ENTRY_POINT_RENAME(ucsdet_getUChars) +#define ucsdet_isInputFilterEnabled U_ICU_ENTRY_POINT_RENAME(ucsdet_isInputFilterEnabled) +#define ucsdet_open U_ICU_ENTRY_POINT_RENAME(ucsdet_open) +#define ucsdet_setDeclaredEncoding U_ICU_ENTRY_POINT_RENAME(ucsdet_setDeclaredEncoding) +#define ucsdet_setDetectableCharset U_ICU_ENTRY_POINT_RENAME(ucsdet_setDetectableCharset) +#define ucsdet_setText U_ICU_ENTRY_POINT_RENAME(ucsdet_setText) +#define ucurr_countCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_countCurrencies) +#define ucurr_forLocale U_ICU_ENTRY_POINT_RENAME(ucurr_forLocale) +#define ucurr_forLocaleAndDate U_ICU_ENTRY_POINT_RENAME(ucurr_forLocaleAndDate) +#define ucurr_getDefaultFractionDigits U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigits) +#define ucurr_getDefaultFractionDigitsForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigitsForUsage) +#define ucurr_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucurr_getKeywordValuesForLocale) +#define ucurr_getName U_ICU_ENTRY_POINT_RENAME(ucurr_getName) +#define ucurr_getNumericCode U_ICU_ENTRY_POINT_RENAME(ucurr_getNumericCode) +#define ucurr_getPluralName U_ICU_ENTRY_POINT_RENAME(ucurr_getPluralName) +#define ucurr_getRoundingIncrement U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrement) +#define ucurr_getRoundingIncrementForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrementForUsage) +#define ucurr_isAvailable U_ICU_ENTRY_POINT_RENAME(ucurr_isAvailable) +#define ucurr_openISOCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_openISOCurrencies) +#define ucurr_register U_ICU_ENTRY_POINT_RENAME(ucurr_register) +#define ucurr_unregister U_ICU_ENTRY_POINT_RENAME(ucurr_unregister) +#define udat_adoptNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormat) +#define udat_adoptNumberFormatForFields U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormatForFields) +#define udat_applyPattern U_ICU_ENTRY_POINT_RENAME(udat_applyPattern) +#define udat_applyPatternRelative U_ICU_ENTRY_POINT_RENAME(udat_applyPatternRelative) +#define udat_clone U_ICU_ENTRY_POINT_RENAME(udat_clone) +#define udat_close U_ICU_ENTRY_POINT_RENAME(udat_close) +#define udat_countAvailable U_ICU_ENTRY_POINT_RENAME(udat_countAvailable) +#define udat_countSymbols U_ICU_ENTRY_POINT_RENAME(udat_countSymbols) +#define udat_format U_ICU_ENTRY_POINT_RENAME(udat_format) +#define udat_formatCalendar U_ICU_ENTRY_POINT_RENAME(udat_formatCalendar) +#define udat_formatCalendarForFields U_ICU_ENTRY_POINT_RENAME(udat_formatCalendarForFields) +#define udat_formatForFields U_ICU_ENTRY_POINT_RENAME(udat_formatForFields) +#define udat_get2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_get2DigitYearStart) +#define udat_getAvailable U_ICU_ENTRY_POINT_RENAME(udat_getAvailable) +#define udat_getBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_getBooleanAttribute) +#define udat_getCalendar U_ICU_ENTRY_POINT_RENAME(udat_getCalendar) +#define udat_getContext U_ICU_ENTRY_POINT_RENAME(udat_getContext) +#define udat_getLocaleByType U_ICU_ENTRY_POINT_RENAME(udat_getLocaleByType) +#define udat_getNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormat) +#define udat_getNumberFormatForField U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormatForField) +#define udat_getSymbols U_ICU_ENTRY_POINT_RENAME(udat_getSymbols) +#define udat_isLenient U_ICU_ENTRY_POINT_RENAME(udat_isLenient) +#define udat_open U_ICU_ENTRY_POINT_RENAME(udat_open) +#define udat_parse U_ICU_ENTRY_POINT_RENAME(udat_parse) +#define udat_parseCalendar U_ICU_ENTRY_POINT_RENAME(udat_parseCalendar) +#define udat_registerOpener U_ICU_ENTRY_POINT_RENAME(udat_registerOpener) +#define udat_set2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_set2DigitYearStart) +#define udat_setBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_setBooleanAttribute) +#define udat_setCalendar U_ICU_ENTRY_POINT_RENAME(udat_setCalendar) +#define udat_setContext U_ICU_ENTRY_POINT_RENAME(udat_setContext) +#define udat_setLenient U_ICU_ENTRY_POINT_RENAME(udat_setLenient) +#define udat_setNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_setNumberFormat) +#define udat_setSymbols U_ICU_ENTRY_POINT_RENAME(udat_setSymbols) +#define udat_toCalendarDateField U_ICU_ENTRY_POINT_RENAME(udat_toCalendarDateField) +#define udat_toPattern U_ICU_ENTRY_POINT_RENAME(udat_toPattern) +#define udat_toPatternRelativeDate U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeDate) +#define udat_toPatternRelativeTime U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeTime) +#define udat_unregisterOpener U_ICU_ENTRY_POINT_RENAME(udat_unregisterOpener) +#define udata_checkCommonData U_ICU_ENTRY_POINT_RENAME(udata_checkCommonData) +#define udata_close U_ICU_ENTRY_POINT_RENAME(udata_close) +#define udata_closeSwapper U_ICU_ENTRY_POINT_RENAME(udata_closeSwapper) +#define udata_getHeaderSize U_ICU_ENTRY_POINT_RENAME(udata_getHeaderSize) +#define udata_getInfo U_ICU_ENTRY_POINT_RENAME(udata_getInfo) +#define udata_getInfoSize U_ICU_ENTRY_POINT_RENAME(udata_getInfoSize) +#define udata_getLength U_ICU_ENTRY_POINT_RENAME(udata_getLength) +#define udata_getMemory U_ICU_ENTRY_POINT_RENAME(udata_getMemory) +#define udata_getRawMemory U_ICU_ENTRY_POINT_RENAME(udata_getRawMemory) +#define udata_open U_ICU_ENTRY_POINT_RENAME(udata_open) +#define udata_openChoice U_ICU_ENTRY_POINT_RENAME(udata_openChoice) +#define udata_openSwapper U_ICU_ENTRY_POINT_RENAME(udata_openSwapper) +#define udata_openSwapperForInputData U_ICU_ENTRY_POINT_RENAME(udata_openSwapperForInputData) +#define udata_printError U_ICU_ENTRY_POINT_RENAME(udata_printError) +#define udata_readInt16 U_ICU_ENTRY_POINT_RENAME(udata_readInt16) +#define udata_readInt32 U_ICU_ENTRY_POINT_RENAME(udata_readInt32) +#define udata_setAppData U_ICU_ENTRY_POINT_RENAME(udata_setAppData) +#define udata_setCommonData U_ICU_ENTRY_POINT_RENAME(udata_setCommonData) +#define udata_setFileAccess U_ICU_ENTRY_POINT_RENAME(udata_setFileAccess) +#define udata_swapDataHeader U_ICU_ENTRY_POINT_RENAME(udata_swapDataHeader) +#define udata_swapInvStringBlock U_ICU_ENTRY_POINT_RENAME(udata_swapInvStringBlock) +#define udatpg_addPattern U_ICU_ENTRY_POINT_RENAME(udatpg_addPattern) +#define udatpg_clone U_ICU_ENTRY_POINT_RENAME(udatpg_clone) +#define udatpg_close U_ICU_ENTRY_POINT_RENAME(udatpg_close) +#define udatpg_getAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemFormat) +#define udatpg_getAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemName) +#define udatpg_getBaseSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getBaseSkeleton) +#define udatpg_getBestPattern U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPattern) +#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions) +#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat) +#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal) +#define udatpg_getDefaultHourCycle U_ICU_ENTRY_POINT_RENAME(udatpg_getDefaultHourCycle) +#define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName) +#define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton) +#define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton) +#define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open) +#define udatpg_openBaseSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openBaseSkeletons) +#define udatpg_openEmpty U_ICU_ENTRY_POINT_RENAME(udatpg_openEmpty) +#define udatpg_openSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openSkeletons) +#define udatpg_replaceFieldTypes U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypes) +#define udatpg_replaceFieldTypesWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypesWithOptions) +#define udatpg_setAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemFormat) +#define udatpg_setAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName) +#define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat) +#define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal) +#define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap) +#define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close) +#define udtitvfmt_closeResult U_ICU_ENTRY_POINT_RENAME(udtitvfmt_closeResult) +#define udtitvfmt_format U_ICU_ENTRY_POINT_RENAME(udtitvfmt_format) +#define udtitvfmt_formatCalendarToResult U_ICU_ENTRY_POINT_RENAME(udtitvfmt_formatCalendarToResult) +#define udtitvfmt_formatToResult U_ICU_ENTRY_POINT_RENAME(udtitvfmt_formatToResult) +#define udtitvfmt_getContext U_ICU_ENTRY_POINT_RENAME(udtitvfmt_getContext) +#define udtitvfmt_open U_ICU_ENTRY_POINT_RENAME(udtitvfmt_open) +#define udtitvfmt_openResult U_ICU_ENTRY_POINT_RENAME(udtitvfmt_openResult) +#define udtitvfmt_resultAsValue U_ICU_ENTRY_POINT_RENAME(udtitvfmt_resultAsValue) +#define udtitvfmt_setContext U_ICU_ENTRY_POINT_RENAME(udtitvfmt_setContext) +#define uenum_close U_ICU_ENTRY_POINT_RENAME(uenum_close) +#define uenum_count U_ICU_ENTRY_POINT_RENAME(uenum_count) +#define uenum_next U_ICU_ENTRY_POINT_RENAME(uenum_next) +#define uenum_nextDefault U_ICU_ENTRY_POINT_RENAME(uenum_nextDefault) +#define uenum_openCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openCharStringsEnumeration) +#define uenum_openFromStringEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openFromStringEnumeration) +#define uenum_openUCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openUCharStringsEnumeration) +#define uenum_reset U_ICU_ENTRY_POINT_RENAME(uenum_reset) +#define uenum_unext U_ICU_ENTRY_POINT_RENAME(uenum_unext) +#define uenum_unextDefault U_ICU_ENTRY_POINT_RENAME(uenum_unextDefault) +#define ufieldpositer_close U_ICU_ENTRY_POINT_RENAME(ufieldpositer_close) +#define ufieldpositer_next U_ICU_ENTRY_POINT_RENAME(ufieldpositer_next) +#define ufieldpositer_open U_ICU_ENTRY_POINT_RENAME(ufieldpositer_open) +#define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch) +#define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32) +#define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close) +#define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex) +#define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength) +#define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate) +#define ufmt_getDecNumChars U_ICU_ENTRY_POINT_RENAME(ufmt_getDecNumChars) +#define ufmt_getDouble U_ICU_ENTRY_POINT_RENAME(ufmt_getDouble) +#define ufmt_getInt64 U_ICU_ENTRY_POINT_RENAME(ufmt_getInt64) +#define ufmt_getLong U_ICU_ENTRY_POINT_RENAME(ufmt_getLong) +#define ufmt_getObject U_ICU_ENTRY_POINT_RENAME(ufmt_getObject) +#define ufmt_getType U_ICU_ENTRY_POINT_RENAME(ufmt_getType) +#define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars) +#define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric) +#define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open) +#define ufmtval_getString U_ICU_ENTRY_POINT_RENAME(ufmtval_getString) +#define ufmtval_nextPosition U_ICU_ENTRY_POINT_RENAME(ufmtval_nextPosition) +#define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance) +#define ugender_getListGender U_ICU_ENTRY_POINT_RENAME(ugender_getListGender) +#define uhash_close U_ICU_ENTRY_POINT_RENAME(uhash_close) +#define uhash_compareCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareCaselessUnicodeString) +#define uhash_compareChars U_ICU_ENTRY_POINT_RENAME(uhash_compareChars) +#define uhash_compareIChars U_ICU_ENTRY_POINT_RENAME(uhash_compareIChars) +#define uhash_compareLong U_ICU_ENTRY_POINT_RENAME(uhash_compareLong) +#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet) +#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars) +#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString) +#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count) +#define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable) +#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet) +#define uhash_equals U_ICU_ENTRY_POINT_RENAME(uhash_equals) +#define uhash_equalsScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_equalsScriptSet) +#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find) +#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get) +#define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti) +#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString) +#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars) +#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars) +#define uhash_hashLong U_ICU_ENTRY_POINT_RENAME(uhash_hashLong) +#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet) +#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars) +#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString) +#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget) +#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti) +#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init) +#define uhash_initSize U_ICU_ENTRY_POINT_RENAME(uhash_initSize) +#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput) +#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi) +#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove) +#define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei) +#define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement) +#define uhash_open U_ICU_ENTRY_POINT_RENAME(uhash_open) +#define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize) +#define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put) +#define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti) +#define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove) +#define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll) +#define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement) +#define uhash_removei U_ICU_ENTRY_POINT_RENAME(uhash_removei) +#define uhash_setKeyComparator U_ICU_ENTRY_POINT_RENAME(uhash_setKeyComparator) +#define uhash_setKeyDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setKeyDeleter) +#define uhash_setKeyHasher U_ICU_ENTRY_POINT_RENAME(uhash_setKeyHasher) +#define uhash_setResizePolicy U_ICU_ENTRY_POINT_RENAME(uhash_setResizePolicy) +#define uhash_setValueComparator U_ICU_ENTRY_POINT_RENAME(uhash_setValueComparator) +#define uhash_setValueDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setValueDeleter) +#define uidna_IDNToASCII U_ICU_ENTRY_POINT_RENAME(uidna_IDNToASCII) +#define uidna_IDNToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_IDNToUnicode) +#define uidna_close U_ICU_ENTRY_POINT_RENAME(uidna_close) +#define uidna_compare U_ICU_ENTRY_POINT_RENAME(uidna_compare) +#define uidna_labelToASCII U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII) +#define uidna_labelToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII_UTF8) +#define uidna_labelToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicode) +#define uidna_labelToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicodeUTF8) +#define uidna_nameToASCII U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII) +#define uidna_nameToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII_UTF8) +#define uidna_nameToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicode) +#define uidna_nameToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicodeUTF8) +#define uidna_openUTS46 U_ICU_ENTRY_POINT_RENAME(uidna_openUTS46) +#define uidna_toASCII U_ICU_ENTRY_POINT_RENAME(uidna_toASCII) +#define uidna_toUnicode U_ICU_ENTRY_POINT_RENAME(uidna_toUnicode) +#define uiter_current32 U_ICU_ENTRY_POINT_RENAME(uiter_current32) +#define uiter_getState U_ICU_ENTRY_POINT_RENAME(uiter_getState) +#define uiter_next32 U_ICU_ENTRY_POINT_RENAME(uiter_next32) +#define uiter_previous32 U_ICU_ENTRY_POINT_RENAME(uiter_previous32) +#define uiter_setCharacterIterator U_ICU_ENTRY_POINT_RENAME(uiter_setCharacterIterator) +#define uiter_setReplaceable U_ICU_ENTRY_POINT_RENAME(uiter_setReplaceable) +#define uiter_setState U_ICU_ENTRY_POINT_RENAME(uiter_setState) +#define uiter_setString U_ICU_ENTRY_POINT_RENAME(uiter_setString) +#define uiter_setUTF16BE U_ICU_ENTRY_POINT_RENAME(uiter_setUTF16BE) +#define uiter_setUTF8 U_ICU_ENTRY_POINT_RENAME(uiter_setUTF8) +#define uldn_close U_ICU_ENTRY_POINT_RENAME(uldn_close) +#define uldn_getContext U_ICU_ENTRY_POINT_RENAME(uldn_getContext) +#define uldn_getDialectHandling U_ICU_ENTRY_POINT_RENAME(uldn_getDialectHandling) +#define uldn_getLocale U_ICU_ENTRY_POINT_RENAME(uldn_getLocale) +#define uldn_keyDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyDisplayName) +#define uldn_keyValueDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyValueDisplayName) +#define uldn_languageDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_languageDisplayName) +#define uldn_localeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_localeDisplayName) +#define uldn_open U_ICU_ENTRY_POINT_RENAME(uldn_open) +#define uldn_openForContext U_ICU_ENTRY_POINT_RENAME(uldn_openForContext) +#define uldn_regionDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_regionDisplayName) +#define uldn_scriptCodeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptCodeDisplayName) +#define uldn_scriptDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptDisplayName) +#define uldn_variantDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_variantDisplayName) +#define ulist_addItemBeginList U_ICU_ENTRY_POINT_RENAME(ulist_addItemBeginList) +#define ulist_addItemEndList U_ICU_ENTRY_POINT_RENAME(ulist_addItemEndList) +#define ulist_close_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_close_keyword_values_iterator) +#define ulist_containsString U_ICU_ENTRY_POINT_RENAME(ulist_containsString) +#define ulist_count_keyword_values U_ICU_ENTRY_POINT_RENAME(ulist_count_keyword_values) +#define ulist_createEmptyList U_ICU_ENTRY_POINT_RENAME(ulist_createEmptyList) +#define ulist_deleteList U_ICU_ENTRY_POINT_RENAME(ulist_deleteList) +#define ulist_getListFromEnum U_ICU_ENTRY_POINT_RENAME(ulist_getListFromEnum) +#define ulist_getListSize U_ICU_ENTRY_POINT_RENAME(ulist_getListSize) +#define ulist_getNext U_ICU_ENTRY_POINT_RENAME(ulist_getNext) +#define ulist_next_keyword_value U_ICU_ENTRY_POINT_RENAME(ulist_next_keyword_value) +#define ulist_removeString U_ICU_ENTRY_POINT_RENAME(ulist_removeString) +#define ulist_resetList U_ICU_ENTRY_POINT_RENAME(ulist_resetList) +#define ulist_reset_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_reset_keyword_values_iterator) +#define ulistfmt_close U_ICU_ENTRY_POINT_RENAME(ulistfmt_close) +#define ulistfmt_closeResult U_ICU_ENTRY_POINT_RENAME(ulistfmt_closeResult) +#define ulistfmt_format U_ICU_ENTRY_POINT_RENAME(ulistfmt_format) +#define ulistfmt_formatStringsToResult U_ICU_ENTRY_POINT_RENAME(ulistfmt_formatStringsToResult) +#define ulistfmt_open U_ICU_ENTRY_POINT_RENAME(ulistfmt_open) +#define ulistfmt_openForType U_ICU_ENTRY_POINT_RENAME(ulistfmt_openForType) +#define ulistfmt_openResult U_ICU_ENTRY_POINT_RENAME(ulistfmt_openResult) +#define ulistfmt_resultAsValue U_ICU_ENTRY_POINT_RENAME(ulistfmt_resultAsValue) +#define uloc_acceptLanguage U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguage) +#define uloc_acceptLanguageFromHTTP U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguageFromHTTP) +#define uloc_addLikelySubtags U_ICU_ENTRY_POINT_RENAME(uloc_addLikelySubtags) +#define uloc_canonicalize U_ICU_ENTRY_POINT_RENAME(uloc_canonicalize) +#define uloc_countAvailable U_ICU_ENTRY_POINT_RENAME(uloc_countAvailable) +#define uloc_forLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_forLanguageTag) +#define uloc_getAvailable U_ICU_ENTRY_POINT_RENAME(uloc_getAvailable) +#define uloc_getBaseName U_ICU_ENTRY_POINT_RENAME(uloc_getBaseName) +#define uloc_getCharacterOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getCharacterOrientation) +#define uloc_getCountry U_ICU_ENTRY_POINT_RENAME(uloc_getCountry) +#define uloc_getCurrentCountryID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentCountryID) +#define uloc_getCurrentLanguageID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentLanguageID) +#define uloc_getDefault U_ICU_ENTRY_POINT_RENAME(uloc_getDefault) +#define uloc_getDisplayCountry U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayCountry) +#define uloc_getDisplayKeyword U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeyword) +#define uloc_getDisplayKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeywordValue) +#define uloc_getDisplayLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayLanguage) +#define uloc_getDisplayName U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayName) +#define uloc_getDisplayScript U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScript) +#define uloc_getDisplayVariant U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayVariant) +#define uloc_getISO3Country U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Country) +#define uloc_getISO3Language U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Language) +#define uloc_getISOCountries U_ICU_ENTRY_POINT_RENAME(uloc_getISOCountries) +#define uloc_getISOLanguages U_ICU_ENTRY_POINT_RENAME(uloc_getISOLanguages) +#define uloc_getKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getKeywordValue) +#define uloc_getLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLCID) +#define uloc_getLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getLanguage) +#define uloc_getLineOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getLineOrientation) +#define uloc_getLocaleForLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLocaleForLCID) +#define uloc_getName U_ICU_ENTRY_POINT_RENAME(uloc_getName) +#define uloc_getParent U_ICU_ENTRY_POINT_RENAME(uloc_getParent) +#define uloc_getScript U_ICU_ENTRY_POINT_RENAME(uloc_getScript) +#define uloc_getTableStringWithFallback U_ICU_ENTRY_POINT_RENAME(uloc_getTableStringWithFallback) +#define uloc_getVariant U_ICU_ENTRY_POINT_RENAME(uloc_getVariant) +#define uloc_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uloc_isRightToLeft) +#define uloc_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(uloc_minimizeSubtags) +#define uloc_openAvailableByType U_ICU_ENTRY_POINT_RENAME(uloc_openAvailableByType) +#define uloc_openKeywordList U_ICU_ENTRY_POINT_RENAME(uloc_openKeywordList) +#define uloc_openKeywords U_ICU_ENTRY_POINT_RENAME(uloc_openKeywords) +#define uloc_setDefault U_ICU_ENTRY_POINT_RENAME(uloc_setDefault) +#define uloc_setKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_setKeywordValue) +#define uloc_toLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_toLanguageTag) +#define uloc_toLegacyKey U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyKey) +#define uloc_toLegacyType U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyType) +#define uloc_toUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleKey) +#define uloc_toUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleType) +#define ulocdata_close U_ICU_ENTRY_POINT_RENAME(ulocdata_close) +#define ulocdata_getCLDRVersion U_ICU_ENTRY_POINT_RENAME(ulocdata_getCLDRVersion) +#define ulocdata_getDelimiter U_ICU_ENTRY_POINT_RENAME(ulocdata_getDelimiter) +#define ulocdata_getExemplarSet U_ICU_ENTRY_POINT_RENAME(ulocdata_getExemplarSet) +#define ulocdata_getLocaleDisplayPattern U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleDisplayPattern) +#define ulocdata_getLocaleSeparator U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleSeparator) +#define ulocdata_getMeasurementSystem U_ICU_ENTRY_POINT_RENAME(ulocdata_getMeasurementSystem) +#define ulocdata_getNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_getNoSubstitute) +#define ulocdata_getPaperSize U_ICU_ENTRY_POINT_RENAME(ulocdata_getPaperSize) +#define ulocdata_open U_ICU_ENTRY_POINT_RENAME(ulocdata_open) +#define ulocdata_setNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_setNoSubstitute) +#define ulocimp_addLikelySubtags U_ICU_ENTRY_POINT_RENAME(ulocimp_addLikelySubtags) +#define ulocimp_canonicalize U_ICU_ENTRY_POINT_RENAME(ulocimp_canonicalize) +#define ulocimp_forLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_forLanguageTag) +#define ulocimp_getBaseName U_ICU_ENTRY_POINT_RENAME(ulocimp_getBaseName) +#define ulocimp_getCountry U_ICU_ENTRY_POINT_RENAME(ulocimp_getCountry) +#define ulocimp_getKeywordValue U_ICU_ENTRY_POINT_RENAME(ulocimp_getKeywordValue) +#define ulocimp_getKeywords U_ICU_ENTRY_POINT_RENAME(ulocimp_getKeywords) +#define ulocimp_getKnownCanonicalizedLocaleForTest U_ICU_ENTRY_POINT_RENAME(ulocimp_getKnownCanonicalizedLocaleForTest) +#define ulocimp_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocimp_getLanguage) +#define ulocimp_getName U_ICU_ENTRY_POINT_RENAME(ulocimp_getName) +#define ulocimp_getRegionForSupplementalData U_ICU_ENTRY_POINT_RENAME(ulocimp_getRegionForSupplementalData) +#define ulocimp_getScript U_ICU_ENTRY_POINT_RENAME(ulocimp_getScript) +#define ulocimp_isCanonicalizedLocaleForTest U_ICU_ENTRY_POINT_RENAME(ulocimp_isCanonicalizedLocaleForTest) +#define ulocimp_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(ulocimp_minimizeSubtags) +#define ulocimp_toBcpKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpKey) +#define ulocimp_toBcpType U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpType) +#define ulocimp_toLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_toLanguageTag) +#define ulocimp_toLegacyKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyKey) +#define ulocimp_toLegacyType U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyType) +#define ultag_isExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isExtensionSubtags) +#define ultag_isLanguageSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isLanguageSubtag) +#define ultag_isPrivateuseValueSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isPrivateuseValueSubtags) +#define ultag_isRegionSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isRegionSubtag) +#define ultag_isScriptSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isScriptSubtag) +#define ultag_isTransformedExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isTransformedExtensionSubtags) +#define ultag_isUnicodeExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeExtensionSubtags) +#define ultag_isUnicodeLocaleAttribute U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleAttribute) +#define ultag_isUnicodeLocaleAttributes U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleAttributes) +#define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey) +#define ultag_isUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType) +#define ultag_isVariantSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isVariantSubtags) +#define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern) +#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe) +#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone) +#define umsg_close U_ICU_ENTRY_POINT_RENAME(umsg_close) +#define umsg_format U_ICU_ENTRY_POINT_RENAME(umsg_format) +#define umsg_getLocale U_ICU_ENTRY_POINT_RENAME(umsg_getLocale) +#define umsg_open U_ICU_ENTRY_POINT_RENAME(umsg_open) +#define umsg_parse U_ICU_ENTRY_POINT_RENAME(umsg_parse) +#define umsg_setLocale U_ICU_ENTRY_POINT_RENAME(umsg_setLocale) +#define umsg_toPattern U_ICU_ENTRY_POINT_RENAME(umsg_toPattern) +#define umsg_vformat U_ICU_ENTRY_POINT_RENAME(umsg_vformat) +#define umsg_vparse U_ICU_ENTRY_POINT_RENAME(umsg_vparse) +#define umtx_lock U_ICU_ENTRY_POINT_RENAME(umtx_lock) +#define umtx_unlock U_ICU_ENTRY_POINT_RENAME(umtx_unlock) +#define umutablecptrie_buildImmutable U_ICU_ENTRY_POINT_RENAME(umutablecptrie_buildImmutable) +#define umutablecptrie_clone U_ICU_ENTRY_POINT_RENAME(umutablecptrie_clone) +#define umutablecptrie_close U_ICU_ENTRY_POINT_RENAME(umutablecptrie_close) +#define umutablecptrie_fromUCPMap U_ICU_ENTRY_POINT_RENAME(umutablecptrie_fromUCPMap) +#define umutablecptrie_fromUCPTrie U_ICU_ENTRY_POINT_RENAME(umutablecptrie_fromUCPTrie) +#define umutablecptrie_get U_ICU_ENTRY_POINT_RENAME(umutablecptrie_get) +#define umutablecptrie_getRange U_ICU_ENTRY_POINT_RENAME(umutablecptrie_getRange) +#define umutablecptrie_open U_ICU_ENTRY_POINT_RENAME(umutablecptrie_open) +#define umutablecptrie_set U_ICU_ENTRY_POINT_RENAME(umutablecptrie_set) +#define umutablecptrie_setRange U_ICU_ENTRY_POINT_RENAME(umutablecptrie_setRange) +#define uniset_getUnicode32Instance U_ICU_ENTRY_POINT_RENAME(uniset_getUnicode32Instance) +#define unorm2_append U_ICU_ENTRY_POINT_RENAME(unorm2_append) +#define unorm2_close U_ICU_ENTRY_POINT_RENAME(unorm2_close) +#define unorm2_composePair U_ICU_ENTRY_POINT_RENAME(unorm2_composePair) +#define unorm2_getCombiningClass U_ICU_ENTRY_POINT_RENAME(unorm2_getCombiningClass) +#define unorm2_getDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getDecomposition) +#define unorm2_getInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getInstance) +#define unorm2_getNFCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFCInstance) +#define unorm2_getNFDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFDInstance) +#define unorm2_getNFKCCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCCasefoldInstance) +#define unorm2_getNFKCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCInstance) +#define unorm2_getNFKDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKDInstance) +#define unorm2_getRawDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getRawDecomposition) +#define unorm2_hasBoundaryAfter U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryAfter) +#define unorm2_hasBoundaryBefore U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryBefore) +#define unorm2_isInert U_ICU_ENTRY_POINT_RENAME(unorm2_isInert) +#define unorm2_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm2_isNormalized) +#define unorm2_normalize U_ICU_ENTRY_POINT_RENAME(unorm2_normalize) +#define unorm2_normalizeSecondAndAppend U_ICU_ENTRY_POINT_RENAME(unorm2_normalizeSecondAndAppend) +#define unorm2_openFiltered U_ICU_ENTRY_POINT_RENAME(unorm2_openFiltered) +#define unorm2_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm2_quickCheck) +#define unorm2_spanQuickCheckYes U_ICU_ENTRY_POINT_RENAME(unorm2_spanQuickCheckYes) +#define unorm2_swap U_ICU_ENTRY_POINT_RENAME(unorm2_swap) +#define unorm_compare U_ICU_ENTRY_POINT_RENAME(unorm_compare) +#define unorm_concatenate U_ICU_ENTRY_POINT_RENAME(unorm_concatenate) +#define unorm_getFCD16 U_ICU_ENTRY_POINT_RENAME(unorm_getFCD16) +#define unorm_getQuickCheck U_ICU_ENTRY_POINT_RENAME(unorm_getQuickCheck) +#define unorm_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm_isNormalized) +#define unorm_isNormalizedWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_isNormalizedWithOptions) +#define unorm_next U_ICU_ENTRY_POINT_RENAME(unorm_next) +#define unorm_normalize U_ICU_ENTRY_POINT_RENAME(unorm_normalize) +#define unorm_previous U_ICU_ENTRY_POINT_RENAME(unorm_previous) +#define unorm_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm_quickCheck) +#define unorm_quickCheckWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_quickCheckWithOptions) +#define unum_applyPattern U_ICU_ENTRY_POINT_RENAME(unum_applyPattern) +#define unum_clone U_ICU_ENTRY_POINT_RENAME(unum_clone) +#define unum_close U_ICU_ENTRY_POINT_RENAME(unum_close) +#define unum_countAvailable U_ICU_ENTRY_POINT_RENAME(unum_countAvailable) +#define unum_format U_ICU_ENTRY_POINT_RENAME(unum_format) +#define unum_formatDecimal U_ICU_ENTRY_POINT_RENAME(unum_formatDecimal) +#define unum_formatDouble U_ICU_ENTRY_POINT_RENAME(unum_formatDouble) +#define unum_formatDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleCurrency) +#define unum_formatDoubleForFields U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleForFields) +#define unum_formatInt64 U_ICU_ENTRY_POINT_RENAME(unum_formatInt64) +#define unum_formatUFormattable U_ICU_ENTRY_POINT_RENAME(unum_formatUFormattable) +#define unum_getAttribute U_ICU_ENTRY_POINT_RENAME(unum_getAttribute) +#define unum_getAvailable U_ICU_ENTRY_POINT_RENAME(unum_getAvailable) +#define unum_getContext U_ICU_ENTRY_POINT_RENAME(unum_getContext) +#define unum_getDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_getDoubleAttribute) +#define unum_getLocaleByType U_ICU_ENTRY_POINT_RENAME(unum_getLocaleByType) +#define unum_getSymbol U_ICU_ENTRY_POINT_RENAME(unum_getSymbol) +#define unum_getTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_getTextAttribute) +#define unum_open U_ICU_ENTRY_POINT_RENAME(unum_open) +#define unum_parse U_ICU_ENTRY_POINT_RENAME(unum_parse) +#define unum_parseDecimal U_ICU_ENTRY_POINT_RENAME(unum_parseDecimal) +#define unum_parseDouble U_ICU_ENTRY_POINT_RENAME(unum_parseDouble) +#define unum_parseDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_parseDoubleCurrency) +#define unum_parseInt64 U_ICU_ENTRY_POINT_RENAME(unum_parseInt64) +#define unum_parseToUFormattable U_ICU_ENTRY_POINT_RENAME(unum_parseToUFormattable) +#define unum_setAttribute U_ICU_ENTRY_POINT_RENAME(unum_setAttribute) +#define unum_setContext U_ICU_ENTRY_POINT_RENAME(unum_setContext) +#define unum_setDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_setDoubleAttribute) +#define unum_setSymbol U_ICU_ENTRY_POINT_RENAME(unum_setSymbol) +#define unum_setTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_setTextAttribute) +#define unum_toPattern U_ICU_ENTRY_POINT_RENAME(unum_toPattern) +#define unumf_close U_ICU_ENTRY_POINT_RENAME(unumf_close) +#define unumf_closeResult U_ICU_ENTRY_POINT_RENAME(unumf_closeResult) +#define unumf_formatDecimal U_ICU_ENTRY_POINT_RENAME(unumf_formatDecimal) +#define unumf_formatDouble U_ICU_ENTRY_POINT_RENAME(unumf_formatDouble) +#define unumf_formatInt U_ICU_ENTRY_POINT_RENAME(unumf_formatInt) +#define unumf_openForSkeletonAndLocale U_ICU_ENTRY_POINT_RENAME(unumf_openForSkeletonAndLocale) +#define unumf_openForSkeletonAndLocaleWithError U_ICU_ENTRY_POINT_RENAME(unumf_openForSkeletonAndLocaleWithError) +#define unumf_openResult U_ICU_ENTRY_POINT_RENAME(unumf_openResult) +#define unumf_resultAsValue U_ICU_ENTRY_POINT_RENAME(unumf_resultAsValue) +#define unumf_resultGetAllFieldPositions U_ICU_ENTRY_POINT_RENAME(unumf_resultGetAllFieldPositions) +#define unumf_resultNextFieldPosition U_ICU_ENTRY_POINT_RENAME(unumf_resultNextFieldPosition) +#define unumf_resultToDecimalNumber U_ICU_ENTRY_POINT_RENAME(unumf_resultToDecimalNumber) +#define unumf_resultToString U_ICU_ENTRY_POINT_RENAME(unumf_resultToString) +#define unumrf_close U_ICU_ENTRY_POINT_RENAME(unumrf_close) +#define unumrf_closeResult U_ICU_ENTRY_POINT_RENAME(unumrf_closeResult) +#define unumrf_formatDecimalRange U_ICU_ENTRY_POINT_RENAME(unumrf_formatDecimalRange) +#define unumrf_formatDoubleRange U_ICU_ENTRY_POINT_RENAME(unumrf_formatDoubleRange) +#define unumrf_openForSkeletonWithCollapseAndIdentityFallback U_ICU_ENTRY_POINT_RENAME(unumrf_openForSkeletonWithCollapseAndIdentityFallback) +#define unumrf_openResult U_ICU_ENTRY_POINT_RENAME(unumrf_openResult) +#define unumrf_resultAsValue U_ICU_ENTRY_POINT_RENAME(unumrf_resultAsValue) +#define unumrf_resultGetFirstDecimalNumber U_ICU_ENTRY_POINT_RENAME(unumrf_resultGetFirstDecimalNumber) +#define unumrf_resultGetIdentityResult U_ICU_ENTRY_POINT_RENAME(unumrf_resultGetIdentityResult) +#define unumrf_resultGetSecondDecimalNumber U_ICU_ENTRY_POINT_RENAME(unumrf_resultGetSecondDecimalNumber) +#define unumsys_close U_ICU_ENTRY_POINT_RENAME(unumsys_close) +#define unumsys_getDescription U_ICU_ENTRY_POINT_RENAME(unumsys_getDescription) +#define unumsys_getName U_ICU_ENTRY_POINT_RENAME(unumsys_getName) +#define unumsys_getRadix U_ICU_ENTRY_POINT_RENAME(unumsys_getRadix) +#define unumsys_isAlgorithmic U_ICU_ENTRY_POINT_RENAME(unumsys_isAlgorithmic) +#define unumsys_open U_ICU_ENTRY_POINT_RENAME(unumsys_open) +#define unumsys_openAvailableNames U_ICU_ENTRY_POINT_RENAME(unumsys_openAvailableNames) +#define unumsys_openByName U_ICU_ENTRY_POINT_RENAME(unumsys_openByName) +#define uplrules_close U_ICU_ENTRY_POINT_RENAME(uplrules_close) +#define uplrules_getKeywords U_ICU_ENTRY_POINT_RENAME(uplrules_getKeywords) +#define uplrules_open U_ICU_ENTRY_POINT_RENAME(uplrules_open) +#define uplrules_openForType U_ICU_ENTRY_POINT_RENAME(uplrules_openForType) +#define uplrules_select U_ICU_ENTRY_POINT_RENAME(uplrules_select) +#define uplrules_selectForRange U_ICU_ENTRY_POINT_RENAME(uplrules_selectForRange) +#define uplrules_selectFormatted U_ICU_ENTRY_POINT_RENAME(uplrules_selectFormatted) +#define uplrules_selectWithFormat U_ICU_ENTRY_POINT_RENAME(uplrules_selectWithFormat) +#define uplug_closeLibrary U_ICU_ENTRY_POINT_RENAME(uplug_closeLibrary) +#define uplug_findLibrary U_ICU_ENTRY_POINT_RENAME(uplug_findLibrary) +#define uplug_getConfiguration U_ICU_ENTRY_POINT_RENAME(uplug_getConfiguration) +#define uplug_getContext U_ICU_ENTRY_POINT_RENAME(uplug_getContext) +#define uplug_getCurrentLevel U_ICU_ENTRY_POINT_RENAME(uplug_getCurrentLevel) +#define uplug_getLibrary U_ICU_ENTRY_POINT_RENAME(uplug_getLibrary) +#define uplug_getLibraryName U_ICU_ENTRY_POINT_RENAME(uplug_getLibraryName) +#define uplug_getPlugInternal U_ICU_ENTRY_POINT_RENAME(uplug_getPlugInternal) +#define uplug_getPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLevel) +#define uplug_getPlugLoadStatus U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLoadStatus) +#define uplug_getPlugName U_ICU_ENTRY_POINT_RENAME(uplug_getPlugName) +#define uplug_getPluginFile U_ICU_ENTRY_POINT_RENAME(uplug_getPluginFile) +#define uplug_getSymbolName U_ICU_ENTRY_POINT_RENAME(uplug_getSymbolName) +#define uplug_init U_ICU_ENTRY_POINT_RENAME(uplug_init) +#define uplug_loadPlugFromEntrypoint U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromEntrypoint) +#define uplug_loadPlugFromLibrary U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromLibrary) +#define uplug_nextPlug U_ICU_ENTRY_POINT_RENAME(uplug_nextPlug) +#define uplug_openLibrary U_ICU_ENTRY_POINT_RENAME(uplug_openLibrary) +#define uplug_removePlug U_ICU_ENTRY_POINT_RENAME(uplug_removePlug) +#define uplug_setContext U_ICU_ENTRY_POINT_RENAME(uplug_setContext) +#define uplug_setPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_setPlugLevel) +#define uplug_setPlugName U_ICU_ENTRY_POINT_RENAME(uplug_setPlugName) +#define uplug_setPlugNoUnload U_ICU_ENTRY_POINT_RENAME(uplug_setPlugNoUnload) +#define uprops_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uprops_addPropertyStarts) +#define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource) +#define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts) +#define uprv_add32_overflow U_ICU_ENTRY_POINT_RENAME(uprv_add32_overflow) +#define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy) +#define uprv_asciiFromEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_asciiFromEbcdic) +#define uprv_asciitolower U_ICU_ENTRY_POINT_RENAME(uprv_asciitolower) +#define uprv_calloc U_ICU_ENTRY_POINT_RENAME(uprv_calloc) +#define uprv_ceil U_ICU_ENTRY_POINT_RENAME(uprv_ceil) +#define uprv_compareASCIIPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareASCIIPropertyNames) +#define uprv_compareEBCDICPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareEBCDICPropertyNames) +#define uprv_compareInvAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvAscii) +#define uprv_compareInvEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdic) +#define uprv_compareInvEbcdicAsAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdicAsAscii) +#define uprv_convertToLCID U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCID) +#define uprv_convertToLCIDPlatform U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCIDPlatform) +#define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix) +#define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii) +#define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic) +#define uprv_decContextClearStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextClearStatus) +#define uprv_decContextDefault U_ICU_ENTRY_POINT_RENAME(uprv_decContextDefault) +#define uprv_decContextGetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetRounding) +#define uprv_decContextGetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetStatus) +#define uprv_decContextRestoreStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextRestoreStatus) +#define uprv_decContextSaveStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSaveStatus) +#define uprv_decContextSetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetRounding) +#define uprv_decContextSetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatus) +#define uprv_decContextSetStatusFromString U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromString) +#define uprv_decContextSetStatusFromStringQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromStringQuiet) +#define uprv_decContextSetStatusQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusQuiet) +#define uprv_decContextStatusToString U_ICU_ENTRY_POINT_RENAME(uprv_decContextStatusToString) +#define uprv_decContextTestSavedStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestSavedStatus) +#define uprv_decContextTestStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestStatus) +#define uprv_decContextZeroStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextZeroStatus) +#define uprv_decNumberAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAbs) +#define uprv_decNumberAdd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAdd) +#define uprv_decNumberAnd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAnd) +#define uprv_decNumberClassToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClassToString) +#define uprv_decNumberCompare U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompare) +#define uprv_decNumberCompareSignal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareSignal) +#define uprv_decNumberCompareTotal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotal) +#define uprv_decNumberCompareTotalMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotalMag) +#define uprv_decNumberCopy U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopy) +#define uprv_decNumberCopyAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyAbs) +#define uprv_decNumberCopyNegate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyNegate) +#define uprv_decNumberCopySign U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopySign) +#define uprv_decNumberDivide U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivide) +#define uprv_decNumberDivideInteger U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivideInteger) +#define uprv_decNumberExp U_ICU_ENTRY_POINT_RENAME(uprv_decNumberExp) +#define uprv_decNumberFMA U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFMA) +#define uprv_decNumberFromInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromInt32) +#define uprv_decNumberFromString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromString) +#define uprv_decNumberFromUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromUInt32) +#define uprv_decNumberGetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberGetBCD) +#define uprv_decNumberInvert U_ICU_ENTRY_POINT_RENAME(uprv_decNumberInvert) +#define uprv_decNumberIsNormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsNormal) +#define uprv_decNumberIsSubnormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsSubnormal) +#define uprv_decNumberLn U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLn) +#define uprv_decNumberLog10 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLog10) +#define uprv_decNumberLogB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLogB) +#define uprv_decNumberMax U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMax) +#define uprv_decNumberMaxMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMaxMag) +#define uprv_decNumberMin U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMin) +#define uprv_decNumberMinMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinMag) +#define uprv_decNumberMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinus) +#define uprv_decNumberMultiply U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMultiply) +#define uprv_decNumberNextMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextMinus) +#define uprv_decNumberNextPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextPlus) +#define uprv_decNumberNextToward U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextToward) +#define uprv_decNumberNormalize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNormalize) +#define uprv_decNumberOr U_ICU_ENTRY_POINT_RENAME(uprv_decNumberOr) +#define uprv_decNumberPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPlus) +#define uprv_decNumberPower U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPower) +#define uprv_decNumberQuantize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberQuantize) +#define uprv_decNumberReduce U_ICU_ENTRY_POINT_RENAME(uprv_decNumberReduce) +#define uprv_decNumberRemainder U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainder) +#define uprv_decNumberRemainderNear U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainderNear) +#define uprv_decNumberRescale U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRescale) +#define uprv_decNumberRotate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRotate) +#define uprv_decNumberSameQuantum U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSameQuantum) +#define uprv_decNumberScaleB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberScaleB) +#define uprv_decNumberSetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSetBCD) +#define uprv_decNumberShift U_ICU_ENTRY_POINT_RENAME(uprv_decNumberShift) +#define uprv_decNumberSquareRoot U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSquareRoot) +#define uprv_decNumberSubtract U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSubtract) +#define uprv_decNumberToEngString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToEngString) +#define uprv_decNumberToInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToInt32) +#define uprv_decNumberToIntegralExact U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralExact) +#define uprv_decNumberToIntegralValue U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralValue) +#define uprv_decNumberToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToString) +#define uprv_decNumberToUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToUInt32) +#define uprv_decNumberTrim U_ICU_ENTRY_POINT_RENAME(uprv_decNumberTrim) +#define uprv_decNumberVersion U_ICU_ENTRY_POINT_RENAME(uprv_decNumberVersion) +#define uprv_decNumberXor U_ICU_ENTRY_POINT_RENAME(uprv_decNumberXor) +#define uprv_decNumberZero U_ICU_ENTRY_POINT_RENAME(uprv_decNumberZero) +#define uprv_deleteConditionalCE32 U_ICU_ENTRY_POINT_RENAME(uprv_deleteConditionalCE32) +#define uprv_deleteUObject U_ICU_ENTRY_POINT_RENAME(uprv_deleteUObject) +#define uprv_dl_close U_ICU_ENTRY_POINT_RENAME(uprv_dl_close) +#define uprv_dl_open U_ICU_ENTRY_POINT_RENAME(uprv_dl_open) +#define uprv_dlsym_func U_ICU_ENTRY_POINT_RENAME(uprv_dlsym_func) +#define uprv_eastrncpy U_ICU_ENTRY_POINT_RENAME(uprv_eastrncpy) +#define uprv_ebcdicFromAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicFromAscii) +#define uprv_ebcdicToAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToAscii) +#define uprv_ebcdicToLowercaseAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToLowercaseAscii) +#define uprv_ebcdictolower U_ICU_ENTRY_POINT_RENAME(uprv_ebcdictolower) +#define uprv_fabs U_ICU_ENTRY_POINT_RENAME(uprv_fabs) +#define uprv_floor U_ICU_ENTRY_POINT_RENAME(uprv_floor) +#define uprv_fmax U_ICU_ENTRY_POINT_RENAME(uprv_fmax) +#define uprv_fmin U_ICU_ENTRY_POINT_RENAME(uprv_fmin) +#define uprv_fmod U_ICU_ENTRY_POINT_RENAME(uprv_fmod) +#define uprv_free U_ICU_ENTRY_POINT_RENAME(uprv_free) +#define uprv_getCharNameCharacters U_ICU_ENTRY_POINT_RENAME(uprv_getCharNameCharacters) +#define uprv_getDefaultLocaleID U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultLocaleID) +#define uprv_getInfinity U_ICU_ENTRY_POINT_RENAME(uprv_getInfinity) +#define uprv_getMaxCharNameLength U_ICU_ENTRY_POINT_RENAME(uprv_getMaxCharNameLength) +#define uprv_getMaxValues U_ICU_ENTRY_POINT_RENAME(uprv_getMaxValues) +#define uprv_getNaN U_ICU_ENTRY_POINT_RENAME(uprv_getNaN) +#define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime) +#define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName) +#define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime) +#define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator) +#define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter) +#define uprv_isEbcdicAtSign U_ICU_ENTRY_POINT_RENAME(uprv_isEbcdicAtSign) +#define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite) +#define uprv_isInvariantString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantString) +#define uprv_isInvariantUString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantUString) +#define uprv_isNaN U_ICU_ENTRY_POINT_RENAME(uprv_isNaN) +#define uprv_isNegativeInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isNegativeInfinity) +#define uprv_isPositiveInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isPositiveInfinity) +#define uprv_itou U_ICU_ENTRY_POINT_RENAME(uprv_itou) +#define uprv_log U_ICU_ENTRY_POINT_RENAME(uprv_log) +#define uprv_malloc U_ICU_ENTRY_POINT_RENAME(uprv_malloc) +#define uprv_mapFile U_ICU_ENTRY_POINT_RENAME(uprv_mapFile) +#define uprv_max U_ICU_ENTRY_POINT_RENAME(uprv_max) +#define uprv_maxMantissa U_ICU_ENTRY_POINT_RENAME(uprv_maxMantissa) +#define uprv_maximumPtr U_ICU_ENTRY_POINT_RENAME(uprv_maximumPtr) +#define uprv_min U_ICU_ENTRY_POINT_RENAME(uprv_min) +#define uprv_modf U_ICU_ENTRY_POINT_RENAME(uprv_modf) +#define uprv_mul32_overflow U_ICU_ENTRY_POINT_RENAME(uprv_mul32_overflow) +#define uprv_parseCurrency U_ICU_ENTRY_POINT_RENAME(uprv_parseCurrency) +#define uprv_pathIsAbsolute U_ICU_ENTRY_POINT_RENAME(uprv_pathIsAbsolute) +#define uprv_pow U_ICU_ENTRY_POINT_RENAME(uprv_pow) +#define uprv_pow10 U_ICU_ENTRY_POINT_RENAME(uprv_pow10) +#define uprv_realloc U_ICU_ENTRY_POINT_RENAME(uprv_realloc) +#define uprv_round U_ICU_ENTRY_POINT_RENAME(uprv_round) +#define uprv_sortArray U_ICU_ENTRY_POINT_RENAME(uprv_sortArray) +#define uprv_stableBinarySearch U_ICU_ENTRY_POINT_RENAME(uprv_stableBinarySearch) +#define uprv_strCompare U_ICU_ENTRY_POINT_RENAME(uprv_strCompare) +#define uprv_strdup U_ICU_ENTRY_POINT_RENAME(uprv_strdup) +#define uprv_stricmp U_ICU_ENTRY_POINT_RENAME(uprv_stricmp) +#define uprv_strndup U_ICU_ENTRY_POINT_RENAME(uprv_strndup) +#define uprv_strnicmp U_ICU_ENTRY_POINT_RENAME(uprv_strnicmp) +#define uprv_syntaxError U_ICU_ENTRY_POINT_RENAME(uprv_syntaxError) +#define uprv_timezone U_ICU_ENTRY_POINT_RENAME(uprv_timezone) +#define uprv_toupper U_ICU_ENTRY_POINT_RENAME(uprv_toupper) +#define uprv_trunc U_ICU_ENTRY_POINT_RENAME(uprv_trunc) +#define uprv_tzname U_ICU_ENTRY_POINT_RENAME(uprv_tzname) +#define uprv_tzname_clear_cache U_ICU_ENTRY_POINT_RENAME(uprv_tzname_clear_cache) +#define uprv_tzset U_ICU_ENTRY_POINT_RENAME(uprv_tzset) +#define uprv_uint16Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint16Comparator) +#define uprv_uint32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint32Comparator) +#define uprv_unmapFile U_ICU_ENTRY_POINT_RENAME(uprv_unmapFile) +#define upvec_cloneArray U_ICU_ENTRY_POINT_RENAME(upvec_cloneArray) +#define upvec_close U_ICU_ENTRY_POINT_RENAME(upvec_close) +#define upvec_compact U_ICU_ENTRY_POINT_RENAME(upvec_compact) +#define upvec_compactToUTrie2Handler U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2Handler) +#define upvec_compactToUTrie2WithRowIndexes U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2WithRowIndexes) +#define upvec_getArray U_ICU_ENTRY_POINT_RENAME(upvec_getArray) +#define upvec_getRow U_ICU_ENTRY_POINT_RENAME(upvec_getRow) +#define upvec_getValue U_ICU_ENTRY_POINT_RENAME(upvec_getValue) +#define upvec_open U_ICU_ENTRY_POINT_RENAME(upvec_open) +#define upvec_setValue U_ICU_ENTRY_POINT_RENAME(upvec_setValue) +#define uregex_appendReplacement U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacement) +#define uregex_appendReplacementUText U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacementUText) +#define uregex_appendTail U_ICU_ENTRY_POINT_RENAME(uregex_appendTail) +#define uregex_appendTailUText U_ICU_ENTRY_POINT_RENAME(uregex_appendTailUText) +#define uregex_clone U_ICU_ENTRY_POINT_RENAME(uregex_clone) +#define uregex_close U_ICU_ENTRY_POINT_RENAME(uregex_close) +#define uregex_end U_ICU_ENTRY_POINT_RENAME(uregex_end) +#define uregex_end64 U_ICU_ENTRY_POINT_RENAME(uregex_end64) +#define uregex_find U_ICU_ENTRY_POINT_RENAME(uregex_find) +#define uregex_find64 U_ICU_ENTRY_POINT_RENAME(uregex_find64) +#define uregex_findNext U_ICU_ENTRY_POINT_RENAME(uregex_findNext) +#define uregex_flags U_ICU_ENTRY_POINT_RENAME(uregex_flags) +#define uregex_getFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_getFindProgressCallback) +#define uregex_getMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_getMatchCallback) +#define uregex_getStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_getStackLimit) +#define uregex_getText U_ICU_ENTRY_POINT_RENAME(uregex_getText) +#define uregex_getTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_getTimeLimit) +#define uregex_getUText U_ICU_ENTRY_POINT_RENAME(uregex_getUText) +#define uregex_group U_ICU_ENTRY_POINT_RENAME(uregex_group) +#define uregex_groupCount U_ICU_ENTRY_POINT_RENAME(uregex_groupCount) +#define uregex_groupNumberFromCName U_ICU_ENTRY_POINT_RENAME(uregex_groupNumberFromCName) +#define uregex_groupNumberFromName U_ICU_ENTRY_POINT_RENAME(uregex_groupNumberFromName) +#define uregex_groupUText U_ICU_ENTRY_POINT_RENAME(uregex_groupUText) +#define uregex_hasAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasAnchoringBounds) +#define uregex_hasTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasTransparentBounds) +#define uregex_hitEnd U_ICU_ENTRY_POINT_RENAME(uregex_hitEnd) +#define uregex_lookingAt U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt) +#define uregex_lookingAt64 U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt64) +#define uregex_matches U_ICU_ENTRY_POINT_RENAME(uregex_matches) +#define uregex_matches64 U_ICU_ENTRY_POINT_RENAME(uregex_matches64) +#define uregex_open U_ICU_ENTRY_POINT_RENAME(uregex_open) +#define uregex_openC U_ICU_ENTRY_POINT_RENAME(uregex_openC) +#define uregex_openUText U_ICU_ENTRY_POINT_RENAME(uregex_openUText) +#define uregex_pattern U_ICU_ENTRY_POINT_RENAME(uregex_pattern) +#define uregex_patternUText U_ICU_ENTRY_POINT_RENAME(uregex_patternUText) +#define uregex_refreshUText U_ICU_ENTRY_POINT_RENAME(uregex_refreshUText) +#define uregex_regionEnd U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd) +#define uregex_regionEnd64 U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd64) +#define uregex_regionStart U_ICU_ENTRY_POINT_RENAME(uregex_regionStart) +#define uregex_regionStart64 U_ICU_ENTRY_POINT_RENAME(uregex_regionStart64) +#define uregex_replaceAll U_ICU_ENTRY_POINT_RENAME(uregex_replaceAll) +#define uregex_replaceAllUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceAllUText) +#define uregex_replaceFirst U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirst) +#define uregex_replaceFirstUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirstUText) +#define uregex_requireEnd U_ICU_ENTRY_POINT_RENAME(uregex_requireEnd) +#define uregex_reset U_ICU_ENTRY_POINT_RENAME(uregex_reset) +#define uregex_reset64 U_ICU_ENTRY_POINT_RENAME(uregex_reset64) +#define uregex_setFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_setFindProgressCallback) +#define uregex_setMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_setMatchCallback) +#define uregex_setRegion U_ICU_ENTRY_POINT_RENAME(uregex_setRegion) +#define uregex_setRegion64 U_ICU_ENTRY_POINT_RENAME(uregex_setRegion64) +#define uregex_setRegionAndStart U_ICU_ENTRY_POINT_RENAME(uregex_setRegionAndStart) +#define uregex_setStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_setStackLimit) +#define uregex_setText U_ICU_ENTRY_POINT_RENAME(uregex_setText) +#define uregex_setTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_setTimeLimit) +#define uregex_setUText U_ICU_ENTRY_POINT_RENAME(uregex_setUText) +#define uregex_split U_ICU_ENTRY_POINT_RENAME(uregex_split) +#define uregex_splitUText U_ICU_ENTRY_POINT_RENAME(uregex_splitUText) +#define uregex_start U_ICU_ENTRY_POINT_RENAME(uregex_start) +#define uregex_start64 U_ICU_ENTRY_POINT_RENAME(uregex_start64) +#define uregex_ucstr_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_ucstr_unescape_charAt) +#define uregex_useAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_useAnchoringBounds) +#define uregex_useTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_useTransparentBounds) +#define uregex_utext_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_utext_unescape_charAt) +#define uregion_areEqual U_ICU_ENTRY_POINT_RENAME(uregion_areEqual) +#define uregion_contains U_ICU_ENTRY_POINT_RENAME(uregion_contains) +#define uregion_getAvailable U_ICU_ENTRY_POINT_RENAME(uregion_getAvailable) +#define uregion_getContainedRegions U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegions) +#define uregion_getContainedRegionsOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegionsOfType) +#define uregion_getContainingRegion U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegion) +#define uregion_getContainingRegionOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegionOfType) +#define uregion_getNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getNumericCode) +#define uregion_getPreferredValues U_ICU_ENTRY_POINT_RENAME(uregion_getPreferredValues) +#define uregion_getRegionCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionCode) +#define uregion_getRegionFromCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromCode) +#define uregion_getRegionFromNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromNumericCode) +#define uregion_getType U_ICU_ENTRY_POINT_RENAME(uregion_getType) +#define ureldatefmt_close U_ICU_ENTRY_POINT_RENAME(ureldatefmt_close) +#define ureldatefmt_closeResult U_ICU_ENTRY_POINT_RENAME(ureldatefmt_closeResult) +#define ureldatefmt_combineDateAndTime U_ICU_ENTRY_POINT_RENAME(ureldatefmt_combineDateAndTime) +#define ureldatefmt_format U_ICU_ENTRY_POINT_RENAME(ureldatefmt_format) +#define ureldatefmt_formatNumeric U_ICU_ENTRY_POINT_RENAME(ureldatefmt_formatNumeric) +#define ureldatefmt_formatNumericToResult U_ICU_ENTRY_POINT_RENAME(ureldatefmt_formatNumericToResult) +#define ureldatefmt_formatToResult U_ICU_ENTRY_POINT_RENAME(ureldatefmt_formatToResult) +#define ureldatefmt_open U_ICU_ENTRY_POINT_RENAME(ureldatefmt_open) +#define ureldatefmt_openResult U_ICU_ENTRY_POINT_RENAME(ureldatefmt_openResult) +#define ureldatefmt_resultAsValue U_ICU_ENTRY_POINT_RENAME(ureldatefmt_resultAsValue) +#define ures_close U_ICU_ENTRY_POINT_RENAME(ures_close) +#define ures_copyResb U_ICU_ENTRY_POINT_RENAME(ures_copyResb) +#define ures_countArrayItems U_ICU_ENTRY_POINT_RENAME(ures_countArrayItems) +#define ures_findResource U_ICU_ENTRY_POINT_RENAME(ures_findResource) +#define ures_findSubResource U_ICU_ENTRY_POINT_RENAME(ures_findSubResource) +#define ures_getAllItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllItemsWithFallback) +#define ures_getBinary U_ICU_ENTRY_POINT_RENAME(ures_getBinary) +#define ures_getByIndex U_ICU_ENTRY_POINT_RENAME(ures_getByIndex) +#define ures_getByKey U_ICU_ENTRY_POINT_RENAME(ures_getByKey) +#define ures_getByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getByKeyWithFallback) +#define ures_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ures_getFunctionalEquivalent) +#define ures_getInt U_ICU_ENTRY_POINT_RENAME(ures_getInt) +#define ures_getIntVector U_ICU_ENTRY_POINT_RENAME(ures_getIntVector) +#define ures_getKey U_ICU_ENTRY_POINT_RENAME(ures_getKey) +#define ures_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ures_getKeywordValues) +#define ures_getLocale U_ICU_ENTRY_POINT_RENAME(ures_getLocale) +#define ures_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ures_getLocaleByType) +#define ures_getLocaleInternal U_ICU_ENTRY_POINT_RENAME(ures_getLocaleInternal) +#define ures_getName U_ICU_ENTRY_POINT_RENAME(ures_getName) +#define ures_getNextResource U_ICU_ENTRY_POINT_RENAME(ures_getNextResource) +#define ures_getNextString U_ICU_ENTRY_POINT_RENAME(ures_getNextString) +#define ures_getSize U_ICU_ENTRY_POINT_RENAME(ures_getSize) +#define ures_getString U_ICU_ENTRY_POINT_RENAME(ures_getString) +#define ures_getStringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getStringByIndex) +#define ures_getStringByKey U_ICU_ENTRY_POINT_RENAME(ures_getStringByKey) +#define ures_getStringByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getStringByKeyWithFallback) +#define ures_getType U_ICU_ENTRY_POINT_RENAME(ures_getType) +#define ures_getUInt U_ICU_ENTRY_POINT_RENAME(ures_getUInt) +#define ures_getUTF8String U_ICU_ENTRY_POINT_RENAME(ures_getUTF8String) +#define ures_getUTF8StringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByIndex) +#define ures_getUTF8StringByKey U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByKey) +#define ures_getValueWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getValueWithFallback) +#define ures_getVersion U_ICU_ENTRY_POINT_RENAME(ures_getVersion) +#define ures_getVersionByKey U_ICU_ENTRY_POINT_RENAME(ures_getVersionByKey) +#define ures_getVersionNumber U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumber) +#define ures_getVersionNumberInternal U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumberInternal) +#define ures_hasNext U_ICU_ENTRY_POINT_RENAME(ures_hasNext) +#define ures_initStackObject U_ICU_ENTRY_POINT_RENAME(ures_initStackObject) +#define ures_open U_ICU_ENTRY_POINT_RENAME(ures_open) +#define ures_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ures_openAvailableLocales) +#define ures_openDirect U_ICU_ENTRY_POINT_RENAME(ures_openDirect) +#define ures_openDirectFillIn U_ICU_ENTRY_POINT_RENAME(ures_openDirectFillIn) +#define ures_openFillIn U_ICU_ENTRY_POINT_RENAME(ures_openFillIn) +#define ures_openNoDefault U_ICU_ENTRY_POINT_RENAME(ures_openNoDefault) +#define ures_openU U_ICU_ENTRY_POINT_RENAME(ures_openU) +#define ures_resetIterator U_ICU_ENTRY_POINT_RENAME(ures_resetIterator) +#define ures_swap U_ICU_ENTRY_POINT_RENAME(ures_swap) +#define uscript_breaksBetweenLetters U_ICU_ENTRY_POINT_RENAME(uscript_breaksBetweenLetters) +#define uscript_closeRun U_ICU_ENTRY_POINT_RENAME(uscript_closeRun) +#define uscript_getCode U_ICU_ENTRY_POINT_RENAME(uscript_getCode) +#define uscript_getName U_ICU_ENTRY_POINT_RENAME(uscript_getName) +#define uscript_getSampleString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleString) +#define uscript_getSampleUnicodeString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleUnicodeString) +#define uscript_getScript U_ICU_ENTRY_POINT_RENAME(uscript_getScript) +#define uscript_getScriptExtensions U_ICU_ENTRY_POINT_RENAME(uscript_getScriptExtensions) +#define uscript_getShortName U_ICU_ENTRY_POINT_RENAME(uscript_getShortName) +#define uscript_getUsage U_ICU_ENTRY_POINT_RENAME(uscript_getUsage) +#define uscript_hasScript U_ICU_ENTRY_POINT_RENAME(uscript_hasScript) +#define uscript_isCased U_ICU_ENTRY_POINT_RENAME(uscript_isCased) +#define uscript_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uscript_isRightToLeft) +#define uscript_nextRun U_ICU_ENTRY_POINT_RENAME(uscript_nextRun) +#define uscript_openRun U_ICU_ENTRY_POINT_RENAME(uscript_openRun) +#define uscript_resetRun U_ICU_ENTRY_POINT_RENAME(uscript_resetRun) +#define uscript_setRunText U_ICU_ENTRY_POINT_RENAME(uscript_setRunText) +#define usearch_close U_ICU_ENTRY_POINT_RENAME(usearch_close) +#define usearch_first U_ICU_ENTRY_POINT_RENAME(usearch_first) +#define usearch_following U_ICU_ENTRY_POINT_RENAME(usearch_following) +#define usearch_getAttribute U_ICU_ENTRY_POINT_RENAME(usearch_getAttribute) +#define usearch_getBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_getBreakIterator) +#define usearch_getCollator U_ICU_ENTRY_POINT_RENAME(usearch_getCollator) +#define usearch_getMatchedLength U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedLength) +#define usearch_getMatchedStart U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedStart) +#define usearch_getMatchedText U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedText) +#define usearch_getOffset U_ICU_ENTRY_POINT_RENAME(usearch_getOffset) +#define usearch_getPattern U_ICU_ENTRY_POINT_RENAME(usearch_getPattern) +#define usearch_getText U_ICU_ENTRY_POINT_RENAME(usearch_getText) +#define usearch_handleNextCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handleNextCanonical) +#define usearch_handleNextExact U_ICU_ENTRY_POINT_RENAME(usearch_handleNextExact) +#define usearch_handlePreviousCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousCanonical) +#define usearch_handlePreviousExact U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousExact) +#define usearch_last U_ICU_ENTRY_POINT_RENAME(usearch_last) +#define usearch_next U_ICU_ENTRY_POINT_RENAME(usearch_next) +#define usearch_open U_ICU_ENTRY_POINT_RENAME(usearch_open) +#define usearch_openFromCollator U_ICU_ENTRY_POINT_RENAME(usearch_openFromCollator) +#define usearch_preceding U_ICU_ENTRY_POINT_RENAME(usearch_preceding) +#define usearch_previous U_ICU_ENTRY_POINT_RENAME(usearch_previous) +#define usearch_reset U_ICU_ENTRY_POINT_RENAME(usearch_reset) +#define usearch_search U_ICU_ENTRY_POINT_RENAME(usearch_search) +#define usearch_searchBackwards U_ICU_ENTRY_POINT_RENAME(usearch_searchBackwards) +#define usearch_setAttribute U_ICU_ENTRY_POINT_RENAME(usearch_setAttribute) +#define usearch_setBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_setBreakIterator) +#define usearch_setCollator U_ICU_ENTRY_POINT_RENAME(usearch_setCollator) +#define usearch_setOffset U_ICU_ENTRY_POINT_RENAME(usearch_setOffset) +#define usearch_setPattern U_ICU_ENTRY_POINT_RENAME(usearch_setPattern) +#define usearch_setText U_ICU_ENTRY_POINT_RENAME(usearch_setText) +#define uset_add U_ICU_ENTRY_POINT_RENAME(uset_add) +#define uset_addAll U_ICU_ENTRY_POINT_RENAME(uset_addAll) +#define uset_addAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_addAllCodePoints) +#define uset_addRange U_ICU_ENTRY_POINT_RENAME(uset_addRange) +#define uset_addString U_ICU_ENTRY_POINT_RENAME(uset_addString) +#define uset_applyIntPropertyValue U_ICU_ENTRY_POINT_RENAME(uset_applyIntPropertyValue) +#define uset_applyPattern U_ICU_ENTRY_POINT_RENAME(uset_applyPattern) +#define uset_applyPropertyAlias U_ICU_ENTRY_POINT_RENAME(uset_applyPropertyAlias) +#define uset_charAt U_ICU_ENTRY_POINT_RENAME(uset_charAt) +#define uset_clear U_ICU_ENTRY_POINT_RENAME(uset_clear) +#define uset_clone U_ICU_ENTRY_POINT_RENAME(uset_clone) +#define uset_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(uset_cloneAsThawed) +#define uset_close U_ICU_ENTRY_POINT_RENAME(uset_close) +#define uset_closeOver U_ICU_ENTRY_POINT_RENAME(uset_closeOver) +#define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact) +#define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement) +#define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll) +#define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains) +#define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll) +#define uset_containsAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints) +#define uset_containsNone U_ICU_ENTRY_POINT_RENAME(uset_containsNone) +#define uset_containsRange U_ICU_ENTRY_POINT_RENAME(uset_containsRange) +#define uset_containsSome U_ICU_ENTRY_POINT_RENAME(uset_containsSome) +#define uset_containsString U_ICU_ENTRY_POINT_RENAME(uset_containsString) +#define uset_equals U_ICU_ENTRY_POINT_RENAME(uset_equals) +#define uset_freeze U_ICU_ENTRY_POINT_RENAME(uset_freeze) +#define uset_getItem U_ICU_ENTRY_POINT_RENAME(uset_getItem) +#define uset_getItemCount U_ICU_ENTRY_POINT_RENAME(uset_getItemCount) +#define uset_getSerializedRange U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRange) +#define uset_getSerializedRangeCount U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRangeCount) +#define uset_getSerializedSet U_ICU_ENTRY_POINT_RENAME(uset_getSerializedSet) +#define uset_indexOf U_ICU_ENTRY_POINT_RENAME(uset_indexOf) +#define uset_isEmpty U_ICU_ENTRY_POINT_RENAME(uset_isEmpty) +#define uset_isFrozen U_ICU_ENTRY_POINT_RENAME(uset_isFrozen) +#define uset_open U_ICU_ENTRY_POINT_RENAME(uset_open) +#define uset_openEmpty U_ICU_ENTRY_POINT_RENAME(uset_openEmpty) +#define uset_openPattern U_ICU_ENTRY_POINT_RENAME(uset_openPattern) +#define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions) +#define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove) +#define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll) +#define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings) +#define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange) +#define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString) +#define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern) +#define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain) +#define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll) +#define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize) +#define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains) +#define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set) +#define uset_setSerializedToOne U_ICU_ENTRY_POINT_RENAME(uset_setSerializedToOne) +#define uset_size U_ICU_ENTRY_POINT_RENAME(uset_size) +#define uset_span U_ICU_ENTRY_POINT_RENAME(uset_span) +#define uset_spanBack U_ICU_ENTRY_POINT_RENAME(uset_spanBack) +#define uset_spanBackUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanBackUTF8) +#define uset_spanUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanUTF8) +#define uset_toPattern U_ICU_ENTRY_POINT_RENAME(uset_toPattern) +#define uspoof_areConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusable) +#define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8) +#define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString) +#define uspoof_check U_ICU_ENTRY_POINT_RENAME(uspoof_check) +#define uspoof_check2 U_ICU_ENTRY_POINT_RENAME(uspoof_check2) +#define uspoof_check2UTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_check2UTF8) +#define uspoof_check2UnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_check2UnicodeString) +#define uspoof_checkUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_checkUTF8) +#define uspoof_checkUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_checkUnicodeString) +#define uspoof_clone U_ICU_ENTRY_POINT_RENAME(uspoof_clone) +#define uspoof_close U_ICU_ENTRY_POINT_RENAME(uspoof_close) +#define uspoof_closeCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_closeCheckResult) +#define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars) +#define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales) +#define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet) +#define uspoof_getCheckResultChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultChecks) +#define uspoof_getCheckResultNumerics U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultNumerics) +#define uspoof_getCheckResultRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultRestrictionLevel) +#define uspoof_getChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getChecks) +#define uspoof_getInclusionSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionSet) +#define uspoof_getInclusionUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionUnicodeSet) +#define uspoof_getRecommendedSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedSet) +#define uspoof_getRecommendedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedUnicodeSet) +#define uspoof_getRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getRestrictionLevel) +#define uspoof_getSkeleton U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeleton) +#define uspoof_getSkeletonUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUTF8) +#define uspoof_getSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUnicodeString) +#define uspoof_internalInitStatics U_ICU_ENTRY_POINT_RENAME(uspoof_internalInitStatics) +#define uspoof_open U_ICU_ENTRY_POINT_RENAME(uspoof_open) +#define uspoof_openCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_openCheckResult) +#define uspoof_openFromSerialized U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSerialized) +#define uspoof_openFromSource U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSource) +#define uspoof_serialize U_ICU_ENTRY_POINT_RENAME(uspoof_serialize) +#define uspoof_setAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedChars) +#define uspoof_setAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedLocales) +#define uspoof_setAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedUnicodeSet) +#define uspoof_setChecks U_ICU_ENTRY_POINT_RENAME(uspoof_setChecks) +#define uspoof_setRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_setRestrictionLevel) +#define uspoof_swap U_ICU_ENTRY_POINT_RENAME(uspoof_swap) +#define usprep_close U_ICU_ENTRY_POINT_RENAME(usprep_close) +#define usprep_open U_ICU_ENTRY_POINT_RENAME(usprep_open) +#define usprep_openByType U_ICU_ENTRY_POINT_RENAME(usprep_openByType) +#define usprep_prepare U_ICU_ENTRY_POINT_RENAME(usprep_prepare) +#define usprep_swap U_ICU_ENTRY_POINT_RENAME(usprep_swap) +#define ustr_hashCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashCharsN) +#define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN) +#define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN) +#define ustrcase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_getCaseLocale) +#define ustrcase_getTitleBreakIterator U_ICU_ENTRY_POINT_RENAME(ustrcase_getTitleBreakIterator) +#define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold) +#define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower) +#define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle) +#define ustrcase_internalToUpper U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToUpper) +#define ustrcase_map U_ICU_ENTRY_POINT_RENAME(ustrcase_map) +#define ustrcase_mapWithOverlap U_ICU_ENTRY_POINT_RENAME(ustrcase_mapWithOverlap) +#define utext_char32At U_ICU_ENTRY_POINT_RENAME(utext_char32At) +#define utext_clone U_ICU_ENTRY_POINT_RENAME(utext_clone) +#define utext_close U_ICU_ENTRY_POINT_RENAME(utext_close) +#define utext_copy U_ICU_ENTRY_POINT_RENAME(utext_copy) +#define utext_current32 U_ICU_ENTRY_POINT_RENAME(utext_current32) +#define utext_equals U_ICU_ENTRY_POINT_RENAME(utext_equals) +#define utext_extract U_ICU_ENTRY_POINT_RENAME(utext_extract) +#define utext_freeze U_ICU_ENTRY_POINT_RENAME(utext_freeze) +#define utext_getNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getNativeIndex) +#define utext_getPreviousNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getPreviousNativeIndex) +#define utext_hasMetaData U_ICU_ENTRY_POINT_RENAME(utext_hasMetaData) +#define utext_isLengthExpensive U_ICU_ENTRY_POINT_RENAME(utext_isLengthExpensive) +#define utext_isWritable U_ICU_ENTRY_POINT_RENAME(utext_isWritable) +#define utext_moveIndex32 U_ICU_ENTRY_POINT_RENAME(utext_moveIndex32) +#define utext_nativeLength U_ICU_ENTRY_POINT_RENAME(utext_nativeLength) +#define utext_next32 U_ICU_ENTRY_POINT_RENAME(utext_next32) +#define utext_next32From U_ICU_ENTRY_POINT_RENAME(utext_next32From) +#define utext_openCharacterIterator U_ICU_ENTRY_POINT_RENAME(utext_openCharacterIterator) +#define utext_openConstUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openConstUnicodeString) +#define utext_openReplaceable U_ICU_ENTRY_POINT_RENAME(utext_openReplaceable) +#define utext_openUChars U_ICU_ENTRY_POINT_RENAME(utext_openUChars) +#define utext_openUTF8 U_ICU_ENTRY_POINT_RENAME(utext_openUTF8) +#define utext_openUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openUnicodeString) +#define utext_previous32 U_ICU_ENTRY_POINT_RENAME(utext_previous32) +#define utext_previous32From U_ICU_ENTRY_POINT_RENAME(utext_previous32From) +#define utext_replace U_ICU_ENTRY_POINT_RENAME(utext_replace) +#define utext_setNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_setNativeIndex) +#define utext_setup U_ICU_ENTRY_POINT_RENAME(utext_setup) +#define utf8_appendCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_appendCharSafeBody) +#define utf8_back1SafeBody U_ICU_ENTRY_POINT_RENAME(utf8_back1SafeBody) +#define utf8_countTrailBytes U_ICU_ENTRY_POINT_RENAME(utf8_countTrailBytes) +#define utf8_nextCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_nextCharSafeBody) +#define utf8_prevCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_prevCharSafeBody) +#define utmscale_fromInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_fromInt64) +#define utmscale_getTimeScaleValue U_ICU_ENTRY_POINT_RENAME(utmscale_getTimeScaleValue) +#define utmscale_toInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_toInt64) +#define utrace_cleanup U_ICU_ENTRY_POINT_RENAME(utrace_cleanup) +#define utrace_data U_ICU_ENTRY_POINT_RENAME(utrace_data) +#define utrace_entry U_ICU_ENTRY_POINT_RENAME(utrace_entry) +#define utrace_exit U_ICU_ENTRY_POINT_RENAME(utrace_exit) +#define utrace_format U_ICU_ENTRY_POINT_RENAME(utrace_format) +#define utrace_functionName U_ICU_ENTRY_POINT_RENAME(utrace_functionName) +#define utrace_getFunctions U_ICU_ENTRY_POINT_RENAME(utrace_getFunctions) +#define utrace_getLevel U_ICU_ENTRY_POINT_RENAME(utrace_getLevel) +#define utrace_setFunctions U_ICU_ENTRY_POINT_RENAME(utrace_setFunctions) +#define utrace_setLevel U_ICU_ENTRY_POINT_RENAME(utrace_setLevel) +#define utrace_vformat U_ICU_ENTRY_POINT_RENAME(utrace_vformat) +#define utrans_clone U_ICU_ENTRY_POINT_RENAME(utrans_clone) +#define utrans_close U_ICU_ENTRY_POINT_RENAME(utrans_close) +#define utrans_countAvailableIDs U_ICU_ENTRY_POINT_RENAME(utrans_countAvailableIDs) +#define utrans_getAvailableID U_ICU_ENTRY_POINT_RENAME(utrans_getAvailableID) +#define utrans_getID U_ICU_ENTRY_POINT_RENAME(utrans_getID) +#define utrans_getSourceSet U_ICU_ENTRY_POINT_RENAME(utrans_getSourceSet) +#define utrans_getUnicodeID U_ICU_ENTRY_POINT_RENAME(utrans_getUnicodeID) +#define utrans_open U_ICU_ENTRY_POINT_RENAME(utrans_open) +#define utrans_openIDs U_ICU_ENTRY_POINT_RENAME(utrans_openIDs) +#define utrans_openInverse U_ICU_ENTRY_POINT_RENAME(utrans_openInverse) +#define utrans_openU U_ICU_ENTRY_POINT_RENAME(utrans_openU) +#define utrans_register U_ICU_ENTRY_POINT_RENAME(utrans_register) +#define utrans_rep_caseContextIterator U_ICU_ENTRY_POINT_RENAME(utrans_rep_caseContextIterator) +#define utrans_setFilter U_ICU_ENTRY_POINT_RENAME(utrans_setFilter) +#define utrans_stripRules U_ICU_ENTRY_POINT_RENAME(utrans_stripRules) +#define utrans_toRules U_ICU_ENTRY_POINT_RENAME(utrans_toRules) +#define utrans_trans U_ICU_ENTRY_POINT_RENAME(utrans_trans) +#define utrans_transIncremental U_ICU_ENTRY_POINT_RENAME(utrans_transIncremental) +#define utrans_transIncrementalUChars U_ICU_ENTRY_POINT_RENAME(utrans_transIncrementalUChars) +#define utrans_transUChars U_ICU_ENTRY_POINT_RENAME(utrans_transUChars) +#define utrans_transliterator_cleanup U_ICU_ENTRY_POINT_RENAME(utrans_transliterator_cleanup) +#define utrans_unregister U_ICU_ENTRY_POINT_RENAME(utrans_unregister) +#define utrans_unregisterID U_ICU_ENTRY_POINT_RENAME(utrans_unregisterID) +#define utrie2_clone U_ICU_ENTRY_POINT_RENAME(utrie2_clone) +#define utrie2_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(utrie2_cloneAsThawed) +#define utrie2_close U_ICU_ENTRY_POINT_RENAME(utrie2_close) +#define utrie2_enum U_ICU_ENTRY_POINT_RENAME(utrie2_enum) +#define utrie2_enumForLeadSurrogate U_ICU_ENTRY_POINT_RENAME(utrie2_enumForLeadSurrogate) +#define utrie2_freeze U_ICU_ENTRY_POINT_RENAME(utrie2_freeze) +#define utrie2_fromUTrie U_ICU_ENTRY_POINT_RENAME(utrie2_fromUTrie) +#define utrie2_get32 U_ICU_ENTRY_POINT_RENAME(utrie2_get32) +#define utrie2_get32FromLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_get32FromLeadSurrogateCodeUnit) +#define utrie2_internalU8NextIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8NextIndex) +#define utrie2_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8PrevIndex) +#define utrie2_isFrozen U_ICU_ENTRY_POINT_RENAME(utrie2_isFrozen) +#define utrie2_open U_ICU_ENTRY_POINT_RENAME(utrie2_open) +#define utrie2_openDummy U_ICU_ENTRY_POINT_RENAME(utrie2_openDummy) +#define utrie2_openFromSerialized U_ICU_ENTRY_POINT_RENAME(utrie2_openFromSerialized) +#define utrie2_serialize U_ICU_ENTRY_POINT_RENAME(utrie2_serialize) +#define utrie2_set32 U_ICU_ENTRY_POINT_RENAME(utrie2_set32) +#define utrie2_set32ForLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_set32ForLeadSurrogateCodeUnit) +#define utrie2_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie2_setRange32) +#define utrie2_swap U_ICU_ENTRY_POINT_RENAME(utrie2_swap) +#define utrie_clone U_ICU_ENTRY_POINT_RENAME(utrie_clone) +#define utrie_close U_ICU_ENTRY_POINT_RENAME(utrie_close) +#define utrie_defaultGetFoldingOffset U_ICU_ENTRY_POINT_RENAME(utrie_defaultGetFoldingOffset) +#define utrie_enum U_ICU_ENTRY_POINT_RENAME(utrie_enum) +#define utrie_get32 U_ICU_ENTRY_POINT_RENAME(utrie_get32) +#define utrie_getData U_ICU_ENTRY_POINT_RENAME(utrie_getData) +#define utrie_open U_ICU_ENTRY_POINT_RENAME(utrie_open) +#define utrie_serialize U_ICU_ENTRY_POINT_RENAME(utrie_serialize) +#define utrie_set32 U_ICU_ENTRY_POINT_RENAME(utrie_set32) +#define utrie_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie_setRange32) +#define utrie_swap U_ICU_ENTRY_POINT_RENAME(utrie_swap) +#define utrie_swapAnyVersion U_ICU_ENTRY_POINT_RENAME(utrie_swapAnyVersion) +#define utrie_unserialize U_ICU_ENTRY_POINT_RENAME(utrie_unserialize) +#define utrie_unserializeDummy U_ICU_ENTRY_POINT_RENAME(utrie_unserializeDummy) +#define vzone_clone U_ICU_ENTRY_POINT_RENAME(vzone_clone) +#define vzone_close U_ICU_ENTRY_POINT_RENAME(vzone_close) +#define vzone_countTransitionRules U_ICU_ENTRY_POINT_RENAME(vzone_countTransitionRules) +#define vzone_equals U_ICU_ENTRY_POINT_RENAME(vzone_equals) +#define vzone_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(vzone_getDynamicClassID) +#define vzone_getLastModified U_ICU_ENTRY_POINT_RENAME(vzone_getLastModified) +#define vzone_getNextTransition U_ICU_ENTRY_POINT_RENAME(vzone_getNextTransition) +#define vzone_getOffset U_ICU_ENTRY_POINT_RENAME(vzone_getOffset) +#define vzone_getOffset2 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset2) +#define vzone_getOffset3 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset3) +#define vzone_getPreviousTransition U_ICU_ENTRY_POINT_RENAME(vzone_getPreviousTransition) +#define vzone_getRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_getRawOffset) +#define vzone_getStaticClassID U_ICU_ENTRY_POINT_RENAME(vzone_getStaticClassID) +#define vzone_getTZURL U_ICU_ENTRY_POINT_RENAME(vzone_getTZURL) +#define vzone_hasSameRules U_ICU_ENTRY_POINT_RENAME(vzone_hasSameRules) +#define vzone_inDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_inDaylightTime) +#define vzone_openData U_ICU_ENTRY_POINT_RENAME(vzone_openData) +#define vzone_openID U_ICU_ENTRY_POINT_RENAME(vzone_openID) +#define vzone_setLastModified U_ICU_ENTRY_POINT_RENAME(vzone_setLastModified) +#define vzone_setRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_setRawOffset) +#define vzone_setTZURL U_ICU_ENTRY_POINT_RENAME(vzone_setTZURL) +#define vzone_useDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_useDaylightTime) +#define vzone_write U_ICU_ENTRY_POINT_RENAME(vzone_write) +#define vzone_writeFromStart U_ICU_ENTRY_POINT_RENAME(vzone_writeFromStart) +#define vzone_writeSimple U_ICU_ENTRY_POINT_RENAME(vzone_writeSimple) +#define zrule_close U_ICU_ENTRY_POINT_RENAME(zrule_close) +#define zrule_equals U_ICU_ENTRY_POINT_RENAME(zrule_equals) +#define zrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(zrule_getDSTSavings) +#define zrule_getName U_ICU_ENTRY_POINT_RENAME(zrule_getName) +#define zrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(zrule_getRawOffset) +#define zrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(zrule_isEquivalentTo) +#define ztrans_adoptFrom U_ICU_ENTRY_POINT_RENAME(ztrans_adoptFrom) +#define ztrans_adoptTo U_ICU_ENTRY_POINT_RENAME(ztrans_adoptTo) +#define ztrans_clone U_ICU_ENTRY_POINT_RENAME(ztrans_clone) +#define ztrans_close U_ICU_ENTRY_POINT_RENAME(ztrans_close) +#define ztrans_equals U_ICU_ENTRY_POINT_RENAME(ztrans_equals) +#define ztrans_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getDynamicClassID) +#define ztrans_getFrom U_ICU_ENTRY_POINT_RENAME(ztrans_getFrom) +#define ztrans_getStaticClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getStaticClassID) +#define ztrans_getTime U_ICU_ENTRY_POINT_RENAME(ztrans_getTime) +#define ztrans_getTo U_ICU_ENTRY_POINT_RENAME(ztrans_getTo) +#define ztrans_open U_ICU_ENTRY_POINT_RENAME(ztrans_open) +#define ztrans_openEmpty U_ICU_ENTRY_POINT_RENAME(ztrans_openEmpty) +#define ztrans_setFrom U_ICU_ENTRY_POINT_RENAME(ztrans_setFrom) +#define ztrans_setTime U_ICU_ENTRY_POINT_RENAME(ztrans_setTime) +#define ztrans_setTo U_ICU_ENTRY_POINT_RENAME(ztrans_setTo) + +#endif /* !(defined(_MSC_VER) && defined(__INTELLISENSE__)) */ +#endif /* U_DISABLE_RENAMING */ +#endif /* URENAME_H */ + diff --git a/thirdparty/icu4c/common/unicode/urep.h b/thirdparty/icu4c/common/unicode/urep.h new file mode 100644 index 0000000000..932202ddb0 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/urep.h @@ -0,0 +1,157 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1997-2010, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* Date Name Description +* 06/23/00 aliu Creation. +****************************************************************************** +*/ + +#ifndef __UREP_H +#define __UREP_H + +#include "unicode/utypes.h" + +U_CDECL_BEGIN + +/******************************************************************** + * General Notes + ******************************************************************** + * TODO + * Add usage scenario + * Add test code + * Talk about pinning + * Talk about "can truncate result if out of memory" + */ + +/******************************************************************** + * Data Structures + ********************************************************************/ +/** + * \file + * \brief C API: Callbacks for UReplaceable + */ +/** + * An opaque replaceable text object. This will be manipulated only + * through the caller-supplied UReplaceableFunctor struct. Related + * to the C++ class Replaceable. + * This is currently only used in the Transliterator C API, see utrans.h . + * @stable ICU 2.0 + */ +typedef void* UReplaceable; + +/** + * A set of function pointers that transliterators use to manipulate a + * UReplaceable. The caller should supply the required functions to + * manipulate their text appropriately. Related to the C++ class + * Replaceable. + * @stable ICU 2.0 + */ +typedef struct UReplaceableCallbacks { + + /** + * Function pointer that returns the number of UChar code units in + * this text. + * + * @param rep A pointer to "this" UReplaceable object. + * @return The length of the text. + * @stable ICU 2.0 + */ + int32_t (*length)(const UReplaceable* rep); + + /** + * Function pointer that returns a UChar code units at the given + * offset into this text; 0 <= offset < n, where n is the value + * returned by (*length)(rep). See unistr.h for a description of + * charAt() vs. char32At(). + * + * @param rep A pointer to "this" UReplaceable object. + * @param offset The index at which to fetch the UChar (code unit). + * @return The UChar (code unit) at offset, or U+FFFF if the offset is out of bounds. + * @stable ICU 2.0 + */ + UChar (*charAt)(const UReplaceable* rep, + int32_t offset); + + /** + * Function pointer that returns a UChar32 code point at the given + * offset into this text. See unistr.h for a description of + * charAt() vs. char32At(). + * + * @param rep A pointer to "this" UReplaceable object. + * @param offset The index at which to fetch the UChar32 (code point). + * @return The UChar32 (code point) at offset, or U+FFFF if the offset is out of bounds. + * @stable ICU 2.0 + */ + UChar32 (*char32At)(const UReplaceable* rep, + int32_t offset); + + /** + * Function pointer that replaces text between start and limit in + * this text with the given text. Attributes (out of band info) + * should be retained. + * + * @param rep A pointer to "this" UReplaceable object. + * @param start the starting index of the text to be replaced, + * inclusive. + * @param limit the ending index of the text to be replaced, + * exclusive. + * @param text the new text to replace the UChars from + * start..limit-1. + * @param textLength the number of UChars at text, or -1 if text + * is null-terminated. + * @stable ICU 2.0 + */ + void (*replace)(UReplaceable* rep, + int32_t start, + int32_t limit, + const UChar* text, + int32_t textLength); + + /** + * Function pointer that copies the characters in the range + * [<tt>start</tt>, <tt>limit</tt>) into the array <tt>dst</tt>. + * + * @param rep A pointer to "this" UReplaceable object. + * @param start offset of first character which will be copied + * into the array + * @param limit offset immediately following the last character to + * be copied + * @param dst array in which to copy characters. The length of + * <tt>dst</tt> must be at least <tt>(limit - start)</tt>. + * @stable ICU 2.1 + */ + void (*extract)(UReplaceable* rep, + int32_t start, + int32_t limit, + UChar* dst); + + /** + * Function pointer that copies text between start and limit in + * this text to another index in the text. Attributes (out of + * band info) should be retained. After this call, there will be + * (at least) two copies of the characters originally located at + * start..limit-1. + * + * @param rep A pointer to "this" UReplaceable object. + * @param start the starting index of the text to be copied, + * inclusive. + * @param limit the ending index of the text to be copied, + * exclusive. + * @param dest the index at which the copy of the UChars should be + * inserted. + * @stable ICU 2.0 + */ + void (*copy)(UReplaceable* rep, + int32_t start, + int32_t limit, + int32_t dest); + +} UReplaceableCallbacks; + +U_CDECL_END + +#endif diff --git a/thirdparty/icu4c/common/unicode/ures.h b/thirdparty/icu4c/common/unicode/ures.h new file mode 100644 index 0000000000..fff84043e8 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ures.h @@ -0,0 +1,911 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File URES.H (formerly CRESBUND.H) +* +* Modification History: +* +* Date Name Description +* 04/01/97 aliu Creation. +* 02/22/99 damiba overhaul. +* 04/04/99 helena Fixed internal header inclusion. +* 04/15/99 Madhu Updated Javadoc +* 06/14/99 stephen Removed functions taking a filename suffix. +* 07/20/99 stephen Language-independent typedef to void* +* 11/09/99 weiv Added ures_getLocale() +* 06/24/02 weiv Added support for resource sharing +****************************************************************************** +*/ + +#ifndef URES_H +#define URES_H + +#include "unicode/utypes.h" +#include "unicode/uloc.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +/** + * \file + * \brief C API: Resource Bundle + * + * <h2>C API: Resource Bundle</h2> + * + * C API representing a collection of resource information pertaining to a given + * locale. A resource bundle provides a way of accessing locale- specific information in + * a data file. You create a resource bundle that manages the resources for a given + * locale and then ask it for individual resources. + * <P> + * Resource bundles in ICU4C are currently defined using text files which conform to the following + * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/bnf_rb.txt">BNF definition</a>. + * More on resource bundle concepts and syntax can be found in the + * <a href="http://icu-project.org/userguide/ResourceManagement.html">Users Guide</a>. + * <P> + */ + +/** + * UResourceBundle is an opaque type for handles for resource bundles in C APIs. + * @stable ICU 2.0 + */ +struct UResourceBundle; + +/** + * @stable ICU 2.0 + */ +typedef struct UResourceBundle UResourceBundle; + +/** + * Numeric constants for types of resource items. + * @see ures_getType + * @stable ICU 2.0 + */ +typedef enum { + /** Resource type constant for "no resource". @stable ICU 2.6 */ + URES_NONE=-1, + + /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */ + URES_STRING=0, + + /** Resource type constant for binary data. @stable ICU 2.6 */ + URES_BINARY=1, + + /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */ + URES_TABLE=2, + + /** + * Resource type constant for aliases; + * internally stores a string which identifies the actual resource + * storing the data (can be in a different resource bundle). + * Resolved internally before delivering the actual resource through the API. + * @stable ICU 2.6 + */ + URES_ALIAS=3, + + /** + * Resource type constant for a single 28-bit integer, interpreted as + * signed or unsigned by the ures_getInt() or ures_getUInt() function. + * @see ures_getInt + * @see ures_getUInt + * @stable ICU 2.6 + */ + URES_INT=7, + + /** Resource type constant for arrays of resources. @stable ICU 2.6 */ + URES_ARRAY=8, + + /** + * Resource type constant for vectors of 32-bit integers. + * @see ures_getIntVector + * @stable ICU 2.6 + */ + URES_INT_VECTOR = 14, +#ifndef U_HIDE_DEPRECATED_API + /** @deprecated ICU 2.6 Use the URES_ constant instead. */ + RES_NONE=URES_NONE, + /** @deprecated ICU 2.6 Use the URES_ constant instead. */ + RES_STRING=URES_STRING, + /** @deprecated ICU 2.6 Use the URES_ constant instead. */ + RES_BINARY=URES_BINARY, + /** @deprecated ICU 2.6 Use the URES_ constant instead. */ + RES_TABLE=URES_TABLE, + /** @deprecated ICU 2.6 Use the URES_ constant instead. */ + RES_ALIAS=URES_ALIAS, + /** @deprecated ICU 2.6 Use the URES_ constant instead. */ + RES_INT=URES_INT, + /** @deprecated ICU 2.6 Use the URES_ constant instead. */ + RES_ARRAY=URES_ARRAY, + /** @deprecated ICU 2.6 Use the URES_ constant instead. */ + RES_INT_VECTOR=URES_INT_VECTOR, + /** @deprecated ICU 2.6 Not used. */ + RES_RESERVED=15, + + /** + * One more than the highest normal UResType value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + URES_LIMIT = 16 +#endif // U_HIDE_DEPRECATED_API +} UResType; + +/* + * Functions to create and destroy resource bundles. + */ + +/** + * Opens a UResourceBundle, from which users can extract strings by using + * their corresponding keys. + * Note that the caller is responsible of calling <TT>ures_close</TT> on each successfully + * opened resource bundle. + * @param packageName The packageName and locale together point to an ICU udata object, + * as defined by <code> udata_open( packageName, "res", locale, err) </code> + * or equivalent. Typically, packageName will refer to a (.dat) file, or to + * a package registered with udata_setAppData(). Using a full file or directory + * pathname for packageName is deprecated. If NULL, ICU data will be used. + * @param locale specifies the locale for which we want to open the resource + * if NULL, the default locale will be used. If strlen(locale) == 0 + * root locale will be used. + * + * @param status fills in the outgoing error code. + * The UErrorCode err parameter is used to return status information to the user. To + * check whether the construction succeeded or not, you should check the value of + * U_SUCCESS(err). If you wish more detailed information, you can check for + * informational status results which still indicate success. U_USING_FALLBACK_WARNING + * indicates that a fall back locale was used. For example, 'de_CH' was requested, + * but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that + * the default locale data or root locale data was used; neither the requested locale + * nor any of its fall back locales could be found. Please see the users guide for more + * information on this topic. + * @return a newly allocated resource bundle. + * @see ures_close + * @stable ICU 2.0 + */ +U_CAPI UResourceBundle* U_EXPORT2 +ures_open(const char* packageName, + const char* locale, + UErrorCode* status); + + +/** This function does not care what kind of localeID is passed in. It simply opens a bundle with + * that name. Fallback mechanism is disabled for the new bundle. If the requested bundle contains + * an %%ALIAS directive, the results are undefined. + * @param packageName The packageName and locale together point to an ICU udata object, + * as defined by <code> udata_open( packageName, "res", locale, err) </code> + * or equivalent. Typically, packageName will refer to a (.dat) file, or to + * a package registered with udata_setAppData(). Using a full file or directory + * pathname for packageName is deprecated. If NULL, ICU data will be used. + * @param locale specifies the locale for which we want to open the resource + * if NULL, the default locale will be used. If strlen(locale) == 0 + * root locale will be used. + * + * @param status fills in the outgoing error code. Either U_ZERO_ERROR or U_MISSING_RESOURCE_ERROR + * @return a newly allocated resource bundle or NULL if it doesn't exist. + * @see ures_close + * @stable ICU 2.0 + */ +U_CAPI UResourceBundle* U_EXPORT2 +ures_openDirect(const char* packageName, + const char* locale, + UErrorCode* status); + +/** + * Same as ures_open() but takes a const UChar *path. + * This path will be converted to char * using the default converter, + * then ures_open() is called. + * + * @param packageName The packageName and locale together point to an ICU udata object, + * as defined by <code> udata_open( packageName, "res", locale, err) </code> + * or equivalent. Typically, packageName will refer to a (.dat) file, or to + * a package registered with udata_setAppData(). Using a full file or directory + * pathname for packageName is deprecated. If NULL, ICU data will be used. + * @param locale specifies the locale for which we want to open the resource + * if NULL, the default locale will be used. If strlen(locale) == 0 + * root locale will be used. + * @param status fills in the outgoing error code. + * @return a newly allocated resource bundle. + * @see ures_open + * @stable ICU 2.0 + */ +U_CAPI UResourceBundle* U_EXPORT2 +ures_openU(const UChar* packageName, + const char* locale, + UErrorCode* status); + +#ifndef U_HIDE_DEPRECATED_API +/** + * Returns the number of strings/arrays in resource bundles. + * Better to use ures_getSize, as this function will be deprecated. + * + *@param resourceBundle resource bundle containing the desired strings + *@param resourceKey key tagging the resource + *@param err fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * could be a non-failing error + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_FALLBACK_WARNING </TT> + *@return: for <STRONG>Arrays</STRONG>: returns the number of resources in the array + * <STRONG>Tables</STRONG>: returns the number of resources in the table + * <STRONG>single string</STRONG>: returns 1 + *@see ures_getSize + * @deprecated ICU 2.8 User ures_getSize instead + */ +U_DEPRECATED int32_t U_EXPORT2 +ures_countArrayItems(const UResourceBundle* resourceBundle, + const char* resourceKey, + UErrorCode* err); +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Close a resource bundle, all pointers returned from the various ures_getXXX calls + * on this particular bundle should be considered invalid henceforth. + * + * @param resourceBundle a pointer to a resourceBundle struct. Can be NULL. + * @see ures_open + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ures_close(UResourceBundle* resourceBundle); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUResourceBundlePointer + * "Smart pointer" class, closes a UResourceBundle via ures_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUResourceBundlePointer, UResourceBundle, ures_close); + +U_NAMESPACE_END + +#endif + +#ifndef U_HIDE_DEPRECATED_API +/** + * Return the version number associated with this ResourceBundle as a string. Please + * use ures_getVersion as this function is going to be deprecated. + * + * @param resourceBundle The resource bundle for which the version is checked. + * @return A version number string as specified in the resource bundle or its parent. + * The caller does not own this string. + * @see ures_getVersion + * @deprecated ICU 2.8 Use ures_getVersion instead. + */ +U_DEPRECATED const char* U_EXPORT2 +ures_getVersionNumber(const UResourceBundle* resourceBundle); +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Return the version number associated with this ResourceBundle as an + * UVersionInfo array. + * + * @param resB The resource bundle for which the version is checked. + * @param versionInfo A UVersionInfo array that is filled with the version number + * as specified in the resource bundle or its parent. + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ures_getVersion(const UResourceBundle* resB, + UVersionInfo versionInfo); + +#ifndef U_HIDE_DEPRECATED_API +/** + * Return the name of the Locale associated with this ResourceBundle. This API allows + * you to query for the real locale of the resource. For example, if you requested + * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned. + * For subresources, the locale where this resource comes from will be returned. + * If fallback has occurred, getLocale will reflect this. + * + * @param resourceBundle resource bundle in question + * @param status just for catching illegal arguments + * @return A Locale name + * @deprecated ICU 2.8 Use ures_getLocaleByType instead. + */ +U_DEPRECATED const char* U_EXPORT2 +ures_getLocale(const UResourceBundle* resourceBundle, + UErrorCode* status); +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Return the name of the Locale associated with this ResourceBundle. + * You can choose between requested, valid and real locale. + * + * @param resourceBundle resource bundle in question + * @param type You can choose between requested, valid and actual + * locale. For description see the definition of + * ULocDataLocaleType in uloc.h + * @param status just for catching illegal arguments + * @return A Locale name + * @stable ICU 2.8 + */ +U_CAPI const char* U_EXPORT2 +ures_getLocaleByType(const UResourceBundle* resourceBundle, + ULocDataLocaleType type, + UErrorCode* status); + + +#ifndef U_HIDE_INTERNAL_API +/** + * Same as ures_open() but uses the fill-in parameter instead of allocating a new bundle. + * + * TODO need to revisit usefulness of this function + * and usage model for fillIn parameters without knowing sizeof(UResourceBundle) + * @param r The existing UResourceBundle to fill in. If NULL then status will be + * set to U_ILLEGAL_ARGUMENT_ERROR. + * @param packageName The packageName and locale together point to an ICU udata object, + * as defined by <code> udata_open( packageName, "res", locale, err) </code> + * or equivalent. Typically, packageName will refer to a (.dat) file, or to + * a package registered with udata_setAppData(). Using a full file or directory + * pathname for packageName is deprecated. If NULL, ICU data will be used. + * @param localeID specifies the locale for which we want to open the resource + * @param status The error code. + * @internal + */ +U_CAPI void U_EXPORT2 +ures_openFillIn(UResourceBundle *r, + const char* packageName, + const char* localeID, + UErrorCode* status); +#endif /* U_HIDE_INTERNAL_API */ + +/** + * Returns a string from a string resource type + * + * @param resourceBundle a string resource + * @param len fills in the length of resulting string + * @param status fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * Always check the value of status. Don't count on returning NULL. + * could be a non-failing error + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT> + * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. + * @see ures_getBinary + * @see ures_getIntVector + * @see ures_getInt + * @see ures_getUInt + * @stable ICU 2.0 + */ +U_CAPI const UChar* U_EXPORT2 +ures_getString(const UResourceBundle* resourceBundle, + int32_t* len, + UErrorCode* status); + +/** + * Returns a UTF-8 string from a string resource. + * The UTF-8 string may be returnable directly as a pointer, or + * it may need to be copied, or transformed from UTF-16 using u_strToUTF8() + * or equivalent. + * + * If forceCopy==true, then the string is always written to the dest buffer + * and dest is returned. + * + * If forceCopy==false, then the string is returned as a pointer if possible, + * without needing a dest buffer (it can be NULL). If the string needs to be + * copied or transformed, then it may be placed into dest at an arbitrary offset. + * + * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and + * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual. + * + * If the string is transformed from UTF-16, then a conversion error may occur + * if an unpaired surrogate is encountered. If the function is successful, then + * the output UTF-8 string is always well-formed. + * + * @param resB Resource bundle. + * @param dest Destination buffer. Can be NULL only if capacity=*length==0. + * @param length Input: Capacity of destination buffer. + * Output: Actual length of the UTF-8 string, not counting the + * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR. + * Can be NULL, meaning capacity=0 and the string length is not + * returned to the caller. + * @param forceCopy If true, then the output string will always be written to + * dest, with U_BUFFER_OVERFLOW_ERROR and + * U_STRING_NOT_TERMINATED_WARNING set if appropriate. + * If false, then the dest buffer may or may not contain a + * copy of the string. dest may or may not be modified. + * If a copy needs to be written, then the UErrorCode parameter + * indicates overflow etc. as usual. + * @param status Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to the UTF-8 string. It may be dest, or at some offset + * from dest (only if !forceCopy), or in unrelated memory. + * Always NUL-terminated unless the string was written to dest and + * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set). + * + * @see ures_getString + * @see u_strToUTF8 + * @stable ICU 3.6 + */ +U_CAPI const char * U_EXPORT2 +ures_getUTF8String(const UResourceBundle *resB, + char *dest, int32_t *length, + UBool forceCopy, + UErrorCode *status); + +/** + * Returns a binary data from a binary resource. + * + * @param resourceBundle a string resource + * @param len fills in the length of resulting byte chunk + * @param status fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * Always check the value of status. Don't count on returning NULL. + * could be a non-failing error + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT> + * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file. + * @see ures_getString + * @see ures_getIntVector + * @see ures_getInt + * @see ures_getUInt + * @stable ICU 2.0 + */ +U_CAPI const uint8_t* U_EXPORT2 +ures_getBinary(const UResourceBundle* resourceBundle, + int32_t* len, + UErrorCode* status); + +/** + * Returns a 32 bit integer array from a resource. + * + * @param resourceBundle an int vector resource + * @param len fills in the length of resulting byte chunk + * @param status fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * Always check the value of status. Don't count on returning NULL. + * could be a non-failing error + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT> + * @return a pointer to a chunk of integers which live in a memory mapped/DLL file. + * @see ures_getBinary + * @see ures_getString + * @see ures_getInt + * @see ures_getUInt + * @stable ICU 2.0 + */ +U_CAPI const int32_t* U_EXPORT2 +ures_getIntVector(const UResourceBundle* resourceBundle, + int32_t* len, + UErrorCode* status); + +/** + * Returns an unsigned integer from a resource. + * This integer is originally 28 bits. + * + * @param resourceBundle a string resource + * @param status fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * could be a non-failing error + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT> + * @return an integer value + * @see ures_getInt + * @see ures_getIntVector + * @see ures_getBinary + * @see ures_getString + * @stable ICU 2.0 + */ +U_CAPI uint32_t U_EXPORT2 +ures_getUInt(const UResourceBundle* resourceBundle, + UErrorCode *status); + +/** + * Returns a signed integer from a resource. + * This integer is originally 28 bit and the sign gets propagated. + * + * @param resourceBundle a string resource + * @param status fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * could be a non-failing error + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT> + * @return an integer value + * @see ures_getUInt + * @see ures_getIntVector + * @see ures_getBinary + * @see ures_getString + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ures_getInt(const UResourceBundle* resourceBundle, + UErrorCode *status); + +/** + * Returns the size of a resource. Size for scalar types is always 1, + * and for vector/table types is the number of child resources. + * @warning Integer array is treated as a scalar type. There are no + * APIs to access individual members of an integer array. It + * is always returned as a whole. + * @param resourceBundle a resource + * @return number of resources in a given resource. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +ures_getSize(const UResourceBundle *resourceBundle); + +/** + * Returns the type of a resource. Available types are defined in enum UResType + * + * @param resourceBundle a resource + * @return type of the given resource. + * @see UResType + * @stable ICU 2.0 + */ +U_CAPI UResType U_EXPORT2 +ures_getType(const UResourceBundle *resourceBundle); + +/** + * Returns the key associated with a given resource. Not all the resources have a key - only + * those that are members of a table. + * + * @param resourceBundle a resource + * @return a key associated to this resource, or NULL if it doesn't have a key + * @stable ICU 2.0 + */ +U_CAPI const char * U_EXPORT2 +ures_getKey(const UResourceBundle *resourceBundle); + +/* ITERATION API + This API provides means for iterating through a resource +*/ + +/** + * Resets the internal context of a resource so that iteration starts from the first element. + * + * @param resourceBundle a resource + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +ures_resetIterator(UResourceBundle *resourceBundle); + +/** + * Checks whether the given resource has another element to iterate over. + * + * @param resourceBundle a resource + * @return true if there are more elements, false if there is no more elements + * @stable ICU 2.0 + */ +U_CAPI UBool U_EXPORT2 +ures_hasNext(const UResourceBundle *resourceBundle); + +/** + * Returns the next resource in a given resource or NULL if there are no more resources + * to iterate over. Features a fill-in parameter. + * + * @param resourceBundle a resource + * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller. + * Alternatively, you can supply a struct to be filled by this function. + * @param status fills in the outgoing error code. You may still get a non NULL result even if an + * error occurred. Check status instead. + * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it + * @stable ICU 2.0 + */ +U_CAPI UResourceBundle* U_EXPORT2 +ures_getNextResource(UResourceBundle *resourceBundle, + UResourceBundle *fillIn, + UErrorCode *status); + +/** + * Returns the next string in a given resource or NULL if there are no more resources + * to iterate over. + * + * @param resourceBundle a resource + * @param len fill in length of the string + * @param key fill in for key associated with this string. NULL if no key + * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't + * count on it. Check status instead! + * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. + * @stable ICU 2.0 + */ +U_CAPI const UChar* U_EXPORT2 +ures_getNextString(UResourceBundle *resourceBundle, + int32_t* len, + const char ** key, + UErrorCode *status); + +/** + * Returns the resource in a given resource at the specified index. Features a fill-in parameter. + * + * @param resourceBundle the resource bundle from which to get a sub-resource + * @param indexR an index to the wanted resource. + * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller. + * Alternatively, you can supply a struct to be filled by this function. + * @param status fills in the outgoing error code. Don't count on NULL being returned if an error has + * occurred. Check status instead. + * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it + * @stable ICU 2.0 + */ +U_CAPI UResourceBundle* U_EXPORT2 +ures_getByIndex(const UResourceBundle *resourceBundle, + int32_t indexR, + UResourceBundle *fillIn, + UErrorCode *status); + +/** + * Returns the string in a given resource at the specified index. + * + * @param resourceBundle a resource + * @param indexS an index to the wanted string. + * @param len fill in length of the string + * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't + * count on it. Check status instead! + * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. + * @stable ICU 2.0 + */ +U_CAPI const UChar* U_EXPORT2 +ures_getStringByIndex(const UResourceBundle *resourceBundle, + int32_t indexS, + int32_t* len, + UErrorCode *status); + +/** + * Returns a UTF-8 string from a resource at the specified index. + * The UTF-8 string may be returnable directly as a pointer, or + * it may need to be copied, or transformed from UTF-16 using u_strToUTF8() + * or equivalent. + * + * If forceCopy==true, then the string is always written to the dest buffer + * and dest is returned. + * + * If forceCopy==false, then the string is returned as a pointer if possible, + * without needing a dest buffer (it can be NULL). If the string needs to be + * copied or transformed, then it may be placed into dest at an arbitrary offset. + * + * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and + * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual. + * + * If the string is transformed from UTF-16, then a conversion error may occur + * if an unpaired surrogate is encountered. If the function is successful, then + * the output UTF-8 string is always well-formed. + * + * @param resB Resource bundle. + * @param stringIndex An index to the wanted string. + * @param dest Destination buffer. Can be NULL only if capacity=*length==0. + * @param pLength Input: Capacity of destination buffer. + * Output: Actual length of the UTF-8 string, not counting the + * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR. + * Can be NULL, meaning capacity=0 and the string length is not + * returned to the caller. + * @param forceCopy If true, then the output string will always be written to + * dest, with U_BUFFER_OVERFLOW_ERROR and + * U_STRING_NOT_TERMINATED_WARNING set if appropriate. + * If false, then the dest buffer may or may not contain a + * copy of the string. dest may or may not be modified. + * If a copy needs to be written, then the UErrorCode parameter + * indicates overflow etc. as usual. + * @param status Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to the UTF-8 string. It may be dest, or at some offset + * from dest (only if !forceCopy), or in unrelated memory. + * Always NUL-terminated unless the string was written to dest and + * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set). + * + * @see ures_getStringByIndex + * @see u_strToUTF8 + * @stable ICU 3.6 + */ +U_CAPI const char * U_EXPORT2 +ures_getUTF8StringByIndex(const UResourceBundle *resB, + int32_t stringIndex, + char *dest, int32_t *pLength, + UBool forceCopy, + UErrorCode *status); + +/** + * Returns a resource in a given resource that has a given key. This procedure works only with table + * resources. Features a fill-in parameter. + * + * @param resourceBundle a resource + * @param key a key associated with the wanted resource + * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller. + * Alternatively, you can supply a struct to be filled by this function. + * @param status fills in the outgoing error code. + * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it + * @stable ICU 2.0 + */ +U_CAPI UResourceBundle* U_EXPORT2 +ures_getByKey(const UResourceBundle *resourceBundle, + const char* key, + UResourceBundle *fillIn, + UErrorCode *status); + +/** + * Returns a string in a given resource that has a given key. This procedure works only with table + * resources. + * + * @param resB a resource + * @param key a key associated with the wanted string + * @param len fill in length of the string + * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't + * count on it. Check status instead! + * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file. + * @stable ICU 2.0 + */ +U_CAPI const UChar* U_EXPORT2 +ures_getStringByKey(const UResourceBundle *resB, + const char* key, + int32_t* len, + UErrorCode *status); + +/** + * Returns a UTF-8 string from a resource and a key. + * This function works only with table resources. + * + * The UTF-8 string may be returnable directly as a pointer, or + * it may need to be copied, or transformed from UTF-16 using u_strToUTF8() + * or equivalent. + * + * If forceCopy==true, then the string is always written to the dest buffer + * and dest is returned. + * + * If forceCopy==false, then the string is returned as a pointer if possible, + * without needing a dest buffer (it can be NULL). If the string needs to be + * copied or transformed, then it may be placed into dest at an arbitrary offset. + * + * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and + * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual. + * + * If the string is transformed from UTF-16, then a conversion error may occur + * if an unpaired surrogate is encountered. If the function is successful, then + * the output UTF-8 string is always well-formed. + * + * @param resB Resource bundle. + * @param key A key associated with the wanted resource + * @param dest Destination buffer. Can be NULL only if capacity=*length==0. + * @param pLength Input: Capacity of destination buffer. + * Output: Actual length of the UTF-8 string, not counting the + * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR. + * Can be NULL, meaning capacity=0 and the string length is not + * returned to the caller. + * @param forceCopy If true, then the output string will always be written to + * dest, with U_BUFFER_OVERFLOW_ERROR and + * U_STRING_NOT_TERMINATED_WARNING set if appropriate. + * If false, then the dest buffer may or may not contain a + * copy of the string. dest may or may not be modified. + * If a copy needs to be written, then the UErrorCode parameter + * indicates overflow etc. as usual. + * @param status Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to the UTF-8 string. It may be dest, or at some offset + * from dest (only if !forceCopy), or in unrelated memory. + * Always NUL-terminated unless the string was written to dest and + * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set). + * + * @see ures_getStringByKey + * @see u_strToUTF8 + * @stable ICU 3.6 + */ +U_CAPI const char * U_EXPORT2 +ures_getUTF8StringByKey(const UResourceBundle *resB, + const char *key, + char *dest, int32_t *pLength, + UBool forceCopy, + UErrorCode *status); + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/unistr.h" + +U_NAMESPACE_BEGIN +/** + * Returns the string value from a string resource bundle. + * + * @param resB a resource, should have type URES_STRING + * @param status: fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * could be a non-failing error + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT> + * @return The string value, or a bogus string if there is a failure UErrorCode. + * @stable ICU 2.0 + */ +inline UnicodeString +ures_getUnicodeString(const UResourceBundle *resB, UErrorCode* status) { + UnicodeString result; + int32_t len = 0; + const UChar *r = ures_getString(resB, &len, status); + if(U_SUCCESS(*status)) { + result.setTo(true, r, len); + } else { + result.setToBogus(); + } + return result; +} + +/** + * Returns the next string in a resource, or an empty string if there are no more resources + * to iterate over. + * Use ures_getNextString() instead to distinguish between + * the end of the iteration and a real empty string value. + * + * @param resB a resource + * @param key fill in for key associated with this string + * @param status fills in the outgoing error code + * @return The string value, or a bogus string if there is a failure UErrorCode. + * @stable ICU 2.0 + */ +inline UnicodeString +ures_getNextUnicodeString(UResourceBundle *resB, const char ** key, UErrorCode* status) { + UnicodeString result; + int32_t len = 0; + const UChar* r = ures_getNextString(resB, &len, key, status); + if(U_SUCCESS(*status)) { + result.setTo(true, r, len); + } else { + result.setToBogus(); + } + return result; +} + +/** + * Returns the string in a given resource array or table at the specified index. + * + * @param resB a resource + * @param indexS an index to the wanted string. + * @param status fills in the outgoing error code + * @return The string value, or a bogus string if there is a failure UErrorCode. + * @stable ICU 2.0 + */ +inline UnicodeString +ures_getUnicodeStringByIndex(const UResourceBundle *resB, int32_t indexS, UErrorCode* status) { + UnicodeString result; + int32_t len = 0; + const UChar* r = ures_getStringByIndex(resB, indexS, &len, status); + if(U_SUCCESS(*status)) { + result.setTo(true, r, len); + } else { + result.setToBogus(); + } + return result; +} + +/** + * Returns a string in a resource that has a given key. + * This procedure works only with table resources. + * + * @param resB a resource + * @param key a key associated with the wanted string + * @param status fills in the outgoing error code + * @return The string value, or a bogus string if there is a failure UErrorCode. + * @stable ICU 2.0 + */ +inline UnicodeString +ures_getUnicodeStringByKey(const UResourceBundle *resB, const char* key, UErrorCode* status) { + UnicodeString result; + int32_t len = 0; + const UChar* r = ures_getStringByKey(resB, key, &len, status); + if(U_SUCCESS(*status)) { + result.setTo(true, r, len); + } else { + result.setToBogus(); + } + return result; +} + +U_NAMESPACE_END + +#endif + +/** + * Create a string enumerator, owned by the caller, of all locales located within + * the specified resource tree. + * @param packageName name of the tree, such as (NULL) or U_ICUDATA_ALIAS or or "ICUDATA-coll" + * This call is similar to uloc_getAvailable(). + * @param status error code + * @stable ICU 3.2 + */ +U_CAPI UEnumeration* U_EXPORT2 +ures_openAvailableLocales(const char *packageName, UErrorCode *status); + + +#endif /*_URES*/ +/*eof*/ diff --git a/thirdparty/icu4c/common/unicode/uscript.h b/thirdparty/icu4c/common/unicode/uscript.h new file mode 100644 index 0000000000..8448afda76 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uscript.h @@ -0,0 +1,708 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ********************************************************************** + * Copyright (C) 1997-2016, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** + * + * File USCRIPT.H + * + * Modification History: + * + * Date Name Description + * 07/06/2001 Ram Creation. + ****************************************************************************** + */ + +#ifndef USCRIPT_H +#define USCRIPT_H +#include "unicode/utypes.h" + +/** + * \file + * \brief C API: Unicode Script Information + */ + +/** + * Constants for ISO 15924 script codes. + * + * The current set of script code constants supports at least all scripts + * that are encoded in the version of Unicode which ICU currently supports. + * The names of the constants are usually derived from the + * Unicode script property value aliases. + * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) + * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt . + * + * In addition, constants for many ISO 15924 script codes + * are included, for use with language tags, CLDR data, and similar. + * Some of those codes are not used in the Unicode Character Database (UCD). + * For example, there are no characters that have a UCD script property value of + * Hans or Hant. All Han ideographs have the Hani script property value in Unicode. + * + * Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR. + * + * Starting with ICU 55, script codes are only added when their scripts + * have been or will certainly be encoded in Unicode, + * and have been assigned Unicode script property value aliases, + * to ensure that their script names are stable and match the names of the constants. + * Script codes like Latf and Aran that are not subject to separate encoding + * may be added at any time. + * + * @stable ICU 2.2 + */ +typedef enum UScriptCode { + /* + * Note: UScriptCode constants and their ISO script code comments + * are parsed by preparseucd.py. + * It matches lines like + * USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * / + */ + + /** @stable ICU 2.2 */ + USCRIPT_INVALID_CODE = -1, + /** @stable ICU 2.2 */ + USCRIPT_COMMON = 0, /* Zyyy */ + /** @stable ICU 2.2 */ + USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ + /** @stable ICU 2.2 */ + USCRIPT_ARABIC = 2, /* Arab */ + /** @stable ICU 2.2 */ + USCRIPT_ARMENIAN = 3, /* Armn */ + /** @stable ICU 2.2 */ + USCRIPT_BENGALI = 4, /* Beng */ + /** @stable ICU 2.2 */ + USCRIPT_BOPOMOFO = 5, /* Bopo */ + /** @stable ICU 2.2 */ + USCRIPT_CHEROKEE = 6, /* Cher */ + /** @stable ICU 2.2 */ + USCRIPT_COPTIC = 7, /* Copt */ + /** @stable ICU 2.2 */ + USCRIPT_CYRILLIC = 8, /* Cyrl */ + /** @stable ICU 2.2 */ + USCRIPT_DESERET = 9, /* Dsrt */ + /** @stable ICU 2.2 */ + USCRIPT_DEVANAGARI = 10, /* Deva */ + /** @stable ICU 2.2 */ + USCRIPT_ETHIOPIC = 11, /* Ethi */ + /** @stable ICU 2.2 */ + USCRIPT_GEORGIAN = 12, /* Geor */ + /** @stable ICU 2.2 */ + USCRIPT_GOTHIC = 13, /* Goth */ + /** @stable ICU 2.2 */ + USCRIPT_GREEK = 14, /* Grek */ + /** @stable ICU 2.2 */ + USCRIPT_GUJARATI = 15, /* Gujr */ + /** @stable ICU 2.2 */ + USCRIPT_GURMUKHI = 16, /* Guru */ + /** @stable ICU 2.2 */ + USCRIPT_HAN = 17, /* Hani */ + /** @stable ICU 2.2 */ + USCRIPT_HANGUL = 18, /* Hang */ + /** @stable ICU 2.2 */ + USCRIPT_HEBREW = 19, /* Hebr */ + /** @stable ICU 2.2 */ + USCRIPT_HIRAGANA = 20, /* Hira */ + /** @stable ICU 2.2 */ + USCRIPT_KANNADA = 21, /* Knda */ + /** @stable ICU 2.2 */ + USCRIPT_KATAKANA = 22, /* Kana */ + /** @stable ICU 2.2 */ + USCRIPT_KHMER = 23, /* Khmr */ + /** @stable ICU 2.2 */ + USCRIPT_LAO = 24, /* Laoo */ + /** @stable ICU 2.2 */ + USCRIPT_LATIN = 25, /* Latn */ + /** @stable ICU 2.2 */ + USCRIPT_MALAYALAM = 26, /* Mlym */ + /** @stable ICU 2.2 */ + USCRIPT_MONGOLIAN = 27, /* Mong */ + /** @stable ICU 2.2 */ + USCRIPT_MYANMAR = 28, /* Mymr */ + /** @stable ICU 2.2 */ + USCRIPT_OGHAM = 29, /* Ogam */ + /** @stable ICU 2.2 */ + USCRIPT_OLD_ITALIC = 30, /* Ital */ + /** @stable ICU 2.2 */ + USCRIPT_ORIYA = 31, /* Orya */ + /** @stable ICU 2.2 */ + USCRIPT_RUNIC = 32, /* Runr */ + /** @stable ICU 2.2 */ + USCRIPT_SINHALA = 33, /* Sinh */ + /** @stable ICU 2.2 */ + USCRIPT_SYRIAC = 34, /* Syrc */ + /** @stable ICU 2.2 */ + USCRIPT_TAMIL = 35, /* Taml */ + /** @stable ICU 2.2 */ + USCRIPT_TELUGU = 36, /* Telu */ + /** @stable ICU 2.2 */ + USCRIPT_THAANA = 37, /* Thaa */ + /** @stable ICU 2.2 */ + USCRIPT_THAI = 38, /* Thai */ + /** @stable ICU 2.2 */ + USCRIPT_TIBETAN = 39, /* Tibt */ + /** Canadian_Aboriginal script. @stable ICU 2.6 */ + USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */ + /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */ + USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL, + /** @stable ICU 2.2 */ + USCRIPT_YI = 41, /* Yiii */ + /* New scripts in Unicode 3.2 */ + /** @stable ICU 2.2 */ + USCRIPT_TAGALOG = 42, /* Tglg */ + /** @stable ICU 2.2 */ + USCRIPT_HANUNOO = 43, /* Hano */ + /** @stable ICU 2.2 */ + USCRIPT_BUHID = 44, /* Buhd */ + /** @stable ICU 2.2 */ + USCRIPT_TAGBANWA = 45, /* Tagb */ + + /* New scripts in Unicode 4 */ + /** @stable ICU 2.6 */ + USCRIPT_BRAILLE = 46, /* Brai */ + /** @stable ICU 2.6 */ + USCRIPT_CYPRIOT = 47, /* Cprt */ + /** @stable ICU 2.6 */ + USCRIPT_LIMBU = 48, /* Limb */ + /** @stable ICU 2.6 */ + USCRIPT_LINEAR_B = 49, /* Linb */ + /** @stable ICU 2.6 */ + USCRIPT_OSMANYA = 50, /* Osma */ + /** @stable ICU 2.6 */ + USCRIPT_SHAVIAN = 51, /* Shaw */ + /** @stable ICU 2.6 */ + USCRIPT_TAI_LE = 52, /* Tale */ + /** @stable ICU 2.6 */ + USCRIPT_UGARITIC = 53, /* Ugar */ + + /** New script code in Unicode 4.0.1 @stable ICU 3.0 */ + USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */ + + /* New scripts in Unicode 4.1 */ + /** @stable ICU 3.4 */ + USCRIPT_BUGINESE = 55, /* Bugi */ + /** @stable ICU 3.4 */ + USCRIPT_GLAGOLITIC = 56, /* Glag */ + /** @stable ICU 3.4 */ + USCRIPT_KHAROSHTHI = 57, /* Khar */ + /** @stable ICU 3.4 */ + USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */ + /** @stable ICU 3.4 */ + USCRIPT_NEW_TAI_LUE = 59, /* Talu */ + /** @stable ICU 3.4 */ + USCRIPT_TIFINAGH = 60, /* Tfng */ + /** @stable ICU 3.4 */ + USCRIPT_OLD_PERSIAN = 61, /* Xpeo */ + + /* New script codes from Unicode and ISO 15924 */ + /** @stable ICU 3.6 */ + USCRIPT_BALINESE = 62, /* Bali */ + /** @stable ICU 3.6 */ + USCRIPT_BATAK = 63, /* Batk */ + /** @stable ICU 3.6 */ + USCRIPT_BLISSYMBOLS = 64, /* Blis */ + /** @stable ICU 3.6 */ + USCRIPT_BRAHMI = 65, /* Brah */ + /** @stable ICU 3.6 */ + USCRIPT_CHAM = 66, /* Cham */ + /** @stable ICU 3.6 */ + USCRIPT_CIRTH = 67, /* Cirt */ + /** @stable ICU 3.6 */ + USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */ + /** @stable ICU 3.6 */ + USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */ + /** @stable ICU 3.6 */ + USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */ + /** @stable ICU 3.6 */ + USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */ + /** @stable ICU 3.6 */ + USCRIPT_KHUTSURI = 72, /* Geok */ + /** @stable ICU 3.6 */ + USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */ + /** @stable ICU 3.6 */ + USCRIPT_TRADITIONAL_HAN = 74, /* Hant */ + /** @stable ICU 3.6 */ + USCRIPT_PAHAWH_HMONG = 75, /* Hmng */ + /** @stable ICU 3.6 */ + USCRIPT_OLD_HUNGARIAN = 76, /* Hung */ + /** @stable ICU 3.6 */ + USCRIPT_HARAPPAN_INDUS = 77, /* Inds */ + /** @stable ICU 3.6 */ + USCRIPT_JAVANESE = 78, /* Java */ + /** @stable ICU 3.6 */ + USCRIPT_KAYAH_LI = 79, /* Kali */ + /** @stable ICU 3.6 */ + USCRIPT_LATIN_FRAKTUR = 80, /* Latf */ + /** @stable ICU 3.6 */ + USCRIPT_LATIN_GAELIC = 81, /* Latg */ + /** @stable ICU 3.6 */ + USCRIPT_LEPCHA = 82, /* Lepc */ + /** @stable ICU 3.6 */ + USCRIPT_LINEAR_A = 83, /* Lina */ + /** @stable ICU 4.6 */ + USCRIPT_MANDAIC = 84, /* Mand */ + /** @stable ICU 3.6 */ + USCRIPT_MANDAEAN = USCRIPT_MANDAIC, + /** @stable ICU 3.6 */ + USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */ + /** @stable ICU 4.6 */ + USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */ + /** @stable ICU 3.6 */ + USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS, + /** @stable ICU 3.6 */ + USCRIPT_NKO = 87, /* Nkoo */ + /** @stable ICU 3.6 */ + USCRIPT_ORKHON = 88, /* Orkh */ + /** @stable ICU 3.6 */ + USCRIPT_OLD_PERMIC = 89, /* Perm */ + /** @stable ICU 3.6 */ + USCRIPT_PHAGS_PA = 90, /* Phag */ + /** @stable ICU 3.6 */ + USCRIPT_PHOENICIAN = 91, /* Phnx */ + /** @stable ICU 52 */ + USCRIPT_MIAO = 92, /* Plrd */ + /** @stable ICU 3.6 */ + USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO, + /** @stable ICU 3.6 */ + USCRIPT_RONGORONGO = 93, /* Roro */ + /** @stable ICU 3.6 */ + USCRIPT_SARATI = 94, /* Sara */ + /** @stable ICU 3.6 */ + USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */ + /** @stable ICU 3.6 */ + USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */ + /** @stable ICU 3.6 */ + USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */ + /** @stable ICU 3.6 */ + USCRIPT_TENGWAR = 98, /* Teng */ + /** @stable ICU 3.6 */ + USCRIPT_VAI = 99, /* Vaii */ + /** @stable ICU 3.6 */ + USCRIPT_VISIBLE_SPEECH = 100,/* Visp */ + /** @stable ICU 3.6 */ + USCRIPT_CUNEIFORM = 101,/* Xsux */ + /** @stable ICU 3.6 */ + USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */ + /** @stable ICU 3.6 */ + USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ + + /** @stable ICU 3.8 */ + USCRIPT_CARIAN = 104,/* Cari */ + /** @stable ICU 3.8 */ + USCRIPT_JAPANESE = 105,/* Jpan */ + /** @stable ICU 3.8 */ + USCRIPT_LANNA = 106,/* Lana */ + /** @stable ICU 3.8 */ + USCRIPT_LYCIAN = 107,/* Lyci */ + /** @stable ICU 3.8 */ + USCRIPT_LYDIAN = 108,/* Lydi */ + /** @stable ICU 3.8 */ + USCRIPT_OL_CHIKI = 109,/* Olck */ + /** @stable ICU 3.8 */ + USCRIPT_REJANG = 110,/* Rjng */ + /** @stable ICU 3.8 */ + USCRIPT_SAURASHTRA = 111,/* Saur */ + /** Sutton SignWriting @stable ICU 3.8 */ + USCRIPT_SIGN_WRITING = 112,/* Sgnw */ + /** @stable ICU 3.8 */ + USCRIPT_SUNDANESE = 113,/* Sund */ + /** @stable ICU 3.8 */ + USCRIPT_MOON = 114,/* Moon */ + /** @stable ICU 3.8 */ + USCRIPT_MEITEI_MAYEK = 115,/* Mtei */ + + /** @stable ICU 4.0 */ + USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */ + /** @stable ICU 4.0 */ + USCRIPT_AVESTAN = 117,/* Avst */ + /** @stable ICU 4.0 */ + USCRIPT_CHAKMA = 118,/* Cakm */ + /** @stable ICU 4.0 */ + USCRIPT_KOREAN = 119,/* Kore */ + /** @stable ICU 4.0 */ + USCRIPT_KAITHI = 120,/* Kthi */ + /** @stable ICU 4.0 */ + USCRIPT_MANICHAEAN = 121,/* Mani */ + /** @stable ICU 4.0 */ + USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */ + /** @stable ICU 4.0 */ + USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */ + /** @stable ICU 4.0 */ + USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */ + /** @stable ICU 4.0 */ + USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */ + /** @stable ICU 4.0 */ + USCRIPT_SAMARITAN = 126,/* Samr */ + /** @stable ICU 4.0 */ + USCRIPT_TAI_VIET = 127,/* Tavt */ + /** @stable ICU 4.0 */ + USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */ + /** @stable ICU 4.0 */ + USCRIPT_SYMBOLS = 129,/* Zsym */ + + /** @stable ICU 4.4 */ + USCRIPT_BAMUM = 130,/* Bamu */ + /** @stable ICU 4.4 */ + USCRIPT_LISU = 131,/* Lisu */ + /** @stable ICU 4.4 */ + USCRIPT_NAKHI_GEBA = 132,/* Nkgb */ + /** @stable ICU 4.4 */ + USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */ + + /** @stable ICU 4.6 */ + USCRIPT_BASSA_VAH = 134,/* Bass */ + /** @stable ICU 54 */ + USCRIPT_DUPLOYAN = 135,/* Dupl */ +#ifndef U_HIDE_DEPRECATED_API + /** @deprecated ICU 54 Typo, use USCRIPT_DUPLOYAN */ + USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN, +#endif /* U_HIDE_DEPRECATED_API */ + /** @stable ICU 4.6 */ + USCRIPT_ELBASAN = 136,/* Elba */ + /** @stable ICU 4.6 */ + USCRIPT_GRANTHA = 137,/* Gran */ + /** @stable ICU 4.6 */ + USCRIPT_KPELLE = 138,/* Kpel */ + /** @stable ICU 4.6 */ + USCRIPT_LOMA = 139,/* Loma */ + /** Mende Kikakui @stable ICU 4.6 */ + USCRIPT_MENDE = 140,/* Mend */ + /** @stable ICU 4.6 */ + USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */ + /** @stable ICU 4.6 */ + USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */ + /** @stable ICU 4.6 */ + USCRIPT_NABATAEAN = 143,/* Nbat */ + /** @stable ICU 4.6 */ + USCRIPT_PALMYRENE = 144,/* Palm */ + /** @stable ICU 54 */ + USCRIPT_KHUDAWADI = 145,/* Sind */ + /** @stable ICU 4.6 */ + USCRIPT_SINDHI = USCRIPT_KHUDAWADI, + /** @stable ICU 4.6 */ + USCRIPT_WARANG_CITI = 146,/* Wara */ + + /** @stable ICU 4.8 */ + USCRIPT_AFAKA = 147,/* Afak */ + /** @stable ICU 4.8 */ + USCRIPT_JURCHEN = 148,/* Jurc */ + /** @stable ICU 4.8 */ + USCRIPT_MRO = 149,/* Mroo */ + /** @stable ICU 4.8 */ + USCRIPT_NUSHU = 150,/* Nshu */ + /** @stable ICU 4.8 */ + USCRIPT_SHARADA = 151,/* Shrd */ + /** @stable ICU 4.8 */ + USCRIPT_SORA_SOMPENG = 152,/* Sora */ + /** @stable ICU 4.8 */ + USCRIPT_TAKRI = 153,/* Takr */ + /** @stable ICU 4.8 */ + USCRIPT_TANGUT = 154,/* Tang */ + /** @stable ICU 4.8 */ + USCRIPT_WOLEAI = 155,/* Wole */ + + /** @stable ICU 49 */ + USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */ + /** @stable ICU 49 */ + USCRIPT_KHOJKI = 157,/* Khoj */ + /** @stable ICU 49 */ + USCRIPT_TIRHUTA = 158,/* Tirh */ + + /** @stable ICU 52 */ + USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */ + /** @stable ICU 52 */ + USCRIPT_MAHAJANI = 160,/* Mahj */ + + /** @stable ICU 54 */ + USCRIPT_AHOM = 161,/* Ahom */ + /** @stable ICU 54 */ + USCRIPT_HATRAN = 162,/* Hatr */ + /** @stable ICU 54 */ + USCRIPT_MODI = 163,/* Modi */ + /** @stable ICU 54 */ + USCRIPT_MULTANI = 164,/* Mult */ + /** @stable ICU 54 */ + USCRIPT_PAU_CIN_HAU = 165,/* Pauc */ + /** @stable ICU 54 */ + USCRIPT_SIDDHAM = 166,/* Sidd */ + + /** @stable ICU 58 */ + USCRIPT_ADLAM = 167,/* Adlm */ + /** @stable ICU 58 */ + USCRIPT_BHAIKSUKI = 168,/* Bhks */ + /** @stable ICU 58 */ + USCRIPT_MARCHEN = 169,/* Marc */ + /** @stable ICU 58 */ + USCRIPT_NEWA = 170,/* Newa */ + /** @stable ICU 58 */ + USCRIPT_OSAGE = 171,/* Osge */ + + /** @stable ICU 58 */ + USCRIPT_HAN_WITH_BOPOMOFO = 172,/* Hanb */ + /** @stable ICU 58 */ + USCRIPT_JAMO = 173,/* Jamo */ + /** @stable ICU 58 */ + USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */ + + /** @stable ICU 60 */ + USCRIPT_MASARAM_GONDI = 175,/* Gonm */ + /** @stable ICU 60 */ + USCRIPT_SOYOMBO = 176,/* Soyo */ + /** @stable ICU 60 */ + USCRIPT_ZANABAZAR_SQUARE = 177,/* Zanb */ + + /** @stable ICU 62 */ + USCRIPT_DOGRA = 178,/* Dogr */ + /** @stable ICU 62 */ + USCRIPT_GUNJALA_GONDI = 179,/* Gong */ + /** @stable ICU 62 */ + USCRIPT_MAKASAR = 180,/* Maka */ + /** @stable ICU 62 */ + USCRIPT_MEDEFAIDRIN = 181,/* Medf */ + /** @stable ICU 62 */ + USCRIPT_HANIFI_ROHINGYA = 182,/* Rohg */ + /** @stable ICU 62 */ + USCRIPT_SOGDIAN = 183,/* Sogd */ + /** @stable ICU 62 */ + USCRIPT_OLD_SOGDIAN = 184,/* Sogo */ + + /** @stable ICU 64 */ + USCRIPT_ELYMAIC = 185,/* Elym */ + /** @stable ICU 64 */ + USCRIPT_NYIAKENG_PUACHUE_HMONG = 186,/* Hmnp */ + /** @stable ICU 64 */ + USCRIPT_NANDINAGARI = 187,/* Nand */ + /** @stable ICU 64 */ + USCRIPT_WANCHO = 188,/* Wcho */ + + /** @stable ICU 66 */ + USCRIPT_CHORASMIAN = 189,/* Chrs */ + /** @stable ICU 66 */ + USCRIPT_DIVES_AKURU = 190,/* Diak */ + /** @stable ICU 66 */ + USCRIPT_KHITAN_SMALL_SCRIPT = 191,/* Kits */ + /** @stable ICU 66 */ + USCRIPT_YEZIDI = 192,/* Yezi */ + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UScriptCode value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + USCRIPT_CODE_LIMIT = 193 +#endif // U_HIDE_DEPRECATED_API +} UScriptCode; + +/** + * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name. + * Fills in USCRIPT_MALAYALAM given "Malayam" OR "Mlym". + * Fills in USCRIPT_LATIN given "en" OR "en_US" + * If the required capacity is greater than the capacity of the destination buffer, + * then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned. + * + * <p>Note: To search by short or long script alias only, use + * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does + * a fast lookup with no access of the locale data. + * + * @param nameOrAbbrOrLocale name of the script, as given in + * PropertyValueAliases.txt, or ISO 15924 code or locale + * @param fillIn the UScriptCode buffer to fill in the script code + * @param capacity the capacity (size) of UScriptCode buffer passed in. + * @param err the error status code. + * @return The number of script codes filled in the buffer passed in + * @stable ICU 2.4 + */ +U_CAPI int32_t U_EXPORT2 +uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err); + +/** + * Returns the long Unicode script name, if there is one. + * Otherwise returns the 4-letter ISO 15924 script code. + * Returns "Malayam" given USCRIPT_MALAYALAM. + * + * @param scriptCode UScriptCode enum + * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code, + * or NULL if scriptCode is invalid + * @stable ICU 2.4 + */ +U_CAPI const char* U_EXPORT2 +uscript_getName(UScriptCode scriptCode); + +/** + * Returns the 4-letter ISO 15924 script code, + * which is the same as the short Unicode script name if Unicode has names for the script. + * Returns "Mlym" given USCRIPT_MALAYALAM. + * + * @param scriptCode UScriptCode enum + * @return short script name (4-letter code), or NULL if scriptCode is invalid + * @stable ICU 2.4 + */ +U_CAPI const char* U_EXPORT2 +uscript_getShortName(UScriptCode scriptCode); + +/** + * Gets the script code associated with the given codepoint. + * Returns USCRIPT_MALAYALAM given 0x0D02 + * @param codepoint UChar32 codepoint + * @param err the error status code. + * @return The UScriptCode, or 0 if codepoint is invalid + * @stable ICU 2.4 + */ +U_CAPI UScriptCode U_EXPORT2 +uscript_getScript(UChar32 codepoint, UErrorCode *err); + +/** + * Do the Script_Extensions of code point c contain script sc? + * If c does not have explicit Script_Extensions, then this tests whether + * c has the Script property value sc. + * + * Some characters are commonly used in multiple scripts. + * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. + * @param c code point + * @param sc script code + * @return true if sc is in Script_Extensions(c) + * @stable ICU 49 + */ +U_CAPI UBool U_EXPORT2 +uscript_hasScript(UChar32 c, UScriptCode sc); + +/** + * Writes code point c's Script_Extensions as a list of UScriptCode values + * to the output scripts array and returns the number of script codes. + * - If c does have Script_Extensions, then the Script property value + * (normally Common or Inherited) is not included. + * - If c does not have Script_Extensions, then the one Script code is written to the output array. + * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written. + * In other words, if the return value is 1, + * then the output array contains exactly c's single Script code. + * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes. + * + * Some characters are commonly used in multiple scripts. + * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. + * + * If there are more than capacity script codes to be written, then + * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. + * (Usual ICU buffer handling behavior.) + * + * @param c code point + * @param scripts output script code array + * @param capacity capacity of the scripts array + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return number of script codes in c's Script_Extensions, or 1 for the single Script value, + * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity + * @stable ICU 49 + */ +U_CAPI int32_t U_EXPORT2 +uscript_getScriptExtensions(UChar32 c, + UScriptCode *scripts, int32_t capacity, + UErrorCode *errorCode); + +/** + * Script usage constants. + * See UAX #31 Unicode Identifier and Pattern Syntax. + * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers + * + * @stable ICU 51 + */ +typedef enum UScriptUsage { + /** Not encoded in Unicode. @stable ICU 51 */ + USCRIPT_USAGE_NOT_ENCODED, + /** Unknown script usage. @stable ICU 51 */ + USCRIPT_USAGE_UNKNOWN, + /** Candidate for Exclusion from Identifiers. @stable ICU 51 */ + USCRIPT_USAGE_EXCLUDED, + /** Limited Use script. @stable ICU 51 */ + USCRIPT_USAGE_LIMITED_USE, + /** Aspirational Use script. @stable ICU 51 */ + USCRIPT_USAGE_ASPIRATIONAL, + /** Recommended script. @stable ICU 51 */ + USCRIPT_USAGE_RECOMMENDED +} UScriptUsage; + +/** + * Writes the script sample character string. + * This string normally consists of one code point but might be longer. + * The string is empty if the script is not encoded. + * + * @param script script code + * @param dest output string array + * @param capacity number of UChars in the dest array + * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input + * @return the string length, even if U_BUFFER_OVERFLOW_ERROR + * @stable ICU 51 + */ +U_CAPI int32_t U_EXPORT2 +uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN +class UnicodeString; +U_NAMESPACE_END + +/** + * Returns the script sample character string. + * This string normally consists of one code point but might be longer. + * The string is empty if the script is not encoded. + * + * @param script script code + * @return the sample character string + * @stable ICU 51 + */ +U_COMMON_API icu::UnicodeString U_EXPORT2 +uscript_getSampleUnicodeString(UScriptCode script); + +#endif + +/** + * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax. + * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode. + * + * @param script script code + * @return script usage + * @see UScriptUsage + * @stable ICU 51 + */ +U_CAPI UScriptUsage U_EXPORT2 +uscript_getUsage(UScriptCode script); + +/** + * Returns true if the script is written right-to-left. + * For example, Arab and Hebr. + * + * @param script script code + * @return true if the script is right-to-left + * @stable ICU 51 + */ +U_CAPI UBool U_EXPORT2 +uscript_isRightToLeft(UScriptCode script); + +/** + * Returns true if the script allows line breaks between letters (excluding hyphenation). + * Such a script typically requires dictionary-based line breaking. + * For example, Hani and Thai. + * + * @param script script code + * @return true if the script allows line breaks between letters + * @stable ICU 51 + */ +U_CAPI UBool U_EXPORT2 +uscript_breaksBetweenLetters(UScriptCode script); + +/** + * Returns true if in modern (or most recent) usage of the script case distinctions are customary. + * For example, Latn and Cyrl. + * + * @param script script code + * @return true if the script is cased + * @stable ICU 51 + */ +U_CAPI UBool U_EXPORT2 +uscript_isCased(UScriptCode script); + +#endif diff --git a/thirdparty/icu4c/common/unicode/uset.h b/thirdparty/icu4c/common/unicode/uset.h new file mode 100644 index 0000000000..502ea8dc14 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uset.h @@ -0,0 +1,1137 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uset.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002mar07 +* created by: Markus W. Scherer +* +* C version of UnicodeSet. +*/ + + +/** + * \file + * \brief C API: Unicode Set + * + * <p>This is a C wrapper around the C++ UnicodeSet class.</p> + */ + +#ifndef __USET_H__ +#define __USET_H__ + +#include "unicode/utypes.h" +#include "unicode/uchar.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +#ifndef USET_DEFINED + +#ifndef U_IN_DOXYGEN +#define USET_DEFINED +#endif +/** + * USet is the C API type corresponding to C++ class UnicodeSet. + * Use the uset_* API to manipulate. Create with + * uset_open*, and destroy with uset_close. + * @stable ICU 2.4 + */ +typedef struct USet USet; +#endif + +/** + * Bitmask values to be passed to uset_openPatternOptions() or + * uset_applyPattern() taking an option parameter. + * @stable ICU 2.4 + */ +enum { + /** + * Ignore white space within patterns unless quoted or escaped. + * @stable ICU 2.4 + */ + USET_IGNORE_SPACE = 1, + + /** + * Enable case insensitive matching. E.g., "[ab]" with this flag + * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will + * match all except 'a', 'A', 'b', and 'B'. This performs a full + * closure over case mappings, e.g. U+017F for s. + * + * The resulting set is a superset of the input for the code points but + * not for the strings. + * It performs a case mapping closure of the code points and adds + * full case folding strings for the code points, and reduces strings of + * the original set to their full case folding equivalents. + * + * This is designed for case-insensitive matches, for example + * in regular expressions. The full code point case closure allows checking of + * an input character directly against the closure set. + * Strings are matched by comparing the case-folded form from the closure + * set with an incremental case folding of the string in question. + * + * The closure set will also contain single code points if the original + * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.). + * This is not necessary (that is, redundant) for the above matching method + * but results in the same closure sets regardless of whether the original + * set contained the code point or a string. + * + * @stable ICU 2.4 + */ + USET_CASE_INSENSITIVE = 2, + + /** + * Enable case insensitive matching. E.g., "[ab]" with this flag + * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will + * match all except 'a', 'A', 'b', and 'B'. This adds the lower-, + * title-, and uppercase mappings as well as the case folding + * of each existing element in the set. + * @stable ICU 3.2 + */ + USET_ADD_CASE_MAPPINGS = 4 +}; + +/** + * Argument values for whether span() and similar functions continue while + * the current character is contained vs. not contained in the set. + * + * The functionality is straightforward for sets with only single code points, + * without strings (which is the common case): + * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same. + * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED. + * - span() and spanBack() partition any string the same way when + * alternating between span(USET_SPAN_NOT_CONTAINED) and + * span(either "contained" condition). + * - Using a complemented (inverted) set and the opposite span conditions + * yields the same results. + * + * When a set contains multi-code point strings, then these statements may not + * be true, depending on the strings in the set (for example, whether they + * overlap with each other) and the string that is processed. + * For a set with strings: + * - The complement of the set contains the opposite set of code points, + * but the same set of strings. + * Therefore, complementing both the set and the span conditions + * may yield different results. + * - When starting spans at different positions in a string + * (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different + * because a set string may start before the later position. + * - span(USET_SPAN_SIMPLE) may be shorter than + * span(USET_SPAN_CONTAINED) because it will not recursively try + * all possible paths. + * For example, with a set which contains the three strings "xy", "xya" and "ax", + * span("xyax", USET_SPAN_CONTAINED) will return 4 but + * span("xyax", USET_SPAN_SIMPLE) will return 3. + * span(USET_SPAN_SIMPLE) will never be longer than + * span(USET_SPAN_CONTAINED). + * - With either "contained" condition, span() and spanBack() may partition + * a string in different ways. + * For example, with a set which contains the two strings "ab" and "ba", + * and when processing the string "aba", + * span() will yield contained/not-contained boundaries of { 0, 2, 3 } + * while spanBack() will yield boundaries of { 0, 1, 3 }. + * + * Note: If it is important to get the same boundaries whether iterating forward + * or backward through a string, then either only span() should be used and + * the boundaries cached for backward operation, or an ICU BreakIterator + * could be used. + * + * Note: Unpaired surrogates are treated like surrogate code points. + * Similarly, set strings match only on code point boundaries, + * never in the middle of a surrogate pair. + * Illegal UTF-8 sequences are treated like U+FFFD. + * When processing UTF-8 strings, malformed set strings + * (strings with unpaired surrogates which cannot be converted to UTF-8) + * are ignored. + * + * @stable ICU 3.8 + */ +typedef enum USetSpanCondition { + /** + * Continues a span() while there is no set element at the current position. + * Increments by one code point at a time. + * Stops before the first set element (character or string). + * (For code points only, this is like while contains(current)==false). + * + * When span() returns, the substring between where it started and the position + * it returned consists only of characters that are not in the set, + * and none of its strings overlap with the span. + * + * @stable ICU 3.8 + */ + USET_SPAN_NOT_CONTAINED = 0, + /** + * Spans the longest substring that is a concatenation of set elements (characters or strings). + * (For characters only, this is like while contains(current)==true). + * + * When span() returns, the substring between where it started and the position + * it returned consists only of set elements (characters or strings) that are in the set. + * + * If a set contains strings, then the span will be the longest substring for which there + * exists at least one non-overlapping concatenation of set elements (characters or strings). + * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>. + * (Java/ICU/Perl regex stops at the first match of an OR.) + * + * @stable ICU 3.8 + */ + USET_SPAN_CONTAINED = 1, + /** + * Continues a span() while there is a set element at the current position. + * Increments by the longest matching element at each position. + * (For characters only, this is like while contains(current)==true). + * + * When span() returns, the substring between where it started and the position + * it returned consists only of set elements (characters or strings) that are in the set. + * + * If a set only contains single characters, then this is the same + * as USET_SPAN_CONTAINED. + * + * If a set contains strings, then the span will be the longest substring + * with a match at each position with the longest single set element (character or string). + * + * Use this span condition together with other longest-match algorithms, + * such as ICU converters (ucnv_getUnicodeSet()). + * + * @stable ICU 3.8 + */ + USET_SPAN_SIMPLE = 2, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last span condition. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + USET_SPAN_CONDITION_COUNT +#endif // U_HIDE_DEPRECATED_API +} USetSpanCondition; + +enum { + /** + * Capacity of USerializedSet::staticArray. + * Enough for any single-code point set. + * Also provides padding for nice sizeof(USerializedSet). + * @stable ICU 2.4 + */ + USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 +}; + +/** + * A serialized form of a Unicode set. Limited manipulations are + * possible directly on a serialized set. See below. + * @stable ICU 2.4 + */ +typedef struct USerializedSet { + /** + * The serialized Unicode Set. + * @stable ICU 2.4 + */ + const uint16_t *array; + /** + * The length of the array that contains BMP characters. + * @stable ICU 2.4 + */ + int32_t bmpLength; + /** + * The total length of the array. + * @stable ICU 2.4 + */ + int32_t length; + /** + * A small buffer for the array to reduce memory allocations. + * @stable ICU 2.4 + */ + uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]; +} USerializedSet; + +/********************************************************************* + * USet API + *********************************************************************/ + +/** + * Create an empty USet object. + * Equivalent to uset_open(1, 0). + * @return a newly created USet. The caller must call uset_close() on + * it when done. + * @stable ICU 4.2 + */ +U_CAPI USet* U_EXPORT2 +uset_openEmpty(void); + +/** + * Creates a USet object that contains the range of characters + * start..end, inclusive. If <code>start > end</code> + * then an empty set is created (same as using uset_openEmpty()). + * @param start first character of the range, inclusive + * @param end last character of the range, inclusive + * @return a newly created USet. The caller must call uset_close() on + * it when done. + * @stable ICU 2.4 + */ +U_CAPI USet* U_EXPORT2 +uset_open(UChar32 start, UChar32 end); + +/** + * Creates a set from the given pattern. See the UnicodeSet class + * description for the syntax of the pattern language. + * @param pattern a string specifying what characters are in the set + * @param patternLength the length of the pattern, or -1 if null + * terminated + * @param ec the error code + * @stable ICU 2.4 + */ +U_CAPI USet* U_EXPORT2 +uset_openPattern(const UChar* pattern, int32_t patternLength, + UErrorCode* ec); + +/** + * Creates a set from the given pattern. See the UnicodeSet class + * description for the syntax of the pattern language. + * @param pattern a string specifying what characters are in the set + * @param patternLength the length of the pattern, or -1 if null + * terminated + * @param options bitmask for options to apply to the pattern. + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. + * @param ec the error code + * @stable ICU 2.4 + */ +U_CAPI USet* U_EXPORT2 +uset_openPatternOptions(const UChar* pattern, int32_t patternLength, + uint32_t options, + UErrorCode* ec); + +/** + * Disposes of the storage used by a USet object. This function should + * be called exactly once for objects returned by uset_open(). + * @param set the object to dispose of + * @stable ICU 2.4 + */ +U_CAPI void U_EXPORT2 +uset_close(USet* set); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUSetPointer + * "Smart pointer" class, closes a USet via uset_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close); + +U_NAMESPACE_END + +#endif + +/** + * Returns a copy of this object. + * If this set is frozen, then the clone will be frozen as well. + * Use uset_cloneAsThawed() for a mutable clone of a frozen set. + * @param set the original set + * @return the newly allocated copy of the set + * @see uset_cloneAsThawed + * @stable ICU 3.8 + */ +U_CAPI USet * U_EXPORT2 +uset_clone(const USet *set); + +/** + * Determines whether the set has been frozen (made immutable) or not. + * See the ICU4J Freezable interface for details. + * @param set the set + * @return true/false for whether the set has been frozen + * @see uset_freeze + * @see uset_cloneAsThawed + * @stable ICU 3.8 + */ +U_CAPI UBool U_EXPORT2 +uset_isFrozen(const USet *set); + +/** + * Freeze the set (make it immutable). + * Once frozen, it cannot be unfrozen and is therefore thread-safe + * until it is deleted. + * See the ICU4J Freezable interface for details. + * Freezing the set may also make some operations faster, for example + * uset_contains() and uset_span(). + * A frozen set will not be modified. (It remains frozen.) + * @param set the set + * @return the same set, now frozen + * @see uset_isFrozen + * @see uset_cloneAsThawed + * @stable ICU 3.8 + */ +U_CAPI void U_EXPORT2 +uset_freeze(USet *set); + +/** + * Clone the set and make the clone mutable. + * See the ICU4J Freezable interface for details. + * @param set the set + * @return the mutable clone + * @see uset_freeze + * @see uset_isFrozen + * @see uset_clone + * @stable ICU 3.8 + */ +U_CAPI USet * U_EXPORT2 +uset_cloneAsThawed(const USet *set); + +/** + * Causes the USet object to represent the range <code>start - end</code>. + * If <code>start > end</code> then this USet is set to an empty range. + * A frozen set will not be modified. + * @param set the object to set to the given range + * @param start first character in the set, inclusive + * @param end last character in the set, inclusive + * @stable ICU 3.2 + */ +U_CAPI void U_EXPORT2 +uset_set(USet* set, + UChar32 start, UChar32 end); + +/** + * Modifies the set to represent the set specified by the given + * pattern. See the UnicodeSet class description for the syntax of + * the pattern language. See also the User Guide chapter about UnicodeSet. + * <em>Empties the set passed before applying the pattern.</em> + * A frozen set will not be modified. + * @param set The set to which the pattern is to be applied. + * @param pattern A pointer to UChar string specifying what characters are in the set. + * The character at pattern[0] must be a '['. + * @param patternLength The length of the UChar string. -1 if NUL terminated. + * @param options A bitmask for options to apply to the pattern. + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. + * @param status Returns an error if the pattern cannot be parsed. + * @return Upon successful parse, the value is either + * the index of the character after the closing ']' + * of the parsed pattern. + * If the status code indicates failure, then the return value + * is the index of the error in the source. + * + * @stable ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +uset_applyPattern(USet *set, + const UChar *pattern, int32_t patternLength, + uint32_t options, + UErrorCode *status); + +/** + * Modifies the set to contain those code points which have the given value + * for the given binary or enumerated property, as returned by + * u_getIntPropertyValue. Prior contents of this set are lost. + * A frozen set will not be modified. + * + * @param set the object to contain the code points defined by the property + * + * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1 + * or UCHAR_INT_START..UCHAR_INT_LIMIT-1 + * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1. + * + * @param value a value in the range u_getIntPropertyMinValue(prop).. + * u_getIntPropertyMaxValue(prop), with one exception. If prop is + * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but + * rather a mask value produced by U_GET_GC_MASK(). This allows grouped + * categories such as [:L:] to be represented. + * + * @param ec error code input/output parameter + * + * @stable ICU 3.2 + */ +U_CAPI void U_EXPORT2 +uset_applyIntPropertyValue(USet* set, + UProperty prop, int32_t value, UErrorCode* ec); + +/** + * Modifies the set to contain those code points which have the + * given value for the given property. Prior contents of this + * set are lost. + * A frozen set will not be modified. + * + * @param set the object to contain the code points defined by the given + * property and value alias + * + * @param prop a string specifying a property alias, either short or long. + * The name is matched loosely. See PropertyAliases.txt for names and a + * description of loose matching. If the value string is empty, then this + * string is interpreted as either a General_Category value alias, a Script + * value alias, a binary property alias, or a special ID. Special IDs are + * matched loosely and correspond to the following sets: + * + * "ANY" = [\\u0000-\\U0010FFFF], + * "ASCII" = [\\u0000-\\u007F], + * "Assigned" = [:^Cn:]. + * + * @param propLength the length of the prop, or -1 if NULL + * + * @param value a string specifying a value alias, either short or long. + * The name is matched loosely. See PropertyValueAliases.txt for names + * and a description of loose matching. In addition to aliases listed, + * numeric values and canonical combining classes may be expressed + * numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string + * may also be empty. + * + * @param valueLength the length of the value, or -1 if NULL + * + * @param ec error code input/output parameter + * + * @stable ICU 3.2 + */ +U_CAPI void U_EXPORT2 +uset_applyPropertyAlias(USet* set, + const UChar *prop, int32_t propLength, + const UChar *value, int32_t valueLength, + UErrorCode* ec); + +/** + * Return true if the given position, in the given pattern, appears + * to be the start of a UnicodeSet pattern. + * + * @param pattern a string specifying the pattern + * @param patternLength the length of the pattern, or -1 if NULL + * @param pos the given position + * @stable ICU 3.2 + */ +U_CAPI UBool U_EXPORT2 +uset_resemblesPattern(const UChar *pattern, int32_t patternLength, + int32_t pos); + +/** + * Returns a string representation of this set. If the result of + * calling this function is passed to a uset_openPattern(), it + * will produce another set that is equal to this one. + * @param set the set + * @param result the string to receive the rules, may be NULL + * @param resultCapacity the capacity of result, may be 0 if result is NULL + * @param escapeUnprintable if true then convert unprintable + * character to their hex escape representations, \\uxxxx or + * \\Uxxxxxxxx. Unprintable characters are those other than + * U+000A, U+0020..U+007E. + * @param ec error code. + * @return length of string, possibly larger than resultCapacity + * @stable ICU 2.4 + */ +U_CAPI int32_t U_EXPORT2 +uset_toPattern(const USet* set, + UChar* result, int32_t resultCapacity, + UBool escapeUnprintable, + UErrorCode* ec); + +/** + * Adds the given character to the given USet. After this call, + * uset_contains(set, c) will return true. + * A frozen set will not be modified. + * @param set the object to which to add the character + * @param c the character to add + * @stable ICU 2.4 + */ +U_CAPI void U_EXPORT2 +uset_add(USet* set, UChar32 c); + +/** + * Adds all of the elements in the specified set to this set if + * they're not already present. This operation effectively + * modifies this set so that its value is the <i>union</i> of the two + * sets. The behavior of this operation is unspecified if the specified + * collection is modified while the operation is in progress. + * A frozen set will not be modified. + * + * @param set the object to which to add the set + * @param additionalSet the source set whose elements are to be added to this set. + * @stable ICU 2.6 + */ +U_CAPI void U_EXPORT2 +uset_addAll(USet* set, const USet *additionalSet); + +/** + * Adds the given range of characters to the given USet. After this call, + * uset_contains(set, start, end) will return true. + * A frozen set will not be modified. + * @param set the object to which to add the character + * @param start the first character of the range to add, inclusive + * @param end the last character of the range to add, inclusive + * @stable ICU 2.2 + */ +U_CAPI void U_EXPORT2 +uset_addRange(USet* set, UChar32 start, UChar32 end); + +/** + * Adds the given string to the given USet. After this call, + * uset_containsString(set, str, strLen) will return true. + * A frozen set will not be modified. + * @param set the object to which to add the character + * @param str the string to add + * @param strLen the length of the string or -1 if null terminated. + * @stable ICU 2.4 + */ +U_CAPI void U_EXPORT2 +uset_addString(USet* set, const UChar* str, int32_t strLen); + +/** + * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} + * If this set already any particular character, it has no effect on that character. + * A frozen set will not be modified. + * @param set the object to which to add the character + * @param str the source string + * @param strLen the length of the string or -1 if null terminated. + * @stable ICU 3.4 + */ +U_CAPI void U_EXPORT2 +uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen); + +/** + * Removes the given character from the given USet. After this call, + * uset_contains(set, c) will return false. + * A frozen set will not be modified. + * @param set the object from which to remove the character + * @param c the character to remove + * @stable ICU 2.4 + */ +U_CAPI void U_EXPORT2 +uset_remove(USet* set, UChar32 c); + +/** + * Removes the given range of characters from the given USet. After this call, + * uset_contains(set, start, end) will return false. + * A frozen set will not be modified. + * @param set the object to which to add the character + * @param start the first character of the range to remove, inclusive + * @param end the last character of the range to remove, inclusive + * @stable ICU 2.2 + */ +U_CAPI void U_EXPORT2 +uset_removeRange(USet* set, UChar32 start, UChar32 end); + +/** + * Removes the given string to the given USet. After this call, + * uset_containsString(set, str, strLen) will return false. + * A frozen set will not be modified. + * @param set the object to which to add the character + * @param str the string to remove + * @param strLen the length of the string or -1 if null terminated. + * @stable ICU 2.4 + */ +U_CAPI void U_EXPORT2 +uset_removeString(USet* set, const UChar* str, int32_t strLen); + +/** + * Removes from this set all of its elements that are contained in the + * specified set. This operation effectively modifies this + * set so that its value is the <i>asymmetric set difference</i> of + * the two sets. + * A frozen set will not be modified. + * @param set the object from which the elements are to be removed + * @param removeSet the object that defines which elements will be + * removed from this set + * @stable ICU 3.2 + */ +U_CAPI void U_EXPORT2 +uset_removeAll(USet* set, const USet* removeSet); + +/** + * Retain only the elements in this set that are contained in the + * specified range. If <code>start > end</code> then an empty range is + * retained, leaving the set empty. This is equivalent to + * a boolean logic AND, or a set INTERSECTION. + * A frozen set will not be modified. + * + * @param set the object for which to retain only the specified range + * @param start first character, inclusive, of range to be retained + * to this set. + * @param end last character, inclusive, of range to be retained + * to this set. + * @stable ICU 3.2 + */ +U_CAPI void U_EXPORT2 +uset_retain(USet* set, UChar32 start, UChar32 end); + +/** + * Retains only the elements in this set that are contained in the + * specified set. In other words, removes from this set all of + * its elements that are not contained in the specified set. This + * operation effectively modifies this set so that its value is + * the <i>intersection</i> of the two sets. + * A frozen set will not be modified. + * + * @param set the object on which to perform the retain + * @param retain set that defines which elements this set will retain + * @stable ICU 3.2 + */ +U_CAPI void U_EXPORT2 +uset_retainAll(USet* set, const USet* retain); + +/** + * Reallocate this objects internal structures to take up the least + * possible space, without changing this object's value. + * A frozen set will not be modified. + * + * @param set the object on which to perfrom the compact + * @stable ICU 3.2 + */ +U_CAPI void U_EXPORT2 +uset_compact(USet* set); + +/** + * Inverts this set. This operation modifies this set so that + * its value is its complement. This operation does not affect + * the multicharacter strings, if any. + * A frozen set will not be modified. + * @param set the set + * @stable ICU 2.4 + */ +U_CAPI void U_EXPORT2 +uset_complement(USet* set); + +/** + * Complements in this set all elements contained in the specified + * set. Any character in the other set will be removed if it is + * in this set, or will be added if it is not in this set. + * A frozen set will not be modified. + * + * @param set the set with which to complement + * @param complement set that defines which elements will be xor'ed + * from this set. + * @stable ICU 3.2 + */ +U_CAPI void U_EXPORT2 +uset_complementAll(USet* set, const USet* complement); + +/** + * Removes all of the elements from this set. This set will be + * empty after this call returns. + * A frozen set will not be modified. + * @param set the set + * @stable ICU 2.4 + */ +U_CAPI void U_EXPORT2 +uset_clear(USet* set); + +/** + * Close this set over the given attribute. For the attribute + * USET_CASE, the result is to modify this set so that: + * + * 1. For each character or string 'a' in this set, all strings or + * characters 'b' such that foldCase(a) == foldCase(b) are added + * to this set. + * + * 2. For each string 'e' in the resulting set, if e != + * foldCase(e), 'e' will be removed. + * + * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}] + * + * (Here foldCase(x) refers to the operation u_strFoldCase, and a + * == b denotes that the contents are the same, not pointer + * comparison.) + * + * A frozen set will not be modified. + * + * @param set the set + * + * @param attributes bitmask for attributes to close over. + * Currently only the USET_CASE bit is supported. Any undefined bits + * are ignored. + * @stable ICU 4.2 + */ +U_CAPI void U_EXPORT2 +uset_closeOver(USet* set, int32_t attributes); + +/** + * Remove all strings from this set. + * + * @param set the set + * @stable ICU 4.2 + */ +U_CAPI void U_EXPORT2 +uset_removeAllStrings(USet* set); + +/** + * Returns true if the given USet contains no characters and no + * strings. + * @param set the set + * @return true if set is empty + * @stable ICU 2.4 + */ +U_CAPI UBool U_EXPORT2 +uset_isEmpty(const USet* set); + +/** + * Returns true if the given USet contains the given character. + * This function works faster with a frozen set. + * @param set the set + * @param c The codepoint to check for within the set + * @return true if set contains c + * @stable ICU 2.4 + */ +U_CAPI UBool U_EXPORT2 +uset_contains(const USet* set, UChar32 c); + +/** + * Returns true if the given USet contains all characters c + * where start <= c && c <= end. + * @param set the set + * @param start the first character of the range to test, inclusive + * @param end the last character of the range to test, inclusive + * @return true if set contains the range + * @stable ICU 2.2 + */ +U_CAPI UBool U_EXPORT2 +uset_containsRange(const USet* set, UChar32 start, UChar32 end); + +/** + * Returns true if the given USet contains the given string. + * @param set the set + * @param str the string + * @param strLen the length of the string or -1 if null terminated. + * @return true if set contains str + * @stable ICU 2.4 + */ +U_CAPI UBool U_EXPORT2 +uset_containsString(const USet* set, const UChar* str, int32_t strLen); + +/** + * Returns the index of the given character within this set, where + * the set is ordered by ascending code point. If the character + * is not in this set, return -1. The inverse of this method is + * <code>charAt()</code>. + * @param set the set + * @param c the character to obtain the index for + * @return an index from 0..size()-1, or -1 + * @stable ICU 3.2 + */ +U_CAPI int32_t U_EXPORT2 +uset_indexOf(const USet* set, UChar32 c); + +/** + * Returns the character at the given index within this set, where + * the set is ordered by ascending code point. If the index is + * out of range, return (UChar32)-1. The inverse of this method is + * <code>indexOf()</code>. + * @param set the set + * @param charIndex an index from 0..size()-1 to obtain the char for + * @return the character at the given index, or (UChar32)-1. + * @stable ICU 3.2 + */ +U_CAPI UChar32 U_EXPORT2 +uset_charAt(const USet* set, int32_t charIndex); + +/** + * Returns the number of characters and strings contained in the given + * USet. + * @param set the set + * @return a non-negative integer counting the characters and strings + * contained in set + * @stable ICU 2.4 + */ +U_CAPI int32_t U_EXPORT2 +uset_size(const USet* set); + +/** + * Returns the number of items in this set. An item is either a range + * of characters or a single multicharacter string. + * @param set the set + * @return a non-negative integer counting the character ranges + * and/or strings contained in set + * @stable ICU 2.4 + */ +U_CAPI int32_t U_EXPORT2 +uset_getItemCount(const USet* set); + +/** + * Returns an item of this set. An item is either a range of + * characters or a single multicharacter string. + * @param set the set + * @param itemIndex a non-negative integer in the range 0.. + * uset_getItemCount(set)-1 + * @param start pointer to variable to receive first character + * in range, inclusive + * @param end pointer to variable to receive last character in range, + * inclusive + * @param str buffer to receive the string, may be NULL + * @param strCapacity capacity of str, or 0 if str is NULL + * @param ec error code + * @return the length of the string (>= 2), or 0 if the item is a + * range, in which case it is the range *start..*end, or -1 if + * itemIndex is out of range + * @stable ICU 2.4 + */ +U_CAPI int32_t U_EXPORT2 +uset_getItem(const USet* set, int32_t itemIndex, + UChar32* start, UChar32* end, + UChar* str, int32_t strCapacity, + UErrorCode* ec); + +/** + * Returns true if set1 contains all the characters and strings + * of set2. It answers the question, 'Is set1 a superset of set2?' + * @param set1 set to be checked for containment + * @param set2 set to be checked for containment + * @return true if the test condition is met + * @stable ICU 3.2 + */ +U_CAPI UBool U_EXPORT2 +uset_containsAll(const USet* set1, const USet* set2); + +/** + * Returns true if this set contains all the characters + * of the given string. This is does not check containment of grapheme + * clusters, like uset_containsString. + * @param set set of characters to be checked for containment + * @param str string containing codepoints to be checked for containment + * @param strLen the length of the string or -1 if null terminated. + * @return true if the test condition is met + * @stable ICU 3.4 + */ +U_CAPI UBool U_EXPORT2 +uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen); + +/** + * Returns true if set1 contains none of the characters and strings + * of set2. It answers the question, 'Is set1 a disjoint set of set2?' + * @param set1 set to be checked for containment + * @param set2 set to be checked for containment + * @return true if the test condition is met + * @stable ICU 3.2 + */ +U_CAPI UBool U_EXPORT2 +uset_containsNone(const USet* set1, const USet* set2); + +/** + * Returns true if set1 contains some of the characters and strings + * of set2. It answers the question, 'Does set1 and set2 have an intersection?' + * @param set1 set to be checked for containment + * @param set2 set to be checked for containment + * @return true if the test condition is met + * @stable ICU 3.2 + */ +U_CAPI UBool U_EXPORT2 +uset_containsSome(const USet* set1, const USet* set2); + +/** + * Returns the length of the initial substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Similar to the strspn() C library function. + * Unpaired surrogates are treated according to contains() of their surrogate code points. + * This function works faster with a frozen set and with a non-negative string length argument. + * @param set the set + * @param s start of the string + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the length of the initial substring according to the spanCondition; + * 0 if the start of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ +U_CAPI int32_t U_EXPORT2 +uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); + +/** + * Returns the start of the trailing substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Unpaired surrogates are treated according to contains() of their surrogate code points. + * This function works faster with a frozen set and with a non-negative string length argument. + * @param set the set + * @param s start of the string + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the start of the trailing substring according to the spanCondition; + * the string length if the end of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ +U_CAPI int32_t U_EXPORT2 +uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); + +/** + * Returns the length of the initial substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Similar to the strspn() C library function. + * Malformed byte sequences are treated according to contains(0xfffd). + * This function works faster with a frozen set and with a non-negative string length argument. + * @param set the set + * @param s start of the string (UTF-8) + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the length of the initial substring according to the spanCondition; + * 0 if the start of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ +U_CAPI int32_t U_EXPORT2 +uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); + +/** + * Returns the start of the trailing substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Malformed byte sequences are treated according to contains(0xfffd). + * This function works faster with a frozen set and with a non-negative string length argument. + * @param set the set + * @param s start of the string (UTF-8) + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the start of the trailing substring according to the spanCondition; + * the string length if the end of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ +U_CAPI int32_t U_EXPORT2 +uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); + +/** + * Returns true if set1 contains all of the characters and strings + * of set2, and vis versa. It answers the question, 'Is set1 equal to set2?' + * @param set1 set to be checked for containment + * @param set2 set to be checked for containment + * @return true if the test condition is met + * @stable ICU 3.2 + */ +U_CAPI UBool U_EXPORT2 +uset_equals(const USet* set1, const USet* set2); + +/********************************************************************* + * Serialized set API + *********************************************************************/ + +/** + * Serializes this set into an array of 16-bit integers. Serialization + * (currently) only records the characters in the set; multicharacter + * strings are ignored. + * + * The array + * has following format (each line is one 16-bit integer): + * + * length = (n+2*m) | (m!=0?0x8000:0) + * bmpLength = n; present if m!=0 + * bmp[0] + * bmp[1] + * ... + * bmp[n-1] + * supp-high[0] + * supp-low[0] + * supp-high[1] + * supp-low[1] + * ... + * supp-high[m-1] + * supp-low[m-1] + * + * The array starts with a header. After the header are n bmp + * code points, then m supplementary code points. Either n or m + * or both may be zero. n+2*m is always <= 0x7FFF. + * + * If there are no supplementary characters (if m==0) then the + * header is one 16-bit integer, 'length', with value n. + * + * If there are supplementary characters (if m!=0) then the header + * is two 16-bit integers. The first, 'length', has value + * (n+2*m)|0x8000. The second, 'bmpLength', has value n. + * + * After the header the code points are stored in ascending order. + * Supplementary code points are stored as most significant 16 + * bits followed by least significant 16 bits. + * + * @param set the set + * @param dest pointer to buffer of destCapacity 16-bit integers. + * May be NULL only if destCapacity is zero. + * @param destCapacity size of dest, or zero. Must not be negative. + * @param pErrorCode pointer to the error code. Will be set to + * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to + * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity. + * @return the total length of the serialized format, including + * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other + * than U_BUFFER_OVERFLOW_ERROR. + * @stable ICU 2.4 + */ +U_CAPI int32_t U_EXPORT2 +uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode); + +/** + * Given a serialized array, fill in the given serialized set object. + * @param fillSet pointer to result + * @param src pointer to start of array + * @param srcLength length of array + * @return true if the given array is valid, otherwise false + * @stable ICU 2.4 + */ +U_CAPI UBool U_EXPORT2 +uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength); + +/** + * Set the USerializedSet to contain the given character (and nothing + * else). + * @param fillSet pointer to result + * @param c The codepoint to set + * @stable ICU 2.4 + */ +U_CAPI void U_EXPORT2 +uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c); + +/** + * Returns true if the given USerializedSet contains the given + * character. + * @param set the serialized set + * @param c The codepoint to check for within the set + * @return true if set contains c + * @stable ICU 2.4 + */ +U_CAPI UBool U_EXPORT2 +uset_serializedContains(const USerializedSet* set, UChar32 c); + +/** + * Returns the number of disjoint ranges of characters contained in + * the given serialized set. Ignores any strings contained in the + * set. + * @param set the serialized set + * @return a non-negative integer counting the character ranges + * contained in set + * @stable ICU 2.4 + */ +U_CAPI int32_t U_EXPORT2 +uset_getSerializedRangeCount(const USerializedSet* set); + +/** + * Returns a range of characters contained in the given serialized + * set. + * @param set the serialized set + * @param rangeIndex a non-negative integer in the range 0.. + * uset_getSerializedRangeCount(set)-1 + * @param pStart pointer to variable to receive first character + * in range, inclusive + * @param pEnd pointer to variable to receive last character in range, + * inclusive + * @return true if rangeIndex is valid, otherwise false + * @stable ICU 2.4 + */ +U_CAPI UBool U_EXPORT2 +uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, + UChar32* pStart, UChar32* pEnd); + +#endif diff --git a/thirdparty/icu4c/common/unicode/usetiter.h b/thirdparty/icu4c/common/unicode/usetiter.h new file mode 100644 index 0000000000..a817ef72b3 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/usetiter.h @@ -0,0 +1,325 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2002-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ +#ifndef USETITER_H +#define USETITER_H + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#include "unicode/uobject.h" +#include "unicode/unistr.h" + +/** + * \file + * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet. + */ + +U_NAMESPACE_BEGIN + +class UnicodeSet; +class UnicodeString; + +/** + * + * UnicodeSetIterator iterates over the contents of a UnicodeSet. It + * iterates over either code points or code point ranges. After all + * code points or ranges have been returned, it returns the + * multicharacter strings of the UnicodeSet, if any. + * + * This class is not intended to be subclassed. Consider any fields + * or methods declared as "protected" to be private. The use of + * protected in this class is an artifact of history. + * + * <p>To iterate over code points and strings, use a loop like this: + * <pre> + * UnicodeSetIterator it(set); + * while (it.next()) { + * processItem(it.getString()); + * } + * </pre> + * <p>Each item in the set is accessed as a string. Set elements + * consisting of single code points are returned as strings containing + * just the one code point. + * + * <p>To iterate over code point ranges, instead of individual code points, + * use a loop like this: + * <pre> + * UnicodeSetIterator it(set); + * while (it.nextRange()) { + * if (it.isString()) { + * processString(it.getString()); + * } else { + * processCodepointRange(it.getCodepoint(), it.getCodepointEnd()); + * } + * } + * </pre> + * @author M. Davis + * @stable ICU 2.4 + */ +class U_COMMON_API UnicodeSetIterator : public UObject { + + protected: + + /** + * Value of <tt>codepoint</tt> if the iterator points to a string. + * If <tt>codepoint == IS_STRING</tt>, then examine + * <tt>string</tt> for the current iteration result. + * @stable ICU 2.4 + */ + enum { IS_STRING = -1 }; + + /** + * Current code point, or the special value <tt>IS_STRING</tt>, if + * the iterator points to a string. + * @stable ICU 2.4 + */ + UChar32 codepoint; + + /** + * When iterating over ranges using <tt>nextRange()</tt>, + * <tt>codepointEnd</tt> contains the inclusive end of the + * iteration range, if <tt>codepoint != IS_STRING</tt>. If + * iterating over code points using <tt>next()</tt>, or if + * <tt>codepoint == IS_STRING</tt>, then the value of + * <tt>codepointEnd</tt> is undefined. + * @stable ICU 2.4 + */ + UChar32 codepointEnd; + + /** + * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points + * to the current string. If <tt>codepoint != IS_STRING</tt>, the + * value of <tt>string</tt> is undefined. + * @stable ICU 2.4 + */ + const UnicodeString* string; + + public: + + /** + * Create an iterator over the given set. The iterator is valid + * only so long as <tt>set</tt> is valid. + * @param set set to iterate over + * @stable ICU 2.4 + */ + UnicodeSetIterator(const UnicodeSet& set); + + /** + * Create an iterator over nothing. <tt>next()</tt> and + * <tt>nextRange()</tt> return false. This is a convenience + * constructor allowing the target to be set later. + * @stable ICU 2.4 + */ + UnicodeSetIterator(); + + /** + * Destructor. + * @stable ICU 2.4 + */ + virtual ~UnicodeSetIterator(); + + /** + * Returns true if the current element is a string. If so, the + * caller can retrieve it with <tt>getString()</tt>. If this + * method returns false, the current element is a code point or + * code point range, depending on whether <tt>next()</tt> or + * <tt>nextRange()</tt> was called. + * Elements of types string and codepoint can both be retrieved + * with the function <tt>getString()</tt>. + * Elements of type codepoint can also be retrieved with + * <tt>getCodepoint()</tt>. + * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint + * of the range, and <tt>getCodepointEnd()</tt> returns the end + * of the range. + * @stable ICU 2.4 + */ + inline UBool isString() const; + + /** + * Returns the current code point, if <tt>isString()</tt> returned + * false. Otherwise returns an undefined result. + * @stable ICU 2.4 + */ + inline UChar32 getCodepoint() const; + + /** + * Returns the end of the current code point range, if + * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was + * called. Otherwise returns an undefined result. + * @stable ICU 2.4 + */ + inline UChar32 getCodepointEnd() const; + + /** + * Returns the current string, if <tt>isString()</tt> returned + * true. If the current iteration item is a code point, a UnicodeString + * containing that single code point is returned. + * + * Ownership of the returned string remains with the iterator. + * The string is guaranteed to remain valid only until the iterator is + * advanced to the next item, or until the iterator is deleted. + * + * @stable ICU 2.4 + */ + const UnicodeString& getString(); + + /** + * Advances the iteration position to the next element in the set, + * which can be either a single code point or a string. + * If there are no more elements in the set, return false. + * + * <p> + * If <tt>isString() == true</tt>, the value is a + * string, otherwise the value is a + * single code point. Elements of either type can be retrieved + * with the function <tt>getString()</tt>, while elements of + * consisting of a single code point can be retrieved with + * <tt>getCodepoint()</tt> + * + * <p>The order of iteration is all code points in sorted order, + * followed by all strings sorted order. Do not mix + * calls to <tt>next()</tt> and <tt>nextRange()</tt> without + * calling <tt>reset()</tt> between them. The results of doing so + * are undefined. + * + * @return true if there was another element in the set. + * @stable ICU 2.4 + */ + UBool next(); + + /** + * Returns the next element in the set, either a code point range + * or a string. If there are no more elements in the set, return + * false. If <tt>isString() == true</tt>, the value is a + * string and can be accessed with <tt>getString()</tt>. Otherwise the value is a + * range of one or more code points from <tt>getCodepoint()</tt> to + * <tt>getCodepointeEnd()</tt> inclusive. + * + * <p>The order of iteration is all code points ranges in sorted + * order, followed by all strings sorted order. Ranges are + * disjoint and non-contiguous. The value returned from <tt>getString()</tt> + * is undefined unless <tt>isString() == true</tt>. Do not mix calls to + * <tt>next()</tt> and <tt>nextRange()</tt> without calling + * <tt>reset()</tt> between them. The results of doing so are + * undefined. + * + * @return true if there was another element in the set. + * @stable ICU 2.4 + */ + UBool nextRange(); + + /** + * Sets this iterator to visit the elements of the given set and + * resets it to the start of that set. The iterator is valid only + * so long as <tt>set</tt> is valid. + * @param set the set to iterate over. + * @stable ICU 2.4 + */ + void reset(const UnicodeSet& set); + + /** + * Resets this iterator to the start of the set. + * @stable ICU 2.4 + */ + void reset(); + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.4 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.4 + */ + virtual UClassID getDynamicClassID() const; + + // ======================= PRIVATES =========================== + + protected: + + // endElement and nextElements are really UChar32's, but we keep + // them as signed int32_t's so we can do comparisons with + // endElement set to -1. Leave them as int32_t's. + /** The set + * @stable ICU 2.4 + */ + const UnicodeSet* set; + /** End range + * @stable ICU 2.4 + */ + int32_t endRange; + /** Range + * @stable ICU 2.4 + */ + int32_t range; + /** End element + * @stable ICU 2.4 + */ + int32_t endElement; + /** Next element + * @stable ICU 2.4 + */ + int32_t nextElement; + //UBool abbreviated; + /** Next string + * @stable ICU 2.4 + */ + int32_t nextString; + /** String count + * @stable ICU 2.4 + */ + int32_t stringCount; + + /** + * Points to the string to use when the caller asks for a + * string and the current iteration item is a code point, not a string. + * @internal + */ + UnicodeString *cpString; + + /** Copy constructor. Disallowed. + * @stable ICU 2.4 + */ + UnicodeSetIterator(const UnicodeSetIterator&); // disallow + + /** Assignment operator. Disallowed. + * @stable ICU 2.4 + */ + UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow + + /** Load range + * @stable ICU 2.4 + */ + virtual void loadRange(int32_t range); + +}; + +inline UBool UnicodeSetIterator::isString() const { + return codepoint == (UChar32)IS_STRING; +} + +inline UChar32 UnicodeSetIterator::getCodepoint() const { + return codepoint; +} + +inline UChar32 UnicodeSetIterator::getCodepointEnd() const { + return codepointEnd; +} + + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/ushape.h b/thirdparty/icu4c/common/unicode/ushape.h new file mode 100644 index 0000000000..fed4869abd --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ushape.h @@ -0,0 +1,476 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2000-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ushape.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000jun29 +* created by: Markus W. Scherer +*/ + +#ifndef __USHAPE_H__ +#define __USHAPE_H__ + +#include "unicode/utypes.h" + +/** + * \file + * \brief C API: Arabic shaping + * + */ + +/** + * Shape Arabic text on a character basis. + * + * <p>This function performs basic operations for "shaping" Arabic text. It is most + * useful for use with legacy data formats and legacy display technology + * (simple terminals). All operations are performed on Unicode characters.</p> + * + * <p>Text-based shaping means that some character code points in the text are + * replaced by others depending on the context. It transforms one kind of text + * into another. In comparison, modern displays for Arabic text select + * appropriate, context-dependent font glyphs for each text element, which means + * that they transform text into a glyph vector.</p> + * + * <p>Text transformations are necessary when modern display technology is not + * available or when text needs to be transformed to or from legacy formats that + * use "shaped" characters. Since the Arabic script is cursive, connecting + * adjacent letters to each other, computers select images for each letter based + * on the surrounding letters. This usually results in four images per Arabic + * letter: initial, middle, final, and isolated forms. In Unicode, on the other + * hand, letters are normally stored abstract, and a display system is expected + * to select the necessary glyphs. (This makes searching and other text + * processing easier because the same letter has only one code.) It is possible + * to mimic this with text transformations because there are characters in + * Unicode that are rendered as letters with a specific shape + * (or cursive connectivity). They were included for interoperability with + * legacy systems and codepages, and for unsophisticated display systems.</p> + * + * <p>A second kind of text transformations is supported for Arabic digits: + * For compatibility with legacy codepages that only include European digits, + * it is possible to replace one set of digits by another, changing the + * character code points. These operations can be performed for either + * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic + * digits (U+06f0...U+06f9).</p> + * + * <p>Some replacements may result in more or fewer characters (code points). + * By default, this means that the destination buffer may receive text with a + * length different from the source length. Some legacy systems rely on the + * length of the text to be constant. They expect extra spaces to be added + * or consumed either next to the affected character or at the end of the + * text.</p> + * + * <p>For details about the available operations, see the description of the + * <code>U_SHAPE_...</code> options.</p> + * + * @param source The input text. + * + * @param sourceLength The number of UChars in <code>source</code>. + * + * @param dest The destination buffer that will receive the results of the + * requested operations. It may be <code>NULL</code> only if + * <code>destSize</code> is 0. The source and destination must not + * overlap. + * + * @param destSize The size (capacity) of the destination buffer in UChars. + * If <code>destSize</code> is 0, then no output is produced, + * but the necessary buffer size is returned ("preflighting"). + * + * @param options This is a 32-bit set of flags that specify the operations + * that are performed on the input text. If no error occurs, + * then the result will always be written to the destination + * buffer. + * + * @param pErrorCode must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @return The number of UChars written to the destination buffer. + * If an error occurred, then no output was written, or it may be + * incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then + * the return value indicates the necessary destination buffer size. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_shapeArabic(const UChar *source, int32_t sourceLength, + UChar *dest, int32_t destSize, + uint32_t options, + UErrorCode *pErrorCode); + +/** + * Memory option: allow the result to have a different length than the source. + * Affects: LamAlef options + * @stable ICU 2.0 + */ +#define U_SHAPE_LENGTH_GROW_SHRINK 0 + +/** + * Memory option: allow the result to have a different length than the source. + * Affects: LamAlef options + * This option is an alias to U_SHAPE_LENGTH_GROW_SHRINK + * @stable ICU 4.2 + */ +#define U_SHAPE_LAMALEF_RESIZE 0 + +/** + * Memory option: the result must have the same length as the source. + * If more room is necessary, then try to consume spaces next to modified characters. + * @stable ICU 2.0 + */ +#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR 1 + +/** + * Memory option: the result must have the same length as the source. + * If more room is necessary, then try to consume spaces next to modified characters. + * Affects: LamAlef options + * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_NEAR + * @stable ICU 4.2 + */ +#define U_SHAPE_LAMALEF_NEAR 1 + +/** + * Memory option: the result must have the same length as the source. + * If more room is necessary, then try to consume spaces at the end of the text. + * @stable ICU 2.0 + */ +#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END 2 + +/** + * Memory option: the result must have the same length as the source. + * If more room is necessary, then try to consume spaces at the end of the text. + * Affects: LamAlef options + * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_END + * @stable ICU 4.2 + */ +#define U_SHAPE_LAMALEF_END 2 + +/** + * Memory option: the result must have the same length as the source. + * If more room is necessary, then try to consume spaces at the beginning of the text. + * @stable ICU 2.0 + */ +#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3 + +/** + * Memory option: the result must have the same length as the source. + * If more room is necessary, then try to consume spaces at the beginning of the text. + * Affects: LamAlef options + * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING + * @stable ICU 4.2 + */ +#define U_SHAPE_LAMALEF_BEGIN 3 + + +/** + * Memory option: the result must have the same length as the source. + * Shaping Mode: For each LAMALEF character found, expand LAMALEF using space at end. + * If there is no space at end, use spaces at beginning of the buffer. If there + * is no space at beginning of the buffer, use spaces at the near (i.e. the space + * after the LAMALEF character). + * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h) + * will be set in pErrorCode + * + * Deshaping Mode: Perform the same function as the flag equals U_SHAPE_LAMALEF_END. + * Affects: LamAlef options + * @stable ICU 4.2 + */ +#define U_SHAPE_LAMALEF_AUTO 0x10000 + +/** Bit mask for memory options. @stable ICU 2.0 */ +#define U_SHAPE_LENGTH_MASK 0x10003 /* Changed old value 3 */ + + +/** + * Bit mask for LamAlef memory options. + * @stable ICU 4.2 + */ +#define U_SHAPE_LAMALEF_MASK 0x10003 /* updated */ + +/** Direction indicator: the source is in logical (keyboard) order. @stable ICU 2.0 */ +#define U_SHAPE_TEXT_DIRECTION_LOGICAL 0 + +/** + * Direction indicator: + * the source is in visual RTL order, + * the rightmost displayed character stored first. + * This option is an alias to U_SHAPE_TEXT_DIRECTION_LOGICAL + * @stable ICU 4.2 + */ +#define U_SHAPE_TEXT_DIRECTION_VISUAL_RTL 0 + +/** + * Direction indicator: + * the source is in visual LTR order, + * the leftmost displayed character stored first. + * @stable ICU 2.0 + */ +#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR 4 + +/** Bit mask for direction indicators. @stable ICU 2.0 */ +#define U_SHAPE_TEXT_DIRECTION_MASK 4 + + +/** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */ +#define U_SHAPE_LETTERS_NOOP 0 + +/** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */ +#define U_SHAPE_LETTERS_SHAPE 8 + +/** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */ +#define U_SHAPE_LETTERS_UNSHAPE 0x10 + +/** + * Letter shaping option: replace abstract letter characters by "shaped" ones. + * The only difference with U_SHAPE_LETTERS_SHAPE is that Tashkeel letters + * are always "shaped" into the isolated form instead of the medial form + * (selecting code points from the Arabic Presentation Forms-B block). + * @stable ICU 2.0 + */ +#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18 + + +/** Bit mask for letter shaping options. @stable ICU 2.0 */ +#define U_SHAPE_LETTERS_MASK 0x18 + + +/** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */ +#define U_SHAPE_DIGITS_NOOP 0 + +/** + * Digit shaping option: + * Replace European digits (U+0030...) by Arabic-Indic digits. + * @stable ICU 2.0 + */ +#define U_SHAPE_DIGITS_EN2AN 0x20 + +/** + * Digit shaping option: + * Replace Arabic-Indic digits by European digits (U+0030...). + * @stable ICU 2.0 + */ +#define U_SHAPE_DIGITS_AN2EN 0x40 + +/** + * Digit shaping option: + * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent + * strongly directional character is an Arabic letter + * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br> + * The direction of "preceding" depends on the direction indicator option. + * For the first characters, the preceding strongly directional character + * (initial state) is assumed to be not an Arabic letter + * (it is <code>U_LEFT_TO_RIGHT</code> [L] or <code>U_RIGHT_TO_LEFT</code> [R]). + * @stable ICU 2.0 + */ +#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR 0x60 + +/** + * Digit shaping option: + * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent + * strongly directional character is an Arabic letter + * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br> + * The direction of "preceding" depends on the direction indicator option. + * For the first characters, the preceding strongly directional character + * (initial state) is assumed to be an Arabic letter. + * @stable ICU 2.0 + */ +#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL 0x80 + +/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */ +#define U_SHAPE_DIGITS_RESERVED 0xa0 + +/** Bit mask for digit shaping options. @stable ICU 2.0 */ +#define U_SHAPE_DIGITS_MASK 0xe0 + + +/** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */ +#define U_SHAPE_DIGIT_TYPE_AN 0 + +/** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */ +#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED 0x100 + +/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */ +#define U_SHAPE_DIGIT_TYPE_RESERVED 0x200 + +/** Bit mask for digit type options. @stable ICU 2.0 */ +#define U_SHAPE_DIGIT_TYPE_MASK 0x300 /* I need to change this from 0x3f00 to 0x300 */ + +/** + * Tashkeel aggregation option: + * Replaces any combination of U+0651 with one of + * U+064C, U+064D, U+064E, U+064F, U+0650 with + * U+FC5E, U+FC5F, U+FC60, U+FC61, U+FC62 consecutively. + * @stable ICU 3.6 + */ +#define U_SHAPE_AGGREGATE_TASHKEEL 0x4000 +/** Tashkeel aggregation option: do not aggregate tashkeels. @stable ICU 3.6 */ +#define U_SHAPE_AGGREGATE_TASHKEEL_NOOP 0 +/** Bit mask for tashkeel aggregation. @stable ICU 3.6 */ +#define U_SHAPE_AGGREGATE_TASHKEEL_MASK 0x4000 + +/** + * Presentation form option: + * Don't replace Arabic Presentation Forms-A and Arabic Presentation Forms-B + * characters with 0+06xx characters, before shaping. + * @stable ICU 3.6 + */ +#define U_SHAPE_PRESERVE_PRESENTATION 0x8000 +/** Presentation form option: + * Replace Arabic Presentation Forms-A and Arabic Presentationo Forms-B with + * their unshaped correspondants in range 0+06xx, before shaping. + * @stable ICU 3.6 + */ +#define U_SHAPE_PRESERVE_PRESENTATION_NOOP 0 +/** Bit mask for preserve presentation form. @stable ICU 3.6 */ +#define U_SHAPE_PRESERVE_PRESENTATION_MASK 0x8000 + +/* Seen Tail option */ +/** + * Memory option: the result must have the same length as the source. + * Shaping mode: The SEEN family character will expand into two characters using space near + * the SEEN family character(i.e. the space after the character). + * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h) + * will be set in pErrorCode + * + * De-shaping mode: Any Seen character followed by Tail character will be + * replaced by one cell Seen and a space will replace the Tail. + * Affects: Seen options + * @stable ICU 4.2 + */ +#define U_SHAPE_SEEN_TWOCELL_NEAR 0x200000 + +/** + * Bit mask for Seen memory options. + * @stable ICU 4.2 + */ +#define U_SHAPE_SEEN_MASK 0x700000 + +/* YehHamza option */ +/** + * Memory option: the result must have the same length as the source. + * Shaping mode: The YEHHAMZA character will expand into two characters using space near it + * (i.e. the space after the character + * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h) + * will be set in pErrorCode + * + * De-shaping mode: Any Yeh (final or isolated) character followed by Hamza character will be + * replaced by one cell YehHamza and space will replace the Hamza. + * Affects: YehHamza options + * @stable ICU 4.2 + */ +#define U_SHAPE_YEHHAMZA_TWOCELL_NEAR 0x1000000 + + +/** + * Bit mask for YehHamza memory options. + * @stable ICU 4.2 + */ +#define U_SHAPE_YEHHAMZA_MASK 0x3800000 + +/* New Tashkeel options */ +/** + * Memory option: the result must have the same length as the source. + * Shaping mode: Tashkeel characters will be replaced by spaces. + * Spaces will be placed at beginning of the buffer + * + * De-shaping mode: N/A + * Affects: Tashkeel options + * @stable ICU 4.2 + */ +#define U_SHAPE_TASHKEEL_BEGIN 0x40000 + +/** + * Memory option: the result must have the same length as the source. + * Shaping mode: Tashkeel characters will be replaced by spaces. + * Spaces will be placed at end of the buffer + * + * De-shaping mode: N/A + * Affects: Tashkeel options + * @stable ICU 4.2 + */ +#define U_SHAPE_TASHKEEL_END 0x60000 + +/** + * Memory option: allow the result to have a different length than the source. + * Shaping mode: Tashkeel characters will be removed, buffer length will shrink. + * De-shaping mode: N/A + * + * Affect: Tashkeel options + * @stable ICU 4.2 + */ +#define U_SHAPE_TASHKEEL_RESIZE 0x80000 + +/** + * Memory option: the result must have the same length as the source. + * Shaping mode: Tashkeel characters will be replaced by Tatweel if it is connected to adjacent + * characters (i.e. shaped on Tatweel) or replaced by space if it is not connected. + * + * De-shaping mode: N/A + * Affects: YehHamza options + * @stable ICU 4.2 + */ +#define U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL 0xC0000 + +/** + * Bit mask for Tashkeel replacement with Space or Tatweel memory options. + * @stable ICU 4.2 + */ +#define U_SHAPE_TASHKEEL_MASK 0xE0000 + + +/* Space location Control options */ +/** + * This option affect the meaning of BEGIN and END options. if this option is not used the default + * for BEGIN and END will be as following: + * The Default (for both Visual LTR, Visual RTL and Logical Text) + * 1. BEGIN always refers to the start address of physical memory. + * 2. END always refers to the end address of physical memory. + * + * If this option is used it will swap the meaning of BEGIN and END only for Visual LTR text. + * + * The effect on BEGIN and END Memory Options will be as following: + * A. BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text( + * corresponding to the physical memory address end for Visual LTR text, Same as END in + * default behavior) + * B. BEGIN For Logical text: Same as BEGIN in default behavior. + * C. END For Visual LTR text: This will be the end (left side) of the visual text (corresponding + * to the physical memory address beginning for Visual LTR text, Same as BEGIN in default behavior. + * D. END For Logical text: Same as END in default behavior). + * Affects: All LamAlef BEGIN, END and AUTO options. + * @stable ICU 4.2 + */ +#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END 0x4000000 + +/** + * Bit mask for swapping BEGIN and END for Visual LTR text + * @stable ICU 4.2 + */ +#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK 0x4000000 + +/** + * If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73). + * If this option is not specified (Default), old unofficial Unicode TAIL code point is used (i.e. 0x200B) + * De-shaping will not use this option as it will always search for both the new Unicode code point for the + * TAIL (i.e. 0xFE73) or the old unofficial Unicode TAIL code point (i.e. 0x200B) and de-shape the + * Seen-Family letter accordingly. + * + * Shaping Mode: Only shaping. + * De-shaping Mode: N/A. + * Affects: All Seen options + * @stable ICU 4.8 + */ +#define U_SHAPE_TAIL_NEW_UNICODE 0x8000000 + +/** + * Bit mask for new Unicode Tail option + * @stable ICU 4.8 + */ +#define U_SHAPE_TAIL_TYPE_MASK 0x8000000 + +#endif diff --git a/thirdparty/icu4c/common/unicode/usprep.h b/thirdparty/icu4c/common/unicode/usprep.h new file mode 100644 index 0000000000..f8a0f58e0d --- /dev/null +++ b/thirdparty/icu4c/common/unicode/usprep.h @@ -0,0 +1,274 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * + * Copyright (C) 2003-2014, International Business Machines + * Corporation and others. All Rights Reserved. + * + ******************************************************************************* + * file name: usprep.h + * encoding: UTF-8 + * tab size: 8 (not used) + * indentation:4 + * + * created on: 2003jul2 + * created by: Ram Viswanadha + */ + +#ifndef __USPREP_H__ +#define __USPREP_H__ + +/** + * \file + * \brief C API: Implements the StringPrep algorithm. + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#endif // U_SHOW_CPLUSPLUS_API + +/** + * + * StringPrep API implements the StingPrep framework as described by RFC 3454. + * StringPrep prepares Unicode strings for use in network protocols. + * Profiles of StingPrep are set of rules and data according to with the + * Unicode Strings are prepared. Each profiles contains tables which describe + * how a code point should be treated. The tables are broadly classified into + * <ul> + * <li> Unassigned Table: Contains code points that are unassigned + * in the Unicode Version supported by StringPrep. Currently + * RFC 3454 supports Unicode 3.2. </li> + * <li> Prohibited Table: Contains code points that are prohibited from + * the output of the StringPrep processing function. </li> + * <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li> + * </ul> + * + * The procedure for preparing Unicode strings: + * <ol> + * <li> Map: For each character in the input, check if it has a mapping + * and, if so, replace it with its mapping. </li> + * <li> Normalize: Possibly normalize the result of step 1 using Unicode + * normalization. </li> + * <li> Prohibit: Check for any characters that are not allowed in the + * output. If any are found, return an error.</li> + * <li> Check bidi: Possibly check for right-to-left characters, and if + * any are found, make sure that the whole string satisfies the + * requirements for bidirectional strings. If the string does not + * satisfy the requirements for bidirectional strings, return an + * error. </li> + * </ol> + * @author Ram Viswanadha + */ +#if !UCONFIG_NO_IDNA + +#include "unicode/parseerr.h" + +/** + * The StringPrep profile + * @stable ICU 2.8 + */ +typedef struct UStringPrepProfile UStringPrepProfile; + + +/** + * Option to prohibit processing of unassigned code points in the input + * + * @see usprep_prepare + * @stable ICU 2.8 + */ +#define USPREP_DEFAULT 0x0000 + +/** + * Option to allow processing of unassigned code points in the input + * + * @see usprep_prepare + * @stable ICU 2.8 + */ +#define USPREP_ALLOW_UNASSIGNED 0x0001 + +/** + * enums for the standard stringprep profile types + * supported by usprep_openByType. + * @see usprep_openByType + * @stable ICU 4.2 + */ +typedef enum UStringPrepProfileType { + /** + * RFC3491 Nameprep + * @stable ICU 4.2 + */ + USPREP_RFC3491_NAMEPREP, + /** + * RFC3530 nfs4_cs_prep + * @stable ICU 4.2 + */ + USPREP_RFC3530_NFS4_CS_PREP, + /** + * RFC3530 nfs4_cs_prep with case insensitive option + * @stable ICU 4.2 + */ + USPREP_RFC3530_NFS4_CS_PREP_CI, + /** + * RFC3530 nfs4_cis_prep + * @stable ICU 4.2 + */ + USPREP_RFC3530_NFS4_CIS_PREP, + /** + * RFC3530 nfs4_mixed_prep for prefix + * @stable ICU 4.2 + */ + USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX, + /** + * RFC3530 nfs4_mixed_prep for suffix + * @stable ICU 4.2 + */ + USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX, + /** + * RFC3722 iSCSI + * @stable ICU 4.2 + */ + USPREP_RFC3722_ISCSI, + /** + * RFC3920 XMPP Nodeprep + * @stable ICU 4.2 + */ + USPREP_RFC3920_NODEPREP, + /** + * RFC3920 XMPP Resourceprep + * @stable ICU 4.2 + */ + USPREP_RFC3920_RESOURCEPREP, + /** + * RFC4011 Policy MIB Stringprep + * @stable ICU 4.2 + */ + USPREP_RFC4011_MIB, + /** + * RFC4013 SASLprep + * @stable ICU 4.2 + */ + USPREP_RFC4013_SASLPREP, + /** + * RFC4505 trace + * @stable ICU 4.2 + */ + USPREP_RFC4505_TRACE, + /** + * RFC4518 LDAP + * @stable ICU 4.2 + */ + USPREP_RFC4518_LDAP, + /** + * RFC4518 LDAP for case ignore, numeric and stored prefix + * matching rules + * @stable ICU 4.2 + */ + USPREP_RFC4518_LDAP_CI +} UStringPrepProfileType; + +/** + * Creates a StringPrep profile from the data file. + * + * @param path string containing the full path pointing to the directory + * where the profile reside followed by the package name + * e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system. + * if NULL, ICU default data files will be used. + * @param fileName name of the profile file to be opened + * @param status ICU error code in/out parameter. Must not be NULL. + * Must fulfill U_SUCCESS before the function call. + * @return Pointer to UStringPrepProfile that is opened. Should be closed by + * calling usprep_close() + * @see usprep_close() + * @stable ICU 2.8 + */ +U_CAPI UStringPrepProfile* U_EXPORT2 +usprep_open(const char* path, + const char* fileName, + UErrorCode* status); + +/** + * Creates a StringPrep profile for the specified profile type. + * + * @param type The profile type + * @param status ICU error code in/out parameter. Must not be NULL. + * Must fulfill U_SUCCESS before the function call. + * @return Pointer to UStringPrepProfile that is opened. Should be closed by + * calling usprep_close() + * @see usprep_close() + * @stable ICU 4.2 + */ +U_CAPI UStringPrepProfile* U_EXPORT2 +usprep_openByType(UStringPrepProfileType type, + UErrorCode* status); + +/** + * Closes the profile + * @param profile The profile to close + * @stable ICU 2.8 + */ +U_CAPI void U_EXPORT2 +usprep_close(UStringPrepProfile* profile); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUStringPrepProfilePointer + * "Smart pointer" class, closes a UStringPrepProfile via usprep_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUStringPrepProfilePointer, UStringPrepProfile, usprep_close); + +U_NAMESPACE_END + +#endif + +/** + * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC), + * checks for prohibited and BiDi characters in the order defined by RFC 3454 + * depending on the options specified in the profile. + * + * @param prep The profile to use + * @param src Pointer to UChar buffer containing the string to prepare + * @param srcLength Number of characters in the source string + * @param dest Pointer to the destination buffer to receive the output + * @param destCapacity The capacity of destination array + * @param options A bit set of options: + * + * - USPREP_DEFAULT Prohibit processing of unassigned code points in the input + * + * - USPREP_ALLOW_UNASSIGNED Treat the unassigned code points are in the input + * as normal Unicode code points. + * + * @param parseError Pointer to UParseError struct to receive information on position + * of error if an error is encountered. Can be NULL. + * @param status ICU in/out error code parameter. + * U_INVALID_CHAR_FOUND if src contains + * unmatched single surrogates. + * U_INDEX_OUTOFBOUNDS_ERROR if src contains + * too many code points. + * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough + * @return The number of UChars in the destination buffer + * @stable ICU 2.8 + */ + +U_CAPI int32_t U_EXPORT2 +usprep_prepare( const UStringPrepProfile* prep, + const UChar* src, int32_t srcLength, + UChar* dest, int32_t destCapacity, + int32_t options, + UParseError* parseError, + UErrorCode* status ); + + +#endif /* #if !UCONFIG_NO_IDNA */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/ustring.h b/thirdparty/icu4c/common/unicode/ustring.h new file mode 100644 index 0000000000..10ea45ead1 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ustring.h @@ -0,0 +1,1689 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1998-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File ustring.h +* +* Modification History: +* +* Date Name Description +* 12/07/98 bertrand Creation. +****************************************************************************** +*/ + +#ifndef USTRING_H +#define USTRING_H + +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "unicode/uiter.h" + +/** + * \def UBRK_TYPEDEF_UBREAK_ITERATOR + * @internal + */ + +#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR +# define UBRK_TYPEDEF_UBREAK_ITERATOR +/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/ + typedef struct UBreakIterator UBreakIterator; +#endif + +/** + * \file + * \brief C API: Unicode string handling functions + * + * These C API functions provide general Unicode string handling. + * + * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h> + * functions. (For example, they do not check for bad arguments like NULL string pointers.) + * In some cases, only the thread-safe variant of such a function is implemented here + * (see u_strtok_r()). + * + * Other functions provide more Unicode-specific functionality like locale-specific + * upper/lower-casing and string comparison in code point order. + * + * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units. + * UTF-16 encodes each Unicode code point with either one or two UChar code units. + * (This is the default form of Unicode, and a forward-compatible extension of the original, + * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0 + * in 1996.) + * + * Some APIs accept a 32-bit UChar32 value for a single code point. + * + * ICU also handles 16-bit Unicode text with unpaired surrogates. + * Such text is not well-formed UTF-16. + * Code-point-related functions treat unpaired surrogates as surrogate code points, + * i.e., as separate units. + * + * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings), + * it is much more efficient even for random access because the code unit values + * for single-unit characters vs. lead units vs. trail units are completely disjoint. + * This means that it is easy to determine character (code point) boundaries from + * random offsets in the string. + * + * Unicode (UTF-16) string processing is optimized for the single-unit case. + * Although it is important to support supplementary characters + * (which use pairs of lead/trail code units called "surrogates"), + * their occurrence is rare. Almost all characters in modern use require only + * a single UChar code unit (i.e., their code point values are <=0xffff). + * + * For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html). + * For a discussion of the handling of unpaired surrogates see also + * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18. + */ + +/** + * \defgroup ustring_ustrlen String Length + * \ingroup ustring_strlen + */ +/*@{*/ +/** + * Determine the length of an array of UChar. + * + * @param s The array of UChars, NULL (U+0000) terminated. + * @return The number of UChars in <code>chars</code>, minus the terminator. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_strlen(const UChar *s); +/*@}*/ + +/** + * Count Unicode code points in the length UChar code units of the string. + * A code point may occupy either one or two UChar code units. + * Counting code points involves reading all code units. + * + * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h). + * + * @param s The input string. + * @param length The number of UChar code units to be checked, or -1 to count all + * code points before the first NUL (U+0000). + * @return The number of code points in the specified code units. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_countChar32(const UChar *s, int32_t length); + +/** + * Check if the string contains more Unicode code points than a certain number. + * This is more efficient than counting all code points in the entire string + * and comparing that number with a threshold. + * This function may not need to scan the string at all if the length is known + * (not -1 for NUL-termination) and falls within a certain range, and + * never needs to count more than 'number+1' code points. + * Logically equivalent to (u_countChar32(s, length)>number). + * A Unicode code point may occupy either one or two UChar code units. + * + * @param s The input string. + * @param length The length of the string, or -1 if it is NUL-terminated. + * @param number The number of code points in the string is compared against + * the 'number' parameter. + * @return Boolean value for whether the string contains more Unicode code points + * than 'number'. Same as (u_countChar32(s, length)>number). + * @stable ICU 2.4 + */ +U_CAPI UBool U_EXPORT2 +u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number); + +/** + * Concatenate two ustrings. Appends a copy of <code>src</code>, + * including the null terminator, to <code>dst</code>. The initial copied + * character from <code>src</code> overwrites the null terminator in <code>dst</code>. + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_CAPI UChar* U_EXPORT2 +u_strcat(UChar *dst, + const UChar *src); + +/** + * Concatenate two ustrings. + * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>. + * Adds a terminating NUL. + * If src is too long, then only <code>n-1</code> characters will be copied + * before the terminating NUL. + * If <code>n<=0</code> then dst is not modified. + * + * @param dst The destination string. + * @param src The source string (can be NULL/invalid if n<=0). + * @param n The maximum number of characters to append; no-op if <=0. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_CAPI UChar* U_EXPORT2 +u_strncat(UChar *dst, + const UChar *src, + int32_t n); + +/** + * Find the first occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search (NUL-terminated). + * @param substring The substring to find (NUL-terminated). + * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strrstr + * @see u_strFindFirst + * @see u_strFindLast + */ +U_CAPI UChar * U_EXPORT2 +u_strstr(const UChar *s, const UChar *substring); + +/** + * Find the first occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search. + * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. + * @param substring The substring to find (NUL-terminated). + * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. + * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strstr + * @see u_strFindLast + */ +U_CAPI UChar * U_EXPORT2 +u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); + +/** + * Find the first occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The BMP code point to find. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr32 + * @see u_memchr + * @see u_strstr + * @see u_strFindFirst + */ +U_CAPI UChar * U_EXPORT2 +u_strchr(const UChar *s, UChar c); + +/** + * Find the first occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The code point to find. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr + * @see u_memchr32 + * @see u_strstr + * @see u_strFindFirst + */ +U_CAPI UChar * U_EXPORT2 +u_strchr32(const UChar *s, UChar32 c); + +/** + * Find the last occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search (NUL-terminated). + * @param substring The substring to find (NUL-terminated). + * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strstr + * @see u_strFindFirst + * @see u_strFindLast + */ +U_CAPI UChar * U_EXPORT2 +u_strrstr(const UChar *s, const UChar *substring); + +/** + * Find the last occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search. + * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. + * @param substring The substring to find (NUL-terminated). + * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. + * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strstr + * @see u_strFindLast + */ +U_CAPI UChar * U_EXPORT2 +u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); + +/** + * Find the last occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The BMP code point to find. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr32 + * @see u_memrchr + * @see u_strrstr + * @see u_strFindLast + */ +U_CAPI UChar * U_EXPORT2 +u_strrchr(const UChar *s, UChar c); + +/** + * Find the last occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The code point to find. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr + * @see u_memchr32 + * @see u_strrstr + * @see u_strFindLast + */ +U_CAPI UChar * U_EXPORT2 +u_strrchr32(const UChar *s, UChar32 c); + +/** + * Locates the first occurrence in the string <code>string</code> of any of the characters + * in the string <code>matchSet</code>. + * Works just like C's strpbrk but with Unicode. + * + * @param string The string in which to search, NUL-terminated. + * @param matchSet A NUL-terminated string defining a set of code points + * for which to search in the text string. + * @return A pointer to the character in <code>string</code> that matches one of the + * characters in <code>matchSet</code>, or NULL if no such character is found. + * @stable ICU 2.0 + */ +U_CAPI UChar * U_EXPORT2 +u_strpbrk(const UChar *string, const UChar *matchSet); + +/** + * Returns the number of consecutive characters in <code>string</code>, + * beginning with the first, that do not occur somewhere in <code>matchSet</code>. + * Works just like C's strcspn but with Unicode. + * + * @param string The string in which to search, NUL-terminated. + * @param matchSet A NUL-terminated string defining a set of code points + * for which to search in the text string. + * @return The number of initial characters in <code>string</code> that do not + * occur in <code>matchSet</code>. + * @see u_strspn + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_strcspn(const UChar *string, const UChar *matchSet); + +/** + * Returns the number of consecutive characters in <code>string</code>, + * beginning with the first, that occur somewhere in <code>matchSet</code>. + * Works just like C's strspn but with Unicode. + * + * @param string The string in which to search, NUL-terminated. + * @param matchSet A NUL-terminated string defining a set of code points + * for which to search in the text string. + * @return The number of initial characters in <code>string</code> that do + * occur in <code>matchSet</code>. + * @see u_strcspn + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_strspn(const UChar *string, const UChar *matchSet); + +/** + * The string tokenizer API allows an application to break a string into + * tokens. Unlike strtok(), the saveState (the current pointer within the + * original string) is maintained in saveState. In the first call, the + * argument src is a pointer to the string. In subsequent calls to + * return successive tokens of that string, src must be specified as + * NULL. The value saveState is set by this function to maintain the + * function's position within the string, and on each subsequent call + * you must give this argument the same variable. This function does + * handle surrogate pairs. This function is similar to the strtok_r() + * the POSIX Threads Extension (1003.1c-1995) version. + * + * @param src String containing token(s). This string will be modified. + * After the first call to u_strtok_r(), this argument must + * be NULL to get to the next token. + * @param delim Set of delimiter characters (Unicode code points). + * @param saveState The current pointer within the original string, + * which is set by this function. The saveState + * parameter should the address of a local variable of type + * UChar *. (i.e. defined "UChar *myLocalSaveState" and use + * &myLocalSaveState for this parameter). + * @return A pointer to the next token found in src, or NULL + * when there are no more tokens. + * @stable ICU 2.0 + */ +U_CAPI UChar * U_EXPORT2 +u_strtok_r(UChar *src, + const UChar *delim, + UChar **saveState); + +/** + * Compare two Unicode strings for bitwise equality (code unit order). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative + * value if <code>s1</code> is bitwise less than <code>s2,</code>; a positive + * value if <code>s1</code> is bitwise greater than <code>s2</code>. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_strcmp(const UChar *s1, + const UChar *s2); + +/** + * Compare two Unicode strings in code point order. + * See u_strCompare for details. + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @return a negative/zero/positive integer corresponding to whether + * the first string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_strcmpCodePointOrder(const UChar *s1, const UChar *s2); + +/** + * Compare two Unicode strings (binary order). + * + * The comparison can be done in code unit order or in code point order. + * They differ only in UTF-16 when + * comparing supplementary code points (U+10000..U+10ffff) + * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). + * In code unit order, high BMP code points sort after supplementary code points + * because they are stored as pairs of surrogates which are at U+d800..U+dfff. + * + * This functions works with strings of different explicitly specified lengths + * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. + * NUL-terminated strings are possible with length arguments of -1. + * + * @param s1 First source string. + * @param length1 Length of first source string, or -1 if NUL-terminated. + * + * @param s2 Second source string. + * @param length2 Length of second source string, or -1 if NUL-terminated. + * + * @param codePointOrder Choose between code unit order (false) + * and code point order (true). + * + * @return <0 or 0 or >0 as usual for string comparisons + * + * @stable ICU 2.2 + */ +U_CAPI int32_t U_EXPORT2 +u_strCompare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + UBool codePointOrder); + +/** + * Compare two Unicode strings (binary order) + * as presented by UCharIterator objects. + * Works otherwise just like u_strCompare(). + * + * Both iterators are reset to their start positions. + * When the function returns, it is undefined where the iterators + * have stopped. + * + * @param iter1 First source string iterator. + * @param iter2 Second source string iterator. + * @param codePointOrder Choose between code unit order (false) + * and code point order (true). + * + * @return <0 or 0 or >0 as usual for string comparisons + * + * @see u_strCompare + * + * @stable ICU 2.6 + */ +U_CAPI int32_t U_EXPORT2 +u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to + * u_strCompare(u_strFoldCase(s1, options), + * u_strFoldCase(s2, options), + * (options&U_COMPARE_CODE_POINT_ORDER)!=0). + * + * The comparison can be done in UTF-16 code unit order or in code point order. + * They differ only when comparing supplementary code points (U+10000..U+10ffff) + * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). + * In code unit order, high BMP code points sort after supplementary code points + * because they are stored as pairs of surrogates which are at U+d800..U+dfff. + * + * This functions works with strings of different explicitly specified lengths + * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. + * NUL-terminated strings are possible with length arguments of -1. + * + * @param s1 First source string. + * @param length1 Length of first source string, or -1 if NUL-terminated. + * + * @param s2 Second source string. + * @param length2 Length of second source string, or -1 if NUL-terminated. + * + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @return <0 or 0 or >0 as usual for string comparisons + * + * @stable ICU 2.2 + */ +U_CAPI int32_t U_EXPORT2 +u_strCaseCompare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode); + +/** + * Compare two ustrings for bitwise equality. + * Compares at most <code>n</code> characters. + * + * @param ucs1 A string to compare (can be NULL/invalid if n<=0). + * @param ucs2 A string to compare (can be NULL/invalid if n<=0). + * @param n The maximum number of characters to compare; always returns 0 if n<=0. + * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative + * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive + * value if <code>s1</code> is bitwise greater than <code>s2</code>. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_strncmp(const UChar *ucs1, + const UChar *ucs2, + int32_t n); + +/** + * Compare two Unicode strings in code point order. + * This is different in UTF-16 from u_strncmp() if supplementary characters are present. + * For details, see u_strCompare(). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param n The maximum number of characters to compare. + * @return a negative/zero/positive integer corresponding to whether + * the first string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options), + * u_strFoldCase(s2, at most n, options)). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param n The maximum number of characters each string to case-fold and then compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options), + * u_strFoldCase(s2, n, options)). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param length The number of characters in each string to case-fold and then compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options); + +/** + * Copy a ustring. Adds a null terminator. + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_CAPI UChar* U_EXPORT2 +u_strcpy(UChar *dst, + const UChar *src); + +/** + * Copy a ustring. + * Copies at most <code>n</code> characters. The result will be null terminated + * if the length of <code>src</code> is less than <code>n</code>. + * + * @param dst The destination string. + * @param src The source string (can be NULL/invalid if n<=0). + * @param n The maximum number of characters to copy; no-op if <=0. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_CAPI UChar* U_EXPORT2 +u_strncpy(UChar *dst, + const UChar *src, + int32_t n); + +#if !UCONFIG_NO_CONVERSION + +/** + * Copy a byte string encoded in the default codepage to a ustring. + * Adds a null terminator. + * Performs a host byte to UChar conversion + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_CAPI UChar* U_EXPORT2 u_uastrcpy(UChar *dst, + const char *src ); + +/** + * Copy a byte string encoded in the default codepage to a ustring. + * Copies at most <code>n</code> characters. The result will be null terminated + * if the length of <code>src</code> is less than <code>n</code>. + * Performs a host byte to UChar conversion + * + * @param dst The destination string. + * @param src The source string. + * @param n The maximum number of characters to copy. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_CAPI UChar* U_EXPORT2 u_uastrncpy(UChar *dst, + const char *src, + int32_t n); + +/** + * Copy ustring to a byte string encoded in the default codepage. + * Adds a null terminator. + * Performs a UChar to host byte conversion + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_CAPI char* U_EXPORT2 u_austrcpy(char *dst, + const UChar *src ); + +/** + * Copy ustring to a byte string encoded in the default codepage. + * Copies at most <code>n</code> characters. The result will be null terminated + * if the length of <code>src</code> is less than <code>n</code>. + * Performs a UChar to host byte conversion + * + * @param dst The destination string. + * @param src The source string. + * @param n The maximum number of characters to copy. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_CAPI char* U_EXPORT2 u_austrncpy(char *dst, + const UChar *src, + int32_t n ); + +#endif + +/** + * Synonym for memcpy(), but with UChars only. + * @param dest The destination string + * @param src The source string (can be NULL/invalid if count<=0) + * @param count The number of characters to copy; no-op if <=0 + * @return A pointer to <code>dest</code> + * @stable ICU 2.0 + */ +U_CAPI UChar* U_EXPORT2 +u_memcpy(UChar *dest, const UChar *src, int32_t count); + +/** + * Synonym for memmove(), but with UChars only. + * @param dest The destination string + * @param src The source string (can be NULL/invalid if count<=0) + * @param count The number of characters to move; no-op if <=0 + * @return A pointer to <code>dest</code> + * @stable ICU 2.0 + */ +U_CAPI UChar* U_EXPORT2 +u_memmove(UChar *dest, const UChar *src, int32_t count); + +/** + * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>. + * + * @param dest The destination string. + * @param c The character to initialize the string. + * @param count The maximum number of characters to set. + * @return A pointer to <code>dest</code>. + * @stable ICU 2.0 + */ +U_CAPI UChar* U_EXPORT2 +u_memset(UChar *dest, UChar c, int32_t count); + +/** + * Compare the first <code>count</code> UChars of each buffer. + * + * @param buf1 The first string to compare. + * @param buf2 The second string to compare. + * @param count The maximum number of UChars to compare. + * @return When buf1 < buf2, a negative number is returned. + * When buf1 == buf2, 0 is returned. + * When buf1 > buf2, a positive number is returned. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count); + +/** + * Compare two Unicode strings in code point order. + * This is different in UTF-16 from u_memcmp() if supplementary characters are present. + * For details, see u_strCompare(). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param count The maximum number of characters to compare. + * @return a negative/zero/positive integer corresponding to whether + * the first string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count); + +/** + * Find the first occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The BMP code point to find. + * @param count The length of the string. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr + * @see u_memchr32 + * @see u_strFindFirst + */ +U_CAPI UChar* U_EXPORT2 +u_memchr(const UChar *s, UChar c, int32_t count); + +/** + * Find the first occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The code point to find. + * @param count The length of the string. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr32 + * @see u_memchr + * @see u_strFindFirst + */ +U_CAPI UChar* U_EXPORT2 +u_memchr32(const UChar *s, UChar32 c, int32_t count); + +/** + * Find the last occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The BMP code point to find. + * @param count The length of the string. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr + * @see u_memrchr32 + * @see u_strFindLast + */ +U_CAPI UChar* U_EXPORT2 +u_memrchr(const UChar *s, UChar c, int32_t count); + +/** + * Find the last occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The code point to find. + * @param count The length of the string. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr32 + * @see u_memrchr + * @see u_strFindLast + */ +U_CAPI UChar* U_EXPORT2 +u_memrchr32(const UChar *s, UChar32 c, int32_t count); + +/** + * Unicode String literals in C. + * We need one macro to declare a variable for the string + * and to statically preinitialize it if possible, + * and a second macro to dynamically initialize such a string variable if necessary. + * + * The macros are defined for maximum performance. + * They work only for strings that contain "invariant characters", i.e., + * only latin letters, digits, and some punctuation. + * See utypes.h for details. + * + * A pair of macros for a single string must be used with the same + * parameters. + * The string parameter must be a C string literal. + * The length of the string, not including the terminating + * `NUL`, must be specified as a constant. + * The U_STRING_DECL macro should be invoked exactly once for one + * such string variable before it is used. + * + * Usage: + * + * U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11); + * U_STRING_DECL(ustringVar2, "jumps 5%", 8); + * static UBool didInit=false; + * + * int32_t function() { + * if(!didInit) { + * U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11); + * U_STRING_INIT(ustringVar2, "jumps 5%", 8); + * didInit=true; + * } + * return u_strcmp(ustringVar1, ustringVar2); + * } + * + * Note that the macros will NOT consistently work if their argument is another #`define`. + * The following will not work on all platforms, don't use it. + * + * #define GLUCK "Mr. Gluck" + * U_STRING_DECL(var, GLUCK, 9) + * U_STRING_INIT(var, GLUCK, 9) + * + * Instead, use the string literal "Mr. Gluck" as the argument to both macro + * calls. + * + * + * @stable ICU 2.0 + */ +#if defined(U_DECLARE_UTF16) +# define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs) + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) +#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) +# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) +#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY +# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) +#else +# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1] + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1) +#endif + +/** + * Unescape a string of characters and write the resulting + * Unicode characters to the destination buffer. The following escape + * sequences are recognized: + * + * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] + * \\Uhhhhhhhh 8 hex digits + * \\xhh 1-2 hex digits + * \\x{h...} 1-8 hex digits + * \\ooo 1-3 octal digits; o in [0-7] + * \\cX control-X; X is masked with 0x1F + * + * as well as the standard ANSI C escapes: + * + * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, + * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, + * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C + * + * Anything else following a backslash is generically escaped. For + * example, "[a\\-z]" returns "[a-z]". + * + * If an escape sequence is ill-formed, this method returns an empty + * string. An example of an ill-formed sequence is "\\u" followed by + * fewer than 4 hex digits. + * + * The above characters are recognized in the compiler's codepage, + * that is, they are coded as 'u', '\\', etc. Characters that are + * not parts of escape sequences are converted using u_charsToUChars(). + * + * This function is similar to UnicodeString::unescape() but not + * identical to it. The latter takes a source UnicodeString, so it + * does escape recognition but no conversion. + * + * @param src a zero-terminated string of invariant characters + * @param dest pointer to buffer to receive converted and unescaped + * text and, if there is room, a zero terminator. May be NULL for + * preflighting, in which case no UChars will be written, but the + * return value will still be valid. On error, an empty string is + * stored here (if possible). + * @param destCapacity the number of UChars that may be written at + * dest. Ignored if dest == NULL. + * @return the length of unescaped string. + * @see u_unescapeAt + * @see UnicodeString#unescape() + * @see UnicodeString#unescapeAt() + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_unescape(const char *src, + UChar *dest, int32_t destCapacity); + +U_CDECL_BEGIN +/** + * Callback function for u_unescapeAt() that returns a character of + * the source text given an offset and a context pointer. The context + * pointer will be whatever is passed into u_unescapeAt(). + * + * @param offset pointer to the offset that will be passed to u_unescapeAt(). + * @param context an opaque pointer passed directly into u_unescapeAt() + * @return the character represented by the escape sequence at + * offset + * @see u_unescapeAt + * @stable ICU 2.0 + */ +typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context); +U_CDECL_END + +/** + * Unescape a single sequence. The character at offset-1 is assumed + * (without checking) to be a backslash. This method takes a callback + * pointer to a function that returns the UChar at a given offset. By + * varying this callback, ICU functions are able to unescape char* + * strings, UnicodeString objects, and UFILE pointers. + * + * If offset is out of range, or if the escape sequence is ill-formed, + * (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape() + * for a list of recognized sequences. + * + * @param charAt callback function that returns a UChar of the source + * text given an offset and a context pointer. + * @param offset pointer to the offset that will be passed to charAt. + * The offset value will be updated upon return to point after the + * last parsed character of the escape sequence. On error the offset + * is unchanged. + * @param length the number of characters in the source text. The + * last character of the source text is considered to be at offset + * length-1. + * @param context an opaque pointer passed directly into charAt. + * @return the character represented by the escape sequence at + * offset, or (UChar32)0xFFFFFFFF on error. + * @see u_unescape() + * @see UnicodeString#unescape() + * @see UnicodeString#unescapeAt() + * @stable ICU 2.0 + */ +U_CAPI UChar32 U_EXPORT2 +u_unescapeAt(UNESCAPE_CHAR_AT charAt, + int32_t *offset, + int32_t length, + void *context); + +/** + * Uppercase the characters in a string. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_strToUpper(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode); + +/** + * Lowercase the characters in a string. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_strToLower(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + +/** + * Titlecase a string. + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * It may be more efficient to always provide an iterator to avoid + * opening and closing one for each string. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param titleIter A break iterator to find the first characters of words + * that are to be titlecased. + * If none is provided (NULL), then a standard titlecase + * break iterator is opened. + * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.1 + */ +U_CAPI int32_t U_EXPORT2 +u_strToTitle(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UBreakIterator *titleIter, + const char *locale, + UErrorCode *pErrorCode); + +#endif + +/** + * Case-folds the characters in a string. + * + * Case-folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.0 + */ +U_CAPI int32_t U_EXPORT2 +u_strFoldCase(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + uint32_t options, + UErrorCode *pErrorCode); + +#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION +/** + * Convert a UTF-16 string to a wchar_t string. + * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then + * this function simply calls the fast, dedicated function for that. + * Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + */ +U_CAPI wchar_t* U_EXPORT2 +u_strToWCS(wchar_t *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); +/** + * Convert a wchar_t string to UTF-16. + * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then + * this function simply calls the fast, dedicated function for that. + * Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + */ +U_CAPI UChar* U_EXPORT2 +u_strFromWCS(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const wchar_t *src, + int32_t srcLength, + UErrorCode *pErrorCode); +#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */ + +/** + * Convert a UTF-16 string to UTF-8. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of chars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + * @see u_strToUTF8WithSub + * @see u_strFromUTF8 + */ +U_CAPI char* U_EXPORT2 +u_strToUTF8(char *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-8 string to UTF-16. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + * @see u_strFromUTF8WithSub + * @see u_strFromUTF8Lenient + */ +U_CAPI UChar* U_EXPORT2 +u_strFromUTF8(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-16 string to UTF-8. + * + * Same as u_strToUTF8() except for the additional subchar which is output for + * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. + * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8(). + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of chars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strToUTF8 + * @see u_strFromUTF8WithSub + * @stable ICU 3.6 + */ +U_CAPI char* U_EXPORT2 +u_strToUTF8WithSub(char *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-8 string to UTF-16. + * + * Same as u_strFromUTF8() except for the additional subchar which is output for + * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. + * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8(). + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strFromUTF8 + * @see u_strFromUTF8Lenient + * @see u_strToUTF8WithSub + * @stable ICU 3.6 + */ +U_CAPI UChar* U_EXPORT2 +u_strFromUTF8WithSub(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-8 string to UTF-16. + * + * Same as u_strFromUTF8() except that this function is designed to be very fast, + * which it achieves by being lenient about malformed UTF-8 sequences. + * This function is intended for use in environments where UTF-8 text is + * expected to be well-formed. + * + * Its semantics are: + * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text. + * - The function will not read beyond the input string, nor write beyond + * the destCapacity. + * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not + * be well-formed UTF-16. + * The function will resynchronize to valid code point boundaries + * within a small number of code points after an illegal sequence. + * - Non-shortest forms are not detected and will result in "spoofing" output. + * + * For further performance improvement, if srcLength is given (>=0), + * then it must be destCapacity>=srcLength. + * + * There is no inverse u_strToUTF8Lenient() function because there is practically + * no performance gain from not checking that a UTF-16 string is well-formed. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * Unlike for other ICU functions, if srcLength>=0 then it + * must be destCapacity>=srcLength. + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * Unlike for other ICU functions, if srcLength>=0 but + * destCapacity<srcLength, then *pDestLength will be set to srcLength + * (and U_BUFFER_OVERFLOW_ERROR will be set) + * regardless of the actual result length. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strFromUTF8 + * @see u_strFromUTF8WithSub + * @see u_strToUTF8WithSub + * @stable ICU 3.6 + */ +U_CAPI UChar * U_EXPORT2 +u_strFromUTF8Lenient(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-16 string to UTF-32. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @see u_strToUTF32WithSub + * @see u_strFromUTF32 + * @stable ICU 2.0 + */ +U_CAPI UChar32* U_EXPORT2 +u_strToUTF32(UChar32 *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-32 string to UTF-16. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @see u_strFromUTF32WithSub + * @see u_strToUTF32 + * @stable ICU 2.0 + */ +U_CAPI UChar* U_EXPORT2 +u_strFromUTF32(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar32 *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-16 string to UTF-32. + * + * Same as u_strToUTF32() except for the additional subchar which is output for + * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. + * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32(). + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strToUTF32 + * @see u_strFromUTF32WithSub + * @stable ICU 4.2 + */ +U_CAPI UChar32* U_EXPORT2 +u_strToUTF32WithSub(UChar32 *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-32 string to UTF-16. + * + * Same as u_strFromUTF32() except for the additional subchar which is output for + * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. + * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32(). + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strFromUTF32 + * @see u_strToUTF32WithSub + * @stable ICU 4.2 + */ +U_CAPI UChar* U_EXPORT2 +u_strFromUTF32WithSub(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar32 *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +/** + * Convert a 16-bit Unicode string to Java Modified UTF-8. + * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8 + * + * This function behaves according to the documentation for Java DataOutput.writeUTF() + * except that it does not encode the output length in the destination buffer + * and does not have an output length restriction. + * See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String) + * + * The input string need not be well-formed UTF-16. + * (Therefore there is no subchar parameter.) + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of chars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @stable ICU 4.4 + * @see u_strToUTF8WithSub + * @see u_strFromJavaModifiedUTF8WithSub + */ +U_CAPI char* U_EXPORT2 +u_strToJavaModifiedUTF8( + char *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a Java Modified UTF-8 string to a 16-bit Unicode string. + * If the input string is not well-formed and no substitution char is specified, + * then the U_INVALID_CHAR_FOUND error code is set. + * + * This function behaves according to the documentation for Java DataInput.readUTF() + * except that it takes a length parameter rather than + * interpreting the first two input bytes as the length. + * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#readUTF() + * + * The output string may not be well-formed UTF-16. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strFromUTF8WithSub + * @see u_strFromUTF8Lenient + * @see u_strToJavaModifiedUTF8 + * @stable ICU 4.4 + */ +U_CAPI UChar* U_EXPORT2 +u_strFromJavaModifiedUTF8WithSub( + UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +#endif diff --git a/thirdparty/icu4c/common/unicode/ustringtrie.h b/thirdparty/icu4c/common/unicode/ustringtrie.h new file mode 100644 index 0000000000..fd85648225 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/ustringtrie.h @@ -0,0 +1,97 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2012, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: udicttrie.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010dec17 +* created by: Markus W. Scherer +*/ + +#ifndef __USTRINGTRIE_H__ +#define __USTRINGTRIE_H__ + +/** + * \file + * \brief C API: Helper definitions for dictionary trie APIs. + */ + +#include "unicode/utypes.h" + + +/** + * Return values for BytesTrie::next(), UCharsTrie::next() and similar methods. + * @see USTRINGTRIE_MATCHES + * @see USTRINGTRIE_HAS_VALUE + * @see USTRINGTRIE_HAS_NEXT + * @stable ICU 4.8 + */ +enum UStringTrieResult { + /** + * The input unit(s) did not continue a matching string. + * Once current()/next() return USTRINGTRIE_NO_MATCH, + * all further calls to current()/next() will also return USTRINGTRIE_NO_MATCH, + * until the trie is reset to its original state or to a saved state. + * @stable ICU 4.8 + */ + USTRINGTRIE_NO_MATCH, + /** + * The input unit(s) continued a matching string + * but there is no value for the string so far. + * (It is a prefix of a longer string.) + * @stable ICU 4.8 + */ + USTRINGTRIE_NO_VALUE, + /** + * The input unit(s) continued a matching string + * and there is a value for the string so far. + * This value will be returned by getValue(). + * No further input byte/unit can continue a matching string. + * @stable ICU 4.8 + */ + USTRINGTRIE_FINAL_VALUE, + /** + * The input unit(s) continued a matching string + * and there is a value for the string so far. + * This value will be returned by getValue(). + * Another input byte/unit can continue a matching string. + * @stable ICU 4.8 + */ + USTRINGTRIE_INTERMEDIATE_VALUE +}; + +/** + * Same as (result!=USTRINGTRIE_NO_MATCH). + * @param result A result from BytesTrie::first(), UCharsTrie::next() etc. + * @return true if the input bytes/units so far are part of a matching string/byte sequence. + * @stable ICU 4.8 + */ +#define USTRINGTRIE_MATCHES(result) ((result)!=USTRINGTRIE_NO_MATCH) + +/** + * Equivalent to (result==USTRINGTRIE_INTERMEDIATE_VALUE || result==USTRINGTRIE_FINAL_VALUE) but + * this macro evaluates result exactly once. + * @param result A result from BytesTrie::first(), UCharsTrie::next() etc. + * @return true if there is a value for the input bytes/units so far. + * @see BytesTrie::getValue + * @see UCharsTrie::getValue + * @stable ICU 4.8 + */ +#define USTRINGTRIE_HAS_VALUE(result) ((result)>=USTRINGTRIE_FINAL_VALUE) + +/** + * Equivalent to (result==USTRINGTRIE_NO_VALUE || result==USTRINGTRIE_INTERMEDIATE_VALUE) but + * this macro evaluates result exactly once. + * @param result A result from BytesTrie::first(), UCharsTrie::next() etc. + * @return true if another input byte/unit can continue a matching string. + * @stable ICU 4.8 + */ +#define USTRINGTRIE_HAS_NEXT(result) ((result)&1) + +#endif /* __USTRINGTRIE_H__ */ diff --git a/thirdparty/icu4c/common/unicode/utext.h b/thirdparty/icu4c/common/unicode/utext.h new file mode 100644 index 0000000000..c6d1e53a8c --- /dev/null +++ b/thirdparty/icu4c/common/unicode/utext.h @@ -0,0 +1,1603 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2004-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utext.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004oct06 +* created by: Markus W. Scherer +*/ + +#ifndef __UTEXT_H__ +#define __UTEXT_H__ + +/** + * \file + * \brief C API: Abstract Unicode Text API + * + * The Text Access API provides a means to allow text that is stored in alternative + * formats to work with ICU services. ICU normally operates on text that is + * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type + * UnicodeString for C++ APIs. + * + * ICU Text Access allows other formats, such as UTF-8 or non-contiguous + * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services. + * + * There are three general classes of usage for UText: + * + * Application Level Use. This is the simplest usage - applications would + * use one of the utext_open() functions on their input text, and pass + * the resulting UText to the desired ICU service. + * + * Second is usage in ICU Services, such as break iteration, that will need to + * operate on input presented to them as a UText. These implementations + * will need to use the iteration and related UText functions to gain + * access to the actual text. + * + * The third class of UText users are "text providers." These are the + * UText implementations for the various text storage formats. An application + * or system with a unique text storage format can implement a set of + * UText provider functions for that format, which will then allow + * ICU services to operate on that format. + * + * + * <em>Iterating over text</em> + * + * Here is sample code for a forward iteration over the contents of a UText + * + * \code + * UChar32 c; + * UText *ut = whatever(); + * + * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) { + * // do whatever with the codepoint c here. + * } + * \endcode + * + * And here is similar code to iterate in the reverse direction, from the end + * of the text towards the beginning. + * + * \code + * UChar32 c; + * UText *ut = whatever(); + * int textLength = utext_nativeLength(ut); + * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) { + * // do whatever with the codepoint c here. + * } + * \endcode + * + * <em>Characters and Indexing</em> + * + * Indexing into text by UText functions is nearly always in terms of the native + * indexing of the underlying text storage. The storage format could be UTF-8 + * or UTF-32, for example. When coding to the UText access API, no assumptions + * can be made regarding the size of characters, or how far an index + * may move when iterating between characters. + * + * All indices supplied to UText functions are pinned to the length of the + * text. An out-of-bounds index is not considered to be an error, but is + * adjusted to be in the range 0 <= index <= length of input text. + * + * + * When an index position is returned from a UText function, it will be + * a native index to the underlying text. In the case of multi-unit characters, + * it will always refer to the first position of the character, + * never to the interior. This is essentially the same thing as saying that + * a returned index will always point to a boundary between characters. + * + * When a native index is supplied to a UText function, all indices that + * refer to any part of a multi-unit character representation are considered + * to be equivalent. In the case of multi-unit characters, an incoming index + * will be logically normalized to refer to the start of the character. + * + * It is possible to test whether a native index is on a code point boundary + * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex(). + * If the index is returned unchanged, it was on a code point boundary. If + * an adjusted index is returned, the original index referred to the + * interior of a character. + * + * <em>Conventions for calling UText functions</em> + * + * Most UText access functions have as their first parameter a (UText *) pointer, + * which specifies the UText to be used. Unless otherwise noted, the + * pointer must refer to a valid, open UText. Attempting to + * use a closed UText or passing a NULL pointer is a programming error and + * will produce undefined results or NULL pointer exceptions. + * + * The UText_Open family of functions can either open an existing (closed) + * UText, or heap allocate a new UText. Here is sample code for creating + * a stack-allocated UText. + * + * \code + * char *s = whatever(); // A utf-8 string + * U_ErrorCode status = U_ZERO_ERROR; + * UText ut = UTEXT_INITIALIZER; + * utext_openUTF8(ut, s, -1, &status); + * if (U_FAILURE(status)) { + * // error handling + * } else { + * // work with the UText + * } + * \endcode + * + * Any existing UText passed to an open function _must_ have been initialized, + * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated + * by an open function. Passing NULL will cause the open function to + * heap-allocate and fully initialize a new UText. + * + */ + + + +#include "unicode/utypes.h" +#include "unicode/uchar.h" +#if U_SHOW_CPLUSPLUS_API +#include "unicode/localpointer.h" +#include "unicode/rep.h" +#include "unicode/unistr.h" +#include "unicode/chariter.h" +#endif + + +U_CDECL_BEGIN + +struct UText; +typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */ + + +/*************************************************************************************** + * + * C Functions for creating UText wrappers around various kinds of text strings. + * + ****************************************************************************************/ + + +/** + * Close function for UText instances. + * Cleans up, releases any resources being held by an open UText. + * <p> + * If the UText was originally allocated by one of the utext_open functions, + * the storage associated with the utext will also be freed. + * If the UText storage originated with the application, as it would with + * a local or static instance, the storage will not be deleted. + * + * An open UText can be reset to refer to new string by using one of the utext_open() + * functions without first closing the UText. + * + * @param ut The UText to be closed. + * @return NULL if the UText struct was deleted by the close. If the UText struct + * was originally provided by the caller to the open function, it is + * returned by this function, and may be safely used again in + * a subsequent utext_open. + * + * @stable ICU 3.4 + */ +U_CAPI UText * U_EXPORT2 +utext_close(UText *ut); + +/** + * Open a read-only UText implementation for UTF-8 strings. + * + * \htmlonly + * Any invalid UTF-8 in the input will be handled in this way: + * a sequence of bytes that has the form of a truncated, but otherwise valid, + * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. + * Any other illegal bytes will each be replaced by a \uFFFD. + * \endhtmlonly + * + * @param ut Pointer to a UText struct. If NULL, a new UText will be created. + * If non-NULL, must refer to an initialized UText struct, which will then + * be reset to reference the specified UTF-8 string. + * @param s A UTF-8 string. Must not be NULL. + * @param length The length of the UTF-8 string in bytes, or -1 if the string is + * zero terminated. + * @param status Errors are returned here. + * @return A pointer to the UText. If a pre-allocated UText was provided, it + * will always be used and returned. + * @stable ICU 3.4 + */ +U_CAPI UText * U_EXPORT2 +utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); + + +/** + * Open a read-only UText for UChar * string. + * + * @param ut Pointer to a UText struct. If NULL, a new UText will be created. + * If non-NULL, must refer to an initialized UText struct, which will then + * be reset to reference the specified UChar string. + * @param s A UChar (UTF-16) string + * @param length The number of UChars in the input string, or -1 if the string is + * zero terminated. + * @param status Errors are returned here. + * @return A pointer to the UText. If a pre-allocated UText was provided, it + * will always be used and returned. + * @stable ICU 3.4 + */ +U_CAPI UText * U_EXPORT2 +utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); + + +#if U_SHOW_CPLUSPLUS_API +/** + * Open a writable UText for a non-const UnicodeString. + * + * @param ut Pointer to a UText struct. If NULL, a new UText will be created. + * If non-NULL, must refer to an initialized UText struct, which will then + * be reset to reference the specified input string. + * @param s A UnicodeString. + * @param status Errors are returned here. + * @return Pointer to the UText. If a UText was supplied as input, this + * will always be used and returned. + * @stable ICU 3.4 + */ +U_CAPI UText * U_EXPORT2 +utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status); + + +/** + * Open a UText for a const UnicodeString. The resulting UText will not be writable. + * + * @param ut Pointer to a UText struct. If NULL, a new UText will be created. + * If non-NULL, must refer to an initialized UText struct, which will then + * be reset to reference the specified input string. + * @param s A const UnicodeString to be wrapped. + * @param status Errors are returned here. + * @return Pointer to the UText. If a UText was supplied as input, this + * will always be used and returned. + * @stable ICU 3.4 + */ +U_CAPI UText * U_EXPORT2 +utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status); + + +/** + * Open a writable UText implementation for an ICU Replaceable object. + * @param ut Pointer to a UText struct. If NULL, a new UText will be created. + * If non-NULL, must refer to an already existing UText, which will then + * be reset to reference the specified replaceable text. + * @param rep A Replaceable text object. + * @param status Errors are returned here. + * @return Pointer to the UText. If a UText was supplied as input, this + * will always be used and returned. + * @see Replaceable + * @stable ICU 3.4 + */ +U_CAPI UText * U_EXPORT2 +utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status); + +/** + * Open a UText implementation over an ICU CharacterIterator. + * @param ut Pointer to a UText struct. If NULL, a new UText will be created. + * If non-NULL, must refer to an already existing UText, which will then + * be reset to reference the specified replaceable text. + * @param ci A Character Iterator. + * @param status Errors are returned here. + * @return Pointer to the UText. If a UText was supplied as input, this + * will always be used and returned. + * @see Replaceable + * @stable ICU 3.4 + */ +U_CAPI UText * U_EXPORT2 +utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status); + +#endif + + +/** + * Clone a UText. This is much like opening a UText where the source text is itself + * another UText. + * + * A deep clone will copy both the UText data structures and the underlying text. + * The original and cloned UText will operate completely independently; modifications + * made to the text in one will not affect the other. Text providers are not + * required to support deep clones. The user of clone() must check the status return + * and be prepared to handle failures. + * + * The standard UText implementations for UTF8, UChar *, UnicodeString and + * Replaceable all support deep cloning. + * + * The UText returned from a deep clone will be writable, assuming that the text + * provider is able to support writing, even if the source UText had been made + * non-writable by means of UText_freeze(). + * + * A shallow clone replicates only the UText data structures; it does not make + * a copy of the underlying text. Shallow clones can be used as an efficient way to + * have multiple iterators active in a single text string that is not being + * modified. + * + * A shallow clone operation will not fail, barring truly exceptional conditions such + * as memory allocation failures. + * + * Shallow UText clones should be avoided if the UText functions that modify the + * text are expected to be used, either on the original or the cloned UText. + * Any such modifications can cause unpredictable behavior. Read Only + * shallow clones provide some protection against errors of this type by + * disabling text modification via the cloned UText. + * + * A shallow clone made with the readOnly parameter == false will preserve the + * utext_isWritable() state of the source object. Note, however, that + * write operations must be avoided while more than one UText exists that refer + * to the same underlying text. + * + * A UText and its clone may be safely concurrently accessed by separate threads. + * This is true for read access only with shallow clones, and for both read and + * write access with deep clones. + * It is the responsibility of the Text Provider to ensure that this thread safety + * constraint is met. + * + * @param dest A UText struct to be filled in with the result of the clone operation, + * or NULL if the clone function should heap-allocate a new UText struct. + * If non-NULL, must refer to an already existing UText, which will then + * be reset to become the clone. + * @param src The UText to be cloned. + * @param deep true to request a deep clone, false for a shallow clone. + * @param readOnly true to request that the cloned UText have read only access to the + * underlying text. + + * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR + * will be returned if the text provider is unable to clone the + * original text. + * @return The newly created clone, or NULL if the clone operation failed. + * @stable ICU 3.4 + */ +U_CAPI UText * U_EXPORT2 +utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status); + + +/** + * Compare two UText objects for equality. + * UTexts are equal if they are iterating over the same text, and + * have the same iteration position within the text. + * If either or both of the parameters are NULL, the comparison is false. + * + * @param a The first of the two UTexts to compare. + * @param b The other UText to be compared. + * @return true if the two UTexts are equal. + * @stable ICU 3.6 + */ +U_CAPI UBool U_EXPORT2 +utext_equals(const UText *a, const UText *b); + + +/***************************************************************************** + * + * Functions to work with the text represented by a UText wrapper + * + *****************************************************************************/ + +/** + * Get the length of the text. Depending on the characteristics + * of the underlying text representation, this may be expensive. + * @see utext_isLengthExpensive() + * + * + * @param ut the text to be accessed. + * @return the length of the text, expressed in native units. + * + * @stable ICU 3.4 + */ +U_CAPI int64_t U_EXPORT2 +utext_nativeLength(UText *ut); + +/** + * Return true if calculating the length of the text could be expensive. + * Finding the length of NUL terminated strings is considered to be expensive. + * + * Note that the value of this function may change + * as the result of other operations on a UText. + * Once the length of a string has been discovered, it will no longer + * be expensive to report it. + * + * @param ut the text to be accessed. + * @return true if determining the length of the text could be time consuming. + * @stable ICU 3.4 + */ +U_CAPI UBool U_EXPORT2 +utext_isLengthExpensive(const UText *ut); + +/** + * Returns the code point at the requested index, + * or U_SENTINEL (-1) if it is out of bounds. + * + * If the specified index points to the interior of a multi-unit + * character - one of the trail bytes of a UTF-8 sequence, for example - + * the complete code point will be returned. + * + * The iteration position will be set to the start of the returned code point. + * + * This function is roughly equivalent to the sequence + * utext_setNativeIndex(index); + * utext_current32(); + * (There is a subtle difference if the index is out of bounds by being less than zero - + * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current() + * will return the char at zero. utext_char32At(negative index), on the other hand, will + * return the U_SENTINEL value of -1.) + * + * @param ut the text to be accessed + * @param nativeIndex the native index of the character to be accessed. If the index points + * to other than the first unit of a multi-unit character, it will be adjusted + * to the start of the character. + * @return the code point at the specified index. + * @stable ICU 3.4 + */ +U_CAPI UChar32 U_EXPORT2 +utext_char32At(UText *ut, int64_t nativeIndex); + + +/** + * + * Get the code point at the current iteration position, + * or U_SENTINEL (-1) if the iteration has reached the end of + * the input text. + * + * @param ut the text to be accessed. + * @return the Unicode code point at the current iterator position. + * @stable ICU 3.4 + */ +U_CAPI UChar32 U_EXPORT2 +utext_current32(UText *ut); + + +/** + * Get the code point at the current iteration position of the UText, and + * advance the position to the first index following the character. + * + * If the position is at the end of the text (the index following + * the last character, which is also the length of the text), + * return U_SENTINEL (-1) and do not advance the index. + * + * This is a post-increment operation. + * + * An inline macro version of this function, UTEXT_NEXT32(), + * is available for performance critical use. + * + * @param ut the text to be accessed. + * @return the Unicode code point at the iteration position. + * @see UTEXT_NEXT32 + * @stable ICU 3.4 + */ +U_CAPI UChar32 U_EXPORT2 +utext_next32(UText *ut); + + +/** + * Move the iterator position to the character (code point) whose + * index precedes the current position, and return that character. + * This is a pre-decrement operation. + * + * If the initial position is at the start of the text (index of 0) + * return U_SENTINEL (-1), and leave the position unchanged. + * + * An inline macro version of this function, UTEXT_PREVIOUS32(), + * is available for performance critical use. + * + * @param ut the text to be accessed. + * @return the previous UChar32 code point, or U_SENTINEL (-1) + * if the iteration has reached the start of the text. + * @see UTEXT_PREVIOUS32 + * @stable ICU 3.4 + */ +U_CAPI UChar32 U_EXPORT2 +utext_previous32(UText *ut); + + +/** + * Set the iteration index and return the code point at that index. + * Leave the iteration index at the start of the following code point. + * + * This function is the most efficient and convenient way to + * begin a forward iteration. The results are identical to the those + * from the sequence + * \code + * utext_setIndex(); + * utext_next32(); + * \endcode + * + * @param ut the text to be accessed. + * @param nativeIndex Iteration index, in the native units of the text provider. + * @return Code point which starts at or before index, + * or U_SENTINEL (-1) if it is out of bounds. + * @stable ICU 3.4 + */ +U_CAPI UChar32 U_EXPORT2 +utext_next32From(UText *ut, int64_t nativeIndex); + + + +/** + * Set the iteration index, and return the code point preceding the + * one specified by the initial index. Leave the iteration position + * at the start of the returned code point. + * + * This function is the most efficient and convenient way to + * begin a backwards iteration. + * + * @param ut the text to be accessed. + * @param nativeIndex Iteration index in the native units of the text provider. + * @return Code point preceding the one at the initial index, + * or U_SENTINEL (-1) if it is out of bounds. + * + * @stable ICU 3.4 + */ +U_CAPI UChar32 U_EXPORT2 +utext_previous32From(UText *ut, int64_t nativeIndex); + +/** + * Get the current iterator position, which can range from 0 to + * the length of the text. + * The position is a native index into the input text, in whatever format it + * may have (possibly UTF-8 for example), and may not always be the same as + * the corresponding UChar (UTF-16) index. + * The returned position will always be aligned to a code point boundary. + * + * @param ut the text to be accessed. + * @return the current index position, in the native units of the text provider. + * @stable ICU 3.4 + */ +U_CAPI int64_t U_EXPORT2 +utext_getNativeIndex(const UText *ut); + +/** + * Set the current iteration position to the nearest code point + * boundary at or preceding the specified index. + * The index is in the native units of the original input text. + * If the index is out of range, it will be pinned to be within + * the range of the input text. + * <p> + * It will usually be more efficient to begin an iteration + * using the functions utext_next32From() or utext_previous32From() + * rather than setIndex(). + * <p> + * Moving the index position to an adjacent character is best done + * with utext_next32(), utext_previous32() or utext_moveIndex32(). + * Attempting to do direct arithmetic on the index position is + * complicated by the fact that the size (in native units) of a + * character depends on the underlying representation of the character + * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not + * easily knowable. + * + * @param ut the text to be accessed. + * @param nativeIndex the native unit index of the new iteration position. + * @stable ICU 3.4 + */ +U_CAPI void U_EXPORT2 +utext_setNativeIndex(UText *ut, int64_t nativeIndex); + +/** + * Move the iterator position by delta code points. The number of code points + * is a signed number; a negative delta will move the iterator backwards, + * towards the start of the text. + * <p> + * The index is moved by <code>delta</code> code points + * forward or backward, but no further backward than to 0 and + * no further forward than to utext_nativeLength(). + * The resulting index value will be in between 0 and length, inclusive. + * + * @param ut the text to be accessed. + * @param delta the signed number of code points to move the iteration position. + * @return true if the position could be moved the requested number of positions while + * staying within the range [0 - text length]. + * @stable ICU 3.4 + */ +U_CAPI UBool U_EXPORT2 +utext_moveIndex32(UText *ut, int32_t delta); + +/** + * Get the native index of the character preceding the current position. + * If the iteration position is already at the start of the text, zero + * is returned. + * The value returned is the same as that obtained from the following sequence, + * but without the side effect of changing the iteration position. + * + * \code + * UText *ut = whatever; + * ... + * utext_previous(ut) + * utext_getNativeIndex(ut); + * \endcode + * + * This function is most useful during forwards iteration, where it will get the + * native index of the character most recently returned from utext_next(). + * + * @param ut the text to be accessed + * @return the native index of the character preceding the current index position, + * or zero if the current position is at the start of the text. + * @stable ICU 3.6 + */ +U_CAPI int64_t U_EXPORT2 +utext_getPreviousNativeIndex(UText *ut); + + +/** + * + * Extract text from a UText into a UChar buffer. The range of text to be extracted + * is specified in the native indices of the UText provider. These may not necessarily + * be UTF-16 indices. + * <p> + * The size (number of 16 bit UChars) of the data to be extracted is returned. The + * full number of UChars is returned, even when the extracted text is truncated + * because the specified buffer size is too small. + * <p> + * The extracted string will (if you are a user) / must (if you are a text provider) + * be NUL-terminated if there is sufficient space in the destination buffer. This + * terminating NUL is not included in the returned length. + * <p> + * The iteration index is left at the position following the last extracted character. + * + * @param ut the UText from which to extract data. + * @param nativeStart the native index of the first character to extract.\ + * If the specified index is out of range, + * it will be pinned to be within 0 <= index <= textLength + * @param nativeLimit the native string index of the position following the last + * character to extract. If the specified index is out of range, + * it will be pinned to be within 0 <= index <= textLength. + * nativeLimit must be >= nativeStart. + * @param dest the UChar (UTF-16) buffer into which the extracted text is placed + * @param destCapacity The size, in UChars, of the destination buffer. May be zero + * for precomputing the required size. + * @param status receives any error status. + * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the + * buffer was too small. Returns number of UChars for preflighting. + * @return Number of UChars in the data to be extracted. Does not include a trailing NUL. + * + * @stable ICU 3.4 + */ +U_CAPI int32_t U_EXPORT2 +utext_extract(UText *ut, + int64_t nativeStart, int64_t nativeLimit, + UChar *dest, int32_t destCapacity, + UErrorCode *status); + + + +/************************************************************************************ + * + * #define inline versions of selected performance-critical text access functions + * Caution: do not use auto increment++ or decrement-- expressions + * as parameters to these macros. + * + * For most use, where there is no extreme performance constraint, the + * normal, non-inline functions are a better choice. The resulting code + * will be smaller, and, if the need ever arises, easier to debug. + * + * These are implemented as #defines rather than real functions + * because there is no fully portable way to do inline functions in plain C. + * + ************************************************************************************/ + +#ifndef U_HIDE_INTERNAL_API +/** + * inline version of utext_current32(), for performance-critical situations. + * + * Get the code point at the current iteration position of the UText. + * Returns U_SENTINEL (-1) if the position is at the end of the + * text. + * + * @internal ICU 4.4 technology preview + */ +#define UTEXT_CURRENT32(ut) \ + ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ + ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut)) +#endif /* U_HIDE_INTERNAL_API */ + +/** + * inline version of utext_next32(), for performance-critical situations. + * + * Get the code point at the current iteration position of the UText, and + * advance the position to the first index following the character. + * This is a post-increment operation. + * Returns U_SENTINEL (-1) if the position is at the end of the + * text. + * + * @stable ICU 3.4 + */ +#define UTEXT_NEXT32(ut) \ + ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ + ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut)) + +/** + * inline version of utext_previous32(), for performance-critical situations. + * + * Move the iterator position to the character (code point) whose + * index precedes the current position, and return that character. + * This is a pre-decrement operation. + * Returns U_SENTINEL (-1) if the position is at the start of the text. + * + * @stable ICU 3.4 + */ +#define UTEXT_PREVIOUS32(ut) \ + ((ut)->chunkOffset > 0 && \ + (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \ + (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut)) + +/** + * inline version of utext_getNativeIndex(), for performance-critical situations. + * + * Get the current iterator position, which can range from 0 to + * the length of the text. + * The position is a native index into the input text, in whatever format it + * may have (possibly UTF-8 for example), and may not always be the same as + * the corresponding UChar (UTF-16) index. + * The returned position will always be aligned to a code point boundary. + * + * @stable ICU 3.6 + */ +#define UTEXT_GETNATIVEINDEX(ut) \ + ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \ + (ut)->chunkNativeStart+(ut)->chunkOffset : \ + (ut)->pFuncs->mapOffsetToNative(ut)) + +/** + * inline version of utext_setNativeIndex(), for performance-critical situations. + * + * Set the current iteration position to the nearest code point + * boundary at or preceding the specified index. + * The index is in the native units of the original input text. + * If the index is out of range, it will be pinned to be within + * the range of the input text. + * + * @stable ICU 3.8 + */ +#define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \ + int64_t __offset = (ix) - (ut)->chunkNativeStart; \ + if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \ + (ut)->chunkOffset=(int32_t)__offset; \ + } else { \ + utext_setNativeIndex((ut), (ix)); \ + } \ +} UPRV_BLOCK_MACRO_END + + + +/************************************************************************************ + * + * Functions related to writing or modifying the text. + * These will work only with modifiable UTexts. Attempting to + * modify a read-only UText will return an error status. + * + ************************************************************************************/ + + +/** + * Return true if the text can be written (modified) with utext_replace() or + * utext_copy(). For the text to be writable, the text provider must + * be of a type that supports writing and the UText must not be frozen. + * + * Attempting to modify text when utext_isWriteable() is false will fail - + * the text will not be modified, and an error will be returned from the function + * that attempted the modification. + * + * @param ut the UText to be tested. + * @return true if the text is modifiable. + * + * @see utext_freeze() + * @see utext_replace() + * @see utext_copy() + * @stable ICU 3.4 + * + */ +U_CAPI UBool U_EXPORT2 +utext_isWritable(const UText *ut); + + +/** + * Test whether there is meta data associated with the text. + * @see Replaceable::hasMetaData() + * + * @param ut The UText to be tested + * @return true if the underlying text includes meta data. + * @stable ICU 3.4 + */ +U_CAPI UBool U_EXPORT2 +utext_hasMetaData(const UText *ut); + + +/** + * Replace a range of the original text with a replacement text. + * + * Leaves the current iteration position at the position following the + * newly inserted replacement text. + * + * This function is only available on UText types that support writing, + * that is, ones where utext_isWritable() returns true. + * + * When using this function, there should be only a single UText opened onto the + * underlying native text string. Behavior after a replace operation + * on a UText is undefined for any other additional UTexts that refer to the + * modified string. + * + * @param ut the UText representing the text to be operated on. + * @param nativeStart the native index of the start of the region to be replaced + * @param nativeLimit the native index of the character following the region to be replaced. + * @param replacementText pointer to the replacement text + * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated. + * @param status receives any error status. Possible errors include + * U_NO_WRITE_PERMISSION + * + * @return The signed number of (native) storage units by which + * the length of the text expanded or contracted. + * + * @stable ICU 3.4 + */ +U_CAPI int32_t U_EXPORT2 +utext_replace(UText *ut, + int64_t nativeStart, int64_t nativeLimit, + const UChar *replacementText, int32_t replacementLength, + UErrorCode *status); + + + +/** + * + * Copy or move a substring from one position to another within the text, + * while retaining any metadata associated with the text. + * This function is used to duplicate or reorder substrings. + * The destination index must not overlap the source range. + * + * The text to be copied or moved is inserted at destIndex; + * it does not replace or overwrite any existing text. + * + * The iteration position is left following the newly inserted text + * at the destination position. + * + * This function is only available on UText types that support writing, + * that is, ones where utext_isWritable() returns true. + * + * When using this function, there should be only a single UText opened onto the + * underlying native text string. Behavior after a copy operation + * on a UText is undefined in any other additional UTexts that refer to the + * modified string. + * + * @param ut The UText representing the text to be operated on. + * @param nativeStart The native index of the start of the region to be copied or moved + * @param nativeLimit The native index of the character position following the region + * to be copied. + * @param destIndex The native destination index to which the source substring is + * copied or moved. + * @param move If true, then the substring is moved, not copied/duplicated. + * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION + * + * @stable ICU 3.4 + */ +U_CAPI void U_EXPORT2 +utext_copy(UText *ut, + int64_t nativeStart, int64_t nativeLimit, + int64_t destIndex, + UBool move, + UErrorCode *status); + + +/** + * <p> + * Freeze a UText. This prevents any modification to the underlying text itself + * by means of functions operating on this UText. + * </p> + * <p> + * Once frozen, a UText can not be unfrozen. The intent is to ensure + * that a the text underlying a frozen UText wrapper cannot be modified via that UText. + * </p> + * <p> + * Caution: freezing a UText will disable changes made via the specific + * frozen UText wrapper only; it will not have any effect on the ability to + * directly modify the text by bypassing the UText. Any such backdoor modifications + * are always an error while UText access is occurring because the underlying + * text can get out of sync with UText's buffering. + * </p> + * + * @param ut The UText to be frozen. + * @see utext_isWritable() + * @stable ICU 3.6 + */ +U_CAPI void U_EXPORT2 +utext_freeze(UText *ut); + + +/** + * UText provider properties (bit field indexes). + * + * @see UText + * @stable ICU 3.4 + */ +enum { + /** + * It is potentially time consuming for the provider to determine the length of the text. + * @stable ICU 3.4 + */ + UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, + /** + * Text chunks remain valid and usable until the text object is modified or + * deleted, not just until the next time the access() function is called + * (which is the default). + * @stable ICU 3.4 + */ + UTEXT_PROVIDER_STABLE_CHUNKS = 2, + /** + * The provider supports modifying the text via the replace() and copy() + * functions. + * @see Replaceable + * @stable ICU 3.4 + */ + UTEXT_PROVIDER_WRITABLE = 3, + /** + * There is meta data associated with the text. + * @see Replaceable::hasMetaData() + * @stable ICU 3.4 + */ + UTEXT_PROVIDER_HAS_META_DATA = 4, + /** + * Text provider owns the text storage. + * Generally occurs as the result of a deep clone of the UText. + * When closing the UText, the associated text must + * also be closed/deleted/freed/ whatever is appropriate. + * @stable ICU 3.6 + */ + UTEXT_PROVIDER_OWNS_TEXT = 5 +}; + +/** + * Function type declaration for UText.clone(). + * + * clone a UText. Much like opening a UText where the source text is itself + * another UText. + * + * A deep clone will copy both the UText data structures and the underlying text. + * The original and cloned UText will operate completely independently; modifications + * made to the text in one will not effect the other. Text providers are not + * required to support deep clones. The user of clone() must check the status return + * and be prepared to handle failures. + * + * A shallow clone replicates only the UText data structures; it does not make + * a copy of the underlying text. Shallow clones can be used as an efficient way to + * have multiple iterators active in a single text string that is not being + * modified. + * + * A shallow clone operation must not fail except for truly exceptional conditions such + * as memory allocation failures. + * + * A UText and its clone may be safely concurrently accessed by separate threads. + * This is true for both shallow and deep clones. + * It is the responsibility of the Text Provider to ensure that this thread safety + * constraint is met. + + * + * @param dest A UText struct to be filled in with the result of the clone operation, + * or NULL if the clone function should heap-allocate a new UText struct. + * @param src The UText to be cloned. + * @param deep true to request a deep clone, false for a shallow clone. + * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR + * should be returned if the text provider is unable to clone the + * original text. + * @return The newly created clone, or NULL if the clone operation failed. + * + * @stable ICU 3.4 + */ +typedef UText * U_CALLCONV +UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status); + + +/** + * Function type declaration for UText.nativeLength(). + * + * @param ut the UText to get the length of. + * @return the length, in the native units of the original text string. + * @see UText + * @stable ICU 3.4 + */ +typedef int64_t U_CALLCONV +UTextNativeLength(UText *ut); + +/** + * Function type declaration for UText.access(). Get the description of the text chunk + * containing the text at a requested native index. The UText's iteration + * position will be left at the requested index. If the index is out + * of bounds, the iteration position will be left at the start or end + * of the string, as appropriate. + * + * Chunks must begin and end on code point boundaries. A single code point + * comprised of multiple storage units must never span a chunk boundary. + * + * + * @param ut the UText being accessed. + * @param nativeIndex Requested index of the text to be accessed. + * @param forward If true, then the returned chunk must contain text + * starting from the index, so that start<=index<limit. + * If false, then the returned chunk must contain text + * before the index, so that start<index<=limit. + * @return True if the requested index could be accessed. The chunk + * will contain the requested text. + * False value if a chunk cannot be accessed + * (the requested index is out of bounds). + * + * @see UText + * @stable ICU 3.4 + */ +typedef UBool U_CALLCONV +UTextAccess(UText *ut, int64_t nativeIndex, UBool forward); + +/** + * Function type declaration for UText.extract(). + * + * Extract text from a UText into a UChar buffer. The range of text to be extracted + * is specified in the native indices of the UText provider. These may not necessarily + * be UTF-16 indices. + * <p> + * The size (number of 16 bit UChars) in the data to be extracted is returned. The + * full amount is returned, even when the specified buffer size is smaller. + * <p> + * The extracted string will (if you are a user) / must (if you are a text provider) + * be NUL-terminated if there is sufficient space in the destination buffer. + * + * @param ut the UText from which to extract data. + * @param nativeStart the native index of the first character to extract. + * @param nativeLimit the native string index of the position following the last + * character to extract. + * @param dest the UChar (UTF-16) buffer into which the extracted text is placed + * @param destCapacity The size, in UChars, of the destination buffer. May be zero + * for precomputing the required size. + * @param status receives any error status. + * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for + * preflighting. + * @return Number of UChars in the data. Does not include a trailing NUL. + * + * @stable ICU 3.4 + */ +typedef int32_t U_CALLCONV +UTextExtract(UText *ut, + int64_t nativeStart, int64_t nativeLimit, + UChar *dest, int32_t destCapacity, + UErrorCode *status); + +/** + * Function type declaration for UText.replace(). + * + * Replace a range of the original text with a replacement text. + * + * Leaves the current iteration position at the position following the + * newly inserted replacement text. + * + * This function need only be implemented on UText types that support writing. + * + * When using this function, there should be only a single UText opened onto the + * underlying native text string. The function is responsible for updating the + * text chunk within the UText to reflect the updated iteration position, + * taking into account any changes to the underlying string's structure caused + * by the replace operation. + * + * @param ut the UText representing the text to be operated on. + * @param nativeStart the index of the start of the region to be replaced + * @param nativeLimit the index of the character following the region to be replaced. + * @param replacementText pointer to the replacement text + * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated. + * @param status receives any error status. Possible errors include + * U_NO_WRITE_PERMISSION + * + * @return The signed number of (native) storage units by which + * the length of the text expanded or contracted. + * + * @stable ICU 3.4 + */ +typedef int32_t U_CALLCONV +UTextReplace(UText *ut, + int64_t nativeStart, int64_t nativeLimit, + const UChar *replacementText, int32_t replacmentLength, + UErrorCode *status); + +/** + * Function type declaration for UText.copy(). + * + * Copy or move a substring from one position to another within the text, + * while retaining any metadata associated with the text. + * This function is used to duplicate or reorder substrings. + * The destination index must not overlap the source range. + * + * The text to be copied or moved is inserted at destIndex; + * it does not replace or overwrite any existing text. + * + * This function need only be implemented for UText types that support writing. + * + * When using this function, there should be only a single UText opened onto the + * underlying native text string. The function is responsible for updating the + * text chunk within the UText to reflect the updated iteration position, + * taking into account any changes to the underlying string's structure caused + * by the replace operation. + * + * @param ut The UText representing the text to be operated on. + * @param nativeStart The index of the start of the region to be copied or moved + * @param nativeLimit The index of the character following the region to be replaced. + * @param nativeDest The destination index to which the source substring is copied or moved. + * @param move If true, then the substring is moved, not copied/duplicated. + * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION + * + * @stable ICU 3.4 + */ +typedef void U_CALLCONV +UTextCopy(UText *ut, + int64_t nativeStart, int64_t nativeLimit, + int64_t nativeDest, + UBool move, + UErrorCode *status); + +/** + * Function type declaration for UText.mapOffsetToNative(). + * Map from the current UChar offset within the current text chunk to + * the corresponding native index in the original source text. + * + * This is required only for text providers that do not use native UTF-16 indexes. + * + * @param ut the UText. + * @return Absolute (native) index corresponding to chunkOffset in the current chunk. + * The returned native index should always be to a code point boundary. + * + * @stable ICU 3.4 + */ +typedef int64_t U_CALLCONV +UTextMapOffsetToNative(const UText *ut); + +/** + * Function type declaration for UText.mapIndexToUTF16(). + * Map from a native index to a UChar offset within a text chunk. + * Behavior is undefined if the native index does not fall within the + * current chunk. + * + * This function is required only for text providers that do not use native UTF-16 indexes. + * + * @param ut The UText containing the text chunk. + * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit. + * @return Chunk-relative UTF-16 offset corresponding to the specified native + * index. + * + * @stable ICU 3.4 + */ +typedef int32_t U_CALLCONV +UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex); + + +/** + * Function type declaration for UText.utextClose(). + * + * A Text Provider close function is only required for provider types that make + * allocations in their open function (or other functions) that must be + * cleaned when the UText is closed. + * + * The allocation of the UText struct itself and any "extra" storage + * associated with the UText is handled by the common UText implementation + * and does not require provider specific cleanup in a close function. + * + * Most UText provider implementations do not need to implement this function. + * + * @param ut A UText object to be closed. + * + * @stable ICU 3.4 + */ +typedef void U_CALLCONV +UTextClose(UText *ut); + + +/** + * (public) Function dispatch table for UText. + * Conceptually very much like a C++ Virtual Function Table. + * This struct defines the organization of the table. + * Each text provider implementation must provide an + * actual table that is initialized with the appropriate functions + * for the type of text being handled. + * @stable ICU 3.6 + */ +struct UTextFuncs { + /** + * (public) Function table size, sizeof(UTextFuncs) + * Intended for use should the table grow to accommodate added + * functions in the future, to allow tests for older format + * function tables that do not contain the extensions. + * + * Fields are placed for optimal alignment on + * 32/64/128-bit-pointer machines, by normally grouping together + * 4 32-bit fields, + * 4 pointers, + * 2 64-bit fields + * in sequence. + * @stable ICU 3.6 + */ + int32_t tableSize; + + /** + * (private) Alignment padding. + * Do not use, reserved for use by the UText framework only. + * @internal + */ + int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3; + + + /** + * (public) Function pointer for UTextClone + * + * @see UTextClone + * @stable ICU 3.6 + */ + UTextClone *clone; + + /** + * (public) function pointer for UTextLength + * May be expensive to compute! + * + * @see UTextLength + * @stable ICU 3.6 + */ + UTextNativeLength *nativeLength; + + /** + * (public) Function pointer for UTextAccess. + * + * @see UTextAccess + * @stable ICU 3.6 + */ + UTextAccess *access; + + /** + * (public) Function pointer for UTextExtract. + * + * @see UTextExtract + * @stable ICU 3.6 + */ + UTextExtract *extract; + + /** + * (public) Function pointer for UTextReplace. + * + * @see UTextReplace + * @stable ICU 3.6 + */ + UTextReplace *replace; + + /** + * (public) Function pointer for UTextCopy. + * + * @see UTextCopy + * @stable ICU 3.6 + */ + UTextCopy *copy; + + /** + * (public) Function pointer for UTextMapOffsetToNative. + * + * @see UTextMapOffsetToNative + * @stable ICU 3.6 + */ + UTextMapOffsetToNative *mapOffsetToNative; + + /** + * (public) Function pointer for UTextMapNativeIndexToUTF16. + * + * @see UTextMapNativeIndexToUTF16 + * @stable ICU 3.6 + */ + UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; + + /** + * (public) Function pointer for UTextClose. + * + * @see UTextClose + * @stable ICU 3.6 + */ + UTextClose *close; + + /** + * (private) Spare function pointer + * @internal + */ + UTextClose *spare1; + + /** + * (private) Spare function pointer + * @internal + */ + UTextClose *spare2; + + /** + * (private) Spare function pointer + * @internal + */ + UTextClose *spare3; + +}; +/** + * Function dispatch table for UText + * @see UTextFuncs + */ +typedef struct UTextFuncs UTextFuncs; + + /** + * UText struct. Provides the interface between the generic UText access code + * and the UText provider code that works on specific kinds of + * text (UTF-8, noncontiguous UTF-16, whatever.) + * + * Applications that are using predefined types of text providers + * to pass text data to ICU services will have no need to view the + * internals of the UText structs that they open. + * + * @stable ICU 3.6 + */ +struct UText { + /** + * (private) Magic. Used to help detect when UText functions are handed + * invalid or uninitialized UText structs. + * utext_openXYZ() functions take an initialized, + * but not necessarily open, UText struct as an + * optional fill-in parameter. This magic field + * is used to check for that initialization. + * Text provider close functions must NOT clear + * the magic field because that would prevent + * reuse of the UText struct. + * @internal + */ + uint32_t magic; + + + /** + * (private) Flags for managing the allocation and freeing of + * memory associated with this UText. + * @internal + */ + int32_t flags; + + + /** + * Text provider properties. This set of flags is maintained by the + * text provider implementation. + * @stable ICU 3.4 + */ + int32_t providerProperties; + + /** + * (public) sizeOfStruct=sizeof(UText) + * Allows possible backward compatible extension. + * + * @stable ICU 3.4 + */ + int32_t sizeOfStruct; + + /* ------ 16 byte alignment boundary ----------- */ + + + /** + * (protected) Native index of the first character position following + * the current chunk. + * @stable ICU 3.6 + */ + int64_t chunkNativeLimit; + + /** + * (protected) Size in bytes of the extra space (pExtra). + * @stable ICU 3.4 + */ + int32_t extraSize; + + /** + * (protected) The highest chunk offset where native indexing and + * chunk (UTF-16) indexing correspond. For UTF-16 sources, value + * will be equal to chunkLength. + * + * @stable ICU 3.6 + */ + int32_t nativeIndexingLimit; + + /* ---- 16 byte alignment boundary------ */ + + /** + * (protected) Native index of the first character in the text chunk. + * @stable ICU 3.6 + */ + int64_t chunkNativeStart; + + /** + * (protected) Current iteration position within the text chunk (UTF-16 buffer). + * This is the index to the character that will be returned by utext_next32(). + * @stable ICU 3.6 + */ + int32_t chunkOffset; + + /** + * (protected) Length the text chunk (UTF-16 buffer), in UChars. + * @stable ICU 3.6 + */ + int32_t chunkLength; + + /* ---- 16 byte alignment boundary-- */ + + + /** + * (protected) pointer to a chunk of text in UTF-16 format. + * May refer either to original storage of the source of the text, or + * if conversion was required, to a buffer owned by the UText. + * @stable ICU 3.6 + */ + const UChar *chunkContents; + + /** + * (public) Pointer to Dispatch table for accessing functions for this UText. + * @stable ICU 3.6 + */ + const UTextFuncs *pFuncs; + + /** + * (protected) Pointer to additional space requested by the + * text provider during the utext_open operation. + * @stable ICU 3.4 + */ + void *pExtra; + + /** + * (protected) Pointer to string or text-containing object or similar. + * This is the source of the text that this UText is wrapping, in a format + * that is known to the text provider functions. + * @stable ICU 3.4 + */ + const void *context; + + /* --- 16 byte alignment boundary--- */ + + /** + * (protected) Pointer fields available for use by the text provider. + * Not used by UText common code. + * @stable ICU 3.6 + */ + const void *p; + /** + * (protected) Pointer fields available for use by the text provider. + * Not used by UText common code. + * @stable ICU 3.6 + */ + const void *q; + /** + * (protected) Pointer fields available for use by the text provider. + * Not used by UText common code. + * @stable ICU 3.6 + */ + const void *r; + + /** + * Private field reserved for future use by the UText framework + * itself. This is not to be touched by the text providers. + * @internal ICU 3.4 + */ + void *privP; + + + /* --- 16 byte alignment boundary--- */ + + + /** + * (protected) Integer field reserved for use by the text provider. + * Not used by the UText framework, or by the client (user) of the UText. + * @stable ICU 3.4 + */ + int64_t a; + + /** + * (protected) Integer field reserved for use by the text provider. + * Not used by the UText framework, or by the client (user) of the UText. + * @stable ICU 3.4 + */ + int32_t b; + + /** + * (protected) Integer field reserved for use by the text provider. + * Not used by the UText framework, or by the client (user) of the UText. + * @stable ICU 3.4 + */ + int32_t c; + + /* ---- 16 byte alignment boundary---- */ + + + /** + * Private field reserved for future use by the UText framework + * itself. This is not to be touched by the text providers. + * @internal ICU 3.4 + */ + int64_t privA; + /** + * Private field reserved for future use by the UText framework + * itself. This is not to be touched by the text providers. + * @internal ICU 3.4 + */ + int32_t privB; + /** + * Private field reserved for future use by the UText framework + * itself. This is not to be touched by the text providers. + * @internal ICU 3.4 + */ + int32_t privC; +}; + + +/** + * Common function for use by Text Provider implementations to allocate and/or initialize + * a new UText struct. To be called in the implementation of utext_open() functions. + * If the supplied UText parameter is null, a new UText struct will be allocated on the heap. + * If the supplied UText is already open, the provider's close function will be called + * so that the struct can be reused by the open that is in progress. + * + * @param ut pointer to a UText struct to be re-used, or null if a new UText + * should be allocated. + * @param extraSpace The amount of additional space to be allocated as part + * of this UText, for use by types of providers that require + * additional storage. + * @param status Errors are returned here. + * @return pointer to the UText, allocated if necessary, with extra space set up if requested. + * @stable ICU 3.4 + */ +U_CAPI UText * U_EXPORT2 +utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status); + +// do not use #ifndef U_HIDE_INTERNAL_API around the following! +/** + * @internal + * Value used to help identify correctly initialized UText structs. + * Note: must be publicly visible so that UTEXT_INITIALIZER can access it. + */ +enum { + UTEXT_MAGIC = 0x345ad82c +}; + +/** + * initializer to be used with local (stack) instances of a UText + * struct. UText structs must be initialized before passing + * them to one of the utext_open functions. + * + * @stable ICU 3.6 + */ +#define UTEXT_INITIALIZER { \ + UTEXT_MAGIC, /* magic */ \ + 0, /* flags */ \ + 0, /* providerProps */ \ + sizeof(UText), /* sizeOfStruct */ \ + 0, /* chunkNativeLimit */ \ + 0, /* extraSize */ \ + 0, /* nativeIndexingLimit */ \ + 0, /* chunkNativeStart */ \ + 0, /* chunkOffset */ \ + 0, /* chunkLength */ \ + NULL, /* chunkContents */ \ + NULL, /* pFuncs */ \ + NULL, /* pExtra */ \ + NULL, /* context */ \ + NULL, NULL, NULL, /* p, q, r */ \ + NULL, /* privP */ \ + 0, 0, 0, /* a, b, c */ \ + 0, 0, 0 /* privA,B,C, */ \ + } + + +U_CDECL_END + + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUTextPointer + * "Smart pointer" class, closes a UText via utext_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close); + +U_NAMESPACE_END + +#endif + + +#endif diff --git a/thirdparty/icu4c/common/unicode/utf.h b/thirdparty/icu4c/common/unicode/utf.h new file mode 100644 index 0000000000..c9d5f5785c --- /dev/null +++ b/thirdparty/icu4c/common/unicode/utf.h @@ -0,0 +1,225 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep09 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: Code point macros + * + * This file defines macros for checking whether a code point is + * a surrogate or a non-character etc. + * + * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h + * and itself includes utf8.h and utf16.h after some + * common definitions. + * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be + * included explicitly if their definitions are used. + * + * utf8.h and utf16.h define macros for efficiently getting code points + * in and out of UTF-8/16 strings. + * utf16.h macros have "U16_" prefixes. + * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling. + * + * ICU mostly processes 16-bit Unicode strings. + * Most of the time, such strings are well-formed UTF-16. + * Single, unpaired surrogates must be handled as well, and are treated in ICU + * like regular code points where possible. + * (Pairs of surrogate code points are indistinguishable from supplementary + * code points encoded as pairs of supplementary code units.) + * + * In fact, almost all Unicode code points in normal text (>99%) + * are on the BMP (<=U+ffff) and even <=U+d7ff. + * ICU functions handle supplementary code points (U+10000..U+10ffff) + * but are optimized for the much more frequently occurring BMP code points. + * + * umachine.h defines UChar to be an unsigned 16-bit integer. + * Since ICU 59, ICU uses char16_t in C++, UChar only in C, + * and defines UChar=char16_t by default. See the UChar API docs for details. + * + * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit + * Unicode code point (Unicode scalar value, 0..0x10ffff) and U_SENTINEL (-1). + * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as + * the definition of UChar. For details see the documentation for UChar32 itself. + * + * utf.h defines a small number of C macros for single Unicode code points. + * These are simple checks for surrogates and non-characters. + * For actual Unicode character properties see uchar.h. + * + * By default, string operations must be done with error checking in case + * a string is not well-formed UTF-16 or UTF-8. + * + * The U16_ macros detect if a surrogate code unit is unpaired + * (lead unit without trail unit or vice versa) and just return the unit itself + * as the code point. + * + * The U8_ macros detect illegal byte sequences and return a negative value. + * Starting with ICU 60, the observable length of a single illegal byte sequence + * skipped by one of these macros follows the Unicode 6+ recommendation + * which is consistent with the W3C Encoding Standard. + * + * There are ..._OR_FFFD versions of both U16_ and U8_ macros + * that return U+FFFD for illegal code unit sequences. + * + * The regular "safe" macros require that the initial, passed-in string index + * is within bounds. They only check the index when they read more than one + * code unit. This is usually done with code similar to the following loop: + * <pre>while(i<length) { + * U16_NEXT(s, i, length, c); + * // use c + * }</pre> + * + * When it is safe to assume that text is well-formed UTF-16 + * (does not contain single, unpaired surrogates), then one can use + * U16_..._UNSAFE macros. + * These do not check for proper code unit sequences or truncated text and may + * yield wrong results or even cause a crash if they are used with "malformed" + * text. + * In practice, U16_..._UNSAFE macros will produce slightly less code but + * should not be faster because the processing is only different when a + * surrogate code unit is detected, which will be rare. + * + * Similarly for UTF-8, there are "safe" macros without a suffix, + * and U8_..._UNSAFE versions. + * The performance differences are much larger here because UTF-8 provides so + * many opportunities for malformed sequences. + * The unsafe UTF-8 macros are entirely implemented inside the macro definitions + * and are fast, while the safe UTF-8 macros call functions for some complicated cases. + * + * Unlike with UTF-16, malformed sequences cannot be expressed with distinct + * code point values (0..U+10ffff). They are indicated with negative values instead. + * + * For more information see the ICU User Guide Strings chapter + * (https://unicode-org.github.io/icu/userguide/strings). + * + * <em>Usage:</em> + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. + * + * @stable ICU 2.4 + */ + +#ifndef __UTF_H__ +#define __UTF_H__ + +#include "unicode/umachine.h" +/* include the utfXX.h after the following definitions */ + +/* single-code point definitions -------------------------------------------- */ + +/** + * Is this code point a Unicode noncharacter? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +#define U_IS_UNICODE_NONCHAR(c) \ + ((c)>=0xfdd0 && \ + ((c)<=0xfdef || ((c)&0xfffe)==0xfffe) && (c)<=0x10ffff) + +/** + * Is c a Unicode code point value (0..U+10ffff) + * that can be assigned a character? + * + * Code points that are not characters include: + * - single surrogate code points (U+d800..U+dfff, 2048 code points) + * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points) + * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points) + * - the highest Unicode code point value is U+10ffff + * + * This means that all code points below U+d800 are character code points, + * and that boundary is tested first for performance. + * + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +#define U_IS_UNICODE_CHAR(c) \ + ((uint32_t)(c)<0xd800 || \ + (0xdfff<(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c))) + +/** + * Is this code point a BMP code point (U+0000..U+ffff)? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.8 + */ +#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff) + +/** + * Is this code point a supplementary code point (U+10000..U+10ffff)? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.8 + */ +#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff) + +/** + * Is this code point a lead surrogate (U+d800..U+dbff)? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) + +/** + * Is this code point a trail surrogate (U+dc00..U+dfff)? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) + +/** + * Is this code point a surrogate (U+d800..U+dfff)? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) + +/** + * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), + * is it a lead surrogate? + * @param c 32-bit code point + * @return true or false + * @stable ICU 2.4 + */ +#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +/** + * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), + * is it a trail surrogate? + * @param c 32-bit code point + * @return true or false + * @stable ICU 4.2 + */ +#define U_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) + +/* include the utfXX.h ------------------------------------------------------ */ + +#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS + +#include "unicode/utf8.h" +#include "unicode/utf16.h" + +/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */ +#include "unicode/utf_old.h" + +#endif /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */ + +#endif /* __UTF_H__ */ diff --git a/thirdparty/icu4c/common/unicode/utf16.h b/thirdparty/icu4c/common/unicode/utf16.h new file mode 100644 index 0000000000..3902c60e95 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/utf16.h @@ -0,0 +1,734 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf16.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep09 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: 16-bit Unicode handling macros + * + * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. + * + * For more information see utf.h and the ICU User Guide Strings chapter + * (https://unicode-org.github.io/icu/userguide/strings). + * + * <em>Usage:</em> + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. + */ + +#ifndef __UTF16_H__ +#define __UTF16_H__ + +#include <stdbool.h> +#include "unicode/umachine.h" +#ifndef __UTF_H__ +# include "unicode/utf.h" +#endif + +/* single-code point definitions -------------------------------------------- */ + +/** + * Does this code unit alone encode a code point (BMP, not a surrogate)? + * @param c 16-bit code unit + * @return true or false + * @stable ICU 2.4 + */ +#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) + +/** + * Is this code unit a lead surrogate (U+d800..U+dbff)? + * @param c 16-bit code unit + * @return true or false + * @stable ICU 2.4 + */ +#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) + +/** + * Is this code unit a trail surrogate (U+dc00..U+dfff)? + * @param c 16-bit code unit + * @return true or false + * @stable ICU 2.4 + */ +#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) + +/** + * Is this code unit a surrogate (U+d800..U+dfff)? + * @param c 16-bit code unit + * @return true or false + * @stable ICU 2.4 + */ +#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) + +/** + * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), + * is it a lead surrogate? + * @param c 16-bit code unit + * @return true or false + * @stable ICU 2.4 + */ +#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +/** + * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), + * is it a trail surrogate? + * @param c 16-bit code unit + * @return true or false + * @stable ICU 4.2 + */ +#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) + +/** + * Helper constant for U16_GET_SUPPLEMENTARY. + * @internal + */ +#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) + +/** + * Get a supplementary code point value (U+10000..U+10ffff) + * from its lead and trail surrogates. + * The result is undefined if the input values are not + * lead and trail surrogates. + * + * @param lead lead surrogate (U+d800..U+dbff) + * @param trail trail surrogate (U+dc00..U+dfff) + * @return supplementary code point (U+10000..U+10ffff) + * @stable ICU 2.4 + */ +#define U16_GET_SUPPLEMENTARY(lead, trail) \ + (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) + + +/** + * Get the lead surrogate (0xd800..0xdbff) for a + * supplementary code point (0x10000..0x10ffff). + * @param supplementary 32-bit code point (U+10000..U+10ffff) + * @return lead surrogate (U+d800..U+dbff) for supplementary + * @stable ICU 2.4 + */ +#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) + +/** + * Get the trail surrogate (0xdc00..0xdfff) for a + * supplementary code point (0x10000..0x10ffff). + * @param supplementary 32-bit code point (U+10000..U+10ffff) + * @return trail surrogate (U+dc00..U+dfff) for supplementary + * @stable ICU 2.4 + */ +#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) + +/** + * How many 16-bit code units are used to encode this Unicode code point? (1 or 2) + * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff). + * @param c 32-bit code point + * @return 1 or 2 + * @stable ICU 2.4 + */ +#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) + +/** + * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). + * @return 2 + * @stable ICU 2.4 + */ +#define U16_MAX_LENGTH 2 + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * The result is undefined if the offset points to a single, unpaired surrogate. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_GET + * @stable ICU 2.4 + */ +#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[i]; \ + if(U16_IS_SURROGATE(c)) { \ + if(U16_IS_SURROGATE_LEAD(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \ + } else { \ + (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to a single, unpaired surrogate, then + * c is set to that unpaired surrogate. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<=i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_GET_UNSAFE + * @stable ICU 2.4 + */ +#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[i]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(U16_IS_SURROGATE_LEAD(c)) { \ + if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } \ + } else { \ + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to a single, unpaired surrogate, then + * c is set to U+FFFD. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<=i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_GET_UNSAFE + * @stable ICU 60 + */ +#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[i]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(U16_IS_SURROGATE_LEAD(c)) { \ + if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } else { \ + (c)=0xfffd; \ + } \ + } else { \ + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } else { \ + (c)=0xfffd; \ + } \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/* definitions with forward iteration --------------------------------------- */ + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate, then that itself + * will be returned as the code point. + * The result is undefined if the offset points to a single, unpaired lead surrogate. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_NEXT + * @stable ICU 2.4 + */ +#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[(i)++]; \ + if(U16_IS_LEAD(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate or + * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate. + * + * @param s const UChar * string + * @param i string offset, must be i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_NEXT_UNSAFE + * @stable ICU 2.4 + */ +#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[(i)++]; \ + if(U16_IS_LEAD(c)) { \ + uint16_t __c2; \ + if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ + ++(i); \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate or + * to a single, unpaired lead surrogate, then c is set to U+FFFD. + * + * @param s const UChar * string + * @param i string offset, must be i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_NEXT_UNSAFE + * @stable ICU 60 + */ +#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[(i)++]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ + ++(i); \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } else { \ + (c)=0xfffd; \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Append a code point to a string, overwriting 1 or 2 code units. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Unsafe" macro, assumes a valid code point and sufficient space in the string. + * Otherwise, the result is undefined. + * + * @param s const UChar * string buffer + * @param i string offset + * @param c code point to append + * @see U16_APPEND + * @stable ICU 2.4 + */ +#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Append a code point to a string, overwriting 1 or 2 code units. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Safe" macro, checks for a valid code point. + * If a surrogate pair is written, checks for sufficient space in the string. + * If the code point is not valid or a trail surrogate does not fit, + * then isError is set to true. + * + * @param s const UChar * string buffer + * @param i string offset, must be i<capacity + * @param capacity size of the string buffer + * @param c code point to append + * @param isError output UBool set to true if an error occurs, otherwise not modified + * @see U16_APPEND_UNSAFE + * @stable ICU 2.4 + */ +#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } else /* c>0x10ffff or not enough space */ { \ + (isError)=true; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_FWD_1 + * @stable ICU 2.4 + */ +#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U16_IS_LEAD((s)[(i)++])) { \ + ++(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const UChar * string + * @param i string offset, must be i<length + * @param length string length + * @see U16_FWD_1_UNSAFE + * @stable ICU 2.4 + */ +#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ + if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @param n number of code points to skip + * @see U16_FWD_N + * @stable ICU 2.4 + */ +#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0) { \ + U16_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const UChar * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @param n number of code points to skip + * @see U16_FWD_N_UNSAFE + * @stable ICU 2.4 + */ +#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ + U16_FWD_1(s, i, length); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to the trail surrogate of a surrogate pair, + * then the offset is decremented. + * Otherwise, it is not modified. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_SET_CP_START + * @stable ICU 2.4 + */ +#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U16_IS_TRAIL((s)[i])) { \ + --(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to the trail surrogate of a surrogate pair, + * then the offset is decremented. + * Otherwise, it is not modified. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<=i + * @see U16_SET_CP_START_UNSAFE + * @stable ICU 2.4 + */ +#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ + --(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/* definitions with backward iteration -------------------------------------- */ + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate, then that itself + * will be returned as the code point. + * The result is undefined if the offset is behind a single, unpaired trail surrogate. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_PREV + * @stable ICU 2.4 + */ +#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[--(i)]; \ + if(U16_IS_TRAIL(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate or behind a single, unpaired + * trail surrogate, then c is set to that unpaired surrogate. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<i + * @param c output UChar32 variable + * @see U16_PREV_UNSAFE + * @stable ICU 2.4 + */ +#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[--(i)]; \ + if(U16_IS_TRAIL(c)) { \ + uint16_t __c2; \ + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate or behind a single, unpaired + * trail surrogate, then c is set to U+FFFD. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<i + * @param c output UChar32 variable + * @see U16_PREV_UNSAFE + * @stable ICU 60 + */ +#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[--(i)]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } else { \ + (c)=0xfffd; \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_BACK_1 + * @stable ICU 2.4 + */ +#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U16_IS_TRAIL((s)[--(i)])) { \ + --(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<i + * @see U16_BACK_1_UNSAFE + * @stable ICU 2.4 + */ +#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ + --(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @param n number of code points to skip + * @see U16_BACK_N + * @stable ICU 2.4 + */ +#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0) { \ + U16_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start start of string + * @param i string offset, must be start<i + * @param n number of code points to skip + * @see U16_BACK_N_UNSAFE + * @stable ICU 2.4 + */ +#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0 && (i)>(start)) { \ + U16_BACK_1(s, start, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind the lead surrogate of a surrogate pair, + * then the offset is incremented. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_SET_CP_LIMIT + * @stable ICU 2.4 + */ +#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U16_IS_LEAD((s)[(i)-1])) { \ + ++(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind the lead surrogate of a surrogate pair, + * then the offset is incremented. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const UChar * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, start<=i<=length + * @param length int32_t string length + * @see U16_SET_CP_LIMIT_UNSAFE + * @stable ICU 2.4 + */ +#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ + if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +#endif diff --git a/thirdparty/icu4c/common/unicode/utf32.h b/thirdparty/icu4c/common/unicode/utf32.h new file mode 100644 index 0000000000..8822c4dd09 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/utf32.h @@ -0,0 +1,25 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2001, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf32.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep20 +* created by: Markus W. Scherer +*/ +/** + * \file + * \brief C API: UTF-32 macros + * + * This file is obsolete and its contents moved to utf_old.h. + * See utf_old.h and Jitterbug 2150 and its discussion on the ICU mailing list + * in September 2002. + */ diff --git a/thirdparty/icu4c/common/unicode/utf8.h b/thirdparty/icu4c/common/unicode/utf8.h new file mode 100644 index 0000000000..5a07435fcf --- /dev/null +++ b/thirdparty/icu4c/common/unicode/utf8.h @@ -0,0 +1,882 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf8.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep13 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: 8-bit Unicode handling macros + * + * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings. + * + * For more information see utf.h and the ICU User Guide Strings chapter + * (https://unicode-org.github.io/icu/userguide/strings). + * + * <em>Usage:</em> + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. + */ + +#ifndef __UTF8_H__ +#define __UTF8_H__ + +#include <stdbool.h> +#include "unicode/umachine.h" +#ifndef __UTF_H__ +# include "unicode/utf.h" +#endif + +/* internal definitions ----------------------------------------------------- */ + +/** + * Counts the trail bytes for a UTF-8 lead byte. + * Returns 0 for 0..0xc1 as well as for 0xf5..0xff. + * leadByte might be evaluated multiple times. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this file and thus must remain stable. + * + * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. + * @internal + */ +#define U8_COUNT_TRAIL_BYTES(leadByte) \ + (U8_IS_LEAD(leadByte) ? \ + ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0) + +/** + * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence. + * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff. + * leadByte might be evaluated multiple times. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this file and thus must remain stable. + * + * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. + * @internal + */ +#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \ + (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)) + +/** + * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this file and thus must remain stable. + * @internal + */ +#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) + +/** + * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1. + * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. + * Lead byte E0..EF bits 3..0 are used as byte index, + * first trail byte bits 7..5 are used as bit index into that byte. + * @see U8_IS_VALID_LEAD3_AND_T1 + * @internal + */ +#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" + +/** + * Internal 3-byte UTF-8 validity check. + * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence. + * @internal + */ +#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5))) + +/** + * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1. + * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. + * First trail byte bits 7..4 are used as byte index, + * lead byte F0..F4 bits 2..0 are used as bit index into that byte. + * @see U8_IS_VALID_LEAD4_AND_T1 + * @internal + */ +#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" + +/** + * Internal 4-byte UTF-8 validity check. + * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence. + * @internal + */ +#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7))) + +/** + * Function for handling "next code point" with error-checking. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this + * file and thus must remain stable, and should not be hidden when other internal + * functions are hidden (otherwise public macros would fail to compile). + * @internal + */ +U_CAPI UChar32 U_EXPORT2 +utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict); + +/** + * Function for handling "append code point" with error-checking. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this + * file and thus must remain stable, and should not be hidden when other internal + * functions are hidden (otherwise public macros would fail to compile). + * @internal + */ +U_CAPI int32_t U_EXPORT2 +utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError); + +/** + * Function for handling "previous code point" with error-checking. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this + * file and thus must remain stable, and should not be hidden when other internal + * functions are hidden (otherwise public macros would fail to compile). + * @internal + */ +U_CAPI UChar32 U_EXPORT2 +utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict); + +/** + * Function for handling "skip backward one code point" with error-checking. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this + * file and thus must remain stable, and should not be hidden when other internal + * functions are hidden (otherwise public macros would fail to compile). + * @internal + */ +U_CAPI int32_t U_EXPORT2 +utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); + +/* single-code point definitions -------------------------------------------- */ + +/** + * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)? + * @param c 8-bit code unit (byte) + * @return true or false + * @stable ICU 2.4 + */ +#define U8_IS_SINGLE(c) (((c)&0x80)==0) + +/** + * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4) + * @param c 8-bit code unit (byte) + * @return true or false + * @stable ICU 2.4 + */ +#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32) +// 0x32=0xf4-0xc2 + +/** + * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF) + * @param c 8-bit code unit (byte) + * @return true or false + * @stable ICU 2.4 + */ +#define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40) + +/** + * How many code units (bytes) are used for the UTF-8 encoding + * of this Unicode code point? + * @param c 32-bit code point + * @return 1..4, or 0 if c is a surrogate or not a Unicode code point + * @stable ICU 2.4 + */ +#define U8_LENGTH(c) \ + ((uint32_t)(c)<=0x7f ? 1 : \ + ((uint32_t)(c)<=0x7ff ? 2 : \ + ((uint32_t)(c)<=0xd7ff ? 3 : \ + ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \ + ((uint32_t)(c)<=0xffff ? 3 : 4)\ + ) \ + ) \ + ) \ + ) + +/** + * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff). + * @return 4 + * @stable ICU 2.4 + */ +#define U8_MAX_LENGTH 4 + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * The offset may point to either the lead byte or one of the trail bytes + * for a code point, in which case the macro will read all of the bytes + * for the code point. + * The result is undefined if the offset points to an illegal UTF-8 + * byte sequence. + * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. + * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_GET + * @stable ICU 2.4 + */ +#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t _u8_get_unsafe_index=(int32_t)(i); \ + U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \ + U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \ +} UPRV_BLOCK_MACRO_END + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * The offset may point to either the lead byte or one of the trail bytes + * for a code point, in which case the macro will read all of the bytes + * for the code point. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to an illegal UTF-8 byte sequence, then + * c is set to a negative value. + * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset + * @param i int32_t string offset, must be start<=i<length + * @param length int32_t string length + * @param c output UChar32 variable, set to <0 in case of an error + * @see U8_GET_UNSAFE + * @stable ICU 2.4 + */ +#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t _u8_get_index=(i); \ + U8_SET_CP_START(s, start, _u8_get_index); \ + U8_NEXT(s, _u8_get_index, length, c); \ +} UPRV_BLOCK_MACRO_END + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * The offset may point to either the lead byte or one of the trail bytes + * for a code point, in which case the macro will read all of the bytes + * for the code point. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to an illegal UTF-8 byte sequence, then + * c is set to U+FFFD. + * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD. + * + * This macro does not distinguish between a real U+FFFD in the text + * and U+FFFD returned for an ill-formed sequence. + * Use U8_GET() if that distinction is important. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset + * @param i int32_t string offset, must be start<=i<length + * @param length int32_t string length + * @param c output UChar32 variable, set to U+FFFD in case of an error + * @see U8_GET + * @stable ICU 51 + */ +#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t _u8_get_index=(i); \ + U8_SET_CP_START(s, start, _u8_get_index); \ + U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \ +} UPRV_BLOCK_MACRO_END + +/* definitions with forward iteration --------------------------------------- */ + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * The offset may point to the lead byte of a multi-byte sequence, + * in which case the macro will read the whole sequence. + * The result is undefined if the offset points to a trail byte + * or an illegal UTF-8 sequence. + * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_NEXT + * @stable ICU 2.4 + */ +#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(uint8_t)(s)[(i)++]; \ + if(!U8_IS_SINGLE(c)) { \ + if((c)<0xe0) { \ + (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \ + } else if((c)<0xf0) { \ + /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \ + (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \ + (i)+=2; \ + } else { \ + (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \ + (i)+=3; \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead byte of a multi-byte sequence, + * in which case the macro will read the whole sequence. + * If the offset points to a trail byte or an illegal UTF-8 sequence, then + * c is set to a negative value. + * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @param c output UChar32 variable, set to <0 in case of an error + * @see U8_NEXT_UNSAFE + * @stable ICU 2.4 + */ +#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL) + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead byte of a multi-byte sequence, + * in which case the macro will read the whole sequence. + * If the offset points to a trail byte or an illegal UTF-8 sequence, then + * c is set to U+FFFD. + * + * This macro does not distinguish between a real U+FFFD in the text + * and U+FFFD returned for an ill-formed sequence. + * Use U8_NEXT() if that distinction is important. + * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @param c output UChar32 variable, set to U+FFFD in case of an error + * @see U8_NEXT + * @stable ICU 51 + */ +#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd) + +/** @internal */ +#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(uint8_t)(s)[(i)++]; \ + if(!U8_IS_SINGLE(c)) { \ + uint8_t __t = 0; \ + if((i)!=(length) && \ + /* fetch/validate/assemble all but last trail byte */ \ + ((c)>=0xe0 ? \ + ((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \ + U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \ + (__t&=0x3f, 1) \ + : /* U+10000..U+10FFFF */ \ + ((c)-=0xf0)<=4 && \ + U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \ + ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \ + (__t=(s)[i]-0x80)<=0x3f) && \ + /* valid second-to-last trail byte */ \ + ((c)=((c)<<6)|__t, ++(i)!=(length)) \ + : /* U+0080..U+07FF */ \ + (c)>=0xc2 && ((c)&=0x1f, 1)) && \ + /* last trail byte */ \ + (__t=(s)[i]-0x80)<=0x3f && \ + ((c)=((c)<<6)|__t, ++(i), 1)) { \ + } else { \ + (c)=(sub); /* ill-formed*/ \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Append a code point to a string, overwriting 1 to 4 bytes. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Unsafe" macro, assumes a valid code point and sufficient space in the string. + * Otherwise, the result is undefined. + * + * @param s const uint8_t * string buffer + * @param i string offset + * @param c code point to append + * @see U8_APPEND + * @stable ICU 2.4 + */ +#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + uint32_t __uc=(c); \ + if(__uc<=0x7f) { \ + (s)[(i)++]=(uint8_t)__uc; \ + } else { \ + if(__uc<=0x7ff) { \ + (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ + } else { \ + if(__uc<=0xffff) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ + } else { \ + (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Append a code point to a string, overwriting 1 to 4 bytes. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Safe" macro, checks for a valid code point. + * If a non-ASCII code point is written, checks for sufficient space in the string. + * If the code point is not valid or trail bytes do not fit, + * then isError is set to true. + * + * @param s const uint8_t * string buffer + * @param i int32_t string offset, must be i<capacity + * @param capacity int32_t size of the string buffer + * @param c UChar32 code point to append + * @param isError output UBool set to true if an error occurs, otherwise not modified + * @see U8_APPEND_UNSAFE + * @stable ICU 2.4 + */ +#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \ + uint32_t __uc=(c); \ + if(__uc<=0x7f) { \ + (s)[(i)++]=(uint8_t)__uc; \ + } else if(__uc<=0x7ff && (i)+1<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else { \ + (isError)=true; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_FWD_1 + * @stable ICU 2.4 + */ +#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @see U8_FWD_1_UNSAFE + * @stable ICU 2.4 + */ +#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ + uint8_t __b=(s)[(i)++]; \ + if(U8_IS_LEAD(__b) && (i)!=(length)) { \ + uint8_t __t1=(s)[i]; \ + if((0xe0<=__b && __b<0xf0)) { \ + if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \ + ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ + } else if(__b<0xe0) { \ + if(U8_IS_TRAIL(__t1)) { \ + ++(i); \ + } \ + } else /* c>=0xf0 */ { \ + if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \ + ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \ + ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @param n number of code points to skip + * @see U8_FWD_N + * @stable ICU 2.4 + */ +#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0) { \ + U8_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @param n number of code points to skip + * @see U8_FWD_N_UNSAFE + * @stable ICU 2.4 + */ +#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ + U8_FWD_1(s, i, length); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to a UTF-8 trail byte, + * then the offset is moved backward to the corresponding lead byte. + * Otherwise, it is not modified. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_SET_CP_START + * @stable ICU 2.4 + */ +#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + while(U8_IS_TRAIL((s)[i])) { --(i); } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to a UTF-8 trail byte, + * then the offset is moved backward to the corresponding lead byte. + * Otherwise, it is not modified. + * + * "Safe" macro, checks for illegal sequences and for string boundaries. + * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i]. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<=i + * @see U8_SET_CP_START_UNSAFE + * @see U8_TRUNCATE_IF_INCOMPLETE + * @stable ICU 2.4 + */ +#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U8_IS_TRAIL((s)[(i)])) { \ + (i)=utf8_back1SafeBody(s, start, (i)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * If the string ends with a UTF-8 byte sequence that is valid so far + * but incomplete, then reduce the length of the string to end before + * the lead byte of that incomplete sequence. + * For example, if the string ends with E1 80, the length is reduced by 2. + * + * In all other cases (the string ends with a complete sequence, or it is not + * possible for any further trail byte to extend the trailing sequence) + * the length remains unchanged. + * + * Useful for processing text split across multiple buffers + * (save the incomplete sequence for later) + * and for optimizing iteration + * (check for string length only once per character). + * + * "Safe" macro, checks for illegal sequences and for string boundaries. + * Unlike U8_SET_CP_START(), this macro never reads s[length]. + * + * (In UTF-16, simply check for U16_IS_LEAD(last code unit).) + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param length int32_t string length (usually start<=length) + * @see U8_SET_CP_START + * @stable ICU 61 + */ +#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \ + if((length)>(start)) { \ + uint8_t __b1=s[(length)-1]; \ + if(U8_IS_SINGLE(__b1)) { \ + /* common ASCII character */ \ + } else if(U8_IS_LEAD(__b1)) { \ + --(length); \ + } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \ + uint8_t __b2=s[(length)-2]; \ + if(0xe0<=__b2 && __b2<=0xf4) { \ + if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \ + U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \ + (length)-=2; \ + } \ + } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \ + uint8_t __b3=s[(length)-3]; \ + if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \ + (length)-=3; \ + } \ + } \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/* definitions with backward iteration -------------------------------------- */ + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * The input offset may be the same as the string length. + * If the offset is behind a multi-byte sequence, then the macro will read + * the whole sequence. + * If the offset is behind a lead byte, then that itself + * will be returned as the code point. + * The result is undefined if the offset is behind an illegal UTF-8 sequence. + * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_PREV + * @stable ICU 2.4 + */ +#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(uint8_t)(s)[--(i)]; \ + if(U8_IS_TRAIL(c)) { \ + uint8_t __b, __count=1, __shift=6; \ +\ + /* c is a trail byte */ \ + (c)&=0x3f; \ + for(;;) { \ + __b=(s)[--(i)]; \ + if(__b>=0xc0) { \ + U8_MASK_LEAD_BYTE(__b, __count); \ + (c)|=(UChar32)__b<<__shift; \ + break; \ + } else { \ + (c)|=(UChar32)(__b&0x3f)<<__shift; \ + ++__count; \ + __shift+=6; \ + } \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a multi-byte sequence, then the macro will read + * the whole sequence. + * If the offset is behind a lead byte, then that itself + * will be returned as the code point. + * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<i + * @param c output UChar32 variable, set to <0 in case of an error + * @see U8_PREV_UNSAFE + * @stable ICU 2.4 + */ +#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(uint8_t)(s)[--(i)]; \ + if(!U8_IS_SINGLE(c)) { \ + (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a multi-byte sequence, then the macro will read + * the whole sequence. + * If the offset is behind a lead byte, then that itself + * will be returned as the code point. + * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD. + * + * This macro does not distinguish between a real U+FFFD in the text + * and U+FFFD returned for an ill-formed sequence. + * Use U8_PREV() if that distinction is important. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<i + * @param c output UChar32 variable, set to U+FFFD in case of an error + * @see U8_PREV + * @stable ICU 51 + */ +#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(uint8_t)(s)[--(i)]; \ + if(!U8_IS_SINGLE(c)) { \ + (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_BACK_1 + * @stable ICU 2.4 + */ +#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + while(U8_IS_TRAIL((s)[--(i)])) {} \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<i + * @see U8_BACK_1_UNSAFE + * @stable ICU 2.4 + */ +#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(U8_IS_TRAIL((s)[--(i)])) { \ + (i)=utf8_back1SafeBody(s, start, (i)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @param n number of code points to skip + * @see U8_BACK_N + * @stable ICU 2.4 + */ +#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0) { \ + U8_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * @param s const uint8_t * string + * @param start int32_t index of the start of the string + * @param i int32_t string offset, must be start<i + * @param n number of code points to skip + * @see U8_BACK_N_UNSAFE + * @stable ICU 2.4 + */ +#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0 && (i)>(start)) { \ + U8_BACK_1(s, start, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind a partial multi-byte sequence, + * then the offset is incremented to behind the whole sequence. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_SET_CP_LIMIT + * @stable ICU 2.4 + */ +#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + U8_BACK_1_UNSAFE(s, i); \ + U8_FWD_1_UNSAFE(s, i); \ +} UPRV_BLOCK_MACRO_END + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind a partial multi-byte sequence, + * then the offset is incremented to behind the whole sequence. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<=i<=length + * @param length int32_t string length + * @see U8_SET_CP_LIMIT_UNSAFE + * @stable ICU 2.4 + */ +#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ + if((start)<(i) && ((i)<(length) || (length)<0)) { \ + U8_BACK_1(s, start, i); \ + U8_FWD_1(s, i, length); \ + } \ +} UPRV_BLOCK_MACRO_END + +#endif diff --git a/thirdparty/icu4c/common/unicode/utf_old.h b/thirdparty/icu4c/common/unicode/utf_old.h new file mode 100644 index 0000000000..160f5ad0a9 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/utf_old.h @@ -0,0 +1,1201 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf_old.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002sep21 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: Deprecated macros for Unicode string handling + * + * The macros in utf_old.h are all deprecated and their use discouraged. + * Some of the design principles behind the set of UTF macros + * have changed or proved impractical. + * Almost all of the old "UTF macros" are at least renamed. + * If you are looking for a new equivalent to an old macro, please see the + * comment at the old one. + * + * Brief summary of reasons for deprecation: + * - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing) + * was impractical. + * - Switch on UTF_SAFE etc. (selection of unsafe/safe/strict default string processing) + * was of little use and impractical. + * - Whole classes of macros became obsolete outside of the UTF_SIZE/UTF_SAFE + * selection framework: UTF32_ macros (all trivial) + * and UTF_ default and intermediate macros (all aliases). + * - The selection framework also caused many macro aliases. + * - Change in Unicode standard: "irregular" sequences (3.0) became illegal (3.2). + * - Change of language in Unicode standard: + * Growing distinction between internal x-bit Unicode strings and external UTF-x + * forms, with the former more lenient. + * Suggests renaming of UTF16_ macros to U16_. + * - The prefix "UTF_" without a width number confused some users. + * - "Safe" append macros needed the addition of an error indicator output. + * - "Safe" UTF-8 macros used legitimate (if rarely used) code point values + * to indicate error conditions. + * - The use of the "_CHAR" infix for code point operations confused some users. + * + * More details: + * + * Until ICU 2.2, utf.h theoretically allowed to choose among UTF-8/16/32 + * for string processing, and among unsafe/safe/strict default macros for that. + * + * It proved nearly impossible to write non-trivial, high-performance code + * that is UTF-generic. + * Unsafe default macros would be dangerous for default string processing, + * and the main reason for the "strict" versions disappeared: + * Between Unicode 3.0 and 3.2 all "irregular" UTF-8 sequences became illegal. + * The only other conditions that "strict" checked for were non-characters, + * which are valid during processing. Only during text input/output should they + * be checked, and at that time other well-formedness checks may be + * necessary or useful as well. + * This can still be done by using U16_NEXT and U_IS_UNICODE_NONCHAR + * or U_IS_UNICODE_CHAR. + * + * The old UTF8_..._SAFE macros also used some normal Unicode code points + * to indicate malformed sequences. + * The new UTF8_ macros without suffix use negative values instead. + * + * The entire contents of utf32.h was moved here without replacement + * because all those macros were trivial and + * were meaningful only in the framework of choosing the UTF size. + * + * See Jitterbug 2150 and its discussion on the ICU mailing list + * in September 2002. + * + * <hr> + * + * <em>Obsolete part</em> of pre-ICU 2.4 utf.h file documentation: + * + * <p>The original concept for these files was for ICU to allow + * in principle to set which UTF (UTF-8/16/32) is used internally + * by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type + * accordingly. UTF-16 was the default.</p> + * + * <p>This concept has been abandoned. + * A lot of the ICU source code assumes UChar strings are in UTF-16. + * This is especially true for low-level code like + * conversion, normalization, and collation. + * The utf.h header enforces the default of UTF-16. + * The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility.</p> + * + * <p>Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then + * UChar is defined to be exactly wchar_t, otherwise uint16_t.</p> + * + * <p>UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit + * Unicode code point (Unicode scalar value, 0..0x10ffff). + * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as + * the definition of UChar. For details see the documentation for UChar32 itself.</p> + * + * <p>utf.h also defines a number of C macros for handling single Unicode code points and + * for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual + * implementations of those macros and then aliases one set of them (for UTF-16) for general use. + * The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while + * the general alias macros always begin with UTF_...</p> + * + * <p>Many string operations can be done with or without error checking. + * Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe" + * ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause + * program failures if the strings are not well-formed. The safe macros have an additional, boolean + * parameter "strict". If strict is false, then only illegal sequences are detected. + * Otherwise, irregular sequences and non-characters are detected as well (like single surrogates). + * Safe macros return special error code points for illegal/irregular sequences: + * Typically, U+ffff, or values that would result in a code unit sequence of the same length + * as the erroneous input sequence.<br> + * Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and + * they do not have start/length parameters for boundary checking.</p> + * + * <p>Here, the macros are aliased in two steps: + * In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are + * aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures. + * Then, in a second step, the default, general alias macros are set to use either the unsafe or + * the safe/not strict (default) or the safe/strict macro; + * these general macros do not have a strictness parameter.</p> + * + * <p>It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict. + * The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for + * Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE.</p> + * + * <p>For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix. + * Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias. + * For example, if it can be assumed that a string is well-formed and the index will stay within the bounds, + * then the _UNSAFE version may be used. + * If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used.</p> + * + * <hr> + * + * Deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead. + */ + +#ifndef __UTF_OLD_H__ +#define __UTF_OLD_H__ + +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" + +/** + * \def U_HIDE_OBSOLETE_UTF_OLD_H + * + * Hides the obsolete definitions in unicode/utf_old.h. + * Recommended to be set to 1 at compile time to make sure + * the long-deprecated macros are no longer used. + * + * For reasons for the deprecation see the utf_old.h file comments. + * + * @internal + */ +#ifndef U_HIDE_OBSOLETE_UTF_OLD_H +# define U_HIDE_OBSOLETE_UTF_OLD_H 0 +#endif + +#if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H + +/* Formerly utf.h, part 1 --------------------------------------------------- */ + +#ifdef U_USE_UTF_DEPRECATES +/** + * Unicode string and array offset and index type. + * ICU always counts Unicode code units (UChars) for + * string offsets, indexes, and lengths, not Unicode code points. + * + * @obsolete ICU 2.6. Use int32_t directly instead since this API will be removed in that release. + */ +typedef int32_t UTextOffset; +#endif + +/** Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF_SIZE 16 + +/** + * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations + * with strict=false. + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_SAFE +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#undef UTF_UNSAFE +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#undef UTF_STRICT + +/** + * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8, + * which need 1 or 2 bytes in UTF-8: + * \code + * U+0015 = NAK = Negative Acknowledge, C0 control character + * U+009f = highest C1 control character + * \endcode + * + * These are used by UTF8_..._SAFE macros so that they can return an error value + * that needs the same number of code units (bytes) as were seen by + * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID(). + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF8_ERROR_VALUE_1 0x15 + +/** + * See documentation on UTF8_ERROR_VALUE_1 for details. + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF8_ERROR_VALUE_2 0x9f + +/** + * Error value for all UTFs. This code point value will be set by macros with error + * checking if an error is detected. + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_ERROR_VALUE 0xffff + +/** + * Is a given 32-bit code an error value + * as returned by one of the macros for any UTF? + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_IS_ERROR(c) \ + (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2) + +/** + * This is a combined macro: Is c a valid Unicode value _and_ not an error code? + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_IS_VALID(c) \ + (UTF_IS_UNICODE_CHAR(c) && \ + (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2) + +/** + * Is this code unit or code point a surrogate (U+d800..U+dfff)? + * @deprecated ICU 2.4. Renamed to U_IS_SURROGATE and U16_IS_SURROGATE, see utf_old.h. + */ +#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800) + +/** + * Is a given 32-bit code point a Unicode noncharacter? + * + * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_NONCHAR, see utf_old.h. + */ +#define UTF_IS_UNICODE_NONCHAR(c) \ + ((c)>=0xfdd0 && \ + ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ + (uint32_t)(c)<=0x10ffff) + +/** + * Is a given 32-bit value a Unicode code point value (0..U+10ffff) + * that can be assigned a character? + * + * Code points that are not characters include: + * - single surrogate code points (U+d800..U+dfff, 2048 code points) + * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points) + * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points) + * - the highest Unicode code point value is U+10ffff + * + * This means that all code points below U+d800 are character code points, + * and that boundary is tested first for performance. + * + * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_CHAR, see utf_old.h. + */ +#define UTF_IS_UNICODE_CHAR(c) \ + ((uint32_t)(c)<0xd800 || \ + ((uint32_t)(c)>0xdfff && \ + (uint32_t)(c)<=0x10ffff && \ + !UTF_IS_UNICODE_NONCHAR(c))) + +/* Formerly utf8.h ---------------------------------------------------------- */ + +/** +* \var utf8_countTrailBytes +* Internal array with numbers of trail bytes for any given byte used in +* lead byte position. +* +* This is internal since it is not meant to be called directly by external clients; +* however it is called by public macros in this file and thus must remain stable, +* and should not be hidden when other internal functions are hidden (otherwise +* public macros would fail to compile). +* @internal +*/ +#ifdef U_UTF8_IMPL +// No forward declaration if compiling utf_impl.cpp, which defines utf8_countTrailBytes. +#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) +U_CFUNC const uint8_t utf8_countTrailBytes[]; +#else +U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_IMPORT*/ +#endif + +/** + * Count the trail bytes for a UTF-8 lead byte. + * @deprecated ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h. + */ +#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte]) + +/** + * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. + * @deprecated ICU 2.4. Renamed to U8_MASK_LEAD_BYTE, see utf_old.h. + */ +#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) + +/** Is this this code point a single code unit (byte)? @deprecated ICU 2.4. Renamed to U8_IS_SINGLE, see utf_old.h. */ +#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0) +/** Is this this code unit the lead code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_LEAD, see utf_old.h. */ +#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e) +/** Is this this code unit a trailing code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_TRAIL, see utf_old.h. */ +#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80) + +/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h. */ +#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f) + +/** + * Given the lead character, how many bytes are taken by this code point. + * ICU does not deal with code points >0x10ffff + * unless necessary for advancing in the byte stream. + * + * These length macros take into account that for values >0x10ffff + * the UTF8_APPEND_CHAR_SAFE macros would write the error code point 0xffff + * with 3 bytes. + * Code point comparisons need to be in uint32_t because UChar32 + * may be a signed type, and negative values must be recognized. + * + * @deprecated ICU 2.4. Use U8_LENGTH instead, see utf.h. + */ +#if 1 +# define UTF8_CHAR_LENGTH(c) \ + ((uint32_t)(c)<=0x7f ? 1 : \ + ((uint32_t)(c)<=0x7ff ? 2 : \ + ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \ + ) \ + ) +#else +# define UTF8_CHAR_LENGTH(c) \ + ((uint32_t)(c)<=0x7f ? 1 : \ + ((uint32_t)(c)<=0x7ff ? 2 : \ + ((uint32_t)(c)<=0xffff ? 3 : \ + ((uint32_t)(c)<=0x10ffff ? 4 : \ + ((uint32_t)(c)<=0x3ffffff ? 5 : \ + ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \ + ) \ + ) \ + ) \ + ) \ + ) +#endif + +/** The maximum number of bytes per code point. @deprecated ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h. */ +#define UTF8_MAX_CHAR_LENGTH 4 + +/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF8_ARRAY_SIZE(size) ((5*(size))/2) + +/** @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h. */ +#define UTF8_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \ + UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \ + UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h. */ +#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t _utf8_get_char_safe_index=(int32_t)(i); \ + UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \ + UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h. */ +#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[(i)++]; \ + if((uint8_t)((c)-0xc0)<0x35) { \ + uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \ + UTF8_MASK_LEAD_BYTE(c, __count); \ + switch(__count) { \ + /* each following branch falls through to the next one */ \ + case 3: \ + (c)=((c)<<6)|((s)[(i)++]&0x3f); \ + case 2: \ + (c)=((c)<<6)|((s)[(i)++]&0x3f); \ + case 1: \ + (c)=((c)<<6)|((s)[(i)++]&0x3f); \ + /* no other branches to optimize switch() */ \ + break; \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h. */ +#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + if((uint32_t)(c)<=0x7f) { \ + (s)[(i)++]=(uint8_t)(c); \ + } else { \ + if((uint32_t)(c)<=0x7ff) { \ + (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ + } else { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ + } else { \ + (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h. */ +#define UTF8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h. */ +#define UTF8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0) { \ + UTF8_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h. */ +#define UTF8_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + while(UTF8_IS_TRAIL((s)[i])) { --(i); } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h. */ +#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[(i)++]; \ + if((c)>=0x80) { \ + if(UTF8_IS_LEAD(c)) { \ + (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \ + } else { \ + (c)=UTF8_ERROR_VALUE_1; \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h. */ +#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ + if((uint32_t)(c)<=0x7f) { \ + (s)[(i)++]=(uint8_t)(c); \ + } else { \ + (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */ +#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length) + +/** @deprecated ICU 2.4. Renamed to U8_FWD_N, see utf_old.h. */ +#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n) + +/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h. */ +#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i) + +/** @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h. */ +#define UTF8_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[--(i)]; \ + if(UTF8_IS_TRAIL(c)) { \ + uint8_t __b, __count=1, __shift=6; \ +\ + /* c is a trail byte */ \ + (c)&=0x3f; \ + for(;;) { \ + __b=(s)[--(i)]; \ + if(__b>=0xc0) { \ + UTF8_MASK_LEAD_BYTE(__b, __count); \ + (c)|=(UChar32)__b<<__shift; \ + break; \ + } else { \ + (c)|=(UChar32)(__b&0x3f)<<__shift; \ + ++__count; \ + __shift+=6; \ + } \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h. */ +#define UTF8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + while(UTF8_IS_TRAIL((s)[--(i)])) {} \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h. */ +#define UTF8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0) { \ + UTF8_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ +#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + UTF8_BACK_1_UNSAFE(s, i); \ + UTF8_FWD_1_UNSAFE(s, i); \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h. */ +#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[--(i)]; \ + if((c)>=0x80) { \ + if((c)<=0xbf) { \ + (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \ + } else { \ + (c)=UTF8_ERROR_VALUE_1; \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U8_BACK_1, see utf_old.h. */ +#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i) + +/** @deprecated ICU 2.4. Renamed to U8_BACK_N, see utf_old.h. */ +#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n) + +/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h. */ +#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length) + +/* Formerly utf16.h --------------------------------------------------------- */ + +/** Is uchar a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. */ +#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800) + +/** Is uchar a second/trail surrogate? @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. */ +#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00) + +/** Assuming c is a surrogate, is it a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h. */ +#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0) + +/** Helper constant for UTF16_GET_PAIR_VALUE. @deprecated ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h. */ +#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) + +/** Get the UTF-32 value from the surrogate code units. @deprecated ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h. */ +#define UTF16_GET_PAIR_VALUE(first, second) \ + (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET) + +/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */ +#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) + +/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */ +#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) + +/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */ +#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary) + +/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */ +#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary) + +/** @deprecated ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h. */ +#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar) + +/** @deprecated ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h. */ +#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar) + +/** @deprecated ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h. */ +#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar) + +/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h. */ +#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff) + +/** @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. */ +#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) + +/** @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. */ +#define UTF16_MAX_CHAR_LENGTH 2 + +/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF16_ARRAY_SIZE(size) (size) + +/** + * Get a single code point from an offset that points to any + * of the code units that belong to that code point. + * Assume 0<=i<length. + * + * This could be used for iteration together with + * UTF16_CHAR_LENGTH() and UTF_IS_ERROR(), + * but the use of UTF16_NEXT_CHAR[_UNSAFE]() and + * UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that. + * @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. + */ +#define UTF16_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[i]; \ + if(UTF_IS_SURROGATE(c)) { \ + if(UTF_IS_SURROGATE_FIRST(c)) { \ + (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \ + } else { \ + (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */ +#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[i]; \ + if(UTF_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(UTF_IS_SURROGATE_FIRST(c)) { \ + if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \ + (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ + /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ + } else if(strict) {\ + /* unmatched first surrogate */ \ + (c)=UTF_ERROR_VALUE; \ + } \ + } else { \ + if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ + (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ + /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ + } else if(strict) {\ + /* unmatched second surrogate */ \ + (c)=UTF_ERROR_VALUE; \ + } \ + } \ + } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ + (c)=UTF_ERROR_VALUE; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */ +#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[(i)++]; \ + if(UTF_IS_FIRST_SURROGATE(c)) { \ + (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */ +#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */ +#define UTF16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \ + ++(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */ +#define UTF16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0) { \ + UTF16_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */ +#define UTF16_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTF_IS_SECOND_SURROGATE((s)[i])) { \ + --(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */ +#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[(i)++]; \ + if(UTF_IS_FIRST_SURROGATE(c)) { \ + uint16_t __c2; \ + if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \ + ++(i); \ + (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ + /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ + } else if(strict) {\ + /* unmatched first surrogate */ \ + (c)=UTF_ERROR_VALUE; \ + } \ + } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ + /* unmatched second surrogate or other non-character */ \ + (c)=UTF_ERROR_VALUE; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */ +#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else if((uint32_t)(c)<=0x10ffff) { \ + if((i)+1<(length)) { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } else /* not enough space */ { \ + (s)[(i)++]=UTF_ERROR_VALUE; \ + } \ + } else /* c>0x10ffff, write error value */ { \ + (s)[(i)++]=UTF_ERROR_VALUE; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */ +#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length) + +/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */ +#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n) + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */ +#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i) + +/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */ +#define UTF16_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[--(i)]; \ + if(UTF_IS_SECOND_SURROGATE(c)) { \ + (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */ +#define UTF16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \ + --(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */ +#define UTF16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __N=(n); \ + while(__N>0) { \ + UTF16_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ +#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ + ++(i); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */ +#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[--(i)]; \ + if(UTF_IS_SECOND_SURROGATE(c)) { \ + uint16_t __c2; \ + if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ + /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ + } else if(strict) {\ + /* unmatched second surrogate */ \ + (c)=UTF_ERROR_VALUE; \ + } \ + } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ + /* unmatched first surrogate or other non-character */ \ + (c)=UTF_ERROR_VALUE; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */ +#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i) + +/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */ +#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n) + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */ +#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) + +/* Formerly utf32.h --------------------------------------------------------- */ + +/* +* Old documentation: +* +* This file defines macros to deal with UTF-32 code units and code points. +* Signatures and semantics are the same as for the similarly named macros +* in utf16.h. +* utf32.h is included by utf.h after unicode/umachine.h</p> +* and some common definitions. +* <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros. +* Compound statements (curly braces {}) must be used for if-else-while... +* bodies and all macro statements should be terminated with semicolon.</p> +*/ + +/* internal definitions ----------------------------------------------------- */ + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_IS_SAFE(c, strict) \ + (!(strict) ? \ + (uint32_t)(c)<=0x10ffff : \ + UTF_IS_UNICODE_CHAR(c)) + +/* + * For the semantics of all of these macros, see utf16.h. + * The UTF-32 versions are trivial because any code point is + * encoded using exactly one code unit. + */ + +/* single-code point definitions -------------------------------------------- */ + +/* classes of code unit values */ + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_IS_SINGLE(uchar) 1 +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_IS_LEAD(uchar) 0 +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_IS_TRAIL(uchar) 0 + +/* number of code units per code point */ + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_NEED_MULTIPLE_UCHAR(c) 0 +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_CHAR_LENGTH(c) 1 +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_MAX_CHAR_LENGTH 1 + +/* average number of code units compared to UTF-16 */ + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_ARRAY_SIZE(size) (size) + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[i]; \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[i]; \ + if(!UTF32_IS_SAFE(c, strict)) { \ + (c)=UTF_ERROR_VALUE; \ + } \ +} UPRV_BLOCK_MACRO_END + +/* definitions with forward iteration --------------------------------------- */ + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[(i)++]; \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (s)[(i)++]=(c); \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + ++(i); \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + (i)+=(n); \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[(i)++]; \ + if(!UTF32_IS_SAFE(c, strict)) { \ + (c)=UTF_ERROR_VALUE; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ + if((uint32_t)(c)<=0x10ffff) { \ + (s)[(i)++]=(c); \ + } else /* c>0x10ffff, write 0xfffd */ { \ + (s)[(i)++]=0xfffd; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_FWD_1_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ + ++(i); \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_FWD_N_SAFE(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \ + if(((i)+=(n))>(length)) { \ + (i)=(length); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_SET_CHAR_START_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ +} UPRV_BLOCK_MACRO_END + +/* definitions with backward iteration -------------------------------------- */ + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[--(i)]; \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ + --(i); \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + (i)-=(n); \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=(s)[--(i)]; \ + if(!UTF32_IS_SAFE(c, strict)) { \ + (c)=UTF_ERROR_VALUE; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_BACK_1_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ + --(i); \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_BACK_N_SAFE(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \ + (i)-=(n); \ + if((i)<(start)) { \ + (i)=(start); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ +} UPRV_BLOCK_MACRO_END + +/* Formerly utf.h, part 2 --------------------------------------------------- */ + +/** + * Estimate the number of code units for a string based on the number of UTF-16 code units. + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size) + +/** @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */ +#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c) + +/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */ +#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) + + +/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */ +#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c) + +/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */ +#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) + + +/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */ +#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) + +/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */ +#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) + + +/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */ +#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i) + +/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */ +#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length) + + +/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */ +#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n) + +/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */ +#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n) + + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */ +#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i) + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */ +#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i) + + +/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */ +#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) + +/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */ +#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) + + +/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */ +#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) + +/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */ +#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) + + +/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */ +#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) + +/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */ +#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) + + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ +#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */ +#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) + +/* Define default macros (UTF-16 "safe") ------------------------------------ */ + +/** + * Does this code unit alone encode a code point (BMP, not a surrogate)? + * Same as UTF16_IS_SINGLE. + * @deprecated ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h. + */ +#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar) + +/** + * Is this code unit the first one of several (a lead surrogate)? + * Same as UTF16_IS_LEAD. + * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. + */ +#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar) + +/** + * Is this code unit one of several but not the first one (a trail surrogate)? + * Same as UTF16_IS_TRAIL. + * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. + */ +#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar) + +/** + * Does this code point require multiple code units (is it a supplementary code point)? + * Same as UTF16_NEED_MULTIPLE_UCHAR. + * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead. + */ +#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c) + +/** + * How many code units are used to encode this code point (1 or 2)? + * Same as UTF16_CHAR_LENGTH. + * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. + */ +#define UTF_CHAR_LENGTH(c) U16_LENGTH(c) + +/** + * How many code units are used at most for any Unicode code point (2)? + * Same as UTF16_MAX_CHAR_LENGTH. + * @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. + */ +#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH + +/** + * Set c to the code point that contains the code unit i. + * i could point to the lead or the trail surrogate for the code point. + * i is not modified. + * Same as UTF16_GET_CHAR. + * \pre 0<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_GET, see utf_old.h. + */ +#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c) + +/** + * Set c to the code point that starts at code unit i + * and advance i to beyond the code units of this code point (post-increment). + * i must point to the first code unit of a code point. + * Otherwise c is set to the trail unit (surrogate) itself. + * Same as UTF16_NEXT_CHAR. + * \pre 0<=i<length + * \post 0<i<=length + * + * @deprecated ICU 2.4. Renamed to U16_NEXT, see utf_old.h. + */ +#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c) + +/** + * Append the code units of code point c to the string at index i + * and advance i to beyond the new code units (post-increment). + * The code units beginning at index i will be overwritten. + * Same as UTF16_APPEND_CHAR. + * \pre 0<=c<=0x10ffff + * \pre 0<=i<length + * \post 0<i<=length + * + * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. + */ +#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) + +/** + * Advance i to beyond the code units of the code point that begins at i. + * I.e., advance i by one code point. + * Same as UTF16_FWD_1. + * \pre 0<=i<length + * \post 0<i<=length + * + * @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. + */ +#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length) + +/** + * Advance i to beyond the code units of the n code points where the first one begins at i. + * I.e., advance i by n code points. + * Same as UT16_FWD_N. + * \pre 0<=i<length + * \post 0<i<=length + * + * @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. + */ +#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n) + +/** + * Take the random-access index i and adjust it so that it points to the beginning + * of a code point. + * The input index points to any code unit of a code point and is moved to point to + * the first code unit of the same code point. i is never incremented. + * In other words, if i points to a trail surrogate that is preceded by a matching + * lead surrogate, then i is decremented. Otherwise it is not modified. + * This can be used to start an iteration with UTF_NEXT_CHAR() from a random index. + * Same as UTF16_SET_CHAR_START. + * \pre start<=i<length + * \post start<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. + */ +#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i) + +/** + * Set c to the code point that has code units before i + * and move i backward (towards the beginning of the string) + * to the first code unit of this code point (pre-increment). + * i must point to the first code unit after the last unit of a code point (i==length is allowed). + * Same as UTF16_PREV_CHAR. + * \pre start<i<=length + * \post start<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_PREV, see utf_old.h. + */ +#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c) + +/** + * Move i backward (towards the beginning of the string) + * to the first code unit of the code point that has code units before i. + * I.e., move i backward by one code point. + * i must point to the first code unit after the last unit of a code point (i==length is allowed). + * Same as UTF16_BACK_1. + * \pre start<i<=length + * \post start<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. + */ +#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i) + +/** + * Move i backward (towards the beginning of the string) + * to the first code unit of the n code points that have code units before i. + * I.e., move i backward by n code points. + * i must point to the first code unit after the last unit of a code point (i==length is allowed). + * Same as UTF16_BACK_N. + * \pre start<i<=length + * \post start<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. + */ +#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n) + +/** + * Take the random-access index i and adjust it so that it points beyond + * a code point. The input index points beyond any code unit + * of a code point and is moved to point beyond the last code unit of the same + * code point. i is never decremented. + * In other words, if i points to a trail surrogate that is preceded by a matching + * lead surrogate, then i is incremented. Otherwise it is not modified. + * This can be used to start an iteration with UTF_PREV_CHAR() from a random index. + * Same as UTF16_SET_CHAR_LIMIT. + * \pre start<i<=length + * \post start<i<=length + * + * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. + */ +#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) + +#endif // !U_HIDE_DEPRECATED_API && !U_HIDE_OBSOLETE_UTF_OLD_H + +#endif diff --git a/thirdparty/icu4c/common/unicode/utrace.h b/thirdparty/icu4c/common/unicode/utrace.h new file mode 100644 index 0000000000..28c313c582 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/utrace.h @@ -0,0 +1,509 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2003-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utrace.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003aug06 +* created by: Markus W. Scherer +* +* Definitions for ICU tracing/logging. +* +*/ + +#ifndef __UTRACE_H__ +#define __UTRACE_H__ + +#include <stdarg.h> +#include "unicode/utypes.h" + +/** + * \file + * \brief C API: Definitions for ICU tracing/logging. + * + * This provides API for debugging the internals of ICU without the use of + * a traditional debugger. + * + * By default, tracing is disabled in ICU. If you need to debug ICU with + * tracing, please compile ICU with the --enable-tracing configure option. + */ + +U_CDECL_BEGIN + +/** + * Trace severity levels. Higher levels increase the verbosity of the trace output. + * @see utrace_setLevel + * @stable ICU 2.8 + */ +typedef enum UTraceLevel { + /** Disable all tracing @stable ICU 2.8*/ + UTRACE_OFF=-1, + /** Trace error conditions only @stable ICU 2.8*/ + UTRACE_ERROR=0, + /** Trace errors and warnings @stable ICU 2.8*/ + UTRACE_WARNING=3, + /** Trace opens and closes of ICU services @stable ICU 2.8*/ + UTRACE_OPEN_CLOSE=5, + /** Trace an intermediate number of ICU operations @stable ICU 2.8*/ + UTRACE_INFO=7, + /** Trace the maximum number of ICU operations @stable ICU 2.8*/ + UTRACE_VERBOSE=9 +} UTraceLevel; + +/** + * These are the ICU functions that will be traced when tracing is enabled. + * @stable ICU 2.8 + */ +typedef enum UTraceFunctionNumber { + UTRACE_FUNCTION_START=0, + UTRACE_U_INIT=UTRACE_FUNCTION_START, + UTRACE_U_CLEANUP, + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal collation trace location. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UTRACE_FUNCTION_LIMIT, +#endif // U_HIDE_DEPRECATED_API + + UTRACE_CONVERSION_START=0x1000, + UTRACE_UCNV_OPEN=UTRACE_CONVERSION_START, + UTRACE_UCNV_OPEN_PACKAGE, + UTRACE_UCNV_OPEN_ALGORITHMIC, + UTRACE_UCNV_CLONE, + UTRACE_UCNV_CLOSE, + UTRACE_UCNV_FLUSH_CACHE, + UTRACE_UCNV_LOAD, + UTRACE_UCNV_UNLOAD, + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal collation trace location. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UTRACE_CONVERSION_LIMIT, +#endif // U_HIDE_DEPRECATED_API + + UTRACE_COLLATION_START=0x2000, + UTRACE_UCOL_OPEN=UTRACE_COLLATION_START, + UTRACE_UCOL_CLOSE, + UTRACE_UCOL_STRCOLL, + UTRACE_UCOL_GET_SORTKEY, + UTRACE_UCOL_GETLOCALE, + UTRACE_UCOL_NEXTSORTKEYPART, + UTRACE_UCOL_STRCOLLITER, + UTRACE_UCOL_OPEN_FROM_SHORT_STRING, + UTRACE_UCOL_STRCOLLUTF8, /**< @stable ICU 50 */ + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal collation trace location. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UTRACE_COLLATION_LIMIT, +#endif // U_HIDE_DEPRECATED_API + + /** + * The lowest resource/data location. + * @stable ICU 65 + */ + UTRACE_UDATA_START=0x3000, + + /** + * Indicates that a value was read from a resource bundle. Provides three + * C-style strings to UTraceData: type, file name, and resource path. The + * possible types are: + * + * - "string" (a string value was accessed) + * - "binary" (a binary value was accessed) + * - "intvector" (a integer vector value was accessed) + * - "int" (a signed integer value was accessed) + * - "uint" (a unsigned integer value was accessed) + * - "get" (a path was loaded, but the value was not accessed) + * - "getalias" (a path was loaded, and an alias was resolved) + * + * @stable ICU 65 + */ + UTRACE_UDATA_RESOURCE=UTRACE_UDATA_START, + + /** + * Indicates that a resource bundle was opened. + * + * Provides one C-style string to UTraceData: file name. + * @stable ICU 65 + */ + UTRACE_UDATA_BUNDLE, + + /** + * Indicates that a data file was opened, but not *.res files. + * + * Provides one C-style string to UTraceData: file name. + * + * @stable ICU 65 + */ + UTRACE_UDATA_DATA_FILE, + + /** + * Indicates that a *.res file was opened. + * + * This differs from UTRACE_UDATA_BUNDLE because a res file is typically + * opened only once per application runtime, but the bundle corresponding + * to that res file may be opened many times. + * + * Provides one C-style string to UTraceData: file name. + * + * @stable ICU 65 + */ + UTRACE_UDATA_RES_FILE, + +#ifndef U_HIDE_INTERNAL_API + /** + * One more than the highest normal resource/data trace location. + * @internal The numeric value may change over time, see ICU ticket #12420. + */ + UTRACE_RES_DATA_LIMIT, +#endif // U_HIDE_INTERNAL_API + +#ifndef U_HIDE_DRAFT_API + /** + * The lowest break iterator location. + * @draft ICU 67 + */ + UTRACE_UBRK_START=0x4000, + + /** + * Indicates that a character instance of break iterator was created. + * + * @draft ICU 67 + */ + UTRACE_UBRK_CREATE_CHARACTER = UTRACE_UBRK_START, + + /** + * Indicates that a word instance of break iterator was created. + * + * @draft ICU 67 + */ + UTRACE_UBRK_CREATE_WORD, + + /** + * Indicates that a line instance of break iterator was created. + * + * Provides one C-style string to UTraceData: the lb value ("", + * "loose", "strict", or "normal"). + * + * @draft ICU 67 + */ + UTRACE_UBRK_CREATE_LINE, + + /** + * Indicates that a sentence instance of break iterator was created. + * + * @draft ICU 67 + */ + UTRACE_UBRK_CREATE_SENTENCE, + + /** + * Indicates that a title instance of break iterator was created. + * + * @draft ICU 67 + */ + UTRACE_UBRK_CREATE_TITLE, + + /** + * Indicates that an internal dictionary break engine was created. + * + * Provides one C-style string to UTraceData: the script code of what + * the break engine cover ("Hani", "Khmr", "Laoo", "Mymr", or "Thai"). + * + * @draft ICU 67 + */ + UTRACE_UBRK_CREATE_BREAK_ENGINE, + +#endif // U_HIDE_DRAFT_API + +#ifndef U_HIDE_INTERNAL_API + /** + * One more than the highest normal break iterator trace location. + * @internal The numeric value may change over time, see ICU ticket #12420. + */ + UTRACE_UBRK_LIMIT, +#endif // U_HIDE_INTERNAL_API + +} UTraceFunctionNumber; + +/** + * Setter for the trace level. + * @param traceLevel A UTraceLevel value. + * @stable ICU 2.8 + */ +U_CAPI void U_EXPORT2 +utrace_setLevel(int32_t traceLevel); + +/** + * Getter for the trace level. + * @return The UTraceLevel value being used by ICU. + * @stable ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +utrace_getLevel(void); + +/* Trace function pointers types ----------------------------- */ + +/** + * Type signature for the trace function to be called when entering a function. + * @param context value supplied at the time the trace functions are set. + * @param fnNumber Enum value indicating the ICU function being entered. + * @stable ICU 2.8 + */ +typedef void U_CALLCONV +UTraceEntry(const void *context, int32_t fnNumber); + +/** + * Type signature for the trace function to be called when exiting from a function. + * @param context value supplied at the time the trace functions are set. + * @param fnNumber Enum value indicating the ICU function being exited. + * @param fmt A formatting string that describes the number and types + * of arguments included with the variable args. The fmt + * string has the same form as the utrace_vformat format + * string. + * @param args A variable arguments list. Contents are described by + * the fmt parameter. + * @see utrace_vformat + * @stable ICU 2.8 + */ +typedef void U_CALLCONV +UTraceExit(const void *context, int32_t fnNumber, + const char *fmt, va_list args); + +/** + * Type signature for the trace function to be called from within an ICU function + * to display data or messages. + * @param context value supplied at the time the trace functions are set. + * @param fnNumber Enum value indicating the ICU function being exited. + * @param level The current tracing level + * @param fmt A format string describing the tracing data that is supplied + * as variable args + * @param args The data being traced, passed as variable args. + * @stable ICU 2.8 + */ +typedef void U_CALLCONV +UTraceData(const void *context, int32_t fnNumber, int32_t level, + const char *fmt, va_list args); + +/** + * Set ICU Tracing functions. Installs application-provided tracing + * functions into ICU. After doing this, subsequent ICU operations + * will call back to the installed functions, providing a trace + * of the use of ICU. Passing a NULL pointer for a tracing function + * is allowed, and inhibits tracing action at points where that function + * would be called. + * <p> + * Tracing and Threads: Tracing functions are global to a process, and + * will be called in response to ICU operations performed by any + * thread. If tracing of an individual thread is desired, the + * tracing functions must themselves filter by checking that the + * current thread is the desired thread. + * + * @param context an uninterpreted pointer. Whatever is passed in + * here will in turn be passed to each of the tracing + * functions UTraceEntry, UTraceExit and UTraceData. + * ICU does not use or alter this pointer. + * @param e Callback function to be called on entry to a + * a traced ICU function. + * @param x Callback function to be called on exit from a + * traced ICU function. + * @param d Callback function to be called from within a + * traced ICU function, for the purpose of providing + * data to the trace. + * + * @stable ICU 2.8 + */ +U_CAPI void U_EXPORT2 +utrace_setFunctions(const void *context, + UTraceEntry *e, UTraceExit *x, UTraceData *d); + +/** + * Get the currently installed ICU tracing functions. Note that a null function + * pointer will be returned if no trace function has been set. + * + * @param context The currently installed tracing context. + * @param e The currently installed UTraceEntry function. + * @param x The currently installed UTraceExit function. + * @param d The currently installed UTraceData function. + * @stable ICU 2.8 + */ +U_CAPI void U_EXPORT2 +utrace_getFunctions(const void **context, + UTraceEntry **e, UTraceExit **x, UTraceData **d); + + + +/* + * + * ICU trace format string syntax + * + * Format Strings are passed to UTraceData functions, and define the + * number and types of the trace data being passed on each call. + * + * The UTraceData function, which is supplied by the application, + * not by ICU, can either forward the trace data (passed via + * varargs) and the format string back to ICU for formatting into + * a displayable string, or it can interpret the format itself, + * and do as it wishes with the trace data. + * + * + * Goals for the format string + * - basic data output + * - easy to use for trace programmer + * - sufficient provision for data types for trace output readability + * - well-defined types and binary portable APIs + * + * Non-goals + * - printf compatibility + * - fancy formatting + * - argument reordering and other internationalization features + * + * ICU trace format strings contain plain text with argument inserts, + * much like standard printf format strings. + * Each insert begins with a '%', then optionally contains a 'v', + * then exactly one type character. + * Two '%' in a row represent a '%' instead of an insert. + * The trace format strings need not have \n at the end. + * + * + * Types + * ----- + * + * Type characters: + * - c A char character in the default codepage. + * - s A NUL-terminated char * string in the default codepage. + * - S A UChar * string. Requires two params, (ptr, length). Length=-1 for nul term. + * - b A byte (8-bit integer). + * - h A 16-bit integer. Also a 16 bit Unicode code unit. + * - d A 32-bit integer. Also a 20 bit Unicode code point value. + * - l A 64-bit integer. + * - p A data pointer. + * + * Vectors + * ------- + * + * If the 'v' is not specified, then one item of the specified type + * is passed in. + * If the 'v' (for "vector") is specified, then a vector of items of the + * specified type is passed in, via a pointer to the first item + * and an int32_t value for the length of the vector. + * Length==-1 means zero or NUL termination. Works for vectors of all types. + * + * Note: %vS is a vector of (UChar *) strings. The strings must + * be nul terminated as there is no way to provide a + * separate length parameter for each string. The length + * parameter (required for all vectors) is the number of + * strings, not the length of the strings. + * + * Examples + * -------- + * + * These examples show the parameters that will be passed to an application's + * UTraceData() function for various formats. + * + * - the precise formatting is up to the application! + * - the examples use type casts for arguments only to _show_ the types of + * arguments without needing variable declarations in the examples; + * the type casts will not be necessary in actual code + * + * UTraceDataFunc(context, fnNumber, level, + * "There is a character %c in the string %s.", // Format String + * (char)c, (const char *)s); // varargs parameters + * -> There is a character 0x42 'B' in the string "Bravo". + * + * UTraceDataFunc(context, fnNumber, level, + * "Vector of bytes %vb vector of chars %vc", + * (const uint8_t *)bytes, (int32_t)bytesLength, + * (const char *)chars, (int32_t)charsLength); + * -> Vector of bytes + * 42 63 64 3f [4] + * vector of chars + * "Bcd?"[4] + * + * UTraceDataFunc(context, fnNumber, level, + * "An int32_t %d and a whole bunch of them %vd", + * (int32_t)-5, (const int32_t *)ints, (int32_t)intsLength); + * -> An int32_t 0xfffffffb and a whole bunch of them + * fffffffb 00000005 0000010a [3] + * + */ + + + +/** + * Trace output Formatter. An application's UTraceData tracing functions may call + * back to this function to format the trace output in a + * human readable form. Note that a UTraceData function may choose + * to not format the data; it could, for example, save it in + * in the raw form it was received (more compact), leaving + * formatting for a later trace analysis tool. + * @param outBuf pointer to a buffer to receive the formatted output. Output + * will be nul terminated if there is space in the buffer - + * if the length of the requested output < the output buffer size. + * @param capacity Length of the output buffer. + * @param indent Number of spaces to indent the output. Intended to allow + * data displayed from nested functions to be indented for readability. + * @param fmt Format specification for the data to output + * @param args Data to be formatted. + * @return Length of formatted output, including the terminating NUL. + * If buffer capacity is insufficient, the required capacity is returned. + * @stable ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +utrace_vformat(char *outBuf, int32_t capacity, + int32_t indent, const char *fmt, va_list args); + +/** + * Trace output Formatter. An application's UTraceData tracing functions may call + * this function to format any additional trace data, beyond that + * provided by default, in human readable form with the same + * formatting conventions used by utrace_vformat(). + * @param outBuf pointer to a buffer to receive the formatted output. Output + * will be nul terminated if there is space in the buffer - + * if the length of the requested output < the output buffer size. + * @param capacity Length of the output buffer. + * @param indent Number of spaces to indent the output. Intended to allow + * data displayed from nested functions to be indented for readability. + * @param fmt Format specification for the data to output + * @param ... Data to be formatted. + * @return Length of formatted output, including the terminating NUL. + * If buffer capacity is insufficient, the required capacity is returned. + * @stable ICU 2.8 + */ +U_CAPI int32_t U_EXPORT2 +utrace_format(char *outBuf, int32_t capacity, + int32_t indent, const char *fmt, ...); + + + +/* Trace function numbers --------------------------------------------------- */ + +/** + * Get the name of a function from its trace function number. + * + * @param fnNumber The trace number for an ICU function. + * @return The name string for the function. + * + * @see UTraceFunctionNumber + * @stable ICU 2.8 + */ +U_CAPI const char * U_EXPORT2 +utrace_functionName(int32_t fnNumber); + +U_CDECL_END + +#endif diff --git a/thirdparty/icu4c/common/unicode/utypes.h b/thirdparty/icu4c/common/unicode/utypes.h new file mode 100644 index 0000000000..7c4ea7ac28 --- /dev/null +++ b/thirdparty/icu4c/common/unicode/utypes.h @@ -0,0 +1,732 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1996-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* FILE NAME : UTYPES.H (formerly ptypes.h) +* +* Date Name Description +* 12/11/96 helena Creation. +* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32, +* uint8, uint16, and uint32. +* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as +* well as C++. +* Modified to use memcpy() for uprv_arrayCopy() fns. +* 04/14/97 aliu Added TPlatformUtilities. +* 05/07/97 aliu Added import/export specifiers (replacing the old +* broken EXT_CLASS). Added version number for our +* code. Cleaned up header. +* 6/20/97 helena Java class name change. +* 08/11/98 stephen UErrorCode changed from typedef to enum +* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3 +* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t +* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066) +* 04/20/99 stephen Cleaned up & reworked for autoconf. +* Renamed to utypes.h. +* 05/05/99 stephen Changed to use <inttypes.h> +* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here. +******************************************************************************* +*/ + +#ifndef UTYPES_H +#define UTYPES_H + + +#include "unicode/umachine.h" +#include "unicode/uversion.h" +#include "unicode/uconfig.h" +#include <float.h> + +#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS +# include "unicode/utf.h" +#endif + +/*! + * \file + * \brief Basic definitions for ICU, for both C and C++ APIs + * + * This file defines basic types, constants, and enumerations directly or + * indirectly by including other header files, especially utf.h for the + * basic character and string definitions and umachine.h for consistent + * integer and other types. + */ + + +/** + * \def U_SHOW_CPLUSPLUS_API + * @internal + */ +#ifdef __cplusplus +# ifndef U_SHOW_CPLUSPLUS_API +# define U_SHOW_CPLUSPLUS_API 1 +# endif +#else +# undef U_SHOW_CPLUSPLUS_API +# define U_SHOW_CPLUSPLUS_API 0 +#endif + +/** @{ API visibility control */ + +/** + * \def U_HIDE_DRAFT_API + * Define this to 1 to request that draft API be "hidden" + * @internal + */ +/** + * \def U_HIDE_INTERNAL_API + * Define this to 1 to request that internal API be "hidden" + * @internal + */ +#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API) +#define U_HIDE_DRAFT_API 1 +#endif +#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API) +#define U_HIDE_INTERNAL_API 1 +#endif + +/** @} */ + +/*===========================================================================*/ +/* ICUDATA naming scheme */ +/*===========================================================================*/ + +/** + * \def U_ICUDATA_TYPE_LETTER + * + * This is a platform-dependent string containing one letter: + * - b for big-endian, ASCII-family platforms + * - l for little-endian, ASCII-family platforms + * - e for big-endian, EBCDIC-family platforms + * This letter is part of the common data file name. + * @stable ICU 2.0 + */ + +/** + * \def U_ICUDATA_TYPE_LITLETTER + * The non-string form of U_ICUDATA_TYPE_LETTER + * @stable ICU 2.0 + */ +#if U_CHARSET_FAMILY +# if U_IS_BIG_ENDIAN + /* EBCDIC - should always be BE */ +# define U_ICUDATA_TYPE_LETTER "e" +# define U_ICUDATA_TYPE_LITLETTER e +# else +# error "Don't know what to do with little endian EBCDIC!" +# define U_ICUDATA_TYPE_LETTER "x" +# define U_ICUDATA_TYPE_LITLETTER x +# endif +#else +# if U_IS_BIG_ENDIAN + /* Big-endian ASCII */ +# define U_ICUDATA_TYPE_LETTER "b" +# define U_ICUDATA_TYPE_LITLETTER b +# else + /* Little-endian ASCII */ +# define U_ICUDATA_TYPE_LETTER "l" +# define U_ICUDATA_TYPE_LITLETTER l +# endif +#endif + +/** + * A single string literal containing the icudata stub name. i.e. 'icudt18e' for + * ICU 1.8.x on EBCDIC, etc.. + * @stable ICU 2.0 + */ +#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER +#ifndef U_HIDE_INTERNAL_API +#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */ +#define U_USE_USRDATA 0 /**< @internal */ +#endif /* U_HIDE_INTERNAL_API */ + +/** + * U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library. + * Defined as a literal, not a string. + * Tricky Preprocessor use - ## operator replaces macro parameters with the literal string + * from the corresponding macro invocation, _before_ other macro substitutions. + * Need a nested \#defines to get the actual version numbers rather than + * the literal text U_ICU_VERSION_MAJOR_NUM into the name. + * The net result will be something of the form + * \#define U_ICU_ENTRY_POINT icudt19_dat + * @stable ICU 2.4 + */ +#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME) + +#ifndef U_HIDE_INTERNAL_API +/** + * Do not use. Note that it's OK for the 2nd argument to be undefined (literal). + * @internal + */ +#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff) + +/** + * Do not use. + * @internal + */ +#ifndef U_DEF_ICUDATA_ENTRY_POINT +/* affected by symbol renaming. See platform.h */ +#ifndef U_LIB_SUFFIX_C_NAME +#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat +#else +#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat +#endif +#endif +#endif /* U_HIDE_INTERNAL_API */ + +/** + * \def NULL + * Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C. + * @stable ICU 2.0 + */ +#ifndef NULL +#ifdef __cplusplus +#define NULL nullptr +#else +#define NULL ((void *)0) +#endif +#endif + +/*===========================================================================*/ +/* Calendar/TimeZone data types */ +/*===========================================================================*/ + +/** + * Date and Time data type. + * This is a primitive data type that holds the date and time + * as the number of milliseconds since 1970-jan-01, 00:00 UTC. + * UTC leap seconds are ignored. + * @stable ICU 2.0 + */ +typedef double UDate; + +/** The number of milliseconds per second @stable ICU 2.0 */ +#define U_MILLIS_PER_SECOND (1000) +/** The number of milliseconds per minute @stable ICU 2.0 */ +#define U_MILLIS_PER_MINUTE (60000) +/** The number of milliseconds per hour @stable ICU 2.0 */ +#define U_MILLIS_PER_HOUR (3600000) +/** The number of milliseconds per day @stable ICU 2.0 */ +#define U_MILLIS_PER_DAY (86400000) + +/** + * Maximum UDate value + * @stable ICU 4.8 + */ +#define U_DATE_MAX DBL_MAX + +/** + * Minimum UDate value + * @stable ICU 4.8 + */ +#define U_DATE_MIN -U_DATE_MAX + +/*===========================================================================*/ +/* Shared library/DLL import-export API control */ +/*===========================================================================*/ + +/* + * Control of symbol import/export. + * ICU is separated into three libraries. + */ + +/** + * \def U_COMBINED_IMPLEMENTATION + * Set to export library symbols from inside the ICU library + * when all of ICU is in a single library. + * This can be set as a compiler option while building ICU, and it + * needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc. + * @stable ICU 2.0 + */ + +/** + * \def U_DATA_API + * Set to export library symbols from inside the stubdata library, + * and to import them from outside. + * @stable ICU 3.0 + */ + +/** + * \def U_COMMON_API + * Set to export library symbols from inside the common library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_I18N_API + * Set to export library symbols from inside the i18n library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_LAYOUT_API + * Set to export library symbols from inside the layout engine library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_LAYOUTEX_API + * Set to export library symbols from inside the layout extensions library, + * and to import them from outside. + * @stable ICU 2.6 + */ + +/** + * \def U_IO_API + * Set to export library symbols from inside the ustdio library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_TOOLUTIL_API + * Set to export library symbols from inside the toolutil library, + * and to import them from outside. + * @stable ICU 3.4 + */ + +#ifdef U_IN_DOXYGEN +// This definition is required when generating the API docs. +#define U_COMBINED_IMPLEMENTATION 1 +#endif + +#if defined(U_COMBINED_IMPLEMENTATION) +#define U_DATA_API U_EXPORT +#define U_COMMON_API U_EXPORT +#define U_I18N_API U_EXPORT +#define U_LAYOUT_API U_EXPORT +#define U_LAYOUTEX_API U_EXPORT +#define U_IO_API U_EXPORT +#define U_TOOLUTIL_API U_EXPORT +#elif defined(U_STATIC_IMPLEMENTATION) +#define U_DATA_API +#define U_COMMON_API +#define U_I18N_API +#define U_LAYOUT_API +#define U_LAYOUTEX_API +#define U_IO_API +#define U_TOOLUTIL_API +#elif defined(U_COMMON_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_EXPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_I18N_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_EXPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_LAYOUT_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_EXPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_LAYOUTEX_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_EXPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_IO_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_EXPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_TOOLUTIL_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_EXPORT +#else +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#endif + +/** + * \def U_STANDARD_CPP_NAMESPACE + * Control of C++ Namespace + * @stable ICU 2.0 + */ +#ifdef __cplusplus +#define U_STANDARD_CPP_NAMESPACE :: +#else +#define U_STANDARD_CPP_NAMESPACE +#endif + +/*===========================================================================*/ +/* UErrorCode */ +/*===========================================================================*/ + +/** + * Standard ICU4C error code type, a substitute for exceptions. + * + * Initialize the UErrorCode with U_ZERO_ERROR, and check for success or + * failure using U_SUCCESS() or U_FAILURE(): + * + * UErrorCode errorCode = U_ZERO_ERROR; + * // call ICU API that needs an error code parameter. + * if (U_FAILURE(errorCode)) { + * // An error occurred. Handle it here. + * } + * + * C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a + * suitable subclass. + * + * For more information, see: + * http://icu-project.org/userguide/conventions + * + * Note: By convention, ICU functions that take a reference (C++) or a pointer + * (C) to a UErrorCode first test: + * + * if (U_FAILURE(errorCode)) { return immediately; } + * + * so that in a chain of such functions the first one that sets an error code + * causes the following ones to not perform any operations. + * + * @stable ICU 2.0 + */ +typedef enum UErrorCode { + /* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird + * and is that way because VC++ debugger displays first encountered constant, + * which is not the what the code is used for + */ + + U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */ + + U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */ + + U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */ + + U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */ + + U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */ + + U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */ + + U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */ + + U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */ + + U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */ + + U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */ + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UErrorCode warning value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_ERROR_WARNING_LIMIT, +#endif // U_HIDE_DEPRECATED_API + + U_ZERO_ERROR = 0, /**< No error, no warning. */ + + U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */ + U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */ + U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */ + U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */ + U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */ + U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */ + U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */ + U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */ + U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */ + U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */ + U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */ + U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */ + U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */ + U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */ + U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */ + U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */ + U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */ + U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */ + U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */ + U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */ + U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */ + U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */ + U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */ + U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource. + It is very possible that a circular alias definition has occurred */ + U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */ + U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */ + U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */ + U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */ + U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */ + U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. */ +#ifndef U_HIDE_DRAFT_API + /** + * The input is impractically long for an operation. + * It is rejected because it may lead to problems such as excessive + * processing time, stack depth, or heap memory requirements. + * + * @draft ICU 68 + */ + U_INPUT_TOO_LONG_ERROR = 31, +#endif // U_HIDE_DRAFT_API + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest standard error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_STANDARD_ERROR_LIMIT = 32, +#endif // U_HIDE_DEPRECATED_API + + /* + * Error codes in the range 0x10000 0x10100 are reserved for Transliterator. + */ + U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */ + U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */ + U_MALFORMED_RULE, /**< Elements of a rule are misplaced */ + U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/ + U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */ + U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/ + U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */ + U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */ + U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */ + U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */ + U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */ + U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */ + U_MISSING_OPERATOR, /**< A rule contains no operator */ + U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */ + U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */ + U_MULTIPLE_CURSORS, /**< More than one cursor */ + U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */ + U_TRAILING_BACKSLASH, /**< A dangling backslash */ + U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */ + U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */ + U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */ + U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */ + U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */ + U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */ + U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */ + U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */ + U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */ + U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */ + U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */ + U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */ + U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */ + U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */ + U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */ + U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */ + U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */ + U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal Transliterator error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_PARSE_ERROR_LIMIT, +#endif // U_HIDE_DEPRECATED_API + + /* + * Error codes in the range 0x10100 0x10200 are reserved for the formatting API. + */ + U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */ + U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */ + U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */ + U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */ + U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */ + U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */ + U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */ + U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */ + U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */ + U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */ + U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */ + U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */ + U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */ + U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */ + U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */ + U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */ + U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */ + U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */ + U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */ + U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */ + U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @stable ICU 61 */ + U_NUMBER_SKELETON_SYNTAX_ERROR, /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @stable ICU 62 */ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal formatting API error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_FMT_PARSE_ERROR_LIMIT = 0x10114, +#endif // U_HIDE_DEPRECATED_API + + /* + * Error codes in the range 0x10200 0x102ff are reserved for BreakIterator. + */ + U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */ + U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */ + U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */ + U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */ + U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */ + U_BRK_UNCLOSED_SET, /**< UnicodeSet writing an RBBI rule missing a closing ']'. */ + U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */ + U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */ + U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */ + U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */ + U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */ + U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */ + U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */ + U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */ + U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal BreakIterator error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_BRK_ERROR_LIMIT, +#endif // U_HIDE_DEPRECATED_API + + /* + * Error codes in the range 0x10300-0x103ff are reserved for regular expression related errors. + */ + U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */ + U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */ + U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */ + U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */ + U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */ + U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */ + U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */ + U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */ + U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */ + U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */ + U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */ + U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */ + U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */ + U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */ + U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/ +#ifndef U_HIDE_DEPRECATED_API + U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */ +#endif /* U_HIDE_DEPRECATED_API */ + U_REGEX_MISSING_CLOSE_BRACKET=U_REGEX_SET_CONTAINS_STRING+2, /**< Missing closing bracket on a bracket expression. */ + U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */ + U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */ + U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */ + U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */ + U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @stable ICU 55 */ + U_REGEX_INVALID_CAPTURE_GROUP_NAME, /**< Invalid capture group name. @stable ICU 55 */ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal regular expression error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_REGEX_ERROR_LIMIT=U_REGEX_STOPPED_BY_CALLER+3, +#endif // U_HIDE_DEPRECATED_API + + /* + * Error codes in the range 0x10400-0x104ff are reserved for IDNA related error codes. + */ + U_IDNA_PROHIBITED_ERROR=0x10400, + U_IDNA_ERROR_START=0x10400, + U_IDNA_UNASSIGNED_ERROR, + U_IDNA_CHECK_BIDI_ERROR, + U_IDNA_STD3_ASCII_RULES_ERROR, + U_IDNA_ACE_PREFIX_ERROR, + U_IDNA_VERIFICATION_ERROR, + U_IDNA_LABEL_TOO_LONG_ERROR, + U_IDNA_ZERO_LENGTH_LABEL_ERROR, + U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal IDNA error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_IDNA_ERROR_LIMIT, +#endif // U_HIDE_DEPRECATED_API + /* + * Aliases for StringPrep + */ + U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR, + U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR, + U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR, + + /* + * Error codes in the range 0x10500-0x105ff are reserved for Plugin related error codes. + */ + U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */ + U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */ + U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal plug-in error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_PLUGIN_ERROR_LIMIT, +#endif // U_HIDE_DEPRECATED_API + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT +#endif // U_HIDE_DEPRECATED_API +} UErrorCode; + +/* Use the following to determine if an UErrorCode represents */ +/* operational success or failure. */ + +#ifdef __cplusplus + /** + * Does the error code indicate success? + * @stable ICU 2.0 + */ + static + inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); } + /** + * Does the error code indicate a failure? + * @stable ICU 2.0 + */ + static + inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); } +#else + /** + * Does the error code indicate success? + * @stable ICU 2.0 + */ +# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR) + /** + * Does the error code indicate a failure? + * @stable ICU 2.0 + */ +# define U_FAILURE(x) ((x)>U_ZERO_ERROR) +#endif + +/** + * Return a string for a UErrorCode value. + * The string will be the same as the name of the error code constant + * in the UErrorCode enum above. + * @stable ICU 2.0 + */ +U_CAPI const char * U_EXPORT2 +u_errorName(UErrorCode code); + + +#endif /* _UTYPES */ diff --git a/thirdparty/icu4c/common/unicode/uvernum.h b/thirdparty/icu4c/common/unicode/uvernum.h new file mode 100644 index 0000000000..a4cbb9e0fe --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uvernum.h @@ -0,0 +1,198 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2000-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* file name: uvernum.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* Created by: Vladimir Weinstein +* Updated by: Steven R. Loomis +* +*/ + +/** + * \file + * \brief C API: definitions of ICU version numbers + * + * This file is included by uversion.h and other files. This file contains only + * macros and definitions. The actual version numbers are defined here. + */ + + /* + * IMPORTANT: When updating version, the following things need to be done: + * source/common/unicode/uvernum.h - this file: update major, minor, + * patchlevel, suffix, version, short version constants, namespace, + * renaming macro, and copyright + * + * The following files need to be updated as well, which can be done + * by running the UNIX makefile target 'update-windows-makefiles' in icu/source. + * + * + * source/common/common_uwp.vcxproj + * source/common/common.vcxproj - update 'Output file name' on the link tab so + * that it contains the new major/minor combination + * source/i18n/i18n.vcxproj - same as for the common.vcxproj + * source/i18n/i18n_uwp.vcxproj - same as for the common_uwp.vcxproj + * source/layoutex/layoutex.vcproj - same + * source/stubdata/stubdata.vcproj - same as for the common.vcxproj + * source/io/io.vcproj - same as for the common.vcxproj + * source/data/makedata.mak - change U_ICUDATA_NAME so that it contains + * the new major/minor combination and the Unicode version. + */ + +#ifndef UVERNUM_H +#define UVERNUM_H + +/** The standard copyright notice that gets compiled into each library. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_COPYRIGHT_STRING \ + " Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html " + +/** The current ICU major version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION_MAJOR_NUM 68 + +/** The current ICU minor version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_MINOR_NUM 1 + +/** The current ICU patchlevel version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION_PATCHLEVEL_NUM 0 + +/** The current ICU build level version as an integer. + * This value is for use by ICU clients. It defaults to 0. + * @stable ICU 4.0 + */ +#ifndef U_ICU_VERSION_BUILDLEVEL_NUM +#define U_ICU_VERSION_BUILDLEVEL_NUM 0 +#endif + +/** Glued version suffix for renamers + * This value will change in the subsequent releases of ICU + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_SUFFIX _68 + +/** + * \def U_DEF2_ICU_ENTRY_POINT_RENAME + * @internal + */ +/** + * \def U_DEF_ICU_ENTRY_POINT_RENAME + * @internal + */ +/** Glued version suffix function for renamers + * This value will change in the subsequent releases of ICU. + * If a custom suffix (such as matching library suffixes) is desired, this can be modified. + * Note that if present, platform.h may contain an earlier definition of this macro. + * \def U_ICU_ENTRY_POINT_RENAME + * @stable ICU 4.2 + */ +/** + * Disable the version suffix. Use the custom suffix if exists. + * \def U_DISABLE_VERSION_SUFFIX + * @internal + */ +#ifndef U_DISABLE_VERSION_SUFFIX +#define U_DISABLE_VERSION_SUFFIX 0 +#endif + +#ifndef U_ICU_ENTRY_POINT_RENAME +#ifdef U_HAVE_LIB_SUFFIX +# if !U_DISABLE_VERSION_SUFFIX +# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z +# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) +# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME) +# else +# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y +# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y) +# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_LIB_SUFFIX_C_NAME) +# endif +#else +# if !U_DISABLE_VERSION_SUFFIX +# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y +# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y) +# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX) +# else +# define U_ICU_ENTRY_POINT_RENAME(x) x +# endif +#endif +#endif + +/** The current ICU library version as a dotted-decimal string. The patchlevel + * only appears in this string if it non-zero. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION "68.1" + +/** + * The current ICU library major version number as a string, for library name suffixes. + * This value will change in subsequent releases of ICU. + * + * Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers + * into one string without dots ("48"). + * Since ICU 49, it is the double-digit major ICU version number. + * See https://unicode-org.github.io/icu/userguide/design#version-numbers-in-icu + * + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_SHORT "68" + +#ifndef U_HIDE_INTERNAL_API +/** Data version in ICU4C. + * @internal ICU 4.4 Internal Use Only + **/ +#define U_ICU_DATA_VERSION "68.1" +#endif /* U_HIDE_INTERNAL_API */ + +/*=========================================================================== + * ICU collation framework version information + * Version info that can be obtained from a collator is affected by these + * numbers in a secret and magic way. Please use collator version as whole + *=========================================================================== + */ + +/** + * Collation runtime version (sort key generator, strcoll). + * If the version is different, sort keys for the same string could be different. + * This value may change in subsequent releases of ICU. + * @stable ICU 2.4 + */ +#define UCOL_RUNTIME_VERSION 9 + +/** + * Collation builder code version. + * When this is different, the same tailoring might result + * in assigning different collation elements to code points. + * This value may change in subsequent releases of ICU. + * @stable ICU 2.4 + */ +#define UCOL_BUILDER_VERSION 9 + +#ifndef U_HIDE_DEPRECATED_API +/** + * Constant 1. + * This was intended to be the version of collation tailorings, + * but instead the tailoring data carries a version number. + * @deprecated ICU 54 + */ +#define UCOL_TAILORINGS_VERSION 1 +#endif /* U_HIDE_DEPRECATED_API */ + +#endif diff --git a/thirdparty/icu4c/common/unicode/uversion.h b/thirdparty/icu4c/common/unicode/uversion.h new file mode 100644 index 0000000000..113568df8c --- /dev/null +++ b/thirdparty/icu4c/common/unicode/uversion.h @@ -0,0 +1,187 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2000-2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* file name: uversion.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* Created by: Vladimir Weinstein +* +* Gets included by utypes.h and Windows .rc files +*/ + +/** + * \file + * \brief C API: API for accessing ICU version numbers. + */ +/*===========================================================================*/ +/* Main ICU version information */ +/*===========================================================================*/ + +#ifndef UVERSION_H +#define UVERSION_H + +#include "unicode/umachine.h" + +/* Actual version info lives in uvernum.h */ +#include "unicode/uvernum.h" + +/** Maximum length of the copyright string. + * @stable ICU 2.4 + */ +#define U_COPYRIGHT_STRING_LENGTH 128 + +/** An ICU version consists of up to 4 numbers from 0..255. + * @stable ICU 2.4 + */ +#define U_MAX_VERSION_LENGTH 4 + +/** In a string, ICU version fields are delimited by dots. + * @stable ICU 2.4 + */ +#define U_VERSION_DELIMITER '.' + +/** The maximum length of an ICU version string. + * @stable ICU 2.4 + */ +#define U_MAX_VERSION_STRING_LENGTH 20 + +/** The binary form of a version on ICU APIs is an array of 4 uint8_t. + * To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)). + * @stable ICU 2.4 + */ +typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; + +/*===========================================================================*/ +/* C++ namespace if supported. Versioned unless versioning is disabled. */ +/*===========================================================================*/ + +/* Define C++ namespace symbols. */ +#ifdef __cplusplus + +/** + * \def U_NAMESPACE_BEGIN + * This is used to begin a declaration of a public ICU C++ API within + * versioned-ICU-namespace block. + * + * @stable ICU 2.4 + */ + +/** + * \def U_NAMESPACE_END + * This is used to end a declaration of a public ICU C++ API. + * It ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN. + * + * @stable ICU 2.4 + */ + +/** + * \def U_NAMESPACE_USE + * This is used to specify that the rest of the code uses the + * public ICU C++ API namespace. + * @stable ICU 2.4 + */ + +/** + * \def U_NAMESPACE_QUALIFIER + * This is used to qualify that a function or class is part of + * the public ICU C++ API namespace. + * + * This macro is unnecessary since ICU 49 requires namespace support. + * You can just use "icu::" instead. + * @stable ICU 2.4 + */ + +# if U_DISABLE_RENAMING +# define U_ICU_NAMESPACE icu + namespace U_ICU_NAMESPACE { } +# else +# define U_ICU_NAMESPACE U_ICU_ENTRY_POINT_RENAME(icu) + namespace U_ICU_NAMESPACE { } + namespace icu = U_ICU_NAMESPACE; +# endif + +# define U_NAMESPACE_BEGIN namespace U_ICU_NAMESPACE { +# define U_NAMESPACE_END } +# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE; +# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE:: + +# ifndef U_USING_ICU_NAMESPACE +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ + defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \ + defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION) +# define U_USING_ICU_NAMESPACE 0 +# else +# define U_USING_ICU_NAMESPACE 0 +# endif +# endif +# if U_USING_ICU_NAMESPACE + U_NAMESPACE_USE +# endif +#endif /* __cplusplus */ + +/*===========================================================================*/ +/* General version helper functions. Definitions in putil.c */ +/*===========================================================================*/ + +/** + * Parse a string with dotted-decimal version information and + * fill in a UVersionInfo structure with the result. + * Definition of this function lives in putil.c + * + * @param versionArray The destination structure for the version information. + * @param versionString A string with dotted-decimal version information, + * with up to four non-negative number fields with + * values of up to 255 each. + * @stable ICU 2.4 + */ +U_CAPI void U_EXPORT2 +u_versionFromString(UVersionInfo versionArray, const char *versionString); + +/** + * Parse a Unicode string with dotted-decimal version information and + * fill in a UVersionInfo structure with the result. + * Definition of this function lives in putil.c + * + * @param versionArray The destination structure for the version information. + * @param versionString A Unicode string with dotted-decimal version + * information, with up to four non-negative number + * fields with values of up to 255 each. + * @stable ICU 4.2 + */ +U_CAPI void U_EXPORT2 +u_versionFromUString(UVersionInfo versionArray, const UChar *versionString); + + +/** + * Write a string with dotted-decimal version information according + * to the input UVersionInfo. + * Definition of this function lives in putil.c + * + * @param versionArray The version information to be written as a string. + * @param versionString A string buffer that will be filled in with + * a string corresponding to the numeric version + * information in versionArray. + * The buffer size must be at least U_MAX_VERSION_STRING_LENGTH. + * @stable ICU 2.4 + */ +U_CAPI void U_EXPORT2 +u_versionToString(const UVersionInfo versionArray, char *versionString); + +/** + * Gets the ICU release version. The version array stores the version information + * for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02. + * Definition of this function lives in putil.c + * + * @param versionArray the version # information, the result will be filled in + * @stable ICU 2.0 + */ +U_CAPI void U_EXPORT2 +u_getVersion(UVersionInfo versionArray); +#endif diff --git a/thirdparty/icu4c/common/unifiedcache.cpp b/thirdparty/icu4c/common/unifiedcache.cpp new file mode 100644 index 0000000000..493ab79f6d --- /dev/null +++ b/thirdparty/icu4c/common/unifiedcache.cpp @@ -0,0 +1,522 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2015, International Business Machines Corporation and +* others. All Rights Reserved. +****************************************************************************** +* +* File unifiedcache.cpp +****************************************************************************** +*/ + +#include "unifiedcache.h" + +#include <algorithm> // For std::max() +#include <mutex> + +#include "uassert.h" +#include "uhash.h" +#include "ucln_cmn.h" + +static icu::UnifiedCache *gCache = NULL; +static std::mutex *gCacheMutex = nullptr; +static std::condition_variable *gInProgressValueAddedCond; +static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER; + +static const int32_t MAX_EVICT_ITERATIONS = 10; +static const int32_t DEFAULT_MAX_UNUSED = 1000; +static const int32_t DEFAULT_PERCENTAGE_OF_IN_USE = 100; + + +U_CDECL_BEGIN +static UBool U_CALLCONV unifiedcache_cleanup() { + gCacheInitOnce.reset(); + delete gCache; + gCache = nullptr; + gCacheMutex->~mutex(); + gCacheMutex = nullptr; + gInProgressValueAddedCond->~condition_variable(); + gInProgressValueAddedCond = nullptr; + return TRUE; +} +U_CDECL_END + + +U_NAMESPACE_BEGIN + +U_CAPI int32_t U_EXPORT2 +ucache_hashKeys(const UHashTok key) { + const CacheKeyBase *ckey = (const CacheKeyBase *) key.pointer; + return ckey->hashCode(); +} + +U_CAPI UBool U_EXPORT2 +ucache_compareKeys(const UHashTok key1, const UHashTok key2) { + const CacheKeyBase *p1 = (const CacheKeyBase *) key1.pointer; + const CacheKeyBase *p2 = (const CacheKeyBase *) key2.pointer; + return *p1 == *p2; +} + +U_CAPI void U_EXPORT2 +ucache_deleteKey(void *obj) { + CacheKeyBase *p = (CacheKeyBase *) obj; + delete p; +} + +CacheKeyBase::~CacheKeyBase() { +} + +static void U_CALLCONV cacheInit(UErrorCode &status) { + U_ASSERT(gCache == NULL); + ucln_common_registerCleanup( + UCLN_COMMON_UNIFIED_CACHE, unifiedcache_cleanup); + + gCacheMutex = STATIC_NEW(std::mutex); + gInProgressValueAddedCond = STATIC_NEW(std::condition_variable); + gCache = new UnifiedCache(status); + if (gCache == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(status)) { + delete gCache; + gCache = NULL; + return; + } +} + +UnifiedCache *UnifiedCache::getInstance(UErrorCode &status) { + umtx_initOnce(gCacheInitOnce, &cacheInit, status); + if (U_FAILURE(status)) { + return NULL; + } + U_ASSERT(gCache != NULL); + return gCache; +} + +UnifiedCache::UnifiedCache(UErrorCode &status) : + fHashtable(NULL), + fEvictPos(UHASH_FIRST), + fNumValuesTotal(0), + fNumValuesInUse(0), + fMaxUnused(DEFAULT_MAX_UNUSED), + fMaxPercentageOfInUse(DEFAULT_PERCENTAGE_OF_IN_USE), + fAutoEvictedCount(0), + fNoValue(nullptr) { + if (U_FAILURE(status)) { + return; + } + fNoValue = new SharedObject(); + if (fNoValue == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + fNoValue->softRefCount = 1; // Add fake references to prevent fNoValue from being deleted + fNoValue->hardRefCount = 1; // when other references to it are removed. + fNoValue->cachePtr = this; + + fHashtable = uhash_open( + &ucache_hashKeys, + &ucache_compareKeys, + NULL, + &status); + if (U_FAILURE(status)) { + return; + } + uhash_setKeyDeleter(fHashtable, &ucache_deleteKey); +} + +void UnifiedCache::setEvictionPolicy( + int32_t count, int32_t percentageOfInUseItems, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + if (count < 0 || percentageOfInUseItems < 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + std::lock_guard<std::mutex> lock(*gCacheMutex); + fMaxUnused = count; + fMaxPercentageOfInUse = percentageOfInUseItems; +} + +int32_t UnifiedCache::unusedCount() const { + std::lock_guard<std::mutex> lock(*gCacheMutex); + return uhash_count(fHashtable) - fNumValuesInUse; +} + +int64_t UnifiedCache::autoEvictedCount() const { + std::lock_guard<std::mutex> lock(*gCacheMutex); + return fAutoEvictedCount; +} + +int32_t UnifiedCache::keyCount() const { + std::lock_guard<std::mutex> lock(*gCacheMutex); + return uhash_count(fHashtable); +} + +void UnifiedCache::flush() const { + std::lock_guard<std::mutex> lock(*gCacheMutex); + + // Use a loop in case cache items that are flushed held hard references to + // other cache items making those additional cache items eligible for + // flushing. + while (_flush(FALSE)); +} + +void UnifiedCache::handleUnreferencedObject() const { + std::lock_guard<std::mutex> lock(*gCacheMutex); + --fNumValuesInUse; + _runEvictionSlice(); +} + +#ifdef UNIFIED_CACHE_DEBUG +#include <stdio.h> + +void UnifiedCache::dump() { + UErrorCode status = U_ZERO_ERROR; + const UnifiedCache *cache = getInstance(status); + if (U_FAILURE(status)) { + fprintf(stderr, "Unified Cache: Error fetching cache.\n"); + return; + } + cache->dumpContents(); +} + +void UnifiedCache::dumpContents() const { + std::lock_guard<std::mutex> lock(*gCacheMutex); + _dumpContents(); +} + +// Dumps content of cache. +// On entry, gCacheMutex must be held. +// On exit, cache contents dumped to stderr. +void UnifiedCache::_dumpContents() const { + int32_t pos = UHASH_FIRST; + const UHashElement *element = uhash_nextElement(fHashtable, &pos); + char buffer[256]; + int32_t cnt = 0; + for (; element != NULL; element = uhash_nextElement(fHashtable, &pos)) { + const SharedObject *sharedObject = + (const SharedObject *) element->value.pointer; + const CacheKeyBase *key = + (const CacheKeyBase *) element->key.pointer; + if (sharedObject->hasHardReferences()) { + ++cnt; + fprintf( + stderr, + "Unified Cache: Key '%s', error %d, value %p, total refcount %d, soft refcount %d\n", + key->writeDescription(buffer, 256), + key->creationStatus, + sharedObject == fNoValue ? NULL :sharedObject, + sharedObject->getRefCount(), + sharedObject->getSoftRefCount()); + } + } + fprintf(stderr, "Unified Cache: %d out of a total of %d still have hard references\n", cnt, uhash_count(fHashtable)); +} +#endif + +UnifiedCache::~UnifiedCache() { + // Try our best to clean up first. + flush(); + { + // Now all that should be left in the cache are entries that refer to + // each other and entries with hard references from outside the cache. + // Nothing we can do about these so proceed to wipe out the cache. + std::lock_guard<std::mutex> lock(*gCacheMutex); + _flush(TRUE); + } + uhash_close(fHashtable); + fHashtable = nullptr; + delete fNoValue; + fNoValue = nullptr; +} + +const UHashElement * +UnifiedCache::_nextElement() const { + const UHashElement *element = uhash_nextElement(fHashtable, &fEvictPos); + if (element == NULL) { + fEvictPos = UHASH_FIRST; + return uhash_nextElement(fHashtable, &fEvictPos); + } + return element; +} + +UBool UnifiedCache::_flush(UBool all) const { + UBool result = FALSE; + int32_t origSize = uhash_count(fHashtable); + for (int32_t i = 0; i < origSize; ++i) { + const UHashElement *element = _nextElement(); + if (element == nullptr) { + break; + } + if (all || _isEvictable(element)) { + const SharedObject *sharedObject = + (const SharedObject *) element->value.pointer; + U_ASSERT(sharedObject->cachePtr == this); + uhash_removeElement(fHashtable, element); + removeSoftRef(sharedObject); // Deletes the sharedObject when softRefCount goes to zero. + result = TRUE; + } + } + return result; +} + +int32_t UnifiedCache::_computeCountOfItemsToEvict() const { + int32_t totalItems = uhash_count(fHashtable); + int32_t evictableItems = totalItems - fNumValuesInUse; + + int32_t unusedLimitByPercentage = fNumValuesInUse * fMaxPercentageOfInUse / 100; + int32_t unusedLimit = std::max(unusedLimitByPercentage, fMaxUnused); + int32_t countOfItemsToEvict = std::max(0, evictableItems - unusedLimit); + return countOfItemsToEvict; +} + +void UnifiedCache::_runEvictionSlice() const { + int32_t maxItemsToEvict = _computeCountOfItemsToEvict(); + if (maxItemsToEvict <= 0) { + return; + } + for (int32_t i = 0; i < MAX_EVICT_ITERATIONS; ++i) { + const UHashElement *element = _nextElement(); + if (element == nullptr) { + break; + } + if (_isEvictable(element)) { + const SharedObject *sharedObject = + (const SharedObject *) element->value.pointer; + uhash_removeElement(fHashtable, element); + removeSoftRef(sharedObject); // Deletes sharedObject when SoftRefCount goes to zero. + ++fAutoEvictedCount; + if (--maxItemsToEvict == 0) { + break; + } + } + } +} + +void UnifiedCache::_putNew( + const CacheKeyBase &key, + const SharedObject *value, + const UErrorCode creationStatus, + UErrorCode &status) const { + if (U_FAILURE(status)) { + return; + } + CacheKeyBase *keyToAdopt = key.clone(); + if (keyToAdopt == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + keyToAdopt->fCreationStatus = creationStatus; + if (value->softRefCount == 0) { + _registerPrimary(keyToAdopt, value); + } + void *oldValue = uhash_put(fHashtable, keyToAdopt, (void *) value, &status); + U_ASSERT(oldValue == nullptr); + (void)oldValue; + if (U_SUCCESS(status)) { + value->softRefCount++; + } +} + +void UnifiedCache::_putIfAbsentAndGet( + const CacheKeyBase &key, + const SharedObject *&value, + UErrorCode &status) const { + std::lock_guard<std::mutex> lock(*gCacheMutex); + const UHashElement *element = uhash_find(fHashtable, &key); + if (element != NULL && !_inProgress(element)) { + _fetch(element, value, status); + return; + } + if (element == NULL) { + UErrorCode putError = U_ZERO_ERROR; + // best-effort basis only. + _putNew(key, value, status, putError); + } else { + _put(element, value, status); + } + // Run an eviction slice. This will run even if we added a primary entry + // which doesn't increase the unused count, but that is still o.k + _runEvictionSlice(); +} + + +UBool UnifiedCache::_poll( + const CacheKeyBase &key, + const SharedObject *&value, + UErrorCode &status) const { + U_ASSERT(value == NULL); + U_ASSERT(status == U_ZERO_ERROR); + std::unique_lock<std::mutex> lock(*gCacheMutex); + const UHashElement *element = uhash_find(fHashtable, &key); + + // If the hash table contains an inProgress placeholder entry for this key, + // this means that another thread is currently constructing the value object. + // Loop, waiting for that construction to complete. + while (element != NULL && _inProgress(element)) { + gInProgressValueAddedCond->wait(lock); + element = uhash_find(fHashtable, &key); + } + + // If the hash table contains an entry for the key, + // fetch out the contents and return them. + if (element != NULL) { + _fetch(element, value, status); + return TRUE; + } + + // The hash table contained nothing for this key. + // Insert an inProgress place holder value. + // Our caller will create the final value and update the hash table. + _putNew(key, fNoValue, U_ZERO_ERROR, status); + return FALSE; +} + +void UnifiedCache::_get( + const CacheKeyBase &key, + const SharedObject *&value, + const void *creationContext, + UErrorCode &status) const { + U_ASSERT(value == NULL); + U_ASSERT(status == U_ZERO_ERROR); + if (_poll(key, value, status)) { + if (value == fNoValue) { + SharedObject::clearPtr(value); + } + return; + } + if (U_FAILURE(status)) { + return; + } + value = key.createObject(creationContext, status); + U_ASSERT(value == NULL || value->hasHardReferences()); + U_ASSERT(value != NULL || status != U_ZERO_ERROR); + if (value == NULL) { + SharedObject::copyPtr(fNoValue, value); + } + _putIfAbsentAndGet(key, value, status); + if (value == fNoValue) { + SharedObject::clearPtr(value); + } +} + +void UnifiedCache::_registerPrimary( + const CacheKeyBase *theKey, const SharedObject *value) const { + theKey->fIsPrimary = true; + value->cachePtr = this; + ++fNumValuesTotal; + ++fNumValuesInUse; +} + +void UnifiedCache::_put( + const UHashElement *element, + const SharedObject *value, + const UErrorCode status) const { + U_ASSERT(_inProgress(element)); + const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; + const SharedObject *oldValue = (const SharedObject *) element->value.pointer; + theKey->fCreationStatus = status; + if (value->softRefCount == 0) { + _registerPrimary(theKey, value); + } + value->softRefCount++; + UHashElement *ptr = const_cast<UHashElement *>(element); + ptr->value.pointer = (void *) value; + U_ASSERT(oldValue == fNoValue); + removeSoftRef(oldValue); + + // Tell waiting threads that we replace in-progress status with + // an error. + gInProgressValueAddedCond->notify_all(); +} + +void UnifiedCache::_fetch( + const UHashElement *element, + const SharedObject *&value, + UErrorCode &status) const { + const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; + status = theKey->fCreationStatus; + + // Since we have the cache lock, calling regular SharedObject add/removeRef + // could cause us to deadlock on ourselves since they may need to lock + // the cache mutex. + removeHardRef(value); + value = static_cast<const SharedObject *>(element->value.pointer); + addHardRef(value); +} + + +UBool UnifiedCache::_inProgress(const UHashElement* element) const { + UErrorCode status = U_ZERO_ERROR; + const SharedObject * value = NULL; + _fetch(element, value, status); + UBool result = _inProgress(value, status); + removeHardRef(value); + return result; +} + +UBool UnifiedCache::_inProgress( + const SharedObject* theValue, UErrorCode creationStatus) const { + return (theValue == fNoValue && creationStatus == U_ZERO_ERROR); +} + +UBool UnifiedCache::_isEvictable(const UHashElement *element) const +{ + const CacheKeyBase *theKey = (const CacheKeyBase *) element->key.pointer; + const SharedObject *theValue = + (const SharedObject *) element->value.pointer; + + // Entries that are under construction are never evictable + if (_inProgress(theValue, theKey->fCreationStatus)) { + return FALSE; + } + + // We can evict entries that are either not a primary or have just + // one reference (The one reference being from the cache itself). + return (!theKey->fIsPrimary || (theValue->softRefCount == 1 && theValue->noHardReferences())); +} + +void UnifiedCache::removeSoftRef(const SharedObject *value) const { + U_ASSERT(value->cachePtr == this); + U_ASSERT(value->softRefCount > 0); + if (--value->softRefCount == 0) { + --fNumValuesTotal; + if (value->noHardReferences()) { + delete value; + } else { + // This path only happens from flush(all). Which only happens from the + // UnifiedCache destructor. Nulling out value.cacheptr changes the behavior + // of value.removeRef(), causing the deletion to be done there. + value->cachePtr = nullptr; + } + } +} + +int32_t UnifiedCache::removeHardRef(const SharedObject *value) const { + int refCount = 0; + if (value) { + refCount = umtx_atomic_dec(&value->hardRefCount); + U_ASSERT(refCount >= 0); + if (refCount == 0) { + --fNumValuesInUse; + } + } + return refCount; +} + +int32_t UnifiedCache::addHardRef(const SharedObject *value) const { + int refCount = 0; + if (value) { + refCount = umtx_atomic_inc(&value->hardRefCount); + U_ASSERT(refCount >= 1); + if (refCount == 1) { + fNumValuesInUse++; + } + } + return refCount; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/unifiedcache.h b/thirdparty/icu4c/common/unifiedcache.h new file mode 100644 index 0000000000..a31998db20 --- /dev/null +++ b/thirdparty/icu4c/common/unifiedcache.h @@ -0,0 +1,556 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2015, International Business Machines Corporation and +* others. All Rights Reserved. +****************************************************************************** +* +* File UNIFIEDCACHE.H - The ICU Unified cache. +****************************************************************************** +*/ + +#ifndef __UNIFIED_CACHE_H__ +#define __UNIFIED_CACHE_H__ + +#include "utypeinfo.h" // for 'typeid' to work + +#include "unicode/uobject.h" +#include "unicode/locid.h" +#include "sharedobject.h" +#include "unicode/unistr.h" +#include "cstring.h" +#include "ustr_imp.h" + +struct UHashtable; +struct UHashElement; + +U_NAMESPACE_BEGIN + +class UnifiedCache; + +/** + * A base class for all cache keys. + */ +class U_COMMON_API CacheKeyBase : public UObject { + public: + CacheKeyBase() : fCreationStatus(U_ZERO_ERROR), fIsPrimary(false) {} + + /** + * Copy constructor. Needed to support cloning. + */ + CacheKeyBase(const CacheKeyBase &other) + : UObject(other), fCreationStatus(other.fCreationStatus), fIsPrimary(false) { } + virtual ~CacheKeyBase(); + + /** + * Returns the hash code for this object. + */ + virtual int32_t hashCode() const = 0; + + /** + * Clones this object polymorphically. Caller owns returned value. + */ + virtual CacheKeyBase *clone() const = 0; + + /** + * Equality operator. + */ + virtual UBool operator == (const CacheKeyBase &other) const = 0; + + /** + * Create a new object for this key. Called by cache on cache miss. + * createObject must add a reference to the object it returns. Note + * that getting an object from the cache and returning it without calling + * removeRef on it satisfies this requirement. It can also return NULL + * and set status to an error. + * + * @param creationContext the context in which the object is being + * created. May be NULL. + * @param status Implementations can return a failure here. + * In addition, implementations may return a + * non NULL object and set a warning status. + */ + virtual const SharedObject *createObject( + const void *creationContext, UErrorCode &status) const = 0; + + /** + * Writes a description of this key to buffer and returns buffer. Written + * description is NULL terminated. + */ + virtual char *writeDescription(char *buffer, int32_t bufSize) const = 0; + + /** + * Inequality operator. + */ + UBool operator != (const CacheKeyBase &other) const { + return !(*this == other); + } + private: + mutable UErrorCode fCreationStatus; + mutable UBool fIsPrimary; + friend class UnifiedCache; +}; + + + +/** + * Templated version of CacheKeyBase. + * A key of type LocaleCacheKey<T> maps to a value of type T. + */ +template<typename T> +class CacheKey : public CacheKeyBase { + public: + virtual ~CacheKey() { } + /** + * The template parameter, T, determines the hash code returned. + */ + virtual int32_t hashCode() const { + const char *s = typeid(T).name(); + return ustr_hashCharsN(s, static_cast<int32_t>(uprv_strlen(s))); + } + + /** + * Use the value type, T, as the description. + */ + virtual char *writeDescription(char *buffer, int32_t bufLen) const { + const char *s = typeid(T).name(); + uprv_strncpy(buffer, s, bufLen); + buffer[bufLen - 1] = 0; + return buffer; + } + + /** + * Two objects are equal if they are of the same type. + */ + virtual UBool operator == (const CacheKeyBase &other) const { + return typeid(*this) == typeid(other); + } +}; + +/** + * Cache key based on locale. + * A key of type LocaleCacheKey<T> maps to a value of type T. + */ +template<typename T> +class LocaleCacheKey : public CacheKey<T> { + protected: + Locale fLoc; + public: + LocaleCacheKey(const Locale &loc) : fLoc(loc) {} + LocaleCacheKey(const LocaleCacheKey<T> &other) + : CacheKey<T>(other), fLoc(other.fLoc) { } + virtual ~LocaleCacheKey() { } + virtual int32_t hashCode() const { + return (int32_t)(37u * (uint32_t)CacheKey<T>::hashCode() + (uint32_t)fLoc.hashCode()); + } + virtual UBool operator == (const CacheKeyBase &other) const { + // reflexive + if (this == &other) { + return true; + } + if (!CacheKey<T>::operator == (other)) { + return false; + } + // We know this and other are of same class because operator== on + // CacheKey returned true. + const LocaleCacheKey<T> *fOther = + static_cast<const LocaleCacheKey<T> *>(&other); + return fLoc == fOther->fLoc; + } + virtual CacheKeyBase *clone() const { + return new LocaleCacheKey<T>(*this); + } + virtual const T *createObject( + const void *creationContext, UErrorCode &status) const; + /** + * Use the locale id as the description. + */ + virtual char *writeDescription(char *buffer, int32_t bufLen) const { + const char *s = fLoc.getName(); + uprv_strncpy(buffer, s, bufLen); + buffer[bufLen - 1] = 0; + return buffer; + } + +}; + +/** + * The unified cache. A singleton type. + * Design doc here: + * https://docs.google.com/document/d/1RwGQJs4N4tawNbf809iYDRCvXoMKqDJihxzYt1ysmd8/edit?usp=sharing + */ +class U_COMMON_API UnifiedCache : public UnifiedCacheBase { + public: + /** + * @internal + * Do not call directly. Instead use UnifiedCache::getInstance() as + * there should be only one UnifiedCache in an application. + */ + UnifiedCache(UErrorCode &status); + + /** + * Return a pointer to the global cache instance. + */ + static UnifiedCache *getInstance(UErrorCode &status); + + /** + * Fetches a value from the cache by key. Equivalent to + * get(key, NULL, ptr, status); + */ + template<typename T> + void get( + const CacheKey<T>& key, + const T *&ptr, + UErrorCode &status) const { + get(key, NULL, ptr, status); + } + + /** + * Fetches value from the cache by key. + * + * @param key the cache key. + * @param creationContext passed verbatim to createObject method of key + * @param ptr On entry, ptr must be NULL or be included if + * the reference count of the object it points + * to. On exit, ptr points to the fetched object + * from the cache or is left unchanged on + * failure. Caller must call removeRef on ptr + * if set to a non NULL value. + * @param status Any error returned here. May be set to a + * warning value even if ptr is set. + */ + template<typename T> + void get( + const CacheKey<T>& key, + const void *creationContext, + const T *&ptr, + UErrorCode &status) const { + if (U_FAILURE(status)) { + return; + } + UErrorCode creationStatus = U_ZERO_ERROR; + const SharedObject *value = NULL; + _get(key, value, creationContext, creationStatus); + const T *tvalue = (const T *) value; + if (U_SUCCESS(creationStatus)) { + SharedObject::copyPtr(tvalue, ptr); + } + SharedObject::clearPtr(tvalue); + // Take care not to overwrite a warning status passed in with + // another warning or U_ZERO_ERROR. + if (status == U_ZERO_ERROR || U_FAILURE(creationStatus)) { + status = creationStatus; + } + } + +#ifdef UNIFIED_CACHE_DEBUG + /** + * Dumps the contents of this cache to standard error. Used for testing of + * cache only. + */ + void dumpContents() const; +#endif + + /** + * Convenience method to get a value of type T from cache for a + * particular locale with creationContext == NULL. + * @param loc the locale + * @param ptr On entry, must be NULL or included in the ref count + * of the object to which it points. + * On exit, fetched value stored here or is left + * unchanged on failure. Caller must call removeRef on + * ptr if set to a non NULL value. + * @param status Any error returned here. May be set to a + * warning value even if ptr is set. + */ + template<typename T> + static void getByLocale( + const Locale &loc, const T *&ptr, UErrorCode &status) { + const UnifiedCache *cache = getInstance(status); + if (U_FAILURE(status)) { + return; + } + cache->get(LocaleCacheKey<T>(loc), ptr, status); + } + +#ifdef UNIFIED_CACHE_DEBUG + /** + * Dumps the cache contents to stderr. For testing only. + */ + static void dump(); +#endif + + /** + * Returns the number of keys in this cache. For testing only. + */ + int32_t keyCount() const; + + /** + * Removes any values from cache that are not referenced outside + * the cache. + */ + void flush() const; + + /** + * Configures at what point evcition of unused entries will begin. + * Eviction is triggered whenever the number of evictable keys exeeds + * BOTH count AND (number of in-use items) * (percentageOfInUseItems / 100). + * Once the number of unused entries drops below one of these, + * eviction ceases. Because eviction happens incrementally, + * the actual unused entry count may exceed both these numbers + * from time to time. + * + * A cache entry is defined as unused if it is not essential to guarantee + * that for a given key X, the cache returns the same reference to the + * same value as long as the client already holds a reference to that + * value. + * + * If this method is never called, the default settings are 1000 and 100%. + * + * Although this method is thread-safe, it is designed to be called at + * application startup. If it is called in the middle of execution, it + * will have no immediate effect on the cache. However over time, the + * cache will perform eviction slices in an attempt to honor the new + * settings. + * + * If a client already holds references to many different unique values + * in the cache such that the number of those unique values far exeeds + * "count" then the cache may not be able to maintain this maximum. + * However, if this happens, the cache still guarantees that the number of + * unused entries will remain only a small percentage of the total cache + * size. + * + * If the parameters passed are negative, setEvctionPolicy sets status to + * U_ILLEGAL_ARGUMENT_ERROR. + */ + void setEvictionPolicy( + int32_t count, int32_t percentageOfInUseItems, UErrorCode &status); + + + /** + * Returns how many entries have been auto evicted during the lifetime + * of this cache. This only includes auto evicted entries, not + * entries evicted because of a call to flush(). + */ + int64_t autoEvictedCount() const; + + /** + * Returns the unused entry count in this cache. For testing only, + * Regular clients will not need this. + */ + int32_t unusedCount() const; + + virtual void handleUnreferencedObject() const; + virtual ~UnifiedCache(); + + private: + UHashtable *fHashtable; + mutable int32_t fEvictPos; + mutable int32_t fNumValuesTotal; + mutable int32_t fNumValuesInUse; + int32_t fMaxUnused; + int32_t fMaxPercentageOfInUse; + mutable int64_t fAutoEvictedCount; + SharedObject *fNoValue; + + UnifiedCache(const UnifiedCache &other); + UnifiedCache &operator=(const UnifiedCache &other); + + /** + * Flushes the contents of the cache. If cache values hold references to other + * cache values then _flush should be called in a loop until it returns false. + * + * On entry, gCacheMutex must be held. + * On exit, those values with are evictable are flushed. + * + * @param all if false flush evictable items only, which are those with no external + * references, plus those that can be safely recreated.<br> + * if true, flush all elements. Any values (sharedObjects) with remaining + * hard (external) references are not deleted, but are detached from + * the cache, so that a subsequent removeRefs can delete them. + * _flush is not thread safe when all is true. + * @return true if any value in cache was flushed or false otherwise. + */ + UBool _flush(UBool all) const; + + /** + * Gets value out of cache. + * On entry. gCacheMutex must not be held. value must be NULL. status + * must be U_ZERO_ERROR. + * On exit. value and status set to what is in cache at key or on cache + * miss the key's createObject() is called and value and status are set to + * the result of that. In this latter case, best effort is made to add the + * value and status to the cache. If createObject() fails to create a value, + * fNoValue is stored in cache, and value is set to NULL. Caller must call + * removeRef on value if non NULL. + */ + void _get( + const CacheKeyBase &key, + const SharedObject *&value, + const void *creationContext, + UErrorCode &status) const; + + /** + * Attempts to fetch value and status for key from cache. + * On entry, gCacheMutex must not be held value must be NULL and status must + * be U_ZERO_ERROR. + * On exit, either returns false (In this + * case caller should try to create the object) or returns true with value + * pointing to the fetched value and status set to fetched status. When + * false is returned status may be set to failure if an in progress hash + * entry could not be made but value will remain unchanged. When true is + * returned, caller must call removeRef() on value. + */ + UBool _poll( + const CacheKeyBase &key, + const SharedObject *&value, + UErrorCode &status) const; + + /** + * Places a new value and creationStatus in the cache for the given key. + * On entry, gCacheMutex must be held. key must not exist in the cache. + * On exit, value and creation status placed under key. Soft reference added + * to value on successful add. On error sets status. + */ + void _putNew( + const CacheKeyBase &key, + const SharedObject *value, + const UErrorCode creationStatus, + UErrorCode &status) const; + + /** + * Places value and status at key if there is no value at key or if cache + * entry for key is in progress. Otherwise, it leaves the current value and + * status there. + * + * On entry. gCacheMutex must not be held. Value must be + * included in the reference count of the object to which it points. + * + * On exit, value and status are changed to what was already in the cache if + * something was there and not in progress. Otherwise, value and status are left + * unchanged in which case they are placed in the cache on a best-effort basis. + * Caller must call removeRef() on value. + */ + void _putIfAbsentAndGet( + const CacheKeyBase &key, + const SharedObject *&value, + UErrorCode &status) const; + + /** + * Returns the next element in the cache round robin style. + * Returns nullptr if the cache is empty. + * On entry, gCacheMutex must be held. + */ + const UHashElement *_nextElement() const; + + /** + * Return the number of cache items that would need to be evicted + * to bring usage into conformance with eviction policy. + * + * An item corresponds to an entry in the hash table, a hash table element. + * + * On entry, gCacheMutex must be held. + */ + int32_t _computeCountOfItemsToEvict() const; + + /** + * Run an eviction slice. + * On entry, gCacheMutex must be held. + * _runEvictionSlice runs a slice of the evict pipeline by examining the next + * 10 entries in the cache round robin style evicting them if they are eligible. + */ + void _runEvictionSlice() const; + + /** + * Register a primary cache entry. A primary key is the first key to create + * a given SharedObject value. Subsequent keys whose create function + * produce referneces to an already existing SharedObject are not primary - + * they can be evicted and subsequently recreated. + * + * On entry, gCacheMutex must be held. + * On exit, items in use count incremented, entry is marked as a primary + * entry, and value registered with cache so that subsequent calls to + * addRef() and removeRef() on it correctly interact with the cache. + */ + void _registerPrimary(const CacheKeyBase *theKey, const SharedObject *value) const; + + /** + * Store a value and creation error status in given hash entry. + * On entry, gCacheMutex must be held. Hash entry element must be in progress. + * value must be non NULL. + * On Exit, soft reference added to value. value and status stored in hash + * entry. Soft reference removed from previous stored value. Waiting + * threads notified. + */ + void _put( + const UHashElement *element, + const SharedObject *value, + const UErrorCode status) const; + /** + * Remove a soft reference, and delete the SharedObject if no references remain. + * To be used from within the UnifiedCache implementation only. + * gCacheMutex must be held by caller. + * @param value the SharedObject to be acted on. + */ + void removeSoftRef(const SharedObject *value) const; + + /** + * Increment the hard reference count of the given SharedObject. + * gCacheMutex must be held by the caller. + * Update numValuesEvictable on transitions between zero and one reference. + * + * @param value The SharedObject to be referenced. + * @return the hard reference count after the addition. + */ + int32_t addHardRef(const SharedObject *value) const; + + /** + * Decrement the hard reference count of the given SharedObject. + * gCacheMutex must be held by the caller. + * Update numValuesEvictable on transitions between one and zero reference. + * + * @param value The SharedObject to be referenced. + * @return the hard reference count after the removal. + */ + int32_t removeHardRef(const SharedObject *value) const; + + +#ifdef UNIFIED_CACHE_DEBUG + void _dumpContents() const; +#endif + + /** + * Fetch value and error code from a particular hash entry. + * On entry, gCacheMutex must be held. value must be either NULL or must be + * included in the ref count of the object to which it points. + * On exit, value and status set to what is in the hash entry. Caller must + * eventually call removeRef on value. + * If hash entry is in progress, value will be set to gNoValue and status will + * be set to U_ZERO_ERROR. + */ + void _fetch(const UHashElement *element, const SharedObject *&value, + UErrorCode &status) const; + + /** + * Determine if given hash entry is in progress. + * On entry, gCacheMutex must be held. + */ + UBool _inProgress(const UHashElement *element) const; + + /** + * Determine if given hash entry is in progress. + * On entry, gCacheMutex must be held. + */ + UBool _inProgress(const SharedObject *theValue, UErrorCode creationStatus) const; + + /** + * Determine if given hash entry is eligible for eviction. + * On entry, gCacheMutex must be held. + */ + UBool _isEvictable(const UHashElement *element) const; +}; + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/unifilt.cpp b/thirdparty/icu4c/common/unifilt.cpp new file mode 100644 index 0000000000..4ab0d9b5f9 --- /dev/null +++ b/thirdparty/icu4c/common/unifilt.cpp @@ -0,0 +1,71 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2001-2012, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 07/18/01 aliu Creation. +********************************************************************** +*/ + +#include "unicode/unifilt.h" +#include "unicode/rep.h" +#include "unicode/utf16.h" + +U_NAMESPACE_BEGIN +UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFilter) + + +/* Define this here due to the lack of another file. + It can't be defined in the header */ +UnicodeMatcher::~UnicodeMatcher() {} + +UnicodeFilter::~UnicodeFilter() {} + +/** + * UnicodeFunctor API. + * Note that UnicodeMatcher is a base class of UnicodeFilter. + */ +UnicodeMatcher* UnicodeFilter::toMatcher() const { + return const_cast<UnicodeFilter *>(this); +} + +void UnicodeFilter::setData(const TransliterationRuleData*) {} + +/** + * Default implementation of UnicodeMatcher::matches() for Unicode + * filters. Matches a single code point at offset (either one or + * two 16-bit code units). + */ +UMatchDegree UnicodeFilter::matches(const Replaceable& text, + int32_t& offset, + int32_t limit, + UBool incremental) { + UChar32 c; + if (offset < limit && + contains(c = text.char32At(offset))) { + offset += U16_LENGTH(c); + return U_MATCH; + } + if (offset > limit && + contains(c = text.char32At(offset))) { + // Backup offset by 1, unless the preceding character is a + // surrogate pair -- then backup by 2 (keep offset pointing at + // the lead surrogate). + --offset; + if (offset >= 0) { + offset -= U16_LENGTH(text.char32At(offset)) - 1; + } + return U_MATCH; + } + if (incremental && offset == limit) { + return U_PARTIAL_MATCH; + } + return U_MISMATCH; +} + +U_NAMESPACE_END + +//eof diff --git a/thirdparty/icu4c/common/unifunct.cpp b/thirdparty/icu4c/common/unifunct.cpp new file mode 100644 index 0000000000..f3995b298d --- /dev/null +++ b/thirdparty/icu4c/common/unifunct.cpp @@ -0,0 +1,28 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2002-2004, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#include "unicode/unifunct.h" + +U_NAMESPACE_BEGIN + +UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFunctor) + +UnicodeFunctor::~UnicodeFunctor() {} + +UnicodeMatcher* UnicodeFunctor::toMatcher() const { + return 0; +} + +UnicodeReplacer* UnicodeFunctor::toReplacer() const { + return 0; +} + +U_NAMESPACE_END + +//eof diff --git a/thirdparty/icu4c/common/uniquecharstr.h b/thirdparty/icu4c/common/uniquecharstr.h new file mode 100644 index 0000000000..10cc924f7f --- /dev/null +++ b/thirdparty/icu4c/common/uniquecharstr.h @@ -0,0 +1,98 @@ +// © 2020 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// uniquecharstr.h +// created: 2020sep01 Frank Yung-Fong Tang + +#ifndef __UNIQUECHARSTR_H__ +#define __UNIQUECHARSTR_H__ + +#include "charstr.h" +#include "uassert.h" +#include "uhash.h" + +U_NAMESPACE_BEGIN + +/** + * Stores NUL-terminated strings with duplicate elimination. + * Checks for unique UTF-16 string pointers and converts to invariant characters. + * + * Intended to be stack-allocated. Add strings, get a unique number for each, + * freeze the object, get a char * pointer for each string, + * call orphanCharStrings() to capture the string storage, and let this object go out of scope. + */ +class UniqueCharStrings { +public: + UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) { + // Note: We hash on string contents but store stable char16_t * pointers. + // If the strings are stored in resource bundles which should be built with + // duplicate elimination, then we should be able to hash on just the pointer values. + uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode); + if (U_FAILURE(errorCode)) { return; } + strings = new CharString(); + if (strings == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + } + ~UniqueCharStrings() { + uhash_close(&map); + delete strings; + } + + /** Returns/orphans the CharString that contains all strings. */ + CharString *orphanCharStrings() { + CharString *result = strings; + strings = nullptr; + return result; + } + + /** + * Adds a string and returns a unique number for it. + * The string's buffer contents must not change, nor move around in memory, + * while this UniqueCharStrings is in use. + * The string contents must be NUL-terminated exactly at s.length(). + * + * Best used with read-only-alias UnicodeString objects that point to + * stable storage, such as strings returned by resource bundle functions. + */ + int32_t add(const UnicodeString &s, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return 0; } + if (isFrozen) { + errorCode = U_NO_WRITE_PERMISSION; + return 0; + } + // The string points into the resource bundle. + const char16_t *p = s.getBuffer(); + int32_t oldIndex = uhash_geti(&map, p); + if (oldIndex != 0) { // found duplicate + return oldIndex; + } + // Explicit NUL terminator for the previous string. + // The strings object is also terminated with one implicit NUL. + strings->append(0, errorCode); + int32_t newIndex = strings->length(); + strings->appendInvariantChars(s, errorCode); + uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode); + return newIndex; + } + + void freeze() { isFrozen = true; } + + /** + * Returns a string pointer for its unique number, if this object is frozen. + * Otherwise nullptr. + */ + const char *get(int32_t i) const { + U_ASSERT(isFrozen); + return isFrozen && i > 0 ? strings->data() + i : nullptr; + } + +private: + UHashtable map; + CharString *strings; + bool isFrozen = false; +}; + +U_NAMESPACE_END + +#endif // __UNIQUECHARSTR_H__ diff --git a/thirdparty/icu4c/common/uniset.cpp b/thirdparty/icu4c/common/uniset.cpp new file mode 100644 index 0000000000..b73d612f24 --- /dev/null +++ b/thirdparty/icu4c/common/uniset.cpp @@ -0,0 +1,2356 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 10/20/99 alan Creation. +********************************************************************** +*/ + +#include "unicode/utypes.h" +#include "unicode/parsepos.h" +#include "unicode/symtable.h" +#include "unicode/uniset.h" +#include "unicode/ustring.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "ruleiter.h" +#include "cmemory.h" +#include "cstring.h" +#include "patternprops.h" +#include "uelement.h" +#include "util.h" +#include "uvector.h" +#include "charstr.h" +#include "ustrfmt.h" +#include "uassert.h" +#include "bmpset.h" +#include "unisetspan.h" + +// Define UChar constants using hex for EBCDIC compatibility +// Used #define to reduce private static exports and memory access time. +#define SET_OPEN ((UChar)0x005B) /*[*/ +#define SET_CLOSE ((UChar)0x005D) /*]*/ +#define HYPHEN ((UChar)0x002D) /*-*/ +#define COMPLEMENT ((UChar)0x005E) /*^*/ +#define COLON ((UChar)0x003A) /*:*/ +#define BACKSLASH ((UChar)0x005C) /*\*/ +#define INTERSECTION ((UChar)0x0026) /*&*/ +#define UPPER_U ((UChar)0x0055) /*U*/ +#define LOWER_U ((UChar)0x0075) /*u*/ +#define OPEN_BRACE ((UChar)123) /*{*/ +#define CLOSE_BRACE ((UChar)125) /*}*/ +#define UPPER_P ((UChar)0x0050) /*P*/ +#define LOWER_P ((UChar)0x0070) /*p*/ +#define UPPER_N ((UChar)78) /*N*/ +#define EQUALS ((UChar)0x003D) /*=*/ + +// HIGH_VALUE > all valid values. 110000 for codepoints +#define UNICODESET_HIGH 0x0110000 + +// LOW <= all valid values. ZERO for codepoints +#define UNICODESET_LOW 0x000000 + +/** Max list [0, 1, 2, ..., max code point, HIGH] */ +constexpr int32_t MAX_LENGTH = UNICODESET_HIGH + 1; + +U_NAMESPACE_BEGIN + +SymbolTable::~SymbolTable() {} + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSet) + +/** + * Modify the given UChar32 variable so that it is in range, by + * pinning values < UNICODESET_LOW to UNICODESET_LOW, and + * pinning values > UNICODESET_HIGH-1 to UNICODESET_HIGH-1. + * It modifies its argument in-place and also returns it. + */ +static inline UChar32 pinCodePoint(UChar32& c) { + if (c < UNICODESET_LOW) { + c = UNICODESET_LOW; + } else if (c > (UNICODESET_HIGH-1)) { + c = (UNICODESET_HIGH-1); + } + return c; +} + +//---------------------------------------------------------------- +// Debugging +//---------------------------------------------------------------- + +// DO NOT DELETE THIS CODE. This code is used to debug memory leaks. +// To enable the debugging, define the symbol DEBUG_MEM in the line +// below. This will result in text being sent to stdout that looks +// like this: +// DEBUG UnicodeSet: ct 0x00A39B20; 397 [\u0A81-\u0A83\u0A85- +// DEBUG UnicodeSet: dt 0x00A39B20; 396 [\u0A81-\u0A83\u0A85- +// Each line lists a construction (ct) or destruction (dt) event, the +// object address, the number of outstanding objects after the event, +// and the pattern of the object in question. + +// #define DEBUG_MEM + +#ifdef DEBUG_MEM +#include <stdio.h> +static int32_t _dbgCount = 0; + +static inline void _dbgct(UnicodeSet* set) { + UnicodeString str; + set->toPattern(str, TRUE); + char buf[40]; + str.extract(0, 39, buf, ""); + printf("DEBUG UnicodeSet: ct 0x%08X; %d %s\n", set, ++_dbgCount, buf); +} + +static inline void _dbgdt(UnicodeSet* set) { + UnicodeString str; + set->toPattern(str, TRUE); + char buf[40]; + str.extract(0, 39, buf, ""); + printf("DEBUG UnicodeSet: dt 0x%08X; %d %s\n", set, --_dbgCount, buf); +} + +#else + +#define _dbgct(set) +#define _dbgdt(set) + +#endif + +//---------------------------------------------------------------- +// UnicodeString in UVector support +//---------------------------------------------------------------- + +static void U_CALLCONV cloneUnicodeString(UElement *dst, UElement *src) { + dst->pointer = new UnicodeString(*(UnicodeString*)src->pointer); +} + +static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { + const UnicodeString &a = *(const UnicodeString*)t1.pointer; + const UnicodeString &b = *(const UnicodeString*)t2.pointer; + return a.compare(b); +} + +UBool UnicodeSet::hasStrings() const { + return strings != nullptr && !strings->isEmpty(); +} + +int32_t UnicodeSet::stringsSize() const { + return strings == nullptr ? 0 : strings->size(); +} + +UBool UnicodeSet::stringsContains(const UnicodeString &s) const { + return strings != nullptr && strings->contains((void*) &s); +} + +//---------------------------------------------------------------- +// Constructors &c +//---------------------------------------------------------------- + +/** + * Constructs an empty set. + */ +UnicodeSet::UnicodeSet() { + list[0] = UNICODESET_HIGH; + _dbgct(this); +} + +/** + * Constructs a set containing the given range. If <code>end > + * start</code> then an empty set is created. + * + * @param start first character, inclusive, of range + * @param end last character, inclusive, of range + */ +UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) { + list[0] = UNICODESET_HIGH; + add(start, end); + _dbgct(this); +} + +/** + * Constructs a set that is identical to the given UnicodeSet. + */ +UnicodeSet::UnicodeSet(const UnicodeSet& o) : UnicodeFilter(o) { + *this = o; + _dbgct(this); +} + +// Copy-construct as thawed. +UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : UnicodeFilter(o) { + if (ensureCapacity(o.len)) { + // *this = o except for bmpSet and stringSpan + len = o.len; + uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32)); + if (o.hasStrings()) { + UErrorCode status = U_ZERO_ERROR; + if (!allocateStrings(status) || + (strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) { + setToBogus(); + return; + } + } + if (o.pat) { + setPattern(o.pat, o.patLen); + } + _dbgct(this); + } +} + +/** + * Destructs the set. + */ +UnicodeSet::~UnicodeSet() { + _dbgdt(this); // first! + if (list != stackList) { + uprv_free(list); + } + delete bmpSet; + if (buffer != stackList) { + uprv_free(buffer); + } + delete strings; + delete stringSpan; + releasePattern(); +} + +/** + * Assigns this object to be a copy of another. + */ +UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) { + return copyFrom(o, FALSE); +} + +UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) { + if (this == &o) { + return *this; + } + if (isFrozen()) { + return *this; + } + if (o.isBogus()) { + setToBogus(); + return *this; + } + if (!ensureCapacity(o.len)) { + // ensureCapacity will mark the UnicodeSet as Bogus if OOM failure happens. + return *this; + } + len = o.len; + uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32)); + if (o.bmpSet != nullptr && !asThawed) { + bmpSet = new BMPSet(*o.bmpSet, list, len); + if (bmpSet == NULL) { // Check for memory allocation error. + setToBogus(); + return *this; + } + } + if (o.hasStrings()) { + UErrorCode status = U_ZERO_ERROR; + if ((strings == nullptr && !allocateStrings(status)) || + (strings->assign(*o.strings, cloneUnicodeString, status), U_FAILURE(status))) { + setToBogus(); + return *this; + } + } else if (hasStrings()) { + strings->removeAllElements(); + } + if (o.stringSpan != nullptr && !asThawed) { + stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings); + if (stringSpan == NULL) { // Check for memory allocation error. + setToBogus(); + return *this; + } + } + releasePattern(); + if (o.pat) { + setPattern(o.pat, o.patLen); + } + return *this; +} + +/** + * Returns a copy of this object. All UnicodeMatcher objects have + * to support cloning in order to allow classes using + * UnicodeMatchers, such as Transliterator, to implement cloning. + */ +UnicodeSet* UnicodeSet::clone() const { + return new UnicodeSet(*this); +} + +UnicodeSet *UnicodeSet::cloneAsThawed() const { + return new UnicodeSet(*this, TRUE); +} + +/** + * Compares the specified object with this set for equality. Returns + * <tt>true</tt> if the two sets + * have the same size, and every member of the specified set is + * contained in this set (or equivalently, every member of this set is + * contained in the specified set). + * + * @param o set to be compared for equality with this set. + * @return <tt>true</tt> if the specified set is equal to this set. + */ +UBool UnicodeSet::operator==(const UnicodeSet& o) const { + if (len != o.len) return FALSE; + for (int32_t i = 0; i < len; ++i) { + if (list[i] != o.list[i]) return FALSE; + } + if (hasStrings() != o.hasStrings()) { return FALSE; } + if (hasStrings() && *strings != *o.strings) return FALSE; + return TRUE; +} + +/** + * Returns the hash code value for this set. + * + * @return the hash code value for this set. + * @see Object#hashCode() + */ +int32_t UnicodeSet::hashCode(void) const { + uint32_t result = static_cast<uint32_t>(len); + for (int32_t i = 0; i < len; ++i) { + result *= 1000003u; + result += list[i]; + } + return static_cast<int32_t>(result); +} + +//---------------------------------------------------------------- +// Public API +//---------------------------------------------------------------- + +/** + * Returns the number of elements in this set (its cardinality), + * Note than the elements of a set may include both individual + * codepoints and strings. + * + * @return the number of elements in this set (its cardinality). + */ +int32_t UnicodeSet::size(void) const { + int32_t n = 0; + int32_t count = getRangeCount(); + for (int32_t i = 0; i < count; ++i) { + n += getRangeEnd(i) - getRangeStart(i) + 1; + } + return n + stringsSize(); +} + +/** + * Returns <tt>true</tt> if this set contains no elements. + * + * @return <tt>true</tt> if this set contains no elements. + */ +UBool UnicodeSet::isEmpty(void) const { + return len == 1 && !hasStrings(); +} + +/** + * Returns true if this set contains the given character. + * @param c character to be checked for containment + * @return true if the test condition is met + */ +UBool UnicodeSet::contains(UChar32 c) const { + // Set i to the index of the start item greater than ch + // We know we will terminate without length test! + // LATER: for large sets, add binary search + //int32_t i = -1; + //for (;;) { + // if (c < list[++i]) break; + //} + if (bmpSet != NULL) { + return bmpSet->contains(c); + } + if (stringSpan != NULL) { + return stringSpan->contains(c); + } + if (c >= UNICODESET_HIGH) { // Don't need to check LOW bound + return FALSE; + } + int32_t i = findCodePoint(c); + return (UBool)(i & 1); // return true if odd +} + +/** + * Returns the smallest value i such that c < list[i]. Caller + * must ensure that c is a legal value or this method will enter + * an infinite loop. This method performs a binary search. + * @param c a character in the range MIN_VALUE..MAX_VALUE + * inclusive + * @return the smallest integer i in the range 0..len-1, + * inclusive, such that c < list[i] + */ +int32_t UnicodeSet::findCodePoint(UChar32 c) const { + /* Examples: + findCodePoint(c) + set list[] c=0 1 3 4 7 8 + === ============== =========== + [] [110000] 0 0 0 0 0 0 + [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 + [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 + [:Any:] [0, 110000] 1 1 1 1 1 1 + */ + + // Return the smallest i such that c < list[i]. Assume + // list[len - 1] == HIGH and that c is legal (0..HIGH-1). + if (c < list[0]) + return 0; + // High runner test. c is often after the last range, so an + // initial check for this condition pays off. + int32_t lo = 0; + int32_t hi = len - 1; + if (lo >= hi || c >= list[hi-1]) + return hi; + // invariant: c >= list[lo] + // invariant: c < list[hi] + for (;;) { + int32_t i = (lo + hi) >> 1; + if (i == lo) { + break; // Found! + } else if (c < list[i]) { + hi = i; + } else { + lo = i; + } + } + return hi; +} + +/** + * Returns true if this set contains every character + * of the given range. + * @param start first character, inclusive, of the range + * @param end last character, inclusive, of the range + * @return true if the test condition is met + */ +UBool UnicodeSet::contains(UChar32 start, UChar32 end) const { + //int32_t i = -1; + //for (;;) { + // if (start < list[++i]) break; + //} + int32_t i = findCodePoint(start); + return ((i & 1) != 0 && end < list[i]); +} + +/** + * Returns <tt>true</tt> if this set contains the given + * multicharacter string. + * @param s string to be checked for containment + * @return <tt>true</tt> if this set contains the specified string + */ +UBool UnicodeSet::contains(const UnicodeString& s) const { + if (s.length() == 0) return FALSE; + int32_t cp = getSingleCP(s); + if (cp < 0) { + return stringsContains(s); + } else { + return contains((UChar32) cp); + } +} + +/** + * Returns true if this set contains all the characters and strings + * of the given set. + * @param c set to be checked for containment + * @return true if the test condition is met + */ +UBool UnicodeSet::containsAll(const UnicodeSet& c) const { + // The specified set is a subset if all of its pairs are contained in + // this set. It's possible to code this more efficiently in terms of + // direct manipulation of the inversion lists if the need arises. + int32_t n = c.getRangeCount(); + for (int i=0; i<n; ++i) { + if (!contains(c.getRangeStart(i), c.getRangeEnd(i))) { + return FALSE; + } + } + return !c.hasStrings() || (strings != nullptr && strings->containsAll(*c.strings)); +} + +/** + * Returns true if this set contains all the characters + * of the given string. + * @param s string containing characters to be checked for containment + * @return true if the test condition is met + */ +UBool UnicodeSet::containsAll(const UnicodeString& s) const { + return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_CONTAINED) == + s.length()); +} + +/** + * Returns true if this set contains none of the characters + * of the given range. + * @param start first character, inclusive, of the range + * @param end last character, inclusive, of the range + * @return true if the test condition is met + */ +UBool UnicodeSet::containsNone(UChar32 start, UChar32 end) const { + //int32_t i = -1; + //for (;;) { + // if (start < list[++i]) break; + //} + int32_t i = findCodePoint(start); + return ((i & 1) == 0 && end < list[i]); +} + +/** + * Returns true if this set contains none of the characters and strings + * of the given set. + * @param c set to be checked for containment + * @return true if the test condition is met + */ +UBool UnicodeSet::containsNone(const UnicodeSet& c) const { + // The specified set is a subset if all of its pairs are contained in + // this set. It's possible to code this more efficiently in terms of + // direct manipulation of the inversion lists if the need arises. + int32_t n = c.getRangeCount(); + for (int32_t i=0; i<n; ++i) { + if (!containsNone(c.getRangeStart(i), c.getRangeEnd(i))) { + return FALSE; + } + } + return strings == nullptr || !c.hasStrings() || strings->containsNone(*c.strings); +} + +/** + * Returns true if this set contains none of the characters + * of the given string. + * @param s string containing characters to be checked for containment + * @return true if the test condition is met + */ +UBool UnicodeSet::containsNone(const UnicodeString& s) const { + return (UBool)(span(s.getBuffer(), s.length(), USET_SPAN_NOT_CONTAINED) == + s.length()); +} + +/** + * Returns <tt>true</tt> if this set contains any character whose low byte + * is the given value. This is used by <tt>RuleBasedTransliterator</tt> for + * indexing. + */ +UBool UnicodeSet::matchesIndexValue(uint8_t v) const { + /* The index value v, in the range [0,255], is contained in this set if + * it is contained in any pair of this set. Pairs either have the high + * bytes equal, or unequal. If the high bytes are equal, then we have + * aaxx..aayy, where aa is the high byte. Then v is contained if xx <= + * v <= yy. If the high bytes are unequal we have aaxx..bbyy, bb>aa. + * Then v is contained if xx <= v || v <= yy. (This is identical to the + * time zone month containment logic.) + */ + int32_t i; + int32_t rangeCount=getRangeCount(); + for (i=0; i<rangeCount; ++i) { + UChar32 low = getRangeStart(i); + UChar32 high = getRangeEnd(i); + if ((low & ~0xFF) == (high & ~0xFF)) { + if ((low & 0xFF) <= v && v <= (high & 0xFF)) { + return TRUE; + } + } else if ((low & 0xFF) <= v || v <= (high & 0xFF)) { + return TRUE; + } + } + if (hasStrings()) { + for (i=0; i<strings->size(); ++i) { + const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i); + //if (s.length() == 0) { + // // Empty strings match everything + // return TRUE; + //} + // assert(s.length() != 0); // We enforce this elsewhere + UChar32 c = s.char32At(0); + if ((c & 0xFF) == v) { + return TRUE; + } + } + } + return FALSE; +} + +/** + * Implementation of UnicodeMatcher::matches(). Always matches the + * longest possible multichar string. + */ +UMatchDegree UnicodeSet::matches(const Replaceable& text, + int32_t& offset, + int32_t limit, + UBool incremental) { + if (offset == limit) { + // Strings, if any, have length != 0, so we don't worry + // about them here. If we ever allow zero-length strings + // we much check for them here. + if (contains(U_ETHER)) { + return incremental ? U_PARTIAL_MATCH : U_MATCH; + } else { + return U_MISMATCH; + } + } else { + if (hasStrings()) { // try strings first + + // might separate forward and backward loops later + // for now they are combined + + // TODO Improve efficiency of this, at least in the forward + // direction, if not in both. In the forward direction we + // can assume the strings are sorted. + + int32_t i; + UBool forward = offset < limit; + + // firstChar is the leftmost char to match in the + // forward direction or the rightmost char to match in + // the reverse direction. + UChar firstChar = text.charAt(offset); + + // If there are multiple strings that can match we + // return the longest match. + int32_t highWaterLength = 0; + + for (i=0; i<strings->size(); ++i) { + const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i); + + //if (trial.length() == 0) { + // return U_MATCH; // null-string always matches + //} + // assert(trial.length() != 0); // We ensure this elsewhere + + UChar c = trial.charAt(forward ? 0 : trial.length() - 1); + + // Strings are sorted, so we can optimize in the + // forward direction. + if (forward && c > firstChar) break; + if (c != firstChar) continue; + + int32_t matchLen = matchRest(text, offset, limit, trial); + + if (incremental) { + int32_t maxLen = forward ? limit-offset : offset-limit; + if (matchLen == maxLen) { + // We have successfully matched but only up to limit. + return U_PARTIAL_MATCH; + } + } + + if (matchLen == trial.length()) { + // We have successfully matched the whole string. + if (matchLen > highWaterLength) { + highWaterLength = matchLen; + } + // In the forward direction we know strings + // are sorted so we can bail early. + if (forward && matchLen < highWaterLength) { + break; + } + continue; + } + } + + // We've checked all strings without a partial match. + // If we have full matches, return the longest one. + if (highWaterLength != 0) { + offset += forward ? highWaterLength : -highWaterLength; + return U_MATCH; + } + } + return UnicodeFilter::matches(text, offset, limit, incremental); + } +} + +/** + * Returns the longest match for s in text at the given position. + * If limit > start then match forward from start+1 to limit + * matching all characters except s.charAt(0). If limit < start, + * go backward starting from start-1 matching all characters + * except s.charAt(s.length()-1). This method assumes that the + * first character, text.charAt(start), matches s, so it does not + * check it. + * @param text the text to match + * @param start the first character to match. In the forward + * direction, text.charAt(start) is matched against s.charAt(0). + * In the reverse direction, it is matched against + * s.charAt(s.length()-1). + * @param limit the limit offset for matching, either last+1 in + * the forward direction, or last-1 in the reverse direction, + * where last is the index of the last character to match. + * @return If part of s matches up to the limit, return |limit - + * start|. If all of s matches before reaching the limit, return + * s.length(). If there is a mismatch between s and text, return + * 0 + */ +int32_t UnicodeSet::matchRest(const Replaceable& text, + int32_t start, int32_t limit, + const UnicodeString& s) { + int32_t i; + int32_t maxLen; + int32_t slen = s.length(); + if (start < limit) { + maxLen = limit - start; + if (maxLen > slen) maxLen = slen; + for (i = 1; i < maxLen; ++i) { + if (text.charAt(start + i) != s.charAt(i)) return 0; + } + } else { + maxLen = start - limit; + if (maxLen > slen) maxLen = slen; + --slen; // <=> slen = s.length() - 1; + for (i = 1; i < maxLen; ++i) { + if (text.charAt(start - i) != s.charAt(slen - i)) return 0; + } + } + return maxLen; +} + +/** + * Implement of UnicodeMatcher + */ +void UnicodeSet::addMatchSetTo(UnicodeSet& toUnionTo) const { + toUnionTo.addAll(*this); +} + +/** + * Returns the index of the given character within this set, where + * the set is ordered by ascending code point. If the character + * is not in this set, return -1. The inverse of this method is + * <code>charAt()</code>. + * @return an index from 0..size()-1, or -1 + */ +int32_t UnicodeSet::indexOf(UChar32 c) const { + if (c < MIN_VALUE || c > MAX_VALUE) { + return -1; + } + int32_t i = 0; + int32_t n = 0; + for (;;) { + UChar32 start = list[i++]; + if (c < start) { + return -1; + } + UChar32 limit = list[i++]; + if (c < limit) { + return n + c - start; + } + n += limit - start; + } +} + +/** + * Returns the character at the given index within this set, where + * the set is ordered by ascending code point. If the index is + * out of range, return (UChar32)-1. The inverse of this method is + * <code>indexOf()</code>. + * @param index an index from 0..size()-1 + * @return the character at the given index, or (UChar32)-1. + */ +UChar32 UnicodeSet::charAt(int32_t index) const { + if (index >= 0) { + // len2 is the largest even integer <= len, that is, it is len + // for even values and len-1 for odd values. With odd values + // the last entry is UNICODESET_HIGH. + int32_t len2 = len & ~1; + for (int32_t i=0; i < len2;) { + UChar32 start = list[i++]; + int32_t count = list[i++] - start; + if (index < count) { + return (UChar32)(start + index); + } + index -= count; + } + } + return (UChar32)-1; +} + +/** + * Make this object represent the range <code>start - end</code>. + * If <code>end > start</code> then this object is set to an + * an empty range. + * + * @param start first character in the set, inclusive + * @rparam end last character in the set, inclusive + */ +UnicodeSet& UnicodeSet::set(UChar32 start, UChar32 end) { + clear(); + complement(start, end); + return *this; +} + +/** + * Adds the specified range to this set if it is not already + * present. If this set already contains the specified range, + * the call leaves this set unchanged. If <code>end > start</code> + * then an empty range is added, leaving the set unchanged. + * + * @param start first character, inclusive, of range to be added + * to this set. + * @param end last character, inclusive, of range to be added + * to this set. + */ +UnicodeSet& UnicodeSet::add(UChar32 start, UChar32 end) { + if (pinCodePoint(start) < pinCodePoint(end)) { + UChar32 limit = end + 1; + // Fast path for adding a new range after the last one. + // Odd list length: [..., lastStart, lastLimit, HIGH] + if ((len & 1) != 0) { + // If the list is empty, set lastLimit low enough to not be adjacent to 0. + UChar32 lastLimit = len == 1 ? -2 : list[len - 2]; + if (lastLimit <= start && !isFrozen() && !isBogus()) { + if (lastLimit == start) { + // Extend the last range. + list[len - 2] = limit; + if (limit == UNICODESET_HIGH) { + --len; + } + } else { + list[len - 1] = start; + if (limit < UNICODESET_HIGH) { + if (ensureCapacity(len + 2)) { + list[len++] = limit; + list[len++] = UNICODESET_HIGH; + } + } else { // limit == UNICODESET_HIGH + if (ensureCapacity(len + 1)) { + list[len++] = UNICODESET_HIGH; + } + } + } + releasePattern(); + return *this; + } + } + // This is slow. Could be much faster using findCodePoint(start) + // and modifying the list, dealing with adjacent & overlapping ranges. + UChar32 range[3] = { start, limit, UNICODESET_HIGH }; + add(range, 2, 0); + } else if (start == end) { + add(start); + } + return *this; +} + +// #define DEBUG_US_ADD + +#ifdef DEBUG_US_ADD +#include <stdio.h> +void dump(UChar32 c) { + if (c <= 0xFF) { + printf("%c", (char)c); + } else { + printf("U+%04X", c); + } +} +void dump(const UChar32* list, int32_t len) { + printf("["); + for (int32_t i=0; i<len; ++i) { + if (i != 0) printf(", "); + dump(list[i]); + } + printf("]"); +} +#endif + +/** + * Adds the specified character to this set if it is not already + * present. If this set already contains the specified character, + * the call leaves this set unchanged. + */ +UnicodeSet& UnicodeSet::add(UChar32 c) { + // find smallest i such that c < list[i] + // if odd, then it is IN the set + // if even, then it is OUT of the set + int32_t i = findCodePoint(pinCodePoint(c)); + + // already in set? + if ((i & 1) != 0 || isFrozen() || isBogus()) return *this; + + // HIGH is 0x110000 + // assert(list[len-1] == HIGH); + + // empty = [HIGH] + // [start_0, limit_0, start_1, limit_1, HIGH] + + // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] + // ^ + // list[i] + + // i == 0 means c is before the first range + +#ifdef DEBUG_US_ADD + printf("Add of "); + dump(c); + printf(" found at %d", i); + printf(": "); + dump(list, len); + printf(" => "); +#endif + + if (c == list[i]-1) { + // c is before start of next range + list[i] = c; + // if we touched the HIGH mark, then add a new one + if (c == (UNICODESET_HIGH - 1)) { + if (!ensureCapacity(len+1)) { + // ensureCapacity will mark the object as Bogus if OOM failure happens. + return *this; + } + list[len++] = UNICODESET_HIGH; + } + if (i > 0 && c == list[i-1]) { + // collapse adjacent ranges + + // [..., start_k-1, c, c, limit_k, ..., HIGH] + // ^ + // list[i] + + //for (int32_t k=i-1; k<len-2; ++k) { + // list[k] = list[k+2]; + //} + UChar32* dst = list + i - 1; + UChar32* src = dst + 2; + UChar32* srclimit = list + len; + while (src < srclimit) *(dst++) = *(src++); + + len -= 2; + } + } + + else if (i > 0 && c == list[i-1]) { + // c is after end of prior range + list[i-1]++; + // no need to check for collapse here + } + + else { + // At this point we know the new char is not adjacent to + // any existing ranges, and it is not 10FFFF. + + + // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] + // ^ + // list[i] + + // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH] + // ^ + // list[i] + + if (!ensureCapacity(len+2)) { + // ensureCapacity will mark the object as Bogus if OOM failure happens. + return *this; + } + + UChar32 *p = list + i; + uprv_memmove(p + 2, p, (len - i) * sizeof(*p)); + list[i] = c; + list[i+1] = c+1; + len += 2; + } + +#ifdef DEBUG_US_ADD + dump(list, len); + printf("\n"); + + for (i=1; i<len; ++i) { + if (list[i] <= list[i-1]) { + // Corrupt array! + printf("ERROR: list has been corrupted\n"); + exit(1); + } + } +#endif + + releasePattern(); + return *this; +} + +/** + * Adds the specified multicharacter to this set if it is not already + * present. If this set already contains the multicharacter, + * the call leaves this set unchanged. + * Thus "ch" => {"ch"} + * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> + * @param s the source string + * @return the modified set, for chaining + */ +UnicodeSet& UnicodeSet::add(const UnicodeString& s) { + if (s.length() == 0 || isFrozen() || isBogus()) return *this; + int32_t cp = getSingleCP(s); + if (cp < 0) { + if (!stringsContains(s)) { + _add(s); + releasePattern(); + } + } else { + add((UChar32)cp); + } + return *this; +} + +/** + * Adds the given string, in order, to 'strings'. The given string + * must have been checked by the caller to not be empty and to not + * already be in 'strings'. + */ +void UnicodeSet::_add(const UnicodeString& s) { + if (isFrozen() || isBogus()) { + return; + } + UErrorCode ec = U_ZERO_ERROR; + if (strings == nullptr && !allocateStrings(ec)) { + setToBogus(); + return; + } + UnicodeString* t = new UnicodeString(s); + if (t == NULL) { // Check for memory allocation error. + setToBogus(); + return; + } + strings->sortedInsert(t, compareUnicodeString, ec); + if (U_FAILURE(ec)) { + setToBogus(); + delete t; + } +} + +/** + * @return a code point IF the string consists of a single one. + * otherwise returns -1. + * @param string to test + */ +int32_t UnicodeSet::getSingleCP(const UnicodeString& s) { + //if (s.length() < 1) { + // throw new IllegalArgumentException("Can't use zero-length strings in UnicodeSet"); + //} + if (s.length() > 2) return -1; + if (s.length() == 1) return s.charAt(0); + + // at this point, len = 2 + UChar32 cp = s.char32At(0); + if (cp > 0xFFFF) { // is surrogate pair + return cp; + } + return -1; +} + +/** + * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} + * If this set already any particular character, it has no effect on that character. + * @param the source string + * @return the modified set, for chaining + */ +UnicodeSet& UnicodeSet::addAll(const UnicodeString& s) { + UChar32 cp; + for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) { + cp = s.char32At(i); + add(cp); + } + return *this; +} + +/** + * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"} + * If this set already any particular character, it has no effect on that character. + * @param the source string + * @return the modified set, for chaining + */ +UnicodeSet& UnicodeSet::retainAll(const UnicodeString& s) { + UnicodeSet set; + set.addAll(s); + retainAll(set); + return *this; +} + +/** + * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"} + * If this set already any particular character, it has no effect on that character. + * @param the source string + * @return the modified set, for chaining + */ +UnicodeSet& UnicodeSet::complementAll(const UnicodeString& s) { + UnicodeSet set; + set.addAll(s); + complementAll(set); + return *this; +} + +/** + * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"} + * If this set already any particular character, it has no effect on that character. + * @param the source string + * @return the modified set, for chaining + */ +UnicodeSet& UnicodeSet::removeAll(const UnicodeString& s) { + UnicodeSet set; + set.addAll(s); + removeAll(set); + return *this; +} + +UnicodeSet& UnicodeSet::removeAllStrings() { + if (!isFrozen() && hasStrings()) { + strings->removeAllElements(); + releasePattern(); + } + return *this; +} + + +/** + * Makes a set from a multicharacter string. Thus "ch" => {"ch"} + * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> + * @param the source string + * @return a newly created set containing the given string + */ +UnicodeSet* U_EXPORT2 UnicodeSet::createFrom(const UnicodeString& s) { + UnicodeSet *set = new UnicodeSet(); + if (set != NULL) { // Check for memory allocation error. + set->add(s); + } + return set; +} + + +/** + * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"} + * @param the source string + * @return a newly created set containing the given characters + */ +UnicodeSet* U_EXPORT2 UnicodeSet::createFromAll(const UnicodeString& s) { + UnicodeSet *set = new UnicodeSet(); + if (set != NULL) { // Check for memory allocation error. + set->addAll(s); + } + return set; +} + +/** + * Retain only the elements in this set that are contained in the + * specified range. If <code>end > start</code> then an empty range is + * retained, leaving the set empty. + * + * @param start first character, inclusive, of range to be retained + * to this set. + * @param end last character, inclusive, of range to be retained + * to this set. + */ +UnicodeSet& UnicodeSet::retain(UChar32 start, UChar32 end) { + if (pinCodePoint(start) <= pinCodePoint(end)) { + UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; + retain(range, 2, 0); + } else { + clear(); + } + return *this; +} + +UnicodeSet& UnicodeSet::retain(UChar32 c) { + return retain(c, c); +} + +/** + * Removes the specified range from this set if it is present. + * The set will not contain the specified range once the call + * returns. If <code>end > start</code> then an empty range is + * removed, leaving the set unchanged. + * + * @param start first character, inclusive, of range to be removed + * from this set. + * @param end last character, inclusive, of range to be removed + * from this set. + */ +UnicodeSet& UnicodeSet::remove(UChar32 start, UChar32 end) { + if (pinCodePoint(start) <= pinCodePoint(end)) { + UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; + retain(range, 2, 2); + } + return *this; +} + +/** + * Removes the specified character from this set if it is present. + * The set will not contain the specified range once the call + * returns. + */ +UnicodeSet& UnicodeSet::remove(UChar32 c) { + return remove(c, c); +} + +/** + * Removes the specified string from this set if it is present. + * The set will not contain the specified character once the call + * returns. + * @param the source string + * @return the modified set, for chaining + */ +UnicodeSet& UnicodeSet::remove(const UnicodeString& s) { + if (s.length() == 0 || isFrozen() || isBogus()) return *this; + int32_t cp = getSingleCP(s); + if (cp < 0) { + if (strings != nullptr && strings->removeElement((void*) &s)) { + releasePattern(); + } + } else { + remove((UChar32)cp, (UChar32)cp); + } + return *this; +} + +/** + * Complements the specified range in this set. Any character in + * the range will be removed if it is in this set, or will be + * added if it is not in this set. If <code>end > start</code> + * then an empty range is xor'ed, leaving the set unchanged. + * + * @param start first character, inclusive, of range to be removed + * from this set. + * @param end last character, inclusive, of range to be removed + * from this set. + */ +UnicodeSet& UnicodeSet::complement(UChar32 start, UChar32 end) { + if (isFrozen() || isBogus()) { + return *this; + } + if (pinCodePoint(start) <= pinCodePoint(end)) { + UChar32 range[3] = { start, end+1, UNICODESET_HIGH }; + exclusiveOr(range, 2, 0); + } + releasePattern(); + return *this; +} + +UnicodeSet& UnicodeSet::complement(UChar32 c) { + return complement(c, c); +} + +/** + * This is equivalent to + * <code>complement(MIN_VALUE, MAX_VALUE)</code>. + */ +UnicodeSet& UnicodeSet::complement(void) { + if (isFrozen() || isBogus()) { + return *this; + } + if (list[0] == UNICODESET_LOW) { + uprv_memmove(list, list + 1, (size_t)(len-1)*sizeof(UChar32)); + --len; + } else { + if (!ensureCapacity(len+1)) { + return *this; + } + uprv_memmove(list + 1, list, (size_t)len*sizeof(UChar32)); + list[0] = UNICODESET_LOW; + ++len; + } + releasePattern(); + return *this; +} + +/** + * Complement the specified string in this set. + * The set will not contain the specified string once the call + * returns. + * <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b> + * @param s the string to complement + * @return this object, for chaining + */ +UnicodeSet& UnicodeSet::complement(const UnicodeString& s) { + if (s.length() == 0 || isFrozen() || isBogus()) return *this; + int32_t cp = getSingleCP(s); + if (cp < 0) { + if (stringsContains(s)) { + strings->removeElement((void*) &s); + } else { + _add(s); + } + releasePattern(); + } else { + complement((UChar32)cp, (UChar32)cp); + } + return *this; +} + +/** + * Adds all of the elements in the specified set to this set if + * they're not already present. This operation effectively + * modifies this set so that its value is the <i>union</i> of the two + * sets. The behavior of this operation is unspecified if the specified + * collection is modified while the operation is in progress. + * + * @param c set whose elements are to be added to this set. + * @see #add(char, char) + */ +UnicodeSet& UnicodeSet::addAll(const UnicodeSet& c) { + if ( c.len>0 && c.list!=NULL ) { + add(c.list, c.len, 0); + } + + // Add strings in order + if ( c.strings!=NULL ) { + for (int32_t i=0; i<c.strings->size(); ++i) { + const UnicodeString* s = (const UnicodeString*)c.strings->elementAt(i); + if (!stringsContains(*s)) { + _add(*s); + } + } + } + return *this; +} + +/** + * Retains only the elements in this set that are contained in the + * specified set. In other words, removes from this set all of + * its elements that are not contained in the specified set. This + * operation effectively modifies this set so that its value is + * the <i>intersection</i> of the two sets. + * + * @param c set that defines which elements this set will retain. + */ +UnicodeSet& UnicodeSet::retainAll(const UnicodeSet& c) { + if (isFrozen() || isBogus()) { + return *this; + } + retain(c.list, c.len, 0); + if (hasStrings()) { + if (!c.hasStrings()) { + strings->removeAllElements(); + } else { + strings->retainAll(*c.strings); + } + } + return *this; +} + +/** + * Removes from this set all of its elements that are contained in the + * specified set. This operation effectively modifies this + * set so that its value is the <i>asymmetric set difference</i> of + * the two sets. + * + * @param c set that defines which elements will be removed from + * this set. + */ +UnicodeSet& UnicodeSet::removeAll(const UnicodeSet& c) { + if (isFrozen() || isBogus()) { + return *this; + } + retain(c.list, c.len, 2); + if (hasStrings() && c.hasStrings()) { + strings->removeAll(*c.strings); + } + return *this; +} + +/** + * Complements in this set all elements contained in the specified + * set. Any character in the other set will be removed if it is + * in this set, or will be added if it is not in this set. + * + * @param c set that defines which elements will be xor'ed from + * this set. + */ +UnicodeSet& UnicodeSet::complementAll(const UnicodeSet& c) { + if (isFrozen() || isBogus()) { + return *this; + } + exclusiveOr(c.list, c.len, 0); + + if (c.strings != nullptr) { + for (int32_t i=0; i<c.strings->size(); ++i) { + void* e = c.strings->elementAt(i); + if (strings == nullptr || !strings->removeElement(e)) { + _add(*(const UnicodeString*)e); + } + } + } + return *this; +} + +/** + * Removes all of the elements from this set. This set will be + * empty after this call returns. + */ +UnicodeSet& UnicodeSet::clear(void) { + if (isFrozen()) { + return *this; + } + list[0] = UNICODESET_HIGH; + len = 1; + releasePattern(); + if (strings != NULL) { + strings->removeAllElements(); + } + // Remove bogus + fFlags = 0; + return *this; +} + +/** + * Iteration method that returns the number of ranges contained in + * this set. + * @see #getRangeStart + * @see #getRangeEnd + */ +int32_t UnicodeSet::getRangeCount() const { + return len/2; +} + +/** + * Iteration method that returns the first character in the + * specified range of this set. + * @see #getRangeCount + * @see #getRangeEnd + */ +UChar32 UnicodeSet::getRangeStart(int32_t index) const { + return list[index*2]; +} + +/** + * Iteration method that returns the last character in the + * specified range of this set. + * @see #getRangeStart + * @see #getRangeEnd + */ +UChar32 UnicodeSet::getRangeEnd(int32_t index) const { + return list[index*2 + 1] - 1; +} + +const UnicodeString* UnicodeSet::getString(int32_t index) const { + return (const UnicodeString*) strings->elementAt(index); +} + +/** + * Reallocate this objects internal structures to take up the least + * possible space, without changing this object's value. + */ +UnicodeSet& UnicodeSet::compact() { + if (isFrozen() || isBogus()) { + return *this; + } + // Delete buffer first to defragment memory less. + if (buffer != stackList) { + uprv_free(buffer); + buffer = NULL; + bufferCapacity = 0; + } + if (list == stackList) { + // pass + } else if (len <= INITIAL_CAPACITY) { + uprv_memcpy(stackList, list, len * sizeof(UChar32)); + uprv_free(list); + list = stackList; + capacity = INITIAL_CAPACITY; + } else if ((len + 7) < capacity) { + // If we have more than a little unused capacity, shrink it to len. + UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * len); + if (temp) { + list = temp; + capacity = len; + } + // else what the heck happened?! We allocated less memory! + // Oh well. We'll keep our original array. + } + if (strings != nullptr && strings->isEmpty()) { + delete strings; + strings = nullptr; + } + return *this; +} + +#ifdef DEBUG_SERIALIZE +#include <stdio.h> +#endif + +/** + * Deserialize constructor. + */ +UnicodeSet::UnicodeSet(const uint16_t data[], int32_t dataLen, ESerialization serialization, + UErrorCode &ec) { + + if(U_FAILURE(ec)) { + setToBogus(); + return; + } + + if( (serialization != kSerialized) + || (data==NULL) + || (dataLen < 1)) { + ec = U_ILLEGAL_ARGUMENT_ERROR; + setToBogus(); + return; + } + + // bmp? + int32_t headerSize = ((data[0]&0x8000)) ?2:1; + int32_t bmpLength = (headerSize==1)?data[0]:data[1]; + + int32_t newLength = (((data[0]&0x7FFF)-bmpLength)/2)+bmpLength; +#ifdef DEBUG_SERIALIZE + printf("dataLen %d headerSize %d bmpLen %d len %d. data[0]=%X/%X/%X/%X\n", dataLen,headerSize,bmpLength,newLength, data[0],data[1],data[2],data[3]); +#endif + if(!ensureCapacity(newLength + 1)) { // +1 for HIGH + return; + } + // copy bmp + int32_t i; + for(i = 0; i< bmpLength;i++) { + list[i] = data[i+headerSize]; +#ifdef DEBUG_SERIALIZE + printf("<<16@%d[%d] %X\n", i+headerSize, i, list[i]); +#endif + } + // copy smp + for(i=bmpLength;i<newLength;i++) { + list[i] = ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+0] << 16) + + ((UChar32)data[headerSize+bmpLength+(i-bmpLength)*2+1]); +#ifdef DEBUG_SERIALIZE + printf("<<32@%d+[%d] %lX\n", headerSize+bmpLength+i, i, list[i]); +#endif + } + U_ASSERT(i == newLength); + if (i == 0 || list[i - 1] != UNICODESET_HIGH) { + list[i++] = UNICODESET_HIGH; + } + len = i; +} + + +int32_t UnicodeSet::serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const { + int32_t bmpLength, length, destLength; + + if (U_FAILURE(ec)) { + return 0; + } + + if (destCapacity<0 || (destCapacity>0 && dest==NULL)) { + ec=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* count necessary 16-bit units */ + length=this->len-1; // Subtract 1 to ignore final UNICODESET_HIGH + // assert(length>=0); + if (length==0) { + /* empty set */ + if (destCapacity>0) { + *dest=0; + } else { + ec=U_BUFFER_OVERFLOW_ERROR; + } + return 1; + } + /* now length>0 */ + + if (this->list[length-1]<=0xffff) { + /* all BMP */ + bmpLength=length; + } else if (this->list[0]>=0x10000) { + /* all supplementary */ + bmpLength=0; + length*=2; + } else { + /* some BMP, some supplementary */ + for (bmpLength=0; bmpLength<length && this->list[bmpLength]<=0xffff; ++bmpLength) {} + length=bmpLength+2*(length-bmpLength); + } +#ifdef DEBUG_SERIALIZE + printf(">> bmpLength%d length%d len%d\n", bmpLength, length, len); +#endif + /* length: number of 16-bit array units */ + if (length>0x7fff) { + /* there are only 15 bits for the length in the first serialized word */ + ec=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + /* + * total serialized length: + * number of 16-bit array units (length) + + * 1 length unit (always) + + * 1 bmpLength unit (if there are supplementary values) + */ + destLength=length+((length>bmpLength)?2:1); + if (destLength<=destCapacity) { + const UChar32 *p; + int32_t i; + +#ifdef DEBUG_SERIALIZE + printf("writeHdr\n"); +#endif + *dest=(uint16_t)length; + if (length>bmpLength) { + *dest|=0x8000; + *++dest=(uint16_t)bmpLength; + } + ++dest; + + /* write the BMP part of the array */ + p=this->list; + for (i=0; i<bmpLength; ++i) { +#ifdef DEBUG_SERIALIZE + printf("writebmp: %x\n", (int)*p); +#endif + *dest++=(uint16_t)*p++; + } + + /* write the supplementary part of the array */ + for (; i<length; i+=2) { +#ifdef DEBUG_SERIALIZE + printf("write32: %x\n", (int)*p); +#endif + *dest++=(uint16_t)(*p>>16); + *dest++=(uint16_t)*p++; + } + } else { + ec=U_BUFFER_OVERFLOW_ERROR; + } + return destLength; +} + +//---------------------------------------------------------------- +// Implementation: Utility methods +//---------------------------------------------------------------- + +/** + * Allocate our strings vector and return TRUE if successful. + */ +UBool UnicodeSet::allocateStrings(UErrorCode &status) { + if (U_FAILURE(status)) { + return FALSE; + } + strings = new UVector(uprv_deleteUObject, + uhash_compareUnicodeString, 1, status); + if (strings == NULL) { // Check for memory allocation error. + status = U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + if (U_FAILURE(status)) { + delete strings; + strings = NULL; + return FALSE; + } + return TRUE; +} + +int32_t UnicodeSet::nextCapacity(int32_t minCapacity) { + // Grow exponentially to reduce the frequency of allocations. + if (minCapacity < INITIAL_CAPACITY) { + return minCapacity + INITIAL_CAPACITY; + } else if (minCapacity <= 2500) { + return 5 * minCapacity; + } else { + int32_t newCapacity = 2 * minCapacity; + if (newCapacity > MAX_LENGTH) { + newCapacity = MAX_LENGTH; + } + return newCapacity; + } +} + +bool UnicodeSet::ensureCapacity(int32_t newLen) { + if (newLen > MAX_LENGTH) { + newLen = MAX_LENGTH; + } + if (newLen <= capacity) { + return true; + } + int32_t newCapacity = nextCapacity(newLen); + UChar32* temp = (UChar32*) uprv_malloc(newCapacity * sizeof(UChar32)); + if (temp == NULL) { + setToBogus(); // set the object to bogus state if an OOM failure occurred. + return false; + } + // Copy only the actual contents. + uprv_memcpy(temp, list, len * sizeof(UChar32)); + if (list != stackList) { + uprv_free(list); + } + list = temp; + capacity = newCapacity; + return true; +} + +bool UnicodeSet::ensureBufferCapacity(int32_t newLen) { + if (newLen > MAX_LENGTH) { + newLen = MAX_LENGTH; + } + if (newLen <= bufferCapacity) { + return true; + } + int32_t newCapacity = nextCapacity(newLen); + UChar32* temp = (UChar32*) uprv_malloc(newCapacity * sizeof(UChar32)); + if (temp == NULL) { + setToBogus(); + return false; + } + // The buffer has no contents to be copied. + // It is always filled from scratch after this call. + if (buffer != stackList) { + uprv_free(buffer); + } + buffer = temp; + bufferCapacity = newCapacity; + return true; +} + +/** + * Swap list and buffer. + */ +void UnicodeSet::swapBuffers(void) { + // swap list and buffer + UChar32* temp = list; + list = buffer; + buffer = temp; + + int32_t c = capacity; + capacity = bufferCapacity; + bufferCapacity = c; +} + +void UnicodeSet::setToBogus() { + clear(); // Remove everything in the set. + fFlags = kIsBogus; +} + +//---------------------------------------------------------------- +// Implementation: Fundamental operators +//---------------------------------------------------------------- + +static inline UChar32 max(UChar32 a, UChar32 b) { + return (a > b) ? a : b; +} + +// polarity = 0, 3 is normal: x xor y +// polarity = 1, 2: x xor ~y == x === y + +void UnicodeSet::exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity) { + if (isFrozen() || isBogus()) { + return; + } + if (!ensureBufferCapacity(len + otherLen)) { + return; + } + + int32_t i = 0, j = 0, k = 0; + UChar32 a = list[i++]; + UChar32 b; + if (polarity == 1 || polarity == 2) { + b = UNICODESET_LOW; + if (other[j] == UNICODESET_LOW) { // skip base if already LOW + ++j; + b = other[j]; + } + } else { + b = other[j++]; + } + // simplest of all the routines + // sort the values, discarding identicals! + for (;;) { + if (a < b) { + buffer[k++] = a; + a = list[i++]; + } else if (b < a) { + buffer[k++] = b; + b = other[j++]; + } else if (a != UNICODESET_HIGH) { // at this point, a == b + // discard both values! + a = list[i++]; + b = other[j++]; + } else { // DONE! + buffer[k++] = UNICODESET_HIGH; + len = k; + break; + } + } + swapBuffers(); + releasePattern(); +} + +// polarity = 0 is normal: x union y +// polarity = 2: x union ~y +// polarity = 1: ~x union y +// polarity = 3: ~x union ~y + +void UnicodeSet::add(const UChar32* other, int32_t otherLen, int8_t polarity) { + if (isFrozen() || isBogus() || other==NULL) { + return; + } + if (!ensureBufferCapacity(len + otherLen)) { + return; + } + + int32_t i = 0, j = 0, k = 0; + UChar32 a = list[i++]; + UChar32 b = other[j++]; + // change from xor is that we have to check overlapping pairs + // polarity bit 1 means a is second, bit 2 means b is. + for (;;) { + switch (polarity) { + case 0: // both first; take lower if unequal + if (a < b) { // take a + // Back up over overlapping ranges in buffer[] + if (k > 0 && a <= buffer[k-1]) { + // Pick latter end value in buffer[] vs. list[] + a = max(list[i], buffer[--k]); + } else { + // No overlap + buffer[k++] = a; + a = list[i]; + } + i++; // Common if/else code factored out + polarity ^= 1; + } else if (b < a) { // take b + if (k > 0 && b <= buffer[k-1]) { + b = max(other[j], buffer[--k]); + } else { + buffer[k++] = b; + b = other[j]; + } + j++; + polarity ^= 2; + } else { // a == b, take a, drop b + if (a == UNICODESET_HIGH) goto loop_end; + // This is symmetrical; it doesn't matter if + // we backtrack with a or b. - liu + if (k > 0 && a <= buffer[k-1]) { + a = max(list[i], buffer[--k]); + } else { + // No overlap + buffer[k++] = a; + a = list[i]; + } + i++; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + case 3: // both second; take higher if unequal, and drop other + if (b <= a) { // take a + if (a == UNICODESET_HIGH) goto loop_end; + buffer[k++] = a; + } else { // take b + if (b == UNICODESET_HIGH) goto loop_end; + buffer[k++] = b; + } + a = list[i++]; + polarity ^= 1; // factored common code + b = other[j++]; + polarity ^= 2; + break; + case 1: // a second, b first; if b < a, overlap + if (a < b) { // no overlap, take a + buffer[k++] = a; a = list[i++]; polarity ^= 1; + } else if (b < a) { // OVERLAP, drop b + b = other[j++]; + polarity ^= 2; + } else { // a == b, drop both! + if (a == UNICODESET_HIGH) goto loop_end; + a = list[i++]; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + case 2: // a first, b second; if a < b, overlap + if (b < a) { // no overlap, take b + buffer[k++] = b; + b = other[j++]; + polarity ^= 2; + } else if (a < b) { // OVERLAP, drop a + a = list[i++]; + polarity ^= 1; + } else { // a == b, drop both! + if (a == UNICODESET_HIGH) goto loop_end; + a = list[i++]; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + } + } + loop_end: + buffer[k++] = UNICODESET_HIGH; // terminate + len = k; + swapBuffers(); + releasePattern(); +} + +// polarity = 0 is normal: x intersect y +// polarity = 2: x intersect ~y == set-minus +// polarity = 1: ~x intersect y +// polarity = 3: ~x intersect ~y + +void UnicodeSet::retain(const UChar32* other, int32_t otherLen, int8_t polarity) { + if (isFrozen() || isBogus()) { + return; + } + if (!ensureBufferCapacity(len + otherLen)) { + return; + } + + int32_t i = 0, j = 0, k = 0; + UChar32 a = list[i++]; + UChar32 b = other[j++]; + // change from xor is that we have to check overlapping pairs + // polarity bit 1 means a is second, bit 2 means b is. + for (;;) { + switch (polarity) { + case 0: // both first; drop the smaller + if (a < b) { // drop a + a = list[i++]; + polarity ^= 1; + } else if (b < a) { // drop b + b = other[j++]; + polarity ^= 2; + } else { // a == b, take one, drop other + if (a == UNICODESET_HIGH) goto loop_end; + buffer[k++] = a; + a = list[i++]; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + case 3: // both second; take lower if unequal + if (a < b) { // take a + buffer[k++] = a; + a = list[i++]; + polarity ^= 1; + } else if (b < a) { // take b + buffer[k++] = b; + b = other[j++]; + polarity ^= 2; + } else { // a == b, take one, drop other + if (a == UNICODESET_HIGH) goto loop_end; + buffer[k++] = a; + a = list[i++]; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + case 1: // a second, b first; + if (a < b) { // NO OVERLAP, drop a + a = list[i++]; + polarity ^= 1; + } else if (b < a) { // OVERLAP, take b + buffer[k++] = b; + b = other[j++]; + polarity ^= 2; + } else { // a == b, drop both! + if (a == UNICODESET_HIGH) goto loop_end; + a = list[i++]; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + case 2: // a first, b second; if a < b, overlap + if (b < a) { // no overlap, drop b + b = other[j++]; + polarity ^= 2; + } else if (a < b) { // OVERLAP, take a + buffer[k++] = a; + a = list[i++]; + polarity ^= 1; + } else { // a == b, drop both! + if (a == UNICODESET_HIGH) goto loop_end; + a = list[i++]; + polarity ^= 1; + b = other[j++]; + polarity ^= 2; + } + break; + } + } + loop_end: + buffer[k++] = UNICODESET_HIGH; // terminate + len = k; + swapBuffers(); + releasePattern(); +} + +/** + * Append the <code>toPattern()</code> representation of a + * string to the given <code>StringBuffer</code>. + */ +void UnicodeSet::_appendToPat(UnicodeString& buf, const UnicodeString& s, UBool +escapeUnprintable) { + UChar32 cp; + for (int32_t i = 0; i < s.length(); i += U16_LENGTH(cp)) { + _appendToPat(buf, cp = s.char32At(i), escapeUnprintable); + } +} + +/** + * Append the <code>toPattern()</code> representation of a + * character to the given <code>StringBuffer</code>. + */ +void UnicodeSet::_appendToPat(UnicodeString& buf, UChar32 c, UBool +escapeUnprintable) { + if (escapeUnprintable && ICU_Utility::isUnprintable(c)) { + // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything + // unprintable + if (ICU_Utility::escapeUnprintable(buf, c)) { + return; + } + } + // Okay to let ':' pass through + switch (c) { + case SET_OPEN: + case SET_CLOSE: + case HYPHEN: + case COMPLEMENT: + case INTERSECTION: + case BACKSLASH: + case OPEN_BRACE: + case CLOSE_BRACE: + case COLON: + case SymbolTable::SYMBOL_REF: + buf.append(BACKSLASH); + break; + default: + // Escape whitespace + if (PatternProps::isWhiteSpace(c)) { + buf.append(BACKSLASH); + } + break; + } + buf.append(c); +} + +/** + * Append a string representation of this set to result. This will be + * a cleaned version of the string passed to applyPattern(), if there + * is one. Otherwise it will be generated. + */ +UnicodeString& UnicodeSet::_toPattern(UnicodeString& result, + UBool escapeUnprintable) const +{ + if (pat != NULL) { + int32_t i; + int32_t backslashCount = 0; + for (i=0; i<patLen; ) { + UChar32 c; + U16_NEXT(pat, i, patLen, c); + if (escapeUnprintable && ICU_Utility::isUnprintable(c)) { + // If the unprintable character is preceded by an odd + // number of backslashes, then it has been escaped. + // Before unescaping it, we delete the final + // backslash. + if ((backslashCount % 2) == 1) { + result.truncate(result.length() - 1); + } + ICU_Utility::escapeUnprintable(result, c); + backslashCount = 0; + } else { + result.append(c); + if (c == BACKSLASH) { + ++backslashCount; + } else { + backslashCount = 0; + } + } + } + return result; + } + + return _generatePattern(result, escapeUnprintable); +} + +/** + * Returns a string representation of this set. If the result of + * calling this function is passed to a UnicodeSet constructor, it + * will produce another set that is equal to this one. + */ +UnicodeString& UnicodeSet::toPattern(UnicodeString& result, + UBool escapeUnprintable) const +{ + result.truncate(0); + return _toPattern(result, escapeUnprintable); +} + +/** + * Generate and append a string representation of this set to result. + * This does not use this.pat, the cleaned up copy of the string + * passed to applyPattern(). + */ +UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result, + UBool escapeUnprintable) const +{ + result.append(SET_OPEN); + +// // Check against the predefined categories. We implicitly build +// // up ALL category sets the first time toPattern() is called. +// for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) { +// if (*this == getCategorySet(cat)) { +// result.append(COLON); +// result.append(CATEGORY_NAMES, cat*2, 2); +// return result.append(CATEGORY_CLOSE); +// } +// } + + int32_t count = getRangeCount(); + + // If the set contains at least 2 intervals and includes both + // MIN_VALUE and MAX_VALUE, then the inverse representation will + // be more economical. + if (count > 1 && + getRangeStart(0) == MIN_VALUE && + getRangeEnd(count-1) == MAX_VALUE) { + + // Emit the inverse + result.append(COMPLEMENT); + + for (int32_t i = 1; i < count; ++i) { + UChar32 start = getRangeEnd(i-1)+1; + UChar32 end = getRangeStart(i)-1; + _appendToPat(result, start, escapeUnprintable); + if (start != end) { + if ((start+1) != end) { + result.append(HYPHEN); + } + _appendToPat(result, end, escapeUnprintable); + } + } + } + + // Default; emit the ranges as pairs + else { + for (int32_t i = 0; i < count; ++i) { + UChar32 start = getRangeStart(i); + UChar32 end = getRangeEnd(i); + _appendToPat(result, start, escapeUnprintable); + if (start != end) { + if ((start+1) != end) { + result.append(HYPHEN); + } + _appendToPat(result, end, escapeUnprintable); + } + } + } + + if (strings != nullptr) { + for (int32_t i = 0; i<strings->size(); ++i) { + result.append(OPEN_BRACE); + _appendToPat(result, + *(const UnicodeString*) strings->elementAt(i), + escapeUnprintable); + result.append(CLOSE_BRACE); + } + } + return result.append(SET_CLOSE); +} + +/** +* Release existing cached pattern +*/ +void UnicodeSet::releasePattern() { + if (pat) { + uprv_free(pat); + pat = NULL; + patLen = 0; + } +} + +/** +* Set the new pattern to cache. +*/ +void UnicodeSet::setPattern(const char16_t *newPat, int32_t newPatLen) { + releasePattern(); + pat = (UChar *)uprv_malloc((newPatLen + 1) * sizeof(UChar)); + if (pat) { + patLen = newPatLen; + u_memcpy(pat, newPat, patLen); + pat[patLen] = 0; + } + // else we don't care if malloc failed. This was just a nice cache. + // We can regenerate an equivalent pattern later when requested. +} + +UnicodeSet *UnicodeSet::freeze() { + if(!isFrozen() && !isBogus()) { + compact(); + + // Optimize contains() and span() and similar functions. + if (hasStrings()) { + stringSpan = new UnicodeSetStringSpan(*this, *strings, UnicodeSetStringSpan::ALL); + if (stringSpan == nullptr) { + setToBogus(); + return this; + } else if (!stringSpan->needsStringSpanUTF16()) { + // All strings are irrelevant for span() etc. because + // all of each string's code points are contained in this set. + // Do not check needsStringSpanUTF8() because UTF-8 has at most as + // many relevant strings as UTF-16. + // (Thus needsStringSpanUTF8() implies needsStringSpanUTF16().) + delete stringSpan; + stringSpan = NULL; + } + } + if (stringSpan == NULL) { + // No span-relevant strings: Optimize for code point spans. + bmpSet=new BMPSet(list, len); + if (bmpSet == NULL) { // Check for memory allocation error. + setToBogus(); + } + } + } + return this; +} + +int32_t UnicodeSet::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { + if(length>0 && bmpSet!=NULL) { + return (int32_t)(bmpSet->span(s, s+length, spanCondition)-s); + } + if(length<0) { + length=u_strlen(s); + } + if(length==0) { + return 0; + } + if(stringSpan!=NULL) { + return stringSpan->span(s, length, spanCondition); + } else if(hasStrings()) { + uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? + UnicodeSetStringSpan::FWD_UTF16_NOT_CONTAINED : + UnicodeSetStringSpan::FWD_UTF16_CONTAINED; + UnicodeSetStringSpan strSpan(*this, *strings, which); + if(strSpan.needsStringSpanUTF16()) { + return strSpan.span(s, length, spanCondition); + } + } + + if(spanCondition!=USET_SPAN_NOT_CONTAINED) { + spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. + } + + UChar32 c; + int32_t start=0, prev=0; + do { + U16_NEXT(s, start, length, c); + if(spanCondition!=contains(c)) { + break; + } + } while((prev=start)<length); + return prev; +} + +int32_t UnicodeSet::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { + if(length>0 && bmpSet!=NULL) { + return (int32_t)(bmpSet->spanBack(s, s+length, spanCondition)-s); + } + if(length<0) { + length=u_strlen(s); + } + if(length==0) { + return 0; + } + if(stringSpan!=NULL) { + return stringSpan->spanBack(s, length, spanCondition); + } else if(hasStrings()) { + uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? + UnicodeSetStringSpan::BACK_UTF16_NOT_CONTAINED : + UnicodeSetStringSpan::BACK_UTF16_CONTAINED; + UnicodeSetStringSpan strSpan(*this, *strings, which); + if(strSpan.needsStringSpanUTF16()) { + return strSpan.spanBack(s, length, spanCondition); + } + } + + if(spanCondition!=USET_SPAN_NOT_CONTAINED) { + spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. + } + + UChar32 c; + int32_t prev=length; + do { + U16_PREV(s, 0, length, c); + if(spanCondition!=contains(c)) { + break; + } + } while((prev=length)>0); + return prev; +} + +int32_t UnicodeSet::spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const { + if(length>0 && bmpSet!=NULL) { + const uint8_t *s0=(const uint8_t *)s; + return (int32_t)(bmpSet->spanUTF8(s0, length, spanCondition)-s0); + } + if(length<0) { + length=(int32_t)uprv_strlen(s); + } + if(length==0) { + return 0; + } + if(stringSpan!=NULL) { + return stringSpan->spanUTF8((const uint8_t *)s, length, spanCondition); + } else if(hasStrings()) { + uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? + UnicodeSetStringSpan::FWD_UTF8_NOT_CONTAINED : + UnicodeSetStringSpan::FWD_UTF8_CONTAINED; + UnicodeSetStringSpan strSpan(*this, *strings, which); + if(strSpan.needsStringSpanUTF8()) { + return strSpan.spanUTF8((const uint8_t *)s, length, spanCondition); + } + } + + if(spanCondition!=USET_SPAN_NOT_CONTAINED) { + spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. + } + + UChar32 c; + int32_t start=0, prev=0; + do { + U8_NEXT_OR_FFFD(s, start, length, c); + if(spanCondition!=contains(c)) { + break; + } + } while((prev=start)<length); + return prev; +} + +int32_t UnicodeSet::spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const { + if(length>0 && bmpSet!=NULL) { + const uint8_t *s0=(const uint8_t *)s; + return bmpSet->spanBackUTF8(s0, length, spanCondition); + } + if(length<0) { + length=(int32_t)uprv_strlen(s); + } + if(length==0) { + return 0; + } + if(stringSpan!=NULL) { + return stringSpan->spanBackUTF8((const uint8_t *)s, length, spanCondition); + } else if(hasStrings()) { + uint32_t which= spanCondition==USET_SPAN_NOT_CONTAINED ? + UnicodeSetStringSpan::BACK_UTF8_NOT_CONTAINED : + UnicodeSetStringSpan::BACK_UTF8_CONTAINED; + UnicodeSetStringSpan strSpan(*this, *strings, which); + if(strSpan.needsStringSpanUTF8()) { + return strSpan.spanBackUTF8((const uint8_t *)s, length, spanCondition); + } + } + + if(spanCondition!=USET_SPAN_NOT_CONTAINED) { + spanCondition=USET_SPAN_CONTAINED; // Pin to 0/1 values. + } + + UChar32 c; + int32_t prev=length; + do { + U8_PREV_OR_FFFD(s, 0, length, c); + if(spanCondition!=contains(c)) { + break; + } + } while((prev=length)>0); + return prev; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/uniset_closure.cpp b/thirdparty/icu4c/common/uniset_closure.cpp new file mode 100644 index 0000000000..882231ba1a --- /dev/null +++ b/thirdparty/icu4c/common/uniset_closure.cpp @@ -0,0 +1,250 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uniset_closure.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011may30 +* created by: Markus W. Scherer +* +* UnicodeSet::closeOver() and related methods moved here from uniset_props.cpp +* to simplify dependencies. +* In particular, this depends on the BreakIterator, but the BreakIterator +* code also builds UnicodeSets from patterns and needs uniset_props. +*/ + +#include "unicode/brkiter.h" +#include "unicode/locid.h" +#include "unicode/parsepos.h" +#include "unicode/uniset.h" +#include "cmemory.h" +#include "ruleiter.h" +#include "ucase.h" +#include "util.h" +#include "uvector.h" + +U_NAMESPACE_BEGIN + +// TODO memory debugging provided inside uniset.cpp +// could be made available here but probably obsolete with use of modern +// memory leak checker tools +#define _dbgct(me) + +//---------------------------------------------------------------- +// Constructors &c +//---------------------------------------------------------------- + +UnicodeSet::UnicodeSet(const UnicodeString& pattern, + uint32_t options, + const SymbolTable* symbols, + UErrorCode& status) { + applyPattern(pattern, options, symbols, status); + _dbgct(this); +} + +UnicodeSet::UnicodeSet(const UnicodeString& pattern, ParsePosition& pos, + uint32_t options, + const SymbolTable* symbols, + UErrorCode& status) { + applyPattern(pattern, pos, options, symbols, status); + _dbgct(this); +} + +//---------------------------------------------------------------- +// Public API +//---------------------------------------------------------------- + +UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, + uint32_t options, + const SymbolTable* symbols, + UErrorCode& status) { + ParsePosition pos(0); + applyPattern(pattern, pos, options, symbols, status); + if (U_FAILURE(status)) return *this; + + int32_t i = pos.getIndex(); + + if (options & USET_IGNORE_SPACE) { + // Skip over trailing whitespace + ICU_Utility::skipWhitespace(pattern, i, TRUE); + } + + if (i != pattern.length()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } + return *this; +} + +UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, + ParsePosition& pos, + uint32_t options, + const SymbolTable* symbols, + UErrorCode& status) { + if (U_FAILURE(status)) { + return *this; + } + if (isFrozen()) { + status = U_NO_WRITE_PERMISSION; + return *this; + } + // Need to build the pattern in a temporary string because + // _applyPattern calls add() etc., which set pat to empty. + UnicodeString rebuiltPat; + RuleCharacterIterator chars(pattern, symbols, pos); + applyPattern(chars, symbols, rebuiltPat, options, &UnicodeSet::closeOver, 0, status); + if (U_FAILURE(status)) return *this; + if (chars.inVariable()) { + // syntaxError(chars, "Extra chars in variable value"); + status = U_MALFORMED_SET; + return *this; + } + setPattern(rebuiltPat); + return *this; +} + +// USetAdder implementation +// Does not use uset.h to reduce code dependencies +static void U_CALLCONV +_set_add(USet *set, UChar32 c) { + ((UnicodeSet *)set)->add(c); +} + +static void U_CALLCONV +_set_addRange(USet *set, UChar32 start, UChar32 end) { + ((UnicodeSet *)set)->add(start, end); +} + +static void U_CALLCONV +_set_addString(USet *set, const UChar *str, int32_t length) { + ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); +} + +//---------------------------------------------------------------- +// Case folding API +//---------------------------------------------------------------- + +// add the result of a full case mapping to the set +// use str as a temporary string to avoid constructing one +static inline void +addCaseMapping(UnicodeSet &set, int32_t result, const UChar *full, UnicodeString &str) { + if(result >= 0) { + if(result > UCASE_MAX_STRING_LENGTH) { + // add a single-code point case mapping + set.add(result); + } else { + // add a string case mapping from full with length result + str.setTo((UBool)FALSE, full, result); + set.add(str); + } + } + // result < 0: the code point mapped to itself, no need to add it + // see ucase.h +} + +UnicodeSet& UnicodeSet::closeOver(int32_t attribute) { + if (isFrozen() || isBogus()) { + return *this; + } + if (attribute & (USET_CASE_INSENSITIVE | USET_ADD_CASE_MAPPINGS)) { + { + UnicodeSet foldSet(*this); + UnicodeString str; + USetAdder sa = { + foldSet.toUSet(), + _set_add, + _set_addRange, + _set_addString, + NULL, // don't need remove() + NULL // don't need removeRange() + }; + + // start with input set to guarantee inclusion + // USET_CASE: remove strings because the strings will actually be reduced (folded); + // therefore, start with no strings and add only those needed + if ((attribute & USET_CASE_INSENSITIVE) && foldSet.hasStrings()) { + foldSet.strings->removeAllElements(); + } + + int32_t n = getRangeCount(); + UChar32 result; + const UChar *full; + + for (int32_t i=0; i<n; ++i) { + UChar32 start = getRangeStart(i); + UChar32 end = getRangeEnd(i); + + if (attribute & USET_CASE_INSENSITIVE) { + // full case closure + for (UChar32 cp=start; cp<=end; ++cp) { + ucase_addCaseClosure(cp, &sa); + } + } else { + // add case mappings + // (does not add long s for regular s, or Kelvin for k, for example) + for (UChar32 cp=start; cp<=end; ++cp) { + result = ucase_toFullLower(cp, NULL, NULL, &full, UCASE_LOC_ROOT); + addCaseMapping(foldSet, result, full, str); + + result = ucase_toFullTitle(cp, NULL, NULL, &full, UCASE_LOC_ROOT); + addCaseMapping(foldSet, result, full, str); + + result = ucase_toFullUpper(cp, NULL, NULL, &full, UCASE_LOC_ROOT); + addCaseMapping(foldSet, result, full, str); + + result = ucase_toFullFolding(cp, &full, 0); + addCaseMapping(foldSet, result, full, str); + } + } + } + if (hasStrings()) { + if (attribute & USET_CASE_INSENSITIVE) { + for (int32_t j=0; j<strings->size(); ++j) { + str = *(const UnicodeString *) strings->elementAt(j); + str.foldCase(); + if(!ucase_addStringCaseClosure(str.getBuffer(), str.length(), &sa)) { + foldSet.add(str); // does not map to code points: add the folded string itself + } + } + } else { + Locale root(""); +#if !UCONFIG_NO_BREAK_ITERATION + UErrorCode status = U_ZERO_ERROR; + BreakIterator *bi = BreakIterator::createWordInstance(root, status); + if (U_SUCCESS(status)) { +#endif + const UnicodeString *pStr; + + for (int32_t j=0; j<strings->size(); ++j) { + pStr = (const UnicodeString *) strings->elementAt(j); + (str = *pStr).toLower(root); + foldSet.add(str); +#if !UCONFIG_NO_BREAK_ITERATION + (str = *pStr).toTitle(bi, root); + foldSet.add(str); +#endif + (str = *pStr).toUpper(root); + foldSet.add(str); + (str = *pStr).foldCase(); + foldSet.add(str); + } +#if !UCONFIG_NO_BREAK_ITERATION + } + delete bi; +#endif + } + } + *this = foldSet; + } + } + return *this; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/uniset_props.cpp b/thirdparty/icu4c/common/uniset_props.cpp new file mode 100644 index 0000000000..37277fcb75 --- /dev/null +++ b/thirdparty/icu4c/common/uniset_props.cpp @@ -0,0 +1,1174 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uniset_props.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004aug25 +* created by: Markus W. Scherer +* +* Character property dependent functions moved here from uniset.cpp +*/ + +#include "unicode/utypes.h" +#include "unicode/uniset.h" +#include "unicode/parsepos.h" +#include "unicode/uchar.h" +#include "unicode/uscript.h" +#include "unicode/symtable.h" +#include "unicode/uset.h" +#include "unicode/locid.h" +#include "unicode/brkiter.h" +#include "uset_imp.h" +#include "ruleiter.h" +#include "cmemory.h" +#include "ucln_cmn.h" +#include "util.h" +#include "uvector.h" +#include "uprops.h" +#include "propname.h" +#include "normalizer2impl.h" +#include "uinvchar.h" +#include "uprops.h" +#include "charstr.h" +#include "cstring.h" +#include "mutex.h" +#include "umutex.h" +#include "uassert.h" +#include "hash.h" + +U_NAMESPACE_USE + +// Define UChar constants using hex for EBCDIC compatibility +// Used #define to reduce private static exports and memory access time. +#define SET_OPEN ((UChar)0x005B) /*[*/ +#define SET_CLOSE ((UChar)0x005D) /*]*/ +#define HYPHEN ((UChar)0x002D) /*-*/ +#define COMPLEMENT ((UChar)0x005E) /*^*/ +#define COLON ((UChar)0x003A) /*:*/ +#define BACKSLASH ((UChar)0x005C) /*\*/ +#define INTERSECTION ((UChar)0x0026) /*&*/ +#define UPPER_U ((UChar)0x0055) /*U*/ +#define LOWER_U ((UChar)0x0075) /*u*/ +#define OPEN_BRACE ((UChar)123) /*{*/ +#define CLOSE_BRACE ((UChar)125) /*}*/ +#define UPPER_P ((UChar)0x0050) /*P*/ +#define LOWER_P ((UChar)0x0070) /*p*/ +#define UPPER_N ((UChar)78) /*N*/ +#define EQUALS ((UChar)0x003D) /*=*/ + +//static const UChar POSIX_OPEN[] = { SET_OPEN,COLON,0 }; // "[:" +static const UChar POSIX_CLOSE[] = { COLON,SET_CLOSE,0 }; // ":]" +//static const UChar PERL_OPEN[] = { BACKSLASH,LOWER_P,0 }; // "\\p" +//static const UChar PERL_CLOSE[] = { CLOSE_BRACE,0 }; // "}" +//static const UChar NAME_OPEN[] = { BACKSLASH,UPPER_N,0 }; // "\\N" +static const UChar HYPHEN_RIGHT_BRACE[] = {HYPHEN,SET_CLOSE,0}; /*-]*/ + +// Special property set IDs +static const char ANY[] = "ANY"; // [\u0000-\U0010FFFF] +static const char ASCII[] = "ASCII"; // [\u0000-\u007F] +static const char ASSIGNED[] = "Assigned"; // [:^Cn:] + +// Unicode name property alias +#define NAME_PROP "na" +#define NAME_PROP_LENGTH 2 + +/** + * Delimiter string used in patterns to close a category reference: + * ":]". Example: "[:Lu:]". + */ +//static const UChar CATEGORY_CLOSE[] = {COLON, SET_CLOSE, 0x0000}; /* ":]" */ + +// Cached sets ------------------------------------------------------------- *** + +U_CDECL_BEGIN +static UBool U_CALLCONV uset_cleanup(); + +static UnicodeSet *uni32Singleton; +static icu::UInitOnce uni32InitOnce = U_INITONCE_INITIALIZER; + +/** + * Cleanup function for UnicodeSet + */ +static UBool U_CALLCONV uset_cleanup(void) { + delete uni32Singleton; + uni32Singleton = NULL; + uni32InitOnce.reset(); + return TRUE; +} + +U_CDECL_END + +U_NAMESPACE_BEGIN + +namespace { + +// Cache some sets for other services -------------------------------------- *** +void U_CALLCONV createUni32Set(UErrorCode &errorCode) { + U_ASSERT(uni32Singleton == NULL); + uni32Singleton = new UnicodeSet(UNICODE_STRING_SIMPLE("[:age=3.2:]"), errorCode); + if(uni32Singleton==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } else { + uni32Singleton->freeze(); + } + ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup); +} + + +U_CFUNC UnicodeSet * +uniset_getUnicode32Instance(UErrorCode &errorCode) { + umtx_initOnce(uni32InitOnce, &createUni32Set, errorCode); + return uni32Singleton; +} + +// helper functions for matching of pattern syntax pieces ------------------ *** +// these functions are parallel to the PERL_OPEN etc. strings above + +// using these functions is not only faster than UnicodeString::compare() and +// caseCompare(), but they also make UnicodeSet work for simple patterns when +// no Unicode properties data is available - when caseCompare() fails + +static inline UBool +isPerlOpen(const UnicodeString &pattern, int32_t pos) { + UChar c; + return pattern.charAt(pos)==BACKSLASH && ((c=pattern.charAt(pos+1))==LOWER_P || c==UPPER_P); +} + +/*static inline UBool +isPerlClose(const UnicodeString &pattern, int32_t pos) { + return pattern.charAt(pos)==CLOSE_BRACE; +}*/ + +static inline UBool +isNameOpen(const UnicodeString &pattern, int32_t pos) { + return pattern.charAt(pos)==BACKSLASH && pattern.charAt(pos+1)==UPPER_N; +} + +static inline UBool +isPOSIXOpen(const UnicodeString &pattern, int32_t pos) { + return pattern.charAt(pos)==SET_OPEN && pattern.charAt(pos+1)==COLON; +} + +/*static inline UBool +isPOSIXClose(const UnicodeString &pattern, int32_t pos) { + return pattern.charAt(pos)==COLON && pattern.charAt(pos+1)==SET_CLOSE; +}*/ + +// TODO memory debugging provided inside uniset.cpp +// could be made available here but probably obsolete with use of modern +// memory leak checker tools +#define _dbgct(me) + +} // namespace + +//---------------------------------------------------------------- +// Constructors &c +//---------------------------------------------------------------- + +/** + * Constructs a set from the given pattern, optionally ignoring + * white space. See the class description for the syntax of the + * pattern language. + * @param pattern a string specifying what characters are in the set + */ +UnicodeSet::UnicodeSet(const UnicodeString& pattern, + UErrorCode& status) { + applyPattern(pattern, status); + _dbgct(this); +} + +//---------------------------------------------------------------- +// Public API +//---------------------------------------------------------------- + +UnicodeSet& UnicodeSet::applyPattern(const UnicodeString& pattern, + UErrorCode& status) { + // Equivalent to + // return applyPattern(pattern, USET_IGNORE_SPACE, NULL, status); + // but without dependency on closeOver(). + ParsePosition pos(0); + applyPatternIgnoreSpace(pattern, pos, NULL, status); + if (U_FAILURE(status)) return *this; + + int32_t i = pos.getIndex(); + // Skip over trailing whitespace + ICU_Utility::skipWhitespace(pattern, i, TRUE); + if (i != pattern.length()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } + return *this; +} + +void +UnicodeSet::applyPatternIgnoreSpace(const UnicodeString& pattern, + ParsePosition& pos, + const SymbolTable* symbols, + UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + if (isFrozen()) { + status = U_NO_WRITE_PERMISSION; + return; + } + // Need to build the pattern in a temporary string because + // _applyPattern calls add() etc., which set pat to empty. + UnicodeString rebuiltPat; + RuleCharacterIterator chars(pattern, symbols, pos); + applyPattern(chars, symbols, rebuiltPat, USET_IGNORE_SPACE, NULL, 0, status); + if (U_FAILURE(status)) return; + if (chars.inVariable()) { + // syntaxError(chars, "Extra chars in variable value"); + status = U_MALFORMED_SET; + return; + } + setPattern(rebuiltPat); +} + +/** + * Return true if the given position, in the given pattern, appears + * to be the start of a UnicodeSet pattern. + */ +UBool UnicodeSet::resemblesPattern(const UnicodeString& pattern, int32_t pos) { + return ((pos+1) < pattern.length() && + pattern.charAt(pos) == (UChar)91/*[*/) || + resemblesPropertyPattern(pattern, pos); +} + +//---------------------------------------------------------------- +// Implementation: Pattern parsing +//---------------------------------------------------------------- + +namespace { + +/** + * A small all-inline class to manage a UnicodeSet pointer. Add + * operator->() etc. as needed. + */ +class UnicodeSetPointer { + UnicodeSet* p; +public: + inline UnicodeSetPointer() : p(0) {} + inline ~UnicodeSetPointer() { delete p; } + inline UnicodeSet* pointer() { return p; } + inline UBool allocate() { + if (p == 0) { + p = new UnicodeSet(); + } + return p != 0; + } +}; + +constexpr int32_t MAX_DEPTH = 100; + +} // namespace + +/** + * Parse the pattern from the given RuleCharacterIterator. The + * iterator is advanced over the parsed pattern. + * @param chars iterator over the pattern characters. Upon return + * it will be advanced to the first character after the parsed + * pattern, or the end of the iteration if all characters are + * parsed. + * @param symbols symbol table to use to parse and dereference + * variables, or null if none. + * @param rebuiltPat the pattern that was parsed, rebuilt or + * copied from the input pattern, as appropriate. + * @param options a bit mask of zero or more of the following: + * IGNORE_SPACE, CASE. + */ +void UnicodeSet::applyPattern(RuleCharacterIterator& chars, + const SymbolTable* symbols, + UnicodeString& rebuiltPat, + uint32_t options, + UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute), + int32_t depth, + UErrorCode& ec) { + if (U_FAILURE(ec)) return; + if (depth > MAX_DEPTH) { + ec = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + // Syntax characters: [ ] ^ - & { } + + // Recognized special forms for chars, sets: c-c s-s s&s + + int32_t opts = RuleCharacterIterator::PARSE_VARIABLES | + RuleCharacterIterator::PARSE_ESCAPES; + if ((options & USET_IGNORE_SPACE) != 0) { + opts |= RuleCharacterIterator::SKIP_WHITESPACE; + } + + UnicodeString patLocal, buf; + UBool usePat = FALSE; + UnicodeSetPointer scratch; + RuleCharacterIterator::Pos backup; + + // mode: 0=before [, 1=between [...], 2=after ] + // lastItem: 0=none, 1=char, 2=set + int8_t lastItem = 0, mode = 0; + UChar32 lastChar = 0; + UChar op = 0; + + UBool invert = FALSE; + + clear(); + + while (mode != 2 && !chars.atEnd()) { + U_ASSERT((lastItem == 0 && op == 0) || + (lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) || + (lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ || + op == INTERSECTION /*'&'*/))); + + UChar32 c = 0; + UBool literal = FALSE; + UnicodeSet* nested = 0; // alias - do not delete + + // -------- Check for property pattern + + // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed + int8_t setMode = 0; + if (resemblesPropertyPattern(chars, opts)) { + setMode = 2; + } + + // -------- Parse '[' of opening delimiter OR nested set. + // If there is a nested set, use `setMode' to define how + // the set should be parsed. If the '[' is part of the + // opening delimiter for this pattern, parse special + // strings "[", "[^", "[-", and "[^-". Check for stand-in + // characters representing a nested set in the symbol + // table. + + else { + // Prepare to backup if necessary + chars.getPos(backup); + c = chars.next(opts, literal, ec); + if (U_FAILURE(ec)) return; + + if (c == 0x5B /*'['*/ && !literal) { + if (mode == 1) { + chars.setPos(backup); // backup + setMode = 1; + } else { + // Handle opening '[' delimiter + mode = 1; + patLocal.append((UChar) 0x5B /*'['*/); + chars.getPos(backup); // prepare to backup + c = chars.next(opts, literal, ec); + if (U_FAILURE(ec)) return; + if (c == 0x5E /*'^'*/ && !literal) { + invert = TRUE; + patLocal.append((UChar) 0x5E /*'^'*/); + chars.getPos(backup); // prepare to backup + c = chars.next(opts, literal, ec); + if (U_FAILURE(ec)) return; + } + // Fall through to handle special leading '-'; + // otherwise restart loop for nested [], \p{}, etc. + if (c == HYPHEN /*'-'*/) { + literal = TRUE; + // Fall through to handle literal '-' below + } else { + chars.setPos(backup); // backup + continue; + } + } + } else if (symbols != 0) { + const UnicodeFunctor *m = symbols->lookupMatcher(c); + if (m != 0) { + const UnicodeSet *ms = dynamic_cast<const UnicodeSet *>(m); + if (ms == NULL) { + ec = U_MALFORMED_SET; + return; + } + // casting away const, but `nested' won't be modified + // (important not to modify stored set) + nested = const_cast<UnicodeSet*>(ms); + setMode = 3; + } + } + } + + // -------- Handle a nested set. This either is inline in + // the pattern or represented by a stand-in that has + // previously been parsed and was looked up in the symbol + // table. + + if (setMode != 0) { + if (lastItem == 1) { + if (op != 0) { + // syntaxError(chars, "Char expected after operator"); + ec = U_MALFORMED_SET; + return; + } + add(lastChar, lastChar); + _appendToPat(patLocal, lastChar, FALSE); + lastItem = 0; + op = 0; + } + + if (op == HYPHEN /*'-'*/ || op == INTERSECTION /*'&'*/) { + patLocal.append(op); + } + + if (nested == 0) { + // lazy allocation + if (!scratch.allocate()) { + ec = U_MEMORY_ALLOCATION_ERROR; + return; + } + nested = scratch.pointer(); + } + switch (setMode) { + case 1: + nested->applyPattern(chars, symbols, patLocal, options, caseClosure, depth + 1, ec); + break; + case 2: + chars.skipIgnored(opts); + nested->applyPropertyPattern(chars, patLocal, ec); + if (U_FAILURE(ec)) return; + break; + case 3: // `nested' already parsed + nested->_toPattern(patLocal, FALSE); + break; + } + + usePat = TRUE; + + if (mode == 0) { + // Entire pattern is a category; leave parse loop + *this = *nested; + mode = 2; + break; + } + + switch (op) { + case HYPHEN: /*'-'*/ + removeAll(*nested); + break; + case INTERSECTION: /*'&'*/ + retainAll(*nested); + break; + case 0: + addAll(*nested); + break; + } + + op = 0; + lastItem = 2; + + continue; + } + + if (mode == 0) { + // syntaxError(chars, "Missing '['"); + ec = U_MALFORMED_SET; + return; + } + + // -------- Parse special (syntax) characters. If the + // current character is not special, or if it is escaped, + // then fall through and handle it below. + + if (!literal) { + switch (c) { + case 0x5D /*']'*/: + if (lastItem == 1) { + add(lastChar, lastChar); + _appendToPat(patLocal, lastChar, FALSE); + } + // Treat final trailing '-' as a literal + if (op == HYPHEN /*'-'*/) { + add(op, op); + patLocal.append(op); + } else if (op == INTERSECTION /*'&'*/) { + // syntaxError(chars, "Trailing '&'"); + ec = U_MALFORMED_SET; + return; + } + patLocal.append((UChar) 0x5D /*']'*/); + mode = 2; + continue; + case HYPHEN /*'-'*/: + if (op == 0) { + if (lastItem != 0) { + op = (UChar) c; + continue; + } else { + // Treat final trailing '-' as a literal + add(c, c); + c = chars.next(opts, literal, ec); + if (U_FAILURE(ec)) return; + if (c == 0x5D /*']'*/ && !literal) { + patLocal.append(HYPHEN_RIGHT_BRACE, 2); + mode = 2; + continue; + } + } + } + // syntaxError(chars, "'-' not after char or set"); + ec = U_MALFORMED_SET; + return; + case INTERSECTION /*'&'*/: + if (lastItem == 2 && op == 0) { + op = (UChar) c; + continue; + } + // syntaxError(chars, "'&' not after set"); + ec = U_MALFORMED_SET; + return; + case 0x5E /*'^'*/: + // syntaxError(chars, "'^' not after '['"); + ec = U_MALFORMED_SET; + return; + case 0x7B /*'{'*/: + if (op != 0) { + // syntaxError(chars, "Missing operand after operator"); + ec = U_MALFORMED_SET; + return; + } + if (lastItem == 1) { + add(lastChar, lastChar); + _appendToPat(patLocal, lastChar, FALSE); + } + lastItem = 0; + buf.truncate(0); + { + UBool ok = FALSE; + while (!chars.atEnd()) { + c = chars.next(opts, literal, ec); + if (U_FAILURE(ec)) return; + if (c == 0x7D /*'}'*/ && !literal) { + ok = TRUE; + break; + } + buf.append(c); + } + if (buf.length() < 1 || !ok) { + // syntaxError(chars, "Invalid multicharacter string"); + ec = U_MALFORMED_SET; + return; + } + } + // We have new string. Add it to set and continue; + // we don't need to drop through to the further + // processing + add(buf); + patLocal.append((UChar) 0x7B /*'{'*/); + _appendToPat(patLocal, buf, FALSE); + patLocal.append((UChar) 0x7D /*'}'*/); + continue; + case SymbolTable::SYMBOL_REF: + // symbols nosymbols + // [a-$] error error (ambiguous) + // [a$] anchor anchor + // [a-$x] var "x"* literal '$' + // [a-$.] error literal '$' + // *We won't get here in the case of var "x" + { + chars.getPos(backup); + c = chars.next(opts, literal, ec); + if (U_FAILURE(ec)) return; + UBool anchor = (c == 0x5D /*']'*/ && !literal); + if (symbols == 0 && !anchor) { + c = SymbolTable::SYMBOL_REF; + chars.setPos(backup); + break; // literal '$' + } + if (anchor && op == 0) { + if (lastItem == 1) { + add(lastChar, lastChar); + _appendToPat(patLocal, lastChar, FALSE); + } + add(U_ETHER); + usePat = TRUE; + patLocal.append((UChar) SymbolTable::SYMBOL_REF); + patLocal.append((UChar) 0x5D /*']'*/); + mode = 2; + continue; + } + // syntaxError(chars, "Unquoted '$'"); + ec = U_MALFORMED_SET; + return; + } + default: + break; + } + } + + // -------- Parse literal characters. This includes both + // escaped chars ("\u4E01") and non-syntax characters + // ("a"). + + switch (lastItem) { + case 0: + lastItem = 1; + lastChar = c; + break; + case 1: + if (op == HYPHEN /*'-'*/) { + if (lastChar >= c) { + // Don't allow redundant (a-a) or empty (b-a) ranges; + // these are most likely typos. + // syntaxError(chars, "Invalid range"); + ec = U_MALFORMED_SET; + return; + } + add(lastChar, c); + _appendToPat(patLocal, lastChar, FALSE); + patLocal.append(op); + _appendToPat(patLocal, c, FALSE); + lastItem = 0; + op = 0; + } else { + add(lastChar, lastChar); + _appendToPat(patLocal, lastChar, FALSE); + lastChar = c; + } + break; + case 2: + if (op != 0) { + // syntaxError(chars, "Set expected after operator"); + ec = U_MALFORMED_SET; + return; + } + lastChar = c; + lastItem = 1; + break; + } + } + + if (mode != 2) { + // syntaxError(chars, "Missing ']'"); + ec = U_MALFORMED_SET; + return; + } + + chars.skipIgnored(opts); + + /** + * Handle global flags (invert, case insensitivity). If this + * pattern should be compiled case-insensitive, then we need + * to close over case BEFORE COMPLEMENTING. This makes + * patterns like /[^abc]/i work. + */ + if ((options & USET_CASE_INSENSITIVE) != 0) { + (this->*caseClosure)(USET_CASE_INSENSITIVE); + } + else if ((options & USET_ADD_CASE_MAPPINGS) != 0) { + (this->*caseClosure)(USET_ADD_CASE_MAPPINGS); + } + if (invert) { + complement(); + } + + // Use the rebuilt pattern (patLocal) only if necessary. Prefer the + // generated pattern. + if (usePat) { + rebuiltPat.append(patLocal); + } else { + _generatePattern(rebuiltPat, FALSE); + } + if (isBogus() && U_SUCCESS(ec)) { + // We likely ran out of memory. AHHH! + ec = U_MEMORY_ALLOCATION_ERROR; + } +} + +//---------------------------------------------------------------- +// Property set implementation +//---------------------------------------------------------------- + +namespace { + +static UBool numericValueFilter(UChar32 ch, void* context) { + return u_getNumericValue(ch) == *(double*)context; +} + +static UBool generalCategoryMaskFilter(UChar32 ch, void* context) { + int32_t value = *(int32_t*)context; + return (U_GET_GC_MASK((UChar32) ch) & value) != 0; +} + +static UBool versionFilter(UChar32 ch, void* context) { + static const UVersionInfo none = { 0, 0, 0, 0 }; + UVersionInfo v; + u_charAge(ch, v); + UVersionInfo* version = (UVersionInfo*)context; + return uprv_memcmp(&v, &none, sizeof(v)) > 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0; +} + +typedef struct { + UProperty prop; + int32_t value; +} IntPropertyContext; + +static UBool intPropertyFilter(UChar32 ch, void* context) { + IntPropertyContext* c = (IntPropertyContext*)context; + return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value; +} + +static UBool scriptExtensionsFilter(UChar32 ch, void* context) { + return uscript_hasScript(ch, *(UScriptCode*)context); +} + +} // namespace + +/** + * Generic filter-based scanning code for UCD property UnicodeSets. + */ +void UnicodeSet::applyFilter(UnicodeSet::Filter filter, + void* context, + const UnicodeSet* inclusions, + UErrorCode &status) { + if (U_FAILURE(status)) return; + + // Logically, walk through all Unicode characters, noting the start + // and end of each range for which filter.contain(c) is + // true. Add each range to a set. + // + // To improve performance, use an inclusions set which + // encodes information about character ranges that are known + // to have identical properties. + // inclusions contains the first characters of + // same-value ranges for the given property. + + clear(); + + UChar32 startHasProperty = -1; + int32_t limitRange = inclusions->getRangeCount(); + + for (int j=0; j<limitRange; ++j) { + // get current range + UChar32 start = inclusions->getRangeStart(j); + UChar32 end = inclusions->getRangeEnd(j); + + // for all the code points in the range, process + for (UChar32 ch = start; ch <= end; ++ch) { + // only add to this UnicodeSet on inflection points -- + // where the hasProperty value changes to false + if ((*filter)(ch, context)) { + if (startHasProperty < 0) { + startHasProperty = ch; + } + } else if (startHasProperty >= 0) { + add(startHasProperty, ch-1); + startHasProperty = -1; + } + } + } + if (startHasProperty >= 0) { + add((UChar32)startHasProperty, (UChar32)0x10FFFF); + } + if (isBogus() && U_SUCCESS(status)) { + // We likely ran out of memory. AHHH! + status = U_MEMORY_ALLOCATION_ERROR; + } +} + +namespace { + +static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { + /* Note: we use ' ' in compiler code page */ + int32_t j = 0; + char ch; + --dstCapacity; /* make room for term. zero */ + while ((ch = *src++) != 0) { + if (ch == ' ' && (j==0 || (j>0 && dst[j-1]==' '))) { + continue; + } + if (j >= dstCapacity) return FALSE; + dst[j++] = ch; + } + if (j > 0 && dst[j-1] == ' ') --j; + dst[j] = 0; + return TRUE; +} + +} // namespace + +//---------------------------------------------------------------- +// Property set API +//---------------------------------------------------------------- + +#define FAIL(ec) UPRV_BLOCK_MACRO_BEGIN { \ + ec=U_ILLEGAL_ARGUMENT_ERROR; \ + return *this; \ +} UPRV_BLOCK_MACRO_END + +UnicodeSet& +UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) { + if (U_FAILURE(ec) || isFrozen()) { return *this; } + if (prop == UCHAR_GENERAL_CATEGORY_MASK) { + const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec); + applyFilter(generalCategoryMaskFilter, &value, inclusions, ec); + } else if (prop == UCHAR_SCRIPT_EXTENSIONS) { + const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec); + UScriptCode script = (UScriptCode)value; + applyFilter(scriptExtensionsFilter, &script, inclusions, ec); + } else if (0 <= prop && prop < UCHAR_BINARY_LIMIT) { + if (value == 0 || value == 1) { + const USet *set = u_getBinaryPropertySet(prop, &ec); + if (U_FAILURE(ec)) { return *this; } + copyFrom(*UnicodeSet::fromUSet(set), TRUE); + if (value == 0) { + complement(); + } + } else { + clear(); + } + } else if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) { + const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec); + IntPropertyContext c = {prop, value}; + applyFilter(intPropertyFilter, &c, inclusions, ec); + } else { + ec = U_ILLEGAL_ARGUMENT_ERROR; + } + return *this; +} + +UnicodeSet& +UnicodeSet::applyPropertyAlias(const UnicodeString& prop, + const UnicodeString& value, + UErrorCode& ec) { + if (U_FAILURE(ec) || isFrozen()) return *this; + + // prop and value used to be converted to char * using the default + // converter instead of the invariant conversion. + // This should not be necessary because all Unicode property and value + // names use only invariant characters. + // If there are any variant characters, then we won't find them anyway. + // Checking first avoids assertion failures in the conversion. + if( !uprv_isInvariantUString(prop.getBuffer(), prop.length()) || + !uprv_isInvariantUString(value.getBuffer(), value.length()) + ) { + FAIL(ec); + } + CharString pname, vname; + pname.appendInvariantChars(prop, ec); + vname.appendInvariantChars(value, ec); + if (U_FAILURE(ec)) return *this; + + UProperty p; + int32_t v; + UBool invert = FALSE; + + if (value.length() > 0) { + p = u_getPropertyEnum(pname.data()); + if (p == UCHAR_INVALID_CODE) FAIL(ec); + + // Treat gc as gcm + if (p == UCHAR_GENERAL_CATEGORY) { + p = UCHAR_GENERAL_CATEGORY_MASK; + } + + if ((p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) || + (p >= UCHAR_INT_START && p < UCHAR_INT_LIMIT) || + (p >= UCHAR_MASK_START && p < UCHAR_MASK_LIMIT)) { + v = u_getPropertyValueEnum(p, vname.data()); + if (v == UCHAR_INVALID_CODE) { + // Handle numeric CCC + if (p == UCHAR_CANONICAL_COMBINING_CLASS || + p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS || + p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) { + char* end; + double val = uprv_strtod(vname.data(), &end); + // Anything between 0 and 255 is valid even if unused. + // Cast double->int only after range check. + // We catch NaN here because comparing it with both 0 and 255 will be false + // (as are all comparisons with NaN). + if (*end != 0 || !(0 <= val && val <= 255) || + (v = (int32_t)val) != val) { + // non-integral value or outside 0..255, or trailing junk + FAIL(ec); + } + } else { + FAIL(ec); + } + } + } + + else { + + switch (p) { + case UCHAR_NUMERIC_VALUE: + { + char* end; + double val = uprv_strtod(vname.data(), &end); + if (*end != 0) { + FAIL(ec); + } + applyFilter(numericValueFilter, &val, + CharacterProperties::getInclusionsForProperty(p, ec), ec); + return *this; + } + case UCHAR_NAME: + { + // Must munge name, since u_charFromName() does not do + // 'loose' matching. + char buf[128]; // it suffices that this be > uprv_getMaxCharNameLength + if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec); + UChar32 ch = u_charFromName(U_EXTENDED_CHAR_NAME, buf, &ec); + if (U_SUCCESS(ec)) { + clear(); + add(ch); + return *this; + } else { + FAIL(ec); + } + } + case UCHAR_UNICODE_1_NAME: + // ICU 49 deprecates the Unicode_1_Name property APIs. + FAIL(ec); + case UCHAR_AGE: + { + // Must munge name, since u_versionFromString() does not do + // 'loose' matching. + char buf[128]; + if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec); + UVersionInfo version; + u_versionFromString(version, buf); + applyFilter(versionFilter, &version, + CharacterProperties::getInclusionsForProperty(p, ec), ec); + return *this; + } + case UCHAR_SCRIPT_EXTENSIONS: + v = u_getPropertyValueEnum(UCHAR_SCRIPT, vname.data()); + if (v == UCHAR_INVALID_CODE) { + FAIL(ec); + } + // fall through to calling applyIntPropertyValue() + break; + default: + // p is a non-binary, non-enumerated property that we + // don't support (yet). + FAIL(ec); + } + } + } + + else { + // value is empty. Interpret as General Category, Script, or + // Binary property. + p = UCHAR_GENERAL_CATEGORY_MASK; + v = u_getPropertyValueEnum(p, pname.data()); + if (v == UCHAR_INVALID_CODE) { + p = UCHAR_SCRIPT; + v = u_getPropertyValueEnum(p, pname.data()); + if (v == UCHAR_INVALID_CODE) { + p = u_getPropertyEnum(pname.data()); + if (p >= UCHAR_BINARY_START && p < UCHAR_BINARY_LIMIT) { + v = 1; + } else if (0 == uprv_comparePropertyNames(ANY, pname.data())) { + set(MIN_VALUE, MAX_VALUE); + return *this; + } else if (0 == uprv_comparePropertyNames(ASCII, pname.data())) { + set(0, 0x7F); + return *this; + } else if (0 == uprv_comparePropertyNames(ASSIGNED, pname.data())) { + // [:Assigned:]=[:^Cn:] + p = UCHAR_GENERAL_CATEGORY_MASK; + v = U_GC_CN_MASK; + invert = TRUE; + } else { + FAIL(ec); + } + } + } + } + + applyIntPropertyValue(p, v, ec); + if(invert) { + complement(); + } + + if (isBogus() && U_SUCCESS(ec)) { + // We likely ran out of memory. AHHH! + ec = U_MEMORY_ALLOCATION_ERROR; + } + return *this; +} + +//---------------------------------------------------------------- +// Property set patterns +//---------------------------------------------------------------- + +/** + * Return true if the given position, in the given pattern, appears + * to be the start of a property set pattern. + */ +UBool UnicodeSet::resemblesPropertyPattern(const UnicodeString& pattern, + int32_t pos) { + // Patterns are at least 5 characters long + if ((pos+5) > pattern.length()) { + return FALSE; + } + + // Look for an opening [:, [:^, \p, or \P + return isPOSIXOpen(pattern, pos) || isPerlOpen(pattern, pos) || isNameOpen(pattern, pos); +} + +/** + * Return true if the given iterator appears to point at a + * property pattern. Regardless of the result, return with the + * iterator unchanged. + * @param chars iterator over the pattern characters. Upon return + * it will be unchanged. + * @param iterOpts RuleCharacterIterator options + */ +UBool UnicodeSet::resemblesPropertyPattern(RuleCharacterIterator& chars, + int32_t iterOpts) { + // NOTE: literal will always be FALSE, because we don't parse escapes. + UBool result = FALSE, literal; + UErrorCode ec = U_ZERO_ERROR; + iterOpts &= ~RuleCharacterIterator::PARSE_ESCAPES; + RuleCharacterIterator::Pos pos; + chars.getPos(pos); + UChar32 c = chars.next(iterOpts, literal, ec); + if (c == 0x5B /*'['*/ || c == 0x5C /*'\\'*/) { + UChar32 d = chars.next(iterOpts & ~RuleCharacterIterator::SKIP_WHITESPACE, + literal, ec); + result = (c == 0x5B /*'['*/) ? (d == 0x3A /*':'*/) : + (d == 0x4E /*'N'*/ || d == 0x70 /*'p'*/ || d == 0x50 /*'P'*/); + } + chars.setPos(pos); + return result && U_SUCCESS(ec); +} + +/** + * Parse the given property pattern at the given parse position. + */ +UnicodeSet& UnicodeSet::applyPropertyPattern(const UnicodeString& pattern, + ParsePosition& ppos, + UErrorCode &ec) { + int32_t pos = ppos.getIndex(); + + UBool posix = FALSE; // true for [:pat:], false for \p{pat} \P{pat} \N{pat} + UBool isName = FALSE; // true for \N{pat}, o/w false + UBool invert = FALSE; + + if (U_FAILURE(ec)) return *this; + + // Minimum length is 5 characters, e.g. \p{L} + if ((pos+5) > pattern.length()) { + FAIL(ec); + } + + // On entry, ppos should point to one of the following locations: + // Look for an opening [:, [:^, \p, or \P + if (isPOSIXOpen(pattern, pos)) { + posix = TRUE; + pos += 2; + pos = ICU_Utility::skipWhitespace(pattern, pos); + if (pos < pattern.length() && pattern.charAt(pos) == COMPLEMENT) { + ++pos; + invert = TRUE; + } + } else if (isPerlOpen(pattern, pos) || isNameOpen(pattern, pos)) { + UChar c = pattern.charAt(pos+1); + invert = (c == UPPER_P); + isName = (c == UPPER_N); + pos += 2; + pos = ICU_Utility::skipWhitespace(pattern, pos); + if (pos == pattern.length() || pattern.charAt(pos++) != OPEN_BRACE) { + // Syntax error; "\p" or "\P" not followed by "{" + FAIL(ec); + } + } else { + // Open delimiter not seen + FAIL(ec); + } + + // Look for the matching close delimiter, either :] or } + int32_t close; + if (posix) { + close = pattern.indexOf(POSIX_CLOSE, 2, pos); + } else { + close = pattern.indexOf(CLOSE_BRACE, pos); + } + if (close < 0) { + // Syntax error; close delimiter missing + FAIL(ec); + } + + // Look for an '=' sign. If this is present, we will parse a + // medium \p{gc=Cf} or long \p{GeneralCategory=Format} + // pattern. + int32_t equals = pattern.indexOf(EQUALS, pos); + UnicodeString propName, valueName; + if (equals >= 0 && equals < close && !isName) { + // Equals seen; parse medium/long pattern + pattern.extractBetween(pos, equals, propName); + pattern.extractBetween(equals+1, close, valueName); + } + + else { + // Handle case where no '=' is seen, and \N{} + pattern.extractBetween(pos, close, propName); + + // Handle \N{name} + if (isName) { + // This is a little inefficient since it means we have to + // parse NAME_PROP back to UCHAR_NAME even though we already + // know it's UCHAR_NAME. If we refactor the API to + // support args of (UProperty, char*) then we can remove + // NAME_PROP and make this a little more efficient. + valueName = propName; + propName = UnicodeString(NAME_PROP, NAME_PROP_LENGTH, US_INV); + } + } + + applyPropertyAlias(propName, valueName, ec); + + if (U_SUCCESS(ec)) { + if (invert) { + complement(); + } + + // Move to the limit position after the close delimiter if the + // parse succeeded. + ppos.setIndex(close + (posix ? 2 : 1)); + } + + return *this; +} + +/** + * Parse a property pattern. + * @param chars iterator over the pattern characters. Upon return + * it will be advanced to the first character after the parsed + * pattern, or the end of the iteration if all characters are + * parsed. + * @param rebuiltPat the pattern that was parsed, rebuilt or + * copied from the input pattern, as appropriate. + */ +void UnicodeSet::applyPropertyPattern(RuleCharacterIterator& chars, + UnicodeString& rebuiltPat, + UErrorCode& ec) { + if (U_FAILURE(ec)) return; + UnicodeString pattern; + chars.lookahead(pattern); + ParsePosition pos(0); + applyPropertyPattern(pattern, pos, ec); + if (U_FAILURE(ec)) return; + if (pos.getIndex() == 0) { + // syntaxError(chars, "Invalid property pattern"); + ec = U_MALFORMED_SET; + return; + } + chars.jumpahead(pos.getIndex()); + rebuiltPat.append(pattern, 0, pos.getIndex()); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/unisetspan.cpp b/thirdparty/icu4c/common/unisetspan.cpp new file mode 100644 index 0000000000..68e44d91ee --- /dev/null +++ b/thirdparty/icu4c/common/unisetspan.cpp @@ -0,0 +1,1509 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2007-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: unisetspan.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2007mar01 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/uniset.h" +#include "unicode/ustring.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "uvector.h" +#include "unisetspan.h" + +U_NAMESPACE_BEGIN + +/* + * List of offsets from the current position from where to try matching + * a code point or a string. + * Store offsets rather than indexes to simplify the code and use the same list + * for both increments (in span()) and decrements (in spanBack()). + * + * Assumption: The maximum offset is limited, and the offsets that are stored + * at any one time are relatively dense, that is, there are normally no gaps of + * hundreds or thousands of offset values. + * + * The implementation uses a circular buffer of byte flags, + * each indicating whether the corresponding offset is in the list. + * This avoids inserting into a sorted list of offsets (or absolute indexes) and + * physically moving part of the list. + * + * Note: In principle, the caller should setMaxLength() to the maximum of the + * max string length and U16_LENGTH/U8_LENGTH to account for + * "long" single code points. + * However, this implementation uses at least a staticList with more than + * U8_LENGTH entries anyway. + * + * Note: If maxLength were guaranteed to be no more than 32 or 64, + * the list could be stored as bit flags in a single integer. + * Rather than handling a circular buffer with a start list index, + * the integer would simply be shifted when lower offsets are removed. + * UnicodeSet does not have a limit on the lengths of strings. + */ +class OffsetList { // Only ever stack-allocated, does not need to inherit UMemory. +public: + OffsetList() : list(staticList), capacity(0), length(0), start(0) {} + + ~OffsetList() { + if(list!=staticList) { + uprv_free(list); + } + } + + // Call exactly once if the list is to be used. + void setMaxLength(int32_t maxLength) { + if(maxLength<=(int32_t)sizeof(staticList)) { + capacity=(int32_t)sizeof(staticList); + } else { + UBool *l=(UBool *)uprv_malloc(maxLength); + if(l!=NULL) { + list=l; + capacity=maxLength; + } + } + uprv_memset(list, 0, capacity); + } + + void clear() { + uprv_memset(list, 0, capacity); + start=length=0; + } + + UBool isEmpty() const { + return (UBool)(length==0); + } + + // Reduce all stored offsets by delta, used when the current position + // moves by delta. + // There must not be any offsets lower than delta. + // If there is an offset equal to delta, it is removed. + // delta=[1..maxLength] + void shift(int32_t delta) { + int32_t i=start+delta; + if(i>=capacity) { + i-=capacity; + } + if(list[i]) { + list[i]=FALSE; + --length; + } + start=i; + } + + // Add an offset. The list must not contain it yet. + // offset=[1..maxLength] + void addOffset(int32_t offset) { + int32_t i=start+offset; + if(i>=capacity) { + i-=capacity; + } + list[i]=TRUE; + ++length; + } + + // offset=[1..maxLength] + UBool containsOffset(int32_t offset) const { + int32_t i=start+offset; + if(i>=capacity) { + i-=capacity; + } + return list[i]; + } + + // Find the lowest stored offset from a non-empty list, remove it, + // and reduce all other offsets by this minimum. + // Returns [1..maxLength]. + int32_t popMinimum() { + // Look for the next offset in list[start+1..capacity-1]. + int32_t i=start, result; + while(++i<capacity) { + if(list[i]) { + list[i]=FALSE; + --length; + result=i-start; + start=i; + return result; + } + } + // i==capacity + + // Wrap around and look for the next offset in list[0..start]. + // Since the list is not empty, there will be one. + result=capacity-start; + i=0; + while(!list[i]) { + ++i; + } + list[i]=FALSE; + --length; + start=i; + return result+=i; + } + +private: + UBool *list; + int32_t capacity; + int32_t length; + int32_t start; + + UBool staticList[16]; +}; + +// Get the number of UTF-8 bytes for a UTF-16 (sub)string. +static int32_t +getUTF8Length(const UChar *s, int32_t length) { + UErrorCode errorCode=U_ZERO_ERROR; + int32_t length8=0; + u_strToUTF8(NULL, 0, &length8, s, length, &errorCode); + if(U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR) { + return length8; + } else { + // The string contains an unpaired surrogate. + // Ignore this string. + return 0; + } +} + +// Append the UTF-8 version of the string to t and return the appended UTF-8 length. +static int32_t +appendUTF8(const UChar *s, int32_t length, uint8_t *t, int32_t capacity) { + UErrorCode errorCode=U_ZERO_ERROR; + int32_t length8=0; + u_strToUTF8((char *)t, capacity, &length8, s, length, &errorCode); + if(U_SUCCESS(errorCode)) { + return length8; + } else { + // The string contains an unpaired surrogate. + // Ignore this string. + return 0; + } +} + +static inline uint8_t +makeSpanLengthByte(int32_t spanLength) { + // 0xfe==UnicodeSetStringSpan::LONG_SPAN + return spanLength<0xfe ? (uint8_t)spanLength : (uint8_t)0xfe; +} + +// Construct for all variants of span(), or only for any one variant. +// Initialize as little as possible, for single use. +UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSet &set, + const UVector &setStrings, + uint32_t which) + : spanSet(0, 0x10ffff), pSpanNotSet(NULL), strings(setStrings), + utf8Lengths(NULL), spanLengths(NULL), utf8(NULL), + utf8Length(0), + maxLength16(0), maxLength8(0), + all((UBool)(which==ALL)) { + spanSet.retainAll(set); + if(which&NOT_CONTAINED) { + // Default to the same sets. + // addToSpanNotSet() will create a separate set if necessary. + pSpanNotSet=&spanSet; + } + + // Determine if the strings even need to be taken into account at all for span() etc. + // If any string is relevant, then all strings need to be used for + // span(longest match) but only the relevant ones for span(while contained). + // TODO: Possible optimization: Distinguish CONTAINED vs. LONGEST_MATCH + // and do not store UTF-8 strings if !thisRelevant and CONTAINED. + // (Only store irrelevant UTF-8 strings for LONGEST_MATCH where they are relevant after all.) + // Also count the lengths of the UTF-8 versions of the strings for memory allocation. + int32_t stringsLength=strings.size(); + + int32_t i, spanLength; + UBool someRelevant=FALSE; + for(i=0; i<stringsLength; ++i) { + const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); + const UChar *s16=string.getBuffer(); + int32_t length16=string.length(); + UBool thisRelevant; + spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED); + if(spanLength<length16) { // Relevant string. + someRelevant=thisRelevant=TRUE; + } else { + thisRelevant=FALSE; + } + if((which&UTF16) && length16>maxLength16) { + maxLength16=length16; + } + if((which&UTF8) && (thisRelevant || (which&CONTAINED))) { + int32_t length8=getUTF8Length(s16, length16); + utf8Length+=length8; + if(length8>maxLength8) { + maxLength8=length8; + } + } + } + if(!someRelevant) { + maxLength16=maxLength8=0; + return; + } + + // Freeze after checking for the need to use strings at all because freezing + // a set takes some time and memory which are wasted if there are no relevant strings. + if(all) { + spanSet.freeze(); + } + + uint8_t *spanBackLengths; + uint8_t *spanUTF8Lengths; + uint8_t *spanBackUTF8Lengths; + + // Allocate a block of meta data. + int32_t allocSize; + if(all) { + // UTF-8 lengths, 4 sets of span lengths, UTF-8 strings. + allocSize=stringsLength*(4+1+1+1+1)+utf8Length; + } else { + allocSize=stringsLength; // One set of span lengths. + if(which&UTF8) { + // UTF-8 lengths and UTF-8 strings. + allocSize+=stringsLength*4+utf8Length; + } + } + if(allocSize<=(int32_t)sizeof(staticLengths)) { + utf8Lengths=staticLengths; + } else { + utf8Lengths=(int32_t *)uprv_malloc(allocSize); + if(utf8Lengths==NULL) { + maxLength16=maxLength8=0; // Prevent usage by making needsStringSpanUTF16/8() return FALSE. + return; // Out of memory. + } + } + + if(all) { + // Store span lengths for all span() variants. + spanLengths=(uint8_t *)(utf8Lengths+stringsLength); + spanBackLengths=spanLengths+stringsLength; + spanUTF8Lengths=spanBackLengths+stringsLength; + spanBackUTF8Lengths=spanUTF8Lengths+stringsLength; + utf8=spanBackUTF8Lengths+stringsLength; + } else { + // Store span lengths for only one span() variant. + if(which&UTF8) { + spanLengths=(uint8_t *)(utf8Lengths+stringsLength); + utf8=spanLengths+stringsLength; + } else { + spanLengths=(uint8_t *)utf8Lengths; + } + spanBackLengths=spanUTF8Lengths=spanBackUTF8Lengths=spanLengths; + } + + // Set the meta data and pSpanNotSet and write the UTF-8 strings. + int32_t utf8Count=0; // Count UTF-8 bytes written so far. + + for(i=0; i<stringsLength; ++i) { + const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); + const UChar *s16=string.getBuffer(); + int32_t length16=string.length(); + spanLength=spanSet.span(s16, length16, USET_SPAN_CONTAINED); + if(spanLength<length16) { // Relevant string. + if(which&UTF16) { + if(which&CONTAINED) { + if(which&FWD) { + spanLengths[i]=makeSpanLengthByte(spanLength); + } + if(which&BACK) { + spanLength=length16-spanSet.spanBack(s16, length16, USET_SPAN_CONTAINED); + spanBackLengths[i]=makeSpanLengthByte(spanLength); + } + } else /* not CONTAINED, not all, but NOT_CONTAINED */ { + spanLengths[i]=spanBackLengths[i]=0; // Only store a relevant/irrelevant flag. + } + } + if(which&UTF8) { + uint8_t *s8=utf8+utf8Count; + int32_t length8=appendUTF8(s16, length16, s8, utf8Length-utf8Count); + utf8Count+=utf8Lengths[i]=length8; + if(length8==0) { // Irrelevant for UTF-8 because not representable in UTF-8. + spanUTF8Lengths[i]=spanBackUTF8Lengths[i]=(uint8_t)ALL_CP_CONTAINED; + } else { // Relevant for UTF-8. + if(which&CONTAINED) { + if(which&FWD) { + spanLength=spanSet.spanUTF8((const char *)s8, length8, USET_SPAN_CONTAINED); + spanUTF8Lengths[i]=makeSpanLengthByte(spanLength); + } + if(which&BACK) { + spanLength=length8-spanSet.spanBackUTF8((const char *)s8, length8, USET_SPAN_CONTAINED); + spanBackUTF8Lengths[i]=makeSpanLengthByte(spanLength); + } + } else /* not CONTAINED, not all, but NOT_CONTAINED */ { + spanUTF8Lengths[i]=spanBackUTF8Lengths[i]=0; // Only store a relevant/irrelevant flag. + } + } + } + if(which&NOT_CONTAINED) { + // Add string start and end code points to the spanNotSet so that + // a span(while not contained) stops before any string. + UChar32 c; + if(which&FWD) { + int32_t len=0; + U16_NEXT(s16, len, length16, c); + addToSpanNotSet(c); + } + if(which&BACK) { + int32_t len=length16; + U16_PREV(s16, 0, len, c); + addToSpanNotSet(c); + } + } + } else { // Irrelevant string. + if(which&UTF8) { + if(which&CONTAINED) { // Only necessary for LONGEST_MATCH. + uint8_t *s8=utf8+utf8Count; + int32_t length8=appendUTF8(s16, length16, s8, utf8Length-utf8Count); + utf8Count+=utf8Lengths[i]=length8; + } else { + utf8Lengths[i]=0; + } + } + if(all) { + spanLengths[i]=spanBackLengths[i]= + spanUTF8Lengths[i]=spanBackUTF8Lengths[i]= + (uint8_t)ALL_CP_CONTAINED; + } else { + // All spanXYZLengths pointers contain the same address. + spanLengths[i]=(uint8_t)ALL_CP_CONTAINED; + } + } + } + + // Finish. + if(all) { + pSpanNotSet->freeze(); + } +} + +// Copy constructor. Assumes which==ALL for a frozen set. +UnicodeSetStringSpan::UnicodeSetStringSpan(const UnicodeSetStringSpan &otherStringSpan, + const UVector &newParentSetStrings) + : spanSet(otherStringSpan.spanSet), pSpanNotSet(NULL), strings(newParentSetStrings), + utf8Lengths(NULL), spanLengths(NULL), utf8(NULL), + utf8Length(otherStringSpan.utf8Length), + maxLength16(otherStringSpan.maxLength16), maxLength8(otherStringSpan.maxLength8), + all(TRUE) { + if(otherStringSpan.pSpanNotSet==&otherStringSpan.spanSet) { + pSpanNotSet=&spanSet; + } else { + pSpanNotSet=otherStringSpan.pSpanNotSet->clone(); + } + + // Allocate a block of meta data. + // UTF-8 lengths, 4 sets of span lengths, UTF-8 strings. + int32_t stringsLength=strings.size(); + int32_t allocSize=stringsLength*(4+1+1+1+1)+utf8Length; + if(allocSize<=(int32_t)sizeof(staticLengths)) { + utf8Lengths=staticLengths; + } else { + utf8Lengths=(int32_t *)uprv_malloc(allocSize); + if(utf8Lengths==NULL) { + maxLength16=maxLength8=0; // Prevent usage by making needsStringSpanUTF16/8() return FALSE. + return; // Out of memory. + } + } + + spanLengths=(uint8_t *)(utf8Lengths+stringsLength); + utf8=spanLengths+stringsLength*4; + uprv_memcpy(utf8Lengths, otherStringSpan.utf8Lengths, allocSize); +} + +UnicodeSetStringSpan::~UnicodeSetStringSpan() { + if(pSpanNotSet!=NULL && pSpanNotSet!=&spanSet) { + delete pSpanNotSet; + } + if(utf8Lengths!=NULL && utf8Lengths!=staticLengths) { + uprv_free(utf8Lengths); + } +} + +void UnicodeSetStringSpan::addToSpanNotSet(UChar32 c) { + if(pSpanNotSet==NULL || pSpanNotSet==&spanSet) { + if(spanSet.contains(c)) { + return; // Nothing to do. + } + UnicodeSet *newSet=spanSet.cloneAsThawed(); + if(newSet==NULL) { + return; // Out of memory. + } else { + pSpanNotSet=newSet; + } + } + pSpanNotSet->add(c); +} + +// Compare strings without any argument checks. Requires length>0. +static inline UBool +matches16(const UChar *s, const UChar *t, int32_t length) { + do { + if(*s++!=*t++) { + return FALSE; + } + } while(--length>0); + return TRUE; +} + +static inline UBool +matches8(const uint8_t *s, const uint8_t *t, int32_t length) { + do { + if(*s++!=*t++) { + return FALSE; + } + } while(--length>0); + return TRUE; +} + +// Compare 16-bit Unicode strings (which may be malformed UTF-16) +// at code point boundaries. +// That is, each edge of a match must not be in the middle of a surrogate pair. +static inline UBool +matches16CPB(const UChar *s, int32_t start, int32_t limit, const UChar *t, int32_t length) { + s+=start; + limit-=start; + return matches16(s, t, length) && + !(0<start && U16_IS_LEAD(s[-1]) && U16_IS_TRAIL(s[0])) && + !(length<limit && U16_IS_LEAD(s[length-1]) && U16_IS_TRAIL(s[length])); +} + +// Does the set contain the next code point? +// If so, return its length; otherwise return its negative length. +static inline int32_t +spanOne(const UnicodeSet &set, const UChar *s, int32_t length) { + UChar c=*s, c2; + if(c>=0xd800 && c<=0xdbff && length>=2 && U16_IS_TRAIL(c2=s[1])) { + return set.contains(U16_GET_SUPPLEMENTARY(c, c2)) ? 2 : -2; + } + return set.contains(c) ? 1 : -1; +} + +static inline int32_t +spanOneBack(const UnicodeSet &set, const UChar *s, int32_t length) { + UChar c=s[length-1], c2; + if(c>=0xdc00 && c<=0xdfff && length>=2 && U16_IS_LEAD(c2=s[length-2])) { + return set.contains(U16_GET_SUPPLEMENTARY(c2, c)) ? 2 : -2; + } + return set.contains(c) ? 1 : -1; +} + +static inline int32_t +spanOneUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) { + UChar32 c=*s; + if(U8_IS_SINGLE(c)) { + return set.contains(c) ? 1 : -1; + } + // Take advantage of non-ASCII fastpaths in U8_NEXT_OR_FFFD(). + int32_t i=0; + U8_NEXT_OR_FFFD(s, i, length, c); + return set.contains(c) ? i : -i; +} + +static inline int32_t +spanOneBackUTF8(const UnicodeSet &set, const uint8_t *s, int32_t length) { + UChar32 c=s[length-1]; + if(U8_IS_SINGLE(c)) { + return set.contains(c) ? 1 : -1; + } + int32_t i=length-1; + c=utf8_prevCharSafeBody(s, 0, &i, c, -3); + length-=i; + return set.contains(c) ? length : -length; +} + +/* + * Note: In span() when spanLength==0 (after a string match, or at the beginning + * after an empty code point span) and in spanNot() and spanNotUTF8(), + * string matching could use a binary search + * because all string matches are done from the same start index. + * + * For UTF-8, this would require a comparison function that returns UTF-16 order. + * + * This optimization should not be necessary for normal UnicodeSets because + * most sets have no strings, and most sets with strings have + * very few very short strings. + * For cases with many strings, it might be better to use a different API + * and implementation with a DFA (state machine). + */ + +/* + * Algorithm for span(USET_SPAN_CONTAINED) + * + * Theoretical algorithm: + * - Iterate through the string, and at each code point boundary: + * + If the code point there is in the set, then remember to continue after it. + * + If a set string matches at the current position, then remember to continue after it. + * + Either recursively span for each code point or string match, + * or recursively span for all but the shortest one and + * iteratively continue the span with the shortest local match. + * + Remember the longest recursive span (the farthest end point). + * + If there is no match at the current position, neither for the code point there + * nor for any set string, then stop and return the longest recursive span length. + * + * Optimized implementation: + * + * (We assume that most sets will have very few very short strings. + * A span using a string-less set is extremely fast.) + * + * Create and cache a spanSet which contains all of the single code points + * of the original set but none of its strings. + * + * - Start with spanLength=spanSet.span(USET_SPAN_CONTAINED). + * - Loop: + * + Try to match each set string at the end of the spanLength. + * ~ Set strings that start with set-contained code points must be matched + * with a partial overlap because the recursive algorithm would have tried + * to match them at every position. + * ~ Set strings that entirely consist of set-contained code points + * are irrelevant for span(USET_SPAN_CONTAINED) because the + * recursive algorithm would continue after them anyway + * and find the longest recursive match from their end. + * ~ Rather than recursing, note each end point of a set string match. + * + If no set string matched after spanSet.span(), then return + * with where the spanSet.span() ended. + * + If at least one set string matched after spanSet.span(), then + * pop the shortest string match end point and continue + * the loop, trying to match all set strings from there. + * + If at least one more set string matched after a previous string match, + * then test if the code point after the previous string match is also + * contained in the set. + * Continue the loop with the shortest end point of either this code point + * or a matching set string. + * + If no more set string matched after a previous string match, + * then try another spanLength=spanSet.span(USET_SPAN_CONTAINED). + * Stop if spanLength==0, otherwise continue the loop. + * + * By noting each end point of a set string match, + * the function visits each string position at most once and finishes + * in linear time. + * + * The recursive algorithm may visit the same string position many times + * if multiple paths lead to it and finishes in exponential time. + */ + +/* + * Algorithm for span(USET_SPAN_SIMPLE) + * + * Theoretical algorithm: + * - Iterate through the string, and at each code point boundary: + * + If the code point there is in the set, then remember to continue after it. + * + If a set string matches at the current position, then remember to continue after it. + * + Continue from the farthest match position and ignore all others. + * + If there is no match at the current position, + * then stop and return the current position. + * + * Optimized implementation: + * + * (Same assumption and spanSet as above.) + * + * - Start with spanLength=spanSet.span(USET_SPAN_CONTAINED). + * - Loop: + * + Try to match each set string at the end of the spanLength. + * ~ Set strings that start with set-contained code points must be matched + * with a partial overlap because the standard algorithm would have tried + * to match them earlier. + * ~ Set strings that entirely consist of set-contained code points + * must be matched with a full overlap because the longest-match algorithm + * would hide set string matches that end earlier. + * Such set strings need not be matched earlier inside the code point span + * because the standard algorithm would then have continued after + * the set string match anyway. + * ~ Remember the longest set string match (farthest end point) from the earliest + * starting point. + * + If no set string matched after spanSet.span(), then return + * with where the spanSet.span() ended. + * + If at least one set string matched, then continue the loop after the + * longest match from the earliest position. + * + If no more set string matched after a previous string match, + * then try another spanLength=spanSet.span(USET_SPAN_CONTAINED). + * Stop if spanLength==0, otherwise continue the loop. + */ + +int32_t UnicodeSetStringSpan::span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { + if(spanCondition==USET_SPAN_NOT_CONTAINED) { + return spanNot(s, length); + } + int32_t spanLength=spanSet.span(s, length, USET_SPAN_CONTAINED); + if(spanLength==length) { + return length; + } + + // Consider strings; they may overlap with the span. + OffsetList offsets; + if(spanCondition==USET_SPAN_CONTAINED) { + // Use offset list to try all possibilities. + offsets.setMaxLength(maxLength16); + } + int32_t pos=spanLength, rest=length-pos; + int32_t i, stringsLength=strings.size(); + for(;;) { + if(spanCondition==USET_SPAN_CONTAINED) { + for(i=0; i<stringsLength; ++i) { + int32_t overlap=spanLengths[i]; + if(overlap==ALL_CP_CONTAINED) { + continue; // Irrelevant string. + } + const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); + const UChar *s16=string.getBuffer(); + int32_t length16=string.length(); + + // Try to match this string at pos-overlap..pos. + if(overlap>=LONG_SPAN) { + overlap=length16; + // While contained: No point matching fully inside the code point span. + U16_BACK_1(s16, 0, overlap); // Length of the string minus the last code point. + } + if(overlap>spanLength) { + overlap=spanLength; + } + int32_t inc=length16-overlap; // Keep overlap+inc==length16. + for(;;) { + if(inc>rest) { + break; + } + // Try to match if the increment is not listed already. + if(!offsets.containsOffset(inc) && matches16CPB(s, pos-overlap, length, s16, length16)) { + if(inc==rest) { + return length; // Reached the end of the string. + } + offsets.addOffset(inc); + } + if(overlap==0) { + break; + } + --overlap; + ++inc; + } + } + } else /* USET_SPAN_SIMPLE */ { + int32_t maxInc=0, maxOverlap=0; + for(i=0; i<stringsLength; ++i) { + int32_t overlap=spanLengths[i]; + // For longest match, we do need to try to match even an all-contained string + // to find the match from the earliest start. + + const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); + const UChar *s16=string.getBuffer(); + int32_t length16=string.length(); + + // Try to match this string at pos-overlap..pos. + if(overlap>=LONG_SPAN) { + overlap=length16; + // Longest match: Need to match fully inside the code point span + // to find the match from the earliest start. + } + if(overlap>spanLength) { + overlap=spanLength; + } + int32_t inc=length16-overlap; // Keep overlap+inc==length16. + for(;;) { + if(inc>rest || overlap<maxOverlap) { + break; + } + // Try to match if the string is longer or starts earlier. + if( (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ inc>maxInc) && + matches16CPB(s, pos-overlap, length, s16, length16) + ) { + maxInc=inc; // Longest match from earliest start. + maxOverlap=overlap; + break; + } + --overlap; + ++inc; + } + } + + if(maxInc!=0 || maxOverlap!=0) { + // Longest-match algorithm, and there was a string match. + // Simply continue after it. + pos+=maxInc; + rest-=maxInc; + if(rest==0) { + return length; // Reached the end of the string. + } + spanLength=0; // Match strings from after a string match. + continue; + } + } + // Finished trying to match all strings at pos. + + if(spanLength!=0 || pos==0) { + // The position is after an unlimited code point span (spanLength!=0), + // not after a string match. + // The only position where spanLength==0 after a span is pos==0. + // Otherwise, an unlimited code point span is only tried again when no + // strings match, and if such a non-initial span fails we stop. + if(offsets.isEmpty()) { + return pos; // No strings matched after a span. + } + // Match strings from after the next string match. + } else { + // The position is after a string match (or a single code point). + if(offsets.isEmpty()) { + // No more strings matched after a previous string match. + // Try another code point span from after the last string match. + spanLength=spanSet.span(s+pos, rest, USET_SPAN_CONTAINED); + if( spanLength==rest || // Reached the end of the string, or + spanLength==0 // neither strings nor span progressed. + ) { + return pos+spanLength; + } + pos+=spanLength; + rest-=spanLength; + continue; // spanLength>0: Match strings from after a span. + } else { + // Try to match only one code point from after a string match if some + // string matched beyond it, so that we try all possible positions + // and don't overshoot. + spanLength=spanOne(spanSet, s+pos, rest); + if(spanLength>0) { + if(spanLength==rest) { + return length; // Reached the end of the string. + } + // Match strings after this code point. + // There cannot be any increments below it because UnicodeSet strings + // contain multiple code points. + pos+=spanLength; + rest-=spanLength; + offsets.shift(spanLength); + spanLength=0; + continue; // Match strings from after a single code point. + } + // Match strings from after the next string match. + } + } + int32_t minOffset=offsets.popMinimum(); + pos+=minOffset; + rest-=minOffset; + spanLength=0; // Match strings from after a string match. + } +} + +int32_t UnicodeSetStringSpan::spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const { + if(spanCondition==USET_SPAN_NOT_CONTAINED) { + return spanNotBack(s, length); + } + int32_t pos=spanSet.spanBack(s, length, USET_SPAN_CONTAINED); + if(pos==0) { + return 0; + } + int32_t spanLength=length-pos; + + // Consider strings; they may overlap with the span. + OffsetList offsets; + if(spanCondition==USET_SPAN_CONTAINED) { + // Use offset list to try all possibilities. + offsets.setMaxLength(maxLength16); + } + int32_t i, stringsLength=strings.size(); + uint8_t *spanBackLengths=spanLengths; + if(all) { + spanBackLengths+=stringsLength; + } + for(;;) { + if(spanCondition==USET_SPAN_CONTAINED) { + for(i=0; i<stringsLength; ++i) { + int32_t overlap=spanBackLengths[i]; + if(overlap==ALL_CP_CONTAINED) { + continue; // Irrelevant string. + } + const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); + const UChar *s16=string.getBuffer(); + int32_t length16=string.length(); + + // Try to match this string at pos-(length16-overlap)..pos-length16. + if(overlap>=LONG_SPAN) { + overlap=length16; + // While contained: No point matching fully inside the code point span. + int32_t len1=0; + U16_FWD_1(s16, len1, overlap); + overlap-=len1; // Length of the string minus the first code point. + } + if(overlap>spanLength) { + overlap=spanLength; + } + int32_t dec=length16-overlap; // Keep dec+overlap==length16. + for(;;) { + if(dec>pos) { + break; + } + // Try to match if the decrement is not listed already. + if(!offsets.containsOffset(dec) && matches16CPB(s, pos-dec, length, s16, length16)) { + if(dec==pos) { + return 0; // Reached the start of the string. + } + offsets.addOffset(dec); + } + if(overlap==0) { + break; + } + --overlap; + ++dec; + } + } + } else /* USET_SPAN_SIMPLE */ { + int32_t maxDec=0, maxOverlap=0; + for(i=0; i<stringsLength; ++i) { + int32_t overlap=spanBackLengths[i]; + // For longest match, we do need to try to match even an all-contained string + // to find the match from the latest end. + + const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); + const UChar *s16=string.getBuffer(); + int32_t length16=string.length(); + + // Try to match this string at pos-(length16-overlap)..pos-length16. + if(overlap>=LONG_SPAN) { + overlap=length16; + // Longest match: Need to match fully inside the code point span + // to find the match from the latest end. + } + if(overlap>spanLength) { + overlap=spanLength; + } + int32_t dec=length16-overlap; // Keep dec+overlap==length16. + for(;;) { + if(dec>pos || overlap<maxOverlap) { + break; + } + // Try to match if the string is longer or ends later. + if( (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ dec>maxDec) && + matches16CPB(s, pos-dec, length, s16, length16) + ) { + maxDec=dec; // Longest match from latest end. + maxOverlap=overlap; + break; + } + --overlap; + ++dec; + } + } + + if(maxDec!=0 || maxOverlap!=0) { + // Longest-match algorithm, and there was a string match. + // Simply continue before it. + pos-=maxDec; + if(pos==0) { + return 0; // Reached the start of the string. + } + spanLength=0; // Match strings from before a string match. + continue; + } + } + // Finished trying to match all strings at pos. + + if(spanLength!=0 || pos==length) { + // The position is before an unlimited code point span (spanLength!=0), + // not before a string match. + // The only position where spanLength==0 before a span is pos==length. + // Otherwise, an unlimited code point span is only tried again when no + // strings match, and if such a non-initial span fails we stop. + if(offsets.isEmpty()) { + return pos; // No strings matched before a span. + } + // Match strings from before the next string match. + } else { + // The position is before a string match (or a single code point). + if(offsets.isEmpty()) { + // No more strings matched before a previous string match. + // Try another code point span from before the last string match. + int32_t oldPos=pos; + pos=spanSet.spanBack(s, oldPos, USET_SPAN_CONTAINED); + spanLength=oldPos-pos; + if( pos==0 || // Reached the start of the string, or + spanLength==0 // neither strings nor span progressed. + ) { + return pos; + } + continue; // spanLength>0: Match strings from before a span. + } else { + // Try to match only one code point from before a string match if some + // string matched beyond it, so that we try all possible positions + // and don't overshoot. + spanLength=spanOneBack(spanSet, s, pos); + if(spanLength>0) { + if(spanLength==pos) { + return 0; // Reached the start of the string. + } + // Match strings before this code point. + // There cannot be any decrements below it because UnicodeSet strings + // contain multiple code points. + pos-=spanLength; + offsets.shift(spanLength); + spanLength=0; + continue; // Match strings from before a single code point. + } + // Match strings from before the next string match. + } + } + pos-=offsets.popMinimum(); + spanLength=0; // Match strings from before a string match. + } +} + +int32_t UnicodeSetStringSpan::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const { + if(spanCondition==USET_SPAN_NOT_CONTAINED) { + return spanNotUTF8(s, length); + } + int32_t spanLength=spanSet.spanUTF8((const char *)s, length, USET_SPAN_CONTAINED); + if(spanLength==length) { + return length; + } + + // Consider strings; they may overlap with the span. + OffsetList offsets; + if(spanCondition==USET_SPAN_CONTAINED) { + // Use offset list to try all possibilities. + offsets.setMaxLength(maxLength8); + } + int32_t pos=spanLength, rest=length-pos; + int32_t i, stringsLength=strings.size(); + uint8_t *spanUTF8Lengths=spanLengths; + if(all) { + spanUTF8Lengths+=2*stringsLength; + } + for(;;) { + const uint8_t *s8=utf8; + int32_t length8; + if(spanCondition==USET_SPAN_CONTAINED) { + for(i=0; i<stringsLength; ++i) { + length8=utf8Lengths[i]; + if(length8==0) { + continue; // String not representable in UTF-8. + } + int32_t overlap=spanUTF8Lengths[i]; + if(overlap==ALL_CP_CONTAINED) { + s8+=length8; + continue; // Irrelevant string. + } + + // Try to match this string at pos-overlap..pos. + if(overlap>=LONG_SPAN) { + overlap=length8; + // While contained: No point matching fully inside the code point span. + U8_BACK_1(s8, 0, overlap); // Length of the string minus the last code point. + } + if(overlap>spanLength) { + overlap=spanLength; + } + int32_t inc=length8-overlap; // Keep overlap+inc==length8. + for(;;) { + if(inc>rest) { + break; + } + // Try to match if the increment is not listed already. + // Match at code point boundaries. (The UTF-8 strings were converted + // from UTF-16 and are guaranteed to be well-formed.) + if(!U8_IS_TRAIL(s[pos-overlap]) && + !offsets.containsOffset(inc) && + matches8(s+pos-overlap, s8, length8)) { + if(inc==rest) { + return length; // Reached the end of the string. + } + offsets.addOffset(inc); + } + if(overlap==0) { + break; + } + --overlap; + ++inc; + } + s8+=length8; + } + } else /* USET_SPAN_SIMPLE */ { + int32_t maxInc=0, maxOverlap=0; + for(i=0; i<stringsLength; ++i) { + length8=utf8Lengths[i]; + if(length8==0) { + continue; // String not representable in UTF-8. + } + int32_t overlap=spanUTF8Lengths[i]; + // For longest match, we do need to try to match even an all-contained string + // to find the match from the earliest start. + + // Try to match this string at pos-overlap..pos. + if(overlap>=LONG_SPAN) { + overlap=length8; + // Longest match: Need to match fully inside the code point span + // to find the match from the earliest start. + } + if(overlap>spanLength) { + overlap=spanLength; + } + int32_t inc=length8-overlap; // Keep overlap+inc==length8. + for(;;) { + if(inc>rest || overlap<maxOverlap) { + break; + } + // Try to match if the string is longer or starts earlier. + // Match at code point boundaries. (The UTF-8 strings were converted + // from UTF-16 and are guaranteed to be well-formed.) + if(!U8_IS_TRAIL(s[pos-overlap]) && + (overlap>maxOverlap || + /* redundant overlap==maxOverlap && */ inc>maxInc) && + matches8(s+pos-overlap, s8, length8)) { + maxInc=inc; // Longest match from earliest start. + maxOverlap=overlap; + break; + } + --overlap; + ++inc; + } + s8+=length8; + } + + if(maxInc!=0 || maxOverlap!=0) { + // Longest-match algorithm, and there was a string match. + // Simply continue after it. + pos+=maxInc; + rest-=maxInc; + if(rest==0) { + return length; // Reached the end of the string. + } + spanLength=0; // Match strings from after a string match. + continue; + } + } + // Finished trying to match all strings at pos. + + if(spanLength!=0 || pos==0) { + // The position is after an unlimited code point span (spanLength!=0), + // not after a string match. + // The only position where spanLength==0 after a span is pos==0. + // Otherwise, an unlimited code point span is only tried again when no + // strings match, and if such a non-initial span fails we stop. + if(offsets.isEmpty()) { + return pos; // No strings matched after a span. + } + // Match strings from after the next string match. + } else { + // The position is after a string match (or a single code point). + if(offsets.isEmpty()) { + // No more strings matched after a previous string match. + // Try another code point span from after the last string match. + spanLength=spanSet.spanUTF8((const char *)s+pos, rest, USET_SPAN_CONTAINED); + if( spanLength==rest || // Reached the end of the string, or + spanLength==0 // neither strings nor span progressed. + ) { + return pos+spanLength; + } + pos+=spanLength; + rest-=spanLength; + continue; // spanLength>0: Match strings from after a span. + } else { + // Try to match only one code point from after a string match if some + // string matched beyond it, so that we try all possible positions + // and don't overshoot. + spanLength=spanOneUTF8(spanSet, s+pos, rest); + if(spanLength>0) { + if(spanLength==rest) { + return length; // Reached the end of the string. + } + // Match strings after this code point. + // There cannot be any increments below it because UnicodeSet strings + // contain multiple code points. + pos+=spanLength; + rest-=spanLength; + offsets.shift(spanLength); + spanLength=0; + continue; // Match strings from after a single code point. + } + // Match strings from after the next string match. + } + } + int32_t minOffset=offsets.popMinimum(); + pos+=minOffset; + rest-=minOffset; + spanLength=0; // Match strings from after a string match. + } +} + +int32_t UnicodeSetStringSpan::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const { + if(spanCondition==USET_SPAN_NOT_CONTAINED) { + return spanNotBackUTF8(s, length); + } + int32_t pos=spanSet.spanBackUTF8((const char *)s, length, USET_SPAN_CONTAINED); + if(pos==0) { + return 0; + } + int32_t spanLength=length-pos; + + // Consider strings; they may overlap with the span. + OffsetList offsets; + if(spanCondition==USET_SPAN_CONTAINED) { + // Use offset list to try all possibilities. + offsets.setMaxLength(maxLength8); + } + int32_t i, stringsLength=strings.size(); + uint8_t *spanBackUTF8Lengths=spanLengths; + if(all) { + spanBackUTF8Lengths+=3*stringsLength; + } + for(;;) { + const uint8_t *s8=utf8; + int32_t length8; + if(spanCondition==USET_SPAN_CONTAINED) { + for(i=0; i<stringsLength; ++i) { + length8=utf8Lengths[i]; + if(length8==0) { + continue; // String not representable in UTF-8. + } + int32_t overlap=spanBackUTF8Lengths[i]; + if(overlap==ALL_CP_CONTAINED) { + s8+=length8; + continue; // Irrelevant string. + } + + // Try to match this string at pos-(length8-overlap)..pos-length8. + if(overlap>=LONG_SPAN) { + overlap=length8; + // While contained: No point matching fully inside the code point span. + int32_t len1=0; + U8_FWD_1(s8, len1, overlap); + overlap-=len1; // Length of the string minus the first code point. + } + if(overlap>spanLength) { + overlap=spanLength; + } + int32_t dec=length8-overlap; // Keep dec+overlap==length8. + for(;;) { + if(dec>pos) { + break; + } + // Try to match if the decrement is not listed already. + // Match at code point boundaries. (The UTF-8 strings were converted + // from UTF-16 and are guaranteed to be well-formed.) + if( !U8_IS_TRAIL(s[pos-dec]) && + !offsets.containsOffset(dec) && + matches8(s+pos-dec, s8, length8) + ) { + if(dec==pos) { + return 0; // Reached the start of the string. + } + offsets.addOffset(dec); + } + if(overlap==0) { + break; + } + --overlap; + ++dec; + } + s8+=length8; + } + } else /* USET_SPAN_SIMPLE */ { + int32_t maxDec=0, maxOverlap=0; + for(i=0; i<stringsLength; ++i) { + length8=utf8Lengths[i]; + if(length8==0) { + continue; // String not representable in UTF-8. + } + int32_t overlap=spanBackUTF8Lengths[i]; + // For longest match, we do need to try to match even an all-contained string + // to find the match from the latest end. + + // Try to match this string at pos-(length8-overlap)..pos-length8. + if(overlap>=LONG_SPAN) { + overlap=length8; + // Longest match: Need to match fully inside the code point span + // to find the match from the latest end. + } + if(overlap>spanLength) { + overlap=spanLength; + } + int32_t dec=length8-overlap; // Keep dec+overlap==length8. + for(;;) { + if(dec>pos || overlap<maxOverlap) { + break; + } + // Try to match if the string is longer or ends later. + // Match at code point boundaries. (The UTF-8 strings were converted + // from UTF-16 and are guaranteed to be well-formed.) + if( !U8_IS_TRAIL(s[pos-dec]) && + (overlap>maxOverlap || /* redundant overlap==maxOverlap && */ dec>maxDec) && + matches8(s+pos-dec, s8, length8) + ) { + maxDec=dec; // Longest match from latest end. + maxOverlap=overlap; + break; + } + --overlap; + ++dec; + } + s8+=length8; + } + + if(maxDec!=0 || maxOverlap!=0) { + // Longest-match algorithm, and there was a string match. + // Simply continue before it. + pos-=maxDec; + if(pos==0) { + return 0; // Reached the start of the string. + } + spanLength=0; // Match strings from before a string match. + continue; + } + } + // Finished trying to match all strings at pos. + + if(spanLength!=0 || pos==length) { + // The position is before an unlimited code point span (spanLength!=0), + // not before a string match. + // The only position where spanLength==0 before a span is pos==length. + // Otherwise, an unlimited code point span is only tried again when no + // strings match, and if such a non-initial span fails we stop. + if(offsets.isEmpty()) { + return pos; // No strings matched before a span. + } + // Match strings from before the next string match. + } else { + // The position is before a string match (or a single code point). + if(offsets.isEmpty()) { + // No more strings matched before a previous string match. + // Try another code point span from before the last string match. + int32_t oldPos=pos; + pos=spanSet.spanBackUTF8((const char *)s, oldPos, USET_SPAN_CONTAINED); + spanLength=oldPos-pos; + if( pos==0 || // Reached the start of the string, or + spanLength==0 // neither strings nor span progressed. + ) { + return pos; + } + continue; // spanLength>0: Match strings from before a span. + } else { + // Try to match only one code point from before a string match if some + // string matched beyond it, so that we try all possible positions + // and don't overshoot. + spanLength=spanOneBackUTF8(spanSet, s, pos); + if(spanLength>0) { + if(spanLength==pos) { + return 0; // Reached the start of the string. + } + // Match strings before this code point. + // There cannot be any decrements below it because UnicodeSet strings + // contain multiple code points. + pos-=spanLength; + offsets.shift(spanLength); + spanLength=0; + continue; // Match strings from before a single code point. + } + // Match strings from before the next string match. + } + } + pos-=offsets.popMinimum(); + spanLength=0; // Match strings from before a string match. + } +} + +/* + * Algorithm for spanNot()==span(USET_SPAN_NOT_CONTAINED) + * + * Theoretical algorithm: + * - Iterate through the string, and at each code point boundary: + * + If the code point there is in the set, then return with the current position. + * + If a set string matches at the current position, then return with the current position. + * + * Optimized implementation: + * + * (Same assumption as for span() above.) + * + * Create and cache a spanNotSet which contains all of the single code points + * of the original set but none of its strings. + * For each set string add its initial code point to the spanNotSet. + * (Also add its final code point for spanNotBack().) + * + * - Loop: + * + Do spanLength=spanNotSet.span(USET_SPAN_NOT_CONTAINED). + * + If the current code point is in the original set, then + * return the current position. + * + If any set string matches at the current position, then + * return the current position. + * + If there is no match at the current position, neither for the code point there + * nor for any set string, then skip this code point and continue the loop. + * This happens for set-string-initial code points that were added to spanNotSet + * when there is not actually a match for such a set string. + */ + +int32_t UnicodeSetStringSpan::spanNot(const UChar *s, int32_t length) const { + int32_t pos=0, rest=length; + int32_t i, stringsLength=strings.size(); + do { + // Span until we find a code point from the set, + // or a code point that starts or ends some string. + i=pSpanNotSet->span(s+pos, rest, USET_SPAN_NOT_CONTAINED); + if(i==rest) { + return length; // Reached the end of the string. + } + pos+=i; + rest-=i; + + // Check whether the current code point is in the original set, + // without the string starts and ends. + int32_t cpLength=spanOne(spanSet, s+pos, rest); + if(cpLength>0) { + return pos; // There is a set element at pos. + } + + // Try to match the strings at pos. + for(i=0; i<stringsLength; ++i) { + if(spanLengths[i]==ALL_CP_CONTAINED) { + continue; // Irrelevant string. + } + const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); + const UChar *s16=string.getBuffer(); + int32_t length16=string.length(); + if(length16<=rest && matches16CPB(s, pos, length, s16, length16)) { + return pos; // There is a set element at pos. + } + } + + // The span(while not contained) ended on a string start/end which is + // not in the original set. Skip this code point and continue. + // cpLength<0 + pos-=cpLength; + rest+=cpLength; + } while(rest!=0); + return length; // Reached the end of the string. +} + +int32_t UnicodeSetStringSpan::spanNotBack(const UChar *s, int32_t length) const { + int32_t pos=length; + int32_t i, stringsLength=strings.size(); + do { + // Span until we find a code point from the set, + // or a code point that starts or ends some string. + pos=pSpanNotSet->spanBack(s, pos, USET_SPAN_NOT_CONTAINED); + if(pos==0) { + return 0; // Reached the start of the string. + } + + // Check whether the current code point is in the original set, + // without the string starts and ends. + int32_t cpLength=spanOneBack(spanSet, s, pos); + if(cpLength>0) { + return pos; // There is a set element at pos. + } + + // Try to match the strings at pos. + for(i=0; i<stringsLength; ++i) { + // Use spanLengths rather than a spanBackLengths pointer because + // it is easier and we only need to know whether the string is irrelevant + // which is the same in either array. + if(spanLengths[i]==ALL_CP_CONTAINED) { + continue; // Irrelevant string. + } + const UnicodeString &string=*(const UnicodeString *)strings.elementAt(i); + const UChar *s16=string.getBuffer(); + int32_t length16=string.length(); + if(length16<=pos && matches16CPB(s, pos-length16, length, s16, length16)) { + return pos; // There is a set element at pos. + } + } + + // The span(while not contained) ended on a string start/end which is + // not in the original set. Skip this code point and continue. + // cpLength<0 + pos+=cpLength; + } while(pos!=0); + return 0; // Reached the start of the string. +} + +int32_t UnicodeSetStringSpan::spanNotUTF8(const uint8_t *s, int32_t length) const { + int32_t pos=0, rest=length; + int32_t i, stringsLength=strings.size(); + uint8_t *spanUTF8Lengths=spanLengths; + if(all) { + spanUTF8Lengths+=2*stringsLength; + } + do { + // Span until we find a code point from the set, + // or a code point that starts or ends some string. + i=pSpanNotSet->spanUTF8((const char *)s+pos, rest, USET_SPAN_NOT_CONTAINED); + if(i==rest) { + return length; // Reached the end of the string. + } + pos+=i; + rest-=i; + + // Check whether the current code point is in the original set, + // without the string starts and ends. + int32_t cpLength=spanOneUTF8(spanSet, s+pos, rest); + if(cpLength>0) { + return pos; // There is a set element at pos. + } + + // Try to match the strings at pos. + const uint8_t *s8=utf8; + int32_t length8; + for(i=0; i<stringsLength; ++i) { + length8=utf8Lengths[i]; + // ALL_CP_CONTAINED: Irrelevant string. + if(length8!=0 && spanUTF8Lengths[i]!=ALL_CP_CONTAINED && length8<=rest && matches8(s+pos, s8, length8)) { + return pos; // There is a set element at pos. + } + s8+=length8; + } + + // The span(while not contained) ended on a string start/end which is + // not in the original set. Skip this code point and continue. + // cpLength<0 + pos-=cpLength; + rest+=cpLength; + } while(rest!=0); + return length; // Reached the end of the string. +} + +int32_t UnicodeSetStringSpan::spanNotBackUTF8(const uint8_t *s, int32_t length) const { + int32_t pos=length; + int32_t i, stringsLength=strings.size(); + uint8_t *spanBackUTF8Lengths=spanLengths; + if(all) { + spanBackUTF8Lengths+=3*stringsLength; + } + do { + // Span until we find a code point from the set, + // or a code point that starts or ends some string. + pos=pSpanNotSet->spanBackUTF8((const char *)s, pos, USET_SPAN_NOT_CONTAINED); + if(pos==0) { + return 0; // Reached the start of the string. + } + + // Check whether the current code point is in the original set, + // without the string starts and ends. + int32_t cpLength=spanOneBackUTF8(spanSet, s, pos); + if(cpLength>0) { + return pos; // There is a set element at pos. + } + + // Try to match the strings at pos. + const uint8_t *s8=utf8; + int32_t length8; + for(i=0; i<stringsLength; ++i) { + length8=utf8Lengths[i]; + // ALL_CP_CONTAINED: Irrelevant string. + if(length8!=0 && spanBackUTF8Lengths[i]!=ALL_CP_CONTAINED && length8<=pos && matches8(s+pos-length8, s8, length8)) { + return pos; // There is a set element at pos. + } + s8+=length8; + } + + // The span(while not contained) ended on a string start/end which is + // not in the original set. Skip this code point and continue. + // cpLength<0 + pos+=cpLength; + } while(pos!=0); + return 0; // Reached the start of the string. +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/unisetspan.h b/thirdparty/icu4c/common/unisetspan.h new file mode 100644 index 0000000000..9a1307a907 --- /dev/null +++ b/thirdparty/icu4c/common/unisetspan.h @@ -0,0 +1,157 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2007, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: unisetspan.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2007mar01 +* created by: Markus W. Scherer +*/ + +#ifndef __UNISETSPAN_H__ +#define __UNISETSPAN_H__ + +#include "unicode/utypes.h" +#include "unicode/uniset.h" + +U_NAMESPACE_BEGIN + +/* + * Implement span() etc. for a set with strings. + * Avoid recursion because of its exponential complexity. + * Instead, try multiple paths at once and track them with an IndexList. + */ +class UnicodeSetStringSpan : public UMemory { +public: + /* + * Which span() variant will be used? + * The object is either built for one variant and used once, + * or built for all and may be used many times. + */ + enum { + FWD = 0x20, + BACK = 0x10, + UTF16 = 8, + UTF8 = 4, + CONTAINED = 2, + NOT_CONTAINED = 1, + + ALL = 0x3f, + + FWD_UTF16_CONTAINED = FWD | UTF16 | CONTAINED, + FWD_UTF16_NOT_CONTAINED = FWD | UTF16 | NOT_CONTAINED, + FWD_UTF8_CONTAINED = FWD | UTF8 | CONTAINED, + FWD_UTF8_NOT_CONTAINED = FWD | UTF8 | NOT_CONTAINED, + BACK_UTF16_CONTAINED = BACK | UTF16 | CONTAINED, + BACK_UTF16_NOT_CONTAINED= BACK | UTF16 | NOT_CONTAINED, + BACK_UTF8_CONTAINED = BACK | UTF8 | CONTAINED, + BACK_UTF8_NOT_CONTAINED = BACK | UTF8 | NOT_CONTAINED + }; + + UnicodeSetStringSpan(const UnicodeSet &set, const UVector &setStrings, uint32_t which); + + // Copy constructor. Assumes which==ALL for a frozen set. + UnicodeSetStringSpan(const UnicodeSetStringSpan &otherStringSpan, const UVector &newParentSetStrings); + + ~UnicodeSetStringSpan(); + + /* + * Do the strings need to be checked in span() etc.? + * @return true if strings need to be checked (call span() here), + * false if not (use a BMPSet for best performance). + */ + inline UBool needsStringSpanUTF16(); + inline UBool needsStringSpanUTF8(); + + // For fast UnicodeSet::contains(c). + inline UBool contains(UChar32 c) const; + + int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; + + int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const; + + int32_t spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const; + + int32_t spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCondition) const; + +private: + // Special spanLength byte values. + enum { + // The spanLength is >=0xfe. + LONG_SPAN=0xfe, + // All code points in the string are contained in the parent set. + ALL_CP_CONTAINED=0xff + }; + + // Add a starting or ending string character to the spanNotSet + // so that a character span ends before any string. + void addToSpanNotSet(UChar32 c); + + int32_t spanNot(const UChar *s, int32_t length) const; + int32_t spanNotBack(const UChar *s, int32_t length) const; + int32_t spanNotUTF8(const uint8_t *s, int32_t length) const; + int32_t spanNotBackUTF8(const uint8_t *s, int32_t length) const; + + // Set for span(). Same as parent but without strings. + UnicodeSet spanSet; + + // Set for span(not contained). + // Same as spanSet, plus characters that start or end strings. + UnicodeSet *pSpanNotSet; + + // The strings of the parent set. + const UVector &strings; + + // Pointer to the UTF-8 string lengths. + // Also pointer to further allocated storage for meta data and + // UTF-8 string contents as necessary. + int32_t *utf8Lengths; + + // Pointer to the part of the (utf8Lengths) memory block that stores + // the lengths of span(), spanBack() etc. for each string. + uint8_t *spanLengths; + + // Pointer to the part of the (utf8Lengths) memory block that stores + // the UTF-8 versions of the parent set's strings. + uint8_t *utf8; + + // Number of bytes for all UTF-8 versions of strings together. + int32_t utf8Length; + + // Maximum lengths of relevant strings. + int32_t maxLength16; + int32_t maxLength8; + + // Set up for all variants of span()? + UBool all; + + // Memory for small numbers and lengths of strings. + // For example, for 8 strings: + // 8 UTF-8 lengths, 8*4 bytes span lengths, 8*2 3-byte UTF-8 characters + // = 112 bytes = int32_t[28]. + int32_t staticLengths[32]; +}; + +UBool UnicodeSetStringSpan::needsStringSpanUTF16() { + return (UBool)(maxLength16!=0); +} + +UBool UnicodeSetStringSpan::needsStringSpanUTF8() { + return (UBool)(maxLength8!=0); +} + +UBool UnicodeSetStringSpan::contains(UChar32 c) const { + return spanSet.contains(c); +} + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/unistr.cpp b/thirdparty/icu4c/common/unistr.cpp new file mode 100644 index 0000000000..077b4d6ef2 --- /dev/null +++ b/thirdparty/icu4c/common/unistr.cpp @@ -0,0 +1,1982 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1999-2016, International Business Machines Corporation and +* others. All Rights Reserved. +****************************************************************************** +* +* File unistr.cpp +* +* Modification History: +* +* Date Name Description +* 09/25/98 stephen Creation. +* 04/20/99 stephen Overhauled per 4/16 code review. +* 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX +* 11/18/99 aliu Added handleReplaceBetween() to make inherit from +* Replaceable. +* 06/25/01 grhoten Removed the dependency on iostream +****************************************************************************** +*/ + +#include "unicode/utypes.h" +#include "unicode/appendable.h" +#include "unicode/putil.h" +#include "cstring.h" +#include "cmemory.h" +#include "unicode/ustring.h" +#include "unicode/unistr.h" +#include "unicode/utf.h" +#include "unicode/utf16.h" +#include "uelement.h" +#include "ustr_imp.h" +#include "umutex.h" +#include "uassert.h" + +#if 0 + +#include <iostream> +using namespace std; + +//DEBUGGING +void +print(const UnicodeString& s, + const char *name) +{ + UChar c; + cout << name << ":|"; + for(int i = 0; i < s.length(); ++i) { + c = s[i]; + if(c>= 0x007E || c < 0x0020) + cout << "[0x" << hex << s[i] << "]"; + else + cout << (char) s[i]; + } + cout << '|' << endl; +} + +void +print(const UChar *s, + int32_t len, + const char *name) +{ + UChar c; + cout << name << ":|"; + for(int i = 0; i < len; ++i) { + c = s[i]; + if(c>= 0x007E || c < 0x0020) + cout << "[0x" << hex << s[i] << "]"; + else + cout << (char) s[i]; + } + cout << '|' << endl; +} +// END DEBUGGING +#endif + +// Local function definitions for now + +// need to copy areas that may overlap +static +inline void +us_arrayCopy(const UChar *src, int32_t srcStart, + UChar *dst, int32_t dstStart, int32_t count) +{ + if(count>0) { + uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src)); + } +} + +// u_unescapeAt() callback to get a UChar from a UnicodeString +U_CDECL_BEGIN +static UChar U_CALLCONV +UnicodeString_charAt(int32_t offset, void *context) { + return ((icu::UnicodeString*) context)->charAt(offset); +} +U_CDECL_END + +U_NAMESPACE_BEGIN + +/* The Replaceable virtual destructor can't be defined in the header + due to how AIX works with multiple definitions of virtual functions. +*/ +Replaceable::~Replaceable() {} + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString) + +UnicodeString U_EXPORT2 +operator+ (const UnicodeString &s1, const UnicodeString &s2) { + return + UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0). + append(s1). + append(s2); +} + +//======================================== +// Reference Counting functions, put at top of file so that optimizing compilers +// have a chance to automatically inline. +//======================================== + +void +UnicodeString::addRef() { + umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1); +} + +int32_t +UnicodeString::removeRef() { + return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1); +} + +int32_t +UnicodeString::refCount() const { + return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1)); +} + +void +UnicodeString::releaseArray() { + if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) { + uprv_free((int32_t *)fUnion.fFields.fArray - 1); + } +} + + + +//======================================== +// Constructors +//======================================== + +// The default constructor is inline in unistr.h. + +UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) { + fUnion.fFields.fLengthAndFlags = 0; + if(count <= 0 || (uint32_t)c > 0x10ffff) { + // just allocate and do not do anything else + allocate(capacity); + } else if(c <= 0xffff) { + int32_t length = count; + if(capacity < length) { + capacity = length; + } + if(allocate(capacity)) { + UChar *array = getArrayStart(); + UChar unit = (UChar)c; + for(int32_t i = 0; i < length; ++i) { + array[i] = unit; + } + setLength(length); + } + } else { // supplementary code point, write surrogate pairs + if(count > (INT32_MAX / 2)) { + // We would get more than 2G UChars. + allocate(capacity); + return; + } + int32_t length = count * 2; + if(capacity < length) { + capacity = length; + } + if(allocate(capacity)) { + UChar *array = getArrayStart(); + UChar lead = U16_LEAD(c); + UChar trail = U16_TRAIL(c); + for(int32_t i = 0; i < length; i += 2) { + array[i] = lead; + array[i + 1] = trail; + } + setLength(length); + } + } +} + +UnicodeString::UnicodeString(UChar ch) { + fUnion.fFields.fLengthAndFlags = kLength1 | kShortString; + fUnion.fStackFields.fBuffer[0] = ch; +} + +UnicodeString::UnicodeString(UChar32 ch) { + fUnion.fFields.fLengthAndFlags = kShortString; + int32_t i = 0; + UBool isError = FALSE; + U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError); + // We test isError so that the compiler does not complain that we don't. + // If isError then i==0 which is what we want anyway. + if(!isError) { + setShortLength(i); + } +} + +UnicodeString::UnicodeString(const UChar *text) { + fUnion.fFields.fLengthAndFlags = kShortString; + doAppend(text, 0, -1); +} + +UnicodeString::UnicodeString(const UChar *text, + int32_t textLength) { + fUnion.fFields.fLengthAndFlags = kShortString; + doAppend(text, 0, textLength); +} + +UnicodeString::UnicodeString(UBool isTerminated, + ConstChar16Ptr textPtr, + int32_t textLength) { + fUnion.fFields.fLengthAndFlags = kReadonlyAlias; + const UChar *text = textPtr; + if(text == NULL) { + // treat as an empty string, do not alias + setToEmpty(); + } else if(textLength < -1 || + (textLength == -1 && !isTerminated) || + (textLength >= 0 && isTerminated && text[textLength] != 0) + ) { + setToBogus(); + } else { + if(textLength == -1) { + // text is terminated, or else it would have failed the above test + textLength = u_strlen(text); + } + setArray(const_cast<UChar *>(text), textLength, + isTerminated ? textLength + 1 : textLength); + } +} + +UnicodeString::UnicodeString(UChar *buff, + int32_t buffLength, + int32_t buffCapacity) { + fUnion.fFields.fLengthAndFlags = kWritableAlias; + if(buff == NULL) { + // treat as an empty string, do not alias + setToEmpty(); + } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { + setToBogus(); + } else { + if(buffLength == -1) { + // fLength = u_strlen(buff); but do not look beyond buffCapacity + const UChar *p = buff, *limit = buff + buffCapacity; + while(p != limit && *p != 0) { + ++p; + } + buffLength = (int32_t)(p - buff); + } + setArray(buff, buffLength, buffCapacity); + } +} + +UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) { + fUnion.fFields.fLengthAndFlags = kShortString; + if(src==NULL) { + // treat as an empty string + } else { + if(length<0) { + length=(int32_t)uprv_strlen(src); + } + if(cloneArrayIfNeeded(length, length, FALSE)) { + u_charsToUChars(src, getArrayStart(), length); + setLength(length); + } else { + setToBogus(); + } + } +} + +#if U_CHARSET_IS_UTF8 + +UnicodeString::UnicodeString(const char *codepageData) { + fUnion.fFields.fLengthAndFlags = kShortString; + if(codepageData != 0) { + setToUTF8(codepageData); + } +} + +UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) { + fUnion.fFields.fLengthAndFlags = kShortString; + // if there's nothing to convert, do nothing + if(codepageData == 0 || dataLength == 0 || dataLength < -1) { + return; + } + if(dataLength == -1) { + dataLength = (int32_t)uprv_strlen(codepageData); + } + setToUTF8(StringPiece(codepageData, dataLength)); +} + +// else see unistr_cnv.cpp +#endif + +UnicodeString::UnicodeString(const UnicodeString& that) { + fUnion.fFields.fLengthAndFlags = kShortString; + copyFrom(that); +} + +UnicodeString::UnicodeString(UnicodeString &&src) U_NOEXCEPT { + copyFieldsFrom(src, TRUE); +} + +UnicodeString::UnicodeString(const UnicodeString& that, + int32_t srcStart) { + fUnion.fFields.fLengthAndFlags = kShortString; + setTo(that, srcStart); +} + +UnicodeString::UnicodeString(const UnicodeString& that, + int32_t srcStart, + int32_t srcLength) { + fUnion.fFields.fLengthAndFlags = kShortString; + setTo(that, srcStart, srcLength); +} + +// Replaceable base class clone() default implementation, does not clone +Replaceable * +Replaceable::clone() const { + return NULL; +} + +// UnicodeString overrides clone() with a real implementation +UnicodeString * +UnicodeString::clone() const { + return new UnicodeString(*this); +} + +//======================================== +// array allocation +//======================================== + +namespace { + +const int32_t kGrowSize = 128; + +// The number of bytes for one int32_t reference counter and capacity UChars +// must fit into a 32-bit size_t (at least when on a 32-bit platform). +// We also add one for the NUL terminator, to avoid reallocation in getTerminatedBuffer(), +// and round up to a multiple of 16 bytes. +// This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5. +// (With more complicated checks we could go up to 0x7ffffffd without rounding up, +// but that does not seem worth it.) +const int32_t kMaxCapacity = 0x7ffffff5; + +int32_t getGrowCapacity(int32_t newLength) { + int32_t growSize = (newLength >> 2) + kGrowSize; + if(growSize <= (kMaxCapacity - newLength)) { + return newLength + growSize; + } else { + return kMaxCapacity; + } +} + +} // namespace + +UBool +UnicodeString::allocate(int32_t capacity) { + if(capacity <= US_STACKBUF_SIZE) { + fUnion.fFields.fLengthAndFlags = kShortString; + return TRUE; + } + if(capacity <= kMaxCapacity) { + ++capacity; // for the NUL + // Switch to size_t which is unsigned so that we can allocate up to 4GB. + // Reference counter + UChars. + size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR; + // Round up to a multiple of 16. + numBytes = (numBytes + 15) & ~15; + int32_t *array = (int32_t *) uprv_malloc(numBytes); + if(array != NULL) { + // set initial refCount and point behind the refCount + *array++ = 1; + numBytes -= sizeof(int32_t); + + // have fArray point to the first UChar + fUnion.fFields.fArray = (UChar *)array; + fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR); + fUnion.fFields.fLengthAndFlags = kLongString; + return TRUE; + } + } + fUnion.fFields.fLengthAndFlags = kIsBogus; + fUnion.fFields.fArray = 0; + fUnion.fFields.fCapacity = 0; + return FALSE; +} + +//======================================== +// Destructor +//======================================== + +#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS +static u_atomic_int32_t finalLengthCounts[0x400]; // UnicodeString::kMaxShortLength+1 +static u_atomic_int32_t beyondCount(0); + +U_CAPI void unistr_printLengths() { + int32_t i; + for(i = 0; i <= 59; ++i) { + printf("%2d, %9d\n", i, (int32_t)finalLengthCounts[i]); + } + int32_t beyond = beyondCount; + for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) { + beyond += finalLengthCounts[i]; + } + printf(">59, %9d\n", beyond); +} +#endif + +UnicodeString::~UnicodeString() +{ +#ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS + // Count lengths of strings at the end of their lifetime. + // Useful for discussion of a desirable stack buffer size. + // Count the contents length, not the optional NUL terminator nor further capacity. + // Ignore open-buffer strings and strings which alias external storage. + if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) { + if(hasShortLength()) { + umtx_atomic_inc(finalLengthCounts + getShortLength()); + } else { + umtx_atomic_inc(&beyondCount); + } + } +#endif + + releaseArray(); +} + +//======================================== +// Factory methods +//======================================== + +UnicodeString UnicodeString::fromUTF8(StringPiece utf8) { + UnicodeString result; + result.setToUTF8(utf8); + return result; +} + +UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) { + UnicodeString result; + int32_t capacity; + // Most UTF-32 strings will be BMP-only and result in a same-length + // UTF-16 string. We overestimate the capacity just slightly, + // just in case there are a few supplementary characters. + if(length <= US_STACKBUF_SIZE) { + capacity = US_STACKBUF_SIZE; + } else { + capacity = length + (length >> 4) + 4; + } + do { + UChar *utf16 = result.getBuffer(capacity); + int32_t length16; + UErrorCode errorCode = U_ZERO_ERROR; + u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16, + utf32, length, + 0xfffd, // Substitution character. + NULL, // Don't care about number of substitutions. + &errorCode); + result.releaseBuffer(length16); + if(errorCode == U_BUFFER_OVERFLOW_ERROR) { + capacity = length16 + 1; // +1 for the terminating NUL. + continue; + } else if(U_FAILURE(errorCode)) { + result.setToBogus(); + } + break; + } while(TRUE); + return result; +} + +//======================================== +// Assignment +//======================================== + +UnicodeString & +UnicodeString::operator=(const UnicodeString &src) { + return copyFrom(src); +} + +UnicodeString & +UnicodeString::fastCopyFrom(const UnicodeString &src) { + return copyFrom(src, TRUE); +} + +UnicodeString & +UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { + // if assigning to ourselves, do nothing + if(this == &src) { + return *this; + } + + // is the right side bogus? + if(src.isBogus()) { + setToBogus(); + return *this; + } + + // delete the current contents + releaseArray(); + + if(src.isEmpty()) { + // empty string - use the stack buffer + setToEmpty(); + return *this; + } + + // fLength>0 and not an "open" src.getBuffer(minCapacity) + fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags; + switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) { + case kShortString: + // short string using the stack buffer, do the same + uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer, + getShortLength() * U_SIZEOF_UCHAR); + break; + case kLongString: + // src uses a refCounted string buffer, use that buffer with refCount + // src is const, use a cast - we don't actually change it + ((UnicodeString &)src).addRef(); + // copy all fields, share the reference-counted buffer + fUnion.fFields.fArray = src.fUnion.fFields.fArray; + fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; + if(!hasShortLength()) { + fUnion.fFields.fLength = src.fUnion.fFields.fLength; + } + break; + case kReadonlyAlias: + if(fastCopy) { + // src is a readonly alias, do the same + // -> maintain the readonly alias as such + fUnion.fFields.fArray = src.fUnion.fFields.fArray; + fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; + if(!hasShortLength()) { + fUnion.fFields.fLength = src.fUnion.fFields.fLength; + } + break; + } + // else if(!fastCopy) fall through to case kWritableAlias + // -> allocate a new buffer and copy the contents + U_FALLTHROUGH; + case kWritableAlias: { + // src is a writable alias; we make a copy of that instead + int32_t srcLength = src.length(); + if(allocate(srcLength)) { + u_memcpy(getArrayStart(), src.getArrayStart(), srcLength); + setLength(srcLength); + break; + } + // if there is not enough memory, then fall through to setting to bogus + U_FALLTHROUGH; + } + default: + // if src is bogus, set ourselves to bogus + // do not call setToBogus() here because fArray and flags are not consistent here + fUnion.fFields.fLengthAndFlags = kIsBogus; + fUnion.fFields.fArray = 0; + fUnion.fFields.fCapacity = 0; + break; + } + + return *this; +} + +UnicodeString &UnicodeString::operator=(UnicodeString &&src) U_NOEXCEPT { + // No explicit check for self move assignment, consistent with standard library. + // Self move assignment causes no crash nor leak but might make the object bogus. + releaseArray(); + copyFieldsFrom(src, TRUE); + return *this; +} + +// Same as move assignment except without memory management. +void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT { + int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags; + if(lengthAndFlags & kUsingStackBuffer) { + // Short string using the stack buffer, copy the contents. + // Check for self assignment to prevent "overlap in memcpy" warnings, + // although it should be harmless to copy a buffer to itself exactly. + if(this != &src) { + uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer, + getShortLength() * U_SIZEOF_UCHAR); + } + } else { + // In all other cases, copy all fields. + fUnion.fFields.fArray = src.fUnion.fFields.fArray; + fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; + if(!hasShortLength()) { + fUnion.fFields.fLength = src.fUnion.fFields.fLength; + } + if(setSrcToBogus) { + // Set src to bogus without releasing any memory. + src.fUnion.fFields.fLengthAndFlags = kIsBogus; + src.fUnion.fFields.fArray = NULL; + src.fUnion.fFields.fCapacity = 0; + } + } +} + +void UnicodeString::swap(UnicodeString &other) U_NOEXCEPT { + UnicodeString temp; // Empty short string: Known not to need releaseArray(). + // Copy fields without resetting source values in between. + temp.copyFieldsFrom(*this, FALSE); + this->copyFieldsFrom(other, FALSE); + other.copyFieldsFrom(temp, FALSE); + // Set temp to an empty string so that other's memory is not released twice. + temp.fUnion.fFields.fLengthAndFlags = kShortString; +} + +//======================================== +// Miscellaneous operations +//======================================== + +UnicodeString UnicodeString::unescape() const { + UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity + if (result.isBogus()) { + return result; + } + const UChar *array = getBuffer(); + int32_t len = length(); + int32_t prev = 0; + for (int32_t i=0;;) { + if (i == len) { + result.append(array, prev, len - prev); + break; + } + if (array[i++] == 0x5C /*'\\'*/) { + result.append(array, prev, (i - 1) - prev); + UChar32 c = unescapeAt(i); // advances i + if (c < 0) { + result.remove(); // return empty string + break; // invalid escape sequence + } + result.append(c); + prev = i; + } + } + return result; +} + +UChar32 UnicodeString::unescapeAt(int32_t &offset) const { + return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this); +} + +//======================================== +// Read-only implementation +//======================================== +UBool +UnicodeString::doEquals(const UnicodeString &text, int32_t len) const { + // Requires: this & text not bogus and have same lengths. + // Byte-wise comparison works for equality regardless of endianness. + return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0; +} + +int8_t +UnicodeString::doCompare( int32_t start, + int32_t length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) const +{ + // compare illegal string values + if(isBogus()) { + return -1; + } + + // pin indices to legal values + pinIndices(start, length); + + if(srcChars == NULL) { + // treat const UChar *srcChars==NULL as an empty string + return length == 0 ? 0 : 1; + } + + // get the correct pointer + const UChar *chars = getArrayStart(); + + chars += start; + srcChars += srcStart; + + int32_t minLength; + int8_t lengthResult; + + // get the srcLength if necessary + if(srcLength < 0) { + srcLength = u_strlen(srcChars + srcStart); + } + + // are we comparing different lengths? + if(length != srcLength) { + if(length < srcLength) { + minLength = length; + lengthResult = -1; + } else { + minLength = srcLength; + lengthResult = 1; + } + } else { + minLength = length; + lengthResult = 0; + } + + /* + * note that uprv_memcmp() returns an int but we return an int8_t; + * we need to take care not to truncate the result - + * one way to do this is to right-shift the value to + * move the sign bit into the lower 8 bits and making sure that this + * does not become 0 itself + */ + + if(minLength > 0 && chars != srcChars) { + int32_t result; + +# if U_IS_BIG_ENDIAN + // big-endian: byte comparison works + result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar)); + if(result != 0) { + return (int8_t)(result >> 15 | 1); + } +# else + // little-endian: compare UChar units + do { + result = ((int32_t)*(chars++) - (int32_t)*(srcChars++)); + if(result != 0) { + return (int8_t)(result >> 15 | 1); + } + } while(--minLength > 0); +# endif + } + return lengthResult; +} + +/* String compare in code point order - doCompare() compares in code unit order. */ +int8_t +UnicodeString::doCompareCodePointOrder(int32_t start, + int32_t length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) const +{ + // compare illegal string values + // treat const UChar *srcChars==NULL as an empty string + if(isBogus()) { + return -1; + } + + // pin indices to legal values + pinIndices(start, length); + + if(srcChars == NULL) { + srcStart = srcLength = 0; + } + + int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE); + /* translate the 32-bit result into an 8-bit one */ + if(diff!=0) { + return (int8_t)(diff >> 15 | 1); + } else { + return 0; + } +} + +int32_t +UnicodeString::getLength() const { + return length(); +} + +UChar +UnicodeString::getCharAt(int32_t offset) const { + return charAt(offset); +} + +UChar32 +UnicodeString::getChar32At(int32_t offset) const { + return char32At(offset); +} + +UChar32 +UnicodeString::char32At(int32_t offset) const +{ + int32_t len = length(); + if((uint32_t)offset < (uint32_t)len) { + const UChar *array = getArrayStart(); + UChar32 c; + U16_GET(array, 0, offset, len, c); + return c; + } else { + return kInvalidUChar; + } +} + +int32_t +UnicodeString::getChar32Start(int32_t offset) const { + if((uint32_t)offset < (uint32_t)length()) { + const UChar *array = getArrayStart(); + U16_SET_CP_START(array, 0, offset); + return offset; + } else { + return 0; + } +} + +int32_t +UnicodeString::getChar32Limit(int32_t offset) const { + int32_t len = length(); + if((uint32_t)offset < (uint32_t)len) { + const UChar *array = getArrayStart(); + U16_SET_CP_LIMIT(array, 0, offset, len); + return offset; + } else { + return len; + } +} + +int32_t +UnicodeString::countChar32(int32_t start, int32_t length) const { + pinIndices(start, length); + // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL + return u_countChar32(getArrayStart()+start, length); +} + +UBool +UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const { + pinIndices(start, length); + // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL + return u_strHasMoreChar32Than(getArrayStart()+start, length, number); +} + +int32_t +UnicodeString::moveIndex32(int32_t index, int32_t delta) const { + // pin index + int32_t len = length(); + if(index<0) { + index=0; + } else if(index>len) { + index=len; + } + + const UChar *array = getArrayStart(); + if(delta>0) { + U16_FWD_N(array, index, len, delta); + } else { + U16_BACK_N(array, 0, index, -delta); + } + + return index; +} + +void +UnicodeString::doExtract(int32_t start, + int32_t length, + UChar *dst, + int32_t dstStart) const +{ + // pin indices to legal values + pinIndices(start, length); + + // do not copy anything if we alias dst itself + const UChar *array = getArrayStart(); + if(array + start != dst + dstStart) { + us_arrayCopy(array, start, dst, dstStart, length); + } +} + +int32_t +UnicodeString::extract(Char16Ptr dest, int32_t destCapacity, + UErrorCode &errorCode) const { + int32_t len = length(); + if(U_SUCCESS(errorCode)) { + if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + } else { + const UChar *array = getArrayStart(); + if(len>0 && len<=destCapacity && array!=dest) { + u_memcpy(dest, array, len); + } + return u_terminateUChars(dest, destCapacity, len, &errorCode); + } + } + + return len; +} + +int32_t +UnicodeString::extract(int32_t start, + int32_t length, + char *target, + int32_t targetCapacity, + enum EInvariant) const +{ + // if the arguments are illegal, then do nothing + if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) { + return 0; + } + + // pin the indices to legal values + pinIndices(start, length); + + if(length <= targetCapacity) { + u_UCharsToChars(getArrayStart() + start, target, length); + } + UErrorCode status = U_ZERO_ERROR; + return u_terminateChars(target, targetCapacity, length, &status); +} + +UnicodeString +UnicodeString::tempSubString(int32_t start, int32_t len) const { + pinIndices(start, len); + const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer + if(array==NULL) { + array=fUnion.fStackFields.fBuffer; // anything not NULL because that would make an empty string + len=-2; // bogus result string + } + return UnicodeString(FALSE, array + start, len); +} + +int32_t +UnicodeString::toUTF8(int32_t start, int32_t len, + char *target, int32_t capacity) const { + pinIndices(start, len); + int32_t length8; + UErrorCode errorCode = U_ZERO_ERROR; + u_strToUTF8WithSub(target, capacity, &length8, + getBuffer() + start, len, + 0xFFFD, // Standard substitution character. + NULL, // Don't care about number of substitutions. + &errorCode); + return length8; +} + +#if U_CHARSET_IS_UTF8 + +int32_t +UnicodeString::extract(int32_t start, int32_t len, + char *target, uint32_t dstSize) const { + // if the arguments are illegal, then do nothing + if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) { + return 0; + } + return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff); +} + +// else see unistr_cnv.cpp +#endif + +void +UnicodeString::extractBetween(int32_t start, + int32_t limit, + UnicodeString& target) const { + pinIndex(start); + pinIndex(limit); + doExtract(start, limit - start, target); +} + +// When converting from UTF-16 to UTF-8, the result will have at most 3 times +// as many bytes as the source has UChars. +// The "worst cases" are writing systems like Indic, Thai and CJK with +// 3:1 bytes:UChars. +void +UnicodeString::toUTF8(ByteSink &sink) const { + int32_t length16 = length(); + if(length16 != 0) { + char stackBuffer[1024]; + int32_t capacity = (int32_t)sizeof(stackBuffer); + UBool utf8IsOwned = FALSE; + char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity, + 3*length16, + stackBuffer, capacity, + &capacity); + int32_t length8 = 0; + UErrorCode errorCode = U_ZERO_ERROR; + u_strToUTF8WithSub(utf8, capacity, &length8, + getBuffer(), length16, + 0xFFFD, // Standard substitution character. + NULL, // Don't care about number of substitutions. + &errorCode); + if(errorCode == U_BUFFER_OVERFLOW_ERROR) { + utf8 = (char *)uprv_malloc(length8); + if(utf8 != NULL) { + utf8IsOwned = TRUE; + errorCode = U_ZERO_ERROR; + u_strToUTF8WithSub(utf8, length8, &length8, + getBuffer(), length16, + 0xFFFD, // Standard substitution character. + NULL, // Don't care about number of substitutions. + &errorCode); + } else { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + } + if(U_SUCCESS(errorCode)) { + sink.Append(utf8, length8); + sink.Flush(); + } + if(utf8IsOwned) { + uprv_free(utf8); + } + } +} + +int32_t +UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const { + int32_t length32=0; + if(U_SUCCESS(errorCode)) { + // getBuffer() and u_strToUTF32WithSub() check for illegal arguments. + u_strToUTF32WithSub(utf32, capacity, &length32, + getBuffer(), length(), + 0xfffd, // Substitution character. + NULL, // Don't care about number of substitutions. + &errorCode); + } + return length32; +} + +int32_t +UnicodeString::indexOf(const UChar *srcChars, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const +{ + if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { + return -1; + } + + // UnicodeString does not find empty substrings + if(srcLength < 0 && srcChars[srcStart] == 0) { + return -1; + } + + // get the indices within bounds + pinIndices(start, length); + + // find the first occurrence of the substring + const UChar *array = getArrayStart(); + const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength); + if(match == NULL) { + return -1; + } else { + return (int32_t)(match - array); + } +} + +int32_t +UnicodeString::doIndexOf(UChar c, + int32_t start, + int32_t length) const +{ + // pin indices + pinIndices(start, length); + + // find the first occurrence of c + const UChar *array = getArrayStart(); + const UChar *match = u_memchr(array + start, c, length); + if(match == NULL) { + return -1; + } else { + return (int32_t)(match - array); + } +} + +int32_t +UnicodeString::doIndexOf(UChar32 c, + int32_t start, + int32_t length) const { + // pin indices + pinIndices(start, length); + + // find the first occurrence of c + const UChar *array = getArrayStart(); + const UChar *match = u_memchr32(array + start, c, length); + if(match == NULL) { + return -1; + } else { + return (int32_t)(match - array); + } +} + +int32_t +UnicodeString::lastIndexOf(const UChar *srcChars, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const +{ + if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { + return -1; + } + + // UnicodeString does not find empty substrings + if(srcLength < 0 && srcChars[srcStart] == 0) { + return -1; + } + + // get the indices within bounds + pinIndices(start, length); + + // find the last occurrence of the substring + const UChar *array = getArrayStart(); + const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength); + if(match == NULL) { + return -1; + } else { + return (int32_t)(match - array); + } +} + +int32_t +UnicodeString::doLastIndexOf(UChar c, + int32_t start, + int32_t length) const +{ + if(isBogus()) { + return -1; + } + + // pin indices + pinIndices(start, length); + + // find the last occurrence of c + const UChar *array = getArrayStart(); + const UChar *match = u_memrchr(array + start, c, length); + if(match == NULL) { + return -1; + } else { + return (int32_t)(match - array); + } +} + +int32_t +UnicodeString::doLastIndexOf(UChar32 c, + int32_t start, + int32_t length) const { + // pin indices + pinIndices(start, length); + + // find the last occurrence of c + const UChar *array = getArrayStart(); + const UChar *match = u_memrchr32(array + start, c, length); + if(match == NULL) { + return -1; + } else { + return (int32_t)(match - array); + } +} + +//======================================== +// Write implementation +//======================================== + +UnicodeString& +UnicodeString::findAndReplace(int32_t start, + int32_t length, + const UnicodeString& oldText, + int32_t oldStart, + int32_t oldLength, + const UnicodeString& newText, + int32_t newStart, + int32_t newLength) +{ + if(isBogus() || oldText.isBogus() || newText.isBogus()) { + return *this; + } + + pinIndices(start, length); + oldText.pinIndices(oldStart, oldLength); + newText.pinIndices(newStart, newLength); + + if(oldLength == 0) { + return *this; + } + + while(length > 0 && length >= oldLength) { + int32_t pos = indexOf(oldText, oldStart, oldLength, start, length); + if(pos < 0) { + // no more oldText's here: done + break; + } else { + // we found oldText, replace it by newText and go beyond it + replace(pos, oldLength, newText, newStart, newLength); + length -= pos + oldLength - start; + start = pos + newLength; + } + } + + return *this; +} + + +void +UnicodeString::setToBogus() +{ + releaseArray(); + + fUnion.fFields.fLengthAndFlags = kIsBogus; + fUnion.fFields.fArray = 0; + fUnion.fFields.fCapacity = 0; +} + +// turn a bogus string into an empty one +void +UnicodeString::unBogus() { + if(fUnion.fFields.fLengthAndFlags & kIsBogus) { + setToEmpty(); + } +} + +const char16_t * +UnicodeString::getTerminatedBuffer() { + if(!isWritable()) { + return nullptr; + } + UChar *array = getArrayStart(); + int32_t len = length(); + if(len < getCapacity()) { + if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) { + // If len<capacity on a read-only alias, then array[len] is + // either the original NUL (if constructed with (TRUE, s, length)) + // or one of the original string contents characters (if later truncated), + // therefore we can assume that array[len] is initialized memory. + if(array[len] == 0) { + return array; + } + } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) { + // kRefCounted: Do not write the NUL if the buffer is shared. + // That is mostly safe, except when the length of one copy was modified + // without copy-on-write, e.g., via truncate(newLength) or remove(void). + // Then the NUL would be written into the middle of another copy's string. + + // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL. + // Do not test if there is a NUL already because it might be uninitialized memory. + // (That would be safe, but tools like valgrind & Purify would complain.) + array[len] = 0; + return array; + } + } + if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) { + array = getArrayStart(); + array[len] = 0; + return array; + } else { + return nullptr; + } +} + +// setTo() analogous to the readonly-aliasing constructor with the same signature +UnicodeString & +UnicodeString::setTo(UBool isTerminated, + ConstChar16Ptr textPtr, + int32_t textLength) +{ + if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) { + // do not modify a string that has an "open" getBuffer(minCapacity) + return *this; + } + + const UChar *text = textPtr; + if(text == NULL) { + // treat as an empty string, do not alias + releaseArray(); + setToEmpty(); + return *this; + } + + if( textLength < -1 || + (textLength == -1 && !isTerminated) || + (textLength >= 0 && isTerminated && text[textLength] != 0) + ) { + setToBogus(); + return *this; + } + + releaseArray(); + + if(textLength == -1) { + // text is terminated, or else it would have failed the above test + textLength = u_strlen(text); + } + fUnion.fFields.fLengthAndFlags = kReadonlyAlias; + setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); + return *this; +} + +// setTo() analogous to the writable-aliasing constructor with the same signature +UnicodeString & +UnicodeString::setTo(UChar *buffer, + int32_t buffLength, + int32_t buffCapacity) { + if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) { + // do not modify a string that has an "open" getBuffer(minCapacity) + return *this; + } + + if(buffer == NULL) { + // treat as an empty string, do not alias + releaseArray(); + setToEmpty(); + return *this; + } + + if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { + setToBogus(); + return *this; + } else if(buffLength == -1) { + // buffLength = u_strlen(buff); but do not look beyond buffCapacity + const UChar *p = buffer, *limit = buffer + buffCapacity; + while(p != limit && *p != 0) { + ++p; + } + buffLength = (int32_t)(p - buffer); + } + + releaseArray(); + + fUnion.fFields.fLengthAndFlags = kWritableAlias; + setArray(buffer, buffLength, buffCapacity); + return *this; +} + +UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) { + unBogus(); + int32_t length = utf8.length(); + int32_t capacity; + // The UTF-16 string will be at most as long as the UTF-8 string. + if(length <= US_STACKBUF_SIZE) { + capacity = US_STACKBUF_SIZE; + } else { + capacity = length + 1; // +1 for the terminating NUL. + } + UChar *utf16 = getBuffer(capacity); + int32_t length16; + UErrorCode errorCode = U_ZERO_ERROR; + u_strFromUTF8WithSub(utf16, getCapacity(), &length16, + utf8.data(), length, + 0xfffd, // Substitution character. + NULL, // Don't care about number of substitutions. + &errorCode); + releaseBuffer(length16); + if(U_FAILURE(errorCode)) { + setToBogus(); + } + return *this; +} + +UnicodeString& +UnicodeString::setCharAt(int32_t offset, + UChar c) +{ + int32_t len = length(); + if(cloneArrayIfNeeded() && len > 0) { + if(offset < 0) { + offset = 0; + } else if(offset >= len) { + offset = len - 1; + } + + getArrayStart()[offset] = c; + } + return *this; +} + +UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + UChar32 srcChar) { + UChar buffer[U16_MAX_LENGTH]; + int32_t count = 0; + UBool isError = FALSE; + U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError); + // We test isError so that the compiler does not complain that we don't. + // If isError (srcChar is not a valid code point) then count==0 which means + // we remove the source segment rather than replacing it with srcChar. + return doReplace(start, _length, buffer, 0, isError ? 0 : count); +} + +UnicodeString& +UnicodeString::append(UChar32 srcChar) { + UChar buffer[U16_MAX_LENGTH]; + int32_t _length = 0; + UBool isError = FALSE; + U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError); + // We test isError so that the compiler does not complain that we don't. + // If isError then _length==0 which turns the doAppend() into a no-op anyway. + return isError ? *this : doAppend(buffer, 0, _length); +} + +UnicodeString& +UnicodeString::doReplace( int32_t start, + int32_t length, + const UnicodeString& src, + int32_t srcStart, + int32_t srcLength) +{ + // pin the indices to legal values + src.pinIndices(srcStart, srcLength); + + // get the characters from src + // and replace the range in ourselves with them + return doReplace(start, length, src.getArrayStart(), srcStart, srcLength); +} + +UnicodeString& +UnicodeString::doReplace(int32_t start, + int32_t length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) +{ + if(!isWritable()) { + return *this; + } + + int32_t oldLength = this->length(); + + // optimize (read-only alias).remove(0, start) and .remove(start, end) + if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) { + if(start == 0) { + // remove prefix by adjusting the array pointer + pinIndex(length); + fUnion.fFields.fArray += length; + fUnion.fFields.fCapacity -= length; + setLength(oldLength - length); + return *this; + } else { + pinIndex(start); + if(length >= (oldLength - start)) { + // remove suffix by reducing the length (like truncate()) + setLength(start); + fUnion.fFields.fCapacity = start; // not NUL-terminated any more + return *this; + } + } + } + + if(start == oldLength) { + return doAppend(srcChars, srcStart, srcLength); + } + + if(srcChars == 0) { + srcLength = 0; + } else { + // Perform all remaining operations relative to srcChars + srcStart. + // From this point forward, do not use srcStart. + srcChars += srcStart; + if (srcLength < 0) { + // get the srcLength if necessary + srcLength = u_strlen(srcChars); + } + } + + // pin the indices to legal values + pinIndices(start, length); + + // Calculate the size of the string after the replace. + // Avoid int32_t overflow. + int32_t newLength = oldLength - length; + if(srcLength > (INT32_MAX - newLength)) { + setToBogus(); + return *this; + } + newLength += srcLength; + + // Check for insertion into ourself + const UChar *oldArray = getArrayStart(); + if (isBufferWritable() && + oldArray < srcChars + srcLength && + srcChars < oldArray + oldLength) { + // Copy into a new UnicodeString and start over + UnicodeString copy(srcChars, srcLength); + if (copy.isBogus()) { + setToBogus(); + return *this; + } + return doReplace(start, length, copy.getArrayStart(), 0, srcLength); + } + + // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents; + // therefore we need to keep the current fArray + UChar oldStackBuffer[US_STACKBUF_SIZE]; + if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) { + // copy the stack buffer contents because it will be overwritten with + // fUnion.fFields values + u_memcpy(oldStackBuffer, oldArray, oldLength); + oldArray = oldStackBuffer; + } + + // clone our array and allocate a bigger array if needed + int32_t *bufferToDelete = 0; + if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength), + FALSE, &bufferToDelete) + ) { + return *this; + } + + // now do the replace + + UChar *newArray = getArrayStart(); + if(newArray != oldArray) { + // if fArray changed, then we need to copy everything except what will change + us_arrayCopy(oldArray, 0, newArray, 0, start); + us_arrayCopy(oldArray, start + length, + newArray, start + srcLength, + oldLength - (start + length)); + } else if(length != srcLength) { + // fArray did not change; copy only the portion that isn't changing, leaving a hole + us_arrayCopy(oldArray, start + length, + newArray, start + srcLength, + oldLength - (start + length)); + } + + // now fill in the hole with the new string + us_arrayCopy(srcChars, 0, newArray, start, srcLength); + + setLength(newLength); + + // delayed delete in case srcChars == fArray when we started, and + // to keep oldArray alive for the above operations + if (bufferToDelete) { + uprv_free(bufferToDelete); + } + + return *this; +} + +// Versions of doReplace() only for append() variants. +// doReplace() and doAppend() optimize for different cases. + +UnicodeString& +UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) { + if(srcLength == 0) { + return *this; + } + + // pin the indices to legal values + src.pinIndices(srcStart, srcLength); + return doAppend(src.getArrayStart(), srcStart, srcLength); +} + +UnicodeString& +UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength) { + if(!isWritable() || srcLength == 0 || srcChars == NULL) { + return *this; + } + + // Perform all remaining operations relative to srcChars + srcStart. + // From this point forward, do not use srcStart. + srcChars += srcStart; + + if(srcLength < 0) { + // get the srcLength if necessary + if((srcLength = u_strlen(srcChars)) == 0) { + return *this; + } + } + + int32_t oldLength = length(); + int32_t newLength; + if (uprv_add32_overflow(oldLength, srcLength, &newLength)) { + setToBogus(); + return *this; + } + + // Check for append onto ourself + const UChar* oldArray = getArrayStart(); + if (isBufferWritable() && + oldArray < srcChars + srcLength && + srcChars < oldArray + oldLength) { + // Copy into a new UnicodeString and start over + UnicodeString copy(srcChars, srcLength); + if (copy.isBogus()) { + setToBogus(); + return *this; + } + return doAppend(copy.getArrayStart(), 0, srcLength); + } + + // optimize append() onto a large-enough, owned string + if((newLength <= getCapacity() && isBufferWritable()) || + cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) { + UChar *newArray = getArrayStart(); + // Do not copy characters when + // UChar *buffer=str.getAppendBuffer(...); + // is followed by + // str.append(buffer, length); + // or + // str.appendString(buffer, length) + // or similar. + if(srcChars != newArray + oldLength) { + us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength); + } + setLength(newLength); + } + return *this; +} + +/** + * Replaceable API + */ +void +UnicodeString::handleReplaceBetween(int32_t start, + int32_t limit, + const UnicodeString& text) { + replaceBetween(start, limit, text); +} + +/** + * Replaceable API + */ +void +UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) { + if (limit <= start) { + return; // Nothing to do; avoid bogus malloc call + } + UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) ); + // Check to make sure text is not null. + if (text != NULL) { + extractBetween(start, limit, text, 0); + insert(dest, text, 0, limit - start); + uprv_free(text); + } +} + +/** + * Replaceable API + * + * NOTE: This is for the Replaceable class. There is no rep.cpp, + * so we implement this function here. + */ +UBool Replaceable::hasMetaData() const { + return TRUE; +} + +/** + * Replaceable API + */ +UBool UnicodeString::hasMetaData() const { + return FALSE; +} + +UnicodeString& +UnicodeString::doReverse(int32_t start, int32_t length) { + if(length <= 1 || !cloneArrayIfNeeded()) { + return *this; + } + + // pin the indices to legal values + pinIndices(start, length); + if(length <= 1) { // pinIndices() might have shrunk the length + return *this; + } + + UChar *left = getArrayStart() + start; + UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2) + UChar swap; + UBool hasSupplementary = FALSE; + + // Before the loop we know left<right because length>=2. + do { + hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left); + hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right); + *right-- = swap; + } while(left < right); + // Make sure to test the middle code unit of an odd-length string. + // Redundant if the length is even. + hasSupplementary |= (UBool)U16_IS_LEAD(*left); + + /* if there are supplementary code points in the reversed range, then re-swap their surrogates */ + if(hasSupplementary) { + UChar swap2; + + left = getArrayStart() + start; + right = left + length - 1; // -1 so that we can look at *(left+1) if left<right + while(left < right) { + if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) { + *left++ = swap2; + *left++ = swap; + } else { + ++left; + } + } + } + + return *this; +} + +UBool +UnicodeString::padLeading(int32_t targetLength, + UChar padChar) +{ + int32_t oldLength = length(); + if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { + return FALSE; + } else { + // move contents up by padding width + UChar *array = getArrayStart(); + int32_t start = targetLength - oldLength; + us_arrayCopy(array, 0, array, start, oldLength); + + // fill in padding character + while(--start >= 0) { + array[start] = padChar; + } + setLength(targetLength); + return TRUE; + } +} + +UBool +UnicodeString::padTrailing(int32_t targetLength, + UChar padChar) +{ + int32_t oldLength = length(); + if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { + return FALSE; + } else { + // fill in padding character + UChar *array = getArrayStart(); + int32_t length = targetLength; + while(--length >= oldLength) { + array[length] = padChar; + } + setLength(targetLength); + return TRUE; + } +} + +//======================================== +// Hashing +//======================================== +int32_t +UnicodeString::doHashCode() const +{ + /* Delegate hash computation to uhash. This makes UnicodeString + * hashing consistent with UChar* hashing. */ + int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length()); + if (hashCode == kInvalidHashCode) { + hashCode = kEmptyHashCode; + } + return hashCode; +} + +//======================================== +// External Buffer +//======================================== + +char16_t * +UnicodeString::getBuffer(int32_t minCapacity) { + if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { + fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer; + setZeroLength(); + return getArrayStart(); + } else { + return nullptr; + } +} + +void +UnicodeString::releaseBuffer(int32_t newLength) { + if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) { + // set the new fLength + int32_t capacity=getCapacity(); + if(newLength==-1) { + // the new length is the string length, capped by fCapacity + const UChar *array=getArrayStart(), *p=array, *limit=array+capacity; + while(p<limit && *p!=0) { + ++p; + } + newLength=(int32_t)(p-array); + } else if(newLength>capacity) { + newLength=capacity; + } + setLength(newLength); + fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer; + } +} + +//======================================== +// Miscellaneous +//======================================== +UBool +UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, + int32_t growCapacity, + UBool doCopyArray, + int32_t **pBufferToDelete, + UBool forceClone) { + // default parameters need to be static, therefore + // the defaults are -1 to have convenience defaults + if(newCapacity == -1) { + newCapacity = getCapacity(); + } + + // while a getBuffer(minCapacity) is "open", + // prevent any modifications of the string by returning FALSE here + // if the string is bogus, then only an assignment or similar can revive it + if(!isWritable()) { + return FALSE; + } + + /* + * We need to make a copy of the array if + * the buffer is read-only, or + * the buffer is refCounted (shared), and refCount>1, or + * the buffer is too small. + * Return FALSE if memory could not be allocated. + */ + if(forceClone || + fUnion.fFields.fLengthAndFlags & kBufferIsReadonly || + (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) || + newCapacity > getCapacity() + ) { + // check growCapacity for default value and use of the stack buffer + if(growCapacity < 0) { + growCapacity = newCapacity; + } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) { + growCapacity = US_STACKBUF_SIZE; + } + + // save old values + UChar oldStackBuffer[US_STACKBUF_SIZE]; + UChar *oldArray; + int32_t oldLength = length(); + int16_t flags = fUnion.fFields.fLengthAndFlags; + + if(flags&kUsingStackBuffer) { + U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */ + if(doCopyArray && growCapacity > US_STACKBUF_SIZE) { + // copy the stack buffer contents because it will be overwritten with + // fUnion.fFields values + us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength); + oldArray = oldStackBuffer; + } else { + oldArray = NULL; // no need to copy from the stack buffer to itself + } + } else { + oldArray = fUnion.fFields.fArray; + U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */ + } + + // allocate a new array + if(allocate(growCapacity) || + (newCapacity < growCapacity && allocate(newCapacity)) + ) { + if(doCopyArray) { + // copy the contents + // do not copy more than what fits - it may be smaller than before + int32_t minLength = oldLength; + newCapacity = getCapacity(); + if(newCapacity < minLength) { + minLength = newCapacity; + } + if(oldArray != NULL) { + us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength); + } + setLength(minLength); + } else { + setZeroLength(); + } + + // release the old array + if(flags & kRefCounted) { + // the array is refCounted; decrement and release if 0 + u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1); + if(umtx_atomic_dec(pRefCount) == 0) { + if(pBufferToDelete == 0) { + // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t + // is defined as volatile. (Volatile has useful non-standard behavior + // with this compiler.) + uprv_free((void *)pRefCount); + } else { + // the caller requested to delete it himself + *pBufferToDelete = (int32_t *)pRefCount; + } + } + } + } else { + // not enough memory for growCapacity and not even for the smaller newCapacity + // reset the old values for setToBogus() to release the array + if(!(flags&kUsingStackBuffer)) { + fUnion.fFields.fArray = oldArray; + } + fUnion.fFields.fLengthAndFlags = flags; + setToBogus(); + return FALSE; + } + } + return TRUE; +} + +// UnicodeStringAppendable ------------------------------------------------- *** + +UnicodeStringAppendable::~UnicodeStringAppendable() {} + +UBool +UnicodeStringAppendable::appendCodeUnit(UChar c) { + return str.doAppend(&c, 0, 1).isWritable(); +} + +UBool +UnicodeStringAppendable::appendCodePoint(UChar32 c) { + UChar buffer[U16_MAX_LENGTH]; + int32_t cLength = 0; + UBool isError = FALSE; + U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError); + return !isError && str.doAppend(buffer, 0, cLength).isWritable(); +} + +UBool +UnicodeStringAppendable::appendString(const UChar *s, int32_t length) { + return str.doAppend(s, 0, length).isWritable(); +} + +UBool +UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) { + return str.cloneArrayIfNeeded(str.length() + appendCapacity); +} + +UChar * +UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity, + int32_t desiredCapacityHint, + UChar *scratch, int32_t scratchCapacity, + int32_t *resultCapacity) { + if(minCapacity < 1 || scratchCapacity < minCapacity) { + *resultCapacity = 0; + return NULL; + } + int32_t oldLength = str.length(); + if(minCapacity <= (kMaxCapacity - oldLength) && + desiredCapacityHint <= (kMaxCapacity - oldLength) && + str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) { + *resultCapacity = str.getCapacity() - oldLength; + return str.getArrayStart() + oldLength; + } + *resultCapacity = scratchCapacity; + return scratch; +} + +U_NAMESPACE_END + +U_NAMESPACE_USE + +U_CAPI int32_t U_EXPORT2 +uhash_hashUnicodeString(const UElement key) { + const UnicodeString *str = (const UnicodeString*) key.pointer; + return (str == NULL) ? 0 : str->hashCode(); +} + +// Moved here from uhash_us.cpp so that using a UVector of UnicodeString* +// does not depend on hashtable code. +U_CAPI UBool U_EXPORT2 +uhash_compareUnicodeString(const UElement key1, const UElement key2) { + const UnicodeString *str1 = (const UnicodeString*) key1.pointer; + const UnicodeString *str2 = (const UnicodeString*) key2.pointer; + if (str1 == str2) { + return TRUE; + } + if (str1 == NULL || str2 == NULL) { + return FALSE; + } + return *str1 == *str2; +} + +#ifdef U_STATIC_IMPLEMENTATION +/* +This should never be called. It is defined here to make sure that the +virtual vector deleting destructor is defined within unistr.cpp. +The vector deleting destructor is already a part of UObject, +but defining it here makes sure that it is included with this object file. +This makes sure that static library dependencies are kept to a minimum. +*/ +static void uprv_UnicodeStringDummy(void) { + delete [] (new UnicodeString[2]); +} +#endif diff --git a/thirdparty/icu4c/common/unistr_case.cpp b/thirdparty/icu4c/common/unistr_case.cpp new file mode 100644 index 0000000000..2138d60c01 --- /dev/null +++ b/thirdparty/icu4c/common/unistr_case.cpp @@ -0,0 +1,250 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: unistr_case.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2004aug19 +* created by: Markus W. Scherer +* +* Case-mapping functions moved here from unistr.cpp +*/ + +#include "unicode/utypes.h" +#include "unicode/brkiter.h" +#include "unicode/casemap.h" +#include "unicode/edits.h" +#include "unicode/putil.h" +#include "cstring.h" +#include "cmemory.h" +#include "unicode/ustring.h" +#include "unicode/unistr.h" +#include "unicode/uchar.h" +#include "uassert.h" +#include "ucasemap_imp.h" +#include "uelement.h" + +U_NAMESPACE_BEGIN + +//======================================== +// Read-only implementation +//======================================== + +int8_t +UnicodeString::doCaseCompare(int32_t start, + int32_t length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const +{ + // compare illegal string values + // treat const UChar *srcChars==NULL as an empty string + if(isBogus()) { + return -1; + } + + // pin indices to legal values + pinIndices(start, length); + + if(srcChars == NULL) { + srcStart = srcLength = 0; + } + + // get the correct pointer + const UChar *chars = getArrayStart(); + + chars += start; + if(srcStart!=0) { + srcChars += srcStart; + } + + if(chars != srcChars) { + UErrorCode errorCode=U_ZERO_ERROR; + int32_t result=u_strcmpFold(chars, length, srcChars, srcLength, + options|U_COMPARE_IGNORE_CASE, &errorCode); + if(result!=0) { + return (int8_t)(result >> 24 | 1); + } + } else { + // get the srcLength if necessary + if(srcLength < 0) { + srcLength = u_strlen(srcChars + srcStart); + } + if(length != srcLength) { + return (int8_t)((length - srcLength) >> 24 | 1); + } + } + return 0; +} + +//======================================== +// Write implementation +//======================================== + +UnicodeString & +UnicodeString::caseMap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UStringCaseMapper *stringCaseMapper) { + if(isEmpty() || !isWritable()) { + // nothing to do + return *this; + } + + UChar oldBuffer[2 * US_STACKBUF_SIZE]; + UChar *oldArray; + int32_t oldLength = length(); + int32_t newLength; + UBool writable = isBufferWritable(); + UErrorCode errorCode = U_ZERO_ERROR; + +#if !UCONFIG_NO_BREAK_ITERATION + // Read-only alias to the original string contents for the titlecasing BreakIterator. + // We cannot set the iterator simply to *this because *this is being modified. + UnicodeString oldString; +#endif + + // Try to avoid heap-allocating a new character array for this string. + if (writable ? oldLength <= UPRV_LENGTHOF(oldBuffer) : oldLength < US_STACKBUF_SIZE) { + // Short string: Copy the contents into a temporary buffer and + // case-map back into the current array, or into the stack buffer. + UChar *buffer = getArrayStart(); + int32_t capacity; + oldArray = oldBuffer; + u_memcpy(oldBuffer, buffer, oldLength); + if (writable) { + capacity = getCapacity(); + } else { + // Switch from the read-only alias or shared heap buffer to the stack buffer. + if (!cloneArrayIfNeeded(US_STACKBUF_SIZE, US_STACKBUF_SIZE, /* doCopyArray= */ FALSE)) { + return *this; + } + U_ASSERT(fUnion.fFields.fLengthAndFlags & kUsingStackBuffer); + buffer = fUnion.fStackFields.fBuffer; + capacity = US_STACKBUF_SIZE; + } +#if !UCONFIG_NO_BREAK_ITERATION + if (iter != nullptr) { + oldString.setTo(FALSE, oldArray, oldLength); + iter->setText(oldString); + } +#endif + newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR + buffer, capacity, + oldArray, oldLength, NULL, errorCode); + if (U_SUCCESS(errorCode)) { + setLength(newLength); + return *this; + } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) { + // common overflow handling below + } else { + setToBogus(); + return *this; + } + } else { + // Longer string or read-only buffer: + // Collect only changes and then apply them to this string. + // Case mapping often changes only small parts of a string, + // and often does not change its length. + oldArray = getArrayStart(); + Edits edits; + UChar replacementChars[200]; +#if !UCONFIG_NO_BREAK_ITERATION + if (iter != nullptr) { + oldString.setTo(FALSE, oldArray, oldLength); + iter->setText(oldString); + } +#endif + stringCaseMapper(caseLocale, options | U_OMIT_UNCHANGED_TEXT, UCASEMAP_BREAK_ITERATOR + replacementChars, UPRV_LENGTHOF(replacementChars), + oldArray, oldLength, &edits, errorCode); + if (U_SUCCESS(errorCode)) { + // Grow the buffer at most once, not for multiple doReplace() calls. + newLength = oldLength + edits.lengthDelta(); + if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) { + return *this; + } + for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) { + doReplace(ei.destinationIndex(), ei.oldLength(), + replacementChars, ei.replacementIndex(), ei.newLength()); + } + if (U_FAILURE(errorCode)) { + setToBogus(); + } + return *this; + } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) { + // common overflow handling below + newLength = oldLength + edits.lengthDelta(); + } else { + setToBogus(); + return *this; + } + } + + // Handle buffer overflow, newLength is known. + // We need to allocate a new buffer for the internal string case mapping function. + // This is very similar to how doReplace() keeps the old array pointer + // and deletes the old array itself after it is done. + // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array. + int32_t *bufferToDelete = 0; + if (!cloneArrayIfNeeded(newLength, newLength, FALSE, &bufferToDelete, TRUE)) { + return *this; + } + errorCode = U_ZERO_ERROR; + // No need to iter->setText() again: The case mapper restarts via iter->first(). + newLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR + getArrayStart(), getCapacity(), + oldArray, oldLength, NULL, errorCode); + if (bufferToDelete) { + uprv_free(bufferToDelete); + } + if (U_SUCCESS(errorCode)) { + setLength(newLength); + } else { + setToBogus(); + } + return *this; +} + +UnicodeString & +UnicodeString::foldCase(uint32_t options) { + return caseMap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold); +} + +U_NAMESPACE_END + +// Defined here to reduce dependencies on break iterator +U_CAPI int32_t U_EXPORT2 +uhash_hashCaselessUnicodeString(const UElement key) { + U_NAMESPACE_USE + const UnicodeString *str = (const UnicodeString*) key.pointer; + if (str == NULL) { + return 0; + } + // Inefficient; a better way would be to have a hash function in + // UnicodeString that does case folding on the fly. + UnicodeString copy(*str); + return copy.foldCase().hashCode(); +} + +// Defined here to reduce dependencies on break iterator +U_CAPI UBool U_EXPORT2 +uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) { + U_NAMESPACE_USE + const UnicodeString *str1 = (const UnicodeString*) key1.pointer; + const UnicodeString *str2 = (const UnicodeString*) key2.pointer; + if (str1 == str2) { + return TRUE; + } + if (str1 == NULL || str2 == NULL) { + return FALSE; + } + return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0; +} diff --git a/thirdparty/icu4c/common/unistr_case_locale.cpp b/thirdparty/icu4c/common/unistr_case_locale.cpp new file mode 100644 index 0000000000..f0f3048d06 --- /dev/null +++ b/thirdparty/icu4c/common/unistr_case_locale.cpp @@ -0,0 +1,56 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: unistr_case_locale.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011may31 +* created by: Markus W. Scherer +* +* Locale-sensitive case mapping functions (ones that call uloc_getDefault()) +* were moved here to break dependency cycles among parts of the common library. +*/ + +#include "unicode/utypes.h" +#include "unicode/locid.h" +#include "unicode/ucasemap.h" +#include "unicode/unistr.h" +#include "ucasemap_imp.h" + +U_NAMESPACE_BEGIN + +//======================================== +// Write implementation +//======================================== + +UnicodeString & +UnicodeString::toLower() { + return caseMap(ustrcase_getCaseLocale(NULL), 0, + UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower); +} + +UnicodeString & +UnicodeString::toLower(const Locale &locale) { + return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0, + UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower); +} + +UnicodeString & +UnicodeString::toUpper() { + return caseMap(ustrcase_getCaseLocale(NULL), 0, + UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper); +} + +UnicodeString & +UnicodeString::toUpper(const Locale &locale) { + return caseMap(ustrcase_getCaseLocale(locale.getBaseName()), 0, + UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/unistr_cnv.cpp b/thirdparty/icu4c/common/unistr_cnv.cpp new file mode 100644 index 0000000000..64d3c16801 --- /dev/null +++ b/thirdparty/icu4c/common/unistr_cnv.cpp @@ -0,0 +1,417 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: unistr_cnv.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2004aug19 +* created by: Markus W. Scherer +* +* Character conversion functions moved here from unistr.cpp +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/putil.h" +#include "cstring.h" +#include "cmemory.h" +#include "unicode/ustring.h" +#include "unicode/unistr.h" +#include "unicode/ucnv.h" +#include "ucnv_imp.h" +#include "putilimp.h" +#include "ustr_cnv.h" +#include "ustr_imp.h" + +U_NAMESPACE_BEGIN + +//======================================== +// Constructors +//======================================== + +#if !U_CHARSET_IS_UTF8 + +UnicodeString::UnicodeString(const char *codepageData) { + fUnion.fFields.fLengthAndFlags = kShortString; + if(codepageData != 0) { + doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0); + } +} + +UnicodeString::UnicodeString(const char *codepageData, + int32_t dataLength) { + fUnion.fFields.fLengthAndFlags = kShortString; + if(codepageData != 0) { + doCodepageCreate(codepageData, dataLength, 0); + } +} + +// else see unistr.cpp +#endif + +UnicodeString::UnicodeString(const char *codepageData, + const char *codepage) { + fUnion.fFields.fLengthAndFlags = kShortString; + if(codepageData != 0) { + doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage); + } +} + +UnicodeString::UnicodeString(const char *codepageData, + int32_t dataLength, + const char *codepage) { + fUnion.fFields.fLengthAndFlags = kShortString; + if(codepageData != 0) { + doCodepageCreate(codepageData, dataLength, codepage); + } +} + +UnicodeString::UnicodeString(const char *src, int32_t srcLength, + UConverter *cnv, + UErrorCode &errorCode) { + fUnion.fFields.fLengthAndFlags = kShortString; + if(U_SUCCESS(errorCode)) { + // check arguments + if(src==NULL) { + // treat as an empty string, do nothing more + } else if(srcLength<-1) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + } else { + // get input length + if(srcLength==-1) { + srcLength=(int32_t)uprv_strlen(src); + } + if(srcLength>0) { + if(cnv!=0) { + // use the provided converter + ucnv_resetToUnicode(cnv); + doCodepageCreate(src, srcLength, cnv, errorCode); + } else { + // use the default converter + cnv=u_getDefaultConverter(&errorCode); + doCodepageCreate(src, srcLength, cnv, errorCode); + u_releaseDefaultConverter(cnv); + } + } + } + + if(U_FAILURE(errorCode)) { + setToBogus(); + } + } +} + +//======================================== +// Codeset conversion +//======================================== + +#if !U_CHARSET_IS_UTF8 + +int32_t +UnicodeString::extract(int32_t start, + int32_t length, + char *target, + uint32_t dstSize) const { + return extract(start, length, target, dstSize, 0); +} + +// else see unistr.cpp +#endif + +int32_t +UnicodeString::extract(int32_t start, + int32_t length, + char *target, + uint32_t dstSize, + const char *codepage) const +{ + // if the arguments are illegal, then do nothing + if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) { + return 0; + } + + // pin the indices to legal values + pinIndices(start, length); + + // We need to cast dstSize to int32_t for all subsequent code. + // I don't know why the API was defined with uint32_t but we are stuck with it. + // Also, dstSize==0xffffffff means "unlimited" but if we use target+dstSize + // as a limit in some functions, it may wrap around and yield a pointer + // that compares less-than target. + int32_t capacity; + if(dstSize < 0x7fffffff) { + // Assume that the capacity is real and a limit pointer won't wrap around. + capacity = (int32_t)dstSize; + } else { + // Pin the capacity so that a limit pointer does not wrap around. + char *targetLimit = (char *)U_MAX_PTR(target); + // U_MAX_PTR(target) returns a targetLimit that is at most 0x7fffffff + // greater than target and does not wrap around the top of the address space. + capacity = (int32_t)(targetLimit - target); + } + + // create the converter + UConverter *converter; + UErrorCode status = U_ZERO_ERROR; + + // just write the NUL if the string length is 0 + if(length == 0) { + return u_terminateChars(target, capacity, 0, &status); + } + + // if the codepage is the default, use our cache + // if it is an empty string, then use the "invariant character" conversion + if (codepage == 0) { + const char *defaultName = ucnv_getDefaultName(); + if(UCNV_FAST_IS_UTF8(defaultName)) { + return toUTF8(start, length, target, capacity); + } + converter = u_getDefaultConverter(&status); + } else if (*codepage == 0) { + // use the "invariant characters" conversion + int32_t destLength; + if(length <= capacity) { + destLength = length; + } else { + destLength = capacity; + } + u_UCharsToChars(getArrayStart() + start, target, destLength); + return u_terminateChars(target, capacity, length, &status); + } else { + converter = ucnv_open(codepage, &status); + } + + length = doExtract(start, length, target, capacity, converter, status); + + // close the converter + if (codepage == 0) { + u_releaseDefaultConverter(converter); + } else { + ucnv_close(converter); + } + + return length; +} + +int32_t +UnicodeString::extract(char *dest, int32_t destCapacity, + UConverter *cnv, + UErrorCode &errorCode) const +{ + if(U_FAILURE(errorCode)) { + return 0; + } + + if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + // nothing to do? + if(isEmpty()) { + return u_terminateChars(dest, destCapacity, 0, &errorCode); + } + + // get the converter + UBool isDefaultConverter; + if(cnv==0) { + isDefaultConverter=TRUE; + cnv=u_getDefaultConverter(&errorCode); + if(U_FAILURE(errorCode)) { + return 0; + } + } else { + isDefaultConverter=FALSE; + ucnv_resetFromUnicode(cnv); + } + + // convert + int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode); + + // release the converter + if(isDefaultConverter) { + u_releaseDefaultConverter(cnv); + } + + return len; +} + +int32_t +UnicodeString::doExtract(int32_t start, int32_t length, + char *dest, int32_t destCapacity, + UConverter *cnv, + UErrorCode &errorCode) const +{ + if(U_FAILURE(errorCode)) { + if(destCapacity!=0) { + *dest=0; + } + return 0; + } + + const UChar *src=getArrayStart()+start, *srcLimit=src+length; + char *originalDest=dest; + const char *destLimit; + + if(destCapacity==0) { + destLimit=dest=0; + } else if(destCapacity==-1) { + // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used. + destLimit=(char*)U_MAX_PTR(dest); + // for NUL-termination, translate into highest int32_t + destCapacity=0x7fffffff; + } else { + destLimit=dest+destCapacity; + } + + // perform the conversion + ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode); + length=(int32_t)(dest-originalDest); + + // if an overflow occurs, then get the preflighting length + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + char buffer[1024]; + + destLimit=buffer+sizeof(buffer); + do { + dest=buffer; + errorCode=U_ZERO_ERROR; + ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, &errorCode); + length+=(int32_t)(dest-buffer); + } while(errorCode==U_BUFFER_OVERFLOW_ERROR); + } + + return u_terminateChars(originalDest, destCapacity, length, &errorCode); +} + +void +UnicodeString::doCodepageCreate(const char *codepageData, + int32_t dataLength, + const char *codepage) +{ + // if there's nothing to convert, do nothing + if(codepageData == 0 || dataLength == 0 || dataLength < -1) { + return; + } + if(dataLength == -1) { + dataLength = (int32_t)uprv_strlen(codepageData); + } + + UErrorCode status = U_ZERO_ERROR; + + // create the converter + // if the codepage is the default, use our cache + // if it is an empty string, then use the "invariant character" conversion + UConverter *converter; + if (codepage == 0) { + const char *defaultName = ucnv_getDefaultName(); + if(UCNV_FAST_IS_UTF8(defaultName)) { + setToUTF8(StringPiece(codepageData, dataLength)); + return; + } + converter = u_getDefaultConverter(&status); + } else if(*codepage == 0) { + // use the "invariant characters" conversion + if(cloneArrayIfNeeded(dataLength, dataLength, FALSE)) { + u_charsToUChars(codepageData, getArrayStart(), dataLength); + setLength(dataLength); + } else { + setToBogus(); + } + return; + } else { + converter = ucnv_open(codepage, &status); + } + + // if we failed, set the appropriate flags and return + if(U_FAILURE(status)) { + setToBogus(); + return; + } + + // perform the conversion + doCodepageCreate(codepageData, dataLength, converter, status); + if(U_FAILURE(status)) { + setToBogus(); + } + + // close the converter + if(codepage == 0) { + u_releaseDefaultConverter(converter); + } else { + ucnv_close(converter); + } +} + +void +UnicodeString::doCodepageCreate(const char *codepageData, + int32_t dataLength, + UConverter *converter, + UErrorCode &status) +{ + if(U_FAILURE(status)) { + return; + } + + // set up the conversion parameters + const char *mySource = codepageData; + const char *mySourceEnd = mySource + dataLength; + UChar *array, *myTarget; + + // estimate the size needed: + int32_t arraySize; + if(dataLength <= US_STACKBUF_SIZE) { + // try to use the stack buffer + arraySize = US_STACKBUF_SIZE; + } else { + // 1.25 UChar's per source byte should cover most cases + arraySize = dataLength + (dataLength >> 2); + } + + // we do not care about the current contents + UBool doCopyArray = FALSE; + for(;;) { + if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) { + setToBogus(); + break; + } + + // perform the conversion + array = getArrayStart(); + myTarget = array + length(); + ucnv_toUnicode(converter, &myTarget, array + getCapacity(), + &mySource, mySourceEnd, 0, TRUE, &status); + + // update the conversion parameters + setLength((int32_t)(myTarget - array)); + + // allocate more space and copy data, if needed + if(status == U_BUFFER_OVERFLOW_ERROR) { + // reset the error code + status = U_ZERO_ERROR; + + // keep the previous conversion results + doCopyArray = TRUE; + + // estimate the new size needed, larger than before + // try 2 UChar's per remaining source byte + arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource)); + } else { + break; + } + } +} + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/unistr_props.cpp b/thirdparty/icu4c/common/unistr_props.cpp new file mode 100644 index 0000000000..4006475790 --- /dev/null +++ b/thirdparty/icu4c/common/unistr_props.cpp @@ -0,0 +1,77 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: unistr_props.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2004aug25 +* created by: Markus W. Scherer +* +* Character property dependent functions moved here from unistr.cpp +*/ + +#include "unicode/utypes.h" +#include "unicode/uchar.h" +#include "unicode/unistr.h" +#include "unicode/utf16.h" + +U_NAMESPACE_BEGIN + +UnicodeString& +UnicodeString::trim() +{ + if(isBogus()) { + return *this; + } + + UChar *array = getArrayStart(); + UChar32 c; + int32_t oldLength = this->length(); + int32_t i = oldLength, length; + + // first cut off trailing white space + for(;;) { + length = i; + if(i <= 0) { + break; + } + U16_PREV(array, 0, i, c); + if(!(c == 0x20 || u_isWhitespace(c))) { + break; + } + } + if(length < oldLength) { + setLength(length); + } + + // find leading white space + int32_t start; + i = 0; + for(;;) { + start = i; + if(i >= length) { + break; + } + U16_NEXT(array, i, length, c); + if(!(c == 0x20 || u_isWhitespace(c))) { + break; + } + } + + // move string forward over leading white space + if(start > 0) { + doReplace(0, start, 0, 0, 0); + } + + return *this; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/unistr_titlecase_brkiter.cpp b/thirdparty/icu4c/common/unistr_titlecase_brkiter.cpp new file mode 100644 index 0000000000..4969884b0d --- /dev/null +++ b/thirdparty/icu4c/common/unistr_titlecase_brkiter.cpp @@ -0,0 +1,57 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: unistr_titlecase_brkiter.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2011may30 +* created by: Markus W. Scherer +* +* Titlecasing functions that are based on BreakIterator +* were moved here to break dependency cycles among parts of the common library. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/brkiter.h" +#include "unicode/locid.h" +#include "unicode/ucasemap.h" +#include "unicode/unistr.h" +#include "ucasemap_imp.h" + +U_NAMESPACE_BEGIN + +UnicodeString & +UnicodeString::toTitle(BreakIterator *iter) { + return toTitle(iter, Locale::getDefault(), 0); +} + +UnicodeString & +UnicodeString::toTitle(BreakIterator *iter, const Locale &locale) { + return toTitle(iter, locale, 0); +} + +UnicodeString & +UnicodeString::toTitle(BreakIterator *iter, const Locale &locale, uint32_t options) { + LocalPointer<BreakIterator> ownedIter; + UErrorCode errorCode = U_ZERO_ERROR; + iter = ustrcase_getTitleBreakIterator(&locale, "", options, iter, ownedIter, errorCode); + if (iter == nullptr) { + setToBogus(); + return *this; + } + caseMap(ustrcase_getCaseLocale(locale.getBaseName()), options, iter, ustrcase_internalToTitle); + return *this; +} + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_BREAK_ITERATION diff --git a/thirdparty/icu4c/common/unistrappender.h b/thirdparty/icu4c/common/unistrappender.h new file mode 100644 index 0000000000..75fcb9e775 --- /dev/null +++ b/thirdparty/icu4c/common/unistrappender.h @@ -0,0 +1,90 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2015, International Business Machines Corporation and +* others. All Rights Reserved. +****************************************************************************** +* +* File unistrappender.h +****************************************************************************** +*/ + +#ifndef __UNISTRAPPENDER_H__ +#define __UNISTRAPPENDER_H__ + +#include "unicode/unistr.h" +#include "unicode/uobject.h" +#include "unicode/utf16.h" +#include "unicode/utypes.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + +/** + * An optimization for the slowness of calling UnicodeString::append() + * one character at a time in a loop. It stores appends in a buffer while + * never actually calling append on the unicode string unless the buffer + * fills up or is flushed. + * + * proper usage: + * { + * UnicodeStringAppender appender(astring); + * for (int32_t i = 0; i < 100; ++i) { + * appender.append((UChar) i); + * } + * // appender flushed automatically when it goes out of scope. + * } + */ +class UnicodeStringAppender : public UMemory { +public: + + /** + * dest is the UnicodeString being appended to. It must always + * exist while this instance exists. + */ + UnicodeStringAppender(UnicodeString &dest) : fDest(&dest), fIdx(0) { } + + inline void append(UChar x) { + if (fIdx == UPRV_LENGTHOF(fBuffer)) { + fDest->append(fBuffer, 0, fIdx); + fIdx = 0; + } + fBuffer[fIdx++] = x; + } + + inline void append(UChar32 x) { + if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) { + fDest->append(fBuffer, 0, fIdx); + fIdx = 0; + } + U16_APPEND_UNSAFE(fBuffer, fIdx, x); + } + + /** + * Ensures that all appended characters have been written out to dest. + */ + inline void flush() { + if (fIdx) { + fDest->append(fBuffer, 0, fIdx); + } + fIdx = 0; + } + + /** + * flush the buffer when we go out of scope. + */ + ~UnicodeStringAppender() { + flush(); + } +private: + UnicodeString *fDest; + int32_t fIdx; + UChar fBuffer[32]; + UnicodeStringAppender(const UnicodeStringAppender &other); + UnicodeStringAppender &operator=(const UnicodeStringAppender &other); +}; + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/unorm.cpp b/thirdparty/icu4c/common/unorm.cpp new file mode 100644 index 0000000000..2d9f46052f --- /dev/null +++ b/thirdparty/icu4c/common/unorm.cpp @@ -0,0 +1,280 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (c) 1996-2014, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* File unorm.cpp +* +* Created by: Vladimir Weinstein 12052000 +* +* Modification history : +* +* Date Name Description +* 02/01/01 synwee Added normalization quickcheck enum and method. +* 02/12/01 synwee Commented out quickcheck util api has been approved +* Added private method for doing FCD checks +* 02/23/01 synwee Modified quickcheck and checkFCE to run through +* string for codepoints < 0x300 for the normalization +* mode NFC. +* 05/25/01+ Markus Scherer total rewrite, implement all normalization here +* instead of just wrappers around normlzr.cpp, +* load unorm.dat, support Unicode 3.1 with +* supplementary code points, etc. +* 2009-nov..2010-jan Markus Scherer total rewrite, new Normalizer2 API & code +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/udata.h" +#include "unicode/ustring.h" +#include "unicode/uiter.h" +#include "unicode/unorm.h" +#include "unicode/unorm2.h" +#include "normalizer2impl.h" +#include "unormimp.h" +#include "uprops.h" +#include "ustr_imp.h" + +U_NAMESPACE_USE + +/* quick check functions ---------------------------------------------------- */ + +U_CAPI UNormalizationCheckResult U_EXPORT2 +unorm_quickCheck(const UChar *src, + int32_t srcLength, + UNormalizationMode mode, + UErrorCode *pErrorCode) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); + return unorm2_quickCheck((const UNormalizer2 *)n2, src, srcLength, pErrorCode); +} + +U_CAPI UNormalizationCheckResult U_EXPORT2 +unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, + UNormalizationMode mode, int32_t options, + UErrorCode *pErrorCode) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); + if(options&UNORM_UNICODE_3_2) { + FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode)); + return unorm2_quickCheck( + reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)), + src, srcLength, pErrorCode); + } else { + return unorm2_quickCheck((const UNormalizer2 *)n2, src, srcLength, pErrorCode); + } +} + +U_CAPI UBool U_EXPORT2 +unorm_isNormalized(const UChar *src, int32_t srcLength, + UNormalizationMode mode, + UErrorCode *pErrorCode) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); + return unorm2_isNormalized((const UNormalizer2 *)n2, src, srcLength, pErrorCode); +} + +U_CAPI UBool U_EXPORT2 +unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, + UNormalizationMode mode, int32_t options, + UErrorCode *pErrorCode) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); + if(options&UNORM_UNICODE_3_2) { + FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode)); + return unorm2_isNormalized( + reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)), + src, srcLength, pErrorCode); + } else { + return unorm2_isNormalized((const UNormalizer2 *)n2, src, srcLength, pErrorCode); + } +} + +/* normalize() API ---------------------------------------------------------- */ + +/** Public API for normalizing. */ +U_CAPI int32_t U_EXPORT2 +unorm_normalize(const UChar *src, int32_t srcLength, + UNormalizationMode mode, int32_t options, + UChar *dest, int32_t destCapacity, + UErrorCode *pErrorCode) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); + if(options&UNORM_UNICODE_3_2) { + FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*pErrorCode)); + return unorm2_normalize( + reinterpret_cast<const UNormalizer2 *>(static_cast<Normalizer2 *>(&fn2)), + src, srcLength, dest, destCapacity, pErrorCode); + } else { + return unorm2_normalize((const UNormalizer2 *)n2, + src, srcLength, dest, destCapacity, pErrorCode); + } +} + + +/* iteration functions ------------------------------------------------------ */ + +static int32_t +_iterate(UCharIterator *src, UBool forward, + UChar *dest, int32_t destCapacity, + const Normalizer2 *n2, + UBool doNormalize, UBool *pNeededToNormalize, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(destCapacity<0 || (dest==NULL && destCapacity>0) || src==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(pNeededToNormalize!=NULL) { + *pNeededToNormalize=FALSE; + } + if(!(forward ? src->hasNext(src) : src->hasPrevious(src))) { + return u_terminateUChars(dest, destCapacity, 0, pErrorCode); + } + + UnicodeString buffer; + UChar32 c; + if(forward) { + /* get one character and ignore its properties */ + buffer.append(uiter_next32(src)); + /* get all following characters until we see a boundary */ + while((c=uiter_next32(src))>=0) { + if(n2->hasBoundaryBefore(c)) { + /* back out the latest movement to stop at the boundary */ + src->move(src, -U16_LENGTH(c), UITER_CURRENT); + break; + } else { + buffer.append(c); + } + } + } else { + while((c=uiter_previous32(src))>=0) { + /* always write this character to the front of the buffer */ + buffer.insert(0, c); + /* stop if this just-copied character is a boundary */ + if(n2->hasBoundaryBefore(c)) { + break; + } + } + } + + UnicodeString destString(dest, 0, destCapacity); + if(buffer.length()>0 && doNormalize) { + n2->normalize(buffer, destString, *pErrorCode).extract(dest, destCapacity, *pErrorCode); + if(pNeededToNormalize!=NULL && U_SUCCESS(*pErrorCode)) { + *pNeededToNormalize= destString!=buffer; + } + return destString.length(); + } else { + /* just copy the source characters */ + return buffer.extract(dest, destCapacity, *pErrorCode); + } +} + +static int32_t +unorm_iterate(UCharIterator *src, UBool forward, + UChar *dest, int32_t destCapacity, + UNormalizationMode mode, int32_t options, + UBool doNormalize, UBool *pNeededToNormalize, + UErrorCode *pErrorCode) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); + if(options&UNORM_UNICODE_3_2) { + const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + FilteredNormalizer2 fn2(*n2, *uni32); + return _iterate(src, forward, dest, destCapacity, + &fn2, doNormalize, pNeededToNormalize, pErrorCode); + } + return _iterate(src, forward, dest, destCapacity, + n2, doNormalize, pNeededToNormalize, pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +unorm_previous(UCharIterator *src, + UChar *dest, int32_t destCapacity, + UNormalizationMode mode, int32_t options, + UBool doNormalize, UBool *pNeededToNormalize, + UErrorCode *pErrorCode) { + return unorm_iterate(src, FALSE, + dest, destCapacity, + mode, options, + doNormalize, pNeededToNormalize, + pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +unorm_next(UCharIterator *src, + UChar *dest, int32_t destCapacity, + UNormalizationMode mode, int32_t options, + UBool doNormalize, UBool *pNeededToNormalize, + UErrorCode *pErrorCode) { + return unorm_iterate(src, TRUE, + dest, destCapacity, + mode, options, + doNormalize, pNeededToNormalize, + pErrorCode); +} + +/* Concatenation of normalized strings -------------------------------------- */ + +static int32_t +_concatenate(const UChar *left, int32_t leftLength, + const UChar *right, int32_t rightLength, + UChar *dest, int32_t destCapacity, + const Normalizer2 *n2, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(destCapacity<0 || (dest==NULL && destCapacity>0) || + left==NULL || leftLength<-1 || right==NULL || rightLength<-1) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* check for overlapping right and destination */ + if( dest!=NULL && + ((right>=dest && right<(dest+destCapacity)) || + (rightLength>0 && dest>=right && dest<(right+rightLength))) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* allow left==dest */ + UnicodeString destString; + if(left==dest) { + destString.setTo(dest, leftLength, destCapacity); + } else { + destString.setTo(dest, 0, destCapacity); + destString.append(left, leftLength); + } + return n2->append(destString, UnicodeString(rightLength<0, right, rightLength), *pErrorCode). + extract(dest, destCapacity, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +unorm_concatenate(const UChar *left, int32_t leftLength, + const UChar *right, int32_t rightLength, + UChar *dest, int32_t destCapacity, + UNormalizationMode mode, int32_t options, + UErrorCode *pErrorCode) { + const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, *pErrorCode); + if(options&UNORM_UNICODE_3_2) { + const UnicodeSet *uni32 = uniset_getUnicode32Instance(*pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + FilteredNormalizer2 fn2(*n2, *uni32); + return _concatenate(left, leftLength, right, rightLength, + dest, destCapacity, &fn2, pErrorCode); + } + return _concatenate(left, leftLength, right, rightLength, + dest, destCapacity, n2, pErrorCode); +} + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/thirdparty/icu4c/common/unormcmp.cpp b/thirdparty/icu4c/common/unormcmp.cpp new file mode 100644 index 0000000000..689b0b53b2 --- /dev/null +++ b/thirdparty/icu4c/common/unormcmp.cpp @@ -0,0 +1,640 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2001-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: unormcmp.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004sep13 +* created by: Markus W. Scherer +* +* unorm_compare() function moved here from unorm.cpp for better modularization. +* Depends on both normalization and case folding. +* Allows unorm.cpp to not depend on any character properties code. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/unorm.h" +#include "unicode/ustring.h" +#include "cmemory.h" +#include "normalizer2impl.h" +#include "ucase.h" +#include "uprops.h" +#include "ustr_imp.h" + +U_NAMESPACE_USE + +/* compare canonically equivalent ------------------------------------------- */ + +/* + * Compare two strings for canonical equivalence. + * Further options include case-insensitive comparison and + * code point order (as opposed to code unit order). + * + * In this function, canonical equivalence is optional as well. + * If canonical equivalence is tested, then both strings must fulfill + * the FCD check. + * + * Semantically, this is equivalent to + * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2))) + * where code point order, NFD and foldCase are all optional. + * + * String comparisons almost always yield results before processing both strings + * completely. + * They are generally more efficient working incrementally instead of + * performing the sub-processing (strlen, normalization, case-folding) + * on the entire strings first. + * + * It is also unnecessary to not normalize identical characters. + * + * This function works in principle as follows: + * + * loop { + * get one code unit c1 from s1 (-1 if end of source) + * get one code unit c2 from s2 (-1 if end of source) + * + * if(either string finished) { + * return result; + * } + * if(c1==c2) { + * continue; + * } + * + * // c1!=c2 + * try to decompose/case-fold c1/c2, and continue if one does; + * + * // still c1!=c2 and neither decomposes/case-folds, return result + * return c1-c2; + * } + * + * When a character decomposes, then the pointer for that source changes to + * the decomposition, pushing the previous pointer onto a stack. + * When the end of the decomposition is reached, then the code unit reader + * pops the previous source from the stack. + * (Same for case-folding.) + * + * This is complicated further by operating on variable-width UTF-16. + * The top part of the loop works on code units, while lookups for decomposition + * and case-folding need code points. + * Code points are assembled after the equality/end-of-source part. + * The source pointer is only advanced beyond all code units when the code point + * actually decomposes/case-folds. + * + * If we were on a trail surrogate unit when assembling a code point, + * and the code point decomposes/case-folds, then the decomposition/folding + * result must be compared with the part of the other string that corresponds to + * this string's lead surrogate. + * Since we only assemble a code point when hitting a trail unit when the + * preceding lead units were identical, we back up the other string by one unit + * in such a case. + * + * The optional code point order comparison at the end works with + * the same fix-up as the other code point order comparison functions. + * See ustring.c and the comment near the end of this function. + * + * Assumption: A decomposition or case-folding result string never contains + * a single surrogate. This is a safe assumption in the Unicode Standard. + * Therefore, we do not need to check for surrogate pairs across + * decomposition/case-folding boundaries. + * + * Further assumptions (see verifications tstnorm.cpp): + * The API function checks for FCD first, while the core function + * first case-folds and then decomposes. This requires that case-folding does not + * un-FCD any strings. + * + * The API function may also NFD the input and turn off decomposition. + * This requires that case-folding does not un-NFD strings either. + * + * TODO If any of the above two assumptions is violated, + * then this entire code must be re-thought. + * If this happens, then a simple solution is to case-fold both strings up front + * and to turn off UNORM_INPUT_IS_FCD. + * We already do this when not both strings are in FCD because makeFCD + * would be a partial NFD before the case folding, which does not work. + * Note that all of this is only a problem when case-folding _and_ + * canonical equivalence come together. + * (Comments in unorm_compare() are more up to date than this TODO.) + */ + +/* stack element for previous-level source/decomposition pointers */ +struct CmpEquivLevel { + const UChar *start, *s, *limit; +}; +typedef struct CmpEquivLevel CmpEquivLevel; + +/** + * Internal option for unorm_cmpEquivFold() for decomposing. + * If not set, just do strcasecmp(). + */ +#define _COMPARE_EQUIV 0x80000 + +/* internal function */ +static int32_t +unorm_cmpEquivFold(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode) { + const Normalizer2Impl *nfcImpl; + + /* current-level start/limit - s1/s2 as current */ + const UChar *start1, *start2, *limit1, *limit2; + + /* decomposition and case folding variables */ + const UChar *p; + int32_t length; + + /* stacks of previous-level start/current/limit */ + CmpEquivLevel stack1[2], stack2[2]; + + /* buffers for algorithmic decompositions */ + UChar decomp1[4], decomp2[4]; + + /* case folding buffers, only use current-level start/limit */ + UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1]; + + /* track which is the current level per string */ + int32_t level1, level2; + + /* current code units, and code points for lookups */ + UChar32 c1, c2, cp1, cp2; + + /* no argument error checking because this itself is not an API */ + + /* + * assume that at least one of the options _COMPARE_EQUIV and U_COMPARE_IGNORE_CASE is set + * otherwise this function must behave exactly as uprv_strCompare() + * not checking for that here makes testing this function easier + */ + + /* normalization/properties data loaded? */ + if((options&_COMPARE_EQUIV)!=0) { + nfcImpl=Normalizer2Factory::getNFCImpl(*pErrorCode); + } else { + nfcImpl=NULL; + } + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + /* initialize */ + start1=s1; + if(length1==-1) { + limit1=NULL; + } else { + limit1=s1+length1; + } + + start2=s2; + if(length2==-1) { + limit2=NULL; + } else { + limit2=s2+length2; + } + + level1=level2=0; + c1=c2=-1; + + /* comparison loop */ + for(;;) { + /* + * here a code unit value of -1 means "get another code unit" + * below it will mean "this source is finished" + */ + + if(c1<0) { + /* get next code unit from string 1, post-increment */ + for(;;) { + if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) { + if(level1==0) { + c1=-1; + break; + } + } else { + ++s1; + break; + } + + /* reached end of level buffer, pop one level */ + do { + --level1; + start1=stack1[level1].start; /*Not uninitialized*/ + } while(start1==NULL); + s1=stack1[level1].s; /*Not uninitialized*/ + limit1=stack1[level1].limit; /*Not uninitialized*/ + } + } + + if(c2<0) { + /* get next code unit from string 2, post-increment */ + for(;;) { + if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) { + if(level2==0) { + c2=-1; + break; + } + } else { + ++s2; + break; + } + + /* reached end of level buffer, pop one level */ + do { + --level2; + start2=stack2[level2].start; /*Not uninitialized*/ + } while(start2==NULL); + s2=stack2[level2].s; /*Not uninitialized*/ + limit2=stack2[level2].limit; /*Not uninitialized*/ + } + } + + /* + * compare c1 and c2 + * either variable c1, c2 is -1 only if the corresponding string is finished + */ + if(c1==c2) { + if(c1<0) { + return 0; /* c1==c2==-1 indicating end of strings */ + } + c1=c2=-1; /* make us fetch new code units */ + continue; + } else if(c1<0) { + return -1; /* string 1 ends before string 2 */ + } else if(c2<0) { + return 1; /* string 2 ends before string 1 */ + } + /* c1!=c2 && c1>=0 && c2>=0 */ + + /* get complete code points for c1, c2 for lookups if either is a surrogate */ + cp1=c1; + if(U_IS_SURROGATE(c1)) { + UChar c; + + if(U_IS_SURROGATE_LEAD(c1)) { + if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) { + /* advance ++s1; only below if cp1 decomposes/case-folds */ + cp1=U16_GET_SUPPLEMENTARY(c1, c); + } + } else /* isTrail(c1) */ { + if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) { + cp1=U16_GET_SUPPLEMENTARY(c, c1); + } + } + } + + cp2=c2; + if(U_IS_SURROGATE(c2)) { + UChar c; + + if(U_IS_SURROGATE_LEAD(c2)) { + if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) { + /* advance ++s2; only below if cp2 decomposes/case-folds */ + cp2=U16_GET_SUPPLEMENTARY(c2, c); + } + } else /* isTrail(c2) */ { + if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) { + cp2=U16_GET_SUPPLEMENTARY(c, c2); + } + } + } + + /* + * go down one level for each string + * continue with the main loop as soon as there is a real change + */ + + if( level1==0 && (options&U_COMPARE_IGNORE_CASE) && + (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0 + ) { + /* cp1 case-folds to the code point "length" or to p[length] */ + if(U_IS_SURROGATE(c1)) { + if(U_IS_SURROGATE_LEAD(c1)) { + /* advance beyond source surrogate pair if it case-folds */ + ++s1; + } else /* isTrail(c1) */ { + /* + * we got a supplementary code point when hitting its trail surrogate, + * therefore the lead surrogate must have been the same as in the other string; + * compare this decomposition with the lead surrogate in the other string + * remember that this simulates bulk text replacement: + * the decomposition would replace the entire code point + */ + --s2; + c2=*(s2-1); + } + } + + /* push current level pointers */ + stack1[0].start=start1; + stack1[0].s=s1; + stack1[0].limit=limit1; + ++level1; + + /* copy the folding result to fold1[] */ + if(length<=UCASE_MAX_STRING_LENGTH) { + u_memcpy(fold1, p, length); + } else { + int32_t i=0; + U16_APPEND_UNSAFE(fold1, i, length); + length=i; + } + + /* set next level pointers to case folding */ + start1=s1=fold1; + limit1=fold1+length; + + /* get ready to read from decomposition, continue with loop */ + c1=-1; + continue; + } + + if( level2==0 && (options&U_COMPARE_IGNORE_CASE) && + (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0 + ) { + /* cp2 case-folds to the code point "length" or to p[length] */ + if(U_IS_SURROGATE(c2)) { + if(U_IS_SURROGATE_LEAD(c2)) { + /* advance beyond source surrogate pair if it case-folds */ + ++s2; + } else /* isTrail(c2) */ { + /* + * we got a supplementary code point when hitting its trail surrogate, + * therefore the lead surrogate must have been the same as in the other string; + * compare this decomposition with the lead surrogate in the other string + * remember that this simulates bulk text replacement: + * the decomposition would replace the entire code point + */ + --s1; + c1=*(s1-1); + } + } + + /* push current level pointers */ + stack2[0].start=start2; + stack2[0].s=s2; + stack2[0].limit=limit2; + ++level2; + + /* copy the folding result to fold2[] */ + if(length<=UCASE_MAX_STRING_LENGTH) { + u_memcpy(fold2, p, length); + } else { + int32_t i=0; + U16_APPEND_UNSAFE(fold2, i, length); + length=i; + } + + /* set next level pointers to case folding */ + start2=s2=fold2; + limit2=fold2+length; + + /* get ready to read from decomposition, continue with loop */ + c2=-1; + continue; + } + + if( level1<2 && (options&_COMPARE_EQUIV) && + 0!=(p=nfcImpl->getDecomposition((UChar32)cp1, decomp1, length)) + ) { + /* cp1 decomposes into p[length] */ + if(U_IS_SURROGATE(c1)) { + if(U_IS_SURROGATE_LEAD(c1)) { + /* advance beyond source surrogate pair if it decomposes */ + ++s1; + } else /* isTrail(c1) */ { + /* + * we got a supplementary code point when hitting its trail surrogate, + * therefore the lead surrogate must have been the same as in the other string; + * compare this decomposition with the lead surrogate in the other string + * remember that this simulates bulk text replacement: + * the decomposition would replace the entire code point + */ + --s2; + c2=*(s2-1); + } + } + + /* push current level pointers */ + stack1[level1].start=start1; + stack1[level1].s=s1; + stack1[level1].limit=limit1; + ++level1; + + /* set empty intermediate level if skipped */ + if(level1<2) { + stack1[level1++].start=NULL; + } + + /* set next level pointers to decomposition */ + start1=s1=p; + limit1=p+length; + + /* get ready to read from decomposition, continue with loop */ + c1=-1; + continue; + } + + if( level2<2 && (options&_COMPARE_EQUIV) && + 0!=(p=nfcImpl->getDecomposition((UChar32)cp2, decomp2, length)) + ) { + /* cp2 decomposes into p[length] */ + if(U_IS_SURROGATE(c2)) { + if(U_IS_SURROGATE_LEAD(c2)) { + /* advance beyond source surrogate pair if it decomposes */ + ++s2; + } else /* isTrail(c2) */ { + /* + * we got a supplementary code point when hitting its trail surrogate, + * therefore the lead surrogate must have been the same as in the other string; + * compare this decomposition with the lead surrogate in the other string + * remember that this simulates bulk text replacement: + * the decomposition would replace the entire code point + */ + --s1; + c1=*(s1-1); + } + } + + /* push current level pointers */ + stack2[level2].start=start2; + stack2[level2].s=s2; + stack2[level2].limit=limit2; + ++level2; + + /* set empty intermediate level if skipped */ + if(level2<2) { + stack2[level2++].start=NULL; + } + + /* set next level pointers to decomposition */ + start2=s2=p; + limit2=p+length; + + /* get ready to read from decomposition, continue with loop */ + c2=-1; + continue; + } + + /* + * no decomposition/case folding, max level for both sides: + * return difference result + * + * code point order comparison must not just return cp1-cp2 + * because when single surrogates are present then the surrogate pairs + * that formed cp1 and cp2 may be from different string indexes + * + * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units + * c1=d800 cp1=10001 c2=dc00 cp2=10000 + * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 } + * + * therefore, use same fix-up as in ustring.c/uprv_strCompare() + * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++ + * so we have slightly different pointer/start/limit comparisons here + */ + + if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) { + /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ + if( + (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) || + (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2))) + ) { + /* part of a surrogate pair, leave >=d800 */ + } else { + /* BMP code point - may be surrogate code point - make <d800 */ + c1-=0x2800; + } + + if( + (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) || + (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2))) + ) { + /* part of a surrogate pair, leave >=d800 */ + } else { + /* BMP code point - may be surrogate code point - make <d800 */ + c2-=0x2800; + } + } + + return c1-c2; + } +} + +static +UBool _normalize(const Normalizer2 *n2, const UChar *s, int32_t length, + UnicodeString &normalized, UErrorCode *pErrorCode) { + UnicodeString str(length<0, s, length); + + // check if s fulfill the conditions + int32_t spanQCYes=n2->spanQuickCheckYes(str, *pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return FALSE; + } + /* + * ICU 2.4 had a further optimization: + * If both strings were not in FCD, then they were both NFD'ed, + * and the _COMPARE_EQUIV option was turned off. + * It is not entirely clear that this is valid with the current + * definition of the canonical caseless match. + * Therefore, ICU 2.6 removes that optimization. + */ + if(spanQCYes<str.length()) { + UnicodeString unnormalized=str.tempSubString(spanQCYes); + normalized.setTo(FALSE, str.getBuffer(), spanQCYes); + n2->normalizeSecondAndAppend(normalized, unnormalized, *pErrorCode); + if (U_SUCCESS(*pErrorCode)) { + return TRUE; + } + } + return FALSE; +} + +U_CAPI int32_t U_EXPORT2 +unorm_compare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode) { + /* argument checking */ + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(s1==0 || length1<-1 || s2==0 || length2<-1) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + UnicodeString fcd1, fcd2; + int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT); + options|=_COMPARE_EQUIV; + + /* + * UAX #21 Case Mappings, as fixed for Unicode version 4 + * (see Jitterbug 2021), defines a canonical caseless match as + * + * A string X is a canonical caseless match + * for a string Y if and only if + * NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y))) + * + * For better performance, we check for FCD (or let the caller tell us that + * both strings are in FCD) for the inner normalization. + * BasicNormalizerTest::FindFoldFCDExceptions() makes sure that + * case-folding preserves the FCD-ness of a string. + * The outer normalization is then only performed by unorm_cmpEquivFold() + * when there is a difference. + * + * Exception: When using the Turkic case-folding option, we do perform + * full NFD first. This is because in the Turkic case precomposed characters + * with 0049 capital I or 0069 small i fold differently whether they + * are first decomposed or not, so an FCD check - a check only for + * canonical order - is not sufficient. + */ + if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) { + const Normalizer2 *n2; + if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) { + n2=Normalizer2::getNFDInstance(*pErrorCode); + } else { + n2=Normalizer2Factory::getFCDInstance(*pErrorCode); + } + if (U_FAILURE(*pErrorCode)) { + return 0; + } + + if(normOptions&UNORM_UNICODE_3_2) { + const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode); + FilteredNormalizer2 fn2(*n2, *uni32); + if(_normalize(&fn2, s1, length1, fcd1, pErrorCode)) { + s1=fcd1.getBuffer(); + length1=fcd1.length(); + } + if(_normalize(&fn2, s2, length2, fcd2, pErrorCode)) { + s2=fcd2.getBuffer(); + length2=fcd2.length(); + } + } else { + if(_normalize(n2, s1, length1, fcd1, pErrorCode)) { + s1=fcd1.getBuffer(); + length1=fcd1.length(); + } + if(_normalize(n2, s2, length2, fcd2, pErrorCode)) { + s2=fcd2.getBuffer(); + length2=fcd2.length(); + } + } + } + + if(U_SUCCESS(*pErrorCode)) { + return unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode); + } else { + return 0; + } +} + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/thirdparty/icu4c/common/unormimp.h b/thirdparty/icu4c/common/unormimp.h new file mode 100644 index 0000000000..d2604adb4a --- /dev/null +++ b/thirdparty/icu4c/common/unormimp.h @@ -0,0 +1,488 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2001-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: unormimp.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001may25 +* created by: Markus W. Scherer +*/ + +#ifndef __UNORMIMP_H__ +#define __UNORMIMP_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "udataswp.h" + +/* + * The 2001-2010 implementation of the normalization code loads its data from + * unorm.icu, which is generated with the gennorm tool. + * The format of that file is described at the end of this file. + */ + +/* norm32 value constants */ +enum { + /* quick check flags 0..3 set mean "no" for their forms */ + _NORM_QC_NFC=0x11, /* no|maybe */ + _NORM_QC_NFKC=0x22, /* no|maybe */ + _NORM_QC_NFD=4, /* no */ + _NORM_QC_NFKD=8, /* no */ + + _NORM_QC_ANY_NO=0xf, + + /* quick check flags 4..5 mean "maybe" for their forms; test flags>=_NORM_QC_MAYBE */ + _NORM_QC_MAYBE=0x10, + _NORM_QC_ANY_MAYBE=0x30, + + _NORM_QC_MASK=0x3f, + + _NORM_COMBINES_FWD=0x40, + _NORM_COMBINES_BACK=0x80, + _NORM_COMBINES_ANY=0xc0, + + _NORM_CC_SHIFT=8, /* UnicodeData.txt combining class in bits 15..8 */ + _NORM_CC_MASK=0xff00, + + _NORM_EXTRA_SHIFT=16, /* 16 bits for the index to UChars and other extra data */ + _NORM_EXTRA_INDEX_TOP=0xfc00, /* start of surrogate specials after shift */ + + _NORM_EXTRA_SURROGATE_MASK=0x3ff, + _NORM_EXTRA_SURROGATE_TOP=0x3f0, /* hangul etc. */ + + _NORM_EXTRA_HANGUL=_NORM_EXTRA_SURROGATE_TOP, + _NORM_EXTRA_JAMO_L, + _NORM_EXTRA_JAMO_V, + _NORM_EXTRA_JAMO_T +}; + +/* norm32 value constants using >16 bits */ +#define _NORM_MIN_SPECIAL 0xfc000000 +#define _NORM_SURROGATES_TOP 0xfff00000 +#define _NORM_MIN_HANGUL 0xfff00000 +#define _NORM_MIN_JAMO_V 0xfff20000 +#define _NORM_JAMO_V_TOP 0xfff30000 + +/* value constants for auxTrie */ +enum { + _NORM_AUX_COMP_EX_SHIFT=10, + _NORM_AUX_UNSAFE_SHIFT=11, + _NORM_AUX_NFC_SKIPPABLE_F_SHIFT=12 +}; + +#define _NORM_AUX_MAX_FNC ((int32_t)1<<_NORM_AUX_COMP_EX_SHIFT) + +#define _NORM_AUX_FNC_MASK (uint32_t)(_NORM_AUX_MAX_FNC-1) +#define _NORM_AUX_COMP_EX_MASK ((uint32_t)1<<_NORM_AUX_COMP_EX_SHIFT) +#define _NORM_AUX_UNSAFE_MASK ((uint32_t)1<<_NORM_AUX_UNSAFE_SHIFT) +#define _NORM_AUX_NFC_SKIP_F_MASK ((uint32_t)1<<_NORM_AUX_NFC_SKIPPABLE_F_SHIFT) + +/* canonStartSets[0..31] contains indexes for what is in the array */ +enum { + _NORM_SET_INDEX_CANON_SETS_LENGTH, /* number of uint16_t in canonical starter sets */ + _NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH, /* number of uint16_t in the BMP search table (contains pairs) */ + _NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH,/* number of uint16_t in the supplementary search table (contains triplets) */ + + /* from formatVersion 2.3: */ + _NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET, /* uint16_t offset from canonStartSets[0] to the + exclusion set for CJK compatibility characters */ + _NORM_SET_INDEX_NX_UNICODE32_OFFSET, /* uint16_t offset from canonStartSets[0] to the + exclusion set for Unicode 3.2 characters */ + _NORM_SET_INDEX_NX_RESERVED_OFFSET, /* uint16_t offset from canonStartSets[0] to the + end of the previous exclusion set */ + + _NORM_SET_INDEX_TOP=32 /* changing this requires a new formatVersion */ +}; + +/* more constants for canonical starter sets */ + +/* 14 bit indexes to canonical USerializedSets */ +#define _NORM_MAX_CANON_SETS 0x4000 + +/* single-code point BMP sets are encoded directly in the search table except if result=0x4000..0x7fff */ +#define _NORM_CANON_SET_BMP_MASK 0xc000 +#define _NORM_CANON_SET_BMP_IS_INDEX 0x4000 + +/* indexes[] value names */ +enum { + _NORM_INDEX_TRIE_SIZE, /* number of bytes in normalization trie */ + _NORM_INDEX_UCHAR_COUNT, /* number of UChars in extra data */ + + _NORM_INDEX_COMBINE_DATA_COUNT, /* number of uint16_t words for combining data */ + _NORM_INDEX_COMBINE_FWD_COUNT, /* number of code points that combine forward */ + _NORM_INDEX_COMBINE_BOTH_COUNT, /* number of code points that combine forward and backward */ + _NORM_INDEX_COMBINE_BACK_COUNT, /* number of code points that combine backward */ + + _NORM_INDEX_MIN_NFC_NO_MAYBE, /* first code point with quick check NFC NO/MAYBE */ + _NORM_INDEX_MIN_NFKC_NO_MAYBE, /* first code point with quick check NFKC NO/MAYBE */ + _NORM_INDEX_MIN_NFD_NO_MAYBE, /* first code point with quick check NFD NO/MAYBE */ + _NORM_INDEX_MIN_NFKD_NO_MAYBE, /* first code point with quick check NFKD NO/MAYBE */ + + _NORM_INDEX_FCD_TRIE_SIZE, /* number of bytes in FCD trie */ + + _NORM_INDEX_AUX_TRIE_SIZE, /* number of bytes in the auxiliary trie */ + _NORM_INDEX_CANON_SET_COUNT, /* number of uint16_t in the array of serialized USet */ + + _NORM_INDEX_TOP=32 /* changing this requires a new formatVersion */ +}; + +enum { + /* FCD check: everything below this code point is known to have a 0 lead combining class */ + _NORM_MIN_WITH_LEAD_CC=0x300 +}; + +enum { + /** + * Bit 7 of the length byte for a decomposition string in extra data is + * a flag indicating whether the decomposition string is + * preceded by a 16-bit word with the leading and trailing cc + * of the decomposition (like for A-umlaut); + * if not, then both cc's are zero (like for compatibility ideographs). + */ + _NORM_DECOMP_FLAG_LENGTH_HAS_CC=0x80, + /** + * Bits 6..0 of the length byte contain the actual length. + */ + _NORM_DECOMP_LENGTH_MASK=0x7f +}; + +/** Constants for options flags for normalization. */ +enum { + /** Options bit 0, do not decompose Hangul syllables. */ + UNORM_NX_HANGUL=1, + /** Options bit 1, do not decompose CJK compatibility characters. */ + UNORM_NX_CJK_COMPAT=2 +}; + +/** + * Description of the format of unorm.icu version 2.3. + * + * Main change from version 1 to version 2: + * Use of new, common UTrie instead of normalization-specific tries. + * Change to version 2.1: add third/auxiliary trie with associated data. + * Change to version 2.2: add skippable (f) flag data (_NORM_AUX_NFC_SKIP_F_MASK). + * Change to version 2.3: add serialized sets for normalization exclusions + * stored inside canonStartSets[] + * + * For more details of how to use the data structures see the code + * in unorm.cpp (runtime normalization code) and + * in gennorm.c and gennorm/store.c (build-time data generation). + * + * For the serialized format of UTrie see utrie.c/UTrieHeader. + * + * - Overall partition + * + * unorm.dat customarily begins with a UDataInfo structure, see udata.h and .c. + * After that there are the following structures: + * + * int32_t indexes[_NORM_INDEX_TOP]; -- _NORM_INDEX_TOP=32, see enum in this file + * + * UTrie normTrie; -- size in bytes=indexes[_NORM_INDEX_TRIE_SIZE] + * + * uint16_t extraData[extraDataTop]; -- extraDataTop=indexes[_NORM_INDEX_UCHAR_COUNT] + * extraData[0] contains the number of units for + * FC_NFKC_Closure (formatVersion>=2.1) + * + * uint16_t combiningTable[combiningTableTop]; -- combiningTableTop=indexes[_NORM_INDEX_COMBINE_DATA_COUNT] + * combiningTableTop may include one 16-bit padding unit + * to make sure that fcdTrie is 32-bit-aligned + * + * UTrie fcdTrie; -- size in bytes=indexes[_NORM_INDEX_FCD_TRIE_SIZE] + * + * UTrie auxTrie; -- size in bytes=indexes[_NORM_INDEX_AUX_TRIE_SIZE] + * + * uint16_t canonStartSets[canonStartSetsTop] -- canonStartSetsTop=indexes[_NORM_INDEX_CANON_SET_COUNT] + * serialized USets and binary search tables, see below + * + * + * The indexes array contains lengths and sizes of the following arrays and structures + * as well as the following values: + * indexes[_NORM_INDEX_COMBINE_FWD_COUNT]=combineFwdTop + * -- one more than the highest combining index computed for forward-only-combining characters + * indexes[_NORM_INDEX_COMBINE_BOTH_COUNT]=combineBothTop-combineFwdTop + * -- number of combining indexes computed for both-ways-combining characters + * indexes[_NORM_INDEX_COMBINE_BACK_COUNT]=combineBackTop-combineBothTop + * -- number of combining indexes computed for backward-only-combining characters + * + * indexes[_NORM_INDEX_MIN_NF*_NO_MAYBE] (where *={ C, D, KC, KD }) + * -- first code point with a quick check NF* value of NO/MAYBE + * + * + * - Tries + * + * The main structures are two UTrie tables ("compact arrays"), + * each with one index array and one data array. + * See utrie.h and utrie.c. + * + * + * - Tries in unorm.dat + * + * The first trie (normTrie above) + * provides data for the NF* quick checks and normalization. + * The second trie (fcdTrie above) provides data just for FCD checks. + * + * + * - norm32 data words from the first trie + * + * The norm32Table contains one 32-bit word "norm32" per code point. + * It contains the following bit fields: + * 31..16 extra data index, _NORM_EXTRA_SHIFT is used to shift this field down + * if this index is <_NORM_EXTRA_INDEX_TOP then it is an index into + * extraData[] where variable-length normalization data for this + * code point is found + * if this index is <_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_SURROGATE_TOP + * then this is a norm32 for a leading surrogate, and the index + * value is used together with the following trailing surrogate + * code unit in the second trie access + * if this index is >=_NORM_EXTRA_INDEX_TOP+_NORM_EXTRA_SURROGATE_TOP + * then this is a norm32 for a "special" character, + * i.e., the character is a Hangul syllable or a Jamo + * see _NORM_EXTRA_HANGUL etc. + * generally, instead of extracting this index from the norm32 and + * comparing it with the above constants, + * the normalization code compares the entire norm32 value + * with _NORM_MIN_SPECIAL, _NORM_SURROGATES_TOP, _NORM_MIN_HANGUL etc. + * + * 15..8 combining class (cc) according to UnicodeData.txt + * + * 7..6 _NORM_COMBINES_ANY flags, used in composition to see if a character + * combines with any following or preceding character(s) + * at all + * 7 _NORM_COMBINES_BACK + * 6 _NORM_COMBINES_FWD + * + * 5..0 quick check flags, set for "no" or "maybe", with separate flags for + * each normalization form + * the higher bits are "maybe" flags; for NF*D there are no such flags + * the lower bits are "no" flags for all forms, in the same order + * as the "maybe" flags, + * which is (MSB to LSB): NFKD NFD NFKC NFC + * 5..4 _NORM_QC_ANY_MAYBE + * 3..0 _NORM_QC_ANY_NO + * see further related constants + * + * + * - Extra data per code point + * + * "Extra data" is referenced by the index in norm32. + * It is variable-length data. It is only present, and only those parts + * of it are, as needed for a given character. + * The norm32 extra data index is added to the beginning of extraData[] + * to get to a vector of 16-bit words with data at the following offsets: + * + * [-1] Combining index for composition. + * Stored only if norm32&_NORM_COMBINES_ANY . + * [0] Lengths of the canonical and compatibility decomposition strings. + * Stored only if there are decompositions, i.e., + * if norm32&(_NORM_QC_NFD|_NORM_QC_NFKD) + * High byte: length of NFKD, or 0 if none + * Low byte: length of NFD, or 0 if none + * Each length byte also has another flag: + * Bit 7 of a length byte is set if there are non-zero + * combining classes (cc's) associated with the respective + * decomposition. If this flag is set, then the decomposition + * is preceded by a 16-bit word that contains the + * leading and trailing cc's. + * Bits 6..0 of a length byte are the length of the + * decomposition string, not counting the cc word. + * [1..n] NFD + * [n+1..] NFKD + * + * Each of the two decompositions consists of up to two parts: + * - The 16-bit words with the leading and trailing cc's. + * This is only stored if bit 7 of the corresponding length byte + * is set. In this case, at least one of the cc's is not zero. + * High byte: leading cc==cc of the first code point in the decomposition string + * Low byte: trailing cc==cc of the last code point in the decomposition string + * - The decomposition string in UTF-16, with length code units. + * + * + * - Combining indexes and combiningTable[] + * + * Combining indexes are stored at the [-1] offset of the extra data + * if the character combines forward or backward with any other characters. + * They are used for (re)composition in NF*C. + * Values of combining indexes are arranged according to whether a character + * combines forward, backward, or both ways: + * forward-only < both ways < backward-only + * + * The index values for forward-only and both-ways combining characters + * are indexes into the combiningTable[]. + * The index values for backward-only combining characters are simply + * incremented from the preceding index values to be unique. + * + * In the combiningTable[], a variable-length list + * of variable-length (back-index, code point) pair entries is stored + * for each forward-combining character. + * + * These back-indexes are the combining indexes of both-ways or backward-only + * combining characters that the forward-combining character combines with. + * + * Each list is sorted in ascending order of back-indexes. + * Each list is terminated with the last back-index having bit 15 set. + * + * Each pair (back-index, code point) takes up either 2 or 3 + * 16-bit words. + * The first word of a list entry is the back-index, with its bit 15 set if + * this is the last pair in the list. + * + * The second word contains flags in bits 15..13 that determine + * if there is a third word and how the combined character is encoded: + * 15 set if there is a third word in this list entry + * 14 set if the result is a supplementary character + * 13 set if the result itself combines forward + * + * According to these bits 15..14 of the second word, + * the result character is encoded as follows: + * 00 or 01 The result is <=0x1fff and stored in bits 12..0 of + * the second word. + * 10 The result is 0x2000..0xffff and stored in the third word. + * Bits 12..0 of the second word are not used. + * 11 The result is a supplementary character. + * Bits 9..0 of the leading surrogate are in bits 9..0 of + * the second word. + * Add 0xd800 to these bits to get the complete surrogate. + * Bits 12..10 of the second word are not used. + * The trailing surrogate is stored in the third word. + * + * + * - FCD trie + * + * The FCD trie is very simple. + * It is a folded trie with 16-bit data words. + * In each word, the high byte contains the leading cc of the character, + * and the low byte contains the trailing cc of the character. + * These cc's are the cc's of the first and last code points in the + * canonical decomposition of the character. + * + * Since all 16 bits are used for cc's, lead surrogates must be tested + * by checking the code unit instead of the trie data. + * This is done only if the 16-bit data word is not zero. + * If the code unit is a leading surrogate and the data word is not zero, + * then instead of cc's it contains the offset for the second trie lookup. + * + * + * - Auxiliary trie and data + * + * The auxiliary 16-bit trie contains data for additional properties. + * Bits + * 15..13 reserved + * 12 not NFC_Skippable (f) (formatVersion>=2.2) + * 11 flag: not a safe starter for canonical closure + * 10 composition exclusion + * 9.. 0 index into extraData[] to FC_NFKC_Closure string + * (not for lead surrogate), + * or lead surrogate offset (for lead surrogate, if 9..0 not zero) + * + * - FC_NFKC_Closure strings in extraData[] + * + * Strings are either stored as a single code unit or as the length + * followed by that many units. + * const UChar *s=extraData+(index from auxTrie data bits 9..0); + * int32_t length; + * if(*s<0xff00) { + * // s points to the single-unit string + * length=1; + * } else { + * length=*s&0xff; + * ++s; + * } + * + * Conditions for "NF* Skippable" from Mark Davis' com.ibm.text.UCD.NFSkippable: + * (used in NormalizerTransliterator) + * + * A skippable character is + * a) unassigned, or ALL of the following: + * b) of combining class 0. + * c) not decomposed by this normalization form. + * AND if NFC or NFKC, + * d) can never compose with a previous character. + * e) can never compose with a following character. + * f) can never change if another character is added. + * Example: a-breve might satisfy all but f, but if you + * add an ogonek it changes to a-ogonek + breve + * + * a)..e) must be tested from norm32. + * Since f) is more complicated, the (not-)NFC_Skippable flag (f) is built + * into the auxiliary trie. + * The same bit is used for NFC and NFKC; (c) differs for them. + * As usual, we build the "not skippable" flags so that unassigned + * code points get a 0 bit. + * This bit is only valid after (a)..(e) test false; test NFD_NO before (f) as well. + * Test Hangul LV syllables entirely in code. + * + * + * - structure inside canonStartSets[] + * + * This array maps from code points c to sets of code points (USerializedSet). + * The result sets are the code points whose canonical decompositions start + * with c. + * + * canonStartSets[] contains the following sub-arrays: + * + * indexes[_NORM_SET_INDEX_TOP] + * - contains lengths of sub-arrays etc. + * + * startSets[indexes[_NORM_SET_INDEX_CANON_SETS_LENGTH]-_NORM_SET_INDEX_TOP] + * - contains serialized sets (USerializedSet) of canonical starters for + * enumerating canonically equivalent strings + * indexes[_NORM_SET_INDEX_CANON_SETS_LENGTH] includes _NORM_SET_INDEX_TOP + * for details about the structure see uset.c + * + * bmpTable[indexes[_NORM_SET_INDEX_CANON_BMP_TABLE_LENGTH]] + * - a sorted search table for BMP code points whose results are + * either indexes to USerializedSets or single code points for + * single-code point sets; + * each entry is a pair of { code point, result } with result=(binary) yy xxxxxx xxxxxxxx + * if yy==01 then there is a USerializedSet at canonStartSets+x + * else build a USerializedSet with result as the single code point + * + * suppTable[indexes[_NORM_SET_INDEX_CANON_SUPP_TABLE_LENGTH]] + * - a sorted search table for supplementary code points whose results are + * either indexes to USerializedSets or single code points for + * single-code point sets; + * each entry is a triplet of { high16(cp), low16(cp), result } + * each code point's high-word may contain extra data in bits 15..5: + * if the high word has bit 15 set, then build a set with a single code point + * which is (((high16(cp)&0x1f00)<<8)|result; + * else there is a USerializedSet at canonStartSets+result + * + * FormatVersion 2.3 adds 2 serialized sets for normalization exclusions. + * They are stored in the data file so that the runtime normalization code need + * not depend on other properties and their data and implementation files. + * The _NORM_SET_INDEX_NX_..._OFFSET offsets in the canonStartSets index table + * give the location for each set. + * There is no set stored for UNORM_NX_HANGUL because it's trivial to create + * without using properties. + * + * Set contents: + * + * _NORM_SET_INDEX_NX_CJK_COMPAT_OFFSET (for UNORM_NX_CJK_COMPAT) + * [[:Ideographic:]&[:NFD_QC=No:]] + * =[CJK Ideographs]&[has canonical decomposition] + * + * _NORM_SET_INDEX_NX_UNICODE32_OFFSET (for UNORM_UNICODE_3_2) + * [:^Age=3.2:] + * =set with all code points that were not designated by the specified Unicode version + * + * _NORM_SET_INDEX_NX_RESERVED_OFFSET + * This is an offset that points to where the next, future set would start. + * Currently it indicates where the previous set ends, and thus its length. + * The name for this enum constant may in the future be applied to different + * index slots. In order to get the limit of a set, use its index slot and + * the immediately following one regardless of that one's enum name. + */ + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + +#endif diff --git a/thirdparty/icu4c/common/uobject.cpp b/thirdparty/icu4c/common/uobject.cpp new file mode 100644 index 0000000000..e222b2ce9b --- /dev/null +++ b/thirdparty/icu4c/common/uobject.cpp @@ -0,0 +1,105 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2002-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: uobject.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jun26 +* created by: Markus W. Scherer +*/ + +#include "unicode/uobject.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + +#if U_OVERRIDE_CXX_ALLOCATION + +/* + * Default implementation of UMemory::new/delete + * using uprv_malloc() and uprv_free(). + * + * For testing, this is used together with a list of imported symbols to verify + * that ICU is not using the global ::new and ::delete operators. + * + * These operators can be implemented like this or any other appropriate way + * when customizing ICU for certain environments. + * Whenever ICU is customized in binary incompatible ways please be sure + * to use library name suffixes to distinguish such libraries from + * the standard build. + * + * Instead of just modifying these C++ new/delete operators, it is usually best + * to modify the uprv_malloc()/uprv_free()/uprv_realloc() functions in cmemory.c. + * + * Memory test on Windows/MSVC 6: + * The global operators new and delete look as follows: + * 04F 00000000 UNDEF notype () External | ??2@YAPAXI@Z (void * __cdecl operator new(unsigned int)) + * 03F 00000000 UNDEF notype () External | ??3@YAXPAX@Z (void __cdecl operator delete(void *)) + * + * These lines are from output generated by the MSVC 6 tool dumpbin with + * dumpbin /symbols *.obj + * + * ??2@YAPAXI@Z and ??3@YAXPAX@Z are the linker symbols in the .obj + * files and are imported from msvcrtd.dll (in a debug build). + * + * Make sure that with the UMemory operators new and delete defined these two symbols + * do not appear in the dumpbin /symbols output for the ICU libraries! + * + * If such a symbol appears in the output then look in the preceding lines in the output + * for which file and function calls the global new or delete operator, + * and replace with uprv_malloc/uprv_free. + */ + +void * U_EXPORT2 UMemory::operator new(size_t size) U_NOEXCEPT { + return uprv_malloc(size); +} + +void U_EXPORT2 UMemory::operator delete(void *p) U_NOEXCEPT { + if(p!=NULL) { + uprv_free(p); + } +} + +void * U_EXPORT2 UMemory::operator new[](size_t size) U_NOEXCEPT { + return uprv_malloc(size); +} + +void U_EXPORT2 UMemory::operator delete[](void *p) U_NOEXCEPT { + if(p!=NULL) { + uprv_free(p); + } +} + +#if U_HAVE_DEBUG_LOCATION_NEW +void * U_EXPORT2 UMemory::operator new(size_t size, const char* /*file*/, int /*line*/) U_NOEXCEPT { + return UMemory::operator new(size); +} + +void U_EXPORT2 UMemory::operator delete(void* p, const char* /*file*/, int /*line*/) U_NOEXCEPT { + UMemory::operator delete(p); +} +#endif /* U_HAVE_DEBUG_LOCATION_NEW */ + + +#endif + +UObject::~UObject() {} + +UClassID UObject::getDynamicClassID() const { return NULL; } + +U_NAMESPACE_END + +U_NAMESPACE_USE + +U_CAPI void U_EXPORT2 +uprv_deleteUObject(void *obj) { + delete static_cast<UObject *>(obj); +} diff --git a/thirdparty/icu4c/common/uposixdefs.h b/thirdparty/icu4c/common/uposixdefs.h new file mode 100644 index 0000000000..23c3f6d466 --- /dev/null +++ b/thirdparty/icu4c/common/uposixdefs.h @@ -0,0 +1,77 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011-2015, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: uposixdefs.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011jul25 +* created by: Markus W. Scherer +* +* Common definitions for implementation files working with POSIX functions. +* *Important*: #include this file before any other header files! +*/ + +#ifndef __UPOSIXDEFS_H__ +#define __UPOSIXDEFS_H__ + +/* + * Define _XOPEN_SOURCE for access to POSIX functions. + * + * We cannot use U_PLATFORM from platform.h/utypes.h because + * "The Open Group Base Specifications" + * chapter "2.2 The Compilation Environment" says: + * "In the compilation of an application that #defines a feature test macro + * specified by IEEE Std 1003.1-2001, + * no header defined by IEEE Std 1003.1-2001 shall be included prior to + * the definition of the feature test macro." + */ +#ifdef _XOPEN_SOURCE + /* Use the predefined value. */ +#else + /* + * Version 6.0: + * The Open Group Base Specifications Issue 6 (IEEE Std 1003.1, 2004 Edition) + * also known as + * SUSv3 = Open Group Single UNIX Specification, Version 3 (UNIX03) + * + * Note: This definition used to be in C source code (e.g., putil.c) + * and define _XOPEN_SOURCE to different values depending on __STDC_VERSION__. + * In C++ source code (e.g., putil.cpp), __STDC_VERSION__ is not defined at all. + */ +# define _XOPEN_SOURCE 600 +#endif + +/* + * Make sure things like readlink and such functions work. + * Poorly upgraded Solaris machines can't have this defined. + * Cleanly installed Solaris can use this #define. + * + * z/OS needs this definition for timeval and to get usleep. + */ +#if !defined(_XOPEN_SOURCE_EXTENDED) && defined(__TOS_MVS__) +# define _XOPEN_SOURCE_EXTENDED 1 +#endif + +/** + * Solaris says: + * "...it is invalid to compile an XPG6 or a POSIX.1-2001 application with anything other + * than a c99 or later compiler." + * Apparently C++11 is not "or later". Work around this. + */ +#if defined(__cplusplus) && (defined(sun) || defined(__sun)) && !defined (_STDC_C99) +# define _STDC_C99 +#endif + +#if !defined _POSIX_C_SOURCE && \ + defined(__APPLE__) && defined(__MACH__) && !defined(__clang__) +// Needed to prevent EOWNERDEAD issues with GCC on Mac +#define _POSIX_C_SOURCE 200809L +#endif + +#endif /* __UPOSIXDEFS_H__ */ diff --git a/thirdparty/icu4c/common/uprops.cpp b/thirdparty/icu4c/common/uprops.cpp new file mode 100644 index 0000000000..1604ad9a17 --- /dev/null +++ b/thirdparty/icu4c/common/uprops.cpp @@ -0,0 +1,797 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uprops.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002feb24 +* created by: Markus W. Scherer +* +* Implementations for mostly non-core Unicode character properties +* stored in uprops.icu. +* +* With the APIs implemented here, almost all properties files and +* their associated implementation files are used from this file, +* including those for normalization and case mappings. +*/ + +#include "unicode/utypes.h" +#include "unicode/uchar.h" +#include "unicode/ucptrie.h" +#include "unicode/udata.h" +#include "unicode/unorm2.h" +#include "unicode/uscript.h" +#include "unicode/ustring.h" +#include "cstring.h" +#include "mutex.h" +#include "normalizer2impl.h" +#include "umutex.h" +#include "ubidi_props.h" +#include "uprops.h" +#include "ucase.h" +#include "ucln_cmn.h" +#include "ulayout_props.h" +#include "ustr_imp.h" + +U_NAMESPACE_USE + +// Unicode text layout properties data ----------------------------------------- + +namespace { + +icu::UInitOnce gLayoutInitOnce = U_INITONCE_INITIALIZER; +UDataMemory *gLayoutMemory = nullptr; + +UCPTrie *gInpcTrie = nullptr; // Indic_Positional_Category +UCPTrie *gInscTrie = nullptr; // Indic_Syllabic_Category +UCPTrie *gVoTrie = nullptr; // Vertical_Orientation + +int32_t gMaxInpcValue = 0; +int32_t gMaxInscValue = 0; +int32_t gMaxVoValue = 0; + +UBool U_CALLCONV uprops_cleanup() { + udata_close(gLayoutMemory); + gLayoutMemory = nullptr; + + ucptrie_close(gInpcTrie); + gInpcTrie = nullptr; + ucptrie_close(gInscTrie); + gInscTrie = nullptr; + ucptrie_close(gVoTrie); + gVoTrie = nullptr; + + gMaxInpcValue = 0; + gMaxInscValue = 0; + gMaxVoValue = 0; + + gLayoutInitOnce.reset(); + return TRUE; +} + +UBool U_CALLCONV +ulayout_isAcceptable(void * /*context*/, + const char * /* type */, const char * /*name*/, + const UDataInfo *pInfo) { + return pInfo->size >= 20 && + pInfo->isBigEndian == U_IS_BIG_ENDIAN && + pInfo->charsetFamily == U_CHARSET_FAMILY && + pInfo->dataFormat[0] == ULAYOUT_FMT_0 && + pInfo->dataFormat[1] == ULAYOUT_FMT_1 && + pInfo->dataFormat[2] == ULAYOUT_FMT_2 && + pInfo->dataFormat[3] == ULAYOUT_FMT_3 && + pInfo->formatVersion[0] == 1; +} + +// UInitOnce singleton initialization function +void U_CALLCONV ulayout_load(UErrorCode &errorCode) { + gLayoutMemory = udata_openChoice( + nullptr, ULAYOUT_DATA_TYPE, ULAYOUT_DATA_NAME, + ulayout_isAcceptable, nullptr, &errorCode); + if (U_FAILURE(errorCode)) { return; } + + const uint8_t *inBytes = (const uint8_t *)udata_getMemory(gLayoutMemory); + const int32_t *inIndexes = (const int32_t *)inBytes; + int32_t indexesLength = inIndexes[ULAYOUT_IX_INDEXES_LENGTH]; + if (indexesLength < 12) { + errorCode = U_INVALID_FORMAT_ERROR; // Not enough indexes. + return; + } + int32_t offset = indexesLength * 4; + int32_t top = inIndexes[ULAYOUT_IX_INPC_TRIE_TOP]; + int32_t trieSize = top - offset; + if (trieSize >= 16) { + gInpcTrie = ucptrie_openFromBinary( + UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY, + inBytes + offset, trieSize, nullptr, &errorCode); + } + offset = top; + top = inIndexes[ULAYOUT_IX_INSC_TRIE_TOP]; + trieSize = top - offset; + if (trieSize >= 16) { + gInscTrie = ucptrie_openFromBinary( + UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY, + inBytes + offset, trieSize, nullptr, &errorCode); + } + offset = top; + top = inIndexes[ULAYOUT_IX_VO_TRIE_TOP]; + trieSize = top - offset; + if (trieSize >= 16) { + gVoTrie = ucptrie_openFromBinary( + UCPTRIE_TYPE_ANY, UCPTRIE_VALUE_BITS_ANY, + inBytes + offset, trieSize, nullptr, &errorCode); + } + + uint32_t maxValues = inIndexes[ULAYOUT_IX_MAX_VALUES]; + gMaxInpcValue = maxValues >> ULAYOUT_MAX_INPC_SHIFT; + gMaxInscValue = (maxValues >> ULAYOUT_MAX_INSC_SHIFT) & 0xff; + gMaxVoValue = (maxValues >> ULAYOUT_MAX_VO_SHIFT) & 0xff; + + ucln_common_registerCleanup(UCLN_COMMON_UPROPS, uprops_cleanup); +} + +UBool ulayout_ensureData(UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return FALSE; } + umtx_initOnce(gLayoutInitOnce, &ulayout_load, errorCode); + return U_SUCCESS(errorCode); +} + +UBool ulayout_ensureData() { + UErrorCode errorCode = U_ZERO_ERROR; + return ulayout_ensureData(errorCode); +} + +} // namespace + +/* general properties API functions ----------------------------------------- */ + +struct BinaryProperty; + +typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which); + +struct BinaryProperty { + int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 + uint32_t mask; + BinaryPropertyContains *contains; +}; + +static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) { + /* systematic, directly stored properties */ + return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0; +} + +static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { + return static_cast<UBool>(ucase_hasBinaryProperty(c, which)); +} + +static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return ubidi_isBidiControl(c); +} + +static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return ubidi_isMirrored(c); +} + +static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return ubidi_isJoinControl(c); +} + +#if UCONFIG_NO_NORMALIZATION +static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) { + return FALSE; +} +#else +static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + // By definition, Full_Composition_Exclusion is the same as NFC_QC=No. + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c)); +} +#endif + +// UCHAR_NF*_INERT properties +#if UCONFIG_NO_NORMALIZATION +static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) { + return FALSE; +} +#else +static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2 *norm2=Normalizer2Factory::getInstance( + (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode); + return U_SUCCESS(errorCode) && norm2->isInert(c); +} +#endif + +#if UCONFIG_NO_NORMALIZATION +static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) { + return FALSE; +} +#else +static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + UnicodeString nfd; + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode); + if(U_FAILURE(errorCode)) { + return FALSE; + } + if(nfcNorm2->getDecomposition(c, nfd)) { + /* c has a decomposition */ + if(nfd.length()==1) { + c=nfd[0]; /* single BMP code point */ + } else if(nfd.length()<=U16_MAX_LENGTH && + nfd.length()==U16_LENGTH(c=nfd.char32At(0)) + ) { + /* single supplementary code point */ + } else { + c=U_SENTINEL; + } + } else if(c<0) { + return FALSE; /* protect against bad input */ + } + if(c>=0) { + /* single code point */ + const UChar *resultString; + return (UBool)(ucase_toFullFolding(c, &resultString, U_FOLD_CASE_DEFAULT)>=0); + } else { + /* guess some large but stack-friendly capacity */ + UChar dest[2*UCASE_MAX_STRING_LENGTH]; + int32_t destLength; + destLength=u_strFoldCase(dest, UPRV_LENGTHOF(dest), + nfd.getBuffer(), nfd.length(), + U_FOLD_CASE_DEFAULT, &errorCode); + return (UBool)(U_SUCCESS(errorCode) && + 0!=u_strCompare(nfd.getBuffer(), nfd.length(), + dest, destLength, FALSE)); + } +} +#endif + +#if UCONFIG_NO_NORMALIZATION +static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) { + return FALSE; +} +#else +static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode); + if(U_FAILURE(errorCode)) { + return FALSE; + } + UnicodeString src(c); + UnicodeString dest; + { + // The ReorderingBuffer must be in a block because its destructor + // needs to release dest's buffer before we look at its contents. + ReorderingBuffer buffer(*kcf, dest); + // Small destCapacity for NFKC_CF(c). + if(buffer.init(5, errorCode)) { + const UChar *srcArray=src.getBuffer(); + kcf->compose(srcArray, srcArray+src.length(), FALSE, + TRUE, buffer, errorCode); + } + } + return U_SUCCESS(errorCode) && dest!=src; +} +#endif + +#if UCONFIG_NO_NORMALIZATION +static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) { + return FALSE; +} +#else +static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + UErrorCode errorCode=U_ZERO_ERROR; + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + return + U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) && + impl->isCanonSegmentStarter(c); +} +#endif + +static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return u_isalnumPOSIX(c); +} + +static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return u_isblank(c); +} + +static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return u_isgraphPOSIX(c); +} + +static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return u_isprintPOSIX(c); +} + +static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return u_isxdigit(c); +} + +static UBool isRegionalIndicator(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + // Property starts are a subset of lb=RI etc. + return 0x1F1E6<=c && c<=0x1F1FF; +} + +static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={ + /* + * column and mask values for binary properties from u_getUnicodeProperties(). + * Must be in order of corresponding UProperty, + * and there must be exactly one entry per binary UProperty. + * + * Properties with mask==0 are handled in code. + * For them, column is the UPropertySource value. + */ + { 1, U_MASK(UPROPS_ALPHABETIC), defaultContains }, + { 1, U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains }, + { UPROPS_SRC_BIDI, 0, isBidiControl }, + { UPROPS_SRC_BIDI, 0, isMirrored }, + { 1, U_MASK(UPROPS_DASH), defaultContains }, + { 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains }, + { 1, U_MASK(UPROPS_DEPRECATED), defaultContains }, + { 1, U_MASK(UPROPS_DIACRITIC), defaultContains }, + { 1, U_MASK(UPROPS_EXTENDER), defaultContains }, + { UPROPS_SRC_NFC, 0, hasFullCompositionExclusion }, + { 1, U_MASK(UPROPS_GRAPHEME_BASE), defaultContains }, + { 1, U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains }, + { 1, U_MASK(UPROPS_GRAPHEME_LINK), defaultContains }, + { 1, U_MASK(UPROPS_HEX_DIGIT), defaultContains }, + { 1, U_MASK(UPROPS_HYPHEN), defaultContains }, + { 1, U_MASK(UPROPS_ID_CONTINUE), defaultContains }, + { 1, U_MASK(UPROPS_ID_START), defaultContains }, + { 1, U_MASK(UPROPS_IDEOGRAPHIC), defaultContains }, + { 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains }, + { 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains }, + { UPROPS_SRC_BIDI, 0, isJoinControl }, + { 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains }, + { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_LOWERCASE + { 1, U_MASK(UPROPS_MATH), defaultContains }, + { 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains }, + { 1, U_MASK(UPROPS_QUOTATION_MARK), defaultContains }, + { 1, U_MASK(UPROPS_RADICAL), defaultContains }, + { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_SOFT_DOTTED + { 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains }, + { 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains }, + { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_UPPERCASE + { 1, U_MASK(UPROPS_WHITE_SPACE), defaultContains }, + { 1, U_MASK(UPROPS_XID_CONTINUE), defaultContains }, + { 1, U_MASK(UPROPS_XID_START), defaultContains }, + { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_SENSITIVE + { 1, U_MASK(UPROPS_S_TERM), defaultContains }, + { 1, U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains }, + { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFD_INERT + { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKD_INERT + { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFC_INERT + { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKC_INERT + { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter }, + { 1, U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains }, + { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains }, + { UPROPS_SRC_CHAR_AND_PROPSVEC, 0, isPOSIX_alnum }, + { UPROPS_SRC_CHAR, 0, isPOSIX_blank }, + { UPROPS_SRC_CHAR, 0, isPOSIX_graph }, + { UPROPS_SRC_CHAR, 0, isPOSIX_print }, + { UPROPS_SRC_CHAR, 0, isPOSIX_xdigit }, + { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASED + { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_IGNORABLE + { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_LOWERCASED + { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_UPPERCASED + { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED + { UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded }, + { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED + { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded }, + { 2, U_MASK(UPROPS_2_EMOJI), defaultContains }, + { 2, U_MASK(UPROPS_2_EMOJI_PRESENTATION), defaultContains }, + { 2, U_MASK(UPROPS_2_EMOJI_MODIFIER), defaultContains }, + { 2, U_MASK(UPROPS_2_EMOJI_MODIFIER_BASE), defaultContains }, + { 2, U_MASK(UPROPS_2_EMOJI_COMPONENT), defaultContains }, + { 2, 0, isRegionalIndicator }, + { 1, U_MASK(UPROPS_PREPENDED_CONCATENATION_MARK), defaultContains }, + { 2, U_MASK(UPROPS_2_EXTENDED_PICTOGRAPHIC), defaultContains }, +}; + +U_CAPI UBool U_EXPORT2 +u_hasBinaryProperty(UChar32 c, UProperty which) { + /* c is range-checked in the functions that are called from here */ + if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) { + /* not a known binary property */ + return FALSE; + } else { + const BinaryProperty &prop=binProps[which]; + return prop.contains(prop, c, which); + } +} + +struct IntProperty; + +typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which); +typedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which); + +struct IntProperty { + int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 + uint32_t mask; + int32_t shift; // =maxValue if getMaxValueFromShift() is used + IntPropertyGetValue *getValue; + IntPropertyGetMaxValue *getMaxValue; +}; + +static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) { + /* systematic, directly stored properties */ + return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shift; +} + +static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) { + return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift; +} + +static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) { + return prop.shift; +} + +static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return (int32_t)u_charDirection(c); +} + +static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return (int32_t)ubidi_getPairedBracketType(c); +} + +static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) { + return ubidi_getMaxValue(which); +} + +#if UCONFIG_NO_NORMALIZATION +static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) { + return 0; +} +#else +static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return u_getCombiningClass(c); +} +#endif + +static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return (int32_t)u_charType(c); +} + +static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return ubidi_getJoiningGroup(c); +} + +static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return ubidi_getJoiningType(c); +} + +static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c)); + return UPROPS_NTV_GET_TYPE(ntv); +} + +static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + UErrorCode errorCode=U_ZERO_ERROR; + return (int32_t)uscript_getScript(c, &errorCode); +} + +static int32_t scriptGetMaxValue(const IntProperty &/*prop*/, UProperty /*which*/) { + uint32_t scriptX=uprv_getMaxValues(0)&UPROPS_SCRIPT_X_MASK; + return uprops_mergeScriptCodeOrIndex(scriptX); +} + +/* + * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. + * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. + */ +static const UHangulSyllableType gcbToHst[]={ + U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */ + U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */ + U_HST_NOT_APPLICABLE, /* U_GCB_CR */ + U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */ + U_HST_LEADING_JAMO, /* U_GCB_L */ + U_HST_NOT_APPLICABLE, /* U_GCB_LF */ + U_HST_LV_SYLLABLE, /* U_GCB_LV */ + U_HST_LVT_SYLLABLE, /* U_GCB_LVT */ + U_HST_TRAILING_JAMO, /* U_GCB_T */ + U_HST_VOWEL_JAMO /* U_GCB_V */ + /* + * Omit GCB values beyond what we need for hst. + * The code below checks for the array length. + */ +}; + +static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + /* see comments on gcbToHst[] above */ + int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; + if(gcb<UPRV_LENGTHOF(gcbToHst)) { + return gcbToHst[gcb]; + } else { + return U_HST_NOT_APPLICABLE; + } +} + +#if UCONFIG_NO_NORMALIZATION +static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) { + return 0; +} +#else +static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) { + return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD)); +} +#endif + +#if UCONFIG_NO_NORMALIZATION +static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) { + return 0; +} +#else +static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return unorm_getFCD16(c)>>8; +} +#endif + +#if UCONFIG_NO_NORMALIZATION +static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) { + return 0; +} +#else +static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { + return unorm_getFCD16(c)&0xff; +} +#endif + +static int32_t getInPC(const IntProperty &, UChar32 c, UProperty) { + return ulayout_ensureData() && gInpcTrie != nullptr ? ucptrie_get(gInpcTrie, c) : 0; +} + +static int32_t getInSC(const IntProperty &, UChar32 c, UProperty) { + return ulayout_ensureData() && gInscTrie != nullptr ? ucptrie_get(gInscTrie, c) : 0; +} + +static int32_t getVo(const IntProperty &, UChar32 c, UProperty) { + return ulayout_ensureData() && gVoTrie != nullptr ? ucptrie_get(gVoTrie, c) : 0; +} + +static int32_t layoutGetMaxValue(const IntProperty &/*prop*/, UProperty which) { + if (!ulayout_ensureData()) { return 0; } + switch (which) { + case UCHAR_INDIC_POSITIONAL_CATEGORY: + return gMaxInpcValue; + case UCHAR_INDIC_SYLLABIC_CATEGORY: + return gMaxInscValue; + case UCHAR_VERTICAL_ORIENTATION: + return gMaxVoValue; + default: + return 0; + } +} + +static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={ + /* + * column, mask and shift values for int-value properties from u_getUnicodeProperties(). + * Must be in order of corresponding UProperty, + * and there must be exactly one entry per int UProperty. + * + * Properties with mask==0 are handled in code. + * For them, column is the UPropertySource value. + */ + { UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGetMaxValue }, + { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue }, + { UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, getMaxValueFromShift }, + { 2, UPROPS_DT_MASK, 0, defaultGetValue, defaultGetMaxValue }, + { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, defaultGetMaxValue }, + { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift }, + { UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biDiGetMaxValue }, + { UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDiGetMaxValue }, + { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, defaultGetMaxValue }, + { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getMaxValueFromShift }, + { UPROPS_SRC_PROPSVEC, 0, 0, getScript, scriptGetMaxValue }, + { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableType, getMaxValueFromShift }, + // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" + { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, + // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" + { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, + // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE + { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, + // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE + { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, + { UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClass, getMaxValueFromShift }, + { UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningClass, getMaxValueFromShift }, + { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, defaultGetMaxValue }, + { 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, defaultGetMaxValue }, + { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, defaultGetMaxValue }, + { UPROPS_SRC_BIDI, 0, 0, getBiDiPairedBracketType, biDiGetMaxValue }, + { UPROPS_SRC_INPC, 0, 0, getInPC, layoutGetMaxValue }, + { UPROPS_SRC_INSC, 0, 0, getInSC, layoutGetMaxValue }, + { UPROPS_SRC_VO, 0, 0, getVo, layoutGetMaxValue }, +}; + +U_CAPI int32_t U_EXPORT2 +u_getIntPropertyValue(UChar32 c, UProperty which) { + if(which<UCHAR_INT_START) { + if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { + const BinaryProperty &prop=binProps[which]; + return prop.contains(prop, c, which); + } + } else if(which<UCHAR_INT_LIMIT) { + const IntProperty &prop=intProps[which-UCHAR_INT_START]; + return prop.getValue(prop, c, which); + } else if(which==UCHAR_GENERAL_CATEGORY_MASK) { + return U_MASK(u_charType(c)); + } + return 0; // undefined +} + +U_CAPI int32_t U_EXPORT2 +u_getIntPropertyMinValue(UProperty /*which*/) { + return 0; /* all binary/enum/int properties have a minimum value of 0 */ +} + +U_CAPI int32_t U_EXPORT2 +u_getIntPropertyMaxValue(UProperty which) { + if(which<UCHAR_INT_START) { + if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { + return 1; // maximum TRUE for all binary properties + } + } else if(which<UCHAR_INT_LIMIT) { + const IntProperty &prop=intProps[which-UCHAR_INT_START]; + return prop.getMaxValue(prop, which); + } + return -1; // undefined +} + +U_CFUNC UPropertySource U_EXPORT2 +uprops_getSource(UProperty which) { + if(which<UCHAR_BINARY_START) { + return UPROPS_SRC_NONE; /* undefined */ + } else if(which<UCHAR_BINARY_LIMIT) { + const BinaryProperty &prop=binProps[which]; + if(prop.mask!=0) { + return UPROPS_SRC_PROPSVEC; + } else { + return (UPropertySource)prop.column; + } + } else if(which<UCHAR_INT_START) { + return UPROPS_SRC_NONE; /* undefined */ + } else if(which<UCHAR_INT_LIMIT) { + const IntProperty &prop=intProps[which-UCHAR_INT_START]; + if(prop.mask!=0) { + return UPROPS_SRC_PROPSVEC; + } else { + return (UPropertySource)prop.column; + } + } else if(which<UCHAR_STRING_START) { + switch(which) { + case UCHAR_GENERAL_CATEGORY_MASK: + case UCHAR_NUMERIC_VALUE: + return UPROPS_SRC_CHAR; + + default: + return UPROPS_SRC_NONE; + } + } else if(which<UCHAR_STRING_LIMIT) { + switch(which) { + case UCHAR_AGE: + return UPROPS_SRC_PROPSVEC; + + case UCHAR_BIDI_MIRRORING_GLYPH: + return UPROPS_SRC_BIDI; + + case UCHAR_CASE_FOLDING: + case UCHAR_LOWERCASE_MAPPING: + case UCHAR_SIMPLE_CASE_FOLDING: + case UCHAR_SIMPLE_LOWERCASE_MAPPING: + case UCHAR_SIMPLE_TITLECASE_MAPPING: + case UCHAR_SIMPLE_UPPERCASE_MAPPING: + case UCHAR_TITLECASE_MAPPING: + case UCHAR_UPPERCASE_MAPPING: + return UPROPS_SRC_CASE; + + case UCHAR_ISO_COMMENT: + case UCHAR_NAME: + case UCHAR_UNICODE_1_NAME: + return UPROPS_SRC_NAMES; + + default: + return UPROPS_SRC_NONE; + } + } else { + switch(which) { + case UCHAR_SCRIPT_EXTENSIONS: + return UPROPS_SRC_PROPSVEC; + default: + return UPROPS_SRC_NONE; /* undefined */ + } + } +} + +U_CFUNC void U_EXPORT2 +uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode) { + if (!ulayout_ensureData(*pErrorCode)) { return; } + const UCPTrie *trie; + switch (src) { + case UPROPS_SRC_INPC: + trie = gInpcTrie; + break; + case UPROPS_SRC_INSC: + trie = gInscTrie; + break; + case UPROPS_SRC_VO: + trie = gVoTrie; + break; + default: + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if (trie == nullptr) { + *pErrorCode = U_MISSING_RESOURCE_ERROR; + return; + } + + // Add the start code point of each same-value range of the trie. + UChar32 start = 0, end; + while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, + nullptr, nullptr, nullptr)) >= 0) { + sa->add(sa->set, start); + start = end + 1; + } +} + +#if !UCONFIG_NO_NORMALIZATION + +U_CAPI int32_t U_EXPORT2 +u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) { + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(destCapacity<0 || (dest==NULL && destCapacity>0)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + // Compute the FC_NFKC_Closure on the fly: + // We have the API for complete coverage of Unicode properties, although + // this value by itself is not useful via API. + // (What could be useful is a custom normalization table that combines + // case folding and NFKC.) + // For the derivation, see Unicode's DerivedNormalizationProps.txt. + const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + // first: b = NFKC(Fold(a)) + UnicodeString folded1String; + const UChar *folded1; + int32_t folded1Length=ucase_toFullFolding(c, &folded1, U_FOLD_CASE_DEFAULT); + if(folded1Length<0) { + const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc); + if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) { + return u_terminateUChars(dest, destCapacity, 0, pErrorCode); // c does not change at all under CaseFolding+NFKC + } + folded1String.setTo(c); + } else { + if(folded1Length>UCASE_MAX_STRING_LENGTH) { + folded1String.setTo(folded1Length); + } else { + folded1String.setTo(FALSE, folded1, folded1Length); + } + } + UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode); + // second: c = NFKC(Fold(b)) + UnicodeString folded2String(kc1); + UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode); + // if (c != b) add the mapping from a to c + if(U_FAILURE(*pErrorCode) || kc1==kc2) { + return u_terminateUChars(dest, destCapacity, 0, pErrorCode); + } else { + return kc2.extract(dest, destCapacity, *pErrorCode); + } +} + +#endif diff --git a/thirdparty/icu4c/common/uprops.h b/thirdparty/icu4c/common/uprops.h new file mode 100644 index 0000000000..8bf929919f --- /dev/null +++ b/thirdparty/icu4c/common/uprops.h @@ -0,0 +1,504 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uprops.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002feb24 +* created by: Markus W. Scherer +* +* Constants for mostly non-core Unicode character properties +* stored in uprops.icu. +*/ + +#ifndef __UPROPS_H__ +#define __UPROPS_H__ + +#include "unicode/utypes.h" +#include "unicode/uset.h" +#include "uset_imp.h" +#include "udataswp.h" + +/* indexes[] entries */ +enum { + UPROPS_PROPS32_INDEX, + UPROPS_EXCEPTIONS_INDEX, + UPROPS_EXCEPTIONS_TOP_INDEX, + + UPROPS_ADDITIONAL_TRIE_INDEX, + UPROPS_ADDITIONAL_VECTORS_INDEX, + UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX, + + UPROPS_SCRIPT_EXTENSIONS_INDEX, + + UPROPS_RESERVED_INDEX_7, + UPROPS_RESERVED_INDEX_8, + + /* size of the data file (number of 32-bit units after the header) */ + UPROPS_DATA_TOP_INDEX, + + /* maximum values for code values in vector word 0 */ + UPROPS_MAX_VALUES_INDEX=10, + /* maximum values for code values in vector word 2 */ + UPROPS_MAX_VALUES_2_INDEX, + + UPROPS_INDEX_COUNT=16 +}; + +/* definitions for the main properties words */ +enum { + /* general category shift==0 0 (5 bits) */ + /* reserved 5 (1 bit) */ + UPROPS_NUMERIC_TYPE_VALUE_SHIFT=6 /* 6 (10 bits) */ +}; + +#define GET_CATEGORY(props) ((props)&0x1f) +#define CAT_MASK(props) U_MASK(GET_CATEGORY(props)) + +#define GET_NUMERIC_TYPE_VALUE(props) ((props)>>UPROPS_NUMERIC_TYPE_VALUE_SHIFT) + +/* constants for the storage form of numeric types and values */ +enum { + /** No numeric value. */ + UPROPS_NTV_NONE=0, + /** Decimal digits: nv=0..9 */ + UPROPS_NTV_DECIMAL_START=1, + /** Other digits: nv=0..9 */ + UPROPS_NTV_DIGIT_START=11, + /** Small integers: nv=0..154 */ + UPROPS_NTV_NUMERIC_START=21, + /** Fractions: ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16 */ + UPROPS_NTV_FRACTION_START=0xb0, + /** + * Large integers: + * ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33) + * (only one significant decimal digit) + */ + UPROPS_NTV_LARGE_START=0x1e0, + /** + * Sexagesimal numbers: + * ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4) + */ + UPROPS_NTV_BASE60_START=0x300, + /** + * Fraction-20 values: + * frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640 + * numerator: num = 2*(frac20&3)+1 + * denominator: den = 20<<(frac20>>2) + */ + UPROPS_NTV_FRACTION20_START=UPROPS_NTV_BASE60_START+36, // 0x300+9*4=0x324 + /** + * Fraction-32 values: + * frac32 = ntv-0x34c = 0..15 -> 1|3|5|7 / 32|64|128|256 + * numerator: num = 2*(frac32&3)+1 + * denominator: den = 32<<(frac32>>2) + */ + UPROPS_NTV_FRACTION32_START=UPROPS_NTV_FRACTION20_START+24, // 0x324+6*4=0x34c + /** No numeric value (yet). */ + UPROPS_NTV_RESERVED_START=UPROPS_NTV_FRACTION32_START+16, // 0x34c+4*4=0x35c + + UPROPS_NTV_MAX_SMALL_INT=UPROPS_NTV_FRACTION_START-UPROPS_NTV_NUMERIC_START-1 +}; + +#define UPROPS_NTV_GET_TYPE(ntv) \ + ((ntv==UPROPS_NTV_NONE) ? U_NT_NONE : \ + (ntv<UPROPS_NTV_DIGIT_START) ? U_NT_DECIMAL : \ + (ntv<UPROPS_NTV_NUMERIC_START) ? U_NT_DIGIT : \ + U_NT_NUMERIC) + +/* number of properties vector words */ +#define UPROPS_VECTOR_WORDS 3 + +/* + * Properties in vector word 0 + * Bits + * 31..24 DerivedAge version major/minor one nibble each + * 23..22 3..1: Bits 21..20 & 7..0 = Script_Extensions index + * 3: Script value from Script_Extensions + * 2: Script=Inherited + * 1: Script=Common + * 0: Script=bits 21..20 & 7..0 + * 21..20 Bits 9..8 of the UScriptCode, or index to Script_Extensions + * 19..17 East Asian Width + * 16.. 8 UBlockCode + * 7.. 0 UScriptCode, or index to Script_Extensions + */ + +/* derived age: one nibble each for major and minor version numbers */ +#define UPROPS_AGE_MASK 0xff000000 +#define UPROPS_AGE_SHIFT 24 + +/* Script_Extensions: mask includes Script */ +#define UPROPS_SCRIPT_X_MASK 0x00f000ff +#define UPROPS_SCRIPT_X_SHIFT 22 + +// The UScriptCode or Script_Extensions index is split across two bit fields. +// (Starting with Unicode 13/ICU 66/2019 due to more varied Script_Extensions.) +// Shift the high bits right by 12 to assemble the full value. +#define UPROPS_SCRIPT_HIGH_MASK 0x00300000 +#define UPROPS_SCRIPT_HIGH_SHIFT 12 +#define UPROPS_MAX_SCRIPT 0x3ff + +#define UPROPS_EA_MASK 0x000e0000 +#define UPROPS_EA_SHIFT 17 + +#define UPROPS_BLOCK_MASK 0x0001ff00 +#define UPROPS_BLOCK_SHIFT 8 + +#define UPROPS_SCRIPT_LOW_MASK 0x000000ff + +/* UPROPS_SCRIPT_X_WITH_COMMON must be the lowest value that involves Script_Extensions. */ +#define UPROPS_SCRIPT_X_WITH_COMMON 0x400000 +#define UPROPS_SCRIPT_X_WITH_INHERITED 0x800000 +#define UPROPS_SCRIPT_X_WITH_OTHER 0xc00000 + +#ifdef __cplusplus + +namespace { + +inline uint32_t uprops_mergeScriptCodeOrIndex(uint32_t scriptX) { + return + ((scriptX & UPROPS_SCRIPT_HIGH_MASK) >> UPROPS_SCRIPT_HIGH_SHIFT) | + (scriptX & UPROPS_SCRIPT_LOW_MASK); +} + +} // namespace + +#endif // __cplusplus + +/* + * Properties in vector word 1 + * Each bit encodes one binary property. + * The following constants represent the bit number, use 1<<UPROPS_XYZ. + * UPROPS_BINARY_1_TOP<=32! + * + * Keep this list of property enums in sync with + * propListNames[] in icu/source/tools/genprops/props2.c! + * + * ICU 2.6/uprops format version 3.2 stores full properties instead of "Other_". + */ +enum { + UPROPS_WHITE_SPACE, + UPROPS_DASH, + UPROPS_HYPHEN, + UPROPS_QUOTATION_MARK, + UPROPS_TERMINAL_PUNCTUATION, + UPROPS_MATH, + UPROPS_HEX_DIGIT, + UPROPS_ASCII_HEX_DIGIT, + UPROPS_ALPHABETIC, + UPROPS_IDEOGRAPHIC, + UPROPS_DIACRITIC, + UPROPS_EXTENDER, + UPROPS_NONCHARACTER_CODE_POINT, + UPROPS_GRAPHEME_EXTEND, + UPROPS_GRAPHEME_LINK, + UPROPS_IDS_BINARY_OPERATOR, + UPROPS_IDS_TRINARY_OPERATOR, + UPROPS_RADICAL, + UPROPS_UNIFIED_IDEOGRAPH, + UPROPS_DEFAULT_IGNORABLE_CODE_POINT, + UPROPS_DEPRECATED, + UPROPS_LOGICAL_ORDER_EXCEPTION, + UPROPS_XID_START, + UPROPS_XID_CONTINUE, + UPROPS_ID_START, /* ICU 2.6, uprops format version 3.2 */ + UPROPS_ID_CONTINUE, + UPROPS_GRAPHEME_BASE, + UPROPS_S_TERM, /* new in ICU 3.0 and Unicode 4.0.1 */ + UPROPS_VARIATION_SELECTOR, + UPROPS_PATTERN_SYNTAX, /* new in ICU 3.4 and Unicode 4.1 */ + UPROPS_PATTERN_WHITE_SPACE, + UPROPS_PREPENDED_CONCATENATION_MARK, // new in ICU 60 and Unicode 10 + UPROPS_BINARY_1_TOP /* ==32 - full! */ +}; + +/* + * Properties in vector word 2 + * Bits + * 31..26 http://www.unicode.org/reports/tr51/#Emoji_Properties + * 25..20 Line Break + * 19..15 Sentence Break + * 14..10 Word Break + * 9.. 5 Grapheme Cluster Break + * 4.. 0 Decomposition Type + */ +enum { + UPROPS_2_EXTENDED_PICTOGRAPHIC=26, + UPROPS_2_EMOJI_COMPONENT, + UPROPS_2_EMOJI, + UPROPS_2_EMOJI_PRESENTATION, + UPROPS_2_EMOJI_MODIFIER, + UPROPS_2_EMOJI_MODIFIER_BASE +}; + +#define UPROPS_LB_MASK 0x03f00000 +#define UPROPS_LB_SHIFT 20 + +#define UPROPS_SB_MASK 0x000f8000 +#define UPROPS_SB_SHIFT 15 + +#define UPROPS_WB_MASK 0x00007c00 +#define UPROPS_WB_SHIFT 10 + +#define UPROPS_GCB_MASK 0x000003e0 +#define UPROPS_GCB_SHIFT 5 + +#define UPROPS_DT_MASK 0x0000001f + +/** + * Gets the main properties value for a code point. + * Implemented in uchar.c for uprops.cpp. + */ +U_CFUNC uint32_t +u_getMainProperties(UChar32 c); + +/** + * Get a properties vector word for a code point. + * Implemented in uchar.c for uprops.cpp. + * @return 0 if no data or illegal argument + */ +U_CFUNC uint32_t +u_getUnicodeProperties(UChar32 c, int32_t column); + +/** + * Get the the maximum values for some enum/int properties. + * Use the same column numbers as for u_getUnicodeProperties(). + * The returned value will contain maximum values stored in the same bit fields + * as where the enum values are stored in the u_getUnicodeProperties() + * return values for the same columns. + * + * Valid columns are those for properties words that contain enumerated values. + * (ICU 2.6: columns 0 and 2) + * For other column numbers, this function will return 0. + * + * @internal + */ +U_CFUNC int32_t +uprv_getMaxValues(int32_t column); + +/** + * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM. + * @internal + */ +U_CFUNC UBool +u_isalnumPOSIX(UChar32 c); + +/** + * Checks if c is in + * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}] + * with space=\p{Whitespace} and Control=Cc. + * Implements UCHAR_POSIX_GRAPH. + * @internal + */ +U_CFUNC UBool +u_isgraphPOSIX(UChar32 c); + +/** + * Checks if c is in \p{graph}\p{blank} - \p{cntrl}. + * Implements UCHAR_POSIX_PRINT. + * @internal + */ +U_CFUNC UBool +u_isprintPOSIX(UChar32 c); + +/** Turn a bit index into a bit flag. @internal */ +#define FLAG(n) ((uint32_t)1<<(n)) + +/** Flags for general categories in the order of UCharCategory. @internal */ +#define _Cn FLAG(U_GENERAL_OTHER_TYPES) +#define _Lu FLAG(U_UPPERCASE_LETTER) +#define _Ll FLAG(U_LOWERCASE_LETTER) +#define _Lt FLAG(U_TITLECASE_LETTER) +#define _Lm FLAG(U_MODIFIER_LETTER) +/* #define _Lo FLAG(U_OTHER_LETTER) -- conflicts with MS Visual Studio 9.0 xiosbase */ +#define _Mn FLAG(U_NON_SPACING_MARK) +#define _Me FLAG(U_ENCLOSING_MARK) +#define _Mc FLAG(U_COMBINING_SPACING_MARK) +#define _Nd FLAG(U_DECIMAL_DIGIT_NUMBER) +#define _Nl FLAG(U_LETTER_NUMBER) +#define _No FLAG(U_OTHER_NUMBER) +#define _Zs FLAG(U_SPACE_SEPARATOR) +#define _Zl FLAG(U_LINE_SEPARATOR) +#define _Zp FLAG(U_PARAGRAPH_SEPARATOR) +#define _Cc FLAG(U_CONTROL_CHAR) +#define _Cf FLAG(U_FORMAT_CHAR) +#define _Co FLAG(U_PRIVATE_USE_CHAR) +#define _Cs FLAG(U_SURROGATE) +#define _Pd FLAG(U_DASH_PUNCTUATION) +#define _Ps FLAG(U_START_PUNCTUATION) +/* #define _Pe FLAG(U_END_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 xlocnum */ +/* #define _Pc FLAG(U_CONNECTOR_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */ +#define _Po FLAG(U_OTHER_PUNCTUATION) +#define _Sm FLAG(U_MATH_SYMBOL) +#define _Sc FLAG(U_CURRENCY_SYMBOL) +#define _Sk FLAG(U_MODIFIER_SYMBOL) +#define _So FLAG(U_OTHER_SYMBOL) +#define _Pi FLAG(U_INITIAL_PUNCTUATION) +/* #define _Pf FLAG(U_FINAL_PUNCTUATION) -- conflicts with MS Visual Studio 9.0 streambuf */ + +/** Some code points. @internal */ +enum { + TAB =0x0009, + LF =0x000a, + FF =0x000c, + CR =0x000d, + U_A =0x0041, + U_F =0x0046, + U_Z =0x005a, + U_a =0x0061, + U_f =0x0066, + U_z =0x007a, + DEL =0x007f, + NL =0x0085, + NBSP =0x00a0, + CGJ =0x034f, + FIGURESP=0x2007, + HAIRSP =0x200a, + ZWNJ =0x200c, + ZWJ =0x200d, + RLM =0x200f, + NNBSP =0x202f, + WJ =0x2060, + INHSWAP =0x206a, + NOMDIG =0x206f, + U_FW_A =0xff21, + U_FW_F =0xff26, + U_FW_Z =0xff3a, + U_FW_a =0xff41, + U_FW_f =0xff46, + U_FW_z =0xff5a, + ZWNBSP =0xfeff +}; + +/** + * Get the maximum length of a (regular/1.0/extended) character name. + * @return 0 if no character names available. + */ +U_CAPI int32_t U_EXPORT2 +uprv_getMaxCharNameLength(void); + +/** + * Fills set with characters that are used in Unicode character names. + * Includes all characters that are used in regular/Unicode 1.0/extended names. + * Just empties the set if no character names are available. + * @param sa USetAdder to receive characters. + */ +U_CAPI void U_EXPORT2 +uprv_getCharNameCharacters(const USetAdder *sa); + +/** + * Constants for which data and implementation files provide which properties. + * Used by UnicodeSet for service-specific property enumeration. + * @internal + */ +enum UPropertySource { + /** No source, not a supported property. */ + UPROPS_SRC_NONE, + /** From uchar.c/uprops.icu main trie */ + UPROPS_SRC_CHAR, + /** From uchar.c/uprops.icu properties vectors trie */ + UPROPS_SRC_PROPSVEC, + /** From unames.c/unames.icu */ + UPROPS_SRC_NAMES, + /** From ucase.c/ucase.icu */ + UPROPS_SRC_CASE, + /** From ubidi_props.c/ubidi.icu */ + UPROPS_SRC_BIDI, + /** From uchar.c/uprops.icu main trie as well as properties vectors trie */ + UPROPS_SRC_CHAR_AND_PROPSVEC, + /** From ucase.c/ucase.icu as well as unorm.cpp/unorm.icu */ + UPROPS_SRC_CASE_AND_NORM, + /** From normalizer2impl.cpp/nfc.nrm */ + UPROPS_SRC_NFC, + /** From normalizer2impl.cpp/nfkc.nrm */ + UPROPS_SRC_NFKC, + /** From normalizer2impl.cpp/nfkc_cf.nrm */ + UPROPS_SRC_NFKC_CF, + /** From normalizer2impl.cpp/nfc.nrm canonical iterator data */ + UPROPS_SRC_NFC_CANON_ITER, + // Text layout properties. + UPROPS_SRC_INPC, + UPROPS_SRC_INSC, + UPROPS_SRC_VO, + /** One more than the highest UPropertySource (UPROPS_SRC_) constant. */ + UPROPS_SRC_COUNT +}; +typedef enum UPropertySource UPropertySource; + +/** + * @see UPropertySource + * @internal + */ +U_CFUNC UPropertySource U_EXPORT2 +uprops_getSource(UProperty which); + +/** + * Enumerate uprops.icu's main data trie and add the + * start of each range of same properties to the set. + * @internal + */ +U_CFUNC void U_EXPORT2 +uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); + +/** + * Enumerate uprops.icu's properties vectors trie and add the + * start of each range of same properties to the set. + * @internal + */ +U_CFUNC void U_EXPORT2 +upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); + +U_CFUNC void U_EXPORT2 +uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode); + +/** + * Return a set of characters for property enumeration. + * For each two consecutive characters (start, limit) in the set, + * all of the properties for start..limit-1 are all the same. + * + * @param sa USetAdder to receive result. Existing contents are lost. + * @internal + */ +/*U_CFUNC void U_EXPORT2 +uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode); +*/ + +/** + * Swap the ICU Unicode character names file. See uchar.c. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +uchar_swapNames(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +#ifdef __cplusplus + +U_NAMESPACE_BEGIN + +class UnicodeSet; + +class CharacterProperties { +public: + CharacterProperties() = delete; + static const UnicodeSet *getInclusionsForProperty(UProperty prop, UErrorCode &errorCode); +}; + +// implemented in uniset_props.cpp +U_CFUNC UnicodeSet * +uniset_getUnicode32Instance(UErrorCode &errorCode); + +U_NAMESPACE_END + +#endif + +#endif diff --git a/thirdparty/icu4c/common/ures_cnv.cpp b/thirdparty/icu4c/common/ures_cnv.cpp new file mode 100644 index 0000000000..1aa58e753c --- /dev/null +++ b/thirdparty/icu4c/common/ures_cnv.cpp @@ -0,0 +1,78 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1997-2006, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ures_cnv.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004aug25 +* created by: Markus W. Scherer +* +* Character conversion functions moved here from uresbund.c +*/ + +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "unicode/ustring.h" +#include "unicode/ucnv.h" +#include "unicode/ures.h" +#include "uinvchar.h" +#include "ustr_cnv.h" + +U_CAPI UResourceBundle * U_EXPORT2 +ures_openU(const UChar *myPath, + const char *localeID, + UErrorCode *status) +{ + char pathBuffer[1024]; + int32_t length; + char *path = pathBuffer; + + if(status==NULL || U_FAILURE(*status)) { + return NULL; + } + if(myPath==NULL) { + path = NULL; + } + else { + length=u_strlen(myPath); + if(length>=(int32_t)sizeof(pathBuffer)) { + *status=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } else if(uprv_isInvariantUString(myPath, length)) { + /* + * the invariant converter is sufficient for package and tree names + * and is more efficient + */ + u_UCharsToChars(myPath, path, length+1); /* length+1 to include the NUL */ + } else { +#if !UCONFIG_NO_CONVERSION + /* use the default converter to support variant-character paths */ + UConverter *cnv=u_getDefaultConverter(status); + length=ucnv_fromUChars(cnv, path, (int32_t)sizeof(pathBuffer), myPath, length, status); + u_releaseDefaultConverter(cnv); + if(U_FAILURE(*status)) { + return NULL; + } + if(length>=(int32_t)sizeof(pathBuffer)) { + /* not NUL-terminated - path too long */ + *status=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } +#else + /* the default converter is not available */ + *status=U_UNSUPPORTED_ERROR; + return NULL; +#endif + } + } + + return ures_open(path, localeID, status); +} diff --git a/thirdparty/icu4c/common/uresbund.cpp b/thirdparty/icu4c/common/uresbund.cpp new file mode 100644 index 0000000000..2ece87897d --- /dev/null +++ b/thirdparty/icu4c/common/uresbund.cpp @@ -0,0 +1,3090 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1997-2016, International Business Machines Corporation and +* others. All Rights Reserved. +****************************************************************************** +* +* File uresbund.cpp +* +* Modification History: +* +* Date Name Description +* 04/01/97 aliu Creation. +* 06/14/99 stephen Removed functions taking a filename suffix. +* 07/20/99 stephen Changed for UResourceBundle typedef'd to void* +* 11/09/99 weiv Added ures_getLocale() +* March 2000 weiv Total overhaul - using data in DLLs +* 06/20/2000 helena OS/400 port changes; mostly typecast. +* 06/24/02 weiv Added support for resource sharing +****************************************************************************** +*/ + +#include "unicode/ures.h" +#include "unicode/ustring.h" +#include "unicode/ucnv.h" +#include "charstr.h" +#include "uresimp.h" +#include "ustr_imp.h" +#include "cwchar.h" +#include "ucln_cmn.h" +#include "cmemory.h" +#include "cstring.h" +#include "mutex.h" +#include "uhash.h" +#include "unicode/uenum.h" +#include "uenumimp.h" +#include "ulocimp.h" +#include "umutex.h" +#include "putilimp.h" +#include "uassert.h" +#include "uresdata.h" + +using namespace icu; + +/* +Static cache for already opened resource bundles - mostly for keeping fallback info +TODO: This cache should probably be removed when the deprecated code is + completely removed. +*/ +static UHashtable *cache = NULL; +static icu::UInitOnce gCacheInitOnce = U_INITONCE_INITIALIZER; + +static UMutex resbMutex; + +/* INTERNAL: hashes an entry */ +static int32_t U_CALLCONV hashEntry(const UHashTok parm) { + UResourceDataEntry *b = (UResourceDataEntry *)parm.pointer; + UHashTok namekey, pathkey; + namekey.pointer = b->fName; + pathkey.pointer = b->fPath; + return uhash_hashChars(namekey)+37u*uhash_hashChars(pathkey); +} + +/* INTERNAL: compares two entries */ +static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) { + UResourceDataEntry *b1 = (UResourceDataEntry *)p1.pointer; + UResourceDataEntry *b2 = (UResourceDataEntry *)p2.pointer; + UHashTok name1, name2, path1, path2; + name1.pointer = b1->fName; + name2.pointer = b2->fName; + path1.pointer = b1->fPath; + path2.pointer = b2->fPath; + return (UBool)(uhash_compareChars(name1, name2) && + uhash_compareChars(path1, path2)); +} + + +/** + * Internal function, gets parts of locale name according + * to the position of '_' character + */ +static UBool chopLocale(char *name) { + char *i = uprv_strrchr(name, '_'); + + if(i != NULL) { + *i = '\0'; + return TRUE; + } + + return FALSE; +} + +/** + * Internal function + */ +static void entryIncrease(UResourceDataEntry *entry) { + Mutex lock(&resbMutex); + entry->fCountExisting++; + while(entry->fParent != NULL) { + entry = entry->fParent; + entry->fCountExisting++; + } +} + +/** + * Internal function. Tries to find a resource in given Resource + * Bundle, as well as in its parents + */ +static const ResourceData *getFallbackData(const UResourceBundle* resBundle, const char* * resTag, UResourceDataEntry* *realData, Resource *res, UErrorCode *status) { + UResourceDataEntry *resB = resBundle->fData; + int32_t indexR = -1; + int32_t i = 0; + *res = RES_BOGUS; + if(resB != NULL) { + if(resB->fBogus == U_ZERO_ERROR) { /* if this resource is real, */ + *res = res_getTableItemByKey(&(resB->fData), resB->fData.rootRes, &indexR, resTag); /* try to get data from there */ + i++; + } + if(resBundle->fHasFallback == TRUE) { + while(*res == RES_BOGUS && resB->fParent != NULL) { /* Otherwise, we'll look in parents */ + resB = resB->fParent; + if(resB->fBogus == U_ZERO_ERROR) { + i++; + *res = res_getTableItemByKey(&(resB->fData), resB->fData.rootRes, &indexR, resTag); + } + } + } + + if(*res != RES_BOGUS) { /* If the resource is found in parents, we need to adjust the error */ + if(i>1) { + if(uprv_strcmp(resB->fName, uloc_getDefault())==0 || uprv_strcmp(resB->fName, kRootLocaleName)==0) { + *status = U_USING_DEFAULT_WARNING; + } else { + *status = U_USING_FALLBACK_WARNING; + } + } + *realData = resB; + return (&(resB->fData)); + } else { /* If resource is not found, we need to give an error */ + *status = U_MISSING_RESOURCE_ERROR; + return NULL; + } + } else { + *status = U_MISSING_RESOURCE_ERROR; + return NULL; + } +} + +static void +free_entry(UResourceDataEntry *entry) { + UResourceDataEntry *alias; + res_unload(&(entry->fData)); + if(entry->fName != NULL && entry->fName != entry->fNameBuffer) { + uprv_free(entry->fName); + } + if(entry->fPath != NULL) { + uprv_free(entry->fPath); + } + if(entry->fPool != NULL) { + --entry->fPool->fCountExisting; + } + alias = entry->fAlias; + if(alias != NULL) { + while(alias->fAlias != NULL) { + alias = alias->fAlias; + } + --alias->fCountExisting; + } + uprv_free(entry); +} + +/* Works just like ucnv_flushCache() */ +static int32_t ures_flushCache() +{ + UResourceDataEntry *resB; + int32_t pos; + int32_t rbDeletedNum = 0; + const UHashElement *e; + UBool deletedMore; + + /*if shared data hasn't even been lazy evaluated yet + * return 0 + */ + Mutex lock(&resbMutex); + if (cache == NULL) { + return 0; + } + + do { + deletedMore = FALSE; + /*creates an enumeration to iterate through every element in the table */ + pos = UHASH_FIRST; + while ((e = uhash_nextElement(cache, &pos)) != NULL) + { + resB = (UResourceDataEntry *) e->value.pointer; + /* Deletes only if reference counter == 0 + * Don't worry about the children of this node. + * Those will eventually get deleted too, if not already. + * Don't worry about the parents of this node. + * Those will eventually get deleted too, if not already. + */ + /* 04/05/2002 [weiv] fCountExisting should now be accurate. If it's not zero, that means that */ + /* some resource bundles are still open somewhere. */ + + if (resB->fCountExisting == 0) { + rbDeletedNum++; + deletedMore = TRUE; + uhash_removeElement(cache, e); + free_entry(resB); + } + } + /* + * Do it again to catch bundles (aliases, pool bundle) whose fCountExisting + * got decremented by free_entry(). + */ + } while(deletedMore); + + return rbDeletedNum; +} + +#ifdef URES_DEBUG +#include <stdio.h> + +U_CAPI UBool U_EXPORT2 ures_dumpCacheContents(void) { + UBool cacheNotEmpty = FALSE; + int32_t pos = UHASH_FIRST; + const UHashElement *e; + UResourceDataEntry *resB; + + Mutex lock(&resbMutex); + if (cache == NULL) { + fprintf(stderr,"%s:%d: RB Cache is NULL.\n", __FILE__, __LINE__); + return FALSE; + } + + while ((e = uhash_nextElement(cache, &pos)) != NULL) { + cacheNotEmpty=TRUE; + resB = (UResourceDataEntry *) e->value.pointer; + fprintf(stderr,"%s:%d: RB Cache: Entry @0x%p, refcount %d, name %s:%s. Pool 0x%p, alias 0x%p, parent 0x%p\n", + __FILE__, __LINE__, + (void*)resB, resB->fCountExisting, + resB->fName?resB->fName:"NULL", + resB->fPath?resB->fPath:"NULL", + (void*)resB->fPool, + (void*)resB->fAlias, + (void*)resB->fParent); + } + + fprintf(stderr,"%s:%d: RB Cache still contains %d items.\n", __FILE__, __LINE__, uhash_count(cache)); + return cacheNotEmpty; +} + +#endif + +static UBool U_CALLCONV ures_cleanup(void) +{ + if (cache != NULL) { + ures_flushCache(); + uhash_close(cache); + cache = NULL; + } + gCacheInitOnce.reset(); + return TRUE; +} + +/** INTERNAL: Initializes the cache for resources */ +static void U_CALLCONV createCache(UErrorCode &status) { + U_ASSERT(cache == NULL); + cache = uhash_open(hashEntry, compareEntries, NULL, &status); + ucln_common_registerCleanup(UCLN_COMMON_URES, ures_cleanup); +} + +static void initCache(UErrorCode *status) { + umtx_initOnce(gCacheInitOnce, &createCache, *status); +} + +/** INTERNAL: sets the name (locale) of the resource bundle to given name */ + +static void setEntryName(UResourceDataEntry *res, const char *name, UErrorCode *status) { + int32_t len = (int32_t)uprv_strlen(name); + if(res->fName != NULL && res->fName != res->fNameBuffer) { + uprv_free(res->fName); + } + if (len < (int32_t)sizeof(res->fNameBuffer)) { + res->fName = res->fNameBuffer; + } + else { + res->fName = (char *)uprv_malloc(len+1); + } + if(res->fName == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + } else { + uprv_strcpy(res->fName, name); + } +} + +static UResourceDataEntry * +getPoolEntry(const char *path, UErrorCode *status); + +/** + * INTERNAL: Inits and opens an entry from a data DLL. + * CAUTION: resbMutex must be locked when calling this function. + */ +static UResourceDataEntry *init_entry(const char *localeID, const char *path, UErrorCode *status) { + UResourceDataEntry *r = NULL; + UResourceDataEntry find; + /*int32_t hashValue;*/ + const char *name; + char aliasName[100] = { 0 }; + int32_t aliasLen = 0; + /*UBool isAlias = FALSE;*/ + /*UHashTok hashkey; */ + + if(U_FAILURE(*status)) { + return NULL; + } + + /* here we try to deduce the right locale name */ + if(localeID == NULL) { /* if localeID is NULL, we're trying to open default locale */ + name = uloc_getDefault(); + } else if(*localeID == 0) { /* if localeID is "" then we try to open root locale */ + name = kRootLocaleName; + } else { /* otherwise, we'll open what we're given */ + name = localeID; + } + + find.fName = (char *)name; + find.fPath = (char *)path; + + /* calculate the hash value of the entry */ + /*hashkey.pointer = (void *)&find;*/ + /*hashValue = hashEntry(hashkey);*/ + + /* check to see if we already have this entry */ + r = (UResourceDataEntry *)uhash_get(cache, &find); + if(r == NULL) { + /* if the entry is not yet in the hash table, we'll try to construct a new one */ + r = (UResourceDataEntry *) uprv_malloc(sizeof(UResourceDataEntry)); + if(r == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + uprv_memset(r, 0, sizeof(UResourceDataEntry)); + /*r->fHashKey = hashValue;*/ + + setEntryName(r, name, status); + if (U_FAILURE(*status)) { + uprv_free(r); + return NULL; + } + + if(path != NULL) { + r->fPath = (char *)uprv_strdup(path); + if(r->fPath == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + uprv_free(r); + return NULL; + } + } + + /* this is the actual loading */ + res_load(&(r->fData), r->fPath, r->fName, status); + + if (U_FAILURE(*status)) { + /* if we failed to load due to an out-of-memory error, exit early. */ + if (*status == U_MEMORY_ALLOCATION_ERROR) { + uprv_free(r); + return NULL; + } + /* we have no such entry in dll, so it will always use fallback */ + *status = U_USING_FALLBACK_WARNING; + r->fBogus = U_USING_FALLBACK_WARNING; + } else { /* if we have a regular entry */ + Resource aliasres; + if (r->fData.usesPoolBundle) { + r->fPool = getPoolEntry(r->fPath, status); + if (U_SUCCESS(*status)) { + const int32_t *poolIndexes = r->fPool->fData.pRoot + 1; + if(r->fData.pRoot[1 + URES_INDEX_POOL_CHECKSUM] == poolIndexes[URES_INDEX_POOL_CHECKSUM]) { + r->fData.poolBundleKeys = (const char *)(poolIndexes + (poolIndexes[URES_INDEX_LENGTH] & 0xff)); + r->fData.poolBundleStrings = r->fPool->fData.p16BitUnits; + } else { + r->fBogus = *status = U_INVALID_FORMAT_ERROR; + } + } else { + r->fBogus = *status; + } + } + if (U_SUCCESS(*status)) { + /* handle the alias by trying to get out the %%Alias tag.*/ + /* We'll try to get alias string from the bundle */ + aliasres = res_getResource(&(r->fData), "%%ALIAS"); + if (aliasres != RES_BOGUS) { + // No tracing: called during initial data loading + const UChar *alias = res_getStringNoTrace(&(r->fData), aliasres, &aliasLen); + if(alias != NULL && aliasLen > 0) { /* if there is actual alias - unload and load new data */ + u_UCharsToChars(alias, aliasName, aliasLen+1); + r->fAlias = init_entry(aliasName, path, status); + } + } + } + } + + { + UResourceDataEntry *oldR = NULL; + if((oldR = (UResourceDataEntry *)uhash_get(cache, r)) == NULL) { /* if the data is not cached */ + /* just insert it in the cache */ + UErrorCode cacheStatus = U_ZERO_ERROR; + uhash_put(cache, (void *)r, r, &cacheStatus); + if (U_FAILURE(cacheStatus)) { + *status = cacheStatus; + free_entry(r); + r = NULL; + } + } else { + /* somebody have already inserted it while we were working, discard newly opened data */ + /* Also, we could get here IF we opened an alias */ + free_entry(r); + r = oldR; + } + } + + } + if(r != NULL) { + /* return the real bundle */ + while(r->fAlias != NULL) { + r = r->fAlias; + } + r->fCountExisting++; /* we increase its reference count */ + /* if the resource has a warning */ + /* we don't want to overwrite a status with no error */ + if(r->fBogus != U_ZERO_ERROR && U_SUCCESS(*status)) { + *status = r->fBogus; /* set the returning status */ + } + } + return r; +} + +static UResourceDataEntry * +getPoolEntry(const char *path, UErrorCode *status) { + UResourceDataEntry *poolBundle = init_entry(kPoolBundleName, path, status); + if( U_SUCCESS(*status) && + (poolBundle == NULL || poolBundle->fBogus != U_ZERO_ERROR || !poolBundle->fData.isPoolBundle) + ) { + *status = U_INVALID_FORMAT_ERROR; + } + return poolBundle; +} + +/* INTERNAL: */ +/* CAUTION: resbMutex must be locked when calling this function! */ +static UResourceDataEntry * +findFirstExisting(const char* path, char* name, + UBool *isRoot, UBool *hasChopped, UBool *isDefault, UErrorCode* status) { + UResourceDataEntry *r = NULL; + UBool hasRealData = FALSE; + const char *defaultLoc = uloc_getDefault(); + *hasChopped = TRUE; /* we're starting with a fresh name */ + + while(*hasChopped && !hasRealData) { + r = init_entry(name, path, status); + /* Null pointer test */ + if (U_FAILURE(*status)) { + return NULL; + } + *isDefault = (UBool)(uprv_strncmp(name, defaultLoc, uprv_strlen(name)) == 0); + hasRealData = (UBool)(r->fBogus == U_ZERO_ERROR); + if(!hasRealData) { + /* this entry is not real. We will discard it. */ + /* However, the parent line for this entry is */ + /* not to be used - as there might be parent */ + /* lines in cache from previous openings that */ + /* are not updated yet. */ + r->fCountExisting--; + /*entryCloseInt(r);*/ + r = NULL; + *status = U_USING_FALLBACK_WARNING; + } else { + uprv_strcpy(name, r->fName); /* this is needed for supporting aliases */ + } + + *isRoot = (UBool)(uprv_strcmp(name, kRootLocaleName) == 0); + + /*Fallback data stuff*/ + *hasChopped = chopLocale(name); + if (*hasChopped && *name == '\0') { + uprv_strcpy(name, "und"); + } + } + return r; +} + +static void ures_setIsStackObject( UResourceBundle* resB, UBool state) { + if(state) { + resB->fMagic1 = 0; + resB->fMagic2 = 0; + } else { + resB->fMagic1 = MAGIC1; + resB->fMagic2 = MAGIC2; + } +} + +static UBool ures_isStackObject(const UResourceBundle* resB) { + return((resB->fMagic1 == MAGIC1 && resB->fMagic2 == MAGIC2)?FALSE:TRUE); +} + + +U_CFUNC void ures_initStackObject(UResourceBundle* resB) { + uprv_memset(resB, 0, sizeof(UResourceBundle)); + ures_setIsStackObject(resB, TRUE); +} + +U_NAMESPACE_BEGIN + +StackUResourceBundle::StackUResourceBundle() { + ures_initStackObject(&bundle); +} + +StackUResourceBundle::~StackUResourceBundle() { + ures_close(&bundle); +} + +U_NAMESPACE_END + +static UBool // returns U_SUCCESS(*status) +loadParentsExceptRoot(UResourceDataEntry *&t1, + char name[], int32_t nameCapacity, + UBool usingUSRData, char usrDataPath[], UErrorCode *status) { + if (U_FAILURE(*status)) { return FALSE; } + UBool hasChopped = TRUE; + while (hasChopped && t1->fParent == NULL && !t1->fData.noFallback && + res_getResource(&t1->fData,"%%ParentIsRoot") == RES_BOGUS) { + Resource parentRes = res_getResource(&t1->fData, "%%Parent"); + if (parentRes != RES_BOGUS) { // An explicit parent was found. + int32_t parentLocaleLen = 0; + // No tracing: called during initial data loading + const UChar *parentLocaleName = res_getStringNoTrace(&(t1->fData), parentRes, &parentLocaleLen); + if(parentLocaleName != NULL && 0 < parentLocaleLen && parentLocaleLen < nameCapacity) { + u_UCharsToChars(parentLocaleName, name, parentLocaleLen + 1); + if (uprv_strcmp(name, kRootLocaleName) == 0) { + return TRUE; + } + } + } + // Insert regular parents. + UErrorCode parentStatus = U_ZERO_ERROR; + UResourceDataEntry *t2 = init_entry(name, t1->fPath, &parentStatus); + if (U_FAILURE(parentStatus)) { + *status = parentStatus; + return FALSE; + } + UResourceDataEntry *u2 = NULL; + UErrorCode usrStatus = U_ZERO_ERROR; + if (usingUSRData) { // This code inserts user override data into the inheritance chain. + u2 = init_entry(name, usrDataPath, &usrStatus); + // If we failed due to out-of-memory, report that to the caller and exit early. + if (usrStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = usrStatus; + return FALSE; + } + } + + if (usingUSRData && U_SUCCESS(usrStatus) && u2->fBogus == U_ZERO_ERROR) { + t1->fParent = u2; + u2->fParent = t2; + } else { + t1->fParent = t2; + if (usingUSRData) { + // The USR override data wasn't found, set it to be deleted. + u2->fCountExisting = 0; + } + } + t1 = t2; + hasChopped = chopLocale(name); + } + return TRUE; +} + +static UBool // returns U_SUCCESS(*status) +insertRootBundle(UResourceDataEntry *&t1, UErrorCode *status) { + if (U_FAILURE(*status)) { return FALSE; } + UErrorCode parentStatus = U_ZERO_ERROR; + UResourceDataEntry *t2 = init_entry(kRootLocaleName, t1->fPath, &parentStatus); + if (U_FAILURE(parentStatus)) { + *status = parentStatus; + return FALSE; + } + t1->fParent = t2; + t1 = t2; + return TRUE; +} + +enum UResOpenType { + /** + * Open a resource bundle for the locale; + * if there is not even a base language bundle, then fall back to the default locale; + * if there is no bundle for that either, then load the root bundle. + * + * This is the default bundle loading behavior. + */ + URES_OPEN_LOCALE_DEFAULT_ROOT, + // TODO: ICU ticket #11271 "consistent default locale across locale trees" + // Add an option to look at the main locale tree for whether to + // fall back to root directly (if the locale has main data) or + // fall back to the default locale first (if the locale does not even have main data). + /** + * Open a resource bundle for the locale; + * if there is not even a base language bundle, then load the root bundle; + * never fall back to the default locale. + * + * This is used for algorithms that have good pan-Unicode default behavior, + * such as case mappings, collation, and segmentation (BreakIterator). + */ + URES_OPEN_LOCALE_ROOT, + /** + * Open a resource bundle for the exact bundle name as requested; + * no fallbacks, do not load parent bundles. + * + * This is used for supplemental (non-locale) data. + */ + URES_OPEN_DIRECT +}; +typedef enum UResOpenType UResOpenType; + +static UResourceDataEntry *entryOpen(const char* path, const char* localeID, + UResOpenType openType, UErrorCode* status) { + U_ASSERT(openType != URES_OPEN_DIRECT); + UErrorCode intStatus = U_ZERO_ERROR; + UResourceDataEntry *r = NULL; + UResourceDataEntry *t1 = NULL; + UBool isDefault = FALSE; + UBool isRoot = FALSE; + UBool hasRealData = FALSE; + UBool hasChopped = TRUE; + UBool usingUSRData = U_USE_USRDATA && ( path == NULL || uprv_strncmp(path,U_ICUDATA_NAME,8) == 0); + + char name[ULOC_FULLNAME_CAPACITY]; + char usrDataPath[96]; + + initCache(status); + + if(U_FAILURE(*status)) { + return NULL; + } + + uprv_strncpy(name, localeID, sizeof(name) - 1); + name[sizeof(name) - 1] = 0; + + if ( usingUSRData ) { + if ( path == NULL ) { + uprv_strcpy(usrDataPath, U_USRDATA_NAME); + } else { + uprv_strncpy(usrDataPath, path, sizeof(usrDataPath) - 1); + usrDataPath[0] = 'u'; + usrDataPath[1] = 's'; + usrDataPath[2] = 'r'; + usrDataPath[sizeof(usrDataPath) - 1] = 0; + } + } + + Mutex lock(&resbMutex); // Lock resbMutex until the end of this function. + + /* We're going to skip all the locales that do not have any data */ + r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); + + // If we failed due to out-of-memory, report the failure and exit early. + if (intStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = intStatus; + goto finish; + } + + if(r != NULL) { /* if there is one real locale, we can look for parents. */ + t1 = r; + hasRealData = TRUE; + if ( usingUSRData ) { /* This code inserts user override data into the inheritance chain */ + UErrorCode usrStatus = U_ZERO_ERROR; + UResourceDataEntry *u1 = init_entry(t1->fName, usrDataPath, &usrStatus); + // If we failed due to out-of-memory, report the failure and exit early. + if (intStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = intStatus; + goto finish; + } + if ( u1 != NULL ) { + if(u1->fBogus == U_ZERO_ERROR) { + u1->fParent = t1; + r = u1; + } else { + /* the USR override data wasn't found, set it to be deleted */ + u1->fCountExisting = 0; + } + } + } + if (hasChopped && !isRoot) { + if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) { + goto finish; + } + } + } + + /* we could have reached this point without having any real data */ + /* if that is the case, we need to chain in the default locale */ + if(r==NULL && openType == URES_OPEN_LOCALE_DEFAULT_ROOT && !isDefault && !isRoot) { + /* insert default locale */ + uprv_strcpy(name, uloc_getDefault()); + r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); + // If we failed due to out-of-memory, report the failure and exit early. + if (intStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = intStatus; + goto finish; + } + intStatus = U_USING_DEFAULT_WARNING; + if(r != NULL) { /* the default locale exists */ + t1 = r; + hasRealData = TRUE; + isDefault = TRUE; + // TODO: Why not if (usingUSRData) { ... } like in the non-default-locale code path? + if (hasChopped && !isRoot) { + if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) { + goto finish; + } + } + } + } + + /* we could still have r == NULL at this point - maybe even default locale is not */ + /* present */ + if(r == NULL) { + uprv_strcpy(name, kRootLocaleName); + r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); + // If we failed due to out-of-memory, report the failure and exit early. + if (intStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = intStatus; + goto finish; + } + if(r != NULL) { + t1 = r; + intStatus = U_USING_DEFAULT_WARNING; + hasRealData = TRUE; + } else { /* we don't even have the root locale */ + *status = U_MISSING_RESOURCE_ERROR; + goto finish; + } + } else if(!isRoot && uprv_strcmp(t1->fName, kRootLocaleName) != 0 && + t1->fParent == NULL && !r->fData.noFallback) { + if (!insertRootBundle(t1, status)) { + goto finish; + } + if(!hasRealData) { + r->fBogus = U_USING_DEFAULT_WARNING; + } + } + + // TODO: Does this ever loop? + while(r != NULL && !isRoot && t1->fParent != NULL) { + t1->fParent->fCountExisting++; + t1 = t1->fParent; + } + +finish: + if(U_SUCCESS(*status)) { + if(intStatus != U_ZERO_ERROR) { + *status = intStatus; + } + return r; + } else { + return NULL; + } +} + +/** + * Version of entryOpen() and findFirstExisting() for ures_openDirect(), + * with no fallbacks. + * Parent and root locale bundles are loaded if + * the requested bundle does not have the "nofallback" flag. + */ +static UResourceDataEntry * +entryOpenDirect(const char* path, const char* localeID, UErrorCode* status) { + initCache(status); + if(U_FAILURE(*status)) { + return NULL; + } + + Mutex lock(&resbMutex); + // findFirstExisting() without fallbacks. + UResourceDataEntry *r = init_entry(localeID, path, status); + if(U_SUCCESS(*status)) { + if(r->fBogus != U_ZERO_ERROR) { + r->fCountExisting--; + r = NULL; + } + } else { + r = NULL; + } + + // Some code depends on the ures_openDirect() bundle to have a parent bundle chain, + // unless it is marked with "nofallback". + UResourceDataEntry *t1 = r; + if(r != NULL && uprv_strcmp(localeID, kRootLocaleName) != 0 && // not root + r->fParent == NULL && !r->fData.noFallback && + uprv_strlen(localeID) < ULOC_FULLNAME_CAPACITY) { + char name[ULOC_FULLNAME_CAPACITY]; + uprv_strcpy(name, localeID); + if(!chopLocale(name) || uprv_strcmp(name, kRootLocaleName) == 0 || + loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), FALSE, NULL, status)) { + if(uprv_strcmp(t1->fName, kRootLocaleName) != 0 && t1->fParent == NULL) { + insertRootBundle(t1, status); + } + } + if(U_FAILURE(*status)) { + r = NULL; + } + } + + if(r != NULL) { + // TODO: Does this ever loop? + while(t1->fParent != NULL) { + t1->fParent->fCountExisting++; + t1 = t1->fParent; + } + } + return r; +} + +/** + * Functions to create and destroy resource bundles. + * CAUTION: resbMutex must be locked when calling this function. + */ +/* INTERNAL: */ +static void entryCloseInt(UResourceDataEntry *resB) { + UResourceDataEntry *p = resB; + + while(resB != NULL) { + p = resB->fParent; + resB->fCountExisting--; + + /* Entries are left in the cache. TODO: add ures_flushCache() to force a flush + of the cache. */ +/* + if(resB->fCountExisting <= 0) { + uhash_remove(cache, resB); + if(resB->fBogus == U_ZERO_ERROR) { + res_unload(&(resB->fData)); + } + if(resB->fName != NULL) { + uprv_free(resB->fName); + } + if(resB->fPath != NULL) { + uprv_free(resB->fPath); + } + uprv_free(resB); + } +*/ + + resB = p; + } +} + +/** + * API: closes a resource bundle and cleans up. + */ + +static void entryClose(UResourceDataEntry *resB) { + Mutex lock(&resbMutex); + entryCloseInt(resB); +} + +/* +U_CFUNC void ures_setResPath(UResourceBundle *resB, const char* toAdd) { + if(resB->fResPath == NULL) { + resB->fResPath = resB->fResBuf; + *(resB->fResPath) = 0; + } + resB->fResPathLen = uprv_strlen(toAdd); + if(RES_BUFSIZE <= resB->fResPathLen+1) { + if(resB->fResPath == resB->fResBuf) { + resB->fResPath = (char *)uprv_malloc((resB->fResPathLen+1)*sizeof(char)); + } else { + resB->fResPath = (char *)uprv_realloc(resB->fResPath, (resB->fResPathLen+1)*sizeof(char)); + } + } + uprv_strcpy(resB->fResPath, toAdd); +} +*/ +static void ures_appendResPath(UResourceBundle *resB, const char* toAdd, int32_t lenToAdd, UErrorCode *status) { + int32_t resPathLenOrig = resB->fResPathLen; + if(resB->fResPath == NULL) { + resB->fResPath = resB->fResBuf; + *(resB->fResPath) = 0; + resB->fResPathLen = 0; + } + resB->fResPathLen += lenToAdd; + if(RES_BUFSIZE <= resB->fResPathLen+1) { + if(resB->fResPath == resB->fResBuf) { + resB->fResPath = (char *)uprv_malloc((resB->fResPathLen+1)*sizeof(char)); + /* Check that memory was allocated correctly. */ + if (resB->fResPath == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_strcpy(resB->fResPath, resB->fResBuf); + } else { + char *temp = (char *)uprv_realloc(resB->fResPath, (resB->fResPathLen+1)*sizeof(char)); + /* Check that memory was reallocated correctly. */ + if (temp == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return; + } + resB->fResPath = temp; + } + } + uprv_strcpy(resB->fResPath + resPathLenOrig, toAdd); +} + +static void ures_freeResPath(UResourceBundle *resB) { + if (resB->fResPath && resB->fResPath != resB->fResBuf) { + uprv_free(resB->fResPath); + } + resB->fResPath = NULL; + resB->fResPathLen = 0; +} + +static void +ures_closeBundle(UResourceBundle* resB, UBool freeBundleObj) +{ + if(resB != NULL) { + if(resB->fData != NULL) { + entryClose(resB->fData); + } + if(resB->fVersion != NULL) { + uprv_free(resB->fVersion); + } + ures_freeResPath(resB); + + if(ures_isStackObject(resB) == FALSE && freeBundleObj) { + uprv_free(resB); + } +#if 0 /*U_DEBUG*/ + else { + /* poison the data */ + uprv_memset(resB, -1, sizeof(UResourceBundle)); + } +#endif + } +} + +U_CAPI void U_EXPORT2 +ures_close(UResourceBundle* resB) +{ + ures_closeBundle(resB, TRUE); +} + +static UResourceBundle *init_resb_result(const ResourceData *rdata, Resource r, + const char *key, int32_t idx, UResourceDataEntry *realData, + const UResourceBundle *parent, int32_t noAlias, + UResourceBundle *resB, UErrorCode *status) +{ + if(status == NULL || U_FAILURE(*status)) { + return resB; + } + if (parent == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + if(RES_GET_TYPE(r) == URES_ALIAS) { /* This is an alias, need to exchange with real data */ + if(noAlias < URES_MAX_ALIAS_LEVEL) { + int32_t len = 0; + const UChar *alias = res_getAlias(rdata, r, &len); + if(len > 0) { + /* we have an alias, now let's cut it up */ + char stackAlias[200]; + char *chAlias = NULL, *path = NULL, *locale = NULL, *keyPath = NULL; + int32_t capacity; + + /* + * Allocate enough space for both the char * version + * of the alias and parent->fResPath. + * + * We do this so that res_findResource() can modify the path, + * which allows us to remove redundant _res_findResource() variants + * in uresdata.c. + * res_findResource() now NUL-terminates each segment so that table keys + * can always be compared with strcmp() instead of strncmp(). + * Saves code there and simplifies testing and code coverage. + * + * markus 2003oct17 + */ + ++len; /* count the terminating NUL */ + if(parent->fResPath != NULL) { + capacity = (int32_t)uprv_strlen(parent->fResPath) + 1; + } else { + capacity = 0; + } + if(capacity < len) { + capacity = len; + } + if(capacity <= (int32_t)sizeof(stackAlias)) { + capacity = (int32_t)sizeof(stackAlias); + chAlias = stackAlias; + } else { + chAlias = (char *)uprv_malloc(capacity); + /* test for NULL */ + if(chAlias == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + } + u_UCharsToChars(alias, chAlias, len); + + if(*chAlias == RES_PATH_SEPARATOR) { + /* there is a path included */ + locale = uprv_strchr(chAlias+1, RES_PATH_SEPARATOR); + if(locale == NULL) { + locale = uprv_strchr(chAlias, 0); /* avoid locale == NULL to make code below work */ + } else { + *locale = 0; + locale++; + } + path = chAlias+1; + if(uprv_strcmp(path, "LOCALE") == 0) { + /* this is an XPath alias, starting with "/LOCALE/" */ + /* it contains the path to a resource which should be looked up */ + /* starting in the requested locale */ + keyPath = locale; + locale = parent->fTopLevelData->fName; /* this is the requested locale's name */ + path = realData->fPath; /* we will be looking in the same package */ + } else { + if(uprv_strcmp(path, "ICUDATA") == 0) { /* want ICU data */ + path = NULL; + } + keyPath = uprv_strchr(locale, RES_PATH_SEPARATOR); + if(keyPath) { + *keyPath = 0; + keyPath++; + } + } + } else { + /* no path, start with a locale */ + locale = chAlias; + keyPath = uprv_strchr(locale, RES_PATH_SEPARATOR); + if(keyPath) { + *keyPath = 0; + keyPath++; + } + path = realData->fPath; + } + + + { + /* got almost everything, let's try to open */ + /* first, open the bundle with real data */ + UResourceBundle *result = resB; + const char* temp = NULL; + UErrorCode intStatus = U_ZERO_ERROR; + UResourceBundle *mainRes = ures_openDirect(path, locale, &intStatus); + if(U_SUCCESS(intStatus)) { + if(keyPath == NULL) { + /* no key path. This means that we are going to + * to use the corresponding resource from + * another bundle + */ + /* first, we are going to get a corresponding parent + * resource to the one we are searching. + */ + char *aKey = parent->fResPath; + if(aKey) { + uprv_strcpy(chAlias, aKey); /* allocated large enough above */ + aKey = chAlias; + r = res_findResource(&(mainRes->fResData), mainRes->fRes, &aKey, &temp); + } else { + r = mainRes->fRes; + } + if(key) { + /* we need to make keyPath from parent's fResPath and + * current key, if there is a key associated + */ + len = (int32_t)(uprv_strlen(key) + 1); + if(len > capacity) { + capacity = len; + if(chAlias == stackAlias) { + chAlias = (char *)uprv_malloc(capacity); + } else { + chAlias = (char *)uprv_realloc(chAlias, capacity); + } + if(chAlias == NULL) { + ures_close(mainRes); + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + } + uprv_memcpy(chAlias, key, len); + aKey = chAlias; + r = res_findResource(&(mainRes->fResData), r, &aKey, &temp); + } else if(idx != -1) { + /* if there is no key, but there is an index, try to get by the index */ + /* here we have either a table or an array, so get the element */ + int32_t type = RES_GET_TYPE(r); + if(URES_IS_TABLE(type)) { + r = res_getTableItemByIndex(&(mainRes->fResData), r, idx, (const char **)&aKey); + } else { /* array */ + r = res_getArrayItem(&(mainRes->fResData), r, idx); + } + } + if(r != RES_BOGUS) { + result = init_resb_result(&(mainRes->fResData), r, temp, -1, mainRes->fData, mainRes, noAlias+1, resB, status); + } else { + *status = U_MISSING_RESOURCE_ERROR; + result = resB; + } + } else { + /* this one is a bit trickier. + * we start finding keys, but after we resolve one alias, the path might continue. + * Consider: + * aliastest:alias { "testtypes/anotheralias/Sequence" } + * anotheralias:alias { "/ICUDATA/sh/CollationElements" } + * aliastest resource should finally have the sequence, not collation elements. + */ + UResourceDataEntry *dataEntry = mainRes->fData; + char stackPath[URES_MAX_BUFFER_SIZE]; + char *pathBuf = stackPath, *myPath = pathBuf; + if(uprv_strlen(keyPath) >= UPRV_LENGTHOF(stackPath)) { + pathBuf = (char *)uprv_malloc((uprv_strlen(keyPath)+1)*sizeof(char)); + if(pathBuf == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + ures_close(mainRes); + return NULL; + } + } + uprv_strcpy(pathBuf, keyPath); + result = mainRes; + /* now we have fallback following here */ + do { + r = dataEntry->fData.rootRes; + /* this loop handles 'found' resources over several levels */ + while(*myPath && U_SUCCESS(*status)) { + r = res_findResource(&(dataEntry->fData), r, &myPath, &temp); + if(r != RES_BOGUS) { /* found a resource, but it might be an indirection */ + resB = init_resb_result(&(dataEntry->fData), r, temp, -1, dataEntry, result, noAlias+1, resB, status); + result = resB; + if(result) { + r = result->fRes; /* switch to a new resource, possibly a new tree */ + dataEntry = result->fData; + } + } else { /* no resource found, we don't really want to look anymore on this level */ + break; + } + } + dataEntry = dataEntry->fParent; + uprv_strcpy(pathBuf, keyPath); + myPath = pathBuf; + } while(r == RES_BOGUS && dataEntry != NULL); + if(r == RES_BOGUS) { + *status = U_MISSING_RESOURCE_ERROR; + result = resB; + } + if(pathBuf != stackPath) { + uprv_free(pathBuf); + } + } + } else { /* we failed to open the resource we're aliasing to */ + *status = intStatus; + } + if(chAlias != stackAlias) { + uprv_free(chAlias); + } + if(mainRes != result) { + ures_close(mainRes); + } + ResourceTracer(resB).maybeTrace("getalias"); + return result; + } + } else { + /* bad alias, should be an error */ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return resB; + } + } else { + *status = U_TOO_MANY_ALIASES_ERROR; + return resB; + } + } + if(resB == NULL) { + resB = (UResourceBundle *)uprv_malloc(sizeof(UResourceBundle)); + /* test for NULL */ + if (resB == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + ures_setIsStackObject(resB, FALSE); + resB->fResPath = NULL; + resB->fResPathLen = 0; + } else { + if(resB->fData != NULL) { + entryClose(resB->fData); + } + if(resB->fVersion != NULL) { + uprv_free(resB->fVersion); + } + /* + weiv: if stack object was passed in, it doesn't really need to be reinited, + since the purpose of initing is to remove stack junk. However, at this point + we would not do anything to an allocated object, so stack object should be + treated the same + */ + /* + if(ures_isStackObject(resB) != FALSE) { + ures_initStackObject(resB); + } + */ + if(parent != resB) { + ures_freeResPath(resB); + } + } + resB->fData = realData; + entryIncrease(resB->fData); + resB->fHasFallback = FALSE; + resB->fIsTopLevel = FALSE; + resB->fIndex = -1; + resB->fKey = key; + /*resB->fParentRes = parent;*/ + resB->fTopLevelData = parent->fTopLevelData; + if(parent->fResPath && parent != resB) { + ures_appendResPath(resB, parent->fResPath, parent->fResPathLen, status); + } + if(key != NULL) { + ures_appendResPath(resB, key, (int32_t)uprv_strlen(key), status); + if(resB->fResPath[resB->fResPathLen-1] != RES_PATH_SEPARATOR) { + ures_appendResPath(resB, RES_PATH_SEPARATOR_S, 1, status); + } + } else if(idx >= 0) { + char buf[256]; + int32_t len = T_CString_integerToString(buf, idx, 10); + ures_appendResPath(resB, buf, len, status); + if(resB->fResPath[resB->fResPathLen-1] != RES_PATH_SEPARATOR) { + ures_appendResPath(resB, RES_PATH_SEPARATOR_S, 1, status); + } + } + /* Make sure that Purify doesn't complain about uninitialized memory copies. */ + { + int32_t usedLen = ((resB->fResBuf == resB->fResPath) ? resB->fResPathLen : 0); + uprv_memset(resB->fResBuf + usedLen, 0, sizeof(resB->fResBuf) - usedLen); + } + + resB->fVersion = NULL; + resB->fRes = r; + /*resB->fParent = parent->fRes;*/ + uprv_memmove(&resB->fResData, rdata, sizeof(ResourceData)); + resB->fSize = res_countArrayItems(&(resB->fResData), resB->fRes); + ResourceTracer(resB).trace("get"); + return resB; +} + +UResourceBundle *ures_copyResb(UResourceBundle *r, const UResourceBundle *original, UErrorCode *status) { + UBool isStackObject; + if(U_FAILURE(*status) || r == original) { + return r; + } + if(original != NULL) { + if(r == NULL) { + isStackObject = FALSE; + r = (UResourceBundle *)uprv_malloc(sizeof(UResourceBundle)); + /* test for NULL */ + if (r == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + } else { + isStackObject = ures_isStackObject(r); + ures_closeBundle(r, FALSE); + } + uprv_memcpy(r, original, sizeof(UResourceBundle)); + r->fResPath = NULL; + r->fResPathLen = 0; + if(original->fResPath) { + ures_appendResPath(r, original->fResPath, original->fResPathLen, status); + } + ures_setIsStackObject(r, isStackObject); + if(r->fData != NULL) { + entryIncrease(r->fData); + } + } + return r; +} + +/** + * Functions to retrieve data from resource bundles. + */ + +U_CAPI const UChar* U_EXPORT2 ures_getString(const UResourceBundle* resB, int32_t* len, UErrorCode* status) { + const UChar *s; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + s = res_getString({resB}, &(resB->fResData), resB->fRes, len); + if (s == NULL) { + *status = U_RESOURCE_TYPE_MISMATCH; + } + return s; +} + +static const char * +ures_toUTF8String(const UChar *s16, int32_t length16, + char *dest, int32_t *pLength, + UBool forceCopy, + UErrorCode *status) { + int32_t capacity; + + if (U_FAILURE(*status)) { + return NULL; + } + if (pLength != NULL) { + capacity = *pLength; + } else { + capacity = 0; + } + if (capacity < 0 || (capacity > 0 && dest == NULL)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + if (length16 == 0) { + /* empty string, return as read-only pointer */ + if (pLength != NULL) { + *pLength = 0; + } + if (forceCopy) { + u_terminateChars(dest, capacity, 0, status); + return dest; + } else { + return ""; + } + } else { + /* We need to transform the string to the destination buffer. */ + if (capacity < length16) { + /* No chance for the string to fit. Pure preflighting. */ + return u_strToUTF8(NULL, 0, pLength, s16, length16, status); + } + if (!forceCopy && (length16 <= 0x2aaaaaaa)) { + /* + * We know the string will fit into dest because each UChar turns + * into at most three UTF-8 bytes. Fill the latter part of dest + * so that callers do not expect to use dest as a string pointer, + * hopefully leading to more robust code for when resource bundles + * may store UTF-8 natively. + * (In which case dest would not be used at all.) + * + * We do not do this if forceCopy=TRUE because then the caller + * expects the string to start exactly at dest. + * + * The test above for <= 0x2aaaaaaa prevents overflows. + * The +1 is for the NUL terminator. + */ + int32_t maxLength = 3 * length16 + 1; + if (capacity > maxLength) { + dest += capacity - maxLength; + capacity = maxLength; + } + } + return u_strToUTF8(dest, capacity, pLength, s16, length16, status); + } +} + +U_CAPI const char * U_EXPORT2 +ures_getUTF8String(const UResourceBundle *resB, + char *dest, int32_t *pLength, + UBool forceCopy, + UErrorCode *status) { + int32_t length16; + const UChar *s16 = ures_getString(resB, &length16, status); + return ures_toUTF8String(s16, length16, dest, pLength, forceCopy, status); +} + +U_CAPI const uint8_t* U_EXPORT2 ures_getBinary(const UResourceBundle* resB, int32_t* len, + UErrorCode* status) { + const uint8_t *p; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + p = res_getBinary({resB}, &(resB->fResData), resB->fRes, len); + if (p == NULL) { + *status = U_RESOURCE_TYPE_MISMATCH; + } + return p; +} + +U_CAPI const int32_t* U_EXPORT2 ures_getIntVector(const UResourceBundle* resB, int32_t* len, + UErrorCode* status) { + const int32_t *p; + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + p = res_getIntVector({resB}, &(resB->fResData), resB->fRes, len); + if (p == NULL) { + *status = U_RESOURCE_TYPE_MISMATCH; + } + return p; +} + +/* this function returns a signed integer */ +/* it performs sign extension */ +U_CAPI int32_t U_EXPORT2 ures_getInt(const UResourceBundle* resB, UErrorCode *status) { + if (status==NULL || U_FAILURE(*status)) { + return 0xffffffff; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0xffffffff; + } + if(RES_GET_TYPE(resB->fRes) != URES_INT) { + *status = U_RESOURCE_TYPE_MISMATCH; + return 0xffffffff; + } + return res_getInt({resB}, resB->fRes); +} + +U_CAPI uint32_t U_EXPORT2 ures_getUInt(const UResourceBundle* resB, UErrorCode *status) { + if (status==NULL || U_FAILURE(*status)) { + return 0xffffffff; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0xffffffff; + } + if(RES_GET_TYPE(resB->fRes) != URES_INT) { + *status = U_RESOURCE_TYPE_MISMATCH; + return 0xffffffff; + } + return res_getUInt({resB}, resB->fRes); +} + +U_CAPI UResType U_EXPORT2 ures_getType(const UResourceBundle *resB) { + if(resB == NULL) { + return URES_NONE; + } + return res_getPublicType(resB->fRes); +} + +U_CAPI const char * U_EXPORT2 ures_getKey(const UResourceBundle *resB) { + // + // TODO: Trace ures_getKey? I guess not usually. + // + // We usually get the key string to decide whether we want the value, or to + // make a key-value pair. Tracing the value should suffice. + // + // However, I believe we have some data (e.g., in res_index) where the key + // strings are the data. Tracing the enclosing table should suffice. + // + if(resB == NULL) { + return NULL; + } + return(resB->fKey); +} + +U_CAPI int32_t U_EXPORT2 ures_getSize(const UResourceBundle *resB) { + if(resB == NULL) { + return 0; + } + + return resB->fSize; +} + +static const UChar* ures_getStringWithAlias(const UResourceBundle *resB, Resource r, int32_t sIndex, int32_t *len, UErrorCode *status) { + if(RES_GET_TYPE(r) == URES_ALIAS) { + const UChar* result = 0; + UResourceBundle *tempRes = ures_getByIndex(resB, sIndex, NULL, status); + result = ures_getString(tempRes, len, status); + ures_close(tempRes); + return result; + } else { + return res_getString({resB, sIndex}, &(resB->fResData), r, len); + } +} + +U_CAPI void U_EXPORT2 ures_resetIterator(UResourceBundle *resB){ + if(resB == NULL) { + return; + } + resB->fIndex = -1; +} + +U_CAPI UBool U_EXPORT2 ures_hasNext(const UResourceBundle *resB) { + if(resB == NULL) { + return FALSE; + } + return (UBool)(resB->fIndex < resB->fSize-1); +} + +U_CAPI const UChar* U_EXPORT2 ures_getNextString(UResourceBundle *resB, int32_t* len, const char ** key, UErrorCode *status) { + Resource r = RES_BOGUS; + + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + if(resB->fIndex == resB->fSize-1) { + *status = U_INDEX_OUTOFBOUNDS_ERROR; + } else { + resB->fIndex++; + switch(RES_GET_TYPE(resB->fRes)) { + case URES_STRING: + case URES_STRING_V2: + return res_getString({resB}, &(resB->fResData), resB->fRes, len); + case URES_TABLE: + case URES_TABLE16: + case URES_TABLE32: + r = res_getTableItemByIndex(&(resB->fResData), resB->fRes, resB->fIndex, key); + if(r == RES_BOGUS && resB->fHasFallback) { + /* TODO: do the fallback */ + } + return ures_getStringWithAlias(resB, r, resB->fIndex, len, status); + case URES_ARRAY: + case URES_ARRAY16: + r = res_getArrayItem(&(resB->fResData), resB->fRes, resB->fIndex); + if(r == RES_BOGUS && resB->fHasFallback) { + /* TODO: do the fallback */ + } + return ures_getStringWithAlias(resB, r, resB->fIndex, len, status); + case URES_ALIAS: + return ures_getStringWithAlias(resB, resB->fRes, resB->fIndex, len, status); + case URES_INT: + case URES_BINARY: + case URES_INT_VECTOR: + *status = U_RESOURCE_TYPE_MISMATCH; + U_FALLTHROUGH; + default: + return NULL; + } + } + + return NULL; +} + +U_CAPI UResourceBundle* U_EXPORT2 ures_getNextResource(UResourceBundle *resB, UResourceBundle *fillIn, UErrorCode *status) { + const char *key = NULL; + Resource r = RES_BOGUS; + + if (status==NULL || U_FAILURE(*status)) { + /*return NULL;*/ + return fillIn; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + /*return NULL;*/ + return fillIn; + } + + if(resB->fIndex == resB->fSize-1) { + *status = U_INDEX_OUTOFBOUNDS_ERROR; + /*return NULL;*/ + } else { + resB->fIndex++; + switch(RES_GET_TYPE(resB->fRes)) { + case URES_INT: + case URES_BINARY: + case URES_STRING: + case URES_STRING_V2: + case URES_INT_VECTOR: + return ures_copyResb(fillIn, resB, status); + case URES_TABLE: + case URES_TABLE16: + case URES_TABLE32: + r = res_getTableItemByIndex(&(resB->fResData), resB->fRes, resB->fIndex, &key); + if(r == RES_BOGUS && resB->fHasFallback) { + /* TODO: do the fallback */ + } + return init_resb_result(&(resB->fResData), r, key, resB->fIndex, resB->fData, resB, 0, fillIn, status); + case URES_ARRAY: + case URES_ARRAY16: + r = res_getArrayItem(&(resB->fResData), resB->fRes, resB->fIndex); + if(r == RES_BOGUS && resB->fHasFallback) { + /* TODO: do the fallback */ + } + return init_resb_result(&(resB->fResData), r, key, resB->fIndex, resB->fData, resB, 0, fillIn, status); + default: + /*return NULL;*/ + return fillIn; + } + } + /*return NULL;*/ + return fillIn; +} + +U_CAPI UResourceBundle* U_EXPORT2 ures_getByIndex(const UResourceBundle *resB, int32_t indexR, UResourceBundle *fillIn, UErrorCode *status) { + const char* key = NULL; + Resource r = RES_BOGUS; + + if (status==NULL || U_FAILURE(*status)) { + /*return NULL;*/ + return fillIn; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + /*return NULL;*/ + return fillIn; + } + + if(indexR >= 0 && resB->fSize > indexR) { + switch(RES_GET_TYPE(resB->fRes)) { + case URES_INT: + case URES_BINARY: + case URES_STRING: + case URES_STRING_V2: + case URES_INT_VECTOR: + return ures_copyResb(fillIn, resB, status); + case URES_TABLE: + case URES_TABLE16: + case URES_TABLE32: + r = res_getTableItemByIndex(&(resB->fResData), resB->fRes, indexR, &key); + if(r == RES_BOGUS && resB->fHasFallback) { + /* TODO: do the fallback */ + } + return init_resb_result(&(resB->fResData), r, key, indexR, resB->fData, resB, 0, fillIn, status); + case URES_ARRAY: + case URES_ARRAY16: + r = res_getArrayItem(&(resB->fResData), resB->fRes, indexR); + if(r == RES_BOGUS && resB->fHasFallback) { + /* TODO: do the fallback */ + } + return init_resb_result(&(resB->fResData), r, key, indexR, resB->fData, resB, 0, fillIn, status); + default: + /*return NULL;*/ + return fillIn; + } + } else { + *status = U_MISSING_RESOURCE_ERROR; + } + /*return NULL;*/ + return fillIn; +} + +U_CAPI const UChar* U_EXPORT2 ures_getStringByIndex(const UResourceBundle *resB, int32_t indexS, int32_t* len, UErrorCode *status) { + const char* key = NULL; + Resource r = RES_BOGUS; + + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + if(indexS >= 0 && resB->fSize > indexS) { + switch(RES_GET_TYPE(resB->fRes)) { + case URES_STRING: + case URES_STRING_V2: + return res_getString({resB}, &(resB->fResData), resB->fRes, len); + case URES_TABLE: + case URES_TABLE16: + case URES_TABLE32: + r = res_getTableItemByIndex(&(resB->fResData), resB->fRes, indexS, &key); + if(r == RES_BOGUS && resB->fHasFallback) { + /* TODO: do the fallback */ + } + return ures_getStringWithAlias(resB, r, indexS, len, status); + case URES_ARRAY: + case URES_ARRAY16: + r = res_getArrayItem(&(resB->fResData), resB->fRes, indexS); + if(r == RES_BOGUS && resB->fHasFallback) { + /* TODO: do the fallback */ + } + return ures_getStringWithAlias(resB, r, indexS, len, status); + case URES_ALIAS: + return ures_getStringWithAlias(resB, resB->fRes, indexS, len, status); + case URES_INT: + case URES_BINARY: + case URES_INT_VECTOR: + *status = U_RESOURCE_TYPE_MISMATCH; + break; + default: + /* must not occur */ + *status = U_INTERNAL_PROGRAM_ERROR; + break; + } + } else { + *status = U_MISSING_RESOURCE_ERROR; + } + return NULL; +} + +U_CAPI const char * U_EXPORT2 +ures_getUTF8StringByIndex(const UResourceBundle *resB, + int32_t idx, + char *dest, int32_t *pLength, + UBool forceCopy, + UErrorCode *status) { + int32_t length16; + const UChar *s16 = ures_getStringByIndex(resB, idx, &length16, status); + return ures_toUTF8String(s16, length16, dest, pLength, forceCopy, status); +} + +/*U_CAPI const char *ures_getResPath(UResourceBundle *resB) { + return resB->fResPath; +}*/ + +U_CAPI UResourceBundle* U_EXPORT2 +ures_findResource(const char* path, UResourceBundle *fillIn, UErrorCode *status) +{ + UResourceBundle *first = NULL; + UResourceBundle *result = fillIn; + char *packageName = NULL; + char *pathToResource = NULL, *save = NULL; + char *locale = NULL, *localeEnd = NULL; + int32_t length; + + if(status == NULL || U_FAILURE(*status)) { + return result; + } + + length = (int32_t)(uprv_strlen(path)+1); + save = pathToResource = (char *)uprv_malloc(length*sizeof(char)); + /* test for NULL */ + if(pathToResource == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return result; + } + uprv_memcpy(pathToResource, path, length); + + locale = pathToResource; + if(*pathToResource == RES_PATH_SEPARATOR) { /* there is a path specification */ + pathToResource++; + packageName = pathToResource; + pathToResource = uprv_strchr(pathToResource, RES_PATH_SEPARATOR); + if(pathToResource == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + } else { + *pathToResource = 0; + locale = pathToResource+1; + } + } + + localeEnd = uprv_strchr(locale, RES_PATH_SEPARATOR); + if(localeEnd != NULL) { + *localeEnd = 0; + } + + first = ures_open(packageName, locale, status); + + if(U_SUCCESS(*status)) { + if(localeEnd) { + result = ures_findSubResource(first, localeEnd+1, fillIn, status); + } else { + result = ures_copyResb(fillIn, first, status); + } + ures_close(first); + } + uprv_free(save); + return result; +} + +U_CAPI UResourceBundle* U_EXPORT2 +ures_findSubResource(const UResourceBundle *resB, char* path, UResourceBundle *fillIn, UErrorCode *status) +{ + Resource res = RES_BOGUS; + UResourceBundle *result = fillIn; + const char *key; + + if(status == NULL || U_FAILURE(*status)) { + return result; + } + + /* here we do looping and circular alias checking */ + /* this loop is here because aliasing is resolved on this level, not on res level */ + /* so, when we encounter an alias, it is not an aggregate resource, so we return */ + do { + res = res_findResource(&(resB->fResData), resB->fRes, &path, &key); + if(res != RES_BOGUS) { + result = init_resb_result(&(resB->fResData), res, key, -1, resB->fData, resB, 0, fillIn, status); + resB = result; + } else { + *status = U_MISSING_RESOURCE_ERROR; + break; + } + } while(*path); /* there is more stuff in the path */ + + return result; +} +U_CAPI const UChar* U_EXPORT2 +ures_getStringByKeyWithFallback(const UResourceBundle *resB, + const char* inKey, + int32_t* len, + UErrorCode *status) { + + UResourceBundle stack; + const UChar* retVal = NULL; + ures_initStackObject(&stack); + ures_getByKeyWithFallback(resB, inKey, &stack, status); + int32_t length; + retVal = ures_getString(&stack, &length, status); + ures_close(&stack); + if (U_FAILURE(*status)) { + return NULL; + } + if (length == 3 && retVal[0] == EMPTY_SET && retVal[1] == EMPTY_SET && retVal[2] == EMPTY_SET ) { + retVal = NULL; + length = 0; + *status = U_MISSING_RESOURCE_ERROR; + } + if (len != NULL) { + *len = length; + } + return retVal; +} + +/* + Like res_getTableItemByKey but accepts full paths like "NumberElements/latn/patternsShort". +*/ +static Resource getTableItemByKeyPath(const ResourceData *pResData, Resource table, const char *key) { + Resource resource = table; /* The current resource */ + icu::CharString path; + UErrorCode errorCode = U_ZERO_ERROR; + path.append(key, errorCode); + if (U_FAILURE(errorCode)) { return RES_BOGUS; } + char *pathPart = path.data(); /* Path from current resource to desired resource */ + UResType type = (UResType)RES_GET_TYPE(resource); /* the current resource type */ + while (*pathPart && resource != RES_BOGUS && URES_IS_CONTAINER(type)) { + char *nextPathPart = uprv_strchr(pathPart, RES_PATH_SEPARATOR); + if (nextPathPart != NULL) { + *nextPathPart = 0; /* Terminating null for this part of path. */ + nextPathPart++; + } else { + nextPathPart = uprv_strchr(pathPart, 0); + } + int32_t t; + const char *pathP = pathPart; + resource = res_getTableItemByKey(pResData, resource, &t, &pathP); + type = (UResType)RES_GET_TYPE(resource); + pathPart = nextPathPart; + } + if (*pathPart) { + return RES_BOGUS; + } + return resource; +} + +U_CAPI UResourceBundle* U_EXPORT2 +ures_getByKeyWithFallback(const UResourceBundle *resB, + const char* inKey, + UResourceBundle *fillIn, + UErrorCode *status) { + Resource res = RES_BOGUS, rootRes = RES_BOGUS; + /*UResourceDataEntry *realData = NULL;*/ + UResourceBundle *helper = NULL; + + if (status==NULL || U_FAILURE(*status)) { + return fillIn; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return fillIn; + } + + int32_t type = RES_GET_TYPE(resB->fRes); + if(URES_IS_TABLE(type)) { + res = getTableItemByKeyPath(&(resB->fResData), resB->fRes, inKey); + const char* key = inKey; + if(res == RES_BOGUS) { + UResourceDataEntry *dataEntry = resB->fData; + CharString path; + char *myPath = NULL; + const char* resPath = resB->fResPath; + int32_t len = resB->fResPathLen; + while(res == RES_BOGUS && dataEntry->fParent != NULL) { /* Otherwise, we'll look in parents */ + dataEntry = dataEntry->fParent; + rootRes = dataEntry->fData.rootRes; + + if(dataEntry->fBogus == U_ZERO_ERROR) { + path.clear(); + if (len > 0) { + path.append(resPath, len, *status); + } + path.append(inKey, *status); + if (U_FAILURE(*status)) { + ures_close(helper); + return fillIn; + } + myPath = path.data(); + key = inKey; + do { + res = res_findResource(&(dataEntry->fData), rootRes, &myPath, &key); + if (RES_GET_TYPE(res) == URES_ALIAS && *myPath) { + /* We hit an alias, but we didn't finish following the path. */ + helper = init_resb_result(&(dataEntry->fData), res, NULL, -1, dataEntry, resB, 0, helper, status); + /*helper = init_resb_result(&(dataEntry->fData), res, inKey, -1, dataEntry, resB, 0, helper, status);*/ + if(helper) { + dataEntry = helper->fData; + rootRes = helper->fRes; + resPath = helper->fResPath; + len = helper->fResPathLen; + + } else { + break; + } + } + } while(*myPath); /* Continue until the whole path is consumed */ + } + } + /*const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status);*/ + if(res != RES_BOGUS) { + /* check if resB->fResPath gives the right name here */ + if(uprv_strcmp(dataEntry->fName, uloc_getDefault())==0 || uprv_strcmp(dataEntry->fName, kRootLocaleName)==0) { + *status = U_USING_DEFAULT_WARNING; + } else { + *status = U_USING_FALLBACK_WARNING; + } + + fillIn = init_resb_result(&(dataEntry->fData), res, inKey, -1, dataEntry, resB, 0, fillIn, status); + } else { + *status = U_MISSING_RESOURCE_ERROR; + } + } else { + fillIn = init_resb_result(&(resB->fResData), res, key, -1, resB->fData, resB, 0, fillIn, status); + } + } + else { + *status = U_RESOURCE_TYPE_MISMATCH; + } + ures_close(helper); + return fillIn; +} + +namespace { + +void getAllItemsWithFallback( + const UResourceBundle *bundle, ResourceDataValue &value, + ResourceSink &sink, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + // We recursively enumerate child-first, + // only storing parent items in the absence of child items. + // The sink needs to store a placeholder value for the no-fallback/no-inheritance marker + // to prevent a parent item from being stored. + // + // It would be possible to recursively enumerate parent-first, + // overriding parent items with child items. + // When the sink sees the no-fallback/no-inheritance marker, + // then it would remove the parent's item. + // We would deserialize parent values even though they are overridden in a child bundle. + value.setData(&bundle->fResData); + UResourceDataEntry *parentEntry = bundle->fData->fParent; + UBool hasParent = parentEntry != NULL && U_SUCCESS(parentEntry->fBogus); + value.setResource(bundle->fRes, ResourceTracer(bundle)); + sink.put(bundle->fKey, value, !hasParent, errorCode); + if (hasParent) { + // We might try to query the sink whether + // any fallback from the parent bundle is still possible. + + // Turn the parent UResourceDataEntry into a UResourceBundle, + // much like in ures_openWithType(). + // TODO: See if we can refactor ures_getByKeyWithFallback() + // and pull out an inner function that takes and returns a UResourceDataEntry + // so that we need not create UResourceBundle objects. + UResourceBundle parentBundle; + ures_initStackObject(&parentBundle); + parentBundle.fTopLevelData = parentBundle.fData = parentEntry; + // TODO: What is the difference between bundle fData and fTopLevelData? + uprv_memcpy(&parentBundle.fResData, &parentEntry->fData, sizeof(ResourceData)); + // TODO: Try to replace bundle.fResData with just using bundle.fData->fData. + parentBundle.fHasFallback = !parentBundle.fResData.noFallback; + parentBundle.fIsTopLevel = TRUE; + parentBundle.fRes = parentBundle.fResData.rootRes; + parentBundle.fSize = res_countArrayItems(&(parentBundle.fResData), parentBundle.fRes); + parentBundle.fIndex = -1; + entryIncrease(parentEntry); + + // Look up the container item in the parent bundle. + UResourceBundle containerBundle; + ures_initStackObject(&containerBundle); + const UResourceBundle *rb; + UErrorCode pathErrorCode = U_ZERO_ERROR; // Ignore if parents up to root do not have this path. + if (bundle->fResPath == NULL || *bundle->fResPath == 0) { + rb = &parentBundle; + } else { + rb = ures_getByKeyWithFallback(&parentBundle, bundle->fResPath, + &containerBundle, &pathErrorCode); + } + if (U_SUCCESS(pathErrorCode)) { + getAllItemsWithFallback(rb, value, sink, errorCode); + } + ures_close(&containerBundle); + ures_close(&parentBundle); + } +} + +} // namespace + +// Requires a ResourceDataValue fill-in, so that we need not cast from a ResourceValue. +// Unfortunately, the caller must know which subclass to make and pass in. +// Alternatively, we could make it as polymorphic as in Java by +// returning a ResourceValue pointer (possibly wrapped into a LocalPointer) +// that the caller then owns. +// +// Also requires a UResourceBundle fill-in, so that the value's ResourceTracer +// can point to a non-local bundle. +// Without tracing, the child bundle could be a function-local object. +U_CAPI void U_EXPORT2 +ures_getValueWithFallback(const UResourceBundle *bundle, const char *path, + UResourceBundle *tempFillIn, + ResourceDataValue &value, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + if (path == nullptr) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + const UResourceBundle *rb; + if (*path == 0) { + // empty path + rb = bundle; + } else { + rb = ures_getByKeyWithFallback(bundle, path, tempFillIn, &errorCode); + if (U_FAILURE(errorCode)) { + return; + } + } + value.setData(&rb->fResData); + value.setResource(rb->fRes, ResourceTracer(rb)); +} + +U_CAPI void U_EXPORT2 +ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path, + icu::ResourceSink &sink, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + if (path == nullptr) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + StackUResourceBundle stackBundle; + const UResourceBundle *rb; + if (*path == 0) { + // empty path + rb = bundle; + } else { + rb = ures_getByKeyWithFallback(bundle, path, stackBundle.getAlias(), &errorCode); + if (U_FAILURE(errorCode)) { + return; + } + } + // Get all table items with fallback. + ResourceDataValue value; + getAllItemsWithFallback(rb, value, sink, errorCode); +} + +U_CAPI UResourceBundle* U_EXPORT2 ures_getByKey(const UResourceBundle *resB, const char* inKey, UResourceBundle *fillIn, UErrorCode *status) { + Resource res = RES_BOGUS; + UResourceDataEntry *realData = NULL; + const char *key = inKey; + + if (status==NULL || U_FAILURE(*status)) { + return fillIn; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return fillIn; + } + + int32_t type = RES_GET_TYPE(resB->fRes); + if(URES_IS_TABLE(type)) { + int32_t t; + res = res_getTableItemByKey(&(resB->fResData), resB->fRes, &t, &key); + if(res == RES_BOGUS) { + key = inKey; + if(resB->fHasFallback == TRUE) { + const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status); + if(U_SUCCESS(*status)) { + /* check if resB->fResPath gives the right name here */ + return init_resb_result(rd, res, key, -1, realData, resB, 0, fillIn, status); + } else { + *status = U_MISSING_RESOURCE_ERROR; + } + } else { + *status = U_MISSING_RESOURCE_ERROR; + } + } else { + return init_resb_result(&(resB->fResData), res, key, -1, resB->fData, resB, 0, fillIn, status); + } + } +#if 0 + /* this is a kind of TODO item. If we have an array with an index table, we could do this. */ + /* not currently */ + else if(RES_GET_TYPE(resB->fRes) == URES_ARRAY && resB->fHasFallback == TRUE) { + /* here should go a first attempt to locate the key using index table */ + const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status); + if(U_SUCCESS(*status)) { + return init_resb_result(rd, res, key, realData, resB, fillIn, status); + } else { + *status = U_MISSING_RESOURCE_ERROR; + } + } +#endif + else { + *status = U_RESOURCE_TYPE_MISMATCH; + } + return fillIn; +} + +U_CAPI const UChar* U_EXPORT2 ures_getStringByKey(const UResourceBundle *resB, const char* inKey, int32_t* len, UErrorCode *status) { + Resource res = RES_BOGUS; + UResourceDataEntry *realData = NULL; + const char* key = inKey; + + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } + if(resB == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + int32_t type = RES_GET_TYPE(resB->fRes); + if(URES_IS_TABLE(type)) { + int32_t t=0; + + res = res_getTableItemByKey(&(resB->fResData), resB->fRes, &t, &key); + + if(res == RES_BOGUS) { + key = inKey; + if(resB->fHasFallback == TRUE) { + const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status); + if(U_SUCCESS(*status)) { + switch (RES_GET_TYPE(res)) { + case URES_STRING: + case URES_STRING_V2: + return res_getString({resB, key}, rd, res, len); + case URES_ALIAS: + { + const UChar* result = 0; + UResourceBundle *tempRes = ures_getByKey(resB, inKey, NULL, status); + result = ures_getString(tempRes, len, status); + ures_close(tempRes); + return result; + } + default: + *status = U_RESOURCE_TYPE_MISMATCH; + } + } else { + *status = U_MISSING_RESOURCE_ERROR; + } + } else { + *status = U_MISSING_RESOURCE_ERROR; + } + } else { + switch (RES_GET_TYPE(res)) { + case URES_STRING: + case URES_STRING_V2: + return res_getString({resB, key}, &(resB->fResData), res, len); + case URES_ALIAS: + { + const UChar* result = 0; + UResourceBundle *tempRes = ures_getByKey(resB, inKey, NULL, status); + result = ures_getString(tempRes, len, status); + ures_close(tempRes); + return result; + } + default: + *status = U_RESOURCE_TYPE_MISMATCH; + } + } + } +#if 0 + /* this is a kind of TODO item. If we have an array with an index table, we could do this. */ + /* not currently */ + else if(RES_GET_TYPE(resB->fRes) == URES_ARRAY && resB->fHasFallback == TRUE) { + /* here should go a first attempt to locate the key using index table */ + const ResourceData *rd = getFallbackData(resB, &key, &realData, &res, status); + if(U_SUCCESS(*status)) { + // TODO: Tracing + return res_getString(rd, res, len); + } else { + *status = U_MISSING_RESOURCE_ERROR; + } + } +#endif + else { + *status = U_RESOURCE_TYPE_MISMATCH; + } + return NULL; +} + +U_CAPI const char * U_EXPORT2 +ures_getUTF8StringByKey(const UResourceBundle *resB, + const char *key, + char *dest, int32_t *pLength, + UBool forceCopy, + UErrorCode *status) { + int32_t length16; + const UChar *s16 = ures_getStringByKey(resB, key, &length16, status); + return ures_toUTF8String(s16, length16, dest, pLength, forceCopy, status); +} + +/* TODO: clean from here down */ + +/** + * INTERNAL: Get the name of the first real locale (not placeholder) + * that has resource bundle data. + */ +U_CAPI const char* U_EXPORT2 +ures_getLocaleInternal(const UResourceBundle* resourceBundle, UErrorCode* status) +{ + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } + if (!resourceBundle) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } else { + return resourceBundle->fData->fName; + } +} + +U_CAPI const char* U_EXPORT2 +ures_getLocale(const UResourceBundle* resourceBundle, + UErrorCode* status) +{ + return ures_getLocaleInternal(resourceBundle, status); +} + + +U_CAPI const char* U_EXPORT2 +ures_getLocaleByType(const UResourceBundle* resourceBundle, + ULocDataLocaleType type, + UErrorCode* status) { + if (status==NULL || U_FAILURE(*status)) { + return NULL; + } + if (!resourceBundle) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } else { + switch(type) { + case ULOC_ACTUAL_LOCALE: + return resourceBundle->fData->fName; + case ULOC_VALID_LOCALE: + return resourceBundle->fTopLevelData->fName; + case ULOC_REQUESTED_LOCALE: + default: + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + } +} + +U_CFUNC const char* ures_getName(const UResourceBundle* resB) { + if(resB == NULL) { + return NULL; + } + + return resB->fData->fName; +} + +#ifdef URES_DEBUG +U_CFUNC const char* ures_getPath(const UResourceBundle* resB) { + if(resB == NULL) { + return NULL; + } + + return resB->fData->fPath; +} +#endif + +static UResourceBundle* +ures_openWithType(UResourceBundle *r, const char* path, const char* localeID, + UResOpenType openType, UErrorCode* status) { + if(U_FAILURE(*status)) { + return NULL; + } + + UResourceDataEntry *entry; + if(openType != URES_OPEN_DIRECT) { + /* first "canonicalize" the locale ID */ + char canonLocaleID[ULOC_FULLNAME_CAPACITY]; + uloc_getBaseName(localeID, canonLocaleID, UPRV_LENGTHOF(canonLocaleID), status); + if(U_FAILURE(*status) || *status == U_STRING_NOT_TERMINATED_WARNING) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + entry = entryOpen(path, canonLocaleID, openType, status); + } else { + entry = entryOpenDirect(path, localeID, status); + } + if(U_FAILURE(*status)) { + return NULL; + } + if(entry == NULL) { + *status = U_MISSING_RESOURCE_ERROR; + return NULL; + } + + UBool isStackObject; + if(r == NULL) { + r = (UResourceBundle *)uprv_malloc(sizeof(UResourceBundle)); + if(r == NULL) { + entryClose(entry); + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + isStackObject = FALSE; + } else { // fill-in + isStackObject = ures_isStackObject(r); + ures_closeBundle(r, FALSE); + } + uprv_memset(r, 0, sizeof(UResourceBundle)); + ures_setIsStackObject(r, isStackObject); + + r->fTopLevelData = r->fData = entry; + uprv_memcpy(&r->fResData, &entry->fData, sizeof(ResourceData)); + r->fHasFallback = openType != URES_OPEN_DIRECT && !r->fResData.noFallback; + r->fIsTopLevel = TRUE; + r->fRes = r->fResData.rootRes; + r->fSize = res_countArrayItems(&(r->fResData), r->fRes); + r->fIndex = -1; + + ResourceTracer(r).traceOpen(); + + return r; +} + +U_CAPI UResourceBundle* U_EXPORT2 +ures_open(const char* path, const char* localeID, UErrorCode* status) { + return ures_openWithType(NULL, path, localeID, URES_OPEN_LOCALE_DEFAULT_ROOT, status); +} + +U_CAPI UResourceBundle* U_EXPORT2 +ures_openNoDefault(const char* path, const char* localeID, UErrorCode* status) { + return ures_openWithType(NULL, path, localeID, URES_OPEN_LOCALE_ROOT, status); +} + +/** + * Opens a resource bundle without "canonicalizing" the locale name. No fallback will be performed + * or sought. However, alias substitution will happen! + */ +U_CAPI UResourceBundle* U_EXPORT2 +ures_openDirect(const char* path, const char* localeID, UErrorCode* status) { + return ures_openWithType(NULL, path, localeID, URES_OPEN_DIRECT, status); +} + +/** + * Internal API: This function is used to open a resource bundle + * proper fallback chaining is executed while initialization. + * The result is stored in cache for later fallback search. + * + * Same as ures_open(), but uses the fill-in parameter and does not allocate a new bundle. + */ +U_CAPI void U_EXPORT2 +ures_openFillIn(UResourceBundle *r, const char* path, + const char* localeID, UErrorCode* status) { + if(U_SUCCESS(*status) && r == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + ures_openWithType(r, path, localeID, URES_OPEN_LOCALE_DEFAULT_ROOT, status); +} + +/** + * Same as ures_openDirect(), but uses the fill-in parameter and does not allocate a new bundle. + */ +U_CAPI void U_EXPORT2 +ures_openDirectFillIn(UResourceBundle *r, const char* path, const char* localeID, UErrorCode* status) { + if(U_SUCCESS(*status) && r == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + ures_openWithType(r, path, localeID, URES_OPEN_DIRECT, status); +} + +/** + * API: Counts members. For arrays and tables, returns number of resources. + * For strings, returns 1. + */ +U_CAPI int32_t U_EXPORT2 +ures_countArrayItems(const UResourceBundle* resourceBundle, + const char* resourceKey, + UErrorCode* status) +{ + UResourceBundle resData; + ures_initStackObject(&resData); + if (status==NULL || U_FAILURE(*status)) { + return 0; + } + if(resourceBundle == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + ures_getByKey(resourceBundle, resourceKey, &resData, status); + + if(resData.fResData.data != NULL) { + int32_t result = res_countArrayItems(&resData.fResData, resData.fRes); + ures_close(&resData); + return result; + } else { + *status = U_MISSING_RESOURCE_ERROR; + ures_close(&resData); + return 0; + } +} + +/** + * Internal function. + * Return the version number associated with this ResourceBundle as a string. + * + * @param resourceBundle The resource bundle for which the version is checked. + * @return A version number string as specified in the resource bundle or its parent. + * The caller does not own this string. + * @see ures_getVersion + * @internal + */ +U_CAPI const char* U_EXPORT2 +ures_getVersionNumberInternal(const UResourceBundle *resourceBundle) +{ + if (!resourceBundle) return NULL; + + if(resourceBundle->fVersion == NULL) { + + /* If the version ID has not been built yet, then do so. Retrieve */ + /* the minor version from the file. */ + UErrorCode status = U_ZERO_ERROR; + int32_t minor_len = 0; + int32_t len; + + const UChar* minor_version = ures_getStringByKey(resourceBundle, kVersionTag, &minor_len, &status); + + /* Determine the length of of the final version string. This is */ + /* the length of the major part + the length of the separator */ + /* (==1) + the length of the minor part (+ 1 for the zero byte at */ + /* the end). */ + + len = (minor_len > 0) ? minor_len : 1; + + /* Allocate the string, and build it up. */ + /* + 1 for zero byte */ + + + ((UResourceBundle *)resourceBundle)->fVersion = (char *)uprv_malloc(1 + len); + /* Check for null pointer. */ + if (((UResourceBundle *)resourceBundle)->fVersion == NULL) { + return NULL; + } + + if(minor_len > 0) { + u_UCharsToChars(minor_version, resourceBundle->fVersion , minor_len); + resourceBundle->fVersion[len] = '\0'; + } + else { + uprv_strcpy(resourceBundle->fVersion, kDefaultMinorVersion); + } + } + + return resourceBundle->fVersion; +} + +U_CAPI const char* U_EXPORT2 +ures_getVersionNumber(const UResourceBundle* resourceBundle) +{ + return ures_getVersionNumberInternal(resourceBundle); +} + +U_CAPI void U_EXPORT2 ures_getVersion(const UResourceBundle* resB, UVersionInfo versionInfo) { + if (!resB) return; + + u_versionFromString(versionInfo, ures_getVersionNumberInternal(resB)); +} + +/** Tree support functions *******************************/ +#define INDEX_LOCALE_NAME "res_index" +#define INDEX_TAG "InstalledLocales" +#define DEFAULT_TAG "default" + +#if defined(URES_TREE_DEBUG) +#include <stdio.h> +#endif + +typedef struct ULocalesContext { + UResourceBundle installed; + UResourceBundle curr; +} ULocalesContext; + +static void U_CALLCONV +ures_loc_closeLocales(UEnumeration *enumerator) { + ULocalesContext *ctx = (ULocalesContext *)enumerator->context; + ures_close(&ctx->curr); + ures_close(&ctx->installed); + uprv_free(ctx); + uprv_free(enumerator); +} + +static int32_t U_CALLCONV +ures_loc_countLocales(UEnumeration *en, UErrorCode * /*status*/) { + ULocalesContext *ctx = (ULocalesContext *)en->context; + return ures_getSize(&ctx->installed); +} + +U_CDECL_BEGIN + + +static const char * U_CALLCONV +ures_loc_nextLocale(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status) { + ULocalesContext *ctx = (ULocalesContext *)en->context; + UResourceBundle *res = &(ctx->installed); + UResourceBundle *k = NULL; + const char *result = NULL; + int32_t len = 0; + if(ures_hasNext(res) && (k = ures_getNextResource(res, &ctx->curr, status)) != 0) { + result = ures_getKey(k); + len = (int32_t)uprv_strlen(result); + } + if (resultLength) { + *resultLength = len; + } + return result; +} + +static void U_CALLCONV +ures_loc_resetLocales(UEnumeration* en, + UErrorCode* /*status*/) { + UResourceBundle *res = &((ULocalesContext *)en->context)->installed; + ures_resetIterator(res); +} + +U_CDECL_END + +static const UEnumeration gLocalesEnum = { + NULL, + NULL, + ures_loc_closeLocales, + ures_loc_countLocales, + uenum_unextDefault, + ures_loc_nextLocale, + ures_loc_resetLocales +}; + + +U_CAPI UEnumeration* U_EXPORT2 +ures_openAvailableLocales(const char *path, UErrorCode *status) +{ + UResourceBundle *idx = NULL; + UEnumeration *en = NULL; + ULocalesContext *myContext = NULL; + + if(U_FAILURE(*status)) { + return NULL; + } + myContext = static_cast<ULocalesContext *>(uprv_malloc(sizeof(ULocalesContext))); + en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); + if(!en || !myContext) { + *status = U_MEMORY_ALLOCATION_ERROR; + uprv_free(en); + uprv_free(myContext); + return NULL; + } + uprv_memcpy(en, &gLocalesEnum, sizeof(UEnumeration)); + + ures_initStackObject(&myContext->installed); + ures_initStackObject(&myContext->curr); + idx = ures_openDirect(path, INDEX_LOCALE_NAME, status); + ures_getByKey(idx, INDEX_TAG, &myContext->installed, status); + if(U_SUCCESS(*status)) { +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "Got %s::%s::[%s] : %s\n", + path, INDEX_LOCALE_NAME, INDEX_TAG, ures_getKey(&myContext->installed)); +#endif + en->context = myContext; + } else { +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s open failed - %s\n", path, u_errorName(*status)); +#endif + ures_close(&myContext->installed); + uprv_free(myContext); + uprv_free(en); + en = NULL; + } + + ures_close(idx); + + return en; +} + +static UBool isLocaleInList(UEnumeration *locEnum, const char *locToSearch, UErrorCode *status) { + const char *loc; + while ((loc = uenum_next(locEnum, NULL, status)) != NULL) { + if (uprv_strcmp(loc, locToSearch) == 0) { + return TRUE; + } + } + return FALSE; +} + +U_CAPI int32_t U_EXPORT2 +ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, + const char *path, const char *resName, const char *keyword, const char *locid, + UBool *isAvailable, UBool omitDefault, UErrorCode *status) +{ + char kwVal[1024] = ""; /* value of keyword 'keyword' */ + char defVal[1024] = ""; /* default value for given locale */ + char defLoc[1024] = ""; /* default value for given locale */ + char base[1024] = ""; /* base locale */ + char found[1024] = ""; + char parent[1024] = ""; + char full[1024] = ""; + UResourceBundle bund1, bund2; + UResourceBundle *res = NULL; + UErrorCode subStatus = U_ZERO_ERROR; + int32_t length = 0; + if(U_FAILURE(*status)) return 0; + uloc_getKeywordValue(locid, keyword, kwVal, 1024-1,&subStatus); + if(!uprv_strcmp(kwVal, DEFAULT_TAG)) { + kwVal[0]=0; + } + uloc_getBaseName(locid, base, 1024-1,&subStatus); +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "getFunctionalEquivalent: \"%s\" [%s=%s] in %s - %s\n", + locid, keyword, kwVal, base, u_errorName(subStatus)); +#endif + ures_initStackObject(&bund1); + ures_initStackObject(&bund2); + + + uprv_strcpy(parent, base); + uprv_strcpy(found, base); + + if(isAvailable) { + UEnumeration *locEnum = ures_openAvailableLocales(path, &subStatus); + *isAvailable = TRUE; + if (U_SUCCESS(subStatus)) { + *isAvailable = isLocaleInList(locEnum, parent, &subStatus); + } + uenum_close(locEnum); + } + + if(U_FAILURE(subStatus)) { + *status = subStatus; + return 0; + } + + do { + subStatus = U_ZERO_ERROR; + res = ures_open(path, parent, &subStatus); + if(((subStatus == U_USING_FALLBACK_WARNING) || + (subStatus == U_USING_DEFAULT_WARNING)) && isAvailable) + { + *isAvailable = FALSE; + } + isAvailable = NULL; /* only want to set this the first time around */ + +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s;%s -> %s [%s]\n", path?path:"ICUDATA", parent, u_errorName(subStatus), ures_getLocale(res, &subStatus)); +#endif + if(U_FAILURE(subStatus)) { + *status = subStatus; + } else if(subStatus == U_ZERO_ERROR) { + ures_getByKey(res,resName,&bund1, &subStatus); + if(subStatus == U_ZERO_ERROR) { + const UChar *defUstr; + int32_t defLen; + /* look for default item */ +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s;%s : loaded default -> %s\n", + path?path:"ICUDATA", parent, u_errorName(subStatus)); +#endif + defUstr = ures_getStringByKey(&bund1, DEFAULT_TAG, &defLen, &subStatus); + if(U_SUCCESS(subStatus) && defLen) { + u_UCharsToChars(defUstr, defVal, u_strlen(defUstr)); +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s;%s -> default %s=%s, %s\n", + path?path:"ICUDATA", parent, keyword, defVal, u_errorName(subStatus)); +#endif + uprv_strcpy(defLoc, parent); + if(kwVal[0]==0) { + uprv_strcpy(kwVal, defVal); +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s;%s -> kwVal = %s\n", + path?path:"ICUDATA", parent, keyword, kwVal); +#endif + } + } + } + } + + subStatus = U_ZERO_ERROR; + + if (res != NULL) { + uprv_strcpy(found, ures_getLocaleByType(res, ULOC_VALID_LOCALE, &subStatus)); + } + + uloc_getParent(found,parent,sizeof(parent),&subStatus); + ures_close(res); + } while(!defVal[0] && *found && uprv_strcmp(found, "root") != 0 && U_SUCCESS(*status)); + + /* Now, see if we can find the kwVal collator.. start the search over.. */ + uprv_strcpy(parent, base); + uprv_strcpy(found, base); + + do { + subStatus = U_ZERO_ERROR; + res = ures_open(path, parent, &subStatus); + if((subStatus == U_USING_FALLBACK_WARNING) && isAvailable) { + *isAvailable = FALSE; + } + isAvailable = NULL; /* only want to set this the first time around */ + +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s;%s -> %s (looking for %s)\n", + path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal); +#endif + if(U_FAILURE(subStatus)) { + *status = subStatus; + } else if(subStatus == U_ZERO_ERROR) { + ures_getByKey(res,resName,&bund1, &subStatus); +#if defined(URES_TREE_DEBUG) +/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, resName, u_errorName(subStatus)); +#endif + if(subStatus == U_ZERO_ERROR) { + ures_getByKey(&bund1, kwVal, &bund2, &subStatus); +#if defined(URES_TREE_DEBUG) +/**/ fprintf(stderr,"@%d [%s] %s\n", __LINE__, kwVal, u_errorName(subStatus)); +#endif + if(subStatus == U_ZERO_ERROR) { +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s;%s -> full0 %s=%s, %s\n", + path?path:"ICUDATA", parent, keyword, kwVal, u_errorName(subStatus)); +#endif + uprv_strcpy(full, parent); + if(*full == 0) { + uprv_strcpy(full, "root"); + } + /* now, recalculate default kw if need be */ + if(uprv_strlen(defLoc) > uprv_strlen(full)) { + const UChar *defUstr; + int32_t defLen; + /* look for default item */ +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s;%s -> recalculating Default0\n", + path?path:"ICUDATA", full); +#endif + defUstr = ures_getStringByKey(&bund1, DEFAULT_TAG, &defLen, &subStatus); + if(U_SUCCESS(subStatus) && defLen) { + u_UCharsToChars(defUstr, defVal, u_strlen(defUstr)); +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s;%s -> default0 %s=%s, %s\n", + path?path:"ICUDATA", full, keyword, defVal, u_errorName(subStatus)); +#endif + uprv_strcpy(defLoc, full); + } + } /* end of recalculate default KW */ +#if defined(URES_TREE_DEBUG) + else { + fprintf(stderr, "No trim0, %s <= %s\n", defLoc, full); + } +#endif + } else { +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "err=%s in %s looking for %s\n", + u_errorName(subStatus), parent, kwVal); +#endif + } + } + } + + subStatus = U_ZERO_ERROR; + + uprv_strcpy(found, parent); + uloc_getParent(found,parent,1023,&subStatus); + ures_close(res); + } while(!full[0] && *found && U_SUCCESS(*status)); + + if((full[0]==0) && uprv_strcmp(kwVal, defVal)) { +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "Failed to locate kw %s - try default %s\n", kwVal, defVal); +#endif + uprv_strcpy(kwVal, defVal); + uprv_strcpy(parent, base); + uprv_strcpy(found, base); + + do { /* search for 'default' named item */ + subStatus = U_ZERO_ERROR; + res = ures_open(path, parent, &subStatus); + if((subStatus == U_USING_FALLBACK_WARNING) && isAvailable) { + *isAvailable = FALSE; + } + isAvailable = NULL; /* only want to set this the first time around */ + +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s;%s -> %s (looking for default %s)\n", + path?path:"ICUDATA", parent, u_errorName(subStatus), kwVal); +#endif + if(U_FAILURE(subStatus)) { + *status = subStatus; + } else if(subStatus == U_ZERO_ERROR) { + ures_getByKey(res,resName,&bund1, &subStatus); + if(subStatus == U_ZERO_ERROR) { + ures_getByKey(&bund1, kwVal, &bund2, &subStatus); + if(subStatus == U_ZERO_ERROR) { +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s;%s -> full1 %s=%s, %s\n", path?path:"ICUDATA", + parent, keyword, kwVal, u_errorName(subStatus)); +#endif + uprv_strcpy(full, parent); + if(*full == 0) { + uprv_strcpy(full, "root"); + } + + /* now, recalculate default kw if need be */ + if(uprv_strlen(defLoc) > uprv_strlen(full)) { + const UChar *defUstr; + int32_t defLen; + /* look for default item */ +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s;%s -> recalculating Default1\n", + path?path:"ICUDATA", full); +#endif + defUstr = ures_getStringByKey(&bund1, DEFAULT_TAG, &defLen, &subStatus); + if(U_SUCCESS(subStatus) && defLen) { + u_UCharsToChars(defUstr, defVal, u_strlen(defUstr)); +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s;%s -> default %s=%s, %s\n", + path?path:"ICUDATA", full, keyword, defVal, u_errorName(subStatus)); +#endif + uprv_strcpy(defLoc, full); + } + } /* end of recalculate default KW */ +#if defined(URES_TREE_DEBUG) + else { + fprintf(stderr, "No trim1, %s <= %s\n", defLoc, full); + } +#endif + } + } + } + subStatus = U_ZERO_ERROR; + + uprv_strcpy(found, parent); + uloc_getParent(found,parent,1023,&subStatus); + ures_close(res); + } while(!full[0] && *found && U_SUCCESS(*status)); + } + + if(U_SUCCESS(*status)) { + if(!full[0]) { +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "Still could not load keyword %s=%s\n", keyword, kwVal); +#endif + *status = U_MISSING_RESOURCE_ERROR; + } else if(omitDefault) { +#if defined(URES_TREE_DEBUG) + fprintf(stderr,"Trim? full=%s, defLoc=%s, found=%s\n", full, defLoc, found); +#endif + if(uprv_strlen(defLoc) <= uprv_strlen(full)) { + /* found the keyword in a *child* of where the default tag was present. */ + if(!uprv_strcmp(kwVal, defVal)) { /* if the requested kw is default, */ + /* and the default is in or in an ancestor of the current locale */ +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "Removing unneeded var %s=%s\n", keyword, kwVal); +#endif + kwVal[0]=0; + } + } + } + uprv_strcpy(found, full); + if(kwVal[0]) { + uprv_strcat(found, "@"); + uprv_strcat(found, keyword); + uprv_strcat(found, "="); + uprv_strcat(found, kwVal); + } else if(!omitDefault) { + uprv_strcat(found, "@"); + uprv_strcat(found, keyword); + uprv_strcat(found, "="); + uprv_strcat(found, defVal); + } + } + /* we found the default locale - no need to repeat it.*/ + + ures_close(&bund1); + ures_close(&bund2); + + length = (int32_t)uprv_strlen(found); + + if(U_SUCCESS(*status)) { + int32_t copyLength = uprv_min(length, resultCapacity); + if(copyLength>0) { + uprv_strncpy(result, found, copyLength); + } + if(length == 0) { + *status = U_MISSING_RESOURCE_ERROR; + } + } else { + length = 0; + result[0]=0; + } + return u_terminateChars(result, resultCapacity, length, status); +} + +U_CAPI UEnumeration* U_EXPORT2 +ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status) +{ +#define VALUES_BUF_SIZE 2048 +#define VALUES_LIST_SIZE 512 + + char valuesBuf[VALUES_BUF_SIZE]; + int32_t valuesIndex = 0; + const char *valuesList[VALUES_LIST_SIZE]; + int32_t valuesCount = 0; + + const char *locale; + int32_t locLen; + + UEnumeration *locs = NULL; + + UResourceBundle item; + UResourceBundle subItem; + + ures_initStackObject(&item); + ures_initStackObject(&subItem); + locs = ures_openAvailableLocales(path, status); + + if(U_FAILURE(*status)) { + ures_close(&item); + ures_close(&subItem); + return NULL; + } + + valuesBuf[0]=0; + valuesBuf[1]=0; + + while((locale = uenum_next(locs, &locLen, status)) != 0) { + UResourceBundle *bund = NULL; + UResourceBundle *subPtr = NULL; + UErrorCode subStatus = U_ZERO_ERROR; /* don't fail if a bundle is unopenable */ + bund = ures_openDirect(path, locale, &subStatus); + +#if defined(URES_TREE_DEBUG) + if(!bund || U_FAILURE(subStatus)) { + fprintf(stderr, "%s-%s values: Can't open %s locale - skipping. (%s)\n", + path?path:"<ICUDATA>", keyword, locale, u_errorName(subStatus)); + } +#endif + + ures_getByKey(bund, keyword, &item, &subStatus); + + if(!bund || U_FAILURE(subStatus)) { +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s-%s values: Can't find in %s - skipping. (%s)\n", + path?path:"<ICUDATA>", keyword, locale, u_errorName(subStatus)); +#endif + ures_close(bund); + bund = NULL; + continue; + } + + while((subPtr = ures_getNextResource(&item,&subItem,&subStatus)) != 0 + && U_SUCCESS(subStatus)) { + const char *k; + int32_t i; + k = ures_getKey(subPtr); + +#if defined(URES_TREE_DEBUG) + /* fprintf(stderr, "%s | %s | %s | %s\n", path?path:"<ICUDATA>", keyword, locale, k); */ +#endif + if(k == NULL || *k == 0 || + uprv_strcmp(k, DEFAULT_TAG) == 0 || uprv_strncmp(k, "private-", 8) == 0) { + // empty or "default" or unlisted type + continue; + } + for(i=0; i<valuesCount; i++) { + if(!uprv_strcmp(valuesList[i],k)) { + k = NULL; /* found duplicate */ + break; + } + } + if(k != NULL) { + int32_t kLen = (int32_t)uprv_strlen(k); + if((valuesCount >= (VALUES_LIST_SIZE-1)) || /* no more space in list .. */ + ((valuesIndex+kLen+1+1) >= VALUES_BUF_SIZE)) { /* no more space in buffer (string + 2 nulls) */ + *status = U_ILLEGAL_ARGUMENT_ERROR; /* out of space.. */ + } else { + uprv_strcpy(valuesBuf+valuesIndex, k); + valuesList[valuesCount++] = valuesBuf+valuesIndex; + valuesIndex += kLen; +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s | %s | %s | [%s] (UNIQUE)\n", + path?path:"<ICUDATA>", keyword, locale, k); +#endif + valuesBuf[valuesIndex++] = 0; /* terminate */ + } + } + } + ures_close(bund); + } + valuesBuf[valuesIndex++] = 0; /* terminate */ + + ures_close(&item); + ures_close(&subItem); + uenum_close(locs); +#if defined(URES_TREE_DEBUG) + fprintf(stderr, "%s: size %d, #%d\n", u_errorName(*status), + valuesIndex, valuesCount); +#endif + return uloc_openKeywordList(valuesBuf, valuesIndex, status); +} +#if 0 +/* This code isn't needed, and given the documentation warnings the implementation is suspect */ +U_CAPI UBool U_EXPORT2 +ures_equal(const UResourceBundle* res1, const UResourceBundle* res2){ + if(res1==NULL || res2==NULL){ + return res1==res2; /* pointer comparision */ + } + if(res1->fKey==NULL|| res2->fKey==NULL){ + return (res1->fKey==res2->fKey); + }else{ + if(uprv_strcmp(res1->fKey, res2->fKey)!=0){ + return FALSE; + } + } + if(uprv_strcmp(res1->fData->fName, res2->fData->fName)!=0){ + return FALSE; + } + if(res1->fData->fPath == NULL|| res2->fData->fPath==NULL){ + return (res1->fData->fPath == res2->fData->fPath); + }else{ + if(uprv_strcmp(res1->fData->fPath, res2->fData->fPath)!=0){ + return FALSE; + } + } + if(uprv_strcmp(res1->fData->fParent->fName, res2->fData->fParent->fName)!=0){ + return FALSE; + } + if(uprv_strcmp(res1->fData->fParent->fPath, res2->fData->fParent->fPath)!=0){ + return FALSE; + } + if(uprv_strncmp(res1->fResPath, res2->fResPath, res1->fResPathLen)!=0){ + return FALSE; + } + if(res1->fRes != res2->fRes){ + return FALSE; + } + return TRUE; +} +U_CAPI UResourceBundle* U_EXPORT2 +ures_clone(const UResourceBundle* res, UErrorCode* status){ + UResourceBundle* bundle = NULL; + UResourceBundle* ret = NULL; + if(U_FAILURE(*status) || res == NULL){ + return NULL; + } + bundle = ures_open(res->fData->fPath, res->fData->fName, status); + if(res->fResPath!=NULL){ + ret = ures_findSubResource(bundle, res->fResPath, NULL, status); + ures_close(bundle); + }else{ + ret = bundle; + } + return ret; +} +U_CAPI const UResourceBundle* U_EXPORT2 +ures_getParentBundle(const UResourceBundle* res){ + if(res==NULL){ + return NULL; + } + return res->fParentRes; +} +#endif + +U_CAPI void U_EXPORT2 +ures_getVersionByKey(const UResourceBundle* res, const char *key, UVersionInfo ver, UErrorCode *status) { + const UChar *str; + int32_t len; + str = ures_getStringByKey(res, key, &len, status); + if(U_SUCCESS(*status)) { + u_versionFromUString(ver, str); + } +} + +/* eof */ diff --git a/thirdparty/icu4c/common/uresdata.cpp b/thirdparty/icu4c/common/uresdata.cpp new file mode 100644 index 0000000000..ae731e4544 --- /dev/null +++ b/thirdparty/icu4c/common/uresdata.cpp @@ -0,0 +1,1518 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 1999-2016, International Business Machines Corporation +* and others. All Rights Reserved. +******************************************************************************* +* file name: uresdata.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999dec08 +* created by: Markus W. Scherer +* Modification History: +* +* Date Name Description +* 06/20/2000 helena OS/400 port changes; mostly typecast. +* 06/24/02 weiv Added support for resource sharing +*/ + +#include "unicode/utypes.h" +#include "unicode/udata.h" +#include "unicode/ustring.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "cstring.h" +#include "resource.h" +#include "uarrsort.h" +#include "uassert.h" +#include "ucol_swp.h" +#include "udataswp.h" +#include "uinvchar.h" +#include "uresdata.h" +#include "uresimp.h" +#include "utracimp.h" + +/* + * Resource access helpers + */ + +/* get a const char* pointer to the key with the keyOffset byte offset from pRoot */ +#define RES_GET_KEY16(pResData, keyOffset) \ + ((keyOffset)<(pResData)->localKeyLimit ? \ + (const char *)(pResData)->pRoot+(keyOffset) : \ + (pResData)->poolBundleKeys+(keyOffset)-(pResData)->localKeyLimit) + +#define RES_GET_KEY32(pResData, keyOffset) \ + ((keyOffset)>=0 ? \ + (const char *)(pResData)->pRoot+(keyOffset) : \ + (pResData)->poolBundleKeys+((keyOffset)&0x7fffffff)) + +#define URESDATA_ITEM_NOT_FOUND -1 + +/* empty resources, returned when the resource offset is 0 */ +static const uint16_t gEmpty16=0; + +static const struct { + int32_t length; + int32_t res; +} gEmpty32={ 0, 0 }; + +static const struct { + int32_t length; + UChar nul; + UChar pad; +} gEmptyString={ 0, 0, 0 }; + +/* + * All the type-access functions assume that + * the resource is of the expected type. + */ + +static int32_t +_res_findTableItem(const ResourceData *pResData, const uint16_t *keyOffsets, int32_t length, + const char *key, const char **realKey) { + const char *tableKey; + int32_t mid, start, limit; + int result; + + /* do a binary search for the key */ + start=0; + limit=length; + while(start<limit) { + mid = (start + limit) / 2; + tableKey = RES_GET_KEY16(pResData, keyOffsets[mid]); + if (pResData->useNativeStrcmp) { + result = uprv_strcmp(key, tableKey); + } else { + result = uprv_compareInvCharsAsAscii(key, tableKey); + } + if (result < 0) { + limit = mid; + } else if (result > 0) { + start = mid + 1; + } else { + /* We found it! */ + *realKey=tableKey; + return mid; + } + } + return URESDATA_ITEM_NOT_FOUND; /* not found or table is empty. */ +} + +static int32_t +_res_findTable32Item(const ResourceData *pResData, const int32_t *keyOffsets, int32_t length, + const char *key, const char **realKey) { + const char *tableKey; + int32_t mid, start, limit; + int result; + + /* do a binary search for the key */ + start=0; + limit=length; + while(start<limit) { + mid = (start + limit) / 2; + tableKey = RES_GET_KEY32(pResData, keyOffsets[mid]); + if (pResData->useNativeStrcmp) { + result = uprv_strcmp(key, tableKey); + } else { + result = uprv_compareInvCharsAsAscii(key, tableKey); + } + if (result < 0) { + limit = mid; + } else if (result > 0) { + start = mid + 1; + } else { + /* We found it! */ + *realKey=tableKey; + return mid; + } + } + return URESDATA_ITEM_NOT_FOUND; /* not found or table is empty. */ +} + +/* helper for res_load() ---------------------------------------------------- */ + +static UBool U_CALLCONV +isAcceptable(void *context, + const char * /*type*/, const char * /*name*/, + const UDataInfo *pInfo) { + uprv_memcpy(context, pInfo->formatVersion, 4); + return (UBool)( + pInfo->size>=20 && + pInfo->isBigEndian==U_IS_BIG_ENDIAN && + pInfo->charsetFamily==U_CHARSET_FAMILY && + pInfo->sizeofUChar==U_SIZEOF_UCHAR && + pInfo->dataFormat[0]==0x52 && /* dataFormat="ResB" */ + pInfo->dataFormat[1]==0x65 && + pInfo->dataFormat[2]==0x73 && + pInfo->dataFormat[3]==0x42 && + (1<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=3)); +} + +/* semi-public functions ---------------------------------------------------- */ + +static void +res_init(ResourceData *pResData, + UVersionInfo formatVersion, const void *inBytes, int32_t length, + UErrorCode *errorCode) { + UResType rootType; + + /* get the root resource */ + pResData->pRoot=(const int32_t *)inBytes; + pResData->rootRes=(Resource)*pResData->pRoot; + pResData->p16BitUnits=&gEmpty16; + + /* formatVersion 1.1 must have a root item and at least 5 indexes */ + if(length>=0 && (length/4)<((formatVersion[0]==1 && formatVersion[1]==0) ? 1 : 1+5)) { + *errorCode=U_INVALID_FORMAT_ERROR; + res_unload(pResData); + return; + } + + /* currently, we accept only resources that have a Table as their roots */ + rootType=(UResType)RES_GET_TYPE(pResData->rootRes); + if(!URES_IS_TABLE(rootType)) { + *errorCode=U_INVALID_FORMAT_ERROR; + res_unload(pResData); + return; + } + + if(formatVersion[0]==1 && formatVersion[1]==0) { + pResData->localKeyLimit=0x10000; /* greater than any 16-bit key string offset */ + } else { + /* bundles with formatVersion 1.1 and later contain an indexes[] array */ + const int32_t *indexes=pResData->pRoot+1; + int32_t indexLength=indexes[URES_INDEX_LENGTH]&0xff; + if(indexLength<=URES_INDEX_MAX_TABLE_LENGTH) { + *errorCode=U_INVALID_FORMAT_ERROR; + res_unload(pResData); + return; + } + if( length>=0 && + (length<((1+indexLength)<<2) || + length<(indexes[URES_INDEX_BUNDLE_TOP]<<2)) + ) { + *errorCode=U_INVALID_FORMAT_ERROR; + res_unload(pResData); + return; + } + if(indexes[URES_INDEX_KEYS_TOP]>(1+indexLength)) { + pResData->localKeyLimit=indexes[URES_INDEX_KEYS_TOP]<<2; + } + if(formatVersion[0]>=3) { + // In formatVersion 1, the indexLength took up this whole int. + // In version 2, bits 31..8 were reserved and always 0. + // In version 3, they contain bits 23..0 of the poolStringIndexLimit. + // Bits 27..24 are in indexes[URES_INDEX_ATTRIBUTES] bits 15..12. + pResData->poolStringIndexLimit=(int32_t)((uint32_t)indexes[URES_INDEX_LENGTH]>>8); + } + if(indexLength>URES_INDEX_ATTRIBUTES) { + int32_t att=indexes[URES_INDEX_ATTRIBUTES]; + pResData->noFallback=(UBool)(att&URES_ATT_NO_FALLBACK); + pResData->isPoolBundle=(UBool)((att&URES_ATT_IS_POOL_BUNDLE)!=0); + pResData->usesPoolBundle=(UBool)((att&URES_ATT_USES_POOL_BUNDLE)!=0); + pResData->poolStringIndexLimit|=(att&0xf000)<<12; // bits 15..12 -> 27..24 + pResData->poolStringIndex16Limit=(int32_t)((uint32_t)att>>16); + } + if((pResData->isPoolBundle || pResData->usesPoolBundle) && indexLength<=URES_INDEX_POOL_CHECKSUM) { + *errorCode=U_INVALID_FORMAT_ERROR; + res_unload(pResData); + return; + } + if( indexLength>URES_INDEX_16BIT_TOP && + indexes[URES_INDEX_16BIT_TOP]>indexes[URES_INDEX_KEYS_TOP] + ) { + pResData->p16BitUnits=(const uint16_t *)(pResData->pRoot+indexes[URES_INDEX_KEYS_TOP]); + } + } + + if(formatVersion[0]==1 || U_CHARSET_FAMILY==U_ASCII_FAMILY) { + /* + * formatVersion 1: compare key strings in native-charset order + * formatVersion 2 and up: compare key strings in ASCII order + */ + pResData->useNativeStrcmp=TRUE; + } +} + +U_CAPI void U_EXPORT2 +res_read(ResourceData *pResData, + const UDataInfo *pInfo, const void *inBytes, int32_t length, + UErrorCode *errorCode) { + UVersionInfo formatVersion; + + uprv_memset(pResData, 0, sizeof(ResourceData)); + if(U_FAILURE(*errorCode)) { + return; + } + if(!isAcceptable(formatVersion, NULL, NULL, pInfo)) { + *errorCode=U_INVALID_FORMAT_ERROR; + return; + } + res_init(pResData, formatVersion, inBytes, length, errorCode); +} + +U_CFUNC void +res_load(ResourceData *pResData, + const char *path, const char *name, UErrorCode *errorCode) { + UVersionInfo formatVersion; + + uprv_memset(pResData, 0, sizeof(ResourceData)); + + /* load the ResourceBundle file */ + pResData->data=udata_openChoice(path, "res", name, isAcceptable, formatVersion, errorCode); + if(U_FAILURE(*errorCode)) { + return; + } + + /* get its memory and initialize *pResData */ + res_init(pResData, formatVersion, udata_getMemory(pResData->data), -1, errorCode); +} + +U_CFUNC void +res_unload(ResourceData *pResData) { + if(pResData->data!=NULL) { + udata_close(pResData->data); + pResData->data=NULL; + } +} + +static const int8_t gPublicTypes[URES_LIMIT] = { + URES_STRING, + URES_BINARY, + URES_TABLE, + URES_ALIAS, + + URES_TABLE, /* URES_TABLE32 */ + URES_TABLE, /* URES_TABLE16 */ + URES_STRING, /* URES_STRING_V2 */ + URES_INT, + + URES_ARRAY, + URES_ARRAY, /* URES_ARRAY16 */ + URES_NONE, + URES_NONE, + + URES_NONE, + URES_NONE, + URES_INT_VECTOR, + URES_NONE +}; + +U_CAPI UResType U_EXPORT2 +res_getPublicType(Resource res) { + return (UResType)gPublicTypes[RES_GET_TYPE(res)]; +} + +U_CAPI const UChar * U_EXPORT2 +res_getStringNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength) { + const UChar *p; + uint32_t offset=RES_GET_OFFSET(res); + int32_t length; + if(RES_GET_TYPE(res)==URES_STRING_V2) { + int32_t first; + if((int32_t)offset<pResData->poolStringIndexLimit) { + p=(const UChar *)pResData->poolBundleStrings+offset; + } else { + p=(const UChar *)pResData->p16BitUnits+(offset-pResData->poolStringIndexLimit); + } + first=*p; + if(!U16_IS_TRAIL(first)) { + length=u_strlen(p); + } else if(first<0xdfef) { + length=first&0x3ff; + ++p; + } else if(first<0xdfff) { + length=((first-0xdfef)<<16)|p[1]; + p+=2; + } else { + length=((int32_t)p[1]<<16)|p[2]; + p+=3; + } + } else if(res==offset) /* RES_GET_TYPE(res)==URES_STRING */ { + const int32_t *p32= res==0 ? &gEmptyString.length : pResData->pRoot+res; + length=*p32++; + p=(const UChar *)p32; + } else { + p=NULL; + length=0; + } + if(pLength) { + *pLength=length; + } + return p; +} + +namespace { + +/** + * CLDR string value (three empty-set symbols)=={2205, 2205, 2205} + * prevents fallback to the parent bundle. + * TODO: combine with other code that handles this marker, use EMPTY_SET constant. + * TODO: maybe move to uresbund.cpp? + */ +UBool isNoInheritanceMarker(const ResourceData *pResData, Resource res) { + uint32_t offset=RES_GET_OFFSET(res); + if (offset == 0) { + // empty string + } else if (res == offset) { + const int32_t *p32=pResData->pRoot+res; + int32_t length=*p32; + const UChar *p=(const UChar *)p32; + return length == 3 && p[2] == 0x2205 && p[3] == 0x2205 && p[4] == 0x2205; + } else if (RES_GET_TYPE(res) == URES_STRING_V2) { + const UChar *p; + if((int32_t)offset<pResData->poolStringIndexLimit) { + p=(const UChar *)pResData->poolBundleStrings+offset; + } else { + p=(const UChar *)pResData->p16BitUnits+(offset-pResData->poolStringIndexLimit); + } + int32_t first=*p; + if (first == 0x2205) { // implicit length + return p[1] == 0x2205 && p[2] == 0x2205 && p[3] == 0; + } else if (first == 0xdc03) { // explicit length 3 (should not occur) + return p[1] == 0x2205 && p[2] == 0x2205 && p[3] == 0x2205; + } else { + // Assume that the string has not been stored with more length units than necessary. + return FALSE; + } + } + return FALSE; +} + +int32_t getStringArray(const ResourceData *pResData, const icu::ResourceArray &array, + icu::UnicodeString *dest, int32_t capacity, + UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return 0; + } + if(dest == NULL ? capacity != 0 : capacity < 0) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + int32_t length = array.getSize(); + if(length == 0) { + return 0; + } + if(length > capacity) { + errorCode = U_BUFFER_OVERFLOW_ERROR; + return length; + } + for(int32_t i = 0; i < length; ++i) { + int32_t sLength; + // No tracing: handled by the caller + const UChar *s = res_getStringNoTrace(pResData, array.internalGetResource(pResData, i), &sLength); + if(s == NULL) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + return 0; + } + dest[i].setTo(TRUE, s, sLength); + } + return length; +} + +} // namespace + +U_CAPI const UChar * U_EXPORT2 +res_getAlias(const ResourceData *pResData, Resource res, int32_t *pLength) { + const UChar *p; + uint32_t offset=RES_GET_OFFSET(res); + int32_t length; + if(RES_GET_TYPE(res)==URES_ALIAS) { + const int32_t *p32= offset==0 ? &gEmptyString.length : pResData->pRoot+offset; + length=*p32++; + p=(const UChar *)p32; + } else { + p=NULL; + length=0; + } + if(pLength) { + *pLength=length; + } + return p; +} + +U_CAPI const uint8_t * U_EXPORT2 +res_getBinaryNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength) { + const uint8_t *p; + uint32_t offset=RES_GET_OFFSET(res); + int32_t length; + if(RES_GET_TYPE(res)==URES_BINARY) { + const int32_t *p32= offset==0 ? (const int32_t*)&gEmpty32 : pResData->pRoot+offset; + length=*p32++; + p=(const uint8_t *)p32; + } else { + p=NULL; + length=0; + } + if(pLength) { + *pLength=length; + } + return p; +} + + +U_CAPI const int32_t * U_EXPORT2 +res_getIntVectorNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength) { + const int32_t *p; + uint32_t offset=RES_GET_OFFSET(res); + int32_t length; + if(RES_GET_TYPE(res)==URES_INT_VECTOR) { + p= offset==0 ? (const int32_t *)&gEmpty32 : pResData->pRoot+offset; + length=*p++; + } else { + p=NULL; + length=0; + } + if(pLength) { + *pLength=length; + } + return p; +} + +U_CAPI int32_t U_EXPORT2 +res_countArrayItems(const ResourceData *pResData, Resource res) { + uint32_t offset=RES_GET_OFFSET(res); + switch(RES_GET_TYPE(res)) { + case URES_STRING: + case URES_STRING_V2: + case URES_BINARY: + case URES_ALIAS: + case URES_INT: + case URES_INT_VECTOR: + return 1; + case URES_ARRAY: + case URES_TABLE32: + return offset==0 ? 0 : *(pResData->pRoot+offset); + case URES_TABLE: + return offset==0 ? 0 : *((const uint16_t *)(pResData->pRoot+offset)); + case URES_ARRAY16: + case URES_TABLE16: + return pResData->p16BitUnits[offset]; + default: + return 0; + } +} + +U_NAMESPACE_BEGIN + +ResourceDataValue::~ResourceDataValue() {} + +UResType ResourceDataValue::getType() const { + return res_getPublicType(res); +} + +const UChar *ResourceDataValue::getString(int32_t &length, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return NULL; + } + const UChar *s = res_getString(fTraceInfo, &getData(), res, &length); + if(s == NULL) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + } + return s; +} + +const UChar *ResourceDataValue::getAliasString(int32_t &length, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return NULL; + } + const UChar *s = res_getAlias(&getData(), res, &length); + if(s == NULL) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + } + return s; +} + +int32_t ResourceDataValue::getInt(UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } + if(RES_GET_TYPE(res) != URES_INT) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + } + return res_getInt(fTraceInfo, res); +} + +uint32_t ResourceDataValue::getUInt(UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } + if(RES_GET_TYPE(res) != URES_INT) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + } + return res_getUInt(fTraceInfo, res); +} + +const int32_t *ResourceDataValue::getIntVector(int32_t &length, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return NULL; + } + const int32_t *iv = res_getIntVector(fTraceInfo, &getData(), res, &length); + if(iv == NULL) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + } + return iv; +} + +const uint8_t *ResourceDataValue::getBinary(int32_t &length, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return NULL; + } + const uint8_t *b = res_getBinary(fTraceInfo, &getData(), res, &length); + if(b == NULL) { + errorCode = U_RESOURCE_TYPE_MISMATCH; + } + return b; +} + +ResourceArray ResourceDataValue::getArray(UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return ResourceArray(); + } + const uint16_t *items16 = NULL; + const Resource *items32 = NULL; + uint32_t offset=RES_GET_OFFSET(res); + int32_t length = 0; + switch(RES_GET_TYPE(res)) { + case URES_ARRAY: + if (offset!=0) { // empty if offset==0 + items32 = (const Resource *)getData().pRoot+offset; + length = *items32++; + } + break; + case URES_ARRAY16: + items16 = getData().p16BitUnits+offset; + length = *items16++; + break; + default: + errorCode = U_RESOURCE_TYPE_MISMATCH; + return ResourceArray(); + } + return ResourceArray(items16, items32, length, fTraceInfo); +} + +ResourceTable ResourceDataValue::getTable(UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return ResourceTable(); + } + const uint16_t *keys16 = NULL; + const int32_t *keys32 = NULL; + const uint16_t *items16 = NULL; + const Resource *items32 = NULL; + uint32_t offset = RES_GET_OFFSET(res); + int32_t length = 0; + switch(RES_GET_TYPE(res)) { + case URES_TABLE: + if (offset != 0) { // empty if offset==0 + keys16 = (const uint16_t *)(getData().pRoot+offset); + length = *keys16++; + items32 = (const Resource *)(keys16+length+(~length&1)); + } + break; + case URES_TABLE16: + keys16 = getData().p16BitUnits+offset; + length = *keys16++; + items16 = keys16 + length; + break; + case URES_TABLE32: + if (offset != 0) { // empty if offset==0 + keys32 = getData().pRoot+offset; + length = *keys32++; + items32 = (const Resource *)keys32 + length; + } + break; + default: + errorCode = U_RESOURCE_TYPE_MISMATCH; + return ResourceTable(); + } + return ResourceTable(keys16, keys32, items16, items32, length, fTraceInfo); +} + +UBool ResourceDataValue::isNoInheritanceMarker() const { + return ::isNoInheritanceMarker(&getData(), res); +} + +int32_t ResourceDataValue::getStringArray(UnicodeString *dest, int32_t capacity, + UErrorCode &errorCode) const { + return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode); +} + +int32_t ResourceDataValue::getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity, + UErrorCode &errorCode) const { + if(URES_IS_ARRAY(res)) { + return ::getStringArray(&getData(), getArray(errorCode), dest, capacity, errorCode); + } + if(U_FAILURE(errorCode)) { + return 0; + } + if(dest == NULL ? capacity != 0 : capacity < 0) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + if(capacity < 1) { + errorCode = U_BUFFER_OVERFLOW_ERROR; + return 1; + } + int32_t sLength; + const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength); + if(s != NULL) { + dest[0].setTo(TRUE, s, sLength); + return 1; + } + errorCode = U_RESOURCE_TYPE_MISMATCH; + return 0; +} + +UnicodeString ResourceDataValue::getStringOrFirstOfArray(UErrorCode &errorCode) const { + UnicodeString us; + if(U_FAILURE(errorCode)) { + return us; + } + int32_t sLength; + const UChar *s = res_getString(fTraceInfo, &getData(), res, &sLength); + if(s != NULL) { + us.setTo(TRUE, s, sLength); + return us; + } + ResourceArray array = getArray(errorCode); + if(U_FAILURE(errorCode)) { + return us; + } + if(array.getSize() > 0) { + // Tracing is already performed above (unimportant for trace that this is an array) + s = res_getStringNoTrace(&getData(), array.internalGetResource(&getData(), 0), &sLength); + if(s != NULL) { + us.setTo(TRUE, s, sLength); + return us; + } + } + errorCode = U_RESOURCE_TYPE_MISMATCH; + return us; +} + +U_NAMESPACE_END + +static Resource +makeResourceFrom16(const ResourceData *pResData, int32_t res16) { + if(res16<pResData->poolStringIndex16Limit) { + // Pool string, nothing to do. + } else { + // Local string, adjust the 16-bit offset to a regular one, + // with a larger pool string index limit. + res16=res16-pResData->poolStringIndex16Limit+pResData->poolStringIndexLimit; + } + return URES_MAKE_RESOURCE(URES_STRING_V2, res16); +} + +U_CAPI Resource U_EXPORT2 +res_getTableItemByKey(const ResourceData *pResData, Resource table, + int32_t *indexR, const char **key) { + uint32_t offset=RES_GET_OFFSET(table); + int32_t length; + int32_t idx; + if(key == NULL || *key == NULL) { + return RES_BOGUS; + } + switch(RES_GET_TYPE(table)) { + case URES_TABLE: { + if (offset!=0) { /* empty if offset==0 */ + const uint16_t *p= (const uint16_t *)(pResData->pRoot+offset); + length=*p++; + *indexR=idx=_res_findTableItem(pResData, p, length, *key, key); + if(idx>=0) { + const Resource *p32=(const Resource *)(p+length+(~length&1)); + return p32[idx]; + } + } + break; + } + case URES_TABLE16: { + const uint16_t *p=pResData->p16BitUnits+offset; + length=*p++; + *indexR=idx=_res_findTableItem(pResData, p, length, *key, key); + if(idx>=0) { + return makeResourceFrom16(pResData, p[length+idx]); + } + break; + } + case URES_TABLE32: { + if (offset!=0) { /* empty if offset==0 */ + const int32_t *p= pResData->pRoot+offset; + length=*p++; + *indexR=idx=_res_findTable32Item(pResData, p, length, *key, key); + if(idx>=0) { + return (Resource)p[length+idx]; + } + } + break; + } + default: + break; + } + return RES_BOGUS; +} + +U_CAPI Resource U_EXPORT2 +res_getTableItemByIndex(const ResourceData *pResData, Resource table, + int32_t indexR, const char **key) { + uint32_t offset=RES_GET_OFFSET(table); + int32_t length; + if (indexR < 0) { + return RES_BOGUS; + } + switch(RES_GET_TYPE(table)) { + case URES_TABLE: { + if (offset != 0) { /* empty if offset==0 */ + const uint16_t *p= (const uint16_t *)(pResData->pRoot+offset); + length=*p++; + if(indexR<length) { + const Resource *p32=(const Resource *)(p+length+(~length&1)); + if(key!=NULL) { + *key=RES_GET_KEY16(pResData, p[indexR]); + } + return p32[indexR]; + } + } + break; + } + case URES_TABLE16: { + const uint16_t *p=pResData->p16BitUnits+offset; + length=*p++; + if(indexR<length) { + if(key!=NULL) { + *key=RES_GET_KEY16(pResData, p[indexR]); + } + return makeResourceFrom16(pResData, p[length+indexR]); + } + break; + } + case URES_TABLE32: { + if (offset != 0) { /* empty if offset==0 */ + const int32_t *p= pResData->pRoot+offset; + length=*p++; + if(indexR<length) { + if(key!=NULL) { + *key=RES_GET_KEY32(pResData, p[indexR]); + } + return (Resource)p[length+indexR]; + } + } + break; + } + default: + break; + } + return RES_BOGUS; +} + +U_CAPI Resource U_EXPORT2 +res_getResource(const ResourceData *pResData, const char *key) { + const char *realKey=key; + int32_t idx; + return res_getTableItemByKey(pResData, pResData->rootRes, &idx, &realKey); +} + + +UBool icu::ResourceTable::getKeyAndValue(int32_t i, + const char *&key, icu::ResourceValue &value) const { + if(0 <= i && i < length) { + icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value); + if (keys16 != nullptr) { + key = RES_GET_KEY16(&rdValue.getData(), keys16[i]); + } else { + key = RES_GET_KEY32(&rdValue.getData(), keys32[i]); + } + Resource res; + if (items16 != nullptr) { + res = makeResourceFrom16(&rdValue.getData(), items16[i]); + } else { + res = items32[i]; + } + // Note: the ResourceTracer keeps a reference to the field of this + // ResourceTable. This is OK because the ResourceTable should remain + // alive for the duration that fields are being read from it + // (including nested fields). + rdValue.setResource(res, ResourceTracer(fTraceInfo, key)); + return TRUE; + } + return FALSE; +} + +UBool icu::ResourceTable::findValue(const char *key, ResourceValue &value) const { + icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value); + const char *realKey = nullptr; + int32_t i; + if (keys16 != nullptr) { + i = _res_findTableItem(&rdValue.getData(), keys16, length, key, &realKey); + } else { + i = _res_findTable32Item(&rdValue.getData(), keys32, length, key, &realKey); + } + if (i >= 0) { + Resource res; + if (items16 != nullptr) { + res = makeResourceFrom16(&rdValue.getData(), items16[i]); + } else { + res = items32[i]; + } + // Same note about lifetime as in getKeyAndValue(). + rdValue.setResource(res, ResourceTracer(fTraceInfo, key)); + return TRUE; + } + return FALSE; +} + +U_CAPI Resource U_EXPORT2 +res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexR) { + uint32_t offset=RES_GET_OFFSET(array); + if (indexR < 0) { + return RES_BOGUS; + } + switch(RES_GET_TYPE(array)) { + case URES_ARRAY: { + if (offset!=0) { /* empty if offset==0 */ + const int32_t *p= pResData->pRoot+offset; + if(indexR<*p) { + return (Resource)p[1+indexR]; + } + } + break; + } + case URES_ARRAY16: { + const uint16_t *p=pResData->p16BitUnits+offset; + if(indexR<*p) { + return makeResourceFrom16(pResData, p[1+indexR]); + } + break; + } + default: + break; + } + return RES_BOGUS; +} + +uint32_t icu::ResourceArray::internalGetResource(const ResourceData *pResData, int32_t i) const { + if (items16 != NULL) { + return makeResourceFrom16(pResData, items16[i]); + } else { + return items32[i]; + } +} + +UBool icu::ResourceArray::getValue(int32_t i, icu::ResourceValue &value) const { + if(0 <= i && i < length) { + icu::ResourceDataValue &rdValue = static_cast<icu::ResourceDataValue &>(value); + // Note: the ResourceTracer keeps a reference to the field of this + // ResourceArray. This is OK because the ResourceArray should remain + // alive for the duration that fields are being read from it + // (including nested fields). + rdValue.setResource( + internalGetResource(&rdValue.getData(), i), + ResourceTracer(fTraceInfo, i)); + return TRUE; + } + return FALSE; +} + +U_CFUNC Resource +res_findResource(const ResourceData *pResData, Resource r, char** path, const char** key) { + char *pathP = *path, *nextSepP = *path; + char *closeIndex = NULL; + Resource t1 = r; + Resource t2; + int32_t indexR = 0; + UResType type = (UResType)RES_GET_TYPE(t1); + + /* if you come in with an empty path, you'll be getting back the same resource */ + if(!uprv_strlen(pathP)) { + return r; + } + + /* one needs to have an aggregate resource in order to search in it */ + if(!URES_IS_CONTAINER(type)) { + return RES_BOGUS; + } + + while(nextSepP && *pathP && t1 != RES_BOGUS && URES_IS_CONTAINER(type)) { + /* Iteration stops if: the path has been consumed, we found a non-existing + * resource (t1 == RES_BOGUS) or we found a scalar resource (including alias) + */ + nextSepP = uprv_strchr(pathP, RES_PATH_SEPARATOR); + /* if there are more separators, terminate string + * and set path to the remaining part of the string + */ + if(nextSepP != NULL) { + if(nextSepP == pathP) { + // Empty key string. + return RES_BOGUS; + } + *nextSepP = 0; /* overwrite the separator with a NUL to terminate the key */ + *path = nextSepP+1; + } else { + *path = uprv_strchr(pathP, 0); + } + + /* if the resource is a table */ + /* try the key based access */ + if(URES_IS_TABLE(type)) { + *key = pathP; + t2 = res_getTableItemByKey(pResData, t1, &indexR, key); + if(t2 == RES_BOGUS) { + /* if we fail to get the resource by key, maybe we got an index */ + indexR = uprv_strtol(pathP, &closeIndex, 10); + if(indexR >= 0 && *closeIndex == 0 && (*pathP != '0' || closeIndex - pathP == 1)) { + /* if we indeed have an index, try to get the item by index */ + t2 = res_getTableItemByIndex(pResData, t1, indexR, key); + } // else t2 is already RES_BOGUS + } + } else if(URES_IS_ARRAY(type)) { + indexR = uprv_strtol(pathP, &closeIndex, 10); + if(indexR >= 0 && *closeIndex == 0) { + t2 = res_getArrayItem(pResData, t1, indexR); + } else { + t2 = RES_BOGUS; /* have an array, but don't have a valid index */ + } + *key = NULL; + } else { /* can't do much here, except setting t2 to bogus */ + t2 = RES_BOGUS; + } + t1 = t2; + type = (UResType)RES_GET_TYPE(t1); + /* position pathP to next resource key/index */ + pathP = *path; + } + + return t1; +} + +/* resource bundle swapping ------------------------------------------------- */ + +/* + * Need to always enumerate the entire item tree, + * track the lowest address of any item to use as the limit for char keys[], + * track the highest address of any item to return the size of the data. + * + * We should have thought of storing those in the data... + * It is possible to extend the data structure by putting additional values + * in places that are inaccessible by ordinary enumeration of the item tree. + * For example, additional integers could be stored at the beginning or + * end of the key strings; this could be indicated by a minor version number, + * and the data swapping would have to know about these values. + * + * The data structure does not forbid keys to be shared, so we must swap + * all keys once instead of each key when it is referenced. + * + * These swapping functions assume that a resource bundle always has a length + * that is a multiple of 4 bytes. + * Currently, this is trivially true because genrb writes bundle tree leaves + * physically first, before their branches, so that the root table with its + * array of resource items (uint32_t values) is always last. + */ + +/* definitions for table sorting ------------------------ */ + +/* + * row of a temporary array + * + * gets platform-endian key string indexes and sorting indexes; + * after sorting this array by keys, the actual key/value arrays are permutated + * according to the sorting indexes + */ +typedef struct Row { + int32_t keyIndex, sortIndex; +} Row; + +static int32_t U_CALLCONV +ures_compareRows(const void *context, const void *left, const void *right) { + const char *keyChars=(const char *)context; + return (int32_t)uprv_strcmp(keyChars+((const Row *)left)->keyIndex, + keyChars+((const Row *)right)->keyIndex); +} + +typedef struct TempTable { + const char *keyChars; + Row *rows; + int32_t *resort; + uint32_t *resFlags; + int32_t localKeyLimit; + uint8_t majorFormatVersion; +} TempTable; + +enum { + STACK_ROW_CAPACITY=200 +}; + +/* The table item key string is not locally available. */ +static const char *const gUnknownKey=""; + +/* resource table key for collation binaries: "%%CollationBin" */ +static const UChar gCollationBinKey[]={ + 0x25, 0x25, + 0x43, 0x6f, 0x6c, 0x6c, 0x61, 0x74, 0x69, 0x6f, 0x6e, + 0x42, 0x69, 0x6e, + 0 +}; + +/* + * swap one resource item + */ +static void +ures_swapResource(const UDataSwapper *ds, + const Resource *inBundle, Resource *outBundle, + Resource res, /* caller swaps res itself */ + const char *key, + TempTable *pTempTable, + UErrorCode *pErrorCode) { + const Resource *p; + Resource *q; + int32_t offset, count; + + switch(RES_GET_TYPE(res)) { + case URES_TABLE16: + case URES_STRING_V2: + case URES_INT: + case URES_ARRAY16: + /* integer, or points to 16-bit units, nothing to do here */ + return; + default: + break; + } + + /* all other types use an offset to point to their data */ + offset=(int32_t)RES_GET_OFFSET(res); + if(offset==0) { + /* special offset indicating an empty item */ + return; + } + if(pTempTable->resFlags[offset>>5]&((uint32_t)1<<(offset&0x1f))) { + /* we already swapped this resource item */ + return; + } else { + /* mark it as swapped now */ + pTempTable->resFlags[offset>>5]|=((uint32_t)1<<(offset&0x1f)); + } + + p=inBundle+offset; + q=outBundle+offset; + + switch(RES_GET_TYPE(res)) { + case URES_ALIAS: + /* physically same value layout as string, fall through */ + U_FALLTHROUGH; + case URES_STRING: + count=udata_readInt32(ds, (int32_t)*p); + /* swap length */ + ds->swapArray32(ds, p, 4, q, pErrorCode); + /* swap each UChar (the terminating NUL would not change) */ + ds->swapArray16(ds, p+1, 2*count, q+1, pErrorCode); + break; + case URES_BINARY: + count=udata_readInt32(ds, (int32_t)*p); + /* swap length */ + ds->swapArray32(ds, p, 4, q, pErrorCode); + /* no need to swap or copy bytes - ures_swap() copied them all */ + + /* swap known formats */ +#if !UCONFIG_NO_COLLATION + if( key!=NULL && /* the binary is in a table */ + (key!=gUnknownKey ? + /* its table key string is "%%CollationBin" */ + 0==ds->compareInvChars(ds, key, -1, + gCollationBinKey, UPRV_LENGTHOF(gCollationBinKey)-1) : + /* its table key string is unknown but it looks like a collation binary */ + ucol_looksLikeCollationBinary(ds, p+1, count)) + ) { + ucol_swap(ds, p+1, count, q+1, pErrorCode); + } +#endif + break; + case URES_TABLE: + case URES_TABLE32: + { + const uint16_t *pKey16; + uint16_t *qKey16; + + const int32_t *pKey32; + int32_t *qKey32; + + Resource item; + int32_t i, oldIndex; + + if(RES_GET_TYPE(res)==URES_TABLE) { + /* get table item count */ + pKey16=(const uint16_t *)p; + qKey16=(uint16_t *)q; + count=ds->readUInt16(*pKey16); + + pKey32=qKey32=NULL; + + /* swap count */ + ds->swapArray16(ds, pKey16++, 2, qKey16++, pErrorCode); + + offset+=((1+count)+1)/2; + } else { + /* get table item count */ + pKey32=(const int32_t *)p; + qKey32=(int32_t *)q; + count=udata_readInt32(ds, *pKey32); + + pKey16=qKey16=NULL; + + /* swap count */ + ds->swapArray32(ds, pKey32++, 4, qKey32++, pErrorCode); + + offset+=1+count; + } + + if(count==0) { + break; + } + + p=inBundle+offset; /* pointer to table resources */ + q=outBundle+offset; + + /* recurse */ + for(i=0; i<count; ++i) { + const char *itemKey=gUnknownKey; + if(pKey16!=NULL) { + int32_t keyOffset=ds->readUInt16(pKey16[i]); + if(keyOffset<pTempTable->localKeyLimit) { + itemKey=(const char *)outBundle+keyOffset; + } + } else { + int32_t keyOffset=udata_readInt32(ds, pKey32[i]); + if(keyOffset>=0) { + itemKey=(const char *)outBundle+keyOffset; + } + } + item=ds->readUInt32(p[i]); + ures_swapResource(ds, inBundle, outBundle, item, itemKey, pTempTable, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "ures_swapResource(table res=%08x)[%d].recurse(%08x) failed\n", + res, i, item); + return; + } + } + + if(pTempTable->majorFormatVersion>1 || ds->inCharset==ds->outCharset) { + /* no need to sort, just swap the offset/value arrays */ + if(pKey16!=NULL) { + ds->swapArray16(ds, pKey16, count*2, qKey16, pErrorCode); + ds->swapArray32(ds, p, count*4, q, pErrorCode); + } else { + /* swap key offsets and items as one array */ + ds->swapArray32(ds, pKey32, count*2*4, qKey32, pErrorCode); + } + break; + } + + /* + * We need to sort tables by outCharset key strings because they + * sort differently for different charset families. + * ures_swap() already set pTempTable->keyChars appropriately. + * First we set up a temporary table with the key indexes and + * sorting indexes and sort that. + * Then we permutate and copy/swap the actual values. + */ + if(pKey16!=NULL) { + for(i=0; i<count; ++i) { + pTempTable->rows[i].keyIndex=ds->readUInt16(pKey16[i]); + pTempTable->rows[i].sortIndex=i; + } + } else { + for(i=0; i<count; ++i) { + pTempTable->rows[i].keyIndex=udata_readInt32(ds, pKey32[i]); + pTempTable->rows[i].sortIndex=i; + } + } + uprv_sortArray(pTempTable->rows, count, sizeof(Row), + ures_compareRows, pTempTable->keyChars, + FALSE, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "ures_swapResource(table res=%08x).uprv_sortArray(%d items) failed\n", + res, count); + return; + } + + /* + * copy/swap/permutate items + * + * If we swap in-place, then the permutation must use another + * temporary array (pTempTable->resort) + * before the results are copied to the outBundle. + */ + /* keys */ + if(pKey16!=NULL) { + uint16_t *rKey16; + + if(pKey16!=qKey16) { + rKey16=qKey16; + } else { + rKey16=(uint16_t *)pTempTable->resort; + } + for(i=0; i<count; ++i) { + oldIndex=pTempTable->rows[i].sortIndex; + ds->swapArray16(ds, pKey16+oldIndex, 2, rKey16+i, pErrorCode); + } + if(qKey16!=rKey16) { + uprv_memcpy(qKey16, rKey16, 2*count); + } + } else { + int32_t *rKey32; + + if(pKey32!=qKey32) { + rKey32=qKey32; + } else { + rKey32=pTempTable->resort; + } + for(i=0; i<count; ++i) { + oldIndex=pTempTable->rows[i].sortIndex; + ds->swapArray32(ds, pKey32+oldIndex, 4, rKey32+i, pErrorCode); + } + if(qKey32!=rKey32) { + uprv_memcpy(qKey32, rKey32, 4*count); + } + } + + /* resources */ + { + Resource *r; + + + if(p!=q) { + r=q; + } else { + r=(Resource *)pTempTable->resort; + } + for(i=0; i<count; ++i) { + oldIndex=pTempTable->rows[i].sortIndex; + ds->swapArray32(ds, p+oldIndex, 4, r+i, pErrorCode); + } + if(q!=r) { + uprv_memcpy(q, r, 4*count); + } + } + } + break; + case URES_ARRAY: + { + Resource item; + int32_t i; + + count=udata_readInt32(ds, (int32_t)*p); + /* swap length */ + ds->swapArray32(ds, p++, 4, q++, pErrorCode); + + /* recurse */ + for(i=0; i<count; ++i) { + item=ds->readUInt32(p[i]); + ures_swapResource(ds, inBundle, outBundle, item, NULL, pTempTable, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "ures_swapResource(array res=%08x)[%d].recurse(%08x) failed\n", + res, i, item); + return; + } + } + + /* swap items */ + ds->swapArray32(ds, p, 4*count, q, pErrorCode); + } + break; + case URES_INT_VECTOR: + count=udata_readInt32(ds, (int32_t)*p); + /* swap length and each integer */ + ds->swapArray32(ds, p, 4*(1+count), q, pErrorCode); + break; + default: + /* also catches RES_BOGUS */ + *pErrorCode=U_UNSUPPORTED_ERROR; + break; + } +} + +U_CAPI int32_t U_EXPORT2 +ures_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UDataInfo *pInfo; + const Resource *inBundle; + Resource rootRes; + int32_t headerSize, maxTableLength; + + Row rows[STACK_ROW_CAPACITY]; + int32_t resort[STACK_ROW_CAPACITY]; + TempTable tempTable; + + const int32_t *inIndexes; + + /* the following integers count Resource item offsets (4 bytes each), not bytes */ + int32_t bundleLength, indexLength, keysBottom, keysTop, resBottom, top; + + /* udata_swapDataHeader checks the arguments */ + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* check data format and format version */ + pInfo=(const UDataInfo *)((const char *)inData+4); + if(!( + pInfo->dataFormat[0]==0x52 && /* dataFormat="ResB" */ + pInfo->dataFormat[1]==0x65 && + pInfo->dataFormat[2]==0x73 && + pInfo->dataFormat[3]==0x42 && + /* formatVersion 1.1+ or 2.x or 3.x */ + ((pInfo->formatVersion[0]==1 && pInfo->formatVersion[1]>=1) || + pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3) + )) { + udata_printError(ds, "ures_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not a resource bundle\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0], pInfo->formatVersion[1]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + tempTable.majorFormatVersion=pInfo->formatVersion[0]; + + /* a resource bundle must contain at least one resource item */ + if(length<0) { + bundleLength=-1; + } else { + bundleLength=(length-headerSize)/4; + + /* formatVersion 1.1 must have a root item and at least 5 indexes */ + if(bundleLength<(1+5)) { + udata_printError(ds, "ures_swap(): too few bytes (%d after header) for a resource bundle\n", + length-headerSize); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + + inBundle=(const Resource *)((const char *)inData+headerSize); + rootRes=ds->readUInt32(*inBundle); + + /* formatVersion 1.1 adds the indexes[] array */ + inIndexes=(const int32_t *)(inBundle+1); + + indexLength=udata_readInt32(ds, inIndexes[URES_INDEX_LENGTH])&0xff; + if(indexLength<=URES_INDEX_MAX_TABLE_LENGTH) { + udata_printError(ds, "ures_swap(): too few indexes for a 1.1+ resource bundle\n"); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + keysBottom=1+indexLength; + keysTop=udata_readInt32(ds, inIndexes[URES_INDEX_KEYS_TOP]); + if(indexLength>URES_INDEX_16BIT_TOP) { + resBottom=udata_readInt32(ds, inIndexes[URES_INDEX_16BIT_TOP]); + } else { + resBottom=keysTop; + } + top=udata_readInt32(ds, inIndexes[URES_INDEX_BUNDLE_TOP]); + maxTableLength=udata_readInt32(ds, inIndexes[URES_INDEX_MAX_TABLE_LENGTH]); + + if(0<=bundleLength && bundleLength<top) { + udata_printError(ds, "ures_swap(): resource top %d exceeds bundle length %d\n", + top, bundleLength); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + if(keysTop>(1+indexLength)) { + tempTable.localKeyLimit=keysTop<<2; + } else { + tempTable.localKeyLimit=0; + } + + if(length>=0) { + Resource *outBundle=(Resource *)((char *)outData+headerSize); + + /* track which resources we have already swapped */ + uint32_t stackResFlags[STACK_ROW_CAPACITY]; + int32_t resFlagsLength; + + /* + * We need one bit per 4 resource bundle bytes so that we can track + * every possible Resource for whether we have swapped it already. + * Multiple Resource words can refer to the same bundle offsets + * for sharing identical values. + * We could optimize this by allocating only for locations above + * where Resource values are stored (above keys & strings). + */ + resFlagsLength=(length+31)>>5; /* number of bytes needed */ + resFlagsLength=(resFlagsLength+3)&~3; /* multiple of 4 bytes for uint32_t */ + if(resFlagsLength<=(int32_t)sizeof(stackResFlags)) { + tempTable.resFlags=stackResFlags; + } else { + tempTable.resFlags=(uint32_t *)uprv_malloc(resFlagsLength); + if(tempTable.resFlags==NULL) { + udata_printError(ds, "ures_swap(): unable to allocate memory for tracking resources\n"); + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return 0; + } + } + uprv_memset(tempTable.resFlags, 0, resFlagsLength); + + /* copy the bundle for binary and inaccessible data */ + if(inData!=outData) { + uprv_memcpy(outBundle, inBundle, 4*top); + } + + /* swap the key strings, but not the padding bytes (0xaa) after the last string and its NUL */ + udata_swapInvStringBlock(ds, inBundle+keysBottom, 4*(keysTop-keysBottom), + outBundle+keysBottom, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "ures_swap().udata_swapInvStringBlock(keys[%d]) failed\n", 4*(keysTop-keysBottom)); + return 0; + } + + /* swap the 16-bit units (strings, table16, array16) */ + if(keysTop<resBottom) { + ds->swapArray16(ds, inBundle+keysTop, (resBottom-keysTop)*4, outBundle+keysTop, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "ures_swap().swapArray16(16-bit units[%d]) failed\n", 2*(resBottom-keysTop)); + return 0; + } + } + + /* allocate the temporary table for sorting resource tables */ + tempTable.keyChars=(const char *)outBundle; /* sort by outCharset */ + if(tempTable.majorFormatVersion>1 || maxTableLength<=STACK_ROW_CAPACITY) { + tempTable.rows=rows; + tempTable.resort=resort; + } else { + tempTable.rows=(Row *)uprv_malloc(maxTableLength*sizeof(Row)+maxTableLength*4); + if(tempTable.rows==NULL) { + udata_printError(ds, "ures_swap(): unable to allocate memory for sorting tables (max length: %d)\n", + maxTableLength); + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + if(tempTable.resFlags!=stackResFlags) { + uprv_free(tempTable.resFlags); + } + return 0; + } + tempTable.resort=(int32_t *)(tempTable.rows+maxTableLength); + } + + /* swap the resources */ + ures_swapResource(ds, inBundle, outBundle, rootRes, NULL, &tempTable, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "ures_swapResource(root res=%08x) failed\n", + rootRes); + } + + if(tempTable.rows!=rows) { + uprv_free(tempTable.rows); + } + if(tempTable.resFlags!=stackResFlags) { + uprv_free(tempTable.resFlags); + } + + /* swap the root resource and indexes */ + ds->swapArray32(ds, inBundle, keysBottom*4, outBundle, pErrorCode); + } + + return headerSize+4*top; +} diff --git a/thirdparty/icu4c/common/uresdata.h b/thirdparty/icu4c/common/uresdata.h new file mode 100644 index 0000000000..7c2152e57b --- /dev/null +++ b/thirdparty/icu4c/common/uresdata.h @@ -0,0 +1,565 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1999-2016, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* file name: uresdata.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999dec08 +* created by: Markus W. Scherer +* 06/24/02 weiv Added support for resource sharing +*/ + +#ifndef __RESDATA_H__ +#define __RESDATA_H__ + +#include "unicode/utypes.h" +#include "unicode/udata.h" +#include "unicode/ures.h" +#include "putilimp.h" +#include "udataswp.h" + +/** + * Numeric constants for internal-only types of resource items. + * These must use different numeric values than UResType constants + * because they are used together. + * Internal types are never returned by ures_getType(). + */ +typedef enum { + /** Include a negative value so that the compiler uses the same int type as for UResType. */ + URES_INTERNAL_NONE=-1, + + /** Resource type constant for tables with 32-bit count, key offsets and values. */ + URES_TABLE32=4, + + /** + * Resource type constant for tables with 16-bit count, key offsets and values. + * All values are URES_STRING_V2 strings. + */ + URES_TABLE16=5, + + /** Resource type constant for 16-bit Unicode strings in formatVersion 2. */ + URES_STRING_V2=6, + + /** + * Resource type constant for arrays with 16-bit count and values. + * All values are URES_STRING_V2 strings. + */ + URES_ARRAY16=9 + + /* Resource type 15 is not defined but effectively used by RES_BOGUS=0xffffffff. */ +} UResInternalType; + +/* + * A Resource is a 32-bit value that has 2 bit fields: + * 31..28 4-bit type, see enum below + * 27..0 28-bit four-byte-offset or value according to the type + */ +typedef uint32_t Resource; + +#define RES_BOGUS 0xffffffff +#define RES_MAX_OFFSET 0x0fffffff + +#define RES_GET_TYPE(res) ((int32_t)((res)>>28UL)) +#define RES_GET_OFFSET(res) ((res)&0x0fffffff) +#define RES_GET_POINTER(pRoot, res) ((pRoot)+RES_GET_OFFSET(res)) + +/* get signed and unsigned integer values directly from the Resource handle + * NOTE: For proper logging, please use the res_getInt() constexpr + */ +#if U_SIGNED_RIGHT_SHIFT_IS_ARITHMETIC +# define RES_GET_INT_NO_TRACE(res) (((int32_t)((res)<<4L))>>4L) +#else +# define RES_GET_INT_NO_TRACE(res) (int32_t)(((res)&0x08000000) ? (res)|0xf0000000 : (res)&0x07ffffff) +#endif + +#define RES_GET_UINT_NO_TRACE(res) ((res)&0x0fffffff) + +#define URES_IS_ARRAY(type) ((int32_t)(type)==URES_ARRAY || (int32_t)(type)==URES_ARRAY16) +#define URES_IS_TABLE(type) ((int32_t)(type)==URES_TABLE || (int32_t)(type)==URES_TABLE16 || (int32_t)(type)==URES_TABLE32) +#define URES_IS_CONTAINER(type) (URES_IS_TABLE(type) || URES_IS_ARRAY(type)) + +#define URES_MAKE_RESOURCE(type, offset) (((Resource)(type)<<28)|(Resource)(offset)) +#define URES_MAKE_EMPTY_RESOURCE(type) ((Resource)(type)<<28) + +/* indexes[] value names; indexes are generally 32-bit (Resource) indexes */ +enum { + /** + * [0] contains the length of indexes[] + * which is at most URES_INDEX_TOP of the latest format version + * + * formatVersion==1: all bits contain the length of indexes[] + * but the length is much less than 0xff; + * formatVersion>1: + * only bits 7..0 contain the length of indexes[], + * bits 31..8 are reserved and set to 0 + * formatVersion>=3: + * bits 31..8 poolStringIndexLimit bits 23..0 + */ + URES_INDEX_LENGTH, + /** + * [1] contains the top of the key strings, + * same as the bottom of resources or UTF-16 strings, rounded up + */ + URES_INDEX_KEYS_TOP, + /** [2] contains the top of all resources */ + URES_INDEX_RESOURCES_TOP, + /** + * [3] contains the top of the bundle, + * in case it were ever different from [2] + */ + URES_INDEX_BUNDLE_TOP, + /** [4] max. length of any table */ + URES_INDEX_MAX_TABLE_LENGTH, + /** + * [5] attributes bit set, see URES_ATT_* (new in formatVersion 1.2) + * + * formatVersion>=3: + * bits 31..16 poolStringIndex16Limit + * bits 15..12 poolStringIndexLimit bits 27..24 + */ + URES_INDEX_ATTRIBUTES, + /** + * [6] top of the 16-bit units (UTF-16 string v2 UChars, URES_TABLE16, URES_ARRAY16), + * rounded up (new in formatVersion 2.0, ICU 4.4) + */ + URES_INDEX_16BIT_TOP, + /** [7] checksum of the pool bundle (new in formatVersion 2.0, ICU 4.4) */ + URES_INDEX_POOL_CHECKSUM, + URES_INDEX_TOP +}; + +/* + * Nofallback attribute, attribute bit 0 in indexes[URES_INDEX_ATTRIBUTES]. + * New in formatVersion 1.2 (ICU 3.6). + * + * If set, then this resource bundle is a standalone bundle. + * If not set, then the bundle participates in locale fallback, eventually + * all the way to the root bundle. + * If indexes[] is missing or too short, then the attribute cannot be determined + * reliably. Dependency checking should ignore such bundles, and loading should + * use fallbacks. + */ +#define URES_ATT_NO_FALLBACK 1 + +/* + * Attributes for bundles that are, or use, a pool bundle. + * A pool bundle provides key strings that are shared among several other bundles + * to reduce their total size. + * New in formatVersion 2 (ICU 4.4). + */ +#define URES_ATT_IS_POOL_BUNDLE 2 +#define URES_ATT_USES_POOL_BUNDLE 4 + +/* + * File format for .res resource bundle files + * + * ICU 56: New in formatVersion 3 compared with 2: ------------- + * + * Resource bundles can optionally use shared string-v2 values + * stored in the pool bundle. + * If so, then the indexes[] contain two new values + * in previously-unused bits of existing indexes[] slots: + * - poolStringIndexLimit: + * String-v2 offsets (in 32-bit Resource words) below this limit + * point to pool bundle string-v2 values. + * - poolStringIndex16Limit: + * Resource16 string-v2 offsets below this limit + * point to pool bundle string-v2 values. + * Guarantee: poolStringIndex16Limit <= poolStringIndexLimit + * + * The local bundle's poolStringIndexLimit is greater than + * any pool bundle string index used in the local bundle. + * The poolStringIndexLimit should not be greater than + * the maximum possible pool bundle string index. + * + * The maximum possible pool bundle string index is the index to the last non-NUL + * pool string character, due to suffix sharing. + * + * In the pool bundle, there is no structure that lists the strings. + * (The root resource is an empty Table.) + * If the strings need to be enumerated (as genrb --usePoolBundle does), + * then iterate through the pool bundle's 16-bit-units array from the beginning. + * Stop at the end of the array, or when an explicit or implicit string length + * would lead beyond the end of the array, + * or when an apparent string is not NUL-terminated. + * (Future genrb version might terminate the strings with + * what looks like a large explicit string length.) + * + * ICU 4.4: New in formatVersion 2 compared with 1.3: ------------- + * + * Three new resource types -- String-v2, Table16 and Array16 -- have their + * values stored in a new array of 16-bit units between the table key strings + * and the start of the other resources. + * + * genrb eliminates duplicates among Unicode string-v2 values. + * Multiple Unicode strings may use the same offset and string data, + * or a short string may point to the suffix of a longer string. ("Suffix sharing") + * For example, one string "abc" may be reused for another string "bc" by pointing + * to the second character. (Short strings-v2 are NUL-terminated + * and not preceded by an explicit length value.) + * + * It is allowed for all resource types to share values. + * The swapper code (ures_swap()) has been modified so that it swaps each item + * exactly once. + * + * A resource bundle may use a special pool bundle. Some or all of the table key strings + * of the using-bundle are omitted, and the key string offsets for such key strings refer + * to offsets in the pool bundle. + * The using-bundle's and the pool-bundle's indexes[URES_INDEX_POOL_CHECKSUM] values + * must match. + * Two bits in indexes[URES_INDEX_ATTRIBUTES] indicate whether a resource bundle + * is or uses a pool bundle. + * + * Table key strings must be compared in ASCII order, even if they are not + * stored in ASCII. + * + * New in formatVersion 1.3 compared with 1.2: ------------- + * + * genrb eliminates duplicates among key strings. + * Multiple table items may share one key string, or one item may point + * to the suffix of another's key string. ("Suffix sharing") + * For example, one key "abc" may be reused for another key "bc" by pointing + * to the second character. (Key strings are NUL-terminated.) + * + * ------------- + * + * An ICU4C resource bundle file (.res) is a binary, memory-mappable file + * with nested, hierarchical data structures. + * It physically contains the following: + * + * Resource root; -- 32-bit Resource item, root item for this bundle's tree; + * currently, the root item must be a table or table32 resource item + * int32_t indexes[indexes[0]]; -- array of indexes for friendly + * reading and swapping; see URES_INDEX_* above + * new in formatVersion 1.1 (ICU 2.8) + * char keys[]; -- characters for key strings + * (formatVersion 1.0: up to 65k of characters; 1.1: <2G) + * (minus the space for root and indexes[]), + * which consist of invariant characters (ASCII/EBCDIC) and are NUL-terminated; + * padded to multiple of 4 bytes for 4-alignment of the following data + * uint16_t 16BitUnits[]; -- resources that are stored entirely as sequences of 16-bit units + * (new in formatVersion 2/ICU 4.4) + * data is indexed by the offset values in 16-bit resource types, + * with offset 0 pointing to the beginning of this array; + * there is a 0 at offset 0, for empty resources; + * padded to multiple of 4 bytes for 4-alignment of the following data + * data; -- data directly and indirectly indexed by the root item; + * the structure is determined by walking the tree + * + * Each resource bundle item has a 32-bit Resource handle (see typedef above) + * which contains the item type number in its upper 4 bits (31..28) and either + * an offset or a direct value in its lower 28 bits (27..0). + * The order of items is undefined and only determined by walking the tree. + * Leaves of the tree may be stored first or last or anywhere in between, + * and it is in theory possible to have unreferenced holes in the file. + * + * 16-bit-unit values: + * Starting with formatVersion 2/ICU 4.4, some resources are stored in a special + * array of 16-bit units. Each resource value is a sequence of 16-bit units, + * with no per-resource padding to a 4-byte boundary. + * 16-bit container types (Table16 and Array16) contain Resource16 values + * which are offsets to String-v2 resources in the same 16-bit-units array. + * + * Direct values: + * - Empty Unicode strings have an offset value of 0 in the Resource handle itself. + * - Starting with formatVersion 2/ICU 4.4, an offset value of 0 for + * _any_ resource type indicates an empty value. + * - Integer values are 28-bit values stored in the Resource handle itself; + * the interpretation of unsigned vs. signed integers is up to the application. + * + * All other types and values use 28-bit offsets to point to the item's data. + * The offset is an index to the first 32-bit word of the value, relative to the + * start of the resource data (i.e., the root item handle is at offset 0). + * To get byte offsets, the offset is multiplied by 4 (or shifted left by 2 bits). + * All resource item values are 4-aligned. + * + * New in formatVersion 2/ICU 4.4: Some types use offsets into the 16-bit-units array, + * indexing 16-bit units in that array. + * + * The structures (memory layouts) for the values for each item type are listed + * in the table below. + * + * Nested, hierarchical structures: ------------- + * + * Table items contain key-value pairs where the keys are offsets to char * key strings. + * The values of these pairs are either Resource handles or + * offsets into the 16-bit-units array, depending on the table type. + * + * Array items are simple vectors of Resource handles, + * or of offsets into the 16-bit-units array, depending on the array type. + * + * Table key string offsets: ------- + * + * Key string offsets are relative to the start of the resource data (of the root handle), + * i.e., the first string has an offset of 4+sizeof(indexes). + * (After the 4-byte root handle and after the indexes array.) + * + * If the resource bundle uses a pool bundle, then some key strings are stored + * in the pool bundle rather than in the local bundle itself. + * - In a Table or Table16, the 16-bit key string offset is local if it is + * less than indexes[URES_INDEX_KEYS_TOP]<<2. + * Otherwise, subtract indexes[URES_INDEX_KEYS_TOP]<<2 to get the offset into + * the pool bundle key strings. + * - In a Table32, the 32-bit key string offset is local if it is non-negative. + * Otherwise, reset bit 31 to get the pool key string offset. + * + * Unlike the local offset, the pool key offset is relative to + * the start of the key strings, not to the start of the bundle. + * + * An alias item is special (and new in ICU 2.4): -------------- + * + * Its memory layout is just like for a UnicodeString, but at runtime it resolves to + * another resource bundle's item according to the path in the string. + * This is used to share items across bundles that are in different lookup/fallback + * chains (e.g., large collation data among zh_TW and zh_HK). + * This saves space (for large items) and maintenance effort (less duplication of data). + * + * -------------------------------------------------------------------------- + * + * Resource types: + * + * Most resources have their values stored at four-byte offsets from the start + * of the resource data. These values are at least 4-aligned. + * Some resource values are stored directly in the offset field of the Resource itself. + * See UResType in unicode/ures.h for enumeration constants for Resource types. + * + * Some resources have their values stored as sequences of 16-bit units, + * at 2-byte offsets from the start of a contiguous 16-bit-unit array between + * the table key strings and the other resources. (new in formatVersion 2/ICU 4.4) + * At offset 0 of that array is a 16-bit zero value for empty 16-bit resources. + * + * Resource16 values in Table16 and Array16 are 16-bit offsets to String-v2 + * resources, with the offsets relative to the start of the 16-bit-units array. + * Starting with formatVersion 3/ICU 56, if offset<poolStringIndex16Limit + * then use the pool bundle's 16-bit-units array, + * otherwise subtract that limit and use the local 16-bit-units array. + * + * Type Name Memory layout of values + * (in parentheses: scalar, non-offset values) + * + * 0 Unicode String: int32_t length, UChar[length], (UChar)0, (padding) + * or (empty string ("") if offset==0) + * 1 Binary: int32_t length, uint8_t[length], (padding) + * - the start of the bytes is 16-aligned - + * 2 Table: uint16_t count, uint16_t keyStringOffsets[count], (uint16_t padding), Resource[count] + * 3 Alias: (physically same value layout as string, new in ICU 2.4) + * 4 Table32: int32_t count, int32_t keyStringOffsets[count], Resource[count] + * (new in formatVersion 1.1/ICU 2.8) + * 5 Table16: uint16_t count, uint16_t keyStringOffsets[count], Resource16[count] + * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4) + * 6 Unicode String-v2:UChar[length], (UChar)0; length determined by the first UChar: + * - if first is not a trail surrogate, then the length is implicit + * and u_strlen() needs to be called + * - if first<0xdfef then length=first&0x3ff (and skip first) + * - if first<0xdfff then length=((first-0xdfef)<<16) | second UChar + * - if first==0xdfff then length=((second UChar)<<16) | third UChar + * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4) + * + * Starting with formatVersion 3/ICU 56, if offset<poolStringIndexLimit + * then use the pool bundle's 16-bit-units array, + * otherwise subtract that limit and use the local 16-bit-units array. + * (Note different limits for Resource16 vs. Resource.) + * + * 7 Integer: (28-bit offset is integer value) + * 8 Array: int32_t count, Resource[count] + * 9 Array16: uint16_t count, Resource16[count] + * (stored in the 16-bit-units array; new in formatVersion 2/ICU 4.4) + * 14 Integer Vector: int32_t length, int32_t[length] + * 15 Reserved: This value denotes special purpose resources and is for internal use. + * + * Note that there are 3 types with data vector values: + * - Vectors of 8-bit bytes stored as type Binary. + * - Vectors of 16-bit words stored as type Unicode String or Unicode String-v2 + * (no value restrictions, all values 0..ffff allowed!). + * - Vectors of 32-bit words stored as type Integer Vector. + */ + +/* + * Structure for a single, memory-mapped ResourceBundle. + */ +typedef struct ResourceData { + UDataMemory *data; + const int32_t *pRoot; + const uint16_t *p16BitUnits; + const char *poolBundleKeys; + Resource rootRes; + int32_t localKeyLimit; + const uint16_t *poolBundleStrings; + int32_t poolStringIndexLimit; + int32_t poolStringIndex16Limit; + UBool noFallback; /* see URES_ATT_NO_FALLBACK */ + UBool isPoolBundle; + UBool usesPoolBundle; + UBool useNativeStrcmp; +} ResourceData; + +/* + * Read a resource bundle from memory. + */ +U_CAPI void U_EXPORT2 +res_read(ResourceData *pResData, + const UDataInfo *pInfo, const void *inBytes, int32_t length, + UErrorCode *errorCode); + +/* + * Load a resource bundle file. + * The ResourceData structure must be allocated externally. + */ +U_CFUNC void +res_load(ResourceData *pResData, + const char *path, const char *name, UErrorCode *errorCode); + +/* + * Release a resource bundle file. + * This does not release the ResourceData structure itself. + */ +U_CFUNC void +res_unload(ResourceData *pResData); + +U_CAPI UResType U_EXPORT2 +res_getPublicType(Resource res); + +/////////////////////////////////////////////////////////////////////////// +// To enable tracing, use the inline versions of the res_get* functions. // +/////////////////////////////////////////////////////////////////////////// + +/* + * Return a pointer to a zero-terminated, const UChar* string + * and set its length in *pLength. + * Returns NULL if not found. + */ +U_CAPI const UChar * U_EXPORT2 +res_getStringNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength); + +U_CAPI const uint8_t * U_EXPORT2 +res_getBinaryNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength); + +U_CAPI const int32_t * U_EXPORT2 +res_getIntVectorNoTrace(const ResourceData *pResData, Resource res, int32_t *pLength); + +U_CAPI const UChar * U_EXPORT2 +res_getAlias(const ResourceData *pResData, Resource res, int32_t *pLength); + +U_CAPI Resource U_EXPORT2 +res_getResource(const ResourceData *pResData, const char *key); + +U_CAPI int32_t U_EXPORT2 +res_countArrayItems(const ResourceData *pResData, Resource res); + +U_CAPI Resource U_EXPORT2 +res_getArrayItem(const ResourceData *pResData, Resource array, int32_t indexS); + +U_CAPI Resource U_EXPORT2 +res_getTableItemByIndex(const ResourceData *pResData, Resource table, int32_t indexS, const char ** key); + +U_CAPI Resource U_EXPORT2 +res_getTableItemByKey(const ResourceData *pResData, Resource table, int32_t *indexS, const char* * key); + +/** + * Iterates over the path and stops when a scalar resource is found. + * Follows aliases. + * Modifies the contents of *path (replacing separators with NULs), + * and also moves *path forward while it finds items. + * + * @param path input: "CollationElements/Sequence" or "zoneStrings/3/2" etc.; + * output: points to the part that has not yet been processed + */ +U_CFUNC Resource res_findResource(const ResourceData *pResData, Resource r, + char** path, const char** key); + +#ifdef __cplusplus + +#include "resource.h" +#include "restrace.h" + +U_NAMESPACE_BEGIN + +inline const UChar* res_getString(const ResourceTracer& traceInfo, + const ResourceData *pResData, Resource res, int32_t *pLength) { + traceInfo.trace("string"); + return res_getStringNoTrace(pResData, res, pLength); +} + +inline const uint8_t* res_getBinary(const ResourceTracer& traceInfo, + const ResourceData *pResData, Resource res, int32_t *pLength) { + traceInfo.trace("binary"); + return res_getBinaryNoTrace(pResData, res, pLength); +} + +inline const int32_t* res_getIntVector(const ResourceTracer& traceInfo, + const ResourceData *pResData, Resource res, int32_t *pLength) { + traceInfo.trace("intvector"); + return res_getIntVectorNoTrace(pResData, res, pLength); +} + +inline int32_t res_getInt(const ResourceTracer& traceInfo, Resource res) { + traceInfo.trace("int"); + return RES_GET_INT_NO_TRACE(res); +} + +inline uint32_t res_getUInt(const ResourceTracer& traceInfo, Resource res) { + traceInfo.trace("uint"); + return RES_GET_UINT_NO_TRACE(res); +} + +class ResourceDataValue : public ResourceValue { +public: + ResourceDataValue() : + res(static_cast<Resource>(URES_NONE)), + fTraceInfo() {} + virtual ~ResourceDataValue(); + + void setData(const ResourceData *data) { + resData = *data; + } + + void setResource(Resource r, ResourceTracer&& traceInfo) { + res = r; + fTraceInfo = traceInfo; + } + + const ResourceData &getData() const { return resData; } + virtual UResType getType() const; + virtual const UChar *getString(int32_t &length, UErrorCode &errorCode) const; + virtual const UChar *getAliasString(int32_t &length, UErrorCode &errorCode) const; + virtual int32_t getInt(UErrorCode &errorCode) const; + virtual uint32_t getUInt(UErrorCode &errorCode) const; + virtual const int32_t *getIntVector(int32_t &length, UErrorCode &errorCode) const; + virtual const uint8_t *getBinary(int32_t &length, UErrorCode &errorCode) const; + virtual ResourceArray getArray(UErrorCode &errorCode) const; + virtual ResourceTable getTable(UErrorCode &errorCode) const; + virtual UBool isNoInheritanceMarker() const; + virtual int32_t getStringArray(UnicodeString *dest, int32_t capacity, + UErrorCode &errorCode) const; + virtual int32_t getStringArrayOrStringAsArray(UnicodeString *dest, int32_t capacity, + UErrorCode &errorCode) const; + virtual UnicodeString getStringOrFirstOfArray(UErrorCode &errorCode) const; + +private: + // TODO(ICU-20769): If UResourceBundle.fResData becomes a pointer, + // then remove this value field again and just store a pResData pointer. + ResourceData resData; + Resource res; + ResourceTracer fTraceInfo; +}; + +U_NAMESPACE_END + +#endif /* __cplusplus */ + +/** + * Swap an ICU resource bundle. See udataswp.h. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +ures_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +#endif diff --git a/thirdparty/icu4c/common/uresimp.h b/thirdparty/icu4c/common/uresimp.h new file mode 100644 index 0000000000..69d82566fe --- /dev/null +++ b/thirdparty/icu4c/common/uresimp.h @@ -0,0 +1,364 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#ifndef URESIMP_H +#define URESIMP_H + +#include "unicode/ures.h" +#include "unicode/utypes.h" + +#include "uresdata.h" + +#define kRootLocaleName "root" +#define kPoolBundleName "pool" + +/* + The default minor version and the version separator must be exactly one + character long. +*/ + +#define kDefaultMinorVersion "0" +#define kVersionSeparator "." +#define kVersionTag "Version" + +#define MAGIC1 19700503 +#define MAGIC2 19641227 + +#define URES_MAX_ALIAS_LEVEL 256 +#define URES_MAX_BUFFER_SIZE 256 + +#define EMPTY_SET 0x2205 + +struct UResourceDataEntry; +typedef struct UResourceDataEntry UResourceDataEntry; + +/* + * Note: If we wanted to make this structure smaller, then we could try + * to use one UResourceDataEntry pointer for fAlias and fPool, with a separate + * flag to distinguish whether this struct is for a real bundle with a pool, + * or for an alias entry for which we won't use the pool after loading. + */ +struct UResourceDataEntry { + char *fName; /* name of the locale for bundle - still to decide whether it is original or fallback */ + char *fPath; /* path to bundle - used for distinguishing between resources with the same name */ + UResourceDataEntry *fParent; /*next resource in fallback chain*/ + UResourceDataEntry *fAlias; + UResourceDataEntry *fPool; + ResourceData fData; /* data for low level access */ + char fNameBuffer[3]; /* A small buffer of free space for fName. The free space is due to struct padding. */ + uint32_t fCountExisting; /* how much is this resource used */ + UErrorCode fBogus; + /* int32_t fHashKey;*/ /* for faster access in the hashtable */ +}; + +#define RES_BUFSIZE 64 +#define RES_PATH_SEPARATOR '/' +#define RES_PATH_SEPARATOR_S "/" + +struct UResourceBundle { + const char *fKey; /*tag*/ + UResourceDataEntry *fData; /*for low-level access*/ + char *fVersion; + UResourceDataEntry *fTopLevelData; /* for getting the valid locale */ + char *fResPath; /* full path to the resource: "zh_TW/CollationElements/Sequence" */ + // TODO(ICU-20769): Try to change the by-value fResData into a pointer, + // with the struct in only one place for each bundle. + // Also replace class ResourceDataValue.resData with a pResData pointer again. + ResourceData fResData; + char fResBuf[RES_BUFSIZE]; + int32_t fResPathLen; + Resource fRes; + UBool fHasFallback; + UBool fIsTopLevel; + uint32_t fMagic1; /* For determining if it's a stack object */ + uint32_t fMagic2; /* For determining if it's a stack object */ + int32_t fIndex; + int32_t fSize; + + /*const UResourceBundle *fParentRes;*/ /* needed to get the actual locale for a child resource */ +}; + +U_CAPI void U_EXPORT2 ures_initStackObject(UResourceBundle* resB); + +#ifdef __cplusplus + +U_NAMESPACE_BEGIN + +/** + * \class StackUResourceBundle + * "Smart pointer" like class, closes a UResourceBundle via ures_close(). + * + * This code: + * + * StackUResourceBundle bundle; + * foo(bundle.getAlias()); + * + * Is equivalent to this code: + * + * UResourceBundle bundle; + * ures_initStackObject(&bundle); + * foo(&bundle); + * ures_close(&bundle); + * + * @see LocalUResourceBundlePointer + * @internal + */ +class U_COMMON_API StackUResourceBundle { +public: + // No heap allocation. Use only on the stack. + static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete; + static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete; +#if U_HAVE_PLACEMENT_NEW + static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete; +#endif + + StackUResourceBundle(); + ~StackUResourceBundle(); + + UResourceBundle* getAlias() { return &bundle; } + + UResourceBundle& ref() { return bundle; } + const UResourceBundle& ref() const { return bundle; } + + StackUResourceBundle(const StackUResourceBundle&) = delete; + StackUResourceBundle& operator=(const StackUResourceBundle&) = delete; + + StackUResourceBundle(StackUResourceBundle&&) = delete; + StackUResourceBundle& operator=(StackUResourceBundle&&) = delete; + +private: + UResourceBundle bundle; +}; + +U_NAMESPACE_END + +#endif /* __cplusplus */ + +/** + * Opens a resource bundle for the locale; + * if there is not even a base language bundle, then loads the root bundle; + * never falls back to the default locale. + * + * This is used for algorithms that have good pan-Unicode default behavior, + * such as case mappings, collation, and segmentation (BreakIterator). + */ +U_CAPI UResourceBundle* U_EXPORT2 +ures_openNoDefault(const char* path, const char* localeID, UErrorCode* status); + +/* Some getters used by the copy constructor */ +U_CFUNC const char* ures_getName(const UResourceBundle* resB); +#ifdef URES_DEBUG +U_CFUNC const char* ures_getPath(const UResourceBundle* resB); +/** + * If anything was in the RB cache, dump it to the screen. + * @return true if there was anything into the cache + */ +U_CAPI UBool U_EXPORT2 ures_dumpCacheContents(void); +#endif +/*U_CFUNC void ures_appendResPath(UResourceBundle *resB, const char* toAdd, int32_t lenToAdd);*/ +/*U_CFUNC void ures_setResPath(UResourceBundle *resB, const char* toAdd);*/ +/*U_CFUNC void ures_freeResPath(UResourceBundle *resB);*/ + +/* Candidates for export */ +U_CFUNC UResourceBundle *ures_copyResb(UResourceBundle *r, const UResourceBundle *original, UErrorCode *status); + +/** + * Returns a resource that can be located using the pathToResource argument. One needs optional package, locale + * and path inside the locale, for example: "/myData/en/zoneStrings/3". Keys and indexes are supported. Keys + * need to reference data in named structures, while indexes can reference both named and anonymous resources. + * Features a fill-in parameter. + * + * Note, this function does NOT have a syntax for specifying items within a tree. May want to consider a + * syntax that delineates between package/tree and resource. + * + * @param pathToResource a path that will lead to the requested resource + * @param fillIn if NULL a new UResourceBundle struct is allocated and must be deleted by the caller. + * Alternatively, you can supply a struct to be filled by this function. + * @param status fills in the outgoing error code. + * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it + */ +U_CAPI UResourceBundle* U_EXPORT2 +ures_findResource(const char* pathToResource, + UResourceBundle *fillIn, UErrorCode *status); + +/** + * Returns a sub resource that can be located using the pathToResource argument. One needs a path inside + * the supplied resource, for example, if you have "en_US" resource bundle opened, you might ask for + * "zoneStrings/3". Keys and indexes are supported. Keys + * need to reference data in named structures, while indexes can reference both + * named and anonymous resources. + * Features a fill-in parameter. + * + * @param resourceBundle a resource + * @param pathToResource a path that will lead to the requested resource + * @param fillIn if NULL a new UResourceBundle struct is allocated and must be deleted by the caller. + * Alternatively, you can supply a struct to be filled by this function. + * @param status fills in the outgoing error code. + * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it + */ +U_CAPI UResourceBundle* U_EXPORT2 +ures_findSubResource(const UResourceBundle *resB, + char* pathToResource, + UResourceBundle *fillIn, UErrorCode *status); + +/** + * Returns a functionally equivalent locale (considering keywords) for the specified keyword. + * @param result fillin for the equivalent locale + * @param resultCapacity capacity of the fillin buffer + * @param path path to the tree, or NULL for ICU data + * @param resName top level resource. Example: "collations" + * @param keyword locale keyword. Example: "collation" + * @param locid The requested locale + * @param isAvailable If non-null, pointer to fillin parameter that indicates whether the + * requested locale was available. The locale is defined as 'available' if it physically + * exists within the specified tree. + * @param omitDefault if true, omit keyword and value if default. 'de_DE\@collation=standard' -> 'de_DE' + * @param status error code + * @return the actual buffer size needed for the full locale. If it's greater + * than resultCapacity, the returned full name will be truncated and an error code will be returned. + */ +U_CAPI int32_t U_EXPORT2 +ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, + const char *path, const char *resName, const char *keyword, const char *locid, + UBool *isAvailable, UBool omitDefault, UErrorCode *status); + +/** + * Given a tree path and keyword, return a string enumeration of all possible values for that keyword. + * @param path path to the tree, or NULL for ICU data + * @param keyword a particular keyword to consider, must match a top level resource name + * within the tree. + * @param status error code + */ +U_CAPI UEnumeration* U_EXPORT2 +ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status); + + +/** + * Get a resource with multi-level fallback. Normally only the top level resources will + * fallback to its parent. This performs fallback on subresources. For example, when a table + * is defined in a resource bundle and a parent resource bundle, normally no fallback occurs + * on the sub-resources because the table is defined in the current resource bundle, but this + * function can perform fallback on the sub-resources of the table. + * @param resB a resource + * @param inKey a key associated with the requested resource + * @param fillIn if NULL a new UResourceBundle struct is allocated and must be deleted by the caller. + * Alternatively, you can supply a struct to be filled by this function. + * @param status: fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * could be a non-failing error + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT> + * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it + */ +U_CAPI UResourceBundle* U_EXPORT2 +ures_getByKeyWithFallback(const UResourceBundle *resB, + const char* inKey, + UResourceBundle *fillIn, + UErrorCode *status); + + +/** + * Get a String with multi-level fallback. Normally only the top level resources will + * fallback to its parent. This performs fallback on subresources. For example, when a table + * is defined in a resource bundle and a parent resource bundle, normally no fallback occurs + * on the sub-resources because the table is defined in the current resource bundle, but this + * function can perform fallback on the sub-resources of the table. + * @param resB a resource + * @param inKey a key associated with the requested resource + * @param status: fills in the outgoing error code + * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found + * could be a non-failing error + * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT> + * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must delete it + */ +U_CAPI const UChar* U_EXPORT2 +ures_getStringByKeyWithFallback(const UResourceBundle *resB, + const char* inKey, + int32_t* len, + UErrorCode *status); + +#ifdef __cplusplus + +U_CAPI void U_EXPORT2 +ures_getValueWithFallback(const UResourceBundle *bundle, const char *path, + UResourceBundle *tempFillIn, + icu::ResourceDataValue &value, UErrorCode &errorCode); + +U_CAPI void U_EXPORT2 +ures_getAllItemsWithFallback(const UResourceBundle *bundle, const char *path, + icu::ResourceSink &sink, UErrorCode &errorCode); + +#endif /* __cplusplus */ + +/** + * Get a version number by key + * @param resB bundle containing version number + * @param key the key for the version number + * @param ver fillin for the version number + * @param status error code + */ +U_CAPI void U_EXPORT2 +ures_getVersionByKey(const UResourceBundle *resB, + const char *key, + UVersionInfo ver, + UErrorCode *status); + + +/** + * Internal function. + * Return the version number associated with this ResourceBundle as a string. + * + * @param resourceBundle The resource bundle for which the version is checked. + * @return A version number string as specified in the resource bundle or its parent. + * The caller does not own this string. + * @see ures_getVersion + */ +U_CAPI const char* U_EXPORT2 +ures_getVersionNumberInternal(const UResourceBundle *resourceBundle); + +/** + * Return the name of the Locale associated with this ResourceBundle. This API allows + * you to query for the real locale of the resource. For example, if you requested + * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned. + * For subresources, the locale where this resource comes from will be returned. + * If fallback has occured, getLocale will reflect this. + * + * This internal version avoids deprecated-warnings in ICU code. + * + * @param resourceBundle resource bundle in question + * @param status just for catching illegal arguments + * @return A Locale name + */ +U_CAPI const char* U_EXPORT2 +ures_getLocaleInternal(const UResourceBundle* resourceBundle, + UErrorCode* status); + +/** + * Same as ures_openDirect() but uses the fill-in parameter instead of allocating a new bundle. + * + * @param r The existing UResourceBundle to fill in. If NULL then status will be + * set to U_ILLEGAL_ARGUMENT_ERROR. + * @param packageName The packageName and locale together point to an ICU udata object, + * as defined by <code> udata_open( packageName, "res", locale, err) </code> + * or equivalent. Typically, packageName will refer to a (.dat) file, or to + * a package registered with udata_setAppData(). Using a full file or directory + * pathname for packageName is deprecated. If NULL, ICU data will be used. + * @param locale specifies the locale for which we want to open the resource + * if NULL, the default locale will be used. If strlen(locale) == 0 + * root locale will be used. + * @param status The error code. + * @see ures_openDirect + * @internal + */ +U_CAPI void U_EXPORT2 +ures_openDirectFillIn(UResourceBundle *r, + const char *packageName, + const char *locale, + UErrorCode *status); + +#endif /*URESIMP_H*/ diff --git a/thirdparty/icu4c/common/ureslocs.h b/thirdparty/icu4c/common/ureslocs.h new file mode 100644 index 0000000000..f7c3344ef2 --- /dev/null +++ b/thirdparty/icu4c/common/ureslocs.h @@ -0,0 +1,27 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2009-2014 International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#ifndef __URESLOCS_H__ +#define __URESLOCS_H__ + +#include "unicode/utypes.h" +#include "unicode/udata.h" + +U_CDECL_BEGIN + + +#define U_ICUDATA_LANG U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "lang" +#define U_ICUDATA_REGION U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "region" +#define U_ICUDATA_CURR U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "curr" +#define U_ICUDATA_ZONE U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "zone" +#define U_ICUDATA_UNIT U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "unit" + +U_CDECL_END + +#endif diff --git a/thirdparty/icu4c/common/usc_impl.cpp b/thirdparty/icu4c/common/usc_impl.cpp new file mode 100644 index 0000000000..111029b974 --- /dev/null +++ b/thirdparty/icu4c/common/usc_impl.cpp @@ -0,0 +1,361 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File USC_IMPL.C +* +* Modification History: +* +* Date Name Description +* 07/08/2002 Eric Mader Creation. +****************************************************************************** +*/ + +#include "unicode/uscript.h" +#include "usc_impl.h" +#include "cmemory.h" + +#define PAREN_STACK_DEPTH 32 + +#define MOD(sp) ((sp) % PAREN_STACK_DEPTH) +#define LIMIT_INC(sp) (((sp) < PAREN_STACK_DEPTH)? (sp) + 1 : PAREN_STACK_DEPTH) +#define INC(sp,count) (MOD((sp) + (count))) +#define INC1(sp) (INC(sp, 1)) +#define DEC(sp,count) (MOD((sp) + PAREN_STACK_DEPTH - (count))) +#define DEC1(sp) (DEC(sp, 1)) +#define STACK_IS_EMPTY(scriptRun) ((scriptRun)->pushCount <= 0) +#define STACK_IS_NOT_EMPTY(scriptRun) (! STACK_IS_EMPTY(scriptRun)) +#define TOP(scriptRun) ((scriptRun)->parenStack[(scriptRun)->parenSP]) +#define SYNC_FIXUP(scriptRun) ((scriptRun)->fixupCount = 0) + +struct ParenStackEntry +{ + int32_t pairIndex; + UScriptCode scriptCode; +}; + +struct UScriptRun +{ + int32_t textLength; + const UChar *textArray; + + int32_t scriptStart; + int32_t scriptLimit; + UScriptCode scriptCode; + + struct ParenStackEntry parenStack[PAREN_STACK_DEPTH]; + int32_t parenSP; + int32_t pushCount; + int32_t fixupCount; +}; + +static int8_t highBit(int32_t value); + +static const UChar32 pairedChars[] = { + 0x0028, 0x0029, /* ascii paired punctuation */ + 0x003c, 0x003e, + 0x005b, 0x005d, + 0x007b, 0x007d, + 0x00ab, 0x00bb, /* guillemets */ + 0x2018, 0x2019, /* general punctuation */ + 0x201c, 0x201d, + 0x2039, 0x203a, + 0x3008, 0x3009, /* chinese paired punctuation */ + 0x300a, 0x300b, + 0x300c, 0x300d, + 0x300e, 0x300f, + 0x3010, 0x3011, + 0x3014, 0x3015, + 0x3016, 0x3017, + 0x3018, 0x3019, + 0x301a, 0x301b +}; + +static void push(UScriptRun *scriptRun, int32_t pairIndex, UScriptCode scriptCode) +{ + scriptRun->pushCount = LIMIT_INC(scriptRun->pushCount); + scriptRun->fixupCount = LIMIT_INC(scriptRun->fixupCount); + + scriptRun->parenSP = INC1(scriptRun->parenSP); + scriptRun->parenStack[scriptRun->parenSP].pairIndex = pairIndex; + scriptRun->parenStack[scriptRun->parenSP].scriptCode = scriptCode; +} + +static void pop(UScriptRun *scriptRun) +{ + if (STACK_IS_EMPTY(scriptRun)) { + return; + } + + if (scriptRun->fixupCount > 0) { + scriptRun->fixupCount -= 1; + } + + scriptRun->pushCount -= 1; + scriptRun->parenSP = DEC1(scriptRun->parenSP); + + /* If the stack is now empty, reset the stack + pointers to their initial values. + */ + if (STACK_IS_EMPTY(scriptRun)) { + scriptRun->parenSP = -1; + } +} + +static void fixup(UScriptRun *scriptRun, UScriptCode scriptCode) +{ + int32_t fixupSP = DEC(scriptRun->parenSP, scriptRun->fixupCount); + + while (scriptRun->fixupCount-- > 0) { + fixupSP = INC1(fixupSP); + scriptRun->parenStack[fixupSP].scriptCode = scriptCode; + } +} + +static int8_t +highBit(int32_t value) +{ + int8_t bit = 0; + + if (value <= 0) { + return -32; + } + + if (value >= 1 << 16) { + value >>= 16; + bit += 16; + } + + if (value >= 1 << 8) { + value >>= 8; + bit += 8; + } + + if (value >= 1 << 4) { + value >>= 4; + bit += 4; + } + + if (value >= 1 << 2) { + value >>= 2; + bit += 2; + } + + if (value >= 1 << 1) { + //value >>= 1; + bit += 1; + } + + return bit; +} + +static int32_t +getPairIndex(UChar32 ch) +{ + int32_t pairedCharCount = UPRV_LENGTHOF(pairedChars); + int32_t pairedCharPower = 1 << highBit(pairedCharCount); + int32_t pairedCharExtra = pairedCharCount - pairedCharPower; + + int32_t probe = pairedCharPower; + int32_t pairIndex = 0; + + if (ch >= pairedChars[pairedCharExtra]) { + pairIndex = pairedCharExtra; + } + + while (probe > (1 << 0)) { + probe >>= 1; + + if (ch >= pairedChars[pairIndex + probe]) { + pairIndex += probe; + } + } + + if (pairedChars[pairIndex] != ch) { + pairIndex = -1; + } + + return pairIndex; +} + +static UBool +sameScript(UScriptCode scriptOne, UScriptCode scriptTwo) +{ + return scriptOne <= USCRIPT_INHERITED || scriptTwo <= USCRIPT_INHERITED || scriptOne == scriptTwo; +} + +U_CAPI UScriptRun * U_EXPORT2 +uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode) +{ + UScriptRun *result = NULL; + + if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) { + return NULL; + } + + result = (UScriptRun *)uprv_malloc(sizeof (UScriptRun)); + + if (result == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + uscript_setRunText(result, src, length, pErrorCode); + + /* Release the UScriptRun if uscript_setRunText() returns an error */ + if (U_FAILURE(*pErrorCode)) { + uprv_free(result); + result = NULL; + } + + return result; +} + +U_CAPI void U_EXPORT2 +uscript_closeRun(UScriptRun *scriptRun) +{ + if (scriptRun != NULL) { + uprv_free(scriptRun); + } +} + +U_CAPI void U_EXPORT2 +uscript_resetRun(UScriptRun *scriptRun) +{ + if (scriptRun != NULL) { + scriptRun->scriptStart = 0; + scriptRun->scriptLimit = 0; + scriptRun->scriptCode = USCRIPT_INVALID_CODE; + scriptRun->parenSP = -1; + scriptRun->pushCount = 0; + scriptRun->fixupCount = 0; + } +} + +U_CAPI void U_EXPORT2 +uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode) +{ + if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) { + return; + } + + if (scriptRun == NULL || length < 0 || ((src == NULL) != (length == 0))) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + scriptRun->textArray = src; + scriptRun->textLength = length; + + uscript_resetRun(scriptRun); +} + +U_CAPI UBool U_EXPORT2 +uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript) +{ + UErrorCode error = U_ZERO_ERROR; + + /* if we've fallen off the end of the text, we're done */ + if (scriptRun == NULL || scriptRun->scriptLimit >= scriptRun->textLength) { + return FALSE; + } + + SYNC_FIXUP(scriptRun); + scriptRun->scriptCode = USCRIPT_COMMON; + + for (scriptRun->scriptStart = scriptRun->scriptLimit; scriptRun->scriptLimit < scriptRun->textLength; scriptRun->scriptLimit += 1) { + UChar high = scriptRun->textArray[scriptRun->scriptLimit]; + UChar32 ch = high; + UScriptCode sc; + int32_t pairIndex; + + /* + * if the character is a high surrogate and it's not the last one + * in the text, see if it's followed by a low surrogate + */ + if (high >= 0xD800 && high <= 0xDBFF && scriptRun->scriptLimit < scriptRun->textLength - 1) { + UChar low = scriptRun->textArray[scriptRun->scriptLimit + 1]; + + /* + * if it is followed by a low surrogate, + * consume it and form the full character + */ + if (low >= 0xDC00 && low <= 0xDFFF) { + ch = (high - 0xD800) * 0x0400 + low - 0xDC00 + 0x10000; + scriptRun->scriptLimit += 1; + } + } + + sc = uscript_getScript(ch, &error); + pairIndex = getPairIndex(ch); + + /* + * Paired character handling: + * + * if it's an open character, push it onto the stack. + * if it's a close character, find the matching open on the + * stack, and use that script code. Any non-matching open + * characters above it on the stack will be poped. + */ + if (pairIndex >= 0) { + if ((pairIndex & 1) == 0) { + push(scriptRun, pairIndex, scriptRun->scriptCode); + } else { + int32_t pi = pairIndex & ~1; + + while (STACK_IS_NOT_EMPTY(scriptRun) && TOP(scriptRun).pairIndex != pi) { + pop(scriptRun); + } + + if (STACK_IS_NOT_EMPTY(scriptRun)) { + sc = TOP(scriptRun).scriptCode; + } + } + } + + if (sameScript(scriptRun->scriptCode, sc)) { + if (scriptRun->scriptCode <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) { + scriptRun->scriptCode = sc; + + fixup(scriptRun, scriptRun->scriptCode); + } + + /* + * if this character is a close paired character, + * pop the matching open character from the stack + */ + if (pairIndex >= 0 && (pairIndex & 1) != 0) { + pop(scriptRun); + } + } else { + /* + * if the run broke on a surrogate pair, + * end it before the high surrogate + */ + if (ch >= 0x10000) { + scriptRun->scriptLimit -= 1; + } + + break; + } + } + + + if (pRunStart != NULL) { + *pRunStart = scriptRun->scriptStart; + } + + if (pRunLimit != NULL) { + *pRunLimit = scriptRun->scriptLimit; + } + + if (pRunScript != NULL) { + *pRunScript = scriptRun->scriptCode; + } + + return TRUE; +} diff --git a/thirdparty/icu4c/common/usc_impl.h b/thirdparty/icu4c/common/usc_impl.h new file mode 100644 index 0000000000..44899649d4 --- /dev/null +++ b/thirdparty/icu4c/common/usc_impl.h @@ -0,0 +1,139 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File USC_IMPL.H +* +* Modification History: +* +* Date Name Description +* 07/08/2002 Eric Mader Creation. +****************************************************************************** +*/ + +#ifndef USC_IMPL_H +#define USC_IMPL_H +#include "unicode/utypes.h" +#include "unicode/uscript.h" + +/** + * <code>UScriptRun</code> is used to find runs of characters in + * the same script. It implements a simple iterator over an array + * of characters. The iterator will resolve script-neutral characters + * like punctuation into the script of the surrounding characters. + * + * The iterator will try to match paired punctuation. If it sees an + * opening punctuation character, it will remember the script that + * was assigned to that character, and assign the same script to the + * matching closing punctuation. + * + * Scripts are chosen based on the <code>UScriptCode</code> enumeration. + * No attempt is made to combine related scripts into a single run. In + * particular, Hiragana, Katakana, and Han characters will appear in seperate + * runs. + + * Here is an example of how to iterate over script runs: + * <pre> + * \code + * void printScriptRuns(const UChar *text, int32_t length) + * { + * UErrorCode error = U_ZERO_ERROR; + * UScriptRun *scriptRun = uscript_openRun(text, testLength, &error); + * int32_t start = 0, limit = 0; + * UScriptCode code = USCRIPT_INVALID_CODE; + * + * while (uscript_nextRun(&start, &limit, &code)) { + * printf("Script '%s' from %d to %d.\n", uscript_getName(code), start, limit); + * } + * + * uscript_closeRun(scriptRun); + * } + * </pre> + */ +struct UScriptRun; + +typedef struct UScriptRun UScriptRun; + +/** + * Create a <code>UScriptRun</code> object for iterating over the given text. This object must + * be freed using <code>uscript_closeRun()</code>. Note that this object does not copy the source text, + * only the pointer to it. You must make sure that the pointer remains valid until you call + * <code>uscript_closeRun()</code> or <code>uscript_setRunText()</code>. + * + * @param src is the address of the array of characters over which to iterate. + * if <code>src == NULL</code> and <code>length == 0</code>, + * an empty <code>UScriptRun</code> object will be returned. + * + * @param length is the number of characters over which to iterate. + * + * @param pErrorCode is a pointer to a valid <code>UErrorCode</code> value. If this value + * indicates a failure on entry, the function will immediately return. + * On exit the value will indicate the success of the operation. + * + * @return the address of <code>UScriptRun</code> object which will iterate over the text, + * or <code>NULL</code> if the operation failed. + */ +U_CAPI UScriptRun * U_EXPORT2 +uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode); + +/** + * Frees the given <code>UScriptRun</code> object and any storage associated with it. + * On return, scriptRun no longer points to a valid <code>UScriptRun</code> object. + * + * @param scriptRun is the <code>UScriptRun</code> object which will be freed. + */ +U_CAPI void U_EXPORT2 +uscript_closeRun(UScriptRun *scriptRun); + +/** + * Reset the <code>UScriptRun</code> object so that it will start iterating from + * the beginning. + * + * @param scriptRun is the address of the <code>UScriptRun</code> object to be reset. + */ +U_CAPI void U_EXPORT2 +uscript_resetRun(UScriptRun *scriptRun); + +/** + * Change the text over which the given <code>UScriptRun</code> object iterates. + * + * @param scriptRun is the <code>UScriptRun</code> object which will be changed. + * + * @param src is the address of the new array of characters over which to iterate. + * If <code>src == NULL</code> and <code>length == 0</code>, + * the <code>UScriptRun</code> object will become empty. + * + * @param length is the new number of characters over which to iterate + * + * @param pErrorCode is a pointer to a valid <code>UErrorCode</code> value. If this value + * indicates a failure on entry, the function will immediately return. + * On exit the value will indicate the success of the operation. + */ +U_CAPI void U_EXPORT2 +uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode); + +/** + * Advance the <code>UScriptRun</code> object to the next script run, return the start and limit + * offsets, and the script of the run. + * + * @param scriptRun is the address of the <code>UScriptRun</code> object. + * + * @param pRunStart is a pointer to the variable to receive the starting offset of the next run. + * This pointer can be <code>NULL</code> if the value is not needed. + * + * @param pRunLimit is a pointer to the variable to receive the limit offset of the next run. + * This pointer can be <code>NULL</code> if the value is not needed. + * + * @param pRunScript is a pointer to the variable to receive the UScriptCode for the + * script of the current run. This pointer can be <code>NULL</code> if the value is not needed. + * + * @return true if there was another script run. + */ +U_CAPI UBool U_EXPORT2 +uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript); + +#endif diff --git a/thirdparty/icu4c/common/uscript.cpp b/thirdparty/icu4c/common/uscript.cpp new file mode 100644 index 0000000000..f8bd7e7fdd --- /dev/null +++ b/thirdparty/icu4c/common/uscript.cpp @@ -0,0 +1,149 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1997-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File USCRIPT.C +* +* Modification History: +* +* Date Name Description +* 07/06/2001 Ram Creation. +****************************************************************************** +*/ + +#include "unicode/uchar.h" +#include "unicode/uscript.h" +#include "unicode/uloc.h" +#include "bytesinkutil.h" +#include "charstr.h" +#include "cmemory.h" +#include "cstring.h" +#include "ulocimp.h" + +static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN }; +static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN }; +static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO }; + +static int32_t +setCodes(const UScriptCode *src, int32_t length, + UScriptCode *dest, int32_t capacity, UErrorCode *err) { + int32_t i; + if(U_FAILURE(*err)) { return 0; } + if(length > capacity) { + *err = U_BUFFER_OVERFLOW_ERROR; + return length; + } + for(i = 0; i < length; ++i) { + dest[i] = src[i]; + } + return length; +} + +static int32_t +setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) { + if(U_FAILURE(*err)) { return 0; } + if(1 > capacity) { + *err = U_BUFFER_OVERFLOW_ERROR; + return 1; + } + scripts[0] = script; + return 1; +} + +static int32_t +getCodesFromLocale(const char *locale, + UScriptCode *scripts, int32_t capacity, UErrorCode *err) { + UErrorCode internalErrorCode = U_ZERO_ERROR; + char lang[8] = {0}; + char script[8] = {0}; + int32_t scriptLength; + if(U_FAILURE(*err)) { return 0; } + // Multi-script languages, equivalent to the LocaleScript data + // that we used to load from locale resource bundles. + /*length = */ uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode); + if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) { + return 0; + } + if(0 == uprv_strcmp(lang, "ja")) { + return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err); + } + if(0 == uprv_strcmp(lang, "ko")) { + return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err); + } + scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode); + if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) { + return 0; + } + if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) { + return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err); + } + // Explicit script code. + if(scriptLength != 0) { + UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script); + if(scriptCode != USCRIPT_INVALID_CODE) { + if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) { + scriptCode = USCRIPT_HAN; + } + return setOneCode(scriptCode, scripts, capacity, err); + } + } + return 0; +} + +/* TODO: this is a bad API and should be deprecated, ticket #11141 */ +U_CAPI int32_t U_EXPORT2 +uscript_getCode(const char* nameOrAbbrOrLocale, + UScriptCode* fillIn, + int32_t capacity, + UErrorCode* err){ + UBool triedCode; + UErrorCode internalErrorCode; + int32_t length; + + if(U_FAILURE(*err)) { + return 0; + } + if(nameOrAbbrOrLocale==NULL || + (fillIn == NULL ? capacity != 0 : capacity < 0)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + triedCode = FALSE; + if(uprv_strchr(nameOrAbbrOrLocale, '-')==NULL && uprv_strchr(nameOrAbbrOrLocale, '_')==NULL ){ + /* try long and abbreviated script names first */ + UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); + if(code!=USCRIPT_INVALID_CODE) { + return setOneCode(code, fillIn, capacity, err); + } + triedCode = TRUE; + } + internalErrorCode = U_ZERO_ERROR; + length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err); + if(U_FAILURE(*err) || length != 0) { + return length; + } + icu::CharString likely; + { + icu::CharStringByteSink sink(&likely); + ulocimp_addLikelySubtags(nameOrAbbrOrLocale, sink, &internalErrorCode); + } + if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) { + length = getCodesFromLocale(likely.data(), fillIn, capacity, err); + if(U_FAILURE(*err) || length != 0) { + return length; + } + } + if(!triedCode) { + /* still not found .. try long and abbreviated script names again */ + UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale); + if(code!=USCRIPT_INVALID_CODE) { + return setOneCode(code, fillIn, capacity, err); + } + } + return 0; +} diff --git a/thirdparty/icu4c/common/uscript_props.cpp b/thirdparty/icu4c/common/uscript_props.cpp new file mode 100644 index 0000000000..25d287b57a --- /dev/null +++ b/thirdparty/icu4c/common/uscript_props.cpp @@ -0,0 +1,302 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2013-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: uscript_props.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2013feb16 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" +#include "unicode/unistr.h" +#include "unicode/uscript.h" +#include "unicode/utf16.h" +#include "ustr_imp.h" +#include "cmemory.h" + +namespace { + +// Script metadata (script properties). +// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt + +// 0 = NOT_ENCODED, no sample character, default false script properties. +// Bits 20.. 0: sample character + +// Bits 23..21: usage +const int32_t UNKNOWN = 1 << 21; +const int32_t EXCLUSION = 2 << 21; +const int32_t LIMITED_USE = 3 << 21; +// st int32_t ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10 +const int32_t RECOMMENDED = 5 << 21; + +// Bits 31..24: Single-bit flags +const int32_t RTL = 1 << 24; +const int32_t LB_LETTERS = 1 << 25; +const int32_t CASED = 1 << 26; + +const int32_t SCRIPT_PROPS[] = { + // Begin copy-paste output from + // tools/trunk/unicode/py/parsescriptmetadata.py + 0x0040 | RECOMMENDED, // Zyyy + 0x0308 | RECOMMENDED, // Zinh + 0x0628 | RECOMMENDED | RTL, // Arab + 0x0531 | RECOMMENDED | CASED, // Armn + 0x0995 | RECOMMENDED, // Beng + 0x3105 | RECOMMENDED | LB_LETTERS, // Bopo + 0x13C4 | LIMITED_USE | CASED, // Cher + 0x03E2 | EXCLUSION | CASED, // Copt + 0x042F | RECOMMENDED | CASED, // Cyrl + 0x10414 | EXCLUSION | CASED, // Dsrt + 0x0905 | RECOMMENDED, // Deva + 0x12A0 | RECOMMENDED, // Ethi + 0x10D3 | RECOMMENDED, // Geor + 0x10330 | EXCLUSION, // Goth + 0x03A9 | RECOMMENDED | CASED, // Grek + 0x0A95 | RECOMMENDED, // Gujr + 0x0A15 | RECOMMENDED, // Guru + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hani + 0xAC00 | RECOMMENDED, // Hang + 0x05D0 | RECOMMENDED | RTL, // Hebr + 0x304B | RECOMMENDED | LB_LETTERS, // Hira + 0x0C95 | RECOMMENDED, // Knda + 0x30AB | RECOMMENDED | LB_LETTERS, // Kana + 0x1780 | RECOMMENDED | LB_LETTERS, // Khmr + 0x0EA5 | RECOMMENDED | LB_LETTERS, // Laoo + 0x004C | RECOMMENDED | CASED, // Latn + 0x0D15 | RECOMMENDED, // Mlym + 0x1826 | EXCLUSION, // Mong + 0x1000 | RECOMMENDED | LB_LETTERS, // Mymr + 0x168F | EXCLUSION, // Ogam + 0x10300 | EXCLUSION, // Ital + 0x0B15 | RECOMMENDED, // Orya + 0x16A0 | EXCLUSION, // Runr + 0x0D85 | RECOMMENDED, // Sinh + 0x0710 | LIMITED_USE | RTL, // Syrc + 0x0B95 | RECOMMENDED, // Taml + 0x0C15 | RECOMMENDED, // Telu + 0x078C | RECOMMENDED | RTL, // Thaa + 0x0E17 | RECOMMENDED | LB_LETTERS, // Thai + 0x0F40 | RECOMMENDED, // Tibt + 0x14C0 | LIMITED_USE, // Cans + 0xA288 | LIMITED_USE | LB_LETTERS, // Yiii + 0x1703 | EXCLUSION, // Tglg + 0x1723 | EXCLUSION, // Hano + 0x1743 | EXCLUSION, // Buhd + 0x1763 | EXCLUSION, // Tagb + 0x280E | UNKNOWN, // Brai + 0x10800 | EXCLUSION | RTL, // Cprt + 0x1900 | LIMITED_USE, // Limb + 0x10000 | EXCLUSION, // Linb + 0x10480 | EXCLUSION, // Osma + 0x10450 | EXCLUSION, // Shaw + 0x1950 | LIMITED_USE | LB_LETTERS, // Tale + 0x10380 | EXCLUSION, // Ugar + 0, + 0x1A00 | EXCLUSION, // Bugi + 0x2C00 | EXCLUSION | CASED, // Glag + 0x10A00 | EXCLUSION | RTL, // Khar + 0xA800 | LIMITED_USE, // Sylo + 0x1980 | LIMITED_USE | LB_LETTERS, // Talu + 0x2D30 | LIMITED_USE, // Tfng + 0x103A0 | EXCLUSION, // Xpeo + 0x1B05 | LIMITED_USE, // Bali + 0x1BC0 | LIMITED_USE, // Batk + 0, + 0x11005 | EXCLUSION, // Brah + 0xAA00 | LIMITED_USE, // Cham + 0, + 0, + 0, + 0, + 0x13153 | EXCLUSION, // Egyp + 0, + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hans + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hant + 0x16B1C | EXCLUSION, // Hmng + 0x10CA1 | EXCLUSION | RTL | CASED, // Hung + 0, + 0xA984 | LIMITED_USE, // Java + 0xA90A | LIMITED_USE, // Kali + 0, + 0, + 0x1C00 | LIMITED_USE, // Lepc + 0x10647 | EXCLUSION, // Lina + 0x0840 | LIMITED_USE | RTL, // Mand + 0, + 0x10980 | EXCLUSION | RTL, // Mero + 0x07CA | LIMITED_USE | RTL, // Nkoo + 0x10C00 | EXCLUSION | RTL, // Orkh + 0x1036B | EXCLUSION, // Perm + 0xA840 | EXCLUSION, // Phag + 0x10900 | EXCLUSION | RTL, // Phnx + 0x16F00 | LIMITED_USE, // Plrd + 0, + 0, + 0, + 0, + 0, + 0, + 0xA549 | LIMITED_USE, // Vaii + 0, + 0x12000 | EXCLUSION, // Xsux + 0, + 0xFDD0 | UNKNOWN, // Zzzz + 0x102A0 | EXCLUSION, // Cari + 0x304B | RECOMMENDED | LB_LETTERS, // Jpan + 0x1A20 | LIMITED_USE | LB_LETTERS, // Lana + 0x10280 | EXCLUSION, // Lyci + 0x10920 | EXCLUSION | RTL, // Lydi + 0x1C5A | LIMITED_USE, // Olck + 0xA930 | EXCLUSION, // Rjng + 0xA882 | LIMITED_USE, // Saur + 0x1D850 | EXCLUSION, // Sgnw + 0x1B83 | LIMITED_USE, // Sund + 0, + 0xABC0 | LIMITED_USE, // Mtei + 0x10840 | EXCLUSION | RTL, // Armi + 0x10B00 | EXCLUSION | RTL, // Avst + 0x11103 | LIMITED_USE, // Cakm + 0xAC00 | RECOMMENDED, // Kore + 0x11083 | EXCLUSION, // Kthi + 0x10AD8 | EXCLUSION | RTL, // Mani + 0x10B60 | EXCLUSION | RTL, // Phli + 0x10B8F | EXCLUSION | RTL, // Phlp + 0, + 0x10B40 | EXCLUSION | RTL, // Prti + 0x0800 | EXCLUSION | RTL, // Samr + 0xAA80 | LIMITED_USE | LB_LETTERS, // Tavt + 0, + 0, + 0xA6A0 | LIMITED_USE, // Bamu + 0xA4D0 | LIMITED_USE, // Lisu + 0, + 0x10A60 | EXCLUSION | RTL, // Sarb + 0x16AE6 | EXCLUSION, // Bass + 0x1BC20 | EXCLUSION, // Dupl + 0x10500 | EXCLUSION, // Elba + 0x11315 | EXCLUSION, // Gran + 0, + 0, + 0x1E802 | EXCLUSION | RTL, // Mend + 0x109A0 | EXCLUSION | RTL, // Merc + 0x10A95 | EXCLUSION | RTL, // Narb + 0x10896 | EXCLUSION | RTL, // Nbat + 0x10873 | EXCLUSION | RTL, // Palm + 0x112BE | EXCLUSION, // Sind + 0x118B4 | EXCLUSION | CASED, // Wara + 0, + 0, + 0x16A4F | EXCLUSION, // Mroo + 0x1B1C4 | EXCLUSION | LB_LETTERS, // Nshu + 0x11183 | EXCLUSION, // Shrd + 0x110D0 | EXCLUSION, // Sora + 0x11680 | EXCLUSION, // Takr + 0x18229 | EXCLUSION | LB_LETTERS, // Tang + 0, + 0x14400 | EXCLUSION, // Hluw + 0x11208 | EXCLUSION, // Khoj + 0x11484 | EXCLUSION, // Tirh + 0x10537 | EXCLUSION, // Aghb + 0x11152 | EXCLUSION, // Mahj + 0x11717 | EXCLUSION | LB_LETTERS, // Ahom + 0x108F4 | EXCLUSION | RTL, // Hatr + 0x1160E | EXCLUSION, // Modi + 0x1128F | EXCLUSION, // Mult + 0x11AC0 | EXCLUSION, // Pauc + 0x1158E | EXCLUSION, // Sidd + 0x1E909 | LIMITED_USE | RTL | CASED, // Adlm + 0x11C0E | EXCLUSION, // Bhks + 0x11C72 | EXCLUSION, // Marc + 0x11412 | LIMITED_USE, // Newa + 0x104B5 | LIMITED_USE | CASED, // Osge + 0x5B57 | RECOMMENDED | LB_LETTERS, // Hanb + 0x1112 | RECOMMENDED, // Jamo + 0, + 0x11D10 | EXCLUSION, // Gonm + 0x11A5C | EXCLUSION, // Soyo + 0x11A0B | EXCLUSION, // Zanb + 0x1180B | EXCLUSION, // Dogr + 0x11D71 | LIMITED_USE, // Gong + 0x11EE5 | EXCLUSION, // Maka + 0x16E40 | EXCLUSION | CASED, // Medf + 0x10D12 | LIMITED_USE | RTL, // Rohg + 0x10F42 | EXCLUSION | RTL, // Sogd + 0x10F19 | EXCLUSION | RTL, // Sogo + 0x10FF1 | EXCLUSION | RTL, // Elym + 0x1E108 | LIMITED_USE, // Hmnp + 0x119CE | EXCLUSION, // Nand + 0x1E2E1 | LIMITED_USE, // Wcho + 0x10FBF | EXCLUSION | RTL, // Chrs + 0x1190C | EXCLUSION, // Diak + 0x18C65 | EXCLUSION | LB_LETTERS, // Kits + 0x10E88 | EXCLUSION | RTL, // Yezi + // End copy-paste from parsescriptmetadata.py +}; + +int32_t getScriptProps(UScriptCode script) { + if (0 <= script && script < UPRV_LENGTHOF(SCRIPT_PROPS)) { + return SCRIPT_PROPS[script]; + } else { + return 0; + } +} + +} // namespace + +U_CAPI int32_t U_EXPORT2 +uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { return 0; } + if(capacity < 0 || (capacity > 0 && dest == NULL)) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + int32_t sampleChar = getScriptProps(script) & 0x1fffff; + int32_t length; + if(sampleChar == 0) { + length = 0; + } else { + length = U16_LENGTH(sampleChar); + if(length <= capacity) { + int32_t i = 0; + U16_APPEND_UNSAFE(dest, i, sampleChar); + } + } + return u_terminateUChars(dest, capacity, length, pErrorCode); +} + +U_COMMON_API icu::UnicodeString U_EXPORT2 +uscript_getSampleUnicodeString(UScriptCode script) { + icu::UnicodeString sample; + int32_t sampleChar = getScriptProps(script) & 0x1fffff; + if(sampleChar != 0) { + sample.append(sampleChar); + } + return sample; +} + +U_CAPI UScriptUsage U_EXPORT2 +uscript_getUsage(UScriptCode script) { + return (UScriptUsage)((getScriptProps(script) >> 21) & 7); +} + +U_CAPI UBool U_EXPORT2 +uscript_isRightToLeft(UScriptCode script) { + return (getScriptProps(script) & RTL) != 0; +} + +U_CAPI UBool U_EXPORT2 +uscript_breaksBetweenLetters(UScriptCode script) { + return (getScriptProps(script) & LB_LETTERS) != 0; +} + +U_CAPI UBool U_EXPORT2 +uscript_isCased(UScriptCode script) { + return (getScriptProps(script) & CASED) != 0; +} diff --git a/thirdparty/icu4c/common/uset.cpp b/thirdparty/icu4c/common/uset.cpp new file mode 100644 index 0000000000..eae7981d52 --- /dev/null +++ b/thirdparty/icu4c/common/uset.cpp @@ -0,0 +1,641 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uset.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002mar07 +* created by: Markus W. Scherer +* +* There are functions to efficiently serialize a USet into an array of uint16_t +* and functions to use such a serialized form efficiently without +* instantiating a new USet. +*/ + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "unicode/uset.h" +#include "unicode/uniset.h" +#include "cmemory.h" +#include "unicode/ustring.h" +#include "unicode/parsepos.h" + +U_NAMESPACE_USE + +U_CAPI USet* U_EXPORT2 +uset_openEmpty() { + return (USet*) new UnicodeSet(); +} + +U_CAPI USet* U_EXPORT2 +uset_open(UChar32 start, UChar32 end) { + return (USet*) new UnicodeSet(start, end); +} + +U_CAPI void U_EXPORT2 +uset_close(USet* set) { + delete (UnicodeSet*) set; +} + +U_CAPI USet * U_EXPORT2 +uset_clone(const USet *set) { + return (USet*) (((UnicodeSet*) set)->UnicodeSet::clone()); +} + +U_CAPI UBool U_EXPORT2 +uset_isFrozen(const USet *set) { + return ((UnicodeSet*) set)->UnicodeSet::isFrozen(); +} + +U_CAPI void U_EXPORT2 +uset_freeze(USet *set) { + ((UnicodeSet*) set)->UnicodeSet::freeze(); +} + +U_CAPI USet * U_EXPORT2 +uset_cloneAsThawed(const USet *set) { + return (USet*) (((UnicodeSet*) set)->UnicodeSet::cloneAsThawed()); +} + +U_CAPI void U_EXPORT2 +uset_set(USet* set, + UChar32 start, UChar32 end) { + ((UnicodeSet*) set)->UnicodeSet::set(start, end); +} + +U_CAPI void U_EXPORT2 +uset_addAll(USet* set, const USet *additionalSet) { + ((UnicodeSet*) set)->UnicodeSet::addAll(*((const UnicodeSet*)additionalSet)); +} + +U_CAPI void U_EXPORT2 +uset_add(USet* set, UChar32 c) { + ((UnicodeSet*) set)->UnicodeSet::add(c); +} + +U_CAPI void U_EXPORT2 +uset_addRange(USet* set, UChar32 start, UChar32 end) { + ((UnicodeSet*) set)->UnicodeSet::add(start, end); +} + +U_CAPI void U_EXPORT2 +uset_addString(USet* set, const UChar* str, int32_t strLen) { + // UnicodeString handles -1 for strLen + UnicodeString s(strLen<0, str, strLen); + ((UnicodeSet*) set)->UnicodeSet::add(s); +} + +U_CAPI void U_EXPORT2 +uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen) { + // UnicodeString handles -1 for strLen + UnicodeString s(str, strLen); + ((UnicodeSet*) set)->UnicodeSet::addAll(s); +} + +U_CAPI void U_EXPORT2 +uset_remove(USet* set, UChar32 c) { + ((UnicodeSet*) set)->UnicodeSet::remove(c); +} + +U_CAPI void U_EXPORT2 +uset_removeRange(USet* set, UChar32 start, UChar32 end) { + ((UnicodeSet*) set)->UnicodeSet::remove(start, end); +} + +U_CAPI void U_EXPORT2 +uset_removeString(USet* set, const UChar* str, int32_t strLen) { + UnicodeString s(strLen==-1, str, strLen); + ((UnicodeSet*) set)->UnicodeSet::remove(s); +} + +U_CAPI void U_EXPORT2 +uset_removeAll(USet* set, const USet* remove) { + ((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove); +} + +U_CAPI void U_EXPORT2 +uset_retain(USet* set, UChar32 start, UChar32 end) { + ((UnicodeSet*) set)->UnicodeSet::retain(start, end); +} + +U_CAPI void U_EXPORT2 +uset_retainAll(USet* set, const USet* retain) { + ((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain); +} + +U_CAPI void U_EXPORT2 +uset_compact(USet* set) { + ((UnicodeSet*) set)->UnicodeSet::compact(); +} + +U_CAPI void U_EXPORT2 +uset_complement(USet* set) { + ((UnicodeSet*) set)->UnicodeSet::complement(); +} + +U_CAPI void U_EXPORT2 +uset_complementAll(USet* set, const USet* complement) { + ((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement); +} + +U_CAPI void U_EXPORT2 +uset_clear(USet* set) { + ((UnicodeSet*) set)->UnicodeSet::clear(); +} + +U_CAPI void U_EXPORT2 +uset_removeAllStrings(USet* set) { + ((UnicodeSet*) set)->UnicodeSet::removeAllStrings(); +} + +U_CAPI UBool U_EXPORT2 +uset_isEmpty(const USet* set) { + return ((const UnicodeSet*) set)->UnicodeSet::isEmpty(); +} + +U_CAPI UBool U_EXPORT2 +uset_contains(const USet* set, UChar32 c) { + return ((const UnicodeSet*) set)->UnicodeSet::contains(c); +} + +U_CAPI UBool U_EXPORT2 +uset_containsRange(const USet* set, UChar32 start, UChar32 end) { + return ((const UnicodeSet*) set)->UnicodeSet::contains(start, end); +} + +U_CAPI UBool U_EXPORT2 +uset_containsString(const USet* set, const UChar* str, int32_t strLen) { + UnicodeString s(strLen==-1, str, strLen); + return ((const UnicodeSet*) set)->UnicodeSet::contains(s); +} + +U_CAPI UBool U_EXPORT2 +uset_containsAll(const USet* set1, const USet* set2) { + return ((const UnicodeSet*) set1)->UnicodeSet::containsAll(* (const UnicodeSet*) set2); +} + +U_CAPI UBool U_EXPORT2 +uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen) { + // Create a string alias, since nothing is being added to the set. + UnicodeString s(strLen==-1, str, strLen); + return ((const UnicodeSet*) set)->UnicodeSet::containsAll(s); +} + +U_CAPI UBool U_EXPORT2 +uset_containsNone(const USet* set1, const USet* set2) { + return ((const UnicodeSet*) set1)->UnicodeSet::containsNone(* (const UnicodeSet*) set2); +} + +U_CAPI UBool U_EXPORT2 +uset_containsSome(const USet* set1, const USet* set2) { + return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2); +} + +U_CAPI int32_t U_EXPORT2 +uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) { + return ((UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition); +} + +U_CAPI int32_t U_EXPORT2 +uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) { + return ((UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition); +} + +U_CAPI int32_t U_EXPORT2 +uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) { + return ((UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition); +} + +U_CAPI int32_t U_EXPORT2 +uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) { + return ((UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition); +} + +U_CAPI UBool U_EXPORT2 +uset_equals(const USet* set1, const USet* set2) { + return *(const UnicodeSet*)set1 == *(const UnicodeSet*)set2; +} + +U_CAPI int32_t U_EXPORT2 +uset_indexOf(const USet* set, UChar32 c) { + return ((UnicodeSet*) set)->UnicodeSet::indexOf(c); +} + +U_CAPI UChar32 U_EXPORT2 +uset_charAt(const USet* set, int32_t index) { + return ((UnicodeSet*) set)->UnicodeSet::charAt(index); +} + +U_CAPI int32_t U_EXPORT2 +uset_size(const USet* set) { + return ((const UnicodeSet*) set)->UnicodeSet::size(); +} + +U_NAMESPACE_BEGIN +/** + * This class only exists to provide access to the UnicodeSet private + * USet support API. Declaring a class a friend is more portable than + * trying to declare extern "C" functions as friends. + */ +class USetAccess /* not : public UObject because all methods are static */ { +public: + /* Try to have the compiler inline these*/ + inline static int32_t getStringCount(const UnicodeSet& set) { + return set.stringsSize(); + } + inline static const UnicodeString* getString(const UnicodeSet& set, + int32_t i) { + return set.getString(i); + } +private: + /* do not instantiate*/ + USetAccess(); +}; +U_NAMESPACE_END + +U_CAPI int32_t U_EXPORT2 +uset_getItemCount(const USet* uset) { + const UnicodeSet& set = *(const UnicodeSet*)uset; + return set.getRangeCount() + USetAccess::getStringCount(set); +} + +U_CAPI int32_t U_EXPORT2 +uset_getItem(const USet* uset, int32_t itemIndex, + UChar32* start, UChar32* end, + UChar* str, int32_t strCapacity, + UErrorCode* ec) { + if (U_FAILURE(*ec)) return 0; + const UnicodeSet& set = *(const UnicodeSet*)uset; + int32_t rangeCount; + + if (itemIndex < 0) { + *ec = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } else if (itemIndex < (rangeCount = set.getRangeCount())) { + *start = set.getRangeStart(itemIndex); + *end = set.getRangeEnd(itemIndex); + return 0; + } else { + itemIndex -= rangeCount; + if (itemIndex < USetAccess::getStringCount(set)) { + const UnicodeString* s = USetAccess::getString(set, itemIndex); + return s->extract(str, strCapacity, *ec); + } else { + *ec = U_INDEX_OUTOFBOUNDS_ERROR; + return -1; + } + } +} + +//U_CAPI int32_t U_EXPORT2 +//uset_getRangeCount(const USet* set) { +// return ((const UnicodeSet*) set)->getRangeCount(); +//} +// +//U_CAPI UBool U_EXPORT2 +//uset_getRange(const USet* set, int32_t rangeIndex, +// UChar32* pStart, UChar32* pEnd) { +// if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) { +// return FALSE; +// } +// const UnicodeSet* us = (const UnicodeSet*) set; +// *pStart = us->getRangeStart(rangeIndex); +// *pEnd = us->getRangeEnd(rangeIndex); +// return TRUE; +//} + +/* + * Serialize a USet into 16-bit units. + * Store BMP code points as themselves with one 16-bit unit each. + * + * Important: the code points in the array are in ascending order, + * therefore all BMP code points precede all supplementary code points. + * + * Store each supplementary code point in 2 16-bit units, + * simply with higher-then-lower 16-bit halfs. + * + * Precede the entire list with the length. + * If there are supplementary code points, then set bit 15 in the length + * and add the bmpLength between it and the array. + * + * In other words: + * - all BMP: (length=bmpLength) BMP, .., BMP + * - some supplementary: (length|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, .. + */ +U_CAPI int32_t U_EXPORT2 +uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* ec) { + if (ec==NULL || U_FAILURE(*ec)) { + return 0; + } + + return ((const UnicodeSet*) set)->UnicodeSet::serialize(dest, destCapacity,* ec); +} + +U_CAPI UBool U_EXPORT2 +uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength) { + int32_t length; + + if(fillSet==NULL) { + return FALSE; + } + if(src==NULL || srcLength<=0) { + fillSet->length=fillSet->bmpLength=0; + return FALSE; + } + + length=*src++; + if(length&0x8000) { + /* there are supplementary values */ + length&=0x7fff; + if(srcLength<(2+length)) { + fillSet->length=fillSet->bmpLength=0; + return FALSE; + } + fillSet->bmpLength=*src++; + } else { + /* only BMP values */ + if(srcLength<(1+length)) { + fillSet->length=fillSet->bmpLength=0; + return FALSE; + } + fillSet->bmpLength=length; + } + fillSet->array=src; + fillSet->length=length; + return TRUE; +} + +U_CAPI void U_EXPORT2 +uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c) { + if(fillSet==NULL || (uint32_t)c>0x10ffff) { + return; + } + + fillSet->array=fillSet->staticArray; + if(c<0xffff) { + fillSet->bmpLength=fillSet->length=2; + fillSet->staticArray[0]=(uint16_t)c; + fillSet->staticArray[1]=(uint16_t)c+1; + } else if(c==0xffff) { + fillSet->bmpLength=1; + fillSet->length=3; + fillSet->staticArray[0]=0xffff; + fillSet->staticArray[1]=1; + fillSet->staticArray[2]=0; + } else if(c<0x10ffff) { + fillSet->bmpLength=0; + fillSet->length=4; + fillSet->staticArray[0]=(uint16_t)(c>>16); + fillSet->staticArray[1]=(uint16_t)c; + ++c; + fillSet->staticArray[2]=(uint16_t)(c>>16); + fillSet->staticArray[3]=(uint16_t)c; + } else /* c==0x10ffff */ { + fillSet->bmpLength=0; + fillSet->length=2; + fillSet->staticArray[0]=0x10; + fillSet->staticArray[1]=0xffff; + } +} + +U_CAPI UBool U_EXPORT2 +uset_serializedContains(const USerializedSet* set, UChar32 c) { + const uint16_t* array; + + if(set==NULL || (uint32_t)c>0x10ffff) { + return FALSE; + } + + array=set->array; + if(c<=0xffff) { + /* find c in the BMP part */ + int32_t lo = 0; + int32_t hi = set->bmpLength-1; + if (c < array[0]) { + hi = 0; + } else if (c < array[hi]) { + for(;;) { + int32_t i = (lo + hi) >> 1; + if (i == lo) { + break; // Done! + } else if (c < array[i]) { + hi = i; + } else { + lo = i; + } + } + } else { + hi += 1; + } + return (UBool)(hi&1); + } else { + /* find c in the supplementary part */ + uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c; + int32_t base = set->bmpLength; + int32_t lo = 0; + int32_t hi = set->length - 2 - base; + if (high < array[base] || (high==array[base] && low<array[base+1])) { + hi = 0; + } else if (high < array[base+hi] || (high==array[base+hi] && low<array[base+hi+1])) { + for (;;) { + int32_t i = ((lo + hi) >> 1) & ~1; // Guarantee even result + int32_t iabs = i + base; + if (i == lo) { + break; // Done! + } else if (high < array[iabs] || (high==array[iabs] && low<array[iabs+1])) { + hi = i; + } else { + lo = i; + } + } + } else { + hi += 2; + } + /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */ + return (UBool)(((hi+(base<<1))&2)!=0); + } +} + +U_CAPI int32_t U_EXPORT2 +uset_getSerializedRangeCount(const USerializedSet* set) { + if(set==NULL) { + return 0; + } + + return (set->bmpLength+(set->length-set->bmpLength)/2+1)/2; +} + +U_CAPI UBool U_EXPORT2 +uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, + UChar32* pStart, UChar32* pEnd) { + const uint16_t* array; + int32_t bmpLength, length; + + if(set==NULL || rangeIndex<0 || pStart==NULL || pEnd==NULL) { + return FALSE; + } + + array=set->array; + length=set->length; + bmpLength=set->bmpLength; + + rangeIndex*=2; /* address start/limit pairs */ + if(rangeIndex<bmpLength) { + *pStart=array[rangeIndex++]; + if(rangeIndex<bmpLength) { + *pEnd=array[rangeIndex]-1; + } else if(rangeIndex<length) { + *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1; + } else { + *pEnd=0x10ffff; + } + return TRUE; + } else { + rangeIndex-=bmpLength; + rangeIndex*=2; /* address pairs of pairs of units */ + length-=bmpLength; + if(rangeIndex<length) { + array+=bmpLength; + *pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1]; + rangeIndex+=2; + if(rangeIndex<length) { + *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1; + } else { + *pEnd=0x10ffff; + } + return TRUE; + } else { + return FALSE; + } + } +} + +// TODO The old, internal uset.c had an efficient uset_containsOne function. +// Returned the one and only code point, or else -1 or something. +// Consider adding such a function to both C and C++ UnicodeSet/uset. +// See tools/gennorm/store.c for usage, now usetContainsOne there. + +// TODO Investigate incorporating this code into UnicodeSet to improve +// efficiency. +// --- +// #define USET_GROW_DELTA 20 +// +// static int32_t +// findChar(const UChar32* array, int32_t length, UChar32 c) { +// int32_t i; +// +// /* check the last range limit first for more efficient appending */ +// if(length>0) { +// if(c>=array[length-1]) { +// return length; +// } +// +// /* do not check the last range limit again in the loop below */ +// --length; +// } +// +// for(i=0; i<length && c>=array[i]; ++i) {} +// return i; +// } +// +// static UBool +// addRemove(USet* set, UChar32 c, int32_t doRemove) { +// int32_t i, length, more; +// +// if(set==NULL || (uint32_t)c>0x10ffff) { +// return FALSE; +// } +// +// length=set->length; +// i=findChar(set->array, length, c); +// if((i&1)^doRemove) { +// /* c is already in the set */ +// return TRUE; +// } +// +// /* how many more array items do we need? */ +// if(i<length && (c+1)==set->array[i]) { +// /* c is just before the following range, extend that in-place by one */ +// set->array[i]=c; +// if(i>0) { +// --i; +// if(c==set->array[i]) { +// /* the previous range collapsed, remove it */ +// set->length=length-=2; +// if(i<length) { +// uprv_memmove(set->array+i, set->array+i+2, (length-i)*4); +// } +// } +// } +// return TRUE; +// } else if(i>0 && c==set->array[i-1]) { +// /* c is just after the previous range, extend that in-place by one */ +// if(++c<=0x10ffff) { +// set->array[i-1]=c; +// if(i<length && c==set->array[i]) { +// /* the following range collapsed, remove it */ +// --i; +// set->length=length-=2; +// if(i<length) { +// uprv_memmove(set->array+i, set->array+i+2, (length-i)*4); +// } +// } +// } else { +// /* extend the previous range (had limit 0x10ffff) to the end of Unicode */ +// set->length=i-1; +// } +// return TRUE; +// } else if(i==length && c==0x10ffff) { +// /* insert one range limit c */ +// more=1; +// } else { +// /* insert two range limits c, c+1 */ +// more=2; +// } +// +// /* insert <more> range limits */ +// if(length+more>set->capacity) { +// /* reallocate */ +// int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA; +// UChar32* newArray=(UChar32* )uprv_malloc(newCapacity*4); +// if(newArray==NULL) { +// return FALSE; +// } +// set->capacity=newCapacity; +// uprv_memcpy(newArray, set->array, length*4); +// +// if(set->array!=set->staticBuffer) { +// uprv_free(set->array); +// } +// set->array=newArray; +// } +// +// if(i<length) { +// uprv_memmove(set->array+i+more, set->array+i, (length-i)*4); +// } +// set->array[i]=c; +// if(more==2) { +// set->array[i+1]=c+1; +// } +// set->length+=more; +// +// return TRUE; +// } +// +// U_CAPI UBool U_EXPORT2 +// uset_add(USet* set, UChar32 c) { +// return addRemove(set, c, 0); +// } +// +// U_CAPI void U_EXPORT2 +// uset_remove(USet* set, UChar32 c) { +// addRemove(set, c, 1); +// } diff --git a/thirdparty/icu4c/common/uset_imp.h b/thirdparty/icu4c/common/uset_imp.h new file mode 100644 index 0000000000..7233b9303c --- /dev/null +++ b/thirdparty/icu4c/common/uset_imp.h @@ -0,0 +1,62 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2004-2007, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uset_imp.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004sep07 +* created by: Markus W. Scherer +* +* Internal USet definitions. +*/ + +#ifndef __USET_IMP_H__ +#define __USET_IMP_H__ + +#include "unicode/utypes.h" +#include "unicode/uset.h" + +U_CDECL_BEGIN + +typedef void U_CALLCONV +USetAdd(USet *set, UChar32 c); + +typedef void U_CALLCONV +USetAddRange(USet *set, UChar32 start, UChar32 end); + +typedef void U_CALLCONV +USetAddString(USet *set, const UChar *str, int32_t length); + +typedef void U_CALLCONV +USetRemove(USet *set, UChar32 c); + +typedef void U_CALLCONV +USetRemoveRange(USet *set, UChar32 start, UChar32 end); + +/** + * Interface for adding items to a USet, to keep low-level code from + * statically depending on the USet implementation. + * Calls will look like sa->add(sa->set, c); + */ +struct USetAdder { + USet *set; + USetAdd *add; + USetAddRange *addRange; + USetAddString *addString; + USetRemove *remove; + USetRemoveRange *removeRange; +}; +typedef struct USetAdder USetAdder; + +U_CDECL_END + +#endif + diff --git a/thirdparty/icu4c/common/uset_props.cpp b/thirdparty/icu4c/common/uset_props.cpp new file mode 100644 index 0000000000..f08e760b10 --- /dev/null +++ b/thirdparty/icu4c/common/uset_props.cpp @@ -0,0 +1,143 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uset_props.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004aug30 +* created by: Markus W. Scherer +* +* C wrappers around UnicodeSet functions that are implemented in +* uniset_props.cpp, split off for modularization. +*/ + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "unicode/uset.h" +#include "unicode/uniset.h" +#include "cmemory.h" +#include "unicode/ustring.h" +#include "unicode/parsepos.h" + +U_NAMESPACE_USE + +U_CAPI USet* U_EXPORT2 +uset_openPattern(const UChar* pattern, int32_t patternLength, + UErrorCode* ec) +{ + UnicodeString pat(patternLength==-1, pattern, patternLength); + UnicodeSet* set = new UnicodeSet(pat, *ec); + /* test for NULL */ + if(set == 0) { + *ec = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + if (U_FAILURE(*ec)) { + delete set; + set = NULL; + } + return (USet*) set; +} + +U_CAPI USet* U_EXPORT2 +uset_openPatternOptions(const UChar* pattern, int32_t patternLength, + uint32_t options, + UErrorCode* ec) +{ + UnicodeString pat(patternLength==-1, pattern, patternLength); + UnicodeSet* set = new UnicodeSet(pat, options, NULL, *ec); + /* test for NULL */ + if(set == 0) { + *ec = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + if (U_FAILURE(*ec)) { + delete set; + set = NULL; + } + return (USet*) set; +} + + +U_CAPI int32_t U_EXPORT2 +uset_applyPattern(USet *set, + const UChar *pattern, int32_t patternLength, + uint32_t options, + UErrorCode *status){ + + // status code needs to be checked since we + // dereference it + if(status == NULL || U_FAILURE(*status)){ + return 0; + } + + // check only the set paramenter + // if pattern is NULL or null terminate + // UnicodeString constructor takes care of it + if(set == NULL){ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + UnicodeString pat(pattern, patternLength); + + ParsePosition pos; + + ((UnicodeSet*) set)->applyPattern(pat, pos, options, NULL, *status); + + return pos.getIndex(); +} + +U_CAPI void U_EXPORT2 +uset_applyIntPropertyValue(USet* set, + UProperty prop, int32_t value, UErrorCode* ec) { + ((UnicodeSet*) set)->applyIntPropertyValue(prop, value, *ec); +} + +U_CAPI void U_EXPORT2 +uset_applyPropertyAlias(USet* set, + const UChar *prop, int32_t propLength, + const UChar *value, int32_t valueLength, + UErrorCode* ec) { + + UnicodeString p(prop, propLength); + UnicodeString v(value, valueLength); + + ((UnicodeSet*) set)->applyPropertyAlias(p, v, *ec); +} + +U_CAPI UBool U_EXPORT2 +uset_resemblesPattern(const UChar *pattern, int32_t patternLength, + int32_t pos) { + + UnicodeString pat(pattern, patternLength); + + return ((pos+1) < pat.length() && + pat.charAt(pos) == (UChar)91/*[*/) || + UnicodeSet::resemblesPattern(pat, pos); +} + +U_CAPI int32_t U_EXPORT2 +uset_toPattern(const USet* set, + UChar* result, int32_t resultCapacity, + UBool escapeUnprintable, + UErrorCode* ec) { + UnicodeString pat; + ((const UnicodeSet*) set)->toPattern(pat, escapeUnprintable); + return pat.extract(result, resultCapacity, *ec); +} + +U_CAPI void U_EXPORT2 +uset_closeOver(USet* set, int32_t attributes) { + ((UnicodeSet*) set)->UnicodeSet::closeOver(attributes); +} diff --git a/thirdparty/icu4c/common/usetiter.cpp b/thirdparty/icu4c/common/usetiter.cpp new file mode 100644 index 0000000000..7915169049 --- /dev/null +++ b/thirdparty/icu4c/common/usetiter.cpp @@ -0,0 +1,152 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2002-2006, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ +#include "unicode/usetiter.h" +#include "unicode/uniset.h" +#include "unicode/unistr.h" +#include "uvector.h" + +U_NAMESPACE_BEGIN + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeSetIterator) + +/** + * Create an iterator + * @param set set to iterate over + */ +UnicodeSetIterator::UnicodeSetIterator(const UnicodeSet& uSet) { + cpString = NULL; + reset(uSet); +} + +/** + * Create an iterator. Convenience for when the contents are to be set later. + */ +UnicodeSetIterator::UnicodeSetIterator() { + this->set = NULL; + cpString = NULL; + reset(); +} + +UnicodeSetIterator::~UnicodeSetIterator() { + delete cpString; +} + +/** + * Returns the next element in the set. + * @return true if there was another element in the set. + * if so, if codepoint == IS_STRING, the value is a string in the string field + * else the value is a single code point in the codepoint field. + * <br>You are guaranteed that the codepoints are in sorted order, and the strings are in sorted order, + * and that all code points are returned before any strings are returned. + * <br>Note also that the codepointEnd is undefined after calling this method. + */ +UBool UnicodeSetIterator::next() { + if (nextElement <= endElement) { + codepoint = codepointEnd = nextElement++; + string = NULL; + return TRUE; + } + if (range < endRange) { + loadRange(++range); + codepoint = codepointEnd = nextElement++; + string = NULL; + return TRUE; + } + + if (nextString >= stringCount) return FALSE; + codepoint = (UChar32)IS_STRING; // signal that value is actually a string + string = (const UnicodeString*) set->strings->elementAt(nextString++); + return TRUE; +} + +/** + * @return true if there was another element in the set. + * if so, if codepoint == IS_STRING, the value is a string in the string field + * else the value is a range of codepoints in the <codepoint, codepointEnd> fields. + * <br>Note that the codepoints are in sorted order, and the strings are in sorted order, + * and that all code points are returned before any strings are returned. + * <br>You are guaranteed that the ranges are in sorted order, and the strings are in sorted order, + * and that all ranges are returned before any strings are returned. + * <br>You are also guaranteed that ranges are disjoint and non-contiguous. + * <br>Note also that the codepointEnd is undefined after calling this method. + */ +UBool UnicodeSetIterator::nextRange() { + string = NULL; + if (nextElement <= endElement) { + codepointEnd = endElement; + codepoint = nextElement; + nextElement = endElement+1; + return TRUE; + } + if (range < endRange) { + loadRange(++range); + codepointEnd = endElement; + codepoint = nextElement; + nextElement = endElement+1; + return TRUE; + } + + if (nextString >= stringCount) return FALSE; + codepoint = (UChar32)IS_STRING; // signal that value is actually a string + string = (const UnicodeString*) set->strings->elementAt(nextString++); + return TRUE; +} + +/** + *@param set the set to iterate over. This allows reuse of the iterator. + */ +void UnicodeSetIterator::reset(const UnicodeSet& uSet) { + this->set = &uSet; + reset(); +} + +/** + * Resets to the start, to allow the iteration to start over again. + */ +void UnicodeSetIterator::reset() { + if (set == NULL) { + // Set up indices to empty iteration + endRange = -1; + stringCount = 0; + } else { + endRange = set->getRangeCount() - 1; + stringCount = set->stringsSize(); + } + range = 0; + endElement = -1; + nextElement = 0; + if (endRange >= 0) { + loadRange(range); + } + nextString = 0; + string = NULL; +} + +void UnicodeSetIterator::loadRange(int32_t iRange) { + nextElement = set->getRangeStart(iRange); + endElement = set->getRangeEnd(iRange); +} + + +const UnicodeString& UnicodeSetIterator::getString() { + if (string==NULL && codepoint!=(UChar32)IS_STRING) { + if (cpString == NULL) { + cpString = new UnicodeString(); + } + if (cpString != NULL) { + cpString->setTo((UChar32)codepoint); + } + string = cpString; + } + return *string; +} + +U_NAMESPACE_END + +//eof diff --git a/thirdparty/icu4c/common/ushape.cpp b/thirdparty/icu4c/common/ushape.cpp new file mode 100644 index 0000000000..ae13b5c118 --- /dev/null +++ b/thirdparty/icu4c/common/ushape.cpp @@ -0,0 +1,1728 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ****************************************************************************** + * + * Copyright (C) 2000-2016, International Business Machines + * Corporation and others. All Rights Reserved. + * + ****************************************************************************** + * file name: ushape.cpp + * encoding: UTF-8 + * tab size: 8 (not used) + * indentation:4 + * + * created on: 2000jun29 + * created by: Markus W. Scherer + * + * Arabic letter shaping implemented by Ayman Roshdy + */ + +#include "unicode/utypes.h" +#include "unicode/uchar.h" +#include "unicode/ustring.h" +#include "unicode/ushape.h" +#include "cmemory.h" +#include "putilimp.h" +#include "ustr_imp.h" +#include "ubidi_props.h" +#include "uassert.h" + +/* + * This implementation is designed for 16-bit Unicode strings. + * The main assumption is that the Arabic characters and their + * presentation forms each fit into a single UChar. + * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII + * characters. + */ + +/* + * ### TODO in general for letter shaping: + * - the letter shaping code is UTF-16-unaware; needs update + * + especially invertBuffer()?! + * - needs to handle the "Arabic Tail" that is used in some legacy codepages + * as a glyph fragment of wide-glyph letters + * + IBM Unicode conversion tables map it to U+200B (ZWSP) + * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms + * + Unicode 3.2 added U+FE73 ARABIC TAIL FRAGMENT + */ + +/* definitions for Arabic letter shaping ------------------------------------ */ + +#define IRRELEVANT 4 +#define LAMTYPE 16 +#define ALEFTYPE 32 +#define LINKR 1 +#define LINKL 2 +#define APRESENT 8 +#define SHADDA 64 +#define CSHADDA 128 +#define COMBINE (SHADDA+CSHADDA) + +#define HAMZAFE_CHAR 0xfe80 +#define HAMZA06_CHAR 0x0621 +#define YEH_HAMZA_CHAR 0x0626 +#define YEH_HAMZAFE_CHAR 0xFE89 +#define LAMALEF_SPACE_SUB 0xFFFF +#define TASHKEEL_SPACE_SUB 0xFFFE +#define NEW_TAIL_CHAR 0xFE73 +#define OLD_TAIL_CHAR 0x200B +#define LAM_CHAR 0x0644 +#define SPACE_CHAR 0x0020 +#define SHADDA_CHAR 0xFE7C +#define TATWEEL_CHAR 0x0640 +#define SHADDA_TATWEEL_CHAR 0xFE7D +#define SHADDA06_CHAR 0x0651 + +#define SHAPE_MODE 0 +#define DESHAPE_MODE 1 + +struct uShapeVariables { + UChar tailChar; + uint32_t uShapeLamalefBegin; + uint32_t uShapeLamalefEnd; + uint32_t uShapeTashkeelBegin; + uint32_t uShapeTashkeelEnd; + int spacesRelativeToTextBeginEnd; +}; + +static const uint8_t tailFamilyIsolatedFinal[] = { + /* FEB1 */ 1, + /* FEB2 */ 1, + /* FEB3 */ 0, + /* FEB4 */ 0, + /* FEB5 */ 1, + /* FEB6 */ 1, + /* FEB7 */ 0, + /* FEB8 */ 0, + /* FEB9 */ 1, + /* FEBA */ 1, + /* FEBB */ 0, + /* FEBC */ 0, + /* FEBD */ 1, + /* FEBE */ 1 +}; + +static const uint8_t tashkeelMedial[] = { + /* FE70 */ 0, + /* FE71 */ 1, + /* FE72 */ 0, + /* FE73 */ 0, + /* FE74 */ 0, + /* FE75 */ 0, + /* FE76 */ 0, + /* FE77 */ 1, + /* FE78 */ 0, + /* FE79 */ 1, + /* FE7A */ 0, + /* FE7B */ 1, + /* FE7C */ 0, + /* FE7D */ 1, + /* FE7E */ 0, + /* FE7F */ 1 +}; + +static const UChar yehHamzaToYeh[] = +{ +/* isolated*/ 0xFEEF, +/* final */ 0xFEF0 +}; + +static const uint8_t IrrelevantPos[] = { + 0x0, 0x2, 0x4, 0x6, + 0x8, 0xA, 0xC, 0xE +}; + + +static const UChar convertLamAlef[] = +{ +/*FEF5*/ 0x0622, +/*FEF6*/ 0x0622, +/*FEF7*/ 0x0623, +/*FEF8*/ 0x0623, +/*FEF9*/ 0x0625, +/*FEFA*/ 0x0625, +/*FEFB*/ 0x0627, +/*FEFC*/ 0x0627 +}; + +static const UChar araLink[178]= +{ + 1 + 32 + 256 * 0x11,/*0x0622*/ + 1 + 32 + 256 * 0x13,/*0x0623*/ + 1 + 256 * 0x15,/*0x0624*/ + 1 + 32 + 256 * 0x17,/*0x0625*/ + 1 + 2 + 256 * 0x19,/*0x0626*/ + 1 + 32 + 256 * 0x1D,/*0x0627*/ + 1 + 2 + 256 * 0x1F,/*0x0628*/ + 1 + 256 * 0x23,/*0x0629*/ + 1 + 2 + 256 * 0x25,/*0x062A*/ + 1 + 2 + 256 * 0x29,/*0x062B*/ + 1 + 2 + 256 * 0x2D,/*0x062C*/ + 1 + 2 + 256 * 0x31,/*0x062D*/ + 1 + 2 + 256 * 0x35,/*0x062E*/ + 1 + 256 * 0x39,/*0x062F*/ + 1 + 256 * 0x3B,/*0x0630*/ + 1 + 256 * 0x3D,/*0x0631*/ + 1 + 256 * 0x3F,/*0x0632*/ + 1 + 2 + 256 * 0x41,/*0x0633*/ + 1 + 2 + 256 * 0x45,/*0x0634*/ + 1 + 2 + 256 * 0x49,/*0x0635*/ + 1 + 2 + 256 * 0x4D,/*0x0636*/ + 1 + 2 + 256 * 0x51,/*0x0637*/ + 1 + 2 + 256 * 0x55,/*0x0638*/ + 1 + 2 + 256 * 0x59,/*0x0639*/ + 1 + 2 + 256 * 0x5D,/*0x063A*/ + 0, 0, 0, 0, 0, /*0x063B-0x063F*/ + 1 + 2, /*0x0640*/ + 1 + 2 + 256 * 0x61,/*0x0641*/ + 1 + 2 + 256 * 0x65,/*0x0642*/ + 1 + 2 + 256 * 0x69,/*0x0643*/ + 1 + 2 + 16 + 256 * 0x6D,/*0x0644*/ + 1 + 2 + 256 * 0x71,/*0x0645*/ + 1 + 2 + 256 * 0x75,/*0x0646*/ + 1 + 2 + 256 * 0x79,/*0x0647*/ + 1 + 256 * 0x7D,/*0x0648*/ + 1 + 256 * 0x7F,/*0x0649*/ + 1 + 2 + 256 * 0x81,/*0x064A*/ + 4 + 256 * 1, /*0x064B*/ + 4 + 128 + 256 * 1, /*0x064C*/ + 4 + 128 + 256 * 1, /*0x064D*/ + 4 + 128 + 256 * 1, /*0x064E*/ + 4 + 128 + 256 * 1, /*0x064F*/ + 4 + 128 + 256 * 1, /*0x0650*/ + 4 + 64 + 256 * 3, /*0x0651*/ + 4 + 256 * 1, /*0x0652*/ + 4 + 256 * 7, /*0x0653*/ + 4 + 256 * 8, /*0x0654*/ + 4 + 256 * 8, /*0x0655*/ + 4 + 256 * 1, /*0x0656*/ + 0, 0, 0, 0, 0, /*0x0657-0x065B*/ + 1 + 256 * 0x85,/*0x065C*/ + 1 + 256 * 0x87,/*0x065D*/ + 1 + 256 * 0x89,/*0x065E*/ + 1 + 256 * 0x8B,/*0x065F*/ + 0, 0, 0, 0, 0, /*0x0660-0x0664*/ + 0, 0, 0, 0, 0, /*0x0665-0x0669*/ + 0, 0, 0, 0, 0, 0, /*0x066A-0x066F*/ + 4 + 256 * 6, /*0x0670*/ + 1 + 8 + 256 * 0x00,/*0x0671*/ + 1 + 32, /*0x0672*/ + 1 + 32, /*0x0673*/ + 0, /*0x0674*/ + 1 + 32, /*0x0675*/ + 1, 1, /*0x0676-0x0677*/ + 1 + 2, /*0x0678*/ + 1 + 2 + 8 + 256 * 0x16,/*0x0679*/ + 1 + 2 + 8 + 256 * 0x0E,/*0x067A*/ + 1 + 2 + 8 + 256 * 0x02,/*0x067B*/ + 1+2, 1+2, /*0x67C-0x067D*/ + 1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/ + 1+2, 1+2, 1+2+8+256 * 0x2A, 1+2, /*0x0684-0x0687*/ + 1 + 8 + 256 * 0x38,/*0x0688*/ + 1, 1, 1, /*0x0689-0x068B*/ + 1 + 8 + 256 * 0x34,/*0x068C*/ + 1 + 8 + 256 * 0x32,/*0x068D*/ + 1 + 8 + 256 * 0x36,/*0x068E*/ + 1, 1, /*0x068F-0x0690*/ + 1 + 8 + 256 * 0x3C,/*0x0691*/ + 1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1, /*0x0692-0x0699*/ + 1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ + 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/ + 1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/ + 1+2, 1+2, 1+2, 1+2, /*0x06A4-0x06AD*/ + 1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ + 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/ + 1+2, 1+2, /*0x06B8-0x06B9*/ + 1 + 8 + 256 * 0x4E,/*0x06BA*/ + 1 + 2 + 8 + 256 * 0x50,/*0x06BB*/ + 1+2, 1+2, /*0x06BC-0x06BD*/ + 1 + 2 + 8 + 256 * 0x5A,/*0x06BE*/ + 1+2, /*0x06BF*/ + 1 + 8 + 256 * 0x54,/*0x06C0*/ + 1 + 2 + 8 + 256 * 0x56,/*0x06C1*/ + 1, 1, 1, /*0x06C2-0x06C4*/ + 1 + 8 + 256 * 0x90,/*0x06C5*/ + 1 + 8 + 256 * 0x89,/*0x06C6*/ + 1 + 8 + 256 * 0x87,/*0x06C7*/ + 1 + 8 + 256 * 0x8B,/*0x06C8*/ + 1 + 8 + 256 * 0x92,/*0x06C9*/ + 1, /*0x06CA*/ + 1 + 8 + 256 * 0x8E,/*0x06CB*/ + 1 + 2 + 8 + 256 * 0xAC,/*0x06CC*/ + 1, /*0x06CD*/ + 1+2, 1+2, /*0x06CE-0x06CF*/ + 1 + 2 + 8 + 256 * 0x94,/*0x06D0*/ + 1+2, /*0x06D1*/ + 1 + 8 + 256 * 0x5E,/*0x06D2*/ + 1 + 8 + 256 * 0x60 /*0x06D3*/ +}; + +static const uint8_t presALink[] = { +/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/ +/*FB5*/ 0, 1, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, +/*FB6*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, 0, 0, +/*FB8*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, +/*FB9*/ 2,1 + 2, 0, 1, 2,1 + 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FBA*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FBB*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FBE*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,1 + 2, +/*FC0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FC1*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FC2*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FC3*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FC4*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FC5*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, +/*FC6*/ 4, 4, 4 +}; + +static const uint8_t presBLink[]= +{ +/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/ +/*FE7*/1 + 2,1 + 2,1 + 2, 0,1 + 2, 0,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2, +/*FE8*/ 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2,1 + 2, 0, 1, 0, +/*FE9*/ 1, 2,1 + 2, 0, 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, +/*FEA*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, +/*FEB*/ 1, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, +/*FEC*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, +/*FED*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2, +/*FEE*/1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 2,1 + 2, 0, 1, 0, +/*FEF*/ 1, 0, 1, 2,1 + 2, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0 +}; + +static const UChar convertFBto06[] = +{ +/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/ +/*FB5*/ 0x671, 0x671, 0x67B, 0x67B, 0x67B, 0x67B, 0x67E, 0x67E, 0x67E, 0x67E, 0, 0, 0, 0, 0x67A, 0x67A, +/*FB6*/ 0x67A, 0x67A, 0, 0, 0, 0, 0x679, 0x679, 0x679, 0x679, 0, 0, 0, 0, 0, 0, +/*FB7*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x686, 0x686, 0x686, 0x686, 0, 0, +/*FB8*/ 0, 0, 0x68D, 0x68D, 0x68C, 0x68C, 0x68E, 0x68E, 0x688, 0x688, 0x698, 0x698, 0x691, 0x691, 0x6A9, 0x6A9, +/*FB9*/ 0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF, 0, 0, 0, 0, 0, 0, 0, 0, 0x6BA, 0x6BA, +/*FBA*/ 0x6BB, 0x6BB, 0x6BB, 0x6BB, 0x6C0, 0x6C0, 0x6C1, 0x6C1, 0x6C1, 0x6C1, 0x6BE, 0x6BE, 0x6BE, 0x6BE, 0x6d2, 0x6D2, +/*FBB*/ 0x6D3, 0x6D3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FBC*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FBD*/ 0, 0, 0, 0, 0, 0, 0, 0x6C7, 0x6C7, 0x6C6, 0x6C6, 0x6C8, 0x6C8, 0, 0x6CB, 0x6CB, +/*FBE*/ 0x6C5, 0x6C5, 0x6C9, 0x6C9, 0x6D0, 0x6D0, 0x6D0, 0x6D0, 0, 0, 0, 0, 0, 0, 0, 0, +/*FBF*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x6CC, 0x6CC, 0x6CC, 0x6CC +}; + +static const UChar convertFEto06[] = +{ +/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/ +/*FE7*/ 0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652, +/*FE8*/ 0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628, +/*FE9*/ 0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C, +/*FEA*/ 0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632, +/*FEB*/ 0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636, +/*FEC*/ 0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A, +/*FED*/ 0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644, +/*FEE*/ 0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649, +/*FEF*/ 0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F +}; + +static const uint8_t shapeTable[4][4][4]= +{ + { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} }, + { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }, + { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} }, + { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} } +}; + +/* + * This function shapes European digits to Arabic-Indic digits + * in-place, writing over the input characters. + * Since we know that we are only looking for BMP code points, + * we can safely just work with code units (again, at least UTF-16). + */ +static void +_shapeToArabicDigitsWithContext(UChar *s, int32_t length, + UChar digitBase, + UBool isLogical, UBool lastStrongWasAL) { + int32_t i; + UChar c; + + digitBase-=0x30; + + /* the iteration direction depends on the type of input */ + if(isLogical) { + for(i=0; i<length; ++i) { + c=s[i]; + switch(ubidi_getClass(c)) { + case U_LEFT_TO_RIGHT: /* L */ + case U_RIGHT_TO_LEFT: /* R */ + lastStrongWasAL=FALSE; + break; + case U_RIGHT_TO_LEFT_ARABIC: /* AL */ + lastStrongWasAL=TRUE; + break; + case U_EUROPEAN_NUMBER: /* EN */ + if(lastStrongWasAL && (uint32_t)(c-0x30)<10) { + s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */ + } + break; + default : + break; + } + } + } else { + for(i=length; i>0; /* pre-decrement in the body */) { + c=s[--i]; + switch(ubidi_getClass(c)) { + case U_LEFT_TO_RIGHT: /* L */ + case U_RIGHT_TO_LEFT: /* R */ + lastStrongWasAL=FALSE; + break; + case U_RIGHT_TO_LEFT_ARABIC: /* AL */ + lastStrongWasAL=TRUE; + break; + case U_EUROPEAN_NUMBER: /* EN */ + if(lastStrongWasAL && (uint32_t)(c-0x30)<10) { + s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */ + } + break; + default : + break; + } + } + } +} + +/* + *Name : invertBuffer + *Function : This function inverts the buffer, it's used + * in case the user specifies the buffer to be + * U_SHAPE_TEXT_DIRECTION_LOGICAL + */ +static void +invertBuffer(UChar *buffer, int32_t size, uint32_t /*options*/, int32_t lowlimit, int32_t highlimit) { + UChar temp; + int32_t i=0,j=0; + for(i=lowlimit,j=size-highlimit-1;i<j;i++,j--) { + temp = buffer[i]; + buffer[i] = buffer[j]; + buffer[j] = temp; + } +} + +/* + *Name : changeLamAlef + *Function : Converts the Alef characters into an equivalent + * LamAlef location in the 0x06xx Range, this is an + * intermediate stage in the operation of the program + * later it'll be converted into the 0xFExx LamAlefs + * in the shaping function. + */ +static inline UChar +changeLamAlef(UChar ch) { + switch(ch) { + case 0x0622 : + return 0x065C; + case 0x0623 : + return 0x065D; + case 0x0625 : + return 0x065E; + case 0x0627 : + return 0x065F; + } + return 0; +} + +/* + *Name : getLink + *Function : Resolves the link between the characters as + * Arabic characters have four forms : + * Isolated, Initial, Middle and Final Form + */ +static UChar +getLink(UChar ch) { + if(ch >= 0x0622 && ch <= 0x06D3) { + return(araLink[ch-0x0622]); + } else if(ch == 0x200D) { + return(3); + } else if(ch >= 0x206D && ch <= 0x206F) { + return(4); + }else if(ch >= 0xFB50 && ch <= 0xFC62) { + return(presALink[ch-0xFB50]); + } else if(ch >= 0xFE70 && ch <= 0xFEFC) { + return(presBLink[ch-0xFE70]); + }else { + return(0); + } +} + +/* + *Name : countSpaces + *Function : Counts the number of spaces + * at each end of the logical buffer + */ +static void +countSpaces(UChar *dest, int32_t size, uint32_t /*options*/, int32_t *spacesCountl, int32_t *spacesCountr) { + int32_t i = 0; + int32_t countl = 0,countr = 0; + while((dest[i] == SPACE_CHAR) && (countl < size)) { + countl++; + i++; + } + if (countl < size) { /* the entire buffer is not all space */ + while(dest[size-1] == SPACE_CHAR) { + countr++; + size--; + } + } + *spacesCountl = countl; + *spacesCountr = countr; +} + +/* + *Name : isTashkeelChar + *Function : Returns 1 for Tashkeel characters in 06 range else return 0 + */ +static inline int32_t +isTashkeelChar(UChar ch) { + return (int32_t)( ch>=0x064B && ch<= 0x0652 ); +} + +/* + *Name : isTashkeelCharFE + *Function : Returns 1 for Tashkeel characters in FE range else return 0 + */ +static inline int32_t +isTashkeelCharFE(UChar ch) { + return (int32_t)( ch>=0xFE70 && ch<= 0xFE7F ); +} + +/* + *Name : isAlefChar + *Function : Returns 1 for Alef characters else return 0 + */ +static inline int32_t +isAlefChar(UChar ch) { + return (int32_t)( (ch==0x0622)||(ch==0x0623)||(ch==0x0625)||(ch==0x0627) ); +} + +/* + *Name : isLamAlefChar + *Function : Returns 1 for LamAlef characters else return 0 + */ +static inline int32_t +isLamAlefChar(UChar ch) { + return (int32_t)((ch>=0xFEF5)&&(ch<=0xFEFC) ); +} + +/*BIDI + *Name : isTailChar + *Function : returns 1 if the character matches one of the tail characters (0xfe73 or 0x200b) otherwise returns 0 + */ + +static inline int32_t +isTailChar(UChar ch) { + if(ch == OLD_TAIL_CHAR || ch == NEW_TAIL_CHAR){ + return 1; + }else{ + return 0; + } +} + +/*BIDI + *Name : isSeenTailFamilyChar + *Function : returns 1 if the character is a seen family isolated character + * in the FE range otherwise returns 0 + */ + +static inline int32_t +isSeenTailFamilyChar(UChar ch) { + if(ch >= 0xfeb1 && ch < 0xfebf){ + return tailFamilyIsolatedFinal [ch - 0xFEB1]; + }else{ + return 0; + } +} + + /* Name : isSeenFamilyChar + * Function : returns 1 if the character is a seen family character in the Unicode + * 06 range otherwise returns 0 + */ + +static inline int32_t +isSeenFamilyChar(UChar ch){ + if(ch >= 0x633 && ch <= 0x636){ + return 1; + }else { + return 0; + } +} + +/*Start of BIDI*/ +/* + *Name : isAlefMaksouraChar + *Function : returns 1 if the character is a Alef Maksoura Final or isolated + * otherwise returns 0 + */ +static inline int32_t +isAlefMaksouraChar(UChar ch) { + return (int32_t)( (ch == 0xFEEF) || ( ch == 0xFEF0) || (ch == 0x0649)); +} + +/* + * Name : isYehHamzaChar + * Function : returns 1 if the character is a yehHamza isolated or yehhamza + * final is found otherwise returns 0 + */ +static inline int32_t +isYehHamzaChar(UChar ch) { + if((ch==0xFE89)||(ch==0xFE8A)){ + return 1; + }else{ + return 0; + } +} + + /* + * Name: isTashkeelOnTatweelChar + * Function: Checks if the Tashkeel Character is on Tatweel or not,if the + * Tashkeel on tatweel (FE range), it returns 1 else if the + * Tashkeel with shadda on tatweel (FC range)return 2 otherwise + * returns 0 + */ +static inline int32_t +isTashkeelOnTatweelChar(UChar ch){ + if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75 && ch != SHADDA_TATWEEL_CHAR) + { + return tashkeelMedial [ch - 0xFE70]; + }else if( (ch >= 0xfcf2 && ch <= 0xfcf4) || (ch == SHADDA_TATWEEL_CHAR)) { + return 2; + }else{ + return 0; + } +} + +/* + * Name: isIsolatedTashkeelChar + * Function: Checks if the Tashkeel Character is in the isolated form + * (i.e. Unicode FE range) returns 1 else if the Tashkeel + * with shadda is in the isolated form (i.e. Unicode FC range) + * returns 2 otherwise returns 0 + */ +static inline int32_t +isIsolatedTashkeelChar(UChar ch){ + if(ch >= 0xfe70 && ch <= 0xfe7f && ch != NEW_TAIL_CHAR && ch != 0xFE75){ + return (1 - tashkeelMedial [ch - 0xFE70]); + }else if(ch >= 0xfc5e && ch <= 0xfc63){ + return 1; + }else{ + return 0; + } +} + + + + +/* + *Name : calculateSize + *Function : This function calculates the destSize to be used in preflighting + * when the destSize is equal to 0 + * It is used also to calculate the new destsize in case the + * destination buffer will be resized. + */ + +static int32_t +calculateSize(const UChar *source, int32_t sourceLength, +int32_t destSize,uint32_t options) { + int32_t i = 0; + + int lamAlefOption = 0; + int tashkeelOption = 0; + + destSize = sourceLength; + + if (((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE || + ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED )) && + ((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE )){ + lamAlefOption = 1; + } + if((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_SHAPE && + ((options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ) ){ + tashkeelOption = 1; + } + + if(lamAlefOption || tashkeelOption){ + if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) { + for(i=0;i<sourceLength;i++) { + if( ((isAlefChar(source[i]))&& (i<(sourceLength-1)) &&(source[i+1] == LAM_CHAR)) || (isTashkeelCharFE(source[i])) ) { + destSize--; + } + } + }else if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL) { + for(i=0;i<sourceLength;i++) { + if( ( (source[i] == LAM_CHAR) && (i<(sourceLength-1)) && (isAlefChar(source[i+1]))) || (isTashkeelCharFE(source[i])) ) { + destSize--; + } + } + } + } + + if ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE){ + if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){ + for(i=0;i<sourceLength;i++) { + if(isLamAlefChar(source[i])) + destSize++; + } + } + } + + return destSize; +} + +/* + *Name : handleTashkeelWithTatweel + *Function : Replaces Tashkeel as following: + * Case 1 :if the Tashkeel on tatweel, replace it with Tatweel. + * Case 2 :if the Tashkeel aggregated with Shadda on Tatweel, replace + * it with Shadda on Tatweel. + * Case 3: if the Tashkeel is isolated replace it with Space. + * + */ +static int32_t +handleTashkeelWithTatweel(UChar *dest, int32_t sourceLength, + int32_t /*destSize*/, uint32_t /*options*/, + UErrorCode * /*pErrorCode*/) { + int i; + for(i = 0; i < sourceLength; i++){ + if((isTashkeelOnTatweelChar(dest[i]) == 1)){ + dest[i] = TATWEEL_CHAR; + }else if((isTashkeelOnTatweelChar(dest[i]) == 2)){ + dest[i] = SHADDA_TATWEEL_CHAR; + }else if(isIsolatedTashkeelChar(dest[i]) && dest[i] != SHADDA_CHAR){ + dest[i] = SPACE_CHAR; + } + } + return sourceLength; +} + + + +/* + *Name : handleGeneratedSpaces + *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space, + * and Tashkeel to space. + * handleGeneratedSpaces function puts these generated spaces + * according to the options the user specifies. LamAlef and Tashkeel + * spaces can be replaced at begin, at end, at near or decrease the + * buffer size. + * + * There is also Auto option for LamAlef and tashkeel, which will put + * the spaces at end of the buffer (or end of text if the user used + * the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END). + * + * If the text type was visual_LTR and the option + * U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected the END + * option will place the space at the beginning of the buffer and + * BEGIN will place the space at the end of the buffer. + */ + +static int32_t +handleGeneratedSpaces(UChar *dest, int32_t sourceLength, + int32_t destSize, + uint32_t options, + UErrorCode *pErrorCode,struct uShapeVariables shapeVars ) { + + int32_t i = 0, j = 0; + int32_t count = 0; + UChar *tempbuffer=NULL; + + int lamAlefOption = 0; + int tashkeelOption = 0; + int shapingMode = SHAPE_MODE; + + if (shapingMode == 0){ + if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE ){ + lamAlefOption = 1; + } + if ( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_RESIZE ){ + tashkeelOption = 1; + } + } + + tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); + /* Test for NULL */ + if(tempbuffer == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + + if (lamAlefOption || tashkeelOption){ + uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); + + i = j = 0; count = 0; + while(i < sourceLength) { + if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || + (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ + j--; + count++; + } else { + tempbuffer[j] = dest[i]; + } + i++; + j++; + } + + while(count >= 0) { + tempbuffer[i] = 0x0000; + i--; + count--; + } + + u_memcpy(dest, tempbuffer, sourceLength); + destSize = u_strlen(dest); + } + + lamAlefOption = 0; + + if (shapingMode == 0){ + if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR ){ + lamAlefOption = 1; + } + } + + if (lamAlefOption){ + /* Lam+Alef is already shaped into LamAlef + FFFF */ + i = 0; + while(i < sourceLength) { + if(lamAlefOption&&dest[i] == LAMALEF_SPACE_SUB){ + dest[i] = SPACE_CHAR; + } + i++; + } + destSize = sourceLength; + } + lamAlefOption = 0; + tashkeelOption = 0; + + if (shapingMode == 0) { + if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin) || + (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO ) + && (shapeVars.spacesRelativeToTextBeginEnd==1)) ) { + lamAlefOption = 1; + } + if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelBegin ) { + tashkeelOption = 1; + } + } + + if(lamAlefOption || tashkeelOption){ + uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); + + i = j = sourceLength; count = 0; + + while(i >= 0) { + if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || + (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ + j++; + count++; + }else { + tempbuffer[j] = dest[i]; + } + i--; + j--; + } + + for(i=0 ;i < count; i++){ + tempbuffer[i] = SPACE_CHAR; + } + + u_memcpy(dest, tempbuffer, sourceLength); + destSize = sourceLength; + } + + + + lamAlefOption = 0; + tashkeelOption = 0; + + if (shapingMode == 0) { + if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd) || + (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO ) + && (shapeVars.spacesRelativeToTextBeginEnd==0)) ) { + lamAlefOption = 1; + } + if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelEnd ){ + tashkeelOption = 1; + } + } + + if(lamAlefOption || tashkeelOption){ + uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); + + i = j = 0; count = 0; + while(i < sourceLength) { + if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) || + (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){ + j--; + count++; + }else { + tempbuffer[j] = dest[i]; + } + i++; + j++; + } + + while(count >= 0) { + tempbuffer[i] = SPACE_CHAR; + i--; + count--; + } + + u_memcpy(dest, tempbuffer, sourceLength); + destSize = sourceLength; + } + + + if(tempbuffer){ + uprv_free(tempbuffer); + } + + return destSize; +} + +/* + *Name :expandCompositCharAtBegin + *Function :Expands the LamAlef character to Lam and Alef consuming the required + * space from beginning of the buffer. If the text type was visual_LTR + * and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END was selected + * the spaces will be located at end of buffer. + * If there are no spaces to expand the LamAlef, an error + * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h + */ + +static int32_t +expandCompositCharAtBegin(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) { + int32_t i = 0,j = 0; + int32_t countl = 0; + UChar *tempbuffer=NULL; + + tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); + + /* Test for NULL */ + if(tempbuffer == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); + + i = 0; + while(dest[i] == SPACE_CHAR) { + countl++; + i++; + } + + i = j = sourceLength-1; + + while(i >= 0 && j >= 0) { + if( countl>0 && isLamAlefChar(dest[i])) { + tempbuffer[j] = LAM_CHAR; + /* to ensure the array index is within the range */ + U_ASSERT(dest[i] >= 0xFEF5u + && dest[i]-0xFEF5u < UPRV_LENGTHOF(convertLamAlef)); + tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ]; + j--; + countl--; + }else { + if( countl == 0 && isLamAlefChar(dest[i]) ) { + *pErrorCode=U_NO_SPACE_AVAILABLE; + } + tempbuffer[j] = dest[i]; + } + i--; + j--; + } + u_memcpy(dest, tempbuffer, sourceLength); + + uprv_free(tempbuffer); + + destSize = sourceLength; + return destSize; +} + +/* + *Name : expandCompositCharAtEnd + *Function : Expands the LamAlef character to Lam and Alef consuming the + * required space from end of the buffer. If the text type was + * Visual LTR and the option U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END + * was used, the spaces will be consumed from begin of buffer. If + * there are no spaces to expand the LamAlef, an error + * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h + */ + +static int32_t +expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode) { + int32_t i = 0,j = 0; + + int32_t countr = 0; + int32_t inpsize = sourceLength; + + UChar *tempbuffer=NULL; + tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR); + + /* Test for NULL */ + if(tempbuffer == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR); + + while(dest[inpsize-1] == SPACE_CHAR) { + countr++; + inpsize--; + } + + i = sourceLength - countr - 1; + j = sourceLength - 1; + + while(i >= 0 && j >= 0) { + if( countr>0 && isLamAlefChar(dest[i]) ) { + tempbuffer[j] = LAM_CHAR; + tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ]; + j--; + countr--; + }else { + if ((countr == 0) && isLamAlefChar(dest[i]) ) { + *pErrorCode=U_NO_SPACE_AVAILABLE; + } + tempbuffer[j] = dest[i]; + } + i--; + j--; + } + + if(countr > 0) { + u_memmove(tempbuffer, tempbuffer+countr, sourceLength); + if(u_strlen(tempbuffer) < sourceLength) { + for(i=sourceLength-1;i>=sourceLength-countr;i--) { + tempbuffer[i] = SPACE_CHAR; + } + } + } + u_memcpy(dest, tempbuffer, sourceLength); + + uprv_free(tempbuffer); + + destSize = sourceLength; + return destSize; +} + +/* + *Name : expandCompositCharAtNear + *Function : Expands the LamAlef character into Lam + Alef, YehHamza character + * into Yeh + Hamza, SeenFamily character into SeenFamily character + * + Tail, while consuming the space next to the character. + * If there are no spaces next to the character, an error + * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h + */ + +static int32_t +expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode, + int yehHamzaOption, int seenTailOption, int lamAlefOption, struct uShapeVariables shapeVars) { + int32_t i = 0; + + + UChar lamalefChar, yehhamzaChar; + + for(i = 0 ;i<=sourceLength-1;i++) { + if (seenTailOption && isSeenTailFamilyChar(dest[i])) { + if ((i>0) && (dest[i-1] == SPACE_CHAR) ) { + dest[i-1] = shapeVars.tailChar; + }else { + *pErrorCode=U_NO_SPACE_AVAILABLE; + } + }else if(yehHamzaOption && (isYehHamzaChar(dest[i])) ) { + if ((i>0) && (dest[i-1] == SPACE_CHAR) ) { + yehhamzaChar = dest[i]; + dest[i] = yehHamzaToYeh[yehhamzaChar - YEH_HAMZAFE_CHAR]; + dest[i-1] = HAMZAFE_CHAR; + }else { + + *pErrorCode=U_NO_SPACE_AVAILABLE; + } + }else if(lamAlefOption && isLamAlefChar(dest[i+1])) { + if(dest[i] == SPACE_CHAR){ + lamalefChar = dest[i+1]; + dest[i+1] = LAM_CHAR; + dest[i] = convertLamAlef[ lamalefChar - 0xFEF5 ]; + }else { + *pErrorCode=U_NO_SPACE_AVAILABLE; + } + } + } + destSize = sourceLength; + return destSize; +} + /* + * Name : expandCompositChar + * Function : LamAlef, need special handling, since it expands from one + * character into two characters while shaping or deshaping. + * In order to expand it, near or far spaces according to the + * options user specifies. Also buffer size can be increased. + * + * For SeenFamily characters and YehHamza only the near option is + * supported, while for LamAlef we can take spaces from begin, end, + * near or even increase the buffer size. + * There is also the Auto option for LamAlef only, which will first + * search for a space at end, begin then near, respectively. + * If there are no spaces to expand these characters, an error will be set to + * U_NO_SPACE_AVAILABLE as defined in utypes.h + */ + +static int32_t +expandCompositChar(UChar *dest, int32_t sourceLength, + int32_t destSize,uint32_t options, + UErrorCode *pErrorCode, int shapingMode,struct uShapeVariables shapeVars) { + + int32_t i = 0,j = 0; + + UChar *tempbuffer=NULL; + int yehHamzaOption = 0; + int seenTailOption = 0; + int lamAlefOption = 0; + + if (shapingMode == 1){ + if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO){ + + if(shapeVars.spacesRelativeToTextBeginEnd == 0) { + destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); + + if(*pErrorCode == U_NO_SPACE_AVAILABLE) { + *pErrorCode = U_ZERO_ERROR; + destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); + } + }else { + destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); + + if(*pErrorCode == U_NO_SPACE_AVAILABLE) { + *pErrorCode = U_ZERO_ERROR; + destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); + } + } + + if(*pErrorCode == U_NO_SPACE_AVAILABLE) { + *pErrorCode = U_ZERO_ERROR; + destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption, + seenTailOption, 1,shapeVars); + } + } + } + + if (shapingMode == 1){ + if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd){ + destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode); + } + } + + if (shapingMode == 1){ + if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin){ + destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode); + } + } + + if (shapingMode == 0){ + if ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR){ + yehHamzaOption = 1; + } + if ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR){ + seenTailOption = 1; + } + } + if (shapingMode == 1) { + if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_NEAR) { + lamAlefOption = 1; + } + } + + + if (yehHamzaOption || seenTailOption || lamAlefOption){ + destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption, + seenTailOption,lamAlefOption,shapeVars); + } + + + if (shapingMode == 1){ + if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_RESIZE){ + destSize = calculateSize(dest,sourceLength,destSize,options); + tempbuffer = (UChar *)uprv_malloc((destSize+1)*U_SIZEOF_UCHAR); + + /* Test for NULL */ + if(tempbuffer == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + uprv_memset(tempbuffer, 0, (destSize+1)*U_SIZEOF_UCHAR); + + i = j = 0; + while(i < destSize && j < destSize) { + if(isLamAlefChar(dest[i]) ) { + tempbuffer[j] = convertLamAlef[ dest[i] - 0xFEF5 ]; + tempbuffer[j+1] = LAM_CHAR; + j++; + }else { + tempbuffer[j] = dest[i]; + } + i++; + j++; + } + + u_memcpy(dest, tempbuffer, destSize); + } + } + + if(tempbuffer) { + uprv_free(tempbuffer); + } + return destSize; +} + +/* + *Name : shapeUnicode + *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped + * arabic Unicode buffer in FExx Range + */ +static int32_t +shapeUnicode(UChar *dest, int32_t sourceLength, + int32_t destSize,uint32_t options, + UErrorCode *pErrorCode, + int tashkeelFlag, struct uShapeVariables shapeVars) { + + int32_t i, iend; + int32_t step; + int32_t lastPos,Nx, Nw; + unsigned int Shape; + int32_t lamalef_found = 0; + int32_t seenfamFound = 0, yehhamzaFound =0, tashkeelFound = 0; + UChar prevLink = 0, lastLink = 0, currLink, nextLink = 0; + UChar wLamalef; + + /* + * Converts the input buffer from FExx Range into 06xx Range + * to make sure that all characters are in the 06xx range + * even the lamalef is converted to the special region in + * the 06xx range + */ + if ((options & U_SHAPE_PRESERVE_PRESENTATION_MASK) == U_SHAPE_PRESERVE_PRESENTATION_NOOP) { + for (i = 0; i < sourceLength; i++) { + UChar inputChar = dest[i]; + if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { + UChar c = convertFBto06 [ (inputChar - 0xFB50) ]; + if (c != 0) + dest[i] = c; + } else if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) { + dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ; + } else { + dest[i] = inputChar ; + } + } + } + + + /* sets the index to the end of the buffer, together with the step point to -1 */ + i = sourceLength - 1; + iend = -1; + step = -1; + + /* + * This function resolves the link between the characters . + * Arabic characters have four forms : + * Isolated Form, Initial Form, Middle Form and Final Form + */ + currLink = getLink(dest[i]); + + lastPos = i; + Nx = -2, Nw = 0; + + while (i != iend) { + /* If high byte of currLink > 0 then more than one shape */ + if ((currLink & 0xFF00) > 0 || (getLink(dest[i]) & IRRELEVANT) != 0) { + Nw = i + step; + while (Nx < 0) { /* we need to know about next char */ + if(Nw == iend) { + nextLink = 0; + Nx = 3000; + } else { + nextLink = getLink(dest[Nw]); + if((nextLink & IRRELEVANT) == 0) { + Nx = Nw; + } else { + Nw = Nw + step; + } + } + } + + if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) { + lamalef_found = 1; + wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */ + if ( wLamalef != 0) { + dest[i] = LAMALEF_SPACE_SUB; /* The default case is to drop the Alef and replace */ + dest[lastPos] =wLamalef; /* it by LAMALEF_SPACE_SUB which is the last character in the */ + i=lastPos; /* unicode private use area, this is done to make */ + } /* sure that removeLamAlefSpaces() handles only the */ + lastLink = prevLink; /* spaces generated during lamalef generation. */ + currLink = getLink(wLamalef); /* LAMALEF_SPACE_SUB is added here and is replaced by spaces */ + } /* in removeLamAlefSpaces() */ + + if ((i > 0) && (dest[i-1] == SPACE_CHAR)){ + if ( isSeenFamilyChar(dest[i])) { + seenfamFound = 1; + } else if (dest[i] == YEH_HAMZA_CHAR) { + yehhamzaFound = 1; + } + } + else if(i==0){ + if ( isSeenFamilyChar(dest[i])){ + seenfamFound = 1; + } else if (dest[i] == YEH_HAMZA_CHAR) { + yehhamzaFound = 1; + } + } + + /* + * get the proper shape according to link ability of neighbors + * and of character; depends on the order of the shapes + * (isolated, initial, middle, final) in the compatibility area + */ + Shape = shapeTable[nextLink & (LINKR + LINKL)] + [lastLink & (LINKR + LINKL)] + [currLink & (LINKR + LINKL)]; + + if ((currLink & (LINKR+LINKL)) == 1) { + Shape &= 1; + } else if(isTashkeelChar(dest[i])) { + if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) && + dest[i] != 0x064C && dest[i] != 0x064D ) + { + Shape = 1; + if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) { + Shape = 0; + } + } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){ + Shape = 1; + } else { + Shape = 0; + } + } + if ((dest[i] ^ 0x0600) < 0x100) { + if ( isTashkeelChar(dest[i]) ){ + if (tashkeelFlag == 2 && dest[i] != SHADDA06_CHAR){ + dest[i] = TASHKEEL_SPACE_SUB; + tashkeelFound = 1; + } else { + /* to ensure the array index is within the range */ + U_ASSERT(dest[i] >= 0x064Bu + && dest[i]-0x064Bu < UPRV_LENGTHOF(IrrelevantPos)); + dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + static_cast<UChar>(Shape); + } + }else if ((currLink & APRESENT) > 0) { + dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape); + }else if ((currLink >> 8) > 0 && (currLink & IRRELEVANT) == 0) { + dest[i] = (UChar)(0xFE70 + (currLink >> 8) + Shape); + } + } + } + + /* move one notch forward */ + if ((currLink & IRRELEVANT) == 0) { + prevLink = lastLink; + lastLink = currLink; + lastPos = i; + } + + i = i + step; + if (i == Nx) { + currLink = nextLink; + Nx = -2; + } else if(i != iend) { + currLink = getLink(dest[i]); + } + } + destSize = sourceLength; + if ( (lamalef_found != 0 ) || (tashkeelFound != 0) ){ + destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode, shapeVars); + } + + if ( (seenfamFound != 0) || (yehhamzaFound != 0) ) { + destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE,shapeVars); + } + return destSize; +} + +/* + *Name : deShapeUnicode + *Function : Converts an Arabic Unicode buffer in FExx Range into unshaped + * arabic Unicode buffer in 06xx Range + */ +static int32_t +deShapeUnicode(UChar *dest, int32_t sourceLength, + int32_t destSize,uint32_t options, + UErrorCode *pErrorCode, struct uShapeVariables shapeVars) { + int32_t i = 0; + int32_t lamalef_found = 0; + int32_t yehHamzaComposeEnabled = 0; + int32_t seenComposeEnabled = 0; + + yehHamzaComposeEnabled = ((options&U_SHAPE_YEHHAMZA_MASK) == U_SHAPE_YEHHAMZA_TWOCELL_NEAR) ? 1 : 0; + seenComposeEnabled = ((options&U_SHAPE_SEEN_MASK) == U_SHAPE_SEEN_TWOCELL_NEAR)? 1 : 0; + + /* + *This for loop changes the buffer from the Unicode FE range to + *the Unicode 06 range + */ + + for(i = 0; i < sourceLength; i++) { + UChar inputChar = dest[i]; + if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { /* FBxx Arabic range */ + UChar c = convertFBto06 [ (inputChar - 0xFB50) ]; + if (c != 0) + dest[i] = c; + } else if( (yehHamzaComposeEnabled == 1) && ((inputChar == HAMZA06_CHAR) || (inputChar == HAMZAFE_CHAR)) + && (i < (sourceLength - 1)) && isAlefMaksouraChar(dest[i+1] )) { + dest[i] = SPACE_CHAR; + dest[i+1] = YEH_HAMZA_CHAR; + } else if ( (seenComposeEnabled == 1) && (isTailChar(inputChar)) && (i< (sourceLength - 1)) + && (isSeenTailFamilyChar(dest[i+1])) ) { + dest[i] = SPACE_CHAR; + } else if (( inputChar >= 0xFE70) && (inputChar <= 0xFEF4 )) { /* FExx Arabic range */ + dest[i] = convertFEto06 [ (inputChar - 0xFE70) ]; + } else { + dest[i] = inputChar ; + } + + if( isLamAlefChar(dest[i]) ) + lamalef_found = 1; + } + + destSize = sourceLength; + if (lamalef_found != 0){ + destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE, shapeVars); + } + return destSize; +} + +/* + **************************************** + * u_shapeArabic + **************************************** + */ + +U_CAPI int32_t U_EXPORT2 +u_shapeArabic(const UChar *source, int32_t sourceLength, + UChar *dest, int32_t destCapacity, + uint32_t options, + UErrorCode *pErrorCode) { + + int32_t destLength; + struct uShapeVariables shapeVars = { OLD_TAIL_CHAR,U_SHAPE_LAMALEF_BEGIN,U_SHAPE_LAMALEF_END,U_SHAPE_TASHKEEL_BEGIN,U_SHAPE_TASHKEEL_END,0}; + + /* usual error checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */ + if( source==NULL || sourceLength<-1 || (dest==NULL && destCapacity!=0) || destCapacity<0 || + (((options&U_SHAPE_TASHKEEL_MASK) > 0) && + ((options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) == U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) ) || + (((options&U_SHAPE_TASHKEEL_MASK) > 0) && + ((options&U_SHAPE_LETTERS_MASK) == U_SHAPE_LETTERS_UNSHAPE)) || + (options&U_SHAPE_DIGIT_TYPE_RESERVED)==U_SHAPE_DIGIT_TYPE_RESERVED || + (options&U_SHAPE_DIGITS_MASK)==U_SHAPE_DIGITS_RESERVED || + ((options&U_SHAPE_LAMALEF_MASK) != U_SHAPE_LAMALEF_RESIZE && + (options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) != 0) || + ((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) == U_SHAPE_AGGREGATE_TASHKEEL && + (options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) != U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) + ) + { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + /* Validate lamalef options */ + if(((options&U_SHAPE_LAMALEF_MASK) > 0)&& + !(((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_BEGIN) || + ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_END ) || + ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE )|| + ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_AUTO) || + ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_NEAR))) + { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + /* Validate Tashkeel options */ + if(((options&U_SHAPE_TASHKEEL_MASK) > 0)&& + !(((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_BEGIN) || + ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_END ) + ||((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE )|| + ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL))) + { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + /* determine the source length */ + if(sourceLength==-1) { + sourceLength=u_strlen(source); + } + if(sourceLength<=0) { + return u_terminateUChars(dest, destCapacity, 0, pErrorCode); + } + + /* check that source and destination do not overlap */ + if( dest!=NULL && + ((source<=dest && dest<source+sourceLength) || + (dest<=source && source<dest+destCapacity))) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* Does Options contain the new Seen Tail Unicode code point option */ + if ( (options&U_SHAPE_TAIL_TYPE_MASK) == U_SHAPE_TAIL_NEW_UNICODE){ + shapeVars.tailChar = NEW_TAIL_CHAR; + }else { + shapeVars.tailChar = OLD_TAIL_CHAR; + } + + if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) { + UChar buffer[300]; + UChar *tempbuffer, *tempsource = NULL; + int32_t outputSize, spacesCountl=0, spacesCountr=0; + + if((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK)>0) { + int32_t logical_order = (options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL; + int32_t aggregate_tashkeel = + (options&(U_SHAPE_AGGREGATE_TASHKEEL_MASK+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)) == + (U_SHAPE_AGGREGATE_TASHKEEL+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED); + int step=logical_order?1:-1; + int j=logical_order?-1:2*sourceLength; + int i=logical_order?-1:sourceLength; + int end=logical_order?sourceLength:-1; + int aggregation_possible = 1; + UChar prev = 0; + UChar prevLink, currLink = 0; + int newSourceLength = 0; + tempsource = (UChar *)uprv_malloc(2*sourceLength*U_SIZEOF_UCHAR); + if(tempsource == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + while ((i+=step) != end) { + prevLink = currLink; + currLink = getLink(source[i]); + if (aggregate_tashkeel && ((prevLink|currLink)&COMBINE) == COMBINE && aggregation_possible) { + aggregation_possible = 0; + tempsource[j] = (prev<source[i]?prev:source[i])-0x064C+0xFC5E; + currLink = getLink(tempsource[j]); + } else { + aggregation_possible = 1; + tempsource[j+=step] = source[i]; + prev = source[i]; + newSourceLength++; + } + } + source = tempsource+(logical_order?0:j); + sourceLength = newSourceLength; + } + + /* calculate destination size */ + /* TODO: do we ever need to do this pure preflighting? */ + if(((options&U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE) || + ((options&U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE)) { + outputSize=calculateSize(source,sourceLength,destCapacity,options); + } else { + outputSize=sourceLength; + } + + if(outputSize>destCapacity) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + if (tempsource != NULL) uprv_free(tempsource); + return outputSize; + } + + /* + * need a temporary buffer of size max(outputSize, sourceLength) + * because at first we copy source->temp + */ + if(sourceLength>outputSize) { + outputSize=sourceLength; + } + + /* Start of Arabic letter shaping part */ + if(outputSize<=UPRV_LENGTHOF(buffer)) { + outputSize=UPRV_LENGTHOF(buffer); + tempbuffer=buffer; + } else { + tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR); + + /*Test for NULL*/ + if(tempbuffer == NULL) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + if (tempsource != NULL) uprv_free(tempsource); + return 0; + } + } + u_memcpy(tempbuffer, source, sourceLength); + if (tempsource != NULL){ + uprv_free(tempsource); + } + + if(sourceLength<outputSize) { + uprv_memset(tempbuffer+sourceLength, 0, (outputSize-sourceLength)*U_SIZEOF_UCHAR); + } + + if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) { + countSpaces(tempbuffer,sourceLength,options,&spacesCountl,&spacesCountr); + invertBuffer(tempbuffer,sourceLength,options,spacesCountl,spacesCountr); + } + + if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) { + if((options&U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK) == U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END) { + shapeVars.spacesRelativeToTextBeginEnd = 1; + shapeVars.uShapeLamalefBegin = U_SHAPE_LAMALEF_END; + shapeVars.uShapeLamalefEnd = U_SHAPE_LAMALEF_BEGIN; + shapeVars.uShapeTashkeelBegin = U_SHAPE_TASHKEEL_END; + shapeVars.uShapeTashkeelEnd = U_SHAPE_TASHKEEL_BEGIN; + } + } + + switch(options&U_SHAPE_LETTERS_MASK) { + case U_SHAPE_LETTERS_SHAPE : + if( (options&U_SHAPE_TASHKEEL_MASK)> 0 + && ((options&U_SHAPE_TASHKEEL_MASK) !=U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)) { + /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */ + destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2,shapeVars); + }else { + /* default Call the shaping function with tashkeel flag == 1 */ + destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1,shapeVars); + + /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/ + if( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL){ + destLength = handleTashkeelWithTatweel(tempbuffer,destLength,destCapacity,options,pErrorCode); + } + } + break; + case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED : + /* Call the shaping function with tashkeel flag == 0 */ + destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0,shapeVars); + break; + + case U_SHAPE_LETTERS_UNSHAPE : + /* Call the deshaping function */ + destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,shapeVars); + break; + default : + /* will never occur because of validity checks above */ + destLength = 0; + break; + } + + /* + * TODO: (markus 2002aug01) + * For as long as we always preflight the outputSize above + * we should U_ASSERT(outputSize==destLength) + * except for the adjustment above before the tempbuffer allocation + */ + + if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) { + countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr); + invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr); + } + u_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity)); + + if(tempbuffer!=buffer) { + uprv_free(tempbuffer); + } + + if(destLength>destCapacity) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return destLength; + } + + /* End of Arabic letter shaping part */ + } else { + /* + * No letter shaping: + * just make sure the destination is large enough and copy the string. + */ + if(destCapacity<sourceLength) { + /* this catches preflighting, too */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return sourceLength; + } + u_memcpy(dest, source, sourceLength); + destLength=sourceLength; + } + + /* + * Perform number shaping. + * With UTF-16 or UTF-32, the length of the string is constant. + * The easiest way to do this is to operate on the destination and + * "shape" the digits in-place. + */ + if((options&U_SHAPE_DIGITS_MASK)!=U_SHAPE_DIGITS_NOOP) { + UChar digitBase; + int32_t i; + + /* select the requested digit group */ + switch(options&U_SHAPE_DIGIT_TYPE_MASK) { + case U_SHAPE_DIGIT_TYPE_AN: + digitBase=0x660; /* Unicode: "Arabic-Indic digits" */ + break; + case U_SHAPE_DIGIT_TYPE_AN_EXTENDED: + digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */ + break; + default: + /* will never occur because of validity checks above */ + digitBase=0; + break; + } + + /* perform the requested operation */ + switch(options&U_SHAPE_DIGITS_MASK) { + case U_SHAPE_DIGITS_EN2AN: + /* add (digitBase-'0') to each European (ASCII) digit code point */ + digitBase-=0x30; + for(i=0; i<destLength; ++i) { + if(((uint32_t)dest[i]-0x30)<10) { + dest[i]+=digitBase; + } + } + break; + case U_SHAPE_DIGITS_AN2EN: + /* subtract (digitBase-'0') from each Arabic digit code point */ + for(i=0; i<destLength; ++i) { + if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) { + dest[i]-=digitBase-0x30; + } + } + break; + case U_SHAPE_DIGITS_ALEN2AN_INIT_LR: + _shapeToArabicDigitsWithContext(dest, destLength, + digitBase, + (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL), + FALSE); + break; + case U_SHAPE_DIGITS_ALEN2AN_INIT_AL: + _shapeToArabicDigitsWithContext(dest, destLength, + digitBase, + (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL), + TRUE); + break; + default: + /* will never occur because of validity checks above */ + break; + } + } + + return u_terminateUChars(dest, destCapacity, destLength, pErrorCode); +} diff --git a/thirdparty/icu4c/common/usprep.cpp b/thirdparty/icu4c/common/usprep.cpp new file mode 100644 index 0000000000..8351a77370 --- /dev/null +++ b/thirdparty/icu4c/common/usprep.cpp @@ -0,0 +1,871 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * + * Copyright (C) 2003-2016, International Business Machines + * Corporation and others. All Rights Reserved. + * + ******************************************************************************* + * file name: usprep.cpp + * encoding: UTF-8 + * tab size: 8 (not used) + * indentation:4 + * + * created on: 2003jul2 + * created by: Ram Viswanadha + */ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_IDNA + +#include "unicode/usprep.h" + +#include "unicode/normalizer2.h" +#include "unicode/ustring.h" +#include "unicode/uchar.h" +#include "unicode/uversion.h" +#include "umutex.h" +#include "cmemory.h" +#include "sprpimpl.h" +#include "ustr_imp.h" +#include "uhash.h" +#include "cstring.h" +#include "udataswp.h" +#include "ucln_cmn.h" +#include "ubidi_props.h" +#include "uprops.h" + +U_NAMESPACE_USE + +U_CDECL_BEGIN + +/* +Static cache for already opened StringPrep profiles +*/ +static UHashtable *SHARED_DATA_HASHTABLE = NULL; +static icu::UInitOnce gSharedDataInitOnce = U_INITONCE_INITIALIZER; + +static UMutex usprepMutex; +/* format version of spp file */ +//static uint8_t formatVersion[4]={ 0, 0, 0, 0 }; + +/* the Unicode version of the sprep data */ +static UVersionInfo dataVersion={ 0, 0, 0, 0 }; + +/* Profile names must be aligned to UStringPrepProfileType */ +static const char * const PROFILE_NAMES[] = { + "rfc3491", /* USPREP_RFC3491_NAMEPREP */ + "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */ + "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */ + "rfc3491", /* USPREP_RFC3530_NSF4_CIS_PREP */ + "rfc3530mixp", /* USPREP_RFC3530_NSF4_MIXED_PREP_PREFIX */ + "rfc3491", /* USPREP_RFC3530_NSF4_MIXED_PREP_SUFFIX */ + "rfc3722", /* USPREP_RFC3722_ISCSI */ + "rfc3920node", /* USPREP_RFC3920_NODEPREP */ + "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */ + "rfc4011", /* USPREP_RFC4011_MIB */ + "rfc4013", /* USPREP_RFC4013_SASLPREP */ + "rfc4505", /* USPREP_RFC4505_TRACE */ + "rfc4518", /* USPREP_RFC4518_LDAP */ + "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */ +}; + +static UBool U_CALLCONV +isSPrepAcceptable(void * /* context */, + const char * /* type */, + const char * /* name */, + const UDataInfo *pInfo) { + if( + pInfo->size>=20 && + pInfo->isBigEndian==U_IS_BIG_ENDIAN && + pInfo->charsetFamily==U_CHARSET_FAMILY && + pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ + pInfo->dataFormat[1]==0x50 && + pInfo->dataFormat[2]==0x52 && + pInfo->dataFormat[3]==0x50 && + pInfo->formatVersion[0]==3 && + pInfo->formatVersion[2]==UTRIE_SHIFT && + pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT + ) { + //uprv_memcpy(formatVersion, pInfo->formatVersion, 4); + uprv_memcpy(dataVersion, pInfo->dataVersion, 4); + return TRUE; + } else { + return FALSE; + } +} + +static int32_t U_CALLCONV +getSPrepFoldingOffset(uint32_t data) { + + return (int32_t)data; + +} + +/* hashes an entry */ +static int32_t U_CALLCONV +hashEntry(const UHashTok parm) { + UStringPrepKey *b = (UStringPrepKey *)parm.pointer; + UHashTok namekey, pathkey; + namekey.pointer = b->name; + pathkey.pointer = b->path; + uint32_t unsignedHash = static_cast<uint32_t>(uhash_hashChars(namekey)) + + 37u * static_cast<uint32_t>(uhash_hashChars(pathkey)); + return static_cast<int32_t>(unsignedHash); +} + +/* compares two entries */ +static UBool U_CALLCONV +compareEntries(const UHashTok p1, const UHashTok p2) { + UStringPrepKey *b1 = (UStringPrepKey *)p1.pointer; + UStringPrepKey *b2 = (UStringPrepKey *)p2.pointer; + UHashTok name1, name2, path1, path2; + name1.pointer = b1->name; + name2.pointer = b2->name; + path1.pointer = b1->path; + path2.pointer = b2->path; + return ((UBool)(uhash_compareChars(name1, name2) & + uhash_compareChars(path1, path2))); +} + +static void +usprep_unload(UStringPrepProfile* data){ + udata_close(data->sprepData); +} + +static int32_t +usprep_internal_flushCache(UBool noRefCount){ + UStringPrepProfile *profile = NULL; + UStringPrepKey *key = NULL; + int32_t pos = UHASH_FIRST; + int32_t deletedNum = 0; + const UHashElement *e; + + /* + * if shared data hasn't even been lazy evaluated yet + * return 0 + */ + umtx_lock(&usprepMutex); + if (SHARED_DATA_HASHTABLE == NULL) { + umtx_unlock(&usprepMutex); + return 0; + } + + /*creates an enumeration to iterate through every element in the table */ + while ((e = uhash_nextElement(SHARED_DATA_HASHTABLE, &pos)) != NULL) + { + profile = (UStringPrepProfile *) e->value.pointer; + key = (UStringPrepKey *) e->key.pointer; + + if ((noRefCount== FALSE && profile->refCount == 0) || + noRefCount== TRUE) { + deletedNum++; + uhash_removeElement(SHARED_DATA_HASHTABLE, e); + + /* unload the data */ + usprep_unload(profile); + + if(key->name != NULL) { + uprv_free(key->name); + key->name=NULL; + } + if(key->path != NULL) { + uprv_free(key->path); + key->path=NULL; + } + uprv_free(profile); + uprv_free(key); + } + + } + umtx_unlock(&usprepMutex); + + return deletedNum; +} + +/* Works just like ucnv_flushCache() +static int32_t +usprep_flushCache(){ + return usprep_internal_flushCache(FALSE); +} +*/ + +static UBool U_CALLCONV usprep_cleanup(void){ + if (SHARED_DATA_HASHTABLE != NULL) { + usprep_internal_flushCache(TRUE); + if (SHARED_DATA_HASHTABLE != NULL && uhash_count(SHARED_DATA_HASHTABLE) == 0) { + uhash_close(SHARED_DATA_HASHTABLE); + SHARED_DATA_HASHTABLE = NULL; + } + } + gSharedDataInitOnce.reset(); + return (SHARED_DATA_HASHTABLE == NULL); +} +U_CDECL_END + + +/** Initializes the cache for resources */ +static void U_CALLCONV +createCache(UErrorCode &status) { + SHARED_DATA_HASHTABLE = uhash_open(hashEntry, compareEntries, NULL, &status); + if (U_FAILURE(status)) { + SHARED_DATA_HASHTABLE = NULL; + } + ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup); +} + +static void +initCache(UErrorCode *status) { + umtx_initOnce(gSharedDataInitOnce, &createCache, *status); +} + +static UBool U_CALLCONV +loadData(UStringPrepProfile* profile, + const char* path, + const char* name, + const char* type, + UErrorCode* errorCode) { + /* load Unicode SPREP data from file */ + UTrie _sprepTrie={ 0,0,0,0,0,0,0 }; + UDataMemory *dataMemory; + const int32_t *p=NULL; + const uint8_t *pb; + UVersionInfo normUnicodeVersion; + int32_t normUniVer, sprepUniVer, normCorrVer; + + if(errorCode==NULL || U_FAILURE(*errorCode)) { + return 0; + } + + /* open the data outside the mutex block */ + //TODO: change the path + dataMemory=udata_openChoice(path, type, name, isSPrepAcceptable, NULL, errorCode); + if(U_FAILURE(*errorCode)) { + return FALSE; + } + + p=(const int32_t *)udata_getMemory(dataMemory); + pb=(const uint8_t *)(p+_SPREP_INDEX_TOP); + utrie_unserialize(&_sprepTrie, pb, p[_SPREP_INDEX_TRIE_SIZE], errorCode); + _sprepTrie.getFoldingOffset=getSPrepFoldingOffset; + + + if(U_FAILURE(*errorCode)) { + udata_close(dataMemory); + return FALSE; + } + + /* in the mutex block, set the data for this process */ + umtx_lock(&usprepMutex); + if(profile->sprepData==NULL) { + profile->sprepData=dataMemory; + dataMemory=NULL; + uprv_memcpy(&profile->indexes, p, sizeof(profile->indexes)); + uprv_memcpy(&profile->sprepTrie, &_sprepTrie, sizeof(UTrie)); + } else { + p=(const int32_t *)udata_getMemory(profile->sprepData); + } + umtx_unlock(&usprepMutex); + /* initialize some variables */ + profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]); + + u_getUnicodeVersion(normUnicodeVersion); + normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) + + (normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]); + sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) + + (dataVersion[2] << 8 ) + (dataVersion[3]); + normCorrVer = profile->indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]; + + if(U_FAILURE(*errorCode)){ + udata_close(dataMemory); + return FALSE; + } + if( normUniVer < sprepUniVer && /* the Unicode version of SPREP file must be less than the Unicode Vesion of the normalization data */ + normUniVer < normCorrVer && /* the Unicode version of the NormalizationCorrections.txt file should be less than the Unicode Vesion of the normalization data */ + ((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0) /* normalization turned on*/ + ){ + *errorCode = U_INVALID_FORMAT_ERROR; + udata_close(dataMemory); + return FALSE; + } + profile->isDataLoaded = TRUE; + + /* if a different thread set it first, then close the extra data */ + if(dataMemory!=NULL) { + udata_close(dataMemory); /* NULL if it was set correctly */ + } + + + return profile->isDataLoaded; +} + +static UStringPrepProfile* +usprep_getProfile(const char* path, + const char* name, + UErrorCode *status){ + + UStringPrepProfile* profile = NULL; + + initCache(status); + + if(U_FAILURE(*status)){ + return NULL; + } + + UStringPrepKey stackKey; + /* + * const is cast way to save malloc, strcpy and free calls + * we use the passed in pointers for fetching the data from the + * hash table which is safe + */ + stackKey.name = (char*) name; + stackKey.path = (char*) path; + + /* fetch the data from the cache */ + umtx_lock(&usprepMutex); + profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); + if(profile != NULL) { + profile->refCount++; + } + umtx_unlock(&usprepMutex); + + if(profile == NULL) { + /* else load the data and put the data in the cache */ + LocalMemory<UStringPrepProfile> newProfile; + if(newProfile.allocateInsteadAndReset() == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + + /* load the data */ + if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){ + return NULL; + } + + /* get the options */ + newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0); + newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0); + + LocalMemory<UStringPrepKey> key; + LocalMemory<char> keyName; + LocalMemory<char> keyPath; + if( key.allocateInsteadAndReset() == NULL || + keyName.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(name)+1)) == NULL || + (path != NULL && + keyPath.allocateInsteadAndCopy(static_cast<int32_t>(uprv_strlen(path)+1)) == NULL) + ) { + *status = U_MEMORY_ALLOCATION_ERROR; + usprep_unload(newProfile.getAlias()); + return NULL; + } + + umtx_lock(&usprepMutex); + // If another thread already inserted the same key/value, refcount and cleanup our thread data + profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey)); + if(profile != NULL) { + profile->refCount++; + usprep_unload(newProfile.getAlias()); + } + else { + /* initialize the key members */ + key->name = keyName.orphan(); + uprv_strcpy(key->name, name); + if(path != NULL){ + key->path = keyPath.orphan(); + uprv_strcpy(key->path, path); + } + profile = newProfile.orphan(); + + /* add the data object to the cache */ + profile->refCount = 1; + uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status); + } + umtx_unlock(&usprepMutex); + } + + return profile; +} + +U_CAPI UStringPrepProfile* U_EXPORT2 +usprep_open(const char* path, + const char* name, + UErrorCode* status){ + + if(status == NULL || U_FAILURE(*status)){ + return NULL; + } + + /* initialize the profile struct members */ + return usprep_getProfile(path,name,status); +} + +U_CAPI UStringPrepProfile* U_EXPORT2 +usprep_openByType(UStringPrepProfileType type, + UErrorCode* status) { + if(status == NULL || U_FAILURE(*status)){ + return NULL; + } + int32_t index = (int32_t)type; + if (index < 0 || index >= UPRV_LENGTHOF(PROFILE_NAMES)) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + return usprep_open(NULL, PROFILE_NAMES[index], status); +} + +U_CAPI void U_EXPORT2 +usprep_close(UStringPrepProfile* profile){ + if(profile==NULL){ + return; + } + + umtx_lock(&usprepMutex); + /* decrement the ref count*/ + if(profile->refCount > 0){ + profile->refCount--; + } + umtx_unlock(&usprepMutex); + +} + +U_CFUNC void +uprv_syntaxError(const UChar* rules, + int32_t pos, + int32_t rulesLen, + UParseError* parseError){ + if(parseError == NULL){ + return; + } + parseError->offset = pos; + parseError->line = 0 ; // we are not using line numbers + + // for pre-context + int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1)); + int32_t limit = pos; + + u_memcpy(parseError->preContext,rules+start,limit-start); + //null terminate the buffer + parseError->preContext[limit-start] = 0; + + // for post-context; include error rules[pos] + start = pos; + limit = start + (U_PARSE_CONTEXT_LEN-1); + if (limit > rulesLen) { + limit = rulesLen; + } + if (start < rulesLen) { + u_memcpy(parseError->postContext,rules+start,limit-start); + } + //null terminate the buffer + parseError->postContext[limit-start]= 0; +} + + +static inline UStringPrepType +getValues(uint16_t trieWord, int16_t& value, UBool& isIndex){ + + UStringPrepType type; + if(trieWord == 0){ + /* + * Initial value stored in the mapping table + * just return USPREP_TYPE_LIMIT .. so that + * the source codepoint is copied to the destination + */ + type = USPREP_TYPE_LIMIT; + isIndex =FALSE; + value = 0; + }else if(trieWord >= _SPREP_TYPE_THRESHOLD){ + type = (UStringPrepType) (trieWord - _SPREP_TYPE_THRESHOLD); + isIndex =FALSE; + value = 0; + }else{ + /* get the type */ + type = USPREP_MAP; + /* ascertain if the value is index or delta */ + if(trieWord & 0x02){ + isIndex = TRUE; + value = trieWord >> 2; //mask off the lower 2 bits and shift + }else{ + isIndex = FALSE; + value = (int16_t)trieWord; + value = (value >> 2); + } + + if((trieWord>>2) == _SPREP_MAX_INDEX_VALUE){ + type = USPREP_DELETE; + isIndex =FALSE; + value = 0; + } + } + return type; +} + +// TODO: change to writing to UnicodeString not UChar * +static int32_t +usprep_map( const UStringPrepProfile* profile, + const UChar* src, int32_t srcLength, + UChar* dest, int32_t destCapacity, + int32_t options, + UParseError* parseError, + UErrorCode* status ){ + + uint16_t result; + int32_t destIndex=0; + int32_t srcIndex; + UBool allowUnassigned = (UBool) ((options & USPREP_ALLOW_UNASSIGNED)>0); + UStringPrepType type; + int16_t value; + UBool isIndex; + const int32_t* indexes = profile->indexes; + + // no error checking the caller check for error and arguments + // no string length check the caller finds out the string length + + for(srcIndex=0;srcIndex<srcLength;){ + UChar32 ch; + + U16_NEXT(src,srcIndex,srcLength,ch); + + result=0; + + UTRIE_GET16(&profile->sprepTrie,ch,result); + + type = getValues(result, value, isIndex); + + // check if the source codepoint is unassigned + if(type == USPREP_UNASSIGNED && allowUnassigned == FALSE){ + + uprv_syntaxError(src,srcIndex-U16_LENGTH(ch), srcLength,parseError); + *status = U_STRINGPREP_UNASSIGNED_ERROR; + return 0; + + }else if(type == USPREP_MAP){ + + int32_t index, length; + + if(isIndex){ + index = value; + if(index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] && + index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){ + length = 1; + }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] && + index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){ + length = 2; + }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] && + index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){ + length = 3; + }else{ + length = profile->mappingData[index++]; + + } + + /* copy mapping to destination */ + for(int32_t i=0; i< length; i++){ + if(destIndex < destCapacity ){ + dest[destIndex] = profile->mappingData[index+i]; + } + destIndex++; /* for pre-flighting */ + } + continue; + }else{ + // subtract the delta to arrive at the code point + ch -= value; + } + + }else if(type==USPREP_DELETE){ + // just consume the codepoint and contine + continue; + } + //copy the code point into destination + if(ch <= 0xFFFF){ + if(destIndex < destCapacity ){ + dest[destIndex] = (UChar)ch; + } + destIndex++; + }else{ + if(destIndex+1 < destCapacity ){ + dest[destIndex] = U16_LEAD(ch); + dest[destIndex+1] = U16_TRAIL(ch); + } + destIndex +=2; + } + + } + + return u_terminateUChars(dest, destCapacity, destIndex, status); +} + +/* + 1) Map -- For each character in the input, check if it has a mapping + and, if so, replace it with its mapping. + + 2) Normalize -- Possibly normalize the result of step 1 using Unicode + normalization. + + 3) Prohibit -- Check for any characters that are not allowed in the + output. If any are found, return an error. + + 4) Check bidi -- Possibly check for right-to-left characters, and if + any are found, make sure that the whole string satisfies the + requirements for bidirectional strings. If the string does not + satisfy the requirements for bidirectional strings, return an + error. + [Unicode3.2] defines several bidirectional categories; each character + has one bidirectional category assigned to it. For the purposes of + the requirements below, an "RandALCat character" is a character that + has Unicode bidirectional categories "R" or "AL"; an "LCat character" + is a character that has Unicode bidirectional category "L". Note + + + that there are many characters which fall in neither of the above + definitions; Latin digits (<U+0030> through <U+0039>) are examples of + this because they have bidirectional category "EN". + + In any profile that specifies bidirectional character handling, all + three of the following requirements MUST be met: + + 1) The characters in section 5.8 MUST be prohibited. + + 2) If a string contains any RandALCat character, the string MUST NOT + contain any LCat character. + + 3) If a string contains any RandALCat character, a RandALCat + character MUST be the first character of the string, and a + RandALCat character MUST be the last character of the string. +*/ +U_CAPI int32_t U_EXPORT2 +usprep_prepare( const UStringPrepProfile* profile, + const UChar* src, int32_t srcLength, + UChar* dest, int32_t destCapacity, + int32_t options, + UParseError* parseError, + UErrorCode* status ){ + + // check error status + if(U_FAILURE(*status)){ + return 0; + } + + //check arguments + if(profile==NULL || + (src==NULL ? srcLength!=0 : srcLength<-1) || + (dest==NULL ? destCapacity!=0 : destCapacity<0)) { + *status=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + //get the string length + if(srcLength < 0){ + srcLength = u_strlen(src); + } + // map + UnicodeString s1; + UChar *b1 = s1.getBuffer(srcLength); + if(b1==NULL){ + *status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + int32_t b1Len = usprep_map(profile, src, srcLength, + b1, s1.getCapacity(), options, parseError, status); + s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); + + if(*status == U_BUFFER_OVERFLOW_ERROR){ + // redo processing of string + /* we do not have enough room so grow the buffer*/ + b1 = s1.getBuffer(b1Len); + if(b1==NULL){ + *status = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + *status = U_ZERO_ERROR; // reset error + b1Len = usprep_map(profile, src, srcLength, + b1, s1.getCapacity(), options, parseError, status); + s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0); + } + if(U_FAILURE(*status)){ + return 0; + } + + // normalize + UnicodeString s2; + if(profile->doNFKC){ + const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status); + FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status)); + if(U_FAILURE(*status)){ + return 0; + } + fn2.normalize(s1, s2, *status); + }else{ + s2.fastCopyFrom(s1); + } + if(U_FAILURE(*status)){ + return 0; + } + + // Prohibit and checkBiDi in one pass + const UChar *b2 = s2.getBuffer(); + int32_t b2Len = s2.length(); + UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; + UBool leftToRight=FALSE, rightToLeft=FALSE; + int32_t rtlPos =-1, ltrPos =-1; + + for(int32_t b2Index=0; b2Index<b2Len;){ + UChar32 ch = 0; + U16_NEXT(b2, b2Index, b2Len, ch); + + uint16_t result; + UTRIE_GET16(&profile->sprepTrie,ch,result); + + int16_t value; + UBool isIndex; + UStringPrepType type = getValues(result, value, isIndex); + + if( type == USPREP_PROHIBITED || + ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) + ){ + *status = U_STRINGPREP_PROHIBITED_ERROR; + uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError); + return 0; + } + + if(profile->checkBiDi) { + direction = ubidi_getClass(ch); + if(firstCharDir == U_CHAR_DIRECTION_COUNT){ + firstCharDir = direction; + } + if(direction == U_LEFT_TO_RIGHT){ + leftToRight = TRUE; + ltrPos = b2Index-1; + } + if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ + rightToLeft = TRUE; + rtlPos = b2Index-1; + } + } + } + if(profile->checkBiDi == TRUE){ + // satisfy 2 + if( leftToRight == TRUE && rightToLeft == TRUE){ + *status = U_STRINGPREP_CHECK_BIDI_ERROR; + uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError); + return 0; + } + + //satisfy 3 + if( rightToLeft == TRUE && + !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && + (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) + ){ + *status = U_STRINGPREP_CHECK_BIDI_ERROR; + uprv_syntaxError(b2, rtlPos, b2Len, parseError); + return FALSE; + } + } + return s2.extract(dest, destCapacity, *status); +} + + +/* data swapping ------------------------------------------------------------ */ + +U_CAPI int32_t U_EXPORT2 +usprep_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UDataInfo *pInfo; + int32_t headerSize; + + const uint8_t *inBytes; + uint8_t *outBytes; + + const int32_t *inIndexes; + int32_t indexes[16]; + + int32_t i, offset, count, size; + + /* udata_swapDataHeader checks the arguments */ + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* check data format and format version */ + pInfo=(const UDataInfo *)((const char *)inData+4); + if(!( + pInfo->dataFormat[0]==0x53 && /* dataFormat="SPRP" */ + pInfo->dataFormat[1]==0x50 && + pInfo->dataFormat[2]==0x52 && + pInfo->dataFormat[3]==0x50 && + pInfo->formatVersion[0]==3 + )) { + udata_printError(ds, "usprep_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as StringPrep .spp data\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + inBytes=(const uint8_t *)inData+headerSize; + outBytes=(uint8_t *)outData+headerSize; + + inIndexes=(const int32_t *)inBytes; + + if(length>=0) { + length-=headerSize; + if(length<16*4) { + udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for StringPrep .spp data\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + + /* read the first 16 indexes (ICU 2.8/format version 3: _SPREP_INDEX_TOP==16, might grow) */ + for(i=0; i<16; ++i) { + indexes[i]=udata_readInt32(ds, inIndexes[i]); + } + + /* calculate the total length of the data */ + size= + 16*4+ /* size of indexes[] */ + indexes[_SPREP_INDEX_TRIE_SIZE]+ + indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; + + if(length>=0) { + if(length<size) { + udata_printError(ds, "usprep_swap(): too few bytes (%d after header) for all of StringPrep .spp data\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + /* copy the data for inaccessible bytes */ + if(inBytes!=outBytes) { + uprv_memcpy(outBytes, inBytes, size); + } + + offset=0; + + /* swap the int32_t indexes[] */ + count=16*4; + ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode); + offset+=count; + + /* swap the UTrie */ + count=indexes[_SPREP_INDEX_TRIE_SIZE]; + utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode); + offset+=count; + + /* swap the uint16_t mappingTable[] */ + count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]; + ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode); + //offset+=count; + } + + return headerSize+size; +} + +#endif /* #if !UCONFIG_NO_IDNA */ diff --git a/thirdparty/icu4c/common/ustack.cpp b/thirdparty/icu4c/common/ustack.cpp new file mode 100644 index 0000000000..fb314b0ebe --- /dev/null +++ b/thirdparty/icu4c/common/ustack.cpp @@ -0,0 +1,63 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2003-2011, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#include "uvector.h" + +U_NAMESPACE_BEGIN + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UStack) + +UStack::UStack(UErrorCode &status) : + UVector(status) +{ +} + +UStack::UStack(int32_t initialCapacity, UErrorCode &status) : + UVector(initialCapacity, status) +{ +} + +UStack::UStack(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status) : + UVector(d, c, status) +{ +} + +UStack::UStack(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status) : + UVector(d, c, initialCapacity, status) +{ +} + +UStack::~UStack() {} + +void* UStack::pop(void) { + int32_t n = size() - 1; + void* result = 0; + if (n >= 0) { + result = elementAt(n); + removeElementAt(n); + } + return result; +} + +int32_t UStack::popi(void) { + int32_t n = size() - 1; + int32_t result = 0; + if (n >= 0) { + result = elementAti(n); + removeElementAt(n); + } + return result; +} + +int32_t UStack::search(void* obj) const { + int32_t i = indexOf(obj); + return (i >= 0) ? size() - i : i; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/ustr_cnv.cpp b/thirdparty/icu4c/common/ustr_cnv.cpp new file mode 100644 index 0000000000..9a25a9905a --- /dev/null +++ b/thirdparty/icu4c/common/ustr_cnv.cpp @@ -0,0 +1,256 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1998-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ustr_cnv.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004aug24 +* created by: Markus W. Scherer +* +* Character conversion functions moved here from ustring.c +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/ustring.h" +#include "unicode/ucnv.h" +#include "cstring.h" +#include "cmemory.h" +#include "umutex.h" +#include "ustr_cnv.h" +#include "ucnv_bld.h" + +/* mutexed access to a shared default converter ----------------------------- */ + +static UConverter *gDefaultConverter = NULL; + +U_CAPI UConverter* U_EXPORT2 +u_getDefaultConverter(UErrorCode *status) +{ + UConverter *converter = NULL; + + if (gDefaultConverter != NULL) { + icu::umtx_lock(NULL); + + /* need to check to make sure it wasn't taken out from under us */ + if (gDefaultConverter != NULL) { + converter = gDefaultConverter; + gDefaultConverter = NULL; + } + icu::umtx_unlock(NULL); + } + + /* if the cache was empty, create a converter */ + if(converter == NULL) { + converter = ucnv_open(NULL, status); + if(U_FAILURE(*status)) { + ucnv_close(converter); + converter = NULL; + } + } + + return converter; +} + +U_CAPI void U_EXPORT2 +u_releaseDefaultConverter(UConverter *converter) +{ + if(gDefaultConverter == NULL) { + if (converter != NULL) { + ucnv_reset(converter); + } + ucnv_enableCleanup(); + icu::umtx_lock(NULL); + if(gDefaultConverter == NULL) { + gDefaultConverter = converter; + converter = NULL; + } + icu::umtx_unlock(NULL); + } + + if(converter != NULL) { + ucnv_close(converter); + } +} + +U_CAPI void U_EXPORT2 +u_flushDefaultConverter() +{ + UConverter *converter = NULL; + + if (gDefaultConverter != NULL) { + icu::umtx_lock(NULL); + + /* need to check to make sure it wasn't taken out from under us */ + if (gDefaultConverter != NULL) { + converter = gDefaultConverter; + gDefaultConverter = NULL; + } + icu::umtx_unlock(NULL); + } + + /* if the cache was populated, flush it */ + if(converter != NULL) { + ucnv_close(converter); + } +} + + +/* conversions between char* and UChar* ------------------------------------- */ + +/* maximum string length for u_uastrcpy() and u_austrcpy() implementations */ +#define MAX_STRLEN 0x0FFFFFFF + +/* + returns the minimum of (the length of the null-terminated string) and n. +*/ +static int32_t u_astrnlen(const char *s1, int32_t n) +{ + int32_t len = 0; + + if (s1) + { + while (n-- && *(s1++)) + { + len++; + } + } + return len; +} + +U_CAPI UChar* U_EXPORT2 +u_uastrncpy(UChar *ucs1, + const char *s2, + int32_t n) +{ + UChar *target = ucs1; + UErrorCode err = U_ZERO_ERROR; + UConverter *cnv = u_getDefaultConverter(&err); + if(U_SUCCESS(err) && cnv != NULL) { + ucnv_reset(cnv); + ucnv_toUnicode(cnv, + &target, + ucs1+n, + &s2, + s2+u_astrnlen(s2, n), + NULL, + TRUE, + &err); + ucnv_reset(cnv); /* be good citizens */ + u_releaseDefaultConverter(cnv); + if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) { + *ucs1 = 0; /* failure */ + } + if(target < (ucs1+n)) { /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */ + *target = 0; /* terminate */ + } + } else { + *ucs1 = 0; + } + return ucs1; +} + +U_CAPI UChar* U_EXPORT2 +u_uastrcpy(UChar *ucs1, + const char *s2 ) +{ + UErrorCode err = U_ZERO_ERROR; + UConverter *cnv = u_getDefaultConverter(&err); + if(U_SUCCESS(err) && cnv != NULL) { + ucnv_toUChars(cnv, + ucs1, + MAX_STRLEN, + s2, + (int32_t)uprv_strlen(s2), + &err); + u_releaseDefaultConverter(cnv); + if(U_FAILURE(err)) { + *ucs1 = 0; + } + } else { + *ucs1 = 0; + } + return ucs1; +} + +/* + returns the minimum of (the length of the null-terminated string) and n. +*/ +static int32_t u_ustrnlen(const UChar *ucs1, int32_t n) +{ + int32_t len = 0; + + if (ucs1) + { + while (n-- && *(ucs1++)) + { + len++; + } + } + return len; +} + +U_CAPI char* U_EXPORT2 +u_austrncpy(char *s1, + const UChar *ucs2, + int32_t n) +{ + char *target = s1; + UErrorCode err = U_ZERO_ERROR; + UConverter *cnv = u_getDefaultConverter(&err); + if(U_SUCCESS(err) && cnv != NULL) { + ucnv_reset(cnv); + ucnv_fromUnicode(cnv, + &target, + s1+n, + &ucs2, + ucs2+u_ustrnlen(ucs2, n), + NULL, + TRUE, + &err); + ucnv_reset(cnv); /* be good citizens */ + u_releaseDefaultConverter(cnv); + if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) { + *s1 = 0; /* failure */ + } + if(target < (s1+n)) { /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */ + *target = 0; /* terminate */ + } + } else { + *s1 = 0; + } + return s1; +} + +U_CAPI char* U_EXPORT2 +u_austrcpy(char *s1, + const UChar *ucs2 ) +{ + UErrorCode err = U_ZERO_ERROR; + UConverter *cnv = u_getDefaultConverter(&err); + if(U_SUCCESS(err) && cnv != NULL) { + int32_t len = ucnv_fromUChars(cnv, + s1, + MAX_STRLEN, + ucs2, + -1, + &err); + u_releaseDefaultConverter(cnv); + s1[len] = 0; + } else { + *s1 = 0; + } + return s1; +} + +#endif diff --git a/thirdparty/icu4c/common/ustr_cnv.h b/thirdparty/icu4c/common/ustr_cnv.h new file mode 100644 index 0000000000..861e3ebff0 --- /dev/null +++ b/thirdparty/icu4c/common/ustr_cnv.h @@ -0,0 +1,51 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2010, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ustr_cnv.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004Aug27 +* created by: George Rhoten +*/ + +#ifndef USTR_CNV_IMP_H +#define USTR_CNV_IMP_H + +#include "unicode/utypes.h" +#include "unicode/ucnv.h" + +#if !UCONFIG_NO_CONVERSION + +/** + * Get the default converter. This is a commonly used converter + * that is used for the ustring and UnicodeString API. + * Remember to use the u_releaseDefaultConverter when you are done. + * @internal + */ +U_CAPI UConverter* U_EXPORT2 +u_getDefaultConverter(UErrorCode *status); + + +/** + * Release the default converter to the converter cache. + * @internal + */ +U_CAPI void U_EXPORT2 +u_releaseDefaultConverter(UConverter *converter); + +/** + * Flush the default converter, if cached. + * @internal + */ +U_CAPI void U_EXPORT2 +u_flushDefaultConverter(void); + +#endif + +#endif diff --git a/thirdparty/icu4c/common/ustr_imp.h b/thirdparty/icu4c/common/ustr_imp.h new file mode 100644 index 0000000000..3c4b9cc2a5 --- /dev/null +++ b/thirdparty/icu4c/common/ustr_imp.h @@ -0,0 +1,155 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ustr_imp.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001jan30 +* created by: Markus W. Scherer +*/ + +#ifndef __USTR_IMP_H__ +#define __USTR_IMP_H__ + +#include "unicode/utypes.h" +#include "unicode/utf8.h" + +/** + * Internal option for unorm_cmpEquivFold() for strncmp style. + * If set, checks for both string length and terminating NUL. + */ +#define _STRNCMP_STYLE 0x1000 + +/** + * Compare two strings in code point order or code unit order. + * Works in strcmp style (both lengths -1), + * strncmp style (lengths equal and >=0, flag true), + * and memcmp/UnicodeString style (at least one length >=0). + */ +U_CFUNC int32_t U_EXPORT2 +uprv_strCompare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + UBool strncmpStyle, UBool codePointOrder); + +U_CAPI int32_t U_EXPORT2 +ustr_hashUCharsN(const UChar *str, int32_t length); + +U_CAPI int32_t U_EXPORT2 +ustr_hashCharsN(const char *str, int32_t length); + +U_CAPI int32_t U_EXPORT2 +ustr_hashICharsN(const char *str, int32_t length); + +/** + * Convert an ASCII-range lowercase character to uppercase. + * + * @param c A UChar. + * @return If UChar is a lowercase ASCII character, returns the uppercase version. + * Otherwise, returns the input character. + */ +U_CAPI UChar U_EXPORT2 +u_asciiToUpper(UChar c); + +// TODO: Add u_asciiToLower if/when there is a need for it. + +/** + * NUL-terminate a UChar * string if possible. + * If length < destCapacity then NUL-terminate. + * If length == destCapacity then do not terminate but set U_STRING_NOT_TERMINATED_WARNING. + * If length > destCapacity then do not terminate but set U_BUFFER_OVERFLOW_ERROR. + * + * @param dest Destination buffer, can be NULL if destCapacity==0. + * @param destCapacity Number of UChars available at dest. + * @param length Number of UChars that were (to be) written to dest. + * @param pErrorCode ICU error code. + * @return length + */ +U_CAPI int32_t U_EXPORT2 +u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode); + +/** + * NUL-terminate a char * string if possible. + * Same as u_terminateUChars() but for a different string type. + */ +U_CAPI int32_t U_EXPORT2 +u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode); + +/** + * NUL-terminate a UChar32 * string if possible. + * Same as u_terminateUChars() but for a different string type. + */ +U_CAPI int32_t U_EXPORT2 +u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode); + +/** + * NUL-terminate a wchar_t * string if possible. + * Same as u_terminateUChars() but for a different string type. + */ +U_CAPI int32_t U_EXPORT2 +u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode); + +/** + * Counts the bytes of any whole valid sequence for a UTF-8 lead byte. + * Returns 1 for ASCII 0..0x7f. + * Returns 0 for 0x80..0xc1 as well as for 0xf5..0xff. + * leadByte might be evaluated multiple times. + * + * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. + * @return 0..4 + */ +#define U8_COUNT_BYTES(leadByte) \ + (U8_IS_SINGLE(leadByte) ? 1 : U8_COUNT_BYTES_NON_ASCII(leadByte)) + +/** + * Counts the bytes of any whole valid sequence for a UTF-8 lead byte. + * Returns 0 for 0x00..0xc1 as well as for 0xf5..0xff. + * leadByte might be evaluated multiple times. + * + * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. + * @return 0 or 2..4 + */ +#define U8_COUNT_BYTES_NON_ASCII(leadByte) \ + (U8_IS_LEAD(leadByte) ? ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+2 : 0) + +#ifdef __cplusplus + +U_NAMESPACE_BEGIN + +class UTF8 { +public: + UTF8() = delete; // all static + + /** + * Is t a valid UTF-8 trail byte? + * + * @param prev Must be the preceding lead byte if i==1 and length>=3; + * otherwise ignored. + * @param t The i-th byte following the lead byte. + * @param i The index (1..3) of byte t in the byte sequence. 0<i<length + * @param length The length (2..4) of the byte sequence according to the lead byte. + * @return true if t is a valid trail byte in this context. + */ + static inline UBool isValidTrail(int32_t prev, uint8_t t, int32_t i, int32_t length) { + // The first trail byte after a 3- or 4-byte lead byte + // needs to be validated together with its lead byte. + if (length <= 2 || i > 1) { + return U8_IS_TRAIL(t); + } else if (length == 3) { + return U8_IS_VALID_LEAD3_AND_T1(prev, t); + } else { // length == 4 + return U8_IS_VALID_LEAD4_AND_T1(prev, t); + } + } +}; + +U_NAMESPACE_END + +#endif // __cplusplus + +#endif diff --git a/thirdparty/icu4c/common/ustr_titlecase_brkiter.cpp b/thirdparty/icu4c/common/ustr_titlecase_brkiter.cpp new file mode 100644 index 0000000000..457905eb60 --- /dev/null +++ b/thirdparty/icu4c/common/ustr_titlecase_brkiter.cpp @@ -0,0 +1,237 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ustr_titlecase_brkiter.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011may30 +* created by: Markus W. Scherer +* +* Titlecasing functions that are based on BreakIterator +* were moved here to break dependency cycles among parts of the common library. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/brkiter.h" +#include "unicode/casemap.h" +#include "unicode/chariter.h" +#include "unicode/localpointer.h" +#include "unicode/ubrk.h" +#include "unicode/ucasemap.h" +#include "unicode/utext.h" +#include "cmemory.h" +#include "uassert.h" +#include "ucase.h" +#include "ucasemap_imp.h" + +U_NAMESPACE_BEGIN + +/** + * Whole-string BreakIterator. + * Titlecasing only calls setText(), first(), and next(). + * We implement the rest only to satisfy the abstract interface. + */ +class WholeStringBreakIterator : public BreakIterator { +public: + WholeStringBreakIterator() : BreakIterator(), length(0) {} + ~WholeStringBreakIterator() U_OVERRIDE; + UBool operator==(const BreakIterator&) const U_OVERRIDE; + WholeStringBreakIterator *clone() const U_OVERRIDE; + static UClassID U_EXPORT2 getStaticClassID(); + UClassID getDynamicClassID() const U_OVERRIDE; + CharacterIterator &getText() const U_OVERRIDE; + UText *getUText(UText *fillIn, UErrorCode &errorCode) const U_OVERRIDE; + void setText(const UnicodeString &text) U_OVERRIDE; + void setText(UText *text, UErrorCode &errorCode) U_OVERRIDE; + void adoptText(CharacterIterator* it) U_OVERRIDE; + int32_t first() U_OVERRIDE; + int32_t last() U_OVERRIDE; + int32_t previous() U_OVERRIDE; + int32_t next() U_OVERRIDE; + int32_t current() const U_OVERRIDE; + int32_t following(int32_t offset) U_OVERRIDE; + int32_t preceding(int32_t offset) U_OVERRIDE; + UBool isBoundary(int32_t offset) U_OVERRIDE; + int32_t next(int32_t n) U_OVERRIDE; + WholeStringBreakIterator *createBufferClone(void *stackBuffer, int32_t &BufferSize, + UErrorCode &errorCode) U_OVERRIDE; + WholeStringBreakIterator &refreshInputText(UText *input, UErrorCode &errorCode) U_OVERRIDE; + +private: + int32_t length; +}; + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(WholeStringBreakIterator) + +WholeStringBreakIterator::~WholeStringBreakIterator() {} +UBool WholeStringBreakIterator::operator==(const BreakIterator&) const { return FALSE; } +WholeStringBreakIterator *WholeStringBreakIterator::clone() const { return nullptr; } + +CharacterIterator &WholeStringBreakIterator::getText() const { + UPRV_UNREACHABLE; // really should not be called +} +UText *WholeStringBreakIterator::getUText(UText * /*fillIn*/, UErrorCode &errorCode) const { + if (U_SUCCESS(errorCode)) { + errorCode = U_UNSUPPORTED_ERROR; + } + return nullptr; +} + +void WholeStringBreakIterator::setText(const UnicodeString &text) { + length = text.length(); +} +void WholeStringBreakIterator::setText(UText *text, UErrorCode &errorCode) { + if (U_SUCCESS(errorCode)) { + int64_t length64 = utext_nativeLength(text); + if (length64 <= INT32_MAX) { + length = (int32_t)length64; + } else { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + } + } +} +void WholeStringBreakIterator::adoptText(CharacterIterator*) { + UPRV_UNREACHABLE; // should not be called +} + +int32_t WholeStringBreakIterator::first() { return 0; } +int32_t WholeStringBreakIterator::last() { return length; } +int32_t WholeStringBreakIterator::previous() { return 0; } +int32_t WholeStringBreakIterator::next() { return length; } +int32_t WholeStringBreakIterator::current() const { return 0; } +int32_t WholeStringBreakIterator::following(int32_t /*offset*/) { return length; } +int32_t WholeStringBreakIterator::preceding(int32_t /*offset*/) { return 0; } +UBool WholeStringBreakIterator::isBoundary(int32_t /*offset*/) { return FALSE; } +int32_t WholeStringBreakIterator::next(int32_t /*n*/) { return length; } + +WholeStringBreakIterator *WholeStringBreakIterator::createBufferClone( + void * /*stackBuffer*/, int32_t & /*BufferSize*/, UErrorCode &errorCode) { + if (U_SUCCESS(errorCode)) { + errorCode = U_UNSUPPORTED_ERROR; + } + return nullptr; +} +WholeStringBreakIterator &WholeStringBreakIterator::refreshInputText( + UText * /*input*/, UErrorCode &errorCode) { + if (U_SUCCESS(errorCode)) { + errorCode = U_UNSUPPORTED_ERROR; + } + return *this; +} + +U_CFUNC +BreakIterator *ustrcase_getTitleBreakIterator( + const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter, + LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + options &= U_TITLECASE_ITERATOR_MASK; + if (options != 0 && iter != nullptr) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + if (iter == nullptr) { + switch (options) { + case 0: + iter = BreakIterator::createWordInstance( + locale != nullptr ? *locale : Locale(locID), errorCode); + break; + case U_TITLECASE_WHOLE_STRING: + iter = new WholeStringBreakIterator(); + if (iter == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } + break; + case U_TITLECASE_SENTENCES: + iter = BreakIterator::createSentenceInstance( + locale != nullptr ? *locale : Locale(locID), errorCode); + break; + default: + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + break; + } + ownedIter.adoptInstead(iter); + } + return iter; +} + +int32_t CaseMap::toTitle( + const char *locale, uint32_t options, BreakIterator *iter, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + LocalPointer<BreakIterator> ownedIter; + iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode); + if(iter==NULL) { + return 0; + } + UnicodeString s(srcLength<0, src, srcLength); + iter->setText(s); + return ustrcase_map( + ustrcase_getCaseLocale(locale), options, iter, + dest, destCapacity, + src, srcLength, + ustrcase_internalToTitle, edits, errorCode); +} + +U_NAMESPACE_END + +U_NAMESPACE_USE + +U_CAPI int32_t U_EXPORT2 +u_strToTitle(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UBreakIterator *titleIter, + const char *locale, + UErrorCode *pErrorCode) { + LocalPointer<BreakIterator> ownedIter; + BreakIterator *iter = ustrcase_getTitleBreakIterator( + nullptr, locale, 0, reinterpret_cast<BreakIterator *>(titleIter), + ownedIter, *pErrorCode); + if (iter == nullptr) { + return 0; + } + UnicodeString s(srcLength<0, src, srcLength); + iter->setText(s); + return ustrcase_mapWithOverlap( + ustrcase_getCaseLocale(locale), 0, iter, + dest, destCapacity, + src, srcLength, + ustrcase_internalToTitle, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +ucasemap_toTitle(UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + if (csm->iter == NULL) { + LocalPointer<BreakIterator> ownedIter; + BreakIterator *iter = ustrcase_getTitleBreakIterator( + nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode); + if (iter == nullptr) { + return 0; + } + csm->iter = ownedIter.orphan(); + } + UnicodeString s(srcLength<0, src, srcLength); + csm->iter->setText(s); + return ustrcase_map( + csm->caseLocale, csm->options, csm->iter, + dest, destCapacity, + src, srcLength, + ustrcase_internalToTitle, NULL, *pErrorCode); +} + +#endif // !UCONFIG_NO_BREAK_ITERATION diff --git a/thirdparty/icu4c/common/ustr_wcs.cpp b/thirdparty/icu4c/common/ustr_wcs.cpp new file mode 100644 index 0000000000..e9f278e969 --- /dev/null +++ b/thirdparty/icu4c/common/ustr_wcs.cpp @@ -0,0 +1,535 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2001-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ustr_wcs.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004sep07 +* created by: Markus W. Scherer +* +* u_strToWCS() and u_strFromWCS() functions +* moved here from ustrtrns.c for better modularization. +*/ + +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "cstring.h" +#include "cwchar.h" +#include "cmemory.h" +#include "ustr_imp.h" +#include "ustr_cnv.h" + +#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION + +#define _STACK_BUFFER_CAPACITY 1000 +#define _BUFFER_CAPACITY_MULTIPLIER 2 + +#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) +// TODO: We should use CharString for char buffers and UnicodeString for UChar buffers. +// Then we could change this to work only with wchar_t buffers. +static inline UBool +u_growAnyBufferFromStatic(void *context, + void **pBuffer, int32_t *pCapacity, int32_t reqCapacity, + int32_t length, int32_t size) { + // Use char* not void* to avoid the compiler's strict-aliasing assumptions + // and related warnings. + char *newBuffer=(char *)uprv_malloc(reqCapacity*size); + if(newBuffer!=NULL) { + if(length>0) { + uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size); + } + *pCapacity=reqCapacity; + } else { + *pCapacity=0; + } + + /* release the old pBuffer if it was not statically allocated */ + if(*pBuffer!=(char *)context) { + uprv_free(*pBuffer); + } + + *pBuffer=newBuffer; + return (UBool)(newBuffer!=NULL); +} + +/* helper function */ +static wchar_t* +_strToWCS(wchar_t *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode){ + + char stackBuffer [_STACK_BUFFER_CAPACITY]; + char* tempBuf = stackBuffer; + int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY; + char* tempBufLimit = stackBuffer + tempBufCapacity; + UConverter* conv = NULL; + char* saveBuf = tempBuf; + wchar_t* intTarget=NULL; + int32_t intTargetCapacity=0; + int count=0,retVal=0; + + const UChar *pSrcLimit =NULL; + const UChar *pSrc = src; + + conv = u_getDefaultConverter(pErrorCode); + + if(U_FAILURE(*pErrorCode)){ + return NULL; + } + + if(srcLength == -1){ + srcLength = u_strlen(pSrc); + } + + pSrcLimit = pSrc + srcLength; + + for(;;) { + /* reset the error state */ + *pErrorCode = U_ZERO_ERROR; + + /* convert to chars using default converter */ + ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode); + count =(tempBuf - saveBuf); + + /* This should rarely occur */ + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ + tempBuf = saveBuf; + + /* we dont have enough room on the stack grow the buffer */ + int32_t newCapacity = 2 * srcLength; + if(newCapacity <= tempBufCapacity) { + newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity; + } + if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, + newCapacity, count, 1)) { + goto cleanup; + } + + saveBuf = tempBuf; + tempBufLimit = tempBuf + tempBufCapacity; + tempBuf = tempBuf + count; + + } else { + break; + } + } + + if(U_FAILURE(*pErrorCode)){ + goto cleanup; + } + + /* done with conversion null terminate the char buffer */ + if(count>=tempBufCapacity){ + tempBuf = saveBuf; + /* we dont have enough room on the stack grow the buffer */ + if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, + count+1, count, 1)) { + goto cleanup; + } + saveBuf = tempBuf; + } + + saveBuf[count]=0; + + + /* allocate more space than required + * here we assume that every char requires + * no more than 2 wchar_ts + */ + intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */; + intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) ); + + if(intTarget){ + + int32_t nulLen = 0; + int32_t remaining = intTargetCapacity; + wchar_t* pIntTarget=intTarget; + tempBuf = saveBuf; + + /* now convert the mbs to wcs */ + for(;;){ + + /* we can call the system API since we are sure that + * there is atleast 1 null in the input + */ + retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining); + + if(retVal==-1){ + *pErrorCode = U_INVALID_CHAR_FOUND; + break; + }else if(retVal== remaining){/* should never occur */ + int numWritten = (pIntTarget-intTarget); + u_growAnyBufferFromStatic(NULL,(void**) &intTarget, + &intTargetCapacity, + intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER, + numWritten, + sizeof(wchar_t)); + pIntTarget = intTarget; + remaining=intTargetCapacity; + + if(nulLen!=count){ /*there are embedded nulls*/ + pIntTarget+=numWritten; + remaining-=numWritten; + } + + }else{ + int32_t nulVal; + /*scan for nulls */ + /* we donot check for limit since tempBuf is null terminated */ + while(tempBuf[nulLen++] != 0){ + } + nulVal = (nulLen < srcLength) ? 1 : 0; + pIntTarget = pIntTarget + retVal+nulVal; + remaining -=(retVal+nulVal); + + /* check if we have reached the source limit*/ + if(nulLen>=(count)){ + break; + } + } + } + count = (int32_t)(pIntTarget-intTarget); + + if(0 < count && count <= destCapacity){ + uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t)); + } + + if(pDestLength){ + *pDestLength = count; + } + + /* free the allocated memory */ + uprv_free(intTarget); + + }else{ + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + } +cleanup: + /* are we still using stack buffer */ + if(stackBuffer != saveBuf){ + uprv_free(saveBuf); + } + u_terminateWChars(dest,destCapacity,count,pErrorCode); + + u_releaseDefaultConverter(conv); + + return dest; +} +#endif + +U_CAPI wchar_t* U_EXPORT2 +u_strToWCS(wchar_t *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode){ + + /* args check */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ + return NULL; + } + + if( (src==NULL && srcLength!=0) || srcLength < -1 || + (destCapacity<0) || (dest == NULL && destCapacity > 0) + ) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + +#ifdef U_WCHAR_IS_UTF16 + /* wchar_t is UTF-16 just do a memcpy */ + if(srcLength == -1){ + srcLength = u_strlen(src); + } + if(0 < srcLength && srcLength <= destCapacity){ + u_memcpy((UChar *)dest, src, srcLength); + } + if(pDestLength){ + *pDestLength = srcLength; + } + + u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode); + + return dest; + +#elif defined U_WCHAR_IS_UTF32 + + return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength, + src, srcLength, pErrorCode); + +#else + + return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode); + +#endif + +} + +#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) +/* helper function */ +static UChar* +_strFromWCS( UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const wchar_t *src, + int32_t srcLength, + UErrorCode *pErrorCode) +{ + int32_t retVal =0, count =0 ; + UConverter* conv = NULL; + UChar* pTarget = NULL; + UChar* pTargetLimit = NULL; + UChar* target = NULL; + + UChar uStack [_STACK_BUFFER_CAPACITY]; + + wchar_t wStack[_STACK_BUFFER_CAPACITY]; + wchar_t* pWStack = wStack; + + + char cStack[_STACK_BUFFER_CAPACITY]; + int32_t cStackCap = _STACK_BUFFER_CAPACITY; + char* pCSrc=cStack; + char* pCSave=pCSrc; + char* pCSrcLimit=NULL; + + const wchar_t* pSrc = src; + const wchar_t* pSrcLimit = NULL; + + if(srcLength ==-1){ + /* if the wchar_t source is null terminated we can safely + * assume that there are no embedded nulls, this is a fast + * path for null terminated strings. + */ + for(;;){ + /* convert wchars to chars */ + retVal = uprv_wcstombs(pCSrc,src, cStackCap); + + if(retVal == -1){ + *pErrorCode = U_ILLEGAL_CHAR_FOUND; + goto cleanup; + }else if(retVal >= (cStackCap-1)){ + /* Should rarely occur */ + u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, + cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char)); + pCSave = pCSrc; + }else{ + /* converted every thing */ + pCSrc = pCSrc+retVal; + break; + } + } + + }else{ + /* here the source is not null terminated + * so it may have nulls embeded and we need to + * do some extra processing + */ + int32_t remaining =cStackCap; + + pSrcLimit = src + srcLength; + + for(;;){ + int32_t nulLen = 0; + + /* find nulls in the string */ + while(nulLen<srcLength && pSrc[nulLen++]!=0){ + } + + if((pSrc+nulLen) < pSrcLimit){ + /* check if we have enough room in pCSrc */ + if(remaining < (nulLen * MB_CUR_MAX)){ + /* should rarely occur */ + int32_t len = (pCSrc-pCSave); + pCSrc = pCSave; + /* we do not have enough room so grow the buffer*/ + u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, + _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); + + pCSave = pCSrc; + pCSrc = pCSave+len; + remaining = cStackCap-(pCSrc - pCSave); + } + + /* we have found a null so convert the + * chunk from begining of non-null char to null + */ + retVal = uprv_wcstombs(pCSrc,pSrc,remaining); + + if(retVal==-1){ + /* an error occurred bail out */ + *pErrorCode = U_ILLEGAL_CHAR_FOUND; + goto cleanup; + } + + pCSrc += retVal+1 /* already null terminated */; + + pSrc += nulLen; /* skip past the null */ + srcLength-=nulLen; /* decrement the srcLength */ + remaining -= (pCSrc-pCSave); + + + }else{ + /* the source is not null terminated and we are + * end of source so we copy the source to a temp buffer + * null terminate it and convert wchar_ts to chars + */ + if(nulLen >= _STACK_BUFFER_CAPACITY){ + /* Should rarely occcur */ + /* allocate new buffer buffer */ + pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); + if(pWStack==NULL){ + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } + } + if(nulLen>0){ + /* copy the contents to tempStack */ + uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t)); + } + + /* null terminate the tempBuffer */ + pWStack[nulLen] =0 ; + + if(remaining < (nulLen * MB_CUR_MAX)){ + /* Should rarely occur */ + int32_t len = (pCSrc-pCSave); + pCSrc = pCSave; + /* we do not have enough room so grow the buffer*/ + u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, + cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); + + pCSave = pCSrc; + pCSrc = pCSave+len; + remaining = cStackCap-(pCSrc - pCSave); + } + /* convert to chars */ + retVal = uprv_wcstombs(pCSrc,pWStack,remaining); + + pCSrc += retVal; + pSrc += nulLen; + srcLength-=nulLen; /* decrement the srcLength */ + break; + } + } + } + + /* OK..now we have converted from wchar_ts to chars now + * convert chars to UChars + */ + pCSrcLimit = pCSrc; + pCSrc = pCSave; + pTarget = target= dest; + pTargetLimit = dest + destCapacity; + + conv= u_getDefaultConverter(pErrorCode); + + if(U_FAILURE(*pErrorCode)|| conv==NULL){ + goto cleanup; + } + + for(;;) { + + *pErrorCode = U_ZERO_ERROR; + + /* convert to stack buffer*/ + ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode); + + /* increment count to number written to stack */ + count+= pTarget - target; + + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ + target = uStack; + pTarget = uStack; + pTargetLimit = uStack + _STACK_BUFFER_CAPACITY; + } else { + break; + } + + } + + if(pDestLength){ + *pDestLength =count; + } + + u_terminateUChars(dest,destCapacity,count,pErrorCode); + +cleanup: + + if(cStack != pCSave){ + uprv_free(pCSave); + } + + if(wStack != pWStack){ + uprv_free(pWStack); + } + + u_releaseDefaultConverter(conv); + + return dest; +} +#endif + +U_CAPI UChar* U_EXPORT2 +u_strFromWCS(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const wchar_t *src, + int32_t srcLength, + UErrorCode *pErrorCode) +{ + + /* args check */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ + return NULL; + } + + if( (src==NULL && srcLength!=0) || srcLength < -1 || + (destCapacity<0) || (dest == NULL && destCapacity > 0) + ) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + +#ifdef U_WCHAR_IS_UTF16 + /* wchar_t is UTF-16 just do a memcpy */ + if(srcLength == -1){ + srcLength = u_strlen((const UChar *)src); + } + if(0 < srcLength && srcLength <= destCapacity){ + u_memcpy(dest, (const UChar *)src, srcLength); + } + if(pDestLength){ + *pDestLength = srcLength; + } + + u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); + + return dest; + +#elif defined U_WCHAR_IS_UTF32 + + return u_strFromUTF32(dest, destCapacity, pDestLength, + (UChar32*)src, srcLength, pErrorCode); + +#else + + return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode); + +#endif + +} + +#endif /* #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) && !UCONFIG_NO_CONVERSION */ diff --git a/thirdparty/icu4c/common/ustrcase.cpp b/thirdparty/icu4c/common/ustrcase.cpp new file mode 100644 index 0000000000..618e847c65 --- /dev/null +++ b/thirdparty/icu4c/common/ustrcase.cpp @@ -0,0 +1,1818 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2001-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ustrcase.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002feb20 +* created by: Markus W. Scherer +* +* Implementation file for string casing C API functions. +* Uses functions from uchar.c for basic functionality that requires access +* to the Unicode Character Database (uprops.dat). +*/ + +#include "unicode/utypes.h" +#include "unicode/brkiter.h" +#include "unicode/casemap.h" +#include "unicode/edits.h" +#include "unicode/stringoptions.h" +#include "unicode/ustring.h" +#include "unicode/ucasemap.h" +#include "unicode/ubrk.h" +#include "unicode/utf.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "ucase.h" +#include "ucasemap_imp.h" +#include "ustr_imp.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + +namespace { + +int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity, + Edits *edits, UErrorCode &errorCode) { + if (U_SUCCESS(errorCode)) { + if (destIndex > destCapacity) { + errorCode = U_BUFFER_OVERFLOW_ERROR; + } else if (edits != NULL) { + edits->copyErrorTo(errorCode); + } + } + return destIndex; +} + +/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */ +inline int32_t +appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, + int32_t result, const UChar *s, + int32_t cpLength, uint32_t options, icu::Edits *edits) { + UChar32 c; + int32_t length; + + /* decode the result */ + if(result<0) { + /* (not) original code point */ + if(edits!=NULL) { + edits->addUnchanged(cpLength); + } + if(options & U_OMIT_UNCHANGED_TEXT) { + return destIndex; + } + c=~result; + if(destIndex<destCapacity && c<=0xffff) { // BMP slightly-fastpath + dest[destIndex++]=(UChar)c; + return destIndex; + } + length=cpLength; + } else { + if(result<=UCASE_MAX_STRING_LENGTH) { + c=U_SENTINEL; + length=result; + } else if(destIndex<destCapacity && result<=0xffff) { // BMP slightly-fastpath + dest[destIndex++]=(UChar)result; + if(edits!=NULL) { + edits->addReplace(cpLength, 1); + } + return destIndex; + } else { + c=result; + length=U16_LENGTH(c); + } + if(edits!=NULL) { + edits->addReplace(cpLength, length); + } + } + if(length>(INT32_MAX-destIndex)) { + return -1; // integer overflow + } + + if(destIndex<destCapacity) { + /* append the result */ + if(c>=0) { + /* code point */ + UBool isError=FALSE; + U16_APPEND(dest, destIndex, destCapacity, c, isError); + if(isError) { + /* overflow, nothing written */ + destIndex+=length; + } + } else { + /* string */ + if((destIndex+length)<=destCapacity) { + while(length>0) { + dest[destIndex++]=*s++; + --length; + } + } else { + /* overflow */ + destIndex+=length; + } + } + } else { + /* preflight */ + destIndex+=length; + } + return destIndex; +} + +inline int32_t +appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) { + if(destIndex<destCapacity) { + dest[destIndex]=c; + } else if(destIndex==INT32_MAX) { + return -1; // integer overflow + } + return destIndex+1; +} + +int32_t +appendNonEmptyUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity, + const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) { + if(edits!=NULL) { + edits->addUnchanged(length); + } + if(options & U_OMIT_UNCHANGED_TEXT) { + return destIndex; + } + if(length>(INT32_MAX-destIndex)) { + return -1; // integer overflow + } + if((destIndex+length)<=destCapacity) { + u_memcpy(dest+destIndex, s, length); + } + return destIndex + length; +} + +inline int32_t +appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity, + const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) { + if (length <= 0) { + return destIndex; + } + return appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits); +} + +UChar32 U_CALLCONV +utf16_caseContextIterator(void *context, int8_t dir) { + UCaseContext *csc=(UCaseContext *)context; + UChar32 c; + + if(dir<0) { + /* reset for backward iteration */ + csc->index=csc->cpStart; + csc->dir=dir; + } else if(dir>0) { + /* reset for forward iteration */ + csc->index=csc->cpLimit; + csc->dir=dir; + } else { + /* continue current iteration direction */ + dir=csc->dir; + } + + if(dir<0) { + if(csc->start<csc->index) { + U16_PREV((const UChar *)csc->p, csc->start, csc->index, c); + return c; + } + } else { + if(csc->index<csc->limit) { + U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c); + return c; + } + } + return U_SENTINEL; +} + +/** + * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account. + * caseLocale < 0: Case-folds [srcStart..srcLimit[. + */ +int32_t toLower(int32_t caseLocale, uint32_t options, + UChar *dest, int32_t destCapacity, + const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit, + icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToLower; + if (caseLocale == UCASE_LOC_ROOT || + (caseLocale >= 0 ? + !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) : + (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) { + latinToLower = LatinCase::TO_LOWER_NORMAL; + } else { + latinToLower = LatinCase::TO_LOWER_TR_LT; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t destIndex = 0; + int32_t prev = srcStart; + int32_t srcIndex = srcStart; + for (;;) { + // fast path for simple cases + UChar lead = 0; + while (srcIndex < srcLimit) { + lead = src[srcIndex]; + int32_t delta; + if (lead < LatinCase::LONG_S) { + int8_t d = latinToLower[lead]; + if (d == LatinCase::EXC) { break; } + ++srcIndex; + if (d == 0) { continue; } + delta = d; + } else if (lead >= 0xd800) { + break; // surrogate or higher + } else { + uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead); + if (UCASE_HAS_EXCEPTION(props)) { break; } + ++srcIndex; + if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + } + lead += static_cast<UChar>(delta); + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, srcIndex - 1 - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendUChar(dest, destIndex, destCapacity, lead); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + if (srcIndex >= srcLimit) { + break; + } + // slow path + int32_t cpStart = srcIndex++; + UChar trail; + UChar32 c; + if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) { + c = U16_GET_SUPPLEMENTARY(lead, trail); + ++srcIndex; + } else { + c = lead; + } + const UChar *s; + if (caseLocale >= 0) { + csc->cpStart = cpStart; + csc->cpLimit = srcIndex; + c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale); + } else { + c = ucase_toFullFolding(c, &s, options); + } + if (c >= 0) { + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, cpStart - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendResult(dest, destIndex, destCapacity, c, s, + srcIndex - cpStart, options, edits); + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + } + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, srcIndex - prev, options, edits); + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + return destIndex; +} + +int32_t toUpper(int32_t caseLocale, uint32_t options, + UChar *dest, int32_t destCapacity, + const UChar *src, UCaseContext *csc, int32_t srcLength, + icu::Edits *edits, UErrorCode &errorCode) { + const int8_t *latinToUpper; + if (caseLocale == UCASE_LOC_TURKISH) { + latinToUpper = LatinCase::TO_UPPER_TR; + } else { + latinToUpper = LatinCase::TO_UPPER_NORMAL; + } + const UTrie2 *trie = ucase_getTrie(); + int32_t destIndex = 0; + int32_t prev = 0; + int32_t srcIndex = 0; + for (;;) { + // fast path for simple cases + UChar lead = 0; + while (srcIndex < srcLength) { + lead = src[srcIndex]; + int32_t delta; + if (lead < LatinCase::LONG_S) { + int8_t d = latinToUpper[lead]; + if (d == LatinCase::EXC) { break; } + ++srcIndex; + if (d == 0) { continue; } + delta = d; + } else if (lead >= 0xd800) { + break; // surrogate or higher + } else { + uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead); + if (UCASE_HAS_EXCEPTION(props)) { break; } + ++srcIndex; + if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) { + continue; + } + } + lead += static_cast<UChar>(delta); + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, srcIndex - 1 - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendUChar(dest, destIndex, destCapacity, lead); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + if (srcIndex >= srcLength) { + break; + } + // slow path + int32_t cpStart; + csc->cpStart = cpStart = srcIndex++; + UChar trail; + UChar32 c; + if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) { + c = U16_GET_SUPPLEMENTARY(lead, trail); + ++srcIndex; + } else { + c = lead; + } + csc->cpLimit = srcIndex; + const UChar *s; + c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale); + if (c >= 0) { + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, cpStart - prev, options, edits); + if (destIndex >= 0) { + destIndex = appendResult(dest, destIndex, destCapacity, c, s, + srcIndex - cpStart, options, edits); + } + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + prev = srcIndex; + } + } + destIndex = appendUnchanged(dest, destIndex, destCapacity, + src + prev, srcIndex - prev, options, edits); + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + return destIndex; +} + +} // namespace + +U_NAMESPACE_END + +U_NAMESPACE_USE + +#if !UCONFIG_NO_BREAK_ITERATION + +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode) { + if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) { + return 0; + } + + /* set up local variables */ + UCaseContext csc=UCASECONTEXT_INITIALIZER; + csc.p=(void *)src; + csc.limit=srcLength; + int32_t destIndex=0; + int32_t prev=0; + UBool isFirstIndex=TRUE; + + /* titlecasing loop */ + while(prev<srcLength) { + /* find next index where to titlecase */ + int32_t index; + if(isFirstIndex) { + isFirstIndex=FALSE; + index=iter->first(); + } else { + index=iter->next(); + } + if(index==UBRK_DONE || index>srcLength) { + index=srcLength; + } + + /* + * Segment [prev..index[ into 3 parts: + * a) skipped characters (copy as-is) [prev..titleStart[ + * b) first letter (titlecase) [titleStart..titleLimit[ + * c) subsequent characters (lowercase) [titleLimit..index[ + */ + if(prev<index) { + // Find and copy skipped characters [prev..titleStart[ + int32_t titleStart=prev; + int32_t titleLimit=prev; + UChar32 c; + U16_NEXT(src, titleLimit, index, c); + if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) { + // Adjust the titlecasing index to the next cased character, + // or to the next letter/number/symbol/private use. + // Stop with titleStart<titleLimit<=index + // if there is a character to be titlecased, + // or else stop with titleStart==titleLimit==index. + UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0; + while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) { + titleStart=titleLimit; + if(titleLimit==index) { + break; + } + U16_NEXT(src, titleLimit, index, c); + } + if (prev < titleStart) { + destIndex=appendUnchanged(dest, destIndex, destCapacity, + src+prev, titleStart-prev, options, edits); + if(destIndex<0) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + } + + if(titleStart<titleLimit) { + /* titlecase c which is from [titleStart..titleLimit[ */ + csc.cpStart=titleStart; + csc.cpLimit=titleLimit; + const UChar *s; + c=ucase_toFullTitle(c, utf16_caseContextIterator, &csc, &s, caseLocale); + destIndex=appendResult(dest, destIndex, destCapacity, c, s, + titleLimit-titleStart, options, edits); + if(destIndex<0) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + /* Special case Dutch IJ titlecasing */ + if (titleStart+1 < index && + caseLocale == UCASE_LOC_DUTCH && + (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) { + if (src[titleStart+1] == 0x006A) { + destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A); + if(destIndex<0) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + if(edits!=NULL) { + edits->addReplace(1, 1); + } + titleLimit++; + } else if (src[titleStart+1] == 0x004A) { + // Keep the capital J from getting lowercased. + destIndex=appendUnchanged(dest, destIndex, destCapacity, + src+titleStart+1, 1, options, edits); + if(destIndex<0) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + titleLimit++; + } + } + + /* lowercase [titleLimit..index[ */ + if(titleLimit<index) { + if((options&U_TITLECASE_NO_LOWERCASE)==0) { + /* Normal operation: Lowercase the rest of the word. */ + destIndex+= + toLower( + caseLocale, options, + dest+destIndex, destCapacity-destIndex, + src, &csc, titleLimit, index, + edits, errorCode); + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + errorCode=U_ZERO_ERROR; + } + if(U_FAILURE(errorCode)) { + return destIndex; + } + } else { + /* Optionally just copy the rest of the word unchanged. */ + destIndex=appendUnchanged(dest, destIndex, destCapacity, + src+titleLimit, index-titleLimit, options, edits); + if(destIndex<0) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + } + } + } + + prev=index; + } + + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); +} + +#endif // !UCONFIG_NO_BREAK_ITERATION + +U_NAMESPACE_BEGIN +namespace GreekUpper { + +// Data generated by prototype code, see +// http://site.icu-project.org/design/case/greek-upper +// TODO: Move this data into ucase.icu. +static const uint16_t data0370[] = { + // U+0370..03FF + 0x0370, + 0x0370, + 0x0372, + 0x0372, + 0, + 0, + 0x0376, + 0x0376, + 0, + 0, + 0x037A, + 0x03FD, + 0x03FE, + 0x03FF, + 0, + 0x037F, + 0, + 0, + 0, + 0, + 0, + 0, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, + 0x0391 | HAS_VOWEL, + 0x0392, + 0x0393, + 0x0394, + 0x0395 | HAS_VOWEL, + 0x0396, + 0x0397 | HAS_VOWEL, + 0x0398, + 0x0399 | HAS_VOWEL, + 0x039A, + 0x039B, + 0x039C, + 0x039D, + 0x039E, + 0x039F | HAS_VOWEL, + 0x03A0, + 0x03A1, + 0, + 0x03A3, + 0x03A4, + 0x03A5 | HAS_VOWEL, + 0x03A6, + 0x03A7, + 0x03A8, + 0x03A9 | HAS_VOWEL, + 0x0399 | HAS_VOWEL | HAS_DIALYTIKA, + 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, + 0x0391 | HAS_VOWEL, + 0x0392, + 0x0393, + 0x0394, + 0x0395 | HAS_VOWEL, + 0x0396, + 0x0397 | HAS_VOWEL, + 0x0398, + 0x0399 | HAS_VOWEL, + 0x039A, + 0x039B, + 0x039C, + 0x039D, + 0x039E, + 0x039F | HAS_VOWEL, + 0x03A0, + 0x03A1, + 0x03A3, + 0x03A3, + 0x03A4, + 0x03A5 | HAS_VOWEL, + 0x03A6, + 0x03A7, + 0x03A8, + 0x03A9 | HAS_VOWEL, + 0x0399 | HAS_VOWEL | HAS_DIALYTIKA, + 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03CF, + 0x0392, + 0x0398, + 0x03D2, + 0x03D2 | HAS_ACCENT, + 0x03D2 | HAS_DIALYTIKA, + 0x03A6, + 0x03A0, + 0x03CF, + 0x03D8, + 0x03D8, + 0x03DA, + 0x03DA, + 0x03DC, + 0x03DC, + 0x03DE, + 0x03DE, + 0x03E0, + 0x03E0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0x039A, + 0x03A1, + 0x03F9, + 0x037F, + 0x03F4, + 0x0395 | HAS_VOWEL, + 0, + 0x03F7, + 0x03F7, + 0x03F9, + 0x03FA, + 0x03FA, + 0x03FC, + 0x03FD, + 0x03FE, + 0x03FF, +}; + +static const uint16_t data1F00[] = { + // U+1F00..1FFF + 0x0391 | HAS_VOWEL, + 0x0391 | HAS_VOWEL, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL, + 0x0391 | HAS_VOWEL, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0395 | HAS_VOWEL, + 0x0395 | HAS_VOWEL, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0, + 0, + 0x0395 | HAS_VOWEL, + 0x0395 | HAS_VOWEL, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0, + 0, + 0x0397 | HAS_VOWEL, + 0x0397 | HAS_VOWEL, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL, + 0x0397 | HAS_VOWEL, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL, + 0x0399 | HAS_VOWEL, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL, + 0x0399 | HAS_VOWEL, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x039F | HAS_VOWEL, + 0x039F | HAS_VOWEL, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0, + 0, + 0x039F | HAS_VOWEL, + 0x039F | HAS_VOWEL, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0, + 0, + 0x03A5 | HAS_VOWEL, + 0x03A5 | HAS_VOWEL, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0, + 0x03A5 | HAS_VOWEL, + 0, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL, + 0x03A9 | HAS_VOWEL, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL, + 0x03A9 | HAS_VOWEL, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0, + 0, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL, + 0x0391 | HAS_VOWEL, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0391 | HAS_VOWEL, + 0x0391 | HAS_VOWEL, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_ACCENT, + 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0, + 0x0399 | HAS_VOWEL, + 0, + 0, + 0, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0x0395 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_ACCENT, + 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0, + 0, + 0, + 0x0399 | HAS_VOWEL, + 0x0399 | HAS_VOWEL, + 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, + 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, + 0, + 0, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, + 0x0399 | HAS_VOWEL, + 0x0399 | HAS_VOWEL, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0x0399 | HAS_VOWEL | HAS_ACCENT, + 0, + 0, + 0, + 0, + 0x03A5 | HAS_VOWEL, + 0x03A5 | HAS_VOWEL, + 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, + 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, + 0x03A1, + 0x03A1, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, + 0x03A5 | HAS_VOWEL, + 0x03A5 | HAS_VOWEL, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0x03A5 | HAS_VOWEL | HAS_ACCENT, + 0x03A1, + 0, + 0, + 0, + 0, + 0, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0x039F | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_ACCENT, + 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, + 0, + 0, + 0, +}; + +// U+2126 Ohm sign +static const uint16_t data2126 = 0x03A9 | HAS_VOWEL; + +uint32_t getLetterData(UChar32 c) { + if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) { + return 0; + } else if (c <= 0x3ff) { + return data0370[c - 0x370]; + } else if (c <= 0x1fff) { + return data1F00[c - 0x1f00]; + } else if (c == 0x2126) { + return data2126; + } else { + return 0; + } +} + +uint32_t getDiacriticData(UChar32 c) { + switch (c) { + case 0x0300: // varia + case 0x0301: // tonos = oxia + case 0x0342: // perispomeni + case 0x0302: // circumflex can look like perispomeni + case 0x0303: // tilde can look like perispomeni + case 0x0311: // inverted breve can look like perispomeni + return HAS_ACCENT; + case 0x0308: // dialytika = diaeresis + return HAS_COMBINING_DIALYTIKA; + case 0x0344: // dialytika tonos + return HAS_COMBINING_DIALYTIKA | HAS_ACCENT; + case 0x0345: // ypogegrammeni = iota subscript + return HAS_YPOGEGRAMMENI; + case 0x0304: // macron + case 0x0306: // breve + case 0x0313: // comma above + case 0x0314: // reversed comma above + case 0x0343: // koronis + return HAS_OTHER_GREEK_DIACRITIC; + default: + return 0; + } +} + +UBool isFollowedByCasedLetter(const UChar *s, int32_t i, int32_t length) { + while (i < length) { + UChar32 c; + U16_NEXT(s, i, length, c); + int32_t type = ucase_getTypeOrIgnorable(c); + if ((type & UCASE_IGNORABLE) != 0) { + // Case-ignorable, continue with the loop. + } else if (type != UCASE_NONE) { + return TRUE; // Followed by cased letter. + } else { + return FALSE; // Uncased and not case-ignorable. + } + } + return FALSE; // Not followed by cased letter. +} + +/** + * Greek string uppercasing with a state machine. + * Probably simpler than a stateless function that has to figure out complex context-before + * for each character. + * TODO: Try to re-consolidate one way or another with the non-Greek function. + */ +int32_t toUpper(uint32_t options, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + Edits *edits, + UErrorCode &errorCode) { + int32_t destIndex=0; + uint32_t state = 0; + for (int32_t i = 0; i < srcLength;) { + int32_t nextIndex = i; + UChar32 c; + U16_NEXT(src, nextIndex, srcLength, c); + uint32_t nextState = 0; + int32_t type = ucase_getTypeOrIgnorable(c); + if ((type & UCASE_IGNORABLE) != 0) { + // c is case-ignorable + nextState |= (state & AFTER_CASED); + } else if (type != UCASE_NONE) { + // c is cased + nextState |= AFTER_CASED; + } + uint32_t data = getLetterData(c); + if (data > 0) { + uint32_t upper = data & UPPER_MASK; + // Add a dialytika to this iota or ypsilon vowel + // if we removed a tonos from the previous vowel, + // and that previous vowel did not also have (or gain) a dialytika. + // Adding one only to the final vowel in a longer sequence + // (which does not occur in normal writing) would require lookahead. + // Set the same flag as for preserving an existing dialytika. + if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 && + (upper == 0x399 || upper == 0x3A5)) { + data |= HAS_DIALYTIKA; + } + int32_t numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota. + if ((data & HAS_YPOGEGRAMMENI) != 0) { + numYpogegrammeni = 1; + } + // Skip combining diacritics after this Greek letter. + while (nextIndex < srcLength) { + uint32_t diacriticData = getDiacriticData(src[nextIndex]); + if (diacriticData != 0) { + data |= diacriticData; + if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) { + ++numYpogegrammeni; + } + ++nextIndex; + } else { + break; // not a Greek diacritic + } + } + if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) { + nextState |= AFTER_VOWEL_WITH_ACCENT; + } + // Map according to Greek rules. + UBool addTonos = FALSE; + if (upper == 0x397 && + (data & HAS_ACCENT) != 0 && + numYpogegrammeni == 0 && + (state & AFTER_CASED) == 0 && + !isFollowedByCasedLetter(src, nextIndex, srcLength)) { + // Keep disjunctive "or" with (only) a tonos. + // We use the same "word boundary" conditions as for the Final_Sigma test. + if (i == nextIndex) { + upper = 0x389; // Preserve the precomposed form. + } else { + addTonos = TRUE; + } + } else if ((data & HAS_DIALYTIKA) != 0) { + // Preserve a vowel with dialytika in precomposed form if it exists. + if (upper == 0x399) { + upper = 0x3AA; + data &= ~HAS_EITHER_DIALYTIKA; + } else if (upper == 0x3A5) { + upper = 0x3AB; + data &= ~HAS_EITHER_DIALYTIKA; + } + } + + UBool change; + if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) { + change = TRUE; // common, simple usage + } else { + // Find out first whether we are changing the text. + change = src[i] != upper || numYpogegrammeni > 0; + int32_t i2 = i + 1; + if ((data & HAS_EITHER_DIALYTIKA) != 0) { + change |= i2 >= nextIndex || src[i2] != 0x308; + ++i2; + } + if (addTonos) { + change |= i2 >= nextIndex || src[i2] != 0x301; + ++i2; + } + int32_t oldLength = nextIndex - i; + int32_t newLength = (i2 - i) + numYpogegrammeni; + change |= oldLength != newLength; + if (change) { + if (edits != NULL) { + edits->addReplace(oldLength, newLength); + } + } else { + if (edits != NULL) { + edits->addUnchanged(oldLength); + } + // Write unchanged text? + change = (options & U_OMIT_UNCHANGED_TEXT) == 0; + } + } + + if (change) { + destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper); + if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) { + destIndex=appendUChar(dest, destIndex, destCapacity, 0x308); // restore or add a dialytika + } + if (destIndex >= 0 && addTonos) { + destIndex=appendUChar(dest, destIndex, destCapacity, 0x301); + } + while (destIndex >= 0 && numYpogegrammeni > 0) { + destIndex=appendUChar(dest, destIndex, destCapacity, 0x399); + --numYpogegrammeni; + } + if(destIndex<0) { + errorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + } else { + const UChar *s; + c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK); + destIndex = appendResult(dest, destIndex, destCapacity, c, s, + nextIndex - i, options, edits); + if (destIndex < 0) { + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + i = nextIndex; + state = nextState; + } + + return destIndex; +} + +} // namespace GreekUpper +U_NAMESPACE_END + +/* functions available in the common library (for unistr_case.cpp) */ + +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode) { + UCaseContext csc=UCASECONTEXT_INITIALIZER; + csc.p=(void *)src; + csc.limit=srcLength; + int32_t destIndex = toLower( + caseLocale, options, + dest, destCapacity, + src, &csc, 0, srcLength, + edits, errorCode); + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); +} + +U_CFUNC int32_t U_CALLCONV +ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode) { + int32_t destIndex; + if (caseLocale == UCASE_LOC_GREEK) { + destIndex = GreekUpper::toUpper(options, dest, destCapacity, + src, srcLength, edits, errorCode); + } else { + UCaseContext csc=UCASECONTEXT_INITIALIZER; + csc.p=(void *)src; + csc.limit=srcLength; + destIndex = toUpper( + caseLocale, options, + dest, destCapacity, + src, &csc, srcLength, + edits, errorCode); + } + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); +} + +U_CFUNC int32_t U_CALLCONV +ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode) { + int32_t destIndex = toLower( + -1, options, + dest, destCapacity, + src, nullptr, 0, srcLength, + edits, errorCode); + return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode); +} + +U_CFUNC int32_t +ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UStringCaseMapper *stringCaseMapper, + icu::Edits *edits, + UErrorCode &errorCode) { + int32_t destLength; + + /* check argument values */ + if(U_FAILURE(errorCode)) { + return 0; + } + if( destCapacity<0 || + (dest==NULL && destCapacity>0) || + src==NULL || + srcLength<-1 + ) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* get the string length */ + if(srcLength==-1) { + srcLength=u_strlen(src); + } + + /* check for overlapping source and destination */ + if( dest!=NULL && + ((src>=dest && src<(dest+destCapacity)) || + (dest>=src && dest<(src+srcLength))) + ) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { + edits->reset(); + } + destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR + dest, destCapacity, src, srcLength, edits, errorCode); + return u_terminateUChars(dest, destCapacity, destLength, &errorCode); +} + +U_CFUNC int32_t +ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UStringCaseMapper *stringCaseMapper, + UErrorCode &errorCode) { + UChar buffer[300]; + UChar *temp; + + int32_t destLength; + + /* check argument values */ + if(U_FAILURE(errorCode)) { + return 0; + } + if( destCapacity<0 || + (dest==NULL && destCapacity>0) || + src==NULL || + srcLength<-1 + ) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* get the string length */ + if(srcLength==-1) { + srcLength=u_strlen(src); + } + + /* check for overlapping source and destination */ + if( dest!=NULL && + ((src>=dest && src<(dest+destCapacity)) || + (dest>=src && dest<(src+srcLength))) + ) { + /* overlap: provide a temporary destination buffer and later copy the result */ + if(destCapacity<=UPRV_LENGTHOF(buffer)) { + /* the stack buffer is large enough */ + temp=buffer; + } else { + /* allocate a buffer */ + temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR); + if(temp==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return 0; + } + } + } else { + temp=dest; + } + + destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR + temp, destCapacity, src, srcLength, NULL, errorCode); + if(temp!=dest) { + /* copy the result string to the destination buffer */ + if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) { + u_memmove(dest, temp, destLength); + } + if(temp!=buffer) { + uprv_free(temp); + } + } + + return u_terminateUChars(dest, destCapacity, destLength, &errorCode); +} + +/* public API functions */ + +U_CAPI int32_t U_EXPORT2 +u_strFoldCase(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + uint32_t options, + UErrorCode *pErrorCode) { + return ustrcase_mapWithOverlap( + UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ustrcase_internalFold, *pErrorCode); +} + +U_NAMESPACE_BEGIN + +int32_t CaseMap::fold( + uint32_t options, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + return ustrcase_map( + UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ustrcase_internalFold, edits, errorCode); +} + +U_NAMESPACE_END + +/* case-insensitive string comparisons -------------------------------------- */ + +/* + * This function is a copy of unorm_cmpEquivFold() minus the parts for + * canonical equivalence. + * Keep the functions in sync, and see there for how this works. + * The duplication is for modularization: + * It makes caseless (but not canonical caseless) matches independent of + * the normalization code. + */ + +/* stack element for previous-level source/decomposition pointers */ +struct CmpEquivLevel { + const UChar *start, *s, *limit; +}; +typedef struct CmpEquivLevel CmpEquivLevel; + +/** + * Internal implementation code comparing string with case fold. + * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch(). + * + * @param s1 input string 1 + * @param length1 length of string 1, or -1 (NULL terminated) + * @param s2 input string 2 + * @param length2 length of string 2, or -1 (NULL terminated) + * @param options compare options + * @param matchLen1 (output) length of partial prefix match in s1 + * @param matchLen2 (output) length of partial prefix match in s2 + * @param pErrorCode receives error status + * @return The result of comparison + */ +static int32_t _cmpFold( + const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + int32_t *matchLen1, int32_t *matchLen2, + UErrorCode *pErrorCode) { + int32_t cmpRes = 0; + + /* current-level start/limit - s1/s2 as current */ + const UChar *start1, *start2, *limit1, *limit2; + + /* points to the original start address */ + const UChar *org1, *org2; + + /* points to the end of match + 1 */ + const UChar *m1, *m2; + + /* case folding variables */ + const UChar *p; + int32_t length; + + /* stacks of previous-level start/current/limit */ + CmpEquivLevel stack1[2], stack2[2]; + + /* case folding buffers, only use current-level start/limit */ + UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1]; + + /* track which is the current level per string */ + int32_t level1, level2; + + /* current code units, and code points for lookups */ + UChar32 c1, c2, cp1, cp2; + + /* no argument error checking because this itself is not an API */ + + /* + * assume that at least the option U_COMPARE_IGNORE_CASE is set + * otherwise this function would have to behave exactly as uprv_strCompare() + */ + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + /* initialize */ + if(matchLen1) { + U_ASSERT(matchLen2 !=NULL); + *matchLen1=0; + *matchLen2=0; + } + + start1=m1=org1=s1; + if(length1==-1) { + limit1=NULL; + } else { + limit1=s1+length1; + } + + start2=m2=org2=s2; + if(length2==-1) { + limit2=NULL; + } else { + limit2=s2+length2; + } + + level1=level2=0; + c1=c2=-1; + + /* comparison loop */ + for(;;) { + /* + * here a code unit value of -1 means "get another code unit" + * below it will mean "this source is finished" + */ + + if(c1<0) { + /* get next code unit from string 1, post-increment */ + for(;;) { + if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) { + if(level1==0) { + c1=-1; + break; + } + } else { + ++s1; + break; + } + + /* reached end of level buffer, pop one level */ + do { + --level1; + start1=stack1[level1].start; /*Not uninitialized*/ + } while(start1==NULL); + s1=stack1[level1].s; /*Not uninitialized*/ + limit1=stack1[level1].limit; /*Not uninitialized*/ + } + } + + if(c2<0) { + /* get next code unit from string 2, post-increment */ + for(;;) { + if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) { + if(level2==0) { + c2=-1; + break; + } + } else { + ++s2; + break; + } + + /* reached end of level buffer, pop one level */ + do { + --level2; + start2=stack2[level2].start; /*Not uninitialized*/ + } while(start2==NULL); + s2=stack2[level2].s; /*Not uninitialized*/ + limit2=stack2[level2].limit; /*Not uninitialized*/ + } + } + + /* + * compare c1 and c2 + * either variable c1, c2 is -1 only if the corresponding string is finished + */ + if(c1==c2) { + const UChar *next1, *next2; + + if(c1<0) { + cmpRes=0; /* c1==c2==-1 indicating end of strings */ + break; + } + + /* + * Note: Move the match positions in both strings at the same time + * only when corresponding code point(s) in the original strings + * are fully consumed. For example, when comparing s1="Fust" and + * s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches + * the first code point in the case-folded data. But the second "s" + * has no matching code point in s1, so this implementation returns + * 2 as the prefix match length ("Fu"). + */ + next1=next2=NULL; + if(level1==0) { + next1=s1; + } else if(s1==limit1) { + /* Note: This implementation only use a single level of stack. + * If this code needs to be changed to use multiple levels + * of stacks, the code above should check if the current + * code is at the end of all stacks. + */ + U_ASSERT(level1==1); + + /* is s1 at the end of the current stack? */ + next1=stack1[0].s; + } + + if (next1!=NULL) { + if(level2==0) { + next2=s2; + } else if(s2==limit2) { + U_ASSERT(level2==1); + + /* is s2 at the end of the current stack? */ + next2=stack2[0].s; + } + if(next2!=NULL) { + m1=next1; + m2=next2; + } + } + c1=c2=-1; /* make us fetch new code units */ + continue; + } else if(c1<0) { + cmpRes=-1; /* string 1 ends before string 2 */ + break; + } else if(c2<0) { + cmpRes=1; /* string 2 ends before string 1 */ + break; + } + /* c1!=c2 && c1>=0 && c2>=0 */ + + /* get complete code points for c1, c2 for lookups if either is a surrogate */ + cp1=c1; + if(U_IS_SURROGATE(c1)) { + UChar c; + + if(U_IS_SURROGATE_LEAD(c1)) { + if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) { + /* advance ++s1; only below if cp1 decomposes/case-folds */ + cp1=U16_GET_SUPPLEMENTARY(c1, c); + } + } else /* isTrail(c1) */ { + if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) { + cp1=U16_GET_SUPPLEMENTARY(c, c1); + } + } + } + + cp2=c2; + if(U_IS_SURROGATE(c2)) { + UChar c; + + if(U_IS_SURROGATE_LEAD(c2)) { + if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) { + /* advance ++s2; only below if cp2 decomposes/case-folds */ + cp2=U16_GET_SUPPLEMENTARY(c2, c); + } + } else /* isTrail(c2) */ { + if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) { + cp2=U16_GET_SUPPLEMENTARY(c, c2); + } + } + } + + /* + * go down one level for each string + * continue with the main loop as soon as there is a real change + */ + + if( level1==0 && + (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0 + ) { + /* cp1 case-folds to the code point "length" or to p[length] */ + if(U_IS_SURROGATE(c1)) { + if(U_IS_SURROGATE_LEAD(c1)) { + /* advance beyond source surrogate pair if it case-folds */ + ++s1; + } else /* isTrail(c1) */ { + /* + * we got a supplementary code point when hitting its trail surrogate, + * therefore the lead surrogate must have been the same as in the other string; + * compare this decomposition with the lead surrogate in the other string + * remember that this simulates bulk text replacement: + * the decomposition would replace the entire code point + */ + --s2; + --m2; + c2=*(s2-1); + } + } + + /* push current level pointers */ + stack1[0].start=start1; + stack1[0].s=s1; + stack1[0].limit=limit1; + ++level1; + + /* copy the folding result to fold1[] */ + if(length<=UCASE_MAX_STRING_LENGTH) { + u_memcpy(fold1, p, length); + } else { + int32_t i=0; + U16_APPEND_UNSAFE(fold1, i, length); + length=i; + } + + /* set next level pointers to case folding */ + start1=s1=fold1; + limit1=fold1+length; + + /* get ready to read from decomposition, continue with loop */ + c1=-1; + continue; + } + + if( level2==0 && + (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0 + ) { + /* cp2 case-folds to the code point "length" or to p[length] */ + if(U_IS_SURROGATE(c2)) { + if(U_IS_SURROGATE_LEAD(c2)) { + /* advance beyond source surrogate pair if it case-folds */ + ++s2; + } else /* isTrail(c2) */ { + /* + * we got a supplementary code point when hitting its trail surrogate, + * therefore the lead surrogate must have been the same as in the other string; + * compare this decomposition with the lead surrogate in the other string + * remember that this simulates bulk text replacement: + * the decomposition would replace the entire code point + */ + --s1; + --m2; + c1=*(s1-1); + } + } + + /* push current level pointers */ + stack2[0].start=start2; + stack2[0].s=s2; + stack2[0].limit=limit2; + ++level2; + + /* copy the folding result to fold2[] */ + if(length<=UCASE_MAX_STRING_LENGTH) { + u_memcpy(fold2, p, length); + } else { + int32_t i=0; + U16_APPEND_UNSAFE(fold2, i, length); + length=i; + } + + /* set next level pointers to case folding */ + start2=s2=fold2; + limit2=fold2+length; + + /* get ready to read from decomposition, continue with loop */ + c2=-1; + continue; + } + + /* + * no decomposition/case folding, max level for both sides: + * return difference result + * + * code point order comparison must not just return cp1-cp2 + * because when single surrogates are present then the surrogate pairs + * that formed cp1 and cp2 may be from different string indexes + * + * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units + * c1=d800 cp1=10001 c2=dc00 cp2=10000 + * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 } + * + * therefore, use same fix-up as in ustring.c/uprv_strCompare() + * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++ + * so we have slightly different pointer/start/limit comparisons here + */ + + if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) { + /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ + if( + (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) || + (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2))) + ) { + /* part of a surrogate pair, leave >=d800 */ + } else { + /* BMP code point - may be surrogate code point - make <d800 */ + c1-=0x2800; + } + + if( + (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) || + (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2))) + ) { + /* part of a surrogate pair, leave >=d800 */ + } else { + /* BMP code point - may be surrogate code point - make <d800 */ + c2-=0x2800; + } + } + + cmpRes=c1-c2; + break; + } + + if(matchLen1) { + *matchLen1=static_cast<int32_t>(m1-org1); + *matchLen2=static_cast<int32_t>(m2-org2); + } + return cmpRes; +} + +/* internal function */ +U_CFUNC int32_t +u_strcmpFold(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode) { + return _cmpFold(s1, length1, s2, length2, options, NULL, NULL, pErrorCode); +} + +/* public API functions */ + +U_CAPI int32_t U_EXPORT2 +u_strCaseCompare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode) { + /* argument checking */ + if(pErrorCode==0 || U_FAILURE(*pErrorCode)) { + return 0; + } + if(s1==NULL || length1<-1 || s2==NULL || length2<-1) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + return u_strcmpFold(s1, length1, s2, length2, + options|U_COMPARE_IGNORE_CASE, + pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) { + UErrorCode errorCode=U_ZERO_ERROR; + return u_strcmpFold(s1, -1, s2, -1, + options|U_COMPARE_IGNORE_CASE, + &errorCode); +} + +U_CAPI int32_t U_EXPORT2 +u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) { + UErrorCode errorCode=U_ZERO_ERROR; + return u_strcmpFold(s1, length, s2, length, + options|U_COMPARE_IGNORE_CASE, + &errorCode); +} + +U_CAPI int32_t U_EXPORT2 +u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) { + UErrorCode errorCode=U_ZERO_ERROR; + return u_strcmpFold(s1, n, s2, n, + options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE), + &errorCode); +} + +/* internal API - detect length of shared prefix */ +U_CAPI void +u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + int32_t *matchLen1, int32_t *matchLen2, + UErrorCode *pErrorCode) { + _cmpFold(s1, length1, s2, length2, options, + matchLen1, matchLen2, pErrorCode); +} diff --git a/thirdparty/icu4c/common/ustrcase_locale.cpp b/thirdparty/icu4c/common/ustrcase_locale.cpp new file mode 100644 index 0000000000..2ecd24f03e --- /dev/null +++ b/thirdparty/icu4c/common/ustrcase_locale.cpp @@ -0,0 +1,94 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ustrcase_locale.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2011may31 +* created by: Markus W. Scherer +* +* Locale-sensitive case mapping functions (ones that call uloc_getDefault()) +* were moved here to break dependency cycles among parts of the common library. +*/ + +#include "unicode/utypes.h" +#include "uassert.h" +#include "unicode/brkiter.h" +#include "unicode/casemap.h" +#include "unicode/ucasemap.h" +#include "unicode/uloc.h" +#include "unicode/ustring.h" +#include "ucase.h" +#include "ucasemap_imp.h" + +U_CFUNC int32_t +ustrcase_getCaseLocale(const char *locale) { + if (locale == NULL) { + locale = uloc_getDefault(); + } + if (*locale == 0) { + return UCASE_LOC_ROOT; + } else { + return ucase_getCaseLocale(locale); + } +} + +/* public API functions */ + +U_CAPI int32_t U_EXPORT2 +u_strToLower(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode) { + return ustrcase_mapWithOverlap( + ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ustrcase_internalToLower, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +u_strToUpper(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode) { + return ustrcase_mapWithOverlap( + ustrcase_getCaseLocale(locale), 0, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ustrcase_internalToUpper, *pErrorCode); +} + +U_NAMESPACE_BEGIN + +int32_t CaseMap::toLower( + const char *locale, uint32_t options, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + return ustrcase_map( + ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ustrcase_internalToLower, edits, errorCode); +} + +int32_t CaseMap::toUpper( + const char *locale, uint32_t options, + const UChar *src, int32_t srcLength, + UChar *dest, int32_t destCapacity, Edits *edits, + UErrorCode &errorCode) { + return ustrcase_map( + ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL + dest, destCapacity, + src, srcLength, + ustrcase_internalToUpper, edits, errorCode); +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/ustrenum.cpp b/thirdparty/icu4c/common/ustrenum.cpp new file mode 100644 index 0000000000..ed23eaa232 --- /dev/null +++ b/thirdparty/icu4c/common/ustrenum.cpp @@ -0,0 +1,398 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2002-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: November 11 2002 +* Since: ICU 2.4 +********************************************************************** +*/ +#include "utypeinfo.h" // for 'typeid' to work + +#include "unicode/ustring.h" +#include "unicode/strenum.h" +#include "unicode/putil.h" +#include "uenumimp.h" +#include "ustrenum.h" +#include "cstring.h" +#include "cmemory.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN +// StringEnumeration implementation ---------------------------------------- *** + +StringEnumeration::StringEnumeration() + : chars(charsBuffer), charsCapacity(sizeof(charsBuffer)) { +} + +StringEnumeration::~StringEnumeration() { + if (chars != NULL && chars != charsBuffer) { + uprv_free(chars); + } +} + +// StringEnumeration base class clone() default implementation, does not clone +StringEnumeration * +StringEnumeration::clone() const { + return NULL; +} + +const char * +StringEnumeration::next(int32_t *resultLength, UErrorCode &status) { + const UnicodeString *s=snext(status); + if(U_SUCCESS(status) && s!=NULL) { + unistr=*s; + ensureCharsCapacity(unistr.length()+1, status); + if(U_SUCCESS(status)) { + if(resultLength!=NULL) { + *resultLength=unistr.length(); + } + unistr.extract(0, INT32_MAX, chars, charsCapacity, US_INV); + return chars; + } + } + + return NULL; +} + +const UChar * +StringEnumeration::unext(int32_t *resultLength, UErrorCode &status) { + const UnicodeString *s=snext(status); + if(U_SUCCESS(status) && s!=NULL) { + unistr=*s; + if(resultLength!=NULL) { + *resultLength=unistr.length(); + } + return unistr.getTerminatedBuffer(); + } + + return NULL; +} + +const UnicodeString * +StringEnumeration::snext(UErrorCode &status) { + int32_t length; + const char *s=next(&length, status); + return setChars(s, length, status); +} + +void +StringEnumeration::ensureCharsCapacity(int32_t capacity, UErrorCode &status) { + if(U_SUCCESS(status) && capacity>charsCapacity) { + if(capacity<(charsCapacity+charsCapacity/2)) { + // avoid allocation thrashing + capacity=charsCapacity+charsCapacity/2; + } + if(chars!=charsBuffer) { + uprv_free(chars); + } + chars=(char *)uprv_malloc(capacity); + if(chars==NULL) { + chars=charsBuffer; + charsCapacity=sizeof(charsBuffer); + status=U_MEMORY_ALLOCATION_ERROR; + } else { + charsCapacity=capacity; + } + } +} + +UnicodeString * +StringEnumeration::setChars(const char *s, int32_t length, UErrorCode &status) { + if(U_SUCCESS(status) && s!=NULL) { + if(length<0) { + length=(int32_t)uprv_strlen(s); + } + + UChar *buffer=unistr.getBuffer(length+1); + if(buffer!=NULL) { + u_charsToUChars(s, buffer, length); + buffer[length]=0; + unistr.releaseBuffer(length); + return &unistr; + } else { + status=U_MEMORY_ALLOCATION_ERROR; + } + } + + return NULL; +} +UBool +StringEnumeration::operator==(const StringEnumeration& that)const { + return typeid(*this) == typeid(that); +} + +UBool +StringEnumeration::operator!=(const StringEnumeration& that)const { + return !operator==(that); +} + +// UStringEnumeration implementation --------------------------------------- *** + +UStringEnumeration * U_EXPORT2 +UStringEnumeration::fromUEnumeration( + UEnumeration *uenumToAdopt, UErrorCode &status) { + if (U_FAILURE(status)) { + uenum_close(uenumToAdopt); + return NULL; + } + UStringEnumeration *result = new UStringEnumeration(uenumToAdopt); + if (result == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + uenum_close(uenumToAdopt); + return NULL; + } + return result; +} + +UStringEnumeration::UStringEnumeration(UEnumeration* _uenum) : + uenum(_uenum) { + U_ASSERT(_uenum != 0); +} + +UStringEnumeration::~UStringEnumeration() { + uenum_close(uenum); +} + +int32_t UStringEnumeration::count(UErrorCode& status) const { + return uenum_count(uenum, &status); +} + +const char *UStringEnumeration::next(int32_t *resultLength, UErrorCode &status) { + return uenum_next(uenum, resultLength, &status); +} + +const UnicodeString* UStringEnumeration::snext(UErrorCode& status) { + int32_t length; + const UChar* str = uenum_unext(uenum, &length, &status); + if (str == 0 || U_FAILURE(status)) { + return 0; + } + return &unistr.setTo(str, length); +} + +void UStringEnumeration::reset(UErrorCode& status) { + uenum_reset(uenum, &status); +} + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UStringEnumeration) +U_NAMESPACE_END + +// C wrapper --------------------------------------------------------------- *** + +#define THIS(en) ((icu::StringEnumeration*)(en->context)) + +U_CDECL_BEGIN + +/** + * Wrapper API to make StringEnumeration look like UEnumeration. + */ +static void U_CALLCONV +ustrenum_close(UEnumeration* en) { + delete THIS(en); + uprv_free(en); +} + +/** + * Wrapper API to make StringEnumeration look like UEnumeration. + */ +static int32_t U_CALLCONV +ustrenum_count(UEnumeration* en, + UErrorCode* ec) +{ + return THIS(en)->count(*ec); +} + +/** + * Wrapper API to make StringEnumeration look like UEnumeration. + */ +static const UChar* U_CALLCONV +ustrenum_unext(UEnumeration* en, + int32_t* resultLength, + UErrorCode* ec) +{ + return THIS(en)->unext(resultLength, *ec); +} + +/** + * Wrapper API to make StringEnumeration look like UEnumeration. + */ +static const char* U_CALLCONV +ustrenum_next(UEnumeration* en, + int32_t* resultLength, + UErrorCode* ec) +{ + return THIS(en)->next(resultLength, *ec); +} + +/** + * Wrapper API to make StringEnumeration look like UEnumeration. + */ +static void U_CALLCONV +ustrenum_reset(UEnumeration* en, + UErrorCode* ec) +{ + THIS(en)->reset(*ec); +} + +/** + * Pseudo-vtable for UEnumeration wrapper around StringEnumeration. + * The StringEnumeration pointer will be stored in 'context'. + */ +static const UEnumeration USTRENUM_VT = { + NULL, + NULL, // store StringEnumeration pointer here + ustrenum_close, + ustrenum_count, + ustrenum_unext, + ustrenum_next, + ustrenum_reset +}; + +U_CDECL_END + +/** + * Given a StringEnumeration, wrap it in a UEnumeration. The + * StringEnumeration is adopted; after this call, the caller must not + * delete it (regardless of error status). + */ +U_CAPI UEnumeration* U_EXPORT2 +uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec) { + UEnumeration* result = NULL; + if (U_SUCCESS(*ec) && adopted != NULL) { + result = (UEnumeration*) uprv_malloc(sizeof(UEnumeration)); + if (result == NULL) { + *ec = U_MEMORY_ALLOCATION_ERROR; + } else { + uprv_memcpy(result, &USTRENUM_VT, sizeof(USTRENUM_VT)); + result->context = adopted; + } + } + if (result == NULL) { + delete adopted; + } + return result; +} + +// C wrapper --------------------------------------------------------------- *** + +U_CDECL_BEGIN + +typedef struct UCharStringEnumeration { + UEnumeration uenum; + int32_t index, count; +} UCharStringEnumeration; + +static void U_CALLCONV +ucharstrenum_close(UEnumeration* en) { + uprv_free(en); +} + +static int32_t U_CALLCONV +ucharstrenum_count(UEnumeration* en, + UErrorCode* /*ec*/) { + return ((UCharStringEnumeration*)en)->count; +} + +static const UChar* U_CALLCONV +ucharstrenum_unext(UEnumeration* en, + int32_t* resultLength, + UErrorCode* /*ec*/) { + UCharStringEnumeration *e = (UCharStringEnumeration*) en; + if (e->index >= e->count) { + return NULL; + } + const UChar* result = ((const UChar**)e->uenum.context)[e->index++]; + if (resultLength) { + *resultLength = (int32_t)u_strlen(result); + } + return result; +} + + +static const char* U_CALLCONV +ucharstrenum_next(UEnumeration* en, + int32_t* resultLength, + UErrorCode* /*ec*/) { + UCharStringEnumeration *e = (UCharStringEnumeration*) en; + if (e->index >= e->count) { + return NULL; + } + const char* result = ((const char**)e->uenum.context)[e->index++]; + if (resultLength) { + *resultLength = (int32_t)uprv_strlen(result); + } + return result; +} + +static void U_CALLCONV +ucharstrenum_reset(UEnumeration* en, + UErrorCode* /*ec*/) { + ((UCharStringEnumeration*)en)->index = 0; +} + +static const UEnumeration UCHARSTRENUM_VT = { + NULL, + NULL, // store StringEnumeration pointer here + ucharstrenum_close, + ucharstrenum_count, + uenum_unextDefault, + ucharstrenum_next, + ucharstrenum_reset +}; + +static const UEnumeration UCHARSTRENUM_U_VT = { + NULL, + NULL, // store StringEnumeration pointer here + ucharstrenum_close, + ucharstrenum_count, + ucharstrenum_unext, + uenum_nextDefault, + ucharstrenum_reset +}; + +U_CDECL_END + +U_CAPI UEnumeration* U_EXPORT2 +uenum_openCharStringsEnumeration(const char* const strings[], int32_t count, + UErrorCode* ec) { + UCharStringEnumeration* result = NULL; + if (U_SUCCESS(*ec) && count >= 0 && (count == 0 || strings != 0)) { + result = (UCharStringEnumeration*) uprv_malloc(sizeof(UCharStringEnumeration)); + if (result == NULL) { + *ec = U_MEMORY_ALLOCATION_ERROR; + } else { + U_ASSERT((char*)result==(char*)(&result->uenum)); + uprv_memcpy(result, &UCHARSTRENUM_VT, sizeof(UCHARSTRENUM_VT)); + result->uenum.context = (void*)strings; + result->index = 0; + result->count = count; + } + } + return (UEnumeration*) result; +} + +U_CAPI UEnumeration* U_EXPORT2 +uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count, + UErrorCode* ec) { + UCharStringEnumeration* result = NULL; + if (U_SUCCESS(*ec) && count >= 0 && (count == 0 || strings != 0)) { + result = (UCharStringEnumeration*) uprv_malloc(sizeof(UCharStringEnumeration)); + if (result == NULL) { + *ec = U_MEMORY_ALLOCATION_ERROR; + } else { + U_ASSERT((char*)result==(char*)(&result->uenum)); + uprv_memcpy(result, &UCHARSTRENUM_U_VT, sizeof(UCHARSTRENUM_U_VT)); + result->uenum.context = (void*)strings; + result->index = 0; + result->count = count; + } + } + return (UEnumeration*) result; +} + + +// end C Wrapper diff --git a/thirdparty/icu4c/common/ustrenum.h b/thirdparty/icu4c/common/ustrenum.h new file mode 100644 index 0000000000..a82162e2bd --- /dev/null +++ b/thirdparty/icu4c/common/ustrenum.h @@ -0,0 +1,87 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2002-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Author: Alan Liu +* Created: November 11 2002 +* Since: ICU 2.4 +********************************************************************** +*/ +#ifndef _USTRENUM_H_ +#define _USTRENUM_H_ + +#include "unicode/uenum.h" +#include "unicode/strenum.h" + +//---------------------------------------------------------------------- +U_NAMESPACE_BEGIN + +/** + * A wrapper to make a UEnumeration into a StringEnumeration. The + * wrapper adopts the UEnumeration is wraps. + */ +class U_COMMON_API UStringEnumeration : public StringEnumeration { + +public: + /** + * Constructor. This constructor adopts its UEnumeration + * argument. + * @param uenum a UEnumeration object. This object takes + * ownership of 'uenum' and will close it in its destructor. The + * caller must not call uenum_close on 'uenum' after calling this + * constructor. + */ + UStringEnumeration(UEnumeration* uenum); + + /** + * Destructor. This closes the UEnumeration passed in to the + * constructor. + */ + virtual ~UStringEnumeration(); + + /** + * Return the number of elements that the iterator traverses. + * @param status the error code. + * @return number of elements in the iterator. + */ + virtual int32_t count(UErrorCode& status) const; + + virtual const char* next(int32_t *resultLength, UErrorCode& status); + + /** + * Returns the next element a UnicodeString*. If there are no + * more elements, returns NULL. + * @param status the error code. + * @return a pointer to the string, or NULL. + */ + virtual const UnicodeString* snext(UErrorCode& status); + + /** + * Resets the iterator. + * @param status the error code. + */ + virtual void reset(UErrorCode& status); + + /** + * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class. + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU4C "poor man's RTTI", returns a UClassID for this ICU class. + */ + static UClassID U_EXPORT2 getStaticClassID(); + + static UStringEnumeration * U_EXPORT2 fromUEnumeration( + UEnumeration *enumToAdopt, UErrorCode &status); +private: + UEnumeration *uenum; // owned +}; + +U_NAMESPACE_END + +#endif + diff --git a/thirdparty/icu4c/common/ustrfmt.cpp b/thirdparty/icu4c/common/ustrfmt.cpp new file mode 100644 index 0000000000..1a9b15a59f --- /dev/null +++ b/thirdparty/icu4c/common/ustrfmt.cpp @@ -0,0 +1,59 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2001-2006, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#include "cstring.h" +#include "ustrfmt.h" + + +/*** + * Fills in a UChar* string with the radix-based representation of a + * uint32_t number padded with zeroes to minwidth. The result + * will be null terminated if there is room. + * + * @param buffer UChar buffer to receive result + * @param capacity capacity of buffer + * @param i the unsigned number to be formatted + * @param radix the radix from 2..36 + * @param minwidth the minimum width. If the result is narrower than + * this, '0's will be added on the left. Must be <= + * capacity. + * @return the length of the result, not including any terminating + * null + */ +U_CAPI int32_t U_EXPORT2 +uprv_itou (UChar * buffer, int32_t capacity, + uint32_t i, uint32_t radix, int32_t minwidth) +{ + int32_t length = 0; + int digit; + int32_t j; + UChar temp; + + do{ + digit = (int)(i % radix); + buffer[length++]=(UChar)(digit<=9?(0x0030+digit):(0x0030+digit+7)); + i=i/radix; + } while(i && length<capacity); + + while (length < minwidth){ + buffer[length++] = (UChar) 0x0030;/*zero padding */ + } + /* null terminate the buffer */ + if(length<capacity){ + buffer[length] = (UChar) 0x0000; + } + + /* Reverses the string */ + for (j = 0; j < (length / 2); j++){ + temp = buffer[(length-1) - j]; + buffer[(length-1) - j] = buffer[j]; + buffer[j] = temp; + } + return length; +} diff --git a/thirdparty/icu4c/common/ustrfmt.h b/thirdparty/icu4c/common/ustrfmt.h new file mode 100644 index 0000000000..53eb0557e4 --- /dev/null +++ b/thirdparty/icu4c/common/ustrfmt.h @@ -0,0 +1,19 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2001-2006, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +#ifndef USTRFMT_H +#define USTRFMT_H + +#include "unicode/utypes.h" + +U_CAPI int32_t U_EXPORT2 +uprv_itou (UChar * buffer, int32_t capacity, uint32_t i, uint32_t radix, int32_t minwidth); + + +#endif diff --git a/thirdparty/icu4c/common/ustring.cpp b/thirdparty/icu4c/common/ustring.cpp new file mode 100644 index 0000000000..bba2d45c4e --- /dev/null +++ b/thirdparty/icu4c/common/ustring.cpp @@ -0,0 +1,1537 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File ustring.cpp +* +* Modification History: +* +* Date Name Description +* 12/07/98 bertrand Creation. +****************************************************************************** +*/ + +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "unicode/uchar.h" +#include "unicode/ustring.h" +#include "unicode/utf16.h" +#include "cstring.h" +#include "cwchar.h" +#include "cmemory.h" +#include "ustr_imp.h" + +/* ANSI string.h - style functions ------------------------------------------ */ + +/* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */ +#define U_BMP_MAX 0xffff + +/* Forward binary string search functions ----------------------------------- */ + +/* + * Test if a substring match inside a string is at code point boundaries. + * All pointers refer to the same buffer. + * The limit pointer may be NULL, all others must be real pointers. + */ +static inline UBool +isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) { + if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) { + /* the leading edge of the match is in the middle of a surrogate pair */ + return FALSE; + } + if(U16_IS_LEAD(*(matchLimit-1)) && matchLimit!=limit && U16_IS_TRAIL(*matchLimit)) { + /* the trailing edge of the match is in the middle of a surrogate pair */ + return FALSE; + } + return TRUE; +} + +U_CAPI UChar * U_EXPORT2 +u_strFindFirst(const UChar *s, int32_t length, + const UChar *sub, int32_t subLength) { + const UChar *start, *p, *q, *subLimit; + UChar c, cs, cq; + + if(sub==NULL || subLength<-1) { + return (UChar *)s; + } + if(s==NULL || length<-1) { + return NULL; + } + + start=s; + + if(length<0 && subLength<0) { + /* both strings are NUL-terminated */ + if((cs=*sub++)==0) { + return (UChar *)s; + } + if(*sub==0 && !U16_IS_SURROGATE(cs)) { + /* the substring consists of a single, non-surrogate BMP code point */ + return u_strchr(s, cs); + } + + while((c=*s++)!=0) { + if(c==cs) { + /* found first substring UChar, compare rest */ + p=s; + q=sub; + for(;;) { + if((cq=*q)==0) { + if(isMatchAtCPBoundary(start, s-1, p, NULL)) { + return (UChar *)(s-1); /* well-formed match */ + } else { + break; /* no match because surrogate pair is split */ + } + } + if((c=*p)==0) { + return NULL; /* no match, and none possible after s */ + } + if(c!=cq) { + break; /* no match */ + } + ++p; + ++q; + } + } + } + + /* not found */ + return NULL; + } + + if(subLength<0) { + subLength=u_strlen(sub); + } + if(subLength==0) { + return (UChar *)s; + } + + /* get sub[0] to search for it fast */ + cs=*sub++; + --subLength; + subLimit=sub+subLength; + + if(subLength==0 && !U16_IS_SURROGATE(cs)) { + /* the substring consists of a single, non-surrogate BMP code point */ + return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length); + } + + if(length<0) { + /* s is NUL-terminated */ + while((c=*s++)!=0) { + if(c==cs) { + /* found first substring UChar, compare rest */ + p=s; + q=sub; + for(;;) { + if(q==subLimit) { + if(isMatchAtCPBoundary(start, s-1, p, NULL)) { + return (UChar *)(s-1); /* well-formed match */ + } else { + break; /* no match because surrogate pair is split */ + } + } + if((c=*p)==0) { + return NULL; /* no match, and none possible after s */ + } + if(c!=*q) { + break; /* no match */ + } + ++p; + ++q; + } + } + } + } else { + const UChar *limit, *preLimit; + + /* subLength was decremented above */ + if(length<=subLength) { + return NULL; /* s is shorter than sub */ + } + + limit=s+length; + + /* the substring must start before preLimit */ + preLimit=limit-subLength; + + while(s!=preLimit) { + c=*s++; + if(c==cs) { + /* found first substring UChar, compare rest */ + p=s; + q=sub; + for(;;) { + if(q==subLimit) { + if(isMatchAtCPBoundary(start, s-1, p, limit)) { + return (UChar *)(s-1); /* well-formed match */ + } else { + break; /* no match because surrogate pair is split */ + } + } + if(*p!=*q) { + break; /* no match */ + } + ++p; + ++q; + } + } + } + } + + /* not found */ + return NULL; +} + +U_CAPI UChar * U_EXPORT2 +u_strstr(const UChar *s, const UChar *substring) { + return u_strFindFirst(s, -1, substring, -1); +} + +U_CAPI UChar * U_EXPORT2 +u_strchr(const UChar *s, UChar c) { + if(U16_IS_SURROGATE(c)) { + /* make sure to not find half of a surrogate pair */ + return u_strFindFirst(s, -1, &c, 1); + } else { + UChar cs; + + /* trivial search for a BMP code point */ + for(;;) { + if((cs=*s)==c) { + return (UChar *)s; + } + if(cs==0) { + return NULL; + } + ++s; + } + } +} + +U_CAPI UChar * U_EXPORT2 +u_strchr32(const UChar *s, UChar32 c) { + if((uint32_t)c<=U_BMP_MAX) { + /* find BMP code point */ + return u_strchr(s, (UChar)c); + } else if((uint32_t)c<=UCHAR_MAX_VALUE) { + /* find supplementary code point as surrogate pair */ + UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c); + + while((cs=*s++)!=0) { + if(cs==lead && *s==trail) { + return (UChar *)(s-1); + } + } + return NULL; + } else { + /* not a Unicode code point, not findable */ + return NULL; + } +} + +U_CAPI UChar * U_EXPORT2 +u_memchr(const UChar *s, UChar c, int32_t count) { + if(count<=0) { + return NULL; /* no string */ + } else if(U16_IS_SURROGATE(c)) { + /* make sure to not find half of a surrogate pair */ + return u_strFindFirst(s, count, &c, 1); + } else { + /* trivial search for a BMP code point */ + const UChar *limit=s+count; + do { + if(*s==c) { + return (UChar *)s; + } + } while(++s!=limit); + return NULL; + } +} + +U_CAPI UChar * U_EXPORT2 +u_memchr32(const UChar *s, UChar32 c, int32_t count) { + if((uint32_t)c<=U_BMP_MAX) { + /* find BMP code point */ + return u_memchr(s, (UChar)c, count); + } else if(count<2) { + /* too short for a surrogate pair */ + return NULL; + } else if((uint32_t)c<=UCHAR_MAX_VALUE) { + /* find supplementary code point as surrogate pair */ + const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */ + UChar lead=U16_LEAD(c), trail=U16_TRAIL(c); + + do { + if(*s==lead && *(s+1)==trail) { + return (UChar *)s; + } + } while(++s!=limit); + return NULL; + } else { + /* not a Unicode code point, not findable */ + return NULL; + } +} + +/* Backward binary string search functions ---------------------------------- */ + +U_CAPI UChar * U_EXPORT2 +u_strFindLast(const UChar *s, int32_t length, + const UChar *sub, int32_t subLength) { + const UChar *start, *limit, *p, *q, *subLimit; + UChar c, cs; + + if(sub==NULL || subLength<-1) { + return (UChar *)s; + } + if(s==NULL || length<-1) { + return NULL; + } + + /* + * This implementation is more lazy than the one for u_strFindFirst(): + * There is no special search code for NUL-terminated strings. + * It does not seem to be worth it for searching substrings to + * search forward and find all matches like in u_strrchr() and similar. + * Therefore, we simply get both string lengths and search backward. + * + * markus 2002oct23 + */ + + if(subLength<0) { + subLength=u_strlen(sub); + } + if(subLength==0) { + return (UChar *)s; + } + + /* get sub[subLength-1] to search for it fast */ + subLimit=sub+subLength; + cs=*(--subLimit); + --subLength; + + if(subLength==0 && !U16_IS_SURROGATE(cs)) { + /* the substring consists of a single, non-surrogate BMP code point */ + return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length); + } + + if(length<0) { + length=u_strlen(s); + } + + /* subLength was decremented above */ + if(length<=subLength) { + return NULL; /* s is shorter than sub */ + } + + start=s; + limit=s+length; + + /* the substring must start no later than s+subLength */ + s+=subLength; + + while(s!=limit) { + c=*(--limit); + if(c==cs) { + /* found last substring UChar, compare rest */ + p=limit; + q=subLimit; + for(;;) { + if(q==sub) { + if(isMatchAtCPBoundary(start, p, limit+1, start+length)) { + return (UChar *)p; /* well-formed match */ + } else { + break; /* no match because surrogate pair is split */ + } + } + if(*(--p)!=*(--q)) { + break; /* no match */ + } + } + } + } + + /* not found */ + return NULL; +} + +U_CAPI UChar * U_EXPORT2 +u_strrstr(const UChar *s, const UChar *substring) { + return u_strFindLast(s, -1, substring, -1); +} + +U_CAPI UChar * U_EXPORT2 +u_strrchr(const UChar *s, UChar c) { + if(U16_IS_SURROGATE(c)) { + /* make sure to not find half of a surrogate pair */ + return u_strFindLast(s, -1, &c, 1); + } else { + const UChar *result=NULL; + UChar cs; + + /* trivial search for a BMP code point */ + for(;;) { + if((cs=*s)==c) { + result=s; + } + if(cs==0) { + return (UChar *)result; + } + ++s; + } + } +} + +U_CAPI UChar * U_EXPORT2 +u_strrchr32(const UChar *s, UChar32 c) { + if((uint32_t)c<=U_BMP_MAX) { + /* find BMP code point */ + return u_strrchr(s, (UChar)c); + } else if((uint32_t)c<=UCHAR_MAX_VALUE) { + /* find supplementary code point as surrogate pair */ + const UChar *result=NULL; + UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c); + + while((cs=*s++)!=0) { + if(cs==lead && *s==trail) { + result=s-1; + } + } + return (UChar *)result; + } else { + /* not a Unicode code point, not findable */ + return NULL; + } +} + +U_CAPI UChar * U_EXPORT2 +u_memrchr(const UChar *s, UChar c, int32_t count) { + if(count<=0) { + return NULL; /* no string */ + } else if(U16_IS_SURROGATE(c)) { + /* make sure to not find half of a surrogate pair */ + return u_strFindLast(s, count, &c, 1); + } else { + /* trivial search for a BMP code point */ + const UChar *limit=s+count; + do { + if(*(--limit)==c) { + return (UChar *)limit; + } + } while(s!=limit); + return NULL; + } +} + +U_CAPI UChar * U_EXPORT2 +u_memrchr32(const UChar *s, UChar32 c, int32_t count) { + if((uint32_t)c<=U_BMP_MAX) { + /* find BMP code point */ + return u_memrchr(s, (UChar)c, count); + } else if(count<2) { + /* too short for a surrogate pair */ + return NULL; + } else if((uint32_t)c<=UCHAR_MAX_VALUE) { + /* find supplementary code point as surrogate pair */ + const UChar *limit=s+count-1; + UChar lead=U16_LEAD(c), trail=U16_TRAIL(c); + + do { + if(*limit==trail && *(limit-1)==lead) { + return (UChar *)(limit-1); + } + } while(s!=--limit); + return NULL; + } else { + /* not a Unicode code point, not findable */ + return NULL; + } +} + +/* Tokenization functions --------------------------------------------------- */ + +/* + * Match each code point in a string against each code point in the matchSet. + * Return the index of the first string code point that + * is (polarity==TRUE) or is not (FALSE) contained in the matchSet. + * Return -(string length)-1 if there is no such code point. + */ +static int32_t +_matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) { + int32_t matchLen, matchBMPLen, strItr, matchItr; + UChar32 stringCh, matchCh; + UChar c, c2; + + /* first part of matchSet contains only BMP code points */ + matchBMPLen = 0; + while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) { + ++matchBMPLen; + } + + /* second part of matchSet contains BMP and supplementary code points */ + matchLen = matchBMPLen; + while(matchSet[matchLen] != 0) { + ++matchLen; + } + + for(strItr = 0; (c = string[strItr]) != 0;) { + ++strItr; + if(U16_IS_SINGLE(c)) { + if(polarity) { + for(matchItr = 0; matchItr < matchLen; ++matchItr) { + if(c == matchSet[matchItr]) { + return strItr - 1; /* one matches */ + } + } + } else { + for(matchItr = 0; matchItr < matchLen; ++matchItr) { + if(c == matchSet[matchItr]) { + goto endloop; + } + } + return strItr - 1; /* none matches */ + } + } else { + /* + * No need to check for string length before U16_IS_TRAIL + * because c2 could at worst be the terminating NUL. + */ + if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) { + ++strItr; + stringCh = U16_GET_SUPPLEMENTARY(c, c2); + } else { + stringCh = c; /* unpaired trail surrogate */ + } + + if(polarity) { + for(matchItr = matchBMPLen; matchItr < matchLen;) { + U16_NEXT(matchSet, matchItr, matchLen, matchCh); + if(stringCh == matchCh) { + return strItr - U16_LENGTH(stringCh); /* one matches */ + } + } + } else { + for(matchItr = matchBMPLen; matchItr < matchLen;) { + U16_NEXT(matchSet, matchItr, matchLen, matchCh); + if(stringCh == matchCh) { + goto endloop; + } + } + return strItr - U16_LENGTH(stringCh); /* none matches */ + } + } +endloop: + /* wish C had continue with labels like Java... */; + } + + /* Didn't find it. */ + return -strItr-1; +} + +/* Search for a codepoint in a string that matches one of the matchSet codepoints. */ +U_CAPI UChar * U_EXPORT2 +u_strpbrk(const UChar *string, const UChar *matchSet) +{ + int32_t idx = _matchFromSet(string, matchSet, TRUE); + if(idx >= 0) { + return (UChar *)string + idx; + } else { + return NULL; + } +} + +/* Search for a codepoint in a string that matches one of the matchSet codepoints. */ +U_CAPI int32_t U_EXPORT2 +u_strcspn(const UChar *string, const UChar *matchSet) +{ + int32_t idx = _matchFromSet(string, matchSet, TRUE); + if(idx >= 0) { + return idx; + } else { + return -idx - 1; /* == u_strlen(string) */ + } +} + +/* Search for a codepoint in a string that does not match one of the matchSet codepoints. */ +U_CAPI int32_t U_EXPORT2 +u_strspn(const UChar *string, const UChar *matchSet) +{ + int32_t idx = _matchFromSet(string, matchSet, FALSE); + if(idx >= 0) { + return idx; + } else { + return -idx - 1; /* == u_strlen(string) */ + } +} + +/* ----- Text manipulation functions --- */ + +U_CAPI UChar* U_EXPORT2 +u_strtok_r(UChar *src, + const UChar *delim, + UChar **saveState) +{ + UChar *tokSource; + UChar *nextToken; + uint32_t nonDelimIdx; + + /* If saveState is NULL, the user messed up. */ + if (src != NULL) { + tokSource = src; + *saveState = src; /* Set to "src" in case there are no delimiters */ + } + else if (*saveState) { + tokSource = *saveState; + } + else { + /* src == NULL && *saveState == NULL */ + /* This shouldn't happen. We already finished tokenizing. */ + return NULL; + } + + /* Skip initial delimiters */ + nonDelimIdx = u_strspn(tokSource, delim); + tokSource = &tokSource[nonDelimIdx]; + + if (*tokSource) { + nextToken = u_strpbrk(tokSource, delim); + if (nextToken != NULL) { + /* Create a token */ + *(nextToken++) = 0; + *saveState = nextToken; + return tokSource; + } + else if (*saveState) { + /* Return the last token */ + *saveState = NULL; + return tokSource; + } + } + else { + /* No tokens were found. Only delimiters were left. */ + *saveState = NULL; + } + return NULL; +} + +/* Miscellaneous functions -------------------------------------------------- */ + +U_CAPI UChar* U_EXPORT2 +u_strcat(UChar *dst, + const UChar *src) +{ + UChar *anchor = dst; /* save a pointer to start of dst */ + + while(*dst != 0) { /* To end of first string */ + ++dst; + } + while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */ + } + + return anchor; +} + +U_CAPI UChar* U_EXPORT2 +u_strncat(UChar *dst, + const UChar *src, + int32_t n ) +{ + if(n > 0) { + UChar *anchor = dst; /* save a pointer to start of dst */ + + while(*dst != 0) { /* To end of first string */ + ++dst; + } + while((*dst = *src) != 0) { /* copy string 2 over */ + ++dst; + if(--n == 0) { + *dst = 0; + break; + } + ++src; + } + + return anchor; + } else { + return dst; + } +} + +/* ----- Text property functions --- */ + +U_CAPI int32_t U_EXPORT2 +u_strcmp(const UChar *s1, + const UChar *s2) +{ + UChar c1, c2; + + for(;;) { + c1=*s1++; + c2=*s2++; + if (c1 != c2 || c1 == 0) { + break; + } + } + return (int32_t)c1 - (int32_t)c2; +} + +U_CFUNC int32_t U_EXPORT2 +uprv_strCompare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + UBool strncmpStyle, UBool codePointOrder) { + const UChar *start1, *start2, *limit1, *limit2; + UChar c1, c2; + + /* setup for fix-up */ + start1=s1; + start2=s2; + + /* compare identical prefixes - they do not need to be fixed up */ + if(length1<0 && length2<0) { + /* strcmp style, both NUL-terminated */ + if(s1==s2) { + return 0; + } + + for(;;) { + c1=*s1; + c2=*s2; + if(c1!=c2) { + break; + } + if(c1==0) { + return 0; + } + ++s1; + ++s2; + } + + /* setup for fix-up */ + limit1=limit2=NULL; + } else if(strncmpStyle) { + /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */ + if(s1==s2) { + return 0; + } + + limit1=start1+length1; + + for(;;) { + /* both lengths are same, check only one limit */ + if(s1==limit1) { + return 0; + } + + c1=*s1; + c2=*s2; + if(c1!=c2) { + break; + } + if(c1==0) { + return 0; + } + ++s1; + ++s2; + } + + /* setup for fix-up */ + limit2=start2+length1; /* use length1 here, too, to enforce assumption */ + } else { + /* memcmp/UnicodeString style, both length-specified */ + int32_t lengthResult; + + if(length1<0) { + length1=u_strlen(s1); + } + if(length2<0) { + length2=u_strlen(s2); + } + + /* limit1=start1+min(lenght1, length2) */ + if(length1<length2) { + lengthResult=-1; + limit1=start1+length1; + } else if(length1==length2) { + lengthResult=0; + limit1=start1+length1; + } else /* length1>length2 */ { + lengthResult=1; + limit1=start1+length2; + } + + if(s1==s2) { + return lengthResult; + } + + for(;;) { + /* check pseudo-limit */ + if(s1==limit1) { + return lengthResult; + } + + c1=*s1; + c2=*s2; + if(c1!=c2) { + break; + } + ++s1; + ++s2; + } + + /* setup for fix-up */ + limit1=start1+length1; + limit2=start2+length2; + } + + /* if both values are in or above the surrogate range, fix them up */ + if(c1>=0xd800 && c2>=0xd800 && codePointOrder) { + /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ + if( + (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) || + (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1))) + ) { + /* part of a surrogate pair, leave >=d800 */ + } else { + /* BMP code point - may be surrogate code point - make <d800 */ + c1-=0x2800; + } + + if( + (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) || + (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1))) + ) { + /* part of a surrogate pair, leave >=d800 */ + } else { + /* BMP code point - may be surrogate code point - make <d800 */ + c2-=0x2800; + } + } + + /* now c1 and c2 are in the requested (code unit or code point) order */ + return (int32_t)c1-(int32_t)c2; +} + +/* + * Compare two strings as presented by UCharIterators. + * Use code unit or code point order. + * When the function returns, it is undefined where the iterators + * have stopped. + */ +U_CAPI int32_t U_EXPORT2 +u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) { + UChar32 c1, c2; + + /* argument checking */ + if(iter1==NULL || iter2==NULL) { + return 0; /* bad arguments */ + } + if(iter1==iter2) { + return 0; /* identical iterators */ + } + + /* reset iterators to start? */ + iter1->move(iter1, 0, UITER_START); + iter2->move(iter2, 0, UITER_START); + + /* compare identical prefixes - they do not need to be fixed up */ + for(;;) { + c1=iter1->next(iter1); + c2=iter2->next(iter2); + if(c1!=c2) { + break; + } + if(c1==-1) { + return 0; + } + } + + /* if both values are in or above the surrogate range, fix them up */ + if(c1>=0xd800 && c2>=0xd800 && codePointOrder) { + /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */ + if( + (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) || + (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1)))) + ) { + /* part of a surrogate pair, leave >=d800 */ + } else { + /* BMP code point - may be surrogate code point - make <d800 */ + c1-=0x2800; + } + + if( + (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) || + (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2)))) + ) { + /* part of a surrogate pair, leave >=d800 */ + } else { + /* BMP code point - may be surrogate code point - make <d800 */ + c2-=0x2800; + } + } + + /* now c1 and c2 are in the requested (code unit or code point) order */ + return (int32_t)c1-(int32_t)c2; +} + +#if 0 +/* + * u_strCompareIter() does not leave the iterators _on_ the different units. + * This is possible but would cost a few extra indirect function calls to back + * up if the last unit (c1 or c2 respectively) was >=0. + * + * Consistently leaving them _behind_ the different units is not an option + * because the current "unit" is the end of the string if that is reached, + * and in such a case the iterator does not move. + * For example, when comparing "ab" with "abc", both iterators rest _on_ the end + * of their strings. Calling previous() on each does not move them to where + * the comparison fails. + * + * So the simplest semantics is to not define where the iterators end up. + * + * The following fragment is part of what would need to be done for backing up. + */ +void fragment { + /* iff a surrogate is part of a surrogate pair, leave >=d800 */ + if(c1<=0xdbff) { + if(!U16_IS_TRAIL(iter1->current(iter1))) { + /* lead surrogate code point - make <d800 */ + c1-=0x2800; + } + } else if(c1<=0xdfff) { + int32_t idx=iter1->getIndex(iter1, UITER_CURRENT); + iter1->previous(iter1); /* ==c1 */ + if(!U16_IS_LEAD(iter1->previous(iter1))) { + /* trail surrogate code point - make <d800 */ + c1-=0x2800; + } + /* go back to behind where the difference is */ + iter1->move(iter1, idx, UITER_ZERO); + } else /* 0xe000<=c1<=0xffff */ { + /* BMP code point - make <d800 */ + c1-=0x2800; + } +} +#endif + +U_CAPI int32_t U_EXPORT2 +u_strCompare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + UBool codePointOrder) { + /* argument checking */ + if(s1==NULL || length1<-1 || s2==NULL || length2<-1) { + return 0; + } + return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder); +} + +/* String compare in code point order - u_strcmp() compares in code unit order. */ +U_CAPI int32_t U_EXPORT2 +u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) { + return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE); +} + +U_CAPI int32_t U_EXPORT2 +u_strncmp(const UChar *s1, + const UChar *s2, + int32_t n) +{ + if(n > 0) { + int32_t rc; + for(;;) { + rc = (int32_t)*s1 - (int32_t)*s2; + if(rc != 0 || *s1 == 0 || --n == 0) { + return rc; + } + ++s1; + ++s2; + } + } else { + return 0; + } +} + +U_CAPI int32_t U_EXPORT2 +u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) { + return uprv_strCompare(s1, n, s2, n, TRUE, TRUE); +} + +U_CAPI UChar* U_EXPORT2 +u_strcpy(UChar *dst, + const UChar *src) +{ + UChar *anchor = dst; /* save a pointer to start of dst */ + + while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */ + } + + return anchor; +} + +U_CAPI UChar* U_EXPORT2 +u_strncpy(UChar *dst, + const UChar *src, + int32_t n) +{ + UChar *anchor = dst; /* save a pointer to start of dst */ + + /* copy string 2 over */ + while(n > 0 && (*(dst++) = *(src++)) != 0) { + --n; + } + + return anchor; +} + +U_CAPI int32_t U_EXPORT2 +u_strlen(const UChar *s) +{ +#if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR + return (int32_t)uprv_wcslen((const wchar_t *)s); +#else + const UChar *t = s; + while(*t != 0) { + ++t; + } + return t - s; +#endif +} + +U_CAPI int32_t U_EXPORT2 +u_countChar32(const UChar *s, int32_t length) { + int32_t count; + + if(s==NULL || length<-1) { + return 0; + } + + count=0; + if(length>=0) { + while(length>0) { + ++count; + if(U16_IS_LEAD(*s) && length>=2 && U16_IS_TRAIL(*(s+1))) { + s+=2; + length-=2; + } else { + ++s; + --length; + } + } + } else /* length==-1 */ { + UChar c; + + for(;;) { + if((c=*s++)==0) { + break; + } + ++count; + + /* + * sufficient to look ahead one because of UTF-16; + * safe to look ahead one because at worst that would be the terminating NUL + */ + if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) { + ++s; + } + } + } + return count; +} + +U_CAPI UBool U_EXPORT2 +u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) { + + if(number<0) { + return TRUE; + } + if(s==NULL || length<-1) { + return FALSE; + } + + if(length==-1) { + /* s is NUL-terminated */ + UChar c; + + /* count code points until they exceed */ + for(;;) { + if((c=*s++)==0) { + return FALSE; + } + if(number==0) { + return TRUE; + } + if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) { + ++s; + } + --number; + } + } else { + /* length>=0 known */ + const UChar *limit; + int32_t maxSupplementary; + + /* s contains at least (length+1)/2 code points: <=2 UChars per cp */ + if(((length+1)/2)>number) { + return TRUE; + } + + /* check if s does not even contain enough UChars */ + maxSupplementary=length-number; + if(maxSupplementary<=0) { + return FALSE; + } + /* there are maxSupplementary=length-number more UChars than asked-for code points */ + + /* + * count code points until they exceed and also check that there are + * no more than maxSupplementary supplementary code points (UChar pairs) + */ + limit=s+length; + for(;;) { + if(s==limit) { + return FALSE; + } + if(number==0) { + return TRUE; + } + if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) { + ++s; + if(--maxSupplementary<=0) { + /* too many pairs - too few code points */ + return FALSE; + } + } + --number; + } + } +} + +U_CAPI UChar * U_EXPORT2 +u_memcpy(UChar *dest, const UChar *src, int32_t count) { + if(count > 0) { + uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR); + } + return dest; +} + +U_CAPI UChar * U_EXPORT2 +u_memmove(UChar *dest, const UChar *src, int32_t count) { + if(count > 0) { + uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR); + } + return dest; +} + +U_CAPI UChar * U_EXPORT2 +u_memset(UChar *dest, UChar c, int32_t count) { + if(count > 0) { + UChar *ptr = dest; + UChar *limit = dest + count; + + while (ptr < limit) { + *(ptr++) = c; + } + } + return dest; +} + +U_CAPI int32_t U_EXPORT2 +u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) { + if(count > 0) { + const UChar *limit = buf1 + count; + int32_t result; + + while (buf1 < limit) { + result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2; + if (result != 0) { + return result; + } + buf1++; + buf2++; + } + } + return 0; +} + +U_CAPI int32_t U_EXPORT2 +u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) { + return uprv_strCompare(s1, count, s2, count, FALSE, TRUE); +} + +/* u_unescape & support fns ------------------------------------------------- */ + +/* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */ +static const UChar UNESCAPE_MAP[] = { + /*" 0x22, 0x22 */ + /*' 0x27, 0x27 */ + /*? 0x3F, 0x3F */ + /*\ 0x5C, 0x5C */ + /*a*/ 0x61, 0x07, + /*b*/ 0x62, 0x08, + /*e*/ 0x65, 0x1b, + /*f*/ 0x66, 0x0c, + /*n*/ 0x6E, 0x0a, + /*r*/ 0x72, 0x0d, + /*t*/ 0x74, 0x09, + /*v*/ 0x76, 0x0b +}; +enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) }; + +/* Convert one octal digit to a numeric value 0..7, or -1 on failure */ +static int8_t _digit8(UChar c) { + if (c >= 0x0030 && c <= 0x0037) { + return (int8_t)(c - 0x0030); + } + return -1; +} + +/* Convert one hex digit to a numeric value 0..F, or -1 on failure */ +static int8_t _digit16(UChar c) { + if (c >= 0x0030 && c <= 0x0039) { + return (int8_t)(c - 0x0030); + } + if (c >= 0x0041 && c <= 0x0046) { + return (int8_t)(c - (0x0041 - 10)); + } + if (c >= 0x0061 && c <= 0x0066) { + return (int8_t)(c - (0x0061 - 10)); + } + return -1; +} + +/* Parse a single escape sequence. Although this method deals in + * UChars, it does not use C++ or UnicodeString. This allows it to + * be used from C contexts. */ +U_CAPI UChar32 U_EXPORT2 +u_unescapeAt(UNESCAPE_CHAR_AT charAt, + int32_t *offset, + int32_t length, + void *context) { + + int32_t start = *offset; + UChar c; + UChar32 result = 0; + int8_t n = 0; + int8_t minDig = 0; + int8_t maxDig = 0; + int8_t bitsPerDigit = 4; + int8_t dig; + int32_t i; + UBool braces = FALSE; + + /* Check that offset is in range */ + if (*offset < 0 || *offset >= length) { + goto err; + } + + /* Fetch first UChar after '\\' */ + c = charAt((*offset)++, context); + + /* Convert hexadecimal and octal escapes */ + switch (c) { + case 0x0075 /*'u'*/: + minDig = maxDig = 4; + break; + case 0x0055 /*'U'*/: + minDig = maxDig = 8; + break; + case 0x0078 /*'x'*/: + minDig = 1; + if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) { + ++(*offset); + braces = TRUE; + maxDig = 8; + } else { + maxDig = 2; + } + break; + default: + dig = _digit8(c); + if (dig >= 0) { + minDig = 1; + maxDig = 3; + n = 1; /* Already have first octal digit */ + bitsPerDigit = 3; + result = dig; + } + break; + } + if (minDig != 0) { + while (*offset < length && n < maxDig) { + c = charAt(*offset, context); + dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c)); + if (dig < 0) { + break; + } + result = (result << bitsPerDigit) | dig; + ++(*offset); + ++n; + } + if (n < minDig) { + goto err; + } + if (braces) { + if (c != 0x7D /*}*/) { + goto err; + } + ++(*offset); + } + if (result < 0 || result >= 0x110000) { + goto err; + } + /* If an escape sequence specifies a lead surrogate, see if + * there is a trail surrogate after it, either as an escape or + * as a literal. If so, join them up into a supplementary. + */ + if (*offset < length && U16_IS_LEAD(result)) { + int32_t ahead = *offset + 1; + c = charAt(*offset, context); + if (c == 0x5C /*'\\'*/ && ahead < length) { + // Calling u_unescapeAt recursively may cause a stack overflow if + // we have repeated surrogate lead after that. Limit the + // length to 5 ('u' and 4 hex) after ahead. + int32_t tailLimit = ahead + 5; + if (tailLimit > length) { + tailLimit = length; + } + c = (UChar) u_unescapeAt(charAt, &ahead, tailLimit, + context); + } + if (U16_IS_TRAIL(c)) { + *offset = ahead; + result = U16_GET_SUPPLEMENTARY(result, c); + } + } + return result; + } + + /* Convert C-style escapes in table */ + for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) { + if (c == UNESCAPE_MAP[i]) { + return UNESCAPE_MAP[i+1]; + } else if (c < UNESCAPE_MAP[i]) { + break; + } + } + + /* Map \cX to control-X: X & 0x1F */ + if (c == 0x0063 /*'c'*/ && *offset < length) { + c = charAt((*offset)++, context); + if (U16_IS_LEAD(c) && *offset < length) { + UChar c2 = charAt(*offset, context); + if (U16_IS_TRAIL(c2)) { + ++(*offset); + c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); /* [sic] */ + } + } + return 0x1F & c; + } + + /* If no special forms are recognized, then consider + * the backslash to generically escape the next character. + * Deal with surrogate pairs. */ + if (U16_IS_LEAD(c) && *offset < length) { + UChar c2 = charAt(*offset, context); + if (U16_IS_TRAIL(c2)) { + ++(*offset); + return U16_GET_SUPPLEMENTARY(c, c2); + } + } + return c; + + err: + /* Invalid escape sequence */ + *offset = start; /* Reset to initial value */ + return (UChar32)0xFFFFFFFF; +} + +/* u_unescapeAt() callback to return a UChar from a char* */ +static UChar U_CALLCONV +_charPtr_charAt(int32_t offset, void *context) { + UChar c16; + /* It would be more efficient to access the invariant tables + * directly but there is no API for that. */ + u_charsToUChars(((char*) context) + offset, &c16, 1); + return c16; +} + +/* Append an escape-free segment of the text; used by u_unescape() */ +static void _appendUChars(UChar *dest, int32_t destCapacity, + const char *src, int32_t srcLen) { + if (destCapacity < 0) { + destCapacity = 0; + } + if (srcLen > destCapacity) { + srcLen = destCapacity; + } + u_charsToUChars(src, dest, srcLen); +} + +/* Do an invariant conversion of char* -> UChar*, with escape parsing */ +U_CAPI int32_t U_EXPORT2 +u_unescape(const char *src, UChar *dest, int32_t destCapacity) { + const char *segment = src; + int32_t i = 0; + char c; + + while ((c=*src) != 0) { + /* '\\' intentionally written as compiler-specific + * character constant to correspond to compiler-specific + * char* constants. */ + if (c == '\\') { + int32_t lenParsed = 0; + UChar32 c32; + if (src != segment) { + if (dest != NULL) { + _appendUChars(dest + i, destCapacity - i, + segment, (int32_t)(src - segment)); + } + i += (int32_t)(src - segment); + } + ++src; /* advance past '\\' */ + c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src); + if (lenParsed == 0) { + goto err; + } + src += lenParsed; /* advance past escape seq. */ + if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) { + U16_APPEND_UNSAFE(dest, i, c32); + } else { + i += U16_LENGTH(c32); + } + segment = src; + } else { + ++src; + } + } + if (src != segment) { + if (dest != NULL) { + _appendUChars(dest + i, destCapacity - i, + segment, (int32_t)(src - segment)); + } + i += (int32_t)(src - segment); + } + if (dest != NULL && i < destCapacity) { + dest[i] = 0; + } + return i; + + err: + if (dest != NULL && destCapacity > 0) { + *dest = 0; + } + return 0; +} + +/* NUL-termination of strings ----------------------------------------------- */ + +/** + * NUL-terminate a string no matter what its type. + * Set warning and error codes accordingly. + */ +#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) UPRV_BLOCK_MACRO_BEGIN { \ + if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) { \ + /* not a public function, so no complete argument checking */ \ + \ + if(length<0) { \ + /* assume that the caller handles this */ \ + } else if(length<destCapacity) { \ + /* NUL-terminate the string, the NUL fits */ \ + dest[length]=0; \ + /* unset the not-terminated warning but leave all others */ \ + if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { \ + *pErrorCode=U_ZERO_ERROR; \ + } \ + } else if(length==destCapacity) { \ + /* unable to NUL-terminate, but the string itself fit - set a warning code */ \ + *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; \ + } else /* length>destCapacity */ { \ + /* even the string itself did not fit - set an error code */ \ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +U_CAPI UChar U_EXPORT2 +u_asciiToUpper(UChar c) { + if (u'a' <= c && c <= u'z') { + c = c + u'A' - u'a'; + } + return c; +} + +U_CAPI int32_t U_EXPORT2 +u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { + __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); + return length; +} + +U_CAPI int32_t U_EXPORT2 +u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { + __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); + return length; +} + +U_CAPI int32_t U_EXPORT2 +u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { + __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); + return length; +} + +U_CAPI int32_t U_EXPORT2 +u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) { + __TERMINATE_STRING(dest, destCapacity, length, pErrorCode); + return length; +} + +// Compute the hash code for a string -------------------------------------- *** + +// Moved here from uhash.c so that UnicodeString::hashCode() does not depend +// on UHashtable code. + +/* + Compute the hash by iterating sparsely over about 32 (up to 63) + characters spaced evenly through the string. For each character, + multiply the previous hash value by a prime number and add the new + character in, like a linear congruential random number generator, + producing a pseudorandom deterministic value well distributed over + the output range. [LIU] +*/ + +#define STRING_HASH(TYPE, STR, STRLEN, DEREF) UPRV_BLOCK_MACRO_BEGIN { \ + uint32_t hash = 0; \ + const TYPE *p = (const TYPE*) STR; \ + if (p != NULL) { \ + int32_t len = (int32_t)(STRLEN); \ + int32_t inc = ((len - 32) / 32) + 1; \ + const TYPE *limit = p + len; \ + while (p<limit) { \ + hash = (hash * 37) + DEREF; \ + p += inc; \ + } \ + } \ + return static_cast<int32_t>(hash); \ +} UPRV_BLOCK_MACRO_END + +/* Used by UnicodeString to compute its hashcode - Not public API. */ +U_CAPI int32_t U_EXPORT2 +ustr_hashUCharsN(const UChar *str, int32_t length) { + STRING_HASH(UChar, str, length, *p); +} + +U_CAPI int32_t U_EXPORT2 +ustr_hashCharsN(const char *str, int32_t length) { + STRING_HASH(uint8_t, str, length, *p); +} + +U_CAPI int32_t U_EXPORT2 +ustr_hashICharsN(const char *str, int32_t length) { + STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p)); +} diff --git a/thirdparty/icu4c/common/ustrtrns.cpp b/thirdparty/icu4c/common/ustrtrns.cpp new file mode 100644 index 0000000000..5dc032c02f --- /dev/null +++ b/thirdparty/icu4c/common/ustrtrns.cpp @@ -0,0 +1,1451 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* File ustrtrns.cpp +* +* Modification History: +* +* Date Name Description +* 9/10/2001 Ram Creation. +****************************************************************************** +*/ + +/******************************************************************************* + * + * u_strTo* and u_strFrom* APIs + * WCS functions moved to ustr_wcs.c for better modularization + * + ******************************************************************************* + */ + + +#include "unicode/putil.h" +#include "unicode/ustring.h" +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "cstring.h" +#include "cmemory.h" +#include "ustr_imp.h" +#include "uassert.h" + +U_CAPI UChar* U_EXPORT2 +u_strFromUTF32WithSub(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar32 *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode) { + const UChar32 *srcLimit; + UChar32 ch; + UChar *destLimit; + UChar *pDest; + int32_t reqLength; + int32_t numSubstitutions; + + /* args check */ + if(U_FAILURE(*pErrorCode)){ + return NULL; + } + if( (src==NULL && srcLength!=0) || srcLength < -1 || + (destCapacity<0) || (dest == NULL && destCapacity > 0) || + subchar > 0x10ffff || U_IS_SURROGATE(subchar) + ) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + if(pNumSubstitutions != NULL) { + *pNumSubstitutions = 0; + } + + pDest = dest; + destLimit = (dest!=NULL)?(dest + destCapacity):NULL; + reqLength = 0; + numSubstitutions = 0; + + if(srcLength < 0) { + /* simple loop for conversion of a NUL-terminated BMP string */ + while((ch=*src) != 0 && + ((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff))) { + ++src; + if(pDest < destLimit) { + *pDest++ = (UChar)ch; + } else { + ++reqLength; + } + } + srcLimit = src; + if(ch != 0) { + /* "complicated" case, find the end of the remaining string */ + while(*++srcLimit != 0) {} + } + } else { + srcLimit = (src!=NULL)?(src + srcLength):NULL; + } + + /* convert with length */ + while(src < srcLimit) { + ch = *src++; + do { + /* usually "loops" once; twice only for writing subchar */ + if((uint32_t)ch < 0xd800 || (0xe000 <= ch && ch <= 0xffff)) { + if(pDest < destLimit) { + *pDest++ = (UChar)ch; + } else { + ++reqLength; + } + break; + } else if(0x10000 <= ch && ch <= 0x10ffff) { + if(pDest!=NULL && ((pDest + 2) <= destLimit)) { + *pDest++ = U16_LEAD(ch); + *pDest++ = U16_TRAIL(ch); + } else { + reqLength += 2; + } + break; + } else if((ch = subchar) < 0) { + /* surrogate code point, or not a Unicode code point at all */ + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } else { + ++numSubstitutions; + } + } while(TRUE); + } + + reqLength += (int32_t)(pDest - dest); + if(pDestLength) { + *pDestLength = reqLength; + } + if(pNumSubstitutions != NULL) { + *pNumSubstitutions = numSubstitutions; + } + + /* Terminate the buffer */ + u_terminateUChars(dest, destCapacity, reqLength, pErrorCode); + + return dest; +} + +U_CAPI UChar* U_EXPORT2 +u_strFromUTF32(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar32 *src, + int32_t srcLength, + UErrorCode *pErrorCode) { + return u_strFromUTF32WithSub( + dest, destCapacity, pDestLength, + src, srcLength, + U_SENTINEL, NULL, + pErrorCode); +} + +U_CAPI UChar32* U_EXPORT2 +u_strToUTF32WithSub(UChar32 *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode) { + const UChar *srcLimit; + UChar32 ch; + UChar ch2; + UChar32 *destLimit; + UChar32 *pDest; + int32_t reqLength; + int32_t numSubstitutions; + + /* args check */ + if(U_FAILURE(*pErrorCode)){ + return NULL; + } + if( (src==NULL && srcLength!=0) || srcLength < -1 || + (destCapacity<0) || (dest == NULL && destCapacity > 0) || + subchar > 0x10ffff || U_IS_SURROGATE(subchar) + ) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + if(pNumSubstitutions != NULL) { + *pNumSubstitutions = 0; + } + + pDest = dest; + destLimit = (dest!=NULL)?(dest + destCapacity):NULL; + reqLength = 0; + numSubstitutions = 0; + + if(srcLength < 0) { + /* simple loop for conversion of a NUL-terminated BMP string */ + while((ch=*src) != 0 && !U16_IS_SURROGATE(ch)) { + ++src; + if(pDest < destLimit) { + *pDest++ = ch; + } else { + ++reqLength; + } + } + srcLimit = src; + if(ch != 0) { + /* "complicated" case, find the end of the remaining string */ + while(*++srcLimit != 0) {} + } + } else { + srcLimit = (src!=NULL)?(src + srcLength):NULL; + } + + /* convert with length */ + while(src < srcLimit) { + ch = *src++; + if(!U16_IS_SURROGATE(ch)) { + /* write or count ch below */ + } else if(U16_IS_SURROGATE_LEAD(ch) && src < srcLimit && U16_IS_TRAIL(ch2 = *src)) { + ++src; + ch = U16_GET_SUPPLEMENTARY(ch, ch2); + } else if((ch = subchar) < 0) { + /* unpaired surrogate */ + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } else { + ++numSubstitutions; + } + if(pDest < destLimit) { + *pDest++ = ch; + } else { + ++reqLength; + } + } + + reqLength += (int32_t)(pDest - dest); + if(pDestLength) { + *pDestLength = reqLength; + } + if(pNumSubstitutions != NULL) { + *pNumSubstitutions = numSubstitutions; + } + + /* Terminate the buffer */ + u_terminateUChar32s(dest, destCapacity, reqLength, pErrorCode); + + return dest; +} + +U_CAPI UChar32* U_EXPORT2 +u_strToUTF32(UChar32 *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode) { + return u_strToUTF32WithSub( + dest, destCapacity, pDestLength, + src, srcLength, + U_SENTINEL, NULL, + pErrorCode); +} + +U_CAPI UChar* U_EXPORT2 +u_strFromUTF8WithSub(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char* src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode){ + /* args check */ + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if( (src==NULL && srcLength!=0) || srcLength < -1 || + (destCapacity<0) || (dest == NULL && destCapacity > 0) || + subchar > 0x10ffff || U_IS_SURROGATE(subchar) + ) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + if(pNumSubstitutions!=NULL) { + *pNumSubstitutions=0; + } + UChar *pDest = dest; + UChar *pDestLimit = dest+destCapacity; + int32_t reqLength = 0; + int32_t numSubstitutions=0; + + /* + * Inline processing of UTF-8 byte sequences: + * + * Byte sequences for the most common characters are handled inline in + * the conversion loops. In order to reduce the path lengths for those + * characters, the tests are arranged in a kind of binary search. + * ASCII (<=0x7f) is checked first, followed by the dividing point + * between 2- and 3-byte sequences (0xe0). + * The 3-byte branch is tested first to speed up CJK text. + * The compiler should combine the subtractions for the two tests for 0xe0. + * Each branch then tests for the other end of its range. + */ + + if(srcLength < 0){ + /* + * Transform a NUL-terminated string. + * The code explicitly checks for NULs only in the lead byte position. + * A NUL byte in the trail byte position fails the trail byte range check anyway. + */ + int32_t i; + UChar32 c; + for(i = 0; (c = (uint8_t)src[i]) != 0 && (pDest < pDestLimit);) { + // modified copy of U8_NEXT() + ++i; + if(U8_IS_SINGLE(c)) { + *pDest++=(UChar)c; + } else { + uint8_t __t1, __t2; + if( /* handle U+0800..U+FFFF inline */ + (0xe0<=(c) && (c)<0xf0) && + U8_IS_VALID_LEAD3_AND_T1((c), src[i]) && + (__t2=src[(i)+1]-0x80)<=0x3f) { + *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2; + i+=2; + } else if( /* handle U+0080..U+07FF inline */ + ((c)<0xe0 && (c)>=0xc2) && + (__t1=src[i]-0x80)<=0x3f) { + *pDest++ = (((c)&0x1f)<<6)|__t1; + ++(i); + } else { + /* function call for "complicated" and error cases */ + (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1); + if(c<0 && (++numSubstitutions, c = subchar) < 0) { + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } else if(c<=0xFFFF) { + *(pDest++)=(UChar)c; + } else { + *(pDest++)=U16_LEAD(c); + if(pDest<pDestLimit) { + *(pDest++)=U16_TRAIL(c); + } else { + reqLength++; + break; + } + } + } + } + } + + /* Pre-flight the rest of the string. */ + while((c = (uint8_t)src[i]) != 0) { + // modified copy of U8_NEXT() + ++i; + if(U8_IS_SINGLE(c)) { + ++reqLength; + } else { + uint8_t __t1, __t2; + if( /* handle U+0800..U+FFFF inline */ + (0xe0<=(c) && (c)<0xf0) && + U8_IS_VALID_LEAD3_AND_T1((c), src[i]) && + (__t2=src[(i)+1]-0x80)<=0x3f) { + ++reqLength; + i+=2; + } else if( /* handle U+0080..U+07FF inline */ + ((c)<0xe0 && (c)>=0xc2) && + (__t1=src[i]-0x80)<=0x3f) { + ++reqLength; + ++(i); + } else { + /* function call for "complicated" and error cases */ + (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), -1, c, -1); + if(c<0 && (++numSubstitutions, c = subchar) < 0) { + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } + reqLength += U16_LENGTH(c); + } + } + } + } else /* srcLength >= 0 */ { + /* Faster loop without ongoing checking for srcLength and pDestLimit. */ + int32_t i = 0; + UChar32 c; + for(;;) { + /* + * Each iteration of the inner loop progresses by at most 3 UTF-8 + * bytes and one UChar, for most characters. + * For supplementary code points (4 & 2), which are rare, + * there is an additional adjustment. + */ + int32_t count = (int32_t)(pDestLimit - pDest); + int32_t count2 = (srcLength - i) / 3; + if(count > count2) { + count = count2; /* min(remaining dest, remaining src/3) */ + } + if(count < 3) { + /* + * Too much overhead if we get near the end of the string, + * continue with the next loop. + */ + break; + } + + do { + // modified copy of U8_NEXT() + c = (uint8_t)src[i++]; + if(U8_IS_SINGLE(c)) { + *pDest++=(UChar)c; + } else { + uint8_t __t1, __t2; + if( /* handle U+0800..U+FFFF inline */ + (0xe0<=(c) && (c)<0xf0) && + ((i)+1)<srcLength && + U8_IS_VALID_LEAD3_AND_T1((c), src[i]) && + (__t2=src[(i)+1]-0x80)<=0x3f) { + *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2; + i+=2; + } else if( /* handle U+0080..U+07FF inline */ + ((c)<0xe0 && (c)>=0xc2) && + ((i)!=srcLength) && + (__t1=src[i]-0x80)<=0x3f) { + *pDest++ = (((c)&0x1f)<<6)|__t1; + ++(i); + } else { + if(c >= 0xf0 || subchar > 0xffff) { + // We may read up to four bytes and write up to two UChars, + // which we didn't account for with computing count, + // so we adjust it here. + if(--count == 0) { + --i; // back out byte c + break; + } + } + + /* function call for "complicated" and error cases */ + (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1); + if(c<0 && (++numSubstitutions, c = subchar) < 0) { + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } else if(c<=0xFFFF) { + *(pDest++)=(UChar)c; + } else { + *(pDest++)=U16_LEAD(c); + *(pDest++)=U16_TRAIL(c); + } + } + } + } while(--count > 0); + } + + while(i < srcLength && (pDest < pDestLimit)) { + // modified copy of U8_NEXT() + c = (uint8_t)src[i++]; + if(U8_IS_SINGLE(c)) { + *pDest++=(UChar)c; + } else { + uint8_t __t1, __t2; + if( /* handle U+0800..U+FFFF inline */ + (0xe0<=(c) && (c)<0xf0) && + ((i)+1)<srcLength && + U8_IS_VALID_LEAD3_AND_T1((c), src[i]) && + (__t2=src[(i)+1]-0x80)<=0x3f) { + *pDest++ = (((c)&0xf)<<12)|((src[i]&0x3f)<<6)|__t2; + i+=2; + } else if( /* handle U+0080..U+07FF inline */ + ((c)<0xe0 && (c)>=0xc2) && + ((i)!=srcLength) && + (__t1=src[i]-0x80)<=0x3f) { + *pDest++ = (((c)&0x1f)<<6)|__t1; + ++(i); + } else { + /* function call for "complicated" and error cases */ + (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1); + if(c<0 && (++numSubstitutions, c = subchar) < 0) { + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } else if(c<=0xFFFF) { + *(pDest++)=(UChar)c; + } else { + *(pDest++)=U16_LEAD(c); + if(pDest<pDestLimit) { + *(pDest++)=U16_TRAIL(c); + } else { + reqLength++; + break; + } + } + } + } + } + + /* Pre-flight the rest of the string. */ + while(i < srcLength) { + // modified copy of U8_NEXT() + c = (uint8_t)src[i++]; + if(U8_IS_SINGLE(c)) { + ++reqLength; + } else { + uint8_t __t1, __t2; + if( /* handle U+0800..U+FFFF inline */ + (0xe0<=(c) && (c)<0xf0) && + ((i)+1)<srcLength && + U8_IS_VALID_LEAD3_AND_T1((c), src[i]) && + (__t2=src[(i)+1]-0x80)<=0x3f) { + ++reqLength; + i+=2; + } else if( /* handle U+0080..U+07FF inline */ + ((c)<0xe0 && (c)>=0xc2) && + ((i)!=srcLength) && + (__t1=src[i]-0x80)<=0x3f) { + ++reqLength; + ++(i); + } else { + /* function call for "complicated" and error cases */ + (c)=utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, c, -1); + if(c<0 && (++numSubstitutions, c = subchar) < 0) { + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } + reqLength += U16_LENGTH(c); + } + } + } + } + + reqLength+=(int32_t)(pDest - dest); + + if(pNumSubstitutions!=NULL) { + *pNumSubstitutions=numSubstitutions; + } + + if(pDestLength){ + *pDestLength = reqLength; + } + + /* Terminate the buffer */ + u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); + + return dest; +} + +U_CAPI UChar* U_EXPORT2 +u_strFromUTF8(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char* src, + int32_t srcLength, + UErrorCode *pErrorCode){ + return u_strFromUTF8WithSub( + dest, destCapacity, pDestLength, + src, srcLength, + U_SENTINEL, NULL, + pErrorCode); +} + +U_CAPI UChar * U_EXPORT2 +u_strFromUTF8Lenient(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UErrorCode *pErrorCode) { + UChar *pDest = dest; + UChar32 ch; + int32_t reqLength = 0; + uint8_t* pSrc = (uint8_t*) src; + + /* args check */ + if(U_FAILURE(*pErrorCode)){ + return NULL; + } + + if( (src==NULL && srcLength!=0) || srcLength < -1 || + (destCapacity<0) || (dest == NULL && destCapacity > 0) + ) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + if(srcLength < 0) { + /* Transform a NUL-terminated string. */ + UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL; + uint8_t t1, t2, t3; /* trail bytes */ + + while(((ch = *pSrc) != 0) && (pDest < pDestLimit)) { + if(ch < 0xc0) { + /* + * ASCII, or a trail byte in lead position which is treated like + * a single-byte sequence for better character boundary + * resynchronization after illegal sequences. + */ + *pDest++=(UChar)ch; + ++pSrc; + continue; + } else if(ch < 0xe0) { /* U+0080..U+07FF */ + if((t1 = pSrc[1]) != 0) { + /* 0x3080 = (0xc0 << 6) + 0x80 */ + *pDest++ = (UChar)((ch << 6) + t1 - 0x3080); + pSrc += 2; + continue; + } + } else if(ch < 0xf0) { /* U+0800..U+FFFF */ + if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0) { + /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ + /* 0x2080 = (0x80 << 6) + 0x80 */ + *pDest++ = (UChar)((ch << 12) + (t1 << 6) + t2 - 0x2080); + pSrc += 3; + continue; + } + } else /* f0..f4 */ { /* U+10000..U+10FFFF */ + if((t1 = pSrc[1]) != 0 && (t2 = pSrc[2]) != 0 && (t3 = pSrc[3]) != 0) { + pSrc += 4; + /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ + ch = (ch << 18) + (t1 << 12) + (t2 << 6) + t3 - 0x3c82080; + *(pDest++) = U16_LEAD(ch); + if(pDest < pDestLimit) { + *(pDest++) = U16_TRAIL(ch); + } else { + reqLength = 1; + break; + } + continue; + } + } + + /* truncated character at the end */ + *pDest++ = 0xfffd; + while(*++pSrc != 0) {} + break; + } + + /* Pre-flight the rest of the string. */ + while((ch = *pSrc) != 0) { + if(ch < 0xc0) { + /* + * ASCII, or a trail byte in lead position which is treated like + * a single-byte sequence for better character boundary + * resynchronization after illegal sequences. + */ + ++reqLength; + ++pSrc; + continue; + } else if(ch < 0xe0) { /* U+0080..U+07FF */ + if(pSrc[1] != 0) { + ++reqLength; + pSrc += 2; + continue; + } + } else if(ch < 0xf0) { /* U+0800..U+FFFF */ + if(pSrc[1] != 0 && pSrc[2] != 0) { + ++reqLength; + pSrc += 3; + continue; + } + } else /* f0..f4 */ { /* U+10000..U+10FFFF */ + if(pSrc[1] != 0 && pSrc[2] != 0 && pSrc[3] != 0) { + reqLength += 2; + pSrc += 4; + continue; + } + } + + /* truncated character at the end */ + ++reqLength; + break; + } + } else /* srcLength >= 0 */ { + const uint8_t *pSrcLimit = (pSrc!=NULL)?(pSrc + srcLength):NULL; + + /* + * This function requires that if srcLength is given, then it must be + * destCapatity >= srcLength so that we need not check for + * destination buffer overflow in the loop. + */ + if(destCapacity < srcLength) { + if(pDestLength != NULL) { + *pDestLength = srcLength; /* this likely overestimates the true destLength! */ + } + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + return NULL; + } + + if((pSrcLimit - pSrc) >= 4) { + pSrcLimit -= 3; /* temporarily reduce pSrcLimit */ + + /* in this loop, we can always access at least 4 bytes, up to pSrc+3 */ + do { + ch = *pSrc++; + if(ch < 0xc0) { + /* + * ASCII, or a trail byte in lead position which is treated like + * a single-byte sequence for better character boundary + * resynchronization after illegal sequences. + */ + *pDest++=(UChar)ch; + } else if(ch < 0xe0) { /* U+0080..U+07FF */ + /* 0x3080 = (0xc0 << 6) + 0x80 */ + *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080); + } else if(ch < 0xf0) { /* U+0800..U+FFFF */ + /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ + /* 0x2080 = (0x80 << 6) + 0x80 */ + ch = (ch << 12) + (*pSrc++ << 6); + *pDest++ = (UChar)(ch + *pSrc++ - 0x2080); + } else /* f0..f4 */ { /* U+10000..U+10FFFF */ + /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ + ch = (ch << 18) + (*pSrc++ << 12); + ch += *pSrc++ << 6; + ch += *pSrc++ - 0x3c82080; + *(pDest++) = U16_LEAD(ch); + *(pDest++) = U16_TRAIL(ch); + } + } while(pSrc < pSrcLimit); + + pSrcLimit += 3; /* restore original pSrcLimit */ + } + + while(pSrc < pSrcLimit) { + ch = *pSrc++; + if(ch < 0xc0) { + /* + * ASCII, or a trail byte in lead position which is treated like + * a single-byte sequence for better character boundary + * resynchronization after illegal sequences. + */ + *pDest++=(UChar)ch; + continue; + } else if(ch < 0xe0) { /* U+0080..U+07FF */ + if(pSrc < pSrcLimit) { + /* 0x3080 = (0xc0 << 6) + 0x80 */ + *pDest++ = (UChar)((ch << 6) + *pSrc++ - 0x3080); + continue; + } + } else if(ch < 0xf0) { /* U+0800..U+FFFF */ + if((pSrcLimit - pSrc) >= 2) { + /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ + /* 0x2080 = (0x80 << 6) + 0x80 */ + ch = (ch << 12) + (*pSrc++ << 6); + *pDest++ = (UChar)(ch + *pSrc++ - 0x2080); + pSrc += 3; + continue; + } + } else /* f0..f4 */ { /* U+10000..U+10FFFF */ + if((pSrcLimit - pSrc) >= 3) { + /* 0x3c82080 = (0xf0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */ + ch = (ch << 18) + (*pSrc++ << 12); + ch += *pSrc++ << 6; + ch += *pSrc++ - 0x3c82080; + *(pDest++) = U16_LEAD(ch); + *(pDest++) = U16_TRAIL(ch); + pSrc += 4; + continue; + } + } + + /* truncated character at the end */ + *pDest++ = 0xfffd; + break; + } + } + + reqLength+=(int32_t)(pDest - dest); + + if(pDestLength){ + *pDestLength = reqLength; + } + + /* Terminate the buffer */ + u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); + + return dest; +} + +static inline uint8_t * +_appendUTF8(uint8_t *pDest, UChar32 c) { + /* it is 0<=c<=0x10ffff and not a surrogate if called by a validating function */ + if((c)<=0x7f) { + *pDest++=(uint8_t)c; + } else if(c<=0x7ff) { + *pDest++=(uint8_t)((c>>6)|0xc0); + *pDest++=(uint8_t)((c&0x3f)|0x80); + } else if(c<=0xffff) { + *pDest++=(uint8_t)((c>>12)|0xe0); + *pDest++=(uint8_t)(((c>>6)&0x3f)|0x80); + *pDest++=(uint8_t)(((c)&0x3f)|0x80); + } else /* if((uint32_t)(c)<=0x10ffff) */ { + *pDest++=(uint8_t)(((c)>>18)|0xf0); + *pDest++=(uint8_t)((((c)>>12)&0x3f)|0x80); + *pDest++=(uint8_t)((((c)>>6)&0x3f)|0x80); + *pDest++=(uint8_t)(((c)&0x3f)|0x80); + } + return pDest; +} + + +U_CAPI char* U_EXPORT2 +u_strToUTF8WithSub(char *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *pSrc, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode){ + int32_t reqLength=0; + uint32_t ch=0,ch2=0; + uint8_t *pDest = (uint8_t *)dest; + uint8_t *pDestLimit = (pDest!=NULL)?(pDest + destCapacity):NULL; + int32_t numSubstitutions; + + /* args check */ + if(U_FAILURE(*pErrorCode)){ + return NULL; + } + + if( (pSrc==NULL && srcLength!=0) || srcLength < -1 || + (destCapacity<0) || (dest == NULL && destCapacity > 0) || + subchar > 0x10ffff || U_IS_SURROGATE(subchar) + ) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + if(pNumSubstitutions!=NULL) { + *pNumSubstitutions=0; + } + numSubstitutions=0; + + if(srcLength==-1) { + while((ch=*pSrc)!=0) { + ++pSrc; + if(ch <= 0x7f) { + if(pDest<pDestLimit) { + *pDest++ = (uint8_t)ch; + } else { + reqLength = 1; + break; + } + } else if(ch <= 0x7ff) { + if((pDestLimit - pDest) >= 2) { + *pDest++=(uint8_t)((ch>>6)|0xc0); + *pDest++=(uint8_t)((ch&0x3f)|0x80); + } else { + reqLength = 2; + break; + } + } else if(ch <= 0xd7ff || ch >= 0xe000) { + if((pDestLimit - pDest) >= 3) { + *pDest++=(uint8_t)((ch>>12)|0xe0); + *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); + *pDest++=(uint8_t)((ch&0x3f)|0x80); + } else { + reqLength = 3; + break; + } + } else /* ch is a surrogate */ { + int32_t length; + + /*need not check for NUL because NUL fails U16_IS_TRAIL() anyway*/ + if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) { + ++pSrc; + ch=U16_GET_SUPPLEMENTARY(ch, ch2); + } else if(subchar>=0) { + ch=subchar; + ++numSubstitutions; + } else { + /* Unicode 3.2 forbids surrogate code points in UTF-8 */ + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } + + length = U8_LENGTH(ch); + if((pDestLimit - pDest) >= length) { + /* convert and append*/ + pDest=_appendUTF8(pDest, ch); + } else { + reqLength = length; + break; + } + } + } + while((ch=*pSrc++)!=0) { + if(ch<=0x7f) { + ++reqLength; + } else if(ch<=0x7ff) { + reqLength+=2; + } else if(!U16_IS_SURROGATE(ch)) { + reqLength+=3; + } else if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) { + ++pSrc; + reqLength+=4; + } else if(subchar>=0) { + reqLength+=U8_LENGTH(subchar); + ++numSubstitutions; + } else { + /* Unicode 3.2 forbids surrogate code points in UTF-8 */ + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } + } + } else { + const UChar *pSrcLimit = (pSrc!=NULL)?(pSrc+srcLength):NULL; + int32_t count; + + /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ + for(;;) { + /* + * Each iteration of the inner loop progresses by at most 3 UTF-8 + * bytes and one UChar, for most characters. + * For supplementary code points (4 & 2), which are rare, + * there is an additional adjustment. + */ + count = (int32_t)((pDestLimit - pDest) / 3); + srcLength = (int32_t)(pSrcLimit - pSrc); + if(count > srcLength) { + count = srcLength; /* min(remaining dest/3, remaining src) */ + } + if(count < 3) { + /* + * Too much overhead if we get near the end of the string, + * continue with the next loop. + */ + break; + } + do { + ch=*pSrc++; + if(ch <= 0x7f) { + *pDest++ = (uint8_t)ch; + } else if(ch <= 0x7ff) { + *pDest++=(uint8_t)((ch>>6)|0xc0); + *pDest++=(uint8_t)((ch&0x3f)|0x80); + } else if(ch <= 0xd7ff || ch >= 0xe000) { + *pDest++=(uint8_t)((ch>>12)|0xe0); + *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); + *pDest++=(uint8_t)((ch&0x3f)|0x80); + } else /* ch is a surrogate */ { + /* + * We will read two UChars and probably output four bytes, + * which we didn't account for with computing count, + * so we adjust it here. + */ + if(--count == 0) { + --pSrc; /* undo ch=*pSrc++ for the lead surrogate */ + break; /* recompute count */ + } + + if(U16_IS_SURROGATE_LEAD(ch) && U16_IS_TRAIL(ch2=*pSrc)) { + ++pSrc; + ch=U16_GET_SUPPLEMENTARY(ch, ch2); + + /* writing 4 bytes per 2 UChars is ok */ + *pDest++=(uint8_t)((ch>>18)|0xf0); + *pDest++=(uint8_t)(((ch>>12)&0x3f)|0x80); + *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); + *pDest++=(uint8_t)((ch&0x3f)|0x80); + } else { + /* Unicode 3.2 forbids surrogate code points in UTF-8 */ + if(subchar>=0) { + ch=subchar; + ++numSubstitutions; + } else { + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } + + /* convert and append*/ + pDest=_appendUTF8(pDest, ch); + } + } + } while(--count > 0); + } + + while(pSrc<pSrcLimit) { + ch=*pSrc++; + if(ch <= 0x7f) { + if(pDest<pDestLimit) { + *pDest++ = (uint8_t)ch; + } else { + reqLength = 1; + break; + } + } else if(ch <= 0x7ff) { + if((pDestLimit - pDest) >= 2) { + *pDest++=(uint8_t)((ch>>6)|0xc0); + *pDest++=(uint8_t)((ch&0x3f)|0x80); + } else { + reqLength = 2; + break; + } + } else if(ch <= 0xd7ff || ch >= 0xe000) { + if((pDestLimit - pDest) >= 3) { + *pDest++=(uint8_t)((ch>>12)|0xe0); + *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); + *pDest++=(uint8_t)((ch&0x3f)|0x80); + } else { + reqLength = 3; + break; + } + } else /* ch is a surrogate */ { + int32_t length; + + if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) { + ++pSrc; + ch=U16_GET_SUPPLEMENTARY(ch, ch2); + } else if(subchar>=0) { + ch=subchar; + ++numSubstitutions; + } else { + /* Unicode 3.2 forbids surrogate code points in UTF-8 */ + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } + + length = U8_LENGTH(ch); + if((pDestLimit - pDest) >= length) { + /* convert and append*/ + pDest=_appendUTF8(pDest, ch); + } else { + reqLength = length; + break; + } + } + } + while(pSrc<pSrcLimit) { + ch=*pSrc++; + if(ch<=0x7f) { + ++reqLength; + } else if(ch<=0x7ff) { + reqLength+=2; + } else if(!U16_IS_SURROGATE(ch)) { + reqLength+=3; + } else if(U16_IS_SURROGATE_LEAD(ch) && pSrc<pSrcLimit && U16_IS_TRAIL(ch2=*pSrc)) { + ++pSrc; + reqLength+=4; + } else if(subchar>=0) { + reqLength+=U8_LENGTH(subchar); + ++numSubstitutions; + } else { + /* Unicode 3.2 forbids surrogate code points in UTF-8 */ + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } + } + } + + reqLength+=(int32_t)(pDest - (uint8_t *)dest); + + if(pNumSubstitutions!=NULL) { + *pNumSubstitutions=numSubstitutions; + } + + if(pDestLength){ + *pDestLength = reqLength; + } + + /* Terminate the buffer */ + u_terminateChars(dest, destCapacity, reqLength, pErrorCode); + return dest; +} + +U_CAPI char* U_EXPORT2 +u_strToUTF8(char *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *pSrc, + int32_t srcLength, + UErrorCode *pErrorCode){ + return u_strToUTF8WithSub( + dest, destCapacity, pDestLength, + pSrc, srcLength, + U_SENTINEL, NULL, + pErrorCode); +} + +U_CAPI UChar* U_EXPORT2 +u_strFromJavaModifiedUTF8WithSub( + UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode) { + /* args check */ + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if( (src==NULL && srcLength!=0) || srcLength < -1 || + (dest==NULL && destCapacity!=0) || destCapacity<0 || + subchar > 0x10ffff || U_IS_SURROGATE(subchar) + ) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + if(pNumSubstitutions!=NULL) { + *pNumSubstitutions=0; + } + UChar *pDest = dest; + UChar *pDestLimit = dest+destCapacity; + int32_t reqLength = 0; + int32_t numSubstitutions=0; + + if(srcLength < 0) { + /* + * Transform a NUL-terminated ASCII string. + * Handle non-ASCII strings with slower code. + */ + UChar32 c; + while(((c = (uint8_t)*src) != 0) && c <= 0x7f && (pDest < pDestLimit)) { + *pDest++=(UChar)c; + ++src; + } + if(c == 0) { + reqLength=(int32_t)(pDest - dest); + if(pDestLength) { + *pDestLength = reqLength; + } + + /* Terminate the buffer */ + u_terminateUChars(dest, destCapacity, reqLength, pErrorCode); + return dest; + } + srcLength = static_cast<int32_t>(uprv_strlen(src)); + } + + /* Faster loop without ongoing checking for srcLength and pDestLimit. */ + UChar32 ch; + uint8_t t1, t2; + int32_t i = 0; + for(;;) { + int32_t count = (int32_t)(pDestLimit - pDest); + int32_t count2 = srcLength - i; + if(count >= count2 && srcLength > 0 && U8_IS_SINGLE(*src)) { + /* fast ASCII loop */ + int32_t start = i; + uint8_t b; + while(i < srcLength && U8_IS_SINGLE(b = src[i])) { + *pDest++=b; + ++i; + } + int32_t delta = i - start; + count -= delta; + count2 -= delta; + } + /* + * Each iteration of the inner loop progresses by at most 3 UTF-8 + * bytes and one UChar. + */ + if(subchar > 0xFFFF) { + break; + } + count2 /= 3; + if(count > count2) { + count = count2; /* min(remaining dest, remaining src/3) */ + } + if(count < 3) { + /* + * Too much overhead if we get near the end of the string, + * continue with the next loop. + */ + break; + } + do { + ch = (uint8_t)src[i++]; + if(U8_IS_SINGLE(ch)) { + *pDest++=(UChar)ch; + } else { + if(ch >= 0xe0) { + if( /* handle U+0000..U+FFFF inline */ + ch <= 0xef && + (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f && + (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f + ) { + /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ + *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); + i += 2; + continue; + } + } else { + if( /* handle U+0000..U+07FF inline */ + ch >= 0xc0 && + (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f + ) { + *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); + ++i; + continue; + } + } + + if(subchar < 0) { + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } else if(subchar > 0xffff && --count == 0) { + /* + * We need to write two UChars, adjusted count for that, + * and ran out of space. + */ + --i; // back out byte ch + break; + } else { + /* function call for error cases */ + utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1); + ++numSubstitutions; + *(pDest++)=(UChar)subchar; + } + } + } while(--count > 0); + } + + while(i < srcLength && (pDest < pDestLimit)) { + ch = (uint8_t)src[i++]; + if(U8_IS_SINGLE(ch)){ + *pDest++=(UChar)ch; + } else { + if(ch >= 0xe0) { + if( /* handle U+0000..U+FFFF inline */ + ch <= 0xef && + (i+1) < srcLength && + (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f && + (t2 = (uint8_t)(src[i+1] - 0x80)) <= 0x3f + ) { + /* no need for (ch & 0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ + *pDest++ = (UChar)((ch << 12) | (t1 << 6) | t2); + i += 2; + continue; + } + } else { + if( /* handle U+0000..U+07FF inline */ + ch >= 0xc0 && + i < srcLength && + (t1 = (uint8_t)(src[i] - 0x80)) <= 0x3f + ) { + *pDest++ = (UChar)(((ch & 0x1f) << 6) | t1); + ++i; + continue; + } + } + + if(subchar < 0) { + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } else { + /* function call for error cases */ + utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1); + ++numSubstitutions; + if(subchar<=0xFFFF) { + *(pDest++)=(UChar)subchar; + } else { + *(pDest++)=U16_LEAD(subchar); + if(pDest<pDestLimit) { + *(pDest++)=U16_TRAIL(subchar); + } else { + reqLength++; + break; + } + } + } + } + } + + /* Pre-flight the rest of the string. */ + while(i < srcLength) { + ch = (uint8_t)src[i++]; + if(U8_IS_SINGLE(ch)) { + reqLength++; + } else { + if(ch >= 0xe0) { + if( /* handle U+0000..U+FFFF inline */ + ch <= 0xef && + (i+1) < srcLength && + (uint8_t)(src[i] - 0x80) <= 0x3f && + (uint8_t)(src[i+1] - 0x80) <= 0x3f + ) { + reqLength++; + i += 2; + continue; + } + } else { + if( /* handle U+0000..U+07FF inline */ + ch >= 0xc0 && + i < srcLength && + (uint8_t)(src[i] - 0x80) <= 0x3f + ) { + reqLength++; + ++i; + continue; + } + } + + if(subchar < 0) { + *pErrorCode = U_INVALID_CHAR_FOUND; + return NULL; + } else { + /* function call for error cases */ + utf8_nextCharSafeBody((const uint8_t *)src, &(i), srcLength, ch, -1); + ++numSubstitutions; + reqLength+=U16_LENGTH(ch); + } + } + } + + if(pNumSubstitutions!=NULL) { + *pNumSubstitutions=numSubstitutions; + } + + reqLength+=(int32_t)(pDest - dest); + if(pDestLength) { + *pDestLength = reqLength; + } + + /* Terminate the buffer */ + u_terminateUChars(dest, destCapacity, reqLength, pErrorCode); + return dest; +} + +U_CAPI char* U_EXPORT2 +u_strToJavaModifiedUTF8( + char *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode) { + int32_t reqLength=0; + uint32_t ch=0; + uint8_t *pDest = (uint8_t *)dest; + uint8_t *pDestLimit = pDest + destCapacity; + const UChar *pSrcLimit; + int32_t count; + + /* args check */ + if(U_FAILURE(*pErrorCode)){ + return NULL; + } + if( (src==NULL && srcLength!=0) || srcLength < -1 || + (dest==NULL && destCapacity!=0) || destCapacity<0 + ) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + if(srcLength==-1) { + /* Convert NUL-terminated ASCII, then find the string length. */ + while((ch=*src)<=0x7f && ch != 0 && pDest<pDestLimit) { + *pDest++ = (uint8_t)ch; + ++src; + } + if(ch == 0) { + reqLength=(int32_t)(pDest - (uint8_t *)dest); + if(pDestLength) { + *pDestLength = reqLength; + } + + /* Terminate the buffer */ + u_terminateChars(dest, destCapacity, reqLength, pErrorCode); + return dest; + } + srcLength = u_strlen(src); + } + + /* Faster loop without ongoing checking for pSrcLimit and pDestLimit. */ + pSrcLimit = (src!=NULL)?(src+srcLength):NULL; + for(;;) { + count = (int32_t)(pDestLimit - pDest); + srcLength = (int32_t)(pSrcLimit - src); + if(count >= srcLength && srcLength > 0 && *src <= 0x7f) { + /* fast ASCII loop */ + const UChar *prevSrc = src; + int32_t delta; + while(src < pSrcLimit && (ch = *src) <= 0x7f && ch != 0) { + *pDest++=(uint8_t)ch; + ++src; + } + delta = (int32_t)(src - prevSrc); + count -= delta; + srcLength -= delta; + } + /* + * Each iteration of the inner loop progresses by at most 3 UTF-8 + * bytes and one UChar. + */ + count /= 3; + if(count > srcLength) { + count = srcLength; /* min(remaining dest/3, remaining src) */ + } + if(count < 3) { + /* + * Too much overhead if we get near the end of the string, + * continue with the next loop. + */ + break; + } + do { + ch=*src++; + if(ch <= 0x7f && ch != 0) { + *pDest++ = (uint8_t)ch; + } else if(ch <= 0x7ff) { + *pDest++=(uint8_t)((ch>>6)|0xc0); + *pDest++=(uint8_t)((ch&0x3f)|0x80); + } else { + *pDest++=(uint8_t)((ch>>12)|0xe0); + *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); + *pDest++=(uint8_t)((ch&0x3f)|0x80); + } + } while(--count > 0); + } + + while(src<pSrcLimit) { + ch=*src++; + if(ch <= 0x7f && ch != 0) { + if(pDest<pDestLimit) { + *pDest++ = (uint8_t)ch; + } else { + reqLength = 1; + break; + } + } else if(ch <= 0x7ff) { + if((pDestLimit - pDest) >= 2) { + *pDest++=(uint8_t)((ch>>6)|0xc0); + *pDest++=(uint8_t)((ch&0x3f)|0x80); + } else { + reqLength = 2; + break; + } + } else { + if((pDestLimit - pDest) >= 3) { + *pDest++=(uint8_t)((ch>>12)|0xe0); + *pDest++=(uint8_t)(((ch>>6)&0x3f)|0x80); + *pDest++=(uint8_t)((ch&0x3f)|0x80); + } else { + reqLength = 3; + break; + } + } + } + while(src<pSrcLimit) { + ch=*src++; + if(ch <= 0x7f && ch != 0) { + ++reqLength; + } else if(ch<=0x7ff) { + reqLength+=2; + } else { + reqLength+=3; + } + } + + reqLength+=(int32_t)(pDest - (uint8_t *)dest); + if(pDestLength){ + *pDestLength = reqLength; + } + + /* Terminate the buffer */ + u_terminateChars(dest, destCapacity, reqLength, pErrorCode); + return dest; +} diff --git a/thirdparty/icu4c/common/utext.cpp b/thirdparty/icu4c/common/utext.cpp new file mode 100644 index 0000000000..763b6684fb --- /dev/null +++ b/thirdparty/icu4c/common/utext.cpp @@ -0,0 +1,2877 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2005-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utext.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2005apr12 +* created by: Markus W. Scherer +*/ + +#include <cstddef> + +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "unicode/unistr.h" +#include "unicode/chariter.h" +#include "unicode/utext.h" +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "ustr_imp.h" +#include "cmemory.h" +#include "cstring.h" +#include "uassert.h" +#include "putilimp.h" + +U_NAMESPACE_USE + +#define I32_FLAG(bitIndex) ((int32_t)1<<(bitIndex)) + + +static UBool +utext_access(UText *ut, int64_t index, UBool forward) { + return ut->pFuncs->access(ut, index, forward); +} + + + +U_CAPI UBool U_EXPORT2 +utext_moveIndex32(UText *ut, int32_t delta) { + UChar32 c; + if (delta > 0) { + do { + if(ut->chunkOffset>=ut->chunkLength && !utext_access(ut, ut->chunkNativeLimit, TRUE)) { + return FALSE; + } + c = ut->chunkContents[ut->chunkOffset]; + if (U16_IS_SURROGATE(c)) { + c = utext_next32(ut); + if (c == U_SENTINEL) { + return FALSE; + } + } else { + ut->chunkOffset++; + } + } while(--delta>0); + + } else if (delta<0) { + do { + if(ut->chunkOffset<=0 && !utext_access(ut, ut->chunkNativeStart, FALSE)) { + return FALSE; + } + c = ut->chunkContents[ut->chunkOffset-1]; + if (U16_IS_SURROGATE(c)) { + c = utext_previous32(ut); + if (c == U_SENTINEL) { + return FALSE; + } + } else { + ut->chunkOffset--; + } + } while(++delta<0); + } + + return TRUE; +} + + +U_CAPI int64_t U_EXPORT2 +utext_nativeLength(UText *ut) { + return ut->pFuncs->nativeLength(ut); +} + + +U_CAPI UBool U_EXPORT2 +utext_isLengthExpensive(const UText *ut) { + UBool r = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)) != 0; + return r; +} + + +U_CAPI int64_t U_EXPORT2 +utext_getNativeIndex(const UText *ut) { + if(ut->chunkOffset <= ut->nativeIndexingLimit) { + return ut->chunkNativeStart+ut->chunkOffset; + } else { + return ut->pFuncs->mapOffsetToNative(ut); + } +} + + +U_CAPI void U_EXPORT2 +utext_setNativeIndex(UText *ut, int64_t index) { + if(index<ut->chunkNativeStart || index>=ut->chunkNativeLimit) { + // The desired position is outside of the current chunk. + // Access the new position. Assume a forward iteration from here, + // which will also be optimimum for a single random access. + // Reverse iterations may suffer slightly. + ut->pFuncs->access(ut, index, TRUE); + } else if((int32_t)(index - ut->chunkNativeStart) <= ut->nativeIndexingLimit) { + // utf-16 indexing. + ut->chunkOffset=(int32_t)(index-ut->chunkNativeStart); + } else { + ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index); + } + // The convention is that the index must always be on a code point boundary. + // Adjust the index position if it is in the middle of a surrogate pair. + if (ut->chunkOffset<ut->chunkLength) { + UChar c= ut->chunkContents[ut->chunkOffset]; + if (U16_IS_TRAIL(c)) { + if (ut->chunkOffset==0) { + ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE); + } + if (ut->chunkOffset>0) { + UChar lead = ut->chunkContents[ut->chunkOffset-1]; + if (U16_IS_LEAD(lead)) { + ut->chunkOffset--; + } + } + } + } +} + + + +U_CAPI int64_t U_EXPORT2 +utext_getPreviousNativeIndex(UText *ut) { + // + // Fast-path the common case. + // Common means current position is not at the beginning of a chunk + // and the preceding character is not supplementary. + // + int32_t i = ut->chunkOffset - 1; + int64_t result; + if (i >= 0) { + UChar c = ut->chunkContents[i]; + if (U16_IS_TRAIL(c) == FALSE) { + if (i <= ut->nativeIndexingLimit) { + result = ut->chunkNativeStart + i; + } else { + ut->chunkOffset = i; + result = ut->pFuncs->mapOffsetToNative(ut); + ut->chunkOffset++; + } + return result; + } + } + + // If at the start of text, simply return 0. + if (ut->chunkOffset==0 && ut->chunkNativeStart==0) { + return 0; + } + + // Harder, less common cases. We are at a chunk boundary, or on a surrogate. + // Keep it simple, use other functions to handle the edges. + // + utext_previous32(ut); + result = UTEXT_GETNATIVEINDEX(ut); + utext_next32(ut); + return result; +} + + +// +// utext_current32. Get the UChar32 at the current position. +// UText iteration position is always on a code point boundary, +// never on the trail half of a surrogate pair. +// +U_CAPI UChar32 U_EXPORT2 +utext_current32(UText *ut) { + UChar32 c; + if (ut->chunkOffset==ut->chunkLength) { + // Current position is just off the end of the chunk. + if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) { + // Off the end of the text. + return U_SENTINEL; + } + } + + c = ut->chunkContents[ut->chunkOffset]; + if (U16_IS_LEAD(c) == FALSE) { + // Normal, non-supplementary case. + return c; + } + + // + // Possible supplementary char. + // + UChar32 trail = 0; + UChar32 supplementaryC = c; + if ((ut->chunkOffset+1) < ut->chunkLength) { + // The trail surrogate is in the same chunk. + trail = ut->chunkContents[ut->chunkOffset+1]; + } else { + // The trail surrogate is in a different chunk. + // Because we must maintain the iteration position, we need to switch forward + // into the new chunk, get the trail surrogate, then revert the chunk back to the + // original one. + // An edge case to be careful of: the entire text may end with an unpaired + // leading surrogate. The attempt to access the trail will fail, but + // the original position before the unpaired lead still needs to be restored. + int64_t nativePosition = ut->chunkNativeLimit; + int32_t originalOffset = ut->chunkOffset; + if (ut->pFuncs->access(ut, nativePosition, TRUE)) { + trail = ut->chunkContents[ut->chunkOffset]; + } + UBool r = ut->pFuncs->access(ut, nativePosition, FALSE); // reverse iteration flag loads preceding chunk + U_ASSERT(r==TRUE); + ut->chunkOffset = originalOffset; + if(!r) { + return U_SENTINEL; + } + } + + if (U16_IS_TRAIL(trail)) { + supplementaryC = U16_GET_SUPPLEMENTARY(c, trail); + } + return supplementaryC; + +} + + +U_CAPI UChar32 U_EXPORT2 +utext_char32At(UText *ut, int64_t nativeIndex) { + UChar32 c = U_SENTINEL; + + // Fast path the common case. + if (nativeIndex>=ut->chunkNativeStart && nativeIndex < ut->chunkNativeStart + ut->nativeIndexingLimit) { + ut->chunkOffset = (int32_t)(nativeIndex - ut->chunkNativeStart); + c = ut->chunkContents[ut->chunkOffset]; + if (U16_IS_SURROGATE(c) == FALSE) { + return c; + } + } + + + utext_setNativeIndex(ut, nativeIndex); + if (nativeIndex>=ut->chunkNativeStart && ut->chunkOffset<ut->chunkLength) { + c = ut->chunkContents[ut->chunkOffset]; + if (U16_IS_SURROGATE(c)) { + // For surrogates, let current32() deal with the complications + // of supplementaries that may span chunk boundaries. + c = utext_current32(ut); + } + } + return c; +} + + +U_CAPI UChar32 U_EXPORT2 +utext_next32(UText *ut) { + UChar32 c; + + if (ut->chunkOffset >= ut->chunkLength) { + if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) { + return U_SENTINEL; + } + } + + c = ut->chunkContents[ut->chunkOffset++]; + if (U16_IS_LEAD(c) == FALSE) { + // Normal case, not supplementary. + // (A trail surrogate seen here is just returned as is, as a surrogate value. + // It cannot be part of a pair.) + return c; + } + + if (ut->chunkOffset >= ut->chunkLength) { + if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) { + // c is an unpaired lead surrogate at the end of the text. + // return it as it is. + return c; + } + } + UChar32 trail = ut->chunkContents[ut->chunkOffset]; + if (U16_IS_TRAIL(trail) == FALSE) { + // c was an unpaired lead surrogate, not at the end of the text. + // return it as it is (unpaired). Iteration position is on the + // following character, possibly in the next chunk, where the + // trail surrogate would have been if it had existed. + return c; + } + + UChar32 supplementary = U16_GET_SUPPLEMENTARY(c, trail); + ut->chunkOffset++; // move iteration position over the trail surrogate. + return supplementary; + } + + +U_CAPI UChar32 U_EXPORT2 +utext_previous32(UText *ut) { + UChar32 c; + + if (ut->chunkOffset <= 0) { + if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) { + return U_SENTINEL; + } + } + ut->chunkOffset--; + c = ut->chunkContents[ut->chunkOffset]; + if (U16_IS_TRAIL(c) == FALSE) { + // Normal case, not supplementary. + // (A lead surrogate seen here is just returned as is, as a surrogate value. + // It cannot be part of a pair.) + return c; + } + + if (ut->chunkOffset <= 0) { + if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) { + // c is an unpaired trail surrogate at the start of the text. + // return it as it is. + return c; + } + } + + UChar32 lead = ut->chunkContents[ut->chunkOffset-1]; + if (U16_IS_LEAD(lead) == FALSE) { + // c was an unpaired trail surrogate, not at the end of the text. + // return it as it is (unpaired). Iteration position is at c + return c; + } + + UChar32 supplementary = U16_GET_SUPPLEMENTARY(lead, c); + ut->chunkOffset--; // move iteration position over the lead surrogate. + return supplementary; +} + + + +U_CAPI UChar32 U_EXPORT2 +utext_next32From(UText *ut, int64_t index) { + UChar32 c = U_SENTINEL; + + if(index<ut->chunkNativeStart || index>=ut->chunkNativeLimit) { + // Desired position is outside of the current chunk. + if(!ut->pFuncs->access(ut, index, TRUE)) { + // no chunk available here + return U_SENTINEL; + } + } else if (index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) { + // Desired position is in chunk, with direct 1:1 native to UTF16 indexing + ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart); + } else { + // Desired position is in chunk, with non-UTF16 indexing. + ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index); + } + + c = ut->chunkContents[ut->chunkOffset++]; + if (U16_IS_SURROGATE(c)) { + // Surrogates. Many edge cases. Use other functions that already + // deal with the problems. + utext_setNativeIndex(ut, index); + c = utext_next32(ut); + } + return c; +} + + +U_CAPI UChar32 U_EXPORT2 +utext_previous32From(UText *ut, int64_t index) { + // + // Return the character preceding the specified index. + // Leave the iteration position at the start of the character that was returned. + // + UChar32 cPrev; // The character preceding cCurr, which is what we will return. + + // Address the chunk containg the position preceding the incoming index + // A tricky edge case: + // We try to test the requested native index against the chunkNativeStart to determine + // whether the character preceding the one at the index is in the current chunk. + // BUT, this test can fail with UTF-8 (or any other multibyte encoding), when the + // requested index is on something other than the first position of the first char. + // + if(index<=ut->chunkNativeStart || index>ut->chunkNativeLimit) { + // Requested native index is outside of the current chunk. + if(!ut->pFuncs->access(ut, index, FALSE)) { + // no chunk available here + return U_SENTINEL; + } + } else if(index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) { + // Direct UTF-16 indexing. + ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart); + } else { + ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index); + if (ut->chunkOffset==0 && !ut->pFuncs->access(ut, index, FALSE)) { + // no chunk available here + return U_SENTINEL; + } + } + + // + // Simple case with no surrogates. + // + ut->chunkOffset--; + cPrev = ut->chunkContents[ut->chunkOffset]; + + if (U16_IS_SURROGATE(cPrev)) { + // Possible supplementary. Many edge cases. + // Let other functions do the heavy lifting. + utext_setNativeIndex(ut, index); + cPrev = utext_previous32(ut); + } + return cPrev; +} + + +U_CAPI int32_t U_EXPORT2 +utext_extract(UText *ut, + int64_t start, int64_t limit, + UChar *dest, int32_t destCapacity, + UErrorCode *status) { + return ut->pFuncs->extract(ut, start, limit, dest, destCapacity, status); + } + + + +U_CAPI UBool U_EXPORT2 +utext_equals(const UText *a, const UText *b) { + if (a==NULL || b==NULL || + a->magic != UTEXT_MAGIC || + b->magic != UTEXT_MAGIC) { + // Null or invalid arguments don't compare equal to anything. + return FALSE; + } + + if (a->pFuncs != b->pFuncs) { + // Different types of text providers. + return FALSE; + } + + if (a->context != b->context) { + // Different sources (different strings) + return FALSE; + } + if (utext_getNativeIndex(a) != utext_getNativeIndex(b)) { + // Different current position in the string. + return FALSE; + } + + return TRUE; +} + +U_CAPI UBool U_EXPORT2 +utext_isWritable(const UText *ut) +{ + UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != 0; + return b; +} + + +U_CAPI void U_EXPORT2 +utext_freeze(UText *ut) { + // Zero out the WRITABLE flag. + ut->providerProperties &= ~(I32_FLAG(UTEXT_PROVIDER_WRITABLE)); +} + + +U_CAPI UBool U_EXPORT2 +utext_hasMetaData(const UText *ut) +{ + UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA)) != 0; + return b; +} + + + +U_CAPI int32_t U_EXPORT2 +utext_replace(UText *ut, + int64_t nativeStart, int64_t nativeLimit, + const UChar *replacementText, int32_t replacementLength, + UErrorCode *status) +{ + if (U_FAILURE(*status)) { + return 0; + } + if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) { + *status = U_NO_WRITE_PERMISSION; + return 0; + } + int32_t i = ut->pFuncs->replace(ut, nativeStart, nativeLimit, replacementText, replacementLength, status); + return i; +} + +U_CAPI void U_EXPORT2 +utext_copy(UText *ut, + int64_t nativeStart, int64_t nativeLimit, + int64_t destIndex, + UBool move, + UErrorCode *status) +{ + if (U_FAILURE(*status)) { + return; + } + if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) { + *status = U_NO_WRITE_PERMISSION; + return; + } + ut->pFuncs->copy(ut, nativeStart, nativeLimit, destIndex, move, status); +} + + + +U_CAPI UText * U_EXPORT2 +utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status) { + if (U_FAILURE(*status)) { + return dest; + } + UText *result = src->pFuncs->clone(dest, src, deep, status); + if (U_FAILURE(*status)) { + return result; + } + if (result == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return result; + } + if (readOnly) { + utext_freeze(result); + } + return result; +} + + + +//------------------------------------------------------------------------------ +// +// UText common functions implementation +// +//------------------------------------------------------------------------------ + +// +// UText.flags bit definitions +// +enum { + UTEXT_HEAP_ALLOCATED = 1, // 1 if ICU has allocated this UText struct on the heap. + // 0 if caller provided storage for the UText. + + UTEXT_EXTRA_HEAP_ALLOCATED = 2, // 1 if ICU has allocated extra storage as a separate + // heap block. + // 0 if there is no separate allocation. Either no extra + // storage was requested, or it is appended to the end + // of the main UText storage. + + UTEXT_OPEN = 4 // 1 if this UText is currently open + // 0 if this UText is not open. +}; + + +// +// Extended form of a UText. The purpose is to aid in computing the total size required +// when a provider asks for a UText to be allocated with extra storage. + +struct ExtendedUText { + UText ut; + std::max_align_t extension; +}; + +static const UText emptyText = UTEXT_INITIALIZER; + +U_CAPI UText * U_EXPORT2 +utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status) { + if (U_FAILURE(*status)) { + return ut; + } + + if (ut == NULL) { + // We need to heap-allocate storage for the new UText + int32_t spaceRequired = sizeof(UText); + if (extraSpace > 0) { + spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(std::max_align_t); + } + ut = (UText *)uprv_malloc(spaceRequired); + if (ut == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } else { + *ut = emptyText; + ut->flags |= UTEXT_HEAP_ALLOCATED; + if (spaceRequired>0) { + ut->extraSize = extraSpace; + ut->pExtra = &((ExtendedUText *)ut)->extension; + } + } + } else { + // We have been supplied with an already existing UText. + // Verify that it really appears to be a UText. + if (ut->magic != UTEXT_MAGIC) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return ut; + } + // If the ut is already open and there's a provider supplied close + // function, call it. + if ((ut->flags & UTEXT_OPEN) && ut->pFuncs->close != NULL) { + ut->pFuncs->close(ut); + } + ut->flags &= ~UTEXT_OPEN; + + // If extra space was requested by our caller, check whether + // sufficient already exists, and allocate new if needed. + if (extraSpace > ut->extraSize) { + // Need more space. If there is existing separately allocated space, + // delete it first, then allocate new space. + if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) { + uprv_free(ut->pExtra); + ut->extraSize = 0; + } + ut->pExtra = uprv_malloc(extraSpace); + if (ut->pExtra == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + } else { + ut->extraSize = extraSpace; + ut->flags |= UTEXT_EXTRA_HEAP_ALLOCATED; + } + } + } + if (U_SUCCESS(*status)) { + ut->flags |= UTEXT_OPEN; + + // Initialize all remaining fields of the UText. + // + ut->context = NULL; + ut->chunkContents = NULL; + ut->p = NULL; + ut->q = NULL; + ut->r = NULL; + ut->a = 0; + ut->b = 0; + ut->c = 0; + ut->chunkOffset = 0; + ut->chunkLength = 0; + ut->chunkNativeStart = 0; + ut->chunkNativeLimit = 0; + ut->nativeIndexingLimit = 0; + ut->providerProperties = 0; + ut->privA = 0; + ut->privB = 0; + ut->privC = 0; + ut->privP = NULL; + if (ut->pExtra!=NULL && ut->extraSize>0) + uprv_memset(ut->pExtra, 0, ut->extraSize); + + } + return ut; +} + + +U_CAPI UText * U_EXPORT2 +utext_close(UText *ut) { + if (ut==NULL || + ut->magic != UTEXT_MAGIC || + (ut->flags & UTEXT_OPEN) == 0) + { + // The supplied ut is not an open UText. + // Do nothing. + return ut; + } + + // If the provider gave us a close function, call it now. + // This will clean up anything allocated specifically by the provider. + if (ut->pFuncs->close != NULL) { + ut->pFuncs->close(ut); + } + ut->flags &= ~UTEXT_OPEN; + + // If we (the framework) allocated the UText or subsidiary storage, + // delete it. + if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) { + uprv_free(ut->pExtra); + ut->pExtra = NULL; + ut->flags &= ~UTEXT_EXTRA_HEAP_ALLOCATED; + ut->extraSize = 0; + } + + // Zero out function table of the closed UText. This is a defensive move, + // inteded to cause applications that inadvertantly use a closed + // utext to crash with null pointer errors. + ut->pFuncs = NULL; + + if (ut->flags & UTEXT_HEAP_ALLOCATED) { + // This UText was allocated by UText setup. We need to free it. + // Clear magic, so we can detect if the user messes up and immediately + // tries to reopen another UText using the deleted storage. + ut->magic = 0; + uprv_free(ut); + ut = NULL; + } + return ut; +} + + + + +// +// invalidateChunk Reset a chunk to have no contents, so that the next call +// to access will cause new data to load. +// This is needed when copy/move/replace operate directly on the +// backing text, potentially putting it out of sync with the +// contents in the chunk. +// +static void +invalidateChunk(UText *ut) { + ut->chunkLength = 0; + ut->chunkNativeLimit = 0; + ut->chunkNativeStart = 0; + ut->chunkOffset = 0; + ut->nativeIndexingLimit = 0; +} + +// +// pinIndex Do range pinning on a native index parameter. +// 64 bit pinning is done in place. +// 32 bit truncated result is returned as a convenience for +// use in providers that don't need 64 bits. +static int32_t +pinIndex(int64_t &index, int64_t limit) { + if (index<0) { + index = 0; + } else if (index > limit) { + index = limit; + } + return (int32_t)index; +} + + +U_CDECL_BEGIN + +// +// Pointer relocation function, +// a utility used by shallow clone. +// Adjust a pointer that refers to something within one UText (the source) +// to refer to the same relative offset within a another UText (the target) +// +static void adjustPointer(UText *dest, const void **destPtr, const UText *src) { + // convert all pointers to (char *) so that byte address arithmetic will work. + char *dptr = (char *)*destPtr; + char *dUText = (char *)dest; + char *sUText = (char *)src; + + if (dptr >= (char *)src->pExtra && dptr < ((char*)src->pExtra)+src->extraSize) { + // target ptr was to something within the src UText's pExtra storage. + // relocate it into the target UText's pExtra region. + *destPtr = ((char *)dest->pExtra) + (dptr - (char *)src->pExtra); + } else if (dptr>=sUText && dptr < sUText+src->sizeOfStruct) { + // target ptr was pointing to somewhere within the source UText itself. + // Move it to the same offset within the target UText. + *destPtr = dUText + (dptr-sUText); + } +} + + +// +// Clone. This is a generic copy-the-utext-by-value clone function that can be +// used as-is with some utext types, and as a helper by other clones. +// +static UText * U_CALLCONV +shallowTextClone(UText * dest, const UText * src, UErrorCode * status) { + if (U_FAILURE(*status)) { + return NULL; + } + int32_t srcExtraSize = src->extraSize; + + // + // Use the generic text_setup to allocate storage if required. + // + dest = utext_setup(dest, srcExtraSize, status); + if (U_FAILURE(*status)) { + return dest; + } + + // + // flags (how the UText was allocated) and the pointer to the + // extra storage must retain the values in the cloned utext that + // were set up by utext_setup. Save them separately before + // copying the whole struct. + // + void *destExtra = dest->pExtra; + int32_t flags = dest->flags; + + + // + // Copy the whole UText struct by value. + // Any "Extra" storage is copied also. + // + int sizeToCopy = src->sizeOfStruct; + if (sizeToCopy > dest->sizeOfStruct) { + sizeToCopy = dest->sizeOfStruct; + } + uprv_memcpy(dest, src, sizeToCopy); + dest->pExtra = destExtra; + dest->flags = flags; + if (srcExtraSize > 0) { + uprv_memcpy(dest->pExtra, src->pExtra, srcExtraSize); + } + + // + // Relocate any pointers in the target that refer to the UText itself + // to point to the cloned copy rather than the original source. + // + adjustPointer(dest, &dest->context, src); + adjustPointer(dest, &dest->p, src); + adjustPointer(dest, &dest->q, src); + adjustPointer(dest, &dest->r, src); + adjustPointer(dest, (const void **)&dest->chunkContents, src); + + // The newly shallow-cloned UText does _not_ own the underlying storage for the text. + // (The source for the clone may or may not have owned the text.) + + dest->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); + + return dest; +} + + +U_CDECL_END + + + +//------------------------------------------------------------------------------ +// +// UText implementation for UTF-8 char * strings (read-only) +// Limitation: string length must be <= 0x7fffffff in length. +// (length must for in an int32_t variable) +// +// Use of UText data members: +// context pointer to UTF-8 string +// utext.b is the input string length (bytes). +// utext.c Length scanned so far in string +// (for optimizing finding length of zero terminated strings.) +// utext.p pointer to the current buffer +// utext.q pointer to the other buffer. +// +//------------------------------------------------------------------------------ + +// Chunk size. +// Must be less than 85 (256/3), because of byte mapping from UChar indexes to native indexes. +// Worst case is three native bytes to one UChar. (Supplemenaries are 4 native bytes +// to two UChars.) +// The longest illegal byte sequence treated as a single error (and converted to U+FFFD) +// is a three-byte sequence (truncated four-byte sequence). +// +enum { UTF8_TEXT_CHUNK_SIZE=32 }; + +// +// UTF8Buf Two of these structs will be set up in the UText's extra allocated space. +// Each contains the UChar chunk buffer, the to and from native maps, and +// header info. +// +// because backwards iteration fills the buffers starting at the end and +// working towards the front, the filled part of the buffers may not begin +// at the start of the available storage for the buffers. +// +// Buffer size is one bigger than the specified UTF8_TEXT_CHUNK_SIZE to allow for +// the last character added being a supplementary, and thus requiring a surrogate +// pair. Doing this is simpler than checking for the edge case. +// + +struct UTF8Buf { + int32_t bufNativeStart; // Native index of first char in UChar buf + int32_t bufNativeLimit; // Native index following last char in buf. + int32_t bufStartIdx; // First filled position in buf. + int32_t bufLimitIdx; // Limit of filled range in buf. + int32_t bufNILimit; // Limit of native indexing part of buf + int32_t toUCharsMapStart; // Native index corresponding to + // mapToUChars[0]. + // Set to bufNativeStart when filling forwards. + // Set to computed value when filling backwards. + + UChar buf[UTF8_TEXT_CHUNK_SIZE+4]; // The UChar buffer. Requires one extra position beyond the + // the chunk size, to allow for surrogate at the end. + // Length must be identical to mapToNative array, below, + // because of the way indexing works when the array is + // filled backwards during a reverse iteration. Thus, + // the additional extra size. + uint8_t mapToNative[UTF8_TEXT_CHUNK_SIZE+4]; // map UChar index in buf to + // native offset from bufNativeStart. + // Requires two extra slots, + // one for a supplementary starting in the last normal position, + // and one for an entry for the buffer limit position. + uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to + // correspoding offset in filled part of buf. + int32_t align; +}; + +U_CDECL_BEGIN + +// +// utf8TextLength +// +// Get the length of the string. If we don't already know it, +// we'll need to scan for the trailing nul. +// +static int64_t U_CALLCONV +utf8TextLength(UText *ut) { + if (ut->b < 0) { + // Zero terminated string, and we haven't scanned to the end yet. + // Scan it now. + const char *r = (const char *)ut->context + ut->c; + while (*r != 0) { + r++; + } + if ((r - (const char *)ut->context) < 0x7fffffff) { + ut->b = (int32_t)(r - (const char *)ut->context); + } else { + // Actual string was bigger (more than 2 gig) than we + // can handle. Clip it to 2 GB. + ut->b = 0x7fffffff; + } + ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); + } + return ut->b; +} + + + + + + +static UBool U_CALLCONV +utf8TextAccess(UText *ut, int64_t index, UBool forward) { + // + // Apologies to those who are allergic to goto statements. + // Consider each goto to a labelled block to be the equivalent of + // call the named block as if it were a function(); + // return; + // + const uint8_t *s8=(const uint8_t *)ut->context; + UTF8Buf *u8b = NULL; + int32_t length = ut->b; // Length of original utf-8 + int32_t ix= (int32_t)index; // Requested index, trimmed to 32 bits. + int32_t mapIndex = 0; + if (index<0) { + ix=0; + } else if (index > 0x7fffffff) { + // Strings with 64 bit lengths not supported by this UTF-8 provider. + ix = 0x7fffffff; + } + + // Pin requested index to the string length. + if (ix>length) { + if (length>=0) { + ix=length; + } else if (ix>=ut->c) { + // Zero terminated string, and requested index is beyond + // the region that has already been scanned. + // Scan up to either the end of the string or to the + // requested position, whichever comes first. + while (ut->c<ix && s8[ut->c]!=0) { + ut->c++; + } + // TODO: support for null terminated string length > 32 bits. + if (s8[ut->c] == 0) { + // We just found the actual length of the string. + // Trim the requested index back to that. + ix = ut->c; + ut->b = ut->c; + length = ut->c; + ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); + } + } + } + + // + // Dispatch to the appropriate action for a forward iteration request. + // + if (forward) { + if (ix==ut->chunkNativeLimit) { + // Check for normal sequential iteration cases first. + if (ix==length) { + // Just reached end of string + // Don't swap buffers, but do set the + // current buffer position. + ut->chunkOffset = ut->chunkLength; + return FALSE; + } else { + // End of current buffer. + // check whether other buffer already has what we need. + UTF8Buf *altB = (UTF8Buf *)ut->q; + if (ix>=altB->bufNativeStart && ix<altB->bufNativeLimit) { + goto swapBuffers; + } + } + } + + // A random access. Desired index could be in either or niether buf. + // For optimizing the order of testing, first check for the index + // being in the other buffer. This will be the case for uses that + // move back and forth over a fairly limited range + { + u8b = (UTF8Buf *)ut->q; // the alternate buffer + if (ix>=u8b->bufNativeStart && ix<u8b->bufNativeLimit) { + // Requested index is in the other buffer. + goto swapBuffers; + } + if (ix == length) { + // Requested index is end-of-string. + // (this is the case of randomly seeking to the end. + // The case of iterating off the end is handled earlier.) + if (ix == ut->chunkNativeLimit) { + // Current buffer extends up to the end of the string. + // Leave it as the current buffer. + ut->chunkOffset = ut->chunkLength; + return FALSE; + } + if (ix == u8b->bufNativeLimit) { + // Alternate buffer extends to the end of string. + // Swap it in as the current buffer. + goto swapBuffersAndFail; + } + + // Neither existing buffer extends to the end of the string. + goto makeStubBuffer; + } + + if (ix<ut->chunkNativeStart || ix>=ut->chunkNativeLimit) { + // Requested index is in neither buffer. + goto fillForward; + } + + // Requested index is in this buffer. + u8b = (UTF8Buf *)ut->p; // the current buffer + mapIndex = ix - u8b->toUCharsMapStart; + U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars)); + ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; + return TRUE; + + } + } + + + // + // Dispatch to the appropriate action for a + // Backwards Diretion iteration request. + // + if (ix==ut->chunkNativeStart) { + // Check for normal sequential iteration cases first. + if (ix==0) { + // Just reached the start of string + // Don't swap buffers, but do set the + // current buffer position. + ut->chunkOffset = 0; + return FALSE; + } else { + // Start of current buffer. + // check whether other buffer already has what we need. + UTF8Buf *altB = (UTF8Buf *)ut->q; + if (ix>altB->bufNativeStart && ix<=altB->bufNativeLimit) { + goto swapBuffers; + } + } + } + + // A random access. Desired index could be in either or niether buf. + // For optimizing the order of testing, + // Most likely case: in the other buffer. + // Second most likely: in neither buffer. + // Unlikely, but must work: in the current buffer. + u8b = (UTF8Buf *)ut->q; // the alternate buffer + if (ix>u8b->bufNativeStart && ix<=u8b->bufNativeLimit) { + // Requested index is in the other buffer. + goto swapBuffers; + } + // Requested index is start-of-string. + // (this is the case of randomly seeking to the start. + // The case of iterating off the start is handled earlier.) + if (ix==0) { + if (u8b->bufNativeStart==0) { + // Alternate buffer contains the data for the start string. + // Make it be the current buffer. + goto swapBuffersAndFail; + } else { + // Request for data before the start of string, + // neither buffer is usable. + // set up a zero-length buffer. + goto makeStubBuffer; + } + } + + if (ix<=ut->chunkNativeStart || ix>ut->chunkNativeLimit) { + // Requested index is in neither buffer. + goto fillReverse; + } + + // Requested index is in this buffer. + // Set the utf16 buffer index. + u8b = (UTF8Buf *)ut->p; + mapIndex = ix - u8b->toUCharsMapStart; + ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; + if (ut->chunkOffset==0) { + // This occurs when the first character in the text is + // a multi-byte UTF-8 char, and the requested index is to + // one of the trailing bytes. Because there is no preceding , + // character, this access fails. We can't pick up on the + // situation sooner because the requested index is not zero. + return FALSE; + } else { + return TRUE; + } + + + +swapBuffers: + // The alternate buffer (ut->q) has the string data that was requested. + // Swap the primary and alternate buffers, and set the + // chunk index into the new primary buffer. + { + u8b = (UTF8Buf *)ut->q; + ut->q = ut->p; + ut->p = u8b; + ut->chunkContents = &u8b->buf[u8b->bufStartIdx]; + ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx; + ut->chunkNativeStart = u8b->bufNativeStart; + ut->chunkNativeLimit = u8b->bufNativeLimit; + ut->nativeIndexingLimit = u8b->bufNILimit; + + // Index into the (now current) chunk + // Use the map to set the chunk index. It's more trouble than it's worth + // to check whether native indexing can be used. + U_ASSERT(ix>=u8b->bufNativeStart); + U_ASSERT(ix<=u8b->bufNativeLimit); + mapIndex = ix - u8b->toUCharsMapStart; + U_ASSERT(mapIndex>=0); + U_ASSERT(mapIndex<(int32_t)sizeof(u8b->mapToUChars)); + ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; + + return TRUE; + } + + + swapBuffersAndFail: + // We got a request for either the start or end of the string, + // with iteration continuing in the out-of-bounds direction. + // The alternate buffer already contains the data up to the + // start/end. + // Swap the buffers, then return failure, indicating that we couldn't + // make things correct for continuing the iteration in the requested + // direction. The position & buffer are correct should the + // user decide to iterate in the opposite direction. + u8b = (UTF8Buf *)ut->q; + ut->q = ut->p; + ut->p = u8b; + ut->chunkContents = &u8b->buf[u8b->bufStartIdx]; + ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx; + ut->chunkNativeStart = u8b->bufNativeStart; + ut->chunkNativeLimit = u8b->bufNativeLimit; + ut->nativeIndexingLimit = u8b->bufNILimit; + + // Index into the (now current) chunk + // For this function (swapBuffersAndFail), the requested index + // will always be at either the start or end of the chunk. + if (ix==u8b->bufNativeLimit) { + ut->chunkOffset = ut->chunkLength; + } else { + ut->chunkOffset = 0; + U_ASSERT(ix == u8b->bufNativeStart); + } + return FALSE; + +makeStubBuffer: + // The user has done a seek/access past the start or end + // of the string. Rather than loading data that is likely + // to never be used, just set up a zero-length buffer at + // the position. + u8b = (UTF8Buf *)ut->q; + u8b->bufNativeStart = ix; + u8b->bufNativeLimit = ix; + u8b->bufStartIdx = 0; + u8b->bufLimitIdx = 0; + u8b->bufNILimit = 0; + u8b->toUCharsMapStart = ix; + u8b->mapToNative[0] = 0; + u8b->mapToUChars[0] = 0; + goto swapBuffersAndFail; + + + +fillForward: + { + // Move the incoming index to a code point boundary. + U8_SET_CP_START(s8, 0, ix); + + // Swap the UText buffers. + // We want to fill what was previously the alternate buffer, + // and make what was the current buffer be the new alternate. + UTF8Buf *u8b_swap = (UTF8Buf *)ut->q; + ut->q = ut->p; + ut->p = u8b_swap; + + int32_t strLen = ut->b; + UBool nulTerminated = FALSE; + if (strLen < 0) { + strLen = 0x7fffffff; + nulTerminated = TRUE; + } + + UChar *buf = u8b_swap->buf; + uint8_t *mapToNative = u8b_swap->mapToNative; + uint8_t *mapToUChars = u8b_swap->mapToUChars; + int32_t destIx = 0; + int32_t srcIx = ix; + UBool seenNonAscii = FALSE; + UChar32 c = 0; + + // Fill the chunk buffer and mapping arrays. + while (destIx<UTF8_TEXT_CHUNK_SIZE) { + c = s8[srcIx]; + if (c>0 && c<0x80) { + // Special case ASCII range for speed. + // zero is excluded to simplify bounds checking. + buf[destIx] = (UChar)c; + mapToNative[destIx] = (uint8_t)(srcIx - ix); + mapToUChars[srcIx-ix] = (uint8_t)destIx; + srcIx++; + destIx++; + } else { + // General case, handle everything. + if (seenNonAscii == FALSE) { + seenNonAscii = TRUE; + u8b_swap->bufNILimit = destIx; + } + + int32_t cIx = srcIx; + int32_t dIx = destIx; + int32_t dIxSaved = destIx; + U8_NEXT_OR_FFFD(s8, srcIx, strLen, c); + if (c==0 && nulTerminated) { + srcIx--; + break; + } + + U16_APPEND_UNSAFE(buf, destIx, c); + do { + mapToNative[dIx++] = (uint8_t)(cIx - ix); + } while (dIx < destIx); + + do { + mapToUChars[cIx++ - ix] = (uint8_t)dIxSaved; + } while (cIx < srcIx); + } + if (srcIx>=strLen) { + break; + } + + } + + // store Native <--> Chunk Map entries for the end of the buffer. + // There is no actual character here, but the index position is valid. + mapToNative[destIx] = (uint8_t)(srcIx - ix); + mapToUChars[srcIx - ix] = (uint8_t)destIx; + + // fill in Buffer descriptor + u8b_swap->bufNativeStart = ix; + u8b_swap->bufNativeLimit = srcIx; + u8b_swap->bufStartIdx = 0; + u8b_swap->bufLimitIdx = destIx; + if (seenNonAscii == FALSE) { + u8b_swap->bufNILimit = destIx; + } + u8b_swap->toUCharsMapStart = u8b_swap->bufNativeStart; + + // Set UText chunk to refer to this buffer. + ut->chunkContents = buf; + ut->chunkOffset = 0; + ut->chunkLength = u8b_swap->bufLimitIdx; + ut->chunkNativeStart = u8b_swap->bufNativeStart; + ut->chunkNativeLimit = u8b_swap->bufNativeLimit; + ut->nativeIndexingLimit = u8b_swap->bufNILimit; + + // For zero terminated strings, keep track of the maximum point + // scanned so far. + if (nulTerminated && srcIx>ut->c) { + ut->c = srcIx; + if (c==0) { + // We scanned to the end. + // Remember the actual length. + ut->b = srcIx; + ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); + } + } + return TRUE; + } + + +fillReverse: + { + // Move the incoming index to a code point boundary. + // Can only do this if the incoming index is somewhere in the interior of the string. + // If index is at the end, there is no character there to look at. + if (ix != ut->b) { + // Note: this function will only move the index back if it is on a trail byte + // and there is a preceding lead byte and the sequence from the lead + // through this trail could be part of a valid UTF-8 sequence + // Otherwise the index remains unchanged. + U8_SET_CP_START(s8, 0, ix); + } + + // Swap the UText buffers. + // We want to fill what was previously the alternate buffer, + // and make what was the current buffer be the new alternate. + UTF8Buf *u8b_swap = (UTF8Buf *)ut->q; + ut->q = ut->p; + ut->p = u8b_swap; + + UChar *buf = u8b_swap->buf; + uint8_t *mapToNative = u8b_swap->mapToNative; + uint8_t *mapToUChars = u8b_swap->mapToUChars; + int32_t toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1; + // Note that toUCharsMapStart can be negative. Happens when the remaining + // text from current position to the beginning is less than the buffer size. + // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry. + int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region + // at end of buffer to leave room + // for a surrogate pair at the + // buffer start. + int32_t srcIx = ix; + int32_t bufNILimit = destIx; + UChar32 c; + + // Map to/from Native Indexes, fill in for the position at the end of + // the buffer. + // + mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); + mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx; + + // Fill the chunk buffer + // Work backwards, filling from the end of the buffer towards the front. + // + while (destIx>2 && (srcIx - toUCharsMapStart > 5) && (srcIx > 0)) { + srcIx--; + destIx--; + + // Get last byte of the UTF-8 character + c = s8[srcIx]; + if (c<0x80) { + // Special case ASCII range for speed. + buf[destIx] = (UChar)c; + U_ASSERT(toUCharsMapStart <= srcIx); + mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx; + mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); + } else { + // General case, handle everything non-ASCII. + + int32_t sIx = srcIx; // ix of last byte of multi-byte u8 char + + // Get the full character from the UTF8 string. + // use code derived from tbe macros in utf8.h + // Leaves srcIx pointing at the first byte of the UTF-8 char. + // + c=utf8_prevCharSafeBody(s8, 0, &srcIx, c, -3); + // leaves srcIx at first byte of the multi-byte char. + + // Store the character in UTF-16 buffer. + if (c<0x10000) { + buf[destIx] = (UChar)c; + mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); + } else { + buf[destIx] = U16_TRAIL(c); + mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); + buf[--destIx] = U16_LEAD(c); + mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart); + } + + // Fill in the map from native indexes to UChars buf index. + do { + mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx; + } while (sIx >= srcIx); + U_ASSERT(toUCharsMapStart <= (srcIx+1)); + + // Set native indexing limit to be the current position. + // We are processing a non-ascii, non-native-indexing char now; + // the limit will be here if the rest of the chars to be + // added to this buffer are ascii. + bufNILimit = destIx; + } + } + u8b_swap->bufNativeStart = srcIx; + u8b_swap->bufNativeLimit = ix; + u8b_swap->bufStartIdx = destIx; + u8b_swap->bufLimitIdx = UTF8_TEXT_CHUNK_SIZE+2; + u8b_swap->bufNILimit = bufNILimit - u8b_swap->bufStartIdx; + u8b_swap->toUCharsMapStart = toUCharsMapStart; + + ut->chunkContents = &buf[u8b_swap->bufStartIdx]; + ut->chunkLength = u8b_swap->bufLimitIdx - u8b_swap->bufStartIdx; + ut->chunkOffset = ut->chunkLength; + ut->chunkNativeStart = u8b_swap->bufNativeStart; + ut->chunkNativeLimit = u8b_swap->bufNativeLimit; + ut->nativeIndexingLimit = u8b_swap->bufNILimit; + return TRUE; + } + +} + + + +// +// This is a slightly modified copy of u_strFromUTF8, +// Inserts a Replacement Char rather than failing on invalid UTF-8 +// Removes unnecessary features. +// +static UChar* +utext_strFromUTF8(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char* src, + int32_t srcLength, // required. NUL terminated not supported. + UErrorCode *pErrorCode + ) +{ + + UChar *pDest = dest; + UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL; + UChar32 ch=0; + int32_t index = 0; + int32_t reqLength = 0; + uint8_t* pSrc = (uint8_t*) src; + + + while((index < srcLength)&&(pDest<pDestLimit)){ + ch = pSrc[index++]; + if(ch <=0x7f){ + *pDest++=(UChar)ch; + }else{ + ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -3); + if(U_IS_BMP(ch)){ + *(pDest++)=(UChar)ch; + }else{ + *(pDest++)=U16_LEAD(ch); + if(pDest<pDestLimit){ + *(pDest++)=U16_TRAIL(ch); + }else{ + reqLength++; + break; + } + } + } + } + /* donot fill the dest buffer just count the UChars needed */ + while(index < srcLength){ + ch = pSrc[index++]; + if(ch <= 0x7f){ + reqLength++; + }else{ + ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -3); + reqLength+=U16_LENGTH(ch); + } + } + + reqLength+=(int32_t)(pDest - dest); + + if(pDestLength){ + *pDestLength = reqLength; + } + + /* Terminate the buffer */ + u_terminateUChars(dest,destCapacity,reqLength,pErrorCode); + + return dest; +} + + + +static int32_t U_CALLCONV +utf8TextExtract(UText *ut, + int64_t start, int64_t limit, + UChar *dest, int32_t destCapacity, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(destCapacity<0 || (dest==NULL && destCapacity>0)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + int32_t length = ut->b; + int32_t start32 = pinIndex(start, length); + int32_t limit32 = pinIndex(limit, length); + + if(start32>limit32) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + + // adjust the incoming indexes to land on code point boundaries if needed. + // adjust by no more than three, because that is the largest number of trail bytes + // in a well formed UTF8 character. + const uint8_t *buf = (const uint8_t *)ut->context; + int i; + if (start32 < ut->chunkNativeLimit) { + for (i=0; i<3; i++) { + if (U8_IS_SINGLE(buf[start32]) || U8_IS_LEAD(buf[start32]) || start32==0) { + break; + } + start32--; + } + } + + if (limit32 < ut->chunkNativeLimit) { + for (i=0; i<3; i++) { + if (U8_IS_SINGLE(buf[limit32]) || U8_IS_LEAD(buf[limit32]) || limit32==0) { + break; + } + limit32--; + } + } + + // Do the actual extract. + int32_t destLength=0; + utext_strFromUTF8(dest, destCapacity, &destLength, + (const char *)ut->context+start32, limit32-start32, + pErrorCode); + utf8TextAccess(ut, limit32, TRUE); + return destLength; +} + +// +// utf8TextMapOffsetToNative +// +// Map a chunk (UTF-16) offset to a native index. +static int64_t U_CALLCONV +utf8TextMapOffsetToNative(const UText *ut) { + // + UTF8Buf *u8b = (UTF8Buf *)ut->p; + U_ASSERT(ut->chunkOffset>ut->nativeIndexingLimit && ut->chunkOffset<=ut->chunkLength); + int32_t nativeOffset = u8b->mapToNative[ut->chunkOffset + u8b->bufStartIdx] + u8b->toUCharsMapStart; + U_ASSERT(nativeOffset >= ut->chunkNativeStart && nativeOffset <= ut->chunkNativeLimit); + return nativeOffset; +} + +// +// Map a native index to the corrsponding chunk offset +// +static int32_t U_CALLCONV +utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) { + U_ASSERT(index64 <= 0x7fffffff); + int32_t index = (int32_t)index64; + UTF8Buf *u8b = (UTF8Buf *)ut->p; + U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit); + U_ASSERT(index<=ut->chunkNativeLimit); + int32_t mapIndex = index - u8b->toUCharsMapStart; + U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars)); + int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx; + U_ASSERT(offset>=0 && offset<=ut->chunkLength); + return offset; +} + +static UText * U_CALLCONV +utf8TextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) +{ + // First do a generic shallow clone. Does everything needed for the UText struct itself. + dest = shallowTextClone(dest, src, status); + + // For deep clones, make a copy of the string. + // The copied storage is owned by the newly created clone. + // + // TODO: There is an isssue with using utext_nativeLength(). + // That function is non-const in cases where the input was NUL terminated + // and the length has not yet been determined. + // This function (clone()) is const. + // There potentially a thread safety issue lurking here. + // + if (deep && U_SUCCESS(*status)) { + int32_t len = (int32_t)utext_nativeLength((UText *)src); + char *copyStr = (char *)uprv_malloc(len+1); + if (copyStr == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + } else { + uprv_memcpy(copyStr, src->context, len+1); + dest->context = copyStr; + dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); + } + } + return dest; +} + + +static void U_CALLCONV +utf8TextClose(UText *ut) { + // Most of the work of close is done by the generic UText framework close. + // All that needs to be done here is to delete the UTF8 string if the UText + // owns it. This occurs if the UText was created by cloning. + if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { + char *s = (char *)ut->context; + uprv_free(s); + ut->context = NULL; + } +} + +U_CDECL_END + + +static const struct UTextFuncs utf8Funcs = +{ + sizeof(UTextFuncs), + 0, 0, 0, // Reserved alignment padding + utf8TextClone, + utf8TextLength, + utf8TextAccess, + utf8TextExtract, + NULL, /* replace*/ + NULL, /* copy */ + utf8TextMapOffsetToNative, + utf8TextMapIndexToUTF16, + utf8TextClose, + NULL, // spare 1 + NULL, // spare 2 + NULL // spare 3 +}; + + +static const char gEmptyString[] = {0}; + +U_CAPI UText * U_EXPORT2 +utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status) { + if(U_FAILURE(*status)) { + return NULL; + } + if(s==NULL && length==0) { + s = gEmptyString; + } + + if(s==NULL || length<-1 || length>INT32_MAX) { + *status=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + ut = utext_setup(ut, sizeof(UTF8Buf) * 2, status); + if (U_FAILURE(*status)) { + return ut; + } + + ut->pFuncs = &utf8Funcs; + ut->context = s; + ut->b = (int32_t)length; + ut->c = (int32_t)length; + if (ut->c < 0) { + ut->c = 0; + ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); + } + ut->p = ut->pExtra; + ut->q = (char *)ut->pExtra + sizeof(UTF8Buf); + return ut; + +} + + + + + + + + +//------------------------------------------------------------------------------ +// +// UText implementation wrapper for Replaceable (read/write) +// +// Use of UText data members: +// context pointer to Replaceable. +// p pointer to Replaceable if it is owned by the UText. +// +//------------------------------------------------------------------------------ + + + +// minimum chunk size for this implementation: 3 +// to allow for possible trimming for code point boundaries +enum { REP_TEXT_CHUNK_SIZE=10 }; + +struct ReplExtra { + /* + * Chunk UChars. + * +1 to simplify filling with surrogate pair at the end. + */ + UChar s[REP_TEXT_CHUNK_SIZE+1]; +}; + + +U_CDECL_BEGIN + +static UText * U_CALLCONV +repTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) { + // First do a generic shallow clone. Does everything needed for the UText struct itself. + dest = shallowTextClone(dest, src, status); + + // For deep clones, make a copy of the Replaceable. + // The copied Replaceable storage is owned by the newly created UText clone. + // A non-NULL pointer in UText.p is the signal to the close() function to delete + // it. + // + if (deep && U_SUCCESS(*status)) { + const Replaceable *replSrc = (const Replaceable *)src->context; + dest->context = replSrc->clone(); + dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); + + // with deep clone, the copy is writable, even when the source is not. + dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE); + } + return dest; +} + + +static void U_CALLCONV +repTextClose(UText *ut) { + // Most of the work of close is done by the generic UText framework close. + // All that needs to be done here is delete the Replaceable if the UText + // owns it. This occurs if the UText was created by cloning. + if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { + Replaceable *rep = (Replaceable *)ut->context; + delete rep; + ut->context = NULL; + } +} + + +static int64_t U_CALLCONV +repTextLength(UText *ut) { + const Replaceable *replSrc = (const Replaceable *)ut->context; + int32_t len = replSrc->length(); + return len; +} + + +static UBool U_CALLCONV +repTextAccess(UText *ut, int64_t index, UBool forward) { + const Replaceable *rep=(const Replaceable *)ut->context; + int32_t length=rep->length(); // Full length of the input text (bigger than a chunk) + + // clip the requested index to the limits of the text. + int32_t index32 = pinIndex(index, length); + U_ASSERT(index<=INT32_MAX); + + + /* + * Compute start/limit boundaries around index, for a segment of text + * to be extracted. + * To allow for the possibility that our user gave an index to the trailing + * half of a surrogate pair, we must request one extra preceding UChar when + * going in the forward direction. This will ensure that the buffer has the + * entire code point at the specified index. + */ + if(forward) { + + if (index32>=ut->chunkNativeStart && index32<ut->chunkNativeLimit) { + // Buffer already contains the requested position. + ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart); + return TRUE; + } + if (index32>=length && ut->chunkNativeLimit==length) { + // Request for end of string, and buffer already extends up to it. + // Can't get the data, but don't change the buffer. + ut->chunkOffset = length - (int32_t)ut->chunkNativeStart; + return FALSE; + } + + ut->chunkNativeLimit = index + REP_TEXT_CHUNK_SIZE - 1; + // Going forward, so we want to have the buffer with stuff at and beyond + // the requested index. The -1 gets us one code point before the + // requested index also, to handle the case of the index being on + // a trail surrogate of a surrogate pair. + if(ut->chunkNativeLimit > length) { + ut->chunkNativeLimit = length; + } + // unless buffer ran off end, start is index-1. + ut->chunkNativeStart = ut->chunkNativeLimit - REP_TEXT_CHUNK_SIZE; + if(ut->chunkNativeStart < 0) { + ut->chunkNativeStart = 0; + } + } else { + // Reverse iteration. Fill buffer with data preceding the requested index. + if (index32>ut->chunkNativeStart && index32<=ut->chunkNativeLimit) { + // Requested position already in buffer. + ut->chunkOffset = index32 - (int32_t)ut->chunkNativeStart; + return TRUE; + } + if (index32==0 && ut->chunkNativeStart==0) { + // Request for start, buffer already begins at start. + // No data, but keep the buffer as is. + ut->chunkOffset = 0; + return FALSE; + } + + // Figure out the bounds of the chunk to extract for reverse iteration. + // Need to worry about chunk not splitting surrogate pairs, and while still + // containing the data we need. + // Fix by requesting a chunk that includes an extra UChar at the end. + // If this turns out to be a lead surrogate, we can lop it off and still have + // the data we wanted. + ut->chunkNativeStart = index32 + 1 - REP_TEXT_CHUNK_SIZE; + if (ut->chunkNativeStart < 0) { + ut->chunkNativeStart = 0; + } + + ut->chunkNativeLimit = index32 + 1; + if (ut->chunkNativeLimit > length) { + ut->chunkNativeLimit = length; + } + } + + // Extract the new chunk of text from the Replaceable source. + ReplExtra *ex = (ReplExtra *)ut->pExtra; + // UnicodeString with its buffer a writable alias to the chunk buffer + UnicodeString buffer(ex->s, 0 /*buffer length*/, REP_TEXT_CHUNK_SIZE /*buffer capacity*/); + rep->extractBetween((int32_t)ut->chunkNativeStart, (int32_t)ut->chunkNativeLimit, buffer); + + ut->chunkContents = ex->s; + ut->chunkLength = (int32_t)(ut->chunkNativeLimit - ut->chunkNativeStart); + ut->chunkOffset = (int32_t)(index32 - ut->chunkNativeStart); + + // Surrogate pairs from the input text must not span chunk boundaries. + // If end of chunk could be the start of a surrogate, trim it off. + if (ut->chunkNativeLimit < length && + U16_IS_LEAD(ex->s[ut->chunkLength-1])) { + ut->chunkLength--; + ut->chunkNativeLimit--; + if (ut->chunkOffset > ut->chunkLength) { + ut->chunkOffset = ut->chunkLength; + } + } + + // if the first UChar in the chunk could be the trailing half of a surrogate pair, + // trim it off. + if(ut->chunkNativeStart>0 && U16_IS_TRAIL(ex->s[0])) { + ++(ut->chunkContents); + ++(ut->chunkNativeStart); + --(ut->chunkLength); + --(ut->chunkOffset); + } + + // adjust the index/chunkOffset to a code point boundary + U16_SET_CP_START(ut->chunkContents, 0, ut->chunkOffset); + + // Use fast indexing for get/setNativeIndex() + ut->nativeIndexingLimit = ut->chunkLength; + + return TRUE; +} + + + +static int32_t U_CALLCONV +repTextExtract(UText *ut, + int64_t start, int64_t limit, + UChar *dest, int32_t destCapacity, + UErrorCode *status) { + const Replaceable *rep=(const Replaceable *)ut->context; + int32_t length=rep->length(); + + if(U_FAILURE(*status)) { + return 0; + } + if(destCapacity<0 || (dest==NULL && destCapacity>0)) { + *status=U_ILLEGAL_ARGUMENT_ERROR; + } + if(start>limit) { + *status=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + int32_t start32 = pinIndex(start, length); + int32_t limit32 = pinIndex(limit, length); + + // adjust start, limit if they point to trail half of surrogates + if (start32<length && U16_IS_TRAIL(rep->charAt(start32)) && + U_IS_SUPPLEMENTARY(rep->char32At(start32))){ + start32--; + } + if (limit32<length && U16_IS_TRAIL(rep->charAt(limit32)) && + U_IS_SUPPLEMENTARY(rep->char32At(limit32))){ + limit32--; + } + + length=limit32-start32; + if(length>destCapacity) { + limit32 = start32 + destCapacity; + } + UnicodeString buffer(dest, 0, destCapacity); // writable alias + rep->extractBetween(start32, limit32, buffer); + repTextAccess(ut, limit32, TRUE); + + return u_terminateUChars(dest, destCapacity, length, status); +} + +static int32_t U_CALLCONV +repTextReplace(UText *ut, + int64_t start, int64_t limit, + const UChar *src, int32_t length, + UErrorCode *status) { + Replaceable *rep=(Replaceable *)ut->context; + int32_t oldLength; + + if(U_FAILURE(*status)) { + return 0; + } + if(src==NULL && length!=0) { + *status=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + oldLength=rep->length(); // will subtract from new length + if(start>limit ) { + *status=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + int32_t start32 = pinIndex(start, oldLength); + int32_t limit32 = pinIndex(limit, oldLength); + + // Snap start & limit to code point boundaries. + if (start32<oldLength && U16_IS_TRAIL(rep->charAt(start32)) && + start32>0 && U16_IS_LEAD(rep->charAt(start32-1))) + { + start32--; + } + if (limit32<oldLength && U16_IS_LEAD(rep->charAt(limit32-1)) && + U16_IS_TRAIL(rep->charAt(limit32))) + { + limit32++; + } + + // Do the actual replace operation using methods of the Replaceable class + UnicodeString replStr((UBool)(length<0), src, length); // read-only alias + rep->handleReplaceBetween(start32, limit32, replStr); + int32_t newLength = rep->length(); + int32_t lengthDelta = newLength - oldLength; + + // Is the UText chunk buffer OK? + if (ut->chunkNativeLimit > start32) { + // this replace operation may have impacted the current chunk. + // invalidate it, which will force a reload on the next access. + invalidateChunk(ut); + } + + // set the iteration position to the end of the newly inserted replacement text. + int32_t newIndexPos = limit32 + lengthDelta; + repTextAccess(ut, newIndexPos, TRUE); + + return lengthDelta; +} + + +static void U_CALLCONV +repTextCopy(UText *ut, + int64_t start, int64_t limit, + int64_t destIndex, + UBool move, + UErrorCode *status) +{ + Replaceable *rep=(Replaceable *)ut->context; + int32_t length=rep->length(); + + if(U_FAILURE(*status)) { + return; + } + if (start>limit || (start<destIndex && destIndex<limit)) + { + *status=U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + + int32_t start32 = pinIndex(start, length); + int32_t limit32 = pinIndex(limit, length); + int32_t destIndex32 = pinIndex(destIndex, length); + + // TODO: snap input parameters to code point boundaries. + + if(move) { + // move: copy to destIndex, then replace original with nothing + int32_t segLength=limit32-start32; + rep->copy(start32, limit32, destIndex32); + if(destIndex32<start32) { + start32+=segLength; + limit32+=segLength; + } + rep->handleReplaceBetween(start32, limit32, UnicodeString()); + } else { + // copy + rep->copy(start32, limit32, destIndex32); + } + + // If the change to the text touched the region in the chunk buffer, + // invalidate the buffer. + int32_t firstAffectedIndex = destIndex32; + if (move && start32<firstAffectedIndex) { + firstAffectedIndex = start32; + } + if (firstAffectedIndex < ut->chunkNativeLimit) { + // changes may have affected range covered by the chunk + invalidateChunk(ut); + } + + // Put iteration position at the newly inserted (moved) block, + int32_t nativeIterIndex = destIndex32 + limit32 - start32; + if (move && destIndex32>start32) { + // moved a block of text towards the end of the string. + nativeIterIndex = destIndex32; + } + + // Set position, reload chunk if needed. + repTextAccess(ut, nativeIterIndex, TRUE); +} + +static const struct UTextFuncs repFuncs = +{ + sizeof(UTextFuncs), + 0, 0, 0, // Reserved alignment padding + repTextClone, + repTextLength, + repTextAccess, + repTextExtract, + repTextReplace, + repTextCopy, + NULL, // MapOffsetToNative, + NULL, // MapIndexToUTF16, + repTextClose, + NULL, // spare 1 + NULL, // spare 2 + NULL // spare 3 +}; + + +U_CAPI UText * U_EXPORT2 +utext_openReplaceable(UText *ut, Replaceable *rep, UErrorCode *status) +{ + if(U_FAILURE(*status)) { + return NULL; + } + if(rep==NULL) { + *status=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + ut = utext_setup(ut, sizeof(ReplExtra), status); + if(U_FAILURE(*status)) { + return ut; + } + + ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_WRITABLE); + if(rep->hasMetaData()) { + ut->providerProperties |=I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA); + } + + ut->pFuncs = &repFuncs; + ut->context = rep; + return ut; +} + +U_CDECL_END + + + + + + + + +//------------------------------------------------------------------------------ +// +// UText implementation for UnicodeString (read/write) and +// for const UnicodeString (read only) +// (same implementation, only the flags are different) +// +// Use of UText data members: +// context pointer to UnicodeString +// p pointer to UnicodeString IF this UText owns the string +// and it must be deleted on close(). NULL otherwise. +// +//------------------------------------------------------------------------------ + +U_CDECL_BEGIN + + +static UText * U_CALLCONV +unistrTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) { + // First do a generic shallow clone. Does everything needed for the UText struct itself. + dest = shallowTextClone(dest, src, status); + + // For deep clones, make a copy of the UnicodeSring. + // The copied UnicodeString storage is owned by the newly created UText clone. + // A non-NULL pointer in UText.p is the signal to the close() function to delete + // the UText. + // + if (deep && U_SUCCESS(*status)) { + const UnicodeString *srcString = (const UnicodeString *)src->context; + dest->context = new UnicodeString(*srcString); + dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); + + // with deep clone, the copy is writable, even when the source is not. + dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE); + } + return dest; +} + +static void U_CALLCONV +unistrTextClose(UText *ut) { + // Most of the work of close is done by the generic UText framework close. + // All that needs to be done here is delete the UnicodeString if the UText + // owns it. This occurs if the UText was created by cloning. + if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { + UnicodeString *str = (UnicodeString *)ut->context; + delete str; + ut->context = NULL; + } +} + + +static int64_t U_CALLCONV +unistrTextLength(UText *t) { + return ((const UnicodeString *)t->context)->length(); +} + + +static UBool U_CALLCONV +unistrTextAccess(UText *ut, int64_t index, UBool forward) { + int32_t length = ut->chunkLength; + ut->chunkOffset = pinIndex(index, length); + + // Check whether request is at the start or end + UBool retVal = (forward && index<length) || (!forward && index>0); + return retVal; +} + + + +static int32_t U_CALLCONV +unistrTextExtract(UText *t, + int64_t start, int64_t limit, + UChar *dest, int32_t destCapacity, + UErrorCode *pErrorCode) { + const UnicodeString *us=(const UnicodeString *)t->context; + int32_t length=us->length(); + + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(destCapacity<0 || (dest==NULL && destCapacity>0)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } + if(start<0 || start>limit) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + int32_t start32 = start<length ? us->getChar32Start((int32_t)start) : length; + int32_t limit32 = limit<length ? us->getChar32Start((int32_t)limit) : length; + + length=limit32-start32; + if (destCapacity>0 && dest!=NULL) { + int32_t trimmedLength = length; + if(trimmedLength>destCapacity) { + trimmedLength=destCapacity; + } + us->extract(start32, trimmedLength, dest); + t->chunkOffset = start32+trimmedLength; + } else { + t->chunkOffset = start32; + } + u_terminateUChars(dest, destCapacity, length, pErrorCode); + return length; +} + +static int32_t U_CALLCONV +unistrTextReplace(UText *ut, + int64_t start, int64_t limit, + const UChar *src, int32_t length, + UErrorCode *pErrorCode) { + UnicodeString *us=(UnicodeString *)ut->context; + int32_t oldLength; + + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(src==NULL && length!=0) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } + if(start>limit) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + oldLength=us->length(); + int32_t start32 = pinIndex(start, oldLength); + int32_t limit32 = pinIndex(limit, oldLength); + if (start32 < oldLength) { + start32 = us->getChar32Start(start32); + } + if (limit32 < oldLength) { + limit32 = us->getChar32Start(limit32); + } + + // replace + us->replace(start32, limit32-start32, src, length); + int32_t newLength = us->length(); + + // Update the chunk description. + ut->chunkContents = us->getBuffer(); + ut->chunkLength = newLength; + ut->chunkNativeLimit = newLength; + ut->nativeIndexingLimit = newLength; + + // Set iteration position to the point just following the newly inserted text. + int32_t lengthDelta = newLength - oldLength; + ut->chunkOffset = limit32 + lengthDelta; + + return lengthDelta; +} + +static void U_CALLCONV +unistrTextCopy(UText *ut, + int64_t start, int64_t limit, + int64_t destIndex, + UBool move, + UErrorCode *pErrorCode) { + UnicodeString *us=(UnicodeString *)ut->context; + int32_t length=us->length(); + + if(U_FAILURE(*pErrorCode)) { + return; + } + int32_t start32 = pinIndex(start, length); + int32_t limit32 = pinIndex(limit, length); + int32_t destIndex32 = pinIndex(destIndex, length); + + if( start32>limit32 || (start32<destIndex32 && destIndex32<limit32)) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + + if(move) { + // move: copy to destIndex, then remove original + int32_t segLength=limit32-start32; + us->copy(start32, limit32, destIndex32); + if(destIndex32<start32) { + start32+=segLength; + } + us->remove(start32, segLength); + } else { + // copy + us->copy(start32, limit32, destIndex32); + } + + // update chunk description, set iteration position. + ut->chunkContents = us->getBuffer(); + if (move==FALSE) { + // copy operation, string length grows + ut->chunkLength += limit32-start32; + ut->chunkNativeLimit = ut->chunkLength; + ut->nativeIndexingLimit = ut->chunkLength; + } + + // Iteration position to end of the newly inserted text. + ut->chunkOffset = destIndex32+limit32-start32; + if (move && destIndex32>start32) { + ut->chunkOffset = destIndex32; + } + +} + +static const struct UTextFuncs unistrFuncs = +{ + sizeof(UTextFuncs), + 0, 0, 0, // Reserved alignment padding + unistrTextClone, + unistrTextLength, + unistrTextAccess, + unistrTextExtract, + unistrTextReplace, + unistrTextCopy, + NULL, // MapOffsetToNative, + NULL, // MapIndexToUTF16, + unistrTextClose, + NULL, // spare 1 + NULL, // spare 2 + NULL // spare 3 +}; + + + +U_CDECL_END + + +U_CAPI UText * U_EXPORT2 +utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) { + ut = utext_openConstUnicodeString(ut, s, status); + if (U_SUCCESS(*status)) { + ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE); + } + return ut; +} + + + +U_CAPI UText * U_EXPORT2 +utext_openConstUnicodeString(UText *ut, const UnicodeString *s, UErrorCode *status) { + if (U_SUCCESS(*status) && s->isBogus()) { + // The UnicodeString is bogus, but we still need to detach the UText + // from whatever it was hooked to before, if anything. + utext_openUChars(ut, NULL, 0, status); + *status = U_ILLEGAL_ARGUMENT_ERROR; + return ut; + } + ut = utext_setup(ut, 0, status); + // note: use the standard (writable) function table for UnicodeString. + // The flag settings disable writing, so having the functions in + // the table is harmless. + if (U_SUCCESS(*status)) { + ut->pFuncs = &unistrFuncs; + ut->context = s; + ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS); + ut->chunkContents = s->getBuffer(); + ut->chunkLength = s->length(); + ut->chunkNativeStart = 0; + ut->chunkNativeLimit = ut->chunkLength; + ut->nativeIndexingLimit = ut->chunkLength; + } + return ut; +} + +//------------------------------------------------------------------------------ +// +// UText implementation for const UChar * strings +// +// Use of UText data members: +// context pointer to UnicodeString +// a length. -1 if not yet known. +// +// TODO: support 64 bit lengths. +// +//------------------------------------------------------------------------------ + +U_CDECL_BEGIN + + +static UText * U_CALLCONV +ucstrTextClone(UText *dest, const UText * src, UBool deep, UErrorCode * status) { + // First do a generic shallow clone. + dest = shallowTextClone(dest, src, status); + + // For deep clones, make a copy of the string. + // The copied storage is owned by the newly created clone. + // A non-NULL pointer in UText.p is the signal to the close() function to delete + // it. + // + if (deep && U_SUCCESS(*status)) { + U_ASSERT(utext_nativeLength(dest) < INT32_MAX); + int32_t len = (int32_t)utext_nativeLength(dest); + + // The cloned string IS going to be NUL terminated, whether or not the original was. + const UChar *srcStr = (const UChar *)src->context; + UChar *copyStr = (UChar *)uprv_malloc((len+1) * sizeof(UChar)); + if (copyStr == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + } else { + int64_t i; + for (i=0; i<len; i++) { + copyStr[i] = srcStr[i]; + } + copyStr[len] = 0; + dest->context = copyStr; + dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT); + } + } + return dest; +} + + +static void U_CALLCONV +ucstrTextClose(UText *ut) { + // Most of the work of close is done by the generic UText framework close. + // All that needs to be done here is delete the string if the UText + // owns it. This occurs if the UText was created by cloning. + if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { + UChar *s = (UChar *)ut->context; + uprv_free(s); + ut->context = NULL; + } +} + + + +static int64_t U_CALLCONV +ucstrTextLength(UText *ut) { + if (ut->a < 0) { + // null terminated, we don't yet know the length. Scan for it. + // Access is not convenient for doing this + // because the current interation postion can't be changed. + const UChar *str = (const UChar *)ut->context; + for (;;) { + if (str[ut->chunkNativeLimit] == 0) { + break; + } + ut->chunkNativeLimit++; + } + ut->a = ut->chunkNativeLimit; + ut->chunkLength = (int32_t)ut->chunkNativeLimit; + ut->nativeIndexingLimit = ut->chunkLength; + ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); + } + return ut->a; +} + + +static UBool U_CALLCONV +ucstrTextAccess(UText *ut, int64_t index, UBool forward) { + const UChar *str = (const UChar *)ut->context; + + // pin the requested index to the bounds of the string, + // and set current iteration position. + if (index<0) { + index = 0; + } else if (index < ut->chunkNativeLimit) { + // The request data is within the chunk as it is known so far. + // Put index on a code point boundary. + U16_SET_CP_START(str, 0, index); + } else if (ut->a >= 0) { + // We know the length of this string, and the user is requesting something + // at or beyond the length. Pin the requested index to the length. + index = ut->a; + } else { + // Null terminated string, length not yet known, and the requested index + // is beyond where we have scanned so far. + // Scan to 32 UChars beyond the requested index. The strategy here is + // to avoid fully scanning a long string when the caller only wants to + // see a few characters at its beginning. + int32_t scanLimit = (int32_t)index + 32; + if ((index + 32)>INT32_MAX || (index + 32)<0 ) { // note: int64 expression + scanLimit = INT32_MAX; + } + + int32_t chunkLimit = (int32_t)ut->chunkNativeLimit; + for (; chunkLimit<scanLimit; chunkLimit++) { + if (str[chunkLimit] == 0) { + // We found the end of the string. Remember it, pin the requested index to it, + // and bail out of here. + ut->a = chunkLimit; + ut->chunkLength = chunkLimit; + ut->nativeIndexingLimit = chunkLimit; + if (index >= chunkLimit) { + index = chunkLimit; + } else { + U16_SET_CP_START(str, 0, index); + } + + ut->chunkNativeLimit = chunkLimit; + ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); + goto breakout; + } + } + // We scanned through the next batch of UChars without finding the end. + U16_SET_CP_START(str, 0, index); + if (chunkLimit == INT32_MAX) { + // Scanned to the limit of a 32 bit length. + // Forceably trim the overlength string back so length fits in int32 + // TODO: add support for 64 bit strings. + ut->a = chunkLimit; + ut->chunkLength = chunkLimit; + ut->nativeIndexingLimit = chunkLimit; + if (index > chunkLimit) { + index = chunkLimit; + } + ut->chunkNativeLimit = chunkLimit; + ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); + } else { + // The endpoint of a chunk must not be left in the middle of a surrogate pair. + // If the current end is on a lead surrogate, back the end up by one. + // It doesn't matter if the end char happens to be an unpaired surrogate, + // and it's simpler not to worry about it. + if (U16_IS_LEAD(str[chunkLimit-1])) { + --chunkLimit; + } + // Null-terminated chunk with end still unknown. + // Update the chunk length to reflect what has been scanned thus far. + // That the full length is still unknown is (still) flagged by + // ut->a being < 0. + ut->chunkNativeLimit = chunkLimit; + ut->nativeIndexingLimit = chunkLimit; + ut->chunkLength = chunkLimit; + } + + } +breakout: + U_ASSERT(index<=INT32_MAX); + ut->chunkOffset = (int32_t)index; + + // Check whether request is at the start or end + UBool retVal = (forward && index<ut->chunkNativeLimit) || (!forward && index>0); + return retVal; +} + + + +static int32_t U_CALLCONV +ucstrTextExtract(UText *ut, + int64_t start, int64_t limit, + UChar *dest, int32_t destCapacity, + UErrorCode *pErrorCode) +{ + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + //const UChar *s=(const UChar *)ut->context; + int32_t si, di; + + int32_t start32; + int32_t limit32; + + // Access the start. Does two things we need: + // Pins 'start' to the length of the string, if it came in out-of-bounds. + // Snaps 'start' to the beginning of a code point. + ucstrTextAccess(ut, start, TRUE); + const UChar *s=ut->chunkContents; + start32 = ut->chunkOffset; + + int32_t strLength=(int32_t)ut->a; + if (strLength >= 0) { + limit32 = pinIndex(limit, strLength); + } else { + limit32 = pinIndex(limit, INT32_MAX); + } + di = 0; + for (si=start32; si<limit32; si++) { + if (strLength<0 && s[si]==0) { + // Just hit the end of a null-terminated string. + ut->a = si; // set string length for this UText + ut->chunkNativeLimit = si; + ut->chunkLength = si; + ut->nativeIndexingLimit = si; + strLength = si; + limit32 = si; + break; + } + U_ASSERT(di>=0); /* to ensure di never exceeds INT32_MAX, which must not happen logically */ + if (di<destCapacity) { + // only store if there is space. + dest[di] = s[si]; + } else { + if (strLength>=0) { + // We have filled the destination buffer, and the string length is known. + // Cut the loop short. There is no need to scan string termination. + di = limit32 - start32; + si = limit32; + break; + } + } + di++; + } + + // If the limit index points to a lead surrogate of a pair, + // add the corresponding trail surrogate to the destination. + if (si>0 && U16_IS_LEAD(s[si-1]) && + ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si]))) + { + if (di<destCapacity) { + // store only if there is space in the output buffer. + dest[di++] = s[si]; + } + si++; + } + + // Put iteration position at the point just following the extracted text + if (si <= ut->chunkNativeLimit) { + ut->chunkOffset = si; + } else { + ucstrTextAccess(ut, si, TRUE); + } + + // Add a terminating NUL if space in the buffer permits, + // and set the error status as required. + u_terminateUChars(dest, destCapacity, di, pErrorCode); + return di; +} + +static const struct UTextFuncs ucstrFuncs = +{ + sizeof(UTextFuncs), + 0, 0, 0, // Reserved alignment padding + ucstrTextClone, + ucstrTextLength, + ucstrTextAccess, + ucstrTextExtract, + NULL, // Replace + NULL, // Copy + NULL, // MapOffsetToNative, + NULL, // MapIndexToUTF16, + ucstrTextClose, + NULL, // spare 1 + NULL, // spare 2 + NULL, // spare 3 +}; + +U_CDECL_END + +static const UChar gEmptyUString[] = {0}; + +U_CAPI UText * U_EXPORT2 +utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status) { + if (U_FAILURE(*status)) { + return NULL; + } + if(s==NULL && length==0) { + s = gEmptyUString; + } + if (s==NULL || length < -1 || length>INT32_MAX) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + ut = utext_setup(ut, 0, status); + if (U_SUCCESS(*status)) { + ut->pFuncs = &ucstrFuncs; + ut->context = s; + ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS); + if (length==-1) { + ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE); + } + ut->a = length; + ut->chunkContents = s; + ut->chunkNativeStart = 0; + ut->chunkNativeLimit = length>=0? length : 0; + ut->chunkLength = (int32_t)ut->chunkNativeLimit; + ut->chunkOffset = 0; + ut->nativeIndexingLimit = ut->chunkLength; + } + return ut; +} + + +//------------------------------------------------------------------------------ +// +// UText implementation for text from ICU CharacterIterators +// +// Use of UText data members: +// context pointer to the CharacterIterator +// a length of the full text. +// p pointer to buffer 1 +// b start index of local buffer 1 contents +// q pointer to buffer 2 +// c start index of local buffer 2 contents +// r pointer to the character iterator if the UText owns it. +// Null otherwise. +// +//------------------------------------------------------------------------------ +#define CIBufSize 16 + +U_CDECL_BEGIN +static void U_CALLCONV +charIterTextClose(UText *ut) { + // Most of the work of close is done by the generic UText framework close. + // All that needs to be done here is delete the CharacterIterator if the UText + // owns it. This occurs if the UText was created by cloning. + CharacterIterator *ci = (CharacterIterator *)ut->r; + delete ci; + ut->r = NULL; +} + +static int64_t U_CALLCONV +charIterTextLength(UText *ut) { + return (int32_t)ut->a; +} + +static UBool U_CALLCONV +charIterTextAccess(UText *ut, int64_t index, UBool forward) { + CharacterIterator *ci = (CharacterIterator *)ut->context; + + int32_t clippedIndex = (int32_t)index; + if (clippedIndex<0) { + clippedIndex=0; + } else if (clippedIndex>=ut->a) { + clippedIndex=(int32_t)ut->a; + } + int32_t neededIndex = clippedIndex; + if (!forward && neededIndex>0) { + // reverse iteration, want the position just before what was asked for. + neededIndex--; + } else if (forward && neededIndex==ut->a && neededIndex>0) { + // Forward iteration, don't ask for something past the end of the text. + neededIndex--; + } + + // Find the native index of the start of the buffer containing what we want. + neededIndex -= neededIndex % CIBufSize; + + UChar *buf = NULL; + UBool needChunkSetup = TRUE; + int i; + if (ut->chunkNativeStart == neededIndex) { + // The buffer we want is already the current chunk. + needChunkSetup = FALSE; + } else if (ut->b == neededIndex) { + // The first buffer (buffer p) has what we need. + buf = (UChar *)ut->p; + } else if (ut->c == neededIndex) { + // The second buffer (buffer q) has what we need. + buf = (UChar *)ut->q; + } else { + // Neither buffer already has what we need. + // Load new data from the character iterator. + // Use the buf that is not the current buffer. + buf = (UChar *)ut->p; + if (ut->p == ut->chunkContents) { + buf = (UChar *)ut->q; + } + ci->setIndex(neededIndex); + for (i=0; i<CIBufSize; i++) { + buf[i] = ci->nextPostInc(); + if (i+neededIndex > ut->a) { + break; + } + } + } + + // We have a buffer with the data we need. + // Set it up as the current chunk, if it wasn't already. + if (needChunkSetup) { + ut->chunkContents = buf; + ut->chunkLength = CIBufSize; + ut->chunkNativeStart = neededIndex; + ut->chunkNativeLimit = neededIndex + CIBufSize; + if (ut->chunkNativeLimit > ut->a) { + ut->chunkNativeLimit = ut->a; + ut->chunkLength = (int32_t)(ut->chunkNativeLimit)-(int32_t)(ut->chunkNativeStart); + } + ut->nativeIndexingLimit = ut->chunkLength; + U_ASSERT(ut->chunkOffset>=0 && ut->chunkOffset<=CIBufSize); + } + ut->chunkOffset = clippedIndex - (int32_t)ut->chunkNativeStart; + UBool success = (forward? ut->chunkOffset<ut->chunkLength : ut->chunkOffset>0); + return success; +} + +static UText * U_CALLCONV +charIterTextClone(UText *dest, const UText *src, UBool deep, UErrorCode * status) { + if (U_FAILURE(*status)) { + return NULL; + } + + if (deep) { + // There is no CharacterIterator API for cloning the underlying text storage. + *status = U_UNSUPPORTED_ERROR; + return NULL; + } else { + CharacterIterator *srcCI =(CharacterIterator *)src->context; + srcCI = srcCI->clone(); + dest = utext_openCharacterIterator(dest, srcCI, status); + if (U_FAILURE(*status)) { + return dest; + } + // cast off const on getNativeIndex. + // For CharacterIterator based UTexts, this is safe, the operation is const. + int64_t ix = utext_getNativeIndex((UText *)src); + utext_setNativeIndex(dest, ix); + dest->r = srcCI; // flags that this UText owns the CharacterIterator + } + return dest; +} + +static int32_t U_CALLCONV +charIterTextExtract(UText *ut, + int64_t start, int64_t limit, + UChar *dest, int32_t destCapacity, + UErrorCode *status) +{ + if(U_FAILURE(*status)) { + return 0; + } + if(destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) { + *status=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + int32_t length = (int32_t)ut->a; + int32_t start32 = pinIndex(start, length); + int32_t limit32 = pinIndex(limit, length); + int32_t desti = 0; + int32_t srci; + int32_t copyLimit; + + CharacterIterator *ci = (CharacterIterator *)ut->context; + ci->setIndex32(start32); // Moves ix to lead of surrogate pair, if needed. + srci = ci->getIndex(); + copyLimit = srci; + while (srci<limit32) { + UChar32 c = ci->next32PostInc(); + int32_t len = U16_LENGTH(c); + U_ASSERT(desti+len>0); /* to ensure desti+len never exceeds MAX_INT32, which must not happen logically */ + if (desti+len <= destCapacity) { + U16_APPEND_UNSAFE(dest, desti, c); + copyLimit = srci+len; + } else { + desti += len; + *status = U_BUFFER_OVERFLOW_ERROR; + } + srci += len; + } + + charIterTextAccess(ut, copyLimit, TRUE); + + u_terminateUChars(dest, destCapacity, desti, status); + return desti; +} + +static const struct UTextFuncs charIterFuncs = +{ + sizeof(UTextFuncs), + 0, 0, 0, // Reserved alignment padding + charIterTextClone, + charIterTextLength, + charIterTextAccess, + charIterTextExtract, + NULL, // Replace + NULL, // Copy + NULL, // MapOffsetToNative, + NULL, // MapIndexToUTF16, + charIterTextClose, + NULL, // spare 1 + NULL, // spare 2 + NULL // spare 3 +}; +U_CDECL_END + + +U_CAPI UText * U_EXPORT2 +utext_openCharacterIterator(UText *ut, CharacterIterator *ci, UErrorCode *status) { + if (U_FAILURE(*status)) { + return NULL; + } + + if (ci->startIndex() > 0) { + // No support for CharacterIterators that do not start indexing from zero. + *status = U_UNSUPPORTED_ERROR; + return NULL; + } + + // Extra space in UText for 2 buffers of CIBufSize UChars each. + int32_t extraSpace = 2 * CIBufSize * sizeof(UChar); + ut = utext_setup(ut, extraSpace, status); + if (U_SUCCESS(*status)) { + ut->pFuncs = &charIterFuncs; + ut->context = ci; + ut->providerProperties = 0; + ut->a = ci->endIndex(); // Length of text + ut->p = ut->pExtra; // First buffer + ut->b = -1; // Native index of first buffer contents + ut->q = (UChar*)ut->pExtra+CIBufSize; // Second buffer + ut->c = -1; // Native index of second buffer contents + + // Initialize current chunk contents to be empty. + // First access will fault something in. + // Note: The initial nativeStart and chunkOffset must sum to zero + // so that getNativeIndex() will correctly compute to zero + // if no call to Access() has ever been made. They can't be both + // zero without Access() thinking that the chunk is valid. + ut->chunkContents = (UChar *)ut->p; + ut->chunkNativeStart = -1; + ut->chunkOffset = 1; + ut->chunkNativeLimit = 0; + ut->chunkLength = 0; + ut->nativeIndexingLimit = ut->chunkOffset; // enables native indexing + } + return ut; +} diff --git a/thirdparty/icu4c/common/utf_impl.cpp b/thirdparty/icu4c/common/utf_impl.cpp new file mode 100644 index 0000000000..9dd241a12b --- /dev/null +++ b/thirdparty/icu4c/common/utf_impl.cpp @@ -0,0 +1,329 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: utf_impl.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep13 +* created by: Markus W. Scherer +* +* This file provides implementation functions for macros in the utfXX.h +* that would otherwise be too long as macros. +*/ + +/* set import/export definitions */ +#ifndef U_UTF8_IMPL +# define U_UTF8_IMPL +#endif + +#include "unicode/utypes.h" +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "uassert.h" + +/* + * Table of the number of utf8 trail bytes, indexed by the lead byte. + * Used by the deprecated macro UTF8_COUNT_TRAIL_BYTES, defined in utf_old.h + * + * The current macro, U8_COUNT_TRAIL_BYTES, does _not_ use this table. + * + * Note that this table cannot be removed, even if UTF8_COUNT_TRAIL_BYTES were + * changed to no longer use it. References to the table from expansions of UTF8_COUNT_TRAIL_BYTES + * may exist in old client code that must continue to run with newer icu library versions. + * + * This table could be replaced on many machines by + * a few lines of assembler code using an + * "index of first 0-bit from msb" instruction and + * one or two more integer instructions. + * + * For example, on an i386, do something like + * - MOV AL, leadByte + * - NOT AL (8-bit, leave b15..b8==0..0, reverse only b7..b0) + * - MOV AH, 0 + * - BSR BX, AX (16-bit) + * - MOV AX, 6 (result) + * - JZ finish (ZF==1 if leadByte==0xff) + * - SUB AX, BX (result) + * -finish: + * (BSR: Bit Scan Reverse, scans for a 1-bit, starting from the MSB) + */ +extern "C" U_EXPORT const uint8_t +utf8_countTrailBytes[256]={ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + // illegal C0 & C1 + // 2-byte lead bytes C2..DF + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + // 3-byte lead bytes E0..EF + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + // 4-byte lead bytes F0..F4 + // illegal F5..FF + 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static const UChar32 +utf8_errorValue[6]={ + // Same values as UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_2, UTF_ERROR_VALUE, + // but without relying on the obsolete unicode/utf_old.h. + 0x15, 0x9f, 0xffff, + 0x10ffff +}; + +static UChar32 +errorValue(int32_t count, int8_t strict) { + if(strict>=0) { + return utf8_errorValue[count]; + } else if(strict==-3) { + return 0xfffd; + } else { + return U_SENTINEL; + } +} + +/* + * Handle the non-inline part of the U8_NEXT() and U8_NEXT_FFFD() macros + * and their obsolete sibling UTF8_NEXT_CHAR_SAFE(). + * + * U8_NEXT() supports NUL-terminated strings indicated via length<0. + * + * The "strict" parameter controls the error behavior: + * <0 "Safe" behavior of U8_NEXT(): + * -1: All illegal byte sequences yield U_SENTINEL=-1. + * -2: Same as -1, except for lenient treatment of surrogate code points as legal. + * Some implementations use this for roundtripping of + * Unicode 16-bit strings that are not well-formed UTF-16, that is, they + * contain unpaired surrogates. + * -3: All illegal byte sequences yield U+FFFD. + * 0 Obsolete "safe" behavior of UTF8_NEXT_CHAR_SAFE(..., FALSE): + * All illegal byte sequences yield a positive code point such that this + * result code point would be encoded with the same number of bytes as + * the illegal sequence. + * >0 Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., TRUE): + * Same as the obsolete "safe" behavior, but non-characters are also treated + * like illegal sequences. + * + * Note that a UBool is the same as an int8_t. + */ +U_CAPI UChar32 U_EXPORT2 +utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) { + // *pi is one after byte c. + int32_t i=*pi; + // length can be negative for NUL-terminated strings: Read and validate one byte at a time. + if(i==length || c>0xf4) { + // end of string, or not a lead byte + } else if(c>=0xf0) { + // Test for 4-byte sequences first because + // U8_NEXT() handles shorter valid sequences inline. + uint8_t t1=s[i], t2, t3; + c&=7; + if(U8_IS_VALID_LEAD4_AND_T1(c, t1) && + ++i!=length && (t2=s[i]-0x80)<=0x3f && + ++i!=length && (t3=s[i]-0x80)<=0x3f) { + ++i; + c=(c<<18)|((t1&0x3f)<<12)|(t2<<6)|t3; + // strict: forbid non-characters like U+fffe + if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { + *pi=i; + return c; + } + } + } else if(c>=0xe0) { + c&=0xf; + if(strict!=-2) { + uint8_t t1=s[i], t2; + if(U8_IS_VALID_LEAD3_AND_T1(c, t1) && + ++i!=length && (t2=s[i]-0x80)<=0x3f) { + ++i; + c=(c<<12)|((t1&0x3f)<<6)|t2; + // strict: forbid non-characters like U+fffe + if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { + *pi=i; + return c; + } + } + } else { + // strict=-2 -> lenient: allow surrogates + uint8_t t1=s[i]-0x80, t2; + if(t1<=0x3f && (c>0 || t1>=0x20) && + ++i!=length && (t2=s[i]-0x80)<=0x3f) { + *pi=i+1; + return (c<<12)|(t1<<6)|t2; + } + } + } else if(c>=0xc2) { + uint8_t t1=s[i]-0x80; + if(t1<=0x3f) { + *pi=i+1; + return ((c-0xc0)<<6)|t1; + } + } // else 0x80<=c<0xc2 is not a lead byte + + /* error handling */ + c=errorValue(i-*pi, strict); + *pi=i; + return c; +} + +U_CAPI int32_t U_EXPORT2 +utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError) { + if((uint32_t)(c)<=0x7ff) { + if((i)+1<(length)) { + (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); + return i; + } + } else if((uint32_t)(c)<=0xffff) { + /* Starting with Unicode 3.2, surrogate code points must not be encoded in UTF-8. */ + if((i)+2<(length) && !U_IS_SURROGATE(c)) { + (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); + (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); + return i; + } + } else if((uint32_t)(c)<=0x10ffff) { + if((i)+3<(length)) { + (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); + (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); + (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); + return i; + } + } + /* c>0x10ffff or not enough space, write an error value */ + if(pIsError!=NULL) { + *pIsError=TRUE; + } else { + length-=i; + if(length>0) { + int32_t offset; + if(length>3) { + length=3; + } + s+=i; + offset=0; + c=utf8_errorValue[length-1]; + U8_APPEND_UNSAFE(s, offset, c); + i=i+offset; + } + } + return i; +} + +U_CAPI UChar32 U_EXPORT2 +utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict) { + // *pi is the index of byte c. + int32_t i=*pi; + if(U8_IS_TRAIL(c) && i>start) { + uint8_t b1=s[--i]; + if(U8_IS_LEAD(b1)) { + if(b1<0xe0) { + *pi=i; + return ((b1-0xc0)<<6)|(c&0x3f); + } else if(b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c)) { + // Truncated 3- or 4-byte sequence. + *pi=i; + return errorValue(1, strict); + } + } else if(U8_IS_TRAIL(b1) && i>start) { + // Extract the value bits from the last trail byte. + c&=0x3f; + uint8_t b2=s[--i]; + if(0xe0<=b2 && b2<=0xf4) { + if(b2<0xf0) { + b2&=0xf; + if(strict!=-2) { + if(U8_IS_VALID_LEAD3_AND_T1(b2, b1)) { + *pi=i; + c=(b2<<12)|((b1&0x3f)<<6)|c; + if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { + return c; + } else { + // strict: forbid non-characters like U+fffe + return errorValue(2, strict); + } + } + } else { + // strict=-2 -> lenient: allow surrogates + b1-=0x80; + if((b2>0 || b1>=0x20)) { + *pi=i; + return (b2<<12)|(b1<<6)|c; + } + } + } else if(U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { + // Truncated 4-byte sequence. + *pi=i; + return errorValue(2, strict); + } + } else if(U8_IS_TRAIL(b2) && i>start) { + uint8_t b3=s[--i]; + if(0xf0<=b3 && b3<=0xf4) { + b3&=7; + if(U8_IS_VALID_LEAD4_AND_T1(b3, b2)) { + *pi=i; + c=(b3<<18)|((b2&0x3f)<<12)|((b1&0x3f)<<6)|c; + if(strict<=0 || !U_IS_UNICODE_NONCHAR(c)) { + return c; + } else { + // strict: forbid non-characters like U+fffe + return errorValue(3, strict); + } + } + } + } + } + } + return errorValue(0, strict); +} + +U_CAPI int32_t U_EXPORT2 +utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i) { + // Same as utf8_prevCharSafeBody(..., strict=-1) minus assembling code points. + int32_t orig_i=i; + uint8_t c=s[i]; + if(U8_IS_TRAIL(c) && i>start) { + uint8_t b1=s[--i]; + if(U8_IS_LEAD(b1)) { + if(b1<0xe0 || + (b1<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b1, c) : U8_IS_VALID_LEAD4_AND_T1(b1, c))) { + return i; + } + } else if(U8_IS_TRAIL(b1) && i>start) { + uint8_t b2=s[--i]; + if(0xe0<=b2 && b2<=0xf4) { + if(b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(b2, b1) : U8_IS_VALID_LEAD4_AND_T1(b2, b1)) { + return i; + } + } else if(U8_IS_TRAIL(b2) && i>start) { + uint8_t b3=s[--i]; + if(0xf0<=b3 && b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(b3, b2)) { + return i; + } + } + } + } + return orig_i; +} diff --git a/thirdparty/icu4c/common/util.cpp b/thirdparty/icu4c/common/util.cpp new file mode 100644 index 0000000000..86e5c791ba --- /dev/null +++ b/thirdparty/icu4c/common/util.cpp @@ -0,0 +1,421 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2001-2011, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 11/19/2001 aliu Creation. +********************************************************************** +*/ + +#include "unicode/unimatch.h" +#include "unicode/utf16.h" +#include "patternprops.h" +#include "util.h" + +// Define UChar constants using hex for EBCDIC compatibility + +static const UChar BACKSLASH = 0x005C; /*\*/ +static const UChar UPPER_U = 0x0055; /*U*/ +static const UChar LOWER_U = 0x0075; /*u*/ +static const UChar APOSTROPHE = 0x0027; // '\'' +static const UChar SPACE = 0x0020; // ' ' + +// "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" +static const UChar DIGITS[] = { + 48,49,50,51,52,53,54,55,56,57, + 65,66,67,68,69,70,71,72,73,74, + 75,76,77,78,79,80,81,82,83,84, + 85,86,87,88,89,90 +}; + +U_NAMESPACE_BEGIN + +UnicodeString& ICU_Utility::appendNumber(UnicodeString& result, int32_t n, + int32_t radix, int32_t minDigits) { + if (radix < 2 || radix > 36) { + // Bogus radix + return result.append((UChar)63/*?*/); + } + // Handle negatives + if (n < 0) { + n = -n; + result.append((UChar)45/*-*/); + } + // First determine the number of digits + int32_t nn = n; + int32_t r = 1; + while (nn >= radix) { + nn /= radix; + r *= radix; + --minDigits; + } + // Now generate the digits + while (--minDigits > 0) { + result.append(DIGITS[0]); + } + while (r > 0) { + int32_t digit = n / r; + result.append(DIGITS[digit]); + n -= digit * r; + r /= radix; + } + return result; +} + +/** + * Return true if the character is NOT printable ASCII. + */ +UBool ICU_Utility::isUnprintable(UChar32 c) { + return !(c >= 0x20 && c <= 0x7E); +} + +/** + * Escape unprintable characters using \uxxxx notation for U+0000 to + * U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is + * printable ASCII, then do nothing and return FALSE. Otherwise, + * append the escaped notation and return TRUE. + */ +UBool ICU_Utility::escapeUnprintable(UnicodeString& result, UChar32 c) { + if (isUnprintable(c)) { + result.append(BACKSLASH); + if (c & ~0xFFFF) { + result.append(UPPER_U); + result.append(DIGITS[0xF&(c>>28)]); + result.append(DIGITS[0xF&(c>>24)]); + result.append(DIGITS[0xF&(c>>20)]); + result.append(DIGITS[0xF&(c>>16)]); + } else { + result.append(LOWER_U); + } + result.append(DIGITS[0xF&(c>>12)]); + result.append(DIGITS[0xF&(c>>8)]); + result.append(DIGITS[0xF&(c>>4)]); + result.append(DIGITS[0xF&c]); + return TRUE; + } + return FALSE; +} + +/** + * Returns the index of a character, ignoring quoted text. + * For example, in the string "abc'hide'h", the 'h' in "hide" will not be + * found by a search for 'h'. + */ +// FOR FUTURE USE. DISABLE FOR NOW for coverage reasons. +/* +int32_t ICU_Utility::quotedIndexOf(const UnicodeString& text, + int32_t start, int32_t limit, + UChar charToFind) { + for (int32_t i=start; i<limit; ++i) { + UChar c = text.charAt(i); + if (c == BACKSLASH) { + ++i; + } else if (c == APOSTROPHE) { + while (++i < limit + && text.charAt(i) != APOSTROPHE) {} + } else if (c == charToFind) { + return i; + } + } + return -1; +} +*/ + +/** + * Skip over a sequence of zero or more white space characters at pos. + * @param advance if true, advance pos to the first non-white-space + * character at or after pos, or str.length(), if there is none. + * Otherwise leave pos unchanged. + * @return the index of the first non-white-space character at or + * after pos, or str.length(), if there is none. + */ +int32_t ICU_Utility::skipWhitespace(const UnicodeString& str, int32_t& pos, + UBool advance) { + int32_t p = pos; + const UChar* s = str.getBuffer(); + p = (int32_t)(PatternProps::skipWhiteSpace(s + p, str.length() - p) - s); + if (advance) { + pos = p; + } + return p; +} + +/** + * Skip over Pattern_White_Space in a Replaceable. + * Skipping may be done in the forward or + * reverse direction. In either case, the leftmost index will be + * inclusive, and the rightmost index will be exclusive. That is, + * given a range defined as [start, limit), the call + * skipWhitespace(text, start, limit) will advance start past leading + * whitespace, whereas the call skipWhitespace(text, limit, start), + * will back up limit past trailing whitespace. + * @param text the text to be analyzed + * @param pos either the start or limit of a range of 'text', to skip + * leading or trailing whitespace, respectively + * @param stop either the limit or start of a range of 'text', to skip + * leading or trailing whitespace, respectively + * @return the new start or limit, depending on what was passed in to + * 'pos' + */ +//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons. +//?int32_t ICU_Utility::skipWhitespace(const Replaceable& text, +//? int32_t pos, int32_t stop) { +//? UChar32 c; +//? UBool isForward = (stop >= pos); +//? +//? if (!isForward) { +//? --pos; // pos is a limit, so back up by one +//? } +//? +//? while (pos != stop && +//? PatternProps::isWhiteSpace(c = text.char32At(pos))) { +//? if (isForward) { +//? pos += U16_LENGTH(c); +//? } else { +//? pos -= U16_LENGTH(c); +//? } +//? } +//? +//? if (!isForward) { +//? ++pos; // make pos back into a limit +//? } +//? +//? return pos; +//?} + +/** + * Parse a single non-whitespace character 'ch', optionally + * preceded by whitespace. + * @param id the string to be parsed + * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the + * offset of the first character to be parsed. On output, pos[0] + * is the index after the last parsed character. If the parse + * fails, pos[0] will be unchanged. + * @param ch the non-whitespace character to be parsed. + * @return true if 'ch' is seen preceded by zero or more + * whitespace characters. + */ +UBool ICU_Utility::parseChar(const UnicodeString& id, int32_t& pos, UChar ch) { + int32_t start = pos; + skipWhitespace(id, pos, TRUE); + if (pos == id.length() || + id.charAt(pos) != ch) { + pos = start; + return FALSE; + } + ++pos; + return TRUE; +} + +/** + * Parse a pattern string within the given Replaceable and a parsing + * pattern. Characters are matched literally and case-sensitively + * except for the following special characters: + * + * ~ zero or more Pattern_White_Space chars + * + * If end of pattern is reached with all matches along the way, + * pos is advanced to the first unparsed index and returned. + * Otherwise -1 is returned. + * @param pat pattern that controls parsing + * @param text text to be parsed, starting at index + * @param index offset to first character to parse + * @param limit offset after last character to parse + * @return index after last parsed character, or -1 on parse failure. + */ +int32_t ICU_Utility::parsePattern(const UnicodeString& pat, + const Replaceable& text, + int32_t index, + int32_t limit) { + int32_t ipat = 0; + + // empty pattern matches immediately + if (ipat == pat.length()) { + return index; + } + + UChar32 cpat = pat.char32At(ipat); + + while (index < limit) { + UChar32 c = text.char32At(index); + + // parse \s* + if (cpat == 126 /*~*/) { + if (PatternProps::isWhiteSpace(c)) { + index += U16_LENGTH(c); + continue; + } else { + if (++ipat == pat.length()) { + return index; // success; c unparsed + } + // fall thru; process c again with next cpat + } + } + + // parse literal + else if (c == cpat) { + index += U16_LENGTH(c); + ipat += U16_LENGTH(cpat); + if (ipat == pat.length()) { + return index; // success; c parsed + } + // fall thru; get next cpat + } + + // match failure of literal + else { + return -1; + } + + cpat = pat.char32At(ipat); + } + + return -1; // text ended before end of pat +} + +int32_t ICU_Utility::parseAsciiInteger(const UnicodeString& str, int32_t& pos) { + int32_t result = 0; + UChar c; + while (pos < str.length() && (c = str.charAt(pos)) >= u'0' && c <= u'9') { + result = result * 10 + (c - u'0'); + pos++; + } + return result; +} + +/** + * Append a character to a rule that is being built up. To flush + * the quoteBuf to rule, make one final call with isLiteral == TRUE. + * If there is no final character, pass in (UChar32)-1 as c. + * @param rule the string to append the character to + * @param c the character to append, or (UChar32)-1 if none. + * @param isLiteral if true, then the given character should not be + * quoted or escaped. Usually this means it is a syntactic element + * such as > or $ + * @param escapeUnprintable if true, then unprintable characters + * should be escaped using \uxxxx or \Uxxxxxxxx. These escapes will + * appear outside of quotes. + * @param quoteBuf a buffer which is used to build up quoted + * substrings. The caller should initially supply an empty buffer, + * and thereafter should not modify the buffer. The buffer should be + * cleared out by, at the end, calling this method with a literal + * character. + */ +void ICU_Utility::appendToRule(UnicodeString& rule, + UChar32 c, + UBool isLiteral, + UBool escapeUnprintable, + UnicodeString& quoteBuf) { + // If we are escaping unprintables, then escape them outside + // quotes. \u and \U are not recognized within quotes. The same + // logic applies to literals, but literals are never escaped. + if (isLiteral || + (escapeUnprintable && ICU_Utility::isUnprintable(c))) { + if (quoteBuf.length() > 0) { + // We prefer backslash APOSTROPHE to double APOSTROPHE + // (more readable, less similar to ") so if there are + // double APOSTROPHEs at the ends, we pull them outside + // of the quote. + + // If the first thing in the quoteBuf is APOSTROPHE + // (doubled) then pull it out. + while (quoteBuf.length() >= 2 && + quoteBuf.charAt(0) == APOSTROPHE && + quoteBuf.charAt(1) == APOSTROPHE) { + rule.append(BACKSLASH).append(APOSTROPHE); + quoteBuf.remove(0, 2); + } + // If the last thing in the quoteBuf is APOSTROPHE + // (doubled) then remove and count it and add it after. + int32_t trailingCount = 0; + while (quoteBuf.length() >= 2 && + quoteBuf.charAt(quoteBuf.length()-2) == APOSTROPHE && + quoteBuf.charAt(quoteBuf.length()-1) == APOSTROPHE) { + quoteBuf.truncate(quoteBuf.length()-2); + ++trailingCount; + } + if (quoteBuf.length() > 0) { + rule.append(APOSTROPHE); + rule.append(quoteBuf); + rule.append(APOSTROPHE); + quoteBuf.truncate(0); + } + while (trailingCount-- > 0) { + rule.append(BACKSLASH).append(APOSTROPHE); + } + } + if (c != (UChar32)-1) { + /* Since spaces are ignored during parsing, they are + * emitted only for readability. We emit one here + * only if there isn't already one at the end of the + * rule. + */ + if (c == SPACE) { + int32_t len = rule.length(); + if (len > 0 && rule.charAt(len-1) != c) { + rule.append(c); + } + } else if (!escapeUnprintable || !ICU_Utility::escapeUnprintable(rule, c)) { + rule.append(c); + } + } + } + + // Escape ' and '\' and don't begin a quote just for them + else if (quoteBuf.length() == 0 && + (c == APOSTROPHE || c == BACKSLASH)) { + rule.append(BACKSLASH); + rule.append(c); + } + + // Specials (printable ascii that isn't [0-9a-zA-Z]) and + // whitespace need quoting. Also append stuff to quotes if we are + // building up a quoted substring already. + else if (quoteBuf.length() > 0 || + (c >= 0x0021 && c <= 0x007E && + !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) || + (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) || + (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) || + PatternProps::isWhiteSpace(c)) { + quoteBuf.append(c); + // Double ' within a quote + if (c == APOSTROPHE) { + quoteBuf.append(c); + } + } + + // Otherwise just append + else { + rule.append(c); + } +} + +void ICU_Utility::appendToRule(UnicodeString& rule, + const UnicodeString& text, + UBool isLiteral, + UBool escapeUnprintable, + UnicodeString& quoteBuf) { + for (int32_t i=0; i<text.length(); ++i) { + appendToRule(rule, text[i], isLiteral, escapeUnprintable, quoteBuf); + } +} + +/** + * Given a matcher reference, which may be null, append its + * pattern as a literal to the given rule. + */ +void ICU_Utility::appendToRule(UnicodeString& rule, + const UnicodeMatcher* matcher, + UBool escapeUnprintable, + UnicodeString& quoteBuf) { + if (matcher != NULL) { + UnicodeString pat; + appendToRule(rule, matcher->toPattern(pat, escapeUnprintable), + TRUE, escapeUnprintable, quoteBuf); + } +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/util.h b/thirdparty/icu4c/common/util.h new file mode 100644 index 0000000000..9c3b76d9ed --- /dev/null +++ b/thirdparty/icu4c/common/util.h @@ -0,0 +1,257 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ********************************************************************** + * Copyright (c) 2001-2011, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** + * Date Name Description + * 11/19/2001 aliu Creation. + ********************************************************************** + */ + +#ifndef ICU_UTIL_H +#define ICU_UTIL_H + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "unicode/unistr.h" + +//-------------------------------------------------------------------- +// class ICU_Utility +// i18n utility functions, scoped into the class ICU_Utility. +//-------------------------------------------------------------------- + +U_NAMESPACE_BEGIN + +class UnicodeMatcher; + +class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ { + public: + + /** + * Append a number to the given UnicodeString in the given radix. + * Standard digits '0'-'9' are used and letters 'A'-'Z' for + * radices 11 through 36. + * @param result the digits of the number are appended here + * @param n the number to be converted to digits; may be negative. + * If negative, a '-' is prepended to the digits. + * @param radix a radix from 2 to 36 inclusive. + * @param minDigits the minimum number of digits, not including + * any '-', to produce. Values less than 2 have no effect. One + * digit is always emitted regardless of this parameter. + * @return a reference to result + */ + static UnicodeString& appendNumber(UnicodeString& result, int32_t n, + int32_t radix = 10, + int32_t minDigits = 1); + + /** Returns a bogus UnicodeString by value. */ + static inline UnicodeString makeBogusString() { + UnicodeString result; + result.setToBogus(); + return result; + } + + /** + * Return true if the character is NOT printable ASCII. + * + * This method should really be in UnicodeString (or similar). For + * now, we implement it here and share it with friend classes. + */ + static UBool isUnprintable(UChar32 c); + + /** + * Escape unprintable characters using \uxxxx notation for U+0000 to + * U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is + * printable ASCII, then do nothing and return false. Otherwise, + * append the escaped notation and return true. + */ + static UBool escapeUnprintable(UnicodeString& result, UChar32 c); + + /** + * Returns the index of a character, ignoring quoted text. + * For example, in the string "abc'hide'h", the 'h' in "hide" will not be + * found by a search for 'h'. + * @param text text to be searched + * @param start the beginning index, inclusive; <code>0 <= start + * <= limit</code>. + * @param limit the ending index, exclusive; <code>start <= limit + * <= text.length()</code>. + * @param c character to search for + * @return Offset of the first instance of c, or -1 if not found. + */ +//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons. +// static int32_t quotedIndexOf(const UnicodeString& text, +// int32_t start, int32_t limit, +// UChar c); + + /** + * Skip over a sequence of zero or more white space characters at pos. + * @param advance if true, advance pos to the first non-white-space + * character at or after pos, or str.length(), if there is none. + * Otherwise leave pos unchanged. + * @return the index of the first non-white-space character at or + * after pos, or str.length(), if there is none. + */ + static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos, + UBool advance = false); + + /** + * Skip over Pattern_White_Space in a Replaceable. + * Skipping may be done in the forward or + * reverse direction. In either case, the leftmost index will be + * inclusive, and the rightmost index will be exclusive. That is, + * given a range defined as [start, limit), the call + * skipWhitespace(text, start, limit) will advance start past leading + * whitespace, whereas the call skipWhitespace(text, limit, start), + * will back up limit past trailing whitespace. + * @param text the text to be analyzed + * @param pos either the start or limit of a range of 'text', to skip + * leading or trailing whitespace, respectively + * @param stop either the limit or start of a range of 'text', to skip + * leading or trailing whitespace, respectively + * @return the new start or limit, depending on what was passed in to + * 'pos' + */ +//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons. +//? static int32_t skipWhitespace(const Replaceable& text, +//? int32_t pos, int32_t stop); + + /** + * Parse a single non-whitespace character 'ch', optionally + * preceded by whitespace. + * @param id the string to be parsed + * @param pos INPUT-OUTPUT parameter. On input, pos[0] is the + * offset of the first character to be parsed. On output, pos[0] + * is the index after the last parsed character. If the parse + * fails, pos[0] will be unchanged. + * @param ch the non-whitespace character to be parsed. + * @return true if 'ch' is seen preceded by zero or more + * whitespace characters. + */ + static UBool parseChar(const UnicodeString& id, int32_t& pos, UChar ch); + + /** + * Parse a pattern string starting at offset pos. Keywords are + * matched case-insensitively. Spaces may be skipped and may be + * optional or required. Integer values may be parsed, and if + * they are, they will be returned in the given array. If + * successful, the offset of the next non-space character is + * returned. On failure, -1 is returned. + * @param pattern must only contain lowercase characters, which + * will match their uppercase equivalents as well. A space + * character matches one or more required spaces. A '~' character + * matches zero or more optional spaces. A '#' character matches + * an integer and stores it in parsedInts, which the caller must + * ensure has enough capacity. + * @param parsedInts array to receive parsed integers. Caller + * must ensure that parsedInts.length is >= the number of '#' + * signs in 'pattern'. + * @return the position after the last character parsed, or -1 if + * the parse failed + */ + static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit, + const UnicodeString& pattern, int32_t* parsedInts); + + /** + * Parse a pattern string within the given Replaceable and a parsing + * pattern. Characters are matched literally and case-sensitively + * except for the following special characters: + * + * ~ zero or more Pattern_White_Space chars + * + * If end of pattern is reached with all matches along the way, + * pos is advanced to the first unparsed index and returned. + * Otherwise -1 is returned. + * @param pat pattern that controls parsing + * @param text text to be parsed, starting at index + * @param index offset to first character to parse + * @param limit offset after last character to parse + * @return index after last parsed character, or -1 on parse failure. + */ + static int32_t parsePattern(const UnicodeString& pat, + const Replaceable& text, + int32_t index, + int32_t limit); + + /** + * Parse an integer at pos, either of the form \d+ or of the form + * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex, + * or octal format. + * @param pos INPUT-OUTPUT parameter. On input, the index of the first + * character to parse. On output, the index of the character after the + * last parsed character. + */ + static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit); + + /** + * Parse an integer at pos using only ASCII digits. + * Base 10 only. + * @param pos INPUT-OUTPUT parameter. On input, the index of the first + * character to parse. On output, the index of the character after the + * last parsed character. + */ + static int32_t parseAsciiInteger(const UnicodeString& str, int32_t& pos); + + /** + * Parse a Unicode identifier from the given string at the given + * position. Return the identifier, or an empty string if there + * is no identifier. + * @param str the string to parse + * @param pos INPUT-OUPUT parameter. On INPUT, pos is the + * first character to examine. It must be less than str.length(), + * and it must not point to a whitespace character. That is, must + * have pos < str.length() and + * !UCharacter::isWhitespace(str.char32At(pos)). On + * OUTPUT, the position after the last parsed character. + * @return the Unicode identifier, or an empty string if there is + * no valid identifier at pos. + */ + static UnicodeString parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos); + + /** + * Parse an unsigned 31-bit integer at the given offset. Use + * UCharacter.digit() to parse individual characters into digits. + * @param text the text to be parsed + * @param pos INPUT-OUTPUT parameter. On entry, pos is the + * offset within text at which to start parsing; it should point + * to a valid digit. On exit, pos is the offset after the last + * parsed character. If the parse failed, it will be unchanged on + * exit. Must be >= 0 on entry. + * @param radix the radix in which to parse; must be >= 2 and <= + * 36. + * @return a non-negative parsed number, or -1 upon parse failure. + * Parse fails if there are no digits, that is, if pos does not + * point to a valid digit on entry, or if the number to be parsed + * does not fit into a 31-bit unsigned integer. + */ + static int32_t parseNumber(const UnicodeString& text, + int32_t& pos, int8_t radix); + + static void appendToRule(UnicodeString& rule, + UChar32 c, + UBool isLiteral, + UBool escapeUnprintable, + UnicodeString& quoteBuf); + + static void appendToRule(UnicodeString& rule, + const UnicodeString& text, + UBool isLiteral, + UBool escapeUnprintable, + UnicodeString& quoteBuf); + + static void appendToRule(UnicodeString& rule, + const UnicodeMatcher* matcher, + UBool escapeUnprintable, + UnicodeString& quoteBuf); + +private: + // do not instantiate + ICU_Utility(); +}; + +U_NAMESPACE_END + +#endif +//eof diff --git a/thirdparty/icu4c/common/util_props.cpp b/thirdparty/icu4c/common/util_props.cpp new file mode 100644 index 0000000000..95a112bc91 --- /dev/null +++ b/thirdparty/icu4c/common/util_props.cpp @@ -0,0 +1,217 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2001-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 11/19/2001 aliu Creation. +********************************************************************** +*/ + +#include "unicode/uchar.h" +#include "unicode/utf16.h" +#include "patternprops.h" +#include "util.h" + +U_NAMESPACE_BEGIN + +/** + * Parse an integer at pos, either of the form \d+ or of the form + * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex, + * or octal format. + * @param pos INPUT-OUTPUT parameter. On input, the first + * character to parse. On output, the character after the last + * parsed character. + */ +int32_t ICU_Utility::parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit) { + int32_t count = 0; + int32_t value = 0; + int32_t p = pos; + int8_t radix = 10; + + if (p < limit && rule.charAt(p) == 48 /*0*/) { + if (p+1 < limit && (rule.charAt(p+1) == 0x78 /*x*/ || rule.charAt(p+1) == 0x58 /*X*/)) { + p += 2; + radix = 16; + } + else { + p++; + count = 1; + radix = 8; + } + } + + while (p < limit) { + int32_t d = u_digit(rule.charAt(p++), radix); + if (d < 0) { + --p; + break; + } + ++count; + int32_t v = (value * radix) + d; + if (v <= value) { + // If there are too many input digits, at some point + // the value will go negative, e.g., if we have seen + // "0x8000000" already and there is another '0', when + // we parse the next 0 the value will go negative. + return 0; + } + value = v; + } + if (count > 0) { + pos = p; + } + return value; +} + +/** + * Parse a pattern string starting at offset pos. Keywords are + * matched case-insensitively. Spaces may be skipped and may be + * optional or required. Integer values may be parsed, and if + * they are, they will be returned in the given array. If + * successful, the offset of the next non-space character is + * returned. On failure, -1 is returned. + * @param pattern must only contain lowercase characters, which + * will match their uppercase equivalents as well. A space + * character matches one or more required spaces. A '~' character + * matches zero or more optional spaces. A '#' character matches + * an integer and stores it in parsedInts, which the caller must + * ensure has enough capacity. + * @param parsedInts array to receive parsed integers. Caller + * must ensure that parsedInts.length is >= the number of '#' + * signs in 'pattern'. + * @return the position after the last character parsed, or -1 if + * the parse failed + */ +int32_t ICU_Utility::parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit, + const UnicodeString& pattern, int32_t* parsedInts) { + // TODO Update this to handle surrogates + int32_t p; + int32_t intCount = 0; // number of integers parsed + for (int32_t i=0; i<pattern.length(); ++i) { + UChar cpat = pattern.charAt(i); + UChar c; + switch (cpat) { + case 32 /*' '*/: + if (pos >= limit) { + return -1; + } + c = rule.charAt(pos++); + if (!PatternProps::isWhiteSpace(c)) { + return -1; + } + // FALL THROUGH to skipWhitespace + U_FALLTHROUGH; + case 126 /*'~'*/: + pos = skipWhitespace(rule, pos); + break; + case 35 /*'#'*/: + p = pos; + parsedInts[intCount++] = parseInteger(rule, p, limit); + if (p == pos) { + // Syntax error; failed to parse integer + return -1; + } + pos = p; + break; + default: + if (pos >= limit) { + return -1; + } + c = (UChar) u_tolower(rule.charAt(pos++)); + if (c != cpat) { + return -1; + } + break; + } + } + return pos; +} + +/** + * Parse a Unicode identifier from the given string at the given + * position. Return the identifier, or an empty string if there + * is no identifier. + * @param str the string to parse + * @param pos INPUT-OUPUT parameter. On INPUT, pos is the + * first character to examine. It must be less than str.length(), + * and it must not point to a whitespace character. That is, must + * have pos < str.length(). On + * OUTPUT, the position after the last parsed character. + * @return the Unicode identifier, or an empty string if there is + * no valid identifier at pos. + */ +UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) { + // assert(pos < str.length()); + UnicodeString buf; + int p = pos; + while (p < str.length()) { + UChar32 ch = str.char32At(p); + if (buf.length() == 0) { + if (u_isIDStart(ch)) { + buf.append(ch); + } else { + buf.truncate(0); + return buf; + } + } else { + if (u_isIDPart(ch)) { + buf.append(ch); + } else { + break; + } + } + p += U16_LENGTH(ch); + } + pos = p; + return buf; +} + +/** + * Parse an unsigned 31-bit integer at the given offset. Use + * UCharacter.digit() to parse individual characters into digits. + * @param text the text to be parsed + * @param pos INPUT-OUTPUT parameter. On entry, pos[0] is the + * offset within text at which to start parsing; it should point + * to a valid digit. On exit, pos[0] is the offset after the last + * parsed character. If the parse failed, it will be unchanged on + * exit. Must be >= 0 on entry. + * @param radix the radix in which to parse; must be >= 2 and <= + * 36. + * @return a non-negative parsed number, or -1 upon parse failure. + * Parse fails if there are no digits, that is, if pos[0] does not + * point to a valid digit on entry, or if the number to be parsed + * does not fit into a 31-bit unsigned integer. + */ +int32_t ICU_Utility::parseNumber(const UnicodeString& text, + int32_t& pos, int8_t radix) { + // assert(pos[0] >= 0); + // assert(radix >= 2); + // assert(radix <= 36); + int32_t n = 0; + int32_t p = pos; + while (p < text.length()) { + UChar32 ch = text.char32At(p); + int32_t d = u_digit(ch, radix); + if (d < 0) { + break; + } + n = radix*n + d; + // ASSUME that when a 32-bit integer overflows it becomes + // negative. E.g., 214748364 * 10 + 8 => negative value. + if (n < 0) { + return -1; + } + ++p; + } + if (p == pos) { + return -1; + } + pos = p; + return n; +} + +U_NAMESPACE_END + diff --git a/thirdparty/icu4c/common/utrace.cpp b/thirdparty/icu4c/common/utrace.cpp new file mode 100644 index 0000000000..c981546594 --- /dev/null +++ b/thirdparty/icu4c/common/utrace.cpp @@ -0,0 +1,504 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2003-2014, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: utrace.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +*/ + +#include "unicode/utrace.h" +#include "utracimp.h" +#include "cstring.h" +#include "uassert.h" +#include "ucln_cmn.h" + + +static UTraceEntry *pTraceEntryFunc = NULL; +static UTraceExit *pTraceExitFunc = NULL; +static UTraceData *pTraceDataFunc = NULL; +static const void *gTraceContext = NULL; + +/** + * \var utrace_level + * Trace level variable. Negative for "off". + */ +static int32_t +utrace_level = UTRACE_ERROR; + +U_CAPI void U_EXPORT2 +utrace_entry(int32_t fnNumber) { + if (pTraceEntryFunc != NULL) { + (*pTraceEntryFunc)(gTraceContext, fnNumber); + } +} + + +static const char gExitFmt[] = "Returns."; +static const char gExitFmtValue[] = "Returns %d."; +static const char gExitFmtStatus[] = "Returns. Status = %d."; +static const char gExitFmtValueStatus[] = "Returns %d. Status = %d."; +static const char gExitFmtPtrStatus[] = "Returns %d. Status = %p."; + +U_CAPI void U_EXPORT2 +utrace_exit(int32_t fnNumber, int32_t returnType, ...) { + if (pTraceExitFunc != NULL) { + va_list args; + const char *fmt; + + switch (returnType) { + case 0: + fmt = gExitFmt; + break; + case UTRACE_EXITV_I32: + fmt = gExitFmtValue; + break; + case UTRACE_EXITV_STATUS: + fmt = gExitFmtStatus; + break; + case UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS: + fmt = gExitFmtValueStatus; + break; + case UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS: + fmt = gExitFmtPtrStatus; + break; + default: + UPRV_UNREACHABLE; + } + + va_start(args, returnType); + (*pTraceExitFunc)(gTraceContext, fnNumber, fmt, args); + va_end(args); + } +} + + + +U_CAPI void U_EXPORT2 +utrace_data(int32_t fnNumber, int32_t level, const char *fmt, ...) { + if (pTraceDataFunc != NULL) { + va_list args; + va_start(args, fmt ); + (*pTraceDataFunc)(gTraceContext, fnNumber, level, fmt, args); + va_end(args); + } +} + + +static void outputChar(char c, char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) { + int32_t i; + /* Check whether a start of line indenting is needed. Three cases: + * 1. At the start of the first line (output index == 0). + * 2. At the start of subsequent lines (preceeding char in buffer == '\n') + * 3. When preflighting buffer len (buffer capacity is exceeded), when + * a \n is output. Ideally we wouldn't do the indent until the following char + * is received, but that won't work because there's no place to remember that + * the preceding char was \n. Meaning that we may overstimate the + * buffer size needed. No harm done. + */ + if (*outIx==0 || /* case 1. */ + (c!='\n' && c!=0 && *outIx < capacity && outBuf[(*outIx)-1]=='\n') || /* case 2. */ + (c=='\n' && *outIx>=capacity)) /* case 3 */ + { + /* At the start of a line. Indent. */ + for(i=0; i<indent; i++) { + if (*outIx < capacity) { + outBuf[*outIx] = ' '; + } + (*outIx)++; + } + } + + if (*outIx < capacity) { + outBuf[*outIx] = c; + } + if (c != 0) { + /* Nulls only appear as end-of-string terminators. Move them to the output + * buffer, but do not update the length of the buffer, so that any + * following output will overwrite the null. */ + (*outIx)++; + } +} + +static void outputHexBytes(int64_t val, int32_t charsToOutput, + char *outBuf, int32_t *outIx, int32_t capacity) { + static const char gHexChars[] = "0123456789abcdef"; + int32_t shiftCount; + for (shiftCount=(charsToOutput-1)*4; shiftCount >= 0; shiftCount-=4) { + char c = gHexChars[(val >> shiftCount) & 0xf]; + outputChar(c, outBuf, outIx, capacity, 0); + } +} + +/* Output a pointer value in hex. Work with any size of pointer */ +static void outputPtrBytes(void *val, char *outBuf, int32_t *outIx, int32_t capacity) { + uint32_t i; + int32_t incVal = 1; /* +1 for big endian, -1 for little endian */ + char *p = (char *)&val; /* point to current byte to output in the ptr val */ + +#if !U_IS_BIG_ENDIAN + /* Little Endian. Move p to most significant end of the value */ + incVal = -1; + p += sizeof(void *) - 1; +#endif + + /* Loop through the bytes of the ptr as it sits in memory, from + * most significant to least significant end */ + for (i=0; i<sizeof(void *); i++) { + outputHexBytes(*p, 2, outBuf, outIx, capacity); + p += incVal; + } +} + +static void outputString(const char *s, char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) { + int32_t i = 0; + char c; + if (s==NULL) { + s = "*NULL*"; + } + do { + c = s[i++]; + outputChar(c, outBuf, outIx, capacity, indent); + } while (c != 0); +} + + + +static void outputUString(const UChar *s, int32_t len, + char *outBuf, int32_t *outIx, int32_t capacity, int32_t indent) { + int32_t i = 0; + UChar c; + if (s==NULL) { + outputString(NULL, outBuf, outIx, capacity, indent); + return; + } + + for (i=0; i<len || len==-1; i++) { + c = s[i]; + outputHexBytes(c, 4, outBuf, outIx, capacity); + outputChar(' ', outBuf, outIx, capacity, indent); + if (len == -1 && c==0) { + break; + } + } +} + +U_CAPI int32_t U_EXPORT2 +utrace_vformat(char *outBuf, int32_t capacity, int32_t indent, const char *fmt, va_list args) { + int32_t outIx = 0; + int32_t fmtIx = 0; + char fmtC; + char c; + int32_t intArg; + int64_t longArg = 0; + char *ptrArg; + + /* Loop runs once for each character in the format string. + */ + for (;;) { + fmtC = fmt[fmtIx++]; + if (fmtC != '%') { + /* Literal character, not part of a %sequence. Just copy it to the output. */ + outputChar(fmtC, outBuf, &outIx, capacity, indent); + if (fmtC == 0) { + /* We hit the null that terminates the format string. + * This is the normal (and only) exit from the loop that + * interprets the format + */ + break; + } + continue; + } + + /* We encountered a '%'. Pick up the following format char */ + fmtC = fmt[fmtIx++]; + + switch (fmtC) { + case 'c': + /* single 8 bit char */ + c = (char)va_arg(args, int32_t); + outputChar(c, outBuf, &outIx, capacity, indent); + break; + + case 's': + /* char * string, null terminated. */ + ptrArg = va_arg(args, char *); + outputString((const char *)ptrArg, outBuf, &outIx, capacity, indent); + break; + + case 'S': + /* UChar * string, with length, len==-1 for null terminated. */ + ptrArg = va_arg(args, char *); /* Ptr */ + intArg =(int32_t)va_arg(args, int32_t); /* Length */ + outputUString((const UChar *)ptrArg, intArg, outBuf, &outIx, capacity, indent); + break; + + case 'b': + /* 8 bit int */ + intArg = va_arg(args, int); + outputHexBytes(intArg, 2, outBuf, &outIx, capacity); + break; + + case 'h': + /* 16 bit int */ + intArg = va_arg(args, int); + outputHexBytes(intArg, 4, outBuf, &outIx, capacity); + break; + + case 'd': + /* 32 bit int */ + intArg = va_arg(args, int); + outputHexBytes(intArg, 8, outBuf, &outIx, capacity); + break; + + case 'l': + /* 64 bit long */ + longArg = va_arg(args, int64_t); + outputHexBytes(longArg, 16, outBuf, &outIx, capacity); + break; + + case 'p': + /* Pointers. */ + ptrArg = va_arg(args, char *); + outputPtrBytes(ptrArg, outBuf, &outIx, capacity); + break; + + case 0: + /* Single '%' at end of fmt string. Output as literal '%'. + * Back up index into format string so that the terminating null will be + * re-fetched in the outer loop, causing it to terminate. + */ + outputChar('%', outBuf, &outIx, capacity, indent); + fmtIx--; + break; + + case 'v': + { + /* Vector of values, e.g. %vh */ + char vectorType; + int32_t vectorLen; + const char *i8Ptr; + int16_t *i16Ptr; + int32_t *i32Ptr; + int64_t *i64Ptr; + void **ptrPtr; + int32_t charsToOutput = 0; + int32_t i; + + vectorType = fmt[fmtIx]; /* b, h, d, l, p, etc. */ + if (vectorType != 0) { + fmtIx++; + } + i8Ptr = (const char *)va_arg(args, void*); + i16Ptr = (int16_t *)i8Ptr; + i32Ptr = (int32_t *)i8Ptr; + i64Ptr = (int64_t *)i8Ptr; + ptrPtr = (void **)i8Ptr; + vectorLen =(int32_t)va_arg(args, int32_t); + if (ptrPtr == NULL) { + outputString("*NULL* ", outBuf, &outIx, capacity, indent); + } else { + for (i=0; i<vectorLen || vectorLen==-1; i++) { + switch (vectorType) { + case 'b': + charsToOutput = 2; + longArg = *i8Ptr++; + break; + case 'h': + charsToOutput = 4; + longArg = *i16Ptr++; + break; + case 'd': + charsToOutput = 8; + longArg = *i32Ptr++; + break; + case 'l': + charsToOutput = 16; + longArg = *i64Ptr++; + break; + case 'p': + charsToOutput = 0; + outputPtrBytes(*ptrPtr, outBuf, &outIx, capacity); + longArg = *ptrPtr==NULL? 0: 1; /* test for null terminated array. */ + ptrPtr++; + break; + case 'c': + charsToOutput = 0; + outputChar(*i8Ptr, outBuf, &outIx, capacity, indent); + longArg = *i8Ptr; /* for test for null terminated array. */ + i8Ptr++; + break; + case 's': + charsToOutput = 0; + outputString((const char *)*ptrPtr, outBuf, &outIx, capacity, indent); + outputChar('\n', outBuf, &outIx, capacity, indent); + longArg = *ptrPtr==NULL? 0: 1; /* for test for null term. array. */ + ptrPtr++; + break; + + case 'S': + charsToOutput = 0; + outputUString((const UChar *)*ptrPtr, -1, outBuf, &outIx, capacity, indent); + outputChar('\n', outBuf, &outIx, capacity, indent); + longArg = *ptrPtr==NULL? 0: 1; /* for test for null term. array. */ + ptrPtr++; + break; + + + } + if (charsToOutput > 0) { + outputHexBytes(longArg, charsToOutput, outBuf, &outIx, capacity); + outputChar(' ', outBuf, &outIx, capacity, indent); + } + if (vectorLen == -1 && longArg == 0) { + break; + } + } + } + outputChar('[', outBuf, &outIx, capacity, indent); + outputHexBytes(vectorLen, 8, outBuf, &outIx, capacity); + outputChar(']', outBuf, &outIx, capacity, indent); + } + break; + + + default: + /* %. in format string, where . is some character not in the set + * of recognized format chars. Just output it as if % wasn't there. + * (Covers "%%" outputing a single '%') + */ + outputChar(fmtC, outBuf, &outIx, capacity, indent); + } + } + outputChar(0, outBuf, &outIx, capacity, indent); /* Make sure that output is null terminated */ + return outIx + 1; /* outIx + 1 because outIx does not increment when outputing final null. */ +} + + + + +U_CAPI int32_t U_EXPORT2 +utrace_format(char *outBuf, int32_t capacity, + int32_t indent, const char *fmt, ...) { + int32_t retVal; + va_list args; + va_start(args, fmt ); + retVal = utrace_vformat(outBuf, capacity, indent, fmt, args); + va_end(args); + return retVal; +} + + +U_CAPI void U_EXPORT2 +utrace_setFunctions(const void *context, + UTraceEntry *e, UTraceExit *x, UTraceData *d) { + pTraceEntryFunc = e; + pTraceExitFunc = x; + pTraceDataFunc = d; + gTraceContext = context; +} + + +U_CAPI void U_EXPORT2 +utrace_getFunctions(const void **context, + UTraceEntry **e, UTraceExit **x, UTraceData **d) { + *e = pTraceEntryFunc; + *x = pTraceExitFunc; + *d = pTraceDataFunc; + *context = gTraceContext; +} + +U_CAPI void U_EXPORT2 +utrace_setLevel(int32_t level) { + if (level < UTRACE_OFF) { + level = UTRACE_OFF; + } + if (level > UTRACE_VERBOSE) { + level = UTRACE_VERBOSE; + } + utrace_level = level; +} + +U_CAPI int32_t U_EXPORT2 +utrace_getLevel() { + return utrace_level; +} + + +U_CFUNC UBool +utrace_cleanup() { + pTraceEntryFunc = NULL; + pTraceExitFunc = NULL; + pTraceDataFunc = NULL; + utrace_level = UTRACE_OFF; + gTraceContext = NULL; + return TRUE; +} + + +static const char * const +trFnName[] = { + "u_init", + "u_cleanup", + NULL +}; + + +static const char * const +trConvNames[] = { + "ucnv_open", + "ucnv_openPackage", + "ucnv_openAlgorithmic", + "ucnv_clone", + "ucnv_close", + "ucnv_flushCache", + "ucnv_load", + "ucnv_unload", + NULL +}; + + +static const char * const +trCollNames[] = { + "ucol_open", + "ucol_close", + "ucol_strcoll", + "ucol_getSortKey", + "ucol_getLocale", + "ucol_nextSortKeyPart", + "ucol_strcollIter", + "ucol_openFromShortString", + "ucol_strcollUTF8", + NULL +}; + + +static const char* const +trResDataNames[] = { + "resc", + "bundle-open", + "file-open", + "res-open", + NULL +}; + + +U_CAPI const char * U_EXPORT2 +utrace_functionName(int32_t fnNumber) { + if(UTRACE_FUNCTION_START <= fnNumber && fnNumber < UTRACE_FUNCTION_LIMIT) { + return trFnName[fnNumber]; + } else if(UTRACE_CONVERSION_START <= fnNumber && fnNumber < UTRACE_CONVERSION_LIMIT) { + return trConvNames[fnNumber - UTRACE_CONVERSION_START]; + } else if(UTRACE_COLLATION_START <= fnNumber && fnNumber < UTRACE_COLLATION_LIMIT){ + return trCollNames[fnNumber - UTRACE_COLLATION_START]; + } else if(UTRACE_UDATA_START <= fnNumber && fnNumber < UTRACE_RES_DATA_LIMIT){ + return trResDataNames[fnNumber - UTRACE_UDATA_START]; + } else { + return "[BOGUS Trace Function Number]"; + } +} + diff --git a/thirdparty/icu4c/common/utracimp.h b/thirdparty/icu4c/common/utracimp.h new file mode 100644 index 0000000000..f32fe1db39 --- /dev/null +++ b/thirdparty/icu4c/common/utracimp.h @@ -0,0 +1,391 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2003-2009, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utracimp.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2003aug06 +* created by: Markus W. Scherer +* +* Internal header for ICU tracing/logging. +* +* +* Various notes: +* - using a trace level variable to only call trace functions +* when the level is sufficient +* - using the same variable for tracing on/off to never make a function +* call when off +* - the function number is put into a local variable by the entry macro +* and used implicitly to avoid copy&paste/typing mistakes by the developer +* - the application must call utrace_setFunctions() and pass in +* implementations for the trace functions +* - ICU trace macros call ICU functions that route through the function +* pointers if they have been set; +* this avoids an indirection at the call site +* (which would cost more code for another check and for the indirection) +* +* ### TODO Issues: +* - Verify that va_list is portable among compilers for the same platform. +* va_list should be portable because printf() would fail otherwise! +* - Should enum values like UTraceLevel be passed into int32_t-type arguments, +* or should enum types be used? +*/ + +#ifndef __UTRACIMP_H__ +#define __UTRACIMP_H__ + +#include "unicode/utrace.h" +#include <stdarg.h> + +U_CDECL_BEGIN + +/** + * Traced Function Exit return types. + * Flags indicating the number and types of varargs included in a call + * to a UTraceExit function. + * Bits 0-3: The function return type. First variable param. + * Bit 4: Flag for presence of U_ErrorCode status param. + * @internal + */ +typedef enum UTraceExitVal { + /** The traced function returns no value @internal */ + UTRACE_EXITV_NONE = 0, + /** The traced function returns an int32_t, or compatible, type. @internal */ + UTRACE_EXITV_I32 = 1, + /** The traced function returns a pointer @internal */ + UTRACE_EXITV_PTR = 2, + /** The traced function returns a UBool @internal */ + UTRACE_EXITV_BOOL = 3, + /** Mask to extract the return type values from a UTraceExitVal @internal */ + UTRACE_EXITV_MASK = 0xf, + /** Bit indicating that the traced function includes a UErrorCode parameter @internal */ + UTRACE_EXITV_STATUS = 0x10 +} UTraceExitVal; + +/** + * Trace function for the entry point of a function. + * Do not use directly, use UTRACE_ENTRY instead. + * @param fnNumber The UTraceFunctionNumber for the current function. + * @internal + */ +U_CAPI void U_EXPORT2 +utrace_entry(int32_t fnNumber); + +/** + * Trace function for each exit point of a function. + * Do not use directly, use UTRACE_EXIT* instead. + * @param fnNumber The UTraceFunctionNumber for the current function. + * @param returnType The type of the value returned by the function. + * @param errorCode The UErrorCode value at function exit. See UTRACE_EXIT. + * @internal + */ +U_CAPI void U_EXPORT2 +utrace_exit(int32_t fnNumber, int32_t returnType, ...); + + +/** + * Trace function used inside functions that have a UTRACE_ENTRY() statement. + * Do not use directly, use UTRACE_DATAX() macros instead. + * + * @param utraceFnNumber The number of the current function, from the local + * variable of the same name. + * @param level The trace level for this message. + * @param fmt The trace format string. + * + * @internal + */ +U_CAPI void U_EXPORT2 +utrace_data(int32_t utraceFnNumber, int32_t level, const char *fmt, ...); + +U_CDECL_END + +#if U_ENABLE_TRACING + +/** + * Boolean expression to see if ICU tracing is turned on + * to at least the specified level. + * @internal + */ +#define UTRACE_LEVEL(level) (utrace_getLevel()>=(level)) + +/** + * Flag bit in utraceFnNumber, the local variable added to each function + * with tracing code to contains the function number. + * + * Set the flag if the function's entry is traced, which will cause the + * function's exit to also be traced. utraceFnNumber is uncoditionally + * set at entry, whether or not the entry is traced, so that it will + * always be available for error trace output. + * @internal + */ +#define UTRACE_TRACED_ENTRY 0x80000000 + +/** + * Trace statement for the entry point of a function. + * Stores the function number in a local variable. + * In C code, must be placed immediately after the last variable declaration. + * Must be matched with UTRACE_EXIT() at all function exit points. + * + * Tracing should start with UTRACE_ENTRY after checking for + * U_FAILURE at function entry, so that if a function returns immediately + * because of a pre-existing error condition, it does not show up in the trace, + * consistent with ICU's error handling model. + * + * @param fnNumber The UTraceFunctionNumber for the current function. + * @internal + */ +#define UTRACE_ENTRY(fnNumber) \ + int32_t utraceFnNumber=(fnNumber); \ +UPRV_BLOCK_MACRO_BEGIN { \ + if(utrace_getLevel()>=UTRACE_INFO) { \ + utrace_entry(fnNumber); \ + utraceFnNumber |= UTRACE_TRACED_ENTRY; \ + } \ +} UPRV_BLOCK_MACRO_END + + +/** + * Trace statement for the entry point of open and close functions. + * Produces trace output at a less verbose setting than plain UTRACE_ENTRY + * Stores the function number in a local variable. + * In C code, must be placed immediately after the last variable declaration. + * Must be matched with UTRACE_EXIT() at all function exit points. + * + * @param fnNumber The UTraceFunctionNumber for the current function. + * @internal + */ +#define UTRACE_ENTRY_OC(fnNumber) \ + int32_t utraceFnNumber=(fnNumber); \ +UPRV_BLOCK_MACRO_BEGIN { \ + if(utrace_getLevel()>=UTRACE_OPEN_CLOSE) { \ + utrace_entry(fnNumber); \ + utraceFnNumber |= UTRACE_TRACED_ENTRY; \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Trace statement for each exit point of a function that has a UTRACE_ENTRY() + * statement. + * + * @param errorCode The function's ICU UErrorCode value at function exit, + * or U_ZERO_ERROR if the function does not use a UErrorCode. + * 0==U_ZERO_ERROR indicates success, + * positive values an error (see u_errorName()), + * negative values an informational status. + * + * @internal + */ +#define UTRACE_EXIT() UPRV_BLOCK_MACRO_BEGIN { \ + if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ + utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_NONE); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Trace statement for each exit point of a function that has a UTRACE_ENTRY() + * statement, and that returns a value. + * + * @param val The function's return value, int32_t or comatible type. + * + * @internal + */ +#define UTRACE_EXIT_VALUE(val) UPRV_BLOCK_MACRO_BEGIN { \ + if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ + utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_I32, val); \ + } \ +} UPRV_BLOCK_MACRO_END + +#define UTRACE_EXIT_STATUS(status) UPRV_BLOCK_MACRO_BEGIN { \ + if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ + utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, UTRACE_EXITV_STATUS, status); \ + } \ +} UPRV_BLOCK_MACRO_END + +#define UTRACE_EXIT_VALUE_STATUS(val, status) UPRV_BLOCK_MACRO_BEGIN { \ + if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ + utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_I32 | UTRACE_EXITV_STATUS), val, status); \ + } \ +} UPRV_BLOCK_MACRO_END + +#define UTRACE_EXIT_PTR_STATUS(ptr, status) UPRV_BLOCK_MACRO_BEGIN { \ + if(utraceFnNumber & UTRACE_TRACED_ENTRY) { \ + utrace_exit(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (UTRACE_EXITV_PTR | UTRACE_EXITV_STATUS), ptr, status); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Trace statement used inside functions that have a UTRACE_ENTRY() statement. + * Takes no data arguments. + * The number of arguments for this macro must match the number of inserts + * in the format string. Vector inserts count as two arguments. + * Calls utrace_data() if the level is high enough. + * @internal + */ +#define UTRACE_DATA0(level, fmt) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTRACE_LEVEL(level)) { \ + utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Trace statement used inside functions that have a UTRACE_ENTRY() statement. + * Takes one data argument. + * The number of arguments for this macro must match the number of inserts + * in the format string. Vector inserts count as two arguments. + * Calls utrace_data() if the level is high enough. + * @internal + */ +#define UTRACE_DATA1(level, fmt, a) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTRACE_LEVEL(level)) { \ + utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Trace statement used inside functions that have a UTRACE_ENTRY() statement. + * Takes two data arguments. + * The number of arguments for this macro must match the number of inserts + * in the format string. Vector inserts count as two arguments. + * Calls utrace_data() if the level is high enough. + * @internal + */ +#define UTRACE_DATA2(level, fmt, a, b) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTRACE_LEVEL(level)) { \ + utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY , (level), (fmt), (a), (b)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Trace statement used inside functions that have a UTRACE_ENTRY() statement. + * Takes three data arguments. + * The number of arguments for this macro must match the number of inserts + * in the format string. Vector inserts count as two arguments. + * Calls utrace_data() if the level is high enough. + * @internal + */ +#define UTRACE_DATA3(level, fmt, a, b, c) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTRACE_LEVEL(level)) { \ + utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Trace statement used inside functions that have a UTRACE_ENTRY() statement. + * Takes four data arguments. + * The number of arguments for this macro must match the number of inserts + * in the format string. Vector inserts count as two arguments. + * Calls utrace_data() if the level is high enough. + * @internal + */ +#define UTRACE_DATA4(level, fmt, a, b, c, d) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTRACE_LEVEL(level)) { \ + utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Trace statement used inside functions that have a UTRACE_ENTRY() statement. + * Takes five data arguments. + * The number of arguments for this macro must match the number of inserts + * in the format string. Vector inserts count as two arguments. + * Calls utrace_data() if the level is high enough. + * @internal + */ +#define UTRACE_DATA5(level, fmt, a, b, c, d, e) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTRACE_LEVEL(level)) { \ + utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Trace statement used inside functions that have a UTRACE_ENTRY() statement. + * Takes six data arguments. + * The number of arguments for this macro must match the number of inserts + * in the format string. Vector inserts count as two arguments. + * Calls utrace_data() if the level is high enough. + * @internal + */ +#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTRACE_LEVEL(level)) { \ + utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Trace statement used inside functions that have a UTRACE_ENTRY() statement. + * Takes seven data arguments. + * The number of arguments for this macro must match the number of inserts + * in the format string. Vector inserts count as two arguments. + * Calls utrace_data() if the level is high enough. + * @internal + */ +#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTRACE_LEVEL(level)) { \ + utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Trace statement used inside functions that have a UTRACE_ENTRY() statement. + * Takes eight data arguments. + * The number of arguments for this macro must match the number of inserts + * in the format string. Vector inserts count as two arguments. + * Calls utrace_data() if the level is high enough. + * @internal + */ +#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTRACE_LEVEL(level)) { \ + utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** + * Trace statement used inside functions that have a UTRACE_ENTRY() statement. + * Takes nine data arguments. + * The number of arguments for this macro must match the number of inserts + * in the format string. Vector inserts count as two arguments. + * Calls utrace_data() if the level is high enough. + * @internal + */ +#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i) UPRV_BLOCK_MACRO_BEGIN { \ + if(UTRACE_LEVEL(level)) { \ + utrace_data(utraceFnNumber & ~UTRACE_TRACED_ENTRY, (level), (fmt), (a), (b), (c), (d), (e), (f), (g), (h), (i)); \ + } \ +} UPRV_BLOCK_MACRO_END + +#else + +/* + * When tracing is disabled, the following macros become empty + */ + +#define UTRACE_LEVEL(level) 0 +#define UTRACE_ENTRY(fnNumber) +#define UTRACE_ENTRY_OC(fnNumber) +#define UTRACE_EXIT() +#define UTRACE_EXIT_VALUE(val) +#define UTRACE_EXIT_STATUS(status) +#define UTRACE_EXIT_VALUE_STATUS(val, status) +#define UTRACE_EXIT_PTR_STATUS(ptr, status) +#define UTRACE_DATA0(level, fmt) +#define UTRACE_DATA1(level, fmt, a) +#define UTRACE_DATA2(level, fmt, a, b) +#define UTRACE_DATA3(level, fmt, a, b, c) +#define UTRACE_DATA4(level, fmt, a, b, c, d) +#define UTRACE_DATA5(level, fmt, a, b, c, d, e) +#define UTRACE_DATA6(level, fmt, a, b, c, d, e, f) +#define UTRACE_DATA7(level, fmt, a, b, c, d, e, f, g) +#define UTRACE_DATA8(level, fmt, a, b, c, d, e, f, g, h) +#define UTRACE_DATA9(level, fmt, a, b, c, d, e, f, g, h, i) + +#endif + +#endif diff --git a/thirdparty/icu4c/common/utrie.cpp b/thirdparty/icu4c/common/utrie.cpp new file mode 100644 index 0000000000..ecf9b1cba7 --- /dev/null +++ b/thirdparty/icu4c/common/utrie.cpp @@ -0,0 +1,1234 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: utrie.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001oct20 +* created by: Markus W. Scherer +* +* This is a common implementation of a "folded" trie. +* It is a kind of compressed, serializable table of 16- or 32-bit values associated with +* Unicode code points (0..0x10ffff). +*/ + +#ifdef UTRIE_DEBUG +# include <stdio.h> +#endif + +#include "unicode/utypes.h" +#include "cmemory.h" +#include "utrie.h" + +/* miscellaneous ------------------------------------------------------------ */ + +#undef ABS +#define ABS(x) ((x)>=0 ? (x) : -(x)) + +static inline UBool +equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) { + while(length>0 && *s==*t) { + ++s; + ++t; + --length; + } + return (UBool)(length==0); +} + +/* Building a trie ----------------------------------------------------------*/ + +U_CAPI UNewTrie * U_EXPORT2 +utrie_open(UNewTrie *fillIn, + uint32_t *aliasData, int32_t maxDataLength, + uint32_t initialValue, uint32_t leadUnitValue, + UBool latin1Linear) { + UNewTrie *trie; + int32_t i, j; + + if( maxDataLength<UTRIE_DATA_BLOCK_LENGTH || + (latin1Linear && maxDataLength<1024) + ) { + return NULL; + } + + if(fillIn!=NULL) { + trie=fillIn; + } else { + trie=(UNewTrie *)uprv_malloc(sizeof(UNewTrie)); + if(trie==NULL) { + return NULL; + } + } + uprv_memset(trie, 0, sizeof(UNewTrie)); + trie->isAllocated= (UBool)(fillIn==NULL); + + if(aliasData!=NULL) { + trie->data=aliasData; + trie->isDataAllocated=FALSE; + } else { + trie->data=(uint32_t *)uprv_malloc(maxDataLength*4); + if(trie->data==NULL) { + uprv_free(trie); + return NULL; + } + trie->isDataAllocated=TRUE; + } + + /* preallocate and reset the first data block (block index 0) */ + j=UTRIE_DATA_BLOCK_LENGTH; + + if(latin1Linear) { + /* preallocate and reset the first block (number 0) and Latin-1 (U+0000..U+00ff) after that */ + /* made sure above that maxDataLength>=1024 */ + + /* set indexes to point to consecutive data blocks */ + i=0; + do { + /* do this at least for trie->index[0] even if that block is only partly used for Latin-1 */ + trie->index[i++]=j; + j+=UTRIE_DATA_BLOCK_LENGTH; + } while(i<(256>>UTRIE_SHIFT)); + } + + /* reset the initially allocated blocks to the initial value */ + trie->dataLength=j; + while(j>0) { + trie->data[--j]=initialValue; + } + + trie->leadUnitValue=leadUnitValue; + trie->indexLength=UTRIE_MAX_INDEX_LENGTH; + trie->dataCapacity=maxDataLength; + trie->isLatin1Linear=latin1Linear; + trie->isCompacted=FALSE; + return trie; +} + +U_CAPI UNewTrie * U_EXPORT2 +utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataCapacity) { + UNewTrie *trie; + UBool isDataAllocated; + + /* do not clone if other is not valid or already compacted */ + if(other==NULL || other->data==NULL || other->isCompacted) { + return NULL; + } + + /* clone data */ + if(aliasData!=NULL && aliasDataCapacity>=other->dataCapacity) { + isDataAllocated=FALSE; + } else { + aliasDataCapacity=other->dataCapacity; + aliasData=(uint32_t *)uprv_malloc(other->dataCapacity*4); + if(aliasData==NULL) { + return NULL; + } + isDataAllocated=TRUE; + } + + trie=utrie_open(fillIn, aliasData, aliasDataCapacity, + other->data[0], other->leadUnitValue, + other->isLatin1Linear); + if(trie==NULL) { + uprv_free(aliasData); + } else { + uprv_memcpy(trie->index, other->index, sizeof(trie->index)); + uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4); + trie->dataLength=other->dataLength; + trie->isDataAllocated=isDataAllocated; + } + + return trie; +} + +U_CAPI void U_EXPORT2 +utrie_close(UNewTrie *trie) { + if(trie!=NULL) { + if(trie->isDataAllocated) { + uprv_free(trie->data); + trie->data=NULL; + } + if(trie->isAllocated) { + uprv_free(trie); + } + } +} + +U_CAPI uint32_t * U_EXPORT2 +utrie_getData(UNewTrie *trie, int32_t *pLength) { + if(trie==NULL || pLength==NULL) { + return NULL; + } + + *pLength=trie->dataLength; + return trie->data; +} + +static int32_t +utrie_allocDataBlock(UNewTrie *trie) { + int32_t newBlock, newTop; + + newBlock=trie->dataLength; + newTop=newBlock+UTRIE_DATA_BLOCK_LENGTH; + if(newTop>trie->dataCapacity) { + /* out of memory in the data array */ + return -1; + } + trie->dataLength=newTop; + return newBlock; +} + +/** + * No error checking for illegal arguments. + * + * @return -1 if no new data block available (out of memory in data array) + * @internal + */ +static int32_t +utrie_getDataBlock(UNewTrie *trie, UChar32 c) { + int32_t indexValue, newBlock; + + c>>=UTRIE_SHIFT; + indexValue=trie->index[c]; + if(indexValue>0) { + return indexValue; + } + + /* allocate a new data block */ + newBlock=utrie_allocDataBlock(trie); + if(newBlock<0) { + /* out of memory in the data array */ + return -1; + } + trie->index[c]=newBlock; + + /* copy-on-write for a block from a setRange() */ + uprv_memcpy(trie->data+newBlock, trie->data-indexValue, 4*UTRIE_DATA_BLOCK_LENGTH); + return newBlock; +} + +/** + * @return TRUE if the value was successfully set + */ +U_CAPI UBool U_EXPORT2 +utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value) { + int32_t block; + + /* valid, uncompacted trie and valid c? */ + if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) { + return FALSE; + } + + block=utrie_getDataBlock(trie, c); + if(block<0) { + return FALSE; + } + + trie->data[block+(c&UTRIE_MASK)]=value; + return TRUE; +} + +U_CAPI uint32_t U_EXPORT2 +utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero) { + int32_t block; + + /* valid, uncompacted trie and valid c? */ + if(trie==NULL || trie->isCompacted || (uint32_t)c>0x10ffff) { + if(pInBlockZero!=NULL) { + *pInBlockZero=TRUE; + } + return 0; + } + + block=trie->index[c>>UTRIE_SHIFT]; + if(pInBlockZero!=NULL) { + *pInBlockZero= (UBool)(block==0); + } + + return trie->data[ABS(block)+(c&UTRIE_MASK)]; +} + +/** + * @internal + */ +static void +utrie_fillBlock(uint32_t *block, UChar32 start, UChar32 limit, + uint32_t value, uint32_t initialValue, UBool overwrite) { + uint32_t *pLimit; + + pLimit=block+limit; + block+=start; + if(overwrite) { + while(block<pLimit) { + *block++=value; + } + } else { + while(block<pLimit) { + if(*block==initialValue) { + *block=value; + } + ++block; + } + } +} + +U_CAPI UBool U_EXPORT2 +utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite) { + /* + * repeat value in [start..limit[ + * mark index values for repeat-data blocks by setting bit 31 of the index values + * fill around existing values if any, if(overwrite) + */ + uint32_t initialValue; + int32_t block, rest, repeatBlock; + + /* valid, uncompacted trie and valid indexes? */ + if( trie==NULL || trie->isCompacted || + (uint32_t)start>0x10ffff || (uint32_t)limit>0x110000 || start>limit + ) { + return FALSE; + } + if(start==limit) { + return TRUE; /* nothing to do */ + } + + initialValue=trie->data[0]; + if(start&UTRIE_MASK) { + UChar32 nextStart; + + /* set partial block at [start..following block boundary[ */ + block=utrie_getDataBlock(trie, start); + if(block<0) { + return FALSE; + } + + nextStart=(start+UTRIE_DATA_BLOCK_LENGTH)&~UTRIE_MASK; + if(nextStart<=limit) { + utrie_fillBlock(trie->data+block, start&UTRIE_MASK, UTRIE_DATA_BLOCK_LENGTH, + value, initialValue, overwrite); + start=nextStart; + } else { + utrie_fillBlock(trie->data+block, start&UTRIE_MASK, limit&UTRIE_MASK, + value, initialValue, overwrite); + return TRUE; + } + } + + /* number of positions in the last, partial block */ + rest=limit&UTRIE_MASK; + + /* round down limit to a block boundary */ + limit&=~UTRIE_MASK; + + /* iterate over all-value blocks */ + if(value==initialValue) { + repeatBlock=0; + } else { + repeatBlock=-1; + } + while(start<limit) { + /* get index value */ + block=trie->index[start>>UTRIE_SHIFT]; + if(block>0) { + /* already allocated, fill in value */ + utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, overwrite); + } else if(trie->data[-block]!=value && (block==0 || overwrite)) { + /* set the repeatBlock instead of the current block 0 or range block */ + if(repeatBlock>=0) { + trie->index[start>>UTRIE_SHIFT]=-repeatBlock; + } else { + /* create and set and fill the repeatBlock */ + repeatBlock=utrie_getDataBlock(trie, start); + if(repeatBlock<0) { + return FALSE; + } + + /* set the negative block number to indicate that it is a repeat block */ + trie->index[start>>UTRIE_SHIFT]=-repeatBlock; + utrie_fillBlock(trie->data+repeatBlock, 0, UTRIE_DATA_BLOCK_LENGTH, value, initialValue, TRUE); + } + } + + start+=UTRIE_DATA_BLOCK_LENGTH; + } + + if(rest>0) { + /* set partial block at [last block boundary..limit[ */ + block=utrie_getDataBlock(trie, start); + if(block<0) { + return FALSE; + } + + utrie_fillBlock(trie->data+block, 0, rest, value, initialValue, overwrite); + } + + return TRUE; +} + +static int32_t +_findSameIndexBlock(const int32_t *idx, int32_t indexLength, + int32_t otherBlock) { + int32_t block, i; + + for(block=UTRIE_BMP_INDEX_LENGTH; block<indexLength; block+=UTRIE_SURROGATE_BLOCK_COUNT) { + for(i=0; i<UTRIE_SURROGATE_BLOCK_COUNT; ++i) { + if(idx[block+i]!=idx[otherBlock+i]) { + break; + } + } + if(i==UTRIE_SURROGATE_BLOCK_COUNT) { + return block; + } + } + return indexLength; +} + +/* + * Fold the normalization data for supplementary code points into + * a compact area on top of the BMP-part of the trie index, + * with the lead surrogates indexing this compact area. + * + * Duplicate the index values for lead surrogates: + * From inside the BMP area, where some may be overridden with folded values, + * to just after the BMP area, where they can be retrieved for + * code point lookups. + */ +static void +utrie_fold(UNewTrie *trie, UNewTrieGetFoldedValue *getFoldedValue, UErrorCode *pErrorCode) { + int32_t leadIndexes[UTRIE_SURROGATE_BLOCK_COUNT]; + int32_t *idx; + uint32_t value; + UChar32 c; + int32_t indexLength, block; +#ifdef UTRIE_DEBUG + int countLeadCUWithData=0; +#endif + + idx=trie->index; + + /* copy the lead surrogate indexes into a temporary array */ + uprv_memcpy(leadIndexes, idx+(0xd800>>UTRIE_SHIFT), 4*UTRIE_SURROGATE_BLOCK_COUNT); + + /* + * set all values for lead surrogate code *units* to leadUnitValue + * so that, by default, runtime lookups will find no data for associated + * supplementary code points, unless there is data for such code points + * which will result in a non-zero folding value below that is set for + * the respective lead units + * + * the above saved the indexes for surrogate code *points* + * fill the indexes with simplified code from utrie_setRange32() + */ + if(trie->leadUnitValue==trie->data[0]) { + block=0; /* leadUnitValue==initialValue, use all-initial-value block */ + } else { + /* create and fill the repeatBlock */ + block=utrie_allocDataBlock(trie); + if(block<0) { + /* data table overflow */ + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + utrie_fillBlock(trie->data+block, 0, UTRIE_DATA_BLOCK_LENGTH, trie->leadUnitValue, trie->data[0], TRUE); + block=-block; /* negative block number to indicate that it is a repeat block */ + } + for(c=(0xd800>>UTRIE_SHIFT); c<(0xdc00>>UTRIE_SHIFT); ++c) { + trie->index[c]=block; + } + + /* + * Fold significant index values into the area just after the BMP indexes. + * In case the first lead surrogate has significant data, + * its index block must be used first (in which case the folding is a no-op). + * Later all folded index blocks are moved up one to insert the copied + * lead surrogate indexes. + */ + indexLength=UTRIE_BMP_INDEX_LENGTH; + + /* search for any index (stage 1) entries for supplementary code points */ + for(c=0x10000; c<0x110000;) { + if(idx[c>>UTRIE_SHIFT]!=0) { + /* there is data, treat the full block for a lead surrogate */ + c&=~0x3ff; + +#ifdef UTRIE_DEBUG + ++countLeadCUWithData; + /* printf("supplementary data for lead surrogate U+%04lx\n", (long)(0xd7c0+(c>>10))); */ +#endif + + /* is there an identical index block? */ + block=_findSameIndexBlock(idx, indexLength, c>>UTRIE_SHIFT); + + /* + * get a folded value for [c..c+0x400[ and, + * if different from the value for the lead surrogate code point, + * set it for the lead surrogate code unit + */ + value=getFoldedValue(trie, c, block+UTRIE_SURROGATE_BLOCK_COUNT); + if(value!=utrie_get32(trie, U16_LEAD(c), NULL)) { + if(!utrie_set32(trie, U16_LEAD(c), value)) { + /* data table overflow */ + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + + /* if we did not find an identical index block... */ + if(block==indexLength) { + /* move the actual index (stage 1) entries from the supplementary position to the new one */ + uprv_memmove(idx+indexLength, + idx+(c>>UTRIE_SHIFT), + 4*UTRIE_SURROGATE_BLOCK_COUNT); + indexLength+=UTRIE_SURROGATE_BLOCK_COUNT; + } + } + c+=0x400; + } else { + c+=UTRIE_DATA_BLOCK_LENGTH; + } + } +#ifdef UTRIE_DEBUG + if(countLeadCUWithData>0) { + printf("supplementary data for %d lead surrogates\n", countLeadCUWithData); + } +#endif + + /* + * index array overflow? + * This is to guarantee that a folding offset is of the form + * UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023. + * If the index is too large, then n>=1024 and more than 10 bits are necessary. + * + * In fact, it can only ever become n==1024 with completely unfoldable data and + * the additional block of duplicated values for lead surrogates. + */ + if(indexLength>=UTRIE_MAX_INDEX_LENGTH) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + + /* + * make space for the lead surrogate index block and + * insert it between the BMP indexes and the folded ones + */ + uprv_memmove(idx+UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT, + idx+UTRIE_BMP_INDEX_LENGTH, + 4*(indexLength-UTRIE_BMP_INDEX_LENGTH)); + uprv_memcpy(idx+UTRIE_BMP_INDEX_LENGTH, + leadIndexes, + 4*UTRIE_SURROGATE_BLOCK_COUNT); + indexLength+=UTRIE_SURROGATE_BLOCK_COUNT; + +#ifdef UTRIE_DEBUG + printf("trie index count: BMP %ld all Unicode %ld folded %ld\n", + UTRIE_BMP_INDEX_LENGTH, (long)UTRIE_MAX_INDEX_LENGTH, indexLength); +#endif + + trie->indexLength=indexLength; +} + +/* + * Set a value in the trie index map to indicate which data block + * is referenced and which one is not. + * utrie_compact() will remove data blocks that are not used at all. + * Set + * - 0 if it is used + * - -1 if it is not used + */ +static void +_findUnusedBlocks(UNewTrie *trie) { + int32_t i; + + /* fill the entire map with "not used" */ + uprv_memset(trie->map, 0xff, (UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT)*4); + + /* mark each block that _is_ used with 0 */ + for(i=0; i<trie->indexLength; ++i) { + trie->map[ABS(trie->index[i])>>UTRIE_SHIFT]=0; + } + + /* never move the all-initial-value block 0 */ + trie->map[0]=0; +} + +static int32_t +_findSameDataBlock(const uint32_t *data, int32_t dataLength, + int32_t otherBlock, int32_t step) { + int32_t block; + + /* ensure that we do not even partially get past dataLength */ + dataLength-=UTRIE_DATA_BLOCK_LENGTH; + + for(block=0; block<=dataLength; block+=step) { + if(equal_uint32(data+block, data+otherBlock, UTRIE_DATA_BLOCK_LENGTH)) { + return block; + } + } + return -1; +} + +/* + * Compact a folded build-time trie. + * + * The compaction + * - removes blocks that are identical with earlier ones + * - overlaps adjacent blocks as much as possible (if overlap==TRUE) + * - moves blocks in steps of the data granularity + * - moves and overlaps blocks that overlap with multiple values in the overlap region + * + * It does not + * - try to move and overlap blocks that are not already adjacent + */ +static void +utrie_compact(UNewTrie *trie, UBool overlap, UErrorCode *pErrorCode) { + int32_t i, start, newStart, overlapStart; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return; + } + + /* valid, uncompacted trie? */ + if(trie==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if(trie->isCompacted) { + return; /* nothing left to do */ + } + + /* compaction */ + + /* initialize the index map with "block is used/unused" flags */ + _findUnusedBlocks(trie); + + /* if Latin-1 is preallocated and linear, then do not compact Latin-1 data */ + if(trie->isLatin1Linear && UTRIE_SHIFT<=8) { + overlapStart=UTRIE_DATA_BLOCK_LENGTH+256; + } else { + overlapStart=UTRIE_DATA_BLOCK_LENGTH; + } + + newStart=UTRIE_DATA_BLOCK_LENGTH; + for(start=newStart; start<trie->dataLength;) { + /* + * start: index of first entry of current block + * newStart: index where the current block is to be moved + * (right after current end of already-compacted data) + */ + + /* skip blocks that are not used */ + if(trie->map[start>>UTRIE_SHIFT]<0) { + /* advance start to the next block */ + start+=UTRIE_DATA_BLOCK_LENGTH; + + /* leave newStart with the previous block! */ + continue; + } + + /* search for an identical block */ + if( start>=overlapStart && + (i=_findSameDataBlock(trie->data, newStart, start, + overlap ? UTRIE_DATA_GRANULARITY : UTRIE_DATA_BLOCK_LENGTH)) + >=0 + ) { + /* found an identical block, set the other block's index value for the current block */ + trie->map[start>>UTRIE_SHIFT]=i; + + /* advance start to the next block */ + start+=UTRIE_DATA_BLOCK_LENGTH; + + /* leave newStart with the previous block! */ + continue; + } + + /* see if the beginning of this block can be overlapped with the end of the previous block */ + if(overlap && start>=overlapStart) { + /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ + for(i=UTRIE_DATA_BLOCK_LENGTH-UTRIE_DATA_GRANULARITY; + i>0 && !equal_uint32(trie->data+(newStart-i), trie->data+start, i); + i-=UTRIE_DATA_GRANULARITY) {} + } else { + i=0; + } + + if(i>0) { + /* some overlap */ + trie->map[start>>UTRIE_SHIFT]=newStart-i; + + /* move the non-overlapping indexes to their new positions */ + start+=i; + for(i=UTRIE_DATA_BLOCK_LENGTH-i; i>0; --i) { + trie->data[newStart++]=trie->data[start++]; + } + } else if(newStart<start) { + /* no overlap, just move the indexes to their new positions */ + trie->map[start>>UTRIE_SHIFT]=newStart; + for(i=UTRIE_DATA_BLOCK_LENGTH; i>0; --i) { + trie->data[newStart++]=trie->data[start++]; + } + } else /* no overlap && newStart==start */ { + trie->map[start>>UTRIE_SHIFT]=start; + newStart+=UTRIE_DATA_BLOCK_LENGTH; + start=newStart; + } + } + + /* now adjust the index (stage 1) table */ + for(i=0; i<trie->indexLength; ++i) { + trie->index[i]=trie->map[ABS(trie->index[i])>>UTRIE_SHIFT]; + } + +#ifdef UTRIE_DEBUG + /* we saved some space */ + printf("compacting trie: count of 32-bit words %lu->%lu\n", + (long)trie->dataLength, (long)newStart); +#endif + + trie->dataLength=newStart; +} + +/* serialization ------------------------------------------------------------ */ + +/* + * Default function for the folding value: + * Just store the offset (16 bits) if there is any non-initial-value entry. + * + * The offset parameter is never 0. + * Returning the offset itself is safe for UTRIE_SHIFT>=5 because + * for UTRIE_SHIFT==5 the maximum index length is UTRIE_MAX_INDEX_LENGTH==0x8800 + * which fits into 16-bit trie values; + * for higher UTRIE_SHIFT, UTRIE_MAX_INDEX_LENGTH decreases. + * + * Theoretically, it would be safer for all possible UTRIE_SHIFT including + * those of 4 and lower to return offset>>UTRIE_SURROGATE_BLOCK_BITS + * which would always result in a value of 0x40..0x43f + * (start/end 1k blocks of supplementary Unicode code points). + * However, this would be uglier, and would not work for some existing + * binary data file formats. + * + * Also, we do not plan to change UTRIE_SHIFT because it would change binary + * data file formats, and we would probably not make it smaller because of + * the then even larger BMP index length even for empty tries. + */ +static uint32_t U_CALLCONV +defaultGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) { + uint32_t value, initialValue; + UChar32 limit; + UBool inBlockZero; + + initialValue=trie->data[0]; + limit=start+0x400; + while(start<limit) { + value=utrie_get32(trie, start, &inBlockZero); + if(inBlockZero) { + start+=UTRIE_DATA_BLOCK_LENGTH; + } else if(value!=initialValue) { + return (uint32_t)offset; + } else { + ++start; + } + } + return 0; +} + +U_CAPI int32_t U_EXPORT2 +utrie_serialize(UNewTrie *trie, void *dt, int32_t capacity, + UNewTrieGetFoldedValue *getFoldedValue, + UBool reduceTo16Bits, + UErrorCode *pErrorCode) { + UTrieHeader *header; + uint32_t *p; + uint16_t *dest16; + int32_t i, length; + uint8_t* data = NULL; + + /* argument check */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if(trie==NULL || capacity<0 || (capacity>0 && dt==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + if(getFoldedValue==NULL) { + getFoldedValue=defaultGetFoldedValue; + } + + data = (uint8_t*)dt; + /* fold and compact if necessary, also checks that indexLength is within limits */ + if(!trie->isCompacted) { + /* compact once without overlap to improve folding */ + utrie_compact(trie, FALSE, pErrorCode); + + /* fold the supplementary part of the index array */ + utrie_fold(trie, getFoldedValue, pErrorCode); + + /* compact again with overlap for minimum data array length */ + utrie_compact(trie, TRUE, pErrorCode); + + trie->isCompacted=TRUE; + if(U_FAILURE(*pErrorCode)) { + return 0; + } + } + + /* is dataLength within limits? */ + if( (reduceTo16Bits ? (trie->dataLength+trie->indexLength) : trie->dataLength) >= UTRIE_MAX_DATA_LENGTH) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + } + + length=sizeof(UTrieHeader)+2*trie->indexLength; + if(reduceTo16Bits) { + length+=2*trie->dataLength; + } else { + length+=4*trie->dataLength; + } + + if(length>capacity) { + return length; /* preflighting */ + } + +#ifdef UTRIE_DEBUG + printf("**UTrieLengths(serialize)** index:%6ld data:%6ld serialized:%6ld\n", + (long)trie->indexLength, (long)trie->dataLength, (long)length); +#endif + + /* set the header fields */ + header=(UTrieHeader *)data; + data+=sizeof(UTrieHeader); + + header->signature=0x54726965; /* "Trie" */ + header->options=UTRIE_SHIFT | (UTRIE_INDEX_SHIFT<<UTRIE_OPTIONS_INDEX_SHIFT); + + if(!reduceTo16Bits) { + header->options|=UTRIE_OPTIONS_DATA_IS_32_BIT; + } + if(trie->isLatin1Linear) { + header->options|=UTRIE_OPTIONS_LATIN1_IS_LINEAR; + } + + header->indexLength=trie->indexLength; + header->dataLength=trie->dataLength; + + /* write the index (stage 1) array and the 16/32-bit data (stage 2) array */ + if(reduceTo16Bits) { + /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT, after adding indexLength */ + p=(uint32_t *)trie->index; + dest16=(uint16_t *)data; + for(i=trie->indexLength; i>0; --i) { + *dest16++=(uint16_t)((*p++ + trie->indexLength)>>UTRIE_INDEX_SHIFT); + } + + /* write 16-bit data values */ + p=trie->data; + for(i=trie->dataLength; i>0; --i) { + *dest16++=(uint16_t)*p++; + } + } else { + /* write 16-bit index values shifted right by UTRIE_INDEX_SHIFT */ + p=(uint32_t *)trie->index; + dest16=(uint16_t *)data; + for(i=trie->indexLength; i>0; --i) { + *dest16++=(uint16_t)(*p++ >> UTRIE_INDEX_SHIFT); + } + + /* write 32-bit data values */ + uprv_memcpy(dest16, trie->data, 4*(size_t)trie->dataLength); + } + + return length; +} + +/* inverse to defaultGetFoldedValue() */ +U_CAPI int32_t U_EXPORT2 +utrie_defaultGetFoldingOffset(uint32_t data) { + return (int32_t)data; +} + +U_CAPI int32_t U_EXPORT2 +utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode) { + const UTrieHeader *header; + const uint16_t *p16; + uint32_t options; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return -1; + } + + /* enough data for a trie header? */ + if(length<(int32_t)sizeof(UTrieHeader)) { + *pErrorCode=U_INVALID_FORMAT_ERROR; + return -1; + } + + /* check the signature */ + header=(const UTrieHeader *)data; + if(header->signature!=0x54726965) { + *pErrorCode=U_INVALID_FORMAT_ERROR; + return -1; + } + + /* get the options and check the shift values */ + options=header->options; + if( (options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || + ((options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT + ) { + *pErrorCode=U_INVALID_FORMAT_ERROR; + return -1; + } + trie->isLatin1Linear= (UBool)((options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0); + + /* get the length values */ + trie->indexLength=header->indexLength; + trie->dataLength=header->dataLength; + + length-=(int32_t)sizeof(UTrieHeader); + + /* enough data for the index? */ + if(length<2*trie->indexLength) { + *pErrorCode=U_INVALID_FORMAT_ERROR; + return -1; + } + p16=(const uint16_t *)(header+1); + trie->index=p16; + p16+=trie->indexLength; + length-=2*trie->indexLength; + + /* get the data */ + if(options&UTRIE_OPTIONS_DATA_IS_32_BIT) { + if(length<4*trie->dataLength) { + *pErrorCode=U_INVALID_FORMAT_ERROR; + return -1; + } + trie->data32=(const uint32_t *)p16; + trie->initialValue=trie->data32[0]; + length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+4*trie->dataLength; + } else { + if(length<2*trie->dataLength) { + *pErrorCode=U_INVALID_FORMAT_ERROR; + return -1; + } + + /* the "data16" data is used via the index pointer */ + trie->data32=NULL; + trie->initialValue=trie->index[trie->indexLength]; + length=(int32_t)sizeof(UTrieHeader)+2*trie->indexLength+2*trie->dataLength; + } + + trie->getFoldingOffset=utrie_defaultGetFoldingOffset; + + return length; +} + +U_CAPI int32_t U_EXPORT2 +utrie_unserializeDummy(UTrie *trie, + void *data, int32_t length, + uint32_t initialValue, uint32_t leadUnitValue, + UBool make16BitTrie, + UErrorCode *pErrorCode) { + uint16_t *p16; + int32_t actualLength, latin1Length, i, limit; + uint16_t block; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return -1; + } + + /* calculate the actual size of the dummy trie data */ + + /* max(Latin-1, block 0) */ + latin1Length= 256; /*UTRIE_SHIFT<=8 ? 256 : UTRIE_DATA_BLOCK_LENGTH;*/ + + trie->indexLength=UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT; + trie->dataLength=latin1Length; + if(leadUnitValue!=initialValue) { + trie->dataLength+=UTRIE_DATA_BLOCK_LENGTH; + } + + actualLength=trie->indexLength*2; + if(make16BitTrie) { + actualLength+=trie->dataLength*2; + } else { + actualLength+=trie->dataLength*4; + } + + /* enough space for the dummy trie? */ + if(length<actualLength) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return actualLength; + } + + trie->isLatin1Linear=TRUE; + trie->initialValue=initialValue; + + /* fill the index and data arrays */ + p16=(uint16_t *)data; + trie->index=p16; + + if(make16BitTrie) { + /* indexes to block 0 */ + block=(uint16_t)(trie->indexLength>>UTRIE_INDEX_SHIFT); + limit=trie->indexLength; + for(i=0; i<limit; ++i) { + p16[i]=block; + } + + if(leadUnitValue!=initialValue) { + /* indexes for lead surrogate code units to the block after Latin-1 */ + block+=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT); + i=0xd800>>UTRIE_SHIFT; + limit=0xdc00>>UTRIE_SHIFT; + for(; i<limit; ++i) { + p16[i]=block; + } + } + + trie->data32=NULL; + + /* Latin-1 data */ + p16+=trie->indexLength; + for(i=0; i<latin1Length; ++i) { + p16[i]=(uint16_t)initialValue; + } + + /* data for lead surrogate code units */ + if(leadUnitValue!=initialValue) { + limit=latin1Length+UTRIE_DATA_BLOCK_LENGTH; + for(/* i=latin1Length */; i<limit; ++i) { + p16[i]=(uint16_t)leadUnitValue; + } + } + } else { + uint32_t *p32; + + /* indexes to block 0 */ + uprv_memset(p16, 0, trie->indexLength*2); + + if(leadUnitValue!=initialValue) { + /* indexes for lead surrogate code units to the block after Latin-1 */ + block=(uint16_t)(latin1Length>>UTRIE_INDEX_SHIFT); + i=0xd800>>UTRIE_SHIFT; + limit=0xdc00>>UTRIE_SHIFT; + for(; i<limit; ++i) { + p16[i]=block; + } + } + + trie->data32=p32=(uint32_t *)(p16+trie->indexLength); + + /* Latin-1 data */ + for(i=0; i<latin1Length; ++i) { + p32[i]=initialValue; + } + + /* data for lead surrogate code units */ + if(leadUnitValue!=initialValue) { + limit=latin1Length+UTRIE_DATA_BLOCK_LENGTH; + for(/* i=latin1Length */; i<limit; ++i) { + p32[i]=leadUnitValue; + } + } + } + + trie->getFoldingOffset=utrie_defaultGetFoldingOffset; + + return actualLength; +} + +/* enumeration -------------------------------------------------------------- */ + +/* default UTrieEnumValue() returns the input value itself */ +static uint32_t U_CALLCONV +enumSameValue(const void * /*context*/, uint32_t value) { + return value; +} + +/** + * Enumerate all ranges of code points with the same relevant values. + * The values are transformed from the raw trie entries by the enumValue function. + */ +U_CAPI void U_EXPORT2 +utrie_enum(const UTrie *trie, + UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context) { + const uint32_t *data32; + const uint16_t *idx; + + uint32_t value, prevValue, initialValue; + UChar32 c, prev; + int32_t l, i, j, block, prevBlock, nullBlock, offset; + + /* check arguments */ + if(trie==NULL || trie->index==NULL || enumRange==NULL) { + return; + } + if(enumValue==NULL) { + enumValue=enumSameValue; + } + + idx=trie->index; + data32=trie->data32; + + /* get the enumeration value that corresponds to an initial-value trie data entry */ + initialValue=enumValue(context, trie->initialValue); + + if(data32==NULL) { + nullBlock=trie->indexLength; + } else { + nullBlock=0; + } + + /* set variables for previous range */ + prevBlock=nullBlock; + prev=0; + prevValue=initialValue; + + /* enumerate BMP - the main loop enumerates data blocks */ + for(i=0, c=0; c<=0xffff; ++i) { + if(c==0xd800) { + /* skip lead surrogate code _units_, go to lead surr. code _points_ */ + i=UTRIE_BMP_INDEX_LENGTH; + } else if(c==0xdc00) { + /* go back to regular BMP code points */ + i=c>>UTRIE_SHIFT; + } + + block=idx[i]<<UTRIE_INDEX_SHIFT; + if(block==prevBlock) { + /* the block is the same as the previous one, and filled with value */ + c+=UTRIE_DATA_BLOCK_LENGTH; + } else if(block==nullBlock) { + /* this is the all-initial-value block */ + if(prevValue!=initialValue) { + if(prev<c) { + if(!enumRange(context, prev, c, prevValue)) { + return; + } + } + prevBlock=nullBlock; + prev=c; + prevValue=initialValue; + } + c+=UTRIE_DATA_BLOCK_LENGTH; + } else { + prevBlock=block; + for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) { + value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); + if(value!=prevValue) { + if(prev<c) { + if(!enumRange(context, prev, c, prevValue)) { + return; + } + } + if(j>0) { + /* the block is not filled with all the same value */ + prevBlock=-1; + } + prev=c; + prevValue=value; + } + ++c; + } + } + } + + /* enumerate supplementary code points */ + for(l=0xd800; l<0xdc00;) { + /* lead surrogate access */ + offset=idx[l>>UTRIE_SHIFT]<<UTRIE_INDEX_SHIFT; + if(offset==nullBlock) { + /* no entries for a whole block of lead surrogates */ + if(prevValue!=initialValue) { + if(prev<c) { + if(!enumRange(context, prev, c, prevValue)) { + return; + } + } + prevBlock=nullBlock; + prev=c; + prevValue=initialValue; + } + + l+=UTRIE_DATA_BLOCK_LENGTH; + c+=UTRIE_DATA_BLOCK_LENGTH<<10; + continue; + } + + value= data32!=NULL ? data32[offset+(l&UTRIE_MASK)] : idx[offset+(l&UTRIE_MASK)]; + + /* enumerate trail surrogates for this lead surrogate */ + offset=trie->getFoldingOffset(value); + if(offset<=0) { + /* no data for this lead surrogate */ + if(prevValue!=initialValue) { + if(prev<c) { + if(!enumRange(context, prev, c, prevValue)) { + return; + } + } + prevBlock=nullBlock; + prev=c; + prevValue=initialValue; + } + + /* nothing else to do for the supplementary code points for this lead surrogate */ + c+=0x400; + } else { + /* enumerate code points for this lead surrogate */ + i=offset; + offset+=UTRIE_SURROGATE_BLOCK_COUNT; + do { + /* copy of most of the body of the BMP loop */ + block=idx[i]<<UTRIE_INDEX_SHIFT; + if(block==prevBlock) { + /* the block is the same as the previous one, and filled with value */ + c+=UTRIE_DATA_BLOCK_LENGTH; + } else if(block==nullBlock) { + /* this is the all-initial-value block */ + if(prevValue!=initialValue) { + if(prev<c) { + if(!enumRange(context, prev, c, prevValue)) { + return; + } + } + prevBlock=nullBlock; + prev=c; + prevValue=initialValue; + } + c+=UTRIE_DATA_BLOCK_LENGTH; + } else { + prevBlock=block; + for(j=0; j<UTRIE_DATA_BLOCK_LENGTH; ++j) { + value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); + if(value!=prevValue) { + if(prev<c) { + if(!enumRange(context, prev, c, prevValue)) { + return; + } + } + if(j>0) { + /* the block is not filled with all the same value */ + prevBlock=-1; + } + prev=c; + prevValue=value; + } + ++c; + } + } + } while(++i<offset); + } + + ++l; + } + + /* deliver last range */ + enumRange(context, prev, c, prevValue); +} diff --git a/thirdparty/icu4c/common/utrie.h b/thirdparty/icu4c/common/utrie.h new file mode 100644 index 0000000000..2fd2c461ff --- /dev/null +++ b/thirdparty/icu4c/common/utrie.h @@ -0,0 +1,793 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: utrie.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001nov08 +* created by: Markus W. Scherer +*/ + +#ifndef __UTRIE_H__ +#define __UTRIE_H__ + +#include "unicode/utypes.h" +#include "unicode/utf16.h" + +U_CDECL_BEGIN + +/** + * \file + * + * This is a common implementation of a "folded" trie. + * It is a kind of compressed, serializable table of 16- or 32-bit values associated with + * Unicode code points (0..0x10ffff). + * + * This implementation is optimized for getting values while walking forward + * through a UTF-16 string. + * Therefore, the simplest and fastest access macros are the + * _FROM_LEAD() and _FROM_OFFSET_TRAIL() macros. + * + * The _FROM_BMP() macros are a little more complicated; they get values + * even for lead surrogate code _points_, while the _FROM_LEAD() macros + * get special "folded" values for lead surrogate code _units_ if + * there is relevant data associated with them. + * From such a folded value, an offset needs to be extracted to supply + * to the _FROM_OFFSET_TRAIL() macros. + * + * Most of the more complex (and more convenient) functions/macros call a callback function + * to get that offset from the folded value for a lead surrogate unit. + */ + +/** + * Trie constants, defining shift widths, index array lengths, etc. + */ +enum { + /** Shift size for shifting right the input index. 1..9 */ + UTRIE_SHIFT=5, + + /** Number of data values in a stage 2 (data array) block. 2, 4, 8, .., 0x200 */ + UTRIE_DATA_BLOCK_LENGTH=1<<UTRIE_SHIFT, + + /** Mask for getting the lower bits from the input index. */ + UTRIE_MASK=UTRIE_DATA_BLOCK_LENGTH-1, + + /** + * Lead surrogate code points' index displacement in the index array. + * 0x10000-0xd800=0x2800 + */ + UTRIE_LEAD_INDEX_DISP=0x2800>>UTRIE_SHIFT, + + /** + * Shift size for shifting left the index array values. + * Increases possible data size with 16-bit index values at the cost + * of compactability. + * This requires blocks of stage 2 data to be aligned by UTRIE_DATA_GRANULARITY. + * 0..UTRIE_SHIFT + */ + UTRIE_INDEX_SHIFT=2, + + /** The alignment size of a stage 2 data block. Also the granularity for compaction. */ + UTRIE_DATA_GRANULARITY=1<<UTRIE_INDEX_SHIFT, + + /** Number of bits of a trail surrogate that are used in index table lookups. */ + UTRIE_SURROGATE_BLOCK_BITS=10-UTRIE_SHIFT, + + /** + * Number of index (stage 1) entries per lead surrogate. + * Same as number of index entries for 1024 trail surrogates, + * ==0x400>>UTRIE_SHIFT + */ + UTRIE_SURROGATE_BLOCK_COUNT=(1<<UTRIE_SURROGATE_BLOCK_BITS), + + /** Length of the BMP portion of the index (stage 1) array. */ + UTRIE_BMP_INDEX_LENGTH=0x10000>>UTRIE_SHIFT +}; + +/** + * Length of the index (stage 1) array before folding. + * Maximum number of Unicode code points (0x110000) shifted right by UTRIE_SHIFT. + */ +#define UTRIE_MAX_INDEX_LENGTH (0x110000>>UTRIE_SHIFT) + +/** + * Maximum length of the runtime data (stage 2) array. + * Limited by 16-bit index values that are left-shifted by UTRIE_INDEX_SHIFT. + */ +#define UTRIE_MAX_DATA_LENGTH (0x10000<<UTRIE_INDEX_SHIFT) + +/** + * Maximum length of the build-time data (stage 2) array. + * The maximum length is 0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400. + * (Number of Unicode code points + one all-initial-value block + + * possible duplicate entries for 1024 lead surrogates.) + */ +#define UTRIE_MAX_BUILD_TIME_DATA_LENGTH (0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400) + +/** + * Number of bytes for a dummy trie. + * A dummy trie is an empty runtime trie, used when a real data trie cannot + * be loaded. + * The number of bytes works for Latin-1-linear tries with 32-bit data + * (worst case). + * + * Calculation: + * BMP index + 1 index block for lead surrogate code points + + * Latin-1-linear array + 1 data block for lead surrogate code points + * + * Latin-1: if(UTRIE_SHIFT<=8) { 256 } else { included in first data block } + * + * @see utrie_unserializeDummy + */ +#define UTRIE_DUMMY_SIZE ((UTRIE_BMP_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT)*2+(UTRIE_SHIFT<=8?256:UTRIE_DATA_BLOCK_LENGTH)*4+UTRIE_DATA_BLOCK_LENGTH*4) + +/** + * Runtime UTrie callback function. + * Extract from a lead surrogate's data the + * index array offset of the indexes for that lead surrogate. + * + * @param data data value for a surrogate from the trie, including the folding offset + * @return offset>=UTRIE_BMP_INDEX_LENGTH, or 0 if there is no data for the lead surrogate + */ +typedef int32_t U_CALLCONV +UTrieGetFoldingOffset(uint32_t data); + +/** + * Run-time Trie structure. + * + * Either the data table is 16 bits wide and accessed via the index + * pointer, with each index item increased by indexLength; + * in this case, data32==NULL. + * + * Or the data table is 32 bits wide and accessed via the data32 pointer. + */ +struct UTrie { + const uint16_t *index; + const uint32_t *data32; /* NULL if 16b data is used via index */ + + /** + * This function is not used in _FROM_LEAD, _FROM_BMP, and _FROM_OFFSET_TRAIL macros. + * If convenience macros like _GET16 or _NEXT32 are used, this function must be set. + * + * utrie_unserialize() sets a default function which simply returns + * the lead surrogate's value itself - which is the inverse of the default + * folding function used by utrie_serialize(). + * + * @see UTrieGetFoldingOffset + */ + UTrieGetFoldingOffset *getFoldingOffset; + + int32_t indexLength, dataLength; + uint32_t initialValue; + UBool isLatin1Linear; +}; + +#ifndef __UTRIE2_H__ +typedef struct UTrie UTrie; +#endif + +/** Internal trie getter from an offset (0 if c16 is a BMP/lead units) and a 16-bit unit */ +#define _UTRIE_GET_RAW(trie, data, offset, c16) \ + (trie)->data[ \ + ((int32_t)((trie)->index[(offset)+((c16)>>UTRIE_SHIFT)])<<UTRIE_INDEX_SHIFT)+ \ + ((c16)&UTRIE_MASK) \ + ] + +/** Internal trie getter from a pair of surrogates */ +#define _UTRIE_GET_FROM_PAIR(trie, data, c, c2, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t __offset; \ +\ + /* get data for lead surrogate */ \ + (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \ + __offset=(trie)->getFoldingOffset(result); \ +\ + /* get the real data from the folded lead/trail units */ \ + if(__offset>0) { \ + (result)=_UTRIE_GET_RAW((trie), data, __offset, (c2)&0x3ff); \ + } else { \ + (result)=(resultType)((trie)->initialValue); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** Internal trie getter from a BMP code point, treating a lead surrogate as a normal code point */ +#define _UTRIE_GET_FROM_BMP(trie, data, c16) \ + _UTRIE_GET_RAW(trie, data, 0xd800<=(c16) && (c16)<=0xdbff ? UTRIE_LEAD_INDEX_DISP : 0, c16) + +/** + * Internal trie getter from a code point. + * Could be faster(?) but longer with + * if((c32)<=0xd7ff) { (result)=_UTRIE_GET_RAW(trie, data, 0, c32); } + */ +#define _UTRIE_GET(trie, data, c32, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \ + if((uint32_t)(c32)<=0xffff) { \ + /* BMP code points */ \ + (result)=_UTRIE_GET_FROM_BMP(trie, data, c32); \ + } else if((uint32_t)(c32)<=0x10ffff) { \ + /* supplementary code point */ \ + UChar __lead16=U16_LEAD(c32); \ + _UTRIE_GET_FROM_PAIR(trie, data, __lead16, c32, result, resultType); \ + } else { \ + /* out of range */ \ + (result)=(resultType)((trie)->initialValue); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** Internal next-post-increment: get the next code point (c, c2) and its data */ +#define _UTRIE_NEXT(trie, data, src, limit, c, c2, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=*(src)++; \ + if(!U16_IS_LEAD(c)) { \ + (c2)=0; \ + (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \ + } else if((src)!=(limit) && U16_IS_TRAIL((c2)=*(src))) { \ + ++(src); \ + _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \ + } else { \ + /* unpaired lead surrogate code point */ \ + (c2)=0; \ + (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/** Internal previous: get the previous code point (c, c2) and its data */ +#define _UTRIE_PREVIOUS(trie, data, start, src, c, c2, result, resultType) UPRV_BLOCK_MACRO_BEGIN { \ + (c)=*--(src); \ + if(!U16_IS_SURROGATE(c)) { \ + (c2)=0; \ + (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \ + } else if(!U16_IS_SURROGATE_LEAD(c)) { \ + /* trail surrogate */ \ + if((start)!=(src) && U16_IS_LEAD((c2)=*((src)-1))) { \ + --(src); \ + (result)=(c); (c)=(c2); (c2)=(UChar)(result); /* swap c, c2 */ \ + _UTRIE_GET_FROM_PAIR((trie), data, (c), (c2), (result), resultType); \ + } else { \ + /* unpaired trail surrogate code point */ \ + (c2)=0; \ + (result)=_UTRIE_GET_RAW((trie), data, 0, (c)); \ + } \ + } else { \ + /* unpaired lead surrogate code point */ \ + (c2)=0; \ + (result)=_UTRIE_GET_RAW((trie), data, UTRIE_LEAD_INDEX_DISP, (c)); \ + } \ +} UPRV_BLOCK_MACRO_END + +/* Public UTrie API ---------------------------------------------------------*/ + +/** + * Get a pointer to the contiguous part of the data array + * for the Latin-1 range (U+0000..U+00ff). + * Must be used only if the Latin-1 range is in fact linear + * (trie->isLatin1Linear). + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @return (const uint16_t *) pointer to values for Latin-1 code points + */ +#define UTRIE_GET16_LATIN1(trie) ((trie)->index+(trie)->indexLength+UTRIE_DATA_BLOCK_LENGTH) + +/** + * Get a pointer to the contiguous part of the data array + * for the Latin-1 range (U+0000..U+00ff). + * Must be used only if the Latin-1 range is in fact linear + * (trie->isLatin1Linear). + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @return (const uint32_t *) pointer to values for Latin-1 code points + */ +#define UTRIE_GET32_LATIN1(trie) ((trie)->data32+UTRIE_DATA_BLOCK_LENGTH) + +/** + * Get a 16-bit trie value from a BMP code point (UChar, <=U+ffff). + * c16 may be a lead surrogate, which may have a value including a folding offset. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param c16 (UChar, in) the input BMP code point + * @return (uint16_t) trie lookup result + */ +#define UTRIE_GET16_FROM_LEAD(trie, c16) _UTRIE_GET_RAW(trie, index, 0, c16) + +/** + * Get a 32-bit trie value from a BMP code point (UChar, <=U+ffff). + * c16 may be a lead surrogate, which may have a value including a folding offset. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param c16 (UChar, in) the input BMP code point + * @return (uint32_t) trie lookup result + */ +#define UTRIE_GET32_FROM_LEAD(trie, c16) _UTRIE_GET_RAW(trie, data32, 0, c16) + +/** + * Get a 16-bit trie value from a BMP code point (UChar, <=U+ffff). + * Even lead surrogate code points are treated as normal code points, + * with unfolded values that may differ from _FROM_LEAD() macro results for them. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param c16 (UChar, in) the input BMP code point + * @return (uint16_t) trie lookup result + */ +#define UTRIE_GET16_FROM_BMP(trie, c16) _UTRIE_GET_FROM_BMP(trie, index, c16) + +/** + * Get a 32-bit trie value from a BMP code point (UChar, <=U+ffff). + * Even lead surrogate code points are treated as normal code points, + * with unfolded values that may differ from _FROM_LEAD() macro results for them. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param c16 (UChar, in) the input BMP code point + * @return (uint32_t) trie lookup result + */ +#define UTRIE_GET32_FROM_BMP(trie, c16) _UTRIE_GET_FROM_BMP(trie, data32, c16) + +/** + * Get a 16-bit trie value from a code point. + * Even lead surrogate code points are treated as normal code points, + * with unfolded values that may differ from _FROM_LEAD() macro results for them. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param c32 (UChar32, in) the input code point + * @param result (uint16_t, out) uint16_t variable for the trie lookup result + */ +#define UTRIE_GET16(trie, c32, result) _UTRIE_GET(trie, index, c32, result, uint16_t) + +/** + * Get a 32-bit trie value from a code point. + * Even lead surrogate code points are treated as normal code points, + * with unfolded values that may differ from _FROM_LEAD() macro results for them. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param c32 (UChar32, in) the input code point + * @param result (uint32_t, out) uint32_t variable for the trie lookup result + */ +#define UTRIE_GET32(trie, c32, result) _UTRIE_GET(trie, data32, c32, result, uint32_t) + +/** + * Get the next code point (c, c2), post-increment src, + * and get a 16-bit value from the trie. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param src (const UChar *, in/out) the source text pointer + * @param limit (const UChar *, in) the limit pointer for the text, or NULL + * @param c (UChar, out) variable for the BMP or lead code unit + * @param c2 (UChar, out) variable for 0 or the trail code unit + * @param result (uint16_t, out) uint16_t variable for the trie lookup result + */ +#define UTRIE_NEXT16(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, index, src, limit, c, c2, result, uint16_t) + +/** + * Get the next code point (c, c2), post-increment src, + * and get a 32-bit value from the trie. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param src (const UChar *, in/out) the source text pointer + * @param limit (const UChar *, in) the limit pointer for the text, or NULL + * @param c (UChar, out) variable for the BMP or lead code unit + * @param c2 (UChar, out) variable for 0 or the trail code unit + * @param result (uint32_t, out) uint32_t variable for the trie lookup result + */ +#define UTRIE_NEXT32(trie, src, limit, c, c2, result) _UTRIE_NEXT(trie, data32, src, limit, c, c2, result, uint32_t) + +/** + * Get the previous code point (c, c2), pre-decrement src, + * and get a 16-bit value from the trie. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param start (const UChar *, in) the start pointer for the text, or NULL + * @param src (const UChar *, in/out) the source text pointer + * @param c (UChar, out) variable for the BMP or lead code unit + * @param c2 (UChar, out) variable for 0 or the trail code unit + * @param result (uint16_t, out) uint16_t variable for the trie lookup result + */ +#define UTRIE_PREVIOUS16(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, index, start, src, c, c2, result, uint16_t) + +/** + * Get the previous code point (c, c2), pre-decrement src, + * and get a 32-bit value from the trie. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param start (const UChar *, in) the start pointer for the text, or NULL + * @param src (const UChar *, in/out) the source text pointer + * @param c (UChar, out) variable for the BMP or lead code unit + * @param c2 (UChar, out) variable for 0 or the trail code unit + * @param result (uint32_t, out) uint32_t variable for the trie lookup result + */ +#define UTRIE_PREVIOUS32(trie, start, src, c, c2, result) _UTRIE_PREVIOUS(trie, data32, start, src, c, c2, result, uint32_t) + +/** + * Get a 16-bit trie value from a pair of surrogates. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param c (UChar, in) a lead surrogate + * @param c2 (UChar, in) a trail surrogate + * @param result (uint16_t, out) uint16_t variable for the trie lookup result + */ +#define UTRIE_GET16_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, index, c, c2, result, uint16_t) + +/** + * Get a 32-bit trie value from a pair of surrogates. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param c (UChar, in) a lead surrogate + * @param c2 (UChar, in) a trail surrogate + * @param result (uint32_t, out) uint32_t variable for the trie lookup result + */ +#define UTRIE_GET32_FROM_PAIR(trie, c, c2, result) _UTRIE_GET_FROM_PAIR(trie, data32, c, c2, result, uint32_t) + +/** + * Get a 16-bit trie value from a folding offset (from the value of a lead surrogate) + * and a trail surrogate. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param offset (int32_t, in) the folding offset from the value of a lead surrogate + * @param c2 (UChar, in) a trail surrogate (only the 10 low bits are significant) + * @return (uint16_t) trie lookup result + */ +#define UTRIE_GET16_FROM_OFFSET_TRAIL(trie, offset, c2) _UTRIE_GET_RAW(trie, index, offset, (c2)&0x3ff) + +/** + * Get a 32-bit trie value from a folding offset (from the value of a lead surrogate) + * and a trail surrogate. + * + * @param trie (const UTrie *, in) a pointer to the runtime trie structure + * @param offset (int32_t, in) the folding offset from the value of a lead surrogate + * @param c2 (UChar, in) a trail surrogate (only the 10 low bits are significant) + * @return (uint32_t) trie lookup result + */ +#define UTRIE_GET32_FROM_OFFSET_TRAIL(trie, offset, c2) _UTRIE_GET_RAW(trie, data32, offset, (c2)&0x3ff) + +/* enumeration callback types */ + +/** + * Callback from utrie_enum(), extracts a uint32_t value from a + * trie value. This value will be passed on to the UTrieEnumRange function. + * + * @param context an opaque pointer, as passed into utrie_enum() + * @param value a value from the trie + * @return the value that is to be passed on to the UTrieEnumRange function + */ +typedef uint32_t U_CALLCONV +UTrieEnumValue(const void *context, uint32_t value); + +/** + * Callback from utrie_enum(), is called for each contiguous range + * of code points with the same value as retrieved from the trie and + * transformed by the UTrieEnumValue function. + * + * The callback function can stop the enumeration by returning false. + * + * @param context an opaque pointer, as passed into utrie_enum() + * @param start the first code point in a contiguous range with value + * @param limit one past the last code point in a contiguous range with value + * @param value the value that is set for all code points in [start..limit[ + * @return false to stop the enumeration + */ +typedef UBool U_CALLCONV +UTrieEnumRange(const void *context, UChar32 start, UChar32 limit, uint32_t value); + +/** + * Enumerate efficiently all values in a trie. + * For each entry in the trie, the value to be delivered is passed through + * the UTrieEnumValue function. + * The value is unchanged if that function pointer is NULL. + * + * For each contiguous range of code points with a given value, + * the UTrieEnumRange function is called. + * + * @param trie a pointer to the runtime trie structure + * @param enumValue a pointer to a function that may transform the trie entry value, + * or NULL if the values from the trie are to be used directly + * @param enumRange a pointer to a function that is called for each contiguous range + * of code points with the same value + * @param context an opaque pointer that is passed on to the callback functions + */ +U_CAPI void U_EXPORT2 +utrie_enum(const UTrie *trie, + UTrieEnumValue *enumValue, UTrieEnumRange *enumRange, const void *context); + +/** + * Unserialize a trie from 32-bit-aligned memory. + * Inverse of utrie_serialize(). + * Fills the UTrie runtime trie structure with the settings for the trie data. + * + * @param trie a pointer to the runtime trie structure + * @param data a pointer to 32-bit-aligned memory containing trie data + * @param length the number of bytes available at data + * @param pErrorCode an in/out ICU UErrorCode + * @return the number of bytes at data taken up by the trie data + */ +U_CAPI int32_t U_EXPORT2 +utrie_unserialize(UTrie *trie, const void *data, int32_t length, UErrorCode *pErrorCode); + +/** + * "Unserialize" a dummy trie. + * A dummy trie is an empty runtime trie, used when a real data trie cannot + * be loaded. + * + * The input memory is filled so that the trie always returns the initialValue, + * or the leadUnitValue for lead surrogate code points. + * The Latin-1 part is always set up to be linear. + * + * @param trie a pointer to the runtime trie structure + * @param data a pointer to 32-bit-aligned memory to be filled with the dummy trie data + * @param length the number of bytes available at data (recommended to use UTRIE_DUMMY_SIZE) + * @param initialValue the initial value that is set for all code points + * @param leadUnitValue the value for lead surrogate code _units_ that do not + * have associated supplementary data + * @param pErrorCode an in/out ICU UErrorCode + * + * @see UTRIE_DUMMY_SIZE + * @see utrie_open + */ +U_CAPI int32_t U_EXPORT2 +utrie_unserializeDummy(UTrie *trie, + void *data, int32_t length, + uint32_t initialValue, uint32_t leadUnitValue, + UBool make16BitTrie, + UErrorCode *pErrorCode); + +/** + * Default implementation for UTrie.getFoldingOffset, set automatically by + * utrie_unserialize(). + * Simply returns the lead surrogate's value itself - which is the inverse + * of the default folding function used by utrie_serialize(). + * Exported for static const UTrie structures. + * + * @see UTrieGetFoldingOffset + */ +U_CAPI int32_t U_EXPORT2 +utrie_defaultGetFoldingOffset(uint32_t data); + +/* Building a trie ----------------------------------------------------------*/ + +/** + * Build-time trie structure. + * Opaque definition, here only to make fillIn parameters possible + * for utrie_open() and utrie_clone(). + */ +struct UNewTrie { + /** + * Index values at build-time are 32 bits wide for easier processing. + * Bit 31 is set if the data block is used by multiple index values (from utrie_setRange()). + */ + int32_t index[UTRIE_MAX_INDEX_LENGTH+UTRIE_SURROGATE_BLOCK_COUNT]; + uint32_t *data; + + uint32_t leadUnitValue; + int32_t indexLength, dataCapacity, dataLength; + UBool isAllocated, isDataAllocated; + UBool isLatin1Linear, isCompacted; + + /** + * Map of adjusted indexes, used in utrie_compact(). + * Maps from original indexes to new ones. + */ + int32_t map[UTRIE_MAX_BUILD_TIME_DATA_LENGTH>>UTRIE_SHIFT]; +}; + +typedef struct UNewTrie UNewTrie; + +/** + * Build-time trie callback function, used with utrie_serialize(). + * This function calculates a lead surrogate's value including a folding offset + * from the 1024 supplementary code points [start..start+1024[ . + * It is U+10000 <= start <= U+10fc00 and (start&0x3ff)==0. + * + * The folding offset is provided by the caller. + * It is offset=UTRIE_BMP_INDEX_LENGTH+n*UTRIE_SURROGATE_BLOCK_COUNT with n=0..1023. + * Instead of the offset itself, n can be stored in 10 bits - + * or fewer if it can be assumed that few lead surrogates have associated data. + * + * The returned value must be + * - not zero if and only if there is relevant data + * for the corresponding 1024 supplementary code points + * - such that UTrie.getFoldingOffset(UNewTrieGetFoldedValue(..., offset))==offset + * + * @return a folded value, or 0 if there is no relevant data for the lead surrogate. + */ +typedef uint32_t U_CALLCONV +UNewTrieGetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset); + +/** + * Open a build-time trie structure. + * The size of the build-time data array is specified to avoid allocating a large + * array in all cases. The array itself can also be passed in. + * + * Although the trie is never fully expanded to a linear array, especially when + * utrie_setRange32() is used, the data array could be large during build time. + * The maximum length is + * UTRIE_MAX_BUILD_TIME_DATA_LENGTH=0x110000+UTRIE_DATA_BLOCK_LENGTH+0x400. + * (Number of Unicode code points + one all-initial-value block + + * possible duplicate entries for 1024 lead surrogates.) + * (UTRIE_DATA_BLOCK_LENGTH<=0x200 in all cases.) + * + * @param fillIn a pointer to a UNewTrie structure to be initialized (will not be released), or + * NULL if one is to be allocated + * @param aliasData a pointer to a data array to be used (will not be released), or + * NULL if one is to be allocated + * @param maxDataLength the capacity of aliasData (if not NULL) or + * the length of the data array to be allocated + * @param initialValue the initial value that is set for all code points + * @param leadUnitValue the value for lead surrogate code _units_ that do not + * have associated supplementary data + * @param latin1Linear a flag indicating whether the Latin-1 range is to be allocated and + * kept in a linear, contiguous part of the data array + * @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie + */ +U_CAPI UNewTrie * U_EXPORT2 +utrie_open(UNewTrie *fillIn, + uint32_t *aliasData, int32_t maxDataLength, + uint32_t initialValue, uint32_t leadUnitValue, + UBool latin1Linear); + +/** + * Clone a build-time trie structure with all entries. + * + * @param fillIn like in utrie_open() + * @param other the build-time trie structure to clone + * @param aliasData like in utrie_open(), + * used if aliasDataLength>=(capacity of other's data array) + * @param aliasDataLength the length of aliasData + * @return a pointer to the initialized fillIn or the allocated and initialized new UNewTrie + */ +U_CAPI UNewTrie * U_EXPORT2 +utrie_clone(UNewTrie *fillIn, const UNewTrie *other, uint32_t *aliasData, int32_t aliasDataLength); + +/** + * Close a build-time trie structure, and release memory + * that was allocated by utrie_open() or utrie_clone(). + * + * @param trie the build-time trie + */ +U_CAPI void U_EXPORT2 +utrie_close(UNewTrie *trie); + +/** + * Get the data array of a build-time trie. + * The data may be modified, but entries that are equal before + * must still be equal after modification. + * + * @param trie the build-time trie + * @param pLength (out) a pointer to a variable that receives the number + * of entries in the data array + * @return the data array + */ +U_CAPI uint32_t * U_EXPORT2 +utrie_getData(UNewTrie *trie, int32_t *pLength); + +/** + * Set a value for a code point. + * + * @param trie the build-time trie + * @param c the code point + * @param value the value + * @return false if a failure occurred (illegal argument or data array overrun) + */ +U_CAPI UBool U_EXPORT2 +utrie_set32(UNewTrie *trie, UChar32 c, uint32_t value); + +/** + * Get a value from a code point as stored in the build-time trie. + * + * @param trie the build-time trie + * @param c the code point + * @param pInBlockZero if not NULL, then *pInBlockZero is set to true + * iff the value is retrieved from block 0; + * block 0 is the all-initial-value initial block + * @return the value + */ +U_CAPI uint32_t U_EXPORT2 +utrie_get32(UNewTrie *trie, UChar32 c, UBool *pInBlockZero); + +/** + * Set a value in a range of code points [start..limit[. + * All code points c with start<=c<limit will get the value if + * overwrite is true or if the old value is 0. + * + * @param trie the build-time trie + * @param start the first code point to get the value + * @param limit one past the last code point to get the value + * @param value the value + * @param overwrite flag for whether old non-initial values are to be overwritten + * @return false if a failure occurred (illegal argument or data array overrun) + */ +U_CAPI UBool U_EXPORT2 +utrie_setRange32(UNewTrie *trie, UChar32 start, UChar32 limit, uint32_t value, UBool overwrite); + +/** + * Compact the build-time trie after all values are set, and then + * serialize it into 32-bit aligned memory. + * + * After this, the trie can only be serizalized again and/or closed; + * no further values can be added. + * + * @see utrie_unserialize() + * + * @param trie the build-time trie + * @param data a pointer to 32-bit-aligned memory for the trie data + * @param capacity the number of bytes available at data + * @param getFoldedValue a callback function that calculates the value for + * a lead surrogate from all of its supplementary code points + * and the folding offset; + * if NULL, then a default function is used which returns just + * the input offset when there are any non-initial-value entries + * @param reduceTo16Bits flag for whether the values are to be reduced to a + * width of 16 bits for serialization and runtime + * @param pErrorCode a UErrorCode argument; among other possible error codes: + * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization + * - U_MEMORY_ALLOCATION_ERROR if the trie data array is too small + * - U_INDEX_OUTOFBOUNDS_ERROR if the index or data arrays are too long after compaction for serialization + * + * @return the number of bytes written for the trie + */ +U_CAPI int32_t U_EXPORT2 +utrie_serialize(UNewTrie *trie, void *data, int32_t capacity, + UNewTrieGetFoldedValue *getFoldedValue, + UBool reduceTo16Bits, + UErrorCode *pErrorCode); + +/* serialization ------------------------------------------------------------ */ + +// UTrie signature values, in platform endianness and opposite endianness. +// The UTrie signature ASCII byte values spell "Trie". +#define UTRIE_SIG 0x54726965 +#define UTRIE_OE_SIG 0x65697254 + +/** + * Trie data structure in serialized form: + * + * UTrieHeader header; + * uint16_t index[header.indexLength]; + * uint16_t data[header.dataLength]; + * @internal + */ +typedef struct UTrieHeader { + /** "Trie" in big-endian US-ASCII (0x54726965) */ + uint32_t signature; + + /** + * options bit field: + * 9 1=Latin-1 data is stored linearly at data+UTRIE_DATA_BLOCK_LENGTH + * 8 0=16-bit data, 1=32-bit data + * 7..4 UTRIE_INDEX_SHIFT // 0..UTRIE_SHIFT + * 3..0 UTRIE_SHIFT // 1..9 + */ + uint32_t options; + + /** indexLength is a multiple of UTRIE_SURROGATE_BLOCK_COUNT */ + int32_t indexLength; + + /** dataLength>=UTRIE_DATA_BLOCK_LENGTH */ + int32_t dataLength; +} UTrieHeader; + +/** + * Constants for use with UTrieHeader.options. + * @internal + */ +enum { + /** Mask to get the UTRIE_SHIFT value from options. */ + UTRIE_OPTIONS_SHIFT_MASK=0xf, + + /** Shift options right this much to get the UTRIE_INDEX_SHIFT value. */ + UTRIE_OPTIONS_INDEX_SHIFT=4, + + /** If set, then the data (stage 2) array is 32 bits wide. */ + UTRIE_OPTIONS_DATA_IS_32_BIT=0x100, + + /** + * If set, then Latin-1 data (for U+0000..U+00ff) is stored in the data (stage 2) array + * as a simple, linear array at data+UTRIE_DATA_BLOCK_LENGTH. + */ + UTRIE_OPTIONS_LATIN1_IS_LINEAR=0x200 +}; + +U_CDECL_END + +#endif diff --git a/thirdparty/icu4c/common/utrie2.cpp b/thirdparty/icu4c/common/utrie2.cpp new file mode 100644 index 0000000000..24ef5782c9 --- /dev/null +++ b/thirdparty/icu4c/common/utrie2.cpp @@ -0,0 +1,663 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: utrie2.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2008aug16 (starting from a copy of utrie.c) +* created by: Markus W. Scherer +* +* This is a common implementation of a Unicode trie. +* It is a kind of compressed, serializable table of 16- or 32-bit values associated with +* Unicode code points (0..0x10ffff). +* This is the second common version of a Unicode trie (hence the name UTrie2). +* See utrie2.h for a comparison. +* +* This file contains only the runtime and enumeration code, for read-only access. +* See utrie2_builder.c for the builder code. +*/ +#include "unicode/utypes.h" +#ifdef UCPTRIE_DEBUG +#include "unicode/umutablecptrie.h" +#endif +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "utrie2.h" +#include "utrie2_impl.h" +#include "uassert.h" + +/* Public UTrie2 API implementation ----------------------------------------- */ + +static uint32_t +get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) { + int32_t i2, block; + + if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) { + return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY]; + } + + if(U_IS_LEAD(c) && fromLSCP) { + i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ + (c>>UTRIE2_SHIFT_2); + } else { + i2=trie->index1[c>>UTRIE2_SHIFT_1]+ + ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); + } + block=trie->index2[i2]; + return trie->data[block+(c&UTRIE2_DATA_MASK)]; +} + +U_CAPI uint32_t U_EXPORT2 +utrie2_get32(const UTrie2 *trie, UChar32 c) { + if(trie->data16!=NULL) { + return UTRIE2_GET16(trie, c); + } else if(trie->data32!=NULL) { + return UTRIE2_GET32(trie, c); + } else if((uint32_t)c>0x10ffff) { + return trie->errorValue; + } else { + return get32(trie->newTrie, c, TRUE); + } +} + +U_CAPI uint32_t U_EXPORT2 +utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) { + if(!U_IS_LEAD(c)) { + return trie->errorValue; + } + if(trie->data16!=NULL) { + return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c); + } else if(trie->data32!=NULL) { + return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c); + } else { + return get32(trie->newTrie, c, FALSE); + } +} + +static inline int32_t +u8Index(const UTrie2 *trie, UChar32 c, int32_t i) { + int32_t idx= + _UTRIE2_INDEX_FROM_CP( + trie, + trie->data32==NULL ? trie->indexLength : 0, + c); + return (idx<<3)|i; +} + +U_CAPI int32_t U_EXPORT2 +utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c, + const uint8_t *src, const uint8_t *limit) { + int32_t i, length; + i=0; + /* support 64-bit pointers by avoiding cast of arbitrary difference */ + if((limit-src)<=7) { + length=(int32_t)(limit-src); + } else { + length=7; + } + c=utf8_nextCharSafeBody(src, &i, length, c, -1); + return u8Index(trie, c, i); +} + +U_CAPI int32_t U_EXPORT2 +utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, + const uint8_t *start, const uint8_t *src) { + int32_t i, length; + /* support 64-bit pointers by avoiding cast of arbitrary difference */ + if((src-start)<=7) { + i=length=(int32_t)(src-start); + } else { + i=length=7; + start=src-7; + } + c=utf8_prevCharSafeBody(start, 0, &i, c, -1); + i=length-i; /* number of bytes read backward from src */ + return u8Index(trie, c, i); +} + +U_CAPI UTrie2 * U_EXPORT2 +utrie2_openFromSerialized(UTrie2ValueBits valueBits, + const void *data, int32_t length, int32_t *pActualLength, + UErrorCode *pErrorCode) { + const UTrie2Header *header; + const uint16_t *p16; + int32_t actualLength; + + UTrie2 tempTrie; + UTrie2 *trie; + + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) || + valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* enough data for a trie header? */ + if(length<(int32_t)sizeof(UTrie2Header)) { + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + + /* check the signature */ + header=(const UTrie2Header *)data; + if(header->signature!=UTRIE2_SIG) { + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + + /* get the options */ + if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) { + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + + /* get the length values and offsets */ + uprv_memset(&tempTrie, 0, sizeof(tempTrie)); + tempTrie.indexLength=header->indexLength; + tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT; + tempTrie.index2NullOffset=header->index2NullOffset; + tempTrie.dataNullOffset=header->dataNullOffset; + + tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1; + tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY; + if(valueBits==UTRIE2_16_VALUE_BITS) { + tempTrie.highValueIndex+=tempTrie.indexLength; + } + + /* calculate the actual length */ + actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2; + if(valueBits==UTRIE2_16_VALUE_BITS) { + actualLength+=tempTrie.dataLength*2; + } else { + actualLength+=tempTrie.dataLength*4; + } + if(length<actualLength) { + *pErrorCode=U_INVALID_FORMAT_ERROR; /* not enough bytes */ + return 0; + } + + /* allocate the trie */ + trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); + if(trie==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return 0; + } + uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); + trie->memory=(uint32_t *)data; + trie->length=actualLength; + trie->isMemoryOwned=FALSE; +#ifdef UTRIE2_DEBUG + trie->name="fromSerialized"; +#endif + + /* set the pointers to its index and data arrays */ + p16=(const uint16_t *)(header+1); + trie->index=p16; + p16+=trie->indexLength; + + /* get the data */ + switch(valueBits) { + case UTRIE2_16_VALUE_BITS: + trie->data16=p16; + trie->data32=NULL; + trie->initialValue=trie->index[trie->dataNullOffset]; + trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET]; + break; + case UTRIE2_32_VALUE_BITS: + trie->data16=NULL; + trie->data32=(const uint32_t *)p16; + trie->initialValue=trie->data32[trie->dataNullOffset]; + trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET]; + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + + if(pActualLength!=NULL) { + *pActualLength=actualLength; + } + return trie; +} + +U_CAPI UTrie2 * U_EXPORT2 +utrie2_openDummy(UTrie2ValueBits valueBits, + uint32_t initialValue, uint32_t errorValue, + UErrorCode *pErrorCode) { + UTrie2 *trie; + UTrie2Header *header; + uint32_t *p; + uint16_t *dest16; + int32_t indexLength, dataLength, length, i; + int32_t dataMove; /* >0 if the data is moved to the end of the index array */ + + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* calculate the total length of the dummy trie data */ + indexLength=UTRIE2_INDEX_1_OFFSET; + dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY; + length=(int32_t)sizeof(UTrie2Header)+indexLength*2; + if(valueBits==UTRIE2_16_VALUE_BITS) { + length+=dataLength*2; + } else { + length+=dataLength*4; + } + + /* allocate the trie */ + trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); + if(trie==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return 0; + } + uprv_memset(trie, 0, sizeof(UTrie2)); + trie->memory=uprv_malloc(length); + if(trie->memory==NULL) { + uprv_free(trie); + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return 0; + } + trie->length=length; + trie->isMemoryOwned=TRUE; + + /* set the UTrie2 fields */ + if(valueBits==UTRIE2_16_VALUE_BITS) { + dataMove=indexLength; + } else { + dataMove=0; + } + + trie->indexLength=indexLength; + trie->dataLength=dataLength; + trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET; + trie->dataNullOffset=(uint16_t)dataMove; + trie->initialValue=initialValue; + trie->errorValue=errorValue; + trie->highStart=0; + trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET; +#ifdef UTRIE2_DEBUG + trie->name="dummy"; +#endif + + /* set the header fields */ + header=(UTrie2Header *)trie->memory; + + header->signature=UTRIE2_SIG; /* "Tri2" */ + header->options=(uint16_t)valueBits; + + header->indexLength=(uint16_t)indexLength; + header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT); + header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET; + header->dataNullOffset=(uint16_t)dataMove; + header->shiftedHighStart=0; + + /* fill the index and data arrays */ + dest16=(uint16_t *)(header+1); + trie->index=dest16; + + /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */ + for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { + *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT); /* null data block */ + } + + /* write UTF-8 2-byte index-2 values, not right-shifted */ + for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ + *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); + } + for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ + *dest16++=(uint16_t)dataMove; + } + + /* write the 16/32-bit data array */ + switch(valueBits) { + case UTRIE2_16_VALUE_BITS: + /* write 16-bit data values */ + trie->data16=dest16; + trie->data32=NULL; + for(i=0; i<0x80; ++i) { + *dest16++=(uint16_t)initialValue; + } + for(; i<0xc0; ++i) { + *dest16++=(uint16_t)errorValue; + } + /* highValue and reserved values */ + for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { + *dest16++=(uint16_t)initialValue; + } + break; + case UTRIE2_32_VALUE_BITS: + /* write 32-bit data values */ + p=(uint32_t *)dest16; + trie->data16=NULL; + trie->data32=p; + for(i=0; i<0x80; ++i) { + *p++=initialValue; + } + for(; i<0xc0; ++i) { + *p++=errorValue; + } + /* highValue and reserved values */ + for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) { + *p++=initialValue; + } + break; + default: + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + return trie; +} + +U_CAPI void U_EXPORT2 +utrie2_close(UTrie2 *trie) { + if(trie!=NULL) { + if(trie->isMemoryOwned) { + uprv_free(trie->memory); + } + if(trie->newTrie!=NULL) { + uprv_free(trie->newTrie->data); +#ifdef UCPTRIE_DEBUG + umutablecptrie_close(trie->newTrie->t3); +#endif + uprv_free(trie->newTrie); + } + uprv_free(trie); + } +} + +U_CAPI UBool U_EXPORT2 +utrie2_isFrozen(const UTrie2 *trie) { + return (UBool)(trie->newTrie==NULL); +} + +U_CAPI int32_t U_EXPORT2 +utrie2_serialize(const UTrie2 *trie, + void *data, int32_t capacity, + UErrorCode *pErrorCode) { + /* argument check */ + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL || + capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0))) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + if(capacity>=trie->length) { + uprv_memcpy(data, trie->memory, trie->length); + } else { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + } + return trie->length; +} + +/* enumeration -------------------------------------------------------------- */ + +#define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b)) + +/* default UTrie2EnumValue() returns the input value itself */ +static uint32_t U_CALLCONV +enumSameValue(const void * /*context*/, uint32_t value) { + return value; +} + +/** + * Enumerate all ranges of code points with the same relevant values. + * The values are transformed from the raw trie entries by the enumValue function. + * + * Currently requires start<limit and both start and limit must be multiples + * of UTRIE2_DATA_BLOCK_LENGTH. + * + * Optimizations: + * - Skip a whole block if we know that it is filled with a single value, + * and it is the same as we visited just before. + * - Handle the null block specially because we know a priori that it is filled + * with a single value. + */ +static void +enumEitherTrie(const UTrie2 *trie, + UChar32 start, UChar32 limit, + UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { + const uint32_t *data32; + const uint16_t *idx; + + uint32_t value, prevValue, initialValue; + UChar32 c, prev, highStart; + int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; + + if(enumRange==NULL) { + return; + } + if(enumValue==NULL) { + enumValue=enumSameValue; + } + + if(trie->newTrie==NULL) { + /* frozen trie */ + idx=trie->index; + U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */ + data32=trie->data32; + + index2NullOffset=trie->index2NullOffset; + nullBlock=trie->dataNullOffset; + } else { + /* unfrozen, mutable trie */ + idx=NULL; + data32=trie->newTrie->data; + U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */ + + index2NullOffset=trie->newTrie->index2NullOffset; + nullBlock=trie->newTrie->dataNullOffset; + } + + highStart=trie->highStart; + + /* get the enumeration value that corresponds to an initial-value trie data entry */ + initialValue=enumValue(context, trie->initialValue); + + /* set variables for previous range */ + prevI2Block=-1; + prevBlock=-1; + prev=start; + prevValue=0; + + /* enumerate index-2 blocks */ + for(c=start; c<limit && c<highStart;) { + /* Code point limit for iterating inside this i2Block. */ + UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY; + if(limit<tempLimit) { + tempLimit=limit; + } + if(c<=0xffff) { + if(!U_IS_SURROGATE(c)) { + i2Block=c>>UTRIE2_SHIFT_2; + } else if(U_IS_SURROGATE_LEAD(c)) { + /* + * Enumerate values for lead surrogate code points, not code units: + * This special block has half the normal length. + */ + i2Block=UTRIE2_LSCP_INDEX_2_OFFSET; + tempLimit=MIN_VALUE(0xdc00, limit); + } else { + /* + * Switch back to the normal part of the index-2 table. + * Enumerate the second half of the surrogates block. + */ + i2Block=0xd800>>UTRIE2_SHIFT_2; + tempLimit=MIN_VALUE(0xe000, limit); + } + } else { + /* supplementary code points */ + if(idx!=NULL) { + i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ + (c>>UTRIE2_SHIFT_1)]; + } else { + i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1]; + } + if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) { + /* + * The index-2 block is the same as the previous one, and filled with prevValue. + * Only possible for supplementary code points because the linear-BMP index-2 + * table creates unique i2Block values. + */ + c+=UTRIE2_CP_PER_INDEX_1_ENTRY; + continue; + } + } + prevI2Block=i2Block; + if(i2Block==index2NullOffset) { + /* this is the null index-2 block */ + if(prevValue!=initialValue) { + if(prev<c && !enumRange(context, prev, c-1, prevValue)) { + return; + } + prevBlock=nullBlock; + prev=c; + prevValue=initialValue; + } + c+=UTRIE2_CP_PER_INDEX_1_ENTRY; + } else { + /* enumerate data blocks for one index-2 block */ + int32_t i2, i2Limit; + i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; + if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) { + i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; + } else { + i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH; + } + for(; i2<i2Limit; ++i2) { + if(idx!=NULL) { + block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT; + } else { + block=trie->newTrie->index2[i2Block+i2]; + } + if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) { + /* the block is the same as the previous one, and filled with prevValue */ + c+=UTRIE2_DATA_BLOCK_LENGTH; + continue; + } + prevBlock=block; + if(block==nullBlock) { + /* this is the null data block */ + if(prevValue!=initialValue) { + if(prev<c && !enumRange(context, prev, c-1, prevValue)) { + return; + } + prev=c; + prevValue=initialValue; + } + c+=UTRIE2_DATA_BLOCK_LENGTH; + } else { + for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) { + value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]); + if(value!=prevValue) { + if(prev<c && !enumRange(context, prev, c-1, prevValue)) { + return; + } + prev=c; + prevValue=value; + } + ++c; + } + } + } + } + } + + if(c>limit) { + c=limit; /* could be higher if in the index2NullOffset */ + } else if(c<limit) { + /* c==highStart<limit */ + uint32_t highValue; + if(idx!=NULL) { + highValue= + data32!=NULL ? + data32[trie->highValueIndex] : + idx[trie->highValueIndex]; + } else { + highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY]; + } + value=enumValue(context, highValue); + if(value!=prevValue) { + if(prev<c && !enumRange(context, prev, c-1, prevValue)) { + return; + } + prev=c; + prevValue=value; + } + c=limit; + } + + /* deliver last range */ + enumRange(context, prev, c-1, prevValue); +} + +U_CAPI void U_EXPORT2 +utrie2_enum(const UTrie2 *trie, + UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) { + enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context); +} + +U_CAPI void U_EXPORT2 +utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead, + UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, + const void *context) { + if(!U16_IS_LEAD(lead)) { + return; + } + lead=(lead-0xd7c0)<<10; /* start code point */ + enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context); +} + +/* C++ convenience wrappers ------------------------------------------------- */ + +U_NAMESPACE_BEGIN + +uint16_t BackwardUTrie2StringIterator::previous16() { + codePointLimit=codePointStart; + if(start>=codePointStart) { + codePoint=U_SENTINEL; + return static_cast<uint16_t>(trie->errorValue); + } + uint16_t result; + UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result); + return result; +} + +uint16_t ForwardUTrie2StringIterator::next16() { + codePointStart=codePointLimit; + if(codePointLimit==limit) { + codePoint=U_SENTINEL; + return static_cast<uint16_t>(trie->errorValue); + } + uint16_t result; + UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result); + return result; +} + +U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/utrie2.h b/thirdparty/icu4c/common/utrie2.h new file mode 100644 index 0000000000..d1e1e15a6e --- /dev/null +++ b/thirdparty/icu4c/common/utrie2.h @@ -0,0 +1,955 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: utrie2.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2008aug16 (starting from a copy of utrie.h) +* created by: Markus W. Scherer +*/ + +#ifndef __UTRIE2_H__ +#define __UTRIE2_H__ + +#include "unicode/utypes.h" +#include "unicode/utf8.h" +#include "putilimp.h" + +U_CDECL_BEGIN + +struct UTrie; /* forward declaration */ +#ifndef __UTRIE_H__ +typedef struct UTrie UTrie; +#endif + +/** + * \file + * + * This is a common implementation of a Unicode trie. + * It is a kind of compressed, serializable table of 16- or 32-bit values associated with + * Unicode code points (0..0x10ffff). (A map from code points to integers.) + * + * This is the second common version of a Unicode trie (hence the name UTrie2). + * Compared with UTrie version 1: + * - Still splitting BMP code points 11:5 bits for index and data table lookups. + * - Still separate data for lead surrogate code _units_ vs. code _points_, + * but the lead surrogate code unit values are not required any more + * for data lookup for supplementary code points. + * - The "folding" mechanism is removed. In UTrie version 1, this somewhat + * hard-to-explain mechanism was meant to be used for optimized UTF-16 + * processing, with application-specific encoding of indexing bits + * in the lead surrogate data for the associated supplementary code points. + * - For the last single-value code point range (ending with U+10ffff), + * the starting code point ("highStart") and the value are stored. + * - For supplementary code points U+10000..highStart-1 a three-table lookup + * (two index tables and one data table) is used. The first index + * is truncated, omitting both the BMP portion and the high range. + * - There is a special small index for 2-byte UTF-8, and the initial data + * entries are designed for fast 1/2-byte UTF-8 lookup. + * Starting with ICU 60, C0 and C1 are not recognized as UTF-8 lead bytes any more at all, + * and the associated 2-byte indexes are unused. + */ + +/** + * Trie structure. + * Use only with public API macros and functions. + */ +struct UTrie2; +typedef struct UTrie2 UTrie2; + +/* Public UTrie2 API functions: read-only access ---------------------------- */ + +/** + * Selectors for the width of a UTrie2 data value. + */ +enum UTrie2ValueBits { + /** 16 bits per UTrie2 data value. */ + UTRIE2_16_VALUE_BITS, + /** 32 bits per UTrie2 data value. */ + UTRIE2_32_VALUE_BITS, + /** Number of selectors for the width of UTrie2 data values. */ + UTRIE2_COUNT_VALUE_BITS +}; +typedef enum UTrie2ValueBits UTrie2ValueBits; + +/** + * Open a frozen trie from its serialized from, stored in 32-bit-aligned memory. + * Inverse of utrie2_serialize(). + * The memory must remain valid and unchanged as long as the trie is used. + * You must utrie2_close() the trie once you are done using it. + * + * @param valueBits selects the data entry size; results in an + * U_INVALID_FORMAT_ERROR if it does not match the serialized form + * @param data a pointer to 32-bit-aligned memory containing the serialized form of a UTrie2 + * @param length the number of bytes available at data; + * can be more than necessary + * @param pActualLength receives the actual number of bytes at data taken up by the trie data; + * can be NULL + * @param pErrorCode an in/out ICU UErrorCode + * @return the unserialized trie + * + * @see utrie2_open + * @see utrie2_serialize + */ +U_CAPI UTrie2 * U_EXPORT2 +utrie2_openFromSerialized(UTrie2ValueBits valueBits, + const void *data, int32_t length, int32_t *pActualLength, + UErrorCode *pErrorCode); + +/** + * Open a frozen, empty "dummy" trie. + * A dummy trie is an empty trie, used when a real data trie cannot + * be loaded. Equivalent to calling utrie2_open() and utrie2_freeze(), + * but without internally creating and compacting/serializing the + * builder data structure. + * + * The trie always returns the initialValue, + * or the errorValue for out-of-range code points and illegal UTF-8. + * + * You must utrie2_close() the trie once you are done using it. + * + * @param valueBits selects the data entry size + * @param initialValue the initial value that is set for all code points + * @param errorValue the value for out-of-range code points and illegal UTF-8 + * @param pErrorCode an in/out ICU UErrorCode + * @return the dummy trie + * + * @see utrie2_openFromSerialized + * @see utrie2_open + */ +U_CAPI UTrie2 * U_EXPORT2 +utrie2_openDummy(UTrie2ValueBits valueBits, + uint32_t initialValue, uint32_t errorValue, + UErrorCode *pErrorCode); + +/** + * Get a value from a code point as stored in the trie. + * Easier to use than UTRIE2_GET16() and UTRIE2_GET32() but slower. + * Easier to use because, unlike the macros, this function works on all UTrie2 + * objects, frozen or not, holding 16-bit or 32-bit data values. + * + * @param trie the trie + * @param c the code point + * @return the value + */ +U_CAPI uint32_t U_EXPORT2 +utrie2_get32(const UTrie2 *trie, UChar32 c); + +/* enumeration callback types */ + +/** + * Callback from utrie2_enum(), extracts a uint32_t value from a + * trie value. This value will be passed on to the UTrie2EnumRange function. + * + * @param context an opaque pointer, as passed into utrie2_enum() + * @param value a value from the trie + * @return the value that is to be passed on to the UTrie2EnumRange function + */ +typedef uint32_t U_CALLCONV +UTrie2EnumValue(const void *context, uint32_t value); + +/** + * Callback from utrie2_enum(), is called for each contiguous range + * of code points with the same value as retrieved from the trie and + * transformed by the UTrie2EnumValue function. + * + * The callback function can stop the enumeration by returning false. + * + * @param context an opaque pointer, as passed into utrie2_enum() + * @param start the first code point in a contiguous range with value + * @param end the last code point in a contiguous range with value (inclusive) + * @param value the value that is set for all code points in [start..end] + * @return false to stop the enumeration + */ +typedef UBool U_CALLCONV +UTrie2EnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value); + +/** + * Enumerate efficiently all values in a trie. + * Do not modify the trie during the enumeration. + * + * For each entry in the trie, the value to be delivered is passed through + * the UTrie2EnumValue function. + * The value is unchanged if that function pointer is NULL. + * + * For each contiguous range of code points with a given (transformed) value, + * the UTrie2EnumRange function is called. + * + * @param trie a pointer to the trie + * @param enumValue a pointer to a function that may transform the trie entry value, + * or NULL if the values from the trie are to be used directly + * @param enumRange a pointer to a function that is called for each contiguous range + * of code points with the same (transformed) value + * @param context an opaque pointer that is passed on to the callback functions + */ +U_CAPI void U_EXPORT2 +utrie2_enum(const UTrie2 *trie, + UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context); + +/* Building a trie ---------------------------------------------------------- */ + +/** + * Open an empty, writable trie. At build time, 32-bit data values are used. + * utrie2_freeze() takes a valueBits parameter + * which determines the data value width in the serialized and frozen forms. + * You must utrie2_close() the trie once you are done using it. + * + * @param initialValue the initial value that is set for all code points + * @param errorValue the value for out-of-range code points and illegal UTF-8 + * @param pErrorCode an in/out ICU UErrorCode + * @return a pointer to the allocated and initialized new trie + */ +U_CAPI UTrie2 * U_EXPORT2 +utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode); + +/** + * Clone a trie. + * You must utrie2_close() the clone once you are done using it. + * + * @param other the trie to clone + * @param pErrorCode an in/out ICU UErrorCode + * @return a pointer to the new trie clone + */ +U_CAPI UTrie2 * U_EXPORT2 +utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode); + +/** + * Clone a trie. The clone will be mutable/writable even if the other trie + * is frozen. (See utrie2_freeze().) + * You must utrie2_close() the clone once you are done using it. + * + * @param other the trie to clone + * @param pErrorCode an in/out ICU UErrorCode + * @return a pointer to the new trie clone + */ +U_CAPI UTrie2 * U_EXPORT2 +utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode); + +/** + * Close a trie and release associated memory. + * + * @param trie the trie + */ +U_CAPI void U_EXPORT2 +utrie2_close(UTrie2 *trie); + +/** + * Set a value for a code point. + * + * @param trie the unfrozen trie + * @param c the code point + * @param value the value + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: + * - U_NO_WRITE_PERMISSION if the trie is frozen + */ +U_CAPI void U_EXPORT2 +utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode); + +/** + * Set a value in a range of code points [start..end]. + * All code points c with start<=c<=end will get the value if + * overwrite is true or if the old value is the initial value. + * + * @param trie the unfrozen trie + * @param start the first code point to get the value + * @param end the last code point to get the value (inclusive) + * @param value the value + * @param overwrite flag for whether old non-initial values are to be overwritten + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: + * - U_NO_WRITE_PERMISSION if the trie is frozen + */ +U_CAPI void U_EXPORT2 +utrie2_setRange32(UTrie2 *trie, + UChar32 start, UChar32 end, + uint32_t value, UBool overwrite, + UErrorCode *pErrorCode); + +/** + * Freeze a trie. Make it immutable (read-only) and compact it, + * ready for serialization and for use with fast macros. + * Functions to set values will fail after serializing. + * + * A trie can be frozen only once. If this function is called again with different + * valueBits then it will set a U_ILLEGAL_ARGUMENT_ERROR. + * + * @param trie the trie + * @param valueBits selects the data entry size; if smaller than 32 bits, then + * the values stored in the trie will be truncated + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: + * - U_INDEX_OUTOFBOUNDS_ERROR if the compacted index or data arrays are too long + * for serialization + * (the trie will be immutable and usable, + * but not frozen and not usable with the fast macros) + * + * @see utrie2_cloneAsThawed + */ +U_CAPI void U_EXPORT2 +utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode); + +/** + * Test if the trie is frozen. (See utrie2_freeze().) + * + * @param trie the trie + * @return true if the trie is frozen, that is, immutable, ready for serialization + * and for use with fast macros + */ +U_CAPI UBool U_EXPORT2 +utrie2_isFrozen(const UTrie2 *trie); + +/** + * Serialize a frozen trie into 32-bit aligned memory. + * If the trie is not frozen, then the function returns with a U_ILLEGAL_ARGUMENT_ERROR. + * A trie can be serialized multiple times. + * + * @param trie the frozen trie + * @param data a pointer to 32-bit-aligned memory to be filled with the trie data, + * can be NULL if capacity==0 + * @param capacity the number of bytes available at data, + * or 0 for preflighting + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: + * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization + * - U_ILLEGAL_ARGUMENT_ERROR if the trie is not frozen or the data and capacity + * parameters are bad + * @return the number of bytes written or needed for the trie + * + * @see utrie2_openFromSerialized() + */ +U_CAPI int32_t U_EXPORT2 +utrie2_serialize(const UTrie2 *trie, + void *data, int32_t capacity, + UErrorCode *pErrorCode); + +/* Public UTrie2 API: miscellaneous functions ------------------------------- */ + +/** + * Build a UTrie2 (version 2) from a UTrie (version 1). + * Enumerates all values in the UTrie and builds a UTrie2 with the same values. + * The resulting UTrie2 will be frozen. + * + * @param trie1 the runtime UTrie structure to be enumerated + * @param errorValue the value for out-of-range code points and illegal UTF-8 + * @param pErrorCode an in/out ICU UErrorCode + * @return The frozen UTrie2 with the same values as the UTrie. + */ +U_CAPI UTrie2 * U_EXPORT2 +utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode); + +/* Public UTrie2 API macros ------------------------------------------------- */ + +/* + * These macros provide fast data lookup from a frozen trie. + * They will crash when used on an unfrozen trie. + */ + +/** + * Return a 16-bit trie value from a code point, with range checking. + * Returns trie->errorValue if c is not in the range 0..U+10ffff. + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param c (UChar32, in) the input code point + * @return (uint16_t) The code point's trie value. + */ +#define UTRIE2_GET16(trie, c) _UTRIE2_GET((trie), index, (trie)->indexLength, (c)) + +/** + * Return a 32-bit trie value from a code point, with range checking. + * Returns trie->errorValue if c is not in the range 0..U+10ffff. + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param c (UChar32, in) the input code point + * @return (uint32_t) The code point's trie value. + */ +#define UTRIE2_GET32(trie, c) _UTRIE2_GET((trie), data32, 0, (c)) + +/** + * UTF-16: Get the next code point (UChar32 c, out), post-increment src, + * and get a 16-bit value from the trie. + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param src (const UChar *, in/out) the source text pointer + * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated + * @param c (UChar32, out) variable for the code point + * @param result (uint16_t, out) uint16_t variable for the trie lookup result + */ +#define UTRIE2_U16_NEXT16(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, index, src, limit, c, result) + +/** + * UTF-16: Get the next code point (UChar32 c, out), post-increment src, + * and get a 32-bit value from the trie. + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param src (const UChar *, in/out) the source text pointer + * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated + * @param c (UChar32, out) variable for the code point + * @param result (uint32_t, out) uint32_t variable for the trie lookup result + */ +#define UTRIE2_U16_NEXT32(trie, src, limit, c, result) _UTRIE2_U16_NEXT(trie, data32, src, limit, c, result) + +/** + * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src, + * and get a 16-bit value from the trie. + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param start (const UChar *, in) the start pointer for the text + * @param src (const UChar *, in/out) the source text pointer + * @param c (UChar32, out) variable for the code point + * @param result (uint16_t, out) uint16_t variable for the trie lookup result + */ +#define UTRIE2_U16_PREV16(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, index, start, src, c, result) + +/** + * UTF-16: Get the previous code point (UChar32 c, out), pre-decrement src, + * and get a 32-bit value from the trie. + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param start (const UChar *, in) the start pointer for the text + * @param src (const UChar *, in/out) the source text pointer + * @param c (UChar32, out) variable for the code point + * @param result (uint32_t, out) uint32_t variable for the trie lookup result + */ +#define UTRIE2_U16_PREV32(trie, start, src, c, result) _UTRIE2_U16_PREV(trie, data32, start, src, c, result) + +/** + * UTF-8: Post-increment src and get a 16-bit value from the trie. + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param src (const char *, in/out) the source text pointer + * @param limit (const char *, in) the limit pointer for the text (must not be NULL) + * @param result (uint16_t, out) uint16_t variable for the trie lookup result + */ +#define UTRIE2_U8_NEXT16(trie, src, limit, result)\ + _UTRIE2_U8_NEXT(trie, data16, index, src, limit, result) + +/** + * UTF-8: Post-increment src and get a 32-bit value from the trie. + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param src (const char *, in/out) the source text pointer + * @param limit (const char *, in) the limit pointer for the text (must not be NULL) + * @param result (uint16_t, out) uint32_t variable for the trie lookup result + */ +#define UTRIE2_U8_NEXT32(trie, src, limit, result) \ + _UTRIE2_U8_NEXT(trie, data32, data32, src, limit, result) + +/** + * UTF-8: Pre-decrement src and get a 16-bit value from the trie. + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param start (const char *, in) the start pointer for the text + * @param src (const char *, in/out) the source text pointer + * @param result (uint16_t, out) uint16_t variable for the trie lookup result + */ +#define UTRIE2_U8_PREV16(trie, start, src, result) \ + _UTRIE2_U8_PREV(trie, data16, index, start, src, result) + +/** + * UTF-8: Pre-decrement src and get a 32-bit value from the trie. + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param start (const char *, in) the start pointer for the text + * @param src (const char *, in/out) the source text pointer + * @param result (uint16_t, out) uint32_t variable for the trie lookup result + */ +#define UTRIE2_U8_PREV32(trie, start, src, result) \ + _UTRIE2_U8_PREV(trie, data32, data32, start, src, result) + +/* Public UTrie2 API: optimized UTF-16 access ------------------------------- */ + +/* + * The following functions and macros are used for highly optimized UTF-16 + * text processing. The UTRIE2_U16_NEXTxy() macros do not depend on these. + * + * A UTrie2 stores separate values for lead surrogate code _units_ vs. code _points_. + * UTF-16 text processing can be optimized by detecting surrogate pairs and + * assembling supplementary code points only when there is non-trivial data + * available. + * + * At build-time, use utrie2_enumForLeadSurrogate() to see if there + * is non-trivial (non-initialValue) data for any of the supplementary + * code points associated with a lead surrogate. + * If so, then set a special (application-specific) value for the + * lead surrogate code _unit_, with utrie2_set32ForLeadSurrogateCodeUnit(). + * + * At runtime, use UTRIE2_GET16_FROM_U16_SINGLE_LEAD() or + * UTRIE2_GET32_FROM_U16_SINGLE_LEAD() per code unit. If there is non-trivial + * data and the code unit is a lead surrogate, then check if a trail surrogate + * follows. If so, assemble the supplementary code point with + * U16_GET_SUPPLEMENTARY() and look up its value with UTRIE2_GET16_FROM_SUPP() + * or UTRIE2_GET32_FROM_SUPP(); otherwise reset the lead + * surrogate's value or do a code point lookup for it. + * + * If there is only trivial data for lead and trail surrogates, then processing + * can often skip them. For example, in normalization or case mapping + * all characters that do not have any mappings are simply copied as is. + */ + +/** + * Get a value from a lead surrogate code unit as stored in the trie. + * + * @param trie the trie + * @param c the code unit (U+D800..U+DBFF) + * @return the value + */ +U_CAPI uint32_t U_EXPORT2 +utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c); + +/** + * Enumerate the trie values for the 1024=0x400 code points + * corresponding to a given lead surrogate. + * For example, for the lead surrogate U+D87E it will enumerate the values + * for [U+2F800..U+2FC00[. + * Used by data builder code that sets special lead surrogate code unit values + * for optimized UTF-16 string processing. + * + * Do not modify the trie during the enumeration. + * + * Except for the limited code point range, this functions just like utrie2_enum(): + * For each entry in the trie, the value to be delivered is passed through + * the UTrie2EnumValue function. + * The value is unchanged if that function pointer is NULL. + * + * For each contiguous range of code points with a given (transformed) value, + * the UTrie2EnumRange function is called. + * + * @param trie a pointer to the trie + * @param enumValue a pointer to a function that may transform the trie entry value, + * or NULL if the values from the trie are to be used directly + * @param enumRange a pointer to a function that is called for each contiguous range + * of code points with the same (transformed) value + * @param context an opaque pointer that is passed on to the callback functions + */ +U_CAPI void U_EXPORT2 +utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead, + UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, + const void *context); + +/** + * Set a value for a lead surrogate code unit. + * + * @param trie the unfrozen trie + * @param lead the lead surrogate code unit (U+D800..U+DBFF) + * @param value the value + * @param pErrorCode an in/out ICU UErrorCode; among other possible error codes: + * - U_NO_WRITE_PERMISSION if the trie is frozen + */ +U_CAPI void U_EXPORT2 +utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie, + UChar32 lead, uint32_t value, + UErrorCode *pErrorCode); + +/** + * Return a 16-bit trie value from a UTF-16 single/lead code unit (<=U+ffff). + * Same as UTRIE2_GET16() if c is a BMP code point except for lead surrogates, + * but smaller and faster. + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff + * @return (uint16_t) The code unit's trie value. + */ +#define UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), index, c) + +/** + * Return a 32-bit trie value from a UTF-16 single/lead code unit (<=U+ffff). + * Same as UTRIE2_GET32() if c is a BMP code point except for lead surrogates, + * but smaller and faster. + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param c (UChar32, in) the input code unit, must be 0<=c<=U+ffff + * @return (uint32_t) The code unit's trie value. + */ +#define UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c) _UTRIE2_GET_FROM_U16_SINGLE_LEAD((trie), data32, c) + +/** + * Return a 16-bit trie value from a supplementary code point (U+10000..U+10ffff). + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff + * @return (uint16_t) The code point's trie value. + */ +#define UTRIE2_GET16_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), index, c) + +/** + * Return a 32-bit trie value from a supplementary code point (U+10000..U+10ffff). + * + * @param trie (const UTrie2 *, in) a frozen trie + * @param c (UChar32, in) the input code point, must be U+10000<=c<=U+10ffff + * @return (uint32_t) The code point's trie value. + */ +#define UTRIE2_GET32_FROM_SUPP(trie, c) _UTRIE2_GET_FROM_SUPP((trie), data32, c) + +U_CDECL_END + +/* C++ convenience wrappers ------------------------------------------------- */ + +#ifdef __cplusplus + +#include "unicode/utf.h" +#include "mutex.h" + +U_NAMESPACE_BEGIN + +// Use the Forward/Backward subclasses below. +class UTrie2StringIterator : public UMemory { +public: + UTrie2StringIterator(const UTrie2 *t, const UChar *p) : + trie(t), codePointStart(p), codePointLimit(p), codePoint(U_SENTINEL) {} + + const UTrie2 *trie; + const UChar *codePointStart, *codePointLimit; + UChar32 codePoint; +}; + +class BackwardUTrie2StringIterator : public UTrie2StringIterator { +public: + BackwardUTrie2StringIterator(const UTrie2 *t, const UChar *s, const UChar *p) : + UTrie2StringIterator(t, p), start(s) {} + + uint16_t previous16(); + + const UChar *start; +}; + +class ForwardUTrie2StringIterator : public UTrie2StringIterator { +public: + // Iteration limit l can be NULL. + // In that case, the caller must detect c==0 and stop. + ForwardUTrie2StringIterator(const UTrie2 *t, const UChar *p, const UChar *l) : + UTrie2StringIterator(t, p), limit(l) {} + + uint16_t next16(); + + const UChar *limit; +}; + +U_NAMESPACE_END + +#endif + +/* Internal definitions ----------------------------------------------------- */ + +U_CDECL_BEGIN + +/** Build-time trie structure. */ +struct UNewTrie2; +typedef struct UNewTrie2 UNewTrie2; + +/* + * Trie structure definition. + * + * Either the data table is 16 bits wide and accessed via the index + * pointer, with each index item increased by indexLength; + * in this case, data32==NULL, and data16 is used for direct ASCII access. + * + * Or the data table is 32 bits wide and accessed via the data32 pointer. + */ +struct UTrie2 { + /* protected: used by macros and functions for reading values */ + const uint16_t *index; + const uint16_t *data16; /* for fast UTF-8 ASCII access, if 16b data */ + const uint32_t *data32; /* NULL if 16b data is used via index */ + + int32_t indexLength, dataLength; + uint16_t index2NullOffset; /* 0xffff if there is no dedicated index-2 null block */ + uint16_t dataNullOffset; + uint32_t initialValue; + /** Value returned for out-of-range code points and illegal UTF-8. */ + uint32_t errorValue; + + /* Start of the last range which ends at U+10ffff, and its value. */ + UChar32 highStart; + int32_t highValueIndex; + + /* private: used by builder and unserialization functions */ + void *memory; /* serialized bytes; NULL if not frozen yet */ + int32_t length; /* number of serialized bytes at memory; 0 if not frozen yet */ + UBool isMemoryOwned; /* true if the trie owns the memory */ + UBool padding1; + int16_t padding2; + UNewTrie2 *newTrie; /* builder object; NULL when frozen */ + +#ifdef UTRIE2_DEBUG + const char *name; +#endif +}; + +/** + * Trie constants, defining shift widths, index array lengths, etc. + * + * These are needed for the runtime macros but users can treat these as + * implementation details and skip to the actual public API further below. + */ +enum { + /** Shift size for getting the index-1 table offset. */ + UTRIE2_SHIFT_1=6+5, + + /** Shift size for getting the index-2 table offset. */ + UTRIE2_SHIFT_2=5, + + /** + * Difference between the two shift sizes, + * for getting an index-1 offset from an index-2 offset. 6=11-5 + */ + UTRIE2_SHIFT_1_2=UTRIE2_SHIFT_1-UTRIE2_SHIFT_2, + + /** + * Number of index-1 entries for the BMP. 32=0x20 + * This part of the index-1 table is omitted from the serialized form. + */ + UTRIE2_OMITTED_BMP_INDEX_1_LENGTH=0x10000>>UTRIE2_SHIFT_1, + + /** Number of code points per index-1 table entry. 2048=0x800 */ + UTRIE2_CP_PER_INDEX_1_ENTRY=1<<UTRIE2_SHIFT_1, + + /** Number of entries in an index-2 block. 64=0x40 */ + UTRIE2_INDEX_2_BLOCK_LENGTH=1<<UTRIE2_SHIFT_1_2, + + /** Mask for getting the lower bits for the in-index-2-block offset. */ + UTRIE2_INDEX_2_MASK=UTRIE2_INDEX_2_BLOCK_LENGTH-1, + + /** Number of entries in a data block. 32=0x20 */ + UTRIE2_DATA_BLOCK_LENGTH=1<<UTRIE2_SHIFT_2, + + /** Mask for getting the lower bits for the in-data-block offset. */ + UTRIE2_DATA_MASK=UTRIE2_DATA_BLOCK_LENGTH-1, + + /** + * Shift size for shifting left the index array values. + * Increases possible data size with 16-bit index values at the cost + * of compactability. + * This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY. + */ + UTRIE2_INDEX_SHIFT=2, + + /** The alignment size of a data block. Also the granularity for compaction. */ + UTRIE2_DATA_GRANULARITY=1<<UTRIE2_INDEX_SHIFT, + + /* Fixed layout of the first part of the index array. ------------------- */ + + /** + * The BMP part of the index-2 table is fixed and linear and starts at offset 0. + * Length=2048=0x800=0x10000>>UTRIE2_SHIFT_2. + */ + UTRIE2_INDEX_2_OFFSET=0, + + /** + * The part of the index-2 table for U+D800..U+DBFF stores values for + * lead surrogate code _units_ not code _points_. + * Values for lead surrogate code _points_ are indexed with this portion of the table. + * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.) + */ + UTRIE2_LSCP_INDEX_2_OFFSET=0x10000>>UTRIE2_SHIFT_2, + UTRIE2_LSCP_INDEX_2_LENGTH=0x400>>UTRIE2_SHIFT_2, + + /** Count the lengths of both BMP pieces. 2080=0x820 */ + UTRIE2_INDEX_2_BMP_LENGTH=UTRIE2_LSCP_INDEX_2_OFFSET+UTRIE2_LSCP_INDEX_2_LENGTH, + + /** + * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820. + * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2. + */ + UTRIE2_UTF8_2B_INDEX_2_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH, + UTRIE2_UTF8_2B_INDEX_2_LENGTH=0x800>>6, /* U+0800 is the first code point after 2-byte UTF-8 */ + + /** + * The index-1 table, only used for supplementary code points, at offset 2112=0x840. + * Variable length, for code points up to highStart, where the last single-value range starts. + * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1. + * (For 0x100000 supplementary code points U+10000..U+10ffff.) + * + * The part of the index-2 table for supplementary code points starts + * after this index-1 table. + * + * Both the index-1 table and the following part of the index-2 table + * are omitted completely if there is only BMP data. + */ + UTRIE2_INDEX_1_OFFSET=UTRIE2_UTF8_2B_INDEX_2_OFFSET+UTRIE2_UTF8_2B_INDEX_2_LENGTH, + UTRIE2_MAX_INDEX_1_LENGTH=0x100000>>UTRIE2_SHIFT_1, + + /* + * Fixed layout of the first part of the data array. ----------------------- + * Starts with 4 blocks (128=0x80 entries) for ASCII. + */ + + /** + * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80. + * Used with linear access for single bytes 0..0xbf for simple error handling. + * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH. + */ + UTRIE2_BAD_UTF8_DATA_OFFSET=0x80, + + /** The start of non-linear-ASCII data blocks, at offset 192=0xc0. */ + UTRIE2_DATA_START_OFFSET=0xc0 +}; + +/* Internal functions and macros -------------------------------------------- */ + +/** + * Internal function for part of the UTRIE2_U8_NEXTxx() macro implementations. + * Do not call directly. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c, + const uint8_t *src, const uint8_t *limit); + +/** + * Internal function for part of the UTRIE2_U8_PREVxx() macro implementations. + * Do not call directly. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c, + const uint8_t *start, const uint8_t *src); + + +/** Internal low-level trie getter. Returns a data index. */ +#define _UTRIE2_INDEX_RAW(offset, trieIndex, c) \ + (((int32_t)((trieIndex)[(offset)+((c)>>UTRIE2_SHIFT_2)]) \ + <<UTRIE2_INDEX_SHIFT)+ \ + ((c)&UTRIE2_DATA_MASK)) + +/** Internal trie getter from a UTF-16 single/lead code unit. Returns the data index. */ +#define _UTRIE2_INDEX_FROM_U16_SINGLE_LEAD(trieIndex, c) _UTRIE2_INDEX_RAW(0, trieIndex, c) + +/** Internal trie getter from a lead surrogate code point (D800..DBFF). Returns the data index. */ +#define _UTRIE2_INDEX_FROM_LSCP(trieIndex, c) \ + _UTRIE2_INDEX_RAW(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2), trieIndex, c) + +/** Internal trie getter from a BMP code point. Returns the data index. */ +#define _UTRIE2_INDEX_FROM_BMP(trieIndex, c) \ + _UTRIE2_INDEX_RAW(U_IS_LEAD(c) ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \ + trieIndex, c) + +/** Internal trie getter from a supplementary code point below highStart. Returns the data index. */ +#define _UTRIE2_INDEX_FROM_SUPP(trieIndex, c) \ + (((int32_t)((trieIndex)[ \ + (trieIndex)[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+ \ + ((c)>>UTRIE2_SHIFT_1)]+ \ + (((c)>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK)]) \ + <<UTRIE2_INDEX_SHIFT)+ \ + ((c)&UTRIE2_DATA_MASK)) + +/** + * Internal trie getter from a code point, with checking that c is in 0..10FFFF. + * Returns the data index. + */ +#define _UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c) \ + ((uint32_t)(c)<0xd800 ? \ + _UTRIE2_INDEX_RAW(0, (trie)->index, c) : \ + (uint32_t)(c)<=0xffff ? \ + _UTRIE2_INDEX_RAW( \ + (c)<=0xdbff ? UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2) : 0, \ + (trie)->index, c) : \ + (uint32_t)(c)>0x10ffff ? \ + (asciiOffset)+UTRIE2_BAD_UTF8_DATA_OFFSET : \ + (c)>=(trie)->highStart ? \ + (trie)->highValueIndex : \ + _UTRIE2_INDEX_FROM_SUPP((trie)->index, c)) + +/** Internal trie getter from a UTF-16 single/lead code unit. Returns the data. */ +#define _UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c) \ + (trie)->data[_UTRIE2_INDEX_FROM_U16_SINGLE_LEAD((trie)->index, c)] + +/** Internal trie getter from a supplementary code point. Returns the data. */ +#define _UTRIE2_GET_FROM_SUPP(trie, data, c) \ + (trie)->data[(c)>=(trie)->highStart ? (trie)->highValueIndex : \ + _UTRIE2_INDEX_FROM_SUPP((trie)->index, c)] + +/** + * Internal trie getter from a code point, with checking that c is in 0..10FFFF. + * Returns the data. + */ +#define _UTRIE2_GET(trie, data, asciiOffset, c) \ + (trie)->data[_UTRIE2_INDEX_FROM_CP(trie, asciiOffset, c)] + +/** Internal next-post-increment: get the next code point (c) and its data. */ +#define _UTRIE2_U16_NEXT(trie, data, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \ + { \ + uint16_t __c2; \ + (c)=*(src)++; \ + if(!U16_IS_LEAD(c)) { \ + (result)=_UTRIE2_GET_FROM_U16_SINGLE_LEAD(trie, data, c); \ + } else if((src)==(limit) || !U16_IS_TRAIL(__c2=*(src))) { \ + (result)=(trie)->data[_UTRIE2_INDEX_FROM_LSCP((trie)->index, c)]; \ + } else { \ + ++(src); \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** Internal pre-decrement-previous: get the previous code point (c) and its data */ +#define _UTRIE2_U16_PREV(trie, data, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \ + { \ + uint16_t __c2; \ + (c)=*--(src); \ + if(!U16_IS_TRAIL(c) || (src)==(start) || !U16_IS_LEAD(__c2=*((src)-1))) { \ + (result)=(trie)->data[_UTRIE2_INDEX_FROM_BMP((trie)->index, c)]; \ + } else { \ + --(src); \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + (result)=_UTRIE2_GET_FROM_SUPP((trie), data, (c)); \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** Internal UTF-8 next-post-increment: get the next code point's data. */ +#define _UTRIE2_U8_NEXT(trie, ascii, data, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \ + uint8_t __lead=(uint8_t)*(src)++; \ + if(U8_IS_SINGLE(__lead)) { \ + (result)=(trie)->ascii[__lead]; \ + } else { \ + uint8_t __t1, __t2; \ + if( /* handle U+0800..U+FFFF inline */ \ + 0xe0<=__lead && __lead<0xf0 && ((src)+1)<(limit) && \ + U8_IS_VALID_LEAD3_AND_T1(__lead, __t1=(uint8_t)*(src)) && \ + (__t2=(uint8_t)(*((src)+1)-0x80))<= 0x3f \ + ) { \ + (src)+=2; \ + (result)=(trie)->data[ \ + ((int32_t)((trie)->index[((__lead-0xe0)<<(12-UTRIE2_SHIFT_2))+ \ + ((__t1&0x3f)<<(6-UTRIE2_SHIFT_2))+(__t2>>UTRIE2_SHIFT_2)]) \ + <<UTRIE2_INDEX_SHIFT)+ \ + (__t2&UTRIE2_DATA_MASK)]; \ + } else if( /* handle U+0080..U+07FF inline */ \ + __lead<0xe0 && __lead>=0xc2 && (src)<(limit) && \ + (__t1=(uint8_t)(*(src)-0x80))<=0x3f \ + ) { \ + ++(src); \ + (result)=(trie)->data[ \ + (trie)->index[(UTRIE2_UTF8_2B_INDEX_2_OFFSET-0xc0)+__lead]+ \ + __t1]; \ + } else { \ + int32_t __index=utrie2_internalU8NextIndex((trie), __lead, (const uint8_t *)(src), \ + (const uint8_t *)(limit)); \ + (src)+=__index&7; \ + (result)=(trie)->data[__index>>3]; \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/** Internal UTF-8 pre-decrement-previous: get the previous code point's data. */ +#define _UTRIE2_U8_PREV(trie, ascii, data, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \ + uint8_t __b=(uint8_t)*--(src); \ + if(U8_IS_SINGLE(__b)) { \ + (result)=(trie)->ascii[__b]; \ + } else { \ + int32_t __index=utrie2_internalU8PrevIndex((trie), __b, (const uint8_t *)(start), \ + (const uint8_t *)(src)); \ + (src)-=__index&7; \ + (result)=(trie)->data[__index>>3]; \ + } \ +} UPRV_BLOCK_MACRO_END + +U_CDECL_END + +#endif diff --git a/thirdparty/icu4c/common/utrie2_builder.cpp b/thirdparty/icu4c/common/utrie2_builder.cpp new file mode 100644 index 0000000000..8de824cc3d --- /dev/null +++ b/thirdparty/icu4c/common/utrie2_builder.cpp @@ -0,0 +1,1483 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: utrie2_builder.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2008sep26 (split off from utrie2.c) +* created by: Markus W. Scherer +* +* This is a common implementation of a Unicode trie. +* It is a kind of compressed, serializable table of 16- or 32-bit values associated with +* Unicode code points (0..0x10ffff). +* This is the second common version of a Unicode trie (hence the name UTrie2). +* See utrie2.h for a comparison. +* +* This file contains only the builder code. +* See utrie2.c for the runtime and enumeration code. +*/ +// #define UTRIE2_DEBUG +#ifdef UTRIE2_DEBUG +# include <stdio.h> +#endif +// #define UCPTRIE_DEBUG + +#include "unicode/utypes.h" +#ifdef UCPTRIE_DEBUG +#include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "ucptrie_impl.h" +#endif +#include "cmemory.h" +#include "utrie2.h" +#include "utrie2_impl.h" + +#include "utrie.h" // for utrie2_fromUTrie() + +/* Implementation notes ----------------------------------------------------- */ + +/* + * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values + * have been chosen to minimize trie sizes overall. + * Most of the code is flexible enough to work with a range of values, + * within certain limits. + * + * Exception: Support for separate values for lead surrogate code _units_ + * vs. code _points_ was added after the constants were fixed, + * and has not been tested nor particularly designed for different constant values. + * (Especially the utrie2_enum() code that jumps to the special LSCP index-2 + * part and back.) + * + * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data + * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH + * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction + * remapping) stops working. + * + * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate() + * assumes that a single index-2 block is used for 0x400 code points + * corresponding to one lead surrogate. + * + * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains + * more than one Unicode plane, and the split of the index-2 table into a BMP + * part and a supplementary part, with a gap in between, would not work. + * + * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because + * there is data with more than 64k distinct values, + * for example for Unihan collation with a separate collation weight per + * Han character. + */ + +/* Building a trie ----------------------------------------------------------*/ + +enum { + /** The null index-2 block, following the gap in the index-2 table. */ + UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH, + + /** The start of allocated index-2 blocks. */ + UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH, + + /** + * The null data block. + * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller, + * to work with 6-bit trail bytes from 2-byte UTF-8. + */ + UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET, + + /** The start of allocated data blocks. */ + UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40, + + /** + * The start of data blocks for U+0800 and above. + * Below, compaction uses a block length of 64 for 2-byte UTF-8. + * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH. + * Data values for 0x780 code points beyond ASCII. + */ + UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780 +}; + +/* Start with allocation of 16k data entries. */ +#define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14) + +/* Grow about 8x each time. */ +#define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17) + +static int32_t +allocIndex2Block(UNewTrie2 *trie); + +U_CAPI UTrie2 * U_EXPORT2 +utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { + UTrie2 *trie; + UNewTrie2 *newTrie; + uint32_t *data; + int32_t i, j; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + + trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); + newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2)); + data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4); + if(trie==NULL || newTrie==NULL || data==NULL) { + uprv_free(trie); + uprv_free(newTrie); + uprv_free(data); + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return 0; + } + + uprv_memset(trie, 0, sizeof(UTrie2)); + trie->initialValue=initialValue; + trie->errorValue=errorValue; + trie->highStart=0x110000; + trie->newTrie=newTrie; +#ifdef UTRIE2_DEBUG + trie->name="open"; +#endif + + newTrie->data=data; +#ifdef UCPTRIE_DEBUG + newTrie->t3=umutablecptrie_open(initialValue, errorValue, pErrorCode); +#endif + newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH; + newTrie->initialValue=initialValue; + newTrie->errorValue=errorValue; + newTrie->highStart=0x110000; + newTrie->firstFreeBlock=0; /* no free block in the list */ + newTrie->isCompacted=FALSE; + + /* + * preallocate and reset + * - ASCII + * - the bad-UTF-8-data block + * - the null data block + */ + for(i=0; i<0x80; ++i) { + newTrie->data[i]=initialValue; + } + for(; i<0xc0; ++i) { + newTrie->data[i]=errorValue; + } + for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) { + newTrie->data[i]=initialValue; + } + newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET; + newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET; + + /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */ + for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { + newTrie->index2[i]=j; + newTrie->map[i]=1; + } + /* reference counts for the bad-UTF-8-data block */ + for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { + newTrie->map[i]=0; + } + /* + * Reference counts for the null data block: all blocks except for the ASCII blocks. + * Plus 1 so that we don't drop this block during compaction. + * Plus as many as needed for lead surrogate code points. + */ + /* i==newTrie->dataNullOffset */ + newTrie->map[i++]= + (0x110000>>UTRIE2_SHIFT_2)- + (0x80>>UTRIE2_SHIFT_2)+ + 1+ + UTRIE2_LSCP_INDEX_2_LENGTH; + j+=UTRIE2_DATA_BLOCK_LENGTH; + for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { + newTrie->map[i]=0; + } + + /* + * set the remaining indexes in the BMP index-2 block + * to the null data block + */ + for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { + newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET; + } + + /* + * Fill the index gap with impossible values so that compaction + * does not overlap other index-2 blocks with the gap. + */ + for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) { + newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1; + } + + /* set the indexes in the null index-2 block */ + for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) { + newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET; + } + newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET; + newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET; + + /* set the index-1 indexes for the linear index-2 block */ + for(i=0, j=0; + i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; + ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH + ) { + newTrie->index1[i]=j; + } + + /* set the remaining index-1 indexes to the null index-2 block */ + for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { + newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET; + } + + /* + * Preallocate and reset data for U+0080..U+07ff, + * for 2-byte UTF-8 which will be compacted in 64-blocks + * even if UTRIE2_DATA_BLOCK_LENGTH is smaller. + */ + for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) { + utrie2_set32(trie, i, initialValue, pErrorCode); + } + + return trie; +} + +static UNewTrie2 * +cloneBuilder(const UNewTrie2 *other) { + UNewTrie2 *trie; + + trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2)); + if(trie==NULL) { + return NULL; + } + + trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4); + if(trie->data==NULL) { + uprv_free(trie); + return NULL; + } +#ifdef UCPTRIE_DEBUG + if(other->t3==nullptr) { + trie->t3=nullptr; + } else { + UErrorCode errorCode=U_ZERO_ERROR; + trie->t3=umutablecptrie_clone(other->t3, &errorCode); + } +#endif + trie->dataCapacity=other->dataCapacity; + + /* clone data */ + uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1)); + uprv_memcpy(trie->index2, other->index2, (size_t)other->index2Length*4); + trie->index2NullOffset=other->index2NullOffset; + trie->index2Length=other->index2Length; + + uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4); + trie->dataNullOffset=other->dataNullOffset; + trie->dataLength=other->dataLength; + + /* reference counters */ + if(other->isCompacted) { + trie->firstFreeBlock=0; + } else { + uprv_memcpy(trie->map, other->map, ((size_t)other->dataLength>>UTRIE2_SHIFT_2)*4); + trie->firstFreeBlock=other->firstFreeBlock; + } + + trie->initialValue=other->initialValue; + trie->errorValue=other->errorValue; + trie->highStart=other->highStart; + trie->isCompacted=other->isCompacted; + + return trie; +} + +U_CAPI UTrie2 * U_EXPORT2 +utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) { + UTrie2 *trie; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); + if(trie==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + uprv_memcpy(trie, other, sizeof(UTrie2)); + + if(other->memory!=NULL) { + trie->memory=uprv_malloc(other->length); + if(trie->memory!=NULL) { + trie->isMemoryOwned=TRUE; + uprv_memcpy(trie->memory, other->memory, other->length); + + /* make the clone's pointers point to its own memory */ + trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory); + if(other->data16!=NULL) { + trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory); + } + if(other->data32!=NULL) { + trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory); + } + } + } else /* other->newTrie!=NULL */ { + trie->newTrie=cloneBuilder(other->newTrie); + } + + if(trie->memory==NULL && trie->newTrie==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + uprv_free(trie); + trie=NULL; + } + return trie; +} + +typedef struct NewTrieAndStatus { + UTrie2 *trie; + UErrorCode errorCode; + UBool exclusiveLimit; /* rather than inclusive range end */ +} NewTrieAndStatus; + +static UBool U_CALLCONV +copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { + NewTrieAndStatus *nt=(NewTrieAndStatus *)context; + if(value!=nt->trie->initialValue) { + if(nt->exclusiveLimit) { + --end; + } + if(start==end) { + utrie2_set32(nt->trie, start, value, &nt->errorCode); + } else { + utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode); + } + return U_SUCCESS(nt->errorCode); + } else { + return TRUE; + } +} + +#ifdef UTRIE2_DEBUG +static long countInitial(const UTrie2 *trie) { + uint32_t initialValue=trie->initialValue; + int32_t length=trie->dataLength; + long count=0; + if(trie->data16!=nullptr) { + for(int32_t i=0; i<length; ++i) { + if(trie->data16[i]==initialValue) { ++count; } + } + } else { + for(int32_t i=0; i<length; ++i) { + if(trie->data32[i]==initialValue) { ++count; } + } + } + return count; +} + +static void +utrie_printLengths(const UTrie *trie) { + long indexLength=trie->indexLength; + long dataLength=(long)trie->dataLength; + long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2); + printf("**UTrieLengths** index:%6ld data:%6ld serialized:%6ld\n", + indexLength, dataLength, totalLength); +} + +static void +utrie2_printLengths(const UTrie2 *trie, const char *which) { + long indexLength=trie->indexLength; + long dataLength=(long)trie->dataLength; + long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2); + printf("**UTrie2Lengths(%s %s)** index:%6ld data:%6ld countInitial:%6ld serialized:%6ld\n", + which, trie->name, indexLength, dataLength, countInitial(trie), totalLength); +} +#endif + +U_CAPI UTrie2 * U_EXPORT2 +utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) { + NewTrieAndStatus context; + UChar lead; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + if(other->newTrie!=NULL && !other->newTrie->isCompacted) { + return utrie2_clone(other, pErrorCode); /* clone an unfrozen trie */ + } + + /* Clone the frozen trie by enumerating it and building a new one. */ + context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + context.exclusiveLimit=FALSE; + context.errorCode=*pErrorCode; + utrie2_enum(other, NULL, copyEnumRange, &context); + *pErrorCode=context.errorCode; + for(lead=0xd800; lead<0xdc00; ++lead) { + uint32_t value; + if(other->data32==NULL) { + value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead); + } else { + value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead); + } + if(value!=other->initialValue) { + utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode); + } + } + if(U_FAILURE(*pErrorCode)) { + utrie2_close(context.trie); + context.trie=NULL; + } + return context.trie; +} + +/* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */ +U_CAPI UTrie2 * U_EXPORT2 +utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) { + NewTrieAndStatus context; + UChar lead; + + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + if(trie1==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return NULL; + } + context.exclusiveLimit=TRUE; + context.errorCode=*pErrorCode; + utrie_enum(trie1, NULL, copyEnumRange, &context); + *pErrorCode=context.errorCode; + for(lead=0xd800; lead<0xdc00; ++lead) { + uint32_t value; + if(trie1->data32==NULL) { + value=UTRIE_GET16_FROM_LEAD(trie1, lead); + } else { + value=UTRIE_GET32_FROM_LEAD(trie1, lead); + } + if(value!=trie1->initialValue) { + utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode); + } + } + if(U_SUCCESS(*pErrorCode)) { + utrie2_freeze(context.trie, + trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS, + pErrorCode); + } +#ifdef UTRIE2_DEBUG + if(U_SUCCESS(*pErrorCode)) { + utrie_printLengths(trie1); + utrie2_printLengths(context.trie, "fromUTrie"); + } +#endif + if(U_FAILURE(*pErrorCode)) { + utrie2_close(context.trie); + context.trie=NULL; + } + return context.trie; +} + +static inline UBool +isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { + int32_t i2, block; + + if(U_IS_LEAD(c) && forLSCP) { + i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ + (c>>UTRIE2_SHIFT_2); + } else { + i2=trie->index1[c>>UTRIE2_SHIFT_1]+ + ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); + } + block=trie->index2[i2]; + return (UBool)(block==trie->dataNullOffset); +} + +static int32_t +allocIndex2Block(UNewTrie2 *trie) { + int32_t newBlock, newTop; + + newBlock=trie->index2Length; + newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH; + if(newTop>UPRV_LENGTHOF(trie->index2)) { + /* + * Should never occur. + * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect, + * or the code writes more values than should be possible. + */ + return -1; + } + trie->index2Length=newTop; + uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4); + return newBlock; +} + +static int32_t +getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { + int32_t i1, i2; + + if(U_IS_LEAD(c) && forLSCP) { + return UTRIE2_LSCP_INDEX_2_OFFSET; + } + + i1=c>>UTRIE2_SHIFT_1; + i2=trie->index1[i1]; + if(i2==trie->index2NullOffset) { + i2=allocIndex2Block(trie); + if(i2<0) { + return -1; /* program error */ + } + trie->index1[i1]=i2; + } + return i2; +} + +static int32_t +allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) { + int32_t newBlock, newTop; + + if(trie->firstFreeBlock!=0) { + /* get the first free block */ + newBlock=trie->firstFreeBlock; + trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2]; + } else { + /* get a new block from the high end */ + newBlock=trie->dataLength; + newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH; + if(newTop>trie->dataCapacity) { + /* out of memory in the data array */ + int32_t capacity; + uint32_t *data; + + if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) { + capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH; + } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) { + capacity=UNEWTRIE2_MAX_DATA_LENGTH; + } else { + /* + * Should never occur. + * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect, + * or the code writes more values than should be possible. + */ + return -1; + } + data=(uint32_t *)uprv_malloc(capacity*4); + if(data==NULL) { + return -1; + } + uprv_memcpy(data, trie->data, (size_t)trie->dataLength*4); + uprv_free(trie->data); + trie->data=data; + trie->dataCapacity=capacity; + } + trie->dataLength=newTop; + } + uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4); + trie->map[newBlock>>UTRIE2_SHIFT_2]=0; + return newBlock; +} + +/* call when the block's reference counter reaches 0 */ +static void +releaseDataBlock(UNewTrie2 *trie, int32_t block) { + /* put this block at the front of the free-block chain */ + trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock; + trie->firstFreeBlock=block; +} + +static inline UBool +isWritableBlock(UNewTrie2 *trie, int32_t block) { + return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]); +} + +static inline void +setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) { + int32_t oldBlock; + ++trie->map[block>>UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */ + oldBlock=trie->index2[i2]; + if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) { + releaseDataBlock(trie, oldBlock); + } + trie->index2[i2]=block; +} + +/** + * No error checking for illegal arguments. + * + * @return -1 if no new data block available (out of memory in data array) + * @internal + */ +static int32_t +getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { + int32_t i2, oldBlock, newBlock; + + i2=getIndex2Block(trie, c, forLSCP); + if(i2<0) { + return -1; /* program error */ + } + + i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; + oldBlock=trie->index2[i2]; + if(isWritableBlock(trie, oldBlock)) { + return oldBlock; + } + + /* allocate a new data block */ + newBlock=allocDataBlock(trie, oldBlock); + if(newBlock<0) { + /* out of memory in the data array */ + return -1; + } + setIndex2Entry(trie, i2, newBlock); + return newBlock; +} + +/** + * @return TRUE if the value was successfully set + */ +static void +set32(UNewTrie2 *trie, + UChar32 c, UBool forLSCP, uint32_t value, + UErrorCode *pErrorCode) { + int32_t block; + + if(trie==NULL || trie->isCompacted) { + *pErrorCode=U_NO_WRITE_PERMISSION; + return; + } +#ifdef UCPTRIE_DEBUG + umutablecptrie_set(trie->t3, c, value, pErrorCode); +#endif + + block=getDataBlock(trie, c, forLSCP); + if(block<0) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + + trie->data[block+(c&UTRIE2_DATA_MASK)]=value; +} + +U_CAPI void U_EXPORT2 +utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + if((uint32_t)c>0x10ffff) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + set32(trie->newTrie, c, TRUE, value, pErrorCode); +} + +U_CAPI void U_EXPORT2 +utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie, + UChar32 c, uint32_t value, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return; + } + if(!U_IS_LEAD(c)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + set32(trie->newTrie, c, FALSE, value, pErrorCode); +} + +static void +writeBlock(uint32_t *block, uint32_t value) { + uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH; + while(block<limit) { + *block++=value; + } +} + +/** + * initialValue is ignored if overwrite=TRUE + * @internal + */ +static void +fillBlock(uint32_t *block, UChar32 start, UChar32 limit, + uint32_t value, uint32_t initialValue, UBool overwrite) { + uint32_t *pLimit; + + pLimit=block+limit; + block+=start; + if(overwrite) { + while(block<pLimit) { + *block++=value; + } + } else { + while(block<pLimit) { + if(*block==initialValue) { + *block=value; + } + ++block; + } + } +} + +U_CAPI void U_EXPORT2 +utrie2_setRange32(UTrie2 *trie, + UChar32 start, UChar32 end, + uint32_t value, UBool overwrite, + UErrorCode *pErrorCode) { + /* + * repeat value in [start..end] + * mark index values for repeat-data blocks by setting bit 31 of the index values + * fill around existing values if any, if(overwrite) + */ + UNewTrie2 *newTrie; + int32_t block, rest, repeatBlock; + UChar32 limit; + + if(U_FAILURE(*pErrorCode)) { + return; + } + if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + newTrie=trie->newTrie; + if(newTrie==NULL || newTrie->isCompacted) { + *pErrorCode=U_NO_WRITE_PERMISSION; + return; + } +#ifdef UCPTRIE_DEBUG + umutablecptrie_setRange(newTrie->t3, start, end, value, pErrorCode); +#endif + if(!overwrite && value==newTrie->initialValue) { + return; /* nothing to do */ + } + + limit=end+1; + if(start&UTRIE2_DATA_MASK) { + UChar32 nextStart; + + /* set partial block at [start..following block boundary[ */ + block=getDataBlock(newTrie, start, TRUE); + if(block<0) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + + nextStart=(start+UTRIE2_DATA_MASK)&~UTRIE2_DATA_MASK; + if(nextStart<=limit) { + fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH, + value, newTrie->initialValue, overwrite); + start=nextStart; + } else { + fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK, + value, newTrie->initialValue, overwrite); + return; + } + } + + /* number of positions in the last, partial block */ + rest=limit&UTRIE2_DATA_MASK; + + /* round down limit to a block boundary */ + limit&=~UTRIE2_DATA_MASK; + + /* iterate over all-value blocks */ + if(value==newTrie->initialValue) { + repeatBlock=newTrie->dataNullOffset; + } else { + repeatBlock=-1; + } + + while(start<limit) { + int32_t i2; + UBool setRepeatBlock=FALSE; + + if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) { + start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */ + continue; + } + + /* get index value */ + i2=getIndex2Block(newTrie, start, TRUE); + if(i2<0) { + *pErrorCode=U_INTERNAL_PROGRAM_ERROR; + return; + } + i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; + block=newTrie->index2[i2]; + if(isWritableBlock(newTrie, block)) { + /* already allocated */ + if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) { + /* + * We overwrite all values, and it's not a + * protected (ASCII-linear or 2-byte UTF-8) block: + * replace with the repeatBlock. + */ + setRepeatBlock=TRUE; + } else { + /* !overwrite, or protected block: just write the values into this block */ + fillBlock(newTrie->data+block, + 0, UTRIE2_DATA_BLOCK_LENGTH, + value, newTrie->initialValue, overwrite); + } + } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) { + /* + * Set the repeatBlock instead of the null block or previous repeat block: + * + * If !isWritableBlock() then all entries in the block have the same value + * because it's the null block or a range block (the repeatBlock from a previous + * call to utrie2_setRange32()). + * No other blocks are used multiple times before compacting. + * + * The null block is the only non-writable block with the initialValue because + * of the repeatBlock initialization above. (If value==initialValue, then + * the repeatBlock will be the null data block.) + * + * We set our repeatBlock if the desired value differs from the block's value, + * and if we overwrite any data or if the data is all initial values + * (which is the same as the block being the null block, see above). + */ + setRepeatBlock=TRUE; + } + if(setRepeatBlock) { + if(repeatBlock>=0) { + setIndex2Entry(newTrie, i2, repeatBlock); + } else { + /* create and set and fill the repeatBlock */ + repeatBlock=getDataBlock(newTrie, start, TRUE); + if(repeatBlock<0) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + writeBlock(newTrie->data+repeatBlock, value); + } + } + + start+=UTRIE2_DATA_BLOCK_LENGTH; + } + + if(rest>0) { + /* set partial block at [last block boundary..limit[ */ + block=getDataBlock(newTrie, start, TRUE); + if(block<0) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + + fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite); + } + + return; +} + +/* compaction --------------------------------------------------------------- */ + +static inline UBool +equal_int32(const int32_t *s, const int32_t *t, int32_t length) { + while(length>0 && *s==*t) { + ++s; + ++t; + --length; + } + return (UBool)(length==0); +} + +static inline UBool +equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) { + while(length>0 && *s==*t) { + ++s; + ++t; + --length; + } + return (UBool)(length==0); +} + +static int32_t +findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) { + int32_t block; + + /* ensure that we do not even partially get past index2Length */ + index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH; + + for(block=0; block<=index2Length; ++block) { + if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) { + return block; + } + } + return -1; +} + +static int32_t +findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) { + int32_t block; + + /* ensure that we do not even partially get past dataLength */ + dataLength-=blockLength; + + for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) { + if(equal_uint32(data+block, data+otherBlock, blockLength)) { + return block; + } + } + return -1; +} + +/* + * Find the start of the last range in the trie by enumerating backward. + * Indexes for supplementary code points higher than this will be omitted. + */ +static UChar32 +findHighStart(UNewTrie2 *trie, uint32_t highValue) { + const uint32_t *data32; + + uint32_t value, initialValue; + UChar32 c, prev; + int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; + + data32=trie->data; + initialValue=trie->initialValue; + + index2NullOffset=trie->index2NullOffset; + nullBlock=trie->dataNullOffset; + + /* set variables for previous range */ + if(highValue==initialValue) { + prevI2Block=index2NullOffset; + prevBlock=nullBlock; + } else { + prevI2Block=-1; + prevBlock=-1; + } + prev=0x110000; + + /* enumerate index-2 blocks */ + i1=UNEWTRIE2_INDEX_1_LENGTH; + c=prev; + while(c>0) { + i2Block=trie->index1[--i1]; + if(i2Block==prevI2Block) { + /* the index-2 block is the same as the previous one, and filled with highValue */ + c-=UTRIE2_CP_PER_INDEX_1_ENTRY; + continue; + } + prevI2Block=i2Block; + if(i2Block==index2NullOffset) { + /* this is the null index-2 block */ + if(highValue!=initialValue) { + return c; + } + c-=UTRIE2_CP_PER_INDEX_1_ENTRY; + } else { + /* enumerate data blocks for one index-2 block */ + for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) { + block=trie->index2[i2Block+ --i2]; + if(block==prevBlock) { + /* the block is the same as the previous one, and filled with highValue */ + c-=UTRIE2_DATA_BLOCK_LENGTH; + continue; + } + prevBlock=block; + if(block==nullBlock) { + /* this is the null data block */ + if(highValue!=initialValue) { + return c; + } + c-=UTRIE2_DATA_BLOCK_LENGTH; + } else { + for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) { + value=data32[block+ --j]; + if(value!=highValue) { + return c; + } + --c; + } + } + } + } + } + + /* deliver last range */ + return 0; +} + +/* + * Compact a build-time trie. + * + * The compaction + * - removes blocks that are identical with earlier ones + * - overlaps adjacent blocks as much as possible (if overlap==TRUE) + * - moves blocks in steps of the data granularity + * - moves and overlaps blocks that overlap with multiple values in the overlap region + * + * It does not + * - try to move and overlap blocks that are not already adjacent + */ +static void +compactData(UNewTrie2 *trie) { +#ifdef UTRIE2_DEBUG + int32_t countSame=0, sumOverlaps=0; +#endif + + int32_t start, newStart, movedStart; + int32_t blockLength, overlap; + int32_t i, mapIndex, blockCount; + + /* do not compact linear-ASCII data */ + newStart=UTRIE2_DATA_START_OFFSET; + for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) { + trie->map[i]=start; + } + + /* + * Start with a block length of 64 for 2-byte UTF-8, + * then switch to UTRIE2_DATA_BLOCK_LENGTH. + */ + blockLength=64; + blockCount=blockLength>>UTRIE2_SHIFT_2; + for(start=newStart; start<trie->dataLength;) { + /* + * start: index of first entry of current block + * newStart: index where the current block is to be moved + * (right after current end of already-compacted data) + */ + if(start==UNEWTRIE2_DATA_0800_OFFSET) { + blockLength=UTRIE2_DATA_BLOCK_LENGTH; + blockCount=1; + } + + /* skip blocks that are not used */ + if(trie->map[start>>UTRIE2_SHIFT_2]<=0) { + /* advance start to the next block */ + start+=blockLength; + + /* leave newStart with the previous block! */ + continue; + } + + /* search for an identical block */ + if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength)) + >=0 + ) { +#ifdef UTRIE2_DEBUG + ++countSame; +#endif + /* found an identical block, set the other block's index value for the current block */ + for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { + trie->map[mapIndex++]=movedStart; + movedStart+=UTRIE2_DATA_BLOCK_LENGTH; + } + + /* advance start to the next block */ + start+=blockLength; + + /* leave newStart with the previous block! */ + continue; + } + + /* see if the beginning of this block can be overlapped with the end of the previous block */ + /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ + for(overlap=blockLength-UTRIE2_DATA_GRANULARITY; + overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap); + overlap-=UTRIE2_DATA_GRANULARITY) {} + +#ifdef UTRIE2_DEBUG + sumOverlaps+=overlap; +#endif + if(overlap>0 || newStart<start) { + /* some overlap, or just move the whole block */ + movedStart=newStart-overlap; + for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { + trie->map[mapIndex++]=movedStart; + movedStart+=UTRIE2_DATA_BLOCK_LENGTH; + } + + /* move the non-overlapping indexes to their new positions */ + start+=overlap; + for(i=blockLength-overlap; i>0; --i) { + trie->data[newStart++]=trie->data[start++]; + } + } else /* no overlap && newStart==start */ { + for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { + trie->map[mapIndex++]=start; + start+=UTRIE2_DATA_BLOCK_LENGTH; + } + newStart=start; + } + } + + /* now adjust the index-2 table */ + for(i=0; i<trie->index2Length; ++i) { + if(i==UNEWTRIE2_INDEX_GAP_OFFSET) { + /* Gap indexes are invalid (-1). Skip over the gap. */ + i+=UNEWTRIE2_INDEX_GAP_LENGTH; + } + trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2]; + } + trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2]; + + /* ensure dataLength alignment */ + while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) { + trie->data[newStart++]=trie->initialValue; + } + +#ifdef UTRIE2_DEBUG + /* we saved some space */ + printf("compacting UTrie2: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n", + (long)trie->dataLength, (long)newStart, (long)countSame, (long)sumOverlaps); +#endif + + trie->dataLength=newStart; +} + +static void +compactIndex2(UNewTrie2 *trie) { + int32_t i, start, newStart, movedStart, overlap; + + /* do not compact linear-BMP index-2 blocks */ + newStart=UTRIE2_INDEX_2_BMP_LENGTH; + for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) { + trie->map[i]=start; + } + + /* Reduce the index table gap to what will be needed at runtime. */ + newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1); + + for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) { + /* + * start: index of first entry of current block + * newStart: index where the current block is to be moved + * (right after current end of already-compacted data) + */ + + /* search for an identical block */ + if( (movedStart=findSameIndex2Block(trie->index2, newStart, start)) + >=0 + ) { + /* found an identical block, set the other block's index value for the current block */ + trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart; + + /* advance start to the next block */ + start+=UTRIE2_INDEX_2_BLOCK_LENGTH; + + /* leave newStart with the previous block! */ + continue; + } + + /* see if the beginning of this block can be overlapped with the end of the previous block */ + /* look for maximum overlap with the previous, adjacent block */ + for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1; + overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap); + --overlap) {} + + if(overlap>0 || newStart<start) { + /* some overlap, or just move the whole block */ + trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap; + + /* move the non-overlapping indexes to their new positions */ + start+=overlap; + for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) { + trie->index2[newStart++]=trie->index2[start++]; + } + } else /* no overlap && newStart==start */ { + trie->map[start>>UTRIE2_SHIFT_1_2]=start; + start+=UTRIE2_INDEX_2_BLOCK_LENGTH; + newStart=start; + } + } + + /* now adjust the index-1 table */ + for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { + trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2]; + } + trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2]; + + /* + * Ensure data table alignment: + * Needs to be granularity-aligned for 16-bit trie + * (so that dataMove will be down-shiftable), + * and 2-aligned for uint32_t data. + */ + while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) { + /* Arbitrary value: 0x3fffc not possible for real data. */ + trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT; + } + +#ifdef UTRIE2_DEBUG + /* we saved some space */ + printf("compacting UTrie2: count of 16-bit index words %lu->%lu\n", + (long)trie->index2Length, (long)newStart); +#endif + + trie->index2Length=newStart; +} + +static void +compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) { + UNewTrie2 *newTrie; + UChar32 highStart, suppHighStart; + uint32_t highValue; + + newTrie=trie->newTrie; + + /* find highStart and round it up */ + highValue=utrie2_get32(trie, 0x10ffff); + highStart=findHighStart(newTrie, highValue); + highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1); + if(highStart==0x110000) { + highValue=trie->errorValue; + } + + /* + * Set trie->highStart only after utrie2_get32(trie, highStart). + * Otherwise utrie2_get32(trie, highStart) would try to read the highValue. + */ + trie->highStart=newTrie->highStart=highStart; + +#ifdef UTRIE2_DEBUG + printf("UTrie2: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n", + (long)highStart, (long)highValue, (long)trie->initialValue); +#endif + + if(highStart<0x110000) { + /* Blank out [highStart..10ffff] to release associated data blocks. */ + suppHighStart= highStart<=0x10000 ? 0x10000 : highStart; + utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } + + compactData(newTrie); + if(highStart>0x10000) { + compactIndex2(newTrie); +#ifdef UTRIE2_DEBUG + } else { + printf("UTrie2: highStart U+%04lx count of 16-bit index words %lu->%lu\n", + (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET); +#endif + } + + /* + * Store the highValue in the data array and round up the dataLength. + * Must be done after compactData() because that assumes that dataLength + * is a multiple of UTRIE2_DATA_BLOCK_LENGTH. + */ + newTrie->data[newTrie->dataLength++]=highValue; + while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) { + newTrie->data[newTrie->dataLength++]=trie->initialValue; + } + + newTrie->isCompacted=TRUE; +} + +/* serialization ------------------------------------------------------------ */ + +/** + * Maximum length of the runtime index array. + * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength. + * (The actual maximum length is lower, + * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.) + */ +#define UTRIE2_MAX_INDEX_LENGTH 0xffff + +/** + * Maximum length of the runtime data array. + * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT, + * and by uint16_t UTrie2Header.shiftedDataLength. + */ +#define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT) + +/* Compact and internally serialize the trie. */ +U_CAPI void U_EXPORT2 +utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) { + UNewTrie2 *newTrie; + UTrie2Header *header; + uint32_t *p; + uint16_t *dest16; + int32_t i, length; + int32_t allIndexesLength; + int32_t dataMove; /* >0 if the data is moved to the end of the index array */ + UChar32 highStart; + + /* argument check */ + if(U_FAILURE(*pErrorCode)) { + return; + } + if( trie==NULL || + valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + newTrie=trie->newTrie; + if(newTrie==NULL) { + /* already frozen */ + UTrie2ValueBits frozenValueBits= + trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS; + if(valueBits!=frozenValueBits) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + } + return; + } + + /* compact if necessary */ + if(!newTrie->isCompacted) { + compactTrie(trie, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } + } + highStart=trie->highStart; + + if(highStart<=0x10000) { + allIndexesLength=UTRIE2_INDEX_1_OFFSET; + } else { + allIndexesLength=newTrie->index2Length; + } + if(valueBits==UTRIE2_16_VALUE_BITS) { + dataMove=allIndexesLength; + } else { + dataMove=0; + } + + /* are indexLength and dataLength within limits? */ + if( /* for unshifted indexLength */ + allIndexesLength>UTRIE2_MAX_INDEX_LENGTH || + /* for unshifted dataNullOffset */ + (dataMove+newTrie->dataNullOffset)>0xffff || + /* for unshifted 2-byte UTF-8 index-2 values */ + (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff || + /* for shiftedDataLength */ + (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH + ) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + + /* calculate the total serialized length */ + length=sizeof(UTrie2Header)+allIndexesLength*2; + if(valueBits==UTRIE2_16_VALUE_BITS) { + length+=newTrie->dataLength*2; + } else { + length+=newTrie->dataLength*4; + } + + trie->memory=uprv_malloc(length); + if(trie->memory==NULL) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + trie->length=length; + trie->isMemoryOwned=TRUE; + + trie->indexLength=allIndexesLength; + trie->dataLength=newTrie->dataLength; + if(highStart<=0x10000) { + trie->index2NullOffset=0xffff; + } else { + trie->index2NullOffset=static_cast<uint16_t>(UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset); + } + trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset); + trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY; + + /* set the header fields */ + header=(UTrie2Header *)trie->memory; + + header->signature=UTRIE2_SIG; /* "Tri2" */ + header->options=(uint16_t)valueBits; + + header->indexLength=(uint16_t)trie->indexLength; + header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT); + header->index2NullOffset=trie->index2NullOffset; + header->dataNullOffset=trie->dataNullOffset; + header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1); + + /* fill the index and data arrays */ + dest16=(uint16_t *)(header+1); + trie->index=dest16; + + /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */ + p=(uint32_t *)newTrie->index2; + for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) { + *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT); + } + + /* write UTF-8 2-byte index-2 values, not right-shifted */ + for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ + *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); + } + for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ + *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]); + } + + if(highStart>0x10000) { + int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1; + int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length; + + /* write 16-bit index-1 values for supplementary code points */ + p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; + for(i=index1Length; i>0; --i) { + *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++); + } + + /* + * write the index-2 array values for supplementary code points, + * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove + */ + p=(uint32_t *)newTrie->index2+index2Offset; + for(i=newTrie->index2Length-index2Offset; i>0; --i) { + *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT); + } + } + + /* write the 16/32-bit data array */ + switch(valueBits) { + case UTRIE2_16_VALUE_BITS: + /* write 16-bit data values */ + trie->data16=dest16; + trie->data32=NULL; + p=newTrie->data; + for(i=newTrie->dataLength; i>0; --i) { + *dest16++=(uint16_t)*p++; + } + break; + case UTRIE2_32_VALUE_BITS: + /* write 32-bit data values */ + trie->data16=NULL; + trie->data32=(uint32_t *)dest16; + uprv_memcpy(dest16, newTrie->data, (size_t)newTrie->dataLength*4); + break; + default: + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + +#ifdef UTRIE2_DEBUG + utrie2_printLengths(trie, ""); +#endif + +#ifdef UCPTRIE_DEBUG + umutablecptrie_setName(newTrie->t3, trie->name); + ucptrie_close( + umutablecptrie_buildImmutable( + newTrie->t3, UCPTRIE_TYPE_FAST, (UCPTrieValueWidth)valueBits, pErrorCode)); +#endif + /* Delete the UNewTrie2. */ + uprv_free(newTrie->data); + uprv_free(newTrie); + trie->newTrie=NULL; +} diff --git a/thirdparty/icu4c/common/utrie2_impl.h b/thirdparty/icu4c/common/utrie2_impl.h new file mode 100644 index 0000000000..2a14db3a6b --- /dev/null +++ b/thirdparty/icu4c/common/utrie2_impl.h @@ -0,0 +1,175 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2001-2008, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: utrie2_impl.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2008sep26 (split off from utrie2.c) +* created by: Markus W. Scherer +* +* Definitions needed for both runtime and builder code for UTrie2, +* used by utrie2.c and utrie2_builder.c. +*/ + +#ifndef __UTRIE2_IMPL_H__ +#define __UTRIE2_IMPL_H__ + +#ifdef UCPTRIE_DEBUG +#include "unicode/umutablecptrie.h" +#endif +#include "utrie2.h" + +/* Public UTrie2 API implementation ----------------------------------------- */ + +/* + * These definitions are mostly needed by utrie2.cpp, + * but also by utrie2_serialize() and utrie2_swap(). + */ + +// UTrie2 signature values, in platform endianness and opposite endianness. +// The UTrie2 signature ASCII byte values spell "Tri2". +#define UTRIE2_SIG 0x54726932 +#define UTRIE2_OE_SIG 0x32697254 + +/** + * Trie data structure in serialized form: + * + * UTrie2Header header; + * uint16_t index[header.index2Length]; + * uint16_t data[header.shiftedDataLength<<2]; -- or uint32_t data[...] + * @internal + */ +typedef struct UTrie2Header { + /** "Tri2" in big-endian US-ASCII (0x54726932) */ + uint32_t signature; + + /** + * options bit field: + * 15.. 4 reserved (0) + * 3.. 0 UTrie2ValueBits valueBits + */ + uint16_t options; + + /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */ + uint16_t indexLength; + + /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */ + uint16_t shiftedDataLength; + + /** Null index and data blocks, not shifted. */ + uint16_t index2NullOffset, dataNullOffset; + + /** + * First code point of the single-value range ending with U+10ffff, + * rounded up and then shifted right by UTRIE2_SHIFT_1. + */ + uint16_t shiftedHighStart; +} UTrie2Header; + +/** + * Constants for use with UTrie2Header.options. + * @internal + */ +enum { + /** Mask to get the UTrie2ValueBits valueBits from options. */ + UTRIE2_OPTIONS_VALUE_BITS_MASK=0xf +}; + +/* Building a trie ---------------------------------------------------------- */ + +/* + * These definitions are mostly needed by utrie2_builder.c, but also by + * utrie2_get32() and utrie2_enum(). + */ + +enum { + /** + * At build time, leave a gap in the index-2 table, + * at least as long as the maximum lengths of the 2-byte UTF-8 index-2 table + * and the supplementary index-1 table. + * Round up to UTRIE2_INDEX_2_BLOCK_LENGTH for proper compacting. + */ + UNEWTRIE2_INDEX_GAP_OFFSET=UTRIE2_INDEX_2_BMP_LENGTH, + UNEWTRIE2_INDEX_GAP_LENGTH= + ((UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH)+UTRIE2_INDEX_2_MASK)& + ~UTRIE2_INDEX_2_MASK, + + /** + * Maximum length of the build-time index-2 array. + * Maximum number of Unicode code points (0x110000) shifted right by UTRIE2_SHIFT_2, + * plus the part of the index-2 table for lead surrogate code points, + * plus the build-time index gap, + * plus the null index-2 block. + */ + UNEWTRIE2_MAX_INDEX_2_LENGTH= + (0x110000>>UTRIE2_SHIFT_2)+ + UTRIE2_LSCP_INDEX_2_LENGTH+ + UNEWTRIE2_INDEX_GAP_LENGTH+ + UTRIE2_INDEX_2_BLOCK_LENGTH, + + UNEWTRIE2_INDEX_1_LENGTH=0x110000>>UTRIE2_SHIFT_1 +}; + +/** + * Maximum length of the build-time data array. + * One entry per 0x110000 code points, plus the illegal-UTF-8 block and the null block, + * plus values for the 0x400 surrogate code units. + */ +#define UNEWTRIE2_MAX_DATA_LENGTH (0x110000+0x40+0x40+0x400) + +/* + * Build-time trie structure. + * + * Just using a boolean flag for "repeat use" could lead to data array overflow + * because we would not be able to detect when a data block becomes unused. + * It also leads to orphan data blocks that are kept through serialization. + * + * Need to use reference counting for data blocks, + * and allocDataBlock() needs to look for a free block before increasing dataLength. + * + * This scheme seems like overkill for index-2 blocks since the whole index array is + * preallocated anyway (unlike the growable data array). + * Just allocating multiple index-2 blocks as needed. + */ +struct UNewTrie2 { + int32_t index1[UNEWTRIE2_INDEX_1_LENGTH]; + int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH]; + uint32_t *data; +#ifdef UCPTRIE_DEBUG + UMutableCPTrie *t3; +#endif + + uint32_t initialValue, errorValue; + int32_t index2Length, dataCapacity, dataLength; + int32_t firstFreeBlock; + int32_t index2NullOffset, dataNullOffset; + UChar32 highStart; + UBool isCompacted; + + /** + * Multi-purpose per-data-block table. + * + * Before compacting: + * + * Per-data-block reference counters/free-block list. + * 0: unused + * >0: reference counter (number of index-2 entries pointing here) + * <0: next free data block in free-block list + * + * While compacting: + * + * Map of adjusted indexes, used in compactData() and compactIndex2(). + * Maps from original indexes to new ones. + */ + int32_t map[UNEWTRIE2_MAX_DATA_LENGTH>>UTRIE2_SHIFT_2]; +}; + +#endif diff --git a/thirdparty/icu4c/common/utrie_swap.cpp b/thirdparty/icu4c/common/utrie_swap.cpp new file mode 100644 index 0000000000..6e8b138394 --- /dev/null +++ b/thirdparty/icu4c/common/utrie_swap.cpp @@ -0,0 +1,348 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// utrie_swap.cpp +// created: 2018aug08 Markus W. Scherer + +#include "unicode/utypes.h" +#include "cmemory.h" +#include "ucptrie_impl.h" +#include "udataswp.h" +#include "utrie.h" +#include "utrie2_impl.h" + +// These functions for swapping different generations of ICU code point tries are here +// so that their implementation files need not depend on swapper code, +// need not depend on each other, and so that other swapper code +// need not depend on other trie code. + +namespace { + +constexpr int32_t ASCII_LIMIT = 0x80; + +} // namespace + +U_CAPI int32_t U_EXPORT2 +utrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UTrieHeader *inTrie; + UTrieHeader trie; + int32_t size; + UBool dataIs32; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inTrie=(const UTrieHeader *)inData; + trie.signature=ds->readUInt32(inTrie->signature); + trie.options=ds->readUInt32(inTrie->options); + trie.indexLength=udata_readInt32(ds, inTrie->indexLength); + trie.dataLength=udata_readInt32(ds, inTrie->dataLength); + + if( trie.signature!=0x54726965 || + (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || + ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || + trie.indexLength<UTRIE_BMP_INDEX_LENGTH || + (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 || + trie.dataLength<UTRIE_DATA_BLOCK_LENGTH || + (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 || + ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100)) + ) { + *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ + return 0; + } + + dataIs32=(UBool)((trie.options&UTRIE_OPTIONS_DATA_IS_32_BIT)!=0); + size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2); + + if(length>=0) { + UTrieHeader *outTrie; + + if(length<size) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + outTrie=(UTrieHeader *)outData; + + /* swap the header */ + ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); + + /* swap the index and the data */ + if(dataIs32) { + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); + ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, + (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); + } else { + ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); + } + } + + return size; +} + +U_CAPI int32_t U_EXPORT2 +utrie2_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UTrie2Header *inTrie; + UTrie2Header trie; + int32_t dataLength, size; + UTrie2ValueBits valueBits; + + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inTrie=(const UTrie2Header *)inData; + trie.signature=ds->readUInt32(inTrie->signature); + trie.options=ds->readUInt16(inTrie->options); + trie.indexLength=ds->readUInt16(inTrie->indexLength); + trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); + + valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); + dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT; + + if( trie.signature!=UTRIE2_SIG || + valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits || + trie.indexLength<UTRIE2_INDEX_1_OFFSET || + dataLength<UTRIE2_DATA_START_OFFSET + ) { + *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */ + return 0; + } + + size=sizeof(UTrie2Header)+trie.indexLength*2; + switch(valueBits) { + case UTRIE2_16_VALUE_BITS: + size+=dataLength*2; + break; + case UTRIE2_32_VALUE_BITS: + size+=dataLength*4; + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + + if(length>=0) { + UTrie2Header *outTrie; + + if(length<size) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + outTrie=(UTrie2Header *)outData; + + /* swap the header */ + ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); + ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); + + /* swap the index and the data */ + switch(valueBits) { + case UTRIE2_16_VALUE_BITS: + ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); + break; + case UTRIE2_32_VALUE_BITS: + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); + ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, + (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + } + + return size; +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UCPTrieHeader *inTrie; + UCPTrieHeader trie; + int32_t dataLength, size; + UCPTrieValueWidth valueWidth; + + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inTrie=(const UCPTrieHeader *)inData; + trie.signature=ds->readUInt32(inTrie->signature); + trie.options=ds->readUInt16(inTrie->options); + trie.indexLength=ds->readUInt16(inTrie->indexLength); + trie.dataLength = ds->readUInt16(inTrie->dataLength); + + UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3); + valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK); + dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength; + + int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ? + UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; + if( trie.signature!=UCPTRIE_SIG || + type > UCPTRIE_TYPE_SMALL || + (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 || + valueWidth > UCPTRIE_VALUE_BITS_8 || + trie.indexLength < minIndexLength || + dataLength < ASCII_LIMIT + ) { + *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */ + return 0; + } + + size=sizeof(UCPTrieHeader)+trie.indexLength*2; + switch(valueWidth) { + case UCPTRIE_VALUE_BITS_16: + size+=dataLength*2; + break; + case UCPTRIE_VALUE_BITS_32: + size+=dataLength*4; + break; + case UCPTRIE_VALUE_BITS_8: + size+=dataLength; + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + + if(length>=0) { + UCPTrieHeader *outTrie; + + if(length<size) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + outTrie=(UCPTrieHeader *)outData; + + /* swap the header */ + ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); + ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); + + /* swap the index */ + const uint16_t *inIndex=reinterpret_cast<const uint16_t *>(inTrie+1); + uint16_t *outIndex=reinterpret_cast<uint16_t *>(outTrie+1); + ds->swapArray16(ds, inIndex, trie.indexLength*2, outIndex, pErrorCode); + + /* swap the data */ + const uint16_t *inData=inIndex+trie.indexLength; + uint16_t *outData=outIndex+trie.indexLength; + switch(valueWidth) { + case UCPTRIE_VALUE_BITS_16: + ds->swapArray16(ds, inData, dataLength*2, outData, pErrorCode); + break; + case UCPTRIE_VALUE_BITS_32: + ds->swapArray32(ds, inData, dataLength*4, outData, pErrorCode); + break; + case UCPTRIE_VALUE_BITS_8: + if(inTrie!=outTrie) { + uprv_memmove(outData, inData, dataLength); + } + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + } + + return size; +} + +namespace { + +/** + * Gets the trie version from 32-bit-aligned memory containing the serialized form + * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3). + * + * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie + * @param length the number of bytes available at data; + * can be more than necessary (see return value) + * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized. + * If TRUE, opposite-endian serialized forms are recognized as well. + * @return the trie version of the serialized form, or 0 if it is not + * recognized as a serialized trie + */ +int32_t +getVersion(const void *data, int32_t length, UBool anyEndianOk) { + uint32_t signature; + if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) { + return 0; + } + signature=*(const uint32_t *)data; + if(signature==UCPTRIE_SIG) { + return 3; + } + if(anyEndianOk && signature==UCPTRIE_OE_SIG) { + return 3; + } + if(signature==UTRIE2_SIG) { + return 2; + } + if(anyEndianOk && signature==UTRIE2_OE_SIG) { + return 2; + } + if(signature==UTRIE_SIG) { + return 1; + } + if(anyEndianOk && signature==UTRIE_OE_SIG) { + return 1; + } + return 0; +} + +} // namespace + +U_CAPI int32_t U_EXPORT2 +utrie_swapAnyVersion(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { return 0; } + switch(getVersion(inData, length, TRUE)) { + case 1: + return utrie_swap(ds, inData, length, outData, pErrorCode); + case 2: + return utrie2_swap(ds, inData, length, outData, pErrorCode); + case 3: + return ucptrie_swap(ds, inData, length, outData, pErrorCode); + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } +} diff --git a/thirdparty/icu4c/common/uts46.cpp b/thirdparty/icu4c/common/uts46.cpp new file mode 100644 index 0000000000..f25b4e12f1 --- /dev/null +++ b/thirdparty/icu4c/common/uts46.cpp @@ -0,0 +1,1494 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2010-2015, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: uts46.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2010mar09 +* created by: Markus W. Scherer +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_IDNA + +#include "unicode/idna.h" +#include "unicode/normalizer2.h" +#include "unicode/uscript.h" +#include "unicode/ustring.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "cstring.h" +#include "punycode.h" +#include "ubidi_props.h" +#include "ustr_imp.h" + +// Note about tests for UIDNA_ERROR_DOMAIN_NAME_TOO_LONG: +// +// The domain name length limit is 255 octets in an internal DNS representation +// where the last ("root") label is the empty label +// represented by length byte 0 alone. +// In a conventional string, this translates to 253 characters, or 254 +// if there is a trailing dot for the root label. + +U_NAMESPACE_BEGIN + +// Severe errors which usually result in a U+FFFD replacement character in the result string. +const uint32_t severeErrors= + UIDNA_ERROR_LEADING_COMBINING_MARK| + UIDNA_ERROR_DISALLOWED| + UIDNA_ERROR_PUNYCODE| + UIDNA_ERROR_LABEL_HAS_DOT| + UIDNA_ERROR_INVALID_ACE_LABEL; + +static inline UBool +isASCIIString(const UnicodeString &dest) { + const UChar *s=dest.getBuffer(); + const UChar *limit=s+dest.length(); + while(s<limit) { + if(*s++>0x7f) { + return FALSE; + } + } + return TRUE; +} + +static UBool +isASCIIOkBiDi(const UChar *s, int32_t length); + +static UBool +isASCIIOkBiDi(const char *s, int32_t length); + +// IDNA class default implementations -------------------------------------- *** + +IDNA::~IDNA() {} + +void +IDNA::labelToASCII_UTF8(StringPiece label, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + if(U_SUCCESS(errorCode)) { + UnicodeString destString; + labelToASCII(UnicodeString::fromUTF8(label), destString, + info, errorCode).toUTF8(dest); + } +} + +void +IDNA::labelToUnicodeUTF8(StringPiece label, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + if(U_SUCCESS(errorCode)) { + UnicodeString destString; + labelToUnicode(UnicodeString::fromUTF8(label), destString, + info, errorCode).toUTF8(dest); + } +} + +void +IDNA::nameToASCII_UTF8(StringPiece name, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + if(U_SUCCESS(errorCode)) { + UnicodeString destString; + nameToASCII(UnicodeString::fromUTF8(name), destString, + info, errorCode).toUTF8(dest); + } +} + +void +IDNA::nameToUnicodeUTF8(StringPiece name, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + if(U_SUCCESS(errorCode)) { + UnicodeString destString; + nameToUnicode(UnicodeString::fromUTF8(name), destString, + info, errorCode).toUTF8(dest); + } +} + +// UTS46 class declaration ------------------------------------------------- *** + +class UTS46 : public IDNA { +public: + UTS46(uint32_t options, UErrorCode &errorCode); + virtual ~UTS46(); + + virtual UnicodeString & + labelToASCII(const UnicodeString &label, UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + + virtual UnicodeString & + labelToUnicode(const UnicodeString &label, UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + + virtual UnicodeString & + nameToASCII(const UnicodeString &name, UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + + virtual UnicodeString & + nameToUnicode(const UnicodeString &name, UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + + virtual void + labelToASCII_UTF8(StringPiece label, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + + virtual void + labelToUnicodeUTF8(StringPiece label, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + + virtual void + nameToASCII_UTF8(StringPiece name, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + + virtual void + nameToUnicodeUTF8(StringPiece name, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + +private: + UnicodeString & + process(const UnicodeString &src, + UBool isLabel, UBool toASCII, + UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + + void + processUTF8(StringPiece src, + UBool isLabel, UBool toASCII, + ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + + UnicodeString & + processUnicode(const UnicodeString &src, + int32_t labelStart, int32_t mappingStart, + UBool isLabel, UBool toASCII, + UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const; + + // returns the new dest.length() + int32_t + mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, + UErrorCode &errorCode) const; + + // returns the new label length + int32_t + processLabel(UnicodeString &dest, + int32_t labelStart, int32_t labelLength, + UBool toASCII, + IDNAInfo &info, UErrorCode &errorCode) const; + int32_t + markBadACELabel(UnicodeString &dest, + int32_t labelStart, int32_t labelLength, + UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const; + + void + checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const; + + UBool + isLabelOkContextJ(const UChar *label, int32_t labelLength) const; + + void + checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const; + + const Normalizer2 &uts46Norm2; // uts46.nrm + uint32_t options; +}; + +IDNA * +IDNA::createUTS46Instance(uint32_t options, UErrorCode &errorCode) { + if(U_SUCCESS(errorCode)) { + IDNA *idna=new UTS46(options, errorCode); + if(idna==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } else if(U_FAILURE(errorCode)) { + delete idna; + idna=NULL; + } + return idna; + } else { + return NULL; + } +} + +// UTS46 implementation ---------------------------------------------------- *** + +UTS46::UTS46(uint32_t opt, UErrorCode &errorCode) + : uts46Norm2(*Normalizer2::getInstance(NULL, "uts46", UNORM2_COMPOSE, errorCode)), + options(opt) {} + +UTS46::~UTS46() {} + +UnicodeString & +UTS46::labelToASCII(const UnicodeString &label, UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + return process(label, TRUE, TRUE, dest, info, errorCode); +} + +UnicodeString & +UTS46::labelToUnicode(const UnicodeString &label, UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + return process(label, TRUE, FALSE, dest, info, errorCode); +} + +UnicodeString & +UTS46::nameToASCII(const UnicodeString &name, UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + process(name, FALSE, TRUE, dest, info, errorCode); + if( dest.length()>=254 && (info.errors&UIDNA_ERROR_DOMAIN_NAME_TOO_LONG)==0 && + isASCIIString(dest) && + (dest.length()>254 || dest[253]!=0x2e) + ) { + info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; + } + return dest; +} + +UnicodeString & +UTS46::nameToUnicode(const UnicodeString &name, UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + return process(name, FALSE, FALSE, dest, info, errorCode); +} + +void +UTS46::labelToASCII_UTF8(StringPiece label, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + processUTF8(label, TRUE, TRUE, dest, info, errorCode); +} + +void +UTS46::labelToUnicodeUTF8(StringPiece label, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + processUTF8(label, TRUE, FALSE, dest, info, errorCode); +} + +void +UTS46::nameToASCII_UTF8(StringPiece name, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + processUTF8(name, FALSE, TRUE, dest, info, errorCode); +} + +void +UTS46::nameToUnicodeUTF8(StringPiece name, ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + processUTF8(name, FALSE, FALSE, dest, info, errorCode); +} + +// UTS #46 data for ASCII characters. +// The normalizer (using uts46.nrm) maps uppercase ASCII letters to lowercase +// and passes through all other ASCII characters. +// If UIDNA_USE_STD3_RULES is set, then non-LDH characters are disallowed +// using this data. +// The ASCII fastpath also uses this data. +// Values: -1=disallowed 0==valid 1==mapped (lowercase) +static const int8_t asciiData[128]={ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + // 002D..002E; valid # HYPHEN-MINUS..FULL STOP + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, -1, + // 0030..0039; valid # DIGIT ZERO..DIGIT NINE + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, + // 0041..005A; mapped # LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z + -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, + // 0061..007A; valid # LATIN SMALL LETTER A..LATIN SMALL LETTER Z + -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1 +}; + +UnicodeString & +UTS46::process(const UnicodeString &src, + UBool isLabel, UBool toASCII, + UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + // uts46Norm2.normalize() would do all of this error checking and setup, + // but with the ASCII fastpath we do not always call it, and do not + // call it first. + if(U_FAILURE(errorCode)) { + dest.setToBogus(); + return dest; + } + const UChar *srcArray=src.getBuffer(); + if(&dest==&src || srcArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + dest.setToBogus(); + return dest; + } + // Arguments are fine, reset output values. + dest.remove(); + info.reset(); + int32_t srcLength=src.length(); + if(srcLength==0) { + info.errors|=UIDNA_ERROR_EMPTY_LABEL; + return dest; + } + UChar *destArray=dest.getBuffer(srcLength); + if(destArray==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return dest; + } + // ASCII fastpath + UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; + int32_t labelStart=0; + int32_t i; + for(i=0;; ++i) { + if(i==srcLength) { + if(toASCII) { + if((i-labelStart)>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; + } + // There is a trailing dot if labelStart==i. + if(!isLabel && i>=254 && (i>254 || labelStart<i)) { + info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; + } + } + info.errors|=info.labelErrors; + dest.releaseBuffer(i); + return dest; + } + UChar c=srcArray[i]; + if(c>0x7f) { + break; + } + int cData=asciiData[c]; + if(cData>0) { + destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter. + } else if(cData<0 && disallowNonLDHDot) { + break; // Replacing with U+FFFD can be complicated for toASCII. + } else { + destArray[i]=c; + if(c==0x2d) { // hyphen + if(i==(labelStart+3) && srcArray[i-1]==0x2d) { + // "??--..." is Punycode or forbidden. + ++i; // '-' was copied to dest already + break; + } + if(i==labelStart) { + // label starts with "-" + info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; + } + if((i+1)==srcLength || srcArray[i+1]==0x2e) { + // label ends with "-" + info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; + } + } else if(c==0x2e) { // dot + if(isLabel) { + // Replacing with U+FFFD can be complicated for toASCII. + ++i; // '.' was copied to dest already + break; + } + if(i==labelStart) { + info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; + } + if(toASCII && (i-labelStart)>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; + } + info.errors|=info.labelErrors; + info.labelErrors=0; + labelStart=i+1; + } + } + } + info.errors|=info.labelErrors; + dest.releaseBuffer(i); + processUnicode(src, labelStart, i, isLabel, toASCII, dest, info, errorCode); + if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 && + (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(dest.getBuffer(), labelStart))) + ) { + info.errors|=UIDNA_ERROR_BIDI; + } + return dest; +} + +void +UTS46::processUTF8(StringPiece src, + UBool isLabel, UBool toASCII, + ByteSink &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return; + } + const char *srcArray=src.data(); + int32_t srcLength=src.length(); + if(srcArray==NULL && srcLength!=0) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + // Arguments are fine, reset output values. + info.reset(); + if(srcLength==0) { + info.errors|=UIDNA_ERROR_EMPTY_LABEL; + dest.Flush(); + return; + } + UnicodeString destString; + int32_t labelStart=0; + if(srcLength<=256) { // length of stackArray[] + // ASCII fastpath + char stackArray[256]; + int32_t destCapacity; + char *destArray=dest.GetAppendBuffer(srcLength, srcLength+20, + stackArray, UPRV_LENGTHOF(stackArray), &destCapacity); + UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; + int32_t i; + for(i=0;; ++i) { + if(i==srcLength) { + if(toASCII) { + if((i-labelStart)>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; + } + // There is a trailing dot if labelStart==i. + if(!isLabel && i>=254 && (i>254 || labelStart<i)) { + info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; + } + } + info.errors|=info.labelErrors; + dest.Append(destArray, i); + dest.Flush(); + return; + } + char c=srcArray[i]; + if((int8_t)c<0) { // (uint8_t)c>0x7f + break; + } + int cData=asciiData[(int)c]; // Cast: gcc warns about indexing with a char. + if(cData>0) { + destArray[i]=c+0x20; // Lowercase an uppercase ASCII letter. + } else if(cData<0 && disallowNonLDHDot) { + break; // Replacing with U+FFFD can be complicated for toASCII. + } else { + destArray[i]=c; + if(c==0x2d) { // hyphen + if(i==(labelStart+3) && srcArray[i-1]==0x2d) { + // "??--..." is Punycode or forbidden. + break; + } + if(i==labelStart) { + // label starts with "-" + info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; + } + if((i+1)==srcLength || srcArray[i+1]==0x2e) { + // label ends with "-" + info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; + } + } else if(c==0x2e) { // dot + if(isLabel) { + break; // Replacing with U+FFFD can be complicated for toASCII. + } + if(i==labelStart) { + info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; + } + if(toASCII && (i-labelStart)>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; + } + info.errors|=info.labelErrors; + info.labelErrors=0; + labelStart=i+1; + } + } + } + info.errors|=info.labelErrors; + // Convert the processed ASCII prefix of the current label to UTF-16. + int32_t mappingStart=i-labelStart; + destString=UnicodeString::fromUTF8(StringPiece(destArray+labelStart, mappingStart)); + // Output the previous ASCII labels and process the rest of src in UTF-16. + dest.Append(destArray, labelStart); + processUnicode(UnicodeString::fromUTF8(StringPiece(src, labelStart)), 0, mappingStart, + isLabel, toASCII, + destString, info, errorCode); + } else { + // src is too long for the ASCII fastpath implementation. + processUnicode(UnicodeString::fromUTF8(src), 0, 0, + isLabel, toASCII, + destString, info, errorCode); + } + destString.toUTF8(dest); // calls dest.Flush() + if(toASCII && !isLabel) { + // length==labelStart==254 means that there is a trailing dot (ok) and + // destString is empty (do not index at 253-labelStart). + int32_t length=labelStart+destString.length(); + if( length>=254 && isASCIIString(destString) && + (length>254 || + (labelStart<254 && destString[253-labelStart]!=0x2e)) + ) { + info.errors|=UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; + } + } + if( info.isBiDi && U_SUCCESS(errorCode) && (info.errors&severeErrors)==0 && + (!info.isOkBiDi || (labelStart>0 && !isASCIIOkBiDi(srcArray, labelStart))) + ) { + info.errors|=UIDNA_ERROR_BIDI; + } +} + +UnicodeString & +UTS46::processUnicode(const UnicodeString &src, + int32_t labelStart, int32_t mappingStart, + UBool isLabel, UBool toASCII, + UnicodeString &dest, + IDNAInfo &info, UErrorCode &errorCode) const { + if(mappingStart==0) { + uts46Norm2.normalize(src, dest, errorCode); + } else { + uts46Norm2.normalizeSecondAndAppend(dest, src.tempSubString(mappingStart), errorCode); + } + if(U_FAILURE(errorCode)) { + return dest; + } + UBool doMapDevChars= + toASCII ? (options&UIDNA_NONTRANSITIONAL_TO_ASCII)==0 : + (options&UIDNA_NONTRANSITIONAL_TO_UNICODE)==0; + const UChar *destArray=dest.getBuffer(); + int32_t destLength=dest.length(); + int32_t labelLimit=labelStart; + while(labelLimit<destLength) { + UChar c=destArray[labelLimit]; + if(c==0x2e && !isLabel) { + int32_t labelLength=labelLimit-labelStart; + int32_t newLength=processLabel(dest, labelStart, labelLength, + toASCII, info, errorCode); + info.errors|=info.labelErrors; + info.labelErrors=0; + if(U_FAILURE(errorCode)) { + return dest; + } + destArray=dest.getBuffer(); + destLength+=newLength-labelLength; + labelLimit=labelStart+=newLength+1; + continue; + } else if(c<0xdf) { + // pass + } else if(c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) { + info.isTransDiff=TRUE; + if(doMapDevChars) { + destLength=mapDevChars(dest, labelStart, labelLimit, errorCode); + if(U_FAILURE(errorCode)) { + return dest; + } + destArray=dest.getBuffer(); + // All deviation characters have been mapped, no need to check for them again. + doMapDevChars=FALSE; + // Do not increment labelLimit in case c was removed. + continue; + } + } else if(U16_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c) ? + (labelLimit+1)==destLength || !U16_IS_TRAIL(destArray[labelLimit+1]) : + labelLimit==labelStart || !U16_IS_LEAD(destArray[labelLimit-1])) { + // Map an unpaired surrogate to U+FFFD before normalization so that when + // that removes characters we do not turn two unpaired ones into a pair. + info.labelErrors|=UIDNA_ERROR_DISALLOWED; + dest.setCharAt(labelLimit, 0xfffd); + destArray=dest.getBuffer(); + } + } + ++labelLimit; + } + // Permit an empty label at the end (0<labelStart==labelLimit==destLength is ok) + // but not an empty label elsewhere nor a completely empty domain name. + // processLabel() sets UIDNA_ERROR_EMPTY_LABEL when labelLength==0. + if(0==labelStart || labelStart<labelLimit) { + processLabel(dest, labelStart, labelLimit-labelStart, + toASCII, info, errorCode); + info.errors|=info.labelErrors; + } + return dest; +} + +int32_t +UTS46::mapDevChars(UnicodeString &dest, int32_t labelStart, int32_t mappingStart, + UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } + int32_t length=dest.length(); + UChar *s=dest.getBuffer(dest[mappingStart]==0xdf ? length+1 : length); + if(s==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return length; + } + int32_t capacity=dest.getCapacity(); + UBool didMapDevChars=FALSE; + int32_t readIndex=mappingStart, writeIndex=mappingStart; + do { + UChar c=s[readIndex++]; + switch(c) { + case 0xdf: + // Map sharp s to ss. + didMapDevChars=TRUE; + s[writeIndex++]=0x73; // Replace sharp s with first s. + // Insert second s and account for possible buffer reallocation. + if(writeIndex==readIndex) { + if(length==capacity) { + dest.releaseBuffer(length); + s=dest.getBuffer(length+1); + if(s==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return length; + } + capacity=dest.getCapacity(); + } + u_memmove(s+writeIndex+1, s+writeIndex, length-writeIndex); + ++readIndex; + } + s[writeIndex++]=0x73; + ++length; + break; + case 0x3c2: // Map final sigma to nonfinal sigma. + didMapDevChars=TRUE; + s[writeIndex++]=0x3c3; + break; + case 0x200c: // Ignore/remove ZWNJ. + case 0x200d: // Ignore/remove ZWJ. + didMapDevChars=TRUE; + --length; + break; + default: + // Only really necessary if writeIndex was different from readIndex. + s[writeIndex++]=c; + break; + } + } while(writeIndex<length); + dest.releaseBuffer(length); + if(didMapDevChars) { + // Mapping deviation characters might have resulted in an un-NFC string. + // We could use either the NFC or the UTS #46 normalizer. + // By using the UTS #46 normalizer again, we avoid having to load a second .nrm data file. + UnicodeString normalized; + uts46Norm2.normalize(dest.tempSubString(labelStart), normalized, errorCode); + if(U_SUCCESS(errorCode)) { + dest.replace(labelStart, 0x7fffffff, normalized); + if(dest.isBogus()) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } + return dest.length(); + } + } + return length; +} + +// Some non-ASCII characters are equivalent to sequences with +// non-LDH ASCII characters. To find them: +// grep disallowed_STD3_valid IdnaMappingTable.txt (or uts46.txt) +static inline UBool +isNonASCIIDisallowedSTD3Valid(UChar32 c) { + return c==0x2260 || c==0x226E || c==0x226F; +} + +// Replace the label in dest with the label string, if the label was modified. +// If &label==&dest then the label was modified in-place and labelLength +// is the new label length, different from label.length(). +// If &label!=&dest then labelLength==label.length(). +// Returns labelLength (= the new label length). +static int32_t +replaceLabel(UnicodeString &dest, int32_t destLabelStart, int32_t destLabelLength, + const UnicodeString &label, int32_t labelLength, UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return 0; + } + if(&label!=&dest) { + dest.replace(destLabelStart, destLabelLength, label); + if(dest.isBogus()) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return 0; + } + } + return labelLength; +} + +int32_t +UTS46::processLabel(UnicodeString &dest, + int32_t labelStart, int32_t labelLength, + UBool toASCII, + IDNAInfo &info, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } + UnicodeString fromPunycode; + UnicodeString *labelString; + const UChar *label=dest.getBuffer()+labelStart; + int32_t destLabelStart=labelStart; + int32_t destLabelLength=labelLength; + UBool wasPunycode; + if(labelLength>=4 && label[0]==0x78 && label[1]==0x6e && label[2]==0x2d && label[3]==0x2d) { + // Label starts with "xn--", try to un-Punycode it. + // In IDNA2008, labels like "xn--" (decodes to an empty string) and + // "xn--ASCII-" (decodes to just "ASCII") fail the round-trip validation from + // comparing the ToUnicode input with the back-to-ToASCII output. + // They are alternate encodings of the respective ASCII labels. + // Ignore "xn---" here: It will fail Punycode.decode() which logically comes before + // the round-trip verification. + if(labelLength==4 || (labelLength>5 && label[labelLength-1]==u'-')) { + info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; + return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); + } + wasPunycode=TRUE; + UChar *unicodeBuffer=fromPunycode.getBuffer(-1); // capacity==-1: most labels should fit + if(unicodeBuffer==NULL) { + // Should never occur if we used capacity==-1 which uses the internal buffer. + errorCode=U_MEMORY_ALLOCATION_ERROR; + return labelLength; + } + UErrorCode punycodeErrorCode=U_ZERO_ERROR; + int32_t unicodeLength=u_strFromPunycode(label+4, labelLength-4, + unicodeBuffer, fromPunycode.getCapacity(), + NULL, &punycodeErrorCode); + if(punycodeErrorCode==U_BUFFER_OVERFLOW_ERROR) { + fromPunycode.releaseBuffer(0); + unicodeBuffer=fromPunycode.getBuffer(unicodeLength); + if(unicodeBuffer==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return labelLength; + } + punycodeErrorCode=U_ZERO_ERROR; + unicodeLength=u_strFromPunycode(label+4, labelLength-4, + unicodeBuffer, fromPunycode.getCapacity(), + NULL, &punycodeErrorCode); + } + fromPunycode.releaseBuffer(unicodeLength); + if(U_FAILURE(punycodeErrorCode)) { + info.labelErrors|=UIDNA_ERROR_PUNYCODE; + return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); + } + // Check for NFC, and for characters that are not + // valid or deviation characters according to the normalizer. + // If there is something wrong, then the string will change. + // Note that the normalizer passes through non-LDH ASCII and deviation characters. + // Deviation characters are ok in Punycode even in transitional processing. + // In the code further below, if we find non-LDH ASCII and we have UIDNA_USE_STD3_RULES + // then we will set UIDNA_ERROR_INVALID_ACE_LABEL there too. + UBool isValid=uts46Norm2.isNormalized(fromPunycode, errorCode); + if(U_FAILURE(errorCode)) { + return labelLength; + } + if(!isValid) { + info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; + return markBadACELabel(dest, labelStart, labelLength, toASCII, info, errorCode); + } + labelString=&fromPunycode; + label=fromPunycode.getBuffer(); + labelStart=0; + labelLength=fromPunycode.length(); + } else { + wasPunycode=FALSE; + labelString=&dest; + } + // Validity check + if(labelLength==0) { + info.labelErrors|=UIDNA_ERROR_EMPTY_LABEL; + return replaceLabel(dest, destLabelStart, destLabelLength, + *labelString, labelLength, errorCode); + } + // labelLength>0 + if(labelLength>=4 && label[2]==0x2d && label[3]==0x2d) { + // label starts with "??--" + info.labelErrors|=UIDNA_ERROR_HYPHEN_3_4; + } + if(label[0]==0x2d) { + // label starts with "-" + info.labelErrors|=UIDNA_ERROR_LEADING_HYPHEN; + } + if(label[labelLength-1]==0x2d) { + // label ends with "-" + info.labelErrors|=UIDNA_ERROR_TRAILING_HYPHEN; + } + // If the label was not a Punycode label, then it was the result of + // mapping, normalization and label segmentation. + // If the label was in Punycode, then we mapped it again above + // and checked its validity. + // Now we handle the STD3 restriction to LDH characters (if set) + // and we look for U+FFFD which indicates disallowed characters + // in a non-Punycode label or U+FFFD itself in a Punycode label. + // We also check for dots which can come from the input to a single-label function. + // Ok to cast away const because we own the UnicodeString. + UChar *s=(UChar *)label; + const UChar *limit=label+labelLength; + UChar oredChars=0; + // If we enforce STD3 rules, then ASCII characters other than LDH and dot are disallowed. + UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; + do { + UChar c=*s; + if(c<=0x7f) { + if(c==0x2e) { + info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; + *s=0xfffd; + } else if(disallowNonLDHDot && asciiData[c]<0) { + info.labelErrors|=UIDNA_ERROR_DISALLOWED; + *s=0xfffd; + } + } else { + oredChars|=c; + if(disallowNonLDHDot && isNonASCIIDisallowedSTD3Valid(c)) { + info.labelErrors|=UIDNA_ERROR_DISALLOWED; + *s=0xfffd; + } else if(c==0xfffd) { + info.labelErrors|=UIDNA_ERROR_DISALLOWED; + } + } + ++s; + } while(s<limit); + // Check for a leading combining mark after other validity checks + // so that we don't report UIDNA_ERROR_DISALLOWED for the U+FFFD from here. + UChar32 c; + int32_t cpLength=0; + // "Unsafe" is ok because unpaired surrogates were mapped to U+FFFD. + U16_NEXT_UNSAFE(label, cpLength, c); + if((U_GET_GC_MASK(c)&U_GC_M_MASK)!=0) { + info.labelErrors|=UIDNA_ERROR_LEADING_COMBINING_MARK; + labelString->replace(labelStart, cpLength, (UChar)0xfffd); + label=labelString->getBuffer()+labelStart; + labelLength+=1-cpLength; + if(labelString==&dest) { + destLabelLength=labelLength; + } + } + if((info.labelErrors&severeErrors)==0) { + // Do contextual checks only if we do not have U+FFFD from a severe error + // because U+FFFD can make these checks fail. + if((options&UIDNA_CHECK_BIDI)!=0 && (!info.isBiDi || info.isOkBiDi)) { + checkLabelBiDi(label, labelLength, info); + } + if( (options&UIDNA_CHECK_CONTEXTJ)!=0 && (oredChars&0x200c)==0x200c && + !isLabelOkContextJ(label, labelLength) + ) { + info.labelErrors|=UIDNA_ERROR_CONTEXTJ; + } + if((options&UIDNA_CHECK_CONTEXTO)!=0 && oredChars>=0xb7) { + checkLabelContextO(label, labelLength, info); + } + if(toASCII) { + if(wasPunycode) { + // Leave a Punycode label unchanged if it has no severe errors. + if(destLabelLength>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; + } + return destLabelLength; + } else if(oredChars>=0x80) { + // Contains non-ASCII characters. + UnicodeString punycode; + UChar *buffer=punycode.getBuffer(63); // 63==maximum DNS label length + if(buffer==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return destLabelLength; + } + buffer[0]=0x78; // Write "xn--". + buffer[1]=0x6e; + buffer[2]=0x2d; + buffer[3]=0x2d; + int32_t punycodeLength=u_strToPunycode(label, labelLength, + buffer+4, punycode.getCapacity()-4, + NULL, &errorCode); + if(errorCode==U_BUFFER_OVERFLOW_ERROR) { + errorCode=U_ZERO_ERROR; + punycode.releaseBuffer(4); + buffer=punycode.getBuffer(4+punycodeLength); + if(buffer==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return destLabelLength; + } + punycodeLength=u_strToPunycode(label, labelLength, + buffer+4, punycode.getCapacity()-4, + NULL, &errorCode); + } + punycodeLength+=4; + punycode.releaseBuffer(punycodeLength); + if(U_FAILURE(errorCode)) { + return destLabelLength; + } + if(punycodeLength>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; + } + return replaceLabel(dest, destLabelStart, destLabelLength, + punycode, punycodeLength, errorCode); + } else { + // all-ASCII label + if(labelLength>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; + } + } + } + } else { + // If a Punycode label has severe errors, + // then leave it but make sure it does not look valid. + if(wasPunycode) { + info.labelErrors|=UIDNA_ERROR_INVALID_ACE_LABEL; + return markBadACELabel(dest, destLabelStart, destLabelLength, toASCII, info, errorCode); + } + } + return replaceLabel(dest, destLabelStart, destLabelLength, + *labelString, labelLength, errorCode); +} + +// Make sure an ACE label does not look valid. +// Append U+FFFD if the label has only LDH characters. +// If UIDNA_USE_STD3_RULES, also replace disallowed ASCII characters with U+FFFD. +int32_t +UTS46::markBadACELabel(UnicodeString &dest, + int32_t labelStart, int32_t labelLength, + UBool toASCII, IDNAInfo &info, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } + UBool disallowNonLDHDot=(options&UIDNA_USE_STD3_RULES)!=0; + UBool isASCII=TRUE; + UBool onlyLDH=TRUE; + const UChar *label=dest.getBuffer()+labelStart; + const UChar *limit=label+labelLength; + // Start after the initial "xn--". + // Ok to cast away const because we own the UnicodeString. + for(UChar *s=const_cast<UChar *>(label+4); s<limit; ++s) { + UChar c=*s; + if(c<=0x7f) { + if(c==0x2e) { + info.labelErrors|=UIDNA_ERROR_LABEL_HAS_DOT; + *s=0xfffd; + isASCII=onlyLDH=FALSE; + } else if(asciiData[c]<0) { + onlyLDH=FALSE; + if(disallowNonLDHDot) { + *s=0xfffd; + isASCII=FALSE; + } + } + } else { + isASCII=onlyLDH=FALSE; + } + } + if(onlyLDH) { + dest.insert(labelStart+labelLength, (UChar)0xfffd); + if(dest.isBogus()) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return 0; + } + ++labelLength; + } else { + if(toASCII && isASCII && labelLength>63) { + info.labelErrors|=UIDNA_ERROR_LABEL_TOO_LONG; + } + } + return labelLength; +} + +const uint32_t L_MASK=U_MASK(U_LEFT_TO_RIGHT); +const uint32_t R_AL_MASK=U_MASK(U_RIGHT_TO_LEFT)|U_MASK(U_RIGHT_TO_LEFT_ARABIC); +const uint32_t L_R_AL_MASK=L_MASK|R_AL_MASK; + +const uint32_t R_AL_AN_MASK=R_AL_MASK|U_MASK(U_ARABIC_NUMBER); + +const uint32_t EN_AN_MASK=U_MASK(U_EUROPEAN_NUMBER)|U_MASK(U_ARABIC_NUMBER); +const uint32_t R_AL_EN_AN_MASK=R_AL_MASK|EN_AN_MASK; +const uint32_t L_EN_MASK=L_MASK|U_MASK(U_EUROPEAN_NUMBER); + +const uint32_t ES_CS_ET_ON_BN_NSM_MASK= + U_MASK(U_EUROPEAN_NUMBER_SEPARATOR)| + U_MASK(U_COMMON_NUMBER_SEPARATOR)| + U_MASK(U_EUROPEAN_NUMBER_TERMINATOR)| + U_MASK(U_OTHER_NEUTRAL)| + U_MASK(U_BOUNDARY_NEUTRAL)| + U_MASK(U_DIR_NON_SPACING_MARK); +const uint32_t L_EN_ES_CS_ET_ON_BN_NSM_MASK=L_EN_MASK|ES_CS_ET_ON_BN_NSM_MASK; +const uint32_t R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK=R_AL_MASK|EN_AN_MASK|ES_CS_ET_ON_BN_NSM_MASK; + +// We scan the whole label and check both for whether it contains RTL characters +// and whether it passes the BiDi Rule. +// In a BiDi domain name, all labels must pass the BiDi Rule, but we might find +// that a domain name is a BiDi domain name (has an RTL label) only after +// processing several earlier labels. +void +UTS46::checkLabelBiDi(const UChar *label, int32_t labelLength, IDNAInfo &info) const { + // IDNA2008 BiDi rule + // Get the directionality of the first character. + UChar32 c; + int32_t i=0; + U16_NEXT_UNSAFE(label, i, c); + uint32_t firstMask=U_MASK(u_charDirection(c)); + // 1. The first character must be a character with BIDI property L, R + // or AL. If it has the R or AL property, it is an RTL label; if it + // has the L property, it is an LTR label. + if((firstMask&~L_R_AL_MASK)!=0) { + info.isOkBiDi=FALSE; + } + // Get the directionality of the last non-NSM character. + uint32_t lastMask; + for(;;) { + if(i>=labelLength) { + lastMask=firstMask; + break; + } + U16_PREV_UNSAFE(label, labelLength, c); + UCharDirection dir=u_charDirection(c); + if(dir!=U_DIR_NON_SPACING_MARK) { + lastMask=U_MASK(dir); + break; + } + } + // 3. In an RTL label, the end of the label must be a character with + // BIDI property R, AL, EN or AN, followed by zero or more + // characters with BIDI property NSM. + // 6. In an LTR label, the end of the label must be a character with + // BIDI property L or EN, followed by zero or more characters with + // BIDI property NSM. + if( (firstMask&L_MASK)!=0 ? + (lastMask&~L_EN_MASK)!=0 : + (lastMask&~R_AL_EN_AN_MASK)!=0 + ) { + info.isOkBiDi=FALSE; + } + // Add the directionalities of the intervening characters. + uint32_t mask=firstMask|lastMask; + while(i<labelLength) { + U16_NEXT_UNSAFE(label, i, c); + mask|=U_MASK(u_charDirection(c)); + } + if(firstMask&L_MASK) { + // 5. In an LTR label, only characters with the BIDI properties L, EN, + // ES, CS, ET, ON, BN and NSM are allowed. + if((mask&~L_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) { + info.isOkBiDi=FALSE; + } + } else { + // 2. In an RTL label, only characters with the BIDI properties R, AL, + // AN, EN, ES, CS, ET, ON, BN and NSM are allowed. + if((mask&~R_AL_AN_EN_ES_CS_ET_ON_BN_NSM_MASK)!=0) { + info.isOkBiDi=FALSE; + } + // 4. In an RTL label, if an EN is present, no AN may be present, and + // vice versa. + if((mask&EN_AN_MASK)==EN_AN_MASK) { + info.isOkBiDi=FALSE; + } + } + // An RTL label is a label that contains at least one character of type + // R, AL or AN. [...] + // A "BIDI domain name" is a domain name that contains at least one RTL + // label. [...] + // The following rule, consisting of six conditions, applies to labels + // in BIDI domain names. + if((mask&R_AL_AN_MASK)!=0) { + info.isBiDi=TRUE; + } +} + +// Special code for the ASCII prefix of a BiDi domain name. +// The ASCII prefix is all-LTR. + +// IDNA2008 BiDi rule, parts relevant to ASCII labels: +// 1. The first character must be a character with BIDI property L [...] +// 5. In an LTR label, only characters with the BIDI properties L, EN, +// ES, CS, ET, ON, BN and NSM are allowed. +// 6. In an LTR label, the end of the label must be a character with +// BIDI property L or EN [...] + +// UTF-16 version, called for mapped ASCII prefix. +// Cannot contain uppercase A-Z. +// s[length-1] must be the trailing dot. +static UBool +isASCIIOkBiDi(const UChar *s, int32_t length) { + int32_t labelStart=0; + for(int32_t i=0; i<length; ++i) { + UChar c=s[i]; + if(c==0x2e) { // dot + if(i>labelStart) { + c=s[i-1]; + if(!(0x61<=c && c<=0x7a) && !(0x30<=c && c<=0x39)) { + // Last character in the label is not an L or EN. + return FALSE; + } + } + labelStart=i+1; + } else if(i==labelStart) { + if(!(0x61<=c && c<=0x7a)) { + // First character in the label is not an L. + return FALSE; + } + } else { + if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { + // Intermediate character in the label is a B, S or WS. + return FALSE; + } + } + } + return TRUE; +} + +// UTF-8 version, called for source ASCII prefix. +// Can contain uppercase A-Z. +// s[length-1] must be the trailing dot. +static UBool +isASCIIOkBiDi(const char *s, int32_t length) { + int32_t labelStart=0; + for(int32_t i=0; i<length; ++i) { + char c=s[i]; + if(c==0x2e) { // dot + if(i>labelStart) { + c=s[i-1]; + if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a) && !(0x30<=c && c<=0x39)) { + // Last character in the label is not an L or EN. + return FALSE; + } + } + labelStart=i+1; + } else if(i==labelStart) { + if(!(0x61<=c && c<=0x7a) && !(0x41<=c && c<=0x5a)) { + // First character in the label is not an L. + return FALSE; + } + } else { + if(c<=0x20 && (c>=0x1c || (9<=c && c<=0xd))) { + // Intermediate character in the label is a B, S or WS. + return FALSE; + } + } + } + return TRUE; +} + +UBool +UTS46::isLabelOkContextJ(const UChar *label, int32_t labelLength) const { + // [IDNA2008-Tables] + // 200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER + for(int32_t i=0; i<labelLength; ++i) { + if(label[i]==0x200c) { + // Appendix A.1. ZERO WIDTH NON-JOINER + // Rule Set: + // False; + // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True; + // If RegExpMatch((Joining_Type:{L,D})(Joining_Type:T)*\u200C + // (Joining_Type:T)*(Joining_Type:{R,D})) Then True; + if(i==0) { + return FALSE; + } + UChar32 c; + int32_t j=i; + U16_PREV_UNSAFE(label, j, c); + if(uts46Norm2.getCombiningClass(c)==9) { + continue; + } + // check precontext (Joining_Type:{L,D})(Joining_Type:T)* + for(;;) { + UJoiningType type=ubidi_getJoiningType(c); + if(type==U_JT_TRANSPARENT) { + if(j==0) { + return FALSE; + } + U16_PREV_UNSAFE(label, j, c); + } else if(type==U_JT_LEFT_JOINING || type==U_JT_DUAL_JOINING) { + break; // precontext fulfilled + } else { + return FALSE; + } + } + // check postcontext (Joining_Type:T)*(Joining_Type:{R,D}) + for(j=i+1;;) { + if(j==labelLength) { + return FALSE; + } + U16_NEXT_UNSAFE(label, j, c); + UJoiningType type=ubidi_getJoiningType(c); + if(type==U_JT_TRANSPARENT) { + // just skip this character + } else if(type==U_JT_RIGHT_JOINING || type==U_JT_DUAL_JOINING) { + break; // postcontext fulfilled + } else { + return FALSE; + } + } + } else if(label[i]==0x200d) { + // Appendix A.2. ZERO WIDTH JOINER (U+200D) + // Rule Set: + // False; + // If Canonical_Combining_Class(Before(cp)) .eq. Virama Then True; + if(i==0) { + return FALSE; + } + UChar32 c; + int32_t j=i; + U16_PREV_UNSAFE(label, j, c); + if(uts46Norm2.getCombiningClass(c)!=9) { + return FALSE; + } + } + } + return TRUE; +} + +void +UTS46::checkLabelContextO(const UChar *label, int32_t labelLength, IDNAInfo &info) const { + int32_t labelEnd=labelLength-1; // inclusive + int32_t arabicDigits=0; // -1 for 066x, +1 for 06Fx + for(int32_t i=0; i<=labelEnd; ++i) { + UChar32 c=label[i]; + if(c<0xb7) { + // ASCII fastpath + } else if(c<=0x6f9) { + if(c==0xb7) { + // Appendix A.3. MIDDLE DOT (U+00B7) + // Rule Set: + // False; + // If Before(cp) .eq. U+006C And + // After(cp) .eq. U+006C Then True; + if(!(0<i && label[i-1]==0x6c && + i<labelEnd && label[i+1]==0x6c)) { + info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; + } + } else if(c==0x375) { + // Appendix A.4. GREEK LOWER NUMERAL SIGN (KERAIA) (U+0375) + // Rule Set: + // False; + // If Script(After(cp)) .eq. Greek Then True; + UScriptCode script=USCRIPT_INVALID_CODE; + if(i<labelEnd) { + UErrorCode errorCode=U_ZERO_ERROR; + int32_t j=i+1; + U16_NEXT(label, j, labelLength, c); + script=uscript_getScript(c, &errorCode); + } + if(script!=USCRIPT_GREEK) { + info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; + } + } else if(c==0x5f3 || c==0x5f4) { + // Appendix A.5. HEBREW PUNCTUATION GERESH (U+05F3) + // Rule Set: + // False; + // If Script(Before(cp)) .eq. Hebrew Then True; + // + // Appendix A.6. HEBREW PUNCTUATION GERSHAYIM (U+05F4) + // Rule Set: + // False; + // If Script(Before(cp)) .eq. Hebrew Then True; + UScriptCode script=USCRIPT_INVALID_CODE; + if(0<i) { + UErrorCode errorCode=U_ZERO_ERROR; + int32_t j=i; + U16_PREV(label, 0, j, c); + script=uscript_getScript(c, &errorCode); + } + if(script!=USCRIPT_HEBREW) { + info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; + } + } else if(0x660<=c /* && c<=0x6f9 */) { + // Appendix A.8. ARABIC-INDIC DIGITS (0660..0669) + // Rule Set: + // True; + // For All Characters: + // If cp .in. 06F0..06F9 Then False; + // End For; + // + // Appendix A.9. EXTENDED ARABIC-INDIC DIGITS (06F0..06F9) + // Rule Set: + // True; + // For All Characters: + // If cp .in. 0660..0669 Then False; + // End For; + if(c<=0x669) { + if(arabicDigits>0) { + info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; + } + arabicDigits=-1; + } else if(0x6f0<=c) { + if(arabicDigits<0) { + info.labelErrors|=UIDNA_ERROR_CONTEXTO_DIGITS; + } + arabicDigits=1; + } + } + } else if(c==0x30fb) { + // Appendix A.7. KATAKANA MIDDLE DOT (U+30FB) + // Rule Set: + // False; + // For All Characters: + // If Script(cp) .in. {Hiragana, Katakana, Han} Then True; + // End For; + UErrorCode errorCode=U_ZERO_ERROR; + for(int j=0;;) { + if(j>labelEnd) { + info.labelErrors|=UIDNA_ERROR_CONTEXTO_PUNCTUATION; + break; + } + U16_NEXT(label, j, labelLength, c); + UScriptCode script=uscript_getScript(c, &errorCode); + if(script==USCRIPT_HIRAGANA || script==USCRIPT_KATAKANA || script==USCRIPT_HAN) { + break; + } + } + } + } +} + +U_NAMESPACE_END + +// C API ------------------------------------------------------------------- *** + +U_NAMESPACE_USE + +U_CAPI UIDNA * U_EXPORT2 +uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode) { + return reinterpret_cast<UIDNA *>(IDNA::createUTS46Instance(options, *pErrorCode)); +} + +U_CAPI void U_EXPORT2 +uidna_close(UIDNA *idna) { + delete reinterpret_cast<IDNA *>(idna); +} + +static UBool +checkArgs(const void *label, int32_t length, + void *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { + return FALSE; + } + // sizeof(UIDNAInfo)=16 in the first API version. + if(pInfo==NULL || pInfo->size<16) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + if( (label==NULL ? length!=0 : length<-1) || + (dest==NULL ? capacity!=0 : capacity<0) || + (dest==label && label!=NULL) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + // Set all *pInfo bytes to 0 except for the size field itself. + uprv_memset(&pInfo->size+1, 0, pInfo->size-sizeof(pInfo->size)); + return TRUE; +} + +static void +idnaInfoToStruct(IDNAInfo &info, UIDNAInfo *pInfo) { + pInfo->isTransitionalDifferent=info.isTransitionalDifferent(); + pInfo->errors=info.getErrors(); +} + +U_CAPI int32_t U_EXPORT2 +uidna_labelToASCII(const UIDNA *idna, + const UChar *label, int32_t length, + UChar *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { + if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { + return 0; + } + UnicodeString src((UBool)(length<0), label, length); + UnicodeString destString(dest, 0, capacity); + IDNAInfo info; + reinterpret_cast<const IDNA *>(idna)->labelToASCII(src, destString, info, *pErrorCode); + idnaInfoToStruct(info, pInfo); + return destString.extract(dest, capacity, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uidna_labelToUnicode(const UIDNA *idna, + const UChar *label, int32_t length, + UChar *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { + if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { + return 0; + } + UnicodeString src((UBool)(length<0), label, length); + UnicodeString destString(dest, 0, capacity); + IDNAInfo info; + reinterpret_cast<const IDNA *>(idna)->labelToUnicode(src, destString, info, *pErrorCode); + idnaInfoToStruct(info, pInfo); + return destString.extract(dest, capacity, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uidna_nameToASCII(const UIDNA *idna, + const UChar *name, int32_t length, + UChar *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { + if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { + return 0; + } + UnicodeString src((UBool)(length<0), name, length); + UnicodeString destString(dest, 0, capacity); + IDNAInfo info; + reinterpret_cast<const IDNA *>(idna)->nameToASCII(src, destString, info, *pErrorCode); + idnaInfoToStruct(info, pInfo); + return destString.extract(dest, capacity, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uidna_nameToUnicode(const UIDNA *idna, + const UChar *name, int32_t length, + UChar *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { + if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { + return 0; + } + UnicodeString src((UBool)(length<0), name, length); + UnicodeString destString(dest, 0, capacity); + IDNAInfo info; + reinterpret_cast<const IDNA *>(idna)->nameToUnicode(src, destString, info, *pErrorCode); + idnaInfoToStruct(info, pInfo); + return destString.extract(dest, capacity, *pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uidna_labelToASCII_UTF8(const UIDNA *idna, + const char *label, int32_t length, + char *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { + if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { + return 0; + } + StringPiece src(label, length<0 ? static_cast<int32_t>(uprv_strlen(label)) : length); + CheckedArrayByteSink sink(dest, capacity); + IDNAInfo info; + reinterpret_cast<const IDNA *>(idna)->labelToASCII_UTF8(src, sink, info, *pErrorCode); + idnaInfoToStruct(info, pInfo); + return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uidna_labelToUnicodeUTF8(const UIDNA *idna, + const char *label, int32_t length, + char *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { + if(!checkArgs(label, length, dest, capacity, pInfo, pErrorCode)) { + return 0; + } + StringPiece src(label, length<0 ? static_cast<int32_t>(uprv_strlen(label)) : length); + CheckedArrayByteSink sink(dest, capacity); + IDNAInfo info; + reinterpret_cast<const IDNA *>(idna)->labelToUnicodeUTF8(src, sink, info, *pErrorCode); + idnaInfoToStruct(info, pInfo); + return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uidna_nameToASCII_UTF8(const UIDNA *idna, + const char *name, int32_t length, + char *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { + if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { + return 0; + } + StringPiece src(name, length<0 ? static_cast<int32_t>(uprv_strlen(name)) : length); + CheckedArrayByteSink sink(dest, capacity); + IDNAInfo info; + reinterpret_cast<const IDNA *>(idna)->nameToASCII_UTF8(src, sink, info, *pErrorCode); + idnaInfoToStruct(info, pInfo); + return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +uidna_nameToUnicodeUTF8(const UIDNA *idna, + const char *name, int32_t length, + char *dest, int32_t capacity, + UIDNAInfo *pInfo, UErrorCode *pErrorCode) { + if(!checkArgs(name, length, dest, capacity, pInfo, pErrorCode)) { + return 0; + } + StringPiece src(name, length<0 ? static_cast<int32_t>(uprv_strlen(name)) : length); + CheckedArrayByteSink sink(dest, capacity); + IDNAInfo info; + reinterpret_cast<const IDNA *>(idna)->nameToUnicodeUTF8(src, sink, info, *pErrorCode); + idnaInfoToStruct(info, pInfo); + return u_terminateChars(dest, capacity, sink.NumberOfBytesAppended(), pErrorCode); +} + +#endif // UCONFIG_NO_IDNA diff --git a/thirdparty/icu4c/common/utypeinfo.h b/thirdparty/icu4c/common/utypeinfo.h new file mode 100644 index 0000000000..c6663734fc --- /dev/null +++ b/thirdparty/icu4c/common/utypeinfo.h @@ -0,0 +1,32 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2012-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +*/ + +#ifndef __UTYPEINFO_H__ +#define __UTYPEINFO_H__ + +// Windows header <typeinfo> does not define 'exception' in 'std' namespace. +// Therefore, a project using ICU cannot be compiled with _HAS_EXCEPTIONS +// set to 0 on Windows with Visual Studio. To work around that, we have to +// include <exception> explicitly and add using statement below. +// Whenever 'typeid' is used, this header has to be included +// instead of <typeinfo>. +// Visual Studio 10 emits warning 4275 with this change. If you compile +// with exception disabled, you have to suppress warning 4275. +#if defined(_MSC_VER) && _HAS_EXCEPTIONS == 0 +#include <exception> +using std::exception; +#endif +#if defined(__GLIBCXX__) +namespace std { class type_info; } // WORKAROUND: http://llvm.org/bugs/show_bug.cgi?id=13364 +#endif +#include <typeinfo> // for 'typeid' to work + +#endif diff --git a/thirdparty/icu4c/common/utypes.cpp b/thirdparty/icu4c/common/utypes.cpp new file mode 100644 index 0000000000..63e05b1249 --- /dev/null +++ b/thirdparty/icu4c/common/utypes.cpp @@ -0,0 +1,227 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : utypes.c (previously putil.c) +* +* Date Name Description +* 10/07/2004 grhoten split from putil.c +****************************************************************************** +*/ + +#include "unicode/utypes.h" + +/* u_errorName() ------------------------------------------------------------ */ + +static const char * const +_uErrorInfoName[U_ERROR_WARNING_LIMIT-U_ERROR_WARNING_START]={ + "U_USING_FALLBACK_WARNING", + "U_USING_DEFAULT_WARNING", + "U_SAFECLONE_ALLOCATED_WARNING", + "U_STATE_OLD_WARNING", + "U_STRING_NOT_TERMINATED_WARNING", + "U_SORT_KEY_TOO_SHORT_WARNING", + "U_AMBIGUOUS_ALIAS_WARNING", + "U_DIFFERENT_UCA_VERSION", + "U_PLUGIN_CHANGED_LEVEL_WARNING", +}; + +static const char * const +_uTransErrorName[U_PARSE_ERROR_LIMIT - U_PARSE_ERROR_START]={ + "U_BAD_VARIABLE_DEFINITION", + "U_MALFORMED_RULE", + "U_MALFORMED_SET", + "U_MALFORMED_SYMBOL_REFERENCE", + "U_MALFORMED_UNICODE_ESCAPE", + "U_MALFORMED_VARIABLE_DEFINITION", + "U_MALFORMED_VARIABLE_REFERENCE", + "U_MISMATCHED_SEGMENT_DELIMITERS", + "U_MISPLACED_ANCHOR_START", + "U_MISPLACED_CURSOR_OFFSET", + "U_MISPLACED_QUANTIFIER", + "U_MISSING_OPERATOR", + "U_MISSING_SEGMENT_CLOSE", + "U_MULTIPLE_ANTE_CONTEXTS", + "U_MULTIPLE_CURSORS", + "U_MULTIPLE_POST_CONTEXTS", + "U_TRAILING_BACKSLASH", + "U_UNDEFINED_SEGMENT_REFERENCE", + "U_UNDEFINED_VARIABLE", + "U_UNQUOTED_SPECIAL", + "U_UNTERMINATED_QUOTE", + "U_RULE_MASK_ERROR", + "U_MISPLACED_COMPOUND_FILTER", + "U_MULTIPLE_COMPOUND_FILTERS", + "U_INVALID_RBT_SYNTAX", + "U_INVALID_PROPERTY_PATTERN", + "U_MALFORMED_PRAGMA", + "U_UNCLOSED_SEGMENT", + "U_ILLEGAL_CHAR_IN_SEGMENT", + "U_VARIABLE_RANGE_EXHAUSTED", + "U_VARIABLE_RANGE_OVERLAP", + "U_ILLEGAL_CHARACTER", + "U_INTERNAL_TRANSLITERATOR_ERROR", + "U_INVALID_ID", + "U_INVALID_FUNCTION" +}; + +static const char * const +_uErrorName[U_STANDARD_ERROR_LIMIT]={ + "U_ZERO_ERROR", + + "U_ILLEGAL_ARGUMENT_ERROR", + "U_MISSING_RESOURCE_ERROR", + "U_INVALID_FORMAT_ERROR", + "U_FILE_ACCESS_ERROR", + "U_INTERNAL_PROGRAM_ERROR", + "U_MESSAGE_PARSE_ERROR", + "U_MEMORY_ALLOCATION_ERROR", + "U_INDEX_OUTOFBOUNDS_ERROR", + "U_PARSE_ERROR", + "U_INVALID_CHAR_FOUND", + "U_TRUNCATED_CHAR_FOUND", + "U_ILLEGAL_CHAR_FOUND", + "U_INVALID_TABLE_FORMAT", + "U_INVALID_TABLE_FILE", + "U_BUFFER_OVERFLOW_ERROR", + "U_UNSUPPORTED_ERROR", + "U_RESOURCE_TYPE_MISMATCH", + "U_ILLEGAL_ESCAPE_SEQUENCE", + "U_UNSUPPORTED_ESCAPE_SEQUENCE", + "U_NO_SPACE_AVAILABLE", + "U_CE_NOT_FOUND_ERROR", + "U_PRIMARY_TOO_LONG_ERROR", + "U_STATE_TOO_OLD_ERROR", + "U_TOO_MANY_ALIASES_ERROR", + "U_ENUM_OUT_OF_SYNC_ERROR", + "U_INVARIANT_CONVERSION_ERROR", + "U_INVALID_STATE_ERROR", + "U_COLLATOR_VERSION_MISMATCH", + "U_USELESS_COLLATOR_ERROR", + "U_NO_WRITE_PERMISSION", + "U_INPUT_TOO_LONG_ERROR" +}; +static const char * const +_uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { + "U_UNEXPECTED_TOKEN", + "U_MULTIPLE_DECIMAL_SEPARATORS", + "U_MULTIPLE_EXPONENTIAL_SYMBOLS", + "U_MALFORMED_EXPONENTIAL_PATTERN", + "U_MULTIPLE_PERCENT_SYMBOLS", + "U_MULTIPLE_PERMILL_SYMBOLS", + "U_MULTIPLE_PAD_SPECIFIERS", + "U_PATTERN_SYNTAX_ERROR", + "U_ILLEGAL_PAD_POSITION", + "U_UNMATCHED_BRACES", + "U_UNSUPPORTED_PROPERTY", + "U_UNSUPPORTED_ATTRIBUTE", + "U_ARGUMENT_TYPE_MISMATCH", + "U_DUPLICATE_KEYWORD", + "U_UNDEFINED_KEYWORD", + "U_DEFAULT_KEYWORD_MISSING", + "U_DECIMAL_NUMBER_SYNTAX_ERROR", + "U_FORMAT_INEXACT_ERROR", + "U_NUMBER_ARG_OUTOFBOUNDS_ERROR", + "U_NUMBER_SKELETON_SYNTAX_ERROR", +}; + +static const char * const +_uBrkErrorName[U_BRK_ERROR_LIMIT - U_BRK_ERROR_START] = { + "U_BRK_INTERNAL_ERROR", + "U_BRK_HEX_DIGITS_EXPECTED", + "U_BRK_SEMICOLON_EXPECTED", + "U_BRK_RULE_SYNTAX", + "U_BRK_UNCLOSED_SET", + "U_BRK_ASSIGN_ERROR", + "U_BRK_VARIABLE_REDFINITION", + "U_BRK_MISMATCHED_PAREN", + "U_BRK_NEW_LINE_IN_QUOTED_STRING", + "U_BRK_UNDEFINED_VARIABLE", + "U_BRK_INIT_ERROR", + "U_BRK_RULE_EMPTY_SET", + "U_BRK_UNRECOGNIZED_OPTION", + "U_BRK_MALFORMED_RULE_TAG" +}; + +static const char * const +_uRegexErrorName[U_REGEX_ERROR_LIMIT - U_REGEX_ERROR_START] = { + "U_REGEX_INTERNAL_ERROR", + "U_REGEX_RULE_SYNTAX", + "U_REGEX_INVALID_STATE", + "U_REGEX_BAD_ESCAPE_SEQUENCE", + "U_REGEX_PROPERTY_SYNTAX", + "U_REGEX_UNIMPLEMENTED", + "U_REGEX_MISMATCHED_PAREN", + "U_REGEX_NUMBER_TOO_BIG", + "U_REGEX_BAD_INTERVAL", + "U_REGEX_MAX_LT_MIN", + "U_REGEX_INVALID_BACK_REF", + "U_REGEX_INVALID_FLAG", + "U_REGEX_LOOK_BEHIND_LIMIT", + "U_REGEX_SET_CONTAINS_STRING", + "U_REGEX_OCTAL_TOO_BIG", + "U_REGEX_MISSING_CLOSE_BRACKET", + "U_REGEX_INVALID_RANGE", + "U_REGEX_STACK_OVERFLOW", + "U_REGEX_TIME_OUT", + "U_REGEX_STOPPED_BY_CALLER", + "U_REGEX_PATTERN_TOO_BIG", + "U_REGEX_INVALID_CAPTURE_GROUP_NAME" +}; + +static const char * const +_uIDNAErrorName[U_IDNA_ERROR_LIMIT - U_IDNA_ERROR_START] = { + "U_STRINGPREP_PROHIBITED_ERROR", + "U_STRINGPREP_UNASSIGNED_ERROR", + "U_STRINGPREP_CHECK_BIDI_ERROR", + "U_IDNA_STD3_ASCII_RULES_ERROR", + "U_IDNA_ACE_PREFIX_ERROR", + "U_IDNA_VERIFICATION_ERROR", + "U_IDNA_LABEL_TOO_LONG_ERROR", + "U_IDNA_ZERO_LENGTH_LABEL_ERROR", + "U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR" +}; + +static const char * const +_uPluginErrorName[U_PLUGIN_ERROR_LIMIT - U_PLUGIN_ERROR_START] = { + "U_PLUGIN_TOO_HIGH", + "U_PLUGIN_DIDNT_SET_LEVEL", +}; + +U_CAPI const char * U_EXPORT2 +u_errorName(UErrorCode code) { + if(U_ZERO_ERROR <= code && code < U_STANDARD_ERROR_LIMIT) { + return _uErrorName[code]; + } else if(U_ERROR_WARNING_START <= code && code < U_ERROR_WARNING_LIMIT) { + return _uErrorInfoName[code - U_ERROR_WARNING_START]; + } else if(U_PARSE_ERROR_START <= code && code < U_PARSE_ERROR_LIMIT){ + return _uTransErrorName[code - U_PARSE_ERROR_START]; + } else if(U_FMT_PARSE_ERROR_START <= code && code < U_FMT_PARSE_ERROR_LIMIT){ + return _uFmtErrorName[code - U_FMT_PARSE_ERROR_START]; + } else if (U_BRK_ERROR_START <= code && code < U_BRK_ERROR_LIMIT){ + return _uBrkErrorName[code - U_BRK_ERROR_START]; + } else if (U_REGEX_ERROR_START <= code && code < U_REGEX_ERROR_LIMIT) { + return _uRegexErrorName[code - U_REGEX_ERROR_START]; + } else if(U_IDNA_ERROR_START <= code && code < U_IDNA_ERROR_LIMIT) { + return _uIDNAErrorName[code - U_IDNA_ERROR_START]; + } else if(U_PLUGIN_ERROR_START <= code && code < U_PLUGIN_ERROR_LIMIT) { + return _uPluginErrorName[code - U_PLUGIN_ERROR_START]; + } else { + return "[BOGUS UErrorCode]"; + } +} + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ diff --git a/thirdparty/icu4c/common/uvector.cpp b/thirdparty/icu4c/common/uvector.cpp new file mode 100644 index 0000000000..cf19edf646 --- /dev/null +++ b/thirdparty/icu4c/common/uvector.cpp @@ -0,0 +1,567 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1999-2013, International Business Machines Corporation and +* others. All Rights Reserved. +****************************************************************************** +* Date Name Description +* 10/22/99 alan Creation. +********************************************************************** +*/ + +#include "uvector.h" +#include "cmemory.h" +#include "uarrsort.h" +#include "uelement.h" + +U_NAMESPACE_BEGIN + +#define DEFAULT_CAPACITY 8 + +/* + * Constants for hinting whether a key is an integer + * or a pointer. If a hint bit is zero, then the associated + * token is assumed to be an integer. This is needed for iSeries + */ +#define HINT_KEY_POINTER (1) +#define HINT_KEY_INTEGER (0) + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector) + +UVector::UVector(UErrorCode &status) : + count(0), + capacity(0), + elements(0), + deleter(0), + comparer(0) +{ + _init(DEFAULT_CAPACITY, status); +} + +UVector::UVector(int32_t initialCapacity, UErrorCode &status) : + count(0), + capacity(0), + elements(0), + deleter(0), + comparer(0) +{ + _init(initialCapacity, status); +} + +UVector::UVector(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status) : + count(0), + capacity(0), + elements(0), + deleter(d), + comparer(c) +{ + _init(DEFAULT_CAPACITY, status); +} + +UVector::UVector(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status) : + count(0), + capacity(0), + elements(0), + deleter(d), + comparer(c) +{ + _init(initialCapacity, status); +} + +void UVector::_init(int32_t initialCapacity, UErrorCode &status) { + if (U_FAILURE(status)) { + return; + } + // Fix bogus initialCapacity values; avoid malloc(0) and integer overflow + if ((initialCapacity < 1) || (initialCapacity > (int32_t)(INT32_MAX / sizeof(UElement)))) { + initialCapacity = DEFAULT_CAPACITY; + } + elements = (UElement *)uprv_malloc(sizeof(UElement)*initialCapacity); + if (elements == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + } else { + capacity = initialCapacity; + } +} + +UVector::~UVector() { + removeAllElements(); + uprv_free(elements); + elements = 0; +} + +/** + * Assign this object to another (make this a copy of 'other'). + * Use the 'assign' function to assign each element. + */ +void UVector::assign(const UVector& other, UElementAssigner *assign, UErrorCode &ec) { + if (ensureCapacity(other.count, ec)) { + setSize(other.count, ec); + if (U_SUCCESS(ec)) { + for (int32_t i=0; i<other.count; ++i) { + if (elements[i].pointer != 0 && deleter != 0) { + (*deleter)(elements[i].pointer); + } + (*assign)(&elements[i], &other.elements[i]); + } + } + } +} + +// This only does something sensible if this object has a non-null comparer +UBool UVector::operator==(const UVector& other) { + int32_t i; + if (count != other.count) return FALSE; + if (comparer != NULL) { + // Compare using this object's comparer + for (i=0; i<count; ++i) { + if (!(*comparer)(elements[i], other.elements[i])) { + return FALSE; + } + } + } + return TRUE; +} + +void UVector::addElement(void* obj, UErrorCode &status) { + if (ensureCapacity(count + 1, status)) { + elements[count++].pointer = obj; + } +} + +void UVector::addElement(int32_t elem, UErrorCode &status) { + if (ensureCapacity(count + 1, status)) { + elements[count].pointer = NULL; // Pointers may be bigger than ints. + elements[count].integer = elem; + count++; + } +} + +void UVector::setElementAt(void* obj, int32_t index) { + if (0 <= index && index < count) { + if (elements[index].pointer != 0 && deleter != 0) { + (*deleter)(elements[index].pointer); + } + elements[index].pointer = obj; + } + /* else index out of range */ +} + +void UVector::setElementAt(int32_t elem, int32_t index) { + if (0 <= index && index < count) { + if (elements[index].pointer != 0 && deleter != 0) { + // TODO: this should be an error. mixing up ints and pointers. + (*deleter)(elements[index].pointer); + } + elements[index].pointer = NULL; + elements[index].integer = elem; + } + /* else index out of range */ +} + +void UVector::insertElementAt(void* obj, int32_t index, UErrorCode &status) { + // must have 0 <= index <= count + if (0 <= index && index <= count && ensureCapacity(count + 1, status)) { + for (int32_t i=count; i>index; --i) { + elements[i] = elements[i-1]; + } + elements[index].pointer = obj; + ++count; + } + /* else index out of range */ +} + +void UVector::insertElementAt(int32_t elem, int32_t index, UErrorCode &status) { + // must have 0 <= index <= count + if (0 <= index && index <= count && ensureCapacity(count + 1, status)) { + for (int32_t i=count; i>index; --i) { + elements[i] = elements[i-1]; + } + elements[index].pointer = NULL; + elements[index].integer = elem; + ++count; + } + /* else index out of range */ +} + +void* UVector::elementAt(int32_t index) const { + return (0 <= index && index < count) ? elements[index].pointer : 0; +} + +int32_t UVector::elementAti(int32_t index) const { + return (0 <= index && index < count) ? elements[index].integer : 0; +} + +UBool UVector::containsAll(const UVector& other) const { + for (int32_t i=0; i<other.size(); ++i) { + if (indexOf(other.elements[i]) < 0) { + return FALSE; + } + } + return TRUE; +} + +UBool UVector::containsNone(const UVector& other) const { + for (int32_t i=0; i<other.size(); ++i) { + if (indexOf(other.elements[i]) >= 0) { + return FALSE; + } + } + return TRUE; +} + +UBool UVector::removeAll(const UVector& other) { + UBool changed = FALSE; + for (int32_t i=0; i<other.size(); ++i) { + int32_t j = indexOf(other.elements[i]); + if (j >= 0) { + removeElementAt(j); + changed = TRUE; + } + } + return changed; +} + +UBool UVector::retainAll(const UVector& other) { + UBool changed = FALSE; + for (int32_t j=size()-1; j>=0; --j) { + int32_t i = other.indexOf(elements[j]); + if (i < 0) { + removeElementAt(j); + changed = TRUE; + } + } + return changed; +} + +void UVector::removeElementAt(int32_t index) { + void* e = orphanElementAt(index); + if (e != 0 && deleter != 0) { + (*deleter)(e); + } +} + +UBool UVector::removeElement(void* obj) { + int32_t i = indexOf(obj); + if (i >= 0) { + removeElementAt(i); + return TRUE; + } + return FALSE; +} + +void UVector::removeAllElements(void) { + if (deleter != 0) { + for (int32_t i=0; i<count; ++i) { + if (elements[i].pointer != 0) { + (*deleter)(elements[i].pointer); + } + } + } + count = 0; +} + +UBool UVector::equals(const UVector &other) const { + int i; + + if (this->count != other.count) { + return FALSE; + } + if (comparer == 0) { + for (i=0; i<count; i++) { + if (elements[i].pointer != other.elements[i].pointer) { + return FALSE; + } + } + } else { + UElement key; + for (i=0; i<count; i++) { + key.pointer = &other.elements[i]; + if (!(*comparer)(key, elements[i])) { + return FALSE; + } + } + } + return TRUE; +} + + + +int32_t UVector::indexOf(void* obj, int32_t startIndex) const { + UElement key; + key.pointer = obj; + return indexOf(key, startIndex, HINT_KEY_POINTER); +} + +int32_t UVector::indexOf(int32_t obj, int32_t startIndex) const { + UElement key; + key.integer = obj; + return indexOf(key, startIndex, HINT_KEY_INTEGER); +} + +// This only works if this object has a non-null comparer +int32_t UVector::indexOf(UElement key, int32_t startIndex, int8_t hint) const { + int32_t i; + if (comparer != 0) { + for (i=startIndex; i<count; ++i) { + if ((*comparer)(key, elements[i])) { + return i; + } + } + } else { + for (i=startIndex; i<count; ++i) { + /* Pointers are not always the same size as ints so to perform + * a valid comparision we need to know whether we are being + * provided an int or a pointer. */ + if (hint & HINT_KEY_POINTER) { + if (key.pointer == elements[i].pointer) { + return i; + } + } else { + if (key.integer == elements[i].integer) { + return i; + } + } + } + } + return -1; +} + +UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) { + if (minimumCapacity < 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + if (capacity < minimumCapacity) { + if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check + status = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + int32_t newCap = capacity * 2; + if (newCap < minimumCapacity) { + newCap = minimumCapacity; + } + if (newCap > (int32_t)(INT32_MAX / sizeof(UElement))) { // integer overflow check + // We keep the original memory contents on bad minimumCapacity. + status = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + UElement* newElems = (UElement *)uprv_realloc(elements, sizeof(UElement)*newCap); + if (newElems == NULL) { + // We keep the original contents on the memory failure on realloc or bad minimumCapacity. + status = U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + elements = newElems; + capacity = newCap; + } + return TRUE; +} + +/** + * Change the size of this vector as follows: If newSize is smaller, + * then truncate the array, possibly deleting held elements for i >= + * newSize. If newSize is larger, grow the array, filling in new + * slots with NULL. + */ +void UVector::setSize(int32_t newSize, UErrorCode &status) { + int32_t i; + if (newSize < 0) { + return; + } + if (newSize > count) { + if (!ensureCapacity(newSize, status)) { + return; + } + UElement empty; + empty.pointer = NULL; + empty.integer = 0; + for (i=count; i<newSize; ++i) { + elements[i] = empty; + } + } else { + /* Most efficient to count down */ + for (i=count-1; i>=newSize; --i) { + removeElementAt(i); + } + } + count = newSize; +} + +/** + * Fill in the given array with all elements of this vector. + */ +void** UVector::toArray(void** result) const { + void** a = result; + for (int i=0; i<count; ++i) { + *a++ = elements[i].pointer; + } + return result; +} + +UObjectDeleter *UVector::setDeleter(UObjectDeleter *d) { + UObjectDeleter *old = deleter; + deleter = d; + return old; +} + +UElementsAreEqual *UVector::setComparer(UElementsAreEqual *d) { + UElementsAreEqual *old = comparer; + comparer = d; + return old; +} + +/** + * Removes the element at the given index from this vector and + * transfer ownership of it to the caller. After this call, the + * caller owns the result and must delete it and the vector entry + * at 'index' is removed, shifting all subsequent entries back by + * one index and shortening the size of the vector by one. If the + * index is out of range or if there is no item at the given index + * then 0 is returned and the vector is unchanged. + */ +void* UVector::orphanElementAt(int32_t index) { + void* e = 0; + if (0 <= index && index < count) { + e = elements[index].pointer; + for (int32_t i=index; i<count-1; ++i) { + elements[i] = elements[i+1]; + } + --count; + } + /* else index out of range */ + return e; +} + +/** + * Insert the given object into this vector at its sorted position + * as defined by 'compare'. The current elements are assumed to + * be sorted already. + */ +void UVector::sortedInsert(void* obj, UElementComparator *compare, UErrorCode& ec) { + UElement e; + e.pointer = obj; + sortedInsert(e, compare, ec); +} + +/** + * Insert the given integer into this vector at its sorted position + * as defined by 'compare'. The current elements are assumed to + * be sorted already. + */ +void UVector::sortedInsert(int32_t obj, UElementComparator *compare, UErrorCode& ec) { + UElement e; + e.integer = obj; + sortedInsert(e, compare, ec); +} + +// ASSUME elements[] IS CURRENTLY SORTED +void UVector::sortedInsert(UElement e, UElementComparator *compare, UErrorCode& ec) { + // Perform a binary search for the location to insert tok at. Tok + // will be inserted between two elements a and b such that a <= + // tok && tok < b, where there is a 'virtual' elements[-1] always + // less than tok and a 'virtual' elements[count] always greater + // than tok. + int32_t min = 0, max = count; + while (min != max) { + int32_t probe = (min + max) / 2; + int8_t c = (*compare)(elements[probe], e); + if (c > 0) { + max = probe; + } else { + // assert(c <= 0); + min = probe + 1; + } + } + if (ensureCapacity(count + 1, ec)) { + for (int32_t i=count; i>min; --i) { + elements[i] = elements[i-1]; + } + elements[min] = e; + ++count; + } +} + +/** + * Array sort comparator function. + * Used from UVector::sort() + * Conforms to function signature required for uprv_sortArray(). + * This function is essentially just a wrapper, to make a + * UVector style comparator function usable with uprv_sortArray(). + * + * The context pointer to this function is a pointer back + * (with some extra indirection) to the user supplied comparator. + * + */ +static int32_t U_CALLCONV +sortComparator(const void *context, const void *left, const void *right) { + UElementComparator *compare = *static_cast<UElementComparator * const *>(context); + UElement e1 = *static_cast<const UElement *>(left); + UElement e2 = *static_cast<const UElement *>(right); + int32_t result = (*compare)(e1, e2); + return result; +} + + +/** + * Array sort comparison function for use from UVector::sorti() + * Compares int32_t vector elements. + */ +static int32_t U_CALLCONV +sortiComparator(const void * /*context */, const void *left, const void *right) { + const UElement *e1 = static_cast<const UElement *>(left); + const UElement *e2 = static_cast<const UElement *>(right); + int32_t result = e1->integer < e2->integer? -1 : + e1->integer == e2->integer? 0 : 1; + return result; +} + +/** + * Sort the vector, assuming it constains ints. + * (A more general sort would take a comparison function, but it's + * not clear whether UVector's UElementComparator or + * UComparator from uprv_sortAray would be more appropriate.) + */ +void UVector::sorti(UErrorCode &ec) { + if (U_SUCCESS(ec)) { + uprv_sortArray(elements, count, sizeof(UElement), + sortiComparator, NULL, FALSE, &ec); + } +} + + +/** + * Sort with a user supplied comparator. + * + * The comparator function handling is confusing because the function type + * for UVector (as defined for sortedInsert()) is different from the signature + * required by uprv_sortArray(). This is handled by passing the + * the UVector sort function pointer via the context pointer to a + * sortArray() comparator function, which can then call back to + * the original user functtion. + * + * An additional twist is that it's not safe to pass a pointer-to-function + * as a (void *) data pointer, so instead we pass a (data) pointer to a + * pointer-to-function variable. + */ +void UVector::sort(UElementComparator *compare, UErrorCode &ec) { + if (U_SUCCESS(ec)) { + uprv_sortArray(elements, count, sizeof(UElement), + sortComparator, &compare, FALSE, &ec); + } +} + + +/** + * Stable sort with a user supplied comparator of type UComparator. + */ +void UVector::sortWithUComparator(UComparator *compare, const void *context, UErrorCode &ec) { + if (U_SUCCESS(ec)) { + uprv_sortArray(elements, count, sizeof(UElement), + compare, context, TRUE, &ec); + } +} + +U_NAMESPACE_END + diff --git a/thirdparty/icu4c/common/uvector.h b/thirdparty/icu4c/common/uvector.h new file mode 100644 index 0000000000..a2bef923af --- /dev/null +++ b/thirdparty/icu4c/common/uvector.h @@ -0,0 +1,415 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 10/22/99 alan Creation. This is an internal header. +* It should not be exported. +********************************************************************** +*/ + +#ifndef UVECTOR_H +#define UVECTOR_H + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "cmemory.h" +#include "uarrsort.h" +#include "uelement.h" + +U_NAMESPACE_BEGIN + +/** + * <p>Ultralightweight C++ implementation of a <tt>void*</tt> vector + * that is (mostly) compatible with java.util.Vector. + * + * <p>This is a very simple implementation, written to satisfy an + * immediate porting need. As such, it is not completely fleshed out, + * and it aims for simplicity and conformity. Nonetheless, it serves + * its purpose (porting code from java that uses java.util.Vector) + * well, and it could be easily made into a more robust vector class. + * + * <p><b>Design notes</b> + * + * <p>There is index bounds checking, but little is done about it. If + * indices are out of bounds, either nothing happens, or zero is + * returned. We <em>do</em> avoid indexing off into the weeds. + * + * <p>There is detection of out of memory, but the handling is very + * coarse-grained -- similar to UnicodeString's protocol, but even + * coarser. The class contains <em>one static flag</em> that is set + * when any call to <tt>new</tt> returns zero. This allows the caller + * to use several vectors and make just one check at the end to see if + * a memory failure occurred. This is more efficient than making a + * check after each call on each vector when doing many operations on + * multiple vectors. The single static flag works best when memory + * failures are infrequent, and when recovery options are limited or + * nonexistent. + * + * <p>Since we don't have garbage collection, UVector was given the + * option to <em>own</em>its contents. To employ this, set a deleter + * function. The deleter is called on a void* pointer when that + * pointer is released by the vector, either when the vector itself is + * destructed, or when a call to setElementAt() overwrites an element, + * or when a call to remove() or one of its variants explicitly + * removes an element. If no deleter is set, or the deleter is set to + * zero, then it is assumed that the caller will delete elements as + * needed. + * + * <p>In order to implement methods such as contains() and indexOf(), + * UVector needs a way to compare objects for equality. To do so, it + * uses a comparison function, or "comparer." If the comparer is not + * set, or is set to zero, then all such methods will act as if the + * vector contains no element. That is, indexOf() will always return + * -1, contains() will always return false, etc. + * + * <p><b>To do</b> + * + * <p>Improve the handling of index out of bounds errors. + * + * @author Alan Liu + */ +class U_COMMON_API UVector : public UObject { + // NOTE: UVector uses the UHashKey (union of void* and int32_t) as + // its basic storage type. It uses UElementsAreEqual as its + // comparison function. It uses UObjectDeleter as its deleter + // function. These are named for hashtables, but used here as-is + // rather than duplicating the type. This allows sharing of + // support functions. + +private: + int32_t count; + + int32_t capacity; + + UElement* elements; + + UObjectDeleter *deleter; + + UElementsAreEqual *comparer; + +public: + UVector(UErrorCode &status); + + UVector(int32_t initialCapacity, UErrorCode &status); + + UVector(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status); + + UVector(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status); + + virtual ~UVector(); + + /** + * Assign this object to another (make this a copy of 'other'). + * Use the 'assign' function to assign each element. + */ + void assign(const UVector& other, UElementAssigner *assign, UErrorCode &ec); + + /** + * Compare this vector with another. They will be considered + * equal if they are of the same size and all elements are equal, + * as compared using this object's comparer. + */ + UBool operator==(const UVector& other); + + /** + * Equivalent to !operator==() + */ + inline UBool operator!=(const UVector& other); + + //------------------------------------------------------------ + // java.util.Vector API + //------------------------------------------------------------ + + void addElement(void* obj, UErrorCode &status); + + void addElement(int32_t elem, UErrorCode &status); + + void setElementAt(void* obj, int32_t index); + + void setElementAt(int32_t elem, int32_t index); + + void insertElementAt(void* obj, int32_t index, UErrorCode &status); + + void insertElementAt(int32_t elem, int32_t index, UErrorCode &status); + + void* elementAt(int32_t index) const; + + int32_t elementAti(int32_t index) const; + + UBool equals(const UVector &other) const; + + inline void* firstElement(void) const; + + inline void* lastElement(void) const; + + inline int32_t lastElementi(void) const; + + int32_t indexOf(void* obj, int32_t startIndex = 0) const; + + int32_t indexOf(int32_t obj, int32_t startIndex = 0) const; + + inline UBool contains(void* obj) const; + + inline UBool contains(int32_t obj) const; + + UBool containsAll(const UVector& other) const; + + UBool removeAll(const UVector& other); + + UBool retainAll(const UVector& other); + + void removeElementAt(int32_t index); + + UBool removeElement(void* obj); + + void removeAllElements(); + + inline int32_t size(void) const; + + inline UBool isEmpty(void) const; + + UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status); + + /** + * Change the size of this vector as follows: If newSize is + * smaller, then truncate the array, possibly deleting held + * elements for i >= newSize. If newSize is larger, grow the + * array, filling in new slots with NULL. + */ + void setSize(int32_t newSize, UErrorCode &status); + + /** + * Fill in the given array with all elements of this vector. + */ + void** toArray(void** result) const; + + //------------------------------------------------------------ + // New API + //------------------------------------------------------------ + + UObjectDeleter *setDeleter(UObjectDeleter *d); + + UElementsAreEqual *setComparer(UElementsAreEqual *c); + + inline void* operator[](int32_t index) const; + + /** + * Removes the element at the given index from this vector and + * transfer ownership of it to the caller. After this call, the + * caller owns the result and must delete it and the vector entry + * at 'index' is removed, shifting all subsequent entries back by + * one index and shortening the size of the vector by one. If the + * index is out of range or if there is no item at the given index + * then 0 is returned and the vector is unchanged. + */ + void* orphanElementAt(int32_t index); + + /** + * Returns true if this vector contains none of the elements + * of the given vector. + * @param other vector to be checked for containment + * @return true if the test condition is met + */ + UBool containsNone(const UVector& other) const; + + /** + * Insert the given object into this vector at its sorted position + * as defined by 'compare'. The current elements are assumed to + * be sorted already. + */ + void sortedInsert(void* obj, UElementComparator *compare, UErrorCode& ec); + + /** + * Insert the given integer into this vector at its sorted position + * as defined by 'compare'. The current elements are assumed to + * be sorted already. + */ + void sortedInsert(int32_t obj, UElementComparator *compare, UErrorCode& ec); + + /** + * Sort the contents of the vector, assuming that the contents of the + * vector are of type int32_t. + */ + void sorti(UErrorCode &ec); + + /** + * Sort the contents of this vector, using a caller-supplied function + * to do the comparisons. (It's confusing that + * UVector's UElementComparator function is different from the + * UComparator function type defined in uarrsort.h) + */ + void sort(UElementComparator *compare, UErrorCode &ec); + + /** + * Stable sort the contents of this vector using a caller-supplied function + * of type UComparator to do the comparison. Provides more flexibility + * than UVector::sort() because an additional user parameter can be passed to + * the comparison function. + */ + void sortWithUComparator(UComparator *compare, const void *context, UErrorCode &ec); + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + */ + virtual UClassID getDynamicClassID() const; + +private: + void _init(int32_t initialCapacity, UErrorCode &status); + + int32_t indexOf(UElement key, int32_t startIndex = 0, int8_t hint = 0) const; + + void sortedInsert(UElement e, UElementComparator *compare, UErrorCode& ec); + + // Disallow + UVector(const UVector&); + + // Disallow + UVector& operator=(const UVector&); + +}; + + +/** + * <p>Ultralightweight C++ implementation of a <tt>void*</tt> stack + * that is (mostly) compatible with java.util.Stack. As in java, this + * is merely a paper thin layer around UVector. See the UVector + * documentation for further information. + * + * <p><b>Design notes</b> + * + * <p>The element at index <tt>n-1</tt> is (of course) the top of the + * stack. + * + * <p>The poorly named <tt>empty()</tt> method doesn't empty the + * stack; it determines if the stack is empty. + * + * @author Alan Liu + */ +class U_COMMON_API UStack : public UVector { +public: + UStack(UErrorCode &status); + + UStack(int32_t initialCapacity, UErrorCode &status); + + UStack(UObjectDeleter *d, UElementsAreEqual *c, UErrorCode &status); + + UStack(UObjectDeleter *d, UElementsAreEqual *c, int32_t initialCapacity, UErrorCode &status); + + virtual ~UStack(); + + // It's okay not to have a virtual destructor (in UVector) + // because UStack has no special cleanup to do. + + inline UBool empty(void) const; + + inline void* peek(void) const; + + inline int32_t peeki(void) const; + + void* pop(void); + + int32_t popi(void); + + inline void* push(void* obj, UErrorCode &status); + + inline int32_t push(int32_t i, UErrorCode &status); + + /* + If the object o occurs as an item in this stack, + this method returns the 1-based distance from the top of the stack. + */ + int32_t search(void* obj) const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + */ + virtual UClassID getDynamicClassID() const; + +private: + // Disallow + UStack(const UStack&); + + // Disallow + UStack& operator=(const UStack&); +}; + + +// UVector inlines + +inline int32_t UVector::size(void) const { + return count; +} + +inline UBool UVector::isEmpty(void) const { + return count == 0; +} + +inline UBool UVector::contains(void* obj) const { + return indexOf(obj) >= 0; +} + +inline UBool UVector::contains(int32_t obj) const { + return indexOf(obj) >= 0; +} + +inline void* UVector::firstElement(void) const { + return elementAt(0); +} + +inline void* UVector::lastElement(void) const { + return elementAt(count-1); +} + +inline int32_t UVector::lastElementi(void) const { + return elementAti(count-1); +} + +inline void* UVector::operator[](int32_t index) const { + return elementAt(index); +} + +inline UBool UVector::operator!=(const UVector& other) { + return !operator==(other); +} + +// UStack inlines + +inline UBool UStack::empty(void) const { + return isEmpty(); +} + +inline void* UStack::peek(void) const { + return lastElement(); +} + +inline int32_t UStack::peeki(void) const { + return lastElementi(); +} + +inline void* UStack::push(void* obj, UErrorCode &status) { + addElement(obj, status); + return obj; +} + +inline int32_t UStack::push(int32_t i, UErrorCode &status) { + addElement(i, status); + return i; +} + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/uvectr32.cpp b/thirdparty/icu4c/common/uvectr32.cpp new file mode 100644 index 0000000000..d1ae659958 --- /dev/null +++ b/thirdparty/icu4c/common/uvectr32.cpp @@ -0,0 +1,335 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1999-2015, International Business Machines Corporation and +* others. All Rights Reserved. +****************************************************************************** +* Date Name Description +* 10/22/99 alan Creation. +********************************************************************** +*/ + +#include "uvectr32.h" +#include "cmemory.h" +#include "putilimp.h" + +U_NAMESPACE_BEGIN + +#define DEFAULT_CAPACITY 8 + +/* + * Constants for hinting whether a key is an integer + * or a pointer. If a hint bit is zero, then the associated + * token is assumed to be an integer. This is needed for iSeries + */ + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector32) + +UVector32::UVector32(UErrorCode &status) : + count(0), + capacity(0), + maxCapacity(0), + elements(NULL) +{ + _init(DEFAULT_CAPACITY, status); +} + +UVector32::UVector32(int32_t initialCapacity, UErrorCode &status) : + count(0), + capacity(0), + maxCapacity(0), + elements(0) +{ + _init(initialCapacity, status); +} + + + +void UVector32::_init(int32_t initialCapacity, UErrorCode &status) { + // Fix bogus initialCapacity values; avoid malloc(0) + if (initialCapacity < 1) { + initialCapacity = DEFAULT_CAPACITY; + } + if (maxCapacity>0 && maxCapacity<initialCapacity) { + initialCapacity = maxCapacity; + } + if (initialCapacity > (int32_t)(INT32_MAX / sizeof(int32_t))) { + initialCapacity = uprv_min(DEFAULT_CAPACITY, maxCapacity); + } + elements = (int32_t *)uprv_malloc(sizeof(int32_t)*initialCapacity); + if (elements == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + } else { + capacity = initialCapacity; + } +} + +UVector32::~UVector32() { + uprv_free(elements); + elements = 0; +} + +/** + * Assign this object to another (make this a copy of 'other'). + */ +void UVector32::assign(const UVector32& other, UErrorCode &ec) { + if (ensureCapacity(other.count, ec)) { + setSize(other.count); + for (int32_t i=0; i<other.count; ++i) { + elements[i] = other.elements[i]; + } + } +} + + +UBool UVector32::operator==(const UVector32& other) { + int32_t i; + if (count != other.count) return FALSE; + for (i=0; i<count; ++i) { + if (elements[i] != other.elements[i]) { + return FALSE; + } + } + return TRUE; +} + + +void UVector32::setElementAt(int32_t elem, int32_t index) { + if (0 <= index && index < count) { + elements[index] = elem; + } + /* else index out of range */ +} + +void UVector32::insertElementAt(int32_t elem, int32_t index, UErrorCode &status) { + // must have 0 <= index <= count + if (0 <= index && index <= count && ensureCapacity(count + 1, status)) { + for (int32_t i=count; i>index; --i) { + elements[i] = elements[i-1]; + } + elements[index] = elem; + ++count; + } + /* else index out of range */ +} + +UBool UVector32::containsAll(const UVector32& other) const { + for (int32_t i=0; i<other.size(); ++i) { + if (indexOf(other.elements[i]) < 0) { + return FALSE; + } + } + return TRUE; +} + +UBool UVector32::containsNone(const UVector32& other) const { + for (int32_t i=0; i<other.size(); ++i) { + if (indexOf(other.elements[i]) >= 0) { + return FALSE; + } + } + return TRUE; +} + +UBool UVector32::removeAll(const UVector32& other) { + UBool changed = FALSE; + for (int32_t i=0; i<other.size(); ++i) { + int32_t j = indexOf(other.elements[i]); + if (j >= 0) { + removeElementAt(j); + changed = TRUE; + } + } + return changed; +} + +UBool UVector32::retainAll(const UVector32& other) { + UBool changed = FALSE; + for (int32_t j=size()-1; j>=0; --j) { + int32_t i = other.indexOf(elements[j]); + if (i < 0) { + removeElementAt(j); + changed = TRUE; + } + } + return changed; +} + +void UVector32::removeElementAt(int32_t index) { + if (index >= 0) { + for (int32_t i=index; i<count-1; ++i) { + elements[i] = elements[i+1]; + } + --count; + } +} + +void UVector32::removeAllElements(void) { + count = 0; +} + +UBool UVector32::equals(const UVector32 &other) const { + int i; + + if (this->count != other.count) { + return FALSE; + } + for (i=0; i<count; i++) { + if (elements[i] != other.elements[i]) { + return FALSE; + } + } + return TRUE; +} + + + + +int32_t UVector32::indexOf(int32_t key, int32_t startIndex) const { + int32_t i; + for (i=startIndex; i<count; ++i) { + if (key == elements[i]) { + return i; + } + } + return -1; +} + + +UBool UVector32::expandCapacity(int32_t minimumCapacity, UErrorCode &status) { + if (U_FAILURE(status)) { + return FALSE; + } + if (minimumCapacity < 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + if (capacity >= minimumCapacity) { + return TRUE; + } + if (maxCapacity>0 && minimumCapacity>maxCapacity) { + status = U_BUFFER_OVERFLOW_ERROR; + return FALSE; + } + if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check + status = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + int32_t newCap = capacity * 2; + if (newCap < minimumCapacity) { + newCap = minimumCapacity; + } + if (maxCapacity > 0 && newCap > maxCapacity) { + newCap = maxCapacity; + } + if (newCap > (int32_t)(INT32_MAX / sizeof(int32_t))) { // integer overflow check + // We keep the original memory contents on bad minimumCapacity/maxCapacity. + status = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + int32_t* newElems = (int32_t *)uprv_realloc(elements, sizeof(int32_t)*newCap); + if (newElems == NULL) { + // We keep the original contents on the memory failure on realloc. + status = U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + elements = newElems; + capacity = newCap; + return TRUE; +} + +void UVector32::setMaxCapacity(int32_t limit) { + U_ASSERT(limit >= 0); + if (limit < 0) { + limit = 0; + } + if (limit > (int32_t)(INT32_MAX / sizeof(int32_t))) { // integer overflow check for realloc + // Something is very wrong, don't realloc, leave capacity and maxCapacity unchanged + return; + } + maxCapacity = limit; + if (capacity <= maxCapacity || maxCapacity == 0) { + // Current capacity is within the new limit. + return; + } + + // New maximum capacity is smaller than the current size. + // Realloc the storage to the new, smaller size. + int32_t* newElems = (int32_t *)uprv_realloc(elements, sizeof(int32_t)*maxCapacity); + if (newElems == NULL) { + // Realloc to smaller failed. + // Just keep what we had. No need to call it a failure. + return; + } + elements = newElems; + capacity = maxCapacity; + if (count > capacity) { + count = capacity; + } +} + +/** + * Change the size of this vector as follows: If newSize is smaller, + * then truncate the array, possibly deleting held elements for i >= + * newSize. If newSize is larger, grow the array, filling in new + * slots with NULL. + */ +void UVector32::setSize(int32_t newSize) { + int32_t i; + if (newSize < 0) { + return; + } + if (newSize > count) { + UErrorCode ec = U_ZERO_ERROR; + if (!ensureCapacity(newSize, ec)) { + return; + } + for (i=count; i<newSize; ++i) { + elements[i] = 0; + } + } + count = newSize; +} + + + + +/** + * Insert the given integer into this vector at its sorted position + * as defined by 'compare'. The current elements are assumed to + * be sorted already. + */ +void UVector32::sortedInsert(int32_t tok, UErrorCode& ec) { + // Perform a binary search for the location to insert tok at. Tok + // will be inserted between two elements a and b such that a <= + // tok && tok < b, where there is a 'virtual' elements[-1] always + // less than tok and a 'virtual' elements[count] always greater + // than tok. + int32_t min = 0, max = count; + while (min != max) { + int32_t probe = (min + max) / 2; + //int8_t c = (*compare)(elements[probe], tok); + //if (c > 0) { + if (elements[probe] > tok) { + max = probe; + } else { + // assert(c <= 0); + min = probe + 1; + } + } + if (ensureCapacity(count + 1, ec)) { + for (int32_t i=count; i>min; --i) { + elements[i] = elements[i-1]; + } + elements[min] = tok; + ++count; + } +} + + + + + +U_NAMESPACE_END + diff --git a/thirdparty/icu4c/common/uvectr32.h b/thirdparty/icu4c/common/uvectr32.h new file mode 100644 index 0000000000..0d81dfb5c1 --- /dev/null +++ b/thirdparty/icu4c/common/uvectr32.h @@ -0,0 +1,306 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +// +// UVector32 is a class implementing a vector of 32 bit integers. +// It is similar to UVector, but holds int32_t values rather than pointers. +// Most of the code is unchanged from UVector. +// + +#ifndef UVECTOR32_H +#define UVECTOR32_H + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "uhash.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + + + +/** + * <p>Ultralightweight C++ implementation of a <tt>void*</tt> vector + * that is (mostly) compatible with java.util.Vector. + * + * <p>This is a very simple implementation, written to satisfy an + * immediate porting need. As such, it is not completely fleshed out, + * and it aims for simplicity and conformity. Nonetheless, it serves + * its purpose (porting code from java that uses java.util.Vector) + * well, and it could be easily made into a more robust vector class. + * + * <p><b>Design notes</b> + * + * <p>There is index bounds checking, but little is done about it. If + * indices are out of bounds, either nothing happens, or zero is + * returned. We <em>do</em> avoid indexing off into the weeds. + * + * <p>There is detection of out of memory, but the handling is very + * coarse-grained -- similar to UnicodeString's protocol, but even + * coarser. The class contains <em>one static flag</em> that is set + * when any call to <tt>new</tt> returns zero. This allows the caller + * to use several vectors and make just one check at the end to see if + * a memory failure occurred. This is more efficient than making a + * check after each call on each vector when doing many operations on + * multiple vectors. The single static flag works best when memory + * failures are infrequent, and when recovery options are limited or + * nonexistent. + * + * <p><b>To do</b> + * + * <p>Improve the handling of index out of bounds errors. + * + * @author Alan Liu + */ +class U_COMMON_API UVector32 : public UObject { +private: + int32_t count; + + int32_t capacity; + + int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow. + + int32_t* elements; + +public: + UVector32(UErrorCode &status); + + UVector32(int32_t initialCapacity, UErrorCode &status); + + virtual ~UVector32(); + + /** + * Assign this object to another (make this a copy of 'other'). + * Use the 'assign' function to assign each element. + */ + void assign(const UVector32& other, UErrorCode &ec); + + /** + * Compare this vector with another. They will be considered + * equal if they are of the same size and all elements are equal, + * as compared using this object's comparer. + */ + UBool operator==(const UVector32& other); + + /** + * Equivalent to !operator==() + */ + inline UBool operator!=(const UVector32& other); + + //------------------------------------------------------------ + // java.util.Vector API + //------------------------------------------------------------ + + inline void addElement(int32_t elem, UErrorCode &status); + + void setElementAt(int32_t elem, int32_t index); + + void insertElementAt(int32_t elem, int32_t index, UErrorCode &status); + + inline int32_t elementAti(int32_t index) const; + + UBool equals(const UVector32 &other) const; + + inline int32_t lastElementi(void) const; + + int32_t indexOf(int32_t elem, int32_t startIndex = 0) const; + + inline UBool contains(int32_t elem) const; + + UBool containsAll(const UVector32& other) const; + + UBool removeAll(const UVector32& other); + + UBool retainAll(const UVector32& other); + + void removeElementAt(int32_t index); + + void removeAllElements(); + + inline int32_t size(void) const; + + inline UBool isEmpty(void) const; + + // Inline. Use this one for speedy size check. + inline UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status); + + // Out-of-line, handles actual growth. Called by ensureCapacity() when necessary. + UBool expandCapacity(int32_t minimumCapacity, UErrorCode &status); + + /** + * Change the size of this vector as follows: If newSize is + * smaller, then truncate the array, possibly deleting held + * elements for i >= newSize. If newSize is larger, grow the + * array, filling in new slows with zero. + */ + void setSize(int32_t newSize); + + //------------------------------------------------------------ + // New API + //------------------------------------------------------------ + + /** + * Returns true if this vector contains none of the elements + * of the given vector. + * @param other vector to be checked for containment + * @return true if the test condition is met + */ + UBool containsNone(const UVector32& other) const; + + + /** + * Insert the given integer into this vector at its sorted position. + * The current elements are assumed to be sorted already. + */ + void sortedInsert(int32_t elem, UErrorCode& ec); + + /** + * Returns a pointer to the internal array holding the vector. + */ + inline int32_t *getBuffer() const; + + /** + * Set the maximum allowed buffer capacity for this vector/stack. + * Default with no limit set is unlimited, go until malloc() fails. + * A Limit of zero means unlimited capacity. + * Units are vector elements (32 bits each), not bytes. + */ + void setMaxCapacity(int32_t limit); + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + */ + virtual UClassID getDynamicClassID() const; + +private: + void _init(int32_t initialCapacity, UErrorCode &status); + + // Disallow + UVector32(const UVector32&); + + // Disallow + UVector32& operator=(const UVector32&); + + + // API Functions for Stack operations. + // In the original UVector, these were in a separate derived class, UStack. + // Here in UVector32, they are all together. +public: + inline UBool empty(void) const; // TODO: redundant, same as empty(). Remove it? + + inline int32_t peeki(void) const; + + inline int32_t popi(void); + + inline int32_t push(int32_t i, UErrorCode &status); + + inline int32_t *reserveBlock(int32_t size, UErrorCode &status); + inline int32_t *popFrame(int32_t size); +}; + + +// UVector32 inlines + +inline UBool UVector32::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) { + if ((minimumCapacity >= 0) && (capacity >= minimumCapacity)) { + return true; + } else { + return expandCapacity(minimumCapacity, status); + } +} + +inline int32_t UVector32::elementAti(int32_t index) const { + return (index >= 0 && count > 0 && count - index > 0) ? elements[index] : 0; +} + + +inline void UVector32::addElement(int32_t elem, UErrorCode &status) { + if (ensureCapacity(count + 1, status)) { + elements[count] = elem; + count++; + } +} + +inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) { + if (ensureCapacity(count+size, status) == false) { + return NULL; + } + int32_t *rp = elements+count; + count += size; + return rp; +} + +inline int32_t *UVector32::popFrame(int32_t size) { + U_ASSERT(count >= size); + count -= size; + if (count < 0) { + count = 0; + } + return elements+count-size; +} + + + +inline int32_t UVector32::size(void) const { + return count; +} + +inline UBool UVector32::isEmpty(void) const { + return count == 0; +} + +inline UBool UVector32::contains(int32_t obj) const { + return indexOf(obj) >= 0; +} + +inline int32_t UVector32::lastElementi(void) const { + return elementAti(count-1); +} + +inline UBool UVector32::operator!=(const UVector32& other) { + return !operator==(other); +} + +inline int32_t *UVector32::getBuffer() const { + return elements; +} + + +// UStack inlines + +inline UBool UVector32::empty(void) const { + return isEmpty(); +} + +inline int32_t UVector32::peeki(void) const { + return lastElementi(); +} + +inline int32_t UVector32::push(int32_t i, UErrorCode &status) { + addElement(i, status); + return i; +} + +inline int32_t UVector32::popi(void) { + int32_t result = 0; + if (count > 0) { + count--; + result = elements[count]; + } + return result; +} + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/uvectr64.cpp b/thirdparty/icu4c/common/uvectr64.cpp new file mode 100644 index 0000000000..081565959c --- /dev/null +++ b/thirdparty/icu4c/common/uvectr64.cpp @@ -0,0 +1,214 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 1999-2015, International Business Machines Corporation and +* others. All Rights Reserved. +****************************************************************************** +*/ + +#include "uvectr64.h" +#include "cmemory.h" +#include "putilimp.h" + +U_NAMESPACE_BEGIN + +#define DEFAULT_CAPACITY 8 + +/* + * Constants for hinting whether a key is an integer + * or a pointer. If a hint bit is zero, then the associated + * token is assumed to be an integer. This is needed for iSeries + */ + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UVector64) + +UVector64::UVector64(UErrorCode &status) : + count(0), + capacity(0), + maxCapacity(0), + elements(NULL) +{ + _init(DEFAULT_CAPACITY, status); +} + +UVector64::UVector64(int32_t initialCapacity, UErrorCode &status) : + count(0), + capacity(0), + maxCapacity(0), + elements(0) +{ + _init(initialCapacity, status); +} + + + +void UVector64::_init(int32_t initialCapacity, UErrorCode &status) { + // Fix bogus initialCapacity values; avoid malloc(0) + if (initialCapacity < 1) { + initialCapacity = DEFAULT_CAPACITY; + } + if (maxCapacity>0 && maxCapacity<initialCapacity) { + initialCapacity = maxCapacity; + } + if (initialCapacity > (int32_t)(INT32_MAX / sizeof(int64_t))) { + initialCapacity = uprv_min(DEFAULT_CAPACITY, maxCapacity); + } + elements = (int64_t *)uprv_malloc(sizeof(int64_t)*initialCapacity); + if (elements == 0) { + status = U_MEMORY_ALLOCATION_ERROR; + } else { + capacity = initialCapacity; + } +} + +UVector64::~UVector64() { + uprv_free(elements); + elements = 0; +} + +/** + * Assign this object to another (make this a copy of 'other'). + */ +void UVector64::assign(const UVector64& other, UErrorCode &ec) { + if (ensureCapacity(other.count, ec)) { + setSize(other.count); + for (int32_t i=0; i<other.count; ++i) { + elements[i] = other.elements[i]; + } + } +} + + +UBool UVector64::operator==(const UVector64& other) { + int32_t i; + if (count != other.count) return FALSE; + for (i=0; i<count; ++i) { + if (elements[i] != other.elements[i]) { + return FALSE; + } + } + return TRUE; +} + + +void UVector64::setElementAt(int64_t elem, int32_t index) { + if (0 <= index && index < count) { + elements[index] = elem; + } + /* else index out of range */ +} + +void UVector64::insertElementAt(int64_t elem, int32_t index, UErrorCode &status) { + // must have 0 <= index <= count + if (0 <= index && index <= count && ensureCapacity(count + 1, status)) { + for (int32_t i=count; i>index; --i) { + elements[i] = elements[i-1]; + } + elements[index] = elem; + ++count; + } + /* else index out of range */ +} + +void UVector64::removeAllElements(void) { + count = 0; +} + +UBool UVector64::expandCapacity(int32_t minimumCapacity, UErrorCode &status) { + if (U_FAILURE(status)) { + return FALSE; + } + if (minimumCapacity < 0) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + if (capacity >= minimumCapacity) { + return TRUE; + } + if (maxCapacity>0 && minimumCapacity>maxCapacity) { + status = U_BUFFER_OVERFLOW_ERROR; + return FALSE; + } + if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check + status = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + int32_t newCap = capacity * 2; + if (newCap < minimumCapacity) { + newCap = minimumCapacity; + } + if (maxCapacity > 0 && newCap > maxCapacity) { + newCap = maxCapacity; + } + if (newCap > (int32_t)(INT32_MAX / sizeof(int64_t))) { // integer overflow check + // We keep the original memory contents on bad minimumCapacity/maxCapacity. + status = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + int64_t* newElems = (int64_t *)uprv_realloc(elements, sizeof(int64_t)*newCap); + if (newElems == NULL) { + // We keep the original contents on the memory failure on realloc. + status = U_MEMORY_ALLOCATION_ERROR; + return FALSE; + } + elements = newElems; + capacity = newCap; + return TRUE; +} + +void UVector64::setMaxCapacity(int32_t limit) { + U_ASSERT(limit >= 0); + if (limit < 0) { + limit = 0; + } + if (limit > (int32_t)(INT32_MAX / sizeof(int64_t))) { // integer overflow check for realloc + // Something is very wrong, don't realloc, leave capacity and maxCapacity unchanged + return; + } + maxCapacity = limit; + if (capacity <= maxCapacity || maxCapacity == 0) { + // Current capacity is within the new limit. + return; + } + + // New maximum capacity is smaller than the current size. + // Realloc the storage to the new, smaller size. + int64_t* newElems = (int64_t *)uprv_realloc(elements, sizeof(int64_t)*maxCapacity); + if (newElems == NULL) { + // Realloc to smaller failed. + // Just keep what we had. No need to call it a failure. + return; + } + elements = newElems; + capacity = maxCapacity; + if (count > capacity) { + count = capacity; + } +} + +/** + * Change the size of this vector as follows: If newSize is smaller, + * then truncate the array, possibly deleting held elements for i >= + * newSize. If newSize is larger, grow the array, filling in new + * slots with NULL. + */ +void UVector64::setSize(int32_t newSize) { + int32_t i; + if (newSize < 0) { + return; + } + if (newSize > count) { + UErrorCode ec = U_ZERO_ERROR; + if (!ensureCapacity(newSize, ec)) { + return; + } + for (i=count; i<newSize; ++i) { + elements[i] = 0; + } + } + count = newSize; +} + +U_NAMESPACE_END + diff --git a/thirdparty/icu4c/common/uvectr64.h b/thirdparty/icu4c/common/uvectr64.h new file mode 100644 index 0000000000..15c9b3f830 --- /dev/null +++ b/thirdparty/icu4c/common/uvectr64.h @@ -0,0 +1,279 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1999-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +*/ + +// +// UVector64 is a class implementing a vector of 64 bit integers. +// It is similar to UVector32, but holds int64_t values rather than int32_t. +// Most of the code is unchanged from UVector. +// + +#ifndef UVECTOR64_H +#define UVECTOR64_H + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "uhash.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + + + +/** + * <p>Ultralightweight C++ implementation of an <tt>int64_t</tt> vector + * that has a subset of methods from UVector32 + * + * <p>This is a very simple implementation, written to satisfy an + * immediate porting need. As such, it is not completely fleshed out, + * and it aims for simplicity and conformity. Nonetheless, it serves + * its purpose (porting code from java that uses java.util.Vector) + * well, and it could be easily made into a more robust vector class. + * + * <p><b>Design notes</b> + * + * <p>There is index bounds checking, but little is done about it. If + * indices are out of bounds, either nothing happens, or zero is + * returned. We <em>do</em> avoid indexing off into the weeds. + * + * <p>There is detection of out of memory, but the handling is very + * coarse-grained -- similar to UnicodeString's protocol, but even + * coarser. The class contains <em>one static flag</em> that is set + * when any call to <tt>new</tt> returns zero. This allows the caller + * to use several vectors and make just one check at the end to see if + * a memory failure occurred. This is more efficient than making a + * check after each call on each vector when doing many operations on + * multiple vectors. The single static flag works best when memory + * failures are infrequent, and when recovery options are limited or + * nonexistent. + * + * <p><b>To do</b> + * + * <p>Improve the handling of index out of bounds errors. + * + */ +class U_COMMON_API UVector64 : public UObject { +private: + int32_t count; + + int32_t capacity; + + int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow. + + int64_t* elements; + +public: + UVector64(UErrorCode &status); + + UVector64(int32_t initialCapacity, UErrorCode &status); + + virtual ~UVector64(); + + /** + * Assign this object to another (make this a copy of 'other'). + * Use the 'assign' function to assign each element. + */ + void assign(const UVector64& other, UErrorCode &ec); + + /** + * Compare this vector with another. They will be considered + * equal if they are of the same size and all elements are equal, + * as compared using this object's comparer. + */ + UBool operator==(const UVector64& other); + + /** + * Equivalent to !operator==() + */ + inline UBool operator!=(const UVector64& other); + + //------------------------------------------------------------ + // subset of java.util.Vector API + //------------------------------------------------------------ + + inline void addElement(int64_t elem, UErrorCode &status); + + void setElementAt(int64_t elem, int32_t index); + + void insertElementAt(int64_t elem, int32_t index, UErrorCode &status); + + inline int64_t elementAti(int32_t index) const; + + //UBool equals(const UVector64 &other) const; + + inline int64_t lastElementi(void) const; + + //int32_t indexOf(int64_t elem, int32_t startIndex = 0) const; + + //UBool contains(int64_t elem) const; + + //UBool containsAll(const UVector64& other) const; + + //UBool removeAll(const UVector64& other); + + //UBool retainAll(const UVector64& other); + + //void removeElementAt(int32_t index); + + void removeAllElements(); + + inline int32_t size(void) const; + + inline UBool isEmpty(void) const { return count == 0; } + + // Inline. Use this one for speedy size check. + inline UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status); + + // Out-of-line, handles actual growth. Called by ensureCapacity() when necessary. + UBool expandCapacity(int32_t minimumCapacity, UErrorCode &status); + + /** + * Change the size of this vector as follows: If newSize is + * smaller, then truncate the array, possibly deleting held + * elements for i >= newSize. If newSize is larger, grow the + * array, filling in new slows with zero. + */ + void setSize(int32_t newSize); + + //------------------------------------------------------------ + // New API + //------------------------------------------------------------ + + //UBool containsNone(const UVector64& other) const; + + + //void sortedInsert(int64_t elem, UErrorCode& ec); + + /** + * Returns a pointer to the internal array holding the vector. + */ + inline int64_t *getBuffer() const; + + /** + * Set the maximum allowed buffer capacity for this vector/stack. + * Default with no limit set is unlimited, go until malloc() fails. + * A Limit of zero means unlimited capacity. + * Units are vector elements (64 bits each), not bytes. + */ + void setMaxCapacity(int32_t limit); + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + */ + virtual UClassID getDynamicClassID() const; + +private: + void _init(int32_t initialCapacity, UErrorCode &status); + + // Disallow + UVector64(const UVector64&); + + // Disallow + UVector64& operator=(const UVector64&); + + + // API Functions for Stack operations. + // In the original UVector, these were in a separate derived class, UStack. + // Here in UVector64, they are all together. +public: + //UBool empty(void) const; // TODO: redundant, same as empty(). Remove it? + + //int64_t peeki(void) const; + + inline int64_t popi(void); + + inline int64_t push(int64_t i, UErrorCode &status); + + inline int64_t *reserveBlock(int32_t size, UErrorCode &status); + inline int64_t *popFrame(int32_t size); +}; + + +// UVector64 inlines + +inline UBool UVector64::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) { + if ((minimumCapacity >= 0) && (capacity >= minimumCapacity)) { + return true; + } else { + return expandCapacity(minimumCapacity, status); + } +} + +inline int64_t UVector64::elementAti(int32_t index) const { + return (0 <= index && index < count) ? elements[index] : 0; +} + + +inline void UVector64::addElement(int64_t elem, UErrorCode &status) { + if (ensureCapacity(count + 1, status)) { + elements[count] = elem; + count++; + } +} + +inline int64_t *UVector64::reserveBlock(int32_t size, UErrorCode &status) { + if (ensureCapacity(count+size, status) == false) { + return NULL; + } + int64_t *rp = elements+count; + count += size; + return rp; +} + +inline int64_t *UVector64::popFrame(int32_t size) { + U_ASSERT(count >= size); + count -= size; + if (count < 0) { + count = 0; + } + return elements+count-size; +} + + + +inline int32_t UVector64::size(void) const { + return count; +} + +inline int64_t UVector64::lastElementi(void) const { + return elementAti(count-1); +} + +inline UBool UVector64::operator!=(const UVector64& other) { + return !operator==(other); +} + +inline int64_t *UVector64::getBuffer() const { + return elements; +} + + +// UStack inlines + +inline int64_t UVector64::push(int64_t i, UErrorCode &status) { + addElement(i, status); + return i; +} + +inline int64_t UVector64::popi(void) { + int64_t result = 0; + if (count > 0) { + count--; + result = elements[count]; + } + return result; +} + +U_NAMESPACE_END + +#endif diff --git a/thirdparty/icu4c/common/wintz.cpp b/thirdparty/icu4c/common/wintz.cpp new file mode 100644 index 0000000000..3730232286 --- /dev/null +++ b/thirdparty/icu4c/common/wintz.cpp @@ -0,0 +1,147 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 2005-2015, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File WINTZ.CPP +* +******************************************************************************** +*/ + +#include "unicode/utypes.h" + +#if U_PLATFORM_USES_ONLY_WIN32_API + +#include "wintz.h" +#include "charstr.h" +#include "cmemory.h" +#include "cstring.h" + +#include "unicode/ures.h" +#include "unicode/unistr.h" +#include "uresimp.h" + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif +# define VC_EXTRALEAN +# define NOUSER +# define NOSERVICE +# define NOIME +# define NOMCX +#include <windows.h> + +U_NAMESPACE_BEGIN + +/** +* Main Windows time zone detection function. +* Returns the Windows time zone converted to an ICU time zone as a heap-allocated buffer, or nullptr upon failure. +* +* Note: We use the Win32 API GetDynamicTimeZoneInformation (available since Vista+) to get the current time zone info. +* This API returns a non-localized time zone name, which is mapped to an ICU time zone ID (~ Olsen ID). +*/ +U_CAPI const char* U_EXPORT2 +uprv_detectWindowsTimeZone() +{ + // Obtain the DYNAMIC_TIME_ZONE_INFORMATION info to get the non-localized time zone name. + DYNAMIC_TIME_ZONE_INFORMATION dynamicTZI; + uprv_memset(&dynamicTZI, 0, sizeof(dynamicTZI)); + SYSTEMTIME systemTimeAllZero; + uprv_memset(&systemTimeAllZero, 0, sizeof(systemTimeAllZero)); + + if (GetDynamicTimeZoneInformation(&dynamicTZI) == TIME_ZONE_ID_INVALID) { + return nullptr; + } + + // If the DST setting has been turned off in the Control Panel, then return "Etc/GMT<offset>". + // + // Note: This logic is based on how the Control Panel itself determines if DST is 'off' on Windows. + // The code is somewhat convoluted; in a sort of pseudo-code it looks like this: + // + // IF (GetDynamicTimeZoneInformation != TIME_ZONE_ID_INVALID) && (DynamicDaylightTimeDisabled != 0) && + // (StandardDate == DaylightDate) && + // ( + // (TimeZoneKeyName != Empty && StandardDate == 0) || + // (TimeZoneKeyName == Empty && StandardDate != 0) + // ) + // THEN + // DST setting is "Disabled". + // + if (dynamicTZI.DynamicDaylightTimeDisabled != 0 && + uprv_memcmp(&dynamicTZI.StandardDate, &dynamicTZI.DaylightDate, sizeof(dynamicTZI.StandardDate)) == 0 && + ((dynamicTZI.TimeZoneKeyName[0] != L'\0' && uprv_memcmp(&dynamicTZI.StandardDate, &systemTimeAllZero, sizeof(systemTimeAllZero)) == 0) || + (dynamicTZI.TimeZoneKeyName[0] == L'\0' && uprv_memcmp(&dynamicTZI.StandardDate, &systemTimeAllZero, sizeof(systemTimeAllZero)) != 0))) + { + LONG utcOffsetMins = dynamicTZI.Bias; + if (utcOffsetMins == 0) { + return uprv_strdup("Etc/UTC"); + } + + // No way to support when DST is turned off and the offset in minutes is not a multiple of 60. + if (utcOffsetMins % 60 == 0) { + char gmtOffsetTz[11] = {}; // "Etc/GMT+dd" is 11-char long with a terminal null. + // Note '-' before 'utcOffsetMin'. The timezone ID's sign convention + // is that a timezone ahead of UTC is Etc/GMT-<offset> and a timezone + // behind UTC is Etc/GMT+<offset>. + int ret = snprintf(gmtOffsetTz, UPRV_LENGTHOF(gmtOffsetTz), "Etc/GMT%+d", -utcOffsetMins / 60); + if (ret > 0 && ret < UPRV_LENGTHOF(gmtOffsetTz)) { + return uprv_strdup(gmtOffsetTz); + } + } + } + + // If DST is NOT disabled, but we have an empty TimeZoneKeyName, then it is unclear + // what we should do as this should not happen. + if (dynamicTZI.TimeZoneKeyName[0] == 0) { + return nullptr; + } + + CharString winTZ; + UErrorCode status = U_ZERO_ERROR; + winTZ.appendInvariantChars(UnicodeString(TRUE, dynamicTZI.TimeZoneKeyName, -1), status); + + // Map Windows Timezone name (non-localized) to ICU timezone ID (~ Olson timezone id). + StackUResourceBundle winTZBundle; + ures_openDirectFillIn(winTZBundle.getAlias(), nullptr, "windowsZones", &status); + ures_getByKey(winTZBundle.getAlias(), "mapTimezones", winTZBundle.getAlias(), &status); + ures_getByKey(winTZBundle.getAlias(), winTZ.data(), winTZBundle.getAlias(), &status); + + if (U_FAILURE(status)) { + return nullptr; + } + + // Note: Since the ISO 3166 country/region codes are all invariant ASCII chars, we can + // directly downcast from wchar_t to do the conversion. + // We could call the A version of the GetGeoInfo API, but that would be slightly slower than calling the W API, + // as the A version of the API will end up calling MultiByteToWideChar anyways internally. + wchar_t regionCodeW[3] = {}; + char regionCode[3] = {}; // 2 letter ISO 3166 country/region code made entirely of invariant chars. + int geoId = GetUserGeoID(GEOCLASS_NATION); + int regionCodeLen = GetGeoInfoW(geoId, GEO_ISO2, regionCodeW, UPRV_LENGTHOF(regionCodeW), 0); + + const UChar *icuTZ16 = nullptr; + int32_t tzLen; + + if (regionCodeLen != 0) { + for (int i = 0; i < UPRV_LENGTHOF(regionCodeW); i++) { + regionCode[i] = static_cast<char>(regionCodeW[i]); + } + icuTZ16 = ures_getStringByKey(winTZBundle.getAlias(), regionCode, &tzLen, &status); + } + if (regionCodeLen == 0 || U_FAILURE(status)) { + // fallback to default "001" (world) + status = U_ZERO_ERROR; + icuTZ16 = ures_getStringByKey(winTZBundle.getAlias(), "001", &tzLen, &status); + } + + // Note: cloneData returns nullptr if the status is a failure, so this + // will return nullptr if the above look-up fails. + CharString icuTZStr; + return icuTZStr.appendInvariantChars(icuTZ16, tzLen, status).cloneData(status); +} + +U_NAMESPACE_END +#endif /* U_PLATFORM_USES_ONLY_WIN32_API */ diff --git a/thirdparty/icu4c/common/wintz.h b/thirdparty/icu4c/common/wintz.h new file mode 100644 index 0000000000..ce9c1e9019 --- /dev/null +++ b/thirdparty/icu4c/common/wintz.h @@ -0,0 +1,36 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************** +* Copyright (C) 2005-2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************** +* +* File WINTZ.H +* +******************************************************************************** +*/ + +#ifndef __WINTZ +#define __WINTZ + +#include "unicode/utypes.h" + +#if U_PLATFORM_USES_ONLY_WIN32_API + +/** + * \file + * \brief C API: Utilities for dealing w/ Windows time zones. + */ + +U_CDECL_BEGIN +/* Forward declarations for Windows types... */ +typedef struct _TIME_ZONE_INFORMATION TIME_ZONE_INFORMATION; +U_CDECL_END + +U_CAPI const char* U_EXPORT2 +uprv_detectWindowsTimeZone(); + +#endif /* U_PLATFORM_USES_ONLY_WIN32_API */ + +#endif /* __WINTZ */ diff --git a/thirdparty/icu4c/godot_data.json b/thirdparty/icu4c/godot_data.json new file mode 100644 index 0000000000..16cfcd2651 --- /dev/null +++ b/thirdparty/icu4c/godot_data.json @@ -0,0 +1,9 @@ +{ + strategy: "additive" + featureFilters: { + brkitr_rules: include + brkitr_dictionaries: include + brkitr_tree: include + misc: include + } +}
\ No newline at end of file diff --git a/thirdparty/icu4c/icudt68l.dat b/thirdparty/icu4c/icudt68l.dat Binary files differnew file mode 100644 index 0000000000..548c1a5a72 --- /dev/null +++ b/thirdparty/icu4c/icudt68l.dat |